From bde5c9925315ab45494e2839e482429fb626bf58 Mon Sep 17 00:00:00 2001 From: Nathan Miller Date: Wed, 16 Sep 2020 02:15:34 -0500 Subject: [PATCH 01/38] updated submodules --- .gitignore | 7 +++++++ overcooked_ai | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index fbf22604..67adabf6 100644 --- a/.gitignore +++ b/.gitignore @@ -106,6 +106,13 @@ venv.bak/ # mypy .mypy_cache/ +# VSCode +**/.vscode/ + +# CHAI specific +**/data_dir.py +**/slack.json + # Other .DS_Store *.key diff --git a/overcooked_ai b/overcooked_ai index 6eaceb0a..321b390f 160000 --- a/overcooked_ai +++ b/overcooked_ai @@ -1 +1 @@ -Subproject commit 6eaceb0a9a2501f1b9fccbf4c7016d6662ed1108 +Subproject commit 321b390f0f2aa8310c87029bd22f3b26cbc351c8 From a1c70b2f44bbbdddc79b14678ceaf0367ee099f6 Mon Sep 17 00:00:00 2001 From: micah Date: Wed, 4 Nov 2020 11:13:18 -0800 Subject: [PATCH 02/38] Fixed overcooked commit pointer based on issue #14 --- overcooked_ai | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/overcooked_ai b/overcooked_ai index 321b390f..6eaceb0a 160000 --- a/overcooked_ai +++ b/overcooked_ai @@ -1 +1 @@ -Subproject commit 321b390f0f2aa8310c87029bd22f3b26cbc351c8 +Subproject commit 6eaceb0a9a2501f1b9fccbf4c7016d6662ed1108 From 56073fbe2fa7beb04ace087d592d58cb91fd086a Mon Sep 17 00:00:00 2001 From: micah Date: Tue, 1 Feb 2022 01:17:18 +0100 Subject: [PATCH 03/38] Fixed test issue --- human_aware_rl/rllib/rllib.py | 3 +-- overcooked_ai | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py index e93a07b0..8e1bfdea 100644 --- a/human_aware_rl/rllib/rllib.py +++ b/human_aware_rl/rllib/rllib.py @@ -193,8 +193,7 @@ def _get_featurize_fn(self, agent_id): def _get_obs(self, state): ob_p0 = self._get_featurize_fn(self.curr_agents[0])(state)[0] ob_p1 = self._get_featurize_fn(self.curr_agents[1])(state)[1] - - return ob_p0, ob_p1 + return ob_p0.astype(np.float32), 
ob_p1.astype(np.float32) def _populate_agents(self): # Always include at least one ppo agent (i.e. bc_sp not supported for simplicity) diff --git a/overcooked_ai b/overcooked_ai index cc8aebbe..7e774a1a 160000 --- a/overcooked_ai +++ b/overcooked_ai @@ -1 +1 @@ -Subproject commit cc8aebbe5bb2b43262c4a104a74a7d7a48517f50 +Subproject commit 7e774a1aa29c28b7b69dc0a8903822ac2c6b4f23 From 6dce6d5ac83a6856e08d0fa7205e316650e27e04 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein Date: Wed, 1 Jun 2022 17:19:50 +0200 Subject: [PATCH 04/38] package versions corrections --- install.sh | 6 + setup_corrections/setup_baselines.py | 63 ++++++++ setup_corrections/setup_main.py | 21 +++ setup_corrections/setup_overcooked.py | 16 +++ setup_corrections/setup_stable_baselines.py | 150 ++++++++++++++++++++ 5 files changed, 256 insertions(+) create mode 100644 setup_corrections/setup_baselines.py create mode 100644 setup_corrections/setup_main.py create mode 100644 setup_corrections/setup_overcooked.py create mode 100644 setup_corrections/setup_stable_baselines.py diff --git a/install.sh b/install.sh index 6aacfcda..ee695242 100755 --- a/install.sh +++ b/install.sh @@ -1,4 +1,10 @@ #!/bin/sh + +cp setup_corrections/setup_baselines.py baselines/setup.py +cp setup_corrections/setup_stable_baselines.py stable_baselines/setup.py +cp setup_corrections/setup_main.py setup.py +cp setup_corrections/setup_overcooked.py overcooked_ai/setup.py + cd baselines python setup.py develop cd .. diff --git a/setup_corrections/setup_baselines.py b/setup_corrections/setup_baselines.py new file mode 100644 index 00000000..ee3ebdf0 --- /dev/null +++ b/setup_corrections/setup_baselines.py @@ -0,0 +1,63 @@ +import re +from setuptools import setup, find_packages +import sys + +if sys.version_info.major != 3: + print('This Python is only compatible with Python 3, but you are running ' + 'Python {}. 
The installation will likely fail.'.format(sys.version_info.major)) + + +extras = { + 'test': [ + 'filelock', + 'pytest', + 'pytest-forked', + 'atari-py' + ], + 'bullet': [ + 'pybullet', + ], + 'mpi': [ + 'mpi4py' + ] +} + +all_deps = [] +for group_name in extras: + all_deps += extras[group_name] + +extras['all'] = all_deps + +setup(name='baselines', + packages=[package for package in find_packages() + if package.startswith('baselines')], + install_requires=[ + 'gym==0.17.2', + 'scipy==1.5.0', + 'tqdm', + 'joblib', + 'dill', + 'progressbar2', + 'cloudpickle', + 'click', + 'opencv-python' + ], + extras_require=extras, + description='OpenAI baselines: high quality implementations of reinforcement learning algorithms', + author='OpenAI', + url='https://github.com/openai/baselines', + author_email='gym@openai.com', + version='0.1.5') + + +# ensure there is some tensorflow build with version above 1.4 +import pkg_resources +tf_pkg = None +for tf_pkg_name in ['tensorflow', 'tensorflow-gpu', 'tf-nightly', 'tf-nightly-gpu']: + try: + tf_pkg = pkg_resources.get_distribution(tf_pkg_name) + except pkg_resources.DistributionNotFound: + pass +assert tf_pkg is not None, 'TensorFlow needed, of version above 1.4' +from distutils.version import LooseVersion +assert LooseVersion(re.sub(r'-?rc\d+$', '', tf_pkg.version)) >= LooseVersion('1.4.0') diff --git a/setup_corrections/setup_main.py b/setup_corrections/setup_main.py new file mode 100644 index 00000000..e4709084 --- /dev/null +++ b/setup_corrections/setup_main.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python + +from setuptools import setup, find_packages + +setup(name='human_aware_rl', + version='0.0.1', + description='This package has shared components.', + author='Micah Carroll', + author_email='micah.d.carroll@berkeley.edu', + packages=find_packages(), + install_requires=[ + 'GitPython', + 'memory_profiler', + 'sacred==0.7.4', + 'pymongo', + 'numpy==1.15.1', + 'matplotlib==3.0.3', + 'seaborn==0.9.0', + 'pygame==1.9.5' + ], + ) \ No 
newline at end of file diff --git a/setup_corrections/setup_overcooked.py b/setup_corrections/setup_overcooked.py new file mode 100644 index 00000000..0eed6848 --- /dev/null +++ b/setup_corrections/setup_overcooked.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python + +from setuptools import setup, find_packages + +setup(name='overcooked_ai', + version='0.0.1', + description='Cooperative multi-agent environment based on Overcooked', + author='Micah Carroll', + author_email='micah.d.carroll@berkeley.edu', + packages=find_packages(), + install_requires=[ + 'numpy==1.18.5', + 'tqdm', + 'gym==0.17.2' + ] + ) \ No newline at end of file diff --git a/setup_corrections/setup_stable_baselines.py b/setup_corrections/setup_stable_baselines.py new file mode 100644 index 00000000..eaca23bd --- /dev/null +++ b/setup_corrections/setup_stable_baselines.py @@ -0,0 +1,150 @@ +import sys +import subprocess +from setuptools import setup, find_packages +from distutils.version import LooseVersion + +if sys.version_info.major != 3: + print('This Python is only compatible with Python 3, but you are running ' + 'Python {}. 
The installation will likely fail.'.format(sys.version_info.major)) + +# Check tensorflow installation to avoid +# breaking pre-installed tf gpu +install_tf, tf_gpu = False, False +try: + import tensorflow as tf + if tf.__version__ < LooseVersion('1.5.0'): + install_tf = True + # check if a gpu version is needed + tf_gpu = tf.test.is_gpu_available() +except ImportError: + install_tf = True + # Check if a nvidia gpu is present + for command in ['nvidia-smi', '/usr/bin/nvidia-smi', 'nvidia-smi.exe']: + try: + if subprocess.call([command]) == 0: + tf_gpu = True + break + except IOError: # command does not exist / is not executable + pass + +tf_dependency = [] +if install_tf: + tf_dependency = ['tensorflow-gpu>=1.5.0'] if tf_gpu else ['tensorflow>=1.5.0'] + if tf_gpu: + print("A GPU was detected, tensorflow-gpu will be installed") + + +long_description = """ +[![Build Status](https://travis-ci.com/hill-a/stable-baselines.svg?branch=master)](https://travis-ci.com/hill-a/stable-baselines) [![Documentation Status](https://readthedocs.org/projects/stable-baselines/badge/?version=master)](https://stable-baselines.readthedocs.io/en/master/?badge=master) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/3bcb4cd6d76a4270acb16b5fe6dd9efa)](https://www.codacy.com/app/baselines_janitors/stable-baselines?utm_source=github.com&utm_medium=referral&utm_content=hill-a/stable-baselines&utm_campaign=Badge_Grade) [![Codacy Badge](https://api.codacy.com/project/badge/Coverage/3bcb4cd6d76a4270acb16b5fe6dd9efa)](https://www.codacy.com/app/baselines_janitors/stable-baselines?utm_source=github.com&utm_medium=referral&utm_content=hill-a/stable-baselines&utm_campaign=Badge_Coverage) + +# Stable Baselines + +Stable Baselines is a set of improved implementations of reinforcement learning algorithms based on OpenAI [Baselines](https://github.com/openai/baselines/). 
+ +These algorithms will make it easier for the research community and industry to replicate, refine, and identify new ideas, and will create good baselines to build projects on top of. We expect these tools will be used as a base around which new ideas can be added, and as a tool for comparing a new approach against existing ones. We also hope that the simplicity of these tools will allow beginners to experiment with a more advanced toolset, without being buried in implementation details. + +## Main differences with OpenAI Baselines +This toolset is a fork of OpenAI Baselines, with a major structural refactoring, and code cleanups: + +- Unified structure for all algorithms +- PEP8 compliant (unified code style) +- Documented functions and classes +- More tests & more code coverage + +## Links + +Repository: +https://github.com/hill-a/stable-baselines + +Medium article: +https://medium.com/@araffin/df87c4b2fc82 + +Documentation: +https://stable-baselines.readthedocs.io/en/master/ + +RL Baselines Zoo: +https://github.com/araffin/rl-baselines-zoo + +## Quick example + +Most of the library tries to follow a sklearn-like syntax for the Reinforcement Learning algorithms using Gym. 
+ +Here is a quick example of how to train and run PPO2 on a cartpole environment: + +```python +import gym + +from stable_baselines.common.policies import MlpPolicy +from stable_baselines.common.vec_env import DummyVecEnv +from stable_baselines import PPO2 + +env = gym.make('CartPole-v1') +env = DummyVecEnv([lambda: env]) # The algorithms require a vectorized environment to run + +model = PPO2(MlpPolicy, env, verbose=1) +model.learn(total_timesteps=10000) + +obs = env.reset() +for i in range(1000): + action, _states = model.predict(obs) + obs, rewards, dones, info = env.step(action) + env.render() +``` + +Or just train a model with a one liner if [the environment is registered in Gym](https://github.com/openai/gym/wiki/Environments) and if [the policy is registered](https://stable-baselines.readthedocs.io/en/master/guide/custom_policy.html): + +```python +from stable_baselines import PPO2 + +model = PPO2('MlpPolicy', 'CartPole-v1').learn(10000) +``` + +""" + +setup(name='stable_baselines', + packages=[package for package in find_packages() + if package.startswith('stable_baselines')], + install_requires=[ + 'gym[atari,classic_control]>=0.10.9', + 'scipy==1.5.0', + 'tqdm', + 'joblib', + 'zmq', + 'dill', + 'mpi4py', + 'cloudpickle>=0.5.5', + 'click', + 'opencv-python', + 'numpy==1.18.5', + 'pandas==1.0.5', + 'matplotlib', + 'seaborn', + 'glob2' + ] + tf_dependency, + extras_require={ + 'tests': [ + 'pytest==3.5.1', + 'pytest-cov' + ], + 'docs': [ + 'sphinx', + 'sphinx-autobuild', + 'sphinx-rtd-theme' + ] + }, + description='A fork of OpenAI Baselines, implementations of reinforcement learning algorithms.', + author='Ashley Hill', + url='https://github.com/hill-a/stable-baselines', + author_email='ashley.hill@u-psud.fr', + keywords="reinforcement-learning-algorithms reinforcement-learning machine-learning " + "gym openai baselines toolbox python data-science", + license="MIT", + long_description=long_description, + long_description_content_type='text/markdown', + 
version="2.5.1a0", + ) + +# python setup.py sdist +# python setup.py bdist_wheel +# twine upload --repository-url https://test.pypi.org/legacy/ dist/* +# twine upload dist/* From 75f17b137a2b0b7e12ffc0191560a1195991e1a3 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein Date: Wed, 1 Jun 2022 17:20:36 +0200 Subject: [PATCH 05/38] package versions corrections --- install.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/install.sh b/install.sh index ee695242..cc7ddfc2 100755 --- a/install.sh +++ b/install.sh @@ -1,7 +1,7 @@ #!/bin/sh cp setup_corrections/setup_baselines.py baselines/setup.py -cp setup_corrections/setup_stable_baselines.py stable_baselines/setup.py +cp setup_corrections/setup_stable_baselines.py stable-baselines/setup.py cp setup_corrections/setup_main.py setup.py cp setup_corrections/setup_overcooked.py overcooked_ai/setup.py @@ -21,4 +21,4 @@ cd tfjs-converter yarn cd .. -python setup.py develop \ No newline at end of file +python setup.py develop From 43671214bcce7edeef5fd5ae29fd758b3a5799cd Mon Sep 17 00:00:00 2001 From: micah Date: Fri, 10 Jun 2022 16:08:12 -0700 Subject: [PATCH 06/38] Updated overcooked pointer --- overcooked_ai | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/overcooked_ai b/overcooked_ai index 7e774a1a..57696371 160000 --- a/overcooked_ai +++ b/overcooked_ai @@ -1 +1 @@ -Subproject commit 7e774a1aa29c28b7b69dc0a8903822ac2c6b4f23 +Subproject commit 57696371ecccc6c25f1b8dc86b1cd1d71f3bda2e From a413fb5daffdc7d4799a0fa29abb1c9c643e489d Mon Sep 17 00:00:00 2001 From: micah Date: Fri, 10 Jun 2022 17:29:25 -0700 Subject: [PATCH 07/38] Fixed testing issue --- overcooked_ai | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/overcooked_ai b/overcooked_ai index 57696371..a778db7d 160000 --- a/overcooked_ai +++ b/overcooked_ai @@ -1 +1 @@ -Subproject commit 57696371ecccc6c25f1b8dc86b1cd1d71f3bda2e +Subproject commit a778db7dfcfe2225e84209f8fa382450baf45ed9 From 
343904f032f3d28374f16afa9499084dcc1c7348 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Thu, 4 Aug 2022 21:23:32 +0200 Subject: [PATCH 08/38] changes to the install script and readme --- human_aware_rl/ppo/ppo_rllib_client.py | 4 ++++ .../ppo/ppo_rllib_from_params_client.py | 16 +++++++++++++--- human_aware_rl/rllib/rllib.py | 7 ++++--- install.sh | 12 ++++++++++++ 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/human_aware_rl/ppo/ppo_rllib_client.py b/human_aware_rl/ppo/ppo_rllib_client.py index 2972391c..593e9540 100644 --- a/human_aware_rl/ppo/ppo_rllib_client.py +++ b/human_aware_rl/ppo/ppo_rllib_client.py @@ -57,6 +57,9 @@ def _env_creator(env_config): @ex.config def my_config(): + ### Resume chekpoint_path ### + resume_checkpoint_path = None + ### Model params ### # Whether dense reward should come from potential function or not @@ -317,6 +320,7 @@ def my_config(): "seeds" : seeds, "results_dir" : results_dir, "ray_params" : ray_params, + "resume_checkpoint_path": resume_checkpoint_path, "verbose" : verbose } diff --git a/human_aware_rl/ppo/ppo_rllib_from_params_client.py b/human_aware_rl/ppo/ppo_rllib_from_params_client.py index 0b8552a2..4da4c673 100644 --- a/human_aware_rl/ppo/ppo_rllib_from_params_client.py +++ b/human_aware_rl/ppo/ppo_rllib_from_params_client.py @@ -32,7 +32,7 @@ from ray.rllib.models import ModelCatalog from ray.rllib.agents.ppo.ppo import PPOTrainer from human_aware_rl.ppo.ppo_rllib import RllibPPOModel, RllibLSTMPPOModel -from human_aware_rl.rllib.rllib import OvercookedMultiAgent, save_trainer, gen_trainer_from_params +from human_aware_rl.rllib.rllib import OvercookedMultiAgent, save_trainer, gen_trainer_from_params, load_trainer from human_aware_rl.imitation.behavior_cloning_tf2 import BehaviorCloningPolicy, BC_SAVE_DIR @@ -83,6 +83,9 @@ def naive_params_schedule_fn(outside_information): @ex_fp.config def my_config(): + ### Resume chekpoint_path ### + 
resume_checkpoint_path = None + ### Model params ### # whether to use recurrence in ppo model @@ -368,6 +371,7 @@ def my_config(): "temp_dir" : temp_dir, "results_dir" : results_dir, "ray_params" : ray_params, + "resume_checkpoint_path": resume_checkpoint_path, "verbose" : verbose } @@ -377,8 +381,14 @@ def _env_creater(env_config): def run(params): - # Retrieve the tune.Trainable object that is used for the experiment - trainer = gen_trainer_from_params(params) + + # Check if any resume checkpoint given + saved_path = params["resume_checkpoint_path"] + if saved_path: + trainer = load_trainer(save_path=saved_path, true_num_workers=True) + else: + # Retrieve the tune.Trainable object that is used for the experiment + trainer = gen_trainer_from_params(params) # Object to store training results in result = {} diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py index 8e1bfdea..8a0d2a23 100644 --- a/human_aware_rl/rllib/rllib.py +++ b/human_aware_rl/rllib/rllib.py @@ -622,7 +622,7 @@ def save_trainer(trainer, params, path=None): dill.dump(config, f) return save_path -def load_trainer(save_path): +def load_trainer(save_path, true_num_workers=False): """ Returns a ray compatible trainer object that was previously saved at `save_path` by a call to `save_trainer` Note that `save_path` is the full path to the checkpoint FILE, not the checkpoint directory @@ -633,8 +633,9 @@ def load_trainer(save_path): # We use dill (instead of pickle) here because we must deserialize functions config = dill.load(f) - # Override this param to lower overhead in trainer creation - config['training_params']['num_workers'] = 0 + if not true_num_workers: + # Override this param to lower overhead in trainer creation + config['training_params']['num_workers'] = 0 # Get un-trained trainer object with proper config trainer = gen_trainer_from_params(config) diff --git a/install.sh b/install.sh index d5c276b4..2cd021bc 100755 --- a/install.sh +++ b/install.sh @@ -1,4 +1,16 @@ 
#!/bin/sh + +# Install git-lfs for OSX +if [[ "$OSTYPE" =~ ^darwin ]]; then + if command -v brew + then + brew install git-lfs + git lfs install + else + echo "Please install brew and run the install script again" + fi +fi + cd overcooked_ai pip install -e . cd .. From b2336ff0ca3b25e725efd79643fc09b00b400e4d Mon Sep 17 00:00:00 2001 From: Micah Carroll Date: Thu, 4 Aug 2022 22:56:23 +0100 Subject: [PATCH 09/38] Added PR template --- .github/pull_request_template.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/pull_request_template.md diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..5c34820e --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,19 @@ +# Description + +Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change. + +Fixes # (issue) + +## Type of change + +- [ ] Bug fix (non-breaking change which fixes an issue) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] This change requires a documentation update + +# Checklist: + +- [ ] I have commented my code, particularly in hard-to-understand areas +- [ ] I have made corresponding changes to the documentation +- [ ] My changes generate no new warnings +- [ ] I have added tests that prove my fix is effective or that my feature works From a70392909bc8e59ec4ce58278a821c471990a5c0 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Mon, 8 Aug 2022 19:09:55 +0200 Subject: [PATCH 10/38] fix to install file git lfs/brew --- install.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/install.sh b/install.sh index 2cd021bc..d8fed619 100755 --- a/install.sh +++ 
b/install.sh @@ -2,12 +2,15 @@ # Install git-lfs for OSX if [[ "$OSTYPE" =~ ^darwin ]]; then - if command -v brew - then - brew install git-lfs - git lfs install + if hash git lfs 2>/dev/null; then + git lfs install else - echo "Please install brew and run the install script again" + if command -v brew; then + brew install git-lfs + git lfs install + else + echo "Please install brew and run the install script again" + fi fi fi From 67f5dd6863e449f2a57703d0d9470c2a5404b7d1 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Wed, 10 Aug 2022 19:59:47 +0200 Subject: [PATCH 11/38] add additional documentation to the load_trainer method --- human_aware_rl/rllib/rllib.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py index 8a0d2a23..222e820a 100644 --- a/human_aware_rl/rllib/rllib.py +++ b/human_aware_rl/rllib/rllib.py @@ -626,6 +626,9 @@ def load_trainer(save_path, true_num_workers=False): """ Returns a ray compatible trainer object that was previously saved at `save_path` by a call to `save_trainer` Note that `save_path` is the full path to the checkpoint FILE, not the checkpoint directory + Additionally we decide if we want to use the same number of remote workers (see ray library Training APIs) + as we store in the previous configuration, by default = False, we use only the local worker + (see ray library API) """ # Read in params used to create trainer config_path = os.path.join(os.path.dirname(save_path), "config.pkl") From a4a5cd19f013e366df80cb0bdcae7658663d8a42 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Wed, 24 Aug 2022 01:02:46 +0200 Subject: [PATCH 12/38] adding trained model and test on resume functionality --- human_aware_rl/ppo/ppo_rllib_test.py | 61 ++- .../cramped_room/checkpoint-500 | Bin 0 -> 185376 bytes .../cramped_room/checkpoint-500.tune_metadata | Bin 0 
-> 214 bytes .../trained_example/cramped_room/config.pkl | Bin 0 -> 2544 bytes .../trained_example/cramped_room/progress.csv | 3 + .../trained_example/cramped_room/result.json | 500 ++++++++++++++++++ 6 files changed, 549 insertions(+), 15 deletions(-) create mode 100644 human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500 create mode 100644 human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500.tune_metadata create mode 100644 human_aware_rl/ppo/trained_example/cramped_room/config.pkl create mode 100644 human_aware_rl/ppo/trained_example/cramped_room/progress.csv create mode 100644 human_aware_rl/ppo/trained_example/cramped_room/result.json diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py index b86fc597..bf46d7dd 100644 --- a/human_aware_rl/ppo/ppo_rllib_test.py +++ b/human_aware_rl/ppo/ppo_rllib_test.py @@ -10,6 +10,7 @@ from overcooked_ai_py.agents.benchmarking import AgentEvaluator import tensorflow as tf import numpy as np +import json # Note: using the same seed across architectures can still result in differing values def set_global_seed(seed): @@ -35,7 +36,7 @@ def __init__(self, test_name, compute_pickle, strict, min_performance): self.compute_pickle = compute_pickle self.strict = strict self.min_performance = min_performance - + def setUp(self): set_global_seed(0) @@ -57,13 +58,13 @@ def setUp(self): def tearDown(self): # Write results of this test to disk for future reproducibility tests - # Note: This causes unit tests to have a side effect (generally frowned upon) and only works because + # Note: This causes unit tests to have a side effect (generally frowned upon) and only works because # unittest is single threaded. If tests were run concurrently this could result in a race condition! 
if self.compute_pickle: with open(PPO_EXPECTED_DATA_PATH, 'wb') as f: pickle.dump(self.expected, f) - - # Cleanup + + # Cleanup shutil.rmtree(self.temp_results_dir) shutil.rmtree(self.temp_model_dir) ray.shutdown() @@ -144,7 +145,7 @@ def test_ppo_sp_no_phi(self): if self.compute_pickle: self.expected['test_ppo_sp_no_phi'] = results - + # Reproducibility test if self.strict: self.assertDictEqual(results, self.expected['test_ppo_sp_no_phi']) @@ -249,35 +250,63 @@ def test_ppo_fp_sp_yes_phi(self): def test_ppo_bc(self): # Train bc model model_dir = self.temp_model_dir - params_to_override = { + params_to_override = { "layouts" : ['inverse_marshmallow_experiment'], "data_path" : None, "epochs" : 10 } bc_params = get_bc_params(**params_to_override) train_bc_model(model_dir, bc_params) - + # Train rllib model - config_updates = { - "results_dir" : self.temp_results_dir, - "bc_schedule" : [(0.0, 0.0), (8e3, 1.0)], - "num_training_iters" : 20, - "bc_model_dir" : model_dir, + config_updates = { + "results_dir" : self.temp_results_dir, + "bc_schedule" : [(0.0, 0.0), (8e3, 1.0)], + "num_training_iters" : 20, + "bc_model_dir" : model_dir, "evaluation_interval" : 5, "verbose" : False } results = ex.run(config_updates=config_updates, options={'--loglevel': 'ERROR'}).result - + # Sanity check self.assertGreaterEqual(results['average_total_reward'], self.min_performance) - + if self.compute_pickle: self.expected['test_ppo_bc'] = results - + # Reproducibility test if self.strict: self.assertDictEqual(results, self.expected['test_ppo_bc']) + def test_resume_functionality(self): + load_path = os.path.join(os.path.abspath('.'), 'trained_example/cramped_room/checkpoint-500') + # Load and train an agent for another iteration + results = ex_fp.run( + config_updates={ + "results_dir": self.temp_results_dir, + "num_workers": 1, + "num_training_iters": 1, + "resume_checkpoint_path": load_path, + "verbose": False + }, + options={'--loglevel': 'ERROR'} + ).result + + #Test that the rewards 
from 1 additional iteration are not too different from the original model + #performance + + threshold = 0.1 + + with open('trained_example/cramped_room/result.json') as f: + j = json.loads(f.readlines()[-1]) + #Test total reward + self.assertAlmostEqual(j['episode_reward_mean'], results['average_total_reward'], + delta=threshold * j['episode_reward_mean']) + #Test sparse reward + self.assertAlmostEqual(j['custom_metrics']['sparse_reward_mean'], results['average_sparse_reward'], + delta=threshold * j['custom_metrics']['sparse_reward_mean']) + def _clear_pickle(): # Write an empty dictionary to our static "expected" results location with open(PPO_EXPECTED_DATA_PATH, 'wb') as f: @@ -302,6 +331,8 @@ def _clear_pickle(): suite.addTest(TestPPORllib('test_ppo_fp_sp_no_phi', **args)) suite.addTest(TestPPORllib('test_ppo_fp_sp_yes_phi', **args)) suite.addTest(TestPPORllib('test_ppo_bc', **args)) + suite.addTest(TestPPORllib('test_resume_functionality', **args)) + success = unittest.TextTestRunner(verbosity=2).run(suite).wasSuccessful() sys.exit(not success) diff --git a/human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500 b/human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500 new file mode 100644 index 0000000000000000000000000000000000000000..03752ea8362dd953385d305f6e2a046b697fac91 GIT binary patch literal 185376 zcmXVXd0bB47j=V1Nt%mDDN&Lks^{z@8qE;=6)G44frJL`1d)1aICL93(pL{$nwb zsQ-;i4UKzk@(bM<91)d}tsrFd#_WBY1tdaj>7?rVYknp_}|djQD__ zLXo{ea#o@nf=0v#$uAEYX|Pb#V4=j&qPb8bG)Uq9$9sl`2X2%N8s#kZe@|^@kYZv& ze2~)eAZ3Gv;?AN&V+c4;F6Yw>#rEpKU55Q4{{*nUO$fc zpvtd)k_J~(Y5ui!0(`g}0p=UV;c6RO9w{P@U%SskZFvPucb-Tq9TG^GSvp%Ly%|RB zC=rCeHigLTPPFstXxL^IgbIG;WPQvQIQQO)PARfwo36fqnR*8i#^y5x`&jyXSqfLO z&gS=)tl$>W1-S9#B(9`%m24YpKp%FW1()RpeC{+E-kq;Wm)WnRA1V!KqsYM0qd+j+} z*IX00e?Z9pTOa z(O{aIhzb(AJl69dmPHm(J?r^E-J69$+!r-U7qMY_yI{dUCdfR_29XEeMB?c}n&@!? 
z2e+AWI~Q~Qh<=2JpMK%rC$p(&c{oIm9KoBGx#EtsU-9R^9(Z{FD6~1tKzZjs+<#n` z-~E<`W9OUTzda4`$MYQUv3|UyIFg;({SRy+6G1)ZFj(D6#p7{TVf2<#czwr}7v~Fw zs#db}Md@Z7qWP-{KFfCE=2jQla#EGrK31epE{~@&bJoJW&8K0G*I&pkT}KU9ou-=` zQqg{X6Vd5x0d2+I)F?)jJGvy2xA`{w^nx!4o3Ai~$z?3OR~^57?juuAoF(Duv&f?K z>(sn#I;}Qzfy#^$Xc|=yEnX*35Y>)HuPme^$7t{;hKIl;W(l8QPbscV#~O)UFc{N} z=N{Fv2)iGcFw9jLpIb*p3!dTC9WnU2%9;Lq6eVzIQAH};gJ~BNP|oow6VqKlo3BUV z%Sr-QcRBGNuaNXfCef$qS~N#ljqaMYlVn5&V04QUtnNxdTlW~~-kXowo>|aqca zizpU{&vFR@$$Mo;CtSreA7v2})89&_6N(6BcHk9}=8gh>M5!YHvEH4`c z{kGi?nkn5<=wS(_Dcm|Zoq7E#3>+X2=y z-V3|K=7GYPyTXIl7vomlsc?Wl$NK{FTTn?(FF8(B{YQgQB9aMa&&kiz z^Wn*01Ce%}hwi1?WcTG55Ge_&mayN8M#IvHUn&4oo&^rGOUZ=GtDttnQ9)mvH*3w< zj)A#J!WE0ZkUkAA*gv_4xyj97%Gutqf3zK@XFX(#65>hunmEC*MP)?cy$z|Ayd*Sn z+X1E0fw2U4*U+lVT#@FXn!beMYp~UJq_*V29e9Pan;i?B=}{^i}10o4-@Z5^O!p)h(KJ0@3Yn-5n~Ud)mBrgKBt;( z2#bNL<;y|(=?)Z$`da0-F962ekAY4tWAv;nCV{`A!9sf)nxBxtpT{Rd?P0XYlsAWk z@!?o?^1aZZ$`tZms*My7oPB41k(DqVA{qya;8{^pNsHd@ieH2QkwKKDQlB}d&6&g-#Lbns5IB&u$mOn}vTmD5r zn`;UNS9rl)o9S?&J_w_N1}#>;--L>N!*Ihn3N>P{*`_DyJ)q{>4>&-f>}b0E=He-z>fwK;be3;zS{qdT)bxvIASes zk@!!rs$e1eE0}`!cU{6u6I?+lS4esrbTKJxE`NG#HGTH^9Q$?uFk4`J2G)Fi4|>xC zT)6m>uzmSPj67#YV=||5Zm!4Ao+`xsGkhUA_B5R9Jxtwt9Ps0VDo7Sdpcw&qJW~Gw zuCPAEd(?LGf0>2oSguY*FKf^St9Tf3SBtLG7zb%(_gL_K%D>fV(x4T#T>GLw%^lH? 
zYrk(J-EMhw=3p!zbx9i!ZjT555_^8Sw+k=zkH#xS7rD}`Sni(SOFeGY!_HP+^3dxC z^qH&hJzASEFl`_A9>OewBVo)$sKw*j47tMGw{W#92y{0Vpn~ccnC)rGue=}0=NH?; ztmw1C;;}b}a`(`2+*ZZeov)!V{vA&L=FG-xNTTw<5z_FvQV=*wf|S?C~Cfg zM5U()e;3REY3=c#aKILnzeTdRDogxZ76sZb@`aXTE|KzC>!DwNfFzuqjG3x>%%N40 z&AuIld4kQt&O*l4s*Z%8FWt%4>!FxuIS6zA3Q72*RPc0G0Az_$k8WZdiyF& zDlCENUuOxgNan&1&C}2jUqois&g9w^8Q?rV9V;IhQ5}yKGV!Jp95FbJ#&;G#+RK^n zdwevG+EmQ$Tr2>!a1kn98ABqo_T$>aZg6A%NoKV5H1p|MPXBYQBF986@Qu=A)_Q+G z1TAl6GVhB}HSQ<=NSeo-cFPg3JZIkWbU8?DdPYP|)#>^->+rFfCZ8iXNt7-e27A~9 z;R~O`wa-qtdGH#uUmXS}BQN4a$JY?%G@Q;ab$|!Iy+Gw>nMKe2LoiKpCVX&Dfc)-- z_-5h)^j{>(>pD{5>_|71dLPK9w&~F?pT5J1c2h9TJ_R!%hRc0$q!;FOV!(lgyuri{ z>caPeZK*$%32~&kc@LoWX@AYt9S?BjM@`tgy#`#KrE&Qc!}#yduW^K`KRITxlP)^z z&5K*q>BrJCXsFJHS5~v}Ytu!%(iX)WTot+Xl~b^q6wxVL?5X4P(Ohd+1l63>#M*r% z`R18>i1OADYPekvNB(i)lf6nt-#&7R*bKVO^cdXxY?71b!>A zE+`Ls3dh3L@4xUz$rAE%@_BwLC!J^9ynyMie#0LF8S2`z7r(bp!}=?+U_b2`o|$kI zYW7UR@+*&pk|RubphF@A91@{wfx{ti97mHALJ;5o2XuA?qP@*Ws8)&xTQ^;J`mhUZ zbj10Ew9BCK_?+PH&P4Fst7ksX*&GcbI{i5hOaaox(pA3rqOIJd3}PwBzQ4s zzF8#5N(}~qttUDz&LWH7+=n;a#@PIOGnSiv#Yta9NV|(MA34{M@jG#_B~OeeJuSD` z9r7I`F75}hqGHxD;yw&3RiV~>Uo6H>7vavlOd z%;GX=mR^9x(rVmoMH4LXm4>f=w!Br*nNNP!#g;0M<8@v}v^{wf_Dg)m{N$^29T`D4 zPW?fi?tTE<<^F@RMrEp)-vJBitH3BM3bswp1LKpCe6eX55yp4Ij~mUPxK5r6T{196 zN0gpAw;b)F#mTXv^C+7ofgww)K%w?C7)zc2wORr(&yIoX{x(S1t8Y;rsLJPeCNu4l z3bJopCwn@OK|Ys!#Bq&kFfrQ$+CF~72Z<|T>~sm}?--9=YjjDl%_H(8>MW%1S;cyG zuEV1kgxf#PUXbilK6tah5=T_aV%9kie6+KPnT2<=&W)qt{S;9w zpSunMi|(*5e`Z2dl3|UB(G0?Vt-&QTmty|`JB*rEE(p|`38wi{n7ZX4vA8oA_L%KJ zucgvh-qtJ3blF0lEtp6K`quz4I?ZO58-h>kQSwHh4`Np)M9JPW9OYp3^SfsP6u&MP2+0E`lhr2jFcRY^{-8HCIC`W2`+=QG;4!^fin{!V6@drjHW+)#A>u^IYiEo1$&Z;+H*rff`S1m+Fw z5Gb7O6;zE$BP2qd{o=rd$BO#-i%D>l`2B_B31q+c^xkMAr8NDP7!NAS7zP#2d|i_Vb`!E zR7+CF^n`erHL0Fi)|CoGB7B+gDPtH`Wej~Icfl}MA5i}Bp7lR=Muq(=FlqH3I3AjS z#A7?Krd*;vn1DV>uSinYHeBVh7wW6Ov#1EPD0$dO2EtwNitAkVe*Sx=K6{VwdH8j~ z`PIqnNzYdnod1MD@>Ei0RbJDGqsXJ?+j#J51_r#e!FdPD7^(j&@asN^E<-)3u7MMn 
zOq_wuMb0FDs2}ksuh>Y@ckD(ugWFU0psdGa-28Jq7(05Q)QL~1fBhzI88G7eRwu#U zU6Nqpkj9^OL~zZM`{|7%KS1~B5qf&?1yP&R4)R^Yh`MtI%(rSM~@HrRrr zKFJHdFU=;R8lL=Z+!rj1b)nDfGlk(^D+KA^3c=^_B=%{VF>Xwgf+rsjL0QXbKB4gx zZ(Z^R3xwlnX_qNi&AcMq+M-H77|o|gP11#=Z8`n7M2r3!o6PTuU*RV^JopHj&Cf(W zBdxOv@LPX1Zl8G0;#|) zGg&N|C!+;9cUIupv396EVXM${K`fMPyGrb}0j4ikhWg}a>@zPTb#dO<^4}(uwQgsj zz3bSec_NVayi)+@qtJbO3~nF&gguSigm;^kG2;tXK<@s-^qQsUwD}gKzC>`@s6++$ zPmu9wI4oZC1ugwIlV>MGVe(2TaZR)1uYW1}JY z?nqcQQ5#baoqPR@fvLTX`2N-zK)+5wN`NBItoHzM_pvBAo=oI~ z>F|2?RgnDXzz-ih%w*(lLW6$*`1{1rT!RsGcTEHse_lZK67;ymks|yO`h#2xa;9$z z_foYHL%2|FER9KQ!@wa9X4AfT;F;G)Dq{bGlcq-8;N9w)N;Mf?C2PvRoXNzY+{$i! z>cq7j)mT1xE}tRO$JY0xgUye{c(W)B|(B3SLOg^M+Z(J9OWMDITaS?{fM_-HRGvs;Wh2L$uUc40#Ei~uLW>EiHkqqydL*+zg z@Z?D+s35b4>DwM+O_u)LDzg-eWoqa)@hp60qKjd<4{*85ei{+>1Nn#BxGc38hsE@f zwj7<3$?30(C^ca?^*JEV zXPv^rs3qA_q#6eM7^KC6;J{E(B z?M8f-<|LH1IRILN#lq`9){&Lh`eEsg5me@AD33KQS0D&uPJG z$!@Toc%I}m|A6p$+mWl-P|>WL#HIAF!1VfR9Obwh|Fz%5$N9zFtndrIF_1!sYDM}i z;|Xl2uO#`lN3g1SHP4UUi>a#VvQrRZ#6cA7lGSBpnx)j-^jUN{qj{ zD|1Qa?<6J?;lZ^g|0Ly^q9_eotmx3LnGrE>6b=uRGc&V)N^`eLt9 zGM}9l!>tzl2bF_S=;QVpB<||KrPX8EgW02@ZB80i-3ev;o`>P+;5LEl+B66jG+^4; z3<&w+N){&@kRIQ7_T8oef1iGipNIm9G}s1@jwRxQwXcO)D@p`Eln!Flye!h!av0a{ z=_l3P0X`W$Bq~KY>~P{@eA_w?4G+GD*~QDr#VxsHQ}=Ax8YW_4S|knAa<0O8f^MUx3o*Jr01lCQ;COPwz@${b(QnVukeQh(br*Nx}g0elq@M@lh@0`zF2eosQHIUD#~K#A8p+F zDwll?Dq8rEcUCvBjv&YQelyZYv;~~3gJw! 
zye=iQo)w5s;vbL{S3exMS|E&&jfP!%Ga)(TAUOeX>_vY9Ry%Enuw_SuT|OCv&rQIc zWywVEj2C2{pM&vpdzs6(3g#$$j^3uV7G=k*Sz+-62vnI$UJOXUGOtV~a;*sRN4P-9 zDSM2nlVsyR$D(xQTBseqK-gDzS)kMA1ksv0Xy#tT#%`N|Zgv6SHvan`c}mYMW#8DVbi+En*arDu&fX|iCt$CBvvtl z#Wz3^&a)SbJRzV)jAtu;0ol_3V9K5p7#^R+R30mG{-};DXewvH#Fs0nEP@5ce&I7^ zDV~ZR*mGRKKV1@~2Ucj&Gaqxo-Hk)X?MtAhR0`VYDCl35LRO?HfMv&t8p8#~_7nn{eAfvFF0+}AvTzGEfcet8Hz zr(R=)t=oyozbWucZXw+NQNk>L#0qb$3qT#^Nsw1pO&*O}4yi|ekqw_tGXJVTwCI<^ z&#z*zKfHq6{izLpaYNie^DZcfvLHvVspF#QXIZ-E5WoJ)15Y2f!iq`{eCBA+zQxVMdPdQfNJya@?6MHWx$XGIfy4kAhJ(Bj90@DF3Xk0MAz~BCg3#(cNASm;QGE zm$HvUKWGtt4&8*UiQ{n9tZ0Z0TnkgqPQhin3S4_-T`NmBZ$A^biThyk-B-ugXd8dYUz9sJZl~^wqBpy|7cA$*cl=d zJ_bfU(Gw(l|7M$>>oWK4lPq?)ETt#qhH_qRK3%=r0}I^V!05?hur67TA2Cn{nTa3J zZPsRHAbC;v$5D~Lwu2ak9T&r?L zsQ*NZ)>J0@PNpgz4h@A@)mm)9bXC42@GrjX6UF`>Jzn~J4BxI{ z4Nr}#aOSo^SaP@;PVHL_k9Y6KkJ?K7!NdsoGNzRnn`IKOb=vfaz6qDL48;J;nOLTE zl^hNogf&YI`L1#h#&ul)oq!ocbfP$G{$)b1S{K6l;fvAeo-(amxP*EMlEM39AkKV$ z0nXWeCx_CWv)!6%WNnQO&H0%pa9Hn2XRp-abE7RFd%h+A`~!(`rvxk}A~>c`mq#0n z9=dOa0NREq-#(seuK9tvIi~!@-yFQ`Qir0pD{!FP0b|T7g{>Q(lKaJT@W9Du&@5Jk zifS#;YV`$sgttI8Z8QDqdxeePcZVGs5aU^|%0XRRlbTJkgn)l)a5?2J9D7)Zd;Zwr z6Q$!Qx`WW_^f}Bdq=z`$UuRaQjxwj^;?z#h2qosYL7S{6dME6qVJ_ZyCPad2z9!sA z)(#`5Nb&H~+4$pi9hv#A0Lzvq5dG1FXEj}g(`+(GJr$uh{QUWdfjqvbe=;BO#h9<` z*~|TwePZJZyWq^D9b{ry23x(dikR&mN$U))`RZF&VcJ>+d@38m-2~F~XRW7LXexR~k8-bL=sWKeIem8lTqqg|ZKrPu?QgdW z<2Fyh{Y4EpDJl^^uUW%W;^*<#9rD!LISJm+lBb6&VgQ9TAo;2umKSD_(DwIWT=9wg zGZth1$K~1j!cb^gtV!#Yd!aE;i%)d*#1fAu7;;2~JLJgmX?qQxuD9Yx z1?%zMtPew)<37xG72%jrUcVzJx=& zh#~!N`)9$;&~Y48lCkSoAr|! 
zpu|U&xj(aqd8y0btmI;B-gt&-ha6%4F$sb!X)o3zrBx&4w?i;`wHKc6hZp_T;WjfR4@SJpi^~;mmc%a||v8n%0Fm~ZhyxP1N|K+5k>D(69H+mS0 z-Dw6AfjT&1QxYx~+yVW$CNMELnuVK{!tObHnbOxZycgpPwx{&LPS03Sux~PCtd)Sm zGgGl3LmgYg%kfQcv|!Z6BLr@`_ov6 zn@!+-LL48Bcg4E$J|?pMUbWd1eNw+o0WP(~z}<-}L1a&c1xJb^if)30LLcZmmqBhF zhyZ1kxe%x|1HUwmhKldUnf*{bNSC`wd^Dvyw#B@}YZ zm%<(gC!yt3du%>30#2uGA}$xdk)xeOpg8IEsSnsQV+DSD` zpNd1&pYt^-VUt;ENDRzcID}bK7o)-L`^4UNH4JO)BNn%>vigNekjm$ghJ7!|sSPRE zSJXt(RcztGmt<6*jifGbH70}{CG}ByXzy-A>eP`9HfzHz^DQLZAPV(Ij0gFATl~6n zCraOqXUbiiOqicT%2)gmXjRMMd-oxoPU|+Sy>r20Y(qVpI(!yxA6WrTCHj0@O9Jfu z{s01Jb;16<5e%LUG>ytoUAAFjOcpFHXY*x`pM-8B- zUxVJ>_aC0MX(um~bYOW!FMIRrJD%R0Oj;JIqR$HgeXi%(dd}pB=F*7DF16n{F?kA>ZGYf zmzN4^Rq0^#O;z|bOQ z!S;h(u>LWK{ksh%Gk<|fW&$+Umyr%fRo>RP1XjMdjBSFc+@iCMEdOs99u(K7=fc0B z!phl5!zw}K;9mHnyo|e#>xcbUT7l)<<%1DQ@HFWVPQN^gzOui}@7>+cBi>x3k zwQ38W9Vkh?QaV6GZ6!bYbvC!rJdVFVZeZzCZP=L9RZKc43~KGx2v--sVMUrlo)U>_ z^VV&O5F7A;t?e>|LE#%_9~nxd#u=d4oMblLGmh0ZXbX3o%VQ;LlcBOB3H#d?!n3`Z zgqu!q7WHX!u#&?C0>hslXz5$v2PK}UQa!#BQZz)yKH==7f~RcjrE z>+<5D>a-61%!7k$;`tw9h_ToND#j#$Uikm9B14h zm9;m>Y&#vky`Y`l2n*#|b~C{#Z6>~qPK8i#!cx~)!UoMBu=}kDtvvY_(vD4}^4>+b za%K?@99N~~3+n{jRtuf-{y|Ik4wyg16gMUAMRW5-kojDXZaC@9Wyv|={Raz4$LCON z66WH!TQ+p@lsw`y?>k28#n3J1go1`MOb|UufGxg@=mxQ8Z0jL^`1mdwtj^!WN`*Zb z+^B$)5|ZGeQZD%aejqH^Y|Q0)mw}qSD_{K6mKV3p6VAT&2OcKoV31fSuAr;=+w`B< zs}T$G_>;`vQ;SpniSy*kt^9^_0ad;$r}ro}CVFVjqdXwEAM_e7qjBgcE{Hn>_aBMU zDF*~#e$j>2*Ofv}`v(|3Y(Ae~v`o->`!d?Bt!006ykU#?BD!%&AiexeN#L)oi^2po zSaIw(q_=P6KJ1JzMSmC%IJO*(#)weobvnGHpom=fQ8L6Su7r_--S9+bGTj#`gq8ch zz?GX-T-9x;Z&R7U10r9+=LuT;)3ZFlkS090%APx`N8ql3uedpM3O#+US2?vUj($DicQ);G{VW-={bCPA%zHR;b|!6eIJC0_M* z)TBTQ|XcrQ`&H#3^Zki@Lje(5B6No4KaXh|FMLX1Rh4EAw9&?YBr5rGlt(X zSE8?e*;1|ZPXyD<#tCJ86{y;*V)91IjBnH*s<$4spcdNA-rHF4!R|N)x)+Fg+&>VF zItb@S{=r>~(qPGX37#7JahC4IE|8jC$%%g+x35=%*H)8gMr#XPa$JWa(}gr>*Gljk zAwmz1G>1Di(KK!dZ%3#m(#-V+PDjRO#wTg*?fs#bN=l})3!tvbw-7B(wRR&I)e;mvb zZ^GE+cC;{z(s$c6QMsDnjN7wVqD(Ec8|Y#2+t+yi(p4g%GZE@VtN5G{2i7;Fm5MdQ 
zU`cNPk#g9E@`EQ?M8$HJeb)+4JB&eV76suFu4wYTm|UfSSXwj_B>PlgXT^NXYS@W= zj@O0S`tigBZwh7eV%W@*$+)Ib0;66GVeEY}Y_Kf`cFda(0l(+sj-Lwz&30c{a;_AX zH|`g1nJG^WU%o8}Gx|(+4S5W`&yK-=HJ{k!xAAc7*gTL67U0+)F?hw#9PeDaC@?&? zjftd8U{gEfuxQE1n%rqkjDDLckSTr6d|pM8j#OEkqaBU1#x9^Pw}M#iiy=`ht%BM& zj5rKN!lAZd=#(9W@?KNG{idlvae5ql=$V3>%f_M6fnm%xx0W^}v+{xxHaJ*MG8j`EW*lCBu@GILPi0 z66aPcl6u7x-LsK>RUY!gl@*Q^E!NQ#A0>tQ5Taej5xr9VbVQ zN`P!@1yk^iBYAPV;BD2AxAN3=Hg1hLp4S#bli&}+-{YctWGn4R0Rd{yTg^a2aAwk{T8---VkQbl!UyHEud{*BPhOV zj017SY{9q~Ja=a!W@cU`a;>xR!p{VV=vD*uekuH2`<>_?lZM*%-NFqYmqNL^4k|Cb zPO@!Zu=S~<@Kow$825KLL>~Tt9n*SH;b$+ZAAZX=9n|FyAJ#&tW4ds4l?Pw(X@lUL z<3yg5rOu^fcjBvVPq-l(MM74+1>IZ&swojgBRAwjc#u6U9XExJZ8xLciUw5PFIng} zqL$sS*?_Ae^O@V2fS*qC?;_@PNsJ0Bpo)Srn_PxO}U8fTJxc%g} z_AYRH>w?>T?TLxQZV)lON5;!-M!9|^=x!y@9XE|BUr>drOatgQb%Q$fW-{>D8z-DO z$)cWG5s?PF>T=sz5a$pLE5^MfJELRKSkDzCwW?TG%qSet84E|2jYj7eA|TS`Ba{~( zjujiF;c1~fri^gI!H-+nuIkCCU7S0lB+bR#)*-FNZY6GroP+l*mbP;gENF1y zZYrvD_{JBg<*|a_(rsfQ=?v$Qsr*6IkY1X)54yXpgv%m;G#HPfMC}<|S3eK#df(ub zsW%PMb)jtmBdO^997t`c2bq}7f)a;Bvdt<07514yg#jmH1Dznu(gW+0bBTB2OzNK@ z%XPKZ!MFuoNSqQN_ZLTLr=4(D;TqJR>VpSsROtc!4F1-o(QyA1KFqZUd?mV3?S}!D zPEw;bTlc}AA9~cS@eU4{3+NZearARtqp+-bG?o64iJ!EbsqKvzzF+Gq)b(6qQbWFO zw+APAV2v6t6jkL-4cTN-WeRP%H-=lOgbls_9jH0;IJ;$k2Df>v!H#bd73@yXAneHtO8P)NiN8I!1AZdFxD~(nwLHq zsyU;$apx=^wXBRZ{gI|xmVl2nPQinx3S`A64~ScOk!;>7N)=9Xv{c*!wT2ZIYfi@D z;QY&YSMd;xJu8cjhwI?n$hAzz&b(%ObvD@`)qoGxGRTPiDA4=15w6>ehYMSmlf=16 zu=?RT{v_}TlbwD`khv%e4qTIi`Rld8-6a?99H4mKk_q1K_QSlxro4CVZn|ncvdGG@ zpy+Q3qbg?cpwMihUarg|q{VRj%6%YRP=&D?({P5K7N1a(3;lnlVB4EzAXecFQ^QxH zwZ~X0JLVy-UFD4_od?mobURlcr3i=gFA$a0f8mLC3ixa=p;MZg*o|j*(8|+^6vex- z9mC(T87qvqaLssrG$tRnMY_W!X?tot>NBX1>t?r922kg+0*!FFi5t&Ora^tAbszwzwS8WVEQy9!3W*}*3KOr>#sad0@i7@wX` z3CWyQ#ZXn{O;pCzpzq&g zp_}<09{$pq_3xO6Zx&n!`}U!}P5&fhhIfGB;21hHF9HrJtl@9}-BNn` zvo|kze~pL~nzDU=r}8gR&iMS_0vMMq3E5x{&*onT;o@@W3>4>69?J4m2XR>36b4`a z^a$Q9kHW~e=V+47Ic)BpKo`aKf$hy)+Sw~kkKY_mW8;lczfOZca516>SKbr6un*;f 
zikV1c1l&{Kg;w0~p;j*^aZQbAB<6)QrcaXVlvq)1u@qc$vsrNV>tB>B83l*;b)bUl zY_Jk+!Ywu;Ao<@d80$2ZT~~Le>Ec_tbU`-XwBCX?FW*OEWKwYAw%zRgo1q@6`6b4W zpFth1bLq_fpTg@yd9`-o5;XIR;Wrk@0-U;zw~v1(E4e<;?5_ZiiQ_PCmjxKRWZ~0O zb~NX>>#dOAlt?P= zwdhgB7Z5Zp1hYei!==l!AmzqAFk765ZFX0ghnqWm{8hu|Y_&wm#8e!;&=I}Q>9Yqv z)i8a{WN>!Af^i?-5Q~y1+?DN(a%bvcrdB8$5KCl_!&8}|_Xsv~bTq8_)keHSzOeNs zkJyb*UT8Qzf^D9y3%`ty*E|{FgHJ~7tZ`gC4;+_d;Vru^iwsx{3nXs~!_#t!(n?Qq z$?pz1{UjYul47B0rY)LQW zWW+QYViq`~^5nfZxNLyswY z$xI7_-iiKLcp?Hsc7LnREMGz_7kgppkye)Zb|&d8O&|g*4~&);2RZ$3#LOuceflF{ z?L8qI_V5bPEb;)Ow5w!Ik_E1{T?YjZo#Bb;QRX>m4_>L*gEC`vF{0QM)T?ZWyyH^r zys;G0*KNm!vms=9ohP_I^v6zbX*Bpa27TU7fiIpN%rkc(njDRWmfdyazxzpGZ0`nd z&MpS)?=h?`!&zWi7$wYou@|N5_M&alEa=2d>=ysdO5`2TZQC`{P;!A}4e3`!Poh|Y zvNDaW@Z#R21ewo0G&*<_vFisk%ABMF;tQc$Z3;|_PKO2Fdtpq$V$O|j!p6w!a4hT+ zo^rp5;c03I!!0Cx*w!Nf_4wU*n0 zg_t_zO?WSqeY72;GO|dgsU^(Ji5$|0dzp6ZG~x2*M{LX0HMk+i11ukV2~0ANkkez5 ziG9yy@;k;H>Mm)p!tzA8pXdc`kBtNqOuZrU>^T1Fp9MQHI2t>0-jF4FPW;$xd5ei^ zlJu&hj>X#{EoP$s(Kcr0VVzR4BX13S`H_TQ^b2mfFf9 zgyO7HA_?=h0T_iGXYU_dKm)reu-HEZN)C-7D_)Jp+KymB;D1@d(`RPmERP_}Zb&BD zjVoA#>jmL+OMRSOHjUg_HV-teD6`A@Z!o7z4e}GD$@unG=K1s;#;u)=FQ(^W&9xO6 zf7Xj-n#BDBpULH)fX z_4n5V@kR^K^pt?aO_z}$jO7cr8&KJ-33Q~|Z|w5Rh3;2&>}`yv#YBmv)JSX_6dOG! 
zU%hIPL~2ltle4+efCgRQoDbEGqv>9;^%(cChM&3=&h_;c(8sYMxWoM<$SN4|55KPQ zkil~FoYBE{+?_=4w)=A3wKhykFq<~tHRYsmbaF>%@7dgN<1sLkyu=67{V`r3hR27XY4s2W71ozf!~M#*S1tt> z_GiMHr*DMY>pwuYX8>sSio@H`Vp69Pfnu$uSUExk-Fmbjp>`##hGg*B(@vKB(}DhX zUXTLD*u7SYwYzv=vmW4+r?y~OrzwbSvSgC4hl7dXG}0i_B3R_Vj||F0LW51J@P&>l zZthiJ<1F%UVWlIacF17&wI~?rH^eh_R>2cCo)#U9WcSMNVG!R>#4fFdX=Yk5ujM{O z``m(vfyej^L&>z3`CMV>+qo|*tnt~jHrO6%$^V7VWqDJN;OoL%ynbjTHfv}=XuC81 z8@&hjX4q3%KY4!PnH%qVB#QeT-Qc9}L^yxho+vwSqUU2CF=MNJ5H0v*QEb+aIZ#OC z8q45!i#Wgje313T%hQA7eNn_v795`pqu=`vLYZ3(j*sGa@9h~(>uF?F*Bynm`fuQF zxj&SJSkwKN^jX@7bnu)mLkIUO(WG((7I0xCH(L6@qF~Q4Y^j+B!@VZ(@|G5Xwbx-_ zvWir4v@&EF?&IC(Uk~ZPI<%;}ntgq_7aT4n33b0X@iz?%sqLJPcD1pIS-)2e5Lr$xBxh1D@$eWYw)ytZoE8W8B{4B!3~MBFmr4lF}!u0yzVbz zPw68FT=fs1ZZkr${4uyNIgPJN8%qy&P^dg#2NV8#3GT~csqR-Uqy_*Owgit zPJR@8Bfy=j;^{rzQjGI4$L2q>_)Zt-DA%FB<6|3I>>k01zAg{?H5$`G<4N-681|s- zGCb*s<>$6ZQm30%sB>i*YtQ%`8d*a@15}c_19pl z{et*89OEA5gG7Fe$p4|}yyK~U-!M)nNw!pEWt36&I?w${T1Z1Gl1inh2&F^Qgb%Q$4J#BG&W z0g)r#L9g-*?)0UZq65Pjqq~$6bw>gFn6v!SKxUhSwLfHDmcZ$j{^0P$|u6 zom7AwR>qu*iW;`}#>2Mzb(r{mB=<1TnyFMKK!|4+Tur}+1`QXf)xFJZ%?cOHv@+)c z&W6G`{ux@zcF~`2+`(MQmFh*0Vh|+Ocl?<^C2Ktym9d_y zKc!Lsv3&wg59H{JOBP)5PhC{XJ4L2c=do{tLP&MLk0vo{+$`Z5c+#i~2TjxYjOY$* z?fgnD0`G$Q*F9+Q$QZ9^DZ}LOkCqBSA5h7{bQ9d z-gplN)aQ{r%~4P=+5pP*v>;lti}nbv5vwW5!d)vjL9&+*DZUhizukhs+q0NXK43^) z&Bo%PUzud#qC9Ym=tZUQ#r21tmC`)tT%7)N9cFe_Q{z3Q80oZ;iVwDti)%)J->>tS zet9hn&$%YhzZi>_f3^!pPfWm*=ak`eZWTQfEsC923h3@tmZ+)rn2_MIvpKNlB$Vil2=I*^zUw@|qIfs$T}T>m#vzWEl2VTLJeg8Vhf^;GtWh z+bhdL88R0hrBegEq2MspR#b5$O-Nr(lc5&J$|F*vI@--T{ zNt)Pv-;Od*g3&MDk9Mq70KJ9tQ8PPUaQsa_b#z}27H;9NH!%v9iibddLKMV4iKJ~_ zEwt>4I-R&76dTpj>MicI(L3IKWUE^++V9~r6i!xP|0Dt;eKlcMbqHqMT@2B~;rJsg ziW_I;CKMJH;d1p4*w;}4u}gAc@k?=*Yh8jOD`nU%X=`pqOFn-3bqJ5i7%^Fo8SG%M zHOsk~gU=m@aCCY(R<^Wa_VFC3{vpi;{3psL8~((W+A#DRTu$bmB3NbihWPLBfQm9- z^zbvLg=tFQ5v2lhB_D)kH+i4eEd?4Y8;b+{YZIB~SO0py5!6l!hgeZhw0UAh8V%P& zPt$bJ+4zNgs{Twx`1|8eD_&O?eL)OEtI3J#DA4$#!uE^^rUm8#@+|Tgs6XCC9)Dg8 
z2BH4oQB;B#HKgH}&ks^7XTX_eiF2Q2E(=yEchU>*EAhc4mHP94CvXc_wUPk-WYoz} z;6`~I#px3+;^Fx-c&&9aSa`+LbpE?Hd-DTY^lvqcXt+yF=I#djaxZ~)whfH8(ZxG0 zEkeUv+PHUk0zN*agPPn9I$KJM7A0E>x=tI>L!GCIkFx<>uvEqyRW0PDAeTgLF(yVQ zKa=3xDdg9G>R>r~J)ai}f%qHh7}>Z@;4gZGsuVb*f{PTblQAUqcV18}O?kml$7r0k zP8ToTxP+ame8wU(MUXKoiZ0rniLsv^kRtacx_344EA&G(Ad4nEO?IYgQ7BUUc85_e7*uk*GO}MjjM(B*HgHR z^X2sxW(}z9J(3GwdY5jL^CPRY-{FGVHdx*=hLc&T#B0vcxVl20YcR0^Va+nEy^_un z!&;&8ODZn*oy4};et}?&;f@7}&<*PjF!Rns%=Fb8EKs-Nra1250#*1Oj%qGvxN
6Yl|4Lbu+Fj1Zy4`}qco=usmSeV|1beRP$Qi6$M%)gh;gMyM zEb_<+?s%RP_fjQ^<+UeLO_#&O`)Z&docH~n$QT0-_md&$s2GUM>9?!VIYka0DWdf) zBVkWvGG+*cWZ>&exc|`ugO&^kCOvjWd$F^^z4HuUKzb`p**q2If8jj<3%iIT&raAp zW;BX_cSflt&rp4%2JBlC54}A+S76iz!3ixPT!|~8CqH?^_bDGit9K1f7- z8soYWFzt&SG`Z@+{%Q%D(UT6sraE*e+zW!*X#Q^cqch_EQik->L@Jm9nC;;_#Kd7Izh5x+#`z8F8SWS1VwZ??wr-W6C+Nd3H5U-s( zj~`XdaYLslTfeVGFv%NmiM0taWqb_h#$WR9up8?hKLh5J9)@p=JXm<#EY>!Z3R0c- z@lwhVHa%#|_V+THGvA^2uI8DPq4*yRa*kr*Jsll*>B# z5L{ebxWUEi$%h5`(9G`$PKelX?ryrA|0+!`cgJSjuql>%yIPwI%bms@5Pd@Btv(2) zrE_?HlmwoOP8HVX26M$hQ@F=7mN3`#J+$3O2uA~|Nyf`{AknuLJ}f5%pGxa-A0(!TRSp8frX&Md$_Ns*WN-L-Rz)t^5e) z{ZUZd;e!Krd4^4p678APj*1ue(0W5T7Mqlc;g<2j$JQ}qz)BroxrD$r>$NmAW;On` zu_nVMSELUS_k;mYIYBO#7b%m~7a2(5AVu`z*w?Oln1p1CWjTQ|ZLZdhK z5R$fy9{6^UtbJW5xCM`C*UR>L>@2T;Rx*`bJz|Q-i=-j>x(q%DYM|W`=jqJRb~q!X znCvk2Cg-{=K&rqVrPd5n!xf$Mpmqjc>+_=1zmCEGGDc&CaSnV+^uY&Li%}!r0cVK& zLWa&}JQ~$R9B;0Iy46!?&)9WveO)D-oK%7b|D_7D%T~eeV1M*(E9UR<5vaVO2*pzi zanrXEc=fCpt+>YTbxZ@`&e;rNdBzt>)Hb|ok%;-}H%Q~bWSa7Q5@lH~u+~HkMgHc| z5ba(X`c@vjFU%Bn1ihpp-p=%8raf70@|`BVbmPBY_n=E@7@WPX4ObgXQ9ZAj%+%eF zvlNp-y+szECQpHZ8;LmWPO$B}k#VT9SP!nTEE@MNuU_k_HFo3p zje@D7IHUXsUX~jL6Wi*j^XV~Aqvx1CjbL`S5pCJpPPTbJqAl-KLC)I~<&OU) zKc*z1TuCx{F6n@a*N4+09d|+Fs!IZ&2X;{PEuQvPjE9s-x){-~0ynZ_Vfk|lOg-_B z&|`TZQv47)^aaABvpcC96;;I6ZaPZm-lq+f_YDXcjn^D0a@q z_&h~&g0jd>ZyT3F*$8-?07o2)N5@0J`5lV#O~f^4Pu&wmehk&mxBYqmYyuH-emqE|`3j zf!)U|vH46rot{$1mNgs~j*uEaLEtWkui(g5PhX+-)@N``w;Q^LCU9HsXRuCDV|G#f zB6|`>g&3}}b(ib^y-%dx=yQjf2@+Wgr8jG0*M!;Q_EnNGWFJQp? 
ztAliBbNk){z8J8@LhTJG=5q}uy#0&Icz@;GRi*4tXe4_pmJU_U4{^bgYhdbC0cPie z*tQi(T=#1+CgR^lX77;X>K9Q~XYd7SXEVI`cA7PXy{9rV$!Ox8MxZAEU4%zTV#7b7 z_Fz1?oEt|Mr*A}$CSNr2-VU1Yzfs5Jt#Id;9As2HA)J9McHNyu9{-7@@2}bxb;*>h0^!iTGt}Q?1U_0{2D_OLuD?DNdtZ9d_sU_Qh zO#g${rqjv0#nHI@;S9ECTRB;?_5xALQezh^_Tsk<;k;G9h_!wFfH(Z7s}`VOKU;|2GDv&c=&@tt9Qj71-nVgr+T0rT-Nz<+*W+ zZ0d1UGGsK8b@Xh8PYY|%B%jYrj`D&n(%Rh0nq)k_pHb>ewaE@ElxPuOEVB@92ofwjXP4Y|G z^Z6>A!R9&;|6UJEZ>F=VR#i@5Gm@Ev9b|U9OYocG8B{o)LpMwb63DA+a~{UN=$*NN z9W6eGav!9))Qs75dbT_q7yV6UEn3M6;@X$LPs6vee7+Qq3F#4JfNb9<@knNA;z4ER4|wa z-=PvecH$ygeGE~NM3=${Jfl@0nDuTDMjxss-(=nGs;!mLO>{i_T`DF}?h?U@{}Rzu z(~o$ic88SaIYO9ElHySrg<*%DPvereK_^>9%#KyrrYKpK-(K;Q25!1?7N;zza+&0tBqv8Mij%Az}2u%_8b^V zSHMt>E2gaAc^k`GVN;o+kf=J3$vsTLbiLWE@!%9t{aGnkbMq}Y^7`cD539J=k}f!F zcnVG!*U~%5o1wO*fO_qf1pBA}NV%U%YA46i+W0E+*H%c|C#s;`h)7@$Khf$-QBb7n zB}n-ki(TovKyr#7K$9O9ZITgAQEnm88BSpLX)mM zOCF*1(M$Amvm`t%xhvEj-i(1^4EL>jg{z&M>xB-6I4)2PR*bwve`cDIbqi8JYAwM} zVuy&M<5^f<5Jiv8Jx#q7Ps7qL2dKf;U2w{h*EbAO$(qa}P(9^M?>^Wjyg&38jI~8@ zZqGaFv~wev4VC;<JYeA9)d~^m`;Y z51L{pNP>dLSjfMdNtSMkr2bqIzPut&$Is^ZEFY(X-uEL^r&142)JKq@=Vz&O>Uq+2 z>n8699IT&mIEjWfucCKdi%8B2XDAcT!;8n&Xn#=|naF$1_P53gOFuZlwXg~b9WycD zkU4&fFvs)eOCdme8IEZ^N_S~pqR-{FqZH2@ZGHQPL~LAxf>EZZJZln4>$}2_ms+SD z@(wHJ%VGcHGdzPK34%+aq5VQWl`U|9k1o=Jz9UUwUKEFaJ166Qog2c$H5zok&qnm| zN`YZ}Wze12X%~G+6@!ujF>CG-G+*;k=%}TI<(JB_FYz~Mw%sJX4`k?f=N;6ddl^nw zUV^HF8!_{$8ybb%;Y(|6p?~pr^4L6y7RDN2mD&v=edQ{NoMT=exm=&@T{=u7)wz0F zvIxBb15sr0q}q|!cnwCxm!I2ZQ8P+LC})?9c0Z-C<@{~>PkKIPY}iV?mN$2s%&Zb6Q zNXcQ4Zxa1}m_sFn9T0hC6q^@uiCjGA$W7}=z>fyeT>qKoEbx~^|Oc>iVs?@wn1;zJ@Zz~4Q4kZ;B zf06i0j2?23gYh9%WUhEJBrQ$Aq6tkz(>GplHP05$UMnRvA`7XJbvVooQ-w1JN21GB z7g{rp=j|B8)2>Npd0nHKmU#t1DAmIv_kSeTVkSmB%tC`j8==m;4CD*!Sg6lWc=NIm ze8v~xaz#EP*)a{2gc7G4PZaRaHqD+TK8p&;9s9M%$@U)&WK%u3pPt|N`LmVtl2tDWa(VI_Tme4^xVfM&CzwkXH`l3 zw@9is)`8CHHODqXM~KN9g|Hxz*A2HoON#-Oem9=xyoyD~*g&4KBu`@Nq^ObKV>06A zb!z`~n24BkT331T4CL4VOnH`waz}Z_RYsq%V4Wt`*e!y$&pYX!g$E%^XE~m*OcmUZ 
z>>+16HsfM-CESmP&~17JRE6Hbv+gU1#I7lHaaoX}E;^p~_XamFR>mskBNUkz=%rv7fs5ttaQfNX;t6X`A@iMqbfETzFoJvs!kE1lWmzl(5Px*=LGm;jB5 z6sL)#qSLrvK5DVU1!QX{znR=@S?A#`Ua~ij>W%A`{HEA+zovX(k!L=1l0GpCDh@Zot^xaDHTQZ#}{v+He?^%G?@BpkST_b}=EXN<5l zgAy+tFmPVY4cvMJ!gg`);M+UMJ*Dt^<0ZJK&`P3I7Q&h@yfQkoRXDZCl#8^eqjO#3 z!NjGDo3VZbO8!@e4OMHw_TMUQ&g1`3v*r|B3=%!6dKKm~weO_wUy`$TuoLX^}K0_3Iip!q0?D_PGrS`_7TfB@evzmf{EzdgB>TPJaJh%<4WQ2! z!zi{oW-O;zEzNaRbmEZaL@tQWAv?kX&aomBC#OdVr8gbNl_JZbaN;wPnf3@BwSt*s z(l2tF&kLE~+`}rqXA0!f4B@s#Jq~{!qI)hXv&OTxps6sNxfm`aQMXg5(*{qNe@C7> zywXl^!t^zYNn0?P;Kx|?PaluhJQkjK6LsaVfJ5=9!TG)IqiY=@=%H7oN zh2<)r!M|t_9;J6<+M1_0dBKhgtNsY^jH{@qnVErQ+o?5;Z$ zDg7XF1C~@o+S~R{#U{+yu>@*GGUy1^G4S$hhahOQ36$p(I1o-N(Y=lW{| z9(w-p(swWEEYZcSCldvys!!3tOjFqS-$$w+mx3xW;;hWk9)h;!fz94Xva4?n$|&Cy z4vd*BSa$0%*_!rIaDJ)-40xW0j$So*@o5gq-hWEBPf~%KnzzZj(hD>s#gSP5NCU5@ z(I8UjM^^i)V%LKoMDG``-!-fvBYY-9dU+uBUe6~dB#%>z)5^TI7mV}HiJ?v8ZuB5& zjYt-xu+{bku?)+`jN1!IpwBftDsvj4#;9X2G?YPH;kxe>Rm% zp)>Oj3F!GPm^iZl1(p_AY$negN}2`R&936;<1SzyG9LC@U#5Yg%6QG#9|B|);eKVT z!0NL|79w zFGIqGN$n$%?hvQFZEu8$@@izf`vASZG6X%_M?lx~Fi3LjrcZmW6aSO;*dO+RdhL^^ zUG~v%Ftv$Z^$Nl+W8S+lYZeT|$bopz7HY53LW}fNu-d1RT5NCz69a3yZDIz!SSAH2 zBc>y!iD6%(E}hVAgMVFl-*2iCOswU=`=b)gUv!;hEdbQdy$v>3GDt{cHfC*E3=b#% z#B=YbfmOi?*nRChbYIJdjnVIfvj1ebXT#E*WXxCmSCJ39%A2w0HL%k1r${zga}qNf zgkL`J|Ih3;Jd=75CQLNs?%SUQt&pF>r#EkqhL$zxL4C0?Yzag}4Uw+o>%@Z#hT5@q zm^k}}P-od@j2uo81X-HFx~4eXT^ol3>OtUd&`p=sgkVsa1r)j*rH3-&`8>QCguD-e z2;~a$_|bguFx&+q6(fn}9aTY%TR2{I7==sjSc2cc46;Hc4$rBnVeJ|b;yoq~rj%Nv z%UVOY+BXBMU+;&I)xq@0^}V<+E*hU!{i9W**HTR_p1*Fa02gzm;MON+3|gKo@Vqw} zLo1KK&*nVT(yv6@_GyCCiN{beF%$0Gx&n2TXJXfsNWe)?{CZ z3wqwr+^^bL;O&Pt4gs9q?LJ%?P>COOzLUVxpLBOtKS_PIgyV85!TYo{6Z^KGIXo6; z68|m3g?|Z<;8Cn8JrNYYl%Q;vE4CKr;kPjdAnQURSiawjeY+Vf9lS26;h&HEtFdVC z%Y*rNxeN z>1Dq_Ip2yKRbj!5UcZA~NBimPrRLngS_h&b&F5_I9^~qG)S;Z5H!dq%MWmjXg2=-S zF#pd+I;&>@w~SC@i_|!_>)JJPu{jYp+a~bm^9K0cfi(N6U9U4~;{RcN!jie6q zO>tUHDt!Kt50_DgmPe++heUJc@ApBlVn~);NU5dY#x~<6epVGMe@w!)rHJ#0J78u# 
z7bH*aXCME5z-xOd_-+qpw($Kb&TqqMShk{?=3U@fL{7KS)~Qc$S6_#-I`IHbH*Upo zWzVtmXdMoR^V#1PTkdD~0B8@$u$k+|vVgwNc<}|#47agiN!ML4;z%c`SFPb3hv%W^ z&n!XihXT;+*aZP4<2bj7Pc%YL8#EN0u|<{V18i9VZ#kpLa9iY^94*I0_@LXbTRyAM1$}?K&=gK@X`_ncwYknb+ z-D<}fgjEZ6S3km8&CBp*gAS)~<}N5)5aoOyE3wh*yqM)`cP4rujMFl-;Uw}zxkTq) z{I#c>Xy~dj4^4`j{-vXu*JL1i^y(yeaHfM@)Fkq%)PHjPX1nT0i3$ZAGG z;?hIxMdoSlfZBR)o&P2r7od`>Wfr>IgdO%$ z#BO;Drl9p2HI|O&w&WP&qktGrHuVL1Js-;zuaoM}&yGU}$GM!>ha|YV*9$GX zUXX#)65N^3`YdX%Bq#pFf-K>)!^_gsak5(v3AOXUY2gN-_eP4y9T<(-7m}c(!GsxH zlxO>jo!M}8rjYAVX4PZ7-~<%IzC3p>M)iwusvin`J{_klXbwJEeGe`^Z~_IbeVjp} zJiQQR%N2TfGtXN(5c;c*Ufm1We7b-;FAQJ;_1nBJUWq&9TT6<2y6KHKE^Kl3S$115 z3%}*u1;_&~#9M=T_GxlgqQkLE!G*i09z)i+K19t8>Vi+b9-Kz*db(hOHv4%$mFMT* zeiGIW)d&(QM7e-AYbG^n zAwBZ55&flp<8Fy@+^a|OOeEzVHvALk{Fgd$>rOqOO&`QSt8W=^u1Eusmt7d+Gl7%h z6p6gwO|sZ(1it!n5&nyq4t3uYIJx?949-krx9!~on+EPe&k{#4NT0xzY%UTBiwaEX z9}D{(cfqZ`qqP6kP84`#(XGqfV3lV%1iY@Hhm&}0o1z2^cnwkW_uo+`WUJ zViN3JS1j1uAVLGZKhQVDlBi_;9G?1b$1|=2bj$BaXi$+ybyi%YqJ;!HqQ!kM(>8fX-Re2t4Bldcnv(aH^0pgY+KRh0%f;=#w4_ z->jUm@AXJ{`RcrI`Bp`Wr| zlD)bsG1ev=%r37-E-ePc^ygvcB_{|GGoZEpRkXiU9ZpDx(L}jqbhwcKT@Ovj?~wAPTi+)l9eA`q@8)Nr?!7(#PjCwDOEeN2E=90pU z#ox&0h^6p6oX>bhC}Q0}2p;TcqMg4Rs7G8d@m8CNS!*r9F+GTg2WZ1fvsj{*WDGh9 z{&-e32<)VF@sLX>)@r?@jn?B~shbZ(E?7&l_Gp3TU@oz$bs@W>_>7x$E9Nbn%vN%I z_ICMZ98Kjo(;g+xN@F`#-K)aHnWJe-X$O~(eTrm^t;GJ6ZcLvVN%wn9=GOJ^g2ER! 
z!M5!kqURg>AkdSUpIOZ%j>&~W8&@nEZB9h4jTWBHu|`MzAkuPU7udA#0Ke9)&?3`E zrj1X4BE#$C_snp7v&$Ht4$K7+Ro=^bTov*MIgoN3rYF|%*`8nv&~E3MB=ZgmvfFtM zsdhHmv1c+`>OLV+&5fwG_z2ymxdz`gXQ8XiU7BcfoIY>rhu2qQ>~iKF!y9v&>D-zE zIPvWTS((Xm-UfEyuM{D*Jold7T_6hKA~Nit8^twGj*#tzvRLxw3%pe>#-7_X@Ol)V zVV)p^-t(P=8?MFj+VEH``fnvnd>{#$o=5rY#s}*7pBr(!x|A3x?fIUbro$Hzqw5>!t#AN~{cJ$p$<9yAaPol^%DTV?p- z+)h6FPXXZx6Y{AzTWI7X$F%>b!_)W6aFId)Ei&^bw`$VJ`9+O_e$$77X@*;1b3r0i znK%i~r_O;|UZ-VRne_YeDAJ&-M_Thfk@Xq{RBDe4kiz4b_^1&FBASU+mIAk%_gR|u zNpK%ZY&nk$(m3D!HuOK1=j!#$IrIKHl>In@J5+It(maZu4ue!mbrEJ=@W<5>c6@p- zgheSju?f54(Xu}TRGBZmeq$K~<-CMRx>nqt7knRA*d}bcZ^{-dF5(V$Jc6+^wve}@ z#JRvzN5K4C0=cjD4))$w;2w7$?yxs#;c8Mf5DR@JXt$}c)=?sW;*-!e{ zEP%xi9@BAmy>Q-qeY}4;9A6KgrX8}+$svgtBL1M6y6kd-Bbm}5pLd1t)er}5r3^G^ z+lGUgsrWYU2z}tw2qe0QbSx~P!Qb12y19+yl@?(C@moUI|0MB5SSp0MZGnweU&$Am z3CE7*)1Jh%`V(41G;UKlDrZ#EHJe`3%TvVIy59}Z<#!lvZQ|L1eD6xew*BOfzatmA zxQrstybf7WNuuXyLj3^)7B9a7(=)i zxIH6F*wXZmbjv&u*!ku$Y~p>!M<+Y6E4TQrKeOX}ey$OJj;_GOmJ}wo#+c1$&%<{M z?&FyEJhQuV3v7=V#WTu^>Eqf5ux*AV+gLr5d!kchb<>i$*x( z`9k0tc%yjm+!8?@5XVQM};JJZQf2+6kLR=bGPBs zFlDw}dlQpcXUt9aUMf60sLSU0#!{05Q&GxHmNP1?pmISeVEh%@UE{9^L%6kdFpMD z{w^E!qA294`~rKIE;y_z3StgLkZ@27&kkyG?=Bsr{6K|cw1*+*%U*JN!8Z2!vj{E- znv9JH zVDz0a+{;``ey{Kv*DU%<`sDO6#%vDPwl|1Vm{&xvgs#9~I0{bRi5<-JJoQ#T)KHk4w_r(?MC<57Bc zXE_+Z(%{_oy}+j46yZ*+taI$vVh8WsfLv91FnkvVntUJF@AUoT(WNHgsg_c*b(AId z&}bbSRvRWuT};@>bylEWP=N*4BhhFLV*iXf7=1#EojQCT@{6X@rzysqP1k(7*xw4s z8XYDkX9rQ=m0`q~*N~%Lger>~1Wqw-G`i3Z4BHjaM|lNm?)^g*3+KYTVpG`P9*yM_ z&Ee0iljzq}%X9T+R_R&9C zKq`9A(T}3zgsoFsg`Ok*smkyRfdk*4xAbD4AV;klTgDdB^9D1irN9#Z7>AK$y;*o9 zPZ8xNmD3UYym;f>N1<)Y@A{b|*V4@~&Umyn1yfaKz<)|5sJQhi9xXkKiLE(!a?(CJ zr!NR*FAJvXGlu9ri)Ol9KM+q0&Oz!wAEygFa0=5GuIki)$=b)s!=C?0Yvdoo*{9>A zKvm#FHqh7PBdjrEG$Z~pZ88#LtA==PtXVq^eqszHYX8b%LQ@#VA{&1&PveZ$;O9EYXt%gsX&X9O=8{e0hO(cI!C0}+q zqw`05Xk9OXblV6FRc|EH(n2!fxdQ4;NT#YW8}LL(I8nd29IGGN!rAN=VZJoa-Mc=I z-fNVCpjiP#vs+9!-qeVcPW9%yyi~F3Xfx^Bs09b#0MOPI)c)9X;nhk7c&s!D?PK+Y 
zPoLeP8?D?yZTodXbk30qzr9Eono^fnTR}R)2s1Xzk!j*Nw7Nu_?}>RTIK+22YCeBR zQX;m)pQswh6BK}e_idXDy+zV0&6d4NBR>`jATcms&t!6F;+?~fr=HZ6k7 zyuP_aBOSeS=VQ*K`>5S>g6|5+Iea8kX^Y}n4YBqJ95KtT*?x*%+C|er)`A3EdluAXK!JJPB;WMejAiI?0MX{v>7BpX?yu4Ut%K z`WBqK`A$GLj^ozsy$c6ESi!oq8ZeS4a3g0GG|FuN8=l`(yJ7?Rez1V#nXN!>ejQZ| z3`djxv*c`nDJl9D23|QM(5rAN*1L>>$R(Bnr6aQ-G50;4*Srs1_vm2PKSqjqu2o9z zFzuP6290xu>>|=)=&>8l*l(YS~uFdQ(}n5#Z!oJsE5GzYb80*7DpFv5fwyc zRnsis`Q7f)DB-P-3k-aqE#@OF4xEECHT)s}l^Xok=^)Mi`nY*^op2yV6uzBp#esu6 z$m!FQgr$>WnZ|4d?oZ?jR=@WtPFt9SuOBYJ=Y!3dou5whi|-+5WWkq~E*!Sa#Y}Ta zt~>7o%3rul)1H;$p+$SpUYt2#U(S+_3j?xcvPIIo|0c=&M z1Bf-e#J2UOoKkWXTmC$k6YD5o?DGR|lbI_ftaV^pI|JE-ai;hpOpOhN+`+w0JmXj} zpLm^cxBK_<8ovFv4Y{mcOvT_fOU{?!8m^sY555J$yx;@uO|d2$U+xY!KD>Z{@#naa zfAz@aH<7IIjYBP(EQ2Gi+t@2rY0RBEj>Z1fWOwt;n8MNhBq%tF*%^F>@QUxaX8uiT zdutZ`c=!}O*>nu%@cd0Jjab&vH52PYc!rZp3{lds#UkZCn$nm{T==`Vi}OihQ>ITs z`Q1ia`xv+p#^IqjAJX$h5q5oDiE1@~ixrH)jWf0!B6n{V!{5)f_|rE9^3U^ZjKi%UeEuK3{CEQ1k=Tx+R2Ki(oyUjvEwI%g zjyb32!stEapp-q14M!H>e;X|X{rQT#cQcmVA^bk3GZQS%d2-{c)0xCbcP7jCx>RrZ zPV622gFg;=*poULd?h70NZ|8&+AndDIe=oXB76B$oQp3~?QW~roPfAzy;`wh8oI_n2H{r};m@Ix38*?6!as7p``jRNi zE6aurVd1=fTZ5LjpMg%!4CWMQ##L_i=VCroFxTW9++TheE4S;wU&*6zKq{o(U!fBp zujOaKZ8Yvj9XOcLMIZQk;GHdnv~TtS;ppivap%-_y!XS3X-+%F+CE8e zZq?h_*^$|FQ#Q{So79RrOPX-hyQA!#&RAEqO=k}0}pDYJc^5gLj?@jNPI!)#DqnVrj9`tc^5FAtPr`0#R@YsW1$et=e&R;49 zg`Q&4t(5@WVI!QG7>lExdO)&DA2=KuhNGX`;PZAx@LWBcZd@CR_on*_v)_HBnI=0} zYefc`vHK!bj|;{ngJ1Y=r7?mpsteHZ_68I;F2uAc&fKzZU2yAIGOEhe(x^9zFfTKJ zF0V?({iP>RXXX^zI^r8R7G1?^?NsjmV{`C*m4GrYjId-xC(a%%!T$U99IWhaVAkJ$ z=>4O~By~pPtsNpz(qYd6M-`!)+k9^OwiCGN?r5g_S`j@~o`Sb~Wa!%?9M>bR#aPDy zUYF-RnX1Dezb+CdD<5N9cF%=y<_9J#8@)$@6m?x}UQ zyZYr}<>i}bp>`kp9wp&h={2mi0S_sJ`qz5#O)sAfp=*V z`a24IU9BN$=UKeFt_1JpO5p4xhv=zm3kel{N;2svh^o*dH@c=_&8t)_wU>vPq4Ch% zlZRsaM#76SeV7)TOWn%P;J3C)I^%+?P-Th}YCLm+YadS0W??%nS@eY1oQ*;;?@{oo zb|WOk3{mTImFU=Pj*=0Fq0}aoln)*g6h6-+9Ru2mc9rEmw zZFgZ$njYr)j-fj?yAYo*qv6}ZN5sbMCk9U*3psmy39Uff=Q;_ihcDu&b)SXFpKDO_ 
z_H^j46=MEg7c^i05biJ&+#%xt5${uoCqHw2UOWZ%2OHunJ$2YW<`mCdiNGg=LDc?$ zDAkldPrDw5@b48x7-w8Zmz(^eB4NX}nv>#5%JcnTSv3bkppX0>RYoT48jFjK19?8O zw!m@rXk1vd4yC4!fb7F{R9(g$8=v};1=FK(t=DGsPfwv;`@RZ2&o9D;*b;P7zlfLi z=a2`_mcq*NO!CEZ95Vh69CTqeWQQ-L*-iWCy2v;hw%&kPJQxqp$~l3NBY(E+{!ZVX z%z*DHd>?32DNO(UQ{cWclvHQVhd@OIn4;kf4F@tH%xJBHE1d~7|kQ)#DW zxOXfKRysZt%v>UY_tyr&j|vwkxa7p12CT*5JRPQ;zZrjIF-)%)(l>cCx$|EqI}NPAn)Bz$emthS15LXA@4xd!PtnETH)sYTKJV?Ue%cA7 zO-*3-(+RNl{yQ2sF9-H;@wn2Zl_Yjc!DsT2Y;=&uFb7F;<=b2As~O81w${KFt2^Xr z@h86TQiT}~InejlXOf`MQ@G9444-FPpuyd-uqI#zR}<-5G@3Zyj=>I}1^A?W6V5i-1{t5mkfJaX;maS#Nl>jJeRX*sh6<9%ycBtK zeQ1hhcU7Q^4Upgeqv*WDvHspTE?Y8^B%whiAua1U_d!N7qCqO5($J7fC8g{wqoq=a zq)3v4=iDcWG}KqSRMMbGODbvoKEHom&*gIA`8?;`_xt^NEj1uEUB^Yb)8~L~f0S?+ zT@4x4W+KI?vur`rF!1+q!}(!fMWwr{#HlYEL`Dzg@ypK1_)85%G7Ih5*F&>#*zyo2 z=dc#579`-N1A;l^-!{>n@axr>}KBXM)pe9?ao^U+OJ6TFr#gEb8*ROBkKednlS zsfH%M+o}tO_vPW#u@(H-meaU?Bq6>-Lt$u;EB#dB%!Bl``Q9NRq}tL3vhsw_IH%$K zsNfIHjZ))R?nFSJnKRwjGMaDRBE@(2pM_^3lZ3BVtjq_mdLN3?T4Y!oVNUqCJYs!Rs;u zL=XOsW9ok{iPN>3L36n7s@v^{G*e^~e9Ru|Eh|Lqgc*6`j@4p(1zu-i} zmCxX$#6rkqA0aZO77lF+Cd;R%3 zb{+l+@7J_rK+a?^UY!Othd#q?>-B82=@lzd9}b-+*YNk3vv@8u1}=L^(A78pk?_BR zaHy6vZ@G~GwoBvbnByODZVsW(@4v$Z?@q&tOUIz@j5Zx6?9&faG@{8j1OAm1 zyt(r5G(LtVFiF1&{K>H2@a-jF(vA~6f22QMO>}s*hZO$?aiS7!V>ah*3=XkKhT(79 zNaT(Ybdupcq80TVMX4U#RA7xZZz<$Pfj*?zLIrhirPhp9y=u88WeDAAI+A`!0xEyX z0bZ>BjIDO={IsV5U!o;Ap46mioA*FEQ1%>2jLia@)k;`D)D=&e3Cx?-ZRFwSDApRS zK^yvNz*=B;)g{e>GvPOJ=j%+k;j{yOhg?N7-Ab4!xLQ0%{KSXHCtzo>2@TK@vU$R6 zU}`qu#wN1#`np4qB^?XVPJh5N<0#N4H_6ohywG505xW&wL*CmT#@coLuygo()_sK# zu|yrd*{VaIB_>0(h78EtO0t*RL=dqfCg2;Z~Af<^#rQB48T_1+iAKIBP zt>Xb4BO~x9KLpEktDxmw16aNvz(u!S6Y_m3vszFE%A1m5okt&N*dGzxwYk{gQVT7Q zgx>t8^APrNB+aeMMqB?kux>>Lo=rT9>30JmjSDWC@7vH`>k_^?8%U(G0x?!J1Mq1* zTAjZuII@-aj=O5G!eTG^QznheH%Fta!BlWP;m2Oy*@9u~JuqVaODh{qKL{VO5nIz- zKv#``rsxj2uf7?#H3%7ZA!8R*I19>7CyOKUR4_rQn$&bnf#;iCvFTbOs9vyw4eCnR zs;UcnCN2b$vkO-Z6LQ^O@2~>LCF1(+Cs|bNLG1Kvtfcr5IqtMwboj_Nq;>zX 
zF+&8d_)cTASWzMxbap1H4_SbIvrbvHxL1pwT6_{u@^2@-rk8QS3=NUv9-+t6JQEV~ zDv9^pbaMZ*m{b*?BR*TA*r*yUabZjh%ldC4kr+K(a0Mq~VU55WA1WnEI53bz>5moe z{aW}ELNH0v4})9Yku^cxtSwz0RE2C%+j1Ak{eF`;)~B&}>0P3ChPnbTDhOjo>SDM| z33G{}=(c19E;rr^v&*enS(h^0%n`am<(jOtYc5VpQ9%3X2h|hK*Q4W&wasr*Ua0hTRshN|5$7(58T-zW+szy6=!CS*kS>R|KlZBW)@L{bh< zg8^-Yb0SJ z!O3WS+)}7a*S}NX=T>K9$KVlEC^2DJcRCtu@+Ok5>NqXX7evB&y}w{7?CW|A+9(Qb0>0Pyyo`a>wyTME<1UOC_Od41X~NVWSGlj^oUxY^Z3^1`pfd$h{W`1HBJThSWf;5>~U}an+}1|TP+D?LnUCEXc$)J89<1HJw{~4 zv54ad#9iqV^sLy91{X7!gsxuoo?m;}u3diop65gGER4dGtNE;Mq8``2br=q8Ed-w_ zE#tT7UcxHhEXz+OR>wu2_l2zZYZuV>ABQOiJ7@Ux2?7#(-n9Aumcj zOQol0@*L@7bbXu)_d3~znrrsKZmp^G(EWN6|2>kM&1O_IunG-6&mgOTbDL-V=!k-Y zKla^1_JkRj|mTbmMj{E-2W+QPx>Pbn+e!u1HPnt>A~sbo_qa0-If{MJ-Y*!8h7W*p-H$zSIilH6{^0AGs)w z{UbQJek zrAHVtcRVW__0dxG4>?zJ|UO7 z`tA%q4My{@n&ossVgY$8cpl5noA9sFS|D>{2-Qq>;YD{sxp{pN+;382jXf(wpQIwu zbmS{2b~6RJ_-pv^{&4QL!I%xJ+|9+2+QNHpIW)h%f}$<6sQ=79vc5cuX?@;8>*y^O z@-ULm+EEW}%NKM1b7$aN>{3`jU%?|K3BEUU0PaXshb1R1sK?NQ@Mdok{^mR2YwQD- zQo%$4K8LtWcs@>l2YmMFqjW`)4jpvV2QS?Zn~nicryT@qINZV{LX+lX9RAiC@`qhPk3NH>hf@lsddfnyRM z&T_H%PCABoor0y3vuNJxMR@VdUT}Nv1nl2gF8wK*Mhm^I#>36Hr28=QAG4f-(CG-Y zE#@0!_Hc)cd0bcZ0C*%`1h3MOnD%}Yuj-n}<*KLRivyB$ci}EROKu~tUffcnXsJyz z7EKqOjjhF+xp7?a<9U=gI*A8tcjU9DbwH(TIGUf-rGphMxcxFQUJg{GPK%V`>5u=g z_4_pJ@w-49%dbLdn!2#3@8l;*KbR*jr^?;4`L178IJ$H^UHEAM&Azn&1{}HvzYNpi z-8P_jg>&bltvL5w2-lM8#7^NZr($4CS9)B82%8|-|K>8;9u--AWnd9KKPd~$_qOv9 zlak1uH(T+ytDN}O*d81{$cQeSR>fT`r|{;UB|`UGjf=l1a`$zQAY-{FT~sfC;uRFZ zH`@`u2>yhNTHj!FfY8^|cjIome1#m}5dKxy2W8hKz@4Lwu;r-%x1F2?9x8r3U41G3 zQWfSW(tjb&aRH9Gw~uW5{Q%zmaG^(ZKQO2B8)3kN0rX~_8Jv}<$C>X&)9;fnh*KR8 zfzj+8a46~*NJo0nl6VJN8D>u_#$O`0ofF8|gld$L6*@gL`Y|O`ns!~=f^Hkmv9k&X z;nG<#4yzkUKbTdB$&>eNj}8x#A~agAXHbHEK-dYI?B@_JupjLuuB1| zZXO`PNnU8CybNYpFUP<~@n~S~#`=#t3asrg2sjx5<>K?qLKu{I-)d#)bN?d}Pu8nZ1oHv;-G3-|Z^k-*QpZ_0|kZ>}1(o-ABws zU>>gRBiNK=k9Q0lacSsl;{L-HyT`Oxt%}Y9f9Xm#r>2m7?Uu(g7ax+sP-|rGPu4tc z9Kxo@rC{~_EVg!@8SD(0LT*(SGRRgtq#w1|Zak1F5fGC@7h0>F$WShMjYUpN)YK6N<(3dZ4+B#)0`f4ds 
zO$>qHLH2OaMHwQE%W>nETJfMK3Cu3WPuOdv!oHtcpdTEI#}{hAsK5*2wn1_vAY5Qq zyXlLSeUYUI?~{tlk6A(D6PEJf4*C7{6X_jR%igu_V&r2VsWJD$Ezgq5IrP+>h!I(ST1dp>H-lFv1ybB`hYg0fTv>buw6(yvNk6@zfwE zSzt%Zpg-l>@#^onqDkKKk!B~cwE8oc>?KKm*%e`AA71H{~+c?e%3p39%Fc@C0KTbYvJ z;QM8^9Fs4d;a;CdasAs)XwbHm=k)6GBd@1ZSMAZ%Za^|M{Zzs9qdoaE4L4Ghdx^8| z1a{kSFSCafQlvQ)yyXY6(}j;%mxnM%%?l;FWRh^aIJ_y3sgJW^0k+I+sHb;pC$1RD_J$OE+oGcn!!F~pwXWq8I z*qHDZV$jseHteXwjj}NqBk-{tPQD;XhNW=-kdRyZJd?GD9L3Hny<+WAxv*SeB;RUh z1dm22llf<&$)2|VFsH-~7wc+~k)IQY;=P+h${`smi(k-0;d}-U{er+=yC%dxenvwF9p*@ zHv9N{`VHkk5)>qA_OD z1x%b}h8r$U<+AxQv^T&Oju?k?HF6U?6Lt8~R2>p^WgB>Je+Tl1WO(s@U84MHA%7tr zD*9z-N`GFpqc#VUiTnx%JKN$}k7*q033rNVg?3!}`8pWWYKQ!}JM2HKPJ6$7#Oxm@ z1mECs*3^}X@67|?lqipUUN6s=EIx=k@)}U(^im={LI(DDHo@VecgXjS62i7^fiJbD zbV=i0?if&oRVy=S$>aO%^HgQt=zaqgT}JWuMLR+Jk_Oe$p2CMTYg4T;E_9s2Ag*UU zk{Sw`m3ar;`1-R>tbdL)eLwaX&f41qK3~hgb?jD76YQLl zMz3CuLCycvaUEQQU;Q(8&bVVmvFt%S0X*p^iuIPys<8ptC%NQ>4$FSm3s$)*uD`52!0%% zDhX|#8=+~zSSnTU6sD1_I3^kehHEKVG(VrUoH&QUQ&q4#`Z%f=*je>O?}Za!hj!KJ zO!;&?nvK{Aaek@joO761`b~rH+b$80A1Q+GOOCw~H?uXy>tW;%DUdo8f=zjXn=1A; z3A>+yP8&U8tl%=~m~IM%$5Zk2gQ1|+UPj`astCSXZ`I%vfhz}igU4EVxbx-&-q|w{ z-R1V<=)VG6t>cV%eOLqOXwzo>9mmN5rKQ+ont~m*gCV$gC)}8sZH1@WNz{L8t;M*gRHJt?Ko2;);n`ss73B%xZVweh<|?2=Dq{Qj9!3SJLS-+ zFOSp~G>DgVuY`|n(oopf!vb{WS>RK_Uvb0~%n!X%37O zlrj5bUBtRZ$R=J@1K*An5Ii_Sw0fBfu-9`j{hm6g*4`1hZ`8!La}uC;Tj05TNMsT= z#G>TPA5x?)umT5nS?OkHv(=BHLB1!39vQ%e)bd@q}?bP7)y_sZc-FnOj z9LrUPNI;Ks1P)LC&CCX7p_|bavcv5U9uUq9)ylCX^4cOk>GTj@dn3!LJ}DAwvXkKY zR#}0wu%AiHXtdH8<&K&&v|+x$FbJM3oQYb_kzVAdeg4Af_Oa)Wk1>!4sS? 
z^splv75z)3rDzM@DQmEQuNJO8;>XH*CE0J6M5x-X0j3kW$;^qd=(OG#-}M}2A-B%3 z7)LWmTWW+8-e=VmYq$KYH0&+L4a7CYZ$$3{;shsyce+0^k>Vz0lOh}M!tY_h!y zS+dLLTC{n+`2J_yu=;8b_P?K=S)F}?Wo{v<52k|$=u+%iCZBCK>sWbSIoct70*o%NA@uId-X%(G^ny$6s` zO-J@de>2Eet%t%oL&ywX0X9*QB7@C0Nra^Wk**iA2c2udrl+{3RW%-@U3vxXEMZ9- zdRfN!1q7d{5d zq@jv>Hy#{v3S?gQke99q3c2S=!;&d5^z2WuCR+t()7q^br1*o_Q;kl{7tZ?LJ*YFf znMCd$2a}ql_}5EvbeYs)sMa^4N;SgWWt6~M`#6wyN2TM!C#RwA^naoSGvxX1={xAr z7Jahz>~Gf7H-*bSUx*XLcfpAzvJLG6Fzt5>E0hT*k>Y*gNm?;*v{Z_R>U>9oyl;Y! zJ{8)IOhY|Ib?$5KCrVX!2dAdrV)Yl5;PKIuIvyCyl}6lQJ$kbAN`@!CC`*Hjrz7Bk zVWa57Ya#2^sz5(oF&9Y$EyQSf6Z-aR8hT2+#04{DKr!$PybM^v;jT3cs8^$#VGenu zP>TOj>R8hkd%oN>18c5X(D7H_gGPQZt`E8n)j?8pUGOb9b0!ooZA*pA{=YG6$zbj@ zYP!J0z7CEmuf(Y%WayV3W4bGG6#A+uQNK^eYTTb3XTCSivMH_y>39n#exfc)G_HR? zjCQT$t6c>LUBFzJ934qlU-qWbs~@0w%w;_3w@G|-*Z}Thw-jc(%J7ThBe~10@%S)i zt@yK7A$;=L&Rd^-2e*YXeCGXuH1fOz4N~eS!>V&(ScBl_7*o)hMn7aQg$wwT^D)cG4=8NxT4uL%lIiqXq-(p2ClX|A=b;IC}KPWMO774=RWL zMNg$`puTbgliI6+DHlJo2|tC;cRN*ZmwJr8Mf33IrW$nnvJ;#-9oV$blfbBaFSZQ| z$4wWTakl?-EHPP$p;F0Yn_C`UpEDUuBs}5fOAV34pKapENFSuVuf-8jrc5g?4J3X| zLCcAUN!Q%68rCAilKL5@?I^%aJDr*B^maDkvKOlu7KQPc2m3P>u;aOb=&i>v-1c{5 z&E`d3@HVfFJ&y0gZ+^eYr*LiZ&UrJtTry0Yl+rF9@3;y#^{#}sNn0^f=MH|)Jd8Ky zyk~NQQpC4Scc9$IgY3`XNMMRWPfGbSXt}pzSIa^2x;q)1JP0J z6U2w-JtL!iQ^iwHyWp3l87ynrE%sD9+R8U15uDa)LdcFcZ1Sc-kh=Av*i*>M%6WyO zS(SH4hN@$ee8+(4m=QU1(LRpSwc?)iMcZ$Jf=jT#M;K{Ympzx zv$kZ&^6Y0JchgB(*8qHcXf;SUy|t9MalfYdcCPr*TN~_K7Ku&Izp-oPu6THZ0?zlS zCr+79NYT!3M1S^nSo9|eLVCWE4xN#ZyK+1!J3p2Qc^u-O<|F=YEwBPtET~E8n*yVT z>4CxP@$laaa~!uPhF#6|#@|Px$ZMhFq*1bmInJ00!e>1h_D>SC9&7>o-UU!vF9TEl zK4X0zJ>u2L`lP(Cf)V9tJU-$evpD!#a8+kn1&o$s`>sY2*O=X~NM8wZ-U_^M`2=u$ z>4X95F~nwgDEvr{!)&Y5?0iH#epJwg?@yy3_}gjtU82Af3@m8V(+618Uk_1OfkQ2I zc~f{Nu1gX!n9><=RkxAIcI?G78&sk4=oO(KB)DMmLh(Y711S2G(&%@JG;FmjU!!FM zk+tXOMOA_Ic%c+458THlQ7IW}?~fPlG?0vVL#h<4A*Z=I*iU!)>D(}v&-of*g)zLP!o$AX{r zYy@FmdSb^Wx?fe6@*&62CF3B&F(3|=$;IC8V)4)KC%`%HIVha_B5=i4!SDE2WV@6! 
zRDlHxs>s5i51FXEz?YVkuY#s6l5CdHQ{4Vh727qBv2{6vY1pwp@Vzw>jHX31djTnC zvnR)D-H`J{{oiq>yxR>f_XtdR{UEfttsp9Bn*#P~lbF+mMXYA>M6`+BCdNrYD1B`P zl($weZZ-kldFe0-IVa*VvYgy^?I0ejyG0e_)nScUDr)75arqmDug_{>a@-SA+1**= zcGm{AEl%Mw#WE6J?IWsi6LO(Lhl9ie5uW`Uir0^okq-4Etn*)^$S&6(adRmdTwNwU z*`Ow#b!aO*Ss~4=*4@Kj^D4#8>yr4O)aUTe#2P=#-xE3+SJBPG9!i3u_<_IS0^4C0 zcUTw+)p1#vmEnc|CS8V@{d1`EfeMgme}uRGOW=0aGI%!U4|5G4L%uDwq$TZB_(6Sb znm1((ZT(O{{`SekJjau4(jz%Ei;?B=ulDlc|J}tko5T6`1uFFSzN7e{2B`6hR_5_3 z0;t7Pc=NKEyT6`9N83NfcK1E7Vr>NZno%z{*qX@=Ge=UHYYTYcJWp!Z<_i~7X5-gJ z3urMOjGb=|!gZs$Jg;s7H*_3JXZD%GgALAnLu3}0I^Qb_IyaSnSse_b1V5gl{F?lv zUbJeHj>WXBbUt}xED4y9Lv%7`k&yZXG#qe|sf`{)4M$dBi_r0|{bV6p^hD?y6}%!6 z6BnUV$^vE+6A3ETk?3D^lvN0sSc$EBt#W;WA*E3PQZ_V_fN%3ynf(y#x-aEmE8b3 zHee)CKcM`&T{2vO?u_-dLn;+9&O%j01;bX;q{U{ zs0prQ0iFZ-+j0s1Ltb#}GzLQNsWvnpJ&tekk%!miam4YQ9DP*t38oBG;sJI0$n@1| z9G=cb9q)-kwq`CL?$A_|`*neMM{YfKobqGK68bRsUpM|#K8FKU4#ID*txVC>8ka7S z7rX!RVa`=kVZ-xmpwA8QNvJ-&xTOhK*9BN@Gs%Q)H`cQ3Th}dT4v>UBGlVSdflT(W zXE8lkaGsqERVDAgW)NlPa)_lKxc!+1v`c7Gr31GB^(Ju%-5tC%r9fb}48%uL3qjs4 z8B6vo;wD6oYJ7Q(%er6VqS!8wE0g6nccPa6Kq?vcwlidltGa!hRhnyulcCMwh8&8r8HO)kOk|k}eDd%KnAKJdl zgx|MvaKy3*TsQj?a}17wme6=^y{87$wpGK;D1Ba+D2;PAdGcf4HQ?>)N1tAlp%->m z;ak_a1kRqr$!jyYU49MxS2BV&K9}dYza;spIXQ&QX(!%F)2OCRJ{#wjgxgko(${+> z>DM{i;px3GoUXcplI|+})L9j}s`v`=wA10gi=ye?}$rZZlh<{4t z_>|O9Xgp&Y4{e@G!jxP@Z_f*k+A$_Pt5^jtznKo!wkdE@Hy+J(p0PvgH{!0i+qnDk z9#DiNP*+(gxOJb9h@X8ZqyB`v^tA$+mkt>C3h_UobCG8&Nkg`7-H4X56xGWXv#-d%B3jZ$bv1GFTq7cu`nOLQ6VlHAd~2XybLNX7K>E0 zm3T<_9$0EE10^yWQC)Q5Ay^luT}nIqo*>!o~u{67$hu)D3w(xao?S3=YL?9HA-j^XiL_q9V5?5HbcbT_L>L{ zLvg*yV|Mjk3mf>jgqgk=#SXR{C9j98lk`AuGP~abi;Z59G4vrD^L)SPr-2K0^o)X~ zYJbF+;#a{R!!mYF@+6Kwe~?WsS7Xx^R>5YAVwC=)4QHlYw;F6|AU5_{!UB5}(S5xF z{77_wCf~8}Tm1qKvyT>sx>Sh^UWSv3BMuM{c?xPm``L%*MP!p&yWkH@5m~nheGhX( zwq7xttQFaih=KYjJZD)}%N#OMqJecy)WrG6r?bV4QC60NtKs$;8!Y!*3a+P9Vacx3 zxZf^P+`KgkPM*z&#IqVa{9-Qc<>@u~#kW!Br3JrYJd39sb;S7EcqSCh*sGoE(CkYV zYV)V~Xud3Ve;LEig`dDmWua>yavg`yxgnPG*avH_C)W(kkLRkpi$&>?y`t6LA;e?h 
z3}`c5M0WJg!B@-O;gS@=cLNrY7WRehH0&U?La*uF1T8FAj)vhI!!Y212dk3V0qPHf zz@~qqDE;Om67g^<+Vox*N0g8n|DCHrdW|9Q_x8Lla5~yZ%n|-(_nG^X^XUHVCYxUA zAdZoF2QF{)=s0y@U*miL$BT<_W!f!t5xhhF|WJP78t(kuQnlGgsW-6^(Zt7QsL}!QT{-Njy&J5a~0X*s9gf zu8kKmuTKR&Sd12iI(3qezz-z2X9JkI#sa(Q463OWB=`7Gyk=L-JX$V^_Gx>fYEKLC zc@#^|+>;RJr#&PZb?GEaRvz5fRfvLTU1Se~-(Yd@3B0P5Nb*;fGPOD@*x8gw&bQBm z!-4I@VzvnmIQmLtT2^PZZoVAcs?ot}@dCc*?^MtqOFu-}KvyBBkFsZ?ei z|3uU#X~A2BZgRqfr>H5t34WgspbwsnAu3az;+^Uip;uOp8m0Yg#g{mKV9_Mrm@kJJ z!uNJa5MhO_8GU> z3y#*4sIKi>rY}2!p9?<0{!mjs>X$2d`J)_zSC-2vaJJlZIW~kSSOxpesEd%PK+D@t(NzeDJtOM3inUo;fo?-E5CiEvJVLd~_sugUWMqvZCHA>65WFq|#8 zP0B4EVtVrv9CRugc3tH)Z4yl^-&YGZtx?DG)n2?Q1F`8pd!f@4N8Sp1UQOj)C^~RW zq!ye2P1kPWBHv4Rr)v&)4z3j4kJaX0iO(Td`7P+_#qocqhSMqu1NtB?9cq_7hF|%Y zU}1YPn;$G>3Z{O5YnPp=)=edPux%vIU7ARKJ)6wK90cD}?FHC6&6PW{i!giA4cv8f zG$a(f#-;mi^M`T9Jfm5f&loH`3oBl-!XX`4mHiNn7kK0TW3$mPD22uAJQy`plh9V!@YI1h3A> zas5}BXtrVvNNv0!nv%SgCvNXVi;>45dx!zukh>WmCkM&}Hnq)`3Gkq@4y1N2;coe9 zFv4U$pKWpw)Es|8kVzMAt&yf0SE9k$vz&O2I{?G_eZg0&0)I<*!Ba06FwEEwW-ko* zvq3YdWJa54A1T8NS0>T+24(u>aK30*S_^Ef4#wO76Dq~qV0v;I&Q>s>T{l{A7`kzX z5uR@dccex#s(;6Up`e|6YT7kBx=0Ss&9c+1*NxGkyp>%0E@g0~6%UdTkQ@I;5c)$)XXpAmTP-(Y%w!423oej`eDEMxCA zgUGU@x6tpn9p=72gX@=mgrIW~H2dXR@#tSl{DrqX%=mJQ{LFXahi@##-dDSL;pG~n zTfTz5WCkpie=g+A$AO36XC|7Xz+`__3VRDRzA?y9Ux zZVQ^PjzZ(2BW&E#NRU+=K(peM_=Yq8ME0w+(P7_8yxE@w0&Euh?aa_(bTmr*%Mx8Q z2|y345U9HAMQ(T3vC(HHHs zp)Y#xKZ^~SX?Q|kR8xE8HCDBb$BKH6>LVlYd{q;>VYr_e-xua1_e*P*Jn_LfhZA9N z$vb9nBpR*DV$knM9Ba87f$Npth;K{FgVk&!44Ze4R4kp2?VXR=t-Tu|NaqyRMrEM( z>r%36m=cTMV1qlxI$`#g6u3=lh_z`k`!+cMQ|mWCo#o@2mn#k6153oi;}=71Mj}ki z3J2SL`l9?FtBHAsEmZCQObR8;&_HJ{=5~A~_rn*XiPbSC`%M3>ihf+RqJ2Yu)_6;u1MP9oiTbu}xT$XioPLqY{H=0{ zvb5m7I_3rmIR+RiT}(a;^LUH&JHq~49%M}hz@t7xEDIVmnhC68vyBakKTQm~cSfyy}T^Rs{R&k)E zuo0hy`-8;UAgcrwUm{^;PloSXgKD>wu&VhO;l4IFLRRqb+Bl&6j<+P}Nd*25SO!TR ziJ<-PIT<7D_`lDY1>F+D{?`cb&Zu&H{`nX5Nh14^nuW)94`VJlhZtY)!}WKa5RZ3D z#viH4JS}1t&p&aHbiUEy4zZ0yH&F~(<0Ww4);%I$(Kxz)tsZRtn?^QHeQWt|!FOgO 
z{eV2&_npkV7LCmr-%;Ip7+(3B03)JQVdkqOyfWXE1N$>aw zyChuq&ye=Lx5kP;KXJoQYd&CfHg6c$3A@{+c)*T*d`NK)|CTI^?}G1%{$_gMYJ&{+ zxGMqsz=zMccLR+*#?tPM3xS2+vC3?oME}O@pjEe=xz>-N{K&BknsoCQn`Pw2cb=QT zCEaXk!n$pI`lW7ox3&dNu2P|qUi(n-*HKtg^O2RjhyV{!q)2~|JX2J2MuW+KU)lrl zyRjW)ud+euu$zKIQ%owqD1wBeN{#(Q5hFH&|5<-9>VK;x64!Dq_xG7_F~t@n$D9ExWix*&W)W3sTanu z+eSU4I#>eov=r%U!HMB=b12O}IEcO{8N|3GpL*Rk!4#QPb~$thdu}hqUtI5n`EJXubsN@P;q_9AqiU(DX9 z9fGx93#h|CEo!kX2o}xz3l`VXxZJR(*mgjl`?-H*D~noj*!NRHZq$Q@=4U|vsxo?@ z^C>8~yP#~$5mCE-0f<8OQohlM4oOJG(x(Zc8C|Ey^@QPkK72>&zAJI}no35#j=D#|0>Cq=X-19^lj%~fj?pa*|7Znr!gB1?{n7uHUdJg1H99kLsK>eQX`iYTwJz-j?exHf!i+$*^^r|^x0xQB4Z|Pve`;U z9?3yXt+!Bo>pj*@7UQxDtH|I&G1}j`0zJ24=;g7a>C;qaR8C&SKL%L9!oE+UFPr1} z#^-gYtXhjQ?Irw@sh!YwNTYA!_CS50J}8*D(*1$ss8jnUb|UaBR=*s_a-Ogjc ze!UF0*dn+A25uzJ8l>oM^)Pn*!))qX_lRlrSA*+G6Mi&OpGQ5|1Ur}eGL?D$eE+b+ z81Lc<*OKD-gVF;aZcBuN_dA&0GHJYgE{#7kjmIOCWMO}!4_{;02a*k|aH6ADNOVi3ph! zg~*usy?^go?^^Hjx3%u&&fVE(e?Hgs{^xCM)4PW!dX}O8^1m>qDwC%^RN=2j1ybE) z6Phkl59^jk;p{E(5dF56?DfqMk4;pkbcP01HmZVR>x*DyJ_B73n3IqS1!^)=7pD9W zSOXOtsY47OFylBZmDR?@kNx;^zoTH^D0qGPt5I%9Ge7!Pj(Exr0@v|panQyB2wFRV z?$p{xejIy57j3nsiw_D6-~Y7v?@ixHhg>O%HuB?(5~3kos!L?jkc#tP8AE9#Me^qw z@lRVXt{-X*r4w`@_lGrGW+zlWj~t5dM{g&$Nk-VuG-KbkQmPj+Fu z0fZ*c!H-@&V#8E5mhtY^w44&%-R=S2TG zT(tVHd!kr$ayL70<`ipwT~6F2hd@WxC3aieR&-H9=qdd>gO9PDCD-34j~cAtUaK^= zKR<|S2?tm~VJ+FzF$%mSkKp9TFWFj^dR$xL!O8``WImR|Rn2*DaK>&-J9Si~)i2C1 zIsW)SbcB@ec+VV#-zDM~hIrHQHjxk*SnK=dWBC4okP|Y8m>DZGb3cI(WO#?|`yfK- zZn7?K$gmC@cUpAE<1jwXjKM+_Gv`uyxa4RDANNWT|8W`OXKRq`>h(eG`eYD2567Tp z15_9Hi*qDLg5{=B;AFA^46_1Rpx-9;FoS`6z!o&<|w_X#)fn!kD=?KXPDG|MC9LrjUJty0( z%9#I=R+0b40I`H&Vco?0aS*lD5*-86nEYiaY-y_%HKuehCN&gIo~^>BZap%2+GwQ1 zfPQ#69xrrt5Dk@V^ck80u7iHT%(iyC(D@kd9(f7}l!acVn=&1-ZaCD&SE0)Ar{Xup z)o|N*B>y7xH%(&1D4&)M8POB?x4X&$r$z+|+W)bbgO#{IxZlWZ4#9@2hAiM&3M}}u z0iQ)}gw5ewu()3mO>Q{CGute2q;CVWjhQWu%2*;w6Yh3D;ULtVwo&w=xrA*OyMW=P zV8UkKB9`CN+4J#!xZu4TbIIOEB!(%0#E}C;J#iybUNw+yc6!Yw-q!|&PazN`Ib9?h 
zkN^>KahTh6m=u+MAvM=+Sl9krLTBh8dA5#=ikuFK*Zyf|ab|Cr;_B((Zd1pc79q*a zawivGs^EVkrBPx_5_$93o&{zMhp){^;Ir)yhA5BZ(Z8cayL6nzd17B&x>bwnOO&#Q z`V;7}g)Xq8F#vKlZNORG!aezrm&{u30ygZ5Qizx<+ySJQvNcFXB>P;`#jCaC~8zhXaQ@^Qzvr;2G4)yaAu7Og5?MwhNT3PiJ+jq#XBqRwVopNO3W4NVJ4nnop)Sb5p% z!W|I=1xAl#gVF2qlk0yx^W6&x7KZx=OCAQ9jGu37#vo)F|&id2Mem~v< zx?jAA{(BkNJWn5TmN;NVu!%6c`Q!F6gIGp`1Xulj*m_N60#2CmSnN?9%}j~FpS8}z z&mB|wl5^c4%IwCS7D>!#%5g?RQdsc6%V_gyB}h!y;H_*byq{LXTnyHujY|iK z->m}k{A*#5cO8=|>4jO|Ry=CqHJGcL7BY)&+40q&1(-Zw}mxi)okJ3E|j&q-t|E_a~j#w#m@`T}&` z(nHpDKZ6Kk4|-?+S}rT?$o2IEM|jLMs;VpG2q)`8p_d(AHsxez{VX21)1B_CkfP#_ zAX=Xp!&hpL#j$!W0ounLNnDi+_#W%O?vxiug6zfa}b-V}u@+cC*5~ge6q-R3+6mMGZwv`Ke3xYuB{pcxt?+0*bxOOKelrJf!=Us))kN#@`BuSK7p|v$MMF4 z4Dkiw9{AL>>m=6i5!0Emg_-M@V8q+)kYTRK2kPvBz|rk&_MA$=B{+tZD!1XlUz~ne zXu;Pl4W_RLN%KeB<7wUVGq^f-0RLk(l@_!rbM4wu^oQJ9Ix!}Wzt$Xyn#q3z)~^=d zcJ@E`dnAwFT)CHq6tzIo`*--LP6vISTMAu<9NhLwl3ES(6|FqwL{IIt;C(lI;hf$^ zK4Z!aanPuCYlWEC%&9|@1_)=cx8L`YV!@#(6{E(hYKmasT6q}Y6H3hgXy9jqtMKP? z8!S`5gSHd(Y3RIU?mAbIe|+f4ORq%Zzj6~;m$ZqyZyw9Kb60@r?soLF(&mBc~@#c)sE(WK1*UCVLyW!3SBUJ2?p~j`@I8Y8EG#Pm!)CiI_5F9tIx$jS0&# zIKu&SmDdCIEsp|5p}d~X)++S=?{*O7F*yGmB* zsXk@?HDNg5Ni!B0*@NYVS>Wp@i%RYzn9a_cB+1Z}WF;3ciw}8h`{itGm>Wp~jTK?> zz)hkFHdEoy#eQ~MN|9AM3Ei>OG?bmX7HUTY5$}NY*u1e4iL%f)3WPdO#7S;8Fl+AxsFa=q3j>dWX!AI5E1E{c(XC`_<{I3$^bq+M*&>Sc z${;hg{$)}NBth9z52NHXAurbhT4q;?I;Wo!m>);$+h#^%6aOZ@pA(Dz4c_8+z6oTc z-5VBVvbVnINx$IcFb3NV*`nUfs;Jz&4I2AwLF!C0=GW>$et|rcuei)YW#hnh8KCm- z;W*!N64Wi8hx-pJVSDBrHabh#b;hhi#SLXF+rXx@F0H63ato_BT;p>u-)G!95^Fw|v#W4aE~qV;>n zLDB9ZL_IWuP3XO^VlkOZ;d8;1`w#|HDV1umqlii2N7#<#;Dv&*14I3 zu;1`8A$Fs|JVgqlqGpI}r=GNSn6ig7>Nd0NktIxGxJkYLbz`_Q%pYcIR>HpmN$X$A zhuEN%{p6y)DV4Q&g#T>R=%cDR5PSYCJJqUARh^W@I$sca0>{z}&quibS_4jWjKXQp zB5_HfKa8z92<3g5o)zca2!8Rse=nS6cb;0+T;6)n{0Ub}SlcXaJ#It7>gbwh> zzlM&G(Ih4xo%V?u96Vs)M?)H}x0BqsKOb+uR^%bKjL3XFb#8I73EH!TT>J!qySPmZ zN_8tSn54n-51)wYAsvYQ(hENVo``1lO4AtQk9gR+3WXFJuD*Bzvu=cvs|Gt*v)v*H z`F0gnx7y&dx`*W5hi>Mp)XSb7iGj3J2bghhG-UXnAhtiI3v*7jIDWD-zPUFA6!wc) 
z*LR^uG`~<3ldcRU&jgm@(Ji3qJ_4lzW@ExUpEkXqeSo-abf>(uoxDP&7Yj% zuAd_;r$J0k$KpA2B{<|>MIxQ=khFsz#h06tp<(eQ(id8d?nRC4gv|+YLT0?k z(yWg?&zDE@l+jpw!3;#bRj|hBIa&O^0W<41;}Q2p<{+CPocDjB-{v+rvqhTMJuE=g ze*^gAzR7rg(F^#uXb!o)@DWLioW#9Nl|=sQ%Ehby{)ByAYjE*xHPHBGOE>Qn6YaHy zpcFn2BE_FsLDM$WifIPha7)neRiIxynsL>tR;ahR!*4b&xvZ&ttSM0&Qt@q!hQj`jQFuXiG9=31hN!!vxkKV!@DSeJ z@5gV+bc;evi5pIpUr34_OY0y;!kR41?YDkWqX>7m?_&~GqiEVb2Uz*y6vS1G;|=|U z`f9}s3;<5dTN==L&I`7(7X@$bI)@y=%}>5oM=vO z4jBx8@~=VR1zCEkdIocHl&1xW?Py-`@NC%IBnWyim-h?*?}n&95N}qA#Mcju@Wiko zsQtPeb_;IHnDK95u<1zRA$5_RxMKrbzNGTao6m`qU!~MHR1Sa__3gMLCKT;*u98u4 z6@)vr!i^^;baa5b;1OL9E~Pi|zy4(G$q~5eweR8fO)s)M!;`B!)#K^FObVIa{Exy|oT-ul zqii#wlljtq*+aPf)j)i{Vl@l-IfbYDdhid~pWtp~omJmaYw*$;N!Ld$f~VKt;d<+A zY&)_U_C21=y-U{POD*A9ylkN|(VHGGub~g8PT(uY6rkC_bTI!q6Vji}q`$3|`0E{c z;;>8q#GC&*QWswpy8h@$cv7Rw_jM;jW!*J)aPIQ@$@9je$7nBZSoH??YhGjyYOOf> z>?rDL8wrg8sX4%elO*Xg^S2l@>9C*&TTEOhifDR7FQ(U&V`)GsysKU^|LG$D3 z#5-DiYsoO$cXl?~AC?D2!(R62!vMT8)2ge?5#l;`9^+HcU9cNiTtJ_uR=_u|=~m(#(U z2%eXj$KSk?r!TFuKzrjA*i@SXW2|L(i}_(_tA9!EZJ<#1qE~#dMvc2JJBaq%+MvO5 zD0MmM3RPX!RC>BRpVTv$4tIKm!6KnQulhk8EhF5Sm;57vq7syt;l<$s*E&JEg7@=P&|8_94`%l=#nTa|Rf-XEUQB zPk{%biAs42=%o4#2W(3Mc?f2kN8Z85FWrfgnLdWBQ=yG#4#NJZVl327V^g>1u=oXu zf;!ci`ak$7v)$@-UDH8V2M&h85VvTR_|;r z4Tjs+f|7iO*m#x-*?C5p>BUX2Uo+GXt+$(y3;p-;OtlMpx-|y-zz0YdqIJOvaqG(j zRyW3ugv~R-IcxU8hWRI0;*dF*-qc8Tc-#=nk1YVx0aNPx>VL2^?<{b8{v233=P15U z{(*z%2B1&6JGM=8!zSV0(X}gt^(&2s>Hr(m-FO+)8>Zm&JZUE5)=Y! 
z06fnM^Ng^wywEityF6Dw$h@uCaxa5eZrDKNAd?;Pqu8Fh3T?~tMY)mNNYHKrxRkJ+ zHB{dtQ6?Bm}XT; zw%&|@6+*_{Dep0J-EoEt?Oc!bGv`3Vr#tnE-ezbvsfw6bRkN(U1EDD3q*~#SQ8$CttxS-dMFG^Ov}g@1$ zpe)?8#SL2z6&J$L-Twtsi_?Xv7ur~n+a6N2?~2Iw`zPYBR4%?F=Y{q=6QJ|fJ5o7X z4U5e0GDDYAA&)Quw~eK6Zs&QnYA$ElQXA;e8xka}u?%XyYzDg=9p+c?h8&(D^nhFo zxlVQo3Cw;>9=aC5?6J>4&wVhD9;3^xu6!iMllL%{nNl=V*bV>LuTEti1hF%-qL7?_ z2`q_fnKyThll%&*_XL}Y}puLF1#EjYE%dkIz9>LtMQ4cuXP7~lUx?J=K_0kWF?q9 zRfQx4ZD!#iLcf~=ug6i2)EJHe?e)%3Go_O3oDq)!vdi#e;Tdvef)@Ta&ja778$i** zNYU17o@n;Fk?fAt!Ht7-@ygt6*1R=}Bve+QrSEWu-~#_o~R>8+qcT^SX)k z$v6=9^q~0Z3pp!fJO{i{hl`7TiyVi(MZI)CQD561vcaIrJnObWuYh1WYw$Mulo|Z z4E)71)VJv)TVs;h$i^7>si21+Yu6Fei^BZfJR5VD`9M{El2}4L!K$QL33AsalMan@ zZ2RZq%t>b(Sk6_&q{?)*-`ftmb0BAHdJ843mX#w*QX?oG$RT zBPUITr9U|<5ijEIRtq7{@&?vhABUg;U&MBr8dTQy8fqI$(NnPl@puk}((lP&KI$+Y zUND^RP7i;<4?FIZVjXuo0k117;tpv9)1lfJrd$s*KiKhGQ*^K_ zZW0ff*<=+^6GbO9d|?j+{p$O<7ua;WF`Tby1(kJVfyUsr%@@KxV>iLHu zIjs@e7Ul@=fWfZo+sOw#cWSUkgC8^Kf}s17ylu@%61DULNmRTAOWP8`s=x~lhSfs0 zTQ5Wi^T=xNb+EHEpEo|Rha^6nTZ0JJ58HytHd<)us15(coy3F(cTid~nNQByE$~bJ zuzB}5t64DzOh4_R4L|jHW9&dsa53O{V;jlU*et$U^a4M=ol3VYJ3z+E9TPiltH9fZ zYU1m&a_ByR&3(sVA_=rSjV}+KBBsK3ZQoTL*ng;ly?^A-k0;Lp(*vC#(#)hogon^7xbi z{Nwg^@g3hG{L`#RoVqoMRwo(JRX?*?PqZ%gy0?lQQdQyCht`wd<=eqKwcpy2TF}U7 zpf?2;=<`qZbh^`VzUK5PY`P_nTkmIxrj3|P6RKljnUxL5KTw1jKNRu&psU1mc)xIt z+FSSEJPZ6$ZAg#lY=w_QhC%ZsE!tcyFp=uNvH8;H`O@MtOuDjzA98ua>`$4Ar2KC2 zt9lifm*hk{T_xb3(<-*+fDJ9l*v4<%(1!E5`rtIvh-n>qhQanK)Gy*26v)f+fe|J3 zH5>EcMo|VtIfde+Cq~?G_z5=U;TdcQeoiL3Ys0MjChW-f4p3VVNjJyE9xSc zZ+*)PzKn+tODD0ecfUdT*bi&NNl)=BXyb&CZ7{BHA>E{=!nb(ziBJFd3Kdlm@M0+8 z_A{4Z+}~(!k{wPz31=gqR1VT=(6I?bXyTpf-%J|h|@8I*HmpHUy5Z#$G7S;E? 
z#7iE5|2KD{e9VS;)<|-zMKZ)l>N?UbaU$v8CNSodGo~5OIJ@gs8us*T7tLMrTBIy} z9KC0)g`rsnO*#6lP#`XS(E)Orksfd}((8eEj1cx!HKRd~rKnFP6S&#Eu{aH}^ z0#+LSfmzw7fx`-O(HiNk_%CHNY*jJ9x{E8A%9(D`*Ji}VR&8PxPMhk(#*Kx~za7MT z_8uUmCcebZekv%|siMEHJLMh?bRT$uR&~fH-~Ke{f4anZvseI z`-pv9W?+RzG|1^r$IdceCNE?ngZkCrP1;iuwIie6@Om)G9y0{4&31-ehU+m&__^KL z|CA`WykmDPz2K&c!16i0gL%J5t)FJN7g{5X@#UyBcuak$==cFe*btozojXk6NM<$u zdYTM9PbR?Ct>4)fuMtA8`f~kV`>#aec>&Z{sDalhLp*mc4dzrU6KCUAajpDH`cPUQ zZqCT2Q~X`H-pqF*6)nNb^JO7~UB3$NW|tE_WfNY1CS=S%9snnm`TXkb2_Uy}3T1l) zCf9r&>fx+O)dM8yuRGKD%ZCa4>Bd^r-Ma%-U%PNRuz_*Q1WbDA$U3e>LFm1HmUhAu zRfl%6dNpUfARdepTPs;;$3}d*U>L5rrh`S#cayT5Q>_0?7inLc1j-k;GqcEGwr{!M zi@f}mY;8;-j!%3AZYvi(YfurbnRgElCp^Q+=A+Sc|ZY7#9`@&Yz}%@ewIhZPSEFtCiT_ z3A4~B|CDH(LQws?z<6Q*egPL}&Bk-~8&NW#2!kYlu{rTk@N{xI`!+TK)TCasjaQ>s zw4cCzUf+l2xoR-$lM~uJEE5gi@dL+-qs1~0wPF80ck2@`y-7xNIP3eK%8CXCK)A-$EYz`2IaY*2I?c2`2PI#3Trf zp90@zN?rSKj%SG za5FfVd9i0VHxi4Z!rjKd52R|A2Fb3Z@afBBl#Y&pmTex`a_FdV3OUYh{GAIDtpV1K zvc9;;Z6r!uj~AO5+$XllpG7G{;(;HUiJk5iDCMPx;paDj5*Kp!OU8@;ZoLbiURB}v zSyEz)X?Ym=Vjq$1b7oUUHetZ0!6dp%n#1n|=A@ejHW4TBu+Xol6}}%<^^mM-9)U{)(6Oh$7g^v$Al`FCRYe z(qKHjZWiDA_Y3wZM?=7mXm+NcrQSQR2W#VRgW|;?{%@`d z^fH)kc@?aU10Z>-9sczb@j#(7EiJv1^bf7VllC=el#@rkwM~Q6_--68+zZuM$be~F zvS@-xaQhrR45!Z~!pM|gII(C0o=wez3+t1igmsge{6k>3^Et-OHHCX!;XJ}`D89bg z!`i}AAwu9H*1EjLl3$B(&}|3)K1`9ghTp?n`E>D%N`c`kXNwQyZh&Qg3?CY@h$l9T zf=spDVCrCj8G09C``-aP%V7un7;J<3^R)Q)kwfT!n!E6yp)Z{)aF7z;D2aPNA7HIb z#ay(!2F{-!#f^G8aDL|s^xCwZtJX{MeM`!4tGpIB`4b5%$E{(Cmo(|NvJvzpD`kp) zv2aVug>KuM2CMhQz^Zy#YH=U-MmZ2Z+2Jrl!sl0sjOFWbGfHltNFrYJ1K+>E<)^myPnxaZu4lct!uNqvp zXC?GzCSqlgF2A2?OoL><;*BeEJR&WP?-TB(VWS_92;4@U9aiC{L2t11$ET9*jt}u?-84SxnhlQVU&D=U6VUUJ1T5+PK)SVGLay$1$oV!HZ@kyQ zALk~E$Eo*#*~*Xb@5Bh!R6doxsXGLlrKUjXE}`4DC<$horhwh_z2x89!>~nJlgoTf z2WJB#T4LpmJyMdOV6zi0&3lan+<^}K?*vSG@`Ln#mf|+{>h#3AE9Clf2Y5bXKDixz zf!zKzUSu&n7hXXo)=m+AMpP>CPx}-$N$3;oTDseMibpIyG%dsP-*xH1x)b=~*ApQt zwu+o}I|YuLD@c{eB?!LlBz}2kCYzr31}m<7#QVd?)3W?flDlCDA0F@z=QBMXzs#N9 
zT^LGPvn(Itw+b}hUc=wYsq|8m9Nd*W3hA%BSWnM4_Os0&n`b7$-w8vYZrCQ)a$*gW ze*KzAtxFIXC$ZRNW(FyIF)H_NB2hhk*5{%XF~j6BagL9JFOCMFpcOB8Jpu)OObLsr zbVbAcElm0Pb+TH8ky5K0>}0YcT#Yh>4XJVBrBfnt+^gyMYsv*8$~C~oHKxSPG9SrK zGc3(KjT-VB;Qn+ojL>*be(DQckdqsj>FFbwJz+LGC#4{qnf)-*zym+GXyO)~g{U_+ zLoEIMJ$a@3n}yy?V%HR|krB~)g`VkNK`>33)weW;&J2^O5HWvDYY{A?w{cNp9EHT?Nl(bel!|L`3kWhbD zFEwoz2E7!xhBc4tRq0*k9~BQBKPm{R${{;KqVUTH6P7#HjuaK|7AtQtASH8CSVxP% zNek2#_0%civ)fjpjl0JYvi3NM+gm|uH_e2pV<+O9P#<>Vhrk41qyar!7Q$xH2#7NI zL*A_`BQ^WKll{9>p~gdC{wT(f)(bkY>#s6ySBQfx&pk2t;e&d~*u5lZz!406F$D}= zMYw5i94gw3MswR(T$o)8k58<`zQb~`HfJQr{CTrHgptGa!&Gs^KQ%>lCA5ai| zDX;^}n)#rvBn2e4hv+U@2ihvGn7cre{k~U0w57CRv~VY9n=B)Kx8V@E(-e(WX*=*s zo^Y-gI=n#x63IukQ=;}ofiPMr0AGAk!`ACDnDTl6dN#je`73kC>;MD&wq!Bdnr(yr zuOG!33AW5N!;vk>cu$h<-DdHPk(e!4PufT6fP#7f8T$J&d)Kj>DF52Rf>IWeLkspn z-vN2iu%`#HI7u0lLz*y=DN-|)0X*ci99hxahN9Yda#&;s#vLJWBQYIZ%%Z5rKr43E z2;u#Q82EC}6xE+w5H^S7-;NYqd8`Twg8qp+iVE1D-}~{K!eLes)XX9+I+?Ogvq+-n zU;Ud|mzezN%j{u!5_m?e1OGXbiNpqbBuUaBaeV`e+&2jP_iRJ8ZPRgTEr<{J0^2G=xRE2CoQ$EXgZeVfB9+0-Knv_~}v!4Ycq370H=8#)V z6wi(VT}wYS`)0@VW+>AKAHvDn7~w4WZY128wu7B`t48xJ(KB%2P#!-6-#XOafa30W*MPpcy;-@^p1O*Q*0ZA4BK_mW9&`_S*d zY3#C-4QMG|VKGa!p!`J?_SsaTms=rCEt<)I;>FkcuyVXKJ#gtHe!f%z#nn+T>+Kd==r$EPMx^sW?|Rt$;fj2ca0b07u|hOf zKNceYJphMmmiSF2OUUk<(yyA0{DF8KkDE6V<_-yBHl|#`{^v&R_y~p z^~JRUpTp&IR{USnG;;X5HP=`-pZZ135ZLX51Pr ztR%kriBR$(lu1N6S)DF+#KLwD2zev?`z|RW^|hK1a9I|AczL7Lh%LBFY7=@6EhRoe zU+c=?1!xg87jMKHvupm9>|U2IhV^h5-zQ|HTWlb_ND>UpbFp>ca-yGn1iv=EKs);f z7`L~L4f&ld%IVt9T=lGYv!XO4HQdMQ^Xm9yus<4T8i@0BKcMR(!IvogTlC*~O&B4} zW&_QaqWSoXFmd<_SXq7$;pBH@ll!)FRyGcu44!h;`30teLQpvTGxYsKqVwvHB zbHfgR>an4;V&yb0*%isxJh_VnwQt1XZ)50^b2c#YZYuxdbH(Z#8Az=+ALOax=P>5X zZ~F9G4*pbi#6@H&_|_Iv<2)O2m_z_w{Io&vkPIW!tzSZ?nc=upE!w7^Fej=g$|zmIBgRZyayZ83Nnu5r8X0B35fnA_@1xSzZcf1Gfo zzZz6{c(*F9FTDWiMeXd2YbSKvv1T4O8>r1-3+PQ;LDfBk%)|Y6Q2po-^|a8YUI&$^ z&z`}23G9KpkMyW@^HWlKG8TM0g6Vlt4SJE%UIJ@!Ytjj{oqC0+ zdH-RFm*0_$L?KNpwuToXKUoA?{|JUZjdU&zv#&R%=nU<<3R!-)ul 
zxcNDBV_+t_9Tw3COXuJ@yLfnWO_^(+DaGfhiP*HG2kg2}a-K#$G0PRMy>{q?k~Uv=K1xV$-kgldkJSp0b zd(T|Qs+_NoX(|N<>onlx>*2KhS1kUN98JPI-1x=!A>`^OO*%VV7q>~9^A%az5Gdrx zmPieQ@tYpw)y=VRx%L|*s21YPcS-p2`WM(gIh023^q}rjWoc?i2rS&+4Mo=y;6R}& zH9we4)n$J}iryz2SMdkz3!}JqzXkQZr$g62w5F9e?vpmTmpE;JK5Prkq@zQPsrULc zI_1)EJQ6+}U6=nA%^tY{|9+VPX?r&UDM|o|r%_b8#GUfV-&sNVBbiz(8yx@h2FwMV+(X#|CpKW+bL=ZJR(w!pA3sOUuGwz9*7pr z-3v3M(r{(PLm1>6jMIxxVB4a>plp;xGPI9~4hAek=TT2s)i@oDQkh0R|9MS}JA+}# zi*)?vmM-MOqOgDWDl*PH8V_{Kb3OIHqLZUO;*X!p$uqOZBDts#0t<(tlfND*+%yAf za(A=l7Bl=-mI`un2cyZ6Rm{J*N4(%%C5d=87}K6PFtgG>qOCtiG4A@5wM-ItI-{SE zi|HxIQ>OW0-{RYU{cm%}lOr6}%f=O6o3a5c5`nVRlnOY^!{`e%IA; z*lzKacy1X1^=}j5?q6@{cl0D(dP478$^)MzZ6Xy*s)z-5#T~oG3c0Zqp$|A2t{(8f z?Y1JgbX@3E1uTP_)uwo@BbH@UE5Z7=%VECYhiTmSf!P*5tT!u&AzySNLHB~Mc;cfq z*!fZk;@+NQEe<{8jZ;5sFTO`wgC;>zQV?q>epK(!HiET!Mxc6$4ryul6@)`M@$`-6DWDAEL$E&j#TV3GA{8J6ZxjU)|Nz#FY zLrUOk6bW}_hOjcT5s-gA44ZPSplHs0qI}#3Ew+@CwGT$4XIVOFHqK?^&c{KxRsy7r zABHUri-@Pp2eRpE1ln%@EZ#Wbo!HWG4Afi6f`8aUcJ0O==DPEw*gPf}t5qX0e9l`@ z%9&2)`F%S(`&i)I2>HW&`zxeqXRUbKaeM6YzQkITC$j8+lR>vUUmR4j4$8X(*Nf3x z78T+H?J0kWg#R|HhSqhkLN^f}v{=9$;|>VW+J$@X^x*!Co2>ZJU2JRj!acrfv|V6D zWmd_eq>w?`d9fIVJ13&alac5hv!4E4Vv6b71vv8EMK)mAApZ92Tt0|;LGS`GoDGV@ zY3rhJ{IZMq?{YZKwX?_YvM8pmmq8?k`-r|pJYwp<0*JX)GM?2@!k}^A*ygq8*v8i< ziC$O=UPvE}?XF2s==O|BtZ6yhC?5^FkC(9Zjw8X=Xc_d&nquUS7_j{>7UXT5Af`nD zGPKUJ^&}OCl7)~kEDb&>Qz56iQA8%}A^vmph=gQ|^=cCUC)r@AToa9}-$g?9*=6iW ztt&YF9zlYf-!oylfa>BEXghkCxFygQ`jfVR?YL;`X6prTwXzDr^HbT6lbx6+Z;hkk z6|iscVZqD#9k=Bv;^eumd_Q#IN$W9i(Tj-A}Vy(x>>-lS}BJd!x9N;5c}^ei1i} zz6#QN7vQzJUPw7)%I*8~C`wE49|s?!L}xf8m`{gVD}VA-$%EfNKx2H=%N=GK$=~GSvPK*NMlpm1U;lpmPegGEM|JcFu z5vU;{Lfvoc!7|N~nCV<5HA4o#M&&PTZPhFm{^J?3y>UdeyJaell}W^RZL#F-G@<9b za}wm1Mv(_M9I&YDkJzi%thrtCa>;_4nkH+`idI98DGc7>2-m|A{hB2>!yw9++L&$Xpy8!0wEQxAS296ILu1!EBLC z`b~Ce?`n8DznEy>ZNQpaRiw7~5AnIW32XJ7;duFE5;0@$)XOG7oVT}d* zS%Ah*@%;~W?3t1s-nu%o{;h5eo3?zHc#NtsCSQ2KHmI26DpDwl7`ZlLRKNn2|Nj}y zOjj+B3|+oDLihhYpWgqTZ?t>F7~%O0XAcnmXw24F8=)^erhTWkghc50pR9AFJU#z@ 
z8Z$3zh9GGB|IJ7|pd!f%8yzjRQPe-G2-jw4)k zCnk@@SiZ3e4s4A>9c=?{|2>Tj7E&BCK37FGAF@DdKrv`eNoC%nbg0+;MRdNd9bR91 z7!P>4!lyn5GW)%rAk#}heNjX zK{Qv~ggw76zzC};(4abjSIgdnIpz}Vz=Jtz)zQNYfv22}dU zI39e(pO?s*!Qg4bY0CQbpjaqugv)!O(%?8Y4^R~iNHc>=+zMLMqlJ@i6uQ~lpl5V3 zyz!L)W!p}C``iR?y}5wyPCC4Q1MnM(#c*f^!~VldV2f`liyA3d8H@L$weKI$Uo6W_ z(_hv_rW) zv3UGbAndw$oa~CW#=SoKaCylB*!yh??fv%_zHZ4OY6z@xev{IWt7uJxEh?PTPr^jj+~I(-zcW(=n8=EouIWg5$SJ{8LK zE7*?3KUs#$5UlY{Kzo~bav zk0*SN2-Eg_y72uQ(X7r(AkwL0zvdi47q$St4Q>J59&Kb36IpbpDt)$WCDj);2wK-J zz^@P{uGLE7sf|yVq-7qgygLut{bnpi=Hda9mJLPHUHea&sy(|2B_q%)TUg*V4$Qz72vYUDM&Rc_C>0P=G@j zhskb>p**GVJ9@8iL$znU@K2xwY}oP^LjUun*6W0Ko1*}l1J1$NdtdSBXHOPfGFTwh z7lVa*F9v0uN9R2y;={AQ;dQMm#PDMVpLog=XFf9|OExS(rL$x3mqQpo<=+bPZl{Bp ziI@!w$;ZL+74XY(JQhhFhGF^k+}*)RfZjE;=Eaw=Hh2IZRB{UxqgDB7N#l_3PZJsGzDsc!e zl;6c}&t0IOISX!I-%U0j-$~j(*wS?+rgZpoeL!NsbNs?d)lyj+d+ija>%@T1x4WYJ zt^KgbZ6X$zbiwOxb4*gO0H3uIG&%pSwfvmvJpM==Bp&eQF(*1vEy@`mw z^)Ou%;LQmD_3`m%M}{oLhuZ&P zE!$97Zo;55BLNP|@|fXo(6&^XYmR(W{}Pk&=gE2&_`Dhl3I@}JgFjGzVHM_^7Qvv8 zGH{}l@+_()*xbF*{<0F4o4JUzC(6)J+ia28IUPsr?Lv(q@-ST|0Q`1)Vd?V$bYJES z=3cXfCkW(~7k+d3Z2ba!nA?p3YovK#PKSL{mp? zBYRrcQp03xwB*iQCPI?uj;FZcyAJ&+5MWLobHVs|vg}!CH7lHSi>)mv7h#Vvzx|~S z9W#r7eU_qb8#aKMc9)=vO$8~xLX`A$!drVLz&5)xPVTq)6@)8K`}eh8dd4gubGL@PJy)BoPTXJxtvS?2hIF#q8g zdU&G@H;K3|?)UzNSCXHj_%X*{aaoWm7zcu#C(u|F>{IjBkuOtKX|>SqaVfK=k7J(T z;mqB1(xV^5aHo*yFtVcO4~60Nh37>HO|Kz1-47qm`HmLr#*=@k!Z%_gq3#2g;Xt#^ z)bh(=R+qjOhq~nBJI8Y{Fyar?Tev~6_cUBMMKBF&3WWAwMKHQ>2?Y0#W}S_5ASO|d zZ@4gmE>@P|hjQ)sw&G~4_Rk|+X!S4q;>J;EPZ%yMhR?^t@lMG?NO@X zpM<7{@l_~fBqJguiGI)T|6Zqa>O7xu-PiTLW~0pCR#xrcAPmfELW{e(!130g+l6!6NrPZN2)Y?Z4z^)_YR~ua7}uTgyG;B zDfUlB7W6fT;M+S9po~^Dcf1KYkMKeFhX5Y9t%$#G?8Qkxt*LBH0Mu;Kp(Wfc-y-%d zk_bz7XmK;&3dQO4d>iuk+Gfo6sAivP1d_-tD$uWbAE*613ey+#0{`4P%pGWA8sg65 zVXH{I_;4k=O0)~rlBJ1uH;)}8SMl2&e;i4A3(7n4Vfeyqs=nqD6kCqMeg`ueH#da6 zEf!Cl=T<`5D;6vKh0yBSdUC=_g$6rKWt`;48G)KiJUsM?*?EIY(N`vsje`?NW!*$v zngM(&!eCS5g4s9h>&m72*fd|a6PeIG@hu|xA3sv3L!biyqa3o!p+_4oPyB5`e`O}9? 
zP`oK_tj<`Qj!;`!PQ{Eo=di5DbM zZ*UM+SG|GoA{`o3H3IkjoAHd~APQ&5W5<^1q}N}Q=w}wz#4>hpc|4YBRc-(?V}EM3 zlsn;aO+eyJ`OJCmeOS0A7E6nkyM%cr< zqI}#wO`IAm=)qo55wg1B8w{`I9A`UD;Gf%EOX-*yZC+pvsZsk-EGz?#a!$5g*9mX7 zvk^OWQ#h!NS(2HdAJKe;3S}2QhtSjF(3muV^aN5|+kG1D*Lh)hRuXDUi%|ZvNK8;p zMuSJEfjRdL$6uc0&x^eR)&`~|`KS;z&`XB(9zy)?Cx1}~RovWipAV(gn&LxXZG<-fN|>=A%Cr&d$6=-a~ zBqn_OoDo4!3VxBx8P21vH&% z!&J9d=rQmN4qi~872g$EBX51Gmv@RetFnMG-mMSjGeqd@1$EHu@*Qneg-Fd8F{nwM zjcSEFnx&!xmp9!qA=qd=!9&2ofg8c@3;1Cpx5h@+!D{YT%h zqGz;-#n)Nvxn=K}ul{!QPgVmQN$tXHNE7N|Jb50g?;M|9T z_+zCK)MW`$Q&T;1q}~terR=Dy%2aB(pc%*Fm*A|u!zi>v7pqN&*zdY-_*?A|CZusK zP1>1^>alXXOI*qH^YKuB<|Hg?$->u51gW3xRfvsz!V}==Htd8E&XFNZ^VOwM+VdSN z65*!qAE_emMN&g3zzvkeA#zvlY?F9V2JPy5UBG9J*}!pn}i76sbVydC>j!($aEsNfsyQ`AV?NX#fRfq07B1I+)pXP;Fv_aOqTo{`*$mX})!>O|JsOB$4 zBkn9`c*hv}esdQNPW2`drcbe1L6rWK_C(186X^kAGa~AK0K#@%ft$;sNQX%VHYXp0 z0#kdi`1ygqt;`S)xT?T*V#=4ecL`&h|1nbxc4F0wU)VNNmt5l-Ay<1ZfV2=%nsCR6 z>{9c=`0RL&cJ0kRm@31}8@_-ODmZ#`cNSamHXh0oo?z*2?s;;JhU8`m;v)DBdu6R)MIzLFU`nH25#DkX`dioC>~fz**!n67Zp?*ABD0 z2JV8%EGLpCkxXSiK7<$fYB+YPrY3aDZm8+ts6$cWM51ml=tgj*+pMYR zl}j)~@-4cR-(YU(hT~zkG7*E%_dVhC>-k)wYMwbU%pHV?I&nw8cLMC4_|te@x*ydSJJUnA zli}J_MQ{t*hK_&qV3%7W9zW}gB44=wwU`n)wO)cm+q+Vp!wcv%xrN(rbU~HmdFcB!|UiE8HwX>hjChFI9qJ811#kquv;x-VMNUvFNk`< zcLg(8qcDhW>m=z8=|UKN_JOqs&%&hjVIb7#|MyJujkXD0Xz-o;n%r_g;y4JfqvGig88;g+OTL}|AdLilW4(j7|9%RItg)>CN2$3z@DTLuTz zTVR3EMCNdk6%B1_hAq#6aq)|O)<&%qr5{P~YNt~OTdhRtL@DZZUJJfm3uYvD#Nyd| zS?s*6Lg2+{!>5&7=?n`yy073OdoQO8>leI4{!B4qA9)9Lgl@pP>9eV@8wck3NfZ~Pk8N7t~kHYekM`=5bB`E=;| zElA_I_ucP&g9#0kp{FJ-;Vn~@A@Ucm!Yc(M5|^O~e+N2v)1N1>Zf(Fd6aHcIvYtR* z>1=wl)&q;gFN4v)c|>xo4OeAo^Zk7Y7+kQTNmE`!oW%?(7tX@aiz?Q|>N+ylb}^C0 zr5L%{8D%fuz>(D(7jW_+rQt0Vl?m>W~-FvIjRx@*5dQ)^XfRX)zf zId-wD*JbnGCM#1%fm=APCr0NgKjz=~Q^rt}QA{XEfq6D&_^+)JPH5yXHI}6?Wtk=$ zvS%*}EKo&JUL7MRyn_C;n*b@1TwAl@MJNf9qzB|BY0v9NIB`b^6r6L$&)J4F#9Eq= zMqx75?H;=&Jc#Ky6$_8N)#&|qg5dK*fF=~a#=eOrSe)7k<2`=ZeXkXdUE*A5X72%V z6p)nnvu%bBcwEww)IZ)uwTI^5#u-0A>0kjg+N4vHes?A#PnlF*k)l!_-*MIUS%hBJ 
zp_O~}=m*U-s9fa7#Liv^`QOKwxrr~)`ua&+beSV?JQgK0YQ$-afHQt<_JI9f>*2a= zHNU+08@er=#Nku^FL`uL#~AJ!;U<$mHqzDK z!`W@qtnsLCC4aZyDfqERn)IK$kL9NGX!q|JjF!`{5dA9_`TZTU_%pl1+wnw{Y04bBj1lmK6ZCs4DybD^{POHJ3E1g1uQ8mlj_Mc*6h zk>vg>F!4~@Bh{VVf9noy?;JDz4!}mNFcj-eJ1W!y93KO?QFY)80N?K;IMTA z2u-=lJA6%?&Odh$EuTwb<7&>mdn%m<{qn~%tA673{421!`3Q&|KZ}R+d~tH#Q?~s# zq4Se8Fg@)GM6B!pmDgsp`u1eXk}tTMyOVv5&gP$du?&QJb~DH0hj~Rm<>9XGIMgo+ zWJUH*V7y1&C+T-^0_UN|apeh{sl1!oLmQLGcT>UVRhhS%?op z=I%$3E1U>p^BK#pBTBd&+tvi5bI*{k?o8%0l(eS03@w{k6BKFKIYSXS)CF2=6v9-Ok8PEMH(qRm@j z8W!`4JvP;d*VLp;=a%f^TGB3a#3TXkjp3BA**ixlOOY;1r&>YfFGQPzm4Y++dq3SeMdCAGsXl=;t%1mI3=pPJ^{pB|HDgx zPnnAO@f;c06RVGTQ#CnP5>!+SVqTiCYLyc%IBiIjeWl6q94TYz>gK$rM z4ND(4;ln`!OE;sg6GzM(EWrB9FRGRev>kt@#i@I?%SQH6d4U$-c&(ZfIXC?&!h2P zEPHMXqA*7W6giv=MH{qh)cP#w$D|&nQ&JS{Vw!yL)ACdT4;{7Y_ zsQmK|7Wb%qg`))QmH)QgD?oVWoUKL^1bKF$JKi^=!(`Hycjcb1$?uMm( zdKfHKgLAAWQRCk^c(Jkrb1t?(1~=VH%2-5-XKADALHe>H5m~ z1Lp;4$&Y``<$WRSnWA=#TXh_Cx&~2pEFNQrxjU!20Lij3f$YWyXcTq?=B8@X#{11& z(sB)LtrwAC^`4t7%NV^VpnGD!}iC*)UsB9^dByVo5hoG z=VKs>RZ-MYB82e03VGv-DQN9V!7nEfE!w{^&;P8Xug4mZdJLd|&M~|Vlh83-j*5(3 z;0@n$VqdWBAnF)^FKQ0(ldf}A*GZqz&P)bB$a*qOd8hDekUr*ltJ4pod-)m*g1~#) zJ~-tY2^)VW^DemQ#LJJ zYogKhwgas1Pr<*;R`R9y172FRqbBO0CkpvWqT8$Ipl;-aczOr}|4BkoOf|@^xW$GZ z)h5C#!*GV>Vcfjx3H#h!j%dD`3MP{pm`$I!W{$x@ZlZq#ye@MZm*Gwrv2Fsp6S3@V zmlGsSXAz{X+ypINg7oSk9n#$qTrE73hYhkq%*&u>?1_F?*tJTCG#Z_Ue)lW*Pu>T{ z_;p!J2$-#H#3`H;>trP&Q7L1SsmhEkxP_!nM(9px^k@@g;MLl%x+=y3I zy!am$Boo;mm(gvtBOa8TNm`~@k}H2zQ9;8KAHIvnhR57oYicJ>cvy$LfXR?CZiElL zpI~Ep3fpt*1@nAk5^FhH32~i%tcD}bRahAO z7dvjv1Vh*3xNA{2`|!sfE=_Dm@5${3U7fv9@s-n`#cQx?RXKjabd-ot#P5p~=&a8# z!Bple99C|Fk#{|~*xL%fb6SzW=M%gZ_ez+2RUDF)g)wT1CJD0m%s!0Eg}kjt;9;}^ z%}?YpVOM_vzu_~x|6~gIFXfR)-*;F&Eta1%><89f2Dsa12gY%SbDw#0;a;9Ljz|6A zoRJR@%H%pdS&ZcC{Sc;z${`JxfN>WFh>k`V?a1 zTNpXznK=0BI5J8nVQEh*4sDj9i;SjY-`@Z{QiC`%K$xzX`5F|y?WdPYs#r~Kwwvpg z!nk(!v;E<=@H?GC-;fXfPM0Q%e=^t&Tsk?-)f@HC|HDZ&kxXG+DevZpEq!JE8go{^ z1-l)4*wyKl2y$F=>0<8Q61IZVbb7(_eGCSrap}|&E!Oe<1z0q^865QoQL9B1oy+z> 
z&MBzLEaiUYwFT|FUJcXFtt2w#Gsx+&ai(W?J;wfi=jib;8$&EcaOVvL_H=_dbxO6S zA)KctH0~EqJ)jFRj;w=i+&#@}zd3!Dod+&TuJmhBCsR_a15Kar!%bTmCj9G7jM09| zh^)GelEjZq`@RjT6fVJB`FAku^d*RYJj@uUn-PJsCM;Q`Nq_%$6+FAQ5gYSv_#QdV z{9d^e_@%L!nV>}K|1&11FPV~D;Q(~GYD>;(yn%<@jC;f33@B`$PF>SOVRcqKZ~W9A zkQh{l6%vzSE0+}dGCxLr*_44&N2w=;Y?y``O}_G9&7+$Cc-QS8Gl@^~V6R>Wq<@cRm+1FG zjQV0M6gUnKuKx!yw|2o=hYui@qJtNDTXAFI5?E9*m+D&^vB@i&SeN)BP^%Gugz@#v ze2%u#T-ykq>W#3e*@tnJai(t4TuYB;AydN9i9kIC&xA~++Z1%Eve*q~`m52@c_i@Q}_*v(eP)`jetU_Yoss<*pB*r}_~HdG7m z$u|08NS2xx4AxE*>B(-jJgi2*$-I$;3ls8pvzY% z_=n4HHe+_T0K28zikH!*$Y{p-k_7>qn855u7^{|q`yT&bZ*Uv9$6ifL2^6H7yL_Ph zY7%;yszSrTZa8$s5|wu)VA>{iyzoMkc8 z({tFfD+Ix~y^a;PGKXWg$8b%W3EiYHi~bfn1eYHNF*nT5V1`~J#{AJvZWboT{%2|ayqrQ z(7@blgbI~DXO}vKGvc-qI8>Q|QT|%QY&aBs_FL1qnsbo;P=V@izXNYucQUpE(QHP# zE{^`tpucrDl7>(9Oh;Nf>hR3S-^82vuhgD*eU&$rcZp>EP>EWsREM(~av=Jj7P%rl z4VT25)0qRV@TIk%6HH2B=IuM^% zdm3Qbid$ZN$8JquGH+%M9t(FR)Jzq_zsq1>?lJJHS`78=8BEJ6Tk7v}gSEMzgt7`} z@sc@5*f+fneTM?^gXSLGI~c$&UG^42S|-4!6}nWru?!5W{n-L53G)1OIG$TlfWPl# zu_w#3U`w(X`CG7r&Kh&W1m-*}^IeHcoEBrQ?IB#=b_+FIgsFFBJO9U|omf|w0P-Y_ zZ3#1_Rf$zNr^p5r;_rdgX-_t}Yc5@`Foy_kIe>T8OM!_<5i2p#f$ZwN%D3J5iJu^9 z0y7IY(=hiwP_6$3!E+Q~$6j-4d8>wXVL2^jg(tnO@RZHS{|kFnt;zdkcOZ237-nDP z8VV^A1Z+K7!19m)OU|Ve;GWV7Rgcr{ydl(OS81UcL@%>*VQktzde_PM*|PDuQ+GMMhRpm%I=QhnWF3 zxcb(4v^geEkA&U@x%G!w@0+jLMT;lW+DArI_vIjCFl`FGZE}v$u_@*Mml1`-ONyB= ziAn60-IM8kSwan3zN2JZH6yX@9V0l>h)&IlkbiX?w{DNdS+Z|%M@%|qKKJ6B>E6`E z#EO-Cvy^ktXglccpzrOCmUPBWyur3T1)~3J(?172mhM4$KmxTS= z!~XMG#{Tu@yz83JOh<=U|YD7qK&zchXReF=1 zvg096t}vm``__THdp6s2!xbh32H>t=6QJeL2j-Wb0WGV10GCr=Kx^zpj?F=DnNA0`nh`U>ZrIIHIn~4*X<3K??Up#mrS0<}@ZAjR z6etNv`AJZvSBRUr1lL0)CEDHL0byKPVA47{D*a{xaZePW$(;A{#>K7lP}x;B-oF%H zpNWS~na$W1ZiMcB6>vyIhPWRd1O1@IQCdnVVP9+!zU@$;KYhLM)@K2FM`a)L#77=>>!`3U=gP50 zNSXRYM?pAu##&K0i8YeScC6x38E33qLAm=QPPqFTii79UX)(oM>pX=nKO#(o4yu!F zZNDLQRFT@9m4T%>iPOdP(#J#hy?;O1m%VsjnJIz-6T zCR@h0Ta^B)9%Y6-`uL*5;!wKgGKLB1(`}11c>`O*9D%;a;4)74?0Cb3eOIS>uam&U 
zvyORcqDhw?mZ2Zs3KPxA;3!~RlD8=soTL6va%p0_qJQA&XM#*hM+C^%5r;gsN_`a&(xtg+a}VegHKVba}NC;$s;LJD~aSU3QlL! zF>69CZdKL6f$JyW9sSKN7!f6MTlI*I=NZV(T?YDZj7dW5QQWg*5ciHLktS0s&QDW} zi!UvOw1s01#r525Kb3108!yC*!s29XPYQf0=b@&XBH8vtksb(3Wv2~Gkde_4OTs{B(!4cHwG}~sGb}$_|fbt5`>`ntNArwD@inTZq zZTltobB-(Y-aE-|n>d5bHB1J7we76U$z_m`Ifusme1pbyJZQZw!0nx;#7v?MWs1k} zk>mB6T(-{9a=!xIe#C_?ezJ{g@qPymvN90x%ZyUTSEw~bm?V8wp`9GqLqewuJhwlq zE-xvE_g?#q@S%2 z9I%=F&FR_uvTVrPHVY`*@e!mI=fjVt`%K=?oA5;Y4rV>l+t9>47igO}%2QHArlM+~W;R?vL#SXjlM8*K%z z+>62E+bnukNSz%UP^W!A_TohDcmBRvf;{8>sI93$rE8|s<16fFkj7pb;robj(7DcD z5G{h8VPEjo8VUm-PB%Gx#eFxwqD}uh)ShTeH|E=d)UXe`iKAjC&sU&6Li6B59v`&L zqQJ@KICiT>!JoHL?92O}_$xGny&IZeQ-Fi0p@k@jr`Sgvw;_;1n zws;0PoR-ISW$r}V>3Zajo*reyrcmEgx^z>=U1)#Xh1Pq_`C>+@Fg<7l%)La=`JWYA zsbYmn4mzxN=L*{K@;5#R;g~I3GAK-i!!5cJCxO@=Ea7!hEJL zM*y#79%APOt%F_dhV*xcGYUVeMRQ#v_-o?KubdiBcINHIV~=~-j2<2Nl7AOz7VW;xWiRHC75G+474*VvG~?g)LS6|<9%Od z-zmko{WhRcWl^kyXEr`uD@$g-U%{kZ)S~hB>70*Tr^bBIVX`~vD@rdq3F67#Y`1C= z(-*V|F3iZ_Klo}-Lm$Mzk6cUkk6Z_Cbj;EbM-p)9mg#h&Zz%76!4>Rl^@rz0 z$FT9`Ff)0* z)2V`B1$Y!lkW>LXDiZJkGGtGp)%A0%(&TTLWtN4Ttw*6=VkLYPPQj~gUNGsv6XZ&7 zNbt>FP-~w6f0e|cJpMa6_4e40-I3u>6TQl6?SIZxn=T-I%LJH~o(|NOyoPxq(O7Q0 z6Gdzu!ub$?Do}9*^2TQ zmm~Z#KM3?HKJCP42XKFhYpN}x_zZgdLuQYs~Q-r23 zMwuHk^1<_>2d!U!j7eA(g)=wHQ<3`D)rC3NtJ5~y(dQkHur!F^iX}p9dGk@Ma59AA zl}^;;;{_a1L^N5rnC!TJnfZS6DO<7mE7S1K7^g6!V7s-Psjsl$*Yx%BF5OPXmXZ@# zlq5uL-Gv}$uNXPnG>o@&&1va{i(EtTOPIfWJr(R>$eRU4{4stmTK~;~??MB3PBn*_ zcv~69(*=me?P78(JOETP0iMj&=J{^@$9BeiM#&IS(&AFh{9LF-X1sfdI=$av|E3i7 zcvlMh@7vj$r6qwF{7rxuj!neW*)nvjmoVu&UV`-2I&itVika}M4y%GDkhu=$!P<_a z1akV2^YU7V8QMZGC;E`0n@E}+x$jVrH9YcTVdLFC45e253IEB` z(n^ZAo9}S#Zu4QmMiU5HB1kkBQTSQ61ww|GLiD^Y$BWggz}RSnku(~Db=p-Z{hUj` zPCUYN%@C(&-Zk?SBBW_P_dQ6OFGd!h=x5tE$}(4ivuYYNR9UyEMNm@Sh~mckz~jPm zh%B`Skpu2h!G;#V$?-GwwP1cgcjGvEVoF zR=*4#PT2;*2kx>JDRIbL8iv7@rKl!)kBN|v#LwH0p}yr#B6uK>&HK2W{PC8dgF!Rc zDV39{)x;>UX(#}NlM5mCmIN&}e8YKXWr$pu5b?TXMg=&p7Vm{T6K*R{T1*Yl&26ud}wkT%2wZH?_D&-jj5W{ 
zT+so;%-2!z+$&HUl8SM5+sN!&?;zvx6uRrC2|N*sWv{Qh3eyZWvp;8@MvM1JHP&+? z=%q3}?5)+Nc5bIZVp9XZ@XT)Z#K##}-_OURSNqu8ObwRhnL^&IpLl;{I_+AT&$hPc zV5vqdwj5DLC*7Br7ch$o6-twH>(gNVw|yvN<;0jq#Di~*B1de_U`$g_BRAe>MT>fH zTBj<>9lL`3!~ZzqSqx+GLmordmh-ZuHaHH;7D3w+d9rlfWPDL3NH>|BVvfHRLF*7N zwET4%qklNk4DFAwV(kLV(sZOB4S4jF(oc}ND1y^+-I>*yRqUI?9!zHcVe;dcI(<9# zidEbB5iP%;f`<1S=z)S3Fux&6uUp$9b6^Vnny>;3&T!sr^D^EVV|iM!_aq~mTmZ+_ zFTiK3Yhaf-jBX;FU*{Z9w*#sq{p&D3EEh$=i5ledsxwT>+H$6|at`+0m_fvp^mvU=PMU74jXpBO+hiF7nZV*|0GI|@F8$#D6~9*91@0h-T-;tUmlrnf6V&5WT!FMFUT_#xiQ;=!Td zanyh30AH0niCL#8>93nbjl|RllORk6v&}%_UI%kZ_z|S;sDPXhYihzi=V;o?(O9bv zyh}ysct2m{bI{pz8+^XL!#lkR;PRlF^Cw4h zzHJ5a@Ph{F@@IKbe-puCu!y-Exq-U$AxGBj$5! zIzK>x0qG^PP~y*8y6Eg_+FWBm{8ne}C`IYmq?Y%Ny-Z}~Vl|SR&C%L$k zYa}gdkfL**D}t6|D_fs&kNNP(ki3nHLiOo(wB?E{wFw>vsmq?s_5Q0k`r;-d7wSp) zZl^HAULHudi)b0+onTDI0!italW&t-2vfv~nSyc};p!qMoao&CttpK^3U#{!s~ z@`ZVYCDrw!v&&Q{c)jmf+(ES4bdVHQu5wB#lc4lI7`xo4NL6Ax;dBiAAaN3-(8Aq5kFn;H14f1ah8rms!fhXR#CLE*7IT;I0defZ*U|SXrf69Yq6&Uq`&6EzmuCVz*LXU z+FZa+%yy$PH-w46{SWZ>Lp5u9;sBgr6EWze6uGwh9SV)Gppz1eFM3ZP@2NP+J==rZ zrkl}UlBQHp{0u4t-GT2#T^Knb7`^u&fMZ7$s8-u0s1sG7rTLL-(@a+Bt!4)p!l>uIYhj zYBJBdxt*OBZAS8#7a;5S51a}+u(nnP&rQww;O?|mhb3B# zC^2OM^K^nW{!}{+?b&rb88FJoqCF?YO|<T6Io72n!Q1U*}C;Eb3(QQ&sDx>{`^{wGsfRDBBP`SQjPAb0|e zil4(0lQi&QQn*>kHOJIfo|phxINzg@5zddqk%w|rP^lDM=DcU0tS`n#%8BUxCIuQd z2V>&XOg2R06^PhOAnUhJ#K=cHYOiksy;{v!pH<;Fr??vfKPN)gcVV(_PY66Id&{in zW{~ciWB5MhrufL-4Xf6+poQ`;%nwhbOHKRv29s^5-VF~L|3`pqJSa;iw1lvSedL+b zB0(&Zu7LHQUC9drHP(Fd4JNZel*;%h0q^-nZ2xIUy{r4+uI(YFmz#_HoOmCHX4f%u zK7GZCELD1Lw*r>NWTNYqD2$W%$)-&5hf)1xh;I(0skMZapI!s;-=)ctbUU(a&O;!- zCR5im7uu6o2gfrFz~#vvTsPQ?7ApqW{(EC^v}hZwI#>*M#cwc<(GEEFO^s?bA3~?? 
zj?~UP4uVQ=VHB4g<9!%oeebBy75NwN<~A>S^-&Inbi|^y!4RnXu4L!hFNfVz*Ylrd z9f9ROYIJeuc68v@^`$d_ReITsW3YWG@!nrRKY5Ln#98yVv$@~21 zDKio1=FW8HkLMzuV92tTZxGSl&gf3XwVl z&BgkRiyX4n3$~$+$~88%U7B8Xj=_Nj0iJR75%zb=Fs^kq1S^Tvpte96=WgA|c*O|N z8JlH@c0)YQe|Z(G&V6D=6|Z9Xy>ZYgR0pBbd3=|sePkq`Yrz@jV_gxnL@ z<*}P?cG0It`)@d+VQq@#?^kO&Ieyu>)1I9pCB%zDlMttXx=J@N82zlFO z!0a{=g5ugt++aI^e+$l{{ecd9~JE5h;qm6%prKnL2TK7 z3A;+TOdFIslui}JuKia)d+QCz47>%~3|z?e!=BJNVnY_HhLTl=*-&kuK~f@4G8v%* znAdGh|8!o4$jDV#^Xfj%o%@tI7b_2&Ke~g=-}mhOL#Hv>!Go8{Y4M@+V(4(iCwLVQ zgHy5((&iBXGQW8yWjHN#e6Jb1DE%3CC(p;$XPmxdvYGUs^#REn-X!B+0&DxA8S+fI z<_?n^_`{b+HvFi=e}^nc!jaePU)gQ2?W7!ag<{4ovV|YiYzgg|CAdQVI;#s>^v<~t zm@#93eXn#AQ;LuBEh4O`)sA+~PzVxopIPM=fV~U`Z3B~Dzs(49u#=RprH5;40=?~EP8kcuMBN!Tm+8BHR`aCJo`!VXR98_J!HT^r!sta)_hx6K^wZv#}0C`07= zBrI)~#D}&?@UtcZbz&wFJ;9S`Se(dIOvz$5=-*`{)Gj%Gtqf()yL~~iO=9%S35skXPGI&j60<^b%CK);* z4{Fu-AZn}_HosS5=Cr2a!fi3w`nrNYy4--e`QPCGF)`x5(>e&>Ul)LSQwHOIxF2e3 zoe*+z5Tjec?Ev@Q2j*c|m@&*Q%Yqkg&f}fgFCm0H#13~mjyj(TEoT>St=#j;wq5VR zM?)1B7VpQNms-o~HRTqFE4Ir3bSkM9D7No?hDo{H5GIGneGcI$ev#kclSn;p;5 znit9nO#i@_{BMLgk`s&mi>|`$o?74!EXTZsqG+~03J;H1Q>o39h|aoF6mR#Y0t-_x z>YEp~S@4+yQo3YF&IRkodGPSA0Jt*@Pk-G)QV zRF_b8ZL1NuepUka6NO-$Jew*S$nt}1+;DZP0eBgU(dY5=VC3j^_;4Z#cJ7{w>Ff1L zlT-ltP`H9z^WBR)!}A!DR>O$)Xj5OOeT@3?RQA`bZ#bCa$*@wYHTGLN@yaDch0 zQzuBDtxDqu`CMZaZi&L(|8e!^e>r|(+jc6=N`p!?r&LG@)wPagu7rqW_-0mS$vmdH zXjW)aX+j!F({&z;kWeY4loSyWB~(gy&inbi_Ycqe7s%DsI?r_+`@U_(K16)M6lO7G zj2b(OaqY^DETe2G+G;L?u0_32^4yExZMu@?G{SPDlh8eF9x+auh~XEqL0?)F^_J`= z+Ruz|EDjP z%mF#JZ*~lxATcQ4Bf$>x_l88D5Zv&s2{%V333tyIp*k&@_oX@Ez;_>O2Vo-;h zrT4*%q^;0;RDj>*DHl*X4r0!fq09bC=-YRbL)o!3)=O=>8bSzW-zOSeO|V-c>V z;?S_t0CO7e!m*J{V8#MvL3W%2xSXnhiPJslryZj7@t7&F)AKuqN*;oKeqJI^N=gUx z|B@KT7(VY%&7CXQ&1813fhQi1$vFpecBwdDxS+5E{_#wuNe{nauxKi%Rz3hv3BZq~ z3bcqn`|PK+z^r@Lw0pt@)RrxWWw?+%xxJSw=o-aV$SKmrBfXHD%jb)`fd%&81DoCn zLap(#w7#JaRkTc5u!RB9n&|@z_c=4F(+=8YKcIgNKWjaGj7;HYxQEL{!SJjKyOsMN zjx#c#E3G%t8yBx*RKOyb6{O8rS(f#SAvbzMw3S?R8v)d72K&>E#PV}0s%_@K4;5$l 
zJotGmTg7*vjmtPaZQe955&)sblI-`WC^VXygMMCR5c08w)BW&^*k?!J7%MBL{`Mgz zD{qB225n%t={#4GI>a5@>W{0>`Qxv=B}8h|AZhjRg>Lr@#drgl zh4ER>>OrvhuO0394qku0D_Op<5mkHb*}(Eq^vS6$5Ef}pA9fysiO+K}L+2jGY}(3H ze;cAyiYqk6%VYDo5HQ>w3aImqTkG;0q?47nx8@R{H+np~ye|d&?2Mr=#zf$#yqYch zZxQ|T-)zXV=!3_{-N5UX2SogN3Byw);HkknHs#7I?&qGHWOgub3#)tyH^t1*Y0OWY zlw}BQH41Er{R*}bK62$lhM;IVPM9K{LduywxaWFf%egW%ol(Rcagm@zhVPO^_zV8( z{KXixJ3K$YP0-fm#Qw@XCy(E^ai@NX^EtaOxcEyFS+S#}?E8iilpTx!mw)5gjrD0< zLwAB;z*ZjCc%Fex+EUzHbKXd*XA0BTy%*fAFT+~XC9uj(6wG?Vad2+{CRF8dvwcKq zY5rBBfrl8})d@$R&Sy1Zvh;ztvc? z!W^EF^3fkLD;YgP@4;8OsqD)@E!n<89ZWvOiR1YSE$}9+okV=;cwq^cimK2 zkgFtqmAy-p!qvdD${cF%=7FC}6KVT$0DF}cSm*nlVBa@}w$G8}bWga#&4T5uz&RKj z>y;rZToy%>w21U3A#_$Pp~GjR@aKVC%q)+9xutC!Z=4itduR`f3l6inD{qn$?$Yq{ z*k>>o55i}LS%g#7Mg71Y?!xnT_`^V(u4tRiW>v3cAD#9>-#BAN%y{0~s^{d5$^zW% zQU}w5RoJHq;aE2K4*xyPf;r(=Q7D~AzCJ1k@0ePF)yd~jccmU0_5gH*hhXyiWmtTu z5w}$Y!snjzIMUXfNIvSsOVj13R=x?>>ReM|0eorf9HwBEXcqG2DB-M>sIwo@V~u z&osF<(7s|y-D>8t32AOHF+K=Z{pkXM^coCUA;pp&aim-E8oX(KN{oNyplVwqx3MP; zRg0Ek?=gRNZdDyBhUN22inFMnycx>xMnKECN>p`m#K2=K(EU&v7M{w)`ES;6m0L== z$>}9{$-K;5;t%CZM*GTV?Cc)d)CEV@Y<QX-+D?_TG@b@jyzOjTA<|U9Tx$)fn#~bNzTRAA7sE0cy1JFFKj{Hw93#ZUacLTw_Hd^2)xq?isQepn{#33@5aao~?uzpi5SE%H|661L@Z0ZI0 z`R6DRy}S%U$7|3S{{HQtnFfD{8}Y*BC{83}Ey{1mAP;JcVWQ1z!KtQn-1KWMoH~;Q z7LFO9#P367R=p-B+Eq9|=Pgc;Hf0h4&Qv#NBdm=PV_o4dAz0@ur)8E3?5ZN|Fn$E# zKeee%-d61RR)rfqWSHVM8&20J9?}fnalbfVlFZdhhQ3XFg+F}#1PZ1PxLY|n_^{?5Zc%;$Uz}Vyf7#La z;8Z?j2_BQdAZwPpccCC{R2YUxXW#&zrR%)I@f<;UCcY*brdvqRGT%dj%8?70%bpd~ zV45}N+>RpNGQ-5z?h;B{@z%tY*EoiM-nC}##|eCAXvSh~I#@5x1uj}tapN{k%nMF-wycioD&=FD24pL{larb$A!jiFW{g)KerN3<&J&uMeS@Ecx<%- zj%~ONj}NHR@zz`5;XyrC6X3`O?54xZr-39vT!|H07*mJY9;`00pJ>iL4ShI@{&K(1 z?Hggj7V1{QGLv_B)KrKAjn6@8>mPFEb0C=ei3zS;aHc(j@x-`nG`^kJ#C=#f5zlNM zNjJiKn|Y^4L)NM3>{hfGmFA74Q%alo?v^}V5xX1LYb3#`V|o1NWJYV`PSY!m72wse z6hfWPvu%fSVd5eSHb+OEtt#k)GK15!c7NE z33hF|iVynsa!Q3_?8r=4lB?WLI;Wh$gpgc3b7~wbTRfTt4O+6e7m>I_%b9IG^aSfa zctW^+1XS00pA7E56R{+Gr6Kz>u>q&XL^6eY 
zr*Qt!i`)d$rJP>NO(^btgz}Bz>`u}EymOL-cV9MO(Va`ErfA4&I>)2J`F4I+a|lKh zy~iiRfr8Fu%Rn>iFtM98g-UDAf*&6zvtrLI)Yvsg=qPH2g*^r=uQwEj!o|6V1-i`P zksEHb7zq`g>P$V=8u=h4)0{kkrdLS|H{Io}v9*~X`lC~Lpj@1Io(iFU6_+t4M;^Lw z*~6CViA?5x5Cl74W` zqiE7UL$I0M#Yv932n!d>hE$ZkEVzvMKL${G6oFF$ z0ryjTIY>ocfKF{onBmulH3tW=b%qPEc>h?qa%Mb8iT98e!x)lwe;YKcbLFAUy8@%oe*PS8d){lfbK(;9G~C7HZqI_iwjum`VIF(XABVk5*3c<; zDp7M&C8PvZbNjbG;6{G~roiV9udn$dN6xRBe|vb8mIfzlWE~U@N!BJckpH?Y}NjVS~k1jtf4$xG$CG~vhjsr%mYpQ zmEgqr1jW)bHrwIZ7(Qzt7L6hNY`CK!2;Lmw*?l8t5Yyu$VNua_@G^GfEF7&N{KHi| zxXg?2PZr|8;{cg^x6DST;}Oa!FJRAnBG~>7TC`tv57rd=3)3tnqx`&W{$Fk z=$Kj!ts+@?CC`}7aY!pWn{))T6C)sQX#=#2X`%C*dh9Zkpel(xgYw-cq4WqdHg0P% zxBRm>l>Vc*Ps)cAY1<>Hy_5nQMpO2~Hxk=ko70+gNBEvk6S2J}1EwMsc*nz0(3@-k za~GxK)F)%uVOLE$K(jz}?s05U-VEUl%i-Ye3D~u!zAWbK1 zD~&Yq^S3QD+*pxr*_?^iV?)Rtn>FOVP7oNy>d})uVN5oQ&q7Jb^E;~toacmto`#!O zEBwEt;Qz}pHu_&q@WlV&1RF2+H2MEH#-{(5V=S9*zf=5@^U_p`#%6pk% z_epUigO*(7&qTp?v#)q!X$Q}BHs!Wwo<#52OF+wCjLYSdGl_GkhUajnyV{@3|zw5j_{ie}r7g+SzQ|j|uSFVl-SCy2qKX%zz?k-WuZn zgj3oy9*3*#Nw}v3l~L4S-`iycqg~9Q+c+8*>=dWlR#d_K_dHYS&`SLLco}zC>CMH^K_-D*!%$Sd)1V73yVigs~*HmP{qj7vxC1(p4`_$5Yl zg>n-f#q-bBi;&!|LdCeNZ+RzBSFwqi@Ko`-}; zFQMDX>)0W>j_ZxkgydWDbjt!)7;SnV9+|1K`2+IoYEC@rdHfK(Nv_6>1XWtU=pGs^ z|Aw(u`>;T_oLemWgZ#~Xh(2+BxOL`A@|M}rOV;Dq)zmJsq)DSQ2X=Bkpg|w> zDM0WgImp=ZiNssIgPBh5oEpywS(h@N?wP}9@h>C|mvG~_Q=G+{R-7)wGcE?EkTpkzpe8no&h!h$qTtV@M<|NstD4B?2%!Hhh{Rhb zKH>E4RG2XDInl2^0(TCqg_Hg_g(Ir=gZ#SPthlp<9KUKvKfR8{F0)1S=Ic)My8H$_ zM~`M>7fymF{s3PyRQP*98jPPZi)lALLc{q_d7f_uC)T9`2WL*A*PH!Nk4AHRh@csu9*^8N&<2e{77R@Az2eC#&jf(v4z{1;JWZfY{6kRcmh1u4^zwsYHW?CNf z&E0}O%P(^uN=^uz=z9o#Fa`W(%HmhiqxiH-5yyQpq2r4}06&kz(8^Tk**XZv)nmw( zd$G{#DND1YIT(6h1wF@`u!l}y@8&3?g}6B;7|YOw(&27%6Ao*!@n_Xm0vUoIWGz0d?&+!qb-okbF-%uOo53h z>#(|u&pK~B#WkrJVA$J>HYxWdk;F|FBqpfPzV6GUL`{pDW2}(M6Q=IBiYy+}#|63t zl5Zn+!HuqZ3|*m&cO)#B;X1;^xL9d35pT@mu8RA^j*B~~*?eQB z$?rFul?EW?_jlMgw;g(aeItnlJ4o7<43HSF%|Z`0z^^;j4wdI)G{I?1$J{t5K%=Vrie(SkCuKBpgkOWbY5JgzGb<;hcUl4vx0q-t9@k 
zij9|XpU(qmd!2$#yM!40VLOxf+AZvOvJ->n$iM*o%t;%)1Dz#xXtrsE&5xIF1s3m4 zLyfTt&AS>0QyX6pxsVxj#Ksub!ruee7-ZnL`!_fl!#-TS%8~6nB93P@<8WHiHSWYM zSFZDuAHH^t#NX;~Q1M(F*O_EYTPL)l+C)uucrValHv`&w4k)yXuTS*O4jnx??79gDGG*C+VCCyzV3c{Ah}R$*?NKFj~r z#u-US(urC(adyEh80mMG`)L_X9$Z-rm6O$J7)=7Jdtziq{Sdy}lnt`tjyN+~l%-yM zjqi7R6D8+Dfrh#S_jRv6`+Tv6Yu|bq?>x1IFAqo1)J={oN9e-_Hb{W$qc@~?4c-@^kJ<2a?;@biopj8c*$YddDM88zZ`tqah?=6iU` zW+59|G#8A2JJ72iw{iW)bp-PkJp*;oR`8C$2N8{B*$sC!N1%AksvtdA;FZ|>d1%1E%;$D1>Z|8f$fpr z!k;E;f~+9^UCL`dEL!#k%>sMj-~2a%A3a{s9%jrW=1(Q6N@k4hYR4zMLGZ>0#`n2+ zjTZ+?R;TYHX_?Jr=(R4LaAFo6r!$L*2>5)rWApL`?Z*hZV5QaJw@NKzBcWBPAvGg8reuh`0k_$O~?sAozZg4 zd36FAyzxQ!Q}s7IGHV{Z)y6E&qV&1vWlfHbOJr+xm?%HjZ}BP8rjfh$TT*X8>BkcZ6qt~Oi*r=G<_dZLKYpB!s3)ykgb@7qkpTip!Gu-dMp>s zn>(@T)Em-$@F~VA&x7cUKCWxMEuC{_JXK27q`uy6Wm6C9QmwKI&_2=3eN}X!NAEs{ zRrY-M-0CB2$PuCAc_y_-O$seBP@&h}^LFIoOL)M|4?^UZ;FJ;P;BvGwvpYMA{sjpp zzCITe_nyW;iwVTMTaQ**|KRq`_2nd@Ux7<)8=lNC;T-@0IML(^h`oJ<{#JQdCtg9U z#^#e>UnFVx`RnkuWDv{xHTmkF9(^Eb!=#GefnRrL>&IdJW^pR0oXITFQE@OTprnbJ_R( z^I5ByGsmNiaKoYx;51FZGLj|meeZs#Vz=@7qop`J;t03fvIN#I$iu&?Bbdm_G8k{l zz^5<+F5ozLYxPCwAa6%@crK@UQhRZPA#ZfsCjmiemh6t*R@%G3fqKpN!8<#g(V_GS zggC3v+rC`_uZ@=>%%lj^4#&ghg{?T7)q}-{pD@zGh|2KBrh)qLEdOpOc$|)gp-Un( zp=6LaWxc@Mz=hcN}ETvkmm?9x_} zfAA7)J!+4o2~+Wn^JT0ySk5)6hYZtLRCgSG_vIn}Gf@DWB0XmFM-+}^=E8q99DbiMgQmRD!nT+n z*uQNJnvDw<+#BDGPd`zMcPIvgi*kKg4C;FDBkb~4_TC6xK_ zOfEH=Sf@j66-=p?eI)E}OTpt51jg8!+X~Bd##75sfay351zuB*c2MplWI`d8@|^oID|9^#{Sd*-x0r|b=VkQ zx~*Y^=7Yz^Y(cinC$6=`5!i$t*f7f)j%m;14AYF+k{L3vL+Lu?ACQ4#{I(=QZZ~_T z6Ap%sW^CuiDDH97LoR3MWIAm~mY$>UAg^FATW%W2XC8%^Ti*$_H&23n`!%dQ9m}~~ z7$DtYvmw{67nde@k>9z^f~yMep?_-t=;)Zkh1g)Q+NZeu zOsIbSA6%vpiG|Onu+f`82r?f=fpg}y_`TfnB#6N z<9!k9bzt$|WyJ8vci5m=Or-eVzbk*Y z-{&x%z0eQg&e@J(rG7Q=I9?4i4|S2uIR=pIR*P?a^iU8Vfz!{H3lo3HG8y}++_srs ztZ0o7;=mkuvE~TN?r_ID<_@HHXAg`q-biGYSYc0;1$|Jk0vzDW{f->E>&ECXgUQJ=jH4}l%#=Y_YP z%A@R0{(WM>NUEnXlVuimVuk8;aIm+*5&y}N{ePp$EAvp4iqGWss;kgi>1zVBM?W#_ zP$uNuSp};OFQZFuiQ%u|Oi0`Q2tB;|$m)MD;llgP;H>2L+N 
zKXJ=OC&HrYYg~cC7tk@ONlI0@U5!FierB*}ZjV`EL z9SUK}2Wewy9Bw`A#2&0XLtY)8LA}j8Va|+mxUN?MPfFxrtgb0Ju{ssM7LJ91Z$7-& z$Bl)o5b&JN2q=@DFTC(|BE43bQx>Fu6Lto~VXOKK{6I!=XFp5QTMcz&b#^TrnJdAj zM}C1nqaNX@|M;zm_eXddzgy?#%rAW^>J45SnFavl~HfIMpJ6OA?c&E_{Z3 z(dy^8a7zp=UaZeV<)&~e-xh()LZ08&E6H9u%dntvqsTG7&*axS0P&tvQ0j9o@jX3= zmQj-Q^T0>w*wTQak~hc{o~^j!v=!See;TujC27vAt9a2~1vDgv;g-T2^iCFI_s^ZC zI(=i=0kp`QijS#BU$F^k1znZSj(y_7bX(;&DBZw4*sCV6WdyMGEU=XX27 zhYg{Yx`UiMU|{4l`fL`5Z|l!;+hgC8eEE-pG`j}0_t}B(9d`4Xq^sQ6l_O~SP!%>9 z1|gfP!?rfN;jgq6(ED~Ky&V)t*6Hs>%hK)m#@+|V?Qp^;wU6P+yv3wNWCu;RkfBAd z2$TKE`>gn^jF!&}o1U3AV77;Mcl^i!-+&G*yYn4CDda)NTXhIhHeq>{pEyJJXM+3s zZ(*ib6LbwVL)Wku&eCgv_Vv>2!#P7-)U85e`2e+>YF^+(sofU`;i@V{HF(wYjQy~L4*b@6<~S9O1{$&4KhUxJF6FR zUcVPp#~?pAJ4cjdhdtxYKA(#<{p!Gq$3nGC0huD?dwD$DEhj$^-W(nT(FhxuC$vKE z6KX_TbREg5c4E)0Z=pu%C*kr%vslm*8Fo!I0^bFl!&gJ}oh(ug0*bnKQZ=hS_J=!$ogd*16q~_DJxY>AD)Se|jhuXKR36$uM+p zL;O}hkLeHo#Gxe;Y`^GB7#w4dQkPaUgD`b2`+S8^Jx`kTt0*z~f2Q7s70Kgpb?q;>o*96{uNSbT^8F-PMTc3Zcfk8?BDnqVV^~}r z4KI>{@T>ndI3cErDP9tMJ~RTF?hN3kL0jG=>w{a5w}HtuRmM}~*+O!W(@+zqm%{Et z+9WY>neY|L9Ny#jE=1k=i|a0|+uAm(5V zB+l``oTvyWnsymYA|vojVFfD2DX`Y}=fQkY2SiO7&8F355}ywlWYVMG*cV=c|E+yq zCS`pCJ9j8SW8qZ#W>h9wVv3KX zn0-`{+5H#J6huVZ|#jsJ`zu7RA+W8qqRN0#LBhbvw? 
zf=0{4VA9Jlrns__6stdl2U|nA))nt@uA7Bm!-E5OLUIxG3!QLw4{v3j*IAbE%N&QY z$8c&xmUPmYkr4hxge~dRhe?r{IBrxPY!jc#TH2&Zvesj8+JBOByV#AT*Aq~%A_ko+ zX2E&he0)Pm20#Ku6-85VYx>TcT$r5-Q15w`6FmxLz76o(i-o z_#(=`?I&UC%b3rv>9l@@C$83#Wo2>h;B-uy8cj%o@h|=(cLy5bfcpV*dN=Pns=o*` zz4}1Qj?YDWJi;Vyo#MF}8gOs9HNh9fcxIObz1cd1>n`2p(p)6z_tz@8a&iojyIu;% zd;;*B6mM9!h=db(pWj?gCQtwKgo5~Lfk?C_tocEpd%7BKaoxt`d~=}U!gdZdE`wm^ zGA!M%Nm_?|u_IU(cV|}PhSK?1pmzsSszvC9^WGS}=AA&hvl)}(Cg5ffeR%QjD*lV) z`GUrcu{%4_gfvmws{lc!han$q#F`-Mw$yHWk2 z30#U_gm2#FarC1)y*}9jCSCSNzuN^^(yPlpw0wuz*UGuKhyGyVl9NPdlO~-rSr%`< z`3)5fiZD4rp62nh-Q|&$;IZK`Hh%BHb@j^3@>_?X+x9xT_iuu^@++v_yfe6heBg!~ zm-E@1iEIJxXSbtYkfs-Yuzl-i>>7w+B^L^zuILcVtWl!}q@~#Dw?%khi#IG5Or;Oc z&1R;yYxvnjA{_nxj9WCGcUFCh;x4y8L7mO%c;)^%IyPn{uA1jSum6^YMH_mt>oP)n z+)6HEawFFhrvVf4|B&_>v*}Z#QWUQbfi30&rl?ng=2mj7uVx-wJL)%@M&x0rlPGi4 zuORKV8%bet7lw|vqV`iV@ZH#3@I&*0P`RrTPx$cPKNlIA)@VVmiAmuVGZ*?c>mXL| z{EWK}@O?5zO;qXq#+?He-nTe;y7=g$p3p!6(r))&@ zdGs2{z@o!i?5Uazbx6O7gNG|2@6aC@TN{CIySFo$dwbErz?z%uqDZfd&}KDSQgrri z23}JhV+DU#d#IYqg@=SwQTG#6RCX?l+~mhSKV?GEJC=V|s)GEm3EoOhL%&C=ETDrE zM)d6E{T4|qVb*%|iywnhDt~ba-?c1$H;6i2kx))_=&P%~==${sz8lB)yU*TW3vESM zuVxkbqwLIc11wp?h5^ja?#9a!Etr`12IHlwU}AeCj(9o`&-ffiRowz*@x;abVBpbJ^|ROa1f0}Hn7W` zitLW-Gcy0W4?O-)4?pv)oW8mUp4n1B#-~UzHUb6L+A1K{yb_Q6{3RUP-wUUA$fMK~ zQ`)kqL?BX@g5Q2fp_1SMclyC+pgN9{lyR&kejdAa{vNjT?_o{)t+-?Q zDOj2-%i^B}gF%2HlD&yIX^sLT2QtyMb3dl0l%Vs^bnZ`OjbM*{8$LBnM{93>w!F>| zJ}v$G%t z7p;|pb?en=igpRvYY`wwUiS;ewW`p?>UJk^DZGZJ?6G72A+N2!XRl1^ZjH8 z+ei>5=FGzlhRZ?n#y6-)oB;WUy>R+&J^H=!ASdcwPpsdmv7)h*z2f^zU+xpG9>3v< z>1pu$!oJeSkG-+Fobq$_iNXz)j#%mU9=C7JL#=O8%*WUmUrc(-Wj%UH%wBp6j;Y8q z1C?9YUU(i0e7nkEk|pFc3dyeMdb~KH8+s;6FvTcOwwo^&nkTd!FMc~nZXEldzlZXal4jh zdUBvUNsWD>)%fkq8MyW>1Rfj|2W2KlGva?@$Si$)ajwkY!_ODO$y9b8gENSXGNWJ@IlK8kD~x)|OqE7Lg5cM^fPHYJJE|RS_5+ z_D0uqRklYM%JqCGfFZ5Xxaxa^U@5l`|9Hydxib$jDQE*MI?3NzuKa>86V#wfJ`?LN zG(pt0CwyMM1>~k1QH4{ksPRCZz`0Sh&5X|pXNAxwuaY4^Ta_IgD@!Ylq98w0lHD6? 
z#1tk35kHR4@)+Iclow54H4@g?c5Mg7+TMrLQj_VJb{p2B@PcbHXu<#HIDOB5~{^MvbFt^;Z+!{r~;hpJIu@ZWC>s+yxg4O>%CuyZ<-F@A`5%GaK9wRiqgQfT}sNU;An)fO}&n8Q5hsaZW*_HrPR~|vNvr5pP zz88DqmGJ5cP5Ax%I;3s*iK&wY1#i=RNu$eJbhGUOdBX@u{;wSUJ5JO2dzI;d-7gU~ zJi`lVT8ygbG9$Ays3~Q@ynjbQkdHN%XWPL|9X=D*@(e_s22sgM5+ge`ai#nTZflky z*~Zz!`M;tp@R&M!i6-IhiLz|uCQq`_=rJrgk_tL)57D*7h?Tn%`eyPo9NTdP8b>AI zul!25@U9T>d2D7*{<_pwF-g$BaxB{(VFPPXF^1OeMO=Y4d*UtJ9;? zUTkFrYYxGc>p`4!{Z~9Q>`vA`9nUUo(q^85P?-DOmv~o*;3tupuynW;KHEIQ+|)_T zb+jV;c=HX~-o6jk>KaUJ~{plku-YlQW6{JBwQMa%6sa|=GL$7gNHFz9|B`#(p)&Zrg}PrWUunh-}^ zwx7dExv@~JX$z1t3^_MvL3z0bEA7*OpbJqjd!Pk2jr}J46mXd+tvrPJ-ZO#j9tLH; zk6?I87vA2R%)TBe!{SPwg&I1SnGt2Cxb_9V54tVfu_O_<$$P=G(PC_x?kgl$g5YqD zB}5Ju;}Lf+7(Y}^%n)KzD8ixUSB^Kqp2-%_4;T)z=`?av|PY@C3DE<1p8acXd28AP@MST4weH{B4(y zF4rGGL5>-&Y*`HT>-e1NK396txdgRr?dZ7{cb;|n8lL%#HIOXx}WO$oAl~z5& zea)Rjhf1``HlrkX;5L@+4>n~VFJ9#KG@Jum5x}+ie}c5$MKE9y1IOn6AtOR#U`*Rg z_#NFYpcCFfpx9Zs7;_E{2Or^Id0#jgdz!e6UCS2zXNhZfK0`S#C0OzD7CI!i2o-1Y zti7W*;hx)F{GL}X@M-IV(DRdM?PyO7*k_DV4i9nF6g7I+>jMt=7|_@SNVG-XqP>|r z9@a?&|0h*s+W~&JX+YrO{WB1GwV%^+@8(EOI||Jxw3NEzh*nkMJtaS0h*wf`~hN_`Jvh0D2F-_+UV z9wkWruE(9Om`bazqjwimm6%!lm$1Tf2S!wk1W9APJc9a zJjJPId?h+zk72CMC@K>B0GG8Ug5}^A8dM?TgC8kwz1F~hfILnl!Wyn(F-Wkm2m=hP0sl_RO(PZ7>Z@3tl_hi~s zS$a@D89#5T!CIbqofKEZHQOP1m=3f&FcfY)(x!)R+Cpkn8}2WxBkq?&Q0in94&I8z zbDIb#eIyuhlm8n$yabzOuA}FN_d&E@B{~L-!rU?eXdf}fp(uT{dN7V1mgC(l+t#qZ z5$6P*PmP)DBPr%~RUOP0xRGz#F)Yzl71gC}*_?18??+Zy5sMH|KDoRUOxnUjo5fW(wt+E!Y^BTIgM)2M2z9!d|^fEJ(NkXQ#Q)+jq}kUc+r%V-(AY>3zoW z$CU*x$(u>dxDe*NE0(zZy9P_+X9x~-e&Oz%HKg~%&hxn*SJ)soo6JxDgD1{>6DH-p z!1J3{u*IuyV%C%0FyZL}3yF8DUs`uqVEcNd(yOO|m{?nBJq zx8&Jn2TD)z+1`7$J*d3e2eS_yXFPi7vz59rh)(OEzr_h?L{R7Ge6NK8u|KQU@ z2^zEf3Kw(29i`>I;+3baO!I4*@Mk9{E639*qMX+8QTW~I0DA6`BF~&M zp~*plPR)%R$}S zK2UjCf{WdBA0^Y@kR$PV2p!*G#D{A9wZ=p+v0a0uUUDIAeE&jq<9D1M8;lySC(un> zBxuhZ9eOT)7XF;G8}vl4a0)wW!7eL=B;Jsq|8_*e<4b=8e_s2+1}#UFIJXV=@iU|^ zmWs5iR)y-9+yc|5G90WMTULJX5axM{vL#eCvQ8vd`i1zIV`W_Mh;FkuTfK=S~ky+*P(jIgW^3iiBGt)3N1i5sufL 
z%_c`w3%4Iug&PeD)ZqAhSS2!@OBRf#*^<|xs4NSGht8Ae1G6!2nCHqqx{RY7av)On zGIF|lEGAQgUMhG;o|@`&vKBIoTj7iC+s3ey3v`*1lnB%K9*$Q7|AVDlV*~+~W^5t4 zuv5a{xcNXHIw}rv(bJV#)h2QJRZRp9LZiH}Q9ape*wFlJ~O4H|=Cvd~kO7uudg8F;1 zxViqaKzS_x?`c(`^`y(VWzR~Mcj*Ihe9*wf9eoG6#WwiP#1RvHIYC3lUGVVv4fjUN zQmz2F`7e_2#~TB@m=%sr4yBxwIAHO(@}gg&sDhl8b>I6z|nP^V3K+e6s&F}`}!`FWt!hc zugMiinywMI^S9yoif_W#3m3w7-B!>uZ^Da7!O;6uiq;E(vGD z=`H`zk)OMMexOMsC%nd`YJpr~feNy~F8r&16MKFLp~@i_tBVrhVqiPB=w~Xjp-$MK zb4HLc&zH2So<`qe-@x;NDLwgs_tl^_Y>o;nduQ5;T@lO35k5OodsUvgo>OPD=kWWp z-L>#rqneD8pfqRb6zYZZJiLiQoE6Rc4SC<~YQ6RGK;V6+vI}UT$-CJU;PQ z#;Pukg+og(kyrOdvfEZ#R3l_OlQ6I4isp2}^1Y%gzhn$O{b?H3q%VO>{H~C^^a0fH zhV+fPY$!7rLLOfSD@R${)MP;?6wYPGey`^~pA$g(PX`$BQWE23Vo}?B8ui=Qiwop{ z??zK%f;!+NpTd>KEM!Lv=8$>358~?^6AZ9SWMS@ms9B~nEO>Jh_Pti3$+IP>a(yeF zkR1msJs6HWw}wjTT~snklr9h52nNrKu{XeitZQ<>&FKfIQC7Q#=QfNqnWWhuv=Vp9rXZ~1epqez5n ziZEdIJY(;vLq9|mOQA`^T55VEo9yhy{$ zI%^!Mo6q-x{o$0xRGfY{l6>3fZ{vI*2*dBEV_U2P@~mkXb{DnDbiWH1XYQwy)Yfy} zo;+)<$_C;C9)X6@R$R^JGY)?C;Z`-R;Z90xF_A}i@x?qRJf7RnjX9AG|1~9Imh=Ed ze=g={11i{@RBw}UZ#iD+i6jfn6@<6u=i!W0TNp7W3ZFP=;gYZ|I7)0Ht8@0|^IyjB zT{aPivl6U~r%K@oa0kJ{sY127ILPlm$-0VHGD|srWq7`p8>8LI)yWy6-JbQ#%jOKZ zd{v!2so9T#@-3vXI*85DR|5B~novAtn3HH!042r87`5jyNIxD;&Gf`*ftebOxHFoq z9vg`DzH>>etvI!gn&91=W(z>Nt434cBIP}qLY8^@SUhvAlQ!gcFJvEk`bW{6g_MN*nQ%32K(%42EyO3EIL-N-chlHswM zGn1L}iuefqN&ie+V&7*~W?>!(TKNWa?EbY_&$A@=J>^|CZ3jrI&0In6)=+HT@tpUv zRl(r}YHYE`AV*aP@YN>1=dpe&Q}mcar#}nMv}Ki2h)_yO zLc_@Ry3b3bB}GX^nGGr}?WDf<=ll46e);|f_k;U&U*~n4$MHN~y;CRG(lp52W&iAN z3BSkFvQhAG-53@IHj)1(2~qcEInwL3nnYgrz^z(Rz~+p?K4B{|{nsv7yniE{ED)w8 zpFJV}p9jwIdknKex%;BVGUk=vPpmCih7aOCg2%84oXa>5YmK>Fc^t>nSP3YllgKs> zoa1eEKZrxus^NT=EZr^Uhz~bh#++r!oQ5!qk=i{0qJ7-BexZ%_jw1rZ%AUb zQuN@`{%U+SAWM(>$kW=jgRuChEM8mCh^^N3Ox2oK7~UyC9Ju#JOIMtU&j>^Z^$biO z%ZK?Qo5`CrYiir>LoN-(pq9fysLfXa+qa7J)CG=XP@qU&J>i3iR0iDJmcq8*Uq-%a zWW)VVAu^iy1q2rOLGyv-jP@iO&{%f>LSy6LPSst8DBj|j7=uia$~8PY=!MHumf@kh zWf1CGjqA4PgP8gNjyd~*Y&DNQ+I9-8c0Xq3Ck{aM`z*}YFogoec6?AikDh4VN|o9w 
zz#-^32A=C+zP^d$rAGC^Z#50NOIx1CN}Yj2PZxq;+7_%hCP0INdN4?l^MrdhV@s1F z=_!!GxFYWU?ViZZ-&%B9kRzS;Oo{HRT!!O&Gr(@LDY^eP8WwNdPi-^P_(gjr8LS0+X=8PNdSTiIezv)A|$TTphf2c;M5Ci zx++DS80u%zZqFRNFB(cO2X29voKJ#1y9MOgbXc=A8DIZ<%+5OuAZ9uYy+(&1(pZ#! zxZldYI2sOb_E&&riyVwR=)*N(d+^qse7KbQ5*|%{!@hBmp+PS`u}WSo*ehAYJUBm! zYI{!OkJviq&srst-n5_gXNXV{_5~CN$KYhaO!%@-4PJc628jt$FxXHB%9jOE`Z?#L zKD`JOWYeL3>wFT#Z$~z(1KyvPrAj)X@V=-UmL=u!1ix;oJ>(Sz2U-#@ z1%0}yT!Hy9kjU}Qo7h`xWl(RUF4N*O%(exlqk-TW8mgj-TlZn1rclH2oM|UlK!Z zJH_IBwK?`L(;aztL;s+j)J=S{qMMmNu!QxkDP@G*!a>IOBgeBh!$rwrRQ+EE=M~Wb zzr(<|E}TyfXs6=~iPijb#;Fh=BnAdCVRS^tnSP%j%k@ApxI|uz{o1vUNn61&mK_sd z^8EuiH}naZ?`gsC-zG$$Rul7;pRmezIL^=oSGwpk!kz1H@nF|ASoyq-b?J?ULLD8( zg0z9z1|At)9E1OU-@%$Ae=+5R2CN+Og%4{)XmN=&HGd;au4fIRqR|)T;NO#g8S>=R z_4n+m$^g*4y_+tJmZF|4Gx@S6oq>3z(%^qzkva`;FUBXu|KaMuGw^<(3|(f~l8zB63WExGX3Ax@ zG+eZw=WpDG9qkL*xh4W+OXy!_oF5Ntha=35k7i1TO4;ZeN@V-D7Vd62i`GmKA`P~u z@Zf?b_NSU2K6dtGx9zV4Y0C`|e$fmMeOpL+5~NA)2_D|vTZ`F;F5ysvC(N(j1e!PR zV#I-qxMe$w55M-|919)i`Lq7s9icuVEkvXNxAFTCY59`0f{(@o*Y7pZ7N0un~T9{)p*5URy6X^HUlla%=1ddf5 zM*a>9s(wt1cAF_N!`1u2w_gB;Di1Ih?_J@8#7_Fc%$qq`)dT;nkt3=@7HItQ1ypuB z)2&LE*xWI0FH?7#IT@YLTx>hU`0j2;X-7AbIa3baaGp?uv2RR`T`auyF|%hs2Z7~| zZkTvih<0>sr8fP6@NsxLIrg%?zQ9|WLZs5f` zP3QQ5dURG5gWrUHqU{+2s`pJFHIvrifeEKDVg5;2ICC|ZW4Vl(g|g5h9}oY?PWeAyKAPi=;$EthardIfWD z#YxPWKZ`onCgAP^f^<`S8?M(s1r-K6A#B}h*!E14nkp|NBf8GabDtatp0C2OYX#{m zxrx;6Xcm4gRHDa)Pr_Vo<|y)OhD_yFX73&q+?MSOp9VZh?)O{R>Glhcrtrv&v=TT~ z_zwj+*7RUyHFPB}f~J5kXr|E&1DZhA71$Bcs66V~dj$eYcC%d~UZm)%3CL-W^M_oo zF}m_ecrExMnrLL9fMFS~Xm&%ZO^dL~_zUjq6QQ2pSKyAZ9Da$O6AJw7#t+|P7^_lG z*pBL*mK!5{e@4teT?^H>(_FsF=hm+ArYZzzUmAxPUDPy>sB&$y^)pKvznH#mqtyyGKhTK z!*;L!z#Lm^fen9E(IqE<|oxMw1b@lN2mUl1lYv;L!z8TV0kT#eGb67)>fN9bQ?jxUxaLR8#g{55|7 zZ%h6IznDezhs#;K67PaH)26U_8fNsIffy+e9DxPe8I1Lhe!lPIr8r^50IWRa2Z~x& zOs8Qc$Oz2B{9W!;^@1$3R5OjYW3D{&t^P5t{~H4vzuB_(YpqDzZy);KH&JS|*&i~# zT97g{Vs;BoqtuyWi`Sh5%j<8zcJ>4`D&fv8^Uv%#LsL5TK%Q(keG(dT8oAsu;=g?^Rmhh<#uU;TPGt#ICp1|pi^sPmpR^v)!oOHQG3b2YroO<>n9RUneeG3@Uo 
z9*Meq3<7-P>C#*AcBx$UFy`@MDDJ<(_{Pnl!B?`d5st9e7js_Vx-Hn5bPOLW1jDtZ zt2l;O2~yoptd`haCM75as`CrcXwGxA2Q{3%UyT~t`QYu*?XY8y5%C%L?>WaT8$b^StUio7&=zO#*Jrz;j#h%-IA?S`=<9>*j6rF{E{p zlGOO0C@tf@!N+#Rp~l@?TsLR}bB){I_ipq7&PYuKJX)}3_aio`W)IjIY(|fd`qY8* zHWsKJp;P+3LETZ6&T7oYvazYOOkRg<42fmc_V>Zu2|W7ZkvC&0*~DdC)9_wPF?Uwn zwtrs2b=G;EhtH2Kad5m4?FDQ}==wIiE$0fqH@^bWO%?3-^J&a|OsyZ;u!Ue2mqC_X zgKg*X(E7R~_3kg=_Pqxfj~Ppd{0>K^rD{8Suv?2-+e~H4wAXOmH_h;{{4STD)q})Y zSJ)#1pU`905XN1d2hlHeN%gn_+1{v1Pt8^#T{1$DBCAA-!Vi!lv2N6CT!{SMDMYV_ zht8*uF(wPH!tRO=TvMR}q7fQU80#3A_jdMf6Ady0{NyFo~0J$&jY#UYN5 z_C6>VRzK`y*5`0*TOgq&Rp;!7Y{{3AE zoA%f-XRGp|D_kMYgWuQ!Wij+{gb{~YLEdYP}gt`GNIAK^SNOMr0a^Nf;d>bULLhe#C&f9Vt z594lVxY>boPS2#ZCW>IvHVpM4yFv1n5KWghCF5Jea958YlM%R=oRC%_LQ`*H#nf&T zw?Bs)Rf~D;+qUtV1cbmO^%Xpr`xe8VC*#spiZnlb6kiLSWzM*`%s4f|cJc%xh z*D;}$_1o&FCT(F3j68%TzLSaXHFMU?SAg`a%tX<^44P|eL6eLIpwdN^z*{+D-Q103 zeJ5~x?HGSgf+dYst_OqX9PD(BBf6+M)3l!pFv#B>bG{eEQoTKj>N$hwqm_@T(%)9?O8?g>8^=r5~<{b1XrlS|~p~lY|8o zfW#FZy|mDZzKh_rkegq?H)I}jljFOMZTJHH<3(tBX&an}7Y}oCqRuuIww07yISXA<2Bk#srPw#olhVs<8#9+cd(uG({Nw zD~~@f_24GeN;diUSl!(AAtq8T0`&hHkqyHOsOHrPbVcAhwm7eySv*}IjQb>Mj?5Z* ze%^odLQMiZ@HC_IRE_DSuKR34uQaU_?88t`H!5{zG8z3g1ZK{OV76C;vEA2!+Zvo< z=fx;GY4<1|WS5ck(G8&dDFoscb3L>XvgA_fM4B??9^=<|2ZVfd=oBews+l^M^tlR= zAJ>mCE|QBer{p!hG4w{)w|($M<{u^w3)5#~9_+)zoVHij!z-Sihc*UHaHvuSS6^<% zyIH>>re7CKUBBRyNO6$!7(f@jxNVnRthu_T8<++WSo`|Jq4#R^Gu( zt8HZ-TdC7YuE$_S&@nh=?8y3k+fDY&+lWRF6v$+gk8pP2CCua)POg_vLq~uPuJP4` zXI(yc-$5OZWjcVV!6z_~%;NQa@MPa!<5;z>m+3a`<)SO4jq(s~!EJZWA=Ahmt7j}Dd2;Cu9%g#@9r72Mq9@jraquVy%p??;1vm9A3 zL1j`{RAj&8@>;N!7oZa52bgI_vEV=90Dp;12I>l|#F21GI^)=Uwz+p9yE=CP1p0np zmctCXC`N!xvA@KwJuF-QI$4q|Qyb+uIqzqU&%b3nw`AaP87XS>YYp{TIDy_YN37Vk z9Zy)uz=$Pfv(=76)Um7R^lcH^?Yaq>mg(S_HxB}TweY`$UB>e|H*rDaM#v4+A{mt` z&{Lj8(mJZ(9mg2XOW6Vs-kMPJNp|e>*~^&9h2ODpz6O^6OUJ+7T41{b{*VJg$M_H0Vgy@i87?r;uMgsRYGYcxl@Tu%7ssw0C9iIe|t@cDJyO>$hdkzP~8!)BuD;nn?wOjlA1rr*pOD?A@q{oDVG1Nnz 
z*jm+L()XJ%w$+Zz@8Lrs#?Yn!A#$H%wI#U};mj?3w6?N_g`8jZQCKy(GyX}Ahr1&^KhmJswP;|FG^Jq)|QJOGk!389%q6`K7(!n z<{-G9GR+(7nByt2>{0&#)VGL-19u#$y*%ehce{#5U_>e+?tuT**Gw4Z?fNKT&HK zoTJEG>=GyUt;JcXvkd;8z7HRGEF)}A3v6>rVST^KlI-i*_HJRC7;;IIsN`|_q-+e# zI%G$@&XzH*D}yO*`^e^372&hbVR-c=3maC7visf4VfWJyuw%Zv*K>E<2#?R|Sswg0M!dc2Isxchn&HQRKso-v3JzR4elB2m2OkI{=yKT z-AVA#`-WcX*I9|Gc1-B}Yo9-0jefcl*^szUxacB5$>M#GTAzhEo$9puc`Ali1aQ39 zW9&|{h1qb}ipkiS$NpQ|0eer2z$3GCj40P(Pm@Oc8W)PfSJ$vJEL)fyhog9o$-H@t$IBmlA{~>(DdhrS+DhWlXxK z5vUn=f`WS|41U(Yzn>-Oh2k|(z!QhOEFtQb+Q_CInL#B^?5{5=JBzqmiJ*rUJuxE_ zFKCZr((|`WGk3RI_WBi=)(cR3&vlLMzt?_Yb?GCsA{0U;0x18Jp#b{mFD#b~9l0J+g|na#N443oR( zkig|9$gR&?ac068HcSzstHkZ-rjd(~5v59YFyDD(>1r-pC&=r`l1BOM_p!m`ExgNE z1uw<8{>{gCLHzX=$TyXx^|D+iP>>@irb&_S_NrLZ*99G}+rU)48GnxSvd=F>!lOr} z=)`GulMCZu@uKrs8#jeK|Jr~ePWm)Cuo=8`UNgOSCei8Re^|MNZ_zbK1!YD>h*9D( zPSd!AW9qHAX=nf|sw2_j(_6gdX+~zuoRxzZyS_6+;x4&+Tt>!Nu2PNmK413fshST_Jzj{T;5LeNUM_ zk9feWUpA8-s7(gmfFo?AUzz#b{&MI}BICB>0@!}wLH^ZnI>h~MDsOhezw?Qh);)&K z3Vmz|cTU%+?j|kaE~MV_45OT5K;OPGqaV04&f@$HOz$utPlgh>-|BhPvOxHC6{y5c zE}tT~9v;Q#z-f*Zc|Aa%R^Qx5Gd~YAH%buya*V+VTOOl|hXh(ioPc*xpYc#M*E>aQ zVD;V{cFvJ%=GKXQaB#&&e5=<96+3I$LgUlW^c~p{>$zAKF^Bn5Rt3WUVxVx*Om2T* zLdOO#`%^1E^DAfmhx88-^P*n$ij*Uh$u38rP8F=WC_;?Ax}G&cQG%XC2>28 zcsem;IhzpJkGefF5R~IVbfOQldo!`#aPI(nu>TqqevHJ$mCAIhQ5p&^HD?$1NmHMH zWh~$4DYNkNZ|J$LNQ7!lVfVLDs6VX2?%NT8LW>?_c=`@p@*ozfTr;rKHX5w|n+_Xt zcq|cDqXE$(T>d!;Q!eX~O}`ZAD^Ypk@@py_knv%wr6vw5#h8``I0h2gW`fhz1xjB%8n=5YM*p1uRy=f(t2;7e*LI)AAsnegIRcO0v z2`+F@XPiWgNo&6hjXm^*>CavP$`PH+Mafy{_vR}rHn)hGzwLGRL-@PX2C>B})ZlFfvhsSTb>#mE(*}BIQueN$_mdO5(Fxo@RD54+;8fnQ;zj(afW-x@th>!FDwqs6wmf0; zHl2s^t;>kRqDFSFp*0m&SU_$+l0zq_QZQU?j(7Gq;{vBPsN|jFcNqQR$IL&-xEd;8 zPwOQ-u;M1O=vN55UjcOT%}~5->xvT^O|f(PVfyO6H>_XsoF~6W6Q6fB)l1JPz>}+g zU`_QNxK{4T+!fa#<+>?g$N4~(&dk6Zu0uf4H=PU_In&5ITliG44E{)&(aZVS=wi}} z2Ya9M%M^^@$)jX^_unCW6VB;MJ3iy0*SU2U-7B!#ri}OfYXQ4u%~X;ms7<o1MXr#m@@ zdK&8-&tZ<%HKDK4Y388l9{T>JG<|kY53)l7Y5&Eu@IyohVv0}0-|)9E&#eKDd}!t8 zzZnPD73{vXS;%B6&f}C;mUma&X9| 
z%_0AbF=qbv@!)?Rk9b8o{LkZMx;rLDI{pvASRhkCAkI9LnwOu2vKblhw8)07nvGC8 zCP>HhuVBCWY?}Vg4!-UC4J*=>F*!JtZfgI?s7*4UTI?-mLrXO@d9{JGL@7KNnnFuQ zB#GM3iELWC4EdfsnOvVi;Y7`RstY{UE6j+_*r`G94w}(*pEbx72P2YO%Fs<|8uWbs z6kM+;NrX`eX0%TwtDgn&e$2MUQ)QFr#*CAU&9oJ?E@l{BCumcd=4rI}*c~WM+DMJUyV6#R!(KLQyu8SVj*IE zY7gUc#2aUkRwmxR2fmU;q;KaGRGGXKa;E%&t{39;NoWTgyFHIRe#Dq;4V=WaOIh$c zq-K!domNyPER2Lq8|Po=SP}o}a>3C_g6% z0oIC4`aw5xA#nnp=$nZ0h78=^lnYU=z4&Ud1I4!{qJaNak{c{cYu!|7w&pCd_K7@o zm)OgtPrt&p$(iigonego=qL2yQjD@?^T^)GIp|b>7_+(CfwE{0^E|y6WWRO8g=7IT zkIuxf&FA_4BPt-WPK~U%>WEW<4VX_}7a;13HT{*nn(WB7BY8IGP%^-Z*7@h7aNsU{ zIJAXI80gUbp4ue4$BtP4YO3dFodzFK6S`lb7iu>A!we%nifr%4?Cs;MM$#y6-irmK zsXYVj+>23TPC6sLq>TM=cmn$c zl;-VJ+OnP}Y`UNQDa38U{M&H?&z<-`e2qD}TzYQZe>BrZnO-P4h+Q({IQ3&C=8FTh zK0B3jfSHoizdn59%imFOuO?Zg`vXk)jd)`|L!$2QWPdN3&d&DrB@bXZU3N$qB)LXZ zq4g9NygLv7?JB5GN;!fpQ_u06bK4=^%87dD%%$H%CWFU&6S^Wzj+jQo!s8|GbiJS) z`8o41;08IGvpp6LcB#_ehY6ZIIF0IV(F`kc0_JjxoxtxTh!|Q9TXmN5MVnjUUu7UI zX_uic$+p;V_b3xE@|0&$`-kawHzTHwCt&U1Uo6r)O4~QHPt3xE}e2J&IG| z*`^yfw_1|=_T7e6GcGgM7qkigIv-7j&51x)4UP*dkazx?H0+KDH3|KI!Xj%)K$jo( zmC4XKts}6dN16VfoPi=PjP0ox0Z?KijjMLmfiw5@?hamst9oSVZ8ZjSo-Ki*MU}86 z>3&`nJd+d|$ z>!U+}I%%G93?j-8k^JRm^uI2KD(#J>%?7bd(4v#@Ppb*UH`HP26dO3yu$w5kucFPN z8GTnWxQZ;<@|y?8|bdX691TQgZ=+&s#;xtM`!iS$FK!bMp9?pGuMy zh8yTlNma&h>u0!a*2U;g@Ps_|2oj68m|#_Ndgpo%<5v6y^0Zivc6%MX7AJ8jjVg3M z5)Suuim>}-4VDWi5Cg+xD7>}}(=I#Hx@GyuFJDMTeworwCzaUAbDMY)`(~36mvOL+ z7sg1&8$a-u(@o56CVOl;@wjP5gYFf>w1#+g^0gj5JHH>dyyQ|dTcXIG+F^9Oa@F?V zJ3;tQI+-R1=|anyt?IsWrq#bQNv%Yi6hNe{cjPZac~7&~PbDNZ<1nE{6Yr2^RxE&!!MsAOB*m zSep`o3^fq*&R{z$j^hWDuek8jBAoTgnp0;l-~m@Tc16?#+BtVQt2oxjlD}8EZM`dT zDpY1~230a~cl4O$l{{KzOj$I=-}ihv#g~iOX+8mV0Vy@^Lz` zIco-UsRL=h;zONuZSec*6r3|$gCQaPDD-e0Gh<6TYmj%0+R8}Nz~UgTxl@W-CO1P7 ziDp`7jxn3xx8cbBU5ur56>L&GP1{Y*6Q<0N98VA=KTB5PnNy1)syv0UZ0%u>Y-1VU z^h3<#JQK2_YZAPww&J!!KE&os7{mvwK-HRys1$J@S_oWquq~+9uNGKVO+A zXFmgj#<=>P3Hhct4}aJPWAz*%+AAc%`>kVa=k;2Yt~ww?rtUH%&*D~)wS}+nkB~dM zckTxN-*e#1{S35;IuF6VTvF!Xd}d{) 
z(lM~Ka0AYz5_rs28&VhQvYT6r*srr^5aZdpRC$jLUdZ{6T{G2&Zj00=T9J~ZFx~>Y zG|!>u=`QH6i$B_$fgRZ%xIa-d^ zPkRIhJya+!eiZM@Y$v6Wu4Kn>Ez|zpnv^aur9n2D;12?}tOlhn#ZIVr3kLp)2FPSNX3qzvA^d!2W zcL{m*XCY~SK{6Gf3PNou{jSpj$ zMmYAg^us>w0T>%U!1q~l6|e2kN8Kw4^w$k%IBRD`#947N>=(?~W!;5Wp}X+7t_(Tt zV~6R{94YLh6nHg?(I$UO(x@s;?%!EM<<0d-LvtRkXpn{2h0}@ITWh-bgEVn?wTI`L z>ccfqP(&!)A4U1Py0_CwA$)nc#|m2nr22e+h*e$=~48kY6H@DlB1n{XY;uB7&+B> z93>|YosuV!Pv>>%lnCy|r7RAUqN~B3bJ6C{4x~3_*OOfvPLlqmBJ`lC1T{}EX6&w7 zkl$(ydHL}rrxq`Ou={Hv{qF!Px+9%RSEo?s{t_C`XQ+UxBo#hk~y^lVc=+suSfWGGy1JT^zYcmF~N>>2s|AL(u&_*~A6?R2 z=+{k=OeLJdurn(`Ou~|WpuVtknkdbyGNZ2^{DcqHlwDW3l^U7vrg7>qs5T{{>*=t6BS8kQyQ2np=Oj-5<3X-1j3brq7BoISg{_EKOw?;yQFzKkqA{`vSApQEBg?m z?jr2x$So(%c#=Ei+cEg8855CH1{z8$$zGX5^tzTHO>>^f*zQ`!w_LlH+GYf!*tA%* zPr3nad*0YJ-Be&7n%83JmI%`H*oHI;>Y@3a{b;6e4em^9$KWSUI7@dLn9Q+c*HrHS zStV;a;weX9o*4OU(1Wd;!^xO^1wCbQ1Cr+b!K0GB(CGUFuKu1w>=Wvt<=`MsLawYn zQ9Kqj9IY_vhdRvGYT;Uu&)`qvbv$`(1)5sa%FK%Swr?UbqHUexF3Xm@Fqkg97xt_a6Mf&DfIX*3srzhFH4z z@I;^6(FgMS#53h9-U{RtH`&QFbmu)*Fj0%tmRGO?8->tSjw6WsmP3Nge0s9Gf@{S& zMjHp$lIl4vjC8vMyz}A_H?3Rv>#;H^_#9IAS=WY28t`anaXPLI5U0P2-Z7gdFeG_J{Ne|6YJ5Vk+F4{^R1EJWKERW6CV<{O z8>llMK^Kl-JF>@|c@}JmkAFLoXNzUXnrnyPh?O2OJ3E;~v|Xk}{Ojzv6xWE&EW)<= zMIdp|7>DK6Xoy${tP(UN7kT>7rj-Z0%MVcJfD;@@6r@K(8Zi5NDDF_W#rB^TBnEtC z)?%qRq#lqX+sCbmwy+{P{N!eRKDRXvFJhcHida^FGY*#*pu>+SvV8mln4A;g+JwsN ze>$m=^b%!!m#RhF&PkI4%Cl(S(g&=qlPP^6ph8oVW)PjOQatrHk%nv8@J{n)vu{n7 zkQ`<|jahyQuKp~<%oiQ_vd4m+N*5)At`o3BD+g3(2BFA=L~40(E$U|`!P@WuydJDV zcN|P%s!PStAgu&t^j?9%^g)pMc^fR3j6nbELUupbcD7`-D?J~600&}3`3FkEp&+P} zy)r?OYHZ$1$HQybFQa_?mcN|tdi0$2(cDGSZ_Feg{il)AfC89)qkwgg>%|woZ(zy6 zX~ZSfiC)Q@KssM7h4C}-%of*GNDE5X7sUpslb6Ow|2@Mjh+yEK&VBINx{S8JbRpZ0 zcO$uLidlQr>7|BnGzb)--5-L;ulKj$I+qY(_kRO-{i#&Fr4*RSea!T>mE_FxFr0W< zk|J*_tHujq9vsMnv^P4Wxp6UlJn;oGYjc ztCmM%-Hb_a(o=@l$x-t^jLf3~TPR%KlR{JEzrseNbF9;CQ5dZ|LWVE-!{eiV$O_IN zCpNecd-74}uK`5ZTwr%j{{YppE%sz@D%rlo2wvtq!*H);#GE)f zGV&E2c3R?-gM#$rQ)A+CPm9Exd;ovJLfp6f5g6O3bFEs=wExgJ4z`|QWV?}-Y~2R- 
zT3kQQba`6%NuF-g*C#n!FEYiOZK;m<3hH300G2{5%K!J9{oXCas)4>M|ajY)rv44w_Wg_jFD8I#kgXek(uSer)6 zUUMz8srgX%y#X$)vVy`&H@b#vD!0C~4B{TC!jw>ddeL<=-MKait&W7kuInmf_rEE~ zzcQE6xB86YHc5P=Gt$Jh4XA+rYfS1oM1$ftf>iVk{5m0-qx;B_3uOuHPnU8mt(-_E z-Dza)7d=9_{fId;IT3T=GjqgdG2`#qi;G+nk#BH{4jH&YAh*SMX0(PbU9^Id0Bg$2 zvZdqR7eOGT9Uho00kgVzEGp3>_nx$&(JfzrrJ%ShnwGl>Hl|XNKHJlTdO?3_aPkva z>hu$(R+bWWOEBf2$uPcSCePwX83$nG64D{VDEIah{|WUCx9W3rn#~|rHr9% z`vg`QHBeHVq02TM1;y!VSR7i*jI`Qt@6Qiz>mk}PF8ky*Jy z3nh8lG^;QPo@%Ydt$IDEH<6DaOJ3kL2Ps_Q5nv-2=^$|54V|p6DG^AQuKNb$F407NPB{H9}ge7lgQJKUz zkP`Zb$FFO_quyS|zbOIp`&8)frPJy6xUI~!1(^`mIE+hr6zMb0#r&~Kk=*UFU|Jqu z!v(i&Alxe#r#WhqzWaud6j01R#+>JUwi9Iwe|Qn$DMJ|c_y-CFe1MZeszhNcMH9{? zb8#pIjJh5^J%OuMa=@Mdl&n=Eb*A9$x(#T{C(@L~*G z?kG*F6ogod9vKo>Crvu<1)++UBE8-EgBkC4Bx>$>%2t!eP9b@kz*&gz{SkpT z6C+utl{w6niZ`ehkVJN88*^l|X8!P(dd6d-HZ7Zcn$?p&fNRa4@zo^1vq$$ez&wSE z5Xs$5F2~h@X~0virP7_4Z+{AAiwvl%oiJ5RUjUJ!qgeAk20pgrV`+*t3by7k_C%g$ z2v{>BqKBBcqU~hnih5Lf{|mC;p2E{~8xdrT?g( z>JVzWDT-}ye}zj!t?91zQ?RPUk3_tbXV%Bwh1*vzu(e)T*{@GE$}yMTGl8Z#kF?($3?oaEeff^7Y0Ht)vucyyO(#hi^t zFmHDRQxK~|&2Lx`$&eH}rJKdHtT5U$x}QCGekOT-u>s#&=d*M~9rR{IK*sQ6_#~bN z=eiSc%e@dX&)kT%6q*5j_7UZ6&F#NVt$<{6O=`6%nYfq-5c`&KTw>vclRqbsdoPQb zoA-Cn(DT#L*C!Ds3R}ahWfl0uT7=%{=zy(aUc^#e7U!he;DH6d@KBo*eHt@~XjQbK zQ{QLKu`mL=&Kok02lf-G$;0edb$23Duo{lNOQqB5d`NDaIladz`YY@@QP%Jl6DD|= zC`50fZzj}%ZLBmCBke^WjBJ2&Y(0+8_9k*WCFr@%3Z_NblKL)YC^cwfGS(T3U?q?N??nCpWgFuHKFcFWYG0F;a;1oxUt=FAOvyRr+KfHE? 
zn%Fgye^))1slcOMgni8=X?`0p( z7)Dbgpts$nL0`ERyn}T3E;7?F{O~rswo;EcegBSX=tqiTF5&SJW4hj!I}?5s!++YZ z;O&KM7?rStkkM|~d{&Gm1pi^3)RqE>?&EbnxCQ3_{bl#YHb7WjHk+n;98ERjiI9*2 z@k%#_GJ8*0V0eU1wB%AiL-ACz_&NACy~5eHg4A0{oz8G^!&K#BC>5x|6@Aj=W0W!T zBE23i{7xh}F4i`L`I`o%1;ItqW=5NIcWF*s&s4MCm@B4i!I9!}Q8L z!l%pKsI%HP=F!72R@_yLxV@SM67!A7>P=DP-|#*nd*(UL2{8w@UV=oeDTT-56=+f` z1?%sn!0{-=)wMkETo(Z4e}1Bmn;S6`d|O>zYHldO=7om-wT&-|6wvp5S@1pxTH}H_wAC$$%E@aNj-o$ zk!V1q8_%NE6?LR(ck#q)9(izkHa>P(N`zx~(Y22+GiMyN=!q5Q@X%l@l37OByV!)h zU6ux)(?8>x3Bf?bI7jW10cKk;rzd;Qq4tB@ASq&u?JV_%+n%49tFo*3Qu#_Wftxqq ztNNl3lZI9F1-Qu-GVY{7uTyWc9%)Dm!HaUcHQk_IZYE%F{6j-la*d*^gpW&R#lly9Lc- z7t#EHZ_Hj-#PY|nbP{urc-DB4wvifAZ*mLgow283LmwHV&64z|o&&g_(8D+9##nL9 zW>z!WoCKbAr&@-JAbPbEc%KU)^cJ#PBY1WJ2`FZ+Z@LCpDsOiteE#vDGmoh z=Fmml>>kbOD+Bc((3-cOk)5o`%~Mz4Vfgn7#8zKAUV7Zrt;6A)xBpJ*^^iAimBk~tO+IuY~jq`QnXHj|e$6?HiNei#xhbzX=ITQdrc{l~bL$ba}E{{}|LW}{?eJRDvfNYBekll7{T@Yf+8TydI6o=BA9 zYPUBKWX!N#VurLybCmr!oqJE8pJXm~{D6i#$|Nv*JGZI*1RgJMbChXC^5D~TwEpn{ z;8O$t&fimD>EMkgxp__c*%LVGa1j;X7-3_pxc#lz1Xw2K|SoQIo!_oNMd? z$$>JI?4F)}(b}GfJ*bpz$x|ad?Uy2yIg%Z;jSd(*6;?cBG@n`$q8CvzYM0zN6^PBG@|1 z7Qb&1CDNQr1au;qSE{Y3ucQnDHZrhFISFg;wnNZl*+>TO~m%+|7WmeF25%d&`QBFY?=b0PT#|^H6aWfBy>{3pN z)N>OsrOOaBrrr^6T!nZiCmVXygUO&yAa4#HO`@bwh}|cToA(g!ZrOnO0yO2()59g%zS70Vhtl7vqmX4!oTT+>I`(3fbg%4beag1Ol$pS-xFn6gu zl-{p)CU0kHN@B%O`M{Xg>{f=V@>7Dr#Q~>0GiH*lacog~3~(hCki0a7uTrpuDU*(a z>w$B4uy+!UAMDNo;@8pm*9Smz>UGp9I?124xPUvxmq6KkZ+Ngal$YIdK&0dHyiWg7 zHdYk5((fIsX>X+l+u0w)TsEy^IyoAKt*A2=m1RqUX#A4X)_(`i3x8qzPz2A0gHh9^h4=#Wd|uf@W>u2hMdK32fy zAWME@TSAP(Y$CT)U^Yu`OMd{p=p_ZJMn4FXN1JFEhaYq+!8T?;dW{lv^eNvYx70XEX^U9I_)B#+oZ{*bl8)5nmSX-HlT?g zU+`}(v_Q2B#p1F-d_@tInZst{>|=D8ZCh z%%sK|!Gf&6mAf8eidOFC5k9-Eg=#n#L3PL0QV(L(7W)q}BM@qw3T*GLlYkUk%KgV;`e+E%P&JVn_N&&|`vd};E!`GdM{uM4n7w^L5-cE2xOTdgN zfB5)M&D=fdD_lZS0+YP56>1mjvb_R@CQ{fZ#j5S$E-dro6-^9TUU??uAG(dh%T7Re z>}&j2?n>)!CX(h_SC|&@4E>cKp}c_xEgg~uJB3|vYquxL2_&f{%3*Br9dq`@?j0O? 
zL0}`i3*ReMK!WCOO3|}qGXmsjwwPmU3s2%w{~5fZh5~z{7eou6?T0lrBiLOVRXV1$ z1dm%@#N{EW7%0*s^ZnQOq?n@^m@o;Go=jujH=lDeE_q<0yf&%oN->q+(oFPBNY*O) z3Eww!Y|S`@1^gVc3bvur2ZPWlQ=6B&YYe+BdZr#9W~hb!x<(mnm~*0?I*L%)tEk2iCgI>5Nk~Q z@Lts;oNjE$GD}ME^vz8a=6DOlI!%!1s!bWb=W(si05;N1nWFAp;cge%(m^K0uGano zCav-ZW+M0Q$UZ6Z}+8MCr;Dy6)5q1#c%ow2KB)8qtsU zEC%7vUB|(z@EwLryW{L*V@R&!1HbA@Dy&$d&NRf@V$+;Ly!xjU)+Wp(6N$^BcaC{5 z0+(2HKq?5uNhYQS5*)!c{E&%|-o%J|J!npOQ$6koh|jWd}2 z9{2heGT(8&{O5H(q+2dzGM1e}=xV_~&ZoJ;z9G2kZ3~R2^L)fv6`U4!0#92ECbjrI z^lR@jP?a0R#*Hb)wyB5VQs!-b%E|$FjQ?97In4np_J-3P7aMrCbpy;$-$`>nY{wh< zKlr}Sk|eKF23>8}!SUN%Qm9m5_4f9ZdG;gfCTLNqkpw>SK8+@4^TF~E4-Mh7_z6CG zWcJMz9$e35flmW~o3Q~}xJo$sL5@`))nd~Bnc`f%X>9V2SeC5u6dR7&P(j=wSKT?A zDLuCUMN1#!k})Y{C)j8`3p0ci=Qlnzr~~JhCb2W~;_%x9L!pW7hbL6lk}%f=mRIlk z=94x&tsh7O(@){5bUQqhz7ER;)~)*M9XRtSmxR+<{wuNSxe?{>;I`^c&I?x`@V{v zc`c@nv7?yuYYUcRRYKDJKfz1U6^y6XbGA+YG2YsfZ8uHB78xPYw>*%gObZg-bD2U3 zK8nXFc+L31{N?EN5c;I89FNQ96x^XToY z&CsImjhsNNI<}yRv!C#qJ9Tje`|-FD4FfxH)xL)?Jy7W3&oIJOZAO?mZyDr8CJ5=| zO#b~Lb&@yVi3@yN`FYW^U7pP<;eM^Sinp>y;CE$rcK=~DO!zSo3fkL2UMCd$kL7|| zhYl<};XsT1!r<8s6K;O77Rl!Ia+NUUIx{Hf%W)2VEP!z<~9qxgK$fU}0U2Y~5NowDg3Xf{F^6^Tr2$U z#p|QnRt+*p`py5lTY#%S{U^Q<(hS`P4sbd7&$<1J(_m4!3I=lu8E#^Q z<}J`T&VeE))j(1&z^{PmAooNB(^wZ~MoZJLkvS}D=pdRVP%ZCo*JH;DJz(q0GuYL) zmkrS8xC6_2ajO4u&`aIQM?}Ps-Iho2@mLW1H*F7IFXLdL^JTDEJe&2*DuXSb?C7}l z8lE$H$xlt0&o6Vj37%^@;d4MfcZ=J`v~nBy`oLB^@}~km48Dh6u~HE8>LSR`zYRq@ zx3J}-ZsAd{ap?WUmV2Vsj%R~rWAFGOxG;S(dRPiZ4XYbm;@D{Lyr;lI$FHNb+(g*u z-Ie;Up0vR*fa=6xSFtI&?g$z-UsD} z^hoKHkQ4io&Cip{g4M&sxc%HL7PtH!9Gg3U$xe8WYZrBJKUWvStT(GDZ+#Ih^Hbs! 
z6&~~JFM6~2j`7^*^cb9KVkR`C|A2GuA>8y)h9na0xwM(9P-PEeQlmEF>)W>Q?Ylhv z{3|IXleoJvmQBAs5+#h!!)V2`T+IGg zcm}59DIpKzmv|2nzinV!Hh2)XXE-w+a2!lF5(~E$$o^6aQ1aT2N#i9@xA$S~ism%T zO3Z>?cr4mB2ZDPB)9$vjn2{+53y0R?4Uc{FXo@ZSP%{d~{wTsZ=A)SWc)_-n_lD1R z-N4;RQ#iNmjf-{l}YsJH>W zyb~z%*m976JBpr9sDm5NjZspo7lOul&|}^O)E@beCTPG9A459$-&+h;)@P@LXYs!G zV~{=lf}d!y73XQDQ{QHHdMEC{iuW(@TDd^qlSl&hjZZh)v4gEOqz6zW3P#w~hG1|yX&gY z9#~akBY9x_${OT9InnA}S5cE!gQW*$SjLTNE~CsIkE@JyMO0Pufc$Z4oJiB&y3=(d6``80jtJ9-CDq04oYZYk%u#ecYTw@`E$I}r6Q zx5I+Eix{k5fNs9yNGIk6w`h(M&Kv&8r9onwKG|#v(_Jq6|kFN`0+JXWm<}WpC|pn`tE< zFC`0|GV*Lpsu8=}VnC|)b0~7U4Q1?AVr_lnDehtxNS%HRN$(_J!7gPXl5>jl4sV9r z@m9<-$DZ9E*uf>!Aof+ZTC8Cd#qLi2i~O@L(Z@Vzt}QKvT`Q5}W_EAFZw_Nv>*{A< zQ?v>cUjBs2B1_yMm`ArZ2t=iZM7DKx85{BKJlyCsr$Hr~*oIblmOQV5OL@72HLp&` z(iJyhpRzq0XPwC;s&zS)oibGFKp+ufPcEPASX1l*mc25M9-Y@_qr+Zv@s@o!O)$B> zJN5&-guQdL^*fy3xRKY(I*d1zV(7sJOUyVePgy$q;ZRAtXkT^_rgoNM3_l2>cBkTG z2My-9FN6YpT=As-G=5pXCY|d4gkx=nQO1QJdghrx=Z{&_aOVTywlo%V8XT!xEJ00c z6)?eGml}n<^pXJ+px?Uz=Up2B4|`SFhVhwvtU($o`Q5_}?*zNy)v?UlXbFU0AI+9~ z?t{5Eg`9`A6Ft1Nf$6oFV!rPRoF!bB>jOOJy)C6 z_iB*zG$#sJs=#k(T_WU;_Cme-Ni4p83p*D+hXqI1v$*?tV6{||N?j+hW$6LzjbS2{ zeounL&VGJq>j7?2~#!VAHpKLgnhQylk(Cia9SFd36*u z4=CYp%6gMjlqL4N-4)+*9M0Xlp2^~>?}BC181}k$Eepg{mhi%brT460_Z468s=33c zyP^q{thCubtwpr2If(5vc#Ca1p)7ew4|H^;vFbiYoYyf^$l$252fmw8uk$l+*LR7% z?^7fh*IQuJ?&A{WtFf73KT$4*gjT5peSDJ1JOk}0?|BaDteEP0B|i}o&GN8l z^bK^a`it|9D$(?hmTdbnffCtXk1JzGvxM1+?ALS!_B87dv@My8wkN*AIaPCZt8Eh) zwbb)Gng$9<|iq$y?LopOCv zVb%`om9%+rycYADA<(k4jHqF!G~Fz%fOUg5VN00~iNBlBll*8l`fnAK=#=2*u%q~P z=MZ+i)d~ICNV>3Jk7n#D;+pbN@aEvW`OvR5wbVSVb(VIxrd&t;5o%T2aoO zk8rg_ho$Yf!bf`xW`eWw>`-Kts3T!NAN5d$dA>RYK28PV`gJ^JN1Xt_f+(>=A)~W> zFZf4^L)hcW=kR#uDVV+IIG1(R4{vGeQi^*a&VFe@A52d{Rkkry?X<;Ve~m#_QwvVq zKF;6F7)ARtf~hbslr8a3=dQ_na%FJ_u+Srs1zxVe+u4eAN!X#c3HwOHvDR$wNNJk& zU?aep+i+u{U=a^4TG!B#f+XAV9;kC^2W zkG8Xo$nk9^Eb6kP>fnQD1Mp3d!fuC1x*=tQr%#_-KO`bA%A)yqmNGP$X>n?+yp0F$HRz!Gf zLZyyEU+ADB4DC1vEB?em=viqx(0URcE)n6Ful=BYq!rWESJQ&@f9Ti!fSWB=$AGv1 
zHfq~x==<;&$}JSw1rKj(A8o_3+>uRscZpAQuEhl}K0@R|M>ziTAv8RCg_A4O$l7R; zIPBP9+UvC!D~8>`{BgN7A$$zXnY4+8U$@6=d0)_rD~0@wV5at=hQHw+0-dYwxo&Oz&<5FKiMzRcVeUha2vApZD!F>WPd=gHbsY=quN!Yn99boDw^tEe* z37XlQPo?m?u&5h{I!z_`l8iGw&co815;V4W8aoxWfOP*z!zul(urS=4FP?WE`Zi=? z_xxh;{G0>d77l>P@xc6sS7A(xCaIV$#jR!nAtUA*9!-`L@@Y;?cdr?3S)Y#28(xCL z%uviT$*G^QSD(E_2lN~)yaPA1a+lIq^LgJF(WAeo>f-{uY5OXHfU!;jjx>g_b3%qQ zI?suN`>d$$Xnd3;J{w7LD55(L8DJ%?5#D$6+&5ByhJxsh?JW+BSFg?~6IR`rZf@Np!&8*`qLV?qT?| zQ3$0jt)|rp3x@u%`cr_?_BD@OT+SgP9U7xBrYuKHuQ;xaGJ)AVx(T z975j5mcW+SDb(U4$LyL^DRibARSnMIXFXbukNS4u$ecSk^xkn)-6zY2D(2zjrOkXs z{6Mr241+)F<(TT86S!NcQxsd&jjwZ**~5>)+<`-`G`Z#>{C!!C8!DBJ_C*ki=JQyMNC6z@%HZPE zCtMkv#P46^*{LQgSdi%`aopU4qWsLKXt+y{c=h*KQ}0Au6GKQM z?jFuoJ_pZK?FDk>54g8z6h1dop-EDMv3&JPX0$mJeX$Xr{L;os;SQe`$PJQ5p{?}=rrtRQIuB&PbtFp|WKJ`iBOzQ$i5=Hg#m8SK zvmY(-;3)HlOW0tD8{Fc>mCD+3i>CIMxY&2=Q_rP&aQwnr9HgEHHP3G2SL-_%|0A8v&=fLQ$1`9^vl5Ne8b=;p zD)>85f-~+?<3HCIV^aKBAwx2rjq=o_?N8hIYn^rQ?ne#04h`jw`VVLByUqoETic=p5qyK z(-9Jn3DiE-jgYtLpO6RjhYGz;c=?|el@xD;mQ3W0D$e4^=S?EXa#OY{ei(JGEaUwx zHsQ4mLSOM-1lxaO5Whcb7q7QW1hIqL_`hoe)7ZAP{PmRvqUZDLaL;r(rlLI=Z6vEP zq$mxYx21C5!#qIh;#JYg$km+Y8UrwE8o@S9JA+3jEui^_+F_MmJv1Jx;CICLg03)U z_xP^N{-q2gV-HD`+TX;T>vv=A(~okNGK-jI>ot+I!eQ9H?gB(VOQ9``6iF_=3FEJZ zu`q$C>+x@;dG2eJlDbqtw$1OoblpxjxY z&kyjx?&gh2Ff@Yo^v=W7ojgqOv0$}(R>8~PJb&idV79zMUu-kx7S=_N!F6g| zSge;FEX^;3zcC0ebJW;YkpUbjQ6fQH%wA>8fZ=kw0(Dn`CG5Q;jyzP0A;z=0mAP&- zY{?C7^L!gz*R9C5xjcl~$@(C<`6#pb@Edjsw7FyciYdNTX2rA(i{PN!qHinxbLZjA4F22W#x z2)o{MP4kjr(dT^EidJ2A=;BbeH=&U$`#p)hdRxuk{d@<@rcVH!>2ty2>sQDuS%}t? z@Y$EQd zGh(G(pSg&mAxuG-1rHtd1`f7zY)Q)&?t9fUFpm5I#lrJGtkwiAS39w{XBVSe`Un2t zn8y%1?kY|%Ps5u}pP})pR-9TC0$R(Gn8SA~7Fn|e&W?|v&FTVKFF%^RS`9F?eF>C} z6sXs>*3h-ynT}{l;qo+XD6%;RYj%yGfxg3qb6E%H^j49^4xf#NeD6-+_a zTPP;)2|jZ_1)}@YC@CftkL!-2sKn{C=fw?JIdmt@ex8Kaf`+q(nI@<@Ae6UiDF$0r zcj8QynM_bR{un0om*q^e6ZnY}c=rn{?u1-Z! z#A0}>*|r7e@bo+tP<)%o9TlFJXFG3#>6tM0v856G%0@AtFo6-! 
z_l+}oQjS)s#%#V}0DJXg0RHFSh`Xco8P@yL&fVZO_>pUN36@mc;^% zKf+Y61K2+E4MtTi=iE;J0(DtM%3i)7C#gl_Z`Yx$+(e*&`mD#;1sMoTg~pYB=7XD) z>z7_j#6i8TlrQ1Of}3KwriY^-G%AMqvlAfU5G{HPvsuH_q0DBID?9xlmQ9&DgYpK= zfX|zS9z*dgrgU%$`B625X+Oo;)v|2XWP9P>y$e>@rm_7wyU-%I0c)T4azBf`VQ2AY zheGv>4XZB@8@TNIP~B~Wfvtl5CK>0p*{j1{_g(x8#6sCHtu z=w?TaVB}iDhZ_sl?k*aAIa*KRD#L|;N(@N4*SVjxZ7uky?0mc#1HTG+BX4R(K( zg&|HAxT#Z@dP{D>*<~n}>z{}Yp~pbFI~4~#kQZot;kejFpB`3B;-t=5&~~4{{Ld>U z6ce0GTXr7?idUfQDa9~zd=lNR>Jx3e(+LrpUQ{*mHoTb@MXOef#rrw`U|Z5W(zi+C zH@vxm8?PupZe9eYNgfu3T+E|sSCa98oicV_m*o%ZTarSWKqR~ zEATml-4qC%2ZfB2t}1s!U5&QsT!x2Vp1`RcS#ai?Hy(7=$CMUZ%6aw_#tXj{wXvg^ zt!@a|-xcN<+w@t6WH{a%90>iHx~wMRBAV!_LC$6^hG`b8^x7mQu`8HnKU|Nqas+x= zxdXGEHU+NSD+7MPC)j_}g>L+u$2<~yF(GycJ7oNdr+{Ye>arv(RtTa~+ZrKxqXpab zS1`Gc3FMwT9>WpAlGxpT2e0mVgH@aC+14jZSg=+ArIjyYxg$#;a)%+4zA8z>2I`ST z?`mrBJ`at;d|iEL4;aMv0SjM*A2Ku9&d4DsaqcKYosgr({LS#U=?z{u;|rp#uIx!@ zIY~}*rQ>Ih;p+L*g|qbwU_aY1Wb_g~(x(rHD9FI0C~4T!eHj+}F6M8^RO9P?rEFtB z6hHn?0W9BBR<~HGonN>31AnhYlO<|#G=6Os#|haBd^#I~N{v~r)F4V0@^AF&9~zYu zf#cHO_)NYHhlRa{vOn&;fn_TOb(i6{M{+o;?jOh|H@YgMJwTOZhV*Wb3R@m+2e0?~ zq4dNM+S8}TI}The*iV09{@+$um~w-k9Fzu?&W5y6_`Mxj7{sKkK7dDaH80Z=D;Vh= zgzwKN!TONI;*3;iaLWo@@lFcIR$oE2ExYiq>0X%Jwv_F&`^g1s&ca(YOIg?Ha@g?l zDi{WEtS4&=%sIbSpa+j(hg(8m+5Xk6e|`j5^sCb12gYEzZ3~+orbGsRH1YeYo4ih& z747;jf}5r7Me0FPEJnhTKlJ67Xye>q`n=eb{H)se9D#Xp)|O*_QL23IeFDW-j_h#G zOjg&U38Qt2vFLRxd@=N)c{Yk5GJWd0Yh^p;eVoJ9cLuK+=O$G%= zF<%l`jEgR>6=r@4w7W5x{XJsI<|!C3!#n3;_%Wd`my<$^PfOD7DT)C5?U_l^7o2Fl z4K-H3$$T} z;L`+iGpu?*fu->~&Gx|N)En5j*%_|;%VUM!3L(pBi|?kK#gp3Be5jT!$3@!kgFjl~ z$IJ0haxDV_&bbI@02wejqRUkMzM~fr7={O=#cn4dJFk4W(KtQj2Q593N{tn(x`$+a80=nYYZ)U-(!dIj4&Tv=d+D>z5h*A`sok% zFw=>>i<`_Ib`PV2OLpM7y%{(*S9l*h&4V8re(*zS3p=@P1bf{zjGk_ug~2lhQlLRI z?tAzFt<^Jl6bE8_Pb{cg-@xz1W;A++D!Im4&>FiskiYqZV+(}N%~?Vvg*X~JViXK3 zwWp8`!cNC|05yAkf|&=@n0r7rJkaqW3tuffFS-w_RI0G3R1qFNxQ1_TFJ#)*$|M~v z!A^{PfnoV_w4m!Xev}?W|LnS8i5sJpRyRd*-_OGK?0ax%)g3ICOl0*9TVZ~YNTA6& z3nsR?pwKh{Y;SL(^K%*LOt)dW-CfWdVa>};y@03Y$gr4!`#FdAQtag}dfjI|$uP 
zh3x{B^s@N+n`!*om`gBqoiy1eG=uduH|qMD1%4O3fRl=4Cq$E&!uwcuOUEBW6ig^e zClQpQ27^pZHU4(o0)dfIFyAHxeYWa=LtPKBOhbHrQiU?!E3iXV_h6x31&*^0_HTpeAV>68FXToG9HKw_4&vqj#=4A!EYm0}v@p}8 z)9yo<{QGvi7h*svZxtwii7~B+9sxO%$1#_o3Ia2xgHpXLogmsIfr^rMuKnqM99nthK{8(Ai-)3J4a^_9Oja0 zt2%^e)^gF=KcUq0E=-#2Ae{ShAm>UO8VJPBA3==}*Eot5WI+TkMS z9cVgx9jg&K_x?k4X@1*CHm%Hy)?FJ4H_i^^l|IX|+g|>{y|)42_%~yu zf1IDW(OjUSNy3?gZD2I~FP7`>;_ZAV&?V1sNM0br+@b`s*~@Tzp+A##I%vbfBk!Sq zR3g5%m_%C!s0Pp-GiBJ=h=2e$IQ%nv7DI{0;>+{D~4( zrM=`NW|x8G4t0V4C&@;(3MP(yjUc!Gl1qBJuybl1!OV2*nDO*stTxnzt!nb3>jhWP z%c}xS9qbT@{xk^@e3KFp17RD6SRy0_Ss z=pUdqYdZzCeS-hKDl(>Xm-(F$jHzyG(EZd~agVG9Gf6Dq<$vl?2Uvm0twWGgBaQET zg#5RVF)t}ThDOyg{D~cJu+Cy2EzC=IxolHN?MH+=_TWBzF-@1P*{y^MzCX}@yen*v zipB5W$Ivk+DfZ>14*OUx&oYY3glvvF<+SaC`mI-Cz13!X+C7y*jrZaW#SC1Yorb%^ zI-yajwk~gMF6z$oLHE}uxGWh1N^KHG`Pl>)gluX@%rX}C;3q6>Rpv`QBjL)PC zAmSS)aIY?Z1FKW8BEOUm-@HO2R-z;y$xR(JmXHJ{=-P+dcJjMDwk4F35u$B(f9jZ?xfg+tu1ha zOF}j_&pDocCH)gRKmTx5Uh#7CJNnGVcEr}Js9 z-$UlJaMv>K+=r9gE|D&)@EQT3Pn=j}gPC}zs0vk1PXYh$KIrs0mZmOygM%AJbE8)m z;MEvKz9Cv2{~LP@3ig*mReu$XeRPfQwH=AI!R11PqY9$}Cd0Zb`7ofm0zS>PC#Nq4 zB8P7u@si9v2ynEZRC{Uef!cMrelrQB_Kbm?hce7AC5?LVZ-LFQ_-ioh%}*10#+=Y`X~wm22F(739o0x};G}|UfS-^;sheKoQO*iV z)s;!IHW|Y*7*ldd$BhpOa^CI0;Zm0Dg7yHm`ivuP?)ZgA+qKa3fggJ;7!$%-WJOE2fC*a~6H8@D<6YQ-H<;DkHh3f)?%PG?Y4y-#Rbj{pp zWVHp29~VY0`s%Fl=OgZ=auzuD>BHS~PD0jsJFclWqA;i35P!&(%~lGbnTgBEqpA|T z&R^thS26KtDOJ(byPNpSUeoEkS0KE&FBYwjJ%Q=T^XOXQCD6Joi{<)a@zC)#_)5LAm(Qv>X3G79MwwVyk}S2zRepZ1dk*0)VY&4O4$1Nf&TUS2f&R{5% z{>pa-565XIzqmNOw_%CZ>P*^q5J;t5Ahdi=IsJm>m(zS&>cCwB?-{Wu3|Py5JMYS%$=*CLoa zD?{|_&(5525oXSKO&ziw_IFKwsow_GwHUth)3T&umx@ zO-1|gPH`d4neD{2tY$SLa-wBx;r6bz`0Ge1U$erOA9w#UCeL%GVM7>pM6~mrTTXJ5 zGIOzW@HtGM{v9Tpe1;(tl_5<36V_Sf0w?UfUPVu(?_;DnwJGIXv%Mk<;41lYE)^c_ zoWtzeOi6jz9QNhiO5A6DmXB4`!@cQgwBqa(lCx5xl`cOZYW_m@Ds%uv8tcN9ORh|= z{1+_I=)?707jWUBB1|7=MgbS^;M?(~^g#X>W z5*B&M(`>(=xUAZrHI11^T}{?t@OB^g*%;AUVNQ~f(xiMb%AB8Z-4hkZn8B%vDcsgy=kfOg9-eAs0=ziRy{w_a9^R%vKY-Hgi}2jJQhJGfu+ 
z?QkmOdYz)yU>Nl!5(e)OX6X;?*}>nBV1M6I$l97n%TCw9^KH8LCP;@|q7Oh%M<9Qr zXA<9T*^Y2-A^n_Vj&9XGP_ROSHI=C`-({{eRp}MqHRA$uwm7`Nu_{0!=< zLf1~p11|Xf<>l{RfHlou0L;g-EB$|AvqBKp{AW9MaDEKKx`f?|JGDqAz>5KXm+SHTe07FB6KxWEB2(KE2Du>+B`H~mAEIE&c z-J-wA#_xWXOC5oYnJ)3D9-AMD>W45m3I3wH2&{BP-GwqW%>tSc;_NJ{|w zk{%?lB-r+RBC$5X+&;DeB*H!MLF-s7@s6QcHXWEBzEiMANK(+=W9-JRhr+Hl6r;Bo zu;Jk^cnw!2cEI8+>=_`9>AB_9R5_CUnfC}9JT>sHfgkI*8Uw}CE^uZyZgB-OywF&< zQ;*SV>&;+6yZ##vL1pFMx!`*r#`>pW~7Jb()t zIGoPk74u*AAK_NMs)9g?8?dYySly7Rbne+DxL2CbAC+(e9<( z`mixNnv6=#+2~|DR;=iVJCe;{uIDiJ&R_;JmC78#wb6#0#Ih~i?^Pq{it9)GR#V0;ZT${2cMc@e&Ln&= zDG0hZ>X77zJa}{X13U@Xh*{3fe38;V-2O&`I^t&1@X<-)LFSIEY5i8%5i5!LOOHZ| z&vbzqTS|6EA}~+w0la%G$p(J^g;}q+qwZIE+Ux!j{tEZ^#gZRjVc7xR<;n_DD0+sL zlFn#wOOtP6nRF5n6Ygtk2o&sR zV%Nh`RNLo9T2*;CRCyVzk(a^kar?pJkvth6i=yRk=J7KhTZtE~HKSD#3bf^XCmz1B z1FsxbWbM(!iu zq37x zy>S*7^JE!Ic(@u$w!Xlx-?TBw-i5y|zKDLS$FV(5c`Se9J8URegb(zJ_{arL5UszL zGwcUC5}nR%-?9=Nx1I$Xjg>Grd_6l|nn(HRceo9H*D+wOI;~BbO4G81Tzv0J@S7k! zzri~Avuq8>84q9<8K#(c-ycE>c3{wFfkU@^6CUqA4TXRA@TWBDasDm|ays$@LC1)l z6WM}hM=uV&W5kYs-UspDv*7)QV)U9AhgW7u(vGyzaBjpc_<{X=e&sXh{gsIucHf0_ zeQ!DUxM!&LLy@U!dEr=XK{_@26*p1V7r6P)A*3^$MZ9%q2QGRt&1WCrV0So;^}EP_ zy8I4`h5l!Qvm))ks!F?_`$LRcBs2}UhbvF4#T%WCV7D}nC5Fr=U5)t|>Sqr!4VU={ z1sBeBwLZ(fGKWoAID$?6Y>#c0K3q?%2PSO#$Z77Cf%`^0roNpZ9?|=Y4-L(OWhcCG z=Yj+1u_>OSx7y(wk954YLI%1**3%k+iS%Xe5Inr*2JZ(&tcb3{WA#Mt)_n&mN{PmI z^OHp$D-5YuQ=OhK6gb?cRG4q`e!AEd#8Q1F$glI3Xspng?Xdg<9Rj~erF=UMh;e7T z=X3xURRvXi2Cg4?55H*n(W<`Fe6VB$>#gX7vtO5sCraMoy2WPb*>@ganLGli{u%gF zdK}v%*u#M zQ_*oaRXiIKOw5@377cnh#*nu+G7$QGY3Sq>4Vjl7LFUHIY|<~``!b~h{A1l{ch*I; zT%ZA?4@aZ-t4@I*xf5z)g)UvBCw;y#4WhS4v!!aj@O8>=eEv6pMz{~7-__?)&nyLV zpS}?Pfg+P zwJWotXkN0kiX;y+?JqAdfLx0tIUE`9=i%T z&K2;cycCK?yTFz-fgARBKldn>X9FYs znj?JVjxy|-qeXSy?r=cJH0`sQ3ZD7z`A-ghW zXBC67+26w+WuaH-I|z1d95Eos%s%Igc9$gQnp<41*Xz5mkE4rz;cRo|Z0=I)R$LLhjP3oO zsWb7$>W%uhAtXhKq6|^Sj7^5;>^ljG=9C7dQ5q;klakEJ5Rsus8B%;Dsqmb=l@cjQ zk_IZ3CMgX{H2lu*UF%)z{R>#f^BniR_vdq6wNoiKbRextsuu;BOY<_PbXciF8(uC< 
z`ntzHZlHUw3z+!iyYAbakMmUF9$^S)Y^&CxXl(T}1aoF@4*`u;R_OIwZzw?I-d~i<3fwki?s>7IkE=S|vCJC|;STYmF_u&Th z*=&TBn|RqmVdrt7hy^_3h&;b?c4L?0#F!KuGiNbrxqQchN<;YeN0sB@ICVy(i+(UU za&{fU+WAX-dgDBb)Eh>7FX+Ro{$UtrB|^5+6cV{IvGTJxY&-VOeY(HyM|3%hGtXiXFyg-H~U1 z_dG+*R6Azhewba~y%X2_tb(&Y7+Y2u!^As^K`h*{B~#uA8S0f(5UxUduE+2Rd5SQj zb0%Nhm5oEz9l;K@cX;a21Tydb0`Gt1@&T2Z;9q5lT5qQa8Df1lZkZ#kGv9y`qCa8* z1Ptw4(%CDGNIqAu2V7e6_-Ky+`ZA;&J@yK=z|^4>YY__*BPW53`%TeanntaIwhQmw zgY0m%57R%N0)dUmAb#42(Sh5^3lC#Vj|rVJk7gfDp6#D%jsVV!?Vivl+(k zzU1uv^oaSdW3y-4u(NtzIClIRa@x3r4b4aa>v$z*bS)hA+c`2-UW%Q2=z{u7wNPS@ z3|n3mjDK72bNP;G%z22wd-6EQ=@grR^XU!v<-q`Uvc8FUcw*y3BsbzG0boX}XTT&_q`kA0c!z88=XQHcu{Skj=b&*hu%e1qlz&a{706An_p zf*)GiaNvLAvD$w;xe2}Cr^`J+)4>Pa15^0y^pjwI;ugLW#o!EMJ2pqyX(wm)g2J0; zXn1rl6$rf+t<59Y%k&{!iGB@d)i9nVhh4{I3PE!fEIVRJ|`E{xia zHp6nnLLmsw#u&2o%jMbF&ttH3?QirKcFUOOJxtj=?l#W|_xTU4finB8?U{N1uvZMl)Moq>!PrWd3>^V5|WfDrL zO{ZHXo8Z5_*TlbyVnC}+k-?Q$m>MSV_>jlY2nS~RF&a&5Y?$#IN0!qdaPdPQgT>!4 z2%239lMCh9g2czT;lm;tyJa>l-<5__XJ)sBUe&=={5Ld9 z@DlVu+(K(Al&OL9y`B&kFU^v!rje?FI#XMA7sHg}VOeejr``S!_Blr}`;|}mLcbU` zbDBTaRqsR3Rnl-TJAqBjyUclBS4FF>nv_|kPh(PA;OZMI_Fm!z^0G#F{9_<~N=f6K z&!5AZbLB8{!e*AM;7=2#iCDLr43o9ig^Ia@Xh!Z@YVdgj)>GUm%-K-r0ISorEe2E` zmW^i@?Plw1E<<(hL-St0FOn3o0B<({uBE6m)iJ?=f*Sp0S&OK zLXPQ%-$S|4FL-?yM*R}0?D=gTr^y~iyAO3(+3*PKXCOCsn*lu#9031JYB;UaOQ=w} z6(qJfk^RsPlrR>dUk?K>x50bGM+34HA@gV;c0{*mo^&`)$>I|6iQ(b`e$ z`>kb6XN?mz&$OlkYu3_$9m8PiZYAu#piRBz0(^RDB_Xysekajfmeu0h?ox-yh8u zOv=WdbAokUIQQ+EFo!-bF^7!0C)}T;?If9Y23^1J-Bed23b1s-Yk;NZaqGC3#Y8+M%(W;3}^ zI$2;J)|B!-f{k5teizlcD8oI!Xz@&@PK!RD#_Y5j+?30U=FxEo+{WV_a);^lN$5Fl1dAvRp-TrJ;K6^7ISIQ{xcK65uBFXboSL_V zHu+5@tue!aj2~cKv;{n>nL%zRD?xGEDmKh@E$N^4W*&QZFj*x-S7WWo_@OjQ9bF}4 z@AqO#qy@7&l1qW7#=>+BXS!o3Lr*715-(Wp1LJb3ufUjEs+O=jO4I55#DTPZ?-z(2 zCd1afR>3SqX-l6A?7CR2bQ=U##NW6QS{(g3^~<+8)Y*o3pFXeE(XP=9ikF}r4SN$ z1z%+5vu|N`x~{7y|J&(9Ch??=4o-fX|j#DB)VT|FYMyGN)!RId;u<`_OVQnxTU} zVIQ$ly@i653U4hj#AE(W_R4&8SzQ585-Bp`Ib^?DM9DY|+HU^vBYf691{t_q&J0Hmbj& 
zzWXNbE7496CytFxu^9oWQ=duV>42MN6Z+7Yyrf^+p~Wppqd zPwj@GYjxQuLm|`p)Q~NU>%aq!%1mw37PcUEG--*7#K~Uk`7^Pd@MxqFWoK;QAx@Vj zn;Wu+`}Jt!mSWiY$$~DLr?L5BXNsw>N1JMO{Mg}6c9$Pw+lyrGj9gd6nN|rFY@<$b zq)tC1SFzi@|L~lUO)mH=&94h+fL@9w*~Ra%VVe}i6@;Sgo=X@uONM>@zLswFYto9K zDiEd5W-$pWl#}a87bidFJ&QMRXWbWpxY3l_ZFbP8(thr9hci7HwwOw~4#UAW<}74m z3^!Bq0^Be&XKHK0psv0e_DsKs!&(XRqOS|4IVJq5*Z;%*%*+R}xUmE~Y%vjiWg zD~R@KJ;86K^?b?KY4B|SIKFw~M4B2uma=b7!x?)F`IgpL^cOEjc`Yf3E8E3+>W5H_ zTqBfBNxCU!(amTkA5 zM8*?s>Er}EPO0x5`Yb+yvn`Hb+=Vpk96yrX-jfZkqgCne-9+??SEQ|$zOdUoiT}Oo z9fps-i|RH?w0Ke|nGBYrt>=G$N#}HKxU}Gfsa;HSV%O6=xvP+*_ZB`|a#Rv_8a8wV z6I-Ii!iSsC#QWOJ&UzuuJ`#;foOIaFSHkyoS_bz{#hblv9t`ugzXG==d&rLv@~?YF z=K9#<816fU>{F!J*;gy!LU{=c`}`e`uQ-qNR11uD z1>-d788mz45?Xp@E6%NP;6BBU3p}84oZw(s)`6S9DzVR1^I_Ps z`~0m{V=+%!14-WtF!a6ZMYJarFeeD%Q{51--bk5g&&p(0pQ9L)x{ ze?Xi08E`h!knQm1&{x8ej+V*NaF&4cW+kxcdORAmZ)S0Q!ECeOn`p_=f=#2IS8SRh zFeTi#F>z}>6mp$>eA#gN9`+r+1qHKWsbsKy{0{rS)j*YZ=Uf?&43ZdEoo5kJ_HK<)U&rCDZAwodde;SOjdZksd1KQ?m~aHYt*) zFk{GG`dTox@4|&uJE-dBESj&q8Xcck!E}L<-7j>gx>hZv0=HEnElo!6VQz8|L z_RnMAgNwJA1nnKE_UI5WW&<^L7u*sHQ9*CHtR zv^pgIaAvoz7ee17A-9$z#}bO8Kz1ggak3e0D0Cq@n8FQ}?ZZvzLq4nD!dSTwmj1RM zn}xVV&2N9u6xfkB5?9mL%Tp2nkFPHgU6(;lx zZZ)-^P;)*8|7%+cXIIJ)jt;?R@73vz!eP8%ZHx8q9NGEyE0`{IfjdKoVYc>Dd}MJH zR~B}l^s5klqq75(HLSwN**X-);>h9Pbaa$(L&IbB`03{?`Vu>WF0T$_vn{W4&ej}L z+hYWG=T8@#d0&CT%@gR>;yv`^z;<}Kynuci@8GN-n6rb`n_1)eHKbUU3QwLzLbcG( zeki-0ERz&zpm+*Rs-J}FqoUx`!EkzHH=5o1ro%Er^w^$oSBP(_$BNnBurhr&HE-bP z{gzhHyD7&^6n&}X+aMe$+sAv`c){TLM`(c(2Ua;<+~4v}?(*kA3<Xr{dn=Jze(8d|q; zhT`vB)~!y?Dfcc6U-N{oPVGUJMW1me9%D{I&!l&X7t7ptl=nL#%Z`~!kXqIv<~7C? 
z^Mq`8kjyN$$tDUrjzoeK6@c-_L9k$<7JKYDknRsaZt$vwG$?N*y!t$k7L0D=W&Xso zZ69putLsuoh;d-kst)2ze^Zc|aSrFMl@c#7FX2_EJmRlU*ugd>{;YVf9Yui-VNe}& z6Etr)a{=zF=~wq%2)ZA^zPB4uZbAsC#{R*prz0R^$PRQ5Znmv|C-ji(6xjheIkH_h zlwH|ki`^sCV8_k3LO$x9@I7gVs-plQdxU&fvXF_3UPI9?&dhSE0o5=3!S1e|Mn}iq zKrp<^f7Kqq8V%loe!Mb8HYmeD^LE((TZ?7qzT&6YtVHXBJTG_8h*Vw`LVBhiOHvw7 zc9~HyxB3rnbHR|^oVHqIYc&h)j{QWZfZM#2G|?%S)UUH-Tc0)#7c(_uR@pbw;i~Ir)JbF()kx z-nMfXIZ2z&^~{Y=o~SS=h2niMxD6oo&0fjHQh$ z!JM#juxqv8$;!~BX5VKxb4V*!zUl~m%)0=+^OnHRUpF!0=vcPT&=F<@DbEPP9?Ze4nU;GJv?{s8KUe3m;cV3~BgfUzUY3GMmZsk55>j0V8{rKX`M9^QG zf#>8+@l{`#sH5oxTFeZfH8ulTTe?2x?o1ce{HG6o4k=()rp0=s6&M@R%hfh@R=7GJ z0DG0;u)k##H#kg#6&g&!&~GAEceNAt>Ph08QI9}>`W$AtO_PZX0&#w49p1bhiGwFE zhnEZUam&yLP+~ucA29I`X!y@T=~24u^6+=?G^31ruBXa4!Qk#`*uYzvXy91?Zgd{q z%KzGF!72t_gX;qu!7S68$(JQS;gTSbniL~C;}OT*F06uvu}!$WVL1(v8Og%N3fx<_ zBV5EHQ})+B560b|gwsj{Ze&6Y+IU-#^|xzqC?kTTXAfsT##{40?JJ<4g`0y$Gw(V3sHeI-uEOLLHt)Lv z#dl>P*nKAZs*=Zf+MU4iMMmPExk|Wn=Ue{h<{6~gszyg_^PyHJ16@Spsc6GeaUGut zL%I*pzm^$9*Cvug&pY5-h1t{7bk5S2g?x+A1?xpMM+Pkk?>k zLN_L1$b7Ntz;C!d?gLludIkToDX2at~v9jX5m_20-l+H1t>bo0Q*7rpGR_{)qKD_3=+!*g>sKaJ?+R+yo zM=H9U#V(Iaqm37}aP*EP+}T5KQ1{CpF0lGP{MuV8FhoP)O7ACr>Yu+{T~;slm%oJ# zU$VK-&0FzW+Z*U#V9)9n|AZq>2JHOMG|>nl*Z1;W0&0(Oz>~**qsCDyUN=zK_biWr z+MFS*W$Hc{suGWNH?{EHnGh6S{>*SkGW`7euA=jNJr>;S0GCi1P|O~UNkuAb^A|%} znKKQ11`mZixA&mbGzK!|!?<=`C-`vX6htRJ2eZ|~Sl_dk5aXpxCBn{ZpTe<*Q5m8 zc>bH%WBUkHiylT2GJ3dfk_yiEv!mL*!||0%BHqaB0OJ*H@O|24*qt(%eH!G;=M1QV zl)^!vUX1*%4tuE0JOnM~li0p3s|1<=s-1JX`C;{5&?Q2Wo1+>#f2@uuc)yx&=Z zJKD$M?G#UTa{nhB*i;0wO`Wiz*Nsw6TeFLmTI{Tbz;2b}Xr;jP)y@qXSsO4$xpnhZq8bNoEOcy*^3)jjv+^#{irv=RG4{tv+gMzOBQ^Y z2f~3K>V84XN$Z%ySp}&0Wrjy$m7qOBnG4;<;hd6rRGCYgn2j+#N?S%krJfLNb75961Ha z!#d#6os~@fODrCbegxhzUfo@67}Gp9&<@t_?eae`qn*V|`~2js5cZgnkK zF3fE^?T1oI>`1o2*MJ%yWq`)%6#jtaYI@NA690@i4QWx2P`cchC0~!k%%{iT*u5gi zl`6*M_@;7~O9^82XL+bw`4Loh`%~d%3;N?2#$C_m*o~_;mn2{F0)V zDem-c?MAw4@PeOeEy-%vbm5Ij88mrM1r`b4i+O(~!Mt9Wy9us7?wC58`#P1|>1Rcy zmBp|$<1REf3c1AIe!N>>h7^!B0KLT}d&8?a2{?_BTR$~iX?7q*1 
zh1}u_qP6hidIPR{=WsUk9tTR_{aI>FG86>c!F4lhJXXCGzRlHT=?atJgnI-EoO-PH zoC>z*o(X}>g)HA&pQ$cP72R@_Vy2;`9G#tH3l^invsQ~r<0|>|%Y=S|TcE(pf%V`1 z4g&)>fPRNLTV|XOsrv+vY{3aw{#g^8w{B|S~w%-{H+L0YQxw(fdzQ61|knO`PpG~9pMZR!G^%;EY5Zrbi+O+YnNXU-*K*8-fY)XU< z%Q!oUWG|b-#KtYceNzE6+}6{LNdkjv$}Kq1mP!HtouH-Pt>Bo6h_Tc%@#DEC*(iM} zs@w7x4rjgMr_FwZbBm-PYvUnQc{&Uu`>$|U-SY7KaVxS4&jf`B?d3c7n~LAGKM{SY zHloM=u^8~a7>8eQLfg|StgTWP`d8b~;$!Yw#UR`;p#)VP9v4fx zNr}d98w0;>Bx%W`Vw|OM9!IQQj#`K&#Jl@gd86a{UAC#_ZY^Y(T#$^_~d95Z$kEd$BTgeV-+`m&cOn$~mycz8F?UT*Wmz z{wu3JXTwQcGGLYg!Kjs_1EJ$SV_8lMwidKOw)Ai+SjEAm!~IwvITMtA4#xH0Etpb- zH3r^uVL^rfYg!*-#2pFoDE$HCm_3`N^(!;uVr9%fn!w+Ng?QlaSlGARm+VLF#^SYd zwCMf`c%6`oo#Fb#Dmt;s(2xckR$(PWzkF@5V?xH?UW zef#PLQ#&>3kg)8$VYC?3-_1j<;~Ur+BMYW8L6_x5N5h?Oqw)62r7R}?IMmyl0m#jR z=jNx;>QanoKzbrqUE@Kum1p_PS)0h{?helDtRd~!8ZS1OkjHLVjTiV*nIPY@hSn&C z)831ZF+Wii^n=6b?<#!?32Nf}UQ4ng=?`(C;5Ky|y8%02&!mj)NumK^9ys1inGH-{ zM|S#UaBAyNw&-&kW|&HIJ7blZ{(MasX7>RNwm#s3j_1P@-9B!zel(vyxr*~VFc<^l zg2m1~LPkQELH@q%&A0CodP}A{6dh*HnWzuLg@gLBXZi(bae2p|m}rP=uG@k2S4Fn$ zaX1CLyWyba3z*rXE`ExK9)8g8fun9`xJAEibM0$K;?i9S_{}MhD*lFIbHy2WJLWeo zIUNGmR%hX;_k+;aOyJASt`aX~39vvh40oC~V)fHAU~bXJJ(#G7Eg@oDV`)<1?1ahVuo3OOocXoxqwo#-WH(~JjE^gG6B+hAJCuqBDqyQnit#jreuJ5ZCKHVxbz`%fQ z=~@r+&!334zMI7M2!4APX+!QotO^B`+QPp3UO2u;8n=iq@cG?oFgGh1-)hfetp|c> zPWpCqJS)k%K2&j&f6K5{)i1b$#~r*@{UY$me*{ZaUgP>VHmu^4IT-|~fZh`xE^eH_ zw*8ult1*P%<)X{fm2zRJgFAXxIf1tBeyCl3TcmJC3o=u3`Mq-lZe;ut*6;BF@*YRB z8&Ruip@ud2&b$Zb3SQ!-aq6TtY%TAIhuP1M7x|b>23r^Y2FYE2AU9cX2`*tw&%PU^ zo+-02mY?BZpe=hRbcb#x_rr(;FYeXC4|ukEI17|N#6BA((mc~hHuK#?e7JBHzLxO9 zH=gA*^hW@1;+YL|Eqk%hunh8s?!pZ^AHY@VnHK30rkwHtrH zCuMyqnspbK8Rdfd&BM4gZv>NhsDz>{C5&IZf!sP)aL?0si(A)S<&2ZC(O22o8icTMZB6d6BsLx;o>SM+B)5cO&NZRs}%=Bs=PM+k-Y@( zo{gYQCz`p3qkB+hGx4VsWGVN(38g;$4d8O8cE13T}$lQK5!}y0|(W?0&C$p)Ow+R-`lao&Ieda0LvGQyzObh4# z$W3Fu>Wf%+Y5_`Z+<-5FpMjCD9kiN0<)`#(k#s->4osUtRi#C6@zhAR>5C%QcS4P^ zYjG5qA@pFMIq=*ZZB{gTDnC!9jr(x@6gHiE&*^XBIS=zrF65br?OHU2rbkynQuqb2 
zD5AU~h_!>8+eFs?Z!?tlc$2(B2-pl!r3KGS+4WEn%UAKD;$CC2DbK;z*Q~K_lQl&j z+m8$6!a4VkLzv^h0kEohCV9$4lV08v(eV^-`dw^BKMUJ2qG}WbX?Am$wdB~pE_u<_E8RsEhCMC=At_~!bC2EwN`xCS` z4P@53Dx%S+HE8A*b>_bPBV6yvrhVUD!J8lLSgtncocE%tGVfbH*!F8Sdp30znmshbt&XR0yS6>ux7X#I zDud{g+#;;|9!6Ha%c-PF65FDd;g>Ws{3Q5zPuJ@Sj_}*)c%%|N+ey^7PYiLhC0X3_ z&s_HhMYNhB$G$8!#S#-ImL>5-@LB3W{}v?}DrCrB#vr&15L|K60!LtN4)L~1n6f~R z?hP0WrK9t>jz&G0d`{>k8qQ^t4?PCyC8sMa{`u0>?w7zMhLDj*KRBVem z*&T~R&&!kD$cvzR&y};4I0)^T%kZ3VCRY^tg;^5yIK6QawzMZ=T-ItdX!wj56=sll z`~-fHlOKsn&QL_+2G*8yk{{otLW^$S0JDYtusCr$zdY+Zw0&EN3nwc8PyL);Mie|8 z<<2_8D{w6RmZSNY~vC3N;3OEYc7 zT!xi4wXOWZhi;dl))h<1##)7)oGI*JI8$L}3QX2(FpFg=xa-UdKJ&?Y&^>qyxA#or z_VL+hq$vEK4g1U02w58Y7Zu!4-+5#pa3T`CLg49=zu;u|F}&%Ply{j%-H6! zE8xxM8s25w37Ee8KK4oE@*3%KEG@W^yT17=jx0Cf-VF7#_3#khcbApvS>jfD?QBg7 zE!wo~dodi@x?6acKj2;qyIZ|I(bT`T6P&Yv=0_tqolAl7mycuhNCy~oV=L9vnz5U* zb5NBNJ_ic_!0cWKOU#jEy~3XK@9YumuF!uhS~?YjSP+ewuD~~cEf5&|dSv{}n$5F~ zV*U#Z*@PfFs()R;@!nP7o*U0bzbZlV6UFQkFT;)>96>QRCy}+_sc=))XP7SR4EGe; z#NTd!pm*QETH`!C`*;kTD!r+pdKlZ1nF6ks!oH!`f@(|B=_q93MwemaF-ML+pC%HJuxWT@oeDkqsen2KNiezT7JB8Mq4|&Ma8juzdVGD17w0!Z>)uPkZodhSp9_ZH z<0SCL(H!W$agV!oJ%n~2PQ%Q>#`H$ukyJ+xhUt)vR@xh3)FGksCAfsATsnl3ogsAQ ztvNo`&c;}4X1X9MmXOqocZjsWCNrg@IA*%g^ZO4EEdk^)kWj!sPQ<_t}n^x z{PGJZO_ayvci(ZOehiuKPJqhqd#Ka!F1%H~hIeaID?;{f0Jj0F;8DyAyt$zP9osHr zzvE=6yI~6-WK&>N`$>Mn;=yFe4WwbtIWXb9E=o-2VfRrnhAhyeAG2q2ht&JQZSNs^ z`RgD&tMFioqEJr9J%q(CdMlQ9*CL&9E10dk3yn7Tgw}8j=lVo2w>d8X_CH09o5zJ& z{Xk0o6bTQiEyQsKO<2BbH9Wf$4XNp8LHej2u5|8zkW5+po%s}o77Ogx+genjzKpi_ z*4ZAv*T=u{U5Mv>aH`PHBY^It~O^V?H!sD=jchqp+XrbD|{CROWJfCpC43R_^p=z8z;`jfeZfPUhzKSMbTj~d z@dEDB{sp*ZOEU2PP_$v>HkioyaOXxp7wexLgjrjDSNz?ITP?Uc2lh2f1Pj>BYoT5ns zmlyj=cHi||{9OL``MLi8=QnGC-|YW+ey(;A zeslik3ED|Wyu4V0aeH2JENKUO{S<^0h#Gs6XoNd+df@Pv1WtYG81hxDhRdUWqWoKV z(y&XyHOgHOHdmiq*9mPOg*`O)wIO?TD2_x)BUx8>O9aVniS|`pYtZ^ANVNxuUIJW84>N3~I#Vmi|Tllm|2KCGBnZ&0?xR>h1e0D_8mxx(ftoSBC&W*%N2g<1DgC>v19 zDcH@%D0Kt&=CKs6lu%^D^+vMfj$qFJN(y`SW->-~zT(^qFT&?FaweOVn~opbf8qG%L!860Bf=C!Br;j|6o$MwMnOG7 
z)8<$rbJ}o;rmuTI+k}?JpM-~SvU~!`^=7i;Z^lwaNhw!9J0CmzuVB7I2y^o+g3;24 zSjUz{G&kIkWVTA;Q)llwTd{TdK@=xG| zD?m_s(S)eKkTW&{*EOBPFLj=5)$KCOG+0JITpD2AB6Bu+PBc{gr_J^*wq)CH%2A+1 z7+Zg{1xGbUF(rK`c5YNUeom8S4PE2eG^caeli9@Wlpf7|H%?*)RvUtK_GMVVa4Y$x z2$Z%2cQVk-qRkhQuvYI3x%{o<4{WStGlj(4fbvQgj4P*Dd0oSO5tas09HhjccT>P*UkK8U4#~q1c^#v`g;A9eW z^OGU9)*75*dLMfZ30qFhmGq)+Im@pb$clAN3r42H>}ly(m?SjbDyz%TL+vdd{1C6hSlr@z;=0}Ic)58)C;iNN+S$S^f{5QHXKh;cu$f3*Tgw7Q| z5zc&N)8skZRx{>wR)$?sn80fL)tK1{S5DVFh}0VEu(9?oY~JM$KUNK-*4d3%arP}t z+t$vt=Lid=YyJzDk- zeqKJoeR?v5c+I7xwUgr}^bCNu7ytM#Gee6fTuSY7 zvQCg;o+;y)_J9^nHu*Nwlq-fVcUM^3pvLMhRq%t|PT|7lPEk_cIy#{k?F+6{yFc)D447mx0p4rhEm;kkR^_@L$j#_7(YJ)3t^O13$jyCfy{4|V24 ztj@xZj?0ky!5h0QEXlJs65rl(WxJvzaYbJrNdMY_`AJ`3$jCbW96JaB^QLl2YDyGo z(~jQ9l9+qaI?Q7Om|LVId+=LCH;2{XoHy{L}+X?Cgh?IUMZ^F65*p+6bxe zN(kOpjZL~5WVJ__Q0GcA*T7?Fb$J87`ba02bzumVFPCNq?)b1DqlYo4E!~j&a0JU& zOoW*G)m)Q82iL6=&)Gh(=cjJGizB)>L-oXm@W~;De1?x@5 zRTV5rO6i)FL+vf<4;SjM!;1I^2a%Jm> z%d&X_NqFTbdt4lEMSXE|@lsPPn^={Jy6HR)&z!<8YQMznK^1US%K;Y)r%k6dZ{TeA z9hf~f2r@<`qsOISsJuQ#*dFRJizt+>+OZU`otQ__ zpS_vFVq-AxRAwcTVIXanz$wZpV29#ZI3+ZTKFM*oE~tlVO$de9-FNv%!Yw9RF_@*k zHi543DZ&(_9kd$UsC(2UwDP$OHgh+y-Krw?aV{{oRkF+@un?!3tf0XsqcPa$82|3{ zT*^^;!ev#LqEhc&QHt3pwrSOMe3cx;F6?;=_ss{h*Bf@guoHvW5yuZ)qiPY_M{mOf zwJ55)xC{=Tjo|Q23c~VcnAnsGa#@=>Rf%F4dTJJ2nH3Ih8Ftj9x`R}H4?v&gjre-! 
zK5zqD<_6{EGj%5k35224{Ch8D2iC)*##Hhu-pOR*WB3mqsw~7l4P}BV;eO;8=##ai z3mYO?O}-Rs70ydbF6H30k7L>V23?VRtt@F;tJ8hy-&pP=$(}u%Ng`zp`j%osaK0eERQ54138Lps8#sZGO6hQz(eT$sa`QxrRXXd+o@gREOZ@ z_)%={XFZacxdL=Y4g&YNb)s=WSK-;pfwcK#1go-&<&HX?hbKE?*u71^FgErHjQls8 zlDhUWr{i^S!E^@eX|x6L;52MhOM_|+VX8cNKdV(bkN3n!nBKof;(4XVF(E$=*1Rvr zjr!-g7v7U8ZB;kQN8EuG#g3Tq-vptJH4HSji$DRkkXyEp3b5(K27&Hjr}-E+T@X=n zpja@>_oJ=xd4V$jmAf*1B;9YH&NgkT1QX>L7^Pdp?g?Al2Oj$DWays%$JLvEQ~gGN z|7MCx(I66$WK7C*UVDoMQ4vvODvcUV6{QHtTv9ZVqEs3+kn`HRL4%@s?yb2r&l+g( zeSN;`{@g#@|A4ieb=GyRYwzdt@qil3iI|&cCq(9&)9^1vux-x|>i_yJRbJI4Sz8w9 z?wQDbJ1fajO$oZr9?c)U;?VqH0?vQ80dL*DMn#f+>q7N#{N|{Hsv0u!fx~Wz0(t;H zer%yP=Ld5^oGmKO?v9R6gV0K2O|5HBf9`eiITQ`(4W}MhVnzObTDT^Z_jk^PzstPI zbe%c$SgFm+{WhSL#XRv$S1FUzr#^{gnLk%S=3OJ&6;Vjj+R>cl4sm zEYAHTH5E?hQ^J|v{G|8*Sv>tIn%#IyM^toh-DXcbQ@$15mDO4OMj1T1bdHV()k4Yn z?tFdM8Y*!61N+t{!-V^-a5>`?S^dK1rp+GBD$J{fKLz5)w{l@fu~4kn!2$*RNE zDAWt^)O97$Shy9tehe2b&d-$hQgPvg=u*jYc^UeITq2!evpGU4o6j(;*`QM zDPgIse~X#;A~*xfe7?gQTXU(6ppAzQ&pPT<^ zWBD=IctbK|&CtNsZ@cNqhF%8Xwh{{SU(DmrTGENJDfDc4UtGH(52x-N&t|J@XkF|eRPQOm z`tUr8u|6*kFrUanGP}~U;oDux@^;ayC$^BJSs?_j8;>2nZ^OVx^Z06B2^|R82cbLC z$V9FoC9<7ygY97|w6bJh%N-o_cou9lU&WK`UGe?d$sDvQ8qZw%NWIl-_(OgkXsHBY ztWhGLKXMSJKO9PZHY8y8=NaT*0^NR39-w%czdkzCS9fkVIyYNnjJ2DIXjcgLNKz=k)6rAoqhqKpI(QWPt zx}W4L7&oX=WPLW=elm^Mr%4o#UzaG@`z|>C-Xb%&po0_Ft8uvcBUpT37yfoT&YYEv zUGJTs+jb`*>&{8B=C(U~9jq1Zt#TB<9{weigpcB%xxXmTZk=$uO`Vl5`$*fljnKTQ zH|WJbg9q8);Bt)-X1y+lb+?3L9rAaF1RgXygujdDa-Iy{p7s61~`T`BMm2Xr?_<((F>4f=uthVz$|{ zf|u7;iUZT4@n&oT*!Lya{&aBS3E$Mu zqZ^rjV2j5qVQc1Z$i1C|ljk={WchO1y7(^Lkt{{R;|@IbKpojfM{{k(4eI+y8MQQH z@ng|3`t1H$&|mVBB9Ev;RcZ*GTr&z!?6!yX9~xlM5>4(OyoI{rV9c_YHjm?tQO|B7 zFJ1US%vZ4CG1D}$o2?4hbvQ_gezU~d%R8mSNsD-ev~crV0}LPaMm*)QgSy>*FATY@ z&28;j(v+-_tNXs8^{WDeM(LgW>}|5VO!~XCh71-9R_4=RiL!Y%BEn_IsAT9EDWx5@ zA16ICxoG>w8Fwp|gkl;(O>|@z2gY=UE{O+1jKe(Z_op?u3K% z{-Zv|$<)KklH=0+xSOdRz5FbZ(Ib*!?iW$WX`U^tt5_;uQxwdRgK{}3PimOUdtloe 
zYj&+NL9-A!Sp{`tm+kvxXKG|{uK5xqw?CqNT^@kR@m#E$ln-9Zd$C4GL(~X_4{!rWF>5DG7(+y);i7qrR60+`Ir2Iv*4AIb9gK;VC@(3iu<^lYd6$utB6` z-_sg^L)O2c>;hK~E$@SMMT2NwmxXZXRW>zrPiL2J`C`J28ezoxK(5UjkM2-n6D5lBIwyUou%k3Mo=b;CvV+ zD%=N~y;JyorwqC{Plx3RwV)N`f~S|x<(9j049CaeGh0u#j(1Jv$RGTi%cj zuy(@%L+ZfbS~e-TAdb&eX8Cm$SmWClucSucLCH)scl;Q>|8_368&=WF^nYT*t}dAL z7}N~&wvfd_gh7tzux9`|iRo}CryPt&4Fj9I zTKG>K%NMM}VBNw99Q)r3NKE=6)=N!VuP96IlW!r}dyC2aq#KXO9>7L@{KdKvp?IrH zAVxY1wL>>-7p!wzGQ-Obr|bdhrnqmJ-n>wGc3ON7`SpM zgq_|AHiK5en|+(;$KwsOz)TrNRT*Hvj(dcU5#w=o##7q3s8EQTI2yH5bTIS5RCp4v zjaoJlye~0Bpz?x~#sJ8cB8Z)UInC6d! zt@J|hUXg?Qr8BwGHdrWH5W&$+0eH!=OtkA{O@?w04*%_pLURu`tGGism&f6WtK(tK z4QKK5%6Jk%PukL)*HhqDl?I-w)TD#u|G|}qG3YY0ghqb~ zq6KZPVz;L!X{*w6veozmQHNB7Ub7_Yl2ZjwyfKS6&v4+x!%p;Vd=UCw&?41+4q&%2 zf}QqMOPjs-!oVd}khCQh>R&0tQLoK>^werRKF|?+g-imza-OCSw&kX-qeyFUE1cBQ z$MUc+Zp-Y3KSL|zPo?C>Lcf`OM6l;=5tDG)f09(Z=PSu(jda~(D*uZf&cy{j5Na}# zr*2Tki*r@z<;GAlukFL0tL!d3a`Cv6wj56{0Ga)0@kG z#ZEFMJZKSx1J4KY^5tIW-Qln_16eHpd~zKGWCr24t8c-&Z99BjG@c(`TT5Tmy*YaL zA{;3!fw+?oh07OhFjnzBExU0{P?b{LRd+>Ovga7l=d&`d=bhHKcmb;yHGphG`-Tw z*0&0`w}s&m-L-g6 z+8kC^w}H{lwQ|o(ari)E1qOCofphPyf*(#^SX6A!v*(TGZ(60GKk7bwza0%)l0mrP ziVv@^SCy#C?a(kdly{#{1F|Y}CMKk?Z08pNmf6 zlse;D$5z4Z%`N(_kVbVoj|-(KenP-vsd=!`9xa>FK*vdq15J;}y{@UigiQum*)1NH z%`TR2OU*%X?KHgmAQtPTgwmNkrMR|zF_>3p(D}+ebZ6*1++X8FPcOH?{pq{tb+Zl{ zelz7%DYmNytk%Vw-toBVy#+q6Ek@OuEp&RoDNsB$3HFz1)4I=H_(mT+4oMq`jk|`kYX3Df z+{h544yt3^oTG3%bRk^6e~eVi6M5VVUG|OMPA<`psN*_!y#GUnZS#BZ@4`^_{PvT~ zo;8BX{IBAtL-lY#^(=(AKcSt{JaF-y=i-lsZ8T=$1k}iULjJdhbBpx0y4xlZIXA2F zH??#0-^MOHyF(;vbTP$!Pgjd|>o%Z~L|pu$ID=nhI&il>Tjif#SiCI3e}0%sP_sByj1&McqrX}Kkj={`)&U>GFfzyM!(3Qx*iAFeDng?XY9@^{&f<* zC5`4U+RGuyVFn)255hOn_p5`!C#bed{|S(hj-}XQE>#`s4bGd!#k<(htrUq|5EV2VI@xY zwL#yPJB7VJv{A6nBb5@sfrtCBnx_%}mwjKX>|=)+V}s#c{AzyWm?U5QML^-yZWj~( zm3*L52Tw^9^4YhRIyGu0f#n!ad^!3B6|Kpp^LzVY=IDVO`80<*->m}wmaaHSYO>FG zEfHLQug6YOyIUp27mrxH6;AHoNlSHijgt4WP0Fj#av;8 
ztA~^--XNT9b{C924^Z<2ZJht%E&X;agVmPz>B-bD)M4`GeTau4%)OhT#%@`7b?x(G8c^WP{P@#WAWv*!8oAr3%YZ90(N{8jdu=e^BVP+ z!W>5voH2A5?w>YNY6oOOuw>W1E$#Pa59)Rz`i^xB;369x!=E_0H zE4Hq}pVEHvPjLvN-zsS*V~O1#8}q252;6}QGEah=;7%Dc20lvT$= z#48UvCpDOLd-kM&JA24ZYG#?w^_CSsbQQZ`PhQ(W6-y57<3lUvz;~xjq@Xd9|NcBo zuk3eFuUk0O3RTZ7y`I-EaYIvxWir7~9)MZ#{e>#`@hn)J3B%gI->9pT4 zj@agpMHM>iVzFLIEag*)*q84IJco}rvbpH9GIF>Hwka61`+_6%YQG{Ms;rSLFOO;S zZn==Yaw$Ag{sGCI3*g-!O>CW+%+`i)h15$o>1>)cUw_aM3tnq-l&@rGklJ6JN1ue= zv-JfoHRG*AllX740%YlL!O8Lqu+J3nXO|-R%Cwr=AIiDhk|NQy{FUg-#~#%7$B}Qj z`fxzhcF|#-5=P;BXg?5!mV45ub7BP@*wO|&`8lnza^ay8rE$8^N#RYIGb|}hr`pHb zJUR6Ug&HdG()gkH{@*CvU)B-qS9)Roqa3(B1bL%0Z_WA~h$+8r!2VScv}n_OY&qYR zd#4|wtUfiAb$cLcZYhB4@m?#+ZAqvg5Su z2)vcv0-a}GfxzW=MIG^+Y{yz{)^O>LqsR8;q~-JI^OkM$z^-;YUr!aa$J>&&=YH`_ z$`*PuXDoJhkKs9!Iz#ldB8bWK1;f~7@}7B7e74*JO_DO;^mz+3s%eBF?sGZ(Xbpv~ zb3uDEZ^%=S3{kiLgMbGEd0oOPEUm~VKOJLKlFX5D=!D85x?Jn+$E~q3Sbguki$%*5 z`Vwsq20m%LWn>QSxG&jI!n?@y%)8>lu(i0m&V~*AHSj-}%zF02=(U9#23-lpm1gJU zR&U(Ve!U4+X?~$ovwKJszUzXkZ%;J-l>!evthwWQ>Hf4sYAU^Lf`b~G6yX=cTkL<6 zcgPx4sUC-?Rp!FzKS~tpW&*DjlxTp*CGzTekW@c-aE6XMPs$&TSDxfUzdj0>cSR9B zhu1?{o0-%gAmC!Dz1yU~FG~Nwf=gZ)D81XprM#kD?-YcjAv?vPHOIhQuRCRG6CC~+ zg@d9ebL8S`nEz!9#5e1JqD>qwJCV<~P1+zfr7sqGo}r-KuVKcv3Di11R^HI#zUcVy z4k!i0$gaQ_>Euo@d}kyvQH7*SA5RVY)cNF&r}Q3y?gNP$5}Y;`v?TX6x>qf zMRqrziV3aZd^>I{)1VWe95S2i$MnJcUwT|>^oIT|)^ODcDaL6kM zw-j|^=eU)qFl&=Is`U}P&8xtn+zod13_+8!b|H2DJz?>Z8C2*w9NXLVNTZ~fj$Tm0 z?-kylpl!yhE&L!OPZvU)Y*|J13TWItOlH!q?x)`~imvO#A(HXL|FJ()_&0 zw}N&TeTSzrRCv{y)!f^$75ZHqj5DP@`pP^L?zvo>{(WwMgdIVck=d7DdG?YBX6Ipz zXQPM?dGx1p5zqgV0E36Fq$$m{bP(-1#e5{mcUVyHRv#W<;RG)(hHwuZ51d=|7VN6- z!7{D>*gvThJ@<5lgQB&#p=kt{{;}qgVIKVSmJhaMhtp>rXI|i%BN_!J;dIw5!KXM{ zFwlPw@5c#{a7_`%etIjJbT$a)p2;kzPQ_5S2{<^yiSA2u?Bun<*z=Vl&%CmRcTIao zf6ko|>l2Sa!MX?Z{gM_&zB^9m9|fZIwR_M~?oZBd#_`nv6Fix_2`sm`;DrNYctW-V z`Kx*0C7n9qc>?mm+2grvz#KmPV*y*|DY5=;PYzl+S00di8?N5k2Jb9n`15c_7r!;7KmW)G}25|pU5i-{2;=5l*;l-2!`rY@5FlB=pXEi?VqANciwwV9ZX`zv_wYXvKS+blMNCU@hp_CV9 
z?Dahj<(XNvRNsexE_(?TW~ppFS|Y3uE5aL7kAmL8Ch_aAO#J@umu%A90Q}{?8@^j; z^8QM7+!g#43hY&Qa)LKJYg-Mu{)12{F&39in}cFpZ)h*=g&S&}`B&3H*$0dHQe#ku ze_zUkLX$wQ6Ss<22dHrm%`~#@mkpmnefjIW)3{<$I`7-*EYz>dMrXs0uI-B=a6!{F zUg2>NN~H5+%{DJQ^1dU_IJyT9uZ-v678~)kOB%GTJw^{pvsg(bRZ!bBmI`-xV}q(O z|5cjKjUCD%Tk%2UbIMUw;f8iso@HHeVzcm;{4x$e<((At z+#CyUF3#hgrgcKUyuCv0v1Ba0*DMddqrf>QFNoh8#^R6A^JJ)A$X9emN`!}vqW0uj zJY{zjBvfvtm8bgQ!e71d%b$-FRF^7VH0?-5hB|oXQaeRGdF1#kZ5G8HA4+P!_0Y)6 zfcNFCqv-ItG<$S!9{;-pY%Is&>(;Rt_D7(WGnC9R}Am?PWlnJ@S* zpTToiF5y)d_R-R^d`dGL2`vjX@y{(?&OEE(qBmhECb--nm-u=7;BLK`zFL*D6inHo zyP3;V$M3YwNG?1~nac}rb>kU-QX%0`Jdc}D3U#W$Isa-zW1Cr&zgm%t3kC8Sd=hkbp8~%n5d-U+GdnL&CeTR{I zU8rbpKmOA)fQOvg4*^Tncv5!<^m8j?7j;uMbWov-%f@5z7&lzBKuYlErO36z6u4Gb zMVbYDqr*uzNPmcwBi*^2HKXh3OSL7RbD02{`7Chf86f`Z0>u)^tm4gec(`vGss7kb zRe!(3gW<1W{E>&kj(dUh{Oc8RNNJBnerN(g6VIpfEuxE>7TG%aJ9e+G}M;#;r z%Z8ABG%7N;?zcqhEuW`P0UXJU+6JIx~Al;Lyg*S8F}SyD5)V6!19rTaQ}50+f-i_N+(<;8^w4EJRZb@a-<4ECT_YzJoR~JW$jkIFYXe#Te#|!RBv#tDJG%R90?`ZFcUkv(+otie&(XDyh zd(cJtA-&U-d#~qldU{mz{)6nZlpWc8%>u)stZ?L-EKnH|!Xp-SK!u}6;BZkb{XF7G zi&U9&OV~hpnr70XfDVTE>r81>#}u5hVb)NHm@xxfJOZ? zfKx57p>>T=x8%3%MV>w$tDHw!q3$%i(Tz|4*I8;CnPJQmIW?|7L{nnBpsIHf=j6sh zrffBDm1Yh@qEb{r2Ir9S8EK552%dV=52079`X>?N9e!ouK)ThH#j|N4VFVfVB4+BtFn! 
zhr|LwA*Y%;o%%@k8f@`g_-H!fxrM{eaG*uQBtKi6G=GWSk6W~4;O8NV70*VTh#&t%f?{#Ne4 zAdd?Tcc2%ckX3V1NK8(~_-~Fp;Hn`SnysN%1rvGS=1$mY;VarSay?H8x4?@dVmWA( z7H({~M|u*8c=J@Lk>FcSQMTz2meET756i_T-3!p+;Y{vVO3*I92D*l0NH*61N2J@M ztSuAU>KwSkn@w;lw==-9Ml~#n8Vzq{P6}Wnm9wET?NS4Y@?~La6&H_;*kyA-v`jw zx9a$)XB7YTO66gBHNckVVX;Dnu-YR8&6HJn*}aL>90913TS)4PsT6e0K{T)YPM^Ox zpysK+wEOW-%F&bZ4ujK$n9>sZ5aP=-H>`z?)d@K8YQ3=ED2>$mj^clN;<>Eao@>md zmUD!ZEiK8!h`IyNVSWy_*^Yo|kDd6I)FfH!v`-A_pv{~p-k;^#xy{n=;1ukx;NQdvf2_RneGSx?wJ`Zf$1b&@Vw4&jIg7WBKK zE8gDX4*MRL2}Z4B;LWxrcsgu5ZJB#tD3ARFAv>mHY4lq1-8%?6R~F%4M~T+j>!0&~ zQjTGK56S#8XE3(CZV@MZ%z#b}=B%`>C%)b-nO#m4fo-&k_`C8eY3K%^)npwWwP+)) zJ>!cr|5{?Fi>+Y$WgshR?4c)Wi?Qp{i=bWN&ud={z|k9nsSaKFO<5Pr>C_vaclwW> z1UtjX5NQr}qL{G51{^UU5<@;62aCqtqGj+Bbd5~IrhOLZaOr~>U!w`>f7E4ve%tf) zwF^bBTqEvcVnEXmOZy(bZFr*kDjc)(Lan8_IP$b2#rUeQp6)7?Pktn(t?VH9nXIE# zNA#%Y3}qbocCaw<&Lv^W(mD9pR~hZaS(yKyG8lF@;i&N@((Zd6Y!IaXzw$23-Wy3i z6>|C(^cvL1Xj81~cj|9m4gR->p;m?hZcy>Yru9zf6&{M(DhuvAih+lXMPJ|N5R-uhMiv^}v- zY-qX#YNMsMqop+!@BRV{<3@44!Wo*o;2+KUy^_w^NG41p1Bsl!g(g}%@`VR)C@07R zKD?JqTWJmuCmw>jo4+BvxDU^l=zsIK4WWZ~1t_gd;@YGUyiw_tC{OIk-Iwf!@#TY9 zU(1*`sVw4*gR7x)%NsIj9|rAtO^|!V9bF?@MV%yIMd2I-B>of{4cuANI7aX-x+iuW zG6Z+ubihk9ywUx6o7mf^jO?oy!mzekEQL~pvfmE0?8Iu}meee^t_kMP3!`LXlVV_1 zm1NU;+!eR~>x!En?L@s(Ik@XfCsaFfSF%wZhW(~BFf%`t!?pyX-u0Q}7B-eNj;XRz zw=sA!xdB#pxeIzhYcSyM4tjf=T{Qk)=fiVW(|_A5;Z)F8d@@3pT|&*_UziOQ)AttQV zpvIAjujs~XExG>3j;M6e5@!?*#JmGz+0l3q|M6~zcAE@du_lYfgN9f)w-g%EG}+c5 zm+$Ti!uH13a60?0IQv6^s51NlS-Tiw^|7^hR-prXs8qs|juY|Gc0E#@GMb09gOOo1{eR_=D9@u6BPbcpQqtu$iaDgpeNiNO>Er=@yA&K5Ov#=qOyXaw+=NWPqEC zLDisaN4tMRm@NS78HpC3LaQ;q)mW&98j8^O73b z(v%mnNuv^kiko%trg0{o{56{=D{PkA+v=mn#!|40_du)XQ*o5WQ2ctjBae7Imix`< zfCKE@VZvW?q3G5>Sg8x*n0bD5_rWx-{OC;Ct2VODv)*`r@N)=1j_4piBYf1@z@6Nz z_&1-HpF7qpUe!HA`i&`k#$v0O^XfUpiz$-D6~O&tE?=1T8Y-nXeYRgQxV)JPEA35r z{J&iwh^pe1+5?>PuwAsAN_1mnAWxmY0|xwf1M@R~(^Z#k(1iCPzuyv2{?bgNHI>-n 
zyd}COJ)rSR6|mXy4WyiE2K`zMT)n%R9?$KEeH2sRk7hqSveA)ySYHI%UCuIy!0xK=$>dr$e3H5eXww-gyJ}tAqvv0`I>(ANv&%rgdMA52)<0lUmoM+=LW@OSky zN?T@u*;@M!Q=GVK4L<3CeZzV3wmeLRok_~%tvUhotVe;`eO>gHdKGv3 z3_x+C2k(O>Xq#OJK@SqJ`Aq|DbXqD}4(KEEUg5-d^D7|M!HRtJ^zl{X6R2pE$R{hW zh-O`zg{%-S9MwG)--kuOv9Jy}Lh1lP`AB|KtvK{aNXo0a-Uv8cCcI61TpfDBTaF^ zs3C|W9Ps+ytF&c`x_DTkH}vR`N7V)0_?i9;>OJ?o+#32~zmOqZG_51I-h2kzXPHsh z#}Zs>e4Ym27l;a!T9#9_XxOG2DIe0GH5T+|%NtwK)~#B4d#Yo@i9&Ea(o0bOktxLt z6Y-Hr5}kad$>Z7%itQe6VRE|`Cj0{U_v?zN_H~myso=YdUDzL(*U|vJ-%fQ=bC9>X%UZjevFoS<&sI@Dv5ZNfzfuJ{Pmaz*XV0N z)QCl#BkiZ1U#_6w*^{wi-C)?gE}c6#597MxzHFoH&Cu@wG$d!!lCL_DI7^57?dpS5 zYP|XBB5QsZ7cHD<7|H7f%HePQXXrP^iC*~ihK#XOg{gUeBnkL<{C@YNU^%EOTDrY; z!Eqlc=R<(tax;o`cORscHy_Z(fUW$oemf}~n7}%9g~C56pQwAFP&D1W1>5?M;L}^z zqr;d9SX3_pULX*fty;w6_VH zJ(_9e<|%wfveL}o9tD!^jaTJo!lB{qqJ>%(IrdqD%Oq;&jr(cB@@4vLJH-i3ERCe# z#eZNb7{dT6q{AaU#Dwd&W!vqCiXj=s(CvT?UV4yBr8_6_CS9OgW?_(R)?doT4CapS zYGpg#YGbeIXGrV!SloHCK{mBe+Q$rELuYF`vMjq@th4vQuo5pU^Ht-yCc`13+!5(@ zIk(sq@p-4tn5YRTdON}@iAE8*ca6N^8`Hx&J+5+@j?emdaj0CKXFY!ho9d+RtLT95 zK5vDmi{g2Qy<`XNby*12%HnTxoPnFyf^d2fSASi^dOeKg7aOm^p?w!cb9DvoP?=4S zZTj%u$uVH{zyQmwLwVO7b=KW?Q(Tpz$6sj|E%#L8m5*(4$JBJ%?iR&Ae6?BKyq%iu zzl$H;BIQO0A5g;ET$bm(hkd(4P@`|T!k z*It4*Vgj-2vXv0{^oKYK6ma7@D{h(8hrd*eV~31g+%KGO~ccRpTQ@|;JImMre_CxNaGv@#ct+@ZnPy*j)N4y8o?&Kl63qsGAu- zO&~f~ypC10!zFUE6X1+FG`saT$Tg2s_mDBHmtw?a)4qc8${I>L7>u8;UJ+(@ZzAW^ zld^etd~oK9j{J^~(z~)fFf`biyV<+pboF}_9&kdoGiNCd`}|FqH&6vzzw{wLk3h7a zgsf3h1W$t&LdCr_zIHR7M+Q4UVOcR4p4JrBpV=UIni=uFDG%V8S_U5UETtahyP)q0 zX$ITmfVp~S_^?9&7pzc${8JV5ex4V8tvBLbx86~p)n@+mz>;TZ5WOntAsN*d3JnI$ zv`wcs8y)Z?zk539ty#j}=D1Y(2eoq|x(rT$5{I>vQR9c}bEK~7Djnfh z)HYbMz#l$jeFBI4y(hx{zQyD!0X7#80Qr*5bkQ*%TyMr;=PsXN&P#LLyfqE)&FRaP z;w|W+S_bA{9z#LF7;Krd1U(a?Fk7t$PP=*l6H2;Cq{{KUaJn@r+#inLAE{zY{(D&( z_u;^<cmwf`T2!2i!*@cmzYfZzY{119)I`~M&Q zLcss>7j(KU#rsxA4ekkL91#Jym>7#1UqySs)j#-h5h|#_VPaB&RweVYc{jQ z_X~bGU->09EEg4nepH5N@HrRmNzDxXs_x=Soqgi+@-NU`Cr^0rvRT-E##E*^!B@1j z0Z9CmRXf`FC}*wgnp2Lf*~wK%to{sc 
zK|95gvRa{P`j*;&zABV^&jBWFF&6_U8m`^Z5o*S&ka6m9F=F)+VXya8*frp$@bRV- zOrd14hithJ75xBSzJDnG*iz?m`@R+EnVEz9{AQ>soIp8lqXo@|7$JIMT=b;>m%lLh zfB6f6|NHp`{ms;KheJ(onSEBw` zx?+h?>DJEgg++vc>S+D4gedxbe9912F1) zz37xJk>S+~VAby=Tw#14!c2`YImZn>pV!L<-%EnziRqYrM*;6`?j-lQ{*at1P6#g) z^5F$4QrDZ->@ax|=zhtds|)uDQ<4q~-d5wGeR(pDR$3=0T~Wp;W65-UQHw*p6r>Zu zkvtEW3l#y9-E>D0J36j`fhh^RZ{d2B?|BXw|4KX*mqImO*~Ddcgo|1=puJGKYfUr7 z%$$+zmo*-jsqes-)Fu@CaOYF=v~f=QII;XcU3f}IU}23FJ9xZ=Jip^~e02&>|FRrb z{QE;)hu##d_ZXqy@JFB%m(DuNkATY*O`h{~7oAXCBffYxgtj>Pz(Qp=jw~~$>BUk< z^O8H;#IJ>_^h#=Ri07k6)nIXAJ=t8T6r3Z0s@`6Ki4%OVQK=Jd9Gi@bgD!z#!W?Kh zr^~xt7mIS!OZ0oVH?W5tU$7`elUyB+ERcbF>m=d$PdohhE(mvTGvb(h9qE42LjCF@ z#E}z{(CUmGMAb!M^Zqvwt15$6F?T6wyED8UY0KMMJwW)}EZEjNkWv3oQgXQ}8{~gg z*jjQ$9yrn#UzHl-0lAdvDIWnVz4r>AZk~XCn`TkZMH21s;RmwNy$8#kdrIc7RXk$G z0=8YgUAQzl8e?O%IXHiq)aUiV?ZPxZA5lepegg-&e4?4#g0c9P0`5GVgfq9A(Ic;p zqC6-B*XKWgk$1u+`l=D{K41rilO9up%`ULhTOiyET7iS@^62-D{&;+bZ*6#{1-Gx9 zi{YzIi1X7!v1w*+R9>agi9RFp*%Y$;+Z-;TTPG1EA<85K?nIJkj&l`LB%<^z#tQj?L8_4nD^u>GnIr)iG1`r zV81v@+y^eF{zCN-iTb4he8luWI5qPWyit>O&$d5gtIkUK)mV{E&94UqvqRLLm5NJJ z6}dY2fy{VhGTnVSR*cnIh95TML&r6CF!s|_>e8nXVz*Djz}WXdnKvoR{2djH&_-)4 zl+IUgKJ;9P8hQ=j#^MTkF~0*^3ZXcfW^h(b1KEwxheIa4ang&9RCmKpBK6n96s-c` zhu<6VzS+Hd@ym%zfi|inUC>qE#|%J<7y&XI0tL z-4`AEUKaQD^Cj6k1@!J|hwVL>>Q{B;sNvJm=x{W&+9qK`eX2ZQ=P%gaKLU^BsNv&E zg%jBt&7%LvMzZ)rQf|ZkZ(t(mfp`VgF_SKQPFXuB*sf#yux@~~hoU>ry*j2Q7 zONc}?GiSxdZ4`FM5bJ-f#SuH6NHd*|JU{s&>{q@Hj_&%ONu8XVRYV%5oGB@k}20p)GD*jH?5FU>zrsm*s@bvmg`|s?KuLAx?qS6_T*JB`{0PCAL2`S9$D#}Ct>C( zvdp+f^S=v}{@V{WnCA&A$bdVYS%q=Zxtyh0LzaC9;kIyR9<^aVOvsrd8{=9`YK1Nw zKF*gW-MSz&woU?n^@TX*>Ikw)?T?~!m6-i1g@P=rVbAP|caPinFI-c;FXUAE=40 zp|^!7<;7^PJ6NJ#PozzbKdX1uo>P%|$Sy ze>SdC8;>0e&%?Rvo1t@RCk$EGm+kzey;YD8HRP9p!^1~na;rbiE(ydHMS1X}b&$L^ zQWN9kmMk;M6I>VN;UM!(^2R_jH1EF#n!Z=jpx=xIOC}wf2-XkKQYUl2ToVV$C`iWN99g>AMJ^Uq;uG!_gyL%uEN?*4z#Gd2M+t< zMF~BsVXd)0Ug~AR4T1f!*Jo!=3RPe`^J79`lLHSd2*8##+oAK2!T43WZLA;ji)e%$ zZ>!u(cF$L%x9>+dHOd3W`E(|kw-FaO^}@!|BHChl71|6mIq>jKiMVwN__{5>EObPx 
zlHRpiC;V8!Vg{SskY*4&61dHr#914hxiL}df=qZ$anlU3*QT#Dafmm#h-)D^@TIg2 z*W=2nhmuKiDjcjZ6Ec75@$rm%Y$0Z#;&BsH@1!H)=l)4M&!@7}FT3NJ3ko=I#vwYe zd^y?=*+G|=RZ5veWzsK+;Sn0Wa9O-GZvz9IJywQOKkM*DiC!2})dRbZHD{;po5gsK zVpy&$b>nMRg6cPd0d;DaT)vF&e3{6n2d0U$#FN6(xw>o-J&@;r0v@zr2Q_x86wU{9 zpkwx_Lcfg{xa`;v?y+ewR@q^L}mhnEE0Dn96;%v86%&k~^gD1N@0k=(^@$X?DtlyLh^HZ0h?8a9*JSrZ(v`k_ZDX*t^qB~5zby5tqUP$9U zsZxHz6R~B<4f&;h8nk_{7Kcms`17YL=unh{^p?(+C7cSy`8{1Yb&ey0ay$7SdkgiK zocZdpGjMlEG-OKOXWylzV*Hm^P>$}xsGL6RTC@_4rX`B2HcB(G)m`D-KQkUXN$TEr zYo)mZ50J*CN+|0#2*scMBwo&J2$`FSTkr3HVZI%xLxq>9r22>y6}36S#1k(ygmQS4 zJJ^hJ!aX@EG%UG-uo@B(itIsrVajW{|UnnRzYrpE6#D9 zi)Xb9`T5lhj;+w*y|z~&cv~uGFCKzRwL-D#S~^epaT&5ZFCvp}z2MBcyu7Esp(Kb9bnfVsrX;Av#{}Af3`Yp!NX^S z;oyV{C{|U5+&gBx|9dP=mwE}0nvMy!xsgKR7&}aeieQD-I{HYx2udqNC+DMNnAiq! zgR<~`+#YJRzDd18dvW9Q3Ygf{7>||T626Vl#NStP=Rshl=>qDNx2kM za-BL|I|;9<-oX%w^|7Rb5~=x5hq28Qs7A2J6ea|Spi%$wg8WaSFplZ zq~w#wMdt{KUOS)@t$G_M&E1r^ZpmF}xRQ@1QR~GjH50yhxk_}_Ho+$v<9M1+N1UgW z&bLx6$>jTSc_O@p#l!8nL&kX0IW1YGRr~R_UjM;uQ!ARf=sGntS@M=NE4(+gE6Hc~ zzzdVh=)uyV;)~>V@zw2dII68Lrx&>6yr-_L{z2O5j6F&Q1}=2pYAr_nbcS++0Erv) z86@orl*b+x52o%CmcE%!)1}7Ccc`%OnSt2?gdM1_+YoTYWm(bT+0XTWjC@!}f z#9JqO^X(a-)THGhsx`)7bJPTylinsK3?DC9UpBziP48&<|JT)mr z8B)m*N^xI%BWVy7X^>PhRYF2(L{yYn(m;wzBhiTKY@*U2qEeAYQc8wOrRjG)-_P&) z=UM)=*1g>4vaWs3KJWc{Re7>s5_bIU^S61kr7F1es}FV<&4B9;O0a*wEt=6f>TG`j z19ExxGg*!-m3Fe3%dFspHRC0JZN{=cYpL5~2rk)i6J{;h$sf|vMa3Y-ZYc;XwDJlF zYxh9^nhP*?+Gx7_ES*0UvAk?s{SPn>Fu{0ZnRyFv(mFM_$F z4~n+#QOD!+yjiVDA;`?QE_$&jfVAAtgP)ip+LjOGI(NKfy^#`BvgH7;PKbFMqXg4{ zHd`sbi-KN`WJMY}IB~QIE1sCe5`}r=5znTxG1sSJ;NdUqqUs%{pOc6aVy18!%Rce_ ze{M4M$V2Se;_Wzg={d~1I2ebzOTxTiK5U|qC%e=%8m%&xuxRRXno@M0>6||R^PdLbn)oVcwXlPD zQ)3pRHHChuePDe9%S8VB>;-ye1kJw>xJj6sttzy|d+as%t<_`RMLt~91oE+MwiF*KMx7H)=|Sx>awuxxRl^2ShWcez zD)W-vXv~1VG+Ffdx*6r8OZm@+XIR;@ckpK860DkIj>>Qxc%+i6vGK%YnbXQ)#xrU~(95imHc;!T-WVP@LDumQ~(l>w-Et 
znE~I~Cksm)sU(I^I`!#08&1!CqS&LhbUt*|czE>B7dx-nVyE49y3+g^{u!(Wm4BE1M=)}MlcJsgA`&&2eQAb6*@m;0kyz<(B!l>hC9R`Sjf|UjHoPj!mQZrccndS;*8cjK`*@O6dO7NHG1%VEB?)(r74R zehS%)8BBz>vq$(zriWSZsjJwv!;=#ESQ;NV5FhN&K*crb;5}6r<%@&q_>`gSxkWx% zxO%Z{fvfa<)*$-F-{(DqgLjH=5BqRWjY3{VU`>1mi^(bk<|jeB+g79JcrT1|)SwF= z%rGt5fZ2}@V?+BVv*vr(xtLEuFyzfHdhjkC($>Y$Z0BSQYDvKHg`=UT&W&X)HD!Ze zM#GFztMKLD-MGlm0*nutLvr0ENbOq3d>)jcMY}DGn|g`PTz`y-`VFjGxM;HfP=g(Yd{}ENc=>&gRhK+iX%8 zt3|E+d%m7_GT%*SnEQtaVJ-Gz_S+O%Aw7#cO)#u~~R}ax3o`uLhaIEJ=#ckDpl> zPq_jmTG`E&U;JL^qg^(Gnh;&^wk%=h(-&du3SG1d-Gf12<_P5IW$@!cBII~0W666V zpC-)smh`No$I1?PWkCjW3{tMx|WN~J68 zyWcD3GRXsDv{TDomhOTtOMWn$+)1e4h$sm(RNaN=r_t}pyM|8S}?qBxWjgx@uBnD9o)7V^_-(!I8N*d!wx+g^xUmN;x851Kfic7 zHggGzpKf6rL$8oaCV%MD3v^a>zn z#ZIm*v4-&}3(>PY2Q0SzWtmU%x%{nBaIV{r3z(aWX)7;M{lc-BT`Z3l_ATJOZXcWO z^^(bV&!i_G4DdyP6zT8%A(|o>YIk3nL4WjKv(u+9P|spb+P6m&O0hhq=ks(&-&SZ9b6RD+b0L6-ZfS&>Z`kL)~sPPzyi-Q`itJ>b+S#b||u;dCP zSqx|Uk`nOA1Q&WTbv|{P3b~;nCM=!#W8XzJ%3XQ@o*xfkbB2t97W;ULmoMao^=@UC z-rZzcXD8DF(;T`Q+6=`%glE}UjjfS$G2Ppc$}G*toOgru zv!QivIfX1W#tn;v`E3?^WM+e5pQKQE zcRI^`I|H?+K7gw949>SPi)&Y$U21Y9kCoJF(`}nLnlRi2W!DOs)Ydl89%TTZ4_ngG zuj}E_A#3_;ybC?_#3^Qq2m^0f)8$1{SkfBC?Y6xkusdwoyk(OqHk`wQ!7J%Nk%*6t zI|uT?8`%X9EtF|o2{X#`ILqXN{5jhoIwQR8S|T@N(y7tll-b2i&hgp3~8OccYm&2rRz?JU=&5bC;S?^$kk`XO>?IPIaji}Mb zloooq(s84MWkX^N$1Ns4AwZ{hze zDdJN$1)=}6Veoai8HstOVPLrnIq&((zCBuwg)==VG;TI^uaPe;d~*$Ii-TZy{6Ms> zH^IGLzshWqN0R8}T-?N^f!_=X`m?|uQzr{NS>?gxF5Sh?8DL3L%WpxrP8?jb-bdfZ zv~sVWUS@4u+(7fs2>L1X@Km1(ww{+msl#asDHfaYu&bKiHhc-&zDlsB{0nA!TT^NN zscYH57D~RV~VAb(%ZbRlQzT5$^?~&M47niV)72OYv@tKI>0%!!ykc-?-@weB3b>1R4X!3^jMODH&r9RjxbDdwM?m@~jkD1l3-B54%n|pq%8(vg@ zU{&vjiUurH!ROy*($N__H#Ecu>(zaY~0YPu+#2xF|z-OI{DD1s8TX!%YO*5*n>qiLg*qX`W1eT^%_AU$@#BiTpJIDi!Wmhpj=jMKyt!HS_?jX^Z(z~YwvgBtDp&}< zv#FKo7_&f~c0a#E_qcVK(cz8v9wejml22^HwP)~IJ|CPm$)f!XU)u1c2$J2eK;_3v zUbFZMd4021d#;D$E_>`9IsWh8{- z8~Igb?nC7`*;8lA?xfu0dZx`K--XR88>7m`*@lAquM;HOD2Lm-hER|A zeq3|i8BgwtA@id-B$Ca+h4#jz<<4Z$!s*qHx`$k08~ZiMktXv1?K< 
zOFui7)(mi<%ksBaQ}cK_C+p10vfL?Nur6IW>VQcbl0g6dedrNbI4_a|aI8Tz{26kC zL-}>sMyjYkPz(E?d$7K>o0y)sHFdr^%%3++=f>K;Wj8dPam}OzuKl$;^QnEu9}XHo z%@eFx@X>gH@22EB;})koql#@lHH}UvS!3+%C9vrGBwF#Ri}P5UK-2#T|J%C%n9o32 z8gVt8dZ&8BnN#uTY~e_EXpD~nB2xrkL1EXB$)As2G$9`shsW!;LsP@kR3rN^oBD^W?b z{#826bMoO|96k+NH|FA4he-0Puw-!^Rowc$Yq+50g-kI*0$hvFkxa)jT6l32+uz{0c9fny3dG93^ir0V%96@*$ihGiN32soZ*5cBoqvF(ju7Y0%4fh%$9fhZQA}?` zOW@}6jj-};Ci9*7+q!pV8oOoohFvMphoX2#TAsWOQg_&bLFql_94=(Jy00?-`$kkJ zIgVO4$x@~C61KkR49k6RfEy_tiI;{8HZNWMf5B`aA=wqfp$}uwp8}CPU3>;{{&efP8{gd}NXUX^;Es|_4 zU?)W_Fu?vVT&{lv6AY>ZzTS44Uow-;v$?_Dk|<|N7meuKNNr{!*p^-PM4?B!2fG;Z z7H*ve8v5%qyE}vvM%?eSt`QH|+~Y&>VNnH6GVNtc+t<;-0~6W#fZJdj6(_LJw&JpY z!Fcx6YJ7kDXjx2!G|subnI7L=#jl8)O9!+KNU2!?Qzq;n9qoCncDXF5{Z_+!Z^&EIuxqy;^76r~oVxPVVM&E!;W-1yDt@q{-TPyHXc{A$2Q3`s`_u#~}3k0*X zEw~26-~)&0#AMv5#VL@MJy?UU6-ML5d*jgL(LwrByAIO6)iUP%k|lrDf*&EO6zwvP zJvlrVRx6iq+D;J^q`ig~et*Q`eIB6U97%Gy+|Tw&OJi+H6jfMrEbFoXj$g3=({lFF zD}5)rU^k8XT|W!Qt+~c0wwJj#1A1Xng+T0epD57J_wNGp~1h zDM7uHH5cy3e_MC5O>g{hSeqe=6}+>mS+Ejw%F=M)yup|^dm^T5Hn5sWGr&+!j-K4~ z;qouoQD>4X>07SC@a`qFE-9Qv9R)OWQ>2AICZWg9-Pq$lk*Uc_(TWjLbRp3IZ}~st z(`hyN8|`K_j|RiP#dB%(R2?$j@>mpAs6oE3+F_t}0hMd!a+BvOVDn;qEYPcF1FP*w z>v}k%f(N}2=6^TN7*5YJj3{)RA%xo;fp1!fiX%IiS;Yl7HqQ^W4`kxb1_c=*$_Ib&KF7C>i z``3=ho5SB5@gk>oN47d^JpCEG8I)Rk*h*=661Tp`J|*3SSNaAdChWrvKF`EH&8O`1 z{^2xf^BFkHDPm^MOp3le5;yJ)MYHGT6ym%R_dbgOoh&6hbD>)FjF-hC(>?e(R+z=? 
zkRn&(Z0@Le1|IF$&!s(l#%i0SSeX9k(uPhQ_GFGC^y{a79;!OdVmDqTk;{q* zYCosTY}S2*`ug+WJw=nUW)39L3`;zju84u-mavM}SyW!L2mcwCvm5tzk?T)4I<^&P zu*NrLutI~L#|r$Dm^l8=u0~kaZ^RB1onk{%HCc^)K6q=`i0pf1q5RVyz9G|uDJ^j1 zjyK+inQqtN>eP2o5x4+*xODDr)mCgB=!IXbLRj$5Zs`6LgzGN+gh2gr5SNaha5W^OQeJnfGDX`5g-Ppki76Ri~4o&Y)LtCj)SSOakNlzF_70IWBdmLkm zJuDc3&Ba+ltON#~+Q`QBO(&aqda!Mez<`bZ0NZ^Aqh8Sm*5>QOg$#KG0WAVE-_I6b zhmT{+KF+}vn8|ioiOBq>V6DDsiIbGY&?bC6{wNW6wYSaidGTDq7C8d0mu|x*xesjJ zizrdYg7d5iSe#kT`xF|bdsY*h6`%;rwh4L2vUQy&Q?PD-rW z(ikq}=;P)bZR&RnMh&5-c28Ch&l?tlihmr|-L>QsW-LJI1Urg#oX1Ygl0&J&YbbSG z6XIuG;(RR*!GvZpvJ}_ETjsrBwV{d26POcnPevkHZo$4~u~-u(OF4h8@OBGLMbhSZ z?7;~K*kv~odrcGB8C6L#?QmteS~9$%MgXRdRK|!xEppXaf>+Agu} zNg6|NgswQgOPa}4SINR>-{%lyXibA2?gHfzM_Ja8N1zcXu-a`DaB{3a)>&8c8{Q6s zgHjXu=iNMO3Yy9mZ(NS~$M-@`?OZJFl?U5hLO?O0;8^^Urdl8_q+9a86YilVUwpxDPuhFw6grX=kp}>anh-8nhP* zdycLrY}q3;IUb)riDFwX%HZk4g4MRA zgKz4|hy0>%{3~fKrlo$H^=IBk$=RBa`_YfI%QvvT8R6L9YlV)+6DjJ~NNU&7q|LVw z`6UzJp^%X{Z1bD9&@Y2d`A8@hzGLpCQdkxl#(Z%$rM)Wvf0co#sWpsr=lD{2Ulg4# zTE*?MKT(#>XWnYT?#d`9VvOYuw4Z4Y>hdt}M6xwT{Iu&kSea83OP|~he=8^8 zUYR>wVWR?#=g;AA?-oAFc?W=DEgP`L8BA6WL(LVZ(JyTx+P`RKIq_A@*-YRlrWXpX zhdmeteq}E;ebH`-KYlLrq3*$7S?^a3)N0s>Hy2DMzUwu+^zjzEw55`z@7PRsSAWA5 zzj)LfJs!#~tRvFsg+eDCY_^s}_o-@}`NQk{27V|cC?8>m-tWZCA*JkXM6qaF5~9)oZp(0oK)OOCVe>zB^q5(K5-SDGKypqqC&}|brzO}8-RO=4O|a6 z1Ls?Bb0g-Z@k$z-KsHVl|IUxcg2i8%v5qjJB&LOmGW+1^+F4Z7ZiaQIjd0Ww5x!7X z#n1m1Al@L5|IXk~b!d6*fDxj&aezY4w8AOB!)U$JPn^=x{5)gF6nMYv*q01UT0 z0lF$GtW{uY%j(af(}A+M4yACgrwYVZ`&0jL6?|#A8QuhpqzIiM^x8+2&+=Kz#ZLtGMKXZ~;SPSKLVbqdW39E#;nX{@u`+RmXt3eLn+u^}l?#-vWIjyX6 zWf!yhoD8)!#_ZBBJ)Hl^4P#`1Hcvjt$zDB;ty>G>lcoxLn3};1KdPgO-nD~eJkFrtHOC+=%t+|lp9KH4U2NN;Q{dtt%p^Hp z}yjgb;2Cz@KD3|YvjrKff8E~;*GQ9_F$5)8@7~=!kg`p z9BkbL>EkMSYspMlXV}WNd&G;hJG61rAn~&6ORdl++ZT_h=u_f)arAkX#p|nTK~KLH zEu4Ou)!N$8)RDs7UuzP6=_W{_yIlHdRTeCh!$OXj(#UhesJmq!H_ynIt#=5gf?r$N zzw?Vp{j9LL8$5u#UPr-FKRkzls_QNuD$?}j;%5%^MdHwLCE;QP)5p~o|ejZ_t*cRiXg^1ddWj?u(>nRciW z76muuo#P(H0kOzH20l?YjX{;ca)kayvLc`7bJ4$U8CTcRRX%4KgK>P 
z1~B(Wqu8F@gP5Zph2ENuWMi=mwWr9?5@CP#xK|7>SrxJK^)J9rGnZYzR0d&X@7S0P zYV>SS084x~R$vCSi|S_&CqttHqVgGYDAl}>(_a||MglBKVOqIJ_L(9*i`1gkuY!d= zw+{9E8O9fdWbn1q50~||db80lCSX8cJU+^@;1wPVOqFt7i1Zftvm?A&qV=V+Wx|@6 zS*sz9f5L6A@xw1?&*Px;8Qg)ATLSy+Bpbf^0z0zgB?~q4r_0m*s9bS9`@L}=E6ovl zA;Ov1<={}<_TGt?+@eNLt!CrdZ<;vJDUMe_m&Qcnun#T*tPZFs2iEj<|2VKgMLn;uqaa@ceE@e$RjNqXdRm z__B7EKGhB%Bp=7L1umGmkV7+0i(EYAvDo(*%q$uW)`O0f$iMx~it^jojyJl(jMW%k zbX*CN?<@eL1C6ZyX9^oqk%Vh&XQOjzAnSTB4~8W&7%~0`%PSd*Q`LgeWUx1-6w6}9 zP~rZ`CKk3tjTg8VKSAB4 zNUEzJ0C#-ENL6(kUjM<-7KP1dX{`v_{qOnlHZhEwH5}Vjbf|ojGo^i$WdrvpklSHn zQc;#9H_b{u|9dKE*nVS96<68H!B%+mKs%eS)Wgn;O=B**A3(Nt0VwP0vGRR}bbjC* z{P)=g$Gy*DkCiwYnEZ>$3p^rrN*{k2%fQe_GVJr5A=n|zJ1E#+0JHUxpub=*Tls7U z)z6)RC&QfB%lo^?^Vt!&ZW>Hgokqf4^;Nhp?BAaGhLA_eE?hGuopt%Ir@X_v!NBS= zM7$AWPlv_9g|5jMnQKi;R_}+fg&eoxi#TrBlLEX52)Z|ILFF4kbn{}I zd&+@pJd_IJIp&!A`xbk*EsdQRuoZhoK4O6>e$Xk30t)wHWyd^mNbzv`sX7kLPb#88 z<8XR4Zz{U@1#smBKJ@UFJ%~?r;KqH@#(SIF!Lv%AhN|VUW8Tl1-1<*tC(mxCC7xDR z&&J21O|t{4zBfdL1B=M!pDa!EYbw)iDuj_s-t*(?R4BauIBYp9_(tUSG6_k6c{PmD zg^D;FUiOeB-JHUXC3(U48i5D&H5ncRJ^|0(;i$2AKdKnSuvh=2q1i_gJI7`)t#|ME zuP!TafzciuEBFKQUf6zzR za_Ky)-Ef6D2^{L+L($ex;sWsSgUwVe?7{Q;JSbN_0VR#@!3i%T?x4HSuL{1-l#cA6 zhJS+~IKGH)I=q96x2@x9Po7~a_U7#7kA*Bth%4B}c+D313CET*ppfO{-&^7QA9K61t#$Gy3Pv-@|i`%2A zc7HY-dF>$nU6PH{?^QsTQw8cTm_{`X#%+^u3>z88Adw=qCQ^aZ7pP3lcu>X#?3QFj6LXD?7=yS~% zzLFJ<(iuykgMRRFj)PG7Bfw`MDI`Hn6+=PUuq5=vl@w#-rwQDT4_?8 ze-KUIW@0HPxQzDNA6oSvw7C)#7D zTRLV{o`=ONgJIwg2|Vv}33Rgz>AH#v9u~z|Z@KTr@BWp*l?23M_;^)zI97?a-BqL1 zq%Llw{}Biu;>S9qX4AvAN#qwHc>YRNn2x~FcpaCF=OS&0Ke-1M(n&lelEL~x6PTx; zJ+@Tnlh3BH@X2v+*_q{c;Jb}4Eoe$YrMGq5TnP*Q%7-XgzFbG>vs=*^S4G^tW9p7}g10uEAM&Of&-z)r*n2syKh=r!tL~`^+r=DAKJF zr&ves3T#h##rJ6;a~Ipq>OTm+ybI+lCFB~n@Z1#G_T3rPPd@?^jKH>-a!5V?74py# zV^;={mBizccclj*>uMzLGjJj%91muf{zYK)qCR%VWja@NOW5Pw5bm*TlUZwnE7&A8 zuro{TL7^oIrOuc@^Aio~7<-;i?Gt00Hb?V8Te^6)lT|Qeh79VaM`EhMcTr)eDlMIw z$n`vp65Z2phnsktOCKzFBF@jo#Ko;l+3y<{Ewqd}iW1?tm9=p9Wr=H#JK(B4#;AAi 
zJ1bcmiAO8mm)u(10sg9p_C<%G^WO>#(wYDZ*V@zFGv?6sXC7vRj;1k%`odb2g7(hY z)Hi7rKg)XxJ}R6i__)Tyn75Af{klb5%W9jn~WjxrN z!MFfznsi|%7R9@=)$W6+_SH5loNY-*)(+s*22V$?+t(p{=SJp#_z$d;-OF-V7We3E zDoiXHLP&vBz_KTj%U+(<0zOmO%e3%)X?3?i2JMXiCM=v2=_Sg>~>x^+a$WlC-!c{ z-r~c|`i2q>e7_e`nxfHaLL%|JIp=zG1k^5m%a3Z2rD1Z~WE|%tc%25}SVt|KdsGio zf7bCso@X(=Dcf;yk0O05RbYyjMkA@$p@q9G$q5;wj6>7WJERZ~-5rAa%MY_aF-~Nc z_K|&zZV@uA)=XT@jy>EW!ds^=vYwSYXw5TslGYqg2}d;9n1A>9Ap4b=Ki&`)UULKe zesh{AW{z7`=Fzne$FaYMfD5c-%S-Y=LA?W3$QD?yEKS7D+WqXTr!fDMTMKU+T}fk1 zFs61ZdvH7k5vgti-9cQ??7rkOod+Vy z|0iU52Os3h+s2~_W{Ik$rGy@T44jEIWHv*Uak`LAzxZ8+%#ArxcIQFd>Hr&Pxr5CZ z_(GUhT}7^6tYD$oZ#K9}no^9;v-zzB_(#eTvfN^+V2TDc-!;Q2u6xT$XIf(6rf4`P zb&_>gW-;F}4dA?29rkQ?hrbEC*~0Cbgh$8VPQE6+eZkm5A%8c%P4F5;XNny5%!TXOZ{gg0Rg!yF z0NVR}!9m)BrrZ?v_!B>t4tc7IJxfe5N>VtdzDKnEFc%fKnd4LA3|93;5}sNm)B47* zFy_%>_-s6cI@LF`f1%1a_1IoAm@}5X<(gp3u5FMsGmKTEKC9l^1(`AtWY%8JcWgb% zZylS*_Ui59qCOUKs>ds#WWyMo7d`;@z4jpUrLo+jv7PL}IZaq};Sn=D9*_UdR@dYrAo?^meG4J zEAnr?%cj-Uagq6}Xw(TcoEOqu>a;|LGVKj8%bvG>>7Ky`8yE7Shg=A84tOaJ#ub)F zxn%?6d8f+hAGoJ_J))bGmg*i%GsZ!qVggw(;eg z{1>xG3OVA*vRkK6+g)u6dvF-rGu*I8M-vTS8*&|=UU8n;JE`Z#FYcoFF{m#81upT6 zXsWysyb3iG?$DgztW5z9xYo!v`gX(hL}zTT9?LpTCJJ?FJq=K^p>f=38qxihJH11m+uDH3RKsnLqiFvHo~tqmA)k*MIJ?XW`2I-+)Ldfl zMeRlSQ2K^z7Pv2ZgMF#4X*<9DMFEbmX=cq9zgc+fVB9reDJFP5WAY!esmxOXpXmKz z726iWx&29^`V}5no+Cn4hP1zT2fZAx$4}K1982wLcvCwHz9c2ExO-vHlpn<;dV^VN zK@OYnrknCv1E9GJ3>=@?~_RP=A|hNKupggZdr z_-_zVKa^Def@rH%G@^>HDN9@jU0jmpL554EXsS$#FFTsH|cB zOSS0r!UX(NE`h)NFEQ6gX$TTUe5GC{-JLrQC(QT)bNV*Xt}zNIduK1Olmr~5&hsk< zl<_vVO<37UX|VXRpJv4kpdO1m>}jkQ=|3`J_Kh3q{>k+$Yg~iyH_Q?I#AWPJaWFQu z=TOz(<+y5fS6OT9F_4DCr45Y&gLIlIg?NUAEc;*ng1PDc<1Z}#FF#<#fA|4jt|2S` zfBu5^fB6dq2Zf1d%z|Ojx*&E=5kyX7A=c>`H|OMFI5w&g)|n|m>-i#1GFj-9N|fAZ7^)tUc~YPEz24o+Q82%N^JU8buj7Q499|# zK>Kw#Y~NA94G^0OuY(4{>w|g_;~4~&72CKw<8rv4m)+pg;%yLEqz609Ea3a1$NXp& zJE3dP#nruZg2Go4V6fVQdvV7J4FA688m3JFy^*%?WXTu)fvOER-{2n?;u99)`@j4J zi~sT${QkRs{{LCORjwhc|Bt^QwJT)J|64&RF|o@NlIYj4G_1)uNx#njV5!fQnQCsX 
zz=YX}H5(FfzIv%h;oujRpLPLWSex*J?u@|g$}QJarf0D)ujZoOu)XxBQeX^frZClz zlkl)}9sLlvt9K72W1J>~4?{dI47Sgmf1krneuGT*BKaV=f^#L<%FIS7oca% zM%txqfv4NoVD{*IYQORWawMF&W#12AX69xl2r)XV)O zTX7DLzGwogEHQez`aQI*JVG0+pA#qWXceMLG2de!{QY9j6%XG^c8BL+%CR^^E*D*f z23(&z~eRM3+^S09f<*Tf|JRD6v$=n1guDyl|YC-^4BwqU@rZ2FcJ zPwV;z;lG*bY>3J=+_i5QKCrHZ`HNGiU%Q#!?5$_%WsE_RB`Rw@M3;biY`l4#I&Pk& z$1i1R`>~q>D{e04x(70i-gn!$~$x3Ceuic-^^*Sg8A(Z+krs=c)%_ zUg3DUJ+y(f*r?#;nq9Pd;BidM4IHNMLV7$fKoqJpFLX!q%}#!aSZArQQg@uXCL7wv!BOv2=j# z_l57+DM_RKGto!$K3jU?1q{!~;D^pY#>$Rve>;Qc@_)l=^D51ZB z8GehHLB(QmM9w?0a%>p+n4F1`g>|Q=W$6Bf_x0 zE|s?jZ~T{E$hQ`x@*nsAg7{10QLCh*Oc~a&qQG=4Nz!KWc4M*p_&K_fBkY$X_fc_P z1KvKfnEMW;bUQL`l-B@9Qc5v=xpHkE460TiMLw0hqTy zh20#z8RFWtaQ*rSvi_SzZ>5J}?{^6-IIxHlb zln$Lgfb+)ffWpfI$Z57U&YpP-lkyYk?{G787Th&fx}h}ShbgIz_ruwP+xhhu_OVCX z#LJIv{KyREW?*Ks244N)iu*f*uwjc1UX<5B)o2^imd&CW?dRFkdO13zqKc`d)l4)0 zGAWxq2gy%Y;9cNV3M=>vab`O?i_7}l(db0lHGDXv4t@r~iC;Ojbw%*jbu?5rr{L`K z580*9^YOIqIVuzK-cG$SwARTJau+WkS>e4fR6PeF@DJQPmWcC4TQW!g2SBZMSm61T zf0F?8%VIswDi}i}FQih=PF=j(8jY5Yhf)27J2&ZrB+U?N@bu6M`ZfC>o7(+Dbi>ph zf-;-I(b$8+UB405Z-AYbTu{0~@H`k5<1e*LS|NTGL#JhvXYNueTzr5sg7GmR#3s0o;velBj+=%XV2Sx*Tr;aoVKu;wYT+iKdW%u#mO?TYoM7ul@%9 zw|)fur+x&xhOGNf{gB!j5+YO&d-c|dH$|R3Cft3s9^Nk}goB6E`Qu9*%0kwMg@pcZ zQJVPQq7?Su4-Nm%hi-5U+4z4&X~3?Ki2s)L#P!OypRSIz^)gqnec(61_No3j+xP#J zZBtfB+b%gb##Xyh+IEfmcN;!^if!*#Wt+H7H8#<0A5psXRr#T=6r0C;9+zK=^(t5V zrEOa@aH8$H?b5cF;)dHElAmoeV(k^SI%kmWSEcxJ@z!1CMlQuRPcpPCp025}`PkP` zUX;*e^Xj>EMWWOJo5Y&WHn-nfR#erU69OLcw(YZwY%gld+BS|HPHV!&Z5JmGxBdL) zlZ{k_fvwBUD>mEC-m^KGzsaVwYM`yoTMJv)gOavKZx5@;ml|n1WBL-CY0ATGi+_mQ z%G_wMsrV{x`#?0ncIc;Co4!UP+g$I@046egk%fME>WYNW6b7X&Vw1781QOJT%-SM0~M~K(-2A7q&WZ^=9AoAp<=_ sB*r?5Xz9Xyd4>6gNNx*}3OD)x00Wh#x&QzG literal 0 HcmV?d00001 diff --git a/human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500.tune_metadata b/human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500.tune_metadata new file mode 100644 index 
0000000000000000000000000000000000000000..1b1a3cedee628b3a79ddc3581036927903274e2a GIT binary patch literal 214 zcmXwzJrBV^7{`x#7i?yusVREJBDN-y?zdf@y62Kh&|dEdgT(5aSbY~C#RqVt*5Cj2 z=3O-zvmPM?&p8%qj|a<@&=odvY#xDJ1VE{rrQt-%B}ytILB%wkg%E|h%G%DREEZr@ zcFrOZQYvXZUca7sJS2qmor1YCnZSRhk=$0lYX(jz hI2bO}*6QY+b)C6@3qN6DzOyY>cO>u^KGp4`{{b=NKEnV2 literal 0 HcmV?d00001 diff --git a/human_aware_rl/ppo/trained_example/cramped_room/config.pkl b/human_aware_rl/ppo/trained_example/cramped_room/config.pkl new file mode 100644 index 0000000000000000000000000000000000000000..b2a1fc0f5da7e00f65b2cbf789760ee444439b7c GIT binary patch literal 2544 zcmZuzOLyBu6qb{gw(k2)A5hAxDp1=Ev^5t;bw`p@hl&=? zp-on>g%vEj>7Fe+w)_LwvH;G39SiP^_H3^|N;RLrZdIP*#V%R)`4ZzH?9mkc0B)75NtZ^3MM8Up!QO^4A=3@}CUG&+ z2H(EMg7KkA6fae z{#buXgrsiQq;M3ix7B}s`uetnV9+C>ML9Nx4n?uYR`tEO7|o6!hQ#n$uLsBL<&&3x z`~7tFg6!jxli8oHmw(QV&EqDKN(v>MsFxpQ9?^MGV?T&PIH{#=2pKcsl(t(xj9xaN zPD~@)_lWHw3it`9HB&z(d|t%;V3;2G1c7Hd@T}T_GpeSn=fFDfoLa9On!Att&^(!< z$C$)~(LUmPc1?&y6C2(>VK3W7H3zcTICW96; zGK)htIZEgdLpd)(Zt`5zlsJ~vBQ&E?nMmMuRo{UdET7vez6WmzLW>D?k!ho4mlC+F zrZW;z9uZuN9hJQf;mtKU0L#>l2*oK`Mozr7Ci!Rhv0wt}`_u{H?GC&nmyXOPasuA% zzQN)|Ib5xm|3oMLNPM>}8lh0F-y5wG;0VbJEE}fQBs5*)359DNxP}z2BZV~u zxjU_{Z^dsG%MHjYL_zR|!Aw(5te6(EfrlSh=VD1}44##uB`~W>9EHtyYy$@facme? 
z>{5?Vtd9}y6l~W{n^EX8iw6mI@YoBBChJjb z8nh)7!^k%*8b-DO^Q+d`HEgSpEh%MYRx&KC%3cfcSyy_YvLP%s{?18?Y>=J|yXy(u zNQ+^?NUL>0ZE>MU9Ya=ya1(`0G}yGm0OucWrNV-FA+iF1kD9W9nO~CNc6RNsI|1Xi z6d0))9bLmnt=1gbs2k)OQlAlT`AUfldRIz?QmJ&YR3a?I_X+$E?_O%Zgzrnvx*a}F zL81djQ04d#2;P)pHaQCs#877ViGB(#TU*5GkYuZX$}qx`fzwUEP$dhE7)y0=y2MRy zn_$YNjUiUi5H?`)&fK(2P_Va^g4p8%{vsP4yPI!|=O{)V`trVsm>bB9Ehz%U$PJ1j zDf|nzEK3NJ!5I~E`l(^sj#I!5j+0NVjPzp95IT^DCH4RO2IiaxN7ZsTjbqxlR;?a0 zbLN<%xVa>kQ|{)zAG@pkkegS)ZK6^N(VjzO4c}j4W<#h+MTiM_UxEJ+0{tW|*i7z% ztT257Se{J0#}UMAjQB$cC5p0|bhIMNdjPS#CD%a`S3Vw92!=3_fFKQ?l)tQ9s%}%p z_T=Es`STGB+puB~(v59Q(IX^Jh>QR9;ET1~Y)0@|8}25MB=EUxD", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": 
"sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 23.966287851333618, "timesteps_since_restore": 12800, "iterations_since_restore": 1, "perf": {"cpu_util_percent": 43.42857142857144, "ram_util_percent": 57.03714285714286}} +{"episode_reward_max": 25.0, "episode_reward_min": 0.0, "episode_reward_mean": 9.140625, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 14.0}, "policy_reward_mean": {"ppo": 4.5703125}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 9.140625, "shaped_reward_min": 0, "shaped_reward_max": 25, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.734375, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.328125, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.65625, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.46875, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 4.109375, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 4.90625, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.65625, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.515625, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.03125, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 0.90625, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.90625, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.4375, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 
0.15625, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.171875, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.4375, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.03125, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.953125, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.859375, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.53125, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 0.375, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.171875, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.046875, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.34375, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.21875, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.03125, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 0.90625, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.03125, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 0.90625, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, 
"viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [0.0, 6.0, 11.0, 20.0, 3.0, 6.0, 14.0, 6.0, 9.0, 3.0, 3.0, 6.0, 9.0, 12.0, 12.0, 19.0, 8.0, 12.0, 8.0, 14.0, 3.0, 3.0, 17.0, 3.0, 20.0, 11.0, 3.0, 14.0, 22.0, 3.0, 14.0, 14.0, 3.0, 6.0, 8.0, 6.0, 3.0, 9.0, 9.0, 9.0, 17.0, 25.0, 8.0, 6.0, 6.0, 3.0, 3.0, 17.0, 14.0, 17.0, 6.0, 3.0, 14.0, 11.0, 9.0, 3.0, 8.0, 11.0, 11.0, 14.0, 0.0, 3.0, 9.0, 6.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], 
"policy_ppo_reward": [0.0, 0.0, 3.0, 3.0, 3.0, 8.0, 6.0, 14.0, 0.0, 3.0, 0.0, 6.0, 11.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 3.0, 0.0, 9.0, 12.0, 0.0, 3.0, 9.0, 14.0, 5.0, 8.0, 0.0, 6.0, 6.0, 3.0, 5.0, 11.0, 3.0, 0.0, 3.0, 3.0, 0.0, 14.0, 3.0, 3.0, 0.0, 14.0, 6.0, 6.0, 5.0, 0.0, 3.0, 0.0, 14.0, 13.0, 9.0, 0.0, 3.0, 11.0, 3.0, 11.0, 3.0, 3.0, 0.0, 0.0, 6.0, 0.0, 8.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 6.0, 3.0, 0.0, 9.0, 14.0, 3.0, 14.0, 11.0, 3.0, 5.0, 3.0, 3.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 14.0, 3.0, 14.0, 0.0, 11.0, 6.0, 6.0, 0.0, 3.0, 0.0, 9.0, 5.0, 3.0, 8.0, 3.0, 6.0, 3.0, 0.0, 3.0, 5.0, 3.0, 8.0, 8.0, 3.0, 11.0, 3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 6.0, 0.0, 6.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45293878883424954, "mean_processing_ms": 0.1625872490925028, "mean_inference_ms": 0.7353549498376587}, "off_policy_estimator": {}, "info": {"num_steps_trained": 48000, "num_steps_sampled": 25600, "sample_time_ms": 17329.738, "load_time_ms": 90.796, "grad_time_ms": 6126.382, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.10000000149011612, "cur_lr": 0.0010000000474974513, "total_loss": -0.35112297534942627, "policy_loss": -0.008805765770375729, "vf_loss": 0.7840461730957031, "vf_explained_var": -0.002521991729736328, "kl": 0.00048407851136289537, "entropy": 1.7883315086364746, "entropy_coeff": 0.19148799777030945, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 25600, "episodes_total": 64, "training_iteration": 2, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-13-45", "timestamp": 1660241625, "time_this_iter_s": 23.218619108200073, "time_total_s": 47.18490695953369, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": 
{"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate 
at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, 
"time_since_restore": 47.18490695953369, "timesteps_since_restore": 25600, "iterations_since_restore": 2, "perf": {"cpu_util_percent": 37.300000000000004, "ram_util_percent": 57.44117647058823}} +{"episode_reward_max": 25.0, "episode_reward_min": 0.0, "episode_reward_mean": 9.052083333333334, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 16.0}, "policy_reward_mean": {"ppo": 4.526041666666667}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 9.052083333333334, "shaped_reward_min": 0, "shaped_reward_max": 25, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.791666666666667, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.104166666666667, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.8854166666666665, 
"useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 4.260416666666667, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 4.208333333333333, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 4.697916666666667, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.5208333333333333, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.5104166666666667, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.0416666666666667, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 0.875, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.8229166666666665, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.46875, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.125, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.17708333333333334, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.3333333333333335, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.0416666666666665, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.9895833333333334, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.8125, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.4479166666666667, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 0.3854166666666667, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, 
"soup_delivery_agent_0_mean": 0.14583333333333334, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.07291666666666667, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.2708333333333333, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.20833333333333334, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.0416666666666667, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 0.875, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.0416666666666667, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 0.875, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, 
"catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 17.0, 9.0, 8.0, 19.0, 3.0, 9.0, 14.0, 11.0, 22.0, 6.0, 3.0, 3.0, 6.0, 9.0, 3.0, 11.0, 11.0, 3.0, 3.0, 11.0, 9.0, 9.0, 3.0, 11.0, 6.0, 11.0, 6.0, 16.0, 8.0, 6.0, 9.0, 3.0, 6.0, 8.0, 6.0, 3.0, 9.0, 9.0, 9.0, 17.0, 25.0, 8.0, 6.0, 6.0, 3.0, 3.0, 17.0, 14.0, 17.0, 6.0, 3.0, 14.0, 11.0, 9.0, 3.0, 8.0, 11.0, 11.0, 14.0, 0.0, 3.0, 9.0, 6.0, 0.0, 6.0, 11.0, 20.0, 3.0, 6.0, 14.0, 6.0, 9.0, 3.0, 3.0, 6.0, 9.0, 12.0, 12.0, 19.0, 8.0, 12.0, 8.0, 14.0, 3.0, 3.0, 17.0, 3.0, 20.0, 11.0, 3.0, 14.0, 22.0, 3.0, 14.0, 14.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 3.0, 14.0, 3.0, 6.0, 3.0, 5.0, 3.0, 16.0, 3.0, 0.0, 6.0, 3.0, 5.0, 9.0, 3.0, 8.0, 14.0, 8.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 0.0, 6.0, 9.0, 0.0, 0.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 0.0, 0.0, 3.0, 8.0, 3.0, 9.0, 0.0, 9.0, 0.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 11.0, 0.0, 3.0, 3.0, 8.0, 8.0, 5.0, 3.0, 6.0, 0.0, 3.0, 6.0, 3.0, 0.0, 0.0, 6.0, 0.0, 8.0, 3.0, 3.0, 3.0, 0.0, 3.0, 
6.0, 6.0, 3.0, 0.0, 9.0, 14.0, 3.0, 14.0, 11.0, 3.0, 5.0, 3.0, 3.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 14.0, 3.0, 14.0, 0.0, 11.0, 6.0, 6.0, 0.0, 3.0, 0.0, 9.0, 5.0, 3.0, 8.0, 3.0, 6.0, 3.0, 0.0, 3.0, 5.0, 3.0, 8.0, 8.0, 3.0, 11.0, 3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 6.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 8.0, 6.0, 14.0, 0.0, 3.0, 0.0, 6.0, 11.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 3.0, 0.0, 9.0, 12.0, 0.0, 3.0, 9.0, 14.0, 5.0, 8.0, 0.0, 6.0, 6.0, 3.0, 5.0, 11.0, 3.0, 0.0, 3.0, 3.0, 0.0, 14.0, 3.0, 3.0, 0.0, 14.0, 6.0, 6.0, 5.0, 0.0, 3.0, 0.0, 14.0, 13.0, 9.0, 0.0, 3.0, 11.0, 3.0, 11.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45189034776232173, "mean_processing_ms": 0.1615299805648739, "mean_inference_ms": 0.7382304408765018}, "off_policy_estimator": {}, "info": {"num_steps_trained": 72000, "num_steps_sampled": 38400, "sample_time_ms": 17410.737, "load_time_ms": 73.556, "grad_time_ms": 6485.631, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.05000000074505806, "cur_lr": 0.0010000000474974513, "total_loss": -0.3324081599712372, "policy_loss": -0.005397057626396418, "vf_loss": 0.7086341977119446, "vf_explained_var": -0.000792384147644043, "kl": 0.0004734609683509916, "entropy": 1.7876968383789062, "entropy_coeff": 0.18297599256038666, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 38400, "episodes_total": 96, "training_iteration": 3, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-14-10", "timestamp": 1660241650, "time_this_iter_s": 24.84310221672058, "time_total_s": 72.02800917625427, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", 
"fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 72.02800917625427, "timesteps_since_restore": 38400, 
"iterations_since_restore": 3, "perf": {"cpu_util_percent": 38.59428571428571, "ram_util_percent": 57.505714285714284}} +{"episode_reward_max": 24.0, "episode_reward_min": 0.0, "episode_reward_mean": 9.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 18.0}, "policy_reward_mean": {"ppo": 4.775}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 9.55, "shaped_reward_min": 0, "shaped_reward_max": 24, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.79, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.74, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.83, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.88, "useful_onion_pickup_agent_1_min": 0, 
"useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 4.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 4.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.58, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.09, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.85, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.64, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.36, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.68, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.36, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 0.53, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.12, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.13, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.27, "soup_drop_agent_1_min": 0, 
"soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.09, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.85, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.09, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.85, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [8.0, 6.0, 8.0, 8.0, 6.0, 3.0, 24.0, 6.0, 6.0, 6.0, 19.0, 11.0, 14.0, 8.0, 9.0, 3.0, 8.0, 14.0, 14.0, 9.0, 17.0, 3.0, 19.0, 19.0, 17.0, 12.0, 6.0, 14.0, 11.0, 14.0, 14.0, 9.0, 0.0, 3.0, 9.0, 6.0, 0.0, 6.0, 11.0, 20.0, 3.0, 6.0, 14.0, 6.0, 9.0, 3.0, 3.0, 6.0, 9.0, 12.0, 12.0, 19.0, 8.0, 12.0, 8.0, 14.0, 3.0, 3.0, 17.0, 3.0, 20.0, 11.0, 3.0, 14.0, 22.0, 3.0, 14.0, 14.0, 9.0, 17.0, 9.0, 8.0, 19.0, 3.0, 9.0, 14.0, 11.0, 22.0, 6.0, 3.0, 3.0, 6.0, 9.0, 3.0, 11.0, 11.0, 3.0, 3.0, 11.0, 9.0, 9.0, 3.0, 11.0, 6.0, 11.0, 6.0, 16.0, 8.0, 6.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [5.0, 3.0, 3.0, 3.0, 3.0, 5.0, 8.0, 0.0, 0.0, 6.0, 0.0, 3.0, 6.0, 18.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 11.0, 8.0, 3.0, 8.0, 6.0, 8.0, 8.0, 0.0, 6.0, 3.0, 0.0, 3.0, 0.0, 8.0, 14.0, 0.0, 9.0, 5.0, 9.0, 0.0, 6.0, 11.0, 3.0, 0.0, 11.0, 8.0, 6.0, 13.0, 12.0, 5.0, 6.0, 6.0, 6.0, 0.0, 3.0, 11.0, 3.0, 8.0, 6.0, 8.0, 5.0, 9.0, 6.0, 3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 6.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 8.0, 6.0, 14.0, 0.0, 3.0, 0.0, 6.0, 11.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 3.0, 0.0, 9.0, 12.0, 0.0, 3.0, 9.0, 14.0, 5.0, 8.0, 0.0, 6.0, 6.0, 3.0, 5.0, 11.0, 3.0, 0.0, 3.0, 3.0, 0.0, 14.0, 3.0, 3.0, 0.0, 14.0, 6.0, 6.0, 5.0, 0.0, 3.0, 0.0, 14.0, 13.0, 9.0, 0.0, 3.0, 11.0, 3.0, 11.0, 3.0, 6.0, 3.0, 3.0, 14.0, 3.0, 6.0, 3.0, 5.0, 3.0, 16.0, 3.0, 0.0, 6.0, 3.0, 5.0, 9.0, 3.0, 8.0, 14.0, 
8.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 0.0, 6.0, 9.0, 0.0, 0.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 0.0, 0.0, 3.0, 8.0, 3.0, 9.0, 0.0, 9.0, 0.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 11.0, 0.0, 3.0, 3.0, 8.0, 8.0, 5.0, 3.0, 6.0, 0.0, 3.0, 6.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45063714229107177, "mean_processing_ms": 0.15983031380236057, "mean_inference_ms": 0.739669952549497}, "off_policy_estimator": {}, "info": {"num_steps_trained": 96000, "num_steps_sampled": 51200, "sample_time_ms": 17369.146, "load_time_ms": 64.562, "grad_time_ms": 6646.22, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.02500000037252903, "cur_lr": 0.0010000000474974513, "total_loss": -0.3157036006450653, "policy_loss": -0.004088650923222303, "vf_loss": 0.8062646985054016, "vf_explained_var": 0.0032039880752563477, "kl": 0.0005627681966871023, "entropy": 1.7866708040237427, "entropy_coeff": 0.17446400225162506, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 51200, "episodes_total": 128, "training_iteration": 4, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-14-35", "timestamp": 1660241675, "time_this_iter_s": 24.43727397918701, "time_total_s": 96.46528315544128, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 96.46528315544128, "timesteps_since_restore": 51200, "iterations_since_restore": 4, "perf": {"cpu_util_percent": 39.84571428571428, "ram_util_percent": 57.64285714285714}} +{"episode_reward_max": 54.0, "episode_reward_min": 3.0, "episode_reward_mean": 10.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 28.0}, "policy_reward_mean": {"ppo": 5.42}, 
"custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 10.44, "shaped_reward_min": 3, "shaped_reward_max": 25, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.52, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.69, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.68, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.83, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.91, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 4.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.6, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.05, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.94, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.7, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.33, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.15, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.77, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.92, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.63, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 0.51, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.14, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.16, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.18, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.22, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 1.05, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.94, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.05, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.94, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [16.0, 9.0, 8.0, 14.0, 9.0, 9.0, 6.0, 11.0, 19.0, 9.0, 12.0, 8.0, 17.0, 3.0, 11.0, 16.0, 11.0, 6.0, 22.0, 6.0, 3.0, 14.0, 11.0, 20.0, 25.0, 54.0, 14.0, 16.0, 3.0, 14.0, 3.0, 3.0, 22.0, 3.0, 14.0, 14.0, 9.0, 17.0, 9.0, 8.0, 19.0, 3.0, 9.0, 14.0, 11.0, 
22.0, 6.0, 3.0, 3.0, 6.0, 9.0, 3.0, 11.0, 11.0, 3.0, 3.0, 11.0, 9.0, 9.0, 3.0, 11.0, 6.0, 11.0, 6.0, 16.0, 8.0, 6.0, 9.0, 8.0, 6.0, 8.0, 8.0, 6.0, 3.0, 24.0, 6.0, 6.0, 6.0, 19.0, 11.0, 14.0, 8.0, 9.0, 3.0, 8.0, 14.0, 14.0, 9.0, 17.0, 3.0, 19.0, 19.0, 17.0, 12.0, 6.0, 14.0, 11.0, 14.0, 14.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 5.0, 6.0, 3.0, 0.0, 8.0, 8.0, 6.0, 6.0, 3.0, 0.0, 9.0, 0.0, 6.0, 5.0, 6.0, 11.0, 8.0, 0.0, 9.0, 12.0, 0.0, 8.0, 0.0, 5.0, 12.0, 0.0, 3.0, 6.0, 5.0, 5.0, 11.0, 5.0, 6.0, 3.0, 3.0, 0.0, 22.0, 3.0, 3.0, 3.0, 0.0, 11.0, 3.0, 5.0, 6.0, 14.0, 6.0, 16.0, 9.0, 26.0, 28.0, 11.0, 3.0, 8.0, 8.0, 3.0, 0.0, 0.0, 14.0, 0.0, 3.0, 3.0, 0.0, 13.0, 9.0, 0.0, 3.0, 11.0, 3.0, 11.0, 3.0, 6.0, 3.0, 3.0, 14.0, 3.0, 6.0, 3.0, 5.0, 3.0, 16.0, 3.0, 0.0, 6.0, 3.0, 5.0, 9.0, 3.0, 8.0, 14.0, 8.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 0.0, 6.0, 9.0, 0.0, 0.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 0.0, 0.0, 3.0, 8.0, 3.0, 9.0, 0.0, 9.0, 0.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 11.0, 0.0, 3.0, 3.0, 8.0, 8.0, 5.0, 3.0, 6.0, 0.0, 3.0, 6.0, 5.0, 3.0, 3.0, 3.0, 3.0, 5.0, 8.0, 0.0, 0.0, 6.0, 0.0, 3.0, 6.0, 18.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 11.0, 8.0, 3.0, 8.0, 6.0, 8.0, 8.0, 0.0, 6.0, 3.0, 0.0, 3.0, 0.0, 8.0, 14.0, 0.0, 9.0, 5.0, 9.0, 0.0, 6.0, 11.0, 3.0, 0.0, 11.0, 8.0, 6.0, 13.0, 12.0, 5.0, 6.0, 6.0, 6.0, 0.0, 3.0, 11.0, 3.0, 8.0, 6.0, 8.0, 5.0, 9.0, 6.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45066530826567375, "mean_processing_ms": 0.15893004682590756, "mean_inference_ms": 
0.7420671329840245}, "off_policy_estimator": {}, "info": {"num_steps_trained": 120000, "num_steps_sampled": 64000, "sample_time_ms": 17388.622, "load_time_ms": 59.359, "grad_time_ms": 6772.673, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.012500000186264515, "cur_lr": 0.0010000000474974513, "total_loss": -0.30251750349998474, "policy_loss": -0.006208862643688917, "vf_loss": 1.4635206460952759, "vf_explained_var": 0.0046030678786337376, "kl": 0.0005594257963821292, "entropy": 1.7864326238632202, "entropy_coeff": 0.16595199704170227, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 64000, "episodes_total": 160, "training_iteration": 5, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-14-59", "timestamp": 1660241699, "time_this_iter_s": 24.809880018234253, "time_total_s": 121.27516317367554, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 
0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 121.27516317367554, "timesteps_since_restore": 64000, "iterations_since_restore": 5, "perf": {"cpu_util_percent": 39.71142857142857, "ram_util_percent": 57.60285714285714}} +{"episode_reward_max": 68.0, "episode_reward_min": 0.0, "episode_reward_mean": 12.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 6.195}, "custom_metrics": {"sparse_reward_mean": 0.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 11.59, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.22, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.75, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.39, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.9, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.53, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 4.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.43, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.59, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.1, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.99, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.59, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, 
"dish_pickup_agent_1_mean": 2.07, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.51, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 3, "soup_pickup_agent_1_mean": 0.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.17, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.22, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.23, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.2, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 1.1, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.99, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.1, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.99, "viable_onion_potting_agent_1_min": 0, 
"viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [14.0, 6.0, 20.0, 17.0, 14.0, 11.0, 6.0, 8.0, 6.0, 0.0, 11.0, 3.0, 22.0, 11.0, 19.0, 9.0, 20.0, 23.0, 6.0, 11.0, 11.0, 22.0, 19.0, 11.0, 11.0, 17.0, 14.0, 6.0, 68.0, 20.0, 9.0, 8.0, 16.0, 8.0, 6.0, 9.0, 8.0, 6.0, 8.0, 8.0, 6.0, 3.0, 24.0, 6.0, 6.0, 6.0, 19.0, 11.0, 14.0, 8.0, 9.0, 3.0, 8.0, 14.0, 14.0, 9.0, 17.0, 3.0, 19.0, 19.0, 17.0, 12.0, 6.0, 14.0, 11.0, 14.0, 14.0, 9.0, 16.0, 9.0, 8.0, 14.0, 9.0, 9.0, 6.0, 11.0, 19.0, 9.0, 12.0, 8.0, 17.0, 3.0, 11.0, 16.0, 11.0, 6.0, 22.0, 6.0, 3.0, 14.0, 11.0, 20.0, 25.0, 54.0, 14.0, 16.0, 3.0, 14.0, 3.0, 3.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [8.0, 6.0, 6.0, 0.0, 14.0, 6.0, 14.0, 3.0, 14.0, 0.0, 8.0, 3.0, 0.0, 6.0, 8.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 0.0, 8.0, 14.0, 3.0, 8.0, 6.0, 13.0, 9.0, 0.0, 17.0, 3.0, 12.0, 11.0, 0.0, 6.0, 11.0, 0.0, 8.0, 3.0, 6.0, 16.0, 9.0, 10.0, 5.0, 6.0, 5.0, 6.0, 9.0, 8.0, 3.0, 11.0, 6.0, 0.0, 31.0, 37.0, 11.0, 9.0, 0.0, 9.0, 0.0, 8.0, 8.0, 8.0, 5.0, 3.0, 6.0, 0.0, 3.0, 6.0, 5.0, 3.0, 3.0, 3.0, 3.0, 5.0, 8.0, 0.0, 0.0, 6.0, 0.0, 3.0, 6.0, 18.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 11.0, 8.0, 3.0, 8.0, 6.0, 8.0, 8.0, 0.0, 6.0, 3.0, 0.0, 3.0, 0.0, 8.0, 14.0, 0.0, 9.0, 5.0, 9.0, 0.0, 6.0, 11.0, 3.0, 0.0, 11.0, 8.0, 6.0, 13.0, 12.0, 5.0, 6.0, 6.0, 6.0, 0.0, 3.0, 11.0, 3.0, 8.0, 6.0, 8.0, 5.0, 9.0, 6.0, 3.0, 11.0, 5.0, 6.0, 3.0, 0.0, 8.0, 8.0, 6.0, 6.0, 3.0, 0.0, 9.0, 0.0, 6.0, 5.0, 6.0, 11.0, 8.0, 0.0, 9.0, 12.0, 0.0, 8.0, 0.0, 5.0, 12.0, 0.0, 3.0, 6.0, 5.0, 5.0, 11.0, 5.0, 6.0, 3.0, 3.0, 0.0, 22.0, 3.0, 3.0, 3.0, 0.0, 11.0, 3.0, 5.0, 6.0, 14.0, 6.0, 16.0, 9.0, 26.0, 28.0, 11.0, 3.0, 8.0, 8.0, 3.0, 0.0, 0.0, 14.0, 0.0, 3.0, 3.0, 0.0]}, "sampler_perf": {"mean_env_wait_ms": 0.4527646689746759, "mean_processing_ms": 0.15894443082147025, "mean_inference_ms": 0.7459920431247151}, "off_policy_estimator": {}, "info": {"num_steps_trained": 144000, "num_steps_sampled": 76800, "sample_time_ms": 17631.781, "load_time_ms": 55.878, "grad_time_ms": 7055.375, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0062500000931322575, "cur_lr": 0.0010000000474974513, "total_loss": -0.285332590341568, "policy_loss": 
-0.004330330062657595, "vf_loss": 1.753544807434082, "vf_explained_var": 0.007292529102414846, "kl": 0.0005500561674125493, "entropy": 1.7859567403793335, "entropy_coeff": 0.15744000673294067, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 76800, "episodes_total": 192, "training_iteration": 6, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-15-27", "timestamp": 1660241727, "time_this_iter_s": 27.381940841674805, "time_total_s": 148.65710401535034, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, 
"eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, 
"multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 148.65710401535034, "timesteps_since_restore": 76800, "iterations_since_restore": 6, "perf": {"cpu_util_percent": 46.235897435897435, "ram_util_percent": 57.91025641025641}} +{"episode_reward_max": 68.0, "episode_reward_min": 0.0, "episode_reward_mean": 13.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 6.98}, "custom_metrics": {"sparse_reward_mean": 0.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 12.36, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.26, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.86, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.13, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 8, "useful_onion_pickup_agent_1_mean": 3.81, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 4.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.74, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.13, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.16, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.81, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.17, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.22, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.26, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.85, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.61, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 3, "soup_pickup_agent_1_mean": 0.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.23, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.17, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.31, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.3, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.13, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.16, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.13, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.16, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [6.0, 14.0, 19.0, 6.0, 6.0, 17.0, 14.0, 3.0, 25.0, 11.0, 14.0, 9.0, 17.0, 14.0, 17.0, 14.0, 25.0, 17.0, 6.0, 19.0, 14.0, 3.0, 6.0, 3.0, 6.0, 63.0, 9.0, 28.0, 14.0, 8.0, 57.0, 9.0, 11.0, 14.0, 14.0, 9.0, 16.0, 9.0, 8.0, 14.0, 9.0, 9.0, 6.0, 11.0, 19.0, 9.0, 12.0, 8.0, 17.0, 3.0, 11.0, 16.0, 11.0, 6.0, 22.0, 6.0, 3.0, 14.0, 11.0, 20.0, 25.0, 54.0, 14.0, 16.0, 3.0, 14.0, 3.0, 3.0, 14.0, 6.0, 20.0, 17.0, 14.0, 11.0, 6.0, 8.0, 6.0, 0.0, 11.0, 3.0, 22.0, 11.0, 19.0, 9.0, 20.0, 23.0, 6.0, 11.0, 11.0, 22.0, 19.0, 11.0, 11.0, 17.0, 14.0, 6.0, 68.0, 20.0, 9.0, 8.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 3.0, 6.0, 8.0, 10.0, 9.0, 0.0, 6.0, 3.0, 3.0, 9.0, 8.0, 6.0, 8.0, 0.0, 3.0, 17.0, 8.0, 8.0, 3.0, 5.0, 9.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 14.0, 3.0, 6.0, 8.0, 9.0, 16.0, 6.0, 11.0, 0.0, 6.0, 14.0, 5.0, 6.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 29.0, 34.0, 6.0, 3.0, 6.0, 22.0, 8.0, 6.0, 5.0, 3.0, 31.0, 26.0, 3.0, 6.0, 3.0, 8.0, 6.0, 8.0, 5.0, 9.0, 6.0, 3.0, 11.0, 5.0, 6.0, 3.0, 0.0, 8.0, 8.0, 6.0, 6.0, 3.0, 0.0, 9.0, 0.0, 6.0, 5.0, 6.0, 11.0, 8.0, 0.0, 9.0, 12.0, 0.0, 8.0, 0.0, 5.0, 12.0, 0.0, 3.0, 6.0, 5.0, 5.0, 11.0, 5.0, 6.0, 3.0, 3.0, 0.0, 22.0, 3.0, 3.0, 3.0, 0.0, 11.0, 3.0, 5.0, 6.0, 14.0, 6.0, 16.0, 9.0, 26.0, 28.0, 11.0, 3.0, 8.0, 8.0, 3.0, 0.0, 0.0, 14.0, 0.0, 3.0, 3.0, 0.0, 8.0, 6.0, 6.0, 0.0, 14.0, 6.0, 14.0, 3.0, 14.0, 0.0, 8.0, 3.0, 0.0, 6.0, 8.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 0.0, 8.0, 14.0, 3.0, 8.0, 6.0, 13.0, 9.0, 0.0, 17.0, 3.0, 12.0, 11.0, 0.0, 6.0, 11.0, 0.0, 8.0, 3.0, 6.0, 16.0, 9.0, 10.0, 5.0, 6.0, 5.0, 6.0, 9.0, 8.0, 3.0, 11.0, 6.0, 0.0, 31.0, 37.0, 11.0, 9.0, 0.0, 9.0, 0.0, 8.0]}, "sampler_perf": {"mean_env_wait_ms": 7.041783650517479, "mean_processing_ms": 0.16106120541031635, "mean_inference_ms": 2.816008339885107}, "off_policy_estimator": {}, "info": {"num_steps_trained": 168000, "num_steps_sampled": 89600, "sample_time_ms": 363821.155, "load_time_ms": 54.883, "grad_time_ms": 7100.098, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0031250000465661287, "cur_lr": 0.0010000000474974513, "total_loss": -0.26905307173728943, "policy_loss": -0.0034452469553798437, "vf_loss": 2.160554885864258, "vf_explained_var": 0.012243330478668213, "kl": 0.0006163662183098495, "entropy": 1.784928321838379, "entropy_coeff": 0.14892800152301788, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 89600, 
"episodes_total": 224, "training_iteration": 7, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-56-15", "timestamp": 1660244175, "time_this_iter_s": 2448.401287794113, "time_total_s": 2597.0583918094635, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, 
"clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, 
"policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2597.0583918094635, "timesteps_since_restore": 89600, "iterations_since_restore": 7, "perf": {"cpu_util_percent": 53.55, "ram_util_percent": 58.647826086956535}} +{"episode_reward_max": 68.0, "episode_reward_min": 0.0, "episode_reward_mean": 14.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 7.205}, "custom_metrics": {"sparse_reward_mean": 1.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 12.41, "shaped_reward_min": 0, "shaped_reward_max": 33, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 
0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.3, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.32, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.3, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.44, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.61, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.59, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.2, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.11, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.73, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.53, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.81, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.61, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 3, "soup_pickup_agent_1_mean": 0.74, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.4, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.41, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.2, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.11, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.2, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.11, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [11.0, 9.0, 17.0, 8.0, 14.0, 19.0, 6.0, 33.0, 11.0, 6.0, 11.0, 14.0, 16.0, 3.0, 17.0, 6.0, 16.0, 11.0, 3.0, 57.0, 16.0, 8.0, 17.0, 9.0, 57.0, 11.0, 0.0, 11.0, 11.0, 11.0, 22.0, 11.0, 3.0, 14.0, 3.0, 3.0, 14.0, 6.0, 20.0, 17.0, 14.0, 11.0, 6.0, 8.0, 6.0, 0.0, 11.0, 3.0, 22.0, 11.0, 19.0, 9.0, 20.0, 23.0, 6.0, 11.0, 11.0, 22.0, 19.0, 11.0, 11.0, 17.0, 14.0, 6.0, 68.0, 20.0, 9.0, 8.0, 6.0, 14.0, 19.0, 6.0, 6.0, 17.0, 14.0, 3.0, 25.0, 11.0, 14.0, 9.0, 17.0, 14.0, 17.0, 14.0, 25.0, 17.0, 6.0, 19.0, 14.0, 3.0, 6.0, 3.0, 6.0, 63.0, 9.0, 28.0, 14.0, 8.0, 57.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 8.0, 6.0, 3.0, 11.0, 6.0, 0.0, 8.0, 0.0, 14.0, 3.0, 16.0, 3.0, 3.0, 19.0, 14.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 11.0, 3.0, 3.0, 13.0, 0.0, 3.0, 8.0, 9.0, 3.0, 3.0, 6.0, 10.0, 3.0, 8.0, 0.0, 3.0, 23.0, 34.0, 8.0, 
8.0, 8.0, 0.0, 3.0, 14.0, 0.0, 9.0, 28.0, 29.0, 6.0, 5.0, 0.0, 0.0, 6.0, 5.0, 6.0, 5.0, 3.0, 8.0, 14.0, 8.0, 3.0, 8.0, 3.0, 0.0, 0.0, 14.0, 0.0, 3.0, 3.0, 0.0, 8.0, 6.0, 6.0, 0.0, 14.0, 6.0, 14.0, 3.0, 14.0, 0.0, 8.0, 3.0, 0.0, 6.0, 8.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 0.0, 8.0, 14.0, 3.0, 8.0, 6.0, 13.0, 9.0, 0.0, 17.0, 3.0, 12.0, 11.0, 0.0, 6.0, 11.0, 0.0, 8.0, 3.0, 6.0, 16.0, 9.0, 10.0, 5.0, 6.0, 5.0, 6.0, 9.0, 8.0, 3.0, 11.0, 6.0, 0.0, 31.0, 37.0, 11.0, 9.0, 0.0, 9.0, 0.0, 8.0, 3.0, 3.0, 6.0, 8.0, 10.0, 9.0, 0.0, 6.0, 3.0, 3.0, 9.0, 8.0, 6.0, 8.0, 0.0, 3.0, 17.0, 8.0, 8.0, 3.0, 5.0, 9.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 14.0, 3.0, 6.0, 8.0, 9.0, 16.0, 6.0, 11.0, 0.0, 6.0, 14.0, 5.0, 6.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 29.0, 34.0, 6.0, 3.0, 6.0, 22.0, 8.0, 6.0, 5.0, 3.0, 31.0, 26.0, 3.0, 6.0]}, "sampler_perf": {"mean_env_wait_ms": 12.808520770186506, "mean_processing_ms": 0.16344551542853641, "mean_inference_ms": 4.631216998135988}, "off_policy_estimator": {}, "info": {"num_steps_trained": 192000, "num_steps_sampled": 102400, "sample_time_ms": 320761.187, "load_time_ms": 52.674, "grad_time_ms": 7271.518, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0015625000232830644, "cur_lr": 0.0010000000474974513, "total_loss": -0.25552046298980713, "policy_loss": -0.005265455227345228, "vf_loss": 1.9171754121780396, "vf_explained_var": 0.015465259552001953, "kl": 0.0006017824052833021, "entropy": 1.7836121320724487, "entropy_coeff": 0.1404159963130951, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 102400, "episodes_total": 256, "training_iteration": 8, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-56-43", "timestamp": 1660244203, "time_this_iter_s": 27.877708196640015, "time_total_s": 2624.9361000061035, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 
1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, 
"evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], 
"clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2624.9361000061035, "timesteps_since_restore": 102400, "iterations_since_restore": 8, "perf": {"cpu_util_percent": 42.6075, "ram_util_percent": 58.39000000000001}} +{"episode_reward_max": 68.0, "episode_reward_min": 0.0, "episode_reward_mean": 16.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 8.09}, "custom_metrics": {"sparse_reward_mean": 1.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 13.78, "shaped_reward_min": 0, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.25, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.27, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, 
"useful_onion_pickup_agent_0_mean": 3.31, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.46, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.48, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.21, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 1.33, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 3.05, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.19, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.92, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.81, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 0.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.22, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.17, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.54, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.49, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.21, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 1.33, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.21, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 1.33, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [6.0, 28.0, 3.0, 14.0, 19.0, 57.0, 9.0, 11.0, 9.0, 17.0, 14.0, 14.0, 23.0, 25.0, 11.0, 28.0, 14.0, 14.0, 17.0, 12.0, 3.0, 16.0, 14.0, 22.0, 36.0, 17.0, 12.0, 22.0, 14.0, 16.0, 14.0, 17.0, 68.0, 20.0, 9.0, 8.0, 6.0, 14.0, 19.0, 6.0, 6.0, 17.0, 14.0, 3.0, 25.0, 11.0, 14.0, 9.0, 17.0, 14.0, 17.0, 14.0, 25.0, 17.0, 6.0, 19.0, 14.0, 3.0, 6.0, 3.0, 6.0, 63.0, 9.0, 28.0, 14.0, 8.0, 57.0, 9.0, 11.0, 9.0, 17.0, 8.0, 14.0, 19.0, 6.0, 33.0, 11.0, 6.0, 11.0, 14.0, 16.0, 3.0, 17.0, 6.0, 16.0, 11.0, 3.0, 57.0, 16.0, 8.0, 17.0, 9.0, 57.0, 11.0, 0.0, 11.0, 11.0, 11.0, 22.0, 11.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 6.0, 22.0, 6.0, 3.0, 0.0, 8.0, 6.0, 10.0, 9.0, 23.0, 34.0, 3.0, 6.0, 11.0, 0.0, 6.0, 3.0, 11.0, 6.0, 11.0, 3.0, 5.0, 9.0, 17.0, 6.0, 9.0, 16.0, 8.0, 3.0, 6.0, 22.0, 3.0, 11.0, 14.0, 0.0, 6.0, 11.0, 6.0, 6.0, 0.0, 3.0, 5.0, 11.0, 8.0, 6.0, 6.0, 16.0, 11.0, 25.0, 8.0, 9.0, 3.0, 9.0, 16.0, 6.0, 8.0, 6.0, 16.0, 0.0, 11.0, 3.0, 6.0, 11.0, 31.0, 37.0, 11.0, 9.0, 0.0, 9.0, 0.0, 8.0, 3.0, 3.0, 6.0, 8.0, 10.0, 9.0, 0.0, 6.0, 3.0, 3.0, 9.0, 8.0, 6.0, 8.0, 0.0, 3.0, 17.0, 8.0, 8.0, 3.0, 5.0, 9.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 14.0, 3.0, 6.0, 8.0, 9.0, 
16.0, 6.0, 11.0, 0.0, 6.0, 14.0, 5.0, 6.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 29.0, 34.0, 6.0, 3.0, 6.0, 22.0, 8.0, 6.0, 5.0, 3.0, 31.0, 26.0, 3.0, 6.0, 3.0, 8.0, 6.0, 3.0, 11.0, 6.0, 0.0, 8.0, 0.0, 14.0, 3.0, 16.0, 3.0, 3.0, 19.0, 14.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 11.0, 3.0, 3.0, 13.0, 0.0, 3.0, 8.0, 9.0, 3.0, 3.0, 6.0, 10.0, 3.0, 8.0, 0.0, 3.0, 23.0, 34.0, 8.0, 8.0, 8.0, 0.0, 3.0, 14.0, 0.0, 9.0, 28.0, 29.0, 6.0, 5.0, 0.0, 0.0, 6.0, 5.0, 6.0, 5.0, 3.0, 8.0, 14.0, 8.0, 3.0, 8.0]}, "sampler_perf": {"mean_env_wait_ms": 17.933374555696833, "mean_processing_ms": 0.16517403131021888, "mean_inference_ms": 6.240884020190002}, "off_policy_estimator": {}, "info": {"num_steps_trained": 216000, "num_steps_sampled": 115200, "sample_time_ms": 287052.402, "load_time_ms": 50.896, "grad_time_ms": 7408.088, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0007812500116415322, "cur_lr": 0.0010000000474974513, "total_loss": -0.24049125611782074, "policy_loss": -0.005544388201087713, "vf_loss": 1.8025983572006226, "vf_explained_var": 0.016161540523171425, "kl": 0.0006836934480816126, "entropy": 1.7825666666030884, "entropy_coeff": 0.1319040060043335, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 115200, "episodes_total": 288, "training_iteration": 9, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-57-09", "timestamp": 1660244229, "time_this_iter_s": 25.946558237075806, "time_total_s": 2650.8826582431793, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, 
"use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 
2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2650.8826582431793, "timesteps_since_restore": 115200, "iterations_since_restore": 9, "perf": {"cpu_util_percent": 38.36216216216216, "ram_util_percent": 57.93513513513512}} 
+{"episode_reward_max": 57.0, "episode_reward_min": 0.0, "episode_reward_mean": 15.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 34.0}, "policy_reward_mean": {"ppo": 7.83}, "custom_metrics": {"sparse_reward_mean": 1.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 13.66, "shaped_reward_min": 0, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.85, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.36, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.23, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.74, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
9, "onion_drop_agent_1_mean": 3.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.36, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 1.1, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 1.43, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 4, "dish_pickup_agent_0_mean": 3.36, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.81, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.93, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.89, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 0.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.25, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.55, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.33, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.1, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 1.43, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 4, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.1, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 1.43, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 4, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [14.0, 
19.0, 9.0, 6.0, 9.0, 17.0, 6.0, 6.0, 20.0, 20.0, 17.0, 14.0, 11.0, 9.0, 57.0, 16.0, 17.0, 14.0, 11.0, 8.0, 11.0, 16.0, 14.0, 25.0, 22.0, 3.0, 12.0, 9.0, 9.0, 17.0, 17.0, 3.0, 14.0, 8.0, 57.0, 9.0, 11.0, 9.0, 17.0, 8.0, 14.0, 19.0, 6.0, 33.0, 11.0, 6.0, 11.0, 14.0, 16.0, 3.0, 17.0, 6.0, 16.0, 11.0, 3.0, 57.0, 16.0, 8.0, 17.0, 9.0, 57.0, 11.0, 0.0, 11.0, 11.0, 11.0, 22.0, 11.0, 6.0, 28.0, 3.0, 14.0, 19.0, 57.0, 9.0, 11.0, 9.0, 17.0, 14.0, 14.0, 23.0, 25.0, 11.0, 28.0, 14.0, 14.0, 17.0, 12.0, 3.0, 16.0, 14.0, 22.0, 36.0, 17.0, 12.0, 22.0, 14.0, 16.0, 14.0, 17.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [8.0, 6.0, 10.0, 9.0, 3.0, 6.0, 0.0, 6.0, 3.0, 6.0, 8.0, 9.0, 6.0, 0.0, 3.0, 3.0, 14.0, 6.0, 9.0, 11.0, 14.0, 3.0, 3.0, 11.0, 11.0, 0.0, 3.0, 6.0, 23.0, 34.0, 11.0, 5.0, 14.0, 3.0, 8.0, 6.0, 8.0, 3.0, 0.0, 8.0, 8.0, 3.0, 10.0, 6.0, 6.0, 8.0, 13.0, 12.0, 8.0, 14.0, 0.0, 3.0, 6.0, 6.0, 0.0, 9.0, 3.0, 6.0, 9.0, 8.0, 8.0, 9.0, 0.0, 3.0, 8.0, 6.0, 5.0, 3.0, 31.0, 26.0, 3.0, 6.0, 3.0, 8.0, 6.0, 3.0, 11.0, 6.0, 0.0, 8.0, 0.0, 14.0, 3.0, 16.0, 3.0, 3.0, 19.0, 14.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 11.0, 3.0, 3.0, 13.0, 0.0, 3.0, 8.0, 9.0, 3.0, 3.0, 6.0, 10.0, 3.0, 8.0, 0.0, 3.0, 23.0, 34.0, 8.0, 8.0, 8.0, 0.0, 3.0, 14.0, 0.0, 9.0, 28.0, 29.0, 6.0, 5.0, 0.0, 0.0, 6.0, 5.0, 6.0, 5.0, 3.0, 8.0, 14.0, 8.0, 3.0, 8.0, 0.0, 6.0, 22.0, 6.0, 3.0, 0.0, 8.0, 6.0, 10.0, 9.0, 23.0, 34.0, 3.0, 6.0, 11.0, 0.0, 6.0, 3.0, 11.0, 6.0, 11.0, 3.0, 5.0, 9.0, 17.0, 6.0, 9.0, 16.0, 8.0, 3.0, 6.0, 22.0, 3.0, 
11.0, 14.0, 0.0, 6.0, 11.0, 6.0, 6.0, 0.0, 3.0, 5.0, 11.0, 8.0, 6.0, 6.0, 16.0, 11.0, 25.0, 8.0, 9.0, 3.0, 9.0, 16.0, 6.0, 8.0, 6.0, 16.0, 0.0, 11.0, 3.0, 6.0, 11.0]}, "sampler_perf": {"mean_env_wait_ms": 16.781937376069852, "mean_processing_ms": 0.16502285382446033, "mean_inference_ms": 5.880940450206554}, "off_policy_estimator": {}, "info": {"num_steps_trained": 240000, "num_steps_sampled": 128000, "sample_time_ms": 260081.675, "load_time_ms": 49.456, "grad_time_ms": 7564.799, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0003906250058207661, "cur_lr": 0.0010000000474974513, "total_loss": -0.22780847549438477, "policy_loss": -0.00820181891322136, "vf_loss": 1.5030304193496704, "vf_explained_var": 0.01960124634206295, "kl": 0.0007011755951680243, "entropy": 1.7809678316116333, "entropy_coeff": 0.1233920007944107, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 128000, "episodes_total": 320, "training_iteration": 10, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-57-36", "timestamp": 1660244256, "time_this_iter_s": 26.38225793838501, "time_total_s": 2677.2649161815643, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": 
null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, 
"metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2677.2649161815643, "timesteps_since_restore": 128000, "iterations_since_restore": 10, "perf": {"cpu_util_percent": 35.91621621621621, "ram_util_percent": 58.01891891891893}} +{"episode_reward_max": 65.0, "episode_reward_min": 3.0, "episode_reward_mean": 16.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 8.47}, "custom_metrics": {"sparse_reward_mean": 1.0, "sparse_reward_min": 0, 
"sparse_reward_max": 20, "shaped_reward_mean": 14.94, "shaped_reward_min": 3, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.74, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.61, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.22, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.89, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.46, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, 
"potting_onion_agent_0_mean": 1.18, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.59, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 4, "dish_pickup_agent_0_mean": 3.46, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.72, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.85, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.08, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.69, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.65, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.4, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.18, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.59, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 4, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.18, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.59, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 4, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [17.0, 14.0, 20.0, 25.0, 20.0, 14.0, 11.0, 9.0, 3.0, 17.0, 65.0, 11.0, 9.0, 23.0, 65.0, 11.0, 8.0, 25.0, 11.0, 28.0, 19.0, 20.0, 17.0, 9.0, 11.0, 60.0, 17.0, 3.0, 27.0, 25.0, 11.0, 8.0, 11.0, 11.0, 22.0, 11.0, 6.0, 28.0, 3.0, 14.0, 19.0, 57.0, 9.0, 11.0, 9.0, 17.0, 14.0, 14.0, 23.0, 25.0, 11.0, 28.0, 14.0, 14.0, 17.0, 12.0, 3.0, 16.0, 
14.0, 22.0, 36.0, 17.0, 12.0, 22.0, 14.0, 16.0, 14.0, 17.0, 14.0, 19.0, 9.0, 6.0, 9.0, 17.0, 6.0, 6.0, 20.0, 20.0, 17.0, 14.0, 11.0, 9.0, 57.0, 16.0, 17.0, 14.0, 11.0, 8.0, 11.0, 16.0, 14.0, 25.0, 22.0, 3.0, 12.0, 9.0, 9.0, 17.0, 17.0, 3.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [9.0, 8.0, 8.0, 6.0, 14.0, 6.0, 6.0, 19.0, 6.0, 14.0, 0.0, 14.0, 3.0, 8.0, 3.0, 6.0, 0.0, 3.0, 9.0, 8.0, 28.0, 37.0, 5.0, 6.0, 3.0, 6.0, 14.0, 9.0, 31.0, 34.0, 5.0, 6.0, 0.0, 8.0, 11.0, 14.0, 5.0, 6.0, 9.0, 19.0, 8.0, 11.0, 12.0, 8.0, 3.0, 14.0, 0.0, 9.0, 0.0, 11.0, 26.0, 34.0, 8.0, 9.0, 0.0, 3.0, 13.0, 14.0, 14.0, 11.0, 11.0, 0.0, 8.0, 0.0, 6.0, 5.0, 3.0, 8.0, 14.0, 8.0, 3.0, 8.0, 0.0, 6.0, 22.0, 6.0, 3.0, 0.0, 8.0, 6.0, 10.0, 9.0, 23.0, 34.0, 3.0, 6.0, 11.0, 0.0, 6.0, 3.0, 11.0, 6.0, 11.0, 3.0, 5.0, 9.0, 17.0, 6.0, 9.0, 16.0, 8.0, 3.0, 6.0, 22.0, 3.0, 11.0, 14.0, 0.0, 6.0, 11.0, 6.0, 6.0, 0.0, 3.0, 5.0, 11.0, 8.0, 6.0, 6.0, 16.0, 11.0, 25.0, 8.0, 9.0, 3.0, 9.0, 16.0, 6.0, 8.0, 6.0, 16.0, 0.0, 11.0, 3.0, 6.0, 11.0, 8.0, 6.0, 10.0, 9.0, 3.0, 6.0, 0.0, 6.0, 3.0, 6.0, 8.0, 9.0, 6.0, 0.0, 3.0, 3.0, 14.0, 6.0, 9.0, 11.0, 14.0, 3.0, 3.0, 11.0, 11.0, 0.0, 3.0, 6.0, 23.0, 34.0, 11.0, 5.0, 14.0, 3.0, 8.0, 6.0, 8.0, 3.0, 0.0, 8.0, 8.0, 3.0, 10.0, 6.0, 6.0, 8.0, 13.0, 12.0, 8.0, 14.0, 0.0, 3.0, 6.0, 6.0, 0.0, 9.0, 3.0, 6.0, 9.0, 8.0, 8.0, 9.0, 0.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 15.10732110386456, "mean_processing_ms": 0.16432950718550896, "mean_inference_ms": 5.354497783846054}, 
"off_policy_estimator": {}, "info": {"num_steps_trained": 264000, "num_steps_sampled": 140800, "sample_time_ms": 260129.157, "load_time_ms": 39.286, "grad_time_ms": 7759.155, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.00019531250291038305, "cur_lr": 0.0010000000474974513, "total_loss": -0.21097473800182343, "policy_loss": -0.006903436034917831, "vf_loss": 2.839796781539917, "vf_explained_var": 0.029899099841713905, "kl": 0.0006908049690537155, "entropy": 1.7788597345352173, "entropy_coeff": 0.11488000303506851, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 140800, "episodes_total": 352, "training_iteration": 11, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-58-02", "timestamp": 1660244282, "time_this_iter_s": 26.244181156158447, "time_total_s": 2703.509097337723, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": 
{"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2703.509097337723, "timesteps_since_restore": 140800, "iterations_since_restore": 11, "perf": {"cpu_util_percent": 36.42631578947368, "ram_util_percent": 57.83157894736843}} +{"episode_reward_max": 65.0, "episode_reward_min": 3.0, "episode_reward_mean": 18.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 40.0}, "policy_reward_mean": {"ppo": 9.17}, "custom_metrics": {"sparse_reward_mean": 1.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 15.54, "shaped_reward_min": 3, "shaped_reward_max": 31, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.67, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.71, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.21, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.04, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 1.18, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.74, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.55, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, 
"dish_pickup_agent_1_mean": 2.98, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.18, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.2, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.92, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.22, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.61, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.42, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.18, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.74, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.18, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.74, "viable_onion_potting_agent_1_min": 0, 
"viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [20.0, 11.0, 60.0, 25.0, 22.0, 65.0, 12.0, 11.0, 23.0, 3.0, 30.0, 17.0, 19.0, 31.0, 28.0, 19.0, 14.0, 25.0, 25.0, 22.0, 20.0, 11.0, 11.0, 57.0, 17.0, 6.0, 17.0, 11.0, 11.0, 11.0, 14.0, 14.0, 14.0, 16.0, 14.0, 17.0, 14.0, 19.0, 9.0, 6.0, 9.0, 17.0, 6.0, 6.0, 20.0, 20.0, 17.0, 14.0, 11.0, 9.0, 57.0, 16.0, 17.0, 14.0, 11.0, 8.0, 11.0, 16.0, 14.0, 25.0, 22.0, 3.0, 12.0, 9.0, 9.0, 17.0, 17.0, 3.0, 17.0, 14.0, 20.0, 25.0, 20.0, 14.0, 11.0, 9.0, 3.0, 17.0, 65.0, 11.0, 9.0, 23.0, 65.0, 11.0, 8.0, 25.0, 11.0, 28.0, 19.0, 20.0, 17.0, 9.0, 11.0, 60.0, 17.0, 3.0, 27.0, 25.0, 11.0, 8.0], "episode_lengths": [400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 9.0, 0.0, 11.0, 20.0, 40.0, 12.0, 13.0, 11.0, 11.0, 34.0, 31.0, 3.0, 9.0, 5.0, 6.0, 0.0, 23.0, 3.0, 0.0, 16.0, 14.0, 6.0, 11.0, 13.0, 6.0, 17.0, 14.0, 14.0, 14.0, 6.0, 13.0, 5.0, 9.0, 16.0, 9.0, 16.0, 9.0, 5.0, 17.0, 8.0, 12.0, 3.0, 8.0, 0.0, 11.0, 26.0, 31.0, 0.0, 17.0, 6.0, 0.0, 3.0, 14.0, 8.0, 3.0, 6.0, 5.0, 8.0, 3.0, 3.0, 11.0, 11.0, 3.0, 8.0, 6.0, 16.0, 0.0, 11.0, 3.0, 6.0, 11.0, 8.0, 6.0, 10.0, 9.0, 3.0, 6.0, 0.0, 6.0, 3.0, 6.0, 8.0, 9.0, 6.0, 0.0, 3.0, 3.0, 14.0, 6.0, 9.0, 11.0, 14.0, 3.0, 3.0, 11.0, 11.0, 0.0, 3.0, 6.0, 23.0, 34.0, 11.0, 5.0, 14.0, 3.0, 8.0, 6.0, 8.0, 3.0, 0.0, 8.0, 8.0, 3.0, 10.0, 6.0, 6.0, 8.0, 13.0, 12.0, 8.0, 14.0, 0.0, 3.0, 6.0, 6.0, 0.0, 9.0, 3.0, 6.0, 9.0, 8.0, 8.0, 9.0, 0.0, 3.0, 9.0, 8.0, 8.0, 6.0, 14.0, 6.0, 6.0, 19.0, 6.0, 14.0, 0.0, 14.0, 3.0, 8.0, 3.0, 6.0, 0.0, 3.0, 9.0, 8.0, 28.0, 37.0, 5.0, 6.0, 3.0, 6.0, 14.0, 9.0, 31.0, 34.0, 5.0, 6.0, 0.0, 8.0, 11.0, 14.0, 5.0, 6.0, 9.0, 19.0, 8.0, 11.0, 12.0, 8.0, 3.0, 14.0, 0.0, 9.0, 0.0, 11.0, 26.0, 34.0, 8.0, 9.0, 0.0, 3.0, 13.0, 14.0, 14.0, 11.0, 11.0, 0.0, 8.0, 0.0]}, "sampler_perf": {"mean_env_wait_ms": 13.746840147909538, "mean_processing_ms": 0.16393515170904638, "mean_inference_ms": 4.926997257535761}, "off_policy_estimator": {}, "info": {"num_steps_trained": 288000, "num_steps_sampled": 153600, "sample_time_ms": 260207.009, "load_time_ms": 38.842, "grad_time_ms": 8068.402, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 9.765625145519152e-05, "cur_lr": 
0.0010000000474974513, "total_loss": -0.19599168002605438, "policy_loss": -0.007250078488141298, "vf_loss": 2.85541033744812, "vf_explained_var": 0.045025069266557693, "kl": 0.0006896388367749751, "entropy": 1.7771064043045044, "entropy_coeff": 0.10636799782514572, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 153600, "episodes_total": 384, "training_iteration": 12, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-58-29", "timestamp": 1660244309, "time_this_iter_s": 27.08998394012451, "time_total_s": 2730.5990812778473, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2730.5990812778473, "timesteps_since_restore": 153600, "iterations_since_restore": 12, "perf": {"cpu_util_percent": 37.42368421052632, "ram_util_percent": 58.20789473684212}} +{"episode_reward_max": 76.0, "episode_reward_min": 3.0, "episode_reward_mean": 21.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 45.0}, "policy_reward_mean": {"ppo": 10.6}, "custom_metrics": {"sparse_reward_mean": 1.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 17.6, "shaped_reward_min": 3, "shaped_reward_max": 37, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, 
"tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.63, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.81, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.24, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.08, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.93, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 3.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.33, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.98, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.66, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.21, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.28, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.24, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.92, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.19, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 0.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 0.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.31, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.74, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 11, "soup_drop_agent_1_mean": 0.54, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.33, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.98, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.33, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.98, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [76.0, 30.0, 25.0, 9.0, 28.0, 17.0, 68.0, 6.0, 3.0, 22.0, 17.0, 37.0, 14.0, 34.0, 12.0, 17.0, 27.0, 11.0, 25.0, 11.0, 17.0, 17.0, 25.0, 11.0, 12.0, 3.0, 19.0, 22.0, 60.0, 25.0, 25.0, 34.0, 9.0, 17.0, 17.0, 3.0, 17.0, 14.0, 20.0, 25.0, 20.0, 14.0, 11.0, 9.0, 3.0, 17.0, 65.0, 11.0, 9.0, 23.0, 65.0, 11.0, 8.0, 25.0, 11.0, 28.0, 19.0, 20.0, 17.0, 9.0, 11.0, 60.0, 17.0, 3.0, 27.0, 25.0, 11.0, 8.0, 20.0, 11.0, 60.0, 25.0, 22.0, 65.0, 12.0, 11.0, 23.0, 3.0, 30.0, 17.0, 19.0, 31.0, 28.0, 19.0, 14.0, 25.0, 25.0, 22.0, 20.0, 11.0, 11.0, 57.0, 17.0, 6.0, 17.0, 11.0, 11.0, 11.0, 14.0, 14.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [31.0, 45.0, 19.0, 11.0, 5.0, 20.0, 6.0, 3.0, 6.0, 22.0, 6.0, 11.0, 28.0, 40.0, 6.0, 0.0, 3.0, 0.0, 11.0, 11.0, 8.0, 9.0, 20.0, 17.0, 5.0, 9.0, 20.0, 14.0, 9.0, 3.0, 5.0, 12.0, 14.0, 13.0, 3.0, 8.0, 13.0, 12.0, 11.0, 0.0, 5.0, 12.0, 9.0, 8.0, 17.0, 8.0, 0.0, 11.0, 9.0, 3.0, 3.0, 0.0, 6.0, 13.0, 8.0, 14.0, 28.0, 32.0, 19.0, 6.0, 8.0, 17.0, 19.0, 15.0, 3.0, 6.0, 9.0, 8.0, 8.0, 9.0, 0.0, 3.0, 9.0, 8.0, 8.0, 6.0, 14.0, 6.0, 6.0, 19.0, 6.0, 14.0, 0.0, 14.0, 3.0, 8.0, 3.0, 6.0, 0.0, 3.0, 9.0, 8.0, 28.0, 37.0, 5.0, 6.0, 3.0, 6.0, 14.0, 9.0, 31.0, 34.0, 5.0, 6.0, 0.0, 8.0, 11.0, 14.0, 5.0, 6.0, 9.0, 19.0, 8.0, 11.0, 12.0, 8.0, 3.0, 14.0, 0.0, 9.0, 0.0, 11.0, 26.0, 34.0, 8.0, 9.0, 0.0, 3.0, 13.0, 14.0, 14.0, 11.0, 11.0, 0.0, 8.0, 0.0, 11.0, 9.0, 0.0, 11.0, 20.0, 40.0, 12.0, 13.0, 11.0, 11.0, 34.0, 31.0, 3.0, 9.0, 5.0, 6.0, 0.0, 23.0, 3.0, 0.0, 16.0, 14.0, 6.0, 11.0, 13.0, 6.0, 17.0, 14.0, 14.0, 14.0, 6.0, 13.0, 5.0, 9.0, 16.0, 9.0, 16.0, 9.0, 5.0, 17.0, 8.0, 12.0, 3.0, 8.0, 0.0, 11.0, 26.0, 31.0, 0.0, 17.0, 6.0, 0.0, 3.0, 14.0, 8.0, 3.0, 6.0, 5.0, 8.0, 3.0, 3.0, 11.0, 11.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 12.621089528193751, "mean_processing_ms": 0.1639912702671728, "mean_inference_ms": 4.5967771099672925}, "off_policy_estimator": {}, "info": {"num_steps_trained": 312000, "num_steps_sampled": 166400, "sample_time_ms": 261554.79, "load_time_ms": 38.918, "grad_time_ms": 8201.071, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.882812572759576e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.18183590471744537, "policy_loss": -0.00839205738157034, "vf_loss": 3.3925907611846924, "vf_explained_var": 0.04012133553624153, "kl": 0.0007842599879950285, "entropy": 1.775907039642334, 
"entropy_coeff": 0.09785600006580353, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 166400, "episodes_total": 416, "training_iteration": 13, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-59-09", "timestamp": 1660244349, "time_this_iter_s": 39.64977407455444, "time_total_s": 2770.2488553524017, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2770.2488553524017, "timesteps_since_restore": 166400, "iterations_since_restore": 13, "perf": {"cpu_util_percent": 43.457142857142856, "ram_util_percent": 59.38095238095238}} +{"episode_reward_max": 76.0, "episode_reward_min": 3.0, "episode_reward_mean": 22.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 45.0}, "policy_reward_mean": {"ppo": 11.325}, "custom_metrics": {"sparse_reward_mean": 1.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 19.05, "shaped_reward_min": 3, "shaped_reward_max": 44, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, 
"useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.74, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.54, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.3, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.91, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 3.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.51, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 1.98, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.54, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.23, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.27, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.29, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, 
"useful_dish_drop_agent_0_mean": 1.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.88, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 1.08, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 0.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.34, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.71, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 11, "soup_drop_agent_1_mean": 0.6, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.51, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 1.98, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.51, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 1.98, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 
0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [19.0, 28.0, 14.0, 17.0, 17.0, 19.0, 17.0, 22.0, 63.0, 66.0, 22.0, 14.0, 9.0, 22.0, 14.0, 22.0, 20.0, 71.0, 25.0, 23.0, 3.0, 22.0, 22.0, 16.0, 20.0, 9.0, 24.0, 44.0, 12.0, 17.0, 20.0, 20.0, 27.0, 25.0, 11.0, 8.0, 20.0, 11.0, 60.0, 25.0, 22.0, 65.0, 12.0, 11.0, 23.0, 3.0, 30.0, 17.0, 19.0, 31.0, 28.0, 19.0, 14.0, 25.0, 25.0, 22.0, 20.0, 11.0, 11.0, 57.0, 17.0, 6.0, 17.0, 11.0, 11.0, 11.0, 14.0, 14.0, 76.0, 30.0, 25.0, 9.0, 28.0, 17.0, 68.0, 6.0, 3.0, 22.0, 17.0, 37.0, 14.0, 34.0, 12.0, 17.0, 27.0, 11.0, 25.0, 11.0, 17.0, 17.0, 25.0, 11.0, 12.0, 3.0, 19.0, 22.0, 60.0, 25.0, 25.0, 34.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": 
[8.0, 11.0, 17.0, 11.0, 3.0, 11.0, 5.0, 12.0, 3.0, 14.0, 3.0, 16.0, 11.0, 6.0, 14.0, 8.0, 31.0, 32.0, 29.0, 37.0, 11.0, 11.0, 5.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 6.0, 8.0, 14.0, 11.0, 9.0, 41.0, 30.0, 11.0, 14.0, 9.0, 14.0, 0.0, 3.0, 14.0, 8.0, 14.0, 8.0, 8.0, 8.0, 14.0, 6.0, 3.0, 6.0, 3.0, 21.0, 14.0, 30.0, 3.0, 9.0, 11.0, 6.0, 12.0, 8.0, 9.0, 11.0, 13.0, 14.0, 14.0, 11.0, 11.0, 0.0, 8.0, 0.0, 11.0, 9.0, 0.0, 11.0, 20.0, 40.0, 12.0, 13.0, 11.0, 11.0, 34.0, 31.0, 3.0, 9.0, 5.0, 6.0, 0.0, 23.0, 3.0, 0.0, 16.0, 14.0, 6.0, 11.0, 13.0, 6.0, 17.0, 14.0, 14.0, 14.0, 6.0, 13.0, 5.0, 9.0, 16.0, 9.0, 16.0, 9.0, 5.0, 17.0, 8.0, 12.0, 3.0, 8.0, 0.0, 11.0, 26.0, 31.0, 0.0, 17.0, 6.0, 0.0, 3.0, 14.0, 8.0, 3.0, 6.0, 5.0, 8.0, 3.0, 3.0, 11.0, 11.0, 3.0, 31.0, 45.0, 19.0, 11.0, 5.0, 20.0, 6.0, 3.0, 6.0, 22.0, 6.0, 11.0, 28.0, 40.0, 6.0, 0.0, 3.0, 0.0, 11.0, 11.0, 8.0, 9.0, 20.0, 17.0, 5.0, 9.0, 20.0, 14.0, 9.0, 3.0, 5.0, 12.0, 14.0, 13.0, 3.0, 8.0, 13.0, 12.0, 11.0, 0.0, 5.0, 12.0, 9.0, 8.0, 17.0, 8.0, 0.0, 11.0, 9.0, 3.0, 3.0, 0.0, 6.0, 13.0, 8.0, 14.0, 28.0, 32.0, 19.0, 6.0, 8.0, 17.0, 19.0, 15.0]}, "sampler_perf": {"mean_env_wait_ms": 11.674808393625103, "mean_processing_ms": 0.16471740447247565, "mean_inference_ms": 4.325696825802514}, "off_policy_estimator": {}, "info": {"num_steps_trained": 336000, "num_steps_sampled": 179200, "sample_time_ms": 262067.662, "load_time_ms": 39.631, "grad_time_ms": 8508.383, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.441406286379788e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.169602632522583, "policy_loss": -0.011421400122344494, "vf_loss": 3.2296648025512695, "vf_explained_var": 0.07911600917577744, "kl": 0.0008258241578005254, "entropy": 1.7740892171859741, "entropy_coeff": 0.08934400230646133, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 179200, "episodes_total": 448, "training_iteration": 14, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-59-41", "timestamp": 1660244381, "time_this_iter_s": 32.64548587799072, "time_total_s": 2802.8943412303925, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2802.8943412303925, "timesteps_since_restore": 179200, "iterations_since_restore": 14, "perf": {"cpu_util_percent": 44.92765957446808, "ram_util_percent": 58.32340425531916}} +{"episode_reward_max": 84.0, "episode_reward_min": 3.0, "episode_reward_mean": 23.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 51.0}, "policy_reward_mean": {"ppo": 11.545}, "custom_metrics": {"sparse_reward_mean": 1.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 19.49, "shaped_reward_min": 3, "shaped_reward_max": 44, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.62, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.9, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 2.94, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.0, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 3.41, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.61, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.53, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.07, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.42, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.01, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.35, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.91, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, 
"soup_pickup_agent_0_mean": 1.13, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 1.03, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 0.28, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.31, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.69, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 11, "soup_drop_agent_1_mean": 0.59, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.53, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.07, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.53, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.07, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [17.0, 84.0, 14.0, 9.0, 9.0, 34.0, 22.0, 9.0, 23.0, 20.0, 9.0, 36.0, 14.0, 22.0, 11.0, 20.0, 25.0, 17.0, 6.0, 9.0, 30.0, 60.0, 9.0, 25.0, 22.0, 14.0, 23.0, 28.0, 19.0, 6.0, 76.0, 25.0, 11.0, 11.0, 14.0, 14.0, 76.0, 30.0, 25.0, 9.0, 28.0, 17.0, 68.0, 6.0, 3.0, 22.0, 17.0, 37.0, 14.0, 34.0, 12.0, 17.0, 27.0, 11.0, 25.0, 11.0, 17.0, 17.0, 25.0, 11.0, 12.0, 3.0, 19.0, 22.0, 60.0, 25.0, 25.0, 34.0, 19.0, 28.0, 14.0, 17.0, 17.0, 19.0, 17.0, 22.0, 63.0, 66.0, 22.0, 14.0, 9.0, 22.0, 14.0, 22.0, 20.0, 71.0, 25.0, 23.0, 3.0, 22.0, 22.0, 16.0, 20.0, 9.0, 24.0, 44.0, 12.0, 17.0, 20.0, 20.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [14.0, 3.0, 33.0, 51.0, 3.0, 11.0, 3.0, 6.0, 0.0, 9.0, 14.0, 20.0, 9.0, 13.0, 0.0, 9.0, 17.0, 6.0, 14.0, 6.0, 3.0, 6.0, 27.0, 9.0, 8.0, 6.0, 3.0, 19.0, 3.0, 8.0, 8.0, 12.0, 11.0, 14.0, 8.0, 9.0, 
3.0, 3.0, 6.0, 3.0, 13.0, 17.0, 26.0, 34.0, 6.0, 3.0, 14.0, 11.0, 11.0, 11.0, 3.0, 11.0, 9.0, 14.0, 13.0, 15.0, 8.0, 11.0, 3.0, 3.0, 38.0, 38.0, 17.0, 8.0, 6.0, 5.0, 8.0, 3.0, 3.0, 11.0, 11.0, 3.0, 31.0, 45.0, 19.0, 11.0, 5.0, 20.0, 6.0, 3.0, 6.0, 22.0, 6.0, 11.0, 28.0, 40.0, 6.0, 0.0, 3.0, 0.0, 11.0, 11.0, 8.0, 9.0, 20.0, 17.0, 5.0, 9.0, 20.0, 14.0, 9.0, 3.0, 5.0, 12.0, 14.0, 13.0, 3.0, 8.0, 13.0, 12.0, 11.0, 0.0, 5.0, 12.0, 9.0, 8.0, 17.0, 8.0, 0.0, 11.0, 9.0, 3.0, 3.0, 0.0, 6.0, 13.0, 8.0, 14.0, 28.0, 32.0, 19.0, 6.0, 8.0, 17.0, 19.0, 15.0, 8.0, 11.0, 17.0, 11.0, 3.0, 11.0, 5.0, 12.0, 3.0, 14.0, 3.0, 16.0, 11.0, 6.0, 14.0, 8.0, 31.0, 32.0, 29.0, 37.0, 11.0, 11.0, 5.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 6.0, 8.0, 14.0, 11.0, 9.0, 41.0, 30.0, 11.0, 14.0, 9.0, 14.0, 0.0, 3.0, 14.0, 8.0, 14.0, 8.0, 8.0, 8.0, 14.0, 6.0, 3.0, 6.0, 3.0, 21.0, 14.0, 30.0, 3.0, 9.0, 11.0, 6.0, 12.0, 8.0, 9.0, 11.0]}, "sampler_perf": {"mean_env_wait_ms": 10.867209759257396, "mean_processing_ms": 0.16544513042105083, "mean_inference_ms": 4.098041869605518}, "off_policy_estimator": {}, "info": {"num_steps_trained": 360000, "num_steps_sampled": 192000, "sample_time_ms": 262263.341, "load_time_ms": 39.79, "grad_time_ms": 8700.33, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.220703143189894e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.14661313593387604, "policy_loss": -0.0037220455706119537, "vf_loss": 3.146031618118286, "vf_explained_var": 0.09564539045095444, "kl": 0.0008609917131252587, "entropy": 1.7716461420059204, "entropy_coeff": 0.08083199709653854, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 192000, "episodes_total": 480, "training_iteration": 15, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-00-10", "timestamp": 1660244410, "time_this_iter_s": 28.69369125366211, "time_total_s": 2831.5880324840546, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", 
"node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, 
"exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 
0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2831.5880324840546, "timesteps_since_restore": 192000, "iterations_since_restore": 15, "perf": {"cpu_util_percent": 32.489999999999995, "ram_util_percent": 57.802499999999995}} +{"episode_reward_max": 84.0, "episode_reward_min": 3.0, "episode_reward_mean": 25.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 51.0}, "policy_reward_mean": {"ppo": 12.94}, "custom_metrics": {"sparse_reward_mean": 2.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 20.68, "shaped_reward_min": 3, "shaped_reward_max": 44, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.11, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, 
"onion_pickup_agent_1_mean": 6.01, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.28, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.06, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.99, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 3.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.49, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.61, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.25, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.28, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.36, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.34, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.95, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.65, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 0.98, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 1.13, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.29, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.58, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.62, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.61, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.25, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.61, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.25, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [17.0, 26.0, 42.0, 36.0, 11.0, 17.0, 9.0, 12.0, 20.0, 30.0, 22.0, 60.0, 23.0, 66.0, 36.0, 28.0, 79.0, 22.0, 31.0, 68.0, 28.0, 20.0, 54.0, 9.0, 14.0, 17.0, 66.0, 20.0, 14.0, 11.0, 17.0, 19.0, 60.0, 25.0, 25.0, 34.0, 19.0, 28.0, 14.0, 17.0, 17.0, 19.0, 17.0, 22.0, 63.0, 66.0, 22.0, 14.0, 9.0, 22.0, 14.0, 22.0, 20.0, 71.0, 25.0, 23.0, 3.0, 22.0, 22.0, 16.0, 20.0, 9.0, 24.0, 44.0, 12.0, 17.0, 20.0, 20.0, 17.0, 84.0, 14.0, 9.0, 9.0, 34.0, 22.0, 9.0, 23.0, 20.0, 9.0, 36.0, 14.0, 22.0, 11.0, 20.0, 25.0, 17.0, 6.0, 9.0, 30.0, 60.0, 9.0, 25.0, 22.0, 14.0, 23.0, 28.0, 19.0, 6.0, 76.0, 25.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [12.0, 5.0, 20.0, 6.0, 14.0, 28.0, 19.0, 17.0, 3.0, 8.0, 3.0, 14.0, 3.0, 6.0, 6.0, 6.0, 3.0, 17.0, 17.0, 13.0, 9.0, 13.0, 26.0, 34.0, 14.0, 9.0, 37.0, 29.0, 8.0, 28.0, 12.0, 16.0, 29.0, 50.0, 3.0, 19.0, 8.0, 23.0, 33.0, 35.0, 5.0, 23.0, 14.0, 6.0, 31.0, 23.0, 9.0, 0.0, 3.0, 11.0, 3.0, 14.0, 26.0, 40.0, 3.0, 17.0, 6.0, 8.0, 5.0, 6.0, 9.0, 8.0, 8.0, 11.0, 28.0, 32.0, 19.0, 6.0, 8.0, 17.0, 19.0, 15.0, 8.0, 11.0, 17.0, 
11.0, 3.0, 11.0, 5.0, 12.0, 3.0, 14.0, 3.0, 16.0, 11.0, 6.0, 14.0, 8.0, 31.0, 32.0, 29.0, 37.0, 11.0, 11.0, 5.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 6.0, 8.0, 14.0, 11.0, 9.0, 41.0, 30.0, 11.0, 14.0, 9.0, 14.0, 0.0, 3.0, 14.0, 8.0, 14.0, 8.0, 8.0, 8.0, 14.0, 6.0, 3.0, 6.0, 3.0, 21.0, 14.0, 30.0, 3.0, 9.0, 11.0, 6.0, 12.0, 8.0, 9.0, 11.0, 14.0, 3.0, 33.0, 51.0, 3.0, 11.0, 3.0, 6.0, 0.0, 9.0, 14.0, 20.0, 9.0, 13.0, 0.0, 9.0, 17.0, 6.0, 14.0, 6.0, 3.0, 6.0, 27.0, 9.0, 8.0, 6.0, 3.0, 19.0, 3.0, 8.0, 8.0, 12.0, 11.0, 14.0, 8.0, 9.0, 3.0, 3.0, 6.0, 3.0, 13.0, 17.0, 26.0, 34.0, 6.0, 3.0, 14.0, 11.0, 11.0, 11.0, 3.0, 11.0, 9.0, 14.0, 13.0, 15.0, 8.0, 11.0, 3.0, 3.0, 38.0, 38.0, 17.0, 8.0]}, "sampler_perf": {"mean_env_wait_ms": 10.168319641902713, "mean_processing_ms": 0.16598079643560795, "mean_inference_ms": 3.884933201233045}, "off_policy_estimator": {}, "info": {"num_steps_trained": 384000, "num_steps_sampled": 204800, "sample_time_ms": 262331.542, "load_time_ms": 40.118, "grad_time_ms": 8762.055, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.10351571594947e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.13645179569721222, "policy_loss": -0.00917948316782713, "vf_loss": 5.045528888702393, "vf_explained_var": 0.08776132017374039, "kl": 0.0009270868613384664, "entropy": 1.7668260335922241, "entropy_coeff": 0.07231999933719635, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 204800, "episodes_total": 512, "training_iteration": 16, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-00-39", "timestamp": 1660244439, "time_this_iter_s": 28.684066772460938, "time_total_s": 2860.2720992565155, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": 
{"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate 
at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, 
"time_since_restore": 2860.2720992565155, "timesteps_since_restore": 204800, "iterations_since_restore": 16, "perf": {"cpu_util_percent": 36.62439024390244, "ram_util_percent": 57.81951219512195}} +{"episode_reward_max": 84.0, "episode_reward_min": 6.0, "episode_reward_mean": 27.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 51.0}, "policy_reward_mean": {"ppo": 13.93}, "custom_metrics": {"sparse_reward_mean": 3.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 21.06, "shaped_reward_min": 6, "shaped_reward_max": 44, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.37, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 6.13, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.43, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, 
"useful_onion_pickup_agent_1_mean": 4.09, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 3.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.71, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.35, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.08, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.84, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.38, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.78, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.57, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 0.98, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 1.08, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.35, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.57, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.61, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.71, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.35, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.71, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.35, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [17.0, 28.0, 34.0, 20.0, 57.0, 14.0, 71.0, 12.0, 12.0, 66.0, 28.0, 63.0, 17.0, 28.0, 23.0, 9.0, 20.0, 22.0, 71.0, 26.0, 27.0, 20.0, 20.0, 9.0, 19.0, 20.0, 76.0, 19.0, 68.0, 68.0, 17.0, 25.0, 12.0, 17.0, 20.0, 20.0, 17.0, 84.0, 14.0, 9.0, 9.0, 34.0, 22.0, 9.0, 23.0, 20.0, 9.0, 36.0, 14.0, 22.0, 11.0, 20.0, 25.0, 17.0, 6.0, 9.0, 30.0, 60.0, 9.0, 25.0, 22.0, 14.0, 23.0, 28.0, 19.0, 6.0, 76.0, 25.0, 17.0, 26.0, 42.0, 36.0, 11.0, 17.0, 9.0, 12.0, 20.0, 30.0, 22.0, 60.0, 23.0, 66.0, 36.0, 28.0, 79.0, 22.0, 31.0, 68.0, 28.0, 20.0, 54.0, 9.0, 14.0, 17.0, 66.0, 20.0, 14.0, 11.0, 17.0, 19.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 11.0, 19.0, 9.0, 17.0, 17.0, 3.0, 17.0, 26.0, 31.0, 8.0, 6.0, 38.0, 33.0, 6.0, 6.0, 3.0, 9.0, 34.0, 32.0, 17.0, 11.0, 31.0, 32.0, 6.0, 11.0, 16.0, 12.0, 12.0, 11.0, 3.0, 6.0, 3.0, 17.0, 10.0, 12.0, 36.0, 35.0, 11.0, 15.0, 13.0, 14.0, 3.0, 17.0, 14.0, 6.0, 6.0, 3.0, 6.0, 13.0, 12.0, 8.0, 40.0, 36.0, 8.0, 11.0, 40.0, 28.0, 34.0, 34.0, 3.0, 14.0, 9.0, 16.0, 3.0, 9.0, 11.0, 6.0, 12.0, 8.0, 9.0, 11.0, 14.0, 3.0, 33.0, 51.0, 3.0, 11.0, 3.0, 6.0, 0.0, 9.0, 14.0, 20.0, 9.0, 13.0, 0.0, 9.0, 17.0, 6.0, 14.0, 6.0, 3.0, 6.0, 27.0, 9.0, 8.0, 6.0, 3.0, 19.0, 3.0, 8.0, 8.0, 12.0, 11.0, 14.0, 8.0, 9.0, 3.0, 3.0, 6.0, 3.0, 13.0, 17.0, 26.0, 34.0, 6.0, 3.0, 14.0, 11.0, 
11.0, 11.0, 3.0, 11.0, 9.0, 14.0, 13.0, 15.0, 8.0, 11.0, 3.0, 3.0, 38.0, 38.0, 17.0, 8.0, 12.0, 5.0, 20.0, 6.0, 14.0, 28.0, 19.0, 17.0, 3.0, 8.0, 3.0, 14.0, 3.0, 6.0, 6.0, 6.0, 3.0, 17.0, 17.0, 13.0, 9.0, 13.0, 26.0, 34.0, 14.0, 9.0, 37.0, 29.0, 8.0, 28.0, 12.0, 16.0, 29.0, 50.0, 3.0, 19.0, 8.0, 23.0, 33.0, 35.0, 5.0, 23.0, 14.0, 6.0, 31.0, 23.0, 9.0, 0.0, 3.0, 11.0, 3.0, 14.0, 26.0, 40.0, 3.0, 17.0, 6.0, 8.0, 5.0, 6.0, 9.0, 8.0, 8.0, 11.0]}, "sampler_perf": {"mean_env_wait_ms": 9.556971747293405, "mean_processing_ms": 0.16611483871912305, "mean_inference_ms": 3.69458132267773}, "off_policy_estimator": {}, "info": {"num_steps_trained": 408000, "num_steps_sampled": 217600, "sample_time_ms": 20254.523, "load_time_ms": 38.861, "grad_time_ms": 8931.015, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.051757857974735e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.12081533670425415, "policy_loss": -0.008616355247795582, "vf_loss": 5.614309310913086, "vf_explained_var": 0.13559557497501373, "kl": 0.0008749772678129375, "entropy": 1.76718270778656, "entropy_coeff": 0.06380800157785416, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 217600, "episodes_total": 544, "training_iteration": 17, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-01-08", "timestamp": 1660244468, "time_this_iter_s": 29.310136079788208, "time_total_s": 2889.5822353363037, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2889.5822353363037, "timesteps_since_restore": 217600, "iterations_since_restore": 17, "perf": {"cpu_util_percent": 39.28333333333334, "ram_util_percent": 57.69761904761903}} +{"episode_reward_max": 82.0, "episode_reward_min": 
6.0, "episode_reward_mean": 31.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 50.0}, "policy_reward_mean": {"ppo": 15.58}, "custom_metrics": {"sparse_reward_mean": 4.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 23.16, "shaped_reward_min": 6, "shaped_reward_max": 47, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.73, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.83, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.76, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.82, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 3.06, 
"onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.57, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 2.08, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.25, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.02, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.94, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.45, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.08, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.32, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.47, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.49, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.57, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.73, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 2.08, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, 
"optimal_onion_potting_agent_1_mean": 2.25, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.08, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.25, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [76.0, 44.0, 28.0, 12.0, 36.0, 33.0, 9.0, 
24.0, 28.0, 25.0, 31.0, 68.0, 17.0, 47.0, 23.0, 12.0, 17.0, 22.0, 28.0, 23.0, 20.0, 37.0, 14.0, 8.0, 28.0, 22.0, 39.0, 82.0, 71.0, 57.0, 30.0, 9.0, 19.0, 6.0, 76.0, 25.0, 17.0, 26.0, 42.0, 36.0, 11.0, 17.0, 9.0, 12.0, 20.0, 30.0, 22.0, 60.0, 23.0, 66.0, 36.0, 28.0, 79.0, 22.0, 31.0, 68.0, 28.0, 20.0, 54.0, 9.0, 14.0, 17.0, 66.0, 20.0, 14.0, 11.0, 17.0, 19.0, 17.0, 28.0, 34.0, 20.0, 57.0, 14.0, 71.0, 12.0, 12.0, 66.0, 28.0, 63.0, 17.0, 28.0, 23.0, 9.0, 20.0, 22.0, 71.0, 26.0, 27.0, 20.0, 20.0, 9.0, 19.0, 20.0, 76.0, 19.0, 68.0, 68.0, 17.0, 25.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [34.0, 42.0, 18.0, 26.0, 11.0, 17.0, 9.0, 3.0, 17.0, 19.0, 19.0, 14.0, 3.0, 6.0, 16.0, 8.0, 16.0, 12.0, 12.0, 13.0, 20.0, 11.0, 37.0, 31.0, 9.0, 8.0, 28.0, 19.0, 9.0, 14.0, 3.0, 9.0, 11.0, 6.0, 8.0, 14.0, 11.0, 17.0, 9.0, 14.0, 9.0, 11.0, 20.0, 17.0, 9.0, 5.0, 5.0, 3.0, 16.0, 12.0, 12.0, 10.0, 12.0, 27.0, 38.0, 44.0, 35.0, 36.0, 29.0, 28.0, 11.0, 19.0, 0.0, 9.0, 8.0, 11.0, 3.0, 3.0, 38.0, 38.0, 17.0, 8.0, 12.0, 5.0, 20.0, 6.0, 14.0, 28.0, 19.0, 17.0, 3.0, 8.0, 3.0, 14.0, 3.0, 6.0, 6.0, 6.0, 3.0, 17.0, 17.0, 13.0, 9.0, 13.0, 26.0, 34.0, 14.0, 9.0, 37.0, 29.0, 8.0, 28.0, 12.0, 16.0, 29.0, 50.0, 3.0, 19.0, 8.0, 23.0, 33.0, 35.0, 5.0, 23.0, 14.0, 6.0, 31.0, 23.0, 9.0, 0.0, 3.0, 11.0, 3.0, 14.0, 26.0, 40.0, 3.0, 17.0, 6.0, 8.0, 5.0, 6.0, 9.0, 8.0, 8.0, 11.0, 6.0, 11.0, 19.0, 9.0, 17.0, 17.0, 3.0, 17.0, 26.0, 31.0, 8.0, 6.0, 38.0, 33.0, 6.0, 6.0, 3.0, 9.0, 34.0, 32.0, 17.0, 11.0, 31.0, 32.0, 6.0, 
11.0, 16.0, 12.0, 12.0, 11.0, 3.0, 6.0, 3.0, 17.0, 10.0, 12.0, 36.0, 35.0, 11.0, 15.0, 13.0, 14.0, 3.0, 17.0, 14.0, 6.0, 6.0, 3.0, 6.0, 13.0, 12.0, 8.0, 40.0, 36.0, 8.0, 11.0, 40.0, 28.0, 34.0, 34.0, 3.0, 14.0, 9.0, 16.0]}, "sampler_perf": {"mean_env_wait_ms": 9.01879810548929, "mean_processing_ms": 0.16630796767792247, "mean_inference_ms": 3.5295458802458652}, "off_policy_estimator": {}, "info": {"num_steps_trained": 432000, "num_steps_sampled": 230400, "sample_time_ms": 20513.179, "load_time_ms": 38.872, "grad_time_ms": 8952.524, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.5258789289873675e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.10658890753984451, "policy_loss": -0.009598230011761189, "vf_loss": 4.846475601196289, "vf_explained_var": 0.11691506952047348, "kl": 0.0009377954411320388, "entropy": 1.762791633605957, "entropy_coeff": 0.055296000093221664, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 230400, "episodes_total": 576, "training_iteration": 18, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-01-39", "timestamp": 1660244499, "time_this_iter_s": 30.67889380455017, "time_total_s": 2920.261129140854, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2920.261129140854, "timesteps_since_restore": 230400, "iterations_since_restore": 18, "perf": {"cpu_util_percent": 40.46744186046512, "ram_util_percent": 57.767441860465105}} +{"episode_reward_max": 88.0, "episode_reward_min": 8.0, "episode_reward_mean": 32.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 53.0}, "policy_reward_mean": {"ppo": 16.475}, 
"custom_metrics": {"sparse_reward_mean": 3.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 25.35, "shaped_reward_min": 8, "shaped_reward_max": 48, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.56, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.61, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.65, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 8, "useful_onion_pickup_agent_1_mean": 3.81, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.8, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.85, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.39, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.41, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 2.27, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.23, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 2.97, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.91, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 1.22, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.6, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.55, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.94, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.27, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.23, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.27, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.23, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [34.0, 42.0, 88.0, 33.0, 17.0, 30.0, 76.0, 85.0, 33.0, 31.0, 68.0, 12.0, 16.0, 79.0, 68.0, 19.0, 33.0, 19.0, 25.0, 31.0, 36.0, 44.0, 39.0, 11.0, 22.0, 42.0, 31.0, 25.0, 20.0, 31.0, 31.0, 17.0, 14.0, 11.0, 17.0, 19.0, 17.0, 28.0, 34.0, 20.0, 57.0, 14.0, 
71.0, 12.0, 12.0, 66.0, 28.0, 63.0, 17.0, 28.0, 23.0, 9.0, 20.0, 22.0, 71.0, 26.0, 27.0, 20.0, 20.0, 9.0, 19.0, 20.0, 76.0, 19.0, 68.0, 68.0, 17.0, 25.0, 76.0, 44.0, 28.0, 12.0, 36.0, 33.0, 9.0, 24.0, 28.0, 25.0, 31.0, 68.0, 17.0, 47.0, 23.0, 12.0, 17.0, 22.0, 28.0, 23.0, 20.0, 37.0, 14.0, 8.0, 28.0, 22.0, 39.0, 82.0, 71.0, 57.0, 30.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 23.0, 25.0, 17.0, 45.0, 43.0, 22.0, 11.0, 5.0, 12.0, 17.0, 13.0, 48.0, 28.0, 32.0, 53.0, 22.0, 11.0, 18.0, 13.0, 29.0, 39.0, 6.0, 6.0, 3.0, 13.0, 37.0, 42.0, 32.0, 36.0, 11.0, 8.0, 13.0, 20.0, 11.0, 8.0, 8.0, 17.0, 14.0, 17.0, 19.0, 17.0, 14.0, 30.0, 14.0, 25.0, 5.0, 6.0, 6.0, 16.0, 9.0, 33.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 12.0, 19.0, 11.0, 20.0, 14.0, 3.0, 6.0, 8.0, 5.0, 6.0, 9.0, 8.0, 8.0, 11.0, 6.0, 11.0, 19.0, 9.0, 17.0, 17.0, 3.0, 17.0, 26.0, 31.0, 8.0, 6.0, 38.0, 33.0, 6.0, 6.0, 3.0, 9.0, 34.0, 32.0, 17.0, 11.0, 31.0, 32.0, 6.0, 11.0, 16.0, 12.0, 12.0, 11.0, 3.0, 6.0, 3.0, 17.0, 10.0, 12.0, 36.0, 35.0, 11.0, 15.0, 13.0, 14.0, 3.0, 17.0, 14.0, 6.0, 6.0, 3.0, 6.0, 13.0, 12.0, 8.0, 40.0, 36.0, 8.0, 11.0, 40.0, 28.0, 34.0, 34.0, 3.0, 14.0, 9.0, 16.0, 34.0, 42.0, 18.0, 26.0, 11.0, 17.0, 9.0, 3.0, 17.0, 19.0, 19.0, 14.0, 3.0, 6.0, 16.0, 8.0, 16.0, 12.0, 12.0, 13.0, 20.0, 11.0, 37.0, 31.0, 9.0, 8.0, 28.0, 19.0, 9.0, 14.0, 3.0, 9.0, 11.0, 6.0, 8.0, 14.0, 11.0, 17.0, 9.0, 14.0, 9.0, 11.0, 20.0, 17.0, 9.0, 5.0, 5.0, 3.0, 16.0, 12.0, 12.0, 10.0, 12.0, 27.0, 38.0, 44.0, 35.0, 36.0, 29.0, 
28.0, 11.0, 19.0, 0.0, 9.0]}, "sampler_perf": {"mean_env_wait_ms": 8.541158675961446, "mean_processing_ms": 0.1664622179417048, "mean_inference_ms": 3.3849019348875076}, "off_policy_estimator": {}, "info": {"num_steps_trained": 456000, "num_steps_sampled": 243200, "sample_time_ms": 20842.585, "load_time_ms": 38.976, "grad_time_ms": 8953.309, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 7.629394644936838e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.09107109159231186, "policy_loss": -0.009291496127843857, "vf_loss": 5.607062816619873, "vf_explained_var": 0.08896120637655258, "kl": 0.0008400729275308549, "entropy": 1.7600102424621582, "entropy_coeff": 0.04678399860858917, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 243200, "episodes_total": 608, "training_iteration": 19, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-02-08", "timestamp": 1660244528, "time_this_iter_s": 29.248838186264038, "time_total_s": 2949.509967327118, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 
0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2949.509967327118, "timesteps_since_restore": 243200, "iterations_since_restore": 19, "perf": {"cpu_util_percent": 33.43571428571428, "ram_util_percent": 57.790476190476205}} +{"episode_reward_max": 88.0, "episode_reward_min": 8.0, "episode_reward_mean": 37.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 56.0}, "policy_reward_mean": {"ppo": 18.55}, "custom_metrics": {"sparse_reward_mean": 4.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 27.5, "shaped_reward_min": 8, "shaped_reward_max": 48, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.54, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.78, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.75, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.94, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.61, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.45, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.42, 
"potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.16, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.13, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.31, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.65, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.61, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.62, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 1.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.45, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.42, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.45, 
"viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.42, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [23.0, 74.0, 17.0, 14.0, 9.0, 76.0, 63.0, 88.0, 66.0, 37.0, 37.0, 37.0, 68.0, 77.0, 28.0, 28.0, 68.0, 25.0, 60.0, 20.0, 22.0, 36.0, 23.0, 82.0, 12.0, 9.0, 23.0, 19.0, 42.0, 79.0, 42.0, 20.0, 68.0, 68.0, 17.0, 25.0, 76.0, 44.0, 28.0, 12.0, 36.0, 33.0, 9.0, 24.0, 28.0, 25.0, 31.0, 68.0, 17.0, 47.0, 23.0, 12.0, 17.0, 22.0, 28.0, 23.0, 20.0, 37.0, 14.0, 8.0, 28.0, 22.0, 39.0, 82.0, 71.0, 57.0, 30.0, 9.0, 34.0, 42.0, 88.0, 33.0, 17.0, 30.0, 76.0, 85.0, 33.0, 31.0, 
68.0, 12.0, 16.0, 79.0, 68.0, 19.0, 33.0, 19.0, 25.0, 31.0, 36.0, 44.0, 39.0, 11.0, 22.0, 42.0, 31.0, 25.0, 20.0, 31.0, 31.0, 17.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 12.0, 43.0, 31.0, 11.0, 6.0, 14.0, 0.0, 9.0, 0.0, 45.0, 31.0, 28.0, 35.0, 32.0, 56.0, 38.0, 28.0, 14.0, 23.0, 8.0, 29.0, 14.0, 23.0, 34.0, 34.0, 34.0, 43.0, 14.0, 14.0, 14.0, 14.0, 30.0, 38.0, 14.0, 11.0, 26.0, 34.0, 6.0, 14.0, 8.0, 14.0, 17.0, 19.0, 17.0, 6.0, 39.0, 43.0, 6.0, 6.0, 3.0, 6.0, 11.0, 12.0, 13.0, 6.0, 17.0, 25.0, 37.0, 42.0, 22.0, 20.0, 6.0, 14.0, 40.0, 28.0, 34.0, 34.0, 3.0, 14.0, 9.0, 16.0, 34.0, 42.0, 18.0, 26.0, 11.0, 17.0, 9.0, 3.0, 17.0, 19.0, 19.0, 14.0, 3.0, 6.0, 16.0, 8.0, 16.0, 12.0, 12.0, 13.0, 20.0, 11.0, 37.0, 31.0, 9.0, 8.0, 28.0, 19.0, 9.0, 14.0, 3.0, 9.0, 11.0, 6.0, 8.0, 14.0, 11.0, 17.0, 9.0, 14.0, 9.0, 11.0, 20.0, 17.0, 9.0, 5.0, 5.0, 3.0, 16.0, 12.0, 12.0, 10.0, 12.0, 27.0, 38.0, 44.0, 35.0, 36.0, 29.0, 28.0, 11.0, 19.0, 0.0, 9.0, 11.0, 23.0, 25.0, 17.0, 45.0, 43.0, 22.0, 11.0, 5.0, 12.0, 17.0, 13.0, 48.0, 28.0, 32.0, 53.0, 22.0, 11.0, 18.0, 13.0, 29.0, 39.0, 6.0, 6.0, 3.0, 13.0, 37.0, 42.0, 32.0, 36.0, 11.0, 8.0, 13.0, 20.0, 11.0, 8.0, 8.0, 17.0, 14.0, 17.0, 19.0, 17.0, 14.0, 30.0, 14.0, 25.0, 5.0, 6.0, 6.0, 16.0, 9.0, 33.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 12.0, 19.0, 11.0, 20.0, 14.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 8.114151695990262, "mean_processing_ms": 0.1664756703889973, "mean_inference_ms": 3.2561915126083236}, "off_policy_estimator": {}, 
"info": {"num_steps_trained": 480000, "num_steps_sampled": 256000, "sample_time_ms": 21118.245, "load_time_ms": 39.16, "grad_time_ms": 8931.236, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.814697322468419e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.07289835065603256, "policy_loss": -0.006338973995298147, "vf_loss": 7.939427852630615, "vf_explained_var": 0.1275780349969864, "kl": 0.000996587099507451, "entropy": 1.7598587274551392, "entropy_coeff": 0.03827200084924698, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 256000, "episodes_total": 640, "training_iteration": 20, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-02-37", "timestamp": 1660244557, "time_this_iter_s": 28.921189069747925, "time_total_s": 2978.431156396866, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": 
{"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2978.431156396866, "timesteps_since_restore": 256000, "iterations_since_restore": 20, "perf": {"cpu_util_percent": 30.78048780487805, "ram_util_percent": 57.77073170731706}} +{"episode_reward_max": 116.0, "episode_reward_min": 9.0, "episode_reward_mean": 39.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 19.52}, "custom_metrics": {"sparse_reward_mean": 5.8, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 27.44, "shaped_reward_min": 9, "shaped_reward_max": 48, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.18, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.61, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.52, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.9, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.3, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.63, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.32, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, 
"dish_pickup_agent_1_mean": 3.21, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.46, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.87, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.61, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.36, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.49, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.58, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.57, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.69, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.85, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.3, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.63, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.3, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.63, "viable_onion_potting_agent_1_min": 0, 
"viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [25.0, 9.0, 17.0, 27.0, 14.0, 19.0, 60.0, 36.0, 25.0, 17.0, 16.0, 28.0, 34.0, 45.0, 23.0, 36.0, 22.0, 22.0, 63.0, 12.0, 68.0, 25.0, 63.0, 25.0, 11.0, 57.0, 76.0, 30.0, 116.0, 45.0, 79.0, 80.0, 71.0, 57.0, 30.0, 9.0, 34.0, 42.0, 88.0, 33.0, 17.0, 30.0, 76.0, 85.0, 33.0, 31.0, 68.0, 12.0, 16.0, 79.0, 68.0, 19.0, 33.0, 19.0, 25.0, 31.0, 36.0, 44.0, 39.0, 11.0, 22.0, 42.0, 31.0, 25.0, 20.0, 31.0, 31.0, 17.0, 23.0, 74.0, 17.0, 14.0, 9.0, 76.0, 63.0, 88.0, 66.0, 37.0, 37.0, 37.0, 68.0, 77.0, 28.0, 28.0, 68.0, 25.0, 60.0, 20.0, 22.0, 36.0, 23.0, 82.0, 12.0, 9.0, 23.0, 19.0, 42.0, 79.0, 42.0, 20.0], "episode_lengths": [400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [12.0, 13.0, 3.0, 6.0, 6.0, 11.0, 10.0, 17.0, 11.0, 3.0, 6.0, 13.0, 26.0, 34.0, 17.0, 19.0, 3.0, 22.0, 8.0, 9.0, 8.0, 8.0, 19.0, 9.0, 28.0, 6.0, 25.0, 20.0, 14.0, 9.0, 9.0, 27.0, 16.0, 6.0, 11.0, 11.0, 31.0, 32.0, 6.0, 6.0, 33.0, 35.0, 13.0, 12.0, 29.0, 34.0, 6.0, 19.0, 3.0, 8.0, 26.0, 31.0, 34.0, 42.0, 10.0, 20.0, 68.0, 48.0, 22.0, 23.0, 48.0, 31.0, 45.0, 35.0, 35.0, 36.0, 29.0, 28.0, 11.0, 19.0, 0.0, 9.0, 11.0, 23.0, 25.0, 17.0, 45.0, 43.0, 22.0, 11.0, 5.0, 12.0, 17.0, 13.0, 48.0, 28.0, 32.0, 53.0, 22.0, 11.0, 18.0, 13.0, 29.0, 39.0, 6.0, 6.0, 3.0, 13.0, 37.0, 42.0, 32.0, 36.0, 11.0, 8.0, 13.0, 20.0, 11.0, 8.0, 8.0, 17.0, 14.0, 17.0, 19.0, 17.0, 14.0, 30.0, 14.0, 25.0, 5.0, 6.0, 6.0, 16.0, 9.0, 33.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 12.0, 19.0, 11.0, 20.0, 14.0, 3.0, 11.0, 12.0, 43.0, 31.0, 11.0, 6.0, 14.0, 0.0, 9.0, 0.0, 45.0, 31.0, 28.0, 35.0, 32.0, 56.0, 38.0, 28.0, 14.0, 23.0, 8.0, 29.0, 14.0, 23.0, 34.0, 34.0, 34.0, 43.0, 14.0, 14.0, 14.0, 14.0, 30.0, 38.0, 14.0, 11.0, 26.0, 34.0, 6.0, 14.0, 8.0, 14.0, 17.0, 19.0, 17.0, 6.0, 39.0, 43.0, 6.0, 6.0, 3.0, 6.0, 11.0, 12.0, 13.0, 6.0, 17.0, 25.0, 37.0, 42.0, 22.0, 20.0, 6.0, 14.0]}, "sampler_perf": {"mean_env_wait_ms": 7.729743796447544, "mean_processing_ms": 0.1663097758329898, "mean_inference_ms": 3.1401875416297957}, "off_policy_estimator": {}, "info": {"num_steps_trained": 504000, "num_steps_sampled": 268800, "sample_time_ms": 21421.608, "load_time_ms": 38.85, "grad_time_ms": 8956.305, 
"update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.9073486612342094e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.05712709203362465, "policy_loss": -0.005733281373977661, "vf_loss": 7.644298553466797, "vf_explained_var": 0.10351377725601196, "kl": 0.0011409734142944217, "entropy": 1.7526286840438843, "entropy_coeff": 0.029759999364614487, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 268800, "episodes_total": 672, "training_iteration": 21, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-03-07", "timestamp": 1660244587, "time_this_iter_s": 29.522944927215576, "time_total_s": 3007.9541013240814, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": 
["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3007.9541013240814, "timesteps_since_restore": 268800, "iterations_since_restore": 21, "perf": {"cpu_util_percent": 34.18809523809524, "ram_util_percent": 57.730952380952374}} +{"episode_reward_max": 116.0, "episode_reward_min": 9.0, "episode_reward_mean": 37.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 18.735}, "custom_metrics": {"sparse_reward_mean": 5.4, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 26.67, "shaped_reward_min": 9, "shaped_reward_max": 50, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 
0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.24, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.51, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.51, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.59, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.46, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.49, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.32, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.55, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.18, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.39, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, 
"useful_dish_pickup_agent_1_mean": 0.52, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.81, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.35, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.52, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.73, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.71, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.32, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.55, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.32, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.55, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [73.0, 25.0, 31.0, 34.0, 16.0, 9.0, 23.0, 39.0, 35.0, 28.0, 79.0, 36.0, 57.0, 25.0, 28.0, 12.0, 17.0, 25.0, 25.0, 71.0, 44.0, 62.0, 22.0, 17.0, 23.0, 25.0, 37.0, 66.0, 50.0, 23.0, 12.0, 30.0, 20.0, 31.0, 31.0, 17.0, 23.0, 74.0, 17.0, 14.0, 9.0, 76.0, 63.0, 88.0, 66.0, 37.0, 37.0, 37.0, 68.0, 77.0, 28.0, 28.0, 68.0, 25.0, 60.0, 20.0, 22.0, 36.0, 23.0, 82.0, 12.0, 9.0, 23.0, 19.0, 42.0, 79.0, 42.0, 20.0, 25.0, 9.0, 17.0, 27.0, 14.0, 19.0, 60.0, 36.0, 25.0, 17.0, 16.0, 28.0, 34.0, 45.0, 23.0, 36.0, 22.0, 22.0, 63.0, 12.0, 68.0, 25.0, 63.0, 25.0, 11.0, 57.0, 76.0, 30.0, 116.0, 45.0, 79.0, 80.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [41.0, 32.0, 17.0, 8.0, 10.0, 21.0, 21.0, 13.0, 6.0, 10.0, 6.0, 3.0, 9.0, 14.0, 25.0, 14.0, 27.0, 8.0, 5.0, 23.0, 34.0, 45.0, 25.0, 11.0, 23.0, 34.0, 14.0, 11.0, 14.0, 14.0, 6.0, 6.0, 3.0, 14.0, 14.0, 11.0, 11.0, 14.0, 34.0, 37.0, 28.0, 16.0, 26.0, 36.0, 13.0, 9.0, 6.0, 11.0, 9.0, 14.0, 20.0, 5.0, 23.0, 14.0, 32.0, 34.0, 19.0, 31.0, 11.0, 12.0, 6.0, 6.0, 9.0, 21.0, 6.0, 14.0, 12.0, 19.0, 11.0, 20.0, 14.0, 3.0, 11.0, 12.0, 43.0, 31.0, 11.0, 6.0, 14.0, 0.0, 9.0, 0.0, 45.0, 31.0, 28.0, 35.0, 32.0, 56.0, 38.0, 28.0, 14.0, 23.0, 8.0, 29.0, 14.0, 23.0, 34.0, 34.0, 34.0, 43.0, 14.0, 14.0, 14.0, 14.0, 30.0, 38.0, 14.0, 11.0, 26.0, 34.0, 6.0, 14.0, 8.0, 14.0, 17.0, 19.0, 17.0, 6.0, 39.0, 43.0, 6.0, 6.0, 3.0, 6.0, 11.0, 12.0, 13.0, 6.0, 17.0, 25.0, 37.0, 42.0, 22.0, 20.0, 6.0, 14.0, 12.0, 13.0, 3.0, 6.0, 6.0, 11.0, 10.0, 17.0, 11.0, 3.0, 6.0, 13.0, 26.0, 34.0, 17.0, 19.0, 3.0, 22.0, 8.0, 9.0, 8.0, 8.0, 19.0, 9.0, 28.0, 6.0, 25.0, 20.0, 14.0, 9.0, 9.0, 27.0, 16.0, 6.0, 11.0, 11.0, 31.0, 32.0, 6.0, 6.0, 33.0, 35.0, 13.0, 12.0, 29.0, 34.0, 6.0, 19.0, 3.0, 8.0, 26.0, 31.0, 34.0, 42.0, 10.0, 20.0, 68.0, 48.0, 22.0, 23.0, 48.0, 31.0, 45.0, 35.0]}, "sampler_perf": {"mean_env_wait_ms": 7.382168276478265, "mean_processing_ms": 0.16610498275202595, "mean_inference_ms": 3.034581035402087}, "off_policy_estimator": {}, "info": {"num_steps_trained": 528000, "num_steps_sampled": 281600, "sample_time_ms": 21629.558, "load_time_ms": 39.186, "grad_time_ms": 8958.867, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 9.536743306171047e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.045350998640060425, "policy_loss": -0.008628163486719131, "vf_loss": 5.3433098793029785, 
"vf_explained_var": 0.18131445348262787, "kl": 0.0009087324724532664, "entropy": 1.7534428834915161, "entropy_coeff": 0.021247999742627144, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 281600, "episodes_total": 704, "training_iteration": 22, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-03-36", "timestamp": 1660244616, "time_this_iter_s": 29.197812795639038, "time_total_s": 3037.1519141197205, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3037.1519141197205, "timesteps_since_restore": 281600, "iterations_since_restore": 22, "perf": {"cpu_util_percent": 36.32142857142857, "ram_util_percent": 57.66904761904762}} +{"episode_reward_max": 128.0, "episode_reward_min": 9.0, "episode_reward_mean": 40.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 20.145}, "custom_metrics": {"sparse_reward_mean": 6.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 28.29, "shaped_reward_min": 9, "shaped_reward_max": 53, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.24, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.54, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.44, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.7, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 2.43, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.43, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.43, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.7, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.2, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.37, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.02, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.77, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 1.49, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.41, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.55, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.82, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.67, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.43, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.7, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.43, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.7, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [63.0, 9.0, 12.0, 125.0, 82.0, 68.0, 47.0, 24.0, 28.0, 30.0, 33.0, 20.0, 47.0, 128.0, 22.0, 37.0, 43.0, 74.0, 80.0, 53.0, 62.0, 76.0, 36.0, 31.0, 39.0, 20.0, 28.0, 9.0, 34.0, 17.0, 60.0, 85.0, 42.0, 79.0, 42.0, 20.0, 25.0, 9.0, 17.0, 27.0, 14.0, 19.0, 60.0, 36.0, 25.0, 17.0, 16.0, 28.0, 34.0, 45.0, 23.0, 36.0, 22.0, 22.0, 63.0, 12.0, 68.0, 25.0, 63.0, 25.0, 11.0, 57.0, 76.0, 30.0, 116.0, 45.0, 79.0, 80.0, 73.0, 25.0, 31.0, 34.0, 16.0, 9.0, 23.0, 39.0, 35.0, 28.0, 79.0, 36.0, 57.0, 25.0, 28.0, 12.0, 17.0, 25.0, 25.0, 71.0, 44.0, 62.0, 22.0, 17.0, 23.0, 25.0, 37.0, 66.0, 50.0, 23.0, 12.0, 30.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [29.0, 34.0, 3.0, 6.0, 6.0, 6.0, 58.0, 67.0, 48.0, 34.0, 32.0, 36.0, 8.0, 39.0, 9.0, 15.0, 13.0, 15.0, 19.0, 11.0, 14.0, 19.0, 8.0, 12.0, 28.0, 19.0, 61.0, 67.0, 10.0, 12.0, 11.0, 26.0, 14.0, 29.0, 40.0, 34.0, 38.0, 42.0, 17.0, 36.0, 26.0, 36.0, 39.0, 37.0, 14.0, 22.0, 22.0, 9.0, 22.0, 17.0, 3.0, 17.0, 8.0, 20.0, 9.0, 0.0, 17.0, 17.0, 11.0, 6.0, 31.0, 29.0, 48.0, 37.0, 17.0, 25.0, 37.0, 42.0, 22.0, 20.0, 6.0, 14.0, 12.0, 13.0, 3.0, 6.0, 6.0, 11.0, 10.0, 17.0, 11.0, 3.0, 6.0, 13.0, 26.0, 34.0, 17.0, 19.0, 3.0, 22.0, 8.0, 9.0, 8.0, 8.0, 19.0, 9.0, 28.0, 6.0, 25.0, 20.0, 14.0, 9.0, 9.0, 27.0, 16.0, 6.0, 11.0, 11.0, 31.0, 32.0, 6.0, 6.0, 33.0, 35.0, 13.0, 12.0, 29.0, 34.0, 6.0, 19.0, 3.0, 8.0, 26.0, 31.0, 34.0, 42.0, 10.0, 20.0, 68.0, 48.0, 22.0, 23.0, 48.0, 31.0, 45.0, 35.0, 41.0, 32.0, 17.0, 8.0, 10.0, 21.0, 21.0, 13.0, 6.0, 10.0, 6.0, 3.0, 9.0, 14.0, 25.0, 14.0, 27.0, 8.0, 5.0, 23.0, 34.0, 45.0, 25.0, 11.0, 23.0, 34.0, 14.0, 11.0, 14.0, 14.0, 6.0, 6.0, 3.0, 14.0, 14.0, 11.0, 11.0, 14.0, 34.0, 37.0, 28.0, 16.0, 26.0, 36.0, 13.0, 9.0, 6.0, 11.0, 9.0, 14.0, 20.0, 5.0, 23.0, 14.0, 32.0, 34.0, 19.0, 31.0, 11.0, 12.0, 6.0, 6.0, 9.0, 21.0]}, "sampler_perf": {"mean_env_wait_ms": 7.066502405705404, "mean_processing_ms": 0.16594670413037083, "mean_inference_ms": 2.938769748646424}, "off_policy_estimator": {}, "info": {"num_steps_trained": 552000, "num_steps_sampled": 294400, "sample_time_ms": 20554.7, "load_time_ms": 39.212, "grad_time_ms": 9005.588, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 4.7683716530855236e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.025955183431506157, "policy_loss": -0.0047044227831065655, "vf_loss": 9.28939437866211, "vf_explained_var": 0.20618398487567902, "kl": 0.0012655678438022733, "entropy": 1.741496205329895, "entropy_coeff": 0.012736000120639801, "model": {}}}}, 
"optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 294400, "episodes_total": 736, "training_iteration": 23, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-04-05", "timestamp": 1660244645, "time_this_iter_s": 29.364897966384888, "time_total_s": 3066.5168120861053, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, 
"NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3066.5168120861053, "timesteps_since_restore": 294400, "iterations_since_restore": 23, "perf": {"cpu_util_percent": 34.0, "ram_util_percent": 57.70487804878048}} +{"episode_reward_max": 128.0, "episode_reward_min": 9.0, "episode_reward_mean": 43.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 21.51}, "custom_metrics": {"sparse_reward_mean": 6.8, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 29.42, "shaped_reward_min": 9, "shaped_reward_max": 53, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 
0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.56, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 5.35, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.71, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.74, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.41, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 2.68, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.66, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.29, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.32, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.52, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.46, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.47, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.6, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.71, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.76, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.66, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.68, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.66, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.68, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.66, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 42.0, 31.0, 28.0, 34.0, 41.0, 14.0, 20.0, 31.0, 41.0, 79.0, 30.0, 74.0, 28.0, 9.0, 22.0, 39.0, 65.0, 63.0, 66.0, 68.0, 22.0, 76.0, 47.0, 63.0, 79.0, 79.0, 74.0, 25.0, 20.0, 17.0, 25.0, 116.0, 45.0, 79.0, 80.0, 73.0, 25.0, 31.0, 34.0, 16.0, 9.0, 23.0, 39.0, 35.0, 28.0, 79.0, 36.0, 57.0, 25.0, 28.0, 12.0, 17.0, 25.0, 25.0, 71.0, 44.0, 62.0, 22.0, 17.0, 23.0, 25.0, 37.0, 66.0, 50.0, 23.0, 12.0, 30.0, 63.0, 9.0, 12.0, 125.0, 82.0, 68.0, 47.0, 24.0, 28.0, 30.0, 33.0, 20.0, 47.0, 128.0, 22.0, 37.0, 43.0, 74.0, 80.0, 53.0, 62.0, 76.0, 36.0, 31.0, 39.0, 20.0, 28.0, 9.0, 34.0, 17.0, 60.0, 85.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 22.0, 20.0, 19.0, 12.0, 14.0, 
14.0, 29.0, 5.0, 25.0, 16.0, 14.0, 0.0, 9.0, 11.0, 16.0, 15.0, 14.0, 27.0, 37.0, 42.0, 15.0, 15.0, 42.0, 32.0, 16.0, 12.0, 6.0, 3.0, 19.0, 3.0, 28.0, 11.0, 28.0, 37.0, 31.0, 32.0, 35.0, 31.0, 26.0, 42.0, 5.0, 17.0, 34.0, 42.0, 31.0, 16.0, 28.0, 35.0, 35.0, 44.0, 40.0, 39.0, 38.0, 36.0, 8.0, 17.0, 12.0, 8.0, 8.0, 9.0, 9.0, 16.0, 68.0, 48.0, 22.0, 23.0, 48.0, 31.0, 45.0, 35.0, 41.0, 32.0, 17.0, 8.0, 10.0, 21.0, 21.0, 13.0, 6.0, 10.0, 6.0, 3.0, 9.0, 14.0, 25.0, 14.0, 27.0, 8.0, 5.0, 23.0, 34.0, 45.0, 25.0, 11.0, 23.0, 34.0, 14.0, 11.0, 14.0, 14.0, 6.0, 6.0, 3.0, 14.0, 14.0, 11.0, 11.0, 14.0, 34.0, 37.0, 28.0, 16.0, 26.0, 36.0, 13.0, 9.0, 6.0, 11.0, 9.0, 14.0, 20.0, 5.0, 23.0, 14.0, 32.0, 34.0, 19.0, 31.0, 11.0, 12.0, 6.0, 6.0, 9.0, 21.0, 29.0, 34.0, 3.0, 6.0, 6.0, 6.0, 58.0, 67.0, 48.0, 34.0, 32.0, 36.0, 8.0, 39.0, 9.0, 15.0, 13.0, 15.0, 19.0, 11.0, 14.0, 19.0, 8.0, 12.0, 28.0, 19.0, 61.0, 67.0, 10.0, 12.0, 11.0, 26.0, 14.0, 29.0, 40.0, 34.0, 38.0, 42.0, 17.0, 36.0, 26.0, 36.0, 39.0, 37.0, 14.0, 22.0, 22.0, 9.0, 22.0, 17.0, 3.0, 17.0, 8.0, 20.0, 9.0, 0.0, 17.0, 17.0, 11.0, 6.0, 31.0, 29.0, 48.0, 37.0]}, "sampler_perf": {"mean_env_wait_ms": 6.778628865492887, "mean_processing_ms": 0.1658515378049688, "mean_inference_ms": 2.8523193064237637}, "off_policy_estimator": {}, "info": {"num_steps_trained": 576000, "num_steps_sampled": 307200, "sample_time_ms": 20518.226, "load_time_ms": 38.592, "grad_time_ms": 9000.858, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.3841858265427618e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.01208301167935133, "policy_loss": -0.0054773432202637196, "vf_loss": 7.661229610443115, "vf_explained_var": 0.24070757627487183, "kl": 0.001010378822684288, "entropy": 1.7452141046524048, "entropy_coeff": 0.004224000032991171, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 307200, "episodes_total": 768, "training_iteration": 24, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-04-37", "timestamp": 1660244677, "time_this_iter_s": 32.22774386405945, "time_total_s": 3098.744555950165, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, 
"monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": 
"ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3098.744555950165, "timesteps_since_restore": 307200, "iterations_since_restore": 24, "perf": {"cpu_util_percent": 33.75869565217391, "ram_util_percent": 57.7217391304348}} +{"episode_reward_max": 128.0, "episode_reward_min": 9.0, "episode_reward_mean": 45.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 67.0}, "policy_reward_mean": {"ppo": 22.71}, "custom_metrics": {"sparse_reward_mean": 7.4, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 30.62, "shaped_reward_min": 9, "shaped_reward_max": 56, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.3, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 5.66, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.55, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.23, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 2.55, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.99, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.67, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.36, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.62, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.91, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.77, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, 
"soup_pickup_agent_0_mean": 1.57, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.63, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.79, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.87, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.68, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.55, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.99, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.55, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.99, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [38.0, 33.0, 12.0, 96.0, 77.0, 31.0, 16.0, 48.0, 28.0, 53.0, 20.0, 28.0, 20.0, 74.0, 12.0, 79.0, 17.0, 91.0, 76.0, 31.0, 9.0, 23.0, 84.0, 68.0, 125.0, 82.0, 31.0, 33.0, 17.0, 42.0, 79.0, 71.0, 50.0, 23.0, 12.0, 30.0, 63.0, 9.0, 12.0, 125.0, 82.0, 68.0, 47.0, 24.0, 28.0, 30.0, 33.0, 20.0, 47.0, 128.0, 22.0, 37.0, 43.0, 74.0, 80.0, 53.0, 62.0, 76.0, 36.0, 31.0, 39.0, 20.0, 28.0, 9.0, 34.0, 17.0, 60.0, 85.0, 9.0, 42.0, 31.0, 28.0, 34.0, 41.0, 14.0, 20.0, 31.0, 41.0, 79.0, 30.0, 74.0, 28.0, 9.0, 22.0, 39.0, 65.0, 63.0, 66.0, 68.0, 22.0, 76.0, 47.0, 63.0, 79.0, 79.0, 74.0, 25.0, 20.0, 17.0, 25.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [25.0, 13.0, 8.0, 25.0, 6.0, 6.0, 59.0, 37.0, 34.0, 43.0, 20.0, 11.0, 0.0, 16.0, 20.0, 28.0, 16.0, 12.0, 30.0, 23.0, 8.0, 12.0, 14.0, 14.0, 11.0, 9.0, 39.0, 35.0, 6.0, 6.0, 37.0, 42.0, 
14.0, 3.0, 45.0, 46.0, 42.0, 34.0, 20.0, 11.0, 3.0, 6.0, 12.0, 11.0, 33.0, 51.0, 40.0, 28.0, 59.0, 66.0, 40.0, 42.0, 17.0, 14.0, 14.0, 19.0, 5.0, 12.0, 6.0, 36.0, 34.0, 45.0, 29.0, 42.0, 19.0, 31.0, 11.0, 12.0, 6.0, 6.0, 9.0, 21.0, 29.0, 34.0, 3.0, 6.0, 6.0, 6.0, 58.0, 67.0, 48.0, 34.0, 32.0, 36.0, 8.0, 39.0, 9.0, 15.0, 13.0, 15.0, 19.0, 11.0, 14.0, 19.0, 8.0, 12.0, 28.0, 19.0, 61.0, 67.0, 10.0, 12.0, 11.0, 26.0, 14.0, 29.0, 40.0, 34.0, 38.0, 42.0, 17.0, 36.0, 26.0, 36.0, 39.0, 37.0, 14.0, 22.0, 22.0, 9.0, 22.0, 17.0, 3.0, 17.0, 8.0, 20.0, 9.0, 0.0, 17.0, 17.0, 11.0, 6.0, 31.0, 29.0, 48.0, 37.0, 6.0, 3.0, 22.0, 20.0, 19.0, 12.0, 14.0, 14.0, 29.0, 5.0, 25.0, 16.0, 14.0, 0.0, 9.0, 11.0, 16.0, 15.0, 14.0, 27.0, 37.0, 42.0, 15.0, 15.0, 42.0, 32.0, 16.0, 12.0, 6.0, 3.0, 19.0, 3.0, 28.0, 11.0, 28.0, 37.0, 31.0, 32.0, 35.0, 31.0, 26.0, 42.0, 5.0, 17.0, 34.0, 42.0, 31.0, 16.0, 28.0, 35.0, 35.0, 44.0, 40.0, 39.0, 38.0, 36.0, 8.0, 17.0, 12.0, 8.0, 8.0, 9.0, 9.0, 16.0]}, "sampler_perf": {"mean_env_wait_ms": 6.515320221213355, "mean_processing_ms": 0.1659420994573044, "mean_inference_ms": 2.777987966122339}, "off_policy_estimator": {}, "info": {"num_steps_trained": 600000, "num_steps_sampled": 320000, "sample_time_ms": 21126.015, "load_time_ms": 38.663, "grad_time_ms": 9096.932, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.1920929132713809e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.00892395805567503, "policy_loss": -0.008948341012001038, "vf_loss": 8.925480842590332, "vf_explained_var": 0.24435751140117645, "kl": 0.0012184166116639972, "entropy": 1.7363275289535522, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 320000, "episodes_total": 800, "training_iteration": 25, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-05-13", "timestamp": 1660244713, "time_this_iter_s": 35.73040580749512, "time_total_s": 
3134.47496175766, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": 
false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, 
"num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3134.47496175766, "timesteps_since_restore": 320000, "iterations_since_restore": 25, "perf": {"cpu_util_percent": 36.32, "ram_util_percent": 57.904}} +{"episode_reward_max": 142.0, "episode_reward_min": 9.0, "episode_reward_mean": 46.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 79.0}, "policy_reward_mean": {"ppo": 23.465}, "custom_metrics": {"sparse_reward_mean": 7.8, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 31.33, "shaped_reward_min": 9, "shaped_reward_max": 65, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.5, 
"onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 5.81, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.81, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.34, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.31, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 2.7, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.0, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.76, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.26, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.49, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.96, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.73, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.52, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.63, "soup_pickup_agent_1_min": 0, 
"soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.68, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.77, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.78, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.76, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.7, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.0, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.7, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.0, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, 
"useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 142.0, 120.0, 26.0, 59.0, 71.0, 122.0, 44.0, 53.0, 12.0, 45.0, 47.0, 36.0, 83.0, 36.0, 74.0, 31.0, 9.0, 31.0, 9.0, 105.0, 25.0, 79.0, 33.0, 74.0, 41.0, 63.0, 31.0, 9.0, 20.0, 22.0, 31.0, 34.0, 17.0, 60.0, 85.0, 9.0, 42.0, 31.0, 28.0, 34.0, 41.0, 14.0, 20.0, 31.0, 41.0, 79.0, 30.0, 74.0, 28.0, 9.0, 22.0, 39.0, 65.0, 63.0, 66.0, 68.0, 22.0, 76.0, 47.0, 63.0, 79.0, 79.0, 74.0, 25.0, 20.0, 17.0, 25.0, 38.0, 33.0, 12.0, 96.0, 77.0, 31.0, 16.0, 48.0, 28.0, 53.0, 20.0, 28.0, 20.0, 74.0, 12.0, 79.0, 17.0, 91.0, 76.0, 31.0, 9.0, 23.0, 84.0, 68.0, 125.0, 82.0, 31.0, 33.0, 17.0, 42.0, 79.0, 71.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 9.0, 63.0, 79.0, 63.0, 57.0, 9.0, 17.0, 36.0, 23.0, 37.0, 34.0, 60.0, 62.0, 36.0, 8.0, 28.0, 25.0, 9.0, 3.0, 14.0, 31.0, 21.0, 26.0, 16.0, 20.0, 48.0, 35.0, 17.0, 19.0, 37.0, 37.0, 3.0, 28.0, 3.0, 6.0, 9.0, 22.0, 3.0, 6.0, 51.0, 54.0, 14.0, 11.0, 28.0, 51.0, 11.0, 22.0, 34.0, 40.0, 19.0, 22.0, 37.0, 26.0, 17.0, 14.0, 0.0, 9.0, 8.0, 12.0, 11.0, 11.0, 17.0, 
14.0, 17.0, 17.0, 11.0, 6.0, 31.0, 29.0, 48.0, 37.0, 6.0, 3.0, 22.0, 20.0, 19.0, 12.0, 14.0, 14.0, 29.0, 5.0, 25.0, 16.0, 14.0, 0.0, 9.0, 11.0, 16.0, 15.0, 14.0, 27.0, 37.0, 42.0, 15.0, 15.0, 42.0, 32.0, 16.0, 12.0, 6.0, 3.0, 19.0, 3.0, 28.0, 11.0, 28.0, 37.0, 31.0, 32.0, 35.0, 31.0, 26.0, 42.0, 5.0, 17.0, 34.0, 42.0, 31.0, 16.0, 28.0, 35.0, 35.0, 44.0, 40.0, 39.0, 38.0, 36.0, 8.0, 17.0, 12.0, 8.0, 8.0, 9.0, 9.0, 16.0, 25.0, 13.0, 8.0, 25.0, 6.0, 6.0, 59.0, 37.0, 34.0, 43.0, 20.0, 11.0, 0.0, 16.0, 20.0, 28.0, 16.0, 12.0, 30.0, 23.0, 8.0, 12.0, 14.0, 14.0, 11.0, 9.0, 39.0, 35.0, 6.0, 6.0, 37.0, 42.0, 14.0, 3.0, 45.0, 46.0, 42.0, 34.0, 20.0, 11.0, 3.0, 6.0, 12.0, 11.0, 33.0, 51.0, 40.0, 28.0, 59.0, 66.0, 40.0, 42.0, 17.0, 14.0, 14.0, 19.0, 5.0, 12.0, 6.0, 36.0, 34.0, 45.0, 29.0, 42.0]}, "sampler_perf": {"mean_env_wait_ms": 6.273535039263677, "mean_processing_ms": 0.16612280496799353, "mean_inference_ms": 2.713084381170351}, "off_policy_estimator": {}, "info": {"num_steps_trained": 624000, "num_steps_sampled": 332800, "sample_time_ms": 21568.501, "load_time_ms": 38.757, "grad_time_ms": 9275.576, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.9604645663569045e-09, "cur_lr": 0.0010000000474974513, "total_loss": -0.011042184196412563, "policy_loss": -0.01108124852180481, "vf_loss": 9.051116943359375, "vf_explained_var": 0.3293954133987427, "kl": 0.0011855209013447165, "entropy": 1.7320860624313354, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 332800, "episodes_total": 832, "training_iteration": 26, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-05-48", "timestamp": 1660244748, "time_this_iter_s": 34.898388147354126, "time_total_s": 3169.373349905014, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, 
"sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3169.373349905014, "timesteps_since_restore": 332800, "iterations_since_restore": 26, "perf": {"cpu_util_percent": 38.21224489795919, "ram_util_percent": 57.97551020408163}} +{"episode_reward_max": 142.0, "episode_reward_min": 9.0, "episode_reward_mean": 48.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 79.0}, "policy_reward_mean": {"ppo": 24.285}, "custom_metrics": {"sparse_reward_mean": 8.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 32.17, "shaped_reward_min": 9, "shaped_reward_max": 73, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.38, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.95, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 13, 
"useful_onion_pickup_agent_0_mean": 3.8, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.51, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 2.69, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.14, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.9, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.46, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.66, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.83, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.75, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.7, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.77, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.75, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.87, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.69, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.14, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.69, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.14, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [44.0, 12.0, 63.0, 25.0, 17.0, 49.0, 9.0, 79.0, 119.0, 43.0, 12.0, 130.0, 79.0, 53.0, 22.0, 24.0, 36.0, 17.0, 82.0, 42.0, 65.0, 88.0, 23.0, 80.0, 66.0, 44.0, 113.0, 9.0, 66.0, 76.0, 9.0, 38.0, 25.0, 20.0, 17.0, 25.0, 38.0, 33.0, 12.0, 96.0, 77.0, 31.0, 16.0, 48.0, 28.0, 53.0, 20.0, 28.0, 20.0, 74.0, 12.0, 79.0, 17.0, 91.0, 76.0, 31.0, 9.0, 23.0, 84.0, 68.0, 125.0, 82.0, 31.0, 33.0, 17.0, 42.0, 79.0, 71.0, 9.0, 142.0, 120.0, 26.0, 59.0, 71.0, 122.0, 44.0, 53.0, 12.0, 45.0, 47.0, 36.0, 83.0, 36.0, 74.0, 31.0, 9.0, 31.0, 9.0, 105.0, 25.0, 79.0, 33.0, 74.0, 41.0, 63.0, 31.0, 9.0, 20.0, 22.0, 31.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [23.0, 21.0, 9.0, 3.0, 43.0, 20.0, 9.0, 16.0, 12.0, 5.0, 18.0, 31.0, 6.0, 3.0, 34.0, 45.0, 60.0, 59.0, 14.0, 29.0, 6.0, 6.0, 65.0, 65.0, 42.0, 37.0, 19.0, 34.0, 9.0, 13.0, 8.0, 16.0, 19.0, 17.0, 5.0, 12.0, 34.0, 48.0, 17.0, 25.0, 34.0, 31.0, 57.0, 31.0, 17.0, 6.0, 32.0, 48.0, 29.0, 37.0, 19.0, 25.0, 70.0, 43.0, 0.0, 9.0, 34.0, 32.0, 44.0, 32.0, 3.0, 6.0, 22.0, 16.0, 8.0, 17.0, 12.0, 8.0, 8.0, 9.0, 9.0, 16.0, 25.0, 13.0, 8.0, 25.0, 6.0, 6.0, 59.0, 37.0, 34.0, 43.0, 20.0, 11.0, 0.0, 16.0, 20.0, 28.0, 16.0, 12.0, 30.0, 23.0, 
8.0, 12.0, 14.0, 14.0, 11.0, 9.0, 39.0, 35.0, 6.0, 6.0, 37.0, 42.0, 14.0, 3.0, 45.0, 46.0, 42.0, 34.0, 20.0, 11.0, 3.0, 6.0, 12.0, 11.0, 33.0, 51.0, 40.0, 28.0, 59.0, 66.0, 40.0, 42.0, 17.0, 14.0, 14.0, 19.0, 5.0, 12.0, 6.0, 36.0, 34.0, 45.0, 29.0, 42.0, 0.0, 9.0, 63.0, 79.0, 63.0, 57.0, 9.0, 17.0, 36.0, 23.0, 37.0, 34.0, 60.0, 62.0, 36.0, 8.0, 28.0, 25.0, 9.0, 3.0, 14.0, 31.0, 21.0, 26.0, 16.0, 20.0, 48.0, 35.0, 17.0, 19.0, 37.0, 37.0, 3.0, 28.0, 3.0, 6.0, 9.0, 22.0, 3.0, 6.0, 51.0, 54.0, 14.0, 11.0, 28.0, 51.0, 11.0, 22.0, 34.0, 40.0, 19.0, 22.0, 37.0, 26.0, 17.0, 14.0, 0.0, 9.0, 8.0, 12.0, 11.0, 11.0, 17.0, 14.0]}, "sampler_perf": {"mean_env_wait_ms": 6.050716001931983, "mean_processing_ms": 0.1663751803875143, "mean_inference_ms": 2.655688107582492}, "off_policy_estimator": {}, "info": {"num_steps_trained": 648000, "num_steps_sampled": 345600, "sample_time_ms": 22006.752, "load_time_ms": 38.851, "grad_time_ms": 9447.322, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.9802322831784522e-09, "cur_lr": 0.0010000000474974513, "total_loss": -0.010166086256504059, "policy_loss": -0.010217566043138504, "vf_loss": 9.166760444641113, "vf_explained_var": 0.3867878019809723, "kl": 0.001088446588255465, "entropy": 1.7303863763809204, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 345600, "episodes_total": 864, "training_iteration": 27, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-06-24", "timestamp": 1660244784, "time_this_iter_s": 35.4101459980011, "time_total_s": 3204.783495903015, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", 
"fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, 
"sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3204.783495903015, 
"timesteps_since_restore": 345600, "iterations_since_restore": 27, "perf": {"cpu_util_percent": 38.552, "ram_util_percent": 58.32}} +{"episode_reward_max": 145.0, "episode_reward_min": 9.0, "episode_reward_mean": 55.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 79.0}, "policy_reward_mean": {"ppo": 27.97}, "custom_metrics": {"sparse_reward_mean": 10.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 35.94, "shaped_reward_min": 9, "shaped_reward_max": 73, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.38, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.64, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.83, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 5.04, 
"useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 2.73, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.69, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.25, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.71, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.55, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.58, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.87, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.87, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.97, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 
0.89, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.73, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.69, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.73, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.69, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, 
"useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [93.0, 93.0, 74.0, 133.0, 106.0, 85.0, 23.0, 47.0, 93.0, 23.0, 47.0, 74.0, 33.0, 98.0, 96.0, 17.0, 49.0, 110.0, 84.0, 145.0, 34.0, 99.0, 48.0, 55.0, 71.0, 71.0, 65.0, 46.0, 44.0, 12.0, 20.0, 71.0, 17.0, 42.0, 79.0, 71.0, 9.0, 142.0, 120.0, 26.0, 59.0, 71.0, 122.0, 44.0, 53.0, 12.0, 45.0, 47.0, 36.0, 83.0, 36.0, 74.0, 31.0, 9.0, 31.0, 9.0, 105.0, 25.0, 79.0, 33.0, 74.0, 41.0, 63.0, 31.0, 9.0, 20.0, 22.0, 31.0, 44.0, 12.0, 63.0, 25.0, 17.0, 49.0, 9.0, 79.0, 119.0, 43.0, 12.0, 130.0, 79.0, 53.0, 22.0, 24.0, 36.0, 17.0, 82.0, 42.0, 65.0, 88.0, 23.0, 80.0, 66.0, 44.0, 113.0, 9.0, 66.0, 76.0, 9.0, 38.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [47.0, 46.0, 53.0, 40.0, 37.0, 37.0, 59.0, 74.0, 55.0, 51.0, 40.0, 45.0, 3.0, 20.0, 22.0, 25.0, 53.0, 40.0, 14.0, 9.0, 24.0, 23.0, 36.0, 38.0, 21.0, 12.0, 37.0, 61.0, 44.0, 52.0, 6.0, 11.0, 29.0, 20.0, 48.0, 62.0, 38.0, 46.0, 76.0, 69.0, 6.0, 28.0, 47.0, 52.0, 17.0, 31.0, 16.0, 39.0, 40.0, 31.0, 35.0, 36.0, 32.0, 33.0, 21.0, 25.0, 23.0, 21.0, 6.0, 6.0, 8.0, 12.0, 33.0, 38.0, 5.0, 12.0, 6.0, 36.0, 34.0, 45.0, 29.0, 42.0, 0.0, 9.0, 63.0, 79.0, 63.0, 57.0, 9.0, 17.0, 36.0, 23.0, 37.0, 34.0, 60.0, 62.0, 36.0, 8.0, 28.0, 25.0, 9.0, 3.0, 14.0, 31.0, 21.0, 26.0, 16.0, 20.0, 48.0, 35.0, 17.0, 19.0, 37.0, 37.0, 3.0, 28.0, 3.0, 6.0, 9.0, 22.0, 3.0, 6.0, 51.0, 54.0, 14.0, 11.0, 28.0, 51.0, 
11.0, 22.0, 34.0, 40.0, 19.0, 22.0, 37.0, 26.0, 17.0, 14.0, 0.0, 9.0, 8.0, 12.0, 11.0, 11.0, 17.0, 14.0, 23.0, 21.0, 9.0, 3.0, 43.0, 20.0, 9.0, 16.0, 12.0, 5.0, 18.0, 31.0, 6.0, 3.0, 34.0, 45.0, 60.0, 59.0, 14.0, 29.0, 6.0, 6.0, 65.0, 65.0, 42.0, 37.0, 19.0, 34.0, 9.0, 13.0, 8.0, 16.0, 19.0, 17.0, 5.0, 12.0, 34.0, 48.0, 17.0, 25.0, 34.0, 31.0, 57.0, 31.0, 17.0, 6.0, 32.0, 48.0, 29.0, 37.0, 19.0, 25.0, 70.0, 43.0, 0.0, 9.0, 34.0, 32.0, 44.0, 32.0, 3.0, 6.0, 22.0, 16.0]}, "sampler_perf": {"mean_env_wait_ms": 5.8446114836959895, "mean_processing_ms": 0.16658688353388335, "mean_inference_ms": 2.6012841728705705}, "off_policy_estimator": {}, "info": {"num_steps_trained": 672000, "num_steps_sampled": 358400, "sample_time_ms": 22190.055, "load_time_ms": 38.857, "grad_time_ms": 9598.179, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.4901161415892261e-09, "cur_lr": 0.0010000000474974513, "total_loss": -0.008647923357784748, "policy_loss": -0.00907482486218214, "vf_loss": 12.8626708984375, "vf_explained_var": 0.32375723123550415, "kl": 0.0009376012603752315, "entropy": 1.7187572717666626, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 358400, "episodes_total": 896, "training_iteration": 28, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-06-58", "timestamp": 1660244818, "time_this_iter_s": 34.0201780796051, "time_total_s": 3238.8036739826202, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, 
"max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3238.8036739826202, "timesteps_since_restore": 358400, "iterations_since_restore": 28, "perf": {"cpu_util_percent": 38.32708333333333, "ram_util_percent": 57.17499999999999}} 
+{"episode_reward_max": 145.0, "episode_reward_min": 9.0, "episode_reward_mean": 55.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 76.0}, "policy_reward_mean": {"ppo": 27.675}, "custom_metrics": {"sparse_reward_mean": 9.6, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 36.15, "shaped_reward_min": 9, "shaped_reward_max": 73, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.38, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 6.69, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.84, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 4.96, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.75, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.61, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.54, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.31, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.66, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.65, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.58, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.82, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.92, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.89, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 1.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.83, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.75, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.61, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.75, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.61, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [39.0, 96.0, 81.0, 77.0, 9.0, 34.0, 47.0, 20.0, 74.0, 44.0, 120.0, 34.0, 22.0, 42.0, 76.0, 66.0, 38.0, 44.0, 9.0, 101.0, 37.0, 39.0, 34.0, 36.0, 53.0, 9.0, 66.0, 80.0, 95.0, 34.0, 50.0, 54.0, 9.0, 20.0, 22.0, 31.0, 44.0, 12.0, 63.0, 25.0, 17.0, 49.0, 9.0, 79.0, 119.0, 43.0, 12.0, 130.0, 79.0, 53.0, 22.0, 24.0, 36.0, 17.0, 82.0, 42.0, 65.0, 88.0, 23.0, 80.0, 66.0, 44.0, 113.0, 9.0, 66.0, 76.0, 9.0, 38.0, 93.0, 93.0, 74.0, 133.0, 106.0, 85.0, 23.0, 47.0, 93.0, 23.0, 47.0, 74.0, 33.0, 98.0, 96.0, 17.0, 49.0, 110.0, 84.0, 145.0, 34.0, 99.0, 48.0, 55.0, 71.0, 71.0, 65.0, 46.0, 44.0, 12.0, 20.0, 71.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [15.0, 24.0, 32.0, 64.0, 38.0, 43.0, 39.0, 38.0, 0.0, 9.0, 14.0, 20.0, 18.0, 29.0, 14.0, 6.0, 40.0, 34.0, 22.0, 22.0, 62.0, 58.0, 15.0, 19.0, 10.0, 12.0, 25.0, 17.0, 34.0, 42.0, 37.0, 29.0, 22.0, 16.0, 24.0, 20.0, 0.0, 9.0, 45.0, 56.0, 20.0, 17.0, 30.0, 9.0, 12.0, 22.0, 14.0, 22.0, 27.0, 26.0, 0.0, 9.0, 26.0, 40.0, 34.0, 46.0, 50.0, 45.0, 18.0, 16.0, 28.0, 22.0, 28.0, 26.0, 0.0, 9.0, 8.0, 12.0, 11.0, 11.0, 17.0, 14.0, 23.0, 21.0, 9.0, 3.0, 43.0, 20.0, 9.0, 16.0, 12.0, 5.0, 18.0, 31.0, 6.0, 3.0, 34.0, 45.0, 60.0, 59.0, 14.0, 29.0, 6.0, 6.0, 65.0, 65.0, 42.0, 37.0, 19.0, 34.0, 9.0, 13.0, 8.0, 16.0, 19.0, 17.0, 5.0, 12.0, 34.0, 48.0, 17.0, 25.0, 34.0, 31.0, 57.0, 31.0, 17.0, 6.0, 32.0, 48.0, 29.0, 37.0, 19.0, 25.0, 70.0, 43.0, 0.0, 9.0, 34.0, 32.0, 44.0, 32.0, 3.0, 6.0, 22.0, 16.0, 47.0, 46.0, 53.0, 40.0, 
37.0, 37.0, 59.0, 74.0, 55.0, 51.0, 40.0, 45.0, 3.0, 20.0, 22.0, 25.0, 53.0, 40.0, 14.0, 9.0, 24.0, 23.0, 36.0, 38.0, 21.0, 12.0, 37.0, 61.0, 44.0, 52.0, 6.0, 11.0, 29.0, 20.0, 48.0, 62.0, 38.0, 46.0, 76.0, 69.0, 6.0, 28.0, 47.0, 52.0, 17.0, 31.0, 16.0, 39.0, 40.0, 31.0, 35.0, 36.0, 32.0, 33.0, 21.0, 25.0, 23.0, 21.0, 6.0, 6.0, 8.0, 12.0, 33.0, 38.0]}, "sampler_perf": {"mean_env_wait_ms": 5.653214857264523, "mean_processing_ms": 0.16674718188605944, "mean_inference_ms": 2.549555614199102}, "off_policy_estimator": {}, "info": {"num_steps_trained": 696000, "num_steps_sampled": 371200, "sample_time_ms": 22379.228, "load_time_ms": 38.745, "grad_time_ms": 9750.752, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.450580707946131e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.007846680469810963, "policy_loss": -0.007850968278944492, "vf_loss": 8.63664722442627, "vf_explained_var": 0.4092896282672882, "kl": 0.001057352521456778, "entropy": 1.7187713384628296, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 371200, "episodes_total": 928, "training_iteration": 29, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-07-30", "timestamp": 1660244850, "time_this_iter_s": 32.66524410247803, "time_total_s": 3271.4689180850983, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3271.4689180850983, "timesteps_since_restore": 371200, "iterations_since_restore": 29, "perf": {"cpu_util_percent": 40.74130434782609, "ram_util_percent": 58.79130434782609}} +{"episode_reward_max": 179.0, "episode_reward_min": 9.0, "episode_reward_mean": 58.6, "episode_len_mean": 400.0, "episodes_this_iter": 
32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 92.0}, "policy_reward_mean": {"ppo": 29.3}, "custom_metrics": {"sparse_reward_mean": 10.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 38.2, "shaped_reward_min": 9, "shaped_reward_max": 70, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.55, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 7.06, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 4.05, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.26, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.03, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.83, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.86, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.66, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.29, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.67, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.67, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.45, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.26, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.9, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.07, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.92, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.9, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.83, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.86, "optimal_onion_potting_agent_1_min": 0, 
"optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.83, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.86, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [90.0, 48.0, 179.0, 39.0, 60.0, 91.0, 39.0, 122.0, 20.0, 34.0, 42.0, 23.0, 50.0, 12.0, 20.0, 71.0, 66.0, 35.0, 44.0, 73.0, 64.0, 
17.0, 131.0, 38.0, 41.0, 36.0, 105.0, 53.0, 39.0, 71.0, 20.0, 79.0, 66.0, 76.0, 9.0, 38.0, 93.0, 93.0, 74.0, 133.0, 106.0, 85.0, 23.0, 47.0, 93.0, 23.0, 47.0, 74.0, 33.0, 98.0, 96.0, 17.0, 49.0, 110.0, 84.0, 145.0, 34.0, 99.0, 48.0, 55.0, 71.0, 71.0, 65.0, 46.0, 44.0, 12.0, 20.0, 71.0, 39.0, 96.0, 81.0, 77.0, 9.0, 34.0, 47.0, 20.0, 74.0, 44.0, 120.0, 34.0, 22.0, 42.0, 76.0, 66.0, 38.0, 44.0, 9.0, 101.0, 37.0, 39.0, 34.0, 36.0, 53.0, 9.0, 66.0, 80.0, 95.0, 34.0, 50.0, 54.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [39.0, 51.0, 20.0, 28.0, 87.0, 92.0, 25.0, 14.0, 29.0, 31.0, 45.0, 46.0, 25.0, 14.0, 65.0, 57.0, 11.0, 9.0, 22.0, 12.0, 14.0, 28.0, 6.0, 17.0, 30.0, 20.0, 6.0, 6.0, 12.0, 8.0, 23.0, 48.0, 29.0, 37.0, 13.0, 22.0, 20.0, 24.0, 36.0, 37.0, 31.0, 33.0, 11.0, 6.0, 68.0, 63.0, 11.0, 27.0, 24.0, 17.0, 20.0, 16.0, 64.0, 41.0, 31.0, 22.0, 12.0, 27.0, 26.0, 45.0, 11.0, 9.0, 33.0, 46.0, 34.0, 32.0, 44.0, 32.0, 3.0, 6.0, 22.0, 16.0, 47.0, 46.0, 53.0, 40.0, 37.0, 37.0, 59.0, 74.0, 55.0, 51.0, 40.0, 45.0, 3.0, 20.0, 22.0, 25.0, 53.0, 40.0, 14.0, 9.0, 24.0, 23.0, 36.0, 38.0, 21.0, 12.0, 37.0, 61.0, 44.0, 52.0, 6.0, 11.0, 29.0, 20.0, 48.0, 62.0, 38.0, 46.0, 76.0, 69.0, 6.0, 28.0, 47.0, 52.0, 17.0, 31.0, 16.0, 39.0, 40.0, 31.0, 35.0, 36.0, 32.0, 33.0, 21.0, 25.0, 23.0, 21.0, 6.0, 6.0, 8.0, 12.0, 33.0, 38.0, 15.0, 24.0, 32.0, 64.0, 38.0, 43.0, 39.0, 38.0, 0.0, 9.0, 14.0, 20.0, 18.0, 29.0, 14.0, 6.0, 40.0, 34.0, 22.0, 22.0, 62.0, 58.0, 15.0, 19.0, 10.0, 12.0, 25.0, 17.0, 34.0, 42.0, 
37.0, 29.0, 22.0, 16.0, 24.0, 20.0, 0.0, 9.0, 45.0, 56.0, 20.0, 17.0, 30.0, 9.0, 12.0, 22.0, 14.0, 22.0, 27.0, 26.0, 0.0, 9.0, 26.0, 40.0, 34.0, 46.0, 50.0, 45.0, 18.0, 16.0, 28.0, 22.0, 28.0, 26.0]}, "sampler_perf": {"mean_env_wait_ms": 5.475175554008815, "mean_processing_ms": 0.16692031317355585, "mean_inference_ms": 2.4996312531417773}, "off_policy_estimator": {}, "info": {"num_steps_trained": 720000, "num_steps_sampled": 384000, "sample_time_ms": 22626.486, "load_time_ms": 38.637, "grad_time_ms": 9834.774, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.7252903539730653e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.010750534944236279, "policy_loss": -0.01101712416857481, "vf_loss": 11.21933650970459, "vf_explained_var": 0.33813270926475525, "kl": 0.0012414826778694987, "entropy": 1.7106833457946777, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 384000, "episodes_total": 960, "training_iteration": 30, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-08-02", "timestamp": 1660244882, "time_this_iter_s": 32.23107981681824, "time_total_s": 3303.6999979019165, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, 
"optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": 
false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3303.6999979019165, "timesteps_since_restore": 384000, "iterations_since_restore": 30, "perf": {"cpu_util_percent": 45.34130434782608, "ram_util_percent": 56.88478260869565}} +{"episode_reward_max": 182.0, "episode_reward_min": 9.0, "episode_reward_mean": 57.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 96.0}, "policy_reward_mean": {"ppo": 28.805}, "custom_metrics": 
{"sparse_reward_mean": 10.4, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 36.81, "shaped_reward_min": 9, "shaped_reward_max": 65, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.93, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 6.72, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 4.36, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 4.83, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.46, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.99, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.62, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.62, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.3, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.64, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.68, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.5, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.73, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.09, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.82, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.86, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.85, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.99, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.62, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.99, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.62, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [34.0, 182.0, 133.0, 66.0, 23.0, 42.0, 53.0, 125.0, 22.0, 12.0, 87.0, 98.0, 9.0, 46.0, 30.0, 12.0, 37.0, 99.0, 63.0, 85.0, 77.0, 42.0, 133.0, 173.0, 66.0, 35.0, 39.0, 88.0, 34.0, 52.0, 82.0, 23.0, 44.0, 12.0, 20.0, 71.0, 39.0, 96.0, 81.0, 77.0, 9.0, 
34.0, 47.0, 20.0, 74.0, 44.0, 120.0, 34.0, 22.0, 42.0, 76.0, 66.0, 38.0, 44.0, 9.0, 101.0, 37.0, 39.0, 34.0, 36.0, 53.0, 9.0, 66.0, 80.0, 95.0, 34.0, 50.0, 54.0, 90.0, 48.0, 179.0, 39.0, 60.0, 91.0, 39.0, 122.0, 20.0, 34.0, 42.0, 23.0, 50.0, 12.0, 20.0, 71.0, 66.0, 35.0, 44.0, 73.0, 64.0, 17.0, 131.0, 38.0, 41.0, 36.0, 105.0, 53.0, 39.0, 71.0, 20.0, 79.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [23.0, 11.0, 86.0, 96.0, 68.0, 65.0, 37.0, 29.0, 17.0, 6.0, 6.0, 36.0, 22.0, 31.0, 62.0, 63.0, 8.0, 14.0, 3.0, 9.0, 41.0, 46.0, 55.0, 43.0, 0.0, 9.0, 28.0, 18.0, 19.0, 11.0, 9.0, 3.0, 23.0, 14.0, 59.0, 40.0, 31.0, 32.0, 45.0, 40.0, 39.0, 38.0, 19.0, 23.0, 67.0, 66.0, 85.0, 88.0, 34.0, 32.0, 20.0, 15.0, 22.0, 17.0, 37.0, 51.0, 20.0, 14.0, 27.0, 25.0, 43.0, 39.0, 11.0, 12.0, 23.0, 21.0, 6.0, 6.0, 8.0, 12.0, 33.0, 38.0, 15.0, 24.0, 32.0, 64.0, 38.0, 43.0, 39.0, 38.0, 0.0, 9.0, 14.0, 20.0, 18.0, 29.0, 14.0, 6.0, 40.0, 34.0, 22.0, 22.0, 62.0, 58.0, 15.0, 19.0, 10.0, 12.0, 25.0, 17.0, 34.0, 42.0, 37.0, 29.0, 22.0, 16.0, 24.0, 20.0, 0.0, 9.0, 45.0, 56.0, 20.0, 17.0, 30.0, 9.0, 12.0, 22.0, 14.0, 22.0, 27.0, 26.0, 0.0, 9.0, 26.0, 40.0, 34.0, 46.0, 50.0, 45.0, 18.0, 16.0, 28.0, 22.0, 28.0, 26.0, 39.0, 51.0, 20.0, 28.0, 87.0, 92.0, 25.0, 14.0, 29.0, 31.0, 45.0, 46.0, 25.0, 14.0, 65.0, 57.0, 11.0, 9.0, 22.0, 12.0, 14.0, 28.0, 6.0, 17.0, 30.0, 20.0, 6.0, 6.0, 12.0, 8.0, 23.0, 48.0, 29.0, 37.0, 13.0, 22.0, 20.0, 24.0, 36.0, 37.0, 31.0, 33.0, 11.0, 6.0, 68.0, 63.0, 11.0, 27.0, 24.0, 17.0, 20.0, 
16.0, 64.0, 41.0, 31.0, 22.0, 12.0, 27.0, 26.0, 45.0, 11.0, 9.0, 33.0, 46.0]}, "sampler_perf": {"mean_env_wait_ms": 5.308917467290777, "mean_processing_ms": 0.16704433507360725, "mean_inference_ms": 2.4512479594099825}, "off_policy_estimator": {}, "info": {"num_steps_trained": 744000, "num_steps_sampled": 396800, "sample_time_ms": 22677.978, "load_time_ms": 38.542, "grad_time_ms": 9985.158, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.8626451769865326e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.00831019040197134, "policy_loss": -0.008908797055482864, "vf_loss": 14.524895668029785, "vf_explained_var": 0.35295844078063965, "kl": 0.0011723049683496356, "entropy": 1.7077676057815552, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 396800, "episodes_total": 992, "training_iteration": 31, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-08-34", "timestamp": 1660244914, "time_this_iter_s": 31.540908813476562, "time_total_s": 3335.240906715393, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": 
{"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3335.240906715393, "timesteps_since_restore": 396800, "iterations_since_restore": 31, "perf": {"cpu_util_percent": 42.184090909090905, "ram_util_percent": 56.9090909090909}} +{"episode_reward_max": 187.0, "episode_reward_min": 9.0, "episode_reward_mean": 63.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 101.0}, "policy_reward_mean": {"ppo": 31.655}, "custom_metrics": {"sparse_reward_mean": 12.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 39.31, 
"shaped_reward_min": 9, "shaped_reward_max": 77, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.12, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 6.89, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 4.48, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.08, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.41, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.38, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.33, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 3.08, 
"potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.95, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 4.02, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.61, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.77, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.57, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.24, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.26, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.91, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.9, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.86, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.08, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.95, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, 
"optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.08, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.95, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [87.0, 34.0, 187.0, 74.0, 128.0, 47.0, 23.0, 157.0, 12.0, 85.0, 20.0, 42.0, 20.0, 92.0, 82.0, 44.0, 9.0, 87.0, 20.0, 98.0, 31.0, 42.0, 83.0, 58.0, 45.0, 96.0, 81.0, 93.0, 36.0, 69.0, 88.0, 74.0, 95.0, 34.0, 50.0, 54.0, 90.0, 48.0, 179.0, 39.0, 60.0, 91.0, 39.0, 122.0, 20.0, 34.0, 42.0, 23.0, 50.0, 12.0, 20.0, 71.0, 66.0, 35.0, 44.0, 73.0, 64.0, 17.0, 131.0, 38.0, 41.0, 36.0, 
105.0, 53.0, 39.0, 71.0, 20.0, 79.0, 34.0, 182.0, 133.0, 66.0, 23.0, 42.0, 53.0, 125.0, 22.0, 12.0, 87.0, 98.0, 9.0, 46.0, 30.0, 12.0, 37.0, 99.0, 63.0, 85.0, 77.0, 42.0, 133.0, 173.0, 66.0, 35.0, 39.0, 88.0, 34.0, 52.0, 82.0, 23.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [50.0, 37.0, 17.0, 17.0, 86.0, 101.0, 37.0, 37.0, 55.0, 73.0, 24.0, 23.0, 17.0, 6.0, 79.0, 78.0, 6.0, 6.0, 37.0, 48.0, 5.0, 15.0, 13.0, 29.0, 9.0, 11.0, 45.0, 47.0, 39.0, 43.0, 13.0, 31.0, 6.0, 3.0, 42.0, 45.0, 14.0, 6.0, 58.0, 40.0, 16.0, 15.0, 11.0, 31.0, 40.0, 43.0, 14.0, 44.0, 17.0, 28.0, 45.0, 51.0, 44.0, 37.0, 49.0, 44.0, 14.0, 22.0, 35.0, 34.0, 36.0, 52.0, 48.0, 26.0, 50.0, 45.0, 18.0, 16.0, 28.0, 22.0, 28.0, 26.0, 39.0, 51.0, 20.0, 28.0, 87.0, 92.0, 25.0, 14.0, 29.0, 31.0, 45.0, 46.0, 25.0, 14.0, 65.0, 57.0, 11.0, 9.0, 22.0, 12.0, 14.0, 28.0, 6.0, 17.0, 30.0, 20.0, 6.0, 6.0, 12.0, 8.0, 23.0, 48.0, 29.0, 37.0, 13.0, 22.0, 20.0, 24.0, 36.0, 37.0, 31.0, 33.0, 11.0, 6.0, 68.0, 63.0, 11.0, 27.0, 24.0, 17.0, 20.0, 16.0, 64.0, 41.0, 31.0, 22.0, 12.0, 27.0, 26.0, 45.0, 11.0, 9.0, 33.0, 46.0, 23.0, 11.0, 86.0, 96.0, 68.0, 65.0, 37.0, 29.0, 17.0, 6.0, 6.0, 36.0, 22.0, 31.0, 62.0, 63.0, 8.0, 14.0, 3.0, 9.0, 41.0, 46.0, 55.0, 43.0, 0.0, 9.0, 28.0, 18.0, 19.0, 11.0, 9.0, 3.0, 23.0, 14.0, 59.0, 40.0, 31.0, 32.0, 45.0, 40.0, 39.0, 38.0, 19.0, 23.0, 67.0, 66.0, 85.0, 88.0, 34.0, 32.0, 20.0, 15.0, 22.0, 17.0, 37.0, 51.0, 20.0, 14.0, 27.0, 25.0, 43.0, 39.0, 11.0, 12.0]}, "sampler_perf": {"mean_env_wait_ms": 
5.15345566138169, "mean_processing_ms": 0.16717489002220728, "mean_inference_ms": 2.4061553716842257}, "off_policy_estimator": {}, "info": {"num_steps_trained": 768000, "num_steps_sampled": 409600, "sample_time_ms": 22972.316, "load_time_ms": 38.916, "grad_time_ms": 10035.28, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 9.313225884932663e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.007412114646285772, "policy_loss": -0.007903209887444973, "vf_loss": 13.404266357421875, "vf_explained_var": 0.34650716185569763, "kl": 0.0011789536802098155, "entropy": 1.6986547708511353, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 409600, "episodes_total": 1024, "training_iteration": 32, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-09-07", "timestamp": 1660244947, "time_this_iter_s": 32.6441330909729, "time_total_s": 3367.885039806366, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": 
{}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3367.885039806366, "timesteps_since_restore": 409600, "iterations_since_restore": 32, "perf": {"cpu_util_percent": 40.13191489361702, "ram_util_percent": 57.20638297872342}} +{"episode_reward_max": 187.0, "episode_reward_min": 9.0, "episode_reward_mean": 66.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 101.0}, "policy_reward_mean": {"ppo": 33.165}, "custom_metrics": {"sparse_reward_mean": 13.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 39.13, "shaped_reward_min": 9, "shaped_reward_max": 77, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, 
"tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.01, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 6.99, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 4.25, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.15, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.61, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.51, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.97, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 4.08, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, 
"dish_pickup_agent_0_mean": 3.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.66, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.71, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.65, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.67, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.1, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.91, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.96, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.87, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.91, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.97, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 4.08, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.97, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, 
"viable_onion_potting_agent_1_mean": 4.08, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 76.0, 98.0, 45.0, 41.0, 127.0, 12.0, 46.0, 70.0, 144.0, 71.0, 117.0, 111.0, 9.0, 54.0, 40.0, 79.0, 14.0, 62.0, 63.0, 106.0, 20.0, 27.0, 136.0, 90.0, 34.0, 52.0, 94.0, 117.0, 90.0, 39.0, 85.0, 39.0, 71.0, 20.0, 79.0, 34.0, 182.0, 133.0, 66.0, 23.0, 42.0, 53.0, 125.0, 22.0, 12.0, 87.0, 98.0, 9.0, 46.0, 30.0, 12.0, 37.0, 99.0, 63.0, 85.0, 77.0, 42.0, 133.0, 173.0, 66.0, 35.0, 39.0, 88.0, 34.0, 52.0, 82.0, 23.0, 87.0, 34.0, 187.0, 74.0, 128.0, 47.0, 23.0, 157.0, 12.0, 85.0, 20.0, 42.0, 20.0, 92.0, 82.0, 44.0, 9.0, 87.0, 20.0, 98.0, 
31.0, 42.0, 83.0, 58.0, 45.0, 96.0, 81.0, 93.0, 36.0, 69.0, 88.0, 74.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 41.0, 35.0, 51.0, 47.0, 23.0, 22.0, 19.0, 22.0, 57.0, 70.0, 6.0, 6.0, 21.0, 25.0, 31.0, 39.0, 73.0, 71.0, 34.0, 37.0, 55.0, 62.0, 51.0, 60.0, 6.0, 3.0, 20.0, 34.0, 20.0, 20.0, 29.0, 50.0, 11.0, 3.0, 25.0, 37.0, 23.0, 40.0, 52.0, 54.0, 8.0, 12.0, 18.0, 9.0, 66.0, 70.0, 41.0, 49.0, 12.0, 22.0, 25.0, 27.0, 46.0, 48.0, 66.0, 51.0, 42.0, 48.0, 19.0, 20.0, 48.0, 37.0, 12.0, 27.0, 26.0, 45.0, 11.0, 9.0, 33.0, 46.0, 23.0, 11.0, 86.0, 96.0, 68.0, 65.0, 37.0, 29.0, 17.0, 6.0, 6.0, 36.0, 22.0, 31.0, 62.0, 63.0, 8.0, 14.0, 3.0, 9.0, 41.0, 46.0, 55.0, 43.0, 0.0, 9.0, 28.0, 18.0, 19.0, 11.0, 9.0, 3.0, 23.0, 14.0, 59.0, 40.0, 31.0, 32.0, 45.0, 40.0, 39.0, 38.0, 19.0, 23.0, 67.0, 66.0, 85.0, 88.0, 34.0, 32.0, 20.0, 15.0, 22.0, 17.0, 37.0, 51.0, 20.0, 14.0, 27.0, 25.0, 43.0, 39.0, 11.0, 12.0, 50.0, 37.0, 17.0, 17.0, 86.0, 101.0, 37.0, 37.0, 55.0, 73.0, 24.0, 23.0, 17.0, 6.0, 79.0, 78.0, 6.0, 6.0, 37.0, 48.0, 5.0, 15.0, 13.0, 29.0, 9.0, 11.0, 45.0, 47.0, 39.0, 43.0, 13.0, 31.0, 6.0, 3.0, 42.0, 45.0, 14.0, 6.0, 58.0, 40.0, 16.0, 15.0, 11.0, 31.0, 40.0, 43.0, 14.0, 44.0, 17.0, 28.0, 45.0, 51.0, 44.0, 37.0, 49.0, 44.0, 14.0, 22.0, 35.0, 34.0, 36.0, 52.0, 48.0, 26.0]}, "sampler_perf": {"mean_env_wait_ms": 5.0079354762159145, "mean_processing_ms": 0.1673845002022688, "mean_inference_ms": 2.3652145638679087}, "off_policy_estimator": {}, "info": {"num_steps_trained": 792000, 
"num_steps_sampled": 422400, "sample_time_ms": 23410.855, "load_time_ms": 38.911, "grad_time_ms": 10160.504, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.6566129424663316e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.01017048116773367, "policy_loss": -0.010584059171378613, "vf_loss": 12.619880676269531, "vf_explained_var": 0.45027461647987366, "kl": 0.001254777773283422, "entropy": 1.6968183517456055, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 422400, "episodes_total": 1056, "training_iteration": 33, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-09-42", "timestamp": 1660244982, "time_this_iter_s": 35.00341510772705, "time_total_s": 3402.888454914093, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, 
"wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, 
"object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3402.888454914093, "timesteps_since_restore": 422400, "iterations_since_restore": 33, "perf": {"cpu_util_percent": 42.62857142857143, "ram_util_percent": 58.25510204081633}} +{"episode_reward_max": 187.0, "episode_reward_min": 9.0, "episode_reward_mean": 68.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 101.0}, "policy_reward_mean": {"ppo": 34.395}, "custom_metrics": {"sparse_reward_mean": 13.8, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 41.19, "shaped_reward_min": 9, "shaped_reward_max": 77, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, 
"useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.84, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 7.2, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.04, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.36, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.45, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 3.01, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 4.16, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.86, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.65, 
"dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.85, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.21, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.04, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.17, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 1.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.96, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.01, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 4.16, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.01, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 4.16, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 9, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [90.0, 79.0, 36.0, 58.0, 49.0, 76.0, 79.0, 98.0, 136.0, 87.0, 50.0, 145.0, 28.0, 23.0, 110.0, 19.0, 134.0, 14.0, 12.0, 58.0, 77.0, 38.0, 99.0, 80.0, 142.0, 42.0, 124.0, 74.0, 93.0, 23.0, 103.0, 90.0, 34.0, 52.0, 82.0, 23.0, 87.0, 34.0, 187.0, 74.0, 128.0, 47.0, 23.0, 157.0, 12.0, 85.0, 20.0, 42.0, 20.0, 92.0, 82.0, 44.0, 9.0, 87.0, 20.0, 98.0, 31.0, 42.0, 83.0, 58.0, 45.0, 96.0, 81.0, 93.0, 36.0, 69.0, 88.0, 74.0, 9.0, 76.0, 98.0, 45.0, 41.0, 127.0, 12.0, 46.0, 70.0, 144.0, 71.0, 117.0, 111.0, 9.0, 54.0, 40.0, 79.0, 14.0, 62.0, 63.0, 106.0, 20.0, 27.0, 136.0, 90.0, 34.0, 52.0, 94.0, 117.0, 90.0, 39.0, 85.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [42.0, 48.0, 41.0, 38.0, 12.0, 24.0, 26.0, 32.0, 24.0, 25.0, 35.0, 41.0, 36.0, 43.0, 44.0, 54.0, 74.0, 62.0, 45.0, 42.0, 28.0, 22.0, 65.0, 80.0, 14.0, 14.0, 6.0, 17.0, 40.0, 70.0, 6.0, 13.0, 70.0, 64.0, 11.0, 3.0, 3.0, 9.0, 36.0, 22.0, 39.0, 38.0, 19.0, 19.0, 54.0, 45.0, 35.0, 45.0, 63.0, 79.0, 16.0, 26.0, 63.0, 61.0, 34.0, 40.0, 48.0, 45.0, 12.0, 11.0, 49.0, 54.0, 41.0, 49.0, 20.0, 14.0, 27.0, 25.0, 43.0, 39.0, 11.0, 12.0, 50.0, 37.0, 17.0, 17.0, 86.0, 101.0, 37.0, 37.0, 55.0, 73.0, 24.0, 23.0, 17.0, 6.0, 79.0, 78.0, 6.0, 6.0, 37.0, 48.0, 5.0, 15.0, 13.0, 29.0, 9.0, 11.0, 45.0, 47.0, 39.0, 43.0, 13.0, 31.0, 6.0, 3.0, 42.0, 45.0, 14.0, 6.0, 58.0, 40.0, 16.0, 15.0, 11.0, 31.0, 40.0, 43.0, 14.0, 44.0, 17.0, 28.0, 45.0, 51.0, 44.0, 37.0, 49.0, 44.0, 14.0, 22.0, 35.0, 34.0, 36.0, 52.0, 48.0, 26.0, 6.0, 3.0, 41.0, 35.0, 51.0, 47.0, 23.0, 22.0, 19.0, 22.0, 57.0, 70.0, 6.0, 6.0, 21.0, 25.0, 31.0, 39.0, 73.0, 71.0, 34.0, 37.0, 55.0, 62.0, 51.0, 60.0, 6.0, 3.0, 20.0, 34.0, 20.0, 20.0, 29.0, 50.0, 11.0, 3.0, 25.0, 37.0, 23.0, 40.0, 52.0, 54.0, 8.0, 12.0, 18.0, 9.0, 66.0, 70.0, 41.0, 49.0, 12.0, 22.0, 25.0, 27.0, 46.0, 48.0, 66.0, 51.0, 42.0, 48.0, 19.0, 20.0, 48.0, 37.0]}, "sampler_perf": {"mean_env_wait_ms": 4.8713540125875445, "mean_processing_ms": 0.16758394883061775, "mean_inference_ms": 2.326528288901312}, "off_policy_estimator": {}, "info": {"num_steps_trained": 816000, "num_steps_sampled": 435200, "sample_time_ms": 23336.79, "load_time_ms": 39.064, "grad_time_ms": 10020.897, 
"update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.3283064712331658e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.007203067187219858, "policy_loss": -0.007930143736302853, "vf_loss": 15.71717357635498, "vf_explained_var": 0.34764334559440613, "kl": 0.0010395334102213383, "entropy": 1.6892824172973633, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 435200, "episodes_total": 1088, "training_iteration": 34, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-10-12", "timestamp": 1660245012, "time_this_iter_s": 30.092119216918945, "time_total_s": 3432.980574131012, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": 
true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", 
"input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3432.980574131012, "timesteps_since_restore": 435200, "iterations_since_restore": 34, "perf": {"cpu_util_percent": 41.03023255813954, "ram_util_percent": 57.66976744186048}} +{"episode_reward_max": 146.0, "episode_reward_min": 9.0, "episode_reward_mean": 73.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 80.0}, "policy_reward_mean": {"ppo": 36.89}, "custom_metrics": {"sparse_reward_mean": 15.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 43.78, "shaped_reward_min": 9, "shaped_reward_max": 89, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.77, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.47, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 4.16, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.66, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.57, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 3.1, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 4.4, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 4.23, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 3.69, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 0.95, "useful_dish_pickup_agent_0_min": 
0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 12, "useful_dish_drop_agent_0_mean": 0.62, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.69, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.44, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 2.28, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.32, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 9, "soup_drop_agent_1_mean": 0.86, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.1, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 4.4, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.1, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 4.4, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [42.0, 125.0, 20.0, 87.0, 46.0, 53.0, 93.0, 74.0, 121.0, 84.0, 139.0, 75.0, 57.0, 48.0, 146.0, 44.0, 61.0, 90.0, 20.0, 95.0, 31.0, 125.0, 145.0, 98.0, 129.0, 68.0, 66.0, 48.0, 105.0, 87.0, 47.0, 98.0, 36.0, 69.0, 88.0, 74.0, 9.0, 76.0, 98.0, 45.0, 41.0, 127.0, 12.0, 46.0, 70.0, 144.0, 71.0, 117.0, 111.0, 9.0, 54.0, 40.0, 79.0, 14.0, 62.0, 63.0, 106.0, 20.0, 27.0, 136.0, 90.0, 34.0, 52.0, 94.0, 117.0, 90.0, 39.0, 85.0, 90.0, 79.0, 36.0, 58.0, 49.0, 76.0, 79.0, 98.0, 136.0, 87.0, 50.0, 145.0, 28.0, 23.0, 110.0, 19.0, 134.0, 14.0, 12.0, 58.0, 77.0, 38.0, 99.0, 80.0, 142.0, 42.0, 124.0, 74.0, 93.0, 23.0, 103.0, 90.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [19.0, 23.0, 67.0, 58.0, 11.0, 9.0, 45.0, 42.0, 16.0, 30.0, 13.0, 40.0, 35.0, 58.0, 37.0, 37.0, 62.0, 59.0, 33.0, 51.0, 72.0, 67.0, 25.0, 50.0, 28.0, 29.0, 27.0, 21.0, 73.0, 73.0, 14.0, 30.0, 32.0, 29.0, 48.0, 42.0, 14.0, 6.0, 50.0, 45.0, 11.0, 20.0, 71.0, 54.0, 73.0, 72.0, 56.0, 42.0, 64.0, 65.0, 31.0, 37.0, 26.0, 40.0, 20.0, 28.0, 54.0, 51.0, 41.0, 46.0, 15.0, 32.0, 53.0, 45.0, 14.0, 22.0, 35.0, 34.0, 36.0, 52.0, 48.0, 26.0, 6.0, 3.0, 41.0, 35.0, 51.0, 47.0, 23.0, 22.0, 19.0, 22.0, 57.0, 70.0, 6.0, 6.0, 21.0, 25.0, 31.0, 39.0, 73.0, 71.0, 34.0, 37.0, 55.0, 62.0, 51.0, 60.0, 6.0, 3.0, 20.0, 34.0, 20.0, 20.0, 29.0, 50.0, 11.0, 3.0, 25.0, 37.0, 23.0, 40.0, 52.0, 54.0, 8.0, 12.0, 18.0, 9.0, 66.0, 70.0, 41.0, 49.0, 12.0, 22.0, 25.0, 27.0, 46.0, 48.0, 66.0, 51.0, 42.0, 48.0, 19.0, 20.0, 48.0, 37.0, 42.0, 48.0, 41.0, 38.0, 12.0, 24.0, 26.0, 32.0, 24.0, 25.0, 35.0, 41.0, 36.0, 43.0, 44.0, 54.0, 74.0, 62.0, 45.0, 42.0, 28.0, 22.0, 65.0, 80.0, 14.0, 14.0, 6.0, 17.0, 40.0, 70.0, 6.0, 13.0, 70.0, 64.0, 11.0, 3.0, 3.0, 9.0, 36.0, 22.0, 39.0, 38.0, 19.0, 19.0, 54.0, 45.0, 35.0, 45.0, 63.0, 79.0, 16.0, 26.0, 63.0, 61.0, 34.0, 40.0, 48.0, 45.0, 12.0, 11.0, 49.0, 54.0, 41.0, 49.0]}, "sampler_perf": {"mean_env_wait_ms": 4.742888771715567, "mean_processing_ms": 0.4291925216501232, "mean_inference_ms": 2.2890734350045245}, "off_policy_estimator": {}, "info": {"num_steps_trained": 840000, "num_steps_sampled": 448000, "sample_time_ms": 59523.327, "load_time_ms": 38.502, "grad_time_ms": 106209.033, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.1641532356165829e-11, "cur_lr": 0.0010000000474974513, 
"total_loss": -0.007091447710990906, "policy_loss": -0.007865053601562977, "vf_loss": 16.12926483154297, "vf_explained_var": 0.35502591729164124, "kl": 0.0012615231098607183, "entropy": 1.6786518096923828, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 448000, "episodes_total": 1120, "training_iteration": 35, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-32-51", "timestamp": 1660246371, "time_this_iter_s": 1359.4666819572449, "time_total_s": 4792.447256088257, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, 
"bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 4792.447256088257, "timesteps_since_restore": 448000, "iterations_since_restore": 35, "perf": {"cpu_util_percent": 73.38606557377048, "ram_util_percent": 58.19344262295081}} +{"episode_reward_max": 195.0, "episode_reward_min": 9.0, "episode_reward_mean": 78.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 98.0}, "policy_reward_mean": {"ppo": 39.155}, "custom_metrics": {"sparse_reward_mean": 16.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 46.31, "shaped_reward_min": 9, "shaped_reward_max": 89, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, 
"tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.99, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.5, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 4.36, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.74, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 3.39, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 4.55, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.39, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 3.86, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 0.99, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.79, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 12, "useful_dish_drop_agent_0_mean": 0.6, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.65, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.62, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 2.33, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.2, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 9, "soup_drop_agent_1_mean": 0.83, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.39, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 4.55, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.39, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 4.55, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [31.0, 142.0, 23.0, 96.0, 64.0, 98.0, 65.0, 112.0, 36.0, 23.0, 42.0, 113.0, 33.0, 41.0, 98.0, 148.0, 130.0, 119.0, 39.0, 88.0, 42.0, 142.0, 105.0, 120.0, 179.0, 195.0, 12.0, 12.0, 56.0, 60.0, 9.0, 94.0, 117.0, 90.0, 39.0, 85.0, 90.0, 79.0, 36.0, 58.0, 49.0, 76.0, 79.0, 98.0, 136.0, 87.0, 50.0, 145.0, 28.0, 23.0, 110.0, 19.0, 134.0, 14.0, 12.0, 58.0, 77.0, 38.0, 99.0, 80.0, 142.0, 42.0, 124.0, 74.0, 93.0, 23.0, 103.0, 90.0, 42.0, 125.0, 20.0, 87.0, 46.0, 53.0, 93.0, 74.0, 121.0, 84.0, 139.0, 75.0, 57.0, 48.0, 146.0, 44.0, 61.0, 90.0, 20.0, 95.0, 31.0, 125.0, 145.0, 98.0, 129.0, 68.0, 66.0, 48.0, 105.0, 87.0, 47.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [15.0, 16.0, 76.0, 66.0, 9.0, 14.0, 45.0, 51.0, 33.0, 31.0, 43.0, 55.0, 34.0, 31.0, 60.0, 52.0, 10.0, 26.0, 3.0, 20.0, 22.0, 20.0, 59.0, 54.0, 17.0, 16.0, 22.0, 19.0, 46.0, 52.0, 74.0, 74.0, 59.0, 71.0, 54.0, 65.0, 20.0, 19.0, 47.0, 41.0, 26.0, 16.0, 73.0, 69.0, 49.0, 56.0, 59.0, 61.0, 81.0, 98.0, 98.0, 97.0, 3.0, 9.0, 6.0, 6.0, 30.0, 26.0, 32.0, 28.0, 6.0, 3.0, 46.0, 48.0, 66.0, 51.0, 42.0, 48.0, 19.0, 20.0, 48.0, 37.0, 42.0, 48.0, 41.0, 38.0, 12.0, 24.0, 26.0, 32.0, 24.0, 25.0, 35.0, 41.0, 36.0, 43.0, 44.0, 54.0, 74.0, 62.0, 45.0, 42.0, 28.0, 22.0, 65.0, 80.0, 14.0, 14.0, 6.0, 17.0, 40.0, 70.0, 6.0, 13.0, 70.0, 64.0, 11.0, 3.0, 3.0, 9.0, 36.0, 22.0, 39.0, 38.0, 19.0, 19.0, 54.0, 45.0, 35.0, 45.0, 63.0, 79.0, 16.0, 26.0, 63.0, 61.0, 34.0, 40.0, 48.0, 45.0, 12.0, 11.0, 49.0, 54.0, 41.0, 49.0, 19.0, 23.0, 67.0, 58.0, 11.0, 9.0, 45.0, 42.0, 16.0, 30.0, 13.0, 40.0, 35.0, 58.0, 37.0, 37.0, 62.0, 59.0, 33.0, 51.0, 72.0, 67.0, 25.0, 50.0, 28.0, 29.0, 27.0, 21.0, 73.0, 73.0, 14.0, 30.0, 32.0, 29.0, 48.0, 42.0, 14.0, 6.0, 50.0, 45.0, 11.0, 20.0, 71.0, 54.0, 73.0, 72.0, 56.0, 42.0, 64.0, 65.0, 31.0, 37.0, 26.0, 40.0, 20.0, 28.0, 54.0, 51.0, 41.0, 46.0, 15.0, 32.0, 53.0, 45.0]}, "sampler_perf": {"mean_env_wait_ms": 4.6429756749225914, "mean_processing_ms": 0.6884190143076668, "mean_inference_ms": 3.1849506897639785}, "off_policy_estimator": {}, "info": {"num_steps_trained": 864000, "num_steps_sampled": 460800, "sample_time_ms": 197454.884, "load_time_ms": 38.154, "grad_time_ms": 142553.757, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.8207661780829145e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.009596621617674828, "policy_loss": 
-0.010532871820032597, "vf_loss": 17.772741317749023, "vf_explained_var": 0.41850244998931885, "kl": 0.0012102305190637708, "entropy": 1.6820656061172485, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 460800, "episodes_total": 1152, "training_iteration": 36, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-02-29", "timestamp": 1660248149, "time_this_iter_s": 1777.6666460037231, "time_total_s": 6570.11390209198, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": 
null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 
67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6570.11390209198, "timesteps_since_restore": 460800, "iterations_since_restore": 36, "perf": {"cpu_util_percent": 79.74032921810701, "ram_util_percent": 58.72098765432099}} +{"episode_reward_max": 195.0, "episode_reward_min": 9.0, "episode_reward_mean": 82.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 98.0}, "policy_reward_mean": {"ppo": 41.135}, "custom_metrics": {"sparse_reward_mean": 16.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 50.27, "shaped_reward_min": 9, "shaped_reward_max": 89, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, 
"tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.38, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.68, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 4.88, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 6.05, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.31, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 3.66, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.49, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.05, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.05, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, 
"dish_drop_agent_0_mean": 2.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 12, "useful_dish_drop_agent_0_mean": 0.62, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 2.31, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.57, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.45, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.13, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 9, "soup_drop_agent_1_mean": 0.77, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 3.66, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.66, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 
0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [118.0, 107.0, 31.0, 150.0, 71.0, 58.0, 88.0, 100.0, 155.0, 98.0, 42.0, 110.0, 38.0, 94.0, 61.0, 77.0, 59.0, 66.0, 65.0, 36.0, 130.0, 67.0, 112.0, 72.0, 164.0, 45.0, 111.0, 149.0, 37.0, 82.0, 93.0, 98.0, 93.0, 23.0, 103.0, 90.0, 42.0, 125.0, 20.0, 87.0, 46.0, 53.0, 93.0, 74.0, 121.0, 84.0, 139.0, 75.0, 57.0, 48.0, 146.0, 44.0, 61.0, 90.0, 20.0, 95.0, 31.0, 125.0, 145.0, 98.0, 129.0, 68.0, 66.0, 48.0, 105.0, 87.0, 47.0, 98.0, 31.0, 142.0, 23.0, 96.0, 64.0, 98.0, 65.0, 112.0, 36.0, 23.0, 42.0, 113.0, 33.0, 41.0, 98.0, 148.0, 130.0, 119.0, 39.0, 88.0, 42.0, 142.0, 105.0, 120.0, 179.0, 195.0, 12.0, 12.0, 56.0, 60.0, 9.0, 94.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [61.0, 57.0, 48.0, 59.0, 16.0, 15.0, 60.0, 90.0, 26.0, 45.0, 40.0, 18.0, 45.0, 43.0, 63.0, 37.0, 78.0, 77.0, 51.0, 47.0, 11.0, 31.0, 60.0, 50.0, 30.0, 8.0, 48.0, 46.0, 37.0, 24.0, 37.0, 40.0, 25.0, 34.0, 29.0, 37.0, 39.0, 26.0, 19.0, 17.0, 62.0, 68.0, 44.0, 23.0, 61.0, 51.0, 39.0, 33.0, 79.0, 85.0, 17.0, 28.0, 56.0, 55.0, 73.0, 76.0, 12.0, 25.0, 26.0, 56.0, 55.0, 38.0, 48.0, 50.0, 48.0, 45.0, 12.0, 11.0, 49.0, 54.0, 41.0, 49.0, 19.0, 23.0, 67.0, 58.0, 11.0, 9.0, 45.0, 42.0, 16.0, 30.0, 13.0, 40.0, 35.0, 58.0, 37.0, 37.0, 62.0, 59.0, 33.0, 51.0, 72.0, 67.0, 25.0, 50.0, 28.0, 29.0, 27.0, 21.0, 73.0, 73.0, 14.0, 30.0, 32.0, 29.0, 48.0, 42.0, 14.0, 6.0, 50.0, 45.0, 11.0, 20.0, 71.0, 54.0, 73.0, 72.0, 56.0, 42.0, 64.0, 65.0, 31.0, 37.0, 26.0, 40.0, 20.0, 28.0, 54.0, 51.0, 41.0, 46.0, 15.0, 32.0, 53.0, 45.0, 15.0, 16.0, 76.0, 66.0, 9.0, 14.0, 45.0, 51.0, 33.0, 31.0, 43.0, 55.0, 34.0, 31.0, 60.0, 52.0, 10.0, 26.0, 3.0, 20.0, 22.0, 20.0, 59.0, 54.0, 17.0, 16.0, 22.0, 19.0, 46.0, 52.0, 74.0, 74.0, 59.0, 71.0, 54.0, 65.0, 20.0, 19.0, 47.0, 41.0, 26.0, 16.0, 73.0, 69.0, 49.0, 56.0, 59.0, 61.0, 81.0, 98.0, 98.0, 97.0, 3.0, 9.0, 6.0, 6.0, 30.0, 26.0, 32.0, 28.0, 6.0, 3.0, 46.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 4.550001067823368, "mean_processing_ms": 0.9407599736993785, "mean_inference_ms": 4.060064536997679}, "off_policy_estimator": {}, "info": {"num_steps_trained": 888000, "num_steps_sampled": 473600, "sample_time_ms": 197652.347, "load_time_ms": 38.247, "grad_time_ms": 142451.276, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.9103830890414573e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.00908196996897459, "policy_loss": -0.009920346550643444, "vf_loss": 16.691673278808594, "vf_explained_var": 0.3790724277496338, "kl": 
0.0013888808898627758, "entropy": 1.661569595336914, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 473600, "episodes_total": 1184, "training_iteration": 37, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-03-05", "timestamp": 1660248185, "time_this_iter_s": 36.35908007621765, "time_total_s": 6606.472982168198, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", 
{"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6606.472982168198, "timesteps_since_restore": 473600, "iterations_since_restore": 37, "perf": {"cpu_util_percent": 52.89999999999999, "ram_util_percent": 59.76923076923076}} +{"episode_reward_max": 195.0, "episode_reward_min": 9.0, "episode_reward_mean": 82.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 98.0}, "policy_reward_mean": {"ppo": 41.22}, "custom_metrics": {"sparse_reward_mean": 15.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 51.24, "shaped_reward_min": 9, "shaped_reward_max": 84, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.6, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.44, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 4.92, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.88, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.45, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 3.85, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.87, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.4, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.09, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.1, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.02, "dish_drop_agent_1_min": 
0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.52, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.75, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 2.47, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.51, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.42, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 9, "soup_drop_agent_1_mean": 0.97, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 3.85, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.87, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.85, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.87, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, 
"catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [155.0, 107.0, 66.0, 64.0, 59.0, 130.0, 80.0, 101.0, 98.0, 109.0, 71.0, 68.0, 84.0, 79.0, 57.0, 20.0, 37.0, 82.0, 48.0, 64.0, 84.0, 113.0, 101.0, 62.0, 133.0, 71.0, 23.0, 109.0, 125.0, 23.0, 58.0, 75.0, 105.0, 87.0, 47.0, 98.0, 31.0, 142.0, 23.0, 96.0, 64.0, 98.0, 65.0, 112.0, 36.0, 23.0, 42.0, 113.0, 33.0, 41.0, 98.0, 148.0, 130.0, 119.0, 39.0, 88.0, 42.0, 142.0, 105.0, 120.0, 179.0, 195.0, 12.0, 12.0, 56.0, 60.0, 9.0, 94.0, 118.0, 107.0, 31.0, 150.0, 71.0, 58.0, 88.0, 100.0, 155.0, 98.0, 42.0, 110.0, 38.0, 94.0, 61.0, 77.0, 59.0, 66.0, 65.0, 36.0, 130.0, 67.0, 112.0, 72.0, 164.0, 45.0, 111.0, 149.0, 37.0, 82.0, 93.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [82.0, 73.0, 64.0, 43.0, 37.0, 29.0, 30.0, 34.0, 28.0, 31.0, 62.0, 68.0, 37.0, 43.0, 50.0, 51.0, 44.0, 54.0, 50.0, 59.0, 42.0, 29.0, 29.0, 39.0, 42.0, 42.0, 34.0, 45.0, 37.0, 20.0, 8.0, 12.0, 20.0, 17.0, 42.0, 40.0, 22.0, 26.0, 38.0, 26.0, 34.0, 50.0, 54.0, 59.0, 55.0, 46.0, 40.0, 22.0, 60.0, 73.0, 37.0, 34.0, 11.0, 12.0, 51.0, 58.0, 56.0, 69.0, 17.0, 6.0, 28.0, 30.0, 39.0, 36.0, 54.0, 51.0, 41.0, 46.0, 15.0, 32.0, 53.0, 45.0, 15.0, 16.0, 76.0, 66.0, 9.0, 14.0, 45.0, 51.0, 33.0, 31.0, 43.0, 55.0, 34.0, 31.0, 60.0, 52.0, 10.0, 26.0, 3.0, 20.0, 22.0, 20.0, 59.0, 54.0, 17.0, 16.0, 22.0, 19.0, 46.0, 52.0, 74.0, 74.0, 59.0, 71.0, 54.0, 65.0, 20.0, 19.0, 47.0, 41.0, 26.0, 16.0, 73.0, 69.0, 49.0, 56.0, 59.0, 61.0, 81.0, 98.0, 98.0, 97.0, 3.0, 9.0, 6.0, 6.0, 30.0, 26.0, 32.0, 28.0, 6.0, 3.0, 46.0, 48.0, 61.0, 57.0, 48.0, 59.0, 16.0, 15.0, 60.0, 90.0, 26.0, 45.0, 40.0, 18.0, 45.0, 43.0, 63.0, 37.0, 78.0, 77.0, 51.0, 47.0, 11.0, 31.0, 60.0, 50.0, 30.0, 8.0, 48.0, 46.0, 37.0, 24.0, 37.0, 40.0, 25.0, 34.0, 29.0, 37.0, 39.0, 26.0, 19.0, 17.0, 62.0, 68.0, 44.0, 23.0, 61.0, 51.0, 39.0, 33.0, 79.0, 85.0, 17.0, 28.0, 56.0, 55.0, 73.0, 76.0, 12.0, 25.0, 26.0, 56.0, 55.0, 38.0, 48.0, 50.0]}, "sampler_perf": {"mean_env_wait_ms": 4.462790236915632, "mean_processing_ms": 0.9577209626577212, "mean_inference_ms": 4.91290526345304}, "off_policy_estimator": {}, "info": {"num_steps_trained": 912000, "num_steps_sampled": 486400, "sample_time_ms": 197395.402, "load_time_ms": 38.358, "grad_time_ms": 142364.304, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.4551915445207286e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.0073294141329824924, "policy_loss": -0.007997877895832062, "vf_loss": 15.018708229064941, "vf_explained_var": 0.4496181905269623, "kl": 0.0011589183704927564, "entropy": 1.666812539100647, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 
1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 486400, "episodes_total": 1216, "training_iteration": 38, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-03-36", "timestamp": 1660248216, "time_this_iter_s": 30.582061052322388, "time_total_s": 6637.05504322052, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6637.05504322052, "timesteps_since_restore": 486400, "iterations_since_restore": 38, "perf": {"cpu_util_percent": 42.890697674418604, "ram_util_percent": 58.16976744186046}} +{"episode_reward_max": 164.0, "episode_reward_min": 9.0, "episode_reward_mean": 80.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 90.0}, "policy_reward_mean": {"ppo": 40.455}, "custom_metrics": {"sparse_reward_mean": 14.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 52.91, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.33, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.33, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.82, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.89, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.99, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 3.84, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.6, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.1, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.2, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.0, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.6, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.66, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.88, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.56, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 3.84, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.84, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [95.0, 36.0, 158.0, 96.0, 95.0, 45.0, 66.0, 127.0, 84.0, 147.0, 34.0, 84.0, 96.0, 118.0, 42.0, 112.0, 112.0, 11.0, 28.0, 66.0, 42.0, 39.0, 85.0, 137.0, 96.0, 31.0, 92.0, 60.0, 66.0, 72.0, 56.0, 104.0, 56.0, 60.0, 9.0, 94.0, 118.0, 107.0, 31.0, 150.0, 71.0, 58.0, 88.0, 100.0, 155.0, 98.0, 42.0, 110.0, 38.0, 94.0, 61.0, 77.0, 59.0, 66.0, 65.0, 36.0, 130.0, 67.0, 112.0, 72.0, 164.0, 45.0, 111.0, 149.0, 37.0, 82.0, 93.0, 98.0, 155.0, 107.0, 66.0, 64.0, 59.0, 130.0, 80.0, 101.0, 98.0, 109.0, 71.0, 68.0, 84.0, 79.0, 57.0, 20.0, 37.0, 82.0, 48.0, 64.0, 84.0, 113.0, 101.0, 62.0, 133.0, 71.0, 23.0, 109.0, 125.0, 23.0, 58.0, 75.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [50.0, 45.0, 
6.0, 30.0, 84.0, 74.0, 46.0, 50.0, 45.0, 50.0, 20.0, 25.0, 33.0, 33.0, 67.0, 60.0, 42.0, 42.0, 66.0, 81.0, 14.0, 20.0, 37.0, 47.0, 50.0, 46.0, 62.0, 56.0, 17.0, 25.0, 41.0, 71.0, 45.0, 67.0, 8.0, 3.0, 16.0, 12.0, 29.0, 37.0, 25.0, 17.0, 28.0, 11.0, 52.0, 33.0, 65.0, 72.0, 47.0, 49.0, 22.0, 9.0, 31.0, 61.0, 30.0, 30.0, 34.0, 32.0, 28.0, 44.0, 22.0, 34.0, 51.0, 53.0, 30.0, 26.0, 32.0, 28.0, 6.0, 3.0, 46.0, 48.0, 61.0, 57.0, 48.0, 59.0, 16.0, 15.0, 60.0, 90.0, 26.0, 45.0, 40.0, 18.0, 45.0, 43.0, 63.0, 37.0, 78.0, 77.0, 51.0, 47.0, 11.0, 31.0, 60.0, 50.0, 30.0, 8.0, 48.0, 46.0, 37.0, 24.0, 37.0, 40.0, 25.0, 34.0, 29.0, 37.0, 39.0, 26.0, 19.0, 17.0, 62.0, 68.0, 44.0, 23.0, 61.0, 51.0, 39.0, 33.0, 79.0, 85.0, 17.0, 28.0, 56.0, 55.0, 73.0, 76.0, 12.0, 25.0, 26.0, 56.0, 55.0, 38.0, 48.0, 50.0, 82.0, 73.0, 64.0, 43.0, 37.0, 29.0, 30.0, 34.0, 28.0, 31.0, 62.0, 68.0, 37.0, 43.0, 50.0, 51.0, 44.0, 54.0, 50.0, 59.0, 42.0, 29.0, 29.0, 39.0, 42.0, 42.0, 34.0, 45.0, 37.0, 20.0, 8.0, 12.0, 20.0, 17.0, 42.0, 40.0, 22.0, 26.0, 38.0, 26.0, 34.0, 50.0, 54.0, 59.0, 55.0, 46.0, 40.0, 22.0, 60.0, 73.0, 37.0, 34.0, 11.0, 12.0, 51.0, 58.0, 56.0, 69.0, 17.0, 6.0, 28.0, 30.0, 39.0, 36.0]}, "sampler_perf": {"mean_env_wait_ms": 4.361961744597006, "mean_processing_ms": 0.9376235930507917, "mean_inference_ms": 4.9270347290029886}, "off_policy_estimator": {}, "info": {"num_steps_trained": 936000, "num_steps_sampled": 499200, "sample_time_ms": 197058.326, "load_time_ms": 38.236, "grad_time_ms": 142247.976, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.275957722603643e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.009562704712152481, "policy_loss": -0.010270781815052032, "vf_loss": 15.400076866149902, "vf_explained_var": 0.39905285835266113, "kl": 0.0014264689525589347, "entropy": 1.6638473272323608, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 499200, "episodes_total": 
1248, "training_iteration": 39, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-04-04", "timestamp": 1660248244, "time_this_iter_s": 28.12965416908264, "time_total_s": 6665.184697389603, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": 
true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6665.184697389603, "timesteps_since_restore": 499200, "iterations_since_restore": 39, "perf": {"cpu_util_percent": 32.9825, "ram_util_percent": 58.30499999999999}} +{"episode_reward_max": 193.0, "episode_reward_min": 9.0, "episode_reward_mean": 81.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 98.0}, "policy_reward_mean": {"ppo": 40.945}, "custom_metrics": {"sparse_reward_mean": 14.4, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 53.09, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.34, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.25, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.88, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.78, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 3.9, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.92, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.72, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.2, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.07, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.67, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.74, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.41, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.9, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.92, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.9, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.92, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [63.0, 122.0, 107.0, 193.0, 107.0, 44.0, 20.0, 144.0, 80.0, 53.0, 156.0, 100.0, 55.0, 74.0, 89.0, 9.0, 69.0, 52.0, 50.0, 96.0, 115.0, 58.0, 87.0, 144.0, 92.0, 20.0, 63.0, 81.0, 115.0, 84.0, 99.0, 150.0, 37.0, 82.0, 93.0, 98.0, 155.0, 107.0, 66.0, 64.0, 59.0, 130.0, 80.0, 101.0, 98.0, 109.0, 71.0, 68.0, 84.0, 79.0, 57.0, 20.0, 37.0, 82.0, 48.0, 64.0, 84.0, 113.0, 101.0, 62.0, 133.0, 71.0, 23.0, 109.0, 125.0, 23.0, 58.0, 75.0, 95.0, 36.0, 158.0, 96.0, 95.0, 45.0, 66.0, 127.0, 84.0, 147.0, 34.0, 84.0, 96.0, 118.0, 42.0, 112.0, 112.0, 11.0, 28.0, 66.0, 42.0, 39.0, 85.0, 137.0, 96.0, 31.0, 92.0, 60.0, 66.0, 72.0, 56.0, 104.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [31.0, 32.0, 55.0, 67.0, 50.0, 57.0, 95.0, 98.0, 52.0, 55.0, 14.0, 30.0, 6.0, 14.0, 78.0, 66.0, 40.0, 40.0, 
20.0, 33.0, 75.0, 81.0, 47.0, 53.0, 27.0, 28.0, 39.0, 35.0, 49.0, 40.0, 6.0, 3.0, 41.0, 28.0, 27.0, 25.0, 30.0, 20.0, 54.0, 42.0, 59.0, 56.0, 27.0, 31.0, 37.0, 50.0, 76.0, 68.0, 37.0, 55.0, 5.0, 15.0, 24.0, 39.0, 43.0, 38.0, 56.0, 59.0, 32.0, 52.0, 50.0, 49.0, 70.0, 80.0, 12.0, 25.0, 26.0, 56.0, 55.0, 38.0, 48.0, 50.0, 82.0, 73.0, 64.0, 43.0, 37.0, 29.0, 30.0, 34.0, 28.0, 31.0, 62.0, 68.0, 37.0, 43.0, 50.0, 51.0, 44.0, 54.0, 50.0, 59.0, 42.0, 29.0, 29.0, 39.0, 42.0, 42.0, 34.0, 45.0, 37.0, 20.0, 8.0, 12.0, 20.0, 17.0, 42.0, 40.0, 22.0, 26.0, 38.0, 26.0, 34.0, 50.0, 54.0, 59.0, 55.0, 46.0, 40.0, 22.0, 60.0, 73.0, 37.0, 34.0, 11.0, 12.0, 51.0, 58.0, 56.0, 69.0, 17.0, 6.0, 28.0, 30.0, 39.0, 36.0, 50.0, 45.0, 6.0, 30.0, 84.0, 74.0, 46.0, 50.0, 45.0, 50.0, 20.0, 25.0, 33.0, 33.0, 67.0, 60.0, 42.0, 42.0, 66.0, 81.0, 14.0, 20.0, 37.0, 47.0, 50.0, 46.0, 62.0, 56.0, 17.0, 25.0, 41.0, 71.0, 45.0, 67.0, 8.0, 3.0, 16.0, 12.0, 29.0, 37.0, 25.0, 17.0, 28.0, 11.0, 52.0, 33.0, 65.0, 72.0, 47.0, 49.0, 22.0, 9.0, 31.0, 61.0, 30.0, 30.0, 34.0, 32.0, 28.0, 44.0, 22.0, 34.0, 51.0, 53.0]}, "sampler_perf": {"mean_env_wait_ms": 4.262822214946647, "mean_processing_ms": 0.9177406233023881, "mean_inference_ms": 4.823257056931315}, "off_policy_estimator": {}, "info": {"num_steps_trained": 960000, "num_steps_sampled": 512000, "sample_time_ms": 196701.65, "load_time_ms": 38.048, "grad_time_ms": 142153.928, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.6379788613018216e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.008069280534982681, "policy_loss": -0.008976585231721401, "vf_loss": 17.312698364257812, "vf_explained_var": 0.4009813070297241, "kl": 0.0012740670936182141, "entropy": 1.647910237312317, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 512000, "episodes_total": 1280, "training_iteration": 40, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-11_21-04-32", "timestamp": 1660248272, "time_this_iter_s": 27.727252960205078, "time_total_s": 6692.911950349808, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6692.911950349808, "timesteps_since_restore": 512000, "iterations_since_restore": 40, "perf": {"cpu_util_percent": 32.13, "ram_util_percent": 58.30499999999999}} +{"episode_reward_max": 213.0, "episode_reward_min": 6.0, "episode_reward_mean": 85.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 113.0}, "policy_reward_mean": {"ppo": 42.77}, "custom_metrics": {"sparse_reward_mean": 14.8, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 55.94, "shaped_reward_min": 6, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.53, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.11, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.17, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.71, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.9, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.34, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.79, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.45, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.39, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 1.11, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.14, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.99, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.99, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.55, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.81, "soup_pickup_agent_0_min": 0, 
"soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.45, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 1.23, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.34, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.79, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.34, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.79, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [104.0, 141.0, 53.0, 70.0, 96.0, 127.0, 58.0, 31.0, 6.0, 52.0, 141.0, 55.0, 64.0, 88.0, 148.0, 69.0, 187.0, 50.0, 124.0, 107.0, 93.0, 99.0, 116.0, 72.0, 89.0, 42.0, 146.0, 213.0, 118.0, 82.0, 81.0, 28.0, 125.0, 23.0, 58.0, 75.0, 95.0, 36.0, 158.0, 96.0, 95.0, 45.0, 66.0, 127.0, 84.0, 147.0, 34.0, 84.0, 96.0, 118.0, 42.0, 112.0, 112.0, 11.0, 28.0, 66.0, 42.0, 39.0, 85.0, 137.0, 96.0, 31.0, 92.0, 60.0, 66.0, 72.0, 56.0, 104.0, 63.0, 122.0, 107.0, 193.0, 107.0, 44.0, 20.0, 144.0, 80.0, 53.0, 156.0, 100.0, 55.0, 74.0, 89.0, 9.0, 69.0, 52.0, 50.0, 96.0, 115.0, 58.0, 87.0, 144.0, 92.0, 20.0, 63.0, 81.0, 115.0, 84.0, 99.0, 150.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [48.0, 56.0, 68.0, 73.0, 25.0, 28.0, 42.0, 28.0, 53.0, 43.0, 58.0, 69.0, 34.0, 24.0, 19.0, 12.0, 3.0, 3.0, 23.0, 29.0, 73.0, 68.0, 21.0, 34.0, 42.0, 22.0, 35.0, 53.0, 68.0, 80.0, 41.0, 28.0, 99.0, 
88.0, 30.0, 20.0, 60.0, 64.0, 59.0, 48.0, 48.0, 45.0, 53.0, 46.0, 57.0, 59.0, 37.0, 35.0, 44.0, 45.0, 20.0, 22.0, 76.0, 70.0, 100.0, 113.0, 57.0, 61.0, 48.0, 34.0, 31.0, 50.0, 3.0, 25.0, 56.0, 69.0, 17.0, 6.0, 28.0, 30.0, 39.0, 36.0, 50.0, 45.0, 6.0, 30.0, 84.0, 74.0, 46.0, 50.0, 45.0, 50.0, 20.0, 25.0, 33.0, 33.0, 67.0, 60.0, 42.0, 42.0, 66.0, 81.0, 14.0, 20.0, 37.0, 47.0, 50.0, 46.0, 62.0, 56.0, 17.0, 25.0, 41.0, 71.0, 45.0, 67.0, 8.0, 3.0, 16.0, 12.0, 29.0, 37.0, 25.0, 17.0, 28.0, 11.0, 52.0, 33.0, 65.0, 72.0, 47.0, 49.0, 22.0, 9.0, 31.0, 61.0, 30.0, 30.0, 34.0, 32.0, 28.0, 44.0, 22.0, 34.0, 51.0, 53.0, 31.0, 32.0, 55.0, 67.0, 50.0, 57.0, 95.0, 98.0, 52.0, 55.0, 14.0, 30.0, 6.0, 14.0, 78.0, 66.0, 40.0, 40.0, 20.0, 33.0, 75.0, 81.0, 47.0, 53.0, 27.0, 28.0, 39.0, 35.0, 49.0, 40.0, 6.0, 3.0, 41.0, 28.0, 27.0, 25.0, 30.0, 20.0, 54.0, 42.0, 59.0, 56.0, 27.0, 31.0, 37.0, 50.0, 76.0, 68.0, 37.0, 55.0, 5.0, 15.0, 24.0, 39.0, 43.0, 38.0, 56.0, 59.0, 32.0, 52.0, 50.0, 49.0, 70.0, 80.0]}, "sampler_perf": {"mean_env_wait_ms": 4.168357407076552, "mean_processing_ms": 0.8988004870947216, "mean_inference_ms": 4.723239868801954}, "off_policy_estimator": {}, "info": {"num_steps_trained": 984000, "num_steps_sampled": 524800, "sample_time_ms": 196459.456, "load_time_ms": 38.195, "grad_time_ms": 142037.515, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.8189894306509108e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.008718971163034439, "policy_loss": -0.009683111682534218, "vf_loss": 17.845956802368164, "vf_explained_var": 0.43686649203300476, "kl": 0.0014183915918692946, "entropy": 1.6409085988998413, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 524800, "episodes_total": 1312, "training_iteration": 41, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-05-00", "timestamp": 1660248300, "time_this_iter_s": 27.954697370529175, 
"time_total_s": 6720.866647720337, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": 
false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": 
true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6720.866647720337, "timesteps_since_restore": 524800, "iterations_since_restore": 41, "perf": {"cpu_util_percent": 35.58461538461538, "ram_util_percent": 58.16923076923076}} +{"episode_reward_max": 213.0, "episode_reward_min": 6.0, "episode_reward_mean": 92.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 116.0}, "policy_reward_mean": {"ppo": 46.035}, "custom_metrics": {"sparse_reward_mean": 16.8, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 58.47, "shaped_reward_min": 6, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 7.0, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 7.36, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.54, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.75, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.99, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.6, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.36, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.4, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 1.2, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.18, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.9, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.48, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.87, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 3.27, 
"soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.58, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.25, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.6, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.6, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [73.0, 64.0, 96.0, 101.0, 66.0, 168.0, 146.0, 144.0, 87.0, 61.0, 95.0, 58.0, 122.0, 91.0, 58.0, 138.0, 78.0, 58.0, 58.0, 36.0, 110.0, 76.0, 99.0, 169.0, 167.0, 201.0, 55.0, 104.0, 212.0, 31.0, 115.0, 31.0, 66.0, 72.0, 56.0, 104.0, 63.0, 122.0, 107.0, 193.0, 107.0, 44.0, 20.0, 144.0, 80.0, 53.0, 156.0, 100.0, 55.0, 74.0, 89.0, 9.0, 69.0, 52.0, 50.0, 96.0, 115.0, 58.0, 87.0, 144.0, 92.0, 20.0, 63.0, 81.0, 115.0, 84.0, 99.0, 150.0, 104.0, 141.0, 53.0, 70.0, 96.0, 127.0, 58.0, 31.0, 6.0, 52.0, 141.0, 55.0, 64.0, 88.0, 148.0, 69.0, 187.0, 50.0, 124.0, 107.0, 93.0, 99.0, 116.0, 72.0, 89.0, 42.0, 146.0, 213.0, 118.0, 82.0, 81.0, 28.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [41.0, 32.0, 30.0, 34.0, 43.0, 53.0, 48.0, 53.0, 29.0, 37.0, 91.0, 77.0, 76.0, 70.0, 68.0, 76.0, 51.0, 36.0, 28.0, 33.0, 54.0, 41.0, 19.0, 39.0, 62.0, 60.0, 48.0, 43.0, 30.0, 28.0, 57.0, 81.0, 42.0, 36.0, 30.0, 28.0, 34.0, 24.0, 15.0, 21.0, 57.0, 53.0, 49.0, 27.0, 50.0, 49.0, 76.0, 93.0, 
83.0, 84.0, 85.0, 116.0, 35.0, 20.0, 41.0, 63.0, 105.0, 107.0, 17.0, 14.0, 56.0, 59.0, 8.0, 23.0, 34.0, 32.0, 28.0, 44.0, 22.0, 34.0, 51.0, 53.0, 31.0, 32.0, 55.0, 67.0, 50.0, 57.0, 95.0, 98.0, 52.0, 55.0, 14.0, 30.0, 6.0, 14.0, 78.0, 66.0, 40.0, 40.0, 20.0, 33.0, 75.0, 81.0, 47.0, 53.0, 27.0, 28.0, 39.0, 35.0, 49.0, 40.0, 6.0, 3.0, 41.0, 28.0, 27.0, 25.0, 30.0, 20.0, 54.0, 42.0, 59.0, 56.0, 27.0, 31.0, 37.0, 50.0, 76.0, 68.0, 37.0, 55.0, 5.0, 15.0, 24.0, 39.0, 43.0, 38.0, 56.0, 59.0, 32.0, 52.0, 50.0, 49.0, 70.0, 80.0, 48.0, 56.0, 68.0, 73.0, 25.0, 28.0, 42.0, 28.0, 53.0, 43.0, 58.0, 69.0, 34.0, 24.0, 19.0, 12.0, 3.0, 3.0, 23.0, 29.0, 73.0, 68.0, 21.0, 34.0, 42.0, 22.0, 35.0, 53.0, 68.0, 80.0, 41.0, 28.0, 99.0, 88.0, 30.0, 20.0, 60.0, 64.0, 59.0, 48.0, 48.0, 45.0, 53.0, 46.0, 57.0, 59.0, 37.0, 35.0, 44.0, 45.0, 20.0, 22.0, 76.0, 70.0, 100.0, 113.0, 57.0, 61.0, 48.0, 34.0, 31.0, 50.0, 3.0, 25.0]}, "sampler_perf": {"mean_env_wait_ms": 4.078369518030482, "mean_processing_ms": 0.8807722404539655, "mean_inference_ms": 4.6286338379623215}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1008000, "num_steps_sampled": 537600, "sample_time_ms": 196166.577, "load_time_ms": 37.661, "grad_time_ms": 141994.493, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 9.094947153254554e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.00832280796021223, "policy_loss": -0.009453889913856983, "vf_loss": 19.490577697753906, "vf_explained_var": 0.44570884108543396, "kl": 0.0015499308938160539, "entropy": 1.6359552145004272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 537600, "episodes_total": 1344, "training_iteration": 42, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-05-29", "timestamp": 1660248329, "time_this_iter_s": 29.278310775756836, "time_total_s": 6750.144958496094, "pid": 66422, "hostname": 
"Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, 
"no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, 
"vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6750.144958496094, "timesteps_since_restore": 537600, "iterations_since_restore": 42, "perf": {"cpu_util_percent": 35.96428571428572, "ram_util_percent": 58.190476190476204}} +{"episode_reward_max": 213.0, "episode_reward_min": 6.0, "episode_reward_mean": 92.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 116.0}, "policy_reward_mean": {"ppo": 46.195}, "custom_metrics": {"sparse_reward_mean": 17.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 57.99, "shaped_reward_min": 6, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.96, 
"onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 7.36, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.46, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.68, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 1.94, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 4.56, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.25, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.38, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 1.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.2, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.47, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.99, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 3.11, "soup_pickup_agent_1_min": 0, 
"soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.63, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.73, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.25, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.19, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.56, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.56, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, 
"useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [45.0, 93.0, 46.0, 61.0, 123.0, 57.0, 42.0, 23.0, 31.0, 128.0, 52.0, 85.0, 146.0, 137.0, 25.0, 112.0, 102.0, 119.0, 28.0, 84.0, 68.0, 101.0, 144.0, 92.0, 88.0, 58.0, 98.0, 109.0, 139.0, 91.0, 48.0, 98.0, 115.0, 84.0, 99.0, 150.0, 104.0, 141.0, 53.0, 70.0, 96.0, 127.0, 58.0, 31.0, 6.0, 52.0, 141.0, 55.0, 64.0, 88.0, 148.0, 69.0, 187.0, 50.0, 124.0, 107.0, 93.0, 99.0, 116.0, 72.0, 89.0, 42.0, 146.0, 213.0, 118.0, 82.0, 81.0, 28.0, 73.0, 64.0, 96.0, 101.0, 66.0, 168.0, 146.0, 144.0, 87.0, 61.0, 95.0, 58.0, 122.0, 91.0, 58.0, 138.0, 78.0, 58.0, 58.0, 36.0, 110.0, 76.0, 99.0, 169.0, 167.0, 201.0, 55.0, 104.0, 212.0, 31.0, 115.0, 31.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [31.0, 14.0, 37.0, 56.0, 21.0, 25.0, 41.0, 20.0, 65.0, 58.0, 19.0, 38.0, 16.0, 26.0, 9.0, 14.0, 14.0, 17.0, 49.0, 79.0, 22.0, 30.0, 43.0, 42.0, 72.0, 74.0, 63.0, 74.0, 19.0, 6.0, 56.0, 56.0, 55.0, 47.0, 55.0, 64.0, 11.0, 17.0, 31.0, 53.0, 37.0, 31.0, 53.0, 48.0, 66.0, 78.0, 53.0, 39.0, 42.0, 46.0, 19.0, 39.0, 52.0, 46.0, 53.0, 
56.0, 65.0, 74.0, 42.0, 49.0, 19.0, 29.0, 50.0, 48.0, 56.0, 59.0, 32.0, 52.0, 50.0, 49.0, 70.0, 80.0, 48.0, 56.0, 68.0, 73.0, 25.0, 28.0, 42.0, 28.0, 53.0, 43.0, 58.0, 69.0, 34.0, 24.0, 19.0, 12.0, 3.0, 3.0, 23.0, 29.0, 73.0, 68.0, 21.0, 34.0, 42.0, 22.0, 35.0, 53.0, 68.0, 80.0, 41.0, 28.0, 99.0, 88.0, 30.0, 20.0, 60.0, 64.0, 59.0, 48.0, 48.0, 45.0, 53.0, 46.0, 57.0, 59.0, 37.0, 35.0, 44.0, 45.0, 20.0, 22.0, 76.0, 70.0, 100.0, 113.0, 57.0, 61.0, 48.0, 34.0, 31.0, 50.0, 3.0, 25.0, 41.0, 32.0, 30.0, 34.0, 43.0, 53.0, 48.0, 53.0, 29.0, 37.0, 91.0, 77.0, 76.0, 70.0, 68.0, 76.0, 51.0, 36.0, 28.0, 33.0, 54.0, 41.0, 19.0, 39.0, 62.0, 60.0, 48.0, 43.0, 30.0, 28.0, 57.0, 81.0, 42.0, 36.0, 30.0, 28.0, 34.0, 24.0, 15.0, 21.0, 57.0, 53.0, 49.0, 27.0, 50.0, 49.0, 76.0, 93.0, 83.0, 84.0, 85.0, 116.0, 35.0, 20.0, 41.0, 63.0, 105.0, 107.0, 17.0, 14.0, 56.0, 59.0, 8.0, 23.0]}, "sampler_perf": {"mean_env_wait_ms": 3.992670298084334, "mean_processing_ms": 0.8636158543743789, "mean_inference_ms": 4.538596678243932}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1032000, "num_steps_sampled": 550400, "sample_time_ms": 195606.597, "load_time_ms": 37.682, "grad_time_ms": 141791.558, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.547473576627277e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.009549283422529697, "policy_loss": -0.010450693778693676, "vf_loss": 17.197433471679688, "vf_explained_var": 0.4546402394771576, "kl": 0.00132859090808779, "entropy": 1.6366652250289917, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 550400, "episodes_total": 1376, "training_iteration": 43, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-05-57", "timestamp": 1660248357, "time_this_iter_s": 27.376117944717407, "time_total_s": 6777.521076440811, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6777.521076440811, "timesteps_since_restore": 550400, "iterations_since_restore": 43, "perf": {"cpu_util_percent": 34.52051282051282, "ram_util_percent": 58.123076923076916}} +{"episode_reward_max": 212.0, "episode_reward_min": 9.0, "episode_reward_mean": 89.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 116.0}, "policy_reward_mean": {"ppo": 44.72}, "custom_metrics": {"sparse_reward_mean": 17.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 55.44, "shaped_reward_min": 9, "shaped_reward_max": 92, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.54, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 7.64, 
"onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.03, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 6.0, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 1.95, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 4.16, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 5.24, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.45, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 1.09, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.82, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.69, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.0, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 2.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 1.68, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.64, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.22, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.96, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.16, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 5.24, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.16, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 5.24, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [147.0, 136.0, 50.0, 114.0, 28.0, 49.0, 161.0, 95.0, 135.0, 98.0, 121.0, 165.0, 12.0, 110.0, 104.0, 84.0, 103.0, 9.0, 66.0, 101.0, 31.0, 42.0, 34.0, 50.0, 90.0, 101.0, 33.0, 191.0, 47.0, 61.0, 144.0, 82.0, 118.0, 82.0, 81.0, 28.0, 73.0, 64.0, 96.0, 101.0, 66.0, 168.0, 146.0, 144.0, 87.0, 61.0, 95.0, 58.0, 122.0, 91.0, 58.0, 138.0, 78.0, 58.0, 58.0, 36.0, 110.0, 76.0, 99.0, 169.0, 167.0, 201.0, 55.0, 104.0, 212.0, 31.0, 115.0, 31.0, 45.0, 93.0, 46.0, 61.0, 123.0, 57.0, 42.0, 23.0, 31.0, 128.0, 52.0, 85.0, 146.0, 137.0, 25.0, 112.0, 102.0, 119.0, 28.0, 84.0, 68.0, 101.0, 144.0, 92.0, 88.0, 58.0, 98.0, 109.0, 139.0, 91.0, 48.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [76.0, 71.0, 62.0, 74.0, 27.0, 23.0, 61.0, 53.0, 12.0, 16.0, 18.0, 31.0, 69.0, 92.0, 44.0, 51.0, 81.0, 54.0, 40.0, 58.0, 63.0, 58.0, 85.0, 80.0, 3.0, 9.0, 54.0, 56.0, 50.0, 54.0, 42.0, 42.0, 42.0, 61.0, 0.0, 9.0, 35.0, 31.0, 53.0, 48.0, 22.0, 9.0, 20.0, 22.0, 20.0, 14.0, 14.0, 36.0, 43.0, 47.0, 53.0, 48.0, 13.0, 20.0, 85.0, 106.0, 16.0, 31.0, 26.0, 35.0, 80.0, 64.0, 34.0, 48.0, 57.0, 61.0, 48.0, 34.0, 
31.0, 50.0, 3.0, 25.0, 41.0, 32.0, 30.0, 34.0, 43.0, 53.0, 48.0, 53.0, 29.0, 37.0, 91.0, 77.0, 76.0, 70.0, 68.0, 76.0, 51.0, 36.0, 28.0, 33.0, 54.0, 41.0, 19.0, 39.0, 62.0, 60.0, 48.0, 43.0, 30.0, 28.0, 57.0, 81.0, 42.0, 36.0, 30.0, 28.0, 34.0, 24.0, 15.0, 21.0, 57.0, 53.0, 49.0, 27.0, 50.0, 49.0, 76.0, 93.0, 83.0, 84.0, 85.0, 116.0, 35.0, 20.0, 41.0, 63.0, 105.0, 107.0, 17.0, 14.0, 56.0, 59.0, 8.0, 23.0, 31.0, 14.0, 37.0, 56.0, 21.0, 25.0, 41.0, 20.0, 65.0, 58.0, 19.0, 38.0, 16.0, 26.0, 9.0, 14.0, 14.0, 17.0, 49.0, 79.0, 22.0, 30.0, 43.0, 42.0, 72.0, 74.0, 63.0, 74.0, 19.0, 6.0, 56.0, 56.0, 55.0, 47.0, 55.0, 64.0, 11.0, 17.0, 31.0, 53.0, 37.0, 31.0, 53.0, 48.0, 66.0, 78.0, 53.0, 39.0, 42.0, 46.0, 19.0, 39.0, 52.0, 46.0, 53.0, 56.0, 65.0, 74.0, 42.0, 49.0, 19.0, 29.0, 50.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 3.9108563485299497, "mean_processing_ms": 0.8472286288222008, "mean_inference_ms": 4.453100666428265}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1056000, "num_steps_sampled": 563200, "sample_time_ms": 195418.359, "load_time_ms": 37.483, "grad_time_ms": 141705.307, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.2737367883136385e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.010453901253640652, "policy_loss": -0.011599976569414139, "vf_loss": 19.665088653564453, "vf_explained_var": 0.43753400444984436, "kl": 0.0012759790988638997, "entropy": 1.640870451927185, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 563200, "episodes_total": 1408, "training_iteration": 44, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-06-24", "timestamp": 1660248384, "time_this_iter_s": 27.344013929367065, "time_total_s": 6804.865090370178, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 
400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6804.865090370178, "timesteps_since_restore": 563200, "iterations_since_restore": 44, "perf": {"cpu_util_percent": 32.94102564102564, "ram_util_percent": 58.05128205128204}} +{"episode_reward_max": 239.0, "episode_reward_min": 9.0, "episode_reward_mean": 90.38, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 127.0}, "policy_reward_mean": {"ppo": 45.19}, "custom_metrics": {"sparse_reward_mean": 19.0, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 52.38, "shaped_reward_min": 9, "shaped_reward_max": 92, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.55, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 7.59, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 13, 
"useful_onion_pickup_agent_0_mean": 5.0, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.95, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 2.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 4.08, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 5.02, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.4, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.28, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 2.8, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 1.55, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.53, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.8, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.08, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 5.02, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.08, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 5.02, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [239.0, 130.0, 113.0, 17.0, 130.0, 162.0, 144.0, 153.0, 152.0, 55.0, 105.0, 53.0, 182.0, 77.0, 161.0, 71.0, 31.0, 95.0, 93.0, 92.0, 25.0, 77.0, 12.0, 47.0, 77.0, 88.0, 193.0, 58.0, 115.0, 63.0, 74.0, 98.0, 212.0, 31.0, 115.0, 31.0, 45.0, 93.0, 46.0, 61.0, 123.0, 57.0, 42.0, 23.0, 31.0, 128.0, 52.0, 85.0, 146.0, 137.0, 25.0, 112.0, 102.0, 119.0, 28.0, 84.0, 68.0, 101.0, 144.0, 92.0, 88.0, 58.0, 98.0, 109.0, 139.0, 91.0, 48.0, 98.0, 147.0, 136.0, 50.0, 114.0, 28.0, 49.0, 161.0, 95.0, 135.0, 98.0, 121.0, 165.0, 12.0, 110.0, 104.0, 84.0, 103.0, 9.0, 66.0, 101.0, 31.0, 42.0, 34.0, 50.0, 90.0, 101.0, 33.0, 191.0, 47.0, 61.0, 144.0, 82.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [127.0, 112.0, 68.0, 62.0, 46.0, 67.0, 5.0, 12.0, 62.0, 68.0, 79.0, 83.0, 71.0, 73.0, 84.0, 69.0, 83.0, 69.0, 30.0, 25.0, 51.0, 54.0, 23.0, 30.0, 91.0, 91.0, 40.0, 37.0, 71.0, 90.0, 23.0, 48.0, 17.0, 14.0, 48.0, 47.0, 50.0, 43.0, 46.0, 46.0, 11.0, 14.0, 29.0, 48.0, 3.0, 9.0, 30.0, 17.0, 40.0, 37.0, 56.0, 32.0, 97.0, 96.0, 33.0, 25.0, 67.0, 48.0, 34.0, 29.0, 37.0, 37.0, 54.0, 44.0, 105.0, 107.0, 17.0, 14.0, 56.0, 59.0, 8.0, 23.0, 31.0, 14.0, 37.0, 56.0, 21.0, 25.0, 41.0, 20.0, 65.0, 
58.0, 19.0, 38.0, 16.0, 26.0, 9.0, 14.0, 14.0, 17.0, 49.0, 79.0, 22.0, 30.0, 43.0, 42.0, 72.0, 74.0, 63.0, 74.0, 19.0, 6.0, 56.0, 56.0, 55.0, 47.0, 55.0, 64.0, 11.0, 17.0, 31.0, 53.0, 37.0, 31.0, 53.0, 48.0, 66.0, 78.0, 53.0, 39.0, 42.0, 46.0, 19.0, 39.0, 52.0, 46.0, 53.0, 56.0, 65.0, 74.0, 42.0, 49.0, 19.0, 29.0, 50.0, 48.0, 76.0, 71.0, 62.0, 74.0, 27.0, 23.0, 61.0, 53.0, 12.0, 16.0, 18.0, 31.0, 69.0, 92.0, 44.0, 51.0, 81.0, 54.0, 40.0, 58.0, 63.0, 58.0, 85.0, 80.0, 3.0, 9.0, 54.0, 56.0, 50.0, 54.0, 42.0, 42.0, 42.0, 61.0, 0.0, 9.0, 35.0, 31.0, 53.0, 48.0, 22.0, 9.0, 20.0, 22.0, 20.0, 14.0, 14.0, 36.0, 43.0, 47.0, 53.0, 48.0, 13.0, 20.0, 85.0, 106.0, 16.0, 31.0, 26.0, 35.0, 80.0, 64.0, 34.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 3.83276732749063, "mean_processing_ms": 0.8315868647990772, "mean_inference_ms": 4.371610853440936}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1080000, "num_steps_sampled": 576000, "sample_time_ms": 158697.168, "load_time_ms": 37.561, "grad_time_ms": 45441.027, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.1368683941568192e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.007090561557561159, "policy_loss": -0.008278795517981052, "vf_loss": 20.059175491333008, "vf_explained_var": 0.4839383065700531, "kl": 0.0014106096932664514, "entropy": 1.63534414768219, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 576000, "episodes_total": 1440, "training_iteration": 45, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-06-54", "timestamp": 1660248414, "time_this_iter_s": 29.613693952560425, "time_total_s": 6834.478784322739, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 6834.478784322739, "timesteps_since_restore": 576000, "iterations_since_restore": 45, "perf": {"cpu_util_percent": 32.607142857142854, "ram_util_percent": 58.099999999999994}} +{"episode_reward_max": 239.0, "episode_reward_min": 9.0, "episode_reward_mean": 94.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 127.0}, "policy_reward_mean": {"ppo": 47.38}, "custom_metrics": {"sparse_reward_mean": 20.8, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 53.16, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.72, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 7.12, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.09, "useful_onion_pickup_agent_0_min": 0, 
"useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.56, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 1.94, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 4.37, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.82, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.27, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.46, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.78, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 2.53, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 1.43, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.71, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.99, 
"soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.73, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.37, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.82, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.37, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.82, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [164.0, 92.0, 107.0, 36.0, 41.0, 99.0, 107.0, 73.0, 42.0, 204.0, 99.0, 139.0, 45.0, 93.0, 104.0, 93.0, 198.0, 148.0, 74.0, 66.0, 152.0, 50.0, 122.0, 112.0, 39.0, 87.0, 122.0, 34.0, 115.0, 84.0, 144.0, 39.0, 139.0, 91.0, 48.0, 98.0, 147.0, 136.0, 50.0, 114.0, 28.0, 49.0, 161.0, 95.0, 135.0, 98.0, 121.0, 165.0, 12.0, 110.0, 104.0, 84.0, 103.0, 9.0, 66.0, 101.0, 31.0, 42.0, 34.0, 50.0, 90.0, 101.0, 33.0, 191.0, 47.0, 61.0, 144.0, 82.0, 239.0, 130.0, 113.0, 17.0, 130.0, 162.0, 144.0, 153.0, 152.0, 55.0, 105.0, 53.0, 182.0, 77.0, 161.0, 71.0, 31.0, 95.0, 93.0, 92.0, 25.0, 77.0, 12.0, 47.0, 77.0, 88.0, 193.0, 58.0, 115.0, 63.0, 74.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [91.0, 73.0, 32.0, 60.0, 45.0, 62.0, 22.0, 14.0, 14.0, 27.0, 53.0, 46.0, 59.0, 48.0, 37.0, 36.0, 17.0, 25.0, 92.0, 112.0, 42.0, 57.0, 65.0, 74.0, 17.0, 28.0, 45.0, 48.0, 43.0, 61.0, 43.0, 50.0, 97.0, 101.0, 72.0, 76.0, 38.0, 36.0, 32.0, 34.0, 81.0, 71.0, 27.0, 23.0, 60.0, 62.0, 59.0, 53.0, 27.0, 12.0, 37.0, 50.0, 58.0, 64.0, 12.0, 22.0, 51.0, 64.0, 45.0, 39.0, 74.0, 70.0, 25.0, 14.0, 65.0, 74.0, 42.0, 49.0, 19.0, 29.0, 50.0, 48.0, 76.0, 71.0, 62.0, 74.0, 27.0, 23.0, 61.0, 53.0, 12.0, 16.0, 18.0, 31.0, 69.0, 92.0, 44.0, 51.0, 81.0, 54.0, 40.0, 58.0, 63.0, 58.0, 85.0, 
80.0, 3.0, 9.0, 54.0, 56.0, 50.0, 54.0, 42.0, 42.0, 42.0, 61.0, 0.0, 9.0, 35.0, 31.0, 53.0, 48.0, 22.0, 9.0, 20.0, 22.0, 20.0, 14.0, 14.0, 36.0, 43.0, 47.0, 53.0, 48.0, 13.0, 20.0, 85.0, 106.0, 16.0, 31.0, 26.0, 35.0, 80.0, 64.0, 34.0, 48.0, 127.0, 112.0, 68.0, 62.0, 46.0, 67.0, 5.0, 12.0, 62.0, 68.0, 79.0, 83.0, 71.0, 73.0, 84.0, 69.0, 83.0, 69.0, 30.0, 25.0, 51.0, 54.0, 23.0, 30.0, 91.0, 91.0, 40.0, 37.0, 71.0, 90.0, 23.0, 48.0, 17.0, 14.0, 48.0, 47.0, 50.0, 43.0, 46.0, 46.0, 11.0, 14.0, 29.0, 48.0, 3.0, 9.0, 30.0, 17.0, 40.0, 37.0, 56.0, 32.0, 97.0, 96.0, 33.0, 25.0, 67.0, 48.0, 34.0, 29.0, 37.0, 37.0, 54.0, 44.0]}, "sampler_perf": {"mean_env_wait_ms": 3.75819263232233, "mean_processing_ms": 0.8166563749373907, "mean_inference_ms": 4.294229000839884}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1104000, "num_steps_sampled": 588800, "sample_time_ms": 20385.134, "load_time_ms": 37.538, "grad_time_ms": 8901.075, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.684341970784096e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.00807119719684124, "policy_loss": -0.009133302606642246, "vf_loss": 18.76689338684082, "vf_explained_var": 0.5084854960441589, "kl": 0.0014663866022601724, "entropy": 1.6291638612747192, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 588800, "episodes_total": 1472, "training_iteration": 46, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-07-23", "timestamp": 1660248443, "time_this_iter_s": 29.134671926498413, "time_total_s": 6863.613456249237, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", 
"fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, 
"sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6863.613456249237, 
"timesteps_since_restore": 588800, "iterations_since_restore": 46, "perf": {"cpu_util_percent": 34.358536585365854, "ram_util_percent": 58.190243902439015}} +{"episode_reward_max": 245.0, "episode_reward_min": 9.0, "episode_reward_mean": 94.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 135.0}, "policy_reward_mean": {"ppo": 47.0}, "custom_metrics": {"sparse_reward_mean": 21.0, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 52.0, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.57, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 6.59, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.07, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.13, 
"useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 1.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 4.41, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.63, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.29, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.49, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.0, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.84, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.78, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.32, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.31, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.65, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.89, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 
0.71, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.41, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.63, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.41, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.63, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, 
"useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [245.0, 133.0, 23.0, 133.0, 37.0, 55.0, 22.0, 55.0, 69.0, 87.0, 68.0, 71.0, 9.0, 9.0, 51.0, 88.0, 136.0, 125.0, 118.0, 139.0, 95.0, 130.0, 67.0, 79.0, 38.0, 82.0, 20.0, 98.0, 190.0, 31.0, 153.0, 104.0, 47.0, 61.0, 144.0, 82.0, 239.0, 130.0, 113.0, 17.0, 130.0, 162.0, 144.0, 153.0, 152.0, 55.0, 105.0, 53.0, 182.0, 77.0, 161.0, 71.0, 31.0, 95.0, 93.0, 92.0, 25.0, 77.0, 12.0, 47.0, 77.0, 88.0, 193.0, 58.0, 115.0, 63.0, 74.0, 98.0, 164.0, 92.0, 107.0, 36.0, 41.0, 99.0, 107.0, 73.0, 42.0, 204.0, 99.0, 139.0, 45.0, 93.0, 104.0, 93.0, 198.0, 148.0, 74.0, 66.0, 152.0, 50.0, 122.0, 112.0, 39.0, 87.0, 122.0, 34.0, 115.0, 84.0, 144.0, 39.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [110.0, 135.0, 59.0, 74.0, 11.0, 12.0, 68.0, 65.0, 22.0, 15.0, 34.0, 21.0, 11.0, 11.0, 23.0, 32.0, 25.0, 44.0, 46.0, 41.0, 42.0, 26.0, 37.0, 34.0, 0.0, 9.0, 0.0, 9.0, 24.0, 27.0, 43.0, 45.0, 66.0, 70.0, 64.0, 61.0, 54.0, 64.0, 63.0, 76.0, 56.0, 39.0, 59.0, 71.0, 30.0, 37.0, 36.0, 43.0, 16.0, 22.0, 40.0, 42.0, 3.0, 17.0, 45.0, 53.0, 91.0, 99.0, 19.0, 12.0, 78.0, 75.0, 59.0, 45.0, 16.0, 31.0, 26.0, 35.0, 80.0, 64.0, 34.0, 48.0, 127.0, 112.0, 68.0, 62.0, 46.0, 67.0, 5.0, 12.0, 62.0, 68.0, 79.0, 83.0, 71.0, 73.0, 84.0, 69.0, 83.0, 69.0, 30.0, 25.0, 51.0, 54.0, 23.0, 30.0, 91.0, 91.0, 40.0, 37.0, 71.0, 90.0, 23.0, 48.0, 17.0, 14.0, 48.0, 47.0, 50.0, 43.0, 
46.0, 46.0, 11.0, 14.0, 29.0, 48.0, 3.0, 9.0, 30.0, 17.0, 40.0, 37.0, 56.0, 32.0, 97.0, 96.0, 33.0, 25.0, 67.0, 48.0, 34.0, 29.0, 37.0, 37.0, 54.0, 44.0, 91.0, 73.0, 32.0, 60.0, 45.0, 62.0, 22.0, 14.0, 14.0, 27.0, 53.0, 46.0, 59.0, 48.0, 37.0, 36.0, 17.0, 25.0, 92.0, 112.0, 42.0, 57.0, 65.0, 74.0, 17.0, 28.0, 45.0, 48.0, 43.0, 61.0, 43.0, 50.0, 97.0, 101.0, 72.0, 76.0, 38.0, 36.0, 32.0, 34.0, 81.0, 71.0, 27.0, 23.0, 60.0, 62.0, 59.0, 53.0, 27.0, 12.0, 37.0, 50.0, 58.0, 64.0, 12.0, 22.0, 51.0, 64.0, 45.0, 39.0, 74.0, 70.0, 25.0, 14.0]}, "sampler_perf": {"mean_env_wait_ms": 3.6869910389254943, "mean_processing_ms": 0.8024023118044294, "mean_inference_ms": 4.221576344650746}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1128000, "num_steps_sampled": 601600, "sample_time_ms": 19943.652, "load_time_ms": 37.337, "grad_time_ms": 8965.172, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.842170985392048e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.007924961857497692, "policy_loss": -0.009038448333740234, "vf_loss": 19.34569549560547, "vf_explained_var": 0.504978597164154, "kl": 0.001396413892507553, "entropy": 1.6421631574630737, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 601600, "episodes_total": 1504, "training_iteration": 47, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-07-55", "timestamp": 1660248475, "time_this_iter_s": 32.583869218826294, "time_total_s": 6896.197325468063, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6896.197325468063, "timesteps_since_restore": 601600, "iterations_since_restore": 47, "perf": {"cpu_util_percent": 
34.10869565217391, "ram_util_percent": 58.16956521739129}} +{"episode_reward_max": 245.0, "episode_reward_min": 9.0, "episode_reward_mean": 101.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 135.0}, "policy_reward_mean": {"ppo": 50.81}, "custom_metrics": {"sparse_reward_mean": 22.8, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 56.02, "shaped_reward_min": 9, "shaped_reward_max": 98, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.76, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 6.77, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.56, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 5.52, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 14, 
"onion_drop_agent_0_mean": 1.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.89, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.87, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 4.79, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.97, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.5, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.66, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.03, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.0, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.83, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.39, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.68, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 1.81, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.82, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.79, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, 
"optimal_onion_potting_agent_0_mean": 4.79, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.97, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.79, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.97, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [173.0, 93.0, 109.0, 218.0, 177.0, 155.0, 136.0, 150.0, 96.0, 191.0, 93.0, 98.0, 150.0, 110.0, 76.0, 107.0, 121.0, 158.0, 139.0, 159.0, 38.0, 148.0, 47.0, 117.0, 141.0, 9.0, 141.0, 87.0, 148.0, 144.0, 93.0, 106.0, 115.0, 63.0, 74.0, 98.0, 164.0, 92.0, 107.0, 36.0, 41.0, 99.0, 107.0, 73.0, 42.0, 204.0, 99.0, 139.0, 45.0, 93.0, 104.0, 93.0, 198.0, 148.0, 74.0, 66.0, 152.0, 50.0, 122.0, 112.0, 39.0, 87.0, 122.0, 34.0, 115.0, 84.0, 144.0, 39.0, 245.0, 133.0, 23.0, 133.0, 37.0, 55.0, 22.0, 55.0, 69.0, 87.0, 68.0, 71.0, 9.0, 9.0, 51.0, 88.0, 136.0, 125.0, 118.0, 139.0, 95.0, 130.0, 67.0, 79.0, 38.0, 82.0, 20.0, 98.0, 190.0, 31.0, 153.0, 104.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [88.0, 85.0, 50.0, 43.0, 57.0, 52.0, 94.0, 124.0, 74.0, 103.0, 85.0, 70.0, 76.0, 60.0, 64.0, 86.0, 53.0, 43.0, 89.0, 102.0, 49.0, 44.0, 52.0, 46.0, 84.0, 66.0, 51.0, 59.0, 40.0, 36.0, 53.0, 54.0, 65.0, 56.0, 74.0, 84.0, 68.0, 71.0, 81.0, 78.0, 11.0, 27.0, 67.0, 81.0, 19.0, 28.0, 51.0, 66.0, 73.0, 68.0, 6.0, 3.0, 71.0, 70.0, 36.0, 51.0, 68.0, 80.0, 74.0, 70.0, 42.0, 51.0, 58.0, 48.0, 67.0, 48.0, 34.0, 29.0, 37.0, 37.0, 54.0, 44.0, 91.0, 73.0, 32.0, 60.0, 45.0, 62.0, 22.0, 14.0, 14.0, 27.0, 53.0, 46.0, 59.0, 48.0, 37.0, 36.0, 17.0, 25.0, 92.0, 112.0, 42.0, 57.0, 65.0, 74.0, 17.0, 28.0, 45.0, 48.0, 43.0, 61.0, 43.0, 50.0, 97.0, 101.0, 72.0, 76.0, 38.0, 36.0, 32.0, 34.0, 81.0, 71.0, 27.0, 
23.0, 60.0, 62.0, 59.0, 53.0, 27.0, 12.0, 37.0, 50.0, 58.0, 64.0, 12.0, 22.0, 51.0, 64.0, 45.0, 39.0, 74.0, 70.0, 25.0, 14.0, 110.0, 135.0, 59.0, 74.0, 11.0, 12.0, 68.0, 65.0, 22.0, 15.0, 34.0, 21.0, 11.0, 11.0, 23.0, 32.0, 25.0, 44.0, 46.0, 41.0, 42.0, 26.0, 37.0, 34.0, 0.0, 9.0, 0.0, 9.0, 24.0, 27.0, 43.0, 45.0, 66.0, 70.0, 64.0, 61.0, 54.0, 64.0, 63.0, 76.0, 56.0, 39.0, 59.0, 71.0, 30.0, 37.0, 36.0, 43.0, 16.0, 22.0, 40.0, 42.0, 3.0, 17.0, 45.0, 53.0, 91.0, 99.0, 19.0, 12.0, 78.0, 75.0, 59.0, 45.0]}, "sampler_perf": {"mean_env_wait_ms": 3.619074133252118, "mean_processing_ms": 0.7888363298173107, "mean_inference_ms": 4.154787775271583}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1152000, "num_steps_sampled": 614400, "sample_time_ms": 20418.706, "load_time_ms": 37.151, "grad_time_ms": 9069.585, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.421085492696024e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.01129829604178667, "policy_loss": -0.012764283455908298, "vf_loss": 22.684043884277344, "vf_explained_var": 0.5366321206092834, "kl": 0.0014537613606080413, "entropy": 1.6048468351364136, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 614400, "episodes_total": 1536, "training_iteration": 48, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-08-32", "timestamp": 1660248512, "time_this_iter_s": 36.37463116645813, "time_total_s": 6932.5719566345215, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": 
true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6932.5719566345215, "timesteps_since_restore": 614400, "iterations_since_restore": 48, "perf": {"cpu_util_percent": 36.49999999999999, 
"ram_util_percent": 58.29423076923076}} +{"episode_reward_max": 245.0, "episode_reward_min": 9.0, "episode_reward_mean": 109.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 135.0}, "policy_reward_mean": {"ppo": 54.89}, "custom_metrics": {"sparse_reward_mean": 25.6, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 58.58, "shaped_reward_min": 9, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.59, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 7.48, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.59, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 6.35, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.62, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 4.63, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.66, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.66, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.45, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.17, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.0, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.69, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.67, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.71, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.88, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.63, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.66, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.63, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.66, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [101.0, 110.0, 157.0, 167.0, 84.0, 71.0, 128.0, 42.0, 162.0, 184.0, 88.0, 108.0, 204.0, 95.0, 179.0, 155.0, 147.0, 121.0, 207.0, 76.0, 113.0, 39.0, 134.0, 75.0, 31.0, 9.0, 181.0, 162.0, 204.0, 187.0, 20.0, 167.0, 115.0, 84.0, 144.0, 39.0, 245.0, 133.0, 23.0, 133.0, 37.0, 55.0, 22.0, 55.0, 69.0, 87.0, 68.0, 71.0, 9.0, 9.0, 51.0, 88.0, 136.0, 125.0, 118.0, 139.0, 95.0, 130.0, 67.0, 79.0, 38.0, 82.0, 20.0, 98.0, 190.0, 31.0, 153.0, 104.0, 173.0, 93.0, 109.0, 218.0, 177.0, 155.0, 136.0, 150.0, 96.0, 191.0, 93.0, 98.0, 150.0, 110.0, 76.0, 107.0, 121.0, 158.0, 139.0, 159.0, 38.0, 148.0, 47.0, 117.0, 141.0, 9.0, 141.0, 87.0, 148.0, 144.0, 93.0, 106.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [54.0, 47.0, 50.0, 60.0, 71.0, 86.0, 76.0, 91.0, 46.0, 38.0, 30.0, 41.0, 65.0, 63.0, 24.0, 18.0, 75.0, 87.0, 85.0, 99.0, 42.0, 46.0, 53.0, 55.0, 105.0, 99.0, 50.0, 45.0, 96.0, 83.0, 76.0, 79.0, 69.0, 78.0, 67.0, 54.0, 99.0, 108.0, 30.0, 46.0, 53.0, 60.0, 28.0, 11.0, 57.0, 77.0, 22.0, 53.0, 20.0, 11.0, 3.0, 6.0, 98.0, 83.0, 92.0, 70.0, 93.0, 111.0, 85.0, 102.0, 17.0, 3.0, 82.0, 85.0, 51.0, 64.0, 45.0, 39.0, 74.0, 70.0, 25.0, 14.0, 110.0, 135.0, 59.0, 74.0, 11.0, 12.0, 68.0, 65.0, 22.0, 15.0, 34.0, 21.0, 11.0, 11.0, 23.0, 32.0, 25.0, 44.0, 46.0, 41.0, 42.0, 26.0, 37.0, 34.0, 0.0, 9.0, 0.0, 9.0, 24.0, 27.0, 43.0, 45.0, 66.0, 70.0, 64.0, 61.0, 54.0, 64.0, 63.0, 76.0, 56.0, 39.0, 59.0, 71.0, 30.0, 37.0, 36.0, 43.0, 16.0, 
22.0, 40.0, 42.0, 3.0, 17.0, 45.0, 53.0, 91.0, 99.0, 19.0, 12.0, 78.0, 75.0, 59.0, 45.0, 88.0, 85.0, 50.0, 43.0, 57.0, 52.0, 94.0, 124.0, 74.0, 103.0, 85.0, 70.0, 76.0, 60.0, 64.0, 86.0, 53.0, 43.0, 89.0, 102.0, 49.0, 44.0, 52.0, 46.0, 84.0, 66.0, 51.0, 59.0, 40.0, 36.0, 53.0, 54.0, 65.0, 56.0, 74.0, 84.0, 68.0, 71.0, 81.0, 78.0, 11.0, 27.0, 67.0, 81.0, 19.0, 28.0, 51.0, 66.0, 73.0, 68.0, 6.0, 3.0, 71.0, 70.0, 36.0, 51.0, 68.0, 80.0, 74.0, 70.0, 42.0, 51.0, 58.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 3.5541442522194506, "mean_processing_ms": 0.7759022948286116, "mean_inference_ms": 4.093132668646798}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1176000, "num_steps_sampled": 627200, "sample_time_ms": 20998.642, "load_time_ms": 37.333, "grad_time_ms": 9227.616, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.10542746348012e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.0032997550442814827, "policy_loss": -0.004892440978437662, "vf_loss": 23.954416275024414, "vf_explained_var": 0.525080680847168, "kl": 0.0015437895199283957, "entropy": 1.6054998636245728, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 627200, "episodes_total": 1568, "training_iteration": 49, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-09-07", "timestamp": 1660248547, "time_this_iter_s": 35.51046395301819, "time_total_s": 6968.08242058754, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, 
"max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6968.08242058754, "timesteps_since_restore": 627200, "iterations_since_restore": 49, "perf": {"cpu_util_percent": 33.118, "ram_util_percent": 57.68999999999998}} 
+{"episode_reward_max": 238.0, "episode_reward_min": 9.0, "episode_reward_mean": 123.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 124.0}, "policy_reward_mean": {"ppo": 61.525}, "custom_metrics": {"sparse_reward_mean": 29.6, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 63.85, "shaped_reward_min": 9, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.32, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 8.04, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 6.33, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 6.84, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.88, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.89, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.82, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 5.07, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 6.16, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.73, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.48, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.33, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.6, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.89, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.03, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.84, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.96, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 5.07, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 6.16, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.07, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 6.16, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [127.0, 147.0, 128.0, 210.0, 179.0, 107.0, 141.0, 75.0, 107.0, 175.0, 129.0, 153.0, 88.0, 98.0, 71.0, 105.0, 238.0, 110.0, 164.0, 68.0, 118.0, 107.0, 65.0, 171.0, 209.0, 50.0, 164.0, 99.0, 88.0, 12.0, 122.0, 166.0, 190.0, 31.0, 153.0, 104.0, 173.0, 93.0, 109.0, 218.0, 177.0, 155.0, 136.0, 150.0, 96.0, 191.0, 93.0, 98.0, 150.0, 110.0, 76.0, 107.0, 121.0, 158.0, 139.0, 159.0, 38.0, 148.0, 47.0, 117.0, 141.0, 9.0, 141.0, 87.0, 148.0, 144.0, 93.0, 106.0, 101.0, 110.0, 157.0, 167.0, 84.0, 71.0, 128.0, 42.0, 162.0, 184.0, 88.0, 108.0, 204.0, 95.0, 179.0, 155.0, 147.0, 121.0, 207.0, 76.0, 113.0, 39.0, 134.0, 75.0, 31.0, 9.0, 181.0, 162.0, 204.0, 187.0, 20.0, 167.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [48.0, 79.0, 74.0, 73.0, 68.0, 60.0, 102.0, 108.0, 87.0, 92.0, 51.0, 56.0, 64.0, 77.0, 41.0, 34.0, 67.0, 40.0, 89.0, 86.0, 68.0, 61.0, 79.0, 74.0, 43.0, 45.0, 49.0, 49.0, 35.0, 36.0, 56.0, 49.0, 122.0, 116.0, 48.0, 62.0, 90.0, 74.0, 34.0, 34.0, 64.0, 54.0, 51.0, 56.0, 34.0, 31.0, 91.0, 80.0, 108.0, 101.0, 25.0, 25.0, 80.0, 84.0, 57.0, 42.0, 42.0, 46.0, 3.0, 9.0, 54.0, 68.0, 88.0, 78.0, 91.0, 99.0, 19.0, 12.0, 78.0, 75.0, 59.0, 45.0, 88.0, 85.0, 50.0, 43.0, 57.0, 52.0, 94.0, 124.0, 74.0, 103.0, 85.0, 70.0, 76.0, 60.0, 64.0, 86.0, 53.0, 43.0, 89.0, 102.0, 49.0, 44.0, 52.0, 46.0, 84.0, 66.0, 51.0, 59.0, 40.0, 36.0, 53.0, 54.0, 65.0, 56.0, 74.0, 84.0, 68.0, 71.0, 81.0, 78.0, 11.0, 27.0, 67.0, 81.0, 19.0, 
28.0, 51.0, 66.0, 73.0, 68.0, 6.0, 3.0, 71.0, 70.0, 36.0, 51.0, 68.0, 80.0, 74.0, 70.0, 42.0, 51.0, 58.0, 48.0, 54.0, 47.0, 50.0, 60.0, 71.0, 86.0, 76.0, 91.0, 46.0, 38.0, 30.0, 41.0, 65.0, 63.0, 24.0, 18.0, 75.0, 87.0, 85.0, 99.0, 42.0, 46.0, 53.0, 55.0, 105.0, 99.0, 50.0, 45.0, 96.0, 83.0, 76.0, 79.0, 69.0, 78.0, 67.0, 54.0, 99.0, 108.0, 30.0, 46.0, 53.0, 60.0, 28.0, 11.0, 57.0, 77.0, 22.0, 53.0, 20.0, 11.0, 3.0, 6.0, 98.0, 83.0, 92.0, 70.0, 93.0, 111.0, 85.0, 102.0, 17.0, 3.0, 82.0, 85.0]}, "sampler_perf": {"mean_env_wait_ms": 3.491898537786064, "mean_processing_ms": 0.7635432106455172, "mean_inference_ms": 4.03419160595968}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1200000, "num_steps_sampled": 640000, "sample_time_ms": 21322.647, "load_time_ms": 37.352, "grad_time_ms": 9260.248, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 3.55271373174006e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.005630036350339651, "policy_loss": -0.007203007582575083, "vf_loss": 23.694684982299805, "vf_explained_var": 0.5489806532859802, "kl": 0.0013687704922631383, "entropy": 1.592978835105896, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 640000, "episodes_total": 1600, "training_iteration": 50, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-09-39", "timestamp": 1660248579, "time_this_iter_s": 31.292397022247314, "time_total_s": 6999.374817609787, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, 
"use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 
2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6999.374817609787, "timesteps_since_restore": 640000, "iterations_since_restore": 50, "perf": {"cpu_util_percent": 38.30227272727273, "ram_util_percent": 57.75909090909093}} 
+{"episode_reward_max": 238.0, "episode_reward_min": 9.0, "episode_reward_mean": 125.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 122.0}, "policy_reward_mean": {"ppo": 62.735}, "custom_metrics": {"sparse_reward_mean": 29.2, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 67.07, "shaped_reward_min": 9, "shaped_reward_max": 115, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.57, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 8.17, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 6.51, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 6.96, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 1.87, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.89, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.35, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 6.28, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.84, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.48, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.58, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.1, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.95, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.96, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.79, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.85, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 5.35, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 6.28, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.35, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 6.28, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [98.0, 63.0, 63.0, 192.0, 93.0, 178.0, 196.0, 53.0, 109.0, 156.0, 136.0, 166.0, 163.0, 175.0, 118.0, 115.0, 137.0, 115.0, 110.0, 155.0, 132.0, 79.0, 158.0, 213.0, 104.0, 153.0, 70.0, 195.0, 153.0, 85.0, 101.0, 123.0, 148.0, 144.0, 93.0, 106.0, 101.0, 110.0, 157.0, 167.0, 84.0, 71.0, 128.0, 42.0, 162.0, 184.0, 88.0, 108.0, 204.0, 95.0, 179.0, 155.0, 147.0, 121.0, 207.0, 76.0, 113.0, 39.0, 134.0, 75.0, 31.0, 9.0, 181.0, 162.0, 204.0, 187.0, 20.0, 167.0, 127.0, 147.0, 128.0, 210.0, 179.0, 107.0, 141.0, 75.0, 107.0, 175.0, 129.0, 153.0, 88.0, 98.0, 71.0, 105.0, 238.0, 110.0, 164.0, 68.0, 118.0, 107.0, 65.0, 171.0, 209.0, 50.0, 164.0, 99.0, 88.0, 12.0, 122.0, 166.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [36.0, 62.0, 38.0, 25.0, 35.0, 28.0, 94.0, 98.0, 41.0, 52.0, 86.0, 92.0, 106.0, 90.0, 24.0, 29.0, 63.0, 46.0, 63.0, 93.0, 73.0, 63.0, 85.0, 81.0, 90.0, 73.0, 71.0, 104.0, 70.0, 48.0, 56.0, 59.0, 71.0, 66.0, 56.0, 59.0, 54.0, 56.0, 73.0, 82.0, 65.0, 67.0, 44.0, 35.0, 71.0, 87.0, 108.0, 105.0, 58.0, 46.0, 88.0, 65.0, 35.0, 35.0, 107.0, 88.0, 70.0, 83.0, 48.0, 37.0, 59.0, 42.0, 63.0, 60.0, 68.0, 80.0, 74.0, 70.0, 42.0, 51.0, 58.0, 48.0, 54.0, 47.0, 50.0, 60.0, 71.0, 86.0, 76.0, 91.0, 46.0, 38.0, 30.0, 41.0, 65.0, 63.0, 24.0, 18.0, 75.0, 87.0, 85.0, 99.0, 42.0, 46.0, 53.0, 55.0, 105.0, 99.0, 50.0, 45.0, 96.0, 83.0, 76.0, 79.0, 69.0, 78.0, 67.0, 54.0, 99.0, 108.0, 30.0, 46.0, 53.0, 60.0, 28.0, 11.0, 57.0, 
77.0, 22.0, 53.0, 20.0, 11.0, 3.0, 6.0, 98.0, 83.0, 92.0, 70.0, 93.0, 111.0, 85.0, 102.0, 17.0, 3.0, 82.0, 85.0, 48.0, 79.0, 74.0, 73.0, 68.0, 60.0, 102.0, 108.0, 87.0, 92.0, 51.0, 56.0, 64.0, 77.0, 41.0, 34.0, 67.0, 40.0, 89.0, 86.0, 68.0, 61.0, 79.0, 74.0, 43.0, 45.0, 49.0, 49.0, 35.0, 36.0, 56.0, 49.0, 122.0, 116.0, 48.0, 62.0, 90.0, 74.0, 34.0, 34.0, 64.0, 54.0, 51.0, 56.0, 34.0, 31.0, 91.0, 80.0, 108.0, 101.0, 25.0, 25.0, 80.0, 84.0, 57.0, 42.0, 42.0, 46.0, 3.0, 9.0, 54.0, 68.0, 88.0, 78.0]}, "sampler_perf": {"mean_env_wait_ms": 3.4320001935473794, "mean_processing_ms": 0.7516646134052514, "mean_inference_ms": 3.975737374961292}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1224000, "num_steps_sampled": 652800, "sample_time_ms": 21552.858, "load_time_ms": 37.216, "grad_time_ms": 9227.975, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 1.77635686587003e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.0032480310183018446, "policy_loss": -0.004836531355977058, "vf_loss": 23.794113159179688, "vf_explained_var": 0.5322676301002502, "kl": 0.0011860225349664688, "entropy": 1.5818275213241577, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 652800, "episodes_total": 1632, "training_iteration": 51, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-10-09", "timestamp": 1660248609, "time_this_iter_s": 29.93578290939331, "time_total_s": 7029.31060051918, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, 
"use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 
2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7029.31060051918, "timesteps_since_restore": 652800, "iterations_since_restore": 51, "perf": {"cpu_util_percent": 36.09523809523809, "ram_util_percent": 57.82142857142857}} 
+{"episode_reward_max": 264.0, "episode_reward_min": 12.0, "episode_reward_mean": 128.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 139.0}, "policy_reward_mean": {"ppo": 64.12}, "custom_metrics": {"sparse_reward_mean": 30.2, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 67.84, "shaped_reward_min": 12, "shaped_reward_max": 115, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.73, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 7.95, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.6, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 6.65, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 1.75, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.97, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.97, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.65, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.99, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.73, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.52, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.15, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.97, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.08, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.14, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.04, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.78, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.88, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 5.65, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.99, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.65, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.99, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [141.0, 138.0, 143.0, 211.0, 127.0, 20.0, 65.0, 150.0, 93.0, 153.0, 47.0, 144.0, 264.0, 148.0, 59.0, 121.0, 39.0, 204.0, 187.0, 208.0, 121.0, 124.0, 14.0, 95.0, 81.0, 107.0, 198.0, 114.0, 104.0, 152.0, 238.0, 88.0, 204.0, 187.0, 20.0, 167.0, 127.0, 147.0, 128.0, 210.0, 179.0, 107.0, 141.0, 75.0, 107.0, 175.0, 129.0, 153.0, 88.0, 98.0, 71.0, 105.0, 238.0, 110.0, 164.0, 68.0, 118.0, 107.0, 65.0, 171.0, 209.0, 50.0, 164.0, 99.0, 88.0, 12.0, 122.0, 166.0, 98.0, 63.0, 63.0, 192.0, 93.0, 178.0, 196.0, 53.0, 109.0, 156.0, 136.0, 166.0, 163.0, 175.0, 118.0, 115.0, 137.0, 115.0, 110.0, 155.0, 132.0, 79.0, 158.0, 213.0, 104.0, 153.0, 70.0, 195.0, 153.0, 85.0, 101.0, 123.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [65.0, 76.0, 57.0, 81.0, 92.0, 51.0, 115.0, 96.0, 62.0, 65.0, 9.0, 11.0, 36.0, 29.0, 65.0, 85.0, 54.0, 39.0, 88.0, 65.0, 33.0, 14.0, 65.0, 79.0, 125.0, 139.0, 77.0, 71.0, 33.0, 26.0, 65.0, 56.0, 20.0, 19.0, 91.0, 113.0, 98.0, 89.0, 110.0, 98.0, 53.0, 68.0, 57.0, 67.0, 8.0, 6.0, 56.0, 39.0, 47.0, 34.0, 46.0, 61.0, 93.0, 105.0, 55.0, 59.0, 64.0, 40.0, 73.0, 79.0, 125.0, 113.0, 40.0, 48.0, 93.0, 111.0, 85.0, 102.0, 17.0, 3.0, 82.0, 85.0, 48.0, 79.0, 74.0, 73.0, 68.0, 60.0, 102.0, 108.0, 87.0, 92.0, 51.0, 56.0, 64.0, 77.0, 41.0, 34.0, 67.0, 40.0, 89.0, 86.0, 68.0, 61.0, 79.0, 74.0, 43.0, 45.0, 49.0, 49.0, 35.0, 36.0, 56.0, 49.0, 122.0, 116.0, 48.0, 62.0, 90.0, 74.0, 34.0, 34.0, 64.0, 54.0, 51.0, 56.0, 
34.0, 31.0, 91.0, 80.0, 108.0, 101.0, 25.0, 25.0, 80.0, 84.0, 57.0, 42.0, 42.0, 46.0, 3.0, 9.0, 54.0, 68.0, 88.0, 78.0, 36.0, 62.0, 38.0, 25.0, 35.0, 28.0, 94.0, 98.0, 41.0, 52.0, 86.0, 92.0, 106.0, 90.0, 24.0, 29.0, 63.0, 46.0, 63.0, 93.0, 73.0, 63.0, 85.0, 81.0, 90.0, 73.0, 71.0, 104.0, 70.0, 48.0, 56.0, 59.0, 71.0, 66.0, 56.0, 59.0, 54.0, 56.0, 73.0, 82.0, 65.0, 67.0, 44.0, 35.0, 71.0, 87.0, 108.0, 105.0, 58.0, 46.0, 88.0, 65.0, 35.0, 35.0, 107.0, 88.0, 70.0, 83.0, 48.0, 37.0, 59.0, 42.0, 63.0, 60.0]}, "sampler_perf": {"mean_env_wait_ms": 3.3744175644507366, "mean_processing_ms": 0.7402432912819328, "mean_inference_ms": 3.918069988577329}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1248000, "num_steps_sampled": 665600, "sample_time_ms": 21746.558, "load_time_ms": 37.515, "grad_time_ms": 9170.716, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 8.88178432935015e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.00774852791801095, "policy_loss": -0.009392179548740387, "vf_loss": 24.351181030273438, "vf_explained_var": 0.5798514485359192, "kl": 0.0016348478384315968, "entropy": 1.5829213857650757, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 665600, "episodes_total": 1664, "training_iteration": 52, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-10-39", "timestamp": 1660248639, "time_this_iter_s": 30.649518966674805, "time_total_s": 7059.960119485855, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": 
true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7059.960119485855, "timesteps_since_restore": 665600, "iterations_since_restore": 52, "perf": {"cpu_util_percent": 41.01818181818182, 
"ram_util_percent": 57.649999999999984}} +{"episode_reward_max": 293.0, "episode_reward_min": 12.0, "episode_reward_mean": 128.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 154.0}, "policy_reward_mean": {"ppo": 64.38}, "custom_metrics": {"sparse_reward_mean": 30.0, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 68.76, "shaped_reward_min": 12, "shaped_reward_max": 115, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.92, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 7.71, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.68, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.5, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 1.73, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.92, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.85, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.81, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.75, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.45, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.72, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.16, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.04, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.85, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.87, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 5.85, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.81, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.85, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.81, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [207.0, 153.0, 195.0, 101.0, 90.0, 210.0, 225.0, 106.0, 121.0, 75.0, 60.0, 170.0, 23.0, 130.0, 147.0, 136.0, 39.0, 133.0, 158.0, 101.0, 66.0, 116.0, 50.0, 167.0, 146.0, 293.0, 36.0, 109.0, 245.0, 121.0, 207.0, 97.0, 88.0, 12.0, 122.0, 166.0, 98.0, 63.0, 63.0, 192.0, 93.0, 178.0, 196.0, 53.0, 109.0, 156.0, 136.0, 166.0, 163.0, 175.0, 118.0, 115.0, 137.0, 115.0, 110.0, 155.0, 132.0, 79.0, 158.0, 213.0, 104.0, 153.0, 70.0, 195.0, 153.0, 85.0, 101.0, 123.0, 141.0, 138.0, 143.0, 211.0, 127.0, 20.0, 65.0, 150.0, 93.0, 153.0, 47.0, 144.0, 264.0, 148.0, 59.0, 121.0, 39.0, 204.0, 187.0, 208.0, 121.0, 124.0, 14.0, 95.0, 81.0, 107.0, 198.0, 114.0, 104.0, 152.0, 238.0, 88.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [105.0, 102.0, 68.0, 85.0, 96.0, 99.0, 53.0, 48.0, 38.0, 52.0, 106.0, 104.0, 113.0, 112.0, 53.0, 53.0, 59.0, 62.0, 47.0, 28.0, 22.0, 38.0, 90.0, 80.0, 9.0, 14.0, 70.0, 60.0, 82.0, 65.0, 70.0, 66.0, 19.0, 20.0, 64.0, 69.0, 96.0, 62.0, 62.0, 39.0, 31.0, 35.0, 70.0, 46.0, 27.0, 23.0, 82.0, 85.0, 71.0, 75.0, 154.0, 139.0, 16.0, 20.0, 56.0, 53.0, 126.0, 119.0, 62.0, 59.0, 103.0, 104.0, 45.0, 52.0, 42.0, 46.0, 3.0, 9.0, 54.0, 68.0, 88.0, 78.0, 36.0, 62.0, 38.0, 25.0, 35.0, 28.0, 94.0, 98.0, 41.0, 52.0, 86.0, 92.0, 106.0, 90.0, 24.0, 29.0, 63.0, 46.0, 63.0, 93.0, 73.0, 63.0, 85.0, 81.0, 90.0, 73.0, 71.0, 104.0, 70.0, 48.0, 56.0, 59.0, 71.0, 66.0, 56.0, 59.0, 54.0, 56.0, 73.0, 82.0, 65.0, 67.0, 44.0, 35.0, 
71.0, 87.0, 108.0, 105.0, 58.0, 46.0, 88.0, 65.0, 35.0, 35.0, 107.0, 88.0, 70.0, 83.0, 48.0, 37.0, 59.0, 42.0, 63.0, 60.0, 65.0, 76.0, 57.0, 81.0, 92.0, 51.0, 115.0, 96.0, 62.0, 65.0, 9.0, 11.0, 36.0, 29.0, 65.0, 85.0, 54.0, 39.0, 88.0, 65.0, 33.0, 14.0, 65.0, 79.0, 125.0, 139.0, 77.0, 71.0, 33.0, 26.0, 65.0, 56.0, 20.0, 19.0, 91.0, 113.0, 98.0, 89.0, 110.0, 98.0, 53.0, 68.0, 57.0, 67.0, 8.0, 6.0, 56.0, 39.0, 47.0, 34.0, 46.0, 61.0, 93.0, 105.0, 55.0, 59.0, 64.0, 40.0, 73.0, 79.0, 125.0, 113.0, 40.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 3.319165284336134, "mean_processing_ms": 0.7292810578162132, "mean_inference_ms": 3.8629630777095305}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1272000, "num_steps_sampled": 678400, "sample_time_ms": 22200.855, "load_time_ms": 37.251, "grad_time_ms": 9256.826, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 4.440892164675075e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.006719778757542372, "policy_loss": -0.008363676257431507, "vf_loss": 24.369796752929688, "vf_explained_var": 0.5794721245765686, "kl": 0.0014526437735185027, "entropy": 1.586159348487854, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 678400, "episodes_total": 1696, "training_iteration": 53, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-11-12", "timestamp": 1660248672, "time_this_iter_s": 32.775245904922485, "time_total_s": 7092.735365390778, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7092.735365390778, "timesteps_since_restore": 678400, "iterations_since_restore": 53, "perf": {"cpu_util_percent": 38.126086956521746, 
"ram_util_percent": 57.626086956521746}} +{"episode_reward_max": 296.0, "episode_reward_min": 14.0, "episode_reward_mean": 132.59, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 154.0}, "policy_reward_mean": {"ppo": 66.295}, "custom_metrics": {"sparse_reward_mean": 31.4, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 69.79, "shaped_reward_min": 14, "shaped_reward_max": 110, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.04, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 7.88, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.88, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.68, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 15, "onion_drop_agent_0_mean": 1.73, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.93, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.95, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.92, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 4.44, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.47, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.36, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.61, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.12, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.22, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 2.14, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.14, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.84, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.97, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 5.95, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.92, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.95, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.92, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [205.0, 61.0, 126.0, 104.0, 179.0, 296.0, 72.0, 126.0, 172.0, 17.0, 155.0, 207.0, 227.0, 101.0, 110.0, 101.0, 207.0, 77.0, 155.0, 159.0, 155.0, 158.0, 150.0, 96.0, 81.0, 125.0, 155.0, 42.0, 270.0, 155.0, 147.0, 75.0, 153.0, 85.0, 101.0, 123.0, 141.0, 138.0, 143.0, 211.0, 127.0, 20.0, 65.0, 150.0, 93.0, 153.0, 47.0, 144.0, 264.0, 148.0, 59.0, 121.0, 39.0, 204.0, 187.0, 208.0, 121.0, 124.0, 14.0, 95.0, 81.0, 107.0, 198.0, 114.0, 104.0, 152.0, 238.0, 88.0, 207.0, 153.0, 195.0, 101.0, 90.0, 210.0, 225.0, 106.0, 121.0, 75.0, 60.0, 170.0, 23.0, 130.0, 147.0, 136.0, 39.0, 133.0, 158.0, 101.0, 66.0, 116.0, 50.0, 167.0, 146.0, 293.0, 36.0, 109.0, 245.0, 121.0, 207.0, 97.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [105.0, 100.0, 28.0, 33.0, 65.0, 61.0, 51.0, 53.0, 92.0, 87.0, 143.0, 153.0, 32.0, 40.0, 69.0, 57.0, 75.0, 97.0, 3.0, 14.0, 79.0, 76.0, 107.0, 100.0, 102.0, 125.0, 37.0, 64.0, 49.0, 61.0, 42.0, 59.0, 104.0, 103.0, 43.0, 34.0, 68.0, 87.0, 77.0, 82.0, 64.0, 91.0, 74.0, 84.0, 84.0, 66.0, 50.0, 46.0, 42.0, 39.0, 69.0, 56.0, 86.0, 69.0, 23.0, 19.0, 138.0, 132.0, 65.0, 90.0, 74.0, 73.0, 33.0, 42.0, 70.0, 83.0, 48.0, 37.0, 59.0, 42.0, 63.0, 60.0, 65.0, 76.0, 57.0, 81.0, 92.0, 51.0, 115.0, 96.0, 62.0, 65.0, 9.0, 11.0, 36.0, 29.0, 65.0, 85.0, 54.0, 39.0, 88.0, 65.0, 33.0, 14.0, 65.0, 79.0, 125.0, 139.0, 77.0, 71.0, 33.0, 26.0, 65.0, 56.0, 20.0, 19.0, 91.0, 113.0, 98.0, 89.0, 110.0, 98.0, 53.0, 68.0, 57.0, 
67.0, 8.0, 6.0, 56.0, 39.0, 47.0, 34.0, 46.0, 61.0, 93.0, 105.0, 55.0, 59.0, 64.0, 40.0, 73.0, 79.0, 125.0, 113.0, 40.0, 48.0, 105.0, 102.0, 68.0, 85.0, 96.0, 99.0, 53.0, 48.0, 38.0, 52.0, 106.0, 104.0, 113.0, 112.0, 53.0, 53.0, 59.0, 62.0, 47.0, 28.0, 22.0, 38.0, 90.0, 80.0, 9.0, 14.0, 70.0, 60.0, 82.0, 65.0, 70.0, 66.0, 19.0, 20.0, 64.0, 69.0, 96.0, 62.0, 62.0, 39.0, 31.0, 35.0, 70.0, 46.0, 27.0, 23.0, 82.0, 85.0, 71.0, 75.0, 154.0, 139.0, 16.0, 20.0, 56.0, 53.0, 126.0, 119.0, 62.0, 59.0, 103.0, 104.0, 45.0, 52.0]}, "sampler_perf": {"mean_env_wait_ms": 3.2660665746708606, "mean_processing_ms": 0.718736935276724, "mean_inference_ms": 3.8108664134221066}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1296000, "num_steps_sampled": 691200, "sample_time_ms": 22619.672, "load_time_ms": 37.409, "grad_time_ms": 9296.582, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 2.2204460823375376e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.006839328911155462, "policy_loss": -0.008672266267240047, "vf_loss": 26.157081604003906, "vf_explained_var": 0.5799071192741394, "kl": 0.0013235282385721803, "entropy": 1.5655454397201538, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 691200, "episodes_total": 1728, "training_iteration": 54, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-11-44", "timestamp": 1660248704, "time_this_iter_s": 31.93130087852478, "time_total_s": 7124.666666269302, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7124.666666269302, "timesteps_since_restore": 691200, "iterations_since_restore": 54, "perf": {"cpu_util_percent": 38.36222222222223, 
"ram_util_percent": 57.54666666666667}} +{"episode_reward_max": 301.0, "episode_reward_min": 17.0, "episode_reward_mean": 138.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 155.0}, "policy_reward_mean": {"ppo": 69.05}, "custom_metrics": {"sparse_reward_mean": 33.6, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 70.9, "shaped_reward_min": 17, "shaped_reward_max": 113, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.15, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 8.01, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.92, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.88, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 15, "onion_drop_agent_0_mean": 1.72, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.97, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.92, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 6.05, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 6.19, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 4.59, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.72, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.79, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.58, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.66, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.16, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.24, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 2.13, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.93, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.89, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 6.05, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 6.19, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.05, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 6.19, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [144.0, 88.0, 37.0, 77.0, 106.0, 117.0, 253.0, 98.0, 179.0, 104.0, 139.0, 152.0, 64.0, 74.0, 273.0, 67.0, 150.0, 163.0, 78.0, 68.0, 141.0, 187.0, 39.0, 158.0, 202.0, 301.0, 153.0, 247.0, 199.0, 163.0, 115.0, 193.0, 104.0, 152.0, 238.0, 88.0, 207.0, 153.0, 195.0, 101.0, 90.0, 210.0, 225.0, 106.0, 121.0, 75.0, 60.0, 170.0, 23.0, 130.0, 147.0, 136.0, 39.0, 133.0, 158.0, 101.0, 66.0, 116.0, 50.0, 167.0, 146.0, 293.0, 36.0, 109.0, 245.0, 121.0, 207.0, 97.0, 205.0, 61.0, 126.0, 104.0, 179.0, 296.0, 72.0, 126.0, 172.0, 17.0, 155.0, 207.0, 227.0, 101.0, 110.0, 101.0, 207.0, 77.0, 155.0, 159.0, 155.0, 158.0, 150.0, 96.0, 81.0, 125.0, 155.0, 42.0, 270.0, 155.0, 147.0, 75.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [65.0, 79.0, 41.0, 47.0, 14.0, 23.0, 26.0, 51.0, 61.0, 45.0, 55.0, 62.0, 125.0, 128.0, 53.0, 45.0, 81.0, 98.0, 50.0, 54.0, 68.0, 71.0, 80.0, 72.0, 33.0, 31.0, 29.0, 45.0, 142.0, 131.0, 39.0, 28.0, 62.0, 88.0, 88.0, 75.0, 27.0, 51.0, 39.0, 29.0, 71.0, 70.0, 82.0, 105.0, 20.0, 19.0, 66.0, 92.0, 97.0, 105.0, 146.0, 155.0, 71.0, 82.0, 128.0, 119.0, 113.0, 86.0, 94.0, 69.0, 62.0, 53.0, 107.0, 86.0, 64.0, 40.0, 73.0, 79.0, 125.0, 113.0, 40.0, 48.0, 105.0, 102.0, 68.0, 85.0, 96.0, 99.0, 53.0, 48.0, 38.0, 52.0, 106.0, 104.0, 113.0, 112.0, 53.0, 53.0, 59.0, 62.0, 47.0, 28.0, 22.0, 38.0, 90.0, 80.0, 9.0, 14.0, 70.0, 60.0, 82.0, 65.0, 70.0, 66.0, 19.0, 20.0, 64.0, 69.0, 96.0, 62.0, 62.0, 39.0, 31.0, 35.0, 
70.0, 46.0, 27.0, 23.0, 82.0, 85.0, 71.0, 75.0, 154.0, 139.0, 16.0, 20.0, 56.0, 53.0, 126.0, 119.0, 62.0, 59.0, 103.0, 104.0, 45.0, 52.0, 105.0, 100.0, 28.0, 33.0, 65.0, 61.0, 51.0, 53.0, 92.0, 87.0, 143.0, 153.0, 32.0, 40.0, 69.0, 57.0, 75.0, 97.0, 3.0, 14.0, 79.0, 76.0, 107.0, 100.0, 102.0, 125.0, 37.0, 64.0, 49.0, 61.0, 42.0, 59.0, 104.0, 103.0, 43.0, 34.0, 68.0, 87.0, 77.0, 82.0, 64.0, 91.0, 74.0, 84.0, 84.0, 66.0, 50.0, 46.0, 42.0, 39.0, 69.0, 56.0, 86.0, 69.0, 23.0, 19.0, 138.0, 132.0, 65.0, 90.0, 74.0, 73.0, 33.0, 42.0]}, "sampler_perf": {"mean_env_wait_ms": 3.214861984478974, "mean_processing_ms": 0.7085517018126862, "mean_inference_ms": 3.760458478559153}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1320000, "num_steps_sampled": 704000, "sample_time_ms": 22712.594, "load_time_ms": 37.614, "grad_time_ms": 9158.532, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 1.1102230411687688e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.007639638613909483, "policy_loss": -0.00974108837544918, "vf_loss": 28.871795654296875, "vf_explained_var": 0.5772756934165955, "kl": 0.0015572212869301438, "entropy": 1.5714462995529175, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 704000, "episodes_total": 1760, "training_iteration": 55, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-12-13", "timestamp": 1660248733, "time_this_iter_s": 29.164530992507935, "time_total_s": 7153.83119726181, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": 
false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7153.83119726181, "timesteps_since_restore": 704000, "iterations_since_restore": 55, "perf": {"cpu_util_percent": 37.4609756097561, 
"ram_util_percent": 57.50487804878048}} +{"episode_reward_max": 301.0, "episode_reward_min": 12.0, "episode_reward_mean": 149.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 161.0}, "policy_reward_mean": {"ppo": 74.835}, "custom_metrics": {"sparse_reward_mean": 37.8, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 74.07, "shaped_reward_min": 12, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.1, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 8.78, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 6.98, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 7.57, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.78, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 5.98, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 6.87, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.43, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.52, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.1, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.33, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 2.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.34, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.8, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.89, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 5.98, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 6.87, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.98, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 6.87, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [203.0, 136.0, 112.0, 259.0, 164.0, 150.0, 167.0, 93.0, 178.0, 227.0, 236.0, 250.0, 218.0, 169.0, 145.0, 159.0, 41.0, 12.0, 144.0, 131.0, 298.0, 47.0, 201.0, 213.0, 176.0, 155.0, 238.0, 115.0, 219.0, 82.0, 216.0, 148.0, 245.0, 121.0, 207.0, 97.0, 205.0, 61.0, 126.0, 104.0, 179.0, 296.0, 72.0, 126.0, 172.0, 17.0, 155.0, 207.0, 227.0, 101.0, 110.0, 101.0, 207.0, 77.0, 155.0, 159.0, 155.0, 158.0, 150.0, 96.0, 81.0, 125.0, 155.0, 42.0, 270.0, 155.0, 147.0, 75.0, 144.0, 88.0, 37.0, 77.0, 106.0, 117.0, 253.0, 98.0, 179.0, 104.0, 139.0, 152.0, 64.0, 74.0, 273.0, 67.0, 150.0, 163.0, 78.0, 68.0, 141.0, 187.0, 39.0, 158.0, 202.0, 301.0, 153.0, 247.0, 199.0, 163.0, 115.0, 193.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [103.0, 100.0, 70.0, 66.0, 69.0, 43.0, 134.0, 125.0, 81.0, 83.0, 62.0, 88.0, 71.0, 96.0, 39.0, 54.0, 89.0, 89.0, 104.0, 123.0, 124.0, 112.0, 119.0, 131.0, 111.0, 107.0, 73.0, 96.0, 74.0, 71.0, 60.0, 99.0, 19.0, 22.0, 6.0, 6.0, 77.0, 67.0, 66.0, 65.0, 161.0, 137.0, 9.0, 38.0, 92.0, 109.0, 106.0, 107.0, 79.0, 97.0, 74.0, 81.0, 107.0, 131.0, 56.0, 59.0, 106.0, 113.0, 40.0, 42.0, 111.0, 105.0, 55.0, 93.0, 126.0, 119.0, 62.0, 59.0, 103.0, 104.0, 45.0, 52.0, 105.0, 100.0, 28.0, 33.0, 65.0, 61.0, 51.0, 53.0, 92.0, 87.0, 143.0, 153.0, 32.0, 40.0, 69.0, 57.0, 75.0, 97.0, 3.0, 14.0, 79.0, 76.0, 107.0, 100.0, 102.0, 125.0, 37.0, 64.0, 49.0, 61.0, 42.0, 59.0, 104.0, 103.0, 43.0, 34.0, 68.0, 87.0, 77.0, 
82.0, 64.0, 91.0, 74.0, 84.0, 84.0, 66.0, 50.0, 46.0, 42.0, 39.0, 69.0, 56.0, 86.0, 69.0, 23.0, 19.0, 138.0, 132.0, 65.0, 90.0, 74.0, 73.0, 33.0, 42.0, 65.0, 79.0, 41.0, 47.0, 14.0, 23.0, 26.0, 51.0, 61.0, 45.0, 55.0, 62.0, 125.0, 128.0, 53.0, 45.0, 81.0, 98.0, 50.0, 54.0, 68.0, 71.0, 80.0, 72.0, 33.0, 31.0, 29.0, 45.0, 142.0, 131.0, 39.0, 28.0, 62.0, 88.0, 88.0, 75.0, 27.0, 51.0, 39.0, 29.0, 71.0, 70.0, 82.0, 105.0, 20.0, 19.0, 66.0, 92.0, 97.0, 105.0, 146.0, 155.0, 71.0, 82.0, 128.0, 119.0, 113.0, 86.0, 94.0, 69.0, 62.0, 53.0, 107.0, 86.0]}, "sampler_perf": {"mean_env_wait_ms": 3.16541739890218, "mean_processing_ms": 0.6987189186943766, "mean_inference_ms": 3.711599248384498}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1344000, "num_steps_sampled": 716800, "sample_time_ms": 22951.468, "load_time_ms": 37.343, "grad_time_ms": 9065.669, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 5.551115205843844e-18, "cur_lr": 0.0010000000474974513, "total_loss": -0.004821139387786388, "policy_loss": -0.006896324921399355, "vf_loss": 28.4981746673584, "vf_explained_var": 0.6428199410438538, "kl": 0.0015486044576391578, "entropy": 1.5492569208145142, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 716800, "episodes_total": 1792, "training_iteration": 56, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-12-44", "timestamp": 1660248764, "time_this_iter_s": 30.594375133514404, "time_total_s": 7184.425572395325, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7184.425572395325, "timesteps_since_restore": 716800, "iterations_since_restore": 56, "perf": {"cpu_util_percent": 
38.95348837209303, "ram_util_percent": 57.599999999999994}} +{"episode_reward_max": 305.0, "episode_reward_min": 12.0, "episode_reward_mean": 157.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 161.0}, "policy_reward_mean": {"ppo": 78.625}, "custom_metrics": {"sparse_reward_mean": 41.6, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 74.05, "shaped_reward_min": 12, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.82, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 9.13, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 6.75, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 8.03, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 17, 
"onion_drop_agent_0_mean": 1.62, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.96, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 5.8, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 7.28, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.7, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.51, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.3, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.78, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.91, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, 
"optimal_onion_potting_agent_0_mean": 5.8, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 7.28, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.8, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 7.28, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [201.0, 66.0, 180.0, 121.0, 247.0, 190.0, 66.0, 139.0, 139.0, 205.0, 215.0, 267.0, 62.0, 213.0, 155.0, 181.0, 58.0, 174.0, 305.0, 207.0, 155.0, 178.0, 107.0, 166.0, 213.0, 99.0, 152.0, 246.0, 167.0, 110.0, 155.0, 108.0, 270.0, 155.0, 147.0, 75.0, 144.0, 88.0, 37.0, 77.0, 106.0, 117.0, 253.0, 98.0, 179.0, 104.0, 139.0, 152.0, 64.0, 74.0, 273.0, 67.0, 150.0, 163.0, 78.0, 68.0, 141.0, 187.0, 39.0, 158.0, 202.0, 301.0, 153.0, 247.0, 199.0, 163.0, 115.0, 193.0, 203.0, 136.0, 112.0, 259.0, 164.0, 150.0, 167.0, 93.0, 178.0, 227.0, 236.0, 250.0, 218.0, 169.0, 145.0, 159.0, 41.0, 12.0, 144.0, 131.0, 298.0, 47.0, 201.0, 213.0, 176.0, 155.0, 238.0, 115.0, 219.0, 82.0, 216.0, 148.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [95.0, 106.0, 37.0, 29.0, 94.0, 86.0, 62.0, 59.0, 113.0, 134.0, 76.0, 114.0, 26.0, 40.0, 57.0, 82.0, 57.0, 82.0, 99.0, 106.0, 100.0, 115.0, 121.0, 146.0, 25.0, 37.0, 108.0, 105.0, 76.0, 79.0, 95.0, 86.0, 22.0, 36.0, 85.0, 89.0, 148.0, 157.0, 103.0, 104.0, 71.0, 84.0, 95.0, 83.0, 56.0, 51.0, 85.0, 81.0, 111.0, 102.0, 45.0, 54.0, 70.0, 82.0, 105.0, 141.0, 76.0, 91.0, 65.0, 45.0, 68.0, 87.0, 64.0, 44.0, 138.0, 132.0, 65.0, 90.0, 74.0, 73.0, 33.0, 42.0, 65.0, 79.0, 41.0, 47.0, 14.0, 23.0, 26.0, 51.0, 61.0, 45.0, 55.0, 62.0, 125.0, 128.0, 53.0, 45.0, 81.0, 98.0, 50.0, 54.0, 68.0, 71.0, 80.0, 72.0, 33.0, 31.0, 29.0, 45.0, 142.0, 131.0, 39.0, 28.0, 62.0, 
88.0, 88.0, 75.0, 27.0, 51.0, 39.0, 29.0, 71.0, 70.0, 82.0, 105.0, 20.0, 19.0, 66.0, 92.0, 97.0, 105.0, 146.0, 155.0, 71.0, 82.0, 128.0, 119.0, 113.0, 86.0, 94.0, 69.0, 62.0, 53.0, 107.0, 86.0, 103.0, 100.0, 70.0, 66.0, 69.0, 43.0, 134.0, 125.0, 81.0, 83.0, 62.0, 88.0, 71.0, 96.0, 39.0, 54.0, 89.0, 89.0, 104.0, 123.0, 124.0, 112.0, 119.0, 131.0, 111.0, 107.0, 73.0, 96.0, 74.0, 71.0, 60.0, 99.0, 19.0, 22.0, 6.0, 6.0, 77.0, 67.0, 66.0, 65.0, 161.0, 137.0, 9.0, 38.0, 92.0, 109.0, 106.0, 107.0, 79.0, 97.0, 74.0, 81.0, 107.0, 131.0, 56.0, 59.0, 106.0, 113.0, 40.0, 42.0, 111.0, 105.0, 55.0, 93.0]}, "sampler_perf": {"mean_env_wait_ms": 3.1176592492930246, "mean_processing_ms": 0.6892173986565695, "mean_inference_ms": 3.663860513787198}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1368000, "num_steps_sampled": 729600, "sample_time_ms": 22935.784, "load_time_ms": 37.435, "grad_time_ms": 8913.484, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 2.775557602921922e-18, "cur_lr": 0.0010000000474974513, "total_loss": -0.004853060003370047, "policy_loss": -0.0074228327721357346, "vf_loss": 33.384822845458984, "vf_explained_var": 0.6208257079124451, "kl": 0.0016279626870527864, "entropy": 1.5374183654785156, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 729600, "episodes_total": 1824, "training_iteration": 57, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-13-15", "timestamp": 1660248795, "time_this_iter_s": 30.907179594039917, "time_total_s": 7215.332751989365, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", 
"fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7215.332751989365, "timesteps_since_restore": 729600, 
"iterations_since_restore": 57, "perf": {"cpu_util_percent": 39.13636363636363, "ram_util_percent": 57.62499999999999}} +{"episode_reward_max": 305.0, "episode_reward_min": 12.0, "episode_reward_mean": 166.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 161.0}, "policy_reward_mean": {"ppo": 83.36}, "custom_metrics": {"sparse_reward_mean": 44.8, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 77.12, "shaped_reward_min": 12, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.95, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 9.49, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.04, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.32, "useful_onion_pickup_agent_1_min": 
1, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.51, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.99, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.99, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.02, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 7.65, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.54, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.16, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.43, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.24, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.65, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.75, "soup_drop_agent_1_min": 0, 
"soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 6.02, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 7.65, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.02, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 7.65, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [120.0, 115.0, 175.0, 270.0, 118.0, 90.0, 239.0, 267.0, 206.0, 239.0, 256.0, 67.0, 87.0, 93.0, 43.0, 230.0, 164.0, 193.0, 148.0, 222.0, 158.0, 138.0, 144.0, 171.0, 181.0, 241.0, 239.0, 93.0, 167.0, 152.0, 164.0, 263.0, 199.0, 163.0, 115.0, 193.0, 203.0, 136.0, 112.0, 259.0, 164.0, 150.0, 167.0, 93.0, 178.0, 227.0, 236.0, 250.0, 218.0, 169.0, 145.0, 159.0, 41.0, 12.0, 144.0, 131.0, 298.0, 47.0, 201.0, 213.0, 176.0, 155.0, 238.0, 115.0, 219.0, 82.0, 216.0, 148.0, 201.0, 66.0, 180.0, 121.0, 247.0, 190.0, 66.0, 139.0, 139.0, 205.0, 215.0, 267.0, 62.0, 213.0, 155.0, 181.0, 58.0, 174.0, 305.0, 207.0, 155.0, 178.0, 107.0, 166.0, 213.0, 99.0, 152.0, 246.0, 167.0, 110.0, 155.0, 108.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [65.0, 55.0, 50.0, 65.0, 83.0, 92.0, 134.0, 136.0, 59.0, 59.0, 44.0, 46.0, 107.0, 132.0, 129.0, 138.0, 93.0, 113.0, 119.0, 120.0, 122.0, 134.0, 36.0, 31.0, 36.0, 51.0, 45.0, 48.0, 25.0, 18.0, 113.0, 117.0, 72.0, 92.0, 90.0, 103.0, 82.0, 66.0, 123.0, 99.0, 82.0, 76.0, 52.0, 86.0, 72.0, 72.0, 84.0, 87.0, 93.0, 88.0, 130.0, 111.0, 138.0, 101.0, 45.0, 48.0, 79.0, 88.0, 66.0, 86.0, 79.0, 85.0, 138.0, 125.0, 113.0, 86.0, 94.0, 69.0, 62.0, 53.0, 107.0, 86.0, 103.0, 100.0, 70.0, 66.0, 69.0, 43.0, 134.0, 125.0, 81.0, 83.0, 62.0, 88.0, 71.0, 96.0, 39.0, 54.0, 89.0, 89.0, 104.0, 123.0, 124.0, 112.0, 119.0, 131.0, 111.0, 107.0, 73.0, 96.0, 74.0, 71.0, 60.0, 
99.0, 19.0, 22.0, 6.0, 6.0, 77.0, 67.0, 66.0, 65.0, 161.0, 137.0, 9.0, 38.0, 92.0, 109.0, 106.0, 107.0, 79.0, 97.0, 74.0, 81.0, 107.0, 131.0, 56.0, 59.0, 106.0, 113.0, 40.0, 42.0, 111.0, 105.0, 55.0, 93.0, 95.0, 106.0, 37.0, 29.0, 94.0, 86.0, 62.0, 59.0, 113.0, 134.0, 76.0, 114.0, 26.0, 40.0, 57.0, 82.0, 57.0, 82.0, 99.0, 106.0, 100.0, 115.0, 121.0, 146.0, 25.0, 37.0, 108.0, 105.0, 76.0, 79.0, 95.0, 86.0, 22.0, 36.0, 85.0, 89.0, 148.0, 157.0, 103.0, 104.0, 71.0, 84.0, 95.0, 83.0, 56.0, 51.0, 85.0, 81.0, 111.0, 102.0, 45.0, 54.0, 70.0, 82.0, 105.0, 141.0, 76.0, 91.0, 65.0, 45.0, 68.0, 87.0, 64.0, 44.0]}, "sampler_perf": {"mean_env_wait_ms": 3.0715992457802104, "mean_processing_ms": 0.6800724399450163, "mean_inference_ms": 3.6184030024872835}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1392000, "num_steps_sampled": 742400, "sample_time_ms": 22627.432, "load_time_ms": 37.419, "grad_time_ms": 8703.731, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 1.387778801460961e-18, "cur_lr": 0.0010000000474974513, "total_loss": -0.004274281207472086, "policy_loss": -0.006773001980036497, "vf_loss": 32.668846130371094, "vf_explained_var": 0.6147891879081726, "kl": 0.0016452163690701127, "entropy": 1.5363364219665527, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 742400, "episodes_total": 1856, "training_iteration": 58, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-13-46", "timestamp": 1660248826, "time_this_iter_s": 31.19256901741028, "time_total_s": 7246.525321006775, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": 
"tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7246.525321006775, "timesteps_since_restore": 742400, 
"iterations_since_restore": 58, "perf": {"cpu_util_percent": 37.85, "ram_util_percent": 57.636363636363626}} +{"episode_reward_max": 347.0, "episode_reward_min": 9.0, "episode_reward_mean": 173.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 177.0}, "policy_reward_mean": {"ppo": 86.68}, "custom_metrics": {"sparse_reward_mean": 48.4, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 76.56, "shaped_reward_min": 9, "shaped_reward_max": 110, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.08, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.37, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.26, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.3, "useful_onion_pickup_agent_1_min": 1, 
"useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.49, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.89, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.31, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 7.56, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.77, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.59, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.36, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.56, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.55, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.34, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.34, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.64, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.64, "soup_drop_agent_1_min": 0, 
"soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 6.31, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 7.56, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.31, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 7.56, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [195.0, 147.0, 179.0, 305.0, 302.0, 199.0, 156.0, 201.0, 202.0, 53.0, 144.0, 131.0, 215.0, 347.0, 204.0, 241.0, 209.0, 112.0, 85.0, 238.0, 207.0, 158.0, 193.0, 185.0, 244.0, 185.0, 194.0, 193.0, 170.0, 9.0, 253.0, 115.0, 219.0, 82.0, 216.0, 148.0, 201.0, 66.0, 180.0, 121.0, 247.0, 190.0, 66.0, 139.0, 139.0, 205.0, 215.0, 267.0, 62.0, 213.0, 155.0, 181.0, 58.0, 174.0, 305.0, 207.0, 155.0, 178.0, 107.0, 166.0, 213.0, 99.0, 152.0, 246.0, 167.0, 110.0, 155.0, 108.0, 120.0, 115.0, 175.0, 270.0, 118.0, 90.0, 239.0, 267.0, 206.0, 239.0, 256.0, 67.0, 87.0, 93.0, 43.0, 230.0, 164.0, 193.0, 148.0, 222.0, 158.0, 138.0, 144.0, 171.0, 181.0, 241.0, 239.0, 93.0, 167.0, 152.0, 164.0, 263.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [87.0, 108.0, 90.0, 57.0, 94.0, 85.0, 140.0, 165.0, 160.0, 142.0, 93.0, 106.0, 81.0, 75.0, 93.0, 108.0, 97.0, 105.0, 25.0, 28.0, 69.0, 75.0, 63.0, 68.0, 127.0, 88.0, 170.0, 177.0, 97.0, 107.0, 122.0, 119.0, 103.0, 106.0, 58.0, 54.0, 40.0, 45.0, 110.0, 128.0, 105.0, 102.0, 66.0, 92.0, 83.0, 110.0, 94.0, 91.0, 119.0, 125.0, 104.0, 81.0, 102.0, 92.0, 91.0, 102.0, 83.0, 87.0, 6.0, 3.0, 123.0, 130.0, 64.0, 51.0, 106.0, 113.0, 40.0, 42.0, 111.0, 105.0, 55.0, 93.0, 95.0, 106.0, 37.0, 29.0, 94.0, 86.0, 62.0, 59.0, 113.0, 134.0, 76.0, 114.0, 26.0, 40.0, 57.0, 82.0, 57.0, 82.0, 99.0, 106.0, 100.0, 115.0, 121.0, 146.0, 25.0, 37.0, 108.0, 105.0, 76.0, 79.0, 
95.0, 86.0, 22.0, 36.0, 85.0, 89.0, 148.0, 157.0, 103.0, 104.0, 71.0, 84.0, 95.0, 83.0, 56.0, 51.0, 85.0, 81.0, 111.0, 102.0, 45.0, 54.0, 70.0, 82.0, 105.0, 141.0, 76.0, 91.0, 65.0, 45.0, 68.0, 87.0, 64.0, 44.0, 65.0, 55.0, 50.0, 65.0, 83.0, 92.0, 134.0, 136.0, 59.0, 59.0, 44.0, 46.0, 107.0, 132.0, 129.0, 138.0, 93.0, 113.0, 119.0, 120.0, 122.0, 134.0, 36.0, 31.0, 36.0, 51.0, 45.0, 48.0, 25.0, 18.0, 113.0, 117.0, 72.0, 92.0, 90.0, 103.0, 82.0, 66.0, 123.0, 99.0, 82.0, 76.0, 52.0, 86.0, 72.0, 72.0, 84.0, 87.0, 93.0, 88.0, 130.0, 111.0, 138.0, 101.0, 45.0, 48.0, 79.0, 88.0, 66.0, 86.0, 79.0, 85.0, 138.0, 125.0]}, "sampler_perf": {"mean_env_wait_ms": 3.027099137658147, "mean_processing_ms": 0.6712374159631969, "mean_inference_ms": 3.574879048855646}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1416000, "num_steps_sampled": 755200, "sample_time_ms": 22439.192, "load_time_ms": 37.393, "grad_time_ms": 8622.999, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 6.938894007304805e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.005061946343630552, "policy_loss": -0.0077269431203603745, "vf_loss": 34.30827713012695, "vf_explained_var": 0.6426100730895996, "kl": 0.0014075502986088395, "entropy": 1.5316654443740845, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 755200, "episodes_total": 1888, "training_iteration": 59, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-14-19", "timestamp": 1660248859, "time_this_iter_s": 32.82003712654114, "time_total_s": 7279.345358133316, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", 
"fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, 
"sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7279.345358133316, 
"timesteps_since_restore": 755200, "iterations_since_restore": 59, "perf": {"cpu_util_percent": 38.134782608695645, "ram_util_percent": 57.654347826086926}} +{"episode_reward_max": 347.0, "episode_reward_min": 9.0, "episode_reward_mean": 183.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 177.0}, "policy_reward_mean": {"ppo": 91.965}, "custom_metrics": {"sparse_reward_mean": 52.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 78.33, "shaped_reward_min": 9, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.49, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.58, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 7.62, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.41, 
"useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.48, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.88, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.99, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.64, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 7.61, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 14, "dish_pickup_agent_0_mean": 4.84, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.59, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.14, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.85, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.47, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.33, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.56, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 
0.4, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 6.64, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 7.61, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 14, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.64, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 7.61, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 14, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, 
"useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [167.0, 264.0, 176.0, 241.0, 333.0, 82.0, 213.0, 145.0, 167.0, 188.0, 259.0, 307.0, 74.0, 213.0, 299.0, 284.0, 193.0, 136.0, 161.0, 188.0, 238.0, 79.0, 193.0, 241.0, 133.0, 172.0, 238.0, 168.0, 256.0, 213.0, 196.0, 212.0, 167.0, 110.0, 155.0, 108.0, 120.0, 115.0, 175.0, 270.0, 118.0, 90.0, 239.0, 267.0, 206.0, 239.0, 256.0, 67.0, 87.0, 93.0, 43.0, 230.0, 164.0, 193.0, 148.0, 222.0, 158.0, 138.0, 144.0, 171.0, 181.0, 241.0, 239.0, 93.0, 167.0, 152.0, 164.0, 263.0, 195.0, 147.0, 179.0, 305.0, 302.0, 199.0, 156.0, 201.0, 202.0, 53.0, 144.0, 131.0, 215.0, 347.0, 204.0, 241.0, 209.0, 112.0, 85.0, 238.0, 207.0, 158.0, 193.0, 185.0, 244.0, 185.0, 194.0, 193.0, 170.0, 9.0, 253.0, 115.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [75.0, 92.0, 131.0, 133.0, 79.0, 97.0, 128.0, 113.0, 160.0, 173.0, 35.0, 47.0, 111.0, 102.0, 74.0, 71.0, 78.0, 89.0, 83.0, 105.0, 133.0, 126.0, 161.0, 146.0, 37.0, 37.0, 94.0, 119.0, 155.0, 144.0, 142.0, 142.0, 86.0, 107.0, 65.0, 71.0, 79.0, 82.0, 91.0, 97.0, 129.0, 109.0, 34.0, 45.0, 103.0, 90.0, 128.0, 113.0, 73.0, 60.0, 88.0, 84.0, 120.0, 118.0, 86.0, 82.0, 128.0, 128.0, 113.0, 100.0, 108.0, 88.0, 95.0, 117.0, 76.0, 91.0, 65.0, 45.0, 68.0, 87.0, 64.0, 44.0, 65.0, 55.0, 50.0, 65.0, 83.0, 92.0, 134.0, 136.0, 59.0, 59.0, 44.0, 46.0, 107.0, 132.0, 129.0, 138.0, 93.0, 113.0, 119.0, 120.0, 122.0, 134.0, 
36.0, 31.0, 36.0, 51.0, 45.0, 48.0, 25.0, 18.0, 113.0, 117.0, 72.0, 92.0, 90.0, 103.0, 82.0, 66.0, 123.0, 99.0, 82.0, 76.0, 52.0, 86.0, 72.0, 72.0, 84.0, 87.0, 93.0, 88.0, 130.0, 111.0, 138.0, 101.0, 45.0, 48.0, 79.0, 88.0, 66.0, 86.0, 79.0, 85.0, 138.0, 125.0, 87.0, 108.0, 90.0, 57.0, 94.0, 85.0, 140.0, 165.0, 160.0, 142.0, 93.0, 106.0, 81.0, 75.0, 93.0, 108.0, 97.0, 105.0, 25.0, 28.0, 69.0, 75.0, 63.0, 68.0, 127.0, 88.0, 170.0, 177.0, 97.0, 107.0, 122.0, 119.0, 103.0, 106.0, 58.0, 54.0, 40.0, 45.0, 110.0, 128.0, 105.0, 102.0, 66.0, 92.0, 83.0, 110.0, 94.0, 91.0, 119.0, 125.0, 104.0, 81.0, 102.0, 92.0, 91.0, 102.0, 83.0, 87.0, 6.0, 3.0, 123.0, 130.0, 64.0, 51.0]}, "sampler_perf": {"mean_env_wait_ms": 2.9840674048083304, "mean_processing_ms": 0.6627022470508764, "mean_inference_ms": 3.532781736604636}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1440000, "num_steps_sampled": 768000, "sample_time_ms": 22393.025, "load_time_ms": 37.431, "grad_time_ms": 8732.011, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.4694470036524025e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.0032793853897601366, "policy_loss": -0.0061605386435985565, "vf_loss": 36.42392349243164, "vf_explained_var": 0.6542922854423523, "kl": 0.0015746770659461617, "entropy": 1.5224775075912476, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 768000, "episodes_total": 1920, "training_iteration": 60, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-14-51", "timestamp": 1660248891, "time_this_iter_s": 31.919984817504883, "time_total_s": 7311.265342950821, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": 
{"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate 
at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, 
"time_since_restore": 7311.265342950821, "timesteps_since_restore": 768000, "iterations_since_restore": 60, "perf": {"cpu_util_percent": 38.15, "ram_util_percent": 57.70217391304345}} +{"episode_reward_max": 347.0, "episode_reward_min": 9.0, "episode_reward_mean": 199.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 185.0}, "policy_reward_mean": {"ppo": 99.975}, "custom_metrics": {"sparse_reward_mean": 59.6, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 80.75, "shaped_reward_min": 9, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.59, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.82, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 7.86, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, 
"useful_onion_pickup_agent_1_mean": 8.95, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.32, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.78, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.94, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 8.02, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 5.14, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.24, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.19, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.78, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.59, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.44, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.48, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.28, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 6.94, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 8.02, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.94, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 8.02, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [255.0, 247.0, 262.0, 209.0, 147.0, 316.0, 267.0, 155.0, 275.0, 139.0, 256.0, 341.0, 209.0, 204.0, 250.0, 164.0, 86.0, 301.0, 247.0, 249.0, 244.0, 136.0, 123.0, 185.0, 167.0, 99.0, 153.0, 264.0, 210.0, 298.0, 150.0, 241.0, 167.0, 152.0, 164.0, 263.0, 195.0, 147.0, 179.0, 305.0, 302.0, 199.0, 156.0, 201.0, 202.0, 53.0, 144.0, 131.0, 215.0, 347.0, 204.0, 241.0, 209.0, 112.0, 85.0, 238.0, 207.0, 158.0, 193.0, 185.0, 244.0, 185.0, 194.0, 193.0, 170.0, 9.0, 253.0, 115.0, 167.0, 264.0, 176.0, 241.0, 333.0, 82.0, 213.0, 145.0, 167.0, 188.0, 259.0, 307.0, 74.0, 213.0, 299.0, 284.0, 193.0, 136.0, 161.0, 188.0, 238.0, 79.0, 193.0, 241.0, 133.0, 172.0, 238.0, 168.0, 256.0, 213.0, 196.0, 212.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [115.0, 140.0, 119.0, 128.0, 140.0, 122.0, 100.0, 109.0, 68.0, 79.0, 153.0, 163.0, 127.0, 140.0, 69.0, 86.0, 150.0, 125.0, 78.0, 61.0, 138.0, 118.0, 185.0, 156.0, 118.0, 91.0, 101.0, 103.0, 120.0, 130.0, 91.0, 73.0, 53.0, 33.0, 141.0, 160.0, 135.0, 112.0, 116.0, 133.0, 108.0, 136.0, 63.0, 73.0, 66.0, 57.0, 97.0, 88.0, 87.0, 80.0, 36.0, 63.0, 84.0, 69.0, 127.0, 137.0, 114.0, 96.0, 136.0, 162.0, 76.0, 74.0, 128.0, 113.0, 79.0, 88.0, 66.0, 86.0, 79.0, 85.0, 138.0, 125.0, 87.0, 108.0, 90.0, 57.0, 94.0, 85.0, 140.0, 165.0, 160.0, 142.0, 93.0, 106.0, 81.0, 75.0, 
93.0, 108.0, 97.0, 105.0, 25.0, 28.0, 69.0, 75.0, 63.0, 68.0, 127.0, 88.0, 170.0, 177.0, 97.0, 107.0, 122.0, 119.0, 103.0, 106.0, 58.0, 54.0, 40.0, 45.0, 110.0, 128.0, 105.0, 102.0, 66.0, 92.0, 83.0, 110.0, 94.0, 91.0, 119.0, 125.0, 104.0, 81.0, 102.0, 92.0, 91.0, 102.0, 83.0, 87.0, 6.0, 3.0, 123.0, 130.0, 64.0, 51.0, 75.0, 92.0, 131.0, 133.0, 79.0, 97.0, 128.0, 113.0, 160.0, 173.0, 35.0, 47.0, 111.0, 102.0, 74.0, 71.0, 78.0, 89.0, 83.0, 105.0, 133.0, 126.0, 161.0, 146.0, 37.0, 37.0, 94.0, 119.0, 155.0, 144.0, 142.0, 142.0, 86.0, 107.0, 65.0, 71.0, 79.0, 82.0, 91.0, 97.0, 129.0, 109.0, 34.0, 45.0, 103.0, 90.0, 128.0, 113.0, 73.0, 60.0, 88.0, 84.0, 120.0, 118.0, 86.0, 82.0, 128.0, 128.0, 113.0, 100.0, 108.0, 88.0, 95.0, 117.0]}, "sampler_perf": {"mean_env_wait_ms": 2.942382408677298, "mean_processing_ms": 0.6544447985694, "mean_inference_ms": 3.4920936282643287}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1464000, "num_steps_sampled": 780800, "sample_time_ms": 22508.979, "load_time_ms": 37.523, "grad_time_ms": 8924.852, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.7347235018262012e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013272188371047378, "policy_loss": -0.004394210409373045, "vf_loss": 38.1645622253418, "vf_explained_var": 0.6507807374000549, "kl": 0.002042042789980769, "entropy": 1.4989361763000488, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 780800, "episodes_total": 1952, "training_iteration": 61, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-15-24", "timestamp": 1660248924, "time_this_iter_s": 33.02385997772217, "time_total_s": 7344.289202928543, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, 
"evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, 
"simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7344.289202928543, "timesteps_since_restore": 780800, "iterations_since_restore": 61, "perf": {"cpu_util_percent": 36.91521739130434, "ram_util_percent": 57.791304347826106}} +{"episode_reward_max": 341.0, "episode_reward_min": 9.0, "episode_reward_mean": 206.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 185.0}, "policy_reward_mean": {"ppo": 103.06}, "custom_metrics": {"sparse_reward_mean": 60.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 84.52, "shaped_reward_min": 9, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.75, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 10.13, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 8.0, 
"useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.26, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.63, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 7.2, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 8.37, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 5.08, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.7, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.62, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.3, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.3, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.86, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.76, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, 
"soup_drop_agent_0_mean": 0.44, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.2, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 7.2, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 8.37, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.2, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 8.37, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, 
"useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [176.0, 235.0, 158.0, 301.0, 201.0, 221.0, 176.0, 223.0, 169.0, 313.0, 196.0, 215.0, 118.0, 166.0, 290.0, 81.0, 145.0, 196.0, 161.0, 310.0, 256.0, 298.0, 244.0, 110.0, 319.0, 179.0, 152.0, 207.0, 301.0, 127.0, 307.0, 236.0, 170.0, 9.0, 253.0, 115.0, 167.0, 264.0, 176.0, 241.0, 333.0, 82.0, 213.0, 145.0, 167.0, 188.0, 259.0, 307.0, 74.0, 213.0, 299.0, 284.0, 193.0, 136.0, 161.0, 188.0, 238.0, 79.0, 193.0, 241.0, 133.0, 172.0, 238.0, 168.0, 256.0, 213.0, 196.0, 212.0, 255.0, 247.0, 262.0, 209.0, 147.0, 316.0, 267.0, 155.0, 275.0, 139.0, 256.0, 341.0, 209.0, 204.0, 250.0, 164.0, 86.0, 301.0, 247.0, 249.0, 244.0, 136.0, 123.0, 185.0, 167.0, 99.0, 153.0, 264.0, 210.0, 298.0, 150.0, 241.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [83.0, 93.0, 110.0, 125.0, 80.0, 78.0, 156.0, 145.0, 100.0, 101.0, 106.0, 115.0, 82.0, 94.0, 107.0, 116.0, 99.0, 70.0, 161.0, 152.0, 102.0, 94.0, 115.0, 100.0, 58.0, 60.0, 84.0, 82.0, 150.0, 140.0, 47.0, 34.0, 71.0, 74.0, 99.0, 97.0, 72.0, 89.0, 139.0, 171.0, 139.0, 117.0, 152.0, 146.0, 119.0, 125.0, 53.0, 57.0, 144.0, 175.0, 94.0, 85.0, 67.0, 85.0, 100.0, 107.0, 133.0, 168.0, 50.0, 77.0, 165.0, 142.0, 102.0, 134.0, 83.0, 87.0, 6.0, 3.0, 123.0, 130.0, 64.0, 51.0, 75.0, 
92.0, 131.0, 133.0, 79.0, 97.0, 128.0, 113.0, 160.0, 173.0, 35.0, 47.0, 111.0, 102.0, 74.0, 71.0, 78.0, 89.0, 83.0, 105.0, 133.0, 126.0, 161.0, 146.0, 37.0, 37.0, 94.0, 119.0, 155.0, 144.0, 142.0, 142.0, 86.0, 107.0, 65.0, 71.0, 79.0, 82.0, 91.0, 97.0, 129.0, 109.0, 34.0, 45.0, 103.0, 90.0, 128.0, 113.0, 73.0, 60.0, 88.0, 84.0, 120.0, 118.0, 86.0, 82.0, 128.0, 128.0, 113.0, 100.0, 108.0, 88.0, 95.0, 117.0, 115.0, 140.0, 119.0, 128.0, 140.0, 122.0, 100.0, 109.0, 68.0, 79.0, 153.0, 163.0, 127.0, 140.0, 69.0, 86.0, 150.0, 125.0, 78.0, 61.0, 138.0, 118.0, 185.0, 156.0, 118.0, 91.0, 101.0, 103.0, 120.0, 130.0, 91.0, 73.0, 53.0, 33.0, 141.0, 160.0, 135.0, 112.0, 116.0, 133.0, 108.0, 136.0, 63.0, 73.0, 66.0, 57.0, 97.0, 88.0, 87.0, 80.0, 36.0, 63.0, 84.0, 69.0, 127.0, 137.0, 114.0, 96.0, 136.0, 162.0, 76.0, 74.0, 128.0, 113.0]}, "sampler_perf": {"mean_env_wait_ms": 2.902001918125836, "mean_processing_ms": 0.6464539836010921, "mean_inference_ms": 3.4531930074110506}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1488000, "num_steps_sampled": 793600, "sample_time_ms": 22714.251, "load_time_ms": 37.548, "grad_time_ms": 9167.09, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 8.673617509131006e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.0036004248540848494, "policy_loss": -0.006528293248265982, "vf_loss": 36.78936767578125, "vf_explained_var": 0.6745734810829163, "kl": 0.0014449331210926175, "entropy": 1.5021357536315918, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 793600, "episodes_total": 1984, "training_iteration": 62, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-15-59", "timestamp": 1660248959, "time_this_iter_s": 35.12303113937378, "time_total_s": 7379.412234067917, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, 
"num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7379.412234067917, "timesteps_since_restore": 793600, "iterations_since_restore": 62, "perf": {"cpu_util_percent": 35.66, "ram_util_percent": 57.732}} +{"episode_reward_max": 368.0, "episode_reward_min": 9.0, "episode_reward_mean": 220.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 186.0}, "policy_reward_mean": {"ppo": 110.09}, "custom_metrics": {"sparse_reward_mean": 65.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 88.58, "shaped_reward_min": 9, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.92, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 10.43, "onion_pickup_agent_1_min": 4, 
"onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 8.21, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.74, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.6, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 7.47, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 8.83, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 5.07, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.6, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.29, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.14, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.73, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, 
"soup_delivery_agent_1_mean": 2.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.48, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.21, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 7.47, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 8.83, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.47, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 8.83, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [316.0, 212.0, 368.0, 304.0, 253.0, 351.0, 178.0, 9.0, 122.0, 253.0, 164.0, 253.0, 213.0, 250.0, 179.0, 219.0, 330.0, 169.0, 241.0, 207.0, 319.0, 241.0, 264.0, 284.0, 307.0, 139.0, 201.0, 267.0, 71.0, 267.0, 241.0, 313.0, 256.0, 213.0, 196.0, 212.0, 255.0, 247.0, 262.0, 209.0, 147.0, 316.0, 267.0, 155.0, 275.0, 139.0, 256.0, 341.0, 209.0, 204.0, 250.0, 164.0, 86.0, 301.0, 247.0, 249.0, 244.0, 136.0, 123.0, 185.0, 167.0, 99.0, 153.0, 264.0, 210.0, 298.0, 150.0, 241.0, 176.0, 235.0, 158.0, 301.0, 201.0, 221.0, 176.0, 223.0, 169.0, 313.0, 196.0, 215.0, 118.0, 166.0, 290.0, 81.0, 145.0, 196.0, 161.0, 310.0, 256.0, 298.0, 244.0, 110.0, 319.0, 179.0, 152.0, 207.0, 301.0, 127.0, 307.0, 236.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [149.0, 167.0, 107.0, 105.0, 182.0, 186.0, 140.0, 164.0, 124.0, 129.0, 185.0, 166.0, 85.0, 93.0, 6.0, 3.0, 52.0, 70.0, 111.0, 142.0, 55.0, 109.0, 116.0, 137.0, 97.0, 116.0, 129.0, 121.0, 80.0, 99.0, 120.0, 99.0, 166.0, 164.0, 80.0, 89.0, 108.0, 133.0, 98.0, 109.0, 170.0, 149.0, 124.0, 117.0, 125.0, 139.0, 135.0, 149.0, 151.0, 156.0, 71.0, 68.0, 97.0, 104.0, 128.0, 139.0, 34.0, 37.0, 122.0, 
145.0, 131.0, 110.0, 163.0, 150.0, 128.0, 128.0, 113.0, 100.0, 108.0, 88.0, 95.0, 117.0, 115.0, 140.0, 119.0, 128.0, 140.0, 122.0, 100.0, 109.0, 68.0, 79.0, 153.0, 163.0, 127.0, 140.0, 69.0, 86.0, 150.0, 125.0, 78.0, 61.0, 138.0, 118.0, 185.0, 156.0, 118.0, 91.0, 101.0, 103.0, 120.0, 130.0, 91.0, 73.0, 53.0, 33.0, 141.0, 160.0, 135.0, 112.0, 116.0, 133.0, 108.0, 136.0, 63.0, 73.0, 66.0, 57.0, 97.0, 88.0, 87.0, 80.0, 36.0, 63.0, 84.0, 69.0, 127.0, 137.0, 114.0, 96.0, 136.0, 162.0, 76.0, 74.0, 128.0, 113.0, 83.0, 93.0, 110.0, 125.0, 80.0, 78.0, 156.0, 145.0, 100.0, 101.0, 106.0, 115.0, 82.0, 94.0, 107.0, 116.0, 99.0, 70.0, 161.0, 152.0, 102.0, 94.0, 115.0, 100.0, 58.0, 60.0, 84.0, 82.0, 150.0, 140.0, 47.0, 34.0, 71.0, 74.0, 99.0, 97.0, 72.0, 89.0, 139.0, 171.0, 139.0, 117.0, 152.0, 146.0, 119.0, 125.0, 53.0, 57.0, 144.0, 175.0, 94.0, 85.0, 67.0, 85.0, 100.0, 107.0, 133.0, 168.0, 50.0, 77.0, 165.0, 142.0, 102.0, 134.0]}, "sampler_perf": {"mean_env_wait_ms": 2.8628696517904486, "mean_processing_ms": 0.6387085923696654, "mean_inference_ms": 3.4155991200139666}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1512000, "num_steps_sampled": 806400, "sample_time_ms": 22536.552, "load_time_ms": 37.579, "grad_time_ms": 9310.446, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.336808754565503e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014393635792657733, "policy_loss": -0.0051459651440382, "vf_loss": 44.491573333740234, "vf_explained_var": 0.6412068009376526, "kl": 0.001486484077759087, "entropy": 1.485115885734558, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 806400, "episodes_total": 2016, "training_iteration": 63, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-16-31", "timestamp": 1660248991, "time_this_iter_s": 32.43239998817444, "time_total_s": 7411.844634056091, "pid": 66422, "hostname": 
"Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, 
"no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, 
"vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7411.844634056091, "timesteps_since_restore": 806400, "iterations_since_restore": 63, "perf": {"cpu_util_percent": 36.01739130434783, "ram_util_percent": 57.59347826086956}} +{"episode_reward_max": 368.0, "episode_reward_min": 9.0, "episode_reward_mean": 232.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 190.0}, "policy_reward_mean": {"ppo": 116.33}, "custom_metrics": {"sparse_reward_mean": 70.6, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 91.46, "shaped_reward_min": 9, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.53, 
"onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 10.35, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 8.77, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.57, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.64, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.08, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 8.75, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.7, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.59, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.51, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.48, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.22, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.44, "soup_pickup_agent_1_min": 0, 
"soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.67, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 3.04, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.42, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.32, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 8.08, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 8.75, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.08, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 8.75, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, 
"useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [55.0, 264.0, 127.0, 313.0, 163.0, 287.0, 298.0, 216.0, 341.0, 287.0, 235.0, 298.0, 204.0, 295.0, 225.0, 218.0, 287.0, 215.0, 313.0, 255.0, 367.0, 239.0, 210.0, 367.0, 258.0, 252.0, 353.0, 128.0, 178.0, 284.0, 270.0, 273.0, 210.0, 298.0, 150.0, 241.0, 176.0, 235.0, 158.0, 301.0, 201.0, 221.0, 176.0, 223.0, 169.0, 313.0, 196.0, 215.0, 118.0, 166.0, 290.0, 81.0, 145.0, 196.0, 161.0, 310.0, 256.0, 298.0, 244.0, 110.0, 319.0, 179.0, 152.0, 207.0, 301.0, 127.0, 307.0, 236.0, 316.0, 212.0, 368.0, 304.0, 253.0, 351.0, 178.0, 9.0, 122.0, 253.0, 164.0, 253.0, 213.0, 250.0, 179.0, 219.0, 330.0, 169.0, 241.0, 207.0, 319.0, 241.0, 264.0, 284.0, 307.0, 139.0, 201.0, 267.0, 71.0, 267.0, 241.0, 313.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [26.0, 29.0, 134.0, 130.0, 63.0, 64.0, 144.0, 169.0, 76.0, 87.0, 142.0, 145.0, 131.0, 167.0, 113.0, 103.0, 152.0, 189.0, 161.0, 126.0, 111.0, 124.0, 164.0, 134.0, 105.0, 99.0, 133.0, 162.0, 119.0, 106.0, 110.0, 108.0, 153.0, 134.0, 100.0, 115.0, 147.0, 166.0, 129.0, 
126.0, 190.0, 177.0, 122.0, 117.0, 107.0, 103.0, 189.0, 178.0, 118.0, 140.0, 117.0, 135.0, 188.0, 165.0, 60.0, 68.0, 86.0, 92.0, 143.0, 141.0, 128.0, 142.0, 131.0, 142.0, 114.0, 96.0, 136.0, 162.0, 76.0, 74.0, 128.0, 113.0, 83.0, 93.0, 110.0, 125.0, 80.0, 78.0, 156.0, 145.0, 100.0, 101.0, 106.0, 115.0, 82.0, 94.0, 107.0, 116.0, 99.0, 70.0, 161.0, 152.0, 102.0, 94.0, 115.0, 100.0, 58.0, 60.0, 84.0, 82.0, 150.0, 140.0, 47.0, 34.0, 71.0, 74.0, 99.0, 97.0, 72.0, 89.0, 139.0, 171.0, 139.0, 117.0, 152.0, 146.0, 119.0, 125.0, 53.0, 57.0, 144.0, 175.0, 94.0, 85.0, 67.0, 85.0, 100.0, 107.0, 133.0, 168.0, 50.0, 77.0, 165.0, 142.0, 102.0, 134.0, 149.0, 167.0, 107.0, 105.0, 182.0, 186.0, 140.0, 164.0, 124.0, 129.0, 185.0, 166.0, 85.0, 93.0, 6.0, 3.0, 52.0, 70.0, 111.0, 142.0, 55.0, 109.0, 116.0, 137.0, 97.0, 116.0, 129.0, 121.0, 80.0, 99.0, 120.0, 99.0, 166.0, 164.0, 80.0, 89.0, 108.0, 133.0, 98.0, 109.0, 170.0, 149.0, 124.0, 117.0, 125.0, 139.0, 135.0, 149.0, 151.0, 156.0, 71.0, 68.0, 97.0, 104.0, 128.0, 139.0, 34.0, 37.0, 122.0, 145.0, 131.0, 110.0, 163.0, 150.0]}, "sampler_perf": {"mean_env_wait_ms": 2.824862261482626, "mean_processing_ms": 0.6311660417813414, "mean_inference_ms": 3.3776913519982266}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1536000, "num_steps_sampled": 819200, "sample_time_ms": 21992.331, "load_time_ms": 37.617, "grad_time_ms": 9517.458, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.1684043772827515e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.0046628438867628574, "policy_loss": -0.008121621794998646, "vf_loss": 41.953346252441406, "vf_explained_var": 0.6826162934303284, "kl": 0.0015492010861635208, "entropy": 1.4731155633926392, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 819200, "episodes_total": 2048, "training_iteration": 64, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-11_21-17-00", "timestamp": 1660249020, "time_this_iter_s": 28.55878710746765, "time_total_s": 7440.403421163559, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7440.403421163559, "timesteps_since_restore": 819200, "iterations_since_restore": 64, "perf": {"cpu_util_percent": 38.065, "ram_util_percent": 57.504999999999995}} +{"episode_reward_max": 390.0, "episode_reward_min": 9.0, "episode_reward_mean": 243.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 201.0}, "policy_reward_mean": {"ppo": 121.615}, "custom_metrics": {"sparse_reward_mean": 74.8, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 93.63, "shaped_reward_min": 9, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.88, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 10.52, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 9.11, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.77, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.73, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.38, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 8.85, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.62, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.52, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.24, "soup_pickup_agent_0_min": 0, 
"soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.71, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 3.06, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.41, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.36, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 8.38, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 8.85, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.38, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 8.85, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [207.0, 187.0, 252.0, 141.0, 266.0, 299.0, 182.0, 344.0, 352.0, 232.0, 324.0, 260.0, 182.0, 336.0, 170.0, 193.0, 239.0, 255.0, 212.0, 295.0, 267.0, 166.0, 142.0, 65.0, 163.0, 330.0, 361.0, 390.0, 290.0, 259.0, 159.0, 252.0, 301.0, 127.0, 307.0, 236.0, 316.0, 212.0, 368.0, 304.0, 253.0, 351.0, 178.0, 9.0, 122.0, 253.0, 164.0, 253.0, 213.0, 250.0, 179.0, 219.0, 330.0, 169.0, 241.0, 207.0, 319.0, 241.0, 264.0, 284.0, 307.0, 139.0, 201.0, 267.0, 71.0, 267.0, 241.0, 313.0, 55.0, 264.0, 127.0, 313.0, 163.0, 287.0, 298.0, 216.0, 341.0, 287.0, 235.0, 298.0, 204.0, 295.0, 225.0, 218.0, 287.0, 215.0, 313.0, 255.0, 367.0, 239.0, 210.0, 367.0, 258.0, 252.0, 353.0, 128.0, 178.0, 284.0, 270.0, 273.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [99.0, 108.0, 93.0, 94.0, 123.0, 129.0, 76.0, 65.0, 128.0, 138.0, 144.0, 155.0, 95.0, 87.0, 161.0, 183.0, 182.0, 170.0, 124.0, 108.0, 
170.0, 154.0, 119.0, 141.0, 89.0, 93.0, 165.0, 171.0, 97.0, 73.0, 100.0, 93.0, 134.0, 105.0, 127.0, 128.0, 103.0, 109.0, 155.0, 140.0, 118.0, 149.0, 82.0, 84.0, 61.0, 81.0, 32.0, 33.0, 89.0, 74.0, 155.0, 175.0, 168.0, 193.0, 201.0, 189.0, 137.0, 153.0, 121.0, 138.0, 84.0, 75.0, 121.0, 131.0, 133.0, 168.0, 50.0, 77.0, 165.0, 142.0, 102.0, 134.0, 149.0, 167.0, 107.0, 105.0, 182.0, 186.0, 140.0, 164.0, 124.0, 129.0, 185.0, 166.0, 85.0, 93.0, 6.0, 3.0, 52.0, 70.0, 111.0, 142.0, 55.0, 109.0, 116.0, 137.0, 97.0, 116.0, 129.0, 121.0, 80.0, 99.0, 120.0, 99.0, 166.0, 164.0, 80.0, 89.0, 108.0, 133.0, 98.0, 109.0, 170.0, 149.0, 124.0, 117.0, 125.0, 139.0, 135.0, 149.0, 151.0, 156.0, 71.0, 68.0, 97.0, 104.0, 128.0, 139.0, 34.0, 37.0, 122.0, 145.0, 131.0, 110.0, 163.0, 150.0, 26.0, 29.0, 134.0, 130.0, 63.0, 64.0, 144.0, 169.0, 76.0, 87.0, 142.0, 145.0, 131.0, 167.0, 113.0, 103.0, 152.0, 189.0, 161.0, 126.0, 111.0, 124.0, 164.0, 134.0, 105.0, 99.0, 133.0, 162.0, 119.0, 106.0, 110.0, 108.0, 153.0, 134.0, 100.0, 115.0, 147.0, 166.0, 129.0, 126.0, 190.0, 177.0, 122.0, 117.0, 107.0, 103.0, 189.0, 178.0, 118.0, 140.0, 117.0, 135.0, 188.0, 165.0, 60.0, 68.0, 86.0, 92.0, 143.0, 141.0, 128.0, 142.0, 131.0, 142.0]}, "sampler_perf": {"mean_env_wait_ms": 2.787957702502074, "mean_processing_ms": 0.623833190465171, "mean_inference_ms": 3.33934185787935}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1560000, "num_steps_sampled": 832000, "sample_time_ms": 21829.313, "load_time_ms": 37.489, "grad_time_ms": 9503.726, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.0842021886413758e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.0026782825589179993, "policy_loss": -0.0062830038368701935, "vf_loss": 43.38319778442383, "vf_explained_var": 0.6999297738075256, "kl": 0.0015296392375603318, "entropy": 1.4671941995620728, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, 
"timesteps_total": 832000, "episodes_total": 2080, "training_iteration": 65, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-17-27", "timestamp": 1660249047, "time_this_iter_s": 27.39732599258423, "time_total_s": 7467.800747156143, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": 
false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": 
"MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7467.800747156143, "timesteps_since_restore": 832000, "iterations_since_restore": 65, "perf": {"cpu_util_percent": 36.88717948717949, "ram_util_percent": 57.59230769230768}} +{"episode_reward_max": 402.0, "episode_reward_min": 55.0, "episode_reward_mean": 253.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 26.0}, "policy_reward_max": {"ppo": 201.0}, "policy_reward_mean": {"ppo": 126.685}, "custom_metrics": {"sparse_reward_mean": 79.0, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 95.37, "shaped_reward_min": 31, "shaped_reward_max": 133, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, 
"useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.36, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 10.55, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 9.6, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.72, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.56, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.73, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 8.84, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 8.85, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 4.65, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.47, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.93, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.47, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, 
"useful_dish_drop_agent_1_mean": 0.55, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.55, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.36, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.02, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.86, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.4, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.4, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 8.84, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 8.85, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.84, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 8.85, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, 
"catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [282.0, 319.0, 247.0, 267.0, 244.0, 256.0, 305.0, 402.0, 352.0, 210.0, 139.0, 185.0, 293.0, 324.0, 164.0, 244.0, 345.0, 293.0, 333.0, 339.0, 150.0, 206.0, 344.0, 132.0, 204.0, 319.0, 258.0, 307.0, 250.0, 282.0, 298.0, 305.0, 71.0, 267.0, 241.0, 313.0, 55.0, 264.0, 127.0, 313.0, 163.0, 287.0, 298.0, 216.0, 341.0, 287.0, 235.0, 298.0, 204.0, 295.0, 225.0, 218.0, 287.0, 215.0, 313.0, 255.0, 367.0, 239.0, 210.0, 367.0, 258.0, 252.0, 353.0, 128.0, 178.0, 284.0, 270.0, 273.0, 207.0, 187.0, 252.0, 141.0, 266.0, 299.0, 182.0, 344.0, 352.0, 232.0, 324.0, 260.0, 182.0, 336.0, 170.0, 193.0, 239.0, 255.0, 212.0, 295.0, 267.0, 166.0, 142.0, 65.0, 163.0, 330.0, 361.0, 390.0, 290.0, 259.0, 159.0, 252.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], 
"policy_ppo_reward": [151.0, 131.0, 158.0, 161.0, 122.0, 125.0, 134.0, 133.0, 112.0, 132.0, 130.0, 126.0, 153.0, 152.0, 201.0, 201.0, 173.0, 179.0, 99.0, 111.0, 72.0, 67.0, 97.0, 88.0, 152.0, 141.0, 168.0, 156.0, 85.0, 79.0, 125.0, 119.0, 168.0, 177.0, 139.0, 154.0, 165.0, 168.0, 182.0, 157.0, 82.0, 68.0, 89.0, 117.0, 170.0, 174.0, 73.0, 59.0, 112.0, 92.0, 136.0, 183.0, 119.0, 139.0, 162.0, 145.0, 119.0, 131.0, 150.0, 132.0, 167.0, 131.0, 158.0, 147.0, 34.0, 37.0, 122.0, 145.0, 131.0, 110.0, 163.0, 150.0, 26.0, 29.0, 134.0, 130.0, 63.0, 64.0, 144.0, 169.0, 76.0, 87.0, 142.0, 145.0, 131.0, 167.0, 113.0, 103.0, 152.0, 189.0, 161.0, 126.0, 111.0, 124.0, 164.0, 134.0, 105.0, 99.0, 133.0, 162.0, 119.0, 106.0, 110.0, 108.0, 153.0, 134.0, 100.0, 115.0, 147.0, 166.0, 129.0, 126.0, 190.0, 177.0, 122.0, 117.0, 107.0, 103.0, 189.0, 178.0, 118.0, 140.0, 117.0, 135.0, 188.0, 165.0, 60.0, 68.0, 86.0, 92.0, 143.0, 141.0, 128.0, 142.0, 131.0, 142.0, 99.0, 108.0, 93.0, 94.0, 123.0, 129.0, 76.0, 65.0, 128.0, 138.0, 144.0, 155.0, 95.0, 87.0, 161.0, 183.0, 182.0, 170.0, 124.0, 108.0, 170.0, 154.0, 119.0, 141.0, 89.0, 93.0, 165.0, 171.0, 97.0, 73.0, 100.0, 93.0, 134.0, 105.0, 127.0, 128.0, 103.0, 109.0, 155.0, 140.0, 118.0, 149.0, 82.0, 84.0, 61.0, 81.0, 32.0, 33.0, 89.0, 74.0, 155.0, 175.0, 168.0, 193.0, 201.0, 189.0, 137.0, 153.0, 121.0, 138.0, 84.0, 75.0, 121.0, 131.0]}, "sampler_perf": {"mean_env_wait_ms": 2.7521510850720086, "mean_processing_ms": 0.6167185711534096, "mean_inference_ms": 3.3015786839102956}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1584000, "num_steps_sampled": 844800, "sample_time_ms": 21629.389, "load_time_ms": 37.73, "grad_time_ms": 9476.984, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.421010943206879e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.005265243351459503, "policy_loss": -0.009142073802649975, "vf_loss": 46.01101303100586, "vf_explained_var": 0.713275671005249, "kl": 0.001622045412659645, "entropy": 
1.4485527276992798, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 844800, "episodes_total": 2112, "training_iteration": 66, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-17-56", "timestamp": 1660249076, "time_this_iter_s": 28.3277370929718, "time_total_s": 7496.128484249115, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": 
{"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7496.128484249115, "timesteps_since_restore": 844800, "iterations_since_restore": 66, "perf": {"cpu_util_percent": 32.035000000000004, "ram_util_percent": 57.5875}} +{"episode_reward_max": 421.0, "episode_reward_min": 23.0, "episode_reward_mean": 258.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 9.0}, "policy_reward_max": {"ppo": 211.0}, "policy_reward_mean": {"ppo": 129.21}, "custom_metrics": {"sparse_reward_mean": 81.6, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 95.22, "shaped_reward_min": 20, "shaped_reward_max": 144, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.07, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 10.87, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 9.37, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 10.02, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.75, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 8.65, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.14, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.79, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.81, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.54, "dish_drop_agent_1_min": 
0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.59, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.24, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.05, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.45, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.29, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 8.65, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.14, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.65, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.14, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, 
"catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [218.0, 187.0, 60.0, 267.0, 71.0, 244.0, 238.0, 379.0, 373.0, 258.0, 339.0, 198.0, 330.0, 309.0, 358.0, 267.0, 293.0, 298.0, 264.0, 356.0, 309.0, 253.0, 236.0, 421.0, 287.0, 296.0, 384.0, 254.0, 264.0, 208.0, 23.0, 225.0, 178.0, 284.0, 270.0, 273.0, 207.0, 187.0, 252.0, 141.0, 266.0, 299.0, 182.0, 344.0, 352.0, 232.0, 324.0, 260.0, 182.0, 336.0, 170.0, 193.0, 239.0, 255.0, 212.0, 295.0, 267.0, 166.0, 142.0, 65.0, 163.0, 330.0, 361.0, 390.0, 290.0, 259.0, 159.0, 252.0, 282.0, 319.0, 247.0, 267.0, 244.0, 256.0, 305.0, 402.0, 352.0, 210.0, 139.0, 185.0, 293.0, 324.0, 164.0, 244.0, 345.0, 293.0, 333.0, 339.0, 150.0, 206.0, 344.0, 132.0, 204.0, 319.0, 258.0, 307.0, 250.0, 282.0, 298.0, 305.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [114.0, 104.0, 93.0, 94.0, 29.0, 31.0, 151.0, 116.0, 34.0, 37.0, 113.0, 131.0, 114.0, 124.0, 193.0, 186.0, 188.0, 185.0, 132.0, 126.0, 159.0, 180.0, 96.0, 102.0, 157.0, 173.0, 158.0, 151.0, 180.0, 178.0, 136.0, 131.0, 153.0, 140.0, 146.0, 152.0, 127.0, 137.0, 186.0, 170.0, 151.0, 158.0, 107.0, 146.0, 108.0, 128.0, 211.0, 210.0, 144.0, 143.0, 145.0, 151.0, 204.0, 180.0, 124.0, 130.0, 133.0, 131.0, 99.0, 109.0, 9.0, 14.0, 109.0, 116.0, 86.0, 92.0, 143.0, 141.0, 128.0, 142.0, 131.0, 142.0, 99.0, 108.0, 93.0, 94.0, 123.0, 129.0, 76.0, 65.0, 128.0, 138.0, 144.0, 155.0, 95.0, 87.0, 161.0, 183.0, 182.0, 170.0, 124.0, 108.0, 170.0, 154.0, 119.0, 141.0, 89.0, 93.0, 165.0, 171.0, 97.0, 73.0, 100.0, 93.0, 134.0, 105.0, 127.0, 128.0, 103.0, 109.0, 155.0, 140.0, 118.0, 149.0, 82.0, 84.0, 61.0, 81.0, 32.0, 33.0, 89.0, 74.0, 155.0, 175.0, 168.0, 193.0, 201.0, 189.0, 137.0, 153.0, 121.0, 138.0, 84.0, 75.0, 121.0, 131.0, 151.0, 131.0, 158.0, 161.0, 122.0, 125.0, 134.0, 133.0, 112.0, 132.0, 130.0, 126.0, 153.0, 152.0, 201.0, 201.0, 173.0, 179.0, 99.0, 111.0, 72.0, 67.0, 97.0, 88.0, 152.0, 141.0, 168.0, 156.0, 85.0, 79.0, 125.0, 119.0, 168.0, 177.0, 139.0, 154.0, 165.0, 168.0, 182.0, 157.0, 82.0, 68.0, 89.0, 117.0, 170.0, 174.0, 73.0, 59.0, 112.0, 92.0, 136.0, 183.0, 119.0, 139.0, 162.0, 145.0, 119.0, 131.0, 150.0, 132.0, 167.0, 131.0, 158.0, 147.0]}, "sampler_perf": {"mean_env_wait_ms": 2.717456296666147, "mean_processing_ms": 0.6098308335159816, "mean_inference_ms": 3.265231126103296}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1608000, "num_steps_sampled": 857600, "sample_time_ms": 21370.114, "load_time_ms": 37.697, "grad_time_ms": 9313.497, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.7105054716034394e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019875967409461737, "policy_loss": 
-0.006022992078214884, "vf_loss": 47.62739562988281, "vf_explained_var": 0.6981029510498047, "kl": 0.0015933552058413625, "entropy": 1.4546891450881958, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 857600, "episodes_total": 2144, "training_iteration": 67, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-18-22", "timestamp": 1660249102, "time_this_iter_s": 26.67682385444641, "time_total_s": 7522.805308103561, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, 
"eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, 
"multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7522.805308103561, "timesteps_since_restore": 857600, "iterations_since_restore": 67, "perf": {"cpu_util_percent": 34.505405405405405, "ram_util_percent": 57.59189189189188}} +{"episode_reward_max": 421.0, "episode_reward_min": 23.0, "episode_reward_mean": 275.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 9.0}, "policy_reward_max": {"ppo": 211.0}, "policy_reward_mean": {"ppo": 137.5}, "custom_metrics": {"sparse_reward_mean": 88.8, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 97.4, "shaped_reward_min": 20, "shaped_reward_max": 144, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 
0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.21, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 11.29, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 9.56, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 10.48, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.74, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 8.75, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.63, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.81, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.75, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.5, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.54, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.73, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.19, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.17, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.46, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.29, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 8.75, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.63, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.75, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.63, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [330.0, 248.0, 387.0, 301.0, 298.0, 190.0, 359.0, 353.0, 296.0, 365.0, 293.0, 364.0, 236.0, 325.0, 321.0, 259.0, 303.0, 255.0, 273.0, 256.0, 261.0, 290.0, 307.0, 298.0, 359.0, 287.0, 261.0, 249.0, 255.0, 307.0, 176.0, 413.0, 290.0, 259.0, 159.0, 252.0, 282.0, 319.0, 247.0, 267.0, 244.0, 256.0, 305.0, 402.0, 352.0, 210.0, 139.0, 185.0, 293.0, 324.0, 164.0, 244.0, 345.0, 293.0, 333.0, 339.0, 150.0, 206.0, 344.0, 132.0, 204.0, 319.0, 258.0, 307.0, 250.0, 282.0, 298.0, 305.0, 218.0, 187.0, 60.0, 267.0, 71.0, 244.0, 238.0, 379.0, 373.0, 258.0, 339.0, 198.0, 330.0, 309.0, 358.0, 267.0, 293.0, 298.0, 264.0, 356.0, 309.0, 253.0, 236.0, 421.0, 287.0, 296.0, 384.0, 254.0, 264.0, 208.0, 23.0, 225.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [151.0, 179.0, 118.0, 130.0, 193.0, 194.0, 162.0, 139.0, 139.0, 159.0, 105.0, 85.0, 180.0, 179.0, 172.0, 181.0, 159.0, 137.0, 168.0, 197.0, 137.0, 156.0, 180.0, 184.0, 114.0, 122.0, 171.0, 154.0, 173.0, 148.0, 135.0, 124.0, 149.0, 154.0, 124.0, 131.0, 148.0, 125.0, 111.0, 145.0, 139.0, 122.0, 144.0, 146.0, 153.0, 154.0, 152.0, 146.0, 182.0, 177.0, 156.0, 131.0, 109.0, 152.0, 121.0, 128.0, 130.0, 125.0, 140.0, 167.0, 85.0, 91.0, 207.0, 206.0, 137.0, 153.0, 121.0, 138.0, 84.0, 75.0, 121.0, 131.0, 151.0, 131.0, 158.0, 161.0, 122.0, 125.0, 134.0, 133.0, 112.0, 132.0, 130.0, 126.0, 153.0, 152.0, 201.0, 201.0, 173.0, 179.0, 99.0, 111.0, 72.0, 67.0, 97.0, 88.0, 152.0, 141.0, 168.0, 156.0, 85.0, 79.0, 125.0, 119.0, 168.0, 177.0, 139.0, 154.0, 165.0, 168.0, 182.0, 157.0, 82.0, 68.0, 89.0, 117.0, 170.0, 174.0, 73.0, 59.0, 112.0, 92.0, 136.0, 183.0, 119.0, 139.0, 162.0, 145.0, 119.0, 131.0, 150.0, 132.0, 167.0, 131.0, 158.0, 147.0, 114.0, 104.0, 93.0, 94.0, 29.0, 31.0, 151.0, 116.0, 34.0, 37.0, 113.0, 131.0, 114.0, 124.0, 193.0, 186.0, 188.0, 185.0, 132.0, 126.0, 159.0, 180.0, 96.0, 102.0, 157.0, 173.0, 158.0, 151.0, 180.0, 178.0, 136.0, 131.0, 153.0, 140.0, 146.0, 152.0, 127.0, 137.0, 186.0, 170.0, 151.0, 158.0, 107.0, 146.0, 108.0, 128.0, 211.0, 210.0, 144.0, 143.0, 145.0, 151.0, 204.0, 180.0, 124.0, 130.0, 133.0, 131.0, 99.0, 109.0, 9.0, 14.0, 109.0, 116.0]}, "sampler_perf": {"mean_env_wait_ms": 2.683811918728699, "mean_processing_ms": 0.6031493203010873, "mean_inference_ms": 3.2301262692347574}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1632000, "num_steps_sampled": 870400, "sample_time_ms": 21057.05, "load_time_ms": 37.776, "grad_time_ms": 9180.759, "update_time_ms": 0.002, 
"learner": {"ppo": {"cur_kl_coeff": 1.3552527358017197e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.0049219937063753605, "policy_loss": -0.009040978737175465, "vf_loss": 48.340152740478516, "vf_explained_var": 0.6955335140228271, "kl": 0.0016705109737813473, "entropy": 1.430059552192688, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 870400, "episodes_total": 2176, "training_iteration": 68, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-18-49", "timestamp": 1660249129, "time_this_iter_s": 26.73872995376587, "time_total_s": 7549.544038057327, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": 
{"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], 
"postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7549.544038057327, "timesteps_since_restore": 870400, "iterations_since_restore": 68, "perf": {"cpu_util_percent": 34.623684210526314, "ram_util_percent": 57.58947368421052}} +{"episode_reward_max": 462.0, "episode_reward_min": 23.0, "episode_reward_mean": 287.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 9.0}, "policy_reward_max": {"ppo": 236.0}, "policy_reward_mean": {"ppo": 143.99}, "custom_metrics": {"sparse_reward_mean": 94.8, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 98.38, "shaped_reward_min": 20, "shaped_reward_max": 144, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.38, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 11.18, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 9.77, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 10.46, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.59, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.86, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.74, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 16, "dish_pickup_agent_0_mean": 4.92, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.78, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.84, "useful_dish_pickup_agent_0_min": 0, 
"useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.72, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.49, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.24, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.95, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.41, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.24, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 8.86, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.74, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 16, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.86, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.74, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 16, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [242.0, 301.0, 234.0, 344.0, 450.0, 312.0, 239.0, 344.0, 359.0, 253.0, 316.0, 255.0, 356.0, 171.0, 396.0, 398.0, 276.0, 402.0, 339.0, 338.0, 353.0, 462.0, 296.0, 237.0, 293.0, 298.0, 231.0, 250.0, 347.0, 63.0, 356.0, 210.0, 250.0, 282.0, 298.0, 305.0, 218.0, 187.0, 60.0, 267.0, 71.0, 244.0, 238.0, 379.0, 373.0, 258.0, 339.0, 198.0, 330.0, 309.0, 358.0, 267.0, 293.0, 298.0, 264.0, 356.0, 309.0, 253.0, 236.0, 421.0, 287.0, 296.0, 384.0, 254.0, 264.0, 208.0, 23.0, 225.0, 330.0, 248.0, 387.0, 301.0, 298.0, 190.0, 359.0, 353.0, 296.0, 365.0, 293.0, 364.0, 236.0, 325.0, 321.0, 259.0, 303.0, 255.0, 273.0, 256.0, 261.0, 290.0, 307.0, 298.0, 359.0, 287.0, 261.0, 249.0, 255.0, 307.0, 176.0, 413.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [122.0, 120.0, 142.0, 159.0, 117.0, 117.0, 178.0, 166.0, 214.0, 236.0, 150.0, 162.0, 115.0, 124.0, 174.0, 170.0, 187.0, 172.0, 114.0, 139.0, 169.0, 147.0, 122.0, 133.0, 172.0, 184.0, 83.0, 88.0, 186.0, 210.0, 190.0, 208.0, 137.0, 139.0, 193.0, 209.0, 182.0, 157.0, 177.0, 161.0, 193.0, 160.0, 236.0, 226.0, 147.0, 149.0, 125.0, 112.0, 138.0, 155.0, 144.0, 154.0, 111.0, 120.0, 136.0, 114.0, 170.0, 177.0, 23.0, 40.0, 176.0, 180.0, 108.0, 102.0, 119.0, 131.0, 150.0, 132.0, 167.0, 131.0, 158.0, 147.0, 114.0, 104.0, 93.0, 94.0, 29.0, 31.0, 151.0, 116.0, 34.0, 37.0, 113.0, 131.0, 114.0, 124.0, 193.0, 186.0, 188.0, 185.0, 132.0, 126.0, 159.0, 180.0, 96.0, 102.0, 157.0, 173.0, 158.0, 151.0, 180.0, 178.0, 136.0, 131.0, 153.0, 140.0, 146.0, 152.0, 127.0, 137.0, 186.0, 170.0, 151.0, 158.0, 107.0, 146.0, 108.0, 128.0, 211.0, 210.0, 144.0, 143.0, 145.0, 151.0, 204.0, 180.0, 124.0, 130.0, 133.0, 131.0, 99.0, 109.0, 9.0, 14.0, 109.0, 116.0, 151.0, 179.0, 118.0, 130.0, 193.0, 194.0, 162.0, 139.0, 139.0, 159.0, 105.0, 85.0, 180.0, 179.0, 172.0, 181.0, 159.0, 137.0, 168.0, 197.0, 137.0, 156.0, 180.0, 184.0, 114.0, 122.0, 171.0, 154.0, 173.0, 148.0, 135.0, 124.0, 149.0, 154.0, 124.0, 131.0, 148.0, 125.0, 111.0, 145.0, 139.0, 122.0, 144.0, 146.0, 153.0, 154.0, 152.0, 146.0, 182.0, 177.0, 156.0, 131.0, 109.0, 152.0, 121.0, 128.0, 130.0, 125.0, 140.0, 167.0, 85.0, 91.0, 207.0, 206.0]}, "sampler_perf": {"mean_env_wait_ms": 2.6511088970025942, "mean_processing_ms": 0.5966469783379228, "mean_inference_ms": 3.1956784822649578}, 
"off_policy_estimator": {}, "info": {"num_steps_trained": 1656000, "num_steps_sampled": 883200, "sample_time_ms": 20666.066, "load_time_ms": 37.699, "grad_time_ms": 8951.146, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 6.776263679008599e-22, "cur_lr": 0.0010000000474974513, "total_loss": -0.007915745489299297, "policy_loss": -0.011840385384857655, "vf_loss": 46.363162994384766, "vf_explained_var": 0.7722532153129578, "kl": 0.0015700907679274678, "entropy": 1.423343300819397, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 883200, "episodes_total": 2208, "training_iteration": 69, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-19-16", "timestamp": 1660249156, "time_this_iter_s": 26.615740060806274, "time_total_s": 7576.1597781181335, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": 
{"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7576.1597781181335, "timesteps_since_restore": 883200, "iterations_since_restore": 69, "perf": {"cpu_util_percent": 33.539473684210535, "ram_util_percent": 57.605263157894726}} +{"episode_reward_max": 462.0, "episode_reward_min": 23.0, "episode_reward_mean": 296.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 9.0}, "policy_reward_max": {"ppo": 236.0}, "policy_reward_mean": {"ppo": 148.435}, "custom_metrics": {"sparse_reward_mean": 98.6, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 99.67, "shaped_reward_min": 23, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.41, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.35, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 9.87, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.64, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.93, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.01, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 2, 
"dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.72, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.92, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.78, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.4, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.48, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.68, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.39, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.27, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 8.93, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.01, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.93, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.01, 
"viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [249.0, 68.0, 319.0, 402.0, 327.0, 293.0, 350.0, 396.0, 396.0, 247.0, 387.0, 134.0, 387.0, 301.0, 356.0, 365.0, 308.0, 344.0, 348.0, 333.0, 345.0, 359.0, 259.0, 307.0, 330.0, 269.0, 350.0, 247.0, 356.0, 302.0, 269.0, 68.0, 264.0, 208.0, 23.0, 225.0, 330.0, 248.0, 387.0, 301.0, 298.0, 190.0, 359.0, 353.0, 296.0, 365.0, 293.0, 364.0, 236.0, 325.0, 321.0, 259.0, 303.0, 255.0, 273.0, 256.0, 261.0, 290.0, 307.0, 298.0, 359.0, 287.0, 261.0, 249.0, 255.0, 307.0, 176.0, 413.0, 242.0, 301.0, 234.0, 344.0, 450.0, 312.0, 239.0, 344.0, 359.0, 253.0, 316.0, 255.0, 356.0, 171.0, 396.0, 
398.0, 276.0, 402.0, 339.0, 338.0, 353.0, 462.0, 296.0, 237.0, 293.0, 298.0, 231.0, 250.0, 347.0, 63.0, 356.0, 210.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [129.0, 120.0, 28.0, 40.0, 165.0, 154.0, 191.0, 211.0, 161.0, 166.0, 152.0, 141.0, 179.0, 171.0, 222.0, 174.0, 205.0, 191.0, 131.0, 116.0, 188.0, 199.0, 60.0, 74.0, 212.0, 175.0, 147.0, 154.0, 170.0, 186.0, 185.0, 180.0, 146.0, 162.0, 167.0, 177.0, 177.0, 171.0, 153.0, 180.0, 169.0, 176.0, 189.0, 170.0, 133.0, 126.0, 139.0, 168.0, 176.0, 154.0, 142.0, 127.0, 183.0, 167.0, 124.0, 123.0, 176.0, 180.0, 142.0, 160.0, 133.0, 136.0, 28.0, 40.0, 133.0, 131.0, 99.0, 109.0, 9.0, 14.0, 109.0, 116.0, 151.0, 179.0, 118.0, 130.0, 193.0, 194.0, 162.0, 139.0, 139.0, 159.0, 105.0, 85.0, 180.0, 179.0, 172.0, 181.0, 159.0, 137.0, 168.0, 197.0, 137.0, 156.0, 180.0, 184.0, 114.0, 122.0, 171.0, 154.0, 173.0, 148.0, 135.0, 124.0, 149.0, 154.0, 124.0, 131.0, 148.0, 125.0, 111.0, 145.0, 139.0, 122.0, 144.0, 146.0, 153.0, 154.0, 152.0, 146.0, 182.0, 177.0, 156.0, 131.0, 109.0, 152.0, 121.0, 128.0, 130.0, 125.0, 140.0, 167.0, 85.0, 91.0, 207.0, 206.0, 122.0, 120.0, 142.0, 159.0, 117.0, 117.0, 178.0, 166.0, 214.0, 236.0, 150.0, 162.0, 115.0, 124.0, 174.0, 170.0, 187.0, 172.0, 114.0, 139.0, 169.0, 147.0, 122.0, 133.0, 172.0, 184.0, 83.0, 88.0, 186.0, 210.0, 190.0, 208.0, 137.0, 139.0, 193.0, 209.0, 182.0, 157.0, 177.0, 161.0, 193.0, 160.0, 236.0, 226.0, 147.0, 149.0, 125.0, 112.0, 138.0, 155.0, 144.0, 154.0, 111.0, 120.0, 136.0, 114.0, 170.0, 
177.0, 23.0, 40.0, 176.0, 180.0, 108.0, 102.0]}, "sampler_perf": {"mean_env_wait_ms": 2.619364465918477, "mean_processing_ms": 0.59033913982099, "mean_inference_ms": 3.1628679481393043}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1680000, "num_steps_sampled": 896000, "sample_time_ms": 20622.52, "load_time_ms": 37.691, "grad_time_ms": 8717.912, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.3881318395042993e-22, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007909121923148632, "policy_loss": -0.0035155529621988535, "vf_loss": 50.137577056884766, "vf_explained_var": 0.7450786232948303, "kl": 0.0021507267374545336, "entropy": 1.4145766496658325, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 896000, "episodes_total": 2240, "training_iteration": 70, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-19-45", "timestamp": 1660249185, "time_this_iter_s": 29.150850772857666, "time_total_s": 7605.310628890991, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, 
"timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7605.310628890991, "timesteps_since_restore": 896000, "iterations_since_restore": 70, "perf": {"cpu_util_percent": 33.670731707317074, "ram_util_percent": 57.6219512195122}} +{"episode_reward_max": 462.0, "episode_reward_min": 63.0, "episode_reward_mean": 310.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 23.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 155.035}, "custom_metrics": {"sparse_reward_mean": 103.8, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 102.47, "shaped_reward_min": 23, "shaped_reward_max": 142, 
"tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.6, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.29, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.1, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.61, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.51, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 9.29, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, 
"potting_onion_agent_1_mean": 9.98, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.56, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.96, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.34, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.4, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.65, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.23, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.85, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.33, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.23, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 9.29, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 9.98, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 9.29, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 9.98, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [367.0, 350.0, 230.0, 359.0, 362.0, 348.0, 408.0, 313.0, 345.0, 393.0, 399.0, 324.0, 293.0, 296.0, 396.0, 159.0, 236.0, 264.0, 408.0, 456.0, 270.0, 304.0, 356.0, 327.0, 213.0, 275.0, 350.0, 284.0, 390.0, 237.0, 402.0, 250.0, 255.0, 307.0, 176.0, 413.0, 242.0, 301.0, 234.0, 344.0, 450.0, 312.0, 239.0, 344.0, 359.0, 253.0, 316.0, 255.0, 356.0, 171.0, 396.0, 398.0, 276.0, 402.0, 339.0, 338.0, 353.0, 462.0, 296.0, 
237.0, 293.0, 298.0, 231.0, 250.0, 347.0, 63.0, 356.0, 210.0, 249.0, 68.0, 319.0, 402.0, 327.0, 293.0, 350.0, 396.0, 396.0, 247.0, 387.0, 134.0, 387.0, 301.0, 356.0, 365.0, 308.0, 344.0, 348.0, 333.0, 345.0, 359.0, 259.0, 307.0, 330.0, 269.0, 350.0, 247.0, 356.0, 302.0, 269.0, 68.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [195.0, 172.0, 171.0, 179.0, 123.0, 107.0, 183.0, 176.0, 184.0, 178.0, 178.0, 170.0, 211.0, 197.0, 154.0, 159.0, 174.0, 171.0, 204.0, 189.0, 205.0, 194.0, 167.0, 157.0, 156.0, 137.0, 150.0, 146.0, 206.0, 190.0, 88.0, 71.0, 121.0, 115.0, 125.0, 139.0, 205.0, 203.0, 241.0, 215.0, 135.0, 135.0, 138.0, 166.0, 182.0, 174.0, 162.0, 165.0, 119.0, 94.0, 128.0, 147.0, 177.0, 173.0, 148.0, 136.0, 201.0, 189.0, 106.0, 131.0, 195.0, 207.0, 123.0, 127.0, 130.0, 125.0, 140.0, 167.0, 85.0, 91.0, 207.0, 206.0, 122.0, 120.0, 142.0, 159.0, 117.0, 117.0, 178.0, 166.0, 214.0, 236.0, 150.0, 162.0, 115.0, 124.0, 174.0, 170.0, 187.0, 172.0, 114.0, 139.0, 169.0, 147.0, 122.0, 133.0, 172.0, 184.0, 83.0, 88.0, 186.0, 210.0, 190.0, 208.0, 137.0, 139.0, 193.0, 209.0, 182.0, 157.0, 177.0, 161.0, 193.0, 160.0, 236.0, 226.0, 147.0, 149.0, 125.0, 112.0, 138.0, 155.0, 144.0, 154.0, 111.0, 120.0, 136.0, 114.0, 170.0, 177.0, 23.0, 40.0, 176.0, 180.0, 108.0, 102.0, 129.0, 120.0, 28.0, 40.0, 165.0, 154.0, 191.0, 211.0, 161.0, 166.0, 152.0, 141.0, 179.0, 171.0, 222.0, 174.0, 205.0, 191.0, 131.0, 116.0, 188.0, 199.0, 60.0, 74.0, 212.0, 175.0, 147.0, 154.0, 170.0, 186.0, 185.0, 180.0, 146.0, 
162.0, 167.0, 177.0, 177.0, 171.0, 153.0, 180.0, 169.0, 176.0, 189.0, 170.0, 133.0, 126.0, 139.0, 168.0, 176.0, 154.0, 142.0, 127.0, 183.0, 167.0, 124.0, 123.0, 176.0, 180.0, 142.0, 160.0, 133.0, 136.0, 28.0, 40.0]}, "sampler_perf": {"mean_env_wait_ms": 2.5885045396886737, "mean_processing_ms": 0.584211440514898, "mean_inference_ms": 3.1312910646882246}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1704000, "num_steps_sampled": 908800, "sample_time_ms": 20412.625, "load_time_ms": 37.645, "grad_time_ms": 8491.272, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.6940659197521496e-22, "cur_lr": 0.0010000000474974513, "total_loss": 9.037616109708324e-05, "policy_loss": -0.004211378749459982, "vf_loss": 49.97343826293945, "vf_explained_var": 0.7645077705383301, "kl": 0.0018662656657397747, "entropy": 1.391157627105713, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 908800, "episodes_total": 2272, "training_iteration": 71, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-20-14", "timestamp": 1660249214, "time_this_iter_s": 28.656519889831543, "time_total_s": 7633.967148780823, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": 
null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, 
"compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7633.967148780823, "timesteps_since_restore": 908800, "iterations_since_restore": 71, "perf": {"cpu_util_percent": 34.982926829268294, "ram_util_percent": 57.707317073170735}} +{"episode_reward_max": 456.0, "episode_reward_min": 63.0, "episode_reward_mean": 316.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 23.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 158.135}, 
"custom_metrics": {"sparse_reward_mean": 105.8, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 104.67, "shaped_reward_min": 23, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.74, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.52, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.21, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.81, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 9.4, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.18, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 4.86, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.41, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.04, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.36, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.75, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.22, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.28, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.84, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.38, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.29, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.4, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.18, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.4, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.18, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [408.0, 316.0, 234.0, 245.0, 373.0, 336.0, 450.0, 416.0, 413.0, 356.0, 247.0, 410.0, 182.0, 347.0, 233.0, 316.0, 284.0, 316.0, 242.0, 296.0, 396.0, 142.0, 324.0, 351.0, 359.0, 361.0, 399.0, 353.0, 344.0, 275.0, 
410.0, 382.0, 347.0, 63.0, 356.0, 210.0, 249.0, 68.0, 319.0, 402.0, 327.0, 293.0, 350.0, 396.0, 396.0, 247.0, 387.0, 134.0, 387.0, 301.0, 356.0, 365.0, 308.0, 344.0, 348.0, 333.0, 345.0, 359.0, 259.0, 307.0, 330.0, 269.0, 350.0, 247.0, 356.0, 302.0, 269.0, 68.0, 367.0, 350.0, 230.0, 359.0, 362.0, 348.0, 408.0, 313.0, 345.0, 393.0, 399.0, 324.0, 293.0, 296.0, 396.0, 159.0, 236.0, 264.0, 408.0, 456.0, 270.0, 304.0, 356.0, 327.0, 213.0, 275.0, 350.0, 284.0, 390.0, 237.0, 402.0, 250.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [199.0, 209.0, 151.0, 165.0, 95.0, 139.0, 128.0, 117.0, 175.0, 198.0, 156.0, 180.0, 239.0, 211.0, 208.0, 208.0, 210.0, 203.0, 174.0, 182.0, 123.0, 124.0, 204.0, 206.0, 88.0, 94.0, 178.0, 169.0, 116.0, 117.0, 151.0, 165.0, 149.0, 135.0, 163.0, 153.0, 144.0, 98.0, 151.0, 145.0, 179.0, 217.0, 62.0, 80.0, 156.0, 168.0, 184.0, 167.0, 165.0, 194.0, 182.0, 179.0, 207.0, 192.0, 179.0, 174.0, 178.0, 166.0, 141.0, 134.0, 219.0, 191.0, 188.0, 194.0, 170.0, 177.0, 23.0, 40.0, 176.0, 180.0, 108.0, 102.0, 129.0, 120.0, 28.0, 40.0, 165.0, 154.0, 191.0, 211.0, 161.0, 166.0, 152.0, 141.0, 179.0, 171.0, 222.0, 174.0, 205.0, 191.0, 131.0, 116.0, 188.0, 199.0, 60.0, 74.0, 212.0, 175.0, 147.0, 154.0, 170.0, 186.0, 185.0, 180.0, 146.0, 162.0, 167.0, 177.0, 177.0, 171.0, 153.0, 180.0, 169.0, 176.0, 189.0, 170.0, 133.0, 126.0, 139.0, 168.0, 176.0, 154.0, 142.0, 127.0, 183.0, 167.0, 124.0, 123.0, 176.0, 180.0, 142.0, 160.0, 133.0, 136.0, 28.0, 40.0, 195.0, 172.0, 171.0, 179.0, 
123.0, 107.0, 183.0, 176.0, 184.0, 178.0, 178.0, 170.0, 211.0, 197.0, 154.0, 159.0, 174.0, 171.0, 204.0, 189.0, 205.0, 194.0, 167.0, 157.0, 156.0, 137.0, 150.0, 146.0, 206.0, 190.0, 88.0, 71.0, 121.0, 115.0, 125.0, 139.0, 205.0, 203.0, 241.0, 215.0, 135.0, 135.0, 138.0, 166.0, 182.0, 174.0, 162.0, 165.0, 119.0, 94.0, 128.0, 147.0, 177.0, 173.0, 148.0, 136.0, 201.0, 189.0, 106.0, 131.0, 195.0, 207.0, 123.0, 127.0]}, "sampler_perf": {"mean_env_wait_ms": 2.558557313555292, "mean_processing_ms": 0.5782709476223633, "mean_inference_ms": 3.1013750793848702}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1728000, "num_steps_sampled": 921600, "sample_time_ms": 20167.129, "load_time_ms": 37.249, "grad_time_ms": 8246.669, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 8.470329598760748e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.0024793706834316254, "policy_loss": -0.007412927225232124, "vf_loss": 56.26578903198242, "vf_explained_var": 0.7433841228485107, "kl": 0.0019004354253411293, "entropy": 1.3860511779785156, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 921600, "episodes_total": 2304, "training_iteration": 72, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-20-44", "timestamp": 1660249244, "time_this_iter_s": 30.219820022583008, "time_total_s": 7664.186968803406, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7664.186968803406, "timesteps_since_restore": 921600, "iterations_since_restore": 72, "perf": {"cpu_util_percent": 35.07380952380952, "ram_util_percent": 57.70714285714284}} +{"episode_reward_max": 465.0, "episode_reward_min": 
68.0, "episode_reward_mean": 332.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 28.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 166.23}, "custom_metrics": {"sparse_reward_mean": 111.6, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 109.26, "shaped_reward_min": 28, "shaped_reward_max": 145, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.81, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 12.13, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.31, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 11.46, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.91, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.09, 
"onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.46, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 9.53, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.79, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 4.84, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.44, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.6, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.3, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.36, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.29, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.63, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.46, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.07, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.28, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.31, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.53, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 17, 
"optimal_onion_potting_agent_1_mean": 10.79, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.53, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.79, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [373.0, 465.0, 251.0, 348.0, 399.0, 
284.0, 344.0, 459.0, 416.0, 410.0, 416.0, 358.0, 422.0, 402.0, 405.0, 422.0, 247.0, 296.0, 407.0, 246.0, 120.0, 310.0, 413.0, 338.0, 353.0, 405.0, 416.0, 365.0, 236.0, 287.0, 350.0, 408.0, 356.0, 302.0, 269.0, 68.0, 367.0, 350.0, 230.0, 359.0, 362.0, 348.0, 408.0, 313.0, 345.0, 393.0, 399.0, 324.0, 293.0, 296.0, 396.0, 159.0, 236.0, 264.0, 408.0, 456.0, 270.0, 304.0, 356.0, 327.0, 213.0, 275.0, 350.0, 284.0, 390.0, 237.0, 402.0, 250.0, 408.0, 316.0, 234.0, 245.0, 373.0, 336.0, 450.0, 416.0, 413.0, 356.0, 247.0, 410.0, 182.0, 347.0, 233.0, 316.0, 284.0, 316.0, 242.0, 296.0, 396.0, 142.0, 324.0, 351.0, 359.0, 361.0, 399.0, 353.0, 344.0, 275.0, 410.0, 382.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [170.0, 203.0, 226.0, 239.0, 131.0, 120.0, 162.0, 186.0, 205.0, 194.0, 132.0, 152.0, 166.0, 178.0, 235.0, 224.0, 205.0, 211.0, 188.0, 222.0, 197.0, 219.0, 177.0, 181.0, 212.0, 210.0, 196.0, 206.0, 213.0, 192.0, 199.0, 223.0, 111.0, 136.0, 147.0, 149.0, 193.0, 214.0, 121.0, 125.0, 55.0, 65.0, 165.0, 145.0, 203.0, 210.0, 170.0, 168.0, 161.0, 192.0, 191.0, 214.0, 213.0, 203.0, 178.0, 187.0, 98.0, 138.0, 150.0, 137.0, 176.0, 174.0, 202.0, 206.0, 176.0, 180.0, 142.0, 160.0, 133.0, 136.0, 28.0, 40.0, 195.0, 172.0, 171.0, 179.0, 123.0, 107.0, 183.0, 176.0, 184.0, 178.0, 178.0, 170.0, 211.0, 197.0, 154.0, 159.0, 174.0, 171.0, 204.0, 189.0, 205.0, 194.0, 167.0, 157.0, 156.0, 137.0, 150.0, 146.0, 206.0, 190.0, 88.0, 71.0, 121.0, 115.0, 125.0, 139.0, 205.0, 203.0, 241.0, 215.0, 135.0, 135.0, 
138.0, 166.0, 182.0, 174.0, 162.0, 165.0, 119.0, 94.0, 128.0, 147.0, 177.0, 173.0, 148.0, 136.0, 201.0, 189.0, 106.0, 131.0, 195.0, 207.0, 123.0, 127.0, 199.0, 209.0, 151.0, 165.0, 95.0, 139.0, 128.0, 117.0, 175.0, 198.0, 156.0, 180.0, 239.0, 211.0, 208.0, 208.0, 210.0, 203.0, 174.0, 182.0, 123.0, 124.0, 204.0, 206.0, 88.0, 94.0, 178.0, 169.0, 116.0, 117.0, 151.0, 165.0, 149.0, 135.0, 163.0, 153.0, 144.0, 98.0, 151.0, 145.0, 179.0, 217.0, 62.0, 80.0, 156.0, 168.0, 184.0, 167.0, 165.0, 194.0, 182.0, 179.0, 207.0, 192.0, 179.0, 174.0, 178.0, 166.0, 141.0, 134.0, 219.0, 191.0, 188.0, 194.0]}, "sampler_perf": {"mean_env_wait_ms": 2.5294421557916076, "mean_processing_ms": 0.5725031041756122, "mean_inference_ms": 3.0723080495869532}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1752000, "num_steps_sampled": 934400, "sample_time_ms": 20097.062, "load_time_ms": 37.14, "grad_time_ms": 8126.211, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 4.235164799380374e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.0012375875376164913, "policy_loss": -0.006141460034996271, "vf_loss": 55.8723258972168, "vf_explained_var": 0.7437755465507507, "kl": 0.0014161770232021809, "entropy": 1.3667305707931519, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 934400, "episodes_total": 2336, "training_iteration": 73, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-21-14", "timestamp": 1660249274, "time_this_iter_s": 30.526150941848755, "time_total_s": 7694.7131197452545, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", 
"fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7694.7131197452545, "timesteps_since_restore": 934400, 
"iterations_since_restore": 73, "perf": {"cpu_util_percent": 36.46279069767442, "ram_util_percent": 57.75116279069769}} +{"episode_reward_max": 465.0, "episode_reward_min": 120.0, "episode_reward_mean": 346.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 55.0}, "policy_reward_max": {"ppo": 239.0}, "policy_reward_mean": {"ppo": 173.395}, "custom_metrics": {"sparse_reward_mean": 117.2, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 112.39, "shaped_reward_min": 40, "shaped_reward_max": 147, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.26, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 12.36, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.7, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 11.66, 
"useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.6, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.69, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 9.87, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.09, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 4.89, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.77, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.43, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.37, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.39, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.72, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.58, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.36, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.29, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, 
"soup_drop_agent_1_mean": 0.24, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.87, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.09, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.87, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.09, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [358.0, 413.0, 455.0, 395.0, 402.0, 341.0, 290.0, 427.0, 413.0, 253.0, 355.0, 365.0, 316.0, 458.0, 196.0, 459.0, 398.0, 359.0, 407.0, 322.0, 410.0, 416.0, 390.0, 301.0, 416.0, 279.0, 301.0, 215.0, 359.0, 395.0, 310.0, 339.0, 390.0, 237.0, 402.0, 250.0, 408.0, 316.0, 234.0, 245.0, 373.0, 336.0, 450.0, 416.0, 413.0, 356.0, 247.0, 410.0, 182.0, 347.0, 233.0, 316.0, 284.0, 316.0, 242.0, 296.0, 396.0, 142.0, 324.0, 351.0, 359.0, 361.0, 399.0, 353.0, 344.0, 275.0, 410.0, 382.0, 373.0, 465.0, 251.0, 348.0, 399.0, 284.0, 344.0, 459.0, 416.0, 410.0, 416.0, 358.0, 422.0, 402.0, 405.0, 422.0, 247.0, 296.0, 407.0, 246.0, 120.0, 310.0, 413.0, 338.0, 353.0, 405.0, 416.0, 365.0, 236.0, 287.0, 350.0, 408.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [187.0, 171.0, 213.0, 200.0, 222.0, 233.0, 208.0, 187.0, 203.0, 199.0, 173.0, 168.0, 128.0, 162.0, 217.0, 210.0, 211.0, 202.0, 119.0, 134.0, 180.0, 175.0, 169.0, 196.0, 160.0, 156.0, 235.0, 223.0, 107.0, 89.0, 229.0, 230.0, 188.0, 210.0, 179.0, 180.0, 199.0, 208.0, 157.0, 165.0, 225.0, 185.0, 208.0, 208.0, 187.0, 203.0, 157.0, 144.0, 204.0, 212.0, 151.0, 128.0, 142.0, 159.0, 108.0, 107.0, 182.0, 177.0, 195.0, 200.0, 144.0, 166.0, 171.0, 168.0, 201.0, 189.0, 106.0, 131.0, 195.0, 207.0, 123.0, 127.0, 199.0, 209.0, 151.0, 165.0, 95.0, 139.0, 128.0, 
117.0, 175.0, 198.0, 156.0, 180.0, 239.0, 211.0, 208.0, 208.0, 210.0, 203.0, 174.0, 182.0, 123.0, 124.0, 204.0, 206.0, 88.0, 94.0, 178.0, 169.0, 116.0, 117.0, 151.0, 165.0, 149.0, 135.0, 163.0, 153.0, 144.0, 98.0, 151.0, 145.0, 179.0, 217.0, 62.0, 80.0, 156.0, 168.0, 184.0, 167.0, 165.0, 194.0, 182.0, 179.0, 207.0, 192.0, 179.0, 174.0, 178.0, 166.0, 141.0, 134.0, 219.0, 191.0, 188.0, 194.0, 170.0, 203.0, 226.0, 239.0, 131.0, 120.0, 162.0, 186.0, 205.0, 194.0, 132.0, 152.0, 166.0, 178.0, 235.0, 224.0, 205.0, 211.0, 188.0, 222.0, 197.0, 219.0, 177.0, 181.0, 212.0, 210.0, 196.0, 206.0, 213.0, 192.0, 199.0, 223.0, 111.0, 136.0, 147.0, 149.0, 193.0, 214.0, 121.0, 125.0, 55.0, 65.0, 165.0, 145.0, 203.0, 210.0, 170.0, 168.0, 161.0, 192.0, 191.0, 214.0, 213.0, 203.0, 178.0, 187.0, 98.0, 138.0, 150.0, 137.0, 176.0, 174.0, 202.0, 206.0]}, "sampler_perf": {"mean_env_wait_ms": 2.5011550162167318, "mean_processing_ms": 0.5669031638789668, "mean_inference_ms": 3.044470138401616}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1776000, "num_steps_sampled": 947200, "sample_time_ms": 20475.505, "load_time_ms": 36.886, "grad_time_ms": 8011.391, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.117582399690187e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.002805360360071063, "policy_loss": -0.007409963756799698, "vf_loss": 52.88139724731445, "vf_explained_var": 0.7572636008262634, "kl": 0.0014988663606345654, "entropy": 1.3671082258224487, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 947200, "episodes_total": 2368, "training_iteration": 74, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-21-46", "timestamp": 1660249306, "time_this_iter_s": 31.191842079162598, "time_total_s": 7725.904961824417, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, 
"num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7725.904961824417, "timesteps_since_restore": 947200, "iterations_since_restore": 74, "perf": {"cpu_util_percent": 34.40666666666667, "ram_util_percent": 57.844444444444456}} +{"episode_reward_max": 465.0, "episode_reward_min": 120.0, "episode_reward_mean": 364.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 55.0}, "policy_reward_max": {"ppo": 239.0}, "policy_reward_mean": {"ppo": 182.04}, "custom_metrics": {"sparse_reward_mean": 124.0, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 116.08, "shaped_reward_min": 40, "shaped_reward_max": 147, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.3, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 12.93, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.86, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 12.23, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.93, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.57, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.09, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.68, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.56, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.38, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.42, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.62, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.21, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 10.09, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.68, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.09, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.68, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [438.0, 390.0, 364.0, 396.0, 413.0, 456.0, 346.0, 453.0, 332.0, 413.0, 251.0, 379.0, 384.0, 464.0, 384.0, 416.0, 401.0, 345.0, 339.0, 344.0, 355.0, 398.0, 419.0, 387.0, 302.0, 299.0, 453.0, 230.0, 362.0, 399.0, 405.0, 396.0, 344.0, 275.0, 410.0, 382.0, 373.0, 465.0, 251.0, 348.0, 399.0, 284.0, 344.0, 459.0, 416.0, 410.0, 416.0, 358.0, 422.0, 402.0, 405.0, 422.0, 247.0, 296.0, 407.0, 246.0, 120.0, 310.0, 413.0, 338.0, 353.0, 405.0, 416.0, 365.0, 236.0, 287.0, 350.0, 408.0, 358.0, 413.0, 455.0, 395.0, 402.0, 341.0, 290.0, 427.0, 413.0, 253.0, 355.0, 365.0, 316.0, 458.0, 196.0, 459.0, 398.0, 359.0, 407.0, 322.0, 410.0, 416.0, 390.0, 301.0, 416.0, 279.0, 301.0, 215.0, 359.0, 395.0, 310.0, 339.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [216.0, 222.0, 208.0, 182.0, 173.0, 191.0, 206.0, 190.0, 213.0, 200.0, 219.0, 237.0, 169.0, 177.0, 239.0, 214.0, 169.0, 163.0, 209.0, 204.0, 133.0, 118.0, 191.0, 188.0, 176.0, 208.0, 233.0, 231.0, 204.0, 180.0, 211.0, 205.0, 196.0, 205.0, 179.0, 166.0, 156.0, 183.0, 167.0, 177.0, 174.0, 181.0, 192.0, 206.0, 201.0, 218.0, 171.0, 216.0, 149.0, 
153.0, 153.0, 146.0, 236.0, 217.0, 111.0, 119.0, 183.0, 179.0, 207.0, 192.0, 197.0, 208.0, 202.0, 194.0, 178.0, 166.0, 141.0, 134.0, 219.0, 191.0, 188.0, 194.0, 170.0, 203.0, 226.0, 239.0, 131.0, 120.0, 162.0, 186.0, 205.0, 194.0, 132.0, 152.0, 166.0, 178.0, 235.0, 224.0, 205.0, 211.0, 188.0, 222.0, 197.0, 219.0, 177.0, 181.0, 212.0, 210.0, 196.0, 206.0, 213.0, 192.0, 199.0, 223.0, 111.0, 136.0, 147.0, 149.0, 193.0, 214.0, 121.0, 125.0, 55.0, 65.0, 165.0, 145.0, 203.0, 210.0, 170.0, 168.0, 161.0, 192.0, 191.0, 214.0, 213.0, 203.0, 178.0, 187.0, 98.0, 138.0, 150.0, 137.0, 176.0, 174.0, 202.0, 206.0, 187.0, 171.0, 213.0, 200.0, 222.0, 233.0, 208.0, 187.0, 203.0, 199.0, 173.0, 168.0, 128.0, 162.0, 217.0, 210.0, 211.0, 202.0, 119.0, 134.0, 180.0, 175.0, 169.0, 196.0, 160.0, 156.0, 235.0, 223.0, 107.0, 89.0, 229.0, 230.0, 188.0, 210.0, 179.0, 180.0, 199.0, 208.0, 157.0, 165.0, 225.0, 185.0, 208.0, 208.0, 187.0, 203.0, 157.0, 144.0, 204.0, 212.0, 151.0, 128.0, 142.0, 159.0, 108.0, 107.0, 182.0, 177.0, 195.0, 200.0, 144.0, 166.0, 171.0, 168.0]}, "sampler_perf": {"mean_env_wait_ms": 2.4736327447914648, "mean_processing_ms": 0.5614637333952731, "mean_inference_ms": 3.0177932889211685}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1800000, "num_steps_sampled": 960000, "sample_time_ms": 20800.663, "load_time_ms": 36.895, "grad_time_ms": 8168.473, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.0587911998450935e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010099885985255241, "policy_loss": -0.006211612839251757, "vf_loss": 58.7685546875, "vf_explained_var": 0.7208888530731201, "kl": 0.0020332669373601675, "entropy": 1.3504695892333984, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 960000, "episodes_total": 2400, "training_iteration": 75, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-22-18", "timestamp": 
1660249338, "time_this_iter_s": 32.21927499771118, "time_total_s": 7758.124236822128, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", 
"ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 
0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7758.124236822128, "timesteps_since_restore": 960000, "iterations_since_restore": 75, "perf": {"cpu_util_percent": 32.595555555555556, "ram_util_percent": 57.83555555555553}} +{"episode_reward_max": 507.0, "episode_reward_min": 196.0, "episode_reward_mean": 369.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 258.0}, "policy_reward_mean": {"ppo": 184.535}, "custom_metrics": {"sparse_reward_mean": 125.8, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 117.47, "shaped_reward_min": 70, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 
0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.42, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.2, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.97, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.42, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.9, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.54, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.26, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.75, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.64, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.24, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.45, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.4, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.05, "soup_pickup_agent_0_min": 1, 
"soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.53, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.18, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.26, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.75, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.26, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.75, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [272.0, 313.0, 408.0, 444.0, 304.0, 362.0, 304.0, 450.0, 408.0, 370.0, 476.0, 293.0, 453.0, 241.0, 410.0, 453.0, 405.0, 350.0, 350.0, 507.0, 427.0, 393.0, 267.0, 407.0, 419.0, 303.0, 410.0, 378.0, 350.0, 345.0, 376.0, 352.0, 236.0, 287.0, 350.0, 408.0, 358.0, 413.0, 455.0, 395.0, 402.0, 341.0, 290.0, 427.0, 413.0, 253.0, 355.0, 365.0, 316.0, 458.0, 196.0, 459.0, 398.0, 359.0, 407.0, 322.0, 410.0, 416.0, 390.0, 301.0, 416.0, 279.0, 301.0, 215.0, 359.0, 395.0, 310.0, 339.0, 438.0, 390.0, 364.0, 396.0, 413.0, 456.0, 346.0, 453.0, 332.0, 413.0, 251.0, 379.0, 384.0, 464.0, 384.0, 416.0, 401.0, 345.0, 339.0, 344.0, 355.0, 398.0, 419.0, 387.0, 302.0, 299.0, 453.0, 230.0, 362.0, 399.0, 405.0, 396.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [136.0, 136.0, 156.0, 157.0, 194.0, 214.0, 212.0, 232.0, 153.0, 151.0, 177.0, 185.0, 158.0, 146.0, 219.0, 231.0, 205.0, 203.0, 
198.0, 172.0, 250.0, 226.0, 135.0, 158.0, 244.0, 209.0, 125.0, 116.0, 194.0, 216.0, 229.0, 224.0, 201.0, 204.0, 160.0, 190.0, 157.0, 193.0, 258.0, 249.0, 208.0, 219.0, 196.0, 197.0, 138.0, 129.0, 192.0, 215.0, 211.0, 208.0, 140.0, 163.0, 206.0, 204.0, 189.0, 189.0, 171.0, 179.0, 177.0, 168.0, 206.0, 170.0, 173.0, 179.0, 98.0, 138.0, 150.0, 137.0, 176.0, 174.0, 202.0, 206.0, 187.0, 171.0, 213.0, 200.0, 222.0, 233.0, 208.0, 187.0, 203.0, 199.0, 173.0, 168.0, 128.0, 162.0, 217.0, 210.0, 211.0, 202.0, 119.0, 134.0, 180.0, 175.0, 169.0, 196.0, 160.0, 156.0, 235.0, 223.0, 107.0, 89.0, 229.0, 230.0, 188.0, 210.0, 179.0, 180.0, 199.0, 208.0, 157.0, 165.0, 225.0, 185.0, 208.0, 208.0, 187.0, 203.0, 157.0, 144.0, 204.0, 212.0, 151.0, 128.0, 142.0, 159.0, 108.0, 107.0, 182.0, 177.0, 195.0, 200.0, 144.0, 166.0, 171.0, 168.0, 216.0, 222.0, 208.0, 182.0, 173.0, 191.0, 206.0, 190.0, 213.0, 200.0, 219.0, 237.0, 169.0, 177.0, 239.0, 214.0, 169.0, 163.0, 209.0, 204.0, 133.0, 118.0, 191.0, 188.0, 176.0, 208.0, 233.0, 231.0, 204.0, 180.0, 211.0, 205.0, 196.0, 205.0, 179.0, 166.0, 156.0, 183.0, 167.0, 177.0, 174.0, 181.0, 192.0, 206.0, 201.0, 218.0, 171.0, 216.0, 149.0, 153.0, 153.0, 146.0, 236.0, 217.0, 111.0, 119.0, 183.0, 179.0, 207.0, 192.0, 197.0, 208.0, 202.0, 194.0]}, "sampler_perf": {"mean_env_wait_ms": 2.4468834694970774, "mean_processing_ms": 0.5561781773126093, "mean_inference_ms": 2.992510104410383}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1824000, "num_steps_sampled": 972800, "sample_time_ms": 21085.438, "load_time_ms": 36.907, "grad_time_ms": 8381.058, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 5.293955999225468e-24, "cur_lr": 0.0010000000474974513, "total_loss": -0.000600266270339489, "policy_loss": -0.005276266019791365, "vf_loss": 53.540836334228516, "vf_explained_var": 0.7716453671455383, "kl": 0.0016209534369409084, "entropy": 1.3561688661575317, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 972800, "episodes_total": 2432, "training_iteration": 76, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-22-51", "timestamp": 1660249371, "time_this_iter_s": 33.30055785179138, "time_total_s": 7791.42479467392, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7791.42479467392, "timesteps_since_restore": 972800, "iterations_since_restore": 76, "perf": {"cpu_util_percent": 33.693617021276594, "ram_util_percent": 57.704255319148906}} +{"episode_reward_max": 510.0, "episode_reward_min": 208.0, "episode_reward_mean": 377.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 104.0}, "policy_reward_max": {"ppo": 274.0}, "policy_reward_mean": {"ppo": 188.74}, "custom_metrics": {"sparse_reward_mean": 129.0, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 119.48, "shaped_reward_min": 70, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.62, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 13.48, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 11.01, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 12.72, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.63, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.34, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 12.03, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.71, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.78, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.0, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.4, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.29, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.04, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.39, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.19, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.18, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.34, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 12.03, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.34, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 12.03, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [376.0, 305.0, 302.0, 355.0, 447.0, 413.0, 364.0, 453.0, 459.0, 393.0, 368.0, 399.0, 404.0, 402.0, 373.0, 399.0, 381.0, 395.0, 367.0, 404.0, 208.0, 459.0, 287.0, 510.0, 347.0, 324.0, 410.0, 453.0, 404.0, 408.0, 319.0, 344.0, 359.0, 395.0, 310.0, 339.0, 438.0, 390.0, 364.0, 396.0, 413.0, 456.0, 346.0, 453.0, 332.0, 413.0, 251.0, 379.0, 384.0, 464.0, 384.0, 416.0, 401.0, 345.0, 339.0, 344.0, 355.0, 398.0, 419.0, 387.0, 302.0, 299.0, 453.0, 230.0, 362.0, 399.0, 405.0, 396.0, 272.0, 313.0, 408.0, 444.0, 304.0, 362.0, 304.0, 450.0, 408.0, 370.0, 476.0, 293.0, 453.0, 241.0, 410.0, 453.0, 405.0, 350.0, 350.0, 507.0, 427.0, 393.0, 267.0, 407.0, 419.0, 303.0, 410.0, 378.0, 350.0, 345.0, 376.0, 352.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [191.0, 185.0, 153.0, 152.0, 132.0, 170.0, 182.0, 173.0, 226.0, 221.0, 212.0, 201.0, 181.0, 183.0, 213.0, 240.0, 238.0, 221.0, 191.0, 202.0, 174.0, 194.0, 194.0, 205.0, 208.0, 196.0, 208.0, 194.0, 195.0, 178.0, 205.0, 194.0, 189.0, 192.0, 221.0, 174.0, 189.0, 178.0, 205.0, 199.0, 104.0, 104.0, 224.0, 235.0, 131.0, 156.0, 236.0, 274.0, 180.0, 167.0, 151.0, 173.0, 199.0, 211.0, 222.0, 231.0, 211.0, 193.0, 194.0, 214.0, 162.0, 157.0, 172.0, 172.0, 182.0, 177.0, 195.0, 200.0, 144.0, 166.0, 171.0, 168.0, 216.0, 222.0, 208.0, 182.0, 173.0, 191.0, 206.0, 190.0, 213.0, 200.0, 219.0, 237.0, 169.0, 177.0, 239.0, 214.0, 169.0, 163.0, 209.0, 204.0, 133.0, 118.0, 191.0, 188.0, 176.0, 208.0, 233.0, 231.0, 204.0, 180.0, 211.0, 205.0, 196.0, 205.0, 179.0, 166.0, 156.0, 183.0, 167.0, 177.0, 174.0, 181.0, 192.0, 206.0, 201.0, 218.0, 171.0, 216.0, 149.0, 153.0, 153.0, 146.0, 236.0, 217.0, 111.0, 119.0, 183.0, 179.0, 207.0, 192.0, 197.0, 208.0, 202.0, 194.0, 136.0, 136.0, 156.0, 157.0, 194.0, 214.0, 212.0, 232.0, 153.0, 151.0, 177.0, 185.0, 158.0, 146.0, 219.0, 231.0, 205.0, 203.0, 198.0, 172.0, 250.0, 226.0, 135.0, 158.0, 244.0, 209.0, 125.0, 116.0, 194.0, 216.0, 229.0, 224.0, 201.0, 204.0, 160.0, 190.0, 157.0, 193.0, 258.0, 249.0, 208.0, 219.0, 196.0, 197.0, 138.0, 129.0, 192.0, 215.0, 211.0, 208.0, 140.0, 163.0, 206.0, 204.0, 189.0, 189.0, 171.0, 179.0, 177.0, 168.0, 206.0, 170.0, 173.0, 179.0]}, "sampler_perf": {"mean_env_wait_ms": 2.4209020546417674, "mean_processing_ms": 0.551048674766232, "mean_inference_ms": 2.9680431709223565}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1848000, "num_steps_sampled": 985600, "sample_time_ms": 21397.355, "load_time_ms": 37.117, "grad_time_ms": 8669.98, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.646977999612734e-24, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010796785354614258, "policy_loss": -0.006340180989354849, "vf_loss": 
59.34244918823242, "vf_explained_var": 0.7488496899604797, "kl": 0.0016171737806871533, "entropy": 1.3474963903427124, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 985600, "episodes_total": 2464, "training_iteration": 77, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-23-24", "timestamp": 1660249404, "time_this_iter_s": 32.688453912734985, "time_total_s": 7824.113248586655, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": 
{"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": 
{"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7824.113248586655, "timesteps_since_restore": 985600, "iterations_since_restore": 77, "perf": {"cpu_util_percent": 40.12173913043479, "ram_util_percent": 58.68478260869566}} +{"episode_reward_max": 525.0, "episode_reward_min": 194.0, "episode_reward_mean": 380.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 274.0}, "policy_reward_mean": {"ppo": 190.065}, "custom_metrics": {"sparse_reward_mean": 129.4, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 121.33, "shaped_reward_min": 74, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.11, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 13.42, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 11.42, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 12.66, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.73, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.78, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 10.7, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.85, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.71, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.94, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.19, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.92, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.34, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.26, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 3.93, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.73, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.73, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.45, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.23, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.7, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.85, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.7, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.85, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [416.0, 411.0, 419.0, 358.0, 367.0, 413.0, 427.0, 456.0, 450.0, 461.0, 461.0, 301.0, 405.0, 447.0, 293.0, 285.0, 399.0, 430.0, 422.0, 362.0, 416.0, 525.0, 194.0, 365.0, 407.0, 294.0, 239.0, 304.0, 296.0, 356.0, 387.0, 453.0, 362.0, 399.0, 405.0, 396.0, 272.0, 313.0, 408.0, 444.0, 304.0, 362.0, 304.0, 450.0, 408.0, 370.0, 476.0, 293.0, 453.0, 241.0, 410.0, 453.0, 405.0, 350.0, 350.0, 507.0, 427.0, 393.0, 267.0, 407.0, 419.0, 303.0, 410.0, 378.0, 350.0, 345.0, 376.0, 352.0, 376.0, 305.0, 302.0, 355.0, 447.0, 413.0, 364.0, 453.0, 459.0, 393.0, 368.0, 399.0, 404.0, 402.0, 373.0, 399.0, 381.0, 395.0, 367.0, 404.0, 208.0, 459.0, 287.0, 510.0, 347.0, 324.0, 410.0, 453.0, 404.0, 408.0, 319.0, 344.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [205.0, 211.0, 205.0, 206.0, 200.0, 219.0, 176.0, 182.0, 191.0, 176.0, 204.0, 209.0, 219.0, 208.0, 225.0, 231.0, 219.0, 231.0, 221.0, 240.0, 231.0, 230.0, 152.0, 149.0, 205.0, 200.0, 234.0, 213.0, 143.0, 150.0, 143.0, 142.0, 202.0, 197.0, 220.0, 210.0, 200.0, 222.0, 185.0, 177.0, 215.0, 201.0, 254.0, 271.0, 100.0, 94.0, 186.0, 179.0, 205.0, 202.0, 151.0, 143.0, 108.0, 131.0, 161.0, 143.0, 143.0, 153.0, 171.0, 185.0, 201.0, 186.0, 227.0, 226.0, 183.0, 179.0, 207.0, 192.0, 197.0, 208.0, 202.0, 194.0, 136.0, 136.0, 156.0, 157.0, 194.0, 214.0, 212.0, 232.0, 153.0, 151.0, 177.0, 185.0, 158.0, 146.0, 219.0, 231.0, 205.0, 203.0, 198.0, 172.0, 250.0, 226.0, 135.0, 158.0, 244.0, 209.0, 125.0, 116.0, 194.0, 216.0, 229.0, 224.0, 201.0, 204.0, 160.0, 190.0, 157.0, 193.0, 258.0, 249.0, 208.0, 219.0, 196.0, 197.0, 138.0, 129.0, 192.0, 215.0, 211.0, 208.0, 140.0, 163.0, 206.0, 204.0, 189.0, 189.0, 171.0, 179.0, 177.0, 168.0, 206.0, 170.0, 173.0, 179.0, 191.0, 185.0, 153.0, 152.0, 132.0, 170.0, 182.0, 173.0, 226.0, 221.0, 212.0, 201.0, 181.0, 183.0, 213.0, 240.0, 238.0, 221.0, 191.0, 202.0, 174.0, 194.0, 194.0, 205.0, 208.0, 196.0, 208.0, 194.0, 195.0, 178.0, 205.0, 194.0, 189.0, 192.0, 221.0, 174.0, 189.0, 178.0, 205.0, 199.0, 104.0, 104.0, 224.0, 235.0, 131.0, 156.0, 236.0, 274.0, 180.0, 167.0, 151.0, 173.0, 199.0, 211.0, 222.0, 231.0, 211.0, 193.0, 194.0, 214.0, 162.0, 157.0, 172.0, 172.0]}, "sampler_perf": {"mean_env_wait_ms": 2.3955189497497584, "mean_processing_ms": 0.5460179273755849, "mean_inference_ms": 2.943733434085924}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1872000, "num_steps_sampled": 998400, "sample_time_ms": 21499.137, "load_time_ms": 37.019, "grad_time_ms": 
8919.956, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.323488999806367e-24, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033416959922760725, "policy_loss": -0.003114718245342374, "vf_loss": 71.20785522460938, "vf_explained_var": 0.7243476510047913, "kl": 0.001916095265187323, "entropy": 1.3287501335144043, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 998400, "episodes_total": 2496, "training_iteration": 78, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-23-54", "timestamp": 1660249434, "time_this_iter_s": 30.256299018859863, "time_total_s": 7854.3695476055145, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], 
"same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": 
"sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7854.3695476055145, "timesteps_since_restore": 998400, "iterations_since_restore": 78, "perf": {"cpu_util_percent": 32.448837209302326, "ram_util_percent": 58.1279069767442}} +{"episode_reward_max": 525.0, "episode_reward_min": 194.0, "episode_reward_mean": 380.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 274.0}, "policy_reward_mean": {"ppo": 190.095}, "custom_metrics": {"sparse_reward_mean": 130.0, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 120.19, "shaped_reward_min": 74, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, 
"useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.34, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 13.71, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 11.45, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 12.72, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.47, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.97, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.84, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.48, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 11.9, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.1, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.51, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, 
"useful_dish_pickup_agent_0_mean": 2.92, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.22, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.22, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 3.88, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.69, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.26, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.48, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 11.9, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.48, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 11.9, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, 
"viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [408.0, 398.0, 294.0, 405.0, 291.0, 450.0, 350.0, 419.0, 465.0, 462.0, 312.0, 465.0, 370.0, 416.0, 510.0, 290.0, 237.0, 376.0, 404.0, 407.0, 344.0, 450.0, 462.0, 351.0, 405.0, 333.0, 344.0, 237.0, 288.0, 465.0, 384.0, 353.0, 350.0, 345.0, 376.0, 352.0, 376.0, 305.0, 302.0, 355.0, 447.0, 413.0, 364.0, 453.0, 459.0, 393.0, 368.0, 399.0, 404.0, 402.0, 373.0, 399.0, 381.0, 395.0, 367.0, 404.0, 208.0, 459.0, 287.0, 510.0, 347.0, 324.0, 410.0, 453.0, 404.0, 408.0, 319.0, 344.0, 416.0, 411.0, 419.0, 358.0, 367.0, 413.0, 427.0, 456.0, 450.0, 461.0, 461.0, 301.0, 405.0, 447.0, 293.0, 285.0, 399.0, 430.0, 422.0, 362.0, 416.0, 525.0, 194.0, 365.0, 407.0, 294.0, 239.0, 304.0, 296.0, 356.0, 387.0, 453.0], 
"episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [199.0, 209.0, 194.0, 204.0, 159.0, 135.0, 208.0, 197.0, 136.0, 155.0, 228.0, 222.0, 162.0, 188.0, 209.0, 210.0, 231.0, 234.0, 217.0, 245.0, 140.0, 172.0, 236.0, 229.0, 184.0, 186.0, 226.0, 190.0, 267.0, 243.0, 150.0, 140.0, 117.0, 120.0, 187.0, 189.0, 197.0, 207.0, 203.0, 204.0, 164.0, 180.0, 221.0, 229.0, 227.0, 235.0, 171.0, 180.0, 199.0, 206.0, 159.0, 174.0, 159.0, 185.0, 119.0, 118.0, 140.0, 148.0, 229.0, 236.0, 180.0, 204.0, 175.0, 178.0, 171.0, 179.0, 177.0, 168.0, 206.0, 170.0, 173.0, 179.0, 191.0, 185.0, 153.0, 152.0, 132.0, 170.0, 182.0, 173.0, 226.0, 221.0, 212.0, 201.0, 181.0, 183.0, 213.0, 240.0, 238.0, 221.0, 191.0, 202.0, 174.0, 194.0, 194.0, 205.0, 208.0, 196.0, 208.0, 194.0, 195.0, 178.0, 205.0, 194.0, 189.0, 192.0, 221.0, 174.0, 189.0, 178.0, 205.0, 199.0, 104.0, 104.0, 224.0, 235.0, 131.0, 156.0, 236.0, 274.0, 180.0, 167.0, 151.0, 173.0, 199.0, 211.0, 222.0, 231.0, 211.0, 193.0, 194.0, 214.0, 162.0, 157.0, 172.0, 172.0, 205.0, 211.0, 205.0, 206.0, 200.0, 219.0, 176.0, 182.0, 191.0, 176.0, 204.0, 209.0, 219.0, 208.0, 225.0, 231.0, 219.0, 231.0, 221.0, 240.0, 231.0, 230.0, 152.0, 149.0, 205.0, 200.0, 234.0, 213.0, 143.0, 150.0, 143.0, 142.0, 202.0, 197.0, 220.0, 210.0, 200.0, 222.0, 185.0, 177.0, 215.0, 201.0, 254.0, 271.0, 100.0, 94.0, 186.0, 179.0, 205.0, 202.0, 151.0, 143.0, 108.0, 131.0, 161.0, 143.0, 143.0, 153.0, 171.0, 185.0, 201.0, 186.0, 227.0, 226.0]}, "sampler_perf": {"mean_env_wait_ms": 
2.3706807134794996, "mean_processing_ms": 0.5410767279997776, "mean_inference_ms": 2.9190331024760856}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1896000, "num_steps_sampled": 1011200, "sample_time_ms": 21546.342, "load_time_ms": 37.021, "grad_time_ms": 9020.101, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.617444999031835e-25, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019535624887794256, "policy_loss": -0.0075730024836957455, "vf_loss": 62.825687408447266, "vf_explained_var": 0.7674410939216614, "kl": 0.001638473360799253, "entropy": 1.3262617588043213, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1011200, "episodes_total": 2528, "training_iteration": 79, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-24-22", "timestamp": 1660249462, "time_this_iter_s": 28.0881450176239, "time_total_s": 7882.457692623138, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": 
{}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7882.457692623138, "timesteps_since_restore": 1011200, "iterations_since_restore": 79, "perf": {"cpu_util_percent": 32.120000000000005, "ram_util_percent": 58.1375}} +{"episode_reward_max": 525.0, "episode_reward_min": 9.0, "episode_reward_mean": 390.28, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 195.14}, "custom_metrics": {"sparse_reward_mean": 134.0, "sparse_reward_min": 0, "sparse_reward_max": 180, "shaped_reward_mean": 122.28, "shaped_reward_min": 9, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, 
"tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.84, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 13.96, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 11.85, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 12.82, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.91, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.79, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 11.99, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 
17, "dish_pickup_agent_0_mean": 4.93, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.67, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.24, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.22, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 3.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.83, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.64, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.5, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.27, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 10.79, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 11.99, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.79, "viable_onion_potting_agent_0_min": 1, 
"viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 11.99, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [453.0, 516.0, 456.0, 467.0, 470.0, 410.0, 9.0, 464.0, 390.0, 330.0, 456.0, 516.0, 297.0, 330.0, 450.0, 465.0, 288.0, 413.0, 393.0, 410.0, 455.0, 456.0, 402.0, 455.0, 367.0, 516.0, 441.0, 459.0, 438.0, 408.0, 399.0, 410.0, 404.0, 408.0, 319.0, 344.0, 416.0, 411.0, 419.0, 358.0, 367.0, 413.0, 427.0, 456.0, 450.0, 461.0, 461.0, 301.0, 405.0, 447.0, 293.0, 285.0, 399.0, 430.0, 422.0, 362.0, 416.0, 525.0, 194.0, 365.0, 407.0, 294.0, 239.0, 304.0, 296.0, 356.0, 387.0, 453.0, 408.0, 398.0, 294.0, 
405.0, 291.0, 450.0, 350.0, 419.0, 465.0, 462.0, 312.0, 465.0, 370.0, 416.0, 510.0, 290.0, 237.0, 376.0, 404.0, 407.0, 344.0, 450.0, 462.0, 351.0, 405.0, 333.0, 344.0, 237.0, 288.0, 465.0, 384.0, 353.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [222.0, 231.0, 269.0, 247.0, 214.0, 242.0, 239.0, 228.0, 228.0, 242.0, 198.0, 212.0, 3.0, 6.0, 232.0, 232.0, 188.0, 202.0, 163.0, 167.0, 234.0, 222.0, 253.0, 263.0, 162.0, 135.0, 159.0, 171.0, 221.0, 229.0, 230.0, 235.0, 145.0, 143.0, 202.0, 211.0, 201.0, 192.0, 191.0, 219.0, 225.0, 230.0, 231.0, 225.0, 190.0, 212.0, 219.0, 236.0, 179.0, 188.0, 270.0, 246.0, 227.0, 214.0, 226.0, 233.0, 207.0, 231.0, 198.0, 210.0, 196.0, 203.0, 203.0, 207.0, 211.0, 193.0, 194.0, 214.0, 162.0, 157.0, 172.0, 172.0, 205.0, 211.0, 205.0, 206.0, 200.0, 219.0, 176.0, 182.0, 191.0, 176.0, 204.0, 209.0, 219.0, 208.0, 225.0, 231.0, 219.0, 231.0, 221.0, 240.0, 231.0, 230.0, 152.0, 149.0, 205.0, 200.0, 234.0, 213.0, 143.0, 150.0, 143.0, 142.0, 202.0, 197.0, 220.0, 210.0, 200.0, 222.0, 185.0, 177.0, 215.0, 201.0, 254.0, 271.0, 100.0, 94.0, 186.0, 179.0, 205.0, 202.0, 151.0, 143.0, 108.0, 131.0, 161.0, 143.0, 143.0, 153.0, 171.0, 185.0, 201.0, 186.0, 227.0, 226.0, 199.0, 209.0, 194.0, 204.0, 159.0, 135.0, 208.0, 197.0, 136.0, 155.0, 228.0, 222.0, 162.0, 188.0, 209.0, 210.0, 231.0, 234.0, 217.0, 245.0, 140.0, 172.0, 236.0, 229.0, 184.0, 186.0, 226.0, 190.0, 267.0, 243.0, 150.0, 140.0, 117.0, 120.0, 187.0, 189.0, 197.0, 207.0, 203.0, 204.0, 164.0, 180.0, 221.0, 
229.0, 227.0, 235.0, 171.0, 180.0, 199.0, 206.0, 159.0, 174.0, 159.0, 185.0, 119.0, 118.0, 140.0, 148.0, 229.0, 236.0, 180.0, 204.0, 175.0, 178.0]}, "sampler_perf": {"mean_env_wait_ms": 2.3463084552547957, "mean_processing_ms": 0.5362072894726034, "mean_inference_ms": 2.894108730963018}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1920000, "num_steps_sampled": 1024000, "sample_time_ms": 21336.358, "load_time_ms": 36.943, "grad_time_ms": 8894.576, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.3087224995159173e-25, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009382636635564268, "policy_loss": -0.007344415877014399, "vf_loss": 70.56519317626953, "vf_explained_var": 0.7276310324668884, "kl": 0.001774398609995842, "entropy": 1.3007346391677856, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1024000, "episodes_total": 2560, "training_iteration": 80, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-24-48", "timestamp": 1660249488, "time_this_iter_s": 25.79700207710266, "time_total_s": 7908.254694700241, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, 
"soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, 
"metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7908.254694700241, "timesteps_since_restore": 1024000, "iterations_since_restore": 80, "perf": {"cpu_util_percent": 33.88055555555556, "ram_util_percent": 58.030555555555566}} +{"episode_reward_max": 522.0, "episode_reward_min": 9.0, "episode_reward_mean": 403.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 201.98}, "custom_metrics": {"sparse_reward_mean": 139.0, "sparse_reward_min": 0, 
"sparse_reward_max": 180, "shaped_reward_mean": 125.96, "shaped_reward_min": 9, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.08, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.26, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 12.09, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.06, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.53, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.96, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.9, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 11.08, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 12.3, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.64, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.08, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.23, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 3.96, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.91, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.62, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.23, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 11.08, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 12.3, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.08, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 12.3, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [459.0, 342.0, 513.0, 519.0, 410.0, 390.0, 424.0, 458.0, 362.0, 373.0, 399.0, 358.0, 456.0, 421.0, 459.0, 467.0, 419.0, 359.0, 327.0, 419.0, 393.0, 430.0, 447.0, 447.0, 522.0, 398.0, 405.0, 507.0, 468.0, 465.0, 410.0, 344.0, 296.0, 356.0, 387.0, 453.0, 408.0, 398.0, 294.0, 405.0, 291.0, 
450.0, 350.0, 419.0, 465.0, 462.0, 312.0, 465.0, 370.0, 416.0, 510.0, 290.0, 237.0, 376.0, 404.0, 407.0, 344.0, 450.0, 462.0, 351.0, 405.0, 333.0, 344.0, 237.0, 288.0, 465.0, 384.0, 353.0, 453.0, 516.0, 456.0, 467.0, 470.0, 410.0, 9.0, 464.0, 390.0, 330.0, 456.0, 516.0, 297.0, 330.0, 450.0, 465.0, 288.0, 413.0, 393.0, 410.0, 455.0, 456.0, 402.0, 455.0, 367.0, 516.0, 441.0, 459.0, 438.0, 408.0, 399.0, 410.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [235.0, 224.0, 165.0, 177.0, 253.0, 260.0, 262.0, 257.0, 197.0, 213.0, 191.0, 199.0, 199.0, 225.0, 240.0, 218.0, 181.0, 181.0, 193.0, 180.0, 207.0, 192.0, 190.0, 168.0, 231.0, 225.0, 216.0, 205.0, 225.0, 234.0, 230.0, 237.0, 206.0, 213.0, 180.0, 179.0, 155.0, 172.0, 208.0, 211.0, 202.0, 191.0, 219.0, 211.0, 214.0, 233.0, 212.0, 235.0, 265.0, 257.0, 204.0, 194.0, 205.0, 200.0, 257.0, 250.0, 239.0, 229.0, 249.0, 216.0, 211.0, 199.0, 162.0, 182.0, 143.0, 153.0, 171.0, 185.0, 201.0, 186.0, 227.0, 226.0, 199.0, 209.0, 194.0, 204.0, 159.0, 135.0, 208.0, 197.0, 136.0, 155.0, 228.0, 222.0, 162.0, 188.0, 209.0, 210.0, 231.0, 234.0, 217.0, 245.0, 140.0, 172.0, 236.0, 229.0, 184.0, 186.0, 226.0, 190.0, 267.0, 243.0, 150.0, 140.0, 117.0, 120.0, 187.0, 189.0, 197.0, 207.0, 203.0, 204.0, 164.0, 180.0, 221.0, 229.0, 227.0, 235.0, 171.0, 180.0, 199.0, 206.0, 159.0, 174.0, 159.0, 185.0, 119.0, 118.0, 140.0, 148.0, 229.0, 236.0, 180.0, 204.0, 175.0, 178.0, 222.0, 231.0, 269.0, 247.0, 214.0, 242.0, 239.0, 228.0, 228.0, 242.0, 198.0, 212.0, 3.0, 
6.0, 232.0, 232.0, 188.0, 202.0, 163.0, 167.0, 234.0, 222.0, 253.0, 263.0, 162.0, 135.0, 159.0, 171.0, 221.0, 229.0, 230.0, 235.0, 145.0, 143.0, 202.0, 211.0, 201.0, 192.0, 191.0, 219.0, 225.0, 230.0, 231.0, 225.0, 190.0, 212.0, 219.0, 236.0, 179.0, 188.0, 270.0, 246.0, 227.0, 214.0, 226.0, 233.0, 207.0, 231.0, 198.0, 210.0, 196.0, 203.0, 203.0, 207.0]}, "sampler_perf": {"mean_env_wait_ms": 2.3225268766730203, "mean_processing_ms": 0.5314526800460904, "mean_inference_ms": 2.8694240871343926}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1944000, "num_steps_sampled": 1036800, "sample_time_ms": 21260.609, "load_time_ms": 36.811, "grad_time_ms": 8748.654, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.6543612497579586e-25, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011773156002163887, "policy_loss": -0.006681745406240225, "vf_loss": 61.517730712890625, "vf_explained_var": 0.7553827166557312, "kl": 0.0021572383120656013, "entropy": 1.2946891784667969, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1036800, "episodes_total": 2592, "training_iteration": 81, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-25-15", "timestamp": 1660249515, "time_this_iter_s": 26.438808917999268, "time_total_s": 7934.69350361824, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, 
"dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7934.69350361824, "timesteps_since_restore": 1036800, "iterations_since_restore": 81, "perf": {"cpu_util_percent": 30.592105263157894, "ram_util_percent": 58.057894736842115}} +{"episode_reward_max": 522.0, "episode_reward_min": 9.0, "episode_reward_mean": 412.5, "episode_len_mean": 400.0, "episodes_this_iter": 
32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 206.25}, "custom_metrics": {"sparse_reward_mean": 142.0, "sparse_reward_min": 0, "sparse_reward_max": 180, "shaped_reward_mean": 128.5, "shaped_reward_min": 9, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.49, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.04, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 12.53, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.01, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, 
"useful_onion_drop_agent_0_mean": 0.9, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.78, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.56, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 12.31, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.19, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.31, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.3, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.12, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.76, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.21, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.19, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 11.56, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 12.31, 
"optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.56, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 12.31, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [316.0, 235.0, 462.0, 501.0, 449.0, 411.0, 357.0, 401.0, 462.0, 408.0, 393.0, 410.0, 
347.0, 401.0, 393.0, 444.0, 363.0, 470.0, 470.0, 413.0, 347.0, 459.0, 516.0, 456.0, 476.0, 398.0, 353.0, 401.0, 421.0, 413.0, 300.0, 355.0, 288.0, 465.0, 384.0, 353.0, 453.0, 516.0, 456.0, 467.0, 470.0, 410.0, 9.0, 464.0, 390.0, 330.0, 456.0, 516.0, 297.0, 330.0, 450.0, 465.0, 288.0, 413.0, 393.0, 410.0, 455.0, 456.0, 402.0, 455.0, 367.0, 516.0, 441.0, 459.0, 438.0, 408.0, 399.0, 410.0, 459.0, 342.0, 513.0, 519.0, 410.0, 390.0, 424.0, 458.0, 362.0, 373.0, 399.0, 358.0, 456.0, 421.0, 459.0, 467.0, 419.0, 359.0, 327.0, 419.0, 393.0, 430.0, 447.0, 447.0, 522.0, 398.0, 405.0, 507.0, 468.0, 465.0, 410.0, 344.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [165.0, 151.0, 116.0, 119.0, 239.0, 223.0, 252.0, 249.0, 229.0, 220.0, 204.0, 207.0, 190.0, 167.0, 210.0, 191.0, 228.0, 234.0, 200.0, 208.0, 214.0, 179.0, 203.0, 207.0, 179.0, 168.0, 176.0, 225.0, 185.0, 208.0, 216.0, 228.0, 160.0, 203.0, 233.0, 237.0, 228.0, 242.0, 204.0, 209.0, 186.0, 161.0, 218.0, 241.0, 251.0, 265.0, 230.0, 226.0, 234.0, 242.0, 185.0, 213.0, 186.0, 167.0, 208.0, 193.0, 225.0, 196.0, 218.0, 195.0, 152.0, 148.0, 188.0, 167.0, 140.0, 148.0, 229.0, 236.0, 180.0, 204.0, 175.0, 178.0, 222.0, 231.0, 269.0, 247.0, 214.0, 242.0, 239.0, 228.0, 228.0, 242.0, 198.0, 212.0, 3.0, 6.0, 232.0, 232.0, 188.0, 202.0, 163.0, 167.0, 234.0, 222.0, 253.0, 263.0, 162.0, 135.0, 159.0, 171.0, 221.0, 229.0, 230.0, 235.0, 145.0, 143.0, 202.0, 211.0, 201.0, 192.0, 191.0, 219.0, 225.0, 230.0, 231.0, 225.0, 190.0, 212.0, 219.0, 236.0, 
179.0, 188.0, 270.0, 246.0, 227.0, 214.0, 226.0, 233.0, 207.0, 231.0, 198.0, 210.0, 196.0, 203.0, 203.0, 207.0, 235.0, 224.0, 165.0, 177.0, 253.0, 260.0, 262.0, 257.0, 197.0, 213.0, 191.0, 199.0, 199.0, 225.0, 240.0, 218.0, 181.0, 181.0, 193.0, 180.0, 207.0, 192.0, 190.0, 168.0, 231.0, 225.0, 216.0, 205.0, 225.0, 234.0, 230.0, 237.0, 206.0, 213.0, 180.0, 179.0, 155.0, 172.0, 208.0, 211.0, 202.0, 191.0, 219.0, 211.0, 214.0, 233.0, 212.0, 235.0, 265.0, 257.0, 204.0, 194.0, 205.0, 200.0, 257.0, 250.0, 239.0, 229.0, 249.0, 216.0, 211.0, 199.0, 162.0, 182.0]}, "sampler_perf": {"mean_env_wait_ms": 2.299333204784243, "mean_processing_ms": 0.5268100926342811, "mean_inference_ms": 2.8452761548654255}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1968000, "num_steps_sampled": 1049600, "sample_time_ms": 21056.327, "load_time_ms": 36.751, "grad_time_ms": 8586.844, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 8.271806248789793e-26, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015234133461490273, "policy_loss": -0.007057014852762222, "vf_loss": 61.880123138427734, "vf_explained_var": 0.7578676342964172, "kl": 0.002027077367529273, "entropy": 1.3088246583938599, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1049600, "episodes_total": 2624, "training_iteration": 82, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-25-41", "timestamp": 1660249541, "time_this_iter_s": 26.555142879486084, "time_total_s": 7961.248646497726, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], 
"free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", 
"synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7961.248646497726, "timesteps_since_restore": 1049600, "iterations_since_restore": 82, "perf": 
{"cpu_util_percent": 33.34324324324324, "ram_util_percent": 58.07837837837838}} +{"episode_reward_max": 522.0, "episode_reward_min": 177.0, "episode_reward_mean": 411.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 205.56}, "custom_metrics": {"sparse_reward_mean": 141.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 127.92, "shaped_reward_min": 57, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.28, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.01, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 12.41, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.19, "useful_onion_pickup_agent_1_min": 6, 
"useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.37, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.75, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.55, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 12.4, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.37, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.91, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.2, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.11, "soup_drop_agent_1_min": 0, 
"soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.55, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 12.4, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.55, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 12.4, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [350.0, 419.0, 422.0, 465.0, 177.0, 450.0, 415.0, 344.0, 282.0, 467.0, 407.0, 404.0, 516.0, 456.0, 396.0, 453.0, 458.0, 418.0, 295.0, 353.0, 406.0, 351.0, 516.0, 519.0, 444.0, 412.0, 444.0, 467.0, 287.0, 406.0, 237.0, 450.0, 438.0, 408.0, 399.0, 410.0, 459.0, 342.0, 513.0, 519.0, 410.0, 390.0, 424.0, 458.0, 362.0, 373.0, 399.0, 358.0, 456.0, 421.0, 459.0, 467.0, 419.0, 359.0, 327.0, 419.0, 393.0, 430.0, 447.0, 447.0, 522.0, 398.0, 405.0, 507.0, 468.0, 465.0, 410.0, 344.0, 316.0, 235.0, 462.0, 501.0, 449.0, 411.0, 357.0, 401.0, 462.0, 408.0, 393.0, 410.0, 347.0, 401.0, 393.0, 444.0, 363.0, 470.0, 470.0, 413.0, 347.0, 459.0, 516.0, 456.0, 476.0, 398.0, 353.0, 401.0, 421.0, 413.0, 300.0, 355.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [177.0, 173.0, 202.0, 217.0, 200.0, 222.0, 240.0, 225.0, 94.0, 83.0, 227.0, 223.0, 205.0, 210.0, 171.0, 173.0, 139.0, 143.0, 239.0, 228.0, 194.0, 213.0, 189.0, 215.0, 250.0, 266.0, 230.0, 226.0, 213.0, 183.0, 230.0, 223.0, 237.0, 221.0, 205.0, 213.0, 150.0, 145.0, 159.0, 194.0, 206.0, 200.0, 179.0, 172.0, 251.0, 265.0, 255.0, 264.0, 210.0, 234.0, 199.0, 213.0, 233.0, 211.0, 233.0, 234.0, 143.0, 144.0, 212.0, 194.0, 120.0, 117.0, 226.0, 224.0, 207.0, 231.0, 198.0, 210.0, 196.0, 203.0, 203.0, 207.0, 235.0, 224.0, 165.0, 177.0, 253.0, 260.0, 262.0, 257.0, 197.0, 213.0, 191.0, 199.0, 199.0, 225.0, 240.0, 218.0, 181.0, 181.0, 193.0, 
180.0, 207.0, 192.0, 190.0, 168.0, 231.0, 225.0, 216.0, 205.0, 225.0, 234.0, 230.0, 237.0, 206.0, 213.0, 180.0, 179.0, 155.0, 172.0, 208.0, 211.0, 202.0, 191.0, 219.0, 211.0, 214.0, 233.0, 212.0, 235.0, 265.0, 257.0, 204.0, 194.0, 205.0, 200.0, 257.0, 250.0, 239.0, 229.0, 249.0, 216.0, 211.0, 199.0, 162.0, 182.0, 165.0, 151.0, 116.0, 119.0, 239.0, 223.0, 252.0, 249.0, 229.0, 220.0, 204.0, 207.0, 190.0, 167.0, 210.0, 191.0, 228.0, 234.0, 200.0, 208.0, 214.0, 179.0, 203.0, 207.0, 179.0, 168.0, 176.0, 225.0, 185.0, 208.0, 216.0, 228.0, 160.0, 203.0, 233.0, 237.0, 228.0, 242.0, 204.0, 209.0, 186.0, 161.0, 218.0, 241.0, 251.0, 265.0, 230.0, 226.0, 234.0, 242.0, 185.0, 213.0, 186.0, 167.0, 208.0, 193.0, 225.0, 196.0, 218.0, 195.0, 152.0, 148.0, 188.0, 167.0]}, "sampler_perf": {"mean_env_wait_ms": 2.2767232665412354, "mean_processing_ms": 0.5222866661980672, "mean_inference_ms": 2.821797669244603}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1992000, "num_steps_sampled": 1062400, "sample_time_ms": 20905.085, "load_time_ms": 36.646, "grad_time_ms": 8463.059, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.1359031243948966e-26, "cur_lr": 0.0010000000474974513, "total_loss": -0.00031348783522844315, "policy_loss": -0.006104966159909964, "vf_loss": 64.42190551757812, "vf_explained_var": 0.7651865482330322, "kl": 0.0017986185848712921, "entropy": 1.3014076948165894, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1062400, "episodes_total": 2656, "training_iteration": 83, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-26-09", "timestamp": 1660249569, "time_this_iter_s": 27.776076078414917, "time_total_s": 7989.024722576141, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, 
"batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7989.024722576141, "timesteps_since_restore": 1062400, "iterations_since_restore": 83, "perf": {"cpu_util_percent": 33.82000000000001, "ram_util_percent": 58.82000000000001}} +{"episode_reward_max": 573.0, "episode_reward_min": 126.0, "episode_reward_mean": 408.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 204.43}, "custom_metrics": {"sparse_reward_mean": 141.0, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 126.86, "shaped_reward_min": 46, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.31, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.9, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 
12.53, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.04, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.42, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.74, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.51, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 12.31, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.37, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.3, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.07, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.53, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.51, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 12.31, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.51, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 12.31, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [126.0, 424.0, 458.0, 418.0, 404.0, 465.0, 228.0, 353.0, 465.0, 350.0, 375.0, 441.0, 422.0, 418.0, 467.0, 441.0, 339.0, 370.0, 404.0, 467.0, 453.0, 458.0, 465.0, 345.0, 476.0, 459.0, 516.0, 573.0, 467.0, 459.0, 393.0, 413.0, 468.0, 465.0, 410.0, 344.0, 316.0, 235.0, 462.0, 501.0, 449.0, 411.0, 357.0, 401.0, 462.0, 408.0, 393.0, 410.0, 347.0, 401.0, 393.0, 444.0, 363.0, 470.0, 470.0, 413.0, 347.0, 459.0, 516.0, 456.0, 476.0, 398.0, 353.0, 401.0, 421.0, 413.0, 300.0, 355.0, 350.0, 419.0, 422.0, 465.0, 177.0, 450.0, 415.0, 344.0, 282.0, 467.0, 407.0, 404.0, 516.0, 456.0, 396.0, 453.0, 458.0, 418.0, 295.0, 353.0, 406.0, 351.0, 516.0, 519.0, 444.0, 412.0, 444.0, 467.0, 287.0, 406.0, 237.0, 450.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [63.0, 63.0, 224.0, 200.0, 224.0, 234.0, 210.0, 208.0, 202.0, 202.0, 240.0, 225.0, 117.0, 111.0, 173.0, 180.0, 223.0, 242.0, 178.0, 172.0, 181.0, 194.0, 219.0, 222.0, 222.0, 200.0, 207.0, 211.0, 230.0, 237.0, 232.0, 209.0, 165.0, 174.0, 185.0, 185.0, 204.0, 200.0, 230.0, 237.0, 238.0, 215.0, 256.0, 202.0, 236.0, 229.0, 194.0, 151.0, 226.0, 250.0, 231.0, 228.0, 252.0, 264.0, 293.0, 280.0, 238.0, 229.0, 222.0, 237.0, 207.0, 
186.0, 212.0, 201.0, 239.0, 229.0, 249.0, 216.0, 211.0, 199.0, 162.0, 182.0, 165.0, 151.0, 116.0, 119.0, 239.0, 223.0, 252.0, 249.0, 229.0, 220.0, 204.0, 207.0, 190.0, 167.0, 210.0, 191.0, 228.0, 234.0, 200.0, 208.0, 214.0, 179.0, 203.0, 207.0, 179.0, 168.0, 176.0, 225.0, 185.0, 208.0, 216.0, 228.0, 160.0, 203.0, 233.0, 237.0, 228.0, 242.0, 204.0, 209.0, 186.0, 161.0, 218.0, 241.0, 251.0, 265.0, 230.0, 226.0, 234.0, 242.0, 185.0, 213.0, 186.0, 167.0, 208.0, 193.0, 225.0, 196.0, 218.0, 195.0, 152.0, 148.0, 188.0, 167.0, 177.0, 173.0, 202.0, 217.0, 200.0, 222.0, 240.0, 225.0, 94.0, 83.0, 227.0, 223.0, 205.0, 210.0, 171.0, 173.0, 139.0, 143.0, 239.0, 228.0, 194.0, 213.0, 189.0, 215.0, 250.0, 266.0, 230.0, 226.0, 213.0, 183.0, 230.0, 223.0, 237.0, 221.0, 205.0, 213.0, 150.0, 145.0, 159.0, 194.0, 206.0, 200.0, 179.0, 172.0, 251.0, 265.0, 255.0, 264.0, 210.0, 234.0, 199.0, 213.0, 233.0, 211.0, 233.0, 234.0, 143.0, 144.0, 212.0, 194.0, 120.0, 117.0, 226.0, 224.0]}, "sampler_perf": {"mean_env_wait_ms": 2.254677940793998, "mean_processing_ms": 0.5178804318399313, "mean_inference_ms": 2.7992231247898705}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2016000, "num_steps_sampled": 1075200, "sample_time_ms": 20798.01, "load_time_ms": 36.647, "grad_time_ms": 8403.274, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.0679515621974483e-26, "cur_lr": 0.0010000000474974513, "total_loss": 0.001430995762348175, "policy_loss": -0.004144120961427689, "vf_loss": 62.17998123168945, "vf_explained_var": 0.801994264125824, "kl": 0.0024192428681999445, "entropy": 1.2857705354690552, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1075200, "episodes_total": 2688, "training_iteration": 84, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-26-39", "timestamp": 1660249599, "time_this_iter_s": 29.525622129440308, "time_total_s": 
8018.550344705582, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": 
false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, 
"num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8018.550344705582, "timesteps_since_restore": 1075200, "iterations_since_restore": 84, "perf": {"cpu_util_percent": 28.77560975609756, "ram_util_percent": 58.31951219512194}} +{"episode_reward_max": 573.0, "episode_reward_min": 126.0, "episode_reward_mean": 415.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 207.665}, "custom_metrics": {"sparse_reward_mean": 143.2, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 128.93, "shaped_reward_min": 46, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 13.57, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.2, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.84, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.3, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.45, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.73, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.82, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.79, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 12.45, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.83, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.59, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.4, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.95, 
"soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.99, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.79, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 12.45, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.79, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 12.45, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 287.0, 462.0, 456.0, 449.0, 402.0, 272.0, 452.0, 407.0, 430.0, 450.0, 464.0, 465.0, 461.0, 407.0, 458.0, 404.0, 456.0, 310.0, 470.0, 352.0, 458.0, 376.0, 444.0, 501.0, 495.0, 458.0, 464.0, 447.0, 473.0, 516.0, 384.0, 421.0, 413.0, 300.0, 355.0, 350.0, 419.0, 422.0, 465.0, 177.0, 450.0, 415.0, 344.0, 282.0, 467.0, 407.0, 404.0, 516.0, 456.0, 396.0, 453.0, 458.0, 418.0, 295.0, 353.0, 406.0, 351.0, 516.0, 519.0, 444.0, 412.0, 444.0, 467.0, 287.0, 406.0, 237.0, 450.0, 126.0, 424.0, 458.0, 418.0, 404.0, 465.0, 228.0, 353.0, 465.0, 350.0, 375.0, 441.0, 422.0, 418.0, 467.0, 441.0, 339.0, 370.0, 404.0, 467.0, 453.0, 458.0, 465.0, 345.0, 476.0, 459.0, 516.0, 573.0, 467.0, 459.0, 393.0, 413.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 260.0, 150.0, 137.0, 229.0, 233.0, 230.0, 226.0, 203.0, 246.0, 189.0, 213.0, 146.0, 126.0, 210.0, 242.0, 200.0, 207.0, 221.0, 209.0, 225.0, 225.0, 224.0, 240.0, 231.0, 234.0, 242.0, 219.0, 208.0, 199.0, 220.0, 
238.0, 193.0, 211.0, 231.0, 225.0, 140.0, 170.0, 230.0, 240.0, 178.0, 174.0, 224.0, 234.0, 180.0, 196.0, 228.0, 216.0, 252.0, 249.0, 239.0, 256.0, 223.0, 235.0, 242.0, 222.0, 225.0, 222.0, 226.0, 247.0, 249.0, 267.0, 197.0, 187.0, 225.0, 196.0, 218.0, 195.0, 152.0, 148.0, 188.0, 167.0, 177.0, 173.0, 202.0, 217.0, 200.0, 222.0, 240.0, 225.0, 94.0, 83.0, 227.0, 223.0, 205.0, 210.0, 171.0, 173.0, 139.0, 143.0, 239.0, 228.0, 194.0, 213.0, 189.0, 215.0, 250.0, 266.0, 230.0, 226.0, 213.0, 183.0, 230.0, 223.0, 237.0, 221.0, 205.0, 213.0, 150.0, 145.0, 159.0, 194.0, 206.0, 200.0, 179.0, 172.0, 251.0, 265.0, 255.0, 264.0, 210.0, 234.0, 199.0, 213.0, 233.0, 211.0, 233.0, 234.0, 143.0, 144.0, 212.0, 194.0, 120.0, 117.0, 226.0, 224.0, 63.0, 63.0, 224.0, 200.0, 224.0, 234.0, 210.0, 208.0, 202.0, 202.0, 240.0, 225.0, 117.0, 111.0, 173.0, 180.0, 223.0, 242.0, 178.0, 172.0, 181.0, 194.0, 219.0, 222.0, 222.0, 200.0, 207.0, 211.0, 230.0, 237.0, 232.0, 209.0, 165.0, 174.0, 185.0, 185.0, 204.0, 200.0, 230.0, 237.0, 238.0, 215.0, 256.0, 202.0, 236.0, 229.0, 194.0, 151.0, 226.0, 250.0, 231.0, 228.0, 252.0, 264.0, 293.0, 280.0, 238.0, 229.0, 222.0, 237.0, 207.0, 186.0, 212.0, 201.0]}, "sampler_perf": {"mean_env_wait_ms": 2.2331698554816866, "mean_processing_ms": 0.5135892745725562, "mean_inference_ms": 2.7773374770155983}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2040000, "num_steps_sampled": 1088000, "sample_time_ms": 20542.192, "load_time_ms": 36.513, "grad_time_ms": 8356.672, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.0339757810987241e-26, "cur_lr": 0.0010000000474974513, "total_loss": 9.005811443785205e-05, "policy_loss": -0.005502933170646429, "vf_loss": 62.30662536621094, "vf_explained_var": 0.7652042508125305, "kl": 0.0015233332524076104, "entropy": 1.275335431098938, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1088000, "episodes_total": 
2720, "training_iteration": 85, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-27-08", "timestamp": 1660249628, "time_this_iter_s": 29.196897983551025, "time_total_s": 8047.747242689133, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": 
true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8047.747242689133, "timesteps_since_restore": 1088000, "iterations_since_restore": 85, "perf": {"cpu_util_percent": 31.616666666666664, "ram_util_percent": 58.38809523809524}} +{"episode_reward_max": 573.0, "episode_reward_min": 126.0, "episode_reward_mean": 429.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 214.53}, "custom_metrics": {"sparse_reward_mean": 148.0, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 133.06, "shaped_reward_min": 46, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.29, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.59, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.5, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.69, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.8, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.89, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.77, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.11, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.83, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.0, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.33, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.36, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.32, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.0, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.12, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.82, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.13, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 12.11, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.83, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.11, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.83, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [452.0, 461.0, 501.0, 241.0, 462.0, 459.0, 519.0, 413.0, 405.0, 302.0, 465.0, 519.0, 498.0, 570.0, 465.0, 444.0, 398.0, 479.0, 419.0, 516.0, 419.0, 522.0, 419.0, 516.0, 470.0, 470.0, 519.0, 418.0, 504.0, 444.0, 355.0, 324.0, 287.0, 406.0, 237.0, 450.0, 126.0, 424.0, 458.0, 418.0, 404.0, 465.0, 228.0, 353.0, 465.0, 350.0, 375.0, 441.0, 422.0, 418.0, 467.0, 441.0, 339.0, 370.0, 404.0, 467.0, 453.0, 458.0, 465.0, 345.0, 476.0, 459.0, 516.0, 573.0, 467.0, 459.0, 393.0, 413.0, 516.0, 287.0, 462.0, 456.0, 449.0, 402.0, 272.0, 452.0, 407.0, 430.0, 450.0, 464.0, 465.0, 461.0, 407.0, 458.0, 404.0, 456.0, 310.0, 470.0, 352.0, 458.0, 376.0, 444.0, 501.0, 495.0, 458.0, 464.0, 447.0, 473.0, 516.0, 384.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [230.0, 222.0, 235.0, 226.0, 254.0, 
247.0, 114.0, 127.0, 234.0, 228.0, 215.0, 244.0, 256.0, 263.0, 208.0, 205.0, 208.0, 197.0, 151.0, 151.0, 239.0, 226.0, 254.0, 265.0, 248.0, 250.0, 279.0, 291.0, 226.0, 239.0, 214.0, 230.0, 200.0, 198.0, 239.0, 240.0, 201.0, 218.0, 250.0, 266.0, 212.0, 207.0, 265.0, 257.0, 208.0, 211.0, 264.0, 252.0, 234.0, 236.0, 238.0, 232.0, 262.0, 257.0, 204.0, 214.0, 240.0, 264.0, 238.0, 206.0, 185.0, 170.0, 157.0, 167.0, 143.0, 144.0, 212.0, 194.0, 120.0, 117.0, 226.0, 224.0, 63.0, 63.0, 224.0, 200.0, 224.0, 234.0, 210.0, 208.0, 202.0, 202.0, 240.0, 225.0, 117.0, 111.0, 173.0, 180.0, 223.0, 242.0, 178.0, 172.0, 181.0, 194.0, 219.0, 222.0, 222.0, 200.0, 207.0, 211.0, 230.0, 237.0, 232.0, 209.0, 165.0, 174.0, 185.0, 185.0, 204.0, 200.0, 230.0, 237.0, 238.0, 215.0, 256.0, 202.0, 236.0, 229.0, 194.0, 151.0, 226.0, 250.0, 231.0, 228.0, 252.0, 264.0, 293.0, 280.0, 238.0, 229.0, 222.0, 237.0, 207.0, 186.0, 212.0, 201.0, 256.0, 260.0, 150.0, 137.0, 229.0, 233.0, 230.0, 226.0, 203.0, 246.0, 189.0, 213.0, 146.0, 126.0, 210.0, 242.0, 200.0, 207.0, 221.0, 209.0, 225.0, 225.0, 224.0, 240.0, 231.0, 234.0, 242.0, 219.0, 208.0, 199.0, 220.0, 238.0, 193.0, 211.0, 231.0, 225.0, 140.0, 170.0, 230.0, 240.0, 178.0, 174.0, 224.0, 234.0, 180.0, 196.0, 228.0, 216.0, 252.0, 249.0, 239.0, 256.0, 223.0, 235.0, 242.0, 222.0, 225.0, 222.0, 226.0, 247.0, 249.0, 267.0, 197.0, 187.0]}, "sampler_perf": {"mean_env_wait_ms": 2.2121816621028128, "mean_processing_ms": 0.509404154557512, "mean_inference_ms": 2.7561899846531217}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2064000, "num_steps_sampled": 1100800, "sample_time_ms": 20247.187, "load_time_ms": 36.147, "grad_time_ms": 8204.595, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.169878905493621e-27, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034205808769911528, "policy_loss": -0.0028535639867186546, "vf_loss": 69.0377197265625, "vf_explained_var": 0.760657787322998, "kl": 0.002043861197307706, "entropy": 
1.2592506408691406, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1100800, "episodes_total": 2752, "training_iteration": 86, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-27-37", "timestamp": 1660249657, "time_this_iter_s": 28.82673192024231, "time_total_s": 8076.573974609375, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": 
{"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8076.573974609375, "timesteps_since_restore": 1100800, "iterations_since_restore": 86, "perf": {"cpu_util_percent": 33.515, "ram_util_percent": 58.42750000000001}} +{"episode_reward_max": 570.0, "episode_reward_min": 142.0, "episode_reward_mean": 435.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 70.0}, "policy_reward_max": {"ppo": 291.0}, "policy_reward_mean": {"ppo": 217.895}, "custom_metrics": {"sparse_reward_mean": 150.0, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 135.79, "shaped_reward_min": 62, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.42, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.29, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.49, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.24, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.9, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.97, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.92, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.14, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.18, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.1, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 3.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.9, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.33, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.05, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.99, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.13, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.14, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.18, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.14, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.18, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [461.0, 404.0, 464.0, 393.0, 467.0, 459.0, 419.0, 142.0, 419.0, 456.0, 481.0, 507.0, 407.0, 470.0, 513.0, 467.0, 459.0, 419.0, 473.0, 462.0, 316.0, 294.0, 416.0, 513.0, 465.0, 465.0, 424.0, 401.0, 470.0, 419.0, 367.0, 341.0, 467.0, 459.0, 393.0, 413.0, 516.0, 287.0, 462.0, 456.0, 449.0, 402.0, 272.0, 452.0, 407.0, 430.0, 450.0, 464.0, 465.0, 461.0, 407.0, 458.0, 404.0, 456.0, 310.0, 470.0, 352.0, 458.0, 376.0, 444.0, 501.0, 495.0, 458.0, 464.0, 447.0, 473.0, 516.0, 384.0, 452.0, 461.0, 501.0, 241.0, 462.0, 459.0, 519.0, 413.0, 405.0, 302.0, 465.0, 519.0, 498.0, 570.0, 465.0, 444.0, 398.0, 479.0, 419.0, 516.0, 419.0, 522.0, 419.0, 516.0, 470.0, 470.0, 519.0, 418.0, 504.0, 444.0, 355.0, 324.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [227.0, 234.0, 210.0, 194.0, 236.0, 228.0, 186.0, 207.0, 242.0, 225.0, 226.0, 233.0, 218.0, 201.0, 70.0, 72.0, 202.0, 217.0, 222.0, 234.0, 234.0, 247.0, 235.0, 272.0, 197.0, 210.0, 228.0, 242.0, 245.0, 268.0, 247.0, 220.0, 231.0, 228.0, 211.0, 208.0, 230.0, 243.0, 224.0, 238.0, 162.0, 154.0, 139.0, 155.0, 186.0, 230.0, 265.0, 248.0, 236.0, 229.0, 236.0, 229.0, 223.0, 201.0, 191.0, 210.0, 237.0, 233.0, 207.0, 212.0, 187.0, 180.0, 175.0, 166.0, 238.0, 229.0, 222.0, 237.0, 207.0, 186.0, 212.0, 201.0, 256.0, 260.0, 150.0, 137.0, 229.0, 233.0, 230.0, 226.0, 203.0, 246.0, 189.0, 213.0, 146.0, 126.0, 210.0, 242.0, 200.0, 207.0, 221.0, 209.0, 225.0, 225.0, 224.0, 240.0, 231.0, 234.0, 242.0, 219.0, 208.0, 199.0, 220.0, 238.0, 193.0, 211.0, 231.0, 225.0, 140.0, 170.0, 230.0, 240.0, 178.0, 174.0, 224.0, 234.0, 180.0, 196.0, 228.0, 216.0, 252.0, 249.0, 239.0, 256.0, 223.0, 235.0, 242.0, 222.0, 225.0, 222.0, 226.0, 247.0, 249.0, 267.0, 197.0, 187.0, 230.0, 222.0, 235.0, 226.0, 254.0, 247.0, 114.0, 127.0, 234.0, 228.0, 215.0, 244.0, 256.0, 263.0, 208.0, 205.0, 208.0, 197.0, 151.0, 151.0, 239.0, 226.0, 254.0, 265.0, 248.0, 250.0, 279.0, 291.0, 226.0, 239.0, 214.0, 230.0, 200.0, 198.0, 239.0, 240.0, 201.0, 218.0, 250.0, 266.0, 212.0, 207.0, 265.0, 257.0, 208.0, 211.0, 264.0, 252.0, 234.0, 236.0, 238.0, 232.0, 262.0, 257.0, 204.0, 214.0, 240.0, 264.0, 238.0, 206.0, 185.0, 170.0, 157.0, 167.0]}, "sampler_perf": {"mean_env_wait_ms": 2.191700194557133, "mean_processing_ms": 0.5053220551327391, "mean_inference_ms": 2.7357250964199444}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2088000, "num_steps_sampled": 1113600, "sample_time_ms": 20142.29, "load_time_ms": 35.752, "grad_time_ms": 8101.845, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.5849394527468104e-27, 
"cur_lr": 0.0010000000474974513, "total_loss": 0.0009223390952683985, "policy_loss": -0.005599660333245993, "vf_loss": 71.4854736328125, "vf_explained_var": 0.7612900733947754, "kl": 0.002260145964100957, "entropy": 1.2530813217163086, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1113600, "episodes_total": 2784, "training_iteration": 87, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-28-07", "timestamp": 1660249687, "time_this_iter_s": 30.60737180709839, "time_total_s": 8107.181346416473, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": 
Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, 
"output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8107.181346416473, "timesteps_since_restore": 1113600, "iterations_since_restore": 87, "perf": {"cpu_util_percent": 30.168181818181814, "ram_util_percent": 58.34545454545453}} +{"episode_reward_max": 570.0, "episode_reward_min": 142.0, "episode_reward_mean": 443.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 70.0}, "policy_reward_max": {"ppo": 291.0}, "policy_reward_mean": {"ppo": 221.745}, "custom_metrics": {"sparse_reward_mean": 152.4, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 138.69, "shaped_reward_min": 62, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.81, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.03, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.9, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.16, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.91, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.94, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.86, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.54, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.09, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.22, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.33, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, 
"useful_dish_pickup_agent_1_mean": 3.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.46, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.38, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.13, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.97, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.54, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.09, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.54, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.09, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [302.0, 366.0, 313.0, 504.0, 522.0, 530.0, 473.0, 468.0, 447.0, 403.0, 522.0, 519.0, 461.0, 516.0, 401.0, 516.0, 459.0, 464.0, 467.0, 373.0, 458.0, 525.0, 513.0, 273.0, 415.0, 461.0, 467.0, 473.0, 522.0, 406.0, 522.0, 467.0, 447.0, 473.0, 516.0, 384.0, 452.0, 461.0, 501.0, 241.0, 462.0, 459.0, 519.0, 413.0, 405.0, 302.0, 465.0, 519.0, 498.0, 570.0, 465.0, 444.0, 398.0, 479.0, 419.0, 516.0, 419.0, 522.0, 419.0, 516.0, 470.0, 470.0, 519.0, 418.0, 504.0, 444.0, 355.0, 324.0, 461.0, 404.0, 464.0, 393.0, 467.0, 459.0, 419.0, 142.0, 419.0, 456.0, 481.0, 507.0, 407.0, 470.0, 513.0, 467.0, 459.0, 419.0, 473.0, 462.0, 316.0, 294.0, 416.0, 513.0, 465.0, 465.0, 424.0, 401.0, 470.0, 419.0, 367.0, 341.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [151.0, 151.0, 185.0, 181.0, 153.0, 160.0, 262.0, 242.0, 265.0, 257.0, 268.0, 262.0, 253.0, 220.0, 244.0, 224.0, 221.0, 226.0, 192.0, 211.0, 265.0, 257.0, 249.0, 270.0, 237.0, 224.0, 264.0, 252.0, 215.0, 186.0, 259.0, 257.0, 245.0, 214.0, 238.0, 226.0, 229.0, 238.0, 184.0, 189.0, 230.0, 228.0, 262.0, 263.0, 265.0, 248.0, 143.0, 130.0, 208.0, 207.0, 228.0, 233.0, 241.0, 226.0, 232.0, 241.0, 265.0, 257.0, 207.0, 199.0, 255.0, 267.0, 244.0, 223.0, 225.0, 222.0, 226.0, 247.0, 249.0, 267.0, 197.0, 187.0, 230.0, 222.0, 235.0, 226.0, 254.0, 247.0, 114.0, 127.0, 234.0, 228.0, 215.0, 244.0, 256.0, 263.0, 208.0, 205.0, 208.0, 197.0, 151.0, 151.0, 239.0, 226.0, 254.0, 265.0, 248.0, 250.0, 279.0, 291.0, 226.0, 239.0, 214.0, 230.0, 200.0, 198.0, 239.0, 240.0, 201.0, 218.0, 250.0, 266.0, 212.0, 207.0, 265.0, 257.0, 208.0, 211.0, 264.0, 252.0, 234.0, 236.0, 238.0, 232.0, 262.0, 257.0, 204.0, 214.0, 240.0, 264.0, 238.0, 206.0, 185.0, 170.0, 157.0, 167.0, 227.0, 234.0, 210.0, 194.0, 236.0, 228.0, 186.0, 207.0, 242.0, 225.0, 226.0, 233.0, 218.0, 201.0, 70.0, 72.0, 202.0, 217.0, 222.0, 234.0, 234.0, 247.0, 235.0, 272.0, 197.0, 210.0, 228.0, 242.0, 245.0, 268.0, 247.0, 220.0, 231.0, 228.0, 211.0, 208.0, 230.0, 243.0, 224.0, 238.0, 162.0, 154.0, 139.0, 155.0, 186.0, 230.0, 265.0, 248.0, 236.0, 229.0, 236.0, 229.0, 223.0, 201.0, 191.0, 210.0, 237.0, 233.0, 207.0, 212.0, 187.0, 180.0, 175.0, 166.0]}, "sampler_perf": {"mean_env_wait_ms": 2.1716872563286627, "mean_processing_ms": 0.5013361308454948, "mean_inference_ms": 2.7160497473266743}, "off_policy_estimator": {}, "info": 
{"num_steps_trained": 2112000, "num_steps_sampled": 1126400, "sample_time_ms": 20200.821, "load_time_ms": 36.161, "grad_time_ms": 8143.064, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.2924697263734052e-27, "cur_lr": 0.0010000000474974513, "total_loss": 0.004522919189184904, "policy_loss": -0.0018036967376247048, "vf_loss": 69.45938110351562, "vf_explained_var": 0.7786126732826233, "kl": 0.001827276311814785, "entropy": 1.2386289834976196, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1126400, "episodes_total": 2816, "training_iteration": 88, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-28-39", "timestamp": 1660249719, "time_this_iter_s": 31.2521071434021, "time_total_s": 8138.4334535598755, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": 
{"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8138.4334535598755, "timesteps_since_restore": 1126400, "iterations_since_restore": 88, "perf": {"cpu_util_percent": 29.57045454545455, "ram_util_percent": 58.37954545454544}} +{"episode_reward_max": 530.0, "episode_reward_min": 142.0, "episode_reward_mean": 443.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 70.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 221.975}, "custom_metrics": {"sparse_reward_mean": 152.6, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 138.75, "shaped_reward_min": 62, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.54, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.22, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.68, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.28, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.7, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 0.81, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.97, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 12.52, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 13.12, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 
10, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.55, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.11, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.22, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.48, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.08, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.28, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.87, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.52, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 13.12, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.52, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 13.12, "viable_onion_potting_agent_1_min": 5, 
"viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [473.0, 468.0, 447.0, 473.0, 522.0, 513.0, 470.0, 464.0, 352.0, 504.0, 476.0, 398.0, 358.0, 410.0, 519.0, 405.0, 465.0, 416.0, 406.0, 525.0, 393.0, 459.0, 456.0, 408.0, 465.0, 438.0, 476.0, 462.0, 478.0, 479.0, 507.0, 522.0, 504.0, 444.0, 355.0, 324.0, 461.0, 404.0, 464.0, 393.0, 467.0, 459.0, 419.0, 142.0, 419.0, 456.0, 481.0, 507.0, 407.0, 470.0, 513.0, 467.0, 459.0, 419.0, 473.0, 462.0, 316.0, 294.0, 416.0, 513.0, 465.0, 465.0, 424.0, 401.0, 470.0, 419.0, 367.0, 341.0, 302.0, 366.0, 313.0, 504.0, 522.0, 530.0, 473.0, 468.0, 447.0, 403.0, 522.0, 519.0, 461.0, 516.0, 401.0, 516.0, 459.0, 464.0, 467.0, 373.0, 458.0, 
525.0, 513.0, 273.0, 415.0, 461.0, 467.0, 473.0, 522.0, 406.0, 522.0, 467.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [238.0, 235.0, 234.0, 234.0, 226.0, 221.0, 236.0, 237.0, 257.0, 265.0, 240.0, 273.0, 236.0, 234.0, 248.0, 216.0, 178.0, 174.0, 250.0, 254.0, 231.0, 245.0, 210.0, 188.0, 181.0, 177.0, 202.0, 208.0, 243.0, 276.0, 199.0, 206.0, 244.0, 221.0, 208.0, 208.0, 193.0, 213.0, 259.0, 266.0, 210.0, 183.0, 229.0, 230.0, 234.0, 222.0, 212.0, 196.0, 234.0, 231.0, 224.0, 214.0, 232.0, 244.0, 248.0, 214.0, 241.0, 237.0, 238.0, 241.0, 252.0, 255.0, 255.0, 267.0, 240.0, 264.0, 238.0, 206.0, 185.0, 170.0, 157.0, 167.0, 227.0, 234.0, 210.0, 194.0, 236.0, 228.0, 186.0, 207.0, 242.0, 225.0, 226.0, 233.0, 218.0, 201.0, 70.0, 72.0, 202.0, 217.0, 222.0, 234.0, 234.0, 247.0, 235.0, 272.0, 197.0, 210.0, 228.0, 242.0, 245.0, 268.0, 247.0, 220.0, 231.0, 228.0, 211.0, 208.0, 230.0, 243.0, 224.0, 238.0, 162.0, 154.0, 139.0, 155.0, 186.0, 230.0, 265.0, 248.0, 236.0, 229.0, 236.0, 229.0, 223.0, 201.0, 191.0, 210.0, 237.0, 233.0, 207.0, 212.0, 187.0, 180.0, 175.0, 166.0, 151.0, 151.0, 185.0, 181.0, 153.0, 160.0, 262.0, 242.0, 265.0, 257.0, 268.0, 262.0, 253.0, 220.0, 244.0, 224.0, 221.0, 226.0, 192.0, 211.0, 265.0, 257.0, 249.0, 270.0, 237.0, 224.0, 264.0, 252.0, 215.0, 186.0, 259.0, 257.0, 245.0, 214.0, 238.0, 226.0, 229.0, 238.0, 184.0, 189.0, 230.0, 228.0, 262.0, 263.0, 265.0, 248.0, 143.0, 130.0, 208.0, 207.0, 228.0, 233.0, 241.0, 226.0, 232.0, 241.0, 265.0, 257.0, 207.0, 199.0, 255.0, 
267.0, 244.0, 223.0]}, "sampler_perf": {"mean_env_wait_ms": 2.1521722183648633, "mean_processing_ms": 0.4974515268120716, "mean_inference_ms": 2.697593198173253}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2136000, "num_steps_sampled": 1139200, "sample_time_ms": 20564.193, "load_time_ms": 36.199, "grad_time_ms": 8238.148, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.462348631867026e-28, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003657890483736992, "policy_loss": -0.005678663495928049, "vf_loss": 66.65350341796875, "vf_explained_var": 0.7769116759300232, "kl": 0.0020363712683320045, "entropy": 1.2417923212051392, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1139200, "episodes_total": 2848, "training_iteration": 89, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-29-11", "timestamp": 1660249751, "time_this_iter_s": 32.67408323287964, "time_total_s": 8171.107536792755, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 
0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8171.107536792755, "timesteps_since_restore": 1139200, "iterations_since_restore": 89, "perf": {"cpu_util_percent": 27.073913043478264, "ram_util_percent": 58.2586956521739}} +{"episode_reward_max": 570.0, "episode_reward_min": 273.0, "episode_reward_mean": 452.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 130.0}, "policy_reward_max": {"ppo": 287.0}, "policy_reward_mean": {"ppo": 226.1}, "custom_metrics": {"sparse_reward_mean": 156.0, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 140.2, "shaped_reward_min": 73, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.85, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.29, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.98, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.39, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.79, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.98, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 12.77, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 
13.33, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.36, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.61, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.19, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.47, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.72, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.91, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 4.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.73, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 12.77, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 13.33, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.77, 
"viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 13.33, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [398.0, 419.0, 462.0, 419.0, 339.0, 519.0, 467.0, 522.0, 450.0, 449.0, 444.0, 495.0, 453.0, 438.0, 462.0, 476.0, 456.0, 413.0, 403.0, 430.0, 459.0, 476.0, 476.0, 458.0, 347.0, 424.0, 476.0, 570.0, 401.0, 516.0, 455.0, 516.0, 470.0, 419.0, 367.0, 341.0, 302.0, 366.0, 313.0, 504.0, 522.0, 530.0, 473.0, 468.0, 447.0, 403.0, 522.0, 519.0, 461.0, 516.0, 401.0, 516.0, 459.0, 464.0, 467.0, 373.0, 458.0, 525.0, 513.0, 273.0, 415.0, 461.0, 467.0, 473.0, 522.0, 
406.0, 522.0, 467.0, 473.0, 468.0, 447.0, 473.0, 522.0, 513.0, 470.0, 464.0, 352.0, 504.0, 476.0, 398.0, 358.0, 410.0, 519.0, 405.0, 465.0, 416.0, 406.0, 525.0, 393.0, 459.0, 456.0, 408.0, 465.0, 438.0, 476.0, 462.0, 478.0, 479.0, 507.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [207.0, 191.0, 217.0, 202.0, 231.0, 231.0, 211.0, 208.0, 176.0, 163.0, 260.0, 259.0, 234.0, 233.0, 262.0, 260.0, 238.0, 212.0, 230.0, 219.0, 232.0, 212.0, 261.0, 234.0, 219.0, 234.0, 226.0, 212.0, 237.0, 225.0, 247.0, 229.0, 236.0, 220.0, 200.0, 213.0, 200.0, 203.0, 202.0, 228.0, 225.0, 234.0, 236.0, 240.0, 228.0, 248.0, 242.0, 216.0, 177.0, 170.0, 217.0, 207.0, 231.0, 245.0, 287.0, 283.0, 201.0, 200.0, 264.0, 252.0, 233.0, 222.0, 260.0, 256.0, 237.0, 233.0, 207.0, 212.0, 187.0, 180.0, 175.0, 166.0, 151.0, 151.0, 185.0, 181.0, 153.0, 160.0, 262.0, 242.0, 265.0, 257.0, 268.0, 262.0, 253.0, 220.0, 244.0, 224.0, 221.0, 226.0, 192.0, 211.0, 265.0, 257.0, 249.0, 270.0, 237.0, 224.0, 264.0, 252.0, 215.0, 186.0, 259.0, 257.0, 245.0, 214.0, 238.0, 226.0, 229.0, 238.0, 184.0, 189.0, 230.0, 228.0, 262.0, 263.0, 265.0, 248.0, 143.0, 130.0, 208.0, 207.0, 228.0, 233.0, 241.0, 226.0, 232.0, 241.0, 265.0, 257.0, 207.0, 199.0, 255.0, 267.0, 244.0, 223.0, 238.0, 235.0, 234.0, 234.0, 226.0, 221.0, 236.0, 237.0, 257.0, 265.0, 240.0, 273.0, 236.0, 234.0, 248.0, 216.0, 178.0, 174.0, 250.0, 254.0, 231.0, 245.0, 210.0, 188.0, 181.0, 177.0, 202.0, 208.0, 243.0, 276.0, 199.0, 206.0, 244.0, 221.0, 208.0, 208.0, 193.0, 
213.0, 259.0, 266.0, 210.0, 183.0, 229.0, 230.0, 234.0, 222.0, 212.0, 196.0, 234.0, 231.0, 224.0, 214.0, 232.0, 244.0, 248.0, 214.0, 241.0, 237.0, 238.0, 241.0, 252.0, 255.0, 255.0, 267.0]}, "sampler_perf": {"mean_env_wait_ms": 2.133109953902478, "mean_processing_ms": 0.4936599987837839, "mean_inference_ms": 2.6799343629485026}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2160000, "num_steps_sampled": 1152000, "sample_time_ms": 20913.282, "load_time_ms": 36.475, "grad_time_ms": 8548.769, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.231174315933513e-28, "cur_lr": 0.0010000000474974513, "total_loss": 0.007687473203986883, "policy_loss": 0.0007542042876593769, "vf_loss": 75.50032806396484, "vf_explained_var": 0.7544476985931396, "kl": 0.0026988324243575335, "entropy": 1.2335320711135864, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1152000, "episodes_total": 2880, "training_iteration": 90, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-29-44", "timestamp": 1660249784, "time_this_iter_s": 32.394510984420776, "time_total_s": 8203.502047777176, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, 
"gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, 
"collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8203.502047777176, "timesteps_since_restore": 1152000, "iterations_since_restore": 90, "perf": {"cpu_util_percent": 31.20434782608696, "ram_util_percent": 58.23260869565217}} +{"episode_reward_max": 570.0, "episode_reward_min": 339.0, "episode_reward_mean": 459.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 163.0}, "policy_reward_max": {"ppo": 287.0}, "policy_reward_mean": {"ppo": 229.53}, "custom_metrics": {"sparse_reward_mean": 
158.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 142.26, "shaped_reward_min": 99, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.9, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.54, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.05, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.67, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 0.7, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.94, "useful_onion_drop_agent_1_min": 
0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 12.97, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 13.54, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.78, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.2, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.3, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.65, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.07, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.46, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.13, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 12.97, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 13.54, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.97, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 13.54, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 411.0, 418.0, 522.0, 465.0, 519.0, 465.0, 408.0, 473.0, 525.0, 522.0, 481.0, 447.0, 527.0, 476.0, 527.0, 449.0, 396.0, 453.0, 453.0, 470.0, 395.0, 481.0, 516.0, 516.0, 449.0, 516.0, 373.0, 416.0, 407.0, 392.0, 516.0, 522.0, 406.0, 522.0, 467.0, 473.0, 468.0, 447.0, 473.0, 522.0, 
513.0, 470.0, 464.0, 352.0, 504.0, 476.0, 398.0, 358.0, 410.0, 519.0, 405.0, 465.0, 416.0, 406.0, 525.0, 393.0, 459.0, 456.0, 408.0, 465.0, 438.0, 476.0, 462.0, 478.0, 479.0, 507.0, 522.0, 398.0, 419.0, 462.0, 419.0, 339.0, 519.0, 467.0, 522.0, 450.0, 449.0, 444.0, 495.0, 453.0, 438.0, 462.0, 476.0, 456.0, 413.0, 403.0, 430.0, 459.0, 476.0, 476.0, 458.0, 347.0, 424.0, 476.0, 570.0, 401.0, 516.0, 455.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 245.0, 210.0, 201.0, 218.0, 200.0, 267.0, 255.0, 240.0, 225.0, 275.0, 244.0, 218.0, 247.0, 202.0, 206.0, 252.0, 221.0, 257.0, 268.0, 262.0, 260.0, 237.0, 244.0, 217.0, 230.0, 265.0, 262.0, 249.0, 227.0, 259.0, 268.0, 227.0, 222.0, 192.0, 204.0, 233.0, 220.0, 222.0, 231.0, 225.0, 245.0, 195.0, 200.0, 239.0, 242.0, 254.0, 262.0, 259.0, 257.0, 237.0, 212.0, 267.0, 249.0, 184.0, 189.0, 211.0, 205.0, 198.0, 209.0, 202.0, 190.0, 249.0, 267.0, 265.0, 257.0, 207.0, 199.0, 255.0, 267.0, 244.0, 223.0, 238.0, 235.0, 234.0, 234.0, 226.0, 221.0, 236.0, 237.0, 257.0, 265.0, 240.0, 273.0, 236.0, 234.0, 248.0, 216.0, 178.0, 174.0, 250.0, 254.0, 231.0, 245.0, 210.0, 188.0, 181.0, 177.0, 202.0, 208.0, 243.0, 276.0, 199.0, 206.0, 244.0, 221.0, 208.0, 208.0, 193.0, 213.0, 259.0, 266.0, 210.0, 183.0, 229.0, 230.0, 234.0, 222.0, 212.0, 196.0, 234.0, 231.0, 224.0, 214.0, 232.0, 244.0, 248.0, 214.0, 241.0, 237.0, 238.0, 241.0, 252.0, 255.0, 255.0, 267.0, 207.0, 191.0, 217.0, 202.0, 231.0, 231.0, 211.0, 208.0, 176.0, 163.0, 260.0, 259.0, 234.0, 
233.0, 262.0, 260.0, 238.0, 212.0, 230.0, 219.0, 232.0, 212.0, 261.0, 234.0, 219.0, 234.0, 226.0, 212.0, 237.0, 225.0, 247.0, 229.0, 236.0, 220.0, 200.0, 213.0, 200.0, 203.0, 202.0, 228.0, 225.0, 234.0, 236.0, 240.0, 228.0, 248.0, 242.0, 216.0, 177.0, 170.0, 217.0, 207.0, 231.0, 245.0, 287.0, 283.0, 201.0, 200.0, 264.0, 252.0, 233.0, 222.0, 260.0, 256.0]}, "sampler_perf": {"mean_env_wait_ms": 2.114456633491147, "mean_processing_ms": 0.4899465714962644, "mean_inference_ms": 2.662192402592387}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2184000, "num_steps_sampled": 1164800, "sample_time_ms": 20895.618, "load_time_ms": 36.529, "grad_time_ms": 8787.695, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.6155871579667565e-28, "cur_lr": 0.0010000000474974513, "total_loss": 0.003199361963197589, "policy_loss": -0.003974525723606348, "vf_loss": 77.90489959716797, "vf_explained_var": 0.7496511936187744, "kl": 0.001869131694547832, "entropy": 1.2332016229629517, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1164800, "episodes_total": 2912, "training_iteration": 91, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-30-12", "timestamp": 1660249812, "time_this_iter_s": 28.652703046798706, "time_total_s": 8232.154750823975, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, 
"dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8232.154750823975, "timesteps_since_restore": 1164800, "iterations_since_restore": 91, "perf": {"cpu_util_percent": 34.4075, "ram_util_percent": 58.25750000000001}} +{"episode_reward_max": 570.0, "episode_reward_min": 296.0, "episode_reward_mean": 465.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 133.0}, "policy_reward_max": {"ppo": 287.0}, "policy_reward_mean": {"ppo": 232.56}, "custom_metrics": {"sparse_reward_mean": 160.6, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 143.92, "shaped_reward_min": 96, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.61, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.05, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.75, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.24, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, 
"useful_onion_drop_agent_0_mean": 0.71, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.81, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.71, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 14.12, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.98, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.72, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.31, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.77, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.5, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.86, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.18, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 12.71, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 14.12, 
"optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.71, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 14.12, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [513.0, 453.0, 516.0, 533.0, 296.0, 453.0, 501.0, 422.0, 465.0, 395.0, 530.0, 473.0, 
519.0, 458.0, 404.0, 487.0, 449.0, 516.0, 516.0, 510.0, 504.0, 516.0, 516.0, 473.0, 516.0, 519.0, 470.0, 387.0, 522.0, 453.0, 421.0, 438.0, 478.0, 479.0, 507.0, 522.0, 398.0, 419.0, 462.0, 419.0, 339.0, 519.0, 467.0, 522.0, 450.0, 449.0, 444.0, 495.0, 453.0, 438.0, 462.0, 476.0, 456.0, 413.0, 403.0, 430.0, 459.0, 476.0, 476.0, 458.0, 347.0, 424.0, 476.0, 570.0, 401.0, 516.0, 455.0, 516.0, 510.0, 411.0, 418.0, 522.0, 465.0, 519.0, 465.0, 408.0, 473.0, 525.0, 522.0, 481.0, 447.0, 527.0, 476.0, 527.0, 449.0, 396.0, 453.0, 453.0, 470.0, 395.0, 481.0, 516.0, 516.0, 449.0, 516.0, 373.0, 416.0, 407.0, 392.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 256.0, 218.0, 235.0, 254.0, 262.0, 273.0, 260.0, 133.0, 163.0, 219.0, 234.0, 235.0, 266.0, 207.0, 215.0, 239.0, 226.0, 195.0, 200.0, 269.0, 261.0, 230.0, 243.0, 263.0, 256.0, 229.0, 229.0, 202.0, 202.0, 247.0, 240.0, 225.0, 224.0, 248.0, 268.0, 250.0, 266.0, 252.0, 258.0, 249.0, 255.0, 272.0, 244.0, 254.0, 262.0, 232.0, 241.0, 260.0, 256.0, 250.0, 269.0, 233.0, 237.0, 189.0, 198.0, 265.0, 257.0, 233.0, 220.0, 209.0, 212.0, 218.0, 220.0, 241.0, 237.0, 238.0, 241.0, 252.0, 255.0, 255.0, 267.0, 207.0, 191.0, 217.0, 202.0, 231.0, 231.0, 211.0, 208.0, 176.0, 163.0, 260.0, 259.0, 234.0, 233.0, 262.0, 260.0, 238.0, 212.0, 230.0, 219.0, 232.0, 212.0, 261.0, 234.0, 219.0, 234.0, 226.0, 212.0, 237.0, 225.0, 247.0, 229.0, 236.0, 220.0, 200.0, 213.0, 200.0, 203.0, 202.0, 228.0, 225.0, 234.0, 236.0, 240.0, 228.0, 248.0, 242.0, 216.0, 
177.0, 170.0, 217.0, 207.0, 231.0, 245.0, 287.0, 283.0, 201.0, 200.0, 264.0, 252.0, 233.0, 222.0, 260.0, 256.0, 265.0, 245.0, 210.0, 201.0, 218.0, 200.0, 267.0, 255.0, 240.0, 225.0, 275.0, 244.0, 218.0, 247.0, 202.0, 206.0, 252.0, 221.0, 257.0, 268.0, 262.0, 260.0, 237.0, 244.0, 217.0, 230.0, 265.0, 262.0, 249.0, 227.0, 259.0, 268.0, 227.0, 222.0, 192.0, 204.0, 233.0, 220.0, 222.0, 231.0, 225.0, 245.0, 195.0, 200.0, 239.0, 242.0, 254.0, 262.0, 259.0, 257.0, 237.0, 212.0, 267.0, 249.0, 184.0, 189.0, 211.0, 205.0, 198.0, 209.0, 202.0, 190.0, 249.0, 267.0]}, "sampler_perf": {"mean_env_wait_ms": 2.0961781849266816, "mean_processing_ms": 0.48630585862921505, "mean_inference_ms": 2.6442580305818364}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2208000, "num_steps_sampled": 1177600, "sample_time_ms": 21020.172, "load_time_ms": 36.643, "grad_time_ms": 9041.599, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 8.077935789833782e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.009082547388970852, "policy_loss": 0.0023276470601558685, "vf_loss": 73.66332244873047, "vf_explained_var": 0.7556483149528503, "kl": 0.003871823428198695, "entropy": 1.2228628396987915, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1177600, "episodes_total": 2944, "training_iteration": 92, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-30-43", "timestamp": 1660249843, "time_this_iter_s": 30.34039807319641, "time_total_s": 8262.495148897171, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], 
"free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", 
"synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8262.495148897171, "timesteps_since_restore": 1177600, "iterations_since_restore": 92, "perf": 
{"cpu_util_percent": 30.85116279069767, "ram_util_percent": 58.283720930232555}} +{"episode_reward_max": 573.0, "episode_reward_min": 230.0, "episode_reward_mean": 467.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 106.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 233.655}, "custom_metrics": {"sparse_reward_mean": 161.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 144.11, "shaped_reward_min": 70, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.51, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.93, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.74, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.27, "useful_onion_pickup_agent_1_min": 8, 
"useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.56, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.68, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.66, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 14.2, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.04, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 0.96, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.35, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.62, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.23, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.37, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.05, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, 
"soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 12.66, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 14.2, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.66, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 14.2, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [444.0, 410.0, 481.0, 411.0, 456.0, 461.0, 230.0, 370.0, 398.0, 473.0, 513.0, 507.0, 461.0, 522.0, 573.0, 495.0, 516.0, 390.0, 501.0, 519.0, 525.0, 524.0, 479.0, 447.0, 467.0, 464.0, 450.0, 392.0, 522.0, 416.0, 478.0, 510.0, 401.0, 516.0, 455.0, 516.0, 510.0, 411.0, 418.0, 522.0, 465.0, 519.0, 465.0, 408.0, 473.0, 525.0, 522.0, 481.0, 447.0, 527.0, 476.0, 527.0, 449.0, 396.0, 453.0, 453.0, 470.0, 395.0, 481.0, 516.0, 516.0, 449.0, 516.0, 373.0, 416.0, 407.0, 392.0, 516.0, 513.0, 453.0, 516.0, 533.0, 296.0, 453.0, 501.0, 422.0, 465.0, 395.0, 530.0, 473.0, 519.0, 458.0, 404.0, 487.0, 449.0, 516.0, 516.0, 510.0, 504.0, 516.0, 516.0, 473.0, 516.0, 519.0, 470.0, 387.0, 522.0, 453.0, 421.0, 438.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [224.0, 220.0, 205.0, 205.0, 229.0, 252.0, 210.0, 201.0, 236.0, 220.0, 223.0, 238.0, 124.0, 106.0, 174.0, 196.0, 191.0, 207.0, 241.0, 232.0, 260.0, 253.0, 268.0, 239.0, 227.0, 234.0, 262.0, 260.0, 293.0, 280.0, 250.0, 245.0, 249.0, 267.0, 195.0, 195.0, 255.0, 246.0, 261.0, 258.0, 254.0, 271.0, 262.0, 262.0, 247.0, 232.0, 201.0, 246.0, 230.0, 237.0, 223.0, 241.0, 221.0, 229.0, 197.0, 195.0, 254.0, 268.0, 211.0, 205.0, 239.0, 239.0, 259.0, 251.0, 201.0, 200.0, 264.0, 252.0, 233.0, 222.0, 260.0, 256.0, 265.0, 245.0, 210.0, 201.0, 218.0, 200.0, 267.0, 255.0, 240.0, 225.0, 275.0, 244.0, 218.0, 247.0, 202.0, 206.0, 252.0, 221.0, 257.0, 
268.0, 262.0, 260.0, 237.0, 244.0, 217.0, 230.0, 265.0, 262.0, 249.0, 227.0, 259.0, 268.0, 227.0, 222.0, 192.0, 204.0, 233.0, 220.0, 222.0, 231.0, 225.0, 245.0, 195.0, 200.0, 239.0, 242.0, 254.0, 262.0, 259.0, 257.0, 237.0, 212.0, 267.0, 249.0, 184.0, 189.0, 211.0, 205.0, 198.0, 209.0, 202.0, 190.0, 249.0, 267.0, 257.0, 256.0, 218.0, 235.0, 254.0, 262.0, 273.0, 260.0, 133.0, 163.0, 219.0, 234.0, 235.0, 266.0, 207.0, 215.0, 239.0, 226.0, 195.0, 200.0, 269.0, 261.0, 230.0, 243.0, 263.0, 256.0, 229.0, 229.0, 202.0, 202.0, 247.0, 240.0, 225.0, 224.0, 248.0, 268.0, 250.0, 266.0, 252.0, 258.0, 249.0, 255.0, 272.0, 244.0, 254.0, 262.0, 232.0, 241.0, 260.0, 256.0, 250.0, 269.0, 233.0, 237.0, 189.0, 198.0, 265.0, 257.0, 233.0, 220.0, 209.0, 212.0, 218.0, 220.0]}, "sampler_perf": {"mean_env_wait_ms": 2.0782697759005244, "mean_processing_ms": 0.48273945160484155, "mean_inference_ms": 2.6262237371699033}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2232000, "num_steps_sampled": 1190400, "sample_time_ms": 21149.002, "load_time_ms": 36.704, "grad_time_ms": 9227.888, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 4.038967894916891e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010848678648471832, "policy_loss": -0.005190685391426086, "vf_loss": 68.93277740478516, "vf_explained_var": 0.7608636021614075, "kl": 0.0020486123394221067, "entropy": 1.2354419231414795, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1190400, "episodes_total": 2976, "training_iteration": 93, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-31-14", "timestamp": 1660249874, "time_this_iter_s": 30.926449298858643, "time_total_s": 8293.42159819603, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, 
"batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8293.42159819603, "timesteps_since_restore": 1190400, "iterations_since_restore": 93, "perf": {"cpu_util_percent": 31.343181818181815, "ram_util_percent": 58.222727272727276}} +{"episode_reward_max": 573.0, "episode_reward_min": 63.0, "episode_reward_mean": 462.26, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 231.13}, "custom_metrics": {"sparse_reward_mean": 160.2, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 141.86, "shaped_reward_min": 23, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.41, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.74, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 
13.57, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.05, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.71, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.39, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 14.12, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.39, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.41, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.52, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.22, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.11, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.19, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 12.39, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 14.12, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.39, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 14.12, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [482.0, 525.0, 507.0, 516.0, 478.0, 519.0, 516.0, 410.0, 513.0, 295.0, 401.0, 462.0, 501.0, 519.0, 63.0, 456.0, 381.0, 510.0, 464.0, 461.0, 473.0, 407.0, 570.0, 444.0, 444.0, 384.0, 467.0, 428.0, 456.0, 462.0, 513.0, 519.0, 416.0, 407.0, 392.0, 516.0, 513.0, 453.0, 516.0, 533.0, 296.0, 453.0, 501.0, 422.0, 465.0, 395.0, 530.0, 473.0, 519.0, 458.0, 404.0, 487.0, 449.0, 516.0, 516.0, 510.0, 504.0, 516.0, 516.0, 473.0, 516.0, 519.0, 470.0, 387.0, 522.0, 453.0, 421.0, 438.0, 444.0, 410.0, 481.0, 411.0, 456.0, 461.0, 230.0, 370.0, 398.0, 473.0, 513.0, 507.0, 461.0, 522.0, 573.0, 495.0, 516.0, 390.0, 501.0, 519.0, 525.0, 524.0, 479.0, 447.0, 467.0, 464.0, 450.0, 392.0, 522.0, 416.0, 478.0, 510.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [250.0, 232.0, 259.0, 266.0, 244.0, 263.0, 261.0, 255.0, 253.0, 225.0, 259.0, 260.0, 251.0, 265.0, 210.0, 200.0, 248.0, 265.0, 140.0, 155.0, 194.0, 207.0, 222.0, 240.0, 252.0, 249.0, 259.0, 260.0, 29.0, 34.0, 206.0, 250.0, 184.0, 197.0, 256.0, 254.0, 239.0, 225.0, 231.0, 230.0, 225.0, 248.0, 204.0, 203.0, 288.0, 282.0, 215.0, 229.0, 226.0, 218.0, 174.0, 210.0, 231.0, 236.0, 222.0, 206.0, 235.0, 221.0, 218.0, 244.0, 251.0, 
262.0, 271.0, 248.0, 211.0, 205.0, 198.0, 209.0, 202.0, 190.0, 249.0, 267.0, 257.0, 256.0, 218.0, 235.0, 254.0, 262.0, 273.0, 260.0, 133.0, 163.0, 219.0, 234.0, 235.0, 266.0, 207.0, 215.0, 239.0, 226.0, 195.0, 200.0, 269.0, 261.0, 230.0, 243.0, 263.0, 256.0, 229.0, 229.0, 202.0, 202.0, 247.0, 240.0, 225.0, 224.0, 248.0, 268.0, 250.0, 266.0, 252.0, 258.0, 249.0, 255.0, 272.0, 244.0, 254.0, 262.0, 232.0, 241.0, 260.0, 256.0, 250.0, 269.0, 233.0, 237.0, 189.0, 198.0, 265.0, 257.0, 233.0, 220.0, 209.0, 212.0, 218.0, 220.0, 224.0, 220.0, 205.0, 205.0, 229.0, 252.0, 210.0, 201.0, 236.0, 220.0, 223.0, 238.0, 124.0, 106.0, 174.0, 196.0, 191.0, 207.0, 241.0, 232.0, 260.0, 253.0, 268.0, 239.0, 227.0, 234.0, 262.0, 260.0, 293.0, 280.0, 250.0, 245.0, 249.0, 267.0, 195.0, 195.0, 255.0, 246.0, 261.0, 258.0, 254.0, 271.0, 262.0, 262.0, 247.0, 232.0, 201.0, 246.0, 230.0, 237.0, 223.0, 241.0, 221.0, 229.0, 197.0, 195.0, 254.0, 268.0, 211.0, 205.0, 239.0, 239.0, 259.0, 251.0]}, "sampler_perf": {"mean_env_wait_ms": 2.060765834369586, "mean_processing_ms": 0.47925634757705055, "mean_inference_ms": 2.608906134199901}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2256000, "num_steps_sampled": 1203200, "sample_time_ms": 21186.356, "load_time_ms": 36.604, "grad_time_ms": 9318.639, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.0194839474584456e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.00026301448815502226, "policy_loss": -0.005903394427150488, "vf_loss": 67.8399658203125, "vf_explained_var": 0.7958834171295166, "kl": 0.0017271721735596657, "entropy": 1.2351765632629395, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1203200, "episodes_total": 3008, "training_iteration": 94, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-31-44", "timestamp": 1660249904, "time_this_iter_s": 30.80340886116028, "time_total_s": 
8324.22500705719, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": 
false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, 
"num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8324.22500705719, "timesteps_since_restore": 1203200, "iterations_since_restore": 94, "perf": {"cpu_util_percent": 33.47727272727272, "ram_util_percent": 58.21363636363637}} +{"episode_reward_max": 573.0, "episode_reward_min": 63.0, "episode_reward_mean": 461.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 230.965}, "custom_metrics": {"sparse_reward_mean": 160.2, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 141.53, "shaped_reward_min": 23, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 14.54, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.69, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.8, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.97, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.8, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.44, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 14.16, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.46, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.36, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.4, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.56, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.12, 
"soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.31, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.02, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.44, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 14.16, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.44, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 14.16, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [441.0, 453.0, 479.0, 405.0, 504.0, 456.0, 525.0, 519.0, 390.0, 379.0, 465.0, 522.0, 530.0, 359.0, 473.0, 453.0, 524.0, 441.0, 407.0, 525.0, 530.0, 453.0, 464.0, 525.0, 422.0, 492.0, 465.0, 398.0, 519.0, 513.0, 464.0, 513.0, 522.0, 453.0, 421.0, 438.0, 444.0, 410.0, 481.0, 411.0, 456.0, 461.0, 230.0, 370.0, 398.0, 473.0, 513.0, 507.0, 461.0, 522.0, 573.0, 495.0, 516.0, 390.0, 501.0, 519.0, 525.0, 524.0, 479.0, 447.0, 467.0, 464.0, 450.0, 392.0, 522.0, 416.0, 478.0, 510.0, 482.0, 525.0, 507.0, 516.0, 478.0, 519.0, 516.0, 410.0, 513.0, 295.0, 401.0, 462.0, 501.0, 519.0, 63.0, 456.0, 381.0, 510.0, 464.0, 461.0, 473.0, 407.0, 570.0, 444.0, 444.0, 384.0, 467.0, 428.0, 456.0, 462.0, 513.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [213.0, 228.0, 227.0, 226.0, 246.0, 233.0, 199.0, 206.0, 253.0, 251.0, 230.0, 226.0, 265.0, 260.0, 258.0, 261.0, 193.0, 197.0, 197.0, 182.0, 228.0, 237.0, 249.0, 273.0, 264.0, 266.0, 183.0, 176.0, 242.0, 231.0, 205.0, 248.0, 
262.0, 262.0, 234.0, 207.0, 192.0, 215.0, 264.0, 261.0, 270.0, 260.0, 232.0, 221.0, 236.0, 228.0, 266.0, 259.0, 195.0, 227.0, 257.0, 235.0, 230.0, 235.0, 212.0, 186.0, 264.0, 255.0, 250.0, 263.0, 229.0, 235.0, 253.0, 260.0, 265.0, 257.0, 233.0, 220.0, 209.0, 212.0, 218.0, 220.0, 224.0, 220.0, 205.0, 205.0, 229.0, 252.0, 210.0, 201.0, 236.0, 220.0, 223.0, 238.0, 124.0, 106.0, 174.0, 196.0, 191.0, 207.0, 241.0, 232.0, 260.0, 253.0, 268.0, 239.0, 227.0, 234.0, 262.0, 260.0, 293.0, 280.0, 250.0, 245.0, 249.0, 267.0, 195.0, 195.0, 255.0, 246.0, 261.0, 258.0, 254.0, 271.0, 262.0, 262.0, 247.0, 232.0, 201.0, 246.0, 230.0, 237.0, 223.0, 241.0, 221.0, 229.0, 197.0, 195.0, 254.0, 268.0, 211.0, 205.0, 239.0, 239.0, 259.0, 251.0, 250.0, 232.0, 259.0, 266.0, 244.0, 263.0, 261.0, 255.0, 253.0, 225.0, 259.0, 260.0, 251.0, 265.0, 210.0, 200.0, 248.0, 265.0, 140.0, 155.0, 194.0, 207.0, 222.0, 240.0, 252.0, 249.0, 259.0, 260.0, 29.0, 34.0, 206.0, 250.0, 184.0, 197.0, 256.0, 254.0, 239.0, 225.0, 231.0, 230.0, 225.0, 248.0, 204.0, 203.0, 288.0, 282.0, 215.0, 229.0, 226.0, 218.0, 174.0, 210.0, 231.0, 236.0, 222.0, 206.0, 235.0, 221.0, 218.0, 244.0, 251.0, 262.0, 271.0, 248.0]}, "sampler_perf": {"mean_env_wait_ms": 2.043636062637732, "mean_processing_ms": 0.47584888992539853, "mean_inference_ms": 2.592145322205639}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2280000, "num_steps_sampled": 1216000, "sample_time_ms": 21306.082, "load_time_ms": 36.597, "grad_time_ms": 9453.053, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.0097419737292228e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.001733560231514275, "policy_loss": -0.004548916593194008, "vf_loss": 68.90572357177734, "vf_explained_var": 0.7648184895515442, "kl": 0.0019422214245423675, "entropy": 1.216185212135315, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1216000, "episodes_total": 3040, 
"training_iteration": 95, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-32-16", "timestamp": 1660249936, "time_this_iter_s": 31.733500242233276, "time_total_s": 8355.958507299423, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8355.958507299423, "timesteps_since_restore": 1216000, "iterations_since_restore": 95, "perf": {"cpu_util_percent": 30.170454545454547, "ram_util_percent": 58.22272727272727}} +{"episode_reward_max": 576.0, "episode_reward_min": 63.0, "episode_reward_mean": 464.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 288.0}, "policy_reward_mean": {"ppo": 232.385}, "custom_metrics": {"sparse_reward_mean": 161.0, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 142.77, "shaped_reward_min": 23, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.82, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.62, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.01, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.82, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.84, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.65, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.83, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 13.99, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.11, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.32, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.34, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.18, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.32, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.09, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.83, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 13.99, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.83, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 13.99, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [447.0, 513.0, 487.0, 413.0, 348.0, 492.0, 421.0, 470.0, 476.0, 447.0, 504.0, 412.0, 470.0, 519.0, 516.0, 533.0, 404.0, 441.0, 447.0, 576.0, 459.0, 510.0, 408.0, 510.0, 329.0, 450.0, 510.0, 516.0, 525.0, 510.0, 450.0, 484.0, 522.0, 416.0, 478.0, 510.0, 482.0, 525.0, 507.0, 516.0, 478.0, 519.0, 516.0, 410.0, 513.0, 295.0, 401.0, 462.0, 501.0, 519.0, 63.0, 456.0, 381.0, 510.0, 464.0, 461.0, 473.0, 407.0, 570.0, 444.0, 444.0, 384.0, 467.0, 428.0, 456.0, 462.0, 513.0, 519.0, 441.0, 453.0, 479.0, 405.0, 504.0, 456.0, 525.0, 519.0, 390.0, 379.0, 465.0, 522.0, 530.0, 359.0, 473.0, 453.0, 524.0, 441.0, 407.0, 525.0, 530.0, 453.0, 464.0, 525.0, 422.0, 492.0, 465.0, 398.0, 519.0, 513.0, 464.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [217.0, 230.0, 252.0, 261.0, 
246.0, 241.0, 204.0, 209.0, 166.0, 182.0, 240.0, 252.0, 210.0, 211.0, 230.0, 240.0, 228.0, 248.0, 232.0, 215.0, 258.0, 246.0, 224.0, 188.0, 228.0, 242.0, 265.0, 254.0, 249.0, 267.0, 267.0, 266.0, 202.0, 202.0, 235.0, 206.0, 212.0, 235.0, 288.0, 288.0, 225.0, 234.0, 248.0, 262.0, 219.0, 189.0, 257.0, 253.0, 177.0, 152.0, 229.0, 221.0, 259.0, 251.0, 254.0, 262.0, 261.0, 264.0, 267.0, 243.0, 221.0, 229.0, 244.0, 240.0, 254.0, 268.0, 211.0, 205.0, 239.0, 239.0, 259.0, 251.0, 250.0, 232.0, 259.0, 266.0, 244.0, 263.0, 261.0, 255.0, 253.0, 225.0, 259.0, 260.0, 251.0, 265.0, 210.0, 200.0, 248.0, 265.0, 140.0, 155.0, 194.0, 207.0, 222.0, 240.0, 252.0, 249.0, 259.0, 260.0, 29.0, 34.0, 206.0, 250.0, 184.0, 197.0, 256.0, 254.0, 239.0, 225.0, 231.0, 230.0, 225.0, 248.0, 204.0, 203.0, 288.0, 282.0, 215.0, 229.0, 226.0, 218.0, 174.0, 210.0, 231.0, 236.0, 222.0, 206.0, 235.0, 221.0, 218.0, 244.0, 251.0, 262.0, 271.0, 248.0, 213.0, 228.0, 227.0, 226.0, 246.0, 233.0, 199.0, 206.0, 253.0, 251.0, 230.0, 226.0, 265.0, 260.0, 258.0, 261.0, 193.0, 197.0, 197.0, 182.0, 228.0, 237.0, 249.0, 273.0, 264.0, 266.0, 183.0, 176.0, 242.0, 231.0, 205.0, 248.0, 262.0, 262.0, 234.0, 207.0, 192.0, 215.0, 264.0, 261.0, 270.0, 260.0, 232.0, 221.0, 236.0, 228.0, 266.0, 259.0, 195.0, 227.0, 257.0, 235.0, 230.0, 235.0, 212.0, 186.0, 264.0, 255.0, 250.0, 263.0, 229.0, 235.0, 253.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 2.0268528055210124, "mean_processing_ms": 0.47250865851188434, "mean_inference_ms": 2.5756730471163247}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2304000, "num_steps_sampled": 1228800, "sample_time_ms": 21320.865, "load_time_ms": 36.696, "grad_time_ms": 9593.729, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 5.048709868646114e-30, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013039499754086137, "policy_loss": -0.007722912821918726, "vf_loss": 70.26915740966797, "vf_explained_var": 0.7757861018180847, "kl": 0.001609964296221733, "entropy": 
1.2159069776535034, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1228800, "episodes_total": 3072, "training_iteration": 96, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-32-47", "timestamp": 1660249967, "time_this_iter_s": 30.381797075271606, "time_total_s": 8386.340304374695, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": 
{"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8386.340304374695, "timesteps_since_restore": 1228800, "iterations_since_restore": 96, "perf": {"cpu_util_percent": 31.758139534883718, "ram_util_percent": 58.16046511627907}} +{"episode_reward_max": 579.0, "episode_reward_min": 128.0, "episode_reward_mean": 470.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 294.0}, "policy_reward_mean": {"ppo": 235.265}, "custom_metrics": {"sparse_reward_mean": 163.2, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 144.13, "shaped_reward_min": 48, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.93, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.66, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.25, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.85, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.47, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.72, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.73, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.11, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 14.05, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.12, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.01, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.32, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.55, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.16, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.4, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.08, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.11, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 14.05, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.11, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 14.05, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 522.0, 522.0, 462.0, 446.0, 519.0, 573.0, 352.0, 473.0, 519.0, 507.0, 524.0, 519.0, 467.0, 513.0, 522.0, 504.0, 519.0, 456.0, 453.0, 168.0, 516.0, 476.0, 470.0, 507.0, 570.0, 516.0, 444.0, 128.0, 510.0, 579.0, 317.0, 456.0, 462.0, 513.0, 519.0, 441.0, 453.0, 479.0, 405.0, 504.0, 456.0, 525.0, 519.0, 390.0, 379.0, 465.0, 522.0, 530.0, 359.0, 473.0, 453.0, 524.0, 441.0, 407.0, 525.0, 530.0, 453.0, 464.0, 525.0, 422.0, 492.0, 465.0, 398.0, 519.0, 513.0, 464.0, 513.0, 447.0, 513.0, 487.0, 413.0, 348.0, 492.0, 421.0, 470.0, 476.0, 447.0, 504.0, 412.0, 470.0, 519.0, 516.0, 533.0, 404.0, 441.0, 447.0, 576.0, 459.0, 510.0, 408.0, 510.0, 329.0, 450.0, 510.0, 516.0, 525.0, 510.0, 450.0, 484.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 268.0, 252.0, 270.0, 258.0, 264.0, 238.0, 224.0, 227.0, 219.0, 246.0, 273.0, 285.0, 288.0, 168.0, 184.0, 240.0, 233.0, 266.0, 253.0, 258.0, 249.0, 249.0, 275.0, 259.0, 260.0, 240.0, 227.0, 246.0, 267.0, 267.0, 255.0, 256.0, 248.0, 269.0, 250.0, 225.0, 231.0, 225.0, 228.0, 85.0, 83.0, 265.0, 251.0, 232.0, 244.0, 222.0, 248.0, 253.0, 254.0, 286.0, 284.0, 256.0, 260.0, 223.0, 221.0, 65.0, 63.0, 256.0, 254.0, 294.0, 285.0, 162.0, 155.0, 235.0, 221.0, 218.0, 244.0, 251.0, 262.0, 271.0, 248.0, 213.0, 228.0, 227.0, 226.0, 246.0, 233.0, 199.0, 206.0, 253.0, 251.0, 230.0, 226.0, 265.0, 260.0, 258.0, 261.0, 193.0, 197.0, 197.0, 182.0, 228.0, 237.0, 249.0, 273.0, 264.0, 266.0, 183.0, 176.0, 242.0, 231.0, 205.0, 248.0, 262.0, 262.0, 234.0, 207.0, 192.0, 215.0, 264.0, 261.0, 270.0, 260.0, 232.0, 221.0, 236.0, 228.0, 266.0, 259.0, 195.0, 227.0, 257.0, 235.0, 230.0, 235.0, 212.0, 186.0, 264.0, 255.0, 250.0, 263.0, 229.0, 235.0, 253.0, 260.0, 217.0, 230.0, 252.0, 261.0, 246.0, 241.0, 204.0, 209.0, 166.0, 182.0, 240.0, 252.0, 210.0, 211.0, 230.0, 240.0, 228.0, 248.0, 232.0, 215.0, 258.0, 246.0, 224.0, 188.0, 228.0, 242.0, 265.0, 254.0, 249.0, 267.0, 267.0, 266.0, 202.0, 202.0, 235.0, 206.0, 212.0, 235.0, 288.0, 288.0, 225.0, 234.0, 248.0, 262.0, 219.0, 189.0, 257.0, 253.0, 177.0, 152.0, 229.0, 221.0, 259.0, 251.0, 254.0, 262.0, 261.0, 264.0, 267.0, 243.0, 221.0, 229.0, 244.0, 240.0]}, "sampler_perf": {"mean_env_wait_ms": 2.0104424851575393, "mean_processing_ms": 0.4692484587452608, "mean_inference_ms": 2.5596152283112645}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2328000, "num_steps_sampled": 1241600, "sample_time_ms": 21335.822, "load_time_ms": 37.307, "grad_time_ms": 9680.502, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.524354934323057e-30, 
"cur_lr": 0.0010000000474974513, "total_loss": 0.001703931367956102, "policy_loss": -0.005316242575645447, "vf_loss": 76.20516204833984, "vf_explained_var": 0.7805307507514954, "kl": 0.002101513324305415, "entropy": 1.2007073163986206, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1241600, "episodes_total": 3104, "training_iteration": 97, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-33-18", "timestamp": 1660249998, "time_this_iter_s": 31.63303232192993, "time_total_s": 8417.973336696625, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": 
Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, 
"output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8417.973336696625, "timesteps_since_restore": 1241600, "iterations_since_restore": 97, "perf": {"cpu_util_percent": 34.47777777777778, "ram_util_percent": 58.24}} +{"episode_reward_max": 579.0, "episode_reward_min": 128.0, "episode_reward_mean": 478.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 294.0}, "policy_reward_mean": {"ppo": 239.385}, "custom_metrics": {"sparse_reward_mean": 166.0, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 146.77, "shaped_reward_min": 48, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.02, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.94, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.22, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.14, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.7, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.74, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.12, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 14.32, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.63, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.32, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.99, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.29, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.25, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.44, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.13, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.12, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 14.32, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.12, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 14.32, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [473.0, 522.0, 510.0, 525.0, 513.0, 525.0, 459.0, 410.0, 504.0, 522.0, 522.0, 507.0, 237.0, 527.0, 525.0, 472.0, 501.0, 459.0, 450.0, 530.0, 519.0, 525.0, 530.0, 579.0, 522.0, 519.0, 479.0, 573.0, 530.0, 513.0, 344.0, 447.0, 519.0, 513.0, 464.0, 513.0, 447.0, 513.0, 487.0, 413.0, 348.0, 492.0, 421.0, 470.0, 476.0, 447.0, 504.0, 412.0, 470.0, 519.0, 516.0, 533.0, 404.0, 441.0, 447.0, 576.0, 459.0, 510.0, 408.0, 510.0, 329.0, 450.0, 510.0, 516.0, 525.0, 510.0, 450.0, 484.0, 525.0, 522.0, 522.0, 462.0, 446.0, 519.0, 573.0, 352.0, 473.0, 519.0, 507.0, 524.0, 519.0, 467.0, 513.0, 522.0, 504.0, 519.0, 456.0, 453.0, 168.0, 516.0, 476.0, 470.0, 507.0, 570.0, 516.0, 444.0, 128.0, 510.0, 579.0, 317.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [232.0, 241.0, 262.0, 260.0, 252.0, 258.0, 255.0, 270.0, 260.0, 253.0, 259.0, 266.0, 227.0, 232.0, 195.0, 215.0, 254.0, 250.0, 258.0, 264.0, 254.0, 268.0, 248.0, 259.0, 117.0, 120.0, 261.0, 266.0, 259.0, 266.0, 227.0, 245.0, 248.0, 253.0, 232.0, 227.0, 233.0, 217.0, 267.0, 263.0, 265.0, 254.0, 264.0, 261.0, 265.0, 265.0, 291.0, 288.0, 262.0, 260.0, 259.0, 260.0, 244.0, 235.0, 290.0, 283.0, 255.0, 275.0, 253.0, 260.0, 175.0, 169.0, 217.0, 230.0, 264.0, 255.0, 250.0, 263.0, 229.0, 235.0, 253.0, 260.0, 217.0, 230.0, 252.0, 261.0, 246.0, 241.0, 204.0, 209.0, 166.0, 182.0, 240.0, 252.0, 210.0, 211.0, 230.0, 240.0, 228.0, 248.0, 232.0, 215.0, 258.0, 246.0, 224.0, 188.0, 228.0, 242.0, 265.0, 254.0, 249.0, 267.0, 267.0, 266.0, 202.0, 202.0, 235.0, 206.0, 212.0, 235.0, 288.0, 288.0, 225.0, 234.0, 248.0, 262.0, 219.0, 189.0, 257.0, 253.0, 177.0, 152.0, 229.0, 221.0, 259.0, 251.0, 254.0, 262.0, 261.0, 264.0, 267.0, 243.0, 221.0, 229.0, 244.0, 240.0, 257.0, 268.0, 252.0, 270.0, 258.0, 264.0, 238.0, 224.0, 227.0, 219.0, 246.0, 273.0, 285.0, 288.0, 168.0, 184.0, 240.0, 233.0, 266.0, 253.0, 258.0, 249.0, 249.0, 275.0, 259.0, 260.0, 240.0, 227.0, 246.0, 267.0, 267.0, 255.0, 256.0, 248.0, 269.0, 250.0, 225.0, 231.0, 225.0, 228.0, 85.0, 83.0, 265.0, 251.0, 232.0, 244.0, 222.0, 248.0, 253.0, 254.0, 286.0, 284.0, 256.0, 260.0, 223.0, 221.0, 65.0, 63.0, 256.0, 254.0, 294.0, 285.0, 162.0, 155.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9943851070346994, "mean_processing_ms": 0.4660638204377501, "mean_inference_ms": 2.5438091293770433}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2352000, 
"num_steps_sampled": 1254400, "sample_time_ms": 21313.715, "load_time_ms": 36.778, "grad_time_ms": 9499.638, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.2621774671615285e-30, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019398670410737395, "policy_loss": -0.005300960969179869, "vf_loss": 78.3524398803711, "vf_explained_var": 0.7676915526390076, "kl": 0.0015995064750313759, "entropy": 1.188806414604187, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1254400, "episodes_total": 3136, "training_iteration": 98, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-33-47", "timestamp": 1660250027, "time_this_iter_s": 29.219672203063965, "time_total_s": 8447.193008899689, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, 
"wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, 
"object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8447.193008899689, "timesteps_since_restore": 1254400, "iterations_since_restore": 98, "perf": {"cpu_util_percent": 33.91190476190476, "ram_util_percent": 58.29285714285714}} +{"episode_reward_max": 579.0, "episode_reward_min": 128.0, "episode_reward_mean": 485.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 294.0}, "policy_reward_mean": {"ppo": 242.725}, "custom_metrics": {"sparse_reward_mean": 168.4, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 148.65, "shaped_reward_min": 48, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, 
"useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.54, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.76, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.85, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.04, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.69, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.74, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.5, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.11, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.34, 
"dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.61, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.25, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.28, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.5, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.11, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.5, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.11, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [458.0, 473.0, 516.0, 522.0, 524.0, 570.0, 453.0, 476.0, 458.0, 522.0, 576.0, 498.0, 519.0, 406.0, 516.0, 533.0, 450.0, 473.0, 525.0, 525.0, 464.0, 447.0, 441.0, 479.0, 467.0, 504.0, 507.0, 513.0, 519.0, 406.0, 467.0, 498.0, 525.0, 510.0, 450.0, 484.0, 525.0, 522.0, 522.0, 462.0, 446.0, 519.0, 573.0, 352.0, 473.0, 519.0, 507.0, 524.0, 519.0, 467.0, 513.0, 522.0, 504.0, 519.0, 456.0, 453.0, 168.0, 516.0, 476.0, 470.0, 507.0, 570.0, 516.0, 444.0, 128.0, 510.0, 579.0, 317.0, 473.0, 522.0, 510.0, 525.0, 513.0, 525.0, 459.0, 410.0, 504.0, 522.0, 522.0, 507.0, 237.0, 527.0, 525.0, 472.0, 501.0, 459.0, 450.0, 530.0, 519.0, 525.0, 530.0, 579.0, 522.0, 519.0, 
479.0, 573.0, 530.0, 513.0, 344.0, 447.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [217.0, 241.0, 237.0, 236.0, 268.0, 248.0, 250.0, 272.0, 256.0, 268.0, 285.0, 285.0, 233.0, 220.0, 231.0, 245.0, 227.0, 231.0, 263.0, 259.0, 288.0, 288.0, 245.0, 253.0, 257.0, 262.0, 197.0, 209.0, 256.0, 260.0, 251.0, 282.0, 246.0, 204.0, 232.0, 241.0, 261.0, 264.0, 263.0, 262.0, 223.0, 241.0, 221.0, 226.0, 224.0, 217.0, 241.0, 238.0, 233.0, 234.0, 253.0, 251.0, 259.0, 248.0, 256.0, 257.0, 254.0, 265.0, 185.0, 221.0, 229.0, 238.0, 234.0, 264.0, 261.0, 264.0, 267.0, 243.0, 221.0, 229.0, 244.0, 240.0, 257.0, 268.0, 252.0, 270.0, 258.0, 264.0, 238.0, 224.0, 227.0, 219.0, 246.0, 273.0, 285.0, 288.0, 168.0, 184.0, 240.0, 233.0, 266.0, 253.0, 258.0, 249.0, 249.0, 275.0, 259.0, 260.0, 240.0, 227.0, 246.0, 267.0, 267.0, 255.0, 256.0, 248.0, 269.0, 250.0, 225.0, 231.0, 225.0, 228.0, 85.0, 83.0, 265.0, 251.0, 232.0, 244.0, 222.0, 248.0, 253.0, 254.0, 286.0, 284.0, 256.0, 260.0, 223.0, 221.0, 65.0, 63.0, 256.0, 254.0, 294.0, 285.0, 162.0, 155.0, 232.0, 241.0, 262.0, 260.0, 252.0, 258.0, 255.0, 270.0, 260.0, 253.0, 259.0, 266.0, 227.0, 232.0, 195.0, 215.0, 254.0, 250.0, 258.0, 264.0, 254.0, 268.0, 248.0, 259.0, 117.0, 120.0, 261.0, 266.0, 259.0, 266.0, 227.0, 245.0, 248.0, 253.0, 232.0, 227.0, 233.0, 217.0, 267.0, 263.0, 265.0, 254.0, 264.0, 261.0, 265.0, 265.0, 291.0, 288.0, 262.0, 260.0, 259.0, 260.0, 244.0, 235.0, 290.0, 283.0, 255.0, 275.0, 253.0, 260.0, 175.0, 169.0, 217.0, 230.0]}, "sampler_perf": 
{"mean_env_wait_ms": 1.978664058751599, "mean_processing_ms": 0.4629441310230788, "mean_inference_ms": 2.5282732021268366}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2376000, "num_steps_sampled": 1267200, "sample_time_ms": 21031.143, "load_time_ms": 36.859, "grad_time_ms": 9358.331, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 6.3108873358076425e-31, "cur_lr": 0.0010000000474974513, "total_loss": 0.00523378886282444, "policy_loss": -0.0017726494697853923, "vf_loss": 76.06880950927734, "vf_explained_var": 0.753373920917511, "kl": 0.001648509525693953, "entropy": 1.2008789777755737, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1267200, "episodes_total": 3168, "training_iteration": 99, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-34-16", "timestamp": 1660250056, "time_this_iter_s": 28.433568000793457, "time_total_s": 8475.626576900482, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8475.626576900482, "timesteps_since_restore": 1267200, "iterations_since_restore": 99, "perf": {"cpu_util_percent": 33.795, "ram_util_percent": 58.2625}} +{"episode_reward_max": 582.0, "episode_reward_min": 128.0, "episode_reward_mean": 480.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 240.47}, "custom_metrics": {"sparse_reward_mean": 166.8, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 147.34, "shaped_reward_min": 48, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, 
"tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.37, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.81, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.59, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.1, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.69, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.32, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.21, "potting_onion_agent_1_min": 
5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.45, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.92, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.27, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.36, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.23, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 13.32, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.21, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.32, "viable_onion_potting_agent_0_min": 4, 
"viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.21, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 504.0, 524.0, 513.0, 347.0, 522.0, 355.0, 336.0, 533.0, 459.0, 522.0, 384.0, 576.0, 567.0, 177.0, 582.0, 467.0, 470.0, 519.0, 533.0, 398.0, 510.0, 513.0, 441.0, 398.0, 516.0, 409.0, 507.0, 525.0, 516.0, 530.0, 404.0, 128.0, 510.0, 579.0, 317.0, 473.0, 522.0, 510.0, 525.0, 513.0, 525.0, 459.0, 410.0, 504.0, 522.0, 522.0, 507.0, 237.0, 527.0, 525.0, 472.0, 501.0, 459.0, 450.0, 530.0, 519.0, 525.0, 530.0, 579.0, 522.0, 519.0, 479.0, 573.0, 530.0, 513.0, 344.0, 447.0, 458.0, 473.0, 516.0, 
522.0, 524.0, 570.0, 453.0, 476.0, 458.0, 522.0, 576.0, 498.0, 519.0, 406.0, 516.0, 533.0, 450.0, 473.0, 525.0, 525.0, 464.0, 447.0, 441.0, 479.0, 467.0, 504.0, 507.0, 513.0, 519.0, 406.0, 467.0, 498.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [252.0, 273.0, 265.0, 239.0, 251.0, 273.0, 254.0, 259.0, 188.0, 159.0, 260.0, 262.0, 168.0, 187.0, 151.0, 185.0, 275.0, 258.0, 239.0, 220.0, 264.0, 258.0, 187.0, 197.0, 288.0, 288.0, 270.0, 297.0, 89.0, 88.0, 284.0, 298.0, 241.0, 226.0, 241.0, 229.0, 259.0, 260.0, 254.0, 279.0, 194.0, 204.0, 259.0, 251.0, 256.0, 257.0, 227.0, 214.0, 186.0, 212.0, 240.0, 276.0, 193.0, 216.0, 242.0, 265.0, 259.0, 266.0, 262.0, 254.0, 276.0, 254.0, 205.0, 199.0, 65.0, 63.0, 256.0, 254.0, 294.0, 285.0, 162.0, 155.0, 232.0, 241.0, 262.0, 260.0, 252.0, 258.0, 255.0, 270.0, 260.0, 253.0, 259.0, 266.0, 227.0, 232.0, 195.0, 215.0, 254.0, 250.0, 258.0, 264.0, 254.0, 268.0, 248.0, 259.0, 117.0, 120.0, 261.0, 266.0, 259.0, 266.0, 227.0, 245.0, 248.0, 253.0, 232.0, 227.0, 233.0, 217.0, 267.0, 263.0, 265.0, 254.0, 264.0, 261.0, 265.0, 265.0, 291.0, 288.0, 262.0, 260.0, 259.0, 260.0, 244.0, 235.0, 290.0, 283.0, 255.0, 275.0, 253.0, 260.0, 175.0, 169.0, 217.0, 230.0, 217.0, 241.0, 237.0, 236.0, 268.0, 248.0, 250.0, 272.0, 256.0, 268.0, 285.0, 285.0, 233.0, 220.0, 231.0, 245.0, 227.0, 231.0, 263.0, 259.0, 288.0, 288.0, 245.0, 253.0, 257.0, 262.0, 197.0, 209.0, 256.0, 260.0, 251.0, 282.0, 246.0, 204.0, 232.0, 241.0, 261.0, 264.0, 263.0, 262.0, 223.0, 241.0, 221.0, 
226.0, 224.0, 217.0, 241.0, 238.0, 233.0, 234.0, 253.0, 251.0, 259.0, 248.0, 256.0, 257.0, 254.0, 265.0, 185.0, 221.0, 229.0, 238.0, 234.0, 264.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9632342892537746, "mean_processing_ms": 0.4598755283783044, "mean_inference_ms": 2.5128753018749035}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2400000, "num_steps_sampled": 1280000, "sample_time_ms": 20819.102, "load_time_ms": 36.792, "grad_time_ms": 9258.115, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.1554436679038213e-31, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015751657774671912, "policy_loss": -0.008594638668000698, "vf_loss": 76.2179183959961, "vf_explained_var": 0.7723303437232971, "kl": 0.002320505678653717, "entropy": 1.2046717405319214, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1280000, "episodes_total": 3200, "training_iteration": 100, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-34-45", "timestamp": 1660250085, "time_this_iter_s": 29.270292043685913, "time_total_s": 8504.896868944168, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, 
"soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, 
"metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8504.896868944168, "timesteps_since_restore": 1280000, "iterations_since_restore": 100, "perf": {"cpu_util_percent": 32.380487804878044, "ram_util_percent": 58.27560975609755}} +{"episode_reward_max": 582.0, "episode_reward_min": 177.0, "episode_reward_mean": 476.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 238.185}, "custom_metrics": {"sparse_reward_mean": 165.2, "sparse_reward_min": 
60, "sparse_reward_max": 200, "shaped_reward_mean": 145.97, "shaped_reward_min": 57, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.43, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.61, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.55, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.93, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.79, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 1.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.83, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.36, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.91, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.54, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.31, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.13, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 13.36, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.91, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.36, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.91, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [447.0, 450.0, 522.0, 513.0, 519.0, 351.0, 573.0, 425.0, 473.0, 530.0, 479.0, 404.0, 470.0, 522.0, 525.0, 516.0, 197.0, 458.0, 519.0, 464.0, 180.0, 299.0, 519.0, 527.0, 506.0, 516.0, 522.0, 473.0, 570.0, 504.0, 530.0, 513.0, 530.0, 513.0, 344.0, 447.0, 458.0, 473.0, 516.0, 522.0, 524.0, 
570.0, 453.0, 476.0, 458.0, 522.0, 576.0, 498.0, 519.0, 406.0, 516.0, 533.0, 450.0, 473.0, 525.0, 525.0, 464.0, 447.0, 441.0, 479.0, 467.0, 504.0, 507.0, 513.0, 519.0, 406.0, 467.0, 498.0, 525.0, 504.0, 524.0, 513.0, 347.0, 522.0, 355.0, 336.0, 533.0, 459.0, 522.0, 384.0, 576.0, 567.0, 177.0, 582.0, 467.0, 470.0, 519.0, 533.0, 398.0, 510.0, 513.0, 441.0, 398.0, 516.0, 409.0, 507.0, 525.0, 516.0, 530.0, 404.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [225.0, 222.0, 232.0, 218.0, 254.0, 268.0, 254.0, 259.0, 258.0, 261.0, 180.0, 171.0, 288.0, 285.0, 221.0, 204.0, 256.0, 217.0, 269.0, 261.0, 236.0, 243.0, 197.0, 207.0, 242.0, 228.0, 260.0, 262.0, 262.0, 263.0, 270.0, 246.0, 102.0, 95.0, 217.0, 241.0, 268.0, 251.0, 217.0, 247.0, 86.0, 94.0, 143.0, 156.0, 262.0, 257.0, 259.0, 268.0, 261.0, 245.0, 241.0, 275.0, 260.0, 262.0, 220.0, 253.0, 285.0, 285.0, 240.0, 264.0, 272.0, 258.0, 244.0, 269.0, 255.0, 275.0, 253.0, 260.0, 175.0, 169.0, 217.0, 230.0, 217.0, 241.0, 237.0, 236.0, 268.0, 248.0, 250.0, 272.0, 256.0, 268.0, 285.0, 285.0, 233.0, 220.0, 231.0, 245.0, 227.0, 231.0, 263.0, 259.0, 288.0, 288.0, 245.0, 253.0, 257.0, 262.0, 197.0, 209.0, 256.0, 260.0, 251.0, 282.0, 246.0, 204.0, 232.0, 241.0, 261.0, 264.0, 263.0, 262.0, 223.0, 241.0, 221.0, 226.0, 224.0, 217.0, 241.0, 238.0, 233.0, 234.0, 253.0, 251.0, 259.0, 248.0, 256.0, 257.0, 254.0, 265.0, 185.0, 221.0, 229.0, 238.0, 234.0, 264.0, 252.0, 273.0, 265.0, 239.0, 251.0, 273.0, 254.0, 259.0, 188.0, 159.0, 260.0, 262.0, 168.0, 
187.0, 151.0, 185.0, 275.0, 258.0, 239.0, 220.0, 264.0, 258.0, 187.0, 197.0, 288.0, 288.0, 270.0, 297.0, 89.0, 88.0, 284.0, 298.0, 241.0, 226.0, 241.0, 229.0, 259.0, 260.0, 254.0, 279.0, 194.0, 204.0, 259.0, 251.0, 256.0, 257.0, 227.0, 214.0, 186.0, 212.0, 240.0, 276.0, 193.0, 216.0, 242.0, 265.0, 259.0, 266.0, 262.0, 254.0, 276.0, 254.0, 205.0, 199.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9480966486925166, "mean_processing_ms": 0.45685958478274097, "mean_inference_ms": 2.497748516500124}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2424000, "num_steps_sampled": 1292800, "sample_time_ms": 20962.686, "load_time_ms": 36.873, "grad_time_ms": 9154.171, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.5777218339519106e-31, "cur_lr": 0.0010000000474974513, "total_loss": 0.00011571295181056485, "policy_loss": -0.006676681339740753, "vf_loss": 73.92855834960938, "vf_explained_var": 0.7952176928520203, "kl": 0.0016933353617787361, "entropy": 1.2009243965148926, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1292800, "episodes_total": 3232, "training_iteration": 101, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-35-15", "timestamp": 1660250115, "time_this_iter_s": 29.051042795181274, "time_total_s": 8533.94791173935, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, 
"dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8533.94791173935, "timesteps_since_restore": 1292800, "iterations_since_restore": 101, "perf": {"cpu_util_percent": 29.43571428571429, "ram_util_percent": 58.27142857142857}} +{"episode_reward_max": 582.0, "episode_reward_min": 177.0, "episode_reward_mean": 479.09, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 239.545}, "custom_metrics": {"sparse_reward_mean": 166.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 147.09, "shaped_reward_min": 57, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.17, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.98, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.22, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.32, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.87, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, 
"useful_onion_drop_agent_0_mean": 0.88, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.04, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.4, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.26, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.4, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.45, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.29, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.13, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 13.04, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.4, 
"optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.04, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.4, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 487.0, 513.0, 570.0, 513.0, 570.0, 522.0, 476.0, 446.0, 453.0, 459.0, 522.0, 
504.0, 468.0, 516.0, 479.0, 464.0, 522.0, 516.0, 467.0, 444.0, 401.0, 522.0, 510.0, 530.0, 513.0, 482.0, 507.0, 516.0, 516.0, 513.0, 470.0, 519.0, 406.0, 467.0, 498.0, 525.0, 504.0, 524.0, 513.0, 347.0, 522.0, 355.0, 336.0, 533.0, 459.0, 522.0, 384.0, 576.0, 567.0, 177.0, 582.0, 467.0, 470.0, 519.0, 533.0, 398.0, 510.0, 513.0, 441.0, 398.0, 516.0, 409.0, 507.0, 525.0, 516.0, 530.0, 404.0, 447.0, 450.0, 522.0, 513.0, 519.0, 351.0, 573.0, 425.0, 473.0, 530.0, 479.0, 404.0, 470.0, 522.0, 525.0, 516.0, 197.0, 458.0, 519.0, 464.0, 180.0, 299.0, 519.0, 527.0, 506.0, 516.0, 522.0, 473.0, 570.0, 504.0, 530.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 269.0, 234.0, 253.0, 261.0, 252.0, 282.0, 288.0, 251.0, 262.0, 287.0, 283.0, 265.0, 257.0, 243.0, 233.0, 212.0, 234.0, 244.0, 209.0, 226.0, 233.0, 254.0, 268.0, 247.0, 257.0, 236.0, 232.0, 265.0, 251.0, 240.0, 239.0, 226.0, 238.0, 259.0, 263.0, 237.0, 279.0, 233.0, 234.0, 213.0, 231.0, 196.0, 205.0, 246.0, 276.0, 251.0, 259.0, 272.0, 258.0, 256.0, 257.0, 226.0, 256.0, 244.0, 263.0, 267.0, 249.0, 260.0, 256.0, 251.0, 262.0, 233.0, 237.0, 254.0, 265.0, 185.0, 221.0, 229.0, 238.0, 234.0, 264.0, 252.0, 273.0, 265.0, 239.0, 251.0, 273.0, 254.0, 259.0, 188.0, 159.0, 260.0, 262.0, 168.0, 187.0, 151.0, 185.0, 275.0, 258.0, 239.0, 220.0, 264.0, 258.0, 187.0, 197.0, 288.0, 288.0, 270.0, 297.0, 89.0, 88.0, 284.0, 298.0, 241.0, 226.0, 241.0, 229.0, 259.0, 260.0, 254.0, 279.0, 194.0, 204.0, 259.0, 251.0, 256.0, 257.0, 227.0, 214.0, 
186.0, 212.0, 240.0, 276.0, 193.0, 216.0, 242.0, 265.0, 259.0, 266.0, 262.0, 254.0, 276.0, 254.0, 205.0, 199.0, 225.0, 222.0, 232.0, 218.0, 254.0, 268.0, 254.0, 259.0, 258.0, 261.0, 180.0, 171.0, 288.0, 285.0, 221.0, 204.0, 256.0, 217.0, 269.0, 261.0, 236.0, 243.0, 197.0, 207.0, 242.0, 228.0, 260.0, 262.0, 262.0, 263.0, 270.0, 246.0, 102.0, 95.0, 217.0, 241.0, 268.0, 251.0, 217.0, 247.0, 86.0, 94.0, 143.0, 156.0, 262.0, 257.0, 259.0, 268.0, 261.0, 245.0, 241.0, 275.0, 260.0, 262.0, 220.0, 253.0, 285.0, 285.0, 240.0, 264.0, 272.0, 258.0, 244.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9332518889221049, "mean_processing_ms": 0.4539033753742656, "mean_inference_ms": 2.483090315311066}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2448000, "num_steps_sampled": 1305600, "sample_time_ms": 21004.371, "load_time_ms": 37.153, "grad_time_ms": 9035.152, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.888609169759553e-32, "cur_lr": 0.0010000000474974513, "total_loss": 0.00245770625770092, "policy_loss": -0.003976076375693083, "vf_loss": 70.27108764648438, "vf_explained_var": 0.7766384482383728, "kl": 0.001931712031364441, "entropy": 1.1866337060928345, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1305600, "episodes_total": 3264, "training_iteration": 102, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-35-45", "timestamp": 1660250145, "time_this_iter_s": 29.56961703300476, "time_total_s": 8563.517528772354, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": 
false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8563.517528772354, "timesteps_since_restore": 1305600, "iterations_since_restore": 102, "perf": {"cpu_util_percent": 
29.607142857142858, "ram_util_percent": 58.27380952380951}} +{"episode_reward_max": 579.0, "episode_reward_min": 180.0, "episode_reward_mean": 488.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 296.0}, "policy_reward_mean": {"ppo": 244.08}, "custom_metrics": {"sparse_reward_mean": 169.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 149.76, "shaped_reward_min": 60, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.19, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.44, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.55, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, 
"onion_drop_agent_0_mean": 1.71, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.85, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.33, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 14.56, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.73, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.96, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.2, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.21, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.54, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.43, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.22, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.1, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, 
"optimal_onion_potting_agent_0_mean": 13.33, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 14.56, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.33, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 14.56, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 525.0, 396.0, 513.0, 479.0, 455.0, 525.0, 518.0, 519.0, 350.0, 579.0, 516.0, 519.0, 504.0, 455.0, 510.0, 384.0, 570.0, 498.0, 473.0, 519.0, 576.0, 516.0, 573.0, 465.0, 510.0, 533.0, 504.0, 525.0, 450.0, 482.0, 444.0, 525.0, 516.0, 530.0, 404.0, 447.0, 450.0, 522.0, 513.0, 519.0, 351.0, 573.0, 425.0, 473.0, 530.0, 479.0, 404.0, 470.0, 522.0, 525.0, 516.0, 197.0, 458.0, 519.0, 464.0, 180.0, 299.0, 519.0, 527.0, 506.0, 516.0, 522.0, 473.0, 570.0, 504.0, 530.0, 513.0, 530.0, 487.0, 513.0, 570.0, 513.0, 570.0, 522.0, 476.0, 446.0, 453.0, 459.0, 522.0, 504.0, 468.0, 516.0, 479.0, 464.0, 522.0, 516.0, 467.0, 444.0, 401.0, 522.0, 510.0, 530.0, 513.0, 482.0, 507.0, 516.0, 516.0, 513.0, 470.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [249.0, 270.0, 262.0, 263.0, 196.0, 200.0, 259.0, 254.0, 234.0, 245.0, 226.0, 229.0, 260.0, 265.0, 267.0, 251.0, 251.0, 268.0, 176.0, 174.0, 285.0, 294.0, 259.0, 257.0, 262.0, 257.0, 260.0, 244.0, 219.0, 236.0, 251.0, 259.0, 179.0, 205.0, 277.0, 293.0, 250.0, 248.0, 244.0, 229.0, 256.0, 263.0, 280.0, 296.0, 254.0, 262.0, 290.0, 283.0, 242.0, 223.0, 264.0, 246.0, 260.0, 273.0, 245.0, 259.0, 267.0, 258.0, 222.0, 228.0, 236.0, 246.0, 240.0, 204.0, 259.0, 266.0, 262.0, 254.0, 276.0, 254.0, 205.0, 199.0, 225.0, 222.0, 232.0, 218.0, 254.0, 268.0, 254.0, 259.0, 258.0, 261.0, 180.0, 171.0, 288.0, 285.0, 221.0, 204.0, 256.0, 217.0, 269.0, 
261.0, 236.0, 243.0, 197.0, 207.0, 242.0, 228.0, 260.0, 262.0, 262.0, 263.0, 270.0, 246.0, 102.0, 95.0, 217.0, 241.0, 268.0, 251.0, 217.0, 247.0, 86.0, 94.0, 143.0, 156.0, 262.0, 257.0, 259.0, 268.0, 261.0, 245.0, 241.0, 275.0, 260.0, 262.0, 220.0, 253.0, 285.0, 285.0, 240.0, 264.0, 272.0, 258.0, 244.0, 269.0, 261.0, 269.0, 234.0, 253.0, 261.0, 252.0, 282.0, 288.0, 251.0, 262.0, 287.0, 283.0, 265.0, 257.0, 243.0, 233.0, 212.0, 234.0, 244.0, 209.0, 226.0, 233.0, 254.0, 268.0, 247.0, 257.0, 236.0, 232.0, 265.0, 251.0, 240.0, 239.0, 226.0, 238.0, 259.0, 263.0, 237.0, 279.0, 233.0, 234.0, 213.0, 231.0, 196.0, 205.0, 246.0, 276.0, 251.0, 259.0, 272.0, 258.0, 256.0, 257.0, 226.0, 256.0, 244.0, 263.0, 267.0, 249.0, 260.0, 256.0, 251.0, 262.0, 233.0, 237.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9186906528346697, "mean_processing_ms": 0.45100660003140314, "mean_inference_ms": 2.4688879649561444}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2472000, "num_steps_sampled": 1318400, "sample_time_ms": 21037.799, "load_time_ms": 37.038, "grad_time_ms": 8875.986, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.9443045848797766e-32, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011967640602961183, "policy_loss": -0.0056260935962200165, "vf_loss": 74.17142486572266, "vf_explained_var": 0.7644608616828918, "kl": 0.0018772757612168789, "entropy": 1.188565731048584, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1318400, "episodes_total": 3296, "training_iteration": 103, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-36-14", "timestamp": 1660250174, "time_this_iter_s": 29.66763973236084, "time_total_s": 8593.185168504715, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, 
"batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8593.185168504715, "timesteps_since_restore": 1318400, "iterations_since_restore": 103, "perf": {"cpu_util_percent": 31.43809523809524, "ram_util_percent": 58.23095238095237}} +{"episode_reward_max": 579.0, "episode_reward_min": 350.0, "episode_reward_mean": 503.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 251.985}, "custom_metrics": {"sparse_reward_mean": 175.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 153.17, "shaped_reward_min": 104, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.39, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.84, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.53, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.76, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.51, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.58, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 14.95, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.84, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.79, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.24, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.71, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.62, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.34, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.58, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 14.95, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.58, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 14.95, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 573.0, 513.0, 519.0, 393.0, 564.0, 570.0, 570.0, 513.0, 513.0, 522.0, 492.0, 519.0, 413.0, 570.0, 482.0, 570.0, 510.0, 513.0, 462.0, 522.0, 498.0, 570.0, 498.0, 518.0, 510.0, 522.0, 504.0, 524.0, 536.0, 447.0, 452.0, 570.0, 504.0, 530.0, 513.0, 530.0, 487.0, 513.0, 570.0, 513.0, 570.0, 522.0, 476.0, 446.0, 453.0, 459.0, 522.0, 504.0, 468.0, 516.0, 479.0, 464.0, 522.0, 516.0, 467.0, 444.0, 401.0, 522.0, 510.0, 530.0, 513.0, 482.0, 507.0, 516.0, 516.0, 513.0, 470.0, 519.0, 525.0, 396.0, 513.0, 479.0, 455.0, 525.0, 518.0, 519.0, 350.0, 579.0, 516.0, 519.0, 504.0, 455.0, 510.0, 384.0, 570.0, 498.0, 473.0, 519.0, 576.0, 516.0, 573.0, 465.0, 510.0, 533.0, 504.0, 525.0, 450.0, 482.0, 444.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 280.0, 293.0, 280.0, 255.0, 258.0, 259.0, 260.0, 194.0, 199.0, 272.0, 292.0, 295.0, 275.0, 278.0, 292.0, 250.0, 263.0, 265.0, 248.0, 264.0, 258.0, 246.0, 246.0, 259.0, 260.0, 217.0, 196.0, 276.0, 294.0, 249.0, 233.0, 272.0, 298.0, 243.0, 267.0, 259.0, 254.0, 237.0, 225.0, 249.0, 273.0, 255.0, 243.0, 293.0, 277.0, 250.0, 248.0, 248.0, 270.0, 259.0, 251.0, 260.0, 262.0, 251.0, 253.0, 269.0, 255.0, 281.0, 255.0, 227.0, 
220.0, 227.0, 225.0, 285.0, 285.0, 240.0, 264.0, 272.0, 258.0, 244.0, 269.0, 261.0, 269.0, 234.0, 253.0, 261.0, 252.0, 282.0, 288.0, 251.0, 262.0, 287.0, 283.0, 265.0, 257.0, 243.0, 233.0, 212.0, 234.0, 244.0, 209.0, 226.0, 233.0, 254.0, 268.0, 247.0, 257.0, 236.0, 232.0, 265.0, 251.0, 240.0, 239.0, 226.0, 238.0, 259.0, 263.0, 237.0, 279.0, 233.0, 234.0, 213.0, 231.0, 196.0, 205.0, 246.0, 276.0, 251.0, 259.0, 272.0, 258.0, 256.0, 257.0, 226.0, 256.0, 244.0, 263.0, 267.0, 249.0, 260.0, 256.0, 251.0, 262.0, 233.0, 237.0, 249.0, 270.0, 262.0, 263.0, 196.0, 200.0, 259.0, 254.0, 234.0, 245.0, 226.0, 229.0, 260.0, 265.0, 267.0, 251.0, 251.0, 268.0, 176.0, 174.0, 285.0, 294.0, 259.0, 257.0, 262.0, 257.0, 260.0, 244.0, 219.0, 236.0, 251.0, 259.0, 179.0, 205.0, 277.0, 293.0, 250.0, 248.0, 244.0, 229.0, 256.0, 263.0, 280.0, 296.0, 254.0, 262.0, 290.0, 283.0, 242.0, 223.0, 264.0, 246.0, 260.0, 273.0, 245.0, 259.0, 267.0, 258.0, 222.0, 228.0, 236.0, 246.0, 240.0, 204.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9044104614216666, "mean_processing_ms": 0.44816608164827715, "mean_inference_ms": 2.4550939840364316}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2496000, "num_steps_sampled": 1331200, "sample_time_ms": 21059.641, "load_time_ms": 36.972, "grad_time_ms": 8872.978, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.9721522924398883e-32, "cur_lr": 0.0010000000474974513, "total_loss": 0.002359784208238125, "policy_loss": -0.004356598015874624, "vf_loss": 73.04959106445312, "vf_explained_var": 0.7670376896858215, "kl": 0.0017897128127515316, "entropy": 1.1771515607833862, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1331200, "episodes_total": 3328, "training_iteration": 104, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-36-45", "timestamp": 1660250205, "time_this_iter_s": 30.991883993148804, "time_total_s": 
8624.177052497864, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": 
false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, 
"num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8624.177052497864, "timesteps_since_restore": 1331200, "iterations_since_restore": 104, "perf": {"cpu_util_percent": 35.49545454545455, "ram_util_percent": 58.338636363636354}} +{"episode_reward_max": 579.0, "episode_reward_min": 350.0, "episode_reward_mean": 506.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 253.335}, "custom_metrics": {"sparse_reward_mean": 176.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 153.87, "shaped_reward_min": 104, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 15.18, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.08, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.52, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.93, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.56, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.78, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.76, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.93, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.67, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.73, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.38, 
"soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.64, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.31, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.78, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.76, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.78, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.76, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [467.0, 525.0, 527.0, 464.0, 516.0, 459.0, 516.0, 570.0, 519.0, 522.0, 416.0, 504.0, 507.0, 513.0, 458.0, 489.0, 522.0, 519.0, 510.0, 519.0, 525.0, 576.0, 476.0, 522.0, 513.0, 530.0, 530.0, 459.0, 516.0, 498.0, 530.0, 576.0, 516.0, 516.0, 513.0, 470.0, 519.0, 525.0, 396.0, 513.0, 479.0, 455.0, 525.0, 518.0, 519.0, 350.0, 579.0, 516.0, 519.0, 504.0, 455.0, 510.0, 384.0, 570.0, 498.0, 473.0, 519.0, 576.0, 516.0, 573.0, 465.0, 510.0, 533.0, 504.0, 525.0, 450.0, 482.0, 444.0, 573.0, 573.0, 513.0, 519.0, 393.0, 564.0, 570.0, 570.0, 513.0, 513.0, 522.0, 492.0, 519.0, 413.0, 570.0, 482.0, 570.0, 510.0, 513.0, 462.0, 522.0, 498.0, 570.0, 498.0, 518.0, 510.0, 522.0, 504.0, 524.0, 536.0, 447.0, 452.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [237.0, 230.0, 259.0, 266.0, 271.0, 256.0, 211.0, 253.0, 256.0, 260.0, 232.0, 227.0, 267.0, 249.0, 280.0, 290.0, 262.0, 257.0, 259.0, 263.0, 210.0, 206.0, 247.0, 257.0, 254.0, 253.0, 252.0, 261.0, 231.0, 227.0, 240.0, 
249.0, 273.0, 249.0, 259.0, 260.0, 253.0, 257.0, 270.0, 249.0, 255.0, 270.0, 284.0, 292.0, 247.0, 229.0, 265.0, 257.0, 245.0, 268.0, 260.0, 270.0, 273.0, 257.0, 214.0, 245.0, 260.0, 256.0, 251.0, 247.0, 262.0, 268.0, 291.0, 285.0, 267.0, 249.0, 260.0, 256.0, 251.0, 262.0, 233.0, 237.0, 249.0, 270.0, 262.0, 263.0, 196.0, 200.0, 259.0, 254.0, 234.0, 245.0, 226.0, 229.0, 260.0, 265.0, 267.0, 251.0, 251.0, 268.0, 176.0, 174.0, 285.0, 294.0, 259.0, 257.0, 262.0, 257.0, 260.0, 244.0, 219.0, 236.0, 251.0, 259.0, 179.0, 205.0, 277.0, 293.0, 250.0, 248.0, 244.0, 229.0, 256.0, 263.0, 280.0, 296.0, 254.0, 262.0, 290.0, 283.0, 242.0, 223.0, 264.0, 246.0, 260.0, 273.0, 245.0, 259.0, 267.0, 258.0, 222.0, 228.0, 236.0, 246.0, 240.0, 204.0, 293.0, 280.0, 293.0, 280.0, 255.0, 258.0, 259.0, 260.0, 194.0, 199.0, 272.0, 292.0, 295.0, 275.0, 278.0, 292.0, 250.0, 263.0, 265.0, 248.0, 264.0, 258.0, 246.0, 246.0, 259.0, 260.0, 217.0, 196.0, 276.0, 294.0, 249.0, 233.0, 272.0, 298.0, 243.0, 267.0, 259.0, 254.0, 237.0, 225.0, 249.0, 273.0, 255.0, 243.0, 293.0, 277.0, 250.0, 248.0, 248.0, 270.0, 259.0, 251.0, 260.0, 262.0, 251.0, 253.0, 269.0, 255.0, 281.0, 255.0, 227.0, 220.0, 227.0, 225.0]}, "sampler_perf": {"mean_env_wait_ms": 1.890394604549175, "mean_processing_ms": 0.4453764179105544, "mean_inference_ms": 2.441389523770417}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2520000, "num_steps_sampled": 1344000, "sample_time_ms": 20963.704, "load_time_ms": 37.317, "grad_time_ms": 8768.329, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 9.860761462199441e-33, "cur_lr": 0.0010000000474974513, "total_loss": 0.002573954639956355, "policy_loss": -0.004460552707314491, "vf_loss": 76.1985855102539, "vf_explained_var": 0.7691051959991455, "kl": 0.002485529985278845, "entropy": 1.1707229614257812, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1344000, "episodes_total": 
3360, "training_iteration": 105, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-37-15", "timestamp": 1660250235, "time_this_iter_s": 29.730670928955078, "time_total_s": 8653.907723426819, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": 
true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8653.907723426819, "timesteps_since_restore": 1344000, "iterations_since_restore": 105, "perf": {"cpu_util_percent": 34.530952380952385, "ram_util_percent": 58.22619047619047}} +{"episode_reward_max": 576.0, "episode_reward_min": 390.0, "episode_reward_mean": 508.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 254.47}, "custom_metrics": {"sparse_reward_mean": 177.0, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 154.94, "shaped_reward_min": 110, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.12, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.03, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.45, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.97, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.46, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.88, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.73, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.11, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.71, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.79, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.19, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.27, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.76, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.19, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.88, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.73, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.88, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.73, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [462.0, 533.0, 546.0, 518.0, 522.0, 513.0, 492.0, 501.0, 525.0, 522.0, 467.0, 390.0, 522.0, 527.0, 473.0, 525.0, 519.0, 519.0, 468.0, 504.0, 564.0, 470.0, 513.0, 516.0, 522.0, 421.0, 525.0, 573.0, 525.0, 527.0, 525.0, 516.0, 525.0, 450.0, 482.0, 444.0, 573.0, 573.0, 513.0, 519.0, 393.0, 564.0, 570.0, 570.0, 513.0, 513.0, 522.0, 492.0, 519.0, 413.0, 570.0, 482.0, 570.0, 510.0, 513.0, 462.0, 522.0, 498.0, 570.0, 498.0, 518.0, 510.0, 522.0, 504.0, 524.0, 536.0, 447.0, 452.0, 467.0, 525.0, 527.0, 464.0, 516.0, 459.0, 516.0, 570.0, 519.0, 522.0, 416.0, 504.0, 507.0, 513.0, 458.0, 489.0, 522.0, 519.0, 510.0, 519.0, 525.0, 576.0, 476.0, 522.0, 513.0, 530.0, 530.0, 459.0, 516.0, 498.0, 530.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [238.0, 224.0, 270.0, 263.0, 
275.0, 271.0, 267.0, 251.0, 254.0, 268.0, 245.0, 268.0, 256.0, 236.0, 270.0, 231.0, 273.0, 252.0, 269.0, 253.0, 231.0, 236.0, 191.0, 199.0, 267.0, 255.0, 272.0, 255.0, 241.0, 232.0, 260.0, 265.0, 266.0, 253.0, 259.0, 260.0, 238.0, 230.0, 260.0, 244.0, 281.0, 283.0, 231.0, 239.0, 259.0, 254.0, 254.0, 262.0, 268.0, 254.0, 210.0, 211.0, 263.0, 262.0, 285.0, 288.0, 259.0, 266.0, 248.0, 279.0, 268.0, 257.0, 264.0, 252.0, 267.0, 258.0, 222.0, 228.0, 236.0, 246.0, 240.0, 204.0, 293.0, 280.0, 293.0, 280.0, 255.0, 258.0, 259.0, 260.0, 194.0, 199.0, 272.0, 292.0, 295.0, 275.0, 278.0, 292.0, 250.0, 263.0, 265.0, 248.0, 264.0, 258.0, 246.0, 246.0, 259.0, 260.0, 217.0, 196.0, 276.0, 294.0, 249.0, 233.0, 272.0, 298.0, 243.0, 267.0, 259.0, 254.0, 237.0, 225.0, 249.0, 273.0, 255.0, 243.0, 293.0, 277.0, 250.0, 248.0, 248.0, 270.0, 259.0, 251.0, 260.0, 262.0, 251.0, 253.0, 269.0, 255.0, 281.0, 255.0, 227.0, 220.0, 227.0, 225.0, 237.0, 230.0, 259.0, 266.0, 271.0, 256.0, 211.0, 253.0, 256.0, 260.0, 232.0, 227.0, 267.0, 249.0, 280.0, 290.0, 262.0, 257.0, 259.0, 263.0, 210.0, 206.0, 247.0, 257.0, 254.0, 253.0, 252.0, 261.0, 231.0, 227.0, 240.0, 249.0, 273.0, 249.0, 259.0, 260.0, 253.0, 257.0, 270.0, 249.0, 255.0, 270.0, 284.0, 292.0, 247.0, 229.0, 265.0, 257.0, 245.0, 268.0, 260.0, 270.0, 273.0, 257.0, 214.0, 245.0, 260.0, 256.0, 251.0, 247.0, 262.0, 268.0, 291.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8766403613441696, "mean_processing_ms": 0.44263891296008706, "mean_inference_ms": 2.427842279334728}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2544000, "num_steps_sampled": 1356800, "sample_time_ms": 20984.654, "load_time_ms": 37.36, "grad_time_ms": 8798.762, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 4.930380731099721e-33, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012900071451440454, "policy_loss": -0.00579724321141839, "vf_loss": 76.69783782958984, "vf_explained_var": 0.7642709612846375, "kl": 0.0020595567766577005, "entropy": 
1.1650750637054443, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1356800, "episodes_total": 3392, "training_iteration": 106, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-37-46", "timestamp": 1660250266, "time_this_iter_s": 30.89556574821472, "time_total_s": 8684.803289175034, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": 
{"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8684.803289175034, "timesteps_since_restore": 1356800, "iterations_since_restore": 106, "perf": {"cpu_util_percent": 35.19772727272727, "ram_util_percent": 58.284090909090885}} +{"episode_reward_max": 576.0, "episode_reward_min": 390.0, "episode_reward_mean": 509.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 254.605}, "custom_metrics": {"sparse_reward_mean": 176.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 155.61, "shaped_reward_min": 110, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.78, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.97, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.83, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 4.17, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.7, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.17, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.24, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.76, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.15, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.97, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.83, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.97, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.83, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 576.0, 468.0, 510.0, 522.0, 510.0, 497.0, 453.0, 462.0, 519.0, 530.0, 522.0, 530.0, 527.0, 561.0, 525.0, 507.0, 522.0, 462.0, 501.0, 522.0, 522.0, 522.0, 530.0, 525.0, 516.0, 522.0, 522.0, 515.0, 573.0, 522.0, 404.0, 524.0, 536.0, 447.0, 452.0, 467.0, 525.0, 527.0, 464.0, 516.0, 459.0, 516.0, 570.0, 519.0, 522.0, 416.0, 504.0, 507.0, 513.0, 458.0, 489.0, 522.0, 519.0, 510.0, 519.0, 525.0, 576.0, 476.0, 522.0, 513.0, 530.0, 530.0, 459.0, 516.0, 498.0, 530.0, 576.0, 462.0, 533.0, 546.0, 518.0, 522.0, 513.0, 492.0, 501.0, 525.0, 522.0, 467.0, 390.0, 522.0, 527.0, 473.0, 525.0, 519.0, 519.0, 468.0, 504.0, 564.0, 470.0, 513.0, 516.0, 522.0, 421.0, 525.0, 573.0, 525.0, 527.0, 525.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [263.0, 262.0, 290.0, 286.0, 236.0, 232.0, 242.0, 268.0, 268.0, 254.0, 271.0, 239.0, 248.0, 249.0, 214.0, 239.0, 230.0, 232.0, 258.0, 261.0, 264.0, 266.0, 267.0, 255.0, 273.0, 257.0, 251.0, 276.0, 293.0, 268.0, 264.0, 261.0, 244.0, 263.0, 267.0, 255.0, 239.0, 223.0, 244.0, 257.0, 273.0, 249.0, 256.0, 266.0, 262.0, 260.0, 252.0, 278.0, 262.0, 263.0, 263.0, 253.0, 251.0, 271.0, 254.0, 268.0, 270.0, 245.0, 288.0, 285.0, 262.0, 260.0, 222.0, 182.0, 269.0, 255.0, 281.0, 255.0, 227.0, 220.0, 227.0, 225.0, 237.0, 230.0, 259.0, 266.0, 271.0, 256.0, 211.0, 253.0, 256.0, 260.0, 232.0, 227.0, 267.0, 249.0, 280.0, 290.0, 262.0, 257.0, 259.0, 263.0, 210.0, 206.0, 247.0, 257.0, 254.0, 253.0, 252.0, 261.0, 231.0, 227.0, 240.0, 249.0, 273.0, 249.0, 259.0, 260.0, 253.0, 257.0, 270.0, 249.0, 255.0, 270.0, 284.0, 292.0, 247.0, 229.0, 265.0, 257.0, 245.0, 268.0, 260.0, 270.0, 273.0, 257.0, 214.0, 245.0, 260.0, 256.0, 251.0, 247.0, 262.0, 268.0, 291.0, 285.0, 238.0, 224.0, 270.0, 263.0, 275.0, 271.0, 267.0, 251.0, 254.0, 268.0, 245.0, 268.0, 256.0, 236.0, 270.0, 231.0, 273.0, 252.0, 269.0, 253.0, 231.0, 236.0, 191.0, 199.0, 267.0, 255.0, 272.0, 255.0, 241.0, 232.0, 260.0, 265.0, 266.0, 253.0, 259.0, 260.0, 238.0, 230.0, 260.0, 244.0, 281.0, 283.0, 231.0, 239.0, 259.0, 254.0, 254.0, 262.0, 268.0, 254.0, 210.0, 211.0, 263.0, 262.0, 285.0, 288.0, 259.0, 266.0, 248.0, 279.0, 268.0, 257.0, 264.0, 252.0]}, "sampler_perf": {"mean_env_wait_ms": 1.863128513341346, "mean_processing_ms": 0.43994753351318544, "mean_inference_ms": 2.414282806471956}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2568000, "num_steps_sampled": 1369600, "sample_time_ms": 20842.684, "load_time_ms": 37.196, "grad_time_ms": 8837.48, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.4651903655498604e-33, 
"cur_lr": 0.0010000000474974513, "total_loss": 0.0005029598250985146, "policy_loss": -0.0072616818360984325, "vf_loss": 83.45578002929688, "vf_explained_var": 0.7516160011291504, "kl": 0.0016769097419455647, "entropy": 1.1618729829788208, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1369600, "episodes_total": 3424, "training_iteration": 107, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-38-17", "timestamp": 1660250297, "time_this_iter_s": 30.596415996551514, "time_total_s": 8715.399705171585, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": 
Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, 
"output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8715.399705171585, "timesteps_since_restore": 1369600, "iterations_since_restore": 107, "perf": {"cpu_util_percent": 35.45581395348837, "ram_util_percent": 58.237209302325574}} +{"episode_reward_max": 582.0, "episode_reward_min": 390.0, "episode_reward_mean": 513.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 297.0}, "policy_reward_mean": {"ppo": 256.905}, "custom_metrics": {"sparse_reward_mean": 178.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 157.41, "shaped_reward_min": 110, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.75, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.79, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.94, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.95, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.15, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 4.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, 
"useful_dish_pickup_agent_1_mean": 3.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.18, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.25, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.18, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.95, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.15, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.95, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.15, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [495.0, 525.0, 579.0, 513.0, 465.0, 519.0, 525.0, 525.0, 503.0, 522.0, 522.0, 533.0, 522.0, 468.0, 522.0, 525.0, 582.0, 573.0, 527.0, 519.0, 519.0, 570.0, 501.0, 519.0, 510.0, 467.0, 522.0, 522.0, 527.0, 533.0, 473.0, 465.0, 516.0, 498.0, 530.0, 576.0, 462.0, 533.0, 546.0, 518.0, 522.0, 513.0, 492.0, 501.0, 525.0, 522.0, 467.0, 390.0, 522.0, 527.0, 473.0, 525.0, 519.0, 519.0, 468.0, 504.0, 564.0, 470.0, 513.0, 516.0, 522.0, 421.0, 525.0, 573.0, 525.0, 527.0, 525.0, 516.0, 525.0, 576.0, 468.0, 510.0, 522.0, 510.0, 497.0, 453.0, 462.0, 519.0, 530.0, 522.0, 530.0, 527.0, 561.0, 525.0, 507.0, 522.0, 462.0, 501.0, 522.0, 522.0, 522.0, 530.0, 525.0, 516.0, 522.0, 522.0, 515.0, 573.0, 522.0, 404.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [243.0, 252.0, 252.0, 273.0, 293.0, 286.0, 267.0, 246.0, 232.0, 233.0, 260.0, 259.0, 264.0, 261.0, 272.0, 253.0, 250.0, 253.0, 266.0, 256.0, 272.0, 250.0, 269.0, 264.0, 261.0, 261.0, 223.0, 245.0, 249.0, 273.0, 252.0, 273.0, 285.0, 297.0, 287.0, 286.0, 268.0, 259.0, 267.0, 252.0, 260.0, 259.0, 280.0, 290.0, 239.0, 262.0, 267.0, 252.0, 263.0, 247.0, 246.0, 221.0, 249.0, 273.0, 257.0, 265.0, 262.0, 265.0, 264.0, 269.0, 234.0, 239.0, 229.0, 236.0, 260.0, 256.0, 251.0, 247.0, 262.0, 268.0, 291.0, 285.0, 238.0, 224.0, 270.0, 263.0, 275.0, 271.0, 267.0, 251.0, 254.0, 268.0, 245.0, 268.0, 256.0, 236.0, 270.0, 231.0, 273.0, 252.0, 269.0, 253.0, 231.0, 236.0, 191.0, 199.0, 267.0, 255.0, 272.0, 255.0, 241.0, 232.0, 260.0, 265.0, 266.0, 253.0, 259.0, 260.0, 238.0, 230.0, 260.0, 244.0, 281.0, 283.0, 231.0, 239.0, 259.0, 254.0, 254.0, 262.0, 268.0, 254.0, 210.0, 211.0, 263.0, 262.0, 285.0, 288.0, 259.0, 266.0, 248.0, 279.0, 268.0, 257.0, 264.0, 252.0, 263.0, 262.0, 290.0, 286.0, 236.0, 232.0, 242.0, 268.0, 268.0, 254.0, 271.0, 239.0, 248.0, 249.0, 214.0, 239.0, 230.0, 232.0, 258.0, 261.0, 264.0, 266.0, 267.0, 255.0, 273.0, 257.0, 251.0, 276.0, 293.0, 268.0, 264.0, 261.0, 244.0, 263.0, 267.0, 255.0, 239.0, 223.0, 244.0, 257.0, 273.0, 249.0, 256.0, 266.0, 262.0, 260.0, 252.0, 278.0, 262.0, 263.0, 263.0, 253.0, 251.0, 271.0, 254.0, 268.0, 270.0, 245.0, 288.0, 285.0, 262.0, 260.0, 222.0, 182.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8498555104022234, "mean_processing_ms": 0.43730168549860937, "mean_inference_ms": 2.400809449110995}, "off_policy_estimator": {}, "info": 
{"num_steps_trained": 2592000, "num_steps_sampled": 1382400, "sample_time_ms": 20702.738, "load_time_ms": 37.249, "grad_time_ms": 9000.45, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.2325951827749302e-33, "cur_lr": 0.0010000000474974513, "total_loss": 0.004501763265579939, "policy_loss": -0.002659810474142432, "vf_loss": 77.439453125, "vf_explained_var": 0.7766797542572021, "kl": 0.002080060075968504, "entropy": 1.1647237539291382, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1382400, "episodes_total": 3456, "training_iteration": 108, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-38-46", "timestamp": 1660250326, "time_this_iter_s": 29.44796586036682, "time_total_s": 8744.847671031952, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": 
{"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8744.847671031952, "timesteps_since_restore": 1382400, "iterations_since_restore": 108, "perf": {"cpu_util_percent": 35.6452380952381, "ram_util_percent": 58.221428571428575}} +{"episode_reward_max": 582.0, "episode_reward_min": 404.0, "episode_reward_mean": 517.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 297.0}, "policy_reward_mean": {"ppo": 258.575}, "custom_metrics": {"sparse_reward_mean": 179.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 158.75, "shaped_reward_min": 121, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.45, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.7, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.93, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.97, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.45, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.06, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.24, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 
10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 4.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.36, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.06, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.24, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.06, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.24, "viable_onion_potting_agent_1_min": 9, 
"viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [498.0, 525.0, 519.0, 522.0, 522.0, 516.0, 452.0, 530.0, 530.0, 522.0, 519.0, 473.0, 453.0, 570.0, 522.0, 441.0, 576.0, 476.0, 516.0, 507.0, 573.0, 530.0, 519.0, 579.0, 525.0, 473.0, 510.0, 567.0, 576.0, 516.0, 525.0, 524.0, 525.0, 527.0, 525.0, 516.0, 525.0, 576.0, 468.0, 510.0, 522.0, 510.0, 497.0, 453.0, 462.0, 519.0, 530.0, 522.0, 530.0, 527.0, 561.0, 525.0, 507.0, 522.0, 462.0, 501.0, 522.0, 522.0, 522.0, 530.0, 525.0, 516.0, 522.0, 522.0, 515.0, 573.0, 522.0, 404.0, 495.0, 525.0, 579.0, 513.0, 465.0, 519.0, 525.0, 525.0, 503.0, 522.0, 522.0, 533.0, 522.0, 468.0, 522.0, 525.0, 582.0, 573.0, 527.0, 519.0, 519.0, 
570.0, 501.0, 519.0, 510.0, 467.0, 522.0, 522.0, 527.0, 533.0, 473.0, 465.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [246.0, 252.0, 261.0, 264.0, 269.0, 250.0, 258.0, 264.0, 264.0, 258.0, 272.0, 244.0, 239.0, 213.0, 263.0, 267.0, 263.0, 267.0, 268.0, 254.0, 267.0, 252.0, 240.0, 233.0, 221.0, 232.0, 284.0, 286.0, 264.0, 258.0, 225.0, 216.0, 292.0, 284.0, 245.0, 231.0, 245.0, 271.0, 258.0, 249.0, 290.0, 283.0, 266.0, 264.0, 251.0, 268.0, 296.0, 283.0, 263.0, 262.0, 219.0, 254.0, 264.0, 246.0, 291.0, 276.0, 282.0, 294.0, 248.0, 268.0, 252.0, 273.0, 265.0, 259.0, 259.0, 266.0, 248.0, 279.0, 268.0, 257.0, 264.0, 252.0, 263.0, 262.0, 290.0, 286.0, 236.0, 232.0, 242.0, 268.0, 268.0, 254.0, 271.0, 239.0, 248.0, 249.0, 214.0, 239.0, 230.0, 232.0, 258.0, 261.0, 264.0, 266.0, 267.0, 255.0, 273.0, 257.0, 251.0, 276.0, 293.0, 268.0, 264.0, 261.0, 244.0, 263.0, 267.0, 255.0, 239.0, 223.0, 244.0, 257.0, 273.0, 249.0, 256.0, 266.0, 262.0, 260.0, 252.0, 278.0, 262.0, 263.0, 263.0, 253.0, 251.0, 271.0, 254.0, 268.0, 270.0, 245.0, 288.0, 285.0, 262.0, 260.0, 222.0, 182.0, 243.0, 252.0, 252.0, 273.0, 293.0, 286.0, 267.0, 246.0, 232.0, 233.0, 260.0, 259.0, 264.0, 261.0, 272.0, 253.0, 250.0, 253.0, 266.0, 256.0, 272.0, 250.0, 269.0, 264.0, 261.0, 261.0, 223.0, 245.0, 249.0, 273.0, 252.0, 273.0, 285.0, 297.0, 287.0, 286.0, 268.0, 259.0, 267.0, 252.0, 260.0, 259.0, 280.0, 290.0, 239.0, 262.0, 267.0, 252.0, 263.0, 247.0, 246.0, 221.0, 249.0, 273.0, 257.0, 265.0, 262.0, 265.0, 264.0, 269.0, 234.0, 
239.0, 229.0, 236.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8368123627881316, "mean_processing_ms": 0.4346999202219803, "mean_inference_ms": 2.3873597355001146}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2616000, "num_steps_sampled": 1395200, "sample_time_ms": 20622.092, "load_time_ms": 37.297, "grad_time_ms": 9168.951, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.162975913874651e-34, "cur_lr": 0.0010000000474974513, "total_loss": -0.0004981299280188978, "policy_loss": -0.007735797669738531, "vf_loss": 78.24005889892578, "vf_explained_var": 0.7600134015083313, "kl": 0.0021366437431424856, "entropy": 1.1726828813552856, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1395200, "episodes_total": 3488, "training_iteration": 109, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-39-16", "timestamp": 1660250356, "time_this_iter_s": 29.312750816345215, "time_total_s": 8774.160421848297, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 
0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8774.160421848297, "timesteps_since_restore": 1395200, "iterations_since_restore": 109, "perf": {"cpu_util_percent": 34.91219512195122, "ram_util_percent": 58.29999999999999}} +{"episode_reward_max": 582.0, "episode_reward_min": 390.0, "episode_reward_mean": 513.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 256.885}, "custom_metrics": {"sparse_reward_mean": 177.8, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 158.17, "shaped_reward_min": 116, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 
0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.47, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.76, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.68, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.52, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 14.01, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, 
"potting_onion_agent_1_mean": 15.08, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.52, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.89, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.33, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.22, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.01, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.08, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 14.01, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.08, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [396.0, 516.0, 522.0, 465.0, 390.0, 522.0, 530.0, 570.0, 487.0, 519.0, 525.0, 527.0, 507.0, 522.0, 473.0, 519.0, 519.0, 573.0, 519.0, 422.0, 495.0, 525.0, 519.0, 473.0, 522.0, 530.0, 522.0, 510.0, 498.0, 510.0, 516.0, 522.0, 515.0, 573.0, 522.0, 404.0, 495.0, 525.0, 579.0, 513.0, 465.0, 519.0, 525.0, 525.0, 503.0, 522.0, 522.0, 533.0, 522.0, 468.0, 522.0, 525.0, 582.0, 573.0, 527.0, 519.0, 519.0, 570.0, 501.0, 
519.0, 510.0, 467.0, 522.0, 522.0, 527.0, 533.0, 473.0, 465.0, 498.0, 525.0, 519.0, 522.0, 522.0, 516.0, 452.0, 530.0, 530.0, 522.0, 519.0, 473.0, 453.0, 570.0, 522.0, 441.0, 576.0, 476.0, 516.0, 507.0, 573.0, 530.0, 519.0, 579.0, 525.0, 473.0, 510.0, 567.0, 576.0, 516.0, 525.0, 524.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [203.0, 193.0, 260.0, 256.0, 268.0, 254.0, 236.0, 229.0, 203.0, 187.0, 261.0, 261.0, 271.0, 259.0, 272.0, 298.0, 236.0, 251.0, 260.0, 259.0, 269.0, 256.0, 258.0, 269.0, 256.0, 251.0, 270.0, 252.0, 252.0, 221.0, 270.0, 249.0, 256.0, 263.0, 285.0, 288.0, 270.0, 249.0, 203.0, 219.0, 254.0, 241.0, 250.0, 275.0, 273.0, 246.0, 236.0, 237.0, 250.0, 272.0, 270.0, 260.0, 262.0, 260.0, 243.0, 267.0, 243.0, 255.0, 266.0, 244.0, 269.0, 247.0, 249.0, 273.0, 270.0, 245.0, 288.0, 285.0, 262.0, 260.0, 222.0, 182.0, 243.0, 252.0, 252.0, 273.0, 293.0, 286.0, 267.0, 246.0, 232.0, 233.0, 260.0, 259.0, 264.0, 261.0, 272.0, 253.0, 250.0, 253.0, 266.0, 256.0, 272.0, 250.0, 269.0, 264.0, 261.0, 261.0, 223.0, 245.0, 249.0, 273.0, 252.0, 273.0, 285.0, 297.0, 287.0, 286.0, 268.0, 259.0, 267.0, 252.0, 260.0, 259.0, 280.0, 290.0, 239.0, 262.0, 267.0, 252.0, 263.0, 247.0, 246.0, 221.0, 249.0, 273.0, 257.0, 265.0, 262.0, 265.0, 264.0, 269.0, 234.0, 239.0, 229.0, 236.0, 246.0, 252.0, 261.0, 264.0, 269.0, 250.0, 258.0, 264.0, 264.0, 258.0, 272.0, 244.0, 239.0, 213.0, 263.0, 267.0, 263.0, 267.0, 268.0, 254.0, 267.0, 252.0, 240.0, 233.0, 221.0, 232.0, 284.0, 286.0, 264.0, 258.0, 225.0, 
216.0, 292.0, 284.0, 245.0, 231.0, 245.0, 271.0, 258.0, 249.0, 290.0, 283.0, 266.0, 264.0, 251.0, 268.0, 296.0, 283.0, 263.0, 262.0, 219.0, 254.0, 264.0, 246.0, 291.0, 276.0, 282.0, 294.0, 248.0, 268.0, 252.0, 273.0, 265.0, 259.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8240069603143507, "mean_processing_ms": 0.43214623654293904, "mean_inference_ms": 2.374213479802633}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2640000, "num_steps_sampled": 1408000, "sample_time_ms": 20611.05, "load_time_ms": 37.227, "grad_time_ms": 9292.4, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.0814879569373254e-34, "cur_lr": 0.0010000000474974513, "total_loss": 0.006001986563205719, "policy_loss": -0.0016462085768580437, "vf_loss": 82.35639953613281, "vf_explained_var": 0.7567899823188782, "kl": 0.0018497154815122485, "entropy": 1.174903154373169, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1408000, "episodes_total": 3520, "training_iteration": 110, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-39-46", "timestamp": 1660250386, "time_this_iter_s": 30.394602060317993, "time_total_s": 8804.555023908615, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8804.555023908615, "timesteps_since_restore": 1408000, "iterations_since_restore": 110, "perf": {"cpu_util_percent": 34.141860465116274, "ram_util_percent": 58.19302325581395}} +{"episode_reward_max": 579.0, "episode_reward_min": 390.0, "episode_reward_mean": 511.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 187.0}, "policy_reward_max": {"ppo": 299.0}, "policy_reward_mean": {"ppo": 255.505}, 
"custom_metrics": {"sparse_reward_mean": 177.0, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 157.01, "shaped_reward_min": 116, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.44, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.74, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.65, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, 
"useful_onion_drop_agent_1_mean": 0.59, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.96, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 15.02, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.77, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.24, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.85, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.33, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.22, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.96, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 15.02, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.96, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 15.02, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 507.0, 513.0, 504.0, 462.0, 522.0, 498.0, 576.0, 516.0, 519.0, 519.0, 564.0, 519.0, 561.0, 401.0, 522.0, 519.0, 570.0, 501.0, 484.0, 519.0, 522.0, 507.0, 413.0, 516.0, 573.0, 516.0, 522.0, 522.0, 
422.0, 465.0, 482.0, 527.0, 533.0, 473.0, 465.0, 498.0, 525.0, 519.0, 522.0, 522.0, 516.0, 452.0, 530.0, 530.0, 522.0, 519.0, 473.0, 453.0, 570.0, 522.0, 441.0, 576.0, 476.0, 516.0, 507.0, 573.0, 530.0, 519.0, 579.0, 525.0, 473.0, 510.0, 567.0, 576.0, 516.0, 525.0, 524.0, 396.0, 516.0, 522.0, 465.0, 390.0, 522.0, 530.0, 570.0, 487.0, 519.0, 525.0, 527.0, 507.0, 522.0, 473.0, 519.0, 519.0, 573.0, 519.0, 422.0, 495.0, 525.0, 519.0, 473.0, 522.0, 530.0, 522.0, 510.0, 498.0, 510.0, 516.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 287.0, 250.0, 257.0, 251.0, 262.0, 242.0, 262.0, 240.0, 222.0, 250.0, 272.0, 244.0, 254.0, 283.0, 293.0, 256.0, 260.0, 261.0, 258.0, 263.0, 256.0, 265.0, 299.0, 261.0, 258.0, 276.0, 285.0, 212.0, 189.0, 259.0, 263.0, 262.0, 257.0, 282.0, 288.0, 259.0, 242.0, 250.0, 234.0, 257.0, 262.0, 259.0, 263.0, 258.0, 249.0, 203.0, 210.0, 268.0, 248.0, 298.0, 275.0, 265.0, 251.0, 260.0, 262.0, 267.0, 255.0, 203.0, 219.0, 230.0, 235.0, 247.0, 235.0, 262.0, 265.0, 264.0, 269.0, 234.0, 239.0, 229.0, 236.0, 246.0, 252.0, 261.0, 264.0, 269.0, 250.0, 258.0, 264.0, 264.0, 258.0, 272.0, 244.0, 239.0, 213.0, 263.0, 267.0, 263.0, 267.0, 268.0, 254.0, 267.0, 252.0, 240.0, 233.0, 221.0, 232.0, 284.0, 286.0, 264.0, 258.0, 225.0, 216.0, 292.0, 284.0, 245.0, 231.0, 245.0, 271.0, 258.0, 249.0, 290.0, 283.0, 266.0, 264.0, 251.0, 268.0, 296.0, 283.0, 263.0, 262.0, 219.0, 254.0, 264.0, 246.0, 291.0, 276.0, 282.0, 294.0, 248.0, 268.0, 252.0, 273.0, 265.0, 259.0, 203.0, 
193.0, 260.0, 256.0, 268.0, 254.0, 236.0, 229.0, 203.0, 187.0, 261.0, 261.0, 271.0, 259.0, 272.0, 298.0, 236.0, 251.0, 260.0, 259.0, 269.0, 256.0, 258.0, 269.0, 256.0, 251.0, 270.0, 252.0, 252.0, 221.0, 270.0, 249.0, 256.0, 263.0, 285.0, 288.0, 270.0, 249.0, 203.0, 219.0, 254.0, 241.0, 250.0, 275.0, 273.0, 246.0, 236.0, 237.0, 250.0, 272.0, 270.0, 260.0, 262.0, 260.0, 243.0, 267.0, 243.0, 255.0, 266.0, 244.0, 269.0, 247.0, 249.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8114390530893243, "mean_processing_ms": 0.4296416576287852, "mean_inference_ms": 2.361419613419488}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2664000, "num_steps_sampled": 1420800, "sample_time_ms": 20548.052, "load_time_ms": 37.151, "grad_time_ms": 9411.24, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.5407439784686627e-34, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037929911632090807, "policy_loss": -0.003666130593046546, "vf_loss": 80.49629974365234, "vf_explained_var": 0.7547799944877625, "kl": 0.002083237050101161, "entropy": 1.1810179948806763, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1420800, "episodes_total": 3552, "training_iteration": 111, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-40-16", "timestamp": 1660250416, "time_this_iter_s": 29.606700897216797, "time_total_s": 8834.161724805832, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 
256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8834.161724805832, "timesteps_since_restore": 1420800, "iterations_since_restore": 111, "perf": {"cpu_util_percent": 34.73809523809524, "ram_util_percent": 58.2547619047619}} +{"episode_reward_max": 576.0, "episode_reward_min": 
390.0, "episode_reward_mean": 510.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 187.0}, "policy_reward_max": {"ppo": 299.0}, "policy_reward_mean": {"ppo": 255.4}, "custom_metrics": {"sparse_reward_mean": 177.0, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 156.8, "shaped_reward_min": 115, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.15, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.34, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.58, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.02, 
"onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.57, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.89, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 15.0, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.96, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.41, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.8, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.31, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.37, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.73, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.89, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, 
"optimal_onion_potting_agent_1_mean": 15.0, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.89, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 15.0, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [507.0, 527.0, 513.0, 516.0, 479.0, 
516.0, 441.0, 573.0, 522.0, 516.0, 513.0, 395.0, 525.0, 519.0, 521.0, 522.0, 522.0, 525.0, 530.0, 525.0, 462.0, 522.0, 441.0, 530.0, 573.0, 525.0, 527.0, 525.0, 522.0, 513.0, 570.0, 525.0, 576.0, 516.0, 525.0, 524.0, 396.0, 516.0, 522.0, 465.0, 390.0, 522.0, 530.0, 570.0, 487.0, 519.0, 525.0, 527.0, 507.0, 522.0, 473.0, 519.0, 519.0, 573.0, 519.0, 422.0, 495.0, 525.0, 519.0, 473.0, 522.0, 530.0, 522.0, 510.0, 498.0, 510.0, 516.0, 522.0, 576.0, 507.0, 513.0, 504.0, 462.0, 522.0, 498.0, 576.0, 516.0, 519.0, 519.0, 564.0, 519.0, 561.0, 401.0, 522.0, 519.0, 570.0, 501.0, 484.0, 519.0, 522.0, 507.0, 413.0, 516.0, 573.0, 516.0, 522.0, 522.0, 422.0, 465.0, 482.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 245.0, 263.0, 264.0, 258.0, 255.0, 262.0, 254.0, 235.0, 244.0, 256.0, 260.0, 221.0, 220.0, 293.0, 280.0, 256.0, 266.0, 273.0, 243.0, 262.0, 251.0, 195.0, 200.0, 272.0, 253.0, 253.0, 266.0, 258.0, 263.0, 263.0, 259.0, 262.0, 260.0, 262.0, 263.0, 255.0, 275.0, 252.0, 273.0, 226.0, 236.0, 274.0, 248.0, 233.0, 208.0, 270.0, 260.0, 288.0, 285.0, 249.0, 276.0, 265.0, 262.0, 259.0, 266.0, 261.0, 261.0, 254.0, 259.0, 277.0, 293.0, 255.0, 270.0, 282.0, 294.0, 248.0, 268.0, 252.0, 273.0, 265.0, 259.0, 203.0, 193.0, 260.0, 256.0, 268.0, 254.0, 236.0, 229.0, 203.0, 187.0, 261.0, 261.0, 271.0, 259.0, 272.0, 298.0, 236.0, 251.0, 260.0, 259.0, 269.0, 256.0, 258.0, 269.0, 256.0, 251.0, 270.0, 252.0, 252.0, 221.0, 270.0, 249.0, 256.0, 263.0, 285.0, 288.0, 270.0, 249.0, 203.0, 219.0, 254.0, 
241.0, 250.0, 275.0, 273.0, 246.0, 236.0, 237.0, 250.0, 272.0, 270.0, 260.0, 262.0, 260.0, 243.0, 267.0, 243.0, 255.0, 266.0, 244.0, 269.0, 247.0, 249.0, 273.0, 289.0, 287.0, 250.0, 257.0, 251.0, 262.0, 242.0, 262.0, 240.0, 222.0, 250.0, 272.0, 244.0, 254.0, 283.0, 293.0, 256.0, 260.0, 261.0, 258.0, 263.0, 256.0, 265.0, 299.0, 261.0, 258.0, 276.0, 285.0, 212.0, 189.0, 259.0, 263.0, 262.0, 257.0, 282.0, 288.0, 259.0, 242.0, 250.0, 234.0, 257.0, 262.0, 259.0, 263.0, 258.0, 249.0, 203.0, 210.0, 268.0, 248.0, 298.0, 275.0, 265.0, 251.0, 260.0, 262.0, 267.0, 255.0, 203.0, 219.0, 230.0, 235.0, 247.0, 235.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7991027412203113, "mean_processing_ms": 0.4271859275092463, "mean_inference_ms": 2.3490714276117277}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2688000, "num_steps_sampled": 1433600, "sample_time_ms": 20492.657, "load_time_ms": 37.09, "grad_time_ms": 9588.563, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 7.703719892343314e-35, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006233404856175184, "policy_loss": -0.00705617293715477, "vf_loss": 82.67167663574219, "vf_explained_var": 0.7619187235832214, "kl": 0.0019442345947027206, "entropy": 1.175291895866394, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1433600, "episodes_total": 3584, "training_iteration": 112, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-40-46", "timestamp": 1660250446, "time_this_iter_s": 30.788507223129272, "time_total_s": 8864.950232028961, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": 
"tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8864.950232028961, "timesteps_since_restore": 1433600, 
"iterations_since_restore": 112, "perf": {"cpu_util_percent": 33.402325581395345, "ram_util_percent": 58.2720930232558}} +{"episode_reward_max": 576.0, "episode_reward_min": 174.0, "episode_reward_mean": 509.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 85.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 254.9}, "custom_metrics": {"sparse_reward_mean": 177.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 155.8, "shaped_reward_min": 54, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.12, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.21, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.54, 
"useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.93, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.99, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.49, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.92, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.89, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.32, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.7, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, 
"soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.92, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.89, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.92, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.89, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 525.0, 361.0, 525.0, 524.0, 465.0, 461.0, 525.0, 174.0, 524.0, 570.0, 522.0, 519.0, 522.0, 444.0, 570.0, 570.0, 525.0, 519.0, 525.0, 516.0, 479.0, 522.0, 525.0, 513.0, 516.0, 570.0, 462.0, 573.0, 530.0, 492.0, 522.0, 498.0, 510.0, 516.0, 522.0, 576.0, 507.0, 513.0, 504.0, 462.0, 522.0, 498.0, 576.0, 516.0, 519.0, 519.0, 564.0, 519.0, 561.0, 401.0, 522.0, 519.0, 570.0, 501.0, 484.0, 519.0, 522.0, 507.0, 413.0, 516.0, 573.0, 516.0, 522.0, 522.0, 422.0, 465.0, 482.0, 507.0, 527.0, 513.0, 516.0, 479.0, 516.0, 441.0, 573.0, 522.0, 516.0, 513.0, 395.0, 525.0, 519.0, 521.0, 522.0, 522.0, 525.0, 530.0, 525.0, 462.0, 522.0, 441.0, 530.0, 573.0, 525.0, 527.0, 525.0, 522.0, 513.0, 570.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 282.0, 263.0, 262.0, 187.0, 174.0, 246.0, 279.0, 256.0, 268.0, 230.0, 235.0, 234.0, 227.0, 265.0, 260.0, 89.0, 85.0, 262.0, 262.0, 288.0, 282.0, 262.0, 260.0, 258.0, 261.0, 257.0, 265.0, 224.0, 220.0, 287.0, 283.0, 305.0, 265.0, 264.0, 261.0, 259.0, 260.0, 267.0, 258.0, 259.0, 257.0, 236.0, 243.0, 278.0, 244.0, 262.0, 263.0, 248.0, 265.0, 258.0, 258.0, 285.0, 285.0, 237.0, 225.0, 286.0, 287.0, 272.0, 258.0, 248.0, 244.0, 267.0, 255.0, 243.0, 255.0, 266.0, 244.0, 269.0, 247.0, 249.0, 273.0, 289.0, 287.0, 250.0, 257.0, 251.0, 262.0, 242.0, 
262.0, 240.0, 222.0, 250.0, 272.0, 244.0, 254.0, 283.0, 293.0, 256.0, 260.0, 261.0, 258.0, 263.0, 256.0, 265.0, 299.0, 261.0, 258.0, 276.0, 285.0, 212.0, 189.0, 259.0, 263.0, 262.0, 257.0, 282.0, 288.0, 259.0, 242.0, 250.0, 234.0, 257.0, 262.0, 259.0, 263.0, 258.0, 249.0, 203.0, 210.0, 268.0, 248.0, 298.0, 275.0, 265.0, 251.0, 260.0, 262.0, 267.0, 255.0, 203.0, 219.0, 230.0, 235.0, 247.0, 235.0, 262.0, 245.0, 263.0, 264.0, 258.0, 255.0, 262.0, 254.0, 235.0, 244.0, 256.0, 260.0, 221.0, 220.0, 293.0, 280.0, 256.0, 266.0, 273.0, 243.0, 262.0, 251.0, 195.0, 200.0, 272.0, 253.0, 253.0, 266.0, 258.0, 263.0, 263.0, 259.0, 262.0, 260.0, 262.0, 263.0, 255.0, 275.0, 252.0, 273.0, 226.0, 236.0, 274.0, 248.0, 233.0, 208.0, 270.0, 260.0, 288.0, 285.0, 249.0, 276.0, 265.0, 262.0, 259.0, 266.0, 261.0, 261.0, 254.0, 259.0, 277.0, 293.0, 255.0, 270.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7869849940199092, "mean_processing_ms": 0.4247741951637306, "mean_inference_ms": 2.3369101082045303}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2712000, "num_steps_sampled": 1446400, "sample_time_ms": 20390.667, "load_time_ms": 37.347, "grad_time_ms": 9843.106, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.851859946171657e-35, "cur_lr": 0.0010000000474974513, "total_loss": 0.0046621630899608135, "policy_loss": -0.0027472442016005516, "vf_loss": 80.03414916992188, "vf_explained_var": 0.7775616645812988, "kl": 0.0022294942755252123, "entropy": 1.1880191564559937, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1446400, "episodes_total": 3616, "training_iteration": 113, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-41-18", "timestamp": 1660250478, "time_this_iter_s": 31.195298194885254, "time_total_s": 8896.145530223846, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, 
"num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8896.145530223846, "timesteps_since_restore": 1446400, "iterations_since_restore": 113, "perf": {"cpu_util_percent": 34.73181818181818, "ram_util_percent": 58.20681818181818}} +{"episode_reward_max": 576.0, "episode_reward_min": 174.0, "episode_reward_mean": 507.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 85.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 253.95}, "custom_metrics": {"sparse_reward_mean": 175.8, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.3, "shaped_reward_min": 54, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.0, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.27, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.44, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.7, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.94, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.77, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.97, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.77, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.25, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.86, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.77, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.97, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.77, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.97, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 479.0, 516.0, 449.0, 507.0, 573.0, 504.0, 492.0, 424.0, 536.0, 573.0, 478.0, 576.0, 522.0, 522.0, 519.0, 567.0, 401.0, 490.0, 522.0, 513.0, 522.0, 245.0, 521.0, 533.0, 570.0, 573.0, 516.0, 464.0, 576.0, 573.0, 522.0, 522.0, 422.0, 465.0, 482.0, 507.0, 527.0, 513.0, 516.0, 479.0, 516.0, 441.0, 573.0, 522.0, 516.0, 513.0, 395.0, 525.0, 519.0, 521.0, 522.0, 522.0, 525.0, 530.0, 525.0, 462.0, 522.0, 441.0, 530.0, 573.0, 525.0, 527.0, 525.0, 522.0, 513.0, 570.0, 525.0, 570.0, 525.0, 361.0, 525.0, 524.0, 465.0, 461.0, 525.0, 174.0, 524.0, 570.0, 522.0, 519.0, 522.0, 444.0, 570.0, 570.0, 525.0, 519.0, 525.0, 516.0, 479.0, 522.0, 525.0, 513.0, 516.0, 570.0, 462.0, 573.0, 530.0, 492.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 265.0, 241.0, 238.0, 257.0, 259.0, 219.0, 230.0, 263.0, 244.0, 289.0, 284.0, 241.0, 263.0, 247.0, 245.0, 214.0, 210.0, 263.0, 273.0, 272.0, 301.0, 230.0, 248.0, 291.0, 285.0, 275.0, 247.0, 260.0, 262.0, 259.0, 260.0, 279.0, 288.0, 209.0, 192.0, 244.0, 246.0, 254.0, 268.0, 241.0, 272.0, 267.0, 255.0, 122.0, 123.0, 264.0, 257.0, 270.0, 
263.0, 303.0, 267.0, 287.0, 286.0, 249.0, 267.0, 234.0, 230.0, 294.0, 282.0, 292.0, 281.0, 256.0, 266.0, 267.0, 255.0, 203.0, 219.0, 230.0, 235.0, 247.0, 235.0, 262.0, 245.0, 263.0, 264.0, 258.0, 255.0, 262.0, 254.0, 235.0, 244.0, 256.0, 260.0, 221.0, 220.0, 293.0, 280.0, 256.0, 266.0, 273.0, 243.0, 262.0, 251.0, 195.0, 200.0, 272.0, 253.0, 253.0, 266.0, 258.0, 263.0, 263.0, 259.0, 262.0, 260.0, 262.0, 263.0, 255.0, 275.0, 252.0, 273.0, 226.0, 236.0, 274.0, 248.0, 233.0, 208.0, 270.0, 260.0, 288.0, 285.0, 249.0, 276.0, 265.0, 262.0, 259.0, 266.0, 261.0, 261.0, 254.0, 259.0, 277.0, 293.0, 255.0, 270.0, 288.0, 282.0, 263.0, 262.0, 187.0, 174.0, 246.0, 279.0, 256.0, 268.0, 230.0, 235.0, 234.0, 227.0, 265.0, 260.0, 89.0, 85.0, 262.0, 262.0, 288.0, 282.0, 262.0, 260.0, 258.0, 261.0, 257.0, 265.0, 224.0, 220.0, 287.0, 283.0, 305.0, 265.0, 264.0, 261.0, 259.0, 260.0, 267.0, 258.0, 259.0, 257.0, 236.0, 243.0, 278.0, 244.0, 262.0, 263.0, 248.0, 265.0, 258.0, 258.0, 285.0, 285.0, 237.0, 225.0, 286.0, 287.0, 272.0, 258.0, 248.0, 244.0, 267.0, 255.0]}, "sampler_perf": {"mean_env_wait_ms": 1.775081398673085, "mean_processing_ms": 0.4224038975736352, "mean_inference_ms": 2.3249390744039835}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2736000, "num_steps_sampled": 1459200, "sample_time_ms": 20266.804, "load_time_ms": 37.384, "grad_time_ms": 9843.514, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.9259299730858284e-35, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005095542292110622, "policy_loss": -0.007585855200886726, "vf_loss": 76.6465072631836, "vf_explained_var": 0.7633175849914551, "kl": 0.0019467826932668686, "entropy": 1.1766948699951172, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1459200, "episodes_total": 3648, "training_iteration": 114, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-41-47", 
"timestamp": 1660250507, "time_this_iter_s": 29.75877094268799, "time_total_s": 8925.904301166534, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", 
"ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 
0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8925.904301166534, "timesteps_since_restore": 1459200, "iterations_since_restore": 114, "perf": {"cpu_util_percent": 34.199999999999996, "ram_util_percent": 58.228571428571435}} +{"episode_reward_max": 576.0, "episode_reward_min": 174.0, "episode_reward_mean": 512.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 85.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 256.345}, "custom_metrics": {"sparse_reward_mean": 177.8, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 157.09, "shaped_reward_min": 54, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 
0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.25, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.43, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.59, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.87, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.56, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.9, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.09, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.72, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 1, 
"soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.36, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.71, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.33, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.9, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.09, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.9, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.09, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 516.0, 467.0, 530.0, 507.0, 513.0, 519.0, 558.0, 504.0, 522.0, 501.0, 525.0, 525.0, 524.0, 519.0, 513.0, 513.0, 525.0, 516.0, 567.0, 510.0, 521.0, 513.0, 522.0, 518.0, 576.0, 530.0, 527.0, 459.0, 525.0, 570.0, 525.0, 522.0, 513.0, 570.0, 525.0, 570.0, 525.0, 361.0, 525.0, 524.0, 465.0, 461.0, 525.0, 174.0, 524.0, 570.0, 522.0, 519.0, 522.0, 444.0, 570.0, 570.0, 525.0, 519.0, 525.0, 516.0, 479.0, 522.0, 525.0, 513.0, 516.0, 570.0, 462.0, 573.0, 530.0, 492.0, 522.0, 519.0, 479.0, 516.0, 449.0, 507.0, 573.0, 504.0, 492.0, 424.0, 536.0, 573.0, 478.0, 576.0, 522.0, 522.0, 519.0, 567.0, 401.0, 490.0, 522.0, 513.0, 522.0, 245.0, 521.0, 533.0, 570.0, 573.0, 516.0, 464.0, 576.0, 573.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 260.0, 251.0, 265.0, 239.0, 228.0, 266.0, 264.0, 255.0, 252.0, 253.0, 260.0, 254.0, 265.0, 285.0, 273.0, 244.0, 260.0, 
270.0, 252.0, 247.0, 254.0, 270.0, 255.0, 260.0, 265.0, 265.0, 259.0, 258.0, 261.0, 264.0, 249.0, 268.0, 245.0, 268.0, 257.0, 251.0, 265.0, 276.0, 291.0, 253.0, 257.0, 263.0, 258.0, 264.0, 249.0, 264.0, 258.0, 264.0, 254.0, 288.0, 288.0, 266.0, 264.0, 262.0, 265.0, 241.0, 218.0, 258.0, 267.0, 279.0, 291.0, 249.0, 276.0, 261.0, 261.0, 254.0, 259.0, 277.0, 293.0, 255.0, 270.0, 288.0, 282.0, 263.0, 262.0, 187.0, 174.0, 246.0, 279.0, 256.0, 268.0, 230.0, 235.0, 234.0, 227.0, 265.0, 260.0, 89.0, 85.0, 262.0, 262.0, 288.0, 282.0, 262.0, 260.0, 258.0, 261.0, 257.0, 265.0, 224.0, 220.0, 287.0, 283.0, 305.0, 265.0, 264.0, 261.0, 259.0, 260.0, 267.0, 258.0, 259.0, 257.0, 236.0, 243.0, 278.0, 244.0, 262.0, 263.0, 248.0, 265.0, 258.0, 258.0, 285.0, 285.0, 237.0, 225.0, 286.0, 287.0, 272.0, 258.0, 248.0, 244.0, 267.0, 255.0, 254.0, 265.0, 241.0, 238.0, 257.0, 259.0, 219.0, 230.0, 263.0, 244.0, 289.0, 284.0, 241.0, 263.0, 247.0, 245.0, 214.0, 210.0, 263.0, 273.0, 272.0, 301.0, 230.0, 248.0, 291.0, 285.0, 275.0, 247.0, 260.0, 262.0, 259.0, 260.0, 279.0, 288.0, 209.0, 192.0, 244.0, 246.0, 254.0, 268.0, 241.0, 272.0, 267.0, 255.0, 122.0, 123.0, 264.0, 257.0, 270.0, 263.0, 303.0, 267.0, 287.0, 286.0, 249.0, 267.0, 234.0, 230.0, 294.0, 282.0, 292.0, 281.0, 256.0, 266.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7633797791154415, "mean_processing_ms": 0.42007059281459463, "mean_inference_ms": 2.3128736955847145}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2760000, "num_steps_sampled": 1472000, "sample_time_ms": 20145.685, "load_time_ms": 37.355, "grad_time_ms": 9802.708, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 9.629649865429142e-36, "cur_lr": 0.0010000000474974513, "total_loss": -0.001834428054280579, "policy_loss": -0.008199676871299744, "vf_loss": 69.51961517333984, "vf_explained_var": 0.7745820879936218, "kl": 0.002087961183860898, "entropy": 1.1734023094177246, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 
1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1472000, "episodes_total": 3680, "training_iteration": 115, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-42-15", "timestamp": 1660250535, "time_this_iter_s": 28.112826824188232, "time_total_s": 8954.017127990723, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, 
"env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 
3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8954.017127990723, "timesteps_since_restore": 1472000, "iterations_since_restore": 115, "perf": {"cpu_util_percent": 35.5225, "ram_util_percent": 58.23499999999999}} +{"episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 512.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 75.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 256.435}, "custom_metrics": {"sparse_reward_mean": 178.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.47, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.4, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.79, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.8, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.94, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.04, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.97, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.18, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.24, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.65, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.04, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.97, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.04, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.97, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 504.0, 522.0, 339.0, 465.0, 416.0, 516.0, 476.0, 573.0, 519.0, 530.0, 180.0, 570.0, 570.0, 470.0, 573.0, 456.0, 573.0, 513.0, 501.0, 567.0, 579.0, 519.0, 518.0, 582.0, 519.0, 564.0, 504.0, 582.0, 461.0, 456.0, 501.0, 573.0, 530.0, 492.0, 522.0, 519.0, 479.0, 516.0, 449.0, 507.0, 573.0, 504.0, 492.0, 424.0, 536.0, 573.0, 478.0, 576.0, 522.0, 522.0, 519.0, 567.0, 401.0, 490.0, 522.0, 513.0, 522.0, 245.0, 521.0, 533.0, 570.0, 573.0, 516.0, 464.0, 576.0, 573.0, 522.0, 522.0, 516.0, 467.0, 530.0, 507.0, 513.0, 519.0, 558.0, 504.0, 522.0, 501.0, 525.0, 525.0, 524.0, 519.0, 513.0, 513.0, 525.0, 516.0, 567.0, 510.0, 521.0, 513.0, 522.0, 518.0, 576.0, 530.0, 527.0, 459.0, 525.0, 570.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 290.0, 254.0, 250.0, 251.0, 271.0, 161.0, 178.0, 228.0, 237.0, 211.0, 205.0, 248.0, 268.0, 241.0, 235.0, 300.0, 273.0, 271.0, 248.0, 269.0, 261.0, 105.0, 75.0, 287.0, 283.0, 289.0, 281.0, 248.0, 222.0, 283.0, 290.0, 216.0, 240.0, 289.0, 284.0, 261.0, 252.0, 248.0, 253.0, 266.0, 301.0, 284.0, 295.0, 268.0, 251.0, 245.0, 273.0, 291.0, 291.0, 257.0, 262.0, 277.0, 287.0, 258.0, 246.0, 287.0, 295.0, 234.0, 227.0, 233.0, 223.0, 248.0, 253.0, 286.0, 287.0, 272.0, 258.0, 248.0, 244.0, 267.0, 255.0, 254.0, 265.0, 241.0, 238.0, 257.0, 259.0, 219.0, 230.0, 263.0, 244.0, 289.0, 284.0, 241.0, 263.0, 247.0, 245.0, 214.0, 210.0, 263.0, 273.0, 272.0, 301.0, 230.0, 248.0, 291.0, 285.0, 275.0, 247.0, 260.0, 262.0, 259.0, 260.0, 279.0, 288.0, 209.0, 192.0, 244.0, 246.0, 254.0, 268.0, 241.0, 272.0, 267.0, 255.0, 122.0, 123.0, 264.0, 257.0, 270.0, 263.0, 303.0, 267.0, 287.0, 286.0, 249.0, 267.0, 234.0, 230.0, 294.0, 282.0, 292.0, 281.0, 256.0, 266.0, 262.0, 260.0, 251.0, 265.0, 239.0, 228.0, 266.0, 264.0, 255.0, 252.0, 253.0, 260.0, 254.0, 265.0, 285.0, 273.0, 244.0, 260.0, 270.0, 252.0, 247.0, 254.0, 270.0, 255.0, 260.0, 265.0, 265.0, 259.0, 258.0, 261.0, 264.0, 249.0, 268.0, 245.0, 268.0, 257.0, 251.0, 265.0, 276.0, 291.0, 253.0, 257.0, 263.0, 258.0, 264.0, 249.0, 264.0, 258.0, 264.0, 254.0, 288.0, 288.0, 266.0, 264.0, 262.0, 265.0, 241.0, 218.0, 258.0, 267.0, 279.0, 291.0, 249.0, 276.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7518867216658078, "mean_processing_ms": 0.4177743622386954, "mean_inference_ms": 2.300854372548392}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2784000, "num_steps_sampled": 1484800, "sample_time_ms": 20021.401, "load_time_ms": 37.489, "grad_time_ms": 9776.788, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.814824932714571e-36, "cur_lr": 0.0010000000474974513, "total_loss": 0.00206244015134871, "policy_loss": -0.005156705155968666, "vf_loss": 78.12344360351562, 
"vf_explained_var": 0.7715883851051331, "kl": 0.0026745833456516266, "entropy": 1.1864006519317627, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1484800, "episodes_total": 3712, "training_iteration": 116, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-42-45", "timestamp": 1660250565, "time_this_iter_s": 29.3955659866333, "time_total_s": 8983.412693977356, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8983.412693977356, "timesteps_since_restore": 1484800, "iterations_since_restore": 116, "perf": {"cpu_util_percent": 35.66904761904762, "ram_util_percent": 58.25}} +{"episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 513.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 75.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 256.76}, "custom_metrics": {"sparse_reward_mean": 178.4, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.72, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.34, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.93, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.72, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.11, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.0, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.81, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.76, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.19, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.67, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.53, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.11, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.0, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.11, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.0, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 530.0, 519.0, 455.0, 222.0, 524.0, 519.0, 525.0, 519.0, 576.0, 467.0, 522.0, 579.0, 570.0, 522.0, 512.0, 510.0, 495.0, 522.0, 573.0, 522.0, 573.0, 525.0, 576.0, 482.0, 522.0, 484.0, 525.0, 456.0, 461.0, 516.0, 522.0, 464.0, 576.0, 573.0, 522.0, 522.0, 516.0, 467.0, 530.0, 507.0, 513.0, 519.0, 558.0, 504.0, 522.0, 501.0, 525.0, 525.0, 524.0, 519.0, 513.0, 513.0, 525.0, 516.0, 567.0, 510.0, 521.0, 513.0, 522.0, 518.0, 576.0, 530.0, 527.0, 459.0, 525.0, 570.0, 525.0, 573.0, 504.0, 522.0, 339.0, 465.0, 416.0, 516.0, 476.0, 573.0, 519.0, 530.0, 180.0, 570.0, 570.0, 470.0, 573.0, 456.0, 573.0, 513.0, 501.0, 567.0, 579.0, 519.0, 518.0, 582.0, 519.0, 564.0, 504.0, 582.0, 461.0, 456.0, 501.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 265.0, 273.0, 257.0, 254.0, 265.0, 220.0, 235.0, 105.0, 117.0, 259.0, 265.0, 265.0, 254.0, 260.0, 265.0, 261.0, 258.0, 278.0, 298.0, 227.0, 240.0, 253.0, 269.0, 286.0, 293.0, 290.0, 280.0, 266.0, 256.0, 252.0, 260.0, 252.0, 258.0, 237.0, 258.0, 264.0, 258.0, 277.0, 296.0, 268.0, 254.0, 285.0, 288.0, 270.0, 255.0, 279.0, 297.0, 231.0, 251.0, 257.0, 265.0, 237.0, 247.0, 263.0, 262.0, 221.0, 235.0, 234.0, 227.0, 260.0, 256.0, 262.0, 260.0, 234.0, 230.0, 294.0, 282.0, 292.0, 281.0, 256.0, 266.0, 262.0, 260.0, 251.0, 265.0, 239.0, 228.0, 266.0, 264.0, 255.0, 252.0, 253.0, 260.0, 254.0, 265.0, 285.0, 273.0, 244.0, 260.0, 270.0, 252.0, 247.0, 254.0, 270.0, 255.0, 260.0, 265.0, 265.0, 259.0, 258.0, 261.0, 264.0, 249.0, 268.0, 245.0, 268.0, 257.0, 251.0, 265.0, 276.0, 291.0, 253.0, 257.0, 263.0, 258.0, 264.0, 249.0, 264.0, 258.0, 264.0, 254.0, 288.0, 288.0, 266.0, 264.0, 262.0, 265.0, 241.0, 218.0, 258.0, 267.0, 279.0, 291.0, 249.0, 276.0, 283.0, 290.0, 254.0, 250.0, 251.0, 271.0, 161.0, 178.0, 228.0, 237.0, 211.0, 205.0, 248.0, 268.0, 241.0, 235.0, 300.0, 273.0, 271.0, 248.0, 269.0, 261.0, 105.0, 75.0, 287.0, 283.0, 289.0, 281.0, 248.0, 222.0, 283.0, 290.0, 216.0, 240.0, 289.0, 284.0, 261.0, 252.0, 248.0, 253.0, 266.0, 301.0, 284.0, 295.0, 268.0, 251.0, 245.0, 273.0, 291.0, 291.0, 257.0, 262.0, 277.0, 287.0, 258.0, 246.0, 287.0, 295.0, 234.0, 227.0, 233.0, 223.0, 248.0, 253.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7406015862798738, "mean_processing_ms": 0.4155246745297136, "mean_inference_ms": 2.2890629120226706}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2808000, "num_steps_sampled": 1497600, "sample_time_ms": 20042.808, "load_time_ms": 37.283, "grad_time_ms": 9721.955, "update_time_ms": 0.002, "learner": 
{"ppo": {"cur_kl_coeff": 2.4074124663572855e-36, "cur_lr": 0.0010000000474974513, "total_loss": 0.00018632395949680358, "policy_loss": -0.007065422832965851, "vf_loss": 78.37664794921875, "vf_explained_var": 0.7705362439155579, "kl": 0.0016083299415186048, "entropy": 1.171847939491272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1497600, "episodes_total": 3744, "training_iteration": 117, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-43-15", "timestamp": 1660250595, "time_this_iter_s": 30.259077787399292, "time_total_s": 9013.671771764755, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": 
{"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], 
"postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9013.671771764755, "timesteps_since_restore": 1497600, "iterations_since_restore": 117, "perf": {"cpu_util_percent": 36.71627906976744, "ram_util_percent": 58.1906976744186}} +{"episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 512.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 75.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 256.43}, "custom_metrics": {"sparse_reward_mean": 178.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.86, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.66, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.99, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.63, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.6, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 14.1, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.01, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.07, "useful_dish_pickup_agent_0_min": 0, 
"useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.58, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.45, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.1, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.01, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.1, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.01, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 516.0, 510.0, 525.0, 570.0, 468.0, 495.0, 525.0, 519.0, 519.0, 522.0, 579.0, 579.0, 501.0, 527.0, 530.0, 525.0, 573.0, 525.0, 522.0, 522.0, 527.0, 444.0, 515.0, 462.0, 533.0, 525.0, 519.0, 519.0, 527.0, 504.0, 515.0, 459.0, 525.0, 570.0, 525.0, 573.0, 504.0, 522.0, 339.0, 465.0, 416.0, 516.0, 476.0, 573.0, 519.0, 530.0, 180.0, 570.0, 570.0, 470.0, 573.0, 456.0, 573.0, 513.0, 501.0, 567.0, 579.0, 519.0, 518.0, 582.0, 519.0, 564.0, 504.0, 582.0, 461.0, 456.0, 501.0, 519.0, 530.0, 519.0, 455.0, 222.0, 524.0, 519.0, 525.0, 519.0, 576.0, 467.0, 522.0, 579.0, 570.0, 522.0, 512.0, 510.0, 495.0, 522.0, 573.0, 522.0, 573.0, 525.0, 576.0, 482.0, 522.0, 484.0, 525.0, 456.0, 461.0, 516.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [263.0, 267.0, 265.0, 251.0, 249.0, 261.0, 265.0, 260.0, 283.0, 287.0, 227.0, 241.0, 239.0, 256.0, 260.0, 265.0, 256.0, 263.0, 269.0, 250.0, 263.0, 259.0, 282.0, 297.0, 288.0, 291.0, 249.0, 252.0, 273.0, 254.0, 275.0, 255.0, 276.0, 249.0, 281.0, 292.0, 256.0, 269.0, 267.0, 255.0, 253.0, 269.0, 265.0, 262.0, 220.0, 224.0, 258.0, 257.0, 218.0, 244.0, 263.0, 270.0, 262.0, 263.0, 251.0, 268.0, 267.0, 252.0, 261.0, 266.0, 253.0, 251.0, 251.0, 264.0, 241.0, 218.0, 258.0, 267.0, 279.0, 291.0, 249.0, 276.0, 283.0, 290.0, 254.0, 250.0, 251.0, 271.0, 161.0, 178.0, 228.0, 237.0, 211.0, 205.0, 248.0, 268.0, 241.0, 235.0, 300.0, 273.0, 271.0, 248.0, 269.0, 261.0, 105.0, 75.0, 287.0, 283.0, 289.0, 281.0, 248.0, 222.0, 283.0, 290.0, 216.0, 240.0, 289.0, 284.0, 261.0, 252.0, 248.0, 253.0, 266.0, 301.0, 284.0, 295.0, 268.0, 251.0, 245.0, 273.0, 291.0, 291.0, 257.0, 262.0, 277.0, 287.0, 258.0, 246.0, 287.0, 295.0, 234.0, 227.0, 233.0, 223.0, 248.0, 253.0, 254.0, 265.0, 273.0, 257.0, 254.0, 265.0, 220.0, 235.0, 105.0, 117.0, 259.0, 265.0, 265.0, 254.0, 260.0, 265.0, 261.0, 258.0, 278.0, 298.0, 227.0, 240.0, 253.0, 269.0, 286.0, 293.0, 290.0, 280.0, 266.0, 256.0, 252.0, 260.0, 252.0, 258.0, 237.0, 258.0, 264.0, 258.0, 277.0, 296.0, 268.0, 254.0, 285.0, 288.0, 270.0, 255.0, 279.0, 297.0, 231.0, 251.0, 257.0, 265.0, 237.0, 247.0, 263.0, 262.0, 221.0, 235.0, 234.0, 227.0, 260.0, 256.0, 262.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7295179891011523, "mean_processing_ms": 0.4133177002223833, "mean_inference_ms": 2.2776485040760637}, 
"off_policy_estimator": {}, "info": {"num_steps_trained": 2832000, "num_steps_sampled": 1510400, "sample_time_ms": 20089.612, "load_time_ms": 37.29, "grad_time_ms": 9589.898, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.2037062331786428e-36, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031413733959198, "policy_loss": -0.00419240677729249, "vf_loss": 79.22246551513672, "vf_explained_var": 0.7654686570167542, "kl": 0.0017640552250668406, "entropy": 1.1769217252731323, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1510400, "episodes_total": 3776, "training_iteration": 118, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-43-44", "timestamp": 1660250624, "time_this_iter_s": 28.596869230270386, "time_total_s": 9042.268640995026, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": 
{"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9042.268640995026, "timesteps_since_restore": 1510400, "iterations_since_restore": 118, "perf": {"cpu_util_percent": 34.160000000000004, "ram_util_percent": 58.245000000000005}} +{"episode_reward_max": 582.0, "episode_reward_min": 222.0, "episode_reward_mean": 517.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 105.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 258.905}, "custom_metrics": {"sparse_reward_mean": 179.4, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 159.01, "shaped_reward_min": 62, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.89, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.83, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.07, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 1.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.64, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.8, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.55, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 3, 
"dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.87, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.84, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.46, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.7, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.8, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.55, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.8, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.55, 
"viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 573.0, 570.0, 525.0, 507.0, 558.0, 522.0, 465.0, 525.0, 522.0, 519.0, 507.0, 519.0, 525.0, 482.0, 576.0, 522.0, 484.0, 530.0, 507.0, 516.0, 462.0, 530.0, 525.0, 561.0, 519.0, 573.0, 530.0, 519.0, 464.0, 533.0, 525.0, 582.0, 461.0, 456.0, 501.0, 519.0, 530.0, 519.0, 455.0, 222.0, 524.0, 519.0, 525.0, 519.0, 576.0, 467.0, 522.0, 579.0, 570.0, 522.0, 512.0, 510.0, 495.0, 522.0, 573.0, 522.0, 573.0, 525.0, 576.0, 482.0, 522.0, 484.0, 525.0, 456.0, 461.0, 516.0, 522.0, 530.0, 516.0, 510.0, 525.0, 570.0, 468.0, 495.0, 525.0, 519.0, 519.0, 522.0, 579.0, 579.0, 501.0, 527.0, 
530.0, 525.0, 573.0, 525.0, 522.0, 522.0, 527.0, 444.0, 515.0, 462.0, 533.0, 525.0, 519.0, 519.0, 527.0, 504.0, 515.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 294.0, 293.0, 280.0, 284.0, 286.0, 275.0, 250.0, 246.0, 261.0, 279.0, 279.0, 281.0, 241.0, 223.0, 242.0, 255.0, 270.0, 258.0, 264.0, 268.0, 251.0, 244.0, 263.0, 256.0, 263.0, 255.0, 270.0, 253.0, 229.0, 282.0, 294.0, 258.0, 264.0, 238.0, 246.0, 264.0, 266.0, 259.0, 248.0, 263.0, 253.0, 232.0, 230.0, 270.0, 260.0, 256.0, 269.0, 276.0, 285.0, 261.0, 258.0, 303.0, 270.0, 270.0, 260.0, 254.0, 265.0, 231.0, 233.0, 262.0, 271.0, 260.0, 265.0, 287.0, 295.0, 234.0, 227.0, 233.0, 223.0, 248.0, 253.0, 254.0, 265.0, 273.0, 257.0, 254.0, 265.0, 220.0, 235.0, 105.0, 117.0, 259.0, 265.0, 265.0, 254.0, 260.0, 265.0, 261.0, 258.0, 278.0, 298.0, 227.0, 240.0, 253.0, 269.0, 286.0, 293.0, 290.0, 280.0, 266.0, 256.0, 252.0, 260.0, 252.0, 258.0, 237.0, 258.0, 264.0, 258.0, 277.0, 296.0, 268.0, 254.0, 285.0, 288.0, 270.0, 255.0, 279.0, 297.0, 231.0, 251.0, 257.0, 265.0, 237.0, 247.0, 263.0, 262.0, 221.0, 235.0, 234.0, 227.0, 260.0, 256.0, 262.0, 260.0, 263.0, 267.0, 265.0, 251.0, 249.0, 261.0, 265.0, 260.0, 283.0, 287.0, 227.0, 241.0, 239.0, 256.0, 260.0, 265.0, 256.0, 263.0, 269.0, 250.0, 263.0, 259.0, 282.0, 297.0, 288.0, 291.0, 249.0, 252.0, 273.0, 254.0, 275.0, 255.0, 276.0, 249.0, 281.0, 292.0, 256.0, 269.0, 267.0, 255.0, 253.0, 269.0, 265.0, 262.0, 220.0, 224.0, 258.0, 257.0, 218.0, 244.0, 263.0, 270.0, 262.0, 263.0, 251.0, 
268.0, 267.0, 252.0, 261.0, 266.0, 253.0, 251.0, 251.0, 264.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7186253925564765, "mean_processing_ms": 0.4111520953158576, "mean_inference_ms": 2.2666653584244876}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2856000, "num_steps_sampled": 1523200, "sample_time_ms": 20187.204, "load_time_ms": 37.338, "grad_time_ms": 9575.103, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.018531165893214e-37, "cur_lr": 0.0010000000474974513, "total_loss": 0.002520867856219411, "policy_loss": -0.0053411815315485, "vf_loss": 84.43938446044922, "vf_explained_var": 0.7439851760864258, "kl": 0.0023064902052283287, "entropy": 1.1637717485427856, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1523200, "episodes_total": 3808, "training_iteration": 119, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-44-14", "timestamp": 1660250654, "time_this_iter_s": 30.140514850616455, "time_total_s": 9072.409155845642, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9072.409155845642, "timesteps_since_restore": 1523200, "iterations_since_restore": 119, "perf": {"cpu_util_percent": 32.737209302325574, "ram_util_percent": 58.15116279069766}} +{"episode_reward_max": 582.0, "episode_reward_min": 398.0, "episode_reward_mean": 521.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 260.715}, "custom_metrics": {"sparse_reward_mean": 180.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 159.83, "shaped_reward_min": 118, 
"shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.01, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.2, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 1.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.87, "potting_onion_agent_0_min": 7, 
"potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.65, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.87, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.65, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, 
"optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.87, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.65, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 576.0, 507.0, 527.0, 570.0, 510.0, 501.0, 522.0, 570.0, 516.0, 467.0, 518.0, 507.0, 579.0, 530.0, 570.0, 398.0, 582.0, 573.0, 433.0, 530.0, 570.0, 516.0, 524.0, 399.0, 570.0, 533.0, 573.0, 522.0, 525.0, 501.0, 522.0, 456.0, 461.0, 516.0, 522.0, 530.0, 516.0, 510.0, 525.0, 570.0, 468.0, 495.0, 525.0, 519.0, 519.0, 522.0, 579.0, 579.0, 501.0, 527.0, 530.0, 525.0, 
573.0, 525.0, 522.0, 522.0, 527.0, 444.0, 515.0, 462.0, 533.0, 525.0, 519.0, 519.0, 527.0, 504.0, 515.0, 570.0, 573.0, 570.0, 525.0, 507.0, 558.0, 522.0, 465.0, 525.0, 522.0, 519.0, 507.0, 519.0, 525.0, 482.0, 576.0, 522.0, 484.0, 530.0, 507.0, 516.0, 462.0, 530.0, 525.0, 561.0, 519.0, 573.0, 530.0, 519.0, 464.0, 533.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [249.0, 261.0, 286.0, 290.0, 251.0, 256.0, 280.0, 247.0, 287.0, 283.0, 259.0, 251.0, 249.0, 252.0, 257.0, 265.0, 286.0, 284.0, 256.0, 260.0, 236.0, 231.0, 265.0, 253.0, 262.0, 245.0, 280.0, 299.0, 276.0, 254.0, 301.0, 269.0, 205.0, 193.0, 291.0, 291.0, 292.0, 281.0, 216.0, 217.0, 283.0, 247.0, 292.0, 278.0, 272.0, 244.0, 255.0, 269.0, 208.0, 191.0, 274.0, 296.0, 271.0, 262.0, 278.0, 295.0, 252.0, 270.0, 263.0, 262.0, 255.0, 246.0, 249.0, 273.0, 221.0, 235.0, 234.0, 227.0, 260.0, 256.0, 262.0, 260.0, 263.0, 267.0, 265.0, 251.0, 249.0, 261.0, 265.0, 260.0, 283.0, 287.0, 227.0, 241.0, 239.0, 256.0, 260.0, 265.0, 256.0, 263.0, 269.0, 250.0, 263.0, 259.0, 282.0, 297.0, 288.0, 291.0, 249.0, 252.0, 273.0, 254.0, 275.0, 255.0, 276.0, 249.0, 281.0, 292.0, 256.0, 269.0, 267.0, 255.0, 253.0, 269.0, 265.0, 262.0, 220.0, 224.0, 258.0, 257.0, 218.0, 244.0, 263.0, 270.0, 262.0, 263.0, 251.0, 268.0, 267.0, 252.0, 261.0, 266.0, 253.0, 251.0, 251.0, 264.0, 276.0, 294.0, 293.0, 280.0, 284.0, 286.0, 275.0, 250.0, 246.0, 261.0, 279.0, 279.0, 281.0, 241.0, 223.0, 242.0, 255.0, 270.0, 258.0, 264.0, 268.0, 251.0, 244.0, 263.0, 256.0, 
263.0, 255.0, 270.0, 253.0, 229.0, 282.0, 294.0, 258.0, 264.0, 238.0, 246.0, 264.0, 266.0, 259.0, 248.0, 263.0, 253.0, 232.0, 230.0, 270.0, 260.0, 256.0, 269.0, 276.0, 285.0, 261.0, 258.0, 303.0, 270.0, 270.0, 260.0, 254.0, 265.0, 231.0, 233.0, 262.0, 271.0, 260.0, 265.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7079238505690706, "mean_processing_ms": 0.40902061791269845, "mean_inference_ms": 2.2560343134520804}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2880000, "num_steps_sampled": 1536000, "sample_time_ms": 20247.355, "load_time_ms": 37.42, "grad_time_ms": 9479.74, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.009265582946607e-37, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019442923367023468, "policy_loss": -0.005335395690053701, "vf_loss": 78.6177749633789, "vf_explained_var": 0.7651795744895935, "kl": 0.0017719753086566925, "entropy": 1.164175033569336, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1536000, "episodes_total": 3840, "training_iteration": 120, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-44-44", "timestamp": 1660250684, "time_this_iter_s": 30.042346954345703, "time_total_s": 9102.451502799988, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, 
"custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9102.451502799988, "timesteps_since_restore": 1536000, "iterations_since_restore": 120, "perf": {"cpu_util_percent": 33.52142857142857, "ram_util_percent": 58.276190476190465}} +{"episode_reward_max": 582.0, "episode_reward_min": 365.0, "episode_reward_mean": 523.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, 
"policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 261.805}, "custom_metrics": {"sparse_reward_mean": 181.6, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 160.41, "shaped_reward_min": 118, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.47, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 1.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.52, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.93, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.7, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.61, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 3.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.25, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.28, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.16, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.93, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.7, "optimal_onion_potting_agent_1_min": 7, 
"optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.93, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.7, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [564.0, 579.0, 507.0, 530.0, 525.0, 365.0, 518.0, 522.0, 501.0, 525.0, 515.0, 576.0, 582.0, 530.0, 507.0, 522.0, 530.0, 
522.0, 456.0, 504.0, 582.0, 579.0, 519.0, 533.0, 573.0, 525.0, 498.0, 567.0, 573.0, 450.0, 479.0, 522.0, 519.0, 527.0, 504.0, 515.0, 570.0, 573.0, 570.0, 525.0, 507.0, 558.0, 522.0, 465.0, 525.0, 522.0, 519.0, 507.0, 519.0, 525.0, 482.0, 576.0, 522.0, 484.0, 530.0, 507.0, 516.0, 462.0, 530.0, 525.0, 561.0, 519.0, 573.0, 530.0, 519.0, 464.0, 533.0, 525.0, 510.0, 576.0, 507.0, 527.0, 570.0, 510.0, 501.0, 522.0, 570.0, 516.0, 467.0, 518.0, 507.0, 579.0, 530.0, 570.0, 398.0, 582.0, 573.0, 433.0, 530.0, 570.0, 516.0, 524.0, 399.0, 570.0, 533.0, 573.0, 522.0, 525.0, 501.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 272.0, 290.0, 289.0, 251.0, 256.0, 252.0, 278.0, 263.0, 262.0, 185.0, 180.0, 260.0, 258.0, 254.0, 268.0, 247.0, 254.0, 265.0, 260.0, 262.0, 253.0, 290.0, 286.0, 296.0, 286.0, 257.0, 273.0, 255.0, 252.0, 265.0, 257.0, 265.0, 265.0, 274.0, 248.0, 218.0, 238.0, 260.0, 244.0, 291.0, 291.0, 293.0, 286.0, 260.0, 259.0, 270.0, 263.0, 285.0, 288.0, 279.0, 246.0, 252.0, 246.0, 275.0, 292.0, 287.0, 286.0, 218.0, 232.0, 248.0, 231.0, 260.0, 262.0, 267.0, 252.0, 261.0, 266.0, 253.0, 251.0, 251.0, 264.0, 276.0, 294.0, 293.0, 280.0, 284.0, 286.0, 275.0, 250.0, 246.0, 261.0, 279.0, 279.0, 281.0, 241.0, 223.0, 242.0, 255.0, 270.0, 258.0, 264.0, 268.0, 251.0, 244.0, 263.0, 256.0, 263.0, 255.0, 270.0, 253.0, 229.0, 282.0, 294.0, 258.0, 264.0, 238.0, 246.0, 264.0, 266.0, 259.0, 248.0, 263.0, 253.0, 232.0, 230.0, 270.0, 260.0, 256.0, 269.0, 276.0, 285.0, 261.0, 258.0, 303.0, 
270.0, 270.0, 260.0, 254.0, 265.0, 231.0, 233.0, 262.0, 271.0, 260.0, 265.0, 249.0, 261.0, 286.0, 290.0, 251.0, 256.0, 280.0, 247.0, 287.0, 283.0, 259.0, 251.0, 249.0, 252.0, 257.0, 265.0, 286.0, 284.0, 256.0, 260.0, 236.0, 231.0, 265.0, 253.0, 262.0, 245.0, 280.0, 299.0, 276.0, 254.0, 301.0, 269.0, 205.0, 193.0, 291.0, 291.0, 292.0, 281.0, 216.0, 217.0, 283.0, 247.0, 292.0, 278.0, 272.0, 244.0, 255.0, 269.0, 208.0, 191.0, 274.0, 296.0, 271.0, 262.0, 278.0, 295.0, 252.0, 270.0, 263.0, 262.0, 255.0, 246.0, 249.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6974210688663596, "mean_processing_ms": 0.40692675131976414, "mean_inference_ms": 2.2458724940047134}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2904000, "num_steps_sampled": 1548800, "sample_time_ms": 20389.537, "load_time_ms": 37.524, "grad_time_ms": 9428.918, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.5046327914733034e-37, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008685672655701637, "policy_loss": -0.007173554971814156, "vf_loss": 86.26753997802734, "vf_explained_var": 0.7487472891807556, "kl": 0.001581608667038381, "entropy": 1.1692520380020142, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1548800, "episodes_total": 3872, "training_iteration": 121, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-45-15", "timestamp": 1660250715, "time_this_iter_s": 30.52119469642639, "time_total_s": 9132.972697496414, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": 
false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9132.972697496414, "timesteps_since_restore": 1548800, "iterations_since_restore": 121, "perf": {"cpu_util_percent": 33.260465116279065, 
"ram_util_percent": 58.28837209302325}} +{"episode_reward_max": 582.0, "episode_reward_min": 123.0, "episode_reward_mean": 521.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 58.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 260.89}, "custom_metrics": {"sparse_reward_mean": 180.8, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 160.18, "shaped_reward_min": 43, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.91, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.02, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 
1.32, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.7, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.67, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.12, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.46, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.01, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 3.78, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.17, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 
14.12, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.46, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.12, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.46, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 522.0, 522.0, 482.0, 522.0, 579.0, 573.0, 527.0, 530.0, 522.0, 123.0, 519.0, 570.0, 525.0, 470.0, 522.0, 573.0, 492.0, 570.0, 576.0, 465.0, 411.0, 525.0, 525.0, 579.0, 510.0, 516.0, 573.0, 525.0, 536.0, 573.0, 573.0, 519.0, 464.0, 533.0, 525.0, 510.0, 576.0, 507.0, 527.0, 570.0, 510.0, 501.0, 522.0, 570.0, 516.0, 467.0, 518.0, 507.0, 579.0, 530.0, 570.0, 398.0, 582.0, 573.0, 433.0, 530.0, 570.0, 516.0, 524.0, 399.0, 570.0, 533.0, 573.0, 522.0, 525.0, 501.0, 522.0, 564.0, 579.0, 507.0, 530.0, 525.0, 365.0, 518.0, 522.0, 501.0, 525.0, 515.0, 576.0, 582.0, 530.0, 507.0, 522.0, 530.0, 522.0, 456.0, 504.0, 582.0, 579.0, 519.0, 533.0, 573.0, 525.0, 498.0, 567.0, 573.0, 450.0, 479.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 267.0, 255.0, 272.0, 250.0, 246.0, 236.0, 273.0, 249.0, 283.0, 296.0, 298.0, 275.0, 264.0, 263.0, 266.0, 264.0, 255.0, 267.0, 58.0, 65.0, 249.0, 270.0, 298.0, 272.0, 261.0, 264.0, 245.0, 225.0, 252.0, 270.0, 276.0, 297.0, 243.0, 249.0, 273.0, 297.0, 292.0, 284.0, 231.0, 234.0, 201.0, 210.0, 269.0, 256.0, 262.0, 263.0, 287.0, 292.0, 265.0, 245.0, 248.0, 268.0, 292.0, 281.0, 259.0, 266.0, 273.0, 263.0, 271.0, 302.0, 295.0, 278.0, 254.0, 265.0, 231.0, 233.0, 262.0, 271.0, 260.0, 265.0, 249.0, 261.0, 286.0, 290.0, 251.0, 256.0, 280.0, 247.0, 287.0, 283.0, 259.0, 251.0, 249.0, 252.0, 257.0, 265.0, 286.0, 284.0, 256.0, 260.0, 236.0, 231.0, 265.0, 253.0, 262.0, 
245.0, 280.0, 299.0, 276.0, 254.0, 301.0, 269.0, 205.0, 193.0, 291.0, 291.0, 292.0, 281.0, 216.0, 217.0, 283.0, 247.0, 292.0, 278.0, 272.0, 244.0, 255.0, 269.0, 208.0, 191.0, 274.0, 296.0, 271.0, 262.0, 278.0, 295.0, 252.0, 270.0, 263.0, 262.0, 255.0, 246.0, 249.0, 273.0, 292.0, 272.0, 290.0, 289.0, 251.0, 256.0, 252.0, 278.0, 263.0, 262.0, 185.0, 180.0, 260.0, 258.0, 254.0, 268.0, 247.0, 254.0, 265.0, 260.0, 262.0, 253.0, 290.0, 286.0, 296.0, 286.0, 257.0, 273.0, 255.0, 252.0, 265.0, 257.0, 265.0, 265.0, 274.0, 248.0, 218.0, 238.0, 260.0, 244.0, 291.0, 291.0, 293.0, 286.0, 260.0, 259.0, 270.0, 263.0, 285.0, 288.0, 279.0, 246.0, 252.0, 246.0, 275.0, 292.0, 287.0, 286.0, 218.0, 232.0, 248.0, 231.0, 260.0, 262.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6870995124483497, "mean_processing_ms": 0.4048687165306798, "mean_inference_ms": 2.2359032456104564}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2928000, "num_steps_sampled": 1561600, "sample_time_ms": 20397.642, "load_time_ms": 37.125, "grad_time_ms": 9297.193, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.523163957366517e-38, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016971243312582374, "policy_loss": -0.006050370167940855, "vf_loss": 83.2496109008789, "vf_explained_var": 0.7647652626037598, "kl": 0.0023221501614898443, "entropy": 1.154932975769043, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1561600, "episodes_total": 3904, "training_iteration": 122, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-45-44", "timestamp": 1660250744, "time_this_iter_s": 29.548327922821045, "time_total_s": 9162.521025419235, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", 
"num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, 
"evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, 
"simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9162.521025419235, "timesteps_since_restore": 1561600, "iterations_since_restore": 122, "perf": {"cpu_util_percent": 34.20476190476191, "ram_util_percent": 58.335714285714296}} +{"episode_reward_max": 582.0, "episode_reward_min": 123.0, "episode_reward_mean": 527.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 58.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 263.885}, "custom_metrics": {"sparse_reward_mean": 182.6, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 162.57, "shaped_reward_min": 43, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.77, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.84, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.97, 
"useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.97, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.64, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.53, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.24, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.6, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.15, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.63, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.07, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.48, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.91, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 0, 
"soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.24, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.6, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.24, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.6, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [536.0, 573.0, 564.0, 532.0, 527.0, 579.0, 573.0, 519.0, 530.0, 576.0, 576.0, 576.0, 573.0, 576.0, 476.0, 516.0, 501.0, 522.0, 498.0, 576.0, 527.0, 482.0, 519.0, 576.0, 533.0, 570.0, 522.0, 527.0, 530.0, 536.0, 527.0, 573.0, 522.0, 525.0, 501.0, 522.0, 564.0, 579.0, 507.0, 530.0, 525.0, 365.0, 518.0, 522.0, 501.0, 525.0, 515.0, 576.0, 582.0, 530.0, 507.0, 522.0, 530.0, 522.0, 456.0, 504.0, 582.0, 579.0, 519.0, 533.0, 573.0, 525.0, 498.0, 567.0, 573.0, 450.0, 479.0, 522.0, 576.0, 522.0, 522.0, 482.0, 522.0, 579.0, 573.0, 527.0, 530.0, 522.0, 123.0, 519.0, 570.0, 525.0, 470.0, 522.0, 573.0, 492.0, 570.0, 576.0, 465.0, 411.0, 525.0, 525.0, 579.0, 510.0, 516.0, 573.0, 525.0, 536.0, 573.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 258.0, 292.0, 281.0, 269.0, 295.0, 260.0, 272.0, 253.0, 274.0, 305.0, 274.0, 287.0, 286.0, 256.0, 263.0, 262.0, 268.0, 287.0, 289.0, 281.0, 295.0, 285.0, 291.0, 278.0, 295.0, 293.0, 283.0, 248.0, 228.0, 251.0, 265.0, 249.0, 252.0, 268.0, 254.0, 251.0, 247.0, 293.0, 283.0, 270.0, 257.0, 235.0, 247.0, 258.0, 261.0, 294.0, 282.0, 265.0, 268.0, 285.0, 285.0, 271.0, 251.0, 259.0, 268.0, 257.0, 273.0, 270.0, 266.0, 259.0, 
268.0, 285.0, 288.0, 252.0, 270.0, 263.0, 262.0, 255.0, 246.0, 249.0, 273.0, 292.0, 272.0, 290.0, 289.0, 251.0, 256.0, 252.0, 278.0, 263.0, 262.0, 185.0, 180.0, 260.0, 258.0, 254.0, 268.0, 247.0, 254.0, 265.0, 260.0, 262.0, 253.0, 290.0, 286.0, 296.0, 286.0, 257.0, 273.0, 255.0, 252.0, 265.0, 257.0, 265.0, 265.0, 274.0, 248.0, 218.0, 238.0, 260.0, 244.0, 291.0, 291.0, 293.0, 286.0, 260.0, 259.0, 270.0, 263.0, 285.0, 288.0, 279.0, 246.0, 252.0, 246.0, 275.0, 292.0, 287.0, 286.0, 218.0, 232.0, 248.0, 231.0, 260.0, 262.0, 285.0, 291.0, 267.0, 255.0, 272.0, 250.0, 246.0, 236.0, 273.0, 249.0, 283.0, 296.0, 298.0, 275.0, 264.0, 263.0, 266.0, 264.0, 255.0, 267.0, 58.0, 65.0, 249.0, 270.0, 298.0, 272.0, 261.0, 264.0, 245.0, 225.0, 252.0, 270.0, 276.0, 297.0, 243.0, 249.0, 273.0, 297.0, 292.0, 284.0, 231.0, 234.0, 201.0, 210.0, 269.0, 256.0, 262.0, 263.0, 287.0, 292.0, 265.0, 245.0, 248.0, 268.0, 292.0, 281.0, 259.0, 266.0, 273.0, 263.0, 271.0, 302.0, 295.0, 278.0]}, "sampler_perf": {"mean_env_wait_ms": 1.676944844024091, "mean_processing_ms": 0.4028482080008365, "mean_inference_ms": 2.2261191245028336}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2952000, "num_steps_sampled": 1574400, "sample_time_ms": 20489.857, "load_time_ms": 37.388, "grad_time_ms": 9147.038, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.7615819786832586e-38, "cur_lr": 0.0010000000474974513, "total_loss": 0.004776147659868002, "policy_loss": -0.0032110288739204407, "vf_loss": 85.63726806640625, "vf_explained_var": 0.7386021614074707, "kl": 0.0019908936228603125, "entropy": 1.1530929803848267, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1574400, "episodes_total": 3936, "training_iteration": 123, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-46-15", "timestamp": 1660250775, "time_this_iter_s": 30.61848020553589, "time_total_s": 
9193.139505624771, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": 
false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, 
"num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9193.139505624771, "timesteps_since_restore": 1574400, "iterations_since_restore": 123, "perf": {"cpu_util_percent": 33.09302325581395, "ram_util_percent": 58.90232558139535}} +{"episode_reward_max": 582.0, "episode_reward_min": 123.0, "episode_reward_mean": 532.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 58.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 266.085}, "custom_metrics": {"sparse_reward_mean": 184.2, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 163.77, "shaped_reward_min": 43, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 15.6, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.81, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.11, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.59, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.53, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.18, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.78, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.98, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.81, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.07, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.42, 
"soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.33, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.18, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.78, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.18, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.78, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 501.0, 519.0, 573.0, 579.0, 527.0, 567.0, 519.0, 579.0, 522.0, 519.0, 524.0, 570.0, 519.0, 573.0, 521.0, 576.0, 507.0, 576.0, 573.0, 455.0, 533.0, 525.0, 582.0, 579.0, 576.0, 516.0, 510.0, 522.0, 513.0, 513.0, 573.0, 573.0, 450.0, 479.0, 522.0, 576.0, 522.0, 522.0, 482.0, 522.0, 579.0, 573.0, 527.0, 530.0, 522.0, 123.0, 519.0, 570.0, 525.0, 470.0, 522.0, 573.0, 492.0, 570.0, 576.0, 465.0, 411.0, 525.0, 525.0, 579.0, 510.0, 516.0, 573.0, 525.0, 536.0, 573.0, 573.0, 536.0, 573.0, 564.0, 532.0, 527.0, 579.0, 573.0, 519.0, 530.0, 576.0, 576.0, 576.0, 573.0, 576.0, 476.0, 516.0, 501.0, 522.0, 498.0, 576.0, 527.0, 482.0, 519.0, 576.0, 533.0, 570.0, 522.0, 527.0, 530.0, 536.0, 527.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [270.0, 255.0, 250.0, 251.0, 260.0, 259.0, 289.0, 284.0, 296.0, 283.0, 279.0, 248.0, 288.0, 279.0, 256.0, 263.0, 298.0, 281.0, 264.0, 258.0, 267.0, 252.0, 260.0, 264.0, 272.0, 298.0, 253.0, 266.0, 289.0, 284.0, 271.0, 
250.0, 297.0, 279.0, 243.0, 264.0, 290.0, 286.0, 288.0, 285.0, 219.0, 236.0, 270.0, 263.0, 280.0, 245.0, 293.0, 289.0, 294.0, 285.0, 270.0, 306.0, 255.0, 261.0, 263.0, 247.0, 251.0, 271.0, 255.0, 258.0, 267.0, 246.0, 284.0, 289.0, 287.0, 286.0, 218.0, 232.0, 248.0, 231.0, 260.0, 262.0, 285.0, 291.0, 267.0, 255.0, 272.0, 250.0, 246.0, 236.0, 273.0, 249.0, 283.0, 296.0, 298.0, 275.0, 264.0, 263.0, 266.0, 264.0, 255.0, 267.0, 58.0, 65.0, 249.0, 270.0, 298.0, 272.0, 261.0, 264.0, 245.0, 225.0, 252.0, 270.0, 276.0, 297.0, 243.0, 249.0, 273.0, 297.0, 292.0, 284.0, 231.0, 234.0, 201.0, 210.0, 269.0, 256.0, 262.0, 263.0, 287.0, 292.0, 265.0, 245.0, 248.0, 268.0, 292.0, 281.0, 259.0, 266.0, 273.0, 263.0, 271.0, 302.0, 295.0, 278.0, 278.0, 258.0, 292.0, 281.0, 269.0, 295.0, 260.0, 272.0, 253.0, 274.0, 305.0, 274.0, 287.0, 286.0, 256.0, 263.0, 262.0, 268.0, 287.0, 289.0, 281.0, 295.0, 285.0, 291.0, 278.0, 295.0, 293.0, 283.0, 248.0, 228.0, 251.0, 265.0, 249.0, 252.0, 268.0, 254.0, 251.0, 247.0, 293.0, 283.0, 270.0, 257.0, 235.0, 247.0, 258.0, 261.0, 294.0, 282.0, 265.0, 268.0, 285.0, 285.0, 271.0, 251.0, 259.0, 268.0, 257.0, 273.0, 270.0, 266.0, 259.0, 268.0, 285.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.666936267539323, "mean_processing_ms": 0.40085873681350664, "mean_inference_ms": 2.216239678267129}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2976000, "num_steps_sampled": 1587200, "sample_time_ms": 20498.249, "load_time_ms": 37.366, "grad_time_ms": 8983.735, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.8807909893416293e-38, "cur_lr": 0.0010000000474974513, "total_loss": 0.001341886818408966, "policy_loss": -0.006108943372964859, "vf_loss": 80.26326751708984, "vf_explained_var": 0.763457715511322, "kl": 0.0015635616146028042, "entropy": 1.1509909629821777, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1587200, "episodes_total": 
3968, "training_iteration": 124, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-46-43", "timestamp": 1660250803, "time_this_iter_s": 28.20863699913025, "time_total_s": 9221.348142623901, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": 
true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9221.348142623901, "timesteps_since_restore": 1587200, "iterations_since_restore": 124, "perf": {"cpu_util_percent": 33.417500000000004, "ram_util_percent": 58.46}} +{"episode_reward_max": 582.0, "episode_reward_min": 450.0, "episode_reward_mean": 544.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 212.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 272.12}, "custom_metrics": {"sparse_reward_mean": 188.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.04, "shaped_reward_min": 130, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 
0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.82, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.58, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.42, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.37, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.29, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.25, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.33, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.29, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.25, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.29, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.25, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 533.0, 573.0, 579.0, 564.0, 579.0, 579.0, 570.0, 567.0, 524.0, 573.0, 573.0, 516.0, 525.0, 522.0, 450.0, 576.0, 513.0, 476.0, 573.0, 576.0, 576.0, 579.0, 573.0, 522.0, 519.0, 579.0, 539.0, 570.0, 573.0, 507.0, 573.0, 525.0, 536.0, 573.0, 573.0, 536.0, 573.0, 564.0, 532.0, 527.0, 579.0, 573.0, 519.0, 530.0, 576.0, 576.0, 576.0, 573.0, 576.0, 476.0, 516.0, 501.0, 522.0, 498.0, 576.0, 527.0, 482.0, 519.0, 576.0, 533.0, 570.0, 522.0, 527.0, 530.0, 536.0, 527.0, 573.0, 525.0, 501.0, 519.0, 573.0, 579.0, 527.0, 567.0, 519.0, 579.0, 522.0, 519.0, 524.0, 570.0, 519.0, 573.0, 521.0, 576.0, 507.0, 576.0, 573.0, 455.0, 533.0, 525.0, 582.0, 579.0, 576.0, 516.0, 510.0, 522.0, 513.0, 513.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 295.0, 270.0, 263.0, 304.0, 
269.0, 285.0, 294.0, 286.0, 278.0, 279.0, 300.0, 290.0, 289.0, 284.0, 286.0, 297.0, 270.0, 281.0, 243.0, 283.0, 290.0, 288.0, 285.0, 261.0, 255.0, 257.0, 268.0, 258.0, 264.0, 212.0, 238.0, 292.0, 284.0, 259.0, 254.0, 239.0, 237.0, 290.0, 283.0, 279.0, 297.0, 288.0, 288.0, 280.0, 299.0, 289.0, 284.0, 259.0, 263.0, 274.0, 245.0, 299.0, 280.0, 261.0, 278.0, 295.0, 275.0, 285.0, 288.0, 240.0, 267.0, 288.0, 285.0, 259.0, 266.0, 273.0, 263.0, 271.0, 302.0, 295.0, 278.0, 278.0, 258.0, 292.0, 281.0, 269.0, 295.0, 260.0, 272.0, 253.0, 274.0, 305.0, 274.0, 287.0, 286.0, 256.0, 263.0, 262.0, 268.0, 287.0, 289.0, 281.0, 295.0, 285.0, 291.0, 278.0, 295.0, 293.0, 283.0, 248.0, 228.0, 251.0, 265.0, 249.0, 252.0, 268.0, 254.0, 251.0, 247.0, 293.0, 283.0, 270.0, 257.0, 235.0, 247.0, 258.0, 261.0, 294.0, 282.0, 265.0, 268.0, 285.0, 285.0, 271.0, 251.0, 259.0, 268.0, 257.0, 273.0, 270.0, 266.0, 259.0, 268.0, 285.0, 288.0, 270.0, 255.0, 250.0, 251.0, 260.0, 259.0, 289.0, 284.0, 296.0, 283.0, 279.0, 248.0, 288.0, 279.0, 256.0, 263.0, 298.0, 281.0, 264.0, 258.0, 267.0, 252.0, 260.0, 264.0, 272.0, 298.0, 253.0, 266.0, 289.0, 284.0, 271.0, 250.0, 297.0, 279.0, 243.0, 264.0, 290.0, 286.0, 288.0, 285.0, 219.0, 236.0, 270.0, 263.0, 280.0, 245.0, 293.0, 289.0, 294.0, 285.0, 270.0, 306.0, 255.0, 261.0, 263.0, 247.0, 251.0, 271.0, 255.0, 258.0, 267.0, 246.0, 284.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6570861935641006, "mean_processing_ms": 0.3989029759372638, "mean_inference_ms": 2.206484585731059}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3000000, "num_steps_sampled": 1600000, "sample_time_ms": 20649.468, "load_time_ms": 37.298, "grad_time_ms": 8980.547, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001723404973745346, "policy_loss": -0.005563261453062296, "vf_loss": 78.65084075927734, "vf_explained_var": 0.7562505602836609, "kl": 0.00201344583183527, "entropy": 1.1568351984024048, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1600000, "episodes_total": 4000, "training_iteration": 125, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-47-13", "timestamp": 1660250833, "time_this_iter_s": 29.59022808074951, "time_total_s": 9250.93837070465, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9250.93837070465, "timesteps_since_restore": 1600000, "iterations_since_restore": 125, "perf": {"cpu_util_percent": 29.699999999999996, "ram_util_percent": 58.414285714285725}} +{"episode_reward_max": 627.0, "episode_reward_min": 288.0, "episode_reward_mean": 541.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 136.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 270.505}, "custom_metrics": {"sparse_reward_mean": 187.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 165.81, "shaped_reward_min": 88, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.05, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.41, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.46, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.28, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.03, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.15, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.54, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.46, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.28, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.03, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.28, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.03, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [470.0, 522.0, 519.0, 570.0, 522.0, 522.0, 522.0, 627.0, 461.0, 484.0, 573.0, 573.0, 288.0, 561.0, 513.0, 576.0, 573.0, 576.0, 522.0, 525.0, 576.0, 519.0, 510.0, 576.0, 576.0, 522.0, 533.0, 530.0, 527.0, 579.0, 570.0, 522.0, 530.0, 536.0, 527.0, 573.0, 525.0, 501.0, 519.0, 573.0, 579.0, 527.0, 567.0, 519.0, 579.0, 522.0, 519.0, 524.0, 570.0, 519.0, 573.0, 521.0, 576.0, 507.0, 576.0, 573.0, 455.0, 533.0, 525.0, 582.0, 579.0, 576.0, 516.0, 510.0, 522.0, 513.0, 513.0, 573.0, 579.0, 533.0, 573.0, 579.0, 564.0, 579.0, 579.0, 570.0, 567.0, 524.0, 573.0, 573.0, 516.0, 525.0, 522.0, 450.0, 576.0, 513.0, 476.0, 573.0, 576.0, 576.0, 579.0, 573.0, 522.0, 519.0, 579.0, 539.0, 570.0, 573.0, 507.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [234.0, 236.0, 268.0, 254.0, 259.0, 260.0, 285.0, 285.0, 260.0, 262.0, 261.0, 261.0, 253.0, 269.0, 316.0, 311.0, 216.0, 245.0, 240.0, 244.0, 301.0, 272.0, 287.0, 286.0, 136.0, 152.0, 282.0, 279.0, 255.0, 258.0, 282.0, 294.0, 284.0, 289.0, 275.0, 301.0, 249.0, 273.0, 274.0, 251.0, 299.0, 277.0, 271.0, 248.0, 258.0, 252.0, 285.0, 291.0, 282.0, 294.0, 252.0, 270.0, 256.0, 277.0, 265.0, 265.0, 248.0, 279.0, 296.0, 283.0, 277.0, 293.0, 278.0, 244.0, 257.0, 273.0, 270.0, 266.0, 259.0, 268.0, 285.0, 288.0, 270.0, 255.0, 250.0, 251.0, 260.0, 259.0, 289.0, 284.0, 296.0, 283.0, 279.0, 248.0, 288.0, 279.0, 256.0, 263.0, 298.0, 281.0, 264.0, 258.0, 267.0, 252.0, 260.0, 264.0, 272.0, 298.0, 253.0, 266.0, 289.0, 284.0, 271.0, 250.0, 297.0, 279.0, 243.0, 264.0, 290.0, 286.0, 288.0, 285.0, 219.0, 236.0, 270.0, 263.0, 280.0, 245.0, 293.0, 289.0, 294.0, 285.0, 270.0, 306.0, 255.0, 261.0, 263.0, 247.0, 251.0, 271.0, 255.0, 258.0, 267.0, 246.0, 284.0, 289.0, 284.0, 295.0, 270.0, 263.0, 304.0, 269.0, 285.0, 294.0, 286.0, 278.0, 279.0, 300.0, 290.0, 289.0, 284.0, 286.0, 297.0, 270.0, 281.0, 243.0, 283.0, 290.0, 288.0, 285.0, 261.0, 255.0, 257.0, 268.0, 258.0, 264.0, 212.0, 238.0, 292.0, 284.0, 259.0, 254.0, 239.0, 237.0, 290.0, 283.0, 279.0, 297.0, 288.0, 288.0, 280.0, 299.0, 289.0, 284.0, 259.0, 263.0, 274.0, 245.0, 299.0, 280.0, 261.0, 278.0, 295.0, 275.0, 285.0, 288.0, 240.0, 267.0, 288.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6473988294860027, "mean_processing_ms": 0.3969770866429698, "mean_inference_ms": 2.1969066690858874}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3024000, "num_steps_sampled": 1612800, "sample_time_ms": 20818.492, "load_time_ms": 37.263, "grad_time_ms": 8921.308, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.004004280548542738, "policy_loss": -0.004071192815899849, "vf_loss": 86.5199966430664, "vf_explained_var": 0.7602561116218567, "kl": 0.0020587241742759943, "entropy": 1.153051495552063, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1612800, "episodes_total": 4032, "training_iteration": 126, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-47-43", "timestamp": 1660250863, "time_this_iter_s": 30.492609977722168, "time_total_s": 9281.430980682373, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9281.430980682373, "timesteps_since_restore": 1612800, "iterations_since_restore": 126, "perf": {"cpu_util_percent": 30.204651162790697, "ram_util_percent": 58.4372093023256}} +{"episode_reward_max": 630.0, "episode_reward_min": 288.0, "episode_reward_mean": 546.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 136.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 273.07}, "custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 166.14, "shaped_reward_min": 88, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.74, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.0, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.13, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.38, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.83, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.43, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.45, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.54, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.95, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.88, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.29, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.97, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.54, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.95, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.54, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.95, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 570.0, 576.0, 573.0, 510.0, 573.0, 570.0, 527.0, 543.0, 570.0, 546.0, 525.0, 573.0, 579.0, 573.0, 495.0, 576.0, 579.0, 576.0, 522.0, 579.0, 519.0, 506.0, 576.0, 579.0, 579.0, 498.0, 567.0, 576.0, 504.0, 630.0, 579.0, 522.0, 513.0, 513.0, 573.0, 579.0, 533.0, 573.0, 579.0, 564.0, 579.0, 579.0, 570.0, 567.0, 524.0, 573.0, 573.0, 516.0, 525.0, 522.0, 450.0, 576.0, 513.0, 476.0, 573.0, 576.0, 576.0, 579.0, 573.0, 522.0, 519.0, 579.0, 539.0, 570.0, 573.0, 507.0, 573.0, 470.0, 522.0, 519.0, 570.0, 522.0, 522.0, 522.0, 627.0, 461.0, 484.0, 573.0, 573.0, 288.0, 561.0, 513.0, 576.0, 573.0, 576.0, 522.0, 525.0, 576.0, 519.0, 510.0, 576.0, 576.0, 522.0, 533.0, 530.0, 527.0, 579.0, 570.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 274.0, 296.0, 290.0, 286.0, 283.0, 290.0, 264.0, 246.0, 290.0, 283.0, 277.0, 293.0, 248.0, 279.0, 273.0, 270.0, 279.0, 291.0, 272.0, 274.0, 273.0, 252.0, 289.0, 284.0, 290.0, 289.0, 282.0, 291.0, 239.0, 256.0, 287.0, 289.0, 284.0, 295.0, 281.0, 295.0, 262.0, 260.0, 299.0, 280.0, 244.0, 275.0, 253.0, 253.0, 287.0, 289.0, 283.0, 296.0, 280.0, 299.0, 253.0, 245.0, 291.0, 276.0, 291.0, 285.0, 244.0, 260.0, 303.0, 327.0, 274.0, 305.0, 251.0, 271.0, 255.0, 258.0, 267.0, 246.0, 284.0, 289.0, 284.0, 295.0, 270.0, 263.0, 304.0, 269.0, 285.0, 294.0, 286.0, 278.0, 279.0, 300.0, 290.0, 289.0, 284.0, 286.0, 297.0, 270.0, 281.0, 243.0, 283.0, 290.0, 288.0, 285.0, 261.0, 255.0, 257.0, 268.0, 258.0, 264.0, 212.0, 238.0, 292.0, 284.0, 259.0, 254.0, 239.0, 237.0, 290.0, 283.0, 279.0, 297.0, 288.0, 288.0, 280.0, 299.0, 289.0, 284.0, 259.0, 263.0, 274.0, 245.0, 299.0, 280.0, 261.0, 278.0, 295.0, 275.0, 285.0, 288.0, 240.0, 267.0, 288.0, 285.0, 234.0, 236.0, 268.0, 254.0, 259.0, 260.0, 285.0, 285.0, 260.0, 262.0, 261.0, 261.0, 253.0, 269.0, 316.0, 311.0, 216.0, 245.0, 240.0, 244.0, 301.0, 272.0, 287.0, 286.0, 136.0, 152.0, 282.0, 279.0, 255.0, 258.0, 282.0, 294.0, 284.0, 289.0, 275.0, 301.0, 249.0, 273.0, 274.0, 251.0, 299.0, 277.0, 271.0, 248.0, 258.0, 252.0, 285.0, 291.0, 282.0, 294.0, 252.0, 270.0, 256.0, 277.0, 265.0, 265.0, 248.0, 279.0, 296.0, 283.0, 277.0, 293.0, 278.0, 244.0]}, "sampler_perf": {"mean_env_wait_ms": 1.637864403277332, "mean_processing_ms": 0.39508190605522825, "mean_inference_ms": 2.1874563334987878}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3048000, 
"num_steps_sampled": 1625600, "sample_time_ms": 20804.416, "load_time_ms": 37.185, "grad_time_ms": 8880.278, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00042137285345233977, "policy_loss": -0.007074173539876938, "vf_loss": 72.28662872314453, "vf_explained_var": 0.7638903260231018, "kl": 0.0020576624665409327, "entropy": 1.1517143249511719, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1625600, "episodes_total": 4064, "training_iteration": 127, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-48-13", "timestamp": 1660250893, "time_this_iter_s": 29.709146738052368, "time_total_s": 9311.140127420425, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": 
false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9311.140127420425, "timesteps_since_restore": 1625600, "iterations_since_restore": 127, "perf": {"cpu_util_percent": 33.61904761904762, "ram_util_percent": 58.37380952380953}} +{"episode_reward_max": 630.0, "episode_reward_min": 288.0, "episode_reward_mean": 546.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 136.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 273.2}, "custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 166.4, "shaped_reward_min": 88, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.0, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.98, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.33, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.44, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.97, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 0, 
"dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.72, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.44, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.44, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 567.0, 567.0, 558.0, 530.0, 573.0, 582.0, 582.0, 521.0, 576.0, 510.0, 579.0, 579.0, 522.0, 573.0, 573.0, 576.0, 473.0, 419.0, 539.0, 573.0, 576.0, 570.0, 573.0, 484.0, 582.0, 576.0, 533.0, 396.0, 573.0, 570.0, 570.0, 573.0, 507.0, 573.0, 470.0, 522.0, 519.0, 570.0, 522.0, 522.0, 522.0, 627.0, 461.0, 484.0, 573.0, 573.0, 288.0, 561.0, 513.0, 576.0, 573.0, 576.0, 522.0, 525.0, 576.0, 519.0, 510.0, 576.0, 576.0, 522.0, 533.0, 530.0, 527.0, 579.0, 570.0, 522.0, 576.0, 570.0, 576.0, 573.0, 510.0, 573.0, 570.0, 527.0, 543.0, 570.0, 546.0, 525.0, 573.0, 579.0, 573.0, 495.0, 576.0, 579.0, 576.0, 522.0, 579.0, 519.0, 506.0, 576.0, 579.0, 579.0, 
498.0, 567.0, 576.0, 504.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 282.0, 285.0, 291.0, 281.0, 286.0, 282.0, 285.0, 289.0, 269.0, 264.0, 266.0, 303.0, 270.0, 307.0, 275.0, 285.0, 297.0, 253.0, 268.0, 304.0, 272.0, 231.0, 279.0, 299.0, 280.0, 289.0, 290.0, 271.0, 251.0, 300.0, 273.0, 288.0, 285.0, 290.0, 286.0, 238.0, 235.0, 212.0, 207.0, 265.0, 274.0, 288.0, 285.0, 304.0, 272.0, 277.0, 293.0, 285.0, 288.0, 261.0, 223.0, 298.0, 284.0, 289.0, 287.0, 281.0, 252.0, 196.0, 200.0, 285.0, 288.0, 280.0, 290.0, 295.0, 275.0, 285.0, 288.0, 240.0, 267.0, 288.0, 285.0, 234.0, 236.0, 268.0, 254.0, 259.0, 260.0, 285.0, 285.0, 260.0, 262.0, 261.0, 261.0, 253.0, 269.0, 316.0, 311.0, 216.0, 245.0, 240.0, 244.0, 301.0, 272.0, 287.0, 286.0, 136.0, 152.0, 282.0, 279.0, 255.0, 258.0, 282.0, 294.0, 284.0, 289.0, 275.0, 301.0, 249.0, 273.0, 274.0, 251.0, 299.0, 277.0, 271.0, 248.0, 258.0, 252.0, 285.0, 291.0, 282.0, 294.0, 252.0, 270.0, 256.0, 277.0, 265.0, 265.0, 248.0, 279.0, 296.0, 283.0, 277.0, 293.0, 278.0, 244.0, 286.0, 290.0, 274.0, 296.0, 290.0, 286.0, 283.0, 290.0, 264.0, 246.0, 290.0, 283.0, 277.0, 293.0, 248.0, 279.0, 273.0, 270.0, 279.0, 291.0, 272.0, 274.0, 273.0, 252.0, 289.0, 284.0, 290.0, 289.0, 282.0, 291.0, 239.0, 256.0, 287.0, 289.0, 284.0, 295.0, 281.0, 295.0, 262.0, 260.0, 299.0, 280.0, 244.0, 275.0, 253.0, 253.0, 287.0, 289.0, 283.0, 296.0, 280.0, 299.0, 253.0, 245.0, 291.0, 276.0, 291.0, 285.0, 244.0, 260.0, 303.0, 327.0, 274.0, 305.0]}, 
"sampler_perf": {"mean_env_wait_ms": 1.6284902770333296, "mean_processing_ms": 0.39321693792454526, "mean_inference_ms": 2.178397267354796}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3072000, "num_steps_sampled": 1638400, "sample_time_ms": 20978.899, "load_time_ms": 37.247, "grad_time_ms": 8964.602, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014722892083227634, "policy_loss": -0.0057091922499239445, "vf_loss": 77.60167694091797, "vf_explained_var": 0.7587153315544128, "kl": 0.0015954332193359733, "entropy": 1.1573811769485474, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1638400, "episodes_total": 4096, "training_iteration": 128, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-48-44", "timestamp": 1660250924, "time_this_iter_s": 31.189378023147583, "time_total_s": 9342.329505443573, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9342.329505443573, "timesteps_since_restore": 1638400, "iterations_since_restore": 128, "perf": {"cpu_util_percent": 28.313636363636366, "ram_util_percent": 58.377272727272725}} +{"episode_reward_max": 630.0, "episode_reward_min": 396.0, "episode_reward_mean": 548.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 274.355}, "custom_metrics": {"sparse_reward_mean": 191.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 166.71, "shaped_reward_min": 116, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.06, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.11, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.38, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.77, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.37, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.46, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 
16.05, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.02, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.61, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.2, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.17, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.46, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.05, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.46, 
"viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.05, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [561.0, 510.0, 525.0, 576.0, 582.0, 573.0, 564.0, 516.0, 501.0, 567.0, 524.0, 570.0, 573.0, 530.0, 501.0, 444.0, 576.0, 476.0, 576.0, 507.0, 573.0, 519.0, 570.0, 530.0, 573.0, 512.0, 527.0, 570.0, 570.0, 487.0, 530.0, 582.0, 527.0, 579.0, 570.0, 522.0, 576.0, 570.0, 576.0, 573.0, 510.0, 573.0, 570.0, 527.0, 543.0, 570.0, 546.0, 525.0, 573.0, 579.0, 573.0, 495.0, 576.0, 579.0, 576.0, 522.0, 579.0, 519.0, 506.0, 576.0, 579.0, 579.0, 498.0, 567.0, 576.0, 
504.0, 630.0, 579.0, 573.0, 576.0, 567.0, 567.0, 558.0, 530.0, 573.0, 582.0, 582.0, 521.0, 576.0, 510.0, 579.0, 579.0, 522.0, 573.0, 573.0, 576.0, 473.0, 419.0, 539.0, 573.0, 576.0, 570.0, 573.0, 484.0, 582.0, 576.0, 533.0, 396.0, 573.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 280.0, 253.0, 257.0, 267.0, 258.0, 296.0, 280.0, 283.0, 299.0, 275.0, 298.0, 287.0, 277.0, 267.0, 249.0, 247.0, 254.0, 284.0, 283.0, 267.0, 257.0, 279.0, 291.0, 287.0, 286.0, 272.0, 258.0, 234.0, 267.0, 218.0, 226.0, 275.0, 301.0, 242.0, 234.0, 289.0, 287.0, 240.0, 267.0, 274.0, 299.0, 263.0, 256.0, 270.0, 300.0, 268.0, 262.0, 274.0, 299.0, 255.0, 257.0, 271.0, 256.0, 286.0, 284.0, 269.0, 301.0, 232.0, 255.0, 270.0, 260.0, 296.0, 286.0, 248.0, 279.0, 296.0, 283.0, 277.0, 293.0, 278.0, 244.0, 286.0, 290.0, 274.0, 296.0, 290.0, 286.0, 283.0, 290.0, 264.0, 246.0, 290.0, 283.0, 277.0, 293.0, 248.0, 279.0, 273.0, 270.0, 279.0, 291.0, 272.0, 274.0, 273.0, 252.0, 289.0, 284.0, 290.0, 289.0, 282.0, 291.0, 239.0, 256.0, 287.0, 289.0, 284.0, 295.0, 281.0, 295.0, 262.0, 260.0, 299.0, 280.0, 244.0, 275.0, 253.0, 253.0, 287.0, 289.0, 283.0, 296.0, 280.0, 299.0, 253.0, 245.0, 291.0, 276.0, 291.0, 285.0, 244.0, 260.0, 303.0, 327.0, 274.0, 305.0, 291.0, 282.0, 285.0, 291.0, 281.0, 286.0, 282.0, 285.0, 289.0, 269.0, 264.0, 266.0, 303.0, 270.0, 307.0, 275.0, 285.0, 297.0, 253.0, 268.0, 304.0, 272.0, 231.0, 279.0, 299.0, 280.0, 289.0, 290.0, 271.0, 251.0, 300.0, 273.0, 288.0, 285.0, 290.0, 286.0, 238.0, 
235.0, 212.0, 207.0, 265.0, 274.0, 288.0, 285.0, 304.0, 272.0, 277.0, 293.0, 285.0, 288.0, 261.0, 223.0, 298.0, 284.0, 289.0, 287.0, 281.0, 252.0, 196.0, 200.0, 285.0, 288.0, 280.0, 290.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6192641289400398, "mean_processing_ms": 0.39137958658922545, "mean_inference_ms": 2.169420054882037}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3096000, "num_steps_sampled": 1651200, "sample_time_ms": 21000.262, "load_time_ms": 37.071, "grad_time_ms": 9066.202, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002854668302461505, "policy_loss": -0.004409888293594122, "vf_loss": 78.45098114013672, "vf_explained_var": 0.7681138515472412, "kl": 0.0020372606813907623, "entropy": 1.1610809564590454, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1651200, "episodes_total": 4128, "training_iteration": 129, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-49-15", "timestamp": 1660250955, "time_this_iter_s": 31.373005151748657, "time_total_s": 9373.702510595322, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9373.702510595322, "timesteps_since_restore": 1651200, "iterations_since_restore": 129, "perf": {"cpu_util_percent": 30.084444444444443, "ram_util_percent": 58.27111111111109}} +{"episode_reward_max": 630.0, "episode_reward_min": 396.0, "episode_reward_mean": 544.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 272.245}, "custom_metrics": {"sparse_reward_mean": 189.2, 
"sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 166.09, "shaped_reward_min": 116, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.51, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.0, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.38, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.37, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.02, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.07, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.62, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.72, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.12, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.21, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.11, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.37, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.02, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.37, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.02, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 567.0, 516.0, 522.0, 573.0, 510.0, 479.0, 524.0, 501.0, 512.0, 573.0, 522.0, 582.0, 579.0, 576.0, 576.0, 513.0, 570.0, 456.0, 567.0, 539.0, 567.0, 570.0, 576.0, 522.0, 519.0, 525.0, 570.0, 579.0, 453.0, 515.0, 576.0, 504.0, 630.0, 579.0, 573.0, 576.0, 567.0, 567.0, 558.0, 
530.0, 573.0, 582.0, 582.0, 521.0, 576.0, 510.0, 579.0, 579.0, 522.0, 573.0, 573.0, 576.0, 473.0, 419.0, 539.0, 573.0, 576.0, 570.0, 573.0, 484.0, 582.0, 576.0, 533.0, 396.0, 573.0, 570.0, 561.0, 510.0, 525.0, 576.0, 582.0, 573.0, 564.0, 516.0, 501.0, 567.0, 524.0, 570.0, 573.0, 530.0, 501.0, 444.0, 576.0, 476.0, 576.0, 507.0, 573.0, 519.0, 570.0, 530.0, 573.0, 512.0, 527.0, 570.0, 570.0, 487.0, 530.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 288.0, 288.0, 299.0, 268.0, 272.0, 244.0, 252.0, 270.0, 282.0, 291.0, 256.0, 254.0, 250.0, 229.0, 266.0, 258.0, 236.0, 265.0, 238.0, 274.0, 267.0, 306.0, 262.0, 260.0, 288.0, 294.0, 291.0, 288.0, 295.0, 281.0, 277.0, 299.0, 272.0, 241.0, 285.0, 285.0, 226.0, 230.0, 281.0, 286.0, 265.0, 274.0, 287.0, 280.0, 290.0, 280.0, 293.0, 283.0, 256.0, 266.0, 253.0, 266.0, 266.0, 259.0, 287.0, 283.0, 280.0, 299.0, 236.0, 217.0, 268.0, 247.0, 291.0, 285.0, 244.0, 260.0, 303.0, 327.0, 274.0, 305.0, 291.0, 282.0, 285.0, 291.0, 281.0, 286.0, 282.0, 285.0, 289.0, 269.0, 264.0, 266.0, 303.0, 270.0, 307.0, 275.0, 285.0, 297.0, 253.0, 268.0, 304.0, 272.0, 231.0, 279.0, 299.0, 280.0, 289.0, 290.0, 271.0, 251.0, 300.0, 273.0, 288.0, 285.0, 290.0, 286.0, 238.0, 235.0, 212.0, 207.0, 265.0, 274.0, 288.0, 285.0, 304.0, 272.0, 277.0, 293.0, 285.0, 288.0, 261.0, 223.0, 298.0, 284.0, 289.0, 287.0, 281.0, 252.0, 196.0, 200.0, 285.0, 288.0, 280.0, 290.0, 281.0, 280.0, 253.0, 257.0, 267.0, 258.0, 296.0, 280.0, 283.0, 299.0, 275.0, 298.0, 287.0, 
277.0, 267.0, 249.0, 247.0, 254.0, 284.0, 283.0, 267.0, 257.0, 279.0, 291.0, 287.0, 286.0, 272.0, 258.0, 234.0, 267.0, 218.0, 226.0, 275.0, 301.0, 242.0, 234.0, 289.0, 287.0, 240.0, 267.0, 274.0, 299.0, 263.0, 256.0, 270.0, 300.0, 268.0, 262.0, 274.0, 299.0, 255.0, 257.0, 271.0, 256.0, 286.0, 284.0, 269.0, 301.0, 232.0, 255.0, 270.0, 260.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6101955338743283, "mean_processing_ms": 0.3895722397312522, "mean_inference_ms": 2.1607416335063014}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3120000, "num_steps_sampled": 1664000, "sample_time_ms": 21017.48, "load_time_ms": 36.902, "grad_time_ms": 9228.931, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00922582671046257, "policy_loss": 0.0016869133105501533, "vf_loss": 81.20984649658203, "vf_explained_var": 0.7594642043113708, "kl": 0.003354247659444809, "entropy": 1.164129376411438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1664000, "episodes_total": 4160, "training_iteration": 130, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-49-47", "timestamp": 1660250987, "time_this_iter_s": 31.841378211975098, "time_total_s": 9405.543888807297, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": 
false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": 
true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9405.543888807297, "timesteps_since_restore": 1664000, "iterations_since_restore": 130, "perf": {"cpu_util_percent": 32.67111111111111, "ram_util_percent": 58.35999999999998}} +{"episode_reward_max": 582.0, "episode_reward_min": 396.0, "episode_reward_mean": 538.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 269.245}, "custom_metrics": {"sparse_reward_mean": 187.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 164.09, "shaped_reward_min": 116, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.79, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.75, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.98, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.94, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.81, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, 
"useful_onion_drop_agent_0_mean": 0.39, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.59, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.61, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.01, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.17, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.21, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.59, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.61, 
"optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.59, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.61, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 570.0, 516.0, 573.0, 570.0, 473.0, 573.0, 521.0, 570.0, 530.0, 582.0, 525.0, 
579.0, 533.0, 564.0, 582.0, 461.0, 573.0, 524.0, 576.0, 516.0, 573.0, 573.0, 396.0, 576.0, 507.0, 467.0, 549.0, 527.0, 522.0, 418.0, 579.0, 533.0, 396.0, 573.0, 570.0, 561.0, 510.0, 525.0, 576.0, 582.0, 573.0, 564.0, 516.0, 501.0, 567.0, 524.0, 570.0, 573.0, 530.0, 501.0, 444.0, 576.0, 476.0, 576.0, 507.0, 573.0, 519.0, 570.0, 530.0, 573.0, 512.0, 527.0, 570.0, 570.0, 487.0, 530.0, 582.0, 582.0, 576.0, 567.0, 516.0, 522.0, 573.0, 510.0, 479.0, 524.0, 501.0, 512.0, 573.0, 522.0, 582.0, 579.0, 576.0, 576.0, 513.0, 570.0, 456.0, 567.0, 539.0, 567.0, 570.0, 576.0, 522.0, 519.0, 525.0, 570.0, 579.0, 453.0, 515.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [279.0, 294.0, 280.0, 290.0, 253.0, 263.0, 288.0, 285.0, 298.0, 272.0, 235.0, 238.0, 282.0, 291.0, 265.0, 256.0, 293.0, 277.0, 264.0, 266.0, 291.0, 291.0, 271.0, 254.0, 287.0, 292.0, 274.0, 259.0, 286.0, 278.0, 287.0, 295.0, 215.0, 246.0, 288.0, 285.0, 268.0, 256.0, 288.0, 288.0, 276.0, 240.0, 292.0, 281.0, 300.0, 273.0, 191.0, 205.0, 290.0, 286.0, 250.0, 257.0, 241.0, 226.0, 274.0, 275.0, 272.0, 255.0, 262.0, 260.0, 207.0, 211.0, 284.0, 295.0, 281.0, 252.0, 196.0, 200.0, 285.0, 288.0, 280.0, 290.0, 281.0, 280.0, 253.0, 257.0, 267.0, 258.0, 296.0, 280.0, 283.0, 299.0, 275.0, 298.0, 287.0, 277.0, 267.0, 249.0, 247.0, 254.0, 284.0, 283.0, 267.0, 257.0, 279.0, 291.0, 287.0, 286.0, 272.0, 258.0, 234.0, 267.0, 218.0, 226.0, 275.0, 301.0, 242.0, 234.0, 289.0, 287.0, 240.0, 267.0, 274.0, 299.0, 263.0, 256.0, 270.0, 300.0, 268.0, 262.0, 
274.0, 299.0, 255.0, 257.0, 271.0, 256.0, 286.0, 284.0, 269.0, 301.0, 232.0, 255.0, 270.0, 260.0, 296.0, 286.0, 294.0, 288.0, 288.0, 288.0, 299.0, 268.0, 272.0, 244.0, 252.0, 270.0, 282.0, 291.0, 256.0, 254.0, 250.0, 229.0, 266.0, 258.0, 236.0, 265.0, 238.0, 274.0, 267.0, 306.0, 262.0, 260.0, 288.0, 294.0, 291.0, 288.0, 295.0, 281.0, 277.0, 299.0, 272.0, 241.0, 285.0, 285.0, 226.0, 230.0, 281.0, 286.0, 265.0, 274.0, 287.0, 280.0, 290.0, 280.0, 293.0, 283.0, 256.0, 266.0, 253.0, 266.0, 266.0, 259.0, 287.0, 283.0, 280.0, 299.0, 236.0, 217.0, 268.0, 247.0]}, "sampler_perf": {"mean_env_wait_ms": 1.601265947983415, "mean_processing_ms": 0.38779407949911077, "mean_inference_ms": 2.1521646972676964}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3144000, "num_steps_sampled": 1676800, "sample_time_ms": 21002.089, "load_time_ms": 36.986, "grad_time_ms": 9370.108, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003206493565812707, "policy_loss": -0.00454886956140399, "vf_loss": 83.29342651367188, "vf_explained_var": 0.7723144888877869, "kl": 0.0017231384990736842, "entropy": 1.1479605436325073, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1676800, "episodes_total": 4192, "training_iteration": 131, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-50-19", "timestamp": 1660251019, "time_this_iter_s": 31.782477855682373, "time_total_s": 9437.32636666298, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9437.32636666298, "timesteps_since_restore": 1676800, "iterations_since_restore": 131, "perf": {"cpu_util_percent": 
30.406818181818174, "ram_util_percent": 58.26818181818181}} +{"episode_reward_max": 587.0, "episode_reward_min": 396.0, "episode_reward_mean": 544.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 272.11}, "custom_metrics": {"sparse_reward_mean": 189.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 166.22, "shaped_reward_min": 116, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.9, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.98, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.5, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.29, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, 
"onion_drop_agent_0_mean": 0.95, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.82, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.38, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.73, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.85, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 14.73, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.85, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.73, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.85, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 504.0, 573.0, 561.0, 576.0, 582.0, 530.0, 498.0, 573.0, 573.0, 579.0, 579.0, 533.0, 522.0, 579.0, 576.0, 539.0, 519.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 495.0, 587.0, 570.0, 524.0, 579.0, 509.0, 527.0, 570.0, 487.0, 530.0, 582.0, 582.0, 576.0, 567.0, 516.0, 522.0, 573.0, 510.0, 479.0, 524.0, 501.0, 512.0, 573.0, 522.0, 582.0, 579.0, 576.0, 576.0, 513.0, 570.0, 456.0, 567.0, 539.0, 567.0, 570.0, 576.0, 522.0, 519.0, 525.0, 570.0, 579.0, 453.0, 515.0, 573.0, 570.0, 516.0, 573.0, 570.0, 473.0, 573.0, 521.0, 570.0, 530.0, 582.0, 525.0, 579.0, 533.0, 564.0, 582.0, 461.0, 573.0, 524.0, 576.0, 516.0, 573.0, 573.0, 396.0, 576.0, 507.0, 467.0, 549.0, 527.0, 522.0, 418.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 278.0, 232.0, 272.0, 287.0, 286.0, 289.0, 272.0, 282.0, 294.0, 286.0, 296.0, 268.0, 262.0, 253.0, 245.0, 290.0, 283.0, 278.0, 295.0, 273.0, 306.0, 290.0, 289.0, 260.0, 273.0, 267.0, 255.0, 285.0, 294.0, 293.0, 283.0, 272.0, 267.0, 248.0, 271.0, 287.0, 286.0, 288.0, 285.0, 291.0, 285.0, 289.0, 284.0, 274.0, 299.0, 282.0, 291.0, 296.0, 277.0, 250.0, 245.0, 288.0, 299.0, 289.0, 281.0, 260.0, 264.0, 283.0, 296.0, 246.0, 263.0, 267.0, 260.0, 269.0, 301.0, 232.0, 255.0, 270.0, 260.0, 296.0, 286.0, 294.0, 288.0, 288.0, 288.0, 299.0, 268.0, 272.0, 244.0, 252.0, 270.0, 282.0, 291.0, 256.0, 254.0, 250.0, 229.0, 266.0, 258.0, 236.0, 
265.0, 238.0, 274.0, 267.0, 306.0, 262.0, 260.0, 288.0, 294.0, 291.0, 288.0, 295.0, 281.0, 277.0, 299.0, 272.0, 241.0, 285.0, 285.0, 226.0, 230.0, 281.0, 286.0, 265.0, 274.0, 287.0, 280.0, 290.0, 280.0, 293.0, 283.0, 256.0, 266.0, 253.0, 266.0, 266.0, 259.0, 287.0, 283.0, 280.0, 299.0, 236.0, 217.0, 268.0, 247.0, 279.0, 294.0, 280.0, 290.0, 253.0, 263.0, 288.0, 285.0, 298.0, 272.0, 235.0, 238.0, 282.0, 291.0, 265.0, 256.0, 293.0, 277.0, 264.0, 266.0, 291.0, 291.0, 271.0, 254.0, 287.0, 292.0, 274.0, 259.0, 286.0, 278.0, 287.0, 295.0, 215.0, 246.0, 288.0, 285.0, 268.0, 256.0, 288.0, 288.0, 276.0, 240.0, 292.0, 281.0, 300.0, 273.0, 191.0, 205.0, 290.0, 286.0, 250.0, 257.0, 241.0, 226.0, 274.0, 275.0, 272.0, 255.0, 262.0, 260.0, 207.0, 211.0, 284.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5924734608627003, "mean_processing_ms": 0.38604375166496974, "mean_inference_ms": 2.1438413106785164}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3168000, "num_steps_sampled": 1689600, "sample_time_ms": 21092.562, "load_time_ms": 37.447, "grad_time_ms": 9515.71, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031676997896283865, "policy_loss": -0.004229032900184393, "vf_loss": 79.702880859375, "vf_explained_var": 0.7654879093170166, "kl": 0.0019305540481582284, "entropy": 1.1470965147018433, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1689600, "episodes_total": 4224, "training_iteration": 132, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-50-51", "timestamp": 1660251051, "time_this_iter_s": 31.913390159606934, "time_total_s": 9469.239756822586, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9469.239756822586, "timesteps_since_restore": 1689600, "iterations_since_restore": 132, "perf": {"cpu_util_percent": 33.193478260869554, "ram_util_percent": 58.276086956521716}} +{"episode_reward_max": 587.0, "episode_reward_min": 123.0, "episode_reward_mean": 540.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 270.16}, "custom_metrics": {"sparse_reward_mean": 187.6, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 165.12, "shaped_reward_min": 43, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.9, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.8, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 
15.56, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.01, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.83, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.58, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.98, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.46, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.81, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.58, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.58, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [396.0, 576.0, 519.0, 461.0, 522.0, 570.0, 576.0, 501.0, 570.0, 536.0, 579.0, 576.0, 522.0, 573.0, 573.0, 530.0, 519.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 573.0, 522.0, 419.0, 123.0, 582.0, 576.0, 522.0, 579.0, 576.0, 570.0, 579.0, 453.0, 515.0, 573.0, 570.0, 516.0, 573.0, 570.0, 473.0, 573.0, 521.0, 570.0, 530.0, 582.0, 525.0, 579.0, 533.0, 564.0, 582.0, 461.0, 573.0, 524.0, 576.0, 516.0, 573.0, 573.0, 396.0, 576.0, 507.0, 467.0, 549.0, 527.0, 522.0, 418.0, 579.0, 570.0, 504.0, 573.0, 561.0, 576.0, 582.0, 530.0, 498.0, 573.0, 573.0, 579.0, 579.0, 533.0, 522.0, 579.0, 576.0, 539.0, 519.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 495.0, 587.0, 570.0, 524.0, 579.0, 509.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [203.0, 193.0, 298.0, 278.0, 254.0, 265.0, 231.0, 230.0, 259.0, 263.0, 283.0, 287.0, 287.0, 289.0, 244.0, 257.0, 282.0, 288.0, 271.0, 265.0, 287.0, 292.0, 281.0, 295.0, 263.0, 259.0, 293.0, 280.0, 281.0, 292.0, 273.0, 257.0, 252.0, 267.0, 290.0, 283.0, 265.0, 260.0, 296.0, 280.0, 297.0, 285.0, 298.0, 278.0, 298.0, 272.0, 298.0, 275.0, 256.0, 266.0, 215.0, 204.0, 60.0, 63.0, 287.0, 295.0, 281.0, 295.0, 256.0, 266.0, 287.0, 
292.0, 288.0, 288.0, 287.0, 283.0, 280.0, 299.0, 236.0, 217.0, 268.0, 247.0, 279.0, 294.0, 280.0, 290.0, 253.0, 263.0, 288.0, 285.0, 298.0, 272.0, 235.0, 238.0, 282.0, 291.0, 265.0, 256.0, 293.0, 277.0, 264.0, 266.0, 291.0, 291.0, 271.0, 254.0, 287.0, 292.0, 274.0, 259.0, 286.0, 278.0, 287.0, 295.0, 215.0, 246.0, 288.0, 285.0, 268.0, 256.0, 288.0, 288.0, 276.0, 240.0, 292.0, 281.0, 300.0, 273.0, 191.0, 205.0, 290.0, 286.0, 250.0, 257.0, 241.0, 226.0, 274.0, 275.0, 272.0, 255.0, 262.0, 260.0, 207.0, 211.0, 284.0, 295.0, 292.0, 278.0, 232.0, 272.0, 287.0, 286.0, 289.0, 272.0, 282.0, 294.0, 286.0, 296.0, 268.0, 262.0, 253.0, 245.0, 290.0, 283.0, 278.0, 295.0, 273.0, 306.0, 290.0, 289.0, 260.0, 273.0, 267.0, 255.0, 285.0, 294.0, 293.0, 283.0, 272.0, 267.0, 248.0, 271.0, 287.0, 286.0, 288.0, 285.0, 291.0, 285.0, 289.0, 284.0, 274.0, 299.0, 282.0, 291.0, 296.0, 277.0, 250.0, 245.0, 288.0, 299.0, 289.0, 281.0, 260.0, 264.0, 283.0, 296.0, 246.0, 263.0, 267.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 1.583806400101539, "mean_processing_ms": 0.3843212568870559, "mean_inference_ms": 2.1355512648653474}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3192000, "num_steps_sampled": 1702400, "sample_time_ms": 21045.575, "load_time_ms": 37.092, "grad_time_ms": 9547.761, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004417246673256159, "policy_loss": -0.0036684710066765547, "vf_loss": 86.54926300048828, "vf_explained_var": 0.7708062529563904, "kl": 0.0019647751469165087, "entropy": 1.1384211778640747, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1702400, "episodes_total": 4256, "training_iteration": 133, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-51-22", "timestamp": 1660251082, "time_this_iter_s": 30.465492963790894, "time_total_s": 9499.705249786377, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9499.705249786377, "timesteps_since_restore": 1702400, "iterations_since_restore": 133, "perf": {"cpu_util_percent": 27.94883720930233, "ram_util_percent": 58.35813953488371}} +{"episode_reward_max": 587.0, "episode_reward_min": 123.0, "episode_reward_mean": 547.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 273.85}, "custom_metrics": {"sparse_reward_mean": 189.8, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 168.1, "shaped_reward_min": 43, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.85, 
"onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.27, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.49, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.46, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.86, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.85, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.74, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.03, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, 
"soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.74, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.03, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.74, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.03, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 582.0, 582.0, 579.0, 579.0, 507.0, 576.0, 522.0, 516.0, 579.0, 582.0, 573.0, 576.0, 579.0, 530.0, 582.0, 522.0, 579.0, 576.0, 525.0, 519.0, 539.0, 570.0, 567.0, 570.0, 576.0, 579.0, 582.0, 573.0, 579.0, 582.0, 579.0, 527.0, 522.0, 418.0, 579.0, 570.0, 504.0, 573.0, 561.0, 576.0, 582.0, 530.0, 498.0, 573.0, 573.0, 579.0, 579.0, 533.0, 522.0, 579.0, 576.0, 539.0, 519.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 495.0, 587.0, 570.0, 524.0, 579.0, 509.0, 527.0, 396.0, 576.0, 519.0, 461.0, 522.0, 570.0, 576.0, 501.0, 570.0, 536.0, 579.0, 576.0, 522.0, 573.0, 573.0, 530.0, 519.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 573.0, 522.0, 419.0, 123.0, 582.0, 576.0, 522.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 263.0, 277.0, 305.0, 296.0, 286.0, 288.0, 291.0, 306.0, 273.0, 258.0, 249.0, 277.0, 299.0, 278.0, 244.0, 245.0, 271.0, 279.0, 300.0, 291.0, 291.0, 288.0, 285.0, 290.0, 286.0, 286.0, 293.0, 270.0, 260.0, 293.0, 289.0, 263.0, 259.0, 297.0, 282.0, 282.0, 
294.0, 257.0, 268.0, 264.0, 255.0, 270.0, 269.0, 280.0, 290.0, 263.0, 304.0, 285.0, 285.0, 290.0, 286.0, 301.0, 278.0, 296.0, 286.0, 296.0, 277.0, 284.0, 295.0, 280.0, 302.0, 289.0, 290.0, 272.0, 255.0, 262.0, 260.0, 207.0, 211.0, 284.0, 295.0, 292.0, 278.0, 232.0, 272.0, 287.0, 286.0, 289.0, 272.0, 282.0, 294.0, 286.0, 296.0, 268.0, 262.0, 253.0, 245.0, 290.0, 283.0, 278.0, 295.0, 273.0, 306.0, 290.0, 289.0, 260.0, 273.0, 267.0, 255.0, 285.0, 294.0, 293.0, 283.0, 272.0, 267.0, 248.0, 271.0, 287.0, 286.0, 288.0, 285.0, 291.0, 285.0, 289.0, 284.0, 274.0, 299.0, 282.0, 291.0, 296.0, 277.0, 250.0, 245.0, 288.0, 299.0, 289.0, 281.0, 260.0, 264.0, 283.0, 296.0, 246.0, 263.0, 267.0, 260.0, 203.0, 193.0, 298.0, 278.0, 254.0, 265.0, 231.0, 230.0, 259.0, 263.0, 283.0, 287.0, 287.0, 289.0, 244.0, 257.0, 282.0, 288.0, 271.0, 265.0, 287.0, 292.0, 281.0, 295.0, 263.0, 259.0, 293.0, 280.0, 281.0, 292.0, 273.0, 257.0, 252.0, 267.0, 290.0, 283.0, 265.0, 260.0, 296.0, 280.0, 297.0, 285.0, 298.0, 278.0, 298.0, 272.0, 298.0, 275.0, 256.0, 266.0, 215.0, 204.0, 60.0, 63.0, 287.0, 295.0, 281.0, 295.0, 256.0, 266.0, 287.0, 292.0, 288.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5752673474082683, "mean_processing_ms": 0.382627028657334, "mean_inference_ms": 2.127290316097115}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3216000, "num_steps_sampled": 1715200, "sample_time_ms": 21118.905, "load_time_ms": 37.128, "grad_time_ms": 9783.423, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003744603367522359, "policy_loss": -0.004061851184815168, "vf_loss": 83.74505615234375, "vf_explained_var": 0.7541170120239258, "kl": 0.001809759414754808, "entropy": 1.1361082792282104, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1715200, "episodes_total": 4288, "training_iteration": 134, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-51-53", "timestamp": 1660251113, "time_this_iter_s": 31.301603078842163, "time_total_s": 9531.00685286522, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, 
"monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": 
"ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9531.00685286522, "timesteps_since_restore": 1715200, "iterations_since_restore": 134, "perf": {"cpu_util_percent": 35.08181818181818, "ram_util_percent": 58.252272727272725}} +{"episode_reward_max": 582.0, "episode_reward_min": 123.0, "episode_reward_mean": 546.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 273.49}, "custom_metrics": {"sparse_reward_mean": 189.4, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 168.18, "shaped_reward_min": 43, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.7, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.28, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.45, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.75, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.82, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.66, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.09, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.91, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.01, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.66, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.09, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.66, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.09, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 561.0, 533.0, 582.0, 573.0, 533.0, 581.0, 459.0, 570.0, 522.0, 579.0, 570.0, 573.0, 582.0, 576.0, 582.0, 579.0, 576.0, 576.0, 567.0, 582.0, 579.0, 564.0, 582.0, 570.0, 228.0, 479.0, 573.0, 516.0, 576.0, 582.0, 522.0, 524.0, 579.0, 509.0, 527.0, 396.0, 576.0, 519.0, 461.0, 522.0, 570.0, 576.0, 501.0, 570.0, 536.0, 579.0, 576.0, 522.0, 573.0, 573.0, 530.0, 519.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 573.0, 522.0, 419.0, 123.0, 582.0, 576.0, 522.0, 579.0, 576.0, 519.0, 582.0, 582.0, 579.0, 579.0, 507.0, 576.0, 522.0, 516.0, 579.0, 582.0, 573.0, 576.0, 579.0, 530.0, 582.0, 522.0, 579.0, 576.0, 525.0, 519.0, 539.0, 570.0, 567.0, 570.0, 576.0, 579.0, 582.0, 573.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 278.0, 274.0, 287.0, 266.0, 267.0, 284.0, 298.0, 292.0, 281.0, 273.0, 
260.0, 288.0, 293.0, 233.0, 226.0, 287.0, 283.0, 253.0, 269.0, 290.0, 289.0, 287.0, 283.0, 279.0, 294.0, 289.0, 293.0, 297.0, 279.0, 291.0, 291.0, 277.0, 302.0, 299.0, 277.0, 299.0, 277.0, 265.0, 302.0, 290.0, 292.0, 288.0, 291.0, 289.0, 275.0, 285.0, 297.0, 277.0, 293.0, 114.0, 114.0, 248.0, 231.0, 285.0, 288.0, 256.0, 260.0, 292.0, 284.0, 300.0, 282.0, 253.0, 269.0, 260.0, 264.0, 283.0, 296.0, 246.0, 263.0, 267.0, 260.0, 203.0, 193.0, 298.0, 278.0, 254.0, 265.0, 231.0, 230.0, 259.0, 263.0, 283.0, 287.0, 287.0, 289.0, 244.0, 257.0, 282.0, 288.0, 271.0, 265.0, 287.0, 292.0, 281.0, 295.0, 263.0, 259.0, 293.0, 280.0, 281.0, 292.0, 273.0, 257.0, 252.0, 267.0, 290.0, 283.0, 265.0, 260.0, 296.0, 280.0, 297.0, 285.0, 298.0, 278.0, 298.0, 272.0, 298.0, 275.0, 256.0, 266.0, 215.0, 204.0, 60.0, 63.0, 287.0, 295.0, 281.0, 295.0, 256.0, 266.0, 287.0, 292.0, 288.0, 288.0, 256.0, 263.0, 277.0, 305.0, 296.0, 286.0, 288.0, 291.0, 306.0, 273.0, 258.0, 249.0, 277.0, 299.0, 278.0, 244.0, 245.0, 271.0, 279.0, 300.0, 291.0, 291.0, 288.0, 285.0, 290.0, 286.0, 286.0, 293.0, 270.0, 260.0, 293.0, 289.0, 263.0, 259.0, 297.0, 282.0, 282.0, 294.0, 257.0, 268.0, 264.0, 255.0, 270.0, 269.0, 280.0, 290.0, 263.0, 304.0, 285.0, 285.0, 290.0, 286.0, 301.0, 278.0, 296.0, 286.0, 296.0, 277.0, 284.0, 295.0, 280.0, 302.0, 289.0, 290.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5668312042090953, "mean_processing_ms": 0.38095382690757534, "mean_inference_ms": 2.118642826517617}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3240000, "num_steps_sampled": 1728000, "sample_time_ms": 20924.032, "load_time_ms": 37.391, "grad_time_ms": 9937.539, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035265153273940086, "policy_loss": -0.004777689930051565, "vf_loss": 88.75411224365234, "vf_explained_var": 0.7641527056694031, "kl": 0.002029512310400605, "entropy": 1.1424118280410767, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, 
"optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1728000, "episodes_total": 4320, "training_iteration": 135, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-52-22", "timestamp": 1660251142, "time_this_iter_s": 29.18880271911621, "time_total_s": 9560.195655584335, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, 
"NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9560.195655584335, "timesteps_since_restore": 1728000, "iterations_since_restore": 135, "perf": {"cpu_util_percent": 35.34146341463415, "ram_util_percent": 58.2390243902439}} +{"episode_reward_max": 582.0, "episode_reward_min": 228.0, "episode_reward_mean": 550.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 275.345}, "custom_metrics": {"sparse_reward_mean": 190.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 169.49, "shaped_reward_min": 68, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, 
"useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.86, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.31, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.55, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.44, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.71, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.22, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, 
"useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.71, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.22, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.71, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.22, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, 
"catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 579.0, 510.0, 582.0, 573.0, 573.0, 470.0, 579.0, 530.0, 567.0, 573.0, 533.0, 582.0, 461.0, 496.0, 510.0, 513.0, 579.0, 530.0, 570.0, 525.0, 525.0, 573.0, 294.0, 576.0, 539.0, 522.0, 498.0, 573.0, 576.0, 579.0, 570.0, 576.0, 522.0, 579.0, 576.0, 519.0, 582.0, 582.0, 579.0, 579.0, 507.0, 576.0, 522.0, 516.0, 579.0, 582.0, 573.0, 576.0, 579.0, 530.0, 582.0, 522.0, 579.0, 576.0, 525.0, 519.0, 539.0, 570.0, 567.0, 570.0, 576.0, 579.0, 582.0, 573.0, 579.0, 582.0, 579.0, 579.0, 561.0, 533.0, 582.0, 573.0, 533.0, 581.0, 459.0, 570.0, 522.0, 579.0, 570.0, 573.0, 582.0, 576.0, 582.0, 579.0, 576.0, 576.0, 567.0, 582.0, 579.0, 564.0, 582.0, 570.0, 228.0, 479.0, 573.0, 516.0, 576.0, 582.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 279.0, 270.0, 309.0, 255.0, 255.0, 282.0, 300.0, 275.0, 298.0, 293.0, 280.0, 239.0, 231.0, 293.0, 286.0, 260.0, 270.0, 287.0, 280.0, 273.0, 300.0, 275.0, 258.0, 285.0, 297.0, 229.0, 232.0, 252.0, 244.0, 270.0, 240.0, 241.0, 272.0, 288.0, 291.0, 249.0, 281.0, 287.0, 283.0, 257.0, 268.0, 258.0, 267.0, 279.0, 294.0, 142.0, 152.0, 285.0, 291.0, 281.0, 258.0, 262.0, 260.0, 261.0, 237.0, 292.0, 281.0, 287.0, 289.0, 285.0, 294.0, 279.0, 291.0, 281.0, 295.0, 256.0, 266.0, 287.0, 292.0, 288.0, 288.0, 256.0, 263.0, 277.0, 305.0, 296.0, 286.0, 288.0, 291.0, 306.0, 273.0, 258.0, 249.0, 277.0, 299.0, 278.0, 244.0, 245.0, 271.0, 279.0, 300.0, 291.0, 291.0, 288.0, 285.0, 290.0, 286.0, 286.0, 293.0, 270.0, 260.0, 293.0, 289.0, 263.0, 259.0, 297.0, 282.0, 282.0, 294.0, 257.0, 268.0, 264.0, 255.0, 270.0, 269.0, 280.0, 290.0, 263.0, 304.0, 285.0, 285.0, 290.0, 286.0, 301.0, 278.0, 296.0, 286.0, 296.0, 277.0, 284.0, 295.0, 280.0, 302.0, 289.0, 290.0, 301.0, 278.0, 274.0, 287.0, 266.0, 267.0, 284.0, 298.0, 292.0, 281.0, 273.0, 260.0, 288.0, 293.0, 233.0, 226.0, 287.0, 283.0, 253.0, 269.0, 290.0, 289.0, 287.0, 283.0, 279.0, 294.0, 289.0, 293.0, 297.0, 279.0, 291.0, 291.0, 277.0, 302.0, 299.0, 277.0, 299.0, 277.0, 265.0, 302.0, 290.0, 292.0, 288.0, 291.0, 289.0, 275.0, 285.0, 297.0, 277.0, 293.0, 114.0, 114.0, 248.0, 231.0, 285.0, 288.0, 256.0, 260.0, 292.0, 284.0, 300.0, 282.0, 253.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5585142836902617, "mean_processing_ms": 0.37930323172658476, "mean_inference_ms": 2.109956620242284}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3264000, "num_steps_sampled": 1740800, "sample_time_ms": 20787.416, "load_time_ms": 37.15, "grad_time_ms": 10003.018, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003316950984299183, "policy_loss": -0.004835940897464752, 
"vf_loss": 87.2677993774414, "vf_explained_var": 0.7657222151756287, "kl": 0.0019325317116454244, "entropy": 1.1477751731872559, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1740800, "episodes_total": 4352, "training_iteration": 136, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-52-52", "timestamp": 1660251172, "time_this_iter_s": 29.77871298789978, "time_total_s": 9589.974368572235, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": 
{"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": 
{"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9589.974368572235, "timesteps_since_restore": 1740800, "iterations_since_restore": 136, "perf": {"cpu_util_percent": 32.416666666666664, "ram_util_percent": 58.35476190476191}} +{"episode_reward_max": 630.0, "episode_reward_min": 228.0, "episode_reward_mean": 546.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 273.105}, "custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 167.81, "shaped_reward_min": 68, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 
0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.06, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.14, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.48, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.26, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.89, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.48, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.88, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.37, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.32, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.51, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.88, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.88, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 576.0, 479.0, 576.0, 570.0, 342.0, 470.0, 573.0, 510.0, 518.0, 570.0, 582.0, 539.0, 570.0, 510.0, 587.0, 579.0, 516.0, 516.0, 582.0, 576.0, 576.0, 570.0, 576.0, 519.0, 570.0, 581.0, 630.0, 573.0, 522.0, 573.0, 516.0, 573.0, 579.0, 582.0, 579.0, 579.0, 561.0, 533.0, 582.0, 573.0, 533.0, 581.0, 459.0, 570.0, 522.0, 579.0, 570.0, 573.0, 582.0, 576.0, 582.0, 579.0, 576.0, 576.0, 567.0, 582.0, 579.0, 564.0, 582.0, 570.0, 228.0, 479.0, 573.0, 516.0, 576.0, 582.0, 522.0, 570.0, 579.0, 510.0, 582.0, 573.0, 573.0, 470.0, 579.0, 530.0, 567.0, 573.0, 533.0, 582.0, 461.0, 496.0, 510.0, 513.0, 579.0, 530.0, 570.0, 525.0, 525.0, 573.0, 294.0, 576.0, 539.0, 522.0, 498.0, 573.0, 576.0, 579.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [274.0, 251.0, 298.0, 278.0, 254.0, 225.0, 297.0, 279.0, 286.0, 284.0, 166.0, 176.0, 221.0, 249.0, 296.0, 277.0, 259.0, 251.0, 259.0, 259.0, 298.0, 272.0, 283.0, 299.0, 271.0, 268.0, 293.0, 277.0, 264.0, 246.0, 285.0, 302.0, 291.0, 288.0, 286.0, 230.0, 249.0, 267.0, 284.0, 298.0, 293.0, 283.0, 288.0, 288.0, 283.0, 287.0, 285.0, 291.0, 257.0, 262.0, 286.0, 284.0, 291.0, 290.0, 316.0, 314.0, 277.0, 296.0, 262.0, 260.0, 283.0, 290.0, 264.0, 252.0, 296.0, 277.0, 284.0, 295.0, 280.0, 302.0, 289.0, 290.0, 301.0, 278.0, 274.0, 287.0, 266.0, 267.0, 284.0, 298.0, 292.0, 281.0, 273.0, 260.0, 288.0, 293.0, 233.0, 226.0, 287.0, 283.0, 253.0, 269.0, 290.0, 289.0, 287.0, 283.0, 279.0, 294.0, 289.0, 293.0, 297.0, 279.0, 291.0, 291.0, 277.0, 302.0, 299.0, 277.0, 299.0, 277.0, 265.0, 302.0, 290.0, 292.0, 288.0, 291.0, 289.0, 275.0, 285.0, 297.0, 277.0, 293.0, 114.0, 114.0, 248.0, 231.0, 285.0, 288.0, 256.0, 260.0, 292.0, 284.0, 300.0, 282.0, 253.0, 269.0, 291.0, 279.0, 270.0, 309.0, 255.0, 255.0, 282.0, 300.0, 275.0, 298.0, 293.0, 280.0, 239.0, 231.0, 293.0, 286.0, 260.0, 270.0, 287.0, 280.0, 273.0, 300.0, 275.0, 258.0, 285.0, 297.0, 229.0, 232.0, 252.0, 244.0, 270.0, 240.0, 241.0, 272.0, 288.0, 291.0, 249.0, 281.0, 287.0, 283.0, 257.0, 268.0, 258.0, 267.0, 279.0, 294.0, 142.0, 152.0, 285.0, 291.0, 281.0, 258.0, 262.0, 260.0, 261.0, 237.0, 292.0, 281.0, 287.0, 289.0, 285.0, 294.0, 279.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5503391624395824, "mean_processing_ms": 0.37768347850915746, "mean_inference_ms": 2.101391542622976}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3288000, "num_steps_sampled": 1753600, "sample_time_ms": 20872.885, "load_time_ms": 37.097, "grad_time_ms": 
9999.872, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012538364389911294, "policy_loss": -0.005918627139180899, "vf_loss": 77.47673797607422, "vf_explained_var": 0.7781977653503418, "kl": 0.0019029680406674743, "entropy": 1.1504276990890503, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1753600, "episodes_total": 4384, "training_iteration": 137, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-53-22", "timestamp": 1660251202, "time_this_iter_s": 30.53275179862976, "time_total_s": 9620.507120370865, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": 
["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9620.507120370865, "timesteps_since_restore": 1753600, "iterations_since_restore": 137, "perf": {"cpu_util_percent": 31.753488372093024, "ram_util_percent": 58.406976744186025}} +{"episode_reward_max": 630.0, "episode_reward_min": 294.0, "episode_reward_mean": 546.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 142.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 273.055}, "custom_metrics": {"sparse_reward_mean": 189.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 168.11, "shaped_reward_min": 94, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.28, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.22, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.76, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.22, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 1.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.4, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.95, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.87, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.37, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.72, 
"useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.94, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.95, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.87, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.95, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.87, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 459.0, 573.0, 579.0, 527.0, 519.0, 579.0, 525.0, 516.0, 579.0, 579.0, 579.0, 579.0, 573.0, 449.0, 579.0, 579.0, 576.0, 441.0, 576.0, 533.0, 516.0, 582.0, 516.0, 579.0, 579.0, 567.0, 587.0, 527.0, 579.0, 573.0, 579.0, 516.0, 576.0, 582.0, 522.0, 570.0, 579.0, 510.0, 582.0, 573.0, 573.0, 470.0, 579.0, 530.0, 567.0, 573.0, 533.0, 582.0, 461.0, 496.0, 510.0, 513.0, 579.0, 530.0, 570.0, 525.0, 525.0, 573.0, 294.0, 576.0, 539.0, 522.0, 498.0, 573.0, 576.0, 579.0, 570.0, 525.0, 576.0, 479.0, 576.0, 570.0, 342.0, 470.0, 573.0, 510.0, 518.0, 570.0, 582.0, 539.0, 570.0, 510.0, 587.0, 579.0, 516.0, 516.0, 582.0, 576.0, 576.0, 570.0, 576.0, 519.0, 570.0, 581.0, 630.0, 573.0, 522.0, 573.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 230.0, 229.0, 295.0, 278.0, 295.0, 284.0, 275.0, 252.0, 266.0, 253.0, 285.0, 294.0, 260.0, 265.0, 261.0, 255.0, 285.0, 294.0, 272.0, 307.0, 284.0, 295.0, 273.0, 306.0, 290.0, 283.0, 228.0, 221.0, 296.0, 283.0, 279.0, 300.0, 299.0, 277.0, 226.0, 215.0, 280.0, 296.0, 262.0, 271.0, 261.0, 255.0, 288.0, 294.0, 267.0, 249.0, 289.0, 290.0, 281.0, 298.0, 279.0, 288.0, 286.0, 301.0, 256.0, 271.0, 280.0, 299.0, 275.0, 298.0, 283.0, 296.0, 256.0, 260.0, 292.0, 284.0, 300.0, 282.0, 253.0, 269.0, 291.0, 279.0, 270.0, 309.0, 255.0, 255.0, 282.0, 300.0, 275.0, 298.0, 293.0, 280.0, 239.0, 231.0, 293.0, 286.0, 260.0, 270.0, 287.0, 280.0, 273.0, 300.0, 275.0, 258.0, 285.0, 297.0, 229.0, 232.0, 252.0, 244.0, 270.0, 240.0, 241.0, 272.0, 288.0, 291.0, 249.0, 281.0, 287.0, 283.0, 257.0, 268.0, 258.0, 267.0, 279.0, 294.0, 142.0, 152.0, 285.0, 291.0, 281.0, 258.0, 262.0, 260.0, 261.0, 237.0, 292.0, 281.0, 287.0, 289.0, 285.0, 294.0, 279.0, 291.0, 274.0, 251.0, 298.0, 278.0, 254.0, 225.0, 297.0, 279.0, 286.0, 284.0, 166.0, 176.0, 221.0, 249.0, 296.0, 277.0, 259.0, 251.0, 259.0, 259.0, 298.0, 272.0, 283.0, 299.0, 271.0, 268.0, 293.0, 277.0, 264.0, 246.0, 285.0, 302.0, 291.0, 288.0, 286.0, 230.0, 249.0, 267.0, 284.0, 298.0, 293.0, 283.0, 288.0, 288.0, 283.0, 287.0, 285.0, 291.0, 257.0, 262.0, 286.0, 284.0, 291.0, 290.0, 316.0, 314.0, 277.0, 296.0, 262.0, 260.0, 283.0, 290.0, 264.0, 252.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5422943636827329, "mean_processing_ms": 0.376087334740523, 
"mean_inference_ms": 2.0930739405664296}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3312000, "num_steps_sampled": 1766400, "sample_time_ms": 20654.49, "load_time_ms": 37.188, "grad_time_ms": 9982.936, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023388864938169718, "policy_loss": -0.0055216290056705475, "vf_loss": 84.28978729248047, "vf_explained_var": 0.7621362209320068, "kl": 0.0017433507600799203, "entropy": 1.136921763420105, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1766400, "episodes_total": 4416, "training_iteration": 138, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-53-51", "timestamp": 1660251231, "time_this_iter_s": 28.834796905517578, "time_total_s": 9649.341917276382, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, 
"num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9649.341917276382, "timesteps_since_restore": 1766400, "iterations_since_restore": 138, "perf": {"cpu_util_percent": 31.565853658536582, "ram_util_percent": 58.34634146341463}} +{"episode_reward_max": 630.0, "episode_reward_min": 342.0, "episode_reward_mean": 553.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 166.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 276.825}, "custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 170.05, "shaped_reward_min": 102, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, 
"tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.1, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.92, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.18, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.93, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.38, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.44, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.22, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.88, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 
5.69, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.73, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.22, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.88, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.22, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, 
"viable_onion_potting_agent_1_mean": 15.88, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 345.0, 465.0, 558.0, 570.0, 573.0, 582.0, 573.0, 576.0, 573.0, 573.0, 573.0, 582.0, 579.0, 582.0, 582.0, 567.0, 582.0, 576.0, 579.0, 587.0, 519.0, 570.0, 567.0, 579.0, 582.0, 533.0, 522.0, 587.0, 504.0, 630.0, 536.0, 573.0, 576.0, 579.0, 570.0, 525.0, 576.0, 479.0, 576.0, 570.0, 342.0, 470.0, 573.0, 510.0, 518.0, 570.0, 582.0, 539.0, 570.0, 510.0, 587.0, 579.0, 516.0, 516.0, 582.0, 576.0, 576.0, 570.0, 576.0, 519.0, 570.0, 581.0, 630.0, 573.0, 522.0, 573.0, 516.0, 630.0, 459.0, 573.0, 579.0, 527.0, 519.0, 579.0, 525.0, 516.0, 
579.0, 579.0, 579.0, 579.0, 573.0, 449.0, 579.0, 579.0, 576.0, 441.0, 576.0, 533.0, 516.0, 582.0, 516.0, 579.0, 579.0, 567.0, 587.0, 527.0, 579.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 177.0, 168.0, 229.0, 236.0, 275.0, 283.0, 271.0, 299.0, 292.0, 281.0, 286.0, 296.0, 287.0, 286.0, 283.0, 293.0, 293.0, 280.0, 280.0, 293.0, 296.0, 277.0, 293.0, 289.0, 272.0, 307.0, 294.0, 288.0, 298.0, 284.0, 272.0, 295.0, 293.0, 289.0, 287.0, 289.0, 287.0, 292.0, 291.0, 296.0, 257.0, 262.0, 273.0, 297.0, 281.0, 286.0, 298.0, 281.0, 282.0, 300.0, 273.0, 260.0, 272.0, 250.0, 295.0, 292.0, 252.0, 252.0, 307.0, 323.0, 267.0, 269.0, 292.0, 281.0, 287.0, 289.0, 285.0, 294.0, 279.0, 291.0, 274.0, 251.0, 298.0, 278.0, 254.0, 225.0, 297.0, 279.0, 286.0, 284.0, 166.0, 176.0, 221.0, 249.0, 296.0, 277.0, 259.0, 251.0, 259.0, 259.0, 298.0, 272.0, 283.0, 299.0, 271.0, 268.0, 293.0, 277.0, 264.0, 246.0, 285.0, 302.0, 291.0, 288.0, 286.0, 230.0, 249.0, 267.0, 284.0, 298.0, 293.0, 283.0, 288.0, 288.0, 283.0, 287.0, 285.0, 291.0, 257.0, 262.0, 286.0, 284.0, 291.0, 290.0, 316.0, 314.0, 277.0, 296.0, 262.0, 260.0, 283.0, 290.0, 264.0, 252.0, 313.0, 317.0, 230.0, 229.0, 295.0, 278.0, 295.0, 284.0, 275.0, 252.0, 266.0, 253.0, 285.0, 294.0, 260.0, 265.0, 261.0, 255.0, 285.0, 294.0, 272.0, 307.0, 284.0, 295.0, 273.0, 306.0, 290.0, 283.0, 228.0, 221.0, 296.0, 283.0, 279.0, 300.0, 299.0, 277.0, 226.0, 215.0, 280.0, 296.0, 262.0, 271.0, 261.0, 255.0, 288.0, 294.0, 267.0, 249.0, 289.0, 
290.0, 281.0, 298.0, 279.0, 288.0, 286.0, 301.0, 256.0, 271.0, 280.0, 299.0, 275.0, 298.0, 283.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.534348979817542, "mean_processing_ms": 0.37451084317148714, "mean_inference_ms": 2.0846763834338202}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3336000, "num_steps_sampled": 1779200, "sample_time_ms": 20417.117, "load_time_ms": 37.045, "grad_time_ms": 9852.38, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006708970759063959, "policy_loss": -0.0014362437650561333, "vf_loss": 87.18399810791016, "vf_explained_var": 0.7458827495574951, "kl": 0.0019282657885923982, "entropy": 1.1463767290115356, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1779200, "episodes_total": 4448, "training_iteration": 139, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-54-19", "timestamp": 1660251259, "time_this_iter_s": 27.688152074813843, "time_total_s": 9677.030069351196, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9677.030069351196, "timesteps_since_restore": 1779200, "iterations_since_restore": 139, "perf": {"cpu_util_percent": 30.3025641025641, "ram_util_percent": 58.341025641025624}} +{"episode_reward_max": 630.0, "episode_reward_min": 345.0, "episode_reward_mean": 556.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 278.16}, "custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 170.72, 
"shaped_reward_min": 105, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.18, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.21, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.37, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.19, 
"potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.1, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.56, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.53, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.62, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.91, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.85, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.19, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.1, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.19, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.1, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 567.0, 530.0, 573.0, 576.0, 521.0, 570.0, 579.0, 501.0, 573.0, 522.0, 579.0, 630.0, 576.0, 570.0, 576.0, 579.0, 522.0, 576.0, 525.0, 576.0, 579.0, 567.0, 579.0, 576.0, 579.0, 582.0, 576.0, 504.0, 579.0, 573.0, 408.0, 573.0, 522.0, 573.0, 516.0, 630.0, 459.0, 573.0, 579.0, 527.0, 519.0, 579.0, 525.0, 516.0, 579.0, 579.0, 
579.0, 579.0, 573.0, 449.0, 579.0, 579.0, 576.0, 441.0, 576.0, 533.0, 516.0, 582.0, 516.0, 579.0, 579.0, 567.0, 587.0, 527.0, 579.0, 573.0, 579.0, 576.0, 345.0, 465.0, 558.0, 570.0, 573.0, 582.0, 573.0, 576.0, 573.0, 573.0, 573.0, 582.0, 579.0, 582.0, 582.0, 567.0, 582.0, 576.0, 579.0, 587.0, 519.0, 570.0, 567.0, 579.0, 582.0, 533.0, 522.0, 587.0, 504.0, 630.0, 536.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 261.0, 280.0, 287.0, 249.0, 281.0, 293.0, 280.0, 288.0, 288.0, 251.0, 270.0, 280.0, 290.0, 277.0, 302.0, 242.0, 259.0, 295.0, 278.0, 258.0, 264.0, 285.0, 294.0, 320.0, 310.0, 284.0, 292.0, 287.0, 283.0, 296.0, 280.0, 285.0, 294.0, 257.0, 265.0, 297.0, 279.0, 262.0, 263.0, 288.0, 288.0, 298.0, 281.0, 293.0, 274.0, 301.0, 278.0, 293.0, 283.0, 298.0, 281.0, 291.0, 291.0, 266.0, 310.0, 236.0, 268.0, 285.0, 294.0, 290.0, 283.0, 205.0, 203.0, 277.0, 296.0, 262.0, 260.0, 283.0, 290.0, 264.0, 252.0, 313.0, 317.0, 230.0, 229.0, 295.0, 278.0, 295.0, 284.0, 275.0, 252.0, 266.0, 253.0, 285.0, 294.0, 260.0, 265.0, 261.0, 255.0, 285.0, 294.0, 272.0, 307.0, 284.0, 295.0, 273.0, 306.0, 290.0, 283.0, 228.0, 221.0, 296.0, 283.0, 279.0, 300.0, 299.0, 277.0, 226.0, 215.0, 280.0, 296.0, 262.0, 271.0, 261.0, 255.0, 288.0, 294.0, 267.0, 249.0, 289.0, 290.0, 281.0, 298.0, 279.0, 288.0, 286.0, 301.0, 256.0, 271.0, 280.0, 299.0, 275.0, 298.0, 283.0, 296.0, 276.0, 300.0, 177.0, 168.0, 229.0, 236.0, 275.0, 283.0, 271.0, 299.0, 292.0, 281.0, 286.0, 296.0, 287.0, 286.0, 283.0, 293.0, 293.0, 
280.0, 280.0, 293.0, 296.0, 277.0, 293.0, 289.0, 272.0, 307.0, 294.0, 288.0, 298.0, 284.0, 272.0, 295.0, 293.0, 289.0, 287.0, 289.0, 287.0, 292.0, 291.0, 296.0, 257.0, 262.0, 273.0, 297.0, 281.0, 286.0, 298.0, 281.0, 282.0, 300.0, 273.0, 260.0, 272.0, 250.0, 295.0, 292.0, 252.0, 252.0, 307.0, 323.0, 267.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5264986068464634, "mean_processing_ms": 0.37294987476110114, "mean_inference_ms": 2.0764572301568647}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3360000, "num_steps_sampled": 1792000, "sample_time_ms": 20405.098, "load_time_ms": 37.005, "grad_time_ms": 9490.101, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0018680343637242913, "policy_loss": -0.005901841446757317, "vf_loss": 83.4326400756836, "vf_explained_var": 0.7634987831115723, "kl": 0.002031019888818264, "entropy": 1.1467581987380981, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1792000, "episodes_total": 4480, "training_iteration": 140, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-54-47", "timestamp": 1660251287, "time_this_iter_s": 28.096507787704468, "time_total_s": 9705.1265771389, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9705.1265771389, "timesteps_since_restore": 1792000, "iterations_since_restore": 140, "perf": {"cpu_util_percent": 30.9875, "ram_util_percent": 58.3925}} +{"episode_reward_max": 630.0, "episode_reward_min": 345.0, "episode_reward_mean": 560.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, 
"policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 280.105}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 171.81, "shaped_reward_min": 105, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.15, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.17, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.73, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.41, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.26, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.21, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.23, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.53, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.75, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.77, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.21, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.23, "optimal_onion_potting_agent_1_min": 10, 
"optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.21, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.23, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 522.0, 504.0, 573.0, 579.0, 530.0, 582.0, 576.0, 579.0, 579.0, 570.0, 525.0, 573.0, 582.0, 630.0, 558.0, 576.0, 
576.0, 576.0, 533.0, 582.0, 582.0, 530.0, 530.0, 630.0, 576.0, 516.0, 476.0, 579.0, 582.0, 573.0, 576.0, 527.0, 579.0, 573.0, 579.0, 576.0, 345.0, 465.0, 558.0, 570.0, 573.0, 582.0, 573.0, 576.0, 573.0, 573.0, 573.0, 582.0, 579.0, 582.0, 582.0, 567.0, 582.0, 576.0, 579.0, 587.0, 519.0, 570.0, 567.0, 579.0, 582.0, 533.0, 522.0, 587.0, 504.0, 630.0, 536.0, 530.0, 567.0, 530.0, 573.0, 576.0, 521.0, 570.0, 579.0, 501.0, 573.0, 522.0, 579.0, 630.0, 576.0, 570.0, 576.0, 579.0, 522.0, 576.0, 525.0, 576.0, 579.0, 567.0, 579.0, 576.0, 579.0, 582.0, 576.0, 504.0, 579.0, 573.0, 408.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 280.0, 262.0, 260.0, 260.0, 244.0, 277.0, 296.0, 296.0, 283.0, 264.0, 266.0, 301.0, 281.0, 285.0, 291.0, 289.0, 290.0, 291.0, 288.0, 287.0, 283.0, 263.0, 262.0, 292.0, 281.0, 301.0, 281.0, 303.0, 327.0, 272.0, 286.0, 296.0, 280.0, 282.0, 294.0, 282.0, 294.0, 268.0, 265.0, 298.0, 284.0, 298.0, 284.0, 255.0, 275.0, 253.0, 277.0, 311.0, 319.0, 288.0, 288.0, 250.0, 266.0, 238.0, 238.0, 292.0, 287.0, 309.0, 273.0, 286.0, 287.0, 289.0, 287.0, 256.0, 271.0, 280.0, 299.0, 275.0, 298.0, 283.0, 296.0, 276.0, 300.0, 177.0, 168.0, 229.0, 236.0, 275.0, 283.0, 271.0, 299.0, 292.0, 281.0, 286.0, 296.0, 287.0, 286.0, 283.0, 293.0, 293.0, 280.0, 280.0, 293.0, 296.0, 277.0, 293.0, 289.0, 272.0, 307.0, 294.0, 288.0, 298.0, 284.0, 272.0, 295.0, 293.0, 289.0, 287.0, 289.0, 287.0, 292.0, 291.0, 296.0, 257.0, 262.0, 273.0, 297.0, 281.0, 286.0, 298.0, 281.0, 282.0, 300.0, 273.0, 
260.0, 272.0, 250.0, 295.0, 292.0, 252.0, 252.0, 307.0, 323.0, 267.0, 269.0, 269.0, 261.0, 280.0, 287.0, 249.0, 281.0, 293.0, 280.0, 288.0, 288.0, 251.0, 270.0, 280.0, 290.0, 277.0, 302.0, 242.0, 259.0, 295.0, 278.0, 258.0, 264.0, 285.0, 294.0, 320.0, 310.0, 284.0, 292.0, 287.0, 283.0, 296.0, 280.0, 285.0, 294.0, 257.0, 265.0, 297.0, 279.0, 262.0, 263.0, 288.0, 288.0, 298.0, 281.0, 293.0, 274.0, 301.0, 278.0, 293.0, 283.0, 298.0, 281.0, 291.0, 291.0, 266.0, 310.0, 236.0, 268.0, 285.0, 294.0, 290.0, 283.0, 205.0, 203.0]}, "sampler_perf": {"mean_env_wait_ms": 1.518751324463327, "mean_processing_ms": 0.37140910407762817, "mean_inference_ms": 2.0683253134575508}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3384000, "num_steps_sampled": 1804800, "sample_time_ms": 20217.154, "load_time_ms": 36.826, "grad_time_ms": 9453.413, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004852355923503637, "policy_loss": -0.003466278314590454, "vf_loss": 88.89630126953125, "vf_explained_var": 0.7491546273231506, "kl": 0.0020531185436993837, "entropy": 1.1419917345046997, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1804800, "episodes_total": 4512, "training_iteration": 141, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-55-17", "timestamp": 1660251317, "time_this_iter_s": 29.532893180847168, "time_total_s": 9734.659470319748, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9734.659470319748, "timesteps_since_restore": 1804800, "iterations_since_restore": 141, "perf": {"cpu_util_percent": 29.842857142857145, 
"ram_util_percent": 58.3642857142857}} +{"episode_reward_max": 630.0, "episode_reward_min": 123.0, "episode_reward_mean": 554.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 277.34}, "custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.68, "shaped_reward_min": 43, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.18, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.63, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.41, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 
0.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.06, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.23, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.82, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.15, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 
15.06, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.23, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.06, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.23, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 455.0, 582.0, 123.0, 476.0, 527.0, 579.0, 582.0, 567.0, 579.0, 579.0, 579.0, 579.0, 481.0, 576.0, 587.0, 582.0, 576.0, 579.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 413.0, 579.0, 570.0, 465.0, 579.0, 579.0, 573.0, 587.0, 504.0, 630.0, 536.0, 530.0, 567.0, 530.0, 573.0, 576.0, 521.0, 570.0, 579.0, 501.0, 573.0, 522.0, 579.0, 630.0, 576.0, 570.0, 576.0, 579.0, 522.0, 576.0, 525.0, 576.0, 579.0, 567.0, 579.0, 576.0, 579.0, 582.0, 576.0, 504.0, 579.0, 573.0, 408.0, 573.0, 522.0, 504.0, 573.0, 579.0, 530.0, 582.0, 576.0, 579.0, 579.0, 570.0, 525.0, 573.0, 582.0, 630.0, 558.0, 576.0, 576.0, 576.0, 533.0, 582.0, 582.0, 530.0, 530.0, 630.0, 576.0, 516.0, 476.0, 579.0, 582.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [248.0, 274.0, 248.0, 207.0, 280.0, 302.0, 60.0, 63.0, 249.0, 227.0, 275.0, 252.0, 284.0, 295.0, 297.0, 285.0, 282.0, 285.0, 301.0, 278.0, 291.0, 288.0, 278.0, 301.0, 286.0, 293.0, 236.0, 245.0, 277.0, 299.0, 294.0, 293.0, 293.0, 289.0, 279.0, 297.0, 286.0, 293.0, 283.0, 287.0, 288.0, 291.0, 273.0, 309.0, 287.0, 286.0, 301.0, 278.0, 294.0, 285.0, 211.0, 202.0, 295.0, 284.0, 285.0, 285.0, 233.0, 232.0, 292.0, 287.0, 289.0, 290.0, 288.0, 285.0, 295.0, 292.0, 252.0, 252.0, 307.0, 323.0, 267.0, 269.0, 269.0, 261.0, 280.0, 287.0, 249.0, 281.0, 293.0, 280.0, 288.0, 288.0, 251.0, 270.0, 280.0, 290.0, 277.0, 302.0, 242.0, 259.0, 295.0, 278.0, 258.0, 264.0, 285.0, 294.0, 320.0, 
310.0, 284.0, 292.0, 287.0, 283.0, 296.0, 280.0, 285.0, 294.0, 257.0, 265.0, 297.0, 279.0, 262.0, 263.0, 288.0, 288.0, 298.0, 281.0, 293.0, 274.0, 301.0, 278.0, 293.0, 283.0, 298.0, 281.0, 291.0, 291.0, 266.0, 310.0, 236.0, 268.0, 285.0, 294.0, 290.0, 283.0, 205.0, 203.0, 293.0, 280.0, 262.0, 260.0, 260.0, 244.0, 277.0, 296.0, 296.0, 283.0, 264.0, 266.0, 301.0, 281.0, 285.0, 291.0, 289.0, 290.0, 291.0, 288.0, 287.0, 283.0, 263.0, 262.0, 292.0, 281.0, 301.0, 281.0, 303.0, 327.0, 272.0, 286.0, 296.0, 280.0, 282.0, 294.0, 282.0, 294.0, 268.0, 265.0, 298.0, 284.0, 298.0, 284.0, 255.0, 275.0, 253.0, 277.0, 311.0, 319.0, 288.0, 288.0, 250.0, 266.0, 238.0, 238.0, 292.0, 287.0, 309.0, 273.0, 286.0, 287.0, 289.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5111310718572366, "mean_processing_ms": 0.36989117514475767, "mean_inference_ms": 2.0605337474583503}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3408000, "num_steps_sampled": 1817600, "sample_time_ms": 20052.563, "load_time_ms": 36.402, "grad_time_ms": 9487.641, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00461258739233017, "policy_loss": -0.0034613541793078184, "vf_loss": 86.5114974975586, "vf_explained_var": 0.770569384098053, "kl": 0.0022539596538990736, "entropy": 1.154403805732727, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1817600, "episodes_total": 4544, "training_iteration": 142, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-55-47", "timestamp": 1660251347, "time_this_iter_s": 30.608631134033203, "time_total_s": 9765.268101453781, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 9765.268101453781, "timesteps_since_restore": 1817600, "iterations_since_restore": 142, "perf": {"cpu_util_percent": 30.76046511627907, "ram_util_percent": 58.4186046511628}} +{"episode_reward_max": 633.0, "episode_reward_min": 123.0, "episode_reward_mean": 553.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 276.845}, "custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.49, "shaped_reward_min": 43, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.23, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.87, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.68, "useful_onion_pickup_agent_0_min": 4, 
"useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.13, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.76, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.01, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.6, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.73, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 
0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.01, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.01, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 533.0, 570.0, 633.0, 582.0, 579.0, 573.0, 582.0, 123.0, 573.0, 579.0, 576.0, 579.0, 582.0, 576.0, 524.0, 573.0, 584.0, 573.0, 582.0, 579.0, 518.0, 573.0, 573.0, 564.0, 576.0, 573.0, 567.0, 633.0, 527.0, 579.0, 630.0, 504.0, 579.0, 573.0, 408.0, 573.0, 522.0, 504.0, 573.0, 579.0, 530.0, 582.0, 576.0, 579.0, 579.0, 570.0, 525.0, 573.0, 582.0, 630.0, 558.0, 576.0, 576.0, 576.0, 533.0, 582.0, 582.0, 530.0, 530.0, 630.0, 576.0, 516.0, 476.0, 579.0, 582.0, 573.0, 576.0, 522.0, 455.0, 582.0, 123.0, 476.0, 527.0, 579.0, 582.0, 567.0, 579.0, 579.0, 579.0, 579.0, 481.0, 576.0, 587.0, 582.0, 576.0, 579.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 413.0, 579.0, 570.0, 465.0, 579.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 298.0, 259.0, 274.0, 285.0, 285.0, 302.0, 331.0, 280.0, 302.0, 281.0, 298.0, 286.0, 287.0, 286.0, 296.0, 60.0, 63.0, 288.0, 285.0, 295.0, 284.0, 275.0, 301.0, 280.0, 299.0, 303.0, 279.0, 289.0, 287.0, 275.0, 249.0, 292.0, 281.0, 291.0, 293.0, 292.0, 281.0, 291.0, 291.0, 288.0, 291.0, 235.0, 283.0, 293.0, 280.0, 284.0, 289.0, 276.0, 288.0, 284.0, 292.0, 294.0, 279.0, 278.0, 289.0, 301.0, 332.0, 260.0, 267.0, 278.0, 301.0, 314.0, 316.0, 236.0, 268.0, 285.0, 294.0, 290.0, 283.0, 205.0, 203.0, 293.0, 
280.0, 262.0, 260.0, 260.0, 244.0, 277.0, 296.0, 296.0, 283.0, 264.0, 266.0, 301.0, 281.0, 285.0, 291.0, 289.0, 290.0, 291.0, 288.0, 287.0, 283.0, 263.0, 262.0, 292.0, 281.0, 301.0, 281.0, 303.0, 327.0, 272.0, 286.0, 296.0, 280.0, 282.0, 294.0, 282.0, 294.0, 268.0, 265.0, 298.0, 284.0, 298.0, 284.0, 255.0, 275.0, 253.0, 277.0, 311.0, 319.0, 288.0, 288.0, 250.0, 266.0, 238.0, 238.0, 292.0, 287.0, 309.0, 273.0, 286.0, 287.0, 289.0, 287.0, 248.0, 274.0, 248.0, 207.0, 280.0, 302.0, 60.0, 63.0, 249.0, 227.0, 275.0, 252.0, 284.0, 295.0, 297.0, 285.0, 282.0, 285.0, 301.0, 278.0, 291.0, 288.0, 278.0, 301.0, 286.0, 293.0, 236.0, 245.0, 277.0, 299.0, 294.0, 293.0, 293.0, 289.0, 279.0, 297.0, 286.0, 293.0, 283.0, 287.0, 288.0, 291.0, 273.0, 309.0, 287.0, 286.0, 301.0, 278.0, 294.0, 285.0, 211.0, 202.0, 295.0, 284.0, 285.0, 285.0, 233.0, 232.0, 292.0, 287.0, 289.0, 290.0, 288.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5036560426122845, "mean_processing_ms": 0.36840756428267724, "mean_inference_ms": 2.053955603003225}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3432000, "num_steps_sampled": 1830400, "sample_time_ms": 20726.741, "load_time_ms": 36.338, "grad_time_ms": 9326.632, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004940376617014408, "policy_loss": -0.002967018634080887, "vf_loss": 84.7812271118164, "vf_explained_var": 0.7767437100410461, "kl": 0.0015952900284901261, "entropy": 1.1414709091186523, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1830400, "episodes_total": 4576, "training_iteration": 143, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-56-23", "timestamp": 1660251383, "time_this_iter_s": 35.59740996360779, "time_total_s": 9800.865511417389, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": 
{"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9800.865511417389, "timesteps_since_restore": 1830400, "iterations_since_restore": 143, "perf": {"cpu_util_percent": 28.452, "ram_util_percent": 58.38199999999999}} +{"episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 555.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 277.9}, "custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 171.4, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.67, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.01, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.97, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.76, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.5, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 15.79, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.54, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.68, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.79, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.5, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 15.79, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.5, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 15.79, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 570.0, 582.0, 579.0, 582.0, 533.0, 495.0, 573.0, 533.0, 582.0, 582.0, 582.0, 522.0, 518.0, 524.0, 587.0, 579.0, 579.0, 579.0, 582.0, 579.0, 636.0, 510.0, 419.0, 570.0, 576.0, 579.0, 576.0, 573.0, 570.0, 587.0, 579.0, 582.0, 573.0, 576.0, 522.0, 455.0, 582.0, 123.0, 476.0, 527.0, 579.0, 582.0, 567.0, 579.0, 579.0, 579.0, 579.0, 481.0, 576.0, 587.0, 582.0, 576.0, 579.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 413.0, 579.0, 570.0, 465.0, 579.0, 579.0, 573.0, 579.0, 533.0, 570.0, 633.0, 582.0, 579.0, 573.0, 582.0, 123.0, 573.0, 579.0, 576.0, 579.0, 582.0, 576.0, 524.0, 573.0, 584.0, 573.0, 582.0, 579.0, 518.0, 573.0, 573.0, 564.0, 576.0, 573.0, 567.0, 633.0, 527.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 282.0, 294.0, 273.0, 297.0, 286.0, 296.0, 296.0, 283.0, 280.0, 302.0, 268.0, 265.0, 235.0, 260.0, 279.0, 294.0, 266.0, 267.0, 299.0, 283.0, 292.0, 290.0, 290.0, 292.0, 263.0, 259.0, 256.0, 262.0, 261.0, 263.0, 314.0, 273.0, 298.0, 281.0, 288.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 302.0, 320.0, 316.0, 261.0, 249.0, 211.0, 
208.0, 299.0, 271.0, 283.0, 293.0, 280.0, 299.0, 296.0, 280.0, 288.0, 285.0, 301.0, 269.0, 278.0, 309.0, 292.0, 287.0, 309.0, 273.0, 286.0, 287.0, 289.0, 287.0, 248.0, 274.0, 248.0, 207.0, 280.0, 302.0, 60.0, 63.0, 249.0, 227.0, 275.0, 252.0, 284.0, 295.0, 297.0, 285.0, 282.0, 285.0, 301.0, 278.0, 291.0, 288.0, 278.0, 301.0, 286.0, 293.0, 236.0, 245.0, 277.0, 299.0, 294.0, 293.0, 293.0, 289.0, 279.0, 297.0, 286.0, 293.0, 283.0, 287.0, 288.0, 291.0, 273.0, 309.0, 287.0, 286.0, 301.0, 278.0, 294.0, 285.0, 211.0, 202.0, 295.0, 284.0, 285.0, 285.0, 233.0, 232.0, 292.0, 287.0, 289.0, 290.0, 288.0, 285.0, 281.0, 298.0, 259.0, 274.0, 285.0, 285.0, 302.0, 331.0, 280.0, 302.0, 281.0, 298.0, 286.0, 287.0, 286.0, 296.0, 60.0, 63.0, 288.0, 285.0, 295.0, 284.0, 275.0, 301.0, 280.0, 299.0, 303.0, 279.0, 289.0, 287.0, 275.0, 249.0, 292.0, 281.0, 291.0, 293.0, 292.0, 281.0, 291.0, 291.0, 288.0, 291.0, 235.0, 283.0, 293.0, 280.0, 284.0, 289.0, 276.0, 288.0, 284.0, 292.0, 294.0, 279.0, 278.0, 289.0, 301.0, 332.0, 260.0, 267.0, 278.0, 301.0, 314.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4962976590401342, "mean_processing_ms": 0.3669482669309259, "mean_inference_ms": 2.047611703358859}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3456000, "num_steps_sampled": 1843200, "sample_time_ms": 20697.837, "load_time_ms": 36.511, "grad_time_ms": 9260.983, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007136467844247818, "policy_loss": -0.0009602725622244179, "vf_loss": 86.6334457397461, "vf_explained_var": 0.7632217407226562, "kl": 0.0016821371391415596, "entropy": 1.1331907510757446, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1843200, "episodes_total": 4608, "training_iteration": 144, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-56-53", "timestamp": 1660251413, 
"time_this_iter_s": 30.354671239852905, "time_total_s": 9831.220182657242, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, 
"log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, 
"sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9831.220182657242, "timesteps_since_restore": 1843200, "iterations_since_restore": 144, "perf": {"cpu_util_percent": 32.744186046511636, "ram_util_percent": 58.41860465116278}} +{"episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 559.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 279.92}, "custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 171.84, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.68, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.06, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.75, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.39, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.61, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 15.8, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.52, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 1, 
"soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.69, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.61, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 15.8, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.61, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 15.8, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 501.0, 582.0, 573.0, 576.0, 582.0, 570.0, 530.0, 579.0, 234.0, 576.0, 567.0, 630.0, 573.0, 582.0, 530.0, 573.0, 579.0, 576.0, 570.0, 576.0, 579.0, 522.0, 570.0, 522.0, 552.0, 576.0, 576.0, 579.0, 579.0, 579.0, 465.0, 579.0, 579.0, 573.0, 579.0, 533.0, 570.0, 633.0, 582.0, 579.0, 573.0, 582.0, 123.0, 573.0, 579.0, 576.0, 579.0, 582.0, 576.0, 524.0, 573.0, 584.0, 573.0, 582.0, 579.0, 518.0, 573.0, 573.0, 564.0, 576.0, 573.0, 567.0, 633.0, 527.0, 579.0, 630.0, 579.0, 576.0, 570.0, 582.0, 579.0, 582.0, 533.0, 495.0, 573.0, 533.0, 582.0, 582.0, 582.0, 522.0, 518.0, 524.0, 587.0, 579.0, 579.0, 579.0, 582.0, 579.0, 636.0, 510.0, 419.0, 570.0, 576.0, 579.0, 576.0, 573.0, 570.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [273.0, 306.0, 290.0, 286.0, 255.0, 246.0, 283.0, 299.0, 280.0, 293.0, 297.0, 279.0, 292.0, 290.0, 280.0, 290.0, 277.0, 253.0, 
283.0, 296.0, 112.0, 122.0, 283.0, 293.0, 285.0, 282.0, 316.0, 314.0, 292.0, 281.0, 282.0, 300.0, 271.0, 259.0, 280.0, 293.0, 279.0, 300.0, 291.0, 285.0, 272.0, 298.0, 294.0, 282.0, 289.0, 290.0, 259.0, 263.0, 284.0, 286.0, 252.0, 270.0, 285.0, 267.0, 298.0, 278.0, 277.0, 299.0, 283.0, 296.0, 282.0, 297.0, 276.0, 303.0, 233.0, 232.0, 292.0, 287.0, 289.0, 290.0, 288.0, 285.0, 281.0, 298.0, 259.0, 274.0, 285.0, 285.0, 302.0, 331.0, 280.0, 302.0, 281.0, 298.0, 286.0, 287.0, 286.0, 296.0, 60.0, 63.0, 288.0, 285.0, 295.0, 284.0, 275.0, 301.0, 280.0, 299.0, 303.0, 279.0, 289.0, 287.0, 275.0, 249.0, 292.0, 281.0, 291.0, 293.0, 292.0, 281.0, 291.0, 291.0, 288.0, 291.0, 235.0, 283.0, 293.0, 280.0, 284.0, 289.0, 276.0, 288.0, 284.0, 292.0, 294.0, 279.0, 278.0, 289.0, 301.0, 332.0, 260.0, 267.0, 278.0, 301.0, 314.0, 316.0, 283.0, 296.0, 282.0, 294.0, 273.0, 297.0, 286.0, 296.0, 296.0, 283.0, 280.0, 302.0, 268.0, 265.0, 235.0, 260.0, 279.0, 294.0, 266.0, 267.0, 299.0, 283.0, 292.0, 290.0, 290.0, 292.0, 263.0, 259.0, 256.0, 262.0, 261.0, 263.0, 314.0, 273.0, 298.0, 281.0, 288.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 302.0, 320.0, 316.0, 261.0, 249.0, 211.0, 208.0, 299.0, 271.0, 283.0, 293.0, 280.0, 299.0, 296.0, 280.0, 288.0, 285.0, 301.0, 269.0, 278.0, 309.0]}, "sampler_perf": {"mean_env_wait_ms": 1.489071157531513, "mean_processing_ms": 0.36552392885765655, "mean_inference_ms": 2.041836182574639}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3480000, "num_steps_sampled": 1856000, "sample_time_ms": 21121.64, "load_time_ms": 36.034, "grad_time_ms": 9412.787, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004381807986646891, "policy_loss": -0.0034314494114369154, "vf_loss": 83.83314514160156, "vf_explained_var": 0.7805802226066589, "kl": 0.0022449749521911144, "entropy": 1.1401251554489136, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 1856000, "episodes_total": 4640, "training_iteration": 145, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-57-28", "timestamp": 1660251448, "time_this_iter_s": 34.93755006790161, "time_total_s": 9866.157732725143, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9866.157732725143, "timesteps_since_restore": 1856000, "iterations_since_restore": 145, "perf": {"cpu_util_percent": 34.077999999999996, "ram_util_percent": 58.46000000000001}} +{"episode_reward_max": 636.0, "episode_reward_min": 234.0, "episode_reward_mean": 565.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 112.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 282.895}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.19, "shaped_reward_min": 74, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.64, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.05, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 15.75, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.68, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.7, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 15.75, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 15.75, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 582.0, 522.0, 522.0, 579.0, 627.0, 582.0, 525.0, 627.0, 576.0, 579.0, 579.0, 530.0, 582.0, 582.0, 579.0, 582.0, 582.0, 519.0, 575.0, 576.0, 579.0, 582.0, 576.0, 587.0, 579.0, 633.0, 527.0, 579.0, 630.0, 579.0, 576.0, 570.0, 582.0, 579.0, 582.0, 533.0, 495.0, 573.0, 533.0, 582.0, 582.0, 582.0, 522.0, 518.0, 524.0, 587.0, 579.0, 579.0, 579.0, 582.0, 579.0, 636.0, 510.0, 419.0, 570.0, 576.0, 579.0, 576.0, 573.0, 570.0, 587.0, 579.0, 576.0, 501.0, 582.0, 573.0, 576.0, 582.0, 570.0, 530.0, 579.0, 234.0, 576.0, 567.0, 630.0, 573.0, 582.0, 530.0, 573.0, 579.0, 576.0, 570.0, 576.0, 579.0, 522.0, 570.0, 522.0, 552.0, 576.0, 576.0, 579.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 296.0, 283.0, 274.0, 305.0, 286.0, 296.0, 293.0, 286.0, 285.0, 297.0, 296.0, 283.0, 280.0, 302.0, 269.0, 253.0, 273.0, 249.0, 281.0, 298.0, 321.0, 306.0, 283.0, 299.0, 249.0, 276.0, 320.0, 307.0, 293.0, 283.0, 299.0, 280.0, 283.0, 296.0, 270.0, 260.0, 296.0, 286.0, 297.0, 285.0, 294.0, 285.0, 289.0, 293.0, 279.0, 303.0, 258.0, 261.0, 296.0, 279.0, 293.0, 283.0, 292.0, 287.0, 279.0, 303.0, 292.0, 284.0, 287.0, 300.0, 291.0, 288.0, 301.0, 332.0, 260.0, 267.0, 278.0, 301.0, 314.0, 316.0, 283.0, 296.0, 282.0, 294.0, 273.0, 297.0, 286.0, 296.0, 296.0, 283.0, 280.0, 302.0, 268.0, 265.0, 235.0, 260.0, 279.0, 294.0, 266.0, 267.0, 299.0, 283.0, 292.0, 290.0, 290.0, 292.0, 263.0, 259.0, 256.0, 262.0, 261.0, 263.0, 314.0, 273.0, 298.0, 281.0, 288.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 302.0, 320.0, 316.0, 261.0, 249.0, 211.0, 208.0, 299.0, 271.0, 283.0, 293.0, 280.0, 299.0, 296.0, 280.0, 288.0, 285.0, 301.0, 269.0, 278.0, 309.0, 273.0, 306.0, 290.0, 286.0, 255.0, 246.0, 283.0, 299.0, 280.0, 293.0, 297.0, 279.0, 292.0, 290.0, 280.0, 290.0, 277.0, 253.0, 283.0, 296.0, 112.0, 122.0, 283.0, 293.0, 285.0, 282.0, 316.0, 314.0, 292.0, 281.0, 282.0, 300.0, 271.0, 259.0, 280.0, 293.0, 279.0, 300.0, 291.0, 285.0, 272.0, 298.0, 294.0, 282.0, 289.0, 290.0, 259.0, 263.0, 284.0, 286.0, 252.0, 270.0, 285.0, 267.0, 298.0, 278.0, 277.0, 299.0, 283.0, 296.0, 282.0, 297.0, 276.0, 303.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4819236352558276, "mean_processing_ms": 0.3641093342877964, "mean_inference_ms": 2.0353369545862554}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3504000, "num_steps_sampled": 1868800, "sample_time_ms": 21304.847, "load_time_ms": 36.48, "grad_time_ms": 9579.852, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003786050481721759, "policy_loss": -0.004469693172723055, "vf_loss": 88.19132232666016, 
"vf_explained_var": 0.7629249095916748, "kl": 0.0019031836418434978, "entropy": 1.1267800331115723, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1868800, "episodes_total": 4672, "training_iteration": 146, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-58-02", "timestamp": 1660251482, "time_this_iter_s": 33.29244089126587, "time_total_s": 9899.45017361641, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9899.45017361641, "timesteps_since_restore": 1868800, "iterations_since_restore": 146, "perf": {"cpu_util_percent": 34.12765957446809, "ram_util_percent": 58.40212765957448}} +{"episode_reward_max": 630.0, "episode_reward_min": 234.0, "episode_reward_mean": 569.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 112.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.95}, "custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.5, "shaped_reward_min": 74, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.88, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.51, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.15, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.85, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.03, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 579.0, 579.0, 582.0, 582.0, 579.0, 582.0, 582.0, 576.0, 579.0, 587.0, 581.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 536.0, 579.0, 587.0, 510.0, 582.0, 582.0, 579.0, 582.0, 570.0, 573.0, 579.0, 576.0, 576.0, 576.0, 573.0, 570.0, 587.0, 579.0, 576.0, 501.0, 582.0, 573.0, 576.0, 582.0, 570.0, 530.0, 579.0, 234.0, 576.0, 567.0, 630.0, 573.0, 582.0, 530.0, 573.0, 579.0, 576.0, 570.0, 576.0, 579.0, 522.0, 570.0, 522.0, 552.0, 576.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 582.0, 522.0, 522.0, 579.0, 627.0, 582.0, 525.0, 627.0, 576.0, 579.0, 579.0, 530.0, 582.0, 582.0, 579.0, 582.0, 582.0, 519.0, 575.0, 576.0, 579.0, 582.0, 576.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 315.0, 280.0, 299.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 288.0, 291.0, 283.0, 299.0, 288.0, 294.0, 294.0, 282.0, 288.0, 291.0, 304.0, 283.0, 282.0, 299.0, 270.0, 303.0, 289.0, 290.0, 278.0, 298.0, 289.0, 290.0, 288.0, 288.0, 278.0, 304.0, 296.0, 280.0, 279.0, 257.0, 297.0, 282.0, 288.0, 299.0, 240.0, 270.0, 302.0, 280.0, 293.0, 289.0, 286.0, 293.0, 301.0, 281.0, 285.0, 285.0, 296.0, 277.0, 280.0, 299.0, 288.0, 288.0, 290.0, 286.0, 296.0, 280.0, 288.0, 285.0, 301.0, 269.0, 278.0, 309.0, 273.0, 306.0, 290.0, 286.0, 255.0, 246.0, 283.0, 299.0, 280.0, 293.0, 297.0, 279.0, 292.0, 290.0, 280.0, 290.0, 277.0, 253.0, 283.0, 296.0, 112.0, 122.0, 283.0, 293.0, 285.0, 282.0, 316.0, 314.0, 292.0, 281.0, 282.0, 300.0, 271.0, 259.0, 280.0, 293.0, 279.0, 300.0, 291.0, 285.0, 272.0, 298.0, 294.0, 282.0, 289.0, 290.0, 259.0, 263.0, 284.0, 286.0, 252.0, 270.0, 285.0, 267.0, 298.0, 278.0, 277.0, 299.0, 283.0, 296.0, 282.0, 297.0, 276.0, 303.0, 291.0, 288.0, 296.0, 283.0, 274.0, 305.0, 286.0, 296.0, 293.0, 286.0, 285.0, 297.0, 296.0, 283.0, 280.0, 302.0, 269.0, 253.0, 273.0, 249.0, 281.0, 298.0, 321.0, 306.0, 283.0, 299.0, 249.0, 276.0, 320.0, 307.0, 293.0, 283.0, 299.0, 280.0, 283.0, 296.0, 270.0, 260.0, 296.0, 286.0, 297.0, 285.0, 294.0, 285.0, 289.0, 293.0, 279.0, 303.0, 258.0, 261.0, 296.0, 279.0, 293.0, 283.0, 292.0, 287.0, 279.0, 303.0, 292.0, 284.0, 287.0, 300.0, 291.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4748545298111457, "mean_processing_ms": 0.362707314353448, "mean_inference_ms": 2.028542543914196}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3528000, "num_steps_sampled": 1881600, "sample_time_ms": 21110.787, "load_time_ms": 36.619, "grad_time_ms": 9537.52, "update_time_ms": 0.002, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004449079744517803, "policy_loss": -0.0040388829074800014, "vf_loss": 90.50656127929688, "vf_explained_var": 0.7546594142913818, "kl": 0.0021286073606461287, "entropy": 1.1253728866577148, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1881600, "episodes_total": 4704, "training_iteration": 147, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-58-30", "timestamp": 1660251510, "time_this_iter_s": 28.167391061782837, "time_total_s": 9927.617564678192, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 
1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9927.617564678192, "timesteps_since_restore": 1881600, "iterations_since_restore": 147, "perf": {"cpu_util_percent": 35.05, "ram_util_percent": 58.895}} +{"episode_reward_max": 630.0, "episode_reward_min": 419.0, "episode_reward_mean": 573.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 206.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 286.76}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.12, "shaped_reward_min": 139, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.82, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.36, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.89, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.37, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, 
"useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.89, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.37, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.89, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.37, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 570.0, 579.0, 419.0, 567.0, 579.0, 579.0, 582.0, 579.0, 519.0, 582.0, 510.0, 582.0, 579.0, 582.0, 573.0, 630.0, 579.0, 579.0, 579.0, 624.0, 518.0, 579.0, 519.0, 579.0, 579.0, 573.0, 581.0, 576.0, 587.0, 576.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 582.0, 522.0, 522.0, 579.0, 627.0, 582.0, 525.0, 627.0, 576.0, 579.0, 579.0, 530.0, 582.0, 582.0, 579.0, 582.0, 582.0, 519.0, 575.0, 576.0, 579.0, 582.0, 576.0, 587.0, 579.0, 627.0, 579.0, 579.0, 582.0, 582.0, 579.0, 582.0, 582.0, 576.0, 579.0, 587.0, 581.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 536.0, 579.0, 587.0, 510.0, 582.0, 582.0, 579.0, 582.0, 570.0, 573.0, 579.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 282.0, 289.0, 293.0, 290.0, 280.0, 291.0, 288.0, 206.0, 213.0, 283.0, 284.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 289.0, 290.0, 265.0, 254.0, 283.0, 299.0, 259.0, 251.0, 299.0, 283.0, 290.0, 289.0, 285.0, 297.0, 280.0, 293.0, 328.0, 302.0, 286.0, 293.0, 296.0, 283.0, 293.0, 286.0, 320.0, 304.0, 282.0, 236.0, 306.0, 273.0, 262.0, 257.0, 287.0, 292.0, 295.0, 284.0, 293.0, 280.0, 292.0, 289.0, 291.0, 285.0, 303.0, 284.0, 275.0, 301.0, 277.0, 299.0, 283.0, 296.0, 282.0, 297.0, 276.0, 303.0, 291.0, 288.0, 296.0, 283.0, 274.0, 305.0, 286.0, 296.0, 293.0, 286.0, 285.0, 297.0, 296.0, 283.0, 280.0, 302.0, 269.0, 253.0, 273.0, 249.0, 281.0, 298.0, 321.0, 306.0, 283.0, 299.0, 249.0, 276.0, 320.0, 307.0, 293.0, 283.0, 299.0, 280.0, 283.0, 296.0, 270.0, 260.0, 296.0, 286.0, 297.0, 285.0, 294.0, 285.0, 289.0, 293.0, 279.0, 303.0, 258.0, 261.0, 296.0, 279.0, 293.0, 283.0, 292.0, 287.0, 279.0, 303.0, 292.0, 284.0, 287.0, 300.0, 291.0, 288.0, 312.0, 315.0, 280.0, 299.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 288.0, 291.0, 283.0, 299.0, 288.0, 294.0, 294.0, 282.0, 288.0, 291.0, 304.0, 283.0, 282.0, 299.0, 270.0, 303.0, 289.0, 290.0, 278.0, 298.0, 289.0, 290.0, 288.0, 288.0, 278.0, 304.0, 296.0, 280.0, 279.0, 257.0, 297.0, 282.0, 288.0, 299.0, 240.0, 270.0, 302.0, 280.0, 293.0, 289.0, 286.0, 293.0, 301.0, 281.0, 285.0, 285.0, 296.0, 277.0, 280.0, 299.0, 288.0, 288.0, 290.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4678650225470864, "mean_processing_ms": 0.36131377085229416, "mean_inference_ms": 2.021530568417585}, "off_policy_estimator": {}, "info": 
{"num_steps_trained": 3552000, "num_steps_sampled": 1894400, "sample_time_ms": 21206.603, "load_time_ms": 36.564, "grad_time_ms": 9559.344, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005005656275898218, "policy_loss": -0.0032577281817793846, "vf_loss": 88.27960205078125, "vf_explained_var": 0.7724118232727051, "kl": 0.001774882897734642, "entropy": 1.1291688680648804, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1894400, "episodes_total": 4736, "training_iteration": 148, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-59-00", "timestamp": 1660251540, "time_this_iter_s": 30.007760047912598, "time_total_s": 9957.625324726105, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": 
{"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9957.625324726105, "timesteps_since_restore": 1894400, "iterations_since_restore": 148, "perf": {"cpu_util_percent": 34.11190476190476, "ram_util_percent": 58.61190476190477}} +{"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 568.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 284.445}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 175.29, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.47, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.64, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.01, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.62, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.5, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.55, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, 
"dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.1, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.5, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.55, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.5, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.55, "viable_onion_potting_agent_1_min": 6, 
"viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 582.0, 462.0, 573.0, 564.0, 582.0, 579.0, 573.0, 180.0, 582.0, 579.0, 564.0, 579.0, 564.0, 579.0, 530.0, 579.0, 576.0, 579.0, 630.0, 582.0, 579.0, 582.0, 579.0, 560.0, 524.0, 576.0, 582.0, 576.0, 587.0, 579.0, 627.0, 579.0, 579.0, 582.0, 582.0, 579.0, 582.0, 582.0, 576.0, 579.0, 587.0, 581.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 536.0, 579.0, 587.0, 510.0, 582.0, 582.0, 579.0, 582.0, 570.0, 573.0, 579.0, 576.0, 576.0, 582.0, 582.0, 570.0, 579.0, 419.0, 567.0, 579.0, 579.0, 582.0, 579.0, 519.0, 582.0, 510.0, 582.0, 579.0, 582.0, 573.0, 630.0, 579.0, 579.0, 579.0, 
624.0, 518.0, 579.0, 519.0, 579.0, 579.0, 573.0, 581.0, 576.0, 587.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 298.0, 281.0, 280.0, 299.0, 291.0, 291.0, 284.0, 292.0, 288.0, 294.0, 295.0, 287.0, 230.0, 232.0, 280.0, 293.0, 267.0, 297.0, 296.0, 286.0, 291.0, 288.0, 305.0, 268.0, 86.0, 94.0, 296.0, 286.0, 288.0, 291.0, 285.0, 279.0, 281.0, 298.0, 277.0, 287.0, 293.0, 286.0, 276.0, 254.0, 274.0, 305.0, 285.0, 291.0, 299.0, 280.0, 312.0, 318.0, 288.0, 294.0, 293.0, 286.0, 294.0, 288.0, 273.0, 306.0, 282.0, 278.0, 256.0, 268.0, 280.0, 296.0, 279.0, 303.0, 292.0, 284.0, 287.0, 300.0, 291.0, 288.0, 312.0, 315.0, 280.0, 299.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 288.0, 291.0, 283.0, 299.0, 288.0, 294.0, 294.0, 282.0, 288.0, 291.0, 304.0, 283.0, 282.0, 299.0, 270.0, 303.0, 289.0, 290.0, 278.0, 298.0, 289.0, 290.0, 288.0, 288.0, 278.0, 304.0, 296.0, 280.0, 279.0, 257.0, 297.0, 282.0, 288.0, 299.0, 240.0, 270.0, 302.0, 280.0, 293.0, 289.0, 286.0, 293.0, 301.0, 281.0, 285.0, 285.0, 296.0, 277.0, 280.0, 299.0, 288.0, 288.0, 290.0, 286.0, 300.0, 282.0, 289.0, 293.0, 290.0, 280.0, 291.0, 288.0, 206.0, 213.0, 283.0, 284.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 289.0, 290.0, 265.0, 254.0, 283.0, 299.0, 259.0, 251.0, 299.0, 283.0, 290.0, 289.0, 285.0, 297.0, 280.0, 293.0, 328.0, 302.0, 286.0, 293.0, 296.0, 283.0, 293.0, 286.0, 320.0, 304.0, 282.0, 236.0, 306.0, 273.0, 262.0, 257.0, 287.0, 292.0, 295.0, 284.0, 293.0, 280.0, 292.0, 289.0, 291.0, 285.0, 303.0, 
284.0, 275.0, 301.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4609623899935302, "mean_processing_ms": 0.3599358567345953, "mean_inference_ms": 2.0144329368432397}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3576000, "num_steps_sampled": 1907200, "sample_time_ms": 21431.224, "load_time_ms": 36.685, "grad_time_ms": 9588.21, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004353505093604326, "policy_loss": -0.003862809156998992, "vf_loss": 87.85071563720703, "vf_explained_var": 0.7780687212944031, "kl": 0.002437218790873885, "entropy": 1.137519359588623, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1907200, "episodes_total": 4768, "training_iteration": 149, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-59-30", "timestamp": 1660251570, "time_this_iter_s": 30.225661993026733, "time_total_s": 9987.850986719131, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 
3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9987.850986719131, "timesteps_since_restore": 1907200, "iterations_since_restore": 149, "perf": {"cpu_util_percent": 31.69069767441861, "ram_util_percent": 58.481395348837225}} +{"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 565.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 282.965}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.93, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.19, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.63, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.86, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.68, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.26, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 
16.57, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.44, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.06, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.26, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.57, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.26, 
"viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.57, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 587.0, 573.0, 579.0, 522.0, 573.0, 579.0, 582.0, 570.0, 525.0, 579.0, 567.0, 576.0, 570.0, 576.0, 582.0, 582.0, 587.0, 576.0, 530.0, 630.0, 582.0, 579.0, 456.0, 579.0, 581.0, 573.0, 576.0, 516.0, 579.0, 582.0, 576.0, 573.0, 579.0, 576.0, 576.0, 582.0, 582.0, 570.0, 579.0, 419.0, 567.0, 579.0, 579.0, 582.0, 579.0, 519.0, 582.0, 510.0, 582.0, 579.0, 582.0, 573.0, 630.0, 579.0, 579.0, 579.0, 624.0, 518.0, 579.0, 519.0, 579.0, 579.0, 573.0, 581.0, 
576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 582.0, 462.0, 573.0, 564.0, 582.0, 579.0, 573.0, 180.0, 582.0, 579.0, 564.0, 579.0, 564.0, 579.0, 530.0, 579.0, 576.0, 579.0, 630.0, 582.0, 579.0, 582.0, 579.0, 560.0, 524.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 275.0, 291.0, 296.0, 296.0, 277.0, 301.0, 278.0, 269.0, 253.0, 288.0, 285.0, 293.0, 286.0, 283.0, 299.0, 275.0, 295.0, 260.0, 265.0, 292.0, 287.0, 278.0, 289.0, 281.0, 295.0, 272.0, 298.0, 292.0, 284.0, 288.0, 294.0, 285.0, 297.0, 292.0, 295.0, 291.0, 285.0, 276.0, 254.0, 319.0, 311.0, 308.0, 274.0, 277.0, 302.0, 225.0, 231.0, 295.0, 284.0, 285.0, 296.0, 294.0, 279.0, 277.0, 299.0, 249.0, 267.0, 290.0, 289.0, 276.0, 306.0, 282.0, 294.0, 296.0, 277.0, 280.0, 299.0, 288.0, 288.0, 290.0, 286.0, 300.0, 282.0, 289.0, 293.0, 290.0, 280.0, 291.0, 288.0, 206.0, 213.0, 283.0, 284.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 289.0, 290.0, 265.0, 254.0, 283.0, 299.0, 259.0, 251.0, 299.0, 283.0, 290.0, 289.0, 285.0, 297.0, 280.0, 293.0, 328.0, 302.0, 286.0, 293.0, 296.0, 283.0, 293.0, 286.0, 320.0, 304.0, 282.0, 236.0, 306.0, 273.0, 262.0, 257.0, 287.0, 292.0, 295.0, 284.0, 293.0, 280.0, 292.0, 289.0, 291.0, 285.0, 303.0, 284.0, 275.0, 301.0, 287.0, 292.0, 298.0, 281.0, 280.0, 299.0, 291.0, 291.0, 284.0, 292.0, 288.0, 294.0, 295.0, 287.0, 230.0, 232.0, 280.0, 293.0, 267.0, 297.0, 296.0, 286.0, 291.0, 288.0, 305.0, 268.0, 86.0, 94.0, 296.0, 286.0, 288.0, 291.0, 285.0, 279.0, 281.0, 298.0, 277.0, 
287.0, 293.0, 286.0, 276.0, 254.0, 274.0, 305.0, 285.0, 291.0, 299.0, 280.0, 312.0, 318.0, 288.0, 294.0, 293.0, 286.0, 294.0, 288.0, 273.0, 306.0, 282.0, 278.0, 256.0, 268.0, 280.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4541632476370934, "mean_processing_ms": 0.35857867420290873, "mean_inference_ms": 2.0077131328660243}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3600000, "num_steps_sampled": 1920000, "sample_time_ms": 21437.009, "load_time_ms": 37.013, "grad_time_ms": 9883.239, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004526351112872362, "policy_loss": -0.0033871959894895554, "vf_loss": 84.81644439697266, "vf_explained_var": 0.7658727169036865, "kl": 0.002766131656244397, "entropy": 1.1361898183822632, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1920000, "episodes_total": 4800, "training_iteration": 150, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-00-01", "timestamp": 1660251601, "time_this_iter_s": 31.108325004577637, "time_total_s": 10018.95931172371, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10018.95931172371, "timesteps_since_restore": 1920000, "iterations_since_restore": 150, "perf": {"cpu_util_percent": 34.15227272727273, "ram_util_percent": 58.540909090909096}} +{"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 565.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 282.735}, "custom_metrics": {"sparse_reward_mean": 195.8, 
"sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.87, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.15, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.46, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.82, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.73, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.28, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.28, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.28, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 516.0, 425.0, 570.0, 584.0, 570.0, 573.0, 579.0, 579.0, 576.0, 579.0, 567.0, 579.0, 573.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 579.0, 464.0, 582.0, 582.0, 570.0, 579.0, 573.0, 579.0, 581.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 576.0, 
582.0, 582.0, 462.0, 573.0, 564.0, 582.0, 579.0, 573.0, 180.0, 582.0, 579.0, 564.0, 579.0, 564.0, 579.0, 530.0, 579.0, 576.0, 579.0, 630.0, 582.0, 579.0, 582.0, 579.0, 560.0, 524.0, 576.0, 567.0, 587.0, 573.0, 579.0, 522.0, 573.0, 579.0, 582.0, 570.0, 525.0, 579.0, 567.0, 576.0, 570.0, 576.0, 582.0, 582.0, 587.0, 576.0, 530.0, 630.0, 582.0, 579.0, 456.0, 579.0, 581.0, 573.0, 576.0, 516.0, 579.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 297.0, 295.0, 275.0, 268.0, 248.0, 220.0, 205.0, 285.0, 285.0, 301.0, 283.0, 285.0, 285.0, 283.0, 290.0, 282.0, 297.0, 299.0, 280.0, 268.0, 308.0, 288.0, 291.0, 270.0, 297.0, 294.0, 285.0, 282.0, 291.0, 280.0, 299.0, 289.0, 293.0, 290.0, 286.0, 292.0, 287.0, 288.0, 288.0, 300.0, 279.0, 302.0, 280.0, 286.0, 290.0, 286.0, 296.0, 283.0, 296.0, 232.0, 232.0, 285.0, 297.0, 302.0, 280.0, 278.0, 292.0, 291.0, 288.0, 292.0, 281.0, 299.0, 280.0, 292.0, 289.0, 291.0, 285.0, 303.0, 284.0, 275.0, 301.0, 287.0, 292.0, 298.0, 281.0, 280.0, 299.0, 291.0, 291.0, 284.0, 292.0, 288.0, 294.0, 295.0, 287.0, 230.0, 232.0, 280.0, 293.0, 267.0, 297.0, 296.0, 286.0, 291.0, 288.0, 305.0, 268.0, 86.0, 94.0, 296.0, 286.0, 288.0, 291.0, 285.0, 279.0, 281.0, 298.0, 277.0, 287.0, 293.0, 286.0, 276.0, 254.0, 274.0, 305.0, 285.0, 291.0, 299.0, 280.0, 312.0, 318.0, 288.0, 294.0, 293.0, 286.0, 294.0, 288.0, 273.0, 306.0, 282.0, 278.0, 256.0, 268.0, 280.0, 296.0, 292.0, 275.0, 291.0, 296.0, 296.0, 277.0, 301.0, 278.0, 269.0, 253.0, 288.0, 285.0, 293.0, 
286.0, 283.0, 299.0, 275.0, 295.0, 260.0, 265.0, 292.0, 287.0, 278.0, 289.0, 281.0, 295.0, 272.0, 298.0, 292.0, 284.0, 288.0, 294.0, 285.0, 297.0, 292.0, 295.0, 291.0, 285.0, 276.0, 254.0, 319.0, 311.0, 308.0, 274.0, 277.0, 302.0, 225.0, 231.0, 295.0, 284.0, 285.0, 296.0, 294.0, 279.0, 277.0, 299.0, 249.0, 267.0, 290.0, 289.0, 276.0, 306.0, 282.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4474584223491644, "mean_processing_ms": 0.3572431217885297, "mean_inference_ms": 2.0013634824095012}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3624000, "num_steps_sampled": 1932800, "sample_time_ms": 21691.813, "load_time_ms": 37.111, "grad_time_ms": 9921.937, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037641674280166626, "policy_loss": -0.004076274111866951, "vf_loss": 84.02509307861328, "vf_explained_var": 0.7596387267112732, "kl": 0.001788324792869389, "entropy": 1.124145746231079, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1932800, "episodes_total": 4832, "training_iteration": 151, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-00-34", "timestamp": 1660251634, "time_this_iter_s": 32.471389293670654, "time_total_s": 10051.43070101738, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10051.43070101738, "timesteps_since_restore": 1932800, "iterations_since_restore": 151, "perf": {"cpu_util_percent": 34.02391304347826, "ram_util_percent": 58.56739130434782}} +{"episode_reward_max": 630.0, "episode_reward_min": 402.0, "episode_reward_mean": 568.07, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 189.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.035}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.27, "shaped_reward_min": 122, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.39, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.12, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, 
"useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.51, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.4, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.97, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.51, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.4, 
"optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.51, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.4, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 582.0, 576.0, 576.0, 582.0, 587.0, 402.0, 564.0, 582.0, 
533.0, 525.0, 579.0, 576.0, 587.0, 579.0, 539.0, 582.0, 582.0, 582.0, 533.0, 570.0, 584.0, 579.0, 579.0, 582.0, 630.0, 579.0, 587.0, 582.0, 579.0, 579.0, 560.0, 524.0, 576.0, 567.0, 587.0, 573.0, 579.0, 522.0, 573.0, 579.0, 582.0, 570.0, 525.0, 579.0, 567.0, 576.0, 570.0, 576.0, 582.0, 582.0, 587.0, 576.0, 530.0, 630.0, 582.0, 579.0, 456.0, 579.0, 581.0, 573.0, 576.0, 516.0, 579.0, 582.0, 576.0, 582.0, 570.0, 516.0, 425.0, 570.0, 584.0, 570.0, 573.0, 579.0, 579.0, 576.0, 579.0, 567.0, 579.0, 573.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 579.0, 464.0, 582.0, 582.0, 570.0, 579.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 293.0, 286.0, 282.0, 297.0, 276.0, 306.0, 285.0, 291.0, 285.0, 291.0, 291.0, 291.0, 289.0, 298.0, 189.0, 213.0, 290.0, 274.0, 293.0, 289.0, 272.0, 261.0, 262.0, 263.0, 293.0, 286.0, 275.0, 301.0, 294.0, 293.0, 280.0, 299.0, 264.0, 275.0, 281.0, 301.0, 299.0, 283.0, 281.0, 301.0, 263.0, 270.0, 276.0, 294.0, 301.0, 283.0, 296.0, 283.0, 295.0, 284.0, 288.0, 294.0, 321.0, 309.0, 288.0, 291.0, 286.0, 301.0, 286.0, 296.0, 304.0, 275.0, 273.0, 306.0, 282.0, 278.0, 256.0, 268.0, 280.0, 296.0, 292.0, 275.0, 291.0, 296.0, 296.0, 277.0, 301.0, 278.0, 269.0, 253.0, 288.0, 285.0, 293.0, 286.0, 283.0, 299.0, 275.0, 295.0, 260.0, 265.0, 292.0, 287.0, 278.0, 289.0, 281.0, 295.0, 272.0, 298.0, 292.0, 284.0, 288.0, 294.0, 285.0, 297.0, 292.0, 295.0, 291.0, 285.0, 276.0, 254.0, 319.0, 311.0, 308.0, 274.0, 277.0, 302.0, 225.0, 
231.0, 295.0, 284.0, 285.0, 296.0, 294.0, 279.0, 277.0, 299.0, 249.0, 267.0, 290.0, 289.0, 276.0, 306.0, 282.0, 294.0, 285.0, 297.0, 295.0, 275.0, 268.0, 248.0, 220.0, 205.0, 285.0, 285.0, 301.0, 283.0, 285.0, 285.0, 283.0, 290.0, 282.0, 297.0, 299.0, 280.0, 268.0, 308.0, 288.0, 291.0, 270.0, 297.0, 294.0, 285.0, 282.0, 291.0, 280.0, 299.0, 289.0, 293.0, 290.0, 286.0, 292.0, 287.0, 288.0, 288.0, 300.0, 279.0, 302.0, 280.0, 286.0, 290.0, 286.0, 296.0, 283.0, 296.0, 232.0, 232.0, 285.0, 297.0, 302.0, 280.0, 278.0, 292.0, 291.0, 288.0, 292.0, 281.0, 299.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4408300434942674, "mean_processing_ms": 0.35592594931553234, "mean_inference_ms": 1.9949418939108405}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3648000, "num_steps_sampled": 1945600, "sample_time_ms": 21661.173, "load_time_ms": 37.278, "grad_time_ms": 9874.5, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006686341017484665, "policy_loss": -0.0018611648119986057, "vf_loss": 91.119873046875, "vf_explained_var": 0.7503556609153748, "kl": 0.002358483849093318, "entropy": 1.128965973854065, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1945600, "episodes_total": 4864, "training_iteration": 152, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-01-03", "timestamp": 1660251663, "time_this_iter_s": 29.826536893844604, "time_total_s": 10081.257237911224, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10081.257237911224, "timesteps_since_restore": 1945600, "iterations_since_restore": 152, "perf": {"cpu_util_percent": 
34.71904761904763, "ram_util_percent": 58.37619047619047}} +{"episode_reward_max": 630.0, "episode_reward_min": 402.0, "episode_reward_mean": 569.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 189.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.96}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.32, "shaped_reward_min": 122, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.27, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, 
"onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.76, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.38, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.94, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 15.76, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.38, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.76, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.38, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 582.0, 510.0, 579.0, 579.0, 587.0, 576.0, 582.0, 579.0, 573.0, 582.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 587.0, 579.0, 576.0, 512.0, 579.0, 582.0, 587.0, 576.0, 573.0, 582.0, 582.0, 582.0, 582.0, 579.0, 516.0, 579.0, 582.0, 576.0, 582.0, 570.0, 516.0, 425.0, 570.0, 584.0, 570.0, 573.0, 579.0, 579.0, 576.0, 579.0, 567.0, 579.0, 573.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 579.0, 464.0, 582.0, 582.0, 570.0, 579.0, 573.0, 579.0, 579.0, 579.0, 579.0, 582.0, 576.0, 576.0, 582.0, 587.0, 402.0, 564.0, 582.0, 533.0, 525.0, 579.0, 576.0, 587.0, 579.0, 539.0, 582.0, 582.0, 582.0, 533.0, 570.0, 584.0, 579.0, 579.0, 582.0, 630.0, 579.0, 587.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 293.0, 289.0, 285.0, 297.0, 244.0, 266.0, 290.0, 289.0, 282.0, 297.0, 309.0, 278.0, 287.0, 289.0, 293.0, 289.0, 273.0, 306.0, 294.0, 279.0, 289.0, 293.0, 280.0, 293.0, 279.0, 303.0, 281.0, 298.0, 291.0, 285.0, 262.0, 260.0, 286.0, 296.0, 293.0, 294.0, 288.0, 291.0, 283.0, 293.0, 262.0, 250.0, 285.0, 294.0, 280.0, 302.0, 296.0, 291.0, 285.0, 291.0, 290.0, 283.0, 288.0, 294.0, 291.0, 291.0, 285.0, 297.0, 288.0, 294.0, 298.0, 281.0, 249.0, 267.0, 290.0, 289.0, 276.0, 306.0, 282.0, 294.0, 285.0, 297.0, 295.0, 275.0, 268.0, 248.0, 220.0, 205.0, 285.0, 285.0, 301.0, 283.0, 285.0, 285.0, 283.0, 290.0, 282.0, 297.0, 299.0, 
280.0, 268.0, 308.0, 288.0, 291.0, 270.0, 297.0, 294.0, 285.0, 282.0, 291.0, 280.0, 299.0, 289.0, 293.0, 290.0, 286.0, 292.0, 287.0, 288.0, 288.0, 300.0, 279.0, 302.0, 280.0, 286.0, 290.0, 286.0, 296.0, 283.0, 296.0, 232.0, 232.0, 285.0, 297.0, 302.0, 280.0, 278.0, 292.0, 291.0, 288.0, 292.0, 281.0, 299.0, 280.0, 283.0, 296.0, 293.0, 286.0, 282.0, 297.0, 276.0, 306.0, 285.0, 291.0, 285.0, 291.0, 291.0, 291.0, 289.0, 298.0, 189.0, 213.0, 290.0, 274.0, 293.0, 289.0, 272.0, 261.0, 262.0, 263.0, 293.0, 286.0, 275.0, 301.0, 294.0, 293.0, 280.0, 299.0, 264.0, 275.0, 281.0, 301.0, 299.0, 283.0, 281.0, 301.0, 263.0, 270.0, 276.0, 294.0, 301.0, 283.0, 296.0, 283.0, 295.0, 284.0, 288.0, 294.0, 321.0, 309.0, 288.0, 291.0, 286.0, 301.0, 286.0, 296.0, 304.0, 275.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4342721716334546, "mean_processing_ms": 0.3546223616494384, "mean_inference_ms": 1.9881963342076971}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3672000, "num_steps_sampled": 1958400, "sample_time_ms": 20770.751, "load_time_ms": 37.491, "grad_time_ms": 9746.757, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023278051521629095, "policy_loss": -0.0060347276739776134, "vf_loss": 89.3071060180664, "vf_explained_var": 0.7670709490776062, "kl": 0.0017067408189177513, "entropy": 1.1363595724105835, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1958400, "episodes_total": 4896, "training_iteration": 153, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-01-29", "timestamp": 1660251689, "time_this_iter_s": 25.417139053344727, "time_total_s": 10106.67437696457, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10106.67437696457, "timesteps_since_restore": 1958400, "iterations_since_restore": 153, "perf": {"cpu_util_percent": 33.84166666666667, "ram_util_percent": 58.383333333333326}} +{"episode_reward_max": 630.0, "episode_reward_min": 402.0, "episode_reward_mean": 571.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 189.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 285.78}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.76, "shaped_reward_min": 122, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.4, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.56, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, 
"useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.91, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.58, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.6, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.66, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.1, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.58, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.6, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.58, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.6, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 582.0, 579.0, 522.0, 576.0, 581.0, 510.0, 527.0, 584.0, 576.0, 573.0, 582.0, 576.0, 522.0, 576.0, 561.0, 579.0, 579.0, 579.0, 579.0, 581.0, 579.0, 570.0, 579.0, 573.0, 579.0, 579.0, 579.0, 579.0, 582.0, 576.0, 576.0, 582.0, 587.0, 402.0, 564.0, 582.0, 533.0, 525.0, 579.0, 576.0, 587.0, 579.0, 539.0, 582.0, 582.0, 582.0, 533.0, 570.0, 584.0, 579.0, 579.0, 582.0, 630.0, 579.0, 587.0, 582.0, 579.0, 579.0, 582.0, 582.0, 510.0, 579.0, 579.0, 587.0, 576.0, 582.0, 579.0, 573.0, 582.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 587.0, 579.0, 576.0, 512.0, 579.0, 582.0, 587.0, 576.0, 573.0, 582.0, 582.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 278.0, 273.0, 309.0, 290.0, 286.0, 298.0, 278.0, 282.0, 297.0, 285.0, 297.0, 293.0, 286.0, 283.0, 290.0, 294.0, 282.0, 298.0, 284.0, 294.0, 285.0, 284.0, 298.0, 285.0, 294.0, 280.0, 242.0, 292.0, 284.0, 303.0, 278.0, 262.0, 248.0, 281.0, 246.0, 290.0, 294.0, 299.0, 277.0, 282.0, 291.0, 290.0, 292.0, 282.0, 294.0, 270.0, 252.0, 277.0, 299.0, 283.0, 278.0, 280.0, 299.0, 279.0, 300.0, 282.0, 297.0, 278.0, 301.0, 297.0, 
284.0, 291.0, 288.0, 278.0, 292.0, 291.0, 288.0, 292.0, 281.0, 299.0, 280.0, 283.0, 296.0, 293.0, 286.0, 282.0, 297.0, 276.0, 306.0, 285.0, 291.0, 285.0, 291.0, 291.0, 291.0, 289.0, 298.0, 189.0, 213.0, 290.0, 274.0, 293.0, 289.0, 272.0, 261.0, 262.0, 263.0, 293.0, 286.0, 275.0, 301.0, 294.0, 293.0, 280.0, 299.0, 264.0, 275.0, 281.0, 301.0, 299.0, 283.0, 281.0, 301.0, 263.0, 270.0, 276.0, 294.0, 301.0, 283.0, 296.0, 283.0, 295.0, 284.0, 288.0, 294.0, 321.0, 309.0, 288.0, 291.0, 286.0, 301.0, 286.0, 296.0, 304.0, 275.0, 289.0, 290.0, 293.0, 289.0, 285.0, 297.0, 244.0, 266.0, 290.0, 289.0, 282.0, 297.0, 309.0, 278.0, 287.0, 289.0, 293.0, 289.0, 273.0, 306.0, 294.0, 279.0, 289.0, 293.0, 280.0, 293.0, 279.0, 303.0, 281.0, 298.0, 291.0, 285.0, 262.0, 260.0, 286.0, 296.0, 293.0, 294.0, 288.0, 291.0, 283.0, 293.0, 262.0, 250.0, 285.0, 294.0, 280.0, 302.0, 296.0, 291.0, 285.0, 291.0, 290.0, 283.0, 288.0, 294.0, 291.0, 291.0, 285.0, 297.0, 288.0, 294.0, 298.0, 281.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4277901540826492, "mean_processing_ms": 0.3533349618976105, "mean_inference_ms": 1.9812328755781439}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3696000, "num_steps_sampled": 1971200, "sample_time_ms": 20717.538, "load_time_ms": 37.296, "grad_time_ms": 9558.366, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0045716362074017525, "policy_loss": -0.004014193546026945, "vf_loss": 91.47116088867188, "vf_explained_var": 0.753397524356842, "kl": 0.001791521324776113, "entropy": 1.1225804090499878, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1971200, "episodes_total": 4928, "training_iteration": 154, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-01-57", "timestamp": 1660251717, "time_this_iter_s": 27.938206911087036, "time_total_s": 10134.612583875656, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10134.612583875656, "timesteps_since_restore": 1971200, "iterations_since_restore": 154, "perf": {"cpu_util_percent": 33.69230769230769, "ram_util_percent": 58.38717948717951}} +{"episode_reward_max": 627.0, "episode_reward_min": 510.0, "episode_reward_mean": 573.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.615}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.83, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, 
"onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.54, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.06, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.53, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.7, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.62, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 1, 
"soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.53, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.7, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.53, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.7, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 582.0, 582.0, 579.0, 576.0, 579.0, 567.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 570.0, 579.0, 576.0, 627.0, 587.0, 582.0, 512.0, 564.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 525.0, 582.0, 582.0, 587.0, 579.0, 587.0, 582.0, 579.0, 579.0, 582.0, 582.0, 510.0, 579.0, 579.0, 587.0, 576.0, 582.0, 579.0, 573.0, 582.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 587.0, 579.0, 576.0, 512.0, 579.0, 582.0, 587.0, 576.0, 573.0, 582.0, 582.0, 582.0, 582.0, 579.0, 570.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 582.0, 579.0, 522.0, 576.0, 581.0, 510.0, 527.0, 584.0, 576.0, 573.0, 582.0, 576.0, 522.0, 576.0, 561.0, 579.0, 579.0, 579.0, 579.0, 581.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 277.0, 298.0, 278.0, 287.0, 295.0, 287.0, 295.0, 295.0, 284.0, 290.0, 286.0, 285.0, 294.0, 286.0, 281.0, 293.0, 283.0, 298.0, 281.0, 288.0, 291.0, 296.0, 283.0, 293.0, 286.0, 288.0, 291.0, 287.0, 283.0, 281.0, 298.0, 294.0, 282.0, 306.0, 321.0, 301.0, 
286.0, 288.0, 294.0, 260.0, 252.0, 285.0, 279.0, 288.0, 294.0, 284.0, 295.0, 281.0, 301.0, 256.0, 271.0, 290.0, 280.0, 293.0, 286.0, 260.0, 265.0, 283.0, 299.0, 289.0, 293.0, 294.0, 293.0, 288.0, 291.0, 286.0, 301.0, 286.0, 296.0, 304.0, 275.0, 289.0, 290.0, 293.0, 289.0, 285.0, 297.0, 244.0, 266.0, 290.0, 289.0, 282.0, 297.0, 309.0, 278.0, 287.0, 289.0, 293.0, 289.0, 273.0, 306.0, 294.0, 279.0, 289.0, 293.0, 280.0, 293.0, 279.0, 303.0, 281.0, 298.0, 291.0, 285.0, 262.0, 260.0, 286.0, 296.0, 293.0, 294.0, 288.0, 291.0, 283.0, 293.0, 262.0, 250.0, 285.0, 294.0, 280.0, 302.0, 296.0, 291.0, 285.0, 291.0, 290.0, 283.0, 288.0, 294.0, 291.0, 291.0, 285.0, 297.0, 288.0, 294.0, 298.0, 281.0, 292.0, 278.0, 273.0, 309.0, 290.0, 286.0, 298.0, 278.0, 282.0, 297.0, 285.0, 297.0, 293.0, 286.0, 283.0, 290.0, 294.0, 282.0, 298.0, 284.0, 294.0, 285.0, 284.0, 298.0, 285.0, 294.0, 280.0, 242.0, 292.0, 284.0, 303.0, 278.0, 262.0, 248.0, 281.0, 246.0, 290.0, 294.0, 299.0, 277.0, 282.0, 291.0, 290.0, 292.0, 282.0, 294.0, 270.0, 252.0, 277.0, 299.0, 283.0, 278.0, 280.0, 299.0, 279.0, 300.0, 282.0, 297.0, 278.0, 301.0, 297.0, 284.0, 291.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4213938109933664, "mean_processing_ms": 0.35206327984916896, "mean_inference_ms": 1.974322466189253}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3720000, "num_steps_sampled": 1984000, "sample_time_ms": 20399.728, "load_time_ms": 37.504, "grad_time_ms": 9171.651, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0077364686876535416, "policy_loss": -0.000512867234647274, "vf_loss": 88.10808563232422, "vf_explained_var": 0.7624195218086243, "kl": 0.0021189304534345865, "entropy": 1.1229437589645386, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1984000, "episodes_total": 4960, "training_iteration": 155, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-02-25", "timestamp": 1660251745, "time_this_iter_s": 27.89369297027588, "time_total_s": 10162.506276845932, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10162.506276845932, "timesteps_since_restore": 1984000, "iterations_since_restore": 155, "perf": {"cpu_util_percent": 34.404999999999994, "ram_util_percent": 58.395}} +{"episode_reward_max": 627.0, "episode_reward_min": 462.0, "episode_reward_mean": 570.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 285.3}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.4, "shaped_reward_min": 142, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.04, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.7, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.71, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.95, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.17, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.02, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, 
"soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.3, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.17, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.17, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 462.0, 578.0, 579.0, 519.0, 579.0, 582.0, 567.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 579.0, 564.0, 519.0, 567.0, 579.0, 570.0, 582.0, 573.0, 579.0, 527.0, 582.0, 576.0, 573.0, 570.0, 515.0, 576.0, 582.0, 579.0, 582.0, 582.0, 582.0, 579.0, 570.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 582.0, 579.0, 522.0, 576.0, 581.0, 510.0, 527.0, 584.0, 576.0, 573.0, 582.0, 576.0, 522.0, 576.0, 561.0, 579.0, 579.0, 579.0, 579.0, 581.0, 579.0, 573.0, 576.0, 582.0, 582.0, 579.0, 576.0, 579.0, 567.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 570.0, 579.0, 576.0, 627.0, 587.0, 582.0, 512.0, 564.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 525.0, 582.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 236.0, 226.0, 295.0, 283.0, 287.0, 292.0, 268.0, 251.0, 295.0, 
284.0, 285.0, 297.0, 281.0, 286.0, 293.0, 283.0, 289.0, 284.0, 285.0, 288.0, 290.0, 289.0, 280.0, 299.0, 286.0, 290.0, 295.0, 284.0, 285.0, 279.0, 264.0, 255.0, 279.0, 288.0, 295.0, 284.0, 273.0, 297.0, 286.0, 296.0, 290.0, 283.0, 284.0, 295.0, 249.0, 278.0, 288.0, 294.0, 277.0, 299.0, 282.0, 291.0, 290.0, 280.0, 260.0, 255.0, 278.0, 298.0, 280.0, 302.0, 294.0, 285.0, 291.0, 291.0, 285.0, 297.0, 288.0, 294.0, 298.0, 281.0, 292.0, 278.0, 273.0, 309.0, 290.0, 286.0, 298.0, 278.0, 282.0, 297.0, 285.0, 297.0, 293.0, 286.0, 283.0, 290.0, 294.0, 282.0, 298.0, 284.0, 294.0, 285.0, 284.0, 298.0, 285.0, 294.0, 280.0, 242.0, 292.0, 284.0, 303.0, 278.0, 262.0, 248.0, 281.0, 246.0, 290.0, 294.0, 299.0, 277.0, 282.0, 291.0, 290.0, 292.0, 282.0, 294.0, 270.0, 252.0, 277.0, 299.0, 283.0, 278.0, 280.0, 299.0, 279.0, 300.0, 282.0, 297.0, 278.0, 301.0, 297.0, 284.0, 291.0, 288.0, 296.0, 277.0, 298.0, 278.0, 287.0, 295.0, 287.0, 295.0, 295.0, 284.0, 290.0, 286.0, 285.0, 294.0, 286.0, 281.0, 293.0, 283.0, 298.0, 281.0, 288.0, 291.0, 296.0, 283.0, 293.0, 286.0, 288.0, 291.0, 287.0, 283.0, 281.0, 298.0, 294.0, 282.0, 306.0, 321.0, 301.0, 286.0, 288.0, 294.0, 260.0, 252.0, 285.0, 279.0, 288.0, 294.0, 284.0, 295.0, 281.0, 301.0, 256.0, 271.0, 290.0, 280.0, 293.0, 286.0, 260.0, 265.0, 283.0, 299.0, 289.0, 293.0, 294.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4150911429857618, "mean_processing_ms": 0.3508138897349896, "mean_inference_ms": 1.9677135371948458}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3744000, "num_steps_sampled": 1996800, "sample_time_ms": 20238.789, "load_time_ms": 37.088, "grad_time_ms": 8942.988, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004439468961209059, "policy_loss": -0.0041490718722343445, "vf_loss": 91.5320053100586, "vf_explained_var": 0.7567749619483948, "kl": 0.001588103943504393, "entropy": 1.1293169260025024, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, 
"optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1996800, "episodes_total": 4992, "training_iteration": 156, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-02-54", "timestamp": 1660251774, "time_this_iter_s": 29.386072158813477, "time_total_s": 10191.892349004745, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, 
"NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10191.892349004745, "timesteps_since_restore": 1996800, "iterations_since_restore": 156, "perf": {"cpu_util_percent": 34.03658536585366, "ram_util_percent": 58.353658536585364}} +{"episode_reward_max": 630.0, "episode_reward_min": 462.0, "episode_reward_mean": 569.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.82}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.84, "shaped_reward_min": 141, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, 
"useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.54, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.8, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.79, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.2, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.86, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.98, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.78, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 4.91, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 
0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.2, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.86, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.2, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.86, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, 
"catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 533.0, 564.0, 630.0, 579.0, 587.0, 579.0, 593.0, 582.0, 582.0, 584.0, 582.0, 573.0, 513.0, 570.0, 587.0, 507.0, 579.0, 564.0, 516.0, 579.0, 579.0, 576.0, 579.0, 579.0, 576.0, 579.0, 530.0, 573.0, 549.0, 501.0, 582.0, 579.0, 579.0, 581.0, 579.0, 573.0, 576.0, 582.0, 582.0, 579.0, 576.0, 579.0, 567.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 570.0, 579.0, 576.0, 627.0, 587.0, 582.0, 512.0, 564.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 525.0, 582.0, 582.0, 587.0, 587.0, 462.0, 578.0, 579.0, 519.0, 579.0, 582.0, 567.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 579.0, 564.0, 519.0, 567.0, 579.0, 570.0, 582.0, 573.0, 579.0, 527.0, 582.0, 576.0, 573.0, 570.0, 515.0, 576.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 283.0, 271.0, 262.0, 284.0, 280.0, 309.0, 321.0, 294.0, 285.0, 291.0, 296.0, 283.0, 296.0, 289.0, 304.0, 294.0, 288.0, 286.0, 296.0, 295.0, 289.0, 283.0, 299.0, 280.0, 293.0, 263.0, 250.0, 274.0, 296.0, 293.0, 294.0, 256.0, 251.0, 295.0, 284.0, 285.0, 279.0, 245.0, 271.0, 288.0, 291.0, 284.0, 295.0, 284.0, 292.0, 289.0, 290.0, 293.0, 286.0, 276.0, 300.0, 299.0, 280.0, 253.0, 277.0, 278.0, 295.0, 270.0, 279.0, 264.0, 237.0, 286.0, 296.0, 282.0, 297.0, 278.0, 301.0, 297.0, 284.0, 291.0, 288.0, 296.0, 277.0, 298.0, 278.0, 287.0, 295.0, 287.0, 295.0, 295.0, 284.0, 290.0, 286.0, 285.0, 294.0, 286.0, 281.0, 293.0, 283.0, 298.0, 281.0, 288.0, 291.0, 296.0, 283.0, 293.0, 286.0, 288.0, 291.0, 287.0, 283.0, 281.0, 298.0, 294.0, 282.0, 306.0, 321.0, 301.0, 286.0, 288.0, 294.0, 260.0, 252.0, 285.0, 279.0, 288.0, 294.0, 284.0, 295.0, 281.0, 301.0, 256.0, 271.0, 290.0, 280.0, 293.0, 286.0, 260.0, 265.0, 283.0, 299.0, 289.0, 293.0, 294.0, 293.0, 293.0, 294.0, 236.0, 226.0, 295.0, 283.0, 287.0, 292.0, 268.0, 251.0, 295.0, 284.0, 285.0, 297.0, 281.0, 286.0, 293.0, 283.0, 289.0, 284.0, 285.0, 288.0, 290.0, 289.0, 280.0, 299.0, 286.0, 290.0, 295.0, 284.0, 285.0, 279.0, 264.0, 255.0, 279.0, 288.0, 295.0, 284.0, 273.0, 297.0, 286.0, 296.0, 290.0, 283.0, 284.0, 295.0, 249.0, 278.0, 288.0, 294.0, 277.0, 299.0, 282.0, 291.0, 290.0, 280.0, 260.0, 255.0, 278.0, 298.0, 280.0, 302.0, 294.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4088928730851003, "mean_processing_ms": 0.34958857715576797, "mean_inference_ms": 1.9613751884714845}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3768000, "num_steps_sampled": 2009600, "sample_time_ms": 20457.246, "load_time_ms": 36.907, "grad_time_ms": 8867.46, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003602199489250779, "policy_loss": -0.004857169929891825, 
"vf_loss": 90.2380599975586, "vf_explained_var": 0.7651500105857849, "kl": 0.0019707006867974997, "entropy": 1.1288973093032837, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2009600, "episodes_total": 5024, "training_iteration": 157, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-03-24", "timestamp": 1660251804, "time_this_iter_s": 29.596789121627808, "time_total_s": 10221.489138126373, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": 
{"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": 
{"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10221.489138126373, "timesteps_since_restore": 2009600, "iterations_since_restore": 157, "perf": {"cpu_util_percent": 35.76428571428571, "ram_util_percent": 58.59285714285714}} +{"episode_reward_max": 630.0, "episode_reward_min": 185.0, "episode_reward_mean": 563.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.585}, "custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 172.77, "shaped_reward_min": 65, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.97, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.41, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.66, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.12, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.65, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.07, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.77, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.66, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.12, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.65, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.12, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.65, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 573.0, 495.0, 579.0, 579.0, 582.0, 570.0, 576.0, 567.0, 579.0, 582.0, 582.0, 579.0, 530.0, 573.0, 536.0, 533.0, 579.0, 185.0, 576.0, 582.0, 582.0, 544.0, 579.0, 576.0, 552.0, 570.0, 573.0, 630.0, 579.0, 495.0, 579.0, 525.0, 582.0, 582.0, 587.0, 587.0, 462.0, 578.0, 579.0, 519.0, 579.0, 582.0, 567.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 579.0, 564.0, 519.0, 567.0, 579.0, 570.0, 582.0, 573.0, 579.0, 527.0, 582.0, 576.0, 573.0, 570.0, 515.0, 576.0, 582.0, 579.0, 582.0, 533.0, 564.0, 630.0, 579.0, 587.0, 579.0, 593.0, 582.0, 582.0, 584.0, 582.0, 573.0, 513.0, 570.0, 587.0, 507.0, 579.0, 564.0, 516.0, 579.0, 579.0, 576.0, 579.0, 579.0, 576.0, 579.0, 530.0, 573.0, 549.0, 501.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 280.0, 293.0, 248.0, 247.0, 295.0, 284.0, 282.0, 297.0, 278.0, 304.0, 276.0, 294.0, 288.0, 288.0, 299.0, 268.0, 299.0, 280.0, 285.0, 297.0, 294.0, 288.0, 291.0, 288.0, 277.0, 253.0, 297.0, 276.0, 273.0, 263.0, 275.0, 258.0, 288.0, 291.0, 89.0, 96.0, 301.0, 275.0, 290.0, 292.0, 288.0, 294.0, 276.0, 268.0, 282.0, 297.0, 290.0, 286.0, 281.0, 271.0, 293.0, 277.0, 275.0, 298.0, 316.0, 314.0, 286.0, 293.0, 256.0, 239.0, 287.0, 292.0, 260.0, 265.0, 283.0, 299.0, 289.0, 293.0, 294.0, 293.0, 293.0, 294.0, 236.0, 226.0, 295.0, 283.0, 287.0, 292.0, 268.0, 251.0, 295.0, 284.0, 285.0, 297.0, 281.0, 286.0, 293.0, 283.0, 289.0, 284.0, 285.0, 288.0, 290.0, 289.0, 280.0, 299.0, 286.0, 290.0, 295.0, 284.0, 285.0, 279.0, 264.0, 255.0, 279.0, 288.0, 295.0, 284.0, 273.0, 297.0, 286.0, 296.0, 290.0, 283.0, 284.0, 295.0, 249.0, 278.0, 288.0, 294.0, 277.0, 299.0, 282.0, 291.0, 290.0, 280.0, 260.0, 255.0, 278.0, 298.0, 280.0, 302.0, 294.0, 285.0, 299.0, 283.0, 271.0, 262.0, 284.0, 280.0, 309.0, 321.0, 294.0, 285.0, 291.0, 296.0, 283.0, 296.0, 289.0, 304.0, 294.0, 288.0, 286.0, 296.0, 295.0, 289.0, 283.0, 299.0, 280.0, 293.0, 263.0, 250.0, 274.0, 296.0, 293.0, 294.0, 256.0, 251.0, 295.0, 284.0, 285.0, 279.0, 245.0, 271.0, 288.0, 291.0, 284.0, 295.0, 284.0, 292.0, 289.0, 290.0, 293.0, 286.0, 276.0, 300.0, 299.0, 280.0, 253.0, 277.0, 278.0, 295.0, 270.0, 279.0, 264.0, 237.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4027851249897816, "mean_processing_ms": 0.3483803806446401, "mean_inference_ms": 1.9551724322503146}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3792000, "num_steps_sampled": 2022400, "sample_time_ms": 20413.446, "load_time_ms": 37.245, "grad_time_ms": 
8954.782, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005850760731846094, "policy_loss": -0.002336603356525302, "vf_loss": 87.48675537109375, "vf_explained_var": 0.7656591534614563, "kl": 0.0021419422701001167, "entropy": 1.1226191520690918, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2022400, "episodes_total": 5056, "training_iteration": 158, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-03-54", "timestamp": 1660251834, "time_this_iter_s": 30.44686508178711, "time_total_s": 10251.93600320816, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": 
["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10251.93600320816, "timesteps_since_restore": 2022400, "iterations_since_restore": 158, "perf": {"cpu_util_percent": 37.25348837209302, "ram_util_percent": 58.44883720930233}} +{"episode_reward_max": 630.0, "episode_reward_min": 185.0, "episode_reward_mean": 564.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 282.115}, "custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.03, "shaped_reward_min": 65, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.88, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.41, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.6, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.77, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.67, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.95, 
"useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.36, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.28, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.08, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.67, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.67, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 582.0, 573.0, 561.0, 584.0, 489.0, 573.0, 512.0, 582.0, 582.0, 576.0, 539.0, 570.0, 519.0, 627.0, 579.0, 518.0, 516.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 582.0, 573.0, 579.0, 579.0, 582.0, 570.0, 630.0, 515.0, 576.0, 582.0, 579.0, 582.0, 533.0, 564.0, 630.0, 579.0, 587.0, 579.0, 593.0, 582.0, 582.0, 584.0, 582.0, 573.0, 513.0, 570.0, 587.0, 507.0, 579.0, 564.0, 516.0, 579.0, 579.0, 576.0, 579.0, 579.0, 576.0, 579.0, 530.0, 573.0, 549.0, 501.0, 582.0, 576.0, 573.0, 495.0, 579.0, 579.0, 582.0, 570.0, 576.0, 567.0, 579.0, 582.0, 582.0, 579.0, 530.0, 573.0, 536.0, 533.0, 579.0, 185.0, 576.0, 582.0, 582.0, 544.0, 579.0, 576.0, 552.0, 570.0, 573.0, 630.0, 579.0, 495.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 278.0, 294.0, 288.0, 287.0, 286.0, 297.0, 264.0, 291.0, 293.0, 240.0, 249.0, 292.0, 281.0, 262.0, 250.0, 297.0, 285.0, 285.0, 297.0, 279.0, 297.0, 262.0, 277.0, 287.0, 283.0, 266.0, 253.0, 321.0, 306.0, 286.0, 293.0, 272.0, 246.0, 274.0, 242.0, 282.0, 294.0, 284.0, 298.0, 288.0, 294.0, 285.0, 302.0, 295.0, 287.0, 288.0, 294.0, 279.0, 297.0, 289.0, 293.0, 277.0, 296.0, 286.0, 293.0, 304.0, 275.0, 290.0, 292.0, 294.0, 276.0, 326.0, 304.0, 260.0, 255.0, 278.0, 298.0, 280.0, 302.0, 294.0, 285.0, 299.0, 283.0, 271.0, 262.0, 284.0, 280.0, 309.0, 321.0, 294.0, 285.0, 291.0, 296.0, 283.0, 296.0, 289.0, 304.0, 294.0, 288.0, 286.0, 296.0, 295.0, 289.0, 283.0, 299.0, 280.0, 293.0, 263.0, 250.0, 274.0, 296.0, 293.0, 294.0, 256.0, 251.0, 295.0, 284.0, 285.0, 279.0, 245.0, 271.0, 288.0, 291.0, 284.0, 295.0, 284.0, 292.0, 289.0, 290.0, 293.0, 286.0, 276.0, 300.0, 299.0, 280.0, 253.0, 277.0, 278.0, 295.0, 270.0, 279.0, 264.0, 237.0, 286.0, 296.0, 276.0, 300.0, 280.0, 293.0, 248.0, 247.0, 295.0, 284.0, 282.0, 297.0, 278.0, 304.0, 276.0, 294.0, 288.0, 288.0, 299.0, 268.0, 299.0, 280.0, 285.0, 297.0, 294.0, 288.0, 291.0, 288.0, 277.0, 253.0, 297.0, 276.0, 273.0, 263.0, 275.0, 258.0, 288.0, 291.0, 89.0, 96.0, 301.0, 275.0, 290.0, 292.0, 288.0, 294.0, 276.0, 268.0, 282.0, 297.0, 290.0, 286.0, 281.0, 271.0, 293.0, 277.0, 275.0, 298.0, 316.0, 314.0, 286.0, 293.0, 256.0, 239.0, 287.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3967794054069242, "mean_processing_ms": 0.3471925853842631, 
"mean_inference_ms": 1.9496805791927851}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3816000, "num_steps_sampled": 2035200, "sample_time_ms": 20765.512, "load_time_ms": 37.179, "grad_time_ms": 8845.352, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005116061773151159, "policy_loss": -0.0030946088954806328, "vf_loss": 87.75751495361328, "vf_explained_var": 0.7570715546607971, "kl": 0.0022622861433774233, "entropy": 1.1301772594451904, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2035200, "episodes_total": 5088, "training_iteration": 159, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-04-27", "timestamp": 1660251867, "time_this_iter_s": 32.650943994522095, "time_total_s": 10284.586947202682, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, 
"num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10284.586947202682, "timesteps_since_restore": 2035200, "iterations_since_restore": 159, "perf": {"cpu_util_percent": 33.56739130434783, "ram_util_percent": 58.49565217391306}} +{"episode_reward_max": 630.0, "episode_reward_min": 185.0, "episode_reward_mean": 562.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 281.46}, "custom_metrics": {"sparse_reward_mean": 195.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 172.92, "shaped_reward_min": 65, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, 
"tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.87, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.29, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.57, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.07, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.62, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 
6.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.29, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.27, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.07, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.62, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.07, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, 
"viable_onion_potting_agent_1_mean": 16.62, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [237.0, 573.0, 590.0, 573.0, 582.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 579.0, 576.0, 579.0, 521.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 579.0, 513.0, 582.0, 582.0, 582.0, 576.0, 582.0, 573.0, 549.0, 501.0, 582.0, 576.0, 573.0, 495.0, 579.0, 579.0, 582.0, 570.0, 576.0, 567.0, 579.0, 582.0, 582.0, 579.0, 530.0, 573.0, 536.0, 533.0, 579.0, 185.0, 576.0, 582.0, 582.0, 544.0, 579.0, 576.0, 552.0, 570.0, 573.0, 630.0, 579.0, 495.0, 579.0, 587.0, 582.0, 573.0, 561.0, 584.0, 489.0, 573.0, 512.0, 582.0, 
582.0, 576.0, 539.0, 570.0, 519.0, 627.0, 579.0, 518.0, 516.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 582.0, 573.0, 579.0, 579.0, 582.0, 570.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [122.0, 115.0, 280.0, 293.0, 291.0, 299.0, 293.0, 280.0, 291.0, 291.0, 291.0, 285.0, 292.0, 287.0, 287.0, 289.0, 285.0, 302.0, 299.0, 283.0, 285.0, 297.0, 283.0, 296.0, 289.0, 287.0, 291.0, 288.0, 269.0, 252.0, 287.0, 295.0, 269.0, 301.0, 282.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 281.0, 298.0, 275.0, 307.0, 294.0, 285.0, 256.0, 257.0, 295.0, 287.0, 280.0, 302.0, 285.0, 297.0, 291.0, 285.0, 302.0, 280.0, 278.0, 295.0, 270.0, 279.0, 264.0, 237.0, 286.0, 296.0, 276.0, 300.0, 280.0, 293.0, 248.0, 247.0, 295.0, 284.0, 282.0, 297.0, 278.0, 304.0, 276.0, 294.0, 288.0, 288.0, 299.0, 268.0, 299.0, 280.0, 285.0, 297.0, 294.0, 288.0, 291.0, 288.0, 277.0, 253.0, 297.0, 276.0, 273.0, 263.0, 275.0, 258.0, 288.0, 291.0, 89.0, 96.0, 301.0, 275.0, 290.0, 292.0, 288.0, 294.0, 276.0, 268.0, 282.0, 297.0, 290.0, 286.0, 281.0, 271.0, 293.0, 277.0, 275.0, 298.0, 316.0, 314.0, 286.0, 293.0, 256.0, 239.0, 287.0, 292.0, 309.0, 278.0, 294.0, 288.0, 287.0, 286.0, 297.0, 264.0, 291.0, 293.0, 240.0, 249.0, 292.0, 281.0, 262.0, 250.0, 297.0, 285.0, 285.0, 297.0, 279.0, 297.0, 262.0, 277.0, 287.0, 283.0, 266.0, 253.0, 321.0, 306.0, 286.0, 293.0, 272.0, 246.0, 274.0, 242.0, 282.0, 294.0, 284.0, 298.0, 288.0, 294.0, 285.0, 302.0, 295.0, 287.0, 288.0, 294.0, 279.0, 
297.0, 289.0, 293.0, 277.0, 296.0, 286.0, 293.0, 304.0, 275.0, 290.0, 292.0, 294.0, 276.0, 326.0, 304.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3908344800853478, "mean_processing_ms": 0.3460119024024818, "mean_inference_ms": 1.9441766821864475}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3840000, "num_steps_sampled": 2048000, "sample_time_ms": 20695.053, "load_time_ms": 36.846, "grad_time_ms": 8667.722, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033892595674842596, "policy_loss": -0.004953155294060707, "vf_loss": 89.03093719482422, "vf_explained_var": 0.7680574059486389, "kl": 0.0018749010050669312, "entropy": 1.1213653087615967, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2048000, "episodes_total": 5120, "training_iteration": 160, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-04-56", "timestamp": 1660251896, "time_this_iter_s": 28.625488996505737, "time_total_s": 10313.212436199188, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10313.212436199188, "timesteps_since_restore": 2048000, "iterations_since_restore": 160, "perf": {"cpu_util_percent": 34.958536585365856, "ram_util_percent": 58.548780487804876}} +{"episode_reward_max": 630.0, "episode_reward_min": 237.0, "episode_reward_mean": 567.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 283.645}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.49, 
"shaped_reward_min": 77, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.94, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.55, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.56, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.03, 
"potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.83, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.49, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.83, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.83, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 567.0, 582.0, 582.0, 579.0, 576.0, 587.0, 504.0, 579.0, 522.0, 516.0, 579.0, 579.0, 584.0, 587.0, 525.0, 630.0, 582.0, 408.0, 582.0, 530.0, 630.0, 570.0, 579.0, 582.0, 576.0, 582.0, 573.0, 561.0, 582.0, 576.0, 573.0, 630.0, 579.0, 495.0, 579.0, 587.0, 582.0, 573.0, 561.0, 584.0, 489.0, 573.0, 512.0, 582.0, 582.0, 576.0, 
539.0, 570.0, 519.0, 627.0, 579.0, 518.0, 516.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 582.0, 573.0, 579.0, 579.0, 582.0, 570.0, 630.0, 237.0, 573.0, 590.0, 573.0, 582.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 579.0, 576.0, 579.0, 521.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 579.0, 513.0, 582.0, 582.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 289.0, 277.0, 290.0, 291.0, 291.0, 279.0, 303.0, 295.0, 284.0, 294.0, 282.0, 295.0, 292.0, 257.0, 247.0, 289.0, 290.0, 250.0, 272.0, 249.0, 267.0, 290.0, 289.0, 302.0, 277.0, 299.0, 285.0, 304.0, 283.0, 267.0, 258.0, 299.0, 331.0, 285.0, 297.0, 205.0, 203.0, 291.0, 291.0, 269.0, 261.0, 313.0, 317.0, 288.0, 282.0, 289.0, 290.0, 288.0, 294.0, 293.0, 283.0, 298.0, 284.0, 288.0, 285.0, 284.0, 277.0, 296.0, 286.0, 283.0, 293.0, 291.0, 282.0, 316.0, 314.0, 286.0, 293.0, 256.0, 239.0, 287.0, 292.0, 309.0, 278.0, 294.0, 288.0, 287.0, 286.0, 297.0, 264.0, 291.0, 293.0, 240.0, 249.0, 292.0, 281.0, 262.0, 250.0, 297.0, 285.0, 285.0, 297.0, 279.0, 297.0, 262.0, 277.0, 287.0, 283.0, 266.0, 253.0, 321.0, 306.0, 286.0, 293.0, 272.0, 246.0, 274.0, 242.0, 282.0, 294.0, 284.0, 298.0, 288.0, 294.0, 285.0, 302.0, 295.0, 287.0, 288.0, 294.0, 279.0, 297.0, 289.0, 293.0, 277.0, 296.0, 286.0, 293.0, 304.0, 275.0, 290.0, 292.0, 294.0, 276.0, 326.0, 304.0, 122.0, 115.0, 280.0, 293.0, 291.0, 299.0, 293.0, 280.0, 291.0, 291.0, 291.0, 285.0, 292.0, 287.0, 287.0, 289.0, 285.0, 302.0, 299.0, 
283.0, 285.0, 297.0, 283.0, 296.0, 289.0, 287.0, 291.0, 288.0, 269.0, 252.0, 287.0, 295.0, 269.0, 301.0, 282.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 281.0, 298.0, 275.0, 307.0, 294.0, 285.0, 256.0, 257.0, 295.0, 287.0, 280.0, 302.0, 285.0, 297.0, 291.0, 285.0, 302.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3849427098912996, "mean_processing_ms": 0.3448405344667729, "mean_inference_ms": 1.9385368397952782}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3864000, "num_steps_sampled": 2060800, "sample_time_ms": 20359.83, "load_time_ms": 36.714, "grad_time_ms": 8574.991, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003171335905790329, "policy_loss": -0.005784957204014063, "vf_loss": 95.20501708984375, "vf_explained_var": 0.7632928490638733, "kl": 0.001863123499788344, "entropy": 1.1284128427505493, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2060800, "episodes_total": 5152, "training_iteration": 161, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-05-24", "timestamp": 1660251924, "time_this_iter_s": 28.188406705856323, "time_total_s": 10341.400842905045, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10341.400842905045, "timesteps_since_restore": 2060800, "iterations_since_restore": 161, "perf": {"cpu_util_percent": 36.3875, "ram_util_percent": 58.585}} +{"episode_reward_max": 630.0, "episode_reward_min": 237.0, "episode_reward_mean": 568.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, 
"policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 284.18}, "custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.16, "shaped_reward_min": 77, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.88, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.91, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.21, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.36, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.37, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.85, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 9, 
"optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 582.0, 587.0, 582.0, 573.0, 576.0, 579.0, 573.0, 627.0, 558.0, 570.0, 579.0, 579.0, 573.0, 581.0, 582.0, 
539.0, 582.0, 579.0, 582.0, 579.0, 570.0, 567.0, 522.0, 587.0, 564.0, 584.0, 507.0, 493.0, 570.0, 579.0, 579.0, 582.0, 570.0, 630.0, 237.0, 573.0, 590.0, 573.0, 582.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 579.0, 576.0, 579.0, 521.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 579.0, 513.0, 582.0, 582.0, 582.0, 576.0, 582.0, 567.0, 567.0, 582.0, 582.0, 579.0, 576.0, 587.0, 504.0, 579.0, 522.0, 516.0, 579.0, 579.0, 584.0, 587.0, 525.0, 630.0, 582.0, 408.0, 582.0, 530.0, 630.0, 570.0, 579.0, 582.0, 576.0, 582.0, 573.0, 561.0, 582.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 285.0, 291.0, 288.0, 296.0, 286.0, 293.0, 294.0, 291.0, 291.0, 287.0, 286.0, 282.0, 294.0, 290.0, 289.0, 283.0, 290.0, 313.0, 314.0, 289.0, 269.0, 284.0, 286.0, 295.0, 284.0, 283.0, 296.0, 284.0, 289.0, 284.0, 297.0, 283.0, 299.0, 268.0, 271.0, 302.0, 280.0, 294.0, 285.0, 297.0, 285.0, 293.0, 286.0, 282.0, 288.0, 302.0, 265.0, 252.0, 270.0, 291.0, 296.0, 284.0, 280.0, 285.0, 299.0, 249.0, 258.0, 250.0, 243.0, 279.0, 291.0, 280.0, 299.0, 304.0, 275.0, 290.0, 292.0, 294.0, 276.0, 326.0, 304.0, 122.0, 115.0, 280.0, 293.0, 291.0, 299.0, 293.0, 280.0, 291.0, 291.0, 291.0, 285.0, 292.0, 287.0, 287.0, 289.0, 285.0, 302.0, 299.0, 283.0, 285.0, 297.0, 283.0, 296.0, 289.0, 287.0, 291.0, 288.0, 269.0, 252.0, 287.0, 295.0, 269.0, 301.0, 282.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 281.0, 298.0, 275.0, 307.0, 294.0, 285.0, 256.0, 
257.0, 295.0, 287.0, 280.0, 302.0, 285.0, 297.0, 291.0, 285.0, 302.0, 280.0, 278.0, 289.0, 277.0, 290.0, 291.0, 291.0, 279.0, 303.0, 295.0, 284.0, 294.0, 282.0, 295.0, 292.0, 257.0, 247.0, 289.0, 290.0, 250.0, 272.0, 249.0, 267.0, 290.0, 289.0, 302.0, 277.0, 299.0, 285.0, 304.0, 283.0, 267.0, 258.0, 299.0, 331.0, 285.0, 297.0, 205.0, 203.0, 291.0, 291.0, 269.0, 261.0, 313.0, 317.0, 288.0, 282.0, 289.0, 290.0, 288.0, 294.0, 293.0, 283.0, 298.0, 284.0, 288.0, 285.0, 284.0, 277.0, 296.0, 286.0, 283.0, 293.0, 291.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3791010078296764, "mean_processing_ms": 0.34367607505559905, "mean_inference_ms": 1.932410583140313}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3888000, "num_steps_sampled": 2073600, "sample_time_ms": 20389.495, "load_time_ms": 36.868, "grad_time_ms": 8516.869, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007552561815828085, "policy_loss": -0.0015357719967141747, "vf_loss": 96.43359375, "vf_explained_var": 0.7504541277885437, "kl": 0.0026693844702094793, "entropy": 1.110058307647705, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2073600, "episodes_total": 5184, "training_iteration": 162, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-05-53", "timestamp": 1660251953, "time_this_iter_s": 29.546289205551147, "time_total_s": 10370.947132110596, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": 
true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10370.947132110596, "timesteps_since_restore": 2073600, "iterations_since_restore": 162, "perf": {"cpu_util_percent": 34.892682926829266, 
"ram_util_percent": 58.55365853658536}} +{"episode_reward_max": 630.0, "episode_reward_min": 345.0, "episode_reward_mean": 568.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 284.395}, "custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.79, "shaped_reward_min": 105, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.0, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.92, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.5, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, 
"onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.41, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.01, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.58, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.01, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.01, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 470.0, 564.0, 576.0, 576.0, 576.0, 582.0, 587.0, 579.0, 582.0, 579.0, 345.0, 570.0, 576.0, 582.0, 576.0, 576.0, 579.0, 576.0, 567.0, 567.0, 582.0, 579.0, 564.0, 570.0, 579.0, 582.0, 579.0, 576.0, 627.0, 582.0, 582.0, 576.0, 582.0, 567.0, 567.0, 582.0, 582.0, 579.0, 576.0, 587.0, 504.0, 579.0, 522.0, 516.0, 579.0, 579.0, 584.0, 587.0, 525.0, 630.0, 582.0, 408.0, 582.0, 530.0, 630.0, 570.0, 579.0, 582.0, 576.0, 582.0, 573.0, 561.0, 582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 587.0, 582.0, 573.0, 576.0, 579.0, 573.0, 627.0, 558.0, 570.0, 579.0, 579.0, 573.0, 581.0, 582.0, 539.0, 582.0, 579.0, 582.0, 579.0, 570.0, 567.0, 522.0, 587.0, 564.0, 584.0, 507.0, 493.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 301.0, 285.0, 294.0, 279.0, 303.0, 227.0, 243.0, 263.0, 301.0, 280.0, 296.0, 276.0, 300.0, 283.0, 293.0, 286.0, 296.0, 301.0, 286.0, 288.0, 291.0, 288.0, 294.0, 302.0, 277.0, 173.0, 172.0, 289.0, 281.0, 285.0, 291.0, 306.0, 276.0, 280.0, 296.0, 290.0, 286.0, 293.0, 286.0, 283.0, 293.0, 280.0, 287.0, 289.0, 278.0, 298.0, 284.0, 285.0, 294.0, 280.0, 284.0, 282.0, 288.0, 281.0, 298.0, 295.0, 287.0, 282.0, 297.0, 285.0, 291.0, 306.0, 321.0, 280.0, 302.0, 285.0, 297.0, 291.0, 285.0, 302.0, 280.0, 278.0, 289.0, 277.0, 290.0, 291.0, 291.0, 279.0, 303.0, 295.0, 284.0, 294.0, 282.0, 295.0, 292.0, 257.0, 247.0, 289.0, 290.0, 250.0, 
272.0, 249.0, 267.0, 290.0, 289.0, 302.0, 277.0, 299.0, 285.0, 304.0, 283.0, 267.0, 258.0, 299.0, 331.0, 285.0, 297.0, 205.0, 203.0, 291.0, 291.0, 269.0, 261.0, 313.0, 317.0, 288.0, 282.0, 289.0, 290.0, 288.0, 294.0, 293.0, 283.0, 298.0, 284.0, 288.0, 285.0, 284.0, 277.0, 296.0, 286.0, 283.0, 293.0, 291.0, 282.0, 291.0, 285.0, 291.0, 288.0, 296.0, 286.0, 293.0, 294.0, 291.0, 291.0, 287.0, 286.0, 282.0, 294.0, 290.0, 289.0, 283.0, 290.0, 313.0, 314.0, 289.0, 269.0, 284.0, 286.0, 295.0, 284.0, 283.0, 296.0, 284.0, 289.0, 284.0, 297.0, 283.0, 299.0, 268.0, 271.0, 302.0, 280.0, 294.0, 285.0, 297.0, 285.0, 293.0, 286.0, 282.0, 288.0, 302.0, 265.0, 252.0, 270.0, 291.0, 296.0, 284.0, 280.0, 285.0, 299.0, 249.0, 258.0, 250.0, 243.0, 279.0, 291.0, 280.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3733429258954066, "mean_processing_ms": 0.3425317863430243, "mean_inference_ms": 1.9266299653449164}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3912000, "num_steps_sampled": 2086400, "sample_time_ms": 20824.632, "load_time_ms": 36.596, "grad_time_ms": 8758.37, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004362883511930704, "policy_loss": -0.003907353617250919, "vf_loss": 88.24420166015625, "vf_explained_var": 0.7741295695304871, "kl": 0.002105970401316881, "entropy": 1.1083542108535767, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2086400, "episodes_total": 5216, "training_iteration": 163, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-06-26", "timestamp": 1660251986, "time_this_iter_s": 32.18382000923157, "time_total_s": 10403.130952119827, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10403.130952119827, "timesteps_since_restore": 2086400, "iterations_since_restore": 163, "perf": {"cpu_util_percent": 33.87608695652174, "ram_util_percent": 58.582608695652176}} +{"episode_reward_max": 630.0, "episode_reward_min": 336.0, "episode_reward_mean": 565.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 162.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 282.99}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 173.98, "shaped_reward_min": 96, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.87, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.89, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 27, 
"useful_onion_pickup_agent_0_mean": 15.41, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.0, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.12, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.0, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.0, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 587.0, 501.0, 579.0, 573.0, 558.0, 582.0, 525.0, 579.0, 430.0, 576.0, 530.0, 519.0, 579.0, 587.0, 582.0, 336.0, 570.0, 465.0, 582.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 582.0, 627.0, 587.0, 630.0, 579.0, 579.0, 561.0, 582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 587.0, 582.0, 573.0, 576.0, 579.0, 573.0, 627.0, 558.0, 570.0, 579.0, 579.0, 573.0, 581.0, 582.0, 539.0, 582.0, 579.0, 582.0, 579.0, 570.0, 567.0, 522.0, 587.0, 564.0, 584.0, 507.0, 493.0, 570.0, 579.0, 582.0, 579.0, 582.0, 470.0, 564.0, 576.0, 576.0, 576.0, 582.0, 587.0, 579.0, 582.0, 579.0, 345.0, 570.0, 576.0, 582.0, 576.0, 576.0, 579.0, 576.0, 567.0, 567.0, 582.0, 579.0, 564.0, 570.0, 579.0, 582.0, 579.0, 576.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 295.0, 292.0, 255.0, 246.0, 279.0, 300.0, 283.0, 290.0, 285.0, 273.0, 301.0, 281.0, 264.0, 261.0, 285.0, 294.0, 218.0, 212.0, 286.0, 290.0, 263.0, 267.0, 272.0, 247.0, 300.0, 279.0, 304.0, 283.0, 301.0, 281.0, 162.0, 174.0, 279.0, 291.0, 231.0, 234.0, 276.0, 306.0, 286.0, 296.0, 281.0, 301.0, 288.0, 285.0, 290.0, 292.0, 291.0, 285.0, 298.0, 284.0, 296.0, 286.0, 311.0, 316.0, 290.0, 297.0, 319.0, 311.0, 289.0, 
290.0, 293.0, 286.0, 284.0, 277.0, 296.0, 286.0, 283.0, 293.0, 291.0, 282.0, 291.0, 285.0, 291.0, 288.0, 296.0, 286.0, 293.0, 294.0, 291.0, 291.0, 287.0, 286.0, 282.0, 294.0, 290.0, 289.0, 283.0, 290.0, 313.0, 314.0, 289.0, 269.0, 284.0, 286.0, 295.0, 284.0, 283.0, 296.0, 284.0, 289.0, 284.0, 297.0, 283.0, 299.0, 268.0, 271.0, 302.0, 280.0, 294.0, 285.0, 297.0, 285.0, 293.0, 286.0, 282.0, 288.0, 302.0, 265.0, 252.0, 270.0, 291.0, 296.0, 284.0, 280.0, 285.0, 299.0, 249.0, 258.0, 250.0, 243.0, 279.0, 291.0, 280.0, 299.0, 281.0, 301.0, 285.0, 294.0, 279.0, 303.0, 227.0, 243.0, 263.0, 301.0, 280.0, 296.0, 276.0, 300.0, 283.0, 293.0, 286.0, 296.0, 301.0, 286.0, 288.0, 291.0, 288.0, 294.0, 302.0, 277.0, 173.0, 172.0, 289.0, 281.0, 285.0, 291.0, 306.0, 276.0, 280.0, 296.0, 290.0, 286.0, 293.0, 286.0, 283.0, 293.0, 280.0, 287.0, 289.0, 278.0, 298.0, 284.0, 285.0, 294.0, 280.0, 284.0, 282.0, 288.0, 281.0, 298.0, 295.0, 287.0, 282.0, 297.0, 285.0, 291.0, 306.0, 321.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3676747484724832, "mean_processing_ms": 0.341408599904806, "mean_inference_ms": 1.9212242933819834}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3936000, "num_steps_sampled": 2099200, "sample_time_ms": 20928.906, "load_time_ms": 37.45, "grad_time_ms": 9167.09, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035528007429093122, "policy_loss": -0.005154869984835386, "vf_loss": 92.63870239257812, "vf_explained_var": 0.7672746181488037, "kl": 0.0020837958436459303, "entropy": 1.1124038696289062, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2099200, "episodes_total": 5248, "training_iteration": 164, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-06-59", "timestamp": 1660252019, "time_this_iter_s": 33.07875204086304, "time_total_s": 10436.20970416069, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10436.20970416069, "timesteps_since_restore": 2099200, "iterations_since_restore": 164, "perf": {"cpu_util_percent": 35.41276595744681, "ram_util_percent": 58.536170212765946}} +{"episode_reward_max": 633.0, "episode_reward_min": 336.0, "episode_reward_mean": 565.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 162.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 282.81}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.02, "shaped_reward_min": 96, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.68, 
"onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.35, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.06, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.36, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.37, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.36, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.55, "soup_pickup_agent_1_min": 1, 
"soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.78, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.06, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.06, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 576.0, 573.0, 587.0, 573.0, 633.0, 630.0, 630.0, 573.0, 582.0, 582.0, 582.0, 516.0, 579.0, 582.0, 627.0, 576.0, 416.0, 582.0, 582.0, 579.0, 558.0, 582.0, 576.0, 576.0, 525.0, 579.0, 579.0, 513.0, 582.0, 582.0, 507.0, 493.0, 570.0, 579.0, 582.0, 579.0, 582.0, 470.0, 564.0, 576.0, 576.0, 576.0, 582.0, 587.0, 579.0, 582.0, 579.0, 345.0, 570.0, 576.0, 582.0, 576.0, 576.0, 579.0, 576.0, 567.0, 567.0, 582.0, 579.0, 564.0, 570.0, 579.0, 582.0, 579.0, 576.0, 627.0, 579.0, 587.0, 501.0, 579.0, 573.0, 558.0, 582.0, 525.0, 579.0, 430.0, 576.0, 530.0, 519.0, 579.0, 587.0, 582.0, 336.0, 570.0, 465.0, 582.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 582.0, 627.0, 587.0, 630.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 283.0, 296.0, 295.0, 281.0, 305.0, 268.0, 297.0, 290.0, 296.0, 277.0, 314.0, 319.0, 319.0, 311.0, 308.0, 322.0, 291.0, 282.0, 296.0, 286.0, 297.0, 285.0, 285.0, 297.0, 266.0, 250.0, 295.0, 284.0, 291.0, 291.0, 324.0, 303.0, 298.0, 278.0, 213.0, 
203.0, 287.0, 295.0, 296.0, 286.0, 283.0, 296.0, 279.0, 279.0, 285.0, 297.0, 282.0, 294.0, 285.0, 291.0, 274.0, 251.0, 293.0, 286.0, 289.0, 290.0, 267.0, 246.0, 283.0, 299.0, 299.0, 283.0, 249.0, 258.0, 250.0, 243.0, 279.0, 291.0, 280.0, 299.0, 281.0, 301.0, 285.0, 294.0, 279.0, 303.0, 227.0, 243.0, 263.0, 301.0, 280.0, 296.0, 276.0, 300.0, 283.0, 293.0, 286.0, 296.0, 301.0, 286.0, 288.0, 291.0, 288.0, 294.0, 302.0, 277.0, 173.0, 172.0, 289.0, 281.0, 285.0, 291.0, 306.0, 276.0, 280.0, 296.0, 290.0, 286.0, 293.0, 286.0, 283.0, 293.0, 280.0, 287.0, 289.0, 278.0, 298.0, 284.0, 285.0, 294.0, 280.0, 284.0, 282.0, 288.0, 281.0, 298.0, 295.0, 287.0, 282.0, 297.0, 285.0, 291.0, 306.0, 321.0, 287.0, 292.0, 295.0, 292.0, 255.0, 246.0, 279.0, 300.0, 283.0, 290.0, 285.0, 273.0, 301.0, 281.0, 264.0, 261.0, 285.0, 294.0, 218.0, 212.0, 286.0, 290.0, 263.0, 267.0, 272.0, 247.0, 300.0, 279.0, 304.0, 283.0, 301.0, 281.0, 162.0, 174.0, 279.0, 291.0, 231.0, 234.0, 276.0, 306.0, 286.0, 296.0, 281.0, 301.0, 288.0, 285.0, 290.0, 292.0, 291.0, 285.0, 298.0, 284.0, 296.0, 286.0, 311.0, 316.0, 290.0, 297.0, 319.0, 311.0, 289.0, 290.0, 293.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3620936512767547, "mean_processing_ms": 0.34030575499793914, "mean_inference_ms": 1.9161448666876886}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3960000, "num_steps_sampled": 2112000, "sample_time_ms": 21120.168, "load_time_ms": 37.608, "grad_time_ms": 9339.122, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019619378726929426, "policy_loss": -0.006335819140076637, "vf_loss": 88.54428100585938, "vf_explained_var": 0.7676218152046204, "kl": 0.0017338074976578355, "entropy": 1.1133431196212769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2112000, "episodes_total": 5280, "training_iteration": 165, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-07-30", "timestamp": 1660252050, "time_this_iter_s": 31.535957098007202, "time_total_s": 10467.745661258698, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10467.745661258698, "timesteps_since_restore": 2112000, "iterations_since_restore": 165, "perf": {"cpu_util_percent": 34.425000000000004, "ram_util_percent": 58.65909090909092}} +{"episode_reward_max": 633.0, "episode_reward_min": 336.0, "episode_reward_mean": 569.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 162.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.81}, "custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.22, "shaped_reward_min": 96, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.54, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.19, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.24, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.44, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.34, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 567.0, 536.0, 579.0, 582.0, 582.0, 567.0, 587.0, 522.0, 582.0, 579.0, 578.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 570.0, 573.0, 573.0, 564.0, 576.0, 576.0, 573.0, 570.0, 579.0, 573.0, 579.0, 570.0, 579.0, 582.0, 579.0, 576.0, 627.0, 579.0, 587.0, 501.0, 579.0, 573.0, 558.0, 582.0, 525.0, 579.0, 430.0, 576.0, 530.0, 519.0, 579.0, 587.0, 582.0, 336.0, 570.0, 465.0, 582.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 582.0, 627.0, 587.0, 630.0, 579.0, 579.0, 576.0, 579.0, 576.0, 573.0, 587.0, 573.0, 633.0, 630.0, 630.0, 573.0, 582.0, 582.0, 582.0, 516.0, 579.0, 582.0, 627.0, 576.0, 416.0, 582.0, 582.0, 579.0, 558.0, 582.0, 576.0, 576.0, 525.0, 579.0, 579.0, 513.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 289.0, 284.0, 283.0, 261.0, 
275.0, 289.0, 290.0, 283.0, 299.0, 299.0, 283.0, 275.0, 292.0, 298.0, 289.0, 252.0, 270.0, 293.0, 289.0, 303.0, 276.0, 290.0, 288.0, 293.0, 277.0, 278.0, 304.0, 285.0, 297.0, 301.0, 281.0, 297.0, 285.0, 281.0, 301.0, 283.0, 299.0, 283.0, 287.0, 292.0, 281.0, 287.0, 286.0, 273.0, 291.0, 296.0, 280.0, 281.0, 295.0, 290.0, 283.0, 296.0, 274.0, 297.0, 282.0, 297.0, 276.0, 303.0, 276.0, 292.0, 278.0, 284.0, 295.0, 295.0, 287.0, 282.0, 297.0, 285.0, 291.0, 306.0, 321.0, 287.0, 292.0, 295.0, 292.0, 255.0, 246.0, 279.0, 300.0, 283.0, 290.0, 285.0, 273.0, 301.0, 281.0, 264.0, 261.0, 285.0, 294.0, 218.0, 212.0, 286.0, 290.0, 263.0, 267.0, 272.0, 247.0, 300.0, 279.0, 304.0, 283.0, 301.0, 281.0, 162.0, 174.0, 279.0, 291.0, 231.0, 234.0, 276.0, 306.0, 286.0, 296.0, 281.0, 301.0, 288.0, 285.0, 290.0, 292.0, 291.0, 285.0, 298.0, 284.0, 296.0, 286.0, 311.0, 316.0, 290.0, 297.0, 319.0, 311.0, 289.0, 290.0, 293.0, 286.0, 286.0, 290.0, 283.0, 296.0, 295.0, 281.0, 305.0, 268.0, 297.0, 290.0, 296.0, 277.0, 314.0, 319.0, 319.0, 311.0, 308.0, 322.0, 291.0, 282.0, 296.0, 286.0, 297.0, 285.0, 285.0, 297.0, 266.0, 250.0, 295.0, 284.0, 291.0, 291.0, 324.0, 303.0, 298.0, 278.0, 213.0, 203.0, 287.0, 295.0, 296.0, 286.0, 283.0, 296.0, 279.0, 279.0, 285.0, 297.0, 282.0, 294.0, 285.0, 291.0, 274.0, 251.0, 293.0, 286.0, 289.0, 290.0, 267.0, 246.0, 283.0, 299.0, 299.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3565665278792982, "mean_processing_ms": 0.33921520895760066, "mean_inference_ms": 1.9109392629722073}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3984000, "num_steps_sampled": 2124800, "sample_time_ms": 21230.051, "load_time_ms": 38.109, "grad_time_ms": 9623.189, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006461843382567167, "policy_loss": -0.0018003573641180992, "vf_loss": 88.1545181274414, "vf_explained_var": 0.7546200752258301, "kl": 0.00197615590877831, "entropy": 1.106500267982483, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2124800, "episodes_total": 5312, "training_iteration": 166, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-08-04", "timestamp": 1660252084, "time_this_iter_s": 33.33124303817749, "time_total_s": 10501.076904296875, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10501.076904296875, "timesteps_since_restore": 2124800, "iterations_since_restore": 166, "perf": {"cpu_util_percent": 33.48510638297872, "ram_util_percent": 58.49574468085109}} +{"episode_reward_max": 633.0, "episode_reward_min": 416.0, "episode_reward_mean": 574.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 203.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.395}, "custom_metrics": {"sparse_reward_mean": 198.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.19, "shaped_reward_min": 136, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.54, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.32, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.86, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.42, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.78, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.32, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.3, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.25, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.86, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.42, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.86, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.42, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 576.0, 582.0, 573.0, 576.0, 576.0, 576.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 525.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 582.0, 522.0, 579.0, 587.0, 630.0, 579.0, 579.0, 576.0, 579.0, 576.0, 573.0, 587.0, 573.0, 633.0, 630.0, 630.0, 573.0, 582.0, 582.0, 582.0, 516.0, 579.0, 582.0, 627.0, 576.0, 416.0, 582.0, 582.0, 579.0, 558.0, 582.0, 576.0, 576.0, 525.0, 579.0, 579.0, 513.0, 582.0, 582.0, 573.0, 567.0, 536.0, 579.0, 582.0, 582.0, 567.0, 587.0, 522.0, 582.0, 579.0, 578.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 570.0, 573.0, 573.0, 564.0, 576.0, 576.0, 573.0, 570.0, 579.0, 573.0, 579.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 301.0, 281.0, 293.0, 283.0, 294.0, 288.0, 288.0, 285.0, 292.0, 284.0, 282.0, 294.0, 291.0, 285.0, 293.0, 289.0, 292.0, 287.0, 288.0, 291.0, 301.0, 281.0, 295.0, 287.0, 291.0, 291.0, 297.0, 282.0, 294.0, 285.0, 293.0, 289.0, 269.0, 261.0, 279.0, 300.0, 298.0, 284.0, 270.0, 309.0, 288.0, 291.0, 263.0, 262.0, 293.0, 289.0, 303.0, 276.0, 286.0, 296.0, 301.0, 281.0, 286.0, 290.0, 296.0, 286.0, 300.0, 282.0, 272.0, 250.0, 303.0, 276.0, 290.0, 297.0, 319.0, 311.0, 289.0, 290.0, 293.0, 286.0, 286.0, 290.0, 283.0, 296.0, 295.0, 281.0, 305.0, 268.0, 297.0, 290.0, 296.0, 277.0, 314.0, 319.0, 319.0, 311.0, 308.0, 322.0, 291.0, 282.0, 296.0, 286.0, 297.0, 285.0, 285.0, 297.0, 266.0, 250.0, 295.0, 284.0, 291.0, 291.0, 324.0, 303.0, 298.0, 278.0, 213.0, 203.0, 287.0, 295.0, 296.0, 286.0, 283.0, 296.0, 279.0, 279.0, 285.0, 297.0, 282.0, 294.0, 285.0, 291.0, 274.0, 251.0, 293.0, 286.0, 289.0, 290.0, 267.0, 246.0, 283.0, 299.0, 299.0, 283.0, 284.0, 289.0, 284.0, 283.0, 261.0, 275.0, 289.0, 290.0, 283.0, 299.0, 299.0, 283.0, 275.0, 292.0, 298.0, 289.0, 252.0, 270.0, 293.0, 289.0, 303.0, 276.0, 290.0, 288.0, 293.0, 277.0, 278.0, 304.0, 285.0, 297.0, 301.0, 281.0, 297.0, 285.0, 281.0, 301.0, 283.0, 299.0, 283.0, 287.0, 292.0, 281.0, 287.0, 286.0, 273.0, 291.0, 296.0, 280.0, 281.0, 295.0, 290.0, 283.0, 296.0, 274.0, 297.0, 282.0, 297.0, 276.0, 303.0, 276.0, 292.0, 278.0, 284.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.351083451607247, "mean_processing_ms": 0.3381304952990823, "mean_inference_ms": 1.9054854328203157}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4008000, "num_steps_sampled": 2137600, "sample_time_ms": 21012.721, "load_time_ms": 38.367, "grad_time_ms": 10014.737, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.006168690975755453, "policy_loss": -0.002181840827688575, "vf_loss": 88.96065521240234, "vf_explained_var": 0.762434184551239, "kl": 0.0017693521222099662, "entropy": 1.0910512208938599, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2137600, "episodes_total": 5344, "training_iteration": 167, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-08-35", "timestamp": 1660252115, "time_this_iter_s": 31.34629511833191, "time_total_s": 10532.423199415207, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10532.423199415207, "timesteps_since_restore": 2137600, "iterations_since_restore": 167, "perf": {"cpu_util_percent": 33.334090909090904, "ram_util_percent": 58.479545454545466}} +{"episode_reward_max": 633.0, "episode_reward_min": 452.0, "episode_reward_mean": 571.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 285.905}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.21, "shaped_reward_min": 132, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.71, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.73, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.4, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.13, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.01, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.11, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, 
"useful_dish_pickup_agent_1_mean": 4.41, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.01, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.11, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.01, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.11, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 525.0, 582.0, 579.0, 452.0, 579.0, 567.0, 530.0, 576.0, 582.0, 582.0, 525.0, 576.0, 627.0, 573.0, 573.0, 527.0, 576.0, 579.0, 564.0, 633.0, 582.0, 567.0, 536.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 576.0, 579.0, 513.0, 582.0, 582.0, 573.0, 567.0, 536.0, 579.0, 582.0, 582.0, 567.0, 587.0, 522.0, 582.0, 579.0, 578.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 570.0, 573.0, 573.0, 564.0, 576.0, 576.0, 573.0, 570.0, 579.0, 573.0, 579.0, 570.0, 579.0, 579.0, 582.0, 576.0, 582.0, 573.0, 576.0, 576.0, 576.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 525.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 582.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 299.0, 292.0, 278.0, 255.0, 270.0, 287.0, 295.0, 292.0, 287.0, 223.0, 229.0, 274.0, 305.0, 272.0, 295.0, 272.0, 258.0, 280.0, 296.0, 291.0, 291.0, 288.0, 294.0, 269.0, 256.0, 294.0, 282.0, 312.0, 315.0, 291.0, 282.0, 302.0, 271.0, 261.0, 266.0, 281.0, 295.0, 298.0, 281.0, 280.0, 284.0, 326.0, 307.0, 296.0, 286.0, 288.0, 279.0, 267.0, 269.0, 298.0, 281.0, 288.0, 291.0, 291.0, 288.0, 294.0, 285.0, 275.0, 301.0, 294.0, 282.0, 303.0, 273.0, 289.0, 290.0, 267.0, 246.0, 283.0, 299.0, 299.0, 283.0, 284.0, 289.0, 284.0, 283.0, 261.0, 275.0, 289.0, 290.0, 283.0, 299.0, 299.0, 283.0, 275.0, 292.0, 298.0, 289.0, 252.0, 270.0, 293.0, 289.0, 303.0, 276.0, 290.0, 288.0, 293.0, 277.0, 278.0, 304.0, 285.0, 297.0, 301.0, 281.0, 297.0, 285.0, 281.0, 301.0, 283.0, 299.0, 283.0, 287.0, 292.0, 281.0, 287.0, 286.0, 273.0, 291.0, 296.0, 280.0, 281.0, 295.0, 290.0, 283.0, 296.0, 274.0, 297.0, 282.0, 297.0, 276.0, 303.0, 276.0, 292.0, 278.0, 284.0, 295.0, 283.0, 296.0, 301.0, 281.0, 293.0, 283.0, 294.0, 288.0, 288.0, 285.0, 292.0, 284.0, 282.0, 294.0, 291.0, 285.0, 293.0, 289.0, 292.0, 287.0, 288.0, 291.0, 301.0, 281.0, 295.0, 287.0, 291.0, 291.0, 297.0, 282.0, 294.0, 285.0, 293.0, 289.0, 269.0, 261.0, 279.0, 300.0, 298.0, 284.0, 270.0, 309.0, 288.0, 291.0, 263.0, 262.0, 293.0, 289.0, 303.0, 276.0, 286.0, 296.0, 301.0, 281.0, 286.0, 290.0, 296.0, 286.0, 300.0, 282.0, 272.0, 250.0, 303.0, 276.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3456449831196562, "mean_processing_ms": 0.3370529700799381, "mean_inference_ms": 1.8997987412977424}, "off_policy_estimator": {}, "info": 
{"num_steps_trained": 4032000, "num_steps_sampled": 2150400, "sample_time_ms": 20935.463, "load_time_ms": 38.009, "grad_time_ms": 9985.412, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029837340116500854, "policy_loss": -0.005580740049481392, "vf_loss": 91.1910629272461, "vf_explained_var": 0.7490768432617188, "kl": 0.0017398769268766046, "entropy": 1.1092572212219238, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2150400, "episodes_total": 5376, "training_iteration": 168, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-09-04", "timestamp": 1660252144, "time_this_iter_s": 29.37734818458557, "time_total_s": 10561.800547599792, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": 
{"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10561.800547599792, "timesteps_since_restore": 2150400, "iterations_since_restore": 168, "perf": {"cpu_util_percent": 32.31428571428572, "ram_util_percent": 58.38571428571428}} +{"episode_reward_max": 633.0, "episode_reward_min": 452.0, "episode_reward_mean": 570.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 285.355}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.91, "shaped_reward_min": 132, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.72, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.74, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.45, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.19, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.44, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.94, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.07, "dish_pickup_agent_0_min": 2, 
"dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.52, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.5, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.94, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.94, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.14, 
"viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 522.0, 576.0, 576.0, 582.0, 533.0, 536.0, 573.0, 570.0, 576.0, 579.0, 582.0, 576.0, 516.0, 579.0, 579.0, 579.0, 504.0, 582.0, 558.0, 579.0, 579.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 573.0, 579.0, 570.0, 579.0, 579.0, 582.0, 576.0, 582.0, 573.0, 576.0, 576.0, 576.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 525.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 582.0, 522.0, 579.0, 582.0, 570.0, 525.0, 582.0, 579.0, 452.0, 579.0, 567.0, 530.0, 576.0, 582.0, 582.0, 525.0, 576.0, 627.0, 
573.0, 573.0, 527.0, 576.0, 579.0, 564.0, 633.0, 582.0, 567.0, 536.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 280.0, 299.0, 262.0, 260.0, 283.0, 293.0, 284.0, 292.0, 293.0, 289.0, 249.0, 284.0, 273.0, 263.0, 274.0, 299.0, 288.0, 282.0, 297.0, 279.0, 296.0, 283.0, 273.0, 309.0, 285.0, 291.0, 251.0, 265.0, 285.0, 294.0, 283.0, 296.0, 280.0, 299.0, 249.0, 255.0, 296.0, 286.0, 284.0, 274.0, 293.0, 286.0, 287.0, 292.0, 292.0, 284.0, 283.0, 299.0, 289.0, 293.0, 275.0, 298.0, 290.0, 283.0, 285.0, 297.0, 290.0, 286.0, 289.0, 290.0, 286.0, 293.0, 297.0, 276.0, 303.0, 276.0, 292.0, 278.0, 284.0, 295.0, 283.0, 296.0, 301.0, 281.0, 293.0, 283.0, 294.0, 288.0, 288.0, 285.0, 292.0, 284.0, 282.0, 294.0, 291.0, 285.0, 293.0, 289.0, 292.0, 287.0, 288.0, 291.0, 301.0, 281.0, 295.0, 287.0, 291.0, 291.0, 297.0, 282.0, 294.0, 285.0, 293.0, 289.0, 269.0, 261.0, 279.0, 300.0, 298.0, 284.0, 270.0, 309.0, 288.0, 291.0, 263.0, 262.0, 293.0, 289.0, 303.0, 276.0, 286.0, 296.0, 301.0, 281.0, 286.0, 290.0, 296.0, 286.0, 300.0, 282.0, 272.0, 250.0, 303.0, 276.0, 283.0, 299.0, 292.0, 278.0, 255.0, 270.0, 287.0, 295.0, 292.0, 287.0, 223.0, 229.0, 274.0, 305.0, 272.0, 295.0, 272.0, 258.0, 280.0, 296.0, 291.0, 291.0, 288.0, 294.0, 269.0, 256.0, 294.0, 282.0, 312.0, 315.0, 291.0, 282.0, 302.0, 271.0, 261.0, 266.0, 281.0, 295.0, 298.0, 281.0, 280.0, 284.0, 326.0, 307.0, 296.0, 286.0, 288.0, 279.0, 267.0, 269.0, 298.0, 281.0, 288.0, 291.0, 291.0, 
288.0, 294.0, 285.0, 275.0, 301.0, 294.0, 282.0, 303.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3402643108658208, "mean_processing_ms": 0.3359831351985412, "mean_inference_ms": 1.8939334033513233}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4056000, "num_steps_sampled": 2163200, "sample_time_ms": 20463.428, "load_time_ms": 38.087, "grad_time_ms": 10025.502, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005885738879442215, "policy_loss": -0.001977432519197464, "vf_loss": 84.17040252685547, "vf_explained_var": 0.7570996880531311, "kl": 0.0022582625970244408, "entropy": 1.1077399253845215, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2163200, "episodes_total": 5408, "training_iteration": 169, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-09-33", "timestamp": 1660252173, "time_this_iter_s": 28.335352182388306, "time_total_s": 10590.13589978218, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, 
"timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10590.13589978218, "timesteps_since_restore": 2163200, "iterations_since_restore": 169, "perf": {"cpu_util_percent": 29.0625, "ram_util_percent": 58.379999999999995}} +{"episode_reward_max": 633.0, "episode_reward_min": 422.0, "episode_reward_mean": 569.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 209.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 284.605}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.61, "shaped_reward_min": 132, "shaped_reward_max": 193, 
"tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.25, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.33, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.01, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.65, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.51, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.4, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 
22, "potting_onion_agent_1_mean": 16.63, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.4, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.63, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 15.4, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.63, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 536.0, 561.0, 576.0, 570.0, 587.0, 582.0, 573.0, 582.0, 576.0, 579.0, 576.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 579.0, 630.0, 536.0, 582.0, 587.0, 579.0, 582.0, 422.0, 587.0, 579.0, 576.0, 536.0, 573.0, 582.0, 582.0, 522.0, 579.0, 582.0, 570.0, 525.0, 582.0, 579.0, 452.0, 579.0, 567.0, 530.0, 576.0, 582.0, 582.0, 525.0, 576.0, 627.0, 573.0, 573.0, 527.0, 576.0, 579.0, 564.0, 633.0, 582.0, 
567.0, 536.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 576.0, 579.0, 579.0, 522.0, 576.0, 576.0, 582.0, 533.0, 536.0, 573.0, 570.0, 576.0, 579.0, 582.0, 576.0, 516.0, 579.0, 579.0, 579.0, 504.0, 582.0, 558.0, 579.0, 579.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 293.0, 283.0, 250.0, 286.0, 275.0, 286.0, 282.0, 294.0, 276.0, 294.0, 296.0, 291.0, 283.0, 299.0, 280.0, 293.0, 286.0, 296.0, 295.0, 281.0, 288.0, 291.0, 293.0, 283.0, 292.0, 281.0, 292.0, 290.0, 281.0, 298.0, 299.0, 280.0, 293.0, 289.0, 286.0, 290.0, 293.0, 286.0, 311.0, 319.0, 254.0, 282.0, 279.0, 303.0, 296.0, 291.0, 278.0, 301.0, 294.0, 288.0, 209.0, 213.0, 282.0, 305.0, 287.0, 292.0, 291.0, 285.0, 258.0, 278.0, 278.0, 295.0, 296.0, 286.0, 300.0, 282.0, 272.0, 250.0, 303.0, 276.0, 283.0, 299.0, 292.0, 278.0, 255.0, 270.0, 287.0, 295.0, 292.0, 287.0, 223.0, 229.0, 274.0, 305.0, 272.0, 295.0, 272.0, 258.0, 280.0, 296.0, 291.0, 291.0, 288.0, 294.0, 269.0, 256.0, 294.0, 282.0, 312.0, 315.0, 291.0, 282.0, 302.0, 271.0, 261.0, 266.0, 281.0, 295.0, 298.0, 281.0, 280.0, 284.0, 326.0, 307.0, 296.0, 286.0, 288.0, 279.0, 267.0, 269.0, 298.0, 281.0, 288.0, 291.0, 291.0, 288.0, 294.0, 285.0, 275.0, 301.0, 294.0, 282.0, 303.0, 273.0, 291.0, 288.0, 280.0, 299.0, 262.0, 260.0, 283.0, 293.0, 284.0, 292.0, 293.0, 289.0, 249.0, 284.0, 273.0, 263.0, 274.0, 299.0, 288.0, 282.0, 297.0, 279.0, 296.0, 283.0, 273.0, 309.0, 285.0, 291.0, 251.0, 265.0, 285.0, 
294.0, 283.0, 296.0, 280.0, 299.0, 249.0, 255.0, 296.0, 286.0, 284.0, 274.0, 293.0, 286.0, 287.0, 292.0, 292.0, 284.0, 283.0, 299.0, 289.0, 293.0, 275.0, 298.0, 290.0, 283.0, 285.0, 297.0, 290.0, 286.0, 289.0, 290.0, 286.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.334950887862658, "mean_processing_ms": 0.3349244818889894, "mean_inference_ms": 1.8881606558059565}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4080000, "num_steps_sampled": 2176000, "sample_time_ms": 20370.044, "load_time_ms": 38.206, "grad_time_ms": 10005.991, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00501619465649128, "policy_loss": -0.0036706894170492887, "vf_loss": 92.4554214477539, "vf_explained_var": 0.7515974044799805, "kl": 0.0018303836695849895, "entropy": 1.1173133850097656, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2176000, "episodes_total": 5440, "training_iteration": 170, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-10-00", "timestamp": 1660252200, "time_this_iter_s": 27.505138874053955, "time_total_s": 10617.641038656235, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": 
null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, 
"compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10617.641038656235, "timesteps_since_restore": 2176000, "iterations_since_restore": 170, "perf": {"cpu_util_percent": 30.13076923076923, "ram_util_percent": 58.446153846153834}} +{"episode_reward_max": 630.0, "episode_reward_min": 422.0, "episode_reward_mean": 571.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 209.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 285.645}, 
"custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.89, "shaped_reward_min": 141, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.26, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.6, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.46, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.52, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.52, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.97, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.74, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.52, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.52, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 579.0, 573.0, 587.0, 579.0, 576.0, 582.0, 587.0, 582.0, 582.0, 579.0, 579.0, 536.0, 582.0, 579.0, 576.0, 582.0, 579.0, 582.0, 582.0, 573.0, 501.0, 576.0, 522.0, 582.0, 582.0, 579.0, 576.0, 590.0, 
539.0, 582.0, 582.0, 579.0, 576.0, 576.0, 576.0, 579.0, 579.0, 522.0, 576.0, 576.0, 582.0, 533.0, 536.0, 573.0, 570.0, 576.0, 579.0, 582.0, 576.0, 516.0, 579.0, 579.0, 579.0, 504.0, 582.0, 558.0, 579.0, 579.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 579.0, 576.0, 536.0, 561.0, 576.0, 570.0, 587.0, 582.0, 573.0, 582.0, 576.0, 579.0, 576.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 579.0, 630.0, 536.0, 582.0, 587.0, 579.0, 582.0, 422.0, 587.0, 579.0, 576.0, 536.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 287.0, 303.0, 276.0, 280.0, 293.0, 294.0, 293.0, 295.0, 284.0, 305.0, 271.0, 293.0, 289.0, 292.0, 295.0, 287.0, 295.0, 283.0, 299.0, 296.0, 283.0, 288.0, 291.0, 271.0, 265.0, 295.0, 287.0, 288.0, 291.0, 278.0, 298.0, 286.0, 296.0, 289.0, 290.0, 287.0, 295.0, 297.0, 285.0, 279.0, 294.0, 269.0, 232.0, 287.0, 289.0, 254.0, 268.0, 291.0, 291.0, 276.0, 306.0, 295.0, 284.0, 278.0, 298.0, 293.0, 297.0, 271.0, 268.0, 299.0, 283.0, 290.0, 292.0, 294.0, 285.0, 275.0, 301.0, 294.0, 282.0, 303.0, 273.0, 291.0, 288.0, 280.0, 299.0, 262.0, 260.0, 283.0, 293.0, 284.0, 292.0, 293.0, 289.0, 249.0, 284.0, 273.0, 263.0, 274.0, 299.0, 288.0, 282.0, 297.0, 279.0, 296.0, 283.0, 273.0, 309.0, 285.0, 291.0, 251.0, 265.0, 285.0, 294.0, 283.0, 296.0, 280.0, 299.0, 249.0, 255.0, 296.0, 286.0, 284.0, 274.0, 293.0, 286.0, 287.0, 292.0, 292.0, 284.0, 283.0, 299.0, 289.0, 293.0, 275.0, 298.0, 290.0, 283.0, 285.0, 297.0, 290.0, 286.0, 289.0, 290.0, 286.0, 293.0, 293.0, 
286.0, 293.0, 283.0, 250.0, 286.0, 275.0, 286.0, 282.0, 294.0, 276.0, 294.0, 296.0, 291.0, 283.0, 299.0, 280.0, 293.0, 286.0, 296.0, 295.0, 281.0, 288.0, 291.0, 293.0, 283.0, 292.0, 281.0, 292.0, 290.0, 281.0, 298.0, 299.0, 280.0, 293.0, 289.0, 286.0, 290.0, 293.0, 286.0, 311.0, 319.0, 254.0, 282.0, 279.0, 303.0, 296.0, 291.0, 278.0, 301.0, 294.0, 288.0, 209.0, 213.0, 282.0, 305.0, 287.0, 292.0, 291.0, 285.0, 258.0, 278.0, 278.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3296971139486637, "mean_processing_ms": 0.333876638718542, "mean_inference_ms": 1.8823863210387035}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4104000, "num_steps_sampled": 2188800, "sample_time_ms": 20376.689, "load_time_ms": 38.395, "grad_time_ms": 9898.441, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001497833989560604, "policy_loss": -0.006948364432901144, "vf_loss": 90.00249481201172, "vf_explained_var": 0.7635095119476318, "kl": 0.0017910072347149253, "entropy": 1.1081151962280273, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2188800, "episodes_total": 5472, "training_iteration": 171, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-10-27", "timestamp": 1660252227, "time_this_iter_s": 27.183032989501953, "time_total_s": 10644.824071645737, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10644.824071645737, "timesteps_since_restore": 2188800, "iterations_since_restore": 171, "perf": {"cpu_util_percent": 32.57105263157895, "ram_util_percent": 58.3842105263158}} +{"episode_reward_max": 630.0, "episode_reward_min": 
422.0, "episode_reward_mean": 573.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 209.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.845}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 178.09, "shaped_reward_min": 141, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.25, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.64, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 
0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.86, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.51, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.01, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.86, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, 
"optimal_onion_potting_agent_1_mean": 16.51, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.86, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.51, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 576.0, 582.0, 582.0, 
630.0, 573.0, 579.0, 530.0, 576.0, 627.0, 533.0, 530.0, 579.0, 579.0, 579.0, 525.0, 630.0, 519.0, 530.0, 582.0, 627.0, 576.0, 587.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 567.0, 579.0, 582.0, 576.0, 579.0, 579.0, 579.0, 576.0, 536.0, 561.0, 576.0, 570.0, 587.0, 582.0, 573.0, 582.0, 576.0, 579.0, 576.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 579.0, 630.0, 536.0, 582.0, 587.0, 579.0, 582.0, 422.0, 587.0, 579.0, 576.0, 536.0, 573.0, 587.0, 579.0, 573.0, 587.0, 579.0, 576.0, 582.0, 587.0, 582.0, 582.0, 579.0, 579.0, 536.0, 582.0, 579.0, 576.0, 582.0, 579.0, 582.0, 582.0, 573.0, 501.0, 576.0, 522.0, 582.0, 582.0, 579.0, 576.0, 590.0, 539.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 304.0, 286.0, 293.0, 290.0, 286.0, 296.0, 286.0, 289.0, 293.0, 316.0, 314.0, 298.0, 275.0, 286.0, 293.0, 261.0, 269.0, 278.0, 298.0, 308.0, 319.0, 265.0, 268.0, 272.0, 258.0, 283.0, 296.0, 294.0, 285.0, 292.0, 287.0, 263.0, 262.0, 305.0, 325.0, 280.0, 239.0, 272.0, 258.0, 306.0, 276.0, 314.0, 313.0, 288.0, 288.0, 296.0, 291.0, 292.0, 290.0, 289.0, 293.0, 275.0, 304.0, 294.0, 288.0, 305.0, 277.0, 297.0, 285.0, 288.0, 279.0, 293.0, 286.0, 285.0, 297.0, 290.0, 286.0, 289.0, 290.0, 286.0, 293.0, 293.0, 286.0, 293.0, 283.0, 250.0, 286.0, 275.0, 286.0, 282.0, 294.0, 276.0, 294.0, 296.0, 291.0, 283.0, 299.0, 280.0, 293.0, 286.0, 296.0, 295.0, 281.0, 288.0, 291.0, 293.0, 283.0, 292.0, 281.0, 292.0, 290.0, 281.0, 298.0, 299.0, 280.0, 293.0, 289.0, 286.0, 290.0, 293.0, 286.0, 311.0, 
319.0, 254.0, 282.0, 279.0, 303.0, 296.0, 291.0, 278.0, 301.0, 294.0, 288.0, 209.0, 213.0, 282.0, 305.0, 287.0, 292.0, 291.0, 285.0, 258.0, 278.0, 278.0, 295.0, 300.0, 287.0, 303.0, 276.0, 280.0, 293.0, 294.0, 293.0, 295.0, 284.0, 305.0, 271.0, 293.0, 289.0, 292.0, 295.0, 287.0, 295.0, 283.0, 299.0, 296.0, 283.0, 288.0, 291.0, 271.0, 265.0, 295.0, 287.0, 288.0, 291.0, 278.0, 298.0, 286.0, 296.0, 289.0, 290.0, 287.0, 295.0, 297.0, 285.0, 279.0, 294.0, 269.0, 232.0, 287.0, 289.0, 254.0, 268.0, 291.0, 291.0, 276.0, 306.0, 295.0, 284.0, 278.0, 298.0, 293.0, 297.0, 271.0, 268.0, 299.0, 283.0, 290.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3245227944112767, "mean_processing_ms": 0.33284763340426393, "mean_inference_ms": 1.876875864691374}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4128000, "num_steps_sampled": 2201600, "sample_time_ms": 20481.131, "load_time_ms": 38.136, "grad_time_ms": 9697.559, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0045767915435135365, "policy_loss": -0.0035035184118896723, "vf_loss": 86.42507934570312, "vf_explained_var": 0.7563931345939636, "kl": 0.002320564817637205, "entropy": 1.1244043111801147, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2201600, "episodes_total": 5504, "training_iteration": 172, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-10-56", "timestamp": 1660252256, "time_this_iter_s": 28.577091932296753, "time_total_s": 10673.401163578033, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", 
"fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10673.401163578033, "timesteps_since_restore": 2201600, 
"iterations_since_restore": 172, "perf": {"cpu_util_percent": 35.19024390243903, "ram_util_percent": 58.548780487804876}} +{"episode_reward_max": 630.0, "episode_reward_min": 496.0, "episode_reward_mean": 574.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 232.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 287.015}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.63, "shaped_reward_min": 138, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.48, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.83, 
"useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.69, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.78, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.85, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, 
"soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.69, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.69, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 579.0, 582.0, 498.0, 579.0, 579.0, 576.0, 561.0, 587.0, 570.0, 552.0, 587.0, 576.0, 579.0, 582.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 573.0, 570.0, 496.0, 579.0, 630.0, 576.0, 567.0, 582.0, 579.0, 576.0, 536.0, 573.0, 587.0, 579.0, 573.0, 587.0, 579.0, 576.0, 582.0, 587.0, 582.0, 582.0, 579.0, 579.0, 536.0, 582.0, 579.0, 576.0, 582.0, 579.0, 582.0, 582.0, 573.0, 501.0, 576.0, 522.0, 582.0, 582.0, 579.0, 576.0, 590.0, 539.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 582.0, 630.0, 573.0, 579.0, 530.0, 576.0, 627.0, 533.0, 530.0, 579.0, 579.0, 579.0, 525.0, 630.0, 519.0, 530.0, 582.0, 627.0, 576.0, 587.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 567.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 282.0, 297.0, 291.0, 291.0, 241.0, 257.0, 289.0, 290.0, 290.0, 289.0, 280.0, 296.0, 275.0, 286.0, 301.0, 286.0, 281.0, 289.0, 271.0, 281.0, 302.0, 285.0, 276.0, 300.0, 295.0, 284.0, 292.0, 290.0, 291.0, 285.0, 295.0, 292.0, 299.0, 277.0, 289.0, 290.0, 293.0, 286.0, 284.0, 295.0, 290.0, 292.0, 285.0, 288.0, 291.0, 279.0, 253.0, 243.0, 297.0, 282.0, 318.0, 312.0, 283.0, 293.0, 291.0, 276.0, 286.0, 296.0, 287.0, 292.0, 291.0, 285.0, 258.0, 278.0, 278.0, 295.0, 300.0, 287.0, 303.0, 276.0, 280.0, 293.0, 294.0, 
293.0, 295.0, 284.0, 305.0, 271.0, 293.0, 289.0, 292.0, 295.0, 287.0, 295.0, 283.0, 299.0, 296.0, 283.0, 288.0, 291.0, 271.0, 265.0, 295.0, 287.0, 288.0, 291.0, 278.0, 298.0, 286.0, 296.0, 289.0, 290.0, 287.0, 295.0, 297.0, 285.0, 279.0, 294.0, 269.0, 232.0, 287.0, 289.0, 254.0, 268.0, 291.0, 291.0, 276.0, 306.0, 295.0, 284.0, 278.0, 298.0, 293.0, 297.0, 271.0, 268.0, 299.0, 283.0, 290.0, 292.0, 278.0, 304.0, 286.0, 293.0, 290.0, 286.0, 296.0, 286.0, 289.0, 293.0, 316.0, 314.0, 298.0, 275.0, 286.0, 293.0, 261.0, 269.0, 278.0, 298.0, 308.0, 319.0, 265.0, 268.0, 272.0, 258.0, 283.0, 296.0, 294.0, 285.0, 292.0, 287.0, 263.0, 262.0, 305.0, 325.0, 280.0, 239.0, 272.0, 258.0, 306.0, 276.0, 314.0, 313.0, 288.0, 288.0, 296.0, 291.0, 292.0, 290.0, 289.0, 293.0, 275.0, 304.0, 294.0, 288.0, 305.0, 277.0, 297.0, 285.0, 288.0, 279.0, 293.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3194261704658106, "mean_processing_ms": 0.3318357537689677, "mean_inference_ms": 1.8715822466645085}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4152000, "num_steps_sampled": 2214400, "sample_time_ms": 20271.412, "load_time_ms": 38.227, "grad_time_ms": 9546.44, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0062603577971458435, "policy_loss": -0.0018654250307008624, "vf_loss": 86.83306121826172, "vf_explained_var": 0.7576972842216492, "kl": 0.0021647585090249777, "entropy": 1.1150306463241577, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2214400, "episodes_total": 5536, "training_iteration": 173, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-11-25", "timestamp": 1660252285, "time_this_iter_s": 28.57458209991455, "time_total_s": 10701.975745677948, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, 
"num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10701.975745677948, "timesteps_since_restore": 2214400, "iterations_since_restore": 173, "perf": {"cpu_util_percent": 30.26, "ram_util_percent": 58.657500000000006}} +{"episode_reward_max": 633.0, "episode_reward_min": 496.0, "episode_reward_mean": 576.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 239.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 288.29}, "custom_metrics": {"sparse_reward_mean": 199.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.78, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.4, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.65, 
"onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.13, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.01, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.53, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.86, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.64, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.38, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.53, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.86, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.53, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.86, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 579.0, 627.0, 573.0, 633.0, 582.0, 567.0, 530.0, 573.0, 564.0, 582.0, 573.0, 579.0, 582.0, 579.0, 558.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 627.0, 630.0, 582.0, 582.0, 582.0, 573.0, 536.0, 582.0, 579.0, 582.0, 590.0, 539.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 582.0, 630.0, 573.0, 579.0, 530.0, 576.0, 627.0, 533.0, 530.0, 579.0, 579.0, 579.0, 525.0, 630.0, 519.0, 530.0, 582.0, 627.0, 576.0, 587.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 567.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 498.0, 579.0, 579.0, 576.0, 561.0, 587.0, 570.0, 552.0, 587.0, 576.0, 579.0, 582.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 573.0, 570.0, 496.0, 579.0, 630.0, 576.0, 567.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 285.0, 290.0, 289.0, 298.0, 329.0, 270.0, 303.0, 308.0, 325.0, 301.0, 281.0, 279.0, 288.0, 268.0, 262.0, 296.0, 277.0, 286.0, 278.0, 289.0, 293.0, 285.0, 288.0, 294.0, 285.0, 301.0, 281.0, 295.0, 284.0, 281.0, 277.0, 291.0, 291.0, 308.0, 274.0, 286.0, 296.0, 300.0, 276.0, 284.0, 289.0, 289.0, 293.0, 316.0, 311.0, 321.0, 309.0, 286.0, 
296.0, 297.0, 285.0, 298.0, 284.0, 283.0, 290.0, 270.0, 266.0, 285.0, 297.0, 292.0, 287.0, 293.0, 289.0, 293.0, 297.0, 271.0, 268.0, 299.0, 283.0, 290.0, 292.0, 278.0, 304.0, 286.0, 293.0, 290.0, 286.0, 296.0, 286.0, 289.0, 293.0, 316.0, 314.0, 298.0, 275.0, 286.0, 293.0, 261.0, 269.0, 278.0, 298.0, 308.0, 319.0, 265.0, 268.0, 272.0, 258.0, 283.0, 296.0, 294.0, 285.0, 292.0, 287.0, 263.0, 262.0, 305.0, 325.0, 280.0, 239.0, 272.0, 258.0, 306.0, 276.0, 314.0, 313.0, 288.0, 288.0, 296.0, 291.0, 292.0, 290.0, 289.0, 293.0, 275.0, 304.0, 294.0, 288.0, 305.0, 277.0, 297.0, 285.0, 288.0, 279.0, 293.0, 286.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 282.0, 297.0, 291.0, 291.0, 241.0, 257.0, 289.0, 290.0, 290.0, 289.0, 280.0, 296.0, 275.0, 286.0, 301.0, 286.0, 281.0, 289.0, 271.0, 281.0, 302.0, 285.0, 276.0, 300.0, 295.0, 284.0, 292.0, 290.0, 291.0, 285.0, 295.0, 292.0, 299.0, 277.0, 289.0, 290.0, 293.0, 286.0, 284.0, 295.0, 290.0, 292.0, 285.0, 288.0, 291.0, 279.0, 253.0, 243.0, 297.0, 282.0, 318.0, 312.0, 283.0, 293.0, 291.0, 276.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3143927753552085, "mean_processing_ms": 0.33083733112110686, "mean_inference_ms": 1.8663998879774686}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4176000, "num_steps_sampled": 2227200, "sample_time_ms": 20085.918, "load_time_ms": 37.439, "grad_time_ms": 9177.291, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0001482805237174034, "policy_loss": -0.00769606651738286, "vf_loss": 81.13143920898438, "vf_explained_var": 0.764965295791626, "kl": 0.0018476974219083786, "entropy": 1.1307072639465332, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2227200, "episodes_total": 5568, "training_iteration": 174, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-11-52", "timestamp": 
1660252312, "time_this_iter_s": 27.522704124450684, "time_total_s": 10729.498449802399, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", 
"ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 
0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10729.498449802399, "timesteps_since_restore": 2227200, "iterations_since_restore": 174, "perf": {"cpu_util_percent": 34.294871794871796, "ram_util_percent": 58.587179487179476}} +{"episode_reward_max": 633.0, "episode_reward_min": 393.0, "episode_reward_mean": 573.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 286.51}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.82, "shaped_reward_min": 113, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.37, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.56, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.07, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.82, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.37, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.58, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 
5.3, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.43, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [507.0, 579.0, 582.0, 576.0, 582.0, 393.0, 465.0, 579.0, 582.0, 570.0, 627.0, 579.0, 573.0, 576.0, 579.0, 504.0, 579.0, 579.0, 576.0, 527.0, 579.0, 519.0, 579.0, 587.0, 576.0, 633.0, 579.0, 576.0, 582.0, 579.0, 524.0, 627.0, 582.0, 582.0, 567.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 498.0, 579.0, 579.0, 576.0, 561.0, 587.0, 570.0, 552.0, 587.0, 576.0, 579.0, 582.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 573.0, 570.0, 496.0, 579.0, 630.0, 576.0, 567.0, 582.0, 567.0, 579.0, 627.0, 573.0, 633.0, 582.0, 567.0, 530.0, 573.0, 564.0, 582.0, 573.0, 579.0, 582.0, 579.0, 558.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 627.0, 630.0, 582.0, 582.0, 582.0, 573.0, 536.0, 582.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [239.0, 268.0, 286.0, 293.0, 283.0, 299.0, 284.0, 292.0, 297.0, 285.0, 197.0, 196.0, 229.0, 236.0, 294.0, 285.0, 285.0, 297.0, 
289.0, 281.0, 317.0, 310.0, 301.0, 278.0, 278.0, 295.0, 282.0, 294.0, 284.0, 295.0, 262.0, 242.0, 297.0, 282.0, 290.0, 289.0, 295.0, 281.0, 272.0, 255.0, 288.0, 291.0, 260.0, 259.0, 287.0, 292.0, 294.0, 293.0, 286.0, 290.0, 309.0, 324.0, 285.0, 294.0, 285.0, 291.0, 297.0, 285.0, 288.0, 291.0, 270.0, 254.0, 305.0, 322.0, 305.0, 277.0, 297.0, 285.0, 288.0, 279.0, 293.0, 286.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 282.0, 297.0, 291.0, 291.0, 241.0, 257.0, 289.0, 290.0, 290.0, 289.0, 280.0, 296.0, 275.0, 286.0, 301.0, 286.0, 281.0, 289.0, 271.0, 281.0, 302.0, 285.0, 276.0, 300.0, 295.0, 284.0, 292.0, 290.0, 291.0, 285.0, 295.0, 292.0, 299.0, 277.0, 289.0, 290.0, 293.0, 286.0, 284.0, 295.0, 290.0, 292.0, 285.0, 288.0, 291.0, 279.0, 253.0, 243.0, 297.0, 282.0, 318.0, 312.0, 283.0, 293.0, 291.0, 276.0, 286.0, 296.0, 282.0, 285.0, 290.0, 289.0, 298.0, 329.0, 270.0, 303.0, 308.0, 325.0, 301.0, 281.0, 279.0, 288.0, 268.0, 262.0, 296.0, 277.0, 286.0, 278.0, 289.0, 293.0, 285.0, 288.0, 294.0, 285.0, 301.0, 281.0, 295.0, 284.0, 281.0, 277.0, 291.0, 291.0, 308.0, 274.0, 286.0, 296.0, 300.0, 276.0, 284.0, 289.0, 289.0, 293.0, 316.0, 311.0, 321.0, 309.0, 286.0, 296.0, 297.0, 285.0, 298.0, 284.0, 283.0, 290.0, 270.0, 266.0, 285.0, 297.0, 292.0, 287.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3094165014431445, "mean_processing_ms": 0.3298474823059415, "mean_inference_ms": 1.8613034748518011}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4200000, "num_steps_sampled": 2240000, "sample_time_ms": 20009.535, "load_time_ms": 37.081, "grad_time_ms": 9049.935, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033403884153813124, "policy_loss": -0.004778089467436075, "vf_loss": 86.8664779663086, "vf_explained_var": 0.7622640132904053, "kl": 0.0018111681565642357, "entropy": 1.1363428831100464, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 2240000, "episodes_total": 5600, "training_iteration": 175, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-12-22", "timestamp": 1660252342, "time_this_iter_s": 29.488188982009888, "time_total_s": 10758.986638784409, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10758.986638784409, "timesteps_since_restore": 2240000, "iterations_since_restore": 175, "perf": {"cpu_util_percent": 31.97380952380952, "ram_util_percent": 58.61666666666667}} +{"episode_reward_max": 633.0, "episode_reward_min": 393.0, "episode_reward_mean": 573.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 286.835}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.47, "shaped_reward_min": 113, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.4, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.57, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.55, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.77, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.57, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.55, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.57, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.55, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 573.0, 576.0, 579.0, 530.0, 576.0, 579.0, 576.0, 525.0, 576.0, 519.0, 530.0, 576.0, 579.0, 582.0, 630.0, 576.0, 567.0, 582.0, 567.0, 579.0, 627.0, 573.0, 633.0, 582.0, 567.0, 530.0, 573.0, 564.0, 582.0, 573.0, 579.0, 582.0, 579.0, 558.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 627.0, 630.0, 582.0, 582.0, 582.0, 573.0, 536.0, 582.0, 579.0, 582.0, 507.0, 579.0, 582.0, 576.0, 582.0, 393.0, 465.0, 579.0, 582.0, 570.0, 627.0, 579.0, 573.0, 576.0, 579.0, 504.0, 579.0, 579.0, 576.0, 527.0, 579.0, 519.0, 579.0, 587.0, 576.0, 633.0, 579.0, 576.0, 582.0, 579.0, 524.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 302.0, 293.0, 289.0, 294.0, 288.0, 310.0, 317.0, 294.0, 288.0, 294.0, 288.0, 276.0, 311.0, 282.0, 297.0, 291.0, 288.0, 296.0, 286.0, 280.0, 299.0, 298.0, 281.0, 296.0, 286.0, 277.0, 299.0, 303.0, 279.0, 291.0, 288.0, 283.0, 299.0, 280.0, 299.0, 295.0, 278.0, 293.0, 283.0, 296.0, 283.0, 266.0, 264.0, 277.0, 299.0, 283.0, 296.0, 291.0, 285.0, 264.0, 261.0, 286.0, 290.0, 267.0, 252.0, 272.0, 258.0, 278.0, 298.0, 290.0, 289.0, 291.0, 291.0, 318.0, 312.0, 283.0, 293.0, 291.0, 276.0, 286.0, 296.0, 282.0, 285.0, 290.0, 289.0, 298.0, 329.0, 270.0, 303.0, 308.0, 325.0, 301.0, 281.0, 279.0, 288.0, 268.0, 262.0, 296.0, 277.0, 286.0, 278.0, 289.0, 293.0, 285.0, 288.0, 294.0, 285.0, 301.0, 281.0, 295.0, 284.0, 281.0, 277.0, 291.0, 291.0, 308.0, 274.0, 286.0, 296.0, 300.0, 276.0, 284.0, 289.0, 289.0, 293.0, 316.0, 311.0, 321.0, 309.0, 286.0, 296.0, 297.0, 285.0, 298.0, 284.0, 283.0, 290.0, 270.0, 266.0, 285.0, 297.0, 292.0, 287.0, 293.0, 289.0, 239.0, 268.0, 286.0, 293.0, 283.0, 299.0, 284.0, 292.0, 297.0, 285.0, 197.0, 196.0, 229.0, 236.0, 294.0, 285.0, 285.0, 297.0, 289.0, 281.0, 317.0, 310.0, 301.0, 278.0, 278.0, 295.0, 282.0, 294.0, 284.0, 295.0, 262.0, 242.0, 297.0, 282.0, 290.0, 289.0, 295.0, 281.0, 272.0, 255.0, 288.0, 291.0, 260.0, 259.0, 287.0, 292.0, 294.0, 293.0, 286.0, 290.0, 309.0, 324.0, 285.0, 294.0, 285.0, 291.0, 297.0, 285.0, 288.0, 291.0, 270.0, 254.0, 305.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.304511305303639, "mean_processing_ms": 0.3288745301367266, "mean_inference_ms": 1.8566250484766516}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4224000, "num_steps_sampled": 2252800, "sample_time_ms": 20239.261, "load_time_ms": 36.617, "grad_time_ms": 8693.262, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0060581061989068985, "policy_loss": -0.0023995088413357735, "vf_loss": 90.20238494873047, 
"vf_explained_var": 0.7652048468589783, "kl": 0.0019277030369266868, "entropy": 1.1252202987670898, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2252800, "episodes_total": 5632, "training_iteration": 176, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-12-54", "timestamp": 1660252374, "time_this_iter_s": 32.0580530166626, "time_total_s": 10791.044691801071, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10791.044691801071, "timesteps_since_restore": 2252800, "iterations_since_restore": 176, "perf": {"cpu_util_percent": 31.34666666666667, "ram_util_percent": 58.57333333333334}} +{"episode_reward_max": 633.0, "episode_reward_min": 393.0, "episode_reward_mean": 571.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.7}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.2, "shaped_reward_min": 113, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.61, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.36, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.59, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.44, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.84, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.59, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.44, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.59, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.44, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 525.0, 627.0, 570.0, 504.0, 579.0, 582.0, 579.0, 576.0, 570.0, 579.0, 579.0, 576.0, 582.0, 519.0, 573.0, 579.0, 573.0, 582.0, 579.0, 582.0, 582.0, 587.0, 519.0, 573.0, 579.0, 633.0, 590.0, 579.0, 573.0, 587.0, 536.0, 582.0, 579.0, 582.0, 507.0, 579.0, 582.0, 576.0, 582.0, 393.0, 465.0, 579.0, 582.0, 570.0, 627.0, 579.0, 573.0, 576.0, 579.0, 504.0, 579.0, 579.0, 576.0, 527.0, 579.0, 519.0, 579.0, 587.0, 576.0, 633.0, 579.0, 576.0, 582.0, 579.0, 524.0, 627.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 573.0, 576.0, 579.0, 530.0, 576.0, 579.0, 576.0, 525.0, 576.0, 519.0, 530.0, 576.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 318.0, 290.0, 292.0, 272.0, 253.0, 314.0, 313.0, 275.0, 295.0, 261.0, 243.0, 298.0, 281.0, 291.0, 291.0, 299.0, 280.0, 294.0, 282.0, 287.0, 283.0, 288.0, 291.0, 288.0, 291.0, 288.0, 288.0, 302.0, 280.0, 267.0, 252.0, 286.0, 287.0, 288.0, 291.0, 285.0, 288.0, 283.0, 299.0, 290.0, 289.0, 298.0, 284.0, 289.0, 293.0, 296.0, 291.0, 276.0, 243.0, 300.0, 273.0, 286.0, 293.0, 309.0, 324.0, 302.0, 288.0, 292.0, 287.0, 289.0, 284.0, 292.0, 295.0, 270.0, 266.0, 285.0, 297.0, 292.0, 287.0, 293.0, 289.0, 239.0, 268.0, 286.0, 293.0, 283.0, 299.0, 284.0, 292.0, 297.0, 285.0, 197.0, 196.0, 229.0, 236.0, 294.0, 285.0, 285.0, 297.0, 289.0, 281.0, 317.0, 310.0, 301.0, 278.0, 278.0, 295.0, 282.0, 294.0, 284.0, 295.0, 262.0, 242.0, 297.0, 282.0, 290.0, 289.0, 295.0, 281.0, 272.0, 255.0, 288.0, 291.0, 260.0, 259.0, 287.0, 292.0, 294.0, 293.0, 286.0, 290.0, 309.0, 324.0, 285.0, 294.0, 285.0, 291.0, 297.0, 285.0, 288.0, 291.0, 270.0, 254.0, 305.0, 322.0, 280.0, 302.0, 293.0, 289.0, 294.0, 288.0, 310.0, 317.0, 294.0, 288.0, 294.0, 288.0, 276.0, 311.0, 282.0, 297.0, 291.0, 288.0, 296.0, 286.0, 280.0, 299.0, 298.0, 281.0, 296.0, 286.0, 277.0, 299.0, 303.0, 279.0, 291.0, 288.0, 283.0, 299.0, 280.0, 299.0, 295.0, 278.0, 293.0, 283.0, 296.0, 283.0, 266.0, 264.0, 277.0, 299.0, 283.0, 296.0, 291.0, 285.0, 264.0, 261.0, 286.0, 290.0, 267.0, 252.0, 272.0, 258.0, 278.0, 298.0, 290.0, 289.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2996756400527443, "mean_processing_ms": 0.3279171800323935, "mean_inference_ms": 1.852052219769451}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4248000, "num_steps_sampled": 2265600, "sample_time_ms": 20324.791, "load_time_ms": 36.647, "grad_time_ms": 8435.096, "update_time_ms": 0.002, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008986306493170559, "policy_loss": -0.007334645837545395, "vf_loss": 87.94988250732422, "vf_explained_var": 0.7740858197212219, "kl": 0.001811654889024794, "entropy": 1.123410940170288, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2265600, "episodes_total": 5664, "training_iteration": 177, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-13-23", "timestamp": 1660252403, "time_this_iter_s": 29.61364197731018, "time_total_s": 10820.658333778381, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10820.658333778381, "timesteps_since_restore": 2265600, "iterations_since_restore": 177, "perf": {"cpu_util_percent": 35.069047619047616, "ram_util_percent": 58.67619047619048}} +{"episode_reward_max": 633.0, "episode_reward_min": 504.0, "episode_reward_mean": 576.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.0}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.0, "shaped_reward_min": 144, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.63, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.75, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.54, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.85, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.33, "useful_dish_pickup_agent_0_min": 2, 
"useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.22, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.75, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.54, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.75, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.54, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 519.0, 579.0, 582.0, 627.0, 576.0, 582.0, 579.0, 573.0, 582.0, 582.0, 527.0, 582.0, 579.0, 587.0, 630.0, 582.0, 576.0, 579.0, 570.0, 582.0, 582.0, 579.0, 525.0, 579.0, 576.0, 587.0, 584.0, 582.0, 573.0, 587.0, 582.0, 579.0, 524.0, 627.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 573.0, 576.0, 579.0, 530.0, 576.0, 579.0, 576.0, 525.0, 576.0, 519.0, 530.0, 576.0, 579.0, 582.0, 630.0, 582.0, 525.0, 627.0, 570.0, 504.0, 579.0, 582.0, 579.0, 576.0, 570.0, 579.0, 579.0, 576.0, 582.0, 519.0, 573.0, 579.0, 573.0, 582.0, 579.0, 582.0, 582.0, 587.0, 519.0, 573.0, 579.0, 633.0, 590.0, 579.0, 573.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 281.0, 289.0, 270.0, 249.0, 292.0, 287.0, 286.0, 296.0, 305.0, 322.0, 289.0, 287.0, 295.0, 287.0, 300.0, 279.0, 290.0, 283.0, 285.0, 297.0, 289.0, 293.0, 254.0, 273.0, 294.0, 288.0, 291.0, 288.0, 301.0, 286.0, 314.0, 316.0, 295.0, 287.0, 289.0, 287.0, 277.0, 302.0, 292.0, 278.0, 278.0, 304.0, 288.0, 294.0, 291.0, 288.0, 259.0, 266.0, 275.0, 304.0, 299.0, 277.0, 296.0, 291.0, 290.0, 294.0, 288.0, 294.0, 288.0, 285.0, 290.0, 297.0, 297.0, 285.0, 288.0, 291.0, 270.0, 254.0, 305.0, 322.0, 280.0, 302.0, 293.0, 289.0, 294.0, 288.0, 310.0, 317.0, 294.0, 288.0, 294.0, 288.0, 276.0, 311.0, 282.0, 297.0, 291.0, 288.0, 296.0, 286.0, 280.0, 299.0, 298.0, 281.0, 296.0, 286.0, 277.0, 299.0, 303.0, 279.0, 291.0, 288.0, 283.0, 299.0, 280.0, 299.0, 295.0, 278.0, 293.0, 283.0, 296.0, 283.0, 266.0, 264.0, 277.0, 299.0, 283.0, 296.0, 291.0, 285.0, 264.0, 261.0, 286.0, 290.0, 267.0, 252.0, 272.0, 258.0, 278.0, 298.0, 290.0, 289.0, 291.0, 291.0, 312.0, 318.0, 290.0, 292.0, 272.0, 253.0, 314.0, 313.0, 275.0, 295.0, 261.0, 243.0, 298.0, 281.0, 291.0, 291.0, 299.0, 280.0, 294.0, 282.0, 287.0, 283.0, 288.0, 291.0, 288.0, 291.0, 288.0, 288.0, 302.0, 280.0, 267.0, 252.0, 286.0, 287.0, 288.0, 291.0, 285.0, 288.0, 283.0, 299.0, 290.0, 289.0, 298.0, 284.0, 289.0, 293.0, 296.0, 291.0, 276.0, 243.0, 300.0, 273.0, 286.0, 293.0, 309.0, 324.0, 302.0, 288.0, 292.0, 287.0, 289.0, 284.0, 292.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2948917928576587, "mean_processing_ms": 0.3269695972321587, "mean_inference_ms": 
1.8475779345693215}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4272000, "num_steps_sampled": 2278400, "sample_time_ms": 20480.726, "load_time_ms": 37.228, "grad_time_ms": 8437.297, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004881067667156458, "policy_loss": -0.003187847323715687, "vf_loss": 86.31526947021484, "vf_explained_var": 0.7646486163139343, "kl": 0.0018008003244176507, "entropy": 1.125217080116272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2278400, "episodes_total": 5696, "training_iteration": 178, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-13-54", "timestamp": 1660252434, "time_this_iter_s": 30.965723037719727, "time_total_s": 10851.624056816101, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10851.624056816101, "timesteps_since_restore": 2278400, "iterations_since_restore": 178, "perf": {"cpu_util_percent": 34.43636363636364, "ram_util_percent": 58.54318181818183}} +{"episode_reward_max": 633.0, "episode_reward_min": 504.0, "episode_reward_mean": 577.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.99}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.38, "shaped_reward_min": 144, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.65, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.73, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.62, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.95, 
"dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.28, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.73, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.62, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.73, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, 
"viable_onion_potting_agent_1_mean": 16.62, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 561.0, 579.0, 582.0, 627.0, 527.0, 579.0, 576.0, 582.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 561.0, 576.0, 587.0, 630.0, 582.0, 627.0, 579.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 530.0, 576.0, 579.0, 582.0, 630.0, 582.0, 525.0, 627.0, 570.0, 504.0, 579.0, 582.0, 579.0, 576.0, 570.0, 579.0, 579.0, 576.0, 582.0, 519.0, 573.0, 579.0, 573.0, 582.0, 579.0, 582.0, 582.0, 587.0, 519.0, 573.0, 579.0, 633.0, 590.0, 579.0, 573.0, 587.0, 582.0, 570.0, 519.0, 579.0, 582.0, 627.0, 576.0, 582.0, 579.0, 
573.0, 582.0, 582.0, 527.0, 582.0, 579.0, 587.0, 630.0, 582.0, 576.0, 579.0, 570.0, 582.0, 582.0, 579.0, 525.0, 579.0, 576.0, 587.0, 584.0, 582.0, 573.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 296.0, 273.0, 288.0, 294.0, 285.0, 285.0, 297.0, 318.0, 309.0, 264.0, 263.0, 287.0, 292.0, 280.0, 296.0, 288.0, 294.0, 293.0, 289.0, 288.0, 294.0, 300.0, 279.0, 294.0, 285.0, 288.0, 291.0, 294.0, 288.0, 278.0, 301.0, 279.0, 282.0, 290.0, 286.0, 302.0, 285.0, 320.0, 310.0, 297.0, 285.0, 315.0, 312.0, 295.0, 284.0, 304.0, 278.0, 278.0, 301.0, 289.0, 293.0, 290.0, 292.0, 275.0, 301.0, 293.0, 289.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 272.0, 258.0, 278.0, 298.0, 290.0, 289.0, 291.0, 291.0, 312.0, 318.0, 290.0, 292.0, 272.0, 253.0, 314.0, 313.0, 275.0, 295.0, 261.0, 243.0, 298.0, 281.0, 291.0, 291.0, 299.0, 280.0, 294.0, 282.0, 287.0, 283.0, 288.0, 291.0, 288.0, 291.0, 288.0, 288.0, 302.0, 280.0, 267.0, 252.0, 286.0, 287.0, 288.0, 291.0, 285.0, 288.0, 283.0, 299.0, 290.0, 289.0, 298.0, 284.0, 289.0, 293.0, 296.0, 291.0, 276.0, 243.0, 300.0, 273.0, 286.0, 293.0, 309.0, 324.0, 302.0, 288.0, 292.0, 287.0, 289.0, 284.0, 292.0, 295.0, 289.0, 293.0, 281.0, 289.0, 270.0, 249.0, 292.0, 287.0, 286.0, 296.0, 305.0, 322.0, 289.0, 287.0, 295.0, 287.0, 300.0, 279.0, 290.0, 283.0, 285.0, 297.0, 289.0, 293.0, 254.0, 273.0, 294.0, 288.0, 291.0, 288.0, 301.0, 286.0, 314.0, 316.0, 295.0, 287.0, 289.0, 287.0, 277.0, 302.0, 292.0, 278.0, 278.0, 304.0, 288.0, 294.0, 291.0, 288.0, 259.0, 
266.0, 275.0, 304.0, 299.0, 277.0, 296.0, 291.0, 290.0, 294.0, 288.0, 294.0, 288.0, 285.0, 290.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2901493032290514, "mean_processing_ms": 0.3260293499521716, "mean_inference_ms": 1.842905806043276}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4296000, "num_steps_sampled": 2291200, "sample_time_ms": 20658.749, "load_time_ms": 37.127, "grad_time_ms": 8627.523, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005556942895054817, "policy_loss": -0.0025492331478744745, "vf_loss": 86.67485809326172, "vf_explained_var": 0.7664775848388672, "kl": 0.0018904004245996475, "entropy": 1.1226133108139038, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2291200, "episodes_total": 5728, "training_iteration": 179, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-14-26", "timestamp": 1660252466, "time_this_iter_s": 32.01629400253296, "time_total_s": 10883.640350818634, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10883.640350818634, "timesteps_since_restore": 2291200, "iterations_since_restore": 179, "perf": {"cpu_util_percent": 31.479999999999997, "ram_util_percent": 58.526666666666664}} +{"episode_reward_max": 630.0, "episode_reward_min": 516.0, "episode_reward_mean": 578.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 289.07}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.54, 
"shaped_reward_min": 156, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.34, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.82, 
"potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.62, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.88, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.82, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.62, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.62, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 567.0, 530.0, 627.0, 582.0, 579.0, 587.0, 579.0, 579.0, 579.0, 579.0, 582.0, 582.0, 576.0, 576.0, 516.0, 579.0, 587.0, 582.0, 570.0, 570.0, 519.0, 573.0, 582.0, 576.0, 579.0, 579.0, 576.0, 530.0, 582.0, 624.0, 590.0, 579.0, 573.0, 587.0, 582.0, 570.0, 519.0, 579.0, 582.0, 627.0, 576.0, 582.0, 579.0, 573.0, 582.0, 
582.0, 527.0, 582.0, 579.0, 587.0, 630.0, 582.0, 576.0, 579.0, 570.0, 582.0, 582.0, 579.0, 525.0, 579.0, 576.0, 587.0, 584.0, 582.0, 573.0, 587.0, 587.0, 561.0, 579.0, 582.0, 627.0, 527.0, 579.0, 576.0, 582.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 561.0, 576.0, 587.0, 630.0, 582.0, 627.0, 579.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 283.0, 289.0, 287.0, 288.0, 279.0, 270.0, 260.0, 309.0, 318.0, 298.0, 284.0, 276.0, 303.0, 291.0, 296.0, 298.0, 281.0, 283.0, 296.0, 284.0, 295.0, 277.0, 302.0, 297.0, 285.0, 294.0, 288.0, 293.0, 283.0, 284.0, 292.0, 263.0, 253.0, 290.0, 289.0, 302.0, 285.0, 297.0, 285.0, 291.0, 279.0, 294.0, 276.0, 261.0, 258.0, 282.0, 291.0, 293.0, 289.0, 293.0, 283.0, 280.0, 299.0, 282.0, 297.0, 298.0, 278.0, 269.0, 261.0, 285.0, 297.0, 298.0, 326.0, 302.0, 288.0, 292.0, 287.0, 289.0, 284.0, 292.0, 295.0, 289.0, 293.0, 281.0, 289.0, 270.0, 249.0, 292.0, 287.0, 286.0, 296.0, 305.0, 322.0, 289.0, 287.0, 295.0, 287.0, 300.0, 279.0, 290.0, 283.0, 285.0, 297.0, 289.0, 293.0, 254.0, 273.0, 294.0, 288.0, 291.0, 288.0, 301.0, 286.0, 314.0, 316.0, 295.0, 287.0, 289.0, 287.0, 277.0, 302.0, 292.0, 278.0, 278.0, 304.0, 288.0, 294.0, 291.0, 288.0, 259.0, 266.0, 275.0, 304.0, 299.0, 277.0, 296.0, 291.0, 290.0, 294.0, 288.0, 294.0, 288.0, 285.0, 290.0, 297.0, 291.0, 296.0, 273.0, 288.0, 294.0, 285.0, 285.0, 297.0, 318.0, 309.0, 264.0, 263.0, 287.0, 292.0, 280.0, 296.0, 288.0, 294.0, 293.0, 
289.0, 288.0, 294.0, 300.0, 279.0, 294.0, 285.0, 288.0, 291.0, 294.0, 288.0, 278.0, 301.0, 279.0, 282.0, 290.0, 286.0, 302.0, 285.0, 320.0, 310.0, 297.0, 285.0, 315.0, 312.0, 295.0, 284.0, 304.0, 278.0, 278.0, 301.0, 289.0, 293.0, 290.0, 292.0, 275.0, 301.0, 293.0, 289.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2854556266765977, "mean_processing_ms": 0.3250985864852822, "mean_inference_ms": 1.838409331377913}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4320000, "num_steps_sampled": 2304000, "sample_time_ms": 20831.137, "load_time_ms": 37.325, "grad_time_ms": 8787.85, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003588956082239747, "policy_loss": -0.004645919892936945, "vf_loss": 88.00481414794922, "vf_explained_var": 0.7581232190132141, "kl": 0.0017625847831368446, "entropy": 1.131211280822754, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2304000, "episodes_total": 5760, "training_iteration": 180, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-14-57", "timestamp": 1660252497, "time_this_iter_s": 30.824997186660767, "time_total_s": 10914.465348005295, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10914.465348005295, "timesteps_since_restore": 2304000, "iterations_since_restore": 180, "perf": {"cpu_util_percent": 31.02954545454545, "ram_util_percent": 58.488636363636374}} +{"episode_reward_max": 630.0, "episode_reward_min": 501.0, "episode_reward_mean": 575.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 246.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 287.89}, "custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.38, "shaped_reward_min": 141, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.23, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.73, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.91, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.04, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.39, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.59, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.39, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.39, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 10, 
"optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.39, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [521.0, 576.0, 573.0, 539.0, 579.0, 582.0, 536.0, 630.0, 579.0, 582.0, 582.0, 579.0, 576.0, 582.0, 558.0, 579.0, 552.0, 
576.0, 579.0, 587.0, 579.0, 579.0, 587.0, 567.0, 582.0, 576.0, 501.0, 504.0, 573.0, 587.0, 587.0, 579.0, 584.0, 582.0, 573.0, 587.0, 587.0, 561.0, 579.0, 582.0, 627.0, 527.0, 579.0, 576.0, 582.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 561.0, 576.0, 587.0, 630.0, 582.0, 627.0, 579.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 579.0, 576.0, 567.0, 530.0, 627.0, 582.0, 579.0, 587.0, 579.0, 579.0, 579.0, 579.0, 582.0, 582.0, 576.0, 576.0, 516.0, 579.0, 587.0, 582.0, 570.0, 570.0, 519.0, 573.0, 582.0, 576.0, 579.0, 579.0, 576.0, 530.0, 582.0, 624.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 254.0, 283.0, 293.0, 285.0, 288.0, 275.0, 264.0, 288.0, 291.0, 304.0, 278.0, 267.0, 269.0, 323.0, 307.0, 298.0, 281.0, 303.0, 279.0, 297.0, 285.0, 280.0, 299.0, 277.0, 299.0, 286.0, 296.0, 286.0, 272.0, 308.0, 271.0, 275.0, 277.0, 292.0, 284.0, 276.0, 303.0, 286.0, 301.0, 291.0, 288.0, 301.0, 278.0, 292.0, 295.0, 291.0, 276.0, 293.0, 289.0, 293.0, 283.0, 254.0, 247.0, 246.0, 258.0, 281.0, 292.0, 286.0, 301.0, 295.0, 292.0, 282.0, 297.0, 290.0, 294.0, 288.0, 294.0, 288.0, 285.0, 290.0, 297.0, 291.0, 296.0, 273.0, 288.0, 294.0, 285.0, 285.0, 297.0, 318.0, 309.0, 264.0, 263.0, 287.0, 292.0, 280.0, 296.0, 288.0, 294.0, 293.0, 289.0, 288.0, 294.0, 300.0, 279.0, 294.0, 285.0, 288.0, 291.0, 294.0, 288.0, 278.0, 301.0, 279.0, 282.0, 290.0, 286.0, 302.0, 285.0, 320.0, 310.0, 297.0, 285.0, 315.0, 312.0, 295.0, 284.0, 304.0, 278.0, 278.0, 301.0, 289.0, 293.0, 290.0, 
292.0, 275.0, 301.0, 293.0, 289.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 296.0, 283.0, 289.0, 287.0, 288.0, 279.0, 270.0, 260.0, 309.0, 318.0, 298.0, 284.0, 276.0, 303.0, 291.0, 296.0, 298.0, 281.0, 283.0, 296.0, 284.0, 295.0, 277.0, 302.0, 297.0, 285.0, 294.0, 288.0, 293.0, 283.0, 284.0, 292.0, 263.0, 253.0, 290.0, 289.0, 302.0, 285.0, 297.0, 285.0, 291.0, 279.0, 294.0, 276.0, 261.0, 258.0, 282.0, 291.0, 293.0, 289.0, 293.0, 283.0, 280.0, 299.0, 282.0, 297.0, 298.0, 278.0, 269.0, 261.0, 285.0, 297.0, 298.0, 326.0]}, "sampler_perf": {"mean_env_wait_ms": 1.280816050662255, "mean_processing_ms": 0.3241780547884837, "mean_inference_ms": 1.833999332912814}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4344000, "num_steps_sampled": 2316800, "sample_time_ms": 21054.918, "load_time_ms": 37.113, "grad_time_ms": 8943.853, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029653788078576326, "policy_loss": -0.005187256261706352, "vf_loss": 87.18419647216797, "vf_explained_var": 0.7553746104240417, "kl": 0.0017378958873450756, "entropy": 1.13156259059906, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2316800, "episodes_total": 5792, "training_iteration": 181, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-15-28", "timestamp": 1660252528, "time_this_iter_s": 30.97549271583557, "time_total_s": 10945.44084072113, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": 
true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10945.44084072113, "timesteps_since_restore": 2316800, "iterations_since_restore": 181, "perf": {"cpu_util_percent": 34.48409090909092, 
"ram_util_percent": 58.6068181818182}} +{"episode_reward_max": 630.0, "episode_reward_min": 465.0, "episode_reward_mean": 573.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.56}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.72, "shaped_reward_min": 141, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.86, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.71, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.03, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 27, 
"onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.22, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.99, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 15.22, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.99, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.22, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.99, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 530.0, 530.0, 627.0, 582.0, 579.0, 538.0, 576.0, 582.0, 579.0, 519.0, 582.0, 582.0, 582.0, 630.0, 579.0, 579.0, 576.0, 567.0, 576.0, 465.0, 587.0, 582.0, 587.0, 570.0, 582.0, 579.0, 573.0, 582.0, 584.0, 627.0, 576.0, 582.0, 579.0, 579.0, 576.0, 579.0, 576.0, 567.0, 530.0, 627.0, 582.0, 579.0, 587.0, 579.0, 579.0, 579.0, 579.0, 582.0, 582.0, 576.0, 576.0, 516.0, 579.0, 587.0, 582.0, 570.0, 570.0, 519.0, 573.0, 582.0, 576.0, 579.0, 579.0, 576.0, 530.0, 582.0, 624.0, 521.0, 576.0, 573.0, 539.0, 579.0, 582.0, 536.0, 630.0, 579.0, 582.0, 582.0, 579.0, 576.0, 582.0, 558.0, 579.0, 552.0, 576.0, 579.0, 587.0, 579.0, 579.0, 587.0, 567.0, 582.0, 576.0, 501.0, 504.0, 573.0, 587.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 272.0, 258.0, 270.0, 260.0, 313.0, 314.0, 281.0, 301.0, 285.0, 294.0, 263.0, 275.0, 280.0, 296.0, 292.0, 290.0, 294.0, 285.0, 270.0, 249.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 318.0, 312.0, 295.0, 284.0, 291.0, 288.0, 288.0, 288.0, 291.0, 276.0, 278.0, 298.0, 227.0, 238.0, 308.0, 279.0, 298.0, 284.0, 313.0, 274.0, 284.0, 286.0, 285.0, 297.0, 270.0, 309.0, 282.0, 291.0, 284.0, 298.0, 295.0, 289.0, 313.0, 314.0, 285.0, 291.0, 293.0, 289.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 296.0, 283.0, 289.0, 287.0, 288.0, 279.0, 270.0, 260.0, 309.0, 318.0, 298.0, 284.0, 276.0, 303.0, 291.0, 296.0, 298.0, 281.0, 283.0, 
296.0, 284.0, 295.0, 277.0, 302.0, 297.0, 285.0, 294.0, 288.0, 293.0, 283.0, 284.0, 292.0, 263.0, 253.0, 290.0, 289.0, 302.0, 285.0, 297.0, 285.0, 291.0, 279.0, 294.0, 276.0, 261.0, 258.0, 282.0, 291.0, 293.0, 289.0, 293.0, 283.0, 280.0, 299.0, 282.0, 297.0, 298.0, 278.0, 269.0, 261.0, 285.0, 297.0, 298.0, 326.0, 267.0, 254.0, 283.0, 293.0, 285.0, 288.0, 275.0, 264.0, 288.0, 291.0, 304.0, 278.0, 267.0, 269.0, 323.0, 307.0, 298.0, 281.0, 303.0, 279.0, 297.0, 285.0, 280.0, 299.0, 277.0, 299.0, 286.0, 296.0, 286.0, 272.0, 308.0, 271.0, 275.0, 277.0, 292.0, 284.0, 276.0, 303.0, 286.0, 301.0, 291.0, 288.0, 301.0, 278.0, 292.0, 295.0, 291.0, 276.0, 293.0, 289.0, 293.0, 283.0, 254.0, 247.0, 246.0, 258.0, 281.0, 292.0, 286.0, 301.0, 295.0, 292.0, 282.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2762174298521924, "mean_processing_ms": 0.3232647451131093, "mean_inference_ms": 1.8295321417191508}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4368000, "num_steps_sampled": 2329600, "sample_time_ms": 20991.973, "load_time_ms": 37.186, "grad_time_ms": 9226.22, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002576154889538884, "policy_loss": -0.005821262951940298, "vf_loss": 89.62581634521484, "vf_explained_var": 0.7608991265296936, "kl": 0.002179400995373726, "entropy": 1.1303036212921143, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2329600, "episodes_total": 5824, "training_iteration": 182, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-15-59", "timestamp": 1660252559, "time_this_iter_s": 30.775686264038086, "time_total_s": 10976.216526985168, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10976.216526985168, "timesteps_since_restore": 2329600, "iterations_since_restore": 182, "perf": {"cpu_util_percent": 31.2, "ram_util_percent": 58.5}} +{"episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 575.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 287.805}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.61, "shaped_reward_min": 141, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.18, 
"useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.42, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.72, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.58, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.31, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.47, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.72, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.58, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.72, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.58, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 587.0, 627.0, 579.0, 576.0, 582.0, 576.0, 573.0, 576.0, 633.0, 525.0, 576.0, 579.0, 579.0, 582.0, 579.0, 536.0, 576.0, 627.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 576.0, 576.0, 630.0, 582.0, 582.0, 576.0, 530.0, 582.0, 624.0, 521.0, 576.0, 573.0, 539.0, 579.0, 582.0, 536.0, 630.0, 579.0, 582.0, 582.0, 579.0, 576.0, 582.0, 558.0, 579.0, 552.0, 576.0, 579.0, 587.0, 579.0, 579.0, 587.0, 567.0, 582.0, 576.0, 501.0, 504.0, 573.0, 587.0, 587.0, 579.0, 576.0, 530.0, 530.0, 627.0, 582.0, 579.0, 538.0, 576.0, 582.0, 579.0, 519.0, 582.0, 582.0, 582.0, 630.0, 579.0, 579.0, 576.0, 567.0, 576.0, 465.0, 587.0, 582.0, 587.0, 570.0, 582.0, 579.0, 573.0, 582.0, 584.0, 627.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 299.0, 283.0, 290.0, 297.0, 313.0, 314.0, 292.0, 287.0, 284.0, 292.0, 291.0, 291.0, 291.0, 285.0, 291.0, 282.0, 272.0, 304.0, 311.0, 322.0, 255.0, 270.0, 283.0, 293.0, 289.0, 290.0, 294.0, 285.0, 284.0, 298.0, 297.0, 282.0, 261.0, 275.0, 288.0, 288.0, 305.0, 322.0, 294.0, 285.0, 290.0, 286.0, 290.0, 292.0, 295.0, 287.0, 300.0, 279.0, 293.0, 280.0, 293.0, 294.0, 293.0, 283.0, 296.0, 280.0, 321.0, 309.0, 305.0, 
277.0, 288.0, 294.0, 298.0, 278.0, 269.0, 261.0, 285.0, 297.0, 298.0, 326.0, 267.0, 254.0, 283.0, 293.0, 285.0, 288.0, 275.0, 264.0, 288.0, 291.0, 304.0, 278.0, 267.0, 269.0, 323.0, 307.0, 298.0, 281.0, 303.0, 279.0, 297.0, 285.0, 280.0, 299.0, 277.0, 299.0, 286.0, 296.0, 286.0, 272.0, 308.0, 271.0, 275.0, 277.0, 292.0, 284.0, 276.0, 303.0, 286.0, 301.0, 291.0, 288.0, 301.0, 278.0, 292.0, 295.0, 291.0, 276.0, 293.0, 289.0, 293.0, 283.0, 254.0, 247.0, 246.0, 258.0, 281.0, 292.0, 286.0, 301.0, 295.0, 292.0, 282.0, 297.0, 285.0, 291.0, 272.0, 258.0, 270.0, 260.0, 313.0, 314.0, 281.0, 301.0, 285.0, 294.0, 263.0, 275.0, 280.0, 296.0, 292.0, 290.0, 294.0, 285.0, 270.0, 249.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 318.0, 312.0, 295.0, 284.0, 291.0, 288.0, 288.0, 288.0, 291.0, 276.0, 278.0, 298.0, 227.0, 238.0, 308.0, 279.0, 298.0, 284.0, 313.0, 274.0, 284.0, 286.0, 285.0, 297.0, 270.0, 309.0, 282.0, 291.0, 284.0, 298.0, 295.0, 289.0, 313.0, 314.0, 285.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2716706020385649, "mean_processing_ms": 0.32236021091809197, "mean_inference_ms": 1.8252783373393515}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4392000, "num_steps_sampled": 2342400, "sample_time_ms": 21166.824, "load_time_ms": 37.427, "grad_time_ms": 9604.389, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001810177811421454, "policy_loss": -0.006374426186084747, "vf_loss": 87.48321533203125, "vf_explained_var": 0.7590639591217041, "kl": 0.00198071519844234, "entropy": 1.1274290084838867, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2342400, "episodes_total": 5856, "training_iteration": 183, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-16-33", "timestamp": 1660252593, "time_this_iter_s": 34.10594201087952, "time_total_s": 11010.322468996048, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11010.322468996048, "timesteps_since_restore": 2342400, "iterations_since_restore": 183, "perf": {"cpu_util_percent": 32.16041666666667, "ram_util_percent": 58.54791666666666}} +{"episode_reward_max": 636.0, "episode_reward_min": 465.0, "episode_reward_mean": 581.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 290.625}, "custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.65, "shaped_reward_min": 145, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.68, 
"onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.48, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.67, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.26, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.72, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 1, 
"soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 630.0, 579.0, 636.0, 582.0, 579.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 579.0, 582.0, 576.0, 587.0, 579.0, 582.0, 630.0, 590.0, 579.0, 561.0, 579.0, 573.0, 587.0, 587.0, 579.0, 576.0, 530.0, 530.0, 627.0, 582.0, 579.0, 538.0, 576.0, 582.0, 579.0, 519.0, 582.0, 582.0, 582.0, 630.0, 579.0, 579.0, 576.0, 567.0, 576.0, 465.0, 587.0, 582.0, 587.0, 570.0, 582.0, 579.0, 573.0, 582.0, 584.0, 627.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 576.0, 582.0, 576.0, 573.0, 576.0, 633.0, 525.0, 576.0, 579.0, 579.0, 582.0, 579.0, 536.0, 576.0, 627.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 576.0, 576.0, 630.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 288.0, 291.0, 318.0, 312.0, 289.0, 290.0, 316.0, 320.0, 296.0, 286.0, 290.0, 289.0, 290.0, 297.0, 286.0, 296.0, 293.0, 289.0, 314.0, 316.0, 301.0, 275.0, 293.0, 289.0, 278.0, 295.0, 318.0, 312.0, 286.0, 296.0, 276.0, 294.0, 298.0, 278.0, 295.0, 
287.0, 296.0, 286.0, 293.0, 289.0, 292.0, 287.0, 296.0, 286.0, 282.0, 294.0, 307.0, 280.0, 288.0, 291.0, 290.0, 292.0, 316.0, 314.0, 298.0, 292.0, 293.0, 286.0, 278.0, 283.0, 299.0, 280.0, 281.0, 292.0, 286.0, 301.0, 295.0, 292.0, 282.0, 297.0, 285.0, 291.0, 272.0, 258.0, 270.0, 260.0, 313.0, 314.0, 281.0, 301.0, 285.0, 294.0, 263.0, 275.0, 280.0, 296.0, 292.0, 290.0, 294.0, 285.0, 270.0, 249.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 318.0, 312.0, 295.0, 284.0, 291.0, 288.0, 288.0, 288.0, 291.0, 276.0, 278.0, 298.0, 227.0, 238.0, 308.0, 279.0, 298.0, 284.0, 313.0, 274.0, 284.0, 286.0, 285.0, 297.0, 270.0, 309.0, 282.0, 291.0, 284.0, 298.0, 295.0, 289.0, 313.0, 314.0, 285.0, 291.0, 293.0, 289.0, 299.0, 283.0, 290.0, 297.0, 313.0, 314.0, 292.0, 287.0, 284.0, 292.0, 291.0, 291.0, 291.0, 285.0, 291.0, 282.0, 272.0, 304.0, 311.0, 322.0, 255.0, 270.0, 283.0, 293.0, 289.0, 290.0, 294.0, 285.0, 284.0, 298.0, 297.0, 282.0, 261.0, 275.0, 288.0, 288.0, 305.0, 322.0, 294.0, 285.0, 290.0, 286.0, 290.0, 292.0, 295.0, 287.0, 300.0, 279.0, 293.0, 280.0, 293.0, 294.0, 293.0, 283.0, 296.0, 280.0, 321.0, 309.0, 305.0, 277.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.26716887743788, "mean_processing_ms": 0.3214640615198408, "mean_inference_ms": 1.8211055910807965}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4416000, "num_steps_sampled": 2355200, "sample_time_ms": 21369.68, "load_time_ms": 37.437, "grad_time_ms": 9828.237, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0049454327672719955, "policy_loss": -0.002732283202931285, "vf_loss": 82.44231414794922, "vf_explained_var": 0.771254301071167, "kl": 0.0019334623357281089, "entropy": 1.1330214738845825, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2355200, "episodes_total": 5888, "training_iteration": 184, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-17-05", "timestamp": 1660252625, "time_this_iter_s": 31.787577867507935, "time_total_s": 11042.110046863556, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11042.110046863556, "timesteps_since_restore": 2355200, "iterations_since_restore": 184, "perf": {"cpu_util_percent": 30.99777777777778, "ram_util_percent": 58.44666666666665}} +{"episode_reward_max": 636.0, "episode_reward_min": 194.0, "episode_reward_mean": 578.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 93.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.095}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 178.59, "shaped_reward_min": 74, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.6, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.44, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.61, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.55, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.61, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.61, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 584.0, 576.0, 573.0, 579.0, 515.0, 576.0, 582.0, 576.0, 582.0, 587.0, 522.0, 573.0, 582.0, 519.0, 576.0, 582.0, 518.0, 570.0, 633.0, 582.0, 194.0, 582.0, 630.0, 587.0, 576.0, 579.0, 527.0, 576.0, 630.0, 587.0, 582.0, 584.0, 627.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 576.0, 582.0, 576.0, 573.0, 576.0, 633.0, 525.0, 576.0, 579.0, 579.0, 582.0, 579.0, 536.0, 576.0, 627.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 576.0, 576.0, 630.0, 582.0, 582.0, 573.0, 579.0, 630.0, 579.0, 636.0, 582.0, 579.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 579.0, 582.0, 576.0, 587.0, 579.0, 582.0, 630.0, 590.0, 579.0, 561.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 281.0, 286.0, 296.0, 283.0, 
301.0, 286.0, 290.0, 282.0, 291.0, 292.0, 287.0, 260.0, 255.0, 275.0, 301.0, 294.0, 288.0, 280.0, 296.0, 291.0, 291.0, 288.0, 299.0, 259.0, 263.0, 272.0, 301.0, 303.0, 279.0, 261.0, 258.0, 278.0, 298.0, 286.0, 296.0, 268.0, 250.0, 287.0, 283.0, 313.0, 320.0, 283.0, 299.0, 93.0, 101.0, 287.0, 295.0, 315.0, 315.0, 300.0, 287.0, 279.0, 297.0, 288.0, 291.0, 262.0, 265.0, 277.0, 299.0, 306.0, 324.0, 285.0, 302.0, 284.0, 298.0, 295.0, 289.0, 313.0, 314.0, 285.0, 291.0, 293.0, 289.0, 299.0, 283.0, 290.0, 297.0, 313.0, 314.0, 292.0, 287.0, 284.0, 292.0, 291.0, 291.0, 291.0, 285.0, 291.0, 282.0, 272.0, 304.0, 311.0, 322.0, 255.0, 270.0, 283.0, 293.0, 289.0, 290.0, 294.0, 285.0, 284.0, 298.0, 297.0, 282.0, 261.0, 275.0, 288.0, 288.0, 305.0, 322.0, 294.0, 285.0, 290.0, 286.0, 290.0, 292.0, 295.0, 287.0, 300.0, 279.0, 293.0, 280.0, 293.0, 294.0, 293.0, 283.0, 296.0, 280.0, 321.0, 309.0, 305.0, 277.0, 288.0, 294.0, 288.0, 285.0, 288.0, 291.0, 318.0, 312.0, 289.0, 290.0, 316.0, 320.0, 296.0, 286.0, 290.0, 289.0, 290.0, 297.0, 286.0, 296.0, 293.0, 289.0, 314.0, 316.0, 301.0, 275.0, 293.0, 289.0, 278.0, 295.0, 318.0, 312.0, 286.0, 296.0, 276.0, 294.0, 298.0, 278.0, 295.0, 287.0, 296.0, 286.0, 293.0, 289.0, 292.0, 287.0, 296.0, 286.0, 282.0, 294.0, 307.0, 280.0, 288.0, 291.0, 290.0, 292.0, 316.0, 314.0, 298.0, 292.0, 293.0, 286.0, 278.0, 283.0, 299.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2627159762487399, "mean_processing_ms": 0.320576860334056, "mean_inference_ms": 1.8169666521005257}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4440000, "num_steps_sampled": 2368000, "sample_time_ms": 21316.526, "load_time_ms": 37.828, "grad_time_ms": 10200.15, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004557406529784203, "policy_loss": -0.004057899583131075, "vf_loss": 91.82827758789062, "vf_explained_var": 0.7658367156982422, "kl": 0.001969862962141633, "entropy": 1.135046362876892, "entropy_coeff": 
0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2368000, "episodes_total": 5920, "training_iteration": 185, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-17-38", "timestamp": 1660252658, "time_this_iter_s": 32.679043769836426, "time_total_s": 11074.789090633392, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11074.789090633392, "timesteps_since_restore": 2368000, "iterations_since_restore": 185, "perf": {"cpu_util_percent": 33.50425531914893, "ram_util_percent": 58.438297872340435}} +{"episode_reward_max": 636.0, "episode_reward_min": 194.0, "episode_reward_mean": 576.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 93.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.105}, "custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 177.81, "shaped_reward_min": 74, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.66, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.42, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.75, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.45, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.31, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.75, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.75, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 627.0, 527.0, 582.0, 624.0, 581.0, 576.0, 576.0, 584.0, 576.0, 587.0, 527.0, 582.0, 576.0, 498.0, 579.0, 582.0, 518.0, 630.0, 582.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 468.0, 627.0, 576.0, 582.0, 582.0, 576.0, 630.0, 582.0, 582.0, 573.0, 579.0, 630.0, 579.0, 636.0, 582.0, 579.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 579.0, 582.0, 576.0, 587.0, 579.0, 582.0, 630.0, 590.0, 579.0, 561.0, 579.0, 579.0, 582.0, 584.0, 576.0, 573.0, 579.0, 515.0, 576.0, 582.0, 576.0, 582.0, 587.0, 522.0, 573.0, 582.0, 519.0, 576.0, 582.0, 518.0, 570.0, 633.0, 582.0, 194.0, 582.0, 630.0, 587.0, 576.0, 579.0, 527.0, 576.0, 630.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 284.0, 310.0, 317.0, 277.0, 250.0, 281.0, 301.0, 316.0, 308.0, 289.0, 292.0, 288.0, 288.0, 283.0, 293.0, 286.0, 298.0, 285.0, 291.0, 302.0, 285.0, 270.0, 257.0, 298.0, 284.0, 277.0, 299.0, 255.0, 243.0, 287.0, 292.0, 293.0, 289.0, 250.0, 268.0, 316.0, 314.0, 290.0, 292.0, 305.0, 271.0, 278.0, 309.0, 300.0, 282.0, 294.0, 288.0, 284.0, 298.0, 313.0, 314.0, 297.0, 285.0, 227.0, 241.0, 310.0, 317.0, 289.0, 287.0, 294.0, 288.0, 293.0, 289.0, 296.0, 280.0, 321.0, 309.0, 305.0, 277.0, 288.0, 294.0, 288.0, 285.0, 288.0, 291.0, 318.0, 312.0, 289.0, 290.0, 316.0, 320.0, 296.0, 286.0, 290.0, 289.0, 290.0, 297.0, 286.0, 296.0, 293.0, 289.0, 314.0, 316.0, 301.0, 275.0, 293.0, 289.0, 278.0, 295.0, 318.0, 312.0, 286.0, 296.0, 276.0, 294.0, 298.0, 278.0, 295.0, 287.0, 296.0, 286.0, 293.0, 289.0, 292.0, 287.0, 296.0, 286.0, 282.0, 294.0, 307.0, 280.0, 288.0, 291.0, 290.0, 292.0, 316.0, 314.0, 298.0, 292.0, 293.0, 286.0, 278.0, 283.0, 299.0, 280.0, 298.0, 281.0, 286.0, 296.0, 283.0, 301.0, 286.0, 290.0, 282.0, 291.0, 292.0, 287.0, 260.0, 255.0, 275.0, 301.0, 294.0, 288.0, 280.0, 296.0, 291.0, 291.0, 288.0, 299.0, 259.0, 263.0, 272.0, 301.0, 303.0, 279.0, 261.0, 258.0, 278.0, 298.0, 286.0, 296.0, 268.0, 250.0, 287.0, 283.0, 313.0, 320.0, 283.0, 299.0, 93.0, 101.0, 287.0, 295.0, 315.0, 315.0, 300.0, 287.0, 279.0, 297.0, 288.0, 291.0, 262.0, 265.0, 277.0, 299.0, 306.0, 324.0, 285.0, 302.0]}, "sampler_perf": {"mean_env_wait_ms": 1.258340305403844, "mean_processing_ms": 0.31970797918049665, "mean_inference_ms": 1.8136714986418816}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4464000, "num_steps_sampled": 2380800, "sample_time_ms": 21817.869, "load_time_ms": 37.793, "grad_time_ms": 10598.659, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.005312865134328604, "policy_loss": -0.0029478278011083603, "vf_loss": 88.26638793945312, "vf_explained_var": 0.762065589427948, "kl": 0.0017753179417923093, "entropy": 1.1319037675857544, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2380800, "episodes_total": 5952, "training_iteration": 186, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-18-19", "timestamp": 1660252699, "time_this_iter_s": 41.059054136276245, "time_total_s": 11115.848144769669, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11115.848144769669, "timesteps_since_restore": 2380800, "iterations_since_restore": 186, "perf": {"cpu_util_percent": 27.889655172413793, "ram_util_percent": 58.474137931034484}} +{"episode_reward_max": 633.0, "episode_reward_min": 194.0, "episode_reward_mean": 572.75, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 93.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.375}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 176.75, "shaped_reward_min": 74, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.41, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.56, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.81, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.41, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.41, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 582.0, 510.0, 579.0, 587.0, 582.0, 579.0, 627.0, 576.0, 576.0, 530.0, 579.0, 587.0, 582.0, 587.0, 461.0, 576.0, 582.0, 573.0, 630.0, 587.0, 582.0, 582.0, 627.0, 579.0, 573.0, 582.0, 630.0, 579.0, 576.0, 579.0, 576.0, 590.0, 579.0, 561.0, 579.0, 579.0, 582.0, 584.0, 576.0, 573.0, 579.0, 515.0, 576.0, 582.0, 576.0, 582.0, 587.0, 522.0, 573.0, 582.0, 519.0, 576.0, 582.0, 518.0, 570.0, 633.0, 582.0, 194.0, 582.0, 630.0, 587.0, 576.0, 579.0, 527.0, 576.0, 630.0, 587.0, 570.0, 627.0, 527.0, 582.0, 624.0, 581.0, 576.0, 576.0, 584.0, 576.0, 587.0, 527.0, 582.0, 576.0, 498.0, 579.0, 582.0, 518.0, 630.0, 582.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 468.0, 627.0, 576.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 287.0, 295.0, 258.0, 252.0, 291.0, 288.0, 296.0, 291.0, 298.0, 284.0, 285.0, 294.0, 315.0, 312.0, 289.0, 287.0, 285.0, 291.0, 270.0, 260.0, 285.0, 294.0, 282.0, 305.0, 297.0, 285.0, 301.0, 286.0, 226.0, 235.0, 292.0, 284.0, 287.0, 295.0, 285.0, 288.0, 308.0, 322.0, 287.0, 300.0, 295.0, 287.0, 292.0, 290.0, 304.0, 323.0, 286.0, 293.0, 289.0, 284.0, 284.0, 298.0, 326.0, 304.0, 297.0, 282.0, 285.0, 291.0, 281.0, 298.0, 285.0, 291.0, 298.0, 292.0, 293.0, 286.0, 278.0, 283.0, 299.0, 280.0, 298.0, 281.0, 286.0, 296.0, 283.0, 301.0, 286.0, 290.0, 282.0, 291.0, 292.0, 287.0, 260.0, 255.0, 275.0, 301.0, 294.0, 288.0, 280.0, 296.0, 291.0, 291.0, 288.0, 299.0, 259.0, 263.0, 272.0, 301.0, 303.0, 279.0, 261.0, 258.0, 278.0, 298.0, 286.0, 296.0, 268.0, 250.0, 287.0, 283.0, 313.0, 320.0, 283.0, 299.0, 93.0, 101.0, 287.0, 295.0, 315.0, 315.0, 300.0, 287.0, 279.0, 297.0, 288.0, 291.0, 262.0, 265.0, 277.0, 299.0, 306.0, 324.0, 285.0, 302.0, 286.0, 284.0, 310.0, 317.0, 277.0, 250.0, 281.0, 301.0, 316.0, 308.0, 289.0, 292.0, 288.0, 288.0, 283.0, 293.0, 286.0, 298.0, 285.0, 291.0, 302.0, 285.0, 270.0, 257.0, 298.0, 284.0, 277.0, 299.0, 255.0, 243.0, 287.0, 292.0, 293.0, 289.0, 250.0, 268.0, 316.0, 314.0, 290.0, 292.0, 305.0, 271.0, 278.0, 309.0, 300.0, 282.0, 294.0, 288.0, 284.0, 298.0, 313.0, 314.0, 297.0, 285.0, 227.0, 241.0, 310.0, 317.0, 289.0, 287.0, 294.0, 288.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2540029262486032, "mean_processing_ms": 0.31884364369781465, "mean_inference_ms": 1.8101035219779944}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4488000, 
"num_steps_sampled": 2393600, "sample_time_ms": 21725.743, "load_time_ms": 37.383, "grad_time_ms": 10590.941, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005064256023615599, "policy_loss": -0.0036211840342730284, "vf_loss": 92.49484252929688, "vf_explained_var": 0.7542417645454407, "kl": 0.001856558839790523, "entropy": 1.1280810832977295, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2393600, "episodes_total": 5984, "training_iteration": 187, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-18-47", "timestamp": 1660252727, "time_this_iter_s": 28.611520051956177, "time_total_s": 11144.459664821625, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": 
false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11144.459664821625, "timesteps_since_restore": 2393600, "iterations_since_restore": 187, "perf": {"cpu_util_percent": 32.7725, "ram_util_percent": 58.567499999999995}} +{"episode_reward_max": 636.0, "episode_reward_min": 461.0, "episode_reward_mean": 580.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 337.0}, "policy_reward_mean": {"ppo": 290.15}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.1, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.74, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.64, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.48, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 1, 
"dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.08, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.48, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.48, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 570.0, 582.0, 570.0, 579.0, 576.0, 593.0, 587.0, 582.0, 582.0, 579.0, 576.0, 633.0, 636.0, 636.0, 587.0, 579.0, 582.0, 582.0, 519.0, 582.0, 570.0, 633.0, 579.0, 587.0, 581.0, 579.0, 582.0, 582.0, 630.0, 582.0, 567.0, 527.0, 576.0, 630.0, 587.0, 570.0, 627.0, 527.0, 582.0, 624.0, 581.0, 576.0, 576.0, 584.0, 576.0, 587.0, 527.0, 582.0, 576.0, 498.0, 579.0, 582.0, 518.0, 630.0, 582.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 468.0, 627.0, 576.0, 582.0, 582.0, 576.0, 582.0, 510.0, 579.0, 587.0, 582.0, 579.0, 627.0, 576.0, 576.0, 530.0, 579.0, 587.0, 582.0, 587.0, 461.0, 576.0, 582.0, 573.0, 630.0, 587.0, 582.0, 582.0, 627.0, 579.0, 573.0, 
582.0, 630.0, 579.0, 576.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 281.0, 294.0, 276.0, 279.0, 303.0, 289.0, 281.0, 298.0, 281.0, 278.0, 298.0, 295.0, 298.0, 292.0, 295.0, 286.0, 296.0, 297.0, 285.0, 289.0, 290.0, 275.0, 301.0, 325.0, 308.0, 317.0, 319.0, 310.0, 326.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 301.0, 281.0, 259.0, 260.0, 292.0, 290.0, 279.0, 291.0, 296.0, 337.0, 283.0, 296.0, 289.0, 298.0, 295.0, 286.0, 292.0, 287.0, 281.0, 301.0, 306.0, 276.0, 308.0, 322.0, 288.0, 294.0, 290.0, 277.0, 262.0, 265.0, 277.0, 299.0, 306.0, 324.0, 285.0, 302.0, 286.0, 284.0, 310.0, 317.0, 277.0, 250.0, 281.0, 301.0, 316.0, 308.0, 289.0, 292.0, 288.0, 288.0, 283.0, 293.0, 286.0, 298.0, 285.0, 291.0, 302.0, 285.0, 270.0, 257.0, 298.0, 284.0, 277.0, 299.0, 255.0, 243.0, 287.0, 292.0, 293.0, 289.0, 250.0, 268.0, 316.0, 314.0, 290.0, 292.0, 305.0, 271.0, 278.0, 309.0, 300.0, 282.0, 294.0, 288.0, 284.0, 298.0, 313.0, 314.0, 297.0, 285.0, 227.0, 241.0, 310.0, 317.0, 289.0, 287.0, 294.0, 288.0, 293.0, 289.0, 283.0, 293.0, 287.0, 295.0, 258.0, 252.0, 291.0, 288.0, 296.0, 291.0, 298.0, 284.0, 285.0, 294.0, 315.0, 312.0, 289.0, 287.0, 285.0, 291.0, 270.0, 260.0, 285.0, 294.0, 282.0, 305.0, 297.0, 285.0, 301.0, 286.0, 226.0, 235.0, 292.0, 284.0, 287.0, 295.0, 285.0, 288.0, 308.0, 322.0, 287.0, 300.0, 295.0, 287.0, 292.0, 290.0, 304.0, 323.0, 286.0, 293.0, 289.0, 284.0, 284.0, 298.0, 326.0, 304.0, 297.0, 282.0, 285.0, 291.0, 281.0, 298.0, 285.0, 291.0]}, 
"sampler_perf": {"mean_env_wait_ms": 1.2497077213230872, "mean_processing_ms": 0.3179872495448108, "mean_inference_ms": 1.8063396156782892}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4512000, "num_steps_sampled": 2406400, "sample_time_ms": 21491.638, "load_time_ms": 36.743, "grad_time_ms": 10320.581, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005896018352359533, "policy_loss": -0.002354246797040105, "vf_loss": 88.0772933959961, "vf_explained_var": 0.767683744430542, "kl": 0.0020883409306406975, "entropy": 1.1149283647537231, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2406400, "episodes_total": 6016, "training_iteration": 188, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-19-13", "timestamp": 1660252753, "time_this_iter_s": 25.91284203529358, "time_total_s": 11170.372506856918, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11170.372506856918, "timesteps_since_restore": 2406400, "iterations_since_restore": 188, "perf": {"cpu_util_percent": 33.778378378378385, "ram_util_percent": 58.56486486486485}} +{"episode_reward_max": 636.0, "episode_reward_min": 458.0, "episode_reward_mean": 582.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 337.0}, "policy_reward_mean": {"ppo": 291.145}, "custom_metrics": {"sparse_reward_mean": 201.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.89, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.4, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 
17.32, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.97, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.36, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.4, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.4, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.32, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.4, 
"viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.32, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 458.0, 587.0, 579.0, 516.0, 582.0, 579.0, 590.0, 587.0, 582.0, 630.0, 533.0, 579.0, 630.0, 582.0, 630.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 584.0, 567.0, 582.0, 584.0, 576.0, 564.0, 582.0, 627.0, 633.0, 627.0, 576.0, 582.0, 582.0, 576.0, 582.0, 510.0, 579.0, 587.0, 582.0, 579.0, 627.0, 576.0, 576.0, 530.0, 579.0, 587.0, 582.0, 587.0, 461.0, 576.0, 582.0, 573.0, 630.0, 587.0, 582.0, 582.0, 627.0, 579.0, 573.0, 582.0, 630.0, 579.0, 
576.0, 579.0, 576.0, 576.0, 570.0, 582.0, 570.0, 579.0, 576.0, 593.0, 587.0, 582.0, 582.0, 579.0, 576.0, 633.0, 636.0, 636.0, 587.0, 579.0, 582.0, 582.0, 519.0, 582.0, 570.0, 633.0, 579.0, 587.0, 581.0, 579.0, 582.0, 582.0, 630.0, 582.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 226.0, 232.0, 298.0, 289.0, 290.0, 289.0, 249.0, 267.0, 297.0, 285.0, 303.0, 276.0, 301.0, 289.0, 283.0, 304.0, 293.0, 289.0, 319.0, 311.0, 268.0, 265.0, 283.0, 296.0, 322.0, 308.0, 288.0, 294.0, 311.0, 319.0, 286.0, 290.0, 283.0, 299.0, 283.0, 299.0, 295.0, 292.0, 286.0, 296.0, 278.0, 304.0, 289.0, 287.0, 284.0, 300.0, 290.0, 277.0, 283.0, 299.0, 309.0, 275.0, 282.0, 294.0, 285.0, 279.0, 296.0, 286.0, 318.0, 309.0, 319.0, 314.0, 310.0, 317.0, 289.0, 287.0, 294.0, 288.0, 293.0, 289.0, 283.0, 293.0, 287.0, 295.0, 258.0, 252.0, 291.0, 288.0, 296.0, 291.0, 298.0, 284.0, 285.0, 294.0, 315.0, 312.0, 289.0, 287.0, 285.0, 291.0, 270.0, 260.0, 285.0, 294.0, 282.0, 305.0, 297.0, 285.0, 301.0, 286.0, 226.0, 235.0, 292.0, 284.0, 287.0, 295.0, 285.0, 288.0, 308.0, 322.0, 287.0, 300.0, 295.0, 287.0, 292.0, 290.0, 304.0, 323.0, 286.0, 293.0, 289.0, 284.0, 284.0, 298.0, 326.0, 304.0, 297.0, 282.0, 285.0, 291.0, 281.0, 298.0, 285.0, 291.0, 295.0, 281.0, 294.0, 276.0, 279.0, 303.0, 289.0, 281.0, 298.0, 281.0, 278.0, 298.0, 295.0, 298.0, 292.0, 295.0, 286.0, 296.0, 297.0, 285.0, 289.0, 290.0, 275.0, 301.0, 325.0, 308.0, 317.0, 319.0, 310.0, 326.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 301.0, 
281.0, 259.0, 260.0, 292.0, 290.0, 279.0, 291.0, 296.0, 337.0, 283.0, 296.0, 289.0, 298.0, 295.0, 286.0, 292.0, 287.0, 281.0, 301.0, 306.0, 276.0, 308.0, 322.0, 288.0, 294.0, 290.0, 277.0]}, "sampler_perf": {"mean_env_wait_ms": 1.245424462248642, "mean_processing_ms": 0.3171301897785842, "mean_inference_ms": 1.801636761317335}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4536000, "num_steps_sampled": 2419200, "sample_time_ms": 21345.619, "load_time_ms": 36.86, "grad_time_ms": 10234.708, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035058397334069014, "policy_loss": -0.0047208876349031925, "vf_loss": 87.84651947021484, "vf_explained_var": 0.7590529918670654, "kl": 0.0018027568003162742, "entropy": 1.1158560514450073, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2419200, "episodes_total": 6048, "training_iteration": 189, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-19-43", "timestamp": 1660252783, "time_this_iter_s": 29.704707860946655, "time_total_s": 11200.077214717865, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11200.077214717865, "timesteps_since_restore": 2419200, "iterations_since_restore": 189, "perf": {"cpu_util_percent": 33.13571428571428, "ram_util_percent": 58.52142857142859}} +{"episode_reward_max": 636.0, "episode_reward_min": 458.0, "episode_reward_mean": 581.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 337.0}, "policy_reward_mean": {"ppo": 290.745}, "custom_metrics": {"sparse_reward_mean": 200.8, 
"sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.89, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.76, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.06, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.65, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.04, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.93, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.31, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.65, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.04, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.65, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.04, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 521.0, 582.0, 579.0, 582.0, 522.0, 582.0, 527.0, 576.0, 522.0, 582.0, 582.0, 627.0, 576.0, 587.0, 627.0, 582.0, 579.0, 587.0, 573.0, 584.0, 630.0, 576.0, 579.0, 579.0, 576.0, 579.0, 576.0, 576.0, 570.0, 582.0, 570.0, 579.0, 
576.0, 593.0, 587.0, 582.0, 582.0, 579.0, 576.0, 633.0, 636.0, 636.0, 587.0, 579.0, 582.0, 582.0, 519.0, 582.0, 570.0, 633.0, 579.0, 587.0, 581.0, 579.0, 582.0, 582.0, 630.0, 582.0, 567.0, 579.0, 458.0, 587.0, 579.0, 516.0, 582.0, 579.0, 590.0, 587.0, 582.0, 630.0, 533.0, 579.0, 630.0, 582.0, 630.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 584.0, 567.0, 582.0, 584.0, 576.0, 564.0, 582.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 289.0, 288.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 288.0, 286.0, 296.0, 297.0, 285.0, 282.0, 297.0, 293.0, 289.0, 270.0, 251.0, 295.0, 287.0, 288.0, 291.0, 285.0, 297.0, 263.0, 259.0, 285.0, 297.0, 275.0, 252.0, 290.0, 286.0, 279.0, 243.0, 294.0, 288.0, 290.0, 292.0, 311.0, 316.0, 289.0, 287.0, 295.0, 292.0, 314.0, 313.0, 291.0, 291.0, 288.0, 291.0, 304.0, 283.0, 292.0, 281.0, 290.0, 294.0, 320.0, 310.0, 286.0, 290.0, 283.0, 296.0, 297.0, 282.0, 285.0, 291.0, 281.0, 298.0, 285.0, 291.0, 295.0, 281.0, 294.0, 276.0, 279.0, 303.0, 289.0, 281.0, 298.0, 281.0, 278.0, 298.0, 295.0, 298.0, 292.0, 295.0, 286.0, 296.0, 297.0, 285.0, 289.0, 290.0, 275.0, 301.0, 325.0, 308.0, 317.0, 319.0, 310.0, 326.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 301.0, 281.0, 259.0, 260.0, 292.0, 290.0, 279.0, 291.0, 296.0, 337.0, 283.0, 296.0, 289.0, 298.0, 295.0, 286.0, 292.0, 287.0, 281.0, 301.0, 306.0, 276.0, 308.0, 322.0, 288.0, 294.0, 290.0, 277.0, 287.0, 292.0, 226.0, 232.0, 298.0, 289.0, 290.0, 289.0, 249.0, 267.0, 297.0, 285.0, 303.0, 
276.0, 301.0, 289.0, 283.0, 304.0, 293.0, 289.0, 319.0, 311.0, 268.0, 265.0, 283.0, 296.0, 322.0, 308.0, 288.0, 294.0, 311.0, 319.0, 286.0, 290.0, 283.0, 299.0, 283.0, 299.0, 295.0, 292.0, 286.0, 296.0, 278.0, 304.0, 289.0, 287.0, 284.0, 300.0, 290.0, 277.0, 283.0, 299.0, 309.0, 275.0, 282.0, 294.0, 285.0, 279.0, 296.0, 286.0, 318.0, 309.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2411902050042325, "mean_processing_ms": 0.3162859757027142, "mean_inference_ms": 1.7969666432132458}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4560000, "num_steps_sampled": 2432000, "sample_time_ms": 21237.531, "load_time_ms": 36.84, "grad_time_ms": 10068.85, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006991778966039419, "policy_loss": -0.0012481998419389129, "vf_loss": 87.9997329711914, "vf_explained_var": 0.7513763904571533, "kl": 0.0021018313709646463, "entropy": 1.119996428489685, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2432000, "episodes_total": 6080, "training_iteration": 190, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-20-11", "timestamp": 1660252811, "time_this_iter_s": 28.08810520172119, "time_total_s": 11228.165319919586, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11228.165319919586, "timesteps_since_restore": 2432000, "iterations_since_restore": 190, "perf": {"cpu_util_percent": 33.77, "ram_util_percent": 58.345000000000006}} +{"episode_reward_max": 636.0, "episode_reward_min": 441.0, "episode_reward_mean": 580.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 290.06}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 178.92, "shaped_reward_min": 121, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.02, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.84, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.29, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, 
"useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.37, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.17, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.33, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.37, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.17, 
"optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.37, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.17, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 441.0, 582.0, 636.0, 582.0, 576.0, 582.0, 579.0, 621.0, 567.0, 579.0, 
576.0, 567.0, 579.0, 582.0, 587.0, 573.0, 522.0, 581.0, 633.0, 627.0, 579.0, 587.0, 573.0, 579.0, 627.0, 587.0, 579.0, 582.0, 627.0, 522.0, 582.0, 630.0, 582.0, 567.0, 579.0, 458.0, 587.0, 579.0, 516.0, 582.0, 579.0, 590.0, 587.0, 582.0, 630.0, 533.0, 579.0, 630.0, 582.0, 630.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 584.0, 567.0, 582.0, 584.0, 576.0, 564.0, 582.0, 627.0, 633.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 521.0, 582.0, 579.0, 582.0, 522.0, 582.0, 527.0, 576.0, 522.0, 582.0, 582.0, 627.0, 576.0, 587.0, 627.0, 582.0, 579.0, 587.0, 573.0, 584.0, 630.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 287.0, 295.0, 284.0, 220.0, 221.0, 286.0, 296.0, 320.0, 316.0, 294.0, 288.0, 298.0, 278.0, 293.0, 289.0, 288.0, 291.0, 319.0, 302.0, 292.0, 275.0, 296.0, 283.0, 289.0, 287.0, 298.0, 269.0, 289.0, 290.0, 288.0, 294.0, 287.0, 300.0, 290.0, 283.0, 256.0, 266.0, 299.0, 282.0, 321.0, 312.0, 311.0, 316.0, 296.0, 283.0, 303.0, 284.0, 282.0, 291.0, 286.0, 293.0, 321.0, 306.0, 295.0, 292.0, 285.0, 294.0, 285.0, 297.0, 303.0, 324.0, 261.0, 261.0, 306.0, 276.0, 308.0, 322.0, 288.0, 294.0, 290.0, 277.0, 287.0, 292.0, 226.0, 232.0, 298.0, 289.0, 290.0, 289.0, 249.0, 267.0, 297.0, 285.0, 303.0, 276.0, 301.0, 289.0, 283.0, 304.0, 293.0, 289.0, 319.0, 311.0, 268.0, 265.0, 283.0, 296.0, 322.0, 308.0, 288.0, 294.0, 311.0, 319.0, 286.0, 290.0, 283.0, 299.0, 283.0, 299.0, 295.0, 292.0, 286.0, 296.0, 278.0, 304.0, 289.0, 287.0, 284.0, 300.0, 
290.0, 277.0, 283.0, 299.0, 309.0, 275.0, 282.0, 294.0, 285.0, 279.0, 296.0, 286.0, 318.0, 309.0, 319.0, 314.0, 290.0, 289.0, 288.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 288.0, 286.0, 296.0, 297.0, 285.0, 282.0, 297.0, 293.0, 289.0, 270.0, 251.0, 295.0, 287.0, 288.0, 291.0, 285.0, 297.0, 263.0, 259.0, 285.0, 297.0, 275.0, 252.0, 290.0, 286.0, 279.0, 243.0, 294.0, 288.0, 290.0, 292.0, 311.0, 316.0, 289.0, 287.0, 295.0, 292.0, 314.0, 313.0, 291.0, 291.0, 288.0, 291.0, 304.0, 283.0, 292.0, 281.0, 290.0, 294.0, 320.0, 310.0, 286.0, 290.0, 283.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.237012566934364, "mean_processing_ms": 0.3154539706719903, "mean_inference_ms": 1.7926176479402052}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4584000, "num_steps_sampled": 2444800, "sample_time_ms": 21212.772, "load_time_ms": 37.115, "grad_time_ms": 9943.576, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002480272436514497, "policy_loss": -0.005884131882339716, "vf_loss": 89.30957794189453, "vf_explained_var": 0.7648332118988037, "kl": 0.0016885297372937202, "entropy": 1.1330945491790771, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2444800, "episodes_total": 6112, "training_iteration": 191, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-20-41", "timestamp": 1660252841, "time_this_iter_s": 29.47701120376587, "time_total_s": 11257.642331123352, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11257.642331123352, "timesteps_since_restore": 2444800, "iterations_since_restore": 191, "perf": {"cpu_util_percent": 
32.38536585365854, "ram_util_percent": 58.368292682926814}} +{"episode_reward_max": 636.0, "episode_reward_min": 441.0, "episode_reward_mean": 583.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 291.615}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.63, "shaped_reward_min": 121, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.94, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.69, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, 
"onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.33, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.67, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 15.33, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.33, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 587.0, 582.0, 630.0, 584.0, 579.0, 587.0, 584.0, 630.0, 522.0, 573.0, 630.0, 582.0, 570.0, 627.0, 627.0, 573.0, 579.0, 582.0, 582.0, 582.0, 633.0, 582.0, 576.0, 630.0, 582.0, 573.0, 579.0, 544.0, 579.0, 627.0, 564.0, 582.0, 627.0, 633.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 521.0, 582.0, 579.0, 582.0, 522.0, 582.0, 527.0, 576.0, 522.0, 582.0, 582.0, 627.0, 576.0, 587.0, 627.0, 582.0, 579.0, 587.0, 573.0, 584.0, 630.0, 576.0, 579.0, 579.0, 579.0, 441.0, 582.0, 636.0, 582.0, 576.0, 582.0, 579.0, 621.0, 567.0, 579.0, 576.0, 567.0, 579.0, 582.0, 587.0, 573.0, 522.0, 581.0, 633.0, 627.0, 579.0, 587.0, 573.0, 579.0, 627.0, 587.0, 579.0, 582.0, 627.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 288.0, 291.0, 289.0, 298.0, 296.0, 286.0, 311.0, 319.0, 301.0, 283.0, 298.0, 281.0, 295.0, 292.0, 295.0, 289.0, 311.0, 319.0, 260.0, 262.0, 289.0, 284.0, 331.0, 299.0, 296.0, 286.0, 288.0, 282.0, 328.0, 299.0, 315.0, 312.0, 289.0, 284.0, 290.0, 289.0, 284.0, 298.0, 300.0, 282.0, 299.0, 283.0, 318.0, 315.0, 291.0, 291.0, 285.0, 291.0, 317.0, 313.0, 281.0, 301.0, 290.0, 283.0, 290.0, 289.0, 269.0, 275.0, 291.0, 288.0, 316.0, 311.0, 285.0, 279.0, 296.0, 286.0, 318.0, 309.0, 319.0, 314.0, 290.0, 289.0, 288.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 288.0, 286.0, 296.0, 297.0, 285.0, 282.0, 297.0, 293.0, 289.0, 270.0, 
251.0, 295.0, 287.0, 288.0, 291.0, 285.0, 297.0, 263.0, 259.0, 285.0, 297.0, 275.0, 252.0, 290.0, 286.0, 279.0, 243.0, 294.0, 288.0, 290.0, 292.0, 311.0, 316.0, 289.0, 287.0, 295.0, 292.0, 314.0, 313.0, 291.0, 291.0, 288.0, 291.0, 304.0, 283.0, 292.0, 281.0, 290.0, 294.0, 320.0, 310.0, 286.0, 290.0, 283.0, 296.0, 292.0, 287.0, 295.0, 284.0, 220.0, 221.0, 286.0, 296.0, 320.0, 316.0, 294.0, 288.0, 298.0, 278.0, 293.0, 289.0, 288.0, 291.0, 319.0, 302.0, 292.0, 275.0, 296.0, 283.0, 289.0, 287.0, 298.0, 269.0, 289.0, 290.0, 288.0, 294.0, 287.0, 300.0, 290.0, 283.0, 256.0, 266.0, 299.0, 282.0, 321.0, 312.0, 311.0, 316.0, 296.0, 283.0, 303.0, 284.0, 282.0, 291.0, 286.0, 293.0, 321.0, 306.0, 295.0, 292.0, 285.0, 294.0, 285.0, 297.0, 303.0, 324.0, 261.0, 261.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2328827841512373, "mean_processing_ms": 0.3146313961274523, "mean_inference_ms": 1.788385259276164}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4608000, "num_steps_sampled": 2457600, "sample_time_ms": 21198.373, "load_time_ms": 37.126, "grad_time_ms": 9801.163, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001665265765041113, "policy_loss": -0.006540585309267044, "vf_loss": 87.64542388916016, "vf_explained_var": 0.7604849338531494, "kl": 0.0022042018827050924, "entropy": 1.1173783540725708, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2457600, "episodes_total": 6144, "training_iteration": 192, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-21-10", "timestamp": 1660252870, "time_this_iter_s": 29.205125331878662, "time_total_s": 11286.84745645523, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11286.84745645523, "timesteps_since_restore": 2457600, "iterations_since_restore": 192, "perf": {"cpu_util_percent": 31.859523809523814, "ram_util_percent": 58.37619047619048}} +{"episode_reward_max": 636.0, "episode_reward_min": 441.0, "episode_reward_mean": 586.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 293.055}, "custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 180.51, "shaped_reward_min": 121, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.32, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, 
"useful_onion_pickup_agent_0_mean": 15.98, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.55, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.66, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.55, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.55, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 587.0, 582.0, 636.0, 579.0, 570.0, 513.0, 579.0, 582.0, 587.0, 579.0, 582.0, 564.0, 633.0, 587.0, 630.0, 627.0, 579.0, 636.0, 582.0, 587.0, 584.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 579.0, 587.0, 579.0, 567.0, 584.0, 630.0, 576.0, 579.0, 579.0, 579.0, 441.0, 582.0, 636.0, 582.0, 576.0, 582.0, 579.0, 621.0, 567.0, 579.0, 576.0, 567.0, 579.0, 582.0, 587.0, 573.0, 522.0, 581.0, 633.0, 627.0, 579.0, 587.0, 573.0, 579.0, 627.0, 587.0, 579.0, 582.0, 627.0, 522.0, 579.0, 579.0, 587.0, 582.0, 630.0, 584.0, 579.0, 587.0, 584.0, 630.0, 522.0, 573.0, 630.0, 582.0, 570.0, 627.0, 627.0, 573.0, 579.0, 582.0, 582.0, 582.0, 633.0, 582.0, 576.0, 630.0, 582.0, 573.0, 579.0, 544.0, 579.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 296.0, 292.0, 295.0, 274.0, 308.0, 312.0, 324.0, 304.0, 275.0, 287.0, 283.0, 261.0, 252.0, 277.0, 302.0, 298.0, 284.0, 291.0, 296.0, 278.0, 301.0, 285.0, 297.0, 285.0, 279.0, 323.0, 310.0, 278.0, 309.0, 308.0, 322.0, 311.0, 316.0, 288.0, 291.0, 314.0, 322.0, 286.0, 296.0, 294.0, 293.0, 286.0, 298.0, 293.0, 289.0, 294.0, 288.0, 312.0, 315.0, 287.0, 295.0, 295.0, 284.0, 293.0, 286.0, 296.0, 283.0, 298.0, 289.0, 
284.0, 295.0, 288.0, 279.0, 290.0, 294.0, 320.0, 310.0, 286.0, 290.0, 283.0, 296.0, 292.0, 287.0, 295.0, 284.0, 220.0, 221.0, 286.0, 296.0, 320.0, 316.0, 294.0, 288.0, 298.0, 278.0, 293.0, 289.0, 288.0, 291.0, 319.0, 302.0, 292.0, 275.0, 296.0, 283.0, 289.0, 287.0, 298.0, 269.0, 289.0, 290.0, 288.0, 294.0, 287.0, 300.0, 290.0, 283.0, 256.0, 266.0, 299.0, 282.0, 321.0, 312.0, 311.0, 316.0, 296.0, 283.0, 303.0, 284.0, 282.0, 291.0, 286.0, 293.0, 321.0, 306.0, 295.0, 292.0, 285.0, 294.0, 285.0, 297.0, 303.0, 324.0, 261.0, 261.0, 291.0, 288.0, 288.0, 291.0, 289.0, 298.0, 296.0, 286.0, 311.0, 319.0, 301.0, 283.0, 298.0, 281.0, 295.0, 292.0, 295.0, 289.0, 311.0, 319.0, 260.0, 262.0, 289.0, 284.0, 331.0, 299.0, 296.0, 286.0, 288.0, 282.0, 328.0, 299.0, 315.0, 312.0, 289.0, 284.0, 290.0, 289.0, 284.0, 298.0, 300.0, 282.0, 299.0, 283.0, 318.0, 315.0, 291.0, 291.0, 285.0, 291.0, 317.0, 313.0, 281.0, 301.0, 290.0, 283.0, 290.0, 289.0, 269.0, 275.0, 291.0, 288.0, 316.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2287946124429, "mean_processing_ms": 0.3138148366539807, "mean_inference_ms": 1.7841696712783897}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4632000, "num_steps_sampled": 2470400, "sample_time_ms": 20938.204, "load_time_ms": 37.066, "grad_time_ms": 9514.831, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005852494388818741, "policy_loss": -0.0018877206603065133, "vf_loss": 83.014892578125, "vf_explained_var": 0.7724275588989258, "kl": 0.0019637763034552336, "entropy": 1.1225537061691284, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2470400, "episodes_total": 6176, "training_iteration": 193, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-21-39", "timestamp": 1660252899, "time_this_iter_s": 28.640799045562744, "time_total_s": 11315.488255500793, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11315.488255500793, "timesteps_since_restore": 2470400, "iterations_since_restore": 193, "perf": {"cpu_util_percent": 32.46, "ram_util_percent": 58.379999999999995}} +{"episode_reward_max": 636.0, "episode_reward_min": 513.0, "episode_reward_mean": 587.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 293.645}, "custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.69, "shaped_reward_min": 153, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 
16.56, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.44, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.71, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 2, 
"soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.71, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.71, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 576.0, 582.0, 579.0, 636.0, 576.0, 582.0, 590.0, 584.0, 582.0, 573.0, 582.0, 525.0, 573.0, 587.0, 624.0, 576.0, 579.0, 582.0, 636.0, 582.0, 579.0, 581.0, 576.0, 587.0, 582.0, 582.0, 582.0, 582.0, 590.0, 582.0, 587.0, 579.0, 582.0, 627.0, 522.0, 579.0, 579.0, 587.0, 582.0, 630.0, 584.0, 579.0, 587.0, 584.0, 630.0, 522.0, 573.0, 630.0, 582.0, 570.0, 627.0, 627.0, 573.0, 579.0, 582.0, 582.0, 582.0, 633.0, 582.0, 576.0, 630.0, 582.0, 573.0, 579.0, 544.0, 579.0, 627.0, 587.0, 587.0, 582.0, 636.0, 579.0, 570.0, 513.0, 579.0, 582.0, 587.0, 579.0, 582.0, 564.0, 633.0, 587.0, 630.0, 627.0, 579.0, 636.0, 582.0, 587.0, 584.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 579.0, 587.0, 579.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 311.0, 295.0, 281.0, 288.0, 294.0, 303.0, 276.0, 317.0, 319.0, 288.0, 288.0, 290.0, 292.0, 291.0, 299.0, 292.0, 292.0, 290.0, 292.0, 284.0, 289.0, 293.0, 289.0, 260.0, 265.0, 272.0, 301.0, 286.0, 301.0, 314.0, 310.0, 280.0, 296.0, 275.0, 304.0, 281.0, 
301.0, 316.0, 320.0, 292.0, 290.0, 300.0, 279.0, 297.0, 284.0, 285.0, 291.0, 294.0, 293.0, 291.0, 291.0, 293.0, 289.0, 287.0, 295.0, 292.0, 290.0, 301.0, 289.0, 286.0, 296.0, 294.0, 293.0, 285.0, 294.0, 285.0, 297.0, 303.0, 324.0, 261.0, 261.0, 291.0, 288.0, 288.0, 291.0, 289.0, 298.0, 296.0, 286.0, 311.0, 319.0, 301.0, 283.0, 298.0, 281.0, 295.0, 292.0, 295.0, 289.0, 311.0, 319.0, 260.0, 262.0, 289.0, 284.0, 331.0, 299.0, 296.0, 286.0, 288.0, 282.0, 328.0, 299.0, 315.0, 312.0, 289.0, 284.0, 290.0, 289.0, 284.0, 298.0, 300.0, 282.0, 299.0, 283.0, 318.0, 315.0, 291.0, 291.0, 285.0, 291.0, 317.0, 313.0, 281.0, 301.0, 290.0, 283.0, 290.0, 289.0, 269.0, 275.0, 291.0, 288.0, 316.0, 311.0, 291.0, 296.0, 292.0, 295.0, 274.0, 308.0, 312.0, 324.0, 304.0, 275.0, 287.0, 283.0, 261.0, 252.0, 277.0, 302.0, 298.0, 284.0, 291.0, 296.0, 278.0, 301.0, 285.0, 297.0, 285.0, 279.0, 323.0, 310.0, 278.0, 309.0, 308.0, 322.0, 311.0, 316.0, 288.0, 291.0, 314.0, 322.0, 286.0, 296.0, 294.0, 293.0, 286.0, 298.0, 293.0, 289.0, 294.0, 288.0, 312.0, 315.0, 287.0, 295.0, 295.0, 284.0, 293.0, 286.0, 296.0, 283.0, 298.0, 289.0, 284.0, 295.0, 288.0, 279.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2247614188456033, "mean_processing_ms": 0.3130098403023273, "mean_inference_ms": 1.7801660461855682}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4656000, "num_steps_sampled": 2483200, "sample_time_ms": 21025.315, "load_time_ms": 37.175, "grad_time_ms": 9483.469, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005179767496883869, "policy_loss": -0.003016052069142461, "vf_loss": 87.5873031616211, "vf_explained_var": 0.7668092250823975, "kl": 0.0019739444833248854, "entropy": 1.125815987586975, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2483200, "episodes_total": 6208, "training_iteration": 194, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-22-11", "timestamp": 1660252931, "time_this_iter_s": 32.347792863845825, "time_total_s": 11347.83604836464, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11347.83604836464, "timesteps_since_restore": 2483200, "iterations_since_restore": 194, "perf": {"cpu_util_percent": 33.310869565217395, "ram_util_percent": 58.36521739130432}} +{"episode_reward_max": 636.0, "episode_reward_min": 460.0, "episode_reward_mean": 584.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 292.085}, "custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 180.97, "shaped_reward_min": 140, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.83, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.02, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.24, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.83, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.18, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.74, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.27, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.83, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.18, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.83, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.18, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 579.0, 579.0, 579.0, 536.0, 636.0, 576.0, 579.0, 582.0, 590.0, 587.0, 512.0, 582.0, 576.0, 567.0, 579.0, 579.0, 576.0, 579.0, 582.0, 460.0, 582.0, 633.0, 582.0, 573.0, 630.0, 582.0, 576.0, 587.0, 579.0, 630.0, 579.0, 544.0, 579.0, 627.0, 587.0, 587.0, 582.0, 636.0, 579.0, 570.0, 513.0, 579.0, 582.0, 587.0, 579.0, 582.0, 564.0, 633.0, 587.0, 630.0, 627.0, 579.0, 636.0, 582.0, 587.0, 584.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 579.0, 587.0, 579.0, 567.0, 633.0, 576.0, 582.0, 579.0, 636.0, 576.0, 582.0, 590.0, 584.0, 582.0, 573.0, 582.0, 525.0, 573.0, 587.0, 624.0, 576.0, 579.0, 582.0, 636.0, 582.0, 579.0, 581.0, 576.0, 587.0, 582.0, 582.0, 582.0, 582.0, 590.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 277.0, 290.0, 292.0, 286.0, 
293.0, 288.0, 291.0, 285.0, 294.0, 266.0, 270.0, 326.0, 310.0, 290.0, 286.0, 293.0, 286.0, 291.0, 291.0, 303.0, 287.0, 295.0, 292.0, 247.0, 265.0, 283.0, 299.0, 298.0, 278.0, 289.0, 278.0, 283.0, 296.0, 293.0, 286.0, 301.0, 275.0, 287.0, 292.0, 291.0, 291.0, 229.0, 231.0, 281.0, 301.0, 313.0, 320.0, 290.0, 292.0, 281.0, 292.0, 322.0, 308.0, 301.0, 281.0, 288.0, 288.0, 306.0, 281.0, 290.0, 289.0, 322.0, 308.0, 290.0, 289.0, 269.0, 275.0, 291.0, 288.0, 316.0, 311.0, 291.0, 296.0, 292.0, 295.0, 274.0, 308.0, 312.0, 324.0, 304.0, 275.0, 287.0, 283.0, 261.0, 252.0, 277.0, 302.0, 298.0, 284.0, 291.0, 296.0, 278.0, 301.0, 285.0, 297.0, 285.0, 279.0, 323.0, 310.0, 278.0, 309.0, 308.0, 322.0, 311.0, 316.0, 288.0, 291.0, 314.0, 322.0, 286.0, 296.0, 294.0, 293.0, 286.0, 298.0, 293.0, 289.0, 294.0, 288.0, 312.0, 315.0, 287.0, 295.0, 295.0, 284.0, 293.0, 286.0, 296.0, 283.0, 298.0, 289.0, 284.0, 295.0, 288.0, 279.0, 322.0, 311.0, 295.0, 281.0, 288.0, 294.0, 303.0, 276.0, 317.0, 319.0, 288.0, 288.0, 290.0, 292.0, 291.0, 299.0, 292.0, 292.0, 290.0, 292.0, 284.0, 289.0, 293.0, 289.0, 260.0, 265.0, 272.0, 301.0, 286.0, 301.0, 314.0, 310.0, 280.0, 296.0, 275.0, 304.0, 281.0, 301.0, 316.0, 320.0, 292.0, 290.0, 300.0, 279.0, 297.0, 284.0, 285.0, 291.0, 294.0, 293.0, 291.0, 291.0, 293.0, 289.0, 287.0, 295.0, 292.0, 290.0, 301.0, 289.0, 286.0, 296.0, 294.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2207711535428367, "mean_processing_ms": 0.31221340698687855, "mean_inference_ms": 1.7762098630677763}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4680000, "num_steps_sampled": 2496000, "sample_time_ms": 21023.805, "load_time_ms": 36.814, "grad_time_ms": 9071.865, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004175250884145498, "policy_loss": -0.004759882111102343, "vf_loss": 95.0163803100586, "vf_explained_var": 0.7534318566322327, "kl": 0.0021568441297858953, "entropy": 1.1329950094223022, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2496000, "episodes_total": 6240, "training_iteration": 195, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-22-40", "timestamp": 1660252960, "time_this_iter_s": 28.54381275177002, "time_total_s": 11376.37986111641, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11376.37986111641, "timesteps_since_restore": 2496000, "iterations_since_restore": 195, "perf": {"cpu_util_percent": 32.82000000000001, "ram_util_percent": 58.44}} +{"episode_reward_max": 636.0, "episode_reward_min": 460.0, "episode_reward_mean": 583.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 291.515}, "custom_metrics": {"sparse_reward_mean": 201.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 180.23, "shaped_reward_min": 140, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, 
"useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.29, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.81, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.61, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.38, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.52, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, 
"useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.38, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.52, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.38, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.52, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, 
"catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 519.0, 510.0, 584.0, 587.0, 630.0, 587.0, 582.0, 587.0, 579.0, 573.0, 579.0, 633.0, 579.0, 587.0, 633.0, 579.0, 582.0, 579.0, 570.0, 630.0, 630.0, 579.0, 573.0, 582.0, 573.0, 579.0, 567.0, 582.0, 573.0, 630.0, 582.0, 579.0, 587.0, 579.0, 567.0, 633.0, 576.0, 582.0, 579.0, 636.0, 576.0, 582.0, 590.0, 584.0, 582.0, 573.0, 582.0, 525.0, 573.0, 587.0, 624.0, 576.0, 579.0, 582.0, 636.0, 582.0, 579.0, 581.0, 576.0, 587.0, 582.0, 582.0, 582.0, 582.0, 590.0, 582.0, 587.0, 573.0, 582.0, 579.0, 579.0, 579.0, 536.0, 636.0, 576.0, 579.0, 582.0, 590.0, 587.0, 512.0, 582.0, 576.0, 567.0, 579.0, 579.0, 576.0, 579.0, 582.0, 460.0, 582.0, 633.0, 582.0, 573.0, 630.0, 582.0, 576.0, 587.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 281.0, 257.0, 262.0, 242.0, 268.0, 289.0, 295.0, 298.0, 289.0, 313.0, 317.0, 285.0, 302.0, 286.0, 296.0, 285.0, 302.0, 283.0, 296.0, 289.0, 284.0, 296.0, 283.0, 314.0, 319.0, 285.0, 294.0, 293.0, 294.0, 306.0, 327.0, 287.0, 292.0, 298.0, 284.0, 298.0, 281.0, 281.0, 289.0, 316.0, 314.0, 311.0, 319.0, 293.0, 286.0, 282.0, 291.0, 292.0, 290.0, 288.0, 285.0, 285.0, 294.0, 274.0, 293.0, 293.0, 289.0, 277.0, 296.0, 321.0, 309.0, 288.0, 294.0, 296.0, 283.0, 298.0, 289.0, 284.0, 295.0, 288.0, 279.0, 322.0, 311.0, 295.0, 281.0, 288.0, 294.0, 303.0, 276.0, 317.0, 319.0, 288.0, 288.0, 290.0, 292.0, 291.0, 299.0, 292.0, 292.0, 290.0, 292.0, 284.0, 289.0, 293.0, 289.0, 260.0, 265.0, 272.0, 301.0, 286.0, 301.0, 314.0, 310.0, 280.0, 296.0, 275.0, 304.0, 281.0, 301.0, 316.0, 320.0, 292.0, 290.0, 300.0, 279.0, 297.0, 284.0, 285.0, 291.0, 294.0, 293.0, 291.0, 291.0, 293.0, 289.0, 287.0, 295.0, 292.0, 290.0, 301.0, 289.0, 286.0, 296.0, 294.0, 293.0, 296.0, 277.0, 290.0, 292.0, 286.0, 293.0, 288.0, 291.0, 285.0, 294.0, 266.0, 270.0, 326.0, 310.0, 290.0, 286.0, 293.0, 286.0, 291.0, 291.0, 303.0, 287.0, 295.0, 292.0, 247.0, 265.0, 283.0, 299.0, 298.0, 278.0, 289.0, 278.0, 283.0, 296.0, 293.0, 286.0, 301.0, 275.0, 287.0, 292.0, 291.0, 291.0, 229.0, 231.0, 281.0, 301.0, 313.0, 320.0, 290.0, 292.0, 281.0, 292.0, 322.0, 308.0, 301.0, 281.0, 288.0, 288.0, 306.0, 281.0, 290.0, 289.0, 322.0, 308.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2168243946409252, "mean_processing_ms": 0.31142476273286085, "mean_inference_ms": 1.7722672001307933}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4704000, "num_steps_sampled": 2508800, "sample_time_ms": 20149.8, "load_time_ms": 36.819, "grad_time_ms": 8683.766, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 
0.005854760762304068, "policy_loss": -0.0029166024178266525, "vf_loss": 93.378173828125, "vf_explained_var": 0.7535201907157898, "kl": 0.00207762373611331, "entropy": 1.1329069137573242, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2508800, "episodes_total": 6272, "training_iteration": 196, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-23-08", "timestamp": 1660252988, "time_this_iter_s": 28.434014320373535, "time_total_s": 11404.813875436783, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], 
[Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", 
"new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11404.813875436783, "timesteps_since_restore": 2508800, "iterations_since_restore": 196, "perf": {"cpu_util_percent": 34.197500000000005, "ram_util_percent": 58.575}} +{"episode_reward_max": 636.0, "episode_reward_min": 460.0, "episode_reward_mean": 581.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 290.86}, "custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.72, "shaped_reward_min": 140, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, 
"tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.38, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.95, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.52, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.23, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.05, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 0, 
"useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.52, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.23, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.52, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.23, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, 
"catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 587.0, 587.0, 570.0, 579.0, 564.0, 582.0, 627.0, 539.0, 582.0, 630.0, 582.0, 579.0, 587.0, 573.0, 579.0, 579.0, 633.0, 582.0, 584.0, 582.0, 582.0, 582.0, 587.0, 579.0, 582.0, 579.0, 627.0, 576.0, 582.0, 516.0, 503.0, 582.0, 590.0, 582.0, 587.0, 573.0, 582.0, 579.0, 579.0, 579.0, 536.0, 636.0, 576.0, 579.0, 582.0, 590.0, 587.0, 512.0, 582.0, 576.0, 567.0, 579.0, 579.0, 576.0, 579.0, 582.0, 460.0, 582.0, 633.0, 582.0, 573.0, 630.0, 582.0, 576.0, 587.0, 579.0, 630.0, 579.0, 519.0, 510.0, 584.0, 587.0, 630.0, 587.0, 582.0, 587.0, 579.0, 573.0, 579.0, 633.0, 579.0, 587.0, 633.0, 579.0, 582.0, 579.0, 570.0, 630.0, 630.0, 579.0, 573.0, 582.0, 573.0, 579.0, 567.0, 582.0, 573.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 297.0, 290.0, 287.0, 300.0, 289.0, 281.0, 290.0, 289.0, 273.0, 291.0, 291.0, 291.0, 327.0, 300.0, 272.0, 267.0, 283.0, 299.0, 319.0, 311.0, 292.0, 290.0, 289.0, 290.0, 294.0, 293.0, 288.0, 285.0, 290.0, 289.0, 276.0, 303.0, 310.0, 323.0, 286.0, 296.0, 299.0, 285.0, 295.0, 287.0, 281.0, 301.0, 288.0, 294.0, 289.0, 298.0, 293.0, 286.0, 288.0, 294.0, 281.0, 298.0, 321.0, 306.0, 296.0, 280.0, 293.0, 289.0, 250.0, 266.0, 256.0, 247.0, 292.0, 290.0, 301.0, 289.0, 286.0, 296.0, 294.0, 293.0, 296.0, 277.0, 290.0, 292.0, 286.0, 293.0, 288.0, 291.0, 285.0, 294.0, 266.0, 270.0, 326.0, 310.0, 290.0, 286.0, 293.0, 286.0, 291.0, 291.0, 303.0, 287.0, 295.0, 292.0, 247.0, 265.0, 283.0, 299.0, 298.0, 278.0, 289.0, 278.0, 283.0, 296.0, 293.0, 286.0, 301.0, 275.0, 287.0, 292.0, 291.0, 291.0, 229.0, 231.0, 281.0, 301.0, 313.0, 320.0, 290.0, 292.0, 281.0, 292.0, 322.0, 308.0, 301.0, 281.0, 288.0, 288.0, 306.0, 281.0, 290.0, 289.0, 322.0, 308.0, 298.0, 281.0, 257.0, 262.0, 242.0, 268.0, 289.0, 295.0, 298.0, 289.0, 313.0, 317.0, 285.0, 302.0, 286.0, 296.0, 285.0, 302.0, 283.0, 296.0, 289.0, 284.0, 296.0, 283.0, 314.0, 319.0, 285.0, 294.0, 293.0, 294.0, 306.0, 327.0, 287.0, 292.0, 298.0, 284.0, 298.0, 281.0, 281.0, 289.0, 316.0, 314.0, 311.0, 319.0, 293.0, 286.0, 282.0, 291.0, 292.0, 290.0, 288.0, 285.0, 285.0, 294.0, 274.0, 293.0, 293.0, 289.0, 277.0, 296.0, 321.0, 309.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2129132177848776, "mean_processing_ms": 0.31064427839869574, "mean_inference_ms": 1.768252985066466}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4728000, "num_steps_sampled": 2521600, "sample_time_ms": 
20346.302, "load_time_ms": 37.457, "grad_time_ms": 8681.875, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004015960264950991, "policy_loss": -0.004478786140680313, "vf_loss": 90.65137481689453, "vf_explained_var": 0.7576496601104736, "kl": 0.0018428467446938157, "entropy": 1.140787959098816, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2521600, "episodes_total": 6304, "training_iteration": 197, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-23-39", "timestamp": 1660253019, "time_this_iter_s": 30.565216064453125, "time_total_s": 11435.379091501236, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], 
"counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, 
"object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11435.379091501236, "timesteps_since_restore": 2521600, "iterations_since_restore": 197, "perf": {"cpu_util_percent": 35.77272727272727, "ram_util_percent": 59.17272727272728}} +{"episode_reward_max": 633.0, "episode_reward_min": 498.0, "episode_reward_mean": 580.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 290.44}, "custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.28, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.22, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.24, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.92, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.51, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.19, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 3, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.43, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.19, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.19, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 498.0, 587.0, 573.0, 576.0, 587.0, 582.0, 587.0, 525.0, 576.0, 533.0, 630.0, 576.0, 573.0, 582.0, 582.0, 579.0, 582.0, 527.0, 573.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 579.0, 579.0, 579.0, 582.0, 576.0, 587.0, 579.0, 630.0, 579.0, 519.0, 510.0, 584.0, 587.0, 630.0, 587.0, 582.0, 587.0, 579.0, 573.0, 579.0, 633.0, 579.0, 587.0, 633.0, 579.0, 582.0, 579.0, 570.0, 630.0, 630.0, 579.0, 573.0, 582.0, 573.0, 579.0, 567.0, 582.0, 573.0, 630.0, 582.0, 587.0, 587.0, 587.0, 570.0, 579.0, 564.0, 582.0, 627.0, 539.0, 582.0, 630.0, 582.0, 579.0, 587.0, 573.0, 579.0, 579.0, 633.0, 582.0, 584.0, 582.0, 582.0, 582.0, 587.0, 579.0, 582.0, 
579.0, 627.0, 576.0, 582.0, 516.0, 503.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 321.0, 253.0, 245.0, 287.0, 300.0, 281.0, 292.0, 288.0, 288.0, 294.0, 293.0, 285.0, 297.0, 288.0, 299.0, 266.0, 259.0, 283.0, 293.0, 265.0, 268.0, 299.0, 331.0, 293.0, 283.0, 293.0, 280.0, 286.0, 296.0, 285.0, 297.0, 299.0, 280.0, 303.0, 279.0, 267.0, 260.0, 293.0, 280.0, 282.0, 294.0, 299.0, 283.0, 295.0, 284.0, 286.0, 293.0, 298.0, 284.0, 293.0, 286.0, 292.0, 284.0, 287.0, 292.0, 293.0, 286.0, 288.0, 291.0, 294.0, 285.0, 298.0, 284.0, 288.0, 288.0, 306.0, 281.0, 290.0, 289.0, 322.0, 308.0, 298.0, 281.0, 257.0, 262.0, 242.0, 268.0, 289.0, 295.0, 298.0, 289.0, 313.0, 317.0, 285.0, 302.0, 286.0, 296.0, 285.0, 302.0, 283.0, 296.0, 289.0, 284.0, 296.0, 283.0, 314.0, 319.0, 285.0, 294.0, 293.0, 294.0, 306.0, 327.0, 287.0, 292.0, 298.0, 284.0, 298.0, 281.0, 281.0, 289.0, 316.0, 314.0, 311.0, 319.0, 293.0, 286.0, 282.0, 291.0, 292.0, 290.0, 288.0, 285.0, 285.0, 294.0, 274.0, 293.0, 293.0, 289.0, 277.0, 296.0, 321.0, 309.0, 288.0, 294.0, 296.0, 291.0, 297.0, 290.0, 287.0, 300.0, 289.0, 281.0, 290.0, 289.0, 273.0, 291.0, 291.0, 291.0, 327.0, 300.0, 272.0, 267.0, 283.0, 299.0, 319.0, 311.0, 292.0, 290.0, 289.0, 290.0, 294.0, 293.0, 288.0, 285.0, 290.0, 289.0, 276.0, 303.0, 310.0, 323.0, 286.0, 296.0, 299.0, 285.0, 295.0, 287.0, 281.0, 301.0, 288.0, 294.0, 289.0, 298.0, 293.0, 286.0, 288.0, 294.0, 281.0, 298.0, 321.0, 306.0, 296.0, 280.0, 293.0, 289.0, 250.0, 266.0, 256.0, 247.0]}, 
"sampler_perf": {"mean_env_wait_ms": 1.2090434460421848, "mean_processing_ms": 0.3098731145088783, "mean_inference_ms": 1.7643973758778697}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4752000, "num_steps_sampled": 2534400, "sample_time_ms": 20635.573, "load_time_ms": 37.478, "grad_time_ms": 8930.84, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.008007452823221684, "policy_loss": -0.00045695496373809874, "vf_loss": 90.38675689697266, "vf_explained_var": 0.7534659504890442, "kl": 0.0025916944723576307, "entropy": 1.1485199928283691, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2534400, "episodes_total": 6336, "training_iteration": 198, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-24-10", "timestamp": 1660253050, "time_this_iter_s": 31.295607089996338, "time_total_s": 11466.674698591232, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11466.674698591232, "timesteps_since_restore": 2534400, "iterations_since_restore": 198, "perf": {"cpu_util_percent": 30.265909090909087, "ram_util_percent": 58.70227272727273}} +{"episode_reward_max": 633.0, "episode_reward_min": 498.0, "episode_reward_mean": 579.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 289.78}, "custom_metrics": {"sparse_reward_mean": 200.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.16, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.35, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.22, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.02, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.77, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 
17.14, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.48, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, 
"viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 576.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 579.0, 576.0, 516.0, 579.0, 590.0, 573.0, 582.0, 587.0, 579.0, 576.0, 576.0, 582.0, 536.0, 582.0, 582.0, 633.0, 582.0, 582.0, 582.0, 579.0, 564.0, 579.0, 582.0, 582.0, 573.0, 630.0, 582.0, 587.0, 587.0, 587.0, 570.0, 579.0, 564.0, 582.0, 627.0, 539.0, 582.0, 630.0, 582.0, 579.0, 587.0, 573.0, 579.0, 579.0, 633.0, 582.0, 584.0, 582.0, 582.0, 582.0, 587.0, 579.0, 582.0, 579.0, 627.0, 576.0, 
582.0, 516.0, 503.0, 630.0, 498.0, 587.0, 573.0, 576.0, 587.0, 582.0, 587.0, 525.0, 576.0, 533.0, 630.0, 576.0, 573.0, 582.0, 582.0, 579.0, 582.0, 527.0, 573.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 579.0, 579.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 278.0, 285.0, 297.0, 287.0, 289.0, 284.0, 292.0, 290.0, 292.0, 316.0, 311.0, 280.0, 302.0, 314.0, 316.0, 296.0, 283.0, 293.0, 286.0, 283.0, 293.0, 265.0, 251.0, 288.0, 291.0, 296.0, 294.0, 287.0, 286.0, 291.0, 291.0, 294.0, 293.0, 283.0, 296.0, 298.0, 278.0, 290.0, 286.0, 293.0, 289.0, 266.0, 270.0, 298.0, 284.0, 295.0, 287.0, 318.0, 315.0, 296.0, 286.0, 280.0, 302.0, 288.0, 294.0, 293.0, 286.0, 287.0, 277.0, 280.0, 299.0, 296.0, 286.0, 293.0, 289.0, 277.0, 296.0, 321.0, 309.0, 288.0, 294.0, 296.0, 291.0, 297.0, 290.0, 287.0, 300.0, 289.0, 281.0, 290.0, 289.0, 273.0, 291.0, 291.0, 291.0, 327.0, 300.0, 272.0, 267.0, 283.0, 299.0, 319.0, 311.0, 292.0, 290.0, 289.0, 290.0, 294.0, 293.0, 288.0, 285.0, 290.0, 289.0, 276.0, 303.0, 310.0, 323.0, 286.0, 296.0, 299.0, 285.0, 295.0, 287.0, 281.0, 301.0, 288.0, 294.0, 289.0, 298.0, 293.0, 286.0, 288.0, 294.0, 281.0, 298.0, 321.0, 306.0, 296.0, 280.0, 293.0, 289.0, 250.0, 266.0, 256.0, 247.0, 309.0, 321.0, 253.0, 245.0, 287.0, 300.0, 281.0, 292.0, 288.0, 288.0, 294.0, 293.0, 285.0, 297.0, 288.0, 299.0, 266.0, 259.0, 283.0, 293.0, 265.0, 268.0, 299.0, 331.0, 293.0, 283.0, 293.0, 280.0, 286.0, 296.0, 285.0, 297.0, 299.0, 280.0, 303.0, 279.0, 267.0, 
260.0, 293.0, 280.0, 282.0, 294.0, 299.0, 283.0, 295.0, 284.0, 286.0, 293.0, 298.0, 284.0, 293.0, 286.0, 292.0, 284.0, 287.0, 292.0, 293.0, 286.0, 288.0, 291.0, 294.0, 285.0, 298.0, 284.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2052169809302002, "mean_processing_ms": 0.3091122486533884, "mean_inference_ms": 1.7607399677301792}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4776000, "num_steps_sampled": 2547200, "sample_time_ms": 20764.539, "load_time_ms": 37.37, "grad_time_ms": 8925.603, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002967844484373927, "policy_loss": -0.005320979747921228, "vf_loss": 88.59487915039062, "vf_explained_var": 0.7679054141044617, "kl": 0.0019444593926891685, "entropy": 1.141340732574463, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2547200, "episodes_total": 6368, "training_iteration": 199, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-24-41", "timestamp": 1660253081, "time_this_iter_s": 30.933609008789062, "time_total_s": 11497.608307600021, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11497.608307600021, "timesteps_since_restore": 2547200, "iterations_since_restore": 199, "perf": {"cpu_util_percent": 31.409090909090903, "ram_util_percent": 58.724999999999994}} +{"episode_reward_max": 633.0, "episode_reward_min": 368.0, "episode_reward_mean": 574.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 179.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 287.24}, "custom_metrics": {"sparse_reward_mean": 198.6, 
"sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 177.28, "shaped_reward_min": 128, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.35, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.96, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.14, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.05, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.38, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.28, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.48, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 576.0, 587.0, 579.0, 579.0, 582.0, 576.0, 489.0, 530.0, 579.0, 570.0, 630.0, 582.0, 587.0, 587.0, 567.0, 587.0, 368.0, 579.0, 498.0, 587.0, 573.0, 582.0, 579.0, 633.0, 587.0, 525.0, 579.0, 567.0, 633.0, 582.0, 582.0, 576.0, 582.0, 516.0, 503.0, 630.0, 498.0, 587.0, 573.0, 576.0, 
587.0, 582.0, 587.0, 525.0, 576.0, 533.0, 630.0, 576.0, 573.0, 582.0, 582.0, 579.0, 582.0, 527.0, 573.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 576.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 579.0, 576.0, 516.0, 579.0, 590.0, 573.0, 582.0, 587.0, 579.0, 576.0, 576.0, 582.0, 536.0, 582.0, 582.0, 633.0, 582.0, 582.0, 582.0, 579.0, 564.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 279.0, 297.0, 309.0, 278.0, 286.0, 293.0, 285.0, 294.0, 295.0, 287.0, 284.0, 292.0, 249.0, 240.0, 264.0, 266.0, 273.0, 306.0, 281.0, 289.0, 316.0, 314.0, 283.0, 299.0, 292.0, 295.0, 285.0, 302.0, 289.0, 278.0, 299.0, 288.0, 189.0, 179.0, 293.0, 286.0, 253.0, 245.0, 296.0, 291.0, 291.0, 282.0, 293.0, 289.0, 290.0, 289.0, 322.0, 311.0, 293.0, 294.0, 256.0, 269.0, 280.0, 299.0, 284.0, 283.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 296.0, 280.0, 293.0, 289.0, 250.0, 266.0, 256.0, 247.0, 309.0, 321.0, 253.0, 245.0, 287.0, 300.0, 281.0, 292.0, 288.0, 288.0, 294.0, 293.0, 285.0, 297.0, 288.0, 299.0, 266.0, 259.0, 283.0, 293.0, 265.0, 268.0, 299.0, 331.0, 293.0, 283.0, 293.0, 280.0, 286.0, 296.0, 285.0, 297.0, 299.0, 280.0, 303.0, 279.0, 267.0, 260.0, 293.0, 280.0, 282.0, 294.0, 299.0, 283.0, 295.0, 284.0, 286.0, 293.0, 298.0, 284.0, 293.0, 286.0, 292.0, 284.0, 287.0, 292.0, 293.0, 286.0, 288.0, 291.0, 294.0, 285.0, 298.0, 284.0, 301.0, 278.0, 285.0, 297.0, 287.0, 289.0, 284.0, 292.0, 290.0, 292.0, 316.0, 311.0, 280.0, 
302.0, 314.0, 316.0, 296.0, 283.0, 293.0, 286.0, 283.0, 293.0, 265.0, 251.0, 288.0, 291.0, 296.0, 294.0, 287.0, 286.0, 291.0, 291.0, 294.0, 293.0, 283.0, 296.0, 298.0, 278.0, 290.0, 286.0, 293.0, 289.0, 266.0, 270.0, 298.0, 284.0, 295.0, 287.0, 318.0, 315.0, 296.0, 286.0, 280.0, 302.0, 288.0, 294.0, 293.0, 286.0, 287.0, 277.0, 280.0, 299.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2014273105628746, "mean_processing_ms": 0.30835462294201915, "mean_inference_ms": 1.7571376095037237}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4800000, "num_steps_sampled": 2560000, "sample_time_ms": 20842.97, "load_time_ms": 37.195, "grad_time_ms": 9109.166, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005231037735939026, "policy_loss": -0.0033684810623526573, "vf_loss": 91.692626953125, "vf_explained_var": 0.7593931555747986, "kl": 0.002331085503101349, "entropy": 1.1394835710525513, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2560000, "episodes_total": 6400, "training_iteration": 200, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-25-12", "timestamp": 1660253112, "time_this_iter_s": 30.703901290893555, "time_total_s": 11528.312208890915, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11528.312208890915, "timesteps_since_restore": 2560000, "iterations_since_restore": 200, "perf": {"cpu_util_percent": 32.688372093023254, "ram_util_percent": 58.67906976744187}} +{"episode_reward_max": 633.0, "episode_reward_min": 299.0, "episode_reward_mean": 576.94, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 139.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 288.47}, "custom_metrics": {"sparse_reward_mean": 199.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 177.74, "shaped_reward_min": 99, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.98, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.07, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.66, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.75, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, 
"useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.63, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.63, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.87, 
"optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.63, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 587.0, 627.0, 579.0, 627.0, 579.0, 630.0, 630.0, 630.0, 579.0, 579.0, 299.0, 
633.0, 582.0, 582.0, 522.0, 630.0, 582.0, 627.0, 567.0, 564.0, 582.0, 555.0, 582.0, 579.0, 576.0, 579.0, 533.0, 576.0, 576.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 576.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 579.0, 576.0, 516.0, 579.0, 590.0, 573.0, 582.0, 587.0, 579.0, 576.0, 576.0, 582.0, 536.0, 582.0, 582.0, 633.0, 582.0, 582.0, 582.0, 579.0, 564.0, 579.0, 582.0, 630.0, 576.0, 587.0, 579.0, 579.0, 582.0, 576.0, 489.0, 530.0, 579.0, 570.0, 630.0, 582.0, 587.0, 587.0, 567.0, 587.0, 368.0, 579.0, 498.0, 587.0, 573.0, 582.0, 579.0, 633.0, 587.0, 525.0, 579.0, 567.0, 633.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 287.0, 300.0, 309.0, 318.0, 298.0, 281.0, 304.0, 323.0, 297.0, 282.0, 322.0, 308.0, 305.0, 325.0, 308.0, 322.0, 277.0, 302.0, 284.0, 295.0, 160.0, 139.0, 322.0, 311.0, 289.0, 293.0, 304.0, 278.0, 253.0, 269.0, 304.0, 326.0, 302.0, 280.0, 303.0, 324.0, 280.0, 287.0, 285.0, 279.0, 286.0, 296.0, 279.0, 276.0, 295.0, 287.0, 283.0, 296.0, 283.0, 293.0, 282.0, 297.0, 265.0, 268.0, 293.0, 283.0, 289.0, 287.0, 291.0, 291.0, 295.0, 287.0, 293.0, 286.0, 288.0, 291.0, 294.0, 285.0, 298.0, 284.0, 301.0, 278.0, 285.0, 297.0, 287.0, 289.0, 284.0, 292.0, 290.0, 292.0, 316.0, 311.0, 280.0, 302.0, 314.0, 316.0, 296.0, 283.0, 293.0, 286.0, 283.0, 293.0, 265.0, 251.0, 288.0, 291.0, 296.0, 294.0, 287.0, 286.0, 291.0, 291.0, 294.0, 293.0, 283.0, 296.0, 298.0, 278.0, 290.0, 286.0, 293.0, 289.0, 266.0, 270.0, 298.0, 284.0, 295.0, 287.0, 
318.0, 315.0, 296.0, 286.0, 280.0, 302.0, 288.0, 294.0, 293.0, 286.0, 287.0, 277.0, 280.0, 299.0, 296.0, 286.0, 316.0, 314.0, 279.0, 297.0, 309.0, 278.0, 286.0, 293.0, 285.0, 294.0, 295.0, 287.0, 284.0, 292.0, 249.0, 240.0, 264.0, 266.0, 273.0, 306.0, 281.0, 289.0, 316.0, 314.0, 283.0, 299.0, 292.0, 295.0, 285.0, 302.0, 289.0, 278.0, 299.0, 288.0, 189.0, 179.0, 293.0, 286.0, 253.0, 245.0, 296.0, 291.0, 291.0, 282.0, 293.0, 289.0, 290.0, 289.0, 322.0, 311.0, 293.0, 294.0, 256.0, 269.0, 280.0, 299.0, 284.0, 283.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1976757528000228, "mean_processing_ms": 0.3076033986967843, "mean_inference_ms": 1.7536061115922081}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4824000, "num_steps_sampled": 2572800, "sample_time_ms": 20929.774, "load_time_ms": 36.844, "grad_time_ms": 9259.169, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019615469500422478, "policy_loss": -0.006233252584934235, "vf_loss": 87.63289642333984, "vf_explained_var": 0.7635285258293152, "kl": 0.0017622611485421658, "entropy": 1.1369844675064087, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2572800, "episodes_total": 6432, "training_iteration": 201, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-25-44", "timestamp": 1660253144, "time_this_iter_s": 31.842552185058594, "time_total_s": 11560.154761075974, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11560.154761075974, "timesteps_since_restore": 2572800, "iterations_since_restore": 201, "perf": {"cpu_util_percent": 
24.447826086956525, "ram_util_percent": 58.667391304347845}} +{"episode_reward_max": 633.0, "episode_reward_min": 299.0, "episode_reward_mean": 573.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 139.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.725}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 177.05, "shaped_reward_min": 99, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.26, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.02, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, 
"onion_drop_agent_0_mean": 0.56, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.33, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.46, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.88, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.28, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 15.46, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.88, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.46, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.88, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 533.0, 576.0, 582.0, 579.0, 582.0, 579.0, 582.0, 582.0, 582.0, 479.0, 576.0, 519.0, 525.0, 627.0, 522.0, 582.0, 587.0, 587.0, 627.0, 582.0, 582.0, 582.0, 579.0, 582.0, 579.0, 579.0, 582.0, 475.0, 587.0, 582.0, 579.0, 579.0, 564.0, 579.0, 582.0, 630.0, 576.0, 587.0, 579.0, 579.0, 582.0, 576.0, 489.0, 530.0, 579.0, 570.0, 630.0, 582.0, 587.0, 587.0, 567.0, 587.0, 368.0, 579.0, 498.0, 587.0, 573.0, 582.0, 579.0, 633.0, 587.0, 525.0, 579.0, 567.0, 633.0, 582.0, 582.0, 576.0, 587.0, 627.0, 579.0, 627.0, 579.0, 630.0, 630.0, 630.0, 579.0, 579.0, 299.0, 633.0, 582.0, 582.0, 522.0, 630.0, 582.0, 627.0, 567.0, 564.0, 582.0, 555.0, 582.0, 579.0, 576.0, 579.0, 533.0, 576.0, 576.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 285.0, 261.0, 272.0, 283.0, 293.0, 297.0, 285.0, 288.0, 291.0, 283.0, 299.0, 291.0, 288.0, 281.0, 301.0, 283.0, 299.0, 299.0, 283.0, 248.0, 231.0, 295.0, 281.0, 264.0, 255.0, 260.0, 265.0, 309.0, 318.0, 274.0, 248.0, 294.0, 288.0, 283.0, 304.0, 301.0, 286.0, 318.0, 309.0, 288.0, 294.0, 293.0, 289.0, 296.0, 286.0, 299.0, 280.0, 281.0, 301.0, 300.0, 279.0, 288.0, 291.0, 289.0, 293.0, 243.0, 232.0, 300.0, 287.0, 297.0, 285.0, 284.0, 295.0, 293.0, 286.0, 287.0, 277.0, 280.0, 299.0, 296.0, 286.0, 316.0, 314.0, 279.0, 297.0, 309.0, 278.0, 286.0, 293.0, 285.0, 294.0, 295.0, 287.0, 284.0, 292.0, 249.0, 240.0, 264.0, 266.0, 273.0, 
306.0, 281.0, 289.0, 316.0, 314.0, 283.0, 299.0, 292.0, 295.0, 285.0, 302.0, 289.0, 278.0, 299.0, 288.0, 189.0, 179.0, 293.0, 286.0, 253.0, 245.0, 296.0, 291.0, 291.0, 282.0, 293.0, 289.0, 290.0, 289.0, 322.0, 311.0, 293.0, 294.0, 256.0, 269.0, 280.0, 299.0, 284.0, 283.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 309.0, 318.0, 298.0, 281.0, 304.0, 323.0, 297.0, 282.0, 322.0, 308.0, 305.0, 325.0, 308.0, 322.0, 277.0, 302.0, 284.0, 295.0, 160.0, 139.0, 322.0, 311.0, 289.0, 293.0, 304.0, 278.0, 253.0, 269.0, 304.0, 326.0, 302.0, 280.0, 303.0, 324.0, 280.0, 287.0, 285.0, 279.0, 286.0, 296.0, 279.0, 276.0, 295.0, 287.0, 283.0, 296.0, 283.0, 293.0, 282.0, 297.0, 265.0, 268.0, 293.0, 283.0, 289.0, 287.0, 291.0, 291.0, 295.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1939639805139826, "mean_processing_ms": 0.3068617868060299, "mean_inference_ms": 1.750206276185966}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4848000, "num_steps_sampled": 2585600, "sample_time_ms": 21104.737, "load_time_ms": 36.737, "grad_time_ms": 9324.388, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035205576568841934, "policy_loss": -0.004760665353387594, "vf_loss": 88.47342681884766, "vf_explained_var": 0.7671054005622864, "kl": 0.0017035487107932568, "entropy": 1.1322449445724487, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2585600, "episodes_total": 6464, "training_iteration": 202, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-26-16", "timestamp": 1660253176, "time_this_iter_s": 31.605774879455566, "time_total_s": 11591.760535955429, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11591.760535955429, "timesteps_since_restore": 2585600, "iterations_since_restore": 202, "perf": {"cpu_util_percent": 31.240000000000006, "ram_util_percent": 58.77555555555557}} +{"episode_reward_max": 633.0, "episode_reward_min": 299.0, "episode_reward_mean": 580.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 139.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 290.155}, "custom_metrics": {"sparse_reward_mean": 200.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 179.51, "shaped_reward_min": 99, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.11, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 15.71, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.64, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.53, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.34, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.27, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.33, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.66, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.34, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.27, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.34, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.27, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 564.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 630.0, 633.0, 579.0, 539.0, 536.0, 582.0, 582.0, 587.0, 627.0, 587.0, 579.0, 630.0, 584.0, 579.0, 579.0, 579.0, 579.0, 630.0, 633.0, 630.0, 582.0, 630.0, 567.0, 633.0, 582.0, 582.0, 576.0, 587.0, 627.0, 579.0, 627.0, 579.0, 630.0, 630.0, 630.0, 579.0, 579.0, 299.0, 633.0, 582.0, 582.0, 522.0, 630.0, 582.0, 627.0, 567.0, 564.0, 582.0, 555.0, 582.0, 579.0, 576.0, 579.0, 533.0, 576.0, 576.0, 582.0, 582.0, 579.0, 533.0, 576.0, 582.0, 579.0, 582.0, 579.0, 582.0, 582.0, 582.0, 479.0, 576.0, 519.0, 525.0, 627.0, 522.0, 582.0, 587.0, 587.0, 627.0, 582.0, 582.0, 582.0, 579.0, 582.0, 579.0, 579.0, 582.0, 475.0, 587.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 297.0, 286.0, 301.0, 280.0, 284.0, 288.0, 291.0, 291.0, 288.0, 299.0, 283.0, 301.0, 281.0, 292.0, 290.0, 291.0, 291.0, 293.0, 289.0, 307.0, 323.0, 318.0, 315.0, 294.0, 285.0, 270.0, 269.0, 266.0, 270.0, 291.0, 291.0, 302.0, 280.0, 290.0, 297.0, 316.0, 311.0, 302.0, 285.0, 291.0, 288.0, 321.0, 309.0, 291.0, 293.0, 293.0, 286.0, 286.0, 293.0, 294.0, 285.0, 290.0, 289.0, 320.0, 310.0, 313.0, 320.0, 311.0, 319.0, 305.0, 
277.0, 313.0, 317.0, 284.0, 283.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 309.0, 318.0, 298.0, 281.0, 304.0, 323.0, 297.0, 282.0, 322.0, 308.0, 305.0, 325.0, 308.0, 322.0, 277.0, 302.0, 284.0, 295.0, 160.0, 139.0, 322.0, 311.0, 289.0, 293.0, 304.0, 278.0, 253.0, 269.0, 304.0, 326.0, 302.0, 280.0, 303.0, 324.0, 280.0, 287.0, 285.0, 279.0, 286.0, 296.0, 279.0, 276.0, 295.0, 287.0, 283.0, 296.0, 283.0, 293.0, 282.0, 297.0, 265.0, 268.0, 293.0, 283.0, 289.0, 287.0, 291.0, 291.0, 295.0, 287.0, 294.0, 285.0, 261.0, 272.0, 283.0, 293.0, 297.0, 285.0, 288.0, 291.0, 283.0, 299.0, 291.0, 288.0, 281.0, 301.0, 283.0, 299.0, 299.0, 283.0, 248.0, 231.0, 295.0, 281.0, 264.0, 255.0, 260.0, 265.0, 309.0, 318.0, 274.0, 248.0, 294.0, 288.0, 283.0, 304.0, 301.0, 286.0, 318.0, 309.0, 288.0, 294.0, 293.0, 289.0, 296.0, 286.0, 299.0, 280.0, 281.0, 301.0, 300.0, 279.0, 288.0, 291.0, 289.0, 293.0, 243.0, 232.0, 300.0, 287.0, 297.0, 285.0, 284.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.190287620141578, "mean_processing_ms": 0.3061308463803027, "mean_inference_ms": 1.7468323528445506}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4872000, "num_steps_sampled": 2598400, "sample_time_ms": 21208.335, "load_time_ms": 36.421, "grad_time_ms": 9429.96, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0062708244659006596, "policy_loss": -0.0019446747610345483, "vf_loss": 87.80118560791016, "vf_explained_var": 0.7648043632507324, "kl": 0.001872226013801992, "entropy": 1.1292202472686768, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2598400, "episodes_total": 6496, "training_iteration": 203, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-26-47", "timestamp": 1660253207, "time_this_iter_s": 30.729102849960327, "time_total_s": 11622.48963880539, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11622.48963880539, "timesteps_since_restore": 2598400, "iterations_since_restore": 203, "perf": {"cpu_util_percent": 31.509302325581398, "ram_util_percent": 58.665116279069764}} +{"episode_reward_max": 633.0, "episode_reward_min": 475.0, "episode_reward_mean": 581.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 290.785}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 180.37, "shaped_reward_min": 155, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.37, 
"onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.05, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.54, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.55, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.19, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.45, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, 
"soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.55, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.19, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.55, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.19, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 587.0, 579.0, 633.0, 582.0, 576.0, 564.0, 633.0, 525.0, 582.0, 579.0, 630.0, 579.0, 573.0, 573.0, 579.0, 587.0, 579.0, 582.0, 579.0, 521.0, 576.0, 530.0, 584.0, 630.0, 587.0, 582.0, 579.0, 582.0, 630.0, 627.0, 582.0, 576.0, 576.0, 582.0, 582.0, 579.0, 533.0, 576.0, 582.0, 579.0, 582.0, 579.0, 582.0, 582.0, 582.0, 479.0, 576.0, 519.0, 525.0, 627.0, 522.0, 582.0, 587.0, 587.0, 627.0, 582.0, 582.0, 582.0, 579.0, 582.0, 579.0, 579.0, 582.0, 475.0, 587.0, 582.0, 579.0, 582.0, 587.0, 564.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 630.0, 633.0, 579.0, 539.0, 536.0, 582.0, 582.0, 587.0, 627.0, 587.0, 579.0, 630.0, 584.0, 579.0, 579.0, 579.0, 579.0, 630.0, 633.0, 630.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 285.0, 291.0, 296.0, 290.0, 289.0, 314.0, 319.0, 288.0, 294.0, 280.0, 296.0, 278.0, 286.0, 316.0, 317.0, 260.0, 265.0, 292.0, 290.0, 292.0, 287.0, 326.0, 304.0, 294.0, 285.0, 293.0, 280.0, 287.0, 286.0, 288.0, 291.0, 298.0, 289.0, 303.0, 276.0, 291.0, 
291.0, 277.0, 302.0, 253.0, 268.0, 288.0, 288.0, 267.0, 263.0, 290.0, 294.0, 307.0, 323.0, 297.0, 290.0, 296.0, 286.0, 293.0, 286.0, 285.0, 297.0, 317.0, 313.0, 317.0, 310.0, 288.0, 294.0, 293.0, 283.0, 289.0, 287.0, 291.0, 291.0, 295.0, 287.0, 294.0, 285.0, 261.0, 272.0, 283.0, 293.0, 297.0, 285.0, 288.0, 291.0, 283.0, 299.0, 291.0, 288.0, 281.0, 301.0, 283.0, 299.0, 299.0, 283.0, 248.0, 231.0, 295.0, 281.0, 264.0, 255.0, 260.0, 265.0, 309.0, 318.0, 274.0, 248.0, 294.0, 288.0, 283.0, 304.0, 301.0, 286.0, 318.0, 309.0, 288.0, 294.0, 293.0, 289.0, 296.0, 286.0, 299.0, 280.0, 281.0, 301.0, 300.0, 279.0, 288.0, 291.0, 289.0, 293.0, 243.0, 232.0, 300.0, 287.0, 297.0, 285.0, 284.0, 295.0, 285.0, 297.0, 286.0, 301.0, 280.0, 284.0, 288.0, 291.0, 291.0, 288.0, 299.0, 283.0, 301.0, 281.0, 292.0, 290.0, 291.0, 291.0, 293.0, 289.0, 307.0, 323.0, 318.0, 315.0, 294.0, 285.0, 270.0, 269.0, 266.0, 270.0, 291.0, 291.0, 302.0, 280.0, 290.0, 297.0, 316.0, 311.0, 302.0, 285.0, 291.0, 288.0, 321.0, 309.0, 291.0, 293.0, 293.0, 286.0, 286.0, 293.0, 294.0, 285.0, 290.0, 289.0, 320.0, 310.0, 313.0, 320.0, 311.0, 319.0, 305.0, 277.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1866467197845827, "mean_processing_ms": 0.3054065028212096, "mean_inference_ms": 1.7433019705968333}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4896000, "num_steps_sampled": 2611200, "sample_time_ms": 20995.947, "load_time_ms": 36.336, "grad_time_ms": 9492.439, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 3.609728810261004e-05, "policy_loss": -0.0082255182787776, "vf_loss": 88.31702423095703, "vf_explained_var": 0.7638809680938721, "kl": 0.0019561152439564466, "entropy": 1.140177845954895, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2611200, "episodes_total": 6528, "training_iteration": 204, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-27-18", "timestamp": 1660253238, "time_this_iter_s": 30.846153020858765, "time_total_s": 11653.335791826248, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11653.335791826248, "timesteps_since_restore": 2611200, "iterations_since_restore": 204, "perf": {"cpu_util_percent": 30.906818181818178, "ram_util_percent": 58.60227272727274}} +{"episode_reward_max": 633.0, "episode_reward_min": 237.0, "episode_reward_mean": 583.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 291.985}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 180.37, "shaped_reward_min": 77, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.56, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.19, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.56, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.19, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.56, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.19, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [237.0, 576.0, 542.0, 573.0, 630.0, 624.0, 633.0, 582.0, 630.0, 579.0, 630.0, 582.0, 582.0, 567.0, 579.0, 618.0, 579.0, 627.0, 630.0, 582.0, 582.0, 576.0, 633.0, 627.0, 624.0, 582.0, 539.0, 567.0, 579.0, 582.0, 544.0, 573.0, 475.0, 587.0, 582.0, 579.0, 582.0, 587.0, 564.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 630.0, 633.0, 579.0, 539.0, 536.0, 582.0, 582.0, 587.0, 627.0, 587.0, 579.0, 630.0, 584.0, 579.0, 579.0, 579.0, 579.0, 630.0, 633.0, 630.0, 582.0, 630.0, 576.0, 587.0, 579.0, 633.0, 582.0, 576.0, 564.0, 633.0, 525.0, 582.0, 579.0, 630.0, 579.0, 573.0, 573.0, 579.0, 587.0, 579.0, 582.0, 579.0, 521.0, 576.0, 530.0, 584.0, 630.0, 587.0, 582.0, 579.0, 582.0, 630.0, 627.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [122.0, 115.0, 285.0, 291.0, 284.0, 
258.0, 281.0, 292.0, 314.0, 316.0, 312.0, 312.0, 320.0, 313.0, 293.0, 289.0, 314.0, 316.0, 283.0, 296.0, 316.0, 314.0, 284.0, 298.0, 284.0, 298.0, 278.0, 289.0, 280.0, 299.0, 305.0, 313.0, 291.0, 288.0, 316.0, 311.0, 304.0, 326.0, 288.0, 294.0, 291.0, 291.0, 295.0, 281.0, 315.0, 318.0, 302.0, 325.0, 313.0, 311.0, 290.0, 292.0, 259.0, 280.0, 278.0, 289.0, 292.0, 287.0, 282.0, 300.0, 275.0, 269.0, 286.0, 287.0, 243.0, 232.0, 300.0, 287.0, 297.0, 285.0, 284.0, 295.0, 285.0, 297.0, 286.0, 301.0, 280.0, 284.0, 288.0, 291.0, 291.0, 288.0, 299.0, 283.0, 301.0, 281.0, 292.0, 290.0, 291.0, 291.0, 293.0, 289.0, 307.0, 323.0, 318.0, 315.0, 294.0, 285.0, 270.0, 269.0, 266.0, 270.0, 291.0, 291.0, 302.0, 280.0, 290.0, 297.0, 316.0, 311.0, 302.0, 285.0, 291.0, 288.0, 321.0, 309.0, 291.0, 293.0, 293.0, 286.0, 286.0, 293.0, 294.0, 285.0, 290.0, 289.0, 320.0, 310.0, 313.0, 320.0, 311.0, 319.0, 305.0, 277.0, 313.0, 317.0, 291.0, 285.0, 291.0, 296.0, 290.0, 289.0, 314.0, 319.0, 288.0, 294.0, 280.0, 296.0, 278.0, 286.0, 316.0, 317.0, 260.0, 265.0, 292.0, 290.0, 292.0, 287.0, 326.0, 304.0, 294.0, 285.0, 293.0, 280.0, 287.0, 286.0, 288.0, 291.0, 298.0, 289.0, 303.0, 276.0, 291.0, 291.0, 277.0, 302.0, 253.0, 268.0, 288.0, 288.0, 267.0, 263.0, 290.0, 294.0, 307.0, 323.0, 297.0, 290.0, 296.0, 286.0, 293.0, 286.0, 285.0, 297.0, 317.0, 313.0, 317.0, 310.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1830353872295725, "mean_processing_ms": 0.3046858000734139, "mean_inference_ms": 1.7395956173502736}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4920000, "num_steps_sampled": 2624000, "sample_time_ms": 20976.168, "load_time_ms": 36.272, "grad_time_ms": 9605.806, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005292419344186783, "policy_loss": -0.002614696277305484, "vf_loss": 84.73992156982422, "vf_explained_var": 0.7728293538093567, "kl": 0.0027176842559129, "entropy": 1.1337858438491821, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2624000, "episodes_total": 6560, "training_iteration": 205, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-27-47", "timestamp": 1660253267, "time_this_iter_s": 29.478952169418335, "time_total_s": 11682.814743995667, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11682.814743995667, "timesteps_since_restore": 2624000, "iterations_since_restore": 205, "perf": {"cpu_util_percent": 32.5452380952381, "ram_util_percent": 58.56666666666667}} +{"episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 586.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 293.06}, "custom_metrics": {"sparse_reward_mean": 203.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 180.12, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.28, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.75, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.56, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.23, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.58, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.56, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.23, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.56, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.23, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [621.0, 570.0, 587.0, 630.0, 633.0, 579.0, 630.0, 587.0, 582.0, 624.0, 624.0, 579.0, 636.0, 579.0, 576.0, 624.0, 525.0, 627.0, 579.0, 579.0, 630.0, 581.0, 465.0, 579.0, 582.0, 579.0, 573.0, 579.0, 582.0, 573.0, 579.0, 587.0, 633.0, 630.0, 582.0, 630.0, 576.0, 587.0, 579.0, 633.0, 582.0, 576.0, 564.0, 633.0, 525.0, 582.0, 579.0, 630.0, 579.0, 573.0, 573.0, 579.0, 587.0, 579.0, 582.0, 579.0, 521.0, 576.0, 530.0, 584.0, 630.0, 587.0, 582.0, 579.0, 582.0, 630.0, 627.0, 582.0, 237.0, 576.0, 542.0, 573.0, 630.0, 624.0, 633.0, 582.0, 630.0, 579.0, 630.0, 582.0, 582.0, 567.0, 579.0, 618.0, 579.0, 627.0, 630.0, 582.0, 582.0, 576.0, 633.0, 627.0, 624.0, 582.0, 539.0, 567.0, 579.0, 582.0, 544.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [310.0, 311.0, 271.0, 299.0, 298.0, 289.0, 324.0, 306.0, 316.0, 317.0, 299.0, 280.0, 322.0, 308.0, 290.0, 297.0, 295.0, 287.0, 307.0, 317.0, 315.0, 309.0, 291.0, 288.0, 322.0, 314.0, 288.0, 291.0, 285.0, 291.0, 305.0, 319.0, 260.0, 265.0, 313.0, 314.0, 277.0, 302.0, 285.0, 294.0, 309.0, 321.0, 285.0, 296.0, 234.0, 231.0, 288.0, 291.0, 296.0, 286.0, 285.0, 294.0, 285.0, 288.0, 290.0, 289.0, 296.0, 286.0, 293.0, 280.0, 298.0, 281.0, 295.0, 292.0, 313.0, 320.0, 311.0, 319.0, 305.0, 277.0, 313.0, 317.0, 291.0, 285.0, 291.0, 296.0, 290.0, 289.0, 314.0, 319.0, 288.0, 294.0, 280.0, 296.0, 278.0, 286.0, 316.0, 317.0, 260.0, 265.0, 292.0, 290.0, 292.0, 287.0, 326.0, 304.0, 294.0, 285.0, 293.0, 280.0, 287.0, 286.0, 288.0, 291.0, 298.0, 289.0, 303.0, 276.0, 291.0, 291.0, 277.0, 302.0, 253.0, 268.0, 288.0, 288.0, 267.0, 263.0, 290.0, 294.0, 307.0, 323.0, 297.0, 290.0, 296.0, 286.0, 293.0, 286.0, 285.0, 297.0, 317.0, 313.0, 317.0, 310.0, 288.0, 294.0, 122.0, 115.0, 285.0, 291.0, 284.0, 258.0, 281.0, 292.0, 314.0, 316.0, 312.0, 312.0, 320.0, 313.0, 293.0, 289.0, 314.0, 316.0, 283.0, 296.0, 316.0, 314.0, 284.0, 298.0, 284.0, 298.0, 278.0, 289.0, 280.0, 299.0, 305.0, 313.0, 291.0, 288.0, 316.0, 311.0, 304.0, 326.0, 288.0, 294.0, 291.0, 291.0, 295.0, 281.0, 315.0, 318.0, 302.0, 325.0, 313.0, 311.0, 290.0, 292.0, 259.0, 280.0, 278.0, 289.0, 292.0, 287.0, 282.0, 300.0, 275.0, 269.0, 286.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1794594867485337, "mean_processing_ms": 0.3039706521282891, "mean_inference_ms": 1.735821597361437}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4944000, "num_steps_sampled": 2636800, "sample_time_ms": 21024.303, "load_time_ms": 36.234, "grad_time_ms": 9682.83, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.00302000530064106, "policy_loss": -0.005335552152246237, "vf_loss": 89.21270751953125, "vf_explained_var": 0.7561216354370117, "kl": 0.0017618268029764295, "entropy": 1.131414532661438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2636800, "episodes_total": 6592, "training_iteration": 206, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-28-17", "timestamp": 1660253297, "time_this_iter_s": 29.685957193374634, "time_total_s": 11712.500701189041, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11712.500701189041, "timesteps_since_restore": 2636800, "iterations_since_restore": 206, "perf": {"cpu_util_percent": 30.95714285714286, "ram_util_percent": 58.669047619047625}} +{"episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 589.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 294.77}, "custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 180.34, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.12, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.54, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.25, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.62, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, 
"useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.54, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.25, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.54, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.25, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 530.0, 582.0, 630.0, 624.0, 633.0, 558.0, 570.0, 627.0, 624.0, 573.0, 590.0, 576.0, 573.0, 587.0, 627.0, 582.0, 576.0, 582.0, 579.0, 582.0, 564.0, 576.0, 627.0, 627.0, 579.0, 633.0, 630.0, 633.0, 579.0, 630.0, 627.0, 582.0, 630.0, 627.0, 582.0, 237.0, 576.0, 542.0, 573.0, 630.0, 624.0, 633.0, 582.0, 630.0, 579.0, 630.0, 582.0, 582.0, 567.0, 579.0, 618.0, 579.0, 627.0, 630.0, 582.0, 582.0, 576.0, 633.0, 627.0, 624.0, 582.0, 539.0, 567.0, 579.0, 582.0, 544.0, 573.0, 621.0, 570.0, 587.0, 630.0, 633.0, 579.0, 630.0, 587.0, 582.0, 624.0, 624.0, 579.0, 636.0, 579.0, 576.0, 624.0, 525.0, 627.0, 579.0, 579.0, 630.0, 581.0, 465.0, 579.0, 582.0, 579.0, 573.0, 579.0, 582.0, 573.0, 579.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [274.0, 299.0, 263.0, 267.0, 289.0, 293.0, 321.0, 309.0, 309.0, 315.0, 309.0, 324.0, 271.0, 287.0, 293.0, 277.0, 318.0, 309.0, 313.0, 311.0, 290.0, 283.0, 291.0, 299.0, 288.0, 288.0, 300.0, 273.0, 286.0, 301.0, 308.0, 319.0, 296.0, 286.0, 285.0, 291.0, 293.0, 289.0, 288.0, 291.0, 292.0, 290.0, 281.0, 283.0, 289.0, 287.0, 314.0, 313.0, 313.0, 314.0, 289.0, 290.0, 324.0, 309.0, 313.0, 317.0, 316.0, 317.0, 288.0, 291.0, 311.0, 319.0, 326.0, 301.0, 285.0, 297.0, 317.0, 313.0, 317.0, 310.0, 288.0, 294.0, 122.0, 115.0, 285.0, 291.0, 284.0, 258.0, 281.0, 292.0, 314.0, 316.0, 312.0, 312.0, 320.0, 313.0, 293.0, 289.0, 314.0, 316.0, 283.0, 296.0, 316.0, 314.0, 284.0, 298.0, 284.0, 298.0, 278.0, 289.0, 280.0, 299.0, 305.0, 313.0, 291.0, 288.0, 316.0, 311.0, 304.0, 326.0, 288.0, 294.0, 291.0, 291.0, 295.0, 281.0, 315.0, 318.0, 302.0, 325.0, 313.0, 311.0, 290.0, 292.0, 259.0, 280.0, 278.0, 289.0, 292.0, 287.0, 282.0, 300.0, 275.0, 269.0, 286.0, 287.0, 310.0, 311.0, 271.0, 299.0, 298.0, 289.0, 324.0, 306.0, 316.0, 317.0, 299.0, 280.0, 322.0, 308.0, 290.0, 297.0, 295.0, 287.0, 307.0, 317.0, 315.0, 309.0, 291.0, 288.0, 322.0, 314.0, 288.0, 291.0, 285.0, 291.0, 305.0, 319.0, 260.0, 265.0, 313.0, 314.0, 277.0, 302.0, 285.0, 294.0, 309.0, 321.0, 285.0, 296.0, 234.0, 231.0, 288.0, 291.0, 296.0, 286.0, 285.0, 294.0, 285.0, 288.0, 290.0, 289.0, 296.0, 286.0, 293.0, 280.0, 298.0, 281.0, 295.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1759167909615367, "mean_processing_ms": 0.30326280237978454, "mean_inference_ms": 1.7321279401839695}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4968000, 
"num_steps_sampled": 2649600, "sample_time_ms": 20977.446, "load_time_ms": 35.842, "grad_time_ms": 9697.985, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015924535691738129, "policy_loss": -0.005817517638206482, "vf_loss": 79.77727508544922, "vf_explained_var": 0.7645978927612305, "kl": 0.001973592210561037, "entropy": 1.1355053186416626, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2649600, "episodes_total": 6624, "training_iteration": 207, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-28-47", "timestamp": 1660253327, "time_this_iter_s": 30.242400884628296, "time_total_s": 11742.74310207367, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11742.74310207367, "timesteps_since_restore": 2649600, "iterations_since_restore": 207, "perf": {"cpu_util_percent": 33.359523809523814, "ram_util_percent": 58.971428571428575}} +{"episode_reward_max": 636.0, "episode_reward_min": 465.0, "episode_reward_mean": 592.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 296.11}, "custom_metrics": {"sparse_reward_mean": 205.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 181.02, "shaped_reward_min": 145, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.49, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.37, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.04, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.62, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.32, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 15.62, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.32, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.62, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.32, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 582.0, 630.0, 630.0, 582.0, 564.0, 576.0, 582.0, 630.0, 561.0, 633.0, 587.0, 579.0, 579.0, 627.0, 627.0, 627.0, 518.0, 633.0, 633.0, 582.0, 630.0, 582.0, 582.0, 587.0, 573.0, 627.0, 582.0, 573.0, 582.0, 564.0, 573.0, 579.0, 582.0, 544.0, 573.0, 621.0, 570.0, 587.0, 630.0, 633.0, 579.0, 630.0, 587.0, 582.0, 624.0, 624.0, 579.0, 636.0, 579.0, 576.0, 624.0, 525.0, 627.0, 579.0, 579.0, 630.0, 581.0, 465.0, 579.0, 582.0, 579.0, 573.0, 579.0, 582.0, 573.0, 579.0, 587.0, 573.0, 530.0, 582.0, 630.0, 624.0, 633.0, 558.0, 570.0, 627.0, 624.0, 573.0, 590.0, 576.0, 573.0, 587.0, 627.0, 582.0, 576.0, 582.0, 579.0, 582.0, 564.0, 576.0, 627.0, 627.0, 579.0, 
633.0, 630.0, 633.0, 579.0, 630.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 299.0, 296.0, 286.0, 311.0, 319.0, 306.0, 324.0, 291.0, 291.0, 283.0, 281.0, 289.0, 287.0, 286.0, 296.0, 315.0, 315.0, 277.0, 284.0, 321.0, 312.0, 290.0, 297.0, 296.0, 283.0, 284.0, 295.0, 310.0, 317.0, 313.0, 314.0, 326.0, 301.0, 252.0, 266.0, 319.0, 314.0, 323.0, 310.0, 296.0, 286.0, 315.0, 315.0, 290.0, 292.0, 283.0, 299.0, 292.0, 295.0, 283.0, 290.0, 308.0, 319.0, 279.0, 303.0, 279.0, 294.0, 290.0, 292.0, 283.0, 281.0, 278.0, 295.0, 292.0, 287.0, 282.0, 300.0, 275.0, 269.0, 286.0, 287.0, 310.0, 311.0, 271.0, 299.0, 298.0, 289.0, 324.0, 306.0, 316.0, 317.0, 299.0, 280.0, 322.0, 308.0, 290.0, 297.0, 295.0, 287.0, 307.0, 317.0, 315.0, 309.0, 291.0, 288.0, 322.0, 314.0, 288.0, 291.0, 285.0, 291.0, 305.0, 319.0, 260.0, 265.0, 313.0, 314.0, 277.0, 302.0, 285.0, 294.0, 309.0, 321.0, 285.0, 296.0, 234.0, 231.0, 288.0, 291.0, 296.0, 286.0, 285.0, 294.0, 285.0, 288.0, 290.0, 289.0, 296.0, 286.0, 293.0, 280.0, 298.0, 281.0, 295.0, 292.0, 274.0, 299.0, 263.0, 267.0, 289.0, 293.0, 321.0, 309.0, 309.0, 315.0, 309.0, 324.0, 271.0, 287.0, 293.0, 277.0, 318.0, 309.0, 313.0, 311.0, 290.0, 283.0, 291.0, 299.0, 288.0, 288.0, 300.0, 273.0, 286.0, 301.0, 308.0, 319.0, 296.0, 286.0, 285.0, 291.0, 293.0, 289.0, 288.0, 291.0, 292.0, 290.0, 281.0, 283.0, 289.0, 287.0, 314.0, 313.0, 313.0, 314.0, 289.0, 290.0, 324.0, 309.0, 313.0, 317.0, 316.0, 317.0, 288.0, 291.0, 311.0, 319.0, 326.0, 301.0]}, 
"sampler_perf": {"mean_env_wait_ms": 1.1724058892059666, "mean_processing_ms": 0.3025608135721768, "mean_inference_ms": 1.7284261128230096}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4992000, "num_steps_sampled": 2662400, "sample_time_ms": 20797.656, "load_time_ms": 35.752, "grad_time_ms": 9645.805, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0059862625785171986, "policy_loss": -0.00204761722125113, "vf_loss": 86.01973724365234, "vf_explained_var": 0.7589619755744934, "kl": 0.0022174532059580088, "entropy": 1.136189579963684, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2662400, "episodes_total": 6656, "training_iteration": 208, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-29-16", "timestamp": 1660253356, "time_this_iter_s": 28.974893808364868, "time_total_s": 11771.717995882034, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11771.717995882034, "timesteps_since_restore": 2662400, "iterations_since_restore": 208, "perf": {"cpu_util_percent": 35.22682926829268, "ram_util_percent": 58.67073170731708}} +{"episode_reward_max": 639.0, "episode_reward_min": 518.0, "episode_reward_mean": 593.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 296.525}, "custom_metrics": {"sparse_reward_mean": 205.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.45, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.33, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.76, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, 
"potting_onion_agent_1_mean": 17.22, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 15.76, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.22, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 15.76, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.22, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 627.0, 579.0, 582.0, 579.0, 618.0, 522.0, 581.0, 590.0, 587.0, 582.0, 582.0, 579.0, 564.0, 630.0, 582.0, 587.0, 639.0, 570.0, 576.0, 567.0, 576.0, 582.0, 630.0, 584.0, 587.0, 633.0, 582.0, 630.0, 582.0, 579.0, 582.0, 582.0, 573.0, 579.0, 587.0, 573.0, 530.0, 582.0, 630.0, 624.0, 633.0, 558.0, 570.0, 627.0, 624.0, 573.0, 590.0, 576.0, 573.0, 587.0, 627.0, 582.0, 576.0, 582.0, 579.0, 582.0, 564.0, 576.0, 
627.0, 627.0, 579.0, 633.0, 630.0, 633.0, 579.0, 630.0, 627.0, 584.0, 582.0, 630.0, 630.0, 582.0, 564.0, 576.0, 582.0, 630.0, 561.0, 633.0, 587.0, 579.0, 579.0, 627.0, 627.0, 627.0, 518.0, 633.0, 633.0, 582.0, 630.0, 582.0, 582.0, 587.0, 573.0, 627.0, 582.0, 573.0, 582.0, 564.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 313.0, 315.0, 312.0, 290.0, 289.0, 297.0, 285.0, 281.0, 298.0, 313.0, 305.0, 255.0, 267.0, 287.0, 294.0, 304.0, 286.0, 298.0, 289.0, 287.0, 295.0, 288.0, 294.0, 289.0, 290.0, 286.0, 278.0, 314.0, 316.0, 285.0, 297.0, 300.0, 287.0, 316.0, 323.0, 273.0, 297.0, 286.0, 290.0, 278.0, 289.0, 291.0, 285.0, 293.0, 289.0, 321.0, 309.0, 287.0, 297.0, 286.0, 301.0, 321.0, 312.0, 291.0, 291.0, 316.0, 314.0, 286.0, 296.0, 302.0, 277.0, 294.0, 288.0, 296.0, 286.0, 293.0, 280.0, 298.0, 281.0, 295.0, 292.0, 274.0, 299.0, 263.0, 267.0, 289.0, 293.0, 321.0, 309.0, 309.0, 315.0, 309.0, 324.0, 271.0, 287.0, 293.0, 277.0, 318.0, 309.0, 313.0, 311.0, 290.0, 283.0, 291.0, 299.0, 288.0, 288.0, 300.0, 273.0, 286.0, 301.0, 308.0, 319.0, 296.0, 286.0, 285.0, 291.0, 293.0, 289.0, 288.0, 291.0, 292.0, 290.0, 281.0, 283.0, 289.0, 287.0, 314.0, 313.0, 313.0, 314.0, 289.0, 290.0, 324.0, 309.0, 313.0, 317.0, 316.0, 317.0, 288.0, 291.0, 311.0, 319.0, 326.0, 301.0, 285.0, 299.0, 296.0, 286.0, 311.0, 319.0, 306.0, 324.0, 291.0, 291.0, 283.0, 281.0, 289.0, 287.0, 286.0, 296.0, 315.0, 315.0, 277.0, 284.0, 321.0, 312.0, 290.0, 297.0, 296.0, 283.0, 284.0, 295.0, 310.0, 317.0, 313.0, 
314.0, 326.0, 301.0, 252.0, 266.0, 319.0, 314.0, 323.0, 310.0, 296.0, 286.0, 315.0, 315.0, 290.0, 292.0, 283.0, 299.0, 292.0, 295.0, 283.0, 290.0, 308.0, 319.0, 279.0, 303.0, 279.0, 294.0, 290.0, 292.0, 283.0, 281.0, 278.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.168925444559228, "mean_processing_ms": 0.30186423192914913, "mean_inference_ms": 1.7247612604215892}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5016000, "num_steps_sampled": 2675200, "sample_time_ms": 20704.168, "load_time_ms": 35.714, "grad_time_ms": 9573.901, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030685996171087027, "policy_loss": -0.005047030281275511, "vf_loss": 86.80921173095703, "vf_explained_var": 0.7612468600273132, "kl": 0.0021123213227838278, "entropy": 1.1305813789367676, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2675200, "episodes_total": 6688, "training_iteration": 209, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-29-45", "timestamp": 1660253385, "time_this_iter_s": 29.278584241867065, "time_total_s": 11800.996580123901, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11800.996580123901, "timesteps_since_restore": 2675200, "iterations_since_restore": 209, "perf": {"cpu_util_percent": 32.47857142857143, "ram_util_percent": 58.55952380952381}} +{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 592.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 296.49}, 
"custom_metrics": {"sparse_reward_mean": 205.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.18, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.12, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.66, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.39, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 15.66, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.39, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.66, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.39, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 570.0, 582.0, 576.0, 633.0, 582.0, 582.0, 582.0, 627.0, 633.0, 587.0, 579.0, 587.0, 587.0, 579.0, 582.0, 584.0, 513.0, 630.0, 582.0, 630.0, 630.0, 633.0, 590.0, 582.0, 582.0, 582.0, 579.0, 
630.0, 582.0, 579.0, 633.0, 579.0, 630.0, 627.0, 584.0, 582.0, 630.0, 630.0, 582.0, 564.0, 576.0, 582.0, 630.0, 561.0, 633.0, 587.0, 579.0, 579.0, 627.0, 627.0, 627.0, 518.0, 633.0, 633.0, 582.0, 630.0, 582.0, 582.0, 587.0, 573.0, 627.0, 582.0, 573.0, 582.0, 564.0, 573.0, 630.0, 627.0, 579.0, 582.0, 579.0, 618.0, 522.0, 581.0, 590.0, 587.0, 582.0, 582.0, 579.0, 564.0, 630.0, 582.0, 587.0, 639.0, 570.0, 576.0, 567.0, 576.0, 582.0, 630.0, 584.0, 587.0, 633.0, 582.0, 630.0, 582.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 287.0, 292.0, 288.0, 282.0, 295.0, 287.0, 283.0, 293.0, 319.0, 314.0, 282.0, 300.0, 291.0, 291.0, 303.0, 279.0, 306.0, 321.0, 314.0, 319.0, 290.0, 297.0, 278.0, 301.0, 300.0, 287.0, 304.0, 283.0, 298.0, 281.0, 299.0, 283.0, 289.0, 295.0, 259.0, 254.0, 321.0, 309.0, 285.0, 297.0, 311.0, 319.0, 318.0, 312.0, 318.0, 315.0, 293.0, 297.0, 296.0, 286.0, 294.0, 288.0, 297.0, 285.0, 295.0, 284.0, 306.0, 324.0, 293.0, 289.0, 286.0, 293.0, 316.0, 317.0, 288.0, 291.0, 311.0, 319.0, 326.0, 301.0, 285.0, 299.0, 296.0, 286.0, 311.0, 319.0, 306.0, 324.0, 291.0, 291.0, 283.0, 281.0, 289.0, 287.0, 286.0, 296.0, 315.0, 315.0, 277.0, 284.0, 321.0, 312.0, 290.0, 297.0, 296.0, 283.0, 284.0, 295.0, 310.0, 317.0, 313.0, 314.0, 326.0, 301.0, 252.0, 266.0, 319.0, 314.0, 323.0, 310.0, 296.0, 286.0, 315.0, 315.0, 290.0, 292.0, 283.0, 299.0, 292.0, 295.0, 283.0, 290.0, 308.0, 319.0, 279.0, 303.0, 279.0, 294.0, 290.0, 292.0, 283.0, 281.0, 278.0, 295.0, 317.0, 
313.0, 315.0, 312.0, 290.0, 289.0, 297.0, 285.0, 281.0, 298.0, 313.0, 305.0, 255.0, 267.0, 287.0, 294.0, 304.0, 286.0, 298.0, 289.0, 287.0, 295.0, 288.0, 294.0, 289.0, 290.0, 286.0, 278.0, 314.0, 316.0, 285.0, 297.0, 300.0, 287.0, 316.0, 323.0, 273.0, 297.0, 286.0, 290.0, 278.0, 289.0, 291.0, 285.0, 293.0, 289.0, 321.0, 309.0, 287.0, 297.0, 286.0, 301.0, 321.0, 312.0, 291.0, 291.0, 316.0, 314.0, 286.0, 296.0, 302.0, 277.0, 294.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.165477981572783, "mean_processing_ms": 0.3011732242601824, "mean_inference_ms": 1.7211386547427134}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5040000, "num_steps_sampled": 2688000, "sample_time_ms": 20664.196, "load_time_ms": 35.919, "grad_time_ms": 9521.919, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002112786052748561, "policy_loss": -0.006137066055089235, "vf_loss": 88.17215728759766, "vf_explained_var": 0.7567508220672607, "kl": 0.0019861103501170874, "entropy": 1.1347342729568481, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2688000, "episodes_total": 6720, "training_iteration": 210, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-30-15", "timestamp": 1660253415, "time_this_iter_s": 29.789448976516724, "time_total_s": 11830.786029100418, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11830.786029100418, "timesteps_since_restore": 2688000, "iterations_since_restore": 210, "perf": {"cpu_util_percent": 30.638095238095236, "ram_util_percent": 58.67142857142858}} +{"episode_reward_max": 639.0, 
"episode_reward_min": 465.0, "episode_reward_mean": 586.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 293.28}, "custom_metrics": {"sparse_reward_mean": 202.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 181.36, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.32, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.57, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.89, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, 
"onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.33, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.52, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.33, "optimal_onion_potting_agent_0_min": 9, 
"optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.52, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.33, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.52, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [576.0, 573.0, 579.0, 465.0, 579.0, 630.0, 582.0, 582.0, 536.0, 587.0, 587.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 579.0, 576.0, 582.0, 530.0, 484.0, 627.0, 582.0, 579.0, 584.0, 579.0, 590.0, 587.0, 587.0, 579.0, 636.0, 573.0, 582.0, 564.0, 573.0, 630.0, 627.0, 579.0, 582.0, 579.0, 618.0, 522.0, 581.0, 590.0, 587.0, 582.0, 582.0, 579.0, 564.0, 630.0, 582.0, 587.0, 639.0, 570.0, 576.0, 567.0, 576.0, 582.0, 630.0, 584.0, 587.0, 633.0, 582.0, 630.0, 582.0, 579.0, 582.0, 573.0, 579.0, 570.0, 582.0, 576.0, 633.0, 582.0, 582.0, 582.0, 627.0, 633.0, 587.0, 579.0, 587.0, 587.0, 579.0, 582.0, 584.0, 513.0, 630.0, 582.0, 630.0, 630.0, 633.0, 590.0, 582.0, 582.0, 582.0, 579.0, 630.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 298.0, 284.0, 289.0, 288.0, 291.0, 231.0, 234.0, 279.0, 300.0, 313.0, 317.0, 297.0, 285.0, 291.0, 291.0, 271.0, 265.0, 302.0, 285.0, 290.0, 297.0, 316.0, 314.0, 286.0, 293.0, 313.0, 317.0, 294.0, 285.0, 296.0, 286.0, 287.0, 292.0, 288.0, 291.0, 290.0, 286.0, 293.0, 289.0, 260.0, 270.0, 234.0, 250.0, 308.0, 319.0, 294.0, 288.0, 290.0, 289.0, 296.0, 288.0, 292.0, 287.0, 301.0, 289.0, 301.0, 286.0, 300.0, 287.0, 300.0, 279.0, 320.0, 316.0, 279.0, 294.0, 290.0, 292.0, 283.0, 281.0, 278.0, 295.0, 317.0, 313.0, 315.0, 312.0, 290.0, 289.0, 297.0, 285.0, 281.0, 298.0, 313.0, 305.0, 255.0, 267.0, 287.0, 294.0, 304.0, 286.0, 298.0, 289.0, 287.0, 295.0, 288.0, 294.0, 289.0, 290.0, 286.0, 278.0, 314.0, 316.0, 285.0, 297.0, 300.0, 
287.0, 316.0, 323.0, 273.0, 297.0, 286.0, 290.0, 278.0, 289.0, 291.0, 285.0, 293.0, 289.0, 321.0, 309.0, 287.0, 297.0, 286.0, 301.0, 321.0, 312.0, 291.0, 291.0, 316.0, 314.0, 286.0, 296.0, 302.0, 277.0, 294.0, 288.0, 288.0, 285.0, 287.0, 292.0, 288.0, 282.0, 295.0, 287.0, 283.0, 293.0, 319.0, 314.0, 282.0, 300.0, 291.0, 291.0, 303.0, 279.0, 306.0, 321.0, 314.0, 319.0, 290.0, 297.0, 278.0, 301.0, 300.0, 287.0, 304.0, 283.0, 298.0, 281.0, 299.0, 283.0, 289.0, 295.0, 259.0, 254.0, 321.0, 309.0, 285.0, 297.0, 311.0, 319.0, 318.0, 312.0, 318.0, 315.0, 293.0, 297.0, 296.0, 286.0, 294.0, 288.0, 297.0, 285.0, 295.0, 284.0, 306.0, 324.0, 293.0, 289.0, 286.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.162072675470566, "mean_processing_ms": 0.30049261071423955, "mean_inference_ms": 1.7176923877441694}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5064000, "num_steps_sampled": 2700800, "sample_time_ms": 20604.341, "load_time_ms": 36.396, "grad_time_ms": 9451.079, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014451188035309315, "policy_loss": -0.0075116343796253204, "vf_loss": 95.30281829833984, "vf_explained_var": 0.7530279755592346, "kl": 0.001810736837796867, "entropy": 1.147046446800232, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2700800, "episodes_total": 6752, "training_iteration": 211, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-30-46", "timestamp": 1660253446, "time_this_iter_s": 30.540673971176147, "time_total_s": 11861.326703071594, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
11861.326703071594, "timesteps_since_restore": 2700800, "iterations_since_restore": 211, "perf": {"cpu_util_percent": 31.16511627906976, "ram_util_percent": 58.63023255813955}} +{"episode_reward_max": 636.0, "episode_reward_min": 465.0, "episode_reward_mean": 585.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 292.975}, "custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 181.55, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.41, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.19, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, 
"useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.82, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.36, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.48, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.36, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.36, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 
0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 627.0, 582.0, 542.0, 582.0, 579.0, 468.0, 539.0, 584.0, 587.0, 582.0, 633.0, 587.0, 579.0, 498.0, 627.0, 579.0, 630.0, 582.0, 630.0, 630.0, 582.0, 576.0, 587.0, 633.0, 582.0, 621.0, 627.0, 582.0, 587.0, 582.0, 525.0, 630.0, 582.0, 579.0, 582.0, 573.0, 579.0, 570.0, 582.0, 576.0, 633.0, 582.0, 582.0, 582.0, 627.0, 633.0, 587.0, 579.0, 587.0, 587.0, 579.0, 582.0, 584.0, 513.0, 630.0, 582.0, 630.0, 630.0, 633.0, 590.0, 582.0, 582.0, 582.0, 579.0, 630.0, 582.0, 579.0, 576.0, 573.0, 579.0, 465.0, 579.0, 630.0, 582.0, 582.0, 536.0, 587.0, 587.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 579.0, 576.0, 582.0, 530.0, 484.0, 627.0, 582.0, 579.0, 584.0, 579.0, 590.0, 587.0, 587.0, 579.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [303.0, 324.0, 318.0, 309.0, 294.0, 288.0, 267.0, 275.0, 288.0, 294.0, 293.0, 286.0, 236.0, 232.0, 277.0, 262.0, 288.0, 296.0, 302.0, 285.0, 285.0, 297.0, 314.0, 319.0, 296.0, 291.0, 293.0, 286.0, 245.0, 253.0, 318.0, 309.0, 295.0, 284.0, 318.0, 312.0, 293.0, 289.0, 309.0, 321.0, 310.0, 320.0, 280.0, 302.0, 293.0, 283.0, 286.0, 301.0, 320.0, 313.0, 286.0, 296.0, 314.0, 307.0, 319.0, 308.0, 301.0, 281.0, 282.0, 305.0, 288.0, 294.0, 256.0, 269.0, 316.0, 314.0, 286.0, 296.0, 302.0, 277.0, 294.0, 288.0, 288.0, 285.0, 287.0, 292.0, 288.0, 282.0, 
295.0, 287.0, 283.0, 293.0, 319.0, 314.0, 282.0, 300.0, 291.0, 291.0, 303.0, 279.0, 306.0, 321.0, 314.0, 319.0, 290.0, 297.0, 278.0, 301.0, 300.0, 287.0, 304.0, 283.0, 298.0, 281.0, 299.0, 283.0, 289.0, 295.0, 259.0, 254.0, 321.0, 309.0, 285.0, 297.0, 311.0, 319.0, 318.0, 312.0, 318.0, 315.0, 293.0, 297.0, 296.0, 286.0, 294.0, 288.0, 297.0, 285.0, 295.0, 284.0, 306.0, 324.0, 293.0, 289.0, 286.0, 293.0, 278.0, 298.0, 284.0, 289.0, 288.0, 291.0, 231.0, 234.0, 279.0, 300.0, 313.0, 317.0, 297.0, 285.0, 291.0, 291.0, 271.0, 265.0, 302.0, 285.0, 290.0, 297.0, 316.0, 314.0, 286.0, 293.0, 313.0, 317.0, 294.0, 285.0, 296.0, 286.0, 287.0, 292.0, 288.0, 291.0, 290.0, 286.0, 293.0, 289.0, 260.0, 270.0, 234.0, 250.0, 308.0, 319.0, 294.0, 288.0, 290.0, 289.0, 296.0, 288.0, 292.0, 287.0, 301.0, 289.0, 301.0, 286.0, 300.0, 287.0, 300.0, 279.0, 320.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 1.15872347920077, "mean_processing_ms": 0.2998198878857747, "mean_inference_ms": 1.7145174243808747}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5088000, "num_steps_sampled": 2713600, "sample_time_ms": 20641.235, "load_time_ms": 36.613, "grad_time_ms": 9485.245, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.010028759017586708, "policy_loss": 0.0009867753833532333, "vf_loss": 96.11052703857422, "vf_explained_var": 0.7489395141601562, "kl": 0.0021745015401393175, "entropy": 1.1381220817565918, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2713600, "episodes_total": 6784, "training_iteration": 212, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-31-18", "timestamp": 1660253478, "time_this_iter_s": 32.31651592254639, "time_total_s": 11893.64321899414, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, 
"num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11893.64321899414, "timesteps_since_restore": 2713600, "iterations_since_restore": 212, "perf": {"cpu_util_percent": 33.958695652173915, "ram_util_percent": 58.643478260869585}} +{"episode_reward_max": 636.0, "episode_reward_min": 465.0, "episode_reward_mean": 583.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 291.905}, "custom_metrics": {"sparse_reward_mean": 201.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 181.01, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.71, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.22, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.72, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.08, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.13, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.72, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.08, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.72, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.08, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [478.0, 582.0, 582.0, 582.0, 576.0, 576.0, 627.0, 579.0, 630.0, 536.0, 579.0, 630.0, 582.0, 582.0, 579.0, 582.0, 570.0, 576.0, 630.0, 627.0, 536.0, 579.0, 582.0, 579.0, 587.0, 584.0, 630.0, 582.0, 630.0, 582.0, 582.0, 579.0, 579.0, 630.0, 582.0, 579.0, 576.0, 573.0, 579.0, 465.0, 579.0, 630.0, 582.0, 582.0, 536.0, 587.0, 587.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 579.0, 576.0, 582.0, 530.0, 484.0, 627.0, 582.0, 579.0, 584.0, 579.0, 590.0, 587.0, 587.0, 579.0, 636.0, 627.0, 627.0, 582.0, 542.0, 582.0, 579.0, 468.0, 539.0, 584.0, 587.0, 582.0, 633.0, 587.0, 579.0, 498.0, 627.0, 579.0, 630.0, 582.0, 630.0, 630.0, 582.0, 576.0, 587.0, 633.0, 582.0, 621.0, 627.0, 582.0, 587.0, 582.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [246.0, 232.0, 301.0, 281.0, 298.0, 284.0, 291.0, 291.0, 303.0, 273.0, 282.0, 294.0, 316.0, 311.0, 293.0, 286.0, 308.0, 322.0, 270.0, 266.0, 288.0, 291.0, 324.0, 306.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 293.0, 289.0, 296.0, 274.0, 297.0, 279.0, 315.0, 315.0, 308.0, 319.0, 266.0, 270.0, 283.0, 296.0, 290.0, 292.0, 283.0, 296.0, 296.0, 
291.0, 293.0, 291.0, 316.0, 314.0, 281.0, 301.0, 316.0, 314.0, 290.0, 292.0, 289.0, 293.0, 286.0, 293.0, 295.0, 284.0, 306.0, 324.0, 293.0, 289.0, 286.0, 293.0, 278.0, 298.0, 284.0, 289.0, 288.0, 291.0, 231.0, 234.0, 279.0, 300.0, 313.0, 317.0, 297.0, 285.0, 291.0, 291.0, 271.0, 265.0, 302.0, 285.0, 290.0, 297.0, 316.0, 314.0, 286.0, 293.0, 313.0, 317.0, 294.0, 285.0, 296.0, 286.0, 287.0, 292.0, 288.0, 291.0, 290.0, 286.0, 293.0, 289.0, 260.0, 270.0, 234.0, 250.0, 308.0, 319.0, 294.0, 288.0, 290.0, 289.0, 296.0, 288.0, 292.0, 287.0, 301.0, 289.0, 301.0, 286.0, 300.0, 287.0, 300.0, 279.0, 320.0, 316.0, 303.0, 324.0, 318.0, 309.0, 294.0, 288.0, 267.0, 275.0, 288.0, 294.0, 293.0, 286.0, 236.0, 232.0, 277.0, 262.0, 288.0, 296.0, 302.0, 285.0, 285.0, 297.0, 314.0, 319.0, 296.0, 291.0, 293.0, 286.0, 245.0, 253.0, 318.0, 309.0, 295.0, 284.0, 318.0, 312.0, 293.0, 289.0, 309.0, 321.0, 310.0, 320.0, 280.0, 302.0, 293.0, 283.0, 286.0, 301.0, 320.0, 313.0, 286.0, 296.0, 314.0, 307.0, 319.0, 308.0, 301.0, 281.0, 282.0, 305.0, 288.0, 294.0, 256.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1554138563428238, "mean_processing_ms": 0.299155513024685, "mean_inference_ms": 1.7115208998476246}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5112000, "num_steps_sampled": 2726400, "sample_time_ms": 20734.364, "load_time_ms": 36.691, "grad_time_ms": 9415.374, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028692474588751793, "policy_loss": -0.0050502982921898365, "vf_loss": 84.87030029296875, "vf_explained_var": 0.7659473419189453, "kl": 0.0017100750701501966, "entropy": 1.134959101676941, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2726400, "episodes_total": 6816, "training_iteration": 213, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-31-49", "timestamp": 1660253509, 
"time_this_iter_s": 30.962037086486816, "time_total_s": 11924.605256080627, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, 
"log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, 
"sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11924.605256080627, "timesteps_since_restore": 2726400, "iterations_since_restore": 213, "perf": {"cpu_util_percent": 30.947727272727267, "ram_util_percent": 58.58863636363639}} +{"episode_reward_max": 636.0, "episode_reward_min": 345.0, "episode_reward_mean": 584.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 292.095}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.59, "shaped_reward_min": 105, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.49, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.08, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.65, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.44, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.56, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, 
"soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.85, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.56, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.56, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 587.0, 576.0, 582.0, 587.0, 630.0, 587.0, 630.0, 476.0, 579.0, 587.0, 584.0, 587.0, 627.0, 579.0, 558.0, 479.0, 630.0, 579.0, 630.0, 579.0, 630.0, 584.0, 576.0, 627.0, 627.0, 345.0, 579.0, 621.0, 582.0, 519.0, 587.0, 587.0, 579.0, 636.0, 627.0, 627.0, 582.0, 542.0, 582.0, 579.0, 468.0, 539.0, 584.0, 587.0, 582.0, 633.0, 587.0, 579.0, 498.0, 627.0, 579.0, 630.0, 582.0, 630.0, 630.0, 582.0, 576.0, 587.0, 633.0, 582.0, 621.0, 627.0, 582.0, 587.0, 582.0, 525.0, 478.0, 582.0, 582.0, 582.0, 576.0, 576.0, 627.0, 579.0, 630.0, 536.0, 579.0, 630.0, 582.0, 582.0, 579.0, 582.0, 570.0, 576.0, 630.0, 627.0, 536.0, 579.0, 582.0, 579.0, 587.0, 584.0, 630.0, 582.0, 630.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 283.0, 309.0, 321.0, 287.0, 300.0, 290.0, 286.0, 289.0, 293.0, 296.0, 291.0, 314.0, 316.0, 303.0, 284.0, 314.0, 316.0, 
243.0, 233.0, 273.0, 306.0, 296.0, 291.0, 291.0, 293.0, 298.0, 289.0, 307.0, 320.0, 284.0, 295.0, 264.0, 294.0, 238.0, 241.0, 326.0, 304.0, 288.0, 291.0, 319.0, 311.0, 289.0, 290.0, 311.0, 319.0, 292.0, 292.0, 295.0, 281.0, 321.0, 306.0, 316.0, 311.0, 173.0, 172.0, 295.0, 284.0, 302.0, 319.0, 291.0, 291.0, 259.0, 260.0, 301.0, 286.0, 300.0, 287.0, 300.0, 279.0, 320.0, 316.0, 303.0, 324.0, 318.0, 309.0, 294.0, 288.0, 267.0, 275.0, 288.0, 294.0, 293.0, 286.0, 236.0, 232.0, 277.0, 262.0, 288.0, 296.0, 302.0, 285.0, 285.0, 297.0, 314.0, 319.0, 296.0, 291.0, 293.0, 286.0, 245.0, 253.0, 318.0, 309.0, 295.0, 284.0, 318.0, 312.0, 293.0, 289.0, 309.0, 321.0, 310.0, 320.0, 280.0, 302.0, 293.0, 283.0, 286.0, 301.0, 320.0, 313.0, 286.0, 296.0, 314.0, 307.0, 319.0, 308.0, 301.0, 281.0, 282.0, 305.0, 288.0, 294.0, 256.0, 269.0, 246.0, 232.0, 301.0, 281.0, 298.0, 284.0, 291.0, 291.0, 303.0, 273.0, 282.0, 294.0, 316.0, 311.0, 293.0, 286.0, 308.0, 322.0, 270.0, 266.0, 288.0, 291.0, 324.0, 306.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 293.0, 289.0, 296.0, 274.0, 297.0, 279.0, 315.0, 315.0, 308.0, 319.0, 266.0, 270.0, 283.0, 296.0, 290.0, 292.0, 283.0, 296.0, 296.0, 291.0, 293.0, 291.0, 316.0, 314.0, 281.0, 301.0, 316.0, 314.0, 290.0, 292.0, 289.0, 293.0, 286.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1521297133023998, "mean_processing_ms": 0.2984947227408811, "mean_inference_ms": 1.7084618155808986}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5136000, "num_steps_sampled": 2739200, "sample_time_ms": 20745.898, "load_time_ms": 36.566, "grad_time_ms": 9296.474, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004741498734802008, "policy_loss": -0.003847965970635414, "vf_loss": 91.54241943359375, "vf_explained_var": 0.7623968124389648, "kl": 0.00236759171821177, "entropy": 1.1295729875564575, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 2739200, "episodes_total": 6848, "training_iteration": 214, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-32-19", "timestamp": 1660253539, "time_this_iter_s": 29.774744749069214, "time_total_s": 11954.380000829697, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11954.380000829697, "timesteps_since_restore": 2739200, "iterations_since_restore": 214, "perf": {"cpu_util_percent": 27.035714285714292, "ram_util_percent": 58.526190476190486}} +{"episode_reward_max": 636.0, "episode_reward_min": 345.0, "episode_reward_mean": 584.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 292.065}, "custom_metrics": {"sparse_reward_mean": 202.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.13, "shaped_reward_min": 105, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.77, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.7, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.86, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.22, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.38, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.22, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.38, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.22, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.38, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 558.0, 579.0, 576.0, 579.0, 536.0, 633.0, 584.0, 587.0, 576.0, 536.0, 627.0, 582.0, 627.0, 456.0, 584.0, 579.0, 582.0, 587.0, 621.0, 518.0, 567.0, 633.0, 627.0, 624.0, 582.0, 590.0, 621.0, 630.0, 630.0, 582.0, 636.0, 582.0, 587.0, 582.0, 525.0, 478.0, 582.0, 582.0, 582.0, 576.0, 576.0, 627.0, 579.0, 630.0, 536.0, 579.0, 630.0, 582.0, 582.0, 579.0, 582.0, 570.0, 576.0, 630.0, 627.0, 536.0, 579.0, 582.0, 579.0, 587.0, 584.0, 630.0, 582.0, 630.0, 582.0, 582.0, 579.0, 582.0, 630.0, 587.0, 576.0, 582.0, 587.0, 630.0, 587.0, 630.0, 476.0, 579.0, 587.0, 584.0, 587.0, 627.0, 579.0, 558.0, 479.0, 630.0, 579.0, 630.0, 579.0, 630.0, 584.0, 576.0, 627.0, 627.0, 345.0, 579.0, 621.0, 582.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 281.0, 277.0, 292.0, 287.0, 287.0, 289.0, 286.0, 293.0, 262.0, 274.0, 319.0, 314.0, 297.0, 287.0, 288.0, 299.0, 285.0, 291.0, 278.0, 258.0, 308.0, 319.0, 304.0, 278.0, 326.0, 301.0, 225.0, 231.0, 295.0, 289.0, 300.0, 279.0, 301.0, 281.0, 293.0, 294.0, 315.0, 306.0, 247.0, 271.0, 280.0, 287.0, 326.0, 307.0, 311.0, 316.0, 317.0, 307.0, 286.0, 296.0, 290.0, 300.0, 313.0, 308.0, 313.0, 317.0, 318.0, 312.0, 296.0, 286.0, 321.0, 315.0, 301.0, 281.0, 282.0, 305.0, 288.0, 294.0, 256.0, 269.0, 246.0, 232.0, 301.0, 281.0, 298.0, 284.0, 291.0, 291.0, 303.0, 273.0, 282.0, 294.0, 316.0, 311.0, 293.0, 286.0, 308.0, 322.0, 270.0, 266.0, 288.0, 291.0, 324.0, 306.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 293.0, 289.0, 296.0, 274.0, 297.0, 279.0, 315.0, 315.0, 308.0, 319.0, 266.0, 270.0, 283.0, 296.0, 290.0, 292.0, 283.0, 296.0, 296.0, 291.0, 293.0, 291.0, 316.0, 314.0, 281.0, 301.0, 316.0, 314.0, 290.0, 292.0, 289.0, 293.0, 286.0, 293.0, 299.0, 283.0, 309.0, 321.0, 287.0, 300.0, 290.0, 286.0, 289.0, 293.0, 296.0, 291.0, 314.0, 316.0, 303.0, 284.0, 314.0, 316.0, 243.0, 233.0, 273.0, 306.0, 296.0, 291.0, 291.0, 293.0, 298.0, 289.0, 307.0, 320.0, 284.0, 295.0, 264.0, 294.0, 238.0, 241.0, 326.0, 304.0, 288.0, 291.0, 319.0, 311.0, 289.0, 290.0, 311.0, 319.0, 292.0, 292.0, 295.0, 281.0, 321.0, 306.0, 316.0, 311.0, 173.0, 172.0, 295.0, 284.0, 302.0, 319.0, 291.0, 291.0, 259.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1488626119910377, "mean_processing_ms": 0.297841305997747, "mean_inference_ms": 1.7053304969218863}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5160000, "num_steps_sampled": 2752000, "sample_time_ms": 20853.401, "load_time_ms": 36.595, "grad_time_ms": 9300.168, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004025696776807308, "policy_loss": -0.0038255956023931503, "vf_loss": 84.18643951416016, 
"vf_explained_var": 0.7665885090827942, "kl": 0.0019039264880120754, "entropy": 1.1346958875656128, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2752000, "episodes_total": 6880, "training_iteration": 215, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-32-49", "timestamp": 1660253569, "time_this_iter_s": 30.592424869537354, "time_total_s": 11984.972425699234, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": 
[null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11984.972425699234, "timesteps_since_restore": 2752000, "iterations_since_restore": 215, "perf": {"cpu_util_percent": 30.46511627906977, "ram_util_percent": 58.576744186046504}} +{"episode_reward_max": 636.0, "episode_reward_min": 345.0, "episode_reward_mean": 589.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 294.55}, "custom_metrics": {"sparse_reward_mean": 204.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 181.1, "shaped_reward_min": 105, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.05, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.94, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.63, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.02, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.88, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.09, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.64, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.8, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.09, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.64, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.09, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.64, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 630.0, 627.0, 582.0, 627.0, 579.0, 579.0, 567.0, 587.0, 587.0, 630.0, 579.0, 582.0, 576.0, 582.0, 630.0, 570.0, 582.0, 627.0, 587.0, 582.0, 582.0, 630.0, 582.0, 627.0, 582.0, 627.0, 587.0, 633.0, 587.0, 582.0, 630.0, 630.0, 582.0, 582.0, 579.0, 582.0, 630.0, 587.0, 576.0, 582.0, 587.0, 630.0, 587.0, 630.0, 476.0, 579.0, 587.0, 584.0, 587.0, 627.0, 579.0, 558.0, 479.0, 630.0, 579.0, 630.0, 579.0, 630.0, 584.0, 576.0, 627.0, 627.0, 345.0, 579.0, 621.0, 582.0, 519.0, 636.0, 558.0, 579.0, 576.0, 579.0, 536.0, 633.0, 584.0, 587.0, 576.0, 536.0, 627.0, 582.0, 627.0, 456.0, 584.0, 579.0, 582.0, 587.0, 621.0, 518.0, 567.0, 633.0, 627.0, 624.0, 582.0, 590.0, 621.0, 630.0, 630.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 282.0, 317.0, 313.0, 309.0, 318.0, 296.0, 286.0, 316.0, 311.0, 288.0, 291.0, 285.0, 294.0, 286.0, 281.0, 298.0, 289.0, 298.0, 289.0, 314.0, 316.0, 290.0, 289.0, 289.0, 293.0, 291.0, 285.0, 285.0, 297.0, 306.0, 324.0, 298.0, 272.0, 283.0, 299.0, 311.0, 316.0, 293.0, 294.0, 300.0, 282.0, 293.0, 289.0, 319.0, 311.0, 285.0, 297.0, 310.0, 317.0, 293.0, 289.0, 310.0, 317.0, 300.0, 287.0, 316.0, 317.0, 298.0, 289.0, 283.0, 299.0, 319.0, 311.0, 316.0, 314.0, 290.0, 292.0, 289.0, 293.0, 286.0, 293.0, 299.0, 283.0, 309.0, 321.0, 287.0, 300.0, 290.0, 286.0, 289.0, 293.0, 296.0, 291.0, 314.0, 316.0, 303.0, 284.0, 314.0, 316.0, 243.0, 233.0, 273.0, 306.0, 296.0, 291.0, 291.0, 293.0, 298.0, 289.0, 307.0, 320.0, 284.0, 295.0, 264.0, 294.0, 238.0, 241.0, 326.0, 304.0, 288.0, 291.0, 319.0, 311.0, 289.0, 290.0, 311.0, 319.0, 292.0, 292.0, 295.0, 281.0, 321.0, 306.0, 316.0, 311.0, 173.0, 172.0, 295.0, 284.0, 302.0, 319.0, 291.0, 291.0, 259.0, 260.0, 314.0, 322.0, 281.0, 277.0, 292.0, 287.0, 287.0, 289.0, 286.0, 293.0, 262.0, 274.0, 319.0, 314.0, 297.0, 287.0, 288.0, 299.0, 285.0, 291.0, 278.0, 258.0, 308.0, 319.0, 304.0, 278.0, 326.0, 301.0, 225.0, 231.0, 295.0, 289.0, 300.0, 279.0, 301.0, 281.0, 293.0, 294.0, 315.0, 306.0, 247.0, 271.0, 280.0, 287.0, 326.0, 307.0, 311.0, 316.0, 317.0, 307.0, 286.0, 296.0, 290.0, 300.0, 313.0, 308.0, 313.0, 317.0, 318.0, 312.0, 296.0, 286.0, 321.0, 315.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1456158536618073, "mean_processing_ms": 0.29719304474995795, "mean_inference_ms": 1.7020409366400755}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5184000, "num_steps_sampled": 2764800, "sample_time_ms": 20854.663, "load_time_ms": 36.839, "grad_time_ms": 
9330.064, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0055513703264296055, "policy_loss": -0.0025626528076827526, "vf_loss": 86.77967071533203, "vf_explained_var": 0.7667043805122375, "kl": 0.00211916770786047, "entropy": 1.1278961896896362, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2764800, "episodes_total": 6912, "training_iteration": 216, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-33-19", "timestamp": 1660253599, "time_this_iter_s": 29.99899387359619, "time_total_s": 12014.97141957283, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": 
["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12014.97141957283, "timesteps_since_restore": 2764800, "iterations_since_restore": 216, "perf": {"cpu_util_percent": 32.32380952380952, "ram_util_percent": 58.607142857142854}} +{"episode_reward_max": 639.0, "episode_reward_min": 456.0, "episode_reward_mean": 596.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 298.185}, "custom_metrics": {"sparse_reward_mean": 206.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 183.17, "shaped_reward_min": 136, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.25, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.9, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.82, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.07, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.31, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.48, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.71, 
"useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.77, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.31, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.31, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 636.0, 582.0, 584.0, 587.0, 636.0, 630.0, 573.0, 573.0, 630.0, 582.0, 627.0, 576.0, 633.0, 630.0, 633.0, 587.0, 633.0, 579.0, 579.0, 587.0, 582.0, 627.0, 630.0, 639.0, 587.0, 582.0, 579.0, 582.0, 576.0, 636.0, 579.0, 621.0, 582.0, 519.0, 636.0, 558.0, 579.0, 576.0, 579.0, 536.0, 633.0, 584.0, 587.0, 576.0, 536.0, 627.0, 582.0, 627.0, 456.0, 584.0, 579.0, 582.0, 587.0, 621.0, 518.0, 567.0, 633.0, 627.0, 624.0, 582.0, 590.0, 621.0, 630.0, 630.0, 582.0, 636.0, 576.0, 630.0, 627.0, 582.0, 627.0, 579.0, 579.0, 567.0, 587.0, 587.0, 630.0, 579.0, 582.0, 576.0, 582.0, 630.0, 570.0, 582.0, 627.0, 587.0, 582.0, 582.0, 630.0, 582.0, 627.0, 582.0, 627.0, 587.0, 633.0, 587.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 329.0, 321.0, 309.0, 321.0, 315.0, 289.0, 293.0, 294.0, 290.0, 281.0, 306.0, 306.0, 330.0, 313.0, 317.0, 279.0, 294.0, 288.0, 285.0, 316.0, 314.0, 293.0, 289.0, 319.0, 308.0, 288.0, 288.0, 324.0, 309.0, 322.0, 308.0, 311.0, 322.0, 295.0, 292.0, 316.0, 317.0, 283.0, 296.0, 287.0, 292.0, 293.0, 294.0, 278.0, 304.0, 316.0, 311.0, 321.0, 309.0, 315.0, 324.0, 306.0, 281.0, 293.0, 289.0, 293.0, 286.0, 293.0, 289.0, 291.0, 285.0, 314.0, 322.0, 295.0, 284.0, 302.0, 319.0, 291.0, 291.0, 259.0, 260.0, 314.0, 322.0, 281.0, 277.0, 292.0, 287.0, 287.0, 289.0, 286.0, 293.0, 262.0, 274.0, 319.0, 314.0, 297.0, 287.0, 288.0, 299.0, 285.0, 291.0, 278.0, 258.0, 308.0, 319.0, 304.0, 278.0, 326.0, 301.0, 225.0, 231.0, 295.0, 289.0, 300.0, 279.0, 301.0, 281.0, 293.0, 294.0, 315.0, 306.0, 247.0, 271.0, 280.0, 287.0, 326.0, 307.0, 311.0, 316.0, 317.0, 307.0, 286.0, 296.0, 290.0, 300.0, 313.0, 308.0, 313.0, 317.0, 318.0, 312.0, 296.0, 286.0, 321.0, 315.0, 294.0, 282.0, 317.0, 313.0, 309.0, 318.0, 296.0, 286.0, 316.0, 311.0, 288.0, 291.0, 285.0, 294.0, 286.0, 281.0, 298.0, 289.0, 298.0, 289.0, 314.0, 316.0, 290.0, 289.0, 289.0, 293.0, 291.0, 285.0, 285.0, 297.0, 306.0, 324.0, 298.0, 272.0, 283.0, 299.0, 311.0, 316.0, 293.0, 294.0, 300.0, 282.0, 293.0, 289.0, 319.0, 311.0, 285.0, 297.0, 310.0, 317.0, 293.0, 289.0, 310.0, 317.0, 300.0, 287.0, 316.0, 317.0, 298.0, 289.0, 283.0, 299.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1424035037874523, "mean_processing_ms": 0.29655443936404674, 
"mean_inference_ms": 1.6989240589354977}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5208000, "num_steps_sampled": 2777600, "sample_time_ms": 20971.783, "load_time_ms": 36.769, "grad_time_ms": 9428.242, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002736276714131236, "policy_loss": -0.004988871049135923, "vf_loss": 82.89418029785156, "vf_explained_var": 0.7724503874778748, "kl": 0.00226503680460155, "entropy": 1.1285419464111328, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2777600, "episodes_total": 6944, "training_iteration": 217, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-33-52", "timestamp": 1660253632, "time_this_iter_s": 32.39657115936279, "time_total_s": 12047.367990732193, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, 
"num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12047.367990732193, "timesteps_since_restore": 2777600, "iterations_since_restore": 217, "perf": {"cpu_util_percent": 33.11304347826087, "ram_util_percent": 58.56521739130436}} +{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 593.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 296.97}, "custom_metrics": {"sparse_reward_mean": 205.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 183.14, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, 
"tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.16, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.67, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.59, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, 
"dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.59, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.59, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 
21, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 579.0, 587.0, 582.0, 630.0, 636.0, 579.0, 633.0, 579.0, 584.0, 582.0, 579.0, 579.0, 579.0, 630.0, 630.0, 587.0, 587.0, 518.0, 633.0, 587.0, 582.0, 530.0, 582.0, 462.0, 582.0, 627.0, 291.0, 587.0, 579.0, 582.0, 630.0, 630.0, 582.0, 636.0, 576.0, 630.0, 627.0, 582.0, 627.0, 579.0, 579.0, 567.0, 587.0, 587.0, 630.0, 579.0, 582.0, 576.0, 582.0, 630.0, 570.0, 582.0, 627.0, 587.0, 582.0, 582.0, 630.0, 582.0, 627.0, 582.0, 627.0, 587.0, 633.0, 587.0, 582.0, 630.0, 627.0, 630.0, 636.0, 582.0, 584.0, 587.0, 636.0, 630.0, 
573.0, 573.0, 630.0, 582.0, 627.0, 576.0, 633.0, 630.0, 633.0, 587.0, 633.0, 579.0, 579.0, 587.0, 582.0, 627.0, 630.0, 639.0, 587.0, 582.0, 579.0, 582.0, 576.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 306.0, 282.0, 297.0, 298.0, 281.0, 301.0, 286.0, 283.0, 299.0, 319.0, 311.0, 321.0, 315.0, 299.0, 280.0, 315.0, 318.0, 294.0, 285.0, 298.0, 286.0, 288.0, 294.0, 297.0, 282.0, 282.0, 297.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 296.0, 291.0, 291.0, 296.0, 262.0, 256.0, 317.0, 316.0, 294.0, 293.0, 296.0, 286.0, 262.0, 268.0, 289.0, 293.0, 228.0, 234.0, 288.0, 294.0, 313.0, 314.0, 145.0, 146.0, 291.0, 296.0, 293.0, 286.0, 286.0, 296.0, 313.0, 317.0, 318.0, 312.0, 296.0, 286.0, 321.0, 315.0, 294.0, 282.0, 317.0, 313.0, 309.0, 318.0, 296.0, 286.0, 316.0, 311.0, 288.0, 291.0, 285.0, 294.0, 286.0, 281.0, 298.0, 289.0, 298.0, 289.0, 314.0, 316.0, 290.0, 289.0, 289.0, 293.0, 291.0, 285.0, 285.0, 297.0, 306.0, 324.0, 298.0, 272.0, 283.0, 299.0, 311.0, 316.0, 293.0, 294.0, 300.0, 282.0, 293.0, 289.0, 319.0, 311.0, 285.0, 297.0, 310.0, 317.0, 293.0, 289.0, 310.0, 317.0, 300.0, 287.0, 316.0, 317.0, 298.0, 289.0, 283.0, 299.0, 319.0, 311.0, 298.0, 329.0, 321.0, 309.0, 321.0, 315.0, 289.0, 293.0, 294.0, 290.0, 281.0, 306.0, 306.0, 330.0, 313.0, 317.0, 279.0, 294.0, 288.0, 285.0, 316.0, 314.0, 293.0, 289.0, 319.0, 308.0, 288.0, 288.0, 324.0, 309.0, 322.0, 308.0, 311.0, 322.0, 295.0, 292.0, 316.0, 317.0, 283.0, 296.0, 287.0, 292.0, 293.0, 294.0, 278.0, 304.0, 316.0, 311.0, 
321.0, 309.0, 315.0, 324.0, 306.0, 281.0, 293.0, 289.0, 293.0, 286.0, 293.0, 289.0, 291.0, 285.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1392367943050357, "mean_processing_ms": 0.29592632211468906, "mean_inference_ms": 1.6959597907664128}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5232000, "num_steps_sampled": 2790400, "sample_time_ms": 21233.48, "load_time_ms": 36.919, "grad_time_ms": 9592.49, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006457938347011805, "policy_loss": -0.0026744985952973366, "vf_loss": 97.0146713256836, "vf_explained_var": 0.7470273375511169, "kl": 0.0016420072643086314, "entropy": 1.1380563974380493, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2790400, "episodes_total": 6976, "training_iteration": 218, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-34-25", "timestamp": 1660253665, "time_this_iter_s": 33.2370343208313, "time_total_s": 12080.605025053024, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12080.605025053024, "timesteps_since_restore": 2790400, "iterations_since_restore": 218, "perf": {"cpu_util_percent": 35.75531914893618, "ram_util_percent": 58.63191489361703}} +{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 593.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 296.79}, "custom_metrics": {"sparse_reward_mean": 205.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 182.78, 
"shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.44, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.81, 
"potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.31, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.81, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.31, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.81, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.31, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 627.0, 555.0, 636.0, 627.0, 582.0, 630.0, 408.0, 582.0, 587.0, 570.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 587.0, 573.0, 630.0, 587.0, 576.0, 630.0, 576.0, 630.0, 630.0, 584.0, 582.0, 582.0, 567.0, 633.0, 582.0, 633.0, 587.0, 582.0, 630.0, 627.0, 630.0, 636.0, 582.0, 584.0, 587.0, 636.0, 630.0, 573.0, 573.0, 630.0, 
582.0, 627.0, 576.0, 633.0, 630.0, 633.0, 587.0, 633.0, 579.0, 579.0, 587.0, 582.0, 627.0, 630.0, 639.0, 587.0, 582.0, 579.0, 582.0, 576.0, 636.0, 582.0, 579.0, 579.0, 587.0, 582.0, 630.0, 636.0, 579.0, 633.0, 579.0, 584.0, 582.0, 579.0, 579.0, 579.0, 630.0, 630.0, 587.0, 587.0, 518.0, 633.0, 587.0, 582.0, 530.0, 582.0, 462.0, 582.0, 627.0, 291.0, 587.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 318.0, 309.0, 269.0, 286.0, 324.0, 312.0, 319.0, 308.0, 291.0, 291.0, 318.0, 312.0, 202.0, 206.0, 292.0, 290.0, 293.0, 294.0, 278.0, 292.0, 289.0, 284.0, 332.0, 301.0, 322.0, 317.0, 311.0, 322.0, 326.0, 304.0, 317.0, 313.0, 301.0, 286.0, 277.0, 296.0, 316.0, 314.0, 296.0, 291.0, 272.0, 304.0, 319.0, 311.0, 291.0, 285.0, 313.0, 317.0, 316.0, 314.0, 290.0, 294.0, 299.0, 283.0, 285.0, 297.0, 281.0, 286.0, 317.0, 316.0, 287.0, 295.0, 316.0, 317.0, 298.0, 289.0, 283.0, 299.0, 319.0, 311.0, 298.0, 329.0, 321.0, 309.0, 321.0, 315.0, 289.0, 293.0, 294.0, 290.0, 281.0, 306.0, 306.0, 330.0, 313.0, 317.0, 279.0, 294.0, 288.0, 285.0, 316.0, 314.0, 293.0, 289.0, 319.0, 308.0, 288.0, 288.0, 324.0, 309.0, 322.0, 308.0, 311.0, 322.0, 295.0, 292.0, 316.0, 317.0, 283.0, 296.0, 287.0, 292.0, 293.0, 294.0, 278.0, 304.0, 316.0, 311.0, 321.0, 309.0, 315.0, 324.0, 306.0, 281.0, 293.0, 289.0, 293.0, 286.0, 293.0, 289.0, 291.0, 285.0, 314.0, 322.0, 276.0, 306.0, 282.0, 297.0, 298.0, 281.0, 301.0, 286.0, 283.0, 299.0, 319.0, 311.0, 321.0, 315.0, 299.0, 280.0, 315.0, 318.0, 294.0, 
285.0, 298.0, 286.0, 288.0, 294.0, 297.0, 282.0, 282.0, 297.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 296.0, 291.0, 291.0, 296.0, 262.0, 256.0, 317.0, 316.0, 294.0, 293.0, 296.0, 286.0, 262.0, 268.0, 289.0, 293.0, 228.0, 234.0, 288.0, 294.0, 313.0, 314.0, 145.0, 146.0, 291.0, 296.0, 293.0, 286.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1361092001504756, "mean_processing_ms": 0.29530600936138574, "mean_inference_ms": 1.6931104739373604}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5256000, "num_steps_sampled": 2803200, "sample_time_ms": 21336.415, "load_time_ms": 37.483, "grad_time_ms": 9561.229, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005191893433220685, "policy_loss": -0.0074623264372348785, "vf_loss": 85.4925765991211, "vf_explained_var": 0.7601101994514465, "kl": 0.0019686671439558268, "entropy": 1.1354910135269165, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2803200, "episodes_total": 7008, "training_iteration": 219, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-34-55", "timestamp": 1660253695, "time_this_iter_s": 30.000843048095703, "time_total_s": 12110.60586810112, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12110.60586810112, "timesteps_since_restore": 2803200, "iterations_since_restore": 219, "perf": {"cpu_util_percent": 30.46666666666667, "ram_util_percent": 58.68571428571429}} +{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 588.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 145.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 294.01}, "custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.62, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.07, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.97, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.77, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.68, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.26, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.87, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.03, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.87, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.03, "optimal_onion_potting_agent_1_min": 10, 
"optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.87, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.03, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 624.0, 582.0, 518.0, 582.0, 630.0, 576.0, 633.0, 587.0, 582.0, 587.0, 582.0, 584.0, 582.0, 627.0, 544.0, 
579.0, 576.0, 487.0, 582.0, 582.0, 584.0, 636.0, 582.0, 633.0, 630.0, 539.0, 579.0, 579.0, 627.0, 630.0, 579.0, 582.0, 576.0, 636.0, 582.0, 579.0, 579.0, 587.0, 582.0, 630.0, 636.0, 579.0, 633.0, 579.0, 584.0, 582.0, 579.0, 579.0, 579.0, 630.0, 630.0, 587.0, 587.0, 518.0, 633.0, 587.0, 582.0, 530.0, 582.0, 462.0, 582.0, 627.0, 291.0, 587.0, 579.0, 582.0, 636.0, 627.0, 555.0, 636.0, 627.0, 582.0, 630.0, 408.0, 582.0, 587.0, 570.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 587.0, 573.0, 630.0, 587.0, 576.0, 630.0, 576.0, 630.0, 630.0, 584.0, 582.0, 582.0, 567.0, 633.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 322.0, 292.0, 290.0, 321.0, 303.0, 292.0, 290.0, 252.0, 266.0, 297.0, 285.0, 313.0, 317.0, 298.0, 278.0, 324.0, 309.0, 285.0, 302.0, 291.0, 291.0, 291.0, 296.0, 291.0, 291.0, 287.0, 297.0, 291.0, 291.0, 310.0, 317.0, 272.0, 272.0, 290.0, 289.0, 293.0, 283.0, 257.0, 230.0, 285.0, 297.0, 285.0, 297.0, 300.0, 284.0, 312.0, 324.0, 277.0, 305.0, 313.0, 320.0, 321.0, 309.0, 265.0, 274.0, 294.0, 285.0, 299.0, 280.0, 311.0, 316.0, 309.0, 321.0, 293.0, 286.0, 293.0, 289.0, 291.0, 285.0, 314.0, 322.0, 276.0, 306.0, 282.0, 297.0, 298.0, 281.0, 301.0, 286.0, 283.0, 299.0, 319.0, 311.0, 321.0, 315.0, 299.0, 280.0, 315.0, 318.0, 294.0, 285.0, 298.0, 286.0, 288.0, 294.0, 297.0, 282.0, 282.0, 297.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 296.0, 291.0, 291.0, 296.0, 262.0, 256.0, 317.0, 316.0, 294.0, 293.0, 296.0, 286.0, 262.0, 268.0, 289.0, 293.0, 228.0, 234.0, 288.0, 
294.0, 313.0, 314.0, 145.0, 146.0, 291.0, 296.0, 293.0, 286.0, 286.0, 296.0, 319.0, 317.0, 318.0, 309.0, 269.0, 286.0, 324.0, 312.0, 319.0, 308.0, 291.0, 291.0, 318.0, 312.0, 202.0, 206.0, 292.0, 290.0, 293.0, 294.0, 278.0, 292.0, 289.0, 284.0, 332.0, 301.0, 322.0, 317.0, 311.0, 322.0, 326.0, 304.0, 317.0, 313.0, 301.0, 286.0, 277.0, 296.0, 316.0, 314.0, 296.0, 291.0, 272.0, 304.0, 319.0, 311.0, 291.0, 285.0, 313.0, 317.0, 316.0, 314.0, 290.0, 294.0, 299.0, 283.0, 285.0, 297.0, 281.0, 286.0, 317.0, 316.0, 287.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.133009827763421, "mean_processing_ms": 0.29469029655090995, "mean_inference_ms": 1.6902501086005228}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5280000, "num_steps_sampled": 2816000, "sample_time_ms": 21400.362, "load_time_ms": 37.346, "grad_time_ms": 9606.188, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002275400562211871, "policy_loss": -0.0062116296030581, "vf_loss": 90.5528793334961, "vf_explained_var": 0.7516798973083496, "kl": 0.0019114302704110742, "entropy": 1.1365100145339966, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2816000, "episodes_total": 7040, "training_iteration": 220, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-35-26", "timestamp": 1660253726, "time_this_iter_s": 30.873941659927368, "time_total_s": 12141.479809761047, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": 
true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12141.479809761047, "timesteps_since_restore": 2816000, "iterations_since_restore": 220, "perf": {"cpu_util_percent": 31.518181818181816, 
"ram_util_percent": 58.6159090909091}} +{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 590.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 295.165}, "custom_metrics": {"sparse_reward_mean": 204.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.93, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.88, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.64, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.09, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 
0.71, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.9, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.34, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 
16.1, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.9, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.9, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 579.0, 573.0, 579.0, 627.0, 630.0, 582.0, 587.0, 627.0, 630.0, 636.0, 573.0, 582.0, 582.0, 579.0, 582.0, 627.0, 570.0, 582.0, 573.0, 593.0, 590.0, 582.0, 579.0, 555.0, 627.0, 539.0, 636.0, 582.0, 633.0, 582.0, 291.0, 587.0, 579.0, 582.0, 636.0, 627.0, 555.0, 636.0, 627.0, 582.0, 630.0, 408.0, 582.0, 587.0, 570.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 587.0, 573.0, 630.0, 587.0, 576.0, 630.0, 576.0, 630.0, 630.0, 584.0, 582.0, 582.0, 567.0, 633.0, 582.0, 630.0, 582.0, 624.0, 582.0, 518.0, 582.0, 630.0, 576.0, 633.0, 587.0, 582.0, 587.0, 582.0, 584.0, 582.0, 627.0, 544.0, 579.0, 576.0, 487.0, 582.0, 582.0, 584.0, 636.0, 582.0, 633.0, 630.0, 539.0, 579.0, 579.0, 627.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 306.0, 324.0, 302.0, 277.0, 288.0, 285.0, 284.0, 295.0, 322.0, 305.0, 313.0, 317.0, 289.0, 293.0, 287.0, 300.0, 310.0, 317.0, 308.0, 322.0, 314.0, 322.0, 291.0, 282.0, 298.0, 284.0, 301.0, 281.0, 285.0, 294.0, 300.0, 282.0, 299.0, 328.0, 284.0, 286.0, 288.0, 294.0, 283.0, 290.0, 289.0, 304.0, 291.0, 299.0, 294.0, 288.0, 283.0, 296.0, 285.0, 270.0, 311.0, 316.0, 260.0, 279.0, 327.0, 309.0, 285.0, 297.0, 324.0, 309.0, 290.0, 292.0, 145.0, 146.0, 291.0, 296.0, 293.0, 286.0, 286.0, 296.0, 319.0, 317.0, 318.0, 309.0, 269.0, 286.0, 324.0, 312.0, 319.0, 308.0, 291.0, 291.0, 318.0, 312.0, 202.0, 206.0, 292.0, 290.0, 293.0, 294.0, 278.0, 292.0, 289.0, 284.0, 332.0, 
301.0, 322.0, 317.0, 311.0, 322.0, 326.0, 304.0, 317.0, 313.0, 301.0, 286.0, 277.0, 296.0, 316.0, 314.0, 296.0, 291.0, 272.0, 304.0, 319.0, 311.0, 291.0, 285.0, 313.0, 317.0, 316.0, 314.0, 290.0, 294.0, 299.0, 283.0, 285.0, 297.0, 281.0, 286.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 292.0, 290.0, 321.0, 303.0, 292.0, 290.0, 252.0, 266.0, 297.0, 285.0, 313.0, 317.0, 298.0, 278.0, 324.0, 309.0, 285.0, 302.0, 291.0, 291.0, 291.0, 296.0, 291.0, 291.0, 287.0, 297.0, 291.0, 291.0, 310.0, 317.0, 272.0, 272.0, 290.0, 289.0, 293.0, 283.0, 257.0, 230.0, 285.0, 297.0, 285.0, 297.0, 300.0, 284.0, 312.0, 324.0, 277.0, 305.0, 313.0, 320.0, 321.0, 309.0, 265.0, 274.0, 294.0, 285.0, 299.0, 280.0, 311.0, 316.0, 309.0, 321.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1299217967931974, "mean_processing_ms": 0.29407401911825776, "mean_inference_ms": 1.6873151851132406}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5304000, "num_steps_sampled": 2828800, "sample_time_ms": 21404.207, "load_time_ms": 36.943, "grad_time_ms": 9639.523, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004844650160521269, "policy_loss": -0.004174739122390747, "vf_loss": 95.8445816040039, "vf_explained_var": 0.7459821701049805, "kl": 0.0019909220281988382, "entropy": 1.1301350593566895, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2828800, "episodes_total": 7072, "training_iteration": 221, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-35-57", "timestamp": 1660253757, "time_this_iter_s": 30.906293869018555, "time_total_s": 12172.386103630066, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 12172.386103630066, "timesteps_since_restore": 2828800, "iterations_since_restore": 221, "perf": {"cpu_util_percent": 27.49545454545455, "ram_util_percent": 58.63636363636363}} +{"episode_reward_max": 639.0, "episode_reward_min": 487.0, "episode_reward_mean": 596.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 298.01}, "custom_metrics": {"sparse_reward_mean": 206.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 183.62, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.36, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.93, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.85, "useful_onion_pickup_agent_0_min": 11, 
"useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.1, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.23, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.04, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.22, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 
0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.23, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.04, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.23, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.04, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 579.0, 630.0, 630.0, 627.0, 587.0, 639.0, 587.0, 630.0, 582.0, 633.0, 587.0, 627.0, 579.0, 576.0, 633.0, 582.0, 570.0, 630.0, 627.0, 579.0, 627.0, 582.0, 633.0, 633.0, 630.0, 579.0, 582.0, 627.0, 627.0, 576.0, 582.0, 567.0, 633.0, 582.0, 630.0, 582.0, 624.0, 582.0, 518.0, 582.0, 630.0, 576.0, 633.0, 587.0, 582.0, 587.0, 582.0, 584.0, 582.0, 627.0, 544.0, 579.0, 576.0, 487.0, 582.0, 582.0, 584.0, 636.0, 582.0, 633.0, 630.0, 539.0, 579.0, 579.0, 627.0, 630.0, 582.0, 630.0, 579.0, 573.0, 579.0, 627.0, 630.0, 582.0, 587.0, 627.0, 630.0, 636.0, 573.0, 582.0, 582.0, 579.0, 582.0, 627.0, 570.0, 582.0, 573.0, 593.0, 590.0, 582.0, 579.0, 555.0, 627.0, 539.0, 636.0, 582.0, 633.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 283.0, 296.0, 293.0, 286.0, 310.0, 320.0, 318.0, 312.0, 318.0, 309.0, 290.0, 297.0, 325.0, 314.0, 290.0, 297.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 295.0, 292.0, 298.0, 329.0, 280.0, 299.0, 280.0, 296.0, 318.0, 315.0, 293.0, 289.0, 283.0, 287.0, 319.0, 311.0, 305.0, 322.0, 285.0, 294.0, 309.0, 318.0, 285.0, 297.0, 320.0, 313.0, 311.0, 322.0, 313.0, 317.0, 296.0, 283.0, 286.0, 296.0, 305.0, 322.0, 310.0, 317.0, 289.0, 287.0, 285.0, 297.0, 281.0, 286.0, 317.0, 316.0, 287.0, 295.0, 308.0, 
322.0, 292.0, 290.0, 321.0, 303.0, 292.0, 290.0, 252.0, 266.0, 297.0, 285.0, 313.0, 317.0, 298.0, 278.0, 324.0, 309.0, 285.0, 302.0, 291.0, 291.0, 291.0, 296.0, 291.0, 291.0, 287.0, 297.0, 291.0, 291.0, 310.0, 317.0, 272.0, 272.0, 290.0, 289.0, 293.0, 283.0, 257.0, 230.0, 285.0, 297.0, 285.0, 297.0, 300.0, 284.0, 312.0, 324.0, 277.0, 305.0, 313.0, 320.0, 321.0, 309.0, 265.0, 274.0, 294.0, 285.0, 299.0, 280.0, 311.0, 316.0, 309.0, 321.0, 290.0, 292.0, 306.0, 324.0, 302.0, 277.0, 288.0, 285.0, 284.0, 295.0, 322.0, 305.0, 313.0, 317.0, 289.0, 293.0, 287.0, 300.0, 310.0, 317.0, 308.0, 322.0, 314.0, 322.0, 291.0, 282.0, 298.0, 284.0, 301.0, 281.0, 285.0, 294.0, 300.0, 282.0, 299.0, 328.0, 284.0, 286.0, 288.0, 294.0, 283.0, 290.0, 289.0, 304.0, 291.0, 299.0, 294.0, 288.0, 283.0, 296.0, 285.0, 270.0, 311.0, 316.0, 260.0, 279.0, 327.0, 309.0, 285.0, 297.0, 324.0, 309.0, 290.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1268569615111614, "mean_processing_ms": 0.2934624000398477, "mean_inference_ms": 1.6844698808001166}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5328000, "num_steps_sampled": 2841600, "sample_time_ms": 21356.225, "load_time_ms": 36.973, "grad_time_ms": 9654.508, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037463188637048006, "policy_loss": -0.0037510646507143974, "vf_loss": 80.60189056396484, "vf_explained_var": 0.7646245360374451, "kl": 0.002355078933760524, "entropy": 1.125628113746643, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2841600, "episodes_total": 7104, "training_iteration": 222, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-36-29", "timestamp": 1660253789, "time_this_iter_s": 31.9894540309906, "time_total_s": 12204.375557661057, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12204.375557661057, "timesteps_since_restore": 2841600, "iterations_since_restore": 222, "perf": {"cpu_util_percent": 30.18222222222223, "ram_util_percent": 58.70444444444445}} +{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 601.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 300.665}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.13, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 26, 
"onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.26, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.21, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.37, 
"soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.21, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.21, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 633.0, 579.0, 582.0, 582.0, 633.0, 627.0, 630.0, 630.0, 636.0, 633.0, 579.0, 519.0, 552.0, 627.0, 582.0, 582.0, 627.0, 630.0, 582.0, 582.0, 627.0, 630.0, 582.0, 587.0, 630.0, 627.0, 582.0, 627.0, 576.0, 630.0, 630.0, 579.0, 579.0, 627.0, 630.0, 582.0, 630.0, 579.0, 573.0, 579.0, 627.0, 630.0, 582.0, 587.0, 627.0, 630.0, 636.0, 573.0, 582.0, 582.0, 579.0, 582.0, 627.0, 570.0, 582.0, 573.0, 593.0, 590.0, 582.0, 579.0, 555.0, 627.0, 539.0, 636.0, 582.0, 633.0, 582.0, 582.0, 579.0, 579.0, 630.0, 630.0, 627.0, 587.0, 639.0, 587.0, 630.0, 582.0, 633.0, 587.0, 627.0, 579.0, 576.0, 633.0, 582.0, 570.0, 630.0, 627.0, 579.0, 627.0, 582.0, 633.0, 633.0, 630.0, 579.0, 582.0, 627.0, 627.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 327.0, 306.0, 292.0, 287.0, 280.0, 302.0, 282.0, 300.0, 318.0, 315.0, 305.0, 322.0, 314.0, 316.0, 323.0, 307.0, 319.0, 317.0, 322.0, 311.0, 297.0, 282.0, 261.0, 258.0, 274.0, 278.0, 308.0, 319.0, 290.0, 292.0, 296.0, 286.0, 308.0, 319.0, 322.0, 308.0, 288.0, 294.0, 290.0, 292.0, 324.0, 
303.0, 309.0, 321.0, 290.0, 292.0, 298.0, 289.0, 314.0, 316.0, 319.0, 308.0, 288.0, 294.0, 313.0, 314.0, 288.0, 288.0, 309.0, 321.0, 324.0, 306.0, 294.0, 285.0, 299.0, 280.0, 311.0, 316.0, 309.0, 321.0, 290.0, 292.0, 306.0, 324.0, 302.0, 277.0, 288.0, 285.0, 284.0, 295.0, 322.0, 305.0, 313.0, 317.0, 289.0, 293.0, 287.0, 300.0, 310.0, 317.0, 308.0, 322.0, 314.0, 322.0, 291.0, 282.0, 298.0, 284.0, 301.0, 281.0, 285.0, 294.0, 300.0, 282.0, 299.0, 328.0, 284.0, 286.0, 288.0, 294.0, 283.0, 290.0, 289.0, 304.0, 291.0, 299.0, 294.0, 288.0, 283.0, 296.0, 285.0, 270.0, 311.0, 316.0, 260.0, 279.0, 327.0, 309.0, 285.0, 297.0, 324.0, 309.0, 290.0, 292.0, 296.0, 286.0, 283.0, 296.0, 293.0, 286.0, 310.0, 320.0, 318.0, 312.0, 318.0, 309.0, 290.0, 297.0, 325.0, 314.0, 290.0, 297.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 295.0, 292.0, 298.0, 329.0, 280.0, 299.0, 280.0, 296.0, 318.0, 315.0, 293.0, 289.0, 283.0, 287.0, 319.0, 311.0, 305.0, 322.0, 285.0, 294.0, 309.0, 318.0, 285.0, 297.0, 320.0, 313.0, 311.0, 322.0, 313.0, 317.0, 296.0, 283.0, 286.0, 296.0, 305.0, 322.0, 310.0, 317.0, 289.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 1.123810088157581, "mean_processing_ms": 0.2928524721479363, "mean_inference_ms": 1.6814399707435803}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5352000, "num_steps_sampled": 2854400, "sample_time_ms": 21091.047, "load_time_ms": 36.918, "grad_time_ms": 9784.155, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -5.6165892601711676e-05, "policy_loss": -0.007852478884160519, "vf_loss": 83.60053253173828, "vf_explained_var": 0.7575058937072754, "kl": 0.001709200325421989, "entropy": 1.1274746656417847, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2854400, "episodes_total": 7136, "training_iteration": 223, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-11_22-36-59", "timestamp": 1660253819, "time_this_iter_s": 29.606478929519653, "time_total_s": 12233.982036590576, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12233.982036590576, "timesteps_since_restore": 2854400, "iterations_since_restore": 223, "perf": {"cpu_util_percent": 31.040476190476188, "ram_util_percent": 58.538095238095245}} +{"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 595.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 297.745}, "custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 182.69, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.41, 
"soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.82, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 582.0, 636.0, 630.0, 66.0, 573.0, 633.0, 573.0, 570.0, 579.0, 582.0, 630.0, 633.0, 582.0, 582.0, 579.0, 572.0, 579.0, 582.0, 587.0, 587.0, 630.0, 630.0, 630.0, 582.0, 582.0, 579.0, 419.0, 587.0, 633.0, 587.0, 636.0, 582.0, 633.0, 582.0, 582.0, 579.0, 579.0, 630.0, 630.0, 627.0, 587.0, 639.0, 587.0, 630.0, 582.0, 633.0, 587.0, 627.0, 579.0, 576.0, 633.0, 582.0, 570.0, 630.0, 627.0, 579.0, 627.0, 582.0, 633.0, 633.0, 630.0, 579.0, 582.0, 627.0, 627.0, 576.0, 582.0, 633.0, 579.0, 582.0, 582.0, 633.0, 627.0, 630.0, 630.0, 636.0, 633.0, 579.0, 519.0, 552.0, 627.0, 582.0, 582.0, 627.0, 630.0, 582.0, 582.0, 627.0, 630.0, 582.0, 587.0, 630.0, 627.0, 582.0, 627.0, 576.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 295.0, 321.0, 309.0, 298.0, 284.0, 316.0, 320.0, 316.0, 314.0, 34.0, 32.0, 277.0, 296.0, 309.0, 324.0, 279.0, 294.0, 285.0, 
285.0, 285.0, 294.0, 285.0, 297.0, 332.0, 298.0, 316.0, 317.0, 296.0, 286.0, 296.0, 286.0, 288.0, 291.0, 274.0, 298.0, 290.0, 289.0, 283.0, 299.0, 291.0, 296.0, 305.0, 282.0, 311.0, 319.0, 303.0, 327.0, 312.0, 318.0, 296.0, 286.0, 291.0, 291.0, 293.0, 286.0, 214.0, 205.0, 287.0, 300.0, 323.0, 310.0, 288.0, 299.0, 327.0, 309.0, 285.0, 297.0, 324.0, 309.0, 290.0, 292.0, 296.0, 286.0, 283.0, 296.0, 293.0, 286.0, 310.0, 320.0, 318.0, 312.0, 318.0, 309.0, 290.0, 297.0, 325.0, 314.0, 290.0, 297.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 295.0, 292.0, 298.0, 329.0, 280.0, 299.0, 280.0, 296.0, 318.0, 315.0, 293.0, 289.0, 283.0, 287.0, 319.0, 311.0, 305.0, 322.0, 285.0, 294.0, 309.0, 318.0, 285.0, 297.0, 320.0, 313.0, 311.0, 322.0, 313.0, 317.0, 296.0, 283.0, 286.0, 296.0, 305.0, 322.0, 310.0, 317.0, 289.0, 287.0, 290.0, 292.0, 327.0, 306.0, 292.0, 287.0, 280.0, 302.0, 282.0, 300.0, 318.0, 315.0, 305.0, 322.0, 314.0, 316.0, 323.0, 307.0, 319.0, 317.0, 322.0, 311.0, 297.0, 282.0, 261.0, 258.0, 274.0, 278.0, 308.0, 319.0, 290.0, 292.0, 296.0, 286.0, 308.0, 319.0, 322.0, 308.0, 288.0, 294.0, 290.0, 292.0, 324.0, 303.0, 309.0, 321.0, 290.0, 292.0, 298.0, 289.0, 314.0, 316.0, 319.0, 308.0, 288.0, 294.0, 313.0, 314.0, 288.0, 288.0, 309.0, 321.0, 324.0, 306.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1207907910306545, "mean_processing_ms": 0.2922476172198019, "mean_inference_ms": 1.6784104187428721}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5376000, "num_steps_sampled": 2867200, "sample_time_ms": 21165.857, "load_time_ms": 36.996, "grad_time_ms": 9704.336, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003032231703400612, "policy_loss": -0.005307988729327917, "vf_loss": 89.09744262695312, "vf_explained_var": 0.7809851765632629, "kl": 0.0017985772574320436, "entropy": 1.139058232307434, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 
12800, "done": false, "timesteps_total": 2867200, "episodes_total": 7168, "training_iteration": 224, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-37-28", "timestamp": 1660253848, "time_this_iter_s": 29.72331213951111, "time_total_s": 12263.705348730087, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12263.705348730087, "timesteps_since_restore": 2867200, "iterations_since_restore": 224, "perf": {"cpu_util_percent": 31.121428571428574, "ram_util_percent": 58.642857142857146}} +{"episode_reward_max": 636.0, "episode_reward_min": 66.0, "episode_reward_mean": 594.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 297.15}, "custom_metrics": {"sparse_reward_mean": 206.0, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 182.3, "shaped_reward_min": 26, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.89, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.89, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.89, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 624.0, 582.0, 579.0, 630.0, 579.0, 636.0, 582.0, 630.0, 576.0, 621.0, 633.0, 582.0, 584.0, 636.0, 579.0, 576.0, 579.0, 636.0, 630.0, 633.0, 582.0, 570.0, 630.0, 582.0, 627.0, 627.0, 576.0, 582.0, 633.0, 579.0, 582.0, 582.0, 633.0, 627.0, 630.0, 630.0, 636.0, 633.0, 579.0, 519.0, 552.0, 627.0, 582.0, 582.0, 627.0, 630.0, 582.0, 582.0, 627.0, 630.0, 582.0, 587.0, 630.0, 627.0, 582.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 582.0, 636.0, 630.0, 66.0, 573.0, 633.0, 573.0, 570.0, 579.0, 582.0, 630.0, 633.0, 582.0, 582.0, 579.0, 572.0, 579.0, 582.0, 587.0, 587.0, 630.0, 630.0, 630.0, 582.0, 582.0, 579.0, 419.0, 587.0, 633.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 289.0, 293.0, 283.0, 299.0, 306.0, 324.0, 293.0, 294.0, 323.0, 304.0, 284.0, 295.0, 319.0, 314.0, 283.0, 299.0, 322.0, 302.0, 294.0, 288.0, 284.0, 295.0, 319.0, 311.0, 291.0, 288.0, 316.0, 320.0, 294.0, 288.0, 326.0, 304.0, 292.0, 284.0, 314.0, 307.0, 319.0, 314.0, 286.0, 296.0, 285.0, 299.0, 324.0, 312.0, 297.0, 282.0, 291.0, 285.0, 287.0, 292.0, 321.0, 315.0, 313.0, 317.0, 317.0, 316.0, 296.0, 286.0, 292.0, 278.0, 316.0, 314.0, 286.0, 296.0, 305.0, 322.0, 310.0, 317.0, 289.0, 287.0, 290.0, 292.0, 327.0, 306.0, 292.0, 287.0, 280.0, 302.0, 282.0, 300.0, 318.0, 315.0, 305.0, 322.0, 314.0, 316.0, 323.0, 307.0, 319.0, 317.0, 322.0, 311.0, 297.0, 282.0, 261.0, 258.0, 274.0, 278.0, 308.0, 319.0, 290.0, 292.0, 296.0, 286.0, 308.0, 319.0, 322.0, 308.0, 288.0, 294.0, 290.0, 292.0, 324.0, 303.0, 309.0, 321.0, 290.0, 292.0, 298.0, 289.0, 314.0, 316.0, 319.0, 308.0, 288.0, 294.0, 313.0, 314.0, 288.0, 288.0, 309.0, 321.0, 324.0, 306.0, 287.0, 295.0, 321.0, 309.0, 298.0, 284.0, 316.0, 320.0, 316.0, 314.0, 34.0, 32.0, 277.0, 296.0, 309.0, 324.0, 279.0, 294.0, 285.0, 285.0, 285.0, 294.0, 285.0, 297.0, 332.0, 298.0, 316.0, 317.0, 296.0, 286.0, 296.0, 286.0, 288.0, 291.0, 274.0, 298.0, 290.0, 289.0, 283.0, 299.0, 291.0, 296.0, 305.0, 282.0, 311.0, 319.0, 303.0, 327.0, 312.0, 318.0, 296.0, 286.0, 291.0, 291.0, 293.0, 286.0, 214.0, 205.0, 287.0, 300.0, 323.0, 310.0, 288.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 1.117796938264296, "mean_processing_ms": 0.29164906507723115, "mean_inference_ms": 1.675395869901085}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5400000, "num_steps_sampled": 2880000, "sample_time_ms": 21210.508, "load_time_ms": 36.928, "grad_time_ms": 9651.029, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003380303969606757, "policy_loss": -0.0046376134268939495, "vf_loss": 85.82404327392578, "vf_explained_var": 
0.7595102190971375, "kl": 0.0017190409125760198, "entropy": 1.1289840936660767, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2880000, "episodes_total": 7200, "training_iteration": 225, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-37-59", "timestamp": 1660253879, "time_this_iter_s": 30.507438898086548, "time_total_s": 12294.212787628174, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12294.212787628174, "timesteps_since_restore": 2880000, "iterations_since_restore": 225, "perf": {"cpu_util_percent": 29.595348837209304, "ram_util_percent": 58.604651162790695}} +{"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 592.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 296.345}, "custom_metrics": {"sparse_reward_mean": 205.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 182.29, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.96, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.88, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.08, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 575.0, 582.0, 627.0, 630.0, 627.0, 579.0, 579.0, 630.0, 630.0, 579.0, 633.0, 584.0, 567.0, 627.0, 627.0, 582.0, 627.0, 587.0, 587.0, 582.0, 576.0, 579.0, 633.0, 639.0, 582.0, 587.0, 579.0, 582.0, 533.0, 582.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 582.0, 636.0, 630.0, 66.0, 573.0, 633.0, 573.0, 570.0, 579.0, 582.0, 630.0, 633.0, 582.0, 582.0, 579.0, 572.0, 579.0, 582.0, 587.0, 587.0, 630.0, 630.0, 630.0, 582.0, 582.0, 579.0, 419.0, 587.0, 633.0, 587.0, 582.0, 582.0, 582.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 624.0, 582.0, 579.0, 630.0, 579.0, 636.0, 582.0, 630.0, 576.0, 621.0, 633.0, 582.0, 584.0, 636.0, 579.0, 576.0, 579.0, 636.0, 630.0, 633.0, 582.0, 570.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 309.0, 288.0, 294.0, 284.0, 291.0, 293.0, 289.0, 311.0, 316.0, 314.0, 316.0, 313.0, 314.0, 290.0, 289.0, 290.0, 289.0, 324.0, 306.0, 311.0, 319.0, 292.0, 287.0, 319.0, 314.0, 291.0, 293.0, 296.0, 271.0, 323.0, 304.0, 309.0, 318.0, 284.0, 298.0, 309.0, 318.0, 282.0, 305.0, 297.0, 290.0, 287.0, 295.0, 291.0, 285.0, 287.0, 292.0, 316.0, 317.0, 317.0, 322.0, 294.0, 288.0, 301.0, 286.0, 282.0, 297.0, 298.0, 284.0, 270.0, 263.0, 288.0, 294.0, 313.0, 314.0, 288.0, 288.0, 309.0, 321.0, 324.0, 306.0, 287.0, 295.0, 321.0, 309.0, 298.0, 284.0, 316.0, 320.0, 316.0, 314.0, 34.0, 32.0, 277.0, 296.0, 309.0, 324.0, 279.0, 294.0, 285.0, 285.0, 285.0, 294.0, 285.0, 297.0, 332.0, 298.0, 316.0, 317.0, 296.0, 286.0, 296.0, 286.0, 288.0, 291.0, 274.0, 298.0, 290.0, 289.0, 283.0, 299.0, 291.0, 296.0, 305.0, 282.0, 311.0, 319.0, 303.0, 327.0, 312.0, 318.0, 296.0, 286.0, 291.0, 291.0, 293.0, 286.0, 214.0, 205.0, 287.0, 300.0, 323.0, 310.0, 288.0, 299.0, 294.0, 288.0, 289.0, 293.0, 283.0, 299.0, 306.0, 324.0, 293.0, 294.0, 323.0, 304.0, 284.0, 295.0, 319.0, 314.0, 283.0, 299.0, 322.0, 302.0, 294.0, 288.0, 284.0, 295.0, 319.0, 311.0, 291.0, 288.0, 316.0, 320.0, 294.0, 288.0, 326.0, 304.0, 292.0, 284.0, 314.0, 307.0, 319.0, 314.0, 286.0, 296.0, 285.0, 299.0, 324.0, 312.0, 297.0, 282.0, 291.0, 285.0, 287.0, 292.0, 321.0, 315.0, 313.0, 317.0, 317.0, 316.0, 296.0, 286.0, 292.0, 278.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1148293138411964, "mean_processing_ms": 0.29105729699863353, "mean_inference_ms": 1.6724660361311725}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5424000, "num_steps_sampled": 2892800, "sample_time_ms": 21194.238, "load_time_ms": 37.016, "grad_time_ms": 9566.171, "update_time_ms": 0.002, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004350067116320133, "policy_loss": -0.004312645178288221, "vf_loss": 92.26403045654297, "vf_explained_var": 0.7493538856506348, "kl": 0.0016388074727728963, "entropy": 1.1273828744888306, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2892800, "episodes_total": 7232, "training_iteration": 226, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-38-28", "timestamp": 1660253908, "time_this_iter_s": 28.989330291748047, "time_total_s": 12323.202117919922, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 
1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12323.202117919922, "timesteps_since_restore": 2892800, "iterations_since_restore": 226, "perf": {"cpu_util_percent": 31.509756097560977, "ram_util_percent": 58.60975609756099}} +{"episode_reward_max": 639.0, "episode_reward_min": 419.0, "episode_reward_mean": 596.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 205.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 298.35}, "custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 183.9, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.96, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.09, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.38, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.6, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, 
"useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.91, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.04, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 582.0, 587.0, 624.0, 582.0, 618.0, 582.0, 539.0, 633.0, 582.0, 582.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 579.0, 630.0, 630.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 573.0, 582.0, 582.0, 587.0, 636.0, 419.0, 587.0, 633.0, 587.0, 582.0, 582.0, 582.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 624.0, 582.0, 579.0, 630.0, 579.0, 636.0, 582.0, 630.0, 576.0, 621.0, 633.0, 582.0, 584.0, 636.0, 579.0, 576.0, 579.0, 636.0, 630.0, 633.0, 582.0, 570.0, 630.0, 630.0, 582.0, 575.0, 582.0, 627.0, 630.0, 627.0, 579.0, 579.0, 630.0, 630.0, 579.0, 633.0, 584.0, 567.0, 627.0, 627.0, 582.0, 627.0, 587.0, 587.0, 582.0, 576.0, 579.0, 633.0, 639.0, 582.0, 587.0, 579.0, 582.0, 533.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 287.0, 292.0, 295.0, 287.0, 292.0, 295.0, 321.0, 303.0, 291.0, 291.0, 305.0, 313.0, 283.0, 299.0, 270.0, 269.0, 316.0, 317.0, 288.0, 294.0, 290.0, 292.0, 298.0, 284.0, 283.0, 299.0, 319.0, 317.0, 309.0, 324.0, 291.0, 291.0, 287.0, 295.0, 293.0, 286.0, 312.0, 318.0, 314.0, 316.0, 309.0, 278.0, 324.0, 306.0, 291.0, 291.0, 297.0, 279.0, 317.0, 310.0, 295.0, 284.0, 282.0, 291.0, 288.0, 294.0, 290.0, 292.0, 294.0, 293.0, 325.0, 311.0, 214.0, 205.0, 287.0, 300.0, 323.0, 310.0, 288.0, 299.0, 294.0, 288.0, 289.0, 293.0, 283.0, 299.0, 306.0, 324.0, 293.0, 294.0, 323.0, 304.0, 284.0, 295.0, 319.0, 314.0, 283.0, 299.0, 322.0, 302.0, 294.0, 288.0, 284.0, 295.0, 319.0, 311.0, 291.0, 288.0, 316.0, 320.0, 294.0, 288.0, 326.0, 304.0, 292.0, 284.0, 314.0, 307.0, 319.0, 314.0, 286.0, 296.0, 285.0, 299.0, 324.0, 312.0, 297.0, 282.0, 291.0, 285.0, 287.0, 292.0, 321.0, 315.0, 313.0, 317.0, 317.0, 316.0, 296.0, 286.0, 292.0, 278.0, 316.0, 314.0, 321.0, 309.0, 288.0, 294.0, 284.0, 291.0, 293.0, 289.0, 311.0, 316.0, 314.0, 316.0, 313.0, 314.0, 290.0, 289.0, 290.0, 289.0, 324.0, 306.0, 311.0, 319.0, 292.0, 287.0, 319.0, 314.0, 291.0, 293.0, 296.0, 271.0, 323.0, 304.0, 309.0, 318.0, 284.0, 298.0, 309.0, 318.0, 282.0, 305.0, 297.0, 290.0, 287.0, 295.0, 291.0, 285.0, 287.0, 292.0, 316.0, 317.0, 317.0, 322.0, 294.0, 288.0, 301.0, 286.0, 282.0, 297.0, 298.0, 284.0, 270.0, 263.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.111882925643259, "mean_processing_ms": 0.2904711783343595, "mean_inference_ms": 1.6695128259184024}, 
"off_policy_estimator": {}, "info": {"num_steps_trained": 5448000, "num_steps_sampled": 2905600, "sample_time_ms": 21046.741, "load_time_ms": 36.918, "grad_time_ms": 9303.678, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001480274717323482, "policy_loss": -0.006882220506668091, "vf_loss": 89.27208709716797, "vf_explained_var": 0.7621426582336426, "kl": 0.0023567674215883017, "entropy": 1.1294348239898682, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2905600, "episodes_total": 7264, "training_iteration": 227, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-38-56", "timestamp": 1660253936, "time_this_iter_s": 28.29434609413147, "time_total_s": 12351.496464014053, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, 
"mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12351.496464014053, "timesteps_since_restore": 2905600, "iterations_since_restore": 227, "perf": {"cpu_util_percent": 34.097500000000004, "ram_util_percent": 58.625000000000014}} +{"episode_reward_max": 639.0, "episode_reward_min": 533.0, "episode_reward_mean": 598.73, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 263.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 299.365}, "custom_metrics": {"sparse_reward_mean": 207.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.33, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.07, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.63, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.27, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.9, "dish_pickup_agent_0_min": 2, 
"dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.08, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.27, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.27, 
"viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 633.0, 582.0, 630.0, 630.0, 579.0, 630.0, 587.0, 630.0, 582.0, 627.0, 582.0, 627.0, 633.0, 541.0, 579.0, 582.0, 633.0, 630.0, 624.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 630.0, 630.0, 630.0, 633.0, 576.0, 582.0, 633.0, 582.0, 570.0, 630.0, 630.0, 582.0, 575.0, 582.0, 627.0, 630.0, 627.0, 579.0, 579.0, 630.0, 630.0, 579.0, 633.0, 584.0, 567.0, 627.0, 627.0, 582.0, 627.0, 587.0, 587.0, 582.0, 576.0, 579.0, 633.0, 639.0, 582.0, 587.0, 579.0, 582.0, 533.0, 582.0, 579.0, 579.0, 582.0, 587.0, 624.0, 582.0, 618.0, 582.0, 539.0, 633.0, 582.0, 582.0, 582.0, 582.0, 636.0, 
633.0, 582.0, 582.0, 579.0, 630.0, 630.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 573.0, 582.0, 582.0, 587.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [279.0, 297.0, 316.0, 317.0, 293.0, 289.0, 306.0, 324.0, 311.0, 319.0, 292.0, 287.0, 314.0, 316.0, 290.0, 297.0, 312.0, 318.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 313.0, 314.0, 317.0, 316.0, 266.0, 275.0, 289.0, 290.0, 279.0, 303.0, 322.0, 311.0, 303.0, 327.0, 308.0, 316.0, 291.0, 291.0, 288.0, 294.0, 285.0, 297.0, 280.0, 302.0, 289.0, 293.0, 291.0, 288.0, 317.0, 313.0, 317.0, 313.0, 319.0, 311.0, 317.0, 316.0, 266.0, 310.0, 294.0, 288.0, 317.0, 316.0, 296.0, 286.0, 292.0, 278.0, 316.0, 314.0, 321.0, 309.0, 288.0, 294.0, 284.0, 291.0, 293.0, 289.0, 311.0, 316.0, 314.0, 316.0, 313.0, 314.0, 290.0, 289.0, 290.0, 289.0, 324.0, 306.0, 311.0, 319.0, 292.0, 287.0, 319.0, 314.0, 291.0, 293.0, 296.0, 271.0, 323.0, 304.0, 309.0, 318.0, 284.0, 298.0, 309.0, 318.0, 282.0, 305.0, 297.0, 290.0, 287.0, 295.0, 291.0, 285.0, 287.0, 292.0, 316.0, 317.0, 317.0, 322.0, 294.0, 288.0, 301.0, 286.0, 282.0, 297.0, 298.0, 284.0, 270.0, 263.0, 288.0, 294.0, 287.0, 292.0, 287.0, 292.0, 295.0, 287.0, 292.0, 295.0, 321.0, 303.0, 291.0, 291.0, 305.0, 313.0, 283.0, 299.0, 270.0, 269.0, 316.0, 317.0, 288.0, 294.0, 290.0, 292.0, 298.0, 284.0, 283.0, 299.0, 319.0, 317.0, 309.0, 324.0, 291.0, 291.0, 287.0, 295.0, 293.0, 286.0, 312.0, 318.0, 314.0, 316.0, 309.0, 278.0, 324.0, 306.0, 291.0, 291.0, 297.0, 279.0, 317.0, 310.0, 295.0, 284.0, 282.0, 
291.0, 288.0, 294.0, 290.0, 292.0, 294.0, 293.0, 325.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.108955063453815, "mean_processing_ms": 0.2898914011168066, "mean_inference_ms": 1.6664528501755567}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5472000, "num_steps_sampled": 2918400, "sample_time_ms": 20842.466, "load_time_ms": 36.811, "grad_time_ms": 9088.977, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0036609917879104614, "policy_loss": -0.0044582299888134, "vf_loss": 86.8133316040039, "vf_explained_var": 0.7590463161468506, "kl": 0.0019074537558481097, "entropy": 1.124218463897705, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2918400, "episodes_total": 7296, "training_iteration": 228, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-39-25", "timestamp": 1660253965, "time_this_iter_s": 29.044671058654785, "time_total_s": 12380.541135072708, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, 
"timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12380.541135072708, "timesteps_since_restore": 2918400, "iterations_since_restore": 228, "perf": {"cpu_util_percent": 34.02195121951219, "ram_util_percent": 58.739024390243905}} +{"episode_reward_max": 636.0, "episode_reward_min": 524.0, "episode_reward_mean": 595.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 297.94}, "custom_metrics": {"sparse_reward_mean": 206.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.88, "shaped_reward_min": 164, "shaped_reward_max": 196, 
"tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.9, "potting_onion_agent_0_min": 11, 
"potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.34, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.9, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.9, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.34, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, 
"optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.9, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.34, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 576.0, 627.0, 630.0, 582.0, 579.0, 582.0, 633.0, 525.0, 587.0, 573.0, 627.0, 587.0, 624.0, 524.0, 587.0, 633.0, 579.0, 536.0, 582.0, 587.0, 627.0, 579.0, 533.0, 582.0, 627.0, 587.0, 633.0, 627.0, 582.0, 573.0, 633.0, 579.0, 582.0, 533.0, 582.0, 579.0, 579.0, 582.0, 587.0, 624.0, 582.0, 618.0, 582.0, 539.0, 633.0, 582.0, 582.0, 582.0, 582.0, 636.0, 633.0, 582.0, 
582.0, 579.0, 630.0, 630.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 573.0, 582.0, 582.0, 587.0, 636.0, 576.0, 633.0, 582.0, 630.0, 630.0, 579.0, 630.0, 587.0, 630.0, 582.0, 627.0, 582.0, 627.0, 633.0, 541.0, 579.0, 582.0, 633.0, 630.0, 624.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 630.0, 630.0, 630.0, 633.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [320.0, 316.0, 287.0, 289.0, 312.0, 315.0, 316.0, 314.0, 296.0, 286.0, 286.0, 293.0, 285.0, 297.0, 320.0, 313.0, 259.0, 266.0, 291.0, 296.0, 292.0, 281.0, 310.0, 317.0, 297.0, 290.0, 317.0, 307.0, 253.0, 271.0, 280.0, 307.0, 319.0, 314.0, 284.0, 295.0, 267.0, 269.0, 290.0, 292.0, 298.0, 289.0, 310.0, 317.0, 288.0, 291.0, 275.0, 258.0, 294.0, 288.0, 313.0, 314.0, 283.0, 304.0, 322.0, 311.0, 316.0, 311.0, 300.0, 282.0, 278.0, 295.0, 319.0, 314.0, 282.0, 297.0, 298.0, 284.0, 270.0, 263.0, 288.0, 294.0, 287.0, 292.0, 287.0, 292.0, 295.0, 287.0, 292.0, 295.0, 321.0, 303.0, 291.0, 291.0, 305.0, 313.0, 283.0, 299.0, 270.0, 269.0, 316.0, 317.0, 288.0, 294.0, 290.0, 292.0, 298.0, 284.0, 283.0, 299.0, 319.0, 317.0, 309.0, 324.0, 291.0, 291.0, 287.0, 295.0, 293.0, 286.0, 312.0, 318.0, 314.0, 316.0, 309.0, 278.0, 324.0, 306.0, 291.0, 291.0, 297.0, 279.0, 317.0, 310.0, 295.0, 284.0, 282.0, 291.0, 288.0, 294.0, 290.0, 292.0, 294.0, 293.0, 325.0, 311.0, 279.0, 297.0, 316.0, 317.0, 293.0, 289.0, 306.0, 324.0, 311.0, 319.0, 292.0, 287.0, 314.0, 316.0, 290.0, 297.0, 312.0, 318.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 313.0, 
314.0, 317.0, 316.0, 266.0, 275.0, 289.0, 290.0, 279.0, 303.0, 322.0, 311.0, 303.0, 327.0, 308.0, 316.0, 291.0, 291.0, 288.0, 294.0, 285.0, 297.0, 280.0, 302.0, 289.0, 293.0, 291.0, 288.0, 317.0, 313.0, 317.0, 313.0, 319.0, 311.0, 317.0, 316.0, 266.0, 310.0, 294.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1060616527900746, "mean_processing_ms": 0.28931749831274756, "mean_inference_ms": 1.663577876904456}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5496000, "num_steps_sampled": 2931200, "sample_time_ms": 20902.121, "load_time_ms": 36.225, "grad_time_ms": 9147.239, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004833377432078123, "policy_loss": -0.003439890220761299, "vf_loss": 88.37776947021484, "vf_explained_var": 0.7585814595222473, "kl": 0.0015477427514269948, "entropy": 1.1290167570114136, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2931200, "episodes_total": 7328, "training_iteration": 229, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-39-56", "timestamp": 1660253996, "time_this_iter_s": 31.170966863632202, "time_total_s": 12411.71210193634, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, 
"custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12411.71210193634, "timesteps_since_restore": 2931200, "iterations_since_restore": 229, "perf": {"cpu_util_percent": 34.31136363636364, "ram_util_percent": 58.67272727272726}} +{"episode_reward_max": 636.0, "episode_reward_min": 524.0, "episode_reward_mean": 598.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 331.0}, 
"policy_reward_mean": {"ppo": 299.285}, "custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.57, "shaped_reward_min": 164, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.94, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.43, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.56, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, 
"useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.95, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.03, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 630.0, 579.0, 627.0, 579.0, 579.0, 587.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 576.0, 582.0, 539.0, 
633.0, 587.0, 579.0, 630.0, 633.0, 633.0, 633.0, 636.0, 579.0, 582.0, 582.0, 587.0, 636.0, 576.0, 633.0, 582.0, 630.0, 630.0, 579.0, 630.0, 587.0, 630.0, 582.0, 627.0, 582.0, 627.0, 633.0, 541.0, 579.0, 582.0, 633.0, 630.0, 624.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 630.0, 630.0, 630.0, 633.0, 576.0, 582.0, 636.0, 576.0, 627.0, 630.0, 582.0, 579.0, 582.0, 633.0, 525.0, 587.0, 573.0, 627.0, 587.0, 624.0, 524.0, 587.0, 633.0, 579.0, 536.0, 582.0, 587.0, 627.0, 579.0, 533.0, 582.0, 627.0, 587.0, 633.0, 627.0, 582.0, 573.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 311.0, 316.0, 317.0, 296.0, 286.0, 303.0, 284.0, 316.0, 314.0, 313.0, 317.0, 317.0, 310.0, 293.0, 289.0, 314.0, 316.0, 285.0, 294.0, 316.0, 311.0, 286.0, 293.0, 283.0, 296.0, 289.0, 298.0, 285.0, 291.0, 287.0, 292.0, 297.0, 279.0, 294.0, 293.0, 288.0, 294.0, 284.0, 298.0, 287.0, 289.0, 290.0, 292.0, 273.0, 266.0, 310.0, 323.0, 295.0, 292.0, 294.0, 285.0, 321.0, 309.0, 321.0, 312.0, 313.0, 320.0, 331.0, 302.0, 321.0, 315.0, 296.0, 283.0, 288.0, 294.0, 290.0, 292.0, 294.0, 293.0, 325.0, 311.0, 279.0, 297.0, 316.0, 317.0, 293.0, 289.0, 306.0, 324.0, 311.0, 319.0, 292.0, 287.0, 314.0, 316.0, 290.0, 297.0, 312.0, 318.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 313.0, 314.0, 317.0, 316.0, 266.0, 275.0, 289.0, 290.0, 279.0, 303.0, 322.0, 311.0, 303.0, 327.0, 308.0, 316.0, 291.0, 291.0, 288.0, 294.0, 285.0, 297.0, 280.0, 302.0, 289.0, 293.0, 291.0, 288.0, 317.0, 313.0, 317.0, 313.0, 319.0, 311.0, 317.0, 
316.0, 266.0, 310.0, 294.0, 288.0, 320.0, 316.0, 287.0, 289.0, 312.0, 315.0, 316.0, 314.0, 296.0, 286.0, 286.0, 293.0, 285.0, 297.0, 320.0, 313.0, 259.0, 266.0, 291.0, 296.0, 292.0, 281.0, 310.0, 317.0, 297.0, 290.0, 317.0, 307.0, 253.0, 271.0, 280.0, 307.0, 319.0, 314.0, 284.0, 295.0, 267.0, 269.0, 290.0, 292.0, 298.0, 289.0, 310.0, 317.0, 288.0, 291.0, 275.0, 258.0, 294.0, 288.0, 313.0, 314.0, 283.0, 304.0, 322.0, 311.0, 316.0, 311.0, 300.0, 282.0, 278.0, 295.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1032059640461922, "mean_processing_ms": 0.28875135486760906, "mean_inference_ms": 1.6608895336787544}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5520000, "num_steps_sampled": 2944000, "sample_time_ms": 20966.765, "load_time_ms": 36.396, "grad_time_ms": 9172.251, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005542902275919914, "policy_loss": -0.0032051329035311937, "vf_loss": 93.1218490600586, "vf_explained_var": 0.7535824775695801, "kl": 0.0018033984815701842, "entropy": 1.1283119916915894, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2944000, "episodes_total": 7360, "training_iteration": 230, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-40-28", "timestamp": 1660254028, "time_this_iter_s": 31.772056102752686, "time_total_s": 12443.484158039093, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, 
"max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12443.484158039093, "timesteps_since_restore": 2944000, "iterations_since_restore": 230, "perf": {"cpu_util_percent": 32.88666666666666, "ram_util_percent": 58.577777777777776}} 
+{"episode_reward_max": 636.0, "episode_reward_min": 444.0, "episode_reward_mean": 595.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 297.695}, "custom_metrics": {"sparse_reward_mean": 205.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 183.79, "shaped_reward_min": 124, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.34, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.96, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.34, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.34, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 582.0, 576.0, 587.0, 582.0, 633.0, 444.0, 630.0, 590.0, 633.0, 522.0, 636.0, 579.0, 587.0, 633.0, 627.0, 587.0, 630.0, 636.0, 587.0, 582.0, 582.0, 582.0, 627.0, 636.0, 582.0, 627.0, 582.0, 579.0, 579.0, 633.0, 536.0, 630.0, 633.0, 576.0, 582.0, 636.0, 576.0, 627.0, 630.0, 582.0, 579.0, 582.0, 633.0, 525.0, 587.0, 573.0, 627.0, 587.0, 624.0, 524.0, 587.0, 633.0, 579.0, 536.0, 582.0, 587.0, 627.0, 579.0, 533.0, 582.0, 627.0, 587.0, 633.0, 627.0, 582.0, 573.0, 633.0, 630.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 630.0, 579.0, 627.0, 579.0, 579.0, 587.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 576.0, 582.0, 539.0, 633.0, 587.0, 579.0, 630.0, 633.0, 633.0, 633.0, 636.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 265.0, 287.0, 295.0, 277.0, 299.0, 285.0, 302.0, 293.0, 289.0, 313.0, 320.0, 229.0, 215.0, 321.0, 309.0, 292.0, 298.0, 311.0, 322.0, 259.0, 263.0, 309.0, 327.0, 288.0, 291.0, 292.0, 295.0, 315.0, 318.0, 321.0, 306.0, 295.0, 292.0, 318.0, 312.0, 313.0, 323.0, 293.0, 294.0, 294.0, 288.0, 285.0, 297.0, 291.0, 291.0, 322.0, 305.0, 329.0, 307.0, 291.0, 291.0, 313.0, 314.0, 281.0, 301.0, 296.0, 283.0, 291.0, 288.0, 316.0, 317.0, 273.0, 263.0, 319.0, 311.0, 317.0, 316.0, 266.0, 310.0, 294.0, 288.0, 320.0, 316.0, 287.0, 289.0, 312.0, 315.0, 316.0, 314.0, 296.0, 286.0, 286.0, 293.0, 285.0, 297.0, 320.0, 313.0, 259.0, 266.0, 291.0, 296.0, 292.0, 281.0, 310.0, 317.0, 297.0, 
290.0, 317.0, 307.0, 253.0, 271.0, 280.0, 307.0, 319.0, 314.0, 284.0, 295.0, 267.0, 269.0, 290.0, 292.0, 298.0, 289.0, 310.0, 317.0, 288.0, 291.0, 275.0, 258.0, 294.0, 288.0, 313.0, 314.0, 283.0, 304.0, 322.0, 311.0, 316.0, 311.0, 300.0, 282.0, 278.0, 295.0, 319.0, 314.0, 319.0, 311.0, 316.0, 317.0, 296.0, 286.0, 303.0, 284.0, 316.0, 314.0, 313.0, 317.0, 317.0, 310.0, 293.0, 289.0, 314.0, 316.0, 285.0, 294.0, 316.0, 311.0, 286.0, 293.0, 283.0, 296.0, 289.0, 298.0, 285.0, 291.0, 287.0, 292.0, 297.0, 279.0, 294.0, 293.0, 288.0, 294.0, 284.0, 298.0, 287.0, 289.0, 290.0, 292.0, 273.0, 266.0, 310.0, 323.0, 295.0, 292.0, 294.0, 285.0, 321.0, 309.0, 321.0, 312.0, 313.0, 320.0, 331.0, 302.0, 321.0, 315.0, 296.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 1.100385875299439, "mean_processing_ms": 0.2881910582639845, "mean_inference_ms": 1.6584370926312206}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5544000, "num_steps_sampled": 2956800, "sample_time_ms": 21086.396, "load_time_ms": 36.806, "grad_time_ms": 9224.029, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0032783111091703176, "policy_loss": -0.005397517699748278, "vf_loss": 92.44506072998047, "vf_explained_var": 0.7564309239387512, "kl": 0.001717855571769178, "entropy": 1.137366771697998, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2956800, "episodes_total": 7392, "training_iteration": 231, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-41-01", "timestamp": 1660254061, "time_this_iter_s": 32.62551975250244, "time_total_s": 12476.109677791595, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 12476.109677791595, "timesteps_since_restore": 2956800, "iterations_since_restore": 231, "perf": {"cpu_util_percent": 34.25652173913044, "ram_util_percent": 58.589130434782625}} +{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 597.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 298.635}, "custom_metrics": {"sparse_reward_mean": 206.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.07, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.1, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 9, 
"useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.42, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 
0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.96, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.42, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.42, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 582.0, 587.0, 630.0, 573.0, 627.0, 579.0, 576.0, 582.0, 630.0, 587.0, 590.0, 582.0, 630.0, 627.0, 582.0, 582.0, 582.0, 573.0, 576.0, 570.0, 582.0, 582.0, 579.0, 633.0, 627.0, 633.0, 627.0, 582.0, 573.0, 633.0, 630.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 630.0, 579.0, 627.0, 579.0, 579.0, 587.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 576.0, 582.0, 539.0, 633.0, 587.0, 579.0, 630.0, 633.0, 633.0, 633.0, 636.0, 579.0, 527.0, 582.0, 576.0, 587.0, 582.0, 633.0, 444.0, 630.0, 590.0, 633.0, 522.0, 636.0, 579.0, 587.0, 633.0, 627.0, 587.0, 630.0, 636.0, 587.0, 582.0, 582.0, 582.0, 627.0, 636.0, 582.0, 627.0, 582.0, 579.0, 579.0, 633.0, 536.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 306.0, 316.0, 314.0, 277.0, 302.0, 318.0, 321.0, 286.0, 296.0, 303.0, 330.0, 286.0, 296.0, 288.0, 299.0, 324.0, 306.0, 274.0, 299.0, 311.0, 316.0, 291.0, 288.0, 292.0, 284.0, 294.0, 288.0, 318.0, 312.0, 300.0, 287.0, 296.0, 294.0, 290.0, 292.0, 304.0, 326.0, 315.0, 312.0, 294.0, 288.0, 299.0, 283.0, 291.0, 291.0, 282.0, 291.0, 290.0, 286.0, 282.0, 288.0, 294.0, 288.0, 293.0, 289.0, 297.0, 282.0, 314.0, 319.0, 308.0, 319.0, 313.0, 320.0, 316.0, 311.0, 300.0, 282.0, 278.0, 295.0, 319.0, 314.0, 319.0, 
311.0, 316.0, 317.0, 296.0, 286.0, 303.0, 284.0, 316.0, 314.0, 313.0, 317.0, 317.0, 310.0, 293.0, 289.0, 314.0, 316.0, 285.0, 294.0, 316.0, 311.0, 286.0, 293.0, 283.0, 296.0, 289.0, 298.0, 285.0, 291.0, 287.0, 292.0, 297.0, 279.0, 294.0, 293.0, 288.0, 294.0, 284.0, 298.0, 287.0, 289.0, 290.0, 292.0, 273.0, 266.0, 310.0, 323.0, 295.0, 292.0, 294.0, 285.0, 321.0, 309.0, 321.0, 312.0, 313.0, 320.0, 331.0, 302.0, 321.0, 315.0, 296.0, 283.0, 262.0, 265.0, 287.0, 295.0, 277.0, 299.0, 285.0, 302.0, 293.0, 289.0, 313.0, 320.0, 229.0, 215.0, 321.0, 309.0, 292.0, 298.0, 311.0, 322.0, 259.0, 263.0, 309.0, 327.0, 288.0, 291.0, 292.0, 295.0, 315.0, 318.0, 321.0, 306.0, 295.0, 292.0, 318.0, 312.0, 313.0, 323.0, 293.0, 294.0, 294.0, 288.0, 285.0, 297.0, 291.0, 291.0, 322.0, 305.0, 329.0, 307.0, 291.0, 291.0, 313.0, 314.0, 281.0, 301.0, 296.0, 283.0, 291.0, 288.0, 316.0, 317.0, 273.0, 263.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0975911945131436, "mean_processing_ms": 0.28763548597333904, "mean_inference_ms": 1.656000718644699}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5568000, "num_steps_sampled": 2969600, "sample_time_ms": 21056.358, "load_time_ms": 36.577, "grad_time_ms": 9266.344, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002132798545062542, "policy_loss": -0.0064245969988405704, "vf_loss": 91.22052001953125, "vf_explained_var": 0.7570000290870667, "kl": 0.002030483214184642, "entropy": 1.129306674003601, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2969600, "episodes_total": 7424, "training_iteration": 232, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-41-33", "timestamp": 1660254093, "time_this_iter_s": 32.109358072280884, "time_total_s": 12508.219035863876, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12508.219035863876, "timesteps_since_restore": 2969600, "iterations_since_restore": 232, "perf": {"cpu_util_percent": 33.97777777777779, "ram_util_percent": 58.6088888888889}} +{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 596.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 298.275}, "custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 183.75, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, 
"onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.46, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.26, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.11, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.17, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.41, 
"soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.11, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.17, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.11, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.17, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 576.0, 579.0, 573.0, 627.0, 579.0, 582.0, 630.0, 530.0, 630.0, 636.0, 627.0, 570.0, 582.0, 627.0, 582.0, 584.0, 582.0, 587.0, 587.0, 582.0, 576.0, 636.0, 579.0, 582.0, 579.0, 584.0, 630.0, 627.0, 630.0, 630.0, 633.0, 633.0, 636.0, 579.0, 527.0, 582.0, 576.0, 587.0, 582.0, 633.0, 444.0, 630.0, 590.0, 633.0, 522.0, 636.0, 579.0, 587.0, 633.0, 627.0, 587.0, 630.0, 636.0, 587.0, 582.0, 582.0, 582.0, 627.0, 636.0, 582.0, 627.0, 582.0, 579.0, 579.0, 633.0, 536.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 582.0, 587.0, 630.0, 573.0, 627.0, 579.0, 576.0, 582.0, 630.0, 587.0, 590.0, 582.0, 630.0, 627.0, 582.0, 582.0, 582.0, 573.0, 576.0, 570.0, 582.0, 582.0, 579.0, 633.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 284.0, 296.0, 286.0, 288.0, 288.0, 286.0, 293.0, 278.0, 295.0, 318.0, 309.0, 285.0, 294.0, 295.0, 287.0, 308.0, 322.0, 268.0, 262.0, 314.0, 316.0, 314.0, 322.0, 305.0, 322.0, 287.0, 283.0, 296.0, 286.0, 322.0, 305.0, 286.0, 296.0, 295.0, 289.0, 298.0, 284.0, 301.0, 286.0, 284.0, 303.0, 288.0, 
294.0, 300.0, 276.0, 332.0, 304.0, 282.0, 297.0, 304.0, 278.0, 277.0, 302.0, 301.0, 283.0, 316.0, 314.0, 310.0, 317.0, 324.0, 306.0, 321.0, 309.0, 313.0, 320.0, 331.0, 302.0, 321.0, 315.0, 296.0, 283.0, 262.0, 265.0, 287.0, 295.0, 277.0, 299.0, 285.0, 302.0, 293.0, 289.0, 313.0, 320.0, 229.0, 215.0, 321.0, 309.0, 292.0, 298.0, 311.0, 322.0, 259.0, 263.0, 309.0, 327.0, 288.0, 291.0, 292.0, 295.0, 315.0, 318.0, 321.0, 306.0, 295.0, 292.0, 318.0, 312.0, 313.0, 323.0, 293.0, 294.0, 294.0, 288.0, 285.0, 297.0, 291.0, 291.0, 322.0, 305.0, 329.0, 307.0, 291.0, 291.0, 313.0, 314.0, 281.0, 301.0, 296.0, 283.0, 291.0, 288.0, 316.0, 317.0, 273.0, 263.0, 321.0, 306.0, 316.0, 314.0, 277.0, 302.0, 318.0, 321.0, 286.0, 296.0, 303.0, 330.0, 286.0, 296.0, 288.0, 299.0, 324.0, 306.0, 274.0, 299.0, 311.0, 316.0, 291.0, 288.0, 292.0, 284.0, 294.0, 288.0, 318.0, 312.0, 300.0, 287.0, 296.0, 294.0, 290.0, 292.0, 304.0, 326.0, 315.0, 312.0, 294.0, 288.0, 299.0, 283.0, 291.0, 291.0, 282.0, 291.0, 290.0, 286.0, 282.0, 288.0, 294.0, 288.0, 293.0, 289.0, 297.0, 282.0, 314.0, 319.0, 308.0, 319.0, 313.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0948199723537093, "mean_processing_ms": 0.2870853418047666, "mean_inference_ms": 1.653643049405544}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5592000, "num_steps_sampled": 2982400, "sample_time_ms": 21374.201, "load_time_ms": 36.582, "grad_time_ms": 9304.523, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033536478877067566, "policy_loss": -0.004937517922371626, "vf_loss": 88.6025161743164, "vf_explained_var": 0.7515634894371033, "kl": 0.0023627106565982103, "entropy": 1.138161540031433, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2982400, "episodes_total": 7456, "training_iteration": 233, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-11_22-42-06", "timestamp": 1660254126, "time_this_iter_s": 33.16590905189514, "time_total_s": 12541.384944915771, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12541.384944915771, "timesteps_since_restore": 2982400, "iterations_since_restore": 233, "perf": {"cpu_util_percent": 33.06170212765958, "ram_util_percent": 58.5808510638298}} +{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 598.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 262.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 299.29}, "custom_metrics": {"sparse_reward_mean": 207.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.18, "shaped_reward_min": 170, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.49, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.91, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.39, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, 
"soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.91, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.39, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.91, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.39, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 582.0, 630.0, 582.0, 627.0, 576.0, 576.0, 636.0, 630.0, 579.0, 630.0, 582.0, 582.0, 587.0, 636.0, 579.0, 581.0, 587.0, 582.0, 630.0, 639.0, 627.0, 639.0, 630.0, 630.0, 627.0, 570.0, 587.0, 579.0, 582.0, 576.0, 579.0, 579.0, 633.0, 536.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 582.0, 587.0, 630.0, 573.0, 627.0, 579.0, 576.0, 582.0, 630.0, 587.0, 590.0, 582.0, 630.0, 627.0, 582.0, 582.0, 582.0, 573.0, 576.0, 570.0, 582.0, 582.0, 579.0, 633.0, 627.0, 633.0, 579.0, 582.0, 576.0, 579.0, 573.0, 627.0, 579.0, 582.0, 630.0, 530.0, 630.0, 636.0, 627.0, 570.0, 582.0, 627.0, 582.0, 584.0, 582.0, 587.0, 587.0, 582.0, 576.0, 636.0, 579.0, 582.0, 579.0, 584.0, 630.0, 627.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [328.0, 302.0, 284.0, 298.0, 283.0, 299.0, 308.0, 322.0, 292.0, 290.0, 300.0, 327.0, 291.0, 285.0, 295.0, 281.0, 326.0, 310.0, 
313.0, 317.0, 302.0, 277.0, 318.0, 312.0, 293.0, 289.0, 298.0, 284.0, 296.0, 291.0, 332.0, 304.0, 293.0, 286.0, 285.0, 296.0, 303.0, 284.0, 299.0, 283.0, 324.0, 306.0, 327.0, 312.0, 316.0, 311.0, 319.0, 320.0, 317.0, 313.0, 319.0, 311.0, 309.0, 318.0, 293.0, 277.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 290.0, 286.0, 296.0, 283.0, 291.0, 288.0, 316.0, 317.0, 273.0, 263.0, 321.0, 306.0, 316.0, 314.0, 277.0, 302.0, 318.0, 321.0, 286.0, 296.0, 303.0, 330.0, 286.0, 296.0, 288.0, 299.0, 324.0, 306.0, 274.0, 299.0, 311.0, 316.0, 291.0, 288.0, 292.0, 284.0, 294.0, 288.0, 318.0, 312.0, 300.0, 287.0, 296.0, 294.0, 290.0, 292.0, 304.0, 326.0, 315.0, 312.0, 294.0, 288.0, 299.0, 283.0, 291.0, 291.0, 282.0, 291.0, 290.0, 286.0, 282.0, 288.0, 294.0, 288.0, 293.0, 289.0, 297.0, 282.0, 314.0, 319.0, 308.0, 319.0, 313.0, 320.0, 295.0, 284.0, 296.0, 286.0, 288.0, 288.0, 286.0, 293.0, 278.0, 295.0, 318.0, 309.0, 285.0, 294.0, 295.0, 287.0, 308.0, 322.0, 268.0, 262.0, 314.0, 316.0, 314.0, 322.0, 305.0, 322.0, 287.0, 283.0, 296.0, 286.0, 322.0, 305.0, 286.0, 296.0, 295.0, 289.0, 298.0, 284.0, 301.0, 286.0, 284.0, 303.0, 288.0, 294.0, 300.0, 276.0, 332.0, 304.0, 282.0, 297.0, 304.0, 278.0, 277.0, 302.0, 301.0, 283.0, 316.0, 314.0, 310.0, 317.0, 324.0, 306.0, 321.0, 309.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0920719973611583, "mean_processing_ms": 0.2865396383505603, "mean_inference_ms": 1.6512949554166665}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5616000, "num_steps_sampled": 2995200, "sample_time_ms": 21497.979, "load_time_ms": 36.457, "grad_time_ms": 9378.106, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021230385173112154, "policy_loss": -0.0060439333319664, "vf_loss": 87.32781982421875, "vf_explained_var": 0.7546737194061279, "kl": 0.0017831752775236964, "entropy": 1.1316334009170532, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 2995200, "episodes_total": 7488, "training_iteration": 234, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-42-38", "timestamp": 1660254158, "time_this_iter_s": 31.695109128952026, "time_total_s": 12573.080054044724, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12573.080054044724, "timesteps_since_restore": 2995200, "iterations_since_restore": 234, "perf": {"cpu_util_percent": 33.72888888888888, "ram_util_percent": 58.67111111111112}} +{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 599.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 261.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 299.585}, "custom_metrics": {"sparse_reward_mean": 207.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.37, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.66, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.93, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.93, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.93, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 627.0, 630.0, 582.0, 582.0, 579.0, 582.0, 582.0, 590.0, 627.0, 579.0, 627.0, 576.0, 636.0, 579.0, 579.0, 582.0, 627.0, 561.0, 630.0, 587.0, 630.0, 582.0, 579.0, 639.0, 630.0, 579.0, 636.0, 584.0, 541.0, 579.0, 633.0, 627.0, 633.0, 579.0, 582.0, 576.0, 579.0, 573.0, 627.0, 579.0, 582.0, 630.0, 530.0, 630.0, 636.0, 627.0, 570.0, 582.0, 627.0, 582.0, 584.0, 582.0, 587.0, 587.0, 582.0, 576.0, 636.0, 579.0, 582.0, 579.0, 584.0, 630.0, 627.0, 630.0, 630.0, 630.0, 582.0, 582.0, 630.0, 582.0, 627.0, 576.0, 576.0, 636.0, 630.0, 579.0, 630.0, 582.0, 582.0, 587.0, 636.0, 579.0, 581.0, 587.0, 582.0, 630.0, 639.0, 627.0, 639.0, 630.0, 630.0, 627.0, 570.0, 587.0, 579.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 298.0, 295.0, 284.0, 285.0, 297.0, 305.0, 322.0, 323.0, 307.0, 283.0, 299.0, 294.0, 288.0, 287.0, 292.0, 305.0, 277.0, 293.0, 289.0, 299.0, 291.0, 308.0, 319.0, 301.0, 278.0, 322.0, 305.0, 283.0, 293.0, 319.0, 317.0, 288.0, 291.0, 294.0, 285.0, 286.0, 296.0, 306.0, 321.0, 291.0, 270.0, 306.0, 324.0, 286.0, 301.0, 303.0, 327.0, 291.0, 291.0, 290.0, 289.0, 312.0, 327.0, 321.0, 309.0, 283.0, 296.0, 319.0, 317.0, 283.0, 301.0, 280.0, 261.0, 297.0, 282.0, 314.0, 319.0, 308.0, 319.0, 313.0, 320.0, 295.0, 284.0, 296.0, 286.0, 288.0, 288.0, 286.0, 293.0, 278.0, 295.0, 318.0, 309.0, 285.0, 294.0, 295.0, 287.0, 308.0, 322.0, 268.0, 262.0, 314.0, 316.0, 314.0, 322.0, 305.0, 322.0, 287.0, 283.0, 296.0, 286.0, 322.0, 305.0, 286.0, 296.0, 295.0, 289.0, 298.0, 284.0, 301.0, 286.0, 284.0, 303.0, 288.0, 294.0, 300.0, 276.0, 332.0, 304.0, 282.0, 297.0, 304.0, 278.0, 277.0, 302.0, 301.0, 283.0, 316.0, 314.0, 310.0, 317.0, 324.0, 306.0, 321.0, 309.0, 328.0, 302.0, 284.0, 298.0, 283.0, 299.0, 308.0, 322.0, 292.0, 290.0, 300.0, 327.0, 291.0, 285.0, 295.0, 281.0, 326.0, 310.0, 313.0, 317.0, 302.0, 277.0, 318.0, 312.0, 293.0, 289.0, 298.0, 284.0, 296.0, 291.0, 332.0, 304.0, 293.0, 286.0, 285.0, 296.0, 303.0, 284.0, 299.0, 283.0, 324.0, 306.0, 327.0, 312.0, 316.0, 311.0, 319.0, 320.0, 317.0, 313.0, 319.0, 311.0, 309.0, 318.0, 293.0, 277.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 290.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0893523875509898, "mean_processing_ms": 0.2860034188911333, "mean_inference_ms": 1.6490498343131736}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5640000, "num_steps_sampled": 3008000, "sample_time_ms": 21585.027, "load_time_ms": 36.709, "grad_time_ms": 9663.091, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033673776779323816, "policy_loss": -0.0045895627699792385, "vf_loss": 85.23816680908203, 
"vf_explained_var": 0.7584102749824524, "kl": 0.0018025357276201248, "entropy": 1.1337394714355469, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3008000, "episodes_total": 7520, "training_iteration": 235, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-43-12", "timestamp": 1660254192, "time_this_iter_s": 34.23338508605957, "time_total_s": 12607.313439130783, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12607.313439130783, "timesteps_since_restore": 3008000, "iterations_since_restore": 235, "perf": {"cpu_util_percent": 33.239583333333336, "ram_util_percent": 58.65}} +{"episode_reward_max": 639.0, "episode_reward_min": 541.0, "episode_reward_mean": 603.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 261.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 301.58}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.56, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.48, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.55, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.97, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.73, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.76, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.86, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.76, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.71, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.76, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.76, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 582.0, 627.0, 584.0, 633.0, 579.0, 630.0, 582.0, 627.0, 582.0, 584.0, 587.0, 630.0, 582.0, 627.0, 630.0, 627.0, 587.0, 582.0, 639.0, 582.0, 582.0, 633.0, 582.0, 587.0, 627.0, 633.0, 582.0, 639.0, 579.0, 630.0, 630.0, 627.0, 630.0, 630.0, 630.0, 582.0, 582.0, 630.0, 582.0, 627.0, 576.0, 576.0, 636.0, 630.0, 579.0, 630.0, 582.0, 582.0, 587.0, 636.0, 579.0, 581.0, 587.0, 582.0, 630.0, 639.0, 627.0, 639.0, 630.0, 630.0, 627.0, 570.0, 587.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 627.0, 630.0, 582.0, 582.0, 579.0, 582.0, 582.0, 590.0, 627.0, 579.0, 627.0, 576.0, 636.0, 579.0, 579.0, 582.0, 627.0, 561.0, 630.0, 587.0, 630.0, 582.0, 579.0, 639.0, 630.0, 579.0, 636.0, 584.0, 541.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 324.0, 306.0, 285.0, 297.0, 316.0, 311.0, 283.0, 301.0, 311.0, 322.0, 288.0, 291.0, 321.0, 309.0, 293.0, 289.0, 315.0, 312.0, 301.0, 281.0, 292.0, 292.0, 295.0, 292.0, 316.0, 314.0, 291.0, 291.0, 309.0, 318.0, 321.0, 309.0, 313.0, 314.0, 301.0, 286.0, 291.0, 291.0, 322.0, 317.0, 294.0, 288.0, 290.0, 292.0, 311.0, 322.0, 288.0, 294.0, 288.0, 299.0, 316.0, 311.0, 316.0, 317.0, 281.0, 301.0, 324.0, 315.0, 294.0, 285.0, 317.0, 313.0, 316.0, 314.0, 310.0, 317.0, 324.0, 306.0, 321.0, 309.0, 328.0, 302.0, 284.0, 298.0, 283.0, 299.0, 308.0, 322.0, 292.0, 290.0, 300.0, 327.0, 291.0, 285.0, 295.0, 281.0, 326.0, 310.0, 313.0, 317.0, 302.0, 277.0, 318.0, 312.0, 293.0, 289.0, 298.0, 284.0, 296.0, 291.0, 332.0, 304.0, 293.0, 286.0, 285.0, 296.0, 303.0, 284.0, 299.0, 283.0, 324.0, 306.0, 327.0, 312.0, 316.0, 311.0, 319.0, 320.0, 317.0, 313.0, 319.0, 311.0, 309.0, 318.0, 293.0, 277.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 290.0, 286.0, 284.0, 298.0, 295.0, 284.0, 285.0, 297.0, 305.0, 322.0, 323.0, 307.0, 283.0, 299.0, 294.0, 288.0, 287.0, 292.0, 305.0, 277.0, 293.0, 289.0, 299.0, 291.0, 308.0, 319.0, 301.0, 278.0, 322.0, 305.0, 283.0, 293.0, 319.0, 317.0, 288.0, 291.0, 294.0, 285.0, 286.0, 296.0, 306.0, 321.0, 291.0, 270.0, 306.0, 324.0, 286.0, 301.0, 303.0, 327.0, 291.0, 291.0, 290.0, 289.0, 312.0, 327.0, 321.0, 309.0, 283.0, 296.0, 319.0, 317.0, 283.0, 301.0, 280.0, 261.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0866480049346467, "mean_processing_ms": 0.2854697984995614, "mean_inference_ms": 1.6467374423655963}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5664000, "num_steps_sampled": 3020800, "sample_time_ms": 21716.507, "load_time_ms": 36.498, "grad_time_ms": 9682.814, "update_time_ms": 0.002, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004189230967313051, "policy_loss": -0.003748750314116478, "vf_loss": 85.03255462646484, "vf_explained_var": 0.76678067445755, "kl": 0.001733882469125092, "entropy": 1.130557656288147, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3020800, "episodes_total": 7552, "training_iteration": 236, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-43-43", "timestamp": 1660254223, "time_this_iter_s": 30.502008199691772, "time_total_s": 12637.815447330475, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12637.815447330475, "timesteps_since_restore": 3020800, "iterations_since_restore": 236, "perf": {"cpu_util_percent": 34.461363636363636, "ram_util_percent": 59.190909090909095}} +{"episode_reward_max": 639.0, "episode_reward_min": 515.0, "episode_reward_mean": 596.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 298.165}, "custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.53, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.6, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.98, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.74, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.76, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.51, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 1, 
"useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.76, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.51, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.76, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.51, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 539.0, 630.0, 587.0, 630.0, 579.0, 579.0, 582.0, 627.0, 521.0, 636.0, 582.0, 582.0, 587.0, 576.0, 627.0, 527.0, 627.0, 567.0, 576.0, 570.0, 627.0, 538.0, 630.0, 636.0, 561.0, 515.0, 582.0, 630.0, 633.0, 558.0, 587.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 627.0, 630.0, 582.0, 582.0, 579.0, 582.0, 582.0, 590.0, 627.0, 579.0, 627.0, 576.0, 636.0, 579.0, 579.0, 582.0, 627.0, 561.0, 630.0, 587.0, 630.0, 582.0, 579.0, 639.0, 630.0, 579.0, 636.0, 584.0, 541.0, 633.0, 630.0, 582.0, 627.0, 584.0, 633.0, 579.0, 630.0, 582.0, 627.0, 582.0, 584.0, 587.0, 630.0, 582.0, 627.0, 630.0, 627.0, 587.0, 582.0, 639.0, 582.0, 582.0, 633.0, 582.0, 587.0, 627.0, 633.0, 582.0, 639.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 295.0, 297.0, 282.0, 265.0, 274.0, 316.0, 314.0, 294.0, 293.0, 319.0, 311.0, 293.0, 286.0, 293.0, 286.0, 294.0, 288.0, 310.0, 317.0, 260.0, 261.0, 324.0, 312.0, 303.0, 279.0, 293.0, 289.0, 290.0, 297.0, 282.0, 294.0, 318.0, 309.0, 268.0, 259.0, 311.0, 316.0, 289.0, 278.0, 282.0, 294.0, 276.0, 294.0, 306.0, 321.0, 273.0, 265.0, 306.0, 324.0, 317.0, 319.0, 293.0, 268.0, 258.0, 257.0, 291.0, 291.0, 316.0, 314.0, 308.0, 325.0, 264.0, 294.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 290.0, 286.0, 284.0, 298.0, 295.0, 284.0, 285.0, 297.0, 305.0, 322.0, 323.0, 307.0, 283.0, 299.0, 294.0, 288.0, 287.0, 292.0, 305.0, 277.0, 293.0, 289.0, 299.0, 291.0, 308.0, 319.0, 301.0, 278.0, 322.0, 305.0, 283.0, 293.0, 319.0, 317.0, 288.0, 291.0, 294.0, 285.0, 286.0, 296.0, 306.0, 321.0, 291.0, 270.0, 306.0, 324.0, 286.0, 301.0, 303.0, 327.0, 291.0, 291.0, 290.0, 289.0, 312.0, 327.0, 321.0, 309.0, 283.0, 296.0, 319.0, 317.0, 283.0, 301.0, 280.0, 261.0, 321.0, 312.0, 324.0, 306.0, 285.0, 297.0, 316.0, 311.0, 283.0, 301.0, 311.0, 322.0, 288.0, 291.0, 321.0, 309.0, 293.0, 289.0, 315.0, 312.0, 301.0, 281.0, 292.0, 292.0, 295.0, 292.0, 316.0, 314.0, 291.0, 291.0, 309.0, 318.0, 321.0, 309.0, 313.0, 314.0, 301.0, 286.0, 291.0, 291.0, 322.0, 317.0, 294.0, 288.0, 290.0, 292.0, 311.0, 322.0, 288.0, 294.0, 288.0, 299.0, 316.0, 311.0, 316.0, 317.0, 281.0, 301.0, 324.0, 315.0, 294.0, 285.0, 317.0, 313.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0839571838028972, "mean_processing_ms": 0.28493791430015475, "mean_inference_ms": 
1.6442666845730367}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5688000, "num_steps_sampled": 3033600, "sample_time_ms": 21735.725, "load_time_ms": 37.102, "grad_time_ms": 9981.103, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00033502434962429106, "policy_loss": -0.007877787575125694, "vf_loss": 87.85860443115234, "vf_explained_var": 0.7610828280448914, "kl": 0.0018075080588459969, "entropy": 1.1460970640182495, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3033600, "episodes_total": 7584, "training_iteration": 237, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-44-14", "timestamp": 1660254254, "time_this_iter_s": 31.47483992576599, "time_total_s": 12669.29028725624, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12669.29028725624, "timesteps_since_restore": 3033600, "iterations_since_restore": 237, "perf": {"cpu_util_percent": 33.54772727272728, "ram_util_percent": 58.545454545454554}} +{"episode_reward_max": 639.0, "episode_reward_min": 515.0, "episode_reward_mean": 598.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 299.205}, "custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.41, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.61, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.64, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.68, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, 
"dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.82, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, 
"viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 579.0, 627.0, 582.0, 582.0, 544.0, 630.0, 587.0, 630.0, 584.0, 636.0, 624.0, 582.0, 627.0, 579.0, 630.0, 633.0, 590.0, 633.0, 630.0, 636.0, 576.0, 633.0, 582.0, 579.0, 582.0, 544.0, 630.0, 636.0, 579.0, 587.0, 573.0, 579.0, 636.0, 584.0, 541.0, 633.0, 630.0, 582.0, 627.0, 584.0, 633.0, 579.0, 630.0, 582.0, 627.0, 582.0, 584.0, 587.0, 630.0, 582.0, 627.0, 630.0, 627.0, 587.0, 582.0, 639.0, 582.0, 582.0, 633.0, 582.0, 587.0, 627.0, 633.0, 582.0, 639.0, 579.0, 630.0, 582.0, 579.0, 539.0, 630.0, 587.0, 630.0, 579.0, 579.0, 582.0, 
627.0, 521.0, 636.0, 582.0, 582.0, 587.0, 576.0, 627.0, 527.0, 627.0, 567.0, 576.0, 570.0, 627.0, 538.0, 630.0, 636.0, 561.0, 515.0, 582.0, 630.0, 633.0, 558.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 298.0, 281.0, 313.0, 314.0, 293.0, 289.0, 285.0, 297.0, 270.0, 274.0, 321.0, 309.0, 286.0, 301.0, 310.0, 320.0, 290.0, 294.0, 322.0, 314.0, 327.0, 297.0, 288.0, 294.0, 310.0, 317.0, 288.0, 291.0, 311.0, 319.0, 313.0, 320.0, 288.0, 302.0, 313.0, 320.0, 324.0, 306.0, 312.0, 324.0, 289.0, 287.0, 311.0, 322.0, 288.0, 294.0, 283.0, 296.0, 301.0, 281.0, 273.0, 271.0, 311.0, 319.0, 319.0, 317.0, 291.0, 288.0, 302.0, 285.0, 299.0, 274.0, 283.0, 296.0, 319.0, 317.0, 283.0, 301.0, 280.0, 261.0, 321.0, 312.0, 324.0, 306.0, 285.0, 297.0, 316.0, 311.0, 283.0, 301.0, 311.0, 322.0, 288.0, 291.0, 321.0, 309.0, 293.0, 289.0, 315.0, 312.0, 301.0, 281.0, 292.0, 292.0, 295.0, 292.0, 316.0, 314.0, 291.0, 291.0, 309.0, 318.0, 321.0, 309.0, 313.0, 314.0, 301.0, 286.0, 291.0, 291.0, 322.0, 317.0, 294.0, 288.0, 290.0, 292.0, 311.0, 322.0, 288.0, 294.0, 288.0, 299.0, 316.0, 311.0, 316.0, 317.0, 281.0, 301.0, 324.0, 315.0, 294.0, 285.0, 317.0, 313.0, 287.0, 295.0, 297.0, 282.0, 265.0, 274.0, 316.0, 314.0, 294.0, 293.0, 319.0, 311.0, 293.0, 286.0, 293.0, 286.0, 294.0, 288.0, 310.0, 317.0, 260.0, 261.0, 324.0, 312.0, 303.0, 279.0, 293.0, 289.0, 290.0, 297.0, 282.0, 294.0, 318.0, 309.0, 268.0, 259.0, 311.0, 316.0, 289.0, 278.0, 282.0, 294.0, 276.0, 294.0, 306.0, 321.0, 273.0, 265.0, 306.0, 
324.0, 317.0, 319.0, 293.0, 268.0, 258.0, 257.0, 291.0, 291.0, 316.0, 314.0, 308.0, 325.0, 264.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0812829517018399, "mean_processing_ms": 0.2844054156337277, "mean_inference_ms": 1.641726673758305}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5712000, "num_steps_sampled": 3046400, "sample_time_ms": 21856.898, "load_time_ms": 37.298, "grad_time_ms": 10099.958, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0027887988835573196, "policy_loss": -0.005770063493400812, "vf_loss": 91.25625610351562, "vf_explained_var": 0.7579948306083679, "kl": 0.001784983091056347, "entropy": 1.1335158348083496, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3046400, "episodes_total": 7616, "training_iteration": 238, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-44-46", "timestamp": 1660254286, "time_this_iter_s": 31.44696879386902, "time_total_s": 12700.73725605011, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12700.73725605011, "timesteps_since_restore": 3046400, "iterations_since_restore": 238, "perf": {"cpu_util_percent": 33.757777777777775, "ram_util_percent": 58.49555555555556}} +{"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 593.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 296.655}, "custom_metrics": {"sparse_reward_mean": 205.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 182.91, 
"shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.34, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.76, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.02, 
"potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.22, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.02, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.22, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.22, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 524.0, 582.0, 636.0, 582.0, 630.0, 582.0, 633.0, 630.0, 630.0, 582.0, 582.0, 539.0, 579.0, 639.0, 579.0, 576.0, 582.0, 579.0, 636.0, 579.0, 633.0, 630.0, 587.0, 242.0, 573.0, 627.0, 582.0, 636.0, 587.0, 627.0, 636.0, 582.0, 639.0, 579.0, 630.0, 582.0, 579.0, 539.0, 630.0, 587.0, 630.0, 579.0, 579.0, 582.0, 627.0, 521.0, 
636.0, 582.0, 582.0, 587.0, 576.0, 627.0, 527.0, 627.0, 567.0, 576.0, 570.0, 627.0, 538.0, 630.0, 636.0, 561.0, 515.0, 582.0, 630.0, 633.0, 558.0, 633.0, 579.0, 627.0, 582.0, 582.0, 544.0, 630.0, 587.0, 630.0, 584.0, 636.0, 624.0, 582.0, 627.0, 579.0, 630.0, 633.0, 590.0, 633.0, 630.0, 636.0, 576.0, 633.0, 582.0, 579.0, 582.0, 544.0, 630.0, 636.0, 579.0, 587.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 259.0, 265.0, 288.0, 294.0, 314.0, 322.0, 290.0, 292.0, 319.0, 311.0, 295.0, 287.0, 317.0, 316.0, 326.0, 304.0, 309.0, 321.0, 293.0, 289.0, 298.0, 284.0, 263.0, 276.0, 286.0, 293.0, 320.0, 319.0, 295.0, 284.0, 290.0, 286.0, 284.0, 298.0, 278.0, 301.0, 319.0, 317.0, 288.0, 291.0, 313.0, 320.0, 322.0, 308.0, 299.0, 288.0, 117.0, 125.0, 290.0, 283.0, 319.0, 308.0, 286.0, 296.0, 324.0, 312.0, 301.0, 286.0, 311.0, 316.0, 314.0, 322.0, 281.0, 301.0, 324.0, 315.0, 294.0, 285.0, 317.0, 313.0, 287.0, 295.0, 297.0, 282.0, 265.0, 274.0, 316.0, 314.0, 294.0, 293.0, 319.0, 311.0, 293.0, 286.0, 293.0, 286.0, 294.0, 288.0, 310.0, 317.0, 260.0, 261.0, 324.0, 312.0, 303.0, 279.0, 293.0, 289.0, 290.0, 297.0, 282.0, 294.0, 318.0, 309.0, 268.0, 259.0, 311.0, 316.0, 289.0, 278.0, 282.0, 294.0, 276.0, 294.0, 306.0, 321.0, 273.0, 265.0, 306.0, 324.0, 317.0, 319.0, 293.0, 268.0, 258.0, 257.0, 291.0, 291.0, 316.0, 314.0, 308.0, 325.0, 264.0, 294.0, 321.0, 312.0, 298.0, 281.0, 313.0, 314.0, 293.0, 289.0, 285.0, 297.0, 270.0, 274.0, 321.0, 309.0, 286.0, 301.0, 310.0, 320.0, 290.0, 
294.0, 322.0, 314.0, 327.0, 297.0, 288.0, 294.0, 310.0, 317.0, 288.0, 291.0, 311.0, 319.0, 313.0, 320.0, 288.0, 302.0, 313.0, 320.0, 324.0, 306.0, 312.0, 324.0, 289.0, 287.0, 311.0, 322.0, 288.0, 294.0, 283.0, 296.0, 301.0, 281.0, 273.0, 271.0, 311.0, 319.0, 319.0, 317.0, 291.0, 288.0, 302.0, 285.0, 299.0, 274.0]}, "sampler_perf": {"mean_env_wait_ms": 1.078628112981745, "mean_processing_ms": 0.2838764625844994, "mean_inference_ms": 1.6391328483371586}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5736000, "num_steps_sampled": 3059200, "sample_time_ms": 21753.38, "load_time_ms": 37.651, "grad_time_ms": 10111.988, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005386353936046362, "policy_loss": -0.003314490430057049, "vf_loss": 92.68680572509766, "vf_explained_var": 0.7602830529212952, "kl": 0.0021554683335125446, "entropy": 1.135677456855774, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3059200, "episodes_total": 7648, "training_iteration": 239, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-45-16", "timestamp": 1660254316, "time_this_iter_s": 30.261106967926025, "time_total_s": 12730.998363018036, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12730.998363018036, "timesteps_since_restore": 3059200, "iterations_since_restore": 239, "perf": {"cpu_util_percent": 33.04761904761905, "ram_util_percent": 58.58571428571428}} +{"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 598.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 117.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 299.29}, "custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 184.58, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.27, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.47, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.3, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.31, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.22, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.22, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 9, 
"optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.22, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 630.0, 587.0, 567.0, 587.0, 582.0, 624.0, 539.0, 633.0, 633.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 
630.0, 630.0, 582.0, 576.0, 636.0, 587.0, 630.0, 576.0, 584.0, 579.0, 579.0, 570.0, 630.0, 587.0, 582.0, 582.0, 630.0, 633.0, 558.0, 633.0, 579.0, 627.0, 582.0, 582.0, 544.0, 630.0, 587.0, 630.0, 584.0, 636.0, 624.0, 582.0, 627.0, 579.0, 630.0, 633.0, 590.0, 633.0, 630.0, 636.0, 576.0, 633.0, 582.0, 579.0, 582.0, 544.0, 630.0, 636.0, 579.0, 587.0, 573.0, 579.0, 524.0, 582.0, 636.0, 582.0, 630.0, 582.0, 633.0, 630.0, 630.0, 582.0, 582.0, 539.0, 579.0, 639.0, 579.0, 576.0, 582.0, 579.0, 636.0, 579.0, 633.0, 630.0, 587.0, 242.0, 573.0, 627.0, 582.0, 636.0, 587.0, 627.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 298.0, 327.0, 309.0, 319.0, 311.0, 301.0, 286.0, 284.0, 283.0, 286.0, 301.0, 289.0, 293.0, 311.0, 313.0, 273.0, 266.0, 317.0, 316.0, 306.0, 327.0, 313.0, 317.0, 324.0, 312.0, 309.0, 324.0, 319.0, 314.0, 313.0, 317.0, 327.0, 309.0, 309.0, 321.0, 321.0, 309.0, 292.0, 290.0, 293.0, 283.0, 322.0, 314.0, 283.0, 304.0, 322.0, 308.0, 283.0, 293.0, 291.0, 293.0, 285.0, 294.0, 296.0, 283.0, 282.0, 288.0, 315.0, 315.0, 298.0, 289.0, 301.0, 281.0, 291.0, 291.0, 316.0, 314.0, 308.0, 325.0, 264.0, 294.0, 321.0, 312.0, 298.0, 281.0, 313.0, 314.0, 293.0, 289.0, 285.0, 297.0, 270.0, 274.0, 321.0, 309.0, 286.0, 301.0, 310.0, 320.0, 290.0, 294.0, 322.0, 314.0, 327.0, 297.0, 288.0, 294.0, 310.0, 317.0, 288.0, 291.0, 311.0, 319.0, 313.0, 320.0, 288.0, 302.0, 313.0, 320.0, 324.0, 306.0, 312.0, 324.0, 289.0, 287.0, 311.0, 322.0, 288.0, 294.0, 283.0, 296.0, 301.0, 281.0, 273.0, 
271.0, 311.0, 319.0, 319.0, 317.0, 291.0, 288.0, 302.0, 285.0, 299.0, 274.0, 288.0, 291.0, 259.0, 265.0, 288.0, 294.0, 314.0, 322.0, 290.0, 292.0, 319.0, 311.0, 295.0, 287.0, 317.0, 316.0, 326.0, 304.0, 309.0, 321.0, 293.0, 289.0, 298.0, 284.0, 263.0, 276.0, 286.0, 293.0, 320.0, 319.0, 295.0, 284.0, 290.0, 286.0, 284.0, 298.0, 278.0, 301.0, 319.0, 317.0, 288.0, 291.0, 313.0, 320.0, 322.0, 308.0, 299.0, 288.0, 117.0, 125.0, 290.0, 283.0, 319.0, 308.0, 286.0, 296.0, 324.0, 312.0, 301.0, 286.0, 311.0, 316.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0759937679443485, "mean_processing_ms": 0.28335070451542615, "mean_inference_ms": 1.6365163711120108}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5760000, "num_steps_sampled": 3072000, "sample_time_ms": 21577.613, "load_time_ms": 37.508, "grad_time_ms": 10049.353, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0017478683730587363, "policy_loss": -0.006376888602972031, "vf_loss": 86.9516372680664, "vf_explained_var": 0.7652549743652344, "kl": 0.0021124929189682007, "entropy": 1.1408079862594604, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3072000, "episodes_total": 7680, "training_iteration": 240, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-45-45", "timestamp": 1660254345, "time_this_iter_s": 29.390948057174683, "time_total_s": 12760.38931107521, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12760.38931107521, "timesteps_since_restore": 3072000, "iterations_since_restore": 240, "perf": {"cpu_util_percent": 32.80238095238095, 
"ram_util_percent": 58.55238095238095}} +{"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 595.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 297.615}, "custom_metrics": {"sparse_reward_mean": 205.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 183.63, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.09, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, 
"onion_drop_agent_0_mean": 0.65, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.17, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.14, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.17, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.17, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 590.0, 627.0, 582.0, 639.0, 630.0, 576.0, 630.0, 458.0, 630.0, 550.0, 630.0, 587.0, 630.0, 582.0, 627.0, 570.0, 587.0, 630.0, 579.0, 558.0, 584.0, 538.0, 587.0, 587.0, 564.0, 630.0, 582.0, 633.0, 579.0, 539.0, 636.0, 579.0, 587.0, 573.0, 579.0, 524.0, 582.0, 636.0, 582.0, 630.0, 582.0, 633.0, 630.0, 630.0, 582.0, 582.0, 539.0, 579.0, 639.0, 579.0, 576.0, 582.0, 579.0, 636.0, 579.0, 633.0, 630.0, 587.0, 242.0, 573.0, 627.0, 582.0, 636.0, 587.0, 627.0, 636.0, 582.0, 636.0, 630.0, 587.0, 567.0, 587.0, 582.0, 624.0, 539.0, 633.0, 633.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 576.0, 636.0, 587.0, 630.0, 576.0, 584.0, 579.0, 579.0, 570.0, 630.0, 587.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 309.0, 305.0, 325.0, 294.0, 296.0, 316.0, 311.0, 292.0, 290.0, 317.0, 322.0, 311.0, 319.0, 276.0, 300.0, 314.0, 316.0, 230.0, 228.0, 326.0, 304.0, 276.0, 274.0, 328.0, 302.0, 294.0, 293.0, 306.0, 324.0, 286.0, 296.0, 310.0, 317.0, 294.0, 276.0, 286.0, 301.0, 321.0, 309.0, 293.0, 286.0, 286.0, 272.0, 293.0, 291.0, 267.0, 271.0, 302.0, 285.0, 296.0, 291.0, 278.0, 286.0, 311.0, 319.0, 295.0, 287.0, 316.0, 317.0, 299.0, 280.0, 268.0, 271.0, 319.0, 317.0, 291.0, 288.0, 302.0, 285.0, 299.0, 274.0, 288.0, 291.0, 259.0, 265.0, 288.0, 294.0, 314.0, 322.0, 290.0, 292.0, 319.0, 311.0, 295.0, 287.0, 317.0, 316.0, 326.0, 304.0, 309.0, 
321.0, 293.0, 289.0, 298.0, 284.0, 263.0, 276.0, 286.0, 293.0, 320.0, 319.0, 295.0, 284.0, 290.0, 286.0, 284.0, 298.0, 278.0, 301.0, 319.0, 317.0, 288.0, 291.0, 313.0, 320.0, 322.0, 308.0, 299.0, 288.0, 117.0, 125.0, 290.0, 283.0, 319.0, 308.0, 286.0, 296.0, 324.0, 312.0, 301.0, 286.0, 311.0, 316.0, 314.0, 322.0, 284.0, 298.0, 327.0, 309.0, 319.0, 311.0, 301.0, 286.0, 284.0, 283.0, 286.0, 301.0, 289.0, 293.0, 311.0, 313.0, 273.0, 266.0, 317.0, 316.0, 306.0, 327.0, 313.0, 317.0, 324.0, 312.0, 309.0, 324.0, 319.0, 314.0, 313.0, 317.0, 327.0, 309.0, 309.0, 321.0, 321.0, 309.0, 292.0, 290.0, 293.0, 283.0, 322.0, 314.0, 283.0, 304.0, 322.0, 308.0, 283.0, 293.0, 291.0, 293.0, 285.0, 294.0, 296.0, 283.0, 282.0, 288.0, 315.0, 315.0, 298.0, 289.0, 301.0, 281.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0733850104445417, "mean_processing_ms": 0.2828300453202057, "mean_inference_ms": 1.6339069456399316}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5784000, "num_steps_sampled": 3084800, "sample_time_ms": 21470.585, "load_time_ms": 37.079, "grad_time_ms": 10121.915, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020955076906830072, "policy_loss": -0.006229180842638016, "vf_loss": 88.9510269165039, "vf_explained_var": 0.7567486763000488, "kl": 0.0017531089251860976, "entropy": 1.140811562538147, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3084800, "episodes_total": 7712, "training_iteration": 241, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-46-18", "timestamp": 1660254378, "time_this_iter_s": 32.28085994720459, "time_total_s": 12792.670171022415, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12792.670171022415, "timesteps_since_restore": 3084800, "iterations_since_restore": 241, "perf": {"cpu_util_percent": 31.733333333333334, "ram_util_percent": 58.655555555555544}} +{"episode_reward_max": 639.0, "episode_reward_min": 458.0, "episode_reward_mean": 603.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 301.565}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.53, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.74, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.95, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.0, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.07, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 590.0, 639.0, 639.0, 627.0, 570.0, 582.0, 633.0, 590.0, 627.0, 633.0, 576.0, 576.0, 627.0, 582.0, 624.0, 630.0, 633.0, 630.0, 576.0, 633.0, 633.0, 590.0, 587.0, 639.0, 579.0, 582.0, 570.0, 630.0, 582.0, 636.0, 627.0, 636.0, 587.0, 627.0, 636.0, 582.0, 636.0, 630.0, 587.0, 567.0, 587.0, 582.0, 624.0, 539.0, 633.0, 633.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 576.0, 636.0, 587.0, 630.0, 576.0, 584.0, 579.0, 579.0, 570.0, 630.0, 587.0, 582.0, 627.0, 630.0, 590.0, 627.0, 582.0, 639.0, 630.0, 576.0, 630.0, 458.0, 630.0, 550.0, 630.0, 587.0, 630.0, 582.0, 627.0, 570.0, 587.0, 630.0, 579.0, 558.0, 584.0, 538.0, 587.0, 587.0, 564.0, 630.0, 582.0, 633.0, 579.0, 539.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 306.0, 294.0, 296.0, 322.0, 317.0, 317.0, 322.0, 316.0, 311.0, 275.0, 295.0, 300.0, 282.0, 326.0, 307.0, 294.0, 296.0, 313.0, 314.0, 311.0, 322.0, 291.0, 285.0, 272.0, 304.0, 313.0, 314.0, 294.0, 288.0, 315.0, 309.0, 313.0, 317.0, 314.0, 319.0, 316.0, 314.0, 280.0, 296.0, 308.0, 325.0, 321.0, 312.0, 293.0, 297.0, 296.0, 291.0, 317.0, 322.0, 291.0, 288.0, 292.0, 290.0, 294.0, 
276.0, 311.0, 319.0, 288.0, 294.0, 314.0, 322.0, 316.0, 311.0, 324.0, 312.0, 301.0, 286.0, 311.0, 316.0, 314.0, 322.0, 284.0, 298.0, 327.0, 309.0, 319.0, 311.0, 301.0, 286.0, 284.0, 283.0, 286.0, 301.0, 289.0, 293.0, 311.0, 313.0, 273.0, 266.0, 317.0, 316.0, 306.0, 327.0, 313.0, 317.0, 324.0, 312.0, 309.0, 324.0, 319.0, 314.0, 313.0, 317.0, 327.0, 309.0, 309.0, 321.0, 321.0, 309.0, 292.0, 290.0, 293.0, 283.0, 322.0, 314.0, 283.0, 304.0, 322.0, 308.0, 283.0, 293.0, 291.0, 293.0, 285.0, 294.0, 296.0, 283.0, 282.0, 288.0, 315.0, 315.0, 298.0, 289.0, 301.0, 281.0, 318.0, 309.0, 305.0, 325.0, 294.0, 296.0, 316.0, 311.0, 292.0, 290.0, 317.0, 322.0, 311.0, 319.0, 276.0, 300.0, 314.0, 316.0, 230.0, 228.0, 326.0, 304.0, 276.0, 274.0, 328.0, 302.0, 294.0, 293.0, 306.0, 324.0, 286.0, 296.0, 310.0, 317.0, 294.0, 276.0, 286.0, 301.0, 321.0, 309.0, 293.0, 286.0, 286.0, 272.0, 293.0, 291.0, 267.0, 271.0, 302.0, 285.0, 296.0, 291.0, 278.0, 286.0, 311.0, 319.0, 295.0, 287.0, 316.0, 317.0, 299.0, 280.0, 268.0, 271.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0708107094165102, "mean_processing_ms": 0.2823168700385721, "mean_inference_ms": 1.631412939127947}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5808000, "num_steps_sampled": 3097600, "sample_time_ms": 21506.392, "load_time_ms": 37.292, "grad_time_ms": 9971.815, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004354400560259819, "policy_loss": -0.0035108765587210655, "vf_loss": 84.29744720458984, "vf_explained_var": 0.7617435455322266, "kl": 0.0018548279767856002, "entropy": 1.1289268732070923, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3097600, "episodes_total": 7744, "training_iteration": 242, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-46-49", "timestamp": 1660254409, "time_this_iter_s": 30.967852115631104, 
"time_total_s": 12823.638023138046, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": 
false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": 
true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12823.638023138046, "timesteps_since_restore": 3097600, "iterations_since_restore": 242, "perf": {"cpu_util_percent": 31.486363636363638, "ram_util_percent": 58.63863636363636}} +{"episode_reward_max": 639.0, "episode_reward_min": 458.0, "episode_reward_mean": 602.75, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 301.375}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.15, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 
0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.82, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.93, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.78, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.1, 
"soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.93, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.93, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 630.0, 627.0, 627.0, 627.0, 582.0, 633.0, 630.0, 636.0, 584.0, 627.0, 582.0, 633.0, 573.0, 587.0, 627.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 639.0, 570.0, 579.0, 582.0, 633.0, 579.0, 633.0, 627.0, 544.0, 570.0, 630.0, 587.0, 582.0, 627.0, 630.0, 590.0, 627.0, 582.0, 639.0, 630.0, 576.0, 630.0, 458.0, 630.0, 550.0, 630.0, 587.0, 630.0, 582.0, 627.0, 570.0, 587.0, 630.0, 579.0, 558.0, 584.0, 538.0, 587.0, 587.0, 564.0, 630.0, 582.0, 633.0, 579.0, 539.0, 627.0, 590.0, 639.0, 639.0, 627.0, 570.0, 582.0, 633.0, 590.0, 627.0, 633.0, 576.0, 576.0, 627.0, 582.0, 624.0, 630.0, 633.0, 630.0, 576.0, 633.0, 633.0, 590.0, 587.0, 639.0, 579.0, 582.0, 570.0, 630.0, 582.0, 636.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 316.0, 317.0, 301.0, 329.0, 313.0, 314.0, 311.0, 316.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 311.0, 319.0, 324.0, 312.0, 286.0, 298.0, 324.0, 303.0, 278.0, 304.0, 307.0, 326.0, 288.0, 285.0, 298.0, 
289.0, 308.0, 319.0, 293.0, 294.0, 326.0, 304.0, 295.0, 287.0, 280.0, 296.0, 315.0, 312.0, 289.0, 290.0, 322.0, 317.0, 288.0, 282.0, 283.0, 296.0, 299.0, 283.0, 309.0, 324.0, 282.0, 297.0, 312.0, 321.0, 305.0, 322.0, 278.0, 266.0, 282.0, 288.0, 315.0, 315.0, 298.0, 289.0, 301.0, 281.0, 318.0, 309.0, 305.0, 325.0, 294.0, 296.0, 316.0, 311.0, 292.0, 290.0, 317.0, 322.0, 311.0, 319.0, 276.0, 300.0, 314.0, 316.0, 230.0, 228.0, 326.0, 304.0, 276.0, 274.0, 328.0, 302.0, 294.0, 293.0, 306.0, 324.0, 286.0, 296.0, 310.0, 317.0, 294.0, 276.0, 286.0, 301.0, 321.0, 309.0, 293.0, 286.0, 286.0, 272.0, 293.0, 291.0, 267.0, 271.0, 302.0, 285.0, 296.0, 291.0, 278.0, 286.0, 311.0, 319.0, 295.0, 287.0, 316.0, 317.0, 299.0, 280.0, 268.0, 271.0, 321.0, 306.0, 294.0, 296.0, 322.0, 317.0, 317.0, 322.0, 316.0, 311.0, 275.0, 295.0, 300.0, 282.0, 326.0, 307.0, 294.0, 296.0, 313.0, 314.0, 311.0, 322.0, 291.0, 285.0, 272.0, 304.0, 313.0, 314.0, 294.0, 288.0, 315.0, 309.0, 313.0, 317.0, 314.0, 319.0, 316.0, 314.0, 280.0, 296.0, 308.0, 325.0, 321.0, 312.0, 293.0, 297.0, 296.0, 291.0, 317.0, 322.0, 291.0, 288.0, 292.0, 290.0, 294.0, 276.0, 311.0, 319.0, 288.0, 294.0, 314.0, 322.0, 316.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.068272032402952, "mean_processing_ms": 0.2818129859025947, "mean_inference_ms": 1.6291161273108918}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5832000, "num_steps_sampled": 3110400, "sample_time_ms": 21475.079, "load_time_ms": 37.422, "grad_time_ms": 9892.717, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034763356670737267, "policy_loss": -0.004455787595361471, "vf_loss": 84.99886322021484, "vf_explained_var": 0.7575659155845642, "kl": 0.0017217934364452958, "entropy": 1.135510802268982, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3110400, "episodes_total": 7776, 
"training_iteration": 243, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-47-21", "timestamp": 1660254441, "time_this_iter_s": 32.067052125930786, "time_total_s": 12855.705075263977, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12855.705075263977, "timesteps_since_restore": 3110400, "iterations_since_restore": 243, "perf": {"cpu_util_percent": 31.317777777777778, "ram_util_percent": 58.61555555555556}} +{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 602.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 301.15}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.7, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.09, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.57, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.35, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.02, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.07, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.35, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.35, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 539.0, 630.0, 549.0, 627.0, 564.0, 539.0, 636.0, 627.0, 587.0, 582.0, 636.0, 582.0, 630.0, 627.0, 576.0, 581.0, 627.0, 630.0, 582.0, 576.0, 579.0, 582.0, 627.0, 587.0, 587.0, 627.0, 582.0, 468.0, 636.0, 630.0, 504.0, 582.0, 633.0, 579.0, 539.0, 627.0, 590.0, 639.0, 639.0, 627.0, 570.0, 582.0, 633.0, 590.0, 627.0, 633.0, 576.0, 576.0, 627.0, 582.0, 624.0, 630.0, 633.0, 630.0, 576.0, 633.0, 633.0, 590.0, 587.0, 639.0, 579.0, 582.0, 570.0, 630.0, 582.0, 636.0, 627.0, 630.0, 633.0, 630.0, 627.0, 627.0, 627.0, 582.0, 633.0, 630.0, 636.0, 584.0, 627.0, 582.0, 633.0, 573.0, 587.0, 627.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 639.0, 570.0, 579.0, 582.0, 633.0, 579.0, 633.0, 627.0, 544.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 311.0, 269.0, 270.0, 
311.0, 319.0, 275.0, 274.0, 308.0, 319.0, 292.0, 272.0, 271.0, 268.0, 324.0, 312.0, 313.0, 314.0, 302.0, 285.0, 299.0, 283.0, 322.0, 314.0, 293.0, 289.0, 310.0, 320.0, 319.0, 308.0, 281.0, 295.0, 282.0, 299.0, 307.0, 320.0, 321.0, 309.0, 283.0, 299.0, 277.0, 299.0, 293.0, 286.0, 275.0, 307.0, 311.0, 316.0, 285.0, 302.0, 299.0, 288.0, 316.0, 311.0, 299.0, 283.0, 229.0, 239.0, 318.0, 318.0, 308.0, 322.0, 238.0, 266.0, 295.0, 287.0, 316.0, 317.0, 299.0, 280.0, 268.0, 271.0, 321.0, 306.0, 294.0, 296.0, 322.0, 317.0, 317.0, 322.0, 316.0, 311.0, 275.0, 295.0, 300.0, 282.0, 326.0, 307.0, 294.0, 296.0, 313.0, 314.0, 311.0, 322.0, 291.0, 285.0, 272.0, 304.0, 313.0, 314.0, 294.0, 288.0, 315.0, 309.0, 313.0, 317.0, 314.0, 319.0, 316.0, 314.0, 280.0, 296.0, 308.0, 325.0, 321.0, 312.0, 293.0, 297.0, 296.0, 291.0, 317.0, 322.0, 291.0, 288.0, 292.0, 290.0, 294.0, 276.0, 311.0, 319.0, 288.0, 294.0, 314.0, 322.0, 316.0, 311.0, 313.0, 317.0, 316.0, 317.0, 301.0, 329.0, 313.0, 314.0, 311.0, 316.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 311.0, 319.0, 324.0, 312.0, 286.0, 298.0, 324.0, 303.0, 278.0, 304.0, 307.0, 326.0, 288.0, 285.0, 298.0, 289.0, 308.0, 319.0, 293.0, 294.0, 326.0, 304.0, 295.0, 287.0, 280.0, 296.0, 315.0, 312.0, 289.0, 290.0, 322.0, 317.0, 288.0, 282.0, 283.0, 296.0, 299.0, 283.0, 309.0, 324.0, 282.0, 297.0, 312.0, 321.0, 305.0, 322.0, 278.0, 266.0]}, "sampler_perf": {"mean_env_wait_ms": 1.065748558737823, "mean_processing_ms": 0.28131319823148404, "mean_inference_ms": 1.6267302844305909}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5856000, "num_steps_sampled": 3123200, "sample_time_ms": 21258.847, "load_time_ms": 37.597, "grad_time_ms": 9965.955, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00222679297439754, "policy_loss": -0.005516994744539261, "vf_loss": 83.11483764648438, "vf_explained_var": 0.7694733142852783, "kl": 0.002387256594374776, "entropy": 1.135390281677246, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3123200, "episodes_total": 7808, "training_iteration": 244, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-47-51", "timestamp": 1660254471, "time_this_iter_s": 30.266911029815674, "time_total_s": 12885.971986293793, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12885.971986293793, "timesteps_since_restore": 3123200, "iterations_since_restore": 244, "perf": {"cpu_util_percent": 32.02093023255814, "ram_util_percent": 58.54186046511629}} +{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 599.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 299.72}, "custom_metrics": {"sparse_reward_mean": 207.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.24, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.11, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.87, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.97, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.28, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.5, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.97, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.28, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.97, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.28, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 633.0, 570.0, 582.0, 593.0, 633.0, 636.0, 587.0, 579.0, 627.0, 579.0, 539.0, 587.0, 630.0, 636.0, 584.0, 579.0, 582.0, 630.0, 544.0, 584.0, 630.0, 587.0, 587.0, 573.0, 582.0, 573.0, 627.0, 527.0, 624.0, 587.0, 630.0, 582.0, 636.0, 627.0, 630.0, 633.0, 630.0, 627.0, 627.0, 627.0, 582.0, 633.0, 630.0, 636.0, 584.0, 627.0, 582.0, 633.0, 573.0, 587.0, 627.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 639.0, 570.0, 579.0, 582.0, 633.0, 579.0, 633.0, 627.0, 544.0, 627.0, 539.0, 630.0, 549.0, 627.0, 564.0, 539.0, 636.0, 627.0, 587.0, 582.0, 636.0, 582.0, 630.0, 627.0, 576.0, 581.0, 627.0, 630.0, 582.0, 576.0, 579.0, 582.0, 627.0, 587.0, 587.0, 627.0, 582.0, 468.0, 636.0, 630.0, 504.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 309.0, 318.0, 315.0, 322.0, 311.0, 290.0, 280.0, 285.0, 297.0, 302.0, 291.0, 322.0, 311.0, 324.0, 312.0, 304.0, 283.0, 293.0, 286.0, 321.0, 306.0, 285.0, 294.0, 275.0, 264.0, 293.0, 294.0, 318.0, 312.0, 319.0, 317.0, 277.0, 307.0, 291.0, 288.0, 288.0, 294.0, 319.0, 311.0, 268.0, 276.0, 300.0, 284.0, 314.0, 316.0, 290.0, 297.0, 299.0, 288.0, 291.0, 282.0, 287.0, 295.0, 287.0, 286.0, 313.0, 314.0, 259.0, 268.0, 311.0, 313.0, 288.0, 299.0, 311.0, 319.0, 288.0, 294.0, 314.0, 322.0, 316.0, 311.0, 313.0, 317.0, 316.0, 317.0, 301.0, 329.0, 313.0, 314.0, 311.0, 316.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 311.0, 319.0, 324.0, 312.0, 286.0, 298.0, 324.0, 303.0, 278.0, 304.0, 307.0, 326.0, 288.0, 285.0, 298.0, 289.0, 308.0, 319.0, 293.0, 294.0, 326.0, 304.0, 295.0, 287.0, 280.0, 296.0, 315.0, 312.0, 289.0, 290.0, 322.0, 317.0, 288.0, 282.0, 283.0, 296.0, 299.0, 283.0, 309.0, 324.0, 282.0, 297.0, 312.0, 321.0, 305.0, 322.0, 278.0, 266.0, 316.0, 311.0, 269.0, 270.0, 311.0, 319.0, 275.0, 274.0, 308.0, 319.0, 292.0, 272.0, 271.0, 268.0, 324.0, 312.0, 313.0, 314.0, 302.0, 285.0, 299.0, 283.0, 322.0, 314.0, 293.0, 289.0, 310.0, 320.0, 319.0, 308.0, 281.0, 295.0, 282.0, 299.0, 307.0, 320.0, 321.0, 309.0, 283.0, 299.0, 277.0, 299.0, 293.0, 286.0, 275.0, 307.0, 311.0, 316.0, 285.0, 302.0, 299.0, 288.0, 316.0, 311.0, 299.0, 283.0, 229.0, 239.0, 318.0, 318.0, 308.0, 322.0, 238.0, 266.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0632381583217039, "mean_processing_ms": 0.2808155253638906, "mean_inference_ms": 1.6242198083388235}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5880000, "num_steps_sampled": 3136000, "sample_time_ms": 21038.899, "load_time_ms": 37.35, "grad_time_ms": 9776.148, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.004664632026106119, "policy_loss": -0.003766902955248952, "vf_loss": 90.03823852539062, "vf_explained_var": 0.7575922012329102, "kl": 0.002137060509994626, "entropy": 1.1445802450180054, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3136000, "episodes_total": 7840, "training_iteration": 245, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-48-21", "timestamp": 1660254501, "time_this_iter_s": 30.129722118377686, "time_total_s": 12916.10170841217, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12916.10170841217, "timesteps_since_restore": 3136000, "iterations_since_restore": 245, "perf": {"cpu_util_percent": 34.127906976744185, "ram_util_percent": 58.56744186046512}} +{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 600.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 300.025}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.45, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.75, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.87, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.61, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.64, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.61, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.64, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.61, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.64, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 579.0, 630.0, 627.0, 633.0, 630.0, 630.0, 582.0, 639.0, 627.0, 587.0, 639.0, 636.0, 582.0, 627.0, 582.0, 630.0, 633.0, 636.0, 513.0, 582.0, 630.0, 570.0, 584.0, 630.0, 633.0, 582.0, 630.0, 582.0, 636.0, 630.0, 579.0, 633.0, 627.0, 544.0, 627.0, 539.0, 630.0, 549.0, 627.0, 564.0, 539.0, 636.0, 627.0, 587.0, 582.0, 636.0, 582.0, 630.0, 627.0, 576.0, 581.0, 627.0, 630.0, 582.0, 576.0, 579.0, 582.0, 627.0, 587.0, 587.0, 627.0, 582.0, 468.0, 636.0, 630.0, 504.0, 627.0, 633.0, 633.0, 570.0, 582.0, 593.0, 633.0, 636.0, 587.0, 579.0, 627.0, 579.0, 539.0, 587.0, 630.0, 636.0, 584.0, 579.0, 582.0, 630.0, 544.0, 584.0, 630.0, 587.0, 587.0, 573.0, 582.0, 573.0, 627.0, 527.0, 624.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 311.0, 319.0, 290.0, 289.0, 321.0, 309.0, 309.0, 318.0, 329.0, 304.0, 323.0, 307.0, 318.0, 312.0, 293.0, 289.0, 314.0, 325.0, 314.0, 313.0, 297.0, 290.0, 314.0, 325.0, 311.0, 325.0, 290.0, 292.0, 303.0, 324.0, 296.0, 286.0, 314.0, 316.0, 315.0, 318.0, 324.0, 312.0, 261.0, 252.0, 293.0, 289.0, 321.0, 309.0, 288.0, 282.0, 285.0, 299.0, 315.0, 315.0, 315.0, 318.0, 285.0, 297.0, 319.0, 311.0, 292.0, 290.0, 319.0, 317.0, 319.0, 311.0, 282.0, 297.0, 312.0, 321.0, 305.0, 322.0, 278.0, 266.0, 316.0, 311.0, 269.0, 270.0, 311.0, 319.0, 275.0, 274.0, 308.0, 319.0, 292.0, 272.0, 271.0, 268.0, 324.0, 312.0, 313.0, 314.0, 302.0, 285.0, 299.0, 283.0, 322.0, 314.0, 293.0, 289.0, 310.0, 320.0, 319.0, 308.0, 281.0, 295.0, 282.0, 299.0, 307.0, 320.0, 321.0, 309.0, 283.0, 299.0, 277.0, 299.0, 293.0, 286.0, 275.0, 307.0, 311.0, 316.0, 285.0, 302.0, 299.0, 288.0, 316.0, 311.0, 299.0, 283.0, 229.0, 239.0, 318.0, 318.0, 308.0, 322.0, 238.0, 266.0, 318.0, 309.0, 318.0, 315.0, 322.0, 311.0, 290.0, 280.0, 285.0, 297.0, 302.0, 291.0, 322.0, 311.0, 324.0, 312.0, 304.0, 283.0, 293.0, 286.0, 321.0, 306.0, 285.0, 294.0, 275.0, 264.0, 293.0, 294.0, 318.0, 312.0, 319.0, 317.0, 277.0, 307.0, 291.0, 288.0, 288.0, 294.0, 319.0, 311.0, 268.0, 276.0, 300.0, 284.0, 314.0, 316.0, 290.0, 297.0, 299.0, 288.0, 291.0, 282.0, 287.0, 295.0, 287.0, 286.0, 313.0, 314.0, 259.0, 268.0, 311.0, 313.0, 288.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0607418368814732, "mean_processing_ms": 0.280319937755019, "mean_inference_ms": 1.6216711984881527}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5904000, 
"num_steps_sampled": 3148800, "sample_time_ms": 21042.313, "load_time_ms": 37.258, "grad_time_ms": 9784.882, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003808257170021534, "policy_loss": -0.0040723783895373344, "vf_loss": 84.46407318115234, "vf_explained_var": 0.7558939456939697, "kl": 0.0020272734109312296, "entropy": 1.1315315961837769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3148800, "episodes_total": 7872, "training_iteration": 246, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-48-52", "timestamp": 1660254532, "time_this_iter_s": 30.6191668510437, "time_total_s": 12946.720875263214, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12946.720875263214, "timesteps_since_restore": 3148800, "iterations_since_restore": 246, "perf": {"cpu_util_percent": 29.048837209302324, "ram_util_percent": 58.57906976744185}} +{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 601.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 300.71}, "custom_metrics": {"sparse_reward_mean": 208.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.02, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.83, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.97, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.91, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.36, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.59, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.59, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.59, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 630.0, 576.0, 582.0, 633.0, 573.0, 587.0, 582.0, 630.0, 633.0, 582.0, 633.0, 530.0, 636.0, 582.0, 636.0, 584.0, 630.0, 633.0, 630.0, 582.0, 630.0, 521.0, 587.0, 627.0, 576.0, 630.0, 579.0, 630.0, 582.0, 584.0, 468.0, 636.0, 630.0, 504.0, 627.0, 633.0, 633.0, 570.0, 582.0, 593.0, 633.0, 636.0, 587.0, 579.0, 627.0, 579.0, 539.0, 587.0, 630.0, 636.0, 584.0, 579.0, 582.0, 630.0, 544.0, 584.0, 630.0, 587.0, 587.0, 573.0, 582.0, 573.0, 627.0, 527.0, 624.0, 587.0, 627.0, 630.0, 579.0, 630.0, 627.0, 633.0, 630.0, 630.0, 582.0, 639.0, 627.0, 587.0, 639.0, 636.0, 582.0, 627.0, 582.0, 630.0, 633.0, 636.0, 513.0, 582.0, 630.0, 570.0, 584.0, 630.0, 
633.0, 582.0, 630.0, 582.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 295.0, 287.0, 314.0, 316.0, 291.0, 285.0, 288.0, 294.0, 319.0, 314.0, 279.0, 294.0, 290.0, 297.0, 288.0, 294.0, 303.0, 327.0, 324.0, 309.0, 286.0, 296.0, 316.0, 317.0, 261.0, 269.0, 322.0, 314.0, 288.0, 294.0, 317.0, 319.0, 296.0, 288.0, 316.0, 314.0, 321.0, 312.0, 301.0, 329.0, 293.0, 289.0, 313.0, 317.0, 259.0, 262.0, 293.0, 294.0, 308.0, 319.0, 283.0, 293.0, 308.0, 322.0, 297.0, 282.0, 321.0, 309.0, 303.0, 279.0, 302.0, 282.0, 229.0, 239.0, 318.0, 318.0, 308.0, 322.0, 238.0, 266.0, 318.0, 309.0, 318.0, 315.0, 322.0, 311.0, 290.0, 280.0, 285.0, 297.0, 302.0, 291.0, 322.0, 311.0, 324.0, 312.0, 304.0, 283.0, 293.0, 286.0, 321.0, 306.0, 285.0, 294.0, 275.0, 264.0, 293.0, 294.0, 318.0, 312.0, 319.0, 317.0, 277.0, 307.0, 291.0, 288.0, 288.0, 294.0, 319.0, 311.0, 268.0, 276.0, 300.0, 284.0, 314.0, 316.0, 290.0, 297.0, 299.0, 288.0, 291.0, 282.0, 287.0, 295.0, 287.0, 286.0, 313.0, 314.0, 259.0, 268.0, 311.0, 313.0, 288.0, 299.0, 313.0, 314.0, 311.0, 319.0, 290.0, 289.0, 321.0, 309.0, 309.0, 318.0, 329.0, 304.0, 323.0, 307.0, 318.0, 312.0, 293.0, 289.0, 314.0, 325.0, 314.0, 313.0, 297.0, 290.0, 314.0, 325.0, 311.0, 325.0, 290.0, 292.0, 303.0, 324.0, 296.0, 286.0, 314.0, 316.0, 315.0, 318.0, 324.0, 312.0, 261.0, 252.0, 293.0, 289.0, 321.0, 309.0, 288.0, 282.0, 285.0, 299.0, 315.0, 315.0, 315.0, 318.0, 285.0, 297.0, 319.0, 311.0, 292.0, 290.0, 319.0, 317.0, 319.0, 311.0]}, 
"sampler_perf": {"mean_env_wait_ms": 1.0582836436059244, "mean_processing_ms": 0.2798326074687889, "mean_inference_ms": 1.6195165404601743}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5928000, "num_steps_sampled": 3161600, "sample_time_ms": 21417.89, "load_time_ms": 36.65, "grad_time_ms": 9717.385, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007093754131346941, "policy_loss": -0.0009618126205168664, "vf_loss": 86.259033203125, "vf_explained_var": 0.7558541893959045, "kl": 0.001976242521777749, "entropy": 1.140650749206543, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3161600, "episodes_total": 7904, "training_iteration": 247, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-49-26", "timestamp": 1660254566, "time_this_iter_s": 34.550382137298584, "time_total_s": 12981.271257400513, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 
3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": 
{}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12981.271257400513, "timesteps_since_restore": 3161600, "iterations_since_restore": 247, "perf": {"cpu_util_percent": 29.197959183673472, "ram_util_percent": 58.64285714285715}} +{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 608.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 304.075}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.55, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, 
"tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.87, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.14, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.88, 
"potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.82, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, 
"viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 576.0, 633.0, 587.0, 633.0, 630.0, 636.0, 596.0, 576.0, 636.0, 633.0, 630.0, 582.0, 582.0, 627.0, 630.0, 627.0, 639.0, 573.0, 627.0, 633.0, 582.0, 582.0, 582.0, 561.0, 627.0, 636.0, 636.0, 636.0, 584.0, 633.0, 627.0, 527.0, 624.0, 587.0, 627.0, 630.0, 579.0, 630.0, 627.0, 633.0, 630.0, 630.0, 582.0, 639.0, 627.0, 587.0, 639.0, 636.0, 582.0, 627.0, 582.0, 630.0, 633.0, 636.0, 513.0, 582.0, 630.0, 570.0, 584.0, 630.0, 633.0, 582.0, 630.0, 
582.0, 636.0, 630.0, 633.0, 582.0, 630.0, 576.0, 582.0, 633.0, 573.0, 587.0, 582.0, 630.0, 633.0, 582.0, 633.0, 530.0, 636.0, 582.0, 636.0, 584.0, 630.0, 633.0, 630.0, 582.0, 630.0, 521.0, 587.0, 627.0, 576.0, 630.0, 579.0, 630.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 309.0, 327.0, 270.0, 306.0, 305.0, 328.0, 286.0, 301.0, 322.0, 311.0, 308.0, 322.0, 314.0, 322.0, 299.0, 297.0, 293.0, 283.0, 316.0, 320.0, 319.0, 314.0, 311.0, 319.0, 288.0, 294.0, 298.0, 284.0, 312.0, 315.0, 324.0, 306.0, 311.0, 316.0, 322.0, 317.0, 297.0, 276.0, 307.0, 320.0, 321.0, 312.0, 280.0, 302.0, 304.0, 278.0, 300.0, 282.0, 266.0, 295.0, 303.0, 324.0, 324.0, 312.0, 324.0, 312.0, 318.0, 318.0, 296.0, 288.0, 323.0, 310.0, 313.0, 314.0, 259.0, 268.0, 311.0, 313.0, 288.0, 299.0, 313.0, 314.0, 311.0, 319.0, 290.0, 289.0, 321.0, 309.0, 309.0, 318.0, 329.0, 304.0, 323.0, 307.0, 318.0, 312.0, 293.0, 289.0, 314.0, 325.0, 314.0, 313.0, 297.0, 290.0, 314.0, 325.0, 311.0, 325.0, 290.0, 292.0, 303.0, 324.0, 296.0, 286.0, 314.0, 316.0, 315.0, 318.0, 324.0, 312.0, 261.0, 252.0, 293.0, 289.0, 321.0, 309.0, 288.0, 282.0, 285.0, 299.0, 315.0, 315.0, 315.0, 318.0, 285.0, 297.0, 319.0, 311.0, 292.0, 290.0, 319.0, 317.0, 319.0, 311.0, 319.0, 314.0, 295.0, 287.0, 314.0, 316.0, 291.0, 285.0, 288.0, 294.0, 319.0, 314.0, 279.0, 294.0, 290.0, 297.0, 288.0, 294.0, 303.0, 327.0, 324.0, 309.0, 286.0, 296.0, 316.0, 317.0, 261.0, 269.0, 322.0, 314.0, 288.0, 294.0, 317.0, 319.0, 296.0, 288.0, 316.0, 
314.0, 321.0, 312.0, 301.0, 329.0, 293.0, 289.0, 313.0, 317.0, 259.0, 262.0, 293.0, 294.0, 308.0, 319.0, 283.0, 293.0, 308.0, 322.0, 297.0, 282.0, 321.0, 309.0, 303.0, 279.0, 302.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 1.055854669948681, "mean_processing_ms": 0.27935130516970164, "mean_inference_ms": 1.6177570824217435}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5952000, "num_steps_sampled": 3174400, "sample_time_ms": 21678.331, "load_time_ms": 37.099, "grad_time_ms": 9688.848, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009431429207324982, "policy_loss": -0.008797372691333294, "vf_loss": 84.26534271240234, "vf_explained_var": 0.7609202265739441, "kl": 0.001977160107344389, "entropy": 1.14460289478302, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3174400, "episodes_total": 7936, "training_iteration": 248, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-50-00", "timestamp": 1660254600, "time_this_iter_s": 33.773277044296265, "time_total_s": 13015.044534444809, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13015.044534444809, "timesteps_since_restore": 3174400, "iterations_since_restore": 248, "perf": {"cpu_util_percent": 27.302083333333332, "ram_util_percent": 58.68958333333333}} +{"episode_reward_max": 639.0, "episode_reward_min": 425.0, "episode_reward_mean": 606.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 211.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 303.215}, "custom_metrics": {"sparse_reward_mean": 210.0, 
"sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 186.43, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.3, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.63, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.93, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.82, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 
0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.6, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.14, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.6, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.6, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 627.0, 636.0, 627.0, 582.0, 582.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 582.0, 633.0, 630.0, 582.0, 425.0, 576.0, 578.0, 627.0, 587.0, 639.0, 636.0, 582.0, 630.0, 630.0, 525.0, 630.0, 579.0, 579.0, 630.0, 587.0, 630.0, 582.0, 636.0, 630.0, 633.0, 582.0, 630.0, 576.0, 582.0, 
633.0, 573.0, 587.0, 582.0, 630.0, 633.0, 582.0, 633.0, 530.0, 636.0, 582.0, 636.0, 584.0, 630.0, 633.0, 630.0, 582.0, 630.0, 521.0, 587.0, 627.0, 576.0, 630.0, 579.0, 630.0, 582.0, 584.0, 636.0, 636.0, 576.0, 633.0, 587.0, 633.0, 630.0, 636.0, 596.0, 576.0, 636.0, 633.0, 630.0, 582.0, 582.0, 627.0, 630.0, 627.0, 639.0, 573.0, 627.0, 633.0, 582.0, 582.0, 582.0, 561.0, 627.0, 636.0, 636.0, 636.0, 584.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 306.0, 321.0, 317.0, 319.0, 308.0, 319.0, 286.0, 296.0, 296.0, 286.0, 301.0, 286.0, 309.0, 321.0, 313.0, 317.0, 327.0, 309.0, 320.0, 313.0, 319.0, 314.0, 293.0, 289.0, 319.0, 314.0, 314.0, 316.0, 293.0, 289.0, 214.0, 211.0, 299.0, 277.0, 295.0, 283.0, 308.0, 319.0, 298.0, 289.0, 330.0, 309.0, 316.0, 320.0, 291.0, 291.0, 314.0, 316.0, 311.0, 319.0, 260.0, 265.0, 318.0, 312.0, 291.0, 288.0, 295.0, 284.0, 309.0, 321.0, 295.0, 292.0, 319.0, 311.0, 292.0, 290.0, 319.0, 317.0, 319.0, 311.0, 319.0, 314.0, 295.0, 287.0, 314.0, 316.0, 291.0, 285.0, 288.0, 294.0, 319.0, 314.0, 279.0, 294.0, 290.0, 297.0, 288.0, 294.0, 303.0, 327.0, 324.0, 309.0, 286.0, 296.0, 316.0, 317.0, 261.0, 269.0, 322.0, 314.0, 288.0, 294.0, 317.0, 319.0, 296.0, 288.0, 316.0, 314.0, 321.0, 312.0, 301.0, 329.0, 293.0, 289.0, 313.0, 317.0, 259.0, 262.0, 293.0, 294.0, 308.0, 319.0, 283.0, 293.0, 308.0, 322.0, 297.0, 282.0, 321.0, 309.0, 303.0, 279.0, 302.0, 282.0, 319.0, 317.0, 309.0, 327.0, 270.0, 306.0, 305.0, 328.0, 286.0, 301.0, 322.0, 311.0, 308.0, 
322.0, 314.0, 322.0, 299.0, 297.0, 293.0, 283.0, 316.0, 320.0, 319.0, 314.0, 311.0, 319.0, 288.0, 294.0, 298.0, 284.0, 312.0, 315.0, 324.0, 306.0, 311.0, 316.0, 322.0, 317.0, 297.0, 276.0, 307.0, 320.0, 321.0, 312.0, 280.0, 302.0, 304.0, 278.0, 300.0, 282.0, 266.0, 295.0, 303.0, 324.0, 324.0, 312.0, 324.0, 312.0, 318.0, 318.0, 296.0, 288.0, 323.0, 310.0]}, "sampler_perf": {"mean_env_wait_ms": 1.053454756557531, "mean_processing_ms": 0.27887402151447716, "mean_inference_ms": 1.6161016017922192}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5976000, "num_steps_sampled": 3187200, "sample_time_ms": 21826.606, "load_time_ms": 36.816, "grad_time_ms": 9749.499, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024596769362688065, "policy_loss": -0.005453174468129873, "vf_loss": 84.88723754882812, "vf_explained_var": 0.7672951221466064, "kl": 0.0021601892076432705, "entropy": 1.1517353057861328, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3187200, "episodes_total": 7968, "training_iteration": 249, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-50-33", "timestamp": 1660254633, "time_this_iter_s": 32.3484160900116, "time_total_s": 13047.39295053482, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13047.39295053482, "timesteps_since_restore": 3187200, "iterations_since_restore": 249, "perf": {"cpu_util_percent": 29.615217391304352, "ram_util_percent": 58.70434782608695}} +{"episode_reward_max": 639.0, "episode_reward_min": 425.0, "episode_reward_mean": 604.57, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 211.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 302.285}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.77, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.29, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.63, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 
5, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.47, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.13, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.14, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.47, 
"optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.47, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 584.0, 624.0, 570.0, 579.0, 587.0, 567.0, 627.0, 587.0, 576.0, 636.0, 
630.0, 539.0, 627.0, 639.0, 636.0, 587.0, 630.0, 570.0, 570.0, 584.0, 621.0, 582.0, 627.0, 582.0, 627.0, 579.0, 533.0, 630.0, 630.0, 579.0, 596.0, 579.0, 630.0, 582.0, 584.0, 636.0, 636.0, 576.0, 633.0, 587.0, 633.0, 630.0, 636.0, 596.0, 576.0, 636.0, 633.0, 630.0, 582.0, 582.0, 627.0, 630.0, 627.0, 639.0, 573.0, 627.0, 633.0, 582.0, 582.0, 582.0, 561.0, 627.0, 636.0, 636.0, 636.0, 584.0, 633.0, 633.0, 627.0, 636.0, 627.0, 582.0, 582.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 582.0, 633.0, 630.0, 582.0, 425.0, 576.0, 578.0, 627.0, 587.0, 639.0, 636.0, 582.0, 630.0, 630.0, 525.0, 630.0, 579.0, 579.0, 630.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 306.0, 291.0, 293.0, 310.0, 314.0, 280.0, 290.0, 296.0, 283.0, 285.0, 302.0, 287.0, 280.0, 318.0, 309.0, 277.0, 310.0, 292.0, 284.0, 314.0, 322.0, 306.0, 324.0, 278.0, 261.0, 309.0, 318.0, 319.0, 320.0, 322.0, 314.0, 286.0, 301.0, 326.0, 304.0, 295.0, 275.0, 292.0, 278.0, 295.0, 289.0, 308.0, 313.0, 290.0, 292.0, 305.0, 322.0, 291.0, 291.0, 327.0, 300.0, 279.0, 300.0, 263.0, 270.0, 316.0, 314.0, 313.0, 317.0, 296.0, 283.0, 302.0, 294.0, 297.0, 282.0, 321.0, 309.0, 303.0, 279.0, 302.0, 282.0, 319.0, 317.0, 309.0, 327.0, 270.0, 306.0, 305.0, 328.0, 286.0, 301.0, 322.0, 311.0, 308.0, 322.0, 314.0, 322.0, 299.0, 297.0, 293.0, 283.0, 316.0, 320.0, 319.0, 314.0, 311.0, 319.0, 288.0, 294.0, 298.0, 284.0, 312.0, 315.0, 324.0, 306.0, 311.0, 316.0, 322.0, 317.0, 297.0, 276.0, 307.0, 320.0, 321.0, 312.0, 280.0, 302.0, 304.0, 
278.0, 300.0, 282.0, 266.0, 295.0, 303.0, 324.0, 324.0, 312.0, 324.0, 312.0, 318.0, 318.0, 296.0, 288.0, 323.0, 310.0, 319.0, 314.0, 306.0, 321.0, 317.0, 319.0, 308.0, 319.0, 286.0, 296.0, 296.0, 286.0, 301.0, 286.0, 309.0, 321.0, 313.0, 317.0, 327.0, 309.0, 320.0, 313.0, 319.0, 314.0, 293.0, 289.0, 319.0, 314.0, 314.0, 316.0, 293.0, 289.0, 214.0, 211.0, 299.0, 277.0, 295.0, 283.0, 308.0, 319.0, 298.0, 289.0, 330.0, 309.0, 316.0, 320.0, 291.0, 291.0, 314.0, 316.0, 311.0, 319.0, 260.0, 265.0, 318.0, 312.0, 291.0, 288.0, 295.0, 284.0, 309.0, 321.0, 295.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0510667862182812, "mean_processing_ms": 0.27840025138599184, "mean_inference_ms": 1.6143808795038155}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6000000, "num_steps_sampled": 3200000, "sample_time_ms": 22115.848, "load_time_ms": 36.609, "grad_time_ms": 9825.426, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0038353295531123877, "policy_loss": -0.004546869080513716, "vf_loss": 89.5432357788086, "vf_explained_var": 0.7639234662055969, "kl": 0.002313032979145646, "entropy": 1.144262671470642, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3200000, "episodes_total": 8000, "training_iteration": 250, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-51-06", "timestamp": 1660254666, "time_this_iter_s": 33.03909492492676, "time_total_s": 13080.432045459747, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13080.432045459747, "timesteps_since_restore": 3200000, "iterations_since_restore": 250, "perf": {"cpu_util_percent": 
30.089130434782607, "ram_util_percent": 58.62826086956523}} +{"episode_reward_max": 639.0, "episode_reward_min": 425.0, "episode_reward_mean": 605.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 211.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 302.845}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 186.09, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.03, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.8, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.08, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, 
"onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.77, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.94, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 15.94, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.94, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 627.0, 630.0, 633.0, 633.0, 587.0, 639.0, 582.0, 581.0, 587.0, 624.0, 633.0, 621.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 582.0, 582.0, 627.0, 639.0, 630.0, 630.0, 587.0, 630.0, 627.0, 630.0, 630.0, 582.0, 636.0, 636.0, 584.0, 633.0, 633.0, 627.0, 636.0, 627.0, 582.0, 582.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 582.0, 633.0, 630.0, 582.0, 425.0, 576.0, 578.0, 627.0, 587.0, 639.0, 636.0, 582.0, 630.0, 630.0, 525.0, 630.0, 579.0, 579.0, 630.0, 587.0, 627.0, 584.0, 624.0, 570.0, 579.0, 587.0, 567.0, 627.0, 587.0, 576.0, 636.0, 630.0, 539.0, 627.0, 639.0, 636.0, 587.0, 630.0, 570.0, 570.0, 584.0, 621.0, 582.0, 627.0, 582.0, 627.0, 579.0, 533.0, 630.0, 630.0, 579.0, 596.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 304.0, 275.0, 320.0, 307.0, 316.0, 314.0, 312.0, 321.0, 316.0, 317.0, 295.0, 292.0, 312.0, 327.0, 294.0, 288.0, 284.0, 297.0, 295.0, 292.0, 302.0, 322.0, 316.0, 317.0, 308.0, 313.0, 291.0, 291.0, 322.0, 317.0, 319.0, 317.0, 291.0, 291.0, 290.0, 297.0, 307.0, 320.0, 290.0, 292.0, 280.0, 302.0, 319.0, 308.0, 316.0, 323.0, 313.0, 317.0, 305.0, 325.0, 296.0, 291.0, 313.0, 317.0, 309.0, 318.0, 311.0, 319.0, 308.0, 322.0, 289.0, 293.0, 324.0, 312.0, 318.0, 318.0, 296.0, 288.0, 323.0, 310.0, 319.0, 314.0, 306.0, 321.0, 317.0, 319.0, 308.0, 319.0, 286.0, 296.0, 296.0, 286.0, 301.0, 286.0, 309.0, 321.0, 313.0, 317.0, 327.0, 
309.0, 320.0, 313.0, 319.0, 314.0, 293.0, 289.0, 319.0, 314.0, 314.0, 316.0, 293.0, 289.0, 214.0, 211.0, 299.0, 277.0, 295.0, 283.0, 308.0, 319.0, 298.0, 289.0, 330.0, 309.0, 316.0, 320.0, 291.0, 291.0, 314.0, 316.0, 311.0, 319.0, 260.0, 265.0, 318.0, 312.0, 291.0, 288.0, 295.0, 284.0, 309.0, 321.0, 295.0, 292.0, 321.0, 306.0, 291.0, 293.0, 310.0, 314.0, 280.0, 290.0, 296.0, 283.0, 285.0, 302.0, 287.0, 280.0, 318.0, 309.0, 277.0, 310.0, 292.0, 284.0, 314.0, 322.0, 306.0, 324.0, 278.0, 261.0, 309.0, 318.0, 319.0, 320.0, 322.0, 314.0, 286.0, 301.0, 326.0, 304.0, 295.0, 275.0, 292.0, 278.0, 295.0, 289.0, 308.0, 313.0, 290.0, 292.0, 305.0, 322.0, 291.0, 291.0, 327.0, 300.0, 279.0, 300.0, 263.0, 270.0, 316.0, 314.0, 313.0, 317.0, 296.0, 283.0, 302.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0486945411620672, "mean_processing_ms": 0.2779294488920563, "mean_inference_ms": 1.612487194421866}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6024000, "num_steps_sampled": 3212800, "sample_time_ms": 22207.762, "load_time_ms": 36.674, "grad_time_ms": 9691.515, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005205323453992605, "policy_loss": -0.0029822138603776693, "vf_loss": 87.57022857666016, "vf_explained_var": 0.7586490511894226, "kl": 0.0020639507565647364, "entropy": 1.1389611959457397, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3212800, "episodes_total": 8032, "training_iteration": 251, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-51-37", "timestamp": 1660254697, "time_this_iter_s": 31.857529878616333, "time_total_s": 13112.289575338364, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13112.289575338364, "timesteps_since_restore": 3212800, "iterations_since_restore": 251, "perf": {"cpu_util_percent": 30.984444444444442, "ram_util_percent": 58.69777777777778}} +{"episode_reward_max": 639.0, "episode_reward_min": 518.0, "episode_reward_mean": 606.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.325}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.25, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.82, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.86, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 16.27, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.22, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.66, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.8, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.8, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.8, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 630.0, 633.0, 633.0, 627.0, 582.0, 579.0, 630.0, 633.0, 633.0, 587.0, 633.0, 633.0, 636.0, 582.0, 570.0, 590.0, 579.0, 576.0, 630.0, 581.0, 579.0, 518.0, 636.0, 636.0, 633.0, 576.0, 590.0, 633.0, 636.0, 633.0, 579.0, 579.0, 630.0, 587.0, 627.0, 584.0, 624.0, 570.0, 579.0, 587.0, 567.0, 627.0, 587.0, 576.0, 636.0, 630.0, 539.0, 627.0, 639.0, 636.0, 587.0, 630.0, 570.0, 570.0, 584.0, 621.0, 582.0, 627.0, 582.0, 627.0, 579.0, 533.0, 630.0, 630.0, 579.0, 596.0, 630.0, 579.0, 627.0, 630.0, 633.0, 633.0, 587.0, 639.0, 582.0, 581.0, 587.0, 624.0, 633.0, 621.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 582.0, 582.0, 627.0, 639.0, 630.0, 630.0, 587.0, 630.0, 627.0, 630.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 317.0, 313.0, 312.0, 318.0, 314.0, 319.0, 317.0, 316.0, 323.0, 304.0, 292.0, 290.0, 289.0, 290.0, 316.0, 314.0, 306.0, 327.0, 316.0, 317.0, 299.0, 288.0, 311.0, 322.0, 319.0, 314.0, 322.0, 314.0, 296.0, 286.0, 291.0, 279.0, 296.0, 294.0, 291.0, 288.0, 288.0, 288.0, 313.0, 317.0, 290.0, 291.0, 294.0, 285.0, 258.0, 260.0, 311.0, 325.0, 316.0, 320.0, 315.0, 318.0, 282.0, 294.0, 301.0, 289.0, 318.0, 315.0, 
314.0, 322.0, 316.0, 317.0, 291.0, 288.0, 295.0, 284.0, 309.0, 321.0, 295.0, 292.0, 321.0, 306.0, 291.0, 293.0, 310.0, 314.0, 280.0, 290.0, 296.0, 283.0, 285.0, 302.0, 287.0, 280.0, 318.0, 309.0, 277.0, 310.0, 292.0, 284.0, 314.0, 322.0, 306.0, 324.0, 278.0, 261.0, 309.0, 318.0, 319.0, 320.0, 322.0, 314.0, 286.0, 301.0, 326.0, 304.0, 295.0, 275.0, 292.0, 278.0, 295.0, 289.0, 308.0, 313.0, 290.0, 292.0, 305.0, 322.0, 291.0, 291.0, 327.0, 300.0, 279.0, 300.0, 263.0, 270.0, 316.0, 314.0, 313.0, 317.0, 296.0, 283.0, 302.0, 294.0, 311.0, 319.0, 304.0, 275.0, 320.0, 307.0, 316.0, 314.0, 312.0, 321.0, 316.0, 317.0, 295.0, 292.0, 312.0, 327.0, 294.0, 288.0, 284.0, 297.0, 295.0, 292.0, 302.0, 322.0, 316.0, 317.0, 308.0, 313.0, 291.0, 291.0, 322.0, 317.0, 319.0, 317.0, 291.0, 291.0, 290.0, 297.0, 307.0, 320.0, 290.0, 292.0, 280.0, 302.0, 319.0, 308.0, 316.0, 323.0, 313.0, 317.0, 305.0, 325.0, 296.0, 291.0, 313.0, 317.0, 309.0, 318.0, 311.0, 319.0, 308.0, 322.0, 289.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0463435116537838, "mean_processing_ms": 0.277465645143399, "mean_inference_ms": 1.6108149086882515}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6048000, "num_steps_sampled": 3225600, "sample_time_ms": 22477.661, "load_time_ms": 36.527, "grad_time_ms": 9715.891, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003005747450515628, "policy_loss": -0.005394397769123316, "vf_loss": 89.67745208740234, "vf_explained_var": 0.7541216015815735, "kl": 0.0018617714522406459, "entropy": 1.1351839303970337, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3225600, "episodes_total": 8064, "training_iteration": 252, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-52-11", "timestamp": 1660254731, "time_this_iter_s": 33.91162323951721, "time_total_s": 13146.20119857788, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13146.20119857788, "timesteps_since_restore": 3225600, "iterations_since_restore": 252, "perf": {"cpu_util_percent": 29.32083333333333, "ram_util_percent": 58.725}} +{"episode_reward_max": 639.0, "episode_reward_min": 473.0, "episode_reward_mean": 603.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.78}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.96, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 
16.81, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.78, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.23, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.71, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.66, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 3, 
"soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.78, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 567.0, 636.0, 570.0, 633.0, 633.0, 582.0, 582.0, 587.0, 579.0, 536.0, 587.0, 587.0, 567.0, 630.0, 587.0, 539.0, 582.0, 570.0, 473.0, 627.0, 627.0, 587.0, 587.0, 633.0, 581.0, 579.0, 576.0, 587.0, 636.0, 582.0, 582.0, 630.0, 630.0, 579.0, 596.0, 630.0, 579.0, 627.0, 630.0, 633.0, 633.0, 587.0, 639.0, 582.0, 581.0, 587.0, 624.0, 633.0, 621.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 582.0, 582.0, 627.0, 639.0, 630.0, 630.0, 587.0, 630.0, 627.0, 630.0, 630.0, 582.0, 636.0, 630.0, 630.0, 633.0, 633.0, 627.0, 582.0, 579.0, 630.0, 633.0, 633.0, 587.0, 633.0, 633.0, 636.0, 582.0, 570.0, 590.0, 579.0, 576.0, 630.0, 581.0, 579.0, 518.0, 636.0, 636.0, 633.0, 576.0, 590.0, 633.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 285.0, 282.0, 319.0, 317.0, 278.0, 292.0, 321.0, 312.0, 318.0, 315.0, 288.0, 294.0, 290.0, 292.0, 303.0, 284.0, 299.0, 280.0, 268.0, 268.0, 288.0, 299.0, 291.0, 296.0, 278.0, 289.0, 315.0, 315.0, 295.0, 
292.0, 265.0, 274.0, 294.0, 288.0, 288.0, 282.0, 242.0, 231.0, 316.0, 311.0, 319.0, 308.0, 288.0, 299.0, 303.0, 284.0, 317.0, 316.0, 289.0, 292.0, 283.0, 296.0, 287.0, 289.0, 293.0, 294.0, 314.0, 322.0, 289.0, 293.0, 296.0, 286.0, 316.0, 314.0, 313.0, 317.0, 296.0, 283.0, 302.0, 294.0, 311.0, 319.0, 304.0, 275.0, 320.0, 307.0, 316.0, 314.0, 312.0, 321.0, 316.0, 317.0, 295.0, 292.0, 312.0, 327.0, 294.0, 288.0, 284.0, 297.0, 295.0, 292.0, 302.0, 322.0, 316.0, 317.0, 308.0, 313.0, 291.0, 291.0, 322.0, 317.0, 319.0, 317.0, 291.0, 291.0, 290.0, 297.0, 307.0, 320.0, 290.0, 292.0, 280.0, 302.0, 319.0, 308.0, 316.0, 323.0, 313.0, 317.0, 305.0, 325.0, 296.0, 291.0, 313.0, 317.0, 309.0, 318.0, 311.0, 319.0, 308.0, 322.0, 289.0, 293.0, 319.0, 317.0, 317.0, 313.0, 312.0, 318.0, 314.0, 319.0, 317.0, 316.0, 323.0, 304.0, 292.0, 290.0, 289.0, 290.0, 316.0, 314.0, 306.0, 327.0, 316.0, 317.0, 299.0, 288.0, 311.0, 322.0, 319.0, 314.0, 322.0, 314.0, 296.0, 286.0, 291.0, 279.0, 296.0, 294.0, 291.0, 288.0, 288.0, 288.0, 313.0, 317.0, 290.0, 291.0, 294.0, 285.0, 258.0, 260.0, 311.0, 325.0, 316.0, 320.0, 315.0, 318.0, 282.0, 294.0, 301.0, 289.0, 318.0, 315.0, 314.0, 322.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0439919117658958, "mean_processing_ms": 0.2769978233028824, "mean_inference_ms": 1.608730530277712}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6072000, "num_steps_sampled": 3238400, "sample_time_ms": 22102.374, "load_time_ms": 36.62, "grad_time_ms": 9528.927, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007198518142104149, "policy_loss": -0.002070576651021838, "vf_loss": 98.38677215576172, "vf_explained_var": 0.7492752075195312, "kl": 0.0017245132476091385, "entropy": 1.1391605138778687, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3238400, "episodes_total": 8096, 
"training_iteration": 253, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-52-38", "timestamp": 1660254758, "time_this_iter_s": 26.443045139312744, "time_total_s": 13172.644243717194, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13172.644243717194, "timesteps_since_restore": 3238400, "iterations_since_restore": 253, "perf": {"cpu_util_percent": 30.831578947368424, "ram_util_percent": 58.665789473684214}} +{"episode_reward_max": 639.0, "episode_reward_min": 473.0, "episode_reward_mean": 603.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 301.77}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.54, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.7, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.96, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.71, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.87, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.87, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.87, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, 
"catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 584.0, 636.0, 630.0, 627.0, 630.0, 561.0, 633.0, 579.0, 627.0, 582.0, 579.0, 582.0, 627.0, 582.0, 636.0, 630.0, 630.0, 582.0, 633.0, 582.0, 636.0, 633.0, 627.0, 582.0, 630.0, 573.0, 587.0, 627.0, 633.0, 627.0, 639.0, 627.0, 630.0, 630.0, 582.0, 636.0, 630.0, 630.0, 633.0, 633.0, 627.0, 582.0, 579.0, 630.0, 633.0, 633.0, 587.0, 633.0, 633.0, 636.0, 582.0, 570.0, 590.0, 579.0, 576.0, 630.0, 581.0, 579.0, 518.0, 636.0, 636.0, 633.0, 576.0, 590.0, 633.0, 636.0, 633.0, 582.0, 567.0, 636.0, 570.0, 633.0, 633.0, 582.0, 582.0, 587.0, 579.0, 536.0, 587.0, 587.0, 567.0, 630.0, 587.0, 539.0, 582.0, 570.0, 473.0, 627.0, 627.0, 587.0, 587.0, 633.0, 581.0, 579.0, 576.0, 587.0, 636.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], 
"policy_ppo_reward": [314.0, 319.0, 304.0, 280.0, 327.0, 309.0, 316.0, 314.0, 316.0, 311.0, 316.0, 314.0, 269.0, 292.0, 316.0, 317.0, 285.0, 294.0, 313.0, 314.0, 288.0, 294.0, 298.0, 281.0, 293.0, 289.0, 324.0, 303.0, 286.0, 296.0, 316.0, 320.0, 316.0, 314.0, 313.0, 317.0, 290.0, 292.0, 314.0, 319.0, 285.0, 297.0, 317.0, 319.0, 324.0, 309.0, 310.0, 317.0, 299.0, 283.0, 323.0, 307.0, 282.0, 291.0, 294.0, 293.0, 308.0, 319.0, 332.0, 301.0, 309.0, 318.0, 314.0, 325.0, 309.0, 318.0, 311.0, 319.0, 308.0, 322.0, 289.0, 293.0, 319.0, 317.0, 317.0, 313.0, 312.0, 318.0, 314.0, 319.0, 317.0, 316.0, 323.0, 304.0, 292.0, 290.0, 289.0, 290.0, 316.0, 314.0, 306.0, 327.0, 316.0, 317.0, 299.0, 288.0, 311.0, 322.0, 319.0, 314.0, 322.0, 314.0, 296.0, 286.0, 291.0, 279.0, 296.0, 294.0, 291.0, 288.0, 288.0, 288.0, 313.0, 317.0, 290.0, 291.0, 294.0, 285.0, 258.0, 260.0, 311.0, 325.0, 316.0, 320.0, 315.0, 318.0, 282.0, 294.0, 301.0, 289.0, 318.0, 315.0, 314.0, 322.0, 316.0, 317.0, 288.0, 294.0, 285.0, 282.0, 319.0, 317.0, 278.0, 292.0, 321.0, 312.0, 318.0, 315.0, 288.0, 294.0, 290.0, 292.0, 303.0, 284.0, 299.0, 280.0, 268.0, 268.0, 288.0, 299.0, 291.0, 296.0, 278.0, 289.0, 315.0, 315.0, 295.0, 292.0, 265.0, 274.0, 294.0, 288.0, 288.0, 282.0, 242.0, 231.0, 316.0, 311.0, 319.0, 308.0, 288.0, 299.0, 303.0, 284.0, 317.0, 316.0, 289.0, 292.0, 283.0, 296.0, 287.0, 289.0, 293.0, 294.0, 314.0, 322.0, 289.0, 293.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.041648317929554, "mean_processing_ms": 0.27652981742042726, "mean_inference_ms": 1.6063886219680592}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6096000, "num_steps_sampled": 3251200, "sample_time_ms": 22038.921, "load_time_ms": 36.872, "grad_time_ms": 9318.447, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028869707603007555, "policy_loss": -0.005314534064382315, "vf_loss": 87.72003936767578, "vf_explained_var": 0.7518091797828674, "kl": 
0.0020599865820258856, "entropy": 1.1409815549850464, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3251200, "episodes_total": 8128, "training_iteration": 254, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-53-05", "timestamp": 1660254785, "time_this_iter_s": 27.532819986343384, "time_total_s": 13200.177063703537, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), 
float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13200.177063703537, "timesteps_since_restore": 3251200, "iterations_since_restore": 254, "perf": {"cpu_util_percent": 32.46923076923077, "ram_util_percent": 58.67179487179486}} +{"episode_reward_max": 639.0, "episode_reward_min": 473.0, "episode_reward_mean": 606.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 303.43}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.06, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.02, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.74, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.94, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.49, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 639.0, 630.0, 570.0, 636.0, 584.0, 636.0, 633.0, 624.0, 579.0, 627.0, 630.0, 582.0, 639.0, 627.0, 630.0, 627.0, 627.0, 630.0, 630.0, 624.0, 587.0, 636.0, 633.0, 567.0, 630.0, 627.0, 630.0, 630.0, 636.0, 630.0, 636.0, 590.0, 633.0, 636.0, 633.0, 582.0, 567.0, 636.0, 570.0, 633.0, 633.0, 582.0, 582.0, 587.0, 579.0, 536.0, 587.0, 587.0, 567.0, 630.0, 587.0, 539.0, 582.0, 570.0, 473.0, 627.0, 627.0, 587.0, 587.0, 633.0, 581.0, 579.0, 576.0, 587.0, 636.0, 582.0, 582.0, 633.0, 584.0, 636.0, 630.0, 627.0, 630.0, 561.0, 633.0, 579.0, 627.0, 582.0, 579.0, 582.0, 627.0, 582.0, 636.0, 630.0, 630.0, 582.0, 633.0, 582.0, 636.0, 633.0, 627.0, 582.0, 630.0, 573.0, 587.0, 627.0, 633.0, 627.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 298.0, 314.0, 325.0, 306.0, 324.0, 291.0, 279.0, 327.0, 309.0, 296.0, 288.0, 323.0, 313.0, 316.0, 317.0, 313.0, 311.0, 298.0, 281.0, 316.0, 311.0, 316.0, 314.0, 294.0, 288.0, 322.0, 317.0, 316.0, 311.0, 319.0, 311.0, 320.0, 307.0, 307.0, 320.0, 313.0, 317.0, 317.0, 313.0, 307.0, 317.0, 294.0, 293.0, 319.0, 317.0, 310.0, 323.0, 285.0, 282.0, 314.0, 316.0, 314.0, 313.0, 309.0, 321.0, 313.0, 317.0, 319.0, 317.0, 321.0, 309.0, 315.0, 321.0, 301.0, 289.0, 318.0, 315.0, 314.0, 322.0, 316.0, 317.0, 288.0, 294.0, 285.0, 282.0, 319.0, 317.0, 278.0, 292.0, 321.0, 312.0, 318.0, 315.0, 288.0, 294.0, 290.0, 292.0, 303.0, 284.0, 299.0, 280.0, 268.0, 268.0, 288.0, 299.0, 291.0, 296.0, 278.0, 289.0, 315.0, 315.0, 295.0, 292.0, 265.0, 274.0, 294.0, 288.0, 288.0, 282.0, 242.0, 231.0, 316.0, 311.0, 319.0, 308.0, 288.0, 299.0, 303.0, 284.0, 317.0, 316.0, 289.0, 292.0, 283.0, 296.0, 287.0, 289.0, 293.0, 294.0, 314.0, 322.0, 289.0, 293.0, 296.0, 286.0, 314.0, 319.0, 304.0, 280.0, 327.0, 309.0, 316.0, 314.0, 316.0, 311.0, 316.0, 314.0, 269.0, 292.0, 316.0, 317.0, 285.0, 294.0, 313.0, 314.0, 288.0, 294.0, 298.0, 281.0, 293.0, 289.0, 324.0, 303.0, 286.0, 296.0, 316.0, 320.0, 316.0, 314.0, 313.0, 317.0, 290.0, 292.0, 314.0, 319.0, 285.0, 297.0, 317.0, 319.0, 324.0, 309.0, 310.0, 317.0, 299.0, 283.0, 323.0, 307.0, 282.0, 291.0, 294.0, 293.0, 308.0, 319.0, 332.0, 301.0, 309.0, 318.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0393196321657538, "mean_processing_ms": 0.2760628271968967, "mean_inference_ms": 1.603750642060521}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6120000, "num_steps_sampled": 3264000, "sample_time_ms": 22112.846, "load_time_ms": 37.19, "grad_time_ms": 9333.009, "update_time_ms": 0.001, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005571722984313965, "policy_loss": -0.0022253356873989105, "vf_loss": 83.61035919189453, "vf_explained_var": 0.7589413523674011, "kl": 0.0018155118450522423, "entropy": 1.1279449462890625, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3264000, "episodes_total": 8160, "training_iteration": 255, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-53-36", "timestamp": 1660254816, "time_this_iter_s": 31.015226125717163, "time_total_s": 13231.192289829254, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 
1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13231.192289829254, "timesteps_since_restore": 3264000, "iterations_since_restore": 255, "perf": {"cpu_util_percent": 32.91162790697674, "ram_util_percent": 59.26976744186045}} +{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 612.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 265.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 306.185}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.17, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.01, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.92, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.1, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.12, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.53, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 1, 
"useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.12, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.12, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 639.0, 630.0, 639.0, 636.0, 569.0, 630.0, 579.0, 633.0, 576.0, 636.0, 579.0, 587.0, 627.0, 630.0, 579.0, 587.0, 633.0, 639.0, 630.0, 633.0, 636.0, 630.0, 630.0, 633.0, 576.0, 539.0, 630.0, 552.0, 590.0, 582.0, 630.0, 587.0, 636.0, 582.0, 582.0, 633.0, 584.0, 636.0, 630.0, 627.0, 630.0, 561.0, 633.0, 579.0, 627.0, 582.0, 579.0, 582.0, 627.0, 582.0, 636.0, 630.0, 630.0, 582.0, 633.0, 582.0, 636.0, 633.0, 627.0, 582.0, 630.0, 573.0, 587.0, 627.0, 633.0, 627.0, 639.0, 576.0, 639.0, 630.0, 570.0, 636.0, 584.0, 636.0, 633.0, 624.0, 579.0, 627.0, 630.0, 582.0, 639.0, 627.0, 630.0, 627.0, 627.0, 630.0, 630.0, 624.0, 587.0, 636.0, 633.0, 567.0, 630.0, 627.0, 630.0, 630.0, 636.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 265.0, 319.0, 320.0, 319.0, 311.0, 317.0, 322.0, 319.0, 317.0, 282.0, 287.0, 311.0, 319.0, 285.0, 294.0, 321.0, 312.0, 285.0, 291.0, 312.0, 324.0, 290.0, 289.0, 296.0, 291.0, 316.0, 311.0, 322.0, 308.0, 293.0, 286.0, 285.0, 302.0, 319.0, 314.0, 320.0, 319.0, 310.0, 320.0, 313.0, 320.0, 317.0, 319.0, 319.0, 311.0, 308.0, 322.0, 310.0, 323.0, 288.0, 288.0, 266.0, 273.0, 310.0, 320.0, 278.0, 274.0, 299.0, 291.0, 295.0, 287.0, 323.0, 307.0, 293.0, 294.0, 314.0, 322.0, 289.0, 293.0, 296.0, 286.0, 314.0, 319.0, 304.0, 280.0, 327.0, 309.0, 316.0, 314.0, 316.0, 311.0, 316.0, 314.0, 269.0, 292.0, 316.0, 317.0, 285.0, 294.0, 313.0, 314.0, 288.0, 294.0, 298.0, 281.0, 293.0, 289.0, 324.0, 303.0, 286.0, 296.0, 316.0, 320.0, 316.0, 314.0, 313.0, 317.0, 290.0, 292.0, 314.0, 319.0, 285.0, 297.0, 317.0, 319.0, 324.0, 309.0, 310.0, 317.0, 299.0, 283.0, 323.0, 307.0, 282.0, 291.0, 294.0, 293.0, 308.0, 319.0, 332.0, 301.0, 309.0, 318.0, 314.0, 325.0, 278.0, 298.0, 314.0, 325.0, 306.0, 324.0, 291.0, 279.0, 327.0, 309.0, 296.0, 288.0, 323.0, 313.0, 316.0, 317.0, 313.0, 311.0, 298.0, 281.0, 316.0, 311.0, 316.0, 314.0, 294.0, 288.0, 322.0, 317.0, 316.0, 311.0, 319.0, 311.0, 320.0, 307.0, 307.0, 320.0, 313.0, 317.0, 317.0, 313.0, 307.0, 317.0, 294.0, 293.0, 319.0, 317.0, 310.0, 323.0, 285.0, 282.0, 314.0, 316.0, 314.0, 313.0, 309.0, 321.0, 313.0, 317.0, 319.0, 317.0, 321.0, 309.0, 315.0, 321.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0370215932262006, "mean_processing_ms": 0.275602527422642, "mean_inference_ms": 1.6014130194901954}, 
"off_policy_estimator": {}, "info": {"num_steps_trained": 6144000, "num_steps_sampled": 3276800, "sample_time_ms": 22185.118, "load_time_ms": 37.192, "grad_time_ms": 9265.444, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001450125128030777, "policy_loss": -0.006554553750902414, "vf_loss": 85.70040893554688, "vf_explained_var": 0.7625378966331482, "kl": 0.0019486347446218133, "entropy": 1.1307319402694702, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3276800, "episodes_total": 8192, "training_iteration": 256, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-54-07", "timestamp": 1660254847, "time_this_iter_s": 30.66650390625, "time_total_s": 13261.858793735504, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, 
"mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13261.858793735504, "timesteps_since_restore": 3276800, "iterations_since_restore": 256, "perf": {"cpu_util_percent": 32.206818181818186, "ram_util_percent": 58.78863636363636}} +{"episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 611.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 236.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 305.745}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.09, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.22, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.76, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.27, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 4, 
"dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.27, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.27, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.66, 
"viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 627.0, 579.0, 633.0, 627.0, 582.0, 579.0, 582.0, 576.0, 630.0, 579.0, 587.0, 630.0, 630.0, 582.0, 587.0, 576.0, 587.0, 476.0, 636.0, 633.0, 636.0, 636.0, 630.0, 630.0, 639.0, 636.0, 576.0, 627.0, 579.0, 582.0, 633.0, 627.0, 633.0, 627.0, 639.0, 576.0, 639.0, 630.0, 570.0, 636.0, 584.0, 636.0, 633.0, 624.0, 579.0, 627.0, 630.0, 582.0, 639.0, 627.0, 630.0, 627.0, 627.0, 630.0, 630.0, 624.0, 587.0, 636.0, 633.0, 567.0, 630.0, 627.0, 630.0, 630.0, 636.0, 630.0, 636.0, 530.0, 639.0, 630.0, 639.0, 636.0, 569.0, 630.0, 579.0, 633.0, 576.0, 636.0, 579.0, 587.0, 627.0, 630.0, 
579.0, 587.0, 633.0, 639.0, 630.0, 633.0, 636.0, 630.0, 630.0, 633.0, 576.0, 539.0, 630.0, 552.0, 590.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 309.0, 313.0, 314.0, 290.0, 289.0, 322.0, 311.0, 310.0, 317.0, 294.0, 288.0, 294.0, 285.0, 292.0, 290.0, 288.0, 288.0, 319.0, 311.0, 284.0, 295.0, 291.0, 296.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 297.0, 290.0, 292.0, 284.0, 287.0, 300.0, 236.0, 240.0, 316.0, 320.0, 310.0, 323.0, 317.0, 319.0, 324.0, 312.0, 311.0, 319.0, 316.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 291.0, 313.0, 314.0, 289.0, 290.0, 290.0, 292.0, 319.0, 314.0, 308.0, 319.0, 332.0, 301.0, 309.0, 318.0, 314.0, 325.0, 278.0, 298.0, 314.0, 325.0, 306.0, 324.0, 291.0, 279.0, 327.0, 309.0, 296.0, 288.0, 323.0, 313.0, 316.0, 317.0, 313.0, 311.0, 298.0, 281.0, 316.0, 311.0, 316.0, 314.0, 294.0, 288.0, 322.0, 317.0, 316.0, 311.0, 319.0, 311.0, 320.0, 307.0, 307.0, 320.0, 313.0, 317.0, 317.0, 313.0, 307.0, 317.0, 294.0, 293.0, 319.0, 317.0, 310.0, 323.0, 285.0, 282.0, 314.0, 316.0, 314.0, 313.0, 309.0, 321.0, 313.0, 317.0, 319.0, 317.0, 321.0, 309.0, 315.0, 321.0, 265.0, 265.0, 319.0, 320.0, 319.0, 311.0, 317.0, 322.0, 319.0, 317.0, 282.0, 287.0, 311.0, 319.0, 285.0, 294.0, 321.0, 312.0, 285.0, 291.0, 312.0, 324.0, 290.0, 289.0, 296.0, 291.0, 316.0, 311.0, 322.0, 308.0, 293.0, 286.0, 285.0, 302.0, 319.0, 314.0, 320.0, 319.0, 310.0, 320.0, 313.0, 320.0, 317.0, 319.0, 319.0, 311.0, 308.0, 322.0, 310.0, 323.0, 288.0, 288.0, 266.0, 273.0, 310.0, 
320.0, 278.0, 274.0, 299.0, 291.0, 295.0, 287.0, 323.0, 307.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0347705534764586, "mean_processing_ms": 0.2751542992936445, "mean_inference_ms": 1.5997483422105416}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6168000, "num_steps_sampled": 3289600, "sample_time_ms": 22403.591, "load_time_ms": 37.585, "grad_time_ms": 9159.695, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004127854947000742, "policy_loss": -0.004435718059539795, "vf_loss": 91.31246185302734, "vf_explained_var": 0.7648020386695862, "kl": 0.0019896463491022587, "entropy": 1.1353529691696167, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3289600, "episodes_total": 8224, "training_iteration": 257, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-54-43", "timestamp": 1660254883, "time_this_iter_s": 35.68418622016907, "time_total_s": 13297.542979955673, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, 
"timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13297.542979955673, "timesteps_since_restore": 3289600, "iterations_since_restore": 257, "perf": {"cpu_util_percent": 28.105999999999998, "ram_util_percent": 58.784}} +{"episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 605.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 236.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.555}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.51, "shaped_reward_min": 152, "shaped_reward_max": 199, 
"tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.34, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.68, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.82, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.32, "potting_onion_agent_0_min": 11, 
"potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.32, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, 
"optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.32, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [533.0, 627.0, 633.0, 573.0, 627.0, 630.0, 636.0, 633.0, 582.0, 633.0, 527.0, 633.0, 582.0, 627.0, 582.0, 636.0, 627.0, 582.0, 587.0, 582.0, 630.0, 570.0, 630.0, 573.0, 630.0, 627.0, 633.0, 530.0, 630.0, 513.0, 582.0, 558.0, 630.0, 636.0, 630.0, 636.0, 530.0, 639.0, 630.0, 639.0, 636.0, 569.0, 630.0, 579.0, 633.0, 576.0, 636.0, 579.0, 587.0, 627.0, 630.0, 579.0, 587.0, 
633.0, 639.0, 630.0, 633.0, 636.0, 630.0, 630.0, 633.0, 576.0, 539.0, 630.0, 552.0, 590.0, 582.0, 630.0, 630.0, 627.0, 579.0, 633.0, 627.0, 582.0, 579.0, 582.0, 576.0, 630.0, 579.0, 587.0, 630.0, 630.0, 582.0, 587.0, 576.0, 587.0, 476.0, 636.0, 633.0, 636.0, 636.0, 630.0, 630.0, 639.0, 636.0, 576.0, 627.0, 579.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [266.0, 267.0, 308.0, 319.0, 309.0, 324.0, 288.0, 285.0, 313.0, 314.0, 311.0, 319.0, 322.0, 314.0, 314.0, 319.0, 298.0, 284.0, 319.0, 314.0, 265.0, 262.0, 316.0, 317.0, 284.0, 298.0, 316.0, 311.0, 284.0, 298.0, 309.0, 327.0, 313.0, 314.0, 299.0, 283.0, 287.0, 300.0, 290.0, 292.0, 316.0, 314.0, 289.0, 281.0, 316.0, 314.0, 281.0, 292.0, 310.0, 320.0, 306.0, 321.0, 326.0, 307.0, 270.0, 260.0, 319.0, 311.0, 260.0, 253.0, 289.0, 293.0, 289.0, 269.0, 313.0, 317.0, 319.0, 317.0, 321.0, 309.0, 315.0, 321.0, 265.0, 265.0, 319.0, 320.0, 319.0, 311.0, 317.0, 322.0, 319.0, 317.0, 282.0, 287.0, 311.0, 319.0, 285.0, 294.0, 321.0, 312.0, 285.0, 291.0, 312.0, 324.0, 290.0, 289.0, 296.0, 291.0, 316.0, 311.0, 322.0, 308.0, 293.0, 286.0, 285.0, 302.0, 319.0, 314.0, 320.0, 319.0, 310.0, 320.0, 313.0, 320.0, 317.0, 319.0, 319.0, 311.0, 308.0, 322.0, 310.0, 323.0, 288.0, 288.0, 266.0, 273.0, 310.0, 320.0, 278.0, 274.0, 299.0, 291.0, 295.0, 287.0, 323.0, 307.0, 321.0, 309.0, 313.0, 314.0, 290.0, 289.0, 322.0, 311.0, 310.0, 317.0, 294.0, 288.0, 294.0, 285.0, 292.0, 290.0, 288.0, 288.0, 319.0, 311.0, 284.0, 295.0, 291.0, 296.0, 314.0, 
316.0, 311.0, 319.0, 291.0, 291.0, 297.0, 290.0, 292.0, 284.0, 287.0, 300.0, 236.0, 240.0, 316.0, 320.0, 310.0, 323.0, 317.0, 319.0, 324.0, 312.0, 311.0, 319.0, 316.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 291.0, 313.0, 314.0, 289.0, 290.0, 290.0, 292.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.032534462731249, "mean_processing_ms": 0.2747100807108193, "mean_inference_ms": 1.5981869464429628}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6192000, "num_steps_sampled": 3302400, "sample_time_ms": 22164.935, "load_time_ms": 37.303, "grad_time_ms": 9157.709, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004157478455454111, "policy_loss": -0.004339639097452164, "vf_loss": 90.65621948242188, "vf_explained_var": 0.7486104965209961, "kl": 0.0018090683734044433, "entropy": 1.1370199918746948, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3302400, "episodes_total": 8256, "training_iteration": 258, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-55-14", "timestamp": 1660254914, "time_this_iter_s": 31.362817764282227, "time_total_s": 13328.905797719955, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, 
"custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13328.905797719955, "timesteps_since_restore": 3302400, "iterations_since_restore": 258, "perf": {"cpu_util_percent": 31.451111111111114, "ram_util_percent": 58.77555555555555}} +{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 602.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 328.0}, 
"policy_reward_mean": {"ppo": 301.23}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.46, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.44, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.78, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, 
"useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.35, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.12, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.35, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.12, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.35, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.12, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 587.0, 573.0, 465.0, 581.0, 627.0, 633.0, 627.0, 576.0, 627.0, 639.0, 576.0, 636.0, 633.0, 555.0, 630.0, 636.0, 636.0, 627.0, 633.0, 582.0, 584.0, 584.0, 
579.0, 633.0, 582.0, 627.0, 636.0, 627.0, 627.0, 587.0, 630.0, 552.0, 590.0, 582.0, 630.0, 630.0, 627.0, 579.0, 633.0, 627.0, 582.0, 579.0, 582.0, 576.0, 630.0, 579.0, 587.0, 630.0, 630.0, 582.0, 587.0, 576.0, 587.0, 476.0, 636.0, 633.0, 636.0, 636.0, 630.0, 630.0, 639.0, 636.0, 576.0, 627.0, 579.0, 582.0, 633.0, 533.0, 627.0, 633.0, 573.0, 627.0, 630.0, 636.0, 633.0, 582.0, 633.0, 527.0, 633.0, 582.0, 627.0, 582.0, 636.0, 627.0, 582.0, 587.0, 582.0, 630.0, 570.0, 630.0, 573.0, 630.0, 627.0, 633.0, 530.0, 630.0, 513.0, 582.0, 558.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 292.0, 296.0, 291.0, 283.0, 290.0, 234.0, 231.0, 290.0, 291.0, 321.0, 306.0, 305.0, 328.0, 316.0, 311.0, 271.0, 305.0, 311.0, 316.0, 311.0, 328.0, 287.0, 289.0, 316.0, 320.0, 313.0, 320.0, 281.0, 274.0, 316.0, 314.0, 324.0, 312.0, 325.0, 311.0, 313.0, 314.0, 314.0, 319.0, 287.0, 295.0, 303.0, 281.0, 290.0, 294.0, 293.0, 286.0, 321.0, 312.0, 297.0, 285.0, 320.0, 307.0, 314.0, 322.0, 313.0, 314.0, 311.0, 316.0, 301.0, 286.0, 319.0, 311.0, 278.0, 274.0, 299.0, 291.0, 295.0, 287.0, 323.0, 307.0, 321.0, 309.0, 313.0, 314.0, 290.0, 289.0, 322.0, 311.0, 310.0, 317.0, 294.0, 288.0, 294.0, 285.0, 292.0, 290.0, 288.0, 288.0, 319.0, 311.0, 284.0, 295.0, 291.0, 296.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 297.0, 290.0, 292.0, 284.0, 287.0, 300.0, 236.0, 240.0, 316.0, 320.0, 310.0, 323.0, 317.0, 319.0, 324.0, 312.0, 311.0, 319.0, 316.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 291.0, 313.0, 314.0, 289.0, 
290.0, 290.0, 292.0, 319.0, 314.0, 266.0, 267.0, 308.0, 319.0, 309.0, 324.0, 288.0, 285.0, 313.0, 314.0, 311.0, 319.0, 322.0, 314.0, 314.0, 319.0, 298.0, 284.0, 319.0, 314.0, 265.0, 262.0, 316.0, 317.0, 284.0, 298.0, 316.0, 311.0, 284.0, 298.0, 309.0, 327.0, 313.0, 314.0, 299.0, 283.0, 287.0, 300.0, 290.0, 292.0, 316.0, 314.0, 289.0, 281.0, 316.0, 314.0, 281.0, 292.0, 310.0, 320.0, 306.0, 321.0, 326.0, 307.0, 270.0, 260.0, 319.0, 311.0, 260.0, 253.0, 289.0, 293.0, 289.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0303222005081312, "mean_processing_ms": 0.27427243552224245, "mean_inference_ms": 1.5968410183934156}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6216000, "num_steps_sampled": 3315200, "sample_time_ms": 22358.82, "load_time_ms": 37.268, "grad_time_ms": 9112.824, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012977579608559608, "policy_loss": -0.0070681399665772915, "vf_loss": 89.34113311767578, "vf_explained_var": 0.751798152923584, "kl": 0.0021080432925373316, "entropy": 1.1364187002182007, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3315200, "episodes_total": 8288, "training_iteration": 259, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-55-48", "timestamp": 1660254948, "time_this_iter_s": 33.836853981018066, "time_total_s": 13362.742651700974, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, 
"max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13362.742651700974, "timesteps_since_restore": 3315200, "iterations_since_restore": 259, "perf": {"cpu_util_percent": 29.470212765957445, "ram_util_percent": 58.776595744680854}} 
+{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 603.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 301.89}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.98, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.42, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.89, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.55, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.33, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 633.0, 544.0, 587.0, 587.0, 639.0, 587.0, 636.0, 633.0, 573.0, 587.0, 636.0, 567.0, 587.0, 633.0, 630.0, 633.0, 579.0, 627.0, 624.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 590.0, 630.0, 627.0, 630.0, 579.0, 627.0, 579.0, 582.0, 633.0, 533.0, 627.0, 633.0, 573.0, 627.0, 630.0, 636.0, 633.0, 582.0, 633.0, 527.0, 633.0, 582.0, 627.0, 582.0, 636.0, 627.0, 582.0, 587.0, 582.0, 630.0, 570.0, 630.0, 573.0, 630.0, 627.0, 633.0, 530.0, 630.0, 513.0, 582.0, 558.0, 587.0, 587.0, 573.0, 465.0, 581.0, 627.0, 633.0, 627.0, 576.0, 627.0, 639.0, 576.0, 636.0, 633.0, 555.0, 630.0, 636.0, 636.0, 627.0, 633.0, 582.0, 584.0, 584.0, 579.0, 633.0, 582.0, 627.0, 636.0, 627.0, 627.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 311.0, 317.0, 316.0, 309.0, 324.0, 268.0, 276.0, 288.0, 299.0, 299.0, 288.0, 322.0, 317.0, 288.0, 299.0, 319.0, 317.0, 319.0, 314.0, 290.0, 283.0, 287.0, 300.0, 311.0, 325.0, 285.0, 282.0, 290.0, 297.0, 319.0, 314.0, 311.0, 319.0, 312.0, 321.0, 277.0, 302.0, 320.0, 307.0, 319.0, 305.0, 314.0, 316.0, 295.0, 287.0, 296.0, 283.0, 313.0, 314.0, 284.0, 298.0, 289.0, 290.0, 294.0, 296.0, 311.0, 319.0, 303.0, 324.0, 316.0, 314.0, 290.0, 289.0, 313.0, 314.0, 289.0, 290.0, 290.0, 292.0, 319.0, 314.0, 266.0, 267.0, 308.0, 319.0, 309.0, 324.0, 288.0, 285.0, 313.0, 314.0, 311.0, 319.0, 322.0, 314.0, 314.0, 319.0, 298.0, 284.0, 319.0, 314.0, 265.0, 262.0, 316.0, 317.0, 284.0, 
298.0, 316.0, 311.0, 284.0, 298.0, 309.0, 327.0, 313.0, 314.0, 299.0, 283.0, 287.0, 300.0, 290.0, 292.0, 316.0, 314.0, 289.0, 281.0, 316.0, 314.0, 281.0, 292.0, 310.0, 320.0, 306.0, 321.0, 326.0, 307.0, 270.0, 260.0, 319.0, 311.0, 260.0, 253.0, 289.0, 293.0, 289.0, 269.0, 295.0, 292.0, 296.0, 291.0, 283.0, 290.0, 234.0, 231.0, 290.0, 291.0, 321.0, 306.0, 305.0, 328.0, 316.0, 311.0, 271.0, 305.0, 311.0, 316.0, 311.0, 328.0, 287.0, 289.0, 316.0, 320.0, 313.0, 320.0, 281.0, 274.0, 316.0, 314.0, 324.0, 312.0, 325.0, 311.0, 313.0, 314.0, 314.0, 319.0, 287.0, 295.0, 303.0, 281.0, 290.0, 294.0, 293.0, 286.0, 321.0, 312.0, 297.0, 285.0, 320.0, 307.0, 314.0, 322.0, 313.0, 314.0, 311.0, 316.0, 301.0, 286.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0281105190585473, "mean_processing_ms": 0.27383411395901525, "mean_inference_ms": 1.5952417170076567}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6240000, "num_steps_sampled": 3328000, "sample_time_ms": 22352.291, "load_time_ms": 37.439, "grad_time_ms": 9262.711, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002568518975749612, "policy_loss": -0.005389755126088858, "vf_loss": 85.3111801147461, "vf_explained_var": 0.7668444514274597, "kl": 0.0014818129129707813, "entropy": 1.145686149597168, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3328000, "episodes_total": 8320, "training_iteration": 260, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-56-23", "timestamp": 1660254983, "time_this_iter_s": 34.4713191986084, "time_total_s": 13397.213970899582, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 13397.213970899582, "timesteps_since_restore": 3328000, "iterations_since_restore": 260, "perf": {"cpu_util_percent": 30.669387755102036, "ram_util_percent": 58.697959183673476}} +{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 602.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 301.485}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 184.57, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.18, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 9, 
"useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.11, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, 
"soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.11, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.11, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 630.0, 582.0, 567.0, 576.0, 633.0, 639.0, 579.0, 624.0, 633.0, 627.0, 627.0, 630.0, 633.0, 630.0, 582.0, 567.0, 351.0, 584.0, 558.0, 639.0, 584.0, 627.0, 633.0, 587.0, 633.0, 587.0, 630.0, 630.0, 582.0, 582.0, 630.0, 513.0, 582.0, 558.0, 587.0, 587.0, 573.0, 465.0, 581.0, 627.0, 633.0, 627.0, 576.0, 627.0, 639.0, 576.0, 636.0, 633.0, 555.0, 630.0, 636.0, 636.0, 627.0, 633.0, 582.0, 584.0, 584.0, 579.0, 633.0, 582.0, 627.0, 636.0, 627.0, 627.0, 587.0, 630.0, 627.0, 633.0, 633.0, 544.0, 587.0, 587.0, 639.0, 587.0, 636.0, 633.0, 573.0, 587.0, 636.0, 567.0, 587.0, 633.0, 630.0, 633.0, 579.0, 627.0, 624.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 590.0, 630.0, 627.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 315.0, 314.0, 316.0, 313.0, 317.0, 280.0, 302.0, 284.0, 283.0, 286.0, 290.0, 316.0, 317.0, 311.0, 328.0, 287.0, 292.0, 312.0, 312.0, 311.0, 322.0, 313.0, 314.0, 313.0, 314.0, 306.0, 324.0, 319.0, 314.0, 317.0, 313.0, 293.0, 289.0, 285.0, 282.0, 168.0, 183.0, 289.0, 295.0, 277.0, 281.0, 327.0, 312.0, 285.0, 299.0, 318.0, 309.0, 313.0, 320.0, 288.0, 299.0, 319.0, 314.0, 294.0, 293.0, 316.0, 314.0, 316.0, 314.0, 298.0, 284.0, 278.0, 304.0, 319.0, 311.0, 260.0, 253.0, 289.0, 293.0, 289.0, 269.0, 295.0, 
292.0, 296.0, 291.0, 283.0, 290.0, 234.0, 231.0, 290.0, 291.0, 321.0, 306.0, 305.0, 328.0, 316.0, 311.0, 271.0, 305.0, 311.0, 316.0, 311.0, 328.0, 287.0, 289.0, 316.0, 320.0, 313.0, 320.0, 281.0, 274.0, 316.0, 314.0, 324.0, 312.0, 325.0, 311.0, 313.0, 314.0, 314.0, 319.0, 287.0, 295.0, 303.0, 281.0, 290.0, 294.0, 293.0, 286.0, 321.0, 312.0, 297.0, 285.0, 320.0, 307.0, 314.0, 322.0, 313.0, 314.0, 311.0, 316.0, 301.0, 286.0, 319.0, 311.0, 316.0, 311.0, 317.0, 316.0, 309.0, 324.0, 268.0, 276.0, 288.0, 299.0, 299.0, 288.0, 322.0, 317.0, 288.0, 299.0, 319.0, 317.0, 319.0, 314.0, 290.0, 283.0, 287.0, 300.0, 311.0, 325.0, 285.0, 282.0, 290.0, 297.0, 319.0, 314.0, 311.0, 319.0, 312.0, 321.0, 277.0, 302.0, 320.0, 307.0, 319.0, 305.0, 314.0, 316.0, 295.0, 287.0, 296.0, 283.0, 313.0, 314.0, 284.0, 298.0, 289.0, 290.0, 294.0, 296.0, 311.0, 319.0, 303.0, 324.0, 316.0, 314.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0259226733727673, "mean_processing_ms": 0.27340278893355835, "mean_inference_ms": 1.5937276386512644}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6264000, "num_steps_sampled": 3340800, "sample_time_ms": 22448.254, "load_time_ms": 37.569, "grad_time_ms": 9264.987, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003604738973081112, "policy_loss": -0.004238134250044823, "vf_loss": 84.2165298461914, "vf_explained_var": 0.770367443561554, "kl": 0.002041497267782688, "entropy": 1.1575653553009033, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3340800, "episodes_total": 8352, "training_iteration": 261, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-56-55", "timestamp": 1660255015, "time_this_iter_s": 32.84105324745178, "time_total_s": 13430.055024147034, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13430.055024147034, "timesteps_since_restore": 3340800, "iterations_since_restore": 261, "perf": {"cpu_util_percent": 32.742553191489364, "ram_util_percent": 58.7659574468085}} +{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 605.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 302.81}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 185.62, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, 
"onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.26, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, 
"soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.03, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 579.0, 582.0, 633.0, 633.0, 630.0, 636.0, 633.0, 576.0, 582.0, 627.0, 587.0, 587.0, 582.0, 633.0, 639.0, 579.0, 582.0, 576.0, 630.0, 630.0, 615.0, 570.0, 633.0, 579.0, 630.0, 633.0, 587.0, 581.0, 627.0, 582.0, 627.0, 627.0, 587.0, 630.0, 627.0, 633.0, 633.0, 544.0, 587.0, 587.0, 639.0, 587.0, 636.0, 633.0, 573.0, 587.0, 636.0, 567.0, 587.0, 633.0, 630.0, 633.0, 579.0, 627.0, 624.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 590.0, 630.0, 627.0, 630.0, 579.0, 639.0, 630.0, 630.0, 582.0, 567.0, 576.0, 633.0, 639.0, 579.0, 624.0, 633.0, 627.0, 627.0, 630.0, 633.0, 630.0, 582.0, 567.0, 351.0, 584.0, 558.0, 639.0, 584.0, 627.0, 633.0, 587.0, 633.0, 587.0, 630.0, 630.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [323.0, 310.0, 316.0, 317.0, 285.0, 294.0, 286.0, 296.0, 315.0, 318.0, 326.0, 307.0, 316.0, 314.0, 315.0, 321.0, 314.0, 319.0, 301.0, 275.0, 291.0, 291.0, 308.0, 319.0, 308.0, 279.0, 302.0, 285.0, 291.0, 291.0, 314.0, 319.0, 314.0, 325.0, 301.0, 278.0, 300.0, 282.0, 296.0, 280.0, 313.0, 317.0, 316.0, 
314.0, 304.0, 311.0, 290.0, 280.0, 313.0, 320.0, 296.0, 283.0, 308.0, 322.0, 319.0, 314.0, 291.0, 296.0, 295.0, 286.0, 316.0, 311.0, 286.0, 296.0, 313.0, 314.0, 311.0, 316.0, 301.0, 286.0, 319.0, 311.0, 316.0, 311.0, 317.0, 316.0, 309.0, 324.0, 268.0, 276.0, 288.0, 299.0, 299.0, 288.0, 322.0, 317.0, 288.0, 299.0, 319.0, 317.0, 319.0, 314.0, 290.0, 283.0, 287.0, 300.0, 311.0, 325.0, 285.0, 282.0, 290.0, 297.0, 319.0, 314.0, 311.0, 319.0, 312.0, 321.0, 277.0, 302.0, 320.0, 307.0, 319.0, 305.0, 314.0, 316.0, 295.0, 287.0, 296.0, 283.0, 313.0, 314.0, 284.0, 298.0, 289.0, 290.0, 294.0, 296.0, 311.0, 319.0, 303.0, 324.0, 316.0, 314.0, 290.0, 289.0, 324.0, 315.0, 314.0, 316.0, 313.0, 317.0, 280.0, 302.0, 284.0, 283.0, 286.0, 290.0, 316.0, 317.0, 311.0, 328.0, 287.0, 292.0, 312.0, 312.0, 311.0, 322.0, 313.0, 314.0, 313.0, 314.0, 306.0, 324.0, 319.0, 314.0, 317.0, 313.0, 293.0, 289.0, 285.0, 282.0, 168.0, 183.0, 289.0, 295.0, 277.0, 281.0, 327.0, 312.0, 285.0, 299.0, 318.0, 309.0, 313.0, 320.0, 288.0, 299.0, 319.0, 314.0, 294.0, 293.0, 316.0, 314.0, 316.0, 314.0, 298.0, 284.0, 278.0, 304.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0237598370369554, "mean_processing_ms": 0.2729750841964574, "mean_inference_ms": 1.5924306371043695}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6288000, "num_steps_sampled": 3353600, "sample_time_ms": 22598.435, "load_time_ms": 37.639, "grad_time_ms": 9270.4, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0002879177627619356, "policy_loss": -0.007559783756732941, "vf_loss": 84.22747802734375, "vf_explained_var": 0.7608786225318909, "kl": 0.0017919730162248015, "entropy": 1.1500838994979858, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3353600, "episodes_total": 8384, "training_iteration": 262, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-11_22-57-31", "timestamp": 1660255051, "time_this_iter_s": 35.47017812728882, "time_total_s": 13465.525202274323, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13465.525202274323, "timesteps_since_restore": 3353600, "iterations_since_restore": 262, "perf": {"cpu_util_percent": 25.712, "ram_util_percent": 58.78}} +{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 606.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.415}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 185.23, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.88, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.69, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.89, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.76, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, 
"soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.89, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.76, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.89, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.76, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 630.0, 636.0, 581.0, 624.0, 633.0, 581.0, 579.0, 459.0, 630.0, 573.0, 633.0, 630.0, 633.0, 633.0, 627.0, 630.0, 627.0, 630.0, 627.0, 633.0, 636.0, 573.0, 579.0, 570.0, 624.0, 630.0, 636.0, 582.0, 636.0, 582.0, 636.0, 630.0, 627.0, 630.0, 579.0, 639.0, 630.0, 630.0, 582.0, 567.0, 576.0, 633.0, 639.0, 579.0, 624.0, 633.0, 627.0, 627.0, 630.0, 633.0, 630.0, 582.0, 567.0, 351.0, 584.0, 558.0, 639.0, 584.0, 627.0, 633.0, 587.0, 633.0, 587.0, 630.0, 630.0, 582.0, 582.0, 633.0, 633.0, 579.0, 582.0, 633.0, 633.0, 630.0, 636.0, 633.0, 576.0, 582.0, 627.0, 587.0, 587.0, 582.0, 633.0, 639.0, 579.0, 582.0, 576.0, 630.0, 630.0, 615.0, 570.0, 633.0, 579.0, 630.0, 633.0, 587.0, 581.0, 627.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 316.0, 314.0, 322.0, 314.0, 285.0, 296.0, 306.0, 318.0, 317.0, 316.0, 288.0, 293.0, 288.0, 291.0, 235.0, 224.0, 
324.0, 306.0, 285.0, 288.0, 313.0, 320.0, 312.0, 318.0, 315.0, 318.0, 310.0, 323.0, 311.0, 316.0, 308.0, 322.0, 306.0, 321.0, 315.0, 315.0, 321.0, 306.0, 314.0, 319.0, 321.0, 315.0, 283.0, 290.0, 301.0, 278.0, 282.0, 288.0, 305.0, 319.0, 310.0, 320.0, 327.0, 309.0, 288.0, 294.0, 319.0, 317.0, 298.0, 284.0, 316.0, 320.0, 311.0, 319.0, 303.0, 324.0, 316.0, 314.0, 290.0, 289.0, 324.0, 315.0, 314.0, 316.0, 313.0, 317.0, 280.0, 302.0, 284.0, 283.0, 286.0, 290.0, 316.0, 317.0, 311.0, 328.0, 287.0, 292.0, 312.0, 312.0, 311.0, 322.0, 313.0, 314.0, 313.0, 314.0, 306.0, 324.0, 319.0, 314.0, 317.0, 313.0, 293.0, 289.0, 285.0, 282.0, 168.0, 183.0, 289.0, 295.0, 277.0, 281.0, 327.0, 312.0, 285.0, 299.0, 318.0, 309.0, 313.0, 320.0, 288.0, 299.0, 319.0, 314.0, 294.0, 293.0, 316.0, 314.0, 316.0, 314.0, 298.0, 284.0, 278.0, 304.0, 323.0, 310.0, 316.0, 317.0, 285.0, 294.0, 286.0, 296.0, 315.0, 318.0, 326.0, 307.0, 316.0, 314.0, 315.0, 321.0, 314.0, 319.0, 301.0, 275.0, 291.0, 291.0, 308.0, 319.0, 308.0, 279.0, 302.0, 285.0, 291.0, 291.0, 314.0, 319.0, 314.0, 325.0, 301.0, 278.0, 300.0, 282.0, 296.0, 280.0, 313.0, 317.0, 316.0, 314.0, 304.0, 311.0, 290.0, 280.0, 313.0, 320.0, 296.0, 283.0, 308.0, 322.0, 319.0, 314.0, 291.0, 296.0, 295.0, 286.0, 316.0, 311.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0216145913316899, "mean_processing_ms": 0.27255014515744136, "mean_inference_ms": 1.591234908963895}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6312000, "num_steps_sampled": 3366400, "sample_time_ms": 23169.124, "load_time_ms": 37.393, "grad_time_ms": 9419.779, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005937855690717697, "policy_loss": -0.0020803138613700867, "vf_loss": 85.89215087890625, "vf_explained_var": 0.7588068842887878, "kl": 0.001875289366580546, "entropy": 1.142077088356018, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 3366400, "episodes_total": 8416, "training_iteration": 263, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-58-05", "timestamp": 1660255085, "time_this_iter_s": 33.64332914352417, "time_total_s": 13499.168531417847, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13499.168531417847, "timesteps_since_restore": 3366400, "iterations_since_restore": 263, "perf": {"cpu_util_percent": 28.2468085106383, "ram_util_percent": 58.74042553191488}} +{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 605.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 224.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.84}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.28, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.71, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.72, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.61, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.06, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.61, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.61, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 627.0, 579.0, 573.0, 627.0, 579.0, 582.0, 587.0, 582.0, 579.0, 627.0, 582.0, 627.0, 579.0, 627.0, 525.0, 630.0, 587.0, 576.0, 633.0, 636.0, 519.0, 582.0, 576.0, 636.0, 636.0, 582.0, 630.0, 579.0, 633.0, 633.0, 630.0, 630.0, 582.0, 582.0, 633.0, 633.0, 579.0, 582.0, 633.0, 633.0, 630.0, 636.0, 633.0, 576.0, 582.0, 627.0, 587.0, 587.0, 582.0, 633.0, 639.0, 579.0, 582.0, 576.0, 630.0, 630.0, 615.0, 570.0, 633.0, 579.0, 630.0, 633.0, 587.0, 581.0, 627.0, 582.0, 630.0, 630.0, 636.0, 581.0, 624.0, 633.0, 581.0, 579.0, 459.0, 630.0, 573.0, 633.0, 630.0, 633.0, 633.0, 627.0, 630.0, 627.0, 630.0, 627.0, 633.0, 636.0, 573.0, 579.0, 570.0, 624.0, 630.0, 636.0, 582.0, 636.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 289.0, 319.0, 314.0, 308.0, 319.0, 283.0, 296.0, 290.0, 283.0, 316.0, 311.0, 291.0, 288.0, 291.0, 291.0, 294.0, 293.0, 291.0, 291.0, 295.0, 284.0, 313.0, 314.0, 295.0, 287.0, 313.0, 314.0, 291.0, 288.0, 313.0, 314.0, 264.0, 261.0, 304.0, 326.0, 283.0, 304.0, 292.0, 284.0, 309.0, 324.0, 314.0, 322.0, 242.0, 277.0, 294.0, 288.0, 287.0, 289.0, 317.0, 319.0, 320.0, 316.0, 291.0, 291.0, 316.0, 314.0, 283.0, 296.0, 319.0, 314.0, 319.0, 314.0, 316.0, 314.0, 316.0, 314.0, 298.0, 284.0, 278.0, 304.0, 323.0, 310.0, 316.0, 317.0, 285.0, 294.0, 286.0, 296.0, 315.0, 318.0, 326.0, 307.0, 316.0, 314.0, 315.0, 321.0, 314.0, 319.0, 301.0, 275.0, 291.0, 291.0, 308.0, 319.0, 308.0, 279.0, 302.0, 285.0, 291.0, 291.0, 314.0, 319.0, 314.0, 325.0, 301.0, 278.0, 300.0, 282.0, 296.0, 280.0, 313.0, 317.0, 316.0, 314.0, 304.0, 311.0, 290.0, 280.0, 313.0, 320.0, 296.0, 283.0, 308.0, 322.0, 319.0, 314.0, 291.0, 296.0, 295.0, 286.0, 316.0, 311.0, 286.0, 296.0, 313.0, 317.0, 316.0, 314.0, 322.0, 314.0, 285.0, 296.0, 306.0, 318.0, 317.0, 316.0, 288.0, 293.0, 288.0, 291.0, 235.0, 224.0, 324.0, 306.0, 285.0, 288.0, 313.0, 320.0, 312.0, 318.0, 315.0, 318.0, 310.0, 323.0, 311.0, 316.0, 308.0, 322.0, 306.0, 321.0, 315.0, 315.0, 321.0, 306.0, 314.0, 319.0, 321.0, 315.0, 283.0, 290.0, 301.0, 278.0, 282.0, 288.0, 305.0, 319.0, 310.0, 320.0, 327.0, 309.0, 288.0, 294.0, 319.0, 317.0, 298.0, 284.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0194825436603638, "mean_processing_ms": 0.27212618264491645, "mean_inference_ms": 1.590112958914433}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6336000, "num_steps_sampled": 3379200, "sample_time_ms": 23605.349, "load_time_ms": 37.251, "grad_time_ms": 9861.879, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0006588895921595395, "policy_loss": -0.008630036376416683, "vf_loss": 85.43579864501953, 
"vf_explained_var": 0.770875871181488, "kl": 0.0017466336721554399, "entropy": 1.1448642015457153, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3379200, "episodes_total": 8448, "training_iteration": 264, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-58-41", "timestamp": 1660255121, "time_this_iter_s": 36.31201386451721, "time_total_s": 13535.480545282364, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13535.480545282364, "timesteps_since_restore": 3379200, "iterations_since_restore": 264, "perf": {"cpu_util_percent": 29.313461538461542, "ram_util_percent": 59.09038461538463}} +{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 605.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 224.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.72}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.04, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.23, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.66, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.4, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.74, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.21, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.4, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.4, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 633.0, 636.0, 639.0, 633.0, 630.0, 630.0, 633.0, 582.0, 576.0, 579.0, 587.0, 587.0, 582.0, 582.0, 633.0, 582.0, 630.0, 630.0, 621.0, 621.0, 582.0, 630.0, 630.0, 630.0, 564.0, 624.0, 582.0, 627.0, 573.0, 630.0, 587.0, 581.0, 627.0, 582.0, 630.0, 630.0, 636.0, 581.0, 624.0, 633.0, 581.0, 579.0, 459.0, 630.0, 573.0, 633.0, 630.0, 633.0, 633.0, 627.0, 630.0, 627.0, 630.0, 627.0, 633.0, 636.0, 573.0, 579.0, 570.0, 624.0, 630.0, 636.0, 582.0, 636.0, 582.0, 636.0, 579.0, 633.0, 627.0, 579.0, 573.0, 627.0, 579.0, 582.0, 587.0, 582.0, 579.0, 627.0, 582.0, 627.0, 579.0, 627.0, 525.0, 630.0, 587.0, 576.0, 633.0, 636.0, 519.0, 582.0, 576.0, 636.0, 636.0, 582.0, 630.0, 579.0, 633.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 283.0, 299.0, 316.0, 317.0, 316.0, 320.0, 319.0, 320.0, 324.0, 309.0, 321.0, 309.0, 309.0, 321.0, 324.0, 309.0, 289.0, 293.0, 292.0, 284.0, 291.0, 288.0, 293.0, 294.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 316.0, 317.0, 291.0, 291.0, 313.0, 317.0, 316.0, 314.0, 313.0, 308.0, 316.0, 305.0, 292.0, 290.0, 312.0, 318.0, 316.0, 314.0, 314.0, 316.0, 284.0, 280.0, 321.0, 303.0, 301.0, 281.0, 313.0, 314.0, 288.0, 285.0, 314.0, 316.0, 291.0, 296.0, 295.0, 286.0, 316.0, 311.0, 286.0, 296.0, 313.0, 317.0, 316.0, 314.0, 322.0, 314.0, 285.0, 296.0, 306.0, 318.0, 317.0, 316.0, 288.0, 293.0, 288.0, 291.0, 235.0, 224.0, 324.0, 306.0, 285.0, 288.0, 313.0, 320.0, 312.0, 318.0, 315.0, 318.0, 310.0, 323.0, 311.0, 316.0, 308.0, 322.0, 306.0, 321.0, 315.0, 315.0, 321.0, 306.0, 314.0, 319.0, 321.0, 315.0, 283.0, 290.0, 301.0, 278.0, 282.0, 288.0, 305.0, 319.0, 310.0, 320.0, 327.0, 309.0, 288.0, 294.0, 319.0, 317.0, 298.0, 284.0, 316.0, 320.0, 290.0, 289.0, 319.0, 314.0, 308.0, 319.0, 283.0, 296.0, 290.0, 283.0, 316.0, 311.0, 291.0, 288.0, 291.0, 291.0, 294.0, 293.0, 291.0, 291.0, 295.0, 284.0, 313.0, 314.0, 295.0, 287.0, 313.0, 314.0, 291.0, 288.0, 313.0, 314.0, 264.0, 261.0, 304.0, 326.0, 283.0, 304.0, 292.0, 284.0, 309.0, 324.0, 314.0, 322.0, 242.0, 277.0, 294.0, 288.0, 287.0, 289.0, 317.0, 319.0, 320.0, 316.0, 291.0, 291.0, 316.0, 314.0, 283.0, 296.0, 319.0, 314.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0173660844031607, "mean_processing_ms": 0.27170594360174505, "mean_inference_ms": 1.5890528923225553}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6360000, "num_steps_sampled": 3392000, "sample_time_ms": 24130.635, "load_time_ms": 37.048, "grad_time_ms": 9999.028, "update_time_ms": 0.002, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024043002631515265, "policy_loss": -0.005360407754778862, "vf_loss": 83.37548828125, "vf_explained_var": 0.7662093043327332, "kl": 0.001953211845830083, "entropy": 1.145652174949646, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3392000, "episodes_total": 8480, "training_iteration": 265, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-59-19", "timestamp": 1660255159, "time_this_iter_s": 37.638370990753174, "time_total_s": 13573.118916273117, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13573.118916273117, "timesteps_since_restore": 3392000, "iterations_since_restore": 265, "perf": {"cpu_util_percent": 30.500000000000004, "ram_util_percent": 58.69433962264149}} +{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 604.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 302.405}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.21, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.11, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.52, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.61, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.64, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.66, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 3, 
"useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 636.0, 579.0, 624.0, 576.0, 633.0, 630.0, 630.0, 576.0, 633.0, 636.0, 627.0, 582.0, 576.0, 621.0, 639.0, 582.0, 582.0, 630.0, 582.0, 579.0, 533.0, 579.0, 582.0, 630.0, 636.0, 582.0, 636.0, 582.0, 636.0, 579.0, 633.0, 627.0, 579.0, 573.0, 627.0, 579.0, 582.0, 587.0, 582.0, 579.0, 627.0, 582.0, 627.0, 579.0, 627.0, 525.0, 630.0, 587.0, 576.0, 633.0, 636.0, 519.0, 582.0, 576.0, 636.0, 636.0, 582.0, 630.0, 579.0, 633.0, 633.0, 582.0, 582.0, 633.0, 636.0, 639.0, 633.0, 630.0, 630.0, 633.0, 582.0, 576.0, 579.0, 587.0, 587.0, 582.0, 582.0, 633.0, 582.0, 630.0, 630.0, 621.0, 621.0, 582.0, 630.0, 630.0, 630.0, 564.0, 624.0, 582.0, 627.0, 573.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 323.0, 304.0, 288.0, 294.0, 285.0, 297.0, 330.0, 297.0, 321.0, 309.0, 315.0, 315.0, 312.0, 324.0, 295.0, 284.0, 315.0, 309.0, 293.0, 283.0, 322.0, 311.0, 324.0, 306.0, 316.0, 314.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 295.0, 290.0, 286.0, 313.0, 308.0, 314.0, 325.0, 291.0, 291.0, 286.0, 296.0, 309.0, 321.0, 288.0, 294.0, 285.0, 294.0, 267.0, 266.0, 293.0, 286.0, 296.0, 286.0, 314.0, 316.0, 317.0, 319.0, 288.0, 294.0, 319.0, 317.0, 298.0, 284.0, 316.0, 320.0, 290.0, 289.0, 319.0, 314.0, 308.0, 319.0, 283.0, 296.0, 290.0, 283.0, 316.0, 311.0, 291.0, 288.0, 291.0, 291.0, 294.0, 293.0, 291.0, 291.0, 295.0, 284.0, 313.0, 314.0, 295.0, 287.0, 313.0, 314.0, 291.0, 288.0, 313.0, 314.0, 264.0, 261.0, 304.0, 326.0, 283.0, 304.0, 292.0, 284.0, 309.0, 324.0, 314.0, 322.0, 242.0, 277.0, 294.0, 288.0, 287.0, 289.0, 317.0, 319.0, 320.0, 316.0, 291.0, 291.0, 316.0, 314.0, 283.0, 296.0, 319.0, 314.0, 319.0, 314.0, 293.0, 289.0, 283.0, 299.0, 316.0, 317.0, 316.0, 320.0, 319.0, 320.0, 324.0, 309.0, 321.0, 309.0, 309.0, 321.0, 324.0, 309.0, 289.0, 293.0, 292.0, 284.0, 291.0, 288.0, 293.0, 294.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 316.0, 317.0, 291.0, 291.0, 313.0, 317.0, 316.0, 314.0, 313.0, 308.0, 316.0, 305.0, 292.0, 290.0, 312.0, 318.0, 316.0, 314.0, 314.0, 316.0, 284.0, 280.0, 321.0, 303.0, 301.0, 281.0, 313.0, 314.0, 288.0, 285.0, 314.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0152699162864507, "mean_processing_ms": 0.27129002186513274, "mean_inference_ms": 
1.588063531007753}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6384000, "num_steps_sampled": 3404800, "sample_time_ms": 24421.033, "load_time_ms": 37.381, "grad_time_ms": 10403.492, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023645039182156324, "policy_loss": -0.005783146247267723, "vf_loss": 87.20269775390625, "vf_explained_var": 0.7547242045402527, "kl": 0.0018250799039378762, "entropy": 1.1452516317367554, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3404800, "episodes_total": 8512, "training_iteration": 266, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-59-56", "timestamp": 1660255196, "time_this_iter_s": 37.62380003929138, "time_total_s": 13610.742716312408, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13610.742716312408, "timesteps_since_restore": 3404800, "iterations_since_restore": 266, "perf": {"cpu_util_percent": 28.747169811320756, "ram_util_percent": 58.75471698113208}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 608.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 304.32}, "custom_metrics": {"sparse_reward_mean": 211.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.24, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.18, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.56, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.66, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.65, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.59, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.29, 
"dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.59, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, 
"viable_onion_potting_agent_1_mean": 17.59, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 573.0, 542.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 522.0, 576.0, 636.0, 630.0, 587.0, 630.0, 581.0, 633.0, 587.0, 569.0, 627.0, 636.0, 582.0, 633.0, 576.0, 584.0, 633.0, 636.0, 636.0, 624.0, 627.0, 639.0, 636.0, 630.0, 579.0, 633.0, 633.0, 582.0, 582.0, 633.0, 636.0, 639.0, 633.0, 630.0, 630.0, 633.0, 582.0, 576.0, 579.0, 587.0, 587.0, 582.0, 582.0, 633.0, 582.0, 630.0, 630.0, 621.0, 621.0, 582.0, 630.0, 630.0, 630.0, 564.0, 624.0, 582.0, 627.0, 573.0, 630.0, 630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 636.0, 579.0, 
624.0, 576.0, 633.0, 630.0, 630.0, 576.0, 633.0, 636.0, 627.0, 582.0, 576.0, 621.0, 639.0, 582.0, 582.0, 630.0, 582.0, 579.0, 533.0, 579.0, 582.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 313.0, 274.0, 299.0, 276.0, 266.0, 306.0, 324.0, 321.0, 309.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 283.0, 299.0, 262.0, 260.0, 293.0, 283.0, 308.0, 328.0, 311.0, 319.0, 287.0, 300.0, 313.0, 317.0, 285.0, 296.0, 315.0, 318.0, 302.0, 285.0, 282.0, 287.0, 319.0, 308.0, 317.0, 319.0, 291.0, 291.0, 313.0, 320.0, 276.0, 300.0, 295.0, 289.0, 314.0, 319.0, 316.0, 320.0, 320.0, 316.0, 319.0, 305.0, 313.0, 314.0, 324.0, 315.0, 314.0, 322.0, 316.0, 314.0, 283.0, 296.0, 319.0, 314.0, 319.0, 314.0, 293.0, 289.0, 283.0, 299.0, 316.0, 317.0, 316.0, 320.0, 319.0, 320.0, 324.0, 309.0, 321.0, 309.0, 309.0, 321.0, 324.0, 309.0, 289.0, 293.0, 292.0, 284.0, 291.0, 288.0, 293.0, 294.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 316.0, 317.0, 291.0, 291.0, 313.0, 317.0, 316.0, 314.0, 313.0, 308.0, 316.0, 305.0, 292.0, 290.0, 312.0, 318.0, 316.0, 314.0, 314.0, 316.0, 284.0, 280.0, 321.0, 303.0, 301.0, 281.0, 313.0, 314.0, 288.0, 285.0, 314.0, 316.0, 313.0, 317.0, 323.0, 304.0, 288.0, 294.0, 285.0, 297.0, 330.0, 297.0, 321.0, 309.0, 315.0, 315.0, 312.0, 324.0, 295.0, 284.0, 315.0, 309.0, 293.0, 283.0, 322.0, 311.0, 324.0, 306.0, 316.0, 314.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 295.0, 290.0, 286.0, 313.0, 308.0, 314.0, 325.0, 291.0, 291.0, 286.0, 296.0, 309.0, 
321.0, 288.0, 294.0, 285.0, 294.0, 267.0, 266.0, 293.0, 286.0, 296.0, 286.0, 314.0, 316.0, 317.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0132060848493245, "mean_processing_ms": 0.27088101809255155, "mean_inference_ms": 1.5871323468376288}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6408000, "num_steps_sampled": 3417600, "sample_time_ms": 24236.974, "load_time_ms": 37.027, "grad_time_ms": 10553.07, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002214438281953335, "policy_loss": -0.00573044503107667, "vf_loss": 85.19269561767578, "vf_explained_var": 0.7630549073219299, "kl": 0.0019984643440693617, "entropy": 1.1487520933151245, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3417600, "episodes_total": 8544, "training_iteration": 267, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-00-32", "timestamp": 1660255232, "time_this_iter_s": 35.333903789520264, "time_total_s": 13646.076620101929, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13646.076620101929, "timesteps_since_restore": 3417600, "iterations_since_restore": 267, "perf": {"cpu_util_percent": 32.378, "ram_util_percent": 58.867999999999995}} +{"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 603.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 301.95}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.1, 
"shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.06, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.73, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.05, 
"potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.31, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 576.0, 630.0, 636.0, 582.0, 522.0, 639.0, 587.0, 582.0, 473.0, 633.0, 630.0, 584.0, 581.0, 636.0, 587.0, 579.0, 402.0, 627.0, 630.0, 630.0, 636.0, 627.0, 633.0, 636.0, 582.0, 627.0, 462.0, 630.0, 630.0, 633.0, 582.0, 627.0, 573.0, 630.0, 630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 636.0, 579.0, 624.0, 576.0, 
633.0, 630.0, 630.0, 576.0, 633.0, 636.0, 627.0, 582.0, 576.0, 621.0, 639.0, 582.0, 582.0, 630.0, 582.0, 579.0, 533.0, 579.0, 582.0, 630.0, 636.0, 630.0, 573.0, 542.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 522.0, 576.0, 636.0, 630.0, 587.0, 630.0, 581.0, 633.0, 587.0, 569.0, 627.0, 636.0, 582.0, 633.0, 576.0, 584.0, 633.0, 636.0, 636.0, 624.0, 627.0, 639.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 308.0, 322.0, 285.0, 291.0, 321.0, 309.0, 321.0, 315.0, 294.0, 288.0, 266.0, 256.0, 322.0, 317.0, 295.0, 292.0, 296.0, 286.0, 229.0, 244.0, 331.0, 302.0, 316.0, 314.0, 287.0, 297.0, 296.0, 285.0, 327.0, 309.0, 291.0, 296.0, 284.0, 295.0, 202.0, 200.0, 308.0, 319.0, 311.0, 319.0, 322.0, 308.0, 316.0, 320.0, 306.0, 321.0, 317.0, 316.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 242.0, 220.0, 313.0, 317.0, 324.0, 306.0, 322.0, 311.0, 301.0, 281.0, 313.0, 314.0, 288.0, 285.0, 314.0, 316.0, 313.0, 317.0, 323.0, 304.0, 288.0, 294.0, 285.0, 297.0, 330.0, 297.0, 321.0, 309.0, 315.0, 315.0, 312.0, 324.0, 295.0, 284.0, 315.0, 309.0, 293.0, 283.0, 322.0, 311.0, 324.0, 306.0, 316.0, 314.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 295.0, 290.0, 286.0, 313.0, 308.0, 314.0, 325.0, 291.0, 291.0, 286.0, 296.0, 309.0, 321.0, 288.0, 294.0, 285.0, 294.0, 267.0, 266.0, 293.0, 286.0, 296.0, 286.0, 314.0, 316.0, 317.0, 319.0, 317.0, 313.0, 274.0, 299.0, 276.0, 266.0, 306.0, 324.0, 321.0, 309.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 283.0, 299.0, 262.0, 
260.0, 293.0, 283.0, 308.0, 328.0, 311.0, 319.0, 287.0, 300.0, 313.0, 317.0, 285.0, 296.0, 315.0, 318.0, 302.0, 285.0, 282.0, 287.0, 319.0, 308.0, 317.0, 319.0, 291.0, 291.0, 313.0, 320.0, 276.0, 300.0, 295.0, 289.0, 314.0, 319.0, 316.0, 320.0, 320.0, 316.0, 319.0, 305.0, 313.0, 314.0, 324.0, 315.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0111539095910416, "mean_processing_ms": 0.2704732512620114, "mean_inference_ms": 1.5859630571947259}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6432000, "num_steps_sampled": 3430400, "sample_time_ms": 24395.085, "load_time_ms": 36.597, "grad_time_ms": 10568.153, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002145373960956931, "policy_loss": -0.006650958210229874, "vf_loss": 93.7173843383789, "vf_explained_var": 0.7514896392822266, "kl": 0.0022997509222477674, "entropy": 1.150799036026001, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3430400, "episodes_total": 8576, "training_iteration": 268, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-01-05", "timestamp": 1660255265, "time_this_iter_s": 33.08881878852844, "time_total_s": 13679.165438890457, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13679.165438890457, "timesteps_since_restore": 3430400, "iterations_since_restore": 268, "perf": {"cpu_util_percent": 29.461702127659574, "ram_util_percent": 58.71702127659575}} +{"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 603.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 200.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 301.685}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 184.97, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.71, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.93, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.77, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.87, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.31, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.16, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.47, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.16, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.47, "optimal_onion_potting_agent_1_min": 12, 
"optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.16, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.47, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 561.0, 582.0, 627.0, 627.0, 633.0, 630.0, 636.0, 633.0, 558.0, 582.0, 579.0, 639.0, 636.0, 570.0, 582.0, 587.0, 
639.0, 627.0, 584.0, 587.0, 579.0, 582.0, 624.0, 579.0, 636.0, 633.0, 633.0, 582.0, 627.0, 576.0, 570.0, 579.0, 582.0, 630.0, 636.0, 630.0, 573.0, 542.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 522.0, 576.0, 636.0, 630.0, 587.0, 630.0, 581.0, 633.0, 587.0, 569.0, 627.0, 636.0, 582.0, 633.0, 576.0, 584.0, 633.0, 636.0, 636.0, 624.0, 627.0, 639.0, 636.0, 579.0, 630.0, 576.0, 630.0, 636.0, 582.0, 522.0, 639.0, 587.0, 582.0, 473.0, 633.0, 630.0, 584.0, 581.0, 636.0, 587.0, 579.0, 402.0, 627.0, 630.0, 630.0, 636.0, 627.0, 633.0, 636.0, 582.0, 627.0, 462.0, 630.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 289.0, 272.0, 286.0, 296.0, 317.0, 310.0, 313.0, 314.0, 324.0, 309.0, 313.0, 317.0, 312.0, 324.0, 311.0, 322.0, 276.0, 282.0, 293.0, 289.0, 293.0, 286.0, 316.0, 323.0, 305.0, 331.0, 291.0, 279.0, 290.0, 292.0, 292.0, 295.0, 317.0, 322.0, 303.0, 324.0, 298.0, 286.0, 293.0, 294.0, 293.0, 286.0, 290.0, 292.0, 308.0, 316.0, 278.0, 301.0, 314.0, 322.0, 312.0, 321.0, 316.0, 317.0, 285.0, 297.0, 313.0, 314.0, 280.0, 296.0, 288.0, 282.0, 293.0, 286.0, 296.0, 286.0, 314.0, 316.0, 317.0, 319.0, 317.0, 313.0, 274.0, 299.0, 276.0, 266.0, 306.0, 324.0, 321.0, 309.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 283.0, 299.0, 262.0, 260.0, 293.0, 283.0, 308.0, 328.0, 311.0, 319.0, 287.0, 300.0, 313.0, 317.0, 285.0, 296.0, 315.0, 318.0, 302.0, 285.0, 282.0, 287.0, 319.0, 308.0, 317.0, 319.0, 291.0, 291.0, 313.0, 320.0, 276.0, 300.0, 295.0, 289.0, 314.0, 319.0, 316.0, 
320.0, 320.0, 316.0, 319.0, 305.0, 313.0, 314.0, 324.0, 315.0, 314.0, 322.0, 288.0, 291.0, 308.0, 322.0, 285.0, 291.0, 321.0, 309.0, 321.0, 315.0, 294.0, 288.0, 266.0, 256.0, 322.0, 317.0, 295.0, 292.0, 296.0, 286.0, 229.0, 244.0, 331.0, 302.0, 316.0, 314.0, 287.0, 297.0, 296.0, 285.0, 327.0, 309.0, 291.0, 296.0, 284.0, 295.0, 202.0, 200.0, 308.0, 319.0, 311.0, 319.0, 322.0, 308.0, 316.0, 320.0, 306.0, 321.0, 317.0, 316.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 242.0, 220.0, 313.0, 317.0, 324.0, 306.0, 322.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0091026125530362, "mean_processing_ms": 0.27006446196399875, "mean_inference_ms": 1.584328225519923}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6456000, "num_steps_sampled": 3443200, "sample_time_ms": 23964.302, "load_time_ms": 37.107, "grad_time_ms": 10520.809, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005307864397764206, "policy_loss": -0.0031857620924711227, "vf_loss": 90.64007568359375, "vf_explained_var": 0.7599647641181946, "kl": 0.0021453702356666327, "entropy": 1.14076566696167, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3443200, "episodes_total": 8608, "training_iteration": 269, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-01-34", "timestamp": 1660255294, "time_this_iter_s": 29.06058406829834, "time_total_s": 13708.226022958755, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13708.226022958755, "timesteps_since_restore": 3443200, "iterations_since_restore": 269, "perf": {"cpu_util_percent": 32.897560975609764, 
"ram_util_percent": 58.64878048780488}} +{"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 601.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 300.96}, "custom_metrics": {"sparse_reward_mean": 208.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.12, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.36, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.66, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.57, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, 
"onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.97, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 584.0, 587.0, 636.0, 564.0, 630.0, 587.0, 582.0, 639.0, 630.0, 587.0, 579.0, 639.0, 587.0, 636.0, 530.0, 636.0, 630.0, 633.0, 530.0, 633.0, 639.0, 636.0, 582.0, 587.0, 582.0, 627.0, 587.0, 576.0, 639.0, 590.0, 624.0, 627.0, 639.0, 636.0, 579.0, 630.0, 576.0, 630.0, 636.0, 582.0, 522.0, 639.0, 587.0, 582.0, 473.0, 633.0, 630.0, 584.0, 581.0, 636.0, 587.0, 579.0, 402.0, 627.0, 630.0, 630.0, 636.0, 627.0, 633.0, 636.0, 582.0, 627.0, 462.0, 630.0, 630.0, 633.0, 633.0, 561.0, 582.0, 627.0, 627.0, 633.0, 630.0, 636.0, 633.0, 558.0, 582.0, 579.0, 639.0, 636.0, 570.0, 582.0, 587.0, 639.0, 627.0, 584.0, 587.0, 579.0, 582.0, 624.0, 579.0, 636.0, 633.0, 633.0, 582.0, 627.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 294.0, 282.0, 293.0, 291.0, 296.0, 291.0, 315.0, 321.0, 282.0, 282.0, 322.0, 308.0, 308.0, 279.0, 288.0, 294.0, 314.0, 325.0, 311.0, 319.0, 293.0, 294.0, 288.0, 291.0, 319.0, 320.0, 288.0, 299.0, 319.0, 317.0, 268.0, 262.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 258.0, 272.0, 314.0, 319.0, 325.0, 314.0, 321.0, 315.0, 283.0, 299.0, 293.0, 294.0, 288.0, 294.0, 316.0, 311.0, 290.0, 297.0, 285.0, 291.0, 322.0, 317.0, 291.0, 299.0, 319.0, 305.0, 313.0, 314.0, 324.0, 315.0, 314.0, 322.0, 288.0, 291.0, 308.0, 322.0, 285.0, 291.0, 321.0, 309.0, 321.0, 315.0, 294.0, 288.0, 266.0, 256.0, 322.0, 317.0, 295.0, 292.0, 296.0, 
286.0, 229.0, 244.0, 331.0, 302.0, 316.0, 314.0, 287.0, 297.0, 296.0, 285.0, 327.0, 309.0, 291.0, 296.0, 284.0, 295.0, 202.0, 200.0, 308.0, 319.0, 311.0, 319.0, 322.0, 308.0, 316.0, 320.0, 306.0, 321.0, 317.0, 316.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 242.0, 220.0, 313.0, 317.0, 324.0, 306.0, 322.0, 311.0, 321.0, 312.0, 289.0, 272.0, 286.0, 296.0, 317.0, 310.0, 313.0, 314.0, 324.0, 309.0, 313.0, 317.0, 312.0, 324.0, 311.0, 322.0, 276.0, 282.0, 293.0, 289.0, 293.0, 286.0, 316.0, 323.0, 305.0, 331.0, 291.0, 279.0, 290.0, 292.0, 292.0, 295.0, 317.0, 322.0, 303.0, 324.0, 298.0, 286.0, 293.0, 294.0, 293.0, 286.0, 290.0, 292.0, 308.0, 316.0, 278.0, 301.0, 314.0, 322.0, 312.0, 321.0, 316.0, 317.0, 285.0, 297.0, 313.0, 314.0, 280.0, 296.0, 288.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0070340183284676, "mean_processing_ms": 0.26964921546752857, "mean_inference_ms": 1.5821965593270972}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6480000, "num_steps_sampled": 3456000, "sample_time_ms": 23540.277, "load_time_ms": 37.062, "grad_time_ms": 10283.314, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001998053165152669, "policy_loss": -0.00615869602188468, "vf_loss": 87.24394989013672, "vf_explained_var": 0.7729328274726868, "kl": 0.00186056864913553, "entropy": 1.1353095769882202, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3456000, "episodes_total": 8640, "training_iteration": 270, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-02-02", "timestamp": 1660255322, "time_this_iter_s": 27.856099128723145, "time_total_s": 13736.082122087479, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13736.082122087479, "timesteps_since_restore": 3456000, "iterations_since_restore": 270, "perf": {"cpu_util_percent": 34.1025641025641, "ram_util_percent": 58.69999999999998}} +{"episode_reward_max": 639.0, "episode_reward_min": 436.0, "episode_reward_mean": 604.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 211.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 302.35}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 186.3, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.38, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.65, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 
16.67, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.82, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.91, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.47, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 2, 
"soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.47, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.47, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 627.0, 636.0, 633.0, 636.0, 627.0, 633.0, 579.0, 582.0, 573.0, 633.0, 570.0, 624.0, 627.0, 639.0, 636.0, 633.0, 636.0, 630.0, 579.0, 576.0, 436.0, 639.0, 587.0, 636.0, 636.0, 639.0, 587.0, 636.0, 587.0, 639.0, 462.0, 630.0, 630.0, 633.0, 633.0, 561.0, 582.0, 627.0, 627.0, 633.0, 630.0, 636.0, 633.0, 558.0, 582.0, 579.0, 639.0, 636.0, 570.0, 582.0, 587.0, 639.0, 627.0, 584.0, 587.0, 579.0, 582.0, 624.0, 579.0, 636.0, 633.0, 633.0, 582.0, 627.0, 576.0, 570.0, 582.0, 576.0, 584.0, 587.0, 636.0, 564.0, 630.0, 587.0, 582.0, 639.0, 630.0, 587.0, 579.0, 639.0, 587.0, 636.0, 530.0, 636.0, 630.0, 633.0, 530.0, 633.0, 639.0, 636.0, 582.0, 587.0, 582.0, 627.0, 587.0, 576.0, 639.0, 590.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 284.0, 303.0, 315.0, 312.0, 314.0, 322.0, 311.0, 322.0, 319.0, 317.0, 310.0, 317.0, 324.0, 309.0, 289.0, 290.0, 294.0, 288.0, 284.0, 289.0, 303.0, 330.0, 277.0, 293.0, 316.0, 308.0, 321.0, 306.0, 314.0, 325.0, 319.0, 317.0, 314.0, 319.0, 331.0, 305.0, 321.0, 309.0, 293.0, 286.0, 288.0, 288.0, 211.0, 225.0, 312.0, 327.0, 293.0, 294.0, 314.0, 322.0, 317.0, 319.0, 314.0, 325.0, 302.0, 285.0, 314.0, 322.0, 293.0, 
294.0, 317.0, 322.0, 242.0, 220.0, 313.0, 317.0, 324.0, 306.0, 322.0, 311.0, 321.0, 312.0, 289.0, 272.0, 286.0, 296.0, 317.0, 310.0, 313.0, 314.0, 324.0, 309.0, 313.0, 317.0, 312.0, 324.0, 311.0, 322.0, 276.0, 282.0, 293.0, 289.0, 293.0, 286.0, 316.0, 323.0, 305.0, 331.0, 291.0, 279.0, 290.0, 292.0, 292.0, 295.0, 317.0, 322.0, 303.0, 324.0, 298.0, 286.0, 293.0, 294.0, 293.0, 286.0, 290.0, 292.0, 308.0, 316.0, 278.0, 301.0, 314.0, 322.0, 312.0, 321.0, 316.0, 317.0, 285.0, 297.0, 313.0, 314.0, 280.0, 296.0, 288.0, 282.0, 293.0, 289.0, 294.0, 282.0, 293.0, 291.0, 296.0, 291.0, 315.0, 321.0, 282.0, 282.0, 322.0, 308.0, 308.0, 279.0, 288.0, 294.0, 314.0, 325.0, 311.0, 319.0, 293.0, 294.0, 288.0, 291.0, 319.0, 320.0, 288.0, 299.0, 319.0, 317.0, 268.0, 262.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 258.0, 272.0, 314.0, 319.0, 325.0, 314.0, 321.0, 315.0, 283.0, 299.0, 293.0, 294.0, 288.0, 294.0, 316.0, 311.0, 290.0, 297.0, 285.0, 291.0, 322.0, 317.0, 291.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0049654393783378, "mean_processing_ms": 0.26923341324594424, "mean_inference_ms": 1.5798214340992636}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6504000, "num_steps_sampled": 3468800, "sample_time_ms": 23269.747, "load_time_ms": 36.886, "grad_time_ms": 10264.548, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00183187669608742, "policy_loss": -0.006337564438581467, "vf_loss": 87.34710693359375, "vf_explained_var": 0.7639560103416443, "kl": 0.0018626012606546283, "entropy": 1.1305490732192993, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3468800, "episodes_total": 8672, "training_iteration": 271, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-02-32", "timestamp": 1660255352, "time_this_iter_s": 29.946385145187378, "time_total_s": 13766.028507232666, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13766.028507232666, "timesteps_since_restore": 3468800, "iterations_since_restore": 271, "perf": {"cpu_util_percent": 32.345238095238095, "ram_util_percent": 58.68095238095239}} +{"episode_reward_max": 639.0, "episode_reward_min": 371.0, "episode_reward_mean": 604.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 302.17}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.14, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, 
"onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.6, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.6, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.77, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.58, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 2, 
"soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.38, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.58, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.58, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 627.0, 636.0, 636.0, 633.0, 582.0, 582.0, 639.0, 639.0, 582.0, 639.0, 425.0, 587.0, 630.0, 636.0, 582.0, 636.0, 582.0, 371.0, 579.0, 582.0, 636.0, 636.0, 636.0, 636.0, 636.0, 627.0, 579.0, 639.0, 636.0, 582.0, 636.0, 582.0, 627.0, 576.0, 570.0, 582.0, 576.0, 584.0, 587.0, 636.0, 564.0, 630.0, 587.0, 582.0, 639.0, 630.0, 587.0, 579.0, 639.0, 587.0, 636.0, 530.0, 636.0, 630.0, 633.0, 530.0, 633.0, 639.0, 636.0, 582.0, 587.0, 582.0, 627.0, 587.0, 576.0, 639.0, 590.0, 582.0, 587.0, 627.0, 636.0, 633.0, 636.0, 627.0, 633.0, 579.0, 582.0, 573.0, 633.0, 570.0, 624.0, 627.0, 639.0, 636.0, 633.0, 636.0, 630.0, 579.0, 576.0, 436.0, 639.0, 587.0, 636.0, 636.0, 639.0, 587.0, 636.0, 587.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 322.0, 321.0, 306.0, 319.0, 317.0, 324.0, 312.0, 309.0, 324.0, 293.0, 289.0, 291.0, 291.0, 317.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 320.0, 206.0, 219.0, 301.0, 286.0, 314.0, 316.0, 314.0, 322.0, 285.0, 297.0, 316.0, 320.0, 288.0, 294.0, 198.0, 
173.0, 291.0, 288.0, 295.0, 287.0, 321.0, 315.0, 311.0, 325.0, 314.0, 322.0, 322.0, 314.0, 326.0, 310.0, 314.0, 313.0, 286.0, 293.0, 319.0, 320.0, 317.0, 319.0, 291.0, 291.0, 309.0, 327.0, 285.0, 297.0, 313.0, 314.0, 280.0, 296.0, 288.0, 282.0, 293.0, 289.0, 294.0, 282.0, 293.0, 291.0, 296.0, 291.0, 315.0, 321.0, 282.0, 282.0, 322.0, 308.0, 308.0, 279.0, 288.0, 294.0, 314.0, 325.0, 311.0, 319.0, 293.0, 294.0, 288.0, 291.0, 319.0, 320.0, 288.0, 299.0, 319.0, 317.0, 268.0, 262.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 258.0, 272.0, 314.0, 319.0, 325.0, 314.0, 321.0, 315.0, 283.0, 299.0, 293.0, 294.0, 288.0, 294.0, 316.0, 311.0, 290.0, 297.0, 285.0, 291.0, 322.0, 317.0, 291.0, 299.0, 294.0, 288.0, 284.0, 303.0, 315.0, 312.0, 314.0, 322.0, 311.0, 322.0, 319.0, 317.0, 310.0, 317.0, 324.0, 309.0, 289.0, 290.0, 294.0, 288.0, 284.0, 289.0, 303.0, 330.0, 277.0, 293.0, 316.0, 308.0, 321.0, 306.0, 314.0, 325.0, 319.0, 317.0, 314.0, 319.0, 331.0, 305.0, 321.0, 309.0, 293.0, 286.0, 288.0, 288.0, 211.0, 225.0, 312.0, 327.0, 293.0, 294.0, 314.0, 322.0, 317.0, 319.0, 314.0, 325.0, 302.0, 285.0, 314.0, 322.0, 293.0, 294.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.002917566902111, "mean_processing_ms": 0.26882194776394586, "mean_inference_ms": 1.577540173061489}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6528000, "num_steps_sampled": 3481600, "sample_time_ms": 22780.285, "load_time_ms": 36.737, "grad_time_ms": 10254.225, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0052925958298146725, "policy_loss": -0.002937593497335911, "vf_loss": 88.02587890625, "vf_explained_var": 0.7725896835327148, "kl": 0.0019184405682608485, "entropy": 1.144766926765442, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3481600, "episodes_total": 8704, "training_iteration": 272, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-03-02", "timestamp": 1660255382, "time_this_iter_s": 30.47255301475525, "time_total_s": 13796.501060247421, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13796.501060247421, "timesteps_since_restore": 3481600, "iterations_since_restore": 272, "perf": {"cpu_util_percent": 32.334090909090904, "ram_util_percent": 58.665909090909096}} +{"episode_reward_max": 639.0, "episode_reward_min": 371.0, "episode_reward_mean": 611.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 305.985}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.77, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.15, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.89, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.97, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.55, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.91, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.69, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.91, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.91, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 615.0, 636.0, 630.0, 636.0, 576.0, 633.0, 627.0, 639.0, 582.0, 633.0, 582.0, 633.0, 627.0, 630.0, 636.0, 582.0, 633.0, 627.0, 639.0, 639.0, 627.0, 633.0, 587.0, 576.0, 639.0, 590.0, 582.0, 587.0, 627.0, 636.0, 633.0, 636.0, 627.0, 633.0, 579.0, 582.0, 573.0, 633.0, 570.0, 624.0, 627.0, 639.0, 636.0, 633.0, 636.0, 630.0, 579.0, 576.0, 436.0, 639.0, 587.0, 636.0, 636.0, 639.0, 587.0, 636.0, 587.0, 639.0, 633.0, 627.0, 636.0, 636.0, 633.0, 582.0, 582.0, 639.0, 639.0, 582.0, 639.0, 425.0, 587.0, 630.0, 636.0, 582.0, 636.0, 582.0, 371.0, 579.0, 582.0, 636.0, 636.0, 636.0, 636.0, 636.0, 627.0, 579.0, 639.0, 636.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 298.0, 318.0, 312.0, 318.0, 
312.0, 324.0, 309.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 314.0, 322.0, 316.0, 317.0, 322.0, 314.0, 305.0, 310.0, 325.0, 311.0, 321.0, 309.0, 319.0, 317.0, 296.0, 280.0, 317.0, 316.0, 321.0, 306.0, 324.0, 315.0, 296.0, 286.0, 321.0, 312.0, 288.0, 294.0, 318.0, 315.0, 316.0, 311.0, 320.0, 310.0, 317.0, 319.0, 294.0, 288.0, 318.0, 315.0, 316.0, 311.0, 322.0, 317.0, 327.0, 312.0, 316.0, 311.0, 308.0, 325.0, 290.0, 297.0, 285.0, 291.0, 322.0, 317.0, 291.0, 299.0, 294.0, 288.0, 284.0, 303.0, 315.0, 312.0, 314.0, 322.0, 311.0, 322.0, 319.0, 317.0, 310.0, 317.0, 324.0, 309.0, 289.0, 290.0, 294.0, 288.0, 284.0, 289.0, 303.0, 330.0, 277.0, 293.0, 316.0, 308.0, 321.0, 306.0, 314.0, 325.0, 319.0, 317.0, 314.0, 319.0, 331.0, 305.0, 321.0, 309.0, 293.0, 286.0, 288.0, 288.0, 211.0, 225.0, 312.0, 327.0, 293.0, 294.0, 314.0, 322.0, 317.0, 319.0, 314.0, 325.0, 302.0, 285.0, 314.0, 322.0, 293.0, 294.0, 317.0, 322.0, 311.0, 322.0, 321.0, 306.0, 319.0, 317.0, 324.0, 312.0, 309.0, 324.0, 293.0, 289.0, 291.0, 291.0, 317.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 320.0, 206.0, 219.0, 301.0, 286.0, 314.0, 316.0, 314.0, 322.0, 285.0, 297.0, 316.0, 320.0, 288.0, 294.0, 198.0, 173.0, 291.0, 288.0, 295.0, 287.0, 321.0, 315.0, 311.0, 325.0, 314.0, 322.0, 322.0, 314.0, 326.0, 310.0, 314.0, 313.0, 286.0, 293.0, 319.0, 320.0, 317.0, 319.0, 291.0, 291.0, 309.0, 327.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0008984560803753, "mean_processing_ms": 0.26841904280245515, "mean_inference_ms": 1.5754578335271856}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6552000, "num_steps_sampled": 3494400, "sample_time_ms": 22479.799, "load_time_ms": 36.968, "grad_time_ms": 10345.078, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007351151201874018, "policy_loss": -0.0004584121925290674, "vf_loss": 83.76141357421875, "vf_explained_var": 0.7616392970085144, "kl": 0.0025154289323836565, "entropy": 1.1331415176391602, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3494400, "episodes_total": 8736, "training_iteration": 273, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-03-34", "timestamp": 1660255414, "time_this_iter_s": 31.54677987098694, "time_total_s": 13828.047840118408, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13828.047840118408, "timesteps_since_restore": 3494400, "iterations_since_restore": 273, "perf": {"cpu_util_percent": 36.990909090909085, "ram_util_percent": 59.28409090909091}} +{"episode_reward_max": 639.0, "episode_reward_min": 371.0, "episode_reward_mean": 613.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 306.765}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.73, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.81, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.5, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.98, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.48, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 630.0, 630.0, 582.0, 522.0, 587.0, 630.0, 627.0, 633.0, 633.0, 630.0, 587.0, 582.0, 633.0, 636.0, 636.0, 621.0, 627.0, 587.0, 573.0, 630.0, 639.0, 630.0, 630.0, 636.0, 567.0, 579.0, 582.0, 579.0, 633.0, 639.0, 587.0, 636.0, 587.0, 639.0, 633.0, 627.0, 636.0, 636.0, 633.0, 582.0, 582.0, 639.0, 639.0, 582.0, 639.0, 425.0, 587.0, 630.0, 636.0, 582.0, 636.0, 582.0, 371.0, 579.0, 582.0, 636.0, 636.0, 636.0, 636.0, 636.0, 627.0, 579.0, 639.0, 636.0, 582.0, 636.0, 587.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 615.0, 636.0, 630.0, 636.0, 576.0, 633.0, 627.0, 639.0, 582.0, 633.0, 582.0, 633.0, 627.0, 630.0, 636.0, 582.0, 633.0, 627.0, 639.0, 639.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 320.0, 316.0, 314.0, 316.0, 313.0, 317.0, 286.0, 296.0, 260.0, 262.0, 293.0, 294.0, 311.0, 319.0, 321.0, 306.0, 318.0, 315.0, 314.0, 319.0, 319.0, 311.0, 301.0, 286.0, 286.0, 296.0, 311.0, 322.0, 319.0, 317.0, 322.0, 314.0, 305.0, 316.0, 316.0, 311.0, 298.0, 289.0, 279.0, 294.0, 324.0, 306.0, 321.0, 318.0, 319.0, 311.0, 316.0, 314.0, 321.0, 315.0, 281.0, 286.0, 282.0, 297.0, 280.0, 302.0, 291.0, 288.0, 303.0, 330.0, 322.0, 317.0, 302.0, 285.0, 314.0, 322.0, 293.0, 294.0, 317.0, 322.0, 311.0, 322.0, 321.0, 306.0, 319.0, 317.0, 324.0, 312.0, 309.0, 324.0, 293.0, 289.0, 291.0, 291.0, 317.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 320.0, 206.0, 219.0, 301.0, 286.0, 314.0, 316.0, 314.0, 322.0, 285.0, 297.0, 316.0, 320.0, 288.0, 294.0, 198.0, 173.0, 291.0, 288.0, 295.0, 287.0, 321.0, 315.0, 311.0, 325.0, 314.0, 322.0, 322.0, 314.0, 326.0, 310.0, 314.0, 313.0, 286.0, 293.0, 319.0, 320.0, 317.0, 319.0, 291.0, 291.0, 309.0, 327.0, 289.0, 298.0, 318.0, 312.0, 318.0, 312.0, 324.0, 309.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 314.0, 322.0, 316.0, 317.0, 322.0, 314.0, 305.0, 310.0, 325.0, 311.0, 321.0, 309.0, 319.0, 317.0, 296.0, 280.0, 317.0, 316.0, 321.0, 306.0, 324.0, 315.0, 296.0, 286.0, 321.0, 312.0, 288.0, 294.0, 318.0, 315.0, 316.0, 311.0, 320.0, 310.0, 317.0, 319.0, 294.0, 288.0, 318.0, 315.0, 316.0, 311.0, 322.0, 317.0, 327.0, 312.0, 316.0, 311.0, 308.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9989007894762186, "mean_processing_ms": 0.2680204764201923, "mean_inference_ms": 1.5734861760997552}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6576000, "num_steps_sampled": 3507200, "sample_time_ms": 22242.32, "load_time_ms": 36.671, "grad_time_ms": 10040.843, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.0004968150169588625, "policy_loss": -0.007466705050319433, "vf_loss": 85.29949188232422, "vf_explained_var": 0.7543535232543945, "kl": 0.0017724571516737342, "entropy": 1.1328660249710083, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3507200, "episodes_total": 8768, "training_iteration": 274, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-04-05", "timestamp": 1660255445, "time_this_iter_s": 30.891488075256348, "time_total_s": 13858.939328193665, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13858.939328193665, "timesteps_since_restore": 3507200, "iterations_since_restore": 274, "perf": {"cpu_util_percent": 32.32954545454545, "ram_util_percent": 58.86590909090909}} +{"episode_reward_max": 639.0, "episode_reward_min": 362.0, "episode_reward_mean": 610.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 305.345}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.09, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.19, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.76, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.45, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.88, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.31, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 522.0, 390.0, 582.0, 516.0, 630.0, 627.0, 587.0, 633.0, 582.0, 636.0, 582.0, 447.0, 630.0, 639.0, 636.0, 636.0, 633.0, 362.0, 627.0, 636.0, 630.0, 639.0, 579.0, 621.0, 639.0, 639.0, 587.0, 636.0, 587.0, 630.0, 639.0, 636.0, 582.0, 636.0, 587.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 615.0, 636.0, 630.0, 636.0, 576.0, 633.0, 627.0, 639.0, 582.0, 633.0, 582.0, 633.0, 627.0, 630.0, 636.0, 582.0, 633.0, 627.0, 639.0, 639.0, 627.0, 633.0, 633.0, 636.0, 630.0, 630.0, 582.0, 522.0, 587.0, 630.0, 627.0, 633.0, 633.0, 630.0, 587.0, 582.0, 633.0, 636.0, 636.0, 621.0, 627.0, 587.0, 573.0, 630.0, 639.0, 630.0, 630.0, 636.0, 567.0, 579.0, 582.0, 579.0, 633.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 314.0, 322.0, 260.0, 262.0, 197.0, 193.0, 291.0, 291.0, 256.0, 260.0, 314.0, 316.0, 308.0, 319.0, 286.0, 301.0, 311.0, 322.0, 289.0, 293.0, 322.0, 314.0, 288.0, 294.0, 221.0, 226.0, 318.0, 312.0, 317.0, 322.0, 317.0, 319.0, 318.0, 318.0, 309.0, 324.0, 182.0, 180.0, 316.0, 311.0, 316.0, 320.0, 319.0, 311.0, 319.0, 320.0, 294.0, 285.0, 301.0, 320.0, 327.0, 312.0, 314.0, 325.0, 281.0, 306.0, 316.0, 320.0, 301.0, 286.0, 316.0, 314.0, 319.0, 320.0, 317.0, 319.0, 291.0, 291.0, 309.0, 327.0, 289.0, 298.0, 318.0, 312.0, 318.0, 312.0, 324.0, 309.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 314.0, 322.0, 316.0, 317.0, 322.0, 314.0, 305.0, 310.0, 325.0, 311.0, 321.0, 309.0, 319.0, 317.0, 296.0, 280.0, 317.0, 316.0, 321.0, 306.0, 324.0, 315.0, 296.0, 286.0, 321.0, 312.0, 288.0, 294.0, 318.0, 315.0, 316.0, 311.0, 320.0, 310.0, 317.0, 319.0, 294.0, 288.0, 318.0, 315.0, 316.0, 311.0, 322.0, 317.0, 327.0, 312.0, 316.0, 311.0, 308.0, 325.0, 316.0, 317.0, 320.0, 316.0, 314.0, 316.0, 313.0, 317.0, 286.0, 296.0, 260.0, 262.0, 293.0, 294.0, 311.0, 319.0, 321.0, 306.0, 318.0, 315.0, 314.0, 319.0, 319.0, 311.0, 301.0, 286.0, 286.0, 296.0, 311.0, 322.0, 319.0, 317.0, 322.0, 314.0, 305.0, 316.0, 316.0, 311.0, 298.0, 289.0, 279.0, 294.0, 324.0, 306.0, 321.0, 318.0, 319.0, 311.0, 316.0, 314.0, 321.0, 315.0, 281.0, 286.0, 282.0, 297.0, 280.0, 302.0, 291.0, 288.0, 303.0, 330.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9969205996800542, "mean_processing_ms": 0.26762558358937055, "mean_inference_ms": 1.571468951953135}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6600000, 
"num_steps_sampled": 3520000, "sample_time_ms": 21659.121, "load_time_ms": 36.576, "grad_time_ms": 9832.58, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005525531247258186, "policy_loss": -0.002899330807849765, "vf_loss": 89.90011596679688, "vf_explained_var": 0.7821382880210876, "kl": 0.002336545381695032, "entropy": 1.130285382270813, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3520000, "episodes_total": 8800, "training_iteration": 275, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-04-34", "timestamp": 1660255474, "time_this_iter_s": 29.723124265670776, "time_total_s": 13888.662452459335, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13888.662452459335, "timesteps_since_restore": 3520000, "iterations_since_restore": 275, "perf": {"cpu_util_percent": 34.31666666666666, "ram_util_percent": 58.82619047619047}} +{"episode_reward_max": 639.0, "episode_reward_min": 362.0, "episode_reward_mean": 609.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 304.695}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.59, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.44, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.64, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.65, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.43, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.5, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.31, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.43, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.5, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.43, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.5, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 639.0, 630.0, 587.0, 582.0, 639.0, 636.0, 639.0, 582.0, 630.0, 627.0, 627.0, 633.0, 639.0, 570.0, 579.0, 633.0, 627.0, 627.0, 636.0, 636.0, 630.0, 633.0, 633.0, 621.0, 573.0, 636.0, 627.0, 530.0, 627.0, 636.0, 639.0, 639.0, 627.0, 633.0, 633.0, 636.0, 630.0, 630.0, 582.0, 522.0, 587.0, 630.0, 627.0, 633.0, 633.0, 630.0, 587.0, 582.0, 633.0, 636.0, 636.0, 621.0, 627.0, 587.0, 573.0, 630.0, 639.0, 630.0, 630.0, 636.0, 567.0, 579.0, 582.0, 579.0, 633.0, 639.0, 633.0, 636.0, 522.0, 390.0, 582.0, 516.0, 630.0, 627.0, 587.0, 633.0, 582.0, 636.0, 582.0, 447.0, 630.0, 639.0, 636.0, 636.0, 633.0, 362.0, 627.0, 636.0, 630.0, 639.0, 579.0, 621.0, 
639.0, 639.0, 587.0, 636.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [325.0, 314.0, 309.0, 321.0, 315.0, 324.0, 310.0, 320.0, 297.0, 290.0, 284.0, 298.0, 322.0, 317.0, 319.0, 317.0, 319.0, 320.0, 283.0, 299.0, 314.0, 316.0, 314.0, 313.0, 308.0, 319.0, 322.0, 311.0, 321.0, 318.0, 287.0, 283.0, 283.0, 296.0, 319.0, 314.0, 311.0, 316.0, 314.0, 313.0, 322.0, 314.0, 317.0, 319.0, 318.0, 312.0, 309.0, 324.0, 319.0, 314.0, 297.0, 324.0, 279.0, 294.0, 316.0, 320.0, 318.0, 309.0, 251.0, 279.0, 317.0, 310.0, 319.0, 317.0, 322.0, 317.0, 327.0, 312.0, 316.0, 311.0, 308.0, 325.0, 316.0, 317.0, 320.0, 316.0, 314.0, 316.0, 313.0, 317.0, 286.0, 296.0, 260.0, 262.0, 293.0, 294.0, 311.0, 319.0, 321.0, 306.0, 318.0, 315.0, 314.0, 319.0, 319.0, 311.0, 301.0, 286.0, 286.0, 296.0, 311.0, 322.0, 319.0, 317.0, 322.0, 314.0, 305.0, 316.0, 316.0, 311.0, 298.0, 289.0, 279.0, 294.0, 324.0, 306.0, 321.0, 318.0, 319.0, 311.0, 316.0, 314.0, 321.0, 315.0, 281.0, 286.0, 282.0, 297.0, 280.0, 302.0, 291.0, 288.0, 303.0, 330.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 260.0, 262.0, 197.0, 193.0, 291.0, 291.0, 256.0, 260.0, 314.0, 316.0, 308.0, 319.0, 286.0, 301.0, 311.0, 322.0, 289.0, 293.0, 322.0, 314.0, 288.0, 294.0, 221.0, 226.0, 318.0, 312.0, 317.0, 322.0, 317.0, 319.0, 318.0, 318.0, 309.0, 324.0, 182.0, 180.0, 316.0, 311.0, 316.0, 320.0, 319.0, 311.0, 319.0, 320.0, 294.0, 285.0, 301.0, 320.0, 327.0, 312.0, 314.0, 325.0, 281.0, 306.0, 316.0, 320.0, 301.0, 286.0, 316.0, 314.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.9949570051845532, "mean_processing_ms": 0.2672332587743119, "mean_inference_ms": 1.569562529998314}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6624000, "num_steps_sampled": 3532800, "sample_time_ms": 21368.312, "load_time_ms": 36.431, "grad_time_ms": 9398.907, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024656467139720917, "policy_loss": -0.005234332289546728, "vf_loss": 82.6478500366211, "vf_explained_var": 0.7628920078277588, "kl": 0.001980842323973775, "entropy": 1.1296080350875854, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3532800, "episodes_total": 8832, "training_iteration": 276, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-05-05", "timestamp": 1660255505, "time_this_iter_s": 30.375401973724365, "time_total_s": 13919.03785443306, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13919.03785443306, "timesteps_since_restore": 3532800, "iterations_since_restore": 276, "perf": {"cpu_util_percent": 33.07209302325582, "ram_util_percent": 58.767441860465105}} +{"episode_reward_max": 639.0, "episode_reward_min": 362.0, "episode_reward_mean": 610.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 305.36}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.12, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.44, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.89, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.9, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.76, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.85, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, 
"potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 639.0, 636.0, 633.0, 633.0, 636.0, 627.0, 627.0, 587.0, 636.0, 552.0, 636.0, 636.0, 636.0, 639.0, 636.0, 587.0, 636.0, 621.0, 630.0, 636.0, 633.0, 527.0, 636.0, 633.0, 630.0, 587.0, 630.0, 627.0, 582.0, 630.0, 639.0, 582.0, 579.0, 633.0, 639.0, 633.0, 636.0, 522.0, 390.0, 582.0, 516.0, 630.0, 627.0, 587.0, 633.0, 582.0, 636.0, 582.0, 447.0, 630.0, 639.0, 636.0, 636.0, 633.0, 362.0, 627.0, 636.0, 630.0, 
639.0, 579.0, 621.0, 639.0, 639.0, 587.0, 636.0, 587.0, 630.0, 639.0, 630.0, 639.0, 630.0, 587.0, 582.0, 639.0, 636.0, 639.0, 582.0, 630.0, 627.0, 627.0, 633.0, 639.0, 570.0, 579.0, 633.0, 627.0, 627.0, 636.0, 636.0, 630.0, 633.0, 633.0, 621.0, 573.0, 636.0, 627.0, 530.0, 627.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 297.0, 319.0, 320.0, 321.0, 315.0, 311.0, 322.0, 317.0, 316.0, 316.0, 320.0, 323.0, 304.0, 316.0, 311.0, 291.0, 296.0, 324.0, 312.0, 274.0, 278.0, 324.0, 312.0, 325.0, 311.0, 311.0, 325.0, 314.0, 325.0, 323.0, 313.0, 293.0, 294.0, 309.0, 327.0, 310.0, 311.0, 318.0, 312.0, 322.0, 314.0, 304.0, 329.0, 266.0, 261.0, 329.0, 307.0, 311.0, 322.0, 316.0, 314.0, 294.0, 293.0, 316.0, 314.0, 314.0, 313.0, 296.0, 286.0, 314.0, 316.0, 322.0, 317.0, 280.0, 302.0, 291.0, 288.0, 303.0, 330.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 260.0, 262.0, 197.0, 193.0, 291.0, 291.0, 256.0, 260.0, 314.0, 316.0, 308.0, 319.0, 286.0, 301.0, 311.0, 322.0, 289.0, 293.0, 322.0, 314.0, 288.0, 294.0, 221.0, 226.0, 318.0, 312.0, 317.0, 322.0, 317.0, 319.0, 318.0, 318.0, 309.0, 324.0, 182.0, 180.0, 316.0, 311.0, 316.0, 320.0, 319.0, 311.0, 319.0, 320.0, 294.0, 285.0, 301.0, 320.0, 327.0, 312.0, 314.0, 325.0, 281.0, 306.0, 316.0, 320.0, 301.0, 286.0, 316.0, 314.0, 325.0, 314.0, 309.0, 321.0, 315.0, 324.0, 310.0, 320.0, 297.0, 290.0, 284.0, 298.0, 322.0, 317.0, 319.0, 317.0, 319.0, 320.0, 283.0, 299.0, 314.0, 316.0, 314.0, 313.0, 308.0, 319.0, 322.0, 311.0, 321.0, 318.0, 287.0, 
283.0, 283.0, 296.0, 319.0, 314.0, 311.0, 316.0, 314.0, 313.0, 322.0, 314.0, 317.0, 319.0, 318.0, 312.0, 309.0, 324.0, 319.0, 314.0, 297.0, 324.0, 279.0, 294.0, 316.0, 320.0, 318.0, 309.0, 251.0, 279.0, 317.0, 310.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.993008865355136, "mean_processing_ms": 0.26684329670316986, "mean_inference_ms": 1.5676868637222179}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6648000, "num_steps_sampled": 3545600, "sample_time_ms": 21068.026, "load_time_ms": 36.686, "grad_time_ms": 9153.242, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004478854592889547, "policy_loss": -0.0033003378193825483, "vf_loss": 83.42573547363281, "vf_explained_var": 0.7645106911659241, "kl": 0.002364285057410598, "entropy": 1.1267634630203247, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3545600, "episodes_total": 8864, "training_iteration": 277, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-05-35", "timestamp": 1660255535, "time_this_iter_s": 29.876389980316162, "time_total_s": 13948.914244413376, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": 
null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, 
"compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13948.914244413376, "timesteps_since_restore": 3545600, "iterations_since_restore": 277, "perf": {"cpu_util_percent": 33.73571428571429, "ram_util_percent": 58.82857142857141}} +{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 617.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.89}, 
"custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.38, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.68, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 19.09, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.87, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.09, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.92, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.85, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.47, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.87, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.25, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.18, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.47, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.87, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.47, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.87, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 587.0, 627.0, 576.0, 544.0, 636.0, 630.0, 633.0, 633.0, 615.0, 582.0, 636.0, 639.0, 636.0, 633.0, 579.0, 630.0, 633.0, 633.0, 636.0, 639.0, 579.0, 636.0, 630.0, 639.0, 633.0, 582.0, 630.0, 627.0, 
516.0, 587.0, 639.0, 587.0, 636.0, 587.0, 630.0, 639.0, 630.0, 639.0, 630.0, 587.0, 582.0, 639.0, 636.0, 639.0, 582.0, 630.0, 627.0, 627.0, 633.0, 639.0, 570.0, 579.0, 633.0, 627.0, 627.0, 636.0, 636.0, 630.0, 633.0, 633.0, 621.0, 573.0, 636.0, 627.0, 530.0, 627.0, 636.0, 584.0, 639.0, 636.0, 633.0, 633.0, 636.0, 627.0, 627.0, 587.0, 636.0, 552.0, 636.0, 636.0, 636.0, 639.0, 636.0, 587.0, 636.0, 621.0, 630.0, 636.0, 633.0, 527.0, 636.0, 633.0, 630.0, 587.0, 630.0, 627.0, 582.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 291.0, 296.0, 321.0, 306.0, 288.0, 288.0, 266.0, 278.0, 320.0, 316.0, 316.0, 314.0, 319.0, 314.0, 316.0, 317.0, 310.0, 305.0, 298.0, 284.0, 316.0, 320.0, 327.0, 312.0, 319.0, 317.0, 319.0, 314.0, 282.0, 297.0, 310.0, 320.0, 316.0, 317.0, 322.0, 311.0, 308.0, 328.0, 325.0, 314.0, 296.0, 283.0, 314.0, 322.0, 320.0, 310.0, 317.0, 322.0, 308.0, 325.0, 290.0, 292.0, 313.0, 317.0, 308.0, 319.0, 243.0, 273.0, 291.0, 296.0, 320.0, 319.0, 281.0, 306.0, 316.0, 320.0, 301.0, 286.0, 316.0, 314.0, 325.0, 314.0, 309.0, 321.0, 315.0, 324.0, 310.0, 320.0, 297.0, 290.0, 284.0, 298.0, 322.0, 317.0, 319.0, 317.0, 319.0, 320.0, 283.0, 299.0, 314.0, 316.0, 314.0, 313.0, 308.0, 319.0, 322.0, 311.0, 321.0, 318.0, 287.0, 283.0, 283.0, 296.0, 319.0, 314.0, 311.0, 316.0, 314.0, 313.0, 322.0, 314.0, 317.0, 319.0, 318.0, 312.0, 309.0, 324.0, 319.0, 314.0, 297.0, 324.0, 279.0, 294.0, 316.0, 320.0, 318.0, 309.0, 251.0, 279.0, 317.0, 310.0, 319.0, 317.0, 287.0, 
297.0, 319.0, 320.0, 321.0, 315.0, 311.0, 322.0, 317.0, 316.0, 316.0, 320.0, 323.0, 304.0, 316.0, 311.0, 291.0, 296.0, 324.0, 312.0, 274.0, 278.0, 324.0, 312.0, 325.0, 311.0, 311.0, 325.0, 314.0, 325.0, 323.0, 313.0, 293.0, 294.0, 309.0, 327.0, 310.0, 311.0, 318.0, 312.0, 322.0, 314.0, 304.0, 329.0, 266.0, 261.0, 329.0, 307.0, 311.0, 322.0, 316.0, 314.0, 294.0, 293.0, 316.0, 314.0, 314.0, 313.0, 296.0, 286.0, 314.0, 316.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9910719747703968, "mean_processing_ms": 0.26645500095607105, "mean_inference_ms": 1.5658690640351203}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6672000, "num_steps_sampled": 3558400, "sample_time_ms": 20841.044, "load_time_ms": 37.005, "grad_time_ms": 9278.753, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004964211490005255, "policy_loss": -0.0031354122329503298, "vf_loss": 86.61837768554688, "vf_explained_var": 0.7653247714042664, "kl": 0.0020841285586357117, "entropy": 1.1244021654129028, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3558400, "episodes_total": 8896, "training_iteration": 278, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-06-07", "timestamp": 1660255567, "time_this_iter_s": 32.07894992828369, "time_total_s": 13980.99319434166, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13980.99319434166, "timesteps_since_restore": 3558400, "iterations_since_restore": 278, "perf": {"cpu_util_percent": 33.79111111111111, "ram_util_percent": 58.81111111111109}} +{"episode_reward_max": 639.0, "episode_reward_min": 
516.0, "episode_reward_mean": 617.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.905}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.41, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.73, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.95, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.9, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.11, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.86, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, 
"onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.26, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.6, "optimal_onion_potting_agent_0_min": 11, 
"optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [633.0, 630.0, 567.0, 627.0, 633.0, 584.0, 639.0, 582.0, 633.0, 636.0, 579.0, 636.0, 627.0, 630.0, 582.0, 633.0, 639.0, 627.0, 582.0, 630.0, 633.0, 633.0, 636.0, 578.0, 633.0, 633.0, 639.0, 633.0, 630.0, 587.0, 633.0, 639.0, 627.0, 530.0, 627.0, 636.0, 584.0, 639.0, 636.0, 633.0, 633.0, 636.0, 627.0, 627.0, 587.0, 636.0, 552.0, 636.0, 636.0, 636.0, 639.0, 636.0, 587.0, 636.0, 621.0, 630.0, 636.0, 633.0, 527.0, 636.0, 633.0, 630.0, 587.0, 630.0, 627.0, 582.0, 630.0, 639.0, 633.0, 587.0, 627.0, 576.0, 544.0, 636.0, 630.0, 633.0, 633.0, 615.0, 582.0, 636.0, 639.0, 636.0, 633.0, 579.0, 630.0, 633.0, 633.0, 636.0, 639.0, 579.0, 636.0, 630.0, 639.0, 633.0, 582.0, 630.0, 627.0, 516.0, 587.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [307.0, 326.0, 304.0, 326.0, 293.0, 274.0, 305.0, 322.0, 319.0, 314.0, 293.0, 291.0, 319.0, 320.0, 292.0, 290.0, 324.0, 309.0, 319.0, 317.0, 293.0, 286.0, 320.0, 316.0, 306.0, 321.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 325.0, 314.0, 314.0, 313.0, 301.0, 281.0, 306.0, 324.0, 306.0, 327.0, 319.0, 314.0, 324.0, 312.0, 287.0, 291.0, 319.0, 314.0, 319.0, 314.0, 324.0, 315.0, 317.0, 316.0, 311.0, 319.0, 305.0, 282.0, 315.0, 318.0, 322.0, 317.0, 318.0, 309.0, 251.0, 279.0, 317.0, 310.0, 319.0, 317.0, 287.0, 297.0, 319.0, 320.0, 321.0, 315.0, 311.0, 322.0, 317.0, 316.0, 316.0, 320.0, 323.0, 304.0, 316.0, 311.0, 291.0, 296.0, 324.0, 312.0, 274.0, 278.0, 324.0, 312.0, 325.0, 311.0, 311.0, 325.0, 314.0, 325.0, 323.0, 313.0, 293.0, 
294.0, 309.0, 327.0, 310.0, 311.0, 318.0, 312.0, 322.0, 314.0, 304.0, 329.0, 266.0, 261.0, 329.0, 307.0, 311.0, 322.0, 316.0, 314.0, 294.0, 293.0, 316.0, 314.0, 314.0, 313.0, 296.0, 286.0, 314.0, 316.0, 322.0, 317.0, 319.0, 314.0, 291.0, 296.0, 321.0, 306.0, 288.0, 288.0, 266.0, 278.0, 320.0, 316.0, 316.0, 314.0, 319.0, 314.0, 316.0, 317.0, 310.0, 305.0, 298.0, 284.0, 316.0, 320.0, 327.0, 312.0, 319.0, 317.0, 319.0, 314.0, 282.0, 297.0, 310.0, 320.0, 316.0, 317.0, 322.0, 311.0, 308.0, 328.0, 325.0, 314.0, 296.0, 283.0, 314.0, 322.0, 320.0, 310.0, 317.0, 322.0, 308.0, 325.0, 290.0, 292.0, 313.0, 317.0, 308.0, 319.0, 243.0, 273.0, 291.0, 296.0, 320.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9891622022532315, "mean_processing_ms": 0.26607283169040735, "mean_inference_ms": 1.56436366595401}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6696000, "num_steps_sampled": 3571200, "sample_time_ms": 21399.6, "load_time_ms": 36.9, "grad_time_ms": 9533.999, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006114859133958817, "policy_loss": -0.007230747956782579, "vf_loss": 84.06954956054688, "vf_explained_var": 0.7658140063285828, "kl": 0.0017542889108881354, "entropy": 1.129442572593689, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3571200, "episodes_total": 8928, "training_iteration": 279, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-06-44", "timestamp": 1660255604, "time_this_iter_s": 37.19960618019104, "time_total_s": 14018.19280052185, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
14018.19280052185, "timesteps_since_restore": 3571200, "iterations_since_restore": 279, "perf": {"cpu_util_percent": 33.281132075471696, "ram_util_percent": 58.9301886792453}} +{"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 612.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 146.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.095}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 187.79, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.72, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.89, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, 
"useful_onion_pickup_agent_1_mean": 17.73, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 18, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 14, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.6, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 630.0, 633.0, 633.0, 639.0, 639.0, 582.0, 294.0, 636.0, 587.0, 579.0, 636.0, 450.0, 579.0, 573.0, 582.0, 582.0, 582.0, 630.0, 636.0, 636.0, 636.0, 579.0, 561.0, 633.0, 636.0, 630.0, 630.0, 630.0, 639.0, 630.0, 627.0, 582.0, 630.0, 639.0, 633.0, 587.0, 627.0, 576.0, 544.0, 636.0, 630.0, 633.0, 633.0, 615.0, 582.0, 636.0, 639.0, 636.0, 633.0, 579.0, 630.0, 633.0, 633.0, 636.0, 639.0, 579.0, 636.0, 630.0, 639.0, 633.0, 582.0, 630.0, 627.0, 516.0, 587.0, 639.0, 633.0, 630.0, 567.0, 627.0, 633.0, 584.0, 639.0, 582.0, 633.0, 636.0, 579.0, 636.0, 627.0, 630.0, 582.0, 633.0, 639.0, 627.0, 582.0, 630.0, 633.0, 633.0, 636.0, 578.0, 633.0, 633.0, 639.0, 633.0, 630.0, 587.0, 633.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 319.0, 324.0, 315.0, 311.0, 319.0, 308.0, 325.0, 311.0, 322.0, 319.0, 320.0, 319.0, 320.0, 286.0, 296.0, 148.0, 146.0, 321.0, 315.0, 294.0, 293.0, 288.0, 291.0, 323.0, 313.0, 224.0, 226.0, 296.0, 283.0, 290.0, 283.0, 289.0, 293.0, 290.0, 292.0, 293.0, 289.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 290.0, 289.0, 279.0, 282.0, 313.0, 320.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 317.0, 313.0, 322.0, 317.0, 311.0, 319.0, 314.0, 313.0, 296.0, 286.0, 314.0, 316.0, 322.0, 317.0, 319.0, 314.0, 291.0, 296.0, 321.0, 306.0, 288.0, 
288.0, 266.0, 278.0, 320.0, 316.0, 316.0, 314.0, 319.0, 314.0, 316.0, 317.0, 310.0, 305.0, 298.0, 284.0, 316.0, 320.0, 327.0, 312.0, 319.0, 317.0, 319.0, 314.0, 282.0, 297.0, 310.0, 320.0, 316.0, 317.0, 322.0, 311.0, 308.0, 328.0, 325.0, 314.0, 296.0, 283.0, 314.0, 322.0, 320.0, 310.0, 317.0, 322.0, 308.0, 325.0, 290.0, 292.0, 313.0, 317.0, 308.0, 319.0, 243.0, 273.0, 291.0, 296.0, 320.0, 319.0, 307.0, 326.0, 304.0, 326.0, 293.0, 274.0, 305.0, 322.0, 319.0, 314.0, 293.0, 291.0, 319.0, 320.0, 292.0, 290.0, 324.0, 309.0, 319.0, 317.0, 293.0, 286.0, 320.0, 316.0, 306.0, 321.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 325.0, 314.0, 314.0, 313.0, 301.0, 281.0, 306.0, 324.0, 306.0, 327.0, 319.0, 314.0, 324.0, 312.0, 287.0, 291.0, 319.0, 314.0, 319.0, 314.0, 324.0, 315.0, 317.0, 316.0, 311.0, 319.0, 305.0, 282.0, 315.0, 318.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.987279141463972, "mean_processing_ms": 0.26569893386655014, "mean_inference_ms": 1.5631172104354278}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6720000, "num_steps_sampled": 3584000, "sample_time_ms": 21992.586, "load_time_ms": 36.83, "grad_time_ms": 9822.342, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033407146111130714, "policy_loss": -0.005221154540777206, "vf_loss": 91.33563232421875, "vf_explained_var": 0.7713200449943542, "kl": 0.001954694977030158, "entropy": 1.143385887145996, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3584000, "episodes_total": 8960, "training_iteration": 280, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-07-21", "timestamp": 1660255641, "time_this_iter_s": 36.67114806175232, "time_total_s": 14054.863948583603, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, 
"num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14054.863948583603, "timesteps_since_restore": 3584000, "iterations_since_restore": 280, "perf": {"cpu_util_percent": 33.917307692307695, "ram_util_percent": 58.82115384615383}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 606.73, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.365}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.33, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.42, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.59, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.51, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.66, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.66, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 18, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 14, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.38, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 636.0, 633.0, 636.0, 579.0, 579.0, 573.0, 180.0, 587.0, 636.0, 630.0, 579.0, 639.0, 570.0, 639.0, 633.0, 636.0, 582.0, 582.0, 633.0, 627.0, 630.0, 624.0, 624.0, 536.0, 636.0, 636.0, 636.0, 587.0, 639.0, 639.0, 627.0, 516.0, 587.0, 639.0, 633.0, 630.0, 567.0, 627.0, 633.0, 584.0, 639.0, 582.0, 633.0, 636.0, 579.0, 636.0, 627.0, 630.0, 582.0, 633.0, 639.0, 627.0, 582.0, 630.0, 633.0, 633.0, 636.0, 578.0, 633.0, 633.0, 639.0, 633.0, 630.0, 587.0, 633.0, 639.0, 636.0, 639.0, 630.0, 633.0, 633.0, 639.0, 639.0, 582.0, 294.0, 636.0, 587.0, 579.0, 636.0, 450.0, 579.0, 573.0, 582.0, 582.0, 582.0, 630.0, 636.0, 636.0, 636.0, 579.0, 561.0, 633.0, 636.0, 630.0, 630.0, 630.0, 639.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 319.0, 322.0, 317.0, 319.0, 317.0, 321.0, 312.0, 313.0, 323.0, 277.0, 302.0, 290.0, 289.0, 291.0, 282.0, 91.0, 89.0, 296.0, 291.0, 322.0, 314.0, 316.0, 314.0, 298.0, 281.0, 314.0, 325.0, 284.0, 286.0, 311.0, 328.0, 308.0, 325.0, 311.0, 325.0, 294.0, 288.0, 293.0, 289.0, 321.0, 312.0, 316.0, 311.0, 317.0, 313.0, 327.0, 297.0, 321.0, 
303.0, 275.0, 261.0, 314.0, 322.0, 314.0, 322.0, 316.0, 320.0, 303.0, 284.0, 314.0, 325.0, 319.0, 320.0, 308.0, 319.0, 243.0, 273.0, 291.0, 296.0, 320.0, 319.0, 307.0, 326.0, 304.0, 326.0, 293.0, 274.0, 305.0, 322.0, 319.0, 314.0, 293.0, 291.0, 319.0, 320.0, 292.0, 290.0, 324.0, 309.0, 319.0, 317.0, 293.0, 286.0, 320.0, 316.0, 306.0, 321.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 325.0, 314.0, 314.0, 313.0, 301.0, 281.0, 306.0, 324.0, 306.0, 327.0, 319.0, 314.0, 324.0, 312.0, 287.0, 291.0, 319.0, 314.0, 319.0, 314.0, 324.0, 315.0, 317.0, 316.0, 311.0, 319.0, 305.0, 282.0, 315.0, 318.0, 322.0, 317.0, 317.0, 319.0, 324.0, 315.0, 311.0, 319.0, 308.0, 325.0, 311.0, 322.0, 319.0, 320.0, 319.0, 320.0, 286.0, 296.0, 148.0, 146.0, 321.0, 315.0, 294.0, 293.0, 288.0, 291.0, 323.0, 313.0, 224.0, 226.0, 296.0, 283.0, 290.0, 283.0, 289.0, 293.0, 290.0, 292.0, 293.0, 289.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 290.0, 289.0, 279.0, 282.0, 313.0, 320.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 317.0, 313.0, 322.0, 317.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.985427132070836, "mean_processing_ms": 0.26533447466026966, "mean_inference_ms": 1.5623431092422566}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6744000, "num_steps_sampled": 3596800, "sample_time_ms": 22602.144, "load_time_ms": 37.106, "grad_time_ms": 10160.693, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006504642311483622, "policy_loss": -0.002157183364033699, "vf_loss": 92.3310546875, "vf_explained_var": 0.768465518951416, "kl": 0.002224028343334794, "entropy": 1.1425694227218628, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3596800, "episodes_total": 8992, "training_iteration": 281, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-08-00", "timestamp": 1660255680, 
"time_this_iter_s": 39.430299043655396, "time_total_s": 14094.294247627258, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, 
"log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, 
"sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14094.294247627258, "timesteps_since_restore": 3596800, "iterations_since_restore": 281, "perf": {"cpu_util_percent": 32.93571428571428, "ram_util_percent": 58.800000000000004}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 606.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 303.4}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.8, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.25, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.74, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.76, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 18, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 14, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 2, 
"soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.28, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 596.0, 587.0, 627.0, 633.0, 636.0, 636.0, 636.0, 636.0, 636.0, 582.0, 630.0, 633.0, 636.0, 630.0, 590.0, 636.0, 633.0, 573.0, 558.0, 636.0, 636.0, 636.0, 630.0, 639.0, 636.0, 522.0, 587.0, 636.0, 636.0, 587.0, 639.0, 630.0, 587.0, 633.0, 639.0, 636.0, 639.0, 630.0, 633.0, 633.0, 639.0, 639.0, 582.0, 294.0, 636.0, 587.0, 579.0, 636.0, 450.0, 579.0, 573.0, 582.0, 582.0, 582.0, 630.0, 636.0, 636.0, 636.0, 579.0, 561.0, 633.0, 636.0, 630.0, 630.0, 630.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 636.0, 579.0, 579.0, 573.0, 180.0, 587.0, 636.0, 630.0, 579.0, 639.0, 570.0, 639.0, 633.0, 636.0, 582.0, 582.0, 633.0, 627.0, 630.0, 624.0, 624.0, 536.0, 636.0, 636.0, 636.0, 587.0, 639.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 300.0, 296.0, 297.0, 290.0, 315.0, 312.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 314.0, 322.0, 322.0, 314.0, 
317.0, 319.0, 294.0, 288.0, 306.0, 324.0, 311.0, 322.0, 316.0, 320.0, 316.0, 314.0, 304.0, 286.0, 324.0, 312.0, 317.0, 316.0, 286.0, 287.0, 278.0, 280.0, 319.0, 317.0, 329.0, 307.0, 311.0, 325.0, 318.0, 312.0, 312.0, 327.0, 319.0, 317.0, 262.0, 260.0, 294.0, 293.0, 319.0, 317.0, 322.0, 314.0, 288.0, 299.0, 322.0, 317.0, 311.0, 319.0, 305.0, 282.0, 315.0, 318.0, 322.0, 317.0, 317.0, 319.0, 324.0, 315.0, 311.0, 319.0, 308.0, 325.0, 311.0, 322.0, 319.0, 320.0, 319.0, 320.0, 286.0, 296.0, 148.0, 146.0, 321.0, 315.0, 294.0, 293.0, 288.0, 291.0, 323.0, 313.0, 224.0, 226.0, 296.0, 283.0, 290.0, 283.0, 289.0, 293.0, 290.0, 292.0, 293.0, 289.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 290.0, 289.0, 279.0, 282.0, 313.0, 320.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 317.0, 313.0, 322.0, 317.0, 311.0, 319.0, 317.0, 319.0, 322.0, 317.0, 319.0, 317.0, 321.0, 312.0, 313.0, 323.0, 277.0, 302.0, 290.0, 289.0, 291.0, 282.0, 91.0, 89.0, 296.0, 291.0, 322.0, 314.0, 316.0, 314.0, 298.0, 281.0, 314.0, 325.0, 284.0, 286.0, 311.0, 328.0, 308.0, 325.0, 311.0, 325.0, 294.0, 288.0, 293.0, 289.0, 321.0, 312.0, 316.0, 311.0, 317.0, 313.0, 327.0, 297.0, 321.0, 303.0, 275.0, 261.0, 314.0, 322.0, 314.0, 322.0, 316.0, 320.0, 303.0, 284.0, 314.0, 325.0, 319.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9835752103024169, "mean_processing_ms": 0.26496930123552576, "mean_inference_ms": 1.5613527088644699}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6768000, "num_steps_sampled": 3609600, "sample_time_ms": 22700.667, "load_time_ms": 36.992, "grad_time_ms": 10562.273, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004271908197551966, "policy_loss": -0.0035006285179406404, "vf_loss": 83.40963745117188, "vf_explained_var": 0.7725582718849182, "kl": 0.0017563734436407685, "entropy": 1.1368495225906372, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 3609600, "episodes_total": 9024, "training_iteration": 282, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-08-36", "timestamp": 1660255716, "time_this_iter_s": 35.46651792526245, "time_total_s": 14129.76076555252, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14129.76076555252, "timesteps_since_restore": 3609600, "iterations_since_restore": 282, "perf": {"cpu_util_percent": 33.821999999999996, "ram_util_percent": 59.328}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 614.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.035}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.87, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.29, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.89, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.45, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.45, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.95, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.45, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.95, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.45, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.95, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 573.0, 636.0, 630.0, 636.0, 630.0, 633.0, 579.0, 636.0, 639.0, 630.0, 633.0, 639.0, 639.0, 582.0, 636.0, 633.0, 630.0, 567.0, 633.0, 627.0, 627.0, 582.0, 639.0, 633.0, 636.0, 579.0, 630.0, 636.0, 636.0, 587.0, 639.0, 630.0, 630.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 636.0, 579.0, 579.0, 573.0, 180.0, 587.0, 636.0, 630.0, 579.0, 639.0, 570.0, 639.0, 633.0, 636.0, 582.0, 582.0, 633.0, 627.0, 630.0, 624.0, 624.0, 536.0, 636.0, 636.0, 636.0, 587.0, 639.0, 639.0, 579.0, 596.0, 587.0, 627.0, 633.0, 636.0, 636.0, 636.0, 636.0, 636.0, 582.0, 630.0, 633.0, 636.0, 630.0, 590.0, 636.0, 633.0, 573.0, 558.0, 636.0, 636.0, 636.0, 630.0, 639.0, 636.0, 522.0, 587.0, 636.0, 636.0, 587.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 322.0, 292.0, 281.0, 308.0, 328.0, 304.0, 326.0, 317.0, 319.0, 319.0, 311.0, 311.0, 322.0, 284.0, 295.0, 313.0, 323.0, 319.0, 320.0, 319.0, 311.0, 316.0, 317.0, 319.0, 320.0, 321.0, 318.0, 283.0, 299.0, 316.0, 320.0, 316.0, 317.0, 316.0, 314.0, 279.0, 288.0, 313.0, 320.0, 313.0, 314.0, 311.0, 316.0, 288.0, 294.0, 319.0, 320.0, 314.0, 319.0, 319.0, 317.0, 294.0, 285.0, 318.0, 312.0, 317.0, 319.0, 322.0, 314.0, 298.0, 289.0, 322.0, 317.0, 311.0, 319.0, 317.0, 313.0, 322.0, 317.0, 311.0, 319.0, 317.0, 319.0, 322.0, 317.0, 319.0, 317.0, 321.0, 312.0, 313.0, 323.0, 277.0, 302.0, 290.0, 289.0, 291.0, 282.0, 91.0, 89.0, 296.0, 291.0, 322.0, 314.0, 316.0, 314.0, 298.0, 281.0, 314.0, 325.0, 284.0, 286.0, 311.0, 328.0, 308.0, 325.0, 311.0, 325.0, 294.0, 288.0, 293.0, 289.0, 321.0, 312.0, 316.0, 311.0, 317.0, 313.0, 327.0, 297.0, 321.0, 303.0, 275.0, 261.0, 314.0, 322.0, 314.0, 322.0, 316.0, 320.0, 303.0, 284.0, 314.0, 325.0, 319.0, 320.0, 283.0, 296.0, 300.0, 296.0, 297.0, 290.0, 315.0, 312.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 314.0, 322.0, 322.0, 314.0, 317.0, 319.0, 294.0, 288.0, 306.0, 324.0, 311.0, 322.0, 316.0, 320.0, 316.0, 314.0, 304.0, 286.0, 324.0, 312.0, 317.0, 316.0, 286.0, 287.0, 278.0, 280.0, 319.0, 317.0, 329.0, 307.0, 311.0, 325.0, 318.0, 312.0, 312.0, 327.0, 319.0, 317.0, 262.0, 260.0, 294.0, 293.0, 319.0, 317.0, 322.0, 314.0, 288.0, 299.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9817221783710357, "mean_processing_ms": 0.2646010611587108, "mean_inference_ms": 1.5601389392518195}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6792000, "num_steps_sampled": 3622400, "sample_time_ms": 22788.728, "load_time_ms": 36.79, "grad_time_ms": 10687.472, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030612878035753965, "policy_loss": -0.0045290542766451836, "vf_loss": 81.5626449584961, 
"vf_explained_var": 0.7761082053184509, "kl": 0.0021392148919403553, "entropy": 1.131847858428955, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3622400, "episodes_total": 9056, "training_iteration": 283, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-09-09", "timestamp": 1660255749, "time_this_iter_s": 33.67844009399414, "time_total_s": 14163.439205646515, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14163.439205646515, "timesteps_since_restore": 3622400, "iterations_since_restore": 283, "perf": {"cpu_util_percent": 34.11489361702128, "ram_util_percent": 59.04893617021279}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 617.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.985}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.97, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.9, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.58, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.13, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.53, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.56, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.97, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.56, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.97, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.56, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.97, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 579.0, 582.0, 636.0, 575.0, 587.0, 582.0, 630.0, 630.0, 636.0, 633.0, 627.0, 627.0, 633.0, 593.0, 587.0, 636.0, 630.0, 627.0, 630.0, 636.0, 633.0, 630.0, 630.0, 573.0, 627.0, 579.0, 596.0, 579.0, 630.0, 633.0, 636.0, 587.0, 639.0, 639.0, 579.0, 596.0, 587.0, 627.0, 633.0, 636.0, 636.0, 636.0, 636.0, 636.0, 582.0, 630.0, 633.0, 636.0, 630.0, 590.0, 636.0, 633.0, 573.0, 558.0, 636.0, 636.0, 636.0, 630.0, 639.0, 636.0, 522.0, 587.0, 636.0, 636.0, 587.0, 639.0, 639.0, 573.0, 636.0, 630.0, 636.0, 630.0, 633.0, 579.0, 636.0, 639.0, 630.0, 633.0, 639.0, 639.0, 582.0, 636.0, 633.0, 630.0, 567.0, 633.0, 627.0, 627.0, 582.0, 639.0, 633.0, 636.0, 579.0, 630.0, 636.0, 636.0, 587.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 311.0, 319.0, 288.0, 291.0, 286.0, 296.0, 311.0, 325.0, 291.0, 284.0, 306.0, 281.0, 294.0, 288.0, 314.0, 316.0, 311.0, 319.0, 319.0, 317.0, 316.0, 317.0, 305.0, 322.0, 308.0, 319.0, 314.0, 319.0, 288.0, 305.0, 299.0, 288.0, 319.0, 317.0, 316.0, 314.0, 318.0, 309.0, 311.0, 319.0, 319.0, 317.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 287.0, 286.0, 311.0, 316.0, 291.0, 288.0, 294.0, 302.0, 291.0, 288.0, 314.0, 316.0, 311.0, 322.0, 316.0, 320.0, 303.0, 284.0, 314.0, 325.0, 319.0, 320.0, 283.0, 296.0, 300.0, 296.0, 297.0, 290.0, 315.0, 312.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 314.0, 322.0, 322.0, 314.0, 317.0, 319.0, 294.0, 288.0, 306.0, 324.0, 311.0, 322.0, 316.0, 320.0, 316.0, 314.0, 304.0, 286.0, 324.0, 312.0, 317.0, 316.0, 286.0, 287.0, 278.0, 280.0, 319.0, 317.0, 329.0, 307.0, 311.0, 325.0, 318.0, 312.0, 312.0, 327.0, 319.0, 317.0, 262.0, 260.0, 294.0, 293.0, 319.0, 317.0, 322.0, 314.0, 288.0, 299.0, 322.0, 317.0, 317.0, 322.0, 292.0, 281.0, 308.0, 328.0, 304.0, 326.0, 317.0, 319.0, 319.0, 311.0, 311.0, 322.0, 284.0, 295.0, 313.0, 323.0, 319.0, 320.0, 319.0, 311.0, 316.0, 317.0, 319.0, 320.0, 321.0, 318.0, 283.0, 299.0, 316.0, 320.0, 316.0, 317.0, 316.0, 314.0, 279.0, 288.0, 313.0, 320.0, 313.0, 314.0, 311.0, 316.0, 288.0, 294.0, 319.0, 320.0, 314.0, 319.0, 319.0, 317.0, 294.0, 285.0, 318.0, 312.0, 317.0, 319.0, 322.0, 314.0, 298.0, 289.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9798702213008988, "mean_processing_ms": 0.26423169692571163, "mean_inference_ms": 1.5586086454060646}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6816000, "num_steps_sampled": 3635200, "sample_time_ms": 22897.149, "load_time_ms": 36.929, "grad_time_ms": 10771.125, "update_time_ms": 0.001, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005141934845596552, "policy_loss": -0.003231912851333618, "vf_loss": 89.45598602294922, "vf_explained_var": 0.7527138590812683, "kl": 0.0021111962851136923, "entropy": 1.1434991359710693, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3635200, "episodes_total": 9088, "training_iteration": 284, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-09-42", "timestamp": 1660255782, "time_this_iter_s": 32.81455707550049, "time_total_s": 14196.253762722015, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14196.253762722015, "timesteps_since_restore": 3635200, "iterations_since_restore": 284, "perf": {"cpu_util_percent": 32.710638297872336, "ram_util_percent": 58.93617021276594}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 616.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 308.355}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 189.51, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.26, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.86, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.6, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.07, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.56, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.46, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.94, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, 
"useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.46, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.94, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.46, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.94, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 633.0, 639.0, 630.0, 636.0, 636.0, 636.0, 627.0, 633.0, 582.0, 636.0, 627.0, 180.0, 639.0, 633.0, 639.0, 636.0, 636.0, 633.0, 633.0, 539.0, 630.0, 636.0, 639.0, 636.0, 630.0, 587.0, 633.0, 636.0, 636.0, 636.0, 636.0, 636.0, 587.0, 639.0, 639.0, 573.0, 636.0, 630.0, 636.0, 630.0, 633.0, 579.0, 636.0, 639.0, 630.0, 633.0, 639.0, 639.0, 582.0, 636.0, 633.0, 630.0, 567.0, 633.0, 627.0, 627.0, 582.0, 639.0, 633.0, 636.0, 579.0, 630.0, 636.0, 636.0, 587.0, 639.0, 633.0, 630.0, 579.0, 582.0, 636.0, 575.0, 587.0, 582.0, 630.0, 630.0, 636.0, 633.0, 627.0, 627.0, 633.0, 593.0, 587.0, 636.0, 630.0, 627.0, 630.0, 636.0, 633.0, 630.0, 630.0, 573.0, 627.0, 579.0, 596.0, 579.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 317.0, 319.0, 317.0, 316.0, 322.0, 317.0, 316.0, 314.0, 327.0, 309.0, 316.0, 320.0, 326.0, 310.0, 316.0, 311.0, 326.0, 307.0, 288.0, 294.0, 311.0, 325.0, 308.0, 319.0, 91.0, 89.0, 317.0, 322.0, 324.0, 309.0, 324.0, 315.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 314.0, 319.0, 268.0, 271.0, 311.0, 319.0, 314.0, 322.0, 322.0, 317.0, 322.0, 314.0, 329.0, 301.0, 296.0, 291.0, 314.0, 319.0, 306.0, 330.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 322.0, 314.0, 288.0, 299.0, 322.0, 317.0, 317.0, 322.0, 292.0, 281.0, 308.0, 328.0, 304.0, 326.0, 317.0, 319.0, 319.0, 311.0, 311.0, 322.0, 284.0, 295.0, 313.0, 323.0, 319.0, 320.0, 319.0, 311.0, 316.0, 317.0, 319.0, 320.0, 321.0, 318.0, 283.0, 299.0, 316.0, 320.0, 316.0, 317.0, 316.0, 314.0, 279.0, 288.0, 313.0, 320.0, 313.0, 314.0, 311.0, 316.0, 288.0, 294.0, 319.0, 320.0, 314.0, 319.0, 319.0, 317.0, 294.0, 285.0, 318.0, 312.0, 317.0, 319.0, 322.0, 314.0, 298.0, 289.0, 322.0, 317.0, 321.0, 312.0, 311.0, 319.0, 288.0, 291.0, 286.0, 296.0, 311.0, 325.0, 291.0, 284.0, 306.0, 281.0, 294.0, 288.0, 314.0, 316.0, 311.0, 319.0, 319.0, 317.0, 316.0, 317.0, 305.0, 322.0, 308.0, 319.0, 314.0, 319.0, 288.0, 305.0, 299.0, 288.0, 319.0, 317.0, 316.0, 314.0, 318.0, 309.0, 311.0, 319.0, 319.0, 317.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 287.0, 286.0, 311.0, 316.0, 291.0, 288.0, 294.0, 302.0, 291.0, 288.0, 314.0, 316.0, 311.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.978026720054701, "mean_processing_ms": 0.2638645692972578, "mean_inference_ms": 1.5570587102664553}, 
"off_policy_estimator": {}, "info": {"num_steps_trained": 6840000, "num_steps_sampled": 3648000, "sample_time_ms": 23082.722, "load_time_ms": 36.933, "grad_time_ms": 10916.497, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002097133779898286, "policy_loss": -0.005807385314255953, "vf_loss": 84.70693969726562, "vf_explained_var": 0.7814067006111145, "kl": 0.0015371787594631314, "entropy": 1.1323403120040894, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3648000, "episodes_total": 9120, "training_iteration": 285, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-10-15", "timestamp": 1660255815, "time_this_iter_s": 33.031522035598755, "time_total_s": 14229.285284757614, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 
400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14229.285284757614, "timesteps_since_restore": 3648000, "iterations_since_restore": 285, "perf": {"cpu_util_percent": 32.80434782608696, "ram_util_percent": 58.88260869565216}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 616.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 308.135}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 189.47, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.33, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.63, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.8, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.55, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, 
"dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.5, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 7, 
"viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 624.0, 636.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 639.0, 639.0, 630.0, 639.0, 579.0, 633.0, 582.0, 587.0, 636.0, 639.0, 587.0, 636.0, 578.0, 639.0, 630.0, 576.0, 630.0, 582.0, 636.0, 636.0, 587.0, 639.0, 633.0, 630.0, 579.0, 582.0, 636.0, 575.0, 587.0, 582.0, 630.0, 630.0, 636.0, 633.0, 627.0, 627.0, 633.0, 593.0, 587.0, 636.0, 630.0, 627.0, 630.0, 636.0, 633.0, 630.0, 630.0, 573.0, 627.0, 579.0, 596.0, 579.0, 630.0, 633.0, 582.0, 636.0, 633.0, 639.0, 630.0, 636.0, 636.0, 636.0, 627.0, 633.0, 582.0, 636.0, 627.0, 180.0, 639.0, 633.0, 639.0, 636.0, 636.0, 633.0, 633.0, 
539.0, 630.0, 636.0, 639.0, 636.0, 630.0, 587.0, 633.0, 636.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 311.0, 296.0, 286.0, 313.0, 311.0, 314.0, 322.0, 309.0, 321.0, 319.0, 311.0, 318.0, 315.0, 325.0, 311.0, 314.0, 322.0, 312.0, 324.0, 319.0, 311.0, 313.0, 320.0, 325.0, 305.0, 324.0, 306.0, 319.0, 320.0, 317.0, 322.0, 313.0, 317.0, 316.0, 323.0, 288.0, 291.0, 319.0, 314.0, 296.0, 286.0, 295.0, 292.0, 332.0, 304.0, 319.0, 320.0, 296.0, 291.0, 316.0, 320.0, 282.0, 296.0, 314.0, 325.0, 316.0, 314.0, 293.0, 283.0, 323.0, 307.0, 291.0, 291.0, 317.0, 319.0, 322.0, 314.0, 298.0, 289.0, 322.0, 317.0, 321.0, 312.0, 311.0, 319.0, 288.0, 291.0, 286.0, 296.0, 311.0, 325.0, 291.0, 284.0, 306.0, 281.0, 294.0, 288.0, 314.0, 316.0, 311.0, 319.0, 319.0, 317.0, 316.0, 317.0, 305.0, 322.0, 308.0, 319.0, 314.0, 319.0, 288.0, 305.0, 299.0, 288.0, 319.0, 317.0, 316.0, 314.0, 318.0, 309.0, 311.0, 319.0, 319.0, 317.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 287.0, 286.0, 311.0, 316.0, 291.0, 288.0, 294.0, 302.0, 291.0, 288.0, 314.0, 316.0, 311.0, 322.0, 288.0, 294.0, 317.0, 319.0, 317.0, 316.0, 322.0, 317.0, 316.0, 314.0, 327.0, 309.0, 316.0, 320.0, 326.0, 310.0, 316.0, 311.0, 326.0, 307.0, 288.0, 294.0, 311.0, 325.0, 308.0, 319.0, 91.0, 89.0, 317.0, 322.0, 324.0, 309.0, 324.0, 315.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 314.0, 319.0, 268.0, 271.0, 311.0, 319.0, 314.0, 322.0, 322.0, 317.0, 322.0, 314.0, 329.0, 301.0, 296.0, 291.0, 314.0, 319.0, 306.0, 330.0, 314.0, 
322.0, 324.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9761954337234573, "mean_processing_ms": 0.26349990327404404, "mean_inference_ms": 1.5554986152813894}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6864000, "num_steps_sampled": 3660800, "sample_time_ms": 23037.067, "load_time_ms": 36.944, "grad_time_ms": 11090.889, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016592548927292228, "policy_loss": -0.006142645608633757, "vf_loss": 83.6804428100586, "vf_explained_var": 0.7674832344055176, "kl": 0.0020798875484615564, "entropy": 1.1322760581970215, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3660800, "episodes_total": 9152, "training_iteration": 286, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-10-47", "timestamp": 1660255847, "time_this_iter_s": 31.660379886627197, "time_total_s": 14260.945664644241, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 
0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14260.945664644241, "timesteps_since_restore": 3660800, "iterations_since_restore": 286, "perf": {"cpu_util_percent": 33.97111111111111, "ram_util_percent": 58.86666666666669}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 618.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 309.115}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 189.83, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.54, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.68, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.87, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.63, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 
17.86, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.63, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.86, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.63, 
"viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.86, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 579.0, 627.0, 630.0, 636.0, 639.0, 633.0, 633.0, 636.0, 627.0, 636.0, 630.0, 582.0, 627.0, 636.0, 636.0, 636.0, 587.0, 636.0, 633.0, 636.0, 633.0, 633.0, 630.0, 633.0, 636.0, 627.0, 579.0, 639.0, 582.0, 546.0, 636.0, 596.0, 579.0, 630.0, 633.0, 582.0, 636.0, 633.0, 639.0, 630.0, 636.0, 636.0, 636.0, 627.0, 633.0, 582.0, 636.0, 627.0, 180.0, 639.0, 633.0, 639.0, 636.0, 636.0, 633.0, 633.0, 539.0, 630.0, 636.0, 639.0, 636.0, 630.0, 587.0, 633.0, 
636.0, 636.0, 636.0, 633.0, 582.0, 624.0, 636.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 639.0, 639.0, 630.0, 639.0, 579.0, 633.0, 582.0, 587.0, 636.0, 639.0, 587.0, 636.0, 578.0, 639.0, 630.0, 576.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 323.0, 299.0, 280.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 319.0, 314.0, 316.0, 317.0, 314.0, 322.0, 324.0, 303.0, 307.0, 329.0, 307.0, 323.0, 293.0, 289.0, 311.0, 316.0, 314.0, 322.0, 319.0, 317.0, 313.0, 323.0, 283.0, 304.0, 319.0, 317.0, 319.0, 314.0, 314.0, 322.0, 311.0, 322.0, 311.0, 322.0, 314.0, 316.0, 322.0, 311.0, 319.0, 317.0, 313.0, 314.0, 282.0, 297.0, 317.0, 322.0, 293.0, 289.0, 283.0, 263.0, 323.0, 313.0, 294.0, 302.0, 291.0, 288.0, 314.0, 316.0, 311.0, 322.0, 288.0, 294.0, 317.0, 319.0, 317.0, 316.0, 322.0, 317.0, 316.0, 314.0, 327.0, 309.0, 316.0, 320.0, 326.0, 310.0, 316.0, 311.0, 326.0, 307.0, 288.0, 294.0, 311.0, 325.0, 308.0, 319.0, 91.0, 89.0, 317.0, 322.0, 324.0, 309.0, 324.0, 315.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 314.0, 319.0, 268.0, 271.0, 311.0, 319.0, 314.0, 322.0, 322.0, 317.0, 322.0, 314.0, 329.0, 301.0, 296.0, 291.0, 314.0, 319.0, 306.0, 330.0, 314.0, 322.0, 324.0, 312.0, 322.0, 311.0, 296.0, 286.0, 313.0, 311.0, 314.0, 322.0, 309.0, 321.0, 319.0, 311.0, 318.0, 315.0, 325.0, 311.0, 314.0, 322.0, 312.0, 324.0, 319.0, 311.0, 313.0, 320.0, 325.0, 305.0, 324.0, 306.0, 319.0, 320.0, 317.0, 322.0, 313.0, 317.0, 316.0, 323.0, 288.0, 
291.0, 319.0, 314.0, 296.0, 286.0, 295.0, 292.0, 332.0, 304.0, 319.0, 320.0, 296.0, 291.0, 316.0, 320.0, 282.0, 296.0, 314.0, 325.0, 316.0, 314.0, 293.0, 283.0, 323.0, 307.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9743700047220856, "mean_processing_ms": 0.2631349992390798, "mean_inference_ms": 1.553816906350355}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6888000, "num_steps_sampled": 3673600, "sample_time_ms": 22960.493, "load_time_ms": 36.753, "grad_time_ms": 11340.647, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0018661068752408028, "policy_loss": -0.005918534938246012, "vf_loss": 83.52860260009766, "vf_explained_var": 0.7654721140861511, "kl": 0.0018988008378073573, "entropy": 1.136439561843872, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3673600, "episodes_total": 9184, "training_iteration": 287, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-11-18", "timestamp": 1660255878, "time_this_iter_s": 31.607279777526855, "time_total_s": 14292.552944421768, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14292.552944421768, "timesteps_since_restore": 3673600, "iterations_since_restore": 287, "perf": {"cpu_util_percent": 34.857777777777784, "ram_util_percent": 58.80666666666665}} +{"episode_reward_max": 639.0, "episode_reward_min": 546.0, "episode_reward_mean": 622.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 263.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 311.005}, "custom_metrics": {"sparse_reward_mean": 215.6, 
"sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 190.81, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.43, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.79, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.78, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.05, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 
0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.61, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.06, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.61, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.06, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.61, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.06, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 627.0, 633.0, 627.0, 584.0, 633.0, 636.0, 639.0, 639.0, 630.0, 587.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 630.0, 624.0, 636.0, 633.0, 587.0, 639.0, 630.0, 639.0, 587.0, 639.0, 636.0, 630.0, 630.0, 636.0, 636.0, 633.0, 636.0, 636.0, 636.0, 633.0, 582.0, 624.0, 636.0, 630.0, 
630.0, 633.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 639.0, 639.0, 630.0, 639.0, 579.0, 633.0, 582.0, 587.0, 636.0, 639.0, 587.0, 636.0, 578.0, 639.0, 630.0, 576.0, 630.0, 582.0, 636.0, 579.0, 627.0, 630.0, 636.0, 639.0, 633.0, 633.0, 636.0, 627.0, 636.0, 630.0, 582.0, 627.0, 636.0, 636.0, 636.0, 587.0, 636.0, 633.0, 636.0, 633.0, 633.0, 630.0, 633.0, 636.0, 627.0, 579.0, 639.0, 582.0, 546.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 299.0, 316.0, 311.0, 324.0, 309.0, 318.0, 309.0, 289.0, 295.0, 314.0, 319.0, 316.0, 320.0, 319.0, 320.0, 317.0, 322.0, 314.0, 316.0, 293.0, 294.0, 318.0, 312.0, 288.0, 294.0, 289.0, 293.0, 292.0, 292.0, 315.0, 315.0, 324.0, 312.0, 324.0, 306.0, 313.0, 311.0, 314.0, 322.0, 311.0, 322.0, 295.0, 292.0, 322.0, 317.0, 308.0, 322.0, 314.0, 325.0, 293.0, 294.0, 317.0, 322.0, 318.0, 318.0, 319.0, 311.0, 314.0, 316.0, 319.0, 317.0, 314.0, 322.0, 314.0, 319.0, 306.0, 330.0, 314.0, 322.0, 324.0, 312.0, 322.0, 311.0, 296.0, 286.0, 313.0, 311.0, 314.0, 322.0, 309.0, 321.0, 319.0, 311.0, 318.0, 315.0, 325.0, 311.0, 314.0, 322.0, 312.0, 324.0, 319.0, 311.0, 313.0, 320.0, 325.0, 305.0, 324.0, 306.0, 319.0, 320.0, 317.0, 322.0, 313.0, 317.0, 316.0, 323.0, 288.0, 291.0, 319.0, 314.0, 296.0, 286.0, 295.0, 292.0, 332.0, 304.0, 319.0, 320.0, 296.0, 291.0, 316.0, 320.0, 282.0, 296.0, 314.0, 325.0, 316.0, 314.0, 293.0, 283.0, 323.0, 307.0, 291.0, 291.0, 313.0, 323.0, 299.0, 280.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 319.0, 
314.0, 316.0, 317.0, 314.0, 322.0, 324.0, 303.0, 307.0, 329.0, 307.0, 323.0, 293.0, 289.0, 311.0, 316.0, 314.0, 322.0, 319.0, 317.0, 313.0, 323.0, 283.0, 304.0, 319.0, 317.0, 319.0, 314.0, 314.0, 322.0, 311.0, 322.0, 311.0, 322.0, 314.0, 316.0, 322.0, 311.0, 319.0, 317.0, 313.0, 314.0, 282.0, 297.0, 317.0, 322.0, 293.0, 289.0, 283.0, 263.0, 323.0, 313.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9725594290722099, "mean_processing_ms": 0.2627719670023304, "mean_inference_ms": 1.5521036278405136}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6912000, "num_steps_sampled": 3686400, "sample_time_ms": 23043.972, "load_time_ms": 36.653, "grad_time_ms": 11285.002, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023610887583345175, "policy_loss": -0.005447230767458677, "vf_loss": 83.72765350341797, "vf_explained_var": 0.7662909030914307, "kl": 0.001831754925660789, "entropy": 1.128881573677063, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3686400, "episodes_total": 9216, "training_iteration": 288, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-11-51", "timestamp": 1660255911, "time_this_iter_s": 32.35726475715637, "time_total_s": 14324.910209178925, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14324.910209178925, "timesteps_since_restore": 3686400, "iterations_since_restore": 288, "perf": {"cpu_util_percent": 32.42, "ram_util_percent": 58.875555555555565}} +{"episode_reward_max": 639.0, "episode_reward_min": 546.0, "episode_reward_mean": 621.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 263.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 310.955}, "custom_metrics": {"sparse_reward_mean": 215.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 190.71, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.17, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.98, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.24, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.49, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, 
"useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.24, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.71, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.38, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.24, 
"optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.24, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 633.0, 636.0, 639.0, 627.0, 627.0, 639.0, 639.0, 636.0, 630.0, 
636.0, 630.0, 633.0, 639.0, 579.0, 587.0, 630.0, 633.0, 630.0, 639.0, 582.0, 630.0, 639.0, 636.0, 630.0, 573.0, 582.0, 630.0, 582.0, 636.0, 639.0, 630.0, 576.0, 630.0, 582.0, 636.0, 579.0, 627.0, 630.0, 636.0, 639.0, 633.0, 633.0, 636.0, 627.0, 636.0, 630.0, 582.0, 627.0, 636.0, 636.0, 636.0, 587.0, 636.0, 633.0, 636.0, 633.0, 633.0, 630.0, 633.0, 636.0, 627.0, 579.0, 639.0, 582.0, 546.0, 636.0, 584.0, 627.0, 633.0, 627.0, 584.0, 633.0, 636.0, 639.0, 639.0, 630.0, 587.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 630.0, 624.0, 636.0, 633.0, 587.0, 639.0, 630.0, 639.0, 587.0, 639.0, 636.0, 630.0, 630.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 312.0, 319.0, 317.0, 321.0, 312.0, 318.0, 318.0, 319.0, 320.0, 305.0, 322.0, 310.0, 317.0, 319.0, 320.0, 314.0, 325.0, 321.0, 315.0, 311.0, 319.0, 311.0, 325.0, 311.0, 319.0, 311.0, 322.0, 319.0, 320.0, 285.0, 294.0, 298.0, 289.0, 319.0, 311.0, 313.0, 320.0, 308.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 311.0, 324.0, 315.0, 314.0, 322.0, 311.0, 319.0, 276.0, 297.0, 286.0, 296.0, 308.0, 322.0, 286.0, 296.0, 324.0, 312.0, 321.0, 318.0, 316.0, 314.0, 293.0, 283.0, 323.0, 307.0, 291.0, 291.0, 313.0, 323.0, 299.0, 280.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 319.0, 314.0, 316.0, 317.0, 314.0, 322.0, 324.0, 303.0, 307.0, 329.0, 307.0, 323.0, 293.0, 289.0, 311.0, 316.0, 314.0, 322.0, 319.0, 317.0, 313.0, 323.0, 283.0, 304.0, 319.0, 317.0, 319.0, 314.0, 314.0, 322.0, 311.0, 322.0, 311.0, 322.0, 314.0, 
316.0, 322.0, 311.0, 319.0, 317.0, 313.0, 314.0, 282.0, 297.0, 317.0, 322.0, 293.0, 289.0, 283.0, 263.0, 323.0, 313.0, 285.0, 299.0, 316.0, 311.0, 324.0, 309.0, 318.0, 309.0, 289.0, 295.0, 314.0, 319.0, 316.0, 320.0, 319.0, 320.0, 317.0, 322.0, 314.0, 316.0, 293.0, 294.0, 318.0, 312.0, 288.0, 294.0, 289.0, 293.0, 292.0, 292.0, 315.0, 315.0, 324.0, 312.0, 324.0, 306.0, 313.0, 311.0, 314.0, 322.0, 311.0, 322.0, 295.0, 292.0, 322.0, 317.0, 308.0, 322.0, 314.0, 325.0, 293.0, 294.0, 317.0, 322.0, 318.0, 318.0, 319.0, 311.0, 314.0, 316.0, 319.0, 317.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9707646787589033, "mean_processing_ms": 0.26241176618575823, "mean_inference_ms": 1.5504455690384487}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6936000, "num_steps_sampled": 3699200, "sample_time_ms": 22728.739, "load_time_ms": 36.546, "grad_time_ms": 11195.454, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004752982931677252, "policy_loss": -0.00697875814512372, "vf_loss": 80.24703979492188, "vf_explained_var": 0.7700864672660828, "kl": 0.001980138709768653, "entropy": 1.1412941217422485, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3699200, "episodes_total": 9248, "training_iteration": 289, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-12-24", "timestamp": 1660255944, "time_this_iter_s": 33.15079879760742, "time_total_s": 14358.061007976532, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14358.061007976532, "timesteps_since_restore": 3699200, "iterations_since_restore": 289, "perf": {"cpu_util_percent": 
32.6936170212766, "ram_util_percent": 58.93829787234045}} +{"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 617.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 308.815}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.63, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 19.11, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.35, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, 
"onion_drop_agent_0_mean": 0.45, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.37, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.69, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.81, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.76, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.37, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.37, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [590.0, 633.0, 584.0, 636.0, 630.0, 630.0, 633.0, 587.0, 636.0, 630.0, 582.0, 405.0, 633.0, 630.0, 627.0, 579.0, 576.0, 587.0, 582.0, 636.0, 636.0, 618.0, 636.0, 630.0, 636.0, 624.0, 639.0, 633.0, 630.0, 587.0, 587.0, 630.0, 639.0, 582.0, 546.0, 636.0, 584.0, 627.0, 633.0, 627.0, 584.0, 633.0, 636.0, 639.0, 639.0, 630.0, 587.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 630.0, 624.0, 636.0, 633.0, 587.0, 639.0, 630.0, 639.0, 587.0, 639.0, 636.0, 630.0, 630.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 639.0, 627.0, 627.0, 639.0, 639.0, 636.0, 630.0, 636.0, 630.0, 633.0, 639.0, 579.0, 587.0, 630.0, 633.0, 630.0, 639.0, 582.0, 630.0, 639.0, 636.0, 630.0, 573.0, 582.0, 630.0, 582.0, 636.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 297.0, 316.0, 317.0, 290.0, 294.0, 317.0, 319.0, 311.0, 319.0, 323.0, 307.0, 316.0, 317.0, 292.0, 295.0, 319.0, 317.0, 313.0, 317.0, 281.0, 301.0, 198.0, 207.0, 313.0, 320.0, 310.0, 320.0, 311.0, 316.0, 282.0, 297.0, 285.0, 291.0, 309.0, 278.0, 293.0, 289.0, 311.0, 325.0, 321.0, 315.0, 302.0, 316.0, 314.0, 322.0, 311.0, 319.0, 319.0, 317.0, 319.0, 305.0, 319.0, 320.0, 322.0, 311.0, 326.0, 304.0, 301.0, 286.0, 293.0, 294.0, 313.0, 317.0, 317.0, 322.0, 293.0, 289.0, 283.0, 263.0, 323.0, 313.0, 285.0, 299.0, 316.0, 311.0, 324.0, 309.0, 318.0, 309.0, 289.0, 295.0, 314.0, 319.0, 316.0, 320.0, 319.0, 320.0, 317.0, 322.0, 314.0, 
316.0, 293.0, 294.0, 318.0, 312.0, 288.0, 294.0, 289.0, 293.0, 292.0, 292.0, 315.0, 315.0, 324.0, 312.0, 324.0, 306.0, 313.0, 311.0, 314.0, 322.0, 311.0, 322.0, 295.0, 292.0, 322.0, 317.0, 308.0, 322.0, 314.0, 325.0, 293.0, 294.0, 317.0, 322.0, 318.0, 318.0, 319.0, 311.0, 314.0, 316.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 321.0, 312.0, 318.0, 318.0, 319.0, 320.0, 305.0, 322.0, 310.0, 317.0, 319.0, 320.0, 314.0, 325.0, 321.0, 315.0, 311.0, 319.0, 311.0, 325.0, 311.0, 319.0, 311.0, 322.0, 319.0, 320.0, 285.0, 294.0, 298.0, 289.0, 319.0, 311.0, 313.0, 320.0, 308.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 311.0, 324.0, 315.0, 314.0, 322.0, 311.0, 319.0, 276.0, 297.0, 286.0, 296.0, 308.0, 322.0, 286.0, 296.0, 324.0, 312.0, 321.0, 318.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9689852254621897, "mean_processing_ms": 0.26205467993702586, "mean_inference_ms": 1.548820818018191}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6960000, "num_steps_sampled": 3712000, "sample_time_ms": 22386.717, "load_time_ms": 37.444, "grad_time_ms": 11069.639, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020440255757421255, "policy_loss": -0.0058852084912359715, "vf_loss": 84.9912338256836, "vf_explained_var": 0.7650973200798035, "kl": 0.0021299307700246572, "entropy": 1.1397589445114136, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3712000, "episodes_total": 9280, "training_iteration": 290, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-12-56", "timestamp": 1660255976, "time_this_iter_s": 32.003324031829834, "time_total_s": 14390.064332008362, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14390.064332008362, "timesteps_since_restore": 3712000, "iterations_since_restore": 290, "perf": {"cpu_util_percent": 30.039130434782606, "ram_util_percent": 58.817391304347815}} +{"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 614.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 307.38}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.76, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.96, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 19.15, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, 
"useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.34, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.2, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.35, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.58, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.76, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.69, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.2, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.2, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [544.0, 633.0, 627.0, 618.0, 636.0, 627.0, 581.0, 582.0, 639.0, 587.0, 639.0, 573.0, 633.0, 636.0, 587.0, 639.0, 564.0, 639.0, 630.0, 633.0, 558.0, 582.0, 630.0, 630.0, 587.0, 624.0, 636.0, 582.0, 587.0, 633.0, 587.0, 576.0, 630.0, 630.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 639.0, 627.0, 627.0, 639.0, 639.0, 636.0, 630.0, 636.0, 630.0, 633.0, 639.0, 579.0, 587.0, 630.0, 633.0, 630.0, 639.0, 582.0, 630.0, 639.0, 636.0, 630.0, 573.0, 582.0, 630.0, 582.0, 636.0, 639.0, 590.0, 633.0, 584.0, 636.0, 630.0, 630.0, 633.0, 587.0, 636.0, 630.0, 582.0, 405.0, 633.0, 630.0, 627.0, 579.0, 576.0, 587.0, 582.0, 636.0, 636.0, 618.0, 636.0, 630.0, 636.0, 624.0, 639.0, 633.0, 630.0, 587.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 277.0, 319.0, 314.0, 311.0, 316.0, 315.0, 303.0, 309.0, 327.0, 305.0, 322.0, 284.0, 297.0, 290.0, 292.0, 311.0, 328.0, 288.0, 299.0, 317.0, 322.0, 292.0, 281.0, 316.0, 317.0, 324.0, 312.0, 293.0, 294.0, 322.0, 317.0, 287.0, 277.0, 319.0, 320.0, 318.0, 312.0, 322.0, 311.0, 292.0, 266.0, 288.0, 294.0, 310.0, 320.0, 314.0, 316.0, 293.0, 294.0, 311.0, 313.0, 319.0, 317.0, 289.0, 293.0, 295.0, 292.0, 316.0, 317.0, 306.0, 
281.0, 283.0, 293.0, 319.0, 311.0, 314.0, 316.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 321.0, 312.0, 318.0, 318.0, 319.0, 320.0, 305.0, 322.0, 310.0, 317.0, 319.0, 320.0, 314.0, 325.0, 321.0, 315.0, 311.0, 319.0, 311.0, 325.0, 311.0, 319.0, 311.0, 322.0, 319.0, 320.0, 285.0, 294.0, 298.0, 289.0, 319.0, 311.0, 313.0, 320.0, 308.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 311.0, 324.0, 315.0, 314.0, 322.0, 311.0, 319.0, 276.0, 297.0, 286.0, 296.0, 308.0, 322.0, 286.0, 296.0, 324.0, 312.0, 321.0, 318.0, 293.0, 297.0, 316.0, 317.0, 290.0, 294.0, 317.0, 319.0, 311.0, 319.0, 323.0, 307.0, 316.0, 317.0, 292.0, 295.0, 319.0, 317.0, 313.0, 317.0, 281.0, 301.0, 198.0, 207.0, 313.0, 320.0, 310.0, 320.0, 311.0, 316.0, 282.0, 297.0, 285.0, 291.0, 309.0, 278.0, 293.0, 289.0, 311.0, 325.0, 321.0, 315.0, 302.0, 316.0, 314.0, 322.0, 311.0, 319.0, 319.0, 317.0, 319.0, 305.0, 319.0, 320.0, 322.0, 311.0, 326.0, 304.0, 301.0, 286.0, 293.0, 294.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.967235186119729, "mean_processing_ms": 0.26170596959071135, "mean_inference_ms": 1.5471921568344904}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6984000, "num_steps_sampled": 3724800, "sample_time_ms": 21912.426, "load_time_ms": 37.292, "grad_time_ms": 10653.236, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003775561461225152, "policy_loss": -0.004369485657662153, "vf_loss": 87.11421966552734, "vf_explained_var": 0.7634318470954895, "kl": 0.0017795447492972016, "entropy": 1.1327377557754517, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3724800, "episodes_total": 9312, "training_iteration": 291, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-13-27", "timestamp": 1660256007, "time_this_iter_s": 30.522319793701172, "time_total_s": 14420.586651802063, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14420.586651802063, "timesteps_since_restore": 3724800, "iterations_since_restore": 291, "perf": {"cpu_util_percent": 38.19767441860465, "ram_util_percent": 59.255813953488385}} +{"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 610.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.465}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 187.73, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.16, 
"onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.04, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.5, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.25, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.77, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.23, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 3, 
"soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.23, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.23, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 639.0, 633.0, 633.0, 633.0, 567.0, 636.0, 633.0, 633.0, 636.0, 636.0, 558.0, 584.0, 639.0, 639.0, 633.0, 567.0, 590.0, 636.0, 633.0, 636.0, 630.0, 582.0, 630.0, 636.0, 558.0, 639.0, 584.0, 587.0, 544.0, 636.0, 633.0, 630.0, 582.0, 636.0, 639.0, 590.0, 633.0, 584.0, 636.0, 630.0, 630.0, 633.0, 587.0, 636.0, 630.0, 582.0, 405.0, 633.0, 630.0, 627.0, 579.0, 576.0, 587.0, 582.0, 636.0, 636.0, 618.0, 636.0, 630.0, 636.0, 624.0, 639.0, 633.0, 630.0, 587.0, 587.0, 630.0, 544.0, 633.0, 627.0, 618.0, 636.0, 627.0, 581.0, 582.0, 639.0, 587.0, 639.0, 573.0, 633.0, 636.0, 587.0, 639.0, 564.0, 639.0, 630.0, 633.0, 558.0, 582.0, 630.0, 630.0, 587.0, 624.0, 636.0, 582.0, 587.0, 633.0, 587.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 325.0, 314.0, 319.0, 314.0, 309.0, 324.0, 311.0, 322.0, 277.0, 290.0, 316.0, 320.0, 325.0, 308.0, 312.0, 321.0, 327.0, 309.0, 318.0, 318.0, 274.0, 284.0, 288.0, 296.0, 324.0, 315.0, 319.0, 320.0, 324.0, 309.0, 279.0, 288.0, 301.0, 289.0, 312.0, 
324.0, 311.0, 322.0, 319.0, 317.0, 321.0, 309.0, 291.0, 291.0, 319.0, 311.0, 317.0, 319.0, 273.0, 285.0, 322.0, 317.0, 299.0, 285.0, 295.0, 292.0, 275.0, 269.0, 319.0, 317.0, 317.0, 316.0, 308.0, 322.0, 286.0, 296.0, 324.0, 312.0, 321.0, 318.0, 293.0, 297.0, 316.0, 317.0, 290.0, 294.0, 317.0, 319.0, 311.0, 319.0, 323.0, 307.0, 316.0, 317.0, 292.0, 295.0, 319.0, 317.0, 313.0, 317.0, 281.0, 301.0, 198.0, 207.0, 313.0, 320.0, 310.0, 320.0, 311.0, 316.0, 282.0, 297.0, 285.0, 291.0, 309.0, 278.0, 293.0, 289.0, 311.0, 325.0, 321.0, 315.0, 302.0, 316.0, 314.0, 322.0, 311.0, 319.0, 319.0, 317.0, 319.0, 305.0, 319.0, 320.0, 322.0, 311.0, 326.0, 304.0, 301.0, 286.0, 293.0, 294.0, 313.0, 317.0, 267.0, 277.0, 319.0, 314.0, 311.0, 316.0, 315.0, 303.0, 309.0, 327.0, 305.0, 322.0, 284.0, 297.0, 290.0, 292.0, 311.0, 328.0, 288.0, 299.0, 317.0, 322.0, 292.0, 281.0, 316.0, 317.0, 324.0, 312.0, 293.0, 294.0, 322.0, 317.0, 287.0, 277.0, 319.0, 320.0, 318.0, 312.0, 322.0, 311.0, 292.0, 266.0, 288.0, 294.0, 310.0, 320.0, 314.0, 316.0, 293.0, 294.0, 311.0, 313.0, 319.0, 317.0, 289.0, 293.0, 295.0, 292.0, 316.0, 317.0, 306.0, 281.0, 283.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9654876642180179, "mean_processing_ms": 0.26135729355980103, "mean_inference_ms": 1.545357165029807}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7008000, "num_steps_sampled": 3737600, "sample_time_ms": 21647.86, "load_time_ms": 37.443, "grad_time_ms": 10160.016, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012383932480588555, "policy_loss": -0.006961038801819086, "vf_loss": 87.61873626708984, "vf_explained_var": 0.757759153842926, "kl": 0.001912236213684082, "entropy": 1.1248730421066284, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3737600, "episodes_total": 9344, "training_iteration": 292, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-13-55", "timestamp": 1660256035, "time_this_iter_s": 27.889997720718384, "time_total_s": 14448.476649522781, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14448.476649522781, "timesteps_since_restore": 3737600, "iterations_since_restore": 292, "perf": {"cpu_util_percent": 31.551282051282044, "ram_util_percent": 59.09743589743588}} +{"episode_reward_max": 639.0, "episode_reward_min": 339.0, "episode_reward_mean": 610.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 167.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 305.435}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.67, "shaped_reward_min": 99, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.18, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.39, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.87, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.29, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.96, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.29, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.96, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.29, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.96, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [555.0, 633.0, 564.0, 639.0, 581.0, 636.0, 630.0, 639.0, 630.0, 590.0, 639.0, 636.0, 639.0, 627.0, 630.0, 639.0, 639.0, 636.0, 633.0, 630.0, 636.0, 639.0, 630.0, 639.0, 584.0, 633.0, 633.0, 636.0, 339.0, 579.0, 579.0, 587.0, 630.0, 587.0, 587.0, 630.0, 544.0, 633.0, 627.0, 618.0, 636.0, 627.0, 581.0, 582.0, 639.0, 587.0, 639.0, 573.0, 633.0, 636.0, 587.0, 639.0, 564.0, 639.0, 630.0, 633.0, 558.0, 582.0, 630.0, 630.0, 587.0, 624.0, 636.0, 582.0, 587.0, 633.0, 587.0, 576.0, 582.0, 639.0, 633.0, 633.0, 633.0, 567.0, 636.0, 633.0, 633.0, 636.0, 636.0, 558.0, 584.0, 639.0, 639.0, 633.0, 567.0, 590.0, 636.0, 633.0, 636.0, 630.0, 582.0, 630.0, 636.0, 558.0, 639.0, 584.0, 587.0, 544.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 270.0, 316.0, 317.0, 293.0, 
271.0, 311.0, 328.0, 295.0, 286.0, 324.0, 312.0, 321.0, 309.0, 316.0, 323.0, 311.0, 319.0, 299.0, 291.0, 319.0, 320.0, 314.0, 322.0, 321.0, 318.0, 316.0, 311.0, 311.0, 319.0, 315.0, 324.0, 314.0, 325.0, 316.0, 320.0, 311.0, 322.0, 300.0, 330.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 314.0, 325.0, 288.0, 296.0, 318.0, 315.0, 319.0, 314.0, 316.0, 320.0, 167.0, 172.0, 288.0, 291.0, 285.0, 294.0, 293.0, 294.0, 326.0, 304.0, 301.0, 286.0, 293.0, 294.0, 313.0, 317.0, 267.0, 277.0, 319.0, 314.0, 311.0, 316.0, 315.0, 303.0, 309.0, 327.0, 305.0, 322.0, 284.0, 297.0, 290.0, 292.0, 311.0, 328.0, 288.0, 299.0, 317.0, 322.0, 292.0, 281.0, 316.0, 317.0, 324.0, 312.0, 293.0, 294.0, 322.0, 317.0, 287.0, 277.0, 319.0, 320.0, 318.0, 312.0, 322.0, 311.0, 292.0, 266.0, 288.0, 294.0, 310.0, 320.0, 314.0, 316.0, 293.0, 294.0, 311.0, 313.0, 319.0, 317.0, 289.0, 293.0, 295.0, 292.0, 316.0, 317.0, 306.0, 281.0, 283.0, 293.0, 289.0, 293.0, 325.0, 314.0, 319.0, 314.0, 309.0, 324.0, 311.0, 322.0, 277.0, 290.0, 316.0, 320.0, 325.0, 308.0, 312.0, 321.0, 327.0, 309.0, 318.0, 318.0, 274.0, 284.0, 288.0, 296.0, 324.0, 315.0, 319.0, 320.0, 324.0, 309.0, 279.0, 288.0, 301.0, 289.0, 312.0, 324.0, 311.0, 322.0, 319.0, 317.0, 321.0, 309.0, 291.0, 291.0, 319.0, 311.0, 317.0, 319.0, 273.0, 285.0, 322.0, 317.0, 299.0, 285.0, 295.0, 292.0, 275.0, 269.0, 319.0, 317.0, 317.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9637668687735891, "mean_processing_ms": 0.2610164264718474, "mean_inference_ms": 1.5436541723929016}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7032000, "num_steps_sampled": 3750400, "sample_time_ms": 21753.951, "load_time_ms": 37.441, "grad_time_ms": 10065.741, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023420238867402077, "policy_loss": -0.005699212197214365, "vf_loss": 86.0804214477539, "vf_explained_var": 0.7711854577064514, "kl": 0.0016376747516915202, "entropy": 1.1336089372634888, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3750400, "episodes_total": 9376, "training_iteration": 293, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-14-28", "timestamp": 1660256068, "time_this_iter_s": 33.79537034034729, "time_total_s": 14482.272019863129, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14482.272019863129, "timesteps_since_restore": 3750400, "iterations_since_restore": 293, "perf": {"cpu_util_percent": 29.666666666666668, "ram_util_percent": 58.67083333333335}} +{"episode_reward_max": 639.0, "episode_reward_min": 339.0, "episode_reward_mean": 615.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 167.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 307.66}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.52, "shaped_reward_min": 99, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.26, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.3, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.68, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.56, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.12, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.37, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.12, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.12, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 633.0, 636.0, 636.0, 630.0, 636.0, 639.0, 630.0, 630.0, 630.0, 633.0, 630.0, 630.0, 582.0, 627.0, 633.0, 636.0, 636.0, 576.0, 630.0, 627.0, 627.0, 636.0, 561.0, 579.0, 636.0, 639.0, 639.0, 633.0, 630.0, 569.0, 587.0, 633.0, 587.0, 576.0, 582.0, 639.0, 633.0, 633.0, 633.0, 567.0, 636.0, 633.0, 633.0, 636.0, 636.0, 558.0, 584.0, 639.0, 639.0, 633.0, 567.0, 590.0, 636.0, 633.0, 636.0, 630.0, 582.0, 630.0, 636.0, 558.0, 639.0, 584.0, 587.0, 544.0, 636.0, 633.0, 555.0, 633.0, 564.0, 639.0, 581.0, 636.0, 630.0, 639.0, 630.0, 590.0, 639.0, 636.0, 639.0, 627.0, 630.0, 639.0, 639.0, 636.0, 633.0, 630.0, 636.0, 639.0, 630.0, 639.0, 584.0, 633.0, 633.0, 636.0, 339.0, 579.0, 579.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 306.0, 330.0, 327.0, 306.0, 319.0, 317.0, 324.0, 312.0, 318.0, 312.0, 314.0, 322.0, 314.0, 325.0, 319.0, 311.0, 313.0, 317.0, 316.0, 314.0, 313.0, 320.0, 313.0, 317.0, 316.0, 314.0, 299.0, 283.0, 320.0, 307.0, 312.0, 321.0, 322.0, 314.0, 319.0, 317.0, 288.0, 288.0, 312.0, 318.0, 316.0, 311.0, 313.0, 314.0, 314.0, 322.0, 279.0, 282.0, 299.0, 280.0, 314.0, 322.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 318.0, 312.0, 288.0, 281.0, 295.0, 292.0, 316.0, 317.0, 306.0, 281.0, 283.0, 293.0, 289.0, 293.0, 325.0, 314.0, 319.0, 314.0, 309.0, 324.0, 311.0, 322.0, 277.0, 290.0, 316.0, 320.0, 325.0, 308.0, 312.0, 321.0, 327.0, 309.0, 318.0, 318.0, 274.0, 284.0, 288.0, 296.0, 324.0, 315.0, 319.0, 320.0, 324.0, 309.0, 279.0, 288.0, 301.0, 289.0, 312.0, 324.0, 311.0, 322.0, 319.0, 317.0, 321.0, 309.0, 291.0, 291.0, 319.0, 311.0, 317.0, 319.0, 273.0, 285.0, 322.0, 317.0, 299.0, 285.0, 295.0, 292.0, 275.0, 269.0, 319.0, 317.0, 317.0, 316.0, 285.0, 270.0, 316.0, 317.0, 293.0, 271.0, 311.0, 328.0, 295.0, 286.0, 324.0, 312.0, 321.0, 309.0, 316.0, 323.0, 311.0, 319.0, 299.0, 291.0, 319.0, 320.0, 314.0, 322.0, 321.0, 318.0, 316.0, 311.0, 311.0, 319.0, 315.0, 324.0, 314.0, 325.0, 316.0, 320.0, 311.0, 322.0, 300.0, 330.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 314.0, 325.0, 288.0, 296.0, 318.0, 315.0, 319.0, 314.0, 316.0, 320.0, 167.0, 172.0, 288.0, 291.0, 285.0, 294.0, 293.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9620401402665292, "mean_processing_ms": 0.2606729789750365, "mean_inference_ms": 1.5418645190919325}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7056000, "num_steps_sampled": 3763200, "sample_time_ms": 21541.312, "load_time_ms": 37.573, "grad_time_ms": 10164.498, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.0031338699627667665, "policy_loss": -0.004722007550299168, "vf_loss": 84.24658966064453, "vf_explained_var": 0.7650328278541565, "kl": 0.0023102371487766504, "entropy": 1.1375713348388672, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3763200, "episodes_total": 9408, "training_iteration": 294, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-15-00", "timestamp": 1660256100, "time_this_iter_s": 31.67550492286682, "time_total_s": 14513.947524785995, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14513.947524785995, "timesteps_since_restore": 3763200, "iterations_since_restore": 294, "perf": {"cpu_util_percent": 30.09333333333334, "ram_util_percent": 58.68444444444443}} +{"episode_reward_max": 639.0, "episode_reward_min": 339.0, "episode_reward_mean": 616.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 167.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 308.195}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 189.19, "shaped_reward_min": 99, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.11, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.41, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.64, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.18, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.3, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.48, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.85, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.79, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.18, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.3, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.18, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.3, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 633.0, 573.0, 596.0, 636.0, 633.0, 630.0, 627.0, 636.0, 630.0, 639.0, 630.0, 539.0, 633.0, 633.0, 630.0, 639.0, 630.0, 587.0, 587.0, 633.0, 633.0, 639.0, 639.0, 413.0, 633.0, 636.0, 636.0, 587.0, 636.0, 627.0, 633.0, 587.0, 544.0, 636.0, 633.0, 555.0, 633.0, 564.0, 639.0, 581.0, 636.0, 630.0, 639.0, 630.0, 590.0, 639.0, 636.0, 639.0, 627.0, 630.0, 639.0, 639.0, 636.0, 633.0, 630.0, 636.0, 639.0, 630.0, 639.0, 584.0, 633.0, 633.0, 636.0, 339.0, 579.0, 579.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 630.0, 636.0, 639.0, 630.0, 630.0, 630.0, 633.0, 630.0, 630.0, 582.0, 627.0, 633.0, 636.0, 636.0, 576.0, 630.0, 627.0, 627.0, 636.0, 561.0, 579.0, 636.0, 639.0, 639.0, 633.0, 630.0, 569.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 320.0, 317.0, 316.0, 288.0, 285.0, 293.0, 303.0, 324.0, 312.0, 321.0, 312.0, 322.0, 308.0, 313.0, 314.0, 316.0, 320.0, 321.0, 309.0, 325.0, 314.0, 317.0, 313.0, 265.0, 274.0, 313.0, 320.0, 319.0, 314.0, 317.0, 313.0, 317.0, 322.0, 321.0, 309.0, 306.0, 281.0, 292.0, 295.0, 310.0, 323.0, 319.0, 314.0, 319.0, 320.0, 319.0, 320.0, 199.0, 214.0, 321.0, 312.0, 319.0, 317.0, 319.0, 317.0, 295.0, 292.0, 322.0, 314.0, 308.0, 319.0, 317.0, 316.0, 295.0, 292.0, 275.0, 269.0, 319.0, 317.0, 317.0, 316.0, 285.0, 270.0, 316.0, 317.0, 293.0, 271.0, 311.0, 328.0, 295.0, 286.0, 324.0, 312.0, 321.0, 309.0, 316.0, 323.0, 311.0, 319.0, 299.0, 291.0, 319.0, 320.0, 314.0, 322.0, 321.0, 318.0, 316.0, 311.0, 311.0, 319.0, 315.0, 324.0, 314.0, 325.0, 316.0, 320.0, 311.0, 322.0, 300.0, 330.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 314.0, 325.0, 288.0, 296.0, 318.0, 315.0, 319.0, 314.0, 316.0, 320.0, 167.0, 172.0, 288.0, 291.0, 285.0, 294.0, 293.0, 294.0, 316.0, 314.0, 306.0, 330.0, 327.0, 306.0, 319.0, 317.0, 324.0, 312.0, 318.0, 312.0, 314.0, 322.0, 314.0, 325.0, 319.0, 311.0, 313.0, 317.0, 316.0, 314.0, 313.0, 320.0, 313.0, 317.0, 316.0, 314.0, 299.0, 283.0, 320.0, 307.0, 312.0, 321.0, 322.0, 314.0, 319.0, 317.0, 288.0, 288.0, 312.0, 318.0, 316.0, 311.0, 313.0, 314.0, 314.0, 322.0, 279.0, 282.0, 299.0, 280.0, 314.0, 322.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 318.0, 312.0, 288.0, 281.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9603539643629071, "mean_processing_ms": 0.26034177347662363, "mean_inference_ms": 1.5408082131746255}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7080000, 
"num_steps_sampled": 3776000, "sample_time_ms": 22187.522, "load_time_ms": 38.247, "grad_time_ms": 10375.626, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0001668116747168824, "policy_loss": -0.007916351780295372, "vf_loss": 83.1385726928711, "vf_explained_var": 0.7759819626808167, "kl": 0.0019673772621899843, "entropy": 1.1286202669143677, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3776000, "episodes_total": 9440, "training_iteration": 295, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-15-42", "timestamp": 1660256142, "time_this_iter_s": 41.61074709892273, "time_total_s": 14555.558271884918, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": 
false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14555.558271884918, "timesteps_since_restore": 3776000, "iterations_since_restore": 295, "perf": {"cpu_util_percent": 32.182758620689654, "ram_util_percent": 58.76206896551724}} +{"episode_reward_max": 639.0, "episode_reward_min": 339.0, "episode_reward_mean": 614.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 167.0}, "policy_reward_max": {"ppo": 333.0}, "policy_reward_mean": {"ppo": 307.085}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.57, "shaped_reward_min": 99, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.06, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 19.21, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.39, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.18, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.14, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.47, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.83, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.72, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.18, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.14, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.18, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.14, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [618.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 587.0, 633.0, 639.0, 587.0, 630.0, 575.0, 473.0, 633.0, 627.0, 636.0, 639.0, 636.0, 636.0, 633.0, 633.0, 639.0, 630.0, 433.0, 630.0, 633.0, 636.0, 633.0, 639.0, 576.0, 582.0, 339.0, 579.0, 579.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 630.0, 636.0, 639.0, 630.0, 630.0, 630.0, 633.0, 630.0, 630.0, 582.0, 627.0, 633.0, 636.0, 636.0, 576.0, 630.0, 627.0, 627.0, 636.0, 561.0, 579.0, 636.0, 639.0, 639.0, 633.0, 630.0, 569.0, 639.0, 633.0, 573.0, 596.0, 636.0, 633.0, 630.0, 627.0, 636.0, 630.0, 639.0, 630.0, 539.0, 633.0, 633.0, 630.0, 639.0, 630.0, 587.0, 587.0, 633.0, 633.0, 639.0, 639.0, 413.0, 633.0, 
636.0, 636.0, 587.0, 636.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 318.0, 317.0, 322.0, 324.0, 309.0, 316.0, 314.0, 322.0, 317.0, 316.0, 314.0, 320.0, 316.0, 290.0, 297.0, 319.0, 314.0, 317.0, 322.0, 293.0, 294.0, 312.0, 318.0, 284.0, 291.0, 237.0, 236.0, 319.0, 314.0, 317.0, 310.0, 322.0, 314.0, 306.0, 333.0, 319.0, 317.0, 316.0, 320.0, 316.0, 317.0, 318.0, 315.0, 317.0, 322.0, 318.0, 312.0, 218.0, 215.0, 319.0, 311.0, 314.0, 319.0, 316.0, 320.0, 319.0, 314.0, 319.0, 320.0, 286.0, 290.0, 295.0, 287.0, 167.0, 172.0, 288.0, 291.0, 285.0, 294.0, 293.0, 294.0, 316.0, 314.0, 306.0, 330.0, 327.0, 306.0, 319.0, 317.0, 324.0, 312.0, 318.0, 312.0, 314.0, 322.0, 314.0, 325.0, 319.0, 311.0, 313.0, 317.0, 316.0, 314.0, 313.0, 320.0, 313.0, 317.0, 316.0, 314.0, 299.0, 283.0, 320.0, 307.0, 312.0, 321.0, 322.0, 314.0, 319.0, 317.0, 288.0, 288.0, 312.0, 318.0, 316.0, 311.0, 313.0, 314.0, 314.0, 322.0, 279.0, 282.0, 299.0, 280.0, 314.0, 322.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 318.0, 312.0, 288.0, 281.0, 319.0, 320.0, 317.0, 316.0, 288.0, 285.0, 293.0, 303.0, 324.0, 312.0, 321.0, 312.0, 322.0, 308.0, 313.0, 314.0, 316.0, 320.0, 321.0, 309.0, 325.0, 314.0, 317.0, 313.0, 265.0, 274.0, 313.0, 320.0, 319.0, 314.0, 317.0, 313.0, 317.0, 322.0, 321.0, 309.0, 306.0, 281.0, 292.0, 295.0, 310.0, 323.0, 319.0, 314.0, 319.0, 320.0, 319.0, 320.0, 199.0, 214.0, 321.0, 312.0, 319.0, 317.0, 319.0, 317.0, 295.0, 292.0, 322.0, 314.0, 308.0, 319.0, 317.0, 316.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.9586694910648496, "mean_processing_ms": 0.26000987158476824, "mean_inference_ms": 1.539770678675762}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7104000, "num_steps_sampled": 3788800, "sample_time_ms": 22271.193, "load_time_ms": 38.04, "grad_time_ms": 10225.976, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004963720217347145, "policy_loss": -0.003322723088786006, "vf_loss": 88.4856948852539, "vf_explained_var": 0.7634937167167664, "kl": 0.0021246925462037325, "entropy": 1.1242562532424927, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3788800, "episodes_total": 9472, "training_iteration": 296, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-16-13", "timestamp": 1660256173, "time_this_iter_s": 30.998157024383545, "time_total_s": 14586.556428909302, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14586.556428909302, "timesteps_since_restore": 3788800, "iterations_since_restore": 296, "perf": {"cpu_util_percent": 31.388636363636365, "ram_util_percent": 58.774999999999984}} +{"episode_reward_max": 639.0, "episode_reward_min": 413.0, "episode_reward_mean": 616.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 333.0}, "policy_reward_mean": {"ppo": 308.395}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.99, "shaped_reward_min": 133, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.23, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 19.2, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.8, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.4, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.34, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 
18.16, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.77, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.78, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.34, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.16, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.34, 
"viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.16, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 579.0, 639.0, 636.0, 633.0, 630.0, 582.0, 636.0, 633.0, 636.0, 636.0, 639.0, 579.0, 584.0, 630.0, 639.0, 627.0, 527.0, 630.0, 582.0, 630.0, 639.0, 636.0, 633.0, 639.0, 639.0, 627.0, 579.0, 627.0, 639.0, 639.0, 639.0, 639.0, 633.0, 630.0, 569.0, 639.0, 633.0, 573.0, 596.0, 636.0, 633.0, 630.0, 627.0, 636.0, 630.0, 639.0, 630.0, 539.0, 633.0, 633.0, 630.0, 639.0, 630.0, 587.0, 587.0, 633.0, 633.0, 639.0, 639.0, 413.0, 633.0, 636.0, 636.0, 587.0, 
636.0, 627.0, 633.0, 618.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 587.0, 633.0, 639.0, 587.0, 630.0, 575.0, 473.0, 633.0, 627.0, 636.0, 639.0, 636.0, 636.0, 633.0, 633.0, 639.0, 630.0, 433.0, 630.0, 633.0, 636.0, 633.0, 639.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 278.0, 301.0, 316.0, 323.0, 317.0, 319.0, 307.0, 326.0, 315.0, 315.0, 290.0, 292.0, 325.0, 311.0, 316.0, 317.0, 314.0, 322.0, 316.0, 320.0, 313.0, 326.0, 291.0, 288.0, 294.0, 290.0, 310.0, 320.0, 320.0, 319.0, 314.0, 313.0, 258.0, 269.0, 317.0, 313.0, 296.0, 286.0, 321.0, 309.0, 312.0, 327.0, 317.0, 319.0, 321.0, 312.0, 314.0, 325.0, 314.0, 325.0, 316.0, 311.0, 298.0, 281.0, 322.0, 305.0, 317.0, 322.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 316.0, 317.0, 318.0, 312.0, 288.0, 281.0, 319.0, 320.0, 317.0, 316.0, 288.0, 285.0, 293.0, 303.0, 324.0, 312.0, 321.0, 312.0, 322.0, 308.0, 313.0, 314.0, 316.0, 320.0, 321.0, 309.0, 325.0, 314.0, 317.0, 313.0, 265.0, 274.0, 313.0, 320.0, 319.0, 314.0, 317.0, 313.0, 317.0, 322.0, 321.0, 309.0, 306.0, 281.0, 292.0, 295.0, 310.0, 323.0, 319.0, 314.0, 319.0, 320.0, 319.0, 320.0, 199.0, 214.0, 321.0, 312.0, 319.0, 317.0, 319.0, 317.0, 295.0, 292.0, 322.0, 314.0, 308.0, 319.0, 317.0, 316.0, 300.0, 318.0, 317.0, 322.0, 324.0, 309.0, 316.0, 314.0, 322.0, 317.0, 316.0, 314.0, 320.0, 316.0, 290.0, 297.0, 319.0, 314.0, 317.0, 322.0, 293.0, 294.0, 312.0, 318.0, 284.0, 291.0, 237.0, 236.0, 319.0, 314.0, 317.0, 310.0, 322.0, 314.0, 306.0, 333.0, 319.0, 
317.0, 316.0, 320.0, 316.0, 317.0, 318.0, 315.0, 317.0, 322.0, 318.0, 312.0, 218.0, 215.0, 319.0, 311.0, 314.0, 319.0, 316.0, 320.0, 319.0, 314.0, 319.0, 320.0, 286.0, 290.0, 295.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9570006029896723, "mean_processing_ms": 0.25968048709196123, "mean_inference_ms": 1.5389220446317904}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7128000, "num_steps_sampled": 3801600, "sample_time_ms": 22480.496, "load_time_ms": 38.114, "grad_time_ms": 10354.328, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004928060807287693, "policy_loss": -0.0034768336918205023, "vf_loss": 89.6873550415039, "vf_explained_var": 0.7655234336853027, "kl": 0.0019178093643859029, "entropy": 1.1276906728744507, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3801600, "episodes_total": 9504, "training_iteration": 297, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-16-48", "timestamp": 1660256208, "time_this_iter_s": 34.984565019607544, "time_total_s": 14621.54099392891, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14621.54099392891, "timesteps_since_restore": 3801600, "iterations_since_restore": 297, "perf": {"cpu_util_percent": 30.266, "ram_util_percent": 58.788000000000004}} +{"episode_reward_max": 639.0, "episode_reward_min": 433.0, "episode_reward_mean": 615.99, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 333.0}, "policy_reward_mean": {"ppo": 307.995}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 140, 
"sparse_reward_max": 220, "shaped_reward_mean": 189.59, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.45, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.92, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.96, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.16, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.47, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.99, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.47, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.99, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.47, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.99, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 624.0, 576.0, 639.0, 636.0, 630.0, 522.0, 587.0, 639.0, 578.0, 636.0, 584.0, 639.0, 522.0, 630.0, 633.0, 633.0, 576.0, 567.0, 633.0, 636.0, 636.0, 587.0, 627.0, 636.0, 627.0, 639.0, 636.0, 587.0, 630.0, 633.0, 636.0, 587.0, 636.0, 627.0, 633.0, 618.0, 639.0, 633.0, 630.0, 639.0, 
630.0, 636.0, 587.0, 633.0, 639.0, 587.0, 630.0, 575.0, 473.0, 633.0, 627.0, 636.0, 639.0, 636.0, 636.0, 633.0, 633.0, 639.0, 630.0, 433.0, 630.0, 633.0, 636.0, 633.0, 639.0, 576.0, 582.0, 587.0, 579.0, 639.0, 636.0, 633.0, 630.0, 582.0, 636.0, 633.0, 636.0, 636.0, 639.0, 579.0, 584.0, 630.0, 639.0, 627.0, 527.0, 630.0, 582.0, 630.0, 639.0, 636.0, 633.0, 639.0, 639.0, 627.0, 579.0, 627.0, 639.0, 639.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 320.0, 312.0, 312.0, 296.0, 280.0, 317.0, 322.0, 313.0, 323.0, 311.0, 319.0, 269.0, 253.0, 296.0, 291.0, 319.0, 320.0, 313.0, 265.0, 319.0, 317.0, 293.0, 291.0, 314.0, 325.0, 254.0, 268.0, 321.0, 309.0, 309.0, 324.0, 314.0, 319.0, 279.0, 297.0, 286.0, 281.0, 311.0, 322.0, 317.0, 319.0, 317.0, 319.0, 286.0, 301.0, 313.0, 314.0, 319.0, 317.0, 316.0, 311.0, 321.0, 318.0, 317.0, 319.0, 291.0, 296.0, 307.0, 323.0, 313.0, 320.0, 323.0, 313.0, 295.0, 292.0, 322.0, 314.0, 308.0, 319.0, 317.0, 316.0, 300.0, 318.0, 317.0, 322.0, 324.0, 309.0, 316.0, 314.0, 322.0, 317.0, 316.0, 314.0, 320.0, 316.0, 290.0, 297.0, 319.0, 314.0, 317.0, 322.0, 293.0, 294.0, 312.0, 318.0, 284.0, 291.0, 237.0, 236.0, 319.0, 314.0, 317.0, 310.0, 322.0, 314.0, 306.0, 333.0, 319.0, 317.0, 316.0, 320.0, 316.0, 317.0, 318.0, 315.0, 317.0, 322.0, 318.0, 312.0, 218.0, 215.0, 319.0, 311.0, 314.0, 319.0, 316.0, 320.0, 319.0, 314.0, 319.0, 320.0, 286.0, 290.0, 295.0, 287.0, 293.0, 294.0, 278.0, 301.0, 316.0, 323.0, 317.0, 319.0, 307.0, 326.0, 315.0, 315.0, 290.0, 
292.0, 325.0, 311.0, 316.0, 317.0, 314.0, 322.0, 316.0, 320.0, 313.0, 326.0, 291.0, 288.0, 294.0, 290.0, 310.0, 320.0, 320.0, 319.0, 314.0, 313.0, 258.0, 269.0, 317.0, 313.0, 296.0, 286.0, 321.0, 309.0, 312.0, 327.0, 317.0, 319.0, 321.0, 312.0, 314.0, 325.0, 314.0, 325.0, 316.0, 311.0, 298.0, 281.0, 322.0, 305.0, 317.0, 322.0, 319.0, 320.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9553216188480552, "mean_processing_ms": 0.25934597014735267, "mean_inference_ms": 1.5375890964227674}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7152000, "num_steps_sampled": 3814400, "sample_time_ms": 22450.487, "load_time_ms": 38.169, "grad_time_ms": 10228.148, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004324051551520824, "policy_loss": -0.003937617409974337, "vf_loss": 88.24027252197266, "vf_explained_var": 0.7693286538124084, "kl": 0.00227510672993958, "entropy": 1.1247196197509766, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3814400, "episodes_total": 9536, "training_iteration": 298, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-17-18", "timestamp": 1660256238, "time_this_iter_s": 30.79404616355896, "time_total_s": 14652.335040092468, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14652.335040092468, "timesteps_since_restore": 3814400, "iterations_since_restore": 298, "perf": {"cpu_util_percent": 31.34418604651162, "ram_util_percent": 58.76279069767441}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 615.53, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.765}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.53, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.58, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 19.1, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.95, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.27, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, 
"useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.43, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.28, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.73, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.43, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.0, 
"optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.43, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 630.0, 636.0, 573.0, 630.0, 587.0, 639.0, 582.0, 624.0, 630.0, 
639.0, 587.0, 590.0, 639.0, 639.0, 636.0, 633.0, 579.0, 633.0, 633.0, 587.0, 630.0, 630.0, 525.0, 633.0, 636.0, 582.0, 633.0, 581.0, 582.0, 636.0, 633.0, 639.0, 576.0, 582.0, 587.0, 579.0, 639.0, 636.0, 633.0, 630.0, 582.0, 636.0, 633.0, 636.0, 636.0, 639.0, 579.0, 584.0, 630.0, 639.0, 627.0, 527.0, 630.0, 582.0, 630.0, 639.0, 636.0, 633.0, 639.0, 639.0, 627.0, 579.0, 627.0, 639.0, 639.0, 639.0, 639.0, 624.0, 576.0, 639.0, 636.0, 630.0, 522.0, 587.0, 639.0, 578.0, 636.0, 584.0, 639.0, 522.0, 630.0, 633.0, 633.0, 576.0, 567.0, 633.0, 636.0, 636.0, 587.0, 627.0, 636.0, 627.0, 639.0, 636.0, 587.0, 630.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 313.0, 323.0, 313.0, 317.0, 319.0, 317.0, 293.0, 280.0, 316.0, 314.0, 289.0, 298.0, 319.0, 320.0, 288.0, 294.0, 313.0, 311.0, 310.0, 320.0, 321.0, 318.0, 291.0, 296.0, 296.0, 294.0, 312.0, 327.0, 317.0, 322.0, 329.0, 307.0, 319.0, 314.0, 285.0, 294.0, 316.0, 317.0, 316.0, 317.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 258.0, 267.0, 311.0, 322.0, 314.0, 322.0, 291.0, 291.0, 324.0, 309.0, 291.0, 290.0, 291.0, 291.0, 319.0, 317.0, 319.0, 314.0, 319.0, 320.0, 286.0, 290.0, 295.0, 287.0, 293.0, 294.0, 278.0, 301.0, 316.0, 323.0, 317.0, 319.0, 307.0, 326.0, 315.0, 315.0, 290.0, 292.0, 325.0, 311.0, 316.0, 317.0, 314.0, 322.0, 316.0, 320.0, 313.0, 326.0, 291.0, 288.0, 294.0, 290.0, 310.0, 320.0, 320.0, 319.0, 314.0, 313.0, 258.0, 269.0, 317.0, 313.0, 296.0, 286.0, 321.0, 309.0, 312.0, 327.0, 317.0, 319.0, 321.0, 
312.0, 314.0, 325.0, 314.0, 325.0, 316.0, 311.0, 298.0, 281.0, 322.0, 305.0, 317.0, 322.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 312.0, 312.0, 296.0, 280.0, 317.0, 322.0, 313.0, 323.0, 311.0, 319.0, 269.0, 253.0, 296.0, 291.0, 319.0, 320.0, 313.0, 265.0, 319.0, 317.0, 293.0, 291.0, 314.0, 325.0, 254.0, 268.0, 321.0, 309.0, 309.0, 324.0, 314.0, 319.0, 279.0, 297.0, 286.0, 281.0, 311.0, 322.0, 317.0, 319.0, 317.0, 319.0, 286.0, 301.0, 313.0, 314.0, 319.0, 317.0, 316.0, 311.0, 321.0, 318.0, 317.0, 319.0, 291.0, 296.0, 307.0, 323.0, 313.0, 320.0, 323.0, 313.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9536494258770276, "mean_processing_ms": 0.25901133252507974, "mean_inference_ms": 1.536150189883149}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7176000, "num_steps_sampled": 3827200, "sample_time_ms": 22404.117, "load_time_ms": 38.01, "grad_time_ms": 10081.123, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014618774875998497, "policy_loss": -0.00924667902290821, "vf_loss": 83.49740600585938, "vf_explained_var": 0.7685635685920715, "kl": 0.0018500644946470857, "entropy": 1.12986421585083, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3827200, "episodes_total": 9568, "training_iteration": 299, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-17-50", "timestamp": 1660256270, "time_this_iter_s": 31.213135242462158, "time_total_s": 14683.54817533493, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14683.54817533493, "timesteps_since_restore": 3827200, "iterations_since_restore": 299, "perf": {"cpu_util_percent": 
30.386363636363637, "ram_util_percent": 58.77272727272726}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 616.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.425}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.65, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.79, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.78, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 17.19, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, 
"onion_drop_agent_0_mean": 0.8, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.68, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.69, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.07, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.33, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.27, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 16.69, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.69, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 636.0, 579.0, 636.0, 636.0, 582.0, 633.0, 639.0, 582.0, 587.0, 639.0, 630.0, 639.0, 636.0, 627.0, 600.0, 639.0, 630.0, 630.0, 627.0, 639.0, 582.0, 627.0, 587.0, 636.0, 633.0, 636.0, 636.0, 633.0, 630.0, 579.0, 627.0, 639.0, 639.0, 639.0, 639.0, 624.0, 576.0, 639.0, 636.0, 630.0, 522.0, 587.0, 639.0, 578.0, 636.0, 584.0, 639.0, 522.0, 630.0, 633.0, 633.0, 576.0, 567.0, 633.0, 636.0, 636.0, 587.0, 627.0, 636.0, 627.0, 639.0, 636.0, 587.0, 630.0, 633.0, 636.0, 630.0, 636.0, 630.0, 636.0, 573.0, 630.0, 587.0, 639.0, 582.0, 624.0, 630.0, 639.0, 587.0, 590.0, 639.0, 639.0, 636.0, 633.0, 579.0, 633.0, 633.0, 587.0, 630.0, 630.0, 525.0, 633.0, 636.0, 582.0, 633.0, 581.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 314.0, 322.0, 314.0, 322.0, 278.0, 301.0, 314.0, 322.0, 316.0, 320.0, 291.0, 291.0, 314.0, 319.0, 319.0, 320.0, 286.0, 296.0, 283.0, 304.0, 320.0, 319.0, 324.0, 306.0, 322.0, 317.0, 318.0, 318.0, 308.0, 319.0, 299.0, 301.0, 321.0, 318.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 320.0, 319.0, 288.0, 294.0, 319.0, 308.0, 289.0, 298.0, 314.0, 322.0, 317.0, 316.0, 316.0, 320.0, 312.0, 324.0, 319.0, 314.0, 311.0, 319.0, 283.0, 296.0, 322.0, 305.0, 317.0, 322.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 312.0, 312.0, 296.0, 280.0, 317.0, 322.0, 313.0, 323.0, 311.0, 319.0, 269.0, 253.0, 296.0, 291.0, 319.0, 320.0, 313.0, 
265.0, 319.0, 317.0, 293.0, 291.0, 314.0, 325.0, 254.0, 268.0, 321.0, 309.0, 309.0, 324.0, 314.0, 319.0, 279.0, 297.0, 286.0, 281.0, 311.0, 322.0, 317.0, 319.0, 317.0, 319.0, 286.0, 301.0, 313.0, 314.0, 319.0, 317.0, 316.0, 311.0, 321.0, 318.0, 317.0, 319.0, 291.0, 296.0, 307.0, 323.0, 313.0, 320.0, 323.0, 313.0, 316.0, 314.0, 313.0, 323.0, 313.0, 317.0, 319.0, 317.0, 293.0, 280.0, 316.0, 314.0, 289.0, 298.0, 319.0, 320.0, 288.0, 294.0, 313.0, 311.0, 310.0, 320.0, 321.0, 318.0, 291.0, 296.0, 296.0, 294.0, 312.0, 327.0, 317.0, 322.0, 329.0, 307.0, 319.0, 314.0, 285.0, 294.0, 316.0, 317.0, 316.0, 317.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 258.0, 267.0, 311.0, 322.0, 314.0, 322.0, 291.0, 291.0, 324.0, 309.0, 291.0, 290.0, 291.0, 291.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9519895892120465, "mean_processing_ms": 0.2586797182005827, "mean_inference_ms": 1.5346818190110434}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7200000, "num_steps_sampled": 3840000, "sample_time_ms": 22522.727, "load_time_ms": 37.151, "grad_time_ms": 10013.326, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0025208042934536934, "policy_loss": -0.005239995662122965, "vf_loss": 83.27434539794922, "vf_explained_var": 0.7729237675666809, "kl": 0.0018271300941705704, "entropy": 1.133251667022705, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3840000, "episodes_total": 9600, "training_iteration": 300, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-18-22", "timestamp": 1660256302, "time_this_iter_s": 32.498526096343994, "time_total_s": 14716.046701431274, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14716.046701431274, "timesteps_since_restore": 3840000, "iterations_since_restore": 300, "perf": {"cpu_util_percent": 33.44782608695652, "ram_util_percent": 58.9717391304348}} +{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 617.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.925}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.85, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.53, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.86, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, 
"useful_onion_pickup_agent_0_mean": 17.02, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.58, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.31, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.58, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.58, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 639.0, 630.0, 587.0, 627.0, 639.0, 633.0, 573.0, 627.0, 633.0, 576.0, 639.0, 630.0, 564.0, 633.0, 627.0, 636.0, 636.0, 627.0, 633.0, 579.0, 639.0, 636.0, 639.0, 576.0, 639.0, 636.0, 639.0, 587.0, 581.0, 582.0, 639.0, 587.0, 630.0, 633.0, 636.0, 630.0, 636.0, 630.0, 636.0, 573.0, 630.0, 587.0, 639.0, 582.0, 624.0, 630.0, 639.0, 587.0, 590.0, 639.0, 639.0, 636.0, 633.0, 579.0, 633.0, 633.0, 587.0, 630.0, 630.0, 525.0, 633.0, 636.0, 582.0, 633.0, 581.0, 582.0, 636.0, 587.0, 636.0, 636.0, 579.0, 636.0, 636.0, 582.0, 633.0, 639.0, 582.0, 587.0, 639.0, 630.0, 639.0, 636.0, 627.0, 600.0, 639.0, 630.0, 630.0, 627.0, 639.0, 582.0, 627.0, 587.0, 636.0, 633.0, 636.0, 636.0, 633.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 322.0, 317.0, 316.0, 314.0, 288.0, 299.0, 305.0, 322.0, 319.0, 320.0, 329.0, 304.0, 278.0, 295.0, 311.0, 316.0, 316.0, 317.0, 290.0, 286.0, 317.0, 322.0, 321.0, 309.0, 280.0, 284.0, 322.0, 311.0, 314.0, 313.0, 326.0, 310.0, 314.0, 322.0, 311.0, 316.0, 316.0, 317.0, 288.0, 291.0, 311.0, 328.0, 317.0, 319.0, 319.0, 320.0, 289.0, 287.0, 317.0, 322.0, 320.0, 316.0, 327.0, 
312.0, 277.0, 310.0, 298.0, 283.0, 293.0, 289.0, 317.0, 322.0, 291.0, 296.0, 307.0, 323.0, 313.0, 320.0, 323.0, 313.0, 316.0, 314.0, 313.0, 323.0, 313.0, 317.0, 319.0, 317.0, 293.0, 280.0, 316.0, 314.0, 289.0, 298.0, 319.0, 320.0, 288.0, 294.0, 313.0, 311.0, 310.0, 320.0, 321.0, 318.0, 291.0, 296.0, 296.0, 294.0, 312.0, 327.0, 317.0, 322.0, 329.0, 307.0, 319.0, 314.0, 285.0, 294.0, 316.0, 317.0, 316.0, 317.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 258.0, 267.0, 311.0, 322.0, 314.0, 322.0, 291.0, 291.0, 324.0, 309.0, 291.0, 290.0, 291.0, 291.0, 319.0, 317.0, 296.0, 291.0, 314.0, 322.0, 314.0, 322.0, 278.0, 301.0, 314.0, 322.0, 316.0, 320.0, 291.0, 291.0, 314.0, 319.0, 319.0, 320.0, 286.0, 296.0, 283.0, 304.0, 320.0, 319.0, 324.0, 306.0, 322.0, 317.0, 318.0, 318.0, 308.0, 319.0, 299.0, 301.0, 321.0, 318.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 320.0, 319.0, 288.0, 294.0, 319.0, 308.0, 289.0, 298.0, 314.0, 322.0, 317.0, 316.0, 316.0, 320.0, 312.0, 324.0, 319.0, 314.0, 311.0, 319.0, 283.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9503424273012522, "mean_processing_ms": 0.25835032935285485, "mean_inference_ms": 1.5332766289322552}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7224000, "num_steps_sampled": 3852800, "sample_time_ms": 22579.338, "load_time_ms": 37.118, "grad_time_ms": 10147.106, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0018557598814368248, "policy_loss": -0.005808284040540457, "vf_loss": 82.31928253173828, "vf_explained_var": 0.7716462016105652, "kl": 0.001915976870805025, "entropy": 1.1357669830322266, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3852800, "episodes_total": 9632, "training_iteration": 301, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-18-56", "timestamp": 1660256336, "time_this_iter_s": 32.42415189743042, 
"time_total_s": 14748.470853328705, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": 
false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": 
true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14748.470853328705, "timesteps_since_restore": 3852800, "iterations_since_restore": 301, "perf": {"cpu_util_percent": 32.14468085106383, "ram_util_percent": 59.210638297872315}} +{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 616.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 232.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 308.055}, "custom_metrics": {"sparse_reward_mean": 213.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.51, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 
0, "onion_pickup_agent_0_mean": 17.56, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.65, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 17.05, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.71, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.01, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.41, 
"soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.33, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.71, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.71, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 621.0, 582.0, 636.0, 630.0, 633.0, 527.0, 627.0, 582.0, 633.0, 603.0, 630.0, 639.0, 624.0, 630.0, 636.0, 639.0, 465.0, 639.0, 636.0, 570.0, 630.0, 567.0, 633.0, 582.0, 630.0, 633.0, 636.0, 563.0, 636.0, 582.0, 636.0, 633.0, 581.0, 582.0, 636.0, 587.0, 636.0, 636.0, 579.0, 636.0, 636.0, 582.0, 633.0, 639.0, 582.0, 587.0, 639.0, 630.0, 639.0, 636.0, 627.0, 600.0, 639.0, 630.0, 630.0, 627.0, 639.0, 582.0, 627.0, 587.0, 636.0, 633.0, 636.0, 636.0, 633.0, 630.0, 579.0, 630.0, 639.0, 630.0, 587.0, 627.0, 639.0, 633.0, 573.0, 627.0, 633.0, 576.0, 639.0, 630.0, 564.0, 633.0, 627.0, 636.0, 636.0, 627.0, 633.0, 579.0, 639.0, 636.0, 639.0, 576.0, 639.0, 636.0, 639.0, 587.0, 581.0, 582.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 324.0, 318.0, 303.0, 290.0, 292.0, 314.0, 322.0, 317.0, 313.0, 314.0, 319.0, 259.0, 268.0, 305.0, 322.0, 281.0, 301.0, 314.0, 319.0, 302.0, 301.0, 309.0, 321.0, 319.0, 320.0, 311.0, 313.0, 310.0, 320.0, 304.0, 
332.0, 325.0, 314.0, 232.0, 233.0, 314.0, 325.0, 311.0, 325.0, 291.0, 279.0, 318.0, 312.0, 277.0, 290.0, 315.0, 318.0, 298.0, 284.0, 309.0, 321.0, 311.0, 322.0, 319.0, 317.0, 279.0, 284.0, 319.0, 317.0, 291.0, 291.0, 324.0, 312.0, 324.0, 309.0, 291.0, 290.0, 291.0, 291.0, 319.0, 317.0, 296.0, 291.0, 314.0, 322.0, 314.0, 322.0, 278.0, 301.0, 314.0, 322.0, 316.0, 320.0, 291.0, 291.0, 314.0, 319.0, 319.0, 320.0, 286.0, 296.0, 283.0, 304.0, 320.0, 319.0, 324.0, 306.0, 322.0, 317.0, 318.0, 318.0, 308.0, 319.0, 299.0, 301.0, 321.0, 318.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 320.0, 319.0, 288.0, 294.0, 319.0, 308.0, 289.0, 298.0, 314.0, 322.0, 317.0, 316.0, 316.0, 320.0, 312.0, 324.0, 319.0, 314.0, 311.0, 319.0, 283.0, 296.0, 313.0, 317.0, 322.0, 317.0, 316.0, 314.0, 288.0, 299.0, 305.0, 322.0, 319.0, 320.0, 329.0, 304.0, 278.0, 295.0, 311.0, 316.0, 316.0, 317.0, 290.0, 286.0, 317.0, 322.0, 321.0, 309.0, 280.0, 284.0, 322.0, 311.0, 314.0, 313.0, 326.0, 310.0, 314.0, 322.0, 311.0, 316.0, 316.0, 317.0, 288.0, 291.0, 311.0, 328.0, 317.0, 319.0, 319.0, 320.0, 289.0, 287.0, 317.0, 322.0, 320.0, 316.0, 327.0, 312.0, 277.0, 310.0, 298.0, 283.0, 293.0, 289.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9487147885298458, "mean_processing_ms": 0.25802520052917743, "mean_inference_ms": 1.5320224616406188}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7248000, "num_steps_sampled": 3865600, "sample_time_ms": 22988.548, "load_time_ms": 36.983, "grad_time_ms": 10329.984, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00113882205914706, "policy_loss": -0.0069201975129544735, "vf_loss": 86.32308959960938, "vf_explained_var": 0.7628341317176819, "kl": 0.0021745546255260706, "entropy": 1.146581768989563, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3865600, "episodes_total": 9664, 
"training_iteration": 302, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-19-29", "timestamp": 1660256369, "time_this_iter_s": 33.80998110771179, "time_total_s": 14782.280834436417, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14782.280834436417, "timesteps_since_restore": 3865600, "iterations_since_restore": 302, "perf": {"cpu_util_percent": 31.58125, "ram_util_percent": 58.88958333333333}} +{"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 610.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 305.09}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 186.58, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 
0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.47, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 17.0, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.61, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.34, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.61, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.61, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 633.0, 576.0, 630.0, 633.0, 621.0, 633.0, 633.0, 582.0, 123.0, 636.0, 633.0, 582.0, 627.0, 639.0, 633.0, 639.0, 587.0, 633.0, 630.0, 633.0, 621.0, 633.0, 630.0, 630.0, 633.0, 376.0, 630.0, 636.0, 582.0, 633.0, 636.0, 633.0, 630.0, 579.0, 630.0, 639.0, 630.0, 587.0, 627.0, 639.0, 633.0, 573.0, 627.0, 633.0, 576.0, 639.0, 630.0, 564.0, 633.0, 627.0, 636.0, 636.0, 627.0, 633.0, 579.0, 639.0, 636.0, 639.0, 576.0, 639.0, 636.0, 639.0, 587.0, 581.0, 582.0, 639.0, 630.0, 621.0, 582.0, 636.0, 630.0, 633.0, 527.0, 627.0, 582.0, 633.0, 603.0, 630.0, 639.0, 624.0, 630.0, 636.0, 639.0, 465.0, 639.0, 636.0, 570.0, 630.0, 567.0, 633.0, 582.0, 630.0, 633.0, 636.0, 563.0, 636.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 325.0, 309.0, 321.0, 315.0, 
318.0, 280.0, 296.0, 311.0, 319.0, 321.0, 312.0, 308.0, 313.0, 317.0, 316.0, 316.0, 317.0, 292.0, 290.0, 60.0, 63.0, 314.0, 322.0, 324.0, 309.0, 291.0, 291.0, 316.0, 311.0, 319.0, 320.0, 304.0, 329.0, 319.0, 320.0, 283.0, 304.0, 318.0, 315.0, 321.0, 309.0, 321.0, 312.0, 321.0, 300.0, 314.0, 319.0, 319.0, 311.0, 311.0, 319.0, 316.0, 317.0, 182.0, 194.0, 319.0, 311.0, 324.0, 312.0, 291.0, 291.0, 316.0, 317.0, 312.0, 324.0, 319.0, 314.0, 311.0, 319.0, 283.0, 296.0, 313.0, 317.0, 322.0, 317.0, 316.0, 314.0, 288.0, 299.0, 305.0, 322.0, 319.0, 320.0, 329.0, 304.0, 278.0, 295.0, 311.0, 316.0, 316.0, 317.0, 290.0, 286.0, 317.0, 322.0, 321.0, 309.0, 280.0, 284.0, 322.0, 311.0, 314.0, 313.0, 326.0, 310.0, 314.0, 322.0, 311.0, 316.0, 316.0, 317.0, 288.0, 291.0, 311.0, 328.0, 317.0, 319.0, 319.0, 320.0, 289.0, 287.0, 317.0, 322.0, 320.0, 316.0, 327.0, 312.0, 277.0, 310.0, 298.0, 283.0, 293.0, 289.0, 317.0, 322.0, 306.0, 324.0, 318.0, 303.0, 290.0, 292.0, 314.0, 322.0, 317.0, 313.0, 314.0, 319.0, 259.0, 268.0, 305.0, 322.0, 281.0, 301.0, 314.0, 319.0, 302.0, 301.0, 309.0, 321.0, 319.0, 320.0, 311.0, 313.0, 310.0, 320.0, 304.0, 332.0, 325.0, 314.0, 232.0, 233.0, 314.0, 325.0, 311.0, 325.0, 291.0, 279.0, 318.0, 312.0, 277.0, 290.0, 315.0, 318.0, 298.0, 284.0, 309.0, 321.0, 311.0, 322.0, 319.0, 317.0, 279.0, 284.0, 319.0, 317.0, 291.0, 291.0, 324.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9470934663079638, "mean_processing_ms": 0.2577011854235747, "mean_inference_ms": 1.5308098128397853}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7272000, "num_steps_sampled": 3878400, "sample_time_ms": 22956.932, "load_time_ms": 37.245, "grad_time_ms": 10308.821, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009572577546350658, "policy_loss": -0.00890685711055994, "vf_loss": 85.18466186523438, "vf_explained_var": 0.7909882068634033, "kl": 0.00206771120429039, "entropy": 1.137712836265564, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3878400, "episodes_total": 9696, "training_iteration": 303, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-20-03", "timestamp": 1660256403, "time_this_iter_s": 33.27155518531799, "time_total_s": 14815.552389621735, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14815.552389621735, "timesteps_since_restore": 3878400, "iterations_since_restore": 303, "perf": {"cpu_util_percent": 32.59574468085106, "ram_util_percent": 58.840425531914875}} +{"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 606.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 303.28}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 185.76, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.34, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.78, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.45, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.42, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.44, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.39, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.32, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.5, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.42, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.42, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 510.0, 587.0, 564.0, 579.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 636.0, 630.0, 569.0, 633.0, 639.0, 582.0, 633.0, 633.0, 636.0, 587.0, 633.0, 636.0, 639.0, 633.0, 530.0, 636.0, 627.0, 582.0, 633.0, 636.0, 636.0, 587.0, 581.0, 582.0, 639.0, 630.0, 621.0, 582.0, 636.0, 630.0, 633.0, 527.0, 627.0, 582.0, 633.0, 603.0, 630.0, 639.0, 624.0, 630.0, 636.0, 639.0, 465.0, 639.0, 636.0, 570.0, 630.0, 567.0, 633.0, 582.0, 630.0, 633.0, 636.0, 563.0, 636.0, 582.0, 636.0, 639.0, 630.0, 633.0, 576.0, 630.0, 633.0, 621.0, 633.0, 633.0, 582.0, 123.0, 636.0, 633.0, 582.0, 627.0, 639.0, 633.0, 639.0, 587.0, 633.0, 630.0, 633.0, 621.0, 633.0, 630.0, 630.0, 633.0, 376.0, 630.0, 636.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 256.0, 254.0, 286.0, 301.0, 276.0, 288.0, 290.0, 289.0, 289.0, 298.0, 296.0, 286.0, 295.0, 292.0, 313.0, 320.0, 318.0, 312.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 283.0, 286.0, 319.0, 314.0, 319.0, 320.0, 294.0, 288.0, 324.0, 309.0, 315.0, 318.0, 325.0, 311.0, 294.0, 293.0, 313.0, 320.0, 320.0, 316.0, 316.0, 323.0, 316.0, 317.0, 265.0, 265.0, 317.0, 319.0, 317.0, 310.0, 288.0, 294.0, 316.0, 317.0, 322.0, 314.0, 314.0, 322.0, 277.0, 310.0, 298.0, 283.0, 293.0, 289.0, 317.0, 322.0, 306.0, 324.0, 318.0, 303.0, 290.0, 292.0, 314.0, 322.0, 317.0, 313.0, 314.0, 319.0, 259.0, 268.0, 305.0, 322.0, 281.0, 301.0, 314.0, 319.0, 302.0, 301.0, 309.0, 321.0, 319.0, 320.0, 311.0, 313.0, 310.0, 320.0, 304.0, 332.0, 325.0, 314.0, 232.0, 233.0, 314.0, 325.0, 311.0, 325.0, 291.0, 279.0, 318.0, 312.0, 277.0, 290.0, 315.0, 318.0, 298.0, 284.0, 309.0, 321.0, 311.0, 322.0, 319.0, 317.0, 279.0, 284.0, 319.0, 317.0, 291.0, 291.0, 324.0, 312.0, 314.0, 325.0, 309.0, 321.0, 315.0, 318.0, 280.0, 296.0, 311.0, 319.0, 321.0, 312.0, 308.0, 313.0, 317.0, 316.0, 316.0, 317.0, 292.0, 290.0, 60.0, 63.0, 314.0, 322.0, 324.0, 309.0, 291.0, 291.0, 316.0, 311.0, 319.0, 320.0, 304.0, 329.0, 319.0, 320.0, 283.0, 304.0, 318.0, 315.0, 321.0, 309.0, 321.0, 312.0, 321.0, 300.0, 314.0, 319.0, 319.0, 311.0, 311.0, 319.0, 316.0, 317.0, 182.0, 194.0, 319.0, 311.0, 324.0, 312.0, 291.0, 291.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9454753487433492, "mean_processing_ms": 0.2573778744956284, "mean_inference_ms": 1.529523880785963}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7296000, "num_steps_sampled": 3891200, "sample_time_ms": 23052.875, "load_time_ms": 37.015, "grad_time_ms": 10279.094, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": -0.0008152422960847616, "policy_loss": -0.008779828436672688, "vf_loss": 85.31393432617188, "vf_explained_var": 0.7709566950798035, "kl": 0.0019413350382819772, "entropy": 1.1336184740066528, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3891200, "episodes_total": 9728, "training_iteration": 304, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-20-35", "timestamp": 1660256435, "time_this_iter_s": 32.335684061050415, "time_total_s": 14847.888073682785, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14847.888073682785, "timesteps_since_restore": 3891200, "iterations_since_restore": 304, "perf": {"cpu_util_percent": 32.43260869565216, "ram_util_percent": 58.8478260869565}} +{"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 608.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 304.03}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 186.86, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, 
"tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.32, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.81, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.5, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.03, 
"useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.5, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.5, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.5, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 633.0, 633.0, 639.0, 636.0, 587.0, 587.0, 636.0, 633.0, 636.0, 587.0, 587.0, 630.0, 587.0, 582.0, 630.0, 636.0, 581.0, 630.0, 630.0, 639.0, 590.0, 618.0, 627.0, 633.0, 575.0, 627.0, 630.0, 636.0, 636.0, 582.0, 582.0, 563.0, 636.0, 582.0, 636.0, 639.0, 630.0, 633.0, 576.0, 630.0, 633.0, 621.0, 633.0, 633.0, 582.0, 123.0, 636.0, 633.0, 582.0, 627.0, 639.0, 633.0, 639.0, 587.0, 633.0, 630.0, 633.0, 621.0, 633.0, 630.0, 630.0, 633.0, 376.0, 630.0, 636.0, 582.0, 633.0, 630.0, 510.0, 587.0, 564.0, 579.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 636.0, 630.0, 569.0, 633.0, 639.0, 582.0, 633.0, 633.0, 636.0, 587.0, 633.0, 636.0, 639.0, 633.0, 530.0, 636.0, 627.0, 582.0, 633.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 288.0, 309.0, 324.0, 315.0, 318.0, 312.0, 327.0, 317.0, 319.0, 290.0, 297.0, 298.0, 289.0, 320.0, 316.0, 313.0, 320.0, 321.0, 315.0, 288.0, 299.0, 293.0, 294.0, 318.0, 312.0, 294.0, 293.0, 286.0, 296.0, 318.0, 312.0, 319.0, 317.0, 290.0, 291.0, 320.0, 310.0, 314.0, 316.0, 324.0, 315.0, 295.0, 295.0, 314.0, 304.0, 320.0, 307.0, 319.0, 314.0, 285.0, 290.0, 316.0, 311.0, 313.0, 317.0, 320.0, 316.0, 327.0, 309.0, 288.0, 294.0, 293.0, 289.0, 279.0, 284.0, 319.0, 317.0, 291.0, 291.0, 324.0, 312.0, 314.0, 325.0, 309.0, 321.0, 315.0, 318.0, 280.0, 296.0, 311.0, 319.0, 321.0, 312.0, 308.0, 313.0, 317.0, 316.0, 316.0, 317.0, 292.0, 290.0, 60.0, 63.0, 314.0, 322.0, 324.0, 309.0, 291.0, 291.0, 316.0, 311.0, 319.0, 320.0, 304.0, 329.0, 319.0, 320.0, 283.0, 304.0, 318.0, 315.0, 321.0, 309.0, 321.0, 312.0, 321.0, 300.0, 314.0, 319.0, 319.0, 311.0, 311.0, 319.0, 316.0, 317.0, 182.0, 194.0, 319.0, 311.0, 324.0, 312.0, 291.0, 291.0, 316.0, 317.0, 316.0, 314.0, 256.0, 254.0, 286.0, 301.0, 276.0, 288.0, 290.0, 289.0, 289.0, 298.0, 296.0, 286.0, 295.0, 292.0, 313.0, 320.0, 318.0, 312.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 283.0, 286.0, 319.0, 314.0, 319.0, 320.0, 294.0, 288.0, 324.0, 309.0, 315.0, 318.0, 325.0, 311.0, 294.0, 293.0, 313.0, 320.0, 320.0, 316.0, 316.0, 323.0, 316.0, 317.0, 265.0, 265.0, 317.0, 319.0, 317.0, 310.0, 288.0, 294.0, 316.0, 317.0, 322.0, 314.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9438635758880244, "mean_processing_ms": 0.25705717388077914, "mean_inference_ms": 1.528223444830069}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7320000, 
"num_steps_sampled": 3904000, "sample_time_ms": 22489.17, "load_time_ms": 36.377, "grad_time_ms": 10072.238, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005805303808301687, "policy_loss": -0.002536727814003825, "vf_loss": 89.14191436767578, "vf_explained_var": 0.7592394948005676, "kl": 0.0022071560379117727, "entropy": 1.1443239450454712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3904000, "episodes_total": 9760, "training_iteration": 305, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-21-09", "timestamp": 1660256469, "time_this_iter_s": 33.905731201171875, "time_total_s": 14881.793804883957, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": 
false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14881.793804883957, "timesteps_since_restore": 3904000, "iterations_since_restore": 305, "perf": {"cpu_util_percent": 30.185416666666665, "ram_util_percent": 58.73750000000001}} +{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 612.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 306.015}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.43, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 
0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.14, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.82, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.57, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.05, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.96, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.36, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.66, "dish_pickup_agent_1_min": 3, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.96, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.96, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 351.0, 633.0, 582.0, 633.0, 636.0, 639.0, 630.0, 639.0, 639.0, 582.0, 579.0, 627.0, 636.0, 639.0, 633.0, 627.0, 630.0, 579.0, 636.0, 587.0, 587.0, 636.0, 579.0, 633.0, 630.0, 627.0, 639.0, 636.0, 636.0, 630.0, 536.0, 630.0, 636.0, 582.0, 633.0, 630.0, 510.0, 587.0, 564.0, 579.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 636.0, 630.0, 569.0, 633.0, 639.0, 582.0, 633.0, 633.0, 636.0, 587.0, 633.0, 636.0, 639.0, 633.0, 530.0, 636.0, 627.0, 582.0, 633.0, 636.0, 636.0, 587.0, 633.0, 633.0, 639.0, 636.0, 587.0, 587.0, 636.0, 633.0, 636.0, 587.0, 587.0, 630.0, 587.0, 582.0, 630.0, 636.0, 581.0, 630.0, 630.0, 639.0, 590.0, 618.0, 627.0, 633.0, 575.0, 
627.0, 630.0, 636.0, 636.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 172.0, 179.0, 313.0, 320.0, 280.0, 302.0, 318.0, 315.0, 322.0, 314.0, 324.0, 315.0, 316.0, 314.0, 322.0, 317.0, 319.0, 320.0, 294.0, 288.0, 290.0, 289.0, 310.0, 317.0, 319.0, 317.0, 314.0, 325.0, 316.0, 317.0, 316.0, 311.0, 315.0, 315.0, 285.0, 294.0, 319.0, 317.0, 283.0, 304.0, 285.0, 302.0, 332.0, 304.0, 284.0, 295.0, 316.0, 317.0, 311.0, 319.0, 323.0, 304.0, 319.0, 320.0, 317.0, 319.0, 320.0, 316.0, 308.0, 322.0, 263.0, 273.0, 319.0, 311.0, 324.0, 312.0, 291.0, 291.0, 316.0, 317.0, 316.0, 314.0, 256.0, 254.0, 286.0, 301.0, 276.0, 288.0, 290.0, 289.0, 289.0, 298.0, 296.0, 286.0, 295.0, 292.0, 313.0, 320.0, 318.0, 312.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 283.0, 286.0, 319.0, 314.0, 319.0, 320.0, 294.0, 288.0, 324.0, 309.0, 315.0, 318.0, 325.0, 311.0, 294.0, 293.0, 313.0, 320.0, 320.0, 316.0, 316.0, 323.0, 316.0, 317.0, 265.0, 265.0, 317.0, 319.0, 317.0, 310.0, 288.0, 294.0, 316.0, 317.0, 322.0, 314.0, 314.0, 322.0, 299.0, 288.0, 309.0, 324.0, 315.0, 318.0, 312.0, 327.0, 317.0, 319.0, 290.0, 297.0, 298.0, 289.0, 320.0, 316.0, 313.0, 320.0, 321.0, 315.0, 288.0, 299.0, 293.0, 294.0, 318.0, 312.0, 294.0, 293.0, 286.0, 296.0, 318.0, 312.0, 319.0, 317.0, 290.0, 291.0, 320.0, 310.0, 314.0, 316.0, 324.0, 315.0, 295.0, 295.0, 314.0, 304.0, 320.0, 307.0, 319.0, 314.0, 285.0, 290.0, 316.0, 311.0, 313.0, 317.0, 320.0, 316.0, 327.0, 309.0, 288.0, 294.0, 293.0, 289.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.9422548001581954, "mean_processing_ms": 0.2567378512099078, "mean_inference_ms": 1.5267428709470499}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7344000, "num_steps_sampled": 3916800, "sample_time_ms": 22275.217, "load_time_ms": 36.758, "grad_time_ms": 10257.278, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0026395271997898817, "policy_loss": -0.005618779454380274, "vf_loss": 88.24600219726562, "vf_explained_var": 0.7727122902870178, "kl": 0.0020911165047436953, "entropy": 1.1326097249984741, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3916800, "episodes_total": 9792, "training_iteration": 306, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-21-40", "timestamp": 1660256500, "time_this_iter_s": 30.71598792076111, "time_total_s": 14912.509792804718, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14912.509792804718, "timesteps_since_restore": 3916800, "iterations_since_restore": 306, "perf": {"cpu_util_percent": 32.81395348837209, "ram_util_percent": 58.667441860465125}} +{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 617.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 308.845}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 190.49, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.18, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.9, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.65, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.12, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.51, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 
18.16, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.41, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.16, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, 
"viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.16, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 633.0, 587.0, 627.0, 639.0, 633.0, 639.0, 633.0, 636.0, 639.0, 587.0, 636.0, 596.0, 639.0, 630.0, 596.0, 630.0, 639.0, 624.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 639.0, 636.0, 624.0, 639.0, 582.0, 633.0, 636.0, 636.0, 587.0, 633.0, 633.0, 639.0, 636.0, 587.0, 587.0, 636.0, 633.0, 636.0, 587.0, 587.0, 630.0, 587.0, 582.0, 630.0, 636.0, 581.0, 630.0, 630.0, 639.0, 590.0, 618.0, 627.0, 633.0, 575.0, 627.0, 630.0, 636.0, 
636.0, 582.0, 582.0, 636.0, 351.0, 633.0, 582.0, 633.0, 636.0, 639.0, 630.0, 639.0, 639.0, 582.0, 579.0, 627.0, 636.0, 639.0, 633.0, 627.0, 630.0, 579.0, 636.0, 587.0, 587.0, 636.0, 579.0, 633.0, 630.0, 627.0, 639.0, 636.0, 636.0, 630.0, 536.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 319.0, 315.0, 324.0, 318.0, 315.0, 292.0, 295.0, 302.0, 325.0, 327.0, 312.0, 311.0, 322.0, 327.0, 312.0, 311.0, 322.0, 314.0, 322.0, 319.0, 320.0, 296.0, 291.0, 315.0, 321.0, 294.0, 302.0, 319.0, 320.0, 310.0, 320.0, 304.0, 292.0, 316.0, 314.0, 317.0, 322.0, 313.0, 311.0, 316.0, 317.0, 314.0, 325.0, 319.0, 317.0, 297.0, 282.0, 319.0, 317.0, 304.0, 323.0, 321.0, 315.0, 314.0, 322.0, 322.0, 317.0, 314.0, 322.0, 308.0, 316.0, 320.0, 319.0, 288.0, 294.0, 316.0, 317.0, 322.0, 314.0, 314.0, 322.0, 299.0, 288.0, 309.0, 324.0, 315.0, 318.0, 312.0, 327.0, 317.0, 319.0, 290.0, 297.0, 298.0, 289.0, 320.0, 316.0, 313.0, 320.0, 321.0, 315.0, 288.0, 299.0, 293.0, 294.0, 318.0, 312.0, 294.0, 293.0, 286.0, 296.0, 318.0, 312.0, 319.0, 317.0, 290.0, 291.0, 320.0, 310.0, 314.0, 316.0, 324.0, 315.0, 295.0, 295.0, 314.0, 304.0, 320.0, 307.0, 319.0, 314.0, 285.0, 290.0, 316.0, 311.0, 313.0, 317.0, 320.0, 316.0, 327.0, 309.0, 288.0, 294.0, 293.0, 289.0, 314.0, 322.0, 172.0, 179.0, 313.0, 320.0, 280.0, 302.0, 318.0, 315.0, 322.0, 314.0, 324.0, 315.0, 316.0, 314.0, 322.0, 317.0, 319.0, 320.0, 294.0, 288.0, 290.0, 289.0, 310.0, 317.0, 319.0, 317.0, 314.0, 325.0, 316.0, 317.0, 316.0, 311.0, 315.0, 315.0, 285.0, 
294.0, 319.0, 317.0, 283.0, 304.0, 285.0, 302.0, 332.0, 304.0, 284.0, 295.0, 316.0, 317.0, 311.0, 319.0, 323.0, 304.0, 319.0, 320.0, 317.0, 319.0, 320.0, 316.0, 308.0, 322.0, 263.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9406496030307164, "mean_processing_ms": 0.25641787950901196, "mean_inference_ms": 1.5250430030640023}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7368000, "num_steps_sampled": 3929600, "sample_time_ms": 21842.439, "load_time_ms": 36.991, "grad_time_ms": 10104.518, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00218362919986248, "policy_loss": -0.005569128785282373, "vf_loss": 83.15591430664062, "vf_explained_var": 0.7728936076164246, "kl": 0.0017335275188088417, "entropy": 1.1256619691848755, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3929600, "episodes_total": 9824, "training_iteration": 307, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-22-09", "timestamp": 1660256529, "time_this_iter_s": 29.1308012008667, "time_total_s": 14941.640594005585, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14941.640594005585, "timesteps_since_restore": 3929600, "iterations_since_restore": 307, "perf": {"cpu_util_percent": 35.333333333333336, "ram_util_percent": 58.726190476190474}} +{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 620.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 310.09}, "custom_metrics": {"sparse_reward_mean": 214.8, 
"sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 190.58, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 19.12, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.45, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 
0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.43, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.42, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.78, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.37, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.43, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.43, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 639.0, 630.0, 627.0, 636.0, 633.0, 630.0, 630.0, 630.0, 630.0, 561.0, 627.0, 627.0, 639.0, 633.0, 579.0, 633.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 639.0, 639.0, 630.0, 633.0, 576.0, 579.0, 636.0, 633.0, 636.0, 636.0, 582.0, 582.0, 636.0, 351.0, 633.0, 582.0, 633.0, 
636.0, 639.0, 630.0, 639.0, 639.0, 582.0, 579.0, 627.0, 636.0, 639.0, 633.0, 627.0, 630.0, 579.0, 636.0, 587.0, 587.0, 636.0, 579.0, 633.0, 630.0, 627.0, 639.0, 636.0, 636.0, 630.0, 536.0, 636.0, 639.0, 633.0, 587.0, 627.0, 639.0, 633.0, 639.0, 633.0, 636.0, 639.0, 587.0, 636.0, 596.0, 639.0, 630.0, 596.0, 630.0, 639.0, 624.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 639.0, 636.0, 624.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [310.0, 323.0, 316.0, 320.0, 317.0, 322.0, 310.0, 320.0, 311.0, 316.0, 319.0, 317.0, 306.0, 327.0, 321.0, 309.0, 313.0, 317.0, 311.0, 319.0, 308.0, 322.0, 278.0, 283.0, 318.0, 309.0, 315.0, 312.0, 327.0, 312.0, 316.0, 317.0, 282.0, 297.0, 314.0, 319.0, 316.0, 317.0, 290.0, 292.0, 314.0, 316.0, 313.0, 323.0, 316.0, 320.0, 313.0, 314.0, 319.0, 320.0, 323.0, 316.0, 316.0, 314.0, 314.0, 319.0, 293.0, 283.0, 296.0, 283.0, 316.0, 320.0, 313.0, 320.0, 320.0, 316.0, 327.0, 309.0, 288.0, 294.0, 293.0, 289.0, 314.0, 322.0, 172.0, 179.0, 313.0, 320.0, 280.0, 302.0, 318.0, 315.0, 322.0, 314.0, 324.0, 315.0, 316.0, 314.0, 322.0, 317.0, 319.0, 320.0, 294.0, 288.0, 290.0, 289.0, 310.0, 317.0, 319.0, 317.0, 314.0, 325.0, 316.0, 317.0, 316.0, 311.0, 315.0, 315.0, 285.0, 294.0, 319.0, 317.0, 283.0, 304.0, 285.0, 302.0, 332.0, 304.0, 284.0, 295.0, 316.0, 317.0, 311.0, 319.0, 323.0, 304.0, 319.0, 320.0, 317.0, 319.0, 320.0, 316.0, 308.0, 322.0, 263.0, 273.0, 317.0, 319.0, 315.0, 324.0, 318.0, 315.0, 292.0, 295.0, 302.0, 325.0, 327.0, 312.0, 311.0, 
322.0, 327.0, 312.0, 311.0, 322.0, 314.0, 322.0, 319.0, 320.0, 296.0, 291.0, 315.0, 321.0, 294.0, 302.0, 319.0, 320.0, 310.0, 320.0, 304.0, 292.0, 316.0, 314.0, 317.0, 322.0, 313.0, 311.0, 316.0, 317.0, 314.0, 325.0, 319.0, 317.0, 297.0, 282.0, 319.0, 317.0, 304.0, 323.0, 321.0, 315.0, 314.0, 322.0, 322.0, 317.0, 314.0, 322.0, 308.0, 316.0, 320.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9390469484848887, "mean_processing_ms": 0.2560959206110981, "mean_inference_ms": 1.5230623769962466}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7392000, "num_steps_sampled": 3942400, "sample_time_ms": 21606.28, "load_time_ms": 36.86, "grad_time_ms": 10018.576, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012893896782770753, "policy_loss": -0.006317433435469866, "vf_loss": 81.66983795166016, "vf_explained_var": 0.7689216732978821, "kl": 0.0018363837152719498, "entropy": 1.1203217506408691, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3942400, "episodes_total": 9856, "training_iteration": 308, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-22-36", "timestamp": 1660256556, "time_this_iter_s": 27.574139833450317, "time_total_s": 14969.214733839035, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14969.214733839035, "timesteps_since_restore": 3942400, "iterations_since_restore": 308, "perf": {"cpu_util_percent": 34.44871794871795, "ram_util_percent": 58.748717948717946}} +{"episode_reward_max": 639.0, "episode_reward_min": 533.0, "episode_reward_mean": 625.21, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 312.605}, "custom_metrics": {"sparse_reward_mean": 216.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 191.61, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.26, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.85, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.25, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 
4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.64, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.2, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.96, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.23, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.31, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.28, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.64, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.2, 
"optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.64, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.2, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 639.0, 630.0, 587.0, 639.0, 624.0, 636.0, 639.0, 639.0, 630.0, 
630.0, 633.0, 606.0, 627.0, 627.0, 636.0, 630.0, 633.0, 627.0, 587.0, 639.0, 627.0, 639.0, 636.0, 639.0, 636.0, 636.0, 627.0, 636.0, 627.0, 533.0, 636.0, 636.0, 630.0, 536.0, 636.0, 639.0, 633.0, 587.0, 627.0, 639.0, 633.0, 639.0, 633.0, 636.0, 639.0, 587.0, 636.0, 596.0, 639.0, 630.0, 596.0, 630.0, 639.0, 624.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 639.0, 636.0, 624.0, 639.0, 633.0, 636.0, 639.0, 630.0, 627.0, 636.0, 633.0, 630.0, 630.0, 630.0, 630.0, 561.0, 627.0, 627.0, 639.0, 633.0, 579.0, 633.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 639.0, 639.0, 630.0, 633.0, 576.0, 579.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 322.0, 311.0, 322.0, 317.0, 311.0, 319.0, 288.0, 299.0, 319.0, 320.0, 311.0, 313.0, 319.0, 317.0, 330.0, 309.0, 317.0, 322.0, 314.0, 316.0, 311.0, 319.0, 319.0, 314.0, 294.0, 312.0, 310.0, 317.0, 309.0, 318.0, 320.0, 316.0, 314.0, 316.0, 309.0, 324.0, 313.0, 314.0, 291.0, 296.0, 329.0, 310.0, 321.0, 306.0, 323.0, 316.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 319.0, 317.0, 314.0, 313.0, 312.0, 324.0, 313.0, 314.0, 260.0, 273.0, 317.0, 319.0, 320.0, 316.0, 308.0, 322.0, 263.0, 273.0, 317.0, 319.0, 315.0, 324.0, 318.0, 315.0, 292.0, 295.0, 302.0, 325.0, 327.0, 312.0, 311.0, 322.0, 327.0, 312.0, 311.0, 322.0, 314.0, 322.0, 319.0, 320.0, 296.0, 291.0, 315.0, 321.0, 294.0, 302.0, 319.0, 320.0, 310.0, 320.0, 304.0, 292.0, 316.0, 314.0, 317.0, 322.0, 313.0, 311.0, 316.0, 317.0, 314.0, 325.0, 319.0, 317.0, 297.0, 
282.0, 319.0, 317.0, 304.0, 323.0, 321.0, 315.0, 314.0, 322.0, 322.0, 317.0, 314.0, 322.0, 308.0, 316.0, 320.0, 319.0, 310.0, 323.0, 316.0, 320.0, 317.0, 322.0, 310.0, 320.0, 311.0, 316.0, 319.0, 317.0, 306.0, 327.0, 321.0, 309.0, 313.0, 317.0, 311.0, 319.0, 308.0, 322.0, 278.0, 283.0, 318.0, 309.0, 315.0, 312.0, 327.0, 312.0, 316.0, 317.0, 282.0, 297.0, 314.0, 319.0, 316.0, 317.0, 290.0, 292.0, 314.0, 316.0, 313.0, 323.0, 316.0, 320.0, 313.0, 314.0, 319.0, 320.0, 323.0, 316.0, 316.0, 314.0, 314.0, 319.0, 293.0, 283.0, 296.0, 283.0, 316.0, 320.0, 313.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9374628711409474, "mean_processing_ms": 0.2557781242683051, "mean_inference_ms": 1.5211277740560474}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7416000, "num_steps_sampled": 3955200, "sample_time_ms": 21548.086, "load_time_ms": 36.789, "grad_time_ms": 9960.253, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0017820480279624462, "policy_loss": -0.005541125778108835, "vf_loss": 78.87618255615234, "vf_explained_var": 0.777707040309906, "kl": 0.0019577995408326387, "entropy": 1.1288975477218628, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3955200, "episodes_total": 9888, "training_iteration": 309, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-23-06", "timestamp": 1660256586, "time_this_iter_s": 30.049942016601562, "time_total_s": 14999.264675855637, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14999.264675855637, "timesteps_since_restore": 3955200, "iterations_since_restore": 309, "perf": {"cpu_util_percent": 
34.733333333333334, "ram_util_percent": 58.76428571428571}} +{"episode_reward_max": 639.0, "episode_reward_min": 533.0, "episode_reward_mean": 625.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 312.6}, "custom_metrics": {"sparse_reward_mean": 217.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 191.2, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.32, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.98, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.36, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, 
"onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.65, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.07, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.24, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 16.65, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.07, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.65, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.07, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 639.0, 587.0, 636.0, 593.0, 633.0, 627.0, 639.0, 627.0, 639.0, 633.0, 636.0, 630.0, 630.0, 630.0, 636.0, 633.0, 636.0, 582.0, 627.0, 639.0, 639.0, 582.0, 636.0, 627.0, 587.0, 636.0, 633.0, 633.0, 624.0, 636.0, 639.0, 636.0, 624.0, 639.0, 633.0, 636.0, 639.0, 630.0, 627.0, 636.0, 633.0, 630.0, 630.0, 630.0, 630.0, 561.0, 627.0, 627.0, 639.0, 633.0, 579.0, 633.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 639.0, 639.0, 630.0, 633.0, 576.0, 579.0, 636.0, 633.0, 636.0, 633.0, 639.0, 630.0, 587.0, 639.0, 624.0, 636.0, 639.0, 639.0, 630.0, 630.0, 633.0, 606.0, 627.0, 627.0, 636.0, 630.0, 633.0, 627.0, 587.0, 639.0, 627.0, 639.0, 636.0, 639.0, 636.0, 636.0, 627.0, 636.0, 627.0, 533.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 296.0, 286.0, 317.0, 322.0, 296.0, 291.0, 321.0, 315.0, 302.0, 291.0, 308.0, 325.0, 318.0, 309.0, 322.0, 317.0, 315.0, 312.0, 322.0, 317.0, 309.0, 324.0, 321.0, 315.0, 322.0, 308.0, 313.0, 317.0, 317.0, 313.0, 322.0, 314.0, 320.0, 313.0, 319.0, 317.0, 293.0, 289.0, 316.0, 311.0, 322.0, 317.0, 317.0, 322.0, 291.0, 291.0, 318.0, 318.0, 305.0, 322.0, 296.0, 291.0, 311.0, 325.0, 309.0, 324.0, 319.0, 314.0, 326.0, 298.0, 317.0, 319.0, 322.0, 317.0, 314.0, 322.0, 308.0, 316.0, 320.0, 319.0, 310.0, 323.0, 316.0, 320.0, 317.0, 322.0, 310.0, 320.0, 311.0, 316.0, 319.0, 317.0, 306.0, 327.0, 321.0, 309.0, 313.0, 317.0, 311.0, 
319.0, 308.0, 322.0, 278.0, 283.0, 318.0, 309.0, 315.0, 312.0, 327.0, 312.0, 316.0, 317.0, 282.0, 297.0, 314.0, 319.0, 316.0, 317.0, 290.0, 292.0, 314.0, 316.0, 313.0, 323.0, 316.0, 320.0, 313.0, 314.0, 319.0, 320.0, 323.0, 316.0, 316.0, 314.0, 314.0, 319.0, 293.0, 283.0, 296.0, 283.0, 316.0, 320.0, 313.0, 320.0, 314.0, 322.0, 322.0, 311.0, 322.0, 317.0, 311.0, 319.0, 288.0, 299.0, 319.0, 320.0, 311.0, 313.0, 319.0, 317.0, 330.0, 309.0, 317.0, 322.0, 314.0, 316.0, 311.0, 319.0, 319.0, 314.0, 294.0, 312.0, 310.0, 317.0, 309.0, 318.0, 320.0, 316.0, 314.0, 316.0, 309.0, 324.0, 313.0, 314.0, 291.0, 296.0, 329.0, 310.0, 321.0, 306.0, 323.0, 316.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 319.0, 317.0, 314.0, 313.0, 312.0, 324.0, 313.0, 314.0, 260.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9359034441976002, "mean_processing_ms": 0.2554667399859708, "mean_inference_ms": 1.5193431419939385}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7440000, "num_steps_sampled": 3968000, "sample_time_ms": 21381.604, "load_time_ms": 36.734, "grad_time_ms": 9849.343, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021060302387923002, "policy_loss": -0.005640763323754072, "vf_loss": 83.09170532226562, "vf_explained_var": 0.7722363471984863, "kl": 0.0021093024406582117, "entropy": 1.12474524974823, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3968000, "episodes_total": 9920, "training_iteration": 310, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-23-36", "timestamp": 1660256616, "time_this_iter_s": 29.72802186012268, "time_total_s": 15028.99269771576, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15028.99269771576, "timesteps_since_restore": 3968000, "iterations_since_restore": 310, "perf": {"cpu_util_percent": 37.352380952380955, "ram_util_percent": 59.30714285714285}} +{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 623.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 311.605}, "custom_metrics": {"sparse_reward_mean": 216.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 191.21, "shaped_reward_min": 166, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.42, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.99, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 27, 
"useful_onion_pickup_agent_0_mean": 16.88, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.26, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.78, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.29, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.26, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.78, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.78, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 582.0, 630.0, 573.0, 636.0, 630.0, 639.0, 639.0, 636.0, 639.0, 630.0, 587.0, 630.0, 627.0, 633.0, 630.0, 636.0, 636.0, 587.0, 639.0, 633.0, 639.0, 587.0, 630.0, 633.0, 530.0, 630.0, 639.0, 633.0, 582.0, 633.0, 576.0, 579.0, 636.0, 633.0, 636.0, 633.0, 639.0, 630.0, 587.0, 639.0, 624.0, 636.0, 639.0, 639.0, 630.0, 630.0, 633.0, 606.0, 627.0, 627.0, 636.0, 630.0, 633.0, 627.0, 587.0, 639.0, 627.0, 639.0, 636.0, 639.0, 636.0, 636.0, 627.0, 636.0, 627.0, 533.0, 630.0, 582.0, 639.0, 587.0, 636.0, 593.0, 633.0, 627.0, 639.0, 627.0, 639.0, 633.0, 636.0, 630.0, 630.0, 630.0, 636.0, 633.0, 636.0, 582.0, 627.0, 639.0, 639.0, 582.0, 636.0, 627.0, 587.0, 636.0, 633.0, 633.0, 624.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 319.0, 311.0, 285.0, 288.0, 322.0, 314.0, 311.0, 319.0, 322.0, 317.0, 320.0, 319.0, 309.0, 327.0, 319.0, 320.0, 313.0, 317.0, 296.0, 291.0, 311.0, 319.0, 308.0, 319.0, 314.0, 319.0, 319.0, 311.0, 315.0, 321.0, 312.0, 324.0, 296.0, 291.0, 322.0, 317.0, 319.0, 314.0, 319.0, 320.0, 296.0, 291.0, 303.0, 327.0, 309.0, 324.0, 258.0, 272.0, 321.0, 309.0, 319.0, 320.0, 324.0, 309.0, 
287.0, 295.0, 319.0, 314.0, 293.0, 283.0, 296.0, 283.0, 316.0, 320.0, 313.0, 320.0, 314.0, 322.0, 322.0, 311.0, 322.0, 317.0, 311.0, 319.0, 288.0, 299.0, 319.0, 320.0, 311.0, 313.0, 319.0, 317.0, 330.0, 309.0, 317.0, 322.0, 314.0, 316.0, 311.0, 319.0, 319.0, 314.0, 294.0, 312.0, 310.0, 317.0, 309.0, 318.0, 320.0, 316.0, 314.0, 316.0, 309.0, 324.0, 313.0, 314.0, 291.0, 296.0, 329.0, 310.0, 321.0, 306.0, 323.0, 316.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 319.0, 317.0, 314.0, 313.0, 312.0, 324.0, 313.0, 314.0, 260.0, 273.0, 316.0, 314.0, 296.0, 286.0, 317.0, 322.0, 296.0, 291.0, 321.0, 315.0, 302.0, 291.0, 308.0, 325.0, 318.0, 309.0, 322.0, 317.0, 315.0, 312.0, 322.0, 317.0, 309.0, 324.0, 321.0, 315.0, 322.0, 308.0, 313.0, 317.0, 317.0, 313.0, 322.0, 314.0, 320.0, 313.0, 319.0, 317.0, 293.0, 289.0, 316.0, 311.0, 322.0, 317.0, 317.0, 322.0, 291.0, 291.0, 318.0, 318.0, 305.0, 322.0, 296.0, 291.0, 311.0, 325.0, 309.0, 324.0, 319.0, 314.0, 326.0, 298.0, 317.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9343560450317893, "mean_processing_ms": 0.25515871155662695, "mean_inference_ms": 1.5176487497740194}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7464000, "num_steps_sampled": 3980800, "sample_time_ms": 21156.178, "load_time_ms": 36.97, "grad_time_ms": 9869.74, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005363213713280857, "policy_loss": -0.0070701222866773605, "vf_loss": 81.7235336303711, "vf_explained_var": 0.7686123847961426, "kl": 0.0019356707343831658, "entropy": 1.131825566291809, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3980800, "episodes_total": 9952, "training_iteration": 311, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-24-07", "timestamp": 1660256647, "time_this_iter_s": 30.375731229782104, "time_total_s": 15059.368428945541, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15059.368428945541, "timesteps_since_restore": 3980800, "iterations_since_restore": 311, "perf": {"cpu_util_percent": 34.15581395348838, "ram_util_percent": 58.86046511627907}} +{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 621.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 310.815}, "custom_metrics": {"sparse_reward_mean": 215.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 190.43, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 17.35, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.84, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.8, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.14, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.65, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.82, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.3, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.34, 
"soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.31, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.65, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.82, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.65, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.82, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 624.0, 630.0, 633.0, 636.0, 636.0, 618.0, 636.0, 627.0, 567.0, 636.0, 636.0, 582.0, 633.0, 561.0, 582.0, 582.0, 630.0, 627.0, 587.0, 630.0, 639.0, 630.0, 633.0, 633.0, 633.0, 633.0, 639.0, 630.0, 630.0, 630.0, 627.0, 627.0, 636.0, 627.0, 533.0, 630.0, 582.0, 639.0, 587.0, 636.0, 593.0, 633.0, 627.0, 639.0, 627.0, 639.0, 633.0, 636.0, 630.0, 630.0, 630.0, 636.0, 633.0, 636.0, 582.0, 627.0, 639.0, 639.0, 582.0, 636.0, 627.0, 587.0, 636.0, 633.0, 633.0, 624.0, 636.0, 636.0, 633.0, 582.0, 630.0, 573.0, 636.0, 630.0, 639.0, 639.0, 636.0, 639.0, 630.0, 587.0, 630.0, 627.0, 633.0, 630.0, 636.0, 636.0, 587.0, 639.0, 633.0, 639.0, 587.0, 630.0, 633.0, 530.0, 630.0, 639.0, 633.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 313.0, 311.0, 316.0, 314.0, 319.0, 314.0, 312.0, 324.0, 316.0, 320.0, 305.0, 313.0, 309.0, 327.0, 309.0, 318.0, 273.0, 294.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 311.0, 322.0, 280.0, 281.0, 290.0, 
292.0, 291.0, 291.0, 311.0, 319.0, 313.0, 314.0, 296.0, 291.0, 308.0, 322.0, 322.0, 317.0, 320.0, 310.0, 318.0, 315.0, 319.0, 314.0, 317.0, 316.0, 316.0, 317.0, 317.0, 322.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 310.0, 317.0, 314.0, 313.0, 312.0, 324.0, 313.0, 314.0, 260.0, 273.0, 316.0, 314.0, 296.0, 286.0, 317.0, 322.0, 296.0, 291.0, 321.0, 315.0, 302.0, 291.0, 308.0, 325.0, 318.0, 309.0, 322.0, 317.0, 315.0, 312.0, 322.0, 317.0, 309.0, 324.0, 321.0, 315.0, 322.0, 308.0, 313.0, 317.0, 317.0, 313.0, 322.0, 314.0, 320.0, 313.0, 319.0, 317.0, 293.0, 289.0, 316.0, 311.0, 322.0, 317.0, 317.0, 322.0, 291.0, 291.0, 318.0, 318.0, 305.0, 322.0, 296.0, 291.0, 311.0, 325.0, 309.0, 324.0, 319.0, 314.0, 326.0, 298.0, 317.0, 319.0, 311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 319.0, 311.0, 285.0, 288.0, 322.0, 314.0, 311.0, 319.0, 322.0, 317.0, 320.0, 319.0, 309.0, 327.0, 319.0, 320.0, 313.0, 317.0, 296.0, 291.0, 311.0, 319.0, 308.0, 319.0, 314.0, 319.0, 319.0, 311.0, 315.0, 321.0, 312.0, 324.0, 296.0, 291.0, 322.0, 317.0, 319.0, 314.0, 319.0, 320.0, 296.0, 291.0, 303.0, 327.0, 309.0, 324.0, 258.0, 272.0, 321.0, 309.0, 319.0, 320.0, 324.0, 309.0, 287.0, 295.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9328126111792946, "mean_processing_ms": 0.25484999330507013, "mean_inference_ms": 1.5158702163945572}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7488000, "num_steps_sampled": 3993600, "sample_time_ms": 20762.299, "load_time_ms": 37.78, "grad_time_ms": 9801.464, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010485065868124366, "policy_loss": -0.00627841567620635, "vf_loss": 78.91202545166016, "vf_explained_var": 0.7650337219238281, "kl": 0.0021341259125620127, "entropy": 1.1285619735717773, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3993600, "episodes_total": 9984, 
"training_iteration": 312, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-24-36", "timestamp": 1660256676, "time_this_iter_s": 29.196868896484375, "time_total_s": 15088.565297842026, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15088.565297842026, "timesteps_since_restore": 3993600, "iterations_since_restore": 312, "perf": {"cpu_util_percent": 34.93170731707317, "ram_util_percent": 58.778048780487794}} +{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 620.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 310.045}, "custom_metrics": {"sparse_reward_mean": 215.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.69, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.75, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.86, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.98, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.63, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.36, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.34, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.63, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.63, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 639.0, 630.0, 570.0, 633.0, 639.0, 636.0, 582.0, 576.0, 630.0, 639.0, 636.0, 639.0, 636.0, 630.0, 590.0, 639.0, 582.0, 621.0, 573.0, 627.0, 516.0, 639.0, 572.0, 639.0, 579.0, 636.0, 630.0, 639.0, 627.0, 630.0, 633.0, 633.0, 624.0, 636.0, 636.0, 633.0, 582.0, 630.0, 573.0, 636.0, 630.0, 639.0, 639.0, 636.0, 639.0, 630.0, 587.0, 630.0, 627.0, 633.0, 630.0, 636.0, 636.0, 587.0, 639.0, 633.0, 639.0, 587.0, 630.0, 633.0, 530.0, 630.0, 639.0, 633.0, 582.0, 633.0, 636.0, 624.0, 630.0, 633.0, 636.0, 636.0, 618.0, 636.0, 627.0, 567.0, 636.0, 636.0, 582.0, 633.0, 561.0, 582.0, 582.0, 630.0, 627.0, 587.0, 630.0, 639.0, 630.0, 633.0, 633.0, 633.0, 633.0, 639.0, 630.0, 630.0, 630.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 319.0, 317.0, 
320.0, 319.0, 323.0, 307.0, 282.0, 288.0, 319.0, 314.0, 317.0, 322.0, 319.0, 317.0, 294.0, 288.0, 298.0, 278.0, 310.0, 320.0, 324.0, 315.0, 317.0, 319.0, 322.0, 317.0, 316.0, 320.0, 313.0, 317.0, 291.0, 299.0, 317.0, 322.0, 271.0, 311.0, 316.0, 305.0, 278.0, 295.0, 315.0, 312.0, 249.0, 267.0, 317.0, 322.0, 277.0, 295.0, 319.0, 320.0, 297.0, 282.0, 317.0, 319.0, 313.0, 317.0, 316.0, 323.0, 316.0, 311.0, 321.0, 309.0, 309.0, 324.0, 319.0, 314.0, 326.0, 298.0, 317.0, 319.0, 311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 319.0, 311.0, 285.0, 288.0, 322.0, 314.0, 311.0, 319.0, 322.0, 317.0, 320.0, 319.0, 309.0, 327.0, 319.0, 320.0, 313.0, 317.0, 296.0, 291.0, 311.0, 319.0, 308.0, 319.0, 314.0, 319.0, 319.0, 311.0, 315.0, 321.0, 312.0, 324.0, 296.0, 291.0, 322.0, 317.0, 319.0, 314.0, 319.0, 320.0, 296.0, 291.0, 303.0, 327.0, 309.0, 324.0, 258.0, 272.0, 321.0, 309.0, 319.0, 320.0, 324.0, 309.0, 287.0, 295.0, 319.0, 314.0, 319.0, 317.0, 313.0, 311.0, 316.0, 314.0, 319.0, 314.0, 312.0, 324.0, 316.0, 320.0, 305.0, 313.0, 309.0, 327.0, 309.0, 318.0, 273.0, 294.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 311.0, 322.0, 280.0, 281.0, 290.0, 292.0, 291.0, 291.0, 311.0, 319.0, 313.0, 314.0, 296.0, 291.0, 308.0, 322.0, 322.0, 317.0, 320.0, 310.0, 318.0, 315.0, 319.0, 314.0, 317.0, 316.0, 316.0, 317.0, 317.0, 322.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 310.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9312771580791189, "mean_processing_ms": 0.25454145444446286, "mean_inference_ms": 1.514166938443501}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7512000, "num_steps_sampled": 4006400, "sample_time_ms": 20653.913, "load_time_ms": 37.661, "grad_time_ms": 9796.127, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016649666940793395, "policy_loss": -0.005544379819184542, "vf_loss": 77.78628540039062, "vf_explained_var": 0.7735397815704346, "kl": 0.0018068948993459344, "entropy": 1.1385550498962402, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4006400, "episodes_total": 10016, "training_iteration": 313, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-25-08", "timestamp": 1660256708, "time_this_iter_s": 32.13484477996826, "time_total_s": 15120.700142621994, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15120.700142621994, "timesteps_since_restore": 4006400, "iterations_since_restore": 313, "perf": {"cpu_util_percent": 34.30434782608695, "ram_util_percent": 58.8478260869565}} +{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 617.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.91}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.42, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.24, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.59, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.8, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.92, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.53, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.22, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.34, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.28, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.53, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.53, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 587.0, 639.0, 587.0, 630.0, 582.0, 636.0, 636.0, 627.0, 639.0, 639.0, 584.0, 587.0, 639.0, 582.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 587.0, 630.0, 633.0, 630.0, 630.0, 593.0, 582.0, 639.0, 633.0, 582.0, 633.0, 636.0, 624.0, 630.0, 633.0, 636.0, 636.0, 618.0, 636.0, 627.0, 567.0, 636.0, 636.0, 582.0, 633.0, 561.0, 582.0, 582.0, 630.0, 627.0, 587.0, 630.0, 639.0, 630.0, 633.0, 633.0, 633.0, 633.0, 639.0, 630.0, 630.0, 630.0, 627.0, 630.0, 636.0, 639.0, 630.0, 570.0, 633.0, 639.0, 636.0, 582.0, 576.0, 630.0, 639.0, 636.0, 639.0, 636.0, 630.0, 590.0, 639.0, 582.0, 621.0, 573.0, 627.0, 516.0, 639.0, 572.0, 639.0, 579.0, 636.0, 630.0, 639.0, 627.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 292.0, 295.0, 314.0, 325.0, 301.0, 286.0, 311.0, 319.0, 295.0, 287.0, 319.0, 317.0, 317.0, 319.0, 305.0, 322.0, 315.0, 324.0, 317.0, 322.0, 298.0, 286.0, 298.0, 289.0, 319.0, 320.0, 286.0, 296.0, 309.0, 321.0, 296.0, 283.0, 318.0, 315.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 298.0, 289.0, 319.0, 311.0, 316.0, 317.0, 323.0, 307.0, 315.0, 315.0, 297.0, 296.0, 291.0, 291.0, 319.0, 320.0, 324.0, 309.0, 287.0, 295.0, 319.0, 314.0, 319.0, 317.0, 313.0, 311.0, 316.0, 314.0, 319.0, 314.0, 312.0, 324.0, 316.0, 320.0, 305.0, 313.0, 309.0, 327.0, 309.0, 318.0, 273.0, 294.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 311.0, 322.0, 280.0, 281.0, 290.0, 292.0, 291.0, 291.0, 311.0, 319.0, 313.0, 314.0, 296.0, 291.0, 308.0, 322.0, 322.0, 317.0, 320.0, 310.0, 318.0, 315.0, 319.0, 314.0, 317.0, 316.0, 316.0, 317.0, 317.0, 322.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 310.0, 317.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 323.0, 307.0, 282.0, 288.0, 319.0, 314.0, 317.0, 322.0, 319.0, 317.0, 294.0, 288.0, 298.0, 278.0, 310.0, 320.0, 324.0, 315.0, 317.0, 319.0, 322.0, 317.0, 316.0, 320.0, 313.0, 317.0, 291.0, 299.0, 317.0, 322.0, 271.0, 311.0, 316.0, 305.0, 278.0, 295.0, 315.0, 312.0, 249.0, 267.0, 317.0, 322.0, 277.0, 295.0, 319.0, 320.0, 297.0, 282.0, 317.0, 319.0, 313.0, 317.0, 316.0, 323.0, 316.0, 311.0, 321.0, 309.0]}, "sampler_perf": {"mean_env_wait_ms": 0.929753206610253, "mean_processing_ms": 0.25423624145174695, "mean_inference_ms": 1.5125268663026497}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7536000, "num_steps_sampled": 4019200, "sample_time_ms": 20614.803, "load_time_ms": 37.576, "grad_time_ms": 9935.143, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.005392418708652258, "policy_loss": -0.002403073711320758, "vf_loss": 83.61144256591797, "vf_explained_var": 0.7692582011222839, "kl": 0.0021780512761324644, "entropy": 1.131287932395935, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4019200, "episodes_total": 10048, "training_iteration": 314, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-25-41", "timestamp": 1660256741, "time_this_iter_s": 33.338226318359375, "time_total_s": 15154.038368940353, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15154.038368940353, "timesteps_since_restore": 4019200, "iterations_since_restore": 314, "perf": {"cpu_util_percent": 34.01914893617022, "ram_util_percent": 58.87021276595746}} +{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 614.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 307.41}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.82, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.53, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.65, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.42, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.46, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.7, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.03, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.24, 
"useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.24, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.46, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.7, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.46, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.7, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 590.0, 633.0, 633.0, 630.0, 630.0, 627.0, 582.0, 573.0, 630.0, 630.0, 582.0, 636.0, 639.0, 630.0, 587.0, 639.0, 630.0, 627.0, 630.0, 630.0, 639.0, 459.0, 582.0, 627.0, 582.0, 627.0, 636.0, 627.0, 633.0, 530.0, 639.0, 630.0, 630.0, 630.0, 627.0, 630.0, 636.0, 639.0, 630.0, 570.0, 633.0, 639.0, 636.0, 582.0, 576.0, 630.0, 639.0, 636.0, 639.0, 636.0, 630.0, 590.0, 639.0, 582.0, 621.0, 573.0, 627.0, 516.0, 639.0, 572.0, 639.0, 579.0, 636.0, 630.0, 639.0, 627.0, 630.0, 630.0, 587.0, 639.0, 587.0, 630.0, 582.0, 636.0, 636.0, 627.0, 639.0, 639.0, 584.0, 587.0, 639.0, 582.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 587.0, 630.0, 633.0, 630.0, 630.0, 593.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 297.0, 294.0, 296.0, 327.0, 306.0, 314.0, 319.0, 308.0, 322.0, 319.0, 311.0, 311.0, 316.0, 294.0, 288.0, 282.0, 291.0, 318.0, 312.0, 309.0, 321.0, 291.0, 291.0, 318.0, 318.0, 317.0, 322.0, 316.0, 314.0, 296.0, 291.0, 318.0, 321.0, 313.0, 317.0, 318.0, 309.0, 311.0, 319.0, 306.0, 324.0, 317.0, 322.0, 231.0, 228.0, 288.0, 294.0, 315.0, 312.0, 294.0, 288.0, 308.0, 319.0, 316.0, 320.0, 324.0, 303.0, 311.0, 322.0, 265.0, 265.0, 314.0, 325.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 310.0, 317.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 323.0, 307.0, 282.0, 288.0, 319.0, 314.0, 317.0, 322.0, 319.0, 317.0, 294.0, 288.0, 298.0, 278.0, 310.0, 320.0, 324.0, 315.0, 317.0, 319.0, 322.0, 317.0, 316.0, 320.0, 313.0, 317.0, 291.0, 299.0, 317.0, 322.0, 271.0, 311.0, 316.0, 305.0, 278.0, 295.0, 315.0, 312.0, 249.0, 267.0, 317.0, 322.0, 277.0, 295.0, 319.0, 320.0, 297.0, 282.0, 317.0, 319.0, 313.0, 317.0, 316.0, 323.0, 316.0, 311.0, 321.0, 309.0, 316.0, 314.0, 292.0, 295.0, 314.0, 325.0, 301.0, 286.0, 311.0, 319.0, 295.0, 287.0, 319.0, 317.0, 317.0, 319.0, 305.0, 322.0, 315.0, 324.0, 317.0, 322.0, 298.0, 286.0, 298.0, 289.0, 319.0, 320.0, 286.0, 296.0, 309.0, 321.0, 296.0, 283.0, 318.0, 315.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 298.0, 289.0, 319.0, 311.0, 316.0, 317.0, 323.0, 307.0, 315.0, 315.0, 297.0, 296.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9282592182519096, "mean_processing_ms": 0.253938832905362, "mean_inference_ms": 1.511735993488809}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7560000, 
"num_steps_sampled": 4032000, "sample_time_ms": 21363.9, "load_time_ms": 37.576, "grad_time_ms": 10037.47, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003922081959899515, "policy_loss": -0.007403677329421043, "vf_loss": 83.57759857177734, "vf_explained_var": 0.7612032294273376, "kl": 0.001659790868870914, "entropy": 1.12375009059906, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4032000, "episodes_total": 10080, "training_iteration": 315, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-26-24", "timestamp": 1660256784, "time_this_iter_s": 42.419737100601196, "time_total_s": 15196.458106040955, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15196.458106040955, "timesteps_since_restore": 4032000, "iterations_since_restore": 315, "perf": {"cpu_util_percent": 30.92, "ram_util_percent": 58.89833333333333}} +{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 612.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.395}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.39, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.82, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.71, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.34, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.12, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.14, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 630.0, 587.0, 630.0, 587.0, 564.0, 576.0, 627.0, 636.0, 630.0, 526.0, 587.0, 582.0, 627.0, 633.0, 633.0, 630.0, 621.0, 579.0, 587.0, 582.0, 633.0, 636.0, 633.0, 576.0, 636.0, 633.0, 587.0, 639.0, 582.0, 636.0, 630.0, 639.0, 627.0, 630.0, 630.0, 587.0, 639.0, 587.0, 630.0, 582.0, 636.0, 636.0, 627.0, 639.0, 639.0, 584.0, 587.0, 639.0, 582.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 587.0, 630.0, 633.0, 630.0, 630.0, 593.0, 582.0, 587.0, 590.0, 633.0, 633.0, 630.0, 630.0, 627.0, 582.0, 573.0, 630.0, 630.0, 582.0, 636.0, 639.0, 630.0, 587.0, 639.0, 630.0, 627.0, 630.0, 630.0, 639.0, 459.0, 582.0, 627.0, 582.0, 
627.0, 636.0, 627.0, 633.0, 530.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [315.0, 315.0, 316.0, 317.0, 306.0, 324.0, 290.0, 297.0, 316.0, 314.0, 298.0, 289.0, 286.0, 278.0, 287.0, 289.0, 299.0, 328.0, 313.0, 323.0, 321.0, 309.0, 261.0, 265.0, 280.0, 307.0, 291.0, 291.0, 313.0, 314.0, 314.0, 319.0, 318.0, 315.0, 311.0, 319.0, 318.0, 303.0, 291.0, 288.0, 293.0, 294.0, 286.0, 296.0, 321.0, 312.0, 316.0, 320.0, 311.0, 322.0, 288.0, 288.0, 324.0, 312.0, 309.0, 324.0, 300.0, 287.0, 322.0, 317.0, 294.0, 288.0, 322.0, 314.0, 313.0, 317.0, 316.0, 323.0, 316.0, 311.0, 321.0, 309.0, 316.0, 314.0, 292.0, 295.0, 314.0, 325.0, 301.0, 286.0, 311.0, 319.0, 295.0, 287.0, 319.0, 317.0, 317.0, 319.0, 305.0, 322.0, 315.0, 324.0, 317.0, 322.0, 298.0, 286.0, 298.0, 289.0, 319.0, 320.0, 286.0, 296.0, 309.0, 321.0, 296.0, 283.0, 318.0, 315.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 298.0, 289.0, 319.0, 311.0, 316.0, 317.0, 323.0, 307.0, 315.0, 315.0, 297.0, 296.0, 291.0, 291.0, 290.0, 297.0, 294.0, 296.0, 327.0, 306.0, 314.0, 319.0, 308.0, 322.0, 319.0, 311.0, 311.0, 316.0, 294.0, 288.0, 282.0, 291.0, 318.0, 312.0, 309.0, 321.0, 291.0, 291.0, 318.0, 318.0, 317.0, 322.0, 316.0, 314.0, 296.0, 291.0, 318.0, 321.0, 313.0, 317.0, 318.0, 309.0, 311.0, 319.0, 306.0, 324.0, 317.0, 322.0, 231.0, 228.0, 288.0, 294.0, 315.0, 312.0, 294.0, 288.0, 308.0, 319.0, 316.0, 320.0, 324.0, 303.0, 311.0, 322.0, 265.0, 265.0, 314.0, 325.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.9267730740231102, "mean_processing_ms": 0.2536434066530759, "mean_inference_ms": 1.510930494877483}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7584000, "num_steps_sampled": 4044800, "sample_time_ms": 21428.224, "load_time_ms": 37.331, "grad_time_ms": 10025.054, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037510674446821213, "policy_loss": -0.004000961780548096, "vf_loss": 83.20941925048828, "vf_explained_var": 0.7631545066833496, "kl": 0.0021077950950711966, "entropy": 1.1378254890441895, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4044800, "episodes_total": 10112, "training_iteration": 316, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-26-55", "timestamp": 1660256815, "time_this_iter_s": 31.23423171043396, "time_total_s": 15227.692337751389, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15227.692337751389, "timesteps_since_restore": 4044800, "iterations_since_restore": 316, "perf": {"cpu_util_percent": 34.638636363636365, "ram_util_percent": 58.979545454545466}} +{"episode_reward_max": 639.0, "episode_reward_min": 342.0, "episode_reward_mean": 610.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 166.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 305.135}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.07, "shaped_reward_min": 102, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.92, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 
17.76, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.02, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.76, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, 
"viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.76, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 587.0, 630.0, 627.0, 627.0, 630.0, 624.0, 587.0, 627.0, 639.0, 627.0, 630.0, 582.0, 633.0, 593.0, 633.0, 639.0, 639.0, 630.0, 639.0, 582.0, 342.0, 630.0, 564.0, 639.0, 587.0, 587.0, 627.0, 633.0, 627.0, 627.0, 627.0, 630.0, 630.0, 593.0, 582.0, 587.0, 590.0, 633.0, 633.0, 630.0, 630.0, 627.0, 582.0, 573.0, 630.0, 630.0, 582.0, 636.0, 639.0, 630.0, 587.0, 639.0, 630.0, 627.0, 630.0, 630.0, 639.0, 459.0, 582.0, 627.0, 582.0, 627.0, 636.0, 627.0, 
633.0, 530.0, 639.0, 630.0, 633.0, 630.0, 587.0, 630.0, 587.0, 564.0, 576.0, 627.0, 636.0, 630.0, 526.0, 587.0, 582.0, 627.0, 633.0, 633.0, 630.0, 621.0, 579.0, 587.0, 582.0, 633.0, 636.0, 633.0, 576.0, 636.0, 633.0, 587.0, 639.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 295.0, 292.0, 311.0, 319.0, 311.0, 316.0, 308.0, 319.0, 322.0, 308.0, 319.0, 305.0, 285.0, 302.0, 306.0, 321.0, 319.0, 320.0, 321.0, 306.0, 321.0, 309.0, 295.0, 287.0, 317.0, 316.0, 299.0, 294.0, 319.0, 314.0, 329.0, 310.0, 319.0, 320.0, 324.0, 306.0, 314.0, 325.0, 292.0, 290.0, 166.0, 176.0, 311.0, 319.0, 265.0, 299.0, 322.0, 317.0, 294.0, 293.0, 287.0, 300.0, 313.0, 314.0, 309.0, 324.0, 316.0, 311.0, 315.0, 312.0, 319.0, 308.0, 323.0, 307.0, 315.0, 315.0, 297.0, 296.0, 291.0, 291.0, 290.0, 297.0, 294.0, 296.0, 327.0, 306.0, 314.0, 319.0, 308.0, 322.0, 319.0, 311.0, 311.0, 316.0, 294.0, 288.0, 282.0, 291.0, 318.0, 312.0, 309.0, 321.0, 291.0, 291.0, 318.0, 318.0, 317.0, 322.0, 316.0, 314.0, 296.0, 291.0, 318.0, 321.0, 313.0, 317.0, 318.0, 309.0, 311.0, 319.0, 306.0, 324.0, 317.0, 322.0, 231.0, 228.0, 288.0, 294.0, 315.0, 312.0, 294.0, 288.0, 308.0, 319.0, 316.0, 320.0, 324.0, 303.0, 311.0, 322.0, 265.0, 265.0, 314.0, 325.0, 315.0, 315.0, 316.0, 317.0, 306.0, 324.0, 290.0, 297.0, 316.0, 314.0, 298.0, 289.0, 286.0, 278.0, 287.0, 289.0, 299.0, 328.0, 313.0, 323.0, 321.0, 309.0, 261.0, 265.0, 280.0, 307.0, 291.0, 291.0, 313.0, 314.0, 314.0, 319.0, 318.0, 315.0, 311.0, 319.0, 318.0, 
303.0, 291.0, 288.0, 293.0, 294.0, 286.0, 296.0, 321.0, 312.0, 316.0, 320.0, 311.0, 322.0, 288.0, 288.0, 324.0, 312.0, 309.0, 324.0, 300.0, 287.0, 322.0, 317.0, 294.0, 288.0, 322.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9252951052976854, "mean_processing_ms": 0.2533493735803219, "mean_inference_ms": 1.5101286664706006}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7608000, "num_steps_sampled": 4057600, "sample_time_ms": 21664.809, "load_time_ms": 37.646, "grad_time_ms": 10004.469, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035793918650597334, "policy_loss": -0.004444916266947985, "vf_loss": 85.9527359008789, "vf_explained_var": 0.7614016532897949, "kl": 0.0019710592459887266, "entropy": 1.1419222354888916, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4057600, "episodes_total": 10144, "training_iteration": 317, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-27-26", "timestamp": 1660256846, "time_this_iter_s": 31.29483914375305, "time_total_s": 15258.987176895142, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15258.987176895142, "timesteps_since_restore": 4057600, "iterations_since_restore": 317, "perf": {"cpu_util_percent": 35.325, "ram_util_percent": 58.888636363636344}} +{"episode_reward_max": 639.0, "episode_reward_min": 342.0, "episode_reward_mean": 610.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 166.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 305.105}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 120, 
"sparse_reward_max": 220, "shaped_reward_mean": 187.41, "shaped_reward_min": 102, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.61, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.6, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.99, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.31, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.99, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.99, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.99, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 587.0, 639.0, 636.0, 639.0, 630.0, 639.0, 566.0, 639.0, 541.0, 590.0, 587.0, 627.0, 584.0, 639.0, 639.0, 518.0, 582.0, 590.0, 633.0, 630.0, 633.0, 627.0, 636.0, 630.0, 587.0, 633.0, 564.0, 639.0, 630.0, 633.0, 630.0, 627.0, 633.0, 530.0, 639.0, 630.0, 633.0, 630.0, 587.0, 630.0, 
587.0, 564.0, 576.0, 627.0, 636.0, 630.0, 526.0, 587.0, 582.0, 627.0, 633.0, 633.0, 630.0, 621.0, 579.0, 587.0, 582.0, 633.0, 636.0, 633.0, 576.0, 636.0, 633.0, 587.0, 639.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 627.0, 630.0, 624.0, 587.0, 627.0, 639.0, 627.0, 630.0, 582.0, 633.0, 593.0, 633.0, 639.0, 639.0, 630.0, 639.0, 582.0, 342.0, 630.0, 564.0, 639.0, 587.0, 587.0, 627.0, 633.0, 627.0, 627.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 291.0, 296.0, 319.0, 320.0, 319.0, 317.0, 317.0, 322.0, 308.0, 322.0, 325.0, 314.0, 290.0, 276.0, 319.0, 320.0, 277.0, 264.0, 306.0, 284.0, 285.0, 302.0, 318.0, 309.0, 294.0, 290.0, 320.0, 319.0, 327.0, 312.0, 246.0, 272.0, 291.0, 291.0, 296.0, 294.0, 311.0, 322.0, 316.0, 314.0, 326.0, 307.0, 308.0, 319.0, 322.0, 314.0, 310.0, 320.0, 293.0, 294.0, 319.0, 314.0, 277.0, 287.0, 319.0, 320.0, 316.0, 314.0, 321.0, 312.0, 308.0, 322.0, 324.0, 303.0, 311.0, 322.0, 265.0, 265.0, 314.0, 325.0, 315.0, 315.0, 316.0, 317.0, 306.0, 324.0, 290.0, 297.0, 316.0, 314.0, 298.0, 289.0, 286.0, 278.0, 287.0, 289.0, 299.0, 328.0, 313.0, 323.0, 321.0, 309.0, 261.0, 265.0, 280.0, 307.0, 291.0, 291.0, 313.0, 314.0, 314.0, 319.0, 318.0, 315.0, 311.0, 319.0, 318.0, 303.0, 291.0, 288.0, 293.0, 294.0, 286.0, 296.0, 321.0, 312.0, 316.0, 320.0, 311.0, 322.0, 288.0, 288.0, 324.0, 312.0, 309.0, 324.0, 300.0, 287.0, 322.0, 317.0, 294.0, 288.0, 322.0, 314.0, 319.0, 314.0, 295.0, 292.0, 311.0, 319.0, 311.0, 316.0, 308.0, 319.0, 322.0, 308.0, 319.0, 
305.0, 285.0, 302.0, 306.0, 321.0, 319.0, 320.0, 321.0, 306.0, 321.0, 309.0, 295.0, 287.0, 317.0, 316.0, 299.0, 294.0, 319.0, 314.0, 329.0, 310.0, 319.0, 320.0, 324.0, 306.0, 314.0, 325.0, 292.0, 290.0, 166.0, 176.0, 311.0, 319.0, 265.0, 299.0, 322.0, 317.0, 294.0, 293.0, 287.0, 300.0, 313.0, 314.0, 309.0, 324.0, 316.0, 311.0, 315.0, 312.0, 319.0, 308.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9238104630131461, "mean_processing_ms": 0.2530528359146936, "mean_inference_ms": 1.5086822690810806}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7632000, "num_steps_sampled": 4070400, "sample_time_ms": 21837.173, "load_time_ms": 37.475, "grad_time_ms": 10172.214, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033606337383389473, "policy_loss": -0.004180160816758871, "vf_loss": 81.06964111328125, "vf_explained_var": 0.7688854336738586, "kl": 0.0033983252942562103, "entropy": 1.1323426961898804, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4070400, "episodes_total": 10176, "training_iteration": 318, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-27-57", "timestamp": 1660256877, "time_this_iter_s": 30.971107959747314, "time_total_s": 15289.958284854889, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15289.958284854889, "timesteps_since_restore": 4070400, "iterations_since_restore": 318, "perf": {"cpu_util_percent": 34.67727272727273, "ram_util_percent": 58.75454545454544}} +{"episode_reward_max": 639.0, "episode_reward_min": 342.0, "episode_reward_mean": 610.4, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 166.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 305.2}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.6, "shaped_reward_min": 102, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, 
"useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.67, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.13, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.74, 
"optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 627.0, 630.0, 633.0, 579.0, 530.0, 587.0, 570.0, 636.0, 627.0, 
579.0, 636.0, 633.0, 630.0, 636.0, 624.0, 587.0, 621.0, 630.0, 582.0, 636.0, 630.0, 633.0, 596.0, 630.0, 541.0, 636.0, 630.0, 627.0, 582.0, 582.0, 587.0, 639.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 627.0, 630.0, 624.0, 587.0, 627.0, 639.0, 627.0, 630.0, 582.0, 633.0, 593.0, 633.0, 639.0, 639.0, 630.0, 639.0, 582.0, 342.0, 630.0, 564.0, 639.0, 587.0, 587.0, 627.0, 633.0, 627.0, 627.0, 627.0, 579.0, 587.0, 639.0, 636.0, 639.0, 630.0, 639.0, 566.0, 639.0, 541.0, 590.0, 587.0, 627.0, 584.0, 639.0, 639.0, 518.0, 582.0, 590.0, 633.0, 630.0, 633.0, 627.0, 636.0, 630.0, 587.0, 633.0, 564.0, 639.0, 630.0, 633.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 320.0, 310.0, 313.0, 314.0, 316.0, 314.0, 319.0, 314.0, 291.0, 288.0, 256.0, 274.0, 283.0, 304.0, 286.0, 284.0, 319.0, 317.0, 312.0, 315.0, 291.0, 288.0, 319.0, 317.0, 319.0, 314.0, 308.0, 322.0, 317.0, 319.0, 314.0, 310.0, 293.0, 294.0, 307.0, 314.0, 313.0, 317.0, 291.0, 291.0, 312.0, 324.0, 313.0, 317.0, 324.0, 309.0, 304.0, 292.0, 308.0, 322.0, 272.0, 269.0, 309.0, 327.0, 306.0, 324.0, 318.0, 309.0, 293.0, 289.0, 291.0, 291.0, 300.0, 287.0, 322.0, 317.0, 294.0, 288.0, 322.0, 314.0, 319.0, 314.0, 295.0, 292.0, 311.0, 319.0, 311.0, 316.0, 308.0, 319.0, 322.0, 308.0, 319.0, 305.0, 285.0, 302.0, 306.0, 321.0, 319.0, 320.0, 321.0, 306.0, 321.0, 309.0, 295.0, 287.0, 317.0, 316.0, 299.0, 294.0, 319.0, 314.0, 329.0, 310.0, 319.0, 320.0, 324.0, 306.0, 314.0, 325.0, 292.0, 290.0, 166.0, 176.0, 311.0, 319.0, 265.0, 
299.0, 322.0, 317.0, 294.0, 293.0, 287.0, 300.0, 313.0, 314.0, 309.0, 324.0, 316.0, 311.0, 315.0, 312.0, 319.0, 308.0, 297.0, 282.0, 291.0, 296.0, 319.0, 320.0, 319.0, 317.0, 317.0, 322.0, 308.0, 322.0, 325.0, 314.0, 290.0, 276.0, 319.0, 320.0, 277.0, 264.0, 306.0, 284.0, 285.0, 302.0, 318.0, 309.0, 294.0, 290.0, 320.0, 319.0, 327.0, 312.0, 246.0, 272.0, 291.0, 291.0, 296.0, 294.0, 311.0, 322.0, 316.0, 314.0, 326.0, 307.0, 308.0, 319.0, 322.0, 314.0, 310.0, 320.0, 293.0, 294.0, 319.0, 314.0, 277.0, 287.0, 319.0, 320.0, 316.0, 314.0, 321.0, 312.0, 308.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9223351501134641, "mean_processing_ms": 0.2527584584336856, "mean_inference_ms": 1.5072340124708836}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7656000, "num_steps_sampled": 4083200, "sample_time_ms": 21920.847, "load_time_ms": 37.336, "grad_time_ms": 10402.485, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003446048649493605, "policy_loss": -0.007382390554994345, "vf_loss": 82.95357513427734, "vf_explained_var": 0.759884774684906, "kl": 0.0017484420677646995, "entropy": 1.1367279291152954, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4083200, "episodes_total": 10208, "training_iteration": 319, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-28-31", "timestamp": 1660256911, "time_this_iter_s": 33.19297218322754, "time_total_s": 15323.151257038116, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": 
false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15323.151257038116, "timesteps_since_restore": 4083200, "iterations_since_restore": 319, "perf": {"cpu_util_percent": 
34.074468085106375, "ram_util_percent": 59.221276595744676}} +{"episode_reward_max": 639.0, "episode_reward_min": 518.0, "episode_reward_mean": 612.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 246.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.085}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.17, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.56, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.53, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.91, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, 
"onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.41, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 16.13, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 630.0, 639.0, 633.0, 627.0, 590.0, 587.0, 633.0, 630.0, 536.0, 624.0, 630.0, 575.0, 636.0, 587.0, 590.0, 630.0, 587.0, 633.0, 587.0, 633.0, 633.0, 630.0, 570.0, 630.0, 587.0, 633.0, 582.0, 627.0, 633.0, 636.0, 633.0, 627.0, 627.0, 627.0, 579.0, 587.0, 639.0, 636.0, 639.0, 630.0, 639.0, 566.0, 639.0, 541.0, 590.0, 587.0, 627.0, 584.0, 639.0, 639.0, 518.0, 582.0, 590.0, 633.0, 630.0, 633.0, 627.0, 636.0, 630.0, 587.0, 633.0, 564.0, 639.0, 630.0, 633.0, 630.0, 582.0, 630.0, 627.0, 630.0, 633.0, 579.0, 530.0, 587.0, 570.0, 636.0, 627.0, 579.0, 636.0, 633.0, 630.0, 636.0, 624.0, 587.0, 621.0, 630.0, 582.0, 636.0, 630.0, 633.0, 596.0, 630.0, 541.0, 636.0, 630.0, 627.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 311.0, 319.0, 313.0, 317.0, 316.0, 323.0, 314.0, 319.0, 311.0, 316.0, 299.0, 291.0, 296.0, 291.0, 311.0, 322.0, 313.0, 317.0, 257.0, 279.0, 318.0, 306.0, 308.0, 322.0, 281.0, 294.0, 314.0, 322.0, 291.0, 296.0, 298.0, 292.0, 309.0, 321.0, 290.0, 297.0, 313.0, 320.0, 293.0, 294.0, 321.0, 312.0, 319.0, 314.0, 314.0, 316.0, 288.0, 282.0, 315.0, 315.0, 291.0, 296.0, 319.0, 314.0, 302.0, 280.0, 313.0, 314.0, 314.0, 319.0, 313.0, 323.0, 309.0, 324.0, 316.0, 311.0, 315.0, 312.0, 319.0, 308.0, 297.0, 282.0, 291.0, 296.0, 319.0, 320.0, 319.0, 317.0, 317.0, 322.0, 308.0, 322.0, 325.0, 314.0, 290.0, 276.0, 319.0, 320.0, 277.0, 
264.0, 306.0, 284.0, 285.0, 302.0, 318.0, 309.0, 294.0, 290.0, 320.0, 319.0, 327.0, 312.0, 246.0, 272.0, 291.0, 291.0, 296.0, 294.0, 311.0, 322.0, 316.0, 314.0, 326.0, 307.0, 308.0, 319.0, 322.0, 314.0, 310.0, 320.0, 293.0, 294.0, 319.0, 314.0, 277.0, 287.0, 319.0, 320.0, 316.0, 314.0, 321.0, 312.0, 308.0, 322.0, 292.0, 290.0, 320.0, 310.0, 313.0, 314.0, 316.0, 314.0, 319.0, 314.0, 291.0, 288.0, 256.0, 274.0, 283.0, 304.0, 286.0, 284.0, 319.0, 317.0, 312.0, 315.0, 291.0, 288.0, 319.0, 317.0, 319.0, 314.0, 308.0, 322.0, 317.0, 319.0, 314.0, 310.0, 293.0, 294.0, 307.0, 314.0, 313.0, 317.0, 291.0, 291.0, 312.0, 324.0, 313.0, 317.0, 324.0, 309.0, 304.0, 292.0, 308.0, 322.0, 272.0, 269.0, 309.0, 327.0, 306.0, 324.0, 318.0, 309.0, 293.0, 289.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9208740877938348, "mean_processing_ms": 0.25246719663876194, "mean_inference_ms": 1.5059256221319566}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7680000, "num_steps_sampled": 4096000, "sample_time_ms": 22117.687, "load_time_ms": 37.357, "grad_time_ms": 10501.528, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030171778053045273, "policy_loss": -0.004927590023726225, "vf_loss": 85.14810943603516, "vf_explained_var": 0.76070237159729, "kl": 0.002144080586731434, "entropy": 1.140079379081726, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4096000, "episodes_total": 10240, "training_iteration": 320, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-29-03", "timestamp": 1660256943, "time_this_iter_s": 32.6832230091095, "time_total_s": 15355.834480047226, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15355.834480047226, "timesteps_since_restore": 4096000, "iterations_since_restore": 320, "perf": {"cpu_util_percent": 32.95652173913044, "ram_util_percent": 59.06739130434784}} +{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 613.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.82}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.44, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.75, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.81, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 25, 
"useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.11, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.37, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.04, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.47, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.05, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.06, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.04, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.04, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 582.0, 576.0, 587.0, 630.0, 636.0, 630.0, 636.0, 630.0, 636.0, 624.0, 639.0, 582.0, 624.0, 582.0, 633.0, 587.0, 636.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 636.0, 633.0, 633.0, 627.0, 587.0, 633.0, 582.0, 579.0, 639.0, 630.0, 633.0, 630.0, 582.0, 630.0, 627.0, 630.0, 633.0, 579.0, 530.0, 587.0, 570.0, 636.0, 627.0, 579.0, 636.0, 633.0, 630.0, 636.0, 624.0, 587.0, 621.0, 630.0, 582.0, 636.0, 630.0, 633.0, 596.0, 630.0, 541.0, 636.0, 630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 639.0, 633.0, 627.0, 590.0, 587.0, 633.0, 630.0, 536.0, 624.0, 630.0, 575.0, 636.0, 587.0, 590.0, 630.0, 587.0, 633.0, 587.0, 633.0, 633.0, 630.0, 570.0, 630.0, 587.0, 633.0, 582.0, 627.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 282.0, 287.0, 295.0, 289.0, 287.0, 299.0, 288.0, 326.0, 304.0, 321.0, 315.0, 311.0, 319.0, 314.0, 322.0, 317.0, 313.0, 322.0, 314.0, 311.0, 313.0, 317.0, 322.0, 287.0, 295.0, 313.0, 311.0, 302.0, 280.0, 314.0, 319.0, 295.0, 292.0, 322.0, 314.0, 316.0, 317.0, 324.0, 312.0, 294.0, 293.0, 314.0, 322.0, 311.0, 322.0, 324.0, 309.0, 316.0, 320.0, 318.0, 315.0, 315.0, 318.0, 307.0, 320.0, 296.0, 291.0, 313.0, 320.0, 
293.0, 289.0, 288.0, 291.0, 319.0, 320.0, 316.0, 314.0, 321.0, 312.0, 308.0, 322.0, 292.0, 290.0, 320.0, 310.0, 313.0, 314.0, 316.0, 314.0, 319.0, 314.0, 291.0, 288.0, 256.0, 274.0, 283.0, 304.0, 286.0, 284.0, 319.0, 317.0, 312.0, 315.0, 291.0, 288.0, 319.0, 317.0, 319.0, 314.0, 308.0, 322.0, 317.0, 319.0, 314.0, 310.0, 293.0, 294.0, 307.0, 314.0, 313.0, 317.0, 291.0, 291.0, 312.0, 324.0, 313.0, 317.0, 324.0, 309.0, 304.0, 292.0, 308.0, 322.0, 272.0, 269.0, 309.0, 327.0, 306.0, 324.0, 318.0, 309.0, 293.0, 289.0, 291.0, 291.0, 313.0, 314.0, 311.0, 319.0, 313.0, 317.0, 316.0, 323.0, 314.0, 319.0, 311.0, 316.0, 299.0, 291.0, 296.0, 291.0, 311.0, 322.0, 313.0, 317.0, 257.0, 279.0, 318.0, 306.0, 308.0, 322.0, 281.0, 294.0, 314.0, 322.0, 291.0, 296.0, 298.0, 292.0, 309.0, 321.0, 290.0, 297.0, 313.0, 320.0, 293.0, 294.0, 321.0, 312.0, 319.0, 314.0, 314.0, 316.0, 288.0, 282.0, 315.0, 315.0, 291.0, 296.0, 319.0, 314.0, 302.0, 280.0, 313.0, 314.0, 314.0, 319.0, 313.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9194369912164846, "mean_processing_ms": 0.25218136491616727, "mean_inference_ms": 1.5049782377407859}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7704000, "num_steps_sampled": 4108800, "sample_time_ms": 22673.048, "load_time_ms": 37.017, "grad_time_ms": 10506.62, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004381106700748205, "policy_loss": -0.0034678278025239706, "vf_loss": 84.1359634399414, "vf_explained_var": 0.762717068195343, "kl": 0.0020634233951568604, "entropy": 1.12932288646698, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4108800, "episodes_total": 10272, "training_iteration": 321, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-29-39", "timestamp": 1660256979, "time_this_iter_s": 35.97740912437439, "time_total_s": 15391.8118891716, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15391.8118891716, "timesteps_since_restore": 4108800, "iterations_since_restore": 321, "perf": {"cpu_util_percent": 32.44705882352941, "ram_util_percent": 58.78039215686273}} +{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 611.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.78}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.96, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 16.75, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.77, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.12, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.12, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, 
"soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.12, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.12, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 587.0, 636.0, 627.0, 582.0, 582.0, 579.0, 630.0, 636.0, 573.0, 639.0, 633.0, 578.0, 564.0, 579.0, 630.0, 639.0, 587.0, 636.0, 627.0, 636.0, 633.0, 579.0, 636.0, 536.0, 630.0, 633.0, 584.0, 587.0, 567.0, 627.0, 636.0, 630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 639.0, 633.0, 627.0, 590.0, 587.0, 633.0, 630.0, 536.0, 624.0, 630.0, 575.0, 636.0, 587.0, 590.0, 630.0, 587.0, 633.0, 587.0, 633.0, 633.0, 630.0, 570.0, 630.0, 587.0, 633.0, 582.0, 627.0, 633.0, 636.0, 567.0, 582.0, 576.0, 587.0, 630.0, 636.0, 630.0, 636.0, 630.0, 636.0, 624.0, 639.0, 582.0, 624.0, 582.0, 633.0, 587.0, 636.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 636.0, 633.0, 633.0, 627.0, 587.0, 633.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 295.0, 285.0, 302.0, 316.0, 320.0, 321.0, 306.0, 292.0, 290.0, 285.0, 297.0, 289.0, 290.0, 319.0, 311.0, 322.0, 314.0, 284.0, 289.0, 322.0, 317.0, 305.0, 328.0, 301.0, 277.0, 287.0, 277.0, 295.0, 284.0, 306.0, 
324.0, 324.0, 315.0, 285.0, 302.0, 322.0, 314.0, 308.0, 319.0, 316.0, 320.0, 314.0, 319.0, 291.0, 288.0, 319.0, 317.0, 262.0, 274.0, 308.0, 322.0, 316.0, 317.0, 292.0, 292.0, 290.0, 297.0, 285.0, 282.0, 314.0, 313.0, 319.0, 317.0, 306.0, 324.0, 318.0, 309.0, 293.0, 289.0, 291.0, 291.0, 313.0, 314.0, 311.0, 319.0, 313.0, 317.0, 316.0, 323.0, 314.0, 319.0, 311.0, 316.0, 299.0, 291.0, 296.0, 291.0, 311.0, 322.0, 313.0, 317.0, 257.0, 279.0, 318.0, 306.0, 308.0, 322.0, 281.0, 294.0, 314.0, 322.0, 291.0, 296.0, 298.0, 292.0, 309.0, 321.0, 290.0, 297.0, 313.0, 320.0, 293.0, 294.0, 321.0, 312.0, 319.0, 314.0, 314.0, 316.0, 288.0, 282.0, 315.0, 315.0, 291.0, 296.0, 319.0, 314.0, 302.0, 280.0, 313.0, 314.0, 314.0, 319.0, 313.0, 323.0, 285.0, 282.0, 287.0, 295.0, 289.0, 287.0, 299.0, 288.0, 326.0, 304.0, 321.0, 315.0, 311.0, 319.0, 314.0, 322.0, 317.0, 313.0, 322.0, 314.0, 311.0, 313.0, 317.0, 322.0, 287.0, 295.0, 313.0, 311.0, 302.0, 280.0, 314.0, 319.0, 295.0, 292.0, 322.0, 314.0, 316.0, 317.0, 324.0, 312.0, 294.0, 293.0, 314.0, 322.0, 311.0, 322.0, 324.0, 309.0, 316.0, 320.0, 318.0, 315.0, 315.0, 318.0, 307.0, 320.0, 296.0, 291.0, 313.0, 320.0, 293.0, 289.0, 288.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9180140863586184, "mean_processing_ms": 0.2518987033904737, "mean_inference_ms": 1.5041907922787607}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7728000, "num_steps_sampled": 4121600, "sample_time_ms": 23101.321, "load_time_ms": 36.599, "grad_time_ms": 10589.639, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00461611757054925, "policy_loss": -0.003138140542432666, "vf_loss": 83.23612213134766, "vf_explained_var": 0.7696110606193542, "kl": 0.0018815431976690888, "entropy": 1.1387158632278442, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4121600, "episodes_total": 10304, 
"training_iteration": 322, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-30-14", "timestamp": 1660257014, "time_this_iter_s": 34.30680704116821, "time_total_s": 15426.118696212769, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15426.118696212769, "timesteps_since_restore": 4121600, "iterations_since_restore": 322, "perf": {"cpu_util_percent": 32.239583333333336, "ram_util_percent": 58.845833333333324}} +{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 612.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 262.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.44}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.08, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.77, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.03, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.01, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.28, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.01, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.01, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 630.0, 630.0, 627.0, 630.0, 636.0, 636.0, 636.0, 639.0, 536.0, 627.0, 630.0, 633.0, 639.0, 573.0, 636.0, 636.0, 561.0, 636.0, 582.0, 587.0, 633.0, 561.0, 639.0, 587.0, 633.0, 627.0, 570.0, 633.0, 579.0, 639.0, 582.0, 627.0, 633.0, 636.0, 567.0, 582.0, 576.0, 587.0, 630.0, 636.0, 630.0, 636.0, 630.0, 636.0, 624.0, 639.0, 582.0, 624.0, 582.0, 633.0, 587.0, 636.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 636.0, 633.0, 633.0, 627.0, 587.0, 633.0, 582.0, 579.0, 587.0, 587.0, 636.0, 627.0, 582.0, 582.0, 579.0, 630.0, 636.0, 573.0, 639.0, 633.0, 578.0, 564.0, 579.0, 630.0, 639.0, 587.0, 636.0, 627.0, 636.0, 633.0, 579.0, 636.0, 536.0, 630.0, 633.0, 584.0, 587.0, 567.0, 627.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 314.0, 322.0, 
316.0, 314.0, 319.0, 311.0, 308.0, 319.0, 318.0, 312.0, 320.0, 316.0, 319.0, 317.0, 317.0, 319.0, 319.0, 320.0, 262.0, 274.0, 311.0, 316.0, 319.0, 311.0, 322.0, 311.0, 319.0, 320.0, 291.0, 282.0, 318.0, 318.0, 316.0, 320.0, 279.0, 282.0, 324.0, 312.0, 295.0, 287.0, 293.0, 294.0, 319.0, 314.0, 283.0, 278.0, 319.0, 320.0, 288.0, 299.0, 319.0, 314.0, 316.0, 311.0, 276.0, 294.0, 324.0, 309.0, 288.0, 291.0, 319.0, 320.0, 302.0, 280.0, 313.0, 314.0, 314.0, 319.0, 313.0, 323.0, 285.0, 282.0, 287.0, 295.0, 289.0, 287.0, 299.0, 288.0, 326.0, 304.0, 321.0, 315.0, 311.0, 319.0, 314.0, 322.0, 317.0, 313.0, 322.0, 314.0, 311.0, 313.0, 317.0, 322.0, 287.0, 295.0, 313.0, 311.0, 302.0, 280.0, 314.0, 319.0, 295.0, 292.0, 322.0, 314.0, 316.0, 317.0, 324.0, 312.0, 294.0, 293.0, 314.0, 322.0, 311.0, 322.0, 324.0, 309.0, 316.0, 320.0, 318.0, 315.0, 315.0, 318.0, 307.0, 320.0, 296.0, 291.0, 313.0, 320.0, 293.0, 289.0, 288.0, 291.0, 292.0, 295.0, 285.0, 302.0, 316.0, 320.0, 321.0, 306.0, 292.0, 290.0, 285.0, 297.0, 289.0, 290.0, 319.0, 311.0, 322.0, 314.0, 284.0, 289.0, 322.0, 317.0, 305.0, 328.0, 301.0, 277.0, 287.0, 277.0, 295.0, 284.0, 306.0, 324.0, 324.0, 315.0, 285.0, 302.0, 322.0, 314.0, 308.0, 319.0, 316.0, 320.0, 314.0, 319.0, 291.0, 288.0, 319.0, 317.0, 262.0, 274.0, 308.0, 322.0, 316.0, 317.0, 292.0, 292.0, 290.0, 297.0, 285.0, 282.0, 314.0, 313.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9165951119530592, "mean_processing_ms": 0.25161700044619856, "mean_inference_ms": 1.5033026848004383}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7752000, "num_steps_sampled": 4134400, "sample_time_ms": 23061.265, "load_time_ms": 36.453, "grad_time_ms": 10583.338, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006471332162618637, "policy_loss": -0.0015162205090746284, "vf_loss": 85.53211212158203, "vf_explained_var": 0.7684184908866882, "kl": 0.002009378978982568, "entropy": 1.1313238143920898, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4134400, "episodes_total": 10336, "training_iteration": 323, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-30-45", "timestamp": 1660257045, "time_this_iter_s": 31.667726039886475, "time_total_s": 15457.786422252655, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15457.786422252655, "timesteps_since_restore": 4134400, "iterations_since_restore": 323, "perf": {"cpu_util_percent": 32.73111111111111, "ram_util_percent": 58.83555555555553}} +{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 610.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 262.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.28}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.36, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.82, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.6, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [596.0, 636.0, 639.0, 633.0, 633.0, 584.0, 582.0, 579.0, 636.0, 639.0, 587.0, 579.0, 558.0, 627.0, 639.0, 573.0, 630.0, 627.0, 581.0, 639.0, 639.0, 639.0, 636.0, 636.0, 567.0, 576.0, 573.0, 570.0, 621.0, 636.0, 630.0, 630.0, 587.0, 633.0, 582.0, 579.0, 587.0, 587.0, 636.0, 627.0, 582.0, 582.0, 579.0, 630.0, 636.0, 573.0, 639.0, 633.0, 578.0, 564.0, 579.0, 630.0, 639.0, 587.0, 636.0, 627.0, 636.0, 633.0, 579.0, 636.0, 536.0, 630.0, 633.0, 584.0, 587.0, 567.0, 627.0, 636.0, 633.0, 636.0, 630.0, 630.0, 627.0, 630.0, 636.0, 636.0, 636.0, 639.0, 536.0, 627.0, 630.0, 633.0, 639.0, 573.0, 636.0, 636.0, 561.0, 636.0, 582.0, 587.0, 633.0, 561.0, 639.0, 587.0, 633.0, 627.0, 570.0, 633.0, 579.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 299.0, 319.0, 317.0, 319.0, 320.0, 317.0, 316.0, 317.0, 316.0, 304.0, 280.0, 288.0, 294.0, 289.0, 290.0, 322.0, 314.0, 317.0, 322.0, 288.0, 299.0, 290.0, 289.0, 274.0, 284.0, 316.0, 311.0, 322.0, 317.0, 290.0, 283.0, 319.0, 311.0, 314.0, 313.0, 288.0, 293.0, 319.0, 320.0, 322.0, 317.0, 325.0, 314.0, 314.0, 322.0, 319.0, 317.0, 296.0, 271.0, 281.0, 295.0, 277.0, 296.0, 287.0, 283.0, 307.0, 314.0, 314.0, 322.0, 313.0, 317.0, 316.0, 314.0, 296.0, 291.0, 313.0, 320.0, 293.0, 289.0, 288.0, 291.0, 292.0, 295.0, 285.0, 302.0, 316.0, 320.0, 321.0, 306.0, 292.0, 290.0, 285.0, 297.0, 289.0, 290.0, 319.0, 311.0, 322.0, 314.0, 284.0, 289.0, 322.0, 317.0, 305.0, 328.0, 301.0, 277.0, 287.0, 277.0, 295.0, 284.0, 306.0, 324.0, 324.0, 315.0, 285.0, 302.0, 322.0, 314.0, 308.0, 319.0, 316.0, 320.0, 314.0, 319.0, 291.0, 288.0, 319.0, 317.0, 262.0, 274.0, 308.0, 322.0, 316.0, 317.0, 292.0, 292.0, 290.0, 297.0, 285.0, 282.0, 314.0, 313.0, 319.0, 317.0, 316.0, 317.0, 314.0, 322.0, 316.0, 314.0, 319.0, 311.0, 308.0, 319.0, 318.0, 312.0, 320.0, 316.0, 319.0, 317.0, 317.0, 319.0, 319.0, 320.0, 262.0, 274.0, 311.0, 316.0, 319.0, 311.0, 322.0, 311.0, 319.0, 320.0, 291.0, 282.0, 318.0, 318.0, 316.0, 320.0, 279.0, 282.0, 324.0, 312.0, 295.0, 287.0, 293.0, 294.0, 319.0, 314.0, 283.0, 278.0, 319.0, 320.0, 288.0, 299.0, 319.0, 314.0, 316.0, 311.0, 276.0, 294.0, 324.0, 309.0, 288.0, 291.0, 319.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9151684979449644, "mean_processing_ms": 0.2513316747000567, "mean_inference_ms": 1.5020585872568248}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7776000, "num_steps_sampled": 4147200, "sample_time_ms": 23007.183, "load_time_ms": 36.594, "grad_time_ms": 10309.706, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": -0.001403640490025282, "policy_loss": -0.009379498660564423, "vf_loss": 85.44359588623047, "vf_explained_var": 0.7652726769447327, "kl": 0.0018997077131643891, "entropy": 1.136988639831543, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4147200, "episodes_total": 10368, "training_iteration": 324, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-31-15", "timestamp": 1660257075, "time_this_iter_s": 30.05816674232483, "time_total_s": 15487.84458899498, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15487.84458899498, "timesteps_since_restore": 4147200, "iterations_since_restore": 324, "perf": {"cpu_util_percent": 31.88372093023256, "ram_util_percent": 58.74418604651163}} +{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 611.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.985}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.17, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.44, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.37, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.97, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.63, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.55, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, 
"useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.24, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.63, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.55, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.63, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.55, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 630.0, 630.0, 630.0, 579.0, 636.0, 633.0, 593.0, 630.0, 587.0, 575.0, 633.0, 470.0, 633.0, 465.0, 633.0, 498.0, 633.0, 627.0, 636.0, 639.0, 627.0, 630.0, 630.0, 636.0, 627.0, 636.0, 636.0, 627.0, 630.0, 639.0, 587.0, 567.0, 627.0, 636.0, 633.0, 636.0, 630.0, 630.0, 627.0, 630.0, 636.0, 636.0, 636.0, 639.0, 536.0, 627.0, 630.0, 633.0, 639.0, 573.0, 636.0, 636.0, 561.0, 636.0, 582.0, 587.0, 633.0, 561.0, 639.0, 587.0, 633.0, 627.0, 570.0, 633.0, 579.0, 639.0, 596.0, 636.0, 639.0, 633.0, 633.0, 584.0, 582.0, 579.0, 636.0, 639.0, 587.0, 579.0, 558.0, 627.0, 639.0, 573.0, 630.0, 627.0, 581.0, 639.0, 639.0, 639.0, 636.0, 636.0, 567.0, 576.0, 573.0, 570.0, 621.0, 636.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 296.0, 286.0, 321.0, 309.0, 307.0, 323.0, 321.0, 309.0, 285.0, 294.0, 327.0, 309.0, 314.0, 319.0, 293.0, 300.0, 309.0, 321.0, 299.0, 288.0, 285.0, 290.0, 323.0, 310.0, 231.0, 239.0, 309.0, 324.0, 236.0, 229.0, 307.0, 326.0, 250.0, 248.0, 319.0, 314.0, 308.0, 319.0, 319.0, 317.0, 319.0, 320.0, 312.0, 315.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 305.0, 322.0, 321.0, 315.0, 313.0, 323.0, 316.0, 311.0, 312.0, 318.0, 319.0, 320.0, 290.0, 297.0, 285.0, 282.0, 314.0, 313.0, 319.0, 317.0, 316.0, 317.0, 314.0, 322.0, 316.0, 314.0, 319.0, 311.0, 308.0, 319.0, 318.0, 312.0, 320.0, 316.0, 319.0, 317.0, 317.0, 319.0, 319.0, 320.0, 262.0, 274.0, 311.0, 316.0, 319.0, 311.0, 322.0, 311.0, 319.0, 320.0, 291.0, 282.0, 318.0, 318.0, 316.0, 320.0, 279.0, 282.0, 324.0, 312.0, 295.0, 287.0, 293.0, 294.0, 319.0, 314.0, 283.0, 278.0, 319.0, 320.0, 288.0, 299.0, 319.0, 314.0, 316.0, 311.0, 276.0, 294.0, 324.0, 309.0, 288.0, 291.0, 319.0, 320.0, 297.0, 299.0, 319.0, 317.0, 319.0, 320.0, 317.0, 316.0, 317.0, 316.0, 304.0, 280.0, 288.0, 294.0, 289.0, 290.0, 322.0, 314.0, 317.0, 322.0, 288.0, 299.0, 290.0, 289.0, 274.0, 284.0, 316.0, 311.0, 322.0, 317.0, 290.0, 283.0, 319.0, 311.0, 314.0, 313.0, 288.0, 293.0, 319.0, 320.0, 322.0, 317.0, 325.0, 314.0, 314.0, 322.0, 319.0, 317.0, 296.0, 271.0, 281.0, 295.0, 277.0, 296.0, 287.0, 283.0, 307.0, 314.0, 314.0, 322.0, 313.0, 317.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9137344731192653, "mean_processing_ms": 0.2510432909241615, "mean_inference_ms": 1.5004598778963754}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7800000, 
"num_steps_sampled": 4160000, "sample_time_ms": 21864.49, "load_time_ms": 36.474, "grad_time_ms": 10036.502, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015124385245144367, "policy_loss": -0.006040909793227911, "vf_loss": 81.20950317382812, "vf_explained_var": 0.7680754661560059, "kl": 0.0019407202489674091, "entropy": 1.135194182395935, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4160000, "episodes_total": 10400, "training_iteration": 325, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-31-44", "timestamp": 1660257104, "time_this_iter_s": 28.25439429283142, "time_total_s": 15516.098983287811, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": 
false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15516.098983287811, "timesteps_since_restore": 4160000, "iterations_since_restore": 325, "perf": {"cpu_util_percent": 34.2675, "ram_util_percent": 58.785000000000004}} +{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 613.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.815}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.63, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.51, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 17.01, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.55, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.64, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.15, "dish_pickup_agent_1_min": 2, 
"dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.31, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.24, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.64, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.64, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 630.0, 587.0, 639.0, 587.0, 633.0, 630.0, 639.0, 633.0, 582.0, 567.0, 630.0, 630.0, 633.0, 636.0, 576.0, 633.0, 587.0, 624.0, 636.0, 639.0, 639.0, 636.0, 630.0, 627.0, 612.0, 633.0, 570.0, 636.0, 630.0, 636.0, 570.0, 633.0, 579.0, 639.0, 596.0, 636.0, 639.0, 633.0, 633.0, 584.0, 582.0, 579.0, 636.0, 639.0, 587.0, 579.0, 558.0, 627.0, 639.0, 573.0, 630.0, 627.0, 581.0, 639.0, 639.0, 639.0, 636.0, 636.0, 567.0, 576.0, 573.0, 570.0, 621.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 630.0, 630.0, 579.0, 636.0, 633.0, 593.0, 630.0, 587.0, 575.0, 633.0, 470.0, 633.0, 465.0, 633.0, 498.0, 633.0, 627.0, 636.0, 639.0, 627.0, 630.0, 630.0, 636.0, 
627.0, 636.0, 636.0, 627.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 312.0, 322.0, 314.0, 310.0, 320.0, 286.0, 301.0, 319.0, 320.0, 290.0, 297.0, 314.0, 319.0, 311.0, 319.0, 321.0, 318.0, 314.0, 319.0, 289.0, 293.0, 291.0, 276.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 314.0, 322.0, 288.0, 288.0, 317.0, 316.0, 298.0, 289.0, 316.0, 308.0, 319.0, 317.0, 316.0, 323.0, 317.0, 322.0, 311.0, 325.0, 303.0, 327.0, 305.0, 322.0, 310.0, 302.0, 316.0, 317.0, 285.0, 285.0, 319.0, 317.0, 312.0, 318.0, 314.0, 322.0, 276.0, 294.0, 324.0, 309.0, 288.0, 291.0, 319.0, 320.0, 297.0, 299.0, 319.0, 317.0, 319.0, 320.0, 317.0, 316.0, 317.0, 316.0, 304.0, 280.0, 288.0, 294.0, 289.0, 290.0, 322.0, 314.0, 317.0, 322.0, 288.0, 299.0, 290.0, 289.0, 274.0, 284.0, 316.0, 311.0, 322.0, 317.0, 290.0, 283.0, 319.0, 311.0, 314.0, 313.0, 288.0, 293.0, 319.0, 320.0, 322.0, 317.0, 325.0, 314.0, 314.0, 322.0, 319.0, 317.0, 296.0, 271.0, 281.0, 295.0, 277.0, 296.0, 287.0, 283.0, 307.0, 314.0, 314.0, 322.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 296.0, 286.0, 321.0, 309.0, 307.0, 323.0, 321.0, 309.0, 285.0, 294.0, 327.0, 309.0, 314.0, 319.0, 293.0, 300.0, 309.0, 321.0, 299.0, 288.0, 285.0, 290.0, 323.0, 310.0, 231.0, 239.0, 309.0, 324.0, 236.0, 229.0, 307.0, 326.0, 250.0, 248.0, 319.0, 314.0, 308.0, 319.0, 319.0, 317.0, 319.0, 320.0, 312.0, 315.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 305.0, 322.0, 321.0, 315.0, 313.0, 323.0, 316.0, 311.0, 312.0, 318.0, 319.0, 320.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.9123094574385331, "mean_processing_ms": 0.25075636823694536, "mean_inference_ms": 1.49889087709384}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7824000, "num_steps_sampled": 4172800, "sample_time_ms": 21942.597, "load_time_ms": 36.578, "grad_time_ms": 9969.611, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004511403385549784, "policy_loss": -0.0029741593170911074, "vf_loss": 80.4991683959961, "vf_explained_var": 0.7677297592163086, "kl": 0.002298202132806182, "entropy": 1.1287130117416382, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4172800, "episodes_total": 10432, "training_iteration": 326, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-32-15", "timestamp": 1660257135, "time_this_iter_s": 31.342418909072876, "time_total_s": 15547.441402196884, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15547.441402196884, "timesteps_since_restore": 4172800, "iterations_since_restore": 326, "perf": {"cpu_util_percent": 35.46363636363637, "ram_util_percent": 58.74999999999999}} +{"episode_reward_max": 644.0, "episode_reward_min": 354.0, "episode_reward_mean": 615.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 307.655}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.11, "shaped_reward_min": 114, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.8, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.84, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 
17.75, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.35, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, 
"viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 627.0, 354.0, 644.0, 639.0, 636.0, 627.0, 630.0, 630.0, 630.0, 579.0, 627.0, 636.0, 636.0, 636.0, 633.0, 587.0, 636.0, 627.0, 636.0, 633.0, 630.0, 636.0, 587.0, 633.0, 636.0, 576.0, 590.0, 627.0, 636.0, 636.0, 621.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 630.0, 630.0, 579.0, 636.0, 633.0, 593.0, 630.0, 587.0, 575.0, 633.0, 470.0, 633.0, 465.0, 633.0, 498.0, 633.0, 627.0, 636.0, 639.0, 627.0, 630.0, 630.0, 636.0, 627.0, 636.0, 636.0, 
627.0, 630.0, 639.0, 636.0, 636.0, 630.0, 587.0, 639.0, 587.0, 633.0, 630.0, 639.0, 633.0, 582.0, 567.0, 630.0, 630.0, 633.0, 636.0, 576.0, 633.0, 587.0, 624.0, 636.0, 639.0, 639.0, 636.0, 630.0, 627.0, 612.0, 633.0, 570.0, 636.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 297.0, 282.0, 316.0, 311.0, 179.0, 175.0, 330.0, 314.0, 317.0, 322.0, 317.0, 319.0, 311.0, 316.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 293.0, 286.0, 308.0, 319.0, 316.0, 320.0, 330.0, 306.0, 317.0, 319.0, 319.0, 314.0, 299.0, 288.0, 319.0, 317.0, 318.0, 309.0, 324.0, 312.0, 319.0, 314.0, 314.0, 316.0, 324.0, 312.0, 304.0, 283.0, 321.0, 312.0, 319.0, 317.0, 279.0, 297.0, 294.0, 296.0, 318.0, 309.0, 321.0, 315.0, 316.0, 320.0, 307.0, 314.0, 314.0, 322.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 296.0, 286.0, 321.0, 309.0, 307.0, 323.0, 321.0, 309.0, 285.0, 294.0, 327.0, 309.0, 314.0, 319.0, 293.0, 300.0, 309.0, 321.0, 299.0, 288.0, 285.0, 290.0, 323.0, 310.0, 231.0, 239.0, 309.0, 324.0, 236.0, 229.0, 307.0, 326.0, 250.0, 248.0, 319.0, 314.0, 308.0, 319.0, 319.0, 317.0, 319.0, 320.0, 312.0, 315.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 305.0, 322.0, 321.0, 315.0, 313.0, 323.0, 316.0, 311.0, 312.0, 318.0, 319.0, 320.0, 324.0, 312.0, 322.0, 314.0, 310.0, 320.0, 286.0, 301.0, 319.0, 320.0, 290.0, 297.0, 314.0, 319.0, 311.0, 319.0, 321.0, 318.0, 314.0, 319.0, 289.0, 293.0, 291.0, 276.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 314.0, 322.0, 288.0, 288.0, 317.0, 316.0, 298.0, 
289.0, 316.0, 308.0, 319.0, 317.0, 316.0, 323.0, 317.0, 322.0, 311.0, 325.0, 303.0, 327.0, 305.0, 322.0, 310.0, 302.0, 316.0, 317.0, 285.0, 285.0, 319.0, 317.0, 312.0, 318.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9108943505628629, "mean_processing_ms": 0.25047292677413735, "mean_inference_ms": 1.4973202589318924}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7848000, "num_steps_sampled": 4185600, "sample_time_ms": 21889.076, "load_time_ms": 35.811, "grad_time_ms": 9803.849, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004534369800239801, "policy_loss": -0.0032402947545051575, "vf_loss": 83.42310333251953, "vf_explained_var": 0.7677843570709229, "kl": 0.0018213322618976235, "entropy": 1.135262131690979, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4185600, "episodes_total": 10464, "training_iteration": 327, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-32-44", "timestamp": 1660257164, "time_this_iter_s": 29.095314025878906, "time_total_s": 15576.536716222763, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15576.536716222763, "timesteps_since_restore": 4185600, "iterations_since_restore": 327, "perf": {"cpu_util_percent": 33.5219512195122, "ram_util_percent": 58.87073170731708}} +{"episode_reward_max": 644.0, "episode_reward_min": 354.0, "episode_reward_mean": 615.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 307.57}, "custom_metrics": {"sparse_reward_mean": 213.0, 
"sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 189.14, "shaped_reward_min": 114, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.23, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.78, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.42, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.52, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.25, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.52, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.52, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 630.0, 630.0, 636.0, 465.0, 639.0, 639.0, 587.0, 639.0, 633.0, 582.0, 587.0, 639.0, 633.0, 636.0, 636.0, 596.0, 587.0, 639.0, 630.0, 633.0, 639.0, 582.0, 636.0, 479.0, 587.0, 630.0, 639.0, 639.0, 516.0, 582.0, 633.0, 636.0, 627.0, 630.0, 639.0, 636.0, 636.0, 630.0, 587.0, 639.0, 
587.0, 633.0, 630.0, 639.0, 633.0, 582.0, 567.0, 630.0, 630.0, 633.0, 636.0, 576.0, 633.0, 587.0, 624.0, 636.0, 639.0, 639.0, 636.0, 630.0, 627.0, 612.0, 633.0, 570.0, 636.0, 630.0, 636.0, 573.0, 579.0, 627.0, 354.0, 644.0, 639.0, 636.0, 627.0, 630.0, 630.0, 630.0, 579.0, 627.0, 636.0, 636.0, 636.0, 633.0, 587.0, 636.0, 627.0, 636.0, 633.0, 630.0, 636.0, 587.0, 633.0, 636.0, 576.0, 590.0, 627.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [303.0, 327.0, 312.0, 318.0, 317.0, 313.0, 314.0, 322.0, 234.0, 231.0, 322.0, 317.0, 322.0, 317.0, 288.0, 299.0, 317.0, 322.0, 307.0, 326.0, 290.0, 292.0, 295.0, 292.0, 311.0, 328.0, 306.0, 327.0, 322.0, 314.0, 314.0, 322.0, 294.0, 302.0, 289.0, 298.0, 321.0, 318.0, 313.0, 317.0, 310.0, 323.0, 316.0, 323.0, 291.0, 291.0, 316.0, 320.0, 234.0, 245.0, 293.0, 294.0, 316.0, 314.0, 327.0, 312.0, 324.0, 315.0, 265.0, 251.0, 288.0, 294.0, 314.0, 319.0, 313.0, 323.0, 316.0, 311.0, 312.0, 318.0, 319.0, 320.0, 324.0, 312.0, 322.0, 314.0, 310.0, 320.0, 286.0, 301.0, 319.0, 320.0, 290.0, 297.0, 314.0, 319.0, 311.0, 319.0, 321.0, 318.0, 314.0, 319.0, 289.0, 293.0, 291.0, 276.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 314.0, 322.0, 288.0, 288.0, 317.0, 316.0, 298.0, 289.0, 316.0, 308.0, 319.0, 317.0, 316.0, 323.0, 317.0, 322.0, 311.0, 325.0, 303.0, 327.0, 305.0, 322.0, 310.0, 302.0, 316.0, 317.0, 285.0, 285.0, 319.0, 317.0, 312.0, 318.0, 314.0, 322.0, 288.0, 285.0, 297.0, 282.0, 316.0, 311.0, 179.0, 175.0, 330.0, 314.0, 317.0, 322.0, 317.0, 
319.0, 311.0, 316.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 293.0, 286.0, 308.0, 319.0, 316.0, 320.0, 330.0, 306.0, 317.0, 319.0, 319.0, 314.0, 299.0, 288.0, 319.0, 317.0, 318.0, 309.0, 324.0, 312.0, 319.0, 314.0, 314.0, 316.0, 324.0, 312.0, 304.0, 283.0, 321.0, 312.0, 319.0, 317.0, 279.0, 297.0, 294.0, 296.0, 318.0, 309.0, 321.0, 315.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9094951926222233, "mean_processing_ms": 0.25019344496392953, "mean_inference_ms": 1.495850505323708}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7872000, "num_steps_sampled": 4198400, "sample_time_ms": 21863.329, "load_time_ms": 36.113, "grad_time_ms": 9649.438, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0027417896781116724, "policy_loss": -0.00593235669657588, "vf_loss": 92.42369842529297, "vf_explained_var": 0.7659929394721985, "kl": 0.0018632843857631087, "entropy": 1.1364426612854004, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4198400, "episodes_total": 10496, "training_iteration": 328, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-33-13", "timestamp": 1660257193, "time_this_iter_s": 29.17238187789917, "time_total_s": 15605.709098100662, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15605.709098100662, "timesteps_since_restore": 4198400, "iterations_since_restore": 328, "perf": {"cpu_util_percent": 35.3780487804878, "ram_util_percent": 58.91219512195122}} +{"episode_reward_max": 644.0, "episode_reward_min": 354.0, "episode_reward_mean": 609.54, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 304.77}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.54, "shaped_reward_min": 114, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.91, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, 
"useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.53, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.73, 
"optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 633.0, 582.0, 627.0, 582.0, 639.0, 582.0, 582.0, 630.0, 630.0, 633.0, 
636.0, 630.0, 584.0, 530.0, 587.0, 636.0, 573.0, 630.0, 624.0, 627.0, 627.0, 630.0, 549.0, 633.0, 633.0, 482.0, 630.0, 579.0, 636.0, 579.0, 630.0, 570.0, 636.0, 630.0, 636.0, 573.0, 579.0, 627.0, 354.0, 644.0, 639.0, 636.0, 627.0, 630.0, 630.0, 630.0, 579.0, 627.0, 636.0, 636.0, 636.0, 633.0, 587.0, 636.0, 627.0, 636.0, 633.0, 630.0, 636.0, 587.0, 633.0, 636.0, 576.0, 590.0, 627.0, 636.0, 636.0, 630.0, 630.0, 630.0, 636.0, 465.0, 639.0, 639.0, 587.0, 639.0, 633.0, 582.0, 587.0, 639.0, 633.0, 636.0, 636.0, 596.0, 587.0, 639.0, 630.0, 633.0, 639.0, 582.0, 636.0, 479.0, 587.0, 630.0, 639.0, 639.0, 516.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 299.0, 317.0, 316.0, 291.0, 291.0, 321.0, 306.0, 296.0, 286.0, 316.0, 323.0, 293.0, 289.0, 295.0, 287.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 319.0, 317.0, 314.0, 316.0, 288.0, 296.0, 265.0, 265.0, 292.0, 295.0, 313.0, 323.0, 291.0, 282.0, 323.0, 307.0, 310.0, 314.0, 309.0, 318.0, 324.0, 303.0, 316.0, 314.0, 275.0, 274.0, 318.0, 315.0, 322.0, 311.0, 245.0, 237.0, 313.0, 317.0, 293.0, 286.0, 314.0, 322.0, 290.0, 289.0, 316.0, 314.0, 285.0, 285.0, 319.0, 317.0, 312.0, 318.0, 314.0, 322.0, 288.0, 285.0, 297.0, 282.0, 316.0, 311.0, 179.0, 175.0, 330.0, 314.0, 317.0, 322.0, 317.0, 319.0, 311.0, 316.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 293.0, 286.0, 308.0, 319.0, 316.0, 320.0, 330.0, 306.0, 317.0, 319.0, 319.0, 314.0, 299.0, 288.0, 319.0, 317.0, 318.0, 309.0, 324.0, 312.0, 319.0, 314.0, 314.0, 316.0, 324.0, 
312.0, 304.0, 283.0, 321.0, 312.0, 319.0, 317.0, 279.0, 297.0, 294.0, 296.0, 318.0, 309.0, 321.0, 315.0, 316.0, 320.0, 303.0, 327.0, 312.0, 318.0, 317.0, 313.0, 314.0, 322.0, 234.0, 231.0, 322.0, 317.0, 322.0, 317.0, 288.0, 299.0, 317.0, 322.0, 307.0, 326.0, 290.0, 292.0, 295.0, 292.0, 311.0, 328.0, 306.0, 327.0, 322.0, 314.0, 314.0, 322.0, 294.0, 302.0, 289.0, 298.0, 321.0, 318.0, 313.0, 317.0, 310.0, 323.0, 316.0, 323.0, 291.0, 291.0, 316.0, 320.0, 234.0, 245.0, 293.0, 294.0, 316.0, 314.0, 327.0, 312.0, 324.0, 315.0, 265.0, 251.0, 288.0, 294.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.908108829511899, "mean_processing_ms": 0.24991601885712256, "mean_inference_ms": 1.4943968227370834}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7896000, "num_steps_sampled": 4211200, "sample_time_ms": 21816.04, "load_time_ms": 36.245, "grad_time_ms": 9396.983, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00012729612353723496, "policy_loss": -0.007717677857726812, "vf_loss": 81.6099853515625, "vf_explained_var": 0.7742553353309631, "kl": 0.0021955876145511866, "entropy": 1.1412299871444702, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4211200, "episodes_total": 10528, "training_iteration": 329, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-33-44", "timestamp": 1660257224, "time_this_iter_s": 30.190826892852783, "time_total_s": 15635.899924993515, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": 
false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15635.899924993515, "timesteps_since_restore": 4211200, "iterations_since_restore": 329, "perf": {"cpu_util_percent": 
32.737209302325574, "ram_util_percent": 59.3720930232558}} +{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 611.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.805}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.01, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.06, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.61, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, 
"onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.35, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.35, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.35, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 633.0, 525.0, 630.0, 587.0, 630.0, 630.0, 615.0, 633.0, 630.0, 636.0, 627.0, 584.0, 633.0, 639.0, 582.0, 630.0, 587.0, 630.0, 630.0, 579.0, 630.0, 633.0, 621.0, 630.0, 636.0, 639.0, 636.0, 639.0, 582.0, 627.0, 630.0, 590.0, 627.0, 636.0, 636.0, 630.0, 630.0, 630.0, 636.0, 465.0, 639.0, 639.0, 587.0, 639.0, 633.0, 582.0, 587.0, 639.0, 633.0, 636.0, 636.0, 596.0, 587.0, 639.0, 630.0, 633.0, 639.0, 582.0, 636.0, 479.0, 587.0, 630.0, 639.0, 639.0, 516.0, 582.0, 633.0, 587.0, 633.0, 582.0, 627.0, 582.0, 639.0, 582.0, 582.0, 630.0, 630.0, 633.0, 636.0, 630.0, 584.0, 530.0, 587.0, 636.0, 573.0, 630.0, 624.0, 627.0, 627.0, 630.0, 549.0, 633.0, 633.0, 482.0, 630.0, 579.0, 636.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 323.0, 309.0, 324.0, 258.0, 267.0, 314.0, 316.0, 291.0, 296.0, 314.0, 316.0, 313.0, 317.0, 307.0, 308.0, 322.0, 311.0, 321.0, 309.0, 317.0, 319.0, 310.0, 317.0, 285.0, 299.0, 319.0, 314.0, 322.0, 317.0, 294.0, 288.0, 321.0, 309.0, 286.0, 301.0, 319.0, 311.0, 314.0, 316.0, 291.0, 288.0, 319.0, 311.0, 316.0, 317.0, 311.0, 310.0, 316.0, 314.0, 319.0, 317.0, 324.0, 315.0, 319.0, 317.0, 319.0, 320.0, 278.0, 304.0, 316.0, 311.0, 316.0, 314.0, 294.0, 296.0, 318.0, 309.0, 321.0, 315.0, 316.0, 320.0, 303.0, 327.0, 312.0, 318.0, 317.0, 313.0, 314.0, 322.0, 234.0, 231.0, 322.0, 317.0, 322.0, 317.0, 288.0, 299.0, 317.0, 322.0, 307.0, 
326.0, 290.0, 292.0, 295.0, 292.0, 311.0, 328.0, 306.0, 327.0, 322.0, 314.0, 314.0, 322.0, 294.0, 302.0, 289.0, 298.0, 321.0, 318.0, 313.0, 317.0, 310.0, 323.0, 316.0, 323.0, 291.0, 291.0, 316.0, 320.0, 234.0, 245.0, 293.0, 294.0, 316.0, 314.0, 327.0, 312.0, 324.0, 315.0, 265.0, 251.0, 288.0, 294.0, 314.0, 319.0, 288.0, 299.0, 317.0, 316.0, 291.0, 291.0, 321.0, 306.0, 296.0, 286.0, 316.0, 323.0, 293.0, 289.0, 295.0, 287.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 319.0, 317.0, 314.0, 316.0, 288.0, 296.0, 265.0, 265.0, 292.0, 295.0, 313.0, 323.0, 291.0, 282.0, 323.0, 307.0, 310.0, 314.0, 309.0, 318.0, 324.0, 303.0, 316.0, 314.0, 275.0, 274.0, 318.0, 315.0, 322.0, 311.0, 245.0, 237.0, 313.0, 317.0, 293.0, 286.0, 314.0, 322.0, 290.0, 289.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9067267612642047, "mean_processing_ms": 0.2496377464704088, "mean_inference_ms": 1.4929051187388651}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7920000, "num_steps_sampled": 4224000, "sample_time_ms": 21531.777, "load_time_ms": 36.36, "grad_time_ms": 9360.92, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021365683060139418, "policy_loss": -0.005692864302545786, "vf_loss": 83.99735260009766, "vf_explained_var": 0.7644996643066406, "kl": 0.0020622028969228268, "entropy": 1.140602469444275, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4224000, "episodes_total": 10560, "training_iteration": 330, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-34-13", "timestamp": 1660257253, "time_this_iter_s": 29.480799913406372, "time_total_s": 15665.380724906921, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15665.380724906921, "timesteps_since_restore": 4224000, "iterations_since_restore": 330, "perf": {"cpu_util_percent": 35.38095238095239, "ram_util_percent": 58.940476190476204}} +{"episode_reward_max": 639.0, "episode_reward_min": 482.0, "episode_reward_mean": 614.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 307.18}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.96, "shaped_reward_min": 149, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.19, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.43, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.36, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 567.0, 587.0, 639.0, 627.0, 627.0, 639.0, 636.0, 636.0, 630.0, 636.0, 636.0, 627.0, 636.0, 584.0, 630.0, 627.0, 630.0, 633.0, 630.0, 579.0, 633.0, 633.0, 627.0, 639.0, 633.0, 633.0, 639.0, 633.0, 576.0, 636.0, 582.0, 639.0, 516.0, 582.0, 633.0, 587.0, 633.0, 582.0, 627.0, 582.0, 639.0, 582.0, 582.0, 630.0, 630.0, 633.0, 636.0, 630.0, 584.0, 530.0, 587.0, 636.0, 573.0, 630.0, 624.0, 627.0, 627.0, 630.0, 549.0, 633.0, 633.0, 482.0, 630.0, 579.0, 636.0, 579.0, 630.0, 639.0, 633.0, 525.0, 630.0, 587.0, 630.0, 630.0, 615.0, 633.0, 630.0, 636.0, 627.0, 584.0, 633.0, 639.0, 582.0, 630.0, 587.0, 630.0, 630.0, 579.0, 630.0, 633.0, 621.0, 630.0, 636.0, 639.0, 636.0, 639.0, 582.0, 627.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 285.0, 285.0, 282.0, 289.0, 298.0, 317.0, 322.0, 315.0, 312.0, 321.0, 306.0, 314.0, 325.0, 314.0, 322.0, 308.0, 328.0, 319.0, 311.0, 322.0, 314.0, 319.0, 317.0, 308.0, 319.0, 316.0, 320.0, 295.0, 289.0, 319.0, 311.0, 327.0, 300.0, 318.0, 312.0, 311.0, 322.0, 321.0, 309.0, 280.0, 299.0, 310.0, 323.0, 312.0, 321.0, 308.0, 319.0, 318.0, 321.0, 315.0, 318.0, 321.0, 312.0, 318.0, 321.0, 317.0, 316.0, 291.0, 285.0, 
319.0, 317.0, 290.0, 292.0, 324.0, 315.0, 265.0, 251.0, 288.0, 294.0, 314.0, 319.0, 288.0, 299.0, 317.0, 316.0, 291.0, 291.0, 321.0, 306.0, 296.0, 286.0, 316.0, 323.0, 293.0, 289.0, 295.0, 287.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 319.0, 317.0, 314.0, 316.0, 288.0, 296.0, 265.0, 265.0, 292.0, 295.0, 313.0, 323.0, 291.0, 282.0, 323.0, 307.0, 310.0, 314.0, 309.0, 318.0, 324.0, 303.0, 316.0, 314.0, 275.0, 274.0, 318.0, 315.0, 322.0, 311.0, 245.0, 237.0, 313.0, 317.0, 293.0, 286.0, 314.0, 322.0, 290.0, 289.0, 316.0, 314.0, 316.0, 323.0, 309.0, 324.0, 258.0, 267.0, 314.0, 316.0, 291.0, 296.0, 314.0, 316.0, 313.0, 317.0, 307.0, 308.0, 322.0, 311.0, 321.0, 309.0, 317.0, 319.0, 310.0, 317.0, 285.0, 299.0, 319.0, 314.0, 322.0, 317.0, 294.0, 288.0, 321.0, 309.0, 286.0, 301.0, 319.0, 311.0, 314.0, 316.0, 291.0, 288.0, 319.0, 311.0, 316.0, 317.0, 311.0, 310.0, 316.0, 314.0, 319.0, 317.0, 324.0, 315.0, 319.0, 317.0, 319.0, 320.0, 278.0, 304.0, 316.0, 311.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9053530084533031, "mean_processing_ms": 0.24936183583486593, "mean_inference_ms": 1.491518263590993}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7944000, "num_steps_sampled": 4236800, "sample_time_ms": 21154.165, "load_time_ms": 36.748, "grad_time_ms": 9274.795, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -8.082172280410305e-05, "policy_loss": -0.00758820166811347, "vf_loss": 80.76020050048828, "vf_explained_var": 0.765857994556427, "kl": 0.001791672664694488, "entropy": 1.137281060218811, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4236800, "episodes_total": 10592, "training_iteration": 331, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-34-44", "timestamp": 1660257284, "time_this_iter_s": 31.342971086502075, "time_total_s": 15696.723695993423, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15696.723695993423, "timesteps_since_restore": 4236800, "iterations_since_restore": 331, "perf": {"cpu_util_percent": 33.804545454545455, "ram_util_percent": 58.92272727272726}} +{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 616.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 308.005}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.81, "shaped_reward_min": 165, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 17.56, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.07, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.05, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.34, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.74, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.52, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.03, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.33, 
"soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.74, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.52, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.74, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.52, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 573.0, 633.0, 567.0, 630.0, 576.0, 639.0, 587.0, 630.0, 582.0, 636.0, 639.0, 633.0, 630.0, 636.0, 579.0, 576.0, 627.0, 587.0, 587.0, 579.0, 582.0, 636.0, 636.0, 579.0, 639.0, 587.0, 633.0, 630.0, 582.0, 633.0, 587.0, 579.0, 636.0, 579.0, 630.0, 639.0, 633.0, 525.0, 630.0, 587.0, 630.0, 630.0, 615.0, 633.0, 630.0, 636.0, 627.0, 584.0, 633.0, 639.0, 582.0, 630.0, 587.0, 630.0, 630.0, 579.0, 630.0, 633.0, 621.0, 630.0, 636.0, 639.0, 636.0, 639.0, 582.0, 627.0, 630.0, 582.0, 567.0, 587.0, 639.0, 627.0, 627.0, 639.0, 636.0, 636.0, 630.0, 636.0, 636.0, 627.0, 636.0, 584.0, 630.0, 627.0, 630.0, 633.0, 630.0, 579.0, 633.0, 633.0, 627.0, 639.0, 633.0, 633.0, 639.0, 633.0, 576.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 287.0, 286.0, 316.0, 317.0, 280.0, 287.0, 313.0, 317.0, 281.0, 295.0, 319.0, 320.0, 295.0, 292.0, 311.0, 319.0, 284.0, 298.0, 320.0, 316.0, 316.0, 323.0, 317.0, 316.0, 319.0, 311.0, 324.0, 312.0, 281.0, 
298.0, 285.0, 291.0, 311.0, 316.0, 288.0, 299.0, 297.0, 290.0, 288.0, 291.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 280.0, 299.0, 317.0, 322.0, 298.0, 289.0, 314.0, 319.0, 316.0, 314.0, 291.0, 291.0, 312.0, 321.0, 298.0, 289.0, 293.0, 286.0, 314.0, 322.0, 290.0, 289.0, 316.0, 314.0, 316.0, 323.0, 309.0, 324.0, 258.0, 267.0, 314.0, 316.0, 291.0, 296.0, 314.0, 316.0, 313.0, 317.0, 307.0, 308.0, 322.0, 311.0, 321.0, 309.0, 317.0, 319.0, 310.0, 317.0, 285.0, 299.0, 319.0, 314.0, 322.0, 317.0, 294.0, 288.0, 321.0, 309.0, 286.0, 301.0, 319.0, 311.0, 314.0, 316.0, 291.0, 288.0, 319.0, 311.0, 316.0, 317.0, 311.0, 310.0, 316.0, 314.0, 319.0, 317.0, 324.0, 315.0, 319.0, 317.0, 319.0, 320.0, 278.0, 304.0, 316.0, 311.0, 316.0, 314.0, 297.0, 285.0, 285.0, 282.0, 289.0, 298.0, 317.0, 322.0, 315.0, 312.0, 321.0, 306.0, 314.0, 325.0, 314.0, 322.0, 308.0, 328.0, 319.0, 311.0, 322.0, 314.0, 319.0, 317.0, 308.0, 319.0, 316.0, 320.0, 295.0, 289.0, 319.0, 311.0, 327.0, 300.0, 318.0, 312.0, 311.0, 322.0, 321.0, 309.0, 280.0, 299.0, 310.0, 323.0, 312.0, 321.0, 308.0, 319.0, 318.0, 321.0, 315.0, 318.0, 321.0, 312.0, 318.0, 321.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 290.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9039942028756937, "mean_processing_ms": 0.24909087688920636, "mean_inference_ms": 1.490273663500262}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7968000, "num_steps_sampled": 4249600, "sample_time_ms": 21096.339, "load_time_ms": 36.622, "grad_time_ms": 9224.002, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002680680714547634, "policy_loss": -0.005275225732475519, "vf_loss": 85.20003509521484, "vf_explained_var": 0.7707304954528809, "kl": 0.00193729845341295, "entropy": 1.128203272819519, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4249600, "episodes_total": 10624, 
"training_iteration": 332, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-35-18", "timestamp": 1660257318, "time_this_iter_s": 33.216859102249146, "time_total_s": 15729.940555095673, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15729.940555095673, "timesteps_since_restore": 4249600, "iterations_since_restore": 332, "perf": {"cpu_util_percent": 34.92765957446809, "ram_util_percent": 58.93617021276598}} +{"episode_reward_max": 639.0, "episode_reward_min": 354.0, "episode_reward_mean": 613.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.97}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.74, "shaped_reward_min": 114, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.39, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.05, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.95, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.29, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.21, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.6, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 354.0, 587.0, 636.0, 579.0, 636.0, 639.0, 587.0, 468.0, 636.0, 624.0, 630.0, 579.0, 630.0, 584.0, 633.0, 636.0, 636.0, 630.0, 639.0, 639.0, 639.0, 636.0, 639.0, 639.0, 633.0, 636.0, 630.0, 582.0, 636.0, 630.0, 633.0, 639.0, 582.0, 627.0, 630.0, 582.0, 567.0, 587.0, 639.0, 627.0, 627.0, 639.0, 636.0, 636.0, 630.0, 636.0, 636.0, 627.0, 636.0, 584.0, 630.0, 627.0, 630.0, 633.0, 630.0, 579.0, 633.0, 633.0, 627.0, 639.0, 633.0, 633.0, 639.0, 633.0, 576.0, 636.0, 582.0, 633.0, 573.0, 633.0, 567.0, 630.0, 576.0, 639.0, 587.0, 630.0, 582.0, 636.0, 639.0, 633.0, 630.0, 636.0, 579.0, 576.0, 627.0, 587.0, 587.0, 579.0, 582.0, 636.0, 636.0, 579.0, 639.0, 587.0, 633.0, 630.0, 582.0, 633.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 314.0, 174.0, 180.0, 288.0, 
299.0, 316.0, 320.0, 291.0, 288.0, 326.0, 310.0, 322.0, 317.0, 290.0, 297.0, 225.0, 243.0, 314.0, 322.0, 321.0, 303.0, 311.0, 319.0, 293.0, 286.0, 317.0, 313.0, 301.0, 283.0, 319.0, 314.0, 314.0, 322.0, 324.0, 312.0, 316.0, 314.0, 319.0, 320.0, 317.0, 322.0, 312.0, 327.0, 319.0, 317.0, 320.0, 319.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 313.0, 317.0, 290.0, 292.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 319.0, 320.0, 278.0, 304.0, 316.0, 311.0, 316.0, 314.0, 297.0, 285.0, 285.0, 282.0, 289.0, 298.0, 317.0, 322.0, 315.0, 312.0, 321.0, 306.0, 314.0, 325.0, 314.0, 322.0, 308.0, 328.0, 319.0, 311.0, 322.0, 314.0, 319.0, 317.0, 308.0, 319.0, 316.0, 320.0, 295.0, 289.0, 319.0, 311.0, 327.0, 300.0, 318.0, 312.0, 311.0, 322.0, 321.0, 309.0, 280.0, 299.0, 310.0, 323.0, 312.0, 321.0, 308.0, 319.0, 318.0, 321.0, 315.0, 318.0, 321.0, 312.0, 318.0, 321.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 290.0, 292.0, 319.0, 314.0, 287.0, 286.0, 316.0, 317.0, 280.0, 287.0, 313.0, 317.0, 281.0, 295.0, 319.0, 320.0, 295.0, 292.0, 311.0, 319.0, 284.0, 298.0, 320.0, 316.0, 316.0, 323.0, 317.0, 316.0, 319.0, 311.0, 324.0, 312.0, 281.0, 298.0, 285.0, 291.0, 311.0, 316.0, 288.0, 299.0, 297.0, 290.0, 288.0, 291.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 280.0, 299.0, 317.0, 322.0, 298.0, 289.0, 314.0, 319.0, 316.0, 314.0, 291.0, 291.0, 312.0, 321.0, 298.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9026482821594658, "mean_processing_ms": 0.24882351646773487, "mean_inference_ms": 1.4891609969323358}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7992000, "num_steps_sampled": 4262400, "sample_time_ms": 21161.346, "load_time_ms": 36.685, "grad_time_ms": 9083.544, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006233640480786562, "policy_loss": -0.0020334760192781687, "vf_loss": 88.32830047607422, "vf_explained_var": 0.7596514821052551, "kl": 0.0019263379508629441, "entropy": 1.1314295530319214, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4262400, "episodes_total": 10656, "training_iteration": 333, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-35-49", "timestamp": 1660257349, "time_this_iter_s": 30.911512851715088, "time_total_s": 15760.852067947388, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15760.852067947388, "timesteps_since_restore": 4262400, "iterations_since_restore": 333, "perf": {"cpu_util_percent": 33.54318181818183, "ram_util_percent": 59.031818181818196}} +{"episode_reward_max": 639.0, "episode_reward_min": 354.0, "episode_reward_mean": 609.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.7}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.4, "shaped_reward_min": 114, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.43, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.91, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.98, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.13, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.66, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.4, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.89, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.94, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.51, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.38, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.29, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.66, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.4, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.66, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.4, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 633.0, 582.0, 365.0, 627.0, 633.0, 639.0, 639.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 576.0, 579.0, 633.0, 582.0, 633.0, 633.0, 636.0, 630.0, 587.0, 618.0, 633.0, 636.0, 627.0, 561.0, 639.0, 624.0, 587.0, 633.0, 576.0, 636.0, 582.0, 633.0, 573.0, 633.0, 567.0, 630.0, 576.0, 639.0, 587.0, 630.0, 582.0, 636.0, 639.0, 633.0, 630.0, 636.0, 579.0, 576.0, 627.0, 587.0, 587.0, 579.0, 582.0, 636.0, 636.0, 579.0, 639.0, 587.0, 633.0, 630.0, 582.0, 633.0, 587.0, 636.0, 354.0, 587.0, 636.0, 579.0, 636.0, 639.0, 587.0, 468.0, 636.0, 624.0, 630.0, 579.0, 630.0, 584.0, 633.0, 636.0, 636.0, 630.0, 639.0, 639.0, 639.0, 636.0, 639.0, 639.0, 633.0, 636.0, 630.0, 582.0, 636.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 286.0, 296.0, 313.0, 320.0, 288.0, 294.0, 186.0, 179.0, 308.0, 319.0, 310.0, 323.0, 317.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 290.0, 289.0, 311.0, 319.0, 302.0, 274.0, 285.0, 294.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 286.0, 301.0, 313.0, 305.0, 317.0, 316.0, 325.0, 311.0, 308.0, 319.0, 287.0, 274.0, 322.0, 317.0, 300.0, 324.0, 296.0, 291.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 290.0, 292.0, 319.0, 314.0, 287.0, 286.0, 316.0, 317.0, 280.0, 287.0, 313.0, 317.0, 281.0, 295.0, 319.0, 320.0, 295.0, 292.0, 311.0, 319.0, 284.0, 298.0, 320.0, 316.0, 316.0, 323.0, 317.0, 316.0, 319.0, 311.0, 324.0, 312.0, 281.0, 298.0, 285.0, 291.0, 311.0, 316.0, 288.0, 299.0, 297.0, 290.0, 288.0, 291.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 280.0, 299.0, 317.0, 322.0, 298.0, 289.0, 314.0, 319.0, 316.0, 314.0, 291.0, 291.0, 312.0, 321.0, 298.0, 289.0, 322.0, 314.0, 174.0, 180.0, 288.0, 299.0, 316.0, 320.0, 291.0, 288.0, 326.0, 310.0, 322.0, 317.0, 290.0, 297.0, 225.0, 243.0, 314.0, 322.0, 321.0, 303.0, 311.0, 319.0, 293.0, 286.0, 317.0, 313.0, 301.0, 283.0, 319.0, 314.0, 314.0, 322.0, 324.0, 312.0, 316.0, 314.0, 319.0, 320.0, 317.0, 322.0, 312.0, 327.0, 319.0, 317.0, 320.0, 319.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 313.0, 317.0, 290.0, 292.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9013131460094013, "mean_processing_ms": 0.24855772824532033, "mean_inference_ms": 1.488151121252854}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8016000, "num_steps_sampled": 4275200, "sample_time_ms": 21421.637, "load_time_ms": 36.594, "grad_time_ms": 9205.096, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.0013225991278886795, "policy_loss": -0.00687911594286561, "vf_loss": 87.6913833618164, "vf_explained_var": 0.7619670033454895, "kl": 0.0022256800439208746, "entropy": 1.1348274946212769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4275200, "episodes_total": 10688, "training_iteration": 334, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-36-22", "timestamp": 1660257382, "time_this_iter_s": 33.877387046813965, "time_total_s": 15794.729454994202, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15794.729454994202, "timesteps_since_restore": 4275200, "iterations_since_restore": 334, "perf": {"cpu_util_percent": 34.24791666666667, "ram_util_percent": 59.02708333333334}} +{"episode_reward_max": 639.0, "episode_reward_min": 354.0, "episode_reward_mean": 608.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.3}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.6, "shaped_reward_min": 114, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.21, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.75, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.44, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.5, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, 
"useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.44, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.5, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.44, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.5, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 627.0, 582.0, 636.0, 627.0, 633.0, 518.0, 587.0, 633.0, 582.0, 582.0, 582.0, 558.0, 561.0, 633.0, 582.0, 579.0, 579.0, 633.0, 639.0, 633.0, 630.0, 624.0, 633.0, 579.0, 636.0, 564.0, 627.0, 630.0, 639.0, 633.0, 584.0, 630.0, 582.0, 633.0, 587.0, 636.0, 354.0, 587.0, 636.0, 579.0, 636.0, 639.0, 587.0, 468.0, 636.0, 624.0, 630.0, 579.0, 630.0, 584.0, 633.0, 636.0, 636.0, 630.0, 639.0, 639.0, 639.0, 636.0, 639.0, 639.0, 633.0, 636.0, 630.0, 582.0, 636.0, 630.0, 633.0, 633.0, 582.0, 633.0, 582.0, 365.0, 627.0, 633.0, 639.0, 639.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 576.0, 579.0, 633.0, 582.0, 633.0, 633.0, 636.0, 630.0, 587.0, 618.0, 633.0, 636.0, 627.0, 561.0, 639.0, 624.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 308.0, 319.0, 293.0, 289.0, 317.0, 319.0, 316.0, 311.0, 319.0, 314.0, 266.0, 252.0, 299.0, 288.0, 321.0, 312.0, 295.0, 287.0, 279.0, 303.0, 285.0, 297.0, 287.0, 271.0, 284.0, 277.0, 319.0, 314.0, 301.0, 281.0, 294.0, 285.0, 290.0, 289.0, 318.0, 315.0, 314.0, 325.0, 314.0, 319.0, 319.0, 311.0, 305.0, 319.0, 316.0, 317.0, 291.0, 288.0, 320.0, 316.0, 277.0, 287.0, 306.0, 321.0, 317.0, 313.0, 314.0, 325.0, 317.0, 316.0, 287.0, 297.0, 316.0, 314.0, 291.0, 291.0, 312.0, 321.0, 298.0, 289.0, 322.0, 314.0, 174.0, 180.0, 288.0, 299.0, 316.0, 320.0, 291.0, 288.0, 326.0, 310.0, 322.0, 317.0, 290.0, 297.0, 225.0, 243.0, 314.0, 322.0, 321.0, 303.0, 311.0, 319.0, 293.0, 286.0, 317.0, 313.0, 301.0, 283.0, 319.0, 314.0, 314.0, 322.0, 324.0, 312.0, 316.0, 314.0, 319.0, 320.0, 317.0, 322.0, 312.0, 327.0, 319.0, 317.0, 320.0, 319.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 313.0, 317.0, 290.0, 292.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 314.0, 319.0, 286.0, 296.0, 313.0, 320.0, 288.0, 294.0, 186.0, 179.0, 308.0, 319.0, 310.0, 323.0, 317.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 290.0, 289.0, 311.0, 319.0, 302.0, 274.0, 285.0, 294.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 286.0, 301.0, 313.0, 305.0, 317.0, 316.0, 325.0, 311.0, 308.0, 319.0, 287.0, 274.0, 322.0, 317.0, 300.0, 324.0, 296.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8999851577327678, "mean_processing_ms": 0.24829403649185813, "mean_inference_ms": 1.4872182879909173}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8040000, 
"num_steps_sampled": 4288000, "sample_time_ms": 21915.303, "load_time_ms": 37.24, "grad_time_ms": 9393.89, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024422004353255033, "policy_loss": -0.005287020932883024, "vf_loss": 82.97665405273438, "vf_explained_var": 0.7636620402336121, "kl": 0.001807666034437716, "entropy": 1.136885643005371, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4288000, "episodes_total": 10720, "training_iteration": 335, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-36-58", "timestamp": 1660257418, "time_this_iter_s": 35.08472490310669, "time_total_s": 15829.814179897308, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15829.814179897308, "timesteps_since_restore": 4288000, "iterations_since_restore": 335, "perf": {"cpu_util_percent": 30.30408163265306, "ram_util_percent": 58.94081632653061}} +{"episode_reward_max": 639.0, "episode_reward_min": 365.0, "episode_reward_mean": 608.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 179.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.165}, "custom_metrics": {"sparse_reward_mean": 211.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 185.93, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.68, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.45, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.81, "dish_pickup_agent_1_min": 3, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.36, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.45, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.45, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 465.0, 636.0, 633.0, 582.0, 579.0, 582.0, 633.0, 516.0, 582.0, 587.0, 630.0, 630.0, 633.0, 630.0, 624.0, 633.0, 630.0, 627.0, 630.0, 576.0, 582.0, 630.0, 639.0, 639.0, 633.0, 576.0, 633.0, 633.0, 576.0, 633.0, 633.0, 582.0, 636.0, 630.0, 633.0, 633.0, 582.0, 633.0, 582.0, 365.0, 627.0, 633.0, 639.0, 639.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 576.0, 579.0, 633.0, 582.0, 633.0, 633.0, 636.0, 630.0, 587.0, 618.0, 633.0, 636.0, 627.0, 561.0, 639.0, 624.0, 587.0, 633.0, 627.0, 582.0, 636.0, 627.0, 633.0, 518.0, 587.0, 633.0, 582.0, 582.0, 582.0, 558.0, 561.0, 633.0, 582.0, 579.0, 579.0, 633.0, 639.0, 633.0, 630.0, 624.0, 633.0, 579.0, 636.0, 
564.0, 627.0, 630.0, 639.0, 633.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 312.0, 221.0, 244.0, 316.0, 320.0, 319.0, 314.0, 290.0, 292.0, 288.0, 291.0, 283.0, 299.0, 314.0, 319.0, 256.0, 260.0, 283.0, 299.0, 291.0, 296.0, 317.0, 313.0, 311.0, 319.0, 314.0, 319.0, 326.0, 304.0, 308.0, 316.0, 321.0, 312.0, 321.0, 309.0, 314.0, 313.0, 317.0, 313.0, 288.0, 288.0, 286.0, 296.0, 318.0, 312.0, 322.0, 317.0, 327.0, 312.0, 316.0, 317.0, 291.0, 285.0, 319.0, 314.0, 318.0, 315.0, 288.0, 288.0, 324.0, 309.0, 322.0, 311.0, 290.0, 292.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 314.0, 319.0, 286.0, 296.0, 313.0, 320.0, 288.0, 294.0, 186.0, 179.0, 308.0, 319.0, 310.0, 323.0, 317.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 290.0, 289.0, 311.0, 319.0, 302.0, 274.0, 285.0, 294.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 286.0, 301.0, 313.0, 305.0, 317.0, 316.0, 325.0, 311.0, 308.0, 319.0, 287.0, 274.0, 322.0, 317.0, 300.0, 324.0, 296.0, 291.0, 316.0, 317.0, 308.0, 319.0, 293.0, 289.0, 317.0, 319.0, 316.0, 311.0, 319.0, 314.0, 266.0, 252.0, 299.0, 288.0, 321.0, 312.0, 295.0, 287.0, 279.0, 303.0, 285.0, 297.0, 287.0, 271.0, 284.0, 277.0, 319.0, 314.0, 301.0, 281.0, 294.0, 285.0, 290.0, 289.0, 318.0, 315.0, 314.0, 325.0, 314.0, 319.0, 319.0, 311.0, 305.0, 319.0, 316.0, 317.0, 291.0, 288.0, 320.0, 316.0, 277.0, 287.0, 306.0, 321.0, 317.0, 313.0, 314.0, 325.0, 317.0, 316.0, 287.0, 297.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.898665270354117, "mean_processing_ms": 0.24803277399210055, "mean_inference_ms": 1.4863570559836363}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8064000, "num_steps_sampled": 4300800, "sample_time_ms": 22027.266, "load_time_ms": 37.353, "grad_time_ms": 9612.106, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008330469136126339, "policy_loss": -0.006751233246177435, "vf_loss": 81.52507781982422, "vf_explained_var": 0.7658050656318665, "kl": 0.001944715972058475, "entropy": 1.1364573240280151, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4300800, "episodes_total": 10752, "training_iteration": 336, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-37-32", "timestamp": 1660257452, "time_this_iter_s": 34.64702320098877, "time_total_s": 15864.461203098297, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15864.461203098297, "timesteps_since_restore": 4300800, "iterations_since_restore": 336, "perf": {"cpu_util_percent": 29.189795918367345, "ram_util_percent": 58.936734693877554}} +{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 608.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 221.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.205}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.41, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.94, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.46, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.76, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, 
"potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.31, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 524.0, 630.0, 636.0, 636.0, 633.0, 639.0, 584.0, 636.0, 587.0, 636.0, 587.0, 639.0, 636.0, 587.0, 639.0, 627.0, 630.0, 636.0, 582.0, 590.0, 573.0, 636.0, 587.0, 633.0, 633.0, 578.0, 630.0, 573.0, 573.0, 624.0, 561.0, 639.0, 624.0, 587.0, 633.0, 627.0, 582.0, 636.0, 627.0, 633.0, 518.0, 587.0, 633.0, 582.0, 582.0, 582.0, 558.0, 561.0, 633.0, 582.0, 579.0, 579.0, 633.0, 639.0, 633.0, 630.0, 624.0, 
633.0, 579.0, 636.0, 564.0, 627.0, 630.0, 639.0, 633.0, 584.0, 630.0, 465.0, 636.0, 633.0, 582.0, 579.0, 582.0, 633.0, 516.0, 582.0, 587.0, 630.0, 630.0, 633.0, 630.0, 624.0, 633.0, 630.0, 627.0, 630.0, 576.0, 582.0, 630.0, 639.0, 639.0, 633.0, 576.0, 633.0, 633.0, 576.0, 633.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 286.0, 314.0, 322.0, 264.0, 260.0, 316.0, 314.0, 319.0, 317.0, 314.0, 322.0, 316.0, 317.0, 319.0, 320.0, 288.0, 296.0, 319.0, 317.0, 306.0, 281.0, 316.0, 320.0, 299.0, 288.0, 325.0, 314.0, 317.0, 319.0, 293.0, 294.0, 319.0, 320.0, 318.0, 309.0, 316.0, 314.0, 319.0, 317.0, 296.0, 286.0, 296.0, 294.0, 293.0, 280.0, 319.0, 317.0, 283.0, 304.0, 309.0, 324.0, 311.0, 322.0, 282.0, 296.0, 316.0, 314.0, 280.0, 293.0, 285.0, 288.0, 312.0, 312.0, 287.0, 274.0, 322.0, 317.0, 300.0, 324.0, 296.0, 291.0, 316.0, 317.0, 308.0, 319.0, 293.0, 289.0, 317.0, 319.0, 316.0, 311.0, 319.0, 314.0, 266.0, 252.0, 299.0, 288.0, 321.0, 312.0, 295.0, 287.0, 279.0, 303.0, 285.0, 297.0, 287.0, 271.0, 284.0, 277.0, 319.0, 314.0, 301.0, 281.0, 294.0, 285.0, 290.0, 289.0, 318.0, 315.0, 314.0, 325.0, 314.0, 319.0, 319.0, 311.0, 305.0, 319.0, 316.0, 317.0, 291.0, 288.0, 320.0, 316.0, 277.0, 287.0, 306.0, 321.0, 317.0, 313.0, 314.0, 325.0, 317.0, 316.0, 287.0, 297.0, 318.0, 312.0, 221.0, 244.0, 316.0, 320.0, 319.0, 314.0, 290.0, 292.0, 288.0, 291.0, 283.0, 299.0, 314.0, 319.0, 256.0, 260.0, 283.0, 299.0, 291.0, 296.0, 317.0, 313.0, 311.0, 319.0, 314.0, 319.0, 326.0, 304.0, 308.0, 
316.0, 321.0, 312.0, 321.0, 309.0, 314.0, 313.0, 317.0, 313.0, 288.0, 288.0, 286.0, 296.0, 318.0, 312.0, 322.0, 317.0, 327.0, 312.0, 316.0, 317.0, 291.0, 285.0, 319.0, 314.0, 318.0, 315.0, 288.0, 288.0, 324.0, 309.0, 322.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8973618509446918, "mean_processing_ms": 0.2477754616951261, "mean_inference_ms": 1.4857111837613974}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8088000, "num_steps_sampled": 4313600, "sample_time_ms": 22572.125, "load_time_ms": 37.728, "grad_time_ms": 9903.43, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030143249314278364, "policy_loss": -0.004624274093657732, "vf_loss": 82.12947845458984, "vf_explained_var": 0.7718231081962585, "kl": 0.0020513928029686213, "entropy": 1.1487096548080444, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4313600, "episodes_total": 10784, "training_iteration": 337, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-38-10", "timestamp": 1660257490, "time_this_iter_s": 37.458003759384155, "time_total_s": 15901.919206857681, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15901.919206857681, "timesteps_since_restore": 4313600, "iterations_since_restore": 337, "perf": {"cpu_util_percent": 29.675471698113206, "ram_util_percent": 58.94905660377358}} +{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 610.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 221.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.345}, 
"custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.49, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.44, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.34, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.93, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.6, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 570.0, 633.0, 639.0, 582.0, 630.0, 630.0, 530.0, 633.0, 516.0, 633.0, 636.0, 627.0, 633.0, 587.0, 636.0, 633.0, 573.0, 579.0, 582.0, 633.0, 636.0, 627.0, 
587.0, 582.0, 630.0, 630.0, 639.0, 633.0, 584.0, 630.0, 465.0, 636.0, 633.0, 582.0, 579.0, 582.0, 633.0, 516.0, 582.0, 587.0, 630.0, 630.0, 633.0, 630.0, 624.0, 633.0, 630.0, 627.0, 630.0, 576.0, 582.0, 630.0, 639.0, 639.0, 633.0, 576.0, 633.0, 633.0, 576.0, 633.0, 633.0, 587.0, 636.0, 524.0, 630.0, 636.0, 636.0, 633.0, 639.0, 584.0, 636.0, 587.0, 636.0, 587.0, 639.0, 636.0, 587.0, 639.0, 627.0, 630.0, 636.0, 582.0, 590.0, 573.0, 636.0, 587.0, 633.0, 633.0, 578.0, 630.0, 573.0, 573.0, 624.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 319.0, 311.0, 316.0, 314.0, 321.0, 309.0, 317.0, 319.0, 317.0, 316.0, 298.0, 284.0, 277.0, 293.0, 319.0, 314.0, 322.0, 317.0, 291.0, 291.0, 321.0, 309.0, 316.0, 314.0, 257.0, 273.0, 321.0, 312.0, 256.0, 260.0, 311.0, 322.0, 316.0, 320.0, 311.0, 316.0, 316.0, 317.0, 293.0, 294.0, 314.0, 322.0, 316.0, 317.0, 276.0, 297.0, 288.0, 291.0, 288.0, 294.0, 318.0, 315.0, 324.0, 312.0, 313.0, 314.0, 293.0, 294.0, 291.0, 291.0, 316.0, 314.0, 317.0, 313.0, 314.0, 325.0, 317.0, 316.0, 287.0, 297.0, 318.0, 312.0, 221.0, 244.0, 316.0, 320.0, 319.0, 314.0, 290.0, 292.0, 288.0, 291.0, 283.0, 299.0, 314.0, 319.0, 256.0, 260.0, 283.0, 299.0, 291.0, 296.0, 317.0, 313.0, 311.0, 319.0, 314.0, 319.0, 326.0, 304.0, 308.0, 316.0, 321.0, 312.0, 321.0, 309.0, 314.0, 313.0, 317.0, 313.0, 288.0, 288.0, 286.0, 296.0, 318.0, 312.0, 322.0, 317.0, 327.0, 312.0, 316.0, 317.0, 291.0, 285.0, 319.0, 314.0, 318.0, 315.0, 288.0, 288.0, 324.0, 309.0, 322.0, 311.0, 301.0, 
286.0, 314.0, 322.0, 264.0, 260.0, 316.0, 314.0, 319.0, 317.0, 314.0, 322.0, 316.0, 317.0, 319.0, 320.0, 288.0, 296.0, 319.0, 317.0, 306.0, 281.0, 316.0, 320.0, 299.0, 288.0, 325.0, 314.0, 317.0, 319.0, 293.0, 294.0, 319.0, 320.0, 318.0, 309.0, 316.0, 314.0, 319.0, 317.0, 296.0, 286.0, 296.0, 294.0, 293.0, 280.0, 319.0, 317.0, 283.0, 304.0, 309.0, 324.0, 311.0, 322.0, 282.0, 296.0, 316.0, 314.0, 280.0, 293.0, 285.0, 288.0, 312.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8960540337881886, "mean_processing_ms": 0.24751510966747803, "mean_inference_ms": 1.4847704330515064}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8112000, "num_steps_sampled": 4326400, "sample_time_ms": 22480.697, "load_time_ms": 37.651, "grad_time_ms": 10202.093, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001484702923335135, "policy_loss": -0.006018726620823145, "vf_loss": 80.70446014404297, "vf_explained_var": 0.7642549872398376, "kl": 0.0017236651619896293, "entropy": 1.1340447664260864, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4326400, "episodes_total": 10816, "training_iteration": 338, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-38-41", "timestamp": 1660257521, "time_this_iter_s": 31.244572162628174, "time_total_s": 15933.16377902031, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15933.16377902031, "timesteps_since_restore": 4326400, "iterations_since_restore": 338, "perf": {"cpu_util_percent": 27.328888888888894, "ram_util_percent": 59.27555555555555}} +{"episode_reward_max": 639.0, "episode_reward_min": 
516.0, "episode_reward_mean": 612.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 306.0}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.4, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.62, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.03, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 
0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.16, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.95, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.49, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.16, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, 
"optimal_onion_potting_agent_1_mean": 17.95, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.16, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.95, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 630.0, 633.0, 639.0, 
630.0, 587.0, 636.0, 582.0, 636.0, 576.0, 584.0, 630.0, 639.0, 639.0, 633.0, 627.0, 582.0, 582.0, 630.0, 630.0, 633.0, 522.0, 587.0, 593.0, 633.0, 630.0, 582.0, 636.0, 630.0, 587.0, 636.0, 633.0, 576.0, 633.0, 633.0, 587.0, 636.0, 524.0, 630.0, 636.0, 636.0, 633.0, 639.0, 584.0, 636.0, 587.0, 636.0, 587.0, 639.0, 636.0, 587.0, 639.0, 627.0, 630.0, 636.0, 582.0, 590.0, 573.0, 636.0, 587.0, 633.0, 633.0, 578.0, 630.0, 573.0, 573.0, 624.0, 633.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 570.0, 633.0, 639.0, 582.0, 630.0, 630.0, 530.0, 633.0, 516.0, 633.0, 636.0, 627.0, 633.0, 587.0, 636.0, 633.0, 573.0, 579.0, 582.0, 633.0, 636.0, 627.0, 587.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 299.0, 322.0, 314.0, 316.0, 314.0, 319.0, 314.0, 322.0, 317.0, 316.0, 314.0, 291.0, 296.0, 314.0, 322.0, 285.0, 297.0, 325.0, 311.0, 288.0, 288.0, 295.0, 289.0, 326.0, 304.0, 317.0, 322.0, 322.0, 317.0, 316.0, 317.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 306.0, 324.0, 316.0, 314.0, 311.0, 322.0, 262.0, 260.0, 289.0, 298.0, 296.0, 297.0, 322.0, 311.0, 313.0, 317.0, 293.0, 289.0, 319.0, 317.0, 313.0, 317.0, 309.0, 278.0, 314.0, 322.0, 318.0, 315.0, 288.0, 288.0, 324.0, 309.0, 322.0, 311.0, 301.0, 286.0, 314.0, 322.0, 264.0, 260.0, 316.0, 314.0, 319.0, 317.0, 314.0, 322.0, 316.0, 317.0, 319.0, 320.0, 288.0, 296.0, 319.0, 317.0, 306.0, 281.0, 316.0, 320.0, 299.0, 288.0, 325.0, 314.0, 317.0, 319.0, 293.0, 294.0, 319.0, 320.0, 318.0, 309.0, 316.0, 314.0, 319.0, 317.0, 296.0, 
286.0, 296.0, 294.0, 293.0, 280.0, 319.0, 317.0, 283.0, 304.0, 309.0, 324.0, 311.0, 322.0, 282.0, 296.0, 316.0, 314.0, 280.0, 293.0, 285.0, 288.0, 312.0, 312.0, 319.0, 314.0, 319.0, 311.0, 316.0, 314.0, 321.0, 309.0, 317.0, 319.0, 317.0, 316.0, 298.0, 284.0, 277.0, 293.0, 319.0, 314.0, 322.0, 317.0, 291.0, 291.0, 321.0, 309.0, 316.0, 314.0, 257.0, 273.0, 321.0, 312.0, 256.0, 260.0, 311.0, 322.0, 316.0, 320.0, 311.0, 316.0, 316.0, 317.0, 293.0, 294.0, 314.0, 322.0, 316.0, 317.0, 276.0, 297.0, 288.0, 291.0, 288.0, 294.0, 318.0, 315.0, 324.0, 312.0, 313.0, 314.0, 293.0, 294.0, 291.0, 291.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8947470159821566, "mean_processing_ms": 0.2472537044161699, "mean_inference_ms": 1.483669994240604}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8136000, "num_steps_sampled": 4339200, "sample_time_ms": 22385.99, "load_time_ms": 37.99, "grad_time_ms": 10282.952, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0002800325455609709, "policy_loss": -0.007263503968715668, "vf_loss": 81.1025161743164, "vf_explained_var": 0.7635498642921448, "kl": 0.0021122132893651724, "entropy": 1.1334240436553955, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4339200, "episodes_total": 10848, "training_iteration": 339, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-39-11", "timestamp": 1660257551, "time_this_iter_s": 30.060129165649414, "time_total_s": 15963.223908185959, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", 
"fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15963.223908185959, "timesteps_since_restore": 4339200, 
"iterations_since_restore": 339, "perf": {"cpu_util_percent": 32.38333333333334, "ram_util_percent": 58.776190476190465}} +{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 611.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 305.805}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.01, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.45, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.7, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.11, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.06, 
"useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.02, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.4, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, 
"soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.07, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.02, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.02, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 590.0, 639.0, 636.0, 582.0, 624.0, 633.0, 587.0, 630.0, 582.0, 630.0, 630.0, 630.0, 636.0, 636.0, 579.0, 587.0, 587.0, 636.0, 584.0, 578.0, 630.0, 636.0, 630.0, 582.0, 579.0, 636.0, 627.0, 582.0, 633.0, 582.0, 630.0, 630.0, 573.0, 573.0, 624.0, 633.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 570.0, 633.0, 639.0, 582.0, 630.0, 630.0, 530.0, 633.0, 516.0, 633.0, 636.0, 627.0, 633.0, 587.0, 636.0, 633.0, 573.0, 579.0, 582.0, 633.0, 636.0, 627.0, 587.0, 582.0, 630.0, 587.0, 636.0, 630.0, 633.0, 639.0, 630.0, 587.0, 636.0, 582.0, 636.0, 576.0, 584.0, 630.0, 639.0, 639.0, 633.0, 627.0, 582.0, 582.0, 630.0, 630.0, 633.0, 522.0, 587.0, 593.0, 633.0, 630.0, 582.0, 636.0, 630.0, 587.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [323.0, 307.0, 304.0, 286.0, 320.0, 319.0, 316.0, 320.0, 296.0, 286.0, 313.0, 311.0, 311.0, 322.0, 291.0, 296.0, 314.0, 316.0, 296.0, 286.0, 311.0, 319.0, 321.0, 309.0, 316.0, 314.0, 319.0, 317.0, 317.0, 319.0, 293.0, 286.0, 301.0, 286.0, 298.0, 289.0, 314.0, 322.0, 288.0, 296.0, 286.0, 292.0, 318.0, 312.0, 312.0, 324.0, 313.0, 317.0, 288.0, 294.0, 295.0, 284.0, 314.0, 322.0, 316.0, 311.0, 291.0, 291.0, 316.0, 317.0, 285.0, 297.0, 313.0, 317.0, 316.0, 314.0, 280.0, 293.0, 285.0, 288.0, 312.0, 312.0, 319.0, 314.0, 319.0, 311.0, 316.0, 314.0, 321.0, 
309.0, 317.0, 319.0, 317.0, 316.0, 298.0, 284.0, 277.0, 293.0, 319.0, 314.0, 322.0, 317.0, 291.0, 291.0, 321.0, 309.0, 316.0, 314.0, 257.0, 273.0, 321.0, 312.0, 256.0, 260.0, 311.0, 322.0, 316.0, 320.0, 311.0, 316.0, 316.0, 317.0, 293.0, 294.0, 314.0, 322.0, 316.0, 317.0, 276.0, 297.0, 288.0, 291.0, 288.0, 294.0, 318.0, 315.0, 324.0, 312.0, 313.0, 314.0, 293.0, 294.0, 291.0, 291.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 316.0, 314.0, 319.0, 314.0, 322.0, 317.0, 316.0, 314.0, 291.0, 296.0, 314.0, 322.0, 285.0, 297.0, 325.0, 311.0, 288.0, 288.0, 295.0, 289.0, 326.0, 304.0, 317.0, 322.0, 322.0, 317.0, 316.0, 317.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 306.0, 324.0, 316.0, 314.0, 311.0, 322.0, 262.0, 260.0, 289.0, 298.0, 296.0, 297.0, 322.0, 311.0, 313.0, 317.0, 293.0, 289.0, 319.0, 317.0, 313.0, 317.0, 309.0, 278.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8934318351724546, "mean_processing_ms": 0.24699024834277958, "mean_inference_ms": 1.4821846503211202}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8160000, "num_steps_sampled": 4352000, "sample_time_ms": 22391.649, "load_time_ms": 38.471, "grad_time_ms": 10443.746, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004414581228047609, "policy_loss": -0.003194813383743167, "vf_loss": 81.79281616210938, "vf_explained_var": 0.764918863773346, "kl": 0.0018889306811615825, "entropy": 1.1397979259490967, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4352000, "episodes_total": 10880, "training_iteration": 340, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-39-42", "timestamp": 1660257582, "time_this_iter_s": 31.150686979293823, "time_total_s": 15994.374595165253, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, 
"num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15994.374595165253, "timesteps_since_restore": 4352000, "iterations_since_restore": 340, "perf": {"cpu_util_percent": 27.049999999999997, "ram_util_percent": 58.795454545454525}} +{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 610.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.27}, "custom_metrics": {"sparse_reward_mean": 211.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.14, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.6, 
"onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.97, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.94, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.31, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.79, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.13, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.94, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.94, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 462.0, 636.0, 639.0, 587.0, 576.0, 639.0, 633.0, 633.0, 633.0, 633.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 582.0, 573.0, 630.0, 582.0, 570.0, 587.0, 573.0, 544.0, 582.0, 630.0, 630.0, 582.0, 630.0, 633.0, 639.0, 627.0, 587.0, 582.0, 630.0, 587.0, 636.0, 630.0, 633.0, 639.0, 630.0, 587.0, 636.0, 582.0, 636.0, 576.0, 584.0, 630.0, 639.0, 639.0, 633.0, 627.0, 582.0, 582.0, 630.0, 630.0, 633.0, 522.0, 587.0, 593.0, 633.0, 630.0, 582.0, 636.0, 630.0, 587.0, 636.0, 630.0, 590.0, 639.0, 636.0, 582.0, 624.0, 633.0, 587.0, 630.0, 582.0, 630.0, 630.0, 630.0, 636.0, 636.0, 579.0, 587.0, 587.0, 636.0, 584.0, 578.0, 630.0, 636.0, 630.0, 582.0, 579.0, 636.0, 627.0, 582.0, 633.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 228.0, 234.0, 314.0, 322.0, 327.0, 312.0, 293.0, 294.0, 290.0, 286.0, 320.0, 319.0, 322.0, 311.0, 324.0, 309.0, 314.0, 319.0, 317.0, 316.0, 308.0, 322.0, 287.0, 292.0, 314.0, 319.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 296.0, 286.0, 296.0, 277.0, 321.0, 309.0, 288.0, 294.0, 285.0, 285.0, 285.0, 302.0, 287.0, 286.0, 275.0, 
269.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 301.0, 281.0, 315.0, 315.0, 318.0, 315.0, 316.0, 323.0, 313.0, 314.0, 293.0, 294.0, 291.0, 291.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 316.0, 314.0, 319.0, 314.0, 322.0, 317.0, 316.0, 314.0, 291.0, 296.0, 314.0, 322.0, 285.0, 297.0, 325.0, 311.0, 288.0, 288.0, 295.0, 289.0, 326.0, 304.0, 317.0, 322.0, 322.0, 317.0, 316.0, 317.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 306.0, 324.0, 316.0, 314.0, 311.0, 322.0, 262.0, 260.0, 289.0, 298.0, 296.0, 297.0, 322.0, 311.0, 313.0, 317.0, 293.0, 289.0, 319.0, 317.0, 313.0, 317.0, 309.0, 278.0, 314.0, 322.0, 323.0, 307.0, 304.0, 286.0, 320.0, 319.0, 316.0, 320.0, 296.0, 286.0, 313.0, 311.0, 311.0, 322.0, 291.0, 296.0, 314.0, 316.0, 296.0, 286.0, 311.0, 319.0, 321.0, 309.0, 316.0, 314.0, 319.0, 317.0, 317.0, 319.0, 293.0, 286.0, 301.0, 286.0, 298.0, 289.0, 314.0, 322.0, 288.0, 296.0, 286.0, 292.0, 318.0, 312.0, 312.0, 324.0, 313.0, 317.0, 288.0, 294.0, 295.0, 284.0, 314.0, 322.0, 316.0, 311.0, 291.0, 291.0, 316.0, 317.0, 285.0, 297.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8921345690140595, "mean_processing_ms": 0.24673048959961144, "mean_inference_ms": 1.4810833291212553}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8184000, "num_steps_sampled": 4364800, "sample_time_ms": 22778.758, "load_time_ms": 38.164, "grad_time_ms": 10656.478, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005775378551334143, "policy_loss": -0.00215825904160738, "vf_loss": 85.0276870727539, "vf_explained_var": 0.7658646106719971, "kl": 0.0019542332738637924, "entropy": 1.1382619142532349, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4364800, "episodes_total": 10912, "training_iteration": 341, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-40-20", "timestamp": 
1660257620, "time_this_iter_s": 37.338398933410645, "time_total_s": 16031.712994098663, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", 
"ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 
0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16031.712994098663, "timesteps_since_restore": 4364800, "iterations_since_restore": 341, "perf": {"cpu_util_percent": 27.592452830188673, "ram_util_percent": 58.783018867924525}} +{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 610.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.22}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.64, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.62, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.96, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.93, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.58, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 
5.68, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.93, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.93, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 573.0, 627.0, 630.0, 636.0, 636.0, 636.0, 582.0, 624.0, 576.0, 575.0, 633.0, 587.0, 630.0, 630.0, 636.0, 624.0, 633.0, 636.0, 590.0, 630.0, 584.0, 576.0, 582.0, 633.0, 633.0, 636.0, 582.0, 582.0, 639.0, 579.0, 564.0, 636.0, 630.0, 587.0, 636.0, 630.0, 590.0, 639.0, 636.0, 582.0, 624.0, 633.0, 587.0, 630.0, 582.0, 630.0, 630.0, 630.0, 636.0, 636.0, 579.0, 587.0, 587.0, 636.0, 584.0, 578.0, 630.0, 636.0, 630.0, 582.0, 579.0, 636.0, 627.0, 582.0, 633.0, 582.0, 630.0, 633.0, 462.0, 636.0, 639.0, 587.0, 576.0, 639.0, 633.0, 633.0, 633.0, 633.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 582.0, 573.0, 630.0, 582.0, 570.0, 587.0, 573.0, 544.0, 582.0, 630.0, 630.0, 582.0, 630.0, 633.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 316.0, 288.0, 285.0, 305.0, 322.0, 319.0, 311.0, 314.0, 322.0, 319.0, 317.0, 317.0, 319.0, 287.0, 295.0, 308.0, 316.0, 
283.0, 293.0, 290.0, 285.0, 322.0, 311.0, 285.0, 302.0, 318.0, 312.0, 314.0, 316.0, 314.0, 322.0, 311.0, 313.0, 311.0, 322.0, 314.0, 322.0, 285.0, 305.0, 316.0, 314.0, 301.0, 283.0, 295.0, 281.0, 290.0, 292.0, 314.0, 319.0, 311.0, 322.0, 317.0, 319.0, 291.0, 291.0, 286.0, 296.0, 322.0, 317.0, 299.0, 280.0, 281.0, 283.0, 319.0, 317.0, 313.0, 317.0, 309.0, 278.0, 314.0, 322.0, 323.0, 307.0, 304.0, 286.0, 320.0, 319.0, 316.0, 320.0, 296.0, 286.0, 313.0, 311.0, 311.0, 322.0, 291.0, 296.0, 314.0, 316.0, 296.0, 286.0, 311.0, 319.0, 321.0, 309.0, 316.0, 314.0, 319.0, 317.0, 317.0, 319.0, 293.0, 286.0, 301.0, 286.0, 298.0, 289.0, 314.0, 322.0, 288.0, 296.0, 286.0, 292.0, 318.0, 312.0, 312.0, 324.0, 313.0, 317.0, 288.0, 294.0, 295.0, 284.0, 314.0, 322.0, 316.0, 311.0, 291.0, 291.0, 316.0, 317.0, 285.0, 297.0, 313.0, 317.0, 321.0, 312.0, 228.0, 234.0, 314.0, 322.0, 327.0, 312.0, 293.0, 294.0, 290.0, 286.0, 320.0, 319.0, 322.0, 311.0, 324.0, 309.0, 314.0, 319.0, 317.0, 316.0, 308.0, 322.0, 287.0, 292.0, 314.0, 319.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 296.0, 286.0, 296.0, 277.0, 321.0, 309.0, 288.0, 294.0, 285.0, 285.0, 285.0, 302.0, 287.0, 286.0, 275.0, 269.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 301.0, 281.0, 315.0, 315.0, 318.0, 315.0, 316.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8908455849362553, "mean_processing_ms": 0.24647214238020762, "mean_inference_ms": 1.4799101322874493}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8208000, "num_steps_sampled": 4377600, "sample_time_ms": 22385.575, "load_time_ms": 38.145, "grad_time_ms": 10547.897, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005002778489142656, "policy_loss": -0.002570929704234004, "vf_loss": 81.44794464111328, "vf_explained_var": 0.765848696231842, "kl": 0.002198006259277463, "entropy": 1.1421762704849243, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 4377600, "episodes_total": 10944, "training_iteration": 342, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-40-48", "timestamp": 1660257648, "time_this_iter_s": 28.1991069316864, "time_total_s": 16059.91210103035, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16059.91210103035, "timesteps_since_restore": 4377600, "iterations_since_restore": 342, "perf": {"cpu_util_percent": 30.8525, "ram_util_percent": 58.825}} +{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 608.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 304.36}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.12, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.47, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.92, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.37, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 630.0, 582.0, 587.0, 582.0, 582.0, 630.0, 530.0, 633.0, 633.0, 639.0, 579.0, 627.0, 633.0, 636.0, 633.0, 587.0, 627.0, 627.0, 630.0, 630.0, 587.0, 582.0, 582.0, 582.0, 582.0, 596.0, 636.0, 633.0, 630.0, 576.0, 582.0, 633.0, 582.0, 630.0, 633.0, 462.0, 636.0, 639.0, 587.0, 576.0, 639.0, 633.0, 633.0, 633.0, 633.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 582.0, 573.0, 630.0, 582.0, 570.0, 587.0, 573.0, 544.0, 582.0, 630.0, 630.0, 582.0, 630.0, 633.0, 639.0, 630.0, 573.0, 627.0, 630.0, 636.0, 636.0, 636.0, 582.0, 624.0, 576.0, 575.0, 633.0, 587.0, 630.0, 630.0, 636.0, 624.0, 633.0, 636.0, 590.0, 630.0, 584.0, 576.0, 582.0, 633.0, 633.0, 636.0, 582.0, 582.0, 639.0, 579.0, 564.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 319.0, 324.0, 309.0, 311.0, 319.0, 296.0, 286.0, 296.0, 291.0, 288.0, 294.0, 293.0, 289.0, 321.0, 309.0, 259.0, 271.0, 319.0, 314.0, 329.0, 304.0, 314.0, 325.0, 283.0, 296.0, 308.0, 319.0, 314.0, 319.0, 319.0, 317.0, 317.0, 316.0, 288.0, 299.0, 308.0, 319.0, 311.0, 316.0, 316.0, 314.0, 308.0, 322.0, 296.0, 291.0, 288.0, 294.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 301.0, 295.0, 321.0, 315.0, 319.0, 314.0, 316.0, 314.0, 296.0, 280.0, 291.0, 291.0, 316.0, 317.0, 285.0, 297.0, 313.0, 317.0, 321.0, 312.0, 228.0, 234.0, 314.0, 322.0, 327.0, 312.0, 293.0, 294.0, 290.0, 286.0, 320.0, 319.0, 322.0, 311.0, 324.0, 309.0, 314.0, 319.0, 317.0, 316.0, 308.0, 322.0, 287.0, 292.0, 314.0, 319.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 296.0, 286.0, 296.0, 277.0, 321.0, 309.0, 288.0, 294.0, 285.0, 285.0, 285.0, 302.0, 287.0, 286.0, 275.0, 269.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 301.0, 281.0, 315.0, 315.0, 318.0, 315.0, 316.0, 323.0, 314.0, 316.0, 288.0, 285.0, 305.0, 322.0, 319.0, 311.0, 314.0, 322.0, 319.0, 317.0, 317.0, 319.0, 287.0, 295.0, 308.0, 316.0, 283.0, 293.0, 290.0, 285.0, 322.0, 311.0, 285.0, 302.0, 318.0, 312.0, 314.0, 316.0, 314.0, 322.0, 311.0, 313.0, 311.0, 322.0, 314.0, 322.0, 285.0, 305.0, 316.0, 314.0, 301.0, 283.0, 295.0, 281.0, 290.0, 292.0, 314.0, 319.0, 311.0, 322.0, 317.0, 319.0, 291.0, 291.0, 286.0, 296.0, 322.0, 317.0, 299.0, 280.0, 281.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8895732427757973, "mean_processing_ms": 0.24621711936805896, "mean_inference_ms": 1.4789333750052633}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8232000, "num_steps_sampled": 4390400, "sample_time_ms": 22479.914, "load_time_ms": 38.369, "grad_time_ms": 10420.997, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0025722135324031115, "policy_loss": -0.00497409887611866, "vf_loss": 81.19109344482422, 
"vf_explained_var": 0.7659382820129395, "kl": 0.0019239649409428239, "entropy": 1.1455968618392944, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4390400, "episodes_total": 10976, "training_iteration": 343, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-41-18", "timestamp": 1660257678, "time_this_iter_s": 30.595246076583862, "time_total_s": 16090.507347106934, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": 
[null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16090.507347106934, "timesteps_since_restore": 4390400, "iterations_since_restore": 343, "perf": {"cpu_util_percent": 28.927906976744183, "ram_util_percent": 58.81860465116278}} +{"episode_reward_max": 639.0, "episode_reward_min": 521.0, "episode_reward_mean": 607.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 303.955}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.11, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.38, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.68, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.19, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.04, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.9, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.9, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.9, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 582.0, 630.0, 587.0, 630.0, 587.0, 579.0, 581.0, 630.0, 587.0, 576.0, 521.0, 587.0, 639.0, 587.0, 630.0, 630.0, 639.0, 630.0, 630.0, 587.0, 582.0, 582.0, 630.0, 587.0, 639.0, 582.0, 582.0, 630.0, 630.0, 590.0, 630.0, 582.0, 630.0, 633.0, 639.0, 630.0, 573.0, 627.0, 630.0, 636.0, 636.0, 636.0, 582.0, 624.0, 576.0, 575.0, 633.0, 587.0, 630.0, 630.0, 636.0, 624.0, 633.0, 636.0, 590.0, 630.0, 584.0, 576.0, 582.0, 633.0, 633.0, 636.0, 582.0, 582.0, 639.0, 579.0, 564.0, 627.0, 633.0, 630.0, 582.0, 587.0, 582.0, 582.0, 630.0, 530.0, 633.0, 633.0, 639.0, 579.0, 627.0, 633.0, 636.0, 633.0, 587.0, 627.0, 627.0, 630.0, 630.0, 587.0, 582.0, 582.0, 582.0, 582.0, 596.0, 636.0, 633.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 289.0, 290.0, 292.0, 321.0, 309.0, 288.0, 299.0, 318.0, 312.0, 288.0, 299.0, 287.0, 292.0, 284.0, 297.0, 323.0, 307.0, 288.0, 299.0, 287.0, 289.0, 260.0, 261.0, 296.0, 291.0, 317.0, 322.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 322.0, 317.0, 311.0, 319.0, 319.0, 311.0, 285.0, 302.0, 299.0, 283.0, 291.0, 291.0, 306.0, 324.0, 296.0, 291.0, 324.0, 315.0, 293.0, 289.0, 298.0, 284.0, 311.0, 319.0, 316.0, 314.0, 296.0, 294.0, 318.0, 312.0, 301.0, 281.0, 315.0, 315.0, 318.0, 315.0, 316.0, 323.0, 314.0, 316.0, 288.0, 285.0, 305.0, 322.0, 319.0, 311.0, 314.0, 322.0, 319.0, 317.0, 317.0, 319.0, 287.0, 295.0, 308.0, 316.0, 283.0, 293.0, 290.0, 285.0, 322.0, 311.0, 285.0, 302.0, 318.0, 312.0, 314.0, 316.0, 314.0, 322.0, 311.0, 313.0, 311.0, 322.0, 314.0, 322.0, 285.0, 305.0, 316.0, 314.0, 301.0, 283.0, 295.0, 281.0, 290.0, 292.0, 314.0, 319.0, 311.0, 322.0, 317.0, 319.0, 291.0, 291.0, 286.0, 296.0, 322.0, 317.0, 299.0, 280.0, 281.0, 283.0, 308.0, 319.0, 324.0, 309.0, 311.0, 319.0, 296.0, 286.0, 296.0, 291.0, 288.0, 294.0, 293.0, 289.0, 321.0, 309.0, 259.0, 271.0, 319.0, 314.0, 329.0, 304.0, 314.0, 325.0, 283.0, 296.0, 308.0, 319.0, 314.0, 319.0, 319.0, 317.0, 317.0, 316.0, 288.0, 299.0, 308.0, 319.0, 311.0, 316.0, 316.0, 314.0, 308.0, 322.0, 296.0, 291.0, 288.0, 294.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 301.0, 295.0, 321.0, 315.0, 319.0, 314.0, 316.0, 314.0, 296.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 0.888300023019413, "mean_processing_ms": 0.24596248593117787, "mean_inference_ms": 1.4777042667426168}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8256000, "num_steps_sampled": 4403200, "sample_time_ms": 22324.862, "load_time_ms": 38.863, "grad_time_ms": 
10570.078, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001806688029319048, "policy_loss": -0.005986546631902456, "vf_loss": 83.65050506591797, "vf_explained_var": 0.7647177577018738, "kl": 0.002452569780871272, "entropy": 1.1436399221420288, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4403200, "episodes_total": 11008, "training_iteration": 344, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-41-52", "timestamp": 1660257712, "time_this_iter_s": 33.82224774360657, "time_total_s": 16124.32959485054, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": 
["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16124.32959485054, "timesteps_since_restore": 4403200, "iterations_since_restore": 344, "perf": {"cpu_util_percent": 29.40625, "ram_util_percent": 58.83958333333334}} +{"episode_reward_max": 639.0, "episode_reward_min": 521.0, "episode_reward_mean": 603.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 301.93}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.26, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.41, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.95, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.28, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, 
"useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.95, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.95, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 627.0, 633.0, 636.0, 582.0, 633.0, 535.0, 621.0, 579.0, 624.0, 573.0, 579.0, 633.0, 587.0, 636.0, 579.0, 576.0, 636.0, 630.0, 587.0, 522.0, 582.0, 579.0, 627.0, 639.0, 627.0, 627.0, 541.0, 639.0, 639.0, 587.0, 582.0, 582.0, 639.0, 579.0, 564.0, 627.0, 633.0, 630.0, 582.0, 587.0, 582.0, 582.0, 630.0, 530.0, 633.0, 633.0, 639.0, 579.0, 627.0, 633.0, 636.0, 633.0, 587.0, 627.0, 627.0, 630.0, 630.0, 587.0, 582.0, 582.0, 582.0, 582.0, 596.0, 636.0, 633.0, 630.0, 576.0, 567.0, 582.0, 630.0, 587.0, 630.0, 587.0, 579.0, 581.0, 630.0, 587.0, 576.0, 521.0, 587.0, 639.0, 587.0, 630.0, 630.0, 639.0, 630.0, 630.0, 587.0, 582.0, 582.0, 630.0, 587.0, 639.0, 582.0, 582.0, 630.0, 630.0, 590.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 309.0, 318.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 323.0, 310.0, 273.0, 262.0, 310.0, 311.0, 285.0, 294.0, 322.0, 302.0, 292.0, 281.0, 283.0, 296.0, 317.0, 316.0, 288.0, 299.0, 316.0, 320.0, 288.0, 291.0, 280.0, 296.0, 319.0, 317.0, 314.0, 316.0, 283.0, 304.0, 267.0, 255.0, 289.0, 293.0, 294.0, 285.0, 313.0, 314.0, 319.0, 320.0, 321.0, 306.0, 313.0, 314.0, 271.0, 270.0, 314.0, 325.0, 324.0, 315.0, 297.0, 290.0, 299.0, 283.0, 286.0, 296.0, 322.0, 317.0, 299.0, 280.0, 281.0, 283.0, 308.0, 319.0, 324.0, 309.0, 311.0, 319.0, 296.0, 286.0, 296.0, 291.0, 288.0, 294.0, 293.0, 289.0, 321.0, 309.0, 259.0, 271.0, 319.0, 314.0, 329.0, 304.0, 314.0, 325.0, 283.0, 296.0, 308.0, 319.0, 314.0, 319.0, 319.0, 317.0, 317.0, 316.0, 288.0, 299.0, 308.0, 319.0, 311.0, 316.0, 316.0, 314.0, 308.0, 322.0, 296.0, 291.0, 288.0, 294.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 301.0, 295.0, 321.0, 315.0, 319.0, 314.0, 316.0, 314.0, 296.0, 280.0, 278.0, 289.0, 290.0, 292.0, 321.0, 309.0, 288.0, 299.0, 318.0, 312.0, 288.0, 299.0, 287.0, 292.0, 284.0, 297.0, 323.0, 307.0, 288.0, 299.0, 287.0, 289.0, 260.0, 261.0, 296.0, 291.0, 317.0, 322.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 322.0, 317.0, 311.0, 319.0, 319.0, 311.0, 285.0, 302.0, 299.0, 283.0, 291.0, 291.0, 306.0, 324.0, 296.0, 291.0, 324.0, 315.0, 293.0, 289.0, 298.0, 284.0, 311.0, 319.0, 316.0, 314.0, 296.0, 294.0, 318.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8870510518721875, "mean_processing_ms": 0.24571562902687222, 
"mean_inference_ms": 1.477060351190489}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8280000, "num_steps_sampled": 4416000, "sample_time_ms": 22762.706, "load_time_ms": 38.824, "grad_time_ms": 10571.457, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005129000172019005, "policy_loss": -0.0031147233676165342, "vf_loss": 88.14037322998047, "vf_explained_var": 0.763336181640625, "kl": 0.0019052595598623157, "entropy": 1.1406329870224, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4416000, "episodes_total": 11040, "training_iteration": 345, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-42-32", "timestamp": 1660257752, "time_this_iter_s": 39.47760009765625, "time_total_s": 16163.807194948196, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, 
"num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16163.807194948196, "timesteps_since_restore": 4416000, "iterations_since_restore": 345, "perf": {"cpu_util_percent": 25.21272727272727, "ram_util_percent": 58.801818181818156}} +{"episode_reward_max": 639.0, "episode_reward_min": 521.0, "episode_reward_mean": 602.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 301.185}, "custom_metrics": {"sparse_reward_mean": 208.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.57, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, 
"tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.45, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.19, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.95, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, 
"dish_pickup_agent_0_mean": 6.4, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.95, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.95, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 
19, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 587.0, 569.0, 555.0, 570.0, 639.0, 522.0, 633.0, 579.0, 639.0, 582.0, 627.0, 639.0, 579.0, 639.0, 630.0, 636.0, 639.0, 630.0, 627.0, 579.0, 579.0, 630.0, 573.0, 570.0, 579.0, 630.0, 581.0, 639.0, 587.0, 587.0, 541.0, 636.0, 633.0, 630.0, 576.0, 567.0, 582.0, 630.0, 587.0, 630.0, 587.0, 579.0, 581.0, 630.0, 587.0, 576.0, 521.0, 587.0, 639.0, 587.0, 630.0, 630.0, 639.0, 630.0, 630.0, 587.0, 582.0, 582.0, 630.0, 587.0, 639.0, 582.0, 582.0, 630.0, 630.0, 590.0, 630.0, 582.0, 627.0, 633.0, 636.0, 582.0, 633.0, 535.0, 621.0, 
579.0, 624.0, 573.0, 579.0, 633.0, 587.0, 636.0, 579.0, 576.0, 636.0, 630.0, 587.0, 522.0, 582.0, 579.0, 627.0, 639.0, 627.0, 627.0, 541.0, 639.0, 639.0, 587.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [315.0, 312.0, 299.0, 288.0, 285.0, 284.0, 279.0, 276.0, 287.0, 283.0, 322.0, 317.0, 257.0, 265.0, 319.0, 314.0, 285.0, 294.0, 319.0, 320.0, 291.0, 291.0, 316.0, 311.0, 320.0, 319.0, 298.0, 281.0, 317.0, 322.0, 308.0, 322.0, 319.0, 317.0, 319.0, 320.0, 326.0, 304.0, 318.0, 309.0, 293.0, 286.0, 285.0, 294.0, 321.0, 309.0, 294.0, 279.0, 291.0, 279.0, 283.0, 296.0, 314.0, 316.0, 294.0, 287.0, 322.0, 317.0, 292.0, 295.0, 294.0, 293.0, 260.0, 281.0, 321.0, 315.0, 319.0, 314.0, 316.0, 314.0, 296.0, 280.0, 278.0, 289.0, 290.0, 292.0, 321.0, 309.0, 288.0, 299.0, 318.0, 312.0, 288.0, 299.0, 287.0, 292.0, 284.0, 297.0, 323.0, 307.0, 288.0, 299.0, 287.0, 289.0, 260.0, 261.0, 296.0, 291.0, 317.0, 322.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 322.0, 317.0, 311.0, 319.0, 319.0, 311.0, 285.0, 302.0, 299.0, 283.0, 291.0, 291.0, 306.0, 324.0, 296.0, 291.0, 324.0, 315.0, 293.0, 289.0, 298.0, 284.0, 311.0, 319.0, 316.0, 314.0, 296.0, 294.0, 318.0, 312.0, 289.0, 293.0, 309.0, 318.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 323.0, 310.0, 273.0, 262.0, 310.0, 311.0, 285.0, 294.0, 322.0, 302.0, 292.0, 281.0, 283.0, 296.0, 317.0, 316.0, 288.0, 299.0, 316.0, 320.0, 288.0, 291.0, 280.0, 296.0, 319.0, 317.0, 314.0, 316.0, 283.0, 304.0, 267.0, 255.0, 289.0, 293.0, 294.0, 285.0, 313.0, 314.0, 
319.0, 320.0, 321.0, 306.0, 313.0, 314.0, 271.0, 270.0, 314.0, 325.0, 324.0, 315.0, 297.0, 290.0, 299.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.885814344606481, "mean_processing_ms": 0.24547265426163356, "mean_inference_ms": 1.476598213105171}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8304000, "num_steps_sampled": 4428800, "sample_time_ms": 22961.91, "load_time_ms": 38.412, "grad_time_ms": 10610.867, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004017222672700882, "policy_loss": -0.007523353677242994, "vf_loss": 84.93380737304688, "vf_explained_var": 0.7618634104728699, "kl": 0.0019265868468210101, "entropy": 1.1366103887557983, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4428800, "episodes_total": 11072, "training_iteration": 346, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-43-09", "timestamp": 1660257789, "time_this_iter_s": 37.03333592414856, "time_total_s": 16200.840530872345, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16200.840530872345, "timesteps_since_restore": 4428800, "iterations_since_restore": 346, "perf": {"cpu_util_percent": 30.683018867924527, "ram_util_percent": 58.82264150943394}} +{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 600.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.28}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.96, 
"shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.88, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.34, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.08, 
"potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.39, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.08, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.39, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.39, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 587.0, 630.0, 579.0, 582.0, 519.0, 587.0, 630.0, 582.0, 582.0, 582.0, 630.0, 582.0, 570.0, 587.0, 636.0, 630.0, 587.0, 579.0, 530.0, 587.0, 630.0, 630.0, 582.0, 527.0, 582.0, 630.0, 627.0, 576.0, 636.0, 636.0, 630.0, 630.0, 630.0, 590.0, 630.0, 582.0, 627.0, 633.0, 636.0, 582.0, 633.0, 535.0, 621.0, 579.0, 624.0, 573.0, 
579.0, 633.0, 587.0, 636.0, 579.0, 576.0, 636.0, 630.0, 587.0, 522.0, 582.0, 579.0, 627.0, 639.0, 627.0, 627.0, 541.0, 639.0, 639.0, 587.0, 582.0, 627.0, 587.0, 569.0, 555.0, 570.0, 639.0, 522.0, 633.0, 579.0, 639.0, 582.0, 627.0, 639.0, 579.0, 639.0, 630.0, 636.0, 639.0, 630.0, 627.0, 579.0, 579.0, 630.0, 573.0, 570.0, 579.0, 630.0, 581.0, 639.0, 587.0, 587.0, 541.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 318.0, 287.0, 300.0, 316.0, 314.0, 283.0, 296.0, 285.0, 297.0, 270.0, 249.0, 287.0, 300.0, 319.0, 311.0, 285.0, 297.0, 291.0, 291.0, 288.0, 294.0, 311.0, 319.0, 291.0, 291.0, 295.0, 275.0, 285.0, 302.0, 309.0, 327.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 262.0, 268.0, 293.0, 294.0, 316.0, 314.0, 311.0, 319.0, 286.0, 296.0, 265.0, 262.0, 287.0, 295.0, 316.0, 314.0, 321.0, 306.0, 287.0, 289.0, 316.0, 320.0, 322.0, 314.0, 313.0, 317.0, 311.0, 319.0, 316.0, 314.0, 296.0, 294.0, 318.0, 312.0, 289.0, 293.0, 309.0, 318.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 323.0, 310.0, 273.0, 262.0, 310.0, 311.0, 285.0, 294.0, 322.0, 302.0, 292.0, 281.0, 283.0, 296.0, 317.0, 316.0, 288.0, 299.0, 316.0, 320.0, 288.0, 291.0, 280.0, 296.0, 319.0, 317.0, 314.0, 316.0, 283.0, 304.0, 267.0, 255.0, 289.0, 293.0, 294.0, 285.0, 313.0, 314.0, 319.0, 320.0, 321.0, 306.0, 313.0, 314.0, 271.0, 270.0, 314.0, 325.0, 324.0, 315.0, 297.0, 290.0, 299.0, 283.0, 315.0, 312.0, 299.0, 288.0, 285.0, 284.0, 279.0, 276.0, 287.0, 283.0, 322.0, 317.0, 257.0, 265.0, 319.0, 314.0, 285.0, 294.0, 319.0, 
320.0, 291.0, 291.0, 316.0, 311.0, 320.0, 319.0, 298.0, 281.0, 317.0, 322.0, 308.0, 322.0, 319.0, 317.0, 319.0, 320.0, 326.0, 304.0, 318.0, 309.0, 293.0, 286.0, 285.0, 294.0, 321.0, 309.0, 294.0, 279.0, 291.0, 279.0, 283.0, 296.0, 314.0, 316.0, 294.0, 287.0, 322.0, 317.0, 292.0, 295.0, 294.0, 293.0, 260.0, 281.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8845895698087, "mean_processing_ms": 0.24523366351665968, "mean_inference_ms": 1.4761306525958986}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8328000, "num_steps_sampled": 4441600, "sample_time_ms": 22533.553, "load_time_ms": 38.187, "grad_time_ms": 10390.855, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007659171824343503, "policy_loss": -0.008885729126632214, "vf_loss": 86.87432861328125, "vf_explained_var": 0.7518642544746399, "kl": 0.0019218157976865768, "entropy": 1.1352366209030151, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4441600, "episodes_total": 11104, "training_iteration": 347, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-43-40", "timestamp": 1660257820, "time_this_iter_s": 30.97221803665161, "time_total_s": 16231.812748908997, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16231.812748908997, "timesteps_since_restore": 4441600, "iterations_since_restore": 347, "perf": {"cpu_util_percent": 33.49999999999999, "ram_util_percent": 59.33636363636363}} +{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 602.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.26}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.32, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.02, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.02, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.48, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.04, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.48, "optimal_onion_potting_agent_1_min": 14, 
"optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.48, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 633.0, 627.0, 633.0, 627.0, 576.0, 590.0, 633.0, 582.0, 633.0, 573.0, 576.0, 633.0, 582.0, 633.0, 630.0, 
630.0, 587.0, 630.0, 633.0, 630.0, 582.0, 627.0, 582.0, 627.0, 636.0, 587.0, 633.0, 573.0, 633.0, 576.0, 639.0, 639.0, 587.0, 582.0, 627.0, 587.0, 569.0, 555.0, 570.0, 639.0, 522.0, 633.0, 579.0, 639.0, 582.0, 627.0, 639.0, 579.0, 639.0, 630.0, 636.0, 639.0, 630.0, 627.0, 579.0, 579.0, 630.0, 573.0, 570.0, 579.0, 630.0, 581.0, 639.0, 587.0, 587.0, 541.0, 630.0, 587.0, 630.0, 579.0, 582.0, 519.0, 587.0, 630.0, 582.0, 582.0, 582.0, 630.0, 582.0, 570.0, 587.0, 636.0, 630.0, 587.0, 579.0, 530.0, 587.0, 630.0, 630.0, 582.0, 527.0, 582.0, 630.0, 627.0, 576.0, 636.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 286.0, 293.0, 316.0, 317.0, 308.0, 319.0, 316.0, 317.0, 308.0, 319.0, 299.0, 277.0, 288.0, 302.0, 324.0, 309.0, 291.0, 291.0, 319.0, 314.0, 291.0, 282.0, 290.0, 286.0, 322.0, 311.0, 293.0, 289.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 294.0, 293.0, 316.0, 314.0, 311.0, 322.0, 322.0, 308.0, 288.0, 294.0, 313.0, 314.0, 297.0, 285.0, 313.0, 314.0, 319.0, 317.0, 290.0, 297.0, 316.0, 317.0, 285.0, 288.0, 316.0, 317.0, 285.0, 291.0, 314.0, 325.0, 324.0, 315.0, 297.0, 290.0, 299.0, 283.0, 315.0, 312.0, 299.0, 288.0, 285.0, 284.0, 279.0, 276.0, 287.0, 283.0, 322.0, 317.0, 257.0, 265.0, 319.0, 314.0, 285.0, 294.0, 319.0, 320.0, 291.0, 291.0, 316.0, 311.0, 320.0, 319.0, 298.0, 281.0, 317.0, 322.0, 308.0, 322.0, 319.0, 317.0, 319.0, 320.0, 326.0, 304.0, 318.0, 309.0, 293.0, 286.0, 285.0, 294.0, 321.0, 309.0, 294.0, 279.0, 291.0, 279.0, 283.0, 296.0, 314.0, 
316.0, 294.0, 287.0, 322.0, 317.0, 292.0, 295.0, 294.0, 293.0, 260.0, 281.0, 312.0, 318.0, 287.0, 300.0, 316.0, 314.0, 283.0, 296.0, 285.0, 297.0, 270.0, 249.0, 287.0, 300.0, 319.0, 311.0, 285.0, 297.0, 291.0, 291.0, 288.0, 294.0, 311.0, 319.0, 291.0, 291.0, 295.0, 275.0, 285.0, 302.0, 309.0, 327.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 262.0, 268.0, 293.0, 294.0, 316.0, 314.0, 311.0, 319.0, 286.0, 296.0, 265.0, 262.0, 287.0, 295.0, 316.0, 314.0, 321.0, 306.0, 287.0, 289.0, 316.0, 320.0, 322.0, 314.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8833591807666914, "mean_processing_ms": 0.24499142247523886, "mean_inference_ms": 1.4752658321565872}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8352000, "num_steps_sampled": 4454400, "sample_time_ms": 22743.076, "load_time_ms": 38.058, "grad_time_ms": 10274.124, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003061985597014427, "policy_loss": -0.004540739115327597, "vf_loss": 81.69985961914062, "vf_explained_var": 0.7570112347602844, "kl": 0.0020776980090886354, "entropy": 1.1345181465148926, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4454400, "episodes_total": 11136, "training_iteration": 348, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-44-12", "timestamp": 1660257852, "time_this_iter_s": 32.17093515396118, "time_total_s": 16263.983684062958, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16263.983684062958, "timesteps_since_restore": 4454400, "iterations_since_restore": 348, "perf": {"cpu_util_percent": 33.459999999999994, 
"ram_util_percent": 58.76888888888889}} +{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 601.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.855}, "custom_metrics": {"sparse_reward_mean": 208.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.71, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.56, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.99, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, 
"onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.73, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 639.0, 636.0, 579.0, 587.0, 587.0, 581.0, 639.0, 576.0, 630.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 582.0, 576.0, 630.0, 590.0, 582.0, 582.0, 587.0, 582.0, 627.0, 582.0, 582.0, 527.0, 630.0, 639.0, 630.0, 582.0, 639.0, 587.0, 587.0, 541.0, 630.0, 587.0, 630.0, 579.0, 582.0, 519.0, 587.0, 630.0, 582.0, 582.0, 582.0, 630.0, 582.0, 570.0, 587.0, 636.0, 630.0, 587.0, 579.0, 530.0, 587.0, 630.0, 630.0, 582.0, 527.0, 582.0, 630.0, 627.0, 576.0, 636.0, 636.0, 630.0, 582.0, 579.0, 633.0, 627.0, 633.0, 627.0, 576.0, 590.0, 633.0, 582.0, 633.0, 573.0, 576.0, 633.0, 582.0, 633.0, 630.0, 630.0, 587.0, 630.0, 633.0, 630.0, 582.0, 627.0, 582.0, 627.0, 636.0, 587.0, 633.0, 573.0, 633.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 313.0, 322.0, 317.0, 317.0, 319.0, 291.0, 288.0, 291.0, 296.0, 299.0, 288.0, 296.0, 285.0, 317.0, 322.0, 291.0, 285.0, 319.0, 311.0, 311.0, 319.0, 283.0, 299.0, 293.0, 289.0, 288.0, 296.0, 314.0, 316.0, 319.0, 317.0, 291.0, 291.0, 285.0, 291.0, 310.0, 320.0, 291.0, 299.0, 290.0, 292.0, 283.0, 299.0, 293.0, 294.0, 298.0, 284.0, 320.0, 307.0, 291.0, 291.0, 288.0, 294.0, 256.0, 271.0, 319.0, 311.0, 315.0, 324.0, 316.0, 314.0, 291.0, 291.0, 322.0, 317.0, 292.0, 295.0, 294.0, 293.0, 260.0, 281.0, 312.0, 318.0, 287.0, 300.0, 316.0, 314.0, 283.0, 296.0, 285.0, 297.0, 270.0, 249.0, 287.0, 300.0, 319.0, 311.0, 285.0, 297.0, 291.0, 
291.0, 288.0, 294.0, 311.0, 319.0, 291.0, 291.0, 295.0, 275.0, 285.0, 302.0, 309.0, 327.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 262.0, 268.0, 293.0, 294.0, 316.0, 314.0, 311.0, 319.0, 286.0, 296.0, 265.0, 262.0, 287.0, 295.0, 316.0, 314.0, 321.0, 306.0, 287.0, 289.0, 316.0, 320.0, 322.0, 314.0, 313.0, 317.0, 294.0, 288.0, 286.0, 293.0, 316.0, 317.0, 308.0, 319.0, 316.0, 317.0, 308.0, 319.0, 299.0, 277.0, 288.0, 302.0, 324.0, 309.0, 291.0, 291.0, 319.0, 314.0, 291.0, 282.0, 290.0, 286.0, 322.0, 311.0, 293.0, 289.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 294.0, 293.0, 316.0, 314.0, 311.0, 322.0, 322.0, 308.0, 288.0, 294.0, 313.0, 314.0, 297.0, 285.0, 313.0, 314.0, 319.0, 317.0, 290.0, 297.0, 316.0, 317.0, 285.0, 288.0, 316.0, 317.0, 285.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8821320498213592, "mean_processing_ms": 0.24474905703796943, "mean_inference_ms": 1.4742298243357532}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8376000, "num_steps_sampled": 4467200, "sample_time_ms": 22958.396, "load_time_ms": 37.77, "grad_time_ms": 10335.342, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004589398857206106, "policy_loss": -0.003508263034746051, "vf_loss": 86.6531982421875, "vf_explained_var": 0.7629675269126892, "kl": 0.0021643172949552536, "entropy": 1.1353298425674438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4467200, "episodes_total": 11168, "training_iteration": 349, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-44-45", "timestamp": 1660257885, "time_this_iter_s": 32.81572699546814, "time_total_s": 16296.799411058426, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16296.799411058426, "timesteps_since_restore": 4467200, "iterations_since_restore": 349, "perf": {"cpu_util_percent": 33.702173913043474, "ram_util_percent": 58.7978260869565}} +{"episode_reward_max": 639.0, "episode_reward_min": 527.0, "episode_reward_mean": 611.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 305.705}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.81, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.28, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.23, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.78, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.61, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.7, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.23, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.78, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.23, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.78, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 627.0, 633.0, 579.0, 633.0, 633.0, 636.0, 630.0, 627.0, 633.0, 630.0, 630.0, 630.0, 630.0, 636.0, 630.0, 587.0, 627.0, 633.0, 630.0, 587.0, 633.0, 581.0, 630.0, 633.0, 593.0, 579.0, 636.0, 633.0, 633.0, 636.0, 636.0, 576.0, 636.0, 636.0, 630.0, 582.0, 579.0, 633.0, 627.0, 633.0, 627.0, 576.0, 590.0, 633.0, 582.0, 633.0, 573.0, 576.0, 633.0, 582.0, 633.0, 630.0, 630.0, 587.0, 630.0, 633.0, 630.0, 582.0, 627.0, 582.0, 627.0, 636.0, 587.0, 633.0, 573.0, 633.0, 576.0, 627.0, 639.0, 636.0, 579.0, 587.0, 587.0, 581.0, 639.0, 576.0, 630.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 582.0, 576.0, 630.0, 590.0, 582.0, 582.0, 587.0, 582.0, 627.0, 582.0, 582.0, 527.0, 630.0, 639.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 320.0, 313.0, 314.0, 319.0, 314.0, 280.0, 299.0, 317.0, 316.0, 322.0, 311.0, 321.0, 315.0, 316.0, 314.0, 318.0, 309.0, 319.0, 314.0, 321.0, 309.0, 319.0, 311.0, 314.0, 316.0, 318.0, 312.0, 314.0, 322.0, 316.0, 314.0, 299.0, 288.0, 308.0, 319.0, 316.0, 317.0, 306.0, 324.0, 296.0, 291.0, 314.0, 319.0, 295.0, 286.0, 308.0, 322.0, 319.0, 314.0, 296.0, 297.0, 297.0, 282.0, 311.0, 
325.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 314.0, 322.0, 287.0, 289.0, 316.0, 320.0, 322.0, 314.0, 313.0, 317.0, 294.0, 288.0, 286.0, 293.0, 316.0, 317.0, 308.0, 319.0, 316.0, 317.0, 308.0, 319.0, 299.0, 277.0, 288.0, 302.0, 324.0, 309.0, 291.0, 291.0, 319.0, 314.0, 291.0, 282.0, 290.0, 286.0, 322.0, 311.0, 293.0, 289.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 294.0, 293.0, 316.0, 314.0, 311.0, 322.0, 322.0, 308.0, 288.0, 294.0, 313.0, 314.0, 297.0, 285.0, 313.0, 314.0, 319.0, 317.0, 290.0, 297.0, 316.0, 317.0, 285.0, 288.0, 316.0, 317.0, 285.0, 291.0, 314.0, 313.0, 322.0, 317.0, 317.0, 319.0, 291.0, 288.0, 291.0, 296.0, 299.0, 288.0, 296.0, 285.0, 317.0, 322.0, 291.0, 285.0, 319.0, 311.0, 311.0, 319.0, 283.0, 299.0, 293.0, 289.0, 288.0, 296.0, 314.0, 316.0, 319.0, 317.0, 291.0, 291.0, 285.0, 291.0, 310.0, 320.0, 291.0, 299.0, 290.0, 292.0, 283.0, 299.0, 293.0, 294.0, 298.0, 284.0, 320.0, 307.0, 291.0, 291.0, 288.0, 294.0, 256.0, 271.0, 319.0, 311.0, 315.0, 324.0, 316.0, 314.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8809197598148512, "mean_processing_ms": 0.2445094292995116, "mean_inference_ms": 1.4734182783968888}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8400000, "num_steps_sampled": 4480000, "sample_time_ms": 23484.152, "load_time_ms": 37.181, "grad_time_ms": 10284.073, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0043442933820188046, "policy_loss": -0.002717310329899192, "vf_loss": 76.19442749023438, "vf_explained_var": 0.7705621719360352, "kl": 0.0019369550282135606, "entropy": 1.1156750917434692, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4480000, "episodes_total": 11200, "training_iteration": 350, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-45-21", "timestamp": 1660257921, "time_this_iter_s": 
35.891582012176514, "time_total_s": 16332.690993070602, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": 
true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, 
"shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16332.690993070602, "timesteps_since_restore": 4480000, "iterations_since_restore": 350, "perf": {"cpu_util_percent": 33.81372549019608, "ram_util_percent": 58.90000000000001}} +{"episode_reward_max": 639.0, "episode_reward_min": 524.0, "episode_reward_mean": 609.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.785}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.57, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, 
"potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.75, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.79, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, 
"soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.79, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.79, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, 
"catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 587.0, 587.0, 593.0, 630.0, 627.0, 524.0, 630.0, 582.0, 576.0, 630.0, 579.0, 567.0, 579.0, 590.0, 630.0, 630.0, 627.0, 633.0, 636.0, 582.0, 582.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 587.0, 573.0, 630.0, 582.0, 633.0, 573.0, 633.0, 576.0, 627.0, 639.0, 636.0, 579.0, 587.0, 587.0, 581.0, 639.0, 576.0, 630.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 582.0, 576.0, 630.0, 590.0, 582.0, 582.0, 587.0, 582.0, 627.0, 582.0, 582.0, 527.0, 630.0, 639.0, 630.0, 582.0, 636.0, 627.0, 633.0, 579.0, 633.0, 633.0, 636.0, 630.0, 627.0, 633.0, 630.0, 630.0, 630.0, 630.0, 636.0, 630.0, 587.0, 627.0, 633.0, 630.0, 587.0, 633.0, 581.0, 630.0, 633.0, 593.0, 579.0, 636.0, 633.0, 633.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 299.0, 288.0, 301.0, 286.0, 302.0, 291.0, 314.0, 316.0, 319.0, 308.0, 253.0, 271.0, 313.0, 317.0, 283.0, 299.0, 277.0, 299.0, 318.0, 312.0, 288.0, 291.0, 274.0, 
293.0, 291.0, 288.0, 290.0, 300.0, 318.0, 312.0, 311.0, 319.0, 313.0, 314.0, 316.0, 317.0, 314.0, 322.0, 290.0, 292.0, 300.0, 282.0, 319.0, 314.0, 319.0, 317.0, 291.0, 296.0, 324.0, 312.0, 316.0, 317.0, 316.0, 317.0, 299.0, 288.0, 279.0, 294.0, 311.0, 319.0, 291.0, 291.0, 316.0, 317.0, 285.0, 288.0, 316.0, 317.0, 285.0, 291.0, 314.0, 313.0, 322.0, 317.0, 317.0, 319.0, 291.0, 288.0, 291.0, 296.0, 299.0, 288.0, 296.0, 285.0, 317.0, 322.0, 291.0, 285.0, 319.0, 311.0, 311.0, 319.0, 283.0, 299.0, 293.0, 289.0, 288.0, 296.0, 314.0, 316.0, 319.0, 317.0, 291.0, 291.0, 285.0, 291.0, 310.0, 320.0, 291.0, 299.0, 290.0, 292.0, 283.0, 299.0, 293.0, 294.0, 298.0, 284.0, 320.0, 307.0, 291.0, 291.0, 288.0, 294.0, 256.0, 271.0, 319.0, 311.0, 315.0, 324.0, 316.0, 314.0, 291.0, 291.0, 316.0, 320.0, 313.0, 314.0, 319.0, 314.0, 280.0, 299.0, 317.0, 316.0, 322.0, 311.0, 321.0, 315.0, 316.0, 314.0, 318.0, 309.0, 319.0, 314.0, 321.0, 309.0, 319.0, 311.0, 314.0, 316.0, 318.0, 312.0, 314.0, 322.0, 316.0, 314.0, 299.0, 288.0, 308.0, 319.0, 316.0, 317.0, 306.0, 324.0, 296.0, 291.0, 314.0, 319.0, 295.0, 286.0, 308.0, 322.0, 319.0, 314.0, 296.0, 297.0, 297.0, 282.0, 311.0, 325.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.879725637823849, "mean_processing_ms": 0.24427457411119732, "mean_inference_ms": 1.472868024343041}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8424000, "num_steps_sampled": 4492800, "sample_time_ms": 23466.741, "load_time_ms": 37.693, "grad_time_ms": 10196.154, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034591767471283674, "policy_loss": -0.0040799533016979694, "vf_loss": 81.06632232666016, "vf_explained_var": 0.7659358978271484, "kl": 0.0018826290033757687, "entropy": 1.135020136833191, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 
4492800, "episodes_total": 11232, "training_iteration": 351, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-45-57", "timestamp": 1660257957, "time_this_iter_s": 36.290544748306274, "time_total_s": 16368.981537818909, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, 
"clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": 
"MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16368.981537818909, "timesteps_since_restore": 4492800, "iterations_since_restore": 351, "perf": {"cpu_util_percent": 32.60196078431373, "ram_util_percent": 58.86274509803921}} +{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 613.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 247.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.59}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.98, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, 
"useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.86, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, 
"useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.21, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, 
"catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 633.0, 587.0, 516.0, 587.0, 633.0, 609.0, 633.0, 582.0, 633.0, 570.0, 633.0, 579.0, 639.0, 627.0, 636.0, 533.0, 579.0, 633.0, 630.0, 633.0, 582.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0, 630.0, 639.0, 630.0, 582.0, 636.0, 627.0, 633.0, 579.0, 633.0, 633.0, 636.0, 630.0, 627.0, 633.0, 630.0, 630.0, 630.0, 630.0, 636.0, 630.0, 587.0, 627.0, 633.0, 630.0, 587.0, 633.0, 581.0, 630.0, 633.0, 593.0, 579.0, 636.0, 633.0, 633.0, 636.0, 636.0, 636.0, 587.0, 587.0, 593.0, 630.0, 627.0, 524.0, 630.0, 582.0, 576.0, 630.0, 579.0, 567.0, 579.0, 590.0, 630.0, 630.0, 627.0, 633.0, 636.0, 582.0, 582.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 587.0, 573.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], 
"policy_ppo_reward": [288.0, 279.0, 318.0, 315.0, 292.0, 295.0, 269.0, 247.0, 286.0, 301.0, 319.0, 314.0, 304.0, 305.0, 306.0, 327.0, 291.0, 291.0, 309.0, 324.0, 282.0, 288.0, 319.0, 314.0, 288.0, 291.0, 317.0, 322.0, 321.0, 306.0, 319.0, 317.0, 267.0, 266.0, 295.0, 284.0, 326.0, 307.0, 311.0, 319.0, 319.0, 314.0, 288.0, 294.0, 313.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 323.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 316.0, 296.0, 286.0, 319.0, 311.0, 319.0, 311.0, 315.0, 324.0, 316.0, 314.0, 291.0, 291.0, 316.0, 320.0, 313.0, 314.0, 319.0, 314.0, 280.0, 299.0, 317.0, 316.0, 322.0, 311.0, 321.0, 315.0, 316.0, 314.0, 318.0, 309.0, 319.0, 314.0, 321.0, 309.0, 319.0, 311.0, 314.0, 316.0, 318.0, 312.0, 314.0, 322.0, 316.0, 314.0, 299.0, 288.0, 308.0, 319.0, 316.0, 317.0, 306.0, 324.0, 296.0, 291.0, 314.0, 319.0, 295.0, 286.0, 308.0, 322.0, 319.0, 314.0, 296.0, 297.0, 297.0, 282.0, 311.0, 325.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 314.0, 322.0, 314.0, 322.0, 299.0, 288.0, 301.0, 286.0, 302.0, 291.0, 314.0, 316.0, 319.0, 308.0, 253.0, 271.0, 313.0, 317.0, 283.0, 299.0, 277.0, 299.0, 318.0, 312.0, 288.0, 291.0, 274.0, 293.0, 291.0, 288.0, 290.0, 300.0, 318.0, 312.0, 311.0, 319.0, 313.0, 314.0, 316.0, 317.0, 314.0, 322.0, 290.0, 292.0, 300.0, 282.0, 319.0, 314.0, 319.0, 317.0, 291.0, 296.0, 324.0, 312.0, 316.0, 317.0, 316.0, 317.0, 299.0, 288.0, 279.0, 294.0, 311.0, 319.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.878542017918733, "mean_processing_ms": 0.2440427958629624, "mean_inference_ms": 1.4723767990503938}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8448000, "num_steps_sampled": 4505600, "sample_time_ms": 23873.275, "load_time_ms": 37.543, "grad_time_ms": 10389.923, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013407707447186112, "policy_loss": -0.006131558213382959, "vf_loss": 80.36180877685547, "vf_explained_var": 0.7696139812469482, "kl": 
0.0018947357311844826, "entropy": 1.127698540687561, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4505600, "episodes_total": 11264, "training_iteration": 352, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-46-31", "timestamp": 1660257991, "time_this_iter_s": 34.20055317878723, "time_total_s": 16403.182090997696, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), 
float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16403.182090997696, "timesteps_since_restore": 4505600, "iterations_since_restore": 352, "perf": {"cpu_util_percent": 33.57142857142857, "ram_util_percent": 58.8061224489796}} +{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 602.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.135}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.07, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.25, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.53, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.04, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 582.0, 630.0, 582.0, 582.0, 576.0, 582.0, 582.0, 582.0, 579.0, 567.0, 633.0, 459.0, 630.0, 582.0, 579.0, 582.0, 636.0, 584.0, 587.0, 627.0, 579.0, 630.0, 639.0, 530.0, 582.0, 630.0, 582.0, 636.0, 513.0, 630.0, 579.0, 633.0, 633.0, 636.0, 636.0, 636.0, 587.0, 587.0, 593.0, 630.0, 627.0, 524.0, 630.0, 582.0, 576.0, 630.0, 579.0, 567.0, 579.0, 590.0, 630.0, 630.0, 627.0, 633.0, 636.0, 582.0, 582.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 587.0, 573.0, 630.0, 582.0, 567.0, 633.0, 587.0, 516.0, 587.0, 633.0, 609.0, 633.0, 582.0, 633.0, 570.0, 633.0, 579.0, 639.0, 627.0, 636.0, 533.0, 579.0, 633.0, 630.0, 633.0, 582.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 255.0, 289.0, 293.0, 314.0, 316.0, 296.0, 286.0, 294.0, 288.0, 283.0, 293.0, 286.0, 296.0, 295.0, 287.0, 296.0, 286.0, 285.0, 294.0, 287.0, 280.0, 311.0, 322.0, 237.0, 222.0, 311.0, 319.0, 285.0, 297.0, 295.0, 284.0, 296.0, 286.0, 316.0, 320.0, 298.0, 286.0, 298.0, 289.0, 305.0, 322.0, 285.0, 294.0, 320.0, 310.0, 322.0, 317.0, 270.0, 260.0, 291.0, 291.0, 316.0, 314.0, 298.0, 284.0, 311.0, 325.0, 259.0, 254.0, 314.0, 316.0, 302.0, 277.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 314.0, 322.0, 314.0, 322.0, 299.0, 288.0, 301.0, 286.0, 302.0, 291.0, 314.0, 316.0, 319.0, 308.0, 253.0, 271.0, 313.0, 317.0, 283.0, 299.0, 277.0, 299.0, 318.0, 312.0, 288.0, 291.0, 274.0, 293.0, 291.0, 288.0, 290.0, 300.0, 318.0, 312.0, 311.0, 319.0, 313.0, 314.0, 316.0, 317.0, 314.0, 322.0, 290.0, 292.0, 300.0, 282.0, 319.0, 314.0, 319.0, 317.0, 291.0, 296.0, 324.0, 312.0, 316.0, 317.0, 316.0, 317.0, 299.0, 288.0, 279.0, 294.0, 311.0, 319.0, 291.0, 291.0, 288.0, 279.0, 318.0, 315.0, 292.0, 295.0, 269.0, 247.0, 286.0, 301.0, 319.0, 314.0, 304.0, 305.0, 306.0, 327.0, 291.0, 291.0, 309.0, 324.0, 282.0, 288.0, 319.0, 314.0, 288.0, 291.0, 317.0, 322.0, 321.0, 306.0, 319.0, 317.0, 267.0, 266.0, 295.0, 284.0, 326.0, 307.0, 311.0, 319.0, 319.0, 314.0, 288.0, 294.0, 313.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 323.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 316.0, 296.0, 286.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8773547258582282, "mean_processing_ms": 0.24380845402332824, "mean_inference_ms": 1.471635610090532}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8472000, "num_steps_sampled": 4518400, "sample_time_ms": 23706.56, "load_time_ms": 37.425, "grad_time_ms": 10690.015, "update_time_ms": 0.001, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011104041477665305, "policy_loss": -0.00902702659368515, "vf_loss": 84.78372955322266, "vf_explained_var": 0.7670722007751465, "kl": 0.0019426337676122785, "entropy": 1.1235073804855347, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4518400, "episodes_total": 11296, "training_iteration": 353, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-47-03", "timestamp": 1660258023, "time_this_iter_s": 31.92423105239868, "time_total_s": 16435.106322050095, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 
1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16435.106322050095, "timesteps_since_restore": 4518400, "iterations_since_restore": 353, "perf": {"cpu_util_percent": 34.76222222222222, "ram_util_percent": 59.38222222222222}} +{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 600.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.055}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.51, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.56, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.23, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.3, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.54, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, 
"useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.02, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 627.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 573.0, 633.0, 630.0, 582.0, 639.0, 576.0, 582.0, 639.0, 633.0, 587.0, 627.0, 636.0, 636.0, 630.0, 627.0, 579.0, 536.0, 636.0, 627.0, 582.0, 633.0, 579.0, 582.0, 582.0, 587.0, 573.0, 630.0, 582.0, 567.0, 633.0, 587.0, 516.0, 587.0, 633.0, 609.0, 633.0, 582.0, 633.0, 570.0, 633.0, 579.0, 639.0, 627.0, 636.0, 533.0, 579.0, 633.0, 630.0, 633.0, 582.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0, 519.0, 582.0, 630.0, 582.0, 582.0, 576.0, 582.0, 582.0, 582.0, 579.0, 567.0, 633.0, 459.0, 630.0, 582.0, 579.0, 582.0, 636.0, 584.0, 587.0, 627.0, 579.0, 630.0, 639.0, 530.0, 582.0, 630.0, 582.0, 636.0, 513.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 293.0, 313.0, 314.0, 287.0, 292.0, 291.0, 291.0, 285.0, 294.0, 296.0, 286.0, 321.0, 312.0, 293.0, 289.0, 280.0, 293.0, 324.0, 309.0, 314.0, 316.0, 298.0, 284.0, 317.0, 322.0, 283.0, 293.0, 301.0, 281.0, 322.0, 317.0, 311.0, 322.0, 293.0, 294.0, 311.0, 316.0, 319.0, 317.0, 322.0, 314.0, 318.0, 312.0, 313.0, 314.0, 288.0, 291.0, 274.0, 262.0, 316.0, 320.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 289.0, 290.0, 288.0, 294.0, 291.0, 291.0, 299.0, 288.0, 279.0, 294.0, 311.0, 319.0, 291.0, 291.0, 288.0, 279.0, 318.0, 315.0, 292.0, 295.0, 269.0, 247.0, 286.0, 301.0, 319.0, 314.0, 304.0, 305.0, 306.0, 327.0, 291.0, 291.0, 309.0, 324.0, 282.0, 288.0, 319.0, 314.0, 288.0, 291.0, 317.0, 322.0, 321.0, 306.0, 319.0, 317.0, 267.0, 266.0, 295.0, 284.0, 326.0, 307.0, 311.0, 319.0, 319.0, 314.0, 288.0, 294.0, 313.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 323.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 316.0, 296.0, 286.0, 319.0, 311.0, 264.0, 255.0, 289.0, 293.0, 314.0, 316.0, 296.0, 286.0, 294.0, 288.0, 283.0, 293.0, 286.0, 296.0, 295.0, 287.0, 296.0, 286.0, 285.0, 294.0, 287.0, 280.0, 311.0, 322.0, 237.0, 222.0, 311.0, 319.0, 285.0, 297.0, 295.0, 284.0, 296.0, 286.0, 316.0, 320.0, 298.0, 286.0, 298.0, 289.0, 305.0, 322.0, 285.0, 294.0, 320.0, 310.0, 322.0, 317.0, 270.0, 260.0, 291.0, 291.0, 316.0, 314.0, 298.0, 284.0, 311.0, 325.0, 259.0, 254.0, 314.0, 316.0, 302.0, 277.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8761658335288303, "mean_processing_ms": 0.24357312011055882, "mean_inference_ms": 
1.4706370891819096}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8496000, "num_steps_sampled": 4531200, "sample_time_ms": 23737.686, "load_time_ms": 37.309, "grad_time_ms": 10476.005, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014189122011885047, "policy_loss": -0.006124518811702728, "vf_loss": 81.1131591796875, "vf_explained_var": 0.7619540095329285, "kl": 0.002155100228264928, "entropy": 1.1357545852661133, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4531200, "episodes_total": 11328, "training_iteration": 354, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-47-35", "timestamp": 1660258055, "time_this_iter_s": 31.991327047348022, "time_total_s": 16467.097649097443, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16467.097649097443, "timesteps_since_restore": 4531200, "iterations_since_restore": 354, "perf": {"cpu_util_percent": 34.031111111111116, "ram_util_percent": 59.01333333333334}} +{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 601.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 300.845}, "custom_metrics": {"sparse_reward_mean": 208.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.29, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.61, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.52, 
"dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.02, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, 
"viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 567.0, 570.0, 639.0, 636.0, 587.0, 630.0, 630.0, 636.0, 633.0, 587.0, 639.0, 570.0, 630.0, 636.0, 582.0, 587.0, 633.0, 639.0, 630.0, 633.0, 582.0, 639.0, 579.0, 587.0, 630.0, 587.0, 633.0, 627.0, 576.0, 630.0, 636.0, 630.0, 582.0, 630.0, 519.0, 582.0, 630.0, 582.0, 582.0, 576.0, 582.0, 582.0, 582.0, 579.0, 567.0, 633.0, 459.0, 630.0, 582.0, 579.0, 582.0, 636.0, 584.0, 587.0, 627.0, 579.0, 630.0, 639.0, 530.0, 582.0, 630.0, 582.0, 636.0, 513.0, 630.0, 579.0, 587.0, 627.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 573.0, 
633.0, 630.0, 582.0, 639.0, 576.0, 582.0, 639.0, 633.0, 587.0, 627.0, 636.0, 636.0, 630.0, 627.0, 579.0, 536.0, 636.0, 627.0, 582.0, 633.0, 579.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 283.0, 316.0, 320.0, 284.0, 283.0, 278.0, 292.0, 317.0, 322.0, 316.0, 320.0, 293.0, 294.0, 311.0, 319.0, 323.0, 307.0, 311.0, 325.0, 319.0, 314.0, 296.0, 291.0, 324.0, 315.0, 285.0, 285.0, 323.0, 307.0, 324.0, 312.0, 296.0, 286.0, 289.0, 298.0, 319.0, 314.0, 322.0, 317.0, 326.0, 304.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 291.0, 291.0, 296.0, 314.0, 316.0, 299.0, 288.0, 308.0, 325.0, 313.0, 314.0, 286.0, 290.0, 324.0, 306.0, 319.0, 317.0, 314.0, 316.0, 296.0, 286.0, 319.0, 311.0, 264.0, 255.0, 289.0, 293.0, 314.0, 316.0, 296.0, 286.0, 294.0, 288.0, 283.0, 293.0, 286.0, 296.0, 295.0, 287.0, 296.0, 286.0, 285.0, 294.0, 287.0, 280.0, 311.0, 322.0, 237.0, 222.0, 311.0, 319.0, 285.0, 297.0, 295.0, 284.0, 296.0, 286.0, 316.0, 320.0, 298.0, 286.0, 298.0, 289.0, 305.0, 322.0, 285.0, 294.0, 320.0, 310.0, 322.0, 317.0, 270.0, 260.0, 291.0, 291.0, 316.0, 314.0, 298.0, 284.0, 311.0, 325.0, 259.0, 254.0, 314.0, 316.0, 302.0, 277.0, 294.0, 293.0, 313.0, 314.0, 287.0, 292.0, 291.0, 291.0, 285.0, 294.0, 296.0, 286.0, 321.0, 312.0, 293.0, 289.0, 280.0, 293.0, 324.0, 309.0, 314.0, 316.0, 298.0, 284.0, 317.0, 322.0, 283.0, 293.0, 301.0, 281.0, 322.0, 317.0, 311.0, 322.0, 293.0, 294.0, 311.0, 316.0, 319.0, 317.0, 322.0, 314.0, 318.0, 312.0, 313.0, 314.0, 288.0, 291.0, 274.0, 
262.0, 316.0, 320.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 289.0, 290.0, 288.0, 294.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.874971949450553, "mean_processing_ms": 0.24333505417716514, "mean_inference_ms": 1.4694074728869129}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8520000, "num_steps_sampled": 4544000, "sample_time_ms": 22906.057, "load_time_ms": 36.777, "grad_time_ms": 10398.662, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002145820064470172, "policy_loss": -0.005216358229517937, "vf_loss": 79.28690338134766, "vf_explained_var": 0.7675671577453613, "kl": 0.0018057804554700851, "entropy": 1.1330245733261108, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4544000, "episodes_total": 11360, "training_iteration": 355, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-48-06", "timestamp": 1660258086, "time_this_iter_s": 30.381834983825684, "time_total_s": 16497.47948408127, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16497.47948408127, "timesteps_since_restore": 4544000, "iterations_since_restore": 355, "perf": {"cpu_util_percent": 34.25348837209302, "ram_util_percent": 58.85813953488373}} +{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 605.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 302.665}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.53, 
"shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.29, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.74, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.0, 
"potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.87, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.54, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.72, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.87, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.87, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 633.0, 582.0, 590.0, 636.0, 627.0, 587.0, 630.0, 636.0, 630.0, 630.0, 630.0, 582.0, 582.0, 539.0, 582.0, 582.0, 576.0, 590.0, 627.0, 582.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 576.0, 579.0, 582.0, 584.0, 636.0, 513.0, 630.0, 579.0, 587.0, 627.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 573.0, 633.0, 630.0, 
582.0, 639.0, 576.0, 582.0, 639.0, 633.0, 587.0, 627.0, 636.0, 636.0, 630.0, 627.0, 579.0, 536.0, 636.0, 627.0, 582.0, 633.0, 579.0, 582.0, 582.0, 582.0, 636.0, 567.0, 570.0, 639.0, 636.0, 587.0, 630.0, 630.0, 636.0, 633.0, 587.0, 639.0, 570.0, 630.0, 636.0, 582.0, 587.0, 633.0, 639.0, 630.0, 633.0, 582.0, 639.0, 579.0, 587.0, 630.0, 587.0, 633.0, 627.0, 576.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 294.0, 288.0, 316.0, 317.0, 293.0, 289.0, 288.0, 302.0, 319.0, 317.0, 311.0, 316.0, 301.0, 286.0, 318.0, 312.0, 316.0, 320.0, 316.0, 314.0, 316.0, 314.0, 316.0, 314.0, 289.0, 293.0, 288.0, 294.0, 265.0, 274.0, 283.0, 299.0, 288.0, 294.0, 274.0, 302.0, 296.0, 294.0, 321.0, 306.0, 294.0, 288.0, 308.0, 319.0, 321.0, 309.0, 287.0, 292.0, 320.0, 319.0, 301.0, 281.0, 318.0, 315.0, 293.0, 283.0, 290.0, 289.0, 291.0, 291.0, 296.0, 288.0, 311.0, 325.0, 259.0, 254.0, 314.0, 316.0, 302.0, 277.0, 294.0, 293.0, 313.0, 314.0, 287.0, 292.0, 291.0, 291.0, 285.0, 294.0, 296.0, 286.0, 321.0, 312.0, 293.0, 289.0, 280.0, 293.0, 324.0, 309.0, 314.0, 316.0, 298.0, 284.0, 317.0, 322.0, 283.0, 293.0, 301.0, 281.0, 322.0, 317.0, 311.0, 322.0, 293.0, 294.0, 311.0, 316.0, 319.0, 317.0, 322.0, 314.0, 318.0, 312.0, 313.0, 314.0, 288.0, 291.0, 274.0, 262.0, 316.0, 320.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 289.0, 290.0, 288.0, 294.0, 291.0, 291.0, 299.0, 283.0, 316.0, 320.0, 284.0, 283.0, 278.0, 292.0, 317.0, 322.0, 316.0, 320.0, 293.0, 294.0, 311.0, 319.0, 323.0, 307.0, 311.0, 
325.0, 319.0, 314.0, 296.0, 291.0, 324.0, 315.0, 285.0, 285.0, 323.0, 307.0, 324.0, 312.0, 296.0, 286.0, 289.0, 298.0, 319.0, 314.0, 322.0, 317.0, 326.0, 304.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 291.0, 291.0, 296.0, 314.0, 316.0, 299.0, 288.0, 308.0, 325.0, 313.0, 314.0, 286.0, 290.0, 324.0, 306.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8737833276000079, "mean_processing_ms": 0.243097780431969, "mean_inference_ms": 1.4681762334073296}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8544000, "num_steps_sampled": 4556800, "sample_time_ms": 22541.516, "load_time_ms": 36.891, "grad_time_ms": 10138.825, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00407541124150157, "policy_loss": -0.0034555860329419374, "vf_loss": 80.97249603271484, "vf_explained_var": 0.7684476375579834, "kl": 0.001921386457979679, "entropy": 1.1324900388717651, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4556800, "episodes_total": 11392, "training_iteration": 356, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-48-36", "timestamp": 1660258116, "time_this_iter_s": 30.78407096862793, "time_total_s": 16528.263555049896, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16528.263555049896, "timesteps_since_restore": 4556800, "iterations_since_restore": 356, "perf": {"cpu_util_percent": 32.61818181818182, "ram_util_percent": 59.45681818181819}} +{"episode_reward_max": 639.0, "episode_reward_min": 539.0, "episode_reward_mean": 609.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 265.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 304.845}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.69, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.62, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.37, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.91, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.16, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.96, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.69, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.16, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.96, "optimal_onion_potting_agent_1_min": 13, 
"optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.16, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.96, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 636.0, 639.0, 627.0, 630.0, 582.0, 633.0, 639.0, 573.0, 636.0, 579.0, 633.0, 636.0, 633.0, 636.0, 579.0, 
582.0, 579.0, 630.0, 587.0, 636.0, 582.0, 636.0, 633.0, 627.0, 630.0, 639.0, 584.0, 627.0, 627.0, 576.0, 633.0, 579.0, 582.0, 582.0, 582.0, 636.0, 567.0, 570.0, 639.0, 636.0, 587.0, 630.0, 630.0, 636.0, 633.0, 587.0, 639.0, 570.0, 630.0, 636.0, 582.0, 587.0, 633.0, 639.0, 630.0, 633.0, 582.0, 639.0, 579.0, 587.0, 630.0, 587.0, 633.0, 627.0, 576.0, 630.0, 630.0, 582.0, 633.0, 582.0, 590.0, 636.0, 627.0, 587.0, 630.0, 636.0, 630.0, 630.0, 630.0, 582.0, 582.0, 539.0, 582.0, 582.0, 576.0, 590.0, 627.0, 582.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 576.0, 579.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 316.0, 317.0, 319.0, 317.0, 325.0, 314.0, 313.0, 314.0, 321.0, 309.0, 296.0, 286.0, 313.0, 320.0, 317.0, 322.0, 287.0, 286.0, 312.0, 324.0, 286.0, 293.0, 321.0, 312.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 291.0, 288.0, 293.0, 289.0, 292.0, 287.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 286.0, 296.0, 316.0, 320.0, 314.0, 319.0, 313.0, 314.0, 316.0, 314.0, 322.0, 317.0, 290.0, 294.0, 311.0, 316.0, 319.0, 308.0, 293.0, 283.0, 314.0, 319.0, 289.0, 290.0, 288.0, 294.0, 291.0, 291.0, 299.0, 283.0, 316.0, 320.0, 284.0, 283.0, 278.0, 292.0, 317.0, 322.0, 316.0, 320.0, 293.0, 294.0, 311.0, 319.0, 323.0, 307.0, 311.0, 325.0, 319.0, 314.0, 296.0, 291.0, 324.0, 315.0, 285.0, 285.0, 323.0, 307.0, 324.0, 312.0, 296.0, 286.0, 289.0, 298.0, 319.0, 314.0, 322.0, 317.0, 326.0, 304.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 291.0, 291.0, 296.0, 314.0, 
316.0, 299.0, 288.0, 308.0, 325.0, 313.0, 314.0, 286.0, 290.0, 324.0, 306.0, 313.0, 317.0, 294.0, 288.0, 316.0, 317.0, 293.0, 289.0, 288.0, 302.0, 319.0, 317.0, 311.0, 316.0, 301.0, 286.0, 318.0, 312.0, 316.0, 320.0, 316.0, 314.0, 316.0, 314.0, 316.0, 314.0, 289.0, 293.0, 288.0, 294.0, 265.0, 274.0, 283.0, 299.0, 288.0, 294.0, 274.0, 302.0, 296.0, 294.0, 321.0, 306.0, 294.0, 288.0, 308.0, 319.0, 321.0, 309.0, 287.0, 292.0, 320.0, 319.0, 301.0, 281.0, 318.0, 315.0, 293.0, 283.0, 290.0, 289.0, 291.0, 291.0, 296.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8725992979631204, "mean_processing_ms": 0.24286049780551075, "mean_inference_ms": 1.4669091441555409}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8568000, "num_steps_sampled": 4569600, "sample_time_ms": 22487.797, "load_time_ms": 37.05, "grad_time_ms": 10111.337, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030587399378418922, "policy_loss": -0.004343332722783089, "vf_loss": 79.63693237304688, "vf_explained_var": 0.7643921971321106, "kl": 0.0021028893534094095, "entropy": 1.1232417821884155, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4569600, "episodes_total": 11424, "training_iteration": 357, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-49-07", "timestamp": 1660258147, "time_this_iter_s": 30.161853790283203, "time_total_s": 16558.42540884018, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16558.42540884018, "timesteps_since_restore": 4569600, "iterations_since_restore": 357, "perf": {"cpu_util_percent": 33.550000000000004, 
"ram_util_percent": 58.85714285714285}} +{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 606.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.095}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.19, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.4, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.42, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.86, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, 
"onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.9, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.9, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.9, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 582.0, 627.0, 639.0, 579.0, 579.0, 587.0, 630.0, 579.0, 627.0, 582.0, 636.0, 630.0, 624.0, 518.0, 570.0, 636.0, 576.0, 587.0, 624.0, 636.0, 587.0, 627.0, 630.0, 630.0, 584.0, 579.0, 636.0, 576.0, 630.0, 578.0, 467.0, 633.0, 627.0, 576.0, 630.0, 630.0, 582.0, 633.0, 582.0, 590.0, 636.0, 627.0, 587.0, 630.0, 636.0, 630.0, 630.0, 630.0, 582.0, 582.0, 539.0, 582.0, 582.0, 576.0, 590.0, 627.0, 582.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 576.0, 579.0, 582.0, 584.0, 636.0, 633.0, 636.0, 639.0, 627.0, 630.0, 582.0, 633.0, 639.0, 573.0, 636.0, 579.0, 633.0, 636.0, 633.0, 636.0, 579.0, 582.0, 579.0, 630.0, 587.0, 636.0, 582.0, 636.0, 633.0, 627.0, 630.0, 639.0, 584.0, 627.0, 627.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 288.0, 293.0, 289.0, 313.0, 314.0, 322.0, 317.0, 296.0, 283.0, 290.0, 289.0, 288.0, 299.0, 316.0, 314.0, 291.0, 288.0, 318.0, 309.0, 288.0, 294.0, 324.0, 312.0, 316.0, 314.0, 302.0, 322.0, 247.0, 271.0, 285.0, 285.0, 319.0, 317.0, 291.0, 285.0, 296.0, 291.0, 313.0, 311.0, 321.0, 315.0, 301.0, 286.0, 308.0, 319.0, 322.0, 308.0, 312.0, 318.0, 286.0, 298.0, 288.0, 291.0, 319.0, 317.0, 285.0, 291.0, 318.0, 312.0, 292.0, 286.0, 223.0, 244.0, 308.0, 325.0, 313.0, 314.0, 286.0, 290.0, 324.0, 306.0, 313.0, 317.0, 294.0, 288.0, 316.0, 317.0, 293.0, 289.0, 288.0, 302.0, 319.0, 317.0, 311.0, 316.0, 301.0, 286.0, 318.0, 312.0, 316.0, 
320.0, 316.0, 314.0, 316.0, 314.0, 316.0, 314.0, 289.0, 293.0, 288.0, 294.0, 265.0, 274.0, 283.0, 299.0, 288.0, 294.0, 274.0, 302.0, 296.0, 294.0, 321.0, 306.0, 294.0, 288.0, 308.0, 319.0, 321.0, 309.0, 287.0, 292.0, 320.0, 319.0, 301.0, 281.0, 318.0, 315.0, 293.0, 283.0, 290.0, 289.0, 291.0, 291.0, 296.0, 288.0, 319.0, 317.0, 316.0, 317.0, 319.0, 317.0, 325.0, 314.0, 313.0, 314.0, 321.0, 309.0, 296.0, 286.0, 313.0, 320.0, 317.0, 322.0, 287.0, 286.0, 312.0, 324.0, 286.0, 293.0, 321.0, 312.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 291.0, 288.0, 293.0, 289.0, 292.0, 287.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 286.0, 296.0, 316.0, 320.0, 314.0, 319.0, 313.0, 314.0, 316.0, 314.0, 322.0, 317.0, 290.0, 294.0, 311.0, 316.0, 319.0, 308.0, 293.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.871423269952275, "mean_processing_ms": 0.24262389296401352, "mean_inference_ms": 1.4655941462374882}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8592000, "num_steps_sampled": 4582400, "sample_time_ms": 22265.267, "load_time_ms": 37.124, "grad_time_ms": 9998.565, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00020107599266339093, "policy_loss": -0.007807094603776932, "vf_loss": 81.7130355834961, "vf_explained_var": 0.7695291638374329, "kl": 0.0018338669324293733, "entropy": 1.1305631399154663, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4582400, "episodes_total": 11456, "training_iteration": 358, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-49-36", "timestamp": 1660258176, "time_this_iter_s": 28.821206092834473, "time_total_s": 16587.246614933014, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16587.246614933014, "timesteps_since_restore": 4582400, "iterations_since_restore": 358, "perf": {"cpu_util_percent": 32.69024390243902, "ram_util_percent": 58.856097560975606}} +{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 604.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.47}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.74, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.18, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.29, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.59, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.81, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, 
"soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.29, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.59, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.29, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.59, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 630.0, 615.0, 579.0, 579.0, 587.0, 633.0, 630.0, 636.0, 584.0, 582.0, 636.0, 633.0, 576.0, 630.0, 581.0, 579.0, 579.0, 636.0, 630.0, 587.0, 576.0, 584.0, 630.0, 587.0, 576.0, 633.0, 582.0, 636.0, 579.0, 582.0, 576.0, 579.0, 582.0, 584.0, 636.0, 633.0, 636.0, 639.0, 627.0, 630.0, 582.0, 633.0, 639.0, 573.0, 636.0, 579.0, 633.0, 636.0, 633.0, 636.0, 579.0, 582.0, 579.0, 630.0, 587.0, 636.0, 582.0, 636.0, 633.0, 627.0, 630.0, 639.0, 584.0, 627.0, 627.0, 576.0, 570.0, 582.0, 627.0, 639.0, 579.0, 579.0, 587.0, 630.0, 579.0, 627.0, 582.0, 636.0, 630.0, 624.0, 518.0, 570.0, 636.0, 576.0, 587.0, 624.0, 636.0, 587.0, 627.0, 630.0, 630.0, 584.0, 579.0, 636.0, 576.0, 630.0, 578.0, 467.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 280.0, 306.0, 324.0, 311.0, 319.0, 308.0, 307.0, 288.0, 291.0, 288.0, 291.0, 288.0, 299.0, 308.0, 325.0, 314.0, 316.0, 319.0, 317.0, 290.0, 294.0, 291.0, 291.0, 321.0, 315.0, 319.0, 314.0, 286.0, 290.0, 306.0, 324.0, 288.0, 293.0, 290.0, 289.0, 299.0, 280.0, 322.0, 314.0, 313.0, 317.0, 283.0, 304.0, 285.0, 291.0, 293.0, 291.0, 324.0, 306.0, 288.0, 299.0, 280.0, 296.0, 314.0, 319.0, 291.0, 291.0, 317.0, 319.0, 
294.0, 285.0, 296.0, 286.0, 293.0, 283.0, 290.0, 289.0, 291.0, 291.0, 296.0, 288.0, 319.0, 317.0, 316.0, 317.0, 319.0, 317.0, 325.0, 314.0, 313.0, 314.0, 321.0, 309.0, 296.0, 286.0, 313.0, 320.0, 317.0, 322.0, 287.0, 286.0, 312.0, 324.0, 286.0, 293.0, 321.0, 312.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 291.0, 288.0, 293.0, 289.0, 292.0, 287.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 286.0, 296.0, 316.0, 320.0, 314.0, 319.0, 313.0, 314.0, 316.0, 314.0, 322.0, 317.0, 290.0, 294.0, 311.0, 316.0, 319.0, 308.0, 293.0, 283.0, 282.0, 288.0, 293.0, 289.0, 313.0, 314.0, 322.0, 317.0, 296.0, 283.0, 290.0, 289.0, 288.0, 299.0, 316.0, 314.0, 291.0, 288.0, 318.0, 309.0, 288.0, 294.0, 324.0, 312.0, 316.0, 314.0, 302.0, 322.0, 247.0, 271.0, 285.0, 285.0, 319.0, 317.0, 291.0, 285.0, 296.0, 291.0, 313.0, 311.0, 321.0, 315.0, 301.0, 286.0, 308.0, 319.0, 322.0, 308.0, 312.0, 318.0, 286.0, 298.0, 288.0, 291.0, 319.0, 317.0, 285.0, 291.0, 318.0, 312.0, 292.0, 286.0, 223.0, 244.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8702563301594608, "mean_processing_ms": 0.24238978711918313, "mean_inference_ms": 1.4642680017402931}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8616000, "num_steps_sampled": 4595200, "sample_time_ms": 22073.451, "load_time_ms": 37.129, "grad_time_ms": 9911.472, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011308585526421666, "policy_loss": -0.006429137196391821, "vf_loss": 81.29705047607422, "vf_explained_var": 0.7726876139640808, "kl": 0.0017355438321828842, "entropy": 1.139426350593567, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4595200, "episodes_total": 11488, "training_iteration": 359, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-50-06", "timestamp": 1660258206, "time_this_iter_s": 30.0307719707489, "time_total_s": 16617.277386903763, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16617.277386903763, "timesteps_since_restore": 4595200, "iterations_since_restore": 359, "perf": {"cpu_util_percent": 36.002325581395354, "ram_util_percent": 58.82325581395349}} +{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 605.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.73}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.86, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.09, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.61, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.33, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.59, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.91, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, 
"soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.33, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.59, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.33, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.59, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 593.0, 633.0, 639.0, 627.0, 582.0, 587.0, 587.0, 639.0, 636.0, 582.0, 630.0, 633.0, 633.0, 630.0, 582.0, 627.0, 633.0, 639.0, 636.0, 639.0, 521.0, 633.0, 636.0, 630.0, 582.0, 558.0, 576.0, 639.0, 636.0, 624.0, 636.0, 584.0, 627.0, 627.0, 576.0, 570.0, 582.0, 627.0, 639.0, 579.0, 579.0, 587.0, 630.0, 579.0, 627.0, 582.0, 636.0, 630.0, 624.0, 518.0, 570.0, 636.0, 576.0, 587.0, 624.0, 636.0, 587.0, 627.0, 630.0, 630.0, 584.0, 579.0, 636.0, 576.0, 630.0, 578.0, 467.0, 579.0, 630.0, 630.0, 615.0, 579.0, 579.0, 587.0, 633.0, 630.0, 636.0, 584.0, 582.0, 636.0, 633.0, 576.0, 630.0, 581.0, 579.0, 579.0, 636.0, 630.0, 587.0, 576.0, 584.0, 630.0, 587.0, 576.0, 633.0, 582.0, 636.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 293.0, 300.0, 311.0, 322.0, 322.0, 317.0, 316.0, 311.0, 289.0, 293.0, 294.0, 293.0, 288.0, 299.0, 324.0, 315.0, 319.0, 317.0, 296.0, 286.0, 309.0, 321.0, 319.0, 314.0, 320.0, 313.0, 319.0, 311.0, 293.0, 
289.0, 318.0, 309.0, 317.0, 316.0, 324.0, 315.0, 314.0, 322.0, 319.0, 320.0, 256.0, 265.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 294.0, 288.0, 281.0, 277.0, 276.0, 300.0, 319.0, 320.0, 314.0, 322.0, 310.0, 314.0, 316.0, 320.0, 290.0, 294.0, 311.0, 316.0, 319.0, 308.0, 293.0, 283.0, 282.0, 288.0, 293.0, 289.0, 313.0, 314.0, 322.0, 317.0, 296.0, 283.0, 290.0, 289.0, 288.0, 299.0, 316.0, 314.0, 291.0, 288.0, 318.0, 309.0, 288.0, 294.0, 324.0, 312.0, 316.0, 314.0, 302.0, 322.0, 247.0, 271.0, 285.0, 285.0, 319.0, 317.0, 291.0, 285.0, 296.0, 291.0, 313.0, 311.0, 321.0, 315.0, 301.0, 286.0, 308.0, 319.0, 322.0, 308.0, 312.0, 318.0, 286.0, 298.0, 288.0, 291.0, 319.0, 317.0, 285.0, 291.0, 318.0, 312.0, 292.0, 286.0, 223.0, 244.0, 299.0, 280.0, 306.0, 324.0, 311.0, 319.0, 308.0, 307.0, 288.0, 291.0, 288.0, 291.0, 288.0, 299.0, 308.0, 325.0, 314.0, 316.0, 319.0, 317.0, 290.0, 294.0, 291.0, 291.0, 321.0, 315.0, 319.0, 314.0, 286.0, 290.0, 306.0, 324.0, 288.0, 293.0, 290.0, 289.0, 299.0, 280.0, 322.0, 314.0, 313.0, 317.0, 283.0, 304.0, 285.0, 291.0, 293.0, 291.0, 324.0, 306.0, 288.0, 299.0, 280.0, 296.0, 314.0, 319.0, 291.0, 291.0, 317.0, 319.0, 294.0, 285.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 0.86909953447402, "mean_processing_ms": 0.24215862097735263, "mean_inference_ms": 1.4630909533739367}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8640000, "num_steps_sampled": 4608000, "sample_time_ms": 21874.868, "load_time_ms": 37.48, "grad_time_ms": 9746.645, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013312319060787559, "policy_loss": -0.005779942963272333, "vf_loss": 76.8069839477539, "vf_explained_var": 0.7698413729667664, "kl": 0.001875889953225851, "entropy": 1.1390520334243774, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4608000, "episodes_total": 11520, 
"training_iteration": 360, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-50-38", "timestamp": 1660258238, "time_this_iter_s": 32.26046180725098, "time_total_s": 16649.537848711014, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16649.537848711014, "timesteps_since_restore": 4608000, "iterations_since_restore": 360, "perf": {"cpu_util_percent": 32.559999999999995, "ram_util_percent": 58.84666666666664}} +{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 605.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.965}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.33, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.6, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 630.0, 633.0, 582.0, 636.0, 590.0, 630.0, 587.0, 636.0, 576.0, 630.0, 630.0, 582.0, 576.0, 630.0, 582.0, 587.0, 633.0, 582.0, 636.0, 630.0, 582.0, 587.0, 630.0, 573.0, 630.0, 630.0, 525.0, 579.0, 627.0, 633.0, 576.0, 630.0, 578.0, 467.0, 579.0, 630.0, 630.0, 615.0, 579.0, 579.0, 587.0, 633.0, 630.0, 636.0, 584.0, 582.0, 636.0, 633.0, 576.0, 630.0, 581.0, 579.0, 579.0, 636.0, 630.0, 587.0, 576.0, 584.0, 630.0, 587.0, 576.0, 633.0, 582.0, 636.0, 579.0, 582.0, 636.0, 593.0, 633.0, 639.0, 627.0, 582.0, 587.0, 587.0, 639.0, 636.0, 582.0, 630.0, 633.0, 633.0, 630.0, 582.0, 627.0, 633.0, 639.0, 636.0, 639.0, 521.0, 633.0, 636.0, 630.0, 582.0, 558.0, 576.0, 639.0, 636.0, 624.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 282.0, 297.0, 
319.0, 311.0, 321.0, 312.0, 286.0, 296.0, 314.0, 322.0, 296.0, 294.0, 311.0, 319.0, 298.0, 289.0, 322.0, 314.0, 285.0, 291.0, 313.0, 317.0, 316.0, 314.0, 291.0, 291.0, 298.0, 278.0, 308.0, 322.0, 293.0, 289.0, 293.0, 294.0, 316.0, 317.0, 291.0, 291.0, 319.0, 317.0, 321.0, 309.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 265.0, 260.0, 287.0, 292.0, 314.0, 313.0, 316.0, 317.0, 285.0, 291.0, 318.0, 312.0, 292.0, 286.0, 223.0, 244.0, 299.0, 280.0, 306.0, 324.0, 311.0, 319.0, 308.0, 307.0, 288.0, 291.0, 288.0, 291.0, 288.0, 299.0, 308.0, 325.0, 314.0, 316.0, 319.0, 317.0, 290.0, 294.0, 291.0, 291.0, 321.0, 315.0, 319.0, 314.0, 286.0, 290.0, 306.0, 324.0, 288.0, 293.0, 290.0, 289.0, 299.0, 280.0, 322.0, 314.0, 313.0, 317.0, 283.0, 304.0, 285.0, 291.0, 293.0, 291.0, 324.0, 306.0, 288.0, 299.0, 280.0, 296.0, 314.0, 319.0, 291.0, 291.0, 317.0, 319.0, 294.0, 285.0, 296.0, 286.0, 314.0, 322.0, 293.0, 300.0, 311.0, 322.0, 322.0, 317.0, 316.0, 311.0, 289.0, 293.0, 294.0, 293.0, 288.0, 299.0, 324.0, 315.0, 319.0, 317.0, 296.0, 286.0, 309.0, 321.0, 319.0, 314.0, 320.0, 313.0, 319.0, 311.0, 293.0, 289.0, 318.0, 309.0, 317.0, 316.0, 324.0, 315.0, 314.0, 322.0, 319.0, 320.0, 256.0, 265.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 294.0, 288.0, 281.0, 277.0, 276.0, 300.0, 319.0, 320.0, 314.0, 322.0, 310.0, 314.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8679613450602781, "mean_processing_ms": 0.24193358188716718, "mean_inference_ms": 1.4622622611677922}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8664000, "num_steps_sampled": 4620800, "sample_time_ms": 21811.818, "load_time_ms": 37.251, "grad_time_ms": 9734.507, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007378067821264267, "policy_loss": -0.006473819259554148, "vf_loss": 77.80099487304688, "vf_explained_var": 0.7715656161308289, "kl": 0.0017216805135831237, "entropy": 1.1369411945343018, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4620800, "episodes_total": 11552, "training_iteration": 361, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-51-13", "timestamp": 1660258273, "time_this_iter_s": 35.53581404685974, "time_total_s": 16685.073662757874, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16685.073662757874, "timesteps_since_restore": 4620800, "iterations_since_restore": 361, "perf": {"cpu_util_percent": 32.418, "ram_util_percent": 58.85999999999999}} +{"episode_reward_max": 639.0, "episode_reward_min": 521.0, "episode_reward_mean": 608.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.125}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.05, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, 
"useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.58, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.74, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 
3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.21, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, 
"catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [624.0, 587.0, 582.0, 582.0, 579.0, 636.0, 630.0, 567.0, 630.0, 633.0, 636.0, 582.0, 621.0, 630.0, 587.0, 587.0, 639.0, 630.0, 587.0, 587.0, 587.0, 630.0, 636.0, 627.0, 561.0, 627.0, 630.0, 636.0, 582.0, 582.0, 579.0, 587.0, 582.0, 636.0, 579.0, 582.0, 636.0, 593.0, 633.0, 639.0, 627.0, 582.0, 587.0, 587.0, 639.0, 636.0, 582.0, 630.0, 633.0, 633.0, 630.0, 582.0, 627.0, 633.0, 639.0, 636.0, 639.0, 521.0, 633.0, 636.0, 630.0, 582.0, 558.0, 576.0, 639.0, 636.0, 624.0, 636.0, 579.0, 579.0, 630.0, 633.0, 582.0, 636.0, 590.0, 630.0, 587.0, 636.0, 576.0, 630.0, 630.0, 582.0, 576.0, 630.0, 582.0, 587.0, 633.0, 582.0, 636.0, 630.0, 582.0, 587.0, 630.0, 573.0, 630.0, 630.0, 525.0, 579.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 318.0, 296.0, 291.0, 288.0, 294.0, 287.0, 295.0, 288.0, 291.0, 319.0, 317.0, 316.0, 314.0, 281.0, 286.0, 321.0, 309.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 307.0, 314.0, 308.0, 322.0, 291.0, 296.0, 285.0, 302.0, 314.0, 325.0, 313.0, 317.0, 295.0, 292.0, 293.0, 294.0, 294.0, 293.0, 311.0, 319.0, 321.0, 315.0, 316.0, 311.0, 284.0, 277.0, 316.0, 311.0, 311.0, 319.0, 320.0, 316.0, 294.0, 288.0, 297.0, 285.0, 282.0, 297.0, 304.0, 283.0, 291.0, 291.0, 317.0, 319.0, 294.0, 285.0, 296.0, 286.0, 314.0, 322.0, 293.0, 300.0, 311.0, 322.0, 322.0, 317.0, 316.0, 311.0, 289.0, 293.0, 294.0, 293.0, 288.0, 299.0, 324.0, 315.0, 319.0, 317.0, 296.0, 286.0, 309.0, 321.0, 319.0, 314.0, 320.0, 313.0, 319.0, 311.0, 293.0, 289.0, 318.0, 309.0, 317.0, 316.0, 324.0, 315.0, 314.0, 322.0, 319.0, 320.0, 256.0, 265.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 294.0, 288.0, 281.0, 277.0, 276.0, 300.0, 319.0, 320.0, 314.0, 322.0, 310.0, 314.0, 316.0, 320.0, 288.0, 291.0, 282.0, 297.0, 319.0, 311.0, 321.0, 312.0, 286.0, 296.0, 314.0, 322.0, 296.0, 294.0, 311.0, 319.0, 298.0, 289.0, 322.0, 314.0, 285.0, 291.0, 313.0, 317.0, 316.0, 314.0, 291.0, 291.0, 298.0, 278.0, 308.0, 322.0, 293.0, 289.0, 293.0, 294.0, 316.0, 317.0, 291.0, 291.0, 319.0, 317.0, 321.0, 309.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 265.0, 260.0, 287.0, 292.0, 314.0, 313.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8668385855769685, "mean_processing_ms": 0.2417133493697533, "mean_inference_ms": 1.4618269528034153}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8688000, "num_steps_sampled": 4633600, "sample_time_ms": 22112.322, "load_time_ms": 37.291, "grad_time_ms": 9542.038, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 
0.0006689311121590436, "policy_loss": -0.006603518966585398, "vf_loss": 78.373291015625, "vf_explained_var": 0.7716686129570007, "kl": 0.0016899490728974342, "entropy": 1.1297602653503418, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4633600, "episodes_total": 11584, "training_iteration": 362, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-51-49", "timestamp": 1660258309, "time_this_iter_s": 35.281026124954224, "time_total_s": 16720.354688882828, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 
0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", 
"new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16720.354688882828, "timesteps_since_restore": 4633600, "iterations_since_restore": 362, "perf": {"cpu_util_percent": 29.87, "ram_util_percent": 58.83999999999998}} +{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 605.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.605}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.01, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, 
"tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.54, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.27, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.91, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.69, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, 
"useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.91, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.91, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, 
"catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 582.0, 579.0, 633.0, 579.0, 582.0, 582.0, 582.0, 627.0, 582.0, 584.0, 633.0, 630.0, 587.0, 546.0, 587.0, 576.0, 630.0, 636.0, 630.0, 630.0, 573.0, 587.0, 636.0, 590.0, 579.0, 582.0, 633.0, 639.0, 576.0, 630.0, 639.0, 636.0, 624.0, 636.0, 579.0, 579.0, 630.0, 633.0, 582.0, 636.0, 590.0, 630.0, 587.0, 636.0, 576.0, 630.0, 630.0, 582.0, 576.0, 630.0, 582.0, 587.0, 633.0, 582.0, 636.0, 630.0, 582.0, 587.0, 630.0, 573.0, 630.0, 630.0, 525.0, 579.0, 627.0, 633.0, 624.0, 587.0, 582.0, 582.0, 579.0, 636.0, 630.0, 567.0, 630.0, 633.0, 636.0, 582.0, 621.0, 630.0, 587.0, 587.0, 639.0, 630.0, 587.0, 587.0, 587.0, 630.0, 636.0, 627.0, 561.0, 627.0, 630.0, 636.0, 582.0, 582.0, 579.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 313.0, 317.0, 291.0, 291.0, 282.0, 297.0, 314.0, 319.0, 285.0, 294.0, 291.0, 291.0, 288.0, 294.0, 291.0, 291.0, 313.0, 314.0, 293.0, 289.0, 285.0, 299.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 273.0, 273.0, 296.0, 291.0, 287.0, 289.0, 316.0, 314.0, 319.0, 317.0, 318.0, 312.0, 316.0, 314.0, 283.0, 290.0, 299.0, 288.0, 321.0, 315.0, 302.0, 288.0, 286.0, 293.0, 288.0, 294.0, 314.0, 319.0, 316.0, 323.0, 288.0, 288.0, 321.0, 309.0, 319.0, 320.0, 314.0, 322.0, 310.0, 314.0, 316.0, 320.0, 288.0, 291.0, 282.0, 297.0, 319.0, 311.0, 321.0, 312.0, 286.0, 296.0, 314.0, 322.0, 296.0, 294.0, 311.0, 319.0, 298.0, 289.0, 322.0, 314.0, 285.0, 291.0, 313.0, 317.0, 316.0, 314.0, 291.0, 291.0, 298.0, 278.0, 308.0, 322.0, 293.0, 289.0, 293.0, 294.0, 316.0, 317.0, 291.0, 291.0, 319.0, 317.0, 321.0, 309.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 265.0, 260.0, 287.0, 292.0, 314.0, 313.0, 316.0, 317.0, 306.0, 318.0, 296.0, 291.0, 288.0, 294.0, 287.0, 295.0, 288.0, 291.0, 319.0, 317.0, 316.0, 314.0, 281.0, 286.0, 321.0, 309.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 307.0, 314.0, 308.0, 322.0, 291.0, 296.0, 285.0, 302.0, 314.0, 325.0, 313.0, 317.0, 295.0, 292.0, 293.0, 294.0, 294.0, 293.0, 311.0, 319.0, 321.0, 315.0, 316.0, 311.0, 284.0, 277.0, 316.0, 311.0, 311.0, 319.0, 320.0, 316.0, 294.0, 288.0, 297.0, 285.0, 282.0, 297.0, 304.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8657202120154665, "mean_processing_ms": 0.2414933019778603, "mean_inference_ms": 1.4613549029197088}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8712000, "num_steps_sampled": 4646400, "sample_time_ms": 
22230.157, "load_time_ms": 37.429, "grad_time_ms": 9536.697, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00019748850900214165, "policy_loss": -0.00744326738640666, "vf_loss": 82.0882568359375, "vf_explained_var": 0.7681159377098083, "kl": 0.0019463537028059363, "entropy": 1.1361408233642578, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4646400, "episodes_total": 11616, "training_iteration": 363, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-52-22", "timestamp": 1660258342, "time_this_iter_s": 33.05159020423889, "time_total_s": 16753.406279087067, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], 
"counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, 
"object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16753.406279087067, "timesteps_since_restore": 4646400, "iterations_since_restore": 363, "perf": {"cpu_util_percent": 32.35531914893617, "ram_util_percent": 58.848936170212795}} +{"episode_reward_max": 639.0, "episode_reward_min": 353.0, "episode_reward_mean": 605.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 302.71}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.22, "shaped_reward_min": 113, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.62, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.37, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.68, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.82, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 3, 
"dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.08, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.82, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.82, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 579.0, 630.0, 639.0, 579.0, 636.0, 353.0, 639.0, 587.0, 630.0, 633.0, 630.0, 587.0, 582.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 630.0, 621.0, 582.0, 630.0, 636.0, 525.0, 579.0, 627.0, 633.0, 624.0, 587.0, 582.0, 582.0, 579.0, 636.0, 630.0, 567.0, 630.0, 633.0, 636.0, 582.0, 621.0, 630.0, 587.0, 587.0, 639.0, 630.0, 587.0, 587.0, 587.0, 630.0, 636.0, 627.0, 561.0, 627.0, 630.0, 636.0, 582.0, 582.0, 579.0, 587.0, 582.0, 630.0, 582.0, 579.0, 633.0, 579.0, 582.0, 582.0, 582.0, 627.0, 582.0, 584.0, 633.0, 630.0, 587.0, 546.0, 587.0, 576.0, 630.0, 636.0, 630.0, 630.0, 573.0, 587.0, 636.0, 590.0, 
579.0, 582.0, 633.0, 639.0, 576.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 309.0, 283.0, 304.0, 293.0, 289.0, 317.0, 319.0, 311.0, 319.0, 322.0, 314.0, 325.0, 311.0, 290.0, 289.0, 308.0, 322.0, 314.0, 325.0, 288.0, 291.0, 322.0, 314.0, 175.0, 178.0, 327.0, 312.0, 290.0, 297.0, 313.0, 317.0, 316.0, 317.0, 311.0, 319.0, 298.0, 289.0, 286.0, 296.0, 316.0, 320.0, 319.0, 314.0, 312.0, 327.0, 313.0, 323.0, 280.0, 302.0, 319.0, 317.0, 309.0, 330.0, 318.0, 312.0, 302.0, 319.0, 294.0, 288.0, 309.0, 321.0, 319.0, 317.0, 265.0, 260.0, 287.0, 292.0, 314.0, 313.0, 316.0, 317.0, 306.0, 318.0, 296.0, 291.0, 288.0, 294.0, 287.0, 295.0, 288.0, 291.0, 319.0, 317.0, 316.0, 314.0, 281.0, 286.0, 321.0, 309.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 307.0, 314.0, 308.0, 322.0, 291.0, 296.0, 285.0, 302.0, 314.0, 325.0, 313.0, 317.0, 295.0, 292.0, 293.0, 294.0, 294.0, 293.0, 311.0, 319.0, 321.0, 315.0, 316.0, 311.0, 284.0, 277.0, 316.0, 311.0, 311.0, 319.0, 320.0, 316.0, 294.0, 288.0, 297.0, 285.0, 282.0, 297.0, 304.0, 283.0, 296.0, 286.0, 313.0, 317.0, 291.0, 291.0, 282.0, 297.0, 314.0, 319.0, 285.0, 294.0, 291.0, 291.0, 288.0, 294.0, 291.0, 291.0, 313.0, 314.0, 293.0, 289.0, 285.0, 299.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 273.0, 273.0, 296.0, 291.0, 287.0, 289.0, 316.0, 314.0, 319.0, 317.0, 318.0, 312.0, 316.0, 314.0, 283.0, 290.0, 299.0, 288.0, 321.0, 315.0, 302.0, 288.0, 286.0, 293.0, 288.0, 294.0, 314.0, 319.0, 316.0, 323.0, 288.0, 288.0, 321.0, 309.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.8646062243279308, "mean_processing_ms": 0.24127430142106637, "mean_inference_ms": 1.4608685672633468}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8736000, "num_steps_sampled": 4659200, "sample_time_ms": 22504.157, "load_time_ms": 37.127, "grad_time_ms": 9578.876, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00030447044991888106, "policy_loss": -0.007189334835857153, "vf_loss": 80.5628433227539, "vf_explained_var": 0.7757663130760193, "kl": 0.002137100091204047, "entropy": 1.1249442100524902, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4659200, "episodes_total": 11648, "training_iteration": 364, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-52-57", "timestamp": 1660258377, "time_this_iter_s": 35.15313506126404, "time_total_s": 16788.55941414833, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16788.55941414833, "timesteps_since_restore": 4659200, "iterations_since_restore": 364, "perf": {"cpu_util_percent": 30.822448979591837, "ram_util_percent": 58.930612244897965}} +{"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 600.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 138.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 300.455}, "custom_metrics": {"sparse_reward_mean": 208.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 184.91, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.5, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.13, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.16, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 
17.54, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.09, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, 
"viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 579.0, 633.0, 579.0, 582.0, 627.0, 582.0, 636.0, 590.0, 294.0, 584.0, 630.0, 621.0, 576.0, 630.0, 582.0, 582.0, 584.0, 582.0, 627.0, 579.0, 576.0, 633.0, 590.0, 630.0, 630.0, 579.0, 627.0, 579.0, 630.0, 621.0, 582.0, 582.0, 579.0, 587.0, 582.0, 630.0, 582.0, 579.0, 633.0, 579.0, 582.0, 582.0, 582.0, 627.0, 582.0, 584.0, 633.0, 630.0, 587.0, 546.0, 587.0, 576.0, 630.0, 636.0, 630.0, 630.0, 573.0, 587.0, 636.0, 590.0, 579.0, 582.0, 633.0, 
639.0, 576.0, 630.0, 633.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 579.0, 630.0, 639.0, 579.0, 636.0, 353.0, 639.0, 587.0, 630.0, 633.0, 630.0, 587.0, 582.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 630.0, 621.0, 582.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 319.0, 291.0, 291.0, 288.0, 291.0, 319.0, 314.0, 285.0, 294.0, 285.0, 297.0, 319.0, 308.0, 292.0, 290.0, 319.0, 317.0, 291.0, 299.0, 138.0, 156.0, 291.0, 293.0, 311.0, 319.0, 313.0, 308.0, 294.0, 282.0, 316.0, 314.0, 291.0, 291.0, 299.0, 283.0, 293.0, 291.0, 298.0, 284.0, 317.0, 310.0, 296.0, 283.0, 286.0, 290.0, 316.0, 317.0, 296.0, 294.0, 314.0, 316.0, 319.0, 311.0, 291.0, 288.0, 313.0, 314.0, 285.0, 294.0, 311.0, 319.0, 304.0, 317.0, 294.0, 288.0, 297.0, 285.0, 282.0, 297.0, 304.0, 283.0, 296.0, 286.0, 313.0, 317.0, 291.0, 291.0, 282.0, 297.0, 314.0, 319.0, 285.0, 294.0, 291.0, 291.0, 288.0, 294.0, 291.0, 291.0, 313.0, 314.0, 293.0, 289.0, 285.0, 299.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 273.0, 273.0, 296.0, 291.0, 287.0, 289.0, 316.0, 314.0, 319.0, 317.0, 318.0, 312.0, 316.0, 314.0, 283.0, 290.0, 299.0, 288.0, 321.0, 315.0, 302.0, 288.0, 286.0, 293.0, 288.0, 294.0, 314.0, 319.0, 316.0, 323.0, 288.0, 288.0, 321.0, 309.0, 324.0, 309.0, 283.0, 304.0, 293.0, 289.0, 317.0, 319.0, 311.0, 319.0, 322.0, 314.0, 325.0, 311.0, 290.0, 289.0, 308.0, 322.0, 314.0, 325.0, 288.0, 291.0, 322.0, 314.0, 175.0, 178.0, 327.0, 312.0, 290.0, 297.0, 313.0, 317.0, 316.0, 317.0, 311.0, 319.0, 298.0, 
289.0, 286.0, 296.0, 316.0, 320.0, 319.0, 314.0, 312.0, 327.0, 313.0, 323.0, 280.0, 302.0, 319.0, 317.0, 309.0, 330.0, 318.0, 312.0, 302.0, 319.0, 294.0, 288.0, 309.0, 321.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8635010378078678, "mean_processing_ms": 0.24105756914616358, "mean_inference_ms": 1.4603867065067317}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8760000, "num_steps_sampled": 4672000, "sample_time_ms": 23125.247, "load_time_ms": 37.852, "grad_time_ms": 9835.319, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003108972916379571, "policy_loss": -0.004532767925411463, "vf_loss": 82.0846176147461, "vf_explained_var": 0.7727766036987305, "kl": 0.00231738924048841, "entropy": 1.1334295272827148, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4672000, "episodes_total": 11680, "training_iteration": 365, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-53-36", "timestamp": 1660258416, "time_this_iter_s": 39.16720676422119, "time_total_s": 16827.726620912552, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16827.726620912552, "timesteps_since_restore": 4672000, "iterations_since_restore": 365, "perf": {"cpu_util_percent": 31.412499999999998, "ram_util_percent": 59.01071428571428}} +{"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 599.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 138.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 299.95}, "custom_metrics": {"sparse_reward_mean": 207.8, 
"sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 184.3, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.92, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.4, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.08, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 582.0, 582.0, 639.0, 579.0, 630.0, 576.0, 630.0, 402.0, 639.0, 633.0, 627.0, 587.0, 630.0, 465.0, 582.0, 581.0, 576.0, 582.0, 561.0, 633.0, 465.0, 633.0, 633.0, 633.0, 582.0, 627.0, 636.0, 630.0, 582.0, 582.0, 633.0, 639.0, 576.0, 630.0, 633.0, 587.0, 582.0, 636.0, 630.0, 
636.0, 636.0, 579.0, 630.0, 639.0, 579.0, 636.0, 353.0, 639.0, 587.0, 630.0, 633.0, 630.0, 587.0, 582.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 630.0, 621.0, 582.0, 630.0, 636.0, 627.0, 582.0, 579.0, 633.0, 579.0, 582.0, 627.0, 582.0, 636.0, 590.0, 294.0, 584.0, 630.0, 621.0, 576.0, 630.0, 582.0, 582.0, 584.0, 582.0, 627.0, 579.0, 576.0, 633.0, 590.0, 630.0, 630.0, 579.0, 627.0, 579.0, 630.0, 621.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 291.0, 291.0, 319.0, 320.0, 296.0, 283.0, 316.0, 314.0, 287.0, 289.0, 315.0, 315.0, 198.0, 204.0, 324.0, 315.0, 316.0, 317.0, 311.0, 316.0, 288.0, 299.0, 314.0, 316.0, 231.0, 234.0, 291.0, 291.0, 279.0, 302.0, 296.0, 280.0, 288.0, 294.0, 288.0, 273.0, 321.0, 312.0, 231.0, 234.0, 316.0, 317.0, 313.0, 320.0, 313.0, 320.0, 288.0, 294.0, 308.0, 319.0, 320.0, 316.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 314.0, 319.0, 316.0, 323.0, 288.0, 288.0, 321.0, 309.0, 324.0, 309.0, 283.0, 304.0, 293.0, 289.0, 317.0, 319.0, 311.0, 319.0, 322.0, 314.0, 325.0, 311.0, 290.0, 289.0, 308.0, 322.0, 314.0, 325.0, 288.0, 291.0, 322.0, 314.0, 175.0, 178.0, 327.0, 312.0, 290.0, 297.0, 313.0, 317.0, 316.0, 317.0, 311.0, 319.0, 298.0, 289.0, 286.0, 296.0, 316.0, 320.0, 319.0, 314.0, 312.0, 327.0, 313.0, 323.0, 280.0, 302.0, 319.0, 317.0, 309.0, 330.0, 318.0, 312.0, 302.0, 319.0, 294.0, 288.0, 309.0, 321.0, 319.0, 317.0, 308.0, 319.0, 291.0, 291.0, 288.0, 291.0, 319.0, 314.0, 285.0, 294.0, 285.0, 297.0, 319.0, 
308.0, 292.0, 290.0, 319.0, 317.0, 291.0, 299.0, 138.0, 156.0, 291.0, 293.0, 311.0, 319.0, 313.0, 308.0, 294.0, 282.0, 316.0, 314.0, 291.0, 291.0, 299.0, 283.0, 293.0, 291.0, 298.0, 284.0, 317.0, 310.0, 296.0, 283.0, 286.0, 290.0, 316.0, 317.0, 296.0, 294.0, 314.0, 316.0, 319.0, 311.0, 291.0, 288.0, 313.0, 314.0, 285.0, 294.0, 311.0, 319.0, 304.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8623994673017462, "mean_processing_ms": 0.24084175116520762, "mean_inference_ms": 1.4598463496186935}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8784000, "num_steps_sampled": 4684800, "sample_time_ms": 23187.668, "load_time_ms": 38.066, "grad_time_ms": 10176.48, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020015325862914324, "policy_loss": -0.005890776868909597, "vf_loss": 84.59882354736328, "vf_explained_var": 0.7721861004829407, "kl": 0.002045721048489213, "entropy": 1.1351399421691895, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4684800, "episodes_total": 11712, "training_iteration": 366, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-54-11", "timestamp": 1660258451, "time_this_iter_s": 34.82252907752991, "time_total_s": 16862.54914999008, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16862.54914999008, "timesteps_since_restore": 4684800, "iterations_since_restore": 366, "perf": {"cpu_util_percent": 28.122448979591837, "ram_util_percent": 58.88775510204081}} +{"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 598.83, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 138.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 299.415}, "custom_metrics": {"sparse_reward_mean": 207.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 183.63, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.72, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.1, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, 
"useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.36, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.08, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.24, 
"optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 633.0, 636.0, 627.0, 630.0, 627.0, 630.0, 627.0, 587.0, 630.0, 633.0, 
630.0, 582.0, 582.0, 587.0, 582.0, 627.0, 627.0, 587.0, 576.0, 633.0, 573.0, 636.0, 630.0, 587.0, 582.0, 636.0, 587.0, 573.0, 627.0, 636.0, 587.0, 621.0, 582.0, 630.0, 636.0, 627.0, 582.0, 579.0, 633.0, 579.0, 582.0, 627.0, 582.0, 636.0, 590.0, 294.0, 584.0, 630.0, 621.0, 576.0, 630.0, 582.0, 582.0, 584.0, 582.0, 627.0, 579.0, 576.0, 633.0, 590.0, 630.0, 630.0, 579.0, 627.0, 579.0, 630.0, 621.0, 630.0, 636.0, 582.0, 582.0, 639.0, 579.0, 630.0, 576.0, 630.0, 402.0, 639.0, 633.0, 627.0, 587.0, 630.0, 465.0, 582.0, 581.0, 576.0, 582.0, 561.0, 633.0, 465.0, 633.0, 633.0, 633.0, 582.0, 627.0, 636.0, 630.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 254.0, 314.0, 319.0, 322.0, 314.0, 311.0, 316.0, 312.0, 318.0, 324.0, 303.0, 321.0, 309.0, 313.0, 314.0, 288.0, 299.0, 311.0, 319.0, 310.0, 323.0, 306.0, 324.0, 293.0, 289.0, 288.0, 294.0, 282.0, 305.0, 294.0, 288.0, 309.0, 318.0, 313.0, 314.0, 291.0, 296.0, 295.0, 281.0, 316.0, 317.0, 287.0, 286.0, 318.0, 318.0, 311.0, 319.0, 288.0, 299.0, 294.0, 288.0, 319.0, 317.0, 296.0, 291.0, 277.0, 296.0, 311.0, 316.0, 319.0, 317.0, 288.0, 299.0, 302.0, 319.0, 294.0, 288.0, 309.0, 321.0, 319.0, 317.0, 308.0, 319.0, 291.0, 291.0, 288.0, 291.0, 319.0, 314.0, 285.0, 294.0, 285.0, 297.0, 319.0, 308.0, 292.0, 290.0, 319.0, 317.0, 291.0, 299.0, 138.0, 156.0, 291.0, 293.0, 311.0, 319.0, 313.0, 308.0, 294.0, 282.0, 316.0, 314.0, 291.0, 291.0, 299.0, 283.0, 293.0, 291.0, 298.0, 284.0, 317.0, 310.0, 296.0, 283.0, 286.0, 290.0, 316.0, 
317.0, 296.0, 294.0, 314.0, 316.0, 319.0, 311.0, 291.0, 288.0, 313.0, 314.0, 285.0, 294.0, 311.0, 319.0, 304.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 291.0, 291.0, 319.0, 320.0, 296.0, 283.0, 316.0, 314.0, 287.0, 289.0, 315.0, 315.0, 198.0, 204.0, 324.0, 315.0, 316.0, 317.0, 311.0, 316.0, 288.0, 299.0, 314.0, 316.0, 231.0, 234.0, 291.0, 291.0, 279.0, 302.0, 296.0, 280.0, 288.0, 294.0, 288.0, 273.0, 321.0, 312.0, 231.0, 234.0, 316.0, 317.0, 313.0, 320.0, 313.0, 320.0, 288.0, 294.0, 308.0, 319.0, 320.0, 316.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8612908936403926, "mean_processing_ms": 0.24062264484082838, "mean_inference_ms": 1.4590268461349842}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8808000, "num_steps_sampled": 4697600, "sample_time_ms": 23071.295, "load_time_ms": 37.913, "grad_time_ms": 10615.56, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0038227650802582502, "policy_loss": -0.003672233084216714, "vf_loss": 80.5904312133789, "vf_explained_var": 0.7626190185546875, "kl": 0.0024631840642541647, "entropy": 1.128088116645813, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4697600, "episodes_total": 11744, "training_iteration": 367, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-54-44", "timestamp": 1660258484, "time_this_iter_s": 33.39541292190552, "time_total_s": 16895.944562911987, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16895.944562911987, "timesteps_since_restore": 4697600, "iterations_since_restore": 367, "perf": {"cpu_util_percent": 
29.602127659574467, "ram_util_percent": 58.99148936170216}} +{"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 603.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 301.92}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.44, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.99, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.28, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, 
"onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.07, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 639.0, 579.0, 639.0, 636.0, 525.0, 581.0, 639.0, 573.0, 582.0, 587.0, 630.0, 636.0, 639.0, 633.0, 587.0, 582.0, 636.0, 636.0, 582.0, 636.0, 633.0, 627.0, 587.0, 516.0, 630.0, 633.0, 630.0, 639.0, 573.0, 582.0, 627.0, 579.0, 630.0, 621.0, 630.0, 636.0, 582.0, 582.0, 639.0, 579.0, 630.0, 576.0, 630.0, 402.0, 639.0, 633.0, 627.0, 587.0, 630.0, 465.0, 582.0, 581.0, 576.0, 582.0, 561.0, 633.0, 465.0, 633.0, 633.0, 633.0, 582.0, 627.0, 636.0, 630.0, 582.0, 582.0, 519.0, 633.0, 636.0, 627.0, 630.0, 627.0, 630.0, 627.0, 587.0, 630.0, 633.0, 630.0, 582.0, 582.0, 587.0, 582.0, 627.0, 627.0, 587.0, 576.0, 633.0, 573.0, 636.0, 630.0, 587.0, 582.0, 636.0, 587.0, 573.0, 627.0, 636.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 309.0, 316.0, 320.0, 319.0, 320.0, 286.0, 293.0, 319.0, 320.0, 322.0, 314.0, 260.0, 265.0, 288.0, 293.0, 322.0, 317.0, 285.0, 288.0, 293.0, 289.0, 282.0, 305.0, 306.0, 324.0, 314.0, 322.0, 322.0, 317.0, 323.0, 310.0, 293.0, 294.0, 290.0, 292.0, 319.0, 317.0, 319.0, 317.0, 288.0, 294.0, 314.0, 322.0, 316.0, 317.0, 313.0, 314.0, 290.0, 297.0, 259.0, 257.0, 316.0, 314.0, 307.0, 326.0, 321.0, 309.0, 321.0, 318.0, 277.0, 296.0, 285.0, 297.0, 313.0, 314.0, 285.0, 294.0, 311.0, 319.0, 304.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 291.0, 291.0, 319.0, 320.0, 296.0, 283.0, 316.0, 314.0, 287.0, 289.0, 315.0, 315.0, 198.0, 
204.0, 324.0, 315.0, 316.0, 317.0, 311.0, 316.0, 288.0, 299.0, 314.0, 316.0, 231.0, 234.0, 291.0, 291.0, 279.0, 302.0, 296.0, 280.0, 288.0, 294.0, 288.0, 273.0, 321.0, 312.0, 231.0, 234.0, 316.0, 317.0, 313.0, 320.0, 313.0, 320.0, 288.0, 294.0, 308.0, 319.0, 320.0, 316.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 265.0, 254.0, 314.0, 319.0, 322.0, 314.0, 311.0, 316.0, 312.0, 318.0, 324.0, 303.0, 321.0, 309.0, 313.0, 314.0, 288.0, 299.0, 311.0, 319.0, 310.0, 323.0, 306.0, 324.0, 293.0, 289.0, 288.0, 294.0, 282.0, 305.0, 294.0, 288.0, 309.0, 318.0, 313.0, 314.0, 291.0, 296.0, 295.0, 281.0, 316.0, 317.0, 287.0, 286.0, 318.0, 318.0, 311.0, 319.0, 288.0, 299.0, 294.0, 288.0, 319.0, 317.0, 296.0, 291.0, 277.0, 296.0, 311.0, 316.0, 319.0, 317.0, 288.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8601760204307758, "mean_processing_ms": 0.2404010276903208, "mean_inference_ms": 1.4579011173262801}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8832000, "num_steps_sampled": 4710400, "sample_time_ms": 23247.48, "load_time_ms": 37.926, "grad_time_ms": 10791.679, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004444511607289314, "policy_loss": -0.0034118040930479765, "vf_loss": 84.17324829101562, "vf_explained_var": 0.7645478844642639, "kl": 0.0020590554922819138, "entropy": 1.1220086812973022, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4710400, "episodes_total": 11776, "training_iteration": 368, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-55-17", "timestamp": 1660258517, "time_this_iter_s": 32.34189581871033, "time_total_s": 16928.286458730698, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16928.286458730698, "timesteps_since_restore": 4710400, "iterations_since_restore": 368, "perf": {"cpu_util_percent": 31.186956521739134, "ram_util_percent": 58.830434782608684}} +{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 607.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 303.555}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.71, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.13, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 579.0, 584.0, 636.0, 627.0, 624.0, 636.0, 587.0, 582.0, 636.0, 581.0, 587.0, 630.0, 636.0, 633.0, 630.0, 630.0, 636.0, 630.0, 584.0, 516.0, 587.0, 630.0, 513.0, 587.0, 630.0, 633.0, 627.0, 633.0, 582.0, 519.0, 627.0, 636.0, 630.0, 582.0, 582.0, 519.0, 633.0, 636.0, 627.0, 630.0, 627.0, 630.0, 627.0, 587.0, 630.0, 633.0, 630.0, 582.0, 582.0, 587.0, 582.0, 627.0, 627.0, 587.0, 576.0, 633.0, 573.0, 636.0, 630.0, 587.0, 582.0, 636.0, 587.0, 573.0, 627.0, 636.0, 587.0, 633.0, 636.0, 639.0, 579.0, 639.0, 636.0, 525.0, 581.0, 639.0, 573.0, 582.0, 587.0, 630.0, 636.0, 639.0, 633.0, 587.0, 582.0, 636.0, 636.0, 582.0, 636.0, 633.0, 627.0, 587.0, 516.0, 630.0, 633.0, 630.0, 639.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 289.0, 285.0, 294.0, 288.0, 296.0, 316.0, 320.0, 316.0, 311.0, 308.0, 316.0, 316.0, 320.0, 294.0, 293.0, 301.0, 281.0, 314.0, 322.0, 297.0, 284.0, 299.0, 288.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 324.0, 306.0, 318.0, 312.0, 319.0, 317.0, 319.0, 311.0, 282.0, 302.0, 256.0, 260.0, 288.0, 299.0, 316.0, 314.0, 255.0, 258.0, 293.0, 294.0, 316.0, 314.0, 318.0, 315.0, 311.0, 316.0, 321.0, 312.0, 288.0, 294.0, 
268.0, 251.0, 311.0, 316.0, 320.0, 316.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 265.0, 254.0, 314.0, 319.0, 322.0, 314.0, 311.0, 316.0, 312.0, 318.0, 324.0, 303.0, 321.0, 309.0, 313.0, 314.0, 288.0, 299.0, 311.0, 319.0, 310.0, 323.0, 306.0, 324.0, 293.0, 289.0, 288.0, 294.0, 282.0, 305.0, 294.0, 288.0, 309.0, 318.0, 313.0, 314.0, 291.0, 296.0, 295.0, 281.0, 316.0, 317.0, 287.0, 286.0, 318.0, 318.0, 311.0, 319.0, 288.0, 299.0, 294.0, 288.0, 319.0, 317.0, 296.0, 291.0, 277.0, 296.0, 311.0, 316.0, 319.0, 317.0, 288.0, 299.0, 324.0, 309.0, 316.0, 320.0, 319.0, 320.0, 286.0, 293.0, 319.0, 320.0, 322.0, 314.0, 260.0, 265.0, 288.0, 293.0, 322.0, 317.0, 285.0, 288.0, 293.0, 289.0, 282.0, 305.0, 306.0, 324.0, 314.0, 322.0, 322.0, 317.0, 323.0, 310.0, 293.0, 294.0, 290.0, 292.0, 319.0, 317.0, 319.0, 317.0, 288.0, 294.0, 314.0, 322.0, 316.0, 317.0, 313.0, 314.0, 290.0, 297.0, 259.0, 257.0, 316.0, 314.0, 307.0, 326.0, 321.0, 309.0, 321.0, 318.0, 277.0, 296.0, 285.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8590666727700321, "mean_processing_ms": 0.24018037596280067, "mean_inference_ms": 1.4567772377738835}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8856000, "num_steps_sampled": 4723200, "sample_time_ms": 23393.671, "load_time_ms": 38.324, "grad_time_ms": 11132.048, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011790187563747168, "policy_loss": -0.008629883639514446, "vf_loss": 80.15734100341797, "vf_explained_var": 0.7653172016143799, "kl": 0.001749455346725881, "entropy": 1.129709243774414, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4723200, "episodes_total": 11808, "training_iteration": 369, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-55-52", "timestamp": 1660258552, "time_this_iter_s": 34.8981990814209, "time_total_s": 16963.18465781212, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16963.18465781212, "timesteps_since_restore": 4723200, "iterations_since_restore": 369, "perf": {"cpu_util_percent": 32.48979591836735, "ram_util_percent": 58.86734693877551}} +{"episode_reward_max": 639.0, "episode_reward_min": 453.0, "episode_reward_mean": 604.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 302.025}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.65, "shaped_reward_min": 133, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.13, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.16, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, 
"soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 636.0, 630.0, 584.0, 636.0, 465.0, 627.0, 633.0, 624.0, 633.0, 465.0, 582.0, 587.0, 630.0, 627.0, 639.0, 630.0, 633.0, 453.0, 639.0, 630.0, 630.0, 627.0, 636.0, 579.0, 587.0, 636.0, 510.0, 587.0, 624.0, 630.0, 539.0, 573.0, 627.0, 636.0, 587.0, 633.0, 636.0, 639.0, 579.0, 639.0, 636.0, 525.0, 581.0, 639.0, 573.0, 582.0, 587.0, 630.0, 636.0, 639.0, 633.0, 587.0, 582.0, 636.0, 636.0, 582.0, 636.0, 633.0, 627.0, 587.0, 516.0, 630.0, 633.0, 630.0, 639.0, 573.0, 582.0, 587.0, 579.0, 584.0, 636.0, 627.0, 624.0, 636.0, 587.0, 582.0, 636.0, 581.0, 587.0, 630.0, 636.0, 633.0, 630.0, 630.0, 636.0, 630.0, 584.0, 516.0, 587.0, 630.0, 513.0, 587.0, 630.0, 633.0, 627.0, 633.0, 582.0, 519.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 322.0, 314.0, 316.0, 314.0, 298.0, 286.0, 313.0, 323.0, 222.0, 243.0, 318.0, 309.0, 321.0, 312.0, 310.0, 314.0, 319.0, 314.0, 220.0, 245.0, 297.0, 285.0, 294.0, 293.0, 318.0, 312.0, 311.0, 316.0, 317.0, 
322.0, 316.0, 314.0, 319.0, 314.0, 222.0, 231.0, 319.0, 320.0, 322.0, 308.0, 322.0, 308.0, 311.0, 316.0, 319.0, 317.0, 294.0, 285.0, 283.0, 304.0, 324.0, 312.0, 259.0, 251.0, 301.0, 286.0, 305.0, 319.0, 313.0, 317.0, 271.0, 268.0, 277.0, 296.0, 311.0, 316.0, 319.0, 317.0, 288.0, 299.0, 324.0, 309.0, 316.0, 320.0, 319.0, 320.0, 286.0, 293.0, 319.0, 320.0, 322.0, 314.0, 260.0, 265.0, 288.0, 293.0, 322.0, 317.0, 285.0, 288.0, 293.0, 289.0, 282.0, 305.0, 306.0, 324.0, 314.0, 322.0, 322.0, 317.0, 323.0, 310.0, 293.0, 294.0, 290.0, 292.0, 319.0, 317.0, 319.0, 317.0, 288.0, 294.0, 314.0, 322.0, 316.0, 317.0, 313.0, 314.0, 290.0, 297.0, 259.0, 257.0, 316.0, 314.0, 307.0, 326.0, 321.0, 309.0, 321.0, 318.0, 277.0, 296.0, 285.0, 297.0, 298.0, 289.0, 285.0, 294.0, 288.0, 296.0, 316.0, 320.0, 316.0, 311.0, 308.0, 316.0, 316.0, 320.0, 294.0, 293.0, 301.0, 281.0, 314.0, 322.0, 297.0, 284.0, 299.0, 288.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 324.0, 306.0, 318.0, 312.0, 319.0, 317.0, 319.0, 311.0, 282.0, 302.0, 256.0, 260.0, 288.0, 299.0, 316.0, 314.0, 255.0, 258.0, 293.0, 294.0, 316.0, 314.0, 318.0, 315.0, 311.0, 316.0, 321.0, 312.0, 288.0, 294.0, 268.0, 251.0, 311.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8579708589489354, "mean_processing_ms": 0.23996389125057788, "mean_inference_ms": 1.455828460743175}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8880000, "num_steps_sampled": 4736000, "sample_time_ms": 23349.429, "load_time_ms": 38.506, "grad_time_ms": 11333.701, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00012880750000476837, "policy_loss": -0.007648926693946123, "vf_loss": 83.42855072021484, "vf_explained_var": 0.7715353965759277, "kl": 0.0017624356551095843, "entropy": 1.1302567720413208, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4736000, "episodes_total": 11840, 
"training_iteration": 370, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-56-26", "timestamp": 1660258586, "time_this_iter_s": 33.842254877090454, "time_total_s": 16997.02691268921, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16997.02691268921, "timesteps_since_restore": 4736000, "iterations_since_restore": 370, "perf": {"cpu_util_percent": 32.75416666666667, "ram_util_percent": 58.89374999999999}} +{"episode_reward_max": 639.0, "episode_reward_min": 453.0, "episode_reward_mean": 602.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 301.425}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.85, "shaped_reward_min": 133, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.99, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.99, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.5, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.99, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.5, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.99, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.5, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 633.0, 630.0, 582.0, 582.0, 630.0, 627.0, 633.0, 630.0, 627.0, 579.0, 633.0, 587.0, 579.0, 582.0, 576.0, 579.0, 627.0, 627.0, 636.0, 639.0, 630.0, 630.0, 582.0, 618.0, 630.0, 587.0, 473.0, 587.0, 624.0, 587.0, 630.0, 639.0, 573.0, 582.0, 587.0, 579.0, 584.0, 636.0, 627.0, 624.0, 636.0, 587.0, 582.0, 636.0, 581.0, 587.0, 630.0, 636.0, 633.0, 630.0, 630.0, 636.0, 630.0, 584.0, 516.0, 587.0, 630.0, 513.0, 587.0, 630.0, 633.0, 627.0, 633.0, 582.0, 519.0, 627.0, 579.0, 636.0, 630.0, 584.0, 636.0, 465.0, 627.0, 633.0, 624.0, 633.0, 465.0, 582.0, 587.0, 630.0, 627.0, 639.0, 630.0, 633.0, 453.0, 639.0, 630.0, 630.0, 627.0, 636.0, 579.0, 587.0, 636.0, 510.0, 587.0, 624.0, 630.0, 539.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 288.0, 291.0, 
317.0, 316.0, 311.0, 319.0, 289.0, 293.0, 290.0, 292.0, 314.0, 316.0, 319.0, 308.0, 311.0, 322.0, 314.0, 316.0, 310.0, 317.0, 302.0, 277.0, 319.0, 314.0, 299.0, 288.0, 293.0, 286.0, 283.0, 299.0, 297.0, 279.0, 291.0, 288.0, 319.0, 308.0, 314.0, 313.0, 313.0, 323.0, 319.0, 320.0, 310.0, 320.0, 319.0, 311.0, 286.0, 296.0, 314.0, 304.0, 311.0, 319.0, 293.0, 294.0, 231.0, 242.0, 291.0, 296.0, 308.0, 316.0, 293.0, 294.0, 321.0, 309.0, 321.0, 318.0, 277.0, 296.0, 285.0, 297.0, 298.0, 289.0, 285.0, 294.0, 288.0, 296.0, 316.0, 320.0, 316.0, 311.0, 308.0, 316.0, 316.0, 320.0, 294.0, 293.0, 301.0, 281.0, 314.0, 322.0, 297.0, 284.0, 299.0, 288.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 324.0, 306.0, 318.0, 312.0, 319.0, 317.0, 319.0, 311.0, 282.0, 302.0, 256.0, 260.0, 288.0, 299.0, 316.0, 314.0, 255.0, 258.0, 293.0, 294.0, 316.0, 314.0, 318.0, 315.0, 311.0, 316.0, 321.0, 312.0, 288.0, 294.0, 268.0, 251.0, 311.0, 316.0, 283.0, 296.0, 322.0, 314.0, 316.0, 314.0, 298.0, 286.0, 313.0, 323.0, 222.0, 243.0, 318.0, 309.0, 321.0, 312.0, 310.0, 314.0, 319.0, 314.0, 220.0, 245.0, 297.0, 285.0, 294.0, 293.0, 318.0, 312.0, 311.0, 316.0, 317.0, 322.0, 316.0, 314.0, 319.0, 314.0, 222.0, 231.0, 319.0, 320.0, 322.0, 308.0, 322.0, 308.0, 311.0, 316.0, 319.0, 317.0, 294.0, 285.0, 283.0, 304.0, 324.0, 312.0, 259.0, 251.0, 301.0, 286.0, 305.0, 319.0, 313.0, 317.0, 271.0, 268.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8568823921280486, "mean_processing_ms": 0.23974756126791116, "mean_inference_ms": 1.4548800404150943}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8904000, "num_steps_sampled": 4748800, "sample_time_ms": 22963.635, "load_time_ms": 38.114, "grad_time_ms": 11201.711, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006220227223820984, "policy_loss": -0.007045889273285866, "vf_loss": 82.31112670898438, "vf_explained_var": 0.7558401226997375, "kl": 0.002209648722782731, "entropy": 1.1263946294784546, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4748800, "episodes_total": 11872, "training_iteration": 371, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-56-56", "timestamp": 1660258616, "time_this_iter_s": 30.35482382774353, "time_total_s": 17027.381736516953, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17027.381736516953, "timesteps_since_restore": 4748800, "iterations_since_restore": 371, "perf": {"cpu_util_percent": 31.702325581395346, "ram_util_percent": 58.86744186046512}} +{"episode_reward_max": 639.0, "episode_reward_min": 453.0, "episode_reward_mean": 601.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 300.635}, "custom_metrics": {"sparse_reward_mean": 208.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.47, "shaped_reward_min": 133, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.47, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.62, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.04, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.47, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.47, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 633.0, 579.0, 633.0, 584.0, 633.0, 636.0, 561.0, 573.0, 570.0, 582.0, 627.0, 633.0, 587.0, 636.0, 579.0, 630.0, 627.0, 525.0, 576.0, 636.0, 636.0, 587.0, 587.0, 633.0, 633.0, 636.0, 630.0, 519.0, 587.0, 587.0, 633.0, 582.0, 519.0, 627.0, 579.0, 636.0, 630.0, 584.0, 636.0, 465.0, 627.0, 633.0, 624.0, 633.0, 465.0, 582.0, 587.0, 630.0, 627.0, 639.0, 630.0, 633.0, 453.0, 639.0, 630.0, 630.0, 627.0, 636.0, 579.0, 587.0, 636.0, 510.0, 587.0, 624.0, 630.0, 539.0, 630.0, 579.0, 633.0, 630.0, 582.0, 582.0, 630.0, 627.0, 633.0, 630.0, 627.0, 579.0, 633.0, 587.0, 579.0, 582.0, 576.0, 579.0, 627.0, 627.0, 636.0, 639.0, 630.0, 630.0, 582.0, 618.0, 630.0, 587.0, 473.0, 587.0, 624.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 298.0, 289.0, 311.0, 322.0, 296.0, 283.0, 319.0, 314.0, 295.0, 289.0, 317.0, 316.0, 314.0, 322.0, 281.0, 280.0, 285.0, 288.0, 287.0, 283.0, 290.0, 292.0, 313.0, 314.0, 314.0, 319.0, 298.0, 289.0, 319.0, 317.0, 286.0, 293.0, 311.0, 319.0, 320.0, 307.0, 262.0, 263.0, 285.0, 291.0, 320.0, 316.0, 319.0, 317.0, 288.0, 299.0, 301.0, 286.0, 322.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 311.0, 264.0, 255.0, 293.0, 294.0, 293.0, 294.0, 321.0, 312.0, 288.0, 294.0, 268.0, 251.0, 311.0, 316.0, 283.0, 296.0, 322.0, 314.0, 316.0, 314.0, 298.0, 286.0, 313.0, 323.0, 222.0, 243.0, 318.0, 309.0, 321.0, 312.0, 310.0, 314.0, 319.0, 314.0, 220.0, 245.0, 297.0, 285.0, 294.0, 293.0, 318.0, 312.0, 311.0, 316.0, 317.0, 322.0, 316.0, 314.0, 319.0, 314.0, 222.0, 231.0, 319.0, 320.0, 322.0, 308.0, 322.0, 308.0, 311.0, 316.0, 319.0, 317.0, 294.0, 285.0, 283.0, 304.0, 324.0, 312.0, 259.0, 251.0, 301.0, 286.0, 305.0, 319.0, 313.0, 317.0, 271.0, 268.0, 313.0, 317.0, 288.0, 291.0, 317.0, 316.0, 311.0, 319.0, 289.0, 293.0, 290.0, 292.0, 314.0, 316.0, 319.0, 308.0, 311.0, 322.0, 314.0, 316.0, 310.0, 317.0, 302.0, 277.0, 319.0, 314.0, 299.0, 288.0, 293.0, 286.0, 283.0, 299.0, 297.0, 279.0, 291.0, 288.0, 319.0, 308.0, 314.0, 313.0, 313.0, 323.0, 319.0, 320.0, 310.0, 320.0, 319.0, 311.0, 286.0, 296.0, 314.0, 304.0, 311.0, 319.0, 293.0, 294.0, 231.0, 242.0, 291.0, 296.0, 308.0, 316.0, 293.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8557975282130863, "mean_processing_ms": 0.23953222980731334, "mean_inference_ms": 1.453823461548284}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8928000, "num_steps_sampled": 4761600, "sample_time_ms": 22359.762, "load_time_ms": 38.161, "grad_time_ms": 11242.342, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": -0.00044215377420186996, "policy_loss": -0.007962713949382305, "vf_loss": 80.8259506225586, "vf_explained_var": 0.7670674920082092, "kl": 0.0015741548268124461, "entropy": 1.1240602731704712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4761600, "episodes_total": 11904, "training_iteration": 372, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-57-26", "timestamp": 1660258646, "time_this_iter_s": 29.648212909698486, "time_total_s": 17057.02994942665, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17057.02994942665, "timesteps_since_restore": 4761600, "iterations_since_restore": 372, "perf": {"cpu_util_percent": 34.21904761904762, "ram_util_percent": 58.84523809523809}} +{"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 602.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 301.07}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 184.94, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.54, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.13, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.01, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.01, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.01, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 582.0, 630.0, 630.0, 636.0, 633.0, 584.0, 630.0, 633.0, 408.0, 582.0, 630.0, 584.0, 630.0, 639.0, 630.0, 561.0, 630.0, 633.0, 587.0, 633.0, 630.0, 587.0, 587.0, 636.0, 636.0, 633.0, 390.0, 636.0, 579.0, 579.0, 587.0, 624.0, 630.0, 539.0, 630.0, 579.0, 633.0, 630.0, 582.0, 582.0, 630.0, 627.0, 633.0, 630.0, 627.0, 579.0, 633.0, 587.0, 579.0, 582.0, 576.0, 579.0, 627.0, 627.0, 636.0, 639.0, 630.0, 630.0, 582.0, 618.0, 630.0, 587.0, 473.0, 587.0, 624.0, 587.0, 582.0, 587.0, 633.0, 579.0, 633.0, 584.0, 633.0, 636.0, 561.0, 573.0, 570.0, 582.0, 627.0, 633.0, 587.0, 636.0, 579.0, 630.0, 627.0, 525.0, 576.0, 636.0, 636.0, 587.0, 587.0, 633.0, 633.0, 636.0, 630.0, 519.0, 587.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 316.0, 314.0, 280.0, 302.0, 319.0, 311.0, 319.0, 311.0, 314.0, 322.0, 316.0, 317.0, 293.0, 291.0, 318.0, 312.0, 319.0, 314.0, 205.0, 203.0, 285.0, 297.0, 308.0, 322.0, 292.0, 292.0, 306.0, 324.0, 319.0, 320.0, 326.0, 304.0, 285.0, 276.0, 311.0, 319.0, 313.0, 320.0, 301.0, 286.0, 311.0, 322.0, 313.0, 317.0, 293.0, 294.0, 301.0, 286.0, 316.0, 320.0, 319.0, 317.0, 316.0, 317.0, 193.0, 197.0, 322.0, 314.0, 288.0, 291.0, 281.0, 298.0, 301.0, 286.0, 305.0, 319.0, 313.0, 317.0, 271.0, 268.0, 313.0, 317.0, 288.0, 291.0, 317.0, 316.0, 311.0, 319.0, 289.0, 293.0, 290.0, 292.0, 314.0, 316.0, 319.0, 308.0, 311.0, 322.0, 314.0, 316.0, 310.0, 317.0, 302.0, 277.0, 319.0, 314.0, 299.0, 288.0, 293.0, 286.0, 283.0, 299.0, 297.0, 279.0, 291.0, 288.0, 319.0, 308.0, 314.0, 313.0, 313.0, 323.0, 319.0, 320.0, 310.0, 320.0, 319.0, 311.0, 286.0, 296.0, 314.0, 304.0, 311.0, 319.0, 293.0, 294.0, 231.0, 242.0, 291.0, 296.0, 308.0, 316.0, 293.0, 294.0, 290.0, 292.0, 298.0, 289.0, 311.0, 322.0, 296.0, 283.0, 319.0, 314.0, 295.0, 289.0, 317.0, 316.0, 314.0, 322.0, 281.0, 280.0, 285.0, 288.0, 287.0, 283.0, 290.0, 292.0, 313.0, 314.0, 314.0, 319.0, 298.0, 289.0, 319.0, 317.0, 286.0, 293.0, 311.0, 319.0, 320.0, 307.0, 262.0, 263.0, 285.0, 291.0, 320.0, 316.0, 319.0, 317.0, 288.0, 299.0, 301.0, 286.0, 322.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 311.0, 264.0, 255.0, 293.0, 294.0, 293.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8547135741769734, "mean_processing_ms": 0.23931513380752317, "mean_inference_ms": 1.4526421752907723}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8952000, 
"num_steps_sampled": 4774400, "sample_time_ms": 22196.797, "load_time_ms": 37.846, "grad_time_ms": 11312.767, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003103644819930196, "policy_loss": -0.004869487602263689, "vf_loss": 85.35115814208984, "vf_explained_var": 0.7750833630561829, "kl": 0.0021017238032072783, "entropy": 1.123950481414795, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4774400, "episodes_total": 11936, "training_iteration": 373, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-57-58", "timestamp": 1660258678, "time_this_iter_s": 32.121092796325684, "time_total_s": 17089.151042222977, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": 
false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17089.151042222977, "timesteps_since_restore": 4774400, "iterations_since_restore": 373, "perf": {"cpu_util_percent": 33.684444444444445, "ram_util_percent": 58.78888888888888}} +{"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 602.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 301.475}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.75, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 
0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.66, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.09, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 636.0, 576.0, 636.0, 639.0, 590.0, 633.0, 636.0, 522.0, 633.0, 578.0, 544.0, 636.0, 630.0, 636.0, 636.0, 627.0, 621.0, 636.0, 582.0, 582.0, 636.0, 587.0, 587.0, 636.0, 636.0, 582.0, 630.0, 636.0, 627.0, 581.0, 473.0, 587.0, 624.0, 587.0, 582.0, 587.0, 633.0, 579.0, 633.0, 584.0, 633.0, 636.0, 561.0, 573.0, 570.0, 582.0, 627.0, 633.0, 587.0, 636.0, 579.0, 630.0, 627.0, 525.0, 576.0, 636.0, 636.0, 587.0, 587.0, 633.0, 633.0, 636.0, 630.0, 519.0, 587.0, 587.0, 587.0, 630.0, 582.0, 630.0, 630.0, 636.0, 633.0, 584.0, 630.0, 633.0, 408.0, 582.0, 630.0, 584.0, 630.0, 639.0, 630.0, 561.0, 630.0, 633.0, 587.0, 633.0, 630.0, 587.0, 587.0, 636.0, 
636.0, 633.0, 390.0, 636.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 319.0, 317.0, 320.0, 316.0, 288.0, 288.0, 329.0, 307.0, 324.0, 315.0, 296.0, 294.0, 316.0, 317.0, 319.0, 317.0, 268.0, 254.0, 315.0, 318.0, 285.0, 293.0, 270.0, 274.0, 320.0, 316.0, 315.0, 315.0, 322.0, 314.0, 327.0, 309.0, 314.0, 313.0, 313.0, 308.0, 317.0, 319.0, 296.0, 286.0, 293.0, 289.0, 311.0, 325.0, 291.0, 296.0, 295.0, 292.0, 324.0, 312.0, 319.0, 317.0, 294.0, 288.0, 319.0, 311.0, 314.0, 322.0, 314.0, 313.0, 282.0, 299.0, 231.0, 242.0, 291.0, 296.0, 308.0, 316.0, 293.0, 294.0, 290.0, 292.0, 298.0, 289.0, 311.0, 322.0, 296.0, 283.0, 319.0, 314.0, 295.0, 289.0, 317.0, 316.0, 314.0, 322.0, 281.0, 280.0, 285.0, 288.0, 287.0, 283.0, 290.0, 292.0, 313.0, 314.0, 314.0, 319.0, 298.0, 289.0, 319.0, 317.0, 286.0, 293.0, 311.0, 319.0, 320.0, 307.0, 262.0, 263.0, 285.0, 291.0, 320.0, 316.0, 319.0, 317.0, 288.0, 299.0, 301.0, 286.0, 322.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 311.0, 264.0, 255.0, 293.0, 294.0, 293.0, 294.0, 296.0, 291.0, 316.0, 314.0, 280.0, 302.0, 319.0, 311.0, 319.0, 311.0, 314.0, 322.0, 316.0, 317.0, 293.0, 291.0, 318.0, 312.0, 319.0, 314.0, 205.0, 203.0, 285.0, 297.0, 308.0, 322.0, 292.0, 292.0, 306.0, 324.0, 319.0, 320.0, 326.0, 304.0, 285.0, 276.0, 311.0, 319.0, 313.0, 320.0, 301.0, 286.0, 311.0, 322.0, 313.0, 317.0, 293.0, 294.0, 301.0, 286.0, 316.0, 320.0, 319.0, 317.0, 316.0, 317.0, 193.0, 197.0, 322.0, 314.0, 288.0, 291.0, 281.0, 298.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.8536367311955897, "mean_processing_ms": 0.23909908370976882, "mean_inference_ms": 1.4514727184055203}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8976000, "num_steps_sampled": 4787200, "sample_time_ms": 21894.527, "load_time_ms": 38.299, "grad_time_ms": 11412.728, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030573883559554815, "policy_loss": -0.004609658382833004, "vf_loss": 82.26570892333984, "vf_explained_var": 0.7665780186653137, "kl": 0.002119669923558831, "entropy": 1.119057059288025, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4787200, "episodes_total": 11968, "training_iteration": 374, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-58-31", "timestamp": 1660258711, "time_this_iter_s": 33.1311149597168, "time_total_s": 17122.282157182693, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17122.282157182693, "timesteps_since_restore": 4787200, "iterations_since_restore": 374, "perf": {"cpu_util_percent": 34.03404255319149, "ram_util_percent": 59.29999999999999}} +{"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 608.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 304.09}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 186.98, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.45, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.7, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, 
"potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 630.0, 636.0, 630.0, 636.0, 564.0, 633.0, 633.0, 627.0, 630.0, 587.0, 627.0, 621.0, 576.0, 582.0, 630.0, 582.0, 639.0, 636.0, 636.0, 582.0, 630.0, 633.0, 575.0, 630.0, 587.0, 582.0, 639.0, 639.0, 587.0, 630.0, 630.0, 519.0, 587.0, 587.0, 587.0, 630.0, 582.0, 630.0, 630.0, 636.0, 633.0, 584.0, 630.0, 633.0, 408.0, 582.0, 630.0, 584.0, 630.0, 639.0, 630.0, 561.0, 630.0, 633.0, 587.0, 633.0, 630.0, 
587.0, 587.0, 636.0, 636.0, 633.0, 390.0, 636.0, 579.0, 579.0, 582.0, 636.0, 636.0, 576.0, 636.0, 639.0, 590.0, 633.0, 636.0, 522.0, 633.0, 578.0, 544.0, 636.0, 630.0, 636.0, 636.0, 627.0, 621.0, 636.0, 582.0, 582.0, 636.0, 587.0, 587.0, 636.0, 636.0, 582.0, 630.0, 636.0, 627.0, 581.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 316.0, 320.0, 313.0, 317.0, 314.0, 322.0, 275.0, 289.0, 313.0, 320.0, 321.0, 312.0, 308.0, 319.0, 316.0, 314.0, 294.0, 293.0, 314.0, 313.0, 302.0, 319.0, 285.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 294.0, 317.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 316.0, 314.0, 319.0, 314.0, 287.0, 288.0, 314.0, 316.0, 288.0, 299.0, 288.0, 294.0, 317.0, 322.0, 317.0, 322.0, 296.0, 291.0, 311.0, 319.0, 319.0, 311.0, 264.0, 255.0, 293.0, 294.0, 293.0, 294.0, 296.0, 291.0, 316.0, 314.0, 280.0, 302.0, 319.0, 311.0, 319.0, 311.0, 314.0, 322.0, 316.0, 317.0, 293.0, 291.0, 318.0, 312.0, 319.0, 314.0, 205.0, 203.0, 285.0, 297.0, 308.0, 322.0, 292.0, 292.0, 306.0, 324.0, 319.0, 320.0, 326.0, 304.0, 285.0, 276.0, 311.0, 319.0, 313.0, 320.0, 301.0, 286.0, 311.0, 322.0, 313.0, 317.0, 293.0, 294.0, 301.0, 286.0, 316.0, 320.0, 319.0, 317.0, 316.0, 317.0, 193.0, 197.0, 322.0, 314.0, 288.0, 291.0, 281.0, 298.0, 296.0, 286.0, 319.0, 317.0, 320.0, 316.0, 288.0, 288.0, 329.0, 307.0, 324.0, 315.0, 296.0, 294.0, 316.0, 317.0, 319.0, 317.0, 268.0, 254.0, 315.0, 318.0, 285.0, 293.0, 270.0, 274.0, 320.0, 316.0, 315.0, 315.0, 322.0, 
314.0, 327.0, 309.0, 314.0, 313.0, 313.0, 308.0, 317.0, 319.0, 296.0, 286.0, 293.0, 289.0, 311.0, 325.0, 291.0, 296.0, 295.0, 292.0, 324.0, 312.0, 319.0, 317.0, 294.0, 288.0, 319.0, 311.0, 314.0, 322.0, 314.0, 313.0, 282.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8525659073078762, "mean_processing_ms": 0.23888337042997632, "mean_inference_ms": 1.4503574621583197}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9000000, "num_steps_sampled": 4800000, "sample_time_ms": 21378.834, "load_time_ms": 37.98, "grad_time_ms": 11184.622, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034743063151836395, "policy_loss": -0.004098345525562763, "vf_loss": 81.30432891845703, "vf_explained_var": 0.7632368206977844, "kl": 0.0018746949499472976, "entropy": 1.1155738830566406, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4800000, "episodes_total": 12000, "training_iteration": 375, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-59-03", "timestamp": 1660258743, "time_this_iter_s": 31.725862979888916, "time_total_s": 17154.008020162582, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17154.008020162582, "timesteps_since_restore": 4800000, "iterations_since_restore": 375, "perf": {"cpu_util_percent": 34.54888888888888, "ram_util_percent": 59.05333333333331}} +{"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 611.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 305.835}, 
"custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.07, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.66, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.58, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.24, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.17, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.58, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.58, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 639.0, 636.0, 636.0, 636.0, 633.0, 633.0, 636.0, 590.0, 639.0, 582.0, 587.0, 627.0, 636.0, 581.0, 579.0, 582.0, 627.0, 639.0, 576.0, 630.0, 633.0, 633.0, 570.0, 630.0, 639.0, 639.0, 630.0, 
582.0, 581.0, 630.0, 390.0, 636.0, 579.0, 579.0, 582.0, 636.0, 636.0, 576.0, 636.0, 639.0, 590.0, 633.0, 636.0, 522.0, 633.0, 578.0, 544.0, 636.0, 630.0, 636.0, 636.0, 627.0, 621.0, 636.0, 582.0, 582.0, 636.0, 587.0, 587.0, 636.0, 636.0, 582.0, 630.0, 636.0, 627.0, 581.0, 636.0, 630.0, 630.0, 636.0, 630.0, 636.0, 564.0, 633.0, 633.0, 627.0, 630.0, 587.0, 627.0, 621.0, 576.0, 582.0, 630.0, 582.0, 639.0, 636.0, 636.0, 582.0, 630.0, 633.0, 575.0, 630.0, 587.0, 582.0, 639.0, 639.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 301.0, 321.0, 309.0, 317.0, 322.0, 311.0, 325.0, 317.0, 319.0, 319.0, 317.0, 319.0, 314.0, 321.0, 312.0, 311.0, 325.0, 293.0, 297.0, 320.0, 319.0, 288.0, 294.0, 291.0, 296.0, 316.0, 311.0, 314.0, 322.0, 281.0, 300.0, 296.0, 283.0, 288.0, 294.0, 319.0, 308.0, 314.0, 325.0, 288.0, 288.0, 316.0, 314.0, 309.0, 324.0, 316.0, 317.0, 283.0, 287.0, 314.0, 316.0, 314.0, 325.0, 319.0, 320.0, 318.0, 312.0, 293.0, 289.0, 299.0, 282.0, 316.0, 314.0, 193.0, 197.0, 322.0, 314.0, 288.0, 291.0, 281.0, 298.0, 296.0, 286.0, 319.0, 317.0, 320.0, 316.0, 288.0, 288.0, 329.0, 307.0, 324.0, 315.0, 296.0, 294.0, 316.0, 317.0, 319.0, 317.0, 268.0, 254.0, 315.0, 318.0, 285.0, 293.0, 270.0, 274.0, 320.0, 316.0, 315.0, 315.0, 322.0, 314.0, 327.0, 309.0, 314.0, 313.0, 313.0, 308.0, 317.0, 319.0, 296.0, 286.0, 293.0, 289.0, 311.0, 325.0, 291.0, 296.0, 295.0, 292.0, 324.0, 312.0, 319.0, 317.0, 294.0, 288.0, 319.0, 311.0, 314.0, 322.0, 314.0, 313.0, 282.0, 299.0, 314.0, 
322.0, 314.0, 316.0, 316.0, 314.0, 316.0, 320.0, 313.0, 317.0, 314.0, 322.0, 275.0, 289.0, 313.0, 320.0, 321.0, 312.0, 308.0, 319.0, 316.0, 314.0, 294.0, 293.0, 314.0, 313.0, 302.0, 319.0, 285.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 294.0, 317.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 316.0, 314.0, 319.0, 314.0, 287.0, 288.0, 314.0, 316.0, 288.0, 299.0, 288.0, 294.0, 317.0, 322.0, 317.0, 322.0, 296.0, 291.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8515001170833599, "mean_processing_ms": 0.2386703929555994, "mean_inference_ms": 1.4492475264918965}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9024000, "num_steps_sampled": 4812800, "sample_time_ms": 21252.385, "load_time_ms": 37.854, "grad_time_ms": 10740.605, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007883608341217041, "policy_loss": -0.006749347317963839, "vf_loss": 80.9527359008789, "vf_explained_var": 0.7635239958763123, "kl": 0.0017555366503074765, "entropy": 1.115132212638855, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4812800, "episodes_total": 12032, "training_iteration": 376, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-59-32", "timestamp": 1660258772, "time_this_iter_s": 29.115790128707886, "time_total_s": 17183.12381029129, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17183.12381029129, "timesteps_since_restore": 4812800, "iterations_since_restore": 376, "perf": {"cpu_util_percent": 34.046341463414635, "ram_util_percent": 58.78536585365854}} +{"episode_reward_max": 639.0, "episode_reward_min": 
558.0, "episode_reward_mean": 615.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 273.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.97}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 189.14, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.19, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.07, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, 
"onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.69, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.1, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.29, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.33, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.69, "optimal_onion_potting_agent_0_min": 11, 
"optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.69, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [630.0, 581.0, 630.0, 630.0, 587.0, 636.0, 639.0, 558.0, 633.0, 582.0, 636.0, 582.0, 639.0, 633.0, 636.0, 587.0, 636.0, 636.0, 582.0, 630.0, 584.0, 636.0, 587.0, 633.0, 639.0, 564.0, 639.0, 630.0, 636.0, 582.0, 630.0, 639.0, 630.0, 636.0, 627.0, 581.0, 636.0, 630.0, 630.0, 636.0, 630.0, 636.0, 564.0, 633.0, 633.0, 627.0, 630.0, 587.0, 627.0, 621.0, 576.0, 582.0, 630.0, 582.0, 639.0, 636.0, 636.0, 582.0, 630.0, 633.0, 575.0, 630.0, 587.0, 582.0, 639.0, 639.0, 587.0, 630.0, 582.0, 630.0, 639.0, 636.0, 636.0, 636.0, 633.0, 633.0, 636.0, 590.0, 639.0, 582.0, 587.0, 627.0, 636.0, 581.0, 579.0, 582.0, 627.0, 639.0, 576.0, 630.0, 633.0, 633.0, 570.0, 630.0, 639.0, 639.0, 630.0, 582.0, 581.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 298.0, 283.0, 305.0, 325.0, 314.0, 316.0, 292.0, 295.0, 319.0, 317.0, 319.0, 320.0, 279.0, 279.0, 316.0, 317.0, 293.0, 289.0, 317.0, 319.0, 296.0, 286.0, 320.0, 319.0, 311.0, 322.0, 317.0, 319.0, 280.0, 307.0, 314.0, 322.0, 317.0, 319.0, 293.0, 289.0, 316.0, 314.0, 302.0, 282.0, 316.0, 320.0, 285.0, 302.0, 317.0, 316.0, 322.0, 317.0, 291.0, 273.0, 325.0, 314.0, 319.0, 311.0, 324.0, 312.0, 296.0, 286.0, 315.0, 315.0, 314.0, 325.0, 319.0, 311.0, 314.0, 322.0, 314.0, 313.0, 282.0, 299.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 316.0, 320.0, 313.0, 317.0, 314.0, 322.0, 275.0, 289.0, 313.0, 320.0, 321.0, 312.0, 308.0, 319.0, 316.0, 314.0, 294.0, 293.0, 314.0, 313.0, 302.0, 319.0, 285.0, 291.0, 293.0, 289.0, 311.0, 
319.0, 288.0, 294.0, 317.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 316.0, 314.0, 319.0, 314.0, 287.0, 288.0, 314.0, 316.0, 288.0, 299.0, 288.0, 294.0, 317.0, 322.0, 317.0, 322.0, 296.0, 291.0, 311.0, 319.0, 281.0, 301.0, 321.0, 309.0, 317.0, 322.0, 311.0, 325.0, 317.0, 319.0, 319.0, 317.0, 319.0, 314.0, 321.0, 312.0, 311.0, 325.0, 293.0, 297.0, 320.0, 319.0, 288.0, 294.0, 291.0, 296.0, 316.0, 311.0, 314.0, 322.0, 281.0, 300.0, 296.0, 283.0, 288.0, 294.0, 319.0, 308.0, 314.0, 325.0, 288.0, 288.0, 316.0, 314.0, 309.0, 324.0, 316.0, 317.0, 283.0, 287.0, 314.0, 316.0, 314.0, 325.0, 319.0, 320.0, 318.0, 312.0, 293.0, 289.0, 299.0, 282.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8504378248511929, "mean_processing_ms": 0.23845860870627447, "mean_inference_ms": 1.448146633207985}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9048000, "num_steps_sampled": 4825600, "sample_time_ms": 21404.603, "load_time_ms": 37.763, "grad_time_ms": 10323.133, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002214103704318404, "policy_loss": -0.005506592337042093, "vf_loss": 82.8126449584961, "vf_explained_var": 0.766756534576416, "kl": 0.0020635148975998163, "entropy": 1.1211366653442383, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4825600, "episodes_total": 12064, "training_iteration": 377, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-00-03", "timestamp": 1660258803, "time_this_iter_s": 30.737117767333984, "time_total_s": 17213.860928058624, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
17213.860928058624, "timesteps_since_restore": 4825600, "iterations_since_restore": 377, "perf": {"cpu_util_percent": 36.85227272727273, "ram_util_percent": 58.872727272727275}} +{"episode_reward_max": 639.0, "episode_reward_min": 546.0, "episode_reward_mean": 616.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 264.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 308.28}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 189.76, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.81, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, 
"useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.7, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.06, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.3, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.24, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.7, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.7, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 587.0, 639.0, 576.0, 639.0, 633.0, 639.0, 636.0, 630.0, 636.0, 633.0, 633.0, 587.0, 636.0, 582.0, 593.0, 546.0, 579.0, 633.0, 639.0, 573.0, 630.0, 636.0, 633.0, 587.0, 590.0, 627.0, 630.0, 639.0, 636.0, 636.0, 639.0, 639.0, 587.0, 630.0, 582.0, 630.0, 639.0, 636.0, 636.0, 636.0, 633.0, 633.0, 636.0, 590.0, 639.0, 582.0, 587.0, 627.0, 636.0, 581.0, 579.0, 582.0, 627.0, 639.0, 576.0, 630.0, 633.0, 633.0, 570.0, 630.0, 639.0, 639.0, 630.0, 582.0, 581.0, 630.0, 630.0, 581.0, 630.0, 630.0, 587.0, 636.0, 639.0, 558.0, 633.0, 582.0, 636.0, 582.0, 639.0, 633.0, 636.0, 587.0, 636.0, 636.0, 582.0, 630.0, 584.0, 636.0, 587.0, 633.0, 639.0, 564.0, 639.0, 630.0, 636.0, 582.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 308.0, 322.0, 290.0, 297.0, 322.0, 317.0, 288.0, 288.0, 320.0, 319.0, 314.0, 319.0, 325.0, 314.0, 312.0, 324.0, 316.0, 314.0, 316.0, 320.0, 311.0, 322.0, 319.0, 314.0, 291.0, 296.0, 319.0, 317.0, 288.0, 294.0, 294.0, 299.0, 264.0, 282.0, 288.0, 291.0, 319.0, 314.0, 316.0, 323.0, 279.0, 294.0, 321.0, 309.0, 317.0, 319.0, 316.0, 317.0, 293.0, 294.0, 296.0, 294.0, 310.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 317.0, 316.0, 320.0, 317.0, 322.0, 317.0, 322.0, 296.0, 291.0, 311.0, 319.0, 281.0, 301.0, 321.0, 309.0, 317.0, 322.0, 311.0, 
325.0, 317.0, 319.0, 319.0, 317.0, 319.0, 314.0, 321.0, 312.0, 311.0, 325.0, 293.0, 297.0, 320.0, 319.0, 288.0, 294.0, 291.0, 296.0, 316.0, 311.0, 314.0, 322.0, 281.0, 300.0, 296.0, 283.0, 288.0, 294.0, 319.0, 308.0, 314.0, 325.0, 288.0, 288.0, 316.0, 314.0, 309.0, 324.0, 316.0, 317.0, 283.0, 287.0, 314.0, 316.0, 314.0, 325.0, 319.0, 320.0, 318.0, 312.0, 293.0, 289.0, 299.0, 282.0, 316.0, 314.0, 316.0, 314.0, 298.0, 283.0, 305.0, 325.0, 314.0, 316.0, 292.0, 295.0, 319.0, 317.0, 319.0, 320.0, 279.0, 279.0, 316.0, 317.0, 293.0, 289.0, 317.0, 319.0, 296.0, 286.0, 320.0, 319.0, 311.0, 322.0, 317.0, 319.0, 280.0, 307.0, 314.0, 322.0, 317.0, 319.0, 293.0, 289.0, 316.0, 314.0, 302.0, 282.0, 316.0, 320.0, 285.0, 302.0, 317.0, 316.0, 322.0, 317.0, 291.0, 273.0, 325.0, 314.0, 319.0, 311.0, 324.0, 312.0, 296.0, 286.0, 315.0, 315.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8493908771025369, "mean_processing_ms": 0.23825289023894292, "mean_inference_ms": 1.447125693575985}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9072000, "num_steps_sampled": 4838400, "sample_time_ms": 21529.609, "load_time_ms": 37.587, "grad_time_ms": 10211.527, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015308427391573787, "policy_loss": -0.006051002535969019, "vf_loss": 81.36373901367188, "vf_explained_var": 0.7675411701202393, "kl": 0.00216054730117321, "entropy": 1.1090576648712158, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4838400, "episodes_total": 12096, "training_iteration": 378, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-00-35", "timestamp": 1660258835, "time_this_iter_s": 32.476667165756226, "time_total_s": 17246.33759522438, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, 
"num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17246.33759522438, "timesteps_since_restore": 4838400, "iterations_since_restore": 378, "perf": {"cpu_util_percent": 34.55434782608695, "ram_util_percent": 59.1304347826087}} +{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 614.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.265}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.93, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.14, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.21, 
"onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.82, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.64, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.91, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.25, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.64, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.64, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 639.0, 633.0, 636.0, 630.0, 570.0, 630.0, 633.0, 579.0, 630.0, 639.0, 593.0, 633.0, 636.0, 627.0, 579.0, 590.0, 636.0, 468.0, 587.0, 576.0, 582.0, 582.0, 630.0, 636.0, 636.0, 633.0, 633.0, 630.0, 633.0, 630.0, 630.0, 630.0, 582.0, 581.0, 630.0, 630.0, 581.0, 630.0, 630.0, 587.0, 636.0, 639.0, 558.0, 633.0, 582.0, 636.0, 582.0, 639.0, 633.0, 636.0, 587.0, 636.0, 636.0, 582.0, 630.0, 584.0, 636.0, 587.0, 633.0, 639.0, 564.0, 639.0, 630.0, 636.0, 582.0, 630.0, 639.0, 633.0, 630.0, 587.0, 639.0, 576.0, 639.0, 633.0, 639.0, 636.0, 630.0, 636.0, 633.0, 633.0, 587.0, 636.0, 582.0, 593.0, 546.0, 579.0, 633.0, 639.0, 573.0, 630.0, 636.0, 633.0, 587.0, 590.0, 627.0, 630.0, 639.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 322.0, 317.0, 316.0, 317.0, 316.0, 320.0, 311.0, 319.0, 282.0, 288.0, 316.0, 314.0, 319.0, 314.0, 294.0, 285.0, 314.0, 316.0, 319.0, 320.0, 307.0, 286.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 292.0, 288.0, 302.0, 324.0, 312.0, 237.0, 231.0, 291.0, 296.0, 282.0, 294.0, 291.0, 291.0, 289.0, 293.0, 319.0, 311.0, 314.0, 
322.0, 324.0, 312.0, 314.0, 319.0, 314.0, 319.0, 314.0, 316.0, 316.0, 317.0, 308.0, 322.0, 319.0, 311.0, 318.0, 312.0, 293.0, 289.0, 299.0, 282.0, 316.0, 314.0, 316.0, 314.0, 298.0, 283.0, 305.0, 325.0, 314.0, 316.0, 292.0, 295.0, 319.0, 317.0, 319.0, 320.0, 279.0, 279.0, 316.0, 317.0, 293.0, 289.0, 317.0, 319.0, 296.0, 286.0, 320.0, 319.0, 311.0, 322.0, 317.0, 319.0, 280.0, 307.0, 314.0, 322.0, 317.0, 319.0, 293.0, 289.0, 316.0, 314.0, 302.0, 282.0, 316.0, 320.0, 285.0, 302.0, 317.0, 316.0, 322.0, 317.0, 291.0, 273.0, 325.0, 314.0, 319.0, 311.0, 324.0, 312.0, 296.0, 286.0, 315.0, 315.0, 314.0, 325.0, 316.0, 317.0, 308.0, 322.0, 290.0, 297.0, 322.0, 317.0, 288.0, 288.0, 320.0, 319.0, 314.0, 319.0, 325.0, 314.0, 312.0, 324.0, 316.0, 314.0, 316.0, 320.0, 311.0, 322.0, 319.0, 314.0, 291.0, 296.0, 319.0, 317.0, 288.0, 294.0, 294.0, 299.0, 264.0, 282.0, 288.0, 291.0, 319.0, 314.0, 316.0, 323.0, 279.0, 294.0, 321.0, 309.0, 317.0, 319.0, 316.0, 317.0, 293.0, 294.0, 296.0, 294.0, 310.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 317.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.84835648025371, "mean_processing_ms": 0.23804887644488537, "mean_inference_ms": 1.446224605883411}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9096000, "num_steps_sampled": 4851200, "sample_time_ms": 21545.942, "load_time_ms": 37.194, "grad_time_ms": 9930.811, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008399917860515416, "policy_loss": -0.0065034665167331696, "vf_loss": 79.03890991210938, "vf_explained_var": 0.7710984349250793, "kl": 0.0017613372765481472, "entropy": 1.1208573579788208, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4851200, "episodes_total": 12128, "training_iteration": 379, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-01-07", "timestamp": 
1660258867, "time_this_iter_s": 32.251976013183594, "time_total_s": 17278.589571237564, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", 
"ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 
0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17278.589571237564, "timesteps_since_restore": 4851200, "iterations_since_restore": 379, "perf": {"cpu_util_percent": 33.40222222222222, "ram_util_percent": 58.955555555555534}} +{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 614.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.18}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.76, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.93, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.33, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.65, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.28, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 
5.55, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.41, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 636.0, 467.0, 576.0, 639.0, 582.0, 582.0, 627.0, 636.0, 639.0, 627.0, 579.0, 587.0, 636.0, 579.0, 633.0, 636.0, 582.0, 639.0, 630.0, 633.0, 633.0, 633.0, 630.0, 627.0, 633.0, 627.0, 582.0, 633.0, 633.0, 636.0, 636.0, 582.0, 630.0, 639.0, 633.0, 630.0, 587.0, 639.0, 576.0, 639.0, 633.0, 639.0, 636.0, 630.0, 636.0, 633.0, 633.0, 587.0, 636.0, 582.0, 593.0, 546.0, 579.0, 633.0, 639.0, 573.0, 630.0, 636.0, 633.0, 587.0, 590.0, 627.0, 630.0, 639.0, 636.0, 636.0, 573.0, 639.0, 633.0, 636.0, 630.0, 570.0, 630.0, 633.0, 579.0, 630.0, 639.0, 593.0, 633.0, 636.0, 627.0, 579.0, 590.0, 636.0, 468.0, 587.0, 576.0, 582.0, 582.0, 630.0, 636.0, 636.0, 633.0, 633.0, 630.0, 633.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 319.0, 286.0, 296.0, 319.0, 317.0, 230.0, 237.0, 287.0, 289.0, 317.0, 322.0, 291.0, 291.0, 288.0, 294.0, 310.0, 317.0, 
314.0, 322.0, 322.0, 317.0, 308.0, 319.0, 290.0, 289.0, 298.0, 289.0, 324.0, 312.0, 291.0, 288.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 319.0, 320.0, 311.0, 319.0, 311.0, 322.0, 319.0, 314.0, 318.0, 315.0, 316.0, 314.0, 316.0, 311.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 311.0, 322.0, 321.0, 312.0, 319.0, 317.0, 324.0, 312.0, 296.0, 286.0, 315.0, 315.0, 314.0, 325.0, 316.0, 317.0, 308.0, 322.0, 290.0, 297.0, 322.0, 317.0, 288.0, 288.0, 320.0, 319.0, 314.0, 319.0, 325.0, 314.0, 312.0, 324.0, 316.0, 314.0, 316.0, 320.0, 311.0, 322.0, 319.0, 314.0, 291.0, 296.0, 319.0, 317.0, 288.0, 294.0, 294.0, 299.0, 264.0, 282.0, 288.0, 291.0, 319.0, 314.0, 316.0, 323.0, 279.0, 294.0, 321.0, 309.0, 317.0, 319.0, 316.0, 317.0, 293.0, 294.0, 296.0, 294.0, 310.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 317.0, 316.0, 320.0, 285.0, 288.0, 322.0, 317.0, 316.0, 317.0, 316.0, 320.0, 311.0, 319.0, 282.0, 288.0, 316.0, 314.0, 319.0, 314.0, 294.0, 285.0, 314.0, 316.0, 319.0, 320.0, 307.0, 286.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 292.0, 288.0, 302.0, 324.0, 312.0, 237.0, 231.0, 291.0, 296.0, 282.0, 294.0, 291.0, 291.0, 289.0, 293.0, 319.0, 311.0, 314.0, 322.0, 324.0, 312.0, 314.0, 319.0, 314.0, 319.0, 314.0, 316.0, 316.0, 317.0, 308.0, 322.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8473261545871941, "mean_processing_ms": 0.23784601732362667, "mean_inference_ms": 1.4452831057564066}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9120000, "num_steps_sampled": 4864000, "sample_time_ms": 21329.654, "load_time_ms": 36.87, "grad_time_ms": 9824.642, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010081265354529023, "policy_loss": -0.006387272384017706, "vf_loss": 79.55323028564453, "vf_explained_var": 0.7746841311454773, "kl": 0.001845820457674563, "entropy": 1.1198536157608032, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 4864000, "episodes_total": 12160, "training_iteration": 380, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-01-38", "timestamp": 1660258898, "time_this_iter_s": 30.608419179916382, "time_total_s": 17309.19799041748, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17309.19799041748, "timesteps_since_restore": 4864000, "iterations_since_restore": 380, "perf": {"cpu_util_percent": 34.48139534883721, "ram_util_percent": 58.923255813953475}} +{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 615.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.95}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 189.1, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.11, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.87, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.83, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.55, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.55, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.55, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 587.0, 636.0, 639.0, 639.0, 633.0, 630.0, 587.0, 636.0, 636.0, 627.0, 639.0, 636.0, 633.0, 569.0, 633.0, 522.0, 630.0, 636.0, 633.0, 633.0, 630.0, 633.0, 579.0, 636.0, 633.0, 633.0, 639.0, 584.0, 633.0, 633.0, 630.0, 630.0, 639.0, 636.0, 636.0, 573.0, 639.0, 633.0, 636.0, 630.0, 570.0, 630.0, 633.0, 579.0, 630.0, 639.0, 593.0, 633.0, 636.0, 627.0, 579.0, 590.0, 636.0, 468.0, 587.0, 576.0, 582.0, 582.0, 630.0, 636.0, 636.0, 633.0, 633.0, 630.0, 633.0, 630.0, 630.0, 627.0, 582.0, 636.0, 467.0, 576.0, 639.0, 582.0, 582.0, 627.0, 636.0, 639.0, 627.0, 579.0, 587.0, 636.0, 579.0, 633.0, 636.0, 582.0, 639.0, 630.0, 633.0, 633.0, 633.0, 630.0, 627.0, 633.0, 627.0, 582.0, 633.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 293.0, 294.0, 313.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 314.0, 316.0, 314.0, 293.0, 294.0, 316.0, 320.0, 314.0, 322.0, 311.0, 316.0, 319.0, 320.0, 314.0, 322.0, 314.0, 319.0, 287.0, 282.0, 311.0, 322.0, 264.0, 258.0, 311.0, 319.0, 319.0, 317.0, 319.0, 314.0, 317.0, 316.0, 321.0, 309.0, 319.0, 314.0, 288.0, 291.0, 319.0, 317.0, 311.0, 322.0, 319.0, 314.0, 322.0, 317.0, 294.0, 290.0, 321.0, 312.0, 319.0, 314.0, 311.0, 319.0, 314.0, 316.0, 319.0, 320.0, 319.0, 317.0, 316.0, 320.0, 285.0, 288.0, 322.0, 317.0, 316.0, 317.0, 316.0, 320.0, 311.0, 319.0, 282.0, 288.0, 316.0, 314.0, 319.0, 314.0, 294.0, 285.0, 314.0, 316.0, 319.0, 320.0, 307.0, 286.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 292.0, 288.0, 302.0, 324.0, 312.0, 237.0, 231.0, 291.0, 296.0, 282.0, 294.0, 291.0, 291.0, 289.0, 293.0, 319.0, 311.0, 314.0, 322.0, 324.0, 312.0, 314.0, 319.0, 314.0, 319.0, 314.0, 316.0, 316.0, 317.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 286.0, 296.0, 319.0, 317.0, 230.0, 237.0, 287.0, 289.0, 317.0, 322.0, 291.0, 291.0, 288.0, 294.0, 310.0, 317.0, 314.0, 322.0, 322.0, 317.0, 308.0, 319.0, 290.0, 289.0, 298.0, 289.0, 324.0, 312.0, 291.0, 288.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 319.0, 320.0, 311.0, 319.0, 311.0, 322.0, 319.0, 314.0, 318.0, 315.0, 316.0, 314.0, 316.0, 311.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 311.0, 322.0, 321.0, 312.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8462923346422779, "mean_processing_ms": 0.23764003789944027, "mean_inference_ms": 1.4442668898213196}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9144000, "num_steps_sampled": 4876800, "sample_time_ms": 21323.81, "load_time_ms": 36.975, "grad_time_ms": 9811.843, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005420349538326263, "policy_loss": -0.0019105566898360848, "vf_loss": 78.93695068359375, 
"vf_explained_var": 0.772759735584259, "kl": 0.0018517466960474849, "entropy": 1.1255789995193481, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4876800, "episodes_total": 12192, "training_iteration": 381, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-02-08", "timestamp": 1660258928, "time_this_iter_s": 30.169427156448364, "time_total_s": 17339.36741757393, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17339.36741757393, "timesteps_since_restore": 4876800, "iterations_since_restore": 381, "perf": {"cpu_util_percent": 34.1, "ram_util_percent": 58.95116279069769}} +{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 615.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.735}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.67, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.69, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.01, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.93, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.93, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.93, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 633.0, 630.0, 633.0, 636.0, 567.0, 627.0, 582.0, 630.0, 630.0, 582.0, 633.0, 636.0, 579.0, 636.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 633.0, 630.0, 578.0, 587.0, 530.0, 587.0, 630.0, 630.0, 522.0, 582.0, 630.0, 633.0, 630.0, 630.0, 627.0, 582.0, 636.0, 467.0, 576.0, 639.0, 582.0, 582.0, 627.0, 636.0, 639.0, 627.0, 579.0, 587.0, 636.0, 579.0, 633.0, 636.0, 582.0, 639.0, 630.0, 633.0, 633.0, 633.0, 630.0, 627.0, 633.0, 627.0, 582.0, 633.0, 633.0, 636.0, 579.0, 587.0, 636.0, 639.0, 639.0, 633.0, 630.0, 587.0, 636.0, 636.0, 627.0, 639.0, 636.0, 633.0, 569.0, 633.0, 522.0, 630.0, 636.0, 633.0, 633.0, 630.0, 633.0, 579.0, 636.0, 633.0, 633.0, 639.0, 584.0, 633.0, 633.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 324.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 316.0, 317.0, 319.0, 317.0, 282.0, 285.0, 329.0, 298.0, 291.0, 291.0, 308.0, 322.0, 308.0, 322.0, 291.0, 291.0, 309.0, 324.0, 319.0, 317.0, 290.0, 289.0, 319.0, 317.0, 316.0, 320.0, 326.0, 304.0, 316.0, 314.0, 314.0, 316.0, 290.0, 292.0, 314.0, 316.0, 319.0, 314.0, 318.0, 312.0, 293.0, 285.0, 296.0, 291.0, 271.0, 259.0, 293.0, 294.0, 316.0, 314.0, 313.0, 317.0, 265.0, 257.0, 293.0, 289.0, 314.0, 316.0, 316.0, 317.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 286.0, 296.0, 319.0, 317.0, 230.0, 237.0, 287.0, 289.0, 317.0, 322.0, 291.0, 291.0, 288.0, 294.0, 310.0, 317.0, 314.0, 322.0, 322.0, 317.0, 308.0, 319.0, 290.0, 289.0, 298.0, 289.0, 324.0, 312.0, 291.0, 288.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 319.0, 320.0, 311.0, 319.0, 311.0, 322.0, 319.0, 314.0, 318.0, 315.0, 316.0, 314.0, 316.0, 311.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 311.0, 322.0, 321.0, 312.0, 319.0, 317.0, 288.0, 291.0, 293.0, 294.0, 313.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 314.0, 316.0, 314.0, 293.0, 294.0, 316.0, 320.0, 314.0, 322.0, 311.0, 316.0, 319.0, 320.0, 314.0, 322.0, 314.0, 319.0, 287.0, 282.0, 311.0, 322.0, 264.0, 258.0, 311.0, 319.0, 319.0, 317.0, 319.0, 314.0, 317.0, 316.0, 321.0, 309.0, 319.0, 314.0, 288.0, 291.0, 319.0, 317.0, 311.0, 322.0, 319.0, 314.0, 322.0, 317.0, 294.0, 290.0, 321.0, 312.0, 319.0, 314.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.845260231721656, "mean_processing_ms": 0.23743439147057216, "mean_inference_ms": 1.443180349457874}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9168000, "num_steps_sampled": 4889600, "sample_time_ms": 21404.999, "load_time_ms": 36.99, "grad_time_ms": 9811.226, "update_time_ms": 0.001, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002549513941630721, "policy_loss": -0.004884073510766029, "vf_loss": 79.93880462646484, "vf_explained_var": 0.7685417532920837, "kl": 0.002187439240515232, "entropy": 1.1205859184265137, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4889600, "episodes_total": 12224, "training_iteration": 382, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-02-39", "timestamp": 1660258959, "time_this_iter_s": 30.454362154006958, "time_total_s": 17369.821779727936, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 
1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17369.821779727936, "timesteps_since_restore": 4889600, "iterations_since_restore": 382, "perf": {"cpu_util_percent": 33.88139534883721, "ram_util_percent": 58.944186046511625}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 617.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.555}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.11, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.09, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.61, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.82, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.05, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.46, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, 
"useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.46, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.46, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 587.0, 630.0, 582.0, 582.0, 633.0, 627.0, 576.0, 587.0, 624.0, 636.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 630.0, 636.0, 584.0, 570.0, 630.0, 639.0, 630.0, 630.0, 630.0, 639.0, 582.0, 633.0, 636.0, 633.0, 582.0, 633.0, 633.0, 636.0, 579.0, 587.0, 636.0, 639.0, 639.0, 633.0, 630.0, 587.0, 636.0, 636.0, 627.0, 639.0, 636.0, 633.0, 569.0, 633.0, 522.0, 630.0, 636.0, 633.0, 633.0, 630.0, 633.0, 579.0, 636.0, 633.0, 633.0, 639.0, 584.0, 633.0, 633.0, 630.0, 633.0, 633.0, 633.0, 630.0, 633.0, 636.0, 567.0, 627.0, 582.0, 630.0, 630.0, 582.0, 633.0, 636.0, 579.0, 636.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 633.0, 630.0, 578.0, 587.0, 530.0, 587.0, 630.0, 630.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 319.0, 314.0, 291.0, 296.0, 313.0, 317.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 314.0, 313.0, 291.0, 285.0, 296.0, 291.0, 304.0, 320.0, 314.0, 322.0, 316.0, 317.0, 318.0, 315.0, 319.0, 317.0, 311.0, 319.0, 311.0, 319.0, 317.0, 316.0, 311.0, 319.0, 317.0, 319.0, 299.0, 285.0, 285.0, 285.0, 308.0, 322.0, 319.0, 320.0, 316.0, 314.0, 316.0, 314.0, 308.0, 322.0, 319.0, 320.0, 288.0, 294.0, 317.0, 316.0, 318.0, 318.0, 319.0, 314.0, 291.0, 291.0, 311.0, 322.0, 321.0, 312.0, 319.0, 317.0, 288.0, 291.0, 293.0, 294.0, 313.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 314.0, 316.0, 314.0, 293.0, 294.0, 316.0, 320.0, 314.0, 322.0, 311.0, 316.0, 319.0, 320.0, 314.0, 322.0, 314.0, 319.0, 287.0, 282.0, 311.0, 322.0, 264.0, 258.0, 311.0, 319.0, 319.0, 317.0, 319.0, 314.0, 317.0, 316.0, 321.0, 309.0, 319.0, 314.0, 288.0, 291.0, 319.0, 317.0, 311.0, 322.0, 319.0, 314.0, 322.0, 317.0, 294.0, 290.0, 321.0, 312.0, 319.0, 314.0, 311.0, 319.0, 309.0, 324.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 316.0, 317.0, 319.0, 317.0, 282.0, 285.0, 329.0, 298.0, 291.0, 291.0, 308.0, 322.0, 308.0, 322.0, 291.0, 291.0, 309.0, 324.0, 319.0, 317.0, 290.0, 289.0, 319.0, 317.0, 316.0, 320.0, 326.0, 304.0, 316.0, 314.0, 314.0, 316.0, 290.0, 292.0, 314.0, 316.0, 319.0, 314.0, 318.0, 312.0, 293.0, 285.0, 296.0, 291.0, 271.0, 259.0, 293.0, 294.0, 316.0, 314.0, 313.0, 317.0, 265.0, 257.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8442347187033086, "mean_processing_ms": 0.23722937400390215, "mean_inference_ms": 
1.442092522505549}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9192000, "num_steps_sampled": 4902400, "sample_time_ms": 21412.011, "load_time_ms": 37.029, "grad_time_ms": 9601.556, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016393003752455115, "policy_loss": -0.005780236795544624, "vf_loss": 79.79308319091797, "vf_explained_var": 0.7686330676078796, "kl": 0.001640369649976492, "entropy": 1.1195478439331055, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4902400, "episodes_total": 12256, "training_iteration": 383, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-03-09", "timestamp": 1660258989, "time_this_iter_s": 30.096380949020386, "time_total_s": 17399.918160676956, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17399.918160676956, "timesteps_since_restore": 4902400, "iterations_since_restore": 383, "perf": {"cpu_util_percent": 34.86279069767443, "ram_util_percent": 58.95348837209304}} +{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 614.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.08}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.16, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.96, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.63, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.3, 
"dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, 
"viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 639.0, 584.0, 507.0, 573.0, 630.0, 582.0, 630.0, 633.0, 579.0, 582.0, 564.0, 633.0, 636.0, 582.0, 639.0, 582.0, 621.0, 636.0, 630.0, 633.0, 639.0, 579.0, 639.0, 639.0, 639.0, 639.0, 579.0, 639.0, 579.0, 636.0, 584.0, 633.0, 633.0, 630.0, 633.0, 633.0, 633.0, 630.0, 633.0, 636.0, 567.0, 627.0, 582.0, 630.0, 630.0, 582.0, 633.0, 636.0, 579.0, 636.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 633.0, 630.0, 578.0, 587.0, 530.0, 587.0, 630.0, 630.0, 522.0, 582.0, 630.0, 633.0, 587.0, 630.0, 582.0, 582.0, 633.0, 627.0, 576.0, 
587.0, 624.0, 636.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 630.0, 636.0, 584.0, 570.0, 630.0, 639.0, 630.0, 630.0, 630.0, 639.0, 582.0, 633.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 316.0, 306.0, 324.0, 314.0, 325.0, 293.0, 291.0, 253.0, 254.0, 291.0, 282.0, 314.0, 316.0, 288.0, 294.0, 319.0, 311.0, 324.0, 309.0, 287.0, 292.0, 293.0, 289.0, 274.0, 290.0, 322.0, 311.0, 316.0, 320.0, 288.0, 294.0, 316.0, 323.0, 293.0, 289.0, 308.0, 313.0, 319.0, 317.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 293.0, 286.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 319.0, 320.0, 290.0, 289.0, 317.0, 322.0, 279.0, 300.0, 322.0, 314.0, 294.0, 290.0, 321.0, 312.0, 319.0, 314.0, 311.0, 319.0, 309.0, 324.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 316.0, 317.0, 319.0, 317.0, 282.0, 285.0, 329.0, 298.0, 291.0, 291.0, 308.0, 322.0, 308.0, 322.0, 291.0, 291.0, 309.0, 324.0, 319.0, 317.0, 290.0, 289.0, 319.0, 317.0, 316.0, 320.0, 326.0, 304.0, 316.0, 314.0, 314.0, 316.0, 290.0, 292.0, 314.0, 316.0, 319.0, 314.0, 318.0, 312.0, 293.0, 285.0, 296.0, 291.0, 271.0, 259.0, 293.0, 294.0, 316.0, 314.0, 313.0, 317.0, 265.0, 257.0, 293.0, 289.0, 316.0, 314.0, 319.0, 314.0, 291.0, 296.0, 313.0, 317.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 314.0, 313.0, 291.0, 285.0, 296.0, 291.0, 304.0, 320.0, 314.0, 322.0, 316.0, 317.0, 318.0, 315.0, 319.0, 317.0, 311.0, 319.0, 311.0, 319.0, 317.0, 316.0, 311.0, 319.0, 317.0, 319.0, 299.0, 285.0, 285.0, 285.0, 308.0, 322.0, 319.0, 320.0, 316.0, 
314.0, 316.0, 314.0, 308.0, 322.0, 319.0, 320.0, 288.0, 294.0, 317.0, 316.0, 318.0, 318.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8432201147992054, "mean_processing_ms": 0.23702861990301977, "mean_inference_ms": 1.4411244641965177}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9216000, "num_steps_sampled": 4915200, "sample_time_ms": 21556.377, "load_time_ms": 37.038, "grad_time_ms": 9524.16, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002193765016272664, "policy_loss": -0.0053141750395298, "vf_loss": 80.70391082763672, "vf_explained_var": 0.7705557942390442, "kl": 0.0018228074768558145, "entropy": 1.124890685081482, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4915200, "episodes_total": 12288, "training_iteration": 384, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-03-43", "timestamp": 1660259023, "time_this_iter_s": 33.80204796791077, "time_total_s": 17433.720208644867, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17433.720208644867, "timesteps_since_restore": 4915200, "iterations_since_restore": 384, "perf": {"cpu_util_percent": 36.197872340425526, "ram_util_percent": 59.73617021276596}} +{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 611.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 305.76}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.52, 
"shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.93, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.88, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.34, 
"potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.34, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.34, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 630.0, 582.0, 633.0, 639.0, 630.0, 633.0, 630.0, 587.0, 567.0, 633.0, 582.0, 639.0, 582.0, 630.0, 582.0, 582.0, 630.0, 582.0, 582.0, 630.0, 630.0, 576.0, 636.0, 573.0, 573.0, 633.0, 582.0, 587.0, 636.0, 636.0, 579.0, 630.0, 630.0, 522.0, 582.0, 630.0, 633.0, 587.0, 630.0, 582.0, 582.0, 633.0, 627.0, 576.0, 587.0, 624.0, 
636.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 630.0, 636.0, 584.0, 570.0, 630.0, 639.0, 630.0, 630.0, 630.0, 639.0, 582.0, 633.0, 636.0, 633.0, 633.0, 630.0, 639.0, 584.0, 507.0, 573.0, 630.0, 582.0, 630.0, 633.0, 579.0, 582.0, 564.0, 633.0, 636.0, 582.0, 639.0, 582.0, 621.0, 636.0, 630.0, 633.0, 639.0, 579.0, 639.0, 639.0, 639.0, 639.0, 579.0, 639.0, 579.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 316.0, 314.0, 283.0, 299.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 282.0, 285.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 317.0, 313.0, 290.0, 292.0, 288.0, 294.0, 308.0, 322.0, 292.0, 290.0, 297.0, 285.0, 313.0, 317.0, 316.0, 314.0, 285.0, 291.0, 322.0, 314.0, 288.0, 285.0, 284.0, 289.0, 319.0, 314.0, 282.0, 300.0, 283.0, 304.0, 319.0, 317.0, 314.0, 322.0, 288.0, 291.0, 316.0, 314.0, 313.0, 317.0, 265.0, 257.0, 293.0, 289.0, 316.0, 314.0, 319.0, 314.0, 291.0, 296.0, 313.0, 317.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 314.0, 313.0, 291.0, 285.0, 296.0, 291.0, 304.0, 320.0, 314.0, 322.0, 316.0, 317.0, 318.0, 315.0, 319.0, 317.0, 311.0, 319.0, 311.0, 319.0, 317.0, 316.0, 311.0, 319.0, 317.0, 319.0, 299.0, 285.0, 285.0, 285.0, 308.0, 322.0, 319.0, 320.0, 316.0, 314.0, 316.0, 314.0, 308.0, 322.0, 319.0, 320.0, 288.0, 294.0, 317.0, 316.0, 318.0, 318.0, 319.0, 314.0, 317.0, 316.0, 306.0, 324.0, 314.0, 325.0, 293.0, 291.0, 253.0, 254.0, 291.0, 282.0, 314.0, 316.0, 288.0, 294.0, 319.0, 311.0, 324.0, 
309.0, 287.0, 292.0, 293.0, 289.0, 274.0, 290.0, 322.0, 311.0, 316.0, 320.0, 288.0, 294.0, 316.0, 323.0, 293.0, 289.0, 308.0, 313.0, 319.0, 317.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 293.0, 286.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 319.0, 320.0, 290.0, 289.0, 317.0, 322.0, 279.0, 300.0, 322.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8422101834912255, "mean_processing_ms": 0.23682867408475425, "mean_inference_ms": 1.4402525240623634}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9240000, "num_steps_sampled": 4928000, "sample_time_ms": 21695.699, "load_time_ms": 36.564, "grad_time_ms": 9521.463, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004186244681477547, "policy_loss": -0.0031982522923499346, "vf_loss": 79.4544906616211, "vf_explained_var": 0.7711065411567688, "kl": 0.002083372324705124, "entropy": 1.1219121217727661, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4928000, "episodes_total": 12320, "training_iteration": 385, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-04-16", "timestamp": 1660259056, "time_this_iter_s": 33.08577585220337, "time_total_s": 17466.80598449707, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17466.80598449707, "timesteps_since_restore": 4928000, "iterations_since_restore": 385, "perf": {"cpu_util_percent": 33.6468085106383, "ram_util_percent": 59.0808510638298}} +{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 609.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 249.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.845}, "custom_metrics": {"sparse_reward_mean": 211.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.29, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.76, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.94, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.41, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 13, 
"optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 590.0, 630.0, 636.0, 525.0, 530.0, 582.0, 587.0, 633.0, 627.0, 639.0, 624.0, 582.0, 630.0, 639.0, 630.0, 582.0, 
636.0, 582.0, 522.0, 636.0, 633.0, 630.0, 627.0, 519.0, 639.0, 627.0, 636.0, 633.0, 636.0, 636.0, 627.0, 582.0, 633.0, 636.0, 633.0, 633.0, 630.0, 639.0, 584.0, 507.0, 573.0, 630.0, 582.0, 630.0, 633.0, 579.0, 582.0, 564.0, 633.0, 636.0, 582.0, 639.0, 582.0, 621.0, 636.0, 630.0, 633.0, 639.0, 579.0, 639.0, 639.0, 639.0, 639.0, 579.0, 639.0, 579.0, 636.0, 573.0, 630.0, 582.0, 633.0, 639.0, 630.0, 633.0, 630.0, 587.0, 567.0, 633.0, 582.0, 639.0, 582.0, 630.0, 582.0, 582.0, 630.0, 582.0, 582.0, 630.0, 630.0, 576.0, 636.0, 573.0, 573.0, 633.0, 582.0, 587.0, 636.0, 636.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 296.0, 294.0, 314.0, 316.0, 322.0, 314.0, 262.0, 263.0, 281.0, 249.0, 288.0, 294.0, 294.0, 293.0, 316.0, 317.0, 316.0, 311.0, 319.0, 320.0, 308.0, 316.0, 291.0, 291.0, 308.0, 322.0, 319.0, 320.0, 322.0, 308.0, 288.0, 294.0, 317.0, 319.0, 296.0, 286.0, 249.0, 273.0, 314.0, 322.0, 316.0, 317.0, 308.0, 322.0, 311.0, 316.0, 268.0, 251.0, 322.0, 317.0, 316.0, 311.0, 317.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 316.0, 311.0, 288.0, 294.0, 317.0, 316.0, 318.0, 318.0, 319.0, 314.0, 317.0, 316.0, 306.0, 324.0, 314.0, 325.0, 293.0, 291.0, 253.0, 254.0, 291.0, 282.0, 314.0, 316.0, 288.0, 294.0, 319.0, 311.0, 324.0, 309.0, 287.0, 292.0, 293.0, 289.0, 274.0, 290.0, 322.0, 311.0, 316.0, 320.0, 288.0, 294.0, 316.0, 323.0, 293.0, 289.0, 308.0, 313.0, 319.0, 317.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 293.0, 286.0, 319.0, 320.0, 317.0, 322.0, 319.0, 
320.0, 319.0, 320.0, 290.0, 289.0, 317.0, 322.0, 279.0, 300.0, 322.0, 314.0, 285.0, 288.0, 316.0, 314.0, 283.0, 299.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 282.0, 285.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 317.0, 313.0, 290.0, 292.0, 288.0, 294.0, 308.0, 322.0, 292.0, 290.0, 297.0, 285.0, 313.0, 317.0, 316.0, 314.0, 285.0, 291.0, 322.0, 314.0, 288.0, 285.0, 284.0, 289.0, 319.0, 314.0, 282.0, 300.0, 283.0, 304.0, 319.0, 317.0, 314.0, 322.0, 288.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8412065546272243, "mean_processing_ms": 0.23663028686685655, "mean_inference_ms": 1.4395227599407487}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9264000, "num_steps_sampled": 4940800, "sample_time_ms": 21927.826, "load_time_ms": 36.438, "grad_time_ms": 9711.085, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002509244019165635, "policy_loss": -0.005174743477255106, "vf_loss": 82.38806915283203, "vf_explained_var": 0.7595655918121338, "kl": 0.0020332231651991606, "entropy": 1.1096264123916626, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4940800, "episodes_total": 12352, "training_iteration": 386, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-04-49", "timestamp": 1660259089, "time_this_iter_s": 33.33205199241638, "time_total_s": 17500.138036489487, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17500.138036489487, "timesteps_since_restore": 4940800, "iterations_since_restore": 386, "perf": {"cpu_util_percent": 33.47234042553192, 
"ram_util_percent": 59.114893617021295}} +{"episode_reward_max": 639.0, "episode_reward_min": 365.0, "episode_reward_mean": 607.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.515}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.63, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.05, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, 
"onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.39, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.39, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.39, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 582.0, 633.0, 633.0, 630.0, 582.0, 627.0, 630.0, 636.0, 506.0, 630.0, 639.0, 630.0, 582.0, 630.0, 590.0, 584.0, 584.0, 365.0, 639.0, 582.0, 639.0, 587.0, 633.0, 633.0, 627.0, 579.0, 639.0, 579.0, 636.0, 573.0, 630.0, 582.0, 633.0, 639.0, 630.0, 633.0, 630.0, 587.0, 567.0, 633.0, 582.0, 639.0, 582.0, 630.0, 582.0, 582.0, 630.0, 582.0, 582.0, 630.0, 630.0, 576.0, 636.0, 573.0, 573.0, 633.0, 582.0, 587.0, 636.0, 636.0, 579.0, 636.0, 590.0, 630.0, 636.0, 525.0, 530.0, 582.0, 587.0, 633.0, 627.0, 639.0, 624.0, 582.0, 630.0, 639.0, 630.0, 582.0, 636.0, 582.0, 522.0, 636.0, 633.0, 630.0, 627.0, 519.0, 639.0, 627.0, 636.0, 633.0, 636.0, 636.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 285.0, 297.0, 315.0, 324.0, 313.0, 320.0, 319.0, 317.0, 322.0, 317.0, 282.0, 276.0, 296.0, 286.0, 319.0, 314.0, 317.0, 316.0, 302.0, 328.0, 291.0, 291.0, 312.0, 315.0, 303.0, 327.0, 314.0, 322.0, 249.0, 257.0, 316.0, 314.0, 317.0, 322.0, 316.0, 314.0, 287.0, 295.0, 319.0, 311.0, 299.0, 291.0, 293.0, 291.0, 296.0, 288.0, 183.0, 182.0, 320.0, 319.0, 288.0, 294.0, 319.0, 320.0, 296.0, 291.0, 316.0, 317.0, 317.0, 316.0, 308.0, 319.0, 290.0, 289.0, 317.0, 322.0, 279.0, 300.0, 322.0, 314.0, 285.0, 288.0, 316.0, 314.0, 283.0, 299.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 282.0, 
285.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 317.0, 313.0, 290.0, 292.0, 288.0, 294.0, 308.0, 322.0, 292.0, 290.0, 297.0, 285.0, 313.0, 317.0, 316.0, 314.0, 285.0, 291.0, 322.0, 314.0, 288.0, 285.0, 284.0, 289.0, 319.0, 314.0, 282.0, 300.0, 283.0, 304.0, 319.0, 317.0, 314.0, 322.0, 288.0, 291.0, 319.0, 317.0, 296.0, 294.0, 314.0, 316.0, 322.0, 314.0, 262.0, 263.0, 281.0, 249.0, 288.0, 294.0, 294.0, 293.0, 316.0, 317.0, 316.0, 311.0, 319.0, 320.0, 308.0, 316.0, 291.0, 291.0, 308.0, 322.0, 319.0, 320.0, 322.0, 308.0, 288.0, 294.0, 317.0, 319.0, 296.0, 286.0, 249.0, 273.0, 314.0, 322.0, 316.0, 317.0, 308.0, 322.0, 311.0, 316.0, 268.0, 251.0, 322.0, 317.0, 316.0, 311.0, 317.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 316.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.840202535823751, "mean_processing_ms": 0.2364293047857562, "mean_inference_ms": 1.4386641797535789}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9288000, "num_steps_sampled": 4953600, "sample_time_ms": 21835.055, "load_time_ms": 36.557, "grad_time_ms": 9742.403, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00015397991228383034, "policy_loss": -0.007473704870790243, "vf_loss": 81.87383270263672, "vf_explained_var": 0.7745316028594971, "kl": 0.0020445811096578836, "entropy": 1.1193923950195312, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4953600, "episodes_total": 12384, "training_iteration": 387, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-05-19", "timestamp": 1660259119, "time_this_iter_s": 30.124536752700806, "time_total_s": 17530.262573242188, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17530.262573242188, "timesteps_since_restore": 4953600, "iterations_since_restore": 387, "perf": {"cpu_util_percent": 31.948837209302326, "ram_util_percent": 58.95581395348838}} +{"episode_reward_max": 639.0, "episode_reward_min": 365.0, "episode_reward_mean": 611.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.82}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.64, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.96, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 16.89, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.31, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.55, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.51, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.19, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.55, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.51, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.55, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.51, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 639.0, 639.0, 627.0, 627.0, 633.0, 627.0, 639.0, 636.0, 636.0, 584.0, 573.0, 636.0, 630.0, 573.0, 630.0, 633.0, 630.0, 627.0, 636.0, 630.0, 636.0, 630.0, 630.0, 627.0, 636.0, 630.0, 630.0, 587.0, 579.0, 570.0, 633.0, 587.0, 636.0, 636.0, 579.0, 636.0, 590.0, 630.0, 636.0, 525.0, 530.0, 582.0, 587.0, 633.0, 627.0, 639.0, 624.0, 582.0, 630.0, 639.0, 630.0, 582.0, 636.0, 582.0, 522.0, 636.0, 633.0, 630.0, 627.0, 519.0, 639.0, 627.0, 636.0, 633.0, 636.0, 636.0, 627.0, 630.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 582.0, 633.0, 633.0, 630.0, 582.0, 627.0, 630.0, 636.0, 506.0, 630.0, 639.0, 630.0, 582.0, 630.0, 590.0, 584.0, 584.0, 365.0, 639.0, 582.0, 639.0, 587.0, 633.0, 633.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 317.0, 322.0, 317.0, 322.0, 311.0, 316.0, 308.0, 319.0, 319.0, 314.0, 314.0, 313.0, 316.0, 323.0, 314.0, 322.0, 319.0, 317.0, 291.0, 293.0, 298.0, 275.0, 324.0, 312.0, 323.0, 307.0, 281.0, 292.0, 308.0, 322.0, 317.0, 316.0, 311.0, 319.0, 313.0, 314.0, 319.0, 317.0, 314.0, 316.0, 319.0, 317.0, 316.0, 314.0, 316.0, 314.0, 316.0, 311.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 293.0, 294.0, 288.0, 291.0, 
295.0, 275.0, 319.0, 314.0, 283.0, 304.0, 319.0, 317.0, 314.0, 322.0, 288.0, 291.0, 319.0, 317.0, 296.0, 294.0, 314.0, 316.0, 322.0, 314.0, 262.0, 263.0, 281.0, 249.0, 288.0, 294.0, 294.0, 293.0, 316.0, 317.0, 316.0, 311.0, 319.0, 320.0, 308.0, 316.0, 291.0, 291.0, 308.0, 322.0, 319.0, 320.0, 322.0, 308.0, 288.0, 294.0, 317.0, 319.0, 296.0, 286.0, 249.0, 273.0, 314.0, 322.0, 316.0, 317.0, 308.0, 322.0, 311.0, 316.0, 268.0, 251.0, 322.0, 317.0, 316.0, 311.0, 317.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 316.0, 311.0, 316.0, 314.0, 285.0, 297.0, 315.0, 324.0, 313.0, 320.0, 319.0, 317.0, 322.0, 317.0, 282.0, 276.0, 296.0, 286.0, 319.0, 314.0, 317.0, 316.0, 302.0, 328.0, 291.0, 291.0, 312.0, 315.0, 303.0, 327.0, 314.0, 322.0, 249.0, 257.0, 316.0, 314.0, 317.0, 322.0, 316.0, 314.0, 287.0, 295.0, 319.0, 311.0, 299.0, 291.0, 293.0, 291.0, 296.0, 288.0, 183.0, 182.0, 320.0, 319.0, 288.0, 294.0, 319.0, 320.0, 296.0, 291.0, 316.0, 317.0, 317.0, 316.0, 308.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8391970766904484, "mean_processing_ms": 0.23622675772030782, "mean_inference_ms": 1.4376630323224628}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9312000, "num_steps_sampled": 4966400, "sample_time_ms": 21599.042, "load_time_ms": 36.591, "grad_time_ms": 9790.428, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0022788590285927057, "policy_loss": -0.00476012472063303, "vf_loss": 75.92620849609375, "vf_explained_var": 0.7665655016899109, "kl": 0.0023904216941446066, "entropy": 1.1072710752487183, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4966400, "episodes_total": 12416, "training_iteration": 388, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-05-50", "timestamp": 1660259150, "time_this_iter_s": 30.593504667282104, "time_total_s": 17560.85607790947, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17560.85607790947, "timesteps_since_restore": 4966400, "iterations_since_restore": 388, "perf": {"cpu_util_percent": 33.502325581395354, "ram_util_percent": 59.08372093023256}} +{"episode_reward_max": 639.0, "episode_reward_min": 365.0, "episode_reward_mean": 614.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 307.46}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.12, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 17.31, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 17.01, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.31, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.61, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, 
"soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.21, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.61, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.61, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 639.0, 579.0, 582.0, 633.0, 579.0, 633.0, 636.0, 630.0, 633.0, 609.0, 582.0, 636.0, 630.0, 636.0, 576.0, 636.0, 633.0, 630.0, 636.0, 582.0, 582.0, 636.0, 630.0, 633.0, 630.0, 636.0, 579.0, 633.0, 570.0, 633.0, 633.0, 636.0, 636.0, 627.0, 630.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 582.0, 633.0, 633.0, 630.0, 582.0, 627.0, 630.0, 636.0, 506.0, 630.0, 639.0, 630.0, 582.0, 630.0, 590.0, 584.0, 584.0, 365.0, 639.0, 582.0, 639.0, 587.0, 633.0, 633.0, 627.0, 582.0, 639.0, 639.0, 627.0, 627.0, 633.0, 627.0, 639.0, 636.0, 636.0, 584.0, 573.0, 636.0, 630.0, 573.0, 630.0, 633.0, 630.0, 627.0, 636.0, 630.0, 636.0, 630.0, 630.0, 627.0, 636.0, 630.0, 630.0, 587.0, 579.0, 570.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 322.0, 319.0, 311.0, 317.0, 322.0, 282.0, 297.0, 294.0, 288.0, 316.0, 317.0, 288.0, 291.0, 311.0, 322.0, 314.0, 322.0, 313.0, 317.0, 317.0, 316.0, 310.0, 299.0, 293.0, 289.0, 317.0, 319.0, 322.0, 308.0, 319.0, 
317.0, 288.0, 288.0, 314.0, 322.0, 316.0, 317.0, 314.0, 316.0, 322.0, 314.0, 285.0, 297.0, 294.0, 288.0, 314.0, 322.0, 321.0, 309.0, 316.0, 317.0, 311.0, 319.0, 311.0, 325.0, 282.0, 297.0, 319.0, 314.0, 287.0, 283.0, 319.0, 314.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 316.0, 311.0, 316.0, 314.0, 285.0, 297.0, 315.0, 324.0, 313.0, 320.0, 319.0, 317.0, 322.0, 317.0, 282.0, 276.0, 296.0, 286.0, 319.0, 314.0, 317.0, 316.0, 302.0, 328.0, 291.0, 291.0, 312.0, 315.0, 303.0, 327.0, 314.0, 322.0, 249.0, 257.0, 316.0, 314.0, 317.0, 322.0, 316.0, 314.0, 287.0, 295.0, 319.0, 311.0, 299.0, 291.0, 293.0, 291.0, 296.0, 288.0, 183.0, 182.0, 320.0, 319.0, 288.0, 294.0, 319.0, 320.0, 296.0, 291.0, 316.0, 317.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 317.0, 322.0, 317.0, 322.0, 311.0, 316.0, 308.0, 319.0, 319.0, 314.0, 314.0, 313.0, 316.0, 323.0, 314.0, 322.0, 319.0, 317.0, 291.0, 293.0, 298.0, 275.0, 324.0, 312.0, 323.0, 307.0, 281.0, 292.0, 308.0, 322.0, 317.0, 316.0, 311.0, 319.0, 313.0, 314.0, 319.0, 317.0, 314.0, 316.0, 319.0, 317.0, 316.0, 314.0, 316.0, 314.0, 316.0, 311.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 293.0, 294.0, 288.0, 291.0, 295.0, 275.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.838196919665678, "mean_processing_ms": 0.23602508803676062, "mean_inference_ms": 1.4365150313753652}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9336000, "num_steps_sampled": 4979200, "sample_time_ms": 21400.03, "load_time_ms": 36.799, "grad_time_ms": 9763.193, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015943764010444283, "policy_loss": -0.005665285978466272, "vf_loss": 78.14656829833984, "vf_explained_var": 0.7691711783409119, "kl": 0.0016816608840599656, "entropy": 1.1099879741668701, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4979200, "episodes_total": 12448, 
"training_iteration": 389, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-06-20", "timestamp": 1660259180, "time_this_iter_s": 29.990082025527954, "time_total_s": 17590.846159934998, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17590.846159934998, "timesteps_since_restore": 4979200, "iterations_since_restore": 389, "perf": {"cpu_util_percent": 36.06279069767442, "ram_util_percent": 59.181395348837206}} +{"episode_reward_max": 639.0, "episode_reward_min": 567.0, "episode_reward_mean": 617.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 275.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 308.805}, "custom_metrics": {"sparse_reward_mean": 214.4, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 188.81, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.3, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.97, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.76, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.68, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.24, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.22, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.68, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.68, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 633.0, 633.0, 582.0, 579.0, 582.0, 633.0, 579.0, 633.0, 633.0, 576.0, 567.0, 636.0, 576.0, 636.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 630.0, 630.0, 633.0, 633.0, 633.0, 579.0, 639.0, 633.0, 579.0, 587.0, 633.0, 633.0, 627.0, 582.0, 639.0, 639.0, 627.0, 627.0, 633.0, 627.0, 639.0, 636.0, 636.0, 584.0, 573.0, 636.0, 630.0, 573.0, 630.0, 633.0, 630.0, 627.0, 636.0, 630.0, 636.0, 630.0, 630.0, 627.0, 636.0, 630.0, 630.0, 587.0, 579.0, 570.0, 633.0, 633.0, 630.0, 639.0, 579.0, 582.0, 633.0, 579.0, 633.0, 636.0, 630.0, 633.0, 609.0, 582.0, 636.0, 630.0, 636.0, 576.0, 636.0, 633.0, 630.0, 636.0, 582.0, 582.0, 636.0, 630.0, 633.0, 630.0, 636.0, 579.0, 633.0, 570.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 319.0, 314.0, 
311.0, 322.0, 285.0, 297.0, 287.0, 292.0, 291.0, 291.0, 316.0, 317.0, 288.0, 291.0, 310.0, 323.0, 317.0, 316.0, 298.0, 278.0, 282.0, 285.0, 314.0, 322.0, 292.0, 284.0, 316.0, 320.0, 318.0, 315.0, 317.0, 322.0, 319.0, 317.0, 288.0, 291.0, 321.0, 315.0, 308.0, 319.0, 314.0, 322.0, 314.0, 322.0, 319.0, 311.0, 312.0, 318.0, 316.0, 317.0, 316.0, 317.0, 319.0, 314.0, 285.0, 294.0, 317.0, 322.0, 311.0, 322.0, 288.0, 291.0, 296.0, 291.0, 316.0, 317.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 317.0, 322.0, 317.0, 322.0, 311.0, 316.0, 308.0, 319.0, 319.0, 314.0, 314.0, 313.0, 316.0, 323.0, 314.0, 322.0, 319.0, 317.0, 291.0, 293.0, 298.0, 275.0, 324.0, 312.0, 323.0, 307.0, 281.0, 292.0, 308.0, 322.0, 317.0, 316.0, 311.0, 319.0, 313.0, 314.0, 319.0, 317.0, 314.0, 316.0, 319.0, 317.0, 316.0, 314.0, 316.0, 314.0, 316.0, 311.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 293.0, 294.0, 288.0, 291.0, 295.0, 275.0, 319.0, 314.0, 311.0, 322.0, 319.0, 311.0, 317.0, 322.0, 282.0, 297.0, 294.0, 288.0, 316.0, 317.0, 288.0, 291.0, 311.0, 322.0, 314.0, 322.0, 313.0, 317.0, 317.0, 316.0, 310.0, 299.0, 293.0, 289.0, 317.0, 319.0, 322.0, 308.0, 319.0, 317.0, 288.0, 288.0, 314.0, 322.0, 316.0, 317.0, 314.0, 316.0, 322.0, 314.0, 285.0, 297.0, 294.0, 288.0, 314.0, 322.0, 321.0, 309.0, 316.0, 317.0, 311.0, 319.0, 311.0, 325.0, 282.0, 297.0, 319.0, 314.0, 287.0, 283.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8371997037946857, "mean_processing_ms": 0.23582575562090372, "mean_inference_ms": 1.4353288532965658}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9360000, "num_steps_sampled": 4992000, "sample_time_ms": 21348.738, "load_time_ms": 36.486, "grad_time_ms": 9719.814, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004423701611813158, "policy_loss": -0.007186357397586107, "vf_loss": 81.85875701904297, "vf_explained_var": 0.7682639956474304, "kl": 0.002242224058136344, "entropy": 1.114312767982483, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4992000, "episodes_total": 12480, "training_iteration": 390, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-06-49", "timestamp": 1660259209, "time_this_iter_s": 29.65726089477539, "time_total_s": 17620.503420829773, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17620.503420829773, "timesteps_since_restore": 4992000, "iterations_since_restore": 390, "perf": {"cpu_util_percent": 34.275609756097566, "ram_util_percent": 59.390243902439025}} +{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 614.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 307.385}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.37, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.52, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.03, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.45, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.02, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.56, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.45, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.02, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.45, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.02, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [593.0, 636.0, 633.0, 630.0, 630.0, 573.0, 630.0, 630.0, 630.0, 516.0, 636.0, 630.0, 633.0, 639.0, 630.0, 582.0, 633.0, 636.0, 636.0, 639.0, 639.0, 636.0, 567.0, 636.0, 636.0, 582.0, 636.0, 639.0, 636.0, 408.0, 636.0, 636.0, 587.0, 579.0, 570.0, 633.0, 633.0, 630.0, 639.0, 579.0, 582.0, 633.0, 579.0, 633.0, 636.0, 630.0, 633.0, 609.0, 582.0, 636.0, 630.0, 636.0, 576.0, 636.0, 633.0, 630.0, 636.0, 582.0, 582.0, 636.0, 630.0, 633.0, 630.0, 636.0, 579.0, 633.0, 570.0, 633.0, 582.0, 633.0, 633.0, 582.0, 579.0, 582.0, 633.0, 579.0, 633.0, 633.0, 576.0, 567.0, 636.0, 576.0, 636.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 630.0, 630.0, 633.0, 633.0, 633.0, 579.0, 639.0, 633.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 297.0, 322.0, 314.0, 316.0, 317.0, 316.0, 314.0, 312.0, 318.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 308.0, 322.0, 254.0, 262.0, 324.0, 312.0, 310.0, 320.0, 313.0, 320.0, 319.0, 320.0, 321.0, 309.0, 291.0, 291.0, 316.0, 317.0, 319.0, 317.0, 314.0, 322.0, 320.0, 319.0, 319.0, 320.0, 319.0, 317.0, 274.0, 293.0, 309.0, 327.0, 319.0, 317.0, 288.0, 294.0, 322.0, 314.0, 319.0, 320.0, 319.0, 317.0, 198.0, 210.0, 314.0, 322.0, 313.0, 323.0, 293.0, 294.0, 288.0, 291.0, 295.0, 275.0, 319.0, 314.0, 311.0, 322.0, 319.0, 311.0, 317.0, 322.0, 282.0, 297.0, 294.0, 288.0, 316.0, 317.0, 288.0, 291.0, 311.0, 322.0, 314.0, 322.0, 313.0, 317.0, 317.0, 316.0, 310.0, 299.0, 293.0, 289.0, 317.0, 319.0, 322.0, 308.0, 319.0, 317.0, 288.0, 288.0, 314.0, 322.0, 316.0, 317.0, 314.0, 316.0, 322.0, 314.0, 285.0, 297.0, 294.0, 288.0, 314.0, 322.0, 321.0, 309.0, 316.0, 317.0, 311.0, 319.0, 311.0, 325.0, 282.0, 297.0, 319.0, 314.0, 287.0, 283.0, 319.0, 314.0, 296.0, 286.0, 319.0, 314.0, 311.0, 322.0, 285.0, 297.0, 287.0, 292.0, 291.0, 291.0, 316.0, 317.0, 288.0, 291.0, 310.0, 323.0, 317.0, 316.0, 298.0, 278.0, 282.0, 285.0, 314.0, 322.0, 292.0, 284.0, 316.0, 320.0, 318.0, 315.0, 317.0, 322.0, 319.0, 317.0, 288.0, 291.0, 321.0, 315.0, 308.0, 319.0, 314.0, 322.0, 314.0, 322.0, 319.0, 311.0, 312.0, 318.0, 316.0, 317.0, 316.0, 317.0, 319.0, 314.0, 285.0, 294.0, 317.0, 322.0, 311.0, 322.0, 288.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.83620884479323, "mean_processing_ms": 0.23562873648902904, "mean_inference_ms": 1.4341204983718234}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9384000, "num_steps_sampled": 5004800, "sample_time_ms": 21231.946, "load_time_ms": 36.725, "grad_time_ms": 9739.689, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.004321941174566746, "policy_loss": -0.0030343374237418175, "vf_loss": 79.1146011352539, "vf_explained_var": 0.7782495021820068, "kl": 0.0022527193650603294, "entropy": 1.1103630065917969, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5004800, "episodes_total": 12512, "training_iteration": 391, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-07-19", "timestamp": 1660259239, "time_this_iter_s": 29.202332973480225, "time_total_s": 17649.705753803253, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17649.705753803253, "timesteps_since_restore": 5004800, "iterations_since_restore": 391, "perf": {"cpu_util_percent": 32.102380952380955, "ram_util_percent": 59.785714285714285}} +{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 613.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.805}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.41, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.84, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.63, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.1, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.45, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, 
"useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.31, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.1, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.1, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 636.0, 639.0, 587.0, 636.0, 630.0, 633.0, 639.0, 639.0, 639.0, 587.0, 582.0, 525.0, 579.0, 636.0, 582.0, 636.0, 627.0, 630.0, 630.0, 587.0, 639.0, 633.0, 570.0, 630.0, 630.0, 639.0, 633.0, 627.0, 582.0, 576.0, 579.0, 633.0, 570.0, 633.0, 582.0, 633.0, 633.0, 582.0, 579.0, 582.0, 633.0, 579.0, 633.0, 633.0, 576.0, 567.0, 636.0, 576.0, 636.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 630.0, 630.0, 633.0, 633.0, 633.0, 579.0, 639.0, 633.0, 579.0, 593.0, 636.0, 633.0, 630.0, 630.0, 573.0, 630.0, 630.0, 630.0, 516.0, 636.0, 630.0, 633.0, 639.0, 630.0, 582.0, 633.0, 636.0, 636.0, 639.0, 639.0, 636.0, 567.0, 636.0, 636.0, 582.0, 636.0, 639.0, 636.0, 408.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 290.0, 286.0, 322.0, 314.0, 322.0, 317.0, 299.0, 288.0, 314.0, 322.0, 313.0, 317.0, 321.0, 312.0, 314.0, 325.0, 319.0, 320.0, 317.0, 322.0, 293.0, 294.0, 288.0, 294.0, 270.0, 255.0, 289.0, 290.0, 322.0, 314.0, 286.0, 296.0, 319.0, 317.0, 313.0, 314.0, 316.0, 314.0, 316.0, 314.0, 290.0, 297.0, 319.0, 320.0, 319.0, 314.0, 279.0, 291.0, 315.0, 315.0, 316.0, 314.0, 317.0, 322.0, 317.0, 316.0, 300.0, 327.0, 301.0, 281.0, 285.0, 291.0, 282.0, 297.0, 319.0, 314.0, 287.0, 283.0, 319.0, 314.0, 296.0, 286.0, 319.0, 314.0, 311.0, 322.0, 285.0, 297.0, 287.0, 292.0, 291.0, 291.0, 316.0, 317.0, 288.0, 291.0, 310.0, 323.0, 317.0, 316.0, 298.0, 278.0, 282.0, 285.0, 314.0, 322.0, 292.0, 284.0, 316.0, 320.0, 318.0, 315.0, 317.0, 322.0, 319.0, 317.0, 288.0, 291.0, 321.0, 315.0, 308.0, 319.0, 314.0, 322.0, 314.0, 322.0, 319.0, 311.0, 312.0, 318.0, 316.0, 317.0, 316.0, 317.0, 319.0, 314.0, 285.0, 294.0, 317.0, 322.0, 311.0, 322.0, 288.0, 291.0, 296.0, 297.0, 322.0, 314.0, 316.0, 317.0, 316.0, 314.0, 312.0, 318.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 308.0, 322.0, 254.0, 262.0, 324.0, 312.0, 310.0, 320.0, 313.0, 320.0, 319.0, 320.0, 321.0, 309.0, 291.0, 291.0, 316.0, 317.0, 319.0, 317.0, 314.0, 322.0, 320.0, 319.0, 319.0, 320.0, 319.0, 317.0, 274.0, 293.0, 309.0, 327.0, 319.0, 317.0, 288.0, 294.0, 322.0, 314.0, 319.0, 320.0, 319.0, 317.0, 198.0, 210.0, 314.0, 322.0, 313.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8352162722416216, "mean_processing_ms": 0.2354315996686975, "mean_inference_ms": 1.4328297296809587}, "off_policy_estimator": {}, "info": 
{"num_steps_trained": 9408000, "num_steps_sampled": 5017600, "sample_time_ms": 20999.824, "load_time_ms": 36.668, "grad_time_ms": 9737.715, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033923883456736803, "policy_loss": -0.004631926771253347, "vf_loss": 85.770751953125, "vf_explained_var": 0.7699734568595886, "kl": 0.0020241406746208668, "entropy": 1.1055186986923218, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5017600, "episodes_total": 12544, "training_iteration": 392, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-07-47", "timestamp": 1660259267, "time_this_iter_s": 28.112272024154663, "time_total_s": 17677.818025827408, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": 
{"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17677.818025827408, "timesteps_since_restore": 5017600, "iterations_since_restore": 392, "perf": {"cpu_util_percent": 32.03076923076923, "ram_util_percent": 59.29230769230767}} +{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 615.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.81}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.22, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.94, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.53, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.04, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.01, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.47, "dish_pickup_agent_0_min": 3, 
"dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.01, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.01, 
"viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 639.0, 582.0, 630.0, 633.0, 630.0, 579.0, 633.0, 630.0, 633.0, 630.0, 633.0, 630.0, 636.0, 633.0, 582.0, 633.0, 636.0, 636.0, 633.0, 582.0, 579.0, 627.0, 633.0, 587.0, 587.0, 581.0, 639.0, 630.0, 636.0, 639.0, 579.0, 639.0, 633.0, 579.0, 593.0, 636.0, 633.0, 630.0, 630.0, 573.0, 630.0, 630.0, 630.0, 516.0, 636.0, 630.0, 633.0, 639.0, 630.0, 582.0, 633.0, 636.0, 636.0, 639.0, 639.0, 636.0, 567.0, 636.0, 636.0, 582.0, 636.0, 639.0, 636.0, 408.0, 636.0, 636.0, 579.0, 576.0, 636.0, 639.0, 587.0, 636.0, 630.0, 633.0, 639.0, 639.0, 639.0, 587.0, 582.0, 525.0, 579.0, 
636.0, 582.0, 636.0, 627.0, 630.0, 630.0, 587.0, 639.0, 633.0, 570.0, 630.0, 630.0, 639.0, 633.0, 627.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 319.0, 314.0, 319.0, 320.0, 288.0, 294.0, 313.0, 317.0, 314.0, 319.0, 319.0, 311.0, 288.0, 291.0, 309.0, 324.0, 309.0, 321.0, 316.0, 317.0, 313.0, 317.0, 314.0, 319.0, 314.0, 316.0, 329.0, 307.0, 314.0, 319.0, 285.0, 297.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 321.0, 312.0, 291.0, 291.0, 287.0, 292.0, 308.0, 319.0, 314.0, 319.0, 283.0, 304.0, 293.0, 294.0, 291.0, 290.0, 322.0, 317.0, 316.0, 314.0, 322.0, 314.0, 317.0, 322.0, 285.0, 294.0, 317.0, 322.0, 311.0, 322.0, 288.0, 291.0, 296.0, 297.0, 322.0, 314.0, 316.0, 317.0, 316.0, 314.0, 312.0, 318.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 308.0, 322.0, 254.0, 262.0, 324.0, 312.0, 310.0, 320.0, 313.0, 320.0, 319.0, 320.0, 321.0, 309.0, 291.0, 291.0, 316.0, 317.0, 319.0, 317.0, 314.0, 322.0, 320.0, 319.0, 319.0, 320.0, 319.0, 317.0, 274.0, 293.0, 309.0, 327.0, 319.0, 317.0, 288.0, 294.0, 322.0, 314.0, 319.0, 320.0, 319.0, 317.0, 198.0, 210.0, 314.0, 322.0, 313.0, 323.0, 291.0, 288.0, 290.0, 286.0, 322.0, 314.0, 322.0, 317.0, 299.0, 288.0, 314.0, 322.0, 313.0, 317.0, 321.0, 312.0, 314.0, 325.0, 319.0, 320.0, 317.0, 322.0, 293.0, 294.0, 288.0, 294.0, 270.0, 255.0, 289.0, 290.0, 322.0, 314.0, 286.0, 296.0, 319.0, 317.0, 313.0, 314.0, 316.0, 314.0, 316.0, 314.0, 290.0, 297.0, 319.0, 320.0, 319.0, 314.0, 279.0, 291.0, 315.0, 315.0, 316.0, 314.0, 317.0, 
322.0, 317.0, 316.0, 300.0, 327.0, 301.0, 281.0, 285.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8342365209622892, "mean_processing_ms": 0.23523671051615616, "mean_inference_ms": 1.4317637574949895}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9432000, "num_steps_sampled": 5030400, "sample_time_ms": 21299.857, "load_time_ms": 37.172, "grad_time_ms": 9869.636, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 4.354631528258324e-05, "policy_loss": -0.0072962199337780476, "vf_loss": 78.87313079833984, "vf_explained_var": 0.7646682262420654, "kl": 0.0020736621227115393, "entropy": 1.0950974225997925, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5030400, "episodes_total": 12576, "training_iteration": 393, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-08-21", "timestamp": 1660259301, "time_this_iter_s": 34.420122146606445, "time_total_s": 17712.238147974014, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17712.238147974014, "timesteps_since_restore": 5030400, "iterations_since_restore": 393, "perf": {"cpu_util_percent": 32.710204081632654, "ram_util_percent": 59.30816326530613}} +{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 614.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.225}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.85, "shaped_reward_min": 128, 
"shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.29, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.59, "potting_onion_agent_0_min": 13, 
"potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.28, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.18, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.59, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, 
"optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.59, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 624.0, 576.0, 576.0, 590.0, 590.0, 630.0, 633.0, 582.0, 636.0, 639.0, 633.0, 639.0, 639.0, 630.0, 627.0, 636.0, 636.0, 636.0, 587.0, 581.0, 636.0, 621.0, 639.0, 630.0, 633.0, 621.0, 636.0, 573.0, 636.0, 633.0, 522.0, 636.0, 408.0, 636.0, 636.0, 579.0, 576.0, 636.0, 639.0, 587.0, 636.0, 630.0, 633.0, 639.0, 639.0, 639.0, 587.0, 582.0, 525.0, 579.0, 636.0, 582.0, 
636.0, 627.0, 630.0, 630.0, 587.0, 639.0, 633.0, 570.0, 630.0, 630.0, 639.0, 633.0, 627.0, 582.0, 576.0, 633.0, 633.0, 639.0, 582.0, 630.0, 633.0, 630.0, 579.0, 633.0, 630.0, 633.0, 630.0, 633.0, 630.0, 636.0, 633.0, 582.0, 633.0, 636.0, 636.0, 633.0, 582.0, 579.0, 627.0, 633.0, 587.0, 587.0, 581.0, 639.0, 630.0, 636.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 312.0, 312.0, 290.0, 286.0, 287.0, 289.0, 297.0, 293.0, 294.0, 296.0, 316.0, 314.0, 319.0, 314.0, 286.0, 296.0, 311.0, 325.0, 319.0, 320.0, 314.0, 319.0, 320.0, 319.0, 322.0, 317.0, 316.0, 314.0, 308.0, 319.0, 322.0, 314.0, 314.0, 322.0, 309.0, 327.0, 296.0, 291.0, 290.0, 291.0, 316.0, 320.0, 309.0, 312.0, 317.0, 322.0, 316.0, 314.0, 316.0, 317.0, 313.0, 308.0, 319.0, 317.0, 280.0, 293.0, 317.0, 319.0, 314.0, 319.0, 262.0, 260.0, 319.0, 317.0, 198.0, 210.0, 314.0, 322.0, 313.0, 323.0, 291.0, 288.0, 290.0, 286.0, 322.0, 314.0, 322.0, 317.0, 299.0, 288.0, 314.0, 322.0, 313.0, 317.0, 321.0, 312.0, 314.0, 325.0, 319.0, 320.0, 317.0, 322.0, 293.0, 294.0, 288.0, 294.0, 270.0, 255.0, 289.0, 290.0, 322.0, 314.0, 286.0, 296.0, 319.0, 317.0, 313.0, 314.0, 316.0, 314.0, 316.0, 314.0, 290.0, 297.0, 319.0, 320.0, 319.0, 314.0, 279.0, 291.0, 315.0, 315.0, 316.0, 314.0, 317.0, 322.0, 317.0, 316.0, 300.0, 327.0, 301.0, 281.0, 285.0, 291.0, 319.0, 314.0, 319.0, 314.0, 319.0, 320.0, 288.0, 294.0, 313.0, 317.0, 314.0, 319.0, 319.0, 311.0, 288.0, 291.0, 309.0, 324.0, 309.0, 321.0, 316.0, 317.0, 313.0, 317.0, 314.0, 
319.0, 314.0, 316.0, 329.0, 307.0, 314.0, 319.0, 285.0, 297.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 321.0, 312.0, 291.0, 291.0, 287.0, 292.0, 308.0, 319.0, 314.0, 319.0, 283.0, 304.0, 293.0, 294.0, 291.0, 290.0, 322.0, 317.0, 316.0, 314.0, 322.0, 314.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8332639627657553, "mean_processing_ms": 0.23504271675970742, "mean_inference_ms": 1.430763529891338}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9456000, "num_steps_sampled": 5043200, "sample_time_ms": 21101.644, "load_time_ms": 36.639, "grad_time_ms": 9913.059, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006912912358529866, "policy_loss": -0.006923032458871603, "vf_loss": 81.60092163085938, "vf_explained_var": 0.7633647918701172, "kl": 0.001780605292879045, "entropy": 1.0915051698684692, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5043200, "episodes_total": 12608, "training_iteration": 394, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-08-53", "timestamp": 1660259333, "time_this_iter_s": 32.24967384338379, "time_total_s": 17744.487821817398, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, 
"custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17744.487821817398, "timesteps_since_restore": 5043200, "iterations_since_restore": 394, "perf": {"cpu_util_percent": 33.69347826086956, "ram_util_percent": 59.52391304347825}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 617.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 329.0}, 
"policy_reward_mean": {"ppo": 308.91}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.82, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.15, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.88, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, 
"useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.74, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.33, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.32, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.74, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.74, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 633.0, 630.0, 576.0, 639.0, 639.0, 627.0, 636.0, 630.0, 636.0, 633.0, 587.0, 633.0, 579.0, 587.0, 630.0, 579.0, 633.0, 587.0, 636.0, 639.0, 627.0, 
639.0, 630.0, 633.0, 639.0, 584.0, 590.0, 630.0, 636.0, 582.0, 633.0, 627.0, 582.0, 576.0, 633.0, 633.0, 639.0, 582.0, 630.0, 633.0, 630.0, 579.0, 633.0, 630.0, 633.0, 630.0, 633.0, 630.0, 636.0, 633.0, 582.0, 633.0, 636.0, 636.0, 633.0, 582.0, 579.0, 627.0, 633.0, 587.0, 587.0, 581.0, 639.0, 630.0, 636.0, 639.0, 579.0, 624.0, 576.0, 576.0, 590.0, 590.0, 630.0, 633.0, 582.0, 636.0, 639.0, 633.0, 639.0, 639.0, 630.0, 627.0, 636.0, 636.0, 636.0, 587.0, 581.0, 636.0, 621.0, 639.0, 630.0, 633.0, 621.0, 636.0, 573.0, 636.0, 633.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 314.0, 285.0, 291.0, 322.0, 317.0, 319.0, 320.0, 316.0, 311.0, 317.0, 319.0, 308.0, 322.0, 317.0, 319.0, 314.0, 319.0, 296.0, 291.0, 317.0, 316.0, 281.0, 298.0, 293.0, 294.0, 311.0, 319.0, 290.0, 289.0, 316.0, 317.0, 299.0, 288.0, 319.0, 317.0, 319.0, 320.0, 308.0, 319.0, 319.0, 320.0, 311.0, 319.0, 314.0, 319.0, 322.0, 317.0, 293.0, 291.0, 294.0, 296.0, 316.0, 314.0, 319.0, 317.0, 299.0, 283.0, 317.0, 316.0, 300.0, 327.0, 301.0, 281.0, 285.0, 291.0, 319.0, 314.0, 319.0, 314.0, 319.0, 320.0, 288.0, 294.0, 313.0, 317.0, 314.0, 319.0, 319.0, 311.0, 288.0, 291.0, 309.0, 324.0, 309.0, 321.0, 316.0, 317.0, 313.0, 317.0, 314.0, 319.0, 314.0, 316.0, 329.0, 307.0, 314.0, 319.0, 285.0, 297.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 321.0, 312.0, 291.0, 291.0, 287.0, 292.0, 308.0, 319.0, 314.0, 319.0, 283.0, 304.0, 293.0, 294.0, 291.0, 290.0, 322.0, 317.0, 316.0, 
314.0, 322.0, 314.0, 317.0, 322.0, 288.0, 291.0, 312.0, 312.0, 290.0, 286.0, 287.0, 289.0, 297.0, 293.0, 294.0, 296.0, 316.0, 314.0, 319.0, 314.0, 286.0, 296.0, 311.0, 325.0, 319.0, 320.0, 314.0, 319.0, 320.0, 319.0, 322.0, 317.0, 316.0, 314.0, 308.0, 319.0, 322.0, 314.0, 314.0, 322.0, 309.0, 327.0, 296.0, 291.0, 290.0, 291.0, 316.0, 320.0, 309.0, 312.0, 317.0, 322.0, 316.0, 314.0, 316.0, 317.0, 313.0, 308.0, 319.0, 317.0, 280.0, 293.0, 317.0, 319.0, 314.0, 319.0, 262.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8323023893129786, "mean_processing_ms": 0.23485230859207035, "mean_inference_ms": 1.4300119711867751}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9480000, "num_steps_sampled": 5056000, "sample_time_ms": 21142.709, "load_time_ms": 37.292, "grad_time_ms": 10082.816, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00028009479865431786, "policy_loss": -0.007545720785856247, "vf_loss": 83.69845581054688, "vf_explained_var": 0.7653185725212097, "kl": 0.001969197066500783, "entropy": 1.0880564451217651, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5056000, "episodes_total": 12640, "training_iteration": 395, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-09-29", "timestamp": 1660259369, "time_this_iter_s": 35.19951057434082, "time_total_s": 17779.68733239174, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, 
"max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17779.68733239174, "timesteps_since_restore": 5056000, "iterations_since_restore": 395, "perf": {"cpu_util_percent": 30.6265306122449, "ram_util_percent": 59.11836734693878}} 
+{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 618.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 309.44}, "custom_metrics": {"sparse_reward_mean": 214.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 190.08, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.01, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.44, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.79, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.88, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.7, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.91, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.27, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.7, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.7, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 633.0, 633.0, 636.0, 582.0, 633.0, 630.0, 633.0, 630.0, 636.0, 636.0, 633.0, 576.0, 633.0, 582.0, 582.0, 627.0, 639.0, 639.0, 633.0, 636.0, 630.0, 560.0, 636.0, 636.0, 636.0, 582.0, 633.0, 636.0, 582.0, 578.0, 639.0, 630.0, 636.0, 639.0, 579.0, 624.0, 576.0, 576.0, 590.0, 590.0, 630.0, 633.0, 582.0, 636.0, 639.0, 633.0, 639.0, 639.0, 630.0, 627.0, 636.0, 636.0, 636.0, 587.0, 581.0, 636.0, 621.0, 639.0, 630.0, 633.0, 621.0, 636.0, 573.0, 636.0, 633.0, 522.0, 636.0, 633.0, 633.0, 630.0, 576.0, 639.0, 639.0, 627.0, 636.0, 630.0, 636.0, 633.0, 587.0, 633.0, 579.0, 587.0, 630.0, 579.0, 633.0, 587.0, 636.0, 639.0, 627.0, 639.0, 630.0, 633.0, 639.0, 584.0, 590.0, 630.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 325.0, 316.0, 314.0, 317.0, 316.0, 314.0, 319.0, 319.0, 317.0, 289.0, 293.0, 321.0, 312.0, 311.0, 319.0, 313.0, 320.0, 313.0, 317.0, 321.0, 315.0, 319.0, 317.0, 319.0, 314.0, 285.0, 291.0, 311.0, 322.0, 285.0, 297.0, 291.0, 291.0, 313.0, 314.0, 327.0, 312.0, 324.0, 315.0, 319.0, 314.0, 314.0, 322.0, 321.0, 309.0, 279.0, 281.0, 319.0, 317.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 311.0, 322.0, 319.0, 317.0, 291.0, 291.0, 290.0, 288.0, 322.0, 317.0, 316.0, 314.0, 322.0, 314.0, 317.0, 322.0, 288.0, 291.0, 312.0, 312.0, 290.0, 286.0, 287.0, 289.0, 297.0, 293.0, 294.0, 296.0, 316.0, 314.0, 319.0, 314.0, 286.0, 296.0, 311.0, 325.0, 319.0, 320.0, 314.0, 319.0, 320.0, 
319.0, 322.0, 317.0, 316.0, 314.0, 308.0, 319.0, 322.0, 314.0, 314.0, 322.0, 309.0, 327.0, 296.0, 291.0, 290.0, 291.0, 316.0, 320.0, 309.0, 312.0, 317.0, 322.0, 316.0, 314.0, 316.0, 317.0, 313.0, 308.0, 319.0, 317.0, 280.0, 293.0, 317.0, 319.0, 314.0, 319.0, 262.0, 260.0, 324.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 314.0, 285.0, 291.0, 322.0, 317.0, 319.0, 320.0, 316.0, 311.0, 317.0, 319.0, 308.0, 322.0, 317.0, 319.0, 314.0, 319.0, 296.0, 291.0, 317.0, 316.0, 281.0, 298.0, 293.0, 294.0, 311.0, 319.0, 290.0, 289.0, 316.0, 317.0, 299.0, 288.0, 319.0, 317.0, 319.0, 320.0, 308.0, 319.0, 319.0, 320.0, 311.0, 319.0, 314.0, 319.0, 322.0, 317.0, 293.0, 291.0, 294.0, 296.0, 316.0, 314.0, 319.0, 317.0, 299.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8313395238935175, "mean_processing_ms": 0.2346607427713282, "mean_inference_ms": 1.4290797727914242}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9504000, "num_steps_sampled": 5068800, "sample_time_ms": 20854.832, "load_time_ms": 37.297, "grad_time_ms": 9880.508, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009566675289534032, "policy_loss": -0.0062157814390957355, "vf_loss": 77.21820068359375, "vf_explained_var": 0.7768221497535706, "kl": 0.0020733082201331854, "entropy": 1.0987348556518555, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5068800, "episodes_total": 12672, "training_iteration": 396, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-09-57", "timestamp": 1660259397, "time_this_iter_s": 28.42993927001953, "time_total_s": 17808.11727166176, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 17808.11727166176, "timesteps_since_restore": 5068800, "iterations_since_restore": 396, "perf": {"cpu_util_percent": 34.480487804878045, "ram_util_percent": 59.09024390243902}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 616.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.265}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.33, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.89, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.64, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.64, "useful_onion_pickup_agent_0_min": 13, 
"useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.93, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.48, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.03, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.93, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 
0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.48, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.03, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.48, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.03, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 639.0, 627.0, 582.0, 636.0, 630.0, 630.0, 636.0, 582.0, 587.0, 579.0, 639.0, 582.0, 578.0, 618.0, 570.0, 636.0, 567.0, 630.0, 582.0, 630.0, 582.0, 636.0, 639.0, 633.0, 582.0, 636.0, 636.0, 633.0, 636.0, 582.0, 573.0, 636.0, 633.0, 522.0, 636.0, 633.0, 633.0, 630.0, 576.0, 639.0, 639.0, 627.0, 636.0, 630.0, 636.0, 633.0, 587.0, 633.0, 579.0, 587.0, 630.0, 579.0, 633.0, 587.0, 636.0, 639.0, 627.0, 639.0, 630.0, 633.0, 639.0, 584.0, 590.0, 630.0, 636.0, 582.0, 636.0, 630.0, 633.0, 633.0, 636.0, 582.0, 633.0, 630.0, 633.0, 630.0, 636.0, 636.0, 633.0, 576.0, 633.0, 582.0, 582.0, 627.0, 639.0, 639.0, 633.0, 636.0, 630.0, 560.0, 636.0, 636.0, 636.0, 582.0, 633.0, 636.0, 582.0, 578.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 316.0, 317.0, 322.0, 317.0, 313.0, 314.0, 291.0, 291.0, 321.0, 315.0, 321.0, 309.0, 316.0, 314.0, 316.0, 320.0, 285.0, 297.0, 294.0, 293.0, 288.0, 291.0, 319.0, 320.0, 289.0, 293.0, 293.0, 285.0, 313.0, 305.0, 276.0, 294.0, 319.0, 317.0, 279.0, 288.0, 316.0, 314.0, 291.0, 291.0, 313.0, 317.0, 295.0, 287.0, 319.0, 317.0, 324.0, 315.0, 313.0, 320.0, 293.0, 289.0, 316.0, 320.0, 320.0, 316.0, 308.0, 325.0, 314.0, 322.0, 285.0, 297.0, 280.0, 293.0, 317.0, 319.0, 314.0, 319.0, 262.0, 260.0, 324.0, 
312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 314.0, 285.0, 291.0, 322.0, 317.0, 319.0, 320.0, 316.0, 311.0, 317.0, 319.0, 308.0, 322.0, 317.0, 319.0, 314.0, 319.0, 296.0, 291.0, 317.0, 316.0, 281.0, 298.0, 293.0, 294.0, 311.0, 319.0, 290.0, 289.0, 316.0, 317.0, 299.0, 288.0, 319.0, 317.0, 319.0, 320.0, 308.0, 319.0, 319.0, 320.0, 311.0, 319.0, 314.0, 319.0, 322.0, 317.0, 293.0, 291.0, 294.0, 296.0, 316.0, 314.0, 319.0, 317.0, 299.0, 283.0, 311.0, 325.0, 316.0, 314.0, 317.0, 316.0, 314.0, 319.0, 319.0, 317.0, 289.0, 293.0, 321.0, 312.0, 311.0, 319.0, 313.0, 320.0, 313.0, 317.0, 321.0, 315.0, 319.0, 317.0, 319.0, 314.0, 285.0, 291.0, 311.0, 322.0, 285.0, 297.0, 291.0, 291.0, 313.0, 314.0, 327.0, 312.0, 324.0, 315.0, 319.0, 314.0, 314.0, 322.0, 321.0, 309.0, 279.0, 281.0, 319.0, 317.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 311.0, 322.0, 319.0, 317.0, 291.0, 291.0, 290.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8303852563726812, "mean_processing_ms": 0.2344708309042675, "mean_inference_ms": 1.4281787126468246}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9528000, "num_steps_sampled": 5081600, "sample_time_ms": 20985.415, "load_time_ms": 37.239, "grad_time_ms": 9808.735, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003967406693845987, "policy_loss": -0.0038024026434868574, "vf_loss": 83.1785888671875, "vf_explained_var": 0.769153892993927, "kl": 0.0019865171052515507, "entropy": 1.0961049795150757, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5081600, "episodes_total": 12704, "training_iteration": 397, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-10-28", "timestamp": 1660259428, "time_this_iter_s": 30.70757508277893, "time_total_s": 17838.824846744537, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17838.824846744537, "timesteps_since_restore": 5081600, "iterations_since_restore": 397, "perf": {"cpu_util_percent": 33.08837209302326, "ram_util_percent": 59.13023255813955}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 617.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 308.56}, "custom_metrics": {"sparse_reward_mean": 213.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.52, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 18.65, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.17, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.33, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, 
"soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.17, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.17, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 576.0, 636.0, 636.0, 630.0, 582.0, 582.0, 639.0, 593.0, 639.0, 630.0, 630.0, 636.0, 633.0, 636.0, 630.0, 636.0, 636.0, 522.0, 633.0, 636.0, 636.0, 633.0, 636.0, 582.0, 576.0, 636.0, 627.0, 633.0, 636.0, 587.0, 633.0, 590.0, 630.0, 636.0, 582.0, 636.0, 630.0, 633.0, 633.0, 636.0, 582.0, 633.0, 630.0, 633.0, 630.0, 636.0, 636.0, 633.0, 576.0, 633.0, 582.0, 582.0, 627.0, 639.0, 639.0, 633.0, 636.0, 630.0, 560.0, 636.0, 636.0, 636.0, 582.0, 633.0, 636.0, 582.0, 578.0, 636.0, 633.0, 639.0, 627.0, 582.0, 636.0, 630.0, 630.0, 636.0, 582.0, 587.0, 579.0, 639.0, 582.0, 578.0, 618.0, 570.0, 636.0, 567.0, 630.0, 582.0, 630.0, 582.0, 636.0, 639.0, 633.0, 582.0, 636.0, 636.0, 633.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 282.0, 294.0, 308.0, 328.0, 314.0, 322.0, 318.0, 312.0, 287.0, 295.0, 291.0, 291.0, 317.0, 322.0, 299.0, 294.0, 317.0, 322.0, 311.0, 319.0, 317.0, 313.0, 319.0, 317.0, 316.0, 317.0, 321.0, 315.0, 316.0, 314.0, 318.0, 318.0, 322.0, 314.0, 257.0, 265.0, 319.0, 314.0, 317.0, 319.0, 322.0, 
314.0, 314.0, 319.0, 314.0, 322.0, 289.0, 293.0, 282.0, 294.0, 316.0, 320.0, 311.0, 316.0, 314.0, 319.0, 319.0, 317.0, 299.0, 288.0, 316.0, 317.0, 294.0, 296.0, 316.0, 314.0, 319.0, 317.0, 299.0, 283.0, 311.0, 325.0, 316.0, 314.0, 317.0, 316.0, 314.0, 319.0, 319.0, 317.0, 289.0, 293.0, 321.0, 312.0, 311.0, 319.0, 313.0, 320.0, 313.0, 317.0, 321.0, 315.0, 319.0, 317.0, 319.0, 314.0, 285.0, 291.0, 311.0, 322.0, 285.0, 297.0, 291.0, 291.0, 313.0, 314.0, 327.0, 312.0, 324.0, 315.0, 319.0, 314.0, 314.0, 322.0, 321.0, 309.0, 279.0, 281.0, 319.0, 317.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 311.0, 322.0, 319.0, 317.0, 291.0, 291.0, 290.0, 288.0, 319.0, 317.0, 316.0, 317.0, 322.0, 317.0, 313.0, 314.0, 291.0, 291.0, 321.0, 315.0, 321.0, 309.0, 316.0, 314.0, 316.0, 320.0, 285.0, 297.0, 294.0, 293.0, 288.0, 291.0, 319.0, 320.0, 289.0, 293.0, 293.0, 285.0, 313.0, 305.0, 276.0, 294.0, 319.0, 317.0, 279.0, 288.0, 316.0, 314.0, 291.0, 291.0, 313.0, 317.0, 295.0, 287.0, 319.0, 317.0, 324.0, 315.0, 313.0, 320.0, 293.0, 289.0, 316.0, 320.0, 320.0, 316.0, 308.0, 325.0, 314.0, 322.0, 285.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8294402136883693, "mean_processing_ms": 0.23428178262037597, "mean_inference_ms": 1.4273260562333872}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9552000, "num_steps_sampled": 5094400, "sample_time_ms": 21312.43, "load_time_ms": 37.532, "grad_time_ms": 9807.592, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002816990716382861, "policy_loss": -0.004787659738212824, "vf_loss": 81.49095916748047, "vf_explained_var": 0.7696583867073059, "kl": 0.0025824178010225296, "entropy": 1.0888774394989014, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5094400, "episodes_total": 12736, "training_iteration": 398, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-12_00-11-02", "timestamp": 1660259462, "time_this_iter_s": 33.86050295829773, "time_total_s": 17872.685349702835, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17872.685349702835, "timesteps_since_restore": 5094400, "iterations_since_restore": 398, "perf": {"cpu_util_percent": 30.977083333333336, "ram_util_percent": 59.083333333333336}} +{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 609.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 304.595}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 187.59, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.27, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, 
"soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.27, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.27, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [197.0, 582.0, 636.0, 630.0, 636.0, 579.0, 636.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 633.0, 633.0, 399.0, 636.0, 639.0, 636.0, 587.0, 576.0, 630.0, 582.0, 630.0, 587.0, 633.0, 636.0, 544.0, 624.0, 636.0, 581.0, 636.0, 633.0, 636.0, 582.0, 578.0, 636.0, 633.0, 639.0, 627.0, 582.0, 636.0, 630.0, 630.0, 636.0, 582.0, 587.0, 579.0, 639.0, 582.0, 578.0, 618.0, 570.0, 636.0, 567.0, 630.0, 582.0, 630.0, 582.0, 636.0, 639.0, 633.0, 582.0, 636.0, 636.0, 633.0, 636.0, 582.0, 627.0, 576.0, 636.0, 636.0, 630.0, 582.0, 582.0, 639.0, 593.0, 639.0, 630.0, 630.0, 636.0, 633.0, 636.0, 630.0, 636.0, 636.0, 522.0, 633.0, 636.0, 636.0, 633.0, 636.0, 582.0, 576.0, 636.0, 627.0, 633.0, 636.0, 587.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [100.0, 97.0, 296.0, 286.0, 319.0, 317.0, 311.0, 319.0, 316.0, 320.0, 291.0, 288.0, 317.0, 319.0, 319.0, 317.0, 317.0, 322.0, 
311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 313.0, 317.0, 311.0, 322.0, 308.0, 325.0, 193.0, 206.0, 321.0, 315.0, 322.0, 317.0, 322.0, 314.0, 288.0, 299.0, 284.0, 292.0, 311.0, 319.0, 291.0, 291.0, 319.0, 311.0, 288.0, 299.0, 324.0, 309.0, 319.0, 317.0, 279.0, 265.0, 311.0, 313.0, 313.0, 323.0, 294.0, 287.0, 314.0, 322.0, 311.0, 322.0, 319.0, 317.0, 291.0, 291.0, 290.0, 288.0, 319.0, 317.0, 316.0, 317.0, 322.0, 317.0, 313.0, 314.0, 291.0, 291.0, 321.0, 315.0, 321.0, 309.0, 316.0, 314.0, 316.0, 320.0, 285.0, 297.0, 294.0, 293.0, 288.0, 291.0, 319.0, 320.0, 289.0, 293.0, 293.0, 285.0, 313.0, 305.0, 276.0, 294.0, 319.0, 317.0, 279.0, 288.0, 316.0, 314.0, 291.0, 291.0, 313.0, 317.0, 295.0, 287.0, 319.0, 317.0, 324.0, 315.0, 313.0, 320.0, 293.0, 289.0, 316.0, 320.0, 320.0, 316.0, 308.0, 325.0, 314.0, 322.0, 285.0, 297.0, 313.0, 314.0, 282.0, 294.0, 308.0, 328.0, 314.0, 322.0, 318.0, 312.0, 287.0, 295.0, 291.0, 291.0, 317.0, 322.0, 299.0, 294.0, 317.0, 322.0, 311.0, 319.0, 317.0, 313.0, 319.0, 317.0, 316.0, 317.0, 321.0, 315.0, 316.0, 314.0, 318.0, 318.0, 322.0, 314.0, 257.0, 265.0, 319.0, 314.0, 317.0, 319.0, 322.0, 314.0, 314.0, 319.0, 314.0, 322.0, 289.0, 293.0, 282.0, 294.0, 316.0, 320.0, 311.0, 316.0, 314.0, 319.0, 319.0, 317.0, 299.0, 288.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.82850652599667, "mean_processing_ms": 0.23409592110050972, "mean_inference_ms": 1.426659038222931}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9576000, "num_steps_sampled": 5107200, "sample_time_ms": 21603.638, "load_time_ms": 37.362, "grad_time_ms": 9958.009, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00230390764772892, "policy_loss": -0.00582013139501214, "vf_loss": 86.7406005859375, "vf_explained_var": 0.7753866314888, "kl": 0.0019396115094423294, "entropy": 1.1000421047210693, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 
12800, "done": false, "timesteps_total": 5107200, "episodes_total": 12768, "training_iteration": 399, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-11-36", "timestamp": 1660259496, "time_this_iter_s": 34.40714716911316, "time_total_s": 17907.09249687195, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17907.09249687195, "timesteps_since_restore": 5107200, "iterations_since_restore": 399, "perf": {"cpu_util_percent": 33.638775510204084, "ram_util_percent": 59.13265306122449}} +{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 612.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.07}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.54, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.01, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.18, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.55, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.92, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.17, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.55, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.55, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 587.0, 636.0, 636.0, 633.0, 579.0, 633.0, 582.0, 627.0, 630.0, 590.0, 639.0, 633.0, 579.0, 630.0, 630.0, 636.0, 639.0, 633.0, 630.0, 630.0, 630.0, 630.0, 639.0, 636.0, 633.0, 636.0, 582.0, 636.0, 582.0, 579.0, 636.0, 633.0, 636.0, 582.0, 627.0, 576.0, 636.0, 636.0, 630.0, 582.0, 582.0, 639.0, 593.0, 639.0, 630.0, 630.0, 636.0, 633.0, 636.0, 630.0, 636.0, 636.0, 522.0, 633.0, 636.0, 636.0, 633.0, 636.0, 582.0, 576.0, 636.0, 627.0, 633.0, 636.0, 587.0, 633.0, 197.0, 582.0, 636.0, 630.0, 636.0, 579.0, 636.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 633.0, 633.0, 399.0, 636.0, 639.0, 636.0, 587.0, 576.0, 630.0, 582.0, 630.0, 587.0, 633.0, 636.0, 544.0, 624.0, 636.0, 581.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 314.0, 316.0, 294.0, 293.0, 316.0, 320.0, 319.0, 317.0, 317.0, 316.0, 289.0, 290.0, 319.0, 314.0, 293.0, 289.0, 319.0, 308.0, 316.0, 314.0, 293.0, 297.0, 322.0, 317.0, 319.0, 314.0, 294.0, 285.0, 321.0, 309.0, 319.0, 311.0, 319.0, 317.0, 317.0, 322.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 308.0, 322.0, 313.0, 317.0, 314.0, 325.0, 317.0, 319.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 314.0, 322.0, 291.0, 291.0, 290.0, 289.0, 320.0, 316.0, 308.0, 325.0, 314.0, 322.0, 285.0, 297.0, 313.0, 314.0, 282.0, 294.0, 308.0, 328.0, 314.0, 322.0, 318.0, 312.0, 287.0, 295.0, 291.0, 291.0, 317.0, 322.0, 299.0, 294.0, 317.0, 322.0, 311.0, 319.0, 317.0, 313.0, 319.0, 317.0, 316.0, 317.0, 321.0, 315.0, 316.0, 314.0, 318.0, 318.0, 322.0, 314.0, 257.0, 265.0, 319.0, 314.0, 317.0, 319.0, 322.0, 314.0, 314.0, 319.0, 314.0, 322.0, 289.0, 293.0, 282.0, 294.0, 316.0, 320.0, 311.0, 316.0, 314.0, 319.0, 319.0, 317.0, 299.0, 288.0, 316.0, 317.0, 100.0, 97.0, 296.0, 286.0, 319.0, 317.0, 311.0, 319.0, 316.0, 320.0, 291.0, 288.0, 317.0, 319.0, 319.0, 317.0, 317.0, 322.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 313.0, 317.0, 311.0, 322.0, 308.0, 325.0, 193.0, 206.0, 321.0, 315.0, 322.0, 317.0, 322.0, 314.0, 288.0, 299.0, 284.0, 292.0, 311.0, 319.0, 291.0, 291.0, 319.0, 311.0, 288.0, 299.0, 324.0, 309.0, 319.0, 317.0, 279.0, 265.0, 311.0, 313.0, 313.0, 323.0, 294.0, 287.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8275880756644836, "mean_processing_ms": 0.23391670126994718, "mean_inference_ms": 1.4262180371248092}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9600000, "num_steps_sampled": 5120000, "sample_time_ms": 22135.485, "load_time_ms": 37.429, "grad_time_ms": 9991.096, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019203064730390906, "policy_loss": -0.005054800305515528, "vf_loss": 75.28291320800781, 
"vf_explained_var": 0.7728467583656311, "kl": 0.00209710281342268, "entropy": 1.106364130973816, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5120000, "episodes_total": 12800, "training_iteration": 400, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-12-12", "timestamp": 1660259532, "time_this_iter_s": 35.30730485916138, "time_total_s": 17942.39980173111, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17942.39980173111, "timesteps_since_restore": 5120000, "iterations_since_restore": 400, "perf": {"cpu_util_percent": 33.525999999999996, "ram_util_percent": 59.168}} +{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 613.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 306.635}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.47, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.0, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.22, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.7, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.48, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.76, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.92, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.48, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.76, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.48, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.76, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 624.0, 633.0, 630.0, 633.0, 636.0, 639.0, 582.0, 630.0, 627.0, 633.0, 630.0, 633.0, 630.0, 567.0, 630.0, 539.0, 630.0, 630.0, 633.0, 630.0, 633.0, 630.0, 639.0, 587.0, 633.0, 636.0, 587.0, 633.0, 197.0, 582.0, 636.0, 630.0, 636.0, 579.0, 636.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 633.0, 633.0, 399.0, 636.0, 639.0, 636.0, 587.0, 576.0, 630.0, 582.0, 630.0, 587.0, 633.0, 636.0, 544.0, 624.0, 636.0, 581.0, 636.0, 636.0, 630.0, 587.0, 636.0, 636.0, 633.0, 579.0, 633.0, 582.0, 627.0, 630.0, 590.0, 639.0, 633.0, 579.0, 630.0, 630.0, 636.0, 639.0, 633.0, 630.0, 630.0, 630.0, 630.0, 639.0, 636.0, 633.0, 636.0, 582.0, 636.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 318.0, 309.0, 294.0, 288.0, 316.0, 317.0, 319.0, 320.0, 305.0, 319.0, 319.0, 314.0, 307.0, 323.0, 310.0, 323.0, 311.0, 325.0, 316.0, 323.0, 285.0, 297.0, 323.0, 307.0, 316.0, 311.0, 319.0, 314.0, 318.0, 312.0, 314.0, 319.0, 322.0, 308.0, 283.0, 284.0, 312.0, 318.0, 277.0, 262.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 318.0, 312.0, 311.0, 322.0, 311.0, 319.0, 319.0, 320.0, 291.0, 296.0, 314.0, 319.0, 319.0, 317.0, 299.0, 288.0, 316.0, 317.0, 100.0, 97.0, 296.0, 286.0, 319.0, 317.0, 311.0, 319.0, 316.0, 320.0, 291.0, 288.0, 317.0, 319.0, 319.0, 317.0, 317.0, 322.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 313.0, 317.0, 311.0, 322.0, 308.0, 325.0, 193.0, 206.0, 321.0, 315.0, 322.0, 317.0, 322.0, 314.0, 288.0, 299.0, 284.0, 292.0, 311.0, 319.0, 291.0, 291.0, 319.0, 311.0, 288.0, 299.0, 324.0, 309.0, 319.0, 317.0, 279.0, 265.0, 311.0, 313.0, 313.0, 323.0, 294.0, 287.0, 314.0, 322.0, 314.0, 322.0, 314.0, 316.0, 294.0, 293.0, 316.0, 320.0, 319.0, 317.0, 317.0, 316.0, 289.0, 290.0, 319.0, 314.0, 293.0, 289.0, 319.0, 308.0, 316.0, 314.0, 293.0, 297.0, 322.0, 317.0, 319.0, 314.0, 294.0, 285.0, 321.0, 309.0, 319.0, 311.0, 319.0, 317.0, 317.0, 322.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 308.0, 322.0, 313.0, 317.0, 314.0, 325.0, 317.0, 319.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 314.0, 322.0, 291.0, 291.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8266799330355084, "mean_processing_ms": 0.23374167551235864, "mean_inference_ms": 1.4257962598910456}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9624000, "num_steps_sampled": 5132800, "sample_time_ms": 22522.212, "load_time_ms": 37.519, "grad_time_ms": 10359.995, "update_time_ms": 0.001, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000927128829061985, "policy_loss": -0.006055487785488367, "vf_loss": 75.37408447265625, "vf_explained_var": 0.7751708030700684, "kl": 0.0019053876167163253, "entropy": 1.1095930337905884, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5132800, "episodes_total": 12832, "training_iteration": 401, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-12-49", "timestamp": 1660259569, "time_this_iter_s": 36.76053810119629, "time_total_s": 17979.160339832306, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 
1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17979.160339832306, "timesteps_since_restore": 5132800, "iterations_since_restore": 401, "perf": {"cpu_util_percent": 34.715094339622645, "ram_util_percent": 59.21886792452831}} +{"episode_reward_max": 639.0, "episode_reward_min": 527.0, "episode_reward_mean": 619.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 309.825}, "custom_metrics": {"sparse_reward_mean": 214.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 190.05, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.07, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.77, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.95, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 4, 
"useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.6, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 636.0, 633.0, 636.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 587.0, 627.0, 624.0, 636.0, 633.0, 587.0, 630.0, 639.0, 636.0, 578.0, 633.0, 639.0, 630.0, 639.0, 633.0, 636.0, 582.0, 636.0, 584.0, 630.0, 581.0, 624.0, 636.0, 581.0, 636.0, 636.0, 630.0, 587.0, 636.0, 636.0, 633.0, 579.0, 633.0, 582.0, 627.0, 630.0, 590.0, 639.0, 633.0, 579.0, 630.0, 630.0, 636.0, 639.0, 633.0, 630.0, 630.0, 630.0, 630.0, 639.0, 636.0, 633.0, 636.0, 582.0, 636.0, 582.0, 579.0, 630.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 624.0, 633.0, 630.0, 633.0, 636.0, 639.0, 582.0, 630.0, 627.0, 633.0, 630.0, 633.0, 630.0, 567.0, 630.0, 539.0, 630.0, 630.0, 633.0, 630.0, 633.0, 630.0, 639.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [273.0, 254.0, 316.0, 320.0, 316.0, 317.0, 316.0, 320.0, 319.0, 314.0, 291.0, 291.0, 301.0, 286.0, 314.0, 316.0, 321.0, 309.0, 318.0, 309.0, 296.0, 286.0, 291.0, 296.0, 318.0, 309.0, 308.0, 316.0, 316.0, 320.0, 316.0, 317.0, 301.0, 286.0, 314.0, 316.0, 319.0, 320.0, 320.0, 316.0, 295.0, 283.0, 314.0, 319.0, 319.0, 320.0, 311.0, 319.0, 322.0, 317.0, 314.0, 319.0, 319.0, 317.0, 288.0, 294.0, 311.0, 325.0, 290.0, 294.0, 313.0, 317.0, 296.0, 285.0, 311.0, 313.0, 313.0, 323.0, 294.0, 287.0, 314.0, 322.0, 314.0, 322.0, 314.0, 316.0, 294.0, 293.0, 316.0, 320.0, 319.0, 317.0, 317.0, 316.0, 289.0, 290.0, 319.0, 314.0, 293.0, 289.0, 319.0, 308.0, 316.0, 314.0, 293.0, 297.0, 322.0, 317.0, 319.0, 314.0, 294.0, 285.0, 321.0, 309.0, 319.0, 311.0, 319.0, 317.0, 317.0, 322.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 308.0, 322.0, 313.0, 317.0, 314.0, 325.0, 317.0, 319.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 314.0, 322.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 318.0, 309.0, 294.0, 288.0, 316.0, 317.0, 319.0, 320.0, 305.0, 319.0, 319.0, 314.0, 307.0, 323.0, 310.0, 323.0, 311.0, 325.0, 316.0, 323.0, 285.0, 297.0, 323.0, 307.0, 316.0, 311.0, 319.0, 314.0, 318.0, 312.0, 314.0, 319.0, 322.0, 308.0, 283.0, 284.0, 312.0, 318.0, 277.0, 262.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 318.0, 312.0, 311.0, 322.0, 311.0, 319.0, 319.0, 320.0, 291.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8257902245780139, "mean_processing_ms": 0.23357282477056074, "mean_inference_ms": 
1.425400068641748}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9648000, "num_steps_sampled": 5145600, "sample_time_ms": 23036.82, "load_time_ms": 37.721, "grad_time_ms": 10540.081, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013805682538077235, "policy_loss": -0.006049450021237135, "vf_loss": 79.8260269165039, "vf_explained_var": 0.7674198746681213, "kl": 0.002044239779934287, "entropy": 1.1051733493804932, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5145600, "episodes_total": 12864, "training_iteration": 402, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-13-24", "timestamp": 1660259604, "time_this_iter_s": 35.064194202423096, "time_total_s": 18014.22453403473, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18014.22453403473, "timesteps_since_restore": 5145600, "iterations_since_restore": 402, "perf": {"cpu_util_percent": 38.665306122448975, "ram_util_percent": 59.40408163265307}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 614.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.27}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.14, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.59, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.08, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.02, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, 
"dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.28, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.02, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, 
"viable_onion_potting_agent_1_mean": 18.02, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 582.0, 630.0, 639.0, 630.0, 582.0, 522.0, 621.0, 582.0, 564.0, 636.0, 630.0, 630.0, 630.0, 579.0, 633.0, 627.0, 636.0, 630.0, 633.0, 567.0, 582.0, 630.0, 639.0, 636.0, 636.0, 633.0, 567.0, 582.0, 627.0, 630.0, 576.0, 582.0, 636.0, 582.0, 579.0, 630.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 624.0, 633.0, 630.0, 633.0, 636.0, 639.0, 582.0, 630.0, 627.0, 633.0, 630.0, 633.0, 630.0, 567.0, 630.0, 539.0, 630.0, 630.0, 633.0, 630.0, 633.0, 630.0, 639.0, 587.0, 527.0, 636.0, 633.0, 636.0, 633.0, 582.0, 587.0, 630.0, 630.0, 
627.0, 582.0, 587.0, 627.0, 624.0, 636.0, 633.0, 587.0, 630.0, 639.0, 636.0, 578.0, 633.0, 639.0, 630.0, 639.0, 633.0, 636.0, 582.0, 636.0, 584.0, 630.0, 581.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 269.0, 291.0, 291.0, 322.0, 308.0, 319.0, 320.0, 313.0, 317.0, 288.0, 294.0, 268.0, 254.0, 310.0, 311.0, 288.0, 294.0, 268.0, 296.0, 313.0, 323.0, 318.0, 312.0, 315.0, 315.0, 313.0, 317.0, 285.0, 294.0, 318.0, 315.0, 321.0, 306.0, 317.0, 319.0, 313.0, 317.0, 314.0, 319.0, 282.0, 285.0, 294.0, 288.0, 314.0, 316.0, 322.0, 317.0, 312.0, 324.0, 319.0, 317.0, 313.0, 320.0, 290.0, 277.0, 299.0, 283.0, 308.0, 319.0, 319.0, 311.0, 291.0, 285.0, 296.0, 286.0, 314.0, 322.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 318.0, 309.0, 294.0, 288.0, 316.0, 317.0, 319.0, 320.0, 305.0, 319.0, 319.0, 314.0, 307.0, 323.0, 310.0, 323.0, 311.0, 325.0, 316.0, 323.0, 285.0, 297.0, 323.0, 307.0, 316.0, 311.0, 319.0, 314.0, 318.0, 312.0, 314.0, 319.0, 322.0, 308.0, 283.0, 284.0, 312.0, 318.0, 277.0, 262.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 318.0, 312.0, 311.0, 322.0, 311.0, 319.0, 319.0, 320.0, 291.0, 296.0, 273.0, 254.0, 316.0, 320.0, 316.0, 317.0, 316.0, 320.0, 319.0, 314.0, 291.0, 291.0, 301.0, 286.0, 314.0, 316.0, 321.0, 309.0, 318.0, 309.0, 296.0, 286.0, 291.0, 296.0, 318.0, 309.0, 308.0, 316.0, 316.0, 320.0, 316.0, 317.0, 301.0, 286.0, 314.0, 316.0, 319.0, 320.0, 320.0, 316.0, 295.0, 283.0, 314.0, 319.0, 319.0, 320.0, 311.0, 319.0, 322.0, 
317.0, 314.0, 319.0, 319.0, 317.0, 288.0, 294.0, 311.0, 325.0, 290.0, 294.0, 313.0, 317.0, 296.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8249056294785048, "mean_processing_ms": 0.2334031065407326, "mean_inference_ms": 1.4249795896358415}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9672000, "num_steps_sampled": 5158400, "sample_time_ms": 23134.408, "load_time_ms": 37.402, "grad_time_ms": 10502.988, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0036110735964030027, "policy_loss": -0.003584003308787942, "vf_loss": 77.51012420654297, "vf_explained_var": 0.770778238773346, "kl": 0.00190709566231817, "entropy": 1.1118710041046143, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5158400, "episodes_total": 12896, "training_iteration": 403, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-13-59", "timestamp": 1660259639, "time_this_iter_s": 35.02220106124878, "time_total_s": 18049.246735095978, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18049.246735095978, "timesteps_since_restore": 5158400, "iterations_since_restore": 403, "perf": {"cpu_util_percent": 34.604, "ram_util_percent": 59.326}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 614.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.125}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.25, "shaped_reward_min": 162, 
"shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.51, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.91, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 12, 
"potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.95, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.61, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.95, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, 
"optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.95, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 587.0, 636.0, 633.0, 582.0, 578.0, 612.0, 630.0, 633.0, 633.0, 582.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 536.0, 582.0, 636.0, 630.0, 639.0, 630.0, 639.0, 582.0, 633.0, 630.0, 639.0, 587.0, 527.0, 636.0, 633.0, 636.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 587.0, 627.0, 624.0, 636.0, 633.0, 587.0, 
630.0, 639.0, 636.0, 578.0, 633.0, 639.0, 630.0, 639.0, 633.0, 636.0, 582.0, 636.0, 584.0, 630.0, 581.0, 527.0, 582.0, 630.0, 639.0, 630.0, 582.0, 522.0, 621.0, 582.0, 564.0, 636.0, 630.0, 630.0, 630.0, 579.0, 633.0, 627.0, 636.0, 630.0, 633.0, 567.0, 582.0, 630.0, 639.0, 636.0, 636.0, 633.0, 567.0, 582.0, 627.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 310.0, 320.0, 293.0, 294.0, 324.0, 312.0, 319.0, 314.0, 288.0, 294.0, 301.0, 277.0, 307.0, 305.0, 321.0, 309.0, 317.0, 316.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 313.0, 317.0, 319.0, 317.0, 314.0, 316.0, 314.0, 316.0, 283.0, 299.0, 319.0, 317.0, 305.0, 322.0, 317.0, 319.0, 319.0, 311.0, 323.0, 310.0, 324.0, 312.0, 268.0, 268.0, 287.0, 295.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 316.0, 314.0, 317.0, 322.0, 288.0, 294.0, 311.0, 322.0, 311.0, 319.0, 319.0, 320.0, 291.0, 296.0, 273.0, 254.0, 316.0, 320.0, 316.0, 317.0, 316.0, 320.0, 319.0, 314.0, 291.0, 291.0, 301.0, 286.0, 314.0, 316.0, 321.0, 309.0, 318.0, 309.0, 296.0, 286.0, 291.0, 296.0, 318.0, 309.0, 308.0, 316.0, 316.0, 320.0, 316.0, 317.0, 301.0, 286.0, 314.0, 316.0, 319.0, 320.0, 320.0, 316.0, 295.0, 283.0, 314.0, 319.0, 319.0, 320.0, 311.0, 319.0, 322.0, 317.0, 314.0, 319.0, 319.0, 317.0, 288.0, 294.0, 311.0, 325.0, 290.0, 294.0, 313.0, 317.0, 296.0, 285.0, 258.0, 269.0, 291.0, 291.0, 322.0, 308.0, 319.0, 320.0, 313.0, 317.0, 288.0, 294.0, 268.0, 254.0, 310.0, 311.0, 288.0, 294.0, 268.0, 296.0, 313.0, 323.0, 318.0, 312.0, 315.0, 
315.0, 313.0, 317.0, 285.0, 294.0, 318.0, 315.0, 321.0, 306.0, 317.0, 319.0, 313.0, 317.0, 314.0, 319.0, 282.0, 285.0, 294.0, 288.0, 314.0, 316.0, 322.0, 317.0, 312.0, 324.0, 319.0, 317.0, 313.0, 320.0, 290.0, 277.0, 299.0, 283.0, 308.0, 319.0, 319.0, 311.0, 291.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8240164523926264, "mean_processing_ms": 0.23323028854357722, "mean_inference_ms": 1.4244623501474682}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9696000, "num_steps_sampled": 5171200, "sample_time_ms": 23257.709, "load_time_ms": 37.644, "grad_time_ms": 10547.246, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014424080727621913, "policy_loss": -0.00573391281068325, "vf_loss": 77.33064270019531, "vf_explained_var": 0.7716807723045349, "kl": 0.0015154121210798621, "entropy": 1.1135029792785645, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5171200, "episodes_total": 12928, "training_iteration": 404, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-14-33", "timestamp": 1660259673, "time_this_iter_s": 33.92467999458313, "time_total_s": 18083.17141509056, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, 
"custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18083.17141509056, "timesteps_since_restore": 5171200, "iterations_since_restore": 404, "perf": {"cpu_util_percent": 34.637499999999996, "ram_util_percent": 58.86041666666667}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 612.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 325.0}, 
"policy_reward_mean": {"ppo": 306.235}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.07, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.66, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.56, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.23, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.83, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, 
"useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.09, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.77, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.21, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.09, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 22, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.09, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 587.0, 582.0, 627.0, 539.0, 636.0, 633.0, 639.0, 630.0, 582.0, 579.0, 639.0, 636.0, 590.0, 627.0, 633.0, 639.0, 579.0, 587.0, 579.0, 636.0, 636.0, 
630.0, 636.0, 633.0, 636.0, 584.0, 584.0, 633.0, 590.0, 630.0, 636.0, 584.0, 630.0, 581.0, 527.0, 582.0, 630.0, 639.0, 630.0, 582.0, 522.0, 621.0, 582.0, 564.0, 636.0, 630.0, 630.0, 630.0, 579.0, 633.0, 627.0, 636.0, 630.0, 633.0, 567.0, 582.0, 630.0, 639.0, 636.0, 636.0, 633.0, 567.0, 582.0, 627.0, 630.0, 576.0, 636.0, 630.0, 587.0, 636.0, 633.0, 582.0, 578.0, 612.0, 630.0, 633.0, 633.0, 582.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 536.0, 582.0, 636.0, 630.0, 639.0, 630.0, 639.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 319.0, 314.0, 302.0, 285.0, 290.0, 292.0, 313.0, 314.0, 265.0, 274.0, 322.0, 314.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 288.0, 294.0, 292.0, 287.0, 322.0, 317.0, 324.0, 312.0, 299.0, 291.0, 308.0, 319.0, 315.0, 318.0, 319.0, 320.0, 285.0, 294.0, 293.0, 294.0, 293.0, 286.0, 313.0, 323.0, 316.0, 320.0, 316.0, 314.0, 316.0, 320.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 299.0, 285.0, 319.0, 314.0, 296.0, 294.0, 319.0, 311.0, 311.0, 325.0, 290.0, 294.0, 313.0, 317.0, 296.0, 285.0, 258.0, 269.0, 291.0, 291.0, 322.0, 308.0, 319.0, 320.0, 313.0, 317.0, 288.0, 294.0, 268.0, 254.0, 310.0, 311.0, 288.0, 294.0, 268.0, 296.0, 313.0, 323.0, 318.0, 312.0, 315.0, 315.0, 313.0, 317.0, 285.0, 294.0, 318.0, 315.0, 321.0, 306.0, 317.0, 319.0, 313.0, 317.0, 314.0, 319.0, 282.0, 285.0, 294.0, 288.0, 314.0, 316.0, 322.0, 317.0, 312.0, 324.0, 319.0, 317.0, 313.0, 320.0, 290.0, 277.0, 299.0, 283.0, 308.0, 
319.0, 319.0, 311.0, 291.0, 285.0, 319.0, 317.0, 310.0, 320.0, 293.0, 294.0, 324.0, 312.0, 319.0, 314.0, 288.0, 294.0, 301.0, 277.0, 307.0, 305.0, 321.0, 309.0, 317.0, 316.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 313.0, 317.0, 319.0, 317.0, 314.0, 316.0, 314.0, 316.0, 283.0, 299.0, 319.0, 317.0, 305.0, 322.0, 317.0, 319.0, 319.0, 311.0, 323.0, 310.0, 324.0, 312.0, 268.0, 268.0, 287.0, 295.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 316.0, 314.0, 317.0, 322.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8231192050910455, "mean_processing_ms": 0.23305365514326026, "mean_inference_ms": 1.4238788752206395}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9720000, "num_steps_sampled": 5184000, "sample_time_ms": 23218.954, "load_time_ms": 37.026, "grad_time_ms": 10357.169, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004148914944380522, "policy_loss": -0.003635302884504199, "vf_loss": 83.42072296142578, "vf_explained_var": 0.7650599479675293, "kl": 0.001778147299773991, "entropy": 1.115702509880066, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5184000, "episodes_total": 12960, "training_iteration": 405, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-15-06", "timestamp": 1660259706, "time_this_iter_s": 32.90920972824097, "time_total_s": 18116.080624818802, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, 
"max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18116.080624818802, "timesteps_since_restore": 5184000, "iterations_since_restore": 405, "perf": {"cpu_util_percent": 30.800000000000004, "ram_util_percent": 58.806521739130446}} 
+{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 615.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 265.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 307.555}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.11, "shaped_reward_min": 172, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.51, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.7, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.03, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.96, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.78, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.31, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.03, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.03, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 636.0, 579.0, 630.0, 630.0, 630.0, 630.0, 582.0, 630.0, 576.0, 636.0, 624.0, 630.0, 639.0, 624.0, 636.0, 576.0, 587.0, 639.0, 630.0, 633.0, 630.0, 593.0, 633.0, 579.0, 630.0, 639.0, 587.0, 582.0, 636.0, 582.0, 582.0, 627.0, 630.0, 576.0, 636.0, 630.0, 587.0, 636.0, 633.0, 582.0, 578.0, 612.0, 630.0, 633.0, 633.0, 582.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 536.0, 582.0, 636.0, 630.0, 639.0, 630.0, 639.0, 582.0, 579.0, 633.0, 587.0, 582.0, 627.0, 539.0, 636.0, 633.0, 639.0, 630.0, 582.0, 579.0, 639.0, 636.0, 590.0, 627.0, 633.0, 639.0, 579.0, 587.0, 579.0, 636.0, 636.0, 630.0, 636.0, 633.0, 636.0, 584.0, 584.0, 633.0, 590.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 308.0, 319.0, 314.0, 314.0, 322.0, 285.0, 294.0, 316.0, 314.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 298.0, 284.0, 314.0, 316.0, 290.0, 286.0, 316.0, 320.0, 314.0, 310.0, 316.0, 314.0, 322.0, 317.0, 310.0, 314.0, 319.0, 317.0, 291.0, 285.0, 293.0, 294.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 302.0, 291.0, 316.0, 317.0, 299.0, 280.0, 316.0, 314.0, 319.0, 320.0, 296.0, 291.0, 291.0, 291.0, 308.0, 328.0, 293.0, 289.0, 299.0, 283.0, 308.0, 319.0, 319.0, 311.0, 291.0, 285.0, 319.0, 317.0, 310.0, 320.0, 293.0, 294.0, 324.0, 312.0, 319.0, 314.0, 288.0, 294.0, 301.0, 277.0, 307.0, 305.0, 321.0, 309.0, 317.0, 316.0, 319.0, 314.0, 283.0, 299.0, 314.0, 
319.0, 313.0, 317.0, 319.0, 317.0, 314.0, 316.0, 314.0, 316.0, 283.0, 299.0, 319.0, 317.0, 305.0, 322.0, 317.0, 319.0, 319.0, 311.0, 323.0, 310.0, 324.0, 312.0, 268.0, 268.0, 287.0, 295.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 316.0, 314.0, 317.0, 322.0, 288.0, 294.0, 291.0, 288.0, 319.0, 314.0, 302.0, 285.0, 290.0, 292.0, 313.0, 314.0, 265.0, 274.0, 322.0, 314.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 288.0, 294.0, 292.0, 287.0, 322.0, 317.0, 324.0, 312.0, 299.0, 291.0, 308.0, 319.0, 315.0, 318.0, 319.0, 320.0, 285.0, 294.0, 293.0, 294.0, 293.0, 286.0, 313.0, 323.0, 316.0, 320.0, 316.0, 314.0, 316.0, 320.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 299.0, 285.0, 319.0, 314.0, 296.0, 294.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8222123786920608, "mean_processing_ms": 0.23287368102080933, "mean_inference_ms": 1.4230607054783406}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9744000, "num_steps_sampled": 5196800, "sample_time_ms": 23260.688, "load_time_ms": 37.047, "grad_time_ms": 10547.131, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006928029470145702, "policy_loss": -0.006872573401778936, "vf_loss": 81.25198364257812, "vf_explained_var": 0.7684532999992371, "kl": 0.0019740292336791754, "entropy": 1.1196430921554565, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5196800, "episodes_total": 12992, "training_iteration": 406, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-15-37", "timestamp": 1660259737, "time_this_iter_s": 30.75086998939514, "time_total_s": 18146.831494808197, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 18146.831494808197, "timesteps_since_restore": 5196800, "iterations_since_restore": 406, "perf": {"cpu_util_percent": 34.53863636363637, "ram_util_percent": 58.81590909090908}} +{"episode_reward_max": 639.0, "episode_reward_min": 3.0, "episode_reward_mean": 605.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 302.91}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 187.02, "shaped_reward_min": 3, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.56, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.1, "useful_onion_pickup_agent_0_min": 1, 
"useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.67, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.95, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.8, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, 
"soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.96, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.95, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.95, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 633.0, 587.0, 544.0, 630.0, 587.0, 587.0, 582.0, 633.0, 3.0, 579.0, 582.0, 587.0, 579.0, 627.0, 633.0, 633.0, 582.0, 582.0, 636.0, 567.0, 587.0, 636.0, 630.0, 636.0, 576.0, 636.0, 582.0, 587.0, 636.0, 636.0, 639.0, 630.0, 639.0, 582.0, 579.0, 633.0, 587.0, 582.0, 627.0, 539.0, 636.0, 633.0, 639.0, 630.0, 582.0, 579.0, 639.0, 636.0, 590.0, 627.0, 633.0, 639.0, 579.0, 587.0, 579.0, 636.0, 636.0, 630.0, 636.0, 633.0, 636.0, 584.0, 584.0, 633.0, 590.0, 630.0, 627.0, 633.0, 636.0, 579.0, 630.0, 630.0, 630.0, 630.0, 582.0, 630.0, 576.0, 636.0, 624.0, 630.0, 639.0, 624.0, 636.0, 576.0, 587.0, 639.0, 630.0, 633.0, 630.0, 593.0, 633.0, 579.0, 630.0, 639.0, 587.0, 582.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 324.0, 316.0, 317.0, 319.0, 314.0, 296.0, 291.0, 270.0, 274.0, 306.0, 324.0, 299.0, 288.0, 296.0, 291.0, 298.0, 284.0, 316.0, 317.0, 0.0, 3.0, 287.0, 292.0, 292.0, 290.0, 293.0, 294.0, 299.0, 280.0, 316.0, 311.0, 319.0, 314.0, 322.0, 311.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 281.0, 286.0, 289.0, 298.0, 316.0, 320.0, 316.0, 314.0, 324.0, 312.0, 280.0, 296.0, 316.0, 320.0, 301.0, 281.0, 290.0, 297.0, 319.0, 317.0, 319.0, 317.0, 319.0, 320.0, 316.0, 314.0, 317.0, 322.0, 288.0, 294.0, 291.0, 288.0, 
319.0, 314.0, 302.0, 285.0, 290.0, 292.0, 313.0, 314.0, 265.0, 274.0, 322.0, 314.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 288.0, 294.0, 292.0, 287.0, 322.0, 317.0, 324.0, 312.0, 299.0, 291.0, 308.0, 319.0, 315.0, 318.0, 319.0, 320.0, 285.0, 294.0, 293.0, 294.0, 293.0, 286.0, 313.0, 323.0, 316.0, 320.0, 316.0, 314.0, 316.0, 320.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 299.0, 285.0, 319.0, 314.0, 296.0, 294.0, 319.0, 311.0, 319.0, 308.0, 319.0, 314.0, 314.0, 322.0, 285.0, 294.0, 316.0, 314.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 298.0, 284.0, 314.0, 316.0, 290.0, 286.0, 316.0, 320.0, 314.0, 310.0, 316.0, 314.0, 322.0, 317.0, 310.0, 314.0, 319.0, 317.0, 291.0, 285.0, 293.0, 294.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 302.0, 291.0, 316.0, 317.0, 299.0, 280.0, 316.0, 314.0, 319.0, 320.0, 296.0, 291.0, 291.0, 291.0, 308.0, 328.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8213047734882832, "mean_processing_ms": 0.2326926130887054, "mean_inference_ms": 1.4220964273978254}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9768000, "num_steps_sampled": 5209600, "sample_time_ms": 23078.335, "load_time_ms": 37.045, "grad_time_ms": 10614.832, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037456925492733717, "policy_loss": -0.004354165401309729, "vf_loss": 86.5316162109375, "vf_explained_var": 0.7801554799079895, "kl": 0.0024353403132408857, "entropy": 1.1066083908081055, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5209600, "episodes_total": 13024, "training_iteration": 407, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-16-07", "timestamp": 1660259767, "time_this_iter_s": 29.56272530555725, "time_total_s": 18176.394220113754, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": 
{"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18176.394220113754, "timesteps_since_restore": 5209600, "iterations_since_restore": 407, "perf": {"cpu_util_percent": 33.98809523809524, "ram_util_percent": 58.745238095238086}} +{"episode_reward_max": 639.0, "episode_reward_min": 3.0, "episode_reward_mean": 607.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.94}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 187.08, "shaped_reward_min": 3, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 22, 
"onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.2, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.2, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.2, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 636.0, 636.0, 633.0, 618.0, 582.0, 630.0, 636.0, 636.0, 633.0, 636.0, 639.0, 630.0, 576.0, 582.0, 633.0, 639.0, 579.0, 636.0, 636.0, 639.0, 579.0, 636.0, 630.0, 633.0, 636.0, 579.0, 630.0, 633.0, 516.0, 633.0, 584.0, 633.0, 590.0, 630.0, 627.0, 633.0, 636.0, 579.0, 630.0, 630.0, 630.0, 630.0, 582.0, 630.0, 576.0, 636.0, 624.0, 630.0, 639.0, 624.0, 636.0, 576.0, 587.0, 639.0, 630.0, 633.0, 630.0, 593.0, 633.0, 579.0, 630.0, 639.0, 587.0, 582.0, 636.0, 582.0, 633.0, 633.0, 633.0, 587.0, 544.0, 630.0, 587.0, 587.0, 582.0, 633.0, 3.0, 579.0, 582.0, 587.0, 579.0, 627.0, 633.0, 633.0, 582.0, 582.0, 636.0, 567.0, 587.0, 636.0, 630.0, 636.0, 576.0, 636.0, 582.0, 587.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 314.0, 315.0, 321.0, 314.0, 322.0, 319.0, 317.0, 311.0, 322.0, 312.0, 306.0, 294.0, 288.0, 310.0, 320.0, 314.0, 322.0, 314.0, 322.0, 316.0, 317.0, 316.0, 320.0, 317.0, 322.0, 319.0, 311.0, 281.0, 295.0, 293.0, 289.0, 316.0, 317.0, 322.0, 317.0, 293.0, 286.0, 316.0, 320.0, 325.0, 311.0, 319.0, 320.0, 
293.0, 286.0, 319.0, 317.0, 319.0, 311.0, 317.0, 316.0, 314.0, 322.0, 291.0, 288.0, 314.0, 316.0, 313.0, 320.0, 265.0, 251.0, 321.0, 312.0, 299.0, 285.0, 319.0, 314.0, 296.0, 294.0, 319.0, 311.0, 319.0, 308.0, 319.0, 314.0, 314.0, 322.0, 285.0, 294.0, 316.0, 314.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 298.0, 284.0, 314.0, 316.0, 290.0, 286.0, 316.0, 320.0, 314.0, 310.0, 316.0, 314.0, 322.0, 317.0, 310.0, 314.0, 319.0, 317.0, 291.0, 285.0, 293.0, 294.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 302.0, 291.0, 316.0, 317.0, 299.0, 280.0, 316.0, 314.0, 319.0, 320.0, 296.0, 291.0, 291.0, 291.0, 308.0, 328.0, 293.0, 289.0, 309.0, 324.0, 316.0, 317.0, 319.0, 314.0, 296.0, 291.0, 270.0, 274.0, 306.0, 324.0, 299.0, 288.0, 296.0, 291.0, 298.0, 284.0, 316.0, 317.0, 0.0, 3.0, 287.0, 292.0, 292.0, 290.0, 293.0, 294.0, 299.0, 280.0, 316.0, 311.0, 319.0, 314.0, 322.0, 311.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 281.0, 286.0, 289.0, 298.0, 316.0, 320.0, 316.0, 314.0, 324.0, 312.0, 280.0, 296.0, 316.0, 320.0, 301.0, 281.0, 290.0, 297.0, 319.0, 317.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8203957464749945, "mean_processing_ms": 0.2325105755388932, "mean_inference_ms": 1.4210101321453532}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9792000, "num_steps_sampled": 5222400, "sample_time_ms": 22766.104, "load_time_ms": 36.963, "grad_time_ms": 10377.417, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0032091455068439245, "policy_loss": -0.004078669007867575, "vf_loss": 78.43866729736328, "vf_explained_var": 0.7684862613677979, "kl": 0.00216904329136014, "entropy": 1.1121129989624023, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5222400, "episodes_total": 13056, "training_iteration": 408, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-12_00-16-35", "timestamp": 1660259795, "time_this_iter_s": 28.361918210983276, "time_total_s": 18204.756138324738, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18204.756138324738, "timesteps_since_restore": 5222400, "iterations_since_restore": 408, "perf": {"cpu_util_percent": 30.642500000000002, "ram_util_percent": 58.74749999999999}} +{"episode_reward_max": 639.0, "episode_reward_min": 3.0, "episode_reward_mean": 606.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.12}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 186.64, "shaped_reward_min": 3, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.61, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.16, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.69, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.15, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.67, 
"soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.15, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.15, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 570.0, 633.0, 633.0, 584.0, 621.0, 636.0, 584.0, 630.0, 576.0, 587.0, 630.0, 630.0, 630.0, 636.0, 587.0, 582.0, 630.0, 639.0, 633.0, 633.0, 465.0, 636.0, 587.0, 630.0, 630.0, 587.0, 636.0, 630.0, 630.0, 636.0, 587.0, 582.0, 636.0, 582.0, 633.0, 633.0, 633.0, 587.0, 544.0, 630.0, 587.0, 587.0, 582.0, 633.0, 3.0, 579.0, 582.0, 587.0, 579.0, 627.0, 633.0, 633.0, 582.0, 582.0, 636.0, 567.0, 587.0, 636.0, 630.0, 636.0, 576.0, 636.0, 582.0, 587.0, 636.0, 636.0, 636.0, 636.0, 636.0, 636.0, 633.0, 618.0, 582.0, 630.0, 636.0, 636.0, 633.0, 636.0, 639.0, 630.0, 576.0, 582.0, 633.0, 639.0, 579.0, 636.0, 636.0, 639.0, 579.0, 636.0, 630.0, 633.0, 636.0, 579.0, 630.0, 633.0, 516.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 322.0, 305.0, 325.0, 279.0, 291.0, 319.0, 314.0, 316.0, 317.0, 287.0, 297.0, 311.0, 310.0, 321.0, 315.0, 299.0, 285.0, 314.0, 
316.0, 282.0, 294.0, 296.0, 291.0, 318.0, 312.0, 316.0, 314.0, 314.0, 316.0, 316.0, 320.0, 299.0, 288.0, 294.0, 288.0, 313.0, 317.0, 319.0, 320.0, 314.0, 319.0, 317.0, 316.0, 239.0, 226.0, 319.0, 317.0, 301.0, 286.0, 321.0, 309.0, 316.0, 314.0, 293.0, 294.0, 317.0, 319.0, 313.0, 317.0, 311.0, 319.0, 321.0, 315.0, 296.0, 291.0, 291.0, 291.0, 308.0, 328.0, 293.0, 289.0, 309.0, 324.0, 316.0, 317.0, 319.0, 314.0, 296.0, 291.0, 270.0, 274.0, 306.0, 324.0, 299.0, 288.0, 296.0, 291.0, 298.0, 284.0, 316.0, 317.0, 0.0, 3.0, 287.0, 292.0, 292.0, 290.0, 293.0, 294.0, 299.0, 280.0, 316.0, 311.0, 319.0, 314.0, 322.0, 311.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 281.0, 286.0, 289.0, 298.0, 316.0, 320.0, 316.0, 314.0, 324.0, 312.0, 280.0, 296.0, 316.0, 320.0, 301.0, 281.0, 290.0, 297.0, 319.0, 317.0, 319.0, 317.0, 322.0, 314.0, 315.0, 321.0, 314.0, 322.0, 319.0, 317.0, 311.0, 322.0, 312.0, 306.0, 294.0, 288.0, 310.0, 320.0, 314.0, 322.0, 314.0, 322.0, 316.0, 317.0, 316.0, 320.0, 317.0, 322.0, 319.0, 311.0, 281.0, 295.0, 293.0, 289.0, 316.0, 317.0, 322.0, 317.0, 293.0, 286.0, 316.0, 320.0, 325.0, 311.0, 319.0, 320.0, 293.0, 286.0, 319.0, 317.0, 319.0, 311.0, 317.0, 316.0, 314.0, 322.0, 291.0, 288.0, 314.0, 316.0, 313.0, 320.0, 265.0, 251.0, 321.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8194928769561236, "mean_processing_ms": 0.23232969366347853, "mean_inference_ms": 1.4199784153489992}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9816000, "num_steps_sampled": 5235200, "sample_time_ms": 22579.389, "load_time_ms": 36.726, "grad_time_ms": 10063.447, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015366753796115518, "policy_loss": -0.006077593192458153, "vf_loss": 81.69181060791016, "vf_explained_var": 0.7707114219665527, "kl": 0.001978269312530756, "entropy": 1.1098326444625854, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 
12800, "done": false, "timesteps_total": 5235200, "episodes_total": 13088, "training_iteration": 409, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-17-04", "timestamp": 1660259824, "time_this_iter_s": 29.396647930145264, "time_total_s": 18234.152786254883, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18234.152786254883, "timesteps_since_restore": 5235200, "iterations_since_restore": 409, "perf": {"cpu_util_percent": 30.553658536585367, "ram_util_percent": 58.824390243902435}} +{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 616.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.15}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 189.1, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.64, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.48, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.97, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.03, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.03, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.03, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 624.0, 636.0, 630.0, 627.0, 633.0, 587.0, 633.0, 639.0, 581.0, 630.0, 570.0, 636.0, 636.0, 587.0, 633.0, 627.0, 587.0, 633.0, 627.0, 582.0, 636.0, 633.0, 627.0, 587.0, 572.0, 582.0, 630.0, 630.0, 639.0, 636.0, 636.0, 582.0, 587.0, 636.0, 636.0, 636.0, 636.0, 636.0, 636.0, 633.0, 618.0, 582.0, 630.0, 636.0, 636.0, 633.0, 636.0, 639.0, 630.0, 576.0, 582.0, 633.0, 639.0, 579.0, 636.0, 636.0, 639.0, 579.0, 636.0, 630.0, 633.0, 636.0, 579.0, 630.0, 633.0, 516.0, 633.0, 633.0, 630.0, 570.0, 633.0, 633.0, 584.0, 621.0, 636.0, 584.0, 630.0, 576.0, 587.0, 630.0, 630.0, 630.0, 636.0, 587.0, 582.0, 630.0, 639.0, 633.0, 633.0, 465.0, 636.0, 587.0, 630.0, 630.0, 587.0, 636.0, 630.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 310.0, 314.0, 321.0, 315.0, 316.0, 314.0, 305.0, 322.0, 322.0, 311.0, 298.0, 289.0, 316.0, 317.0, 312.0, 327.0, 287.0, 294.0, 316.0, 314.0, 299.0, 271.0, 312.0, 324.0, 314.0, 322.0, 301.0, 286.0, 321.0, 312.0, 316.0, 311.0, 293.0, 294.0, 313.0, 320.0, 315.0, 312.0, 286.0, 296.0, 317.0, 319.0, 319.0, 314.0, 310.0, 317.0, 293.0, 294.0, 284.0, 288.0, 287.0, 295.0, 316.0, 314.0, 319.0, 311.0, 314.0, 325.0, 319.0, 317.0, 317.0, 319.0, 301.0, 281.0, 290.0, 297.0, 319.0, 317.0, 319.0, 317.0, 322.0, 314.0, 315.0, 321.0, 314.0, 322.0, 319.0, 317.0, 311.0, 322.0, 312.0, 306.0, 294.0, 288.0, 310.0, 320.0, 314.0, 322.0, 314.0, 322.0, 316.0, 317.0, 316.0, 320.0, 317.0, 322.0, 319.0, 311.0, 281.0, 295.0, 293.0, 289.0, 316.0, 317.0, 322.0, 317.0, 293.0, 286.0, 316.0, 320.0, 325.0, 311.0, 319.0, 320.0, 293.0, 286.0, 319.0, 317.0, 319.0, 311.0, 317.0, 316.0, 314.0, 322.0, 291.0, 288.0, 314.0, 316.0, 313.0, 320.0, 265.0, 251.0, 321.0, 312.0, 311.0, 322.0, 305.0, 325.0, 279.0, 291.0, 319.0, 314.0, 316.0, 317.0, 287.0, 297.0, 311.0, 310.0, 321.0, 315.0, 299.0, 285.0, 314.0, 316.0, 282.0, 294.0, 296.0, 291.0, 318.0, 312.0, 316.0, 314.0, 314.0, 316.0, 316.0, 320.0, 299.0, 288.0, 294.0, 288.0, 313.0, 317.0, 319.0, 320.0, 314.0, 319.0, 317.0, 316.0, 239.0, 226.0, 319.0, 317.0, 301.0, 286.0, 321.0, 309.0, 316.0, 314.0, 293.0, 294.0, 317.0, 319.0, 313.0, 317.0, 311.0, 319.0, 321.0, 315.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8185939281976542, "mean_processing_ms": 0.23214957034967199, "mean_inference_ms": 1.4189514044158715}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9840000, "num_steps_sampled": 5248000, "sample_time_ms": 22039.582, "load_time_ms": 37.045, "grad_time_ms": 9893.172, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014178849523887038, "policy_loss": -0.008465434424579144, "vf_loss": 76.02017974853516, 
"vf_explained_var": 0.7725793719291687, "kl": 0.0019942354410886765, "entropy": 1.1089389324188232, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5248000, "episodes_total": 13120, "training_iteration": 410, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-17-33", "timestamp": 1660259853, "time_this_iter_s": 28.213119983673096, "time_total_s": 18262.365906238556, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": 
[null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18262.365906238556, "timesteps_since_restore": 5248000, "iterations_since_restore": 410, "perf": {"cpu_util_percent": 35.04, "ram_util_percent": 58.745000000000005}} +{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 615.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 307.625}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.45, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 
0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.79, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.29, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.07, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.11, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.55, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.84, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.77, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.11, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.11, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 633.0, 576.0, 630.0, 587.0, 630.0, 639.0, 627.0, 636.0, 627.0, 630.0, 630.0, 521.0, 636.0, 576.0, 633.0, 633.0, 636.0, 584.0, 624.0, 633.0, 630.0, 633.0, 584.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 630.0, 579.0, 630.0, 633.0, 516.0, 633.0, 633.0, 630.0, 570.0, 633.0, 633.0, 584.0, 621.0, 636.0, 584.0, 630.0, 576.0, 587.0, 630.0, 630.0, 630.0, 636.0, 587.0, 582.0, 630.0, 639.0, 633.0, 633.0, 465.0, 636.0, 587.0, 630.0, 630.0, 587.0, 636.0, 630.0, 630.0, 636.0, 587.0, 624.0, 636.0, 630.0, 627.0, 633.0, 587.0, 633.0, 639.0, 581.0, 630.0, 570.0, 636.0, 636.0, 587.0, 633.0, 627.0, 587.0, 633.0, 627.0, 582.0, 636.0, 633.0, 627.0, 587.0, 572.0, 582.0, 630.0, 630.0, 639.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 314.0, 319.0, 286.0, 290.0, 319.0, 311.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 310.0, 317.0, 316.0, 320.0, 313.0, 314.0, 324.0, 306.0, 320.0, 310.0, 254.0, 267.0, 319.0, 317.0, 295.0, 281.0, 319.0, 314.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 316.0, 308.0, 316.0, 317.0, 311.0, 319.0, 313.0, 320.0, 293.0, 291.0, 317.0, 316.0, 314.0, 322.0, 314.0, 319.0, 326.0, 307.0, 321.0, 315.0, 319.0, 317.0, 319.0, 311.0, 290.0, 289.0, 314.0, 316.0, 313.0, 320.0, 265.0, 251.0, 321.0, 312.0, 311.0, 322.0, 305.0, 325.0, 279.0, 291.0, 319.0, 314.0, 316.0, 317.0, 287.0, 297.0, 311.0, 310.0, 321.0, 315.0, 299.0, 285.0, 314.0, 316.0, 282.0, 294.0, 296.0, 291.0, 318.0, 312.0, 316.0, 314.0, 314.0, 316.0, 316.0, 320.0, 299.0, 288.0, 294.0, 288.0, 313.0, 317.0, 319.0, 320.0, 314.0, 319.0, 317.0, 316.0, 239.0, 226.0, 319.0, 317.0, 301.0, 286.0, 321.0, 309.0, 316.0, 314.0, 293.0, 294.0, 317.0, 319.0, 313.0, 317.0, 311.0, 319.0, 321.0, 315.0, 293.0, 294.0, 310.0, 314.0, 321.0, 315.0, 316.0, 314.0, 305.0, 322.0, 322.0, 311.0, 298.0, 289.0, 316.0, 317.0, 312.0, 327.0, 287.0, 294.0, 316.0, 314.0, 299.0, 271.0, 312.0, 324.0, 314.0, 322.0, 301.0, 286.0, 321.0, 312.0, 316.0, 311.0, 293.0, 294.0, 313.0, 320.0, 315.0, 312.0, 286.0, 296.0, 317.0, 319.0, 319.0, 314.0, 310.0, 317.0, 293.0, 294.0, 284.0, 288.0, 287.0, 295.0, 316.0, 314.0, 319.0, 311.0, 314.0, 325.0, 319.0, 317.0, 317.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8176992911646718, "mean_processing_ms": 0.23196962818643072, "mean_inference_ms": 1.417972483148229}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9864000, "num_steps_sampled": 5260800, "sample_time_ms": 21770.07, "load_time_ms": 36.824, "grad_time_ms": 9519.356, "update_time_ms": 0.001, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001577894203364849, "policy_loss": -0.005276820156723261, "vf_loss": 74.05913543701172, "vf_explained_var": 0.7708218693733215, "kl": 0.002156370086595416, "entropy": 1.1023942232131958, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5260800, "episodes_total": 13152, "training_iteration": 411, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-18-03", "timestamp": 1660259883, "time_this_iter_s": 30.32603693008423, "time_total_s": 18292.69194316864, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18292.69194316864, "timesteps_since_restore": 5260800, "iterations_since_restore": 411, "perf": {"cpu_util_percent": 34.49999999999999, "ram_util_percent": 58.81162790697674}} +{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 608.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.465}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.93, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.53, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.57, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 2, 
"useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.69, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.04, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 630.0, 582.0, 633.0, 633.0, 639.0, 584.0, 639.0, 197.0, 579.0, 465.0, 579.0, 636.0, 630.0, 639.0, 630.0, 630.0, 413.0, 582.0, 630.0, 576.0, 633.0, 639.0, 582.0, 633.0, 630.0, 630.0, 584.0, 587.0, 522.0, 636.0, 630.0, 636.0, 630.0, 630.0, 636.0, 587.0, 624.0, 636.0, 630.0, 627.0, 633.0, 587.0, 633.0, 639.0, 581.0, 630.0, 570.0, 636.0, 636.0, 587.0, 633.0, 627.0, 587.0, 633.0, 627.0, 582.0, 636.0, 633.0, 627.0, 587.0, 572.0, 582.0, 630.0, 630.0, 639.0, 636.0, 636.0, 582.0, 633.0, 576.0, 630.0, 587.0, 630.0, 639.0, 627.0, 636.0, 627.0, 630.0, 630.0, 521.0, 636.0, 576.0, 633.0, 633.0, 636.0, 584.0, 624.0, 633.0, 630.0, 633.0, 584.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 314.0, 319.0, 311.0, 322.0, 317.0, 322.0, 293.0, 291.0, 316.0, 323.0, 100.0, 97.0, 294.0, 285.0, 229.0, 236.0, 288.0, 291.0, 321.0, 315.0, 311.0, 319.0, 317.0, 322.0, 319.0, 311.0, 311.0, 319.0, 207.0, 206.0, 293.0, 289.0, 311.0, 319.0, 285.0, 291.0, 311.0, 322.0, 319.0, 320.0, 296.0, 286.0, 318.0, 315.0, 321.0, 309.0, 316.0, 314.0, 287.0, 297.0, 296.0, 291.0, 262.0, 260.0, 319.0, 317.0, 313.0, 317.0, 317.0, 319.0, 313.0, 317.0, 311.0, 319.0, 321.0, 315.0, 293.0, 294.0, 310.0, 314.0, 321.0, 315.0, 316.0, 314.0, 305.0, 322.0, 322.0, 311.0, 298.0, 289.0, 316.0, 317.0, 312.0, 327.0, 287.0, 294.0, 316.0, 314.0, 299.0, 271.0, 312.0, 324.0, 314.0, 322.0, 301.0, 286.0, 321.0, 312.0, 316.0, 311.0, 293.0, 294.0, 313.0, 320.0, 315.0, 312.0, 286.0, 296.0, 317.0, 319.0, 319.0, 314.0, 310.0, 317.0, 293.0, 294.0, 284.0, 288.0, 287.0, 295.0, 316.0, 314.0, 319.0, 311.0, 314.0, 325.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 314.0, 319.0, 286.0, 290.0, 319.0, 311.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 310.0, 317.0, 316.0, 320.0, 313.0, 314.0, 324.0, 306.0, 320.0, 310.0, 254.0, 267.0, 319.0, 317.0, 295.0, 281.0, 319.0, 314.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 316.0, 308.0, 316.0, 317.0, 311.0, 319.0, 313.0, 320.0, 293.0, 291.0, 317.0, 316.0, 314.0, 322.0, 314.0, 319.0, 326.0, 307.0, 321.0, 315.0, 319.0, 317.0, 319.0, 311.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8168045018063097, "mean_processing_ms": 0.23178953609537822, "mean_inference_ms": 
1.4169011715931346}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9888000, "num_steps_sampled": 5273600, "sample_time_ms": 21329.9, "load_time_ms": 36.743, "grad_time_ms": 9279.775, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034279574174433947, "policy_loss": -0.004527573008090258, "vf_loss": 85.10655975341797, "vf_explained_var": 0.7758853435516357, "kl": 0.0018181651830673218, "entropy": 1.1102546453475952, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5273600, "episodes_total": 13184, "training_iteration": 412, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-18-31", "timestamp": 1660259911, "time_this_iter_s": 28.26536202430725, "time_total_s": 18320.957305192947, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18320.957305192947, "timesteps_since_restore": 5273600, "iterations_since_restore": 412, "perf": {"cpu_util_percent": 35.269999999999996, "ram_util_percent": 59.315}} +{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 608.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 304.08}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.96, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.7, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 4, 
"dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.7, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.7, 
"viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 636.0, 587.0, 621.0, 630.0, 582.0, 636.0, 587.0, 633.0, 630.0, 633.0, 627.0, 587.0, 633.0, 627.0, 630.0, 633.0, 587.0, 587.0, 633.0, 587.0, 587.0, 587.0, 636.0, 579.0, 627.0, 633.0, 582.0, 630.0, 630.0, 590.0, 633.0, 630.0, 639.0, 636.0, 636.0, 582.0, 633.0, 576.0, 630.0, 587.0, 630.0, 639.0, 627.0, 636.0, 627.0, 630.0, 630.0, 521.0, 636.0, 576.0, 633.0, 633.0, 636.0, 584.0, 624.0, 633.0, 630.0, 633.0, 584.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 630.0, 579.0, 630.0, 630.0, 582.0, 633.0, 633.0, 639.0, 584.0, 639.0, 197.0, 579.0, 465.0, 579.0, 636.0, 630.0, 639.0, 
630.0, 630.0, 413.0, 582.0, 630.0, 576.0, 633.0, 639.0, 582.0, 633.0, 630.0, 630.0, 584.0, 587.0, 522.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 319.0, 317.0, 298.0, 289.0, 317.0, 304.0, 317.0, 313.0, 286.0, 296.0, 319.0, 317.0, 288.0, 299.0, 320.0, 313.0, 316.0, 314.0, 316.0, 317.0, 322.0, 305.0, 285.0, 302.0, 316.0, 317.0, 314.0, 313.0, 314.0, 316.0, 317.0, 316.0, 290.0, 297.0, 299.0, 288.0, 316.0, 317.0, 288.0, 299.0, 288.0, 299.0, 288.0, 299.0, 322.0, 314.0, 284.0, 295.0, 316.0, 311.0, 317.0, 316.0, 285.0, 297.0, 313.0, 317.0, 311.0, 319.0, 299.0, 291.0, 318.0, 315.0, 319.0, 311.0, 314.0, 325.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 314.0, 319.0, 286.0, 290.0, 319.0, 311.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 310.0, 317.0, 316.0, 320.0, 313.0, 314.0, 324.0, 306.0, 320.0, 310.0, 254.0, 267.0, 319.0, 317.0, 295.0, 281.0, 319.0, 314.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 316.0, 308.0, 316.0, 317.0, 311.0, 319.0, 313.0, 320.0, 293.0, 291.0, 317.0, 316.0, 314.0, 322.0, 314.0, 319.0, 326.0, 307.0, 321.0, 315.0, 319.0, 317.0, 319.0, 311.0, 290.0, 289.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 314.0, 319.0, 311.0, 322.0, 317.0, 322.0, 293.0, 291.0, 316.0, 323.0, 100.0, 97.0, 294.0, 285.0, 229.0, 236.0, 288.0, 291.0, 321.0, 315.0, 311.0, 319.0, 317.0, 322.0, 319.0, 311.0, 311.0, 319.0, 207.0, 206.0, 293.0, 289.0, 311.0, 319.0, 285.0, 291.0, 311.0, 322.0, 319.0, 320.0, 296.0, 286.0, 318.0, 315.0, 321.0, 309.0, 316.0, 314.0, 287.0, 
297.0, 296.0, 291.0, 262.0, 260.0, 319.0, 317.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8159183949251133, "mean_processing_ms": 0.23161113375748543, "mean_inference_ms": 1.4159039621746354}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9912000, "num_steps_sampled": 5286400, "sample_time_ms": 21002.373, "load_time_ms": 36.818, "grad_time_ms": 9265.226, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002250772900879383, "policy_loss": -0.005318752024322748, "vf_loss": 81.240478515625, "vf_explained_var": 0.7617523074150085, "kl": 0.0018393909558653831, "entropy": 1.1090354919433594, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5286400, "episodes_total": 13216, "training_iteration": 413, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-19-03", "timestamp": 1660259943, "time_this_iter_s": 31.600411891937256, "time_total_s": 18352.557717084885, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, 
"timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18352.557717084885, "timesteps_since_restore": 5286400, "iterations_since_restore": 413, "perf": {"cpu_util_percent": 31.806666666666665, "ram_util_percent": 58.973333333333315}} +{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 608.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 304.405}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 187.21, "shaped_reward_min": 77, "shaped_reward_max": 199, 
"tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.2, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 
20, "potting_onion_agent_1_mean": 17.7, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.56, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.13, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.2, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.7, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 16.2, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.7, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 639.0, 530.0, 630.0, 582.0, 582.0, 624.0, 582.0, 630.0, 630.0, 630.0, 630.0, 587.0, 636.0, 639.0, 636.0, 627.0, 633.0, 627.0, 633.0, 636.0, 630.0, 630.0, 630.0, 630.0, 636.0, 639.0, 582.0, 636.0, 639.0, 636.0, 636.0, 636.0, 630.0, 579.0, 630.0, 630.0, 582.0, 633.0, 633.0, 639.0, 584.0, 639.0, 197.0, 579.0, 465.0, 579.0, 636.0, 630.0, 639.0, 630.0, 630.0, 413.0, 582.0, 630.0, 576.0, 633.0, 639.0, 
582.0, 633.0, 630.0, 630.0, 584.0, 587.0, 522.0, 636.0, 630.0, 627.0, 636.0, 587.0, 621.0, 630.0, 582.0, 636.0, 587.0, 633.0, 630.0, 633.0, 627.0, 587.0, 633.0, 627.0, 630.0, 633.0, 587.0, 587.0, 633.0, 587.0, 587.0, 587.0, 636.0, 579.0, 627.0, 633.0, 582.0, 630.0, 630.0, 590.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 316.0, 314.0, 319.0, 324.0, 315.0, 260.0, 270.0, 313.0, 317.0, 290.0, 292.0, 286.0, 296.0, 313.0, 311.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 314.0, 316.0, 310.0, 320.0, 288.0, 299.0, 319.0, 317.0, 319.0, 320.0, 314.0, 322.0, 307.0, 320.0, 316.0, 317.0, 311.0, 316.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 313.0, 317.0, 316.0, 314.0, 316.0, 314.0, 314.0, 322.0, 319.0, 320.0, 290.0, 292.0, 319.0, 317.0, 322.0, 317.0, 321.0, 315.0, 321.0, 315.0, 319.0, 317.0, 319.0, 311.0, 290.0, 289.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 314.0, 319.0, 311.0, 322.0, 317.0, 322.0, 293.0, 291.0, 316.0, 323.0, 100.0, 97.0, 294.0, 285.0, 229.0, 236.0, 288.0, 291.0, 321.0, 315.0, 311.0, 319.0, 317.0, 322.0, 319.0, 311.0, 311.0, 319.0, 207.0, 206.0, 293.0, 289.0, 311.0, 319.0, 285.0, 291.0, 311.0, 322.0, 319.0, 320.0, 296.0, 286.0, 318.0, 315.0, 321.0, 309.0, 316.0, 314.0, 287.0, 297.0, 296.0, 291.0, 262.0, 260.0, 319.0, 317.0, 313.0, 317.0, 313.0, 314.0, 319.0, 317.0, 298.0, 289.0, 317.0, 304.0, 317.0, 313.0, 286.0, 296.0, 319.0, 317.0, 288.0, 299.0, 320.0, 313.0, 316.0, 314.0, 316.0, 317.0, 322.0, 305.0, 285.0, 302.0, 316.0, 317.0, 314.0, 313.0, 314.0, 
316.0, 317.0, 316.0, 290.0, 297.0, 299.0, 288.0, 316.0, 317.0, 288.0, 299.0, 288.0, 299.0, 288.0, 299.0, 322.0, 314.0, 284.0, 295.0, 316.0, 311.0, 317.0, 316.0, 285.0, 297.0, 313.0, 317.0, 311.0, 319.0, 299.0, 291.0, 318.0, 315.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8150462375742643, "mean_processing_ms": 0.23143722383890097, "mean_inference_ms": 1.4151601741062898}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9936000, "num_steps_sampled": 5299200, "sample_time_ms": 21315.468, "load_time_ms": 36.748, "grad_time_ms": 9192.863, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0032192638609558344, "policy_loss": -0.004382268991321325, "vf_loss": 81.57144927978516, "vf_explained_var": 0.7626829147338867, "kl": 0.001976991770789027, "entropy": 1.1112231016159058, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5299200, "episodes_total": 13248, "training_iteration": 414, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-19-39", "timestamp": 1660259979, "time_this_iter_s": 36.33256697654724, "time_total_s": 18388.890284061432, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18388.890284061432, "timesteps_since_restore": 5299200, "iterations_since_restore": 414, "perf": {"cpu_util_percent": 31.756862745098033, "ram_util_percent": 59.57450980392157}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 614.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.4}, 
"custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.4, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.55, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.43, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.33, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.82, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.69, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.1, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.33, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.82, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.33, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.82, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 570.0, 633.0, 582.0, 636.0, 630.0, 639.0, 639.0, 584.0, 570.0, 636.0, 627.0, 627.0, 630.0, 630.0, 573.0, 630.0, 582.0, 587.0, 633.0, 582.0, 633.0, 633.0, 630.0, 525.0, 582.0, 633.0, 627.0, 
633.0, 627.0, 561.0, 587.0, 522.0, 636.0, 630.0, 627.0, 636.0, 587.0, 621.0, 630.0, 582.0, 636.0, 587.0, 633.0, 630.0, 633.0, 627.0, 587.0, 633.0, 627.0, 630.0, 633.0, 587.0, 587.0, 633.0, 587.0, 587.0, 587.0, 636.0, 579.0, 627.0, 633.0, 582.0, 630.0, 630.0, 590.0, 633.0, 627.0, 633.0, 639.0, 530.0, 630.0, 582.0, 582.0, 624.0, 582.0, 630.0, 630.0, 630.0, 630.0, 587.0, 636.0, 639.0, 636.0, 627.0, 633.0, 627.0, 633.0, 636.0, 630.0, 630.0, 630.0, 630.0, 636.0, 639.0, 582.0, 636.0, 639.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 317.0, 316.0, 284.0, 286.0, 316.0, 317.0, 291.0, 291.0, 314.0, 322.0, 316.0, 314.0, 324.0, 315.0, 319.0, 320.0, 287.0, 297.0, 287.0, 283.0, 321.0, 315.0, 313.0, 314.0, 316.0, 311.0, 319.0, 311.0, 311.0, 319.0, 285.0, 288.0, 318.0, 312.0, 286.0, 296.0, 291.0, 296.0, 314.0, 319.0, 280.0, 302.0, 314.0, 319.0, 316.0, 317.0, 319.0, 311.0, 265.0, 260.0, 293.0, 289.0, 314.0, 319.0, 310.0, 317.0, 314.0, 319.0, 318.0, 309.0, 285.0, 276.0, 296.0, 291.0, 262.0, 260.0, 319.0, 317.0, 313.0, 317.0, 313.0, 314.0, 319.0, 317.0, 298.0, 289.0, 317.0, 304.0, 317.0, 313.0, 286.0, 296.0, 319.0, 317.0, 288.0, 299.0, 320.0, 313.0, 316.0, 314.0, 316.0, 317.0, 322.0, 305.0, 285.0, 302.0, 316.0, 317.0, 314.0, 313.0, 314.0, 316.0, 317.0, 316.0, 290.0, 297.0, 299.0, 288.0, 316.0, 317.0, 288.0, 299.0, 288.0, 299.0, 288.0, 299.0, 322.0, 314.0, 284.0, 295.0, 316.0, 311.0, 317.0, 316.0, 285.0, 297.0, 313.0, 317.0, 311.0, 319.0, 299.0, 291.0, 318.0, 315.0, 311.0, 
316.0, 314.0, 319.0, 324.0, 315.0, 260.0, 270.0, 313.0, 317.0, 290.0, 292.0, 286.0, 296.0, 313.0, 311.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 314.0, 316.0, 310.0, 320.0, 288.0, 299.0, 319.0, 317.0, 319.0, 320.0, 314.0, 322.0, 307.0, 320.0, 316.0, 317.0, 311.0, 316.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 313.0, 317.0, 316.0, 314.0, 316.0, 314.0, 314.0, 322.0, 319.0, 320.0, 290.0, 292.0, 319.0, 317.0, 322.0, 317.0, 321.0, 315.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8141797334624801, "mean_processing_ms": 0.23126474228719665, "mean_inference_ms": 1.4144802295158576}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9960000, "num_steps_sampled": 5312000, "sample_time_ms": 21138.109, "load_time_ms": 36.708, "grad_time_ms": 9062.52, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00037816105759702623, "policy_loss": -0.006607938092201948, "vf_loss": 75.41075897216797, "vf_explained_var": 0.7763264775276184, "kl": 0.0018363663693889976, "entropy": 1.1099668741226196, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5312000, "episodes_total": 13280, "training_iteration": 415, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-20-09", "timestamp": 1660260009, "time_this_iter_s": 29.82709288597107, "time_total_s": 18418.717376947403, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18418.717376947403, "timesteps_since_restore": 5312000, "iterations_since_restore": 415, "perf": {"cpu_util_percent": 32.99761904761905, "ram_util_percent": 59.095238095238095}} +{"episode_reward_max": 639.0, 
"episode_reward_min": 525.0, "episode_reward_mean": 615.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.985}, "custom_metrics": {"sparse_reward_mean": 213.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.37, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.66, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.96, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, 
"onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.99, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.94, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 12, 
"optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.99, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.99, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [636.0, 552.0, 630.0, 633.0, 579.0, 636.0, 587.0, 579.0, 630.0, 630.0, 633.0, 579.0, 630.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 582.0, 633.0, 633.0, 587.0, 636.0, 584.0, 579.0, 630.0, 582.0, 627.0, 633.0, 630.0, 630.0, 590.0, 633.0, 627.0, 633.0, 639.0, 530.0, 630.0, 582.0, 582.0, 624.0, 582.0, 630.0, 630.0, 630.0, 630.0, 587.0, 636.0, 639.0, 636.0, 627.0, 633.0, 627.0, 633.0, 636.0, 630.0, 630.0, 630.0, 630.0, 636.0, 639.0, 582.0, 636.0, 639.0, 636.0, 630.0, 633.0, 570.0, 633.0, 582.0, 636.0, 630.0, 639.0, 639.0, 584.0, 570.0, 636.0, 627.0, 627.0, 630.0, 630.0, 573.0, 630.0, 582.0, 587.0, 633.0, 582.0, 633.0, 633.0, 630.0, 525.0, 582.0, 633.0, 627.0, 633.0, 627.0, 561.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 314.0, 281.0, 271.0, 311.0, 319.0, 322.0, 311.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 279.0, 300.0, 313.0, 317.0, 316.0, 314.0, 319.0, 314.0, 282.0, 297.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 313.0, 320.0, 296.0, 286.0, 319.0, 311.0, 314.0, 322.0, 314.0, 322.0, 306.0, 321.0, 289.0, 293.0, 311.0, 322.0, 319.0, 314.0, 285.0, 302.0, 319.0, 317.0, 290.0, 294.0, 288.0, 291.0, 311.0, 319.0, 295.0, 287.0, 308.0, 319.0, 316.0, 317.0, 313.0, 317.0, 311.0, 319.0, 299.0, 291.0, 318.0, 315.0, 311.0, 316.0, 314.0, 319.0, 324.0, 315.0, 260.0, 270.0, 313.0, 317.0, 290.0, 292.0, 286.0, 296.0, 313.0, 311.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 314.0, 316.0, 310.0, 320.0, 288.0, 299.0, 319.0, 317.0, 319.0, 320.0, 314.0, 
322.0, 307.0, 320.0, 316.0, 317.0, 311.0, 316.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 313.0, 317.0, 316.0, 314.0, 316.0, 314.0, 314.0, 322.0, 319.0, 320.0, 290.0, 292.0, 319.0, 317.0, 322.0, 317.0, 321.0, 315.0, 316.0, 314.0, 317.0, 316.0, 284.0, 286.0, 316.0, 317.0, 291.0, 291.0, 314.0, 322.0, 316.0, 314.0, 324.0, 315.0, 319.0, 320.0, 287.0, 297.0, 287.0, 283.0, 321.0, 315.0, 313.0, 314.0, 316.0, 311.0, 319.0, 311.0, 311.0, 319.0, 285.0, 288.0, 318.0, 312.0, 286.0, 296.0, 291.0, 296.0, 314.0, 319.0, 280.0, 302.0, 314.0, 319.0, 316.0, 317.0, 319.0, 311.0, 265.0, 260.0, 293.0, 289.0, 314.0, 319.0, 310.0, 317.0, 314.0, 319.0, 318.0, 309.0, 285.0, 276.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8133221505395263, "mean_processing_ms": 0.23109594102314795, "mean_inference_ms": 1.413916507245515}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9984000, "num_steps_sampled": 5324800, "sample_time_ms": 21409.91, "load_time_ms": 36.506, "grad_time_ms": 8913.404, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003510029288008809, "policy_loss": -0.0037950894329696894, "vf_loss": 78.6290054321289, "vf_explained_var": 0.7686605453491211, "kl": 0.0018828777829185128, "entropy": 1.1155847311019897, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5324800, "episodes_total": 13312, "training_iteration": 416, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-20-41", "timestamp": 1660260041, "time_this_iter_s": 31.975250005722046, "time_total_s": 18450.692626953125, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
18450.692626953125, "timesteps_since_restore": 5324800, "iterations_since_restore": 416, "perf": {"cpu_util_percent": 31.96888888888889, "ram_util_percent": 59.13111111111111}} +{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 613.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 306.745}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.49, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.76, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, 
"useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.33, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.86, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.57, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.33, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.86, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.33, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.86, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 627.0, 584.0, 633.0, 630.0, 573.0, 636.0, 582.0, 579.0, 633.0, 630.0, 636.0, 633.0, 573.0, 587.0, 579.0, 627.0, 630.0, 564.0, 636.0, 582.0, 636.0, 633.0, 579.0, 633.0, 582.0, 636.0, 639.0, 636.0, 630.0, 633.0, 570.0, 633.0, 582.0, 636.0, 630.0, 639.0, 639.0, 584.0, 570.0, 636.0, 627.0, 627.0, 630.0, 630.0, 573.0, 630.0, 582.0, 587.0, 633.0, 582.0, 633.0, 633.0, 630.0, 525.0, 582.0, 633.0, 627.0, 633.0, 627.0, 561.0, 636.0, 552.0, 630.0, 633.0, 579.0, 636.0, 587.0, 579.0, 630.0, 630.0, 633.0, 579.0, 630.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 582.0, 633.0, 633.0, 587.0, 636.0, 584.0, 579.0, 630.0, 582.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 289.0, 319.0, 317.0, 319.0, 317.0, 314.0, 322.0, 313.0, 317.0, 311.0, 322.0, 314.0, 316.0, 319.0, 311.0, 313.0, 314.0, 290.0, 294.0, 316.0, 317.0, 314.0, 316.0, 283.0, 290.0, 322.0, 314.0, 297.0, 285.0, 291.0, 288.0, 318.0, 315.0, 314.0, 316.0, 322.0, 314.0, 314.0, 319.0, 296.0, 277.0, 293.0, 294.0, 291.0, 288.0, 305.0, 322.0, 314.0, 316.0, 279.0, 285.0, 314.0, 322.0, 288.0, 294.0, 319.0, 317.0, 321.0, 312.0, 291.0, 288.0, 319.0, 314.0, 290.0, 292.0, 319.0, 317.0, 322.0, 317.0, 321.0, 315.0, 316.0, 
314.0, 317.0, 316.0, 284.0, 286.0, 316.0, 317.0, 291.0, 291.0, 314.0, 322.0, 316.0, 314.0, 324.0, 315.0, 319.0, 320.0, 287.0, 297.0, 287.0, 283.0, 321.0, 315.0, 313.0, 314.0, 316.0, 311.0, 319.0, 311.0, 311.0, 319.0, 285.0, 288.0, 318.0, 312.0, 286.0, 296.0, 291.0, 296.0, 314.0, 319.0, 280.0, 302.0, 314.0, 319.0, 316.0, 317.0, 319.0, 311.0, 265.0, 260.0, 293.0, 289.0, 314.0, 319.0, 310.0, 317.0, 314.0, 319.0, 318.0, 309.0, 285.0, 276.0, 322.0, 314.0, 281.0, 271.0, 311.0, 319.0, 322.0, 311.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 279.0, 300.0, 313.0, 317.0, 316.0, 314.0, 319.0, 314.0, 282.0, 297.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 313.0, 320.0, 296.0, 286.0, 319.0, 311.0, 314.0, 322.0, 314.0, 322.0, 306.0, 321.0, 289.0, 293.0, 311.0, 322.0, 319.0, 314.0, 285.0, 302.0, 319.0, 317.0, 290.0, 294.0, 288.0, 291.0, 311.0, 319.0, 295.0, 287.0, 308.0, 319.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.812458332690278, "mean_processing_ms": 0.23092571272245643, "mean_inference_ms": 1.413108766021267}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10008000, "num_steps_sampled": 5337600, "sample_time_ms": 21466.731, "load_time_ms": 36.538, "grad_time_ms": 8937.935, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011007506400346756, "policy_loss": -0.005807527806609869, "vf_loss": 74.63658905029297, "vf_explained_var": 0.7761281132698059, "kl": 0.0020840545184910297, "entropy": 1.110751986503601, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5337600, "episodes_total": 13344, "training_iteration": 417, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-21-11", "timestamp": 1660260071, "time_this_iter_s": 30.376654863357544, "time_total_s": 18481.069281816483, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18481.069281816483, "timesteps_since_restore": 5337600, "iterations_since_restore": 417, "perf": {"cpu_util_percent": 32.944186046511625, "ram_util_percent": 59.151162790697676}} +{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 613.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 261.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 306.775}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.55, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.0, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, 
"onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.31, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.55, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.04, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, 
"soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 639.0, 576.0, 630.0, 633.0, 582.0, 587.0, 618.0, 633.0, 582.0, 621.0, 636.0, 587.0, 636.0, 582.0, 636.0, 630.0, 582.0, 633.0, 581.0, 630.0, 636.0, 630.0, 627.0, 633.0, 636.0, 582.0, 627.0, 536.0, 633.0, 627.0, 627.0, 633.0, 627.0, 561.0, 636.0, 552.0, 630.0, 633.0, 579.0, 636.0, 587.0, 579.0, 630.0, 630.0, 633.0, 579.0, 630.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 582.0, 633.0, 633.0, 587.0, 636.0, 584.0, 579.0, 630.0, 582.0, 627.0, 633.0, 567.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 627.0, 584.0, 633.0, 630.0, 573.0, 636.0, 582.0, 579.0, 633.0, 630.0, 636.0, 633.0, 573.0, 587.0, 579.0, 627.0, 630.0, 564.0, 636.0, 582.0, 636.0, 633.0, 579.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 288.0, 288.0, 316.0, 314.0, 316.0, 317.0, 291.0, 291.0, 293.0, 294.0, 308.0, 310.0, 314.0, 319.0, 289.0, 293.0, 318.0, 303.0, 314.0, 322.0, 298.0, 289.0, 324.0, 312.0, 290.0, 292.0, 314.0, 322.0, 313.0, 317.0, 288.0, 294.0, 322.0, 311.0, 289.0, 292.0, 314.0, 
316.0, 314.0, 322.0, 314.0, 316.0, 312.0, 315.0, 311.0, 322.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 275.0, 261.0, 319.0, 314.0, 314.0, 313.0, 310.0, 317.0, 314.0, 319.0, 318.0, 309.0, 285.0, 276.0, 322.0, 314.0, 281.0, 271.0, 311.0, 319.0, 322.0, 311.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 279.0, 300.0, 313.0, 317.0, 316.0, 314.0, 319.0, 314.0, 282.0, 297.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 313.0, 320.0, 296.0, 286.0, 319.0, 311.0, 314.0, 322.0, 314.0, 322.0, 306.0, 321.0, 289.0, 293.0, 311.0, 322.0, 319.0, 314.0, 285.0, 302.0, 319.0, 317.0, 290.0, 294.0, 288.0, 291.0, 311.0, 319.0, 295.0, 287.0, 308.0, 319.0, 316.0, 317.0, 278.0, 289.0, 319.0, 317.0, 319.0, 317.0, 314.0, 322.0, 313.0, 317.0, 311.0, 322.0, 314.0, 316.0, 319.0, 311.0, 313.0, 314.0, 290.0, 294.0, 316.0, 317.0, 314.0, 316.0, 283.0, 290.0, 322.0, 314.0, 297.0, 285.0, 291.0, 288.0, 318.0, 315.0, 314.0, 316.0, 322.0, 314.0, 314.0, 319.0, 296.0, 277.0, 293.0, 294.0, 291.0, 288.0, 305.0, 322.0, 314.0, 316.0, 279.0, 285.0, 314.0, 322.0, 288.0, 294.0, 319.0, 317.0, 321.0, 312.0, 291.0, 288.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8116024056297, "mean_processing_ms": 0.23075773519114987, "mean_inference_ms": 1.4123586541833584}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10032000, "num_steps_sampled": 5350400, "sample_time_ms": 21668.296, "load_time_ms": 36.478, "grad_time_ms": 9333.309, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028228743467479944, "policy_loss": -0.004008984658867121, "vf_loss": 73.87229919433594, "vf_explained_var": 0.7751579284667969, "kl": 0.0019005200592800975, "entropy": 1.110758900642395, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5350400, "episodes_total": 13376, "training_iteration": 418, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-12_00-21-46", "timestamp": 1660260106, "time_this_iter_s": 34.32990908622742, "time_total_s": 18515.39919090271, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18515.39919090271, "timesteps_since_restore": 5350400, "iterations_since_restore": 418, "perf": {"cpu_util_percent": 29.667346938775513, "ram_util_percent": 59.18979591836735}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 609.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.74}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.68, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.72, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, 
"soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 639.0, 630.0, 579.0, 582.0, 636.0, 180.0, 582.0, 579.0, 636.0, 582.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 633.0, 630.0, 630.0, 630.0, 579.0, 636.0, 630.0, 582.0, 633.0, 587.0, 587.0, 630.0, 627.0, 639.0, 630.0, 582.0, 627.0, 633.0, 567.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 627.0, 584.0, 633.0, 630.0, 573.0, 636.0, 582.0, 579.0, 633.0, 630.0, 636.0, 633.0, 573.0, 587.0, 579.0, 627.0, 630.0, 564.0, 636.0, 582.0, 636.0, 633.0, 579.0, 633.0, 587.0, 630.0, 639.0, 576.0, 630.0, 633.0, 582.0, 587.0, 618.0, 633.0, 582.0, 621.0, 636.0, 587.0, 636.0, 582.0, 636.0, 630.0, 582.0, 633.0, 581.0, 630.0, 636.0, 630.0, 627.0, 633.0, 636.0, 582.0, 627.0, 536.0, 633.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 316.0, 314.0, 324.0, 315.0, 311.0, 319.0, 288.0, 291.0, 294.0, 288.0, 319.0, 317.0, 92.0, 88.0, 293.0, 289.0, 287.0, 
292.0, 319.0, 317.0, 289.0, 293.0, 284.0, 289.0, 316.0, 317.0, 322.0, 317.0, 311.0, 322.0, 316.0, 314.0, 316.0, 314.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 317.0, 313.0, 288.0, 291.0, 319.0, 317.0, 311.0, 319.0, 289.0, 293.0, 319.0, 314.0, 296.0, 291.0, 291.0, 296.0, 313.0, 317.0, 311.0, 316.0, 314.0, 325.0, 311.0, 319.0, 295.0, 287.0, 308.0, 319.0, 316.0, 317.0, 278.0, 289.0, 319.0, 317.0, 319.0, 317.0, 314.0, 322.0, 313.0, 317.0, 311.0, 322.0, 314.0, 316.0, 319.0, 311.0, 313.0, 314.0, 290.0, 294.0, 316.0, 317.0, 314.0, 316.0, 283.0, 290.0, 322.0, 314.0, 297.0, 285.0, 291.0, 288.0, 318.0, 315.0, 314.0, 316.0, 322.0, 314.0, 314.0, 319.0, 296.0, 277.0, 293.0, 294.0, 291.0, 288.0, 305.0, 322.0, 314.0, 316.0, 279.0, 285.0, 314.0, 322.0, 288.0, 294.0, 319.0, 317.0, 321.0, 312.0, 291.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 288.0, 288.0, 316.0, 314.0, 316.0, 317.0, 291.0, 291.0, 293.0, 294.0, 308.0, 310.0, 314.0, 319.0, 289.0, 293.0, 318.0, 303.0, 314.0, 322.0, 298.0, 289.0, 324.0, 312.0, 290.0, 292.0, 314.0, 322.0, 313.0, 317.0, 288.0, 294.0, 322.0, 311.0, 289.0, 292.0, 314.0, 316.0, 314.0, 322.0, 314.0, 316.0, 312.0, 315.0, 311.0, 322.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 275.0, 261.0, 319.0, 314.0, 314.0, 313.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8107503957777891, "mean_processing_ms": 0.2305898995792267, "mean_inference_ms": 1.4116530949413433}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10056000, "num_steps_sampled": 5363200, "sample_time_ms": 21868.435, "load_time_ms": 36.628, "grad_time_ms": 9569.244, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002508052857592702, "policy_loss": -0.004472339991480112, "vf_loss": 75.3826904296875, "vf_explained_var": 0.7911410927772522, "kl": 0.0020311845000833273, "entropy": 1.1157482862472534, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 
12800, "done": false, "timesteps_total": 5363200, "episodes_total": 13408, "training_iteration": 419, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-22-20", "timestamp": 1660260140, "time_this_iter_s": 33.75737500190735, "time_total_s": 18549.156565904617, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18549.156565904617, "timesteps_since_restore": 5363200, "iterations_since_restore": 419, "perf": {"cpu_util_percent": 32.68936170212766, "ram_util_percent": 59.19574468085105}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 609.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.805}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.81, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.29, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.55, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.28, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 630.0, 576.0, 630.0, 516.0, 630.0, 636.0, 639.0, 630.0, 582.0, 633.0, 576.0, 582.0, 633.0, 587.0, 627.0, 630.0, 633.0, 630.0, 582.0, 582.0, 633.0, 633.0, 633.0, 630.0, 627.0, 636.0, 579.0, 630.0, 636.0, 633.0, 627.0, 636.0, 633.0, 579.0, 633.0, 587.0, 630.0, 639.0, 576.0, 630.0, 633.0, 582.0, 587.0, 618.0, 633.0, 582.0, 621.0, 636.0, 587.0, 636.0, 582.0, 636.0, 630.0, 582.0, 633.0, 581.0, 630.0, 636.0, 630.0, 627.0, 633.0, 636.0, 582.0, 627.0, 536.0, 633.0, 627.0, 579.0, 630.0, 639.0, 630.0, 579.0, 582.0, 636.0, 180.0, 582.0, 579.0, 636.0, 582.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 633.0, 630.0, 630.0, 630.0, 579.0, 636.0, 630.0, 582.0, 633.0, 587.0, 587.0, 630.0, 627.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 317.0, 313.0, 285.0, 291.0, 314.0, 316.0, 254.0, 262.0, 319.0, 311.0, 317.0, 319.0, 319.0, 320.0, 316.0, 314.0, 290.0, 292.0, 314.0, 319.0, 285.0, 291.0, 290.0, 292.0, 316.0, 317.0, 291.0, 296.0, 310.0, 317.0, 316.0, 314.0, 316.0, 317.0, 314.0, 316.0, 288.0, 294.0, 291.0, 291.0, 311.0, 322.0, 313.0, 320.0, 316.0, 317.0, 311.0, 319.0, 315.0, 312.0, 316.0, 320.0, 291.0, 288.0, 308.0, 322.0, 311.0, 325.0, 314.0, 319.0, 313.0, 314.0, 319.0, 317.0, 321.0, 312.0, 291.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 288.0, 288.0, 316.0, 314.0, 316.0, 317.0, 291.0, 291.0, 293.0, 294.0, 308.0, 310.0, 314.0, 319.0, 289.0, 293.0, 318.0, 303.0, 314.0, 322.0, 298.0, 289.0, 324.0, 312.0, 290.0, 292.0, 314.0, 322.0, 313.0, 317.0, 288.0, 294.0, 322.0, 311.0, 289.0, 292.0, 314.0, 316.0, 314.0, 322.0, 314.0, 316.0, 312.0, 315.0, 311.0, 322.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 275.0, 261.0, 319.0, 314.0, 314.0, 313.0, 291.0, 288.0, 316.0, 314.0, 324.0, 315.0, 311.0, 319.0, 288.0, 291.0, 294.0, 288.0, 319.0, 317.0, 92.0, 88.0, 293.0, 289.0, 287.0, 292.0, 319.0, 317.0, 289.0, 293.0, 284.0, 289.0, 316.0, 317.0, 322.0, 317.0, 311.0, 322.0, 316.0, 314.0, 316.0, 314.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 317.0, 313.0, 288.0, 291.0, 319.0, 317.0, 311.0, 319.0, 289.0, 293.0, 319.0, 314.0, 296.0, 291.0, 291.0, 296.0, 313.0, 317.0, 311.0, 316.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8099141458256162, "mean_processing_ms": 0.23042638477560978, "mean_inference_ms": 1.411223574078033}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10080000, "num_steps_sampled": 5376000, "sample_time_ms": 22436.969, "load_time_ms": 36.407, "grad_time_ms": 9832.43, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009076216374523938, "policy_loss": -0.0059606158174574375, "vf_loss": 74.23489379882812, 
"vf_explained_var": 0.7686769366264343, "kl": 0.00233254861086607, "entropy": 1.1105002164840698, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5376000, "episodes_total": 13440, "training_iteration": 420, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-22-56", "timestamp": 1660260176, "time_this_iter_s": 36.52545118331909, "time_total_s": 18585.682017087936, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18585.682017087936, "timesteps_since_restore": 5376000, "iterations_since_restore": 420, "perf": {"cpu_util_percent": 30.815384615384616, "ram_util_percent": 59.175}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 604.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.165}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 185.13, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.5, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.15, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.55, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.58, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.15, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 17, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.08, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.55, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.55, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 579.0, 576.0, 582.0, 639.0, 630.0, 518.0, 582.0, 582.0, 624.0, 624.0, 633.0, 630.0, 584.0, 633.0, 633.0, 627.0, 630.0, 633.0, 579.0, 633.0, 582.0, 582.0, 627.0, 633.0, 519.0, 582.0, 579.0, 470.0, 587.0, 579.0, 627.0, 536.0, 633.0, 627.0, 579.0, 630.0, 639.0, 630.0, 579.0, 582.0, 636.0, 180.0, 582.0, 579.0, 636.0, 582.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 633.0, 630.0, 630.0, 630.0, 579.0, 636.0, 630.0, 582.0, 633.0, 587.0, 587.0, 630.0, 627.0, 639.0, 576.0, 630.0, 576.0, 630.0, 516.0, 630.0, 636.0, 639.0, 630.0, 582.0, 633.0, 576.0, 582.0, 633.0, 587.0, 627.0, 630.0, 633.0, 630.0, 582.0, 582.0, 633.0, 633.0, 633.0, 630.0, 627.0, 636.0, 579.0, 630.0, 636.0, 633.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 316.0, 314.0, 288.0, 291.0, 283.0, 293.0, 289.0, 293.0, 317.0, 322.0, 316.0, 314.0, 279.0, 239.0, 293.0, 289.0, 293.0, 289.0, 310.0, 314.0, 304.0, 320.0, 311.0, 322.0, 313.0, 317.0, 295.0, 289.0, 314.0, 319.0, 317.0, 316.0, 313.0, 314.0, 316.0, 314.0, 321.0, 312.0, 281.0, 298.0, 316.0, 317.0, 293.0, 289.0, 287.0, 295.0, 316.0, 311.0, 316.0, 317.0, 261.0, 258.0, 285.0, 297.0, 287.0, 292.0, 226.0, 244.0, 299.0, 288.0, 297.0, 282.0, 316.0, 311.0, 275.0, 261.0, 319.0, 314.0, 314.0, 313.0, 291.0, 288.0, 316.0, 314.0, 324.0, 315.0, 311.0, 319.0, 288.0, 291.0, 294.0, 288.0, 319.0, 317.0, 92.0, 88.0, 293.0, 289.0, 287.0, 292.0, 319.0, 317.0, 289.0, 293.0, 284.0, 289.0, 316.0, 317.0, 322.0, 317.0, 311.0, 322.0, 316.0, 314.0, 316.0, 314.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 317.0, 313.0, 288.0, 291.0, 319.0, 317.0, 311.0, 319.0, 289.0, 293.0, 319.0, 314.0, 296.0, 291.0, 291.0, 296.0, 313.0, 317.0, 311.0, 316.0, 314.0, 325.0, 276.0, 300.0, 317.0, 313.0, 285.0, 291.0, 314.0, 316.0, 254.0, 262.0, 319.0, 311.0, 317.0, 319.0, 319.0, 320.0, 316.0, 314.0, 290.0, 292.0, 314.0, 319.0, 285.0, 291.0, 290.0, 292.0, 316.0, 317.0, 291.0, 296.0, 310.0, 317.0, 316.0, 314.0, 316.0, 317.0, 314.0, 316.0, 288.0, 294.0, 291.0, 291.0, 311.0, 322.0, 313.0, 320.0, 316.0, 317.0, 311.0, 319.0, 315.0, 312.0, 316.0, 320.0, 291.0, 288.0, 308.0, 322.0, 311.0, 325.0, 314.0, 319.0, 313.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8090875663620912, "mean_processing_ms": 0.23026522516611045, "mean_inference_ms": 1.4109073671228203}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10104000, "num_steps_sampled": 5388800, "sample_time_ms": 22748.439, "load_time_ms": 36.384, "grad_time_ms": 10256.981, "update_time_ms": 0.001, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004364584165159613, "policy_loss": -0.007198403123766184, "vf_loss": 81.89620208740234, "vf_explained_var": 0.7658551335334778, "kl": 0.0018362547270953655, "entropy": 1.1095339059829712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5388800, "episodes_total": 13472, "training_iteration": 421, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-23-34", "timestamp": 1660260214, "time_this_iter_s": 37.69177174568176, "time_total_s": 18623.373788833618, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 
1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18623.373788833618, "timesteps_since_restore": 5388800, "iterations_since_restore": 421, "perf": {"cpu_util_percent": 34.76037735849056, "ram_util_percent": 59.533962264150944}} +{"episode_reward_max": 639.0, "episode_reward_min": 470.0, "episode_reward_mean": 605.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.835}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.67, "shaped_reward_min": 150, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.61, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.4, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.3, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 17, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 3, 
"useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.06, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 587.0, 630.0, 579.0, 633.0, 582.0, 579.0, 576.0, 630.0, 624.0, 630.0, 579.0, 636.0, 522.0, 636.0, 630.0, 587.0, 633.0, 582.0, 630.0, 630.0, 579.0, 573.0, 579.0, 582.0, 582.0, 636.0, 636.0, 633.0, 579.0, 587.0, 579.0, 587.0, 630.0, 627.0, 639.0, 576.0, 630.0, 576.0, 630.0, 516.0, 630.0, 636.0, 639.0, 630.0, 582.0, 633.0, 576.0, 582.0, 633.0, 587.0, 627.0, 630.0, 633.0, 630.0, 582.0, 582.0, 633.0, 633.0, 633.0, 630.0, 627.0, 636.0, 579.0, 630.0, 636.0, 633.0, 627.0, 627.0, 630.0, 579.0, 576.0, 582.0, 639.0, 630.0, 518.0, 582.0, 582.0, 624.0, 624.0, 633.0, 630.0, 584.0, 633.0, 633.0, 627.0, 630.0, 633.0, 579.0, 633.0, 582.0, 582.0, 627.0, 633.0, 519.0, 582.0, 579.0, 470.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 322.0, 286.0, 301.0, 316.0, 314.0, 290.0, 289.0, 316.0, 317.0, 293.0, 289.0, 288.0, 291.0, 293.0, 283.0, 313.0, 317.0, 308.0, 316.0, 319.0, 311.0, 286.0, 293.0, 316.0, 320.0, 262.0, 260.0, 319.0, 317.0, 316.0, 314.0, 288.0, 299.0, 314.0, 319.0, 298.0, 284.0, 316.0, 314.0, 321.0, 309.0, 287.0, 292.0, 285.0, 288.0, 290.0, 289.0, 286.0, 296.0, 289.0, 293.0, 316.0, 320.0, 319.0, 317.0, 322.0, 311.0, 291.0, 288.0, 291.0, 296.0, 288.0, 291.0, 291.0, 296.0, 313.0, 317.0, 311.0, 316.0, 314.0, 325.0, 276.0, 300.0, 317.0, 313.0, 285.0, 291.0, 314.0, 316.0, 254.0, 262.0, 319.0, 311.0, 317.0, 319.0, 319.0, 320.0, 316.0, 314.0, 290.0, 292.0, 314.0, 319.0, 285.0, 291.0, 290.0, 292.0, 316.0, 317.0, 291.0, 296.0, 310.0, 317.0, 316.0, 314.0, 316.0, 317.0, 314.0, 316.0, 288.0, 294.0, 291.0, 291.0, 311.0, 322.0, 313.0, 320.0, 316.0, 317.0, 311.0, 319.0, 315.0, 312.0, 316.0, 320.0, 291.0, 288.0, 308.0, 322.0, 311.0, 325.0, 314.0, 319.0, 313.0, 314.0, 313.0, 314.0, 316.0, 314.0, 288.0, 291.0, 283.0, 293.0, 289.0, 293.0, 317.0, 322.0, 316.0, 314.0, 279.0, 239.0, 293.0, 289.0, 293.0, 289.0, 310.0, 314.0, 304.0, 320.0, 311.0, 322.0, 313.0, 317.0, 295.0, 289.0, 314.0, 319.0, 317.0, 316.0, 313.0, 314.0, 316.0, 314.0, 321.0, 312.0, 281.0, 298.0, 316.0, 317.0, 293.0, 289.0, 287.0, 295.0, 316.0, 311.0, 316.0, 317.0, 261.0, 258.0, 285.0, 297.0, 287.0, 292.0, 226.0, 244.0, 299.0, 288.0, 297.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8082635107475633, "mean_processing_ms": 0.23010486784203785, "mean_inference_ms": 
1.4106251231358269}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10128000, "num_steps_sampled": 5401600, "sample_time_ms": 23169.092, "load_time_ms": 36.151, "grad_time_ms": 10580.278, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029069948941469193, "policy_loss": -0.004262510221451521, "vf_loss": 77.2344970703125, "vf_explained_var": 0.7713862061500549, "kl": 0.001992677804082632, "entropy": 1.1078964471817017, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5401600, "episodes_total": 13504, "training_iteration": 422, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-24-10", "timestamp": 1660260250, "time_this_iter_s": 35.70268106460571, "time_total_s": 18659.076469898224, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18659.076469898224, "timesteps_since_restore": 5401600, "iterations_since_restore": 422, "perf": {"cpu_util_percent": 33.46078431372549, "ram_util_percent": 59.009803921568626}} +{"episode_reward_max": 639.0, "episode_reward_min": 470.0, "episode_reward_mean": 603.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.715}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.43, "shaped_reward_min": 150, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.19, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.49, 
"dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.22, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 17, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.81, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, 
"viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 636.0, 576.0, 639.0, 579.0, 636.0, 630.0, 579.0, 582.0, 582.0, 624.0, 581.0, 636.0, 630.0, 582.0, 582.0, 633.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 633.0, 633.0, 587.0, 582.0, 582.0, 530.0, 633.0, 630.0, 630.0, 636.0, 633.0, 627.0, 627.0, 630.0, 579.0, 576.0, 582.0, 639.0, 630.0, 518.0, 582.0, 582.0, 624.0, 624.0, 633.0, 630.0, 584.0, 633.0, 633.0, 627.0, 630.0, 633.0, 579.0, 633.0, 582.0, 582.0, 627.0, 633.0, 519.0, 582.0, 579.0, 470.0, 587.0, 579.0, 639.0, 587.0, 630.0, 579.0, 633.0, 582.0, 579.0, 576.0, 630.0, 
624.0, 630.0, 579.0, 636.0, 522.0, 636.0, 630.0, 587.0, 633.0, 582.0, 630.0, 630.0, 579.0, 573.0, 579.0, 582.0, 582.0, 636.0, 636.0, 633.0, 579.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 282.0, 294.0, 309.0, 327.0, 288.0, 288.0, 322.0, 317.0, 282.0, 297.0, 319.0, 317.0, 316.0, 314.0, 288.0, 291.0, 291.0, 291.0, 288.0, 294.0, 313.0, 311.0, 292.0, 289.0, 319.0, 317.0, 316.0, 314.0, 288.0, 294.0, 292.0, 290.0, 314.0, 319.0, 302.0, 280.0, 316.0, 320.0, 311.0, 322.0, 293.0, 294.0, 316.0, 314.0, 313.0, 314.0, 322.0, 311.0, 316.0, 317.0, 285.0, 302.0, 286.0, 296.0, 297.0, 285.0, 270.0, 260.0, 316.0, 317.0, 316.0, 314.0, 308.0, 322.0, 311.0, 325.0, 314.0, 319.0, 313.0, 314.0, 313.0, 314.0, 316.0, 314.0, 288.0, 291.0, 283.0, 293.0, 289.0, 293.0, 317.0, 322.0, 316.0, 314.0, 279.0, 239.0, 293.0, 289.0, 293.0, 289.0, 310.0, 314.0, 304.0, 320.0, 311.0, 322.0, 313.0, 317.0, 295.0, 289.0, 314.0, 319.0, 317.0, 316.0, 313.0, 314.0, 316.0, 314.0, 321.0, 312.0, 281.0, 298.0, 316.0, 317.0, 293.0, 289.0, 287.0, 295.0, 316.0, 311.0, 316.0, 317.0, 261.0, 258.0, 285.0, 297.0, 287.0, 292.0, 226.0, 244.0, 299.0, 288.0, 297.0, 282.0, 317.0, 322.0, 286.0, 301.0, 316.0, 314.0, 290.0, 289.0, 316.0, 317.0, 293.0, 289.0, 288.0, 291.0, 293.0, 283.0, 313.0, 317.0, 308.0, 316.0, 319.0, 311.0, 286.0, 293.0, 316.0, 320.0, 262.0, 260.0, 319.0, 317.0, 316.0, 314.0, 288.0, 299.0, 314.0, 319.0, 298.0, 284.0, 316.0, 314.0, 321.0, 309.0, 287.0, 292.0, 285.0, 288.0, 290.0, 289.0, 286.0, 
296.0, 289.0, 293.0, 316.0, 320.0, 319.0, 317.0, 322.0, 311.0, 291.0, 288.0, 291.0, 296.0, 288.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8074351528225807, "mean_processing_ms": 0.2299430227686211, "mean_inference_ms": 1.4101585420796834}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10152000, "num_steps_sampled": 5414400, "sample_time_ms": 23238.003, "load_time_ms": 36.154, "grad_time_ms": 10699.531, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004093436989933252, "policy_loss": -0.003705031471326947, "vf_loss": 83.51854705810547, "vf_explained_var": 0.7628346085548401, "kl": 0.001839231583289802, "entropy": 1.1067644357681274, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5414400, "episodes_total": 13536, "training_iteration": 423, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-24-43", "timestamp": 1660260283, "time_this_iter_s": 33.482574224472046, "time_total_s": 18692.559044122696, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18692.559044122696, "timesteps_since_restore": 5414400, "iterations_since_restore": 423, "perf": {"cpu_util_percent": 33.295744680851065, "ram_util_percent": 58.97021276595746}} +{"episode_reward_max": 639.0, "episode_reward_min": 470.0, "episode_reward_mean": 602.74, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.37}, "custom_metrics": {"sparse_reward_mean": 208.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.94, 
"shaped_reward_min": 150, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.04, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.07, 
"potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.71, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.04, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 17, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.94, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.07, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 627.0, 630.0, 582.0, 636.0, 582.0, 587.0, 636.0, 627.0, 636.0, 633.0, 633.0, 630.0, 579.0, 579.0, 587.0, 636.0, 582.0, 639.0, 630.0, 627.0, 582.0, 582.0, 584.0, 639.0, 582.0, 639.0, 630.0, 522.0, 587.0, 584.0, 579.0, 579.0, 470.0, 587.0, 579.0, 639.0, 587.0, 630.0, 579.0, 633.0, 582.0, 579.0, 576.0, 630.0, 624.0, 630.0, 
579.0, 636.0, 522.0, 636.0, 630.0, 587.0, 633.0, 582.0, 630.0, 630.0, 579.0, 573.0, 579.0, 582.0, 582.0, 636.0, 636.0, 633.0, 579.0, 587.0, 579.0, 582.0, 576.0, 636.0, 576.0, 639.0, 579.0, 636.0, 630.0, 579.0, 582.0, 582.0, 624.0, 581.0, 636.0, 630.0, 582.0, 582.0, 633.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 633.0, 633.0, 587.0, 582.0, 582.0, 530.0, 633.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 313.0, 314.0, 316.0, 314.0, 291.0, 291.0, 317.0, 319.0, 293.0, 289.0, 290.0, 297.0, 319.0, 317.0, 311.0, 316.0, 314.0, 322.0, 316.0, 317.0, 321.0, 312.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 304.0, 283.0, 319.0, 317.0, 293.0, 289.0, 319.0, 320.0, 314.0, 316.0, 304.0, 323.0, 291.0, 291.0, 291.0, 291.0, 293.0, 291.0, 319.0, 320.0, 291.0, 291.0, 324.0, 315.0, 319.0, 311.0, 268.0, 254.0, 298.0, 289.0, 293.0, 291.0, 293.0, 286.0, 287.0, 292.0, 226.0, 244.0, 299.0, 288.0, 297.0, 282.0, 317.0, 322.0, 286.0, 301.0, 316.0, 314.0, 290.0, 289.0, 316.0, 317.0, 293.0, 289.0, 288.0, 291.0, 293.0, 283.0, 313.0, 317.0, 308.0, 316.0, 319.0, 311.0, 286.0, 293.0, 316.0, 320.0, 262.0, 260.0, 319.0, 317.0, 316.0, 314.0, 288.0, 299.0, 314.0, 319.0, 298.0, 284.0, 316.0, 314.0, 321.0, 309.0, 287.0, 292.0, 285.0, 288.0, 290.0, 289.0, 286.0, 296.0, 289.0, 293.0, 316.0, 320.0, 319.0, 317.0, 322.0, 311.0, 291.0, 288.0, 291.0, 296.0, 288.0, 291.0, 296.0, 286.0, 282.0, 294.0, 309.0, 327.0, 288.0, 288.0, 322.0, 317.0, 282.0, 297.0, 319.0, 317.0, 316.0, 314.0, 288.0, 291.0, 291.0, 
291.0, 288.0, 294.0, 313.0, 311.0, 292.0, 289.0, 319.0, 317.0, 316.0, 314.0, 288.0, 294.0, 292.0, 290.0, 314.0, 319.0, 302.0, 280.0, 316.0, 320.0, 311.0, 322.0, 293.0, 294.0, 316.0, 314.0, 313.0, 314.0, 322.0, 311.0, 316.0, 317.0, 285.0, 302.0, 286.0, 296.0, 297.0, 285.0, 270.0, 260.0, 316.0, 317.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8066043021110226, "mean_processing_ms": 0.22977916427475648, "mean_inference_ms": 1.4095576278285673}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10176000, "num_steps_sampled": 5427200, "sample_time_ms": 22903.737, "load_time_ms": 36.042, "grad_time_ms": 10790.567, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005737189203500748, "policy_loss": -0.006916053593158722, "vf_loss": 80.42852783203125, "vf_explained_var": 0.7650584578514099, "kl": 0.0017220000736415386, "entropy": 1.1061476469039917, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5427200, "episodes_total": 13568, "training_iteration": 424, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-25-17", "timestamp": 1660260317, "time_this_iter_s": 33.90529203414917, "time_total_s": 18726.464336156845, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18726.464336156845, "timesteps_since_restore": 5427200, "iterations_since_restore": 424, "perf": {"cpu_util_percent": 32.64166666666667, "ram_util_percent": 58.96875}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 606.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 
254.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.09}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.58, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.13, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.05, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.77, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 14, 
"optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 630.0, 639.0, 582.0, 582.0, 633.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 636.0, 630.0, 579.0, 582.0, 
587.0, 633.0, 630.0, 633.0, 587.0, 636.0, 582.0, 579.0, 576.0, 630.0, 561.0, 627.0, 627.0, 630.0, 564.0, 633.0, 579.0, 587.0, 579.0, 582.0, 576.0, 636.0, 576.0, 639.0, 579.0, 636.0, 630.0, 579.0, 582.0, 582.0, 624.0, 581.0, 636.0, 630.0, 582.0, 582.0, 633.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 633.0, 633.0, 587.0, 582.0, 582.0, 530.0, 633.0, 630.0, 582.0, 627.0, 630.0, 582.0, 636.0, 582.0, 587.0, 636.0, 627.0, 636.0, 633.0, 633.0, 630.0, 579.0, 579.0, 587.0, 636.0, 582.0, 639.0, 630.0, 627.0, 582.0, 582.0, 584.0, 639.0, 582.0, 639.0, 630.0, 522.0, 587.0, 584.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 317.0, 316.0, 313.0, 317.0, 319.0, 320.0, 293.0, 289.0, 293.0, 289.0, 314.0, 319.0, 319.0, 311.0, 316.0, 317.0, 286.0, 293.0, 319.0, 317.0, 293.0, 289.0, 290.0, 289.0, 317.0, 319.0, 319.0, 311.0, 291.0, 288.0, 290.0, 292.0, 293.0, 294.0, 316.0, 317.0, 319.0, 311.0, 316.0, 317.0, 288.0, 299.0, 319.0, 317.0, 291.0, 291.0, 288.0, 291.0, 288.0, 288.0, 309.0, 321.0, 278.0, 283.0, 316.0, 311.0, 313.0, 314.0, 319.0, 311.0, 284.0, 280.0, 322.0, 311.0, 291.0, 288.0, 291.0, 296.0, 288.0, 291.0, 296.0, 286.0, 282.0, 294.0, 309.0, 327.0, 288.0, 288.0, 322.0, 317.0, 282.0, 297.0, 319.0, 317.0, 316.0, 314.0, 288.0, 291.0, 291.0, 291.0, 288.0, 294.0, 313.0, 311.0, 292.0, 289.0, 319.0, 317.0, 316.0, 314.0, 288.0, 294.0, 292.0, 290.0, 314.0, 319.0, 302.0, 280.0, 316.0, 320.0, 311.0, 322.0, 293.0, 294.0, 316.0, 314.0, 313.0, 314.0, 322.0, 311.0, 316.0, 317.0, 285.0, 
302.0, 286.0, 296.0, 297.0, 285.0, 270.0, 260.0, 316.0, 317.0, 316.0, 314.0, 293.0, 289.0, 313.0, 314.0, 316.0, 314.0, 291.0, 291.0, 317.0, 319.0, 293.0, 289.0, 290.0, 297.0, 319.0, 317.0, 311.0, 316.0, 314.0, 322.0, 316.0, 317.0, 321.0, 312.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 304.0, 283.0, 319.0, 317.0, 293.0, 289.0, 319.0, 320.0, 314.0, 316.0, 304.0, 323.0, 291.0, 291.0, 291.0, 291.0, 293.0, 291.0, 319.0, 320.0, 291.0, 291.0, 324.0, 315.0, 319.0, 311.0, 268.0, 254.0, 298.0, 289.0, 293.0, 291.0, 293.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8057716704864768, "mean_processing_ms": 0.22961462225140888, "mean_inference_ms": 1.4087586857000423}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10200000, "num_steps_sampled": 5440000, "sample_time_ms": 22855.818, "load_time_ms": 36.111, "grad_time_ms": 10847.011, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001967804506421089, "policy_loss": -0.00539380731061101, "vf_loss": 79.134033203125, "vf_explained_var": 0.7726359963417053, "kl": 0.0021053599193692207, "entropy": 1.1035689115524292, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5440000, "episodes_total": 13600, "training_iteration": 425, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-25-47", "timestamp": 1660260347, "time_this_iter_s": 29.913795948028564, "time_total_s": 18756.378132104874, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18756.378132104874, "timesteps_since_restore": 5440000, "iterations_since_restore": 425, "perf": {"cpu_util_percent": 31.83333333333333, 
"ram_util_percent": 58.778571428571425}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 610.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 305.285}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.37, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, 
"onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.96, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.87, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.01, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.7, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.96, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.96, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 624.0, 636.0, 627.0, 582.0, 636.0, 636.0, 630.0, 573.0, 630.0, 630.0, 579.0, 633.0, 636.0, 630.0, 639.0, 627.0, 582.0, 639.0, 587.0, 630.0, 636.0, 584.0, 633.0, 630.0, 579.0, 618.0, 630.0, 627.0, 636.0, 587.0, 636.0, 582.0, 530.0, 633.0, 630.0, 582.0, 627.0, 630.0, 582.0, 636.0, 582.0, 587.0, 636.0, 627.0, 636.0, 633.0, 633.0, 630.0, 579.0, 579.0, 587.0, 636.0, 582.0, 639.0, 630.0, 627.0, 582.0, 582.0, 584.0, 639.0, 582.0, 639.0, 630.0, 522.0, 587.0, 584.0, 579.0, 633.0, 633.0, 630.0, 639.0, 582.0, 582.0, 633.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 636.0, 630.0, 579.0, 582.0, 587.0, 633.0, 630.0, 633.0, 587.0, 636.0, 582.0, 579.0, 576.0, 630.0, 561.0, 627.0, 627.0, 630.0, 564.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 320.0, 304.0, 319.0, 317.0, 318.0, 309.0, 296.0, 286.0, 317.0, 319.0, 319.0, 317.0, 313.0, 317.0, 284.0, 289.0, 323.0, 307.0, 311.0, 319.0, 298.0, 281.0, 316.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 313.0, 314.0, 286.0, 296.0, 322.0, 317.0, 283.0, 304.0, 316.0, 314.0, 314.0, 322.0, 295.0, 289.0, 319.0, 314.0, 314.0, 316.0, 288.0, 291.0, 299.0, 319.0, 316.0, 314.0, 311.0, 316.0, 313.0, 323.0, 293.0, 294.0, 316.0, 320.0, 297.0, 285.0, 270.0, 260.0, 316.0, 317.0, 316.0, 314.0, 293.0, 289.0, 313.0, 314.0, 316.0, 314.0, 291.0, 291.0, 317.0, 319.0, 293.0, 289.0, 290.0, 297.0, 319.0, 317.0, 311.0, 316.0, 314.0, 
322.0, 316.0, 317.0, 321.0, 312.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 304.0, 283.0, 319.0, 317.0, 293.0, 289.0, 319.0, 320.0, 314.0, 316.0, 304.0, 323.0, 291.0, 291.0, 291.0, 291.0, 293.0, 291.0, 319.0, 320.0, 291.0, 291.0, 324.0, 315.0, 319.0, 311.0, 268.0, 254.0, 298.0, 289.0, 293.0, 291.0, 293.0, 286.0, 314.0, 319.0, 317.0, 316.0, 313.0, 317.0, 319.0, 320.0, 293.0, 289.0, 293.0, 289.0, 314.0, 319.0, 319.0, 311.0, 316.0, 317.0, 286.0, 293.0, 319.0, 317.0, 293.0, 289.0, 290.0, 289.0, 317.0, 319.0, 319.0, 311.0, 291.0, 288.0, 290.0, 292.0, 293.0, 294.0, 316.0, 317.0, 319.0, 311.0, 316.0, 317.0, 288.0, 299.0, 319.0, 317.0, 291.0, 291.0, 288.0, 291.0, 288.0, 288.0, 309.0, 321.0, 278.0, 283.0, 316.0, 311.0, 313.0, 314.0, 319.0, 311.0, 284.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8049370656544826, "mean_processing_ms": 0.22944824169786282, "mean_inference_ms": 1.4078260577315087}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10224000, "num_steps_sampled": 5452800, "sample_time_ms": 22520.554, "load_time_ms": 36.416, "grad_time_ms": 10937.296, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 9.403874173585791e-06, "policy_loss": -0.006973860785365105, "vf_loss": 75.33930969238281, "vf_explained_var": 0.7694594264030457, "kl": 0.00176583684515208, "entropy": 1.1013368368148804, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5452800, "episodes_total": 13632, "training_iteration": 426, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-26-16", "timestamp": 1660260376, "time_this_iter_s": 29.526015043258667, "time_total_s": 18785.904147148132, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18785.904147148132, "timesteps_since_restore": 5452800, "iterations_since_restore": 426, "perf": {"cpu_util_percent": 31.057142857142853, "ram_util_percent": 58.84047619047618}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 614.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.115}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.83, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.74, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.35, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.7, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.04, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.35, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.35, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 573.0, 582.0, 579.0, 639.0, 636.0, 636.0, 633.0, 579.0, 630.0, 630.0, 630.0, 582.0, 627.0, 582.0, 630.0, 621.0, 630.0, 630.0, 633.0, 636.0, 627.0, 636.0, 584.0, 636.0, 633.0, 636.0, 630.0, 627.0, 633.0, 636.0, 522.0, 587.0, 584.0, 579.0, 633.0, 633.0, 630.0, 639.0, 582.0, 582.0, 633.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 636.0, 630.0, 579.0, 582.0, 587.0, 633.0, 630.0, 633.0, 587.0, 636.0, 582.0, 579.0, 576.0, 630.0, 561.0, 627.0, 627.0, 630.0, 564.0, 630.0, 624.0, 636.0, 627.0, 582.0, 636.0, 636.0, 630.0, 573.0, 630.0, 630.0, 579.0, 633.0, 636.0, 630.0, 639.0, 627.0, 582.0, 639.0, 587.0, 630.0, 636.0, 584.0, 633.0, 630.0, 579.0, 618.0, 630.0, 627.0, 636.0, 587.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 317.0, 316.0, 289.0, 284.0, 293.0, 289.0, 287.0, 292.0, 317.0, 322.0, 314.0, 322.0, 316.0, 320.0, 316.0, 317.0, 297.0, 282.0, 317.0, 313.0, 314.0, 316.0, 324.0, 306.0, 286.0, 296.0, 318.0, 309.0, 297.0, 285.0, 311.0, 319.0, 319.0, 302.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 316.0, 320.0, 319.0, 308.0, 319.0, 317.0, 290.0, 294.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 314.0, 316.0, 316.0, 311.0, 
317.0, 316.0, 319.0, 317.0, 268.0, 254.0, 298.0, 289.0, 293.0, 291.0, 293.0, 286.0, 314.0, 319.0, 317.0, 316.0, 313.0, 317.0, 319.0, 320.0, 293.0, 289.0, 293.0, 289.0, 314.0, 319.0, 319.0, 311.0, 316.0, 317.0, 286.0, 293.0, 319.0, 317.0, 293.0, 289.0, 290.0, 289.0, 317.0, 319.0, 319.0, 311.0, 291.0, 288.0, 290.0, 292.0, 293.0, 294.0, 316.0, 317.0, 319.0, 311.0, 316.0, 317.0, 288.0, 299.0, 319.0, 317.0, 291.0, 291.0, 288.0, 291.0, 288.0, 288.0, 309.0, 321.0, 278.0, 283.0, 316.0, 311.0, 313.0, 314.0, 319.0, 311.0, 284.0, 280.0, 313.0, 317.0, 320.0, 304.0, 319.0, 317.0, 318.0, 309.0, 296.0, 286.0, 317.0, 319.0, 319.0, 317.0, 313.0, 317.0, 284.0, 289.0, 323.0, 307.0, 311.0, 319.0, 298.0, 281.0, 316.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 313.0, 314.0, 286.0, 296.0, 322.0, 317.0, 283.0, 304.0, 316.0, 314.0, 314.0, 322.0, 295.0, 289.0, 319.0, 314.0, 314.0, 316.0, 288.0, 291.0, 299.0, 319.0, 316.0, 314.0, 311.0, 316.0, 313.0, 323.0, 293.0, 294.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8041036336722438, "mean_processing_ms": 0.2292823831802886, "mean_inference_ms": 1.4068110858832141}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10248000, "num_steps_sampled": 5465600, "sample_time_ms": 22514.038, "load_time_ms": 36.353, "grad_time_ms": 10948.965, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00023957279336173087, "policy_loss": -0.006577346473932266, "vf_loss": 73.66693878173828, "vf_explained_var": 0.7691845297813416, "kl": 0.001824389211833477, "entropy": 1.099536418914795, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5465600, "episodes_total": 13664, "training_iteration": 427, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-26-47", "timestamp": 1660260407, "time_this_iter_s": 30.428364992141724, "time_total_s": 
18816.332512140274, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": 
false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, 
"num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18816.332512140274, "timesteps_since_restore": 5465600, "iterations_since_restore": 427, "perf": {"cpu_util_percent": 33.06976744186046, "ram_util_percent": 59.05581395348838}} +{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 614.26, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.13}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.86, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 16.72, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.42, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.32, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.79, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.03, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, 
"soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.32, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.32, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 582.0, 582.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 465.0, 630.0, 584.0, 570.0, 587.0, 627.0, 633.0, 630.0, 587.0, 636.0, 639.0, 462.0, 639.0, 633.0, 567.0, 582.0, 576.0, 582.0, 636.0, 633.0, 590.0, 630.0, 579.0, 627.0, 627.0, 630.0, 564.0, 630.0, 624.0, 636.0, 627.0, 582.0, 636.0, 636.0, 630.0, 573.0, 630.0, 630.0, 579.0, 633.0, 636.0, 630.0, 639.0, 627.0, 582.0, 639.0, 587.0, 630.0, 636.0, 584.0, 633.0, 630.0, 579.0, 618.0, 630.0, 627.0, 636.0, 587.0, 636.0, 630.0, 633.0, 573.0, 582.0, 579.0, 639.0, 636.0, 636.0, 633.0, 579.0, 630.0, 630.0, 630.0, 582.0, 627.0, 582.0, 630.0, 621.0, 630.0, 630.0, 633.0, 636.0, 627.0, 636.0, 584.0, 636.0, 633.0, 636.0, 630.0, 627.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 296.0, 286.0, 289.0, 293.0, 316.0, 317.0, 322.0, 317.0, 321.0, 312.0, 316.0, 320.0, 319.0, 317.0, 314.0, 319.0, 231.0, 234.0, 317.0, 313.0, 288.0, 296.0, 294.0, 276.0, 293.0, 294.0, 321.0, 306.0, 314.0, 
319.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 316.0, 323.0, 230.0, 232.0, 319.0, 320.0, 311.0, 322.0, 276.0, 291.0, 288.0, 294.0, 293.0, 283.0, 280.0, 302.0, 317.0, 319.0, 314.0, 319.0, 301.0, 289.0, 316.0, 314.0, 290.0, 289.0, 316.0, 311.0, 313.0, 314.0, 319.0, 311.0, 284.0, 280.0, 313.0, 317.0, 320.0, 304.0, 319.0, 317.0, 318.0, 309.0, 296.0, 286.0, 317.0, 319.0, 319.0, 317.0, 313.0, 317.0, 284.0, 289.0, 323.0, 307.0, 311.0, 319.0, 298.0, 281.0, 316.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 313.0, 314.0, 286.0, 296.0, 322.0, 317.0, 283.0, 304.0, 316.0, 314.0, 314.0, 322.0, 295.0, 289.0, 319.0, 314.0, 314.0, 316.0, 288.0, 291.0, 299.0, 319.0, 316.0, 314.0, 311.0, 316.0, 313.0, 323.0, 293.0, 294.0, 316.0, 320.0, 311.0, 319.0, 317.0, 316.0, 289.0, 284.0, 293.0, 289.0, 287.0, 292.0, 317.0, 322.0, 314.0, 322.0, 316.0, 320.0, 316.0, 317.0, 297.0, 282.0, 317.0, 313.0, 314.0, 316.0, 324.0, 306.0, 286.0, 296.0, 318.0, 309.0, 297.0, 285.0, 311.0, 319.0, 319.0, 302.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 316.0, 320.0, 319.0, 308.0, 319.0, 317.0, 290.0, 294.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 314.0, 316.0, 316.0, 311.0, 317.0, 316.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.803277493605749, "mean_processing_ms": 0.22911824330865546, "mean_inference_ms": 1.4058271454809743}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10272000, "num_steps_sampled": 5478400, "sample_time_ms": 22384.398, "load_time_ms": 36.404, "grad_time_ms": 10797.09, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016918530454859138, "policy_loss": -0.005912030581384897, "vf_loss": 81.57828521728516, "vf_explained_var": 0.7692078948020935, "kl": 0.0022940493654459715, "entropy": 1.1078943014144897, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5478400, "episodes_total": 13696, 
"training_iteration": 428, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-27-18", "timestamp": 1660260438, "time_this_iter_s": 31.51498508453369, "time_total_s": 18847.847497224808, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18847.847497224808, "timesteps_since_restore": 5478400, "iterations_since_restore": 428, "perf": {"cpu_util_percent": 32.73555555555556, "ram_util_percent": 59.61555555555556}} +{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 610.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 305.475}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.35, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.98, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.44, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.73, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.91, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.99, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.06, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.44, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.44, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, 
"catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 570.0, 639.0, 590.0, 579.0, 579.0, 639.0, 636.0, 584.0, 522.0, 582.0, 627.0, 633.0, 618.0, 627.0, 584.0, 633.0, 639.0, 576.0, 582.0, 636.0, 630.0, 522.0, 627.0, 636.0, 630.0, 630.0, 525.0, 630.0, 630.0, 633.0, 627.0, 636.0, 587.0, 636.0, 630.0, 633.0, 573.0, 582.0, 579.0, 639.0, 636.0, 636.0, 633.0, 579.0, 630.0, 630.0, 630.0, 582.0, 627.0, 582.0, 630.0, 621.0, 630.0, 630.0, 633.0, 636.0, 627.0, 636.0, 584.0, 636.0, 633.0, 636.0, 630.0, 627.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 465.0, 630.0, 584.0, 570.0, 587.0, 627.0, 633.0, 630.0, 587.0, 636.0, 639.0, 462.0, 639.0, 633.0, 567.0, 582.0, 576.0, 582.0, 636.0, 633.0, 590.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], 
"policy_ppo_reward": [316.0, 320.0, 319.0, 320.0, 290.0, 280.0, 324.0, 315.0, 293.0, 297.0, 297.0, 282.0, 285.0, 294.0, 319.0, 320.0, 314.0, 322.0, 293.0, 291.0, 265.0, 257.0, 297.0, 285.0, 316.0, 311.0, 317.0, 316.0, 307.0, 311.0, 323.0, 304.0, 299.0, 285.0, 319.0, 314.0, 319.0, 320.0, 285.0, 291.0, 291.0, 291.0, 322.0, 314.0, 314.0, 316.0, 267.0, 255.0, 313.0, 314.0, 319.0, 317.0, 319.0, 311.0, 313.0, 317.0, 265.0, 260.0, 311.0, 319.0, 319.0, 311.0, 314.0, 319.0, 311.0, 316.0, 313.0, 323.0, 293.0, 294.0, 316.0, 320.0, 311.0, 319.0, 317.0, 316.0, 289.0, 284.0, 293.0, 289.0, 287.0, 292.0, 317.0, 322.0, 314.0, 322.0, 316.0, 320.0, 316.0, 317.0, 297.0, 282.0, 317.0, 313.0, 314.0, 316.0, 324.0, 306.0, 286.0, 296.0, 318.0, 309.0, 297.0, 285.0, 311.0, 319.0, 319.0, 302.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 316.0, 320.0, 319.0, 308.0, 319.0, 317.0, 290.0, 294.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 314.0, 316.0, 316.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 296.0, 286.0, 289.0, 293.0, 316.0, 317.0, 322.0, 317.0, 321.0, 312.0, 316.0, 320.0, 319.0, 317.0, 314.0, 319.0, 231.0, 234.0, 317.0, 313.0, 288.0, 296.0, 294.0, 276.0, 293.0, 294.0, 321.0, 306.0, 314.0, 319.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 316.0, 323.0, 230.0, 232.0, 319.0, 320.0, 311.0, 322.0, 276.0, 291.0, 288.0, 294.0, 293.0, 283.0, 280.0, 302.0, 317.0, 319.0, 314.0, 319.0, 301.0, 289.0, 316.0, 314.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8024594853265918, "mean_processing_ms": 0.2289562714759904, "mean_inference_ms": 1.4049375994877125}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10296000, "num_steps_sampled": 5491200, "sample_time_ms": 22179.532, "load_time_ms": 37.018, "grad_time_ms": 10788.772, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015958556905388832, "policy_loss": -0.010051627643406391, "vf_loss": 90.0771713256836, "vf_explained_var": 0.7485197186470032, "kl": 
0.0020946140866726637, "entropy": 1.1038951873779297, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5491200, "episodes_total": 13728, "training_iteration": 429, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-27-50", "timestamp": 1660260470, "time_this_iter_s": 31.638920783996582, "time_total_s": 18879.486418008804, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), 
float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18879.486418008804, "timesteps_since_restore": 5491200, "iterations_since_restore": 429, "perf": {"cpu_util_percent": 30.328888888888887, "ram_util_percent": 59.14666666666665}} +{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 605.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 302.585}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 185.97, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.35, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.61, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.68, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.17, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.99, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.61, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.61, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 630.0, 630.0, 351.0, 633.0, 636.0, 582.0, 582.0, 587.0, 576.0, 582.0, 633.0, 633.0, 633.0, 633.0, 544.0, 633.0, 579.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 576.0, 636.0, 627.0, 576.0, 636.0, 587.0, 630.0, 630.0, 627.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 465.0, 630.0, 584.0, 570.0, 587.0, 627.0, 633.0, 630.0, 587.0, 636.0, 639.0, 462.0, 639.0, 633.0, 567.0, 582.0, 576.0, 582.0, 636.0, 633.0, 590.0, 630.0, 579.0, 636.0, 639.0, 570.0, 639.0, 590.0, 579.0, 579.0, 639.0, 636.0, 584.0, 522.0, 582.0, 627.0, 633.0, 618.0, 627.0, 584.0, 633.0, 639.0, 576.0, 582.0, 636.0, 630.0, 522.0, 627.0, 636.0, 630.0, 630.0, 525.0, 630.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 293.0, 286.0, 311.0, 319.0, 313.0, 317.0, 177.0, 174.0, 314.0, 319.0, 319.0, 317.0, 294.0, 288.0, 286.0, 296.0, 293.0, 294.0, 282.0, 294.0, 288.0, 294.0, 316.0, 317.0, 316.0, 317.0, 324.0, 309.0, 316.0, 317.0, 270.0, 274.0, 311.0, 322.0, 288.0, 291.0, 286.0, 296.0, 317.0, 322.0, 319.0, 314.0, 319.0, 317.0, 322.0, 317.0, 276.0, 282.0, 285.0, 291.0, 319.0, 317.0, 313.0, 314.0, 288.0, 288.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 314.0, 316.0, 316.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 296.0, 286.0, 289.0, 293.0, 316.0, 317.0, 322.0, 317.0, 321.0, 312.0, 316.0, 320.0, 319.0, 317.0, 314.0, 319.0, 231.0, 234.0, 317.0, 313.0, 288.0, 296.0, 294.0, 276.0, 293.0, 294.0, 321.0, 306.0, 314.0, 319.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 316.0, 323.0, 230.0, 232.0, 319.0, 320.0, 311.0, 322.0, 276.0, 291.0, 288.0, 294.0, 293.0, 283.0, 280.0, 302.0, 317.0, 319.0, 314.0, 319.0, 301.0, 289.0, 316.0, 314.0, 290.0, 289.0, 316.0, 320.0, 319.0, 320.0, 290.0, 280.0, 324.0, 315.0, 293.0, 297.0, 297.0, 282.0, 285.0, 294.0, 319.0, 320.0, 314.0, 322.0, 293.0, 291.0, 265.0, 257.0, 297.0, 285.0, 316.0, 311.0, 317.0, 316.0, 307.0, 311.0, 323.0, 304.0, 299.0, 285.0, 319.0, 314.0, 319.0, 320.0, 285.0, 291.0, 291.0, 291.0, 322.0, 314.0, 314.0, 316.0, 267.0, 255.0, 313.0, 314.0, 319.0, 317.0, 319.0, 311.0, 313.0, 317.0, 265.0, 260.0, 311.0, 319.0, 319.0, 311.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.801647414418135, "mean_processing_ms": 0.22879516876474576, "mean_inference_ms": 1.404093752736684}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10320000, "num_steps_sampled": 5504000, "sample_time_ms": 21723.978, "load_time_ms": 37.011, "grad_time_ms": 10801.836, "update_time_ms": 0.001, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -4.737731069326401e-05, "policy_loss": -0.00790297333151102, "vf_loss": 84.07501220703125, "vf_explained_var": 0.7714950442314148, "kl": 0.0018305158009752631, "entropy": 1.1038156747817993, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5504000, "episodes_total": 13760, "training_iteration": 430, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-28-22", "timestamp": 1660260502, "time_this_iter_s": 32.1021990776062, "time_total_s": 18911.58861708641, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18911.58861708641, "timesteps_since_restore": 5504000, "iterations_since_restore": 430, "perf": {"cpu_util_percent": 34.26, "ram_util_percent": 59.419999999999995}} +{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 607.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.515}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.23, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 
0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.77, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.44, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, 
"useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 624.0, 630.0, 630.0, 633.0, 587.0, 630.0, 633.0, 630.0, 639.0, 639.0, 630.0, 584.0, 564.0, 533.0, 636.0, 561.0, 630.0, 627.0, 522.0, 630.0, 636.0, 636.0, 624.0, 630.0, 587.0, 582.0, 636.0, 587.0, 587.0, 624.0, 633.0, 590.0, 630.0, 579.0, 636.0, 639.0, 570.0, 639.0, 590.0, 579.0, 579.0, 639.0, 636.0, 584.0, 522.0, 582.0, 627.0, 633.0, 618.0, 627.0, 584.0, 633.0, 639.0, 576.0, 582.0, 636.0, 630.0, 522.0, 627.0, 636.0, 630.0, 630.0, 525.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 630.0, 351.0, 633.0, 636.0, 582.0, 582.0, 587.0, 576.0, 582.0, 633.0, 633.0, 633.0, 633.0, 544.0, 633.0, 579.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 576.0, 636.0, 627.0, 576.0, 636.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 314.0, 322.0, 306.0, 318.0, 319.0, 311.0, 321.0, 309.0, 319.0, 314.0, 296.0, 291.0, 319.0, 311.0, 314.0, 319.0, 314.0, 316.0, 319.0, 320.0, 325.0, 314.0, 316.0, 314.0, 293.0, 291.0, 288.0, 276.0, 259.0, 274.0, 314.0, 322.0, 279.0, 282.0, 316.0, 314.0, 311.0, 316.0, 257.0, 265.0, 317.0, 313.0, 314.0, 322.0, 317.0, 319.0, 316.0, 308.0, 311.0, 319.0, 288.0, 299.0, 288.0, 294.0, 324.0, 312.0, 296.0, 291.0, 286.0, 301.0, 313.0, 311.0, 314.0, 319.0, 301.0, 289.0, 316.0, 314.0, 290.0, 289.0, 316.0, 320.0, 319.0, 320.0, 290.0, 280.0, 324.0, 315.0, 293.0, 297.0, 297.0, 282.0, 285.0, 294.0, 319.0, 320.0, 314.0, 322.0, 293.0, 291.0, 265.0, 257.0, 297.0, 285.0, 316.0, 311.0, 317.0, 316.0, 307.0, 311.0, 323.0, 304.0, 299.0, 285.0, 319.0, 314.0, 319.0, 320.0, 285.0, 291.0, 291.0, 291.0, 322.0, 314.0, 314.0, 316.0, 267.0, 255.0, 313.0, 314.0, 319.0, 317.0, 319.0, 311.0, 313.0, 317.0, 265.0, 260.0, 311.0, 319.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 293.0, 286.0, 311.0, 319.0, 313.0, 317.0, 177.0, 174.0, 314.0, 319.0, 319.0, 317.0, 294.0, 288.0, 286.0, 296.0, 293.0, 294.0, 282.0, 294.0, 288.0, 294.0, 316.0, 317.0, 316.0, 317.0, 324.0, 309.0, 316.0, 317.0, 270.0, 274.0, 311.0, 322.0, 288.0, 291.0, 286.0, 296.0, 317.0, 322.0, 319.0, 314.0, 319.0, 317.0, 322.0, 317.0, 276.0, 282.0, 285.0, 291.0, 319.0, 317.0, 313.0, 314.0, 288.0, 288.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8008314445541891, "mean_processing_ms": 0.2286322337318132, "mean_inference_ms": 1.4031615335106213}, "off_policy_estimator": {}, "info": 
{"num_steps_trained": 10344000, "num_steps_sampled": 5516800, "sample_time_ms": 21234.37, "load_time_ms": 37.138, "grad_time_ms": 10446.294, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004487487021833658, "policy_loss": -0.002838247222825885, "vf_loss": 78.8043441772461, "vf_explained_var": 0.7659228444099426, "kl": 0.0018056267872452736, "entropy": 1.109397053718567, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5516800, "episodes_total": 13792, "training_iteration": 431, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-28-51", "timestamp": 1660260531, "time_this_iter_s": 29.235426902770996, "time_total_s": 18940.82404398918, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": 
{"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18940.82404398918, "timesteps_since_restore": 5516800, "iterations_since_restore": 431, "perf": {"cpu_util_percent": 33.543902439024386, "ram_util_percent": 59.482926829268294}} +{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 606.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.24}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.08, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.91, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.75, "dish_pickup_agent_0_min": 4, 
"dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.45, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.37, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.53, 
"viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 630.0, 636.0, 587.0, 579.0, 633.0, 630.0, 587.0, 630.0, 576.0, 582.0, 630.0, 627.0, 525.0, 582.0, 630.0, 582.0, 582.0, 582.0, 633.0, 624.0, 582.0, 636.0, 522.0, 633.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0, 525.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 630.0, 351.0, 633.0, 636.0, 582.0, 582.0, 587.0, 576.0, 582.0, 633.0, 633.0, 633.0, 633.0, 544.0, 633.0, 579.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 576.0, 636.0, 627.0, 576.0, 636.0, 587.0, 630.0, 630.0, 636.0, 624.0, 630.0, 630.0, 633.0, 587.0, 630.0, 633.0, 630.0, 639.0, 639.0, 630.0, 584.0, 564.0, 
533.0, 636.0, 561.0, 630.0, 627.0, 522.0, 630.0, 636.0, 636.0, 624.0, 630.0, 587.0, 582.0, 636.0, 587.0, 587.0, 624.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 289.0, 316.0, 317.0, 311.0, 319.0, 314.0, 322.0, 288.0, 299.0, 288.0, 291.0, 311.0, 322.0, 316.0, 314.0, 301.0, 286.0, 311.0, 319.0, 305.0, 271.0, 286.0, 296.0, 313.0, 317.0, 316.0, 311.0, 265.0, 260.0, 291.0, 291.0, 313.0, 317.0, 287.0, 295.0, 285.0, 297.0, 293.0, 289.0, 319.0, 314.0, 316.0, 308.0, 291.0, 291.0, 319.0, 317.0, 267.0, 255.0, 321.0, 312.0, 314.0, 322.0, 317.0, 319.0, 317.0, 319.0, 316.0, 314.0, 298.0, 284.0, 316.0, 314.0, 265.0, 260.0, 311.0, 319.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 293.0, 286.0, 311.0, 319.0, 313.0, 317.0, 177.0, 174.0, 314.0, 319.0, 319.0, 317.0, 294.0, 288.0, 286.0, 296.0, 293.0, 294.0, 282.0, 294.0, 288.0, 294.0, 316.0, 317.0, 316.0, 317.0, 324.0, 309.0, 316.0, 317.0, 270.0, 274.0, 311.0, 322.0, 288.0, 291.0, 286.0, 296.0, 317.0, 322.0, 319.0, 314.0, 319.0, 317.0, 322.0, 317.0, 276.0, 282.0, 285.0, 291.0, 319.0, 317.0, 313.0, 314.0, 288.0, 288.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 311.0, 319.0, 314.0, 322.0, 306.0, 318.0, 319.0, 311.0, 321.0, 309.0, 319.0, 314.0, 296.0, 291.0, 319.0, 311.0, 314.0, 319.0, 314.0, 316.0, 319.0, 320.0, 325.0, 314.0, 316.0, 314.0, 293.0, 291.0, 288.0, 276.0, 259.0, 274.0, 314.0, 322.0, 279.0, 282.0, 316.0, 314.0, 311.0, 316.0, 257.0, 265.0, 317.0, 313.0, 314.0, 322.0, 317.0, 319.0, 316.0, 308.0, 311.0, 319.0, 288.0, 299.0, 288.0, 
294.0, 324.0, 312.0, 296.0, 291.0, 286.0, 301.0, 313.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8000283085438272, "mean_processing_ms": 0.22847315169555785, "mean_inference_ms": 1.4024220813317556}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10368000, "num_steps_sampled": 5529600, "sample_time_ms": 21335.175, "load_time_ms": 37.375, "grad_time_ms": 10351.537, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001511982991360128, "policy_loss": -0.006039683241397142, "vf_loss": 81.0816650390625, "vf_explained_var": 0.766996443271637, "kl": 0.0019059469923377037, "entropy": 1.1129895448684692, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5529600, "episodes_total": 13824, "training_iteration": 432, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-29-27", "timestamp": 1660260567, "time_this_iter_s": 35.764232873916626, "time_total_s": 18976.588276863098, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, 
"timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18976.588276863098, "timesteps_since_restore": 5529600, "iterations_since_restore": 432, "perf": {"cpu_util_percent": 31.023529411764706, "ram_util_percent": 59.011764705882364}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 609.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.97}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.14, "shaped_reward_min": 161, "shaped_reward_max": 199, 
"tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.89, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.51, "potting_onion_agent_0_min": 12, 
"potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.86, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.37, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.24, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.51, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, 
"optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.51, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 579.0, 636.0, 579.0, 582.0, 576.0, 587.0, 627.0, 630.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 624.0, 630.0, 582.0, 633.0, 630.0, 633.0, 582.0, 636.0, 582.0, 630.0, 630.0, 633.0, 636.0, 582.0, 582.0, 582.0, 576.0, 636.0, 587.0, 630.0, 630.0, 636.0, 624.0, 630.0, 630.0, 633.0, 587.0, 630.0, 633.0, 630.0, 639.0, 639.0, 630.0, 584.0, 564.0, 533.0, 636.0, 
561.0, 630.0, 627.0, 522.0, 630.0, 636.0, 636.0, 624.0, 630.0, 587.0, 582.0, 636.0, 587.0, 587.0, 624.0, 579.0, 633.0, 630.0, 636.0, 587.0, 579.0, 633.0, 630.0, 587.0, 630.0, 576.0, 582.0, 630.0, 627.0, 525.0, 582.0, 630.0, 582.0, 582.0, 582.0, 633.0, 624.0, 582.0, 636.0, 522.0, 633.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 286.0, 296.0, 288.0, 291.0, 319.0, 317.0, 293.0, 286.0, 286.0, 296.0, 292.0, 284.0, 296.0, 291.0, 313.0, 314.0, 316.0, 314.0, 293.0, 294.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 310.0, 314.0, 322.0, 308.0, 290.0, 292.0, 316.0, 317.0, 316.0, 314.0, 317.0, 316.0, 293.0, 289.0, 317.0, 319.0, 289.0, 293.0, 316.0, 314.0, 306.0, 324.0, 314.0, 319.0, 317.0, 319.0, 293.0, 289.0, 289.0, 293.0, 288.0, 294.0, 288.0, 288.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 311.0, 319.0, 314.0, 322.0, 306.0, 318.0, 319.0, 311.0, 321.0, 309.0, 319.0, 314.0, 296.0, 291.0, 319.0, 311.0, 314.0, 319.0, 314.0, 316.0, 319.0, 320.0, 325.0, 314.0, 316.0, 314.0, 293.0, 291.0, 288.0, 276.0, 259.0, 274.0, 314.0, 322.0, 279.0, 282.0, 316.0, 314.0, 311.0, 316.0, 257.0, 265.0, 317.0, 313.0, 314.0, 322.0, 317.0, 319.0, 316.0, 308.0, 311.0, 319.0, 288.0, 299.0, 288.0, 294.0, 324.0, 312.0, 296.0, 291.0, 286.0, 301.0, 313.0, 311.0, 290.0, 289.0, 316.0, 317.0, 311.0, 319.0, 314.0, 322.0, 288.0, 299.0, 288.0, 291.0, 311.0, 322.0, 316.0, 314.0, 301.0, 286.0, 311.0, 319.0, 305.0, 271.0, 286.0, 296.0, 313.0, 
317.0, 316.0, 311.0, 265.0, 260.0, 291.0, 291.0, 313.0, 317.0, 287.0, 295.0, 285.0, 297.0, 293.0, 289.0, 319.0, 314.0, 316.0, 308.0, 291.0, 291.0, 319.0, 317.0, 267.0, 255.0, 321.0, 312.0, 314.0, 322.0, 317.0, 319.0, 317.0, 319.0, 316.0, 314.0, 298.0, 284.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7992234752910505, "mean_processing_ms": 0.2283129313200136, "mean_inference_ms": 1.4016145546737357}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10392000, "num_steps_sampled": 5542400, "sample_time_ms": 21105.654, "load_time_ms": 37.195, "grad_time_ms": 10237.421, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00017731667321640998, "policy_loss": -0.007162818219512701, "vf_loss": 79.00240325927734, "vf_explained_var": 0.7636518478393555, "kl": 0.0019576705526560545, "entropy": 1.1202179193496704, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5542400, "episodes_total": 13856, "training_iteration": 433, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-29-57", "timestamp": 1660260597, "time_this_iter_s": 30.04263925552368, "time_total_s": 19006.630916118622, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": 
null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19006.630916118622, "timesteps_since_restore": 5542400, "iterations_since_restore": 433, "perf": {"cpu_util_percent": 28.414285714285718, "ram_util_percent": 59.08571428571428}} +{"episode_reward_max": 636.0, "episode_reward_min": 522.0, "episode_reward_mean": 608.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 324.0}, 
"policy_reward_mean": {"ppo": 304.035}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.27, "shaped_reward_min": 162, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.01, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, 
"useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.62, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.75, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.1, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.38, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.62, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.62, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 582.0, 582.0, 633.0, 636.0, 582.0, 630.0, 630.0, 636.0, 587.0, 633.0, 633.0, 636.0, 636.0, 570.0, 576.0, 624.0, 587.0, 636.0, 582.0, 587.0, 582.0, 
576.0, 579.0, 582.0, 630.0, 627.0, 633.0, 590.0, 636.0, 539.0, 636.0, 587.0, 587.0, 624.0, 579.0, 633.0, 630.0, 636.0, 587.0, 579.0, 633.0, 630.0, 587.0, 630.0, 576.0, 582.0, 630.0, 627.0, 525.0, 582.0, 630.0, 582.0, 582.0, 582.0, 633.0, 624.0, 582.0, 636.0, 522.0, 633.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0, 633.0, 582.0, 579.0, 636.0, 579.0, 582.0, 576.0, 587.0, 627.0, 630.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 624.0, 630.0, 582.0, 633.0, 630.0, 633.0, 582.0, 636.0, 582.0, 630.0, 630.0, 633.0, 636.0, 582.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 322.0, 314.0, 293.0, 289.0, 288.0, 294.0, 319.0, 314.0, 317.0, 319.0, 294.0, 288.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 314.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 277.0, 293.0, 295.0, 281.0, 308.0, 316.0, 296.0, 291.0, 319.0, 317.0, 296.0, 286.0, 291.0, 296.0, 288.0, 294.0, 290.0, 286.0, 293.0, 286.0, 296.0, 286.0, 316.0, 314.0, 313.0, 314.0, 319.0, 314.0, 299.0, 291.0, 319.0, 317.0, 268.0, 271.0, 324.0, 312.0, 296.0, 291.0, 286.0, 301.0, 313.0, 311.0, 290.0, 289.0, 316.0, 317.0, 311.0, 319.0, 314.0, 322.0, 288.0, 299.0, 288.0, 291.0, 311.0, 322.0, 316.0, 314.0, 301.0, 286.0, 311.0, 319.0, 305.0, 271.0, 286.0, 296.0, 313.0, 317.0, 316.0, 311.0, 265.0, 260.0, 291.0, 291.0, 313.0, 317.0, 287.0, 295.0, 285.0, 297.0, 293.0, 289.0, 319.0, 314.0, 316.0, 308.0, 291.0, 291.0, 319.0, 317.0, 267.0, 255.0, 321.0, 312.0, 314.0, 322.0, 317.0, 319.0, 317.0, 319.0, 316.0, 
314.0, 298.0, 284.0, 316.0, 314.0, 314.0, 319.0, 286.0, 296.0, 288.0, 291.0, 319.0, 317.0, 293.0, 286.0, 286.0, 296.0, 292.0, 284.0, 296.0, 291.0, 313.0, 314.0, 316.0, 314.0, 293.0, 294.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 310.0, 314.0, 322.0, 308.0, 290.0, 292.0, 316.0, 317.0, 316.0, 314.0, 317.0, 316.0, 293.0, 289.0, 317.0, 319.0, 289.0, 293.0, 316.0, 314.0, 306.0, 324.0, 314.0, 319.0, 317.0, 319.0, 293.0, 289.0, 289.0, 293.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7984267196562377, "mean_processing_ms": 0.2281543074598729, "mean_inference_ms": 1.4008634634421164}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10416000, "num_steps_sampled": 5555200, "sample_time_ms": 20963.356, "load_time_ms": 37.144, "grad_time_ms": 9878.857, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020503299310803413, "policy_loss": -0.0053678578697144985, "vf_loss": 79.79612731933594, "vf_explained_var": 0.7709012627601624, "kl": 0.0022744529414922, "entropy": 1.1228529214859009, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5555200, "episodes_total": 13888, "training_iteration": 434, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-30-26", "timestamp": 1660260626, "time_this_iter_s": 28.891623735427856, "time_total_s": 19035.52253985405, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, 
"max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19035.52253985405, "timesteps_since_restore": 5555200, "iterations_since_restore": 434, "perf": {"cpu_util_percent": 35.10975609756097, "ram_util_percent": 59.18292682926829}} 
+{"episode_reward_max": 639.0, "episode_reward_min": 539.0, "episode_reward_mean": 609.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 268.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 304.505}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.81, "shaped_reward_min": 170, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.97, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.39, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.58, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.62, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.39, 
"optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.58, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.39, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.58, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 630.0, 587.0, 582.0, 639.0, 587.0, 584.0, 584.0, 633.0, 633.0, 582.0, 587.0, 630.0, 633.0, 633.0, 582.0, 582.0, 630.0, 630.0, 630.0, 633.0, 590.0, 633.0, 630.0, 630.0, 587.0, 630.0, 584.0, 587.0, 576.0, 582.0, 636.0, 630.0, 582.0, 630.0, 633.0, 582.0, 579.0, 636.0, 579.0, 582.0, 576.0, 587.0, 627.0, 630.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 624.0, 630.0, 582.0, 633.0, 630.0, 633.0, 582.0, 636.0, 582.0, 630.0, 630.0, 633.0, 636.0, 582.0, 582.0, 582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 636.0, 582.0, 630.0, 630.0, 636.0, 587.0, 633.0, 633.0, 636.0, 636.0, 570.0, 576.0, 624.0, 587.0, 636.0, 582.0, 587.0, 582.0, 576.0, 579.0, 582.0, 630.0, 627.0, 633.0, 590.0, 636.0, 539.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 311.0, 319.0, 321.0, 309.0, 291.0, 296.0, 301.0, 281.0, 319.0, 320.0, 293.0, 294.0, 286.0, 298.0, 288.0, 296.0, 314.0, 319.0, 319.0, 314.0, 293.0, 289.0, 293.0, 294.0, 314.0, 316.0, 313.0, 320.0, 316.0, 317.0, 291.0, 291.0, 288.0, 294.0, 316.0, 314.0, 316.0, 314.0, 319.0, 311.0, 324.0, 309.0, 296.0, 294.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 301.0, 286.0, 319.0, 311.0, 293.0, 291.0, 299.0, 288.0, 274.0, 302.0, 286.0, 296.0, 317.0, 319.0, 316.0, 314.0, 298.0, 284.0, 316.0, 314.0, 314.0, 319.0, 286.0, 296.0, 288.0, 291.0, 319.0, 317.0, 293.0, 286.0, 286.0, 296.0, 292.0, 284.0, 296.0, 291.0, 313.0, 314.0, 316.0, 314.0, 293.0, 294.0, 314.0, 316.0, 324.0, 
312.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 310.0, 314.0, 322.0, 308.0, 290.0, 292.0, 316.0, 317.0, 316.0, 314.0, 317.0, 316.0, 293.0, 289.0, 317.0, 319.0, 289.0, 293.0, 316.0, 314.0, 306.0, 324.0, 314.0, 319.0, 317.0, 319.0, 293.0, 289.0, 289.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 293.0, 289.0, 288.0, 294.0, 319.0, 314.0, 317.0, 319.0, 294.0, 288.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 314.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 277.0, 293.0, 295.0, 281.0, 308.0, 316.0, 296.0, 291.0, 319.0, 317.0, 296.0, 286.0, 291.0, 296.0, 288.0, 294.0, 290.0, 286.0, 293.0, 286.0, 296.0, 286.0, 316.0, 314.0, 313.0, 314.0, 319.0, 314.0, 299.0, 291.0, 319.0, 317.0, 268.0, 271.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7976245186877899, "mean_processing_ms": 0.22799297194787038, "mean_inference_ms": 1.3999329365484723}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10440000, "num_steps_sampled": 5568000, "sample_time_ms": 21006.175, "load_time_ms": 37.426, "grad_time_ms": 9977.404, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013511897996068, "policy_loss": -0.006047597620636225, "vf_loss": 79.5729751586914, "vf_explained_var": 0.7702791094779968, "kl": 0.001890461309812963, "entropy": 1.1170209646224976, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5568000, "episodes_total": 13920, "training_iteration": 435, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-30-58", "timestamp": 1660260658, "time_this_iter_s": 31.331193923950195, "time_total_s": 19066.853733778, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 19066.853733778, "timesteps_since_restore": 5568000, "iterations_since_restore": 435, "perf": {"cpu_util_percent": 33.626666666666665, "ram_util_percent": 58.966666666666676}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 605.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 250.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 302.93}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.46, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.0, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 13, 
"useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.61, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 
0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.31, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.61, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.61, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 630.0, 582.0, 630.0, 587.0, 630.0, 522.0, 636.0, 590.0, 633.0, 582.0, 636.0, 636.0, 639.0, 582.0, 582.0, 633.0, 579.0, 579.0, 536.0, 582.0, 636.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 582.0, 582.0, 636.0, 582.0, 582.0, 582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 636.0, 582.0, 630.0, 630.0, 636.0, 587.0, 633.0, 633.0, 636.0, 636.0, 570.0, 576.0, 624.0, 587.0, 636.0, 582.0, 587.0, 582.0, 576.0, 579.0, 582.0, 630.0, 627.0, 633.0, 590.0, 636.0, 539.0, 582.0, 630.0, 630.0, 587.0, 582.0, 639.0, 587.0, 584.0, 584.0, 633.0, 633.0, 582.0, 587.0, 630.0, 633.0, 633.0, 582.0, 582.0, 630.0, 630.0, 630.0, 633.0, 590.0, 633.0, 630.0, 630.0, 587.0, 630.0, 584.0, 587.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 316.0, 296.0, 283.0, 316.0, 314.0, 293.0, 289.0, 316.0, 314.0, 301.0, 286.0, 314.0, 316.0, 272.0, 250.0, 319.0, 317.0, 296.0, 294.0, 311.0, 322.0, 299.0, 283.0, 319.0, 317.0, 322.0, 314.0, 319.0, 320.0, 294.0, 288.0, 289.0, 293.0, 321.0, 312.0, 286.0, 293.0, 288.0, 291.0, 267.0, 269.0, 293.0, 289.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 285.0, 302.0, 288.0, 299.0, 321.0, 318.0, 316.0, 314.0, 314.0, 319.0, 294.0, 288.0, 283.0, 299.0, 317.0, 319.0, 293.0, 289.0, 289.0, 293.0, 288.0, 294.0, 296.0, 
291.0, 322.0, 314.0, 293.0, 289.0, 288.0, 294.0, 319.0, 314.0, 317.0, 319.0, 294.0, 288.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 314.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 277.0, 293.0, 295.0, 281.0, 308.0, 316.0, 296.0, 291.0, 319.0, 317.0, 296.0, 286.0, 291.0, 296.0, 288.0, 294.0, 290.0, 286.0, 293.0, 286.0, 296.0, 286.0, 316.0, 314.0, 313.0, 314.0, 319.0, 314.0, 299.0, 291.0, 319.0, 317.0, 268.0, 271.0, 288.0, 294.0, 311.0, 319.0, 321.0, 309.0, 291.0, 296.0, 301.0, 281.0, 319.0, 320.0, 293.0, 294.0, 286.0, 298.0, 288.0, 296.0, 314.0, 319.0, 319.0, 314.0, 293.0, 289.0, 293.0, 294.0, 314.0, 316.0, 313.0, 320.0, 316.0, 317.0, 291.0, 291.0, 288.0, 294.0, 316.0, 314.0, 316.0, 314.0, 319.0, 311.0, 324.0, 309.0, 296.0, 294.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 301.0, 286.0, 319.0, 311.0, 293.0, 291.0, 299.0, 288.0, 274.0, 302.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7968373127766308, "mean_processing_ms": 0.22783629500024907, "mean_inference_ms": 1.3993492052434942}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10464000, "num_steps_sampled": 5580800, "sample_time_ms": 21691.27, "load_time_ms": 37.308, "grad_time_ms": 10128.337, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0022821128368377686, "policy_loss": -0.005102970637381077, "vf_loss": 79.3483657836914, "vf_explained_var": 0.7713219523429871, "kl": 0.0023417342454195023, "entropy": 1.0995064973831177, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5580800, "episodes_total": 13952, "training_iteration": 436, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-31-35", "timestamp": 1660260695, "time_this_iter_s": 37.88511109352112, "time_total_s": 19104.73884487152, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19104.73884487152, "timesteps_since_restore": 5580800, "iterations_since_restore": 436, "perf": {"cpu_util_percent": 29.92452830188679, "ram_util_percent": 59.107547169811326}} +{"episode_reward_max": 639.0, "episode_reward_min": 311.0, "episode_reward_mean": 607.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.775}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 187.95, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 20, 
"onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.55, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.66, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, 
"soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 639.0, 627.0, 639.0, 633.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 630.0, 582.0, 573.0, 639.0, 587.0, 636.0, 633.0, 630.0, 581.0, 633.0, 582.0, 633.0, 311.0, 636.0, 630.0, 633.0, 636.0, 584.0, 636.0, 636.0, 633.0, 590.0, 636.0, 539.0, 582.0, 630.0, 630.0, 587.0, 582.0, 639.0, 587.0, 584.0, 584.0, 633.0, 633.0, 582.0, 587.0, 630.0, 633.0, 633.0, 582.0, 582.0, 630.0, 630.0, 630.0, 633.0, 590.0, 633.0, 630.0, 630.0, 587.0, 630.0, 584.0, 587.0, 576.0, 582.0, 630.0, 579.0, 630.0, 582.0, 630.0, 587.0, 630.0, 522.0, 636.0, 590.0, 633.0, 582.0, 636.0, 636.0, 639.0, 582.0, 582.0, 633.0, 579.0, 579.0, 536.0, 582.0, 636.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 311.0, 322.0, 317.0, 313.0, 314.0, 325.0, 314.0, 314.0, 319.0, 321.0, 309.0, 316.0, 320.0, 288.0, 299.0, 301.0, 286.0, 317.0, 322.0, 319.0, 311.0, 316.0, 317.0, 316.0, 314.0, 279.0, 303.0, 289.0, 284.0, 319.0, 320.0, 291.0, 296.0, 321.0, 315.0, 316.0, 317.0, 316.0, 314.0, 293.0, 288.0, 316.0, 
317.0, 293.0, 289.0, 316.0, 317.0, 151.0, 160.0, 319.0, 317.0, 316.0, 314.0, 319.0, 314.0, 319.0, 317.0, 291.0, 293.0, 311.0, 325.0, 319.0, 317.0, 319.0, 314.0, 299.0, 291.0, 319.0, 317.0, 268.0, 271.0, 288.0, 294.0, 311.0, 319.0, 321.0, 309.0, 291.0, 296.0, 301.0, 281.0, 319.0, 320.0, 293.0, 294.0, 286.0, 298.0, 288.0, 296.0, 314.0, 319.0, 319.0, 314.0, 293.0, 289.0, 293.0, 294.0, 314.0, 316.0, 313.0, 320.0, 316.0, 317.0, 291.0, 291.0, 288.0, 294.0, 316.0, 314.0, 316.0, 314.0, 319.0, 311.0, 324.0, 309.0, 296.0, 294.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 301.0, 286.0, 319.0, 311.0, 293.0, 291.0, 299.0, 288.0, 274.0, 302.0, 286.0, 296.0, 314.0, 316.0, 296.0, 283.0, 316.0, 314.0, 293.0, 289.0, 316.0, 314.0, 301.0, 286.0, 314.0, 316.0, 272.0, 250.0, 319.0, 317.0, 296.0, 294.0, 311.0, 322.0, 299.0, 283.0, 319.0, 317.0, 322.0, 314.0, 319.0, 320.0, 294.0, 288.0, 289.0, 293.0, 321.0, 312.0, 286.0, 293.0, 288.0, 291.0, 267.0, 269.0, 293.0, 289.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 285.0, 302.0, 288.0, 299.0, 321.0, 318.0, 316.0, 314.0, 314.0, 319.0, 294.0, 288.0, 283.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.796054313311972, "mean_processing_ms": 0.22768002877826965, "mean_inference_ms": 1.3988094341608432}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10488000, "num_steps_sampled": 5593600, "sample_time_ms": 21763.453, "load_time_ms": 37.236, "grad_time_ms": 10078.856, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00570017471909523, "policy_loss": -0.002029874362051487, "vf_loss": 82.7793960571289, "vf_explained_var": 0.7683680653572083, "kl": 0.002793658524751663, "entropy": 1.095770001411438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5593600, "episodes_total": 13984, "training_iteration": 437, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-12_00-32-06", "timestamp": 1660260726, "time_this_iter_s": 30.656537771224976, "time_total_s": 19135.395382642746, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19135.395382642746, "timesteps_since_restore": 5593600, "iterations_since_restore": 437, "perf": {"cpu_util_percent": 32.4046511627907, "ram_util_percent": 59.06744186046511}} +{"episode_reward_max": 639.0, "episode_reward_min": 311.0, "episode_reward_mean": 609.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.745}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 187.89, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.41, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.23, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.04, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.75, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.94, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.68, 
"soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.1, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.04, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.04, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 639.0, 630.0, 636.0, 633.0, 636.0, 587.0, 639.0, 579.0, 636.0, 630.0, 627.0, 633.0, 639.0, 636.0, 564.0, 630.0, 630.0, 636.0, 636.0, 636.0, 639.0, 636.0, 633.0, 630.0, 633.0, 579.0, 627.0, 408.0, 579.0, 630.0, 582.0, 584.0, 587.0, 576.0, 582.0, 630.0, 579.0, 630.0, 582.0, 630.0, 587.0, 630.0, 522.0, 636.0, 590.0, 633.0, 582.0, 636.0, 636.0, 639.0, 582.0, 582.0, 633.0, 579.0, 579.0, 536.0, 582.0, 636.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 582.0, 582.0, 627.0, 639.0, 627.0, 639.0, 633.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 630.0, 582.0, 573.0, 639.0, 587.0, 636.0, 633.0, 630.0, 581.0, 633.0, 582.0, 633.0, 311.0, 636.0, 630.0, 633.0, 636.0, 584.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 318.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 285.0, 302.0, 319.0, 320.0, 285.0, 294.0, 
319.0, 317.0, 311.0, 319.0, 313.0, 314.0, 313.0, 320.0, 319.0, 320.0, 324.0, 312.0, 274.0, 290.0, 315.0, 315.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 285.0, 294.0, 316.0, 311.0, 210.0, 198.0, 283.0, 296.0, 316.0, 314.0, 290.0, 292.0, 293.0, 291.0, 299.0, 288.0, 274.0, 302.0, 286.0, 296.0, 314.0, 316.0, 296.0, 283.0, 316.0, 314.0, 293.0, 289.0, 316.0, 314.0, 301.0, 286.0, 314.0, 316.0, 272.0, 250.0, 319.0, 317.0, 296.0, 294.0, 311.0, 322.0, 299.0, 283.0, 319.0, 317.0, 322.0, 314.0, 319.0, 320.0, 294.0, 288.0, 289.0, 293.0, 321.0, 312.0, 286.0, 293.0, 288.0, 291.0, 267.0, 269.0, 293.0, 289.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 285.0, 302.0, 288.0, 299.0, 321.0, 318.0, 316.0, 314.0, 314.0, 319.0, 294.0, 288.0, 283.0, 299.0, 316.0, 311.0, 322.0, 317.0, 313.0, 314.0, 325.0, 314.0, 314.0, 319.0, 321.0, 309.0, 316.0, 320.0, 288.0, 299.0, 301.0, 286.0, 317.0, 322.0, 319.0, 311.0, 316.0, 317.0, 316.0, 314.0, 279.0, 303.0, 289.0, 284.0, 319.0, 320.0, 291.0, 296.0, 321.0, 315.0, 316.0, 317.0, 316.0, 314.0, 293.0, 288.0, 316.0, 317.0, 293.0, 289.0, 316.0, 317.0, 151.0, 160.0, 319.0, 317.0, 316.0, 314.0, 319.0, 314.0, 319.0, 317.0, 291.0, 293.0, 311.0, 325.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7952795152199544, "mean_processing_ms": 0.22752629484006134, "mean_inference_ms": 1.3983582965229155}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10512000, "num_steps_sampled": 5606400, "sample_time_ms": 21892.052, "load_time_ms": 37.192, "grad_time_ms": 10267.0, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008096967940218747, "policy_loss": -0.008205131627619267, "vf_loss": 79.47277069091797, "vf_explained_var": 0.777022659778595, "kl": 0.002324033295735717, "entropy": 1.1036995649337769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 5606400, "episodes_total": 14016, "training_iteration": 438, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-32-41", "timestamp": 1660260761, "time_this_iter_s": 34.68048119544983, "time_total_s": 19170.075863838196, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19170.075863838196, "timesteps_since_restore": 5606400, "iterations_since_restore": 438, "perf": {"cpu_util_percent": 33.92857142857143, "ram_util_percent": 59.06734693877551}} +{"episode_reward_max": 639.0, "episode_reward_min": 311.0, "episode_reward_mean": 616.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 308.265}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 189.33, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.54, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.7, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.11, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.12, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.34, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.91, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.77, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.12, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.34, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.12, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.34, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 639.0, 636.0, 630.0, 579.0, 587.0, 627.0, 636.0, 633.0, 633.0, 636.0, 636.0, 636.0, 630.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 639.0, 630.0, 627.0, 636.0, 579.0, 636.0, 590.0, 636.0, 570.0, 636.0, 579.0, 639.0, 630.0, 633.0, 582.0, 582.0, 627.0, 639.0, 627.0, 639.0, 633.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 630.0, 582.0, 573.0, 639.0, 587.0, 636.0, 633.0, 630.0, 581.0, 633.0, 582.0, 633.0, 311.0, 636.0, 630.0, 633.0, 636.0, 584.0, 636.0, 636.0, 627.0, 639.0, 630.0, 636.0, 633.0, 636.0, 587.0, 639.0, 579.0, 636.0, 630.0, 627.0, 633.0, 639.0, 636.0, 564.0, 630.0, 630.0, 636.0, 636.0, 636.0, 639.0, 636.0, 633.0, 630.0, 633.0, 579.0, 627.0, 408.0, 579.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 311.0, 322.0, 317.0, 319.0, 317.0, 318.0, 312.0, 277.0, 302.0, 291.0, 296.0, 313.0, 314.0, 319.0, 317.0, 315.0, 318.0, 314.0, 319.0, 316.0, 320.0, 324.0, 312.0, 319.0, 317.0, 321.0, 309.0, 316.0, 314.0, 311.0, 319.0, 316.0, 317.0, 319.0, 317.0, 324.0, 312.0, 316.0, 320.0, 319.0, 320.0, 319.0, 311.0, 310.0, 317.0, 319.0, 317.0, 293.0, 286.0, 322.0, 314.0, 296.0, 294.0, 314.0, 322.0, 290.0, 280.0, 311.0, 325.0, 288.0, 291.0, 319.0, 320.0, 316.0, 314.0, 314.0, 319.0, 294.0, 288.0, 283.0, 299.0, 316.0, 311.0, 322.0, 317.0, 313.0, 314.0, 325.0, 314.0, 314.0, 319.0, 321.0, 309.0, 316.0, 320.0, 288.0, 299.0, 301.0, 286.0, 317.0, 322.0, 319.0, 311.0, 316.0, 317.0, 316.0, 314.0, 279.0, 303.0, 289.0, 284.0, 319.0, 320.0, 291.0, 296.0, 321.0, 315.0, 316.0, 317.0, 316.0, 314.0, 293.0, 288.0, 316.0, 317.0, 293.0, 289.0, 316.0, 317.0, 151.0, 160.0, 319.0, 317.0, 316.0, 314.0, 319.0, 314.0, 319.0, 317.0, 291.0, 293.0, 311.0, 325.0, 319.0, 317.0, 309.0, 318.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 285.0, 302.0, 319.0, 320.0, 285.0, 294.0, 319.0, 317.0, 311.0, 319.0, 313.0, 314.0, 313.0, 320.0, 319.0, 320.0, 324.0, 312.0, 274.0, 290.0, 315.0, 315.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 285.0, 294.0, 316.0, 311.0, 210.0, 198.0, 283.0, 296.0, 316.0, 314.0, 290.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7945004798552335, "mean_processing_ms": 0.22737090000954274, "mean_inference_ms": 1.3975837411593142}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10536000, "num_steps_sampled": 5619200, "sample_time_ms": 21744.649, "load_time_ms": 36.586, "grad_time_ms": 10113.951, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006775472429580986, "policy_loss": -0.006301699206233025, "vf_loss": 75.32054138183594, 
"vf_explained_var": 0.7757834792137146, "kl": 0.0017814143793657422, "entropy": 1.1056231260299683, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5619200, "episodes_total": 14048, "training_iteration": 439, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-33-09", "timestamp": 1660260789, "time_this_iter_s": 28.622015953063965, "time_total_s": 19198.69787979126, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": 
[null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19198.69787979126, "timesteps_since_restore": 5619200, "iterations_since_restore": 439, "perf": {"cpu_util_percent": 31.939024390243897, "ram_util_percent": 59.1219512195122}} +{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 620.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 310.075}, "custom_metrics": {"sparse_reward_mean": 215.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 190.15, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.78, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.29, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.46, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.77, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.93, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.72, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.46, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.46, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 633.0, 627.0, 582.0, 630.0, 636.0, 636.0, 630.0, 639.0, 627.0, 579.0, 624.0, 636.0, 630.0, 582.0, 636.0, 639.0, 633.0, 633.0, 639.0, 576.0, 636.0, 573.0, 630.0, 587.0, 633.0, 630.0, 636.0, 639.0, 633.0, 630.0, 587.0, 636.0, 584.0, 636.0, 636.0, 627.0, 639.0, 630.0, 636.0, 633.0, 636.0, 587.0, 639.0, 579.0, 636.0, 630.0, 627.0, 633.0, 639.0, 636.0, 564.0, 630.0, 630.0, 636.0, 636.0, 636.0, 639.0, 636.0, 633.0, 630.0, 633.0, 579.0, 627.0, 408.0, 579.0, 630.0, 582.0, 627.0, 639.0, 636.0, 630.0, 579.0, 587.0, 627.0, 636.0, 633.0, 633.0, 636.0, 636.0, 636.0, 630.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 639.0, 630.0, 627.0, 636.0, 579.0, 636.0, 590.0, 636.0, 570.0, 636.0, 579.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 294.0, 319.0, 314.0, 311.0, 316.0, 288.0, 294.0, 314.0, 316.0, 319.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 306.0, 321.0, 293.0, 286.0, 307.0, 317.0, 319.0, 317.0, 311.0, 319.0, 293.0, 289.0, 321.0, 315.0, 322.0, 317.0, 317.0, 316.0, 311.0, 322.0, 322.0, 317.0, 293.0, 283.0, 319.0, 317.0, 279.0, 294.0, 308.0, 322.0, 285.0, 302.0, 316.0, 317.0, 305.0, 325.0, 319.0, 317.0, 319.0, 320.0, 319.0, 314.0, 318.0, 312.0, 288.0, 299.0, 319.0, 317.0, 291.0, 293.0, 311.0, 325.0, 319.0, 317.0, 309.0, 318.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 285.0, 302.0, 319.0, 320.0, 285.0, 294.0, 319.0, 317.0, 311.0, 319.0, 313.0, 314.0, 313.0, 320.0, 319.0, 320.0, 324.0, 312.0, 274.0, 290.0, 315.0, 315.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 285.0, 294.0, 316.0, 311.0, 210.0, 198.0, 283.0, 296.0, 316.0, 314.0, 290.0, 292.0, 316.0, 311.0, 322.0, 317.0, 319.0, 317.0, 318.0, 312.0, 277.0, 302.0, 291.0, 296.0, 313.0, 314.0, 319.0, 317.0, 315.0, 318.0, 314.0, 319.0, 316.0, 320.0, 324.0, 312.0, 319.0, 317.0, 321.0, 309.0, 316.0, 314.0, 311.0, 319.0, 316.0, 317.0, 319.0, 317.0, 324.0, 312.0, 316.0, 320.0, 319.0, 320.0, 319.0, 311.0, 310.0, 317.0, 319.0, 317.0, 293.0, 286.0, 322.0, 314.0, 296.0, 294.0, 314.0, 322.0, 290.0, 280.0, 311.0, 325.0, 288.0, 291.0, 319.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7937258148671773, "mean_processing_ms": 0.22721751341177562, "mean_inference_ms": 1.3967610737623508}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10560000, "num_steps_sampled": 5632000, "sample_time_ms": 21732.229, "load_time_ms": 36.475, "grad_time_ms": 
9880.218, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00015868060290813446, "policy_loss": -0.007246671710163355, "vf_loss": 76.3777847290039, "vf_explained_var": 0.7677585482597351, "kl": 0.0021524711046367884, "entropy": 1.099584698677063, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5632000, "episodes_total": 14080, "training_iteration": 440, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-33-39", "timestamp": 1660260819, "time_this_iter_s": 29.637184143066406, "time_total_s": 19228.335063934326, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": 
["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19228.335063934326, "timesteps_since_restore": 5632000, "iterations_since_restore": 440, "perf": {"cpu_util_percent": 33.75, "ram_util_percent": 59.899999999999984}} +{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 617.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 308.88}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.36, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.6, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.01, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.27, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.84, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.16, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.69, 
"useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.73, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.27, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.27, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 627.0, 627.0, 636.0, 582.0, 630.0, 633.0, 639.0, 570.0, 582.0, 576.0, 636.0, 639.0, 636.0, 582.0, 630.0, 627.0, 624.0, 587.0, 630.0, 633.0, 582.0, 630.0, 636.0, 582.0, 576.0, 633.0, 633.0, 630.0, 639.0, 636.0, 408.0, 579.0, 630.0, 582.0, 627.0, 639.0, 636.0, 630.0, 579.0, 587.0, 627.0, 636.0, 633.0, 633.0, 636.0, 636.0, 636.0, 630.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 639.0, 630.0, 627.0, 636.0, 579.0, 636.0, 590.0, 636.0, 570.0, 636.0, 579.0, 639.0, 584.0, 633.0, 627.0, 582.0, 630.0, 636.0, 636.0, 630.0, 639.0, 627.0, 579.0, 624.0, 636.0, 630.0, 582.0, 636.0, 639.0, 633.0, 633.0, 639.0, 576.0, 636.0, 573.0, 630.0, 587.0, 633.0, 630.0, 636.0, 639.0, 633.0, 630.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 316.0, 311.0, 320.0, 316.0, 288.0, 294.0, 311.0, 319.0, 319.0, 314.0, 319.0, 320.0, 289.0, 281.0, 291.0, 291.0, 288.0, 288.0, 316.0, 320.0, 319.0, 320.0, 319.0, 317.0, 295.0, 287.0, 313.0, 317.0, 319.0, 308.0, 310.0, 314.0, 296.0, 291.0, 314.0, 316.0, 313.0, 320.0, 301.0, 281.0, 316.0, 314.0, 314.0, 322.0, 291.0, 291.0, 288.0, 288.0, 316.0, 317.0, 319.0, 314.0, 316.0, 314.0, 320.0, 319.0, 319.0, 317.0, 210.0, 198.0, 283.0, 296.0, 316.0, 314.0, 290.0, 292.0, 316.0, 311.0, 322.0, 317.0, 319.0, 317.0, 318.0, 312.0, 277.0, 302.0, 291.0, 296.0, 313.0, 314.0, 319.0, 317.0, 315.0, 318.0, 314.0, 319.0, 316.0, 320.0, 324.0, 312.0, 319.0, 317.0, 321.0, 309.0, 316.0, 314.0, 311.0, 319.0, 316.0, 317.0, 319.0, 317.0, 324.0, 312.0, 316.0, 320.0, 319.0, 320.0, 319.0, 311.0, 310.0, 317.0, 319.0, 317.0, 293.0, 286.0, 322.0, 314.0, 296.0, 294.0, 314.0, 322.0, 290.0, 280.0, 311.0, 325.0, 288.0, 291.0, 319.0, 320.0, 290.0, 294.0, 319.0, 314.0, 311.0, 316.0, 288.0, 294.0, 314.0, 316.0, 319.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 306.0, 321.0, 293.0, 286.0, 307.0, 317.0, 319.0, 317.0, 311.0, 319.0, 293.0, 289.0, 321.0, 315.0, 322.0, 317.0, 317.0, 316.0, 311.0, 322.0, 322.0, 317.0, 293.0, 283.0, 319.0, 317.0, 279.0, 294.0, 308.0, 322.0, 285.0, 302.0, 316.0, 317.0, 305.0, 325.0, 319.0, 317.0, 319.0, 320.0, 319.0, 314.0, 318.0, 312.0, 288.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7929580463765498, "mean_processing_ms": 0.22706572704499173, 
"mean_inference_ms": 1.3960311893008097}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10584000, "num_steps_sampled": 5644800, "sample_time_ms": 22196.121, "load_time_ms": 36.22, "grad_time_ms": 10110.176, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019730820786207914, "policy_loss": -0.005001601297408342, "vf_loss": 75.2809066772461, "vf_explained_var": 0.770819902420044, "kl": 0.0019049126422032714, "entropy": 1.106797695159912, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5644800, "episodes_total": 14112, "training_iteration": 441, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-34-15", "timestamp": 1660260855, "time_this_iter_s": 36.16889190673828, "time_total_s": 19264.503955841064, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, 
"num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19264.503955841064, "timesteps_since_restore": 5644800, "iterations_since_restore": 441, "perf": {"cpu_util_percent": 30.368627450980394, "ram_util_percent": 59.160784313725486}} +{"episode_reward_max": 639.0, "episode_reward_min": 564.0, "episode_reward_mean": 618.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 279.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 309.15}, "custom_metrics": {"sparse_reward_mean": 214.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 189.1, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, 
"tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.51, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.85, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.09, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, 
"dish_pickup_agent_0_mean": 6.74, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.31, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.91, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.09, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 11, 
"viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.09, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 624.0, 582.0, 636.0, 630.0, 630.0, 633.0, 587.0, 567.0, 630.0, 639.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 639.0, 636.0, 630.0, 630.0, 630.0, 627.0, 630.0, 582.0, 582.0, 587.0, 630.0, 630.0, 564.0, 576.0, 636.0, 570.0, 636.0, 579.0, 639.0, 584.0, 633.0, 627.0, 582.0, 630.0, 636.0, 636.0, 630.0, 639.0, 627.0, 579.0, 624.0, 636.0, 630.0, 582.0, 636.0, 639.0, 633.0, 633.0, 639.0, 576.0, 636.0, 573.0, 630.0, 587.0, 633.0, 630.0, 636.0, 639.0, 633.0, 630.0, 587.0, 633.0, 633.0, 627.0, 
627.0, 636.0, 582.0, 630.0, 633.0, 639.0, 570.0, 582.0, 576.0, 636.0, 639.0, 636.0, 582.0, 630.0, 627.0, 624.0, 587.0, 630.0, 633.0, 582.0, 630.0, 636.0, 582.0, 576.0, 633.0, 633.0, 630.0, 639.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 316.0, 313.0, 311.0, 291.0, 291.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 290.0, 297.0, 282.0, 285.0, 319.0, 311.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 319.0, 314.0, 311.0, 322.0, 314.0, 316.0, 314.0, 322.0, 322.0, 317.0, 319.0, 317.0, 319.0, 311.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 316.0, 314.0, 291.0, 291.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 316.0, 314.0, 281.0, 283.0, 290.0, 286.0, 319.0, 317.0, 290.0, 280.0, 311.0, 325.0, 288.0, 291.0, 319.0, 320.0, 290.0, 294.0, 319.0, 314.0, 311.0, 316.0, 288.0, 294.0, 314.0, 316.0, 319.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 306.0, 321.0, 293.0, 286.0, 307.0, 317.0, 319.0, 317.0, 311.0, 319.0, 293.0, 289.0, 321.0, 315.0, 322.0, 317.0, 317.0, 316.0, 311.0, 322.0, 322.0, 317.0, 293.0, 283.0, 319.0, 317.0, 279.0, 294.0, 308.0, 322.0, 285.0, 302.0, 316.0, 317.0, 305.0, 325.0, 319.0, 317.0, 319.0, 320.0, 319.0, 314.0, 318.0, 312.0, 288.0, 299.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 316.0, 311.0, 320.0, 316.0, 288.0, 294.0, 311.0, 319.0, 319.0, 314.0, 319.0, 320.0, 289.0, 281.0, 291.0, 291.0, 288.0, 288.0, 316.0, 320.0, 319.0, 320.0, 319.0, 317.0, 295.0, 287.0, 313.0, 317.0, 319.0, 308.0, 310.0, 314.0, 296.0, 291.0, 314.0, 316.0, 313.0, 
320.0, 301.0, 281.0, 316.0, 314.0, 314.0, 322.0, 291.0, 291.0, 288.0, 288.0, 316.0, 317.0, 319.0, 314.0, 316.0, 314.0, 320.0, 319.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7921926702018212, "mean_processing_ms": 0.22691451487922393, "mean_inference_ms": 1.3953653520568468}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10608000, "num_steps_sampled": 5657600, "sample_time_ms": 21841.972, "load_time_ms": 35.995, "grad_time_ms": 10215.543, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0027040394488722086, "policy_loss": -0.004097369499504566, "vf_loss": 73.54324340820312, "vf_explained_var": 0.7729549407958984, "kl": 0.0019481302006170154, "entropy": 1.105837106704712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5657600, "episodes_total": 14144, "training_iteration": 442, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-34-49", "timestamp": 1660260889, "time_this_iter_s": 33.27770400047302, "time_total_s": 19297.781659841537, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, 
"no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, 
"remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19297.781659841537, "timesteps_since_restore": 5657600, "iterations_since_restore": 442, "perf": {"cpu_util_percent": 29.43404255319148, "ram_util_percent": 59.22553191489361}} +{"episode_reward_max": 639.0, "episode_reward_min": 564.0, "episode_reward_mean": 615.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 280.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.955}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 200, "sparse_reward_max": 220, 
"shaped_reward_mean": 188.71, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, 
"potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.7, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.2, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.92, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 
0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 579.0, 633.0, 636.0, 636.0, 636.0, 567.0, 630.0, 587.0, 587.0, 587.0, 630.0, 639.0, 579.0, 636.0, 630.0, 630.0, 587.0, 633.0, 579.0, 587.0, 587.0, 582.0, 630.0, 630.0, 579.0, 636.0, 633.0, 627.0, 630.0, 587.0, 579.0, 639.0, 633.0, 630.0, 587.0, 633.0, 633.0, 627.0, 627.0, 636.0, 582.0, 630.0, 633.0, 639.0, 570.0, 
582.0, 576.0, 636.0, 639.0, 636.0, 582.0, 630.0, 627.0, 624.0, 587.0, 630.0, 633.0, 582.0, 630.0, 636.0, 582.0, 576.0, 633.0, 633.0, 630.0, 639.0, 636.0, 627.0, 624.0, 582.0, 636.0, 630.0, 630.0, 633.0, 587.0, 567.0, 630.0, 639.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 639.0, 636.0, 630.0, 630.0, 630.0, 627.0, 630.0, 582.0, 582.0, 587.0, 630.0, 630.0, 564.0, 576.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 296.0, 283.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 319.0, 317.0, 280.0, 287.0, 311.0, 319.0, 288.0, 299.0, 291.0, 296.0, 296.0, 291.0, 316.0, 314.0, 314.0, 325.0, 285.0, 294.0, 319.0, 317.0, 314.0, 316.0, 319.0, 311.0, 293.0, 294.0, 314.0, 319.0, 283.0, 296.0, 288.0, 299.0, 301.0, 286.0, 296.0, 286.0, 316.0, 314.0, 316.0, 314.0, 298.0, 281.0, 319.0, 317.0, 317.0, 316.0, 308.0, 319.0, 311.0, 319.0, 295.0, 292.0, 290.0, 289.0, 319.0, 320.0, 319.0, 314.0, 318.0, 312.0, 288.0, 299.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 316.0, 311.0, 320.0, 316.0, 288.0, 294.0, 311.0, 319.0, 319.0, 314.0, 319.0, 320.0, 289.0, 281.0, 291.0, 291.0, 288.0, 288.0, 316.0, 320.0, 319.0, 320.0, 319.0, 317.0, 295.0, 287.0, 313.0, 317.0, 319.0, 308.0, 310.0, 314.0, 296.0, 291.0, 314.0, 316.0, 313.0, 320.0, 301.0, 281.0, 316.0, 314.0, 314.0, 322.0, 291.0, 291.0, 288.0, 288.0, 316.0, 317.0, 319.0, 314.0, 316.0, 314.0, 320.0, 319.0, 319.0, 317.0, 311.0, 316.0, 313.0, 311.0, 291.0, 291.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 290.0, 297.0, 282.0, 285.0, 
319.0, 311.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 319.0, 314.0, 311.0, 322.0, 314.0, 316.0, 314.0, 322.0, 322.0, 317.0, 319.0, 317.0, 319.0, 311.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 316.0, 314.0, 291.0, 291.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 316.0, 314.0, 281.0, 283.0, 290.0, 286.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7914282474248366, "mean_processing_ms": 0.22676310167632485, "mean_inference_ms": 1.3947257800951314}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10632000, "num_steps_sampled": 5670400, "sample_time_ms": 21982.654, "load_time_ms": 36.099, "grad_time_ms": 10196.436, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0026817016769200563, "policy_loss": -0.004366503097116947, "vf_loss": 76.00869750976562, "vf_explained_var": 0.7792714238166809, "kl": 0.0014469980960711837, "entropy": 1.1053307056427002, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5670400, "episodes_total": 14176, "training_iteration": 443, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-35-20", "timestamp": 1660260920, "time_this_iter_s": 31.264520168304443, "time_total_s": 19329.046180009842, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": 
true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19329.046180009842, "timesteps_since_restore": 5670400, "iterations_since_restore": 443, "perf": {"cpu_util_percent": 30.41136363636364, "ram_util_percent": 59.12272727272728}} +{"episode_reward_max": 639.0, "episode_reward_min": 479.0, "episode_reward_mean": 614.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 239.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.27}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.54, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.25, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.54, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.47, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.11, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.47, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 14, 
"optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.47, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 636.0, 633.0, 624.0, 636.0, 630.0, 633.0, 639.0, 570.0, 636.0, 630.0, 636.0, 582.0, 587.0, 579.0, 584.0, 
633.0, 582.0, 587.0, 636.0, 633.0, 621.0, 630.0, 633.0, 584.0, 627.0, 630.0, 633.0, 479.0, 587.0, 587.0, 633.0, 630.0, 639.0, 636.0, 627.0, 624.0, 582.0, 636.0, 630.0, 630.0, 633.0, 587.0, 567.0, 630.0, 639.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 639.0, 636.0, 630.0, 630.0, 630.0, 627.0, 630.0, 582.0, 582.0, 587.0, 630.0, 630.0, 564.0, 576.0, 636.0, 633.0, 579.0, 633.0, 636.0, 636.0, 636.0, 567.0, 630.0, 587.0, 587.0, 587.0, 630.0, 639.0, 579.0, 636.0, 630.0, 630.0, 587.0, 633.0, 579.0, 587.0, 587.0, 582.0, 630.0, 630.0, 579.0, 636.0, 633.0, 627.0, 630.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 311.0, 319.0, 314.0, 322.0, 314.0, 319.0, 314.0, 310.0, 314.0, 322.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 288.0, 282.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 298.0, 289.0, 294.0, 285.0, 288.0, 296.0, 314.0, 319.0, 296.0, 286.0, 299.0, 288.0, 319.0, 317.0, 316.0, 317.0, 310.0, 311.0, 319.0, 311.0, 322.0, 311.0, 288.0, 296.0, 310.0, 317.0, 313.0, 317.0, 316.0, 317.0, 239.0, 240.0, 293.0, 294.0, 296.0, 291.0, 319.0, 314.0, 316.0, 314.0, 320.0, 319.0, 319.0, 317.0, 311.0, 316.0, 313.0, 311.0, 291.0, 291.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 290.0, 297.0, 282.0, 285.0, 319.0, 311.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 319.0, 314.0, 311.0, 322.0, 314.0, 316.0, 314.0, 322.0, 322.0, 317.0, 319.0, 317.0, 319.0, 311.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 316.0, 314.0, 291.0, 291.0, 290.0, 292.0, 296.0, 
291.0, 316.0, 314.0, 316.0, 314.0, 281.0, 283.0, 290.0, 286.0, 319.0, 317.0, 314.0, 319.0, 296.0, 283.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 319.0, 317.0, 280.0, 287.0, 311.0, 319.0, 288.0, 299.0, 291.0, 296.0, 296.0, 291.0, 316.0, 314.0, 314.0, 325.0, 285.0, 294.0, 319.0, 317.0, 314.0, 316.0, 319.0, 311.0, 293.0, 294.0, 314.0, 319.0, 283.0, 296.0, 288.0, 299.0, 301.0, 286.0, 296.0, 286.0, 316.0, 314.0, 316.0, 314.0, 298.0, 281.0, 319.0, 317.0, 317.0, 316.0, 308.0, 319.0, 311.0, 319.0, 295.0, 292.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7906650471868468, "mean_processing_ms": 0.2266115620582618, "mean_inference_ms": 1.3940369234530334}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10656000, "num_steps_sampled": 5683200, "sample_time_ms": 22188.83, "load_time_ms": 36.224, "grad_time_ms": 10331.481, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037986973766237497, "policy_loss": -0.0035794072318822145, "vf_loss": 79.28974151611328, "vf_explained_var": 0.7626357078552246, "kl": 0.0019579820800572634, "entropy": 1.1017413139343262, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5683200, "episodes_total": 14208, "training_iteration": 444, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-35-52", "timestamp": 1660260952, "time_this_iter_s": 32.303210973739624, "time_total_s": 19361.34939098358, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19361.34939098358, "timesteps_since_restore": 5683200, "iterations_since_restore": 444, "perf": {"cpu_util_percent": 30.893478260869564, 
"ram_util_percent": 59.16521739130435}} +{"episode_reward_max": 639.0, "episode_reward_min": 479.0, "episode_reward_mean": 610.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 239.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.46}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.12, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.18, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, 
"onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.42, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 16.37, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 582.0, 582.0, 582.0, 630.0, 578.0, 576.0, 587.0, 587.0, 630.0, 636.0, 630.0, 639.0, 636.0, 636.0, 579.0, 630.0, 636.0, 627.0, 636.0, 578.0, 633.0, 633.0, 582.0, 636.0, 636.0, 587.0, 590.0, 639.0, 630.0, 630.0, 582.0, 630.0, 564.0, 576.0, 636.0, 633.0, 579.0, 633.0, 636.0, 636.0, 636.0, 567.0, 630.0, 587.0, 587.0, 587.0, 630.0, 639.0, 579.0, 636.0, 630.0, 630.0, 587.0, 633.0, 579.0, 587.0, 587.0, 582.0, 630.0, 630.0, 579.0, 636.0, 633.0, 627.0, 630.0, 587.0, 579.0, 636.0, 630.0, 636.0, 633.0, 624.0, 636.0, 630.0, 633.0, 639.0, 570.0, 636.0, 630.0, 636.0, 582.0, 587.0, 579.0, 584.0, 633.0, 582.0, 587.0, 636.0, 633.0, 621.0, 630.0, 633.0, 584.0, 627.0, 630.0, 633.0, 479.0, 587.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 305.0, 291.0, 291.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 284.0, 294.0, 295.0, 281.0, 299.0, 288.0, 288.0, 299.0, 308.0, 322.0, 319.0, 317.0, 311.0, 319.0, 319.0, 320.0, 319.0, 317.0, 317.0, 319.0, 288.0, 291.0, 316.0, 314.0, 327.0, 309.0, 315.0, 312.0, 319.0, 317.0, 280.0, 298.0, 311.0, 322.0, 319.0, 314.0, 293.0, 289.0, 319.0, 317.0, 319.0, 317.0, 293.0, 294.0, 291.0, 299.0, 319.0, 320.0, 316.0, 314.0, 311.0, 319.0, 291.0, 291.0, 316.0, 314.0, 281.0, 283.0, 290.0, 286.0, 319.0, 317.0, 314.0, 319.0, 296.0, 283.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 319.0, 317.0, 280.0, 287.0, 311.0, 319.0, 288.0, 299.0, 291.0, 
296.0, 296.0, 291.0, 316.0, 314.0, 314.0, 325.0, 285.0, 294.0, 319.0, 317.0, 314.0, 316.0, 319.0, 311.0, 293.0, 294.0, 314.0, 319.0, 283.0, 296.0, 288.0, 299.0, 301.0, 286.0, 296.0, 286.0, 316.0, 314.0, 316.0, 314.0, 298.0, 281.0, 319.0, 317.0, 317.0, 316.0, 308.0, 319.0, 311.0, 319.0, 295.0, 292.0, 290.0, 289.0, 319.0, 317.0, 311.0, 319.0, 314.0, 322.0, 314.0, 319.0, 314.0, 310.0, 314.0, 322.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 288.0, 282.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 298.0, 289.0, 294.0, 285.0, 288.0, 296.0, 314.0, 319.0, 296.0, 286.0, 299.0, 288.0, 319.0, 317.0, 316.0, 317.0, 310.0, 311.0, 319.0, 311.0, 322.0, 311.0, 288.0, 296.0, 310.0, 317.0, 313.0, 317.0, 316.0, 317.0, 239.0, 240.0, 293.0, 294.0, 296.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7899054631660849, "mean_processing_ms": 0.22646096684642672, "mean_inference_ms": 1.3933342141949396}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10680000, "num_steps_sampled": 5696000, "sample_time_ms": 22242.608, "load_time_ms": 35.979, "grad_time_ms": 10328.703, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013677343958988786, "policy_loss": -0.006408216897398233, "vf_loss": 83.29845428466797, "vf_explained_var": 0.7579674124717712, "kl": 0.001941792550496757, "entropy": 1.1077739000320435, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5696000, "episodes_total": 14240, "training_iteration": 445, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-36-24", "timestamp": 1660260984, "time_this_iter_s": 31.836724996566772, "time_total_s": 19393.18611598015, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19393.18611598015, "timesteps_since_restore": 5696000, "iterations_since_restore": 445, "perf": {"cpu_util_percent": 30.162222222222226, "ram_util_percent": 59.18000000000001}} +{"episode_reward_max": 639.0, "episode_reward_min": 456.0, "episode_reward_mean": 608.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.41}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.22, "shaped_reward_min": 136, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.19, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 15.99, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 579.0, 570.0, 582.0, 636.0, 636.0, 630.0, 587.0, 630.0, 630.0, 456.0, 573.0, 630.0, 579.0, 576.0, 633.0, 587.0, 633.0, 630.0, 630.0, 630.0, 633.0, 582.0, 579.0, 630.0, 639.0, 630.0, 639.0, 587.0, 627.0, 522.0, 582.0, 627.0, 630.0, 587.0, 579.0, 636.0, 630.0, 636.0, 633.0, 624.0, 636.0, 630.0, 633.0, 639.0, 570.0, 636.0, 630.0, 636.0, 582.0, 587.0, 579.0, 584.0, 633.0, 582.0, 587.0, 636.0, 633.0, 621.0, 630.0, 633.0, 584.0, 627.0, 630.0, 633.0, 479.0, 587.0, 587.0, 587.0, 582.0, 582.0, 582.0, 630.0, 578.0, 576.0, 587.0, 587.0, 630.0, 636.0, 630.0, 639.0, 636.0, 636.0, 579.0, 630.0, 636.0, 627.0, 636.0, 578.0, 633.0, 633.0, 582.0, 636.0, 636.0, 587.0, 590.0, 639.0, 630.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [310.0, 317.0, 290.0, 289.0, 288.0, 282.0, 291.0, 291.0, 324.0, 312.0, 314.0, 322.0, 314.0, 316.0, 299.0, 288.0, 316.0, 314.0, 316.0, 314.0, 228.0, 228.0, 281.0, 292.0, 316.0, 314.0, 293.0, 286.0, 288.0, 288.0, 316.0, 317.0, 293.0, 294.0, 314.0, 319.0, 321.0, 309.0, 316.0, 314.0, 322.0, 308.0, 319.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 319.0, 320.0, 321.0, 309.0, 319.0, 320.0, 299.0, 288.0, 319.0, 308.0, 
251.0, 271.0, 296.0, 286.0, 308.0, 319.0, 311.0, 319.0, 295.0, 292.0, 290.0, 289.0, 319.0, 317.0, 311.0, 319.0, 314.0, 322.0, 314.0, 319.0, 314.0, 310.0, 314.0, 322.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 288.0, 282.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 298.0, 289.0, 294.0, 285.0, 288.0, 296.0, 314.0, 319.0, 296.0, 286.0, 299.0, 288.0, 319.0, 317.0, 316.0, 317.0, 310.0, 311.0, 319.0, 311.0, 322.0, 311.0, 288.0, 296.0, 310.0, 317.0, 313.0, 317.0, 316.0, 317.0, 239.0, 240.0, 293.0, 294.0, 296.0, 291.0, 282.0, 305.0, 291.0, 291.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 284.0, 294.0, 295.0, 281.0, 299.0, 288.0, 288.0, 299.0, 308.0, 322.0, 319.0, 317.0, 311.0, 319.0, 319.0, 320.0, 319.0, 317.0, 317.0, 319.0, 288.0, 291.0, 316.0, 314.0, 327.0, 309.0, 315.0, 312.0, 319.0, 317.0, 280.0, 298.0, 311.0, 322.0, 319.0, 314.0, 293.0, 289.0, 319.0, 317.0, 319.0, 317.0, 293.0, 294.0, 291.0, 299.0, 319.0, 320.0, 316.0, 314.0, 311.0, 319.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7891523423128755, "mean_processing_ms": 0.22631170679234558, "mean_inference_ms": 1.3926725732710878}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10704000, "num_steps_sampled": 5708800, "sample_time_ms": 21771.335, "load_time_ms": 36.027, "grad_time_ms": 10385.96, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0026401570066809654, "policy_loss": -0.0047949193976819515, "vf_loss": 79.88745880126953, "vf_explained_var": 0.7707352638244629, "kl": 0.0022178192157298326, "entropy": 1.107340693473816, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5708800, "episodes_total": 14272, "training_iteration": 446, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-36-58", "timestamp": 1660261018, "time_this_iter_s": 33.74682116508484, "time_total_s": 
19426.932937145233, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": 
false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, 
"num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19426.932937145233, "timesteps_since_restore": 5708800, "iterations_since_restore": 446, "perf": {"cpu_util_percent": 32.958333333333336, "ram_util_percent": 59.18541666666667}} +{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 602.99, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 202.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.495}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.79, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.04, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.62, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.81, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, 
"soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.62, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.62, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 627.0, 587.0, 582.0, 636.0, 587.0, 627.0, 636.0, 630.0, 630.0, 587.0, 621.0, 575.0, 639.0, 633.0, 636.0, 584.0, 582.0, 573.0, 498.0, 587.0, 579.0, 408.0, 636.0, 587.0, 587.0, 633.0, 573.0, 636.0, 587.0, 636.0, 582.0, 633.0, 479.0, 587.0, 587.0, 587.0, 582.0, 582.0, 582.0, 630.0, 578.0, 576.0, 587.0, 587.0, 630.0, 636.0, 630.0, 639.0, 636.0, 636.0, 579.0, 630.0, 636.0, 627.0, 636.0, 578.0, 633.0, 633.0, 582.0, 636.0, 636.0, 587.0, 590.0, 639.0, 630.0, 630.0, 582.0, 627.0, 579.0, 570.0, 582.0, 636.0, 636.0, 630.0, 587.0, 630.0, 630.0, 456.0, 573.0, 630.0, 579.0, 576.0, 633.0, 587.0, 633.0, 630.0, 630.0, 630.0, 633.0, 582.0, 579.0, 630.0, 639.0, 630.0, 639.0, 587.0, 627.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 308.0, 319.0, 286.0, 301.0, 293.0, 289.0, 316.0, 320.0, 291.0, 296.0, 313.0, 314.0, 314.0, 322.0, 311.0, 319.0, 314.0, 316.0, 288.0, 299.0, 310.0, 311.0, 288.0, 287.0, 317.0, 322.0, 321.0, 312.0, 319.0, 
317.0, 302.0, 282.0, 291.0, 291.0, 282.0, 291.0, 253.0, 245.0, 290.0, 297.0, 288.0, 291.0, 206.0, 202.0, 319.0, 317.0, 293.0, 294.0, 288.0, 299.0, 316.0, 317.0, 285.0, 288.0, 319.0, 317.0, 299.0, 288.0, 319.0, 317.0, 293.0, 289.0, 316.0, 317.0, 239.0, 240.0, 293.0, 294.0, 296.0, 291.0, 282.0, 305.0, 291.0, 291.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 284.0, 294.0, 295.0, 281.0, 299.0, 288.0, 288.0, 299.0, 308.0, 322.0, 319.0, 317.0, 311.0, 319.0, 319.0, 320.0, 319.0, 317.0, 317.0, 319.0, 288.0, 291.0, 316.0, 314.0, 327.0, 309.0, 315.0, 312.0, 319.0, 317.0, 280.0, 298.0, 311.0, 322.0, 319.0, 314.0, 293.0, 289.0, 319.0, 317.0, 319.0, 317.0, 293.0, 294.0, 291.0, 299.0, 319.0, 320.0, 316.0, 314.0, 311.0, 319.0, 291.0, 291.0, 310.0, 317.0, 290.0, 289.0, 288.0, 282.0, 291.0, 291.0, 324.0, 312.0, 314.0, 322.0, 314.0, 316.0, 299.0, 288.0, 316.0, 314.0, 316.0, 314.0, 228.0, 228.0, 281.0, 292.0, 316.0, 314.0, 293.0, 286.0, 288.0, 288.0, 316.0, 317.0, 293.0, 294.0, 314.0, 319.0, 321.0, 309.0, 316.0, 314.0, 322.0, 308.0, 319.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 319.0, 320.0, 321.0, 309.0, 319.0, 320.0, 299.0, 288.0, 319.0, 308.0, 251.0, 271.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7883964067869212, "mean_processing_ms": 0.22616136742936413, "mean_inference_ms": 1.39185520221877}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10728000, "num_steps_sampled": 5721600, "sample_time_ms": 21601.13, "load_time_ms": 36.066, "grad_time_ms": 10530.811, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004505176562815905, "policy_loss": -0.00313469092361629, "vf_loss": 81.9230728149414, "vf_explained_var": 0.7751343250274658, "kl": 0.002040610648691654, "entropy": 1.1048672199249268, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5721600, "episodes_total": 14304, 
"training_iteration": 447, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-37-28", "timestamp": 1660261048, "time_this_iter_s": 30.400289058685303, "time_total_s": 19457.33322620392, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19457.33322620392, "timesteps_since_restore": 5721600, "iterations_since_restore": 447, "perf": {"cpu_util_percent": 36.12558139534883, "ram_util_percent": 59.20697674418605}} +{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 604.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 202.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 302.49}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.38, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.19, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.39, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 630.0, 630.0, 630.0, 627.0, 630.0, 606.0, 633.0, 519.0, 633.0, 636.0, 573.0, 630.0, 636.0, 636.0, 584.0, 636.0, 627.0, 630.0, 636.0, 633.0, 633.0, 579.0, 633.0, 582.0, 582.0, 590.0, 627.0, 576.0, 570.0, 582.0, 639.0, 630.0, 630.0, 582.0, 627.0, 579.0, 570.0, 582.0, 636.0, 636.0, 630.0, 587.0, 630.0, 630.0, 456.0, 573.0, 630.0, 579.0, 576.0, 633.0, 587.0, 633.0, 630.0, 630.0, 630.0, 633.0, 582.0, 579.0, 630.0, 639.0, 630.0, 639.0, 587.0, 627.0, 522.0, 582.0, 636.0, 627.0, 587.0, 582.0, 636.0, 587.0, 627.0, 636.0, 630.0, 630.0, 587.0, 621.0, 575.0, 639.0, 633.0, 636.0, 584.0, 582.0, 573.0, 498.0, 587.0, 579.0, 408.0, 636.0, 587.0, 587.0, 633.0, 573.0, 636.0, 587.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 297.0, 316.0, 314.0, 
318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 313.0, 314.0, 316.0, 314.0, 304.0, 302.0, 316.0, 317.0, 267.0, 252.0, 322.0, 311.0, 313.0, 323.0, 278.0, 295.0, 316.0, 314.0, 314.0, 322.0, 319.0, 317.0, 298.0, 286.0, 316.0, 320.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 314.0, 319.0, 299.0, 280.0, 319.0, 314.0, 286.0, 296.0, 288.0, 294.0, 296.0, 294.0, 308.0, 319.0, 285.0, 291.0, 272.0, 298.0, 282.0, 300.0, 319.0, 320.0, 316.0, 314.0, 311.0, 319.0, 291.0, 291.0, 310.0, 317.0, 290.0, 289.0, 288.0, 282.0, 291.0, 291.0, 324.0, 312.0, 314.0, 322.0, 314.0, 316.0, 299.0, 288.0, 316.0, 314.0, 316.0, 314.0, 228.0, 228.0, 281.0, 292.0, 316.0, 314.0, 293.0, 286.0, 288.0, 288.0, 316.0, 317.0, 293.0, 294.0, 314.0, 319.0, 321.0, 309.0, 316.0, 314.0, 322.0, 308.0, 319.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 319.0, 320.0, 321.0, 309.0, 319.0, 320.0, 299.0, 288.0, 319.0, 308.0, 251.0, 271.0, 296.0, 286.0, 319.0, 317.0, 308.0, 319.0, 286.0, 301.0, 293.0, 289.0, 316.0, 320.0, 291.0, 296.0, 313.0, 314.0, 314.0, 322.0, 311.0, 319.0, 314.0, 316.0, 288.0, 299.0, 310.0, 311.0, 288.0, 287.0, 317.0, 322.0, 321.0, 312.0, 319.0, 317.0, 302.0, 282.0, 291.0, 291.0, 282.0, 291.0, 253.0, 245.0, 290.0, 297.0, 288.0, 291.0, 206.0, 202.0, 319.0, 317.0, 293.0, 294.0, 288.0, 299.0, 316.0, 317.0, 285.0, 288.0, 319.0, 317.0, 299.0, 288.0, 319.0, 317.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7876488216946843, "mean_processing_ms": 0.22601276823059913, "mean_inference_ms": 1.3911503009579902}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10752000, "num_steps_sampled": 5734400, "sample_time_ms": 21718.783, "load_time_ms": 35.962, "grad_time_ms": 10435.936, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016340842703357339, "policy_loss": -0.0055215489119291306, "vf_loss": 77.1473388671875, "vf_explained_var": 0.7692286968231201, "kl": 0.001808720058761537, "entropy": 1.1181970834732056, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5734400, "episodes_total": 14336, "training_iteration": 448, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-38-03", "timestamp": 1660261083, "time_this_iter_s": 34.906923055648804, "time_total_s": 19492.240149259567, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19492.240149259567, "timesteps_since_restore": 5734400, "iterations_since_restore": 448, "perf": {"cpu_util_percent": 33.91428571428571, "ram_util_percent": 59.09591836734693}} +{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 605.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 202.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 302.61}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.62, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.6, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.74, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 587.0, 579.0, 636.0, 582.0, 606.0, 636.0, 587.0, 633.0, 630.0, 630.0, 636.0, 633.0, 633.0, 624.0, 587.0, 627.0, 576.0, 584.0, 570.0, 582.0, 630.0, 579.0, 636.0, 633.0, 576.0, 587.0, 636.0, 582.0, 630.0, 636.0, 587.0, 627.0, 522.0, 582.0, 636.0, 627.0, 587.0, 582.0, 636.0, 587.0, 627.0, 636.0, 630.0, 630.0, 587.0, 621.0, 575.0, 639.0, 633.0, 636.0, 584.0, 582.0, 573.0, 498.0, 587.0, 579.0, 408.0, 636.0, 587.0, 587.0, 633.0, 573.0, 636.0, 587.0, 636.0, 582.0, 587.0, 630.0, 630.0, 630.0, 630.0, 627.0, 630.0, 606.0, 633.0, 519.0, 633.0, 636.0, 573.0, 630.0, 636.0, 636.0, 584.0, 636.0, 627.0, 630.0, 636.0, 633.0, 633.0, 579.0, 633.0, 582.0, 582.0, 590.0, 627.0, 576.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 322.0, 314.0, 290.0, 297.0, 283.0, 296.0, 324.0, 312.0, 285.0, 297.0, 302.0, 304.0, 319.0, 317.0, 287.0, 300.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 319.0, 317.0, 317.0, 316.0, 316.0, 317.0, 313.0, 311.0, 298.0, 289.0, 313.0, 314.0, 294.0, 282.0, 296.0, 288.0, 279.0, 291.0, 301.0, 281.0, 316.0, 314.0, 293.0, 286.0, 319.0, 317.0, 316.0, 317.0, 285.0, 291.0, 302.0, 285.0, 319.0, 317.0, 288.0, 294.0, 319.0, 311.0, 319.0, 317.0, 299.0, 288.0, 319.0, 308.0, 251.0, 271.0, 296.0, 286.0, 319.0, 317.0, 308.0, 319.0, 286.0, 301.0, 293.0, 289.0, 316.0, 320.0, 291.0, 296.0, 313.0, 314.0, 314.0, 322.0, 311.0, 319.0, 314.0, 316.0, 288.0, 299.0, 310.0, 311.0, 288.0, 287.0, 317.0, 322.0, 321.0, 312.0, 319.0, 317.0, 302.0, 282.0, 291.0, 291.0, 282.0, 291.0, 253.0, 245.0, 290.0, 297.0, 288.0, 291.0, 206.0, 202.0, 319.0, 317.0, 293.0, 294.0, 288.0, 299.0, 316.0, 317.0, 285.0, 288.0, 319.0, 317.0, 299.0, 288.0, 319.0, 317.0, 293.0, 289.0, 290.0, 297.0, 316.0, 314.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 313.0, 314.0, 316.0, 314.0, 304.0, 302.0, 316.0, 317.0, 267.0, 252.0, 322.0, 311.0, 313.0, 323.0, 278.0, 295.0, 316.0, 314.0, 314.0, 322.0, 319.0, 317.0, 298.0, 286.0, 316.0, 320.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 314.0, 319.0, 299.0, 280.0, 319.0, 314.0, 286.0, 296.0, 288.0, 294.0, 296.0, 294.0, 308.0, 319.0, 285.0, 291.0, 272.0, 298.0, 282.0, 300.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7869063801847372, "mean_processing_ms": 0.22586438145730278, "mean_inference_ms": 1.3903532948966142}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10776000, "num_steps_sampled": 5747200, "sample_time_ms": 21734.341, "load_time_ms": 36.357, "grad_time_ms": 10633.962, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.0015590289840474725, "policy_loss": -0.005610723048448563, "vf_loss": 77.2466812133789, "vf_explained_var": 0.7718032002449036, "kl": 0.0017093941569328308, "entropy": 1.1098432540893555, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5747200, "episodes_total": 14368, "training_iteration": 449, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-38-34", "timestamp": 1660261114, "time_this_iter_s": 30.762639045715332, "time_total_s": 19523.002788305283, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19523.002788305283, "timesteps_since_restore": 5747200, "iterations_since_restore": 449, "perf": {"cpu_util_percent": 34.67441860465116, "ram_util_percent": 60.09767441860465}} +{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 607.51, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.755}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.31, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.27, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.83, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.87, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.74, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.77, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.87, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.87, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 579.0, 633.0, 630.0, 587.0, 639.0, 582.0, 582.0, 522.0, 633.0, 630.0, 582.0, 633.0, 630.0, 579.0, 519.0, 627.0, 636.0, 627.0, 587.0, 587.0, 630.0, 573.0, 639.0, 630.0, 582.0, 627.0, 627.0, 587.0, 582.0, 587.0, 636.0, 587.0, 636.0, 582.0, 587.0, 630.0, 630.0, 630.0, 630.0, 627.0, 630.0, 606.0, 633.0, 519.0, 633.0, 636.0, 573.0, 630.0, 636.0, 636.0, 584.0, 636.0, 627.0, 630.0, 636.0, 633.0, 633.0, 579.0, 633.0, 582.0, 582.0, 590.0, 627.0, 576.0, 570.0, 582.0, 582.0, 636.0, 587.0, 579.0, 636.0, 582.0, 606.0, 636.0, 587.0, 633.0, 630.0, 630.0, 636.0, 633.0, 633.0, 624.0, 587.0, 627.0, 576.0, 584.0, 570.0, 582.0, 630.0, 579.0, 636.0, 633.0, 576.0, 587.0, 636.0, 582.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 291.0, 289.0, 293.0, 287.0, 292.0, 319.0, 314.0, 311.0, 319.0, 290.0, 297.0, 319.0, 320.0, 299.0, 283.0, 290.0, 292.0, 264.0, 258.0, 316.0, 317.0, 321.0, 309.0, 291.0, 291.0, 314.0, 319.0, 316.0, 314.0, 296.0, 283.0, 270.0, 249.0, 316.0, 311.0, 311.0, 325.0, 315.0, 312.0, 290.0, 297.0, 295.0, 292.0, 321.0, 309.0, 293.0, 280.0, 322.0, 317.0, 316.0, 314.0, 293.0, 289.0, 308.0, 319.0, 311.0, 316.0, 296.0, 291.0, 291.0, 291.0, 301.0, 286.0, 319.0, 317.0, 299.0, 288.0, 319.0, 317.0, 293.0, 289.0, 290.0, 297.0, 316.0, 314.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 313.0, 314.0, 316.0, 314.0, 304.0, 302.0, 316.0, 317.0, 267.0, 252.0, 322.0, 311.0, 313.0, 323.0, 278.0, 295.0, 316.0, 314.0, 314.0, 322.0, 319.0, 317.0, 298.0, 286.0, 316.0, 320.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 314.0, 319.0, 299.0, 280.0, 319.0, 314.0, 286.0, 296.0, 288.0, 294.0, 296.0, 294.0, 308.0, 319.0, 285.0, 291.0, 272.0, 298.0, 282.0, 300.0, 288.0, 294.0, 322.0, 314.0, 290.0, 297.0, 283.0, 296.0, 324.0, 312.0, 285.0, 297.0, 302.0, 304.0, 319.0, 317.0, 287.0, 300.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 319.0, 317.0, 317.0, 316.0, 316.0, 317.0, 313.0, 311.0, 298.0, 289.0, 313.0, 314.0, 294.0, 282.0, 296.0, 288.0, 279.0, 291.0, 301.0, 281.0, 316.0, 314.0, 293.0, 286.0, 319.0, 317.0, 316.0, 317.0, 285.0, 291.0, 302.0, 285.0, 319.0, 317.0, 288.0, 294.0, 319.0, 311.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7861726618280127, "mean_processing_ms": 0.22571819485718037, "mean_inference_ms": 1.389515005345911}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10800000, 
"num_steps_sampled": 5760000, "sample_time_ms": 21545.479, "load_time_ms": 37.135, "grad_time_ms": 10871.751, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00040180576615966856, "policy_loss": -0.007139734923839569, "vf_loss": 80.9995346069336, "vf_explained_var": 0.7637953758239746, "kl": 0.0017641382291913033, "entropy": 1.1168159246444702, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5760000, "episodes_total": 14400, "training_iteration": 450, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-39-04", "timestamp": 1660261144, "time_this_iter_s": 30.14027214050293, "time_total_s": 19553.143060445786, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": 
false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19553.143060445786, "timesteps_since_restore": 5760000, "iterations_since_restore": 450, "perf": {"cpu_util_percent": 35.460465116279074, "ram_util_percent": 59.4627906976744}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 599.51, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 299.755}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 183.91, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.25, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.82, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 1, 
"dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.82, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 576.0, 630.0, 408.0, 582.0, 582.0, 636.0, 636.0, 582.0, 627.0, 582.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 636.0, 180.0, 630.0, 510.0, 633.0, 582.0, 630.0, 579.0, 636.0, 582.0, 582.0, 630.0, 639.0, 630.0, 570.0, 627.0, 576.0, 570.0, 582.0, 582.0, 636.0, 587.0, 579.0, 636.0, 582.0, 606.0, 636.0, 587.0, 633.0, 630.0, 630.0, 636.0, 633.0, 633.0, 624.0, 587.0, 627.0, 576.0, 584.0, 570.0, 582.0, 630.0, 579.0, 636.0, 633.0, 576.0, 587.0, 636.0, 582.0, 630.0, 636.0, 573.0, 582.0, 579.0, 633.0, 630.0, 587.0, 639.0, 582.0, 582.0, 522.0, 633.0, 630.0, 582.0, 633.0, 630.0, 579.0, 519.0, 627.0, 636.0, 627.0, 587.0, 587.0, 630.0, 573.0, 639.0, 630.0, 
582.0, 627.0, 627.0, 587.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 308.0, 285.0, 291.0, 311.0, 319.0, 213.0, 195.0, 288.0, 294.0, 294.0, 288.0, 321.0, 315.0, 319.0, 317.0, 291.0, 291.0, 311.0, 316.0, 301.0, 281.0, 316.0, 314.0, 321.0, 309.0, 327.0, 306.0, 291.0, 291.0, 311.0, 319.0, 311.0, 319.0, 311.0, 325.0, 91.0, 89.0, 311.0, 319.0, 247.0, 263.0, 311.0, 322.0, 283.0, 299.0, 316.0, 314.0, 288.0, 291.0, 316.0, 320.0, 293.0, 289.0, 291.0, 291.0, 306.0, 324.0, 317.0, 322.0, 314.0, 316.0, 282.0, 288.0, 308.0, 319.0, 285.0, 291.0, 272.0, 298.0, 282.0, 300.0, 288.0, 294.0, 322.0, 314.0, 290.0, 297.0, 283.0, 296.0, 324.0, 312.0, 285.0, 297.0, 302.0, 304.0, 319.0, 317.0, 287.0, 300.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 319.0, 317.0, 317.0, 316.0, 316.0, 317.0, 313.0, 311.0, 298.0, 289.0, 313.0, 314.0, 294.0, 282.0, 296.0, 288.0, 279.0, 291.0, 301.0, 281.0, 316.0, 314.0, 293.0, 286.0, 319.0, 317.0, 316.0, 317.0, 285.0, 291.0, 302.0, 285.0, 319.0, 317.0, 288.0, 294.0, 319.0, 311.0, 319.0, 317.0, 282.0, 291.0, 289.0, 293.0, 287.0, 292.0, 319.0, 314.0, 311.0, 319.0, 290.0, 297.0, 319.0, 320.0, 299.0, 283.0, 290.0, 292.0, 264.0, 258.0, 316.0, 317.0, 321.0, 309.0, 291.0, 291.0, 314.0, 319.0, 316.0, 314.0, 296.0, 283.0, 270.0, 249.0, 316.0, 311.0, 311.0, 325.0, 315.0, 312.0, 290.0, 297.0, 295.0, 292.0, 321.0, 309.0, 293.0, 280.0, 322.0, 317.0, 316.0, 314.0, 293.0, 289.0, 308.0, 319.0, 311.0, 316.0, 296.0, 291.0, 291.0, 291.0, 301.0, 286.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.7854523352273275, "mean_processing_ms": 0.22557596598023225, "mean_inference_ms": 1.3886629869315275}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10824000, "num_steps_sampled": 5772800, "sample_time_ms": 21458.474, "load_time_ms": 37.866, "grad_time_ms": 10821.902, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004755727481096983, "policy_loss": -0.00296382955275476, "vf_loss": 82.76275634765625, "vf_explained_var": 0.7805452942848206, "kl": 0.0020347917452454567, "entropy": 1.1134214401245117, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5772800, "episodes_total": 14432, "training_iteration": 451, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-39-39", "timestamp": 1660261179, "time_this_iter_s": 34.819623947143555, "time_total_s": 19587.96268439293, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19587.96268439293, "timesteps_since_restore": 5772800, "iterations_since_restore": 451, "perf": {"cpu_util_percent": 35.726, "ram_util_percent": 59.13399999999999}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 600.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.195}, "custom_metrics": {"sparse_reward_mean": 208.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 184.39, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.35, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.9, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.4, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.87, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 
17.63, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.87, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.87, 
"viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 582.0, 576.0, 609.0, 582.0, 630.0, 636.0, 630.0, 630.0, 633.0, 587.0, 584.0, 582.0, 633.0, 587.0, 636.0, 639.0, 630.0, 630.0, 522.0, 636.0, 587.0, 633.0, 579.0, 633.0, 582.0, 630.0, 582.0, 633.0, 633.0, 582.0, 636.0, 582.0, 630.0, 636.0, 573.0, 582.0, 579.0, 633.0, 630.0, 587.0, 639.0, 582.0, 582.0, 522.0, 633.0, 630.0, 582.0, 633.0, 630.0, 579.0, 519.0, 627.0, 636.0, 627.0, 587.0, 587.0, 630.0, 573.0, 639.0, 630.0, 582.0, 627.0, 627.0, 
587.0, 582.0, 587.0, 627.0, 576.0, 630.0, 408.0, 582.0, 582.0, 636.0, 636.0, 582.0, 627.0, 582.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 636.0, 180.0, 630.0, 510.0, 633.0, 582.0, 630.0, 579.0, 636.0, 582.0, 582.0, 630.0, 639.0, 630.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 309.0, 288.0, 294.0, 294.0, 288.0, 290.0, 286.0, 302.0, 307.0, 291.0, 291.0, 311.0, 319.0, 324.0, 312.0, 313.0, 317.0, 311.0, 319.0, 311.0, 322.0, 288.0, 299.0, 283.0, 301.0, 293.0, 289.0, 319.0, 314.0, 283.0, 304.0, 314.0, 322.0, 319.0, 320.0, 316.0, 314.0, 321.0, 309.0, 257.0, 265.0, 321.0, 315.0, 288.0, 299.0, 311.0, 322.0, 291.0, 288.0, 309.0, 324.0, 285.0, 297.0, 316.0, 314.0, 293.0, 289.0, 319.0, 314.0, 316.0, 317.0, 286.0, 296.0, 319.0, 317.0, 288.0, 294.0, 319.0, 311.0, 319.0, 317.0, 282.0, 291.0, 289.0, 293.0, 287.0, 292.0, 319.0, 314.0, 311.0, 319.0, 290.0, 297.0, 319.0, 320.0, 299.0, 283.0, 290.0, 292.0, 264.0, 258.0, 316.0, 317.0, 321.0, 309.0, 291.0, 291.0, 314.0, 319.0, 316.0, 314.0, 296.0, 283.0, 270.0, 249.0, 316.0, 311.0, 311.0, 325.0, 315.0, 312.0, 290.0, 297.0, 295.0, 292.0, 321.0, 309.0, 293.0, 280.0, 322.0, 317.0, 316.0, 314.0, 293.0, 289.0, 308.0, 319.0, 311.0, 316.0, 296.0, 291.0, 291.0, 291.0, 301.0, 286.0, 319.0, 308.0, 285.0, 291.0, 311.0, 319.0, 213.0, 195.0, 288.0, 294.0, 294.0, 288.0, 321.0, 315.0, 319.0, 317.0, 291.0, 291.0, 311.0, 316.0, 301.0, 281.0, 316.0, 314.0, 321.0, 309.0, 327.0, 306.0, 291.0, 291.0, 311.0, 319.0, 311.0, 319.0, 311.0, 325.0, 91.0, 
89.0, 311.0, 319.0, 247.0, 263.0, 311.0, 322.0, 283.0, 299.0, 316.0, 314.0, 288.0, 291.0, 316.0, 320.0, 293.0, 289.0, 291.0, 291.0, 306.0, 324.0, 317.0, 322.0, 314.0, 316.0, 282.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7847405370697927, "mean_processing_ms": 0.22543724156360243, "mean_inference_ms": 1.3878004584531793}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10848000, "num_steps_sampled": 5785600, "sample_time_ms": 21368.51, "load_time_ms": 38.205, "grad_time_ms": 10677.018, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004785877245012671, "policy_loss": -0.0067210569977760315, "vf_loss": 77.5384750366211, "vf_explained_var": 0.777080774307251, "kl": 0.0022153640165925026, "entropy": 1.108397126197815, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5785600, "episodes_total": 14464, "training_iteration": 452, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-40-10", "timestamp": 1660261210, "time_this_iter_s": 30.929455280303955, "time_total_s": 19618.892139673233, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19618.892139673233, "timesteps_since_restore": 5785600, "iterations_since_restore": 452, "perf": {"cpu_util_percent": 34.12045454545454, "ram_util_percent": 59.23863636363635}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 601.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.725}, "custom_metrics": {"sparse_reward_mean": 208.2, 
"sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 185.05, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 17.97, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.3, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.1, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 636.0, 636.0, 636.0, 636.0, 582.0, 633.0, 587.0, 581.0, 633.0, 582.0, 630.0, 630.0, 587.0, 582.0, 636.0, 582.0, 633.0, 536.0, 582.0, 636.0, 639.0, 627.0, 582.0, 624.0, 579.0, 573.0, 633.0, 633.0, 587.0, 633.0, 627.0, 587.0, 582.0, 587.0, 627.0, 576.0, 630.0, 408.0, 582.0, 
582.0, 636.0, 636.0, 582.0, 627.0, 582.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 636.0, 180.0, 630.0, 510.0, 633.0, 582.0, 630.0, 579.0, 636.0, 582.0, 582.0, 630.0, 639.0, 630.0, 570.0, 630.0, 582.0, 582.0, 576.0, 609.0, 582.0, 630.0, 636.0, 630.0, 630.0, 633.0, 587.0, 584.0, 582.0, 633.0, 587.0, 636.0, 639.0, 630.0, 630.0, 522.0, 636.0, 587.0, 633.0, 579.0, 633.0, 582.0, 630.0, 582.0, 633.0, 633.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 319.0, 314.0, 288.0, 299.0, 290.0, 291.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 319.0, 311.0, 296.0, 291.0, 291.0, 291.0, 322.0, 314.0, 290.0, 292.0, 316.0, 317.0, 257.0, 279.0, 288.0, 294.0, 311.0, 325.0, 322.0, 317.0, 319.0, 308.0, 293.0, 289.0, 312.0, 312.0, 296.0, 283.0, 287.0, 286.0, 316.0, 317.0, 316.0, 317.0, 290.0, 297.0, 314.0, 319.0, 311.0, 316.0, 296.0, 291.0, 291.0, 291.0, 301.0, 286.0, 319.0, 308.0, 285.0, 291.0, 311.0, 319.0, 213.0, 195.0, 288.0, 294.0, 294.0, 288.0, 321.0, 315.0, 319.0, 317.0, 291.0, 291.0, 311.0, 316.0, 301.0, 281.0, 316.0, 314.0, 321.0, 309.0, 327.0, 306.0, 291.0, 291.0, 311.0, 319.0, 311.0, 319.0, 311.0, 325.0, 91.0, 89.0, 311.0, 319.0, 247.0, 263.0, 311.0, 322.0, 283.0, 299.0, 316.0, 314.0, 288.0, 291.0, 316.0, 320.0, 293.0, 289.0, 291.0, 291.0, 306.0, 324.0, 317.0, 322.0, 314.0, 316.0, 282.0, 288.0, 321.0, 309.0, 288.0, 294.0, 294.0, 288.0, 290.0, 286.0, 302.0, 307.0, 291.0, 291.0, 311.0, 
319.0, 324.0, 312.0, 313.0, 317.0, 311.0, 319.0, 311.0, 322.0, 288.0, 299.0, 283.0, 301.0, 293.0, 289.0, 319.0, 314.0, 283.0, 304.0, 314.0, 322.0, 319.0, 320.0, 316.0, 314.0, 321.0, 309.0, 257.0, 265.0, 321.0, 315.0, 288.0, 299.0, 311.0, 322.0, 291.0, 288.0, 309.0, 324.0, 285.0, 297.0, 316.0, 314.0, 293.0, 289.0, 319.0, 314.0, 316.0, 317.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7840324712041147, "mean_processing_ms": 0.22529998529372405, "mean_inference_ms": 1.3869181064814406}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10872000, "num_steps_sampled": 5798400, "sample_time_ms": 21127.393, "load_time_ms": 38.987, "grad_time_ms": 10639.287, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0022095281165093184, "policy_loss": -0.005497789476066828, "vf_loss": 82.6261215209961, "vf_explained_var": 0.7598109245300293, "kl": 0.0015994912246242166, "entropy": 1.110588550567627, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5798400, "episodes_total": 14496, "training_iteration": 453, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-40-38", "timestamp": 1660261238, "time_this_iter_s": 28.482766151428223, "time_total_s": 19647.37490582466, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19647.37490582466, "timesteps_since_restore": 5798400, "iterations_since_restore": 453, "perf": {"cpu_util_percent": 34.01, "ram_util_percent": 58.98499999999999}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 609.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.53}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.46, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, 
"useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.14, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.31, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, 
"optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 587.0, 627.0, 630.0, 630.0, 627.0, 582.0, 633.0, 582.0, 582.0, 636.0, 
630.0, 579.0, 627.0, 627.0, 579.0, 587.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 633.0, 587.0, 633.0, 579.0, 636.0, 582.0, 582.0, 627.0, 636.0, 630.0, 639.0, 630.0, 570.0, 630.0, 582.0, 582.0, 576.0, 609.0, 582.0, 630.0, 636.0, 630.0, 630.0, 633.0, 587.0, 584.0, 582.0, 633.0, 587.0, 636.0, 639.0, 630.0, 630.0, 522.0, 636.0, 587.0, 633.0, 579.0, 633.0, 582.0, 630.0, 582.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 636.0, 636.0, 636.0, 582.0, 633.0, 587.0, 581.0, 633.0, 582.0, 630.0, 630.0, 587.0, 582.0, 636.0, 582.0, 633.0, 536.0, 582.0, 636.0, 639.0, 627.0, 582.0, 624.0, 579.0, 573.0, 633.0, 633.0, 587.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 320.0, 288.0, 299.0, 313.0, 314.0, 306.0, 324.0, 311.0, 319.0, 316.0, 311.0, 296.0, 286.0, 322.0, 311.0, 296.0, 286.0, 298.0, 284.0, 314.0, 322.0, 311.0, 319.0, 277.0, 302.0, 319.0, 308.0, 316.0, 311.0, 290.0, 289.0, 298.0, 289.0, 316.0, 317.0, 314.0, 319.0, 289.0, 293.0, 299.0, 283.0, 286.0, 296.0, 314.0, 322.0, 318.0, 315.0, 291.0, 296.0, 316.0, 317.0, 287.0, 292.0, 314.0, 322.0, 293.0, 289.0, 291.0, 291.0, 316.0, 311.0, 319.0, 317.0, 306.0, 324.0, 317.0, 322.0, 314.0, 316.0, 282.0, 288.0, 321.0, 309.0, 288.0, 294.0, 294.0, 288.0, 290.0, 286.0, 302.0, 307.0, 291.0, 291.0, 311.0, 319.0, 324.0, 312.0, 313.0, 317.0, 311.0, 319.0, 311.0, 322.0, 288.0, 299.0, 283.0, 301.0, 293.0, 289.0, 319.0, 314.0, 283.0, 304.0, 314.0, 322.0, 319.0, 320.0, 316.0, 314.0, 321.0, 309.0, 257.0, 265.0, 321.0, 315.0, 288.0, 299.0, 311.0, 
322.0, 291.0, 288.0, 309.0, 324.0, 285.0, 297.0, 316.0, 314.0, 293.0, 289.0, 319.0, 314.0, 316.0, 317.0, 286.0, 296.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 319.0, 314.0, 288.0, 299.0, 290.0, 291.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 319.0, 311.0, 296.0, 291.0, 291.0, 291.0, 322.0, 314.0, 290.0, 292.0, 316.0, 317.0, 257.0, 279.0, 288.0, 294.0, 311.0, 325.0, 322.0, 317.0, 319.0, 308.0, 293.0, 289.0, 312.0, 312.0, 296.0, 283.0, 287.0, 286.0, 316.0, 317.0, 316.0, 317.0, 290.0, 297.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.783318071764331, "mean_processing_ms": 0.22516025845999352, "mean_inference_ms": 1.3858608320735286}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10896000, "num_steps_sampled": 5811200, "sample_time_ms": 20778.477, "load_time_ms": 39.252, "grad_time_ms": 10489.319, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007634037174284458, "policy_loss": -0.006680456455796957, "vf_loss": 80.01913452148438, "vf_explained_var": 0.7667891383171082, "kl": 0.0017371875001117587, "entropy": 1.1161128282546997, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5811200, "episodes_total": 14528, "training_iteration": 454, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-41-06", "timestamp": 1660261266, "time_this_iter_s": 27.318589210510254, "time_total_s": 19674.69349503517, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": 
false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19674.69349503517, "timesteps_since_restore": 5811200, "iterations_since_restore": 454, "perf": {"cpu_util_percent": 
37.051282051282044, "ram_util_percent": 58.9923076923077}} +{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 606.28, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.14}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.68, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, 
"onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.27, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.27, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.27, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 582.0, 579.0, 582.0, 633.0, 630.0, 606.0, 639.0, 630.0, 639.0, 633.0, 636.0, 587.0, 582.0, 582.0, 582.0, 579.0, 630.0, 462.0, 573.0, 579.0, 582.0, 624.0, 636.0, 587.0, 579.0, 579.0, 636.0, 627.0, 587.0, 624.0, 582.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 636.0, 636.0, 636.0, 582.0, 633.0, 587.0, 581.0, 633.0, 582.0, 630.0, 630.0, 587.0, 582.0, 636.0, 582.0, 633.0, 536.0, 582.0, 636.0, 639.0, 627.0, 582.0, 624.0, 579.0, 573.0, 633.0, 633.0, 587.0, 633.0, 639.0, 587.0, 627.0, 630.0, 630.0, 627.0, 582.0, 633.0, 582.0, 582.0, 636.0, 630.0, 579.0, 627.0, 627.0, 579.0, 587.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 633.0, 587.0, 633.0, 579.0, 636.0, 582.0, 582.0, 627.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 296.0, 283.0, 286.0, 296.0, 311.0, 322.0, 316.0, 314.0, 300.0, 306.0, 319.0, 320.0, 319.0, 311.0, 319.0, 320.0, 316.0, 317.0, 319.0, 317.0, 296.0, 291.0, 299.0, 283.0, 291.0, 291.0, 291.0, 291.0, 291.0, 288.0, 308.0, 322.0, 228.0, 234.0, 279.0, 294.0, 283.0, 296.0, 288.0, 294.0, 313.0, 311.0, 319.0, 317.0, 291.0, 296.0, 291.0, 288.0, 287.0, 292.0, 319.0, 317.0, 316.0, 311.0, 296.0, 291.0, 316.0, 308.0, 293.0, 289.0, 319.0, 314.0, 316.0, 317.0, 286.0, 296.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 319.0, 314.0, 288.0, 299.0, 290.0, 
291.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 319.0, 311.0, 296.0, 291.0, 291.0, 291.0, 322.0, 314.0, 290.0, 292.0, 316.0, 317.0, 257.0, 279.0, 288.0, 294.0, 311.0, 325.0, 322.0, 317.0, 319.0, 308.0, 293.0, 289.0, 312.0, 312.0, 296.0, 283.0, 287.0, 286.0, 316.0, 317.0, 316.0, 317.0, 290.0, 297.0, 314.0, 319.0, 319.0, 320.0, 288.0, 299.0, 313.0, 314.0, 306.0, 324.0, 311.0, 319.0, 316.0, 311.0, 296.0, 286.0, 322.0, 311.0, 296.0, 286.0, 298.0, 284.0, 314.0, 322.0, 311.0, 319.0, 277.0, 302.0, 319.0, 308.0, 316.0, 311.0, 290.0, 289.0, 298.0, 289.0, 316.0, 317.0, 314.0, 319.0, 289.0, 293.0, 299.0, 283.0, 286.0, 296.0, 314.0, 322.0, 318.0, 315.0, 291.0, 296.0, 316.0, 317.0, 287.0, 292.0, 314.0, 322.0, 293.0, 289.0, 291.0, 291.0, 316.0, 311.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7826053116086107, "mean_processing_ms": 0.22502024237804114, "mean_inference_ms": 1.384772842121544}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10920000, "num_steps_sampled": 5824000, "sample_time_ms": 20652.138, "load_time_ms": 39.437, "grad_time_ms": 10376.522, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003411610086914152, "policy_loss": -0.0072668264620006084, "vf_loss": 81.6093978881836, "vf_explained_var": 0.7669034600257874, "kl": 0.0018620697082951665, "entropy": 1.1059015989303589, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5824000, "episodes_total": 14560, "training_iteration": 455, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-41-35", "timestamp": 1660261295, "time_this_iter_s": 29.449601650238037, "time_total_s": 19704.14309668541, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19704.14309668541, "timesteps_since_restore": 5824000, "iterations_since_restore": 455, "perf": {"cpu_util_percent": 37.61666666666666, "ram_util_percent": 59.06190476190477}} +{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 607.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.67}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.54, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.61, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.2, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.2, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.2, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 624.0, 587.0, 573.0, 633.0, 636.0, 582.0, 630.0, 633.0, 639.0, 633.0, 633.0, 582.0, 630.0, 633.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 587.0, 573.0, 576.0, 582.0, 636.0, 525.0, 633.0, 579.0, 633.0, 527.0, 633.0, 633.0, 587.0, 633.0, 639.0, 587.0, 627.0, 630.0, 630.0, 627.0, 582.0, 633.0, 582.0, 582.0, 636.0, 630.0, 579.0, 627.0, 627.0, 579.0, 587.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 633.0, 587.0, 633.0, 579.0, 636.0, 582.0, 582.0, 627.0, 636.0, 633.0, 582.0, 582.0, 579.0, 582.0, 633.0, 630.0, 606.0, 639.0, 630.0, 639.0, 633.0, 636.0, 587.0, 582.0, 582.0, 582.0, 579.0, 630.0, 462.0, 573.0, 579.0, 582.0, 624.0, 636.0, 587.0, 579.0, 579.0, 636.0, 627.0, 587.0, 624.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 313.0, 317.0, 316.0, 308.0, 301.0, 286.0, 290.0, 283.0, 319.0, 314.0, 324.0, 312.0, 291.0, 291.0, 315.0, 315.0, 317.0, 316.0, 314.0, 325.0, 316.0, 317.0, 316.0, 317.0, 293.0, 289.0, 316.0, 314.0, 321.0, 312.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 288.0, 294.0, 313.0, 317.0, 311.0, 319.0, 288.0, 299.0, 282.0, 291.0, 285.0, 291.0, 286.0, 296.0, 311.0, 325.0, 268.0, 257.0, 316.0, 317.0, 296.0, 283.0, 320.0, 
313.0, 254.0, 273.0, 316.0, 317.0, 316.0, 317.0, 290.0, 297.0, 314.0, 319.0, 319.0, 320.0, 288.0, 299.0, 313.0, 314.0, 306.0, 324.0, 311.0, 319.0, 316.0, 311.0, 296.0, 286.0, 322.0, 311.0, 296.0, 286.0, 298.0, 284.0, 314.0, 322.0, 311.0, 319.0, 277.0, 302.0, 319.0, 308.0, 316.0, 311.0, 290.0, 289.0, 298.0, 289.0, 316.0, 317.0, 314.0, 319.0, 289.0, 293.0, 299.0, 283.0, 286.0, 296.0, 314.0, 322.0, 318.0, 315.0, 291.0, 296.0, 316.0, 317.0, 287.0, 292.0, 314.0, 322.0, 293.0, 289.0, 291.0, 291.0, 316.0, 311.0, 319.0, 317.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 296.0, 283.0, 286.0, 296.0, 311.0, 322.0, 316.0, 314.0, 300.0, 306.0, 319.0, 320.0, 319.0, 311.0, 319.0, 320.0, 316.0, 317.0, 319.0, 317.0, 296.0, 291.0, 299.0, 283.0, 291.0, 291.0, 291.0, 291.0, 291.0, 288.0, 308.0, 322.0, 228.0, 234.0, 279.0, 294.0, 283.0, 296.0, 288.0, 294.0, 313.0, 311.0, 319.0, 317.0, 291.0, 296.0, 291.0, 288.0, 287.0, 292.0, 319.0, 317.0, 316.0, 311.0, 296.0, 291.0, 316.0, 308.0]}, "sampler_perf": {"mean_env_wait_ms": 0.781893848476972, "mean_processing_ms": 0.22487934380632985, "mean_inference_ms": 1.3836766382532326}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10944000, "num_steps_sampled": 5836800, "sample_time_ms": 20326.683, "load_time_ms": 39.601, "grad_time_ms": 10224.263, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009388479520566761, "policy_loss": -0.008330571465194225, "vf_loss": 79.4411849975586, "vf_explained_var": 0.7653481960296631, "kl": 0.0017687659710645676, "entropy": 1.1048110723495483, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5836800, "episodes_total": 14592, "training_iteration": 456, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-42-04", "timestamp": 1660261324, "time_this_iter_s": 28.971395254135132, "time_total_s": 19733.114491939545, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19733.114491939545, "timesteps_since_restore": 5836800, "iterations_since_restore": 456, "perf": {"cpu_util_percent": 36.80731707317073, "ram_util_percent": 59.075609756097556}} +{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 598.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 98.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 299.33}, "custom_metrics": {"sparse_reward_mean": 207.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 184.26, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 16.33, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.96, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.64, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.54, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.74, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, 
"soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.96, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.64, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.64, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 630.0, 579.0, 579.0, 576.0, 575.0, 582.0, 579.0, 197.0, 639.0, 582.0, 636.0, 587.0, 579.0, 582.0, 636.0, 582.0, 582.0, 630.0, 582.0, 582.0, 570.0, 633.0, 627.0, 582.0, 587.0, 630.0, 639.0, 590.0, 633.0, 582.0, 630.0, 582.0, 582.0, 627.0, 636.0, 633.0, 582.0, 582.0, 579.0, 582.0, 633.0, 630.0, 606.0, 639.0, 630.0, 639.0, 633.0, 636.0, 587.0, 582.0, 582.0, 582.0, 579.0, 630.0, 462.0, 573.0, 579.0, 582.0, 624.0, 636.0, 587.0, 579.0, 579.0, 636.0, 627.0, 587.0, 624.0, 636.0, 630.0, 624.0, 587.0, 573.0, 633.0, 636.0, 582.0, 630.0, 633.0, 639.0, 633.0, 633.0, 582.0, 630.0, 633.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 587.0, 573.0, 576.0, 582.0, 636.0, 525.0, 633.0, 579.0, 633.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [248.0, 271.0, 311.0, 319.0, 288.0, 291.0, 285.0, 294.0, 290.0, 286.0, 289.0, 286.0, 288.0, 294.0, 285.0, 294.0, 99.0, 98.0, 322.0, 317.0, 288.0, 294.0, 319.0, 317.0, 298.0, 289.0, 288.0, 291.0, 299.0, 283.0, 314.0, 322.0, 
296.0, 286.0, 291.0, 291.0, 311.0, 319.0, 290.0, 292.0, 288.0, 294.0, 277.0, 293.0, 316.0, 317.0, 316.0, 311.0, 294.0, 288.0, 290.0, 297.0, 316.0, 314.0, 319.0, 320.0, 294.0, 296.0, 316.0, 317.0, 296.0, 286.0, 311.0, 319.0, 293.0, 289.0, 291.0, 291.0, 316.0, 311.0, 319.0, 317.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 296.0, 283.0, 286.0, 296.0, 311.0, 322.0, 316.0, 314.0, 300.0, 306.0, 319.0, 320.0, 319.0, 311.0, 319.0, 320.0, 316.0, 317.0, 319.0, 317.0, 296.0, 291.0, 299.0, 283.0, 291.0, 291.0, 291.0, 291.0, 291.0, 288.0, 308.0, 322.0, 228.0, 234.0, 279.0, 294.0, 283.0, 296.0, 288.0, 294.0, 313.0, 311.0, 319.0, 317.0, 291.0, 296.0, 291.0, 288.0, 287.0, 292.0, 319.0, 317.0, 316.0, 311.0, 296.0, 291.0, 316.0, 308.0, 319.0, 317.0, 313.0, 317.0, 316.0, 308.0, 301.0, 286.0, 290.0, 283.0, 319.0, 314.0, 324.0, 312.0, 291.0, 291.0, 315.0, 315.0, 317.0, 316.0, 314.0, 325.0, 316.0, 317.0, 316.0, 317.0, 293.0, 289.0, 316.0, 314.0, 321.0, 312.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 288.0, 294.0, 313.0, 317.0, 311.0, 319.0, 288.0, 299.0, 282.0, 291.0, 285.0, 291.0, 286.0, 296.0, 311.0, 325.0, 268.0, 257.0, 316.0, 317.0, 296.0, 283.0, 320.0, 313.0, 254.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7811987617860328, "mean_processing_ms": 0.2247433395983137, "mean_inference_ms": 1.3827621285902887}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10968000, "num_steps_sampled": 5849600, "sample_time_ms": 20643.82, "load_time_ms": 39.721, "grad_time_ms": 10128.234, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00031386129558086395, "policy_loss": -0.007833792828023434, "vf_loss": 87.0155258178711, "vf_explained_var": 0.759077787399292, "kl": 0.0023228460922837257, "entropy": 1.10780668258667, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5849600, "episodes_total": 14624, 
"training_iteration": 457, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-42-37", "timestamp": 1660261357, "time_this_iter_s": 32.612699031829834, "time_total_s": 19765.727190971375, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19765.727190971375, "timesteps_since_restore": 5849600, "iterations_since_restore": 457, "perf": {"cpu_util_percent": 35.49347826086956, "ram_util_percent": 59.11739130434784}} +{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 600.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 98.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 300.29}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 184.98, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.3, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.91, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.9, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.66, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.9, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.9, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 606.0, 582.0, 633.0, 579.0, 587.0, 639.0, 567.0, 630.0, 630.0, 582.0, 627.0, 636.0, 587.0, 636.0, 639.0, 582.0, 587.0, 587.0, 630.0, 633.0, 630.0, 579.0, 587.0, 576.0, 633.0, 587.0, 587.0, 630.0, 579.0, 630.0, 636.0, 627.0, 587.0, 624.0, 636.0, 630.0, 624.0, 587.0, 573.0, 633.0, 636.0, 582.0, 630.0, 633.0, 639.0, 633.0, 633.0, 582.0, 630.0, 633.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 587.0, 573.0, 576.0, 582.0, 636.0, 525.0, 633.0, 579.0, 633.0, 527.0, 519.0, 630.0, 579.0, 579.0, 576.0, 575.0, 582.0, 579.0, 197.0, 639.0, 582.0, 636.0, 587.0, 579.0, 582.0, 636.0, 582.0, 582.0, 630.0, 582.0, 582.0, 570.0, 633.0, 627.0, 582.0, 587.0, 630.0, 639.0, 590.0, 633.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 291.0, 296.0, 301.0, 
305.0, 286.0, 296.0, 319.0, 314.0, 290.0, 289.0, 290.0, 297.0, 319.0, 320.0, 282.0, 285.0, 310.0, 320.0, 319.0, 311.0, 288.0, 294.0, 313.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 317.0, 319.0, 320.0, 293.0, 289.0, 299.0, 288.0, 290.0, 297.0, 310.0, 320.0, 324.0, 309.0, 311.0, 319.0, 291.0, 288.0, 299.0, 288.0, 286.0, 290.0, 317.0, 316.0, 296.0, 291.0, 293.0, 294.0, 316.0, 314.0, 294.0, 285.0, 326.0, 304.0, 319.0, 317.0, 316.0, 311.0, 296.0, 291.0, 316.0, 308.0, 319.0, 317.0, 313.0, 317.0, 316.0, 308.0, 301.0, 286.0, 290.0, 283.0, 319.0, 314.0, 324.0, 312.0, 291.0, 291.0, 315.0, 315.0, 317.0, 316.0, 314.0, 325.0, 316.0, 317.0, 316.0, 317.0, 293.0, 289.0, 316.0, 314.0, 321.0, 312.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 288.0, 294.0, 313.0, 317.0, 311.0, 319.0, 288.0, 299.0, 282.0, 291.0, 285.0, 291.0, 286.0, 296.0, 311.0, 325.0, 268.0, 257.0, 316.0, 317.0, 296.0, 283.0, 320.0, 313.0, 254.0, 273.0, 248.0, 271.0, 311.0, 319.0, 288.0, 291.0, 285.0, 294.0, 290.0, 286.0, 289.0, 286.0, 288.0, 294.0, 285.0, 294.0, 99.0, 98.0, 322.0, 317.0, 288.0, 294.0, 319.0, 317.0, 298.0, 289.0, 288.0, 291.0, 299.0, 283.0, 314.0, 322.0, 296.0, 286.0, 291.0, 291.0, 311.0, 319.0, 290.0, 292.0, 288.0, 294.0, 277.0, 293.0, 316.0, 317.0, 316.0, 311.0, 294.0, 288.0, 290.0, 297.0, 316.0, 314.0, 319.0, 320.0, 294.0, 296.0, 316.0, 317.0, 296.0, 286.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7805088940621119, "mean_processing_ms": 0.22460838488183463, "mean_inference_ms": 1.3819062694297763}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10992000, "num_steps_sampled": 5862400, "sample_time_ms": 20388.555, "load_time_ms": 39.901, "grad_time_ms": 10047.414, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00028213454061187804, "policy_loss": -0.007530031260102987, "vf_loss": 78.06029510498047, "vf_explained_var": 0.7737483382225037, "kl": 0.0017934959614649415, "entropy": 1.1162586212158203, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5862400, "episodes_total": 14656, "training_iteration": 458, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-43-08", "timestamp": 1660261388, "time_this_iter_s": 31.54933786392212, "time_total_s": 19797.276528835297, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19797.276528835297, "timesteps_since_restore": 5862400, "iterations_since_restore": 458, "perf": {"cpu_util_percent": 36.01111111111111, "ram_util_percent": 59.15777777777779}} +{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 599.73, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 98.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 299.865}, "custom_metrics": {"sparse_reward_mean": 207.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 184.93, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.45, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.54, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.05, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.8, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.04, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 639.0, 630.0, 630.0, 582.0, 636.0, 581.0, 636.0, 630.0, 630.0, 590.0, 633.0, 636.0, 636.0, 630.0, 582.0, 582.0, 630.0, 584.0, 579.0, 627.0, 504.0, 630.0, 582.0, 633.0, 630.0, 633.0, 587.0, 582.0, 582.0, 633.0, 633.0, 579.0, 633.0, 527.0, 519.0, 630.0, 579.0, 579.0, 576.0, 575.0, 582.0, 579.0, 197.0, 639.0, 582.0, 636.0, 587.0, 579.0, 582.0, 636.0, 582.0, 582.0, 630.0, 582.0, 582.0, 570.0, 633.0, 627.0, 582.0, 587.0, 630.0, 639.0, 590.0, 633.0, 582.0, 630.0, 582.0, 587.0, 606.0, 582.0, 633.0, 579.0, 587.0, 639.0, 567.0, 630.0, 630.0, 582.0, 627.0, 636.0, 587.0, 636.0, 639.0, 582.0, 587.0, 587.0, 630.0, 633.0, 630.0, 579.0, 587.0, 576.0, 633.0, 587.0, 587.0, 630.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 300.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 311.0, 319.0, 293.0, 289.0, 319.0, 317.0, 290.0, 291.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 296.0, 294.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 314.0, 316.0, 298.0, 286.0, 288.0, 291.0, 308.0, 319.0, 254.0, 250.0, 311.0, 319.0, 288.0, 294.0, 313.0, 320.0, 314.0, 316.0, 314.0, 319.0, 291.0, 296.0, 293.0, 289.0, 289.0, 293.0, 317.0, 316.0, 316.0, 317.0, 296.0, 283.0, 320.0, 313.0, 254.0, 273.0, 248.0, 271.0, 311.0, 319.0, 288.0, 291.0, 285.0, 294.0, 290.0, 286.0, 289.0, 286.0, 288.0, 294.0, 285.0, 294.0, 99.0, 98.0, 322.0, 317.0, 288.0, 294.0, 319.0, 317.0, 298.0, 289.0, 288.0, 291.0, 299.0, 283.0, 314.0, 322.0, 296.0, 286.0, 291.0, 291.0, 311.0, 319.0, 290.0, 292.0, 288.0, 294.0, 277.0, 293.0, 316.0, 317.0, 316.0, 311.0, 294.0, 288.0, 290.0, 297.0, 316.0, 314.0, 319.0, 320.0, 294.0, 296.0, 316.0, 317.0, 296.0, 286.0, 311.0, 319.0, 289.0, 293.0, 291.0, 296.0, 301.0, 305.0, 286.0, 296.0, 319.0, 314.0, 290.0, 289.0, 290.0, 297.0, 319.0, 320.0, 282.0, 285.0, 310.0, 320.0, 319.0, 311.0, 288.0, 294.0, 313.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 317.0, 319.0, 320.0, 293.0, 289.0, 299.0, 288.0, 290.0, 297.0, 310.0, 320.0, 324.0, 309.0, 311.0, 319.0, 291.0, 288.0, 299.0, 288.0, 286.0, 290.0, 317.0, 316.0, 296.0, 291.0, 293.0, 294.0, 316.0, 314.0, 294.0, 285.0, 326.0, 304.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7798432151982481, "mean_processing_ms": 0.22448068931407403, "mean_inference_ms": 1.3813562407452884}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11016000, "num_steps_sampled": 5875200, "sample_time_ms": 20860.599, "load_time_ms": 39.533, "grad_time_ms": 10085.729, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": -4.9240032240049914e-05, "policy_loss": -0.007299743592739105, "vf_loss": 78.08226776123047, "vf_explained_var": 0.7718666195869446, "kl": 0.001796315424144268, "entropy": 1.1154268980026245, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5875200, "episodes_total": 14688, "training_iteration": 459, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-43-44", "timestamp": 1660261424, "time_this_iter_s": 35.86376190185547, "time_total_s": 19833.140290737152, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19833.140290737152, "timesteps_since_restore": 5875200, "iterations_since_restore": 459, "perf": {"cpu_util_percent": 35.09411764705882, "ram_util_percent": 59.11176470588236}} +{"episode_reward_max": 639.0, "episode_reward_min": 504.0, "episode_reward_mean": 606.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 250.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.165}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.73, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.61, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.11, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.18, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.58, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.13, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, 
"useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.18, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.18, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 582.0, 639.0, 582.0, 636.0, 582.0, 627.0, 582.0, 582.0, 582.0, 582.0, 579.0, 636.0, 587.0, 636.0, 630.0, 633.0, 627.0, 582.0, 639.0, 627.0, 570.0, 633.0, 582.0, 579.0, 636.0, 630.0, 587.0, 582.0, 582.0, 573.0, 590.0, 633.0, 582.0, 630.0, 582.0, 587.0, 606.0, 582.0, 633.0, 579.0, 587.0, 639.0, 567.0, 630.0, 630.0, 582.0, 627.0, 636.0, 587.0, 636.0, 639.0, 582.0, 587.0, 587.0, 630.0, 633.0, 630.0, 579.0, 587.0, 576.0, 633.0, 587.0, 587.0, 630.0, 579.0, 630.0, 582.0, 636.0, 639.0, 630.0, 630.0, 582.0, 636.0, 581.0, 636.0, 630.0, 630.0, 590.0, 633.0, 636.0, 636.0, 630.0, 582.0, 582.0, 630.0, 584.0, 579.0, 627.0, 504.0, 630.0, 582.0, 633.0, 630.0, 633.0, 587.0, 582.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 314.0, 322.0, 291.0, 291.0, 316.0, 311.0, 291.0, 291.0, 293.0, 289.0, 282.0, 300.0, 296.0, 286.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 319.0, 317.0, 319.0, 311.0, 311.0, 322.0, 316.0, 311.0, 287.0, 295.0, 322.0, 317.0, 316.0, 311.0, 282.0, 288.0, 319.0, 314.0, 295.0, 287.0, 288.0, 291.0, 327.0, 309.0, 316.0, 314.0, 293.0, 294.0, 294.0, 288.0, 290.0, 292.0, 285.0, 288.0, 294.0, 296.0, 316.0, 317.0, 296.0, 286.0, 311.0, 319.0, 289.0, 293.0, 291.0, 296.0, 301.0, 305.0, 286.0, 296.0, 319.0, 314.0, 290.0, 289.0, 290.0, 297.0, 319.0, 320.0, 282.0, 285.0, 310.0, 320.0, 319.0, 311.0, 288.0, 294.0, 313.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 317.0, 319.0, 320.0, 293.0, 289.0, 299.0, 288.0, 290.0, 297.0, 310.0, 320.0, 324.0, 309.0, 311.0, 319.0, 291.0, 288.0, 299.0, 288.0, 286.0, 290.0, 317.0, 316.0, 296.0, 291.0, 293.0, 294.0, 316.0, 314.0, 294.0, 285.0, 326.0, 304.0, 282.0, 300.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 311.0, 319.0, 293.0, 289.0, 319.0, 317.0, 290.0, 291.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 296.0, 294.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 314.0, 316.0, 298.0, 286.0, 288.0, 291.0, 308.0, 319.0, 254.0, 250.0, 311.0, 319.0, 288.0, 294.0, 313.0, 320.0, 314.0, 316.0, 314.0, 319.0, 291.0, 296.0, 293.0, 289.0, 289.0, 293.0, 317.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7791820626361985, "mean_processing_ms": 0.22435411593063023, "mean_inference_ms": 1.3809254829785487}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11040000, 
"num_steps_sampled": 5888000, "sample_time_ms": 21384.075, "load_time_ms": 38.923, "grad_time_ms": 10181.416, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033431891351938248, "policy_loss": -0.0041669332422316074, "vf_loss": 80.67221069335938, "vf_explained_var": 0.7671453356742859, "kl": 0.0021624856162816286, "entropy": 1.1142171621322632, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5888000, "episodes_total": 14720, "training_iteration": 460, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-44-21", "timestamp": 1660261461, "time_this_iter_s": 36.32121300697327, "time_total_s": 19869.461503744125, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": 
false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19869.461503744125, "timesteps_since_restore": 5888000, "iterations_since_restore": 460, "perf": {"cpu_util_percent": 35.05294117647058, "ram_util_percent": 59.078431372549026}} +{"episode_reward_max": 639.0, "episode_reward_min": 504.0, "episode_reward_mean": 604.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 250.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.105}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.21, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 
0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.49, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.93, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 4, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.03, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 579.0, 630.0, 527.0, 587.0, 636.0, 584.0, 587.0, 579.0, 570.0, 582.0, 579.0, 636.0, 627.0, 573.0, 633.0, 582.0, 630.0, 582.0, 582.0, 630.0, 636.0, 582.0, 579.0, 582.0, 587.0, 636.0, 587.0, 627.0, 587.0, 633.0, 587.0, 630.0, 579.0, 630.0, 582.0, 636.0, 639.0, 630.0, 630.0, 582.0, 636.0, 581.0, 636.0, 630.0, 630.0, 590.0, 633.0, 636.0, 636.0, 630.0, 582.0, 582.0, 630.0, 584.0, 579.0, 627.0, 504.0, 630.0, 582.0, 633.0, 630.0, 633.0, 587.0, 582.0, 582.0, 633.0, 627.0, 582.0, 582.0, 639.0, 582.0, 636.0, 582.0, 627.0, 582.0, 582.0, 582.0, 582.0, 579.0, 636.0, 587.0, 636.0, 630.0, 633.0, 627.0, 582.0, 639.0, 627.0, 570.0, 633.0, 582.0, 579.0, 
636.0, 630.0, 587.0, 582.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 273.0, 254.0, 291.0, 296.0, 316.0, 320.0, 290.0, 294.0, 290.0, 297.0, 293.0, 286.0, 285.0, 285.0, 294.0, 288.0, 290.0, 289.0, 314.0, 322.0, 313.0, 314.0, 273.0, 300.0, 319.0, 314.0, 299.0, 283.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 293.0, 289.0, 293.0, 286.0, 285.0, 297.0, 293.0, 294.0, 319.0, 317.0, 296.0, 291.0, 313.0, 314.0, 296.0, 291.0, 311.0, 322.0, 293.0, 294.0, 316.0, 314.0, 294.0, 285.0, 326.0, 304.0, 282.0, 300.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 311.0, 319.0, 293.0, 289.0, 319.0, 317.0, 290.0, 291.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 296.0, 294.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 314.0, 316.0, 298.0, 286.0, 288.0, 291.0, 308.0, 319.0, 254.0, 250.0, 311.0, 319.0, 288.0, 294.0, 313.0, 320.0, 314.0, 316.0, 314.0, 319.0, 291.0, 296.0, 293.0, 289.0, 289.0, 293.0, 317.0, 316.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 314.0, 322.0, 291.0, 291.0, 316.0, 311.0, 291.0, 291.0, 293.0, 289.0, 282.0, 300.0, 296.0, 286.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 319.0, 317.0, 319.0, 311.0, 311.0, 322.0, 316.0, 311.0, 287.0, 295.0, 322.0, 317.0, 316.0, 311.0, 282.0, 288.0, 319.0, 314.0, 295.0, 287.0, 288.0, 291.0, 327.0, 309.0, 316.0, 314.0, 293.0, 294.0, 294.0, 288.0, 290.0, 292.0, 285.0, 288.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.7785286320058347, "mean_processing_ms": 0.2242302416319241, "mean_inference_ms": 1.3806320563992234}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11064000, "num_steps_sampled": 5900800, "sample_time_ms": 21464.8, "load_time_ms": 38.628, "grad_time_ms": 10142.381, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011879469966515899, "policy_loss": -0.00630860636010766, "vf_loss": 80.56136322021484, "vf_explained_var": 0.7605991363525391, "kl": 0.002013101242482662, "entropy": 1.1191506385803223, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5900800, "episodes_total": 14752, "training_iteration": 461, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-44-56", "timestamp": 1660261496, "time_this_iter_s": 35.2242169380188, "time_total_s": 19904.685720682144, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19904.685720682144, "timesteps_since_restore": 5900800, "iterations_since_restore": 461, "perf": {"cpu_util_percent": 34.286, "ram_util_percent": 59.076}} +{"episode_reward_max": 639.0, "episode_reward_min": 527.0, "episode_reward_mean": 606.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.04}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.88, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, 
"tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.08, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.72, 
"potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 6.02, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.13, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, 
"viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 636.0, 639.0, 639.0, 582.0, 627.0, 576.0, 587.0, 636.0, 633.0, 636.0, 630.0, 587.0, 633.0, 582.0, 579.0, 573.0, 636.0, 627.0, 633.0, 636.0, 633.0, 587.0, 573.0, 636.0, 630.0, 636.0, 633.0, 636.0, 630.0, 633.0, 587.0, 582.0, 582.0, 633.0, 627.0, 582.0, 582.0, 639.0, 582.0, 636.0, 582.0, 627.0, 582.0, 582.0, 582.0, 582.0, 579.0, 636.0, 587.0, 636.0, 630.0, 633.0, 627.0, 582.0, 639.0, 627.0, 570.0, 633.0, 582.0, 579.0, 636.0, 630.0, 587.0, 
582.0, 582.0, 573.0, 630.0, 582.0, 579.0, 630.0, 527.0, 587.0, 636.0, 584.0, 587.0, 579.0, 570.0, 582.0, 579.0, 636.0, 627.0, 573.0, 633.0, 582.0, 630.0, 582.0, 582.0, 630.0, 636.0, 582.0, 579.0, 582.0, 587.0, 636.0, 587.0, 627.0, 587.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 316.0, 314.0, 316.0, 320.0, 317.0, 322.0, 324.0, 315.0, 291.0, 291.0, 313.0, 314.0, 296.0, 280.0, 296.0, 291.0, 314.0, 322.0, 313.0, 320.0, 314.0, 322.0, 319.0, 311.0, 291.0, 296.0, 316.0, 317.0, 291.0, 291.0, 290.0, 289.0, 288.0, 285.0, 309.0, 327.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 317.0, 288.0, 299.0, 288.0, 285.0, 314.0, 322.0, 313.0, 317.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 319.0, 314.0, 291.0, 296.0, 293.0, 289.0, 289.0, 293.0, 317.0, 316.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 314.0, 322.0, 291.0, 291.0, 316.0, 311.0, 291.0, 291.0, 293.0, 289.0, 282.0, 300.0, 296.0, 286.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 319.0, 317.0, 319.0, 311.0, 311.0, 322.0, 316.0, 311.0, 287.0, 295.0, 322.0, 317.0, 316.0, 311.0, 282.0, 288.0, 319.0, 314.0, 295.0, 287.0, 288.0, 291.0, 327.0, 309.0, 316.0, 314.0, 293.0, 294.0, 294.0, 288.0, 290.0, 292.0, 285.0, 288.0, 316.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 273.0, 254.0, 291.0, 296.0, 316.0, 320.0, 290.0, 294.0, 290.0, 297.0, 293.0, 286.0, 285.0, 285.0, 294.0, 288.0, 290.0, 289.0, 314.0, 322.0, 313.0, 314.0, 273.0, 300.0, 319.0, 314.0, 299.0, 283.0, 311.0, 
319.0, 288.0, 294.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 293.0, 289.0, 293.0, 286.0, 285.0, 297.0, 293.0, 294.0, 319.0, 317.0, 296.0, 291.0, 313.0, 314.0, 296.0, 291.0, 311.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7778691140112235, "mean_processing_ms": 0.22410468367497743, "mean_inference_ms": 1.3802370398093593}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11088000, "num_steps_sampled": 5913600, "sample_time_ms": 21647.909, "load_time_ms": 38.304, "grad_time_ms": 10153.272, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001676362007856369, "policy_loss": -0.005163781810551882, "vf_loss": 73.99588012695312, "vf_explained_var": 0.7706634402275085, "kl": 0.00203719618730247, "entropy": 1.1188966035842896, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5913600, "episodes_total": 14784, "training_iteration": 462, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-45-29", "timestamp": 1660261529, "time_this_iter_s": 32.866820096969604, "time_total_s": 19937.552540779114, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19937.552540779114, "timesteps_since_restore": 5913600, "iterations_since_restore": 462, "perf": {"cpu_util_percent": 36.0304347826087, "ram_util_percent": 59.14782608695653}} +{"episode_reward_max": 639.0, "episode_reward_min": 527.0, "episode_reward_mean": 606.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.275}, "custom_metrics": {"sparse_reward_mean": 209.8, 
"sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.95, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.1, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 
0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.21, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 636.0, 576.0, 587.0, 579.0, 633.0, 579.0, 630.0, 636.0, 636.0, 569.0, 582.0, 584.0, 633.0, 633.0, 627.0, 582.0, 582.0, 587.0, 564.0, 582.0, 639.0, 587.0, 582.0, 627.0, 636.0, 587.0, 630.0, 582.0, 630.0, 639.0, 587.0, 582.0, 582.0, 573.0, 630.0, 582.0, 579.0, 630.0, 527.0, 
587.0, 636.0, 584.0, 587.0, 579.0, 570.0, 582.0, 579.0, 636.0, 627.0, 573.0, 633.0, 582.0, 630.0, 582.0, 582.0, 630.0, 636.0, 582.0, 579.0, 582.0, 587.0, 636.0, 587.0, 627.0, 587.0, 633.0, 582.0, 630.0, 636.0, 639.0, 639.0, 582.0, 627.0, 576.0, 587.0, 636.0, 633.0, 636.0, 630.0, 587.0, 633.0, 582.0, 579.0, 573.0, 636.0, 627.0, 633.0, 636.0, 633.0, 587.0, 573.0, 636.0, 630.0, 636.0, 633.0, 636.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 319.0, 311.0, 319.0, 317.0, 282.0, 294.0, 291.0, 296.0, 294.0, 285.0, 314.0, 319.0, 291.0, 288.0, 316.0, 314.0, 314.0, 322.0, 322.0, 314.0, 281.0, 288.0, 292.0, 290.0, 285.0, 299.0, 321.0, 312.0, 319.0, 314.0, 305.0, 322.0, 293.0, 289.0, 288.0, 294.0, 293.0, 294.0, 276.0, 288.0, 296.0, 286.0, 317.0, 322.0, 293.0, 294.0, 296.0, 286.0, 319.0, 308.0, 316.0, 320.0, 286.0, 301.0, 316.0, 314.0, 293.0, 289.0, 319.0, 311.0, 316.0, 323.0, 293.0, 294.0, 294.0, 288.0, 290.0, 292.0, 285.0, 288.0, 316.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 273.0, 254.0, 291.0, 296.0, 316.0, 320.0, 290.0, 294.0, 290.0, 297.0, 293.0, 286.0, 285.0, 285.0, 294.0, 288.0, 290.0, 289.0, 314.0, 322.0, 313.0, 314.0, 273.0, 300.0, 319.0, 314.0, 299.0, 283.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 293.0, 289.0, 293.0, 286.0, 285.0, 297.0, 293.0, 294.0, 319.0, 317.0, 296.0, 291.0, 313.0, 314.0, 296.0, 291.0, 311.0, 322.0, 291.0, 291.0, 316.0, 314.0, 316.0, 320.0, 317.0, 322.0, 324.0, 315.0, 291.0, 291.0, 313.0, 
314.0, 296.0, 280.0, 296.0, 291.0, 314.0, 322.0, 313.0, 320.0, 314.0, 322.0, 319.0, 311.0, 291.0, 296.0, 316.0, 317.0, 291.0, 291.0, 290.0, 289.0, 288.0, 285.0, 309.0, 327.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 317.0, 288.0, 299.0, 288.0, 285.0, 314.0, 322.0, 313.0, 317.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7772074101048227, "mean_processing_ms": 0.22397937610682028, "mean_inference_ms": 1.379755174295214}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11112000, "num_steps_sampled": 5926400, "sample_time_ms": 22034.25, "load_time_ms": 37.344, "grad_time_ms": 10407.822, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004309405107051134, "policy_loss": -0.0030093893874436617, "vf_loss": 78.81652069091797, "vf_explained_var": 0.7744302749633789, "kl": 0.0022333713714033365, "entropy": 1.1256990432739258, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5926400, "episodes_total": 14816, "training_iteration": 463, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-46-04", "timestamp": 1660261564, "time_this_iter_s": 34.88473105430603, "time_total_s": 19972.43727183342, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19972.43727183342, "timesteps_since_restore": 5926400, "iterations_since_restore": 463, "perf": {"cpu_util_percent": 36.355999999999995, "ram_util_percent": 59.1}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 613.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.575}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.35, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 14, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.47, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, 
"useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 14, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.19, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 14, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, 
"optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 14, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 633.0, 633.0, 630.0, 630.0, 630.0, 630.0, 627.0, 587.0, 630.0, 
576.0, 627.0, 587.0, 630.0, 630.0, 633.0, 582.0, 522.0, 630.0, 633.0, 587.0, 582.0, 639.0, 582.0, 630.0, 633.0, 582.0, 630.0, 633.0, 636.0, 636.0, 587.0, 627.0, 587.0, 633.0, 582.0, 630.0, 636.0, 639.0, 639.0, 582.0, 627.0, 576.0, 587.0, 636.0, 633.0, 636.0, 630.0, 587.0, 633.0, 582.0, 579.0, 573.0, 636.0, 627.0, 633.0, 636.0, 633.0, 587.0, 573.0, 636.0, 630.0, 636.0, 633.0, 636.0, 630.0, 633.0, 636.0, 630.0, 636.0, 576.0, 587.0, 579.0, 633.0, 579.0, 630.0, 636.0, 636.0, 569.0, 582.0, 584.0, 633.0, 633.0, 627.0, 582.0, 582.0, 587.0, 564.0, 582.0, 639.0, 587.0, 582.0, 627.0, 636.0, 587.0, 630.0, 582.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 309.0, 321.0, 319.0, 314.0, 319.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 314.0, 316.0, 314.0, 308.0, 319.0, 293.0, 294.0, 311.0, 319.0, 288.0, 288.0, 313.0, 314.0, 291.0, 296.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 257.0, 265.0, 316.0, 314.0, 324.0, 309.0, 291.0, 296.0, 288.0, 294.0, 319.0, 320.0, 290.0, 292.0, 313.0, 317.0, 319.0, 314.0, 290.0, 292.0, 311.0, 319.0, 316.0, 317.0, 311.0, 325.0, 319.0, 317.0, 296.0, 291.0, 313.0, 314.0, 296.0, 291.0, 311.0, 322.0, 291.0, 291.0, 316.0, 314.0, 316.0, 320.0, 317.0, 322.0, 324.0, 315.0, 291.0, 291.0, 313.0, 314.0, 296.0, 280.0, 296.0, 291.0, 314.0, 322.0, 313.0, 320.0, 314.0, 322.0, 319.0, 311.0, 291.0, 296.0, 316.0, 317.0, 291.0, 291.0, 290.0, 289.0, 288.0, 285.0, 309.0, 327.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 317.0, 288.0, 
299.0, 288.0, 285.0, 314.0, 322.0, 313.0, 317.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 319.0, 314.0, 319.0, 317.0, 319.0, 311.0, 319.0, 317.0, 282.0, 294.0, 291.0, 296.0, 294.0, 285.0, 314.0, 319.0, 291.0, 288.0, 316.0, 314.0, 314.0, 322.0, 322.0, 314.0, 281.0, 288.0, 292.0, 290.0, 285.0, 299.0, 321.0, 312.0, 319.0, 314.0, 305.0, 322.0, 293.0, 289.0, 288.0, 294.0, 293.0, 294.0, 276.0, 288.0, 296.0, 286.0, 317.0, 322.0, 293.0, 294.0, 296.0, 286.0, 319.0, 308.0, 316.0, 320.0, 286.0, 301.0, 316.0, 314.0, 293.0, 289.0, 319.0, 311.0, 316.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7765494210947501, "mean_processing_ms": 0.2238566721049911, "mean_inference_ms": 1.3793130627015395}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11136000, "num_steps_sampled": 5939200, "sample_time_ms": 22597.793, "load_time_ms": 37.078, "grad_time_ms": 10833.136, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004432214889675379, "policy_loss": -0.0028620418161153793, "vf_loss": 78.4912338256836, "vf_explained_var": 0.7597255110740662, "kl": 0.0021421227138489485, "entropy": 1.10971999168396, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5939200, "episodes_total": 14848, "training_iteration": 464, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-46-41", "timestamp": 1660261601, "time_this_iter_s": 37.22005105018616, "time_total_s": 20009.657322883606, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20009.657322883606, "timesteps_since_restore": 5939200, "iterations_since_restore": 464, "perf": {"cpu_util_percent": 
33.75576923076923, "ram_util_percent": 59.192307692307686}} +{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 609.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.695}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.39, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.12, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, 
"onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.2, "potting_onion_agent_0_min": 14, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.12, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.2, "optimal_onion_potting_agent_0_min": 14, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.2, "viable_onion_potting_agent_0_min": 14, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 522.0, 636.0, 573.0, 636.0, 636.0, 582.0, 582.0, 633.0, 633.0, 587.0, 633.0, 633.0, 582.0, 590.0, 582.0, 579.0, 630.0, 627.0, 587.0, 462.0, 639.0, 630.0, 579.0, 584.0, 630.0, 587.0, 579.0, 587.0, 630.0, 633.0, 633.0, 636.0, 630.0, 633.0, 636.0, 630.0, 636.0, 576.0, 587.0, 579.0, 633.0, 579.0, 630.0, 636.0, 636.0, 569.0, 582.0, 584.0, 633.0, 633.0, 627.0, 582.0, 582.0, 587.0, 564.0, 582.0, 639.0, 587.0, 582.0, 627.0, 636.0, 587.0, 630.0, 582.0, 630.0, 639.0, 633.0, 630.0, 633.0, 633.0, 630.0, 630.0, 630.0, 630.0, 627.0, 587.0, 630.0, 576.0, 627.0, 587.0, 630.0, 630.0, 633.0, 582.0, 522.0, 630.0, 633.0, 587.0, 582.0, 639.0, 582.0, 630.0, 633.0, 582.0, 630.0, 633.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 322.0, 316.0, 314.0, 268.0, 254.0, 319.0, 317.0, 299.0, 274.0, 314.0, 322.0, 313.0, 323.0, 288.0, 294.0, 293.0, 289.0, 324.0, 309.0, 316.0, 317.0, 298.0, 289.0, 319.0, 314.0, 316.0, 317.0, 293.0, 289.0, 296.0, 294.0, 288.0, 294.0, 288.0, 291.0, 316.0, 314.0, 307.0, 320.0, 296.0, 291.0, 223.0, 239.0, 324.0, 315.0, 313.0, 317.0, 290.0, 289.0, 293.0, 291.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 298.0, 289.0, 316.0, 314.0, 314.0, 319.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 319.0, 314.0, 319.0, 317.0, 319.0, 311.0, 319.0, 317.0, 282.0, 294.0, 291.0, 296.0, 294.0, 285.0, 314.0, 319.0, 291.0, 288.0, 316.0, 314.0, 314.0, 
322.0, 322.0, 314.0, 281.0, 288.0, 292.0, 290.0, 285.0, 299.0, 321.0, 312.0, 319.0, 314.0, 305.0, 322.0, 293.0, 289.0, 288.0, 294.0, 293.0, 294.0, 276.0, 288.0, 296.0, 286.0, 317.0, 322.0, 293.0, 294.0, 296.0, 286.0, 319.0, 308.0, 316.0, 320.0, 286.0, 301.0, 316.0, 314.0, 293.0, 289.0, 319.0, 311.0, 316.0, 323.0, 316.0, 317.0, 309.0, 321.0, 319.0, 314.0, 319.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 314.0, 316.0, 314.0, 308.0, 319.0, 293.0, 294.0, 311.0, 319.0, 288.0, 288.0, 313.0, 314.0, 291.0, 296.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 257.0, 265.0, 316.0, 314.0, 324.0, 309.0, 291.0, 296.0, 288.0, 294.0, 319.0, 320.0, 290.0, 292.0, 313.0, 317.0, 319.0, 314.0, 290.0, 292.0, 311.0, 319.0, 316.0, 317.0, 311.0, 325.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7758921958877883, "mean_processing_ms": 0.22373444703892312, "mean_inference_ms": 1.3788740058001947}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11160000, "num_steps_sampled": 5952000, "sample_time_ms": 22835.117, "load_time_ms": 36.936, "grad_time_ms": 10943.452, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008901534602046013, "policy_loss": -0.006549746263772249, "vf_loss": 80.0201416015625, "vf_explained_var": 0.7684802412986755, "kl": 0.0019470960833132267, "entropy": 1.1242157220840454, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5952000, "episodes_total": 14880, "training_iteration": 465, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-47-14", "timestamp": 1660261634, "time_this_iter_s": 32.92273998260498, "time_total_s": 20042.58006286621, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20042.58006286621, "timesteps_since_restore": 5952000, "iterations_since_restore": 465, "perf": {"cpu_util_percent": 33.19787234042553, "ram_util_percent": 58.98936170212765}} +{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 611.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 305.81}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.62, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.66, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 16.01, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.63, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.13, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.9, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.99, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 630.0, 579.0, 639.0, 636.0, 630.0, 636.0, 633.0, 636.0, 582.0, 587.0, 624.0, 639.0, 630.0, 582.0, 636.0, 633.0, 633.0, 630.0, 630.0, 579.0, 555.0, 630.0, 576.0, 630.0, 627.0, 587.0, 639.0, 627.0, 627.0, 633.0, 630.0, 582.0, 630.0, 639.0, 633.0, 630.0, 633.0, 633.0, 630.0, 630.0, 630.0, 630.0, 627.0, 587.0, 630.0, 576.0, 627.0, 587.0, 630.0, 630.0, 633.0, 582.0, 522.0, 630.0, 633.0, 587.0, 582.0, 639.0, 582.0, 630.0, 633.0, 582.0, 630.0, 633.0, 636.0, 636.0, 639.0, 630.0, 522.0, 636.0, 573.0, 636.0, 636.0, 582.0, 582.0, 633.0, 633.0, 587.0, 633.0, 633.0, 582.0, 590.0, 582.0, 579.0, 630.0, 627.0, 587.0, 462.0, 639.0, 630.0, 579.0, 584.0, 630.0, 587.0, 579.0, 587.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 288.0, 294.0, 316.0, 314.0, 288.0, 291.0, 317.0, 322.0, 319.0, 317.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 291.0, 291.0, 280.0, 307.0, 316.0, 308.0, 324.0, 315.0, 317.0, 313.0, 283.0, 299.0, 321.0, 315.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 311.0, 319.0, 294.0, 285.0, 270.0, 285.0, 316.0, 314.0, 281.0, 295.0, 310.0, 320.0, 313.0, 314.0, 288.0, 299.0, 322.0, 317.0, 313.0, 314.0, 
321.0, 306.0, 316.0, 317.0, 316.0, 314.0, 293.0, 289.0, 319.0, 311.0, 316.0, 323.0, 316.0, 317.0, 309.0, 321.0, 319.0, 314.0, 319.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 314.0, 316.0, 314.0, 308.0, 319.0, 293.0, 294.0, 311.0, 319.0, 288.0, 288.0, 313.0, 314.0, 291.0, 296.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 257.0, 265.0, 316.0, 314.0, 324.0, 309.0, 291.0, 296.0, 288.0, 294.0, 319.0, 320.0, 290.0, 292.0, 313.0, 317.0, 319.0, 314.0, 290.0, 292.0, 311.0, 319.0, 316.0, 317.0, 311.0, 325.0, 319.0, 317.0, 317.0, 322.0, 316.0, 314.0, 268.0, 254.0, 319.0, 317.0, 299.0, 274.0, 314.0, 322.0, 313.0, 323.0, 288.0, 294.0, 293.0, 289.0, 324.0, 309.0, 316.0, 317.0, 298.0, 289.0, 319.0, 314.0, 316.0, 317.0, 293.0, 289.0, 296.0, 294.0, 288.0, 294.0, 288.0, 291.0, 316.0, 314.0, 307.0, 320.0, 296.0, 291.0, 223.0, 239.0, 324.0, 315.0, 313.0, 317.0, 290.0, 289.0, 293.0, 291.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 298.0, 289.0, 316.0, 314.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7752302665514498, "mean_processing_ms": 0.22360935089938724, "mean_inference_ms": 1.378262918190576}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11184000, "num_steps_sampled": 5964800, "sample_time_ms": 22879.712, "load_time_ms": 37.04, "grad_time_ms": 10842.802, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008535028900951147, "policy_loss": -0.007580641657114029, "vf_loss": 72.87356567382812, "vf_explained_var": 0.7752940058708191, "kl": 0.0019255572697147727, "entropy": 1.12042236328125, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5964800, "episodes_total": 14912, "training_iteration": 466, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-47-42", "timestamp": 1660261662, "time_this_iter_s": 28.411120176315308, "time_total_s": 
20070.991183042526, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": 
false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, 
"num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20070.991183042526, "timesteps_since_restore": 5964800, "iterations_since_restore": 466, "perf": {"cpu_util_percent": 30.5725, "ram_util_percent": 58.955}} +{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 607.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.53}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.26, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 16.71, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.11, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.33, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.67, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.24, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.11, 
"soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.14, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 584.0, 579.0, 582.0, 630.0, 627.0, 573.0, 582.0, 636.0, 587.0, 639.0, 636.0, 539.0, 615.0, 579.0, 582.0, 633.0, 630.0, 582.0, 582.0, 576.0, 636.0, 627.0, 630.0, 507.0, 633.0, 564.0, 627.0, 582.0, 630.0, 630.0, 582.0, 630.0, 633.0, 636.0, 636.0, 639.0, 630.0, 522.0, 636.0, 573.0, 636.0, 636.0, 582.0, 582.0, 633.0, 633.0, 587.0, 633.0, 633.0, 582.0, 590.0, 582.0, 579.0, 630.0, 627.0, 587.0, 462.0, 639.0, 630.0, 579.0, 584.0, 630.0, 587.0, 579.0, 587.0, 630.0, 633.0, 579.0, 582.0, 630.0, 579.0, 639.0, 636.0, 630.0, 636.0, 633.0, 636.0, 582.0, 587.0, 624.0, 639.0, 630.0, 582.0, 636.0, 633.0, 633.0, 630.0, 630.0, 579.0, 555.0, 630.0, 576.0, 630.0, 627.0, 587.0, 639.0, 627.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 296.0, 290.0, 294.0, 288.0, 291.0, 291.0, 291.0, 316.0, 314.0, 321.0, 306.0, 288.0, 285.0, 288.0, 294.0, 322.0, 314.0, 287.0, 300.0, 314.0, 325.0, 314.0, 322.0, 271.0, 268.0, 302.0, 313.0, 285.0, 294.0, 288.0, 
294.0, 314.0, 319.0, 316.0, 314.0, 288.0, 294.0, 291.0, 291.0, 288.0, 288.0, 317.0, 319.0, 316.0, 311.0, 311.0, 319.0, 251.0, 256.0, 314.0, 319.0, 281.0, 283.0, 308.0, 319.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 297.0, 285.0, 311.0, 319.0, 316.0, 317.0, 311.0, 325.0, 319.0, 317.0, 317.0, 322.0, 316.0, 314.0, 268.0, 254.0, 319.0, 317.0, 299.0, 274.0, 314.0, 322.0, 313.0, 323.0, 288.0, 294.0, 293.0, 289.0, 324.0, 309.0, 316.0, 317.0, 298.0, 289.0, 319.0, 314.0, 316.0, 317.0, 293.0, 289.0, 296.0, 294.0, 288.0, 294.0, 288.0, 291.0, 316.0, 314.0, 307.0, 320.0, 296.0, 291.0, 223.0, 239.0, 324.0, 315.0, 313.0, 317.0, 290.0, 289.0, 293.0, 291.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 298.0, 289.0, 316.0, 314.0, 314.0, 319.0, 291.0, 288.0, 288.0, 294.0, 316.0, 314.0, 288.0, 291.0, 317.0, 322.0, 319.0, 317.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 291.0, 291.0, 280.0, 307.0, 316.0, 308.0, 324.0, 315.0, 317.0, 313.0, 283.0, 299.0, 321.0, 315.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 311.0, 319.0, 294.0, 285.0, 270.0, 285.0, 316.0, 314.0, 281.0, 295.0, 310.0, 320.0, 313.0, 314.0, 288.0, 299.0, 322.0, 317.0, 313.0, 314.0, 321.0, 306.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7745646022660153, "mean_processing_ms": 0.2234810440216946, "mean_inference_ms": 1.3774520581026746}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11208000, "num_steps_sampled": 5977600, "sample_time_ms": 22667.166, "load_time_ms": 37.563, "grad_time_ms": 10782.505, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003179629857186228, "policy_loss": -0.007398936897516251, "vf_loss": 76.44898986816406, "vf_explained_var": 0.7757420539855957, "kl": 0.0019861727487295866, "entropy": 1.127841830253601, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5977600, "episodes_total": 14944, 
"training_iteration": 467, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-48-12", "timestamp": 1660261692, "time_this_iter_s": 29.89157724380493, "time_total_s": 20100.88276028633, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20100.88276028633, "timesteps_since_restore": 5977600, "iterations_since_restore": 467, "perf": {"cpu_util_percent": 33.63333333333334, "ram_util_percent": 58.976190476190474}} +{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 606.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.42}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.04, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.18, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.58, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.7, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.42, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.58, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.58, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 581.0, 582.0, 582.0, 636.0, 633.0, 633.0, 561.0, 630.0, 578.0, 633.0, 582.0, 582.0, 582.0, 579.0, 584.0, 639.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 567.0, 587.0, 636.0, 630.0, 582.0, 639.0, 633.0, 630.0, 576.0, 579.0, 587.0, 630.0, 633.0, 579.0, 582.0, 630.0, 579.0, 639.0, 636.0, 630.0, 636.0, 633.0, 636.0, 582.0, 587.0, 624.0, 639.0, 630.0, 582.0, 636.0, 633.0, 633.0, 630.0, 630.0, 579.0, 555.0, 630.0, 576.0, 630.0, 627.0, 587.0, 639.0, 627.0, 627.0, 633.0, 582.0, 584.0, 579.0, 582.0, 630.0, 627.0, 573.0, 582.0, 636.0, 587.0, 639.0, 636.0, 539.0, 615.0, 579.0, 582.0, 633.0, 630.0, 582.0, 582.0, 576.0, 636.0, 627.0, 630.0, 507.0, 633.0, 564.0, 627.0, 582.0, 630.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 290.0, 291.0, 
293.0, 289.0, 296.0, 286.0, 321.0, 315.0, 319.0, 314.0, 319.0, 314.0, 285.0, 276.0, 316.0, 314.0, 295.0, 283.0, 319.0, 314.0, 288.0, 294.0, 294.0, 288.0, 283.0, 299.0, 281.0, 298.0, 288.0, 296.0, 317.0, 322.0, 296.0, 291.0, 289.0, 293.0, 288.0, 299.0, 321.0, 312.0, 313.0, 317.0, 319.0, 311.0, 284.0, 283.0, 302.0, 285.0, 314.0, 322.0, 313.0, 317.0, 291.0, 291.0, 317.0, 322.0, 309.0, 324.0, 316.0, 314.0, 291.0, 285.0, 288.0, 291.0, 298.0, 289.0, 316.0, 314.0, 314.0, 319.0, 291.0, 288.0, 288.0, 294.0, 316.0, 314.0, 288.0, 291.0, 317.0, 322.0, 319.0, 317.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 291.0, 291.0, 280.0, 307.0, 316.0, 308.0, 324.0, 315.0, 317.0, 313.0, 283.0, 299.0, 321.0, 315.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 311.0, 319.0, 294.0, 285.0, 270.0, 285.0, 316.0, 314.0, 281.0, 295.0, 310.0, 320.0, 313.0, 314.0, 288.0, 299.0, 322.0, 317.0, 313.0, 314.0, 321.0, 306.0, 316.0, 317.0, 286.0, 296.0, 290.0, 294.0, 288.0, 291.0, 291.0, 291.0, 316.0, 314.0, 321.0, 306.0, 288.0, 285.0, 288.0, 294.0, 322.0, 314.0, 287.0, 300.0, 314.0, 325.0, 314.0, 322.0, 271.0, 268.0, 302.0, 313.0, 285.0, 294.0, 288.0, 294.0, 314.0, 319.0, 316.0, 314.0, 288.0, 294.0, 291.0, 291.0, 288.0, 288.0, 317.0, 319.0, 316.0, 311.0, 311.0, 319.0, 251.0, 256.0, 314.0, 319.0, 281.0, 283.0, 308.0, 319.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 297.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7738952247411988, "mean_processing_ms": 0.22335063238074299, "mean_inference_ms": 1.3764749095367632}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11232000, "num_steps_sampled": 5990400, "sample_time_ms": 22503.528, "load_time_ms": 37.786, "grad_time_ms": 10902.652, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012515783309936523, "policy_loss": -0.006079933140426874, "vf_loss": 78.91991424560547, "vf_explained_var": 0.7658045887947083, "kl": 0.0020609761122614145, "entropy": 1.1209732294082642, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5990400, "episodes_total": 14976, "training_iteration": 468, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-48-43", "timestamp": 1660261723, "time_this_iter_s": 31.115761756896973, "time_total_s": 20131.998522043228, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20131.998522043228, "timesteps_since_restore": 5990400, "iterations_since_restore": 468, "perf": {"cpu_util_percent": 34.11818181818182, "ram_util_percent": 59.265909090909076}} +{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 609.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.625}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.45, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.39, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.84, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.35, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.08, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 624.0, 627.0, 639.0, 636.0, 636.0, 636.0, 587.0, 633.0, 582.0, 630.0, 636.0, 633.0, 624.0, 630.0, 624.0, 582.0, 633.0, 570.0, 579.0, 633.0, 630.0, 633.0, 630.0, 587.0, 570.0, 633.0, 633.0, 633.0, 627.0, 633.0, 627.0, 639.0, 627.0, 627.0, 633.0, 582.0, 584.0, 579.0, 582.0, 630.0, 627.0, 573.0, 582.0, 636.0, 587.0, 639.0, 636.0, 539.0, 615.0, 579.0, 582.0, 633.0, 630.0, 582.0, 582.0, 576.0, 636.0, 627.0, 630.0, 507.0, 633.0, 564.0, 627.0, 582.0, 630.0, 630.0, 582.0, 630.0, 581.0, 582.0, 582.0, 636.0, 633.0, 633.0, 561.0, 630.0, 578.0, 633.0, 582.0, 582.0, 582.0, 579.0, 584.0, 639.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 567.0, 587.0, 636.0, 630.0, 582.0, 639.0, 633.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 313.0, 311.0, 316.0, 311.0, 319.0, 320.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 295.0, 292.0, 316.0, 317.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 316.0, 308.0, 311.0, 319.0, 315.0, 309.0, 288.0, 294.0, 316.0, 317.0, 282.0, 288.0, 294.0, 285.0, 319.0, 314.0, 316.0, 314.0, 313.0, 320.0, 316.0, 314.0, 291.0, 296.0, 296.0, 274.0, 321.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 311.0, 316.0, 317.0, 311.0, 316.0, 322.0, 317.0, 313.0, 314.0, 321.0, 306.0, 316.0, 317.0, 286.0, 296.0, 290.0, 294.0, 288.0, 291.0, 291.0, 291.0, 316.0, 314.0, 321.0, 306.0, 288.0, 285.0, 288.0, 294.0, 322.0, 314.0, 287.0, 300.0, 314.0, 325.0, 314.0, 322.0, 271.0, 268.0, 302.0, 313.0, 285.0, 294.0, 288.0, 294.0, 314.0, 319.0, 316.0, 314.0, 288.0, 294.0, 291.0, 291.0, 288.0, 288.0, 317.0, 319.0, 316.0, 311.0, 311.0, 319.0, 251.0, 256.0, 314.0, 319.0, 281.0, 283.0, 308.0, 319.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 297.0, 285.0, 316.0, 314.0, 290.0, 291.0, 293.0, 289.0, 296.0, 286.0, 321.0, 315.0, 319.0, 314.0, 319.0, 314.0, 285.0, 276.0, 316.0, 314.0, 295.0, 283.0, 319.0, 314.0, 288.0, 294.0, 294.0, 288.0, 283.0, 299.0, 281.0, 298.0, 288.0, 296.0, 317.0, 322.0, 296.0, 291.0, 289.0, 293.0, 288.0, 299.0, 321.0, 312.0, 313.0, 317.0, 319.0, 311.0, 284.0, 283.0, 302.0, 285.0, 314.0, 322.0, 313.0, 317.0, 291.0, 291.0, 317.0, 322.0, 309.0, 324.0, 316.0, 314.0, 291.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7732342684064082, "mean_processing_ms": 0.22322351749716166, "mean_inference_ms": 1.3755872244816174}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11256000, "num_steps_sampled": 6003200, "sample_time_ms": 22143.958, "load_time_ms": 38.351, "grad_time_ms": 11066.072, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": -0.00040481146425008774, "policy_loss": -0.006932735443115234, "vf_loss": 70.86636352539062, "vf_explained_var": 0.7707180976867676, "kl": 0.0017913728952407837, "entropy": 1.1174226999282837, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6003200, "episodes_total": 15008, "training_iteration": 469, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-49-17", "timestamp": 1660261757, "time_this_iter_s": 33.90879726409912, "time_total_s": 20165.907319307327, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20165.907319307327, "timesteps_since_restore": 6003200, "iterations_since_restore": 469, "perf": {"cpu_util_percent": 34.28125, "ram_util_percent": 58.99583333333334}} +{"episode_reward_max": 639.0, "episode_reward_min": 552.0, "episode_reward_mean": 612.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 274.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 306.385}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 187.17, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, 
"tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.73, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.28, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.94, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.92, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.28, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 633.0, 582.0, 639.0, 579.0, 579.0, 630.0, 630.0, 624.0, 636.0, 582.0, 630.0, 630.0, 633.0, 615.0, 627.0, 630.0, 587.0, 630.0, 630.0, 630.0, 636.0, 630.0, 636.0, 621.0, 587.0, 633.0, 582.0, 579.0, 633.0, 630.0, 552.0, 582.0, 630.0, 630.0, 582.0, 630.0, 581.0, 582.0, 582.0, 636.0, 633.0, 633.0, 561.0, 630.0, 578.0, 633.0, 582.0, 582.0, 582.0, 579.0, 584.0, 639.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 567.0, 587.0, 636.0, 630.0, 582.0, 639.0, 633.0, 630.0, 576.0, 630.0, 624.0, 627.0, 639.0, 636.0, 636.0, 636.0, 587.0, 633.0, 582.0, 630.0, 636.0, 633.0, 624.0, 630.0, 624.0, 582.0, 633.0, 570.0, 579.0, 633.0, 630.0, 633.0, 630.0, 587.0, 570.0, 633.0, 633.0, 633.0, 627.0, 633.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 298.0, 314.0, 319.0, 294.0, 288.0, 317.0, 322.0, 297.0, 282.0, 285.0, 294.0, 311.0, 319.0, 319.0, 311.0, 315.0, 309.0, 319.0, 317.0, 291.0, 291.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 307.0, 308.0, 319.0, 308.0, 316.0, 314.0, 296.0, 291.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 313.0, 308.0, 291.0, 296.0, 311.0, 322.0, 293.0, 289.0, 279.0, 300.0, 311.0, 322.0, 316.0, 314.0, 276.0, 276.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 297.0, 285.0, 316.0, 314.0, 290.0, 291.0, 293.0, 289.0, 296.0, 286.0, 321.0, 315.0, 319.0, 314.0, 319.0, 314.0, 285.0, 276.0, 316.0, 314.0, 295.0, 283.0, 319.0, 314.0, 288.0, 294.0, 294.0, 288.0, 283.0, 299.0, 281.0, 298.0, 288.0, 296.0, 317.0, 322.0, 296.0, 291.0, 289.0, 293.0, 288.0, 299.0, 321.0, 312.0, 313.0, 317.0, 319.0, 311.0, 284.0, 283.0, 302.0, 285.0, 314.0, 322.0, 313.0, 317.0, 291.0, 291.0, 317.0, 322.0, 309.0, 324.0, 316.0, 314.0, 291.0, 285.0, 316.0, 314.0, 313.0, 311.0, 316.0, 311.0, 319.0, 320.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 295.0, 292.0, 316.0, 317.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 316.0, 308.0, 311.0, 319.0, 315.0, 309.0, 288.0, 294.0, 316.0, 317.0, 282.0, 288.0, 294.0, 285.0, 319.0, 314.0, 316.0, 314.0, 313.0, 320.0, 316.0, 314.0, 291.0, 296.0, 296.0, 274.0, 321.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 311.0, 316.0, 317.0, 311.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7725761312773625, "mean_processing_ms": 0.2230967942471684, "mean_inference_ms": 1.3747759311690726}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11280000, 
"num_steps_sampled": 6016000, "sample_time_ms": 21879.975, "load_time_ms": 38.961, "grad_time_ms": 10890.541, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007060763309709728, "policy_loss": -0.0072075664065778255, "vf_loss": 70.60037231445312, "vf_explained_var": 0.7745871543884277, "kl": 0.0018414078513160348, "entropy": 1.1170852184295654, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6016000, "episodes_total": 15040, "training_iteration": 470, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-49-49", "timestamp": 1660261789, "time_this_iter_s": 31.930355072021484, "time_total_s": 20197.83767437935, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": 
false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20197.83767437935, "timesteps_since_restore": 6016000, "iterations_since_restore": 470, "perf": {"cpu_util_percent": 29.486666666666665, "ram_util_percent": 59.02666666666667}} +{"episode_reward_max": 639.0, "episode_reward_min": 552.0, "episode_reward_mean": 617.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 274.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 308.805}, "custom_metrics": {"sparse_reward_mean": 214.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 188.41, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.03, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.8, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.14, "dish_pickup_agent_1_min": 4, 
"dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.42, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 587.0, 624.0, 633.0, 636.0, 630.0, 633.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 584.0, 582.0, 636.0, 618.0, 636.0, 633.0, 627.0, 582.0, 582.0, 582.0, 633.0, 636.0, 630.0, 582.0, 639.0, 633.0, 630.0, 576.0, 630.0, 624.0, 627.0, 639.0, 636.0, 636.0, 636.0, 587.0, 633.0, 582.0, 630.0, 636.0, 633.0, 624.0, 630.0, 624.0, 582.0, 633.0, 570.0, 579.0, 633.0, 630.0, 633.0, 630.0, 587.0, 570.0, 633.0, 633.0, 633.0, 627.0, 633.0, 627.0, 582.0, 633.0, 582.0, 639.0, 579.0, 579.0, 630.0, 630.0, 624.0, 636.0, 582.0, 630.0, 630.0, 633.0, 615.0, 627.0, 630.0, 587.0, 630.0, 630.0, 630.0, 636.0, 630.0, 636.0, 621.0, 587.0, 
633.0, 582.0, 579.0, 633.0, 630.0, 552.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 319.0, 311.0, 288.0, 299.0, 311.0, 313.0, 311.0, 322.0, 317.0, 319.0, 313.0, 317.0, 316.0, 317.0, 321.0, 312.0, 317.0, 322.0, 314.0, 319.0, 324.0, 312.0, 324.0, 312.0, 321.0, 312.0, 308.0, 319.0, 296.0, 286.0, 314.0, 319.0, 319.0, 320.0, 296.0, 288.0, 286.0, 296.0, 319.0, 317.0, 308.0, 310.0, 317.0, 319.0, 319.0, 314.0, 321.0, 306.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 291.0, 291.0, 317.0, 322.0, 309.0, 324.0, 316.0, 314.0, 291.0, 285.0, 316.0, 314.0, 313.0, 311.0, 316.0, 311.0, 319.0, 320.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 295.0, 292.0, 316.0, 317.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 316.0, 308.0, 311.0, 319.0, 315.0, 309.0, 288.0, 294.0, 316.0, 317.0, 282.0, 288.0, 294.0, 285.0, 319.0, 314.0, 316.0, 314.0, 313.0, 320.0, 316.0, 314.0, 291.0, 296.0, 296.0, 274.0, 321.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 311.0, 316.0, 317.0, 311.0, 316.0, 284.0, 298.0, 314.0, 319.0, 294.0, 288.0, 317.0, 322.0, 297.0, 282.0, 285.0, 294.0, 311.0, 319.0, 319.0, 311.0, 315.0, 309.0, 319.0, 317.0, 291.0, 291.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 307.0, 308.0, 319.0, 308.0, 316.0, 314.0, 296.0, 291.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 313.0, 308.0, 291.0, 296.0, 311.0, 322.0, 293.0, 289.0, 279.0, 300.0, 311.0, 322.0, 316.0, 314.0, 276.0, 276.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.7719288658166613, "mean_processing_ms": 0.22297329438160504, "mean_inference_ms": 1.3741316094375031}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11304000, "num_steps_sampled": 6028800, "sample_time_ms": 21780.977, "load_time_ms": 38.854, "grad_time_ms": 10976.386, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016910168342292309, "policy_loss": -0.008838978596031666, "vf_loss": 77.08248901367188, "vf_explained_var": 0.7691299319267273, "kl": 0.0020619730930775404, "entropy": 1.1205838918685913, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6028800, "episodes_total": 15072, "training_iteration": 471, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-50-24", "timestamp": 1660261824, "time_this_iter_s": 35.09460806846619, "time_total_s": 20232.932282447815, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20232.932282447815, "timesteps_since_restore": 6028800, "iterations_since_restore": 471, "perf": {"cpu_util_percent": 30.86, "ram_util_percent": 59.02799999999999}} +{"episode_reward_max": 639.0, "episode_reward_min": 552.0, "episode_reward_mean": 617.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 276.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.91}, "custom_metrics": {"sparse_reward_mean": 214.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 188.62, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.21, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.51, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.49, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.59, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, 
"potting_onion_agent_1_mean": 17.78, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.62, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.19, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.59, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.78, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 16.59, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.78, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 582.0, 630.0, 624.0, 615.0, 582.0, 582.0, 633.0, 630.0, 633.0, 639.0, 582.0, 633.0, 621.0, 633.0, 636.0, 636.0, 636.0, 633.0, 633.0, 581.0, 633.0, 630.0, 582.0, 627.0, 584.0, 636.0, 582.0, 630.0, 636.0, 639.0, 633.0, 627.0, 633.0, 627.0, 582.0, 633.0, 582.0, 639.0, 579.0, 579.0, 630.0, 630.0, 624.0, 636.0, 582.0, 630.0, 630.0, 633.0, 615.0, 627.0, 630.0, 587.0, 630.0, 630.0, 630.0, 636.0, 630.0, 
636.0, 621.0, 587.0, 633.0, 582.0, 579.0, 633.0, 630.0, 552.0, 579.0, 630.0, 587.0, 624.0, 633.0, 636.0, 630.0, 633.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 584.0, 582.0, 636.0, 618.0, 636.0, 633.0, 627.0, 582.0, 582.0, 582.0, 633.0, 636.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 316.0, 308.0, 305.0, 310.0, 291.0, 291.0, 287.0, 295.0, 311.0, 322.0, 316.0, 314.0, 317.0, 316.0, 319.0, 320.0, 293.0, 289.0, 316.0, 317.0, 310.0, 311.0, 314.0, 319.0, 309.0, 327.0, 319.0, 317.0, 314.0, 322.0, 311.0, 322.0, 319.0, 314.0, 293.0, 288.0, 317.0, 316.0, 309.0, 321.0, 296.0, 286.0, 314.0, 313.0, 298.0, 286.0, 317.0, 319.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 323.0, 317.0, 316.0, 316.0, 311.0, 316.0, 317.0, 311.0, 316.0, 284.0, 298.0, 314.0, 319.0, 294.0, 288.0, 317.0, 322.0, 297.0, 282.0, 285.0, 294.0, 311.0, 319.0, 319.0, 311.0, 315.0, 309.0, 319.0, 317.0, 291.0, 291.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 307.0, 308.0, 319.0, 308.0, 316.0, 314.0, 296.0, 291.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 313.0, 308.0, 291.0, 296.0, 311.0, 322.0, 293.0, 289.0, 279.0, 300.0, 311.0, 322.0, 316.0, 314.0, 276.0, 276.0, 286.0, 293.0, 319.0, 311.0, 288.0, 299.0, 311.0, 313.0, 311.0, 322.0, 317.0, 319.0, 313.0, 317.0, 316.0, 317.0, 321.0, 312.0, 317.0, 322.0, 314.0, 319.0, 324.0, 312.0, 324.0, 312.0, 321.0, 312.0, 308.0, 319.0, 296.0, 
286.0, 314.0, 319.0, 319.0, 320.0, 296.0, 288.0, 286.0, 296.0, 319.0, 317.0, 308.0, 310.0, 317.0, 319.0, 319.0, 314.0, 321.0, 306.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7712881392644176, "mean_processing_ms": 0.2228504081573419, "mean_inference_ms": 1.3735625457302265}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11328000, "num_steps_sampled": 6041600, "sample_time_ms": 21798.833, "load_time_ms": 39.052, "grad_time_ms": 10918.523, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015959719894453883, "policy_loss": -0.005134529899805784, "vf_loss": 72.90253448486328, "vf_explained_var": 0.7736382484436035, "kl": 0.00227816472761333, "entropy": 1.1195167303085327, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6041600, "episodes_total": 15104, "training_iteration": 472, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-50-57", "timestamp": 1660261857, "time_this_iter_s": 32.47214722633362, "time_total_s": 20265.40442967415, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": 
null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, 
"compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20265.40442967415, "timesteps_since_restore": 6041600, "iterations_since_restore": 472, "perf": {"cpu_util_percent": 31.686956521739138, "ram_util_percent": 58.99347826086958}} +{"episode_reward_max": 639.0, "episode_reward_min": 498.0, "episode_reward_mean": 616.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.235}, 
"custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.07, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.18, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.78, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.16, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.91, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.6, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.78, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.78, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 582.0, 630.0, 609.0, 630.0, 636.0, 582.0, 636.0, 633.0, 633.0, 558.0, 630.0, 633.0, 630.0, 630.0, 636.0, 630.0, 636.0, 582.0, 587.0, 627.0, 639.0, 636.0, 587.0, 630.0, 627.0, 630.0, 498.0, 
579.0, 630.0, 630.0, 579.0, 633.0, 630.0, 552.0, 579.0, 630.0, 587.0, 624.0, 633.0, 636.0, 630.0, 633.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 584.0, 582.0, 636.0, 618.0, 636.0, 633.0, 627.0, 582.0, 582.0, 582.0, 633.0, 636.0, 630.0, 582.0, 633.0, 633.0, 582.0, 630.0, 624.0, 615.0, 582.0, 582.0, 633.0, 630.0, 633.0, 639.0, 582.0, 633.0, 621.0, 633.0, 636.0, 636.0, 636.0, 633.0, 633.0, 581.0, 633.0, 630.0, 582.0, 627.0, 584.0, 636.0, 582.0, 630.0, 636.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 311.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 304.0, 305.0, 316.0, 314.0, 319.0, 317.0, 286.0, 296.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 276.0, 282.0, 316.0, 314.0, 314.0, 319.0, 324.0, 306.0, 319.0, 311.0, 319.0, 317.0, 316.0, 314.0, 314.0, 322.0, 294.0, 288.0, 296.0, 291.0, 308.0, 319.0, 319.0, 320.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 322.0, 305.0, 316.0, 314.0, 242.0, 256.0, 288.0, 291.0, 313.0, 317.0, 316.0, 314.0, 279.0, 300.0, 311.0, 322.0, 316.0, 314.0, 276.0, 276.0, 286.0, 293.0, 319.0, 311.0, 288.0, 299.0, 311.0, 313.0, 311.0, 322.0, 317.0, 319.0, 313.0, 317.0, 316.0, 317.0, 321.0, 312.0, 317.0, 322.0, 314.0, 319.0, 324.0, 312.0, 324.0, 312.0, 321.0, 312.0, 308.0, 319.0, 296.0, 286.0, 314.0, 319.0, 319.0, 320.0, 296.0, 288.0, 286.0, 296.0, 319.0, 317.0, 308.0, 310.0, 317.0, 319.0, 319.0, 314.0, 321.0, 306.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 291.0, 291.0, 314.0, 
319.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 316.0, 308.0, 305.0, 310.0, 291.0, 291.0, 287.0, 295.0, 311.0, 322.0, 316.0, 314.0, 317.0, 316.0, 319.0, 320.0, 293.0, 289.0, 316.0, 317.0, 310.0, 311.0, 314.0, 319.0, 309.0, 327.0, 319.0, 317.0, 314.0, 322.0, 311.0, 322.0, 319.0, 314.0, 293.0, 288.0, 317.0, 316.0, 309.0, 321.0, 296.0, 286.0, 314.0, 313.0, 298.0, 286.0, 317.0, 319.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7706549385151754, "mean_processing_ms": 0.22272985988411015, "mean_inference_ms": 1.3730067691447254}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11352000, "num_steps_sampled": 6054400, "sample_time_ms": 21729.717, "load_time_ms": 39.098, "grad_time_ms": 10717.617, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005160535220056772, "policy_loss": -0.007616788614541292, "vf_loss": 76.61554718017578, "vf_explained_var": 0.7677715420722961, "kl": 0.0017990797059610486, "entropy": 1.1216602325439453, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6054400, "episodes_total": 15136, "training_iteration": 473, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-51-29", "timestamp": 1660261889, "time_this_iter_s": 32.178860902786255, "time_total_s": 20297.583290576935, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20297.583290576935, "timesteps_since_restore": 6054400, "iterations_since_restore": 473, "perf": {"cpu_util_percent": 29.955555555555556, "ram_util_percent": 59.05111111111111}} +{"episode_reward_max": 639.0, 
"episode_reward_min": 498.0, "episode_reward_mean": 616.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.225}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.05, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.93, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, 
"onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.94, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.67, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.89, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.41, "optimal_onion_potting_agent_0_min": 13, 
"optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.94, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.94, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [630.0, 579.0, 636.0, 639.0, 633.0, 582.0, 579.0, 627.0, 636.0, 639.0, 636.0, 630.0, 573.0, 558.0, 573.0, 636.0, 579.0, 630.0, 633.0, 630.0, 633.0, 579.0, 636.0, 630.0, 630.0, 630.0, 587.0, 624.0, 633.0, 639.0, 582.0, 636.0, 633.0, 636.0, 630.0, 582.0, 633.0, 633.0, 582.0, 630.0, 624.0, 615.0, 582.0, 582.0, 633.0, 630.0, 633.0, 639.0, 582.0, 633.0, 621.0, 633.0, 636.0, 636.0, 636.0, 633.0, 633.0, 581.0, 633.0, 630.0, 582.0, 627.0, 584.0, 636.0, 582.0, 630.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 609.0, 630.0, 636.0, 582.0, 636.0, 633.0, 633.0, 558.0, 630.0, 633.0, 630.0, 630.0, 636.0, 630.0, 636.0, 582.0, 587.0, 627.0, 639.0, 636.0, 587.0, 630.0, 627.0, 630.0, 498.0, 579.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 309.0, 285.0, 294.0, 311.0, 325.0, 319.0, 320.0, 319.0, 314.0, 291.0, 291.0, 287.0, 292.0, 313.0, 314.0, 324.0, 312.0, 324.0, 315.0, 319.0, 317.0, 311.0, 319.0, 282.0, 291.0, 278.0, 280.0, 280.0, 293.0, 319.0, 317.0, 288.0, 291.0, 311.0, 319.0, 319.0, 314.0, 317.0, 313.0, 316.0, 317.0, 293.0, 286.0, 314.0, 322.0, 321.0, 309.0, 326.0, 304.0, 316.0, 314.0, 293.0, 294.0, 305.0, 319.0, 316.0, 317.0, 314.0, 325.0, 285.0, 297.0, 319.0, 317.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 291.0, 291.0, 314.0, 319.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 316.0, 308.0, 305.0, 310.0, 291.0, 291.0, 287.0, 295.0, 311.0, 322.0, 316.0, 314.0, 317.0, 316.0, 319.0, 320.0, 293.0, 289.0, 316.0, 317.0, 310.0, 311.0, 314.0, 319.0, 309.0, 
327.0, 319.0, 317.0, 314.0, 322.0, 311.0, 322.0, 319.0, 314.0, 293.0, 288.0, 317.0, 316.0, 309.0, 321.0, 296.0, 286.0, 314.0, 313.0, 298.0, 286.0, 317.0, 319.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 323.0, 319.0, 311.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 304.0, 305.0, 316.0, 314.0, 319.0, 317.0, 286.0, 296.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 276.0, 282.0, 316.0, 314.0, 314.0, 319.0, 324.0, 306.0, 319.0, 311.0, 319.0, 317.0, 316.0, 314.0, 314.0, 322.0, 294.0, 288.0, 296.0, 291.0, 308.0, 319.0, 319.0, 320.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 322.0, 305.0, 316.0, 314.0, 242.0, 256.0, 288.0, 291.0, 313.0, 317.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7700230204234435, "mean_processing_ms": 0.22260903764962753, "mean_inference_ms": 1.3724136807505178}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11376000, "num_steps_sampled": 6067200, "sample_time_ms": 21448.766, "load_time_ms": 39.222, "grad_time_ms": 10552.154, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001221023383550346, "policy_loss": -0.005727085750550032, "vf_loss": 75.09713745117188, "vf_explained_var": 0.7705094218254089, "kl": 0.002081832615658641, "entropy": 1.1232246160507202, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6067200, "episodes_total": 15168, "training_iteration": 474, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-52-02", "timestamp": 1660261922, "time_this_iter_s": 32.74073004722595, "time_total_s": 20330.32402062416, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
20330.32402062416, "timesteps_since_restore": 6067200, "iterations_since_restore": 474, "perf": {"cpu_util_percent": 29.70652173913044, "ram_util_percent": 59.01521739130436}} +{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 616.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 308.16}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.32, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.52, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, 
"useful_onion_pickup_agent_1_mean": 17.67, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.22, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.12, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.83, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.22, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.12, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.22, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.12, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 587.0, 633.0, 633.0, 630.0, 639.0, 639.0, 624.0, 630.0, 582.0, 408.0, 636.0, 579.0, 636.0, 633.0, 633.0, 636.0, 639.0, 627.0, 633.0, 636.0, 630.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 639.0, 630.0, 633.0, 633.0, 582.0, 630.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 609.0, 630.0, 636.0, 582.0, 636.0, 633.0, 633.0, 558.0, 630.0, 633.0, 630.0, 630.0, 636.0, 630.0, 636.0, 582.0, 587.0, 627.0, 639.0, 636.0, 587.0, 630.0, 627.0, 630.0, 498.0, 579.0, 630.0, 630.0, 630.0, 579.0, 636.0, 639.0, 633.0, 582.0, 579.0, 627.0, 636.0, 639.0, 636.0, 630.0, 573.0, 558.0, 573.0, 636.0, 579.0, 630.0, 633.0, 630.0, 633.0, 579.0, 636.0, 630.0, 630.0, 630.0, 587.0, 624.0, 633.0, 639.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 293.0, 294.0, 313.0, 320.0, 319.0, 314.0, 316.0, 314.0, 322.0, 317.0, 322.0, 317.0, 313.0, 311.0, 311.0, 319.0, 291.0, 291.0, 208.0, 200.0, 314.0, 322.0, 284.0, 295.0, 319.0, 317.0, 313.0, 320.0, 311.0, 322.0, 319.0, 317.0, 322.0, 317.0, 313.0, 314.0, 322.0, 311.0, 319.0, 317.0, 321.0, 309.0, 319.0, 320.0, 318.0, 312.0, 293.0, 294.0, 311.0, 319.0, 319.0, 317.0, 288.0, 299.0, 322.0, 317.0, 316.0, 314.0, 317.0, 316.0, 319.0, 314.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 323.0, 319.0, 
311.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 304.0, 305.0, 316.0, 314.0, 319.0, 317.0, 286.0, 296.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 276.0, 282.0, 316.0, 314.0, 314.0, 319.0, 324.0, 306.0, 319.0, 311.0, 319.0, 317.0, 316.0, 314.0, 314.0, 322.0, 294.0, 288.0, 296.0, 291.0, 308.0, 319.0, 319.0, 320.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 322.0, 305.0, 316.0, 314.0, 242.0, 256.0, 288.0, 291.0, 313.0, 317.0, 316.0, 314.0, 321.0, 309.0, 285.0, 294.0, 311.0, 325.0, 319.0, 320.0, 319.0, 314.0, 291.0, 291.0, 287.0, 292.0, 313.0, 314.0, 324.0, 312.0, 324.0, 315.0, 319.0, 317.0, 311.0, 319.0, 282.0, 291.0, 278.0, 280.0, 280.0, 293.0, 319.0, 317.0, 288.0, 291.0, 311.0, 319.0, 319.0, 314.0, 317.0, 313.0, 316.0, 317.0, 293.0, 286.0, 314.0, 322.0, 321.0, 309.0, 326.0, 304.0, 316.0, 314.0, 293.0, 294.0, 305.0, 319.0, 316.0, 317.0, 314.0, 325.0, 285.0, 297.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7693923762268673, "mean_processing_ms": 0.22248851172311768, "mean_inference_ms": 1.3718337576179396}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11400000, "num_steps_sampled": 6080000, "sample_time_ms": 21471.253, "load_time_ms": 39.181, "grad_time_ms": 10674.814, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001056800247170031, "policy_loss": -0.005863674450665712, "vf_loss": 74.7898178100586, "vf_explained_var": 0.7796471118927002, "kl": 0.002407137770205736, "entropy": 1.1170334815979004, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6080000, "episodes_total": 15200, "training_iteration": 475, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-52-36", "timestamp": 1660261956, "time_this_iter_s": 34.37432289123535, "time_total_s": 20364.698343515396, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20364.698343515396, "timesteps_since_restore": 6080000, "iterations_since_restore": 475, "perf": {"cpu_util_percent": 34.573469387755104, "ram_util_percent": 58.995918367346945}} +{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 611.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.56}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 187.12, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.5, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, 
"onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.97, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.55, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.06, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.82, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.66, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.69, 
"soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.07, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.06, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.06, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 582.0, 636.0, 633.0, 636.0, 291.0, 639.0, 576.0, 587.0, 630.0, 587.0, 633.0, 621.0, 582.0, 630.0, 584.0, 465.0, 636.0, 582.0, 630.0, 636.0, 636.0, 630.0, 636.0, 624.0, 579.0, 624.0, 633.0, 636.0, 582.0, 636.0, 498.0, 579.0, 630.0, 630.0, 630.0, 579.0, 636.0, 639.0, 633.0, 582.0, 579.0, 627.0, 636.0, 639.0, 636.0, 630.0, 573.0, 558.0, 573.0, 636.0, 579.0, 630.0, 633.0, 630.0, 633.0, 579.0, 636.0, 630.0, 630.0, 630.0, 587.0, 624.0, 633.0, 639.0, 582.0, 636.0, 633.0, 587.0, 633.0, 633.0, 630.0, 639.0, 639.0, 624.0, 630.0, 582.0, 408.0, 636.0, 579.0, 636.0, 633.0, 633.0, 636.0, 639.0, 627.0, 633.0, 636.0, 630.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 639.0, 630.0, 633.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 324.0, 312.0, 294.0, 288.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 145.0, 146.0, 319.0, 320.0, 292.0, 284.0, 293.0, 294.0, 313.0, 317.0, 296.0, 291.0, 318.0, 315.0, 313.0, 308.0, 296.0, 286.0, 314.0, 316.0, 298.0, 286.0, 228.0, 237.0, 319.0, 317.0, 296.0, 286.0, 313.0, 317.0, 327.0, 
309.0, 322.0, 314.0, 316.0, 314.0, 319.0, 317.0, 311.0, 313.0, 283.0, 296.0, 310.0, 314.0, 316.0, 317.0, 319.0, 317.0, 289.0, 293.0, 319.0, 317.0, 242.0, 256.0, 288.0, 291.0, 313.0, 317.0, 316.0, 314.0, 321.0, 309.0, 285.0, 294.0, 311.0, 325.0, 319.0, 320.0, 319.0, 314.0, 291.0, 291.0, 287.0, 292.0, 313.0, 314.0, 324.0, 312.0, 324.0, 315.0, 319.0, 317.0, 311.0, 319.0, 282.0, 291.0, 278.0, 280.0, 280.0, 293.0, 319.0, 317.0, 288.0, 291.0, 311.0, 319.0, 319.0, 314.0, 317.0, 313.0, 316.0, 317.0, 293.0, 286.0, 314.0, 322.0, 321.0, 309.0, 326.0, 304.0, 316.0, 314.0, 293.0, 294.0, 305.0, 319.0, 316.0, 317.0, 314.0, 325.0, 285.0, 297.0, 319.0, 317.0, 316.0, 317.0, 293.0, 294.0, 313.0, 320.0, 319.0, 314.0, 316.0, 314.0, 322.0, 317.0, 322.0, 317.0, 313.0, 311.0, 311.0, 319.0, 291.0, 291.0, 208.0, 200.0, 314.0, 322.0, 284.0, 295.0, 319.0, 317.0, 313.0, 320.0, 311.0, 322.0, 319.0, 317.0, 322.0, 317.0, 313.0, 314.0, 322.0, 311.0, 319.0, 317.0, 321.0, 309.0, 319.0, 320.0, 318.0, 312.0, 293.0, 294.0, 311.0, 319.0, 319.0, 317.0, 288.0, 299.0, 322.0, 317.0, 316.0, 314.0, 317.0, 316.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7687649833136294, "mean_processing_ms": 0.2223678201524863, "mean_inference_ms": 1.3712901278888552}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11424000, "num_steps_sampled": 6092800, "sample_time_ms": 21822.996, "load_time_ms": 39.16, "grad_time_ms": 10774.782, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006356429657898843, "policy_loss": -0.006720335688441992, "vf_loss": 79.15064239501953, "vf_explained_var": 0.7751259207725525, "kl": 0.0025446319486945868, "entropy": 1.1181851625442505, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6092800, "episodes_total": 15232, "training_iteration": 476, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-12_00-53-09", "timestamp": 1660261989, "time_this_iter_s": 32.92729115486145, "time_total_s": 20397.625634670258, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20397.625634670258, "timesteps_since_restore": 6092800, "iterations_since_restore": 476, "perf": {"cpu_util_percent": 31.27608695652173, "ram_util_percent": 59.04347826086958}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 607.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.615}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.83, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.48, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.44, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.99, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.43, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.76, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.69, 
"soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.02, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 636.0, 578.0, 639.0, 582.0, 576.0, 587.0, 636.0, 582.0, 533.0, 633.0, 636.0, 630.0, 630.0, 579.0, 587.0, 630.0, 639.0, 639.0, 579.0, 636.0, 180.0, 587.0, 633.0, 630.0, 627.0, 587.0, 633.0, 633.0, 582.0, 630.0, 633.0, 639.0, 582.0, 636.0, 633.0, 587.0, 633.0, 633.0, 630.0, 639.0, 639.0, 624.0, 630.0, 582.0, 408.0, 636.0, 579.0, 636.0, 633.0, 633.0, 636.0, 639.0, 627.0, 633.0, 636.0, 630.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 639.0, 630.0, 633.0, 633.0, 630.0, 636.0, 582.0, 636.0, 633.0, 636.0, 291.0, 639.0, 576.0, 587.0, 630.0, 587.0, 633.0, 621.0, 582.0, 630.0, 584.0, 465.0, 636.0, 582.0, 630.0, 636.0, 636.0, 630.0, 636.0, 624.0, 579.0, 624.0, 633.0, 636.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 319.0, 317.0, 316.0, 320.0, 279.0, 299.0, 319.0, 320.0, 293.0, 289.0, 291.0, 285.0, 293.0, 294.0, 314.0, 322.0, 
288.0, 294.0, 254.0, 279.0, 319.0, 314.0, 319.0, 317.0, 317.0, 313.0, 311.0, 319.0, 282.0, 297.0, 295.0, 292.0, 316.0, 314.0, 319.0, 320.0, 316.0, 323.0, 288.0, 291.0, 324.0, 312.0, 91.0, 89.0, 296.0, 291.0, 319.0, 314.0, 311.0, 319.0, 316.0, 311.0, 306.0, 281.0, 321.0, 312.0, 316.0, 317.0, 288.0, 294.0, 311.0, 319.0, 316.0, 317.0, 314.0, 325.0, 285.0, 297.0, 319.0, 317.0, 316.0, 317.0, 293.0, 294.0, 313.0, 320.0, 319.0, 314.0, 316.0, 314.0, 322.0, 317.0, 322.0, 317.0, 313.0, 311.0, 311.0, 319.0, 291.0, 291.0, 208.0, 200.0, 314.0, 322.0, 284.0, 295.0, 319.0, 317.0, 313.0, 320.0, 311.0, 322.0, 319.0, 317.0, 322.0, 317.0, 313.0, 314.0, 322.0, 311.0, 319.0, 317.0, 321.0, 309.0, 319.0, 320.0, 318.0, 312.0, 293.0, 294.0, 311.0, 319.0, 319.0, 317.0, 288.0, 299.0, 322.0, 317.0, 316.0, 314.0, 317.0, 316.0, 319.0, 314.0, 311.0, 319.0, 324.0, 312.0, 294.0, 288.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 145.0, 146.0, 319.0, 320.0, 292.0, 284.0, 293.0, 294.0, 313.0, 317.0, 296.0, 291.0, 318.0, 315.0, 313.0, 308.0, 296.0, 286.0, 314.0, 316.0, 298.0, 286.0, 228.0, 237.0, 319.0, 317.0, 296.0, 286.0, 313.0, 317.0, 327.0, 309.0, 322.0, 314.0, 316.0, 314.0, 319.0, 317.0, 311.0, 313.0, 283.0, 296.0, 310.0, 314.0, 316.0, 317.0, 319.0, 317.0, 289.0, 293.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7681470637203944, "mean_processing_ms": 0.22224947461581387, "mean_inference_ms": 1.3708820792528533}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11448000, "num_steps_sampled": 6105600, "sample_time_ms": 22237.355, "load_time_ms": 38.878, "grad_time_ms": 10934.933, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003096085973083973, "policy_loss": -0.004263754468411207, "vf_loss": 79.16039276123047, "vf_explained_var": 0.7912160754203796, "kl": 0.001874853391200304, "entropy": 1.1124080419540405, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 6105600, "episodes_total": 15264, "training_iteration": 477, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-53-45", "timestamp": 1660262025, "time_this_iter_s": 35.63289189338684, "time_total_s": 20433.258526563644, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20433.258526563644, "timesteps_since_restore": 6105600, "iterations_since_restore": 477, "perf": {"cpu_util_percent": 34.068627450980394, "ram_util_percent": 59.11960784313726}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 606.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.34}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.68, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.64, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.09, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.87, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.63, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.87, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.87, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 627.0, 633.0, 561.0, 636.0, 630.0, 525.0, 633.0, 636.0, 636.0, 582.0, 636.0, 636.0, 630.0, 633.0, 636.0, 639.0, 630.0, 636.0, 587.0, 582.0, 636.0, 544.0, 633.0, 636.0, 630.0, 639.0, 584.0, 636.0, 579.0, 576.0, 633.0, 639.0, 630.0, 633.0, 633.0, 630.0, 636.0, 582.0, 636.0, 633.0, 636.0, 291.0, 639.0, 576.0, 587.0, 630.0, 587.0, 633.0, 621.0, 582.0, 630.0, 584.0, 465.0, 636.0, 582.0, 630.0, 636.0, 636.0, 630.0, 636.0, 624.0, 579.0, 624.0, 633.0, 636.0, 582.0, 636.0, 630.0, 636.0, 636.0, 578.0, 639.0, 582.0, 576.0, 587.0, 636.0, 582.0, 533.0, 633.0, 636.0, 630.0, 630.0, 579.0, 587.0, 630.0, 639.0, 639.0, 579.0, 636.0, 180.0, 587.0, 633.0, 630.0, 627.0, 587.0, 633.0, 633.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 319.0, 308.0, 316.0, 317.0, 279.0, 282.0, 316.0, 320.0, 316.0, 314.0, 259.0, 266.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 291.0, 291.0, 319.0, 317.0, 319.0, 317.0, 319.0, 311.0, 313.0, 320.0, 316.0, 320.0, 317.0, 322.0, 305.0, 325.0, 313.0, 323.0, 294.0, 293.0, 291.0, 291.0, 319.0, 317.0, 271.0, 273.0, 311.0, 322.0, 314.0, 322.0, 316.0, 314.0, 320.0, 319.0, 285.0, 299.0, 321.0, 315.0, 294.0, 285.0, 285.0, 291.0, 319.0, 314.0, 322.0, 317.0, 316.0, 314.0, 317.0, 316.0, 319.0, 314.0, 311.0, 319.0, 324.0, 312.0, 294.0, 288.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 145.0, 146.0, 319.0, 320.0, 292.0, 284.0, 293.0, 294.0, 313.0, 317.0, 296.0, 291.0, 318.0, 315.0, 313.0, 308.0, 296.0, 286.0, 314.0, 316.0, 298.0, 286.0, 228.0, 237.0, 319.0, 317.0, 296.0, 286.0, 313.0, 317.0, 327.0, 309.0, 322.0, 314.0, 316.0, 314.0, 319.0, 317.0, 311.0, 313.0, 283.0, 296.0, 310.0, 314.0, 316.0, 317.0, 319.0, 317.0, 289.0, 293.0, 319.0, 317.0, 313.0, 317.0, 319.0, 317.0, 316.0, 320.0, 279.0, 299.0, 319.0, 320.0, 293.0, 289.0, 291.0, 285.0, 293.0, 294.0, 314.0, 322.0, 288.0, 294.0, 254.0, 279.0, 319.0, 314.0, 319.0, 317.0, 317.0, 313.0, 311.0, 319.0, 282.0, 297.0, 295.0, 292.0, 316.0, 314.0, 319.0, 320.0, 316.0, 323.0, 288.0, 291.0, 324.0, 312.0, 91.0, 89.0, 296.0, 291.0, 319.0, 314.0, 311.0, 319.0, 316.0, 311.0, 306.0, 281.0, 321.0, 312.0, 316.0, 317.0, 288.0, 294.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7675359476043465, "mean_processing_ms": 0.22213291684157827, "mean_inference_ms": 1.3705566142110346}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11472000, "num_steps_sampled": 6118400, "sample_time_ms": 22722.857, "load_time_ms": 39.067, "grad_time_ms": 10783.352, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003133426944259554, "policy_loss": -0.007279651705175638, "vf_loss": 75.2169418334961, 
"vf_explained_var": 0.7742903232574463, "kl": 0.0020874079782515764, "entropy": 1.110769271850586, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6118400, "episodes_total": 15296, "training_iteration": 478, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-54-19", "timestamp": 1660262059, "time_this_iter_s": 34.45740509033203, "time_total_s": 20467.715931653976, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20467.715931653976, "timesteps_since_restore": 6118400, "iterations_since_restore": 478, "perf": {"cpu_util_percent": 28.777083333333334, "ram_util_percent": 59.04374999999999}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 608.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.12}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.64, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.8, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.19, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.96, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.78, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [606.0, 582.0, 636.0, 563.0, 615.0, 627.0, 582.0, 633.0, 587.0, 627.0, 630.0, 579.0, 630.0, 633.0, 462.0, 630.0, 582.0, 633.0, 630.0, 633.0, 636.0, 636.0, 584.0, 633.0, 582.0, 636.0, 633.0, 630.0, 582.0, 630.0, 573.0, 627.0, 633.0, 636.0, 582.0, 636.0, 630.0, 636.0, 636.0, 578.0, 639.0, 582.0, 576.0, 587.0, 636.0, 582.0, 533.0, 633.0, 636.0, 630.0, 630.0, 579.0, 587.0, 630.0, 639.0, 639.0, 579.0, 636.0, 180.0, 587.0, 633.0, 630.0, 627.0, 587.0, 633.0, 633.0, 582.0, 630.0, 630.0, 627.0, 633.0, 561.0, 636.0, 630.0, 525.0, 633.0, 636.0, 636.0, 582.0, 636.0, 636.0, 630.0, 633.0, 636.0, 639.0, 630.0, 636.0, 587.0, 582.0, 636.0, 544.0, 633.0, 636.0, 630.0, 639.0, 584.0, 636.0, 579.0, 576.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [302.0, 304.0, 289.0, 293.0, 319.0, 317.0, 284.0, 279.0, 305.0, 310.0, 319.0, 308.0, 294.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 311.0, 321.0, 309.0, 283.0, 296.0, 311.0, 319.0, 314.0, 319.0, 239.0, 223.0, 316.0, 314.0, 288.0, 294.0, 319.0, 314.0, 319.0, 311.0, 311.0, 322.0, 316.0, 320.0, 314.0, 322.0, 307.0, 277.0, 314.0, 319.0, 288.0, 294.0, 319.0, 317.0, 316.0, 317.0, 313.0, 317.0, 288.0, 294.0, 310.0, 320.0, 292.0, 281.0, 321.0, 306.0, 316.0, 317.0, 319.0, 317.0, 289.0, 293.0, 319.0, 317.0, 313.0, 317.0, 319.0, 317.0, 316.0, 320.0, 279.0, 299.0, 319.0, 320.0, 293.0, 289.0, 291.0, 285.0, 293.0, 294.0, 314.0, 322.0, 288.0, 294.0, 254.0, 279.0, 319.0, 314.0, 319.0, 317.0, 317.0, 313.0, 311.0, 319.0, 282.0, 297.0, 295.0, 292.0, 316.0, 314.0, 319.0, 320.0, 316.0, 323.0, 288.0, 291.0, 324.0, 312.0, 91.0, 89.0, 296.0, 291.0, 319.0, 314.0, 311.0, 319.0, 316.0, 311.0, 306.0, 281.0, 321.0, 312.0, 316.0, 317.0, 288.0, 294.0, 311.0, 319.0, 316.0, 314.0, 319.0, 308.0, 316.0, 317.0, 279.0, 282.0, 316.0, 320.0, 316.0, 314.0, 259.0, 266.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 291.0, 291.0, 319.0, 317.0, 319.0, 317.0, 319.0, 311.0, 313.0, 320.0, 316.0, 320.0, 317.0, 322.0, 305.0, 325.0, 313.0, 323.0, 294.0, 293.0, 291.0, 291.0, 319.0, 317.0, 271.0, 273.0, 311.0, 322.0, 314.0, 322.0, 316.0, 314.0, 320.0, 319.0, 285.0, 299.0, 321.0, 315.0, 294.0, 285.0, 285.0, 291.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7669199085026867, "mean_processing_ms": 0.22201572097443972, "mean_inference_ms": 1.3701586168616826}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11496000, "num_steps_sampled": 6131200, "sample_time_ms": 22699.68, "load_time_ms": 39.016, "grad_time_ms": 10734.597, "update_time_ms": 0.001, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008129358175210655, "policy_loss": -0.006242450326681137, "vf_loss": 76.11907196044922, "vf_explained_var": 0.7632293701171875, "kl": 0.0021639217156916857, "entropy": 1.1130343675613403, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6131200, "episodes_total": 15328, "training_iteration": 479, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-54-53", "timestamp": 1660262093, "time_this_iter_s": 33.185157775878906, "time_total_s": 20500.901089429855, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 
1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20500.901089429855, "timesteps_since_restore": 6131200, "iterations_since_restore": 479, "perf": {"cpu_util_percent": 30.472340425531915, "ram_util_percent": 58.97021276595746}} +{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 613.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 306.835}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.07, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.41, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.34, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.56, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.9, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.05, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 1, 
"useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.41, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.9, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.9, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 630.0, 579.0, 639.0, 636.0, 639.0, 639.0, 587.0, 636.0, 587.0, 627.0, 633.0, 576.0, 582.0, 633.0, 636.0, 630.0, 630.0, 633.0, 636.0, 525.0, 630.0, 582.0, 587.0, 639.0, 627.0, 582.0, 582.0, 633.0, 636.0, 630.0, 633.0, 633.0, 582.0, 630.0, 630.0, 627.0, 633.0, 561.0, 636.0, 630.0, 525.0, 633.0, 636.0, 636.0, 582.0, 636.0, 636.0, 630.0, 633.0, 636.0, 639.0, 630.0, 636.0, 587.0, 582.0, 636.0, 544.0, 633.0, 636.0, 630.0, 639.0, 584.0, 636.0, 579.0, 576.0, 633.0, 606.0, 582.0, 636.0, 563.0, 615.0, 627.0, 582.0, 633.0, 587.0, 627.0, 630.0, 579.0, 630.0, 633.0, 462.0, 630.0, 582.0, 633.0, 630.0, 633.0, 636.0, 636.0, 584.0, 633.0, 582.0, 636.0, 633.0, 630.0, 582.0, 630.0, 573.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 296.0, 283.0, 319.0, 320.0, 316.0, 320.0, 317.0, 322.0, 319.0, 320.0, 288.0, 299.0, 316.0, 320.0, 301.0, 286.0, 313.0, 314.0, 317.0, 316.0, 291.0, 285.0, 286.0, 296.0, 319.0, 314.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 314.0, 322.0, 257.0, 268.0, 316.0, 314.0, 296.0, 286.0, 296.0, 291.0, 319.0, 320.0, 305.0, 322.0, 293.0, 289.0, 294.0, 288.0, 308.0, 325.0, 319.0, 317.0, 313.0, 317.0, 321.0, 312.0, 316.0, 317.0, 288.0, 294.0, 311.0, 319.0, 316.0, 314.0, 319.0, 308.0, 316.0, 317.0, 279.0, 282.0, 316.0, 320.0, 316.0, 314.0, 259.0, 266.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 291.0, 291.0, 319.0, 317.0, 319.0, 317.0, 319.0, 311.0, 313.0, 320.0, 316.0, 320.0, 317.0, 322.0, 305.0, 325.0, 313.0, 323.0, 294.0, 293.0, 291.0, 291.0, 319.0, 317.0, 271.0, 273.0, 311.0, 322.0, 314.0, 322.0, 316.0, 314.0, 320.0, 319.0, 285.0, 299.0, 321.0, 315.0, 294.0, 285.0, 285.0, 291.0, 319.0, 314.0, 302.0, 304.0, 289.0, 293.0, 319.0, 317.0, 284.0, 279.0, 305.0, 310.0, 319.0, 308.0, 294.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 311.0, 321.0, 309.0, 283.0, 296.0, 311.0, 319.0, 314.0, 319.0, 239.0, 223.0, 316.0, 314.0, 288.0, 294.0, 319.0, 314.0, 319.0, 311.0, 311.0, 322.0, 316.0, 320.0, 314.0, 322.0, 307.0, 277.0, 314.0, 319.0, 288.0, 294.0, 319.0, 317.0, 316.0, 317.0, 313.0, 317.0, 288.0, 294.0, 310.0, 320.0, 292.0, 281.0, 321.0, 306.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7662995807176626, "mean_processing_ms": 0.22189831707550936, "mean_inference_ms": 
1.3696437835161013}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11520000, "num_steps_sampled": 6144000, "sample_time_ms": 22732.007, "load_time_ms": 38.46, "grad_time_ms": 10636.502, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033598102163523436, "policy_loss": -0.003906731028109789, "vf_loss": 78.25418090820312, "vf_explained_var": 0.768868625164032, "kl": 0.0016973327146843076, "entropy": 1.117727279663086, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6144000, "episodes_total": 15360, "training_iteration": 480, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-55-24", "timestamp": 1660262124, "time_this_iter_s": 31.27005410194397, "time_total_s": 20532.1711435318, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20532.1711435318, "timesteps_since_restore": 6144000, "iterations_since_restore": 480, "perf": {"cpu_util_percent": 31.795555555555556, "ram_util_percent": 59.01333333333335}} +{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 611.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 305.56}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.72, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.18, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.29, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.52, 
"dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.29, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.29, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, 
"viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [578.0, 582.0, 630.0, 587.0, 570.0, 587.0, 630.0, 612.0, 624.0, 633.0, 570.0, 587.0, 582.0, 630.0, 587.0, 636.0, 636.0, 582.0, 552.0, 636.0, 584.0, 633.0, 590.0, 624.0, 630.0, 630.0, 636.0, 609.0, 630.0, 633.0, 633.0, 636.0, 636.0, 579.0, 576.0, 633.0, 606.0, 582.0, 636.0, 563.0, 615.0, 627.0, 582.0, 633.0, 587.0, 627.0, 630.0, 579.0, 630.0, 633.0, 462.0, 630.0, 582.0, 633.0, 630.0, 633.0, 636.0, 636.0, 584.0, 633.0, 582.0, 636.0, 633.0, 630.0, 582.0, 630.0, 573.0, 627.0, 633.0, 633.0, 630.0, 579.0, 639.0, 636.0, 639.0, 639.0, 587.0, 
636.0, 587.0, 627.0, 633.0, 576.0, 582.0, 633.0, 636.0, 630.0, 630.0, 633.0, 636.0, 525.0, 630.0, 582.0, 587.0, 639.0, 627.0, 582.0, 582.0, 633.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 283.0, 288.0, 294.0, 313.0, 317.0, 290.0, 297.0, 284.0, 286.0, 299.0, 288.0, 315.0, 315.0, 304.0, 308.0, 317.0, 307.0, 311.0, 322.0, 295.0, 275.0, 296.0, 291.0, 293.0, 289.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 314.0, 322.0, 291.0, 291.0, 279.0, 273.0, 324.0, 312.0, 291.0, 293.0, 319.0, 314.0, 296.0, 294.0, 311.0, 313.0, 311.0, 319.0, 313.0, 317.0, 319.0, 317.0, 301.0, 308.0, 316.0, 314.0, 311.0, 322.0, 316.0, 317.0, 319.0, 317.0, 321.0, 315.0, 294.0, 285.0, 285.0, 291.0, 319.0, 314.0, 302.0, 304.0, 289.0, 293.0, 319.0, 317.0, 284.0, 279.0, 305.0, 310.0, 319.0, 308.0, 294.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 311.0, 321.0, 309.0, 283.0, 296.0, 311.0, 319.0, 314.0, 319.0, 239.0, 223.0, 316.0, 314.0, 288.0, 294.0, 319.0, 314.0, 319.0, 311.0, 311.0, 322.0, 316.0, 320.0, 314.0, 322.0, 307.0, 277.0, 314.0, 319.0, 288.0, 294.0, 319.0, 317.0, 316.0, 317.0, 313.0, 317.0, 288.0, 294.0, 310.0, 320.0, 292.0, 281.0, 321.0, 306.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 296.0, 283.0, 319.0, 320.0, 316.0, 320.0, 317.0, 322.0, 319.0, 320.0, 288.0, 299.0, 316.0, 320.0, 301.0, 286.0, 313.0, 314.0, 317.0, 316.0, 291.0, 285.0, 286.0, 296.0, 319.0, 314.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 314.0, 322.0, 257.0, 268.0, 316.0, 314.0, 296.0, 286.0, 296.0, 
291.0, 319.0, 320.0, 305.0, 322.0, 293.0, 289.0, 294.0, 288.0, 308.0, 325.0, 319.0, 317.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7656785099973358, "mean_processing_ms": 0.2217805887765104, "mean_inference_ms": 1.3690072686883668}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11544000, "num_steps_sampled": 6156800, "sample_time_ms": 22631.205, "load_time_ms": 38.401, "grad_time_ms": 10438.368, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -1.2905181392852683e-05, "policy_loss": -0.007116043474525213, "vf_loss": 76.6054458618164, "vf_explained_var": 0.7700133323669434, "kl": 0.0019200993701815605, "entropy": 1.1147748231887817, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6156800, "episodes_total": 15392, "training_iteration": 481, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-55-56", "timestamp": 1660262156, "time_this_iter_s": 32.10103392601013, "time_total_s": 20564.27217745781, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20564.27217745781, "timesteps_since_restore": 6156800, "iterations_since_restore": 481, "perf": {"cpu_util_percent": 31.702222222222222, "ram_util_percent": 59.035555555555575}} +{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 613.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 306.58}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.56, 
"shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.53, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.04, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.17, 
"potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.08, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.7, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.08, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.08, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 587.0, 630.0, 621.0, 639.0, 636.0, 630.0, 630.0, 639.0, 561.0, 633.0, 630.0, 627.0, 582.0, 582.0, 636.0, 587.0, 636.0, 630.0, 636.0, 513.0, 576.0, 633.0, 633.0, 630.0, 636.0, 582.0, 587.0, 633.0, 624.0, 633.0, 582.0, 630.0, 573.0, 627.0, 633.0, 633.0, 630.0, 579.0, 639.0, 636.0, 639.0, 639.0, 587.0, 636.0, 587.0, 
627.0, 633.0, 576.0, 582.0, 633.0, 636.0, 630.0, 630.0, 633.0, 636.0, 525.0, 630.0, 582.0, 587.0, 639.0, 627.0, 582.0, 582.0, 633.0, 636.0, 630.0, 578.0, 582.0, 630.0, 587.0, 570.0, 587.0, 630.0, 612.0, 624.0, 633.0, 570.0, 587.0, 582.0, 630.0, 587.0, 636.0, 636.0, 582.0, 552.0, 636.0, 584.0, 633.0, 590.0, 624.0, 630.0, 630.0, 636.0, 609.0, 630.0, 633.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 323.0, 313.0, 317.0, 293.0, 294.0, 319.0, 311.0, 308.0, 313.0, 319.0, 320.0, 314.0, 322.0, 316.0, 314.0, 311.0, 319.0, 317.0, 322.0, 273.0, 288.0, 316.0, 317.0, 316.0, 314.0, 313.0, 314.0, 291.0, 291.0, 291.0, 291.0, 314.0, 322.0, 298.0, 289.0, 314.0, 322.0, 316.0, 314.0, 324.0, 312.0, 249.0, 264.0, 280.0, 296.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 319.0, 317.0, 288.0, 294.0, 296.0, 291.0, 316.0, 317.0, 308.0, 316.0, 319.0, 314.0, 288.0, 294.0, 310.0, 320.0, 292.0, 281.0, 321.0, 306.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 296.0, 283.0, 319.0, 320.0, 316.0, 320.0, 317.0, 322.0, 319.0, 320.0, 288.0, 299.0, 316.0, 320.0, 301.0, 286.0, 313.0, 314.0, 317.0, 316.0, 291.0, 285.0, 286.0, 296.0, 319.0, 314.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 314.0, 322.0, 257.0, 268.0, 316.0, 314.0, 296.0, 286.0, 296.0, 291.0, 319.0, 320.0, 305.0, 322.0, 293.0, 289.0, 294.0, 288.0, 308.0, 325.0, 319.0, 317.0, 313.0, 317.0, 295.0, 283.0, 288.0, 294.0, 313.0, 317.0, 290.0, 297.0, 284.0, 286.0, 299.0, 288.0, 315.0, 315.0, 304.0, 308.0, 317.0, 307.0, 311.0, 
322.0, 295.0, 275.0, 296.0, 291.0, 293.0, 289.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 314.0, 322.0, 291.0, 291.0, 279.0, 273.0, 324.0, 312.0, 291.0, 293.0, 319.0, 314.0, 296.0, 294.0, 311.0, 313.0, 311.0, 319.0, 313.0, 317.0, 319.0, 317.0, 301.0, 308.0, 316.0, 314.0, 311.0, 322.0, 316.0, 317.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7650611031190503, "mean_processing_ms": 0.2216638185152556, "mean_inference_ms": 1.3683447229242562}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11568000, "num_steps_sampled": 6169600, "sample_time_ms": 22476.544, "load_time_ms": 38.386, "grad_time_ms": 10500.152, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 5.255569703876972e-05, "policy_loss": -0.006667418871074915, "vf_loss": 72.75797271728516, "vf_explained_var": 0.775852620601654, "kl": 0.0019747635815292597, "entropy": 1.111660122871399, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6169600, "episodes_total": 15424, "training_iteration": 482, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-56-28", "timestamp": 1660262188, "time_this_iter_s": 31.53903889656067, "time_total_s": 20595.81121635437, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20595.81121635437, "timesteps_since_restore": 6169600, "iterations_since_restore": 482, "perf": {"cpu_util_percent": 34.43555555555556, "ram_util_percent": 58.98222222222224}} +{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 614.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 249.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.265}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.73, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.41, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.68, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.31, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 15, 
"optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 582.0, 627.0, 633.0, 636.0, 630.0, 582.0, 627.0, 630.0, 579.0, 630.0, 630.0, 633.0, 630.0, 630.0, 633.0, 530.0, 
636.0, 633.0, 633.0, 576.0, 633.0, 636.0, 630.0, 587.0, 633.0, 633.0, 581.0, 587.0, 633.0, 630.0, 636.0, 582.0, 633.0, 636.0, 630.0, 578.0, 582.0, 630.0, 587.0, 570.0, 587.0, 630.0, 612.0, 624.0, 633.0, 570.0, 587.0, 582.0, 630.0, 587.0, 636.0, 636.0, 582.0, 552.0, 636.0, 584.0, 633.0, 590.0, 624.0, 630.0, 630.0, 636.0, 609.0, 630.0, 633.0, 633.0, 636.0, 636.0, 630.0, 587.0, 630.0, 621.0, 639.0, 636.0, 630.0, 630.0, 639.0, 561.0, 633.0, 630.0, 627.0, 582.0, 582.0, 636.0, 587.0, 636.0, 630.0, 636.0, 513.0, 576.0, 633.0, 633.0, 630.0, 636.0, 582.0, 587.0, 633.0, 624.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 314.0, 293.0, 289.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 281.0, 301.0, 313.0, 314.0, 322.0, 308.0, 291.0, 288.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 311.0, 319.0, 316.0, 314.0, 316.0, 317.0, 268.0, 262.0, 316.0, 320.0, 316.0, 317.0, 309.0, 324.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 296.0, 291.0, 318.0, 315.0, 318.0, 315.0, 296.0, 285.0, 293.0, 294.0, 309.0, 324.0, 316.0, 314.0, 319.0, 317.0, 294.0, 288.0, 308.0, 325.0, 319.0, 317.0, 313.0, 317.0, 295.0, 283.0, 288.0, 294.0, 313.0, 317.0, 290.0, 297.0, 284.0, 286.0, 299.0, 288.0, 315.0, 315.0, 304.0, 308.0, 317.0, 307.0, 311.0, 322.0, 295.0, 275.0, 296.0, 291.0, 293.0, 289.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 314.0, 322.0, 291.0, 291.0, 279.0, 273.0, 324.0, 312.0, 291.0, 293.0, 319.0, 314.0, 296.0, 294.0, 311.0, 313.0, 311.0, 319.0, 313.0, 317.0, 319.0, 
317.0, 301.0, 308.0, 316.0, 314.0, 311.0, 322.0, 316.0, 317.0, 319.0, 317.0, 313.0, 323.0, 313.0, 317.0, 293.0, 294.0, 319.0, 311.0, 308.0, 313.0, 319.0, 320.0, 314.0, 322.0, 316.0, 314.0, 311.0, 319.0, 317.0, 322.0, 273.0, 288.0, 316.0, 317.0, 316.0, 314.0, 313.0, 314.0, 291.0, 291.0, 291.0, 291.0, 314.0, 322.0, 298.0, 289.0, 314.0, 322.0, 316.0, 314.0, 324.0, 312.0, 249.0, 264.0, 280.0, 296.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 319.0, 317.0, 288.0, 294.0, 296.0, 291.0, 316.0, 317.0, 308.0, 316.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.764446149993761, "mean_processing_ms": 0.22154654084728279, "mean_inference_ms": 1.3676326972645958}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11592000, "num_steps_sampled": 6182400, "sample_time_ms": 22364.392, "load_time_ms": 38.739, "grad_time_ms": 10348.578, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000403035432100296, "policy_loss": -0.006678896490484476, "vf_loss": 76.43026733398438, "vf_explained_var": 0.76324862241745, "kl": 0.0020988413598388433, "entropy": 1.1221919059753418, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6182400, "episodes_total": 15456, "training_iteration": 483, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-56-57", "timestamp": 1660262217, "time_this_iter_s": 29.545005083084106, "time_total_s": 20625.356221437454, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20625.356221437454, "timesteps_since_restore": 6182400, "iterations_since_restore": 483, "perf": {"cpu_util_percent": 31.23658536585366, 
"ram_util_percent": 59.02682926829268}} +{"episode_reward_max": 639.0, "episode_reward_min": 120.0, "episode_reward_mean": 614.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 54.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.045}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 188.09, "shaped_reward_min": 40, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.35, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 
0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.91, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.16, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 
16.38, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 630.0, 636.0, 627.0, 633.0, 587.0, 633.0, 639.0, 630.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 630.0, 636.0, 120.0, 636.0, 630.0, 633.0, 633.0, 633.0, 630.0, 630.0, 630.0, 639.0, 630.0, 633.0, 633.0, 636.0, 636.0, 630.0, 587.0, 630.0, 621.0, 639.0, 636.0, 630.0, 630.0, 639.0, 561.0, 633.0, 630.0, 627.0, 582.0, 582.0, 636.0, 587.0, 636.0, 630.0, 636.0, 513.0, 576.0, 633.0, 633.0, 630.0, 636.0, 582.0, 587.0, 633.0, 624.0, 633.0, 636.0, 582.0, 627.0, 633.0, 636.0, 630.0, 582.0, 627.0, 630.0, 579.0, 630.0, 630.0, 633.0, 630.0, 630.0, 633.0, 530.0, 636.0, 633.0, 633.0, 576.0, 633.0, 636.0, 630.0, 587.0, 633.0, 633.0, 581.0, 587.0, 633.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 301.0, 286.0, 324.0, 312.0, 289.0, 293.0, 291.0, 291.0, 313.0, 320.0, 321.0, 309.0, 319.0, 317.0, 316.0, 311.0, 319.0, 314.0, 291.0, 296.0, 319.0, 314.0, 319.0, 320.0, 316.0, 314.0, 291.0, 296.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 320.0, 314.0, 322.0, 319.0, 311.0, 319.0, 317.0, 66.0, 54.0, 314.0, 322.0, 310.0, 320.0, 314.0, 319.0, 319.0, 314.0, 322.0, 311.0, 308.0, 322.0, 311.0, 319.0, 311.0, 319.0, 317.0, 322.0, 316.0, 314.0, 311.0, 322.0, 316.0, 317.0, 319.0, 317.0, 313.0, 323.0, 313.0, 317.0, 293.0, 294.0, 319.0, 311.0, 308.0, 313.0, 319.0, 320.0, 314.0, 322.0, 316.0, 314.0, 311.0, 319.0, 317.0, 322.0, 273.0, 288.0, 316.0, 317.0, 316.0, 
314.0, 313.0, 314.0, 291.0, 291.0, 291.0, 291.0, 314.0, 322.0, 298.0, 289.0, 314.0, 322.0, 316.0, 314.0, 324.0, 312.0, 249.0, 264.0, 280.0, 296.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 319.0, 317.0, 288.0, 294.0, 296.0, 291.0, 316.0, 317.0, 308.0, 316.0, 319.0, 314.0, 322.0, 314.0, 293.0, 289.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 281.0, 301.0, 313.0, 314.0, 322.0, 308.0, 291.0, 288.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 311.0, 319.0, 316.0, 314.0, 316.0, 317.0, 268.0, 262.0, 316.0, 320.0, 316.0, 317.0, 309.0, 324.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 296.0, 291.0, 318.0, 315.0, 318.0, 315.0, 296.0, 285.0, 293.0, 294.0, 309.0, 324.0, 316.0, 314.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7638319008211926, "mean_processing_ms": 0.22142837816064478, "mean_inference_ms": 1.3669160139559398}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11616000, "num_steps_sampled": 6195200, "sample_time_ms": 22342.562, "load_time_ms": 38.969, "grad_time_ms": 10165.472, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003674185834825039, "policy_loss": -0.0030232470016926527, "vf_loss": 72.50147247314453, "vf_explained_var": 0.7972453236579895, "kl": 0.002131336135789752, "entropy": 1.1054468154907227, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6195200, "episodes_total": 15488, "training_iteration": 484, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-57-28", "timestamp": 1660262248, "time_this_iter_s": 30.70011305809021, "time_total_s": 20656.056334495544, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 20656.056334495544, "timesteps_since_restore": 6195200, "iterations_since_restore": 484, "perf": {"cpu_util_percent": 29.970454545454547, "ram_util_percent": 59.07727272727273}} +{"episode_reward_max": 639.0, "episode_reward_min": 120.0, "episode_reward_mean": 616.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 54.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.095}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 188.99, "shaped_reward_min": 40, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.99, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.25, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 5, 
"useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.54, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.01, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, 
"soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.54, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.54, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 636.0, 639.0, 590.0, 630.0, 639.0, 639.0, 630.0, 636.0, 636.0, 639.0, 633.0, 636.0, 582.0, 636.0, 621.0, 630.0, 633.0, 630.0, 633.0, 576.0, 582.0, 636.0, 630.0, 630.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 633.0, 587.0, 633.0, 624.0, 633.0, 636.0, 582.0, 627.0, 633.0, 636.0, 630.0, 582.0, 627.0, 630.0, 579.0, 630.0, 630.0, 633.0, 630.0, 630.0, 633.0, 530.0, 636.0, 633.0, 633.0, 576.0, 633.0, 636.0, 630.0, 587.0, 633.0, 633.0, 581.0, 587.0, 633.0, 630.0, 636.0, 582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 630.0, 636.0, 627.0, 633.0, 587.0, 633.0, 639.0, 630.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 630.0, 636.0, 120.0, 636.0, 630.0, 633.0, 633.0, 633.0, 630.0, 630.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 294.0, 319.0, 317.0, 319.0, 320.0, 299.0, 291.0, 316.0, 314.0, 319.0, 320.0, 324.0, 315.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 317.0, 322.0, 314.0, 319.0, 314.0, 322.0, 286.0, 296.0, 317.0, 319.0, 308.0, 313.0, 313.0, 317.0, 319.0, 314.0, 321.0, 309.0, 319.0, 314.0, 299.0, 277.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 314.0, 288.0, 299.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 327.0, 306.0, 296.0, 291.0, 316.0, 317.0, 308.0, 316.0, 319.0, 314.0, 322.0, 
314.0, 293.0, 289.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 281.0, 301.0, 313.0, 314.0, 322.0, 308.0, 291.0, 288.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 311.0, 319.0, 316.0, 314.0, 316.0, 317.0, 268.0, 262.0, 316.0, 320.0, 316.0, 317.0, 309.0, 324.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 296.0, 291.0, 318.0, 315.0, 318.0, 315.0, 296.0, 285.0, 293.0, 294.0, 309.0, 324.0, 316.0, 314.0, 319.0, 317.0, 290.0, 292.0, 301.0, 286.0, 324.0, 312.0, 289.0, 293.0, 291.0, 291.0, 313.0, 320.0, 321.0, 309.0, 319.0, 317.0, 316.0, 311.0, 319.0, 314.0, 291.0, 296.0, 319.0, 314.0, 319.0, 320.0, 316.0, 314.0, 291.0, 296.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 320.0, 314.0, 322.0, 319.0, 311.0, 319.0, 317.0, 66.0, 54.0, 314.0, 322.0, 310.0, 320.0, 314.0, 319.0, 319.0, 314.0, 322.0, 311.0, 308.0, 322.0, 311.0, 319.0, 311.0, 319.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7632241989034774, "mean_processing_ms": 0.22131123162111455, "mean_inference_ms": 1.3662700284091551}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11640000, "num_steps_sampled": 6208000, "sample_time_ms": 22316.513, "load_time_ms": 39.137, "grad_time_ms": 10131.169, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029844159726053476, "policy_loss": -0.003889852436259389, "vf_loss": 74.29019165039062, "vf_explained_var": 0.7697036862373352, "kl": 0.0019323105225339532, "entropy": 1.1094969511032104, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6208000, "episodes_total": 15520, "training_iteration": 485, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-58-02", "timestamp": 1660262282, "time_this_iter_s": 33.77160096168518, "time_total_s": 20689.82793545723, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20689.82793545723, "timesteps_since_restore": 6208000, "iterations_since_restore": 485, "perf": {"cpu_util_percent": 30.51875, "ram_util_percent": 58.97291666666666}} +{"episode_reward_max": 639.0, "episode_reward_min": 120.0, "episode_reward_mean": 613.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 54.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.905}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 188.61, "shaped_reward_min": 40, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.93, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 
18.16, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.21, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.61, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.02, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.79, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.16, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.61, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.61, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 576.0, 636.0, 621.0, 636.0, 636.0, 636.0, 576.0, 587.0, 633.0, 633.0, 582.0, 522.0, 633.0, 579.0, 636.0, 639.0, 582.0, 633.0, 576.0, 627.0, 636.0, 587.0, 639.0, 630.0, 636.0, 579.0, 582.0, 636.0, 582.0, 579.0, 633.0, 587.0, 633.0, 630.0, 636.0, 582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 630.0, 636.0, 627.0, 633.0, 587.0, 633.0, 639.0, 630.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 630.0, 636.0, 120.0, 636.0, 630.0, 633.0, 633.0, 633.0, 630.0, 630.0, 630.0, 639.0, 579.0, 636.0, 639.0, 590.0, 630.0, 639.0, 639.0, 630.0, 636.0, 636.0, 639.0, 633.0, 636.0, 582.0, 636.0, 621.0, 630.0, 633.0, 630.0, 633.0, 576.0, 582.0, 636.0, 630.0, 630.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 294.0, 282.0, 317.0, 319.0, 310.0, 311.0, 319.0, 317.0, 319.0, 317.0, 309.0, 327.0, 293.0, 283.0, 293.0, 294.0, 311.0, 322.0, 314.0, 319.0, 290.0, 292.0, 262.0, 260.0, 316.0, 317.0, 286.0, 293.0, 317.0, 319.0, 322.0, 317.0, 291.0, 291.0, 319.0, 314.0, 282.0, 294.0, 313.0, 314.0, 320.0, 316.0, 296.0, 291.0, 322.0, 317.0, 306.0, 
324.0, 314.0, 322.0, 285.0, 294.0, 289.0, 293.0, 319.0, 317.0, 285.0, 297.0, 283.0, 296.0, 314.0, 319.0, 293.0, 294.0, 309.0, 324.0, 316.0, 314.0, 319.0, 317.0, 290.0, 292.0, 301.0, 286.0, 324.0, 312.0, 289.0, 293.0, 291.0, 291.0, 313.0, 320.0, 321.0, 309.0, 319.0, 317.0, 316.0, 311.0, 319.0, 314.0, 291.0, 296.0, 319.0, 314.0, 319.0, 320.0, 316.0, 314.0, 291.0, 296.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 320.0, 314.0, 322.0, 319.0, 311.0, 319.0, 317.0, 66.0, 54.0, 314.0, 322.0, 310.0, 320.0, 314.0, 319.0, 319.0, 314.0, 322.0, 311.0, 308.0, 322.0, 311.0, 319.0, 311.0, 319.0, 317.0, 322.0, 285.0, 294.0, 319.0, 317.0, 319.0, 320.0, 299.0, 291.0, 316.0, 314.0, 319.0, 320.0, 324.0, 315.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 317.0, 322.0, 314.0, 319.0, 314.0, 322.0, 286.0, 296.0, 317.0, 319.0, 308.0, 313.0, 313.0, 317.0, 319.0, 314.0, 321.0, 309.0, 319.0, 314.0, 299.0, 277.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 314.0, 288.0, 299.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 327.0, 306.0]}, "sampler_perf": {"mean_env_wait_ms": 0.762623780459851, "mean_processing_ms": 0.2211964092332032, "mean_inference_ms": 1.3657033505370475}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11664000, "num_steps_sampled": 6220800, "sample_time_ms": 22297.013, "load_time_ms": 39.012, "grad_time_ms": 10179.524, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004206617828458548, "policy_loss": -0.003324081189930439, "vf_loss": 80.87010192871094, "vf_explained_var": 0.7655022740364075, "kl": 0.001813961542211473, "entropy": 1.1126155853271484, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6220800, "episodes_total": 15552, "training_iteration": 486, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-58-35", "timestamp": 1660262315, 
"time_this_iter_s": 33.220837116241455, "time_total_s": 20723.04877257347, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, 
"log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, 
"sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20723.04877257347, "timesteps_since_restore": 6220800, "iterations_since_restore": 486, "perf": {"cpu_util_percent": 32.0404255319149, "ram_util_percent": 59.24255319148937}} +{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 618.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 309.46}, "custom_metrics": {"sparse_reward_mean": 214.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.72, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.48, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.42, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 4, 
"soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 633.0, 636.0, 630.0, 587.0, 633.0, 630.0, 630.0, 636.0, 636.0, 587.0, 636.0, 630.0, 633.0, 582.0, 633.0, 633.0, 636.0, 561.0, 630.0, 630.0, 567.0, 639.0, 630.0, 627.0, 627.0, 630.0, 587.0, 633.0, 633.0, 582.0, 630.0, 630.0, 630.0, 639.0, 579.0, 636.0, 639.0, 590.0, 630.0, 639.0, 639.0, 630.0, 636.0, 636.0, 639.0, 633.0, 636.0, 582.0, 636.0, 621.0, 630.0, 633.0, 630.0, 633.0, 576.0, 582.0, 636.0, 630.0, 630.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 633.0, 630.0, 576.0, 636.0, 621.0, 636.0, 636.0, 636.0, 576.0, 587.0, 633.0, 633.0, 582.0, 522.0, 633.0, 579.0, 636.0, 639.0, 582.0, 633.0, 576.0, 627.0, 636.0, 587.0, 639.0, 630.0, 636.0, 579.0, 582.0, 636.0, 582.0, 579.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 319.0, 320.0, 316.0, 317.0, 322.0, 314.0, 318.0, 312.0, 296.0, 291.0, 316.0, 317.0, 316.0, 314.0, 308.0, 322.0, 
319.0, 317.0, 314.0, 322.0, 299.0, 288.0, 324.0, 312.0, 313.0, 317.0, 308.0, 325.0, 288.0, 294.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 277.0, 284.0, 316.0, 314.0, 316.0, 314.0, 278.0, 289.0, 319.0, 320.0, 316.0, 314.0, 305.0, 322.0, 310.0, 317.0, 316.0, 314.0, 296.0, 291.0, 316.0, 317.0, 316.0, 317.0, 288.0, 294.0, 308.0, 322.0, 311.0, 319.0, 311.0, 319.0, 317.0, 322.0, 285.0, 294.0, 319.0, 317.0, 319.0, 320.0, 299.0, 291.0, 316.0, 314.0, 319.0, 320.0, 324.0, 315.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 317.0, 322.0, 314.0, 319.0, 314.0, 322.0, 286.0, 296.0, 317.0, 319.0, 308.0, 313.0, 313.0, 317.0, 319.0, 314.0, 321.0, 309.0, 319.0, 314.0, 299.0, 277.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 314.0, 288.0, 299.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 327.0, 306.0, 313.0, 317.0, 294.0, 282.0, 317.0, 319.0, 310.0, 311.0, 319.0, 317.0, 319.0, 317.0, 309.0, 327.0, 293.0, 283.0, 293.0, 294.0, 311.0, 322.0, 314.0, 319.0, 290.0, 292.0, 262.0, 260.0, 316.0, 317.0, 286.0, 293.0, 317.0, 319.0, 322.0, 317.0, 291.0, 291.0, 319.0, 314.0, 282.0, 294.0, 313.0, 314.0, 320.0, 316.0, 296.0, 291.0, 322.0, 317.0, 306.0, 324.0, 314.0, 322.0, 285.0, 294.0, 289.0, 293.0, 319.0, 317.0, 285.0, 297.0, 283.0, 296.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.762033130729778, "mean_processing_ms": 0.22108464880455408, "mean_inference_ms": 1.3652218358269517}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11688000, "num_steps_sampled": 6233600, "sample_time_ms": 22173.711, "load_time_ms": 39.002, "grad_time_ms": 10239.525, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0017484220443293452, "policy_loss": -0.005304198246449232, "vf_loss": 76.0898666381836, "vf_explained_var": 0.767227828502655, "kl": 0.0019641267135739326, "entropy": 1.1127411127090454, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 6233600, "episodes_total": 15584, "training_iteration": 487, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-59-10", "timestamp": 1660262350, "time_this_iter_s": 34.99830985069275, "time_total_s": 20758.047082424164, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20758.047082424164, "timesteps_since_restore": 6233600, "iterations_since_restore": 487, "perf": {"cpu_util_percent": 33.78367346938776, "ram_util_percent": 59.60612244897959}} +{"episode_reward_max": 644.0, "episode_reward_min": 522.0, "episode_reward_mean": 619.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 309.9}, "custom_metrics": {"sparse_reward_mean": 215.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.8, "shaped_reward_min": 160, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.48, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.5, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.03, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.09, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.03, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.03, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 639.0, 639.0, 600.0, 630.0, 639.0, 644.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 582.0, 630.0, 633.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 582.0, 636.0, 630.0, 633.0, 630.0, 627.0, 639.0, 582.0, 633.0, 636.0, 636.0, 633.0, 633.0, 633.0, 630.0, 576.0, 636.0, 621.0, 636.0, 636.0, 636.0, 576.0, 587.0, 633.0, 633.0, 582.0, 522.0, 633.0, 579.0, 636.0, 639.0, 582.0, 633.0, 576.0, 627.0, 636.0, 587.0, 639.0, 630.0, 636.0, 579.0, 582.0, 636.0, 582.0, 579.0, 633.0, 636.0, 639.0, 633.0, 636.0, 630.0, 587.0, 633.0, 630.0, 630.0, 636.0, 636.0, 587.0, 636.0, 630.0, 633.0, 582.0, 633.0, 633.0, 636.0, 561.0, 630.0, 630.0, 567.0, 639.0, 630.0, 627.0, 627.0, 630.0, 587.0, 633.0, 633.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 322.0, 317.0, 319.0, 320.0, 301.0, 299.0, 321.0, 309.0, 317.0, 322.0, 319.0, 325.0, 319.0, 320.0, 316.0, 317.0, 316.0, 314.0, 314.0, 325.0, 314.0, 316.0, 314.0, 322.0, 291.0, 291.0, 313.0, 317.0, 314.0, 319.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 319.0, 317.0, 311.0, 319.0, 314.0, 319.0, 316.0, 314.0, 310.0, 317.0, 322.0, 317.0, 288.0, 294.0, 316.0, 317.0, 317.0, 319.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 327.0, 306.0, 313.0, 317.0, 294.0, 282.0, 317.0, 319.0, 310.0, 311.0, 319.0, 317.0, 319.0, 317.0, 309.0, 327.0, 293.0, 283.0, 293.0, 294.0, 311.0, 322.0, 314.0, 319.0, 290.0, 292.0, 262.0, 260.0, 316.0, 317.0, 286.0, 293.0, 317.0, 319.0, 322.0, 317.0, 291.0, 291.0, 319.0, 314.0, 282.0, 294.0, 313.0, 314.0, 320.0, 316.0, 296.0, 291.0, 322.0, 317.0, 306.0, 324.0, 314.0, 322.0, 285.0, 294.0, 289.0, 293.0, 319.0, 317.0, 285.0, 297.0, 283.0, 296.0, 314.0, 319.0, 319.0, 317.0, 319.0, 320.0, 316.0, 317.0, 322.0, 314.0, 318.0, 312.0, 296.0, 291.0, 316.0, 317.0, 316.0, 314.0, 308.0, 322.0, 319.0, 317.0, 314.0, 322.0, 299.0, 288.0, 324.0, 312.0, 313.0, 317.0, 308.0, 325.0, 288.0, 294.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 277.0, 284.0, 316.0, 314.0, 316.0, 314.0, 278.0, 289.0, 319.0, 320.0, 316.0, 314.0, 305.0, 322.0, 310.0, 317.0, 316.0, 314.0, 296.0, 291.0, 316.0, 317.0, 316.0, 317.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7614456124606471, "mean_processing_ms": 0.22097346860872114, "mean_inference_ms": 1.3647307081670101}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11712000, "num_steps_sampled": 6246400, "sample_time_ms": 21961.201, "load_time_ms": 38.731, "grad_time_ms": 10224.967, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00313456985168159, "policy_loss": -0.003917869180440903, "vf_loss": 76.04095458984375, 
"vf_explained_var": 0.7662898898124695, "kl": 0.0023237813729792833, "entropy": 1.1033259630203247, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6246400, "episodes_total": 15616, "training_iteration": 488, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-59-42", "timestamp": 1660262382, "time_this_iter_s": 32.1832549571991, "time_total_s": 20790.230337381363, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20790.230337381363, "timesteps_since_restore": 6246400, "iterations_since_restore": 488, "perf": {"cpu_util_percent": 31.12391304347826, "ram_util_percent": 59.10434782608694}} +{"episode_reward_max": 644.0, "episode_reward_min": 561.0, "episode_reward_mean": 624.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 277.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 312.325}, "custom_metrics": {"sparse_reward_mean": 216.8, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 191.05, "shaped_reward_min": 160, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.65, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.52, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 17.98, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.54, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.21, "potting_onion_agent_1_min": 16, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.33, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.16, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.54, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.21, "optimal_onion_potting_agent_1_min": 16, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.54, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.21, "viable_onion_potting_agent_1_min": 16, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 636.0, 636.0, 639.0, 639.0, 639.0, 633.0, 639.0, 624.0, 624.0, 636.0, 630.0, 582.0, 636.0, 582.0, 576.0, 639.0, 630.0, 627.0, 630.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 639.0, 636.0, 636.0, 636.0, 630.0, 636.0, 582.0, 579.0, 633.0, 636.0, 639.0, 633.0, 636.0, 630.0, 587.0, 633.0, 630.0, 630.0, 636.0, 636.0, 587.0, 636.0, 630.0, 633.0, 582.0, 633.0, 633.0, 636.0, 561.0, 630.0, 630.0, 567.0, 639.0, 630.0, 627.0, 627.0, 630.0, 587.0, 633.0, 633.0, 582.0, 579.0, 639.0, 639.0, 600.0, 630.0, 639.0, 644.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 582.0, 630.0, 633.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 582.0, 636.0, 630.0, 633.0, 630.0, 627.0, 639.0, 582.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 318.0, 315.0, 319.0, 317.0, 314.0, 322.0, 322.0, 317.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 327.0, 312.0, 314.0, 310.0, 316.0, 308.0, 316.0, 320.0, 308.0, 322.0, 291.0, 291.0, 319.0, 317.0, 294.0, 288.0, 285.0, 291.0, 319.0, 320.0, 311.0, 319.0, 308.0, 319.0, 319.0, 311.0, 313.0, 314.0, 319.0, 311.0, 316.0, 314.0, 322.0, 317.0, 319.0, 317.0, 316.0, 320.0, 319.0, 320.0, 317.0, 319.0, 324.0, 312.0, 319.0, 317.0, 316.0, 314.0, 319.0, 317.0, 285.0, 297.0, 283.0, 296.0, 314.0, 319.0, 319.0, 317.0, 319.0, 320.0, 316.0, 317.0, 322.0, 314.0, 318.0, 312.0, 296.0, 291.0, 316.0, 317.0, 316.0, 314.0, 308.0, 322.0, 319.0, 317.0, 314.0, 322.0, 299.0, 288.0, 324.0, 312.0, 313.0, 317.0, 308.0, 325.0, 288.0, 294.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 277.0, 284.0, 316.0, 314.0, 316.0, 314.0, 278.0, 289.0, 319.0, 320.0, 316.0, 314.0, 305.0, 322.0, 310.0, 317.0, 316.0, 314.0, 296.0, 291.0, 316.0, 317.0, 316.0, 317.0, 288.0, 294.0, 293.0, 286.0, 322.0, 317.0, 319.0, 320.0, 301.0, 299.0, 321.0, 309.0, 317.0, 322.0, 319.0, 325.0, 319.0, 320.0, 316.0, 317.0, 316.0, 314.0, 314.0, 325.0, 314.0, 316.0, 314.0, 322.0, 291.0, 291.0, 313.0, 317.0, 314.0, 319.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 319.0, 317.0, 311.0, 319.0, 314.0, 319.0, 316.0, 314.0, 310.0, 317.0, 322.0, 317.0, 288.0, 294.0, 316.0, 317.0, 317.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7608592858442179, "mean_processing_ms": 0.22086239768906624, "mean_inference_ms": 1.3642056926460657}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11736000, "num_steps_sampled": 6259200, "sample_time_ms": 22026.8, "load_time_ms": 38.825, "grad_time_ms": 10224.17, "update_time_ms": 0.001, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021181919146329165, "policy_loss": -0.004830162972211838, "vf_loss": 74.9923324584961, "vf_explained_var": 0.7754970192909241, "kl": 0.002056455472484231, "entropy": 1.1017627716064453, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6259200, "episodes_total": 15648, "training_iteration": 489, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-00-16", "timestamp": 1660262416, "time_this_iter_s": 33.8461229801178, "time_total_s": 20824.07646036148, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20824.07646036148, "timesteps_since_restore": 6259200, "iterations_since_restore": 489, "perf": {"cpu_util_percent": 34.53541666666667, "ram_util_percent": 59.26458333333333}} +{"episode_reward_max": 644.0, "episode_reward_min": 575.0, "episode_reward_mean": 626.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 285.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 313.145}, "custom_metrics": {"sparse_reward_mean": 217.2, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 191.89, "shaped_reward_min": 160, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.53, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.99, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.72, "potting_onion_agent_0_min": 14, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.17, "potting_onion_agent_1_min": 16, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.97, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 1, 
"useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.24, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.24, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.21, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.72, "optimal_onion_potting_agent_0_min": 14, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.17, "optimal_onion_potting_agent_1_min": 16, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.72, "viable_onion_potting_agent_0_min": 14, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.17, "viable_onion_potting_agent_1_min": 16, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 633.0, 575.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 636.0, 639.0, 579.0, 636.0, 636.0, 624.0, 630.0, 624.0, 639.0, 630.0, 633.0, 633.0, 639.0, 630.0, 633.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 587.0, 633.0, 633.0, 582.0, 579.0, 639.0, 639.0, 600.0, 630.0, 639.0, 644.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 582.0, 630.0, 633.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 582.0, 636.0, 630.0, 633.0, 630.0, 627.0, 639.0, 582.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 639.0, 639.0, 639.0, 633.0, 639.0, 624.0, 624.0, 636.0, 630.0, 582.0, 636.0, 582.0, 576.0, 639.0, 630.0, 627.0, 630.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 639.0, 636.0, 636.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 288.0, 287.0, 319.0, 317.0, 319.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 297.0, 319.0, 317.0, 317.0, 322.0, 314.0, 322.0, 317.0, 322.0, 288.0, 291.0, 317.0, 319.0, 314.0, 322.0, 310.0, 314.0, 314.0, 316.0, 316.0, 308.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 314.0, 319.0, 319.0, 320.0, 318.0, 312.0, 313.0, 320.0, 315.0, 324.0, 319.0, 311.0, 288.0, 299.0, 316.0, 314.0, 314.0, 322.0, 290.0, 297.0, 296.0, 291.0, 316.0, 317.0, 316.0, 317.0, 288.0, 294.0, 293.0, 286.0, 322.0, 317.0, 319.0, 320.0, 301.0, 299.0, 321.0, 309.0, 317.0, 322.0, 319.0, 325.0, 319.0, 320.0, 316.0, 317.0, 316.0, 314.0, 314.0, 325.0, 314.0, 316.0, 314.0, 322.0, 291.0, 291.0, 313.0, 317.0, 314.0, 319.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 319.0, 317.0, 311.0, 319.0, 314.0, 319.0, 316.0, 314.0, 310.0, 317.0, 322.0, 317.0, 288.0, 294.0, 316.0, 317.0, 317.0, 319.0, 314.0, 319.0, 318.0, 315.0, 319.0, 317.0, 314.0, 322.0, 322.0, 317.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 327.0, 312.0, 314.0, 310.0, 316.0, 308.0, 316.0, 320.0, 308.0, 322.0, 291.0, 291.0, 319.0, 317.0, 294.0, 288.0, 285.0, 291.0, 319.0, 320.0, 311.0, 319.0, 308.0, 319.0, 319.0, 311.0, 313.0, 314.0, 319.0, 311.0, 316.0, 314.0, 322.0, 317.0, 319.0, 317.0, 316.0, 320.0, 319.0, 320.0, 317.0, 319.0, 324.0, 312.0, 319.0, 317.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.760275311275054, "mean_processing_ms": 0.22075194950197713, "mean_inference_ms": 1.36361755561513}, 
"off_policy_estimator": {}, "info": {"num_steps_trained": 11760000, "num_steps_sampled": 6272000, "sample_time_ms": 22027.209, "load_time_ms": 38.894, "grad_time_ms": 10417.749, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028506640810519457, "policy_loss": -0.003879321739077568, "vf_loss": 72.80782318115234, "vf_explained_var": 0.7767069935798645, "kl": 0.0016769763315096498, "entropy": 1.101601243019104, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6272000, "episodes_total": 15680, "training_iteration": 490, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-00-49", "timestamp": 1660262449, "time_this_iter_s": 33.20848989486694, "time_total_s": 20857.284950256348, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 
400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20857.284950256348, "timesteps_since_restore": 6272000, "iterations_since_restore": 490, "perf": {"cpu_util_percent": 36.26808510638298, "ram_util_percent": 59.0851063829787}} +{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 624.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 312.42}, "custom_metrics": {"sparse_reward_mean": 216.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 191.24, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.13, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.83, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.73, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.06, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, 
"dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.28, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.73, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.06, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.73, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.06, 
"viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 627.0, 633.0, 636.0, 627.0, 633.0, 630.0, 627.0, 579.0, 630.0, 636.0, 639.0, 633.0, 630.0, 579.0, 639.0, 633.0, 615.0, 636.0, 630.0, 579.0, 582.0, 639.0, 636.0, 639.0, 444.0, 630.0, 633.0, 624.0, 630.0, 639.0, 639.0, 639.0, 582.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 639.0, 639.0, 639.0, 633.0, 639.0, 624.0, 624.0, 636.0, 630.0, 582.0, 636.0, 582.0, 576.0, 639.0, 630.0, 627.0, 630.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 639.0, 636.0, 636.0, 636.0, 630.0, 636.0, 636.0, 633.0, 575.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 636.0, 639.0, 579.0, 636.0, 
636.0, 624.0, 630.0, 624.0, 639.0, 630.0, 633.0, 633.0, 639.0, 630.0, 633.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 321.0, 311.0, 316.0, 316.0, 317.0, 319.0, 317.0, 311.0, 316.0, 311.0, 322.0, 311.0, 319.0, 307.0, 320.0, 288.0, 291.0, 317.0, 313.0, 319.0, 317.0, 319.0, 320.0, 314.0, 319.0, 314.0, 316.0, 291.0, 288.0, 319.0, 320.0, 316.0, 317.0, 307.0, 308.0, 314.0, 322.0, 311.0, 319.0, 289.0, 290.0, 291.0, 291.0, 316.0, 323.0, 314.0, 322.0, 322.0, 317.0, 222.0, 222.0, 311.0, 319.0, 319.0, 314.0, 313.0, 311.0, 311.0, 319.0, 322.0, 317.0, 314.0, 325.0, 322.0, 317.0, 288.0, 294.0, 316.0, 317.0, 317.0, 319.0, 314.0, 319.0, 318.0, 315.0, 319.0, 317.0, 314.0, 322.0, 322.0, 317.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 327.0, 312.0, 314.0, 310.0, 316.0, 308.0, 316.0, 320.0, 308.0, 322.0, 291.0, 291.0, 319.0, 317.0, 294.0, 288.0, 285.0, 291.0, 319.0, 320.0, 311.0, 319.0, 308.0, 319.0, 319.0, 311.0, 313.0, 314.0, 319.0, 311.0, 316.0, 314.0, 322.0, 317.0, 319.0, 317.0, 316.0, 320.0, 319.0, 320.0, 317.0, 319.0, 324.0, 312.0, 319.0, 317.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 288.0, 287.0, 319.0, 317.0, 319.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 297.0, 319.0, 317.0, 317.0, 322.0, 314.0, 322.0, 317.0, 322.0, 288.0, 291.0, 317.0, 319.0, 314.0, 322.0, 310.0, 314.0, 314.0, 316.0, 316.0, 308.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 314.0, 319.0, 319.0, 320.0, 318.0, 312.0, 313.0, 320.0, 315.0, 324.0, 319.0, 
311.0, 288.0, 299.0, 316.0, 314.0, 314.0, 322.0, 290.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7596883332679625, "mean_processing_ms": 0.22064007492809667, "mean_inference_ms": 1.3629331607275919}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11784000, "num_steps_sampled": 6284800, "sample_time_ms": 21863.417, "load_time_ms": 38.973, "grad_time_ms": 10519.041, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0017596340039744973, "policy_loss": -0.008380659855902195, "vf_loss": 71.76502227783203, "vf_explained_var": 0.787438690662384, "kl": 0.0020715922582894564, "entropy": 1.110949158668518, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6284800, "episodes_total": 15712, "training_iteration": 491, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-01-21", "timestamp": 1660262481, "time_this_iter_s": 31.476083278656006, "time_total_s": 20888.761033535004, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20888.761033535004, "timesteps_since_restore": 6284800, "iterations_since_restore": 491, "perf": {"cpu_util_percent": 35.37777777777777, "ram_util_percent": 59.10222222222222}} +{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 619.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 309.65}, "custom_metrics": {"sparse_reward_mean": 215.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 189.3, "shaped_reward_min": 124, 
"shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.17, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.53, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.68, "potting_onion_agent_0_min": 13, 
"potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.82, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 6.1, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.29, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.68, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.82, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, 
"optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.68, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.82, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 570.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 630.0, 576.0, 587.0, 579.0, 633.0, 636.0, 615.0, 633.0, 630.0, 633.0, 573.0, 579.0, 633.0, 582.0, 636.0, 525.0, 621.0, 627.0, 587.0, 630.0, 639.0, 630.0, 579.0, 636.0, 636.0, 636.0, 630.0, 636.0, 636.0, 633.0, 575.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 636.0, 639.0, 579.0, 636.0, 636.0, 624.0, 
630.0, 624.0, 639.0, 630.0, 633.0, 633.0, 639.0, 630.0, 633.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 639.0, 627.0, 633.0, 636.0, 627.0, 633.0, 630.0, 627.0, 579.0, 630.0, 636.0, 639.0, 633.0, 630.0, 579.0, 639.0, 633.0, 615.0, 636.0, 630.0, 579.0, 582.0, 639.0, 636.0, 639.0, 444.0, 630.0, 633.0, 624.0, 630.0, 639.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 293.0, 277.0, 290.0, 292.0, 317.0, 319.0, 314.0, 313.0, 311.0, 325.0, 313.0, 317.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 296.0, 280.0, 293.0, 294.0, 293.0, 286.0, 316.0, 317.0, 314.0, 322.0, 307.0, 308.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 282.0, 291.0, 288.0, 291.0, 319.0, 314.0, 291.0, 291.0, 316.0, 320.0, 263.0, 262.0, 306.0, 315.0, 311.0, 316.0, 293.0, 294.0, 311.0, 319.0, 319.0, 320.0, 311.0, 319.0, 280.0, 299.0, 317.0, 319.0, 324.0, 312.0, 319.0, 317.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 288.0, 287.0, 319.0, 317.0, 319.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 297.0, 319.0, 317.0, 317.0, 322.0, 314.0, 322.0, 317.0, 322.0, 288.0, 291.0, 317.0, 319.0, 314.0, 322.0, 310.0, 314.0, 314.0, 316.0, 316.0, 308.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 314.0, 319.0, 319.0, 320.0, 318.0, 312.0, 313.0, 320.0, 315.0, 324.0, 319.0, 311.0, 288.0, 299.0, 316.0, 314.0, 314.0, 322.0, 290.0, 297.0, 318.0, 321.0, 311.0, 316.0, 316.0, 317.0, 319.0, 317.0, 311.0, 316.0, 311.0, 322.0, 311.0, 319.0, 307.0, 320.0, 288.0, 291.0, 317.0, 313.0, 319.0, 317.0, 319.0, 320.0, 314.0, 
319.0, 314.0, 316.0, 291.0, 288.0, 319.0, 320.0, 316.0, 317.0, 307.0, 308.0, 314.0, 322.0, 311.0, 319.0, 289.0, 290.0, 291.0, 291.0, 316.0, 323.0, 314.0, 322.0, 322.0, 317.0, 222.0, 222.0, 311.0, 319.0, 319.0, 314.0, 313.0, 311.0, 311.0, 319.0, 322.0, 317.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7590978232117643, "mean_processing_ms": 0.22052625115278437, "mean_inference_ms": 1.3621520810826262}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11808000, "num_steps_sampled": 6297600, "sample_time_ms": 21732.6, "load_time_ms": 39.061, "grad_time_ms": 10636.833, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013283310690894723, "policy_loss": -0.00562013266608119, "vf_loss": 75.04083251953125, "vf_explained_var": 0.7633475661277771, "kl": 0.002308204537257552, "entropy": 1.1112407445907593, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6297600, "episodes_total": 15744, "training_iteration": 492, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-01-52", "timestamp": 1660262512, "time_this_iter_s": 31.412389039993286, "time_total_s": 20920.173422574997, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, 
"custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20920.173422574997, "timesteps_since_restore": 6297600, "iterations_since_restore": 492, "perf": {"cpu_util_percent": 32.93999999999999, "ram_util_percent": 59.11333333333331}} +{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 615.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 325.0}, 
"policy_reward_mean": {"ppo": 307.99}, "custom_metrics": {"sparse_reward_mean": 213.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.38, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, 
"useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.52, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.86, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 6.05, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.33, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.17, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.52, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.86, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.52, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.86, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 639.0, 627.0, 636.0, 582.0, 630.0, 593.0, 582.0, 627.0, 630.0, 530.0, 639.0, 636.0, 636.0, 519.0, 627.0, 636.0, 636.0, 633.0, 630.0, 627.0, 581.0, 
636.0, 633.0, 633.0, 633.0, 633.0, 633.0, 582.0, 636.0, 633.0, 587.0, 630.0, 636.0, 587.0, 639.0, 627.0, 633.0, 636.0, 627.0, 633.0, 630.0, 627.0, 579.0, 630.0, 636.0, 639.0, 633.0, 630.0, 579.0, 639.0, 633.0, 615.0, 636.0, 630.0, 579.0, 582.0, 639.0, 636.0, 639.0, 444.0, 630.0, 633.0, 624.0, 630.0, 639.0, 639.0, 573.0, 570.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 630.0, 576.0, 587.0, 579.0, 633.0, 636.0, 615.0, 633.0, 630.0, 633.0, 573.0, 579.0, 633.0, 582.0, 636.0, 525.0, 621.0, 627.0, 587.0, 630.0, 639.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 317.0, 311.0, 319.0, 314.0, 325.0, 308.0, 319.0, 316.0, 320.0, 291.0, 291.0, 311.0, 319.0, 291.0, 302.0, 296.0, 286.0, 316.0, 311.0, 318.0, 312.0, 253.0, 277.0, 322.0, 317.0, 319.0, 317.0, 319.0, 317.0, 264.0, 255.0, 314.0, 313.0, 319.0, 317.0, 319.0, 317.0, 315.0, 318.0, 311.0, 319.0, 316.0, 311.0, 295.0, 286.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 319.0, 314.0, 319.0, 314.0, 321.0, 312.0, 293.0, 289.0, 311.0, 325.0, 316.0, 317.0, 288.0, 299.0, 316.0, 314.0, 314.0, 322.0, 290.0, 297.0, 318.0, 321.0, 311.0, 316.0, 316.0, 317.0, 319.0, 317.0, 311.0, 316.0, 311.0, 322.0, 311.0, 319.0, 307.0, 320.0, 288.0, 291.0, 317.0, 313.0, 319.0, 317.0, 319.0, 320.0, 314.0, 319.0, 314.0, 316.0, 291.0, 288.0, 319.0, 320.0, 316.0, 317.0, 307.0, 308.0, 314.0, 322.0, 311.0, 319.0, 289.0, 290.0, 291.0, 291.0, 316.0, 323.0, 314.0, 322.0, 322.0, 317.0, 222.0, 222.0, 311.0, 319.0, 319.0, 314.0, 313.0, 311.0, 311.0, 
319.0, 322.0, 317.0, 314.0, 325.0, 285.0, 288.0, 293.0, 277.0, 290.0, 292.0, 317.0, 319.0, 314.0, 313.0, 311.0, 325.0, 313.0, 317.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 296.0, 280.0, 293.0, 294.0, 293.0, 286.0, 316.0, 317.0, 314.0, 322.0, 307.0, 308.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 282.0, 291.0, 288.0, 291.0, 319.0, 314.0, 291.0, 291.0, 316.0, 320.0, 263.0, 262.0, 306.0, 315.0, 311.0, 316.0, 293.0, 294.0, 311.0, 319.0, 319.0, 320.0, 311.0, 319.0, 280.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7584994121289265, "mean_processing_ms": 0.22040925813735274, "mean_inference_ms": 1.3613199972214534}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11832000, "num_steps_sampled": 6310400, "sample_time_ms": 21713.634, "load_time_ms": 39.62, "grad_time_ms": 11039.119, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016831206157803535, "policy_loss": -0.005174629390239716, "vf_loss": 74.1271743774414, "vf_explained_var": 0.7761192321777344, "kl": 0.002214068779721856, "entropy": 1.1099259853363037, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6310400, "episodes_total": 15776, "training_iteration": 493, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-02-26", "timestamp": 1660262546, "time_this_iter_s": 33.39047908782959, "time_total_s": 20953.563901662827, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, 
"max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20953.563901662827, "timesteps_since_restore": 6310400, "iterations_since_restore": 493, "perf": {"cpu_util_percent": 34.62978723404255, "ram_util_percent": 59.10212765957445}} 
+{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 617.75, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.875}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.35, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.12, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.53, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.51, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.61, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.32, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.07, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.5, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.07, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.07, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 567.0, 639.0, 630.0, 639.0, 587.0, 627.0, 639.0, 636.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 639.0, 564.0, 636.0, 636.0, 633.0, 587.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 633.0, 582.0, 639.0, 636.0, 633.0, 624.0, 630.0, 639.0, 639.0, 573.0, 570.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 630.0, 576.0, 587.0, 579.0, 633.0, 636.0, 615.0, 633.0, 630.0, 633.0, 573.0, 579.0, 633.0, 582.0, 636.0, 525.0, 621.0, 627.0, 587.0, 630.0, 639.0, 630.0, 579.0, 639.0, 630.0, 639.0, 627.0, 636.0, 582.0, 630.0, 593.0, 582.0, 627.0, 630.0, 530.0, 639.0, 636.0, 636.0, 519.0, 627.0, 636.0, 636.0, 633.0, 630.0, 627.0, 581.0, 636.0, 633.0, 633.0, 633.0, 633.0, 633.0, 582.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 284.0, 283.0, 319.0, 320.0, 316.0, 314.0, 322.0, 317.0, 294.0, 293.0, 313.0, 314.0, 319.0, 320.0, 314.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 317.0, 322.0, 317.0, 309.0, 327.0, 316.0, 317.0, 319.0, 320.0, 275.0, 289.0, 319.0, 317.0, 321.0, 315.0, 316.0, 317.0, 286.0, 301.0, 319.0, 317.0, 319.0, 317.0, 319.0, 314.0, 296.0, 286.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 288.0, 294.0, 317.0, 322.0, 316.0, 320.0, 319.0, 314.0, 313.0, 311.0, 311.0, 319.0, 322.0, 317.0, 314.0, 325.0, 285.0, 288.0, 293.0, 277.0, 290.0, 292.0, 317.0, 319.0, 314.0, 313.0, 311.0, 325.0, 313.0, 317.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 296.0, 280.0, 293.0, 294.0, 293.0, 
286.0, 316.0, 317.0, 314.0, 322.0, 307.0, 308.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 282.0, 291.0, 288.0, 291.0, 319.0, 314.0, 291.0, 291.0, 316.0, 320.0, 263.0, 262.0, 306.0, 315.0, 311.0, 316.0, 293.0, 294.0, 311.0, 319.0, 319.0, 320.0, 311.0, 319.0, 280.0, 299.0, 322.0, 317.0, 311.0, 319.0, 314.0, 325.0, 308.0, 319.0, 316.0, 320.0, 291.0, 291.0, 311.0, 319.0, 291.0, 302.0, 296.0, 286.0, 316.0, 311.0, 318.0, 312.0, 253.0, 277.0, 322.0, 317.0, 319.0, 317.0, 319.0, 317.0, 264.0, 255.0, 314.0, 313.0, 319.0, 317.0, 319.0, 317.0, 315.0, 318.0, 311.0, 319.0, 316.0, 311.0, 295.0, 286.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 319.0, 314.0, 319.0, 314.0, 321.0, 312.0, 293.0, 289.0, 311.0, 325.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7579059857165342, "mean_processing_ms": 0.22029379571210359, "mean_inference_ms": 1.3605505316111535}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11856000, "num_steps_sampled": 6323200, "sample_time_ms": 21716.733, "load_time_ms": 39.394, "grad_time_ms": 11172.934, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015474725514650345, "policy_loss": -0.005591364111751318, "vf_loss": 76.93659210205078, "vf_explained_var": 0.7724745869636536, "kl": 0.002216791734099388, "entropy": 1.1096433401107788, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6323200, "episodes_total": 15808, "training_iteration": 494, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-02-58", "timestamp": 1660262578, "time_this_iter_s": 32.0608389377594, "time_total_s": 20985.624740600586, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 20985.624740600586, "timesteps_since_restore": 6323200, "iterations_since_restore": 494, "perf": {"cpu_util_percent": 31.900000000000002, "ram_util_percent": 59.13260869565216}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 615.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 307.915}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 189.03, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.02, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.42, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 5, 
"useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.54, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.45, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.04, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, 
"soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.45, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.04, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.45, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.04, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 636.0, 636.0, 636.0, 627.0, 636.0, 579.0, 639.0, 633.0, 639.0, 576.0, 630.0, 573.0, 630.0, 636.0, 630.0, 519.0, 633.0, 633.0, 636.0, 636.0, 630.0, 639.0, 630.0, 639.0, 587.0, 584.0, 180.0, 630.0, 630.0, 627.0, 630.0, 639.0, 630.0, 579.0, 639.0, 630.0, 639.0, 627.0, 636.0, 582.0, 630.0, 593.0, 582.0, 627.0, 630.0, 530.0, 639.0, 636.0, 636.0, 519.0, 627.0, 636.0, 636.0, 633.0, 630.0, 627.0, 581.0, 636.0, 633.0, 633.0, 633.0, 633.0, 633.0, 582.0, 636.0, 633.0, 582.0, 567.0, 639.0, 630.0, 639.0, 587.0, 627.0, 639.0, 636.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 639.0, 564.0, 636.0, 636.0, 633.0, 587.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 633.0, 582.0, 639.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 311.0, 291.0, 288.0, 319.0, 317.0, 304.0, 332.0, 324.0, 312.0, 321.0, 306.0, 324.0, 312.0, 285.0, 294.0, 322.0, 317.0, 316.0, 317.0, 320.0, 319.0, 288.0, 288.0, 321.0, 309.0, 288.0, 285.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 256.0, 263.0, 314.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 312.0, 327.0, 296.0, 291.0, 296.0, 288.0, 91.0, 89.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 311.0, 319.0, 319.0, 320.0, 311.0, 319.0, 280.0, 299.0, 322.0, 
317.0, 311.0, 319.0, 314.0, 325.0, 308.0, 319.0, 316.0, 320.0, 291.0, 291.0, 311.0, 319.0, 291.0, 302.0, 296.0, 286.0, 316.0, 311.0, 318.0, 312.0, 253.0, 277.0, 322.0, 317.0, 319.0, 317.0, 319.0, 317.0, 264.0, 255.0, 314.0, 313.0, 319.0, 317.0, 319.0, 317.0, 315.0, 318.0, 311.0, 319.0, 316.0, 311.0, 295.0, 286.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 319.0, 314.0, 319.0, 314.0, 321.0, 312.0, 293.0, 289.0, 311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 284.0, 283.0, 319.0, 320.0, 316.0, 314.0, 322.0, 317.0, 294.0, 293.0, 313.0, 314.0, 319.0, 320.0, 314.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 317.0, 322.0, 317.0, 309.0, 327.0, 316.0, 317.0, 319.0, 320.0, 275.0, 289.0, 319.0, 317.0, 321.0, 315.0, 316.0, 317.0, 286.0, 301.0, 319.0, 317.0, 319.0, 317.0, 319.0, 314.0, 296.0, 286.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 288.0, 294.0, 317.0, 322.0, 316.0, 320.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7573280240404361, "mean_processing_ms": 0.22018399055339372, "mean_inference_ms": 1.359923906771843}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11880000, "num_steps_sampled": 6336000, "sample_time_ms": 21753.166, "load_time_ms": 39.587, "grad_time_ms": 11166.874, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002439265139400959, "policy_loss": -0.004945265594869852, "vf_loss": 79.36029815673828, "vf_explained_var": 0.7895925045013428, "kl": 0.0015944234328344464, "entropy": 1.1029914617538452, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6336000, "episodes_total": 15840, "training_iteration": 495, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-03-32", "timestamp": 1660262612, "time_this_iter_s": 34.07752990722656, "time_total_s": 21019.702270507812, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21019.702270507812, "timesteps_since_restore": 6336000, "iterations_since_restore": 495, "perf": {"cpu_util_percent": 32.84375, "ram_util_percent": 59.13125}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 614.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 307.225}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.85, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.45, 
"onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.3, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.06, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.65, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.67, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.3, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.06, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.3, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.06, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 633.0, 636.0, 627.0, 579.0, 630.0, 636.0, 633.0, 582.0, 639.0, 624.0, 630.0, 587.0, 582.0, 582.0, 639.0, 522.0, 639.0, 639.0, 639.0, 636.0, 630.0, 624.0, 639.0, 587.0, 633.0, 636.0, 582.0, 630.0, 579.0, 582.0, 630.0, 633.0, 582.0, 636.0, 633.0, 582.0, 567.0, 639.0, 630.0, 639.0, 587.0, 627.0, 639.0, 636.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 639.0, 564.0, 636.0, 636.0, 633.0, 587.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 633.0, 582.0, 639.0, 636.0, 633.0, 630.0, 579.0, 636.0, 636.0, 636.0, 627.0, 636.0, 579.0, 639.0, 633.0, 639.0, 576.0, 630.0, 573.0, 630.0, 636.0, 630.0, 519.0, 633.0, 633.0, 636.0, 636.0, 630.0, 639.0, 630.0, 639.0, 587.0, 584.0, 180.0, 630.0, 630.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 314.0, 319.0, 319.0, 317.0, 318.0, 309.0, 294.0, 285.0, 311.0, 319.0, 319.0, 317.0, 314.0, 319.0, 290.0, 292.0, 319.0, 320.0, 307.0, 317.0, 318.0, 312.0, 296.0, 291.0, 291.0, 291.0, 296.0, 286.0, 319.0, 320.0, 260.0, 262.0, 316.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 317.0, 316.0, 314.0, 315.0, 309.0, 322.0, 317.0, 296.0, 
291.0, 316.0, 317.0, 324.0, 312.0, 294.0, 288.0, 316.0, 314.0, 282.0, 297.0, 288.0, 294.0, 316.0, 314.0, 321.0, 312.0, 293.0, 289.0, 311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 284.0, 283.0, 319.0, 320.0, 316.0, 314.0, 322.0, 317.0, 294.0, 293.0, 313.0, 314.0, 319.0, 320.0, 314.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 317.0, 322.0, 317.0, 309.0, 327.0, 316.0, 317.0, 319.0, 320.0, 275.0, 289.0, 319.0, 317.0, 321.0, 315.0, 316.0, 317.0, 286.0, 301.0, 319.0, 317.0, 319.0, 317.0, 319.0, 314.0, 296.0, 286.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 288.0, 294.0, 317.0, 322.0, 316.0, 320.0, 319.0, 314.0, 319.0, 311.0, 291.0, 288.0, 319.0, 317.0, 304.0, 332.0, 324.0, 312.0, 321.0, 306.0, 324.0, 312.0, 285.0, 294.0, 322.0, 317.0, 316.0, 317.0, 320.0, 319.0, 288.0, 288.0, 321.0, 309.0, 288.0, 285.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 256.0, 263.0, 314.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 312.0, 327.0, 296.0, 291.0, 296.0, 288.0, 91.0, 89.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7567700204261155, "mean_processing_ms": 0.22008117197400467, "mean_inference_ms": 1.3596748332774586}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11904000, "num_steps_sampled": 6348800, "sample_time_ms": 22347.681, "load_time_ms": 39.524, "grad_time_ms": 11120.722, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003682489274069667, "policy_loss": -0.0037913068663328886, "vf_loss": 80.29010772705078, "vf_explained_var": 0.7666907906532288, "kl": 0.0018009584164246917, "entropy": 1.1104191541671753, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6348800, "episodes_total": 15872, "training_iteration": 496, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-04-10", "timestamp": 
1660262650, "time_this_iter_s": 38.697832107543945, "time_total_s": 21058.400102615356, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", 
"ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 
0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21058.400102615356, "timesteps_since_restore": 6348800, "iterations_since_restore": 496, "perf": {"cpu_util_percent": 33.66909090909091, "ram_util_percent": 59.103636363636355}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 612.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 306.34}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.28, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 
0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.32, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 2, 
"soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 16.32, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.32, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 636.0, 633.0, 636.0, 633.0, 630.0, 630.0, 627.0, 582.0, 630.0, 630.0, 633.0, 636.0, 579.0, 587.0, 633.0, 582.0, 582.0, 636.0, 579.0, 633.0, 633.0, 630.0, 630.0, 582.0, 630.0, 639.0, 579.0, 587.0, 582.0, 639.0, 582.0, 639.0, 636.0, 633.0, 630.0, 579.0, 636.0, 636.0, 636.0, 627.0, 636.0, 579.0, 639.0, 633.0, 639.0, 576.0, 630.0, 573.0, 630.0, 636.0, 630.0, 519.0, 633.0, 633.0, 636.0, 636.0, 630.0, 639.0, 630.0, 639.0, 587.0, 584.0, 180.0, 630.0, 630.0, 627.0, 587.0, 633.0, 636.0, 627.0, 579.0, 630.0, 636.0, 633.0, 582.0, 639.0, 624.0, 630.0, 587.0, 582.0, 582.0, 639.0, 522.0, 639.0, 639.0, 639.0, 636.0, 630.0, 624.0, 639.0, 587.0, 633.0, 636.0, 582.0, 630.0, 579.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 316.0, 317.0, 316.0, 314.0, 316.0, 314.0, 311.0, 316.0, 
291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 312.0, 324.0, 288.0, 291.0, 290.0, 297.0, 314.0, 319.0, 291.0, 291.0, 293.0, 289.0, 319.0, 317.0, 291.0, 288.0, 314.0, 319.0, 319.0, 314.0, 316.0, 314.0, 313.0, 317.0, 293.0, 289.0, 321.0, 309.0, 317.0, 322.0, 293.0, 286.0, 291.0, 296.0, 290.0, 292.0, 322.0, 317.0, 288.0, 294.0, 317.0, 322.0, 316.0, 320.0, 319.0, 314.0, 319.0, 311.0, 291.0, 288.0, 319.0, 317.0, 304.0, 332.0, 324.0, 312.0, 321.0, 306.0, 324.0, 312.0, 285.0, 294.0, 322.0, 317.0, 316.0, 317.0, 320.0, 319.0, 288.0, 288.0, 321.0, 309.0, 288.0, 285.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 256.0, 263.0, 314.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 312.0, 327.0, 296.0, 291.0, 296.0, 288.0, 91.0, 89.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 293.0, 294.0, 314.0, 319.0, 319.0, 317.0, 318.0, 309.0, 294.0, 285.0, 311.0, 319.0, 319.0, 317.0, 314.0, 319.0, 290.0, 292.0, 319.0, 320.0, 307.0, 317.0, 318.0, 312.0, 296.0, 291.0, 291.0, 291.0, 296.0, 286.0, 319.0, 320.0, 260.0, 262.0, 316.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 317.0, 316.0, 314.0, 315.0, 309.0, 322.0, 317.0, 296.0, 291.0, 316.0, 317.0, 324.0, 312.0, 294.0, 288.0, 316.0, 314.0, 282.0, 297.0, 288.0, 294.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7562172148453271, "mean_processing_ms": 0.21998071761796992, "mean_inference_ms": 1.359487549617304}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11928000, "num_steps_sampled": 6361600, "sample_time_ms": 22295.567, "load_time_ms": 39.346, "grad_time_ms": 11042.076, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008341053617186844, "policy_loss": -0.007899199612438679, "vf_loss": 76.18367767333984, "vf_explained_var": 0.7670480608940125, "kl": 0.0021500647999346256, "entropy": 1.106536865234375, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 6361600, "episodes_total": 15904, "training_iteration": 497, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-04-44", "timestamp": 1660262684, "time_this_iter_s": 33.68950605392456, "time_total_s": 21092.08960866928, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21092.08960866928, "timesteps_since_restore": 6361600, "iterations_since_restore": 497, "perf": {"cpu_util_percent": 35.32553191489361, "ram_util_percent": 59.131914893617}} +{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 612.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 306.32}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.24, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.69, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.23, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.91, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 16.23, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.91, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.23, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.91, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 587.0, 630.0, 624.0, 582.0, 636.0, 582.0, 579.0, 636.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 587.0, 630.0, 587.0, 582.0, 582.0, 630.0, 633.0, 630.0, 624.0, 639.0, 630.0, 630.0, 636.0, 633.0, 639.0, 624.0, 636.0, 180.0, 630.0, 630.0, 627.0, 587.0, 633.0, 636.0, 627.0, 579.0, 630.0, 636.0, 633.0, 582.0, 639.0, 624.0, 630.0, 587.0, 582.0, 582.0, 639.0, 522.0, 639.0, 639.0, 639.0, 636.0, 630.0, 624.0, 639.0, 587.0, 633.0, 636.0, 582.0, 630.0, 579.0, 582.0, 630.0, 636.0, 633.0, 636.0, 633.0, 636.0, 633.0, 630.0, 630.0, 627.0, 582.0, 630.0, 630.0, 633.0, 636.0, 579.0, 587.0, 633.0, 582.0, 582.0, 636.0, 579.0, 633.0, 633.0, 630.0, 630.0, 582.0, 630.0, 639.0, 579.0, 587.0, 582.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 296.0, 291.0, 316.0, 314.0, 308.0, 316.0, 288.0, 294.0, 317.0, 319.0, 291.0, 291.0, 290.0, 289.0, 324.0, 312.0, 288.0, 294.0, 314.0, 322.0, 319.0, 317.0, 319.0, 317.0, 321.0, 312.0, 311.0, 319.0, 290.0, 297.0, 316.0, 314.0, 293.0, 294.0, 288.0, 294.0, 296.0, 286.0, 311.0, 319.0, 316.0, 317.0, 316.0, 314.0, 324.0, 300.0, 322.0, 317.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 319.0, 320.0, 310.0, 314.0, 314.0, 322.0, 91.0, 89.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 293.0, 294.0, 314.0, 319.0, 319.0, 317.0, 318.0, 309.0, 294.0, 285.0, 311.0, 319.0, 319.0, 317.0, 314.0, 319.0, 290.0, 292.0, 319.0, 320.0, 307.0, 317.0, 318.0, 312.0, 296.0, 291.0, 291.0, 291.0, 296.0, 286.0, 319.0, 320.0, 260.0, 262.0, 316.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 317.0, 316.0, 314.0, 315.0, 309.0, 322.0, 317.0, 296.0, 291.0, 316.0, 317.0, 324.0, 312.0, 294.0, 288.0, 316.0, 314.0, 282.0, 297.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 316.0, 317.0, 316.0, 314.0, 316.0, 314.0, 311.0, 316.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 312.0, 324.0, 288.0, 291.0, 290.0, 297.0, 314.0, 319.0, 291.0, 291.0, 293.0, 289.0, 319.0, 317.0, 291.0, 288.0, 314.0, 319.0, 319.0, 314.0, 316.0, 314.0, 313.0, 317.0, 293.0, 289.0, 321.0, 309.0, 317.0, 322.0, 293.0, 286.0, 291.0, 296.0, 290.0, 292.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7556558584426459, "mean_processing_ms": 0.21987743266667745, "mean_inference_ms": 1.3592501447797594}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11952000, "num_steps_sampled": 6374400, "sample_time_ms": 22219.899, "load_time_ms": 39.602, "grad_time_ms": 11084.562, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010325837647542357, "policy_loss": -0.007863701321184635, "vf_loss": 73.90652465820312, 
"vf_explained_var": 0.7726984024047852, "kl": 0.0017767212120816112, "entropy": 1.1190696954727173, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6374400, "episodes_total": 15936, "training_iteration": 498, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-05-16", "timestamp": 1660262716, "time_this_iter_s": 31.8541898727417, "time_total_s": 21123.943798542023, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21123.943798542023, "timesteps_since_restore": 6374400, "iterations_since_restore": 498, "perf": {"cpu_util_percent": 35.686666666666675, "ram_util_percent": 59.20666666666665}} +{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 613.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 221.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.735}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.27, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.05, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.53, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 14, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 14, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 16, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 16.37, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 16, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 16, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 578.0, 587.0, 570.0, 587.0, 582.0, 587.0, 582.0, 573.0, 636.0, 630.0, 636.0, 633.0, 600.0, 633.0, 444.0, 630.0, 636.0, 636.0, 633.0, 636.0, 630.0, 582.0, 584.0, 639.0, 633.0, 630.0, 587.0, 587.0, 582.0, 633.0, 630.0, 630.0, 579.0, 582.0, 630.0, 636.0, 633.0, 636.0, 633.0, 636.0, 633.0, 630.0, 630.0, 627.0, 582.0, 630.0, 630.0, 633.0, 636.0, 579.0, 587.0, 633.0, 582.0, 582.0, 636.0, 579.0, 633.0, 633.0, 630.0, 630.0, 582.0, 630.0, 639.0, 579.0, 587.0, 582.0, 639.0, 636.0, 587.0, 630.0, 624.0, 582.0, 636.0, 582.0, 579.0, 636.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 587.0, 630.0, 587.0, 582.0, 582.0, 630.0, 633.0, 630.0, 624.0, 639.0, 630.0, 630.0, 636.0, 633.0, 639.0, 624.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 290.0, 288.0, 288.0, 299.0, 285.0, 285.0, 296.0, 291.0, 291.0, 291.0, 288.0, 299.0, 291.0, 291.0, 285.0, 288.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 301.0, 299.0, 311.0, 322.0, 221.0, 223.0, 308.0, 322.0, 319.0, 317.0, 317.0, 319.0, 319.0, 314.0, 309.0, 327.0, 314.0, 316.0, 286.0, 296.0, 293.0, 291.0, 324.0, 315.0, 319.0, 314.0, 314.0, 316.0, 296.0, 291.0, 293.0, 294.0, 296.0, 286.0, 314.0, 319.0, 316.0, 314.0, 316.0, 314.0, 282.0, 297.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 316.0, 317.0, 316.0, 314.0, 316.0, 314.0, 311.0, 316.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 312.0, 324.0, 288.0, 291.0, 290.0, 297.0, 314.0, 319.0, 291.0, 291.0, 293.0, 289.0, 319.0, 317.0, 291.0, 288.0, 314.0, 319.0, 319.0, 314.0, 316.0, 314.0, 313.0, 317.0, 293.0, 289.0, 321.0, 309.0, 317.0, 322.0, 293.0, 286.0, 291.0, 296.0, 290.0, 292.0, 322.0, 317.0, 314.0, 322.0, 296.0, 291.0, 316.0, 314.0, 308.0, 316.0, 288.0, 294.0, 317.0, 319.0, 291.0, 291.0, 290.0, 289.0, 324.0, 312.0, 288.0, 294.0, 314.0, 322.0, 319.0, 317.0, 319.0, 317.0, 321.0, 312.0, 311.0, 319.0, 290.0, 297.0, 316.0, 314.0, 293.0, 294.0, 288.0, 294.0, 296.0, 286.0, 311.0, 319.0, 316.0, 317.0, 316.0, 314.0, 324.0, 300.0, 322.0, 317.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 319.0, 320.0, 310.0, 314.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7550828949863245, "mean_processing_ms": 0.21976950142528295, "mean_inference_ms": 1.358698286827804}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11976000, "num_steps_sampled": 6387200, "sample_time_ms": 22150.153, "load_time_ms": 39.223, "grad_time_ms": 10918.039, "update_time_ms": 0.002, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016517750918865204, "policy_loss": -0.00551184406504035, "vf_loss": 77.22379302978516, "vf_explained_var": 0.7703518271446228, "kl": 0.002163690747693181, "entropy": 1.1175265312194824, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6387200, "episodes_total": 15968, "training_iteration": 499, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-05-48", "timestamp": 1660262748, "time_this_iter_s": 31.469547986984253, "time_total_s": 21155.413346529007, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 
1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21155.413346529007, "timesteps_since_restore": 6387200, "iterations_since_restore": 499, "perf": {"cpu_util_percent": 36.49555555555556, "ram_util_percent": 59.12888888888887}} +{"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 605.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.84}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 186.08, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.84, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.05, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 14, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 14, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.45, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, 
"useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.81, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.21, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 639.0, 582.0, 636.0, 633.0, 66.0, 576.0, 630.0, 636.0, 582.0, 582.0, 627.0, 636.0, 639.0, 627.0, 421.0, 587.0, 582.0, 633.0, 627.0, 636.0, 582.0, 627.0, 630.0, 630.0, 633.0, 633.0, 636.0, 582.0, 633.0, 570.0, 579.0, 587.0, 582.0, 639.0, 636.0, 587.0, 630.0, 624.0, 582.0, 636.0, 582.0, 579.0, 636.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 587.0, 630.0, 587.0, 582.0, 582.0, 630.0, 633.0, 630.0, 624.0, 639.0, 630.0, 630.0, 636.0, 633.0, 639.0, 624.0, 636.0, 636.0, 578.0, 587.0, 570.0, 587.0, 582.0, 587.0, 582.0, 573.0, 636.0, 630.0, 636.0, 633.0, 600.0, 633.0, 444.0, 630.0, 636.0, 636.0, 633.0, 636.0, 630.0, 582.0, 584.0, 639.0, 633.0, 630.0, 587.0, 587.0, 582.0, 633.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 325.0, 314.0, 319.0, 317.0, 322.0, 290.0, 292.0, 319.0, 317.0, 312.0, 321.0, 34.0, 32.0, 288.0, 288.0, 316.0, 314.0, 322.0, 314.0, 293.0, 289.0, 293.0, 289.0, 316.0, 311.0, 316.0, 320.0, 317.0, 322.0, 304.0, 323.0, 202.0, 219.0, 294.0, 293.0, 288.0, 294.0, 319.0, 314.0, 322.0, 305.0, 319.0, 317.0, 291.0, 291.0, 308.0, 319.0, 314.0, 316.0, 311.0, 319.0, 316.0, 317.0, 316.0, 317.0, 319.0, 317.0, 288.0, 294.0, 316.0, 317.0, 280.0, 290.0, 293.0, 286.0, 291.0, 296.0, 290.0, 292.0, 322.0, 317.0, 314.0, 322.0, 296.0, 291.0, 316.0, 314.0, 308.0, 316.0, 288.0, 294.0, 317.0, 319.0, 291.0, 291.0, 290.0, 289.0, 324.0, 312.0, 288.0, 294.0, 314.0, 322.0, 319.0, 317.0, 319.0, 317.0, 321.0, 312.0, 311.0, 319.0, 290.0, 297.0, 316.0, 314.0, 293.0, 294.0, 288.0, 294.0, 296.0, 286.0, 311.0, 319.0, 316.0, 317.0, 316.0, 314.0, 324.0, 300.0, 322.0, 317.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 319.0, 320.0, 310.0, 314.0, 314.0, 322.0, 319.0, 317.0, 290.0, 288.0, 288.0, 299.0, 285.0, 285.0, 296.0, 291.0, 291.0, 291.0, 288.0, 299.0, 291.0, 291.0, 285.0, 288.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 301.0, 299.0, 311.0, 322.0, 221.0, 223.0, 308.0, 322.0, 319.0, 317.0, 317.0, 319.0, 319.0, 314.0, 309.0, 327.0, 314.0, 316.0, 286.0, 296.0, 293.0, 291.0, 324.0, 315.0, 319.0, 314.0, 314.0, 316.0, 296.0, 291.0, 293.0, 294.0, 296.0, 286.0, 314.0, 319.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7545081908133792, "mean_processing_ms": 0.21966006909098515, "mean_inference_ms": 1.3581228643420677}, 
"off_policy_estimator": {}, "info": {"num_steps_trained": 12000000, "num_steps_sampled": 6400000, "sample_time_ms": 22245.607, "load_time_ms": 39.734, "grad_time_ms": 10935.257, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013599375961348414, "policy_loss": -0.008364356122910976, "vf_loss": 75.6484375, "vf_explained_var": 0.7971202731132507, "kl": 0.002114498522132635, "entropy": 1.1208560466766357, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6400000, "episodes_total": 16000, "training_iteration": 500, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-06-22", "timestamp": 1660262782, "time_this_iter_s": 34.34099221229553, "time_total_s": 21189.754338741302, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, 
"mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21189.754338741302, "timesteps_since_restore": 6400000, "iterations_since_restore": 500, "perf": {"cpu_util_percent": 34.074999999999996, "ram_util_percent": 59.19375}} From e8ecd6990fa7a3b3d18c81f1116c18bf19cd3027 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Wed, 24 Aug 2022 01:15:38 +0200 Subject: [PATCH 13/38] training script for 5 classic layouts --- human_aware_rl/ppo/run_experiments.sh | 37 +++++++++++++++++++-------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/human_aware_rl/ppo/run_experiments.sh b/human_aware_rl/ppo/run_experiments.sh index 46a59b9f..0c6532a6 100755 --- a/human_aware_rl/ppo/run_experiments.sh +++ b/human_aware_rl/ppo/run_experiments.sh @@ -1,11 +1,28 @@ #!/usr/bin/env bash -python ppo_rllib_client.py with 
seeds="[2229]" lr=5e-4 reward_shaping_horizon=2.5e6 vf_loss_coeff=1e-4 num_training_iters=500 layout_name="cramped_room" experiment_name="ppo_sp_higher_S_hor_S_final_1e-2" entropy_coeff_horizon=3e6 entropy_coeff_end=1e-2 -python ppo_rllib_client.py with seeds="[2229]" lr=5e-4 reward_shaping_horizon=2.5e6 vf_loss_coeff=1e-4 num_training_iters=500 layout_name="cramped_room" experiment_name="ppo_sp_higher_S_hor_S_final_1e-3" entropy_coeff_horizon=3e6 entropy_coeff_end=1e-3 -python ppo_rllib_client.py with seeds="[2229]" lr=5e-4 reward_shaping_horizon=2.5e6 vf_loss_coeff=1e-4 num_training_iters=500 layout_name="cramped_room" experiment_name="ppo_sp_higher_S_hor_S_final_1e-4" entropy_coeff_horizon=3e6 entropy_coeff_end=1e-4 -python ppo_rllib_client.py with seeds="[2229]" lr=5e-4 reward_shaping_horizon=2.5e6 vf_loss_coeff=1e-4 num_training_iters=500 layout_name="cramped_room" experiment_name="ppo_sp_S_final_1e-2" entropy_coeff_end=1e-2 -python ppo_rllib_client.py with seeds="[2229]" lr=5e-4 reward_shaping_horizon=2.5e6 vf_loss_coeff=1e-4 num_training_iters=500 layout_name="cramped_room" experiment_name="ppo_sp_S_final_1e-3" entropy_coeff_end=1e-3 -python ppo_rllib_client.py with seeds="[2229]" lr=5e-4 reward_shaping_horizon=2.5e6 vf_loss_coeff=1e-4 num_training_iters=500 layout_name="cramped_room" experiment_name="ppo_sp_S_final_1e-4" entropy_coeff_end=1e-4 -# python ppo_rllib_client.py with temp_dir=/tmp/nathan_ray seeds="[2229, 7649, 7225, 9807, 386]" lr=6e-4 reward_shaping_horizon=3.5e6 vf_loss_coeff=1e-4 num_training_iters=833 layout_name="coordination_ring" experiment_name="ppo_sp_coord_ring" -# python ppo_rllib_client.py with temp_dir=/tmp/nathan_ray seeds="[2229, 7649, 7225, 9807, 386]" lr=8e-4 reward_shaping_horizon=2.5e6 vf_loss_coeff=1e-4 num_training_iters=667 layout_name="counter_circuit" experiment_name="ppo_sp_counter_circuit" -# python ppo_rllib_client.py -tmp /tmp/nathan_ray -s 2229 7649 7225 9807 386 -lr 8e-4 -r 2.5e6 --gpus 1 -vf 0.5 -n 625 -l 
"forced_coordination" -en "ppo_sp_forced_coord" -# python ppo_rllib_client.py -tmp /tmp/nathan_ray -s 2229 7649 7225 9807 386 -lr 1e-3 -r 2.5e6 --gpus 1 -vf 0.5 -n 583 -l "asymmetric_advantages" -en "ppo_sp_asymm_advs" \ No newline at end of file +# This file contains the script to generate the baseline ppo self-play agents for the 5 classic layouts + +# Please check if your computer has enough power for 16x parallelization, otherwise change the num_workers parameter +python ppo_rllib_client.py with seeds=[11] layout_name="cramped_room" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 +python ppo_rllib_client.py with seeds=[21] layout_name="cramped_room" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 +python ppo_rllib_client.py with seeds=[31] layout_name="cramped_room" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 +python ppo_rllib_client.py with seeds=[41] layout_name="cramped_room" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 + +python ppo_rllib_client.py with seeds=[11] layout_name="asymmetric_advantages" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 +python ppo_rllib_client.py with seeds=[21] layout_name="asymmetric_advantages" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 
evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 +python ppo_rllib_client.py with seeds=[31] layout_name="asymmetric_advantages" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 +python ppo_rllib_client.py with seeds=[41] layout_name="asymmetric_advantages" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 + +python ppo_rllib_client.py with seeds=[11] layout_name="coordination_ring" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 +python ppo_rllib_client.py with seeds=[21] layout_name="coordination_ring" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 +python ppo_rllib_client.py with seeds=[31] layout_name="coordination_ring" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 +python ppo_rllib_client.py with seeds=[41] layout_name="coordination_ring" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 + +python ppo_rllib_client.py with seeds=[11] layout_name="forced_coordination" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 
num_sgd_iter=8 lr=1e-3 +python ppo_rllib_client.py with seeds=[21] layout_name="forced_coordination" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 +python ppo_rllib_client.py with seeds=[31] layout_name="forced_coordination" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 +python ppo_rllib_client.py with seeds=[41] layout_name="forced_coordination" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 + +python ppo_rllib_client.py with seeds=[11] layout_name="counter_circuit_o_1order" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 +python ppo_rllib_client.py with seeds=[21] layout_name="counter_circuit_o_1order" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 +python ppo_rllib_client.py with seeds=[31] layout_name="counter_circuit_o_1order" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 +python ppo_rllib_client.py with seeds=[41] layout_name="counter_circuit_o_1order" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 \ No newline at end of file From 
a4e759a389c3dd090069d6928f7c47b9c4bfd555 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Wed, 24 Aug 2022 01:30:38 +0200 Subject: [PATCH 14/38] update on readme --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index 0329aee2..c2ba5f0d 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,18 @@ You should see all tests passing. Note: the tests are broken up into separate files because they rely on different tensorflow execution states (i.e. the bc tests run tf in eager mode, while rllib requires tensorflow to be running symbollically). Going forward, it would probably be best to standardize the tensorflow execution state, or re-write the code such that it is robust to execution state. +## Training examples + +You can also try to train agents for 5 classical layouts + +```bash +(harl_rllib) $ cd ppo +(harl_rllib) ppo $ bash run_experiments.sh +``` + +Please select the correct `num_workers` parameter, otherwise the training of the agents wouldn't be able to start. + + # Repo Structure Overview `ppo/`: @@ -118,6 +130,9 @@ Note: the tests are broken up into separate files because they rely on different - `ppo_rllib_client.py` Driver code for configuing and launching the training of an agent. 
More details about usage below - `ppo_rllib_from_params_client.py`: train one agent with PPO in Overcooked with variable-MDPs - `ppo_rllib_test.py` Reproducibility tests for local sanity checks +- `run_experiments.sh` Script for training agents on 5 classical layouts +- `trained_example/` Pretrained model for testing purposes + `rllib/`: - `rllib.py`: rllib agent and training utils that utilize Overcooked APIs From 6d145c3b3abfb77c517da61b3412c44ac69ea425 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Fri, 2 Sep 2022 19:36:03 +0200 Subject: [PATCH 15/38] plotting, shifting function to utils, changes to README --- .../ppo/plot_example_experiments.py | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 human_aware_rl/ppo/plot_example_experiments.py diff --git a/human_aware_rl/ppo/plot_example_experiments.py b/human_aware_rl/ppo/plot_example_experiments.py new file mode 100644 index 00000000..7ad63e54 --- /dev/null +++ b/human_aware_rl/ppo/plot_example_experiments.py @@ -0,0 +1,54 @@ +from human_aware_rl.utils import set_style +from human_aware_rl.utils import * +import os +import numpy as np +import re +import matplotlib.pyplot as plt +import matplotlib + +envs = ['cramped_room', 'forced_coordination', 'counter_circuit_o_1', 'coordination_ring', 'asymmetric_advantages'] + +def get_list_experiments(path): + result = {} + subdirs = [name for name in os.listdir(path) + if os.path.isdir(os.path.join(path, name))] + for env in envs: + result[env] = {'files': [path+'/'+x for x in subdirs if re.search(env, x)]} + return result + +def get_statistics(dict): + for env in dict: + rewards = [get_last_episode_rewards(file+'/result.json')['episode_reward_mean'] for file in dict[env]['files']] + dict[env]['rewards'] = rewards + dict[env]['std'] = np.std(rewards) + dict[env]['mean'] = np.mean(rewards) + return dict + +def plot_statistics(dict): + names = [] + stds = [] + means = [] + for env in 
dict: + names.append(env) + stds.append(dict[env]['std']) + means.append(dict[env]['mean']) + + x_pos = np.arange(len(names)) + matplotlib.rc('xtick', labelsize=7) + fig, ax = plt.subplots() + ax.bar(x_pos, means, yerr=stds, align='center', alpha=0.5, ecolor='black', capsize=10) + ax.set_ylabel('Average reward per episode') + ax.set_xticks(x_pos) + ax.set_xticklabels(names) + ax.yaxis.grid(True) + + # Save the figure and show + plt.tight_layout() + plt.savefig('rewards.png') + plt.show() + +if __name__ == "__main__": + experiments = get_list_experiments('results') + experiments_results = get_statistics(experiments) + print(experiments_results['cramped_room']['std']) + plot_statistics(experiments_results) From f8734cb2a6e3b64394ec8027c7951b769aeb9995 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Fri, 2 Sep 2022 23:13:01 +0200 Subject: [PATCH 16/38] readme, utils --- README.md | 4 ++-- human_aware_rl/ppo/ppo_rllib_test.py | 18 +++++++++--------- human_aware_rl/ppo/results/readme.txt | 1 + human_aware_rl/ppo/run_experiments.sh | 1 + human_aware_rl/utils.py | 12 +++++++++++- 5 files changed, 24 insertions(+), 12 deletions(-) create mode 100644 human_aware_rl/ppo/results/readme.txt diff --git a/README.md b/README.md index c2ba5f0d..a0148c65 100644 --- a/README.md +++ b/README.md @@ -120,8 +120,8 @@ You can also try to train agents for 5 classical layouts (harl_rllib) ppo $ bash run_experiments.sh ``` -Please select the correct `num_workers` parameter, otherwise the training of the agents wouldn't be able to start. - +Please choose the `num_workers` parameters to be less than the number of CPUs on your machine. 
For more information +check the ray [documentation](https://docs.ray.io/en/latest/rllib/rllib-training.html) # Repo Structure Overview diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py index bf46d7dd..c41b2326 100644 --- a/human_aware_rl/ppo/ppo_rllib_test.py +++ b/human_aware_rl/ppo/ppo_rllib_test.py @@ -10,7 +10,7 @@ from overcooked_ai_py.agents.benchmarking import AgentEvaluator import tensorflow as tf import numpy as np -import json +from human_aware_rl.utils import get_last_episode_rewards # Note: using the same seed across architectures can still result in differing values def set_global_seed(seed): @@ -298,14 +298,14 @@ def test_resume_functionality(self): threshold = 0.1 - with open('trained_example/cramped_room/result.json') as f: - j = json.loads(f.readlines()[-1]) - #Test total reward - self.assertAlmostEqual(j['episode_reward_mean'], results['average_total_reward'], - delta=threshold * j['episode_reward_mean']) - #Test sparse reward - self.assertAlmostEqual(j['custom_metrics']['sparse_reward_mean'], results['average_sparse_reward'], - delta=threshold * j['custom_metrics']['sparse_reward_mean']) + rewards = get_last_episode_rewards('trained_example/cramped_room/result.json') + + #Test total reward + self.assertAlmostEqual(rewards['episode_reward_mean'], results['average_total_reward'], + delta=threshold * rewards['episode_reward_mean']) + #Test sparse reward + self.assertAlmostEqual(rewards['sparse_reward_mean'], results['average_sparse_reward'], + delta=threshold * rewards['sparse_reward_mean']) def _clear_pickle(): # Write an empty dictionary to our static "expected" results location diff --git a/human_aware_rl/ppo/results/readme.txt b/human_aware_rl/ppo/results/readme.txt new file mode 100644 index 00000000..44d60515 --- /dev/null +++ b/human_aware_rl/ppo/results/readme.txt @@ -0,0 +1 @@ +You can download trained examples here - https://drive.google.com/file/d/1vIrZ7CphtRFBdLShO9WbGAl2qsbbJfMB/view?usp=sharing \ 
No newline at end of file diff --git a/human_aware_rl/ppo/run_experiments.sh b/human_aware_rl/ppo/run_experiments.sh index 0c6532a6..5cac975f 100755 --- a/human_aware_rl/ppo/run_experiments.sh +++ b/human_aware_rl/ppo/run_experiments.sh @@ -7,6 +7,7 @@ python ppo_rllib_client.py with seeds=[21] layout_name="cramped_room" num_worke python ppo_rllib_client.py with seeds=[31] layout_name="cramped_room" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 python ppo_rllib_client.py with seeds=[41] layout_name="cramped_room" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 + python ppo_rllib_client.py with seeds=[11] layout_name="asymmetric_advantages" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 python ppo_rllib_client.py with seeds=[21] layout_name="asymmetric_advantages" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 python ppo_rllib_client.py with seeds=[31] layout_name="asymmetric_advantages" num_workers=16 train_batch_size=12800 sgd_minibatch_size=8000 num_training_iters=500 evaluation_interval=100 use_phi=False entropy_coeff_start=0.2 entropy_coeff_end=0.0005 num_sgd_iter=8 lr=1e-3 diff --git a/human_aware_rl/utils.py b/human_aware_rl/utils.py index 7ab1f4a3..b8cbb7cc 100644 --- a/human_aware_rl/utils.py +++ b/human_aware_rl/utils.py @@ -6,6 +6,7 @@ import itertools import numpy as np import tensorflow as tf +import json def delete_dir_if_exists(dir_path, verbose=False): @@ -166,4 +167,13 
@@ def get_dict_stats(d): 'max': np.max(v), 'n': len(v) } - return new_d \ No newline at end of file + return new_d + +def get_last_episode_rewards(filename): + with open(filename) as f: + j = json.loads(f.readlines()[-1]) + result = { + 'episode_reward_mean' : j['episode_reward_mean'], + 'sparse_reward_mean' : j['custom_metrics']['sparse_reward_mean'] + } + return result From f2e887faed157943b56e657478535aa18f0c6bb9 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Thu, 8 Sep 2022 01:06:32 +0200 Subject: [PATCH 17/38] readme + plotting --- human_aware_rl/ppo/plot_example_experiments.py | 5 ++--- human_aware_rl/ppo/results/example_rewards.png | Bin 0 -> 20379 bytes human_aware_rl/ppo/run_experiments.sh | 4 +++- 3 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 human_aware_rl/ppo/results/example_rewards.png diff --git a/human_aware_rl/ppo/plot_example_experiments.py b/human_aware_rl/ppo/plot_example_experiments.py index 7ad63e54..eec14b6e 100644 --- a/human_aware_rl/ppo/plot_example_experiments.py +++ b/human_aware_rl/ppo/plot_example_experiments.py @@ -18,7 +18,7 @@ def get_list_experiments(path): def get_statistics(dict): for env in dict: - rewards = [get_last_episode_rewards(file+'/result.json')['episode_reward_mean'] for file in dict[env]['files']] + rewards = [get_last_episode_rewards(file+'/result.json')['sparse_reward_mean'] for file in dict[env]['files']] dict[env]['rewards'] = rewards dict[env]['std'] = np.std(rewards) dict[env]['mean'] = np.mean(rewards) @@ -44,11 +44,10 @@ def plot_statistics(dict): # Save the figure and show plt.tight_layout() - plt.savefig('rewards.png') + plt.savefig('example_rewards.png') plt.show() if __name__ == "__main__": experiments = get_list_experiments('results') experiments_results = get_statistics(experiments) - print(experiments_results['cramped_room']['std']) plot_statistics(experiments_results) diff --git 
a/human_aware_rl/ppo/results/example_rewards.png b/human_aware_rl/ppo/results/example_rewards.png new file mode 100644 index 0000000000000000000000000000000000000000..90b306e759fe2de87874773c0e991fcd797aa540 GIT binary patch literal 20379 zcmd_SXH=C}yDq%2ccY>LQlfwg2nYx$3fKS_y`xC60n(8w9W`oH6h%4+D!q55=w69r(p9p?z?!bSJSWBF>Rx#JNw!3JlN0GZ|ZDC?=ZDM%oS6e+xD?@WL z0Ukjf{sX^WwYIjf65-`F{rd$x=9UJ$oI)%TILKOyGv}-*l+72(KQvL|k%knC!W!!7 z6ROvP23zdUs?~j6{871Oi^R&JwflqCvmdA3M_bNeaYn{e^=Wp1zTe))_VOT3?$or} zSfR$}hNE(cM>^<^gzErTS%`X&6;iWZh_|cA2|K(fsWO8UKIQQ+7x_0eaj8;~PiO_jN;njF&#la2F#9ejr z^Yb}XB8I=&uPAhS^Y-nB%F4vFKz!um#Krj`*X+SpP3!U)X%4b@b7}Zk8x}cSzwTve zjE`JUu(V8>o0}W34^LX-Hl3N2VEzmI&=XJtzcr}(w2JTUU!wtYTloxi>JsQ+#4Ht9Gq11WO5*v*O`@9t|z zHMr4TVDaVV>!KodYI1UNjN*%vFZwQz@d`(-ceCm$`xtb@%DSU?3$^;0U2F8UZ||aw z>k~Y~CI*{~=V!*VeCYYF{N7dZ%-Gds;4|MwCJ|NqS}k1Kn|Ay5a1ND-D3f!M=e6+& zqmy#3sHmt6dYn3WQZ@OKgw&x@{ve($Ew)Xj4uk3S*0X~dr3W`S&;59xJ=y9w-k(^| zS8;U6i%s;$n{5Gy3_YFc?dv)}ppg{$rX=`o+Go_`arE_rbS@#H+MnWx%>)Z?cqaC+dYv8^fMI|FtgeT#XnvKfm&a4<8y+4HWLZV!^YaKhm=1Ion`s-ua6c zFG?#YXyD$A4Q8}){`%{2&)N)g-MR$r^H_zA>Bhdnt_${qZ3Vh*PNNUmICk&etugiV zifi!W$B)gQ(bLnH6c;CnE6B_Hu<7aRa|#I5)Q&HwP;Ogwn)4Nub56`0)uW6}PS#gE zJL>J_6_%JkBdw&Qq*>sSw`|$6L)V8d%sQPoabo1h52Z-?XVyJcyXz8l3yN*z{rTA| zYd&fuX!-Zkox++KsjjIR^w5Yq`(mWC)c2CHbep%M_=DH_bayT)pJ$A)>aK{X=R6y=yW&~u+_^&ByBPKO08dZP`(7$RmMeybEb-@d zDwp;q2VVvD&Fk0K#3ts)Jl&@dBl(cQGfXX3{XN z53*KKD9;Zk%yD;RoO`R|isgLyox4(ff;Qg6P4@o!ojkSKHvMUr-mYU3u|9+URn0JK zQB5~e8*EIMVU>EMnqxai%gA_udjI#|6^|cxV|E@>l6xkiws!4WhNHG|FGE8|H(bX` zQU7L5Gu-jvDZ{}FZdj6?y}i-x?HA(DCZDuxbIw?pELiM)&*EC^PA}jU5Wp!eEe-7y1Wk1Oj-twEVW%icBBUWAe4O_9VQwKyuH2Es$=n3+P#8!D6N0hw>0Nj zThpdja!f~UYx5>sV?`Y&&Ui{bVvJ^&@#!h$EvTIt@87<2XJl)hW0P-XMTPq0_ZFKU zUnNYNGnG)NlDjHH5>b4;KXIZt9o`;zkxtT3_^U&1ZtmE-UFJOtvqKu=eRb&;Urx5T zF3x*~InPc+D}_onrWrofOg9Q=ahW0_}pk^e{Hl1C$|2L zK3UY2$jC@+ypAtlz9iW!N9k%_ljHWjx#_g$q0Y7fO_!Y(*R1ZVuq^j`_AH*aU{3kZ znplHY;O*}pjk43!ZTLCqg;8CM-e$KzAyZ{V#mAy^ROKsI!tQKhioqpEQJ?O$|K$DT 
z$qRS)4YQ*i9apHz`uY+40s>5FcW_;+1D|N)0qqS;u@beNm=F;;N}J&nxxp>RZb%)E z0)>g(eP{HFb!*mC;S<#ow2qJ;)z6Vie)8nazshFL+)QWvh=5)Rtz6LIA2GeC`Cd6W zR*Swgl;hXxxCd8hNjW%V&tH4CezjWbe%<@=S^0XUj7MJv`1sh)`zQRtvHW>KK>_Mm zbXXXhGy2H=`}eEyh?q3H{rE1^ZrgU*H8?2f0JgHk>C;K;=_nR8fq4@b428c0@S7~| zy#7e_>aQ6i6{3f?wY<1-si3;;|CGVU(b*i(kFQzV5qe-`^z_s8J z)6s~eq=Qst+)dk|)_By>#_n_JM&78%w7>kK_|+8H$U;b7c-dk7GCG^VMzwUKy6WlC zZnCoC&u7SUsmDFWGFi2WG0M-6&feZ0)%TQw_S5GZ z8!dh2Ox5S)%iEclo^`K5KXj`*tW7y)KN7ZT1Ksnfsq6L*4xHF-IBZpmT^lzwK+HM2 z#ET+$aSM}o5_8mJS?r-Pft@fUJZNA zf>VAcMSQCLmT|sLzLR})0;f>gk5f-SZ1Z{>-@Mc{nDqgMYvPCIy&|rJvtlH``&{^qqD@jJPJ3TvX_sY zB6<4s)pzdn6Wv0AavU5S4B{@i=&kya+wuE@_c(Ig0BYnUBqa&%K{4%X%1B(88&yCr zmJ4xR$olyaD+-&;a^wo;CjxhHYldm(*mAJ5ODtc#NwvUb-trkOKFzJ#K$_wemZDc0 zF#JZy@rdI#ltoAYF+gIQ|B6}oq7HEf;scMjq= z(o#< z<3;(LYj5@T&6^%LQj;hp3u~=N7-(NRgnlgva1%yvTv5r1*Yn4&e^Bbt6;ROpb zGAvHRH-6q8#^s-%pIA=dNvwK&b-wc~8}%WBpfVolpusXc=E5U>w!5zemPh>l`x;ax zKQ%U-0b`+;cA;mQ%e=#Q&&Q(t^_d2h!85&)A!VG*`0ZXky%obapPFwT;t6O!##6&z zJi|VJK5NrgOC9a5)M(3hUa$4Zi$i6sT)6Ej{tp!4re=m&e7&x#_QJx#gneU%dE$6~ zeYYl|6ah!=66UAT7Y|wYD8_5&sC#p&DXXe_MacTUOwp6uH3se>;pNm7=2;ocIXt&h zpnA|_Q6x>Hv$bGFj>=VAz-l`DZatSfv^xH?oLnndgoB0IYYO zTv><@)emKoa%8`Lx%3{3$6;IJ{_n8@E`O@O--#s zpC(oH+q;MNJv{2&XqI=NG=6xxZ>NBWnE{O!Ga z>g)tk=v@n3&u_`h$}+?P!_zsD(7Spwv+9Kl&(V}d&yCE;KNAUO5Hz|9Qe;%0@GMM* z)eS2vfbX(oa4C7X+1uZ;oeAg7!6uK1iUPN@ZQks$ zCxeCJw(E~EwbjyT`L7>Qc4$yND35YyH{JEoZ4SaVeI61LqMoSZtVRJ}V(IMe)`&}a z>gmZw)zQ((ir_t)bUeg)!ap}JuP#(-bKY3B%!E>*T%Zug{{6+w3t=*Ld+9~2qb3F# zLIKQaW95L8uOEx~bivnzgP)!9h(S=g= zpds<|^AA)$efpH0>hJGgQdV|gLPXzHnJrNZo#O(2rscn+iG!sBmu z>YmBAW-3IF>~D1(;1NTy-Otj7m8b;QcJa2p`3mL|NHOC3NkWWpGL5|d)5@)*;rGsIB#zH zIq8H=zs{_we?UNtv^S^W^hoD@clX-NYv0E3fyUTp6ZV@}#Nr&r`wZVdVzU3ftipIj zRb{0@q(Vp+-Em5xocToKEpjQ+)^}IcheXo>zN^J+rdNxu8g^Zr4KWc4G|L$jqE>&3 zKTmq<$`va*7~;9s6dfEJt5N80GB3bA?@!o;n|3SM!^4A(ek3};G@-XQp z8P##xU%$Tp*I$3Fat$uOp z?$}5vukq}}ponG1@A-=hjvMI?9>Rw8$r&jOOb$S7oOaSfr9S5rBrbpMSeS)~)1+EGRS^EC{`-P0_KQ2+pju=BR 
zBHMZ6E}_~erO?_)1s3jeDQ|S{Jb3WHhew-p=gytCZ{Lna0|4hb{-S61l17osV;C!MIC(xVcmbD^S0Hu1BPeEUWIm z$Pkxmu>0C@nVnQ+Ev+C>QKO$7jAnSeUbjBiezZ2<*-><)nANB`GnuV-cBmj@h+!4_ zlGQ=Ioff!NHZx;}?ECih`Gb+I<3+!+KQr!<&MSLA>mzEmfT`|0*06fFC^2?5ukoFTE)M)JeeT$U6@ zaTljx545QEX4#{w6g^*=HbtNzNqYi*(K0X?>HokB58mLQ8J_y_&7tFo)YxdHcq~@6 zXBZk?x><|ngB{!{ZLD;7S$L&aKV=icp)-*l=-I5=2ZTH5pOU8yBG?`}YT-*KSt2>y}pM;SF;KvX#1Q!4!U zirg>Z$coEk*SbLr(Wk}DbK82MgwM^BC?F4+j;ebc8~GiZi-Sr~K$<}%bL!H$ti?O} z>k~D=VP2Uuyfn|BmISQv3~O_lec1GdkT80cW$o*EbDwMI*h`=rlVv+lbJTVq3N7g; zqLQXG=Q(DeTG-T$p*!@9_15UC;?0z&ep@NAs?jf}rlw*e7H9esv&TQ3{|8Npp=CHM zOm}-fe*7!Cc~B7JJlGn8s6!fX^M${cqy7RhPe;%z< z#0^NT21_Us4AKz9QkqIt0(2&tTcFZ0oQ>=iX-aMx${6$+{T*w``S}A!;J?ju|EjS1 zU0O{Lsa(JzDgdBxs3=}xk%S)tjrBbgZdF7lrXR%hx!v+lS)YkDOynne3H+q+5812@ zvQS^fU$5uAij!Ekaibb4hE-lGv@(Gh@+NZ66-39q*m~nLTC@8EaphP+0x>t%LRA^f zHBSx2?fO&S#B;0%4lMidfiINK%guhY%kb-)d+h3MSnGlpU;nl>b~A;d6%uW?>`BrZtK9Mi=Mkr-Jvz|ox8n$CoMBG5B2`ThiU-A>(p36UGYR&RF++^wSBc$Avlgy z1E4h#i*`3`%}8Gz2P*K;$HE)S%cEB9-9Nz7P+O}y`wh}u?4uzd8ewLv2BJ)Ph%cN~ ziIlI~rf}?H2s0gB6up4{FOXA?j*_o2qkj=g)h%NUUBpGAYU!Yg>Ssy(I`jTjB|&`*%5U5HvtU>#Ad zv}y`w`nUuQtKYbDUhwP?*yrsup$ehb?}?e~T>XnY&TjS0ZBEB>V@gAf9-idJ-DE8F zq8v7BJ}e<2(VJMXpe!%{D1W+Z7m*`FoJQ`U=%NxH{P1q^z^9T*{q2Sg=lGo0=gIz! 
zkpROPi@Ye;hw>(kK6tVd$rd;)iU`JrH4&OASMGh!pEVPRaieP5xt_Ukkx3?wfIW8g zofOJ`x|)RLpI@1tN24r>``Z31vzl>|%nk_X0^=vENSOdGW!DzrIe6$$ZP`~Wl?%&o zyMKIpKVHo0(F4dqKqedY-o1N9E!pXWIe$5Ate@s5rt*BfK|1xFZyYyRxT%dYX%BMu# z#Gan3P_xgGas%CD&z?OH6pca*I*Ol=JDfi^A``D$paZX?uO%mAejK}psO39(b!xGY z#(@=$7v`oJgw3>mXyEp?uQCwWP_*A{cSf`2gX-#PFK_Sg6g}QSwfwovOKGLwGje3E*6AMO$x~u%Zp*1zz^vnIL)}$0AwDTRHE~EOZ?GJNjA61(zqclIB z66N;|b@jL(s3Lo0l?8~UKJC*;XfaezUteD~L$q0}eO1DHMh~IKL#0Sr{SU-th7kxU{S}BTv6?R2cDbqLXyjiZO=BSzsZZFd4Bchr=8E|pgG9NS>k>v0k|#HAlP2g zv@uNyD4*y`Cs0OHbM>g{k$-);LGnmKP@sg=r{eXXcpwOFtG!`A(|t@UB@^c(Le?VQ+PKW75S_ z3G1njj*k6b-#(C*l`Ruocm3;|wIDl9-3@^72_rlq>`wN<1y)v8PXhy+)M~V~wV?}X zHrIXdy=Zj}pV3jAzsxO=zCfzWa%@Nm7`*eq1p)dyFYpn)1eD^EA&!4P(OXreNbD9Y zfC)o_^Fj1zckI|pg?wG}y(PzHq=Pzp4C@oE`7VHnv_3HzgS=`UQY59MjK93TJp(yB z9wtkZN5kmYm@3L|2lVnA%U5&m-75+H0@<~yMnOCtG8LYxv?uPx3p6&-nV!%95t|r* z(?hlcXR*1ukCdS3TwsUt(_QAzN;N{AOz35C9(%&5%|Y z$5;B5!Ow<{p~Zeezgwrpi&v}BPSYJ%P*9i+!S#ZYJ?oeMwJF2A9!&$Py`l6eQmh{B z@u0W>B~QEumC9vy8a`3}?5GA0k8j1El8TB?Dhi^VRWebyPtS*CkF1|6+ALlRRcG*v zLbf%d4wePbz^R`#Xsz41bLT*~+tg~nydr@Ne*OmU#0IZNi?RWq-UC5>5jaK zMq>xi#Gi10ogyD7B=;A&g5T2Jo_*lqav1TpVjoCJ_6+uTraSse3nv;He{Knh&@0*5X z)wVi0Ie}fOTpFi&s|Q2}P5o2cxlq*hXjy-L*vIZLPX(bhL!k&VUa^cKT??8(>cQ4n z^SP-Xa1~G9t6NR$i{f(h=y_r#vk~13Wg4AE19U3gbQ-2Lr0Et^xz+x0KwdSO`27YP_vv$a@NiW)V&iU+xg0jK^4s=B+or-_?Ec>*1a z1p4nI3?^jl2Hdf3#dq!}$wiNijcL`ubyx$uM3FD{+ER!T%gMzhLv$EIe(JUDqCseG zi~WuULwRHXb_Fbz<}hwV)a3ISW@?E#d8*;k+tUqeBmocwe|BF=57Zb~sXZZ!bCK$B zA92L~CsNEG!Ia6({khB=Ryws8(hw~g@^}q^60sA&eW2PIq6Fmql*(2V-r2Y-2GE;y zP@U^vR^rdKWl(?Jp7QJM9ELk-(;uHwTamP|!HyW2h8a3GJ(doa=5tJrLB>nkwl3RG~E= z+02;<8#uwdqs7k4cALd50d@+rnA2IXfGnfzz`#Id6%`bhBzMU9*fG9moyEk&P*XUP zXO@U1zj3OO^eS%&9 zl?Q&g??pPYH^mm`tmORpC58$X^OxRwrTCZefriw=QQb_76L74SBulj@#TSa>d^4*l zpU9FwboDVd&vUdWPHt{FSYqA~sajoTO+`Vku*J_xVyEcX`$-@I0I=HgyFQxip0}Gw ziPg-sc(I3-6&|@bnl@3P1&wMChB_aQ{?=||b!}LL;+20%cU6Q*Lkd|x-oe2OYM-pi{}%|JuQ+ahUCmcYZNYFkg$p9O*m$i9 zXrk967V(6ts;byWC)RD+gi2VrRcOH=S=~h__DpHT{oeCEdV6LztN79=?2Hzp6h8cb 
zA~}89<4e)qrpN{pA^JPJ1IzUb1lG9q8@J5wUU|pnK=KT*zVBIW?BolLx1ZDa6@?mY zwEeJ*!dT+vNCQ?j8{m37`C`#3D8#3*yikK@zUK^C<}G}_?D0WAwR5JwI$S2fo*-wJ z`59vRsKuxfI^^x?soY!#29(JGN?Q&*8bde>#Kq}~8t5pqgDD_y)?RIr_B~cWEilF3zSB5ovVG+J% zUSNwxc7c>@5PD^Xa&$&IeqUB`1q+>ug(E2( zN9!*enDe&TR8)R!vw6$imPUcq7e(?JcQ-Lrqw{j{^74{8L+S*WuaUZ7)^~13TG}wA z2(SQkQRqM^pWWB`{%<(*QBJzL_FZoixl$j0*CwHvVy#C#tcS%778$kk1HIYmvMdxk zAHLRB%nSRW-95)RukHJnzv)HEG_L2d`QA)~6r$|~U;nxmZeDModHzT2m$47~Lj08E zf(iFUpX-Il3N!C16lDOs4_c#F^z?eM`=*(N_f!8PcHO;s^X9whu24cZaW&n4dGyf1 zJlVg_t-y1Ek^R{bwWy8B=Akv5jL7?^g&zY>5xcwch69EsUzou z612Q++d|(nJIdk~sG&j+*5ugqN5GSCHMsfuu*+N~+BPc-3(JzHxpmvNbU5v|@7$60 zB<%|DDBYxizs>UBgi?`+AD$Zm9~70m+9Nf#-5J>2CAd9Tdd4d4T7$1derQFR-@_dE`E{C)(i{W$zg&oBf9qTyvae7(2tkllAJLQFp=U38fGa`(t`I?92Y zNBb!C`qC6@E2~ng{Ug|8F*|B(^YhnSJlxlKOg$ zEnBuYOn3TPJ#6KM1%TZ}R*mXk7Hb?OjuQjX&>MRn@a8|Z z?iDM9pj`urFcID>ZA=`|WDmMK>FOq5dTUcUJTg-K`u6(uYD=Kk&yuMS0>26ru^6S$ zJ3K%uIp4EoHZmMi#1k6ti%tBg`N6AzT}(DS93|pg;Vr~p6S4blBDo|`K8fCWbJOO{ z8rQF9`b)syE4&i1qHs<=DXGDPQ#U`C;n3CR@D)D1y{%G)(kdOuL1O_rEdovo#%c}C z^o%E*ihW3jp(0WDDFmzLIgF>-4r)$MPx~veqch$w_Ii4ph&sqD^#g&|`U@CXca_mI z9kw{meg0J~_)R>J$cLt;^9cPM3KhY7Xm4?_y5Wa)FuCFZGMXaEk3wl%LZbs!FKz+cz0`G` zuM-=@QHhmZm;5dK{Eq>I!+_@me||8{@U6{akFk!3d%tA+3lrBC+-Ro#)@C*;w2o@* zQ$tA`9DDyNl}9vV6BD1HTc??~iII!)>=+v2MGc%irqS94eT5SKzIOcyVen+=>4#F! 
zP%K0sio*osO`iD&+h$Jt@1S^~^PtD-khK7CXq7kd>AbnW+D3|3_J5|a6_!_`lmOhJ z7ps61*1&0?1w_StEIznF(6q4@s&Jv<273O}&=L;7?&Hx)YTGLyB&5EbQ+28v3BM)^+^`?j3g*H4b9-6JyaNLC~1^wt&ke!tbmx3>%@J&{Mk*!V`ho=)GYe1BUP% z8ll?~0Sp|p10z2O4x@;(*NCLlp1ExCaDs>56U zK*Qm-EyC~G>E4R1szQhXdlYLw=%-3*x%CXxg$1Cv03}+yj^oo6`dbj(%$7+r^~49A);#S)4cj& zO)QHxR})LJym?ifnRV(qmC_|&_|Ui+1B^AA2<1(EeLyl@HpA^F!o$MgxgE#m;|D>o z;M}#CdX`H(P%TW);O!Xq82^^}yV0eT1^-nSh!kuP;v9i>S=!Q~4Ov#7b}dCd)@Jf9 zE!`)e-O=F~ol7<}NvrU};wP+xO2@$4=xZw|0Oaia`t_3JM@qZ(k|leQul{j!r3eBx5egv**oIMP86>ay5B|^h zM0x?AgYJKXNz_5raq_!h4q*^WHP6$_$0uUt`W=Kr5Klr790(g*2F-%+=OWlrc$dZH zELxa1jzr`PVlhJi5(p)hfghG|4|0ug8241Bx+0Ed2c!6BPC}ZZm2}}mLU(s%NIitx zgvmBnT?i%`;41WWcz5B72qrAVT|y>`2n}Gn5_`yI;ImSou(?p`(rKzBOHsB#26A0+ za>mV&mY4s8f*FPM3&JmU&D#M+3)gNpUXdi_!3=^8benSsSMm1kTLK;R(@9`slVaSK zLXwIjPB$vC4e>(i_To2(6_X_?SbjDObN0XyF~EHgzkK9F#I)m95rbINVeBz@1omJ4 z?Drh4#9en@K;{1M{=F3Tgd!x#aa>ndKsWGFUwvW-l2wH6Ze%>Hh~D?s2=%zI{N-7i zBuD@bP|InN`hYEQ6;BtMvKn-E(k&6Z*qL1hk3rC^`7DVtjgCjFME1d`{D|mYI&>W( zTmepiB4t$8piT}pq)I_|s|HR_2aw;$d@QrLA6AZq0AJ^UcIYmBEQ?cjFI~Qz?l@_N z+A5uxHxWQ&T@d&kF>$y_ecl>6N?_*+4&?_ol>70WBK?nq6#+w?!Eum1O7)q84~u97X-nS=lx$!;J7eieqm3^EXMT(h9v zz|-U|4b~v=P?w@7Ltyr1@wp@tqegrt>RhV+ggt6x9KT-4RcIXct7NE%J}W}RUD8L{ zI0OV#;8RA#7qvBPNO*E}@1GEErg-|JRE39_rOTtTP5NHu4zRRPaET@lDkD}?B75e{ zV>YC;1?tD}L>qNwa3Z)7mU%j_OwIxPHgtdZk3z1gLw=t~+!bTZR|9Undeu^0FFk@- zoIB=vSi8G_7t2=G)YMG&u{%r2{fKz4Bp)B&5-AuN9aTXP4$q!ZeP|$kH50^b3A?Ko z9Q?}sRWrHtLcaZ#zb&&mt^4u+b9%)C7bK@6TGv&41YP1XH#d*h$!iqj zhI@Mcm5CA(fJ@;5m?n++&N}Ch@ik8_#p&E@9?h3T{N;(>c>BBEedaNh6+bW zJWvRbhEK447k%DH30L;~L=)jnqyk|1rn4A-j#6Sp&z2wso8(-;j(|F3$rE;Rc9t=W z#~us1TeGt8A3W<(`T+4tBm%~pttvSL1=Vh?-mHG<(o4u{g!vKkF2C&N%JoqC50b*J zSK_5aU>F$?n93%5mKEvl!KMs|1=IP6dmVzz1$ly_%^pxZ4KWL;LujDwckE6vk^Gz6StC8dv&YZY<#C;*P z2);cEw6|7iZQMB(NPYDO{xf*i!2u=G4TztLa&tNi!4oH}=cW@OjSWH^l zp>f2is8lLkko{vL!p&>i+aO*@*fn3fyNUT&RL)TTdAND1mo8mOP%0#rqJ3GrH|()c zq;Z!5)nGly<-9zz>(z7pMA4q4l=%JGrR-FIiR8@dvq zsARYS{SMh;wOEZrb?h%T>OTgLZTWwo&|!m~S3YYI+wREA#2!|wzltyJi`5m|gQ0V~ 
zcqosUMK#eb6+!$+xE&rqDJn8aT%h&~TC^XBSXo?JDucAW1Ar!yz3=D?v7{E;Htt-a zB2vuur=j8~;el^wW=?=491BI)Gpx5JG8Wo{r8UoFKE0 zz1gPF1FJ>@Q#5D_Twv?Y&^y9g4xt(=tG!Cw)xL(f)Y!{{2%gWTq4!6DJVDb}2`;?? zU7eM+XyF(K2i4PhV&EK++v4KsIjBTmf(~-|KkKW&Z&c5VbP}2i5gXh<_tXI~iGXPA zP#z$dZr+xE%-8?#h#+W{NZ3OQcfR_NmErEd5PxoOUF!Hp(0DR{)6;2ILODzOx7r1u zM$}L=!xH^e?@Oi9x_^4NV+ax|U_pnsFm~Md`#`WIT{jd}kz@7upAYLa$Fbry1+y|U zNg>UOApIWX$DZfgpE-Z68srAPM(u5V3=DB#@rIS{KlP?3ROUYah+Tu6{nj%mUdVdI zz{P5c?Cj{cij4vw&h^#@wa+U*&~k;MC!P-(#?vnkh{moUUH}C9afHf!ZELU-r0e8% zQCJ*$c0Uubkw*AI1qr~DCr_r~dB@6c_+vZ^nOB%t8iRZ8jz0ez`$)0)zt^W7pCkH5 z^JBA;%VbXTvG}rI@jU;1Xp{B><_-@Te~PO%yVD!&JaMkYzWbR2hANmeD{_K`l(u@NEN;9a6x_b1B&RjIdB0g>X<_p~N@^LDd-W*n!=MwqAJDwfB z_VV~(-jkVjP19$vj-Qmy17oTJN48vh&*T{fR#Q-{cjVTyqjr7xyT<9$r*HUjl+8W| zxWyVgmt69g4cax4>6AYx-XkL4KkHX#)#oUgkrMW?> zo`FFuHsdCaCVK5na+}dI)e>8-HhtCXw`9%YbMfZxgOjR?U?%b%aj%Ycpuiy%%420mrj0{`?ZVAafqS{PKKZel{@Ae-Jf4 z9Mym%i2{zuJpR!&leEzWfyvnHpm5c&yB%nJTaek}__}~y%ud>@QSOR2oplEb9bmh(= zv!MoecRM3v9Oz;nk^aq^Ag3a=8)?C$e0=3#25e|fHn!6@Zrr$k?_P3&RKlFj5v1qf z>ir5S5)7WNK3<&HLni^tii|}PPDVCM*7r#<45tEmkb&RYG{YJ;Dx`y#kn*?VqV|P2 zRp&noH+I#hMr8)$5k8W=C%V8NfBXRlKqG&MU4_m<8W+kB_xK@B8Z-5DfK7bgv-^22 zLrI*lCr#27PxZ_<_)Q3X9bEbq77t$ejw>yElcPez553S6AOL7|XD zGbDe%48>*om9A&9b+2;K!42Oq>Ub#IZpiG4fx)i3n^}Yal})uWE&jNUWUxE(<_tI= z=!N{aog0e|x&txp0w*|xD6l&Wgiw^Aci3F+D8{S@4wNBHcb7i!Fqt9}#5Iwt2SnV3 zV<1Qw_ME&u@MA_z1-2!L1))u;|rViPQQ%|kG<)n@pwHD1-VfeBc(%( zX~K4LsI85BWNq?gDe{q_9%B3`EP9E?kl5d$`WfQXgG6jfh9@V5FR>(8+Hw{^ z5I5pSqj4fVD%_Z0x3K%->;b><=kq>3KKd99EIsTrmGtD<9-TWK-)r*h zG4;6n!$NR=b>}!dRbCGAfZI zf*=nuLCF9HC5e*r5Uhl(J>J~mn?GQi(BDx?YV+C7rA`McPJrBN$T*>ZBkw?kt@0)l zGnOg@yP#m*83hGlEI*goLE%vB---O}Ip)Pm*M-Y%`h-INK!qnO8KYwdk+>%h99c+K zE-rj!LeUh8v*`wgL&XQ(=Vm8{`aN}zj`Ziuz50k!>h8KQ8gl2NmV+(=L0ws``WIJJ zQOQu&X3??7K$v@|BO@c8Y@%4Z6ZYiOZ{NMkwSWH^UFUJ{g{g0wXN`UD)?l~>)L4M< zP*^KUaQKLHOjX7{RmCX1H}`pUf_I3tXg{h?Og?l1{ut|i2^?K_mJEPdC`ZlV+$i7uMq z-jQT90S}nN2}ejvD&TwGqz;-VPuqy=Oz%FT6IX}l>{N|A`y5~q;8Ex5F 
z!g26mm00#Df*7#?N~Dh=sqEU4$520$PlnUTFdU3dER59r%ou-?oI=GBqx_F)6?md9VzVXh(jbz7^= zd}VLbT;mhnw$6-)1RcTRxC&ZBhFuWG9*wI!z_|SSt*nuae2^vPyTuog1bj$Hixx|D zX~qYr*s=-dyO6(-MEKj24FQ`FGFO_~;Dw+t_H#c9ZHd|tkd3eL!t1ou{p+e1sx9Mf zN->BHzB!mf;gOM*Jy1Kr^@yuKDwj?SGw5_OzUF8S{)xv zOb)6&9xv&ipCA@Be1lI&$&h?4)cS(rrimdNt;+P7V~LYtN^Ymvkfn?ztA@zIh-DFJ zoP-e|&J*SY-bnmcBHm)CA(KdSBx;B*W1mpu-In9JSdrIiX9XVo&VwZ+)}*)k`_yZb z2RAVwe}`;qFf>{6{UDfinorMd-Gx{*0ZKw9pPwW9QsVNu5@U7b%M)N#oGn;f$eCB% z5`Dor@UQaq#JZOy5GkQPk^zLf$mC$c#_A>?16ZtyW-~8dmiwrIDkrCgyGy<)!5QN+ zw-der&VB-Fz_&1jW7lgU83r^}1DG_ph@HQ$9|i=$^CTRC5(-m~m=h}#_QA%0LfQv`igt4oGn?HsupG!r)Nxa*XnGXNr-Ef{31G z9)jFG0s9mDb_1VxQ|kwK5r`oMgNLvTMsM`>^_OZ6DY76APliOchb`o?;pEUXyf4xb zJU>u_uPlgz3#2RPit{4*NwTC7I+_8hsRcGTS?3>8U_J?R*l9KvY_T!RDiR6<7NA{A z%3{7434?S$vm2;fWFANihuHpScz~#fMn*>Yqh0mgB*}~&vzIypl@eraq5stpUfj+a z#1ptV7AI3fx2LtC)Yu?!Nt{AVoXlVhY1G6VVGWeDql5xNF}*GFgFqxQ=>WCD8enGj z+6U!@#b3P8n9%_S-w!05OWj-9b`#(DfN4lIe6I`{FC|lhBt{f-Hu)v~jA6?9b6XC< zIOLlb$hZMa$cPMK3<8qeAHXx4w#av34vy&KLp7!AW*Vp%Bv`WI&L_A^<7;5j$^2k7 zRKG#Tyf7I@3xLm1xbdXmO<0=cjjK~RtHd*0b$x$U0tl3xFh({apWq=j6IJ9v!$uK_ zCSUGD$9IxM|3O;`^^5KD2j?Sn5_g*8@ZroVn>2*TFg>#LwMZmbl92PAj75wiS{bbn zB8K3eIuN<_SdS7$;QAU2Lex+kwoYV#xGM^GB#uYEB>|7$Hdt>*#6kgyU^JS4KPqZ?Fi5!%S(vodKnn|lefBdq<(RA9hHWK>8KF|RCPQO~=;Hv5Q2>z?!E9whFQ}mk zgwq|oI&t`ZfCiEW3-DBG*FL-7bm#pRk!sc=f^9B?0;Farypr$^&AzrwgCd4JSWnyL>a=gAHHY_U$GUm$CnI#kZdxPlYMjE6oJ`_ zEp#-elgGfsK^H0n2S>)+ESzzZ`qb)IknlY-M)j4AwyYzKGutR*7R=*0($ycMo3>)P zqb(h&PeUEjhBUUxG;})&Nnwj#O)vy8ughq2IWNtfot!MnwDhHG7x;pZyZ@hkMcjWW d-j6O)E;X7r^2_~@g~~~xO30s1IC Date: Mon, 12 Sep 2022 17:22:07 +0200 Subject: [PATCH 18/38] fix for test case --- human_aware_rl/ppo/ppo_rllib_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py index c41b2326..7cc4a9cb 100644 --- a/human_aware_rl/ppo/ppo_rllib_test.py +++ b/human_aware_rl/ppo/ppo_rllib_test.py @@ -280,6 +280,8 @@ def test_ppo_bc(self): self.assertDictEqual(results, 
self.expected['test_ppo_bc']) def test_resume_functionality(self): + if not os.path.exists(self.temp_results_dir): + os.makedirs(self.temp_results_dir) load_path = os.path.join(os.path.abspath('.'), 'trained_example/cramped_room/checkpoint-500') # Load and train an agent for another iteration results = ex_fp.run( From a9386f7c158a373d9a916b919b918c239cbfbb1a Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Mon, 12 Sep 2022 17:52:58 +0200 Subject: [PATCH 19/38] fix for test case --- human_aware_rl/ppo/ppo_rllib_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py index 7cc4a9cb..c89263d7 100644 --- a/human_aware_rl/ppo/ppo_rllib_test.py +++ b/human_aware_rl/ppo/ppo_rllib_test.py @@ -280,8 +280,7 @@ def test_ppo_bc(self): self.assertDictEqual(results, self.expected['test_ppo_bc']) def test_resume_functionality(self): - if not os.path.exists(self.temp_results_dir): - os.makedirs(self.temp_results_dir) + load_path = os.path.join(os.path.abspath('.'), 'trained_example/cramped_room/checkpoint-500') # Load and train an agent for another iteration results = ex_fp.run( @@ -290,7 +289,8 @@ def test_resume_functionality(self): "num_workers": 1, "num_training_iters": 1, "resume_checkpoint_path": load_path, - "verbose": False + "verbose": False, + "evaluation_display": False }, options={'--loglevel': 'ERROR'} ).result From e4908808de5cf2c38b3006df6eca6b4ba6d12691 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Mon, 12 Sep 2022 23:30:26 +0200 Subject: [PATCH 20/38] get debug info --- human_aware_rl/ppo/ppo_rllib_test.py | 13 ++++++------ run_tests.sh | 30 ++++++++++++++-------------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py index c89263d7..b2229deb 
100644 --- a/human_aware_rl/ppo/ppo_rllib_test.py +++ b/human_aware_rl/ppo/ppo_rllib_test.py @@ -282,6 +282,7 @@ def test_ppo_bc(self): def test_resume_functionality(self): load_path = os.path.join(os.path.abspath('.'), 'trained_example/cramped_room/checkpoint-500') + print(load_path) # Load and train an agent for another iteration results = ex_fp.run( config_updates={ @@ -327,12 +328,12 @@ def _clear_pickle(): _clear_pickle() suite = unittest.TestSuite() - suite.addTest(TestPPORllib('test_save_load', **args)) - suite.addTest(TestPPORllib('test_ppo_sp_no_phi', **args)) - suite.addTest(TestPPORllib('test_ppo_sp_yes_phi', **args)) - suite.addTest(TestPPORllib('test_ppo_fp_sp_no_phi', **args)) - suite.addTest(TestPPORllib('test_ppo_fp_sp_yes_phi', **args)) - suite.addTest(TestPPORllib('test_ppo_bc', **args)) + # suite.addTest(TestPPORllib('test_save_load', **args)) + # suite.addTest(TestPPORllib('test_ppo_sp_no_phi', **args)) + # suite.addTest(TestPPORllib('test_ppo_sp_yes_phi', **args)) + # suite.addTest(TestPPORllib('test_ppo_fp_sp_no_phi', **args)) + # suite.addTest(TestPPORllib('test_ppo_fp_sp_yes_phi', **args)) + # suite.addTest(TestPPORllib('test_ppo_bc', **args)) suite.addTest(TestPPORllib('test_resume_functionality', **args)) success = unittest.TextTestRunner(verbosity=2).run(suite).wasSuccessful() diff --git a/run_tests.sh b/run_tests.sh index 9cf36495..1ad4801e 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -4,21 +4,21 @@ cd ./human_aware_rl # Create a dummy data_dir.py if the file does not already exist [ ! -f data_dir.py ] && echo "import os; DATA_DIR = os.path.abspath('.')" >> data_dir.py - -# Human data tests -cd ./human -python tests.py -cd .. - -# BC tests -cd ./imitation -python behavior_cloning_tf2_test.py -cd .. - -# rllib tests -cd ./rllib -python tests.py -cd .. +# +## Human data tests +#cd ./human +#python tests.py +#cd .. +# +## BC tests +#cd ./imitation +#python behavior_cloning_tf2_test.py +#cd .. 
+# +## rllib tests +#cd ./rllib +#python tests.py +#cd .. # PPO tests cd ./ppo From 81fec4248564ccf2dec0cee492e822325a143adf Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Mon, 12 Sep 2022 23:46:35 +0200 Subject: [PATCH 21/38] disable logging --- human_aware_rl/ppo/ppo_rllib_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py index b2229deb..6088d0a8 100644 --- a/human_aware_rl/ppo/ppo_rllib_test.py +++ b/human_aware_rl/ppo/ppo_rllib_test.py @@ -284,6 +284,8 @@ def test_resume_functionality(self): load_path = os.path.join(os.path.abspath('.'), 'trained_example/cramped_room/checkpoint-500') print(load_path) # Load and train an agent for another iteration + os.environ["TUNE_DISABLE_AUTO_CALLBACK_LOGGERS"] = "1" + results = ex_fp.run( config_updates={ "results_dir": self.temp_results_dir, @@ -291,6 +293,7 @@ def test_resume_functionality(self): "num_training_iters": 1, "resume_checkpoint_path": load_path, "verbose": False, + "log_to_driver": False, "evaluation_display": False }, options={'--loglevel': 'ERROR'} From 4f1f354e4ea634600eaa6dc5851dfe1bfe7d21bb Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Tue, 13 Sep 2022 00:00:08 +0200 Subject: [PATCH 22/38] disable logging --- human_aware_rl/ppo/ppo_rllib_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py index 6088d0a8..5f3494b4 100644 --- a/human_aware_rl/ppo/ppo_rllib_test.py +++ b/human_aware_rl/ppo/ppo_rllib_test.py @@ -291,7 +291,7 @@ def test_resume_functionality(self): "results_dir": self.temp_results_dir, "num_workers": 1, "num_training_iters": 1, - "resume_checkpoint_path": load_path, + # "resume_checkpoint_path": load_path, "verbose": False, "log_to_driver": False, "evaluation_display": False From 
aa834b64d534ad37903d26fc6fadf2d519c2c3ce Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Tue, 13 Sep 2022 00:39:41 +0200 Subject: [PATCH 23/38] disable logging --- human_aware_rl/ppo/ppo_rllib_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py index 5f3494b4..91e7de47 100644 --- a/human_aware_rl/ppo/ppo_rllib_test.py +++ b/human_aware_rl/ppo/ppo_rllib_test.py @@ -291,9 +291,9 @@ def test_resume_functionality(self): "results_dir": self.temp_results_dir, "num_workers": 1, "num_training_iters": 1, - # "resume_checkpoint_path": load_path, + "resume_checkpoint_path": load_path, + "bc_model_dir": load_path, "verbose": False, - "log_to_driver": False, "evaluation_display": False }, options={'--loglevel': 'ERROR'} From 41569c87b6763911a6c8d6ae408096432258f9db Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Thu, 15 Sep 2022 21:28:54 +0200 Subject: [PATCH 24/38] disable logging --- human_aware_rl/ppo/ppo_rllib_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py index 91e7de47..4b35e6e4 100644 --- a/human_aware_rl/ppo/ppo_rllib_test.py +++ b/human_aware_rl/ppo/ppo_rllib_test.py @@ -291,8 +291,7 @@ def test_resume_functionality(self): "results_dir": self.temp_results_dir, "num_workers": 1, "num_training_iters": 1, - "resume_checkpoint_path": load_path, - "bc_model_dir": load_path, + # "resume_checkpoint_path": load_path, "verbose": False, "evaluation_display": False }, From 679ea3dcff024d7371f8814f39f4cdbf37c1e926 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Thu, 15 Sep 2022 21:54:36 +0200 Subject: [PATCH 25/38] logging check --- .../checkpoint-500 | Bin 
.../checkpoint-500.tune_metadata | Bin .../config.pkl | Bin .../progress.csv | 0 .../result.json | 0 human_aware_rl/rllib/rllib.py | 3 ++- 6 files changed, 2 insertions(+), 1 deletion(-) rename human_aware_rl/ppo/trained_example/{cramped_room => PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000}/checkpoint-500 (100%) rename human_aware_rl/ppo/trained_example/{cramped_room => PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000}/checkpoint-500.tune_metadata (100%) rename human_aware_rl/ppo/trained_example/{cramped_room => PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000}/config.pkl (100%) rename human_aware_rl/ppo/trained_example/{cramped_room => PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000}/progress.csv (100%) rename human_aware_rl/ppo/trained_example/{cramped_room => PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000}/result.json (100%) diff --git a/human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500 b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500 similarity index 100% rename from human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500 rename to human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500 diff --git a/human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500.tune_metadata b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500.tune_metadata similarity index 100% rename from human_aware_rl/ppo/trained_example/cramped_room/checkpoint-500.tune_metadata rename to human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500.tune_metadata diff --git a/human_aware_rl/ppo/trained_example/cramped_room/config.pkl 
b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/config.pkl similarity index 100% rename from human_aware_rl/ppo/trained_example/cramped_room/config.pkl rename to human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/config.pkl diff --git a/human_aware_rl/ppo/trained_example/cramped_room/progress.csv b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/progress.csv similarity index 100% rename from human_aware_rl/ppo/trained_example/cramped_room/progress.csv rename to human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/progress.csv diff --git a/human_aware_rl/ppo/trained_example/cramped_room/result.json b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json similarity index 100% rename from human_aware_rl/ppo/trained_example/cramped_room/result.json rename to human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py index 222e820a..39ed8ab0 100644 --- a/human_aware_rl/rllib/rllib.py +++ b/human_aware_rl/rllib/rllib.py @@ -512,6 +512,7 @@ def gen_trainer_from_params(params): "log_to_driver" : params['verbose'], "logging_level" : logging.INFO if params['verbose'] else logging.CRITICAL } + print(init_params) ray.init(**init_params) register_env("overcooked_multi_agent", params['ray_params']['env_creator']) ModelCatalog.register_custom_model(params['ray_params']['custom_model_id'], params['ray_params']['custom_model_cls']) @@ -635,7 +636,7 @@ def load_trainer(save_path, true_num_workers=False): with open(config_path, "rb") as f: # We use dill (instead of pickle) here because we must deserialize functions config = dill.load(f) - 
+ print(config) if not true_num_workers: # Override this param to lower overhead in trainer creation config['training_params']['num_workers'] = 0 From 43e7fa15f0e3fa2f797b40584717db92a33ad520 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Thu, 15 Sep 2022 22:44:07 +0200 Subject: [PATCH 26/38] logging check --- human_aware_rl/ppo/ppo_rllib_test.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py index 4b35e6e4..193da166 100644 --- a/human_aware_rl/ppo/ppo_rllib_test.py +++ b/human_aware_rl/ppo/ppo_rllib_test.py @@ -41,11 +41,15 @@ def setUp(self): set_global_seed(0) # Temporary disk space to store logging results from tests + self.temp_dir = os.path.join(os.path.abspath('.'), 'temp_dir') self.temp_results_dir = os.path.join(os.path.abspath('.'), 'results_temp') self.temp_model_dir = os.path.join(os.path.abspath('.'), 'model_temp') # Make all necessary directories + if not os.path.exists(self.temp_dir): + os.makedirs(self.temp_dir) + if not os.path.exists(self.temp_model_dir): os.makedirs(self.temp_model_dir) @@ -65,6 +69,7 @@ def tearDown(self): pickle.dump(self.expected, f) # Cleanup + shutil.rmtree(self.temp_dir) shutil.rmtree(self.temp_results_dir) shutil.rmtree(self.temp_model_dir) ray.shutdown() @@ -281,7 +286,7 @@ def test_ppo_bc(self): def test_resume_functionality(self): - load_path = os.path.join(os.path.abspath('.'), 'trained_example/cramped_room/checkpoint-500') + load_path = os.path.join(os.path.abspath('.'), 'trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500') print(load_path) # Load and train an agent for another iteration os.environ["TUNE_DISABLE_AUTO_CALLBACK_LOGGERS"] = "1" @@ -291,7 +296,7 @@ def test_resume_functionality(self): "results_dir": self.temp_results_dir, "num_workers": 1, "num_training_iters": 1, - # 
"resume_checkpoint_path": load_path, + "resume_checkpoint_path": load_path, "verbose": False, "evaluation_display": False }, @@ -303,7 +308,7 @@ def test_resume_functionality(self): threshold = 0.1 - rewards = get_last_episode_rewards('trained_example/cramped_room/result.json') + rewards = get_last_episode_rewards('trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json') #Test total reward self.assertAlmostEqual(rewards['episode_reward_mean'], results['average_total_reward'], From 11286e3adef6a7e5eb54cbfe3a24e4d2da575f0f Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Thu, 15 Sep 2022 23:29:47 +0200 Subject: [PATCH 27/38] logging check --- human_aware_rl/rllib/rllib.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py index 39ed8ab0..c6398ea4 100644 --- a/human_aware_rl/rllib/rllib.py +++ b/human_aware_rl/rllib/rllib.py @@ -512,11 +512,10 @@ def gen_trainer_from_params(params): "log_to_driver" : params['verbose'], "logging_level" : logging.INFO if params['verbose'] else logging.CRITICAL } - print(init_params) ray.init(**init_params) register_env("overcooked_multi_agent", params['ray_params']['env_creator']) ModelCatalog.register_custom_model(params['ray_params']['custom_model_id'], params['ray_params']['custom_model_cls']) - + print(params) # Parse params model_params = params['model_params'] training_params = params['training_params'] @@ -636,7 +635,7 @@ def load_trainer(save_path, true_num_workers=False): with open(config_path, "rb") as f: # We use dill (instead of pickle) here because we must deserialize functions config = dill.load(f) - print(config) + if not true_num_workers: # Override this param to lower overhead in trainer creation config['training_params']['num_workers'] = 0 From 2d299156fa87510b32f969f1cf3f3978c790f245 Mon Sep 17 00:00:00 2001 From: Alexander 
Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Thu, 15 Sep 2022 23:44:05 +0200 Subject: [PATCH 28/38] logging check --- human_aware_rl/rllib/rllib.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py index c6398ea4..2416a340 100644 --- a/human_aware_rl/rllib/rllib.py +++ b/human_aware_rl/rllib/rllib.py @@ -551,12 +551,14 @@ def custom_logger_creator(config): """Creates a Unified logger that stores results in /__ """ results_dir = params['results_dir'] + print(results_dir) if not os.path.exists(results_dir): try: os.makedirs(results_dir) except Exception as e: print("error creating custom logging dir. Falling back to default logdir {}".format(DEFAULT_RESULTS_DIR)) results_dir = DEFAULT_RESULTS_DIR + print(results_dir) logdir = tempfile.mkdtemp( prefix=logdir_prefix, dir=results_dir) logger = UnifiedLogger(config, logdir, loggers=None) From cca9b1ca6c61f8bd3f5c32287922fd08f6ff57a9 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Thu, 15 Sep 2022 23:51:36 +0200 Subject: [PATCH 29/38] logging check --- human_aware_rl/ppo/ppo_rllib_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py index 193da166..94db27fe 100644 --- a/human_aware_rl/ppo/ppo_rllib_test.py +++ b/human_aware_rl/ppo/ppo_rllib_test.py @@ -296,7 +296,7 @@ def test_resume_functionality(self): "results_dir": self.temp_results_dir, "num_workers": 1, "num_training_iters": 1, - "resume_checkpoint_path": load_path, + # "resume_checkpoint_path": load_path, "verbose": False, "evaluation_display": False }, From 71d26226fb0676b6c4612f63e4e6146d430b3810 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Fri, 16 Sep 2022 01:16:29 +0200 Subject: [PATCH 30/38] logging check --- 
.../ppo/ppo_rllib_from_params_client.py | 10 +++++-- human_aware_rl/ppo/ppo_rllib_test.py | 10 ++----- human_aware_rl/rllib/rllib.py | 29 +++++++++++++++++++ 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/human_aware_rl/ppo/ppo_rllib_from_params_client.py b/human_aware_rl/ppo/ppo_rllib_from_params_client.py index 4da4c673..e74ec0de 100644 --- a/human_aware_rl/ppo/ppo_rllib_from_params_client.py +++ b/human_aware_rl/ppo/ppo_rllib_from_params_client.py @@ -32,7 +32,7 @@ from ray.rllib.models import ModelCatalog from ray.rllib.agents.ppo.ppo import PPOTrainer from human_aware_rl.ppo.ppo_rllib import RllibPPOModel, RllibLSTMPPOModel -from human_aware_rl.rllib.rllib import OvercookedMultiAgent, save_trainer, gen_trainer_from_params, load_trainer +from human_aware_rl.rllib.rllib import OvercookedMultiAgent, save_trainer, gen_trainer_from_params, load_trainer, load_trainer_unittest from human_aware_rl.imitation.behavior_cloning_tf2 import BehaviorCloningPolicy, BC_SAVE_DIR @@ -384,8 +384,14 @@ def run(params): # Check if any resume checkpoint given saved_path = params["resume_checkpoint_path"] + # Check if we load from unit test + unit_test = params["unit_test"] + if saved_path: - trainer = load_trainer(save_path=saved_path, true_num_workers=True) + if unit_test: + trainer = load_trainer(save_path=saved_path, true_num_workers=True, unit_test=True) + else: + trainer = load_trainer(save_path=saved_path, true_num_workers=True) else: # Retrieve the tune.Trainable object that is used for the experiment trainer = gen_trainer_from_params(params) diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py index 94db27fe..db647a90 100644 --- a/human_aware_rl/ppo/ppo_rllib_test.py +++ b/human_aware_rl/ppo/ppo_rllib_test.py @@ -41,15 +41,11 @@ def setUp(self): set_global_seed(0) # Temporary disk space to store logging results from tests - self.temp_dir = os.path.join(os.path.abspath('.'), 'temp_dir') self.temp_results_dir = 
os.path.join(os.path.abspath('.'), 'results_temp') self.temp_model_dir = os.path.join(os.path.abspath('.'), 'model_temp') # Make all necessary directories - if not os.path.exists(self.temp_dir): - os.makedirs(self.temp_dir) - if not os.path.exists(self.temp_model_dir): os.makedirs(self.temp_model_dir) @@ -69,7 +65,6 @@ def tearDown(self): pickle.dump(self.expected, f) # Cleanup - shutil.rmtree(self.temp_dir) shutil.rmtree(self.temp_results_dir) shutil.rmtree(self.temp_model_dir) ray.shutdown() @@ -296,9 +291,10 @@ def test_resume_functionality(self): "results_dir": self.temp_results_dir, "num_workers": 1, "num_training_iters": 1, - # "resume_checkpoint_path": load_path, + "resume_checkpoint_path": load_path, "verbose": False, - "evaluation_display": False + "evaluation_display": False, + "unit_test": True }, options={'--loglevel': 'ERROR'} ).result diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py index 2416a340..550ec521 100644 --- a/human_aware_rl/rllib/rllib.py +++ b/human_aware_rl/rllib/rllib.py @@ -649,6 +649,35 @@ def load_trainer(save_path, true_num_workers=False): trainer.restore(save_path) return trainer +def load_trainer(save_path, true_num_workers=False, unit_test=False): + """ + Returns a ray compatible trainer object that was previously saved at `save_path` by a call to `save_trainer` + Note that `save_path` is the full path to the checkpoint FILE, not the checkpoint directory + Additionally we decide if we want to use the same number of remote workers (see ray library Training APIs) + as we store in the previous configuration, by default = False, we use only the local worker + (see ray library API) + """ + # Read in params used to create trainer + config_path = os.path.join(os.path.dirname(save_path), "config.pkl") + with open(config_path, "rb") as f: + # We use dill (instead of pickle) here because we must deserialize functions + config = dill.load(f) + + if not true_num_workers: + # Override this param to lower overhead in 
trainer creation + config['training_params']['num_workers'] = 0 + + if unit_test: + # For the unit testing we update the result directory in order to avoid an error + config['results_dir'] = "/Users/runner/work/human_aware_rl/human_aware_rl/human_aware_rl/ppo/results_temp" + + # Get un-trained trainer object with proper config + trainer = gen_trainer_from_params(config) + + # Load weights into dummy object + trainer.restore(save_path) + return trainer + def get_agent_from_trainer(trainer, policy_id="ppo", agent_index=0): policy = trainer.get_policy(policy_id) dummy_env = trainer.env_creator(trainer.config['env_config']) From 1ea9cd676cdfbb5383458ebdb538c586907ef768 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Fri, 16 Sep 2022 01:28:28 +0200 Subject: [PATCH 31/38] logging check --- human_aware_rl/ppo/ppo_rllib_from_params_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/human_aware_rl/ppo/ppo_rllib_from_params_client.py b/human_aware_rl/ppo/ppo_rllib_from_params_client.py index e74ec0de..29eaa8b5 100644 --- a/human_aware_rl/ppo/ppo_rllib_from_params_client.py +++ b/human_aware_rl/ppo/ppo_rllib_from_params_client.py @@ -32,7 +32,7 @@ from ray.rllib.models import ModelCatalog from ray.rllib.agents.ppo.ppo import PPOTrainer from human_aware_rl.ppo.ppo_rllib import RllibPPOModel, RllibLSTMPPOModel -from human_aware_rl.rllib.rllib import OvercookedMultiAgent, save_trainer, gen_trainer_from_params, load_trainer, load_trainer_unittest +from human_aware_rl.rllib.rllib import OvercookedMultiAgent, save_trainer, gen_trainer_from_params, load_trainer from human_aware_rl.imitation.behavior_cloning_tf2 import BehaviorCloningPolicy, BC_SAVE_DIR From e14105273e238c5bc9abc2ff7bcef3b379d49d2e Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Fri, 16 Sep 2022 01:40:14 +0200 Subject: [PATCH 32/38] logging check --- 
human_aware_rl/ppo/ppo_rllib_from_params_client.py | 7 +------ human_aware_rl/ppo/ppo_rllib_test.py | 1 - human_aware_rl/rllib/rllib.py | 4 ++-- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/human_aware_rl/ppo/ppo_rllib_from_params_client.py b/human_aware_rl/ppo/ppo_rllib_from_params_client.py index 29eaa8b5..c150f35d 100644 --- a/human_aware_rl/ppo/ppo_rllib_from_params_client.py +++ b/human_aware_rl/ppo/ppo_rllib_from_params_client.py @@ -384,14 +384,9 @@ def run(params): # Check if any resume checkpoint given saved_path = params["resume_checkpoint_path"] - # Check if we load from unit test - unit_test = params["unit_test"] if saved_path: - if unit_test: - trainer = load_trainer(save_path=saved_path, true_num_workers=True, unit_test=True) - else: - trainer = load_trainer(save_path=saved_path, true_num_workers=True) + trainer = load_trainer(save_path=saved_path, true_num_workers=True) else: # Retrieve the tune.Trainable object that is used for the experiment trainer = gen_trainer_from_params(params) diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py index db647a90..2a9bdf4c 100644 --- a/human_aware_rl/ppo/ppo_rllib_test.py +++ b/human_aware_rl/ppo/ppo_rllib_test.py @@ -294,7 +294,6 @@ def test_resume_functionality(self): "resume_checkpoint_path": load_path, "verbose": False, "evaluation_display": False, - "unit_test": True }, options={'--loglevel': 'ERROR'} ).result diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py index 550ec521..b04da1b6 100644 --- a/human_aware_rl/rllib/rllib.py +++ b/human_aware_rl/rllib/rllib.py @@ -649,7 +649,7 @@ def load_trainer(save_path, true_num_workers=False): trainer.restore(save_path) return trainer -def load_trainer(save_path, true_num_workers=False, unit_test=False): +def load_trainer(save_path, true_num_workers=False): """ Returns a ray compatible trainer object that was previously saved at `save_path` by a call to `save_trainer` Note that `save_path` 
is the full path to the checkpoint FILE, not the checkpoint directory @@ -667,7 +667,7 @@ def load_trainer(save_path, true_num_workers=False, unit_test=False): # Override this param to lower overhead in trainer creation config['training_params']['num_workers'] = 0 - if unit_test: + if "trained_example" in save_path: # For the unit testing we update the result directory in order to avoid an error config['results_dir'] = "/Users/runner/work/human_aware_rl/human_aware_rl/human_aware_rl/ppo/results_temp" From cda439037665b42c810e977036ea10ce17462364 Mon Sep 17 00:00:00 2001 From: Alexander Lichtenstein <49325191+alexlichtenstein@users.noreply.github.com> Date: Fri, 16 Sep 2022 01:49:43 +0200 Subject: [PATCH 33/38] fix logging for unit test --- human_aware_rl/ppo/ppo_rllib_test.py | 16 ++++++---------- human_aware_rl/rllib/rllib.py | 3 --- run_tests.sh | 28 ++++++++++++++-------------- 3 files changed, 20 insertions(+), 27 deletions(-) diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py index 2a9bdf4c..00749e45 100644 --- a/human_aware_rl/ppo/ppo_rllib_test.py +++ b/human_aware_rl/ppo/ppo_rllib_test.py @@ -280,12 +280,8 @@ def test_ppo_bc(self): self.assertDictEqual(results, self.expected['test_ppo_bc']) def test_resume_functionality(self): - load_path = os.path.join(os.path.abspath('.'), 'trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500') - print(load_path) # Load and train an agent for another iteration - os.environ["TUNE_DISABLE_AUTO_CALLBACK_LOGGERS"] = "1" - results = ex_fp.run( config_updates={ "results_dir": self.temp_results_dir, @@ -330,12 +326,12 @@ def _clear_pickle(): _clear_pickle() suite = unittest.TestSuite() - # suite.addTest(TestPPORllib('test_save_load', **args)) - # suite.addTest(TestPPORllib('test_ppo_sp_no_phi', **args)) - # suite.addTest(TestPPORllib('test_ppo_sp_yes_phi', **args)) - # suite.addTest(TestPPORllib('test_ppo_fp_sp_no_phi', **args)) - # 
suite.addTest(TestPPORllib('test_ppo_fp_sp_yes_phi', **args)) - # suite.addTest(TestPPORllib('test_ppo_bc', **args)) + suite.addTest(TestPPORllib('test_save_load', **args)) + suite.addTest(TestPPORllib('test_ppo_sp_no_phi', **args)) + suite.addTest(TestPPORllib('test_ppo_sp_yes_phi', **args)) + suite.addTest(TestPPORllib('test_ppo_fp_sp_no_phi', **args)) + suite.addTest(TestPPORllib('test_ppo_fp_sp_yes_phi', **args)) + suite.addTest(TestPPORllib('test_ppo_bc', **args)) suite.addTest(TestPPORllib('test_resume_functionality', **args)) success = unittest.TextTestRunner(verbosity=2).run(suite).wasSuccessful() diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py index b04da1b6..228fceff 100644 --- a/human_aware_rl/rllib/rllib.py +++ b/human_aware_rl/rllib/rllib.py @@ -515,7 +515,6 @@ def gen_trainer_from_params(params): ray.init(**init_params) register_env("overcooked_multi_agent", params['ray_params']['env_creator']) ModelCatalog.register_custom_model(params['ray_params']['custom_model_id'], params['ray_params']['custom_model_cls']) - print(params) # Parse params model_params = params['model_params'] training_params = params['training_params'] @@ -551,14 +550,12 @@ def custom_logger_creator(config): """Creates a Unified logger that stores results in /__ """ results_dir = params['results_dir'] - print(results_dir) if not os.path.exists(results_dir): try: os.makedirs(results_dir) except Exception as e: print("error creating custom logging dir. Falling back to default logdir {}".format(DEFAULT_RESULTS_DIR)) results_dir = DEFAULT_RESULTS_DIR - print(results_dir) logdir = tempfile.mkdtemp( prefix=logdir_prefix, dir=results_dir) logger = UnifiedLogger(config, logdir, loggers=None) diff --git a/run_tests.sh b/run_tests.sh index 1ad4801e..9f264a50 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -5,20 +5,20 @@ cd ./human_aware_rl # Create a dummy data_dir.py if the file does not already exist [ ! 
-f data_dir.py ] && echo "import os; DATA_DIR = os.path.abspath('.')" >> data_dir.py # -## Human data tests -#cd ./human -#python tests.py -#cd .. -# -## BC tests -#cd ./imitation -#python behavior_cloning_tf2_test.py -#cd .. -# -## rllib tests -#cd ./rllib -#python tests.py -#cd .. +# Human data tests +cd ./human +python tests.py +cd .. + +# BC tests +cd ./imitation +python behavior_cloning_tf2_test.py +cd .. + +# rllib tests +cd ./rllib +python tests.py +cd .. # PPO tests cd ./ppo From d80da4298a1f5b16e37e8142dcab51c715842cd0 Mon Sep 17 00:00:00 2001 From: jyan1999 Date: Tue, 4 Oct 2022 22:18:45 -0700 Subject: [PATCH 34/38] Setup improvement Removed install.sh Added an __init__.py file for human_aware_rl directory so it can be properly recognized as a module Updated workflow file to simplify setup and added pip cache action Included a requirements.txt file for caching github workflow, installation can now be done via pip install -r requirements.txt Updated test suite to allow unittest discovery (though it is pretty limited. 
More comprehensive discovery requires nontrivial refactoring) Included temporary fix to increase tests stability by setting sgd-size = training-size (it doesn't completely solve the issue, but does significantly increase stability) Updated the README to reflect the changes and fixed some errors --- .github/workflows/python-app.yml | 39 ++-- README.md | 37 ++- human_aware_rl/__init__.py | 0 human_aware_rl/human/tests.py | 127 +++++++---- .../imitation/behavior_cloning_tf2_test.py | 165 ++++++++------ human_aware_rl/ppo/ppo_rllib_test.py | 210 +++++++++--------- human_aware_rl/rllib/tests.py | 84 ++++--- install.sh | 23 -- requirements.txt | 17 ++ run_tests.sh | 24 +- setup.py | 43 ++-- 11 files changed, 432 insertions(+), 337 deletions(-) create mode 100644 human_aware_rl/__init__.py delete mode 100755 install.sh create mode 100644 requirements.txt diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 9c9339ac..f3f3b03a 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -8,31 +8,20 @@ on: jobs: build_osx: - runs-on: macos-latest steps: - - uses: actions/checkout@v2 - with: - submodules: true - - name: Python + Conda setup - uses: conda-incubator/setup-miniconda@v2 - with: - python-version: 3.7 - - name: Install dependencies - run: | - conda init bash - source ~/.bash_profile - conda create -n harl python=3.7 - conda activate harl - python -m pip install --upgrade pip - ./install.sh - pip install tensorflow==2.0.2 - - name: Test with unittest - run: | - conda init bash - source ~/.bash_profile - conda activate harl - sudo chmod 777 ./run_tests.sh - ./run_tests.sh - + - uses: actions/checkout@v2 + with: + submodules: true + - name: Python setup #removed conda setup + uses: actions/setup-python@v4 + with: + python-version: 3.7 + cache: "pip" + - name: Installing dependencies + run: pip install -r requirements.txt + - name: Test with unittest + run: | + sudo chmod 777 ./run_tests.sh + ./run_tests.sh 
diff --git a/README.md b/README.md index a0148c65..0aa25938 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,9 @@ $ git clone --single-branch --branch BRANCH_NAME --recursive https://github.com/ ## CUDA 10.0 Installation on Ubuntu 18.04 + +**Note:**: The CUDA installation is **_NOT REQUIRED_**, feel free to skip this section if you are only planning on running on CPUs + For Ubuntu 18.04, follow the direction [here](https://www.pugetsystems.com/labs/hpc/How-To-Install-CUDA-10-together-with-9-2-on-Ubuntu-18-04-with-support-for-NVIDIA-20XX-Turing-GPUs-1236/) The only difference being the very last step. @@ -49,6 +52,8 @@ $ sudo apt-get install cuda-10-0 ## Conda Environment Setup +While not strictly required, creating a conda environment simplifies the setup and can help avoid dependency conflicts + Create a new conda environment and run the install script as before [Optional Conda Installation for 18.04](https://www.digitalocean.com/community/tutorials/how-to-install-the-anaconda-python-distribution-on-ubuntu-18-04) @@ -56,14 +61,10 @@ Create a new conda environment and run the install script as before ```bash $ conda create -n harl_rllib python=3.7 $ conda activate harl_rllib -(harl_rllib) $ ./install.sh +(harl_rllib) $ pip install -r requirements.txt ``` -Finally, install the latest stable version of tensorflow compatible with rllib -```bash -(harl_rllib) $ pip install tensorflow==2.0.2 -``` -Or, if working with gpus, install a version of tensorflow 2.*.* and cuDNN that is compatible with the available Cuda drivers. The following example works for Cuda 10.0.0. You can verify what version of Cuda is installed by running `nvcc --version`. For a full list of driver compatibility, refer [here](https://www.tensorflow.org/install/source#gpu) +If working with gpus, install a version of tensorflow 2.*.* and cuDNN that is compatible with the available Cuda drivers. The following example works for Cuda 10.0.0. 
You can verify what version of Cuda is installed by running `nvcc --version`. For a full list of driver compatibility, refer [here](https://www.tensorflow.org/install/source#gpu) ```bash (harl_rllib) $ pip install tensorflow-gpu==2.0.0 (harl_rllib) $ conda install -c anaconda cudnn=7.6.0 @@ -81,6 +82,10 @@ Note: if you ever get an import error, please first check if you activated the c If set-up was successful, all unit tests and local reproducibility tests should pass. They can be run as follows +**NOTE**: Existing tests **_DOES NOT_** guarantee reproducibility. It is an known issue with version of ray\[rllib\] in use, and we are working on updating to the newest version, which should solve this problem. As a temporary fix, setting sgd-minibatch-size = training-batch-size increases stability + +Due to the randomess there is a slight chance that some tests can fail intermittently by not getting the expected total reward. This is an unlikely scenario and can usually be fixed by rerunning the test. + You can run all the tests with ```bash (harl_rllib) $ ./run_tests.sh @@ -92,20 +97,32 @@ Highest level integration tests that combine self play, bc training, and ppo_bc (harl_rllib) $ cd human_aware_rl/ppo (harl_rllib) human_aware_rl/ppo $ python ppo_rllib_test.py ``` +or +```bash +(harl_rllib) $ python -m unittest human_aware_rl.ppo.ppo_rllib_test +``` ## BC Tests All tests involving creation, training, and saving of bc models. 
No dependency on rllib +There are 2 test classes depending on whether the model is trained with LSTM, the run_tests.sh file by default only tests model without LSTM ```bash (harl_rllib) $ cd imitation -(harl_rllib) imitation $ python behavior_cloning_tf2_test.py +(harl_rllib) imitation $ python behavior_cloning_tf2_test.py TestBCTraining +``` +or +```bash +(harl_rllib) $ python -m unittest human_aware_rl.imitation.behavior_cloning_tf2_test.TestBCTraining ``` - ## Rllib Tests Tests rllib environments and models, as well as various utility functions. Does not actually test rllib training ```bash (harl_rllib) $ cd rllib (harl_rllib) rllib $ python tests.py ``` +or +```bash +(harl_rllib) $ python -m unittest human_aware_rl.rllib.tests +``` You should see all tests passing. @@ -215,11 +232,13 @@ ModuleNotFoundError: No module named 'human_aware_rl.data_dir' , please run ``` -./run_tests.sh +pip install -r requirements.txt ``` to initiate those variables +The reason this is needed is because code files refer to the subdirectories as modules, and we decided to use pip to automatically add the submodules paths. This command invokes the `setup.py` file, which looks for packages in the _human_aware_rl_ directory through the `find_packages()` call, and register modules found so they can be referrenced. + # Reproducing Results The specific results in that paper were obtained using code that is no longer in the master branch. If you are interested in reproducing results, please check out [this](https://github.com/HumanCompatibleAI/human_aware_rl/tree/neurips2019) and follow the install instructions there. 
diff --git a/human_aware_rl/__init__.py b/human_aware_rl/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/human_aware_rl/human/tests.py b/human_aware_rl/human/tests.py index bb5a57d0..58f0cfaf 100644 --- a/human_aware_rl/human/tests.py +++ b/human_aware_rl/human/tests.py @@ -1,41 +1,54 @@ import unittest, os, shutil import numpy as np import pickle, copy +import sys from numpy.testing._private.utils import assert_raises from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld, OvercookedState from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv from overcooked_ai_py.agents.agent import AgentPair, GreedyHumanModel -from overcooked_ai_py.planning.planners import MediumLevelActionManager, NO_COUNTERS_PARAMS +from overcooked_ai_py.planning.planners import ( + MediumLevelActionManager, + NO_COUNTERS_PARAMS, +) from human_aware_rl.utils import equal_dicts from human_aware_rl.static import * -from human_aware_rl.human.process_dataframes import csv_to_df_pickle, get_trajs_from_data +from human_aware_rl.human.process_dataframes import ( + csv_to_df_pickle, + get_trajs_from_data, +) from human_aware_rl.human.process_human_trials import main as process_human_trials_main + class TestProcessDataFrames(unittest.TestCase): - temp_data_dir = 'this_is_a_temp' + temp_data_dir = "this_is_a_temp" data_len_2019 = 3546 data_len_2020 = 1189 base_csv_to_df_params = { - "csv_path" : DUMMY_2020_RAW_HUMAN_DATA_PATH, - "out_dir" : "this_is_a_temp", - "out_file_prefix" : 'unittest', - "button_presses_threshold" : 0.25, - "perform_train_test_split" : False, - "silent" : True + "csv_path": DUMMY_2020_RAW_HUMAN_DATA_PATH, + "out_dir": "this_is_a_temp", + "out_file_prefix": "unittest", + "button_presses_threshold": 0.25, + "perform_train_test_split": False, + "silent": True, } base_get_trajs_from_data_params = { - "data_path" : DUMMY_2019_CLEAN_HUMAN_DATA_PATH, - "featurize_states" : False, - "check_trajectories" : False, - "silent" : True, - "layouts" : 
['cramped_room'] + "data_path": DUMMY_2019_CLEAN_HUMAN_DATA_PATH, + "featurize_states": False, + "check_trajectories": False, + "silent": True, + "layouts": ["cramped_room"], } def setUp(self): + print( + "\nIn Class {}, in Method {}".format( + self.__class__.__name__, self._testMethodName + ) + ) if not os.path.exists(self.temp_data_dir): os.makedirs(self.temp_data_dir) @@ -44,28 +57,27 @@ def tearDown(self): def test_csv_to_df_pickle_2019(self): params = copy.deepcopy(self.base_csv_to_df_params) - params['csv_path'] = DUMMY_2019_RAW_HUMAN_DATA_PATH - params['button_presses_threshold'] = 0.0 + params["csv_path"] = DUMMY_2019_RAW_HUMAN_DATA_PATH + params["button_presses_threshold"] = 0.0 data = csv_to_df_pickle(**params) self.assertEqual(len(data), self.data_len_2019) params = copy.deepcopy(self.base_csv_to_df_params) - params['csv_path'] = DUMMY_2019_RAW_HUMAN_DATA_PATH - params['button_presses_threshold'] = 0.7 + params["csv_path"] = DUMMY_2019_RAW_HUMAN_DATA_PATH + params["button_presses_threshold"] = 0.7 data = csv_to_df_pickle(**params) self.assertLess(len(data), self.data_len_2019) def test_csv_to_df_pickle_2020(self): params = copy.deepcopy(self.base_csv_to_df_params) - params['button_presses_threshold'] = 0.0 + params["button_presses_threshold"] = 0.0 data = csv_to_df_pickle(**params) self.assertEqual(len(data), self.data_len_2020) params = copy.deepcopy(self.base_csv_to_df_params) - params['button_presses_threshold'] = 0.7 + params["button_presses_threshold"] = 0.7 data = csv_to_df_pickle(**params) self.assertLess(len(data), self.data_len_2020) - def test_csv_to_df_pickle(self): # Try various button thresholds (hand-picked to lie between different values for dummy data games) @@ -74,17 +86,17 @@ def test_csv_to_df_pickle(self): for threshold in button_thresholds: # dummy dataset is too small to partion so we set train_test_split=False params = copy.deepcopy(self.base_csv_to_df_params) - params['button_presses_threshold'] = threshold + 
params["button_presses_threshold"] = threshold data = csv_to_df_pickle(**params) lengths.append(len(data)) - + # Filtered data size should be monotonically decreasing wrt button_threshold for i in range(len(lengths) - 1): - self.assertGreaterEqual(lengths[i], lengths[i+1]) + self.assertGreaterEqual(lengths[i], lengths[i + 1]) # Picking a threshold that's suficiently high discards all data, should result in value error params = copy.deepcopy(self.base_csv_to_df_params) - params['button_presses_threshold'] = 0.8 + params["button_presses_threshold"] = 0.8 self.assertRaises(ValueError, csv_to_df_pickle, **params) def test_get_trajs_from_data_2019(self): @@ -93,22 +105,22 @@ def test_get_trajs_from_data_2019(self): def test_get_trajs_from_data_2019_featurize(self): params = copy.deepcopy(self.base_get_trajs_from_data_params) - params['featurize_states'] = True + params["featurize_states"] = True trajectories, _ = get_trajs_from_data(**params) def test_get_trajs_from_data_2020(self): # Ensure we can properly deserialize states with updated objects (i.e tomatoes) params = copy.deepcopy(self.base_get_trajs_from_data_params) - params['layouts'] = ['inverse_marshmallow_experiment'] - params['data_path'] = DUMMY_2020_CLEAN_HUMAN_DATA_PATH + params["layouts"] = ["inverse_marshmallow_experiment"] + params["data_path"] = DUMMY_2020_CLEAN_HUMAN_DATA_PATH trajectories, _ = get_trajs_from_data(**params) def test_get_trajs_from_data_2020_featurize(self): # Ensure we can properly featurize states with updated dynamics and updated objects (i.e tomatoes) params = copy.deepcopy(self.base_get_trajs_from_data_params) - params['layouts'] = ['inverse_marshmallow_experiment'] - params['data_path'] = DUMMY_2020_CLEAN_HUMAN_DATA_PATH - params['featurize_states'] = True + params["layouts"] = ["inverse_marshmallow_experiment"] + params["data_path"] = DUMMY_2020_CLEAN_HUMAN_DATA_PATH + params["featurize_states"] = True trajectories, _ = get_trajs_from_data(**params) def 
test_csv_to_df_to_trajs_integration(self): @@ -117,33 +129,53 @@ def test_csv_to_df_to_trajs_integration(self): _ = csv_to_df_pickle(**params) params = copy.deepcopy(self.base_get_trajs_from_data_params) - params['data_path'] = os.path.join(self.temp_data_dir, 'unittest_all.pickle') - params['layouts'] = ['inverse_marshmallow_experiment'] + params["data_path"] = os.path.join(self.temp_data_dir, "unittest_all.pickle") + params["layouts"] = ["inverse_marshmallow_experiment"] _ = get_trajs_from_data(**params) + class TestHumanDataConversion(unittest.TestCase): - temp_dir = 'this_is_also_a_temp' + temp_dir = "this_is_also_a_temp" infile = DUMMY_2019_CLEAN_HUMAN_DATA_PATH horizon = 400 DATA_TYPE = "train" layout_name = "cramped_room" def _equal_pickle_and_env_state_dict(self, pickle_state_dict, env_state_dict): - return equal_dicts(pickle_state_dict, env_state_dict, ['timestep', 'all_orders', 'bonus_orders']) + return equal_dicts( + pickle_state_dict, + env_state_dict, + ["timestep", "all_orders", "bonus_orders"], + ) def setUp(self): + print( + "\nIn Class {}, in Method {}".format( + self.__class__.__name__, self._testMethodName + ) + ) if not os.path.exists(self.temp_dir): os.makedirs(self.temp_dir) - + self.base_mdp = OvercookedGridworld.from_layout_name(self.layout_name) - self.mlam = MediumLevelActionManager.from_pickle_or_compute(self.base_mdp, NO_COUNTERS_PARAMS, - force_compute=True, info=False) - self.env = OvercookedEnv.from_mdp(self.base_mdp, horizon=self.horizon, info_level=0) + self.mlam = MediumLevelActionManager.from_pickle_or_compute( + self.base_mdp, NO_COUNTERS_PARAMS, force_compute=True, info=False + ) + self.env = OvercookedEnv.from_mdp( + self.base_mdp, horizon=self.horizon, info_level=0 + ) self.starting_state_dict = self.base_mdp.get_standard_start_state().to_dict() - outfile = process_human_trials_main(self.infile, self.temp_dir, insert_interacts=True, verbose=False, forward_port=False, fix_json=False) - with open(outfile, 'rb') as f: + outfile = 
process_human_trials_main( + self.infile, + self.temp_dir, + insert_interacts=True, + verbose=False, + forward_port=False, + fix_json=False, + ) + with open(outfile, "rb") as f: self.human_data = pickle.load(f)[self.layout_name] def tearDown(self): @@ -156,15 +188,16 @@ def test_state(self): self.env.reset() else: self.assertTrue( - self._equal_pickle_and_env_state_dict(state_dict, self.env.state.to_dict()), + self._equal_pickle_and_env_state_dict( + state_dict, self.env.state.to_dict() + ), "Expected state:\t\n{}\n\nActual state:\t\n{}".format( - self.env.state.to_dict(), - state_dict - ) + self.env.state.to_dict(), state_dict + ), ) self.env.step(joint_action=joint_action) idx += 1 -if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == "__main__": + unittest.main() diff --git a/human_aware_rl/imitation/behavior_cloning_tf2_test.py b/human_aware_rl/imitation/behavior_cloning_tf2_test.py index f82085bc..e3a132a0 100644 --- a/human_aware_rl/imitation/behavior_cloning_tf2_test.py +++ b/human_aware_rl/imitation/behavior_cloning_tf2_test.py @@ -1,10 +1,26 @@ -import unittest, os, shutil, copy, pickle, random, argparse, sys +import unittest, os, shutil, warnings, pickle, argparse, sys import numpy as np import tensorflow as tf from human_aware_rl.utils import set_global_seed -from human_aware_rl.imitation.behavior_cloning_tf2 import BC_SAVE_DIR, get_bc_params, train_bc_model, build_bc_model, save_bc_model, load_bc_model, evaluate_bc_model +from human_aware_rl.imitation.behavior_cloning_tf2 import ( + BC_SAVE_DIR, + get_bc_params, + train_bc_model, + build_bc_model, + save_bc_model, + load_bc_model, + evaluate_bc_model, +) from human_aware_rl.human.process_dataframes import get_trajs_from_data -from human_aware_rl.static import BC_EXPECTED_DATA_PATH, DUMMY_2019_CLEAN_HUMAN_DATA_PATH +from human_aware_rl.static import ( + BC_EXPECTED_DATA_PATH, + DUMMY_2019_CLEAN_HUMAN_DATA_PATH, +) + + +def _clear_pickle(): + with 
open(BC_EXPECTED_DATA_PATH, "wb") as f: + pickle.dump({}, f) class TestBCTraining(unittest.TestCase): @@ -19,15 +35,31 @@ class TestBCTraining(unittest.TestCase): Note, this test always performs a basic sanity check to verify some learning is happening, even if the `strict` param is false """ - def __init__(self, test_name, compute_pickle, strict, min_performance, **kwargs): + def __init__(self, test_name): super(TestBCTraining, self).__init__(test_name) - self.compute_pickle = compute_pickle - self.strict = strict - self.min_performance = min_performance - + self.compute_pickle = False + self.strict = False + self.min_performance = 0 + assert not ( + self.compute_pickle and self.strict + ), "Cannot compute pickle and run strict reproducibility tests at same time" + if self.compute_pickle: + _clear_pickle() + def setUp(self): set_global_seed(0) - self.bc_params = get_bc_params(**{"data_path" : DUMMY_2019_CLEAN_HUMAN_DATA_PATH}) + print( + "\nIn Class {}, in Method {}".format( + self.__class__.__name__, self._testMethodName + ) + ) + # unittest generates a lot of warning msgs due to third-party dependencies (e.g. 
ray[rllib] using outdated np methods) + # not a problem when directly ran, but when using -m unittest this helps filter out the warnings + warnings.simplefilter("ignore", ResourceWarning) + warnings.simplefilter("ignore", DeprecationWarning) + self.bc_params = get_bc_params( + **{"data_path": DUMMY_2019_CLEAN_HUMAN_DATA_PATH} + ) self.bc_params["mdp_params"]["layout_name"] = "cramped_room" self.bc_params["training_params"]["epochs"] = 1 self.model_dir = os.path.join(BC_SAVE_DIR, "test_model") @@ -35,45 +67,57 @@ def setUp(self): if not os.path.exists(self.model_dir): os.makedirs(self.model_dir) - processed_trajs, _ = get_trajs_from_data(**self.bc_params["data_params"], silent=True) + processed_trajs, _ = get_trajs_from_data( + **self.bc_params["data_params"], silent=True + ) self.dummy_input = np.vstack(processed_trajs["ep_states"])[:1, :] - self.initial_states = [np.zeros((1, self.bc_params['cell_size'])), np.zeros((1, self.bc_params['cell_size']))] + self.initial_states = [ + np.zeros((1, self.bc_params["cell_size"])), + np.zeros((1, self.bc_params["cell_size"])), + ] with open(BC_EXPECTED_DATA_PATH, "rb") as f: self.expected = pickle.load(f) # Disable TF warnings and infos - tf.get_logger().setLevel('ERROR') + tf.get_logger().setLevel("ERROR") def tearDown(self): if self.compute_pickle: - with open(BC_EXPECTED_DATA_PATH, 'wb') as f: + with open(BC_EXPECTED_DATA_PATH, "wb") as f: pickle.dump(self.expected, f) shutil.rmtree(self.model_dir) def test_model_construction(self): model = build_bc_model(**self.bc_params) - + if self.compute_pickle: - self.expected['test_model_construction'] = model(self.dummy_input) + self.expected["test_model_construction"] = model(self.dummy_input) if self.strict: - self.assertTrue(np.allclose(model(self.dummy_input), self.expected["test_model_construction"])) + self.assertTrue( + np.allclose( + model(self.dummy_input), self.expected["test_model_construction"] + ) + ) def test_save_and_load(self): model = 
build_bc_model(**self.bc_params) save_bc_model(self.model_dir, model, self.bc_params) loaded_model, loaded_params = load_bc_model(self.model_dir) self.assertDictEqual(self.bc_params, loaded_params) - self.assertTrue(np.allclose(model(self.dummy_input), loaded_model(self.dummy_input))) - + self.assertTrue( + np.allclose(model(self.dummy_input), loaded_model(self.dummy_input)) + ) - def test_training(self): + def test_training(self): model = train_bc_model(self.model_dir, self.bc_params) if self.compute_pickle: - self.expected['test_training'] = model(self.dummy_input) + self.expected["test_training"] = model(self.dummy_input) if self.strict: - self.assertTrue(np.allclose(model(self.dummy_input), self.expected["test_training"])) + self.assertTrue( + np.allclose(model(self.dummy_input), self.expected["test_training"]) + ) def test_agent_evaluation(self): self.bc_params["training_params"]["epochs"] = 20 @@ -84,30 +128,41 @@ def test_agent_evaluation(self): self.assertGreaterEqual(results, self.min_performance) if self.compute_pickle: - self.expected['test_agent_evaluation'] = results + self.expected["test_agent_evaluation"] = results if self.strict: - self.assertAlmostEqual(results, self.expected['test_agent_evaluation']) + self.assertAlmostEqual(results, self.expected["test_agent_evaluation"]) + +class TestBCTrainingLSTM(TestBCTraining): + # LSTM tests break on older versions of tensorflow so be careful with this def test_lstm_construction(self): - self.bc_params['use_lstm'] = True + self.bc_params["use_lstm"] = True model = build_bc_model(**self.bc_params) if self.compute_pickle: - self.expected['test_lstm_construction'] = model(self.dummy_input) + self.expected["test_lstm_construction"] = model(self.dummy_input) if self.strict: - self.assertTrue(np.allclose(model(self.dummy_input), self.expected["test_lstm_construction"])) + self.assertTrue( + np.allclose( + model(self.dummy_input), self.expected["test_lstm_construction"] + ) + ) def test_lstm_training(self): - 
self.bc_params['use_lstm'] = True + self.bc_params["use_lstm"] = True model = train_bc_model(self.model_dir, self.bc_params) if self.compute_pickle: - self.expected['test_lstm_training'] = model(self.dummy_input) + self.expected["test_lstm_training"] = model(self.dummy_input) if self.strict: - self.assertTrue(np.allclose(model(self.dummy_input), self.expected["test_lstm_training"])) + self.assertTrue( + np.allclose( + model(self.dummy_input), self.expected["test_lstm_training"] + ) + ) def test_lstm_evaluation(self): - self.bc_params['use_lstm'] = True + self.bc_params["use_lstm"] = True self.bc_params["training_params"]["epochs"] = 1 model = train_bc_model(self.model_dir, self.bc_params) results = evaluate_bc_model(model, self.bc_params) @@ -116,17 +171,22 @@ def test_lstm_evaluation(self): self.assertGreaterEqual(results, self.min_performance) if self.compute_pickle: - self.expected['test_lstm_evaluation'] = results + self.expected["test_lstm_evaluation"] = results if self.strict: - self.assertAlmostEqual(results, self.expected['test_lstm_evaluation']) + self.assertAlmostEqual(results, self.expected["test_lstm_evaluation"]) def test_lstm_save_and_load(self): - self.bc_params['use_lstm'] = True + self.bc_params["use_lstm"] = True model = build_bc_model(**self.bc_params) save_bc_model(self.model_dir, model, self.bc_params) loaded_model, loaded_params = load_bc_model(self.model_dir) self.assertDictEqual(self.bc_params, loaded_params) - self.assertTrue(np.allclose(self._lstm_forward(model, self.dummy_input)[0], self._lstm_forward(loaded_model, self.dummy_input)[0])) + self.assertTrue( + np.allclose( + self._lstm_forward(model, self.dummy_input)[0], + self._lstm_forward(loaded_model, self.dummy_input)[0], + ) + ) def _lstm_forward(self, model, obs_batch, states=None): obs_batch = np.expand_dims(obs_batch, 1) @@ -138,38 +198,5 @@ def _lstm_forward(self, model, obs_batch, states=None): return logits, states - -def _clear_pickle(): - with open(BC_EXPECTED_DATA_PATH, 
'wb') as f: - pickle.dump({}, f) - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--compute-pickle', '-cp', action="store_true") - parser.add_argument('--strict', '-s', action="store_true") - parser.add_argument('--min-performance', '-mp', default=0) - parser.add_argument('--run-lstm-tests', action="store_true") - - args = vars(parser.parse_args()) - - tf_version = tf.__version__ - - assert not (args['compute_pickle'] and args['strict']), "Cannot compute pickle and run strict reproducibility tests at same time" - - if args['compute_pickle']: - _clear_pickle() - - suite = unittest.TestSuite() - suite.addTest(TestBCTraining('test_model_construction', **args)) - suite.addTest(TestBCTraining('test_save_and_load', **args)) - suite.addTest(TestBCTraining('test_training', **args)) - suite.addTest(TestBCTraining('test_agent_evaluation', **args)) - - # LSTM tests break on older versions of tensorflow so be careful with this - if args['run_lstm_tests']: - suite.addTest(TestBCTraining('test_lstm_save_and_load', **args)) - suite.addTest(TestBCTraining('test_lstm_construction', **args)) - suite.addTest(TestBCTraining('test_lstm_training', **args)) - suite.addTest(TestBCTraining('test_lstm_evaluation', **args)) - success = unittest.TextTestRunner(verbosity=2).run(suite).wasSuccessful() - sys.exit(not success) \ No newline at end of file +if __name__ == "__main__": + unittest.main() diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py index 00749e45..30c6923b 100644 --- a/human_aware_rl/ppo/ppo_rllib_test.py +++ b/human_aware_rl/ppo/ppo_rllib_test.py @@ -1,5 +1,6 @@ -import unittest, os, shutil, pickle, ray, random, argparse, sys, glob -os.environ['RUN_ENV'] = 'local' +import unittest, os, shutil, pickle, ray, random, glob, warnings, os + +os.environ["RUN_ENV"] = "local" from human_aware_rl.ppo.ppo_rllib_client import ex from human_aware_rl.ppo.ppo_rllib_from_params_client import ex_fp from 
human_aware_rl.static import PPO_EXPECTED_DATA_PATH @@ -19,6 +20,13 @@ def set_global_seed(seed): tf.random.set_seed(seed) tf.compat.v1.set_random_seed(seed) + +def _clear_pickle(): + # Write an empty dictionary to our static "expected" results location + with open(PPO_EXPECTED_DATA_PATH, "wb") as f: + pickle.dump({}, f) + + class TestPPORllib(unittest.TestCase): """ @@ -31,19 +39,37 @@ class TestPPORllib(unittest.TestCase): Note, this test always performs a basic sanity check to verify some learning is happening, even if the `strict` param is false """ - def __init__(self, test_name, compute_pickle, strict, min_performance): + def __init__(self, test_name): super(TestPPORllib, self).__init__(test_name) - self.compute_pickle = compute_pickle - self.strict = strict - self.min_performance = min_performance + # changing the cwd to where the test file is + # default parameters, feel free to change + self.compute_pickle = False + # Reproducibility test + self.strict = False + self.min_performance = 5 + assert not ( + self.compute_pickle and self.strict + ), "Cannot compute pickle and run strict reproducibility tests at same time" + if self.compute_pickle: + _clear_pickle() def setUp(self): set_global_seed(0) + print( + "\nIn Class {}, in Method {}".format( + self.__class__.__name__, self._testMethodName + ) + ) + # unittest generates a lot of warning msgs due to third-party dependencies (e.g. 
ray[rllib] using outdated np methods) + # not a problem when directly ran, but when using -m unittest this helps filter out the warnings + warnings.simplefilter("ignore", ResourceWarning) + warnings.simplefilter("ignore", DeprecationWarning) + # Setting CWD + os.chdir(os.path.dirname(os.path.abspath(__file__))) # Temporary disk space to store logging results from tests - self.temp_results_dir = os.path.join(os.path.abspath('.'), 'results_temp') - self.temp_model_dir = os.path.join(os.path.abspath('.'), 'model_temp') - + self.temp_results_dir = os.path.join(os.path.abspath("."), "results_temp") + self.temp_model_dir = os.path.join(os.path.abspath("."), "model_temp") # Make all necessary directories if not os.path.exists(self.temp_model_dir): @@ -53,7 +79,7 @@ def setUp(self): os.makedirs(self.temp_results_dir) # Load in expected values (this is an empty dict if compute_pickle=True) - with open(PPO_EXPECTED_DATA_PATH, 'rb') as f: + with open(PPO_EXPECTED_DATA_PATH, "rb") as f: self.expected = pickle.load(f) def tearDown(self): @@ -61,7 +87,7 @@ def tearDown(self): # Note: This causes unit tests to have a side effect (generally frowned upon) and only works because # unittest is single threaded. If tests were run concurrently this could result in a race condition! 
if self.compute_pickle: - with open(PPO_EXPECTED_DATA_PATH, 'wb') as f: + with open(PPO_EXPECTED_DATA_PATH, "wb") as f: pickle.dump(self.expected, f) # Cleanup @@ -75,8 +101,8 @@ def test_save_load(self): config_updates={ # Please feel free to modify the parameters below "results_dir": self.temp_results_dir, - "experiment_name" : "save_load_test", - "layout_name" : "cramped_room", + "experiment_name": "save_load_test", + "layout_name": "cramped_room", "num_workers": 1, "train_batch_size": 800, "sgd_minibatch_size": 800, @@ -86,16 +112,20 @@ def test_save_load(self): "entropy_coeff_end": 0.0, "use_phi": False, "evaluation_display": False, - "verbose" : False + "verbose": False, }, - options={'--loglevel': 'ERROR'} + options={"--loglevel": "ERROR"}, ) # Kill all ray processes to ensure loading works in a vaccuum ray.shutdown() # Where the agent is stored (this is kind of hardcoded, would like for it to be more easily obtainable) - load_path = os.path.join(glob.glob(os.path.join(self.temp_results_dir, "save_load_test*"))[0], 'checkpoint_2', 'checkpoint-2') + load_path = os.path.join( + glob.glob(os.path.join(self.temp_results_dir, "save_load_test*"))[0], + "checkpoint_2", + "checkpoint-2", + ) # Load a dummy state mdp = OvercookedGridworld.from_layout_name("cramped_room") @@ -114,12 +144,13 @@ def test_save_load(self): # Now let's load an agent pair and evaluate it agent_pair = load_agent_pair(load_path) - ae = AgentEvaluator.from_layout_name(mdp_params={"layout_name" : "cramped_room"}, env_params={"horizon" : 400}) + ae = AgentEvaluator.from_layout_name( + mdp_params={"layout_name": "cramped_room"}, env_params={"horizon": 400} + ) # We assume no runtime errors => success, no performance consistency check for now ae.evaluate_agent_pair(agent_pair, 1, info=False) - def test_ppo_sp_no_phi(self): # Train a self play agent for 20 iterations results = ex.run( @@ -127,7 +158,7 @@ def test_ppo_sp_no_phi(self): # Please feel free to modify the parameters below "results_dir": 
self.temp_results_dir, "num_workers": 2, - "train_batch_size": 1600, + "train_batch_size": 800, "sgd_minibatch_size": 800, "num_training_iters": 30, "evaluation_interval": 10, @@ -135,20 +166,19 @@ def test_ppo_sp_no_phi(self): "entropy_coeff_end": 0.0, "use_phi": False, "evaluation_display": False, - "verbose" : False + "verbose": False, }, - options={'--loglevel': 'ERROR'} + options={"--loglevel": "ERROR"}, ).result - # Sanity check (make sure it begins to learn to receive dense reward) - self.assertGreaterEqual(results['average_total_reward'], self.min_performance) + self.assertGreaterEqual(results["average_total_reward"], self.min_performance) if self.compute_pickle: - self.expected['test_ppo_sp_no_phi'] = results + self.expected["test_ppo_sp_no_phi"] = results # Reproducibility test if self.strict: - self.assertDictEqual(results, self.expected['test_ppo_sp_no_phi']) + self.assertDictEqual(results, self.expected["test_ppo_sp_no_phi"]) def test_ppo_sp_yes_phi(self): # Train a self play agent for 20 iterations @@ -157,7 +187,7 @@ def test_ppo_sp_yes_phi(self): # Please feel free to modify the parameters below "results_dir": self.temp_results_dir, "num_workers": 2, - "train_batch_size": 1600, + "train_batch_size": 800, "sgd_minibatch_size": 800, "num_training_iters": 30, "evaluation_interval": 10, @@ -165,21 +195,20 @@ def test_ppo_sp_yes_phi(self): "entropy_coeff_end": 0.0, "use_phi": True, "evaluation_display": False, - "verbose" : False + "verbose": False, }, - options={'--loglevel': 'ERROR'} + options={"--loglevel": "ERROR"}, ).result # Sanity check (make sure it begins to learn to receive dense reward) - self.assertGreaterEqual(results['average_total_reward'], self.min_performance) + self.assertGreaterEqual(results["average_total_reward"], self.min_performance) if self.compute_pickle: - self.expected['test_ppo_sp_yes_phi'] = results + self.expected["test_ppo_sp_yes_phi"] = results # Reproducibility test if self.strict: - self.assertDictEqual(results, 
self.expected['test_ppo_sp_yes_phi']) - + self.assertDictEqual(results, self.expected["test_ppo_sp_yes_phi"]) def test_ppo_fp_sp_no_phi(self): # Train a self play agent for 20 iterations @@ -198,21 +227,19 @@ def test_ppo_fp_sp_no_phi(self): "seeds": [0], "outer_shape": (5, 4), "evaluation_display": False, - "verbose" : False + "verbose": False, }, - options={'--loglevel': 'ERROR'} + options={"--loglevel": "ERROR"}, ).result - # Sanity check (make sure it begins to learn to receive dense reward) - self.assertGreaterEqual(results['average_total_reward'], self.min_performance) + self.assertGreaterEqual(results["average_total_reward"], self.min_performance) if self.compute_pickle: - self.expected['test_ppo_fp_sp_no_phi'] = results + self.expected["test_ppo_fp_sp_no_phi"] = results # Reproducibility test if self.strict: - self.assertDictEqual(results, self.expected['test_ppo_fp_sp_no_phi']) - + self.assertDictEqual(results, self.expected["test_ppo_fp_sp_no_phi"]) def test_ppo_fp_sp_yes_phi(self): # Train a self play agent for 20 iterations @@ -231,56 +258,59 @@ def test_ppo_fp_sp_yes_phi(self): "seeds": [0], "outer_shape": (5, 4), "evaluation_display": False, - "verbose" : False + "verbose": False, }, - options={'--loglevel': 'ERROR'} + options={"--loglevel": "ERROR"}, ).result # Sanity check (make sure it begins to learn to receive dense reward) - self.assertGreaterEqual(results['average_total_reward'], self.min_performance) + self.assertGreaterEqual(results["average_total_reward"], self.min_performance) if self.compute_pickle: - self.expected['test_ppo_fp_sp_yes_phi'] = results + self.expected["test_ppo_fp_sp_yes_phi"] = results # Reproducibility test if self.strict: - self.assertDictEqual(results, self.expected['test_ppo_fp_sp_yes_phi']) - + self.assertDictEqual(results, self.expected["test_ppo_fp_sp_yes_phi"]) def test_ppo_bc(self): # Train bc model model_dir = self.temp_model_dir params_to_override = { - "layouts" : ['inverse_marshmallow_experiment'], - 
"data_path" : None, - "epochs" : 10 + "layouts": ["inverse_marshmallow_experiment"], + "data_path": None, + "epochs": 10, } bc_params = get_bc_params(**params_to_override) train_bc_model(model_dir, bc_params) # Train rllib model config_updates = { - "results_dir" : self.temp_results_dir, - "bc_schedule" : [(0.0, 0.0), (8e3, 1.0)], - "num_training_iters" : 20, - "bc_model_dir" : model_dir, - "evaluation_interval" : 5, - "verbose" : False + "results_dir": self.temp_results_dir, + "bc_schedule": [(0.0, 0.0), (8e3, 1.0)], + "num_training_iters": 20, + "bc_model_dir": model_dir, + "evaluation_interval": 5, + "verbose": False, } - results = ex.run(config_updates=config_updates, options={'--loglevel': 'ERROR'}).result - + results = ex.run( + config_updates=config_updates, options={"--loglevel": "ERROR"} + ).result # Sanity check - self.assertGreaterEqual(results['average_total_reward'], self.min_performance) + self.assertGreaterEqual(results["average_total_reward"], self.min_performance) if self.compute_pickle: - self.expected['test_ppo_bc'] = results + self.expected["test_ppo_bc"] = results # Reproducibility test if self.strict: - self.assertDictEqual(results, self.expected['test_ppo_bc']) + self.assertDictEqual(results, self.expected["test_ppo_bc"]) def test_resume_functionality(self): - load_path = os.path.join(os.path.abspath('.'), 'trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500') + load_path = os.path.join( + os.path.abspath("."), + "trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500", + ) # Load and train an agent for another iteration results = ex_fp.run( config_updates={ @@ -291,51 +321,31 @@ def test_resume_functionality(self): "verbose": False, "evaluation_display": False, }, - options={'--loglevel': 'ERROR'} + options={"--loglevel": "ERROR"}, ).result - - #Test that the rewards from 1 additional iteration are not too different from the original 
model - #performance + # Test that the rewards from 1 additional iteration are not too different from the original model + # performance threshold = 0.1 - rewards = get_last_episode_rewards('trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json') - - #Test total reward - self.assertAlmostEqual(rewards['episode_reward_mean'], results['average_total_reward'], - delta=threshold * rewards['episode_reward_mean']) - #Test sparse reward - self.assertAlmostEqual(rewards['sparse_reward_mean'], results['average_sparse_reward'], - delta=threshold * rewards['sparse_reward_mean']) - -def _clear_pickle(): - # Write an empty dictionary to our static "expected" results location - with open(PPO_EXPECTED_DATA_PATH, 'wb') as f: - pickle.dump({}, f) - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--compute-pickle', '-cp', action="store_true") - parser.add_argument('--strict', '-s', action="store_true") - parser.add_argument('--min_performance', '-mp', default=5) - - args = vars(parser.parse_args()) - - assert not (args['compute_pickle'] and args['strict']), "Cannot compute pickle and run strict reproducibility tests at same time" - if args['compute_pickle']: - _clear_pickle() + rewards = get_last_episode_rewards( + "trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json" + ) - suite = unittest.TestSuite() - suite.addTest(TestPPORllib('test_save_load', **args)) - suite.addTest(TestPPORllib('test_ppo_sp_no_phi', **args)) - suite.addTest(TestPPORllib('test_ppo_sp_yes_phi', **args)) - suite.addTest(TestPPORllib('test_ppo_fp_sp_no_phi', **args)) - suite.addTest(TestPPORllib('test_ppo_fp_sp_yes_phi', **args)) - suite.addTest(TestPPORllib('test_ppo_bc', **args)) - suite.addTest(TestPPORllib('test_resume_functionality', **args)) + # Test total reward + self.assertAlmostEqual( + rewards["episode_reward_mean"], + results["average_total_reward"], + 
delta=threshold * rewards["episode_reward_mean"], + ) + # Test sparse reward + self.assertAlmostEqual( + rewards["sparse_reward_mean"], + results["average_sparse_reward"], + delta=threshold * rewards["sparse_reward_mean"], + ) - success = unittest.TextTestRunner(verbosity=2).run(suite).wasSuccessful() - sys.exit(not success) - +if __name__ == "__main__": + unittest.main() diff --git a/human_aware_rl/rllib/tests.py b/human_aware_rl/rllib/tests.py index 0e8cae92..16cb138e 100644 --- a/human_aware_rl/rllib/tests.py +++ b/human_aware_rl/rllib/tests.py @@ -4,9 +4,14 @@ import unittest, copy import numpy as np -class RllibEnvTest(unittest.TestCase): +class RllibEnvTest(unittest.TestCase): def setUp(self): + print( + "\nIn Class {}, in Method {}".format( + self.__class__.__name__, self._testMethodName + ) + ) self.params = copy.deepcopy(OvercookedMultiAgent.DEFAULT_CONFIG) self.timesteps = [0, 10, 100, 500, 1000, 1500, 2000, 2500] @@ -18,7 +23,7 @@ def _assert_lists_almost_equal(self, first, second, places=7): self.assertAlmostEqual(a, b, places=places) def _test_bc_schedule(self, bc_schedule, expected_bc_factors): - self.params['multi_agent_params']['bc_schedule'] = bc_schedule + self.params["multi_agent_params"]["bc_schedule"] = bc_schedule env = OvercookedMultiAgent.from_config(self.params) actual_bc_factors = [] @@ -33,30 +38,37 @@ def _test_bc_creation_proportion(self, env, factor, trials=10000): tot_bc = 0 for _ in range(trials): env.reset(regen_mdp=False) - num_bc = sum(map(lambda agent : int(agent.startswith('bc')), env.curr_agents)) + num_bc = sum( + map(lambda agent: int(agent.startswith("bc")), env.curr_agents) + ) self.assertLessEqual(num_bc, 1) tot_bc += num_bc actual_factor = tot_bc / trials self.assertAlmostEqual(actual_factor, factor, places=1) - def test_env_creation(self): # Valid creation env = OvercookedMultiAgent.from_config(self.params) - for param, expected in self.params['multi_agent_params'].items(): + for param, expected in 
self.params["multi_agent_params"].items(): self.assertEqual(expected, getattr(env, param)) # Invalid bc_schedules - invalid_schedules = [[(-1, 0.0), (1.0, 1e5)], [(0.0, 0.0), (10, 1), (5, 0.5)], [(0, 0), (5, 1), (10, 1.5)]] + invalid_schedules = [ + [(-1, 0.0), (1.0, 1e5)], + [(0.0, 0.0), (10, 1), (5, 0.5)], + [(0, 0), (5, 1), (10, 1.5)], + ] for sched in invalid_schedules: - self.params['multi_agent_params']['bc_schedule'] = sched - self.assertRaises(AssertionError, OvercookedMultiAgent.from_config, self.params) + self.params["multi_agent_params"]["bc_schedule"] = sched + self.assertRaises( + AssertionError, OvercookedMultiAgent.from_config, self.params + ) def test_reward_shaping_annealing(self): - self.params['multi_agent_params']['reward_shaping_factor'] = 1 - self.params['multi_agent_params']['reward_shaping_horizon'] = 1e3 + self.params["multi_agent_params"]["reward_shaping_factor"] = 1 + self.params["multi_agent_params"]["reward_shaping_horizon"] = 1e3 - expected_rew_factors = [1, 990/1e3, 900/1e3, 500/1e3, 0.0, 0.0, 0.0, 0.0] + expected_rew_factors = [1, 990 / 1e3, 900 / 1e3, 500 / 1e3, 0.0, 0.0, 0.0, 0.0] actual_rew_factors = [] env = OvercookedMultiAgent.from_config(self.params) @@ -69,11 +81,13 @@ def test_reward_shaping_annealing(self): def test_bc_annealing(self): # Test no annealing - self._test_bc_schedule(OvercookedMultiAgent.self_play_bc_schedule, [0.0]*len(self.timesteps)) + self._test_bc_schedule( + OvercookedMultiAgent.self_play_bc_schedule, [0.0] * len(self.timesteps) + ) # Test annealing anneal_bc_schedule = [(0, 0.0), (1e3, 1.0), (2e3, 0.0)] - expected_bc_factors = [0.0, 10/1e3, 100/1e3, 500/1e3, 1.0, 500/1e3, 0.0, 0.0] + expected_bc_factors = [0.0, 10 / 1e3, 100 / 1e3, 500 / 1e3, 1.0, 500 / 1e3, 0.0, 0.0] self._test_bc_schedule(anneal_bc_schedule, expected_bc_factors) def test_agent_creation(self): @@ -91,22 +105,34 @@ def test_agent_creation(self): class RllibUtilsTest(unittest.TestCase): - def setUp(self): + print( + "\nIn Class {}, in 
Method {}".format( + self.__class__.__name__, self._testMethodName + ) + ) pass def tearDown(self): pass def test_softmax(self): - logits = np.array([[0.1, 0.1, 0.1], - [-0.1, 0.0, 0.1], - [0.5, -1.2, 3.2], - [-1.6, -2.0, -1.5]]) - expected = np.array([[0.33333333, 0.33333333, 0.33333333], - [0.30060961, 0.33222499, 0.3671654 ], - [0.06225714, 0.01137335, 0.92636951], - [0.36029662, 0.24151404, 0.39818934]]) + logits = np.array( + [ + [0.1, 0.1, 0.1], + [-0.1, 0.0, 0.1], + [0.5, -1.2, 3.2], + [-1.6, -2.0, -1.5], + ] + ) + expected = np.array( + [ + [0.33333333, 0.33333333, 0.33333333], + [0.30060961, 0.33222499, 0.3671654], + [0.06225714, 0.01137335, 0.92636951], + [0.36029662, 0.24151404, 0.39818934], + ] + ) actual = softmax(logits) @@ -124,16 +150,19 @@ def test_iterable_equal(self): self.assertFalse(iterable_equal(a, b)) def test_get_required_arguments(self): - def foo1(a): pass + def foo2(a, b): pass + def foo3(a, b, c): pass - def foo4(a, b, c='bar'): + + def foo4(a, b, c="bar"): pass - def foo5(a, b='bar', d='baz', **kwargs): + + def foo5(a, b="bar", d="baz", **kwargs): pass fns = [foo1, foo2, foo3, foo4, foo5] @@ -143,6 +172,5 @@ def foo5(a, b='bar', d='baz', **kwargs): self.assertEqual(expected, len(get_required_arguments(fn))) - -if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == "__main__": + unittest.main() diff --git a/install.sh b/install.sh deleted file mode 100755 index d8fed619..00000000 --- a/install.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/sh - -# Install git-lfs for OSX -if [[ "$OSTYPE" =~ ^darwin ]]; then - if hash git lfs 2>/dev/null; then - git lfs install - else - if command -v brew; then - brew install git-lfs - git lfs install - else - echo "Please install brew and run the install script again" - fi - fi -fi - -cd overcooked_ai -pip install -e . -cd .. - -pip install -e . 
- -conda install protobuf -y \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..c8d63a0a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,17 @@ +#this file provides an alternative to set up via install.sh +#mainly used to speed up github workflow by taking advantage of pip cache action +GitPython +memory_profiler +sacred +pymongo +dill +matplotlib +requests +numpy==1.19.5 +seaborn==0.9.0 +pygame==1.9.5 +ray==0.8.5 +protobuf +tensorflow==2.0.2 +-e ./overcooked_ai +-e . \ No newline at end of file diff --git a/run_tests.sh b/run_tests.sh index 9f264a50..16db8afa 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -4,22 +4,14 @@ cd ./human_aware_rl # Create a dummy data_dir.py if the file does not already exist [ ! -f data_dir.py ] && echo "import os; DATA_DIR = os.path.abspath('.')" >> data_dir.py -# -# Human data tests -cd ./human -python tests.py -cd .. - -# BC tests -cd ./imitation -python behavior_cloning_tf2_test.py -cd .. +# Human data tests +python -m unittest human.tests +# BC tests, skipping the LSTM tests by default +python -m unittest imitation.behavior_cloning_tf2_test.TestBCTraining # rllib tests -cd ./rllib -python tests.py -cd .. 
- +python -m unittest rllib.tests # PPO tests -cd ./ppo -python ppo_rllib_test.py +python -m unittest ppo.ppo_rllib_test + + diff --git a/setup.py b/setup.py index 0a7e848c..7b59f4f4 100644 --- a/setup.py +++ b/setup.py @@ -2,23 +2,26 @@ from setuptools import setup, find_packages -setup(name='human_aware_rl', - version='0.0.1', - description='This package has shared components.', - author='Micah Carroll', - author_email='micah.d.carroll@berkeley.edu', - packages=find_packages(), - install_requires=[ - 'GitPython', - 'memory_profiler', - 'sacred', - 'pymongo', - 'dill', - 'matplotlib', - 'requests', - 'numpy==1.19.5', - 'seaborn==0.9.0', - 'pygame==1.9.5', - 'ray[rllib]==0.8.5' - ], - ) +setup( + name="human_aware_rl", + version="0.0.1", + description="This package has shared components.", + author="Micah Carroll", + author_email="micah.d.carroll@berkeley.edu", + packages=find_packages(), + install_requires=[ + "GitPython", + "memory_profiler", + "sacred", + "pymongo", + "dill", + "matplotlib", + "requests", + "numpy==1.19.5", + "seaborn==0.9.0", + "pygame==1.9.5", + "ray[rllib]==0.8.5", + "protobuf", + "tensorflow==2.0.2", + ], +) From 938bceed2ce26bdf93bba3983657c07010bcd1f1 Mon Sep 17 00:00:00 2001 From: jyan1999 <49133332+jyan1999@users.noreply.github.com> Date: Wed, 12 Oct 2022 13:07:11 -0700 Subject: [PATCH 35/38] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0aa25938..c1b89b2c 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ Note: if you ever get an import error, please first check if you activated the c If set-up was successful, all unit tests and local reproducibility tests should pass. They can be run as follows -**NOTE**: Existing tests **_DOES NOT_** guarantee reproducibility. It is an known issue with version of ray\[rllib\] in use, and we are working on updating to the newest version, which should solve this problem. 
As a temporary fix, setting sgd-minibatch-size = training-batch-size increases stability +**NOTE**: Existing tests **_DO NOT_** guarantee reproducibility. It is a known issue with the version of ray\[rllib\] in use. We are working on updating to the newest version, which should solve this problem. As a temporary fix, setting sgd-minibatch-size = training-batch-size can increase output stability. Due to the randomess there is a slight chance that some tests can fail intermittently by not getting the expected total reward. This is an unlikely scenario and can usually be fixed by rerunning the test. @@ -237,7 +237,7 @@ pip install -r requirements.txt to initiate those variables -The reason this is needed is because code files refer to the subdirectories as modules, and we decided to use pip to automatically add the submodules paths. This command invokes the `setup.py` file, which looks for packages in the _human_aware_rl_ directory through the `find_packages()` call, and register modules found so they can be referrenced. +The reason this is needed is because code files refer to the subdirectories as modules, and we decided to use pip to automatically add the submodules paths. The requirements.txt file allows pip to register the current directory as a package. Pip will look for and invoke the `setup.py` file, which looks for packages in the _human_aware_rl_ directory through the `find_packages()` call, and registers the modules found so they can be referenced.
# Reproducing Results From 32b1b0ed6ebd71433600e292810df05ab3350771 Mon Sep 17 00:00:00 2001 From: jyan1999 Date: Wed, 19 Oct 2022 23:08:42 -0700 Subject: [PATCH 36/38] update ray Updated Ray to 2.0.0 and Tensorflow to 2.10 Updated model configurations to comply with the new API --- human_aware_rl/ppo/ppo_rllib.py | 22 +- human_aware_rl/ppo/ppo_rllib_client.py | 8 +- .../ppo/ppo_rllib_from_params_client.py | 8 +- human_aware_rl/ppo/ppo_rllib_test.py | 29 +- .../checkpoint-500 | Bin 185376 -> 0 bytes .../checkpoint-500.tune_metadata | Bin 214 -> 0 bytes .../progress.csv | 3 - .../result.json | 500 ------------------ .../checkpoint_000500/.is_checkpoint | 0 .../checkpoint_000500/.tune_metadata | Bin 0 -> 34495 bytes .../checkpoint_000500/checkpoint-500 | Bin 0 -> 568778 bytes .../config.pkl | Bin 2544 -> 2578 bytes .../ppo/trained_example/result.json | 500 ++++++++++++++++++ human_aware_rl/rllib/rllib.py | 95 ++-- requirements.txt | 6 +- setup.py | 6 +- 16 files changed, 591 insertions(+), 586 deletions(-) delete mode 100644 human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500 delete mode 100644 human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500.tune_metadata delete mode 100644 human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/progress.csv delete mode 100644 human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json create mode 100644 human_aware_rl/ppo/trained_example/checkpoint_000500/.is_checkpoint create mode 100644 human_aware_rl/ppo/trained_example/checkpoint_000500/.tune_metadata create mode 100644 human_aware_rl/ppo/trained_example/checkpoint_000500/checkpoint-500 rename human_aware_rl/ppo/trained_example/{PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000 => 
}/config.pkl (54%) create mode 100644 human_aware_rl/ppo/trained_example/result.json diff --git a/human_aware_rl/ppo/ppo_rllib.py b/human_aware_rl/ppo/ppo_rllib.py index f636113e..c2ec7160 100644 --- a/human_aware_rl/ppo/ppo_rllib.py +++ b/human_aware_rl/ppo/ppo_rllib.py @@ -1,21 +1,21 @@ from ray.rllib.models.tf.tf_modelv2 import TFModelV2 -from ray.rllib.models.tf.recurrent_tf_modelv2 import RecurrentTFModelV2 +from ray.rllib.models.tf.recurrent_net import RecurrentNetwork import numpy as np import tensorflow as tf + + class RllibPPOModel(TFModelV2): """ Model that will map environment states to action probabilities. Will be shared across agents """ def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kwargs): - super(RllibPPOModel, self).__init__(obs_space, action_space, num_outputs, model_config, name) - # params we got to pass in from the call to "run" - custom_params = model_config["custom_options"] + custom_params = model_config['custom_model_config'] ## Parse custom network params @@ -33,13 +33,14 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name, **k # Apply initial conv layer with a larger kenel (why?) 
if num_convs > 0: - out = tf.keras.layers.Conv2D( + y = tf.keras.layers.Conv2D( filters=num_filters, kernel_size=[5, 5], padding="same", activation=tf.nn.leaky_relu, name="conv_initial" - )(out) + ) + out = y(out) # Apply remaining conv layers, if any for i in range(0, num_convs-1): @@ -68,7 +69,7 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name, **k value_out = tf.keras.layers.Dense(1)(out) self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out]) - self.register_variables(self.base_model.variables) + #self.register_variables(self.base_model.variables) def forward(self, input_dict, state=None, seq_lens=None): @@ -79,7 +80,7 @@ def value_function(self): return tf.reshape(self._value_out, [-1]) -class RllibLSTMPPOModel(RecurrentTFModelV2): +class RllibLSTMPPOModel(RecurrentNetwork): """ Model that will map encoded environment observations to action logits @@ -96,7 +97,7 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name, **k super(RllibLSTMPPOModel, self).__init__(obs_space, action_space, num_outputs, model_config, name) # params we passed in from rllib client - custom_params = model_config["custom_options"] + custom_params = model_config['custom_model_config'] ## Parse custom network params num_hidden_layers = custom_params["NUM_HIDDEN_LAYERS"] @@ -130,6 +131,7 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name, **k name="conv_initial" ))(out) + # Apply remaining conv layers, if any for i in range(0, num_convs-1): padding = "same" if i < num_convs - 2 else "valid" @@ -173,7 +175,7 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name, **k inputs=[flattened_obs_inputs, seq_in, lstm_h_in, lstm_c_in], outputs=[layer_out, value_out, h_out, c_out] ) - self.register_variables(self.base_model.variables) + #self.register_variables(self.base_model.variables) def forward_rnn(self, inputs, state, seq_lens): diff --git 
a/human_aware_rl/ppo/ppo_rllib_client.py b/human_aware_rl/ppo/ppo_rllib_client.py index 593e9540..e7b96f17 100644 --- a/human_aware_rl/ppo/ppo_rllib_client.py +++ b/human_aware_rl/ppo/ppo_rllib_client.py @@ -2,6 +2,8 @@ import argparse, os, sys from overcooked_ai_py.agents.benchmarking import AgentEvaluator import numpy as np +import warnings +warnings.simplefilter("ignore") # environment variable that tells us whether this code is running on the server or not LOCAL_TESTING = os.getenv('RUN_ENV', 'production') == 'local' @@ -258,7 +260,7 @@ def my_config(): "seed" : seed, "evaluation_interval" : evaluation_interval, "entropy_coeff_schedule" : [(0, entropy_coeff_start), (entropy_coeff_horizon, entropy_coeff_end)], - "eager" : eager, + "eager_tracing" : eager, "log_level" : "WARN" if verbose else "ERROR" } @@ -328,7 +330,6 @@ def my_config(): def run(params): # Retrieve the tune.Trainable object that is used for the experiment trainer = gen_trainer_from_params(params) - # Object to store training results in result = {} @@ -340,11 +341,14 @@ def run(params): if i % params['save_every'] == 0: save_path = save_trainer(trainer, params) + if params['verbose']: print("saved trainer at", save_path) # Save the state of the experiment at end save_path = save_trainer(trainer, params) + + if params['verbose']: print("saved trainer at", save_path) diff --git a/human_aware_rl/ppo/ppo_rllib_from_params_client.py b/human_aware_rl/ppo/ppo_rllib_from_params_client.py index c150f35d..1233fa18 100644 --- a/human_aware_rl/ppo/ppo_rllib_from_params_client.py +++ b/human_aware_rl/ppo/ppo_rllib_from_params_client.py @@ -291,7 +291,7 @@ def my_config(): "seed" : seed, "evaluation_interval" : evaluation_interval, "entropy_coeff_schedule" : [(0, entropy_coeff_start), (entropy_coeff_horizon, entropy_coeff_end)], - "eager" : eager + "eager_tracing" : eager } # To be passed into AgentEvaluator constructor and _evaluate function @@ -386,7 +386,7 @@ def run(params): saved_path = 
params["resume_checkpoint_path"] if saved_path: - trainer = load_trainer(save_path=saved_path, true_num_workers=True) + trainer = load_trainer(save_path=saved_path, true_num_workers=False) else: # Retrieve the tune.Trainable object that is used for the experiment trainer = gen_trainer_from_params(params) @@ -418,8 +418,8 @@ def main(params): # All ray environment set-up init_params = { "ignore_reinit_error" : True, - "include_webui" : False, - "temp_dir" : params['ray_params']['temp_dir'], + "include_dashboard" : False, + "_temp_dir" : params['ray_params']['temp_dir'], "log_to_driver" : params['verbose'], "logging_level" : logging.INFO if params['verbose'] else logging.CRITICAL } diff --git a/human_aware_rl/ppo/ppo_rllib_test.py b/human_aware_rl/ppo/ppo_rllib_test.py index 30c6923b..a0690a87 100644 --- a/human_aware_rl/ppo/ppo_rllib_test.py +++ b/human_aware_rl/ppo/ppo_rllib_test.py @@ -41,7 +41,6 @@ class TestPPORllib(unittest.TestCase): def __init__(self, test_name): super(TestPPORllib, self).__init__(test_name) - # changing the cwd to where the test file is # default parameters, feel free to change self.compute_pickle = False # Reproducibility test @@ -61,10 +60,8 @@ def setUp(self): ) ) # unittest generates a lot of warning msgs due to third-party dependencies (e.g. 
ray[rllib] using outdated np methods) - # not a problem when directly ran, but when using -m unittest this helps filter out the warnings - warnings.simplefilter("ignore", ResourceWarning) - warnings.simplefilter("ignore", DeprecationWarning) - + # not a problem when directly ran, but when using -m unittest this helps filter out the warnings + warnings.filterwarnings("ignore") # Setting CWD os.chdir(os.path.dirname(os.path.abspath(__file__))) # Temporary disk space to store logging results from tests @@ -121,10 +118,11 @@ def test_save_load(self): ray.shutdown() # Where the agent is stored (this is kind of hardcoded, would like for it to be more easily obtainable) + # 2 checkpoints(checkpoint_000001 and checkpoint_000002) are saved + # since we are only interested in reproducing the same actions, either one should be fine load_path = os.path.join( glob.glob(os.path.join(self.temp_results_dir, "save_load_test*"))[0], - "checkpoint_2", - "checkpoint-2", + "checkpoint_000002", ) # Load a dummy state @@ -187,7 +185,7 @@ def test_ppo_sp_yes_phi(self): # Please feel free to modify the parameters below "results_dir": self.temp_results_dir, "num_workers": 2, - "train_batch_size": 800, + "train_batch_size": 1600, "sgd_minibatch_size": 800, "num_training_iters": 30, "evaluation_interval": 10, @@ -199,7 +197,6 @@ def test_ppo_sp_yes_phi(self): }, options={"--loglevel": "ERROR"}, ).result - # Sanity check (make sure it begins to learn to receive dense reward) self.assertGreaterEqual(results["average_total_reward"], self.min_performance) @@ -215,8 +212,8 @@ def test_ppo_fp_sp_no_phi(self): results = ex_fp.run( config_updates={ "results_dir": self.temp_results_dir, - "num_workers": 1, - "train_batch_size": 1600, + "num_workers": 2, + "train_batch_size": 2400, "sgd_minibatch_size": 800, "num_training_iters": 30, "evaluation_interval": 10, @@ -246,7 +243,7 @@ def test_ppo_fp_sp_yes_phi(self): results = ex_fp.run( config_updates={ "results_dir": self.temp_results_dir, - 
"num_workers": 1, + "num_workers": 2, "train_batch_size": 1600, "sgd_minibatch_size": 800, "num_training_iters": 30, @@ -262,6 +259,8 @@ def test_ppo_fp_sp_yes_phi(self): }, options={"--loglevel": "ERROR"}, ).result + print(results["average_total_reward"]) + # Sanity check (make sure it begins to learn to receive dense reward) self.assertGreaterEqual(results["average_total_reward"], self.min_performance) @@ -309,7 +308,7 @@ def test_ppo_bc(self): def test_resume_functionality(self): load_path = os.path.join( os.path.abspath("."), - "trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500", + "trained_example/checkpoint_000500", ) # Load and train an agent for another iteration results = ex_fp.run( @@ -323,13 +322,14 @@ def test_resume_functionality(self): }, options={"--loglevel": "ERROR"}, ).result + # Test that the rewards from 1 additional iteration are not too different from the original model # performance threshold = 0.1 rewards = get_last_episode_rewards( - "trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json" + "trained_example/result.json" ) # Test total reward @@ -347,5 +347,6 @@ def test_resume_functionality(self): if __name__ == "__main__": + warnings.filterwarnings("ignore") unittest.main() diff --git a/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500 b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500 deleted file mode 100644 index 03752ea8362dd953385d305f6e2a046b697fac91..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 185376 zcmXVXd0bB47j=V1Nt%mDDN&Lks^{z@8qE;=6)G44frJL`1d)1aICL93(pL{$nwb zsQ-;i4UKzk@(bM<91)d}tsrFd#_WBY1tdaj>7?rVYknp_}|djQD__ zLXo{ea#o@nf=0v#$uAEYX|Pb#V4=j&qPb8bG)Uq9$9sl`2X2%N8s#kZe@|^@kYZv& ze2~)eAZ3Gv;?AN&V+c4;F6Yw>#rEpKU55Q4{{*nUO$fc 
zpvtd)k_J~(Y5ui!0(`g}0p=UV;c6RO9w{P@U%SskZFvPucb-Tq9TG^GSvp%Ly%|RB zC=rCeHigLTPPFstXxL^IgbIG;WPQvQIQQO)PARfwo36fqnR*8i#^y5x`&jyXSqfLO z&gS=)tl$>W1-S9#B(9`%m24YpKp%FW1()RpeC{+E-kq;Wm)WnRA1V!KqsYM0qd+j+} z*IX00e?Z9pTOa z(O{aIhzb(AJl69dmPHm(J?r^E-J69$+!r-U7qMY_yI{dUCdfR_29XEeMB?c}n&@!? z2e+AWI~Q~Qh<=2JpMK%rC$p(&c{oIm9KoBGx#EtsU-9R^9(Z{FD6~1tKzZjs+<#n` z-~E<`W9OUTzda4`$MYQUv3|UyIFg;({SRy+6G1)ZFj(D6#p7{TVf2<#czwr}7v~Fw zs#db}Md@Z7qWP-{KFfCE=2jQla#EGrK31epE{~@&bJoJW&8K0G*I&pkT}KU9ou-=` zQqg{X6Vd5x0d2+I)F?)jJGvy2xA`{w^nx!4o3Ai~$z?3OR~^57?juuAoF(Duv&f?K z>(sn#I;}Qzfy#^$Xc|=yEnX*35Y>)HuPme^$7t{;hKIl;W(l8QPbscV#~O)UFc{N} z=N{Fv2)iGcFw9jLpIb*p3!dTC9WnU2%9;Lq6eVzIQAH};gJ~BNP|oow6VqKlo3BUV z%Sr-QcRBGNuaNXfCef$qS~N#ljqaMYlVn5&V04QUtnNxdTlW~~-kXowo>|aqca zizpU{&vFR@$$Mo;CtSreA7v2})89&_6N(6BcHk9}=8gh>M5!YHvEH4`c z{kGi?nkn5<=wS(_Dcm|Zoq7E#3>+X2=y z-V3|K=7GYPyTXIl7vomlsc?Wl$NK{FTTn?(FF8(B{YQgQB9aMa&&kiz z^Wn*01Ce%}hwi1?WcTG55Ge_&mayN8M#IvHUn&4oo&^rGOUZ=GtDttnQ9)mvH*3w< zj)A#J!WE0ZkUkAA*gv_4xyj97%Gutqf3zK@XFX(#65>hunmEC*MP)?cy$z|Ayd*Sn z+X1E0fw2U4*U+lVT#@FXn!beMYp~UJq_*V29e9Pan;i?B=}{^i}10o4-@Z5^O!p)h(KJ0@3Yn-5n~Ud)mBrgKBt;( z2#bNL<;y|(=?)Z$`da0-F962ekAY4tWAv;nCV{`A!9sf)nxBxtpT{Rd?P0XYlsAWk z@!?o?^1aZZ$`tZms*My7oPB41k(DqVA{qya;8{^pNsHd@ieH2QkwKKDQlB}d&6&g-#Lbns5IB&u$mOn}vTmD5r zn`;UNS9rl)o9S?&J_w_N1}#>;--L>N!*Ihn3N>P{*`_DyJ)q{>4>&-f>}b0E=He-z>fwK;be3;zS{qdT)bxvIASes zk@!!rs$e1eE0}`!cU{6u6I?+lS4esrbTKJxE`NG#HGTH^9Q$?uFk4`J2G)Fi4|>xC zT)6m>uzmSPj67#YV=||5Zm!4Ao+`xsGkhUA_B5R9Jxtwt9Ps0VDo7Sdpcw&qJW~Gw zuCPAEd(?LGf0>2oSguY*FKf^St9Tf3SBtLG7zb%(_gL_K%D>fV(x4T#T>GLw%^lH? 
zYrk(J-EMhw=3p!zbx9i!ZjT555_^8Sw+k=zkH#xS7rD}`Sni(SOFeGY!_HP+^3dxC z^qH&hJzASEFl`_A9>OewBVo)$sKw*j47tMGw{W#92y{0Vpn~ccnC)rGue=}0=NH?; ztmw1C;;}b}a`(`2+*ZZeov)!V{vA&L=FG-xNTTw<5z_FvQV=*wf|S?C~Cfg zM5U()e;3REY3=c#aKILnzeTdRDogxZ76sZb@`aXTE|KzC>!DwNfFzuqjG3x>%%N40 z&AuIld4kQt&O*l4s*Z%8FWt%4>!FxuIS6zA3Q72*RPc0G0Az_$k8WZdiyF& zDlCENUuOxgNan&1&C}2jUqois&g9w^8Q?rV9V;IhQ5}yKGV!Jp95FbJ#&;G#+RK^n zdwevG+EmQ$Tr2>!a1kn98ABqo_T$>aZg6A%NoKV5H1p|MPXBYQBF986@Qu=A)_Q+G z1TAl6GVhB}HSQ<=NSeo-cFPg3JZIkWbU8?DdPYP|)#>^->+rFfCZ8iXNt7-e27A~9 z;R~O`wa-qtdGH#uUmXS}BQN4a$JY?%G@Q;ab$|!Iy+Gw>nMKe2LoiKpCVX&Dfc)-- z_-5h)^j{>(>pD{5>_|71dLPK9w&~F?pT5J1c2h9TJ_R!%hRc0$q!;FOV!(lgyuri{ z>caPeZK*$%32~&kc@LoWX@AYt9S?BjM@`tgy#`#KrE&Qc!}#yduW^K`KRITxlP)^z z&5K*q>BrJCXsFJHS5~v}Ytu!%(iX)WTot+Xl~b^q6wxVL?5X4P(Ohd+1l63>#M*r% z`R18>i1OADYPekvNB(i)lf6nt-#&7R*bKVO^cdXxY?71b!>A zE+`Ls3dh3L@4xUz$rAE%@_BwLC!J^9ynyMie#0LF8S2`z7r(bp!}=?+U_b2`o|$kI zYW7UR@+*&pk|RubphF@A91@{wfx{ti97mHALJ;5o2XuA?qP@*Ws8)&xTQ^;J`mhUZ zbj10Ew9BCK_?+PH&P4Fst7ksX*&GcbI{i5hOaaox(pA3rqOIJd3}PwBzQ4s zzF8#5N(}~qttUDz&LWH7+=n;a#@PIOGnSiv#Yta9NV|(MA34{M@jG#_B~OeeJuSD` z9r7I`F75}hqGHxD;yw&3RiV~>Uo6H>7vavlOd z%;GX=mR^9x(rVmoMH4LXm4>f=w!Br*nNNP!#g;0M<8@v}v^{wf_Dg)m{N$^29T`D4 zPW?fi?tTE<<^F@RMrEp)-vJBitH3BM3bswp1LKpCe6eX55yp4Ij~mUPxK5r6T{196 zN0gpAw;b)F#mTXv^C+7ofgww)K%w?C7)zc2wORr(&yIoX{x(S1t8Y;rsLJPeCNu4l z3bJopCwn@OK|Ys!#Bq&kFfrQ$+CF~72Z<|T>~sm}?--9=YjjDl%_H(8>MW%1S;cyG zuEV1kgxf#PUXbilK6tah5=T_aV%9kie6+KPnT2<=&W)qt{S;9w zpSunMi|(*5e`Z2dl3|UB(G0?Vt-&QTmty|`JB*rEE(p|`38wi{n7ZX4vA8oA_L%KJ zucgvh-qtJ3blF0lEtp6K`quz4I?ZO58-h>kQSwHh4`Np)M9JPW9OYp3^SfsP6u&MP2+0E`lhr2jFcRY^{-8HCIC`W2`+=QG;4!^fin{!V6@drjHW+)#A>u^IYiEo1$&Z;+H*rff`S1m+Fw z5Gb7O6;zE$BP2qd{o=rd$BO#-i%D>l`2B_B31q+c^xkMAr8NDP7!NAS7zP#2d|i_Vb`!E zR7+CF^n`erHL0Fi)|CoGB7B+gDPtH`Wej~Icfl}MA5i}Bp7lR=Muq(=FlqH3I3AjS z#A7?Krd*;vn1DV>uSinYHeBVh7wW6Ov#1EPD0$dO2EtwNitAkVe*Sx=K6{VwdH8j~ z`PIqnNzYdnod1MD@>Ei0RbJDGqsXJ?+j#J51_r#e!FdPD7^(j&@asN^E<-)3u7MMn 
zOq_wuMb0FDs2}ksuh>Y@ckD(ugWFU0psdGa-28Jq7(05Q)QL~1fBhzI88G7eRwu#U zU6Nqpkj9^OL~zZM`{|7%KS1~B5qf&?1yP&R4)R^Yh`MtI%(rSM~@HrRrr zKFJHdFU=;R8lL=Z+!rj1b)nDfGlk(^D+KA^3c=^_B=%{VF>Xwgf+rsjL0QXbKB4gx zZ(Z^R3xwlnX_qNi&AcMq+M-H77|o|gP11#=Z8`n7M2r3!o6PTuU*RV^JopHj&Cf(W zBdxOv@LPX1Zl8G0;#|) zGg&N|C!+;9cUIupv396EVXM${K`fMPyGrb}0j4ikhWg}a>@zPTb#dO<^4}(uwQgsj zz3bSec_NVayi)+@qtJbO3~nF&gguSigm;^kG2;tXK<@s-^qQsUwD}gKzC>`@s6++$ zPmu9wI4oZC1ugwIlV>MGVe(2TaZR)1uYW1}JY z?nqcQQ5#baoqPR@fvLTX`2N-zK)+5wN`NBItoHzM_pvBAo=oI~ z>F|2?RgnDXzz-ih%w*(lLW6$*`1{1rT!RsGcTEHse_lZK67;ymks|yO`h#2xa;9$z z_foYHL%2|FER9KQ!@wa9X4AfT;F;G)Dq{bGlcq-8;N9w)N;Mf?C2PvRoXNzY+{$i! z>cq7j)mT1xE}tRO$JY0xgUye{c(W)B|(B3SLOg^M+Z(J9OWMDITaS?{fM_-HRGvs;Wh2L$uUc40#Ei~uLW>EiHkqqydL*+zg z@Z?D+s35b4>DwM+O_u)LDzg-eWoqa)@hp60qKjd<4{*85ei{+>1Nn#BxGc38hsE@f zwj7<3$?30(C^ca?^*JEV zXPv^rs3qA_q#6eM7^KC6;J{E(B z?M8f-<|LH1IRILN#lq`9){&Lh`eEsg5me@AD33KQS0D&uPJG z$!@Toc%I}m|A6p$+mWl-P|>WL#HIAF!1VfR9Obwh|Fz%5$N9zFtndrIF_1!sYDM}i z;|Xl2uO#`lN3g1SHP4UUi>a#VvQrRZ#6cA7lGSBpnx)j-^jUN{qj{ zD|1Qa?<6J?;lZ^g|0Ly^q9_eotmx3LnGrE>6b=uRGc&V)N^`eLt9 zGM}9l!>tzl2bF_S=;QVpB<||KrPX8EgW02@ZB80i-3ev;o`>P+;5LEl+B66jG+^4; z3<&w+N){&@kRIQ7_T8oef1iGipNIm9G}s1@jwRxQwXcO)D@p`Eln!Flye!h!av0a{ z=_l3P0X`W$Bq~KY>~P{@eA_w?4G+GD*~QDr#VxsHQ}=Ax8YW_4S|knAa<0O8f^MUx3o*Jr01lCQ;COPwz@${b(QnVukeQh(br*Nx}g0elq@M@lh@0`zF2eosQHIUD#~K#A8p+F zDwll?Dq8rEcUCvBjv&YQelyZYv;~~3gJw! 
zye=iQo)w5s;vbL{S3exMS|E&&jfP!%Ga)(TAUOeX>_vY9Ry%Enuw_SuT|OCv&rQIc zWywVEj2C2{pM&vpdzs6(3g#$$j^3uV7G=k*Sz+-62vnI$UJOXUGOtV~a;*sRN4P-9 zDSM2nlVsyR$D(xQTBseqK-gDzS)kMA1ksv0Xy#tT#%`N|Zgv6SHvan`c}mYMW#8DVbi+En*arDu&fX|iCt$CBvvtl z#Wz3^&a)SbJRzV)jAtu;0ol_3V9K5p7#^R+R30mG{-};DXewvH#Fs0nEP@5ce&I7^ zDV~ZR*mGRKKV1@~2Ucj&Gaqxo-Hk)X?MtAhR0`VYDCl35LRO?HfMv&t8p8#~_7nn{eAfvFF0+}AvTzGEfcet8Hz zr(R=)t=oyozbWucZXw+NQNk>L#0qb$3qT#^Nsw1pO&*O}4yi|ekqw_tGXJVTwCI<^ z&#z*zKfHq6{izLpaYNie^DZcfvLHvVspF#QXIZ-E5WoJ)15Y2f!iq`{eCBA+zQxVMdPdQfNJya@?6MHWx$XGIfy4kAhJ(Bj90@DF3Xk0MAz~BCg3#(cNASm;QGE zm$HvUKWGtt4&8*UiQ{n9tZ0Z0TnkgqPQhin3S4_-T`NmBZ$A^biThyk-B-ugXd8dYUz9sJZl~^wqBpy|7cA$*cl=d zJ_bfU(Gw(l|7M$>>oWK4lPq?)ETt#qhH_qRK3%=r0}I^V!05?hur67TA2Cn{nTa3J zZPsRHAbC;v$5D~Lwu2ak9T&r?L zsQ*NZ)>J0@PNpgz4h@A@)mm)9bXC42@GrjX6UF`>Jzn~J4BxI{ z4Nr}#aOSo^SaP@;PVHL_k9Y6KkJ?K7!NdsoGNzRnn`IKOb=vfaz6qDL48;J;nOLTE zl^hNogf&YI`L1#h#&ul)oq!ocbfP$G{$)b1S{K6l;fvAeo-(amxP*EMlEM39AkKV$ z0nXWeCx_CWv)!6%WNnQO&H0%pa9Hn2XRp-abE7RFd%h+A`~!(`rvxk}A~>c`mq#0n z9=dOa0NREq-#(seuK9tvIi~!@-yFQ`Qir0pD{!FP0b|T7g{>Q(lKaJT@W9Du&@5Jk zifS#;YV`$sgttI8Z8QDqdxeePcZVGs5aU^|%0XRRlbTJkgn)l)a5?2J9D7)Zd;Zwr z6Q$!Qx`WW_^f}Bdq=z`$UuRaQjxwj^;?z#h2qosYL7S{6dME6qVJ_ZyCPad2z9!sA z)(#`5Nb&H~+4$pi9hv#A0Lzvq5dG1FXEj}g(`+(GJr$uh{QUWdfjqvbe=;BO#h9<` z*~|TwePZJZyWq^D9b{ry23x(dikR&mN$U))`RZF&VcJ>+d@38m-2~F~XRW7LXexR~k8-bL=sWKeIem8lTqqg|ZKrPu?QgdW z<2Fyh{Y4EpDJl^^uUW%W;^*<#9rD!LISJm+lBb6&VgQ9TAo;2umKSD_(DwIWT=9wg zGZth1$K~1j!cb^gtV!#Yd!aE;i%)d*#1fAu7;;2~JLJgmX?qQxuD9Yx z1?%zMtPew)<37xG72%jrUcVzJx=& zh#~!N`)9$;&~Y48lCkSoAr|! 
zpu|U&xj(aqd8y0btmI;B-gt&-ha6%4F$sb!X)o3zrBx&4w?i;`wHKc6hZp_T;WjfR4@SJpi^~;mmc%a||v8n%0Fm~ZhyxP1N|K+5k>D(69H+mS0 z-Dw6AfjT&1QxYx~+yVW$CNMELnuVK{!tObHnbOxZycgpPwx{&LPS03Sux~PCtd)Sm zGgGl3LmgYg%kfQcv|!Z6BLr@`_ov6 zn@!+-LL48Bcg4E$J|?pMUbWd1eNw+o0WP(~z}<-}L1a&c1xJb^if)30LLcZmmqBhF zhyZ1kxe%x|1HUwmhKldUnf*{bNSC`wd^Dvyw#B@}YZ zm%<(gC!yt3du%>30#2uGA}$xdk)xeOpg8IEsSnsQV+DSD` zpNd1&pYt^-VUt;ENDRzcID}bK7o)-L`^4UNH4JO)BNn%>vigNekjm$ghJ7!|sSPRE zSJXt(RcztGmt<6*jifGbH70}{CG}ByXzy-A>eP`9HfzHz^DQLZAPV(Ij0gFATl~6n zCraOqXUbiiOqicT%2)gmXjRMMd-oxoPU|+Sy>r20Y(qVpI(!yxA6WrTCHj0@O9Jfu z{s01Jb;16<5e%LUG>ytoUAAFjOcpFHXY*x`pM-8B- zUxVJ>_aC0MX(um~bYOW!FMIRrJD%R0Oj;JIqR$HgeXi%(dd}pB=F*7DF16n{F?kA>ZGYf zmzN4^Rq0^#O;z|bOQ z!S;h(u>LWK{ksh%Gk<|fW&$+Umyr%fRo>RP1XjMdjBSFc+@iCMEdOs99u(K7=fc0B z!phl5!zw}K;9mHnyo|e#>xcbUT7l)<<%1DQ@HFWVPQN^gzOui}@7>+cBi>x3k zwQ38W9Vkh?QaV6GZ6!bYbvC!rJdVFVZeZzCZP=L9RZKc43~KGx2v--sVMUrlo)U>_ z^VV&O5F7A;t?e>|LE#%_9~nxd#u=d4oMblLGmh0ZXbX3o%VQ;LlcBOB3H#d?!n3`Z zgqu!q7WHX!u#&?C0>hslXz5$v2PK}UQa!#BQZz)yKH==7f~RcjrE z>+<5D>a-61%!7k$;`tw9h_ToND#j#$Uikm9B14h zm9;m>Y&#vky`Y`l2n*#|b~C{#Z6>~qPK8i#!cx~)!UoMBu=}kDtvvY_(vD4}^4>+b za%K?@99N~~3+n{jRtuf-{y|Ik4wyg16gMUAMRW5-kojDXZaC@9Wyv|={Raz4$LCON z66WH!TQ+p@lsw`y?>k28#n3J1go1`MOb|UufGxg@=mxQ8Z0jL^`1mdwtj^!WN`*Zb z+^B$)5|ZGeQZD%aejqH^Y|Q0)mw}qSD_{K6mKV3p6VAT&2OcKoV31fSuAr;=+w`B< zs}T$G_>;`vQ;SpniSy*kt^9^_0ad;$r}ro}CVFVjqdXwEAM_e7qjBgcE{Hn>_aBMU zDF*~#e$j>2*Ofv}`v(|3Y(Ae~v`o->`!d?Bt!006ykU#?BD!%&AiexeN#L)oi^2po zSaIw(q_=P6KJ1JzMSmC%IJO*(#)weobvnGHpom=fQ8L6Su7r_--S9+bGTj#`gq8ch zz?GX-T-9x;Z&R7U10r9+=LuT;)3ZFlkS090%APx`N8ql3uedpM3O#+US2?vUj($DicQ);G{VW-={bCPA%zHR;b|!6eIJC0_M* z)TBTQ|XcrQ`&H#3^Zki@Lje(5B6No4KaXh|FMLX1Rh4EAw9&?YBr5rGlt(X zSE8?e*;1|ZPXyD<#tCJ86{y;*V)91IjBnH*s<$4spcdNA-rHF4!R|N)x)+Fg+&>VF zItb@S{=r>~(qPGX37#7JahC4IE|8jC$%%g+x35=%*H)8gMr#XPa$JWa(}gr>*Gljk zAwmz1G>1Di(KK!dZ%3#m(#-V+PDjRO#wTg*?fs#bN=l})3!tvbw-7B(wRR&I)e;mvb zZ^GE+cC;{z(s$c6QMsDnjN7wVqD(Ec8|Y#2+t+yi(p4g%GZE@VtN5G{2i7;Fm5MdQ 
zU`cNPk#g9E@`EQ?M8$HJeb)+4JB&eV76suFu4wYTm|UfSSXwj_B>PlgXT^NXYS@W= zj@O0S`tigBZwh7eV%W@*$+)Ib0;66GVeEY}Y_Kf`cFda(0l(+sj-Lwz&30c{a;_AX zH|`g1nJG^WU%o8}Gx|(+4S5W`&yK-=HJ{k!xAAc7*gTL67U0+)F?hw#9PeDaC@?&? zjftd8U{gEfuxQE1n%rqkjDDLckSTr6d|pM8j#OEkqaBU1#x9^Pw}M#iiy=`ht%BM& zj5rKN!lAZd=#(9W@?KNG{idlvae5ql=$V3>%f_M6fnm%xx0W^}v+{xxHaJ*MG8j`EW*lCBu@GILPi0 z66aPcl6u7x-LsK>RUY!gl@*Q^E!NQ#A0>tQ5Taej5xr9VbVQ zN`P!@1yk^iBYAPV;BD2AxAN3=Hg1hLp4S#bli&}+-{YctWGn4R0Rd{yTg^a2aAwk{T8---VkQbl!UyHEud{*BPhOV zj017SY{9q~Ja=a!W@cU`a;>xR!p{VV=vD*uekuH2`<>_?lZM*%-NFqYmqNL^4k|Cb zPO@!Zu=S~<@Kow$825KLL>~Tt9n*SH;b$+ZAAZX=9n|FyAJ#&tW4ds4l?Pw(X@lUL z<3yg5rOu^fcjBvVPq-l(MM74+1>IZ&swojgBRAwjc#u6U9XExJZ8xLciUw5PFIng} zqL$sS*?_Ae^O@V2fS*qC?;_@PNsJ0Bpo)Srn_PxO}U8fTJxc%g} z_AYRH>w?>T?TLxQZV)lON5;!-M!9|^=x!y@9XE|BUr>drOatgQb%Q$fW-{>D8z-DO z$)cWG5s?PF>T=sz5a$pLE5^MfJELRKSkDzCwW?TG%qSet84E|2jYj7eA|TS`Ba{~( zjujiF;c1~fri^gI!H-+nuIkCCU7S0lB+bR#)*-FNZY6GroP+l*mbP;gENF1y zZYrvD_{JBg<*|a_(rsfQ=?v$Qsr*6IkY1X)54yXpgv%m;G#HPfMC}<|S3eK#df(ub zsW%PMb)jtmBdO^997t`c2bq}7f)a;Bvdt<07514yg#jmH1Dznu(gW+0bBTB2OzNK@ z%XPKZ!MFuoNSqQN_ZLTLr=4(D;TqJR>VpSsROtc!4F1-o(QyA1KFqZUd?mV3?S}!D zPEw;bTlc}AA9~cS@eU4{3+NZearARtqp+-bG?o64iJ!EbsqKvzzF+Gq)b(6qQbWFO zw+APAV2v6t6jkL-4cTN-WeRP%H-=lOgbls_9jH0;IJ;$k2Df>v!H#bd73@yXAneHtO8P)NiN8I!1AZdFxD~(nwLHq zsyU;$apx=^wXBRZ{gI|xmVl2nPQinx3S`A64~ScOk!;>7N)=9Xv{c*!wT2ZIYfi@D z;QY&YSMd;xJu8cjhwI?n$hAzz&b(%ObvD@`)qoGxGRTPiDA4=15w6>ehYMSmlf=16 zu=?RT{v_}TlbwD`khv%e4qTIi`Rld8-6a?99H4mKk_q1K_QSlxro4CVZn|ncvdGG@ zpy+Q3qbg?cpwMihUarg|q{VRj%6%YRP=&D?({P5K7N1a(3;lnlVB4EzAXecFQ^QxH zwZ~X0JLVy-UFD4_od?mobURlcr3i=gFA$a0f8mLC3ixa=p;MZg*o|j*(8|+^6vex- z9mC(T87qvqaLssrG$tRnMY_W!X?tot>NBX1>t?r922kg+0*!FFi5t&Ora^tAbszwzwS8WVEQy9!3W*}*3KOr>#sad0@i7@wX` z3CWyQ#ZXn{O;pCzpzq&g zp_}<09{$pq_3xO6Zx&n!`}U!}P5&fhhIfGB;21hHF9HrJtl@9}-BNn` zvo|kze~pL~nzDU=r}8gR&iMS_0vMMq3E5x{&*onT;o@@W3>4>69?J4m2XR>36b4`a z^a$Q9kHW~e=V+47Ic)BpKo`aKf$hy)+Sw~kkKY_mW8;lczfOZca516>SKbr6un*;f 
zikV1c1l&{Kg;w0~p;j*^aZQbAB<6)QrcaXVlvq)1u@qc$vsrNV>tB>B83l*;b)bUl zY_Jk+!Ywu;Ao<@d80$2ZT~~Le>Ec_tbU`-XwBCX?FW*OEWKwYAw%zRgo1q@6`6b4W zpFth1bLq_fpTg@yd9`-o5;XIR;Wrk@0-U;zw~v1(E4e<;?5_ZiiQ_PCmjxKRWZ~0O zb~NX>>#dOAlt?P= zwdhgB7Z5Zp1hYei!==l!AmzqAFk765ZFX0ghnqWm{8hu|Y_&wm#8e!;&=I}Q>9Yqv z)i8a{WN>!Af^i?-5Q~y1+?DN(a%bvcrdB8$5KCl_!&8}|_Xsv~bTq8_)keHSzOeNs zkJyb*UT8Qzf^D9y3%`ty*E|{FgHJ~7tZ`gC4;+_d;Vru^iwsx{3nXs~!_#t!(n?Qq z$?pz1{UjYul47B0rY)LQW zWW+QYViq`~^5nfZxNLyswY z$xI7_-iiKLcp?Hsc7LnREMGz_7kgppkye)Zb|&d8O&|g*4~&);2RZ$3#LOuceflF{ z?L8qI_V5bPEb;)Ow5w!Ik_E1{T?YjZo#Bb;QRX>m4_>L*gEC`vF{0QM)T?ZWyyH^r zys;G0*KNm!vms=9ohP_I^v6zbX*Bpa27TU7fiIpN%rkc(njDRWmfdyazxzpGZ0`nd z&MpS)?=h?`!&zWi7$wYou@|N5_M&alEa=2d>=ysdO5`2TZQC`{P;!A}4e3`!Poh|Y zvNDaW@Z#R21ewo0G&*<_vFisk%ABMF;tQc$Z3;|_PKO2Fdtpq$V$O|j!p6w!a4hT+ zo^rp5;c03I!!0Cx*w!Nf_4wU*n0 zg_t_zO?WSqeY72;GO|dgsU^(Ji5$|0dzp6ZG~x2*M{LX0HMk+i11ukV2~0ANkkez5 ziG9yy@;k;H>Mm)p!tzA8pXdc`kBtNqOuZrU>^T1Fp9MQHI2t>0-jF4FPW;$xd5ei^ zlJu&hj>X#{EoP$s(Kcr0VVzR4BX13S`H_TQ^b2mfFf9 zgyO7HA_?=h0T_iGXYU_dKm)reu-HEZN)C-7D_)Jp+KymB;D1@d(`RPmERP_}Zb&BD zjVoA#>jmL+OMRSOHjUg_HV-teD6`A@Z!o7z4e}GD$@unG=K1s;#;u)=FQ(^W&9xO6 zf7Xj-n#BDBpULH)fX z_4n5V@kR^K^pt?aO_z}$jO7cr8&KJ-33Q~|Z|w5Rh3;2&>}`yv#YBmv)JSX_6dOG! 
zU%hIPL~2ltle4+efCgRQoDbEGqv>9;^%(cChM&3=&h_;c(8sYMxWoM<$SN4|55KPQ zkil~FoYBE{+?_=4w)=A3wKhykFq<~tHRYsmbaF>%@7dgN<1sLkyu=67{V`r3hR27XY4s2W71ozf!~M#*S1tt> z_GiMHr*DMY>pwuYX8>sSio@H`Vp69Pfnu$uSUExk-Fmbjp>`##hGg*B(@vKB(}DhX zUXTLD*u7SYwYzv=vmW4+r?y~OrzwbSvSgC4hl7dXG}0i_B3R_Vj||F0LW51J@P&>l zZthiJ<1F%UVWlIacF17&wI~?rH^eh_R>2cCo)#U9WcSMNVG!R>#4fFdX=Yk5ujM{O z``m(vfyej^L&>z3`CMV>+qo|*tnt~jHrO6%$^V7VWqDJN;OoL%ynbjTHfv}=XuC81 z8@&hjX4q3%KY4!PnH%qVB#QeT-Qc9}L^yxho+vwSqUU2CF=MNJ5H0v*QEb+aIZ#OC z8q45!i#Wgje313T%hQA7eNn_v795`pqu=`vLYZ3(j*sGa@9h~(>uF?F*Bynm`fuQF zxj&SJSkwKN^jX@7bnu)mLkIUO(WG((7I0xCH(L6@qF~Q4Y^j+B!@VZ(@|G5Xwbx-_ zvWir4v@&EF?&IC(Uk~ZPI<%;}ntgq_7aT4n33b0X@iz?%sqLJPcD1pIS-)2e5Lr$xBxh1D@$eWYw)ytZoE8W8B{4B!3~MBFmr4lF}!u0yzVbz zPw68FT=fs1ZZkr${4uyNIgPJN8%qy&P^dg#2NV8#3GT~csqR-Uqy_*Owgit zPJR@8Bfy=j;^{rzQjGI4$L2q>_)Zt-DA%FB<6|3I>>k01zAg{?H5$`G<4N-681|s- zGCb*s<>$6ZQm30%sB>i*YtQ%`8d*a@15}c_19pl z{et*89OEA5gG7Fe$p4|}yyK~U-!M)nNw!pEWt36&I?w${T1Z1Gl1inh2&F^Qgb%Q$4J#BG&W z0g)r#L9g-*?)0UZq65Pjqq~$6bw>gFn6v!SKxUhSwLfHDmcZ$j{^0P$|u6 zom7AwR>qu*iW;`}#>2Mzb(r{mB=<1TnyFMKK!|4+Tur}+1`QXf)xFJZ%?cOHv@+)c z&W6G`{ux@zcF~`2+`(MQmFh*0Vh|+Ocl?<^C2Ktym9d_y zKc!Lsv3&wg59H{JOBP)5PhC{XJ4L2c=do{tLP&MLk0vo{+$`Z5c+#i~2TjxYjOY$* z?fgnD0`G$Q*F9+Q$QZ9^DZ}LOkCqBSA5h7{bQ9d z-gplN)aQ{r%~4P=+5pP*v>;lti}nbv5vwW5!d)vjL9&+*DZUhizukhs+q0NXK43^) z&Bo%PUzud#qC9Ym=tZUQ#r21tmC`)tT%7)N9cFe_Q{z3Q80oZ;iVwDti)%)J->>tS zet9hn&$%YhzZi>_f3^!pPfWm*=ak`eZWTQfEsC923h3@tmZ+)rn2_MIvpKNlB$Vil2=I*^zUw@|qIfs$T}T>m#vzWEl2VTLJeg8Vhf^;GtWh z+bhdL88R0hrBegEq2MspR#b5$O-Nr(lc5&J$|F*vI@--T{ zNt)Pv-;Od*g3&MDk9Mq70KJ9tQ8PPUaQsa_b#z}27H;9NH!%v9iibddLKMV4iKJ~_ zEwt>4I-R&76dTpj>MicI(L3IKWUE^++V9~r6i!xP|0Dt;eKlcMbqHqMT@2B~;rJsg ziW_I;CKMJH;d1p4*w;}4u}gAc@k?=*Yh8jOD`nU%X=`pqOFn-3bqJ5i7%^Fo8SG%M zHOsk~gU=m@aCCY(R<^Wa_VFC3{vpi;{3psL8~((W+A#DRTu$bmB3NbihWPLBfQm9- z^zbvLg=tFQ5v2lhB_D)kH+i4eEd?4Y8;b+{YZIB~SO0py5!6l!hgeZhw0UAh8V%P& zPt$bJ+4zNgs{Twx`1|8eD_&O?eL)OEtI3J#DA4$#!uE^^rUm8#@+|Tgs6XCC9)Dg8 
z2BH4oQB;B#HKgH}&ks^7XTX_eiF2Q2E(=yEchU>*EAhc4mHP94CvXc_wUPk-WYoz} z;6`~I#px3+;^Fx-c&&9aSa`+LbpE?Hd-DTY^lvqcXt+yF=I#djaxZ~)whfH8(ZxG0 zEkeUv+PHUk0zN*agPPn9I$KJM7A0E>x=tI>L!GCIkFx<>uvEqyRW0PDAeTgLF(yVQ zKa=3xDdg9G>R>r~J)ai}f%qHh7}>Z@;4gZGsuVb*f{PTblQAUqcV18}O?kml$7r0k zP8ToTxP+ame8wU(MUXKoiZ0rniLsv^kRtacx_344EA&G(Ad4nEO?IYgQ7BUUc85_e7*uk*GO}MjjM(B*HgHR z^X2sxW(}z9J(3GwdY5jL^CPRY-{FGVHdx*=hLc&T#B0vcxVl20YcR0^Va+nEy^_un z!&;&8ODZn*oy4};et}?&;f@7}&<*PjF!Rns%=Fb8EKs-Nra1250#*1Oj%qGvxN
6Yl|4Lbu+Fj1Zy4`}qco=usmSeV|1beRP$Qi6$M%)gh;gMyM zEb_<+?s%RP_fjQ^<+UeLO_#&O`)Z&docH~n$QT0-_md&$s2GUM>9?!VIYka0DWdf) zBVkWvGG+*cWZ>&exc|`ugO&^kCOvjWd$F^^z4HuUKzb`p**q2If8jj<3%iIT&raAp zW;BX_cSflt&rp4%2JBlC54}A+S76iz!3ixPT!|~8CqH?^_bDGit9K1f7- z8soYWFzt&SG`Z@+{%Q%D(UT6sraE*e+zW!*X#Q^cqch_EQik->L@Jm9nC;;_#Kd7Izh5x+#`z8F8SWS1VwZ??wr-W6C+Nd3H5U-s( zj~`XdaYLslTfeVGFv%NmiM0taWqb_h#$WR9up8?hKLh5J9)@p=JXm<#EY>!Z3R0c- z@lwhVHa%#|_V+THGvA^2uI8DPq4*yRa*kr*Jsll*>B# z5L{ebxWUEi$%h5`(9G`$PKelX?ryrA|0+!`cgJSjuql>%yIPwI%bms@5Pd@Btv(2) zrE_?HlmwoOP8HVX26M$hQ@F=7mN3`#J+$3O2uA~|Nyf`{AknuLJ}f5%pGxa-A0(!TRSp8frX&Md$_Ns*WN-L-Rz)t^5e) z{ZUZd;e!Krd4^4p678APj*1ue(0W5T7Mqlc;g<2j$JQ}qz)BroxrD$r>$NmAW;On` zu_nVMSELUS_k;mYIYBO#7b%m~7a2(5AVu`z*w?Oln1p1CWjTQ|ZLZdhK z5R$fy9{6^UtbJW5xCM`C*UR>L>@2T;Rx*`bJz|Q-i=-j>x(q%DYM|W`=jqJRb~q!X znCvk2Cg-{=K&rqVrPd5n!xf$Mpmqjc>+_=1zmCEGGDc&CaSnV+^uY&Li%}!r0cVK& zLWa&}JQ~$R9B;0Iy46!?&)9WveO)D-oK%7b|D_7D%T~eeV1M*(E9UR<5vaVO2*pzi zanrXEc=fCpt+>YTbxZ@`&e;rNdBzt>)Hb|ok%;-}H%Q~bWSa7Q5@lH~u+~HkMgHc| z5ba(X`c@vjFU%Bn1ihpp-p=%8raf70@|`BVbmPBY_n=E@7@WPX4ObgXQ9ZAj%+%eF zvlNp-y+szECQpHZ8;LmWPO$B}k#VT9SP!nTEE@MNuU_k_HFo3p zje@D7IHUXsUX~jL6Wi*j^XV~Aqvx1CjbL`S5pCJpPPTbJqAl-KLC)I~<&OU) zKc*z1TuCx{F6n@a*N4+09d|+Fs!IZ&2X;{PEuQvPjE9s-x){-~0ynZ_Vfk|lOg-_B z&|`TZQv47)^aaABvpcC96;;I6ZaPZm-lq+f_YDXcjn^D0a@q z_&h~&g0jd>ZyT3F*$8-?07o2)N5@0J`5lV#O~f^4Pu&wmehk&mxBYqmYyuH-emqE|`3j zf!)U|vH46rot{$1mNgs~j*uEaLEtWkui(g5PhX+-)@N``w;Q^LCU9HsXRuCDV|G#f zB6|`>g&3}}b(ib^y-%dx=yQjf2@+Wgr8jG0*M!;Q_EnNGWFJQp? 
ztAliBbNk){z8J8@LhTJG=5q}uy#0&Icz@;GRi*4tXe4_pmJU_U4{^bgYhdbC0cPie z*tQi(T=#1+CgR^lX77;X>K9Q~XYd7SXEVI`cA7PXy{9rV$!Ox8MxZAEU4%zTV#7b7 z_Fz1?oEt|Mr*A}$CSNr2-VU1Yzfs5Jt#Id;9As2HA)J9McHNyu9{-7@@2}bxb;*>h0^!iTGt}Q?1U_0{2D_OLuD?DNdtZ9d_sU_Qh zO#g${rqjv0#nHI@;S9ECTRB;?_5xALQezh^_Tsk<;k;G9h_!wFfH(Z7s}`VOKU;|2GDv&c=&@tt9Qj71-nVgr+T0rT-Nz<+*W+ zZ0d1UGGsK8b@Xh8PYY|%B%jYrj`D&n(%Rh0nq)k_pHb>ewaE@ElxPuOEVB@92ofwjXP4Y|G z^Z6>A!R9&;|6UJEZ>F=VR#i@5Gm@Ev9b|U9OYocG8B{o)LpMwb63DA+a~{UN=$*NN z9W6eGav!9))Qs75dbT_q7yV6UEn3M6;@X$LPs6vee7+Qq3F#4JfNb9<@knNA;z4ER4|wa z-=PvecH$ygeGE~NM3=${Jfl@0nDuTDMjxss-(=nGs;!mLO>{i_T`DF}?h?U@{}Rzu z(~o$ic88SaIYO9ElHySrg<*%DPvereK_^>9%#KyrrYKpK-(K;Q25!1?7N;zza+&0tBqv8Mij%Az}2u%_8b^V zSHMt>E2gaAc^k`GVN;o+kf=J3$vsTLbiLWE@!%9t{aGnkbMq}Y^7`cD539J=k}f!F zcnVG!*U~%5o1wO*fO_qf1pBA}NV%U%YA46i+W0E+*H%c|C#s;`h)7@$Khf$-QBb7n zB}n-ki(TovKyr#7K$9O9ZITgAQEnm88BSpLX)mM zOCF*1(M$Amvm`t%xhvEj-i(1^4EL>jg{z&M>xB-6I4)2PR*bwve`cDIbqi8JYAwM} zVuy&M<5^f<5Jiv8Jx#q7Ps7qL2dKf;U2w{h*EbAO$(qa}P(9^M?>^Wjyg&38jI~8@ zZqGaFv~wev4VC;<JYeA9)d~^m`;Y z51L{pNP>dLSjfMdNtSMkr2bqIzPut&$Is^ZEFY(X-uEL^r&142)JKq@=Vz&O>Uq+2 z>n8699IT&mIEjWfucCKdi%8B2XDAcT!;8n&Xn#=|naF$1_P53gOFuZlwXg~b9WycD zkU4&fFvs)eOCdme8IEZ^N_S~pqR-{FqZH2@ZGHQPL~LAxf>EZZJZln4>$}2_ms+SD z@(wHJ%VGcHGdzPK34%+aq5VQWl`U|9k1o=Jz9UUwUKEFaJ166Qog2c$H5zok&qnm| zN`YZ}Wze12X%~G+6@!ujF>CG-G+*;k=%}TI<(JB_FYz~Mw%sJX4`k?f=N;6ddl^nw zUV^HF8!_{$8ybb%;Y(|6p?~pr^4L6y7RDN2mD&v=edQ{NoMT=exm=&@T{=u7)wz0F zvIxBb15sr0q}q|!cnwCxm!I2ZQ8P+LC})?9c0Z-C<@{~>PkKIPY}iV?mN$2s%&Zb6Q zNXcQ4Zxa1}m_sFn9T0hC6q^@uiCjGA$W7}=z>fyeT>qKoEbx~^|Oc>iVs?@wn1;zJ@Zz~4Q4kZ;B zf06i0j2?23gYh9%WUhEJBrQ$Aq6tkz(>GplHP05$UMnRvA`7XJbvVooQ-w1JN21GB z7g{rp=j|B8)2>Npd0nHKmU#t1DAmIv_kSeTVkSmB%tC`j8==m;4CD*!Sg6lWc=NIm ze8v~xaz#EP*)a{2gc7G4PZaRaHqD+TK8p&;9s9M%$@U)&WK%u3pPt|N`LmVtl2tDWa(VI_Tme4^xVfM&CzwkXH`l3 zw@9is)`8CHHODqXM~KN9g|Hxz*A2HoON#-Oem9=xyoyD~*g&4KBu`@Nq^ObKV>06A zb!z`~n24BkT331T4CL4VOnH`waz}Z_RYsq%V4Wt`*e!y$&pYX!g$E%^XE~m*OcmUZ 
z>>+16HsfM-CESmP&~17JRE6Hbv+gU1#I7lHaaoX}E;^p~_XamFR>mskBNUkz=%rv7fs5ttaQfNX;t6X`A@iMqbfETzFoJvs!kE1lWmzl(5Px*=LGm;jB5 z6sL)#qSLrvK5DVU1!QX{znR=@S?A#`Ua~ij>W%A`{HEA+zovX(k!L=1l0GpCDh@Zot^xaDHTQZ#}{v+He?^%G?@BpkST_b}=EXN<5l zgAy+tFmPVY4cvMJ!gg`);M+UMJ*Dt^<0ZJK&`P3I7Q&h@yfQkoRXDZCl#8^eqjO#3 z!NjGDo3VZbO8!@e4OMHw_TMUQ&g1`3v*r|B3=%!6dKKm~weO_wUy`$TuoLX^}K0_3Iip!q0?D_PGrS`_7TfB@evzmf{EzdgB>TPJaJh%<4WQ2! z!zi{oW-O;zEzNaRbmEZaL@tQWAv?kX&aomBC#OdVr8gbNl_JZbaN;wPnf3@BwSt*s z(l2tF&kLE~+`}rqXA0!f4B@s#Jq~{!qI)hXv&OTxps6sNxfm`aQMXg5(*{qNe@C7> zywXl^!t^zYNn0?P;Kx|?PaluhJQkjK6LsaVfJ5=9!TG)IqiY=@=%H7oN zh2<)r!M|t_9;J6<+M1_0dBKhgtNsY^jH{@qnVErQ+o?5;Z$ zDg7XF1C~@o+S~R{#U{+yu>@*GGUy1^G4S$hhahOQ36$p(I1o-N(Y=lW{| z9(w-p(swWEEYZcSCldvys!!3tOjFqS-$$w+mx3xW;;hWk9)h;!fz94Xva4?n$|&Cy z4vd*BSa$0%*_!rIaDJ)-40xW0j$So*@o5gq-hWEBPf~%KnzzZj(hD>s#gSP5NCU5@ z(I8UjM^^i)V%LKoMDG``-!-fvBYY-9dU+uBUe6~dB#%>z)5^TI7mV}HiJ?v8ZuB5& zjYt-xu+{bku?)+`jN1!IpwBftDsvj4#;9X2G?YPH;kxe>Rm% zp)>Oj3F!GPm^iZl1(p_AY$negN}2`R&936;<1SzyG9LC@U#5Yg%6QG#9|B|);eKVT z!0NL|79w zFGIqGN$n$%?hvQFZEu8$@@izf`vASZG6X%_M?lx~Fi3LjrcZmW6aSO;*dO+RdhL^^ zUG~v%Ftv$Z^$Nl+W8S+lYZeT|$bopz7HY53LW}fNu-d1RT5NCz69a3yZDIz!SSAH2 zBc>y!iD6%(E}hVAgMVFl-*2iCOswU=`=b)gUv!;hEdbQdy$v>3GDt{cHfC*E3=b#% z#B=YbfmOi?*nRChbYIJdjnVIfvj1ebXT#E*WXxCmSCJ39%A2w0HL%k1r${zga}qNf zgkL`J|Ih3;Jd=75CQLNs?%SUQt&pF>r#EkqhL$zxL4C0?Yzag}4Uw+o>%@Z#hT5@q zm^k}}P-od@j2uo81X-HFx~4eXT^ol3>OtUd&`p=sgkVsa1r)j*rH3-&`8>QCguD-e z2;~a$_|bguFx&+q6(fn}9aTY%TR2{I7==sjSc2cc46;Hc4$rBnVeJ|b;yoq~rj%Nv z%UVOY+BXBMU+;&I)xq@0^}V<+E*hU!{i9W**HTR_p1*Fa02gzm;MON+3|gKo@Vqw} zLo1KK&*nVT(yv6@_GyCCiN{beF%$0Gx&n2TXJXfsNWe)?{CZ z3wqwr+^^bL;O&Pt4gs9q?LJ%?P>COOzLUVxpLBOtKS_PIgyV85!TYo{6Z^KGIXo6; z68|m3g?|Z<;8Cn8JrNYYl%Q;vE4CKr;kPjdAnQURSiawjeY+Vf9lS26;h&HEtFdVC z%Y*rNxeN z>1Dq_Ip2yKRbj!5UcZA~NBimPrRLngS_h&b&F5_I9^~qG)S;Z5H!dq%MWmjXg2=-S zF#pd+I;&>@w~SC@i_|!_>)JJPu{jYp+a~bm^9K0cfi(N6U9U4~;{RcN!jie6q zO>tUHDt!Kt50_DgmPe++heUJc@ApBlVn~);NU5dY#x~<6epVGMe@w!)rHJ#0J78u# 
z7bH*aXCME5z-xOd_-+qpw($Kb&TqqMShk{?=3U@fL{7KS)~Qc$S6_#-I`IHbH*Upo zWzVtmXdMoR^V#1PTkdD~0B8@$u$k+|vVgwNc<}|#47agiN!ML4;z%c`SFPb3hv%W^ z&n!XihXT;+*aZP4<2bj7Pc%YL8#EN0u|<{V18i9VZ#kpLa9iY^94*I0_@LXbTRyAM1$}?K&=gK@X`_ncwYknb+ z-D<}fgjEZ6S3km8&CBp*gAS)~<}N5)5aoOyE3wh*yqM)`cP4rujMFl-;Uw}zxkTq) z{I#c>Xy~dj4^4`j{-vXu*JL1i^y(yeaHfM@)Fkq%)PHjPX1nT0i3$ZAGG z;?hIxMdoSlfZBR)o&P2r7od`>Wfr>IgdO%$ z#BO;Drl9p2HI|O&w&WP&qktGrHuVL1Js-;zuaoM}&yGU}$GM!>ha|YV*9$GX zUXX#)65N^3`YdX%Bq#pFf-K>)!^_gsak5(v3AOXUY2gN-_eP4y9T<(-7m}c(!GsxH zlxO>jo!M}8rjYAVX4PZ7-~<%IzC3p>M)iwusvin`J{_klXbwJEeGe`^Z~_IbeVjp} zJiQQR%N2TfGtXN(5c;c*Ufm1We7b-;FAQJ;_1nBJUWq&9TT6<2y6KHKE^Kl3S$115 z3%}*u1;_&~#9M=T_GxlgqQkLE!G*i09z)i+K19t8>Vi+b9-Kz*db(hOHv4%$mFMT* zeiGIW)d&(QM7e-AYbG^n zAwBZ55&flp<8Fy@+^a|OOeEzVHvALk{Fgd$>rOqOO&`QSt8W=^u1Eusmt7d+Gl7%h z6p6gwO|sZ(1it!n5&nyq4t3uYIJx?949-krx9!~on+EPe&k{#4NT0xzY%UTBiwaEX z9}D{(cfqZ`qqP6kP84`#(XGqfV3lV%1iY@Hhm&}0o1z2^cnwkW_uo+`WUJ zViN3JS1j1uAVLGZKhQVDlBi_;9G?1b$1|=2bj$BaXi$+ybyi%YqJ;!HqQ!kM(>8fX-Re2t4Bldcnv(aH^0pgY+KRh0%f;=#w4_ z->jUm@AXJ{`RcrI`Bp`Wr| zlD)bsG1ev=%r37-E-ePc^ygvcB_{|GGoZEpRkXiU9ZpDx(L}jqbhwcKT@Ovj?~wAPTi+)l9eA`q@8)Nr?!7(#PjCwDOEeN2E=90pU z#ox&0h^6p6oX>bhC}Q0}2p;TcqMg4Rs7G8d@m8CNS!*r9F+GTg2WZ1fvsj{*WDGh9 z{&-e32<)VF@sLX>)@r?@jn?B~shbZ(E?7&l_Gp3TU@oz$bs@W>_>7x$E9Nbn%vN%I z_ICMZ98Kjo(;g+xN@F`#-K)aHnWJe-X$O~(eTrm^t;GJ6ZcLvVN%wn9=GOJ^g2ER! 
z!M5!kqURg>AkdSUpIOZ%j>&~W8&@nEZB9h4jTWBHu|`MzAkuPU7udA#0Ke9)&?3`E zrj1X4BE#$C_snp7v&$Ht4$K7+Ro=^bTov*MIgoN3rYF|%*`8nv&~E3MB=ZgmvfFtM zsdhHmv1c+`>OLV+&5fwG_z2ymxdz`gXQ8XiU7BcfoIY>rhu2qQ>~iKF!y9v&>D-zE zIPvWTS((Xm-UfEyuM{D*Jold7T_6hKA~Nit8^twGj*#tzvRLxw3%pe>#-7_X@Ol)V zVV)p^-t(P=8?MFj+VEH``fnvnd>{#$o=5rY#s}*7pBr(!x|A3x?fIUbro$Hzqw5>!t#AN~{cJ$p$<9yAaPol^%DTV?p- z+)h6FPXXZx6Y{AzTWI7X$F%>b!_)W6aFId)Ei&^bw`$VJ`9+O_e$$77X@*;1b3r0i znK%i~r_O;|UZ-VRne_YeDAJ&-M_Thfk@Xq{RBDe4kiz4b_^1&FBASU+mIAk%_gR|u zNpK%ZY&nk$(m3D!HuOK1=j!#$IrIKHl>In@J5+It(maZu4ue!mbrEJ=@W<5>c6@p- zgheSju?f54(Xu}TRGBZmeq$K~<-CMRx>nqt7knRA*d}bcZ^{-dF5(V$Jc6+^wve}@ z#JRvzN5K4C0=cjD4))$w;2w7$?yxs#;c8Mf5DR@JXt$}c)=?sW;*-!e{ zEP%xi9@BAmy>Q-qeY}4;9A6KgrX8}+$svgtBL1M6y6kd-Bbm}5pLd1t)er}5r3^G^ z+lGUgsrWYU2z}tw2qe0QbSx~P!Qb12y19+yl@?(C@moUI|0MB5SSp0MZGnweU&$Am z3CE7*)1Jh%`V(41G;UKlDrZ#EHJe`3%TvVIy59}Z<#!lvZQ|L1eD6xew*BOfzatmA zxQrstybf7WNuuXyLj3^)7B9a7(=)i zxIH6F*wXZmbjv&u*!ku$Y~p>!M<+Y6E4TQrKeOX}ey$OJj;_GOmJ}wo#+c1$&%<{M z?&FyEJhQuV3v7=V#WTu^>Eqf5ux*AV+gLr5d!kchb<>i$*x( z`9k0tc%yjm+!8?@5XVQM};JJZQf2+6kLR=bGPBs zFlDw}dlQpcXUt9aUMf60sLSU0#!{05Q&GxHmNP1?pmISeVEh%@UE{9^L%6kdFpMD z{w^E!qA294`~rKIE;y_z3StgLkZ@27&kkyG?=Bsr{6K|cw1*+*%U*JN!8Z2!vj{E- znv9JH zVDz0a+{;``ey{Kv*DU%<`sDO6#%vDPwl|1Vm{&xvgs#9~I0{bRi5<-JJoQ#T)KHk4w_r(?MC<57Bc zXE_+Z(%{_oy}+j46yZ*+taI$vVh8WsfLv91FnkvVntUJF@AUoT(WNHgsg_c*b(AId z&}bbSRvRWuT};@>bylEWP=N*4BhhFLV*iXf7=1#EojQCT@{6X@rzysqP1k(7*xw4s z8XYDkX9rQ=m0`q~*N~%Lger>~1Wqw-G`i3Z4BHjaM|lNm?)^g*3+KYTVpG`P9*yM_ z&Ee0iljzq}%X9T+R_R&9C zKq`9A(T}3zgsoFsg`Ok*smkyRfdk*4xAbD4AV;klTgDdB^9D1irN9#Z7>AK$y;*o9 zPZ8xNmD3UYym;f>N1<)Y@A{b|*V4@~&Umyn1yfaKz<)|5sJQhi9xXkKiLE(!a?(CJ zr!NR*FAJvXGlu9ri)Ol9KM+q0&Oz!wAEygFa0=5GuIki)$=b)s!=C?0Yvdoo*{9>A zKvm#FHqh7PBdjrEG$Z~pZ88#LtA==PtXVq^eqszHYX8b%LQ@#VA{&1&PveZ$;O9EYXt%gsX&X9O=8{e0hO(cI!C0}+q zqw`05Xk9OXblV6FRc|EH(n2!fxdQ4;NT#YW8}LL(I8nd29IGGN!rAN=VZJoa-Mc=I z-fNVCpjiP#vs+9!-qeVcPW9%yyi~F3Xfx^Bs09b#0MOPI)c)9X;nhk7c&s!D?PK+Y 
zPoLeP8?D?yZTodXbk30qzr9Eono^fnTR}R)2s1Xzk!j*Nw7Nu_?}>RTIK+22YCeBR zQX;m)pQswh6BK}e_idXDy+zV0&6d4NBR>`jATcms&t!6F;+?~fr=HZ6k7 zyuP_aBOSeS=VQ*K`>5S>g6|5+Iea8kX^Y}n4YBqJ95KtT*?x*%+C|er)`A3EdluAXK!JJPB;WMejAiI?0MX{v>7BpX?yu4Ut%K z`WBqK`A$GLj^ozsy$c6ESi!oq8ZeS4a3g0GG|FuN8=l`(yJ7?Rez1V#nXN!>ejQZ| z3`djxv*c`nDJl9D23|QM(5rAN*1L>>$R(Bnr6aQ-G50;4*Srs1_vm2PKSqjqu2o9z zFzuP6290xu>>|=)=&>8l*l(YS~uFdQ(}n5#Z!oJsE5GzYb80*7DpFv5fwyc zRnsis`Q7f)DB-P-3k-aqE#@OF4xEECHT)s}l^Xok=^)Mi`nY*^op2yV6uzBp#esu6 z$m!FQgr$>WnZ|4d?oZ?jR=@WtPFt9SuOBYJ=Y!3dou5whi|-+5WWkq~E*!Sa#Y}Ta zt~>7o%3rul)1H;$p+$SpUYt2#U(S+_3j?xcvPIIo|0c=&M z1Bf-e#J2UOoKkWXTmC$k6YD5o?DGR|lbI_ftaV^pI|JE-ai;hpOpOhN+`+w0JmXj} zpLm^cxBK_<8ovFv4Y{mcOvT_fOU{?!8m^sY555J$yx;@uO|d2$U+xY!KD>Z{@#naa zfAz@aH<7IIjYBP(EQ2Gi+t@2rY0RBEj>Z1fWOwt;n8MNhBq%tF*%^F>@QUxaX8uiT zdutZ`c=!}O*>nu%@cd0Jjab&vH52PYc!rZp3{lds#UkZCn$nm{T==`Vi}OihQ>ITs z`Q1ia`xv+p#^IqjAJX$h5q5oDiE1@~ixrH)jWf0!B6n{V!{5)f_|rE9^3U^ZjKi%UeEuK3{CEQ1k=Tx+R2Ki(oyUjvEwI%g zjyb32!stEapp-q14M!H>e;X|X{rQT#cQcmVA^bk3GZQS%d2-{c)0xCbcP7jCx>RrZ zPV622gFg;=*poULd?h70NZ|8&+AndDIe=oXB76B$oQp3~?QW~roPfAzy;`wh8oI_n2H{r};m@Ix38*?6!as7p``jRNi zE6aurVd1=fTZ5LjpMg%!4CWMQ##L_i=VCroFxTW9++TheE4S;wU&*6zKq{o(U!fBp zujOaKZ8Yvj9XOcLMIZQk;GHdnv~TtS;ppivap%-_y!XS3X-+%F+CE8e zZq?h_*^$|FQ#Q{So79RrOPX-hyQA!#&RAEqO=k}0}pDYJc^5gLj?@jNPI!)#DqnVrj9`tc^5FAtPr`0#R@YsW1$et=e&R;49 zg`Q&4t(5@WVI!QG7>lExdO)&DA2=KuhNGX`;PZAx@LWBcZd@CR_on*_v)_HBnI=0} zYefc`vHK!bj|;{ngJ1Y=r7?mpsteHZ_68I;F2uAc&fKzZU2yAIGOEhe(x^9zFfTKJ zF0V?({iP>RXXX^zI^r8R7G1?^?NsjmV{`C*m4GrYjId-xC(a%%!T$U99IWhaVAkJ$ z=>4O~By~pPtsNpz(qYd6M-`!)+k9^OwiCGN?r5g_S`j@~o`Sb~Wa!%?9M>bR#aPDy zUYF-RnX1Dezb+CdD<5N9cF%=y<_9J#8@)$@6m?x}UQ zyZYr}<>i}bp>`kp9wp&h={2mi0S_sJ`qz5#O)sAfp=*V z`a24IU9BN$=UKeFt_1JpO5p4xhv=zm3kel{N;2svh^o*dH@c=_&8t)_wU>vPq4Ch% zlZRsaM#76SeV7)TOWn%P;J3C)I^%+?P-Th}YCLm+YadS0W??%nS@eY1oQ*;;?@{oo zb|WOk3{mTImFU=Pj*=0Fq0}aoln)*g6h6-+9Ru2mc9rEmw zZFgZ$njYr)j-fj?yAYo*qv6}ZN5sbMCk9U*3psmy39Uff=Q;_ihcDu&b)SXFpKDO_ 
z_H^j46=MEg7c^i05biJ&+#%xt5${uoCqHw2UOWZ%2OHunJ$2YW<`mCdiNGg=LDc?$ zDAkldPrDw5@b48x7-w8Zmz(^eB4NX}nv>#5%JcnTSv3bkppX0>RYoT48jFjK19?8O zw!m@rXk1vd4yC4!fb7F{R9(g$8=v};1=FK(t=DGsPfwv;`@RZ2&o9D;*b;P7zlfLi z=a2`_mcq*NO!CEZ95Vh69CTqeWQQ-L*-iWCy2v;hw%&kPJQxqp$~l3NBY(E+{!ZVX z%z*DHd>?32DNO(UQ{cWclvHQVhd@OIn4;kf4F@tH%xJBHE1d~7|kQ)#DW zxOXfKRysZt%v>UY_tyr&j|vwkxa7p12CT*5JRPQ;zZrjIF-)%)(l>cCx$|EqI}NPAn)Bz$emthS15LXA@4xd!PtnETH)sYTKJV?Ue%cA7 zO-*3-(+RNl{yQ2sF9-H;@wn2Zl_Yjc!DsT2Y;=&uFb7F;<=b2As~O81w${KFt2^Xr z@h86TQiT}~InejlXOf`MQ@G9444-FPpuyd-uqI#zR}<-5G@3Zyj=>I}1^A?W6V5i-1{t5mkfJaX;maS#Nl>jJeRX*sh6<9%ycBtK zeQ1hhcU7Q^4Upgeqv*WDvHspTE?Y8^B%whiAua1U_d!N7qCqO5($J7fC8g{wqoq=a zq)3v4=iDcWG}KqSRMMbGODbvoKEHom&*gIA`8?;`_xt^NEj1uEUB^Yb)8~L~f0S?+ zT@4x4W+KI?vur`rF!1+q!}(!fMWwr{#HlYEL`Dzg@ypK1_)85%G7Ih5*F&>#*zyo2 z=dc#579`-N1A;l^-!{>n@axr>}KBXM)pe9?ao^U+OJ6TFr#gEb8*ROBkKednlS zsfH%M+o}tO_vPW#u@(H-meaU?Bq6>-Lt$u;EB#dB%!Bl``Q9NRq}tL3vhsw_IH%$K zsNfIHjZ))R?nFSJnKRwjGMaDRBE@(2pM_^3lZ3BVtjq_mdLN3?T4Y!oVNUqCJYs!Rs;u zL=XOsW9ok{iPN>3L36n7s@v^{G*e^~e9Ru|Eh|Lqgc*6`j@4p(1zu-i} zmCxX$#6rkqA0aZO77lF+Cd;R%3 zb{+l+@7J_rK+a?^UY!Othd#q?>-B82=@lzd9}b-+*YNk3vv@8u1}=L^(A78pk?_BR zaHy6vZ@G~GwoBvbnByODZVsW(@4v$Z?@q&tOUIz@j5Zx6?9&faG@{8j1OAm1 zyt(r5G(LtVFiF1&{K>H2@a-jF(vA~6f22QMO>}s*hZO$?aiS7!V>ah*3=XkKhT(79 zNaT(Ybdupcq80TVMX4U#RA7xZZz<$Pfj*?zLIrhirPhp9y=u88WeDAAI+A`!0xEyX z0bZ>BjIDO={IsV5U!o;Ap46mioA*FEQ1%>2jLia@)k;`D)D=&e3Cx?-ZRFwSDApRS zK^yvNz*=B;)g{e>GvPOJ=j%+k;j{yOhg?N7-Ab4!xLQ0%{KSXHCtzo>2@TK@vU$R6 zU}`qu#wN1#`np4qB^?XVPJh5N<0#N4H_6ohywG505xW&wL*CmT#@coLuygo()_sK# zu|yrd*{VaIB_>0(h78EtO0t*RL=dqfCg2;Z~Af<^#rQB48T_1+iAKIBP zt>Xb4BO~x9KLpEktDxmw16aNvz(u!S6Y_m3vszFE%A1m5okt&N*dGzxwYk{gQVT7Q zgx>t8^APrNB+aeMMqB?kux>>Lo=rT9>30JmjSDWC@7vH`>k_^?8%U(G0x?!J1Mq1* zTAjZuII@-aj=O5G!eTG^QznheH%Fta!BlWP;m2Oy*@9u~JuqVaODh{qKL{VO5nIz- zKv#``rsxj2uf7?#H3%7ZA!8R*I19>7CyOKUR4_rQn$&bnf#;iCvFTbOs9vyw4eCnR zs;UcnCN2b$vkO-Z6LQ^O@2~>LCF1(+Cs|bNLG1Kvtfcr5IqtMwboj_Nq;>zX 
zF+&8d_)cTASWzMxbap1H4_SbIvrbvHxL1pwT6_{u@^2@-rk8QS3=NUv9-+t6JQEV~ zDv9^pbaMZ*m{b*?BR*TA*r*yUabZjh%ldC4kr+K(a0Mq~VU55WA1WnEI53bz>5moe z{aW}ELNH0v4})9Yku^cxtSwz0RE2C%+j1Ak{eF`;)~B&}>0P3ChPnbTDhOjo>SDM| z33G{}=(c19E;rr^v&*enS(h^0%n`am<(jOtYc5VpQ9%3X2h|hK*Q4W&wasr*Ua0hTRshN|5$7(58T-zW+szy6=!CS*kS>R|KlZBW)@L{bh< zg8^-Yb0SJ z!O3WS+)}7a*S}NX=T>K9$KVlEC^2DJcRCtu@+Ok5>NqXX7evB&y}w{7?CW|A+9(Qb0>0Pyyo`a>wyTME<1UOC_Od41X~NVWSGlj^oUxY^Z3^1`pfd$h{W`1HBJThSWf;5>~U}an+}1|TP+D?LnUCEXc$)J89<1HJw{~4 zv54ad#9iqV^sLy91{X7!gsxuoo?m;}u3diop65gGER4dGtNE;Mq8``2br=q8Ed-w_ zE#tT7UcxHhEXz+OR>wu2_l2zZYZuV>ABQOiJ7@Ux2?7#(-n9Aumcj zOQol0@*L@7bbXu)_d3~znrrsKZmp^G(EWN6|2>kM&1O_IunG-6&mgOTbDL-V=!k-Y zKla^1_JkRj|mTbmMj{E-2W+QPx>Pbn+e!u1HPnt>A~sbo_qa0-If{MJ-Y*!8h7W*p-H$zSIilH6{^0AGs)w z{UbQJek zrAHVtcRVW__0dxG4>?zJ|UO7 z`tA%q4My{@n&ossVgY$8cpl5noA9sFS|D>{2-Qq>;YD{sxp{pN+;382jXf(wpQIwu zbmS{2b~6RJ_-pv^{&4QL!I%xJ+|9+2+QNHpIW)h%f}$<6sQ=79vc5cuX?@;8>*y^O z@-ULm+EEW}%NKM1b7$aN>{3`jU%?|K3BEUU0PaXshb1R1sK?NQ@Mdok{^mR2YwQD- zQo%$4K8LtWcs@>l2YmMFqjW`)4jpvV2QS?Zn~nicryT@qINZV{LX+lX9RAiC@`qhPk3NH>hf@lsddfnyRM z&T_H%PCABoor0y3vuNJxMR@VdUT}Nv1nl2gF8wK*Mhm^I#>36Hr28=QAG4f-(CG-Y zE#@0!_Hc)cd0bcZ0C*%`1h3MOnD%}Yuj-n}<*KLRivyB$ci}EROKu~tUffcnXsJyz z7EKqOjjhF+xp7?a<9U=gI*A8tcjU9DbwH(TIGUf-rGphMxcxFQUJg{GPK%V`>5u=g z_4_pJ@w-49%dbLdn!2#3@8l;*KbR*jr^?;4`L178IJ$H^UHEAM&Azn&1{}HvzYNpi z-8P_jg>&bltvL5w2-lM8#7^NZr($4CS9)B82%8|-|K>8;9u--AWnd9KKPd~$_qOv9 zlak1uH(T+ytDN}O*d81{$cQeSR>fT`r|{;UB|`UGjf=l1a`$zQAY-{FT~sfC;uRFZ zH`@`u2>yhNTHj!FfY8^|cjIome1#m}5dKxy2W8hKz@4Lwu;r-%x1F2?9x8r3U41G3 zQWfSW(tjb&aRH9Gw~uW5{Q%zmaG^(ZKQO2B8)3kN0rX~_8Jv}<$C>X&)9;fnh*KR8 zfzj+8a46~*NJo0nl6VJN8D>u_#$O`0ofF8|gld$L6*@gL`Y|O`ns!~=f^Hkmv9k&X z;nG<#4yzkUKbTdB$&>eNj}8x#A~agAXHbHEK-dYI?B@_JupjLuuB1| zZXO`PNnU8CybNYpFUP<~@n~S~#`=#t3asrg2sjx5<>K?qLKu{I-)d#)bN?d}Pu8nZ1oHv;-G3-|Z^k-*QpZ_0|kZ>}1(o-ABws zU>>gRBiNK=k9Q0lacSsl;{L-HyT`Oxt%}Y9f9Xm#r>2m7?Uu(g7ax+sP-|rGPu4tc z9Kxo@rC{~_EVg!@8SD(0LT*(SGRRgtq#w1|Zak1F5fGC@7h0>F$WShMjYUpN)YK6N<(3dZ4+B#)0`f4ds 
zO$>qHLH2OaMHwQE%W>nETJfMK3Cu3WPuOdv!oHtcpdTEI#}{hAsK5*2wn1_vAY5Qq zyXlLSeUYUI?~{tlk6A(D6PEJf4*C7{6X_jR%igu_V&r2VsWJD$Ezgq5IrP+>h!I(ST1dp>H-lFv1ybB`hYg0fTv>buw6(yvNk6@zfwE zSzt%Zpg-l>@#^onqDkKKk!B~cwE8oc>?KKm*%e`AA71H{~+c?e%3p39%Fc@C0KTbYvJ z;QM8^9Fs4d;a;CdasAs)XwbHm=k)6GBd@1ZSMAZ%Za^|M{Zzs9qdoaE4L4Ghdx^8| z1a{kSFSCafQlvQ)yyXY6(}j;%mxnM%%?l;FWRh^aIJ_y3sgJW^0k+I+sHb;pC$1RD_J$OE+oGcn!!F~pwXWq8I z*qHDZV$jseHteXwjj}NqBk-{tPQD;XhNW=-kdRyZJd?GD9L3Hny<+WAxv*SeB;RUh z1dm22llf<&$)2|VFsH-~7wc+~k)IQY;=P+h${`smi(k-0;d}-U{er+=yC%dxenvwF9p*@ zHv9N{`VHkk5)>qA_OD z1x%b}h8r$U<+AxQv^T&Oju?k?HF6U?6Lt8~R2>p^WgB>Je+Tl1WO(s@U84MHA%7tr zD*9z-N`GFpqc#VUiTnx%JKN$}k7*q033rNVg?3!}`8pWWYKQ!}JM2HKPJ6$7#Oxm@ z1mECs*3^}X@67|?lqipUUN6s=EIx=k@)}U(^im={LI(DDHo@VecgXjS62i7^fiJbD zbV=i0?if&oRVy=S$>aO%^HgQt=zaqgT}JWuMLR+Jk_Oe$p2CMTYg4T;E_9s2Ag*UU zk{Sw`m3ar;`1-R>tbdL)eLwaX&f41qK3~hgb?jD76YQLl zMz3CuLCycvaUEQQU;Q(8&bVVmvFt%S0X*p^iuIPys<8ptC%NQ>4$FSm3s$)*uD`52!0%% zDhX|#8=+~zSSnTU6sD1_I3^kehHEKVG(VrUoH&QUQ&q4#`Z%f=*je>O?}Za!hj!KJ zO!;&?nvK{Aaek@joO761`b~rH+b$80A1Q+GOOCw~H?uXy>tW;%DUdo8f=zjXn=1A; z3A>+yP8&U8tl%=~m~IM%$5Zk2gQ1|+UPj`astCSXZ`I%vfhz}igU4EVxbx-&-q|w{ z-R1V<=)VG6t>cV%eOLqOXwzo>9mmN5rKQ+ont~m*gCV$gC)}8sZH1@WNz{L8t;M*gRHJt?Ko2;);n`ss73B%xZVweh<|?2=Dq{Qj9!3SJLS-+ zFOSp~G>DgVuY`|n(oopf!vb{WS>RK_Uvb0~%n!X%37O zlrj5bUBtRZ$R=J@1K*An5Ii_Sw0fBfu-9`j{hm6g*4`1hZ`8!La}uC;Tj05TNMsT= z#G>TPA5x?)umT5nS?OkHv(=BHLB1!39vQ%e)bd@q}?bP7)y_sZc-FnOj z9LrUPNI;Ks1P)LC&CCX7p_|bavcv5U9uUq9)ylCX^4cOk>GTj@dn3!LJ}DAwvXkKY zR#}0wu%AiHXtdH8<&K&&v|+x$FbJM3oQYb_kzVAdeg4Af_Oa)Wk1>!4sS? 
z^splv75z)3rDzM@DQmEQuNJO8;>XH*CE0J6M5x-X0j3kW$;^qd=(OG#-}M}2A-B%3 z7)LWmTWW+8-e=VmYq$KYH0&+L4a7CYZ$$3{;shsyce+0^k>Vz0lOh}M!tY_h!y zS+dLLTC{n+`2J_yu=;8b_P?K=S)F}?Wo{v<52k|$=u+%iCZBCK>sWbSIoct70*o%NA@uId-X%(G^ny$6s` zO-J@de>2Eet%t%oL&ywX0X9*QB7@C0Nra^Wk**iA2c2udrl+{3RW%-@U3vxXEMZ9- zdRfN!1q7d{5d zq@jv>Hy#{v3S?gQke99q3c2S=!;&d5^z2WuCR+t()7q^br1*o_Q;kl{7tZ?LJ*YFf znMCd$2a}ql_}5EvbeYs)sMa^4N;SgWWt6~M`#6wyN2TM!C#RwA^naoSGvxX1={xAr z7Jahz>~Gf7H-*bSUx*XLcfpAzvJLG6Fzt5>E0hT*k>Y*gNm?;*v{Z_R>U>9oyl;Y! zJ{8)IOhY|Ib?$5KCrVX!2dAdrV)Yl5;PKIuIvyCyl}6lQJ$kbAN`@!CC`*Hjrz7Bk zVWa57Ya#2^sz5(oF&9Y$EyQSf6Z-aR8hT2+#04{DKr!$PybM^v;jT3cs8^$#VGenu zP>TOj>R8hkd%oN>18c5X(D7H_gGPQZt`E8n)j?8pUGOb9b0!ooZA*pA{=YG6$zbj@ zYP!J0z7CEmuf(Y%WayV3W4bGG6#A+uQNK^eYTTb3XTCSivMH_y>39n#exfc)G_HR? zjCQT$t6c>LUBFzJ934qlU-qWbs~@0w%w;_3w@G|-*Z}Thw-jc(%J7ThBe~10@%S)i zt@yK7A$;=L&Rd^-2e*YXeCGXuH1fOz4N~eS!>V&(ScBl_7*o)hMn7aQg$wwT^D)cG4=8NxT4uL%lIiqXq-(p2ClX|A=b;IC}KPWMO774=RWL zMNg$`puTbgliI6+DHlJo2|tC;cRN*ZmwJr8Mf33IrW$nnvJ;#-9oV$blfbBaFSZQ| z$4wWTakl?-EHPP$p;F0Yn_C`UpEDUuBs}5fOAV34pKapENFSuVuf-8jrc5g?4J3X| zLCcAUN!Q%68rCAilKL5@?I^%aJDr*B^maDkvKOlu7KQPc2m3P>u;aOb=&i>v-1c{5 z&E`d3@HVfFJ&y0gZ+^eYr*LiZ&UrJtTry0Yl+rF9@3;y#^{#}sNn0^f=MH|)Jd8Ky zyk~NQQpC4Scc9$IgY3`XNMMRWPfGbSXt}pzSIa^2x;q)1JP0J z6U2w-JtL!iQ^iwHyWp3l87ynrE%sD9+R8U15uDa)LdcFcZ1Sc-kh=Av*i*>M%6WyO zS(SH4hN@$ee8+(4m=QU1(LRpSwc?)iMcZ$Jf=jT#M;K{Ympzx zv$kZ&^6Y0JchgB(*8qHcXf;SUy|t9MalfYdcCPr*TN~_K7Ku&Izp-oPu6THZ0?zlS zCr+79NYT!3M1S^nSo9|eLVCWE4xN#ZyK+1!J3p2Qc^u-O<|F=YEwBPtET~E8n*yVT z>4CxP@$laaa~!uPhF#6|#@|Px$ZMhFq*1bmInJ00!e>1h_D>SC9&7>o-UU!vF9TEl zK4X0zJ>u2L`lP(Cf)V9tJU-$evpD!#a8+kn1&o$s`>sY2*O=X~NM8wZ-U_^M`2=u$ z>4X95F~nwgDEvr{!)&Y5?0iH#epJwg?@yy3_}gjtU82Af3@m8V(+618Uk_1OfkQ2I zc~f{Nu1gX!n9><=RkxAIcI?G78&sk4=oO(KB)DMmLh(Y711S2G(&%@JG;FmjU!!FM zk+tXOMOA_Ic%c+458THlQ7IW}?~fPlG?0vVL#h<4A*Z=I*iU!)>D(}v&-of*g)zLP!o$AX{r zYy@FmdSb^Wx?fe6@*&62CF3B&F(3|=$;IC8V)4)KC%`%HIVha_B5=i4!SDE2WV@6! 
zRDlHxs>s5i51FXEz?YVkuY#s6l5CdHQ{4Vh727qBv2{6vY1pwp@Vzw>jHX31djTnC zvnR)D-H`J{{oiq>yxR>f_XtdR{UEfttsp9Bn*#P~lbF+mMXYA>M6`+BCdNrYD1B`P zl($weZZ-kldFe0-IVa*VvYgy^?I0ejyG0e_)nScUDr)75arqmDug_{>a@-SA+1**= zcGm{AEl%Mw#WE6J?IWsi6LO(Lhl9ie5uW`Uir0^okq-4Etn*)^$S&6(adRmdTwNwU z*`Ow#b!aO*Ss~4=*4@Kj^D4#8>yr4O)aUTe#2P=#-xE3+SJBPG9!i3u_<_IS0^4C0 zcUTw+)p1#vmEnc|CS8V@{d1`EfeMgme}uRGOW=0aGI%!U4|5G4L%uDwq$TZB_(6Sb znm1((ZT(O{{`SekJjau4(jz%Ei;?B=ulDlc|J}tko5T6`1uFFSzN7e{2B`6hR_5_3 z0;t7Pc=NKEyT6`9N83NfcK1E7Vr>NZno%z{*qX@=Ge=UHYYTYcJWp!Z<_i~7X5-gJ z3urMOjGb=|!gZs$Jg;s7H*_3JXZD%GgALAnLu3}0I^Qb_IyaSnSse_b1V5gl{F?lv zUbJeHj>WXBbUt}xED4y9Lv%7`k&yZXG#qe|sf`{)4M$dBi_r0|{bV6p^hD?y6}%!6 z6BnUV$^vE+6A3ETk?3D^lvN0sSc$EBt#W;WA*E3PQZ_V_fN%3ynf(y#x-aEmE8b3 zHee)CKcM`&T{2vO?u_-dLn;+9&O%j01;bX;q{U{ zs0prQ0iFZ-+j0s1Ltb#}GzLQNsWvnpJ&tekk%!miam4YQ9DP*t38oBG;sJI0$n@1| z9G=cb9q)-kwq`CL?$A_|`*neMM{YfKobqGK68bRsUpM|#K8FKU4#ID*txVC>8ka7S z7rX!RVa`=kVZ-xmpwA8QNvJ-&xTOhK*9BN@Gs%Q)H`cQ3Th}dT4v>UBGlVSdflT(W zXE8lkaGsqERVDAgW)NlPa)_lKxc!+1v`c7Gr31GB^(Ju%-5tC%r9fb}48%uL3qjs4 z8B6vo;wD6oYJ7Q(%er6VqS!8wE0g6nccPa6Kq?vcwlidltGa!hRhnyulcCMwh8&8r8HO)kOk|k}eDd%KnAKJdl zgx|MvaKy3*TsQj?a}17wme6=^y{87$wpGK;D1Ba+D2;PAdGcf4HQ?>)N1tAlp%->m z;ak_a1kRqr$!jyYU49MxS2BV&K9}dYza;spIXQ&QX(!%F)2OCRJ{#wjgxgko(${+> z>DM{i;px3GoUXcplI|+})L9j}s`v`=wA10gi=ye?}$rZZlh<{4t z_>|O9Xgp&Y4{e@G!jxP@Z_f*k+A$_Pt5^jtznKo!wkdE@Hy+J(p0PvgH{!0i+qnDk z9#DiNP*+(gxOJb9h@X8ZqyB`v^tA$+mkt>C3h_UobCG8&Nkg`7-H4X56xGWXv#-d%B3jZ$bv1GFTq7cu`nOLQ6VlHAd~2XybLNX7K>E0 zm3T<_9$0EE10^yWQC)Q5Ay^luT}nIqo*>!o~u{67$hu)D3w(xao?S3=YL?9HA-j^XiL_q9V5?5HbcbT_L>L{ zLvg*yV|Mjk3mf>jgqgk=#SXR{C9j98lk`AuGP~abi;Z59G4vrD^L)SPr-2K0^o)X~ zYJbF+;#a{R!!mYF@+6Kwe~?WsS7Xx^R>5YAVwC=)4QHlYw;F6|AU5_{!UB5}(S5xF z{77_wCf~8}Tm1qKvyT>sx>Sh^UWSv3BMuM{c?xPm``L%*MP!p&yWkH@5m~nheGhX( zwq7xttQFaih=KYjJZD)}%N#OMqJecy)WrG6r?bV4QC60NtKs$;8!Y!*3a+P9Vacx3 zxZf^P+`KgkPM*z&#IqVa{9-Qc<>@u~#kW!Br3JrYJd39sb;S7EcqSCh*sGoE(CkYV zYV)V~Xud3Ve;LEig`dDmWua>yavg`yxgnPG*avH_C)W(kkLRkpi$&>?y`t6LA;e?h 
z3}`c5M0WJg!B@-O;gS@=cLNrY7WRehH0&U?La*uF1T8FAj)vhI!!Y212dk3V0qPHf zz@~qqDE;Om67g^<+Vox*N0g8n|DCHrdW|9Q_x8Lla5~yZ%n|-(_nG^X^XUHVCYxUA zAdZoF2QF{)=s0y@U*miL$BT<_W!f!t5xhhF|WJP78t(kuQnlGgsW-6^(Zt7QsL}!QT{-Njy&J5a~0X*s9gf zu8kKmuTKR&Sd12iI(3qezz-z2X9JkI#sa(Q463OWB=`7Gyk=L-JX$V^_Gx>fYEKLC zc@#^|+>;RJr#&PZb?GEaRvz5fRfvLTU1Se~-(Yd@3B0P5Nb*;fGPOD@*x8gw&bQBm z!-4I@VzvnmIQmLtT2^PZZoVAcs?ot}@dCc*?^MtqOFu-}KvyBBkFsZ?ei z|3uU#X~A2BZgRqfr>H5t34WgspbwsnAu3az;+^Uip;uOp8m0Yg#g{mKV9_Mrm@kJJ z!uNJa5MhO_8GU> z3y#*4sIKi>rY}2!p9?<0{!mjs>X$2d`J)_zSC-2vaJJlZIW~kSSOxpesEd%PK+D@t(NzeDJtOM3inUo;fo?-E5CiEvJVLd~_sugUWMqvZCHA>65WFq|#8 zP0B4EVtVrv9CRugc3tH)Z4yl^-&YGZtx?DG)n2?Q1F`8pd!f@4N8Sp1UQOj)C^~RW zq!ye2P1kPWBHv4Rr)v&)4z3j4kJaX0iO(Td`7P+_#qocqhSMqu1NtB?9cq_7hF|%Y zU}1YPn;$G>3Z{O5YnPp=)=edPux%vIU7ARKJ)6wK90cD}?FHC6&6PW{i!giA4cv8f zG$a(f#-;mi^M`T9Jfm5f&loH`3oBl-!XX`4mHiNn7kK0TW3$mPD22uAJQy`plh9V!@YI1h3A> zas5}BXtrVvNNv0!nv%SgCvNXVi;>45dx!zukh>WmCkM&}Hnq)`3Gkq@4y1N2;coe9 zFv4U$pKWpw)Es|8kVzMAt&yf0SE9k$vz&O2I{?G_eZg0&0)I<*!Ba06FwEEwW-ko* zvq3YdWJa54A1T8NS0>T+24(u>aK30*S_^Ef4#wO76Dq~qV0v;I&Q>s>T{l{A7`kzX z5uR@dccex#s(;6Up`e|6YT7kBx=0Ss&9c+1*NxGkyp>%0E@g0~6%UdTkQ@I;5c)$)XXpAmTP-(Y%w!423oej`eDEMxCA zgUGU@x6tpn9p=72gX@=mgrIW~H2dXR@#tSl{DrqX%=mJQ{LFXahi@##-dDSL;pG~n zTfTz5WCkpie=g+A$AO36XC|7Xz+`__3VRDRzA?y9Ux zZVQ^PjzZ(2BW&E#NRU+=K(peM_=Yq8ME0w+(P7_8yxE@w0&Euh?aa_(bTmr*%Mx8Q z2|y345U9HAMQ(T3vC(HHHs zp)Y#xKZ^~SX?Q|kR8xE8HCDBb$BKH6>LVlYd{q;>VYr_e-xua1_e*P*Jn_LfhZA9N z$vb9nBpR*DV$knM9Ba87f$Npth;K{FgVk&!44Ze4R4kp2?VXR=t-Tu|NaqyRMrEM( z>r%36m=cTMV1qlxI$`#g6u3=lh_z`k`!+cMQ|mWCo#o@2mn#k6153oi;}=71Mj}ki z3J2SL`l9?FtBHAsEmZCQObR8;&_HJ{=5~A~_rn*XiPbSC`%M3>ihf+RqJ2Yu)_6;u1MP9oiTbu}xT$XioPLqY{H=0{ zvb5m7I_3rmIR+RiT}(a;^LUH&JHq~49%M}hz@t7xEDIVmnhC68vyBakKTQm~cSfyy}T^Rs{R&k)E zuo0hy`-8;UAgcrwUm{^;PloSXgKD>wu&VhO;l4IFLRRqb+Bl&6j<+P}Nd*25SO!TR ziJ<-PIT<7D_`lDY1>F+D{?`cb&Zu&H{`nX5Nh14^nuW)94`VJlhZtY)!}WKa5RZ3D z#viH4JS}1t&p&aHbiUEy4zZ0yH&F~(<0Ww4);%I$(Kxz)tsZRtn?^QHeQWt|!FOgO 
z{eV2&_npkV7LCmr-%;Ip7+(3B03)JQVdkqOyfWXE1N$>aw zyChuq&ye=Lx5kP;KXJoQYd&CfHg6c$3A@{+c)*T*d`NK)|CTI^?}G1%{$_gMYJ&{+ zxGMqsz=zMccLR+*#?tPM3xS2+vC3?oME}O@pjEe=xz>-N{K&BknsoCQn`Pw2cb=QT zCEaXk!n$pI`lW7ox3&dNu2P|qUi(n-*HKtg^O2RjhyV{!q)2~|JX2J2MuW+KU)lrl zyRjW)ud+euu$zKIQ%owqD1wBeN{#(Q5hFH&|5<-9>VK;x64!Dq_xG7_F~t@n$D9ExWix*&W)W3sTanu z+eSU4I#>eov=r%U!HMB=b12O}IEcO{8N|3GpL*Rk!4#QPb~$thdu}hqUtI5n`EJXubsN@P;q_9AqiU(DX9 z9fGx93#h|CEo!kX2o}xz3l`VXxZJR(*mgjl`?-H*D~noj*!NRHZq$Q@=4U|vsxo?@ z^C>8~yP#~$5mCE-0f<8OQohlM4oOJG(x(Zc8C|Ey^@QPkK72>&zAJI}no35#j=D#|0>Cq=X-19^lj%~fj?pa*|7Znr!gB1?{n7uHUdJg1H99kLsK>eQX`iYTwJz-j?exHf!i+$*^^r|^x0xQB4Z|Pve`;U z9?3yXt+!Bo>pj*@7UQxDtH|I&G1}j`0zJ24=;g7a>C;qaR8C&SKL%L9!oE+UFPr1} z#^-gYtXhjQ?Irw@sh!YwNTYA!_CS50J}8*D(*1$ss8jnUb|UaBR=*s_a-Ogjc ze!UF0*dn+A25uzJ8l>oM^)Pn*!))qX_lRlrSA*+G6Mi&OpGQ5|1Ur}eGL?D$eE+b+ z81Lc<*OKD-gVF;aZcBuN_dA&0GHJYgE{#7kjmIOCWMO}!4_{;02a*k|aH6ADNOVi3ph! zg~*usy?^go?^^Hjx3%u&&fVE(e?Hgs{^xCM)4PW!dX}O8^1m>qDwC%^RN=2j1ybE) z6Phkl59^jk;p{E(5dF56?DfqMk4;pkbcP01HmZVR>x*DyJ_B73n3IqS1!^)=7pD9W zSOXOtsY47OFylBZmDR?@kNx;^zoTH^D0qGPt5I%9Ge7!Pj(Exr0@v|panQyB2wFRV z?$p{xejIy57j3nsiw_D6-~Y7v?@ixHhg>O%HuB?(5~3kos!L?jkc#tP8AE9#Me^qw z@lRVXt{-X*r4w`@_lGrGW+zlWj~t5dM{g&$Nk-VuG-KbkQmPj+Fu z0fZ*c!H-@&V#8E5mhtY^w44&%-R=S2TG zT(tVHd!kr$ayL70<`ipwT~6F2hd@WxC3aieR&-H9=qdd>gO9PDCD-34j~cAtUaK^= zKR<|S2?tm~VJ+FzF$%mSkKp9TFWFj^dR$xL!O8``WImR|Rn2*DaK>&-J9Si~)i2C1 zIsW)SbcB@ec+VV#-zDM~hIrHQHjxk*SnK=dWBC4okP|Y8m>DZGb3cI(WO#?|`yfK- zZn7?K$gmC@cUpAE<1jwXjKM+_Gv`uyxa4RDANNWT|8W`OXKRq`>h(eG`eYD2567Tp z15_9Hi*qDLg5{=B;AFA^46_1Rpx-9;FoS`6z!o&<|w_X#)fn!kD=?KXPDG|MC9LrjUJty0( z%9#I=R+0b40I`H&Vco?0aS*lD5*-86nEYiaY-y_%HKuehCN&gIo~^>BZap%2+GwQ1 zfPQ#69xrrt5Dk@V^ck80u7iHT%(iyC(D@kd9(f7}l!acVn=&1-ZaCD&SE0)Ar{Xup z)o|N*B>y7xH%(&1D4&)M8POB?x4X&$r$z+|+W)bbgO#{IxZlWZ4#9@2hAiM&3M}}u z0iQ)}gw5ewu()3mO>Q{CGute2q;CVWjhQWu%2*;w6Yh3D;ULtVwo&w=xrA*OyMW=P zV8UkKB9`CN+4J#!xZu4TbIIOEB!(%0#E}C;J#iybUNw+yc6!Yw-q!|&PazN`Ib9?h 
zkN^>KahTh6m=u+MAvM=+Sl9krLTBh8dA5#=ikuFK*Zyf|ab|Cr;_B((Zd1pc79q*a zawivGs^EVkrBPx_5_$93o&{zMhp){^;Ir)yhA5BZ(Z8cayL6nzd17B&x>bwnOO&#Q z`V;7}g)Xq8F#vKlZNORG!aezrm&{u30ygZ5Qizx<+ySJQvNcFXB>P;`#jCaC~8zhXaQ@^Qzvr;2G4)yaAu7Og5?MwhNT3PiJ+jq#XBqRwVopNO3W4NVJ4nnop)Sb5p% z!W|I=1xAl#gVF2qlk0yx^W6&x7KZx=OCAQ9jGu37#vo)F|&id2Mem~v< zx?jAA{(BkNJWn5TmN;NVu!%6c`Q!F6gIGp`1Xulj*m_N60#2CmSnN?9%}j~FpS8}z z&mB|wl5^c4%IwCS7D>!#%5g?RQdsc6%V_gyB}h!y;H_*byq{LXTnyHujY|iK z->m}k{A*#5cO8=|>4jO|Ry=CqHJGcL7BY)&+40q&1(-Zw}mxi)okJ3E|j&q-t|E_a~j#w#m@`T}&` z(nHpDKZ6Kk4|-?+S}rT?$o2IEM|jLMs;VpG2q)`8p_d(AHsxez{VX21)1B_CkfP#_ zAX=Xp!&hpL#j$!W0ounLNnDi+_#W%O?vxiug6zfa}b-V}u@+cC*5~ge6q-R3+6mMGZwv`Ke3xYuB{pcxt?+0*bxOOKelrJf!=Us))kN#@`BuSK7p|v$MMF4 z4Dkiw9{AL>>m=6i5!0Emg_-M@V8q+)kYTRK2kPvBz|rk&_MA$=B{+tZD!1XlUz~ne zXu;Pl4W_RLN%KeB<7wUVGq^f-0RLk(l@_!rbM4wu^oQJ9Ix!}Wzt$Xyn#q3z)~^=d zcJ@E`dnAwFT)CHq6tzIo`*--LP6vISTMAu<9NhLwl3ES(6|FqwL{IIt;C(lI;hf$^ zK4Z!aanPuCYlWEC%&9|@1_)=cx8L`YV!@#(6{E(hYKmasT6q}Y6H3hgXy9jqtMKP? z8!S`5gSHd(Y3RIU?mAbIe|+f4ORq%Zzj6~;m$ZqyZyw9Kb60@r?soLF(&mBc~@#c)sE(WK1*UCVLyW!3SBUJ2?p~j`@I8Y8EG#Pm!)CiI_5F9tIx$jS0&# zIKu&SmDdCIEsp|5p}d~X)++S=?{*O7F*yGmB* zsXk@?HDNg5Ni!B0*@NYVS>Wp@i%RYzn9a_cB+1Z}WF;3ciw}8h`{itGm>Wp~jTK?> zz)hkFHdEoy#eQ~MN|9AM3Ei>OG?bmX7HUTY5$}NY*u1e4iL%f)3WPdO#7S;8Fl+AxsFa=q3j>dWX!AI5E1E{c(XC`_<{I3$^bq+M*&>Sc z${;hg{$)}NBth9z52NHXAurbhT4q;?I;Wo!m>);$+h#^%6aOZ@pA(Dz4c_8+z6oTc z-5VBVvbVnINx$IcFb3NV*`nUfs;Jz&4I2AwLF!C0=GW>$et|rcuei)YW#hnh8KCm- z;W*!N64Wi8hx-pJVSDBrHabh#b;hhi#SLXF+rXx@F0H63ato_BT;p>u-)G!95^Fw|v#W4aE~qV;>n zLDB9ZL_IWuP3XO^VlkOZ;d8;1`w#|HDV1umqlii2N7#<#;Dv&*14I3 zu;1`8A$Fs|JVgqlqGpI}r=GNSn6ig7>Nd0NktIxGxJkYLbz`_Q%pYcIR>HpmN$X$A zhuEN%{p6y)DV4Q&g#T>R=%cDR5PSYCJJqUARh^W@I$sca0>{z}&quibS_4jWjKXQp zB5_HfKa8z92<3g5o)zca2!8Rse=nS6cb;0+T;6)n{0Ub}SlcXaJ#It7>gbwh> zzlM&G(Ih4xo%V?u96Vs)M?)H}x0BqsKOb+uR^%bKjL3XFb#8I73EH!TT>J!qySPmZ zN_8tSn54n-51)wYAsvYQ(hENVo``1lO4AtQk9gR+3WXFJuD*Bzvu=cvs|Gt*v)v*H z`F0gnx7y&dx`*W5hi>Mp)XSb7iGj3J2bghhG-UXnAhtiI3v*7jIDWD-zPUFA6!wc) 
z*LR^uG`~<3ldcRU&jgm@(Ji3qJ_4lzW@ExUpEkXqeSo-abf>(uoxDP&7Yj% zuAd_;r$J0k$KpA2B{<|>MIxQ=khFsz#h06tp<(eQ(id8d?nRC4gv|+YLT0?k z(yWg?&zDE@l+jpw!3;#bRj|hBIa&O^0W<41;}Q2p<{+CPocDjB-{v+rvqhTMJuE=g ze*^gAzR7rg(F^#uXb!o)@DWLioW#9Nl|=sQ%Ehby{)ByAYjE*xHPHBGOE>Qn6YaHy zpcFn2BE_FsLDM$WifIPha7)neRiIxynsL>tR;ahR!*4b&xvZ&ttSM0&Qt@q!hQj`jQFuXiG9=31hN!!vxkKV!@DSeJ z@5gV+bc;evi5pIpUr34_OY0y;!kR41?YDkWqX>7m?_&~GqiEVb2Uz*y6vS1G;|=|U z`f9}s3;<5dTN==L&I`7(7X@$bI)@y=%}>5oM=vO z4jBx8@~=VR1zCEkdIocHl&1xW?Py-`@NC%IBnWyim-h?*?}n&95N}qA#Mcju@Wiko zsQtPeb_;IHnDK95u<1zRA$5_RxMKrbzNGTao6m`qU!~MHR1Sa__3gMLCKT;*u98u4 z6@)vr!i^^;baa5b;1OL9E~Pi|zy4(G$q~5eweR8fO)s)M!;`B!)#K^FObVIa{Exy|oT-ul zqii#wlljtq*+aPf)j)i{Vl@l-IfbYDdhid~pWtp~omJmaYw*$;N!Ld$f~VKt;d<+A zY&)_U_C21=y-U{POD*A9ylkN|(VHGGub~g8PT(uY6rkC_bTI!q6Vji}q`$3|`0E{c z;;>8q#GC&*QWswpy8h@$cv7Rw_jM;jW!*J)aPIQ@$@9je$7nBZSoH??YhGjyYOOf> z>?rDL8wrg8sX4%elO*Xg^S2l@>9C*&TTEOhifDR7FQ(U&V`)GsysKU^|LG$D3 z#5-DiYsoO$cXl?~AC?D2!(R62!vMT8)2ge?5#l;`9^+HcU9cNiTtJ_uR=_u|=~m(#(U z2%eXj$KSk?r!TFuKzrjA*i@SXW2|L(i}_(_tA9!EZJ<#1qE~#dMvc2JJBaq%+MvO5 zD0MmM3RPX!RC>BRpVTv$4tIKm!6KnQulhk8EhF5Sm;57vq7syt;l<$s*E&JEg7@=P&|8_94`%l=#nTa|Rf-XEUQB zPk{%biAs42=%o4#2W(3Mc?f2kN8Z85FWrfgnLdWBQ=yG#4#NJZVl327V^g>1u=oXu zf;!ci`ak$7v)$@-UDH8V2M&h85VvTR_|;r z4Tjs+f|7iO*m#x-*?C5p>BUX2Uo+GXt+$(y3;p-;OtlMpx-|y-zz0YdqIJOvaqG(j zRyW3ugv~R-IcxU8hWRI0;*dF*-qc8Tc-#=nk1YVx0aNPx>VL2^?<{b8{v233=P15U z{(*z%2B1&6JGM=8!zSV0(X}gt^(&2s>Hr(m-FO+)8>Zm&JZUE5)=Y! 
z06fnM^Ng^wywEityF6Dw$h@uCaxa5eZrDKNAd?;Pqu8Fh3T?~tMY)mNNYHKrxRkJ+ zHB{dtQ6?Bm}XT; zw%&|@6+*_{Dep0J-EoEt?Oc!bGv`3Vr#tnE-ezbvsfw6bRkN(U1EDD3q*~#SQ8$CttxS-dMFG^Ov}g@1$ zpe)?8#SL2z6&J$L-Twtsi_?Xv7ur~n+a6N2?~2Iw`zPYBR4%?F=Y{q=6QJ|fJ5o7X z4U5e0GDDYAA&)Quw~eK6Zs&QnYA$ElQXA;e8xka}u?%XyYzDg=9p+c?h8&(D^nhFo zxlVQo3Cw;>9=aC5?6J>4&wVhD9;3^xu6!iMllL%{nNl=V*bV>LuTEti1hF%-qL7?_ z2`q_fnKyThll%&*_XL}Y}puLF1#EjYE%dkIz9>LtMQ4cuXP7~lUx?J=K_0kWF?q9 zRfQx4ZD!#iLcf~=ug6i2)EJHe?e)%3Go_O3oDq)!vdi#e;Tdvef)@Ta&ja778$i** zNYU17o@n;Fk?fAt!Ht7-@ygt6*1R=}Bve+QrSEWu-~#_o~R>8+qcT^SX)k z$v6=9^q~0Z3pp!fJO{i{hl`7TiyVi(MZI)CQD561vcaIrJnObWuYh1WYw$Mulo|Z z4E)71)VJv)TVs;h$i^7>si21+Yu6Fei^BZfJR5VD`9M{El2}4L!K$QL33AsalMan@ zZ2RZq%t>b(Sk6_&q{?)*-`ftmb0BAHdJ843mX#w*QX?oG$RT zBPUITr9U|<5ijEIRtq7{@&?vhABUg;U&MBr8dTQy8fqI$(NnPl@puk}((lP&KI$+Y zUND^RP7i;<4?FIZVjXuo0k117;tpv9)1lfJrd$s*KiKhGQ*^K_ zZW0ff*<=+^6GbO9d|?j+{p$O<7ua;WF`Tby1(kJVfyUsr%@@KxV>iLHu zIjs@e7Ul@=fWfZo+sOw#cWSUkgC8^Kf}s17ylu@%61DULNmRTAOWP8`s=x~lhSfs0 zTQ5Wi^T=xNb+EHEpEo|Rha^6nTZ0JJ58HytHd<)us15(coy3F(cTid~nNQByE$~bJ zuzB}5t64DzOh4_R4L|jHW9&dsa53O{V;jlU*et$U^a4M=ol3VYJ3z+E9TPiltH9fZ zYU1m&a_ByR&3(sVA_=rSjV}+KBBsK3ZQoTL*ng;ly?^A-k0;Lp(*vC#(#)hogon^7xbi z{Nwg^@g3hG{L`#RoVqoMRwo(JRX?*?PqZ%gy0?lQQdQyCht`wd<=eqKwcpy2TF}U7 zpf?2;=<`qZbh^`VzUK5PY`P_nTkmIxrj3|P6RKljnUxL5KTw1jKNRu&psU1mc)xIt z+FSSEJPZ6$ZAg#lY=w_QhC%ZsE!tcyFp=uNvH8;H`O@MtOuDjzA98ua>`$4Ar2KC2 zt9lifm*hk{T_xb3(<-*+fDJ9l*v4<%(1!E5`rtIvh-n>qhQanK)Gy*26v)f+fe|J3 zH5>EcMo|VtIfde+Cq~?G_z5=U;TdcQeoiL3Ys0MjChW-f4p3VVNjJyE9xSc zZ+*)PzKn+tODD0ecfUdT*bi&NNl)=BXyb&CZ7{BHA>E{=!nb(ziBJFd3Kdlm@M0+8 z_A{4Z+}~(!k{wPz31=gqR1VT=(6I?bXyTpf-%J|h|@8I*HmpHUy5Z#$G7S;E? 
z#7iE5|2KD{e9VS;)<|-zMKZ)l>N?UbaU$v8CNSodGo~5OIJ@gs8us*T7tLMrTBIy} z9KC0)g`rsnO*#6lP#`XS(E)Orksfd}((8eEj1cx!HKRd~rKnFP6S&#Eu{aH}^ z0#+LSfmzw7fx`-O(HiNk_%CHNY*jJ9x{E8A%9(D`*Ji}VR&8PxPMhk(#*Kx~za7MT z_8uUmCcebZekv%|siMEHJLMh?bRT$uR&~fH-~Ke{f4anZvseI z`-pv9W?+RzG|1^r$IdceCNE?ngZkCrP1;iuwIie6@Om)G9y0{4&31-ehU+m&__^KL z|CA`WykmDPz2K&c!16i0gL%J5t)FJN7g{5X@#UyBcuak$==cFe*btozojXk6NM<$u zdYTM9PbR?Ct>4)fuMtA8`f~kV`>#aec>&Z{sDalhLp*mc4dzrU6KCUAajpDH`cPUQ zZqCT2Q~X`H-pqF*6)nNb^JO7~UB3$NW|tE_WfNY1CS=S%9snnm`TXkb2_Uy}3T1l) zCf9r&>fx+O)dM8yuRGKD%ZCa4>Bd^r-Ma%-U%PNRuz_*Q1WbDA$U3e>LFm1HmUhAu zRfl%6dNpUfARdepTPs;;$3}d*U>L5rrh`S#cayT5Q>_0?7inLc1j-k;GqcEGwr{!M zi@f}mY;8;-j!%3AZYvi(YfurbnRgElCp^Q+=A+Sc|ZY7#9`@&Yz}%@ewIhZPSEFtCiT_ z3A4~B|CDH(LQws?z<6Q*egPL}&Bk-~8&NW#2!kYlu{rTk@N{xI`!+TK)TCasjaQ>s zw4cCzUf+l2xoR-$lM~uJEE5gi@dL+-qs1~0wPF80ck2@`y-7xNIP3eK%8CXCK)A-$EYz`2IaY*2I?c2`2PI#3Trf zp90@zN?rSKj%SG za5FfVd9i0VHxi4Z!rjKd52R|A2Fb3Z@afBBl#Y&pmTex`a_FdV3OUYh{GAIDtpV1K zvc9;;Z6r!uj~AO5+$XllpG7G{;(;HUiJk5iDCMPx;paDj5*Kp!OU8@;ZoLbiURB}v zSyEz)X?Ym=Vjq$1b7oUUHetZ0!6dp%n#1n|=A@ejHW4TBu+Xol6}}%<^^mM-9)U{)(6Oh$7g^v$Al`FCRYe z(qKHjZWiDA_Y3wZM?=7mXm+NcrQSQR2W#VRgW|;?{%@`d z^fH)kc@?aU10Z>-9sczb@j#(7EiJv1^bf7VllC=el#@rkwM~Q6_--68+zZuM$be~F zvS@-xaQhrR45!Z~!pM|gII(C0o=wez3+t1igmsge{6k>3^Et-OHHCX!;XJ}`D89bg z!`i}AAwu9H*1EjLl3$B(&}|3)K1`9ghTp?n`E>D%N`c`kXNwQyZh&Qg3?CY@h$l9T zf=spDVCrCj8G09C``-aP%V7un7;J<3^R)Q)kwfT!n!E6yp)Z{)aF7z;D2aPNA7HIb z#ay(!2F{-!#f^G8aDL|s^xCwZtJX{MeM`!4tGpIB`4b5%$E{(Cmo(|NvJvzpD`kp) zv2aVug>KuM2CMhQz^Zy#YH=U-MmZ2Z+2Jrl!sl0sjOFWbGfHltNFrYJ1K+>E<)^myPnxaZu4lct!uNqvp zXC?GzCSqlgF2A2?OoL><;*BeEJR&WP?-TB(VWS_92;4@U9aiC{L2t11$ET9*jt}u?-84SxnhlQVU&D=U6VUUJ1T5+PK)SVGLay$1$oV!HZ@kyQ zALk~E$Eo*#*~*Xb@5Bh!R6doxsXGLlrKUjXE}`4DC<$horhwh_z2x89!>~nJlgoTf z2WJB#T4LpmJyMdOV6zi0&3lan+<^}K?*vSG@`Ln#mf|+{>h#3AE9Clf2Y5bXKDixz zf!zKzUSu&n7hXXo)=m+AMpP>CPx}-$N$3;oTDseMibpIyG%dsP-*xH1x)b=~*ApQt zwu+o}I|YuLD@c{eB?!LlBz}2kCYzr31}m<7#QVd?)3W?flDlCDA0F@z=QBMXzs#N9 
zT^LGPvn(Itw+b}hUc=wYsq|8m9Nd*W3hA%BSWnM4_Os0&n`b7$-w8vYZrCQ)a$*gW ze*KzAtxFIXC$ZRNW(FyIF)H_NB2hhk*5{%XF~j6BagL9JFOCMFpcOB8Jpu)OObLsr zbVbAcElm0Pb+TH8ky5K0>}0YcT#Yh>4XJVBrBfnt+^gyMYsv*8$~C~oHKxSPG9SrK zGc3(KjT-VB;Qn+ojL>*be(DQckdqsj>FFbwJz+LGC#4{qnf)-*zym+GXyO)~g{U_+ zLoEIMJ$a@3n}yy?V%HR|krB~)g`VkNK`>33)weW;&J2^O5HWvDYY{A?w{cNp9EHT?Nl(bel!|L`3kWhbD zFEwoz2E7!xhBc4tRq0*k9~BQBKPm{R${{;KqVUTH6P7#HjuaK|7AtQtASH8CSVxP% zNek2#_0%civ)fjpjl0JYvi3NM+gm|uH_e2pV<+O9P#<>Vhrk41qyar!7Q$xH2#7NI zL*A_`BQ^WKll{9>p~gdC{wT(f)(bkY>#s6ySBQfx&pk2t;e&d~*u5lZz!406F$D}= zMYw5i94gw3MswR(T$o)8k58<`zQb~`HfJQr{CTrHgptGa!&Gs^KQ%>lCA5ai| zDX;^}n)#rvBn2e4hv+U@2ihvGn7cre{k~U0w57CRv~VY9n=B)Kx8V@E(-e(WX*=*s zo^Y-gI=n#x63IukQ=;}ofiPMr0AGAk!`ACDnDTl6dN#je`73kC>;MD&wq!Bdnr(yr zuOG!33AW5N!;vk>cu$h<-DdHPk(e!4PufT6fP#7f8T$J&d)Kj>DF52Rf>IWeLkspn z-vN2iu%`#HI7u0lLz*y=DN-|)0X*ci99hxahN9Yda#&;s#vLJWBQYIZ%%Z5rKr43E z2;u#Q82EC}6xE+w5H^S7-;NYqd8`Twg8qp+iVE1D-}~{K!eLes)XX9+I+?Ogvq+-n zU;Ud|mzezN%j{u!5_m?e1OGXbiNpqbBuUaBaeV`e+&2jP_iRJ8ZPRgTEr<{J0^2G=xRE2CoQ$EXgZeVfB9+0-Knv_~}v!4Ycq370H=8#)V z6wi(VT}wYS`)0@VW+>AKAHvDn7~w4WZY128wu7B`t48xJ(KB%2P#!-6-#XOafa30W*MPpcy;-@^p1O*Q*0ZA4BK_mW9&`_S*d zY3#C-4QMG|VKGa!p!`J?_SsaTms=rCEt<)I;>FkcuyVXKJ#gtHe!f%z#nn+T>+Kd==r$EPMx^sW?|Rt$;fj2ca0b07u|hOf zKNceYJphMmmiSF2OUUk<(yyA0{DF8KkDE6V<_-yBHl|#`{^v&R_y~p z^~JRUpTp&IR{USnG;;X5HP=`-pZZ135ZLX51Pr ztR%kriBR$(lu1N6S)DF+#KLwD2zev?`z|RW^|hK1a9I|AczL7Lh%LBFY7=@6EhRoe zU+c=?1!xg87jMKHvupm9>|U2IhV^h5-zQ|HTWlb_ND>UpbFp>ca-yGn1iv=EKs);f z7`L~L4f&ld%IVt9T=lGYv!XO4HQdMQ^Xm9yus<4T8i@0BKcMR(!IvogTlC*~O&B4} zW&_QaqWSoXFmd<_SXq7$;pBH@ll!)FRyGcu44!h;`30teLQpvTGxYsKqVwvHB zbHfgR>an4;V&yb0*%isxJh_VnwQt1XZ)50^b2c#YZYuxdbH(Z#8Az=+ALOax=P>5X zZ~F9G4*pbi#6@H&_|_Iv<2)O2m_z_w{Io&vkPIW!tzSZ?nc=upE!w7^Fej=g$|zmIBgRZyayZ83Nnu5r8X0B35fnA_@1xSzZcf1Gfo zzZz6{c(*F9FTDWiMeXd2YbSKvv1T4O8>r1-3+PQ;LDfBk%)|Y6Q2po-^|a8YUI&$^ z&z`}23G9KpkMyW@^HWlKG8TM0g6Vlt4SJE%UIJ@!Ytjj{oqC0+ zdH-RFm*0_$L?KNpwuToXKUoA?{|JUZjdU&zv#&R%=nU<<3R!-)ul 
zxcNDBV_+t_9Tw3COXuJ@yLfnWO_^(+DaGfhiP*HG2kg2}a-K#$G0PRMy>{q?k~Uv=K1xV$-kgldkJSp0b zd(T|Qs+_NoX(|N<>onlx>*2KhS1kUN98JPI-1x=!A>`^OO*%VV7q>~9^A%az5Gdrx zmPieQ@tYpw)y=VRx%L|*s21YPcS-p2`WM(gIh023^q}rjWoc?i2rS&+4Mo=y;6R}& zH9we4)n$J}iryz2SMdkz3!}JqzXkQZr$g62w5F9e?vpmTmpE;JK5Prkq@zQPsrULc zI_1)EJQ6+}U6=nA%^tY{|9+VPX?r&UDM|o|r%_b8#GUfV-&sNVBbiz(8yx@h2FwMV+(X#|CpKW+bL=ZJR(w!pA3sOUuGwz9*7pr z-3v3M(r{(PLm1>6jMIxxVB4a>plp;xGPI9~4hAek=TT2s)i@oDQkh0R|9MS}JA+}# zi*)?vmM-MOqOgDWDl*PH8V_{Kb3OIHqLZUO;*X!p$uqOZBDts#0t<(tlfND*+%yAf za(A=l7Bl=-mI`un2cyZ6Rm{J*N4(%%C5d=87}K6PFtgG>qOCtiG4A@5wM-ItI-{SE zi|HxIQ>OW0-{RYU{cm%}lOr6}%f=O6o3a5c5`nVRlnOY^!{`e%IA; z*lzKacy1X1^=}j5?q6@{cl0D(dP478$^)MzZ6Xy*s)z-5#T~oG3c0Zqp$|A2t{(8f z?Y1JgbX@3E1uTP_)uwo@BbH@UE5Z7=%VECYhiTmSf!P*5tT!u&AzySNLHB~Mc;cfq z*!fZk;@+NQEe<{8jZ;5sFTO`wgC;>zQV?q>epK(!HiET!Mxc6$4ryul6@)`M@$`-6DWDAEL$E&j#TV3GA{8J6ZxjU)|Nz#FY zLrUOk6bW}_hOjcT5s-gA44ZPSplHs0qI}#3Ew+@CwGT$4XIVOFHqK?^&c{KxRsy7r zABHUri-@Pp2eRpE1ln%@EZ#Wbo!HWG4Afi6f`8aUcJ0O==DPEw*gPf}t5qX0e9l`@ z%9&2)`F%S(`&i)I2>HW&`zxeqXRUbKaeM6YzQkITC$j8+lR>vUUmR4j4$8X(*Nf3x z78T+H?J0kWg#R|HhSqhkLN^f}v{=9$;|>VW+J$@X^x*!Co2>ZJU2JRj!acrfv|V6D zWmd_eq>w?`d9fIVJ13&alac5hv!4E4Vv6b71vv8EMK)mAApZ92Tt0|;LGS`GoDGV@ zY3rhJ{IZMq?{YZKwX?_YvM8pmmq8?k`-r|pJYwp<0*JX)GM?2@!k}^A*ygq8*v8i< ziC$O=UPvE}?XF2s==O|BtZ6yhC?5^FkC(9Zjw8X=Xc_d&nquUS7_j{>7UXT5Af`nD zGPKUJ^&}OCl7)~kEDb&>Qz56iQA8%}A^vmph=gQ|^=cCUC)r@AToa9}-$g?9*=6iW ztt&YF9zlYf-!oylfa>BEXghkCxFygQ`jfVR?YL;`X6prTwXzDr^HbT6lbx6+Z;hkk z6|iscVZqD#9k=Bv;^eumd_Q#IN$W9i(Tj-A}Vy(x>>-lS}BJd!x9N;5c}^ei1i} zz6#QN7vQzJUPw7)%I*8~C`wE49|s?!L}xf8m`{gVD}VA-$%EfNKx2H=%N=GK$=~GSvPK*NMlpm1U;lpmPegGEM|JcFu z5vU;{Lfvoc!7|N~nCV<5HA4o#M&&PTZPhFm{^J?3y>UdeyJaell}W^RZL#F-G@<9b za}wm1Mv(_M9I&YDkJzi%thrtCa>;_4nkH+`idI98DGc7>2-m|A{hB2>!yw9++L&$Xpy8!0wEQxAS296ILu1!EBLC z`b~Ce?`n8DznEy>ZNQpaRiw7~5AnIW32XJ7;duFE5;0@$)XOG7oVT}d* zS%Ah*@%;~W?3t1s-nu%o{;h5eo3?zHc#NtsCSQ2KHmI26DpDwl7`ZlLRKNn2|Nj}y zOjj+B3|+oDLihhYpWgqTZ?t>F7~%O0XAcnmXw24F8=)^erhTWkghc50pR9AFJU#z@ 
z8Z$3zh9GGB|IJ7|pd!f%8yzjRQPe-G2-jw4)k zCnk@@SiZ3e4s4A>9c=?{|2>Tj7E&BCK37FGAF@DdKrv`eNoC%nbg0+;MRdNd9bR91 z7!P>4!lyn5GW)%rAk#}heNjX zK{Qv~ggw76zzC};(4abjSIgdnIpz}Vz=Jtz)zQNYfv22}dU zI39e(pO?s*!Qg4bY0CQbpjaqugv)!O(%?8Y4^R~iNHc>=+zMLMqlJ@i6uQ~lpl5V3 zyz!L)W!p}C``iR?y}5wyPCC4Q1MnM(#c*f^!~VldV2f`liyA3d8H@L$weKI$Uo6W_ z(_hv_rW) zv3UGbAndw$oa~CW#=SoKaCylB*!yh??fv%_zHZ4OY6z@xev{IWt7uJxEh?PTPr^jj+~I(-zcW(=n8=EouIWg5$SJ{8LK zE7*?3KUs#$5UlY{Kzo~bav zk0*SN2-Eg_y72uQ(X7r(AkwL0zvdi47q$St4Q>J59&Kb36IpbpDt)$WCDj);2wK-J zz^@P{uGLE7sf|yVq-7qgygLut{bnpi=Hda9mJLPHUHea&sy(|2B_q%)TUg*V4$Qz72vYUDM&Rc_C>0P=G@j zhskb>p**GVJ9@8iL$znU@K2xwY}oP^LjUun*6W0Ko1*}l1J1$NdtdSBXHOPfGFTwh z7lVa*F9v0uN9R2y;={AQ;dQMm#PDMVpLog=XFf9|OExS(rL$x3mqQpo<=+bPZl{Bp ziI@!w$;ZL+74XY(JQhhFhGF^k+}*)RfZjE;=Eaw=Hh2IZRB{UxqgDB7N#l_3PZJsGzDsc!e zl;6c}&t0IOISX!I-%U0j-$~j(*wS?+rgZpoeL!NsbNs?d)lyj+d+ija>%@T1x4WYJ zt^KgbZ6X$zbiwOxb4*gO0H3uIG&%pSwfvmvJpM==Bp&eQF(*1vEy@`mw z^)Ou%;LQmD_3`m%M}{oLhuZ&P zE!$97Zo;55BLNP|@|fXo(6&^XYmR(W{}Pk&=gE2&_`Dhl3I@}JgFjGzVHM_^7Qvv8 zGH{}l@+_()*xbF*{<0F4o4JUzC(6)J+ia28IUPsr?Lv(q@-ST|0Q`1)Vd?V$bYJES z=3cXfCkW(~7k+d3Z2ba!nA?p3YovK#PKSL{mp? zBYRrcQp03xwB*iQCPI?uj;FZcyAJ&+5MWLobHVs|vg}!CH7lHSi>)mv7h#Vvzx|~S z9W#r7eU_qb8#aKMc9)=vO$8~xLX`A$!drVLz&5)xPVTq)6@)8K`}eh8dd4gubGL@PJy)BoPTXJxtvS?2hIF#q8g zdU&G@H;K3|?)UzNSCXHj_%X*{aaoWm7zcu#C(u|F>{IjBkuOtKX|>SqaVfK=k7J(T z;mqB1(xV^5aHo*yFtVcO4~60Nh37>HO|Kz1-47qm`HmLr#*=@k!Z%_gq3#2g;Xt#^ z)bh(=R+qjOhq~nBJI8Y{Fyar?Tev~6_cUBMMKBF&3WWAwMKHQ>2?Y0#W}S_5ASO|d zZ@4gmE>@P|hjQ)sw&G~4_Rk|+X!S4q;>J;EPZ%yMhR?^t@lMG?NO@X zpM<7{@l_~fBqJguiGI)T|6Zqa>O7xu-PiTLW~0pCR#xrcAPmfELW{e(!130g+l6!6NrPZN2)Y?Z4z^)_YR~ua7}uTgyG;B zDfUlB7W6fT;M+S9po~^Dcf1KYkMKeFhX5Y9t%$#G?8Qkxt*LBH0Mu;Kp(Wfc-y-%d zk_bz7XmK;&3dQO4d>iuk+Gfo6sAivP1d_-tD$uWbAE*613ey+#0{`4P%pGWA8sg65 zVXH{I_;4k=O0)~rlBJ1uH;)}8SMl2&e;i4A3(7n4Vfeyqs=nqD6kCqMeg`ueH#da6 zEf!Cl=T<`5D;6vKh0yBSdUC=_g$6rKWt`;48G)KiJUsM?*?EIY(N`vsje`?NW!*$v zngM(&!eCS5g4s9h>&m72*fd|a6PeIG@hu|xA3sv3L!biyqa3o!p+_4oPyB5`e`O}9? 
zP`oK_tj<`Qj!;`!PQ{Eo=di5DbM zZ*UM+SG|GoA{`o3H3IkjoAHd~APQ&5W5<^1q}N}Q=w}wz#4>hpc|4YBRc-(?V}EM3 zlsn;aO+eyJ`OJCmeOS0A7E6nkyM%cr< zqI}#wO`IAm=)qo55wg1B8w{`I9A`UD;Gf%EOX-*yZC+pvsZsk-EGz?#a!$5g*9mX7 zvk^OWQ#h!NS(2HdAJKe;3S}2QhtSjF(3muV^aN5|+kG1D*Lh)hRuXDUi%|ZvNK8;p zMuSJEfjRdL$6uc0&x^eR)&`~|`KS;z&`XB(9zy)?Cx1}~RovWipAV(gn&LxXZG<-fN|>=A%Cr&d$6=-a~ zBqn_OoDo4!3VxBx8P21vH&% z!&J9d=rQmN4qi~872g$EBX51Gmv@RetFnMG-mMSjGeqd@1$EHu@*Qneg-Fd8F{nwM zjcSEFnx&!xmp9!qA=qd=!9&2ofg8c@3;1Cpx5h@+!D{YT%h zqGz;-#n)Nvxn=K}ul{!QPgVmQN$tXHNE7N|Jb50g?;M|9T z_+zCK)MW`$Q&T;1q}~terR=Dy%2aB(pc%*Fm*A|u!zi>v7pqN&*zdY-_*?A|CZusK zP1>1^>alXXOI*qH^YKuB<|Hg?$->u51gW3xRfvsz!V}==Htd8E&XFNZ^VOwM+VdSN z65*!qAE_emMN&g3zzvkeA#zvlY?F9V2JPy5UBG9J*}!pn}i76sbVydC>j!($aEsNfsyQ`AV?NX#fRfq07B1I+)pXP;Fv_aOqTo{`*$mX})!>O|JsOB$4 zBkn9`c*hv}esdQNPW2`drcbe1L6rWK_C(186X^kAGa~AK0K#@%ft$;sNQX%VHYXp0 z0#kdi`1ygqt;`S)xT?T*V#=4ecL`&h|1nbxc4F0wU)VNNmt5l-Ay<1ZfV2=%nsCR6 z>{9c=`0RL&cJ0kRm@31}8@_-ODmZ#`cNSamHXh0oo?z*2?s;;JhU8`m;v)DBdu6R)MIzLFU`nH25#DkX`dioC>~fz**!n67Zp?*ABD0 z2JV8%EGLpCkxXSiK7<$fYB+YPrY3aDZm8+ts6$cWM51ml=tgj*+pMYR zl}j)~@-4cR-(YU(hT~zkG7*E%_dVhC>-k)wYMwbU%pHV?I&nw8cLMC4_|te@x*ydSJJUnA zli}J_MQ{t*hK_&qV3%7W9zW}gB44=wwU`n)wO)cm+q+Vp!wcv%xrN(rbU~HmdFcB!|UiE8HwX>hjChFI9qJ811#kquv;x-VMNUvFNk`< zcLg(8qcDhW>m=z8=|UKN_JOqs&%&hjVIb7#|MyJujkXD0Xz-o;n%r_g;y4JfqvGig88;g+OTL}|AdLilW4(j7|9%RItg)>CN2$3z@DTLuTz zTVR3EMCNdk6%B1_hAq#6aq)|O)<&%qr5{P~YNt~OTdhRtL@DZZUJJfm3uYvD#Nyd| zS?s*6Lg2+{!>5&7=?n`yy073OdoQO8>leI4{!B4qA9)9Lgl@pP>9eV@8wck3NfZ~Pk8N7t~kHYekM`=5bB`E=;| zElA_I_ucP&g9#0kp{FJ-;Vn~@A@Ucm!Yc(M5|^O~e+N2v)1N1>Zf(Fd6aHcIvYtR* z>1=wl)&q;gFN4v)c|>xo4OeAo^Zk7Y7+kQTNmE`!oW%?(7tX@aiz?Q|>N+ylb}^C0 zr5L%{8D%fuz>(D(7jW_+rQt0Vl?m>W~-FvIjRx@*5dQ)^XfRX)zf zId-wD*JbnGCM#1%fm=APCr0NgKjz=~Q^rt}QA{XEfq6D&_^+)JPH5yXHI}6?Wtk=$ zvS%*}EKo&JUL7MRyn_C;n*b@1TwAl@MJNf9qzB|BY0v9NIB`b^6r6L$&)J4F#9Eq= zMqx75?H;=&Jc#Ky6$_8N)#&|qg5dK*fF=~a#=eOrSe)7k<2`=ZeXkXdUE*A5X72%V z6p)nnvu%bBcwEww)IZ)uwTI^5#u-0A>0kjg+N4vHes?A#PnlF*k)l!_-*MIUS%hBJ 
zp_O~}=m*U-s9fa7#Liv^`QOKwxrr~)`ua&+beSV?JQgK0YQ$-afHQt<_JI9f>*2a= zHNU+08@er=#Nku^FL`uL#~AJ!;U<$mHqzDK z!`W@qtnsLCC4aZyDfqERn)IK$kL9NGX!q|JjF!`{5dA9_`TZTU_%pl1+wnw{Y04bBj1lmK6ZCs4DybD^{POHJ3E1g1uQ8mlj_Mc*6h zk>vg>F!4~@Bh{VVf9noy?;JDz4!}mNFcj-eJ1W!y93KO?QFY)80N?K;IMTA z2u-=lJA6%?&Odh$EuTwb<7&>mdn%m<{qn~%tA673{421!`3Q&|KZ}R+d~tH#Q?~s# zq4Se8Fg@)GM6B!pmDgsp`u1eXk}tTMyOVv5&gP$du?&QJb~DH0hj~Rm<>9XGIMgo+ zWJUH*V7y1&C+T-^0_UN|apeh{sl1!oLmQLGcT>UVRhhS%?op z=I%$3E1U>p^BK#pBTBd&+tvi5bI*{k?o8%0l(eS03@w{k6BKFKIYSXS)CF2=6v9-Ok8PEMH(qRm@j z8W!`4JvP;d*VLp;=a%f^TGB3a#3TXkjp3BA**ixlOOY;1r&>YfFGQPzm4Y++dq3SeMdCAGsXl=;t%1mI3=pPJ^{pB|HDgx zPnnAO@f;c06RVGTQ#CnP5>!+SVqTiCYLyc%IBiIjeWl6q94TYz>gK$rM z4ND(4;ln`!OE;sg6GzM(EWrB9FRGRev>kt@#i@I?%SQH6d4U$-c&(ZfIXC?&!h2P zEPHMXqA*7W6giv=MH{qh)cP#w$D|&nQ&JS{Vw!yL)ACdT4;{7Y_ zsQmK|7Wb%qg`))QmH)QgD?oVWoUKL^1bKF$JKi^=!(`Hycjcb1$?uMm( zdKfHKgLAAWQRCk^c(Jkrb1t?(1~=VH%2-5-XKADALHe>H5m~ z1Lp;4$&Y``<$WRSnWA=#TXh_Cx&~2pEFNQrxjU!20Lij3f$YWyXcTq?=B8@X#{11& z(sB)LtrwAC^`4t7%NV^VpnGD!}iC*)UsB9^dByVo5hoG z=VKs>RZ-MYB82e03VGv-DQN9V!7nEfE!w{^&;P8Xug4mZdJLd|&M~|Vlh83-j*5(3 z;0@n$VqdWBAnF)^FKQ0(ldf}A*GZqz&P)bB$a*qOd8hDekUr*ltJ4pod-)m*g1~#) zJ~-tY2^)VW^DemQ#LJJ zYogKhwgas1Pr<*;R`R9y172FRqbBO0CkpvWqT8$Ipl;-aczOr}|4BkoOf|@^xW$GZ z)h5C#!*GV>Vcfjx3H#h!j%dD`3MP{pm`$I!W{$x@ZlZq#ye@MZm*Gwrv2Fsp6S3@V zmlGsSXAz{X+ypINg7oSk9n#$qTrE73hYhkq%*&u>?1_F?*tJTCG#Z_Ue)lW*Pu>T{ z_;p!J2$-#H#3`H;>trP&Q7L1SsmhEkxP_!nM(9px^k@@g;MLl%x+=y3I zy!am$Boo;mm(gvtBOa8TNm`~@k}H2zQ9;8KAHIvnhR57oYicJ>cvy$LfXR?CZiElL zpI~Ep3fpt*1@nAk5^FhH32~i%tcD}bRahAO z7dvjv1Vh*3xNA{2`|!sfE=_Dm@5${3U7fv9@s-n`#cQx?RXKjabd-ot#P5p~=&a8# z!Bple99C|Fk#{|~*xL%fb6SzW=M%gZ_ez+2RUDF)g)wT1CJD0m%s!0Eg}kjt;9;}^ z%}?YpVOM_vzu_~x|6~gIFXfR)-*;F&Eta1%><89f2Dsa12gY%SbDw#0;a;9Ljz|6A zoRJR@%H%pdS&ZcC{Sc;z${`JxfN>WFh>k`V?a1 zTNpXznK=0BI5J8nVQEh*4sDj9i;SjY-`@Z{QiC`%K$xzX`5F|y?WdPYs#r~Kwwvpg z!nk(!v;E<=@H?GC-;fXfPM0Q%e=^t&Tsk?-)f@HC|HDZ&kxXG+DevZpEq!JE8go{^ z1-l)4*wyKl2y$F=>0<8Q61IZVbb7(_eGCSrap}|&E!Oe<1z0q^865QoQL9B1oy+z> 
z&MBzLEaiUYwFT|FUJcXFtt2w#Gsx+&ai(W?J;wfi=jib;8$&EcaOVvL_H=_dbxO6S zA)KctH0~EqJ)jFRj;w=i+&#@}zd3!Dod+&TuJmhBCsR_a15Kar!%bTmCj9G7jM09| zh^)GelEjZq`@RjT6fVJB`FAku^d*RYJj@uUn-PJsCM;Q`Nq_%$6+FAQ5gYSv_#QdV z{9d^e_@%L!nV>}K|1&11FPV~D;Q(~GYD>;(yn%<@jC;f33@B`$PF>SOVRcqKZ~W9A zkQh{l6%vzSE0+}dGCxLr*_44&N2w=;Y?y``O}_G9&7+$Cc-QS8Gl@^~V6R>Wq<@cRm+1FG zjQV0M6gUnKuKx!yw|2o=hYui@qJtNDTXAFI5?E9*m+D&^vB@i&SeN)BP^%Gugz@#v ze2%u#T-ykq>W#3e*@tnJai(t4TuYB;AydN9i9kIC&xA~++Z1%Eve*q~`m52@c_i@Q}_*v(eP)`jetU_Yoss<*pB*r}_~HdG7m z$u|08NS2xx4AxE*>B(-jJgi2*$-I$;3ls8pvzY% z_=n4HHe+_T0K28zikH!*$Y{p-k_7>qn855u7^{|q`yT&bZ*Uv9$6ifL2^6H7yL_Ph zY7%;yszSrTZa8$s5|wu)VA>{iyzoMkc8 z({tFfD+Ix~y^a;PGKXWg$8b%W3EiYHi~bfn1eYHNF*nT5V1`~J#{AJvZWboT{%2|ayqrQ z(7@blgbI~DXO}vKGvc-qI8>Q|QT|%QY&aBs_FL1qnsbo;P=V@izXNYucQUpE(QHP# zE{^`tpucrDl7>(9Oh;Nf>hR3S-^82vuhgD*eU&$rcZp>EP>EWsREM(~av=Jj7P%rl z4VT25)0qRV@TIk%6HH2B=IuM^% zdm3Qbid$ZN$8JquGH+%M9t(FR)Jzq_zsq1>?lJJHS`78=8BEJ6Tk7v}gSEMzgt7`} z@sc@5*f+fneTM?^gXSLGI~c$&UG^42S|-4!6}nWru?!5W{n-L53G)1OIG$TlfWPl# zu_w#3U`w(X`CG7r&Kh&W1m-*}^IeHcoEBrQ?IB#=b_+FIgsFFBJO9U|omf|w0P-Y_ zZ3#1_Rf$zNr^p5r;_rdgX-_t}Yc5@`Foy_kIe>T8OM!_<5i2p#f$ZwN%D3J5iJu^9 z0y7IY(=hiwP_6$3!E+Q~$6j-4d8>wXVL2^jg(tnO@RZHS{|kFnt;zdkcOZ237-nDP z8VV^A1Z+K7!19m)OU|Ve;GWV7Rgcr{ydl(OS81UcL@%>*VQktzde_PM*|PDuQ+GMMhRpm%I=QhnWF3 zxcb(4v^geEkA&U@x%G!w@0+jLMT;lW+DArI_vIjCFl`FGZE}v$u_@*Mml1`-ONyB= ziAn60-IM8kSwan3zN2JZH6yX@9V0l>h)&IlkbiX?w{DNdS+Z|%M@%|qKKJ6B>E6`E z#EO-Cvy^ktXglccpzrOCmUPBWyur3T1)~3J(?172mhM4$KmxTS= z!~XMG#{Tu@yz83JOh<=U|YD7qK&zchXReF=1 zvg096t}vm``__THdp6s2!xbh32H>t=6QJeL2j-Wb0WGV10GCr=Kx^zpj?F=DnNA0`nh`U>ZrIIHIn~4*X<3K??Up#mrS0<}@ZAjR z6etNv`AJZvSBRUr1lL0)CEDHL0byKPVA47{D*a{xaZePW$(;A{#>K7lP}x;B-oF%H zpNWS~na$W1ZiMcB6>vyIhPWRd1O1@IQCdnVVP9+!zU@$;KYhLM)@K2FM`a)L#77=>>!`3U=gP50 zNSXRYM?pAu##&K0i8YeScC6x38E33qLAm=QPPqFTii79UX)(oM>pX=nKO#(o4yu!F zZNDLQRFT@9m4T%>iPOdP(#J#hy?;O1m%VsjnJIz-6T zCR@h0Ta^B)9%Y6-`uL*5;!wKgGKLB1(`}11c>`O*9D%;a;4)74?0Cb3eOIS>uam&U 
zvyORcqDhw?mZ2Zs3KPxA;3!~RlD8=soTL6va%p0_qJQA&XM#*hM+C^%5r;gsN_`a&(xtg+a}VegHKVba}NC;$s;LJD~aSU3QlL! zF>69CZdKL6f$JyW9sSKN7!f6MTlI*I=NZV(T?YDZj7dW5QQWg*5ciHLktS0s&QDW} zi!UvOw1s01#r525Kb3108!yC*!s29XPYQf0=b@&XBH8vtksb(3Wv2~Gkde_4OTs{B(!4cHwG}~sGb}$_|fbt5`>`ntNArwD@inTZq zZTltobB-(Y-aE-|n>d5bHB1J7we76U$z_m`Ifusme1pbyJZQZw!0nx;#7v?MWs1k} zk>mB6T(-{9a=!xIe#C_?ezJ{g@qPymvN90x%ZyUTSEw~bm?V8wp`9GqLqewuJhwlq zE-xvE_g?#q@S%2 z9I%=F&FR_uvTVrPHVY`*@e!mI=fjVt`%K=?oA5;Y4rV>l+t9>47igO}%2QHArlM+~W;R?vL#SXjlM8*K%z z+>62E+bnukNSz%UP^W!A_TohDcmBRvf;{8>sI93$rE8|s<16fFkj7pb;robj(7DcD z5G{h8VPEjo8VUm-PB%Gx#eFxwqD}uh)ShTeH|E=d)UXe`iKAjC&sU&6Li6B59v`&L zqQJ@KICiT>!JoHL?92O}_$xGny&IZeQ-Fi0p@k@jr`Sgvw;_;1n zws;0PoR-ISW$r}V>3Zajo*reyrcmEgx^z>=U1)#Xh1Pq_`C>+@Fg<7l%)La=`JWYA zsbYmn4mzxN=L*{K@;5#R;g~I3GAK-i!!5cJCxO@=Ea7!hEJL zM*y#79%APOt%F_dhV*xcGYUVeMRQ#v_-o?KubdiBcINHIV~=~-j2<2Nl7AOz7VW;xWiRHC75G+474*VvG~?g)LS6|<9%Od z-zmko{WhRcWl^kyXEr`uD@$g-U%{kZ)S~hB>70*Tr^bBIVX`~vD@rdq3F67#Y`1C= z(-*V|F3iZ_Klo}-Lm$Mzk6cUkk6Z_Cbj;EbM-p)9mg#h&Zz%76!4>Rl^@rz0 z$FT9`Ff)0* z)2V`B1$Y!lkW>LXDiZJkGGtGp)%A0%(&TTLWtN4Ttw*6=VkLYPPQj~gUNGsv6XZ&7 zNbt>FP-~w6f0e|cJpMa6_4e40-I3u>6TQl6?SIZxn=T-I%LJH~o(|NOyoPxq(O7Q0 z6Gdzu!ub$?Do}9*^2TQ zmm~Z#KM3?HKJCP42XKFhYpN}x_zZgdLuQYs~Q-r23 zMwuHk^1<_>2d!U!j7eA(g)=wHQ<3`D)rC3NtJ5~y(dQkHur!F^iX}p9dGk@Ma59AA zl}^;;;{_a1L^N5rnC!TJnfZS6DO<7mE7S1K7^g6!V7s-Psjsl$*Yx%BF5OPXmXZ@# zlq5uL-Gv}$uNXPnG>o@&&1va{i(EtTOPIfWJr(R>$eRU4{4stmTK~;~??MB3PBn*_ zcv~69(*=me?P78(JOETP0iMj&=J{^@$9BeiM#&IS(&AFh{9LF-X1sfdI=$av|E3i7 zcvlMh@7vj$r6qwF{7rxuj!neW*)nvjmoVu&UV`-2I&itVika}M4y%GDkhu=$!P<_a z1akV2^YU7V8QMZGC;E`0n@E}+x$jVrH9YcTVdLFC45e253IEB` z(n^ZAo9}S#Zu4QmMiU5HB1kkBQTSQ61ww|GLiD^Y$BWggz}RSnku(~Db=p-Z{hUj` zPCUYN%@C(&-Zk?SBBW_P_dQ6OFGd!h=x5tE$}(4ivuYYNR9UyEMNm@Sh~mckz~jPm zh%B`Skpu2h!G;#V$?-GwwP1cgcjGvEVoF zR=*4#PT2;*2kx>JDRIbL8iv7@rKl!)kBN|v#LwH0p}yr#B6uK>&HK2W{PC8dgF!Rc zDV39{)x;>UX(#}NlM5mCmIN&}e8YKXWr$pu5b?TXMg=&p7Vm{T6K*R{T1*Yl&26ud}wkT%2wZH?_D&-jj5W{ 
zT+so;%-2!z+$&HUl8SM5+sN!&?;zvx6uRrC2|N*sWv{Qh3eyZWvp;8@MvM1JHP&+? z=%q3}?5)+Nc5bIZVp9XZ@XT)Z#K##}-_OURSNqu8ObwRhnL^&IpLl;{I_+AT&$hPc zV5vqdwj5DLC*7Br7ch$o6-twH>(gNVw|yvN<;0jq#Di~*B1de_U`$g_BRAe>MT>fH zTBj<>9lL`3!~ZzqSqx+GLmordmh-ZuHaHH;7D3w+d9rlfWPDL3NH>|BVvfHRLF*7N zwET4%qklNk4DFAwV(kLV(sZOB4S4jF(oc}ND1y^+-I>*yRqUI?9!zHcVe;dcI(<9# zidEbB5iP%;f`<1S=z)S3Fux&6uUp$9b6^Vnny>;3&T!sr^D^EVV|iM!_aq~mTmZ+_ zFTiK3Yhaf-jBX;FU*{Z9w*#sq{p&D3EEh$=i5ledsxwT>+H$6|at`+0m_fvp^mvU=PMU74jXpBO+hiF7nZV*|0GI|@F8$#D6~9*91@0h-T-;tUmlrnf6V&5WT!FMFUT_#xiQ;=!Td zanyh30AH0niCL#8>93nbjl|RllORk6v&}%_UI%kZ_z|S;sDPXhYihzi=V;o?(O9bv zyh}ysct2m{bI{pz8+^XL!#lkR;PRlF^Cw4h zzHJ5a@Ph{F@@IKbe-puCu!y-Exq-U$AxGBj$5! zIzK>x0qG^PP~y*8y6Eg_+FWBm{8ne}C`IYmq?Y%Ny-Z}~Vl|SR&C%L$k zYa}gdkfL**D}t6|D_fs&kNNP(ki3nHLiOo(wB?E{wFw>vsmq?s_5Q0k`r;-d7wSp) zZl^HAULHudi)b0+onTDI0!italW&t-2vfv~nSyc};p!qMoao&CttpK^3U#{!s~ z@`ZVYCDrw!v&&Q{c)jmf+(ES4bdVHQu5wB#lc4lI7`xo4NL6Ax;dBiAAaN3-(8Aq5kFn;H14f1ah8rms!fhXR#CLE*7IT;I0defZ*U|SXrf69Yq6&Uq`&6EzmuCVz*LXU z+FZa+%yy$PH-w46{SWZ>Lp5u9;sBgr6EWze6uGwh9SV)Gppz1eFM3ZP@2NP+J==rZ zrkl}UlBQHp{0u4t-GT2#T^Knb7`^u&fMZ7$s8-u0s1sG7rTLL-(@a+Bt!4)p!l>uIYhj zYBJBdxt*OBZAS8#7a;5S51a}+u(nnP&rQww;O?|mhb3B# zC^2OM^K^nW{!}{+?b&rb88FJoqCF?YO|<T6Io72n!Q1U*}C;Eb3(QQ&sDx>{`^{wGsfRDBBP`SQjPAb0|e zil4(0lQi&QQn*>kHOJIfo|phxINzg@5zddqk%w|rP^lDM=DcU0tS`n#%8BUxCIuQd z2V>&XOg2R06^PhOAnUhJ#K=cHYOiksy;{v!pH<;Fr??vfKPN)gcVV(_PY66Id&{in zW{~ciWB5MhrufL-4Xf6+poQ`;%nwhbOHKRv29s^5-VF~L|3`pqJSa;iw1lvSedL+b zB0(&Zu7LHQUC9drHP(Fd4JNZel*;%h0q^-nZ2xIUy{r4+uI(YFmz#_HoOmCHX4f%u zK7GZCELD1Lw*r>NWTNYqD2$W%$)-&5hf)1xh;I(0skMZapI!s;-=)ctbUU(a&O;!- zCR5im7uu6o2gfrFz~#vvTsPQ?7ApqW{(EC^v}hZwI#>*M#cwc<(GEEFO^s?bA3~?? 
zj?~UP4uVQ=VHB4g<9!%oeebBy75NwN<~A>S^-&Inbi|^y!4RnXu4L!hFNfVz*Ylrd z9f9ROYIJeuc68v@^`$d_ReITsW3YWG@!nrRKY5Ln#98yVv$@~21 zDKio1=FW8HkLMzuV92tTZxGSl&gf3XwVl z&BgkRiyX4n3$~$+$~88%U7B8Xj=_Nj0iJR75%zb=Fs^kq1S^Tvpte96=WgA|c*O|N z8JlH@c0)YQe|Z(G&V6D=6|Z9Xy>ZYgR0pBbd3=|sePkq`Yrz@jV_gxnL@ z<*}P?cG0It`)@d+VQq@#?^kO&Ieyu>)1I9pCB%zDlMttXx=J@N82zlFO z!0a{=g5ugt++aI^e+$l{{ecd9~JE5h;qm6%prKnL2TK7 z3A;+TOdFIslui}JuKia)d+QCz47>%~3|z?e!=BJNVnY_HhLTl=*-&kuK~f@4G8v%* znAdGh|8!o4$jDV#^Xfj%o%@tI7b_2&Ke~g=-}mhOL#Hv>!Go8{Y4M@+V(4(iCwLVQ zgHy5((&iBXGQW8yWjHN#e6Jb1DE%3CC(p;$XPmxdvYGUs^#REn-X!B+0&DxA8S+fI z<_?n^_`{b+HvFi=e}^nc!jaePU)gQ2?W7!ag<{4ovV|YiYzgg|CAdQVI;#s>^v<~t zm@#93eXn#AQ;LuBEh4O`)sA+~PzVxopIPM=fV~U`Z3B~Dzs(49u#=RprH5;40=?~EP8kcuMBN!Tm+8BHR`aCJo`!VXR98_J!HT^r!sta)_hx6K^wZv#}0C`07= zBrI)~#D}&?@UtcZbz&wFJ;9S`Se(dIOvz$5=-*`{)Gj%Gtqf()yL~~iO=9%S35skXPGI&j60<^b%CK);* z4{Fu-AZn}_HosS5=Cr2a!fi3w`nrNYy4--e`QPCGF)`x5(>e&>Ul)LSQwHOIxF2e3 zoe*+z5Tjec?Ev@Q2j*c|m@&*Q%Yqkg&f}fgFCm0H#13~mjyj(TEoT>St=#j;wq5VR zM?)1B7VpQNms-o~HRTqFE4Ir3bSkM9D7No?hDo{H5GIGneGcI$ev#kclSn;p;5 znit9nO#i@_{BMLgk`s&mi>|`$o?74!EXTZsqG+~03J;H1Q>o39h|aoF6mR#Y0t-_x z>YEp~S@4+yQo3YF&IRkodGPSA0Jt*@Pk-G)QV zRF_b8ZL1NuepUka6NO-$Jew*S$nt}1+;DZP0eBgU(dY5=VC3j^_;4Z#cJ7{w>Ff1L zlT-ltP`H9z^WBR)!}A!DR>O$)Xj5OOeT@3?RQA`bZ#bCa$*@wYHTGLN@yaDch0 zQzuBDtxDqu`CMZaZi&L(|8e!^e>r|(+jc6=N`p!?r&LG@)wPagu7rqW_-0mS$vmdH zXjW)aX+j!F({&z;kWeY4loSyWB~(gy&inbi_Ycqe7s%DsI?r_+`@U_(K16)M6lO7G zj2b(OaqY^DETe2G+G;L?u0_32^4yExZMu@?G{SPDlh8eF9x+auh~XEqL0?)F^_J`= z+Ruz|EDjP z%mF#JZ*~lxATcQ4Bf$>x_l88D5Zv&s2{%V333tyIp*k&@_oX@Ez;_>O2Vo-;h zrT4*%q^;0;RDj>*DHl*X4r0!fq09bC=-YRbL)o!3)=O=>8bSzW-zOSeO|V-c>V z;?S_t0CO7e!m*J{V8#MvL3W%2xSXnhiPJslryZj7@t7&F)AKuqN*;oKeqJI^N=gUx z|B@KT7(VY%&7CXQ&1813fhQi1$vFpecBwdDxS+5E{_#wuNe{nauxKi%Rz3hv3BZq~ z3bcqn`|PK+z^r@Lw0pt@)RrxWWw?+%xxJSw=o-aV$SKmrBfXHD%jb)`fd%&81DoCn zLap(#w7#JaRkTc5u!RB9n&|@z_c=4F(+=8YKcIgNKWjaGj7;HYxQEL{!SJjKyOsMN zjx#c#E3G%t8yBx*RKOyb6{O8rS(f#SAvbzMw3S?R8v)d72K&>E#PV}0s%_@K4;5$l 
zJotGmTg7*vjmtPaZQe955&)sblI-`WC^VXygMMCR5c08w)BW&^*k?!J7%MBL{`Mgz zD{qB225n%t={#4GI>a5@>W{0>`Qxv=B}8h|AZhjRg>Lr@#drgl zh4ER>>OrvhuO0394qku0D_Op<5mkHb*}(Eq^vS6$5Ef}pA9fysiO+K}L+2jGY}(3H ze;cAyiYqk6%VYDo5HQ>w3aImqTkG;0q?47nx8@R{H+np~ye|d&?2Mr=#zf$#yqYch zZxQ|T-)zXV=!3_{-N5UX2SogN3Byw);HkknHs#7I?&qGHWOgub3#)tyH^t1*Y0OWY zlw}BQH41Er{R*}bK62$lhM;IVPM9K{LduywxaWFf%egW%ol(Rcagm@zhVPO^_zV8( z{KXixJ3K$YP0-fm#Qw@XCy(E^ai@NX^EtaOxcEyFS+S#}?E8iilpTx!mw)5gjrD0< zLwAB;z*ZjCc%Fex+EUzHbKXd*XA0BTy%*fAFT+~XC9uj(6wG?Vad2+{CRF8dvwcKq zY5rBBfrl8})d@$R&Sy1Zvh;ztvc? z!W^EF^3fkLD;YgP@4;8OsqD)@E!n<89ZWvOiR1YSE$}9+okV=;cwq^cimK2 zkgFtqmAy-p!qvdD${cF%=7FC}6KVT$0DF}cSm*nlVBa@}w$G8}bWga#&4T5uz&RKj z>y;rZToy%>w21U3A#_$Pp~GjR@aKVC%q)+9xutC!Z=4itduR`f3l6inD{qn$?$Yq{ z*k>>o55i}LS%g#7Mg71Y?!xnT_`^V(u4tRiW>v3cAD#9>-#BAN%y{0~s^{d5$^zW% zQU}w5RoJHq;aE2K4*xyPf;r(=Q7D~AzCJ1k@0ePF)yd~jccmU0_5gH*hhXyiWmtTu z5w}$Y!snjzIMUXfNIvSsOVj13R=x?>>ReM|0eorf9HwBEXcqG2DB-M>sIwo@V~u z&osF<(7s|y-D>8t32AOHF+K=Z{pkXM^coCUA;pp&aim-E8oX(KN{oNyplVwqx3MP; zRg0Ek?=gRNZdDyBhUN22inFMnycx>xMnKECN>p`m#K2=K(EU&v7M{w)`ES;6m0L== z$>}9{$-K;5;t%CZM*GTV?Cc)d)CEV@Y<QX-+D?_TG@b@jyzOjTA<|U9Tx$)fn#~bNzTRAA7sE0cy1JFFKj{Hw93#ZUacLTw_Hd^2)xq?isQepn{#33@5aao~?uzpi5SE%H|661L@Z0ZI0 z`R6DRy}S%U$7|3S{{HQtnFfD{8}Y*BC{83}Ey{1mAP;JcVWQ1z!KtQn-1KWMoH~;Q z7LFO9#P367R=p-B+Eq9|=Pgc;Hf0h4&Qv#NBdm=PV_o4dAz0@ur)8E3?5ZN|Fn$E# zKeee%-d61RR)rfqWSHVM8&20J9?}fnalbfVlFZdhhQ3XFg+F}#1PZ1PxLY|n_^{?5Zc%;$Uz}Vyf7#La z;8Z?j2_BQdAZwPpccCC{R2YUxXW#&zrR%)I@f<;UCcY*brdvqRGT%dj%8?70%bpd~ zV45}N+>RpNGQ-5z?h;B{@z%tY*EoiM-nC}##|eCAXvSh~I#@5x1uj}tapN{k%nMF-wycioD&=FD24pL{larb$A!jiFW{g)KerN3<&J&uMeS@Ecx<%- zj%~ONj}NHR@zz`5;XyrC6X3`O?54xZr-39vT!|H07*mJY9;`00pJ>iL4ShI@{&K(1 z?Hggj7V1{QGLv_B)KrKAjn6@8>mPFEb0C=ei3zS;aHc(j@x-`nG`^kJ#C=#f5zlNM zNjJiKn|Y^4L)NM3>{hfGmFA74Q%alo?v^}V5xX1LYb3#`V|o1NWJYV`PSY!m72wse z6hfWPvu%fSVd5eSHb+OEtt#k)GK15!c7NE z33hF|iVynsa!Q3_?8r=4lB?WLI;Wh$gpgc3b7~wbTRfTt4O+6e7m>I_%b9IG^aSfa zctW^+1XS00pA7E56R{+Gr6Kz>u>q&XL^6eY 
zr*Qt!i`)d$rJP>NO(^btgz}Bz>`u}EymOL-cV9MO(Va`ErfA4&I>)2J`F4I+a|lKh zy~iiRfr8Fu%Rn>iFtM98g-UDAf*&6zvtrLI)Yvsg=qPH2g*^r=uQwEj!o|6V1-i`P zksEHb7zq`g>P$V=8u=h4)0{kkrdLS|H{Io}v9*~X`lC~Lpj@1Io(iFU6_+t4M;^Lw z*~6CViA?5x5Cl74W` zqiE7UL$I0M#Yv932n!d>hE$ZkEVzvMKL${G6oFF$ z0ryjTIY>ocfKF{onBmulH3tW=b%qPEc>h?qa%Mb8iT98e!x)lwe;YKcbLFAUy8@%oe*PS8d){lfbK(;9G~C7HZqI_iwjum`VIF(XABVk5*3c<; zDp7M&C8PvZbNjbG;6{G~roiV9udn$dN6xRBe|vb8mIfzlWE~U@N!BJckpH?Y}NjVS~k1jtf4$xG$CG~vhjsr%mYpQ zmEgqr1jW)bHrwIZ7(Qzt7L6hNY`CK!2;Lmw*?l8t5Yyu$VNua_@G^GfEF7&N{KHi| zxXg?2PZr|8;{cg^x6DST;}Oa!FJRAnBG~>7TC`tv57rd=3)3tnqx`&W{$Fk z=$Kj!ts+@?CC`}7aY!pWn{))T6C)sQX#=#2X`%C*dh9Zkpel(xgYw-cq4WqdHg0P% zxBRm>l>Vc*Ps)cAY1<>Hy_5nQMpO2~Hxk=ko70+gNBEvk6S2J}1EwMsc*nz0(3@-k za~GxK)F)%uVOLE$K(jz}?s05U-VEUl%i-Ye3D~u!zAWbK1 zD~&Yq^S3QD+*pxr*_?^iV?)Rtn>FOVP7oNy>d})uVN5oQ&q7Jb^E;~toacmto`#!O zEBwEt;Qz}pHu_&q@WlV&1RF2+H2MEH#-{(5V=S9*zf=5@^U_p`#%6pk% z_epUigO*(7&qTp?v#)q!X$Q}BHs!Wwo<#52OF+wCjLYSdGl_GkhUajnyV{@3|zw5j_{ie}r7g+SzQ|j|uSFVl-SCy2qKX%zz?k-WuZn zgj3oy9*3*#Nw}v3l~L4S-`iycqg~9Q+c+8*>=dWlR#d_K_dHYS&`SLLco}zC>CMH^K_-D*!%$Sd)1V73yVigs~*HmP{qj7vxC1(p4`_$5Yl zg>n-f#q-bBi;&!|LdCeNZ+RzBSFwqi@Ko`-}; zFQMDX>)0W>j_ZxkgydWDbjt!)7;SnV9+|1K`2+IoYEC@rdHfK(Nv_6>1XWtU=pGs^ z|Aw(u`>;T_oLemWgZ#~Xh(2+BxOL`A@|M}rOV;Dq)zmJsq)DSQ2X=Bkpg|w> zDM0WgImp=ZiNssIgPBh5oEpywS(h@N?wP}9@h>C|mvG~_Q=G+{R-7)wGcE?EkTpkzpe8no&h!h$qTtV@M<|NstD4B?2%!Hhh{Rhb zKH>E4RG2XDInl2^0(TCqg_Hg_g(Ir=gZ#SPthlp<9KUKvKfR8{F0)1S=Ic)My8H$_ zM~`M>7fymF{s3PyRQP*98jPPZi)lALLc{q_d7f_uC)T9`2WL*A*PH!Nk4AHRh@csu9*^8N&<2e{77R@Az2eC#&jf(v4z{1;JWZfY{6kRcmh1u4^zwsYHW?CNf z&E0}O%P(^uN=^uz=z9o#Fa`W(%HmhiqxiH-5yyQpq2r4}06&kz(8^Tk**XZv)nmw( zd$G{#DND1YIT(6h1wF@`u!l}y@8&3?g}6B;7|YOw(&27%6Ao*!@n_Xm0vUoIWGz0d?&+!qb-okbF-%uOo53h z>#(|u&pK~B#WkrJVA$J>HYxWdk;F|FBqpfPzV6GUL`{pDW2}(M6Q=IBiYy+}#|63t zl5Zn+!HuqZ3|*m&cO)#B;X1;^xL9d35pT@mu8RA^j*B~~*?eQB z$?rFul?EW?_jlMgw;g(aeItnlJ4o7<43HSF%|Z`0z^^;j4wdI)G{I?1$J{t5K%=Vrie(SkCuKBpgkOWbY5JgzGb<;hcUl4vx0q-t9@k 
zij9|XpU(qmd!2$#yM!40VLOxf+AZvOvJ->n$iM*o%t;%)1Dz#xXtrsE&5xIF1s3m4 zLyfTt&AS>0QyX6pxsVxj#Ksub!ruee7-ZnL`!_fl!#-TS%8~6nB93P@<8WHiHSWYM zSFZDuAHH^t#NX;~Q1M(F*O_EYTPL)l+C)uucrValHv`&w4k)yXuTS*O4jnx??79gDGG*C+VCCyzV3c{Ah}R$*?NKFj~r z#u-US(urC(adyEh80mMG`)L_X9$Z-rm6O$J7)=7Jdtziq{Sdy}lnt`tjyN+~l%-yM zjqi7R6D8+Dfrh#S_jRv6`+Tv6Yu|bq?>x1IFAqo1)J={oN9e-_Hb{W$qc@~?4c-@^kJ<2a?;@biopj8c*$YddDM88zZ`tqah?=6iU` zW+59|G#8A2JJ72iw{iW)bp-PkJp*;oR`8C$2N8{B*$sC!N1%AksvtdA;FZ|>d1%1E%;$D1>Z|8f$fpr z!k;E;f~+9^UCL`dEL!#k%>sMj-~2a%A3a{s9%jrW=1(Q6N@k4hYR4zMLGZ>0#`n2+ zjTZ+?R;TYHX_?Jr=(R4LaAFo6r!$L*2>5)rWApL`?Z*hZV5QaJw@NKzBcWBPAvGg8reuh`0k_$O~?sAozZg4 zd36FAyzxQ!Q}s7IGHV{Z)y6E&qV&1vWlfHbOJr+xm?%HjZ}BP8rjfh$TT*X8>BkcZ6qt~Oi*r=G<_dZLKYpB!s3)ykgb@7qkpTip!Gu-dMp>s zn>(@T)Em-$@F~VA&x7cUKCWxMEuC{_JXK27q`uy6Wm6C9QmwKI&_2=3eN}X!NAEs{ zRrY-M-0CB2$PuCAc_y_-O$seBP@&h}^LFIoOL)M|4?^UZ;FJ;P;BvGwvpYMA{sjpp zzCITe_nyW;iwVTMTaQ**|KRq`_2nd@Ux7<)8=lNC;T-@0IML(^h`oJ<{#JQdCtg9U z#^#e>UnFVx`RnkuWDv{xHTmkF9(^Eb!=#GefnRrL>&IdJW^pR0oXITFQE@OTprnbJ_R( z^I5ByGsmNiaKoYx;51FZGLj|meeZs#Vz=@7qop`J;t03fvIN#I$iu&?Bbdm_G8k{l zz^5<+F5ozLYxPCwAa6%@crK@UQhRZPA#ZfsCjmiemh6t*R@%G3fqKpN!8<#g(V_GS zggC3v+rC`_uZ@=>%%lj^4#&ghg{?T7)q}-{pD@zGh|2KBrh)qLEdOpOc$|)gp-Un( zp=6LaWxc@Mz=hcN}ETvkmm?9x_} zfAA7)J!+4o2~+Wn^JT0ySk5)6hYZtLRCgSG_vIn}Gf@DWB0XmFM-+}^=E8q99DbiMgQmRD!nT+n z*uQNJnvDw<+#BDGPd`zMcPIvgi*kKg4C;FDBkb~4_TC6xK_ zOfEH=Sf@j66-=p?eI)E}OTpt51jg8!+X~Bd##75sfay351zuB*c2MplWI`d8@|^oID|9^#{Sd*-x0r|b=VkQ zx~*Y^=7Yz^Y(cinC$6=`5!i$t*f7f)j%m;14AYF+k{L3vL+Lu?ACQ4#{I(=QZZ~_T z6Ap%sW^CuiDDH97LoR3MWIAm~mY$>UAg^FATW%W2XC8%^Ti*$_H&23n`!%dQ9m}~~ z7$DtYvmw{67nde@k>9z^f~yMep?_-t=;)Zkh1g)Q+NZeu zOsIbSA6%vpiG|Onu+f`82r?f=fpg}y_`TfnB#6N z<9!k9bzt$|WyJ8vci5m=Or-eVzbk*Y z-{&x%z0eQg&e@J(rG7Q=I9?4i4|S2uIR=pIR*P?a^iU8Vfz!{H3lo3HG8y}++_srs ztZ0o7;=mkuvE~TN?r_ID<_@HHXAg`q-biGYSYc0;1$|Jk0vzDW{f->E>&ECXgUQJ=jH4}l%#=Y_YP z%A@R0{(WM>NUEnXlVuimVuk8;aIm+*5&y}N{ePp$EAvp4iqGWss;kgi>1zVBM?W#_ zP$uNuSp};OFQZFuiQ%u|Oi0`Q2tB;|$m)MD;llgP;H>2L+N 
zKXJ=OC&HrYYg~cC7tk@ONlI0@U5!FierB*}ZjV`EL z9SUK}2Wewy9Bw`A#2&0XLtY)8LA}j8Va|+mxUN?MPfFxrtgb0Ju{ssM7LJ91Z$7-& z$Bl)o5b&JN2q=@DFTC(|BE43bQx>Fu6Lto~VXOKK{6I!=XFp5QTMcz&b#^TrnJdAj zM}C1nqaNX@|M;zm_eXddzgy?#%rAW^>J45SnFavl~HfIMpJ6OA?c&E_{Z3 z(dy^8a7zp=UaZeV<)&~e-xh()LZ08&E6H9u%dntvqsTG7&*axS0P&tvQ0j9o@jX3= zmQj-Q^T0>w*wTQak~hc{o~^j!v=!See;TujC27vAt9a2~1vDgv;g-T2^iCFI_s^ZC zI(=i=0kp`QijS#BU$F^k1znZSj(y_7bX(;&DBZw4*sCV6WdyMGEU=XX27 zhYg{Yx`UiMU|{4l`fL`5Z|l!;+hgC8eEE-pG`j}0_t}B(9d`4Xq^sQ6l_O~SP!%>9 z1|gfP!?rfN;jgq6(ED~Ky&V)t*6Hs>%hK)m#@+|V?Qp^;wU6P+yv3wNWCu;RkfBAd z2$TKE`>gn^jF!&}o1U3AV77;Mcl^i!-+&G*yYn4CDda)NTXhIhHeq>{pEyJJXM+3s zZ(*ib6LbwVL)Wku&eCgv_Vv>2!#P7-)U85e`2e+>YF^+(sofU`;i@V{HF(wYjQy~L4*b@6<~S9O1{$&4KhUxJF6FR zUcVPp#~?pAJ4cjdhdtxYKA(#<{p!Gq$3nGC0huD?dwD$DEhj$^-W(nT(FhxuC$vKE z6KX_TbREg5c4E)0Z=pu%C*kr%vslm*8Fo!I0^bFl!&gJ}oh(ug0*bnKQZ=hS_J=!$ogd*16q~_DJxY>AD)Se|jhuXKR36$uM+p zL;O}hkLeHo#Gxe;Y`^GB7#w4dQkPaUgD`b2`+S8^Jx`kTt0*z~f2Q7s70Kgpb?q;>o*96{uNSbT^8F-PMTc3Zcfk8?BDnqVV^~}r z4KI>{@T>ndI3cErDP9tMJ~RTF?hN3kL0jG=>w{a5w}HtuRmM}~*+O!W(@+zqm%{Et z+9WY>neY|L9Ny#jE=1k=i|a0|+uAm(5V zB+l``oTvyWnsymYA|vojVFfD2DX`Y}=fQkY2SiO7&8F355}ywlWYVMG*cV=c|E+yq zCS`pCJ9j8SW8qZ#W>h9wVv3KX zn0-`{+5H#J6huVZ|#jsJ`zu7RA+W8qqRN0#LBhbvw? 
zf=0{4VA9Jlrns__6stdl2U|nA))nt@uA7Bm!-E5OLUIxG3!QLw4{v3j*IAbE%N&QY z$8c&xmUPmYkr4hxge~dRhe?r{IBrxPY!jc#TH2&Zvesj8+JBOByV#AT*Aq~%A_ko+ zX2E&he0)Pm20#Ku6-85VYx>TcT$r5-Q15w`6FmxLz76o(i-o z_#(=`?I&UC%b3rv>9l@@C$83#Wo2>h;B-uy8cj%o@h|=(cLy5bfcpV*dN=Pns=o*` zz4}1Qj?YDWJi;Vyo#MF}8gOs9HNh9fcxIObz1cd1>n`2p(p)6z_tz@8a&iojyIu;% zd;;*B6mM9!h=db(pWj?gCQtwKgo5~Lfk?C_tocEpd%7BKaoxt`d~=}U!gdZdE`wm^ zGA!M%Nm_?|u_IU(cV|}PhSK?1pmzsSszvC9^WGS}=AA&hvl)}(Cg5ffeR%QjD*lV) z`GUrcu{%4_gfvmws{lc!han$q#F`-Mw$yHWk2 z30#U_gm2#FarC1)y*}9jCSCSNzuN^^(yPlpw0wuz*UGuKhyGyVl9NPdlO~-rSr%`< z`3)5fiZD4rp62nh-Q|&$;IZK`Hh%BHb@j^3@>_?X+x9xT_iuu^@++v_yfe6heBg!~ zm-E@1iEIJxXSbtYkfs-Yuzl-i>>7w+B^L^zuILcVtWl!}q@~#Dw?%khi#IG5Or;Oc z&1R;yYxvnjA{_nxj9WCGcUFCh;x4y8L7mO%c;)^%IyPn{uA1jSum6^YMH_mt>oP)n z+)6HEawFFhrvVf4|B&_>v*}Z#QWUQbfi30&rl?ng=2mj7uVx-wJL)%@M&x0rlPGi4 zuORKV8%bet7lw|vqV`iV@ZH#3@I&*0P`RrTPx$cPKNlIA)@VVmiAmuVGZ*?c>mXL| z{EWK}@O?5zO;qXq#+?He-nTe;y7=g$p3p!6(r))&@ zdGs2{z@o!i?5Uazbx6O7gNG|2@6aC@TN{CIySFo$dwbErz?z%uqDZfd&}KDSQgrri z23}JhV+DU#d#IYqg@=SwQTG#6RCX?l+~mhSKV?GEJC=V|s)GEm3EoOhL%&C=ETDrE zM)d6E{T4|qVb*%|iywnhDt~ba-?c1$H;6i2kx))_=&P%~==${sz8lB)yU*TW3vESM zuVxkbqwLIc11wp?h5^ja?#9a!Etr`12IHlwU}AeCj(9o`&-ffiRowz*@x;abVBpbJ^|ROa1f0}Hn7W` zitLW-Gcy0W4?O-)4?pv)oW8mUp4n1B#-~UzHUb6L+A1K{yb_Q6{3RUP-wUUA$fMK~ zQ`)kqL?BX@g5Q2fp_1SMclyC+pgN9{lyR&kejdAa{vNjT?_o{)t+-?Q zDOj2-%i^B}gF%2HlD&yIX^sLT2QtyMb3dl0l%Vs^bnZ`OjbM*{8$LBnM{93>w!F>| zJ}v$G%t z7p;|pb?en=igpRvYY`wwUiS;ewW`p?>UJk^DZGZJ?6G72A+N2!XRl1^ZjH8 z+ei>5=FGzlhRZ?n#y6-)oB;WUy>R+&J^H=!ASdcwPpsdmv7)h*z2f^zU+xpG9>3v< z>1pu$!oJeSkG-+Fobq$_iNXz)j#%mU9=C7JL#=O8%*WUmUrc(-Wj%UH%wBp6j;Y8q z1C?9YUU(i0e7nkEk|pFc3dyeMdb~KH8+s;6FvTcOwwo^&nkTd!FMc~nZXEldzlZXal4jh zdUBvUNsWD>)%fkq8MyW>1Rfj|2W2KlGva?@$Si$)ajwkY!_ODO$y9b8gENSXGNWJ@IlK8kD~x)|OqE7Lg5cM^fPHYJJE|RS_5+ z_D0uqRklYM%JqCGfFZ5Xxaxa^U@5l`|9Hydxib$jDQE*MI?3NzuKa>86V#wfJ`?LN zG(pt0CwyMM1>~k1QH4{ksPRCZz`0Sh&5X|pXNAxwuaY4^Ta_IgD@!Ylq98w0lHD6? 
z#1tk35kHR4@)+Iclow54H4@g?c5Mg7+TMrLQj_VJb{p2B@PcbHXu<#HIDOB5~{^MvbFt^;Z+!{r~;hpJIu@ZWC>s+yxg4O>%CuyZ<-F@A`5%GaK9wRiqgQfT}sNU;An)fO}&n8Q5hsaZW*_HrPR~|vNvr5pP zz88DqmGJ5cP5Ax%I;3s*iK&wY1#i=RNu$eJbhGUOdBX@u{;wSUJ5JO2dzI;d-7gU~ zJi`lVT8ygbG9$Ays3~Q@ynjbQkdHN%XWPL|9X=D*@(e_s22sgM5+ge`ai#nTZflky z*~Zz!`M;tp@R&M!i6-IhiLz|uCQq`_=rJrgk_tL)57D*7h?Tn%`eyPo9NTdP8b>AI zul!25@U9T>d2D7*{<_pwF-g$BaxB{(VFPPXF^1OeMO=Y4d*UtJ9;? zUTkFrYYxGc>p`4!{Z~9Q>`vA`9nUUo(q^85P?-DOmv~o*;3tupuynW;KHEIQ+|)_T zb+jV;c=HX~-o6jk>KaUJ~{plku-YlQW6{JBwQMa%6sa|=GL$7gNHFz9|B`#(p)&Zrg}PrWUunh-}^ zwx7dExv@~JX$z1t3^_MvL3z0bEA7*OpbJqjd!Pk2jr}J46mXd+tvrPJ-ZO#j9tLH; zk6?I87vA2R%)TBe!{SPwg&I1SnGt2Cxb_9V54tVfu_O_<$$P=G(PC_x?kgl$g5YqD zB}5Ju;}Lf+7(Y}^%n)KzD8ixUSB^Kqp2-%_4;T)z=`?av|PY@C3DE<1p8acXd28AP@MST4weH{B4(y zF4rGGL5>-&Y*`HT>-e1NK396txdgRr?dZ7{cb;|n8lL%#HIOXx}WO$oAl~z5& zea)Rjhf1``HlrkX;5L@+4>n~VFJ9#KG@Jum5x}+ie}c5$MKE9y1IOn6AtOR#U`*Rg z_#NFYpcCFfpx9Zs7;_E{2Or^Id0#jgdz!e6UCS2zXNhZfK0`S#C0OzD7CI!i2o-1Y zti7W*;hx)F{GL}X@M-IV(DRdM?PyO7*k_DV4i9nF6g7I+>jMt=7|_@SNVG-XqP>|r z9@a?&|0h*s+W~&JX+YrO{WB1GwV%^+@8(EOI||Jxw3NEzh*nkMJtaS0h*wf`~hN_`Jvh0D2F-_+UV z9wkWruE(9Om`bazqjwimm6%!lm$1Tf2S!wk1W9APJc9a zJjJPId?h+zk72CMC@K>B0GG8Ug5}^A8dM?TgC8kwz1F~hfILnl!Wyn(F-Wkm2m=hP0sl_RO(PZ7>Z@3tl_hi~s zS$a@D89#5T!CIbqofKEZHQOP1m=3f&FcfY)(x!)R+Cpkn8}2WxBkq?&Q0in94&I8z zbDIb#eIyuhlm8n$yabzOuA}FN_d&E@B{~L-!rU?eXdf}fp(uT{dN7V1mgC(l+t#qZ z5$6P*PmP)DBPr%~RUOP0xRGz#F)Yzl71gC}*_?18??+Zy5sMH|KDoRUOxnUjo5fW(wt+E!Y^BTIgM)2M2z9!d|^fEJ(NkXQ#Q)+jq}kUc+r%V-(AY>3zoW z$CU*x$(u>dxDe*NE0(zZy9P_+X9x~-e&Oz%HKg~%&hxn*SJ)soo6JxDgD1{>6DH-p z!1J3{u*IuyV%C%0FyZL}3yF8DUs`uqVEcNd(yOO|m{?nBJq zx8&Jn2TD)z+1`7$J*d3e2eS_yXFPi7vz59rh)(OEzr_h?L{R7Ge6NK8u|KQU@ z2^zEf3Kw(29i`>I;+3baO!I4*@Mk9{E639*qMX+8QTW~I0DA6`BF~&M zp~*plPR)%R$}S zK2UjCf{WdBA0^Y@kR$PV2p!*G#D{A9wZ=p+v0a0uUUDIAeE&jq<9D1M8;lySC(un> zBxuhZ9eOT)7XF;G8}vl4a0)wW!7eL=B;Jsq|8_*e<4b=8e_s2+1}#UFIJXV=@iU|^ zmWs5iR)y-9+yc|5G90WMTULJX5axM{vL#eCvQ8vd`i1zIV`W_Mh;FkuTfK=S~ky+*P(jIgW^3iiBGt)3N1i5sufL 
z%_c`w3%4Iug&PeD)ZqAhSS2!@OBRf#*^<|xs4NSGht8Ae1G6!2nCHqqx{RY7av)On zGIF|lEGAQgUMhG;o|@`&vKBIoTj7iC+s3ey3v`*1lnB%K9*$Q7|AVDlV*~+~W^5t4 zuv5a{xcNXHIw}rv(bJV#)h2QJRZRp9LZiH}Q9ape*wFlJ~O4H|=Cvd~kO7uudg8F;1 zxViqaKzS_x?`c(`^`y(VWzR~Mcj*Ihe9*wf9eoG6#WwiP#1RvHIYC3lUGVVv4fjUN zQmz2F`7e_2#~TB@m=%sr4yBxwIAHO(@}gg&sDhl8b>I6z|nP^V3K+e6s&F}`}!`FWt!hc zugMiinywMI^S9yoif_W#3m3w7-B!>uZ^Da7!O;6uiq;E(vGD z=`H`zk)OMMexOMsC%nd`YJpr~feNy~F8r&16MKFLp~@i_tBVrhVqiPB=w~Xjp-$MK zb4HLc&zH2So<`qe-@x;NDLwgs_tl^_Y>o;nduQ5;T@lO35k5OodsUvgo>OPD=kWWp z-L>#rqneD8pfqRb6zYZZJiLiQoE6Rc4SC<~YQ6RGK;V6+vI}UT$-CJU;PQ z#;Pukg+og(kyrOdvfEZ#R3l_OlQ6I4isp2}^1Y%gzhn$O{b?H3q%VO>{H~C^^a0fH zhV+fPY$!7rLLOfSD@R${)MP;?6wYPGey`^~pA$g(PX`$BQWE23Vo}?B8ui=Qiwop{ z??zK%f;!+NpTd>KEM!Lv=8$>358~?^6AZ9SWMS@ms9B~nEO>Jh_Pti3$+IP>a(yeF zkR1msJs6HWw}wjTT~snklr9h52nNrKu{XeitZQ<>&FKfIQC7Q#=QfNqnWWhuv=Vp9rXZ~1epqez5n ziZEdIJY(;vLq9|mOQA`^T55VEo9yhy{$ zI%^!Mo6q-x{o$0xRGfY{l6>3fZ{vI*2*dBEV_U2P@~mkXb{DnDbiWH1XYQwy)Yfy} zo;+)<$_C;C9)X6@R$R^JGY)?C;Z`-R;Z90xF_A}i@x?qRJf7RnjX9AG|1~9Imh=Ed ze=g={11i{@RBw}UZ#iD+i6jfn6@<6u=i!W0TNp7W3ZFP=;gYZ|I7)0Ht8@0|^IyjB zT{aPivl6U~r%K@oa0kJ{sY127ILPlm$-0VHGD|srWq7`p8>8LI)yWy6-JbQ#%jOKZ zd{v!2so9T#@-3vXI*85DR|5B~novAtn3HH!042r87`5jyNIxD;&Gf`*ftebOxHFoq z9vg`DzH>>etvI!gn&91=W(z>Nt434cBIP}qLY8^@SUhvAlQ!gcFJvEk`bW{6g_MN*nQ%32K(%42EyO3EIL-N-chlHswM zGn1L}iuefqN&ie+V&7*~W?>!(TKNWa?EbY_&$A@=J>^|CZ3jrI&0In6)=+HT@tpUv zRl(r}YHYE`AV*aP@YN>1=dpe&Q}mcar#}nMv}Ki2h)_yO zLc_@Ry3b3bB}GX^nGGr}?WDf<=ll46e);|f_k;U&U*~n4$MHN~y;CRG(lp52W&iAN z3BSkFvQhAG-53@IHj)1(2~qcEInwL3nnYgrz^z(Rz~+p?K4B{|{nsv7yniE{ED)w8 zpFJV}p9jwIdknKex%;BVGUk=vPpmCih7aOCg2%84oXa>5YmK>Fc^t>nSP3YllgKs> zoa1eEKZrxus^NT=EZr^Uhz~bh#++r!oQ5!qk=i{0qJ7-BexZ%_jw1rZ%AUb zQuN@`{%U+SAWM(>$kW=jgRuChEM8mCh^^N3Ox2oK7~UyC9Ju#JOIMtU&j>^Z^$biO z%ZK?Qo5`CrYiir>LoN-(pq9fysLfXa+qa7J)CG=XP@qU&J>i3iR0iDJmcq8*Uq-%a zWW)VVAu^iy1q2rOLGyv-jP@iO&{%f>LSy6LPSst8DBj|j7=uia$~8PY=!MHumf@kh zWf1CGjqA4PgP8gNjyd~*Y&DNQ+I9-8c0Xq3Ck{aM`z*}YFogoec6?AikDh4VN|o9w 
zz#-^32A=C+zP^d$rAGC^Z#50NOIx1CN}Yj2PZxq;+7_%hCP0INdN4?l^MrdhV@s1F z=_!!GxFYWU?ViZZ-&%B9kRzS;Oo{HRT!!O&Gr(@LDY^eP8WwNdPi-^P_(gjr8LS0+X=8PNdSTiIezv)A|$TTphf2c;M5Ci zx++DS80u%zZqFRNFB(cO2X29voKJ#1y9MOgbXc=A8DIZ<%+5OuAZ9uYy+(&1(pZ#! zxZldYI2sOb_E&&riyVwR=)*N(d+^qse7KbQ5*|%{!@hBmp+PS`u}WSo*ehAYJUBm! zYI{!OkJviq&srst-n5_gXNXV{_5~CN$KYhaO!%@-4PJc628jt$FxXHB%9jOE`Z?#L zKD`JOWYeL3>wFT#Z$~z(1KyvPrAj)X@V=-UmL=u!1ix;oJ>(Sz2U-#@ z1%0}yT!Hy9kjU}Qo7h`xWl(RUF4N*O%(exlqk-TW8mgj-TlZn1rclH2oM|UlK!Z zJH_IBwK?`L(;aztL;s+j)J=S{qMMmNu!QxkDP@G*!a>IOBgeBh!$rwrRQ+EE=M~Wb zzr(<|E}TyfXs6=~iPijb#;Fh=BnAdCVRS^tnSP%j%k@ApxI|uz{o1vUNn61&mK_sd z^8EuiH}naZ?`gsC-zG$$Rul7;pRmezIL^=oSGwpk!kz1H@nF|ASoyq-b?J?ULLD8( zg0z9z1|At)9E1OU-@%$Ae=+5R2CN+Og%4{)XmN=&HGd;au4fIRqR|)T;NO#g8S>=R z_4n+m$^g*4y_+tJmZF|4Gx@S6oq>3z(%^qzkva`;FUBXu|KaMuGw^<(3|(f~l8zB63WExGX3Ax@ zG+eZw=WpDG9qkL*xh4W+OXy!_oF5Ntha=35k7i1TO4;ZeN@V-D7Vd62i`GmKA`P~u z@Zf?b_NSU2K6dtGx9zV4Y0C`|e$fmMeOpL+5~NA)2_D|vTZ`F;F5ysvC(N(j1e!PR zV#I-qxMe$w55M-|919)i`Lq7s9icuVEkvXNxAFTCY59`0f{(@o*Y7pZ7N0un~T9{)p*5URy6X^HUlla%=1ddf5 zM*a>9s(wt1cAF_N!`1u2w_gB;Di1Ih?_J@8#7_Fc%$qq`)dT;nkt3=@7HItQ1ypuB z)2&LE*xWI0FH?7#IT@YLTx>hU`0j2;X-7AbIa3baaGp?uv2RR`T`auyF|%hs2Z7~| zZkTvih<0>sr8fP6@NsxLIrg%?zQ9|WLZs5f` zP3QQ5dURG5gWrUHqU{+2s`pJFHIvrifeEKDVg5;2ICC|ZW4Vl(g|g5h9}oY?PWeAyKAPi=;$EthardIfWD z#YxPWKZ`onCgAP^f^<`S8?M(s1r-K6A#B}h*!E14nkp|NBf8GabDtatp0C2OYX#{m zxrx;6Xcm4gRHDa)Pr_Vo<|y)OhD_yFX73&q+?MSOp9VZh?)O{R>Glhcrtrv&v=TT~ z_zwj+*7RUyHFPB}f~J5kXr|E&1DZhA71$Bcs66V~dj$eYcC%d~UZm)%3CL-W^M_oo zF}m_ecrExMnrLL9fMFS~Xm&%ZO^dL~_zUjq6QQ2pSKyAZ9Da$O6AJw7#t+|P7^_lG z*pBL*mK!5{e@4teT?^H>(_FsF=hm+ArYZzzUmAxPUDPy>sB&$y^)pKvznH#mqtyyGKhTK z!*;L!z#Lm^fen9E(IqE<|oxMw1b@lN2mUl1lYv;L!z8TV0kT#eGb67)>fN9bQ?jxUxaLR8#g{55|7 zZ%h6IznDezhs#;K67PaH)26U_8fNsIffy+e9DxPe8I1Lhe!lPIr8r^50IWRa2Z~x& zOs8Qc$Oz2B{9W!;^@1$3R5OjYW3D{&t^P5t{~H4vzuB_(YpqDzZy);KH&JS|*&i~# zT97g{Vs;BoqtuyWi`Sh5%j<8zcJ>4`D&fv8^Uv%#LsL5TK%Q(keG(dT8oAsu;=g?^Rmhh<#uU;TPGt#ICp1|pi^sPmpR^v)!oOHQG3b2YroO<>n9RUneeG3@Uo 
z9*Meq3<7-P>C#*AcBx$UFy`@MDDJ<(_{Pnl!B?`d5st9e7js_Vx-Hn5bPOLW1jDtZ zt2l;O2~yoptd`haCM75as`CrcXwGxA2Q{3%UyT~t`QYu*?XY8y5%C%L?>WaT8$b^StUio7&=zO#*Jrz;j#h%-IA?S`=<9>*j6rF{E{p zlGOO0C@tf@!N+#Rp~l@?TsLR}bB){I_ipq7&PYuKJX)}3_aio`W)IjIY(|fd`qY8* zHWsKJp;P+3LETZ6&T7oYvazYOOkRg<42fmc_V>Zu2|W7ZkvC&0*~DdC)9_wPF?Uwn zwtrs2b=G;EhtH2Kad5m4?FDQ}==wIiE$0fqH@^bWO%?3-^J&a|OsyZ;u!Ue2mqC_X zgKg*X(E7R~_3kg=_Pqxfj~Ppd{0>K^rD{8Suv?2-+e~H4wAXOmH_h;{{4STD)q})Y zSJ)#1pU`905XN1d2hlHeN%gn_+1{v1Pt8^#T{1$DBCAA-!Vi!lv2N6CT!{SMDMYV_ zht8*uF(wPH!tRO=TvMR}q7fQU80#3A_jdMf6Ady0{NyFo~0J$&jY#UYN5 z_C6>VRzK`y*5`0*TOgq&Rp;!7Y{{3AE zoA%f-XRGp|D_kMYgWuQ!Wij+{gb{~YLEdYP}gt`GNIAK^SNOMr0a^Nf;d>bULLhe#C&f9Vt z594lVxY>boPS2#ZCW>IvHVpM4yFv1n5KWghCF5Jea958YlM%R=oRC%_LQ`*H#nf&T zw?Bs)Rf~D;+qUtV1cbmO^%Xpr`xe8VC*#spiZnlb6kiLSWzM*`%s4f|cJc%xh z*D;}$_1o&FCT(F3j68%TzLSaXHFMU?SAg`a%tX<^44P|eL6eLIpwdN^z*{+D-Q103 zeJ5~x?HGSgf+dYst_OqX9PD(BBf6+M)3l!pFv#B>bG{eEQoTKj>N$hwqm_@T(%)9?O8?g>8^=r5~<{b1XrlS|~p~lY|8o zfW#FZy|mDZzKh_rkegq?H)I}jljFOMZTJHH<3(tBX&an}7Y}oCqRuuIww07yISXA<2Bk#srPw#olhVs<8#9+cd(uG({Nw zD~~@f_24GeN;diUSl!(AAtq8T0`&hHkqyHOsOHrPbVcAhwm7eySv*}IjQb>Mj?5Z* ze%^odLQMiZ@HC_IRE_DSuKR34uQaU_?88t`H!5{zG8z3g1ZK{OV76C;vEA2!+Zvo< z=fx;GY4<1|WS5ck(G8&dDFoscb3L>XvgA_fM4B??9^=<|2ZVfd=oBews+l^M^tlR= zAJ>mCE|QBer{p!hG4w{)w|($M<{u^w3)5#~9_+)zoVHij!z-Sihc*UHaHvuSS6^<% zyIH>>re7CKUBBRyNO6$!7(f@jxNVnRthu_T8<++WSo`|Jq4#R^Gu( zt8HZ-TdC7YuE$_S&@nh=?8y3k+fDY&+lWRF6v$+gk8pP2CCua)POg_vLq~uPuJP4` zXI(yc-$5OZWjcVV!6z_~%;NQa@MPa!<5;z>m+3a`<)SO4jq(s~!EJZWA=Ahmt7j}Dd2;Cu9%g#@9r72Mq9@jraquVy%p??;1vm9A3 zL1j`{RAj&8@>;N!7oZa52bgI_vEV=90Dp;12I>l|#F21GI^)=Uwz+p9yE=CP1p0np zmctCXC`N!xvA@KwJuF-QI$4q|Qyb+uIqzqU&%b3nw`AaP87XS>YYp{TIDy_YN37Vk z9Zy)uz=$Pfv(=76)Um7R^lcH^?Yaq>mg(S_HxB}TweY`$UB>e|H*rDaM#v4+A{mt` z&{Lj8(mJZ(9mg2XOW6Vs-kMPJNp|e>*~^&9h2ODpz6O^6OUJ+7T41{b{*VJg$M_H0Vgy@i87?r;uMgsRYGYcxl@Tu%7ssw0C9iIe|t@cDJyO>$hdkzP~8!)BuD;nn?wOjlA1rr*pOD?A@q{oDVG1Nnz 
z*jm+L()XJ%w$+Zz@8Lrs#?Yn!A#$H%wI#U};mj?3w6?N_g`8jZQCKy(GyX}Ahr1&^KhmJswP;|FG^Jq)|QJOGk!389%q6`K7(!n z<{-G9GR+(7nByt2>{0&#)VGL-19u#$y*%ehce{#5U_>e+?tuT**Gw4Z?fNKT&HK zoTJEG>=GyUt;JcXvkd;8z7HRGEF)}A3v6>rVST^KlI-i*_HJRC7;;IIsN`|_q-+e# zI%G$@&XzH*D}yO*`^e^372&hbVR-c=3maC7visf4VfWJyuw%Zv*K>E<2#?R|Sswg0M!dc2Isxchn&HQRKso-v3JzR4elB2m2OkI{=yKT z-AVA#`-WcX*I9|Gc1-B}Yo9-0jefcl*^szUxacB5$>M#GTAzhEo$9puc`Ali1aQ39 zW9&|{h1qb}ipkiS$NpQ|0eer2z$3GCj40P(Pm@Oc8W)PfSJ$vJEL)fyhog9o$-H@t$IBmlA{~>(DdhrS+DhWlXxK z5vUn=f`WS|41U(Yzn>-Oh2k|(z!QhOEFtQb+Q_CInL#B^?5{5=JBzqmiJ*rUJuxE_ zFKCZr((|`WGk3RI_WBi=)(cR3&vlLMzt?_Yb?GCsA{0U;0x18Jp#b{mFD#b~9l0J+g|na#N443oR( zkig|9$gR&?ac068HcSzstHkZ-rjd(~5v59YFyDD(>1r-pC&=r`l1BOM_p!m`ExgNE z1uw<8{>{gCLHzX=$TyXx^|D+iP>>@irb&_S_NrLZ*99G}+rU)48GnxSvd=F>!lOr} z=)`GulMCZu@uKrs8#jeK|Jr~ePWm)Cuo=8`UNgOSCei8Re^|MNZ_zbK1!YD>h*9D( zPSd!AW9qHAX=nf|sw2_j(_6gdX+~zuoRxzZyS_6+;x4&+Tt>!Nu2PNmK413fshST_Jzj{T;5LeNUM_ zk9feWUpA8-s7(gmfFo?AUzz#b{&MI}BICB>0@!}wLH^ZnI>h~MDsOhezw?Qh);)&K z3Vmz|cTU%+?j|kaE~MV_45OT5K;OPGqaV04&f@$HOz$utPlgh>-|BhPvOxHC6{y5c zE}tT~9v;Q#z-f*Zc|Aa%R^Qx5Gd~YAH%buya*V+VTOOl|hXh(ioPc*xpYc#M*E>aQ zVD;V{cFvJ%=GKXQaB#&&e5=<96+3I$LgUlW^c~p{>$zAKF^Bn5Rt3WUVxVx*Om2T* zLdOO#`%^1E^DAfmhx88-^P*n$ij*Uh$u38rP8F=WC_;?Ax}G&cQG%XC2>28 zcsem;IhzpJkGefF5R~IVbfOQldo!`#aPI(nu>TqqevHJ$mCAIhQ5p&^HD?$1NmHMH zWh~$4DYNkNZ|J$LNQ7!lVfVLDs6VX2?%NT8LW>?_c=`@p@*ozfTr;rKHX5w|n+_Xt zcq|cDqXE$(T>d!;Q!eX~O}`ZAD^Ypk@@py_knv%wr6vw5#h8``I0h2gW`fhz1xjB%8n=5YM*p1uRy=f(t2;7e*LI)AAsnegIRcO0v z2`+F@XPiWgNo&6hjXm^*>CavP$`PH+Mafy{_vR}rHn)hGzwLGRL-@PX2C>B})ZlFfvhsSTb>#mE(*}BIQueN$_mdO5(Fxo@RD54+;8fnQ;zj(afW-x@th>!FDwqs6wmf0; zHl2s^t;>kRqDFSFp*0m&SU_$+l0zq_QZQU?j(7Gq;{vBPsN|jFcNqQR$IL&-xEd;8 zPwOQ-u;M1O=vN55UjcOT%}~5->xvT^O|f(PVfyO6H>_XsoF~6W6Q6fB)l1JPz>}+g zU`_QNxK{4T+!fa#<+>?g$N4~(&dk6Zu0uf4H=PU_In&5ITliG44E{)&(aZVS=wi}} z2Ya9M%M^^@$)jX^_unCW6VB;MJ3iy0*SU2U-7B!#ri}OfYXQ4u%~X;ms7<o1MXr#m@@ zdK&8-&tZ<%HKDK4Y388l9{T>JG<|kY53)l7Y5&Eu@IyohVv0}0-|)9E&#eKDd}!t8 zzZnPD73{vXS;%B6&f}C;mUma&X9| 
z%_0AbF=qbv@!)?Rk9b8o{LkZMx;rLDI{pvASRhkCAkI9LnwOu2vKblhw8)07nvGC8 zCP>HhuVBCWY?}Vg4!-UC4J*=>F*!JtZfgI?s7*4UTI?-mLrXO@d9{JGL@7KNnnFuQ zB#GM3iELWC4EdfsnOvVi;Y7`RstY{UE6j+_*r`G94w}(*pEbx72P2YO%Fs<|8uWbs z6kM+;NrX`eX0%TwtDgn&e$2MUQ)QFr#*CAU&9oJ?E@l{BCumcd=4rI}*c~WM+DMJUyV6#R!(KLQyu8SVj*IE zY7gUc#2aUkRwmxR2fmU;q;KaGRGGXKa;E%&t{39;NoWTgyFHIRe#Dq;4V=WaOIh$c zq-K!domNyPER2Lq8|Po=SP}o}a>3C_g6% z0oIC4`aw5xA#nnp=$nZ0h78=^lnYU=z4&Ud1I4!{qJaNak{c{cYu!|7w&pCd_K7@o zm)OgtPrt&p$(iigonego=qL2yQjD@?^T^)GIp|b>7_+(CfwE{0^E|y6WWRO8g=7IT zkIuxf&FA_4BPt-WPK~U%>WEW<4VX_}7a;13HT{*nn(WB7BY8IGP%^-Z*7@h7aNsU{ zIJAXI80gUbp4ue4$BtP4YO3dFodzFK6S`lb7iu>A!we%nifr%4?Cs;MM$#y6-irmK zsXYVj+>23TPC6sLq>TM=cmn$c zl;-VJ+OnP}Y`UNQDa38U{M&H?&z<-`e2qD}TzYQZe>BrZnO-P4h+Q({IQ3&C=8FTh zK0B3jfSHoizdn59%imFOuO?Zg`vXk)jd)`|L!$2QWPdN3&d&DrB@bXZU3N$qB)LXZ zq4g9NygLv7?JB5GN;!fpQ_u06bK4=^%87dD%%$H%CWFU&6S^Wzj+jQo!s8|GbiJS) z`8o41;08IGvpp6LcB#_ehY6ZIIF0IV(F`kc0_JjxoxtxTh!|Q9TXmN5MVnjUUu7UI zX_uic$+p;V_b3xE@|0&$`-kawHzTHwCt&U1Uo6r)O4~QHPt3xE}e2J&IG| z*`^yfw_1|=_T7e6GcGgM7qkigIv-7j&51x)4UP*dkazx?H0+KDH3|KI!Xj%)K$jo( zmC4XKts}6dN16VfoPi=PjP0ox0Z?KijjMLmfiw5@?hamst9oSVZ8ZjSo-Ki*MU}86 z>3&`nJd+d|$ z>!U+}I%%G93?j-8k^JRm^uI2KD(#J>%?7bd(4v#@Ppb*UH`HP26dO3yu$w5kucFPN z8GTnWxQZ;<@|y?8|bdX691TQgZ=+&s#;xtM`!iS$FK!bMp9?pGuMy zh8yTlNma&h>u0!a*2U;g@Ps_|2oj68m|#_Ndgpo%<5v6y^0Zivc6%MX7AJ8jjVg3M z5)Suuim>}-4VDWi5Cg+xD7>}}(=I#Hx@GyuFJDMTeworwCzaUAbDMY)`(~36mvOL+ z7sg1&8$a-u(@o56CVOl;@wjP5gYFf>w1#+g^0gj5JHH>dyyQ|dTcXIG+F^9Oa@F?V zJ3;tQI+-R1=|anyt?IsWrq#bQNv%Yi6hNe{cjPZac~7&~PbDNZ<1nE{6Yr2^RxE&!!MsAOB*m zSep`o3^fq*&R{z$j^hWDuek8jBAoTgnp0;l-~m@Tc16?#+BtVQt2oxjlD}8EZM`dT zDpY1~230a~cl4O$l{{KzOj$I=-}ihv#g~iOX+8mV0Vy@^Lz` zIco-UsRL=h;zONuZSec*6r3|$gCQaPDD-e0Gh<6TYmj%0+R8}Nz~UgTxl@W-CO1P7 ziDp`7jxn3xx8cbBU5ur56>L&GP1{Y*6Q<0N98VA=KTB5PnNy1)syv0UZ0%u>Y-1VU z^h3<#JQK2_YZAPww&J!!KE&os7{mvwK-HRys1$J@S_oWquq~+9uNGKVO+A zXFmgj#<=>P3Hhct4}aJPWAz*%+AAc%`>kVa=k;2Yt~ww?rtUH%&*D~)wS}+nkB~dM zckTxN-*e#1{S35;IuF6VTvF!Xd}d{) 
z(lM~Ka0AYz5_rs28&VhQvYT6r*srr^5aZdpRC$jLUdZ{6T{G2&Zj00=T9J~ZFx~>Y zG|!>u=`QH6i$B_$fgRZ%xIa-d^ zPkRIhJya+!eiZM@Y$v6Wu4Kn>Ez|zpnv^aur9n2D;12?}tOlhn#ZIVr3kLp)2FPSNX3qzvA^d!2W zcL{m*XCY~SK{6Gf3PNou{jSpj$ zMmYAg^us>w0T>%U!1q~l6|e2kN8Kw4^w$k%IBRD`#947N>=(?~W!;5Wp}X+7t_(Tt zV~6R{94YLh6nHg?(I$UO(x@s;?%!EM<<0d-LvtRkXpn{2h0}@ITWh-bgEVn?wTI`L z>ccfqP(&!)A4U1Py0_CwA$)nc#|m2nr22e+h*e$=~48kY6H@DlB1n{XY;uB7&+B> z93>|YosuV!Pv>>%lnCy|r7RAUqN~B3bJ6C{4x~3_*OOfvPLlqmBJ`lC1T{}EX6&w7 zkl$(ydHL}rrxq`Ou={Hv{qF!Px+9%RSEo?s{t_C`XQ+UxBo#hk~y^lVc=+suSfWGGy1JT^zYcmF~N>>2s|AL(u&_*~A6?R2 z=+{k=OeLJdurn(`Ou~|WpuVtknkdbyGNZ2^{DcqHlwDW3l^U7vrg7>qs5T{{>*=t6BS8kQyQ2np=Oj-5<3X-1j3brq7BoISg{_EKOw?;yQFzKkqA{`vSApQEBg?m z?jr2x$So(%c#=Ei+cEg8855CH1{z8$$zGX5^tzTHO>>^f*zQ`!w_LlH+GYf!*tA%* zPr3nad*0YJ-Be&7n%83JmI%`H*oHI;>Y@3a{b;6e4em^9$KWSUI7@dLn9Q+c*HrHS zStV;a;weX9o*4OU(1Wd;!^xO^1wCbQ1Cr+b!K0GB(CGUFuKu1w>=Wvt<=`MsLawYn zQ9Kqj9IY_vhdRvGYT;Uu&)`qvbv$`(1)5sa%FK%Swr?UbqHUexF3Xm@Fqkg97xt_a6Mf&DfIX*3srzhFH4z z@I;^6(FgMS#53h9-U{RtH`&QFbmu)*Fj0%tmRGO?8->tSjw6WsmP3Nge0s9Gf@{S& zMjHp$lIl4vjC8vMyz}A_H?3Rv>#;H^_#9IAS=WY28t`anaXPLI5U0P2-Z7gdFeG_J{Ne|6YJ5Vk+F4{^R1EJWKERW6CV<{O z8>llMK^Kl-JF>@|c@}JmkAFLoXNzUXnrnyPh?O2OJ3E;~v|Xk}{Ojzv6xWE&EW)<= zMIdp|7>DK6Xoy${tP(UN7kT>7rj-Z0%MVcJfD;@@6r@K(8Zi5NDDF_W#rB^TBnEtC z)?%qRq#lqX+sCbmwy+{P{N!eRKDRXvFJhcHida^FGY*#*pu>+SvV8mln4A;g+JwsN ze>$m=^b%!!m#RhF&PkI4%Cl(S(g&=qlPP^6ph8oVW)PjOQatrHk%nv8@J{n)vu{n7 zkQ`<|jahyQuKp~<%oiQ_vd4m+N*5)At`o3BD+g3(2BFA=L~40(E$U|`!P@WuydJDV zcN|P%s!PStAgu&t^j?9%^g)pMc^fR3j6nbELUupbcD7`-D?J~600&}3`3FkEp&+P} zy)r?OYHZ$1$HQybFQa_?mcN|tdi0$2(cDGSZ_Feg{il)AfC89)qkwgg>%|woZ(zy6 zX~ZSfiC)Q@KssM7h4C}-%of*GNDE5X7sUpslb6Ow|2@Mjh+yEK&VBINx{S8JbRpZ0 zcO$uLidlQr>7|BnGzb)--5-L;ulKj$I+qY(_kRO-{i#&Fr4*RSea!T>mE_FxFr0W< zk|J*_tHujq9vsMnv^P4Wxp6UlJn;oGYjc ztCmM%-Hb_a(o=@l$x-t^jLf3~TPR%KlR{JEzrseNbF9;CQ5dZ|LWVE-!{eiV$O_IN zCpNecd-74}uK`5ZTwr%j{{YppE%sz@D%rlo2wvtq!*H);#GE)f zGV&E2c3R?-gM#$rQ)A+CPm9Exd;ovJLfp6f5g6O3bFEs=wExgJ4z`|QWV?}-Y~2R- 
zT3kQQba`6%NuF-g*C#n!FEYiOZK;m<3hH300G2{5%K!J9{oXCas)4>M|ajY)rv44w_Wg_jFD8I#kgXek(uSer)6 zUUMz8srgX%y#X$)vVy`&H@b#vD!0C~4B{TC!jw>ddeL<=-MKait&W7kuInmf_rEE~ zzcQE6xB86YHc5P=Gt$Jh4XA+rYfS1oM1$ftf>iVk{5m0-qx;B_3uOuHPnU8mt(-_E z-Dza)7d=9_{fId;IT3T=GjqgdG2`#qi;G+nk#BH{4jH&YAh*SMX0(PbU9^Id0Bg$2 zvZdqR7eOGT9Uho00kgVzEGp3>_nx$&(JfzrrJ%ShnwGl>Hl|XNKHJlTdO?3_aPkva z>hu$(R+bWWOEBf2$uPcSCePwX83$nG64D{VDEIah{|WUCx9W3rn#~|rHr9% z`vg`QHBeHVq02TM1;y!VSR7i*jI`Qt@6Qiz>mk}PF8ky*Jy z3nh8lG^;QPo@%Ydt$IDEH<6DaOJ3kL2Ps_Q5nv-2=^$|54V|p6DG^AQuKNb$F407NPB{H9}ge7lgQJKUz zkP`Zb$FFO_quyS|zbOIp`&8)frPJy6xUI~!1(^`mIE+hr6zMb0#r&~Kk=*UFU|Jqu z!v(i&Alxe#r#WhqzWaud6j01R#+>JUwi9Iwe|Qn$DMJ|c_y-CFe1MZeszhNcMH9{? zb8#pIjJh5^J%OuMa=@Mdl&n=Eb*A9$x(#T{C(@L~*G z?kG*F6ogod9vKo>Crvu<1)++UBE8-EgBkC4Bx>$>%2t!eP9b@kz*&gz{SkpT z6C+utl{w6niZ`ehkVJN88*^l|X8!P(dd6d-HZ7Zcn$?p&fNRa4@zo^1vq$$ez&wSE z5Xs$5F2~h@X~0virP7_4Z+{AAiwvl%oiJ5RUjUJ!qgeAk20pgrV`+*t3by7k_C%g$ z2v{>BqKBBcqU~hnih5Lf{|mC;p2E{~8xdrT?g( z>JVzWDT-}ye}zj!t?91zQ?RPUk3_tbXV%Bwh1*vzu(e)T*{@GE$}yMTGl8Z#kF?($3?oaEeff^7Y0Ht)vucyyO(#hi^t zFmHDRQxK~|&2Lx`$&eH}rJKdHtT5U$x}QCGekOT-u>s#&=d*M~9rR{IK*sQ6_#~bN z=eiSc%e@dX&)kT%6q*5j_7UZ6&F#NVt$<{6O=`6%nYfq-5c`&KTw>vclRqbsdoPQb zoA-Cn(DT#L*C!Ds3R}ahWfl0uT7=%{=zy(aUc^#e7U!he;DH6d@KBo*eHt@~XjQbK zQ{QLKu`mL=&Kok02lf-G$;0edb$23Duo{lNOQqB5d`NDaIladz`YY@@QP%Jl6DD|= zC`50fZzj}%ZLBmCBke^WjBJ2&Y(0+8_9k*WCFr@%3Z_NblKL)YC^cwfGS(T3U?q?N??nCpWgFuHKFcFWYG0F;a;1oxUt=FAOvyRr+KfHE? 
zn%Fgye^))1slcOMgni8=X?`0p( z7)Dbgpts$nL0`ERyn}T3E;7?F{O~rswo;EcegBSX=tqiTF5&SJW4hj!I}?5s!++YZ z;O&KM7?rStkkM|~d{&Gm1pi^3)RqE>?&EbnxCQ3_{bl#YHb7WjHk+n;98ERjiI9*2 z@k%#_GJ8*0V0eU1wB%AiL-ACz_&NACy~5eHg4A0{oz8G^!&K#BC>5x|6@Aj=W0W!T zBE23i{7xh}F4i`L`I`o%1;ItqW=5NIcWF*s&s4MCm@B4i!I9!}Q8L z!l%pKsI%HP=F!72R@_yLxV@SM67!A7>P=DP-|#*nd*(UL2{8w@UV=oeDTT-56=+f` z1?%sn!0{-=)wMkETo(Z4e}1Bmn;S6`d|O>zYHldO=7om-wT&-|6wvp5S@1pxTH}H_wAC$$%E@aNj-o$ zk!V1q8_%NE6?LR(ck#q)9(izkHa>P(N`zx~(Y22+GiMyN=!q5Q@X%l@l37OByV!)h zU6ux)(?8>x3Bf?bI7jW10cKk;rzd;Qq4tB@ASq&u?JV_%+n%49tFo*3Qu#_Wftxqq ztNNl3lZI9F1-Qu-GVY{7uTyWc9%)Dm!HaUcHQk_IZYE%F{6j-la*d*^gpW&R#lly9Lc- z7t#EHZ_Hj-#PY|nbP{urc-DB4wvifAZ*mLgow283LmwHV&64z|o&&g_(8D+9##nL9 zW>z!WoCKbAr&@-JAbPbEc%KU)^cJ#PBY1WJ2`FZ+Z@LCpDsOiteE#vDGmoh z=Fmml>>kbOD+Bc((3-cOk)5o`%~Mz4Vfgn7#8zKAUV7Zrt;6A)xBpJ*^^iAimBk~tO+IuY~jq`QnXHj|e$6?HiNei#xhbzX=ITQdrc{l~bL$ba}E{{}|LW}{?eJRDvfNYBekll7{T@Yf+8TydI6o=BA9 zYPUBKWX!N#VurLybCmr!oqJE8pJXm~{D6i#$|Nv*JGZI*1RgJMbChXC^5D~TwEpn{ z;8O$t&fimD>EMkgxp__c*%LVGa1j;X7-3_pxc#lz1Xw2K|SoQIo!_oNMd? z$$>JI?4F)}(b}GfJ*bpz$x|ad?Uy2yIg%Z;jSd(*6;?cBG@n`$q8CvzYM0zN6^PBG@|1 z7Qb&1CDNQr1au;qSE{Y3ucQnDHZrhFISFg;wnNZl*+>TO~m%+|7WmeF25%d&`QBFY?=b0PT#|^H6aWfBy>{3pN z)N>OsrOOaBrrr^6T!nZiCmVXygUO&yAa4#HO`@bwh}|cToA(g!ZrOnO0yO2()59g%zS70Vhtl7vqmX4!oTT+>I`(3fbg%4beag1Ol$pS-xFn6gu zl-{p)CU0kHN@B%O`M{Xg>{f=V@>7Dr#Q~>0GiH*lacog~3~(hCki0a7uTrpuDU*(a z>w$B4uy+!UAMDNo;@8pm*9Smz>UGp9I?124xPUvxmq6KkZ+Ngal$YIdK&0dHyiWg7 zHdYk5((fIsX>X+l+u0w)TsEy^IyoAKt*A2=m1RqUX#A4X)_(`i3x8qzPz2A0gHh9^h4=#Wd|uf@W>u2hMdK32fy zAWME@TSAP(Y$CT)U^Yu`OMd{p=p_ZJMn4FXN1JFEhaYq+!8T?;dW{lv^eNvYx70XEX^U9I_)B#+oZ{*bl8)5nmSX-HlT?g zU+`}(v_Q2B#p1F-d_@tInZst{>|=D8ZCh z%%sK|!Gf&6mAf8eidOFC5k9-Eg=#n#L3PL0QV(L(7W)q}BM@qw3T*GLlYkUk%KgV;`e+E%P&JVn_N&&|`vd};E!`GdM{uM4n7w^L5-cE2xOTdgN zfB5)M&D=fdD_lZS0+YP56>1mjvb_R@CQ{fZ#j5S$E-dro6-^9TUU??uAG(dh%T7Re z>}&j2?n>)!CX(h_SC|&@4E>cKp}c_xEgg~uJB3|vYquxL2_&f{%3*Br9dq`@?j0O? 
zL0}`i3*ReMK!WCOO3|}qGXmsjwwPmU3s2%w{~5fZh5~z{7eou6?T0lrBiLOVRXV1$ z1dm%@#N{EW7%0*s^ZnQOq?n@^m@o;Go=jujH=lDeE_q<0yf&%oN->q+(oFPBNY*O) z3Eww!Y|S`@1^gVc3bvur2ZPWlQ=6B&YYe+BdZr#9W~hb!x<(mnm~*0?I*L%)tEk2iCgI>5Nk~Q z@Lts;oNjE$GD}ME^vz8a=6DOlI!%!1s!bWb=W(si05;N1nWFAp;cge%(m^K0uGano zCav-ZW+M0Q$UZ6Z}+8MCr;Dy6)5q1#c%ow2KB)8qtsU zEC%7vUB|(z@EwLryW{L*V@R&!1HbA@Dy&$d&NRf@V$+;Ly!xjU)+Wp(6N$^BcaC{5 z0+(2HKq?5uNhYQS5*)!c{E&%|-o%J|J!npOQ$6koh|jWd}2 z9{2heGT(8&{O5H(q+2dzGM1e}=xV_~&ZoJ;z9G2kZ3~R2^L)fv6`U4!0#92ECbjrI z^lR@jP?a0R#*Hb)wyB5VQs!-b%E|$FjQ?97In4np_J-3P7aMrCbpy;$-$`>nY{wh< zKlr}Sk|eKF23>8}!SUN%Qm9m5_4f9ZdG;gfCTLNqkpw>SK8+@4^TF~E4-Mh7_z6CG zWcJMz9$e35flmW~o3Q~}xJo$sL5@`))nd~Bnc`f%X>9V2SeC5u6dR7&P(j=wSKT?A zDLuCUMN1#!k})Y{C)j8`3p0ci=Qlnzr~~JhCb2W~;_%x9L!pW7hbL6lk}%f=mRIlk z=94x&tsh7O(@){5bUQqhz7ER;)~)*M9XRtSmxR+<{wuNSxe?{>;I`^c&I?x`@V{v zc`c@nv7?yuYYUcRRYKDJKfz1U6^y6XbGA+YG2YsfZ8uHB78xPYw>*%gObZg-bD2U3 zK8nXFc+L31{N?EN5c;I89FNQ96x^XToY z&CsImjhsNNI<}yRv!C#qJ9Tje`|-FD4FfxH)xL)?Jy7W3&oIJOZAO?mZyDr8CJ5=| zO#b~Lb&@yVi3@yN`FYW^U7pP<;eM^Sinp>y;CE$rcK=~DO!zSo3fkL2UMCd$kL7|| zhYl<};XsT1!r<8s6K;O77Rl!Ia+NUUIx{Hf%W)2VEP!z<~9qxgK$fU}0U2Y~5NowDg3Xf{F^6^Tr2$U z#p|QnRt+*p`py5lTY#%S{U^Q<(hS`P4sbd7&$<1J(_m4!3I=lu8E#^Q z<}J`T&VeE))j(1&z^{PmAooNB(^wZ~MoZJLkvS}D=pdRVP%ZCo*JH;DJz(q0GuYL) zmkrS8xC6_2ajO4u&`aIQM?}Ps-Iho2@mLW1H*F7IFXLdL^JTDEJe&2*DuXSb?C7}l z8lE$H$xlt0&o6Vj37%^@;d4MfcZ=J`v~nBy`oLB^@}~km48Dh6u~HE8>LSR`zYRq@ zx3J}-ZsAd{ap?WUmV2Vsj%R~rWAFGOxG;S(dRPiZ4XYbm;@D{Lyr;lI$FHNb+(g*u z-Ie;Up0vR*fa=6xSFtI&?g$z-UsD} z^hoKHkQ4io&Cip{g4M&sxc%HL7PtH!9Gg3U$xe8WYZrBJKUWvStT(GDZ+#Ih^Hbs! 
z6&~~JFM6~2j`7^*^cb9KVkR`C|A2GuA>8y)h9na0xwM(9P-PEeQlmEF>)W>Q?Ylhv z{3|IXleoJvmQBAs5+#h!!)V2`T+IGg zcm}59DIpKzmv|2nzinV!Hh2)XXE-w+a2!lF5(~E$$o^6aQ1aT2N#i9@xA$S~ism%T zO3Z>?cr4mB2ZDPB)9$vjn2{+53y0R?4Uc{FXo@ZSP%{d~{wTsZ=A)SWc)_-n_lD1R z-N4;RQ#iNmjf-{l}YsJH>W zyb~z%*m976JBpr9sDm5NjZspo7lOul&|}^O)E@beCTPG9A459$-&+h;)@P@LXYs!G zV~{=lf}d!y73XQDQ{QHHdMEC{iuW(@TDd^qlSl&hjZZh)v4gEOqz6zW3P#w~hG1|yX&gY z9#~akBY9x_${OT9InnA}S5cE!gQW*$SjLTNE~CsIkE@JyMO0Pufc$Z4oJiB&y3=(d6``80jtJ9-CDq04oYZYk%u#ecYTw@`E$I}r6Q zx5I+Eix{k5fNs9yNGIk6w`h(M&Kv&8r9onwKG|#v(_Jq6|kFN`0+JXWm<}WpC|pn`tE< zFC`0|GV*Lpsu8=}VnC|)b0~7U4Q1?AVr_lnDehtxNS%HRN$(_J!7gPXl5>jl4sV9r z@m9<-$DZ9E*uf>!Aof+ZTC8Cd#qLi2i~O@L(Z@Vzt}QKvT`Q5}W_EAFZw_Nv>*{A< zQ?v>cUjBs2B1_yMm`ArZ2t=iZM7DKx85{BKJlyCsr$Hr~*oIblmOQV5OL@72HLp&` z(iJyhpRzq0XPwC;s&zS)oibGFKp+ufPcEPASX1l*mc25M9-Y@_qr+Zv@s@o!O)$B> zJN5&-guQdL^*fy3xRKY(I*d1zV(7sJOUyVePgy$q;ZRAtXkT^_rgoNM3_l2>cBkTG z2My-9FN6YpT=As-G=5pXCY|d4gkx=nQO1QJdghrx=Z{&_aOVTywlo%V8XT!xEJ00c z6)?eGml}n<^pXJ+px?Uz=Up2B4|`SFhVhwvtU($o`Q5_}?*zNy)v?UlXbFU0AI+9~ z?t{5Eg`9`A6Ft1Nf$6oFV!rPRoF!bB>jOOJy)C6 z_iB*zG$#sJs=#k(T_WU;_Cme-Ni4p83p*D+hXqI1v$*?tV6{||N?j+hW$6LzjbS2{ zeounL&VGJq>j7?2~#!VAHpKLgnhQylk(Cia9SFd36*u z4=CYp%6gMjlqL4N-4)+*9M0Xlp2^~>?}BC181}k$Eepg{mhi%brT460_Z468s=33c zyP^q{thCubtwpr2If(5vc#Ca1p)7ew4|H^;vFbiYoYyf^$l$252fmw8uk$l+*LR7% z?^7fh*IQuJ?&A{WtFf73KT$4*gjT5peSDJ1JOk}0?|BaDteEP0B|i}o&GN8l z^bK^a`it|9D$(?hmTdbnffCtXk1JzGvxM1+?ALS!_B87dv@My8wkN*AIaPCZt8Eh) zwbb)Gng$9<|iq$y?LopOCv zVb%`om9%+rycYADA<(k4jHqF!G~Fz%fOUg5VN00~iNBlBll*8l`fnAK=#=2*u%q~P z=MZ+i)d~ICNV>3Jk7n#D;+pbN@aEvW`OvR5wbVSVb(VIxrd&t;5o%T2aoO zk8rg_ho$Yf!bf`xW`eWw>`-Kts3T!NAN5d$dA>RYK28PV`gJ^JN1Xt_f+(>=A)~W> zFZf4^L)hcW=kR#uDVV+IIG1(R4{vGeQi^*a&VFe@A52d{Rkkry?X<;Ve~m#_QwvVq zKF;6F7)ARtf~hbslr8a3=dQ_na%FJ_u+Srs1zxVe+u4eAN!X#c3HwOHvDR$wNNJk& zU?aep+i+u{U=a^4TG!B#f+XAV9;kC^2W zkG8Xo$nk9^Eb6kP>fnQD1Mp3d!fuC1x*=tQr%#_-KO`bA%A)yqmNGP$X>n?+yp0F$HRz!Gf zLZyyEU+ADB4DC1vEB?em=viqx(0URcE)n6Ful=BYq!rWESJQ&@f9Ti!fSWB=$AGv1 
zHfq~x==<;&$}JSw1rKj(A8o_3+>uRscZpAQuEhl}K0@R|M>ziTAv8RCg_A4O$l7R; zIPBP9+UvC!D~8>`{BgN7A$$zXnY4+8U$@6=d0)_rD~0@wV5at=hQHw+0-dYwxo&Oz&<5FKiMzRcVeUha2vApZD!F>WPd=gHbsY=quN!Yn99boDw^tEe* z37XlQPo?m?u&5h{I!z_`l8iGw&co815;V4W8aoxWfOP*z!zul(urS=4FP?WE`Zi=? z_xxh;{G0>d77l>P@xc6sS7A(xCaIV$#jR!nAtUA*9!-`L@@Y;?cdr?3S)Y#28(xCL z%uviT$*G^QSD(E_2lN~)yaPA1a+lIq^LgJF(WAeo>f-{uY5OXHfU!;jjx>g_b3%qQ zI?suN`>d$$Xnd3;J{w7LD55(L8DJ%?5#D$6+&5ByhJxsh?JW+BSFg?~6IR`rZf@Np!&8*`qLV?qT?| zQ3$0jt)|rp3x@u%`cr_?_BD@OT+SgP9U7xBrYuKHuQ;xaGJ)AVx(T z975j5mcW+SDb(U4$LyL^DRibARSnMIXFXbukNS4u$ecSk^xkn)-6zY2D(2zjrOkXs z{6Mr241+)F<(TT86S!NcQxsd&jjwZ**~5>)+<`-`G`Z#>{C!!C8!DBJ_C*ki=JQyMNC6z@%HZPE zCtMkv#P46^*{LQgSdi%`aopU4qWsLKXt+y{c=h*KQ}0Au6GKQM z?jFuoJ_pZK?FDk>54g8z6h1dop-EDMv3&JPX0$mJeX$Xr{L;os;SQe`$PJQ5p{?}=rrtRQIuB&PbtFp|WKJ`iBOzQ$i5=Hg#m8SK zvmY(-;3)HlOW0tD8{Fc>mCD+3i>CIMxY&2=Q_rP&aQwnr9HgEHHP3G2SL-_%|0A8v&=fLQ$1`9^vl5Ne8b=;p zD)>85f-~+?<3HCIV^aKBAwx2rjq=o_?N8hIYn^rQ?ne#04h`jw`VVLByUqoETic=p5qyK z(-9Jn3DiE-jgYtLpO6RjhYGz;c=?|el@xD;mQ3W0D$e4^=S?EXa#OY{ei(JGEaUwx zHsQ4mLSOM-1lxaO5Whcb7q7QW1hIqL_`hoe)7ZAP{PmRvqUZDLaL;r(rlLI=Z6vEP zq$mxYx21C5!#qIh;#JYg$km+Y8UrwE8o@S9JA+3jEui^_+F_MmJv1Jx;CICLg03)U z_xP^N{-q2gV-HD`+TX;T>vv=A(~okNGK-jI>ot+I!eQ9H?gB(VOQ9``6iF_=3FEJZ zu`q$C>+x@;dG2eJlDbqtw$1OoblpxjxY z&kyjx?&gh2Ff@Yo^v=W7ojgqOv0$}(R>8~PJb&idV79zMUu-kx7S=_N!F6g| zSge;FEX^;3zcC0ebJW;YkpUbjQ6fQH%wA>8fZ=kw0(Dn`CG5Q;jyzP0A;z=0mAP&- zY{?C7^L!gz*R9C5xjcl~$@(C<`6#pb@Edjsw7FyciYdNTX2rA(i{PN!qHinxbLZjA4F22W#x z2)o{MP4kjr(dT^EidJ2A=;BbeH=&U$`#p)hdRxuk{d@<@rcVH!>2ty2>sQDuS%}t? z@Y$EQd zGh(G(pSg&mAxuG-1rHtd1`f7zY)Q)&?t9fUFpm5I#lrJGtkwiAS39w{XBVSe`Un2t zn8y%1?kY|%Ps5u}pP})pR-9TC0$R(Gn8SA~7Fn|e&W?|v&FTVKFF%^RS`9F?eF>C} z6sXs>*3h-ynT}{l;qo+XD6%;RYj%yGfxg3qb6E%H^j49^4xf#NeD6-+_a zTPP;)2|jZ_1)}@YC@CftkL!-2sKn{C=fw?JIdmt@ex8Kaf`+q(nI@<@Ae6UiDF$0r zcj8QynM_bR{un0om*q^e6ZnY}c=rn{?u1-Z! z#A0}>*|r7e@bo+tP<)%o9TlFJXFG3#>6tM0v856G%0@AtFo6-! 
z_l+}oQjS)s#%#V}0DJXg0RHFSh`Xco8P@yL&fVZO_>pUN36@mc;^% zKf+Y61K2+E4MtTi=iE;J0(DtM%3i)7C#gl_Z`Yx$+(e*&`mD#;1sMoTg~pYB=7XD) z>z7_j#6i8TlrQ1Of}3KwriY^-G%AMqvlAfU5G{HPvsuH_q0DBID?9xlmQ9&DgYpK= zfX|zS9z*dgrgU%$`B625X+Oo;)v|2XWP9P>y$e>@rm_7wyU-%I0c)T4azBf`VQ2AY zheGv>4XZB@8@TNIP~B~Wfvtl5CK>0p*{j1{_g(x8#6sCHtu z=w?TaVB}iDhZ_sl?k*aAIa*KRD#L|;N(@N4*SVjxZ7uky?0mc#1HTG+BX4R(K( zg&|HAxT#Z@dP{D>*<~n}>z{}Yp~pbFI~4~#kQZot;kejFpB`3B;-t=5&~~4{{Ld>U z6ce0GTXr7?idUfQDa9~zd=lNR>Jx3e(+LrpUQ{*mHoTb@MXOef#rrw`U|Z5W(zi+C zH@vxm8?PupZe9eYNgfu3T+E|sSCa98oicV_m*o%ZTarSWKqR~ zEATml-4qC%2ZfB2t}1s!U5&QsT!x2Vp1`RcS#ai?Hy(7=$CMUZ%6aw_#tXj{wXvg^ zt!@a|-xcN<+w@t6WH{a%90>iHx~wMRBAV!_LC$6^hG`b8^x7mQu`8HnKU|Nqas+x= zxdXGEHU+NSD+7MPC)j_}g>L+u$2<~yF(GycJ7oNdr+{Ye>arv(RtTa~+ZrKxqXpab zS1`Gc3FMwT9>WpAlGxpT2e0mVgH@aC+14jZSg=+ArIjyYxg$#;a)%+4zA8z>2I`ST z?`mrBJ`at;d|iEL4;aMv0SjM*A2Ku9&d4DsaqcKYosgr({LS#U=?z{u;|rp#uIx!@ zIY~}*rQ>Ih;p+L*g|qbwU_aY1Wb_g~(x(rHD9FI0C~4T!eHj+}F6M8^RO9P?rEFtB z6hHn?0W9BBR<~HGonN>31AnhYlO<|#G=6Os#|haBd^#I~N{v~r)F4V0@^AF&9~zYu zf#cHO_)NYHhlRa{vOn&;fn_TOb(i6{M{+o;?jOh|H@YgMJwTOZhV*Wb3R@m+2e0?~ zq4dNM+S8}TI}The*iV09{@+$um~w-k9Fzu?&W5y6_`Mxj7{sKkK7dDaH80Z=D;Vh= zgzwKN!TONI;*3;iaLWo@@lFcIR$oE2ExYiq>0X%Jwv_F&`^g1s&ca(YOIg?Ha@g?l zDi{WEtS4&=%sIbSpa+j(hg(8m+5Xk6e|`j5^sCb12gYEzZ3~+orbGsRH1YeYo4ih& z747;jf}5r7Me0FPEJnhTKlJ67Xye>q`n=eb{H)se9D#Xp)|O*_QL23IeFDW-j_h#G zOjg&U38Qt2vFLRxd@=N)c{Yk5GJWd0Yh^p;eVoJ9cLuK+=O$G%= zF<%l`jEgR>6=r@4w7W5x{XJsI<|!C3!#n3;_%Wd`my<$^PfOD7DT)C5?U_l^7o2Fl z4K-H3$$T} z;L`+iGpu?*fu->~&Gx|N)En5j*%_|;%VUM!3L(pBi|?kK#gp3Be5jT!$3@!kgFjl~ z$IJ0haxDV_&bbI@02wejqRUkMzM~fr7={O=#cn4dJFk4W(KtQj2Q593N{tn(x`$+a80=nYYZ)U-(!dIj4&Tv=d+D>z5h*A`sok% zFw=>>i<`_Ib`PV2OLpM7y%{(*S9l*h&4V8re(*zS3p=@P1bf{zjGk_ug~2lhQlLRI z?tAzFt<^Jl6bE8_Pb{cg-@xz1W;A++D!Im4&>FiskiYqZV+(}N%~?Vvg*X~JViXK3 zwWp8`!cNC|05yAkf|&=@n0r7rJkaqW3tuffFS-w_RI0G3R1qFNxQ1_TFJ#)*$|M~v z!A^{PfnoV_w4m!Xev}?W|LnS8i5sJpRyRd*-_OGK?0ax%)g3ICOl0*9TVZ~YNTA6& z3nsR?pwKh{Y;SL(^K%*LOt)dW-CfWdVa>};y@03Y$gr4!`#FdAQtag}dfjI|$uP 
zh3x{B^s@N+n`!*om`gBqoiy1eG=uduH|qMD1%4O3fRl=4Cq$E&!uwcuOUEBW6ig^e zClQpQ27^pZHU4(o0)dfIFyAHxeYWa=LtPKBOhbHrQiU?!E3iXV_h6x31&*^0_HTpeAV>68FXToG9HKw_4&vqj#=4A!EYm0}v@p}8 z)9yo<{QGvi7h*svZxtwii7~B+9sxO%$1#_o3Ia2xgHpXLogmsIfr^rMuKnqM99nthK{8(Ai-)3J4a^_9Oja0 zt2%^e)^gF=KcUq0E=-#2Ae{ShAm>UO8VJPBA3==}*Eot5WI+TkMS z9cVgx9jg&K_x?k4X@1*CHm%Hy)?FJ4H_i^^l|IX|+g|>{y|)42_%~yu zf1IDW(OjUSNy3?gZD2I~FP7`>;_ZAV&?V1sNM0br+@b`s*~@Tzp+A##I%vbfBk!Sq zR3g5%m_%C!s0Pp-GiBJ=h=2e$IQ%nv7DI{0;>+{D~4( zrM=`NW|x8G4t0V4C&@;(3MP(yjUc!Gl1qBJuybl1!OV2*nDO*stTxnzt!nb3>jhWP z%c}xS9qbT@{xk^@e3KFp17RD6SRy0_Ss z=pUdqYdZzCeS-hKDl(>Xm-(F$jHzyG(EZd~agVG9Gf6Dq<$vl?2Uvm0twWGgBaQET zg#5RVF)t}ThDOyg{D~cJu+Cy2EzC=IxolHN?MH+=_TWBzF-@1P*{y^MzCX}@yen*v zipB5W$Ivk+DfZ>14*OUx&oYY3glvvF<+SaC`mI-Cz13!X+C7y*jrZaW#SC1Yorb%^ zI-yajwk~gMF6z$oLHE}uxGWh1N^KHG`Pl>)gluX@%rX}C;3q6>Rpv`QBjL)PC zAmSS)aIY?Z1FKW8BEOUm-@HO2R-z;y$xR(JmXHJ{=-P+dcJjMDwk4F35u$B(f9jZ?xfg+tu1ha zOF}j_&pDocCH)gRKmTx5Uh#7CJNnGVcEr}Js9 z-$UlJaMv>K+=r9gE|D&)@EQT3Pn=j}gPC}zs0vk1PXYh$KIrs0mZmOygM%AJbE8)m z;MEvKz9Cv2{~LP@3ig*mReu$XeRPfQwH=AI!R11PqY9$}Cd0Zb`7ofm0zS>PC#Nq4 zB8P7u@si9v2ynEZRC{Uef!cMrelrQB_Kbm?hce7AC5?LVZ-LFQ_-ioh%}*10#+=Y`X~wm22F(739o0x};G}|UfS-^;sheKoQO*iV z)s;!IHW|Y*7*ldd$BhpOa^CI0;Zm0Dg7yHm`ivuP?)ZgA+qKa3fggJ;7!$%-WJOE2fC*a~6H8@D<6YQ-H<;DkHh3f)?%PG?Y4y-#Rbj{pp zWVHp29~VY0`s%Fl=OgZ=auzuD>BHS~PD0jsJFclWqA;i35P!&(%~lGbnTgBEqpA|T z&R^thS26KtDOJ(byPNpSUeoEkS0KE&FBYwjJ%Q=T^XOXQCD6Joi{<)a@zC)#_)5LAm(Qv>X3G79MwwVyk}S2zRepZ1dk*0)VY&4O4$1Nf&TUS2f&R{5% z{>pa-565XIzqmNOw_%CZ>P*^q5J;t5Ahdi=IsJm>m(zS&>cCwB?-{Wu3|Py5JMYS%$=*CLoa zD?{|_&(5525oXSKO&ziw_IFKwsow_GwHUth)3T&umx@ zO-1|gPH`d4neD{2tY$SLa-wBx;r6bz`0Ge1U$erOA9w#UCeL%GVM7>pM6~mrTTXJ5 zGIOzW@HtGM{v9Tpe1;(tl_5<36V_Sf0w?UfUPVu(?_;DnwJGIXv%Mk<;41lYE)^c_ zoWtzeOi6jz9QNhiO5A6DmXB4`!@cQgwBqa(lCx5xl`cOZYW_m@Ds%uv8tcN9ORh|= z{1+_I=)?707jWUBB1|7=MgbS^;M?(~^g#X>W z5*B&M(`>(=xUAZrHI11^T}{?t@OB^g*%;AUVNQ~f(xiMb%AB8Z-4hkZn8B%vDcsgy=kfOg9-eAs0=ziRy{w_a9^R%vKY-Hgi}2jJQhJGfu+ 
z?QkmOdYz)yU>Nl!5(e)OX6X;?*}>nBV1M6I$l97n%TCw9^KH8LCP;@|q7Oh%M<9Qr zXA<9T*^Y2-A^n_Vj&9XGP_ROSHI=C`-({{eRp}MqHRA$uwm7`Nu_{0!=< zLf1~p11|Xf<>l{RfHlou0L;g-EB$|AvqBKp{AW9MaDEKKx`f?|JGDqAz>5KXm+SHTe07FB6KxWEB2(KE2Du>+B`H~mAEIE&c z-J-wA#_xWXOC5oYnJ)3D9-AMD>W45m3I3wH2&{BP-GwqW%>tSc;_NJ{|w zk{%?lB-r+RBC$5X+&;DeB*H!MLF-s7@s6QcHXWEBzEiMANK(+=W9-JRhr+Hl6r;Bo zu;Jk^cnw!2cEI8+>=_`9>AB_9R5_CUnfC}9JT>sHfgkI*8Uw}CE^uZyZgB-OywF&< zQ;*SV>&;+6yZ##vL1pFMx!`*r#`>pW~7Jb()t zIGoPk74u*AAK_NMs)9g?8?dYySly7Rbne+DxL2CbAC+(e9<( z`mixNnv6=#+2~|DR;=iVJCe;{uIDiJ&R_;JmC78#wb6#0#Ih~i?^Pq{it9)GR#V0;ZT${2cMc@e&Ln&= zDG0hZ>X77zJa}{X13U@Xh*{3fe38;V-2O&`I^t&1@X<-)LFSIEY5i8%5i5!LOOHZ| z&vbzqTS|6EA}~+w0la%G$p(J^g;}q+qwZIE+Ux!j{tEZ^#gZRjVc7xR<;n_DD0+sL zlFn#wOOtP6nRF5n6Ygtk2o&sR zV%Nh`RNLo9T2*;CRCyVzk(a^kar?pJkvth6i=yRk=J7KhTZtE~HKSD#3bf^XCmz1B z1FsxbWbM(!iu zq37x zy>S*7^JE!Ic(@u$w!Xlx-?TBw-i5y|zKDLS$FV(5c`Se9J8URegb(zJ_{arL5UszL zGwcUC5}nR%-?9=Nx1I$Xjg>Grd_6l|nn(HRceo9H*D+wOI;~BbO4G81Tzv0J@S7k! zzri~Avuq8>84q9<8K#(c-ycE>c3{wFfkU@^6CUqA4TXRA@TWBDasDm|ays$@LC1)l z6WM}hM=uV&W5kYs-UspDv*7)QV)U9AhgW7u(vGyzaBjpc_<{X=e&sXh{gsIucHf0_ zeQ!DUxM!&LLy@U!dEr=XK{_@26*p1V7r6P)A*3^$MZ9%q2QGRt&1WCrV0So;^}EP_ zy8I4`h5l!Qvm))ks!F?_`$LRcBs2}UhbvF4#T%WCV7D}nC5Fr=U5)t|>Sqr!4VU={ z1sBeBwLZ(fGKWoAID$?6Y>#c0K3q?%2PSO#$Z77Cf%`^0roNpZ9?|=Y4-L(OWhcCG z=Yj+1u_>OSx7y(wk954YLI%1**3%k+iS%Xe5Inr*2JZ(&tcb3{WA#Mt)_n&mN{PmI z^OHp$D-5YuQ=OhK6gb?cRG4q`e!AEd#8Q1F$glI3Xspng?Xdg<9Rj~erF=UMh;e7T z=X3xURRvXi2Cg4?55H*n(W<`Fe6VB$>#gX7vtO5sCraMoy2WPb*>@ganLGli{u%gF zdK}v%*u#M zQ_*oaRXiIKOw5@377cnh#*nu+G7$QGY3Sq>4Vjl7LFUHIY|<~``!b~h{A1l{ch*I; zT%ZA?4@aZ-t4@I*xf5z)g)UvBCw;y#4WhS4v!!aj@O8>=eEv6pMz{~7-__?)&nyLV zpS}?Pfg+P zwJWotXkN0kiX;y+?JqAdfLx0tIUE`9=i%T z&K2;cycCK?yTFz-fgARBKldn>X9FYs znj?JVjxy|-qeXSy?r=cJH0`sQ3ZD7z`A-ghW zXBC67+26w+WuaH-I|z1d95Eos%s%Igc9$gQnp<41*Xz5mkE4rz;cRo|Z0=I)R$LLhjP3oO zsWb7$>W%uhAtXhKq6|^Sj7^5;>^ljG=9C7dQ5q;klakEJ5Rsus8B%;Dsqmb=l@cjQ zk_IZ3CMgX{H2lu*UF%)z{R>#f^BniR_vdq6wNoiKbRextsuu;BOY<_PbXciF8(uC< 
z`ntzHZlHUw3z+!iyYAbakMmUF9$^S)Y^&CxXl(T}1aoF@4*`u;R_OIwZzw?I-d~i<3fwki?s>7IkE=S|vCJC|;STYmF_u&Th z*=&TBn|RqmVdrt7hy^_3h&;b?c4L?0#F!KuGiNbrxqQchN<;YeN0sB@ICVy(i+(UU za&{fU+WAX-dgDBb)Eh>7FX+Ro{$UtrB|^5+6cV{IvGTJxY&-VOeY(HyM|3%hGtXiXFyg-H~U1 z_dG+*R6Azhewba~y%X2_tb(&Y7+Y2u!^As^K`h*{B~#uA8S0f(5UxUduE+2Rd5SQj zb0%Nhm5oEz9l;K@cX;a21Tydb0`Gt1@&T2Z;9q5lT5qQa8Df1lZkZ#kGv9y`qCa8* z1Ptw4(%CDGNIqAu2V7e6_-Ky+`ZA;&J@yK=z|^4>YY__*BPW53`%TeanntaIwhQmw zgY0m%57R%N0)dUmAb#42(Sh5^3lC#Vj|rVJk7gfDp6#D%jsVV!?Vivl+(k zzU1uv^oaSdW3y-4u(NtzIClIRa@x3r4b4aa>v$z*bS)hA+c`2-UW%Q2=z{u7wNPS@ z3|n3mjDK72bNP;G%z22wd-6EQ=@grR^XU!v<-q`Uvc8FUcw*y3BsbzG0boX}XTT&_q`kA0c!z88=XQHcu{Skj=b&*hu%e1qlz&a{706An_p zf*)GiaNvLAvD$w;xe2}Cr^`J+)4>Pa15^0y^pjwI;ugLW#o!EMJ2pqyX(wm)g2J0; zXn1rl6$rf+t<59Y%k&{!iGB@d)i9nVhh4{I3PE!fEIVRJ|`E{xia zHp6nnLLmsw#u&2o%jMbF&ttH3?QirKcFUOOJxtj=?l#W|_xTU4finB8?U{N1uvZMl)Moq>!PrWd3>^V5|WfDrL zO{ZHXo8Z5_*TlbyVnC}+k-?Q$m>MSV_>jlY2nS~RF&a&5Y?$#IN0!qdaPdPQgT>!4 z2%239lMCh9g2czT;lm;tyJa>l-<5__XJ)sBUe&=={5Ld9 z@DlVu+(K(Al&OL9y`B&kFU^v!rje?FI#XMA7sHg}VOeejr``S!_Blr}`;|}mLcbU` zbDBTaRqsR3Rnl-TJAqBjyUclBS4FF>nv_|kPh(PA;OZMI_Fm!z^0G#F{9_<~N=f6K z&!5AZbLB8{!e*AM;7=2#iCDLr43o9ig^Ia@Xh!Z@YVdgj)>GUm%-K-r0ISorEe2E` zmW^i@?Plw1E<<(hL-St0FOn3o0B<({uBE6m)iJ?=f*Sp0S&OK zLXPQ%-$S|4FL-?yM*R}0?D=gTr^y~iyAO3(+3*PKXCOCsn*lu#9031JYB;UaOQ=w} z6(qJfk^RsPlrR>dUk?K>x50bGM+34HA@gV;c0{*mo^&`)$>I|6iQ(b`e$ z`>kb6XN?mz&$OlkYu3_$9m8PiZYAu#piRBz0(^RDB_Xysekajfmeu0h?ox-yh8u zOv=WdbAokUIQQ+EFo!-bF^7!0C)}T;?If9Y23^1J-Bed23b1s-Yk;NZaqGC3#Y8+M%(W;3}^ zI$2;J)|B!-f{k5teizlcD8oI!Xz@&@PK!RD#_Y5j+?30U=FxEo+{WV_a);^lN$5Fl1dAvRp-TrJ;K6^7ISIQ{xcK65uBFXboSL_V zHu+5@tue!aj2~cKv;{n>nL%zRD?xGEDmKh@E$N^4W*&QZFj*x-S7WWo_@OjQ9bF}4 z@AqO#qy@7&l1qW7#=>+BXS!o3Lr*715-(Wp1LJb3ufUjEs+O=jO4I55#DTPZ?-z(2 zCd1afR>3SqX-l6A?7CR2bQ=U##NW6QS{(g3^~<+8)Y*o3pFXeE(XP=9ikF}r4SN$ z1z%+5vu|N`x~{7y|J&(9Ch??=4o-fX|j#DB)VT|FYMyGN)!RId;u<`_OVQnxTU} zVIQ$ly@i653U4hj#AE(W_R4&8SzQ585-Bp`Ib^?DM9DY|+HU^vBYf691{t_q&J0Hmbj& 
zzWXNbE7496CytFxu^9oWQ=duV>42MN6Z+7Yyrf^+p~Wppqd zPwj@GYjxQuLm|`p)Q~NU>%aq!%1mw37PcUEG--*7#K~Uk`7^Pd@MxqFWoK;QAx@Vj zn;Wu+`}Jt!mSWiY$$~DLr?L5BXNsw>N1JMO{Mg}6c9$Pw+lyrGj9gd6nN|rFY@<$b zq)tC1SFzi@|L~lUO)mH=&94h+fL@9w*~Ra%VVe}i6@;Sgo=X@uONM>@zLswFYto9K zDiEd5W-$pWl#}a87bidFJ&QMRXWbWpxY3l_ZFbP8(thr9hci7HwwOw~4#UAW<}74m z3^!Bq0^Be&XKHK0psv0e_DsKs!&(XRqOS|4IVJq5*Z;%*%*+R}xUmE~Y%vjiWg zD~R@KJ;86K^?b?KY4B|SIKFw~M4B2uma=b7!x?)F`IgpL^cOEjc`Yf3E8E3+>W5H_ zTqBfBNxCU!(amTkA5 zM8*?s>Er}EPO0x5`Yb+yvn`Hb+=Vpk96yrX-jfZkqgCne-9+??SEQ|$zOdUoiT}Oo z9fps-i|RH?w0Ke|nGBYrt>=G$N#}HKxU}Gfsa;HSV%O6=xvP+*_ZB`|a#Rv_8a8wV z6I-Ii!iSsC#QWOJ&UzuuJ`#;foOIaFSHkyoS_bz{#hblv9t`ugzXG==d&rLv@~?YF z=K9#<816fU>{F!J*;gy!LU{=c`}`e`uQ-qNR11uD z1>-d788mz45?Xp@E6%NP;6BBU3p}84oZw(s)`6S9DzVR1^I_Ps z`~0m{V=+%!14-WtF!a6ZMYJarFeeD%Q{51--bk5g&&p(0pQ9L)x{ ze?Xi08E`h!knQm1&{x8ej+V*NaF&4cW+kxcdORAmZ)S0Q!ECeOn`p_=f=#2IS8SRh zFeTi#F>z}>6mp$>eA#gN9`+r+1qHKWsbsKy{0{rS)j*YZ=Uf?&43ZdEoo5kJ_HK<)U&rCDZAwodde;SOjdZksd1KQ?m~aHYt*) zFk{GG`dTox@4|&uJE-dBESj&q8Xcck!E}L<-7j>gx>hZv0=HEnElo!6VQz8|L z_RnMAgNwJA1nnKE_UI5WW&<^L7u*sHQ9*CHtR zv^pgIaAvoz7ee17A-9$z#}bO8Kz1ggak3e0D0Cq@n8FQ}?ZZvzLq4nD!dSTwmj1RM zn}xVV&2N9u6xfkB5?9mL%Tp2nkFPHgU6(;lx zZZ)-^P;)*8|7%+cXIIJ)jt;?R@73vz!eP8%ZHx8q9NGEyE0`{IfjdKoVYc>Dd}MJH zR~B}l^s5klqq75(HLSwN**X-);>h9Pbaa$(L&IbB`03{?`Vu>WF0T$_vn{W4&ej}L z+hYWG=T8@#d0&CT%@gR>;yv`^z;<}Kynuci@8GN-n6rb`n_1)eHKbUU3QwLzLbcG( zeki-0ERz&zpm+*Rs-J}FqoUx`!EkzHH=5o1ro%Er^w^$oSBP(_$BNnBurhr&HE-bP z{gzhHyD7&^6n&}X+aMe$+sAv`c){TLM`(c(2Ua;<+~4v}?(*kA3<Xr{dn=Jze(8d|q; zhT`vB)~!y?Dfcc6U-N{oPVGUJMW1me9%D{I&!l&X7t7ptl=nL#%Z`~!kXqIv<~7C? 
z^Mq`8kjyN$$tDUrjzoeK6@c-_L9k$<7JKYDknRsaZt$vwG$?N*y!t$k7L0D=W&Xso zZ69putLsuoh;d-kst)2ze^Zc|aSrFMl@c#7FX2_EJmRlU*ugd>{;YVf9Yui-VNe}& z6Etr)a{=zF=~wq%2)ZA^zPB4uZbAsC#{R*prz0R^$PRQ5Znmv|C-ji(6xjheIkH_h zlwH|ki`^sCV8_k3LO$x9@I7gVs-plQdxU&fvXF_3UPI9?&dhSE0o5=3!S1e|Mn}iq zKrp<^f7Kqq8V%loe!Mb8HYmeD^LE((TZ?7qzT&6YtVHXBJTG_8h*Vw`LVBhiOHvw7 zc9~HyxB3rnbHR|^oVHqIYc&h)j{QWZfZM#2G|?%S)UUH-Tc0)#7c(_uR@pbw;i~Ir)JbF()kx z-nMfXIZ2z&^~{Y=o~SS=h2niMxD6oo&0fjHQh$ z!JM#juxqv8$;!~BX5VKxb4V*!zUl~m%)0=+^OnHRUpF!0=vcPT&=F<@DbEPP9?Ze4nU;GJv?{s8KUe3m;cV3~BgfUzUY3GMmZsk55>j0V8{rKX`M9^QG zf#>8+@l{`#sH5oxTFeZfH8ulTTe?2x?o1ce{HG6o4k=()rp0=s6&M@R%hfh@R=7GJ z0DG0;u)k##H#kg#6&g&!&~GAEceNAt>Ph08QI9}>`W$AtO_PZX0&#w49p1bhiGwFE zhnEZUam&yLP+~ucA29I`X!y@T=~24u^6+=?G^31ruBXa4!Qk#`*uYzvXy91?Zgd{q z%KzGF!72t_gX;qu!7S68$(JQS;gTSbniL~C;}OT*F06uvu}!$WVL1(v8Og%N3fx<_ zBV5EHQ})+B560b|gwsj{Ze&6Y+IU-#^|xzqC?kTTXAfsT##{40?JJ<4g`0y$Gw(V3sHeI-uEOLLHt)Lv z#dl>P*nKAZs*=Zf+MU4iMMmPExk|Wn=Ue{h<{6~gszyg_^PyHJ16@Spsc6GeaUGut zL%I*pzm^$9*Cvug&pY5-h1t{7bk5S2g?x+A1?xpMM+Pkk?>k zLN_L1$b7Ntz;C!d?gLludIkToDX2at~v9jX5m_20-l+H1t>bo0Q*7rpGR_{)qKD_3=+!*g>sKaJ?+R+yo zM=H9U#V(Iaqm37}aP*EP+}T5KQ1{CpF0lGP{MuV8FhoP)O7ACr>Yu+{T~;slm%oJ# zU$VK-&0FzW+Z*U#V9)9n|AZq>2JHOMG|>nl*Z1;W0&0(Oz>~**qsCDyUN=zK_biWr z+MFS*W$Hc{suGWNH?{EHnGh6S{>*SkGW`7euA=jNJr>;S0GCi1P|O~UNkuAb^A|%} znKKQ11`mZixA&mbGzK!|!?<=`C-`vX6htRJ2eZ|~Sl_dk5aXpxCBn{ZpTe<*Q5m8 zc>bH%WBUkHiylT2GJ3dfk_yiEv!mL*!||0%BHqaB0OJ*H@O|24*qt(%eH!G;=M1QV zl)^!vUX1*%4tuE0JOnM~li0p3s|1<=s-1JX`C;{5&?Q2Wo1+>#f2@uuc)yx&=Z zJKD$M?G#UTa{nhB*i;0wO`Wiz*Nsw6TeFLmTI{Tbz;2b}Xr;jP)y@qXSsO4$xpnhZq8bNoEOcy*^3)jjv+^#{irv=RG4{tv+gMzOBQ^Y z2f~3K>V84XN$Z%ySp}&0Wrjy$m7qOBnG4;<;hd6rRGCYgn2j+#N?S%krJfLNb75961Ha z!#d#6os~@fODrCbegxhzUfo@67}Gp9&<@t_?eae`qn*V|`~2js5cZgnkK zF3fE^?T1oI>`1o2*MJ%yWq`)%6#jtaYI@NA690@i4QWx2P`cchC0~!k%%{iT*u5gi zl`6*M_@;7~O9^82XL+bw`4Loh`%~d%3;N?2#$C_m*o~_;mn2{F0)V zDem-c?MAw4@PeOeEy-%vbm5Ij88mrM1r`b4i+O(~!Mt9Wy9us7?wC58`#P1|>1Rcy zmBp|$<1REf3c1AIe!N>>h7^!B0KLT}d&8?a2{?_BTR$~iX?7q*1 
zh1}u_qP6hidIPR{=WsUk9tTR_{aI>FG86>c!F4lhJXXCGzRlHT=?atJgnI-EoO-PH zoC>z*o(X}>g)HA&pQ$cP72R@_Vy2;`9G#tH3l^invsQ~r<0|>|%Y=S|TcE(pf%V`1 z4g&)>fPRNLTV|XOsrv+vY{3aw{#g^8w{B|S~w%-{H+L0YQxw(fdzQ61|knO`PpG~9pMZR!G^%;EY5Zrbi+O+YnNXU-*K*8-fY)XU< z%Q!oUWG|b-#KtYceNzE6+}6{LNdkjv$}Kq1mP!HtouH-Pt>Bo6h_Tc%@#DEC*(iM} zs@w7x4rjgMr_FwZbBm-PYvUnQc{&Uu`>$|U-SY7KaVxS4&jf`B?d3c7n~LAGKM{SY zHloM=u^8~a7>8eQLfg|StgTWP`d8b~;$!Yw#UR`;p#)VP9v4fx zNr}d98w0;>Bx%W`Vw|OM9!IQQj#`K&#Jl@gd86a{UAC#_ZY^Y(T#$^_~d95Z$kEd$BTgeV-+`m&cOn$~mycz8F?UT*Wmz z{wu3JXTwQcGGLYg!Kjs_1EJ$SV_8lMwidKOw)Ai+SjEAm!~IwvITMtA4#xH0Etpb- zH3r^uVL^rfYg!*-#2pFoDE$HCm_3`N^(!;uVr9%fn!w+Ng?QlaSlGARm+VLF#^SYd zwCMf`c%6`oo#Fb#Dmt;s(2xckR$(PWzkF@5V?xH?UW zef#PLQ#&>3kg)8$VYC?3-_1j<;~Ur+BMYW8L6_x5N5h?Oqw)62r7R}?IMmyl0m#jR z=jNx;>QanoKzbrqUE@Kum1p_PS)0h{?helDtRd~!8ZS1OkjHLVjTiV*nIPY@hSn&C z)831ZF+Wii^n=6b?<#!?32Nf}UQ4ng=?`(C;5Ky|y8%02&!mj)NumK^9ys1inGH-{ zM|S#UaBAyNw&-&kW|&HIJ7blZ{(MasX7>RNwm#s3j_1P@-9B!zel(vyxr*~VFc<^l zg2m1~LPkQELH@q%&A0CodP}A{6dh*HnWzuLg@gLBXZi(bae2p|m}rP=uG@k2S4Fn$ zaX1CLyWyba3z*rXE`ExK9)8g8fun9`xJAEibM0$K;?i9S_{}MhD*lFIbHy2WJLWeo zIUNGmR%hX;_k+;aOyJASt`aX~39vvh40oC~V)fHAU~bXJJ(#G7Eg@oDV`)<1?1ahVuo3OOocXoxqwo#-WH(~JjE^gG6B+hAJCuqBDqyQnit#jreuJ5ZCKHVxbz`%fQ z=~@r+&!334zMI7M2!4APX+!QotO^B`+QPp3UO2u;8n=iq@cG?oFgGh1-)hfetp|c> zPWpCqJS)k%K2&j&f6K5{)i1b$#~r*@{UY$me*{ZaUgP>VHmu^4IT-|~fZh`xE^eH_ zw*8ult1*P%<)X{fm2zRJgFAXxIf1tBeyCl3TcmJC3o=u3`Mq-lZe;ut*6;BF@*YRB z8&Ruip@ud2&b$Zb3SQ!-aq6TtY%TAIhuP1M7x|b>23r^Y2FYE2AU9cX2`*tw&%PU^ zo+-02mY?BZpe=hRbcb#x_rr(;FYeXC4|ukEI17|N#6BA((mc~hHuK#?e7JBHzLxO9 zH=gA*^hW@1;+YL|Eqk%hunh8s?!pZ^AHY@VnHK30rkwHtrH zCuMyqnspbK8Rdfd&BM4gZv>NhsDz>{C5&IZf!sP)aL?0si(A)S<&2ZC(O22o8icTMZB6d6BsLx;o>SM+B)5cO&NZRs}%=Bs=PM+k-Y@( zo{gYQCz`p3qkB+hGx4VsWGVN(38g;$4d8O8cE13T}$lQK5!}y0|(W?0&C$p)Ow+R-`lao&Ieda0LvGQyzObh4# z$W3Fu>Wf%+Y5_`Z+<-5FpMjCD9kiN0<)`#(k#s->4osUtRi#C6@zhAR>5C%QcS4P^ zYjG5qA@pFMIq=*ZZB{gTDnC!9jr(x@6gHiE&*^XBIS=zrF65br?OHU2rbkynQuqb2 
zD5AU~h_!>8+eFs?Z!?tlc$2(B2-pl!r3KGS+4WEn%UAKD;$CC2DbK;z*Q~K_lQl&j z+m8$6!a4VkLzv^h0kEohCV9$4lV08v(eV^-`dw^BKMUJ2qG}WbX?Am$wdB~pE_u<_E8RsEhCMC=At_~!bC2EwN`xCS` z4P@53Dx%S+HE8A*b>_bPBV6yvrhVUD!J8lLSgtncocE%tGVfbH*!F8Sdp30znmshbt&XR0yS6>ux7X#I zDud{g+#;;|9!6Ha%c-PF65FDd;g>Ws{3Q5zPuJ@Sj_}*)c%%|N+ey^7PYiLhC0X3_ z&s_HhMYNhB$G$8!#S#-ImL>5-@LB3W{}v?}DrCrB#vr&15L|K60!LtN4)L~1n6f~R z?hP0WrK9t>jz&G0d`{>k8qQ^t4?PCyC8sMa{`u0>?w7zMhLDj*KRBVem z*&T~R&&!kD$cvzR&y};4I0)^T%kZ3VCRY^tg;^5yIK6QawzMZ=T-ItdX!wj56=sll z`~-fHlOKsn&QL_+2G*8yk{{otLW^$S0JDYtusCr$zdY+Zw0&EN3nwc8PyL);Mie|8 z<<2_8D{w6RmZSNY~vC3N;3OEYc7 zT!xi4wXOWZhi;dl))h<1##)7)oGI*JI8$L}3QX2(FpFg=xa-UdKJ&?Y&^>qyxA#or z_VL+hq$vEK4g1U02w58Y7Zu!4-+5#pa3T`CLg49=zu;u|F}&%Ply{j%-H6! zE8xxM8s25w37Ee8KK4oE@*3%KEG@W^yT17=jx0Cf-VF7#_3#khcbApvS>jfD?QBg7 zE!wo~dodi@x?6acKj2;qyIZ|I(bT`T6P&Yv=0_tqolAl7mycuhNCy~oV=L9vnz5U* zb5NBNJ_ic_!0cWKOU#jEy~3XK@9YumuF!uhS~?YjSP+ewuD~~cEf5&|dSv{}n$5F~ zV*U#Z*@PfFs()R;@!nP7o*U0bzbZlV6UFQkFT;)>96>QRCy}+_sc=))XP7SR4EGe; z#NTd!pm*QETH`!C`*;kTD!r+pdKlZ1nF6ks!oH!`f@(|B=_q93MwemaF-ML+pC%HJuxWT@oeDkqsen2KNiezT7JB8Mq4|&Ma8juzdVGD17w0!Z>)uPkZodhSp9_ZH z<0SCL(H!W$agV!oJ%n~2PQ%Q>#`H$ukyJ+xhUt)vR@xh3)FGksCAfsATsnl3ogsAQ ztvNo`&c;}4X1X9MmXOqocZjsWCNrg@IA*%g^ZO4EEdk^)kWj!sPQ<_t}n^x z{PGJZO_ayvci(ZOehiuKPJqhqd#Ka!F1%H~hIeaID?;{f0Jj0F;8DyAyt$zP9osHr zzvE=6yI~6-WK&>N`$>Mn;=yFe4WwbtIWXb9E=o-2VfRrnhAhyeAG2q2ht&JQZSNs^ z`RgD&tMFioqEJr9J%q(CdMlQ9*CL&9E10dk3yn7Tgw}8j=lVo2w>d8X_CH09o5zJ& z{Xk0o6bTQiEyQsKO<2BbH9Wf$4XNp8LHej2u5|8zkW5+po%s}o77Ogx+genjzKpi_ z*4ZAv*T=u{U5Mv>aH`PHBY^It~O^V?H!sD=jchqp+XrbD|{CROWJfCpC43R_^p=z8z;`jfeZfPUhzKSMbTj~d z@dEDB{sp*ZOEU2PP_$v>HkioyaOXxp7wexLgjrjDSNz?ITP?Uc2lh2f1Pj>BYoT5ns zmlyj=cHi||{9OL``MLi8=QnGC-|YW+ey(;A zeslik3ED|Wyu4V0aeH2JENKUO{S<^0h#Gs6XoNd+df@Pv1WtYG81hxDhRdUWqWoKV z(y&XyHOgHOHdmiq*9mPOg*`O)wIO?TD2_x)BUx8>O9aVniS|`pYtZ^ANVNxuUIJW84>N3~I#Vmi|Tllm|2KCGBnZ&0?xR>h1e0D_8mxx(ftoSBC&W*%N2g<1DgC>v19 zDcH@%D0Kt&=CKs6lu%^D^+vMfj$qFJN(y`SW->-~zT(^qFT&?FaweOVn~opbf8qG%L!860Bf=C!Br;j|6o$MwMnOG7 
z)8<$rbJ}o;rmuTI+k}?JpM-~SvU~!`^=7i;Z^lwaNhw!9J0CmzuVB7I2y^o+g3;24 zSjUz{G&kIkWVTA;Q)llwTd{TdK@=xG| zD?m_s(S)eKkTW&{*EOBPFLj=5)$KCOG+0JITpD2AB6Bu+PBc{gr_J^*wq)CH%2A+1 z7+Zg{1xGbUF(rK`c5YNUeom8S4PE2eG^caeli9@Wlpf7|H%?*)RvUtK_GMVVa4Y$x z2$Z%2cQVk-qRkhQuvYI3x%{o<4{WStGlj(4fbvQgj4P*Dd0oSO5tas09HhjccT>P*UkK8U4#~q1c^#v`g;A9eW z^OGU9)*75*dLMfZ30qFhmGq)+Im@pb$clAN3r42H>}ly(m?SjbDyz%TL+vdd{1C6hSlr@z;=0}Ic)58)C;iNN+S$S^f{5QHXKh;cu$f3*Tgw7Q| z5zc&N)8skZRx{>wR)$?sn80fL)tK1{S5DVFh}0VEu(9?oY~JM$KUNK-*4d3%arP}t z+t$vt=Lid=YyJzDk- zeqKJoeR?v5c+I7xwUgr}^bCNu7ytM#Gee6fTuSY7 zvQCg;o+;y)_J9^nHu*Nwlq-fVcUM^3pvLMhRq%t|PT|7lPEk_cIy#{k?F+6{yFc)D447mx0p4rhEm;kkR^_@L$j#_7(YJ)3t^O13$jyCfy{4|V24 ztj@xZj?0ky!5h0QEXlJs65rl(WxJvzaYbJrNdMY_`AJ`3$jCbW96JaB^QLl2YDyGo z(~jQ9l9+qaI?Q7Om|LVId+=LCH;2{XoHy{L}+X?Cgh?IUMZ^F65*p+6bxe zN(kOpjZL~5WVJ__Q0GcA*T7?Fb$J87`ba02bzumVFPCNq?)b1DqlYo4E!~j&a0JU& zOoW*G)m)Q82iL6=&)Gh(=cjJGizB)>L-oXm@W~;De1?x@5 zRTV5rO6i)FL+vf<4;SjM!;1I^2a%Jm> z%d&X_NqFTbdt4lEMSXE|@lsPPn^={Jy6HR)&z!<8YQMznK^1US%K;Y)r%k6dZ{TeA z9hf~f2r@<`qsOISsJuQ#*dFRJizt+>+OZU`otQ__ zpS_vFVq-AxRAwcTVIXanz$wZpV29#ZI3+ZTKFM*oE~tlVO$de9-FNv%!Yw9RF_@*k zHi543DZ&(_9kd$UsC(2UwDP$OHgh+y-Krw?aV{{oRkF+@un?!3tf0XsqcPa$82|3{ zT*^^;!ev#LqEhc&QHt3pwrSOMe3cx;F6?;=_ss{h*Bf@guoHvW5yuZ)qiPY_M{mOf zwJ55)xC{=Tjo|Q23c~VcnAnsGa#@=>Rf%F4dTJJ2nH3Ih8Ftj9x`R}H4?v&gjre-! 
zK5zqD<_6{EGj%5k35224{Ch8D2iC)*##Hhu-pOR*WB3mqsw~7l4P}BV;eO;8=##ai z3mYO?O}-Rs70ydbF6H30k7L>V23?VRtt@F;tJ8hy-&pP=$(}u%Ng`zp`j%osaK0eERQ54138Lps8#sZGO6hQz(eT$sa`QxrRXXd+o@gREOZ@ z_)%={XFZacxdL=Y4g&YNb)s=WSK-;pfwcK#1go-&<&HX?hbKE?*u71^FgErHjQls8 zlDhUWr{i^S!E^@eX|x6L;52MhOM_|+VX8cNKdV(bkN3n!nBKof;(4XVF(E$=*1Rvr zjr!-g7v7U8ZB;kQN8EuG#g3Tq-vptJH4HSji$DRkkXyEp3b5(K27&Hjr}-E+T@X=n zpja@>_oJ=xd4V$jmAf*1B;9YH&NgkT1QX>L7^Pdp?g?Al2Oj$DWays%$JLvEQ~gGN z|7MCx(I66$WK7C*UVDoMQ4vvODvcUV6{QHtTv9ZVqEs3+kn`HRL4%@s?yb2r&l+g( zeSN;`{@g#@|A4ieb=GyRYwzdt@qil3iI|&cCq(9&)9^1vux-x|>i_yJRbJI4Sz8w9 z?wQDbJ1fajO$oZr9?c)U;?VqH0?vQ80dL*DMn#f+>q7N#{N|{Hsv0u!fx~Wz0(t;H zer%yP=Ld5^oGmKO?v9R6gV0K2O|5HBf9`eiITQ`(4W}MhVnzObTDT^Z_jk^PzstPI zbe%c$SgFm+{WhSL#XRv$S1FUzr#^{gnLk%S=3OJ&6;Vjj+R>cl4sm zEYAHTH5E?hQ^J|v{G|8*Sv>tIn%#IyM^toh-DXcbQ@$15mDO4OMj1T1bdHV()k4Yn z?tFdM8Y*!61N+t{!-V^-a5>`?S^dK1rp+GBD$J{fKLz5)w{l@fu~4kn!2$*RNE zDAWt^)O97$Shy9tehe2b&d-$hQgPvg=u*jYc^UeITq2!evpGU4o6j(;*`QM zDPgIse~X#;A~*xfe7?gQTXU(6ppAzQ&pPT<^ zWBD=IctbK|&CtNsZ@cNqhF%8Xwh{{SU(DmrTGENJDfDc4UtGH(52x-N&t|J@XkF|eRPQOm z`tUr8u|6*kFrUanGP}~U;oDux@^;ayC$^BJSs?_j8;>2nZ^OVx^Z06B2^|R82cbLC z$V9FoC9<7ygY97|w6bJh%N-o_cou9lU&WK`UGe?d$sDvQ8qZw%NWIl-_(OgkXsHBY ztWhGLKXMSJKO9PZHY8y8=NaT*0^NR39-w%czdkzCS9fkVIyYNnjJ2DIXjcgLNKz=k)6rAoqhqKpI(QWPt zx}W4L7&oX=WPLW=elm^Mr%4o#UzaG@`z|>C-Xb%&po0_Ft8uvcBUpT37yfoT&YYEv zUGJTs+jb`*>&{8B=C(U~9jq1Zt#TB<9{weigpcB%xxXmTZk=$uO`Vl5`$*fljnKTQ zH|WJbg9q8);Bt)-X1y+lb+?3L9rAaF1RgXygujdDa-Iy{p7s61~`T`BMm2Xr?_<((F>4f=uthVz$|{ zf|u7;iUZT4@n&oT*!Lya{&aBS3E$Mu zqZ^rjV2j5qVQc1Z$i1C|ljk={WchO1y7(^Lkt{{R;|@IbKpojfM{{k(4eI+y8MQQH z@ng|3`t1H$&|mVBB9Ev;RcZ*GTr&z!?6!yX9~xlM5>4(OyoI{rV9c_YHjm?tQO|B7 zFJ1US%vZ4CG1D}$o2?4hbvQ_gezU~d%R8mSNsD-ev~crV0}LPaMm*)QgSy>*FATY@ z&28;j(v+-_tNXs8^{WDeM(LgW>}|5VO!~XCh71-9R_4=RiL!Y%BEn_IsAT9EDWx5@ zA16ICxoG>w8Fwp|gkl;(O>|@z2gY=UE{O+1jKe(Z_op?u3K% z{-Zv|$<)KklH=0+xSOdRz5FbZ(Ib*!?iW$WX`U^tt5_;uQxwdRgK{}3PimOUdtloe 
zYj&+NL9-A!Sp{`tm+kvxXKG|{uK5xqw?CqNT^@kR@m#E$ln-9Zd$C4GL(~X_4{!rWF>5DG7(+y);i7qrR60+`Ir2Iv*4AIb9gK;VC@(3iu<^lYd6$utB6` z-_sg^L)O2c>;hK~E$@SMMT2NwmxXZXRW>zrPiL2J`C`J28ezoxK(5UjkM2-n6D5lBIwyUou%k3Mo=b;CvV+ zD%=N~y;JyorwqC{Plx3RwV)N`f~S|x<(9j049CaeGh0u#j(1Jv$RGTi%cj zuy(@%L+ZfbS~e-TAdb&eX8Cm$SmWClucSucLCH)scl;Q>|8_368&=WF^nYT*t}dAL z7}N~&wvfd_gh7tzux9`|iRo}CryPt&4Fj9I zTKG>K%NMM}VBNw99Q)r3NKE=6)=N!VuP96IlW!r}dyC2aq#KXO9>7L@{KdKvp?IrH zAVxY1wL>>-7p!wzGQ-Obr|bdhrnqmJ-n>wGc3ON7`SpM zgq_|AHiK5en|+(;$KwsOz)TrNRT*Hvj(dcU5#w=o##7q3s8EQTI2yH5bTIS5RCp4v zjaoJlye~0Bpz?x~#sJ8cB8Z)UInC6d! zt@J|hUXg?Qr8BwGHdrWH5W&$+0eH!=OtkA{O@?w04*%_pLURu`tGGism&f6WtK(tK z4QKK5%6Jk%PukL)*HhqDl?I-w)TD#u|G|}qG3YY0ghqb~ zq6KZPVz;L!X{*w6veozmQHNB7Ub7_Yl2ZjwyfKS6&v4+x!%p;Vd=UCw&?41+4q&%2 zf}QqMOPjs-!oVd}khCQh>R&0tQLoK>^werRKF|?+g-imza-OCSw&kX-qeyFUE1cBQ z$MUc+Zp-Y3KSL|zPo?C>Lcf`OM6l;=5tDG)f09(Z=PSu(jda~(D*uZf&cy{j5Na}# zr*2Tki*r@z<;GAlukFL0tL!d3a`Cv6wj56{0Ga)0@kG z#ZEFMJZKSx1J4KY^5tIW-Qln_16eHpd~zKGWCr24t8c-&Z99BjG@c(`TT5Tmy*YaL zA{;3!fw+?oh07OhFjnzBExU0{P?b{LRd+>Ovga7l=d&`d=bhHKcmb;yHGphG`-Tw z*0&0`w}s&m-L-g6 z+8kC^w}H{lwQ|o(ari)E1qOCofphPyf*(#^SX6A!v*(TGZ(60GKk7bwza0%)l0mrP ziVv@^SCy#C?a(kdly{#{1F|Y}CMKk?Z08pNmf6 zlse;D$5z4Z%`N(_kVbVoj|-(KenP-vsd=!`9xa>FK*vdq15J;}y{@UigiQum*)1NH z%`TR2OU*%X?KHgmAQtPTgwmNkrMR|zF_>3p(D}+ebZ6*1++X8FPcOH?{pq{tb+Zl{ zelz7%DYmNytk%Vw-toBVy#+q6Ek@OuEp&RoDNsB$3HFz1)4I=H_(mT+4oMq`jk|`kYX3Df z+{h544yt3^oTG3%bRk^6e~eVi6M5VVUG|OMPA<`psN*_!y#GUnZS#BZ@4`^_{PvT~ zo;8BX{IBAtL-lY#^(=(AKcSt{JaF-y=i-lsZ8T=$1k}iULjJdhbBpx0y4xlZIXA2F zH??#0-^MOHyF(;vbTP$!Pgjd|>o%Z~L|pu$ID=nhI&il>Tjif#SiCI3e}0%sP_sByj1&McqrX}Kkj={`)&U>GFfzyM!(3Qx*iAFeDng?XY9@^{&f<* zC5`4U+RGuyVFn)255hOn_p5`!C#bed{|S(hj-}XQE>#`s4bGd!#k<(htrUq|5EV2VI@xY zwL#yPJB7VJv{A6nBb5@sfrtCBnx_%}mwjKX>|=)+V}s#c{AzyWm?U5QML^-yZWj~( zm3*L52Tw^9^4YhRIyGu0f#n!ad^!3B6|Kpp^LzVY=IDVO`80<*->m}wmaaHSYO>FG zEfHLQug6YOyIUp27mrxH6;AHoNlSHijgt4WP0Fj#av;8 
ztA~^--XNT9b{C924^Z<2ZJht%E&X;agVmPz>B-bD)M4`GeTau4%)OhT#%@`7b?x(G8c^WP{P@#WAWv*!8oAr3%YZ90(N{8jdu=e^BVP+ z!W>5voH2A5?w>YNY6oOOuw>W1E$#Pa59)Rz`i^xB;369x!=E_0H zE4Hq}pVEHvPjLvN-zsS*V~O1#8}q252;6}QGEah=;7%Dc20lvT$= z#48UvCpDOLd-kM&JA24ZYG#?w^_CSsbQQZ`PhQ(W6-y57<3lUvz;~xjq@Xd9|NcBo zuk3eFuUk0O3RTZ7y`I-EaYIvxWir7~9)MZ#{e>#`@hn)J3B%gI->9pT4 zj@agpMHM>iVzFLIEag*)*q84IJco}rvbpH9GIF>Hwka61`+_6%YQG{Ms;rSLFOO;S zZn==Yaw$Ag{sGCI3*g-!O>CW+%+`i)h15$o>1>)cUw_aM3tnq-l&@rGklJ6JN1ue= zv-JfoHRG*AllX740%YlL!O8Lqu+J3nXO|-R%Cwr=AIiDhk|NQy{FUg-#~#%7$B}Qj z`fxzhcF|#-5=P;BXg?5!mV45ub7BP@*wO|&`8lnza^ay8rE$8^N#RYIGb|}hr`pHb zJUR6Ug&HdG()gkH{@*CvU)B-qS9)Roqa3(B1bL%0Z_WA~h$+8r!2VScv}n_OY&qYR zd#4|wtUfiAb$cLcZYhB4@m?#+ZAqvg5Su z2)vcv0-a}GfxzW=MIG^+Y{yz{)^O>LqsR8;q~-JI^OkM$z^-;YUr!aa$J>&&=YH`_ z$`*PuXDoJhkKs9!Iz#ldB8bWK1;f~7@}7B7e74*JO_DO;^mz+3s%eBF?sGZ(Xbpv~ zb3uDEZ^%=S3{kiLgMbGEd0oOPEUm~VKOJLKlFX5D=!D85x?Jn+$E~q3Sbguki$%*5 z`Vwsq20m%LWn>QSxG&jI!n?@y%)8>lu(i0m&V~*AHSj-}%zF02=(U9#23-lpm1gJU zR&U(Ve!U4+X?~$ovwKJszUzXkZ%;J-l>!evthwWQ>Hf4sYAU^Lf`b~G6yX=cTkL<6 zcgPx4sUC-?Rp!FzKS~tpW&*DjlxTp*CGzTekW@c-aE6XMPs$&TSDxfUzdj0>cSR9B zhu1?{o0-%gAmC!Dz1yU~FG~Nwf=gZ)D81XprM#kD?-YcjAv?vPHOIhQuRCRG6CC~+ zg@d9ebL8S`nEz!9#5e1JqD>qwJCV<~P1+zfr7sqGo}r-KuVKcv3Di11R^HI#zUcVy z4k!i0$gaQ_>Euo@d}kyvQH7*SA5RVY)cNF&r}Q3y?gNP$5}Y;`v?TX6x>qf zMRqrziV3aZd^>I{)1VWe95S2i$MnJcUwT|>^oIT|)^ODcDaL6kM zw-j|^=eU)qFl&=Is`U}P&8xtn+zod13_+8!b|H2DJz?>Z8C2*w9NXLVNTZ~fj$Tm0 z?-kylpl!yhE&L!OPZvU)Y*|J13TWItOlH!q?x)`~imvO#A(HXL|FJ()_&0 zw}N&TeTSzrRCv{y)!f^$75ZHqj5DP@`pP^L?zvo>{(WwMgdIVck=d7DdG?YBX6Ipz zXQPM?dGx1p5zqgV0E36Fq$$m{bP(-1#e5{mcUVyHRv#W<;RG)(hHwuZ51d=|7VN6- z!7{D>*gvThJ@<5lgQB&#p=kt{{;}qgVIKVSmJhaMhtp>rXI|i%BN_!J;dIw5!KXM{ zFwlPw@5c#{a7_`%etIjJbT$a)p2;kzPQ_5S2{<^yiSA2u?Bun<*z=Vl&%CmRcTIao zf6ko|>l2Sa!MX?Z{gM_&zB^9m9|fZIwR_M~?oZBd#_`nv6Fix_2`sm`;DrNYctW-V z`Kx*0C7n9qc>?mm+2grvz#KmPV*y*|DY5=;PYzl+S00di8?N5k2Jb9n`15c_7r!;7KmW)G}25|pU5i-{2;=5l*;l-2!`rY@5FlB=pXEi?VqANciwwV9ZX`zv_wYXvKS+blMNCU@hp_CV9 
z?Dahj<(XNvRNsexE_(?TW~ppFS|Y3uE5aL7kAmL8Ch_aAO#J@umu%A90Q}{?8@^j; z^8QM7+!g#43hY&Qa)LKJYg-Mu{)12{F&39in}cFpZ)h*=g&S&}`B&3H*$0dHQe#ku ze_zUkLX$wQ6Ss<22dHrm%`~#@mkpmnefjIW)3{<$I`7-*EYz>dMrXs0uI-B=a6!{F zUg2>NN~H5+%{DJQ^1dU_IJyT9uZ-v678~)kOB%GTJw^{pvsg(bRZ!bBmI`-xV}q(O z|5cjKjUCD%Tk%2UbIMUw;f8iso@HHeVzcm;{4x$e<((At z+#CyUF3#hgrgcKUyuCv0v1Ba0*DMddqrf>QFNoh8#^R6A^JJ)A$X9emN`!}vqW0uj zJY{zjBvfvtm8bgQ!e71d%b$-FRF^7VH0?-5hB|oXQaeRGdF1#kZ5G8HA4+P!_0Y)6 zfcNFCqv-ItG<$S!9{;-pY%Is&>(;Rt_D7(WGnC9R}Am?PWlnJ@S* zpTToiF5y)d_R-R^d`dGL2`vjX@y{(?&OEE(qBmhECb--nm-u=7;BLK`zFL*D6inHo zyP3;V$M3YwNG?1~nac}rb>kU-QX%0`Jdc}D3U#W$Isa-zW1Cr&zgm%t3kC8Sd=hkbp8~%n5d-U+GdnL&CeTR{I zU8rbpKmOA)fQOvg4*^Tncv5!<^m8j?7j;uMbWov-%f@5z7&lzBKuYlErO36z6u4Gb zMVbYDqr*uzNPmcwBi*^2HKXh3OSL7RbD02{`7Chf86f`Z0>u)^tm4gec(`vGss7kb zRe!(3gW<1W{E>&kj(dUh{Oc8RNNJBnerN(g6VIpfEuxE>7TG%aJ9e+G}M;#;r z%Z8ABG%7N;?zcqhEuW`P0UXJU+6JIx~Al;Lyg*S8F}SyD5)V6!19rTaQ}50+f-i_N+(<;8^w4EJRZb@a-<4ECT_YzJoR~JW$jkIFYXe#Te#|!RBv#tDJG%R90?`ZFcUkv(+otie&(XDyh zd(cJtA-&U-d#~qldU{mz{)6nZlpWc8%>u)stZ?L-EKnH|!Xp-SK!u}6;BZkb{XF7G zi&U9&OV~hpnr70XfDVTE>r81>#}u5hVb)NHm@xxfJOZ? zfKx57p>>T=x8%3%MV>w$tDHw!q3$%i(Tz|4*I8;CnPJQmIW?|7L{nnBpsIHf=j6sh zrffBDm1Yh@qEb{r2Ir9S8EK552%dV=52079`X>?N9e!ouK)ThH#j|N4VFVfVB4+BtFn! 
zhr|LwA*Y%;o%%@k8f@`g_-H!fxrM{eaG*uQBtKi6G=GWSk6W~4;O8NV70*VTh#&t%f?{#Ne4 zAdd?Tcc2%ckX3V1NK8(~_-~Fp;Hn`SnysN%1rvGS=1$mY;VarSay?H8x4?@dVmWA( z7H({~M|u*8c=J@Lk>FcSQMTz2meET756i_T-3!p+;Y{vVO3*I92D*l0NH*61N2J@M ztSuAU>KwSkn@w;lw==-9Ml~#n8Vzq{P6}Wnm9wET?NS4Y@?~La6&H_;*kyA-v`jw zx9a$)XB7YTO66gBHNckVVX;Dnu-YR8&6HJn*}aL>90913TS)4PsT6e0K{T)YPM^Ox zpysK+wEOW-%F&bZ4ujK$n9>sZ5aP=-H>`z?)d@K8YQ3=ED2>$mj^clN;<>Eao@>md zmUD!ZEiK8!h`IyNVSWy_*^Yo|kDd6I)FfH!v`-A_pv{~p-k;^#xy{n=;1ukx;NQdvf2_RneGSx?wJ`Zf$1b&@Vw4&jIg7WBKK zE8gDX4*MRL2}Z4B;LWxrcsgu5ZJB#tD3ARFAv>mHY4lq1-8%?6R~F%4M~T+j>!0&~ zQjTGK56S#8XE3(CZV@MZ%z#b}=B%`>C%)b-nO#m4fo-&k_`C8eY3K%^)npwWwP+)) zJ>!cr|5{?Fi>+Y$WgshR?4c)Wi?Qp{i=bWN&ud={z|k9nsSaKFO<5Pr>C_vaclwW> z1UtjX5NQr}qL{G51{^UU5<@;62aCqtqGj+Bbd5~IrhOLZaOr~>U!w`>f7E4ve%tf) zwF^bBTqEvcVnEXmOZy(bZFr*kDjc)(Lan8_IP$b2#rUeQp6)7?Pktn(t?VH9nXIE# zNA#%Y3}qbocCaw<&Lv^W(mD9pR~hZaS(yKyG8lF@;i&N@((Zd6Y!IaXzw$23-Wy3i z6>|C(^cvL1Xj81~cj|9m4gR->p;m?hZcy>Yru9zf6&{M(DhuvAih+lXMPJ|N5R-uhMiv^}v- zY-qX#YNMsMqop+!@BRV{<3@44!Wo*o;2+KUy^_w^NG41p1Bsl!g(g}%@`VR)C@07R zKD?JqTWJmuCmw>jo4+BvxDU^l=zsIK4WWZ~1t_gd;@YGUyiw_tC{OIk-Iwf!@#TY9 zU(1*`sVw4*gR7x)%NsIj9|rAtO^|!V9bF?@MV%yIMd2I-B>of{4cuANI7aX-x+iuW zG6Z+ubihk9ywUx6o7mf^jO?oy!mzekEQL~pvfmE0?8Iu}meee^t_kMP3!`LXlVV_1 zm1NU;+!eR~>x!En?L@s(Ik@XfCsaFfSF%wZhW(~BFf%`t!?pyX-u0Q}7B-eNj;XRz zw=sA!xdB#pxeIzhYcSyM4tjf=T{Qk)=fiVW(|_A5;Z)F8d@@3pT|&*_UziOQ)AttQV zpvIAjujs~XExG>3j;M6e5@!?*#JmGz+0l3q|M6~zcAE@du_lYfgN9f)w-g%EG}+c5 zm+$Ti!uH13a60?0IQv6^s51NlS-Tiw^|7^hR-prXs8qs|juY|Gc0E#@GMb09gOOo1{eR_=D9@u6BPbcpQqtu$iaDgpeNiNO>Er=@yA&K5Ov#=qOyXaw+=NWPqEC zLDisaN4tMRm@NS78HpC3LaQ;q)mW&98j8^O73b z(v%mnNuv^kiko%trg0{o{56{=D{PkA+v=mn#!|40_du)XQ*o5WQ2ctjBae7Imix`< zfCKE@VZvW?q3G5>Sg8x*n0bD5_rWx-{OC;Ct2VODv)*`r@N)=1j_4piBYf1@z@6Nz z_&1-HpF7qpUe!HA`i&`k#$v0O^XfUpiz$-D6~O&tE?=1T8Y-nXeYRgQxV)JPEA35r z{J&iwh^pe1+5?>PuwAsAN_1mnAWxmY0|xwf1M@R~(^Z#k(1iCPzuyv2{?bgNHI>-n 
zyd}COJ)rSR6|mXy4WyiE2K`zMT)n%R9?$KEeH2sRk7hqSveA)ySYHI%UCuIy!0xK=$>dr$e3H5eXww-gyJ}tAqvv0`I>(ANv&%rgdMA52)<0lUmoM+=LW@OSky zN?T@u*;@M!Q=GVK4L<3CeZzV3wmeLRok_~%tvUhotVe;`eO>gHdKGv3 z3_x+C2k(O>Xq#OJK@SqJ`Aq|DbXqD}4(KEEUg5-d^D7|M!HRtJ^zl{X6R2pE$R{hW zh-O`zg{%-S9MwG)--kuOv9Jy}Lh1lP`AB|KtvK{aNXo0a-Uv8cCcI61TpfDBTaF^ zs3C|W9Ps+ytF&c`x_DTkH}vR`N7V)0_?i9;>OJ?o+#32~zmOqZG_51I-h2kzXPHsh z#}Zs>e4Ym27l;a!T9#9_XxOG2DIe0GH5T+|%NtwK)~#B4d#Yo@i9&Ea(o0bOktxLt z6Y-Hr5}kad$>Z7%itQe6VRE|`Cj0{U_v?zN_H~myso=YdUDzL(*U|vJ-%fQ=bC9>X%UZjevFoS<&sI@Dv5ZNfzfuJ{Pmaz*XV0N z)QCl#BkiZ1U#_6w*^{wi-C)?gE}c6#597MxzHFoH&Cu@wG$d!!lCL_DI7^57?dpS5 zYP|XBB5QsZ7cHD<7|H7f%HePQXXrP^iC*~ihK#XOg{gUeBnkL<{C@YNU^%EOTDrY; z!Eqlc=R<(tax;o`cORscHy_Z(fUW$oemf}~n7}%9g~C56pQwAFP&D1W1>5?M;L}^z zqr;d9SX3_pULX*fty;w6_VH zJ(_9e<|%wfveL}o9tD!^jaTJo!lB{qqJ>%(IrdqD%Oq;&jr(cB@@4vLJH-i3ERCe# z#eZNb7{dT6q{AaU#Dwd&W!vqCiXj=s(CvT?UV4yBr8_6_CS9OgW?_(R)?doT4CapS zYGpg#YGbeIXGrV!SloHCK{mBe+Q$rELuYF`vMjq@th4vQuo5pU^Ht-yCc`13+!5(@ zIk(sq@p-4tn5YRTdON}@iAE8*ca6N^8`Hx&J+5+@j?emdaj0CKXFY!ho9d+RtLT95 zK5vDmi{g2Qy<`XNby*12%HnTxoPnFyf^d2fSASi^dOeKg7aOm^p?w!cb9DvoP?=4S zZTj%u$uVH{zyQmwLwVO7b=KW?Q(Tpz$6sj|E%#L8m5*(4$JBJ%?iR&Ae6?BKyq%iu zzl$H;BIQO0A5g;ET$bm(hkd(4P@`|T!k z*It4*Vgj-2vXv0{^oKYK6ma7@D{h(8hrd*eV~31g+%KGO~ccRpTQ@|;JImMre_CxNaGv@#ct+@ZnPy*j)N4y8o?&Kl63qsGAu- zO&~f~ypC10!zFUE6X1+FG`saT$Tg2s_mDBHmtw?a)4qc8${I>L7>u8;UJ+(@ZzAW^ zld^etd~oK9j{J^~(z~)fFf`biyV<+pboF}_9&kdoGiNCd`}|FqH&6vzzw{wLk3h7a zgsf3h1W$t&LdCr_zIHR7M+Q4UVOcR4p4JrBpV=UIni=uFDG%V8S_U5UETtahyP)q0 zX$ITmfVp~S_^?9&7pzc${8JV5ex4V8tvBLbx86~p)n@+mz>;TZ5WOntAsN*d3JnI$ zv`wcs8y)Z?zk539ty#j}=D1Y(2eoq|x(rT$5{I>vQR9c}bEK~7Djnfh z)HYbMz#l$jeFBI4y(hx{zQyD!0X7#80Qr*5bkQ*%TyMr;=PsXN&P#LLyfqE)&FRaP z;w|W+S_bA{9z#LF7;Krd1U(a?Fk7t$PP=*l6H2;Cq{{KUaJn@r+#inLAE{zY{(D&( z_u;^<cmwf`T2!2i!*@cmzYfZzY{119)I`~M&Q zLcss>7j(KU#rsxA4ekkL91#Jym>7#1UqySs)j#-h5h|#_VPaB&RweVYc{jQ z_X~bGU->09EEg4nepH5N@HrRmNzDxXs_x=Soqgi+@-NU`Cr^0rvRT-E##E*^!B@1j z0Z9CmRXf`FC}*wgnp2Lf*~wK%to{sc 
zK|95gvRa{P`j*;&zABV^&jBWFF&6_U8m`^Z5o*S&ka6m9F=F)+VXya8*frp$@bRV- zOrd14hithJ75xBSzJDnG*iz?m`@R+EnVEz9{AQ>soIp8lqXo@|7$JIMT=b;>m%lLh zfB6f6|NHp`{ms;KheJ(onSEBw` zx?+h?>DJEgg++vc>S+D4gedxbe9912F1) zz37xJk>S+~VAby=Tw#14!c2`YImZn>pV!L<-%EnziRqYrM*;6`?j-lQ{*at1P6#g) z^5F$4QrDZ->@ax|=zhtds|)uDQ<4q~-d5wGeR(pDR$3=0T~Wp;W65-UQHw*p6r>Zu zkvtEW3l#y9-E>D0J36j`fhh^RZ{d2B?|BXw|4KX*mqImO*~Ddcgo|1=puJGKYfUr7 z%$$+zmo*-jsqes-)Fu@CaOYF=v~f=QII;XcU3f}IU}23FJ9xZ=Jip^~e02&>|FRrb z{QE;)hu##d_ZXqy@JFB%m(DuNkATY*O`h{~7oAXCBffYxgtj>Pz(Qp=jw~~$>BUk< z^O8H;#IJ>_^h#=Ri07k6)nIXAJ=t8T6r3Z0s@`6Ki4%OVQK=Jd9Gi@bgD!z#!W?Kh zr^~xt7mIS!OZ0oVH?W5tU$7`elUyB+ERcbF>m=d$PdohhE(mvTGvb(h9qE42LjCF@ z#E}z{(CUmGMAb!M^Zqvwt15$6F?T6wyED8UY0KMMJwW)}EZEjNkWv3oQgXQ}8{~gg z*jjQ$9yrn#UzHl-0lAdvDIWnVz4r>AZk~XCn`TkZMH21s;RmwNy$8#kdrIc7RXk$G z0=8YgUAQzl8e?O%IXHiq)aUiV?ZPxZA5lepegg-&e4?4#g0c9P0`5GVgfq9A(Ic;p zqC6-B*XKWgk$1u+`l=D{K41rilO9up%`ULhTOiyET7iS@^62-D{&;+bZ*6#{1-Gx9 zi{YzIi1X7!v1w*+R9>agi9RFp*%Y$;+Z-;TTPG1EA<85K?nIJkj&l`LB%<^z#tQj?L8_4nD^u>GnIr)iG1`r zV81v@+y^eF{zCN-iTb4he8luWI5qPWyit>O&$d5gtIkUK)mV{E&94UqvqRLLm5NJJ z6}dY2fy{VhGTnVSR*cnIh95TML&r6CF!s|_>e8nXVz*Djz}WXdnKvoR{2djH&_-)4 zl+IUgKJ;9P8hQ=j#^MTkF~0*^3ZXcfW^h(b1KEwxheIa4ang&9RCmKpBK6n96s-c` zhu<6VzS+Hd@ym%zfi|inUC>qE#|%J<7y&XI0tL z-4`AEUKaQD^Cj6k1@!J|hwVL>>Q{B;sNvJm=x{W&+9qK`eX2ZQ=P%gaKLU^BsNv&E zg%jBt&7%LvMzZ)rQf|ZkZ(t(mfp`VgF_SKQPFXuB*sf#yux@~~hoU>ry*j2Q7 zONc}?GiSxdZ4`FM5bJ-f#SuH6NHd*|JU{s&>{q@Hj_&%ONu8XVRYV%5oGB@k}20p)GD*jH?5FU>zrsm*s@bvmg`|s?KuLAx?qS6_T*JB`{0PCAL2`S9$D#}Ct>C( zvdp+f^S=v}{@V{WnCA&A$bdVYS%q=Zxtyh0LzaC9;kIyR9<^aVOvsrd8{=9`YK1Nw zKF*gW-MSz&woU?n^@TX*>Ikw)?T?~!m6-i1g@P=rVbAP|caPinFI-c;FXUAE=40 zp|^!7<;7^PJ6NJ#PozzbKdX1uo>P%|$Sy ze>SdC8;>0e&%?Rvo1t@RCk$EGm+kzey;YD8HRP9p!^1~na;rbiE(ydHMS1X}b&$L^ zQWN9kmMk;M6I>VN;UM!(^2R_jH1EF#n!Z=jpx=xIOC}wf2-XkKQYUl2ToVV$C`iWN99g>AMJ^Uq;uG!_gyL%uEN?*4z#Gd2M+t< zMF~BsVXd)0Ug~AR4T1f!*Jo!=3RPe`^J79`lLHSd2*8##+oAK2!T43WZLA;ji)e%$ zZ>!u(cF$L%x9>+dHOd3W`E(|kw-FaO^}@!|BHChl71|6mIq>jKiMVwN__{5>EObPx 
zlHRpiC;V8!Vg{SskY*4&61dHr#914hxiL}df=qZ$anlU3*QT#Dafmm#h-)D^@TIg2 z*W=2nhmuKiDjcjZ6Ec75@$rm%Y$0Z#;&BsH@1!H)=l)4M&!@7}FT3NJ3ko=I#vwYe zd^y?=*+G|=RZ5veWzsK+;Sn0Wa9O-GZvz9IJywQOKkM*DiC!2})dRbZHD{;po5gsK zVpy&$b>nMRg6cPd0d;DaT)vF&e3{6n2d0U$#FN6(xw>o-J&@;r0v@zr2Q_x86wU{9 zpkwx_Lcfg{xa`;v?y+ewR@q^L}mhnEE0Dn96;%v86%&k~^gD1N@0k=(^@$X?DtlyLh^HZ0h?8a9*JSrZ(v`k_ZDX*t^qB~5zby5tqUP$9U zsZxHz6R~B<4f&;h8nk_{7Kcms`17YL=unh{^p?(+C7cSy`8{1Yb&ey0ay$7SdkgiK zocZdpGjMlEG-OKOXWylzV*Hm^P>$}xsGL6RTC@_4rX`B2HcB(G)m`D-KQkUXN$TEr zYo)mZ50J*CN+|0#2*scMBwo&J2$`FSTkr3HVZI%xLxq>9r22>y6}36S#1k(ygmQS4 zJJ^hJ!aX@EG%UG-uo@B(itIsrVajW{|UnnRzYrpE6#D9 zi)Xb9`T5lhj;+w*y|z~&cv~uGFCKzRwL-D#S~^epaT&5ZFCvp}z2MBcyu7Esp(Kb9bnfVsrX;Av#{}Af3`Yp!NX^S z;oyV{C{|U5+&gBx|9dP=mwE}0nvMy!xsgKR7&}aeieQD-I{HYx2udqNC+DMNnAiq! zgR<~`+#YJRzDd18dvW9Q3Ygf{7>||T626Vl#NStP=Rshl=>qDNx2kM za-BL|I|;9<-oX%w^|7Rb5~=x5hq28Qs7A2J6ea|Spi%$wg8WaSFplZ zq~w#wMdt{KUOS)@t$G_M&E1r^ZpmF}xRQ@1QR~GjH50yhxk_}_Ho+$v<9M1+N1UgW z&bLx6$>jTSc_O@p#l!8nL&kX0IW1YGRr~R_UjM;uQ!ARf=sGntS@M=NE4(+gE6Hc~ zzzdVh=)uyV;)~>V@zw2dII68Lrx&>6yr-_L{z2O5j6F&Q1}=2pYAr_nbcS++0Erv) z86@orl*b+x52o%CmcE%!)1}7Ccc`%OnSt2?gdM1_+YoTYWm(bT+0XTWjC@!}f z#9JqO^X(a-)THGhsx`)7bJPTylinsK3?DC9UpBziP48&<|JT)mr z8B)m*N^xI%BWVy7X^>PhRYF2(L{yYn(m;wzBhiTKY@*U2qEeAYQc8wOrRjG)-_P&) z=UM)=*1g>4vaWs3KJWc{Re7>s5_bIU^S61kr7F1es}FV<&4B9;O0a*wEt=6f>TG`j z19ExxGg*!-m3Fe3%dFspHRC0JZN{=cYpL5~2rk)i6J{;h$sf|vMa3Y-ZYc;XwDJlF zYxh9^nhP*?+Gx7_ES*0UvAk?s{SPn>Fu{0ZnRyFv(mFM_$F z4~n+#QOD!+yjiVDA;`?QE_$&jfVAAtgP)ip+LjOGI(NKfy^#`BvgH7;PKbFMqXg4{ zHd`sbi-KN`WJMY}IB~QIE1sCe5`}r=5znTxG1sSJ;NdUqqUs%{pOc6aVy18!%Rce_ ze{M4M$V2Se;_Wzg={d~1I2ebzOTxTiK5U|qC%e=%8m%&xuxRRXno@M0>6||R^PdLbn)oVcwXlPD zQ)3pRHHChuePDe9%S8VB>;-ye1kJw>xJj6sttzy|d+as%t<_`RMLt~91oE+MwiF*KMx7H)=|Sx>awuxxRl^2ShWcez zD)W-vXv~1VG+Ffdx*6r8OZm@+XIR;@ckpK860DkIj>>Qxc%+i6vGK%YnbXQ)#xrU~(95imHc;!T-WVP@LDumQ~(l>w-Et 
znE~I~Cksm)sU(I^I`!#08&1!CqS&LhbUt*|czE>B7dx-nVyE49y3+g^{u!(Wm4BE1M=)}MlcJsgA`&&2eQAb6*@m;0kyz<(B!l>hC9R`Sjf|UjHoPj!mQZrccndS;*8cjK`*@O6dO7NHG1%VEB?)(r74R zehS%)8BBz>vq$(zriWSZsjJwv!;=#ESQ;NV5FhN&K*crb;5}6r<%@&q_>`gSxkWx% zxO%Z{fvfa<)*$-F-{(DqgLjH=5BqRWjY3{VU`>1mi^(bk<|jeB+g79JcrT1|)SwF= z%rGt5fZ2}@V?+BVv*vr(xtLEuFyzfHdhjkC($>Y$Z0BSQYDvKHg`=UT&W&X)HD!Ze zM#GFztMKLD-MGlm0*nutLvr0ENbOq3d>)jcMY}DGn|g`PTz`y-`VFjGxM;HfP=g(Yd{}ENc=>&gRhK+iX%8 zt3|E+d%m7_GT%*SnEQtaVJ-Gz_S+O%Aw7#cO)#u~~R}ax3o`uLhaIEJ=#ckDpl> zPq_jmTG`E&U;JL^qg^(Gnh;&^wk%=h(-&du3SG1d-Gf12<_P5IW$@!cBII~0W666V zpC-)smh`No$I1?PWkCjW3{tMx|WN~J68 zyWcD3GRXsDv{TDomhOTtOMWn$+)1e4h$sm(RNaN=r_t}pyM|8S}?qBxWjgx@uBnD9o)7V^_-(!I8N*d!wx+g^xUmN;x851Kfic7 zHggGzpKf6rL$8oaCV%MD3v^a>zn z#ZIm*v4-&}3(>PY2Q0SzWtmU%x%{nBaIV{r3z(aWX)7;M{lc-BT`Z3l_ATJOZXcWO z^^(bV&!i_G4DdyP6zT8%A(|o>YIk3nL4WjKv(u+9P|spb+P6m&O0hhq=ks(&-&SZ9b6RD+b0L6-ZfS&>Z`kL)~sPPzyi-Q`itJ>b+S#b||u;dCP zSqx|Uk`nOA1Q&WTbv|{P3b~;nCM=!#W8XzJ%3XQ@o*xfkbB2t97W;ULmoMao^=@UC z-rZzcXD8DF(;T`Q+6=`%glE}UjjfS$G2Ppc$}G*toOgru zv!QivIfX1W#tn;v`E3?^WM+e5pQKQE zcRI^`I|H?+K7gw949>SPi)&Y$U21Y9kCoJF(`}nLnlRi2W!DOs)Ydl89%TTZ4_ngG zuj}E_A#3_;ybC?_#3^Qq2m^0f)8$1{SkfBC?Y6xkusdwoyk(OqHk`wQ!7J%Nk%*6t zI|uT?8`%X9EtF|o2{X#`ILqXN{5jhoIwQR8S|T@N(y7tll-b2i&hgp3~8OccYm&2rRz?JU=&5bC;S?^$kk`XO>?IPIaji}Mb zloooq(s84MWkX^N$1Ns4AwZ{hze zDdJN$1)=}6Veoai8HstOVPLrnIq&((zCBuwg)==VG;TI^uaPe;d~*$Ii-TZy{6Ms> zH^IGLzshWqN0R8}T-?N^f!_=X`m?|uQzr{NS>?gxF5Sh?8DL3L%WpxrP8?jb-bdfZ zv~sVWUS@4u+(7fs2>L1X@Km1(ww{+msl#asDHfaYu&bKiHhc-&zDlsB{0nA!TT^NN zscYH57D~RV~VAb(%ZbRlQzT5$^?~&M47niV)72OYv@tKI>0%!!ykc-?-@weB3b>1R4X!3^jMODH&r9RjxbDdwM?m@~jkD1l3-B54%n|pq%8(vg@ zU{&vjiUurH!ROy*($N__H#Ecu>(zaY~0YPu+#2xF|z-OI{DD1s8TX!%YO*5*n>qiLg*qX`W1eT^%_AU$@#BiTpJIDi!Wmhpj=jMKyt!HS_?jX^Z(z~YwvgBtDp&}< zv#FKo7_&f~c0a#E_qcVK(cz8v9wejml22^HwP)~IJ|CPm$)f!XU)u1c2$J2eK;_3v zUbFZMd4021d#;D$E_>`9IsWh8{- z8~Igb?nC7`*;8lA?xfu0dZx`K--XR88>7m`*@lAquM;HOD2Lm-hER|A zeq3|i8BgwtA@id-B$Ca+h4#jz<<4Z$!s*qHx`$k08~ZiMktXv1?K< 
zOFui7)(mi<%ksBaQ}cK_C+p10vfL?Nur6IW>VQcbl0g6dedrNbI4_a|aI8Tz{26kC zL-}>sMyjYkPz(E?d$7K>o0y)sHFdr^%%3++=f>K;Wj8dPam}OzuKl$;^QnEu9}XHo z%@eFx@X>gH@22EB;})koql#@lHH}UvS!3+%C9vrGBwF#Ri}P5UK-2#T|J%C%n9o32 z8gVt8dZ&8BnN#uTY~e_EXpD~nB2xrkL1EXB$)As2G$9`shsW!;LsP@kR3rN^oBD^W?b z{#826bMoO|96k+NH|FA4he-0Puw-!^Rowc$Yq+50g-kI*0$hvFkxa)jT6l32+uz{0c9fny3dG93^ir0V%96@*$ihGiN32soZ*5cBoqvF(ju7Y0%4fh%$9fhZQA}?` zOW@}6jj-};Ci9*7+q!pV8oOoohFvMphoX2#TAsWOQg_&bLFql_94=(Jy00?-`$kkJ zIgVO4$x@~C61KkR49k6RfEy_tiI;{8HZNWMf5B`aA=wqfp$}uwp8}CPU3>;{{&efP8{gd}NXUX^;Es|_4 zU?)W_Fu?vVT&{lv6AY>ZzTS44Uow-;v$?_Dk|<|N7meuKNNr{!*p^-PM4?B!2fG;Z z7H*ve8v5%qyE}vvM%?eSt`QH|+~Y&>VNnH6GVNtc+t<;-0~6W#fZJdj6(_LJw&JpY z!Fcx6YJ7kDXjx2!G|subnI7L=#jl8)O9!+KNU2!?Qzq;n9qoCncDXF5{Z_+!Z^&EIuxqy;^76r~oVxPVVM&E!;W-1yDt@q{-TPyHXc{A$2Q3`s`_u#~}3k0*X zEw~26-~)&0#AMv5#VL@MJy?UU6-ML5d*jgL(LwrByAIO6)iUP%k|lrDf*&EO6zwvP zJvlrVRx6iq+D;J^q`ig~et*Q`eIB6U97%Gy+|Tw&OJi+H6jfMrEbFoXj$g3=({lFF zD}5)rU^k8XT|W!Qt+~c0wwJj#1A1Xng+T0epD57J_wNGp~1h zDM7uHH5cy3e_MC5O>g{hSeqe=6}+>mS+Ejw%F=M)yup|^dm^T5Hn5sWGr&+!j-K4~ z;qouoQD>4X>07SC@a`qFE-9Qv9R)OWQ>2AICZWg9-Pq$lk*Uc_(TWjLbRp3IZ}~st z(`hyN8|`K_j|RiP#dB%(R2?$j@>mpAs6oE3+F_t}0hMd!a+BvOVDn;qEYPcF1FP*w z>v}k%f(N}2=6^TN7*5YJj3{)RA%xo;fp1!fiX%IiS;Yl7HqQ^W4`kxb1_c=*$_Ib&KF7C>i z``3=ho5SB5@gk>oN47d^JpCEG8I)Rk*h*=661Tp`J|*3SSNaAdChWrvKF`EH&8O`1 z{^2xf^BFkHDPm^MOp3le5;yJ)MYHGT6ym%R_dbgOoh&6hbD>)FjF-hC(>?e(R+z=? 
zkRn&(Z0@Le1|IF$&!s(l#%i0SSeX9k(uPhQ_GFGC^y{a79;!OdVmDqTk;{q* zYCosTY}S2*`ug+WJw=nUW)39L3`;zju84u-mavM}SyW!L2mcwCvm5tzk?T)4I<^&P zu*NrLutI~L#|r$Dm^l8=u0~kaZ^RB1onk{%HCc^)K6q=`i0pf1q5RVyz9G|uDJ^j1 zjyK+inQqtN>eP2o5x4+*xODDr)mCgB=!IXbLRj$5Zs`6LgzGN+gh2gr5SNaha5W^OQeJnfGDX`5g-Ppki76Ri~4o&Y)LtCj)SSOakNlzF_70IWBdmLkm zJuDc3&Ba+ltON#~+Q`QBO(&aqda!Mez<`bZ0NZ^Aqh8Sm*5>QOg$#KG0WAVE-_I6b zhmT{+KF+}vn8|ioiOBq>V6DDsiIbGY&?bC6{wNW6wYSaidGTDq7C8d0mu|x*xesjJ zizrdYg7d5iSe#kT`xF|bdsY*h6`%;rwh4L2vUQy&Q?PD-rW z(ikq}=;P)bZR&RnMh&5-c28Ch&l?tlihmr|-L>QsW-LJI1Urg#oX1Ygl0&J&YbbSG z6XIuG;(RR*!GvZpvJ}_ETjsrBwV{d26POcnPevkHZo$4~u~-u(OF4h8@OBGLMbhSZ z?7;~K*kv~odrcGB8C6L#?QmteS~9$%MgXRdRK|!xEppXaf>+Agu} zNg6|NgswQgOPa}4SINR>-{%lyXibA2?gHfzM_Ja8N1zcXu-a`DaB{3a)>&8c8{Q6s zgHjXu=iNMO3Yy9mZ(NS~$M-@`?OZJFl?U5hLO?O0;8^^Urdl8_q+9a86YilVUwpxDPuhFw6grX=kp}>anh-8nhP* zdycLrY}q3;IUb)riDFwX%HZk4g4MRA zgKz4|hy0>%{3~fKrlo$H^=IBk$=RBa`_YfI%QvvT8R6L9YlV)+6DjJ~NNU&7q|LVw z`6UzJp^%X{Z1bD9&@Y2d`A8@hzGLpCQdkxl#(Z%$rM)Wvf0co#sWpsr=lD{2Ulg4# zTE*?MKT(#>XWnYT?#d`9VvOYuw4Z4Y>hdt}M6xwT{Iu&kSea83OP|~he=8^8 zUYR>wVWR?#=g;AA?-oAFc?W=DEgP`L8BA6WL(LVZ(JyTx+P`RKIq_A@*-YRlrWXpX zhdmeteq}E;ebH`-KYlLrq3*$7S?^a3)N0s>Hy2DMzUwu+^zjzEw55`z@7PRsSAWA5 zzj)LfJs!#~tRvFsg+eDCY_^s}_o-@}`NQk{27V|cC?8>m-tWZCA*JkXM6qaF5~9)oZp(0oK)OOCVe>zB^q5(K5-SDGKypqqC&}|brzO}8-RO=4O|a6 z1Ls?Bb0g-Z@k$z-KsHVl|IUxcg2i8%v5qjJB&LOmGW+1^+F4Z7ZiaQIjd0Ww5x!7X z#n1m1Al@L5|IXk~b!d6*fDxj&aezY4w8AOB!)U$JPn^=x{5)gF6nMYv*q01UT0 z0lF$GtW{uY%j(af(}A+M4yACgrwYVZ`&0jL6?|#A8QuhpqzIiM^x8+2&+=Kz#ZLtGMKXZ~;SPSKLVbqdW39E#;nX{@u`+RmXt3eLn+u^}l?#-vWIjyX6 zWf!yhoD8)!#_ZBBJ)Hl^4P#`1Hcvjt$zDB;ty>G>lcoxLn3};1KdPgO-nD~eJkFrtHOC+=%t+|lp9KH4U2NN;Q{dtt%p^Hp z}yjgb;2Cz@KD3|YvjrKff8E~;*GQ9_F$5)8@7~=!kg`p z9BkbL>EkMSYspMlXV}WNd&G;hJG61rAn~&6ORdl++ZT_h=u_f)arAkX#p|nTK~KLH zEu4Ou)!N$8)RDs7UuzP6=_W{_yIlHdRTeCh!$OXj(#UhesJmq!H_ynIt#=5gf?r$N zzw?Vp{j9LL8$5u#UPr-FKRkzls_QNuD$?}j;%5%^MdHwLCE;QP)5p~o|ejZ_t*cRiXg^1ddWj?u(>nRciW z76muuo#P(H0kOzH20l?YjX{;ca)kayvLc`7bJ4$U8CTcRRX%4KgK>P 
z1~B(Wqu8F@gP5Zph2ENuWMi=mwWr9?5@CP#xK|7>SrxJK^)J9rGnZYzR0d&X@7S0P zYV>SS084x~R$vCSi|S_&CqttHqVgGYDAl}>(_a||MglBKVOqIJ_L(9*i`1gkuY!d= zw+{9E8O9fdWbn1q50~||db80lCSX8cJU+^@;1wPVOqFt7i1Zftvm?A&qV=V+Wx|@6 zS*sz9f5L6A@xw1?&*Px;8Qg)ATLSy+Bpbf^0z0zgB?~q4r_0m*s9bS9`@L}=E6ovl zA;Ov1<={}<_TGt?+@eNLt!CrdZ<;vJDUMe_m&Qcnun#T*tPZFs2iEj<|2VKgMLn;uqaa@ceE@e$RjNqXdRm z__B7EKGhB%Bp=7L1umGmkV7+0i(EYAvDo(*%q$uW)`O0f$iMx~it^jojyJl(jMW%k zbX*CN?<@eL1C6ZyX9^oqk%Vh&XQOjzAnSTB4~8W&7%~0`%PSd*Q`LgeWUx1-6w6}9 zP~rZ`CKk3tjTg8VKSAB4 zNUEzJ0C#-ENL6(kUjM<-7KP1dX{`v_{qOnlHZhEwH5}Vjbf|ojGo^i$WdrvpklSHn zQc;#9H_b{u|9dKE*nVS96<68H!B%+mKs%eS)Wgn;O=B**A3(Nt0VwP0vGRR}bbjC* z{P)=g$Gy*DkCiwYnEZ>$3p^rrN*{k2%fQe_GVJr5A=n|zJ1E#+0JHUxpub=*Tls7U z)z6)RC&QfB%lo^?^Vt!&ZW>Hgokqf4^;Nhp?BAaGhLA_eE?hGuopt%Ir@X_v!NBS= zM7$AWPlv_9g|5jMnQKi;R_}+fg&eoxi#TrBlLEX52)Z|ILFF4kbn{}I zd&+@pJd_IJIp&!A`xbk*EsdQRuoZhoK4O6>e$Xk30t)wHWyd^mNbzv`sX7kLPb#88 z<8XR4Zz{U@1#smBKJ@UFJ%~?r;KqH@#(SIF!Lv%AhN|VUW8Tl1-1<*tC(mxCC7xDR z&&J21O|t{4zBfdL1B=M!pDa!EYbw)iDuj_s-t*(?R4BauIBYp9_(tUSG6_k6c{PmD zg^D;FUiOeB-JHUXC3(U48i5D&H5ncRJ^|0(;i$2AKdKnSuvh=2q1i_gJI7`)t#|ME zuP!TafzciuEBFKQUf6zzR za_Ky)-Ef6D2^{L+L($ex;sWsSgUwVe?7{Q;JSbN_0VR#@!3i%T?x4HSuL{1-l#cA6 zhJS+~IKGH)I=q96x2@x9Po7~a_U7#7kA*Bth%4B}c+D313CET*ppfO{-&^7QA9K61t#$Gy3Pv-@|i`%2A zc7HY-dF>$nU6PH{?^QsTQw8cTm_{`X#%+^u3>z88Adw=qCQ^aZ7pP3lcu>X#?3QFj6LXD?7=yS~% zzLFJ<(iuykgMRRFj)PG7Bfw`MDI`Hn6+=PUuq5=vl@w#-rwQDT4_?8 ze-KUIW@0HPxQzDNA6oSvw7C)#7D zTRLV{o`=ONgJIwg2|Vv}33Rgz>AH#v9u~z|Z@KTr@BWp*l?23M_;^)zI97?a-BqL1 zq%Llw{}Biu;>S9qX4AvAN#qwHc>YRNn2x~FcpaCF=OS&0Ke-1M(n&lelEL~x6PTx; zJ+@Tnlh3BH@X2v+*_q{c;Jb}4Eoe$YrMGq5TnP*Q%7-XgzFbG>vs=*^S4G^tW9p7}g10uEAM&Of&-z)r*n2syKh=r!tL~`^+r=DAKJF zr&ves3T#h##rJ6;a~Ipq>OTm+ybI+lCFB~n@Z1#G_T3rPPd@?^jKH>-a!5V?74py# zV^;={mBizccclj*>uMzLGjJj%91muf{zYK)qCR%VWja@NOW5Pw5bm*TlUZwnE7&A8 zuro{TL7^oIrOuc@^Aio~7<-;i?Gt00Hb?V8Te^6)lT|Qeh79VaM`EhMcTr)eDlMIw z$n`vp65Z2phnsktOCKzFBF@jo#Ko;l+3y<{Ewqd}iW1?tm9=p9Wr=H#JK(B4#;AAi 
zJ1bcmiAO8mm)u(10sg9p_C<%G^WO>#(wYDZ*V@zFGv?6sXC7vRj;1k%`odb2g7(hY z)Hi7rKg)XxJ}R6i__)Tyn75Af{klb5%W9jn~WjxrN z!MFfznsi|%7R9@=)$W6+_SH5loNY-*)(+s*22V$?+t(p{=SJp#_z$d;-OF-V7We3E zDoiXHLP&vBz_KTj%U+(<0zOmO%e3%)X?3?i2JMXiCM=v2=_Sg>~>x^+a$WlC-!c{ z-r~c|`i2q>e7_e`nxfHaLL%|JIp=zG1k^5m%a3Z2rD1Z~WE|%tc%25}SVt|KdsGio zf7bCso@X(=Dcf;yk0O05RbYyjMkA@$p@q9G$q5;wj6>7WJERZ~-5rAa%MY_aF-~Nc z_K|&zZV@uA)=XT@jy>EW!ds^=vYwSYXw5TslGYqg2}d;9n1A>9Ap4b=Ki&`)UULKe zesh{AW{z7`=Fzne$FaYMfD5c-%S-Y=LA?W3$QD?yEKS7D+WqXTr!fDMTMKU+T}fk1 zFs61ZdvH7k5vgti-9cQ??7rkOod+Vy z|0iU52Os3h+s2~_W{Ik$rGy@T44jEIWHv*Uak`LAzxZ8+%#ArxcIQFd>Hr&Pxr5CZ z_(GUhT}7^6tYD$oZ#K9}no^9;v-zzB_(#eTvfN^+V2TDc-!;Q2u6xT$XIf(6rf4`P zb&_>gW-;F}4dA?29rkQ?hrbEC*~0Cbgh$8VPQE6+eZkm5A%8c%P4F5;XNny5%!TXOZ{gg0Rg!yF z0NVR}!9m)BrrZ?v_!B>t4tc7IJxfe5N>VtdzDKnEFc%fKnd4LA3|93;5}sNm)B47* zFy_%>_-s6cI@LF`f1%1a_1IoAm@}5X<(gp3u5FMsGmKTEKC9l^1(`AtWY%8JcWgb% zZylS*_Ui59qCOUKs>ds#WWyMo7d`;@z4jpUrLo+jv7PL}IZaq};Sn=D9*_UdR@dYrAo?^meG4J zEAnr?%cj-Uagq6}Xw(TcoEOqu>a;|LGVKj8%bvG>>7Ky`8yE7Shg=A84tOaJ#ub)F zxn%?6d8f+hAGoJ_J))bGmg*i%GsZ!qVggw(;eg z{1>xG3OVA*vRkK6+g)u6dvF-rGu*I8M-vTS8*&|=UU8n;JE`Z#FYcoFF{m#81upT6 zXsWysyb3iG?$DgztW5z9xYo!v`gX(hL}zTT9?LpTCJJ?FJq=K^p>f=38qxihJH11m+uDH3RKsnLqiFvHo~tqmA)k*MIJ?XW`2I-+)Ldfl zMeRlSQ2K^z7Pv2ZgMF#4X*<9DMFEbmX=cq9zgc+fVB9reDJFP5WAY!esmxOXpXmKz z726iWx&29^`V}5no+Cn4hP1zT2fZAx$4}K1982wLcvCwHz9c2ExO-vHlpn<;dV^VN zK@OYnrknCv1E9GJ3>=@?~_RP=A|hNKupggZdr z_-_zVKa^Def@rH%G@^>HDN9@jU0jmpL554EXsS$#FFTsH|cB zOSS0r!UX(NE`h)NFEQ6gX$TTUe5GC{-JLrQC(QT)bNV*Xt}zNIduK1Olmr~5&hsk< zl<_vVO<37UX|VXRpJv4kpdO1m>}jkQ=|3`J_Kh3q{>k+$Yg~iyH_Q?I#AWPJaWFQu z=TOz(<+y5fS6OT9F_4DCr45Y&gLIlIg?NUAEc;*ng1PDc<1Z}#FF#<#fA|4jt|2S` zfBu5^fB6dq2Zf1d%z|Ojx*&E=5kyX7A=c>`H|OMFI5w&g)|n|m>-i#1GFj-9N|fAZ7^)tUc~YPEz24o+Q82%N^JU8buj7Q499|# zK>Kw#Y~NA94G^0OuY(4{>w|g_;~4~&72CKw<8rv4m)+pg;%yLEqz609Ea3a1$NXp& zJE3dP#nruZg2Go4V6fVQdvV7J4FA688m3JFy^*%?WXTu)fvOER-{2n?;u99)`@j4J zi~sT${QkRs{{LCORjwhc|Bt^QwJT)J|64&RF|o@NlIYj4G_1)uNx#njV5!fQnQCsX 
zz=YX}H5(FfzIv%h;oujRpLPLWSex*J?u@|g$}QJarf0D)ujZoOu)XxBQeX^frZClz zlkl)}9sLlvt9K72W1J>~4?{dI47Sgmf1krneuGT*BKaV=f^#L<%FIS7oca% zM%txqfv4NoVD{*IYQORWawMF&W#12AX69xl2r)XV)O zTX7DLzGwogEHQez`aQI*JVG0+pA#qWXceMLG2de!{QY9j6%XG^c8BL+%CR^^E*D*f z23(&z~eRM3+^S09f<*Tf|JRD6v$=n1guDyl|YC-^4BwqU@rZ2FcJ zPwV;z;lG*bY>3J=+_i5QKCrHZ`HNGiU%Q#!?5$_%WsE_RB`Rw@M3;biY`l4#I&Pk& z$1i1R`>~q>D{e04x(70i-gn!$~$x3Ceuic-^^*Sg8A(Z+krs=c)%_ zUg3DUJ+y(f*r?#;nq9Pd;BidM4IHNMLV7$fKoqJpFLX!q%}#!aSZArQQg@uXCL7wv!BOv2=j# z_l57+DM_RKGto!$K3jU?1q{!~;D^pY#>$Rve>;Qc@_)l=^D51ZB z8GehHLB(QmM9w?0a%>p+n4F1`g>|Q=W$6Bf_x0 zE|s?jZ~T{E$hQ`x@*nsAg7{10QLCh*Oc~a&qQG=4Nz!KWc4M*p_&K_fBkY$X_fc_P z1KvKfnEMW;bUQL`l-B@9Qc5v=xpHkE460TiMLw0hqTy zh20#z8RFWtaQ*rSvi_SzZ>5J}?{^6-IIxHlb zln$Lgfb+)ffWpfI$Z57U&YpP-lkyYk?{G787Th&fx}h}ShbgIz_ruwP+xhhu_OVCX z#LJIv{KyREW?*Ks244N)iu*f*uwjc1UX<5B)o2^imd&CW?dRFkdO13zqKc`d)l4)0 zGAWxq2gy%Y;9cNV3M=>vab`O?i_7}l(db0lHGDXv4t@r~iC;Ojbw%*jbu?5rr{L`K z580*9^YOIqIVuzK-cG$SwARTJau+WkS>e4fR6PeF@DJQPmWcC4TQW!g2SBZMSm61T zf0F?8%VIswDi}i}FQih=PF=j(8jY5Yhf)27J2&ZrB+U?N@bu6M`ZfC>o7(+Dbi>ph zf-;-I(b$8+UB405Z-AYbTu{0~@H`k5<1e*LS|NTGL#JhvXYNueTzr5sg7GmR#3s0o;velBj+=%XV2Sx*Tr;aoVKu;wYT+iKdW%u#mO?TYoM7ul@%9 zw|)fur+x&xhOGNf{gB!j5+YO&d-c|dH$|R3Cft3s9^Nk}goB6E`Qu9*%0kwMg@pcZ zQJVPQq7?Su4-Nm%hi-5U+4z4&X~3?Ki2s)L#P!OypRSIz^)gqnec(61_No3j+xP#J zZBtfB+b%gb##Xyh+IEfmcN;!^if!*#Wt+H7H8#<0A5psXRr#T=6r0C;9+zK=^(t5V zrEOa@aH8$H?b5cF;)dHElAmoeV(k^SI%kmWSEcxJ@z!1CMlQuRPcpPCp025}`PkP` zUX;*e^Xj>EMWWOJo5Y&WHn-nfR#erU69OLcw(YZwY%gld+BS|HPHV!&Z5JmGxBdL) zlZ{k_fvwBUD>mEC-m^KGzsaVwYM`yoTMJv)gOavKZx5@;ml|n1WBL-CY0ATGi+_mQ z%G_wMsrV{x`#?0ncIc;Co4!UP+g$I@046egk%fME>WYNW6b7X&Vw1781QOJT%-SM0~M~K(-2A7q&WZ^=9AoAp<=_ sB*r?5Xz9Xyd4>6gNNx*}3OD)x00Wh#x&QzG diff --git a/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500.tune_metadata 
b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/checkpoint-500.tune_metadata deleted file mode 100644 index 1b1a3cedee628b3a79ddc3581036927903274e2a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 214 zcmXwzJrBV^7{`x#7i?yusVREJBDN-y?zdf@y62Kh&|dEdgT(5aSbY~C#RqVt*5Cj2 z=3O-zvmPM?&p8%qj|a<@&=odvY#xDJ1VE{rrQt-%B}ytILB%wkg%E|h%G%DREEZr@ zcFrOZQYvXZUca7sJS2qmor1YCnZSRhk=$0lYX(jz hI2bO}*6QY+b)C6@3qN6DzOyY>cO>u^KGp4`{{b=NKEnV2 diff --git a/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/progress.csv b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/progress.csv deleted file mode 100644 index 4592a9c8..00000000 --- a/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/progress.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3c8e4335a85e9b0dab4e306c3cc0f7abfd151f09c2a5229e97a88d2c1036d4b9 -size 1787766 diff --git a/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json b/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json deleted file mode 100644 index f37fb6d8..00000000 --- a/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/result.json +++ /dev/null @@ -1,500 +0,0 @@ -{"episode_reward_max": 25.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.65625, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 14.0}, "policy_reward_mean": {"ppo": 4.328125}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.65625, 
"shaped_reward_min": 0, "shaped_reward_max": 25, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.46875, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.34375, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.78125, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.75, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 3.84375, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 5.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.28125, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.28125, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 
1.03125, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 0.84375, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.96875, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.5625, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.15625, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.125, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.125, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.03125, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.9375, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.4375, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.28125, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 2, "soup_delivery_agent_0_mean": 0.125, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.0625, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.3125, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.125, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 1.03125, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 0.84375, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 
0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.03125, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 0.84375, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [3.0, 6.0, 8.0, 6.0, 3.0, 9.0, 9.0, 9.0, 17.0, 25.0, 8.0, 6.0, 6.0, 3.0, 3.0, 17.0, 14.0, 17.0, 6.0, 3.0, 14.0, 11.0, 9.0, 3.0, 8.0, 11.0, 11.0, 14.0, 0.0, 3.0, 9.0, 6.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 0.0, 0.0, 6.0, 0.0, 8.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 6.0, 3.0, 0.0, 9.0, 14.0, 3.0, 14.0, 11.0, 3.0, 5.0, 3.0, 3.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 14.0, 3.0, 14.0, 0.0, 11.0, 6.0, 6.0, 0.0, 3.0, 0.0, 9.0, 5.0, 3.0, 8.0, 3.0, 6.0, 3.0, 0.0, 3.0, 5.0, 3.0, 8.0, 8.0, 3.0, 11.0, 3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 6.0, 0.0, 6.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45519290086990427, "mean_processing_ms": 0.16432746798999048, "mean_inference_ms": 0.7360864547945244}, "off_policy_estimator": {}, "info": {"num_steps_trained": 24000, "num_steps_sampled": 12800, "sample_time_ms": 17464.662, "load_time_ms": 141.686, "grad_time_ms": 6292.799, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.20000000298023224, "cur_lr": 0.0010000000474974513, "total_loss": -0.3615521490573883, "policy_loss": -0.003929345868527889, "vf_loss": 0.7481115460395813, "vf_explained_var": 0.002059757709503174, "kl": 0.00042938394472002983, "entropy": 1.7889174222946167, "entropy_coeff": 0.20000000298023224, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 12800, "episodes_total": 32, "training_iteration": 1, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-13-21", "timestamp": 1660241601, "time_this_iter_s": 23.966287851333618, "time_total_s": 23.966287851333618, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, 
"framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 23.966287851333618, "timesteps_since_restore": 12800, "iterations_since_restore": 1, "perf": {"cpu_util_percent": 43.42857142857144, "ram_util_percent": 57.03714285714286}} -{"episode_reward_max": 25.0, "episode_reward_min": 0.0, "episode_reward_mean": 9.140625, "episode_len_mean": 
400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 14.0}, "policy_reward_mean": {"ppo": 4.5703125}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 9.140625, "shaped_reward_min": 0, "shaped_reward_max": 25, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.734375, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.328125, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.65625, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.46875, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 4.109375, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 4.90625, "onion_drop_agent_1_min": 0, 
"onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.65625, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.515625, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.03125, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 0.90625, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.90625, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.4375, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.15625, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.171875, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.4375, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.03125, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.953125, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.859375, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.53125, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 0.375, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.171875, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.046875, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.34375, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.21875, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.03125, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 0.90625, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.03125, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 0.90625, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [0.0, 6.0, 11.0, 20.0, 3.0, 6.0, 14.0, 6.0, 9.0, 3.0, 3.0, 6.0, 9.0, 12.0, 12.0, 19.0, 8.0, 12.0, 8.0, 14.0, 3.0, 3.0, 17.0, 3.0, 20.0, 11.0, 3.0, 14.0, 22.0, 3.0, 14.0, 14.0, 3.0, 6.0, 8.0, 6.0, 3.0, 9.0, 9.0, 9.0, 17.0, 25.0, 8.0, 6.0, 6.0, 3.0, 3.0, 17.0, 14.0, 17.0, 6.0, 3.0, 14.0, 11.0, 9.0, 3.0, 8.0, 11.0, 11.0, 14.0, 0.0, 3.0, 9.0, 6.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 0.0, 3.0, 3.0, 3.0, 8.0, 6.0, 14.0, 0.0, 3.0, 0.0, 6.0, 11.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 3.0, 0.0, 9.0, 12.0, 0.0, 3.0, 9.0, 14.0, 5.0, 8.0, 0.0, 6.0, 6.0, 3.0, 5.0, 11.0, 3.0, 0.0, 3.0, 3.0, 0.0, 14.0, 3.0, 3.0, 0.0, 14.0, 6.0, 6.0, 5.0, 0.0, 3.0, 0.0, 14.0, 13.0, 9.0, 0.0, 3.0, 11.0, 3.0, 11.0, 3.0, 3.0, 0.0, 0.0, 6.0, 0.0, 8.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 6.0, 3.0, 0.0, 9.0, 14.0, 3.0, 14.0, 11.0, 3.0, 5.0, 3.0, 3.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 14.0, 3.0, 14.0, 0.0, 11.0, 6.0, 6.0, 0.0, 3.0, 0.0, 9.0, 5.0, 3.0, 8.0, 3.0, 6.0, 3.0, 0.0, 3.0, 5.0, 3.0, 8.0, 8.0, 3.0, 11.0, 3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 6.0, 0.0, 6.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45293878883424954, "mean_processing_ms": 0.1625872490925028, "mean_inference_ms": 0.7353549498376587}, "off_policy_estimator": {}, "info": {"num_steps_trained": 48000, "num_steps_sampled": 25600, "sample_time_ms": 17329.738, "load_time_ms": 90.796, "grad_time_ms": 6126.382, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.10000000149011612, "cur_lr": 0.0010000000474974513, "total_loss": -0.35112297534942627, "policy_loss": -0.008805765770375729, "vf_loss": 0.7840461730957031, "vf_explained_var": -0.002521991729736328, "kl": 0.00048407851136289537, 
"entropy": 1.7883315086364746, "entropy_coeff": 0.19148799777030945, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 25600, "episodes_total": 64, "training_iteration": 2, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-13-45", "timestamp": 1660241625, "time_this_iter_s": 23.218619108200073, "time_total_s": 47.18490695953369, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": 
{"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 47.18490695953369, "timesteps_since_restore": 25600, "iterations_since_restore": 2, "perf": {"cpu_util_percent": 37.300000000000004, "ram_util_percent": 57.44117647058823}} -{"episode_reward_max": 25.0, "episode_reward_min": 0.0, "episode_reward_mean": 9.052083333333334, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 16.0}, "policy_reward_mean": {"ppo": 4.526041666666667}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 9.052083333333334, "shaped_reward_min": 0, "shaped_reward_max": 25, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.791666666666667, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.104166666666667, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.8854166666666665, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 4.260416666666667, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 4.208333333333333, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 4.697916666666667, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.5208333333333333, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.5104166666666667, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.0416666666666667, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 0.875, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.8229166666666665, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.46875, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.125, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.17708333333333334, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.3333333333333335, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.0416666666666665, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.9895833333333334, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.8125, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.4479166666666667, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 0.3854166666666667, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.14583333333333334, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.07291666666666667, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.2708333333333333, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.20833333333333334, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.0416666666666667, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 0.875, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.0416666666666667, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 0.875, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, 
"viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 17.0, 9.0, 8.0, 19.0, 3.0, 9.0, 14.0, 11.0, 22.0, 6.0, 3.0, 3.0, 6.0, 9.0, 3.0, 11.0, 11.0, 3.0, 3.0, 11.0, 9.0, 9.0, 3.0, 11.0, 6.0, 11.0, 6.0, 16.0, 8.0, 6.0, 9.0, 3.0, 6.0, 8.0, 6.0, 3.0, 9.0, 9.0, 9.0, 17.0, 25.0, 8.0, 6.0, 6.0, 3.0, 3.0, 17.0, 14.0, 17.0, 6.0, 3.0, 14.0, 11.0, 9.0, 3.0, 8.0, 11.0, 11.0, 14.0, 0.0, 3.0, 9.0, 6.0, 0.0, 6.0, 11.0, 20.0, 3.0, 6.0, 14.0, 6.0, 9.0, 3.0, 3.0, 6.0, 9.0, 12.0, 12.0, 19.0, 8.0, 12.0, 8.0, 14.0, 3.0, 3.0, 17.0, 3.0, 20.0, 11.0, 3.0, 14.0, 22.0, 3.0, 14.0, 14.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 3.0, 14.0, 3.0, 6.0, 3.0, 5.0, 3.0, 16.0, 3.0, 0.0, 6.0, 3.0, 5.0, 9.0, 3.0, 8.0, 14.0, 8.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 0.0, 6.0, 9.0, 0.0, 0.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 0.0, 0.0, 3.0, 8.0, 3.0, 9.0, 0.0, 9.0, 0.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 11.0, 0.0, 3.0, 3.0, 8.0, 8.0, 5.0, 3.0, 6.0, 0.0, 3.0, 6.0, 3.0, 0.0, 0.0, 6.0, 0.0, 8.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 6.0, 3.0, 0.0, 9.0, 14.0, 3.0, 14.0, 11.0, 3.0, 5.0, 3.0, 3.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 14.0, 3.0, 14.0, 0.0, 11.0, 6.0, 6.0, 0.0, 3.0, 0.0, 9.0, 5.0, 3.0, 8.0, 3.0, 6.0, 3.0, 0.0, 3.0, 5.0, 3.0, 8.0, 8.0, 3.0, 11.0, 3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 6.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 8.0, 6.0, 14.0, 0.0, 3.0, 0.0, 6.0, 11.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 3.0, 0.0, 9.0, 12.0, 0.0, 3.0, 9.0, 14.0, 5.0, 8.0, 0.0, 6.0, 6.0, 3.0, 5.0, 11.0, 3.0, 0.0, 3.0, 3.0, 0.0, 14.0, 3.0, 3.0, 0.0, 14.0, 6.0, 6.0, 5.0, 0.0, 3.0, 0.0, 14.0, 13.0, 9.0, 0.0, 3.0, 11.0, 3.0, 11.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45189034776232173, "mean_processing_ms": 0.1615299805648739, "mean_inference_ms": 0.7382304408765018}, "off_policy_estimator": {}, "info": {"num_steps_trained": 72000, "num_steps_sampled": 38400, "sample_time_ms": 17410.737, "load_time_ms": 73.556, "grad_time_ms": 6485.631, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.05000000074505806, "cur_lr": 0.0010000000474974513, "total_loss": -0.3324081599712372, "policy_loss": -0.005397057626396418, "vf_loss": 0.7086341977119446, "vf_explained_var": -0.000792384147644043, "kl": 0.0004734609683509916, "entropy": 1.7876968383789062, "entropy_coeff": 0.18297599256038666, 
"model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 38400, "episodes_total": 96, "training_iteration": 3, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-14-10", "timestamp": 1660241650, "time_this_iter_s": 24.84310221672058, "time_total_s": 72.02800917625427, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, 
"POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, 
"SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 72.02800917625427, "timesteps_since_restore": 38400, "iterations_since_restore": 3, "perf": {"cpu_util_percent": 38.59428571428571, "ram_util_percent": 57.505714285714284}} -{"episode_reward_max": 24.0, "episode_reward_min": 0.0, "episode_reward_mean": 9.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 18.0}, "policy_reward_mean": {"ppo": 4.775}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 9.55, "shaped_reward_min": 0, "shaped_reward_max": 24, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, 
"useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.79, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.74, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.83, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.88, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 4.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 4.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.58, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.09, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.85, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.64, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.36, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.94, 
"useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.68, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.36, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 0.53, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.12, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.13, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.27, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.09, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.85, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.09, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.85, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, 
"catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [8.0, 6.0, 8.0, 8.0, 6.0, 3.0, 24.0, 6.0, 6.0, 6.0, 19.0, 11.0, 14.0, 8.0, 9.0, 3.0, 8.0, 14.0, 14.0, 9.0, 17.0, 3.0, 19.0, 19.0, 17.0, 12.0, 6.0, 14.0, 11.0, 14.0, 14.0, 9.0, 0.0, 3.0, 9.0, 6.0, 0.0, 6.0, 11.0, 20.0, 3.0, 6.0, 14.0, 6.0, 9.0, 3.0, 3.0, 6.0, 9.0, 12.0, 12.0, 19.0, 8.0, 12.0, 8.0, 14.0, 3.0, 3.0, 17.0, 3.0, 20.0, 11.0, 3.0, 14.0, 22.0, 3.0, 14.0, 14.0, 9.0, 17.0, 9.0, 8.0, 19.0, 3.0, 9.0, 14.0, 11.0, 22.0, 6.0, 3.0, 3.0, 6.0, 9.0, 3.0, 11.0, 11.0, 3.0, 3.0, 11.0, 9.0, 9.0, 3.0, 11.0, 6.0, 11.0, 6.0, 16.0, 8.0, 6.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [5.0, 3.0, 3.0, 3.0, 3.0, 5.0, 8.0, 0.0, 0.0, 6.0, 0.0, 
3.0, 6.0, 18.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 11.0, 8.0, 3.0, 8.0, 6.0, 8.0, 8.0, 0.0, 6.0, 3.0, 0.0, 3.0, 0.0, 8.0, 14.0, 0.0, 9.0, 5.0, 9.0, 0.0, 6.0, 11.0, 3.0, 0.0, 11.0, 8.0, 6.0, 13.0, 12.0, 5.0, 6.0, 6.0, 6.0, 0.0, 3.0, 11.0, 3.0, 8.0, 6.0, 8.0, 5.0, 9.0, 6.0, 3.0, 0.0, 0.0, 0.0, 3.0, 3.0, 6.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 8.0, 6.0, 14.0, 0.0, 3.0, 0.0, 6.0, 11.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 3.0, 0.0, 9.0, 12.0, 0.0, 3.0, 9.0, 14.0, 5.0, 8.0, 0.0, 6.0, 6.0, 3.0, 5.0, 11.0, 3.0, 0.0, 3.0, 3.0, 0.0, 14.0, 3.0, 3.0, 0.0, 14.0, 6.0, 6.0, 5.0, 0.0, 3.0, 0.0, 14.0, 13.0, 9.0, 0.0, 3.0, 11.0, 3.0, 11.0, 3.0, 6.0, 3.0, 3.0, 14.0, 3.0, 6.0, 3.0, 5.0, 3.0, 16.0, 3.0, 0.0, 6.0, 3.0, 5.0, 9.0, 3.0, 8.0, 14.0, 8.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 0.0, 6.0, 9.0, 0.0, 0.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 0.0, 0.0, 3.0, 8.0, 3.0, 9.0, 0.0, 9.0, 0.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 11.0, 0.0, 3.0, 3.0, 8.0, 8.0, 5.0, 3.0, 6.0, 0.0, 3.0, 6.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45063714229107177, "mean_processing_ms": 0.15983031380236057, "mean_inference_ms": 0.739669952549497}, "off_policy_estimator": {}, "info": {"num_steps_trained": 96000, "num_steps_sampled": 51200, "sample_time_ms": 17369.146, "load_time_ms": 64.562, "grad_time_ms": 6646.22, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.02500000037252903, "cur_lr": 0.0010000000474974513, "total_loss": -0.3157036006450653, "policy_loss": -0.004088650923222303, "vf_loss": 0.8062646985054016, "vf_explained_var": 0.0032039880752563477, "kl": 0.0005627681966871023, "entropy": 1.7866708040237427, "entropy_coeff": 0.17446400225162506, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 51200, "episodes_total": 128, "training_iteration": 4, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-14-35", "timestamp": 1660241675, "time_this_iter_s": 24.43727397918701, "time_total_s": 96.46528315544128, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 96.46528315544128, "timesteps_since_restore": 51200, "iterations_since_restore": 4, "perf": {"cpu_util_percent": 39.84571428571428, "ram_util_percent": 57.64285714285714}} -{"episode_reward_max": 54.0, "episode_reward_min": 3.0, "episode_reward_mean": 10.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 28.0}, "policy_reward_mean": {"ppo": 5.42}, "custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 10.44, "shaped_reward_min": 3, "shaped_reward_max": 25, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.52, 
"onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.69, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.68, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.83, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.91, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 4.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.05, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.94, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.7, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.33, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.15, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.77, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.92, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.63, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 0.51, "soup_pickup_agent_1_min": 0, 
"soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.14, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.16, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.18, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.22, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 1.05, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.94, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.05, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.94, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, 
"useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [16.0, 9.0, 8.0, 14.0, 9.0, 9.0, 6.0, 11.0, 19.0, 9.0, 12.0, 8.0, 17.0, 3.0, 11.0, 16.0, 11.0, 6.0, 22.0, 6.0, 3.0, 14.0, 11.0, 20.0, 25.0, 54.0, 14.0, 16.0, 3.0, 14.0, 3.0, 3.0, 22.0, 3.0, 14.0, 14.0, 9.0, 17.0, 9.0, 8.0, 19.0, 3.0, 9.0, 14.0, 11.0, 22.0, 6.0, 3.0, 3.0, 6.0, 9.0, 3.0, 11.0, 11.0, 3.0, 3.0, 11.0, 9.0, 9.0, 3.0, 11.0, 6.0, 11.0, 6.0, 16.0, 8.0, 6.0, 9.0, 8.0, 6.0, 8.0, 8.0, 6.0, 3.0, 24.0, 6.0, 6.0, 6.0, 19.0, 11.0, 14.0, 8.0, 9.0, 3.0, 8.0, 14.0, 14.0, 9.0, 17.0, 3.0, 19.0, 19.0, 17.0, 12.0, 6.0, 14.0, 11.0, 14.0, 14.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 5.0, 6.0, 3.0, 0.0, 8.0, 8.0, 6.0, 6.0, 3.0, 0.0, 9.0, 0.0, 6.0, 5.0, 6.0, 11.0, 8.0, 0.0, 9.0, 12.0, 0.0, 8.0, 0.0, 5.0, 12.0, 0.0, 3.0, 6.0, 5.0, 5.0, 11.0, 5.0, 6.0, 3.0, 3.0, 0.0, 22.0, 3.0, 3.0, 3.0, 0.0, 11.0, 3.0, 5.0, 6.0, 14.0, 6.0, 16.0, 9.0, 26.0, 28.0, 11.0, 3.0, 8.0, 8.0, 3.0, 0.0, 0.0, 14.0, 0.0, 3.0, 3.0, 0.0, 13.0, 9.0, 0.0, 3.0, 11.0, 3.0, 11.0, 3.0, 6.0, 3.0, 3.0, 14.0, 3.0, 6.0, 3.0, 5.0, 
3.0, 16.0, 3.0, 0.0, 6.0, 3.0, 5.0, 9.0, 3.0, 8.0, 14.0, 8.0, 0.0, 6.0, 3.0, 0.0, 0.0, 3.0, 0.0, 6.0, 9.0, 0.0, 0.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 0.0, 0.0, 3.0, 8.0, 3.0, 9.0, 0.0, 9.0, 0.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 11.0, 0.0, 3.0, 3.0, 8.0, 8.0, 5.0, 3.0, 6.0, 0.0, 3.0, 6.0, 5.0, 3.0, 3.0, 3.0, 3.0, 5.0, 8.0, 0.0, 0.0, 6.0, 0.0, 3.0, 6.0, 18.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 11.0, 8.0, 3.0, 8.0, 6.0, 8.0, 8.0, 0.0, 6.0, 3.0, 0.0, 3.0, 0.0, 8.0, 14.0, 0.0, 9.0, 5.0, 9.0, 0.0, 6.0, 11.0, 3.0, 0.0, 11.0, 8.0, 6.0, 13.0, 12.0, 5.0, 6.0, 6.0, 6.0, 0.0, 3.0, 11.0, 3.0, 8.0, 6.0, 8.0, 5.0, 9.0, 6.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 0.45066530826567375, "mean_processing_ms": 0.15893004682590756, "mean_inference_ms": 0.7420671329840245}, "off_policy_estimator": {}, "info": {"num_steps_trained": 120000, "num_steps_sampled": 64000, "sample_time_ms": 17388.622, "load_time_ms": 59.359, "grad_time_ms": 6772.673, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.012500000186264515, "cur_lr": 0.0010000000474974513, "total_loss": -0.30251750349998474, "policy_loss": -0.006208862643688917, "vf_loss": 1.4635206460952759, "vf_explained_var": 0.0046030678786337376, "kl": 0.0005594257963821292, "entropy": 1.7864326238632202, "entropy_coeff": 0.16595199704170227, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 64000, "episodes_total": 160, "training_iteration": 5, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-14-59", "timestamp": 1660241699, "time_this_iter_s": 24.809880018234253, "time_total_s": 121.27516317367554, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": 
"tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 121.27516317367554, "timesteps_since_restore": 64000, 
"iterations_since_restore": 5, "perf": {"cpu_util_percent": 39.71142857142857, "ram_util_percent": 57.60285714285714}} -{"episode_reward_max": 68.0, "episode_reward_min": 0.0, "episode_reward_mean": 12.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 6.195}, "custom_metrics": {"sparse_reward_mean": 0.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 11.59, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.22, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.75, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.39, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.9, "useful_onion_pickup_agent_1_min": 0, 
"useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.53, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 4.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.43, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.59, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.1, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.99, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.59, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.07, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.51, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 3, "soup_pickup_agent_1_mean": 0.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.17, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.22, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.23, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.2, "soup_drop_agent_1_min": 0, 
"soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 1.1, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.99, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.1, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.99, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [14.0, 6.0, 20.0, 17.0, 14.0, 11.0, 6.0, 8.0, 6.0, 0.0, 11.0, 3.0, 22.0, 11.0, 19.0, 9.0, 20.0, 23.0, 6.0, 11.0, 11.0, 22.0, 19.0, 11.0, 11.0, 17.0, 14.0, 6.0, 68.0, 20.0, 9.0, 8.0, 16.0, 8.0, 6.0, 9.0, 8.0, 6.0, 8.0, 8.0, 6.0, 3.0, 24.0, 6.0, 6.0, 6.0, 19.0, 11.0, 14.0, 8.0, 9.0, 3.0, 8.0, 14.0, 14.0, 9.0, 17.0, 3.0, 19.0, 19.0, 17.0, 12.0, 6.0, 14.0, 11.0, 14.0, 14.0, 9.0, 16.0, 9.0, 8.0, 14.0, 9.0, 9.0, 6.0, 11.0, 19.0, 9.0, 12.0, 8.0, 17.0, 3.0, 11.0, 16.0, 11.0, 6.0, 22.0, 6.0, 3.0, 14.0, 11.0, 20.0, 25.0, 54.0, 14.0, 16.0, 3.0, 14.0, 3.0, 3.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [8.0, 6.0, 6.0, 0.0, 14.0, 6.0, 14.0, 3.0, 14.0, 0.0, 8.0, 3.0, 0.0, 6.0, 8.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 0.0, 8.0, 14.0, 3.0, 8.0, 6.0, 13.0, 9.0, 0.0, 17.0, 3.0, 12.0, 11.0, 0.0, 6.0, 11.0, 0.0, 8.0, 3.0, 6.0, 16.0, 9.0, 10.0, 5.0, 6.0, 5.0, 6.0, 9.0, 8.0, 3.0, 11.0, 6.0, 0.0, 31.0, 37.0, 11.0, 9.0, 0.0, 9.0, 0.0, 8.0, 8.0, 8.0, 5.0, 3.0, 6.0, 0.0, 3.0, 6.0, 5.0, 3.0, 3.0, 3.0, 3.0, 5.0, 8.0, 0.0, 0.0, 6.0, 0.0, 3.0, 6.0, 18.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 11.0, 8.0, 3.0, 8.0, 6.0, 8.0, 8.0, 0.0, 6.0, 3.0, 0.0, 3.0, 0.0, 8.0, 14.0, 0.0, 9.0, 5.0, 9.0, 0.0, 6.0, 11.0, 3.0, 0.0, 11.0, 8.0, 6.0, 13.0, 12.0, 5.0, 6.0, 6.0, 6.0, 0.0, 3.0, 11.0, 3.0, 8.0, 6.0, 8.0, 5.0, 9.0, 6.0, 3.0, 11.0, 5.0, 6.0, 3.0, 0.0, 8.0, 8.0, 6.0, 6.0, 3.0, 0.0, 9.0, 0.0, 6.0, 5.0, 
6.0, 11.0, 8.0, 0.0, 9.0, 12.0, 0.0, 8.0, 0.0, 5.0, 12.0, 0.0, 3.0, 6.0, 5.0, 5.0, 11.0, 5.0, 6.0, 3.0, 3.0, 0.0, 22.0, 3.0, 3.0, 3.0, 0.0, 11.0, 3.0, 5.0, 6.0, 14.0, 6.0, 16.0, 9.0, 26.0, 28.0, 11.0, 3.0, 8.0, 8.0, 3.0, 0.0, 0.0, 14.0, 0.0, 3.0, 3.0, 0.0]}, "sampler_perf": {"mean_env_wait_ms": 0.4527646689746759, "mean_processing_ms": 0.15894443082147025, "mean_inference_ms": 0.7459920431247151}, "off_policy_estimator": {}, "info": {"num_steps_trained": 144000, "num_steps_sampled": 76800, "sample_time_ms": 17631.781, "load_time_ms": 55.878, "grad_time_ms": 7055.375, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0062500000931322575, "cur_lr": 0.0010000000474974513, "total_loss": -0.285332590341568, "policy_loss": -0.004330330062657595, "vf_loss": 1.753544807434082, "vf_explained_var": 0.007292529102414846, "kl": 0.0005500561674125493, "entropy": 1.7859567403793335, "entropy_coeff": 0.15744000673294067, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 76800, "episodes_total": 192, "training_iteration": 6, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-15-27", "timestamp": 1660241727, "time_this_iter_s": 27.381940841674805, "time_total_s": 148.65710401535034, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, 
"custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 148.65710401535034, "timesteps_since_restore": 76800, "iterations_since_restore": 6, "perf": {"cpu_util_percent": 46.235897435897435, "ram_util_percent": 57.91025641025641}} -{"episode_reward_max": 68.0, "episode_reward_min": 0.0, "episode_reward_mean": 13.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, 
"policy_reward_mean": {"ppo": 6.98}, "custom_metrics": {"sparse_reward_mean": 0.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 12.36, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.26, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.86, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.13, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 8, "useful_onion_pickup_agent_1_mean": 3.81, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 4.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.74, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, 
"useful_onion_drop_agent_1_mean": 1.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.13, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.16, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.81, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.17, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.22, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.85, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.61, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 3, "soup_pickup_agent_1_mean": 0.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.23, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.17, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.31, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.3, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.13, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.16, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.13, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.16, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [6.0, 14.0, 19.0, 6.0, 6.0, 17.0, 14.0, 3.0, 25.0, 11.0, 14.0, 9.0, 17.0, 14.0, 17.0, 14.0, 25.0, 17.0, 6.0, 19.0, 14.0, 3.0, 6.0, 3.0, 6.0, 63.0, 9.0, 28.0, 14.0, 8.0, 57.0, 9.0, 11.0, 14.0, 14.0, 9.0, 16.0, 9.0, 
8.0, 14.0, 9.0, 9.0, 6.0, 11.0, 19.0, 9.0, 12.0, 8.0, 17.0, 3.0, 11.0, 16.0, 11.0, 6.0, 22.0, 6.0, 3.0, 14.0, 11.0, 20.0, 25.0, 54.0, 14.0, 16.0, 3.0, 14.0, 3.0, 3.0, 14.0, 6.0, 20.0, 17.0, 14.0, 11.0, 6.0, 8.0, 6.0, 0.0, 11.0, 3.0, 22.0, 11.0, 19.0, 9.0, 20.0, 23.0, 6.0, 11.0, 11.0, 22.0, 19.0, 11.0, 11.0, 17.0, 14.0, 6.0, 68.0, 20.0, 9.0, 8.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 3.0, 6.0, 8.0, 10.0, 9.0, 0.0, 6.0, 3.0, 3.0, 9.0, 8.0, 6.0, 8.0, 0.0, 3.0, 17.0, 8.0, 8.0, 3.0, 5.0, 9.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 14.0, 3.0, 6.0, 8.0, 9.0, 16.0, 6.0, 11.0, 0.0, 6.0, 14.0, 5.0, 6.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 29.0, 34.0, 6.0, 3.0, 6.0, 22.0, 8.0, 6.0, 5.0, 3.0, 31.0, 26.0, 3.0, 6.0, 3.0, 8.0, 6.0, 8.0, 5.0, 9.0, 6.0, 3.0, 11.0, 5.0, 6.0, 3.0, 0.0, 8.0, 8.0, 6.0, 6.0, 3.0, 0.0, 9.0, 0.0, 6.0, 5.0, 6.0, 11.0, 8.0, 0.0, 9.0, 12.0, 0.0, 8.0, 0.0, 5.0, 12.0, 0.0, 3.0, 6.0, 5.0, 5.0, 11.0, 5.0, 6.0, 3.0, 3.0, 0.0, 22.0, 3.0, 3.0, 3.0, 0.0, 11.0, 3.0, 5.0, 6.0, 14.0, 6.0, 16.0, 9.0, 26.0, 28.0, 11.0, 3.0, 8.0, 8.0, 3.0, 0.0, 0.0, 14.0, 0.0, 3.0, 3.0, 0.0, 8.0, 6.0, 6.0, 0.0, 14.0, 6.0, 14.0, 3.0, 14.0, 0.0, 8.0, 3.0, 0.0, 6.0, 8.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 0.0, 8.0, 14.0, 3.0, 8.0, 6.0, 13.0, 9.0, 0.0, 17.0, 3.0, 12.0, 11.0, 0.0, 6.0, 11.0, 0.0, 8.0, 3.0, 6.0, 16.0, 9.0, 10.0, 5.0, 6.0, 5.0, 6.0, 9.0, 8.0, 3.0, 11.0, 6.0, 0.0, 31.0, 37.0, 11.0, 9.0, 0.0, 9.0, 0.0, 8.0]}, "sampler_perf": {"mean_env_wait_ms": 7.041783650517479, 
"mean_processing_ms": 0.16106120541031635, "mean_inference_ms": 2.816008339885107}, "off_policy_estimator": {}, "info": {"num_steps_trained": 168000, "num_steps_sampled": 89600, "sample_time_ms": 363821.155, "load_time_ms": 54.883, "grad_time_ms": 7100.098, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0031250000465661287, "cur_lr": 0.0010000000474974513, "total_loss": -0.26905307173728943, "policy_loss": -0.0034452469553798437, "vf_loss": 2.160554885864258, "vf_explained_var": 0.012243330478668213, "kl": 0.0006163662183098495, "entropy": 1.784928321838379, "entropy_coeff": 0.14892800152301788, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 89600, "episodes_total": 224, "training_iteration": 7, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-56-15", "timestamp": 1660244175, "time_this_iter_s": 2448.401287794113, "time_total_s": 2597.0583918094635, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, 
"extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2597.0583918094635, "timesteps_since_restore": 89600, "iterations_since_restore": 7, "perf": {"cpu_util_percent": 53.55, "ram_util_percent": 58.647826086956535}} -{"episode_reward_max": 68.0, "episode_reward_min": 0.0, "episode_reward_mean": 14.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 7.205}, "custom_metrics": {"sparse_reward_mean": 1.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 12.41, "shaped_reward_min": 0, "shaped_reward_max": 33, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, 
"tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.3, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.32, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.3, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.44, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.61, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.59, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.2, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.11, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, 
"dish_pickup_agent_0_mean": 2.73, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.53, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.81, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.61, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 3, "soup_pickup_agent_1_mean": 0.74, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.4, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.41, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.2, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.11, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.2, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, 
"viable_onion_potting_agent_1_mean": 1.11, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [11.0, 9.0, 17.0, 8.0, 14.0, 19.0, 6.0, 33.0, 11.0, 6.0, 11.0, 14.0, 16.0, 3.0, 17.0, 6.0, 16.0, 11.0, 3.0, 57.0, 16.0, 8.0, 17.0, 9.0, 57.0, 11.0, 0.0, 11.0, 11.0, 11.0, 22.0, 11.0, 3.0, 14.0, 3.0, 3.0, 14.0, 6.0, 20.0, 17.0, 14.0, 11.0, 6.0, 8.0, 6.0, 0.0, 11.0, 3.0, 22.0, 11.0, 19.0, 9.0, 20.0, 23.0, 6.0, 11.0, 11.0, 22.0, 19.0, 11.0, 11.0, 17.0, 14.0, 6.0, 68.0, 20.0, 9.0, 8.0, 6.0, 14.0, 19.0, 6.0, 6.0, 17.0, 14.0, 3.0, 25.0, 11.0, 14.0, 9.0, 17.0, 14.0, 17.0, 14.0, 25.0, 17.0, 6.0, 19.0, 14.0, 3.0, 6.0, 3.0, 6.0, 63.0, 9.0, 28.0, 
14.0, 8.0, 57.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 8.0, 6.0, 3.0, 11.0, 6.0, 0.0, 8.0, 0.0, 14.0, 3.0, 16.0, 3.0, 3.0, 19.0, 14.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 11.0, 3.0, 3.0, 13.0, 0.0, 3.0, 8.0, 9.0, 3.0, 3.0, 6.0, 10.0, 3.0, 8.0, 0.0, 3.0, 23.0, 34.0, 8.0, 8.0, 8.0, 0.0, 3.0, 14.0, 0.0, 9.0, 28.0, 29.0, 6.0, 5.0, 0.0, 0.0, 6.0, 5.0, 6.0, 5.0, 3.0, 8.0, 14.0, 8.0, 3.0, 8.0, 3.0, 0.0, 0.0, 14.0, 0.0, 3.0, 3.0, 0.0, 8.0, 6.0, 6.0, 0.0, 14.0, 6.0, 14.0, 3.0, 14.0, 0.0, 8.0, 3.0, 0.0, 6.0, 8.0, 0.0, 0.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 0.0, 8.0, 14.0, 3.0, 8.0, 6.0, 13.0, 9.0, 0.0, 17.0, 3.0, 12.0, 11.0, 0.0, 6.0, 11.0, 0.0, 8.0, 3.0, 6.0, 16.0, 9.0, 10.0, 5.0, 6.0, 5.0, 6.0, 9.0, 8.0, 3.0, 11.0, 6.0, 0.0, 31.0, 37.0, 11.0, 9.0, 0.0, 9.0, 0.0, 8.0, 3.0, 3.0, 6.0, 8.0, 10.0, 9.0, 0.0, 6.0, 3.0, 3.0, 9.0, 8.0, 6.0, 8.0, 0.0, 3.0, 17.0, 8.0, 8.0, 3.0, 5.0, 9.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 14.0, 3.0, 6.0, 8.0, 9.0, 16.0, 6.0, 11.0, 0.0, 6.0, 14.0, 5.0, 6.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 29.0, 34.0, 6.0, 3.0, 6.0, 22.0, 8.0, 6.0, 5.0, 3.0, 31.0, 26.0, 3.0, 6.0]}, "sampler_perf": {"mean_env_wait_ms": 12.808520770186506, "mean_processing_ms": 0.16344551542853641, "mean_inference_ms": 4.631216998135988}, "off_policy_estimator": {}, "info": {"num_steps_trained": 192000, "num_steps_sampled": 102400, "sample_time_ms": 320761.187, "load_time_ms": 52.674, "grad_time_ms": 7271.518, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 
0.0015625000232830644, "cur_lr": 0.0010000000474974513, "total_loss": -0.25552046298980713, "policy_loss": -0.005265455227345228, "vf_loss": 1.9171754121780396, "vf_explained_var": 0.015465259552001953, "kl": 0.0006017824052833021, "entropy": 1.7836121320724487, "entropy_coeff": 0.1404159963130951, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 102400, "episodes_total": 256, "training_iteration": 8, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-56-43", "timestamp": 1660244203, "time_this_iter_s": 27.877708196640015, "time_total_s": 2624.9361000061035, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2624.9361000061035, "timesteps_since_restore": 102400, "iterations_since_restore": 8, "perf": {"cpu_util_percent": 42.6075, "ram_util_percent": 58.39000000000001}} -{"episode_reward_max": 68.0, "episode_reward_min": 0.0, "episode_reward_mean": 16.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 8.09}, "custom_metrics": {"sparse_reward_mean": 1.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 13.78, "shaped_reward_min": 0, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.25, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.27, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.31, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.46, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.48, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.21, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 1.33, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 3.05, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.19, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 
0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.92, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.81, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 0.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.22, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.17, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.54, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.49, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.21, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 1.33, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.21, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 1.33, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [6.0, 28.0, 3.0, 14.0, 19.0, 57.0, 9.0, 11.0, 9.0, 17.0, 14.0, 14.0, 23.0, 25.0, 11.0, 28.0, 14.0, 14.0, 17.0, 12.0, 3.0, 16.0, 14.0, 22.0, 36.0, 17.0, 12.0, 22.0, 14.0, 16.0, 14.0, 17.0, 68.0, 20.0, 9.0, 8.0, 6.0, 14.0, 19.0, 6.0, 6.0, 17.0, 14.0, 3.0, 25.0, 11.0, 14.0, 9.0, 17.0, 14.0, 17.0, 14.0, 25.0, 17.0, 6.0, 19.0, 14.0, 3.0, 6.0, 3.0, 6.0, 63.0, 9.0, 28.0, 14.0, 8.0, 57.0, 9.0, 11.0, 9.0, 17.0, 8.0, 14.0, 19.0, 6.0, 33.0, 11.0, 6.0, 11.0, 14.0, 16.0, 3.0, 17.0, 6.0, 16.0, 11.0, 3.0, 57.0, 16.0, 8.0, 17.0, 9.0, 57.0, 11.0, 0.0, 11.0, 11.0, 11.0, 22.0, 11.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 6.0, 22.0, 6.0, 3.0, 0.0, 8.0, 6.0, 10.0, 9.0, 23.0, 34.0, 3.0, 6.0, 11.0, 0.0, 6.0, 3.0, 11.0, 6.0, 11.0, 3.0, 5.0, 9.0, 17.0, 6.0, 9.0, 16.0, 8.0, 3.0, 6.0, 22.0, 3.0, 11.0, 14.0, 0.0, 6.0, 11.0, 6.0, 6.0, 0.0, 3.0, 5.0, 11.0, 8.0, 6.0, 6.0, 16.0, 11.0, 25.0, 8.0, 9.0, 3.0, 9.0, 16.0, 6.0, 8.0, 6.0, 16.0, 0.0, 11.0, 3.0, 6.0, 11.0, 31.0, 37.0, 11.0, 9.0, 0.0, 9.0, 0.0, 8.0, 3.0, 3.0, 6.0, 8.0, 10.0, 9.0, 0.0, 6.0, 3.0, 3.0, 9.0, 8.0, 6.0, 8.0, 0.0, 3.0, 17.0, 8.0, 8.0, 3.0, 5.0, 9.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 14.0, 3.0, 6.0, 8.0, 9.0, 16.0, 6.0, 11.0, 0.0, 6.0, 14.0, 5.0, 6.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 3.0, 3.0, 29.0, 34.0, 6.0, 3.0, 6.0, 22.0, 8.0, 6.0, 5.0, 3.0, 31.0, 26.0, 3.0, 6.0, 3.0, 8.0, 6.0, 3.0, 11.0, 6.0, 0.0, 8.0, 0.0, 14.0, 3.0, 16.0, 3.0, 3.0, 19.0, 14.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 11.0, 3.0, 3.0, 13.0, 0.0, 3.0, 8.0, 9.0, 3.0, 3.0, 6.0, 10.0, 3.0, 8.0, 0.0, 3.0, 23.0, 34.0, 8.0, 8.0, 8.0, 0.0, 3.0, 14.0, 0.0, 9.0, 28.0, 29.0, 6.0, 5.0, 0.0, 0.0, 6.0, 5.0, 6.0, 5.0, 3.0, 8.0, 14.0, 8.0, 3.0, 8.0]}, "sampler_perf": {"mean_env_wait_ms": 17.933374555696833, "mean_processing_ms": 0.16517403131021888, "mean_inference_ms": 6.240884020190002}, "off_policy_estimator": {}, "info": {"num_steps_trained": 216000, "num_steps_sampled": 115200, "sample_time_ms": 287052.402, "load_time_ms": 50.896, "grad_time_ms": 7408.088, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0007812500116415322, "cur_lr": 0.0010000000474974513, "total_loss": -0.24049125611782074, "policy_loss": -0.005544388201087713, "vf_loss": 1.8025983572006226, "vf_explained_var": 0.016161540523171425, "kl": 0.0006836934480816126, "entropy": 1.7825666666030884, "entropy_coeff": 0.1319040060043335, "model": {}}}}, 
"optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 115200, "episodes_total": 288, "training_iteration": 9, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-57-09", "timestamp": 1660244229, "time_this_iter_s": 25.946558237075806, "time_total_s": 2650.8826582431793, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, 
"NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2650.8826582431793, "timesteps_since_restore": 115200, "iterations_since_restore": 9, "perf": {"cpu_util_percent": 38.36216216216216, "ram_util_percent": 57.93513513513512}} -{"episode_reward_max": 57.0, "episode_reward_min": 0.0, "episode_reward_mean": 15.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 34.0}, "policy_reward_mean": {"ppo": 7.83}, "custom_metrics": {"sparse_reward_mean": 1.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 13.66, "shaped_reward_min": 0, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, 
"useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.85, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.36, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.23, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.74, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.36, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 1.1, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 3, "potting_onion_agent_1_mean": 1.43, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 4, "dish_pickup_agent_0_mean": 3.36, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.81, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.14, 
"useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.93, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 0.89, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 0.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.25, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.55, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.33, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.1, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 3, "optimal_onion_potting_agent_1_mean": 1.43, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 4, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.1, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 3, "viable_onion_potting_agent_1_mean": 1.43, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 4, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, 
"catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [14.0, 19.0, 9.0, 6.0, 9.0, 17.0, 6.0, 6.0, 20.0, 20.0, 17.0, 14.0, 11.0, 9.0, 57.0, 16.0, 17.0, 14.0, 11.0, 8.0, 11.0, 16.0, 14.0, 25.0, 22.0, 3.0, 12.0, 9.0, 9.0, 17.0, 17.0, 3.0, 14.0, 8.0, 57.0, 9.0, 11.0, 9.0, 17.0, 8.0, 14.0, 19.0, 6.0, 33.0, 11.0, 6.0, 11.0, 14.0, 16.0, 3.0, 17.0, 6.0, 16.0, 11.0, 3.0, 57.0, 16.0, 8.0, 17.0, 9.0, 57.0, 11.0, 0.0, 11.0, 11.0, 11.0, 22.0, 11.0, 6.0, 28.0, 3.0, 14.0, 19.0, 57.0, 9.0, 11.0, 9.0, 17.0, 14.0, 14.0, 23.0, 25.0, 11.0, 28.0, 14.0, 14.0, 17.0, 12.0, 3.0, 16.0, 14.0, 22.0, 36.0, 17.0, 12.0, 22.0, 14.0, 16.0, 14.0, 17.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [8.0, 6.0, 10.0, 9.0, 
3.0, 6.0, 0.0, 6.0, 3.0, 6.0, 8.0, 9.0, 6.0, 0.0, 3.0, 3.0, 14.0, 6.0, 9.0, 11.0, 14.0, 3.0, 3.0, 11.0, 11.0, 0.0, 3.0, 6.0, 23.0, 34.0, 11.0, 5.0, 14.0, 3.0, 8.0, 6.0, 8.0, 3.0, 0.0, 8.0, 8.0, 3.0, 10.0, 6.0, 6.0, 8.0, 13.0, 12.0, 8.0, 14.0, 0.0, 3.0, 6.0, 6.0, 0.0, 9.0, 3.0, 6.0, 9.0, 8.0, 8.0, 9.0, 0.0, 3.0, 8.0, 6.0, 5.0, 3.0, 31.0, 26.0, 3.0, 6.0, 3.0, 8.0, 6.0, 3.0, 11.0, 6.0, 0.0, 8.0, 0.0, 14.0, 3.0, 16.0, 3.0, 3.0, 19.0, 14.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 11.0, 3.0, 3.0, 13.0, 0.0, 3.0, 8.0, 9.0, 3.0, 3.0, 6.0, 10.0, 3.0, 8.0, 0.0, 3.0, 23.0, 34.0, 8.0, 8.0, 8.0, 0.0, 3.0, 14.0, 0.0, 9.0, 28.0, 29.0, 6.0, 5.0, 0.0, 0.0, 6.0, 5.0, 6.0, 5.0, 3.0, 8.0, 14.0, 8.0, 3.0, 8.0, 0.0, 6.0, 22.0, 6.0, 3.0, 0.0, 8.0, 6.0, 10.0, 9.0, 23.0, 34.0, 3.0, 6.0, 11.0, 0.0, 6.0, 3.0, 11.0, 6.0, 11.0, 3.0, 5.0, 9.0, 17.0, 6.0, 9.0, 16.0, 8.0, 3.0, 6.0, 22.0, 3.0, 11.0, 14.0, 0.0, 6.0, 11.0, 6.0, 6.0, 0.0, 3.0, 5.0, 11.0, 8.0, 6.0, 6.0, 16.0, 11.0, 25.0, 8.0, 9.0, 3.0, 9.0, 16.0, 6.0, 8.0, 6.0, 16.0, 0.0, 11.0, 3.0, 6.0, 11.0]}, "sampler_perf": {"mean_env_wait_ms": 16.781937376069852, "mean_processing_ms": 0.16502285382446033, "mean_inference_ms": 5.880940450206554}, "off_policy_estimator": {}, "info": {"num_steps_trained": 240000, "num_steps_sampled": 128000, "sample_time_ms": 260081.675, "load_time_ms": 49.456, "grad_time_ms": 7564.799, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0003906250058207661, "cur_lr": 0.0010000000474974513, "total_loss": -0.22780847549438477, "policy_loss": -0.00820181891322136, "vf_loss": 1.5030304193496704, "vf_explained_var": 0.01960124634206295, "kl": 0.0007011755951680243, "entropy": 1.7809678316116333, "entropy_coeff": 0.1233920007944107, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 128000, "episodes_total": 320, "training_iteration": 10, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-57-36", "timestamp": 1660244256, 
"time_this_iter_s": 26.38225793838501, "time_total_s": 2677.2649161815643, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, 
"log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, 
"sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2677.2649161815643, "timesteps_since_restore": 128000, "iterations_since_restore": 10, "perf": {"cpu_util_percent": 35.91621621621621, "ram_util_percent": 58.01891891891893}} -{"episode_reward_max": 65.0, "episode_reward_min": 3.0, "episode_reward_mean": 16.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 37.0}, "policy_reward_mean": {"ppo": 8.47}, "custom_metrics": {"sparse_reward_mean": 1.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 14.94, "shaped_reward_min": 3, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, 
"potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.74, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.61, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.22, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.89, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.46, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 1.18, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.59, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 4, "dish_pickup_agent_0_mean": 3.46, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.72, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.85, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.08, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, 
"soup_pickup_agent_1_mean": 0.69, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.65, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.4, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.18, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.59, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 4, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.18, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.59, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 4, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, 
"catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [17.0, 14.0, 20.0, 25.0, 20.0, 14.0, 11.0, 9.0, 3.0, 17.0, 65.0, 11.0, 9.0, 23.0, 65.0, 11.0, 8.0, 25.0, 11.0, 28.0, 19.0, 20.0, 17.0, 9.0, 11.0, 60.0, 17.0, 3.0, 27.0, 25.0, 11.0, 8.0, 11.0, 11.0, 22.0, 11.0, 6.0, 28.0, 3.0, 14.0, 19.0, 57.0, 9.0, 11.0, 9.0, 17.0, 14.0, 14.0, 23.0, 25.0, 11.0, 28.0, 14.0, 14.0, 17.0, 12.0, 3.0, 16.0, 14.0, 22.0, 36.0, 17.0, 12.0, 22.0, 14.0, 16.0, 14.0, 17.0, 14.0, 19.0, 9.0, 6.0, 9.0, 17.0, 6.0, 6.0, 20.0, 20.0, 17.0, 14.0, 11.0, 9.0, 57.0, 16.0, 17.0, 14.0, 11.0, 8.0, 11.0, 16.0, 14.0, 25.0, 22.0, 3.0, 12.0, 9.0, 9.0, 17.0, 17.0, 3.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [9.0, 8.0, 8.0, 6.0, 14.0, 6.0, 6.0, 19.0, 6.0, 14.0, 0.0, 14.0, 3.0, 8.0, 3.0, 6.0, 0.0, 3.0, 9.0, 8.0, 28.0, 37.0, 5.0, 6.0, 3.0, 6.0, 14.0, 9.0, 31.0, 34.0, 5.0, 6.0, 0.0, 8.0, 11.0, 14.0, 5.0, 6.0, 9.0, 19.0, 8.0, 11.0, 12.0, 8.0, 3.0, 14.0, 0.0, 9.0, 0.0, 11.0, 26.0, 34.0, 8.0, 9.0, 0.0, 3.0, 
13.0, 14.0, 14.0, 11.0, 11.0, 0.0, 8.0, 0.0, 6.0, 5.0, 3.0, 8.0, 14.0, 8.0, 3.0, 8.0, 0.0, 6.0, 22.0, 6.0, 3.0, 0.0, 8.0, 6.0, 10.0, 9.0, 23.0, 34.0, 3.0, 6.0, 11.0, 0.0, 6.0, 3.0, 11.0, 6.0, 11.0, 3.0, 5.0, 9.0, 17.0, 6.0, 9.0, 16.0, 8.0, 3.0, 6.0, 22.0, 3.0, 11.0, 14.0, 0.0, 6.0, 11.0, 6.0, 6.0, 0.0, 3.0, 5.0, 11.0, 8.0, 6.0, 6.0, 16.0, 11.0, 25.0, 8.0, 9.0, 3.0, 9.0, 16.0, 6.0, 8.0, 6.0, 16.0, 0.0, 11.0, 3.0, 6.0, 11.0, 8.0, 6.0, 10.0, 9.0, 3.0, 6.0, 0.0, 6.0, 3.0, 6.0, 8.0, 9.0, 6.0, 0.0, 3.0, 3.0, 14.0, 6.0, 9.0, 11.0, 14.0, 3.0, 3.0, 11.0, 11.0, 0.0, 3.0, 6.0, 23.0, 34.0, 11.0, 5.0, 14.0, 3.0, 8.0, 6.0, 8.0, 3.0, 0.0, 8.0, 8.0, 3.0, 10.0, 6.0, 6.0, 8.0, 13.0, 12.0, 8.0, 14.0, 0.0, 3.0, 6.0, 6.0, 0.0, 9.0, 3.0, 6.0, 9.0, 8.0, 8.0, 9.0, 0.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 15.10732110386456, "mean_processing_ms": 0.16432950718550896, "mean_inference_ms": 5.354497783846054}, "off_policy_estimator": {}, "info": {"num_steps_trained": 264000, "num_steps_sampled": 140800, "sample_time_ms": 260129.157, "load_time_ms": 39.286, "grad_time_ms": 7759.155, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.00019531250291038305, "cur_lr": 0.0010000000474974513, "total_loss": -0.21097473800182343, "policy_loss": -0.006903436034917831, "vf_loss": 2.839796781539917, "vf_explained_var": 0.029899099841713905, "kl": 0.0006908049690537155, "entropy": 1.7788597345352173, "entropy_coeff": 0.11488000303506851, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 140800, "episodes_total": 352, "training_iteration": 11, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-58-02", "timestamp": 1660244282, "time_this_iter_s": 26.244181156158447, "time_total_s": 2703.509097337723, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, 
"batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2703.509097337723, "timesteps_since_restore": 140800, "iterations_since_restore": 11, "perf": {"cpu_util_percent": 36.42631578947368, "ram_util_percent": 57.83157894736843}} -{"episode_reward_max": 65.0, "episode_reward_min": 3.0, "episode_reward_mean": 18.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 40.0}, "policy_reward_mean": {"ppo": 9.17}, "custom_metrics": {"sparse_reward_mean": 1.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 15.54, "shaped_reward_min": 3, "shaped_reward_max": 31, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.67, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.71, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.21, 
"useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.04, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 1.18, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.74, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.55, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 2.98, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.18, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.2, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.92, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.22, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 
1, "soup_drop_agent_0_mean": 0.61, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.42, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.18, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.74, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.18, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.74, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, 
"useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [20.0, 11.0, 60.0, 25.0, 22.0, 65.0, 12.0, 11.0, 23.0, 3.0, 30.0, 17.0, 19.0, 31.0, 28.0, 19.0, 14.0, 25.0, 25.0, 22.0, 20.0, 11.0, 11.0, 57.0, 17.0, 6.0, 17.0, 11.0, 11.0, 11.0, 14.0, 14.0, 14.0, 16.0, 14.0, 17.0, 14.0, 19.0, 9.0, 6.0, 9.0, 17.0, 6.0, 6.0, 20.0, 20.0, 17.0, 14.0, 11.0, 9.0, 57.0, 16.0, 17.0, 14.0, 11.0, 8.0, 11.0, 16.0, 14.0, 25.0, 22.0, 3.0, 12.0, 9.0, 9.0, 17.0, 17.0, 3.0, 17.0, 14.0, 20.0, 25.0, 20.0, 14.0, 11.0, 9.0, 3.0, 17.0, 65.0, 11.0, 9.0, 23.0, 65.0, 11.0, 8.0, 25.0, 11.0, 28.0, 19.0, 20.0, 17.0, 9.0, 11.0, 60.0, 17.0, 3.0, 27.0, 25.0, 11.0, 8.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 9.0, 0.0, 11.0, 20.0, 40.0, 12.0, 13.0, 11.0, 11.0, 34.0, 31.0, 3.0, 9.0, 5.0, 6.0, 0.0, 23.0, 3.0, 0.0, 16.0, 14.0, 6.0, 11.0, 13.0, 6.0, 17.0, 14.0, 14.0, 14.0, 6.0, 13.0, 5.0, 9.0, 16.0, 9.0, 16.0, 9.0, 5.0, 17.0, 8.0, 12.0, 3.0, 8.0, 0.0, 11.0, 26.0, 31.0, 0.0, 17.0, 6.0, 0.0, 3.0, 14.0, 8.0, 3.0, 6.0, 5.0, 8.0, 3.0, 3.0, 11.0, 11.0, 3.0, 8.0, 6.0, 16.0, 0.0, 11.0, 3.0, 6.0, 11.0, 8.0, 6.0, 10.0, 9.0, 3.0, 6.0, 0.0, 6.0, 3.0, 6.0, 8.0, 9.0, 6.0, 0.0, 3.0, 3.0, 14.0, 6.0, 9.0, 11.0, 14.0, 3.0, 3.0, 11.0, 11.0, 0.0, 3.0, 6.0, 23.0, 34.0, 11.0, 5.0, 14.0, 3.0, 8.0, 6.0, 8.0, 
3.0, 0.0, 8.0, 8.0, 3.0, 10.0, 6.0, 6.0, 8.0, 13.0, 12.0, 8.0, 14.0, 0.0, 3.0, 6.0, 6.0, 0.0, 9.0, 3.0, 6.0, 9.0, 8.0, 8.0, 9.0, 0.0, 3.0, 9.0, 8.0, 8.0, 6.0, 14.0, 6.0, 6.0, 19.0, 6.0, 14.0, 0.0, 14.0, 3.0, 8.0, 3.0, 6.0, 0.0, 3.0, 9.0, 8.0, 28.0, 37.0, 5.0, 6.0, 3.0, 6.0, 14.0, 9.0, 31.0, 34.0, 5.0, 6.0, 0.0, 8.0, 11.0, 14.0, 5.0, 6.0, 9.0, 19.0, 8.0, 11.0, 12.0, 8.0, 3.0, 14.0, 0.0, 9.0, 0.0, 11.0, 26.0, 34.0, 8.0, 9.0, 0.0, 3.0, 13.0, 14.0, 14.0, 11.0, 11.0, 0.0, 8.0, 0.0]}, "sampler_perf": {"mean_env_wait_ms": 13.746840147909538, "mean_processing_ms": 0.16393515170904638, "mean_inference_ms": 4.926997257535761}, "off_policy_estimator": {}, "info": {"num_steps_trained": 288000, "num_steps_sampled": 153600, "sample_time_ms": 260207.009, "load_time_ms": 38.842, "grad_time_ms": 8068.402, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 9.765625145519152e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.19599168002605438, "policy_loss": -0.007250078488141298, "vf_loss": 2.85541033744812, "vf_explained_var": 0.045025069266557693, "kl": 0.0006896388367749751, "entropy": 1.7771064043045044, "entropy_coeff": 0.10636799782514572, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 153600, "episodes_total": 384, "training_iteration": 12, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-58-29", "timestamp": 1660244309, "time_this_iter_s": 27.08998394012451, "time_total_s": 2730.5990812778473, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, 
"max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2730.5990812778473, "timesteps_since_restore": 153600, "iterations_since_restore": 12, "perf": {"cpu_util_percent": 37.42368421052632, "ram_util_percent": 58.20789473684212}} 
-{"episode_reward_max": 76.0, "episode_reward_min": 3.0, "episode_reward_mean": 21.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 45.0}, "policy_reward_mean": {"ppo": 10.6}, "custom_metrics": {"sparse_reward_mean": 1.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 17.6, "shaped_reward_min": 3, "shaped_reward_max": 37, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.63, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.81, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.24, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.08, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.93, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, 
"onion_drop_agent_1_mean": 3.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.33, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.98, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.66, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.21, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.24, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.92, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.19, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 0.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 0.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.31, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.74, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 11, "soup_drop_agent_1_mean": 0.54, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.33, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.98, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.33, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.98, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[76.0, 30.0, 25.0, 9.0, 28.0, 17.0, 68.0, 6.0, 3.0, 22.0, 17.0, 37.0, 14.0, 34.0, 12.0, 17.0, 27.0, 11.0, 25.0, 11.0, 17.0, 17.0, 25.0, 11.0, 12.0, 3.0, 19.0, 22.0, 60.0, 25.0, 25.0, 34.0, 9.0, 17.0, 17.0, 3.0, 17.0, 14.0, 20.0, 25.0, 20.0, 14.0, 11.0, 9.0, 3.0, 17.0, 65.0, 11.0, 9.0, 23.0, 65.0, 11.0, 8.0, 25.0, 11.0, 28.0, 19.0, 20.0, 17.0, 9.0, 11.0, 60.0, 17.0, 3.0, 27.0, 25.0, 11.0, 8.0, 20.0, 11.0, 60.0, 25.0, 22.0, 65.0, 12.0, 11.0, 23.0, 3.0, 30.0, 17.0, 19.0, 31.0, 28.0, 19.0, 14.0, 25.0, 25.0, 22.0, 20.0, 11.0, 11.0, 57.0, 17.0, 6.0, 17.0, 11.0, 11.0, 11.0, 14.0, 14.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [31.0, 45.0, 19.0, 11.0, 5.0, 20.0, 6.0, 3.0, 6.0, 22.0, 6.0, 11.0, 28.0, 40.0, 6.0, 0.0, 3.0, 0.0, 11.0, 11.0, 8.0, 9.0, 20.0, 17.0, 5.0, 9.0, 20.0, 14.0, 9.0, 3.0, 5.0, 12.0, 14.0, 13.0, 3.0, 8.0, 13.0, 12.0, 11.0, 0.0, 5.0, 12.0, 9.0, 8.0, 17.0, 8.0, 0.0, 11.0, 9.0, 3.0, 3.0, 0.0, 6.0, 13.0, 8.0, 14.0, 28.0, 32.0, 19.0, 6.0, 8.0, 17.0, 19.0, 15.0, 3.0, 6.0, 9.0, 8.0, 8.0, 9.0, 0.0, 3.0, 9.0, 8.0, 8.0, 6.0, 14.0, 6.0, 6.0, 19.0, 6.0, 14.0, 0.0, 14.0, 3.0, 8.0, 3.0, 6.0, 0.0, 3.0, 9.0, 8.0, 28.0, 37.0, 5.0, 6.0, 3.0, 6.0, 14.0, 9.0, 31.0, 34.0, 5.0, 6.0, 0.0, 8.0, 11.0, 14.0, 5.0, 6.0, 9.0, 19.0, 8.0, 11.0, 12.0, 8.0, 3.0, 14.0, 0.0, 9.0, 0.0, 11.0, 26.0, 34.0, 8.0, 9.0, 0.0, 3.0, 13.0, 14.0, 14.0, 11.0, 11.0, 0.0, 8.0, 0.0, 11.0, 9.0, 0.0, 11.0, 20.0, 40.0, 12.0, 13.0, 11.0, 11.0, 34.0, 31.0, 3.0, 9.0, 5.0, 6.0, 0.0, 23.0, 3.0, 0.0, 16.0, 14.0, 6.0, 11.0, 
13.0, 6.0, 17.0, 14.0, 14.0, 14.0, 6.0, 13.0, 5.0, 9.0, 16.0, 9.0, 16.0, 9.0, 5.0, 17.0, 8.0, 12.0, 3.0, 8.0, 0.0, 11.0, 26.0, 31.0, 0.0, 17.0, 6.0, 0.0, 3.0, 14.0, 8.0, 3.0, 6.0, 5.0, 8.0, 3.0, 3.0, 11.0, 11.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 12.621089528193751, "mean_processing_ms": 0.1639912702671728, "mean_inference_ms": 4.5967771099672925}, "off_policy_estimator": {}, "info": {"num_steps_trained": 312000, "num_steps_sampled": 166400, "sample_time_ms": 261554.79, "load_time_ms": 38.918, "grad_time_ms": 8201.071, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.882812572759576e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.18183590471744537, "policy_loss": -0.00839205738157034, "vf_loss": 3.3925907611846924, "vf_explained_var": 0.04012133553624153, "kl": 0.0007842599879950285, "entropy": 1.775907039642334, "entropy_coeff": 0.09785600006580353, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 166400, "episodes_total": 416, "training_iteration": 13, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-59-09", "timestamp": 1660244349, "time_this_iter_s": 39.64977407455444, "time_total_s": 2770.2488553524017, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": 
null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, 
"compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2770.2488553524017, "timesteps_since_restore": 166400, "iterations_since_restore": 13, "perf": {"cpu_util_percent": 43.457142857142856, "ram_util_percent": 59.38095238095238}} -{"episode_reward_max": 76.0, "episode_reward_min": 3.0, "episode_reward_mean": 22.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 45.0}, "policy_reward_mean": {"ppo": 11.325}, 
"custom_metrics": {"sparse_reward_mean": 1.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 19.05, "shaped_reward_min": 3, "shaped_reward_max": 44, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.74, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 10, "onion_pickup_agent_1_mean": 5.54, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.3, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.91, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 3.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.32, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.51, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 1.98, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.54, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.23, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.27, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.29, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.88, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 1.08, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 0.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.34, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.71, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 11, "soup_drop_agent_1_mean": 0.6, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.51, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 1.98, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.51, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 1.98, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [19.0, 28.0, 14.0, 17.0, 17.0, 19.0, 17.0, 22.0, 63.0, 66.0, 22.0, 14.0, 9.0, 22.0, 14.0, 22.0, 20.0, 71.0, 25.0, 23.0, 3.0, 22.0, 22.0, 16.0, 20.0, 9.0, 24.0, 44.0, 12.0, 17.0, 20.0, 20.0, 27.0, 25.0, 11.0, 8.0, 20.0, 11.0, 60.0, 25.0, 22.0, 65.0, 12.0, 
11.0, 23.0, 3.0, 30.0, 17.0, 19.0, 31.0, 28.0, 19.0, 14.0, 25.0, 25.0, 22.0, 20.0, 11.0, 11.0, 57.0, 17.0, 6.0, 17.0, 11.0, 11.0, 11.0, 14.0, 14.0, 76.0, 30.0, 25.0, 9.0, 28.0, 17.0, 68.0, 6.0, 3.0, 22.0, 17.0, 37.0, 14.0, 34.0, 12.0, 17.0, 27.0, 11.0, 25.0, 11.0, 17.0, 17.0, 25.0, 11.0, 12.0, 3.0, 19.0, 22.0, 60.0, 25.0, 25.0, 34.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [8.0, 11.0, 17.0, 11.0, 3.0, 11.0, 5.0, 12.0, 3.0, 14.0, 3.0, 16.0, 11.0, 6.0, 14.0, 8.0, 31.0, 32.0, 29.0, 37.0, 11.0, 11.0, 5.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 6.0, 8.0, 14.0, 11.0, 9.0, 41.0, 30.0, 11.0, 14.0, 9.0, 14.0, 0.0, 3.0, 14.0, 8.0, 14.0, 8.0, 8.0, 8.0, 14.0, 6.0, 3.0, 6.0, 3.0, 21.0, 14.0, 30.0, 3.0, 9.0, 11.0, 6.0, 12.0, 8.0, 9.0, 11.0, 13.0, 14.0, 14.0, 11.0, 11.0, 0.0, 8.0, 0.0, 11.0, 9.0, 0.0, 11.0, 20.0, 40.0, 12.0, 13.0, 11.0, 11.0, 34.0, 31.0, 3.0, 9.0, 5.0, 6.0, 0.0, 23.0, 3.0, 0.0, 16.0, 14.0, 6.0, 11.0, 13.0, 6.0, 17.0, 14.0, 14.0, 14.0, 6.0, 13.0, 5.0, 9.0, 16.0, 9.0, 16.0, 9.0, 5.0, 17.0, 8.0, 12.0, 3.0, 8.0, 0.0, 11.0, 26.0, 31.0, 0.0, 17.0, 6.0, 0.0, 3.0, 14.0, 8.0, 3.0, 6.0, 5.0, 8.0, 3.0, 3.0, 11.0, 11.0, 3.0, 31.0, 45.0, 19.0, 11.0, 5.0, 20.0, 6.0, 3.0, 6.0, 22.0, 6.0, 11.0, 28.0, 40.0, 6.0, 0.0, 3.0, 0.0, 11.0, 11.0, 8.0, 9.0, 20.0, 17.0, 5.0, 9.0, 20.0, 14.0, 9.0, 3.0, 5.0, 12.0, 14.0, 13.0, 3.0, 8.0, 13.0, 12.0, 11.0, 0.0, 5.0, 12.0, 9.0, 8.0, 17.0, 8.0, 0.0, 11.0, 9.0, 3.0, 3.0, 0.0, 6.0, 13.0, 8.0, 14.0, 28.0, 32.0, 19.0, 6.0, 8.0, 17.0, 19.0, 15.0]}, "sampler_perf": 
{"mean_env_wait_ms": 11.674808393625103, "mean_processing_ms": 0.16471740447247565, "mean_inference_ms": 4.325696825802514}, "off_policy_estimator": {}, "info": {"num_steps_trained": 336000, "num_steps_sampled": 179200, "sample_time_ms": 262067.662, "load_time_ms": 39.631, "grad_time_ms": 8508.383, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.441406286379788e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.169602632522583, "policy_loss": -0.011421400122344494, "vf_loss": 3.2296648025512695, "vf_explained_var": 0.07911600917577744, "kl": 0.0008258241578005254, "entropy": 1.7740892171859741, "entropy_coeff": 0.08934400230646133, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 179200, "episodes_total": 448, "training_iteration": 14, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_19-59-41", "timestamp": 1660244381, "time_this_iter_s": 32.64548587799072, "time_total_s": 2802.8943412303925, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 
3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": 
{}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2802.8943412303925, "timesteps_since_restore": 179200, "iterations_since_restore": 14, "perf": {"cpu_util_percent": 44.92765957446808, "ram_util_percent": 58.32340425531916}} -{"episode_reward_max": 84.0, "episode_reward_min": 3.0, "episode_reward_mean": 23.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 51.0}, "policy_reward_mean": {"ppo": 11.545}, "custom_metrics": {"sparse_reward_mean": 1.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 19.49, "shaped_reward_min": 3, "shaped_reward_max": 44, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, 
"tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.62, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.9, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 2.94, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.0, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 3.41, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.61, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.53, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.07, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, 
"dish_pickup_agent_0_mean": 3.42, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.01, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.35, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.91, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.13, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 1.03, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 0.28, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.31, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.69, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 11, "soup_drop_agent_1_mean": 0.59, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 1.53, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.07, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.53, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 
6, "viable_onion_potting_agent_1_mean": 2.07, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [17.0, 84.0, 14.0, 9.0, 9.0, 34.0, 22.0, 9.0, 23.0, 20.0, 9.0, 36.0, 14.0, 22.0, 11.0, 20.0, 25.0, 17.0, 6.0, 9.0, 30.0, 60.0, 9.0, 25.0, 22.0, 14.0, 23.0, 28.0, 19.0, 6.0, 76.0, 25.0, 11.0, 11.0, 14.0, 14.0, 76.0, 30.0, 25.0, 9.0, 28.0, 17.0, 68.0, 6.0, 3.0, 22.0, 17.0, 37.0, 14.0, 34.0, 12.0, 17.0, 27.0, 11.0, 25.0, 11.0, 17.0, 17.0, 25.0, 11.0, 12.0, 3.0, 19.0, 22.0, 60.0, 25.0, 25.0, 34.0, 19.0, 28.0, 14.0, 17.0, 17.0, 19.0, 17.0, 22.0, 63.0, 66.0, 22.0, 14.0, 9.0, 22.0, 14.0, 22.0, 20.0, 71.0, 25.0, 23.0, 3.0, 22.0, 22.0, 16.0, 
20.0, 9.0, 24.0, 44.0, 12.0, 17.0, 20.0, 20.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [14.0, 3.0, 33.0, 51.0, 3.0, 11.0, 3.0, 6.0, 0.0, 9.0, 14.0, 20.0, 9.0, 13.0, 0.0, 9.0, 17.0, 6.0, 14.0, 6.0, 3.0, 6.0, 27.0, 9.0, 8.0, 6.0, 3.0, 19.0, 3.0, 8.0, 8.0, 12.0, 11.0, 14.0, 8.0, 9.0, 3.0, 3.0, 6.0, 3.0, 13.0, 17.0, 26.0, 34.0, 6.0, 3.0, 14.0, 11.0, 11.0, 11.0, 3.0, 11.0, 9.0, 14.0, 13.0, 15.0, 8.0, 11.0, 3.0, 3.0, 38.0, 38.0, 17.0, 8.0, 6.0, 5.0, 8.0, 3.0, 3.0, 11.0, 11.0, 3.0, 31.0, 45.0, 19.0, 11.0, 5.0, 20.0, 6.0, 3.0, 6.0, 22.0, 6.0, 11.0, 28.0, 40.0, 6.0, 0.0, 3.0, 0.0, 11.0, 11.0, 8.0, 9.0, 20.0, 17.0, 5.0, 9.0, 20.0, 14.0, 9.0, 3.0, 5.0, 12.0, 14.0, 13.0, 3.0, 8.0, 13.0, 12.0, 11.0, 0.0, 5.0, 12.0, 9.0, 8.0, 17.0, 8.0, 0.0, 11.0, 9.0, 3.0, 3.0, 0.0, 6.0, 13.0, 8.0, 14.0, 28.0, 32.0, 19.0, 6.0, 8.0, 17.0, 19.0, 15.0, 8.0, 11.0, 17.0, 11.0, 3.0, 11.0, 5.0, 12.0, 3.0, 14.0, 3.0, 16.0, 11.0, 6.0, 14.0, 8.0, 31.0, 32.0, 29.0, 37.0, 11.0, 11.0, 5.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 6.0, 8.0, 14.0, 11.0, 9.0, 41.0, 30.0, 11.0, 14.0, 9.0, 14.0, 0.0, 3.0, 14.0, 8.0, 14.0, 8.0, 8.0, 8.0, 14.0, 6.0, 3.0, 6.0, 3.0, 21.0, 14.0, 30.0, 3.0, 9.0, 11.0, 6.0, 12.0, 8.0, 9.0, 11.0]}, "sampler_perf": {"mean_env_wait_ms": 10.867209759257396, "mean_processing_ms": 0.16544513042105083, "mean_inference_ms": 4.098041869605518}, "off_policy_estimator": {}, "info": {"num_steps_trained": 360000, "num_steps_sampled": 192000, "sample_time_ms": 262263.341, "load_time_ms": 39.79, "grad_time_ms": 
8700.33, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.220703143189894e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.14661313593387604, "policy_loss": -0.0037220455706119537, "vf_loss": 3.146031618118286, "vf_explained_var": 0.09564539045095444, "kl": 0.0008609917131252587, "entropy": 1.7716461420059204, "entropy_coeff": 0.08083199709653854, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 192000, "episodes_total": 480, "training_iteration": 15, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-00-10", "timestamp": 1660244410, "time_this_iter_s": 28.69369125366211, "time_total_s": 2831.5880324840546, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": 
true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", 
"input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2831.5880324840546, "timesteps_since_restore": 192000, "iterations_since_restore": 15, "perf": {"cpu_util_percent": 32.489999999999995, "ram_util_percent": 57.802499999999995}} -{"episode_reward_max": 84.0, "episode_reward_min": 3.0, "episode_reward_mean": 25.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 51.0}, "policy_reward_mean": {"ppo": 12.94}, "custom_metrics": {"sparse_reward_mean": 2.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 20.68, "shaped_reward_min": 3, "shaped_reward_max": 44, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.11, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 6.01, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.28, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.06, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.99, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 3.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.49, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.61, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.25, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.28, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.36, "useful_dish_pickup_agent_0_min": 0, 
"useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.34, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.95, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.65, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 0.98, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 1.13, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.58, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.62, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.61, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.25, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.61, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.25, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [17.0, 26.0, 42.0, 36.0, 11.0, 17.0, 9.0, 12.0, 20.0, 30.0, 22.0, 60.0, 23.0, 66.0, 36.0, 28.0, 79.0, 22.0, 31.0, 68.0, 28.0, 20.0, 54.0, 9.0, 14.0, 17.0, 66.0, 20.0, 14.0, 11.0, 17.0, 19.0, 60.0, 25.0, 25.0, 34.0, 19.0, 28.0, 14.0, 17.0, 17.0, 19.0, 17.0, 22.0, 63.0, 66.0, 22.0, 14.0, 9.0, 22.0, 14.0, 22.0, 20.0, 71.0, 25.0, 23.0, 3.0, 22.0, 22.0, 16.0, 20.0, 9.0, 24.0, 44.0, 12.0, 17.0, 20.0, 20.0, 17.0, 84.0, 14.0, 9.0, 9.0, 34.0, 22.0, 9.0, 23.0, 20.0, 9.0, 36.0, 14.0, 22.0, 11.0, 20.0, 25.0, 17.0, 6.0, 9.0, 30.0, 60.0, 9.0, 25.0, 22.0, 14.0, 23.0, 28.0, 19.0, 6.0, 76.0, 25.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [12.0, 5.0, 20.0, 6.0, 14.0, 28.0, 19.0, 17.0, 3.0, 8.0, 3.0, 14.0, 3.0, 6.0, 6.0, 6.0, 3.0, 17.0, 17.0, 13.0, 9.0, 13.0, 26.0, 34.0, 14.0, 9.0, 37.0, 29.0, 8.0, 28.0, 12.0, 16.0, 29.0, 50.0, 3.0, 19.0, 8.0, 23.0, 33.0, 35.0, 5.0, 23.0, 14.0, 6.0, 31.0, 23.0, 9.0, 0.0, 3.0, 11.0, 3.0, 14.0, 26.0, 40.0, 3.0, 17.0, 6.0, 8.0, 5.0, 6.0, 9.0, 8.0, 8.0, 11.0, 28.0, 32.0, 19.0, 6.0, 8.0, 17.0, 19.0, 15.0, 8.0, 11.0, 17.0, 11.0, 3.0, 11.0, 5.0, 12.0, 3.0, 14.0, 3.0, 16.0, 11.0, 6.0, 14.0, 8.0, 31.0, 32.0, 29.0, 37.0, 11.0, 11.0, 5.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 6.0, 8.0, 14.0, 11.0, 9.0, 41.0, 30.0, 11.0, 14.0, 9.0, 14.0, 0.0, 3.0, 14.0, 8.0, 14.0, 8.0, 8.0, 8.0, 14.0, 6.0, 3.0, 6.0, 3.0, 21.0, 14.0, 30.0, 3.0, 9.0, 11.0, 6.0, 12.0, 8.0, 9.0, 11.0, 14.0, 3.0, 33.0, 51.0, 3.0, 11.0, 3.0, 6.0, 0.0, 9.0, 14.0, 20.0, 9.0, 13.0, 0.0, 9.0, 17.0, 6.0, 14.0, 6.0, 3.0, 6.0, 27.0, 9.0, 8.0, 6.0, 3.0, 19.0, 3.0, 8.0, 8.0, 12.0, 11.0, 14.0, 8.0, 9.0, 3.0, 3.0, 6.0, 3.0, 13.0, 17.0, 26.0, 34.0, 6.0, 3.0, 14.0, 11.0, 11.0, 11.0, 3.0, 11.0, 9.0, 14.0, 13.0, 15.0, 8.0, 11.0, 3.0, 3.0, 38.0, 38.0, 17.0, 8.0]}, "sampler_perf": {"mean_env_wait_ms": 10.168319641902713, "mean_processing_ms": 0.16598079643560795, "mean_inference_ms": 3.884933201233045}, "off_policy_estimator": {}, "info": {"num_steps_trained": 384000, "num_steps_sampled": 204800, "sample_time_ms": 262331.542, "load_time_ms": 40.118, "grad_time_ms": 8762.055, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.10351571594947e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.13645179569721222, "policy_loss": -0.00917948316782713, "vf_loss": 5.045528888702393, 
"vf_explained_var": 0.08776132017374039, "kl": 0.0009270868613384664, "entropy": 1.7668260335922241, "entropy_coeff": 0.07231999933719635, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 204800, "episodes_total": 512, "training_iteration": 16, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-00-39", "timestamp": 1660244439, "time_this_iter_s": 28.684066772460938, "time_total_s": 2860.2720992565155, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2860.2720992565155, "timesteps_since_restore": 204800, "iterations_since_restore": 16, "perf": {"cpu_util_percent": 36.62439024390244, "ram_util_percent": 57.81951219512195}} -{"episode_reward_max": 84.0, "episode_reward_min": 6.0, "episode_reward_mean": 27.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 51.0}, "policy_reward_mean": {"ppo": 13.93}, "custom_metrics": {"sparse_reward_mean": 3.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 21.06, "shaped_reward_min": 6, "shaped_reward_max": 44, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.37, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 6.13, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.43, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.09, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 3.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.71, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.35, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.08, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.84, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.38, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.3, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.78, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.57, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 0.98, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 1.08, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.35, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.57, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.61, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.71, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.35, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.71, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.35, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [17.0, 28.0, 34.0, 20.0, 57.0, 14.0, 71.0, 12.0, 12.0, 66.0, 28.0, 63.0, 17.0, 28.0, 23.0, 9.0, 20.0, 22.0, 71.0, 26.0, 27.0, 20.0, 20.0, 9.0, 19.0, 20.0, 76.0, 19.0, 68.0, 68.0, 17.0, 25.0, 12.0, 17.0, 20.0, 20.0, 17.0, 84.0, 14.0, 9.0, 9.0, 34.0, 22.0, 9.0, 23.0, 20.0, 9.0, 36.0, 14.0, 22.0, 11.0, 20.0, 25.0, 17.0, 6.0, 9.0, 30.0, 60.0, 9.0, 25.0, 22.0, 14.0, 23.0, 28.0, 19.0, 6.0, 76.0, 25.0, 17.0, 26.0, 42.0, 36.0, 11.0, 17.0, 9.0, 12.0, 20.0, 30.0, 22.0, 60.0, 23.0, 66.0, 36.0, 28.0, 79.0, 22.0, 31.0, 68.0, 28.0, 20.0, 54.0, 9.0, 14.0, 17.0, 66.0, 20.0, 14.0, 11.0, 17.0, 19.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 11.0, 19.0, 9.0, 17.0, 17.0, 3.0, 17.0, 26.0, 31.0, 8.0, 6.0, 38.0, 33.0, 6.0, 6.0, 3.0, 9.0, 34.0, 32.0, 17.0, 11.0, 31.0, 32.0, 6.0, 11.0, 16.0, 12.0, 12.0, 11.0, 3.0, 6.0, 3.0, 17.0, 10.0, 12.0, 36.0, 35.0, 11.0, 15.0, 13.0, 14.0, 3.0, 17.0, 14.0, 6.0, 6.0, 3.0, 6.0, 13.0, 12.0, 8.0, 40.0, 36.0, 8.0, 11.0, 40.0, 28.0, 34.0, 34.0, 3.0, 14.0, 9.0, 16.0, 3.0, 9.0, 11.0, 6.0, 12.0, 8.0, 9.0, 11.0, 14.0, 3.0, 33.0, 51.0, 3.0, 11.0, 3.0, 6.0, 0.0, 9.0, 14.0, 20.0, 9.0, 13.0, 0.0, 9.0, 17.0, 6.0, 14.0, 6.0, 3.0, 6.0, 27.0, 9.0, 8.0, 6.0, 3.0, 19.0, 3.0, 8.0, 8.0, 12.0, 11.0, 14.0, 8.0, 9.0, 3.0, 3.0, 6.0, 3.0, 13.0, 17.0, 26.0, 34.0, 6.0, 3.0, 14.0, 11.0, 11.0, 11.0, 3.0, 11.0, 9.0, 14.0, 13.0, 15.0, 8.0, 11.0, 3.0, 3.0, 38.0, 38.0, 17.0, 8.0, 12.0, 5.0, 20.0, 6.0, 14.0, 28.0, 19.0, 17.0, 3.0, 8.0, 3.0, 14.0, 3.0, 6.0, 6.0, 6.0, 3.0, 17.0, 17.0, 13.0, 9.0, 13.0, 26.0, 34.0, 14.0, 9.0, 37.0, 29.0, 8.0, 28.0, 12.0, 16.0, 29.0, 50.0, 3.0, 19.0, 8.0, 23.0, 33.0, 35.0, 5.0, 23.0, 14.0, 6.0, 31.0, 23.0, 9.0, 0.0, 3.0, 11.0, 3.0, 14.0, 26.0, 40.0, 3.0, 17.0, 6.0, 8.0, 5.0, 6.0, 9.0, 8.0, 8.0, 11.0]}, "sampler_perf": {"mean_env_wait_ms": 9.556971747293405, "mean_processing_ms": 0.16611483871912305, "mean_inference_ms": 3.69458132267773}, "off_policy_estimator": {}, "info": {"num_steps_trained": 408000, "num_steps_sampled": 217600, "sample_time_ms": 20254.523, "load_time_ms": 38.861, "grad_time_ms": 8931.015, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.051757857974735e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.12081533670425415, "policy_loss": -0.008616355247795582, "vf_loss": 5.614309310913086, "vf_explained_var": 0.13559557497501373, "kl": 0.0008749772678129375, "entropy": 1.76718270778656, "entropy_coeff": 0.06380800157785416, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, 
"done": false, "timesteps_total": 217600, "episodes_total": 544, "training_iteration": 17, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-01-08", "timestamp": 1660244468, "time_this_iter_s": 29.310136079788208, "time_total_s": 2889.5822353363037, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", 
"normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": 
false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2889.5822353363037, "timesteps_since_restore": 217600, "iterations_since_restore": 17, "perf": {"cpu_util_percent": 39.28333333333334, "ram_util_percent": 57.69761904761903}} -{"episode_reward_max": 82.0, "episode_reward_min": 6.0, "episode_reward_mean": 31.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 50.0}, "policy_reward_mean": {"ppo": 15.58}, "custom_metrics": {"sparse_reward_mean": 4.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 23.16, "shaped_reward_min": 6, "shaped_reward_max": 47, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, 
"useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.73, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.83, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.76, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.82, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 3.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.57, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 2.08, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.25, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.02, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.94, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.45, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, 
"useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.08, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.32, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.47, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.49, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.57, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.73, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 2.08, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.25, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.08, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.25, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, 
"catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [76.0, 44.0, 28.0, 12.0, 36.0, 33.0, 9.0, 24.0, 28.0, 25.0, 31.0, 68.0, 17.0, 47.0, 23.0, 12.0, 17.0, 22.0, 28.0, 23.0, 20.0, 37.0, 14.0, 8.0, 28.0, 22.0, 39.0, 82.0, 71.0, 57.0, 30.0, 9.0, 19.0, 6.0, 76.0, 25.0, 17.0, 26.0, 42.0, 36.0, 11.0, 17.0, 9.0, 12.0, 20.0, 30.0, 22.0, 60.0, 23.0, 66.0, 36.0, 28.0, 79.0, 22.0, 31.0, 68.0, 28.0, 20.0, 54.0, 9.0, 14.0, 17.0, 66.0, 20.0, 14.0, 11.0, 17.0, 19.0, 17.0, 28.0, 34.0, 20.0, 57.0, 14.0, 71.0, 12.0, 12.0, 66.0, 28.0, 63.0, 17.0, 28.0, 23.0, 9.0, 20.0, 22.0, 71.0, 26.0, 27.0, 20.0, 20.0, 9.0, 19.0, 20.0, 76.0, 19.0, 68.0, 68.0, 17.0, 25.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [34.0, 42.0, 18.0, 26.0, 11.0, 17.0, 9.0, 3.0, 17.0, 19.0, 19.0, 14.0, 3.0, 6.0, 16.0, 8.0, 16.0, 
12.0, 12.0, 13.0, 20.0, 11.0, 37.0, 31.0, 9.0, 8.0, 28.0, 19.0, 9.0, 14.0, 3.0, 9.0, 11.0, 6.0, 8.0, 14.0, 11.0, 17.0, 9.0, 14.0, 9.0, 11.0, 20.0, 17.0, 9.0, 5.0, 5.0, 3.0, 16.0, 12.0, 12.0, 10.0, 12.0, 27.0, 38.0, 44.0, 35.0, 36.0, 29.0, 28.0, 11.0, 19.0, 0.0, 9.0, 8.0, 11.0, 3.0, 3.0, 38.0, 38.0, 17.0, 8.0, 12.0, 5.0, 20.0, 6.0, 14.0, 28.0, 19.0, 17.0, 3.0, 8.0, 3.0, 14.0, 3.0, 6.0, 6.0, 6.0, 3.0, 17.0, 17.0, 13.0, 9.0, 13.0, 26.0, 34.0, 14.0, 9.0, 37.0, 29.0, 8.0, 28.0, 12.0, 16.0, 29.0, 50.0, 3.0, 19.0, 8.0, 23.0, 33.0, 35.0, 5.0, 23.0, 14.0, 6.0, 31.0, 23.0, 9.0, 0.0, 3.0, 11.0, 3.0, 14.0, 26.0, 40.0, 3.0, 17.0, 6.0, 8.0, 5.0, 6.0, 9.0, 8.0, 8.0, 11.0, 6.0, 11.0, 19.0, 9.0, 17.0, 17.0, 3.0, 17.0, 26.0, 31.0, 8.0, 6.0, 38.0, 33.0, 6.0, 6.0, 3.0, 9.0, 34.0, 32.0, 17.0, 11.0, 31.0, 32.0, 6.0, 11.0, 16.0, 12.0, 12.0, 11.0, 3.0, 6.0, 3.0, 17.0, 10.0, 12.0, 36.0, 35.0, 11.0, 15.0, 13.0, 14.0, 3.0, 17.0, 14.0, 6.0, 6.0, 3.0, 6.0, 13.0, 12.0, 8.0, 40.0, 36.0, 8.0, 11.0, 40.0, 28.0, 34.0, 34.0, 3.0, 14.0, 9.0, 16.0]}, "sampler_perf": {"mean_env_wait_ms": 9.01879810548929, "mean_processing_ms": 0.16630796767792247, "mean_inference_ms": 3.5295458802458652}, "off_policy_estimator": {}, "info": {"num_steps_trained": 432000, "num_steps_sampled": 230400, "sample_time_ms": 20513.179, "load_time_ms": 38.872, "grad_time_ms": 8952.524, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.5258789289873675e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.10658890753984451, "policy_loss": -0.009598230011761189, "vf_loss": 4.846475601196289, "vf_explained_var": 0.11691506952047348, "kl": 0.0009377954411320388, "entropy": 1.762791633605957, "entropy_coeff": 0.055296000093221664, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 230400, "episodes_total": 576, "training_iteration": 18, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-01-39", "timestamp": 1660244499, 
"time_this_iter_s": 30.67889380455017, "time_total_s": 2920.261129140854, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, 
"log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, 
"sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2920.261129140854, "timesteps_since_restore": 230400, "iterations_since_restore": 18, "perf": {"cpu_util_percent": 40.46744186046512, "ram_util_percent": 57.767441860465105}} -{"episode_reward_max": 88.0, "episode_reward_min": 8.0, "episode_reward_mean": 32.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 53.0}, "policy_reward_mean": {"ppo": 16.475}, "custom_metrics": {"sparse_reward_mean": 3.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 25.35, "shaped_reward_min": 8, "shaped_reward_max": 48, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, 
"potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.56, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.61, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.65, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 8, "useful_onion_pickup_agent_1_mean": 3.81, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.8, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.85, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.39, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 2.27, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.23, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 2.97, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.91, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 1.22, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, 
"soup_pickup_agent_1_mean": 1.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.6, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.55, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.94, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.27, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.23, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.27, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.23, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, 
"catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [34.0, 42.0, 88.0, 33.0, 17.0, 30.0, 76.0, 85.0, 33.0, 31.0, 68.0, 12.0, 16.0, 79.0, 68.0, 19.0, 33.0, 19.0, 25.0, 31.0, 36.0, 44.0, 39.0, 11.0, 22.0, 42.0, 31.0, 25.0, 20.0, 31.0, 31.0, 17.0, 14.0, 11.0, 17.0, 19.0, 17.0, 28.0, 34.0, 20.0, 57.0, 14.0, 71.0, 12.0, 12.0, 66.0, 28.0, 63.0, 17.0, 28.0, 23.0, 9.0, 20.0, 22.0, 71.0, 26.0, 27.0, 20.0, 20.0, 9.0, 19.0, 20.0, 76.0, 19.0, 68.0, 68.0, 17.0, 25.0, 76.0, 44.0, 28.0, 12.0, 36.0, 33.0, 9.0, 24.0, 28.0, 25.0, 31.0, 68.0, 17.0, 47.0, 23.0, 12.0, 17.0, 22.0, 28.0, 23.0, 20.0, 37.0, 14.0, 8.0, 28.0, 22.0, 39.0, 82.0, 71.0, 57.0, 30.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 23.0, 25.0, 17.0, 45.0, 43.0, 22.0, 11.0, 5.0, 12.0, 17.0, 13.0, 48.0, 28.0, 32.0, 53.0, 22.0, 11.0, 18.0, 13.0, 29.0, 39.0, 6.0, 6.0, 3.0, 13.0, 37.0, 42.0, 32.0, 36.0, 11.0, 8.0, 13.0, 20.0, 11.0, 8.0, 8.0, 17.0, 14.0, 17.0, 19.0, 17.0, 14.0, 30.0, 14.0, 25.0, 5.0, 6.0, 
6.0, 16.0, 9.0, 33.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 12.0, 19.0, 11.0, 20.0, 14.0, 3.0, 6.0, 8.0, 5.0, 6.0, 9.0, 8.0, 8.0, 11.0, 6.0, 11.0, 19.0, 9.0, 17.0, 17.0, 3.0, 17.0, 26.0, 31.0, 8.0, 6.0, 38.0, 33.0, 6.0, 6.0, 3.0, 9.0, 34.0, 32.0, 17.0, 11.0, 31.0, 32.0, 6.0, 11.0, 16.0, 12.0, 12.0, 11.0, 3.0, 6.0, 3.0, 17.0, 10.0, 12.0, 36.0, 35.0, 11.0, 15.0, 13.0, 14.0, 3.0, 17.0, 14.0, 6.0, 6.0, 3.0, 6.0, 13.0, 12.0, 8.0, 40.0, 36.0, 8.0, 11.0, 40.0, 28.0, 34.0, 34.0, 3.0, 14.0, 9.0, 16.0, 34.0, 42.0, 18.0, 26.0, 11.0, 17.0, 9.0, 3.0, 17.0, 19.0, 19.0, 14.0, 3.0, 6.0, 16.0, 8.0, 16.0, 12.0, 12.0, 13.0, 20.0, 11.0, 37.0, 31.0, 9.0, 8.0, 28.0, 19.0, 9.0, 14.0, 3.0, 9.0, 11.0, 6.0, 8.0, 14.0, 11.0, 17.0, 9.0, 14.0, 9.0, 11.0, 20.0, 17.0, 9.0, 5.0, 5.0, 3.0, 16.0, 12.0, 12.0, 10.0, 12.0, 27.0, 38.0, 44.0, 35.0, 36.0, 29.0, 28.0, 11.0, 19.0, 0.0, 9.0]}, "sampler_perf": {"mean_env_wait_ms": 8.541158675961446, "mean_processing_ms": 0.1664622179417048, "mean_inference_ms": 3.3849019348875076}, "off_policy_estimator": {}, "info": {"num_steps_trained": 456000, "num_steps_sampled": 243200, "sample_time_ms": 20842.585, "load_time_ms": 38.976, "grad_time_ms": 8953.309, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 7.629394644936838e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.09107109159231186, "policy_loss": -0.009291496127843857, "vf_loss": 5.607062816619873, "vf_explained_var": 0.08896120637655258, "kl": 0.0008400729275308549, "entropy": 1.7600102424621582, "entropy_coeff": 0.04678399860858917, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 243200, "episodes_total": 608, "training_iteration": 19, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-02-08", "timestamp": 1660244528, "time_this_iter_s": 29.248838186264038, "time_total_s": 2949.509967327118, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 
0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 2949.509967327118, "timesteps_since_restore": 243200, "iterations_since_restore": 19, "perf": {"cpu_util_percent": 33.43571428571428, "ram_util_percent": 57.790476190476205}} -{"episode_reward_max": 88.0, "episode_reward_min": 8.0, "episode_reward_mean": 37.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 56.0}, "policy_reward_mean": {"ppo": 18.55}, "custom_metrics": {"sparse_reward_mean": 4.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 27.5, "shaped_reward_min": 8, "shaped_reward_max": 48, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.54, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.78, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.75, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.94, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.61, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.45, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.42, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.16, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.13, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.31, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.65, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.61, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, 
"soup_delivery_agent_1_mean": 0.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.62, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 1.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.45, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.42, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.45, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.42, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [23.0, 74.0, 17.0, 14.0, 9.0, 76.0, 63.0, 88.0, 66.0, 37.0, 37.0, 37.0, 68.0, 77.0, 28.0, 28.0, 68.0, 25.0, 60.0, 20.0, 22.0, 36.0, 23.0, 82.0, 12.0, 9.0, 23.0, 19.0, 42.0, 79.0, 42.0, 20.0, 68.0, 68.0, 17.0, 25.0, 76.0, 44.0, 28.0, 12.0, 36.0, 33.0, 9.0, 24.0, 28.0, 25.0, 31.0, 68.0, 17.0, 47.0, 23.0, 12.0, 17.0, 22.0, 28.0, 23.0, 20.0, 37.0, 14.0, 8.0, 28.0, 22.0, 39.0, 82.0, 71.0, 57.0, 30.0, 9.0, 34.0, 42.0, 88.0, 33.0, 17.0, 30.0, 76.0, 85.0, 33.0, 31.0, 68.0, 12.0, 16.0, 79.0, 68.0, 19.0, 33.0, 19.0, 25.0, 31.0, 36.0, 44.0, 39.0, 11.0, 22.0, 42.0, 31.0, 25.0, 20.0, 31.0, 31.0, 17.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 12.0, 43.0, 31.0, 11.0, 6.0, 14.0, 0.0, 9.0, 0.0, 45.0, 31.0, 28.0, 35.0, 32.0, 56.0, 38.0, 28.0, 14.0, 23.0, 8.0, 29.0, 14.0, 23.0, 34.0, 34.0, 34.0, 43.0, 14.0, 14.0, 14.0, 14.0, 30.0, 38.0, 14.0, 11.0, 26.0, 34.0, 6.0, 14.0, 8.0, 14.0, 17.0, 19.0, 17.0, 6.0, 39.0, 43.0, 6.0, 6.0, 3.0, 6.0, 11.0, 12.0, 13.0, 6.0, 17.0, 25.0, 37.0, 42.0, 22.0, 20.0, 6.0, 14.0, 40.0, 28.0, 34.0, 34.0, 3.0, 14.0, 9.0, 16.0, 34.0, 42.0, 18.0, 26.0, 11.0, 17.0, 9.0, 3.0, 17.0, 19.0, 19.0, 14.0, 3.0, 
6.0, 16.0, 8.0, 16.0, 12.0, 12.0, 13.0, 20.0, 11.0, 37.0, 31.0, 9.0, 8.0, 28.0, 19.0, 9.0, 14.0, 3.0, 9.0, 11.0, 6.0, 8.0, 14.0, 11.0, 17.0, 9.0, 14.0, 9.0, 11.0, 20.0, 17.0, 9.0, 5.0, 5.0, 3.0, 16.0, 12.0, 12.0, 10.0, 12.0, 27.0, 38.0, 44.0, 35.0, 36.0, 29.0, 28.0, 11.0, 19.0, 0.0, 9.0, 11.0, 23.0, 25.0, 17.0, 45.0, 43.0, 22.0, 11.0, 5.0, 12.0, 17.0, 13.0, 48.0, 28.0, 32.0, 53.0, 22.0, 11.0, 18.0, 13.0, 29.0, 39.0, 6.0, 6.0, 3.0, 13.0, 37.0, 42.0, 32.0, 36.0, 11.0, 8.0, 13.0, 20.0, 11.0, 8.0, 8.0, 17.0, 14.0, 17.0, 19.0, 17.0, 14.0, 30.0, 14.0, 25.0, 5.0, 6.0, 6.0, 16.0, 9.0, 33.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 12.0, 19.0, 11.0, 20.0, 14.0, 3.0]}, "sampler_perf": {"mean_env_wait_ms": 8.114151695990262, "mean_processing_ms": 0.1664756703889973, "mean_inference_ms": 3.2561915126083236}, "off_policy_estimator": {}, "info": {"num_steps_trained": 480000, "num_steps_sampled": 256000, "sample_time_ms": 21118.245, "load_time_ms": 39.16, "grad_time_ms": 8931.236, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.814697322468419e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.07289835065603256, "policy_loss": -0.006338973995298147, "vf_loss": 7.939427852630615, "vf_explained_var": 0.1275780349969864, "kl": 0.000996587099507451, "entropy": 1.7598587274551392, "entropy_coeff": 0.03827200084924698, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 256000, "episodes_total": 640, "training_iteration": 20, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-02-37", "timestamp": 1660244557, "time_this_iter_s": 28.921189069747925, "time_total_s": 2978.431156396866, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
2978.431156396866, "timesteps_since_restore": 256000, "iterations_since_restore": 20, "perf": {"cpu_util_percent": 30.78048780487805, "ram_util_percent": 57.77073170731706}} -{"episode_reward_max": 116.0, "episode_reward_min": 9.0, "episode_reward_mean": 39.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 19.52}, "custom_metrics": {"sparse_reward_mean": 5.8, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 27.44, "shaped_reward_min": 9, "shaped_reward_max": 48, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.18, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.61, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.52, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, 
"useful_onion_pickup_agent_1_mean": 3.9, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.3, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.63, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.32, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.21, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.46, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.87, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.61, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.36, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.49, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.58, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.57, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.69, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.85, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.3, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.63, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.3, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.63, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [25.0, 9.0, 17.0, 27.0, 14.0, 19.0, 60.0, 36.0, 25.0, 17.0, 16.0, 28.0, 34.0, 45.0, 23.0, 36.0, 22.0, 22.0, 63.0, 12.0, 68.0, 25.0, 63.0, 25.0, 11.0, 57.0, 76.0, 30.0, 116.0, 45.0, 79.0, 80.0, 71.0, 57.0, 30.0, 9.0, 34.0, 42.0, 88.0, 33.0, 17.0, 30.0, 76.0, 85.0, 33.0, 31.0, 68.0, 12.0, 16.0, 79.0, 68.0, 19.0, 33.0, 19.0, 25.0, 31.0, 36.0, 44.0, 39.0, 11.0, 22.0, 42.0, 31.0, 25.0, 20.0, 31.0, 31.0, 17.0, 23.0, 74.0, 17.0, 14.0, 9.0, 76.0, 63.0, 88.0, 66.0, 37.0, 37.0, 37.0, 68.0, 77.0, 28.0, 28.0, 68.0, 25.0, 60.0, 20.0, 22.0, 36.0, 23.0, 82.0, 12.0, 9.0, 23.0, 19.0, 42.0, 79.0, 42.0, 20.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [12.0, 13.0, 3.0, 6.0, 6.0, 11.0, 10.0, 17.0, 11.0, 3.0, 6.0, 13.0, 26.0, 34.0, 17.0, 19.0, 3.0, 22.0, 8.0, 9.0, 8.0, 8.0, 19.0, 9.0, 28.0, 6.0, 25.0, 20.0, 14.0, 9.0, 9.0, 27.0, 16.0, 6.0, 11.0, 11.0, 31.0, 32.0, 6.0, 6.0, 33.0, 35.0, 13.0, 12.0, 29.0, 34.0, 6.0, 19.0, 3.0, 8.0, 26.0, 31.0, 34.0, 42.0, 10.0, 20.0, 68.0, 48.0, 22.0, 23.0, 48.0, 31.0, 45.0, 35.0, 35.0, 36.0, 29.0, 28.0, 11.0, 19.0, 0.0, 9.0, 11.0, 23.0, 25.0, 17.0, 45.0, 43.0, 22.0, 11.0, 5.0, 12.0, 17.0, 13.0, 48.0, 28.0, 32.0, 53.0, 22.0, 11.0, 18.0, 13.0, 29.0, 39.0, 6.0, 6.0, 3.0, 13.0, 37.0, 42.0, 32.0, 36.0, 11.0, 8.0, 13.0, 20.0, 11.0, 8.0, 8.0, 17.0, 14.0, 17.0, 19.0, 17.0, 
14.0, 30.0, 14.0, 25.0, 5.0, 6.0, 6.0, 16.0, 9.0, 33.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 12.0, 19.0, 11.0, 20.0, 14.0, 3.0, 11.0, 12.0, 43.0, 31.0, 11.0, 6.0, 14.0, 0.0, 9.0, 0.0, 45.0, 31.0, 28.0, 35.0, 32.0, 56.0, 38.0, 28.0, 14.0, 23.0, 8.0, 29.0, 14.0, 23.0, 34.0, 34.0, 34.0, 43.0, 14.0, 14.0, 14.0, 14.0, 30.0, 38.0, 14.0, 11.0, 26.0, 34.0, 6.0, 14.0, 8.0, 14.0, 17.0, 19.0, 17.0, 6.0, 39.0, 43.0, 6.0, 6.0, 3.0, 6.0, 11.0, 12.0, 13.0, 6.0, 17.0, 25.0, 37.0, 42.0, 22.0, 20.0, 6.0, 14.0]}, "sampler_perf": {"mean_env_wait_ms": 7.729743796447544, "mean_processing_ms": 0.1663097758329898, "mean_inference_ms": 3.1401875416297957}, "off_policy_estimator": {}, "info": {"num_steps_trained": 504000, "num_steps_sampled": 268800, "sample_time_ms": 21421.608, "load_time_ms": 38.85, "grad_time_ms": 8956.305, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.9073486612342094e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.05712709203362465, "policy_loss": -0.005733281373977661, "vf_loss": 7.644298553466797, "vf_explained_var": 0.10351377725601196, "kl": 0.0011409734142944217, "entropy": 1.7526286840438843, "entropy_coeff": 0.029759999364614487, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 268800, "episodes_total": 672, "training_iteration": 21, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-03-07", "timestamp": 1660244587, "time_this_iter_s": 29.522944927215576, "time_total_s": 3007.9541013240814, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, 
"use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 
2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3007.9541013240814, "timesteps_since_restore": 268800, "iterations_since_restore": 21, "perf": {"cpu_util_percent": 34.18809523809524, "ram_util_percent": 57.730952380952374}} 
-{"episode_reward_max": 116.0, "episode_reward_min": 9.0, "episode_reward_mean": 37.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 18.735}, "custom_metrics": {"sparse_reward_mean": 5.4, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 26.67, "shaped_reward_min": 9, "shaped_reward_max": 50, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.24, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.51, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.51, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.59, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.46, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 
7, "onion_drop_agent_1_mean": 2.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.49, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.32, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.55, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.18, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.39, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.52, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.81, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.35, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.52, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.73, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.71, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.32, "optimal_onion_potting_agent_0_min": 0, 
"optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.55, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.32, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.55, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": 
[73.0, 25.0, 31.0, 34.0, 16.0, 9.0, 23.0, 39.0, 35.0, 28.0, 79.0, 36.0, 57.0, 25.0, 28.0, 12.0, 17.0, 25.0, 25.0, 71.0, 44.0, 62.0, 22.0, 17.0, 23.0, 25.0, 37.0, 66.0, 50.0, 23.0, 12.0, 30.0, 20.0, 31.0, 31.0, 17.0, 23.0, 74.0, 17.0, 14.0, 9.0, 76.0, 63.0, 88.0, 66.0, 37.0, 37.0, 37.0, 68.0, 77.0, 28.0, 28.0, 68.0, 25.0, 60.0, 20.0, 22.0, 36.0, 23.0, 82.0, 12.0, 9.0, 23.0, 19.0, 42.0, 79.0, 42.0, 20.0, 25.0, 9.0, 17.0, 27.0, 14.0, 19.0, 60.0, 36.0, 25.0, 17.0, 16.0, 28.0, 34.0, 45.0, 23.0, 36.0, 22.0, 22.0, 63.0, 12.0, 68.0, 25.0, 63.0, 25.0, 11.0, 57.0, 76.0, 30.0, 116.0, 45.0, 79.0, 80.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [41.0, 32.0, 17.0, 8.0, 10.0, 21.0, 21.0, 13.0, 6.0, 10.0, 6.0, 3.0, 9.0, 14.0, 25.0, 14.0, 27.0, 8.0, 5.0, 23.0, 34.0, 45.0, 25.0, 11.0, 23.0, 34.0, 14.0, 11.0, 14.0, 14.0, 6.0, 6.0, 3.0, 14.0, 14.0, 11.0, 11.0, 14.0, 34.0, 37.0, 28.0, 16.0, 26.0, 36.0, 13.0, 9.0, 6.0, 11.0, 9.0, 14.0, 20.0, 5.0, 23.0, 14.0, 32.0, 34.0, 19.0, 31.0, 11.0, 12.0, 6.0, 6.0, 9.0, 21.0, 6.0, 14.0, 12.0, 19.0, 11.0, 20.0, 14.0, 3.0, 11.0, 12.0, 43.0, 31.0, 11.0, 6.0, 14.0, 0.0, 9.0, 0.0, 45.0, 31.0, 28.0, 35.0, 32.0, 56.0, 38.0, 28.0, 14.0, 23.0, 8.0, 29.0, 14.0, 23.0, 34.0, 34.0, 34.0, 43.0, 14.0, 14.0, 14.0, 14.0, 30.0, 38.0, 14.0, 11.0, 26.0, 34.0, 6.0, 14.0, 8.0, 14.0, 17.0, 19.0, 17.0, 6.0, 39.0, 43.0, 6.0, 6.0, 3.0, 6.0, 11.0, 12.0, 13.0, 6.0, 17.0, 25.0, 37.0, 42.0, 22.0, 20.0, 6.0, 14.0, 12.0, 13.0, 3.0, 6.0, 6.0, 11.0, 10.0, 17.0, 11.0, 3.0, 6.0, 13.0, 26.0, 
34.0, 17.0, 19.0, 3.0, 22.0, 8.0, 9.0, 8.0, 8.0, 19.0, 9.0, 28.0, 6.0, 25.0, 20.0, 14.0, 9.0, 9.0, 27.0, 16.0, 6.0, 11.0, 11.0, 31.0, 32.0, 6.0, 6.0, 33.0, 35.0, 13.0, 12.0, 29.0, 34.0, 6.0, 19.0, 3.0, 8.0, 26.0, 31.0, 34.0, 42.0, 10.0, 20.0, 68.0, 48.0, 22.0, 23.0, 48.0, 31.0, 45.0, 35.0]}, "sampler_perf": {"mean_env_wait_ms": 7.382168276478265, "mean_processing_ms": 0.16610498275202595, "mean_inference_ms": 3.034581035402087}, "off_policy_estimator": {}, "info": {"num_steps_trained": 528000, "num_steps_sampled": 281600, "sample_time_ms": 21629.558, "load_time_ms": 39.186, "grad_time_ms": 8958.867, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 9.536743306171047e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.045350998640060425, "policy_loss": -0.008628163486719131, "vf_loss": 5.3433098793029785, "vf_explained_var": 0.18131445348262787, "kl": 0.0009087324724532664, "entropy": 1.7534428834915161, "entropy_coeff": 0.021247999742627144, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 281600, "episodes_total": 704, "training_iteration": 22, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-03-36", "timestamp": 1660244616, "time_this_iter_s": 29.197812795639038, "time_total_s": 3037.1519141197205, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": 
null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3037.1519141197205, "timesteps_since_restore": 281600, "iterations_since_restore": 22, "perf": {"cpu_util_percent": 36.32142857142857, "ram_util_percent": 57.66904761904762}} -{"episode_reward_max": 128.0, "episode_reward_min": 9.0, "episode_reward_mean": 40.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, 
"policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 20.145}, "custom_metrics": {"sparse_reward_mean": 6.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 28.29, "shaped_reward_min": 9, "shaped_reward_max": 53, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.24, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.54, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.44, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.7, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 2.43, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.43, "useful_onion_drop_agent_0_min": 0, 
"useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.43, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.7, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.2, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.37, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.77, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 1.49, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.41, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.55, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.82, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.67, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.43, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.7, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.43, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.7, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [63.0, 9.0, 12.0, 125.0, 82.0, 68.0, 47.0, 24.0, 28.0, 30.0, 33.0, 20.0, 47.0, 128.0, 22.0, 37.0, 43.0, 74.0, 80.0, 53.0, 62.0, 76.0, 36.0, 31.0, 39.0, 20.0, 28.0, 9.0, 
34.0, 17.0, 60.0, 85.0, 42.0, 79.0, 42.0, 20.0, 25.0, 9.0, 17.0, 27.0, 14.0, 19.0, 60.0, 36.0, 25.0, 17.0, 16.0, 28.0, 34.0, 45.0, 23.0, 36.0, 22.0, 22.0, 63.0, 12.0, 68.0, 25.0, 63.0, 25.0, 11.0, 57.0, 76.0, 30.0, 116.0, 45.0, 79.0, 80.0, 73.0, 25.0, 31.0, 34.0, 16.0, 9.0, 23.0, 39.0, 35.0, 28.0, 79.0, 36.0, 57.0, 25.0, 28.0, 12.0, 17.0, 25.0, 25.0, 71.0, 44.0, 62.0, 22.0, 17.0, 23.0, 25.0, 37.0, 66.0, 50.0, 23.0, 12.0, 30.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [29.0, 34.0, 3.0, 6.0, 6.0, 6.0, 58.0, 67.0, 48.0, 34.0, 32.0, 36.0, 8.0, 39.0, 9.0, 15.0, 13.0, 15.0, 19.0, 11.0, 14.0, 19.0, 8.0, 12.0, 28.0, 19.0, 61.0, 67.0, 10.0, 12.0, 11.0, 26.0, 14.0, 29.0, 40.0, 34.0, 38.0, 42.0, 17.0, 36.0, 26.0, 36.0, 39.0, 37.0, 14.0, 22.0, 22.0, 9.0, 22.0, 17.0, 3.0, 17.0, 8.0, 20.0, 9.0, 0.0, 17.0, 17.0, 11.0, 6.0, 31.0, 29.0, 48.0, 37.0, 17.0, 25.0, 37.0, 42.0, 22.0, 20.0, 6.0, 14.0, 12.0, 13.0, 3.0, 6.0, 6.0, 11.0, 10.0, 17.0, 11.0, 3.0, 6.0, 13.0, 26.0, 34.0, 17.0, 19.0, 3.0, 22.0, 8.0, 9.0, 8.0, 8.0, 19.0, 9.0, 28.0, 6.0, 25.0, 20.0, 14.0, 9.0, 9.0, 27.0, 16.0, 6.0, 11.0, 11.0, 31.0, 32.0, 6.0, 6.0, 33.0, 35.0, 13.0, 12.0, 29.0, 34.0, 6.0, 19.0, 3.0, 8.0, 26.0, 31.0, 34.0, 42.0, 10.0, 20.0, 68.0, 48.0, 22.0, 23.0, 48.0, 31.0, 45.0, 35.0, 41.0, 32.0, 17.0, 8.0, 10.0, 21.0, 21.0, 13.0, 6.0, 10.0, 6.0, 3.0, 9.0, 14.0, 25.0, 14.0, 27.0, 8.0, 5.0, 23.0, 34.0, 45.0, 25.0, 11.0, 23.0, 34.0, 14.0, 11.0, 14.0, 14.0, 6.0, 6.0, 3.0, 14.0, 14.0, 11.0, 11.0, 14.0, 34.0, 37.0, 28.0, 16.0, 
26.0, 36.0, 13.0, 9.0, 6.0, 11.0, 9.0, 14.0, 20.0, 5.0, 23.0, 14.0, 32.0, 34.0, 19.0, 31.0, 11.0, 12.0, 6.0, 6.0, 9.0, 21.0]}, "sampler_perf": {"mean_env_wait_ms": 7.066502405705404, "mean_processing_ms": 0.16594670413037083, "mean_inference_ms": 2.938769748646424}, "off_policy_estimator": {}, "info": {"num_steps_trained": 552000, "num_steps_sampled": 294400, "sample_time_ms": 20554.7, "load_time_ms": 39.212, "grad_time_ms": 9005.588, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 4.7683716530855236e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.025955183431506157, "policy_loss": -0.0047044227831065655, "vf_loss": 9.28939437866211, "vf_explained_var": 0.20618398487567902, "kl": 0.0012655678438022733, "entropy": 1.741496205329895, "entropy_coeff": 0.012736000120639801, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 294400, "episodes_total": 736, "training_iteration": 23, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-04-05", "timestamp": 1660244645, "time_this_iter_s": 29.364897966384888, "time_total_s": 3066.5168120861053, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, 
"no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, 
"remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3066.5168120861053, "timesteps_since_restore": 294400, "iterations_since_restore": 23, "perf": {"cpu_util_percent": 34.0, "ram_util_percent": 57.70487804878048}} -{"episode_reward_max": 128.0, "episode_reward_min": 9.0, "episode_reward_mean": 43.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 21.51}, "custom_metrics": {"sparse_reward_mean": 6.8, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 29.42, 
"shaped_reward_min": 9, "shaped_reward_max": 53, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.56, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 5.35, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.71, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.74, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.41, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 2.68, 
"potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.66, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.29, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.32, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.52, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.46, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.47, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.6, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.71, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.76, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.66, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.68, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.66, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, 
"optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.68, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.66, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 42.0, 31.0, 28.0, 34.0, 41.0, 14.0, 20.0, 31.0, 41.0, 79.0, 30.0, 74.0, 28.0, 9.0, 22.0, 39.0, 65.0, 63.0, 66.0, 68.0, 22.0, 76.0, 47.0, 63.0, 79.0, 79.0, 74.0, 25.0, 20.0, 17.0, 25.0, 116.0, 45.0, 79.0, 80.0, 73.0, 25.0, 31.0, 34.0, 16.0, 9.0, 23.0, 39.0, 35.0, 28.0, 79.0, 36.0, 57.0, 25.0, 28.0, 12.0, 17.0, 25.0, 25.0, 71.0, 44.0, 62.0, 22.0, 17.0, 23.0, 25.0, 37.0, 
66.0, 50.0, 23.0, 12.0, 30.0, 63.0, 9.0, 12.0, 125.0, 82.0, 68.0, 47.0, 24.0, 28.0, 30.0, 33.0, 20.0, 47.0, 128.0, 22.0, 37.0, 43.0, 74.0, 80.0, 53.0, 62.0, 76.0, 36.0, 31.0, 39.0, 20.0, 28.0, 9.0, 34.0, 17.0, 60.0, 85.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 22.0, 20.0, 19.0, 12.0, 14.0, 14.0, 29.0, 5.0, 25.0, 16.0, 14.0, 0.0, 9.0, 11.0, 16.0, 15.0, 14.0, 27.0, 37.0, 42.0, 15.0, 15.0, 42.0, 32.0, 16.0, 12.0, 6.0, 3.0, 19.0, 3.0, 28.0, 11.0, 28.0, 37.0, 31.0, 32.0, 35.0, 31.0, 26.0, 42.0, 5.0, 17.0, 34.0, 42.0, 31.0, 16.0, 28.0, 35.0, 35.0, 44.0, 40.0, 39.0, 38.0, 36.0, 8.0, 17.0, 12.0, 8.0, 8.0, 9.0, 9.0, 16.0, 68.0, 48.0, 22.0, 23.0, 48.0, 31.0, 45.0, 35.0, 41.0, 32.0, 17.0, 8.0, 10.0, 21.0, 21.0, 13.0, 6.0, 10.0, 6.0, 3.0, 9.0, 14.0, 25.0, 14.0, 27.0, 8.0, 5.0, 23.0, 34.0, 45.0, 25.0, 11.0, 23.0, 34.0, 14.0, 11.0, 14.0, 14.0, 6.0, 6.0, 3.0, 14.0, 14.0, 11.0, 11.0, 14.0, 34.0, 37.0, 28.0, 16.0, 26.0, 36.0, 13.0, 9.0, 6.0, 11.0, 9.0, 14.0, 20.0, 5.0, 23.0, 14.0, 32.0, 34.0, 19.0, 31.0, 11.0, 12.0, 6.0, 6.0, 9.0, 21.0, 29.0, 34.0, 3.0, 6.0, 6.0, 6.0, 58.0, 67.0, 48.0, 34.0, 32.0, 36.0, 8.0, 39.0, 9.0, 15.0, 13.0, 15.0, 19.0, 11.0, 14.0, 19.0, 8.0, 12.0, 28.0, 19.0, 61.0, 67.0, 10.0, 12.0, 11.0, 26.0, 14.0, 29.0, 40.0, 34.0, 38.0, 42.0, 17.0, 36.0, 26.0, 36.0, 39.0, 37.0, 14.0, 22.0, 22.0, 9.0, 22.0, 17.0, 3.0, 17.0, 8.0, 20.0, 9.0, 0.0, 17.0, 17.0, 11.0, 6.0, 31.0, 29.0, 48.0, 37.0]}, "sampler_perf": {"mean_env_wait_ms": 6.778628865492887, 
"mean_processing_ms": 0.1658515378049688, "mean_inference_ms": 2.8523193064237637}, "off_policy_estimator": {}, "info": {"num_steps_trained": 576000, "num_steps_sampled": 307200, "sample_time_ms": 20518.226, "load_time_ms": 38.592, "grad_time_ms": 9000.858, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.3841858265427618e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.01208301167935133, "policy_loss": -0.0054773432202637196, "vf_loss": 7.661229610443115, "vf_explained_var": 0.24070757627487183, "kl": 0.001010378822684288, "entropy": 1.7452141046524048, "entropy_coeff": 0.004224000032991171, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 307200, "episodes_total": 768, "training_iteration": 24, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-04-37", "timestamp": 1660244677, "time_this_iter_s": 32.22774386405945, "time_total_s": 3098.744555950165, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, 
"extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3098.744555950165, "timesteps_since_restore": 307200, "iterations_since_restore": 24, "perf": {"cpu_util_percent": 33.75869565217391, "ram_util_percent": 57.7217391304348}} -{"episode_reward_max": 128.0, "episode_reward_min": 9.0, "episode_reward_mean": 45.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 67.0}, "policy_reward_mean": {"ppo": 22.71}, "custom_metrics": {"sparse_reward_mean": 7.4, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 30.62, "shaped_reward_min": 9, "shaped_reward_max": 56, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, 
"tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.3, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 5.66, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.55, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.23, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 2.55, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.99, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, 
"dish_pickup_agent_0_mean": 3.67, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.36, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.62, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.91, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.77, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.57, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.63, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.79, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.87, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.68, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.55, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.99, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.55, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, 
"viable_onion_potting_agent_1_mean": 2.99, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [38.0, 33.0, 12.0, 96.0, 77.0, 31.0, 16.0, 48.0, 28.0, 53.0, 20.0, 28.0, 20.0, 74.0, 12.0, 79.0, 17.0, 91.0, 76.0, 31.0, 9.0, 23.0, 84.0, 68.0, 125.0, 82.0, 31.0, 33.0, 17.0, 42.0, 79.0, 71.0, 50.0, 23.0, 12.0, 30.0, 63.0, 9.0, 12.0, 125.0, 82.0, 68.0, 47.0, 24.0, 28.0, 30.0, 33.0, 20.0, 47.0, 128.0, 22.0, 37.0, 43.0, 74.0, 80.0, 53.0, 62.0, 76.0, 36.0, 31.0, 39.0, 20.0, 28.0, 9.0, 34.0, 17.0, 60.0, 85.0, 9.0, 42.0, 31.0, 28.0, 34.0, 41.0, 14.0, 20.0, 31.0, 41.0, 79.0, 30.0, 74.0, 28.0, 9.0, 22.0, 39.0, 65.0, 63.0, 66.0, 68.0, 22.0, 
76.0, 47.0, 63.0, 79.0, 79.0, 74.0, 25.0, 20.0, 17.0, 25.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [25.0, 13.0, 8.0, 25.0, 6.0, 6.0, 59.0, 37.0, 34.0, 43.0, 20.0, 11.0, 0.0, 16.0, 20.0, 28.0, 16.0, 12.0, 30.0, 23.0, 8.0, 12.0, 14.0, 14.0, 11.0, 9.0, 39.0, 35.0, 6.0, 6.0, 37.0, 42.0, 14.0, 3.0, 45.0, 46.0, 42.0, 34.0, 20.0, 11.0, 3.0, 6.0, 12.0, 11.0, 33.0, 51.0, 40.0, 28.0, 59.0, 66.0, 40.0, 42.0, 17.0, 14.0, 14.0, 19.0, 5.0, 12.0, 6.0, 36.0, 34.0, 45.0, 29.0, 42.0, 19.0, 31.0, 11.0, 12.0, 6.0, 6.0, 9.0, 21.0, 29.0, 34.0, 3.0, 6.0, 6.0, 6.0, 58.0, 67.0, 48.0, 34.0, 32.0, 36.0, 8.0, 39.0, 9.0, 15.0, 13.0, 15.0, 19.0, 11.0, 14.0, 19.0, 8.0, 12.0, 28.0, 19.0, 61.0, 67.0, 10.0, 12.0, 11.0, 26.0, 14.0, 29.0, 40.0, 34.0, 38.0, 42.0, 17.0, 36.0, 26.0, 36.0, 39.0, 37.0, 14.0, 22.0, 22.0, 9.0, 22.0, 17.0, 3.0, 17.0, 8.0, 20.0, 9.0, 0.0, 17.0, 17.0, 11.0, 6.0, 31.0, 29.0, 48.0, 37.0, 6.0, 3.0, 22.0, 20.0, 19.0, 12.0, 14.0, 14.0, 29.0, 5.0, 25.0, 16.0, 14.0, 0.0, 9.0, 11.0, 16.0, 15.0, 14.0, 27.0, 37.0, 42.0, 15.0, 15.0, 42.0, 32.0, 16.0, 12.0, 6.0, 3.0, 19.0, 3.0, 28.0, 11.0, 28.0, 37.0, 31.0, 32.0, 35.0, 31.0, 26.0, 42.0, 5.0, 17.0, 34.0, 42.0, 31.0, 16.0, 28.0, 35.0, 35.0, 44.0, 40.0, 39.0, 38.0, 36.0, 8.0, 17.0, 12.0, 8.0, 8.0, 9.0, 9.0, 16.0]}, "sampler_perf": {"mean_env_wait_ms": 6.515320221213355, "mean_processing_ms": 0.1659420994573044, "mean_inference_ms": 2.777987966122339}, "off_policy_estimator": {}, "info": {"num_steps_trained": 600000, "num_steps_sampled": 320000, 
"sample_time_ms": 21126.015, "load_time_ms": 38.663, "grad_time_ms": 9096.932, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.1920929132713809e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.00892395805567503, "policy_loss": -0.008948341012001038, "vf_loss": 8.925480842590332, "vf_explained_var": 0.24435751140117645, "kl": 0.0012184166116639972, "entropy": 1.7363275289535522, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 320000, "episodes_total": 800, "training_iteration": 25, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-05-13", "timestamp": 1660244713, "time_this_iter_s": 35.73040580749512, "time_total_s": 3134.47496175766, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3134.47496175766, "timesteps_since_restore": 320000, "iterations_since_restore": 25, "perf": {"cpu_util_percent": 36.32, "ram_util_percent": 57.904}} -{"episode_reward_max": 142.0, "episode_reward_min": 9.0, "episode_reward_mean": 46.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 79.0}, "policy_reward_mean": {"ppo": 23.465}, "custom_metrics": {"sparse_reward_mean": 7.8, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 31.33, "shaped_reward_min": 9, "shaped_reward_max": 65, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, 
"useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.5, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 5.81, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.81, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.34, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.31, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 2.7, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.0, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.76, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.26, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 
0.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.49, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.96, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.73, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.52, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.63, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.68, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.77, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.78, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.76, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.7, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.0, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.7, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.0, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 142.0, 120.0, 26.0, 59.0, 71.0, 122.0, 44.0, 53.0, 12.0, 45.0, 47.0, 36.0, 83.0, 36.0, 74.0, 31.0, 9.0, 31.0, 9.0, 105.0, 25.0, 79.0, 33.0, 74.0, 41.0, 63.0, 31.0, 9.0, 20.0, 22.0, 31.0, 34.0, 17.0, 60.0, 85.0, 9.0, 42.0, 31.0, 28.0, 34.0, 41.0, 14.0, 20.0, 31.0, 41.0, 79.0, 30.0, 74.0, 28.0, 9.0, 22.0, 39.0, 65.0, 63.0, 66.0, 68.0, 22.0, 76.0, 47.0, 63.0, 79.0, 79.0, 74.0, 25.0, 20.0, 17.0, 25.0, 38.0, 33.0, 12.0, 96.0, 77.0, 31.0, 16.0, 48.0, 28.0, 53.0, 20.0, 28.0, 20.0, 74.0, 12.0, 79.0, 17.0, 91.0, 76.0, 31.0, 9.0, 23.0, 84.0, 68.0, 125.0, 82.0, 31.0, 33.0, 17.0, 42.0, 79.0, 71.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 9.0, 63.0, 79.0, 63.0, 57.0, 9.0, 17.0, 36.0, 23.0, 37.0, 34.0, 60.0, 62.0, 36.0, 8.0, 28.0, 25.0, 9.0, 3.0, 14.0, 31.0, 21.0, 26.0, 16.0, 20.0, 48.0, 35.0, 17.0, 19.0, 37.0, 37.0, 3.0, 28.0, 3.0, 6.0, 9.0, 22.0, 3.0, 6.0, 51.0, 54.0, 14.0, 11.0, 28.0, 51.0, 11.0, 22.0, 34.0, 40.0, 19.0, 22.0, 37.0, 26.0, 17.0, 14.0, 0.0, 9.0, 8.0, 12.0, 11.0, 11.0, 17.0, 14.0, 17.0, 17.0, 11.0, 6.0, 31.0, 29.0, 48.0, 37.0, 6.0, 3.0, 22.0, 20.0, 19.0, 12.0, 14.0, 14.0, 29.0, 5.0, 25.0, 16.0, 14.0, 0.0, 9.0, 11.0, 16.0, 15.0, 14.0, 27.0, 37.0, 42.0, 15.0, 15.0, 42.0, 32.0, 16.0, 12.0, 6.0, 3.0, 19.0, 3.0, 28.0, 11.0, 28.0, 37.0, 31.0, 32.0, 35.0, 31.0, 26.0, 42.0, 5.0, 17.0, 34.0, 42.0, 31.0, 16.0, 28.0, 35.0, 35.0, 44.0, 40.0, 39.0, 38.0, 36.0, 8.0, 17.0, 12.0, 8.0, 8.0, 9.0, 9.0, 16.0, 25.0, 13.0, 8.0, 25.0, 6.0, 6.0, 59.0, 37.0, 34.0, 43.0, 20.0, 11.0, 0.0, 16.0, 20.0, 28.0, 16.0, 12.0, 30.0, 23.0, 8.0, 12.0, 14.0, 14.0, 11.0, 9.0, 39.0, 35.0, 6.0, 6.0, 37.0, 42.0, 14.0, 3.0, 45.0, 46.0, 42.0, 34.0, 20.0, 11.0, 3.0, 6.0, 12.0, 11.0, 33.0, 51.0, 40.0, 28.0, 59.0, 66.0, 40.0, 42.0, 17.0, 14.0, 14.0, 19.0, 5.0, 12.0, 6.0, 36.0, 34.0, 45.0, 29.0, 42.0]}, "sampler_perf": {"mean_env_wait_ms": 6.273535039263677, "mean_processing_ms": 0.16612280496799353, "mean_inference_ms": 2.713084381170351}, "off_policy_estimator": {}, "info": {"num_steps_trained": 624000, "num_steps_sampled": 332800, "sample_time_ms": 21568.501, "load_time_ms": 38.757, "grad_time_ms": 9275.576, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.9604645663569045e-09, "cur_lr": 0.0010000000474974513, 
"total_loss": -0.011042184196412563, "policy_loss": -0.01108124852180481, "vf_loss": 9.051116943359375, "vf_explained_var": 0.3293954133987427, "kl": 0.0011855209013447165, "entropy": 1.7320860624313354, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 332800, "episodes_total": 832, "training_iteration": 26, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-05-48", "timestamp": 1660244748, "time_this_iter_s": 34.898388147354126, "time_total_s": 3169.373349905014, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, 
"bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3169.373349905014, "timesteps_since_restore": 332800, "iterations_since_restore": 26, "perf": {"cpu_util_percent": 38.21224489795919, "ram_util_percent": 57.97551020408163}} -{"episode_reward_max": 142.0, "episode_reward_min": 9.0, "episode_reward_mean": 48.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 79.0}, "policy_reward_mean": {"ppo": 24.285}, "custom_metrics": {"sparse_reward_mean": 8.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 32.17, "shaped_reward_min": 9, "shaped_reward_max": 73, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, 
"tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.38, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.95, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.8, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.51, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 2.69, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.14, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.9, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.46, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.66, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.5, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.83, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.75, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.7, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.77, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.75, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.87, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.69, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.14, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.69, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.14, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [44.0, 12.0, 63.0, 25.0, 17.0, 49.0, 9.0, 79.0, 119.0, 43.0, 12.0, 130.0, 79.0, 53.0, 22.0, 24.0, 36.0, 17.0, 82.0, 42.0, 65.0, 88.0, 23.0, 80.0, 66.0, 44.0, 113.0, 9.0, 66.0, 76.0, 9.0, 38.0, 25.0, 20.0, 17.0, 25.0, 38.0, 33.0, 12.0, 96.0, 77.0, 31.0, 16.0, 48.0, 28.0, 53.0, 20.0, 28.0, 20.0, 74.0, 12.0, 79.0, 17.0, 91.0, 76.0, 31.0, 9.0, 23.0, 84.0, 68.0, 125.0, 82.0, 31.0, 33.0, 17.0, 42.0, 79.0, 71.0, 9.0, 142.0, 120.0, 26.0, 59.0, 71.0, 122.0, 44.0, 53.0, 12.0, 45.0, 47.0, 36.0, 83.0, 36.0, 74.0, 31.0, 9.0, 31.0, 9.0, 105.0, 25.0, 79.0, 33.0, 74.0, 41.0, 63.0, 31.0, 9.0, 20.0, 22.0, 31.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [23.0, 21.0, 9.0, 3.0, 43.0, 20.0, 9.0, 16.0, 12.0, 5.0, 18.0, 31.0, 6.0, 3.0, 34.0, 45.0, 60.0, 59.0, 14.0, 29.0, 6.0, 6.0, 65.0, 65.0, 42.0, 37.0, 19.0, 34.0, 9.0, 13.0, 8.0, 16.0, 19.0, 17.0, 5.0, 12.0, 34.0, 48.0, 17.0, 25.0, 34.0, 31.0, 57.0, 31.0, 17.0, 6.0, 32.0, 48.0, 29.0, 37.0, 19.0, 25.0, 70.0, 43.0, 0.0, 9.0, 34.0, 32.0, 44.0, 32.0, 3.0, 6.0, 22.0, 16.0, 8.0, 17.0, 12.0, 8.0, 8.0, 9.0, 9.0, 16.0, 25.0, 13.0, 8.0, 25.0, 6.0, 6.0, 59.0, 37.0, 34.0, 43.0, 20.0, 11.0, 0.0, 16.0, 20.0, 28.0, 16.0, 12.0, 30.0, 23.0, 8.0, 12.0, 14.0, 14.0, 11.0, 9.0, 39.0, 35.0, 6.0, 6.0, 37.0, 42.0, 14.0, 3.0, 45.0, 46.0, 42.0, 34.0, 20.0, 11.0, 3.0, 6.0, 12.0, 11.0, 33.0, 51.0, 40.0, 28.0, 59.0, 66.0, 40.0, 42.0, 17.0, 14.0, 14.0, 19.0, 5.0, 12.0, 6.0, 36.0, 34.0, 45.0, 29.0, 42.0, 0.0, 9.0, 63.0, 79.0, 63.0, 57.0, 9.0, 17.0, 36.0, 23.0, 37.0, 34.0, 60.0, 62.0, 36.0, 8.0, 28.0, 25.0, 9.0, 3.0, 14.0, 31.0, 21.0, 26.0, 16.0, 20.0, 48.0, 35.0, 17.0, 19.0, 37.0, 37.0, 3.0, 28.0, 3.0, 6.0, 9.0, 22.0, 3.0, 6.0, 51.0, 54.0, 14.0, 11.0, 28.0, 51.0, 11.0, 22.0, 34.0, 40.0, 19.0, 22.0, 37.0, 26.0, 17.0, 14.0, 0.0, 9.0, 8.0, 12.0, 11.0, 11.0, 17.0, 14.0]}, "sampler_perf": {"mean_env_wait_ms": 6.050716001931983, "mean_processing_ms": 0.1663751803875143, "mean_inference_ms": 2.655688107582492}, "off_policy_estimator": {}, "info": {"num_steps_trained": 648000, "num_steps_sampled": 345600, "sample_time_ms": 22006.752, "load_time_ms": 38.851, "grad_time_ms": 9447.322, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.9802322831784522e-09, "cur_lr": 0.0010000000474974513, "total_loss": -0.010166086256504059, "policy_loss": -0.010217566043138504, "vf_loss": 9.166760444641113, "vf_explained_var": 
0.3867878019809723, "kl": 0.001088446588255465, "entropy": 1.7303863763809204, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 345600, "episodes_total": 864, "training_iteration": 27, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-06-24", "timestamp": 1660244784, "time_this_iter_s": 35.4101459980011, "time_total_s": 3204.783495903015, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), 
float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3204.783495903015, "timesteps_since_restore": 345600, "iterations_since_restore": 27, "perf": {"cpu_util_percent": 38.552, "ram_util_percent": 58.32}} -{"episode_reward_max": 145.0, "episode_reward_min": 9.0, "episode_reward_mean": 55.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 79.0}, "policy_reward_mean": {"ppo": 27.97}, "custom_metrics": {"sparse_reward_mean": 10.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 35.94, "shaped_reward_min": 9, "shaped_reward_max": 73, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.38, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.64, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.83, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 5.04, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 2.73, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.69, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.25, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.71, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.55, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, 
"dish_drop_agent_1_mean": 1.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.58, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.87, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.87, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.97, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.89, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.73, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.69, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.73, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.69, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [93.0, 93.0, 74.0, 133.0, 106.0, 85.0, 23.0, 47.0, 93.0, 23.0, 47.0, 74.0, 33.0, 98.0, 96.0, 17.0, 49.0, 110.0, 84.0, 145.0, 34.0, 99.0, 48.0, 55.0, 71.0, 71.0, 65.0, 46.0, 44.0, 12.0, 20.0, 71.0, 17.0, 42.0, 79.0, 71.0, 9.0, 142.0, 120.0, 26.0, 59.0, 71.0, 122.0, 44.0, 53.0, 12.0, 45.0, 47.0, 36.0, 83.0, 36.0, 74.0, 31.0, 9.0, 31.0, 9.0, 105.0, 25.0, 79.0, 33.0, 74.0, 41.0, 63.0, 31.0, 9.0, 20.0, 22.0, 31.0, 44.0, 12.0, 63.0, 25.0, 17.0, 49.0, 9.0, 79.0, 119.0, 43.0, 12.0, 130.0, 79.0, 53.0, 22.0, 24.0, 36.0, 17.0, 82.0, 42.0, 65.0, 88.0, 23.0, 80.0, 66.0, 44.0, 113.0, 9.0, 66.0, 76.0, 9.0, 38.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [47.0, 46.0, 53.0, 40.0, 37.0, 37.0, 59.0, 74.0, 55.0, 51.0, 40.0, 45.0, 3.0, 20.0, 22.0, 25.0, 53.0, 40.0, 14.0, 9.0, 24.0, 23.0, 36.0, 38.0, 21.0, 12.0, 37.0, 61.0, 44.0, 52.0, 6.0, 11.0, 29.0, 20.0, 48.0, 62.0, 38.0, 46.0, 76.0, 69.0, 6.0, 28.0, 47.0, 52.0, 17.0, 31.0, 16.0, 39.0, 40.0, 31.0, 35.0, 36.0, 32.0, 33.0, 21.0, 25.0, 23.0, 21.0, 6.0, 6.0, 8.0, 12.0, 33.0, 38.0, 5.0, 12.0, 6.0, 36.0, 34.0, 45.0, 29.0, 42.0, 0.0, 9.0, 63.0, 79.0, 63.0, 57.0, 9.0, 17.0, 36.0, 23.0, 37.0, 34.0, 60.0, 62.0, 36.0, 8.0, 28.0, 25.0, 9.0, 3.0, 14.0, 31.0, 21.0, 26.0, 16.0, 20.0, 48.0, 35.0, 17.0, 19.0, 37.0, 37.0, 3.0, 28.0, 3.0, 6.0, 9.0, 22.0, 3.0, 6.0, 51.0, 54.0, 14.0, 11.0, 28.0, 51.0, 11.0, 22.0, 34.0, 40.0, 19.0, 22.0, 37.0, 26.0, 17.0, 14.0, 0.0, 9.0, 8.0, 12.0, 11.0, 11.0, 17.0, 14.0, 23.0, 21.0, 9.0, 3.0, 43.0, 20.0, 9.0, 16.0, 12.0, 5.0, 18.0, 31.0, 6.0, 3.0, 34.0, 45.0, 60.0, 59.0, 14.0, 29.0, 6.0, 6.0, 65.0, 65.0, 42.0, 37.0, 19.0, 34.0, 9.0, 13.0, 8.0, 16.0, 19.0, 17.0, 5.0, 12.0, 34.0, 48.0, 17.0, 25.0, 34.0, 31.0, 57.0, 31.0, 17.0, 6.0, 32.0, 48.0, 29.0, 37.0, 19.0, 25.0, 70.0, 43.0, 0.0, 9.0, 34.0, 32.0, 44.0, 32.0, 3.0, 6.0, 22.0, 16.0]}, "sampler_perf": {"mean_env_wait_ms": 5.8446114836959895, "mean_processing_ms": 0.16658688353388335, "mean_inference_ms": 2.6012841728705705}, "off_policy_estimator": {}, "info": {"num_steps_trained": 672000, "num_steps_sampled": 358400, "sample_time_ms": 22190.055, "load_time_ms": 38.857, "grad_time_ms": 9598.179, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.4901161415892261e-09, "cur_lr": 0.0010000000474974513, "total_loss": -0.008647923357784748, "policy_loss": -0.00907482486218214, "vf_loss": 12.8626708984375, "vf_explained_var": 0.32375723123550415, "kl": 0.0009376012603752315, "entropy": 1.7187572717666626, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 358400, "episodes_total": 896, "training_iteration": 28, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-06-58", "timestamp": 1660244818, "time_this_iter_s": 34.0201780796051, "time_total_s": 3238.8036739826202, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3238.8036739826202, "timesteps_since_restore": 358400, "iterations_since_restore": 28, "perf": {"cpu_util_percent": 38.32708333333333, "ram_util_percent": 57.17499999999999}} -{"episode_reward_max": 145.0, "episode_reward_min": 9.0, "episode_reward_mean": 55.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 76.0}, "policy_reward_mean": {"ppo": 27.675}, "custom_metrics": {"sparse_reward_mean": 9.6, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 36.15, "shaped_reward_min": 9, "shaped_reward_max": 73, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.38, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 6.69, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.84, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 4.96, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.75, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.61, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.54, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.31, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.66, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.65, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.58, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.82, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.92, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.89, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 1.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.83, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.75, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.61, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.75, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.61, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [39.0, 96.0, 81.0, 77.0, 9.0, 34.0, 47.0, 20.0, 74.0, 44.0, 120.0, 34.0, 22.0, 42.0, 76.0, 66.0, 38.0, 44.0, 9.0, 101.0, 37.0, 39.0, 34.0, 36.0, 53.0, 9.0, 66.0, 80.0, 95.0, 34.0, 50.0, 54.0, 9.0, 20.0, 22.0, 31.0, 44.0, 12.0, 63.0, 25.0, 17.0, 49.0, 9.0, 79.0, 119.0, 43.0, 12.0, 130.0, 79.0, 53.0, 22.0, 24.0, 36.0, 17.0, 82.0, 42.0, 65.0, 88.0, 23.0, 80.0, 66.0, 44.0, 113.0, 9.0, 66.0, 76.0, 9.0, 38.0, 93.0, 93.0, 74.0, 133.0, 106.0, 85.0, 23.0, 47.0, 93.0, 23.0, 47.0, 74.0, 33.0, 98.0, 96.0, 17.0, 49.0, 110.0, 84.0, 145.0, 34.0, 99.0, 48.0, 55.0, 71.0, 71.0, 65.0, 46.0, 44.0, 12.0, 20.0, 71.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [15.0, 24.0, 32.0, 64.0, 38.0, 43.0, 
39.0, 38.0, 0.0, 9.0, 14.0, 20.0, 18.0, 29.0, 14.0, 6.0, 40.0, 34.0, 22.0, 22.0, 62.0, 58.0, 15.0, 19.0, 10.0, 12.0, 25.0, 17.0, 34.0, 42.0, 37.0, 29.0, 22.0, 16.0, 24.0, 20.0, 0.0, 9.0, 45.0, 56.0, 20.0, 17.0, 30.0, 9.0, 12.0, 22.0, 14.0, 22.0, 27.0, 26.0, 0.0, 9.0, 26.0, 40.0, 34.0, 46.0, 50.0, 45.0, 18.0, 16.0, 28.0, 22.0, 28.0, 26.0, 0.0, 9.0, 8.0, 12.0, 11.0, 11.0, 17.0, 14.0, 23.0, 21.0, 9.0, 3.0, 43.0, 20.0, 9.0, 16.0, 12.0, 5.0, 18.0, 31.0, 6.0, 3.0, 34.0, 45.0, 60.0, 59.0, 14.0, 29.0, 6.0, 6.0, 65.0, 65.0, 42.0, 37.0, 19.0, 34.0, 9.0, 13.0, 8.0, 16.0, 19.0, 17.0, 5.0, 12.0, 34.0, 48.0, 17.0, 25.0, 34.0, 31.0, 57.0, 31.0, 17.0, 6.0, 32.0, 48.0, 29.0, 37.0, 19.0, 25.0, 70.0, 43.0, 0.0, 9.0, 34.0, 32.0, 44.0, 32.0, 3.0, 6.0, 22.0, 16.0, 47.0, 46.0, 53.0, 40.0, 37.0, 37.0, 59.0, 74.0, 55.0, 51.0, 40.0, 45.0, 3.0, 20.0, 22.0, 25.0, 53.0, 40.0, 14.0, 9.0, 24.0, 23.0, 36.0, 38.0, 21.0, 12.0, 37.0, 61.0, 44.0, 52.0, 6.0, 11.0, 29.0, 20.0, 48.0, 62.0, 38.0, 46.0, 76.0, 69.0, 6.0, 28.0, 47.0, 52.0, 17.0, 31.0, 16.0, 39.0, 40.0, 31.0, 35.0, 36.0, 32.0, 33.0, 21.0, 25.0, 23.0, 21.0, 6.0, 6.0, 8.0, 12.0, 33.0, 38.0]}, "sampler_perf": {"mean_env_wait_ms": 5.653214857264523, "mean_processing_ms": 0.16674718188605944, "mean_inference_ms": 2.549555614199102}, "off_policy_estimator": {}, "info": {"num_steps_trained": 696000, "num_steps_sampled": 371200, "sample_time_ms": 22379.228, "load_time_ms": 38.745, "grad_time_ms": 9750.752, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.450580707946131e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.007846680469810963, "policy_loss": -0.007850968278944492, "vf_loss": 8.63664722442627, "vf_explained_var": 0.4092896282672882, "kl": 0.001057352521456778, "entropy": 1.7187713384628296, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 371200, "episodes_total": 928, "training_iteration": 29, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-07-30", "timestamp": 1660244850, "time_this_iter_s": 32.66524410247803, "time_total_s": 3271.4689180850983, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3271.4689180850983, "timesteps_since_restore": 371200, "iterations_since_restore": 29, "perf": {"cpu_util_percent": 40.74130434782609, "ram_util_percent": 58.79130434782609}} -{"episode_reward_max": 179.0, "episode_reward_min": 9.0, "episode_reward_mean": 58.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 92.0}, "policy_reward_mean": {"ppo": 29.3}, "custom_metrics": {"sparse_reward_mean": 10.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 38.2, "shaped_reward_min": 9, "shaped_reward_max": 70, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.55, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 7.06, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 4.05, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.26, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.83, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.86, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.66, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.29, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.67, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.67, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.45, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, 
"soup_pickup_agent_0_mean": 2.26, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.9, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.07, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.92, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.9, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.83, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.86, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.83, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.86, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [90.0, 48.0, 179.0, 39.0, 60.0, 91.0, 39.0, 122.0, 20.0, 34.0, 42.0, 23.0, 50.0, 12.0, 20.0, 71.0, 66.0, 35.0, 44.0, 73.0, 64.0, 17.0, 131.0, 38.0, 41.0, 36.0, 105.0, 53.0, 39.0, 71.0, 20.0, 79.0, 66.0, 76.0, 9.0, 38.0, 93.0, 93.0, 74.0, 133.0, 106.0, 85.0, 23.0, 47.0, 93.0, 23.0, 47.0, 74.0, 33.0, 98.0, 96.0, 17.0, 49.0, 110.0, 84.0, 145.0, 34.0, 99.0, 48.0, 55.0, 71.0, 71.0, 65.0, 46.0, 44.0, 12.0, 20.0, 71.0, 39.0, 96.0, 81.0, 77.0, 9.0, 34.0, 47.0, 20.0, 74.0, 44.0, 120.0, 34.0, 22.0, 42.0, 76.0, 66.0, 38.0, 44.0, 9.0, 101.0, 37.0, 39.0, 34.0, 36.0, 53.0, 9.0, 66.0, 80.0, 95.0, 34.0, 50.0, 54.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [39.0, 51.0, 20.0, 28.0, 87.0, 92.0, 25.0, 14.0, 29.0, 31.0, 45.0, 46.0, 25.0, 14.0, 65.0, 57.0, 11.0, 9.0, 22.0, 12.0, 14.0, 28.0, 6.0, 17.0, 30.0, 20.0, 6.0, 6.0, 12.0, 8.0, 
23.0, 48.0, 29.0, 37.0, 13.0, 22.0, 20.0, 24.0, 36.0, 37.0, 31.0, 33.0, 11.0, 6.0, 68.0, 63.0, 11.0, 27.0, 24.0, 17.0, 20.0, 16.0, 64.0, 41.0, 31.0, 22.0, 12.0, 27.0, 26.0, 45.0, 11.0, 9.0, 33.0, 46.0, 34.0, 32.0, 44.0, 32.0, 3.0, 6.0, 22.0, 16.0, 47.0, 46.0, 53.0, 40.0, 37.0, 37.0, 59.0, 74.0, 55.0, 51.0, 40.0, 45.0, 3.0, 20.0, 22.0, 25.0, 53.0, 40.0, 14.0, 9.0, 24.0, 23.0, 36.0, 38.0, 21.0, 12.0, 37.0, 61.0, 44.0, 52.0, 6.0, 11.0, 29.0, 20.0, 48.0, 62.0, 38.0, 46.0, 76.0, 69.0, 6.0, 28.0, 47.0, 52.0, 17.0, 31.0, 16.0, 39.0, 40.0, 31.0, 35.0, 36.0, 32.0, 33.0, 21.0, 25.0, 23.0, 21.0, 6.0, 6.0, 8.0, 12.0, 33.0, 38.0, 15.0, 24.0, 32.0, 64.0, 38.0, 43.0, 39.0, 38.0, 0.0, 9.0, 14.0, 20.0, 18.0, 29.0, 14.0, 6.0, 40.0, 34.0, 22.0, 22.0, 62.0, 58.0, 15.0, 19.0, 10.0, 12.0, 25.0, 17.0, 34.0, 42.0, 37.0, 29.0, 22.0, 16.0, 24.0, 20.0, 0.0, 9.0, 45.0, 56.0, 20.0, 17.0, 30.0, 9.0, 12.0, 22.0, 14.0, 22.0, 27.0, 26.0, 0.0, 9.0, 26.0, 40.0, 34.0, 46.0, 50.0, 45.0, 18.0, 16.0, 28.0, 22.0, 28.0, 26.0]}, "sampler_perf": {"mean_env_wait_ms": 5.475175554008815, "mean_processing_ms": 0.16692031317355585, "mean_inference_ms": 2.4996312531417773}, "off_policy_estimator": {}, "info": {"num_steps_trained": 720000, "num_steps_sampled": 384000, "sample_time_ms": 22626.486, "load_time_ms": 38.637, "grad_time_ms": 9834.774, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.7252903539730653e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.010750534944236279, "policy_loss": -0.01101712416857481, "vf_loss": 11.21933650970459, "vf_explained_var": 0.33813270926475525, "kl": 0.0012414826778694987, "entropy": 1.7106833457946777, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 384000, "episodes_total": 960, "training_iteration": 30, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-08-02", "timestamp": 1660244882, "time_this_iter_s": 32.23107981681824, 
"time_total_s": 3303.6999979019165, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": 
false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": 
true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3303.6999979019165, "timesteps_since_restore": 384000, "iterations_since_restore": 30, "perf": {"cpu_util_percent": 45.34130434782608, "ram_util_percent": 56.88478260869565}} -{"episode_reward_max": 182.0, "episode_reward_min": 9.0, "episode_reward_mean": 57.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 96.0}, "policy_reward_mean": {"ppo": 28.805}, "custom_metrics": {"sparse_reward_mean": 10.4, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 36.81, "shaped_reward_min": 9, "shaped_reward_max": 65, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 5.93, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 6.72, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 4.36, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 4.83, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.46, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.99, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.62, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.62, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.3, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.64, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.68, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.5, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.73, 
"soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.09, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.82, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.86, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.85, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.99, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.62, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.99, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.62, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [34.0, 182.0, 133.0, 66.0, 23.0, 42.0, 53.0, 125.0, 22.0, 12.0, 87.0, 98.0, 9.0, 46.0, 30.0, 12.0, 37.0, 99.0, 63.0, 85.0, 77.0, 42.0, 133.0, 173.0, 66.0, 35.0, 39.0, 88.0, 34.0, 52.0, 82.0, 23.0, 44.0, 12.0, 20.0, 71.0, 39.0, 96.0, 81.0, 77.0, 9.0, 34.0, 47.0, 20.0, 74.0, 44.0, 120.0, 34.0, 22.0, 42.0, 76.0, 66.0, 38.0, 44.0, 9.0, 101.0, 37.0, 39.0, 34.0, 36.0, 53.0, 9.0, 66.0, 80.0, 95.0, 34.0, 50.0, 54.0, 90.0, 48.0, 179.0, 39.0, 60.0, 91.0, 39.0, 122.0, 20.0, 34.0, 42.0, 23.0, 50.0, 12.0, 20.0, 71.0, 66.0, 35.0, 44.0, 73.0, 64.0, 17.0, 131.0, 38.0, 41.0, 36.0, 105.0, 53.0, 39.0, 71.0, 20.0, 79.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [23.0, 11.0, 86.0, 96.0, 68.0, 65.0, 37.0, 29.0, 17.0, 6.0, 6.0, 36.0, 22.0, 31.0, 62.0, 63.0, 8.0, 14.0, 3.0, 9.0, 41.0, 46.0, 55.0, 43.0, 0.0, 9.0, 28.0, 18.0, 19.0, 11.0, 9.0, 3.0, 23.0, 14.0, 59.0, 40.0, 31.0, 32.0, 45.0, 40.0, 39.0, 38.0, 19.0, 23.0, 67.0, 66.0, 85.0, 88.0, 34.0, 32.0, 20.0, 15.0, 22.0, 17.0, 
37.0, 51.0, 20.0, 14.0, 27.0, 25.0, 43.0, 39.0, 11.0, 12.0, 23.0, 21.0, 6.0, 6.0, 8.0, 12.0, 33.0, 38.0, 15.0, 24.0, 32.0, 64.0, 38.0, 43.0, 39.0, 38.0, 0.0, 9.0, 14.0, 20.0, 18.0, 29.0, 14.0, 6.0, 40.0, 34.0, 22.0, 22.0, 62.0, 58.0, 15.0, 19.0, 10.0, 12.0, 25.0, 17.0, 34.0, 42.0, 37.0, 29.0, 22.0, 16.0, 24.0, 20.0, 0.0, 9.0, 45.0, 56.0, 20.0, 17.0, 30.0, 9.0, 12.0, 22.0, 14.0, 22.0, 27.0, 26.0, 0.0, 9.0, 26.0, 40.0, 34.0, 46.0, 50.0, 45.0, 18.0, 16.0, 28.0, 22.0, 28.0, 26.0, 39.0, 51.0, 20.0, 28.0, 87.0, 92.0, 25.0, 14.0, 29.0, 31.0, 45.0, 46.0, 25.0, 14.0, 65.0, 57.0, 11.0, 9.0, 22.0, 12.0, 14.0, 28.0, 6.0, 17.0, 30.0, 20.0, 6.0, 6.0, 12.0, 8.0, 23.0, 48.0, 29.0, 37.0, 13.0, 22.0, 20.0, 24.0, 36.0, 37.0, 31.0, 33.0, 11.0, 6.0, 68.0, 63.0, 11.0, 27.0, 24.0, 17.0, 20.0, 16.0, 64.0, 41.0, 31.0, 22.0, 12.0, 27.0, 26.0, 45.0, 11.0, 9.0, 33.0, 46.0]}, "sampler_perf": {"mean_env_wait_ms": 5.308917467290777, "mean_processing_ms": 0.16704433507360725, "mean_inference_ms": 2.4512479594099825}, "off_policy_estimator": {}, "info": {"num_steps_trained": 744000, "num_steps_sampled": 396800, "sample_time_ms": 22677.978, "load_time_ms": 38.542, "grad_time_ms": 9985.158, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.8626451769865326e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.00831019040197134, "policy_loss": -0.008908797055482864, "vf_loss": 14.524895668029785, "vf_explained_var": 0.35295844078063965, "kl": 0.0011723049683496356, "entropy": 1.7077676057815552, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 396800, "episodes_total": 992, "training_iteration": 31, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-08-34", "timestamp": 1660244914, "time_this_iter_s": 31.540908813476562, "time_total_s": 3335.240906715393, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": 
{"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3335.240906715393, "timesteps_since_restore": 396800, "iterations_since_restore": 31, "perf": {"cpu_util_percent": 42.184090909090905, "ram_util_percent": 56.9090909090909}} -{"episode_reward_max": 187.0, "episode_reward_min": 9.0, "episode_reward_mean": 63.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 101.0}, "policy_reward_mean": {"ppo": 31.655}, "custom_metrics": {"sparse_reward_mean": 12.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 39.31, "shaped_reward_min": 9, "shaped_reward_max": 77, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.12, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 6.89, 
"onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 4.48, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.08, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.41, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.38, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.33, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 3.08, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.95, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 4.02, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.61, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.77, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.57, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.24, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.26, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.91, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.9, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.86, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.08, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.95, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.08, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.95, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [87.0, 34.0, 187.0, 74.0, 128.0, 47.0, 23.0, 157.0, 12.0, 85.0, 20.0, 42.0, 20.0, 92.0, 82.0, 44.0, 9.0, 87.0, 20.0, 98.0, 31.0, 42.0, 83.0, 58.0, 45.0, 96.0, 81.0, 93.0, 36.0, 69.0, 88.0, 74.0, 95.0, 34.0, 50.0, 54.0, 90.0, 48.0, 179.0, 39.0, 60.0, 91.0, 39.0, 122.0, 20.0, 34.0, 42.0, 23.0, 50.0, 12.0, 20.0, 71.0, 66.0, 35.0, 44.0, 73.0, 64.0, 17.0, 131.0, 38.0, 41.0, 36.0, 105.0, 53.0, 39.0, 71.0, 20.0, 79.0, 34.0, 182.0, 133.0, 66.0, 23.0, 42.0, 53.0, 125.0, 22.0, 12.0, 87.0, 98.0, 9.0, 46.0, 30.0, 12.0, 37.0, 99.0, 63.0, 85.0, 77.0, 42.0, 133.0, 173.0, 66.0, 35.0, 39.0, 88.0, 34.0, 52.0, 82.0, 23.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [50.0, 37.0, 17.0, 17.0, 86.0, 101.0, 37.0, 37.0, 55.0, 73.0, 24.0, 23.0, 17.0, 6.0, 79.0, 78.0, 6.0, 6.0, 37.0, 48.0, 5.0, 15.0, 13.0, 29.0, 9.0, 11.0, 45.0, 47.0, 39.0, 43.0, 13.0, 31.0, 6.0, 3.0, 42.0, 45.0, 14.0, 6.0, 58.0, 40.0, 16.0, 15.0, 11.0, 31.0, 40.0, 43.0, 14.0, 44.0, 17.0, 28.0, 45.0, 51.0, 44.0, 37.0, 49.0, 44.0, 14.0, 22.0, 35.0, 34.0, 36.0, 52.0, 48.0, 26.0, 50.0, 45.0, 18.0, 16.0, 28.0, 22.0, 28.0, 26.0, 39.0, 
51.0, 20.0, 28.0, 87.0, 92.0, 25.0, 14.0, 29.0, 31.0, 45.0, 46.0, 25.0, 14.0, 65.0, 57.0, 11.0, 9.0, 22.0, 12.0, 14.0, 28.0, 6.0, 17.0, 30.0, 20.0, 6.0, 6.0, 12.0, 8.0, 23.0, 48.0, 29.0, 37.0, 13.0, 22.0, 20.0, 24.0, 36.0, 37.0, 31.0, 33.0, 11.0, 6.0, 68.0, 63.0, 11.0, 27.0, 24.0, 17.0, 20.0, 16.0, 64.0, 41.0, 31.0, 22.0, 12.0, 27.0, 26.0, 45.0, 11.0, 9.0, 33.0, 46.0, 23.0, 11.0, 86.0, 96.0, 68.0, 65.0, 37.0, 29.0, 17.0, 6.0, 6.0, 36.0, 22.0, 31.0, 62.0, 63.0, 8.0, 14.0, 3.0, 9.0, 41.0, 46.0, 55.0, 43.0, 0.0, 9.0, 28.0, 18.0, 19.0, 11.0, 9.0, 3.0, 23.0, 14.0, 59.0, 40.0, 31.0, 32.0, 45.0, 40.0, 39.0, 38.0, 19.0, 23.0, 67.0, 66.0, 85.0, 88.0, 34.0, 32.0, 20.0, 15.0, 22.0, 17.0, 37.0, 51.0, 20.0, 14.0, 27.0, 25.0, 43.0, 39.0, 11.0, 12.0]}, "sampler_perf": {"mean_env_wait_ms": 5.15345566138169, "mean_processing_ms": 0.16717489002220728, "mean_inference_ms": 2.4061553716842257}, "off_policy_estimator": {}, "info": {"num_steps_trained": 768000, "num_steps_sampled": 409600, "sample_time_ms": 22972.316, "load_time_ms": 38.916, "grad_time_ms": 10035.28, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 9.313225884932663e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.007412114646285772, "policy_loss": -0.007903209887444973, "vf_loss": 13.404266357421875, "vf_explained_var": 0.34650716185569763, "kl": 0.0011789536802098155, "entropy": 1.6986547708511353, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 409600, "episodes_total": 1024, "training_iteration": 32, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-09-07", "timestamp": 1660244947, "time_this_iter_s": 32.6441330909729, "time_total_s": 3367.885039806366, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3367.885039806366, "timesteps_since_restore": 409600, "iterations_since_restore": 32, "perf": {"cpu_util_percent": 40.13191489361702, "ram_util_percent": 57.20638297872342}} -{"episode_reward_max": 187.0, "episode_reward_min": 9.0, "episode_reward_mean": 66.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 101.0}, "policy_reward_mean": {"ppo": 33.165}, "custom_metrics": {"sparse_reward_mean": 13.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 39.13, "shaped_reward_min": 9, "shaped_reward_max": 77, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.01, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 6.99, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 4.25, 
"useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.15, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.61, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.51, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.97, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 4.08, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.66, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.71, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.65, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.67, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.1, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.91, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 0.96, "soup_delivery_agent_1_min": 0, 
"soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.87, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.91, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.97, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 4.08, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.97, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 4.08, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, 
"useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 76.0, 98.0, 45.0, 41.0, 127.0, 12.0, 46.0, 70.0, 144.0, 71.0, 117.0, 111.0, 9.0, 54.0, 40.0, 79.0, 14.0, 62.0, 63.0, 106.0, 20.0, 27.0, 136.0, 90.0, 34.0, 52.0, 94.0, 117.0, 90.0, 39.0, 85.0, 39.0, 71.0, 20.0, 79.0, 34.0, 182.0, 133.0, 66.0, 23.0, 42.0, 53.0, 125.0, 22.0, 12.0, 87.0, 98.0, 9.0, 46.0, 30.0, 12.0, 37.0, 99.0, 63.0, 85.0, 77.0, 42.0, 133.0, 173.0, 66.0, 35.0, 39.0, 88.0, 34.0, 52.0, 82.0, 23.0, 87.0, 34.0, 187.0, 74.0, 128.0, 47.0, 23.0, 157.0, 12.0, 85.0, 20.0, 42.0, 20.0, 92.0, 82.0, 44.0, 9.0, 87.0, 20.0, 98.0, 31.0, 42.0, 83.0, 58.0, 45.0, 96.0, 81.0, 93.0, 36.0, 69.0, 88.0, 74.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 41.0, 35.0, 51.0, 47.0, 23.0, 22.0, 19.0, 22.0, 57.0, 70.0, 6.0, 6.0, 21.0, 25.0, 31.0, 39.0, 73.0, 71.0, 34.0, 37.0, 55.0, 62.0, 51.0, 60.0, 6.0, 3.0, 20.0, 34.0, 20.0, 20.0, 29.0, 50.0, 11.0, 3.0, 25.0, 37.0, 23.0, 40.0, 52.0, 54.0, 8.0, 12.0, 18.0, 9.0, 66.0, 70.0, 41.0, 49.0, 12.0, 22.0, 25.0, 27.0, 46.0, 48.0, 66.0, 51.0, 42.0, 48.0, 19.0, 20.0, 48.0, 37.0, 12.0, 27.0, 26.0, 45.0, 11.0, 9.0, 33.0, 46.0, 23.0, 11.0, 86.0, 96.0, 68.0, 65.0, 37.0, 29.0, 17.0, 6.0, 6.0, 36.0, 22.0, 31.0, 62.0, 63.0, 8.0, 14.0, 3.0, 9.0, 41.0, 46.0, 55.0, 
43.0, 0.0, 9.0, 28.0, 18.0, 19.0, 11.0, 9.0, 3.0, 23.0, 14.0, 59.0, 40.0, 31.0, 32.0, 45.0, 40.0, 39.0, 38.0, 19.0, 23.0, 67.0, 66.0, 85.0, 88.0, 34.0, 32.0, 20.0, 15.0, 22.0, 17.0, 37.0, 51.0, 20.0, 14.0, 27.0, 25.0, 43.0, 39.0, 11.0, 12.0, 50.0, 37.0, 17.0, 17.0, 86.0, 101.0, 37.0, 37.0, 55.0, 73.0, 24.0, 23.0, 17.0, 6.0, 79.0, 78.0, 6.0, 6.0, 37.0, 48.0, 5.0, 15.0, 13.0, 29.0, 9.0, 11.0, 45.0, 47.0, 39.0, 43.0, 13.0, 31.0, 6.0, 3.0, 42.0, 45.0, 14.0, 6.0, 58.0, 40.0, 16.0, 15.0, 11.0, 31.0, 40.0, 43.0, 14.0, 44.0, 17.0, 28.0, 45.0, 51.0, 44.0, 37.0, 49.0, 44.0, 14.0, 22.0, 35.0, 34.0, 36.0, 52.0, 48.0, 26.0]}, "sampler_perf": {"mean_env_wait_ms": 5.0079354762159145, "mean_processing_ms": 0.1673845002022688, "mean_inference_ms": 2.3652145638679087}, "off_policy_estimator": {}, "info": {"num_steps_trained": 792000, "num_steps_sampled": 422400, "sample_time_ms": 23410.855, "load_time_ms": 38.911, "grad_time_ms": 10160.504, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.6566129424663316e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.01017048116773367, "policy_loss": -0.010584059171378613, "vf_loss": 12.619880676269531, "vf_explained_var": 0.45027461647987366, "kl": 0.001254777773283422, "entropy": 1.6968183517456055, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 422400, "episodes_total": 1056, "training_iteration": 33, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-09-42", "timestamp": 1660244982, "time_this_iter_s": 35.00341510772705, "time_total_s": 3402.888454914093, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", 
"fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, 
"sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3402.888454914093, 
"timesteps_since_restore": 422400, "iterations_since_restore": 33, "perf": {"cpu_util_percent": 42.62857142857143, "ram_util_percent": 58.25510204081633}} -{"episode_reward_max": 187.0, "episode_reward_min": 9.0, "episode_reward_mean": 68.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 101.0}, "policy_reward_mean": {"ppo": 34.395}, "custom_metrics": {"sparse_reward_mean": 13.8, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 41.19, "shaped_reward_min": 9, "shaped_reward_max": 77, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.84, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 7.2, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.04, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.36, 
"useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.45, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 3.01, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 4.16, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.86, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 3.65, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.85, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.21, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.04, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.17, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 1.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 
0.96, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.01, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 4.16, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.01, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 4.16, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, 
"useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [90.0, 79.0, 36.0, 58.0, 49.0, 76.0, 79.0, 98.0, 136.0, 87.0, 50.0, 145.0, 28.0, 23.0, 110.0, 19.0, 134.0, 14.0, 12.0, 58.0, 77.0, 38.0, 99.0, 80.0, 142.0, 42.0, 124.0, 74.0, 93.0, 23.0, 103.0, 90.0, 34.0, 52.0, 82.0, 23.0, 87.0, 34.0, 187.0, 74.0, 128.0, 47.0, 23.0, 157.0, 12.0, 85.0, 20.0, 42.0, 20.0, 92.0, 82.0, 44.0, 9.0, 87.0, 20.0, 98.0, 31.0, 42.0, 83.0, 58.0, 45.0, 96.0, 81.0, 93.0, 36.0, 69.0, 88.0, 74.0, 9.0, 76.0, 98.0, 45.0, 41.0, 127.0, 12.0, 46.0, 70.0, 144.0, 71.0, 117.0, 111.0, 9.0, 54.0, 40.0, 79.0, 14.0, 62.0, 63.0, 106.0, 20.0, 27.0, 136.0, 90.0, 34.0, 52.0, 94.0, 117.0, 90.0, 39.0, 85.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [42.0, 48.0, 41.0, 38.0, 12.0, 24.0, 26.0, 32.0, 24.0, 25.0, 35.0, 41.0, 36.0, 43.0, 44.0, 54.0, 74.0, 62.0, 45.0, 42.0, 28.0, 22.0, 65.0, 80.0, 14.0, 14.0, 6.0, 17.0, 40.0, 70.0, 6.0, 13.0, 70.0, 64.0, 11.0, 3.0, 3.0, 9.0, 36.0, 22.0, 39.0, 38.0, 19.0, 19.0, 54.0, 45.0, 35.0, 45.0, 63.0, 79.0, 16.0, 26.0, 63.0, 61.0, 34.0, 40.0, 48.0, 45.0, 12.0, 11.0, 49.0, 54.0, 41.0, 49.0, 20.0, 14.0, 27.0, 25.0, 43.0, 39.0, 11.0, 12.0, 50.0, 37.0, 17.0, 17.0, 86.0, 101.0, 37.0, 37.0, 55.0, 73.0, 24.0, 23.0, 17.0, 6.0, 79.0, 78.0, 6.0, 6.0, 37.0, 48.0, 5.0, 15.0, 13.0, 29.0, 9.0, 11.0, 45.0, 47.0, 39.0, 43.0, 13.0, 31.0, 6.0, 3.0, 42.0, 45.0, 14.0, 6.0, 58.0, 40.0, 16.0, 15.0, 11.0, 
31.0, 40.0, 43.0, 14.0, 44.0, 17.0, 28.0, 45.0, 51.0, 44.0, 37.0, 49.0, 44.0, 14.0, 22.0, 35.0, 34.0, 36.0, 52.0, 48.0, 26.0, 6.0, 3.0, 41.0, 35.0, 51.0, 47.0, 23.0, 22.0, 19.0, 22.0, 57.0, 70.0, 6.0, 6.0, 21.0, 25.0, 31.0, 39.0, 73.0, 71.0, 34.0, 37.0, 55.0, 62.0, 51.0, 60.0, 6.0, 3.0, 20.0, 34.0, 20.0, 20.0, 29.0, 50.0, 11.0, 3.0, 25.0, 37.0, 23.0, 40.0, 52.0, 54.0, 8.0, 12.0, 18.0, 9.0, 66.0, 70.0, 41.0, 49.0, 12.0, 22.0, 25.0, 27.0, 46.0, 48.0, 66.0, 51.0, 42.0, 48.0, 19.0, 20.0, 48.0, 37.0]}, "sampler_perf": {"mean_env_wait_ms": 4.8713540125875445, "mean_processing_ms": 0.16758394883061775, "mean_inference_ms": 2.326528288901312}, "off_policy_estimator": {}, "info": {"num_steps_trained": 816000, "num_steps_sampled": 435200, "sample_time_ms": 23336.79, "load_time_ms": 39.064, "grad_time_ms": 10020.897, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.3283064712331658e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.007203067187219858, "policy_loss": -0.007930143736302853, "vf_loss": 15.71717357635498, "vf_explained_var": 0.34764334559440613, "kl": 0.0010395334102213383, "entropy": 1.6892824172973633, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 435200, "episodes_total": 1088, "training_iteration": 34, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-10-12", "timestamp": 1660245012, "time_this_iter_s": 30.092119216918945, "time_total_s": 3432.980574131012, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": 
true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 3432.980574131012, "timesteps_since_restore": 435200, "iterations_since_restore": 34, "perf": {"cpu_util_percent": 41.03023255813954, 
"ram_util_percent": 57.66976744186048}} -{"episode_reward_max": 146.0, "episode_reward_min": 9.0, "episode_reward_mean": 73.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 80.0}, "policy_reward_mean": {"ppo": 36.89}, "custom_metrics": {"sparse_reward_mean": 15.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 43.78, "shaped_reward_min": 9, "shaped_reward_max": 89, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.77, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.47, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 4.16, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.66, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.29, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.57, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 3.1, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 4.4, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 4.23, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 3.69, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 0.95, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 12, "useful_dish_drop_agent_0_mean": 0.62, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.69, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.44, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 2.28, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.32, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 9, "soup_drop_agent_1_mean": 0.86, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.1, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 4.4, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.1, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 4.4, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, 
"hist_stats": {"episode_reward": [42.0, 125.0, 20.0, 87.0, 46.0, 53.0, 93.0, 74.0, 121.0, 84.0, 139.0, 75.0, 57.0, 48.0, 146.0, 44.0, 61.0, 90.0, 20.0, 95.0, 31.0, 125.0, 145.0, 98.0, 129.0, 68.0, 66.0, 48.0, 105.0, 87.0, 47.0, 98.0, 36.0, 69.0, 88.0, 74.0, 9.0, 76.0, 98.0, 45.0, 41.0, 127.0, 12.0, 46.0, 70.0, 144.0, 71.0, 117.0, 111.0, 9.0, 54.0, 40.0, 79.0, 14.0, 62.0, 63.0, 106.0, 20.0, 27.0, 136.0, 90.0, 34.0, 52.0, 94.0, 117.0, 90.0, 39.0, 85.0, 90.0, 79.0, 36.0, 58.0, 49.0, 76.0, 79.0, 98.0, 136.0, 87.0, 50.0, 145.0, 28.0, 23.0, 110.0, 19.0, 134.0, 14.0, 12.0, 58.0, 77.0, 38.0, 99.0, 80.0, 142.0, 42.0, 124.0, 74.0, 93.0, 23.0, 103.0, 90.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [19.0, 23.0, 67.0, 58.0, 11.0, 9.0, 45.0, 42.0, 16.0, 30.0, 13.0, 40.0, 35.0, 58.0, 37.0, 37.0, 62.0, 59.0, 33.0, 51.0, 72.0, 67.0, 25.0, 50.0, 28.0, 29.0, 27.0, 21.0, 73.0, 73.0, 14.0, 30.0, 32.0, 29.0, 48.0, 42.0, 14.0, 6.0, 50.0, 45.0, 11.0, 20.0, 71.0, 54.0, 73.0, 72.0, 56.0, 42.0, 64.0, 65.0, 31.0, 37.0, 26.0, 40.0, 20.0, 28.0, 54.0, 51.0, 41.0, 46.0, 15.0, 32.0, 53.0, 45.0, 14.0, 22.0, 35.0, 34.0, 36.0, 52.0, 48.0, 26.0, 6.0, 3.0, 41.0, 35.0, 51.0, 47.0, 23.0, 22.0, 19.0, 22.0, 57.0, 70.0, 6.0, 6.0, 21.0, 25.0, 31.0, 39.0, 73.0, 71.0, 34.0, 37.0, 55.0, 62.0, 51.0, 60.0, 6.0, 3.0, 20.0, 34.0, 20.0, 20.0, 29.0, 50.0, 11.0, 3.0, 25.0, 37.0, 23.0, 40.0, 52.0, 54.0, 8.0, 12.0, 18.0, 9.0, 66.0, 70.0, 41.0, 49.0, 12.0, 22.0, 25.0, 27.0, 46.0, 48.0, 66.0, 51.0, 42.0, 48.0, 19.0, 20.0, 48.0, 
37.0, 42.0, 48.0, 41.0, 38.0, 12.0, 24.0, 26.0, 32.0, 24.0, 25.0, 35.0, 41.0, 36.0, 43.0, 44.0, 54.0, 74.0, 62.0, 45.0, 42.0, 28.0, 22.0, 65.0, 80.0, 14.0, 14.0, 6.0, 17.0, 40.0, 70.0, 6.0, 13.0, 70.0, 64.0, 11.0, 3.0, 3.0, 9.0, 36.0, 22.0, 39.0, 38.0, 19.0, 19.0, 54.0, 45.0, 35.0, 45.0, 63.0, 79.0, 16.0, 26.0, 63.0, 61.0, 34.0, 40.0, 48.0, 45.0, 12.0, 11.0, 49.0, 54.0, 41.0, 49.0]}, "sampler_perf": {"mean_env_wait_ms": 4.742888771715567, "mean_processing_ms": 0.4291925216501232, "mean_inference_ms": 2.2890734350045245}, "off_policy_estimator": {}, "info": {"num_steps_trained": 840000, "num_steps_sampled": 448000, "sample_time_ms": 59523.327, "load_time_ms": 38.502, "grad_time_ms": 106209.033, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.1641532356165829e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.007091447710990906, "policy_loss": -0.007865053601562977, "vf_loss": 16.12926483154297, "vf_explained_var": 0.35502591729164124, "kl": 0.0012615231098607183, "entropy": 1.6786518096923828, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 448000, "episodes_total": 1120, "training_iteration": 35, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_20-32-51", "timestamp": 1660246371, "time_this_iter_s": 1359.4666819572449, "time_total_s": 4792.447256088257, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": 
null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 4792.447256088257, "timesteps_since_restore": 448000, "iterations_since_restore": 35, "perf": {"cpu_util_percent": 73.38606557377048, "ram_util_percent": 58.19344262295081}} -{"episode_reward_max": 195.0, "episode_reward_min": 9.0, "episode_reward_mean": 78.31, "episode_len_mean": 
400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 98.0}, "policy_reward_mean": {"ppo": 39.155}, "custom_metrics": {"sparse_reward_mean": 16.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 46.31, "shaped_reward_min": 9, "shaped_reward_max": 89, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.99, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.5, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 4.36, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.74, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, 
"useful_onion_drop_agent_0_mean": 1.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 3.39, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 4.55, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.39, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 3.86, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 0.99, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.79, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 12, "useful_dish_drop_agent_0_mean": 0.6, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.65, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.62, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 2.33, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.2, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 9, "soup_drop_agent_1_mean": 0.83, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.39, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 4.55, 
"optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.39, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 4.55, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [31.0, 142.0, 23.0, 96.0, 64.0, 98.0, 65.0, 112.0, 36.0, 23.0, 42.0, 113.0, 33.0, 41.0, 
98.0, 148.0, 130.0, 119.0, 39.0, 88.0, 42.0, 142.0, 105.0, 120.0, 179.0, 195.0, 12.0, 12.0, 56.0, 60.0, 9.0, 94.0, 117.0, 90.0, 39.0, 85.0, 90.0, 79.0, 36.0, 58.0, 49.0, 76.0, 79.0, 98.0, 136.0, 87.0, 50.0, 145.0, 28.0, 23.0, 110.0, 19.0, 134.0, 14.0, 12.0, 58.0, 77.0, 38.0, 99.0, 80.0, 142.0, 42.0, 124.0, 74.0, 93.0, 23.0, 103.0, 90.0, 42.0, 125.0, 20.0, 87.0, 46.0, 53.0, 93.0, 74.0, 121.0, 84.0, 139.0, 75.0, 57.0, 48.0, 146.0, 44.0, 61.0, 90.0, 20.0, 95.0, 31.0, 125.0, 145.0, 98.0, 129.0, 68.0, 66.0, 48.0, 105.0, 87.0, 47.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [15.0, 16.0, 76.0, 66.0, 9.0, 14.0, 45.0, 51.0, 33.0, 31.0, 43.0, 55.0, 34.0, 31.0, 60.0, 52.0, 10.0, 26.0, 3.0, 20.0, 22.0, 20.0, 59.0, 54.0, 17.0, 16.0, 22.0, 19.0, 46.0, 52.0, 74.0, 74.0, 59.0, 71.0, 54.0, 65.0, 20.0, 19.0, 47.0, 41.0, 26.0, 16.0, 73.0, 69.0, 49.0, 56.0, 59.0, 61.0, 81.0, 98.0, 98.0, 97.0, 3.0, 9.0, 6.0, 6.0, 30.0, 26.0, 32.0, 28.0, 6.0, 3.0, 46.0, 48.0, 66.0, 51.0, 42.0, 48.0, 19.0, 20.0, 48.0, 37.0, 42.0, 48.0, 41.0, 38.0, 12.0, 24.0, 26.0, 32.0, 24.0, 25.0, 35.0, 41.0, 36.0, 43.0, 44.0, 54.0, 74.0, 62.0, 45.0, 42.0, 28.0, 22.0, 65.0, 80.0, 14.0, 14.0, 6.0, 17.0, 40.0, 70.0, 6.0, 13.0, 70.0, 64.0, 11.0, 3.0, 3.0, 9.0, 36.0, 22.0, 39.0, 38.0, 19.0, 19.0, 54.0, 45.0, 35.0, 45.0, 63.0, 79.0, 16.0, 26.0, 63.0, 61.0, 34.0, 40.0, 48.0, 45.0, 12.0, 11.0, 49.0, 54.0, 41.0, 49.0, 19.0, 23.0, 67.0, 58.0, 11.0, 9.0, 45.0, 42.0, 16.0, 30.0, 13.0, 40.0, 35.0, 58.0, 37.0, 37.0, 62.0, 59.0, 33.0, 
51.0, 72.0, 67.0, 25.0, 50.0, 28.0, 29.0, 27.0, 21.0, 73.0, 73.0, 14.0, 30.0, 32.0, 29.0, 48.0, 42.0, 14.0, 6.0, 50.0, 45.0, 11.0, 20.0, 71.0, 54.0, 73.0, 72.0, 56.0, 42.0, 64.0, 65.0, 31.0, 37.0, 26.0, 40.0, 20.0, 28.0, 54.0, 51.0, 41.0, 46.0, 15.0, 32.0, 53.0, 45.0]}, "sampler_perf": {"mean_env_wait_ms": 4.6429756749225914, "mean_processing_ms": 0.6884190143076668, "mean_inference_ms": 3.1849506897639785}, "off_policy_estimator": {}, "info": {"num_steps_trained": 864000, "num_steps_sampled": 460800, "sample_time_ms": 197454.884, "load_time_ms": 38.154, "grad_time_ms": 142553.757, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.8207661780829145e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.009596621617674828, "policy_loss": -0.010532871820032597, "vf_loss": 17.772741317749023, "vf_explained_var": 0.41850244998931885, "kl": 0.0012102305190637708, "entropy": 1.6820656061172485, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 460800, "episodes_total": 1152, "training_iteration": 36, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-02-29", "timestamp": 1660248149, "time_this_iter_s": 1777.6666460037231, "time_total_s": 6570.11390209198, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, 
"custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6570.11390209198, "timesteps_since_restore": 460800, "iterations_since_restore": 36, "perf": {"cpu_util_percent": 79.74032921810701, "ram_util_percent": 58.72098765432099}} -{"episode_reward_max": 195.0, "episode_reward_min": 9.0, "episode_reward_mean": 82.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, 
"policy_reward_max": {"ppo": 98.0}, "policy_reward_mean": {"ppo": 41.135}, "custom_metrics": {"sparse_reward_mean": 16.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 50.27, "shaped_reward_min": 9, "shaped_reward_max": 89, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.38, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.68, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 4.88, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 6.05, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.31, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.31, "useful_onion_drop_agent_0_min": 0, 
"useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 3.66, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.49, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.05, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.05, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 12, "useful_dish_drop_agent_0_mean": 0.62, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 2.31, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.57, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.45, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.13, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 9, "soup_drop_agent_1_mean": 0.77, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 3.66, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.66, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [118.0, 107.0, 31.0, 150.0, 71.0, 58.0, 88.0, 100.0, 155.0, 98.0, 42.0, 110.0, 38.0, 94.0, 61.0, 77.0, 59.0, 66.0, 65.0, 36.0, 130.0, 67.0, 112.0, 72.0, 164.0, 45.0, 
111.0, 149.0, 37.0, 82.0, 93.0, 98.0, 93.0, 23.0, 103.0, 90.0, 42.0, 125.0, 20.0, 87.0, 46.0, 53.0, 93.0, 74.0, 121.0, 84.0, 139.0, 75.0, 57.0, 48.0, 146.0, 44.0, 61.0, 90.0, 20.0, 95.0, 31.0, 125.0, 145.0, 98.0, 129.0, 68.0, 66.0, 48.0, 105.0, 87.0, 47.0, 98.0, 31.0, 142.0, 23.0, 96.0, 64.0, 98.0, 65.0, 112.0, 36.0, 23.0, 42.0, 113.0, 33.0, 41.0, 98.0, 148.0, 130.0, 119.0, 39.0, 88.0, 42.0, 142.0, 105.0, 120.0, 179.0, 195.0, 12.0, 12.0, 56.0, 60.0, 9.0, 94.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [61.0, 57.0, 48.0, 59.0, 16.0, 15.0, 60.0, 90.0, 26.0, 45.0, 40.0, 18.0, 45.0, 43.0, 63.0, 37.0, 78.0, 77.0, 51.0, 47.0, 11.0, 31.0, 60.0, 50.0, 30.0, 8.0, 48.0, 46.0, 37.0, 24.0, 37.0, 40.0, 25.0, 34.0, 29.0, 37.0, 39.0, 26.0, 19.0, 17.0, 62.0, 68.0, 44.0, 23.0, 61.0, 51.0, 39.0, 33.0, 79.0, 85.0, 17.0, 28.0, 56.0, 55.0, 73.0, 76.0, 12.0, 25.0, 26.0, 56.0, 55.0, 38.0, 48.0, 50.0, 48.0, 45.0, 12.0, 11.0, 49.0, 54.0, 41.0, 49.0, 19.0, 23.0, 67.0, 58.0, 11.0, 9.0, 45.0, 42.0, 16.0, 30.0, 13.0, 40.0, 35.0, 58.0, 37.0, 37.0, 62.0, 59.0, 33.0, 51.0, 72.0, 67.0, 25.0, 50.0, 28.0, 29.0, 27.0, 21.0, 73.0, 73.0, 14.0, 30.0, 32.0, 29.0, 48.0, 42.0, 14.0, 6.0, 50.0, 45.0, 11.0, 20.0, 71.0, 54.0, 73.0, 72.0, 56.0, 42.0, 64.0, 65.0, 31.0, 37.0, 26.0, 40.0, 20.0, 28.0, 54.0, 51.0, 41.0, 46.0, 15.0, 32.0, 53.0, 45.0, 15.0, 16.0, 76.0, 66.0, 9.0, 14.0, 45.0, 51.0, 33.0, 31.0, 43.0, 55.0, 34.0, 31.0, 60.0, 52.0, 10.0, 26.0, 3.0, 20.0, 22.0, 20.0, 59.0, 54.0, 17.0, 16.0, 22.0, 19.0, 46.0, 52.0, 
74.0, 74.0, 59.0, 71.0, 54.0, 65.0, 20.0, 19.0, 47.0, 41.0, 26.0, 16.0, 73.0, 69.0, 49.0, 56.0, 59.0, 61.0, 81.0, 98.0, 98.0, 97.0, 3.0, 9.0, 6.0, 6.0, 30.0, 26.0, 32.0, 28.0, 6.0, 3.0, 46.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 4.550001067823368, "mean_processing_ms": 0.9407599736993785, "mean_inference_ms": 4.060064536997679}, "off_policy_estimator": {}, "info": {"num_steps_trained": 888000, "num_steps_sampled": 473600, "sample_time_ms": 197652.347, "load_time_ms": 38.247, "grad_time_ms": 142451.276, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.9103830890414573e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.00908196996897459, "policy_loss": -0.009920346550643444, "vf_loss": 16.691673278808594, "vf_explained_var": 0.3790724277496338, "kl": 0.0013888808898627758, "entropy": 1.661569595336914, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 473600, "episodes_total": 1184, "training_iteration": 37, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-03-05", "timestamp": 1660248185, "time_this_iter_s": 36.35908007621765, "time_total_s": 6606.472982168198, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, 
"optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": 
false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6606.472982168198, "timesteps_since_restore": 473600, "iterations_since_restore": 37, "perf": {"cpu_util_percent": 52.89999999999999, "ram_util_percent": 59.76923076923076}} -{"episode_reward_max": 195.0, "episode_reward_min": 9.0, "episode_reward_mean": 82.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 98.0}, "policy_reward_mean": {"ppo": 41.22}, "custom_metrics": {"sparse_reward_mean": 
15.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 51.24, "shaped_reward_min": 9, "shaped_reward_max": 84, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.6, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.44, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 4.92, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.88, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.45, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.21, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 3.85, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.87, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.4, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.09, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.1, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.52, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.75, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 2.47, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.51, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.42, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 9, "soup_drop_agent_1_mean": 0.97, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 3.85, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.87, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.85, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.87, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [155.0, 107.0, 66.0, 64.0, 59.0, 130.0, 80.0, 101.0, 98.0, 109.0, 71.0, 68.0, 84.0, 79.0, 57.0, 20.0, 37.0, 82.0, 48.0, 64.0, 84.0, 113.0, 101.0, 62.0, 133.0, 71.0, 23.0, 109.0, 125.0, 23.0, 58.0, 75.0, 105.0, 87.0, 47.0, 98.0, 31.0, 142.0, 23.0, 96.0, 64.0, 98.0, 65.0, 112.0, 36.0, 23.0, 
42.0, 113.0, 33.0, 41.0, 98.0, 148.0, 130.0, 119.0, 39.0, 88.0, 42.0, 142.0, 105.0, 120.0, 179.0, 195.0, 12.0, 12.0, 56.0, 60.0, 9.0, 94.0, 118.0, 107.0, 31.0, 150.0, 71.0, 58.0, 88.0, 100.0, 155.0, 98.0, 42.0, 110.0, 38.0, 94.0, 61.0, 77.0, 59.0, 66.0, 65.0, 36.0, 130.0, 67.0, 112.0, 72.0, 164.0, 45.0, 111.0, 149.0, 37.0, 82.0, 93.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [82.0, 73.0, 64.0, 43.0, 37.0, 29.0, 30.0, 34.0, 28.0, 31.0, 62.0, 68.0, 37.0, 43.0, 50.0, 51.0, 44.0, 54.0, 50.0, 59.0, 42.0, 29.0, 29.0, 39.0, 42.0, 42.0, 34.0, 45.0, 37.0, 20.0, 8.0, 12.0, 20.0, 17.0, 42.0, 40.0, 22.0, 26.0, 38.0, 26.0, 34.0, 50.0, 54.0, 59.0, 55.0, 46.0, 40.0, 22.0, 60.0, 73.0, 37.0, 34.0, 11.0, 12.0, 51.0, 58.0, 56.0, 69.0, 17.0, 6.0, 28.0, 30.0, 39.0, 36.0, 54.0, 51.0, 41.0, 46.0, 15.0, 32.0, 53.0, 45.0, 15.0, 16.0, 76.0, 66.0, 9.0, 14.0, 45.0, 51.0, 33.0, 31.0, 43.0, 55.0, 34.0, 31.0, 60.0, 52.0, 10.0, 26.0, 3.0, 20.0, 22.0, 20.0, 59.0, 54.0, 17.0, 16.0, 22.0, 19.0, 46.0, 52.0, 74.0, 74.0, 59.0, 71.0, 54.0, 65.0, 20.0, 19.0, 47.0, 41.0, 26.0, 16.0, 73.0, 69.0, 49.0, 56.0, 59.0, 61.0, 81.0, 98.0, 98.0, 97.0, 3.0, 9.0, 6.0, 6.0, 30.0, 26.0, 32.0, 28.0, 6.0, 3.0, 46.0, 48.0, 61.0, 57.0, 48.0, 59.0, 16.0, 15.0, 60.0, 90.0, 26.0, 45.0, 40.0, 18.0, 45.0, 43.0, 63.0, 37.0, 78.0, 77.0, 51.0, 47.0, 11.0, 31.0, 60.0, 50.0, 30.0, 8.0, 48.0, 46.0, 37.0, 24.0, 37.0, 40.0, 25.0, 34.0, 29.0, 37.0, 39.0, 26.0, 19.0, 17.0, 62.0, 68.0, 44.0, 23.0, 61.0, 51.0, 39.0, 33.0, 79.0, 85.0, 17.0, 
28.0, 56.0, 55.0, 73.0, 76.0, 12.0, 25.0, 26.0, 56.0, 55.0, 38.0, 48.0, 50.0]}, "sampler_perf": {"mean_env_wait_ms": 4.462790236915632, "mean_processing_ms": 0.9577209626577212, "mean_inference_ms": 4.91290526345304}, "off_policy_estimator": {}, "info": {"num_steps_trained": 912000, "num_steps_sampled": 486400, "sample_time_ms": 197395.402, "load_time_ms": 38.358, "grad_time_ms": 142364.304, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.4551915445207286e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.0073294141329824924, "policy_loss": -0.007997877895832062, "vf_loss": 15.018708229064941, "vf_explained_var": 0.4496181905269623, "kl": 0.0011589183704927564, "entropy": 1.666812539100647, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 486400, "episodes_total": 1216, "training_iteration": 38, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-03-36", "timestamp": 1660248216, "time_this_iter_s": 30.582061052322388, "time_total_s": 6637.05504322052, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": 
{"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6637.05504322052, "timesteps_since_restore": 486400, "iterations_since_restore": 38, "perf": {"cpu_util_percent": 42.890697674418604, "ram_util_percent": 58.16976744186046}} -{"episode_reward_max": 164.0, "episode_reward_min": 9.0, "episode_reward_mean": 80.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 90.0}, "policy_reward_mean": {"ppo": 40.455}, "custom_metrics": {"sparse_reward_mean": 14.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 52.91, 
"shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.33, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.33, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.82, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.89, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.99, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 3.84, 
"potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.6, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.1, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.2, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.0, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.6, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.66, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.88, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.56, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 3.84, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, 
"optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.84, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [95.0, 36.0, 158.0, 96.0, 95.0, 45.0, 66.0, 127.0, 84.0, 147.0, 34.0, 84.0, 96.0, 118.0, 42.0, 112.0, 112.0, 11.0, 28.0, 66.0, 42.0, 39.0, 85.0, 137.0, 96.0, 31.0, 92.0, 60.0, 66.0, 72.0, 56.0, 104.0, 56.0, 60.0, 9.0, 94.0, 118.0, 107.0, 31.0, 150.0, 71.0, 58.0, 88.0, 100.0, 155.0, 98.0, 42.0, 110.0, 38.0, 94.0, 61.0, 77.0, 59.0, 66.0, 65.0, 36.0, 130.0, 67.0, 112.0, 72.0, 
164.0, 45.0, 111.0, 149.0, 37.0, 82.0, 93.0, 98.0, 155.0, 107.0, 66.0, 64.0, 59.0, 130.0, 80.0, 101.0, 98.0, 109.0, 71.0, 68.0, 84.0, 79.0, 57.0, 20.0, 37.0, 82.0, 48.0, 64.0, 84.0, 113.0, 101.0, 62.0, 133.0, 71.0, 23.0, 109.0, 125.0, 23.0, 58.0, 75.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [50.0, 45.0, 6.0, 30.0, 84.0, 74.0, 46.0, 50.0, 45.0, 50.0, 20.0, 25.0, 33.0, 33.0, 67.0, 60.0, 42.0, 42.0, 66.0, 81.0, 14.0, 20.0, 37.0, 47.0, 50.0, 46.0, 62.0, 56.0, 17.0, 25.0, 41.0, 71.0, 45.0, 67.0, 8.0, 3.0, 16.0, 12.0, 29.0, 37.0, 25.0, 17.0, 28.0, 11.0, 52.0, 33.0, 65.0, 72.0, 47.0, 49.0, 22.0, 9.0, 31.0, 61.0, 30.0, 30.0, 34.0, 32.0, 28.0, 44.0, 22.0, 34.0, 51.0, 53.0, 30.0, 26.0, 32.0, 28.0, 6.0, 3.0, 46.0, 48.0, 61.0, 57.0, 48.0, 59.0, 16.0, 15.0, 60.0, 90.0, 26.0, 45.0, 40.0, 18.0, 45.0, 43.0, 63.0, 37.0, 78.0, 77.0, 51.0, 47.0, 11.0, 31.0, 60.0, 50.0, 30.0, 8.0, 48.0, 46.0, 37.0, 24.0, 37.0, 40.0, 25.0, 34.0, 29.0, 37.0, 39.0, 26.0, 19.0, 17.0, 62.0, 68.0, 44.0, 23.0, 61.0, 51.0, 39.0, 33.0, 79.0, 85.0, 17.0, 28.0, 56.0, 55.0, 73.0, 76.0, 12.0, 25.0, 26.0, 56.0, 55.0, 38.0, 48.0, 50.0, 82.0, 73.0, 64.0, 43.0, 37.0, 29.0, 30.0, 34.0, 28.0, 31.0, 62.0, 68.0, 37.0, 43.0, 50.0, 51.0, 44.0, 54.0, 50.0, 59.0, 42.0, 29.0, 29.0, 39.0, 42.0, 42.0, 34.0, 45.0, 37.0, 20.0, 8.0, 12.0, 20.0, 17.0, 42.0, 40.0, 22.0, 26.0, 38.0, 26.0, 34.0, 50.0, 54.0, 59.0, 55.0, 46.0, 40.0, 22.0, 60.0, 73.0, 37.0, 34.0, 11.0, 12.0, 51.0, 58.0, 56.0, 69.0, 17.0, 6.0, 28.0, 30.0, 39.0, 36.0]}, 
"sampler_perf": {"mean_env_wait_ms": 4.361961744597006, "mean_processing_ms": 0.9376235930507917, "mean_inference_ms": 4.9270347290029886}, "off_policy_estimator": {}, "info": {"num_steps_trained": 936000, "num_steps_sampled": 499200, "sample_time_ms": 197058.326, "load_time_ms": 38.236, "grad_time_ms": 142247.976, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.275957722603643e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.009562704712152481, "policy_loss": -0.010270781815052032, "vf_loss": 15.400076866149902, "vf_explained_var": 0.39905285835266113, "kl": 0.0014264689525589347, "entropy": 1.6638473272323608, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 499200, "episodes_total": 1248, "training_iteration": 39, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-04-04", "timestamp": 1660248244, "time_this_iter_s": 28.12965416908264, "time_total_s": 6665.184697389603, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6665.184697389603, "timesteps_since_restore": 499200, "iterations_since_restore": 39, "perf": {"cpu_util_percent": 32.9825, "ram_util_percent": 58.30499999999999}} -{"episode_reward_max": 193.0, "episode_reward_min": 9.0, "episode_reward_mean": 81.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 98.0}, "policy_reward_mean": {"ppo": 40.945}, "custom_metrics": {"sparse_reward_mean": 14.4, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 53.09, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, 
"tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.34, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.25, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.88, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.78, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 3.9, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.92, "potting_onion_agent_1_min": 1, 
"potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.72, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.2, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.07, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.67, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.74, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.41, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.9, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.92, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.9, "viable_onion_potting_agent_0_min": 0, 
"viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.92, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [63.0, 122.0, 107.0, 193.0, 107.0, 44.0, 20.0, 144.0, 80.0, 53.0, 156.0, 100.0, 55.0, 74.0, 89.0, 9.0, 69.0, 52.0, 50.0, 96.0, 115.0, 58.0, 87.0, 144.0, 92.0, 20.0, 63.0, 81.0, 115.0, 84.0, 99.0, 150.0, 37.0, 82.0, 93.0, 98.0, 155.0, 107.0, 66.0, 64.0, 59.0, 130.0, 80.0, 101.0, 98.0, 109.0, 71.0, 68.0, 84.0, 79.0, 57.0, 20.0, 37.0, 82.0, 48.0, 64.0, 84.0, 113.0, 101.0, 62.0, 133.0, 71.0, 23.0, 109.0, 125.0, 23.0, 58.0, 75.0, 95.0, 36.0, 158.0, 96.0, 95.0, 45.0, 66.0, 127.0, 84.0, 147.0, 34.0, 
84.0, 96.0, 118.0, 42.0, 112.0, 112.0, 11.0, 28.0, 66.0, 42.0, 39.0, 85.0, 137.0, 96.0, 31.0, 92.0, 60.0, 66.0, 72.0, 56.0, 104.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [31.0, 32.0, 55.0, 67.0, 50.0, 57.0, 95.0, 98.0, 52.0, 55.0, 14.0, 30.0, 6.0, 14.0, 78.0, 66.0, 40.0, 40.0, 20.0, 33.0, 75.0, 81.0, 47.0, 53.0, 27.0, 28.0, 39.0, 35.0, 49.0, 40.0, 6.0, 3.0, 41.0, 28.0, 27.0, 25.0, 30.0, 20.0, 54.0, 42.0, 59.0, 56.0, 27.0, 31.0, 37.0, 50.0, 76.0, 68.0, 37.0, 55.0, 5.0, 15.0, 24.0, 39.0, 43.0, 38.0, 56.0, 59.0, 32.0, 52.0, 50.0, 49.0, 70.0, 80.0, 12.0, 25.0, 26.0, 56.0, 55.0, 38.0, 48.0, 50.0, 82.0, 73.0, 64.0, 43.0, 37.0, 29.0, 30.0, 34.0, 28.0, 31.0, 62.0, 68.0, 37.0, 43.0, 50.0, 51.0, 44.0, 54.0, 50.0, 59.0, 42.0, 29.0, 29.0, 39.0, 42.0, 42.0, 34.0, 45.0, 37.0, 20.0, 8.0, 12.0, 20.0, 17.0, 42.0, 40.0, 22.0, 26.0, 38.0, 26.0, 34.0, 50.0, 54.0, 59.0, 55.0, 46.0, 40.0, 22.0, 60.0, 73.0, 37.0, 34.0, 11.0, 12.0, 51.0, 58.0, 56.0, 69.0, 17.0, 6.0, 28.0, 30.0, 39.0, 36.0, 50.0, 45.0, 6.0, 30.0, 84.0, 74.0, 46.0, 50.0, 45.0, 50.0, 20.0, 25.0, 33.0, 33.0, 67.0, 60.0, 42.0, 42.0, 66.0, 81.0, 14.0, 20.0, 37.0, 47.0, 50.0, 46.0, 62.0, 56.0, 17.0, 25.0, 41.0, 71.0, 45.0, 67.0, 8.0, 3.0, 16.0, 12.0, 29.0, 37.0, 25.0, 17.0, 28.0, 11.0, 52.0, 33.0, 65.0, 72.0, 47.0, 49.0, 22.0, 9.0, 31.0, 61.0, 30.0, 30.0, 34.0, 32.0, 28.0, 44.0, 22.0, 34.0, 51.0, 53.0]}, "sampler_perf": {"mean_env_wait_ms": 4.262822214946647, "mean_processing_ms": 0.9177406233023881, "mean_inference_ms": 
4.823257056931315}, "off_policy_estimator": {}, "info": {"num_steps_trained": 960000, "num_steps_sampled": 512000, "sample_time_ms": 196701.65, "load_time_ms": 38.048, "grad_time_ms": 142153.928, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.6379788613018216e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.008069280534982681, "policy_loss": -0.008976585231721401, "vf_loss": 17.312698364257812, "vf_explained_var": 0.4009813070297241, "kl": 0.0012740670936182141, "entropy": 1.647910237312317, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 512000, "episodes_total": 1280, "training_iteration": 40, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-04-32", "timestamp": 1660248272, "time_this_iter_s": 27.727252960205078, "time_total_s": 6692.911950349808, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, 
"num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6692.911950349808, "timesteps_since_restore": 512000, "iterations_since_restore": 40, "perf": {"cpu_util_percent": 32.13, "ram_util_percent": 58.30499999999999}} -{"episode_reward_max": 213.0, "episode_reward_min": 6.0, "episode_reward_mean": 85.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 113.0}, "policy_reward_mean": {"ppo": 42.77}, "custom_metrics": {"sparse_reward_mean": 14.8, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 55.94, "shaped_reward_min": 6, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.53, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.11, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.17, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.71, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.9, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.34, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.79, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.45, "dish_pickup_agent_0_min": 0, 
"dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.39, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 1.11, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.14, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.99, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.99, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.55, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.81, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.45, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 1.23, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.34, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.79, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.34, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.79, 
"viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [104.0, 141.0, 53.0, 70.0, 96.0, 127.0, 58.0, 31.0, 6.0, 52.0, 141.0, 55.0, 64.0, 88.0, 148.0, 69.0, 187.0, 50.0, 124.0, 107.0, 93.0, 99.0, 116.0, 72.0, 89.0, 42.0, 146.0, 213.0, 118.0, 82.0, 81.0, 28.0, 125.0, 23.0, 58.0, 75.0, 95.0, 36.0, 158.0, 96.0, 95.0, 45.0, 66.0, 127.0, 84.0, 147.0, 34.0, 84.0, 96.0, 118.0, 42.0, 112.0, 112.0, 11.0, 28.0, 66.0, 42.0, 39.0, 85.0, 137.0, 96.0, 31.0, 92.0, 60.0, 66.0, 72.0, 56.0, 104.0, 63.0, 122.0, 107.0, 193.0, 107.0, 44.0, 20.0, 144.0, 80.0, 53.0, 156.0, 100.0, 55.0, 74.0, 89.0, 9.0, 69.0, 52.0, 50.0, 96.0, 115.0, 58.0, 87.0, 144.0, 
92.0, 20.0, 63.0, 81.0, 115.0, 84.0, 99.0, 150.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [48.0, 56.0, 68.0, 73.0, 25.0, 28.0, 42.0, 28.0, 53.0, 43.0, 58.0, 69.0, 34.0, 24.0, 19.0, 12.0, 3.0, 3.0, 23.0, 29.0, 73.0, 68.0, 21.0, 34.0, 42.0, 22.0, 35.0, 53.0, 68.0, 80.0, 41.0, 28.0, 99.0, 88.0, 30.0, 20.0, 60.0, 64.0, 59.0, 48.0, 48.0, 45.0, 53.0, 46.0, 57.0, 59.0, 37.0, 35.0, 44.0, 45.0, 20.0, 22.0, 76.0, 70.0, 100.0, 113.0, 57.0, 61.0, 48.0, 34.0, 31.0, 50.0, 3.0, 25.0, 56.0, 69.0, 17.0, 6.0, 28.0, 30.0, 39.0, 36.0, 50.0, 45.0, 6.0, 30.0, 84.0, 74.0, 46.0, 50.0, 45.0, 50.0, 20.0, 25.0, 33.0, 33.0, 67.0, 60.0, 42.0, 42.0, 66.0, 81.0, 14.0, 20.0, 37.0, 47.0, 50.0, 46.0, 62.0, 56.0, 17.0, 25.0, 41.0, 71.0, 45.0, 67.0, 8.0, 3.0, 16.0, 12.0, 29.0, 37.0, 25.0, 17.0, 28.0, 11.0, 52.0, 33.0, 65.0, 72.0, 47.0, 49.0, 22.0, 9.0, 31.0, 61.0, 30.0, 30.0, 34.0, 32.0, 28.0, 44.0, 22.0, 34.0, 51.0, 53.0, 31.0, 32.0, 55.0, 67.0, 50.0, 57.0, 95.0, 98.0, 52.0, 55.0, 14.0, 30.0, 6.0, 14.0, 78.0, 66.0, 40.0, 40.0, 20.0, 33.0, 75.0, 81.0, 47.0, 53.0, 27.0, 28.0, 39.0, 35.0, 49.0, 40.0, 6.0, 3.0, 41.0, 28.0, 27.0, 25.0, 30.0, 20.0, 54.0, 42.0, 59.0, 56.0, 27.0, 31.0, 37.0, 50.0, 76.0, 68.0, 37.0, 55.0, 5.0, 15.0, 24.0, 39.0, 43.0, 38.0, 56.0, 59.0, 32.0, 52.0, 50.0, 49.0, 70.0, 80.0]}, "sampler_perf": {"mean_env_wait_ms": 4.168357407076552, "mean_processing_ms": 0.8988004870947216, "mean_inference_ms": 4.723239868801954}, "off_policy_estimator": {}, "info": {"num_steps_trained": 984000, 
"num_steps_sampled": 524800, "sample_time_ms": 196459.456, "load_time_ms": 38.195, "grad_time_ms": 142037.515, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.8189894306509108e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.008718971163034439, "policy_loss": -0.009683111682534218, "vf_loss": 17.845956802368164, "vf_explained_var": 0.43686649203300476, "kl": 0.0014183915918692946, "entropy": 1.6409085988998413, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 524800, "episodes_total": 1312, "training_iteration": 41, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-05-00", "timestamp": 1660248300, "time_this_iter_s": 27.954697370529175, "time_total_s": 6720.866647720337, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, 
"wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, 
"object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6720.866647720337, "timesteps_since_restore": 524800, "iterations_since_restore": 41, "perf": {"cpu_util_percent": 35.58461538461538, "ram_util_percent": 58.16923076923076}} -{"episode_reward_max": 213.0, "episode_reward_min": 6.0, "episode_reward_mean": 92.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 116.0}, "policy_reward_mean": {"ppo": 46.035}, "custom_metrics": {"sparse_reward_mean": 16.8, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 58.47, "shaped_reward_min": 6, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, 
"useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.0, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 7.36, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.54, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.75, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.99, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.6, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.36, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.4, 
"dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 1.2, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.18, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.9, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.48, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.87, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 3.27, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.58, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.25, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.6, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.6, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [73.0, 64.0, 96.0, 101.0, 66.0, 168.0, 146.0, 144.0, 87.0, 61.0, 95.0, 58.0, 122.0, 91.0, 58.0, 138.0, 78.0, 58.0, 58.0, 36.0, 110.0, 76.0, 99.0, 169.0, 167.0, 201.0, 55.0, 104.0, 212.0, 31.0, 115.0, 31.0, 66.0, 72.0, 56.0, 104.0, 63.0, 122.0, 107.0, 193.0, 107.0, 44.0, 20.0, 144.0, 80.0, 53.0, 156.0, 100.0, 55.0, 74.0, 89.0, 9.0, 69.0, 52.0, 50.0, 96.0, 115.0, 58.0, 87.0, 144.0, 92.0, 20.0, 63.0, 81.0, 115.0, 84.0, 99.0, 150.0, 104.0, 141.0, 53.0, 70.0, 96.0, 127.0, 58.0, 31.0, 6.0, 52.0, 141.0, 55.0, 64.0, 88.0, 148.0, 69.0, 187.0, 50.0, 124.0, 107.0, 93.0, 99.0, 116.0, 72.0, 89.0, 42.0, 146.0, 213.0, 118.0, 82.0, 81.0, 28.0], "episode_lengths": [400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [41.0, 32.0, 30.0, 34.0, 43.0, 53.0, 48.0, 53.0, 29.0, 37.0, 91.0, 77.0, 76.0, 70.0, 68.0, 76.0, 51.0, 36.0, 28.0, 33.0, 54.0, 41.0, 19.0, 39.0, 62.0, 60.0, 48.0, 43.0, 30.0, 28.0, 57.0, 81.0, 42.0, 36.0, 30.0, 28.0, 34.0, 24.0, 15.0, 21.0, 57.0, 53.0, 49.0, 27.0, 50.0, 49.0, 76.0, 93.0, 83.0, 84.0, 85.0, 116.0, 35.0, 20.0, 41.0, 63.0, 105.0, 107.0, 17.0, 14.0, 56.0, 59.0, 8.0, 23.0, 34.0, 32.0, 28.0, 44.0, 22.0, 34.0, 51.0, 53.0, 31.0, 32.0, 55.0, 67.0, 50.0, 57.0, 95.0, 98.0, 52.0, 55.0, 14.0, 30.0, 6.0, 14.0, 78.0, 66.0, 40.0, 40.0, 20.0, 33.0, 75.0, 81.0, 47.0, 53.0, 27.0, 28.0, 39.0, 35.0, 49.0, 40.0, 6.0, 3.0, 41.0, 28.0, 27.0, 25.0, 30.0, 20.0, 54.0, 42.0, 59.0, 56.0, 27.0, 31.0, 37.0, 50.0, 76.0, 68.0, 37.0, 55.0, 5.0, 15.0, 24.0, 39.0, 43.0, 38.0, 56.0, 59.0, 32.0, 52.0, 50.0, 49.0, 70.0, 80.0, 48.0, 56.0, 68.0, 73.0, 25.0, 28.0, 42.0, 28.0, 53.0, 43.0, 58.0, 69.0, 34.0, 24.0, 19.0, 12.0, 3.0, 3.0, 23.0, 29.0, 73.0, 68.0, 21.0, 34.0, 42.0, 22.0, 35.0, 53.0, 68.0, 80.0, 41.0, 28.0, 99.0, 88.0, 30.0, 20.0, 60.0, 64.0, 59.0, 48.0, 48.0, 45.0, 53.0, 46.0, 57.0, 59.0, 37.0, 35.0, 44.0, 45.0, 20.0, 22.0, 76.0, 70.0, 100.0, 113.0, 57.0, 61.0, 48.0, 34.0, 31.0, 50.0, 3.0, 25.0]}, "sampler_perf": {"mean_env_wait_ms": 4.078369518030482, "mean_processing_ms": 0.8807722404539655, "mean_inference_ms": 4.6286338379623215}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1008000, "num_steps_sampled": 537600, "sample_time_ms": 196166.577, "load_time_ms": 
37.661, "grad_time_ms": 141994.493, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 9.094947153254554e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.00832280796021223, "policy_loss": -0.009453889913856983, "vf_loss": 19.490577697753906, "vf_explained_var": 0.44570884108543396, "kl": 0.0015499308938160539, "entropy": 1.6359552145004272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 537600, "episodes_total": 1344, "training_iteration": 42, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-05-29", "timestamp": 1660248329, "time_this_iter_s": 29.278310775756836, "time_total_s": 6750.144958496094, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], 
"counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, 
"object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6750.144958496094, "timesteps_since_restore": 537600, "iterations_since_restore": 42, "perf": {"cpu_util_percent": 35.96428571428572, "ram_util_percent": 58.190476190476204}} -{"episode_reward_max": 213.0, "episode_reward_min": 6.0, "episode_reward_mean": 92.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 116.0}, "policy_reward_mean": {"ppo": 46.195}, "custom_metrics": {"sparse_reward_mean": 17.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 57.99, "shaped_reward_min": 6, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, 
"useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.96, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 7.36, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.46, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.68, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 1.94, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 4.56, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.25, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.38, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 14, 
"useful_dish_pickup_agent_0_mean": 1.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.2, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.47, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.99, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 3.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.63, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.73, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.25, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.19, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.56, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.56, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, 
"viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [45.0, 93.0, 46.0, 61.0, 123.0, 57.0, 42.0, 23.0, 31.0, 128.0, 52.0, 85.0, 146.0, 137.0, 25.0, 112.0, 102.0, 119.0, 28.0, 84.0, 68.0, 101.0, 144.0, 92.0, 88.0, 58.0, 98.0, 109.0, 139.0, 91.0, 48.0, 98.0, 115.0, 84.0, 99.0, 150.0, 104.0, 141.0, 53.0, 70.0, 96.0, 127.0, 58.0, 31.0, 6.0, 52.0, 141.0, 55.0, 64.0, 88.0, 148.0, 69.0, 187.0, 50.0, 124.0, 107.0, 93.0, 99.0, 116.0, 72.0, 89.0, 42.0, 146.0, 213.0, 118.0, 82.0, 81.0, 28.0, 73.0, 64.0, 96.0, 101.0, 66.0, 168.0, 146.0, 144.0, 87.0, 61.0, 95.0, 58.0, 122.0, 91.0, 58.0, 138.0, 78.0, 58.0, 58.0, 36.0, 110.0, 76.0, 99.0, 169.0, 167.0, 201.0, 55.0, 104.0, 212.0, 31.0, 115.0, 31.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [31.0, 14.0, 37.0, 56.0, 21.0, 25.0, 41.0, 20.0, 65.0, 58.0, 19.0, 38.0, 16.0, 26.0, 9.0, 14.0, 14.0, 17.0, 49.0, 79.0, 22.0, 30.0, 43.0, 42.0, 72.0, 74.0, 63.0, 74.0, 19.0, 6.0, 56.0, 56.0, 55.0, 47.0, 55.0, 64.0, 11.0, 17.0, 31.0, 53.0, 37.0, 31.0, 53.0, 48.0, 66.0, 78.0, 53.0, 39.0, 42.0, 46.0, 19.0, 39.0, 52.0, 46.0, 53.0, 56.0, 65.0, 74.0, 42.0, 49.0, 19.0, 29.0, 50.0, 48.0, 56.0, 59.0, 32.0, 52.0, 50.0, 49.0, 70.0, 80.0, 48.0, 56.0, 68.0, 73.0, 25.0, 28.0, 42.0, 28.0, 53.0, 43.0, 58.0, 69.0, 34.0, 24.0, 19.0, 12.0, 3.0, 3.0, 23.0, 29.0, 73.0, 68.0, 21.0, 34.0, 42.0, 22.0, 35.0, 53.0, 68.0, 80.0, 41.0, 28.0, 99.0, 88.0, 30.0, 20.0, 60.0, 64.0, 59.0, 48.0, 48.0, 45.0, 53.0, 46.0, 57.0, 59.0, 37.0, 35.0, 44.0, 45.0, 20.0, 22.0, 76.0, 70.0, 100.0, 113.0, 57.0, 61.0, 48.0, 34.0, 31.0, 50.0, 3.0, 25.0, 41.0, 32.0, 30.0, 34.0, 43.0, 53.0, 48.0, 53.0, 29.0, 37.0, 91.0, 77.0, 76.0, 70.0, 68.0, 76.0, 51.0, 36.0, 28.0, 33.0, 54.0, 41.0, 19.0, 39.0, 62.0, 60.0, 48.0, 43.0, 30.0, 28.0, 57.0, 81.0, 42.0, 36.0, 30.0, 28.0, 34.0, 24.0, 15.0, 21.0, 57.0, 53.0, 49.0, 27.0, 50.0, 49.0, 76.0, 93.0, 83.0, 84.0, 85.0, 116.0, 35.0, 20.0, 41.0, 63.0, 105.0, 107.0, 17.0, 14.0, 56.0, 59.0, 8.0, 23.0]}, "sampler_perf": {"mean_env_wait_ms": 3.992670298084334, "mean_processing_ms": 0.8636158543743789, "mean_inference_ms": 4.538596678243932}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1032000, "num_steps_sampled": 550400, "sample_time_ms": 195606.597, "load_time_ms": 37.682, "grad_time_ms": 141791.558, 
"update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.547473576627277e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.009549283422529697, "policy_loss": -0.010450693778693676, "vf_loss": 17.197433471679688, "vf_explained_var": 0.4546402394771576, "kl": 0.00132859090808779, "entropy": 1.6366652250289917, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 550400, "episodes_total": 1376, "training_iteration": 43, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-05-57", "timestamp": 1660248357, "time_this_iter_s": 27.376117944717407, "time_total_s": 6777.521076440811, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": 
["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6777.521076440811, "timesteps_since_restore": 550400, "iterations_since_restore": 43, "perf": {"cpu_util_percent": 34.52051282051282, "ram_util_percent": 58.123076923076916}} -{"episode_reward_max": 212.0, "episode_reward_min": 9.0, "episode_reward_mean": 89.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 116.0}, "policy_reward_mean": {"ppo": 44.72}, "custom_metrics": {"sparse_reward_mean": 17.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 55.44, "shaped_reward_min": 9, "shaped_reward_max": 92, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 
0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.54, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 7.64, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.03, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 6.0, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 1.95, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 4.16, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 5.24, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.45, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 1.09, "useful_dish_pickup_agent_0_min": 0, 
"useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.82, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.69, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.0, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 12, "soup_pickup_agent_1_mean": 2.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 1.68, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.64, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.22, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.96, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.16, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 5.24, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.16, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 5.24, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [147.0, 136.0, 50.0, 114.0, 28.0, 49.0, 161.0, 95.0, 135.0, 98.0, 121.0, 165.0, 12.0, 110.0, 104.0, 84.0, 103.0, 9.0, 66.0, 101.0, 31.0, 42.0, 34.0, 50.0, 90.0, 101.0, 33.0, 191.0, 47.0, 61.0, 144.0, 82.0, 118.0, 82.0, 81.0, 28.0, 73.0, 64.0, 96.0, 101.0, 66.0, 168.0, 146.0, 144.0, 87.0, 61.0, 95.0, 58.0, 122.0, 91.0, 58.0, 138.0, 78.0, 58.0, 58.0, 36.0, 110.0, 76.0, 99.0, 169.0, 167.0, 201.0, 55.0, 104.0, 212.0, 31.0, 115.0, 31.0, 45.0, 93.0, 46.0, 61.0, 123.0, 57.0, 42.0, 23.0, 31.0, 128.0, 52.0, 85.0, 146.0, 137.0, 25.0, 112.0, 102.0, 119.0, 28.0, 84.0, 68.0, 101.0, 144.0, 92.0, 88.0, 58.0, 98.0, 109.0, 139.0, 91.0, 48.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [76.0, 71.0, 62.0, 74.0, 27.0, 23.0, 61.0, 53.0, 12.0, 16.0, 18.0, 31.0, 69.0, 92.0, 44.0, 51.0, 81.0, 54.0, 40.0, 58.0, 63.0, 58.0, 85.0, 80.0, 3.0, 9.0, 54.0, 56.0, 50.0, 54.0, 42.0, 42.0, 42.0, 61.0, 0.0, 9.0, 35.0, 31.0, 53.0, 48.0, 22.0, 9.0, 20.0, 22.0, 20.0, 14.0, 14.0, 36.0, 43.0, 47.0, 53.0, 48.0, 13.0, 20.0, 85.0, 106.0, 16.0, 31.0, 26.0, 35.0, 80.0, 64.0, 34.0, 48.0, 57.0, 61.0, 48.0, 34.0, 31.0, 50.0, 3.0, 25.0, 41.0, 32.0, 30.0, 34.0, 43.0, 53.0, 48.0, 53.0, 29.0, 37.0, 91.0, 77.0, 76.0, 70.0, 68.0, 76.0, 51.0, 36.0, 28.0, 33.0, 54.0, 41.0, 19.0, 39.0, 62.0, 60.0, 48.0, 43.0, 30.0, 28.0, 57.0, 81.0, 42.0, 36.0, 30.0, 28.0, 34.0, 24.0, 15.0, 21.0, 57.0, 53.0, 49.0, 27.0, 50.0, 49.0, 76.0, 93.0, 83.0, 84.0, 85.0, 116.0, 35.0, 20.0, 41.0, 63.0, 105.0, 107.0, 17.0, 14.0, 56.0, 59.0, 8.0, 23.0, 31.0, 14.0, 37.0, 56.0, 21.0, 25.0, 41.0, 20.0, 65.0, 58.0, 19.0, 38.0, 16.0, 26.0, 9.0, 14.0, 14.0, 17.0, 49.0, 79.0, 22.0, 30.0, 43.0, 42.0, 72.0, 74.0, 63.0, 74.0, 19.0, 6.0, 56.0, 56.0, 55.0, 47.0, 55.0, 64.0, 11.0, 17.0, 31.0, 53.0, 37.0, 31.0, 53.0, 48.0, 66.0, 78.0, 53.0, 39.0, 42.0, 46.0, 19.0, 39.0, 52.0, 46.0, 53.0, 56.0, 65.0, 74.0, 42.0, 49.0, 19.0, 29.0, 50.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 3.9108563485299497, "mean_processing_ms": 0.8472286288222008, "mean_inference_ms": 4.453100666428265}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1056000, "num_steps_sampled": 563200, "sample_time_ms": 195418.359, "load_time_ms": 37.483, "grad_time_ms": 141705.307, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.2737367883136385e-14, 
"cur_lr": 0.0010000000474974513, "total_loss": -0.010453901253640652, "policy_loss": -0.011599976569414139, "vf_loss": 19.665088653564453, "vf_explained_var": 0.43753400444984436, "kl": 0.0012759790988638997, "entropy": 1.640870451927185, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 563200, "episodes_total": 1408, "training_iteration": 44, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-06-24", "timestamp": 1660248384, "time_this_iter_s": 27.344013929367065, "time_total_s": 6804.865090370178, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": 
Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, 
"output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6804.865090370178, "timesteps_since_restore": 563200, "iterations_since_restore": 44, "perf": {"cpu_util_percent": 32.94102564102564, "ram_util_percent": 58.05128205128204}} -{"episode_reward_max": 239.0, "episode_reward_min": 9.0, "episode_reward_mean": 90.38, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 127.0}, "policy_reward_mean": {"ppo": 45.19}, "custom_metrics": {"sparse_reward_mean": 19.0, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 52.38, "shaped_reward_min": 9, "shaped_reward_max": 92, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.55, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 7.59, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.0, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.95, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 2.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 4.08, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 5.02, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.4, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.28, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.74, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 2.8, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 1.55, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.8, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.08, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 5.02, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.08, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 5.02, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [239.0, 130.0, 113.0, 17.0, 130.0, 162.0, 144.0, 153.0, 152.0, 55.0, 105.0, 53.0, 182.0, 77.0, 161.0, 71.0, 31.0, 95.0, 93.0, 92.0, 25.0, 77.0, 12.0, 47.0, 77.0, 88.0, 193.0, 58.0, 115.0, 63.0, 74.0, 98.0, 212.0, 31.0, 115.0, 31.0, 45.0, 93.0, 46.0, 61.0, 123.0, 57.0, 42.0, 23.0, 31.0, 128.0, 52.0, 85.0, 146.0, 137.0, 25.0, 112.0, 102.0, 119.0, 28.0, 84.0, 68.0, 101.0, 144.0, 92.0, 88.0, 58.0, 98.0, 109.0, 139.0, 91.0, 48.0, 98.0, 147.0, 136.0, 50.0, 114.0, 28.0, 49.0, 161.0, 95.0, 135.0, 98.0, 121.0, 165.0, 12.0, 110.0, 104.0, 84.0, 103.0, 9.0, 66.0, 101.0, 31.0, 42.0, 34.0, 50.0, 90.0, 101.0, 33.0, 191.0, 47.0, 61.0, 144.0, 82.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [127.0, 112.0, 68.0, 62.0, 46.0, 67.0, 5.0, 12.0, 62.0, 68.0, 79.0, 83.0, 71.0, 73.0, 84.0, 69.0, 83.0, 69.0, 30.0, 25.0, 51.0, 54.0, 23.0, 30.0, 91.0, 91.0, 40.0, 37.0, 71.0, 90.0, 23.0, 48.0, 17.0, 14.0, 48.0, 47.0, 50.0, 43.0, 46.0, 46.0, 11.0, 14.0, 29.0, 48.0, 3.0, 9.0, 30.0, 17.0, 40.0, 37.0, 56.0, 32.0, 97.0, 96.0, 33.0, 25.0, 67.0, 48.0, 34.0, 29.0, 37.0, 37.0, 54.0, 44.0, 105.0, 107.0, 17.0, 14.0, 56.0, 59.0, 8.0, 23.0, 31.0, 14.0, 37.0, 56.0, 21.0, 25.0, 41.0, 20.0, 65.0, 58.0, 19.0, 38.0, 16.0, 26.0, 9.0, 14.0, 14.0, 17.0, 49.0, 79.0, 22.0, 30.0, 43.0, 42.0, 72.0, 74.0, 63.0, 74.0, 19.0, 6.0, 56.0, 56.0, 55.0, 47.0, 55.0, 64.0, 11.0, 17.0, 31.0, 53.0, 37.0, 31.0, 53.0, 48.0, 66.0, 78.0, 53.0, 39.0, 42.0, 46.0, 19.0, 39.0, 52.0, 46.0, 53.0, 56.0, 65.0, 74.0, 42.0, 49.0, 19.0, 29.0, 50.0, 48.0, 76.0, 71.0, 62.0, 74.0, 27.0, 23.0, 61.0, 53.0, 12.0, 16.0, 18.0, 31.0, 69.0, 92.0, 44.0, 51.0, 81.0, 54.0, 40.0, 58.0, 63.0, 58.0, 85.0, 80.0, 3.0, 9.0, 54.0, 56.0, 50.0, 54.0, 42.0, 42.0, 42.0, 61.0, 0.0, 9.0, 35.0, 31.0, 53.0, 48.0, 22.0, 9.0, 20.0, 22.0, 20.0, 14.0, 14.0, 36.0, 43.0, 47.0, 53.0, 48.0, 13.0, 20.0, 85.0, 106.0, 16.0, 31.0, 26.0, 35.0, 80.0, 64.0, 34.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 3.83276732749063, "mean_processing_ms": 0.8315868647990772, "mean_inference_ms": 4.371610853440936}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1080000, "num_steps_sampled": 576000, "sample_time_ms": 158697.168, "load_time_ms": 37.561, "grad_time_ms": 45441.027, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.1368683941568192e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.007090561557561159, "policy_loss": 
-0.008278795517981052, "vf_loss": 20.059175491333008, "vf_explained_var": 0.4839383065700531, "kl": 0.0014106096932664514, "entropy": 1.63534414768219, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 576000, "episodes_total": 1440, "training_iteration": 45, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-06-54", "timestamp": 1660248414, "time_this_iter_s": 29.613693952560425, "time_total_s": 6834.478784322739, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, 
"eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, 
"multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6834.478784322739, "timesteps_since_restore": 576000, "iterations_since_restore": 45, "perf": {"cpu_util_percent": 32.607142857142854, "ram_util_percent": 58.099999999999994}} -{"episode_reward_max": 239.0, "episode_reward_min": 9.0, "episode_reward_mean": 94.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 127.0}, "policy_reward_mean": {"ppo": 47.38}, "custom_metrics": {"sparse_reward_mean": 20.8, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 53.16, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 
0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.72, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 7.12, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.09, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.56, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 1.94, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 4.37, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.82, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.27, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.46, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.16, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.78, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 2.53, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 1.43, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.71, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.99, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.73, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.37, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.82, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.37, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.82, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [164.0, 92.0, 107.0, 36.0, 41.0, 99.0, 107.0, 73.0, 42.0, 204.0, 99.0, 139.0, 45.0, 93.0, 104.0, 93.0, 198.0, 148.0, 74.0, 66.0, 152.0, 50.0, 122.0, 112.0, 39.0, 87.0, 122.0, 34.0, 115.0, 84.0, 144.0, 39.0, 139.0, 91.0, 48.0, 98.0, 147.0, 136.0, 50.0, 114.0, 28.0, 49.0, 161.0, 95.0, 135.0, 98.0, 121.0, 165.0, 12.0, 110.0, 104.0, 84.0, 103.0, 9.0, 66.0, 101.0, 31.0, 42.0, 34.0, 50.0, 90.0, 101.0, 33.0, 191.0, 47.0, 61.0, 144.0, 82.0, 239.0, 130.0, 113.0, 17.0, 130.0, 162.0, 144.0, 153.0, 152.0, 55.0, 105.0, 53.0, 182.0, 77.0, 161.0, 71.0, 31.0, 95.0, 93.0, 92.0, 25.0, 77.0, 12.0, 47.0, 77.0, 88.0, 193.0, 58.0, 115.0, 63.0, 74.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [91.0, 73.0, 32.0, 60.0, 45.0, 62.0, 22.0, 14.0, 14.0, 27.0, 53.0, 46.0, 59.0, 48.0, 37.0, 36.0, 17.0, 25.0, 92.0, 112.0, 42.0, 57.0, 65.0, 74.0, 17.0, 28.0, 45.0, 48.0, 43.0, 61.0, 43.0, 50.0, 97.0, 101.0, 72.0, 76.0, 38.0, 36.0, 32.0, 34.0, 81.0, 71.0, 27.0, 23.0, 60.0, 62.0, 59.0, 53.0, 27.0, 12.0, 37.0, 50.0, 58.0, 64.0, 12.0, 22.0, 51.0, 64.0, 45.0, 39.0, 74.0, 70.0, 25.0, 14.0, 65.0, 74.0, 42.0, 49.0, 19.0, 29.0, 50.0, 48.0, 76.0, 71.0, 62.0, 74.0, 27.0, 23.0, 61.0, 53.0, 12.0, 16.0, 18.0, 31.0, 69.0, 92.0, 44.0, 51.0, 81.0, 54.0, 40.0, 58.0, 63.0, 58.0, 85.0, 80.0, 3.0, 9.0, 54.0, 56.0, 50.0, 54.0, 42.0, 42.0, 42.0, 61.0, 0.0, 9.0, 35.0, 31.0, 53.0, 48.0, 22.0, 9.0, 20.0, 22.0, 20.0, 14.0, 14.0, 36.0, 43.0, 47.0, 53.0, 48.0, 13.0, 20.0, 85.0, 106.0, 16.0, 31.0, 26.0, 35.0, 80.0, 64.0, 34.0, 48.0, 127.0, 112.0, 68.0, 62.0, 46.0, 67.0, 5.0, 12.0, 62.0, 68.0, 79.0, 83.0, 71.0, 73.0, 84.0, 69.0, 83.0, 69.0, 30.0, 25.0, 51.0, 54.0, 23.0, 30.0, 91.0, 91.0, 40.0, 37.0, 71.0, 90.0, 23.0, 48.0, 17.0, 14.0, 48.0, 47.0, 50.0, 43.0, 46.0, 46.0, 11.0, 14.0, 29.0, 48.0, 3.0, 9.0, 30.0, 17.0, 40.0, 37.0, 56.0, 32.0, 97.0, 96.0, 33.0, 25.0, 67.0, 48.0, 34.0, 29.0, 37.0, 37.0, 54.0, 44.0]}, "sampler_perf": {"mean_env_wait_ms": 3.75819263232233, "mean_processing_ms": 0.8166563749373907, "mean_inference_ms": 4.294229000839884}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1104000, "num_steps_sampled": 588800, "sample_time_ms": 20385.134, "load_time_ms": 37.538, "grad_time_ms": 8901.075, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.684341970784096e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.00807119719684124, "policy_loss": -0.009133302606642246, "vf_loss": 18.76689338684082, "vf_explained_var": 
0.5084854960441589, "kl": 0.0014663866022601724, "entropy": 1.6291638612747192, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 588800, "episodes_total": 1472, "training_iteration": 46, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-07-23", "timestamp": 1660248443, "time_this_iter_s": 29.134671926498413, "time_total_s": 6863.613456249237, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), 
float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6863.613456249237, "timesteps_since_restore": 588800, "iterations_since_restore": 46, "perf": {"cpu_util_percent": 34.358536585365854, "ram_util_percent": 58.190243902439015}} -{"episode_reward_max": 245.0, "episode_reward_min": 9.0, "episode_reward_mean": 94.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 135.0}, "policy_reward_mean": {"ppo": 47.0}, "custom_metrics": {"sparse_reward_mean": 21.0, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 52.0, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.57, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 6.59, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 5.07, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.13, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 1.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 4.41, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.63, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.29, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.49, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.0, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 0.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, 
"dish_drop_agent_1_mean": 2.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.84, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.78, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.32, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.31, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.65, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.89, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.71, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.41, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.63, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.41, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.63, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [245.0, 133.0, 23.0, 133.0, 37.0, 55.0, 22.0, 55.0, 69.0, 87.0, 68.0, 71.0, 9.0, 9.0, 51.0, 88.0, 136.0, 125.0, 118.0, 139.0, 95.0, 130.0, 67.0, 79.0, 38.0, 82.0, 20.0, 98.0, 190.0, 31.0, 153.0, 104.0, 47.0, 61.0, 144.0, 82.0, 239.0, 130.0, 113.0, 17.0, 130.0, 162.0, 144.0, 153.0, 152.0, 55.0, 105.0, 53.0, 182.0, 77.0, 161.0, 71.0, 31.0, 95.0, 93.0, 92.0, 25.0, 77.0, 12.0, 47.0, 77.0, 88.0, 193.0, 58.0, 115.0, 63.0, 74.0, 98.0, 164.0, 92.0, 107.0, 36.0, 41.0, 99.0, 107.0, 73.0, 42.0, 204.0, 99.0, 139.0, 45.0, 93.0, 104.0, 93.0, 198.0, 148.0, 74.0, 66.0, 152.0, 50.0, 122.0, 112.0, 39.0, 87.0, 122.0, 34.0, 115.0, 84.0, 144.0, 39.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [110.0, 135.0, 59.0, 74.0, 11.0, 12.0, 68.0, 65.0, 22.0, 15.0, 34.0, 21.0, 11.0, 11.0, 23.0, 32.0, 25.0, 44.0, 46.0, 41.0, 42.0, 26.0, 37.0, 34.0, 0.0, 9.0, 0.0, 9.0, 24.0, 27.0, 43.0, 45.0, 66.0, 70.0, 64.0, 61.0, 54.0, 64.0, 63.0, 76.0, 56.0, 39.0, 59.0, 71.0, 30.0, 37.0, 36.0, 43.0, 16.0, 22.0, 40.0, 42.0, 3.0, 17.0, 45.0, 53.0, 91.0, 99.0, 19.0, 12.0, 78.0, 75.0, 59.0, 45.0, 16.0, 31.0, 26.0, 35.0, 80.0, 64.0, 34.0, 48.0, 127.0, 112.0, 68.0, 62.0, 46.0, 67.0, 5.0, 12.0, 62.0, 68.0, 79.0, 83.0, 71.0, 73.0, 84.0, 69.0, 83.0, 69.0, 30.0, 25.0, 51.0, 54.0, 23.0, 30.0, 91.0, 91.0, 40.0, 37.0, 71.0, 90.0, 23.0, 48.0, 17.0, 14.0, 48.0, 47.0, 50.0, 43.0, 46.0, 46.0, 11.0, 14.0, 29.0, 48.0, 3.0, 9.0, 30.0, 17.0, 40.0, 37.0, 56.0, 32.0, 97.0, 96.0, 33.0, 25.0, 67.0, 48.0, 34.0, 29.0, 37.0, 37.0, 54.0, 44.0, 91.0, 73.0, 32.0, 60.0, 45.0, 62.0, 22.0, 14.0, 14.0, 27.0, 53.0, 46.0, 59.0, 48.0, 37.0, 36.0, 17.0, 25.0, 92.0, 112.0, 42.0, 57.0, 65.0, 74.0, 17.0, 28.0, 45.0, 48.0, 43.0, 61.0, 43.0, 50.0, 97.0, 101.0, 72.0, 76.0, 38.0, 36.0, 32.0, 34.0, 81.0, 71.0, 27.0, 23.0, 60.0, 62.0, 59.0, 53.0, 27.0, 12.0, 37.0, 50.0, 58.0, 64.0, 12.0, 22.0, 51.0, 64.0, 45.0, 39.0, 74.0, 70.0, 25.0, 14.0]}, "sampler_perf": {"mean_env_wait_ms": 3.6869910389254943, "mean_processing_ms": 0.8024023118044294, "mean_inference_ms": 4.221576344650746}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1128000, "num_steps_sampled": 601600, "sample_time_ms": 19943.652, "load_time_ms": 37.337, "grad_time_ms": 8965.172, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.842170985392048e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.007924961857497692, "policy_loss": -0.009038448333740234, "vf_loss": 19.34569549560547, "vf_explained_var": 0.504978597164154, "kl": 0.001396413892507553, "entropy": 1.6421631574630737, "entropy_coeff": 
0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 601600, "episodes_total": 1504, "training_iteration": 47, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-07-55", "timestamp": 1660248475, "time_this_iter_s": 32.583869218826294, "time_total_s": 6896.197325468063, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6896.197325468063, "timesteps_since_restore": 601600, "iterations_since_restore": 47, "perf": {"cpu_util_percent": 34.10869565217391, "ram_util_percent": 58.16956521739129}} -{"episode_reward_max": 245.0, "episode_reward_min": 9.0, "episode_reward_mean": 101.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 135.0}, "policy_reward_mean": {"ppo": 50.81}, "custom_metrics": {"sparse_reward_mean": 22.8, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 56.02, "shaped_reward_min": 9, "shaped_reward_max": 98, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, 
"useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.76, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 6.77, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.56, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 5.52, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.89, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.87, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 4.79, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.97, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.5, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.66, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.03, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.0, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, 
"useful_dish_drop_agent_0_mean": 0.83, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.39, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.68, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 1.81, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.82, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.79, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 4.79, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.97, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.79, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.97, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, 
"catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [173.0, 93.0, 109.0, 218.0, 177.0, 155.0, 136.0, 150.0, 96.0, 191.0, 93.0, 98.0, 150.0, 110.0, 76.0, 107.0, 121.0, 158.0, 139.0, 159.0, 38.0, 148.0, 47.0, 117.0, 141.0, 9.0, 141.0, 87.0, 148.0, 144.0, 93.0, 106.0, 115.0, 63.0, 74.0, 98.0, 164.0, 92.0, 107.0, 36.0, 41.0, 99.0, 107.0, 73.0, 42.0, 204.0, 99.0, 139.0, 45.0, 93.0, 104.0, 93.0, 198.0, 148.0, 74.0, 66.0, 152.0, 50.0, 122.0, 112.0, 39.0, 87.0, 122.0, 34.0, 115.0, 84.0, 144.0, 39.0, 245.0, 133.0, 23.0, 133.0, 37.0, 55.0, 22.0, 55.0, 69.0, 87.0, 68.0, 71.0, 9.0, 9.0, 51.0, 88.0, 136.0, 125.0, 118.0, 139.0, 95.0, 130.0, 67.0, 79.0, 38.0, 82.0, 20.0, 98.0, 190.0, 31.0, 153.0, 104.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [88.0, 85.0, 50.0, 43.0, 57.0, 52.0, 94.0, 124.0, 74.0, 103.0, 85.0, 70.0, 76.0, 60.0, 64.0, 86.0, 53.0, 43.0, 89.0, 102.0, 49.0, 44.0, 52.0, 46.0, 84.0, 66.0, 51.0, 59.0, 40.0, 36.0, 53.0, 54.0, 65.0, 56.0, 74.0, 84.0, 68.0, 71.0, 81.0, 78.0, 11.0, 27.0, 67.0, 81.0, 19.0, 28.0, 51.0, 66.0, 73.0, 68.0, 6.0, 3.0, 71.0, 70.0, 36.0, 51.0, 68.0, 80.0, 74.0, 70.0, 42.0, 51.0, 58.0, 48.0, 67.0, 48.0, 34.0, 29.0, 37.0, 37.0, 54.0, 44.0, 91.0, 73.0, 32.0, 60.0, 45.0, 62.0, 22.0, 14.0, 14.0, 27.0, 53.0, 46.0, 59.0, 48.0, 37.0, 36.0, 17.0, 25.0, 92.0, 112.0, 42.0, 57.0, 65.0, 74.0, 17.0, 28.0, 45.0, 48.0, 43.0, 61.0, 43.0, 50.0, 97.0, 101.0, 72.0, 76.0, 38.0, 36.0, 32.0, 34.0, 81.0, 71.0, 27.0, 23.0, 60.0, 62.0, 59.0, 53.0, 27.0, 12.0, 37.0, 50.0, 58.0, 64.0, 12.0, 22.0, 51.0, 64.0, 45.0, 39.0, 74.0, 70.0, 25.0, 14.0, 110.0, 135.0, 59.0, 74.0, 11.0, 12.0, 68.0, 65.0, 22.0, 15.0, 34.0, 21.0, 11.0, 11.0, 23.0, 32.0, 25.0, 44.0, 46.0, 41.0, 42.0, 26.0, 37.0, 34.0, 0.0, 9.0, 0.0, 9.0, 24.0, 27.0, 43.0, 45.0, 66.0, 70.0, 64.0, 61.0, 54.0, 64.0, 63.0, 76.0, 56.0, 39.0, 59.0, 71.0, 30.0, 37.0, 36.0, 43.0, 16.0, 22.0, 40.0, 42.0, 3.0, 17.0, 45.0, 53.0, 91.0, 99.0, 19.0, 12.0, 78.0, 75.0, 59.0, 45.0]}, "sampler_perf": {"mean_env_wait_ms": 3.619074133252118, "mean_processing_ms": 0.7888363298173107, "mean_inference_ms": 4.154787775271583}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1152000, "num_steps_sampled": 614400, "sample_time_ms": 20418.706, "load_time_ms": 37.151, "grad_time_ms": 9069.585, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.421085492696024e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.01129829604178667, "policy_loss": -0.012764283455908298, "vf_loss": 22.684043884277344, "vf_explained_var": 0.5366321206092834, "kl": 0.0014537613606080413, "entropy": 1.6048468351364136, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, 
"optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 614400, "episodes_total": 1536, "training_iteration": 48, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-08-32", "timestamp": 1660248512, "time_this_iter_s": 36.37463116645813, "time_total_s": 6932.5719566345215, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, 
"NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6932.5719566345215, "timesteps_since_restore": 614400, "iterations_since_restore": 48, "perf": {"cpu_util_percent": 36.49999999999999, "ram_util_percent": 58.29423076923076}} -{"episode_reward_max": 245.0, "episode_reward_min": 9.0, "episode_reward_mean": 109.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 135.0}, "policy_reward_mean": {"ppo": 54.89}, "custom_metrics": {"sparse_reward_mean": 25.6, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 58.58, "shaped_reward_min": 9, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, 
"useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.59, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 7.48, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.59, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 6.35, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.62, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 4.63, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.66, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.66, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.45, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.17, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.0, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.74, 
"useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.69, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.67, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.71, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.88, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.63, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.66, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.63, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.66, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, 
"catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [101.0, 110.0, 157.0, 167.0, 84.0, 71.0, 128.0, 42.0, 162.0, 184.0, 88.0, 108.0, 204.0, 95.0, 179.0, 155.0, 147.0, 121.0, 207.0, 76.0, 113.0, 39.0, 134.0, 75.0, 31.0, 9.0, 181.0, 162.0, 204.0, 187.0, 20.0, 167.0, 115.0, 84.0, 144.0, 39.0, 245.0, 133.0, 23.0, 133.0, 37.0, 55.0, 22.0, 55.0, 69.0, 87.0, 68.0, 71.0, 9.0, 9.0, 51.0, 88.0, 136.0, 125.0, 118.0, 139.0, 95.0, 130.0, 67.0, 79.0, 38.0, 82.0, 20.0, 98.0, 190.0, 31.0, 153.0, 104.0, 173.0, 93.0, 109.0, 218.0, 177.0, 155.0, 136.0, 150.0, 96.0, 191.0, 93.0, 98.0, 150.0, 110.0, 76.0, 107.0, 121.0, 158.0, 139.0, 159.0, 38.0, 148.0, 47.0, 117.0, 141.0, 9.0, 141.0, 87.0, 148.0, 144.0, 93.0, 106.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [54.0, 47.0, 50.0, 60.0, 71.0, 86.0, 76.0, 91.0, 46.0, 38.0, 30.0, 41.0, 65.0, 63.0, 24.0, 18.0, 75.0, 87.0, 85.0, 99.0, 42.0, 46.0, 53.0, 55.0, 105.0, 99.0, 50.0, 45.0, 96.0, 83.0, 76.0, 79.0, 69.0, 78.0, 67.0, 54.0, 99.0, 108.0, 30.0, 46.0, 53.0, 60.0, 28.0, 11.0, 57.0, 77.0, 22.0, 53.0, 20.0, 11.0, 3.0, 6.0, 98.0, 83.0, 92.0, 70.0, 93.0, 111.0, 85.0, 102.0, 17.0, 3.0, 82.0, 85.0, 51.0, 64.0, 45.0, 39.0, 74.0, 70.0, 25.0, 14.0, 110.0, 135.0, 59.0, 74.0, 11.0, 12.0, 68.0, 65.0, 22.0, 15.0, 34.0, 21.0, 11.0, 11.0, 23.0, 32.0, 25.0, 44.0, 46.0, 41.0, 42.0, 26.0, 37.0, 34.0, 0.0, 9.0, 0.0, 9.0, 24.0, 27.0, 43.0, 45.0, 66.0, 70.0, 64.0, 61.0, 54.0, 64.0, 63.0, 76.0, 56.0, 39.0, 59.0, 71.0, 30.0, 37.0, 36.0, 43.0, 16.0, 22.0, 40.0, 42.0, 3.0, 17.0, 45.0, 53.0, 91.0, 99.0, 19.0, 12.0, 78.0, 75.0, 59.0, 45.0, 88.0, 85.0, 50.0, 43.0, 57.0, 52.0, 94.0, 124.0, 74.0, 103.0, 85.0, 70.0, 76.0, 60.0, 64.0, 86.0, 53.0, 43.0, 89.0, 102.0, 49.0, 44.0, 52.0, 46.0, 84.0, 66.0, 51.0, 59.0, 40.0, 36.0, 53.0, 54.0, 65.0, 56.0, 74.0, 84.0, 68.0, 71.0, 81.0, 78.0, 11.0, 27.0, 67.0, 81.0, 19.0, 28.0, 51.0, 66.0, 73.0, 68.0, 6.0, 3.0, 71.0, 70.0, 36.0, 51.0, 68.0, 80.0, 74.0, 70.0, 42.0, 51.0, 58.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 3.5541442522194506, "mean_processing_ms": 0.7759022948286116, "mean_inference_ms": 4.093132668646798}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1176000, "num_steps_sampled": 627200, "sample_time_ms": 20998.642, "load_time_ms": 37.333, "grad_time_ms": 9227.616, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.10542746348012e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.0032997550442814827, "policy_loss": -0.004892440978437662, "vf_loss": 23.954416275024414, "vf_explained_var": 0.525080680847168, "kl": 0.0015437895199283957, "entropy": 1.6054998636245728, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 627200, "episodes_total": 1568, "training_iteration": 49, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-09-07", "timestamp": 1660248547, "time_this_iter_s": 35.51046395301819, "time_total_s": 6968.08242058754, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6968.08242058754, "timesteps_since_restore": 627200, "iterations_since_restore": 49, "perf": {"cpu_util_percent": 33.118, "ram_util_percent": 57.68999999999998}} -{"episode_reward_max": 238.0, "episode_reward_min": 9.0, "episode_reward_mean": 123.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 124.0}, "policy_reward_mean": {"ppo": 61.525}, "custom_metrics": {"sparse_reward_mean": 29.6, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 63.85, "shaped_reward_min": 9, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.32, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 8.04, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 6.33, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 6.84, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 1.88, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.89, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.82, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 5.07, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 6.16, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.73, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.48, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.33, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.6, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 2.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.89, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.03, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.84, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.96, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 5.07, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 6.16, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.07, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 6.16, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [127.0, 147.0, 128.0, 210.0, 179.0, 107.0, 141.0, 75.0, 107.0, 175.0, 129.0, 153.0, 88.0, 98.0, 71.0, 105.0, 238.0, 110.0, 164.0, 68.0, 118.0, 107.0, 65.0, 171.0, 209.0, 50.0, 164.0, 99.0, 88.0, 12.0, 122.0, 166.0, 190.0, 31.0, 153.0, 104.0, 173.0, 93.0, 109.0, 218.0, 177.0, 155.0, 136.0, 150.0, 96.0, 191.0, 93.0, 98.0, 150.0, 110.0, 76.0, 107.0, 121.0, 158.0, 139.0, 159.0, 38.0, 148.0, 47.0, 117.0, 141.0, 9.0, 141.0, 87.0, 148.0, 144.0, 93.0, 106.0, 101.0, 110.0, 157.0, 167.0, 84.0, 71.0, 128.0, 42.0, 162.0, 184.0, 88.0, 108.0, 204.0, 95.0, 179.0, 155.0, 147.0, 121.0, 207.0, 76.0, 113.0, 39.0, 134.0, 75.0, 31.0, 9.0, 181.0, 162.0, 204.0, 187.0, 20.0, 167.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400], "policy_ppo_reward": [48.0, 79.0, 74.0, 73.0, 68.0, 60.0, 102.0, 108.0, 87.0, 92.0, 51.0, 56.0, 64.0, 77.0, 41.0, 34.0, 67.0, 40.0, 89.0, 86.0, 68.0, 61.0, 79.0, 74.0, 43.0, 45.0, 49.0, 49.0, 35.0, 36.0, 56.0, 49.0, 122.0, 116.0, 48.0, 62.0, 90.0, 74.0, 34.0, 34.0, 64.0, 54.0, 51.0, 56.0, 34.0, 31.0, 91.0, 80.0, 108.0, 101.0, 25.0, 25.0, 80.0, 84.0, 57.0, 42.0, 42.0, 46.0, 3.0, 9.0, 54.0, 68.0, 88.0, 78.0, 91.0, 99.0, 19.0, 12.0, 78.0, 75.0, 59.0, 45.0, 88.0, 85.0, 50.0, 43.0, 57.0, 52.0, 94.0, 124.0, 74.0, 103.0, 85.0, 70.0, 76.0, 60.0, 64.0, 86.0, 53.0, 43.0, 89.0, 102.0, 49.0, 44.0, 52.0, 46.0, 84.0, 66.0, 51.0, 59.0, 40.0, 36.0, 53.0, 54.0, 65.0, 56.0, 74.0, 84.0, 68.0, 71.0, 81.0, 78.0, 11.0, 27.0, 67.0, 81.0, 19.0, 28.0, 51.0, 66.0, 73.0, 68.0, 6.0, 3.0, 71.0, 70.0, 36.0, 51.0, 68.0, 80.0, 74.0, 70.0, 42.0, 51.0, 58.0, 48.0, 54.0, 47.0, 50.0, 60.0, 71.0, 86.0, 76.0, 91.0, 46.0, 38.0, 30.0, 41.0, 65.0, 63.0, 24.0, 18.0, 75.0, 87.0, 85.0, 99.0, 42.0, 46.0, 53.0, 55.0, 105.0, 99.0, 50.0, 45.0, 96.0, 83.0, 76.0, 79.0, 69.0, 78.0, 67.0, 54.0, 99.0, 108.0, 30.0, 46.0, 53.0, 60.0, 28.0, 11.0, 57.0, 77.0, 22.0, 53.0, 20.0, 11.0, 3.0, 6.0, 98.0, 83.0, 92.0, 70.0, 93.0, 111.0, 85.0, 102.0, 17.0, 3.0, 82.0, 85.0]}, "sampler_perf": {"mean_env_wait_ms": 3.491898537786064, "mean_processing_ms": 0.7635432106455172, "mean_inference_ms": 4.03419160595968}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1200000, "num_steps_sampled": 640000, "sample_time_ms": 21322.647, "load_time_ms": 37.352, "grad_time_ms": 9260.248, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 3.55271373174006e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.005630036350339651, "policy_loss": -0.007203007582575083, "vf_loss": 23.694684982299805, "vf_explained_var": 0.5489806532859802, "kl": 0.0013687704922631383, "entropy": 1.592978835105896, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": 
false, "timesteps_total": 640000, "episodes_total": 1600, "training_iteration": 50, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-09-39", "timestamp": 1660248579, "time_this_iter_s": 31.292397022247314, "time_total_s": 6999.374817609787, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", 
"normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": 
false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 6999.374817609787, "timesteps_since_restore": 640000, "iterations_since_restore": 50, "perf": {"cpu_util_percent": 38.30227272727273, "ram_util_percent": 57.75909090909093}} -{"episode_reward_max": 238.0, "episode_reward_min": 9.0, "episode_reward_mean": 125.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 122.0}, "policy_reward_mean": {"ppo": 62.735}, "custom_metrics": {"sparse_reward_mean": 29.2, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 67.07, "shaped_reward_min": 9, "shaped_reward_max": 115, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, 
"useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.57, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 8.17, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 6.51, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 6.96, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 1.87, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.89, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.35, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 6.28, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.84, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.48, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.58, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, 
"useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.1, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.95, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.96, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.79, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.85, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 5.35, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 6.28, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.35, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 6.28, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, 
"catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [98.0, 63.0, 63.0, 192.0, 93.0, 178.0, 196.0, 53.0, 109.0, 156.0, 136.0, 166.0, 163.0, 175.0, 118.0, 115.0, 137.0, 115.0, 110.0, 155.0, 132.0, 79.0, 158.0, 213.0, 104.0, 153.0, 70.0, 195.0, 153.0, 85.0, 101.0, 123.0, 148.0, 144.0, 93.0, 106.0, 101.0, 110.0, 157.0, 167.0, 84.0, 71.0, 128.0, 42.0, 162.0, 184.0, 88.0, 108.0, 204.0, 95.0, 179.0, 155.0, 147.0, 121.0, 207.0, 76.0, 113.0, 39.0, 134.0, 75.0, 31.0, 9.0, 181.0, 162.0, 204.0, 187.0, 20.0, 167.0, 127.0, 147.0, 128.0, 210.0, 179.0, 107.0, 141.0, 75.0, 107.0, 175.0, 129.0, 153.0, 88.0, 98.0, 71.0, 105.0, 238.0, 110.0, 164.0, 68.0, 118.0, 107.0, 65.0, 171.0, 209.0, 50.0, 164.0, 99.0, 88.0, 12.0, 122.0, 166.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [36.0, 62.0, 38.0, 
25.0, 35.0, 28.0, 94.0, 98.0, 41.0, 52.0, 86.0, 92.0, 106.0, 90.0, 24.0, 29.0, 63.0, 46.0, 63.0, 93.0, 73.0, 63.0, 85.0, 81.0, 90.0, 73.0, 71.0, 104.0, 70.0, 48.0, 56.0, 59.0, 71.0, 66.0, 56.0, 59.0, 54.0, 56.0, 73.0, 82.0, 65.0, 67.0, 44.0, 35.0, 71.0, 87.0, 108.0, 105.0, 58.0, 46.0, 88.0, 65.0, 35.0, 35.0, 107.0, 88.0, 70.0, 83.0, 48.0, 37.0, 59.0, 42.0, 63.0, 60.0, 68.0, 80.0, 74.0, 70.0, 42.0, 51.0, 58.0, 48.0, 54.0, 47.0, 50.0, 60.0, 71.0, 86.0, 76.0, 91.0, 46.0, 38.0, 30.0, 41.0, 65.0, 63.0, 24.0, 18.0, 75.0, 87.0, 85.0, 99.0, 42.0, 46.0, 53.0, 55.0, 105.0, 99.0, 50.0, 45.0, 96.0, 83.0, 76.0, 79.0, 69.0, 78.0, 67.0, 54.0, 99.0, 108.0, 30.0, 46.0, 53.0, 60.0, 28.0, 11.0, 57.0, 77.0, 22.0, 53.0, 20.0, 11.0, 3.0, 6.0, 98.0, 83.0, 92.0, 70.0, 93.0, 111.0, 85.0, 102.0, 17.0, 3.0, 82.0, 85.0, 48.0, 79.0, 74.0, 73.0, 68.0, 60.0, 102.0, 108.0, 87.0, 92.0, 51.0, 56.0, 64.0, 77.0, 41.0, 34.0, 67.0, 40.0, 89.0, 86.0, 68.0, 61.0, 79.0, 74.0, 43.0, 45.0, 49.0, 49.0, 35.0, 36.0, 56.0, 49.0, 122.0, 116.0, 48.0, 62.0, 90.0, 74.0, 34.0, 34.0, 64.0, 54.0, 51.0, 56.0, 34.0, 31.0, 91.0, 80.0, 108.0, 101.0, 25.0, 25.0, 80.0, 84.0, 57.0, 42.0, 42.0, 46.0, 3.0, 9.0, 54.0, 68.0, 88.0, 78.0]}, "sampler_perf": {"mean_env_wait_ms": 3.4320001935473794, "mean_processing_ms": 0.7516646134052514, "mean_inference_ms": 3.975737374961292}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1224000, "num_steps_sampled": 652800, "sample_time_ms": 21552.858, "load_time_ms": 37.216, "grad_time_ms": 9227.975, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 1.77635686587003e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.0032480310183018446, "policy_loss": -0.004836531355977058, "vf_loss": 23.794113159179688, "vf_explained_var": 0.5322676301002502, "kl": 0.0011860225349664688, "entropy": 1.5818275213241577, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 652800, 
"episodes_total": 1632, "training_iteration": 51, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-10-09", "timestamp": 1660248609, "time_this_iter_s": 29.93578290939331, "time_total_s": 7029.31060051918, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, 
"clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, 
"policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7029.31060051918, "timesteps_since_restore": 652800, "iterations_since_restore": 51, "perf": {"cpu_util_percent": 36.09523809523809, "ram_util_percent": 57.82142857142857}} -{"episode_reward_max": 264.0, "episode_reward_min": 12.0, "episode_reward_mean": 128.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 139.0}, "policy_reward_mean": {"ppo": 64.12}, "custom_metrics": {"sparse_reward_mean": 30.2, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 67.84, "shaped_reward_min": 12, "shaped_reward_max": 115, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.73, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 7.95, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.6, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 6.65, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 1.75, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.97, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.97, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.65, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.99, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.73, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.52, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.15, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.97, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.7, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.08, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.14, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.04, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.78, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.88, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 5.65, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.99, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.65, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.99, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [141.0, 138.0, 143.0, 211.0, 127.0, 20.0, 65.0, 150.0, 93.0, 153.0, 47.0, 144.0, 264.0, 148.0, 59.0, 121.0, 39.0, 204.0, 187.0, 208.0, 121.0, 124.0, 14.0, 95.0, 81.0, 107.0, 198.0, 114.0, 104.0, 152.0, 238.0, 88.0, 204.0, 187.0, 20.0, 167.0, 127.0, 147.0, 128.0, 210.0, 179.0, 107.0, 141.0, 75.0, 107.0, 175.0, 129.0, 153.0, 88.0, 98.0, 71.0, 105.0, 238.0, 110.0, 164.0, 68.0, 118.0, 107.0, 65.0, 171.0, 209.0, 50.0, 164.0, 99.0, 88.0, 12.0, 122.0, 166.0, 98.0, 63.0, 63.0, 192.0, 93.0, 178.0, 196.0, 53.0, 109.0, 156.0, 136.0, 166.0, 163.0, 175.0, 118.0, 115.0, 137.0, 115.0, 110.0, 155.0, 132.0, 79.0, 158.0, 213.0, 104.0, 153.0, 70.0, 195.0, 153.0, 85.0, 101.0, 123.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [65.0, 76.0, 57.0, 81.0, 92.0, 51.0, 115.0, 96.0, 62.0, 65.0, 
9.0, 11.0, 36.0, 29.0, 65.0, 85.0, 54.0, 39.0, 88.0, 65.0, 33.0, 14.0, 65.0, 79.0, 125.0, 139.0, 77.0, 71.0, 33.0, 26.0, 65.0, 56.0, 20.0, 19.0, 91.0, 113.0, 98.0, 89.0, 110.0, 98.0, 53.0, 68.0, 57.0, 67.0, 8.0, 6.0, 56.0, 39.0, 47.0, 34.0, 46.0, 61.0, 93.0, 105.0, 55.0, 59.0, 64.0, 40.0, 73.0, 79.0, 125.0, 113.0, 40.0, 48.0, 93.0, 111.0, 85.0, 102.0, 17.0, 3.0, 82.0, 85.0, 48.0, 79.0, 74.0, 73.0, 68.0, 60.0, 102.0, 108.0, 87.0, 92.0, 51.0, 56.0, 64.0, 77.0, 41.0, 34.0, 67.0, 40.0, 89.0, 86.0, 68.0, 61.0, 79.0, 74.0, 43.0, 45.0, 49.0, 49.0, 35.0, 36.0, 56.0, 49.0, 122.0, 116.0, 48.0, 62.0, 90.0, 74.0, 34.0, 34.0, 64.0, 54.0, 51.0, 56.0, 34.0, 31.0, 91.0, 80.0, 108.0, 101.0, 25.0, 25.0, 80.0, 84.0, 57.0, 42.0, 42.0, 46.0, 3.0, 9.0, 54.0, 68.0, 88.0, 78.0, 36.0, 62.0, 38.0, 25.0, 35.0, 28.0, 94.0, 98.0, 41.0, 52.0, 86.0, 92.0, 106.0, 90.0, 24.0, 29.0, 63.0, 46.0, 63.0, 93.0, 73.0, 63.0, 85.0, 81.0, 90.0, 73.0, 71.0, 104.0, 70.0, 48.0, 56.0, 59.0, 71.0, 66.0, 56.0, 59.0, 54.0, 56.0, 73.0, 82.0, 65.0, 67.0, 44.0, 35.0, 71.0, 87.0, 108.0, 105.0, 58.0, 46.0, 88.0, 65.0, 35.0, 35.0, 107.0, 88.0, 70.0, 83.0, 48.0, 37.0, 59.0, 42.0, 63.0, 60.0]}, "sampler_perf": {"mean_env_wait_ms": 3.3744175644507366, "mean_processing_ms": 0.7402432912819328, "mean_inference_ms": 3.918069988577329}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1248000, "num_steps_sampled": 665600, "sample_time_ms": 21746.558, "load_time_ms": 37.515, "grad_time_ms": 9170.716, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 8.88178432935015e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.00774852791801095, "policy_loss": -0.009392179548740387, "vf_loss": 24.351181030273438, "vf_explained_var": 0.5798514485359192, "kl": 0.0016348478384315968, "entropy": 1.5829213857650757, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 665600, "episodes_total": 1664, "training_iteration": 
52, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-10-39", "timestamp": 1660248639, "time_this_iter_s": 30.649518966674805, "time_total_s": 7059.960119485855, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": 
"deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 
0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7059.960119485855, "timesteps_since_restore": 665600, "iterations_since_restore": 52, "perf": {"cpu_util_percent": 41.01818181818182, "ram_util_percent": 57.649999999999984}} -{"episode_reward_max": 293.0, "episode_reward_min": 12.0, "episode_reward_mean": 128.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 154.0}, "policy_reward_mean": {"ppo": 64.38}, "custom_metrics": {"sparse_reward_mean": 30.0, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 68.76, "shaped_reward_min": 12, "shaped_reward_max": 115, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.92, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 7.71, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.68, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.5, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 1.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.92, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.85, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.81, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.75, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.45, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.72, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.16, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.04, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.85, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.87, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 5.85, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.81, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.85, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.81, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [207.0, 153.0, 195.0, 101.0, 90.0, 210.0, 225.0, 106.0, 121.0, 75.0, 60.0, 170.0, 23.0, 130.0, 147.0, 136.0, 39.0, 133.0, 158.0, 101.0, 66.0, 116.0, 50.0, 167.0, 146.0, 293.0, 36.0, 109.0, 245.0, 121.0, 207.0, 97.0, 88.0, 12.0, 122.0, 166.0, 98.0, 63.0, 63.0, 192.0, 93.0, 178.0, 196.0, 53.0, 109.0, 156.0, 136.0, 166.0, 163.0, 175.0, 118.0, 115.0, 137.0, 115.0, 110.0, 155.0, 132.0, 79.0, 158.0, 213.0, 104.0, 153.0, 70.0, 195.0, 153.0, 85.0, 101.0, 123.0, 141.0, 138.0, 143.0, 211.0, 127.0, 20.0, 65.0, 150.0, 93.0, 153.0, 47.0, 144.0, 264.0, 148.0, 59.0, 121.0, 39.0, 204.0, 187.0, 208.0, 121.0, 124.0, 14.0, 95.0, 81.0, 107.0, 198.0, 114.0, 104.0, 152.0, 238.0, 88.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [105.0, 102.0, 68.0, 85.0, 96.0, 99.0, 53.0, 48.0, 38.0, 52.0, 
106.0, 104.0, 113.0, 112.0, 53.0, 53.0, 59.0, 62.0, 47.0, 28.0, 22.0, 38.0, 90.0, 80.0, 9.0, 14.0, 70.0, 60.0, 82.0, 65.0, 70.0, 66.0, 19.0, 20.0, 64.0, 69.0, 96.0, 62.0, 62.0, 39.0, 31.0, 35.0, 70.0, 46.0, 27.0, 23.0, 82.0, 85.0, 71.0, 75.0, 154.0, 139.0, 16.0, 20.0, 56.0, 53.0, 126.0, 119.0, 62.0, 59.0, 103.0, 104.0, 45.0, 52.0, 42.0, 46.0, 3.0, 9.0, 54.0, 68.0, 88.0, 78.0, 36.0, 62.0, 38.0, 25.0, 35.0, 28.0, 94.0, 98.0, 41.0, 52.0, 86.0, 92.0, 106.0, 90.0, 24.0, 29.0, 63.0, 46.0, 63.0, 93.0, 73.0, 63.0, 85.0, 81.0, 90.0, 73.0, 71.0, 104.0, 70.0, 48.0, 56.0, 59.0, 71.0, 66.0, 56.0, 59.0, 54.0, 56.0, 73.0, 82.0, 65.0, 67.0, 44.0, 35.0, 71.0, 87.0, 108.0, 105.0, 58.0, 46.0, 88.0, 65.0, 35.0, 35.0, 107.0, 88.0, 70.0, 83.0, 48.0, 37.0, 59.0, 42.0, 63.0, 60.0, 65.0, 76.0, 57.0, 81.0, 92.0, 51.0, 115.0, 96.0, 62.0, 65.0, 9.0, 11.0, 36.0, 29.0, 65.0, 85.0, 54.0, 39.0, 88.0, 65.0, 33.0, 14.0, 65.0, 79.0, 125.0, 139.0, 77.0, 71.0, 33.0, 26.0, 65.0, 56.0, 20.0, 19.0, 91.0, 113.0, 98.0, 89.0, 110.0, 98.0, 53.0, 68.0, 57.0, 67.0, 8.0, 6.0, 56.0, 39.0, 47.0, 34.0, 46.0, 61.0, 93.0, 105.0, 55.0, 59.0, 64.0, 40.0, 73.0, 79.0, 125.0, 113.0, 40.0, 48.0]}, "sampler_perf": {"mean_env_wait_ms": 3.319165284336134, "mean_processing_ms": 0.7292810578162132, "mean_inference_ms": 3.8629630777095305}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1272000, "num_steps_sampled": 678400, "sample_time_ms": 22200.855, "load_time_ms": 37.251, "grad_time_ms": 9256.826, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 4.440892164675075e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.006719778757542372, "policy_loss": -0.008363676257431507, "vf_loss": 24.369796752929688, "vf_explained_var": 0.5794721245765686, "kl": 0.0014526437735185027, "entropy": 1.586159348487854, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 678400, "episodes_total": 1696, 
"training_iteration": 53, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-11-12", "timestamp": 1660248672, "time_this_iter_s": 32.775245904922485, "time_total_s": 7092.735365390778, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7092.735365390778, "timesteps_since_restore": 678400, "iterations_since_restore": 53, "perf": {"cpu_util_percent": 38.126086956521746, "ram_util_percent": 57.626086956521746}} -{"episode_reward_max": 296.0, "episode_reward_min": 14.0, "episode_reward_mean": 132.59, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 154.0}, "policy_reward_mean": {"ppo": 66.295}, "custom_metrics": {"sparse_reward_mean": 31.4, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 69.79, "shaped_reward_min": 14, "shaped_reward_max": 110, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.04, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 7.88, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.88, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.68, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 15, "onion_drop_agent_0_mean": 1.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.93, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.95, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.92, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 4.44, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.47, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.36, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.61, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.12, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.22, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 2.14, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.14, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.84, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.97, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 5.95, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.92, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.95, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.92, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [205.0, 61.0, 126.0, 104.0, 179.0, 296.0, 72.0, 126.0, 172.0, 17.0, 155.0, 207.0, 227.0, 101.0, 110.0, 101.0, 207.0, 77.0, 155.0, 159.0, 155.0, 158.0, 150.0, 96.0, 81.0, 125.0, 155.0, 42.0, 270.0, 155.0, 147.0, 75.0, 153.0, 85.0, 101.0, 123.0, 141.0, 138.0, 143.0, 211.0, 127.0, 20.0, 65.0, 150.0, 93.0, 153.0, 47.0, 144.0, 264.0, 148.0, 59.0, 121.0, 39.0, 204.0, 187.0, 208.0, 121.0, 124.0, 14.0, 95.0, 81.0, 107.0, 198.0, 114.0, 104.0, 152.0, 238.0, 88.0, 207.0, 153.0, 195.0, 101.0, 90.0, 210.0, 225.0, 106.0, 121.0, 75.0, 60.0, 170.0, 23.0, 130.0, 147.0, 136.0, 39.0, 133.0, 158.0, 101.0, 66.0, 116.0, 50.0, 167.0, 146.0, 293.0, 36.0, 109.0, 245.0, 121.0, 207.0, 97.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [105.0, 100.0, 28.0, 33.0, 65.0, 61.0, 51.0, 53.0, 92.0, 87.0, 
143.0, 153.0, 32.0, 40.0, 69.0, 57.0, 75.0, 97.0, 3.0, 14.0, 79.0, 76.0, 107.0, 100.0, 102.0, 125.0, 37.0, 64.0, 49.0, 61.0, 42.0, 59.0, 104.0, 103.0, 43.0, 34.0, 68.0, 87.0, 77.0, 82.0, 64.0, 91.0, 74.0, 84.0, 84.0, 66.0, 50.0, 46.0, 42.0, 39.0, 69.0, 56.0, 86.0, 69.0, 23.0, 19.0, 138.0, 132.0, 65.0, 90.0, 74.0, 73.0, 33.0, 42.0, 70.0, 83.0, 48.0, 37.0, 59.0, 42.0, 63.0, 60.0, 65.0, 76.0, 57.0, 81.0, 92.0, 51.0, 115.0, 96.0, 62.0, 65.0, 9.0, 11.0, 36.0, 29.0, 65.0, 85.0, 54.0, 39.0, 88.0, 65.0, 33.0, 14.0, 65.0, 79.0, 125.0, 139.0, 77.0, 71.0, 33.0, 26.0, 65.0, 56.0, 20.0, 19.0, 91.0, 113.0, 98.0, 89.0, 110.0, 98.0, 53.0, 68.0, 57.0, 67.0, 8.0, 6.0, 56.0, 39.0, 47.0, 34.0, 46.0, 61.0, 93.0, 105.0, 55.0, 59.0, 64.0, 40.0, 73.0, 79.0, 125.0, 113.0, 40.0, 48.0, 105.0, 102.0, 68.0, 85.0, 96.0, 99.0, 53.0, 48.0, 38.0, 52.0, 106.0, 104.0, 113.0, 112.0, 53.0, 53.0, 59.0, 62.0, 47.0, 28.0, 22.0, 38.0, 90.0, 80.0, 9.0, 14.0, 70.0, 60.0, 82.0, 65.0, 70.0, 66.0, 19.0, 20.0, 64.0, 69.0, 96.0, 62.0, 62.0, 39.0, 31.0, 35.0, 70.0, 46.0, 27.0, 23.0, 82.0, 85.0, 71.0, 75.0, 154.0, 139.0, 16.0, 20.0, 56.0, 53.0, 126.0, 119.0, 62.0, 59.0, 103.0, 104.0, 45.0, 52.0]}, "sampler_perf": {"mean_env_wait_ms": 3.2660665746708606, "mean_processing_ms": 0.718736935276724, "mean_inference_ms": 3.8108664134221066}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1296000, "num_steps_sampled": 691200, "sample_time_ms": 22619.672, "load_time_ms": 37.409, "grad_time_ms": 9296.582, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 2.2204460823375376e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.006839328911155462, "policy_loss": -0.008672266267240047, "vf_loss": 26.157081604003906, "vf_explained_var": 0.5799071192741394, "kl": 0.0013235282385721803, "entropy": 1.5655454397201538, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 691200, "episodes_total": 1728, 
"training_iteration": 54, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-11-44", "timestamp": 1660248704, "time_this_iter_s": 31.93130087852478, "time_total_s": 7124.666666269302, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7124.666666269302, "timesteps_since_restore": 691200, "iterations_since_restore": 54, "perf": {"cpu_util_percent": 38.36222222222223, "ram_util_percent": 57.54666666666667}} -{"episode_reward_max": 301.0, "episode_reward_min": 17.0, "episode_reward_mean": 138.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 155.0}, "policy_reward_mean": {"ppo": 69.05}, "custom_metrics": {"sparse_reward_mean": 33.6, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 70.9, "shaped_reward_min": 17, "shaped_reward_max": 113, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 
0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.15, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 8.01, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.92, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.88, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 15, "onion_drop_agent_0_mean": 1.72, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.97, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.92, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 6.05, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 6.19, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 4.59, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.72, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.79, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.58, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.66, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.16, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.24, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 2.13, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.93, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.89, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 6.05, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 6.19, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.05, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 6.19, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [144.0, 88.0, 37.0, 77.0, 106.0, 117.0, 253.0, 98.0, 179.0, 104.0, 139.0, 152.0, 64.0, 74.0, 273.0, 67.0, 150.0, 163.0, 78.0, 68.0, 141.0, 187.0, 39.0, 158.0, 202.0, 301.0, 153.0, 247.0, 199.0, 163.0, 115.0, 193.0, 104.0, 152.0, 238.0, 88.0, 207.0, 153.0, 195.0, 101.0, 90.0, 210.0, 225.0, 106.0, 121.0, 75.0, 60.0, 170.0, 23.0, 130.0, 147.0, 136.0, 39.0, 133.0, 158.0, 101.0, 66.0, 116.0, 50.0, 167.0, 146.0, 293.0, 36.0, 109.0, 245.0, 121.0, 207.0, 97.0, 205.0, 61.0, 126.0, 104.0, 179.0, 296.0, 72.0, 126.0, 172.0, 17.0, 155.0, 207.0, 227.0, 101.0, 110.0, 101.0, 207.0, 77.0, 155.0, 159.0, 155.0, 158.0, 150.0, 96.0, 81.0, 125.0, 155.0, 42.0, 270.0, 155.0, 147.0, 75.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [65.0, 79.0, 41.0, 47.0, 14.0, 23.0, 26.0, 51.0, 61.0, 45.0, 
55.0, 62.0, 125.0, 128.0, 53.0, 45.0, 81.0, 98.0, 50.0, 54.0, 68.0, 71.0, 80.0, 72.0, 33.0, 31.0, 29.0, 45.0, 142.0, 131.0, 39.0, 28.0, 62.0, 88.0, 88.0, 75.0, 27.0, 51.0, 39.0, 29.0, 71.0, 70.0, 82.0, 105.0, 20.0, 19.0, 66.0, 92.0, 97.0, 105.0, 146.0, 155.0, 71.0, 82.0, 128.0, 119.0, 113.0, 86.0, 94.0, 69.0, 62.0, 53.0, 107.0, 86.0, 64.0, 40.0, 73.0, 79.0, 125.0, 113.0, 40.0, 48.0, 105.0, 102.0, 68.0, 85.0, 96.0, 99.0, 53.0, 48.0, 38.0, 52.0, 106.0, 104.0, 113.0, 112.0, 53.0, 53.0, 59.0, 62.0, 47.0, 28.0, 22.0, 38.0, 90.0, 80.0, 9.0, 14.0, 70.0, 60.0, 82.0, 65.0, 70.0, 66.0, 19.0, 20.0, 64.0, 69.0, 96.0, 62.0, 62.0, 39.0, 31.0, 35.0, 70.0, 46.0, 27.0, 23.0, 82.0, 85.0, 71.0, 75.0, 154.0, 139.0, 16.0, 20.0, 56.0, 53.0, 126.0, 119.0, 62.0, 59.0, 103.0, 104.0, 45.0, 52.0, 105.0, 100.0, 28.0, 33.0, 65.0, 61.0, 51.0, 53.0, 92.0, 87.0, 143.0, 153.0, 32.0, 40.0, 69.0, 57.0, 75.0, 97.0, 3.0, 14.0, 79.0, 76.0, 107.0, 100.0, 102.0, 125.0, 37.0, 64.0, 49.0, 61.0, 42.0, 59.0, 104.0, 103.0, 43.0, 34.0, 68.0, 87.0, 77.0, 82.0, 64.0, 91.0, 74.0, 84.0, 84.0, 66.0, 50.0, 46.0, 42.0, 39.0, 69.0, 56.0, 86.0, 69.0, 23.0, 19.0, 138.0, 132.0, 65.0, 90.0, 74.0, 73.0, 33.0, 42.0]}, "sampler_perf": {"mean_env_wait_ms": 3.214861984478974, "mean_processing_ms": 0.7085517018126862, "mean_inference_ms": 3.760458478559153}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1320000, "num_steps_sampled": 704000, "sample_time_ms": 22712.594, "load_time_ms": 37.614, "grad_time_ms": 9158.532, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 1.1102230411687688e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.007639638613909483, "policy_loss": -0.00974108837544918, "vf_loss": 28.871795654296875, "vf_explained_var": 0.5772756934165955, "kl": 0.0015572212869301438, "entropy": 1.5714462995529175, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 704000, "episodes_total": 1760, 
"training_iteration": 55, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-12-13", "timestamp": 1660248733, "time_this_iter_s": 29.164530992507935, "time_total_s": 7153.83119726181, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7153.83119726181, "timesteps_since_restore": 704000, "iterations_since_restore": 55, "perf": {"cpu_util_percent": 37.4609756097561, "ram_util_percent": 57.50487804878048}} -{"episode_reward_max": 301.0, "episode_reward_min": 12.0, "episode_reward_mean": 149.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 161.0}, "policy_reward_mean": {"ppo": 74.835}, "custom_metrics": {"sparse_reward_mean": 37.8, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 74.07, "shaped_reward_min": 12, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 
0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.1, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 8.78, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 6.98, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 7.57, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 5.98, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 6.87, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.43, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.52, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.1, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.33, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 2.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.34, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.8, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.89, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 5.98, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 6.87, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.98, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 6.87, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [203.0, 136.0, 112.0, 259.0, 164.0, 150.0, 167.0, 93.0, 178.0, 227.0, 236.0, 250.0, 218.0, 169.0, 145.0, 159.0, 41.0, 12.0, 144.0, 131.0, 298.0, 47.0, 201.0, 213.0, 176.0, 155.0, 238.0, 115.0, 219.0, 82.0, 216.0, 148.0, 245.0, 121.0, 207.0, 97.0, 205.0, 61.0, 126.0, 104.0, 179.0, 296.0, 72.0, 126.0, 172.0, 17.0, 155.0, 207.0, 227.0, 101.0, 110.0, 101.0, 207.0, 77.0, 155.0, 159.0, 155.0, 158.0, 150.0, 96.0, 81.0, 125.0, 155.0, 42.0, 270.0, 155.0, 147.0, 75.0, 144.0, 88.0, 37.0, 77.0, 106.0, 117.0, 253.0, 98.0, 179.0, 104.0, 139.0, 152.0, 64.0, 74.0, 273.0, 67.0, 150.0, 163.0, 78.0, 68.0, 141.0, 187.0, 39.0, 158.0, 202.0, 301.0, 153.0, 247.0, 199.0, 163.0, 115.0, 193.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [103.0, 100.0, 70.0, 66.0, 69.0, 43.0, 134.0, 125.0, 81.0, 
83.0, 62.0, 88.0, 71.0, 96.0, 39.0, 54.0, 89.0, 89.0, 104.0, 123.0, 124.0, 112.0, 119.0, 131.0, 111.0, 107.0, 73.0, 96.0, 74.0, 71.0, 60.0, 99.0, 19.0, 22.0, 6.0, 6.0, 77.0, 67.0, 66.0, 65.0, 161.0, 137.0, 9.0, 38.0, 92.0, 109.0, 106.0, 107.0, 79.0, 97.0, 74.0, 81.0, 107.0, 131.0, 56.0, 59.0, 106.0, 113.0, 40.0, 42.0, 111.0, 105.0, 55.0, 93.0, 126.0, 119.0, 62.0, 59.0, 103.0, 104.0, 45.0, 52.0, 105.0, 100.0, 28.0, 33.0, 65.0, 61.0, 51.0, 53.0, 92.0, 87.0, 143.0, 153.0, 32.0, 40.0, 69.0, 57.0, 75.0, 97.0, 3.0, 14.0, 79.0, 76.0, 107.0, 100.0, 102.0, 125.0, 37.0, 64.0, 49.0, 61.0, 42.0, 59.0, 104.0, 103.0, 43.0, 34.0, 68.0, 87.0, 77.0, 82.0, 64.0, 91.0, 74.0, 84.0, 84.0, 66.0, 50.0, 46.0, 42.0, 39.0, 69.0, 56.0, 86.0, 69.0, 23.0, 19.0, 138.0, 132.0, 65.0, 90.0, 74.0, 73.0, 33.0, 42.0, 65.0, 79.0, 41.0, 47.0, 14.0, 23.0, 26.0, 51.0, 61.0, 45.0, 55.0, 62.0, 125.0, 128.0, 53.0, 45.0, 81.0, 98.0, 50.0, 54.0, 68.0, 71.0, 80.0, 72.0, 33.0, 31.0, 29.0, 45.0, 142.0, 131.0, 39.0, 28.0, 62.0, 88.0, 88.0, 75.0, 27.0, 51.0, 39.0, 29.0, 71.0, 70.0, 82.0, 105.0, 20.0, 19.0, 66.0, 92.0, 97.0, 105.0, 146.0, 155.0, 71.0, 82.0, 128.0, 119.0, 113.0, 86.0, 94.0, 69.0, 62.0, 53.0, 107.0, 86.0]}, "sampler_perf": {"mean_env_wait_ms": 3.16541739890218, "mean_processing_ms": 0.6987189186943766, "mean_inference_ms": 3.711599248384498}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1344000, "num_steps_sampled": 716800, "sample_time_ms": 22951.468, "load_time_ms": 37.343, "grad_time_ms": 9065.669, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 5.551115205843844e-18, "cur_lr": 0.0010000000474974513, "total_loss": -0.004821139387786388, "policy_loss": -0.006896324921399355, "vf_loss": 28.4981746673584, "vf_explained_var": 0.6428199410438538, "kl": 0.0015486044576391578, "entropy": 1.5492569208145142, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 716800, 
"episodes_total": 1792, "training_iteration": 56, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-12-44", "timestamp": 1660248764, "time_this_iter_s": 30.594375133514404, "time_total_s": 7184.425572395325, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, 
"clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, 
"policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7184.425572395325, "timesteps_since_restore": 716800, "iterations_since_restore": 56, "perf": {"cpu_util_percent": 38.95348837209303, "ram_util_percent": 57.599999999999994}} -{"episode_reward_max": 305.0, "episode_reward_min": 12.0, "episode_reward_mean": 157.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 161.0}, "policy_reward_mean": {"ppo": 78.625}, "custom_metrics": {"sparse_reward_mean": 41.6, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 74.05, "shaped_reward_min": 12, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.82, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 9.13, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 6.75, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 8.03, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.62, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.96, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 5.8, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 7.28, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.7, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.51, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.56, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.3, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.78, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.91, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 5.8, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 7.28, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.8, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 7.28, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [201.0, 66.0, 180.0, 121.0, 247.0, 190.0, 66.0, 139.0, 139.0, 205.0, 215.0, 267.0, 62.0, 213.0, 155.0, 181.0, 58.0, 174.0, 305.0, 207.0, 155.0, 178.0, 107.0, 166.0, 213.0, 99.0, 152.0, 246.0, 167.0, 110.0, 155.0, 108.0, 270.0, 155.0, 147.0, 75.0, 144.0, 88.0, 37.0, 77.0, 106.0, 117.0, 253.0, 98.0, 179.0, 104.0, 139.0, 152.0, 64.0, 74.0, 273.0, 67.0, 150.0, 163.0, 78.0, 68.0, 141.0, 187.0, 39.0, 158.0, 202.0, 301.0, 153.0, 247.0, 199.0, 163.0, 115.0, 193.0, 203.0, 136.0, 112.0, 259.0, 164.0, 150.0, 167.0, 93.0, 178.0, 227.0, 236.0, 250.0, 218.0, 169.0, 145.0, 159.0, 41.0, 12.0, 144.0, 131.0, 298.0, 47.0, 201.0, 213.0, 176.0, 155.0, 238.0, 115.0, 219.0, 82.0, 216.0, 148.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [95.0, 106.0, 37.0, 29.0, 94.0, 86.0, 62.0, 59.0, 113.0, 
134.0, 76.0, 114.0, 26.0, 40.0, 57.0, 82.0, 57.0, 82.0, 99.0, 106.0, 100.0, 115.0, 121.0, 146.0, 25.0, 37.0, 108.0, 105.0, 76.0, 79.0, 95.0, 86.0, 22.0, 36.0, 85.0, 89.0, 148.0, 157.0, 103.0, 104.0, 71.0, 84.0, 95.0, 83.0, 56.0, 51.0, 85.0, 81.0, 111.0, 102.0, 45.0, 54.0, 70.0, 82.0, 105.0, 141.0, 76.0, 91.0, 65.0, 45.0, 68.0, 87.0, 64.0, 44.0, 138.0, 132.0, 65.0, 90.0, 74.0, 73.0, 33.0, 42.0, 65.0, 79.0, 41.0, 47.0, 14.0, 23.0, 26.0, 51.0, 61.0, 45.0, 55.0, 62.0, 125.0, 128.0, 53.0, 45.0, 81.0, 98.0, 50.0, 54.0, 68.0, 71.0, 80.0, 72.0, 33.0, 31.0, 29.0, 45.0, 142.0, 131.0, 39.0, 28.0, 62.0, 88.0, 88.0, 75.0, 27.0, 51.0, 39.0, 29.0, 71.0, 70.0, 82.0, 105.0, 20.0, 19.0, 66.0, 92.0, 97.0, 105.0, 146.0, 155.0, 71.0, 82.0, 128.0, 119.0, 113.0, 86.0, 94.0, 69.0, 62.0, 53.0, 107.0, 86.0, 103.0, 100.0, 70.0, 66.0, 69.0, 43.0, 134.0, 125.0, 81.0, 83.0, 62.0, 88.0, 71.0, 96.0, 39.0, 54.0, 89.0, 89.0, 104.0, 123.0, 124.0, 112.0, 119.0, 131.0, 111.0, 107.0, 73.0, 96.0, 74.0, 71.0, 60.0, 99.0, 19.0, 22.0, 6.0, 6.0, 77.0, 67.0, 66.0, 65.0, 161.0, 137.0, 9.0, 38.0, 92.0, 109.0, 106.0, 107.0, 79.0, 97.0, 74.0, 81.0, 107.0, 131.0, 56.0, 59.0, 106.0, 113.0, 40.0, 42.0, 111.0, 105.0, 55.0, 93.0]}, "sampler_perf": {"mean_env_wait_ms": 3.1176592492930246, "mean_processing_ms": 0.6892173986565695, "mean_inference_ms": 3.663860513787198}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1368000, "num_steps_sampled": 729600, "sample_time_ms": 22935.784, "load_time_ms": 37.435, "grad_time_ms": 8913.484, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 2.775557602921922e-18, "cur_lr": 0.0010000000474974513, "total_loss": -0.004853060003370047, "policy_loss": -0.0074228327721357346, "vf_loss": 33.384822845458984, "vf_explained_var": 0.6208257079124451, "kl": 0.0016279626870527864, "entropy": 1.5374183654785156, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 729600, 
"episodes_total": 1824, "training_iteration": 57, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-13-15", "timestamp": 1660248795, "time_this_iter_s": 30.907179594039917, "time_total_s": 7215.332751989365, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, 
"clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, 
"policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7215.332751989365, "timesteps_since_restore": 729600, "iterations_since_restore": 57, "perf": {"cpu_util_percent": 39.13636363636363, "ram_util_percent": 57.62499999999999}} -{"episode_reward_max": 305.0, "episode_reward_min": 12.0, "episode_reward_mean": 166.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 161.0}, "policy_reward_mean": {"ppo": 83.36}, "custom_metrics": {"sparse_reward_mean": 44.8, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 77.12, "shaped_reward_min": 12, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.95, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 9.49, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.04, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.32, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.51, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.99, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.99, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.02, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 7.65, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.54, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.52, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.16, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.43, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.24, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.65, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.75, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 6.02, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 7.65, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.02, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 7.65, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [120.0, 115.0, 175.0, 270.0, 118.0, 90.0, 239.0, 267.0, 206.0, 239.0, 256.0, 67.0, 87.0, 93.0, 43.0, 230.0, 164.0, 193.0, 148.0, 222.0, 158.0, 138.0, 144.0, 171.0, 181.0, 241.0, 239.0, 93.0, 167.0, 152.0, 164.0, 263.0, 199.0, 163.0, 115.0, 193.0, 203.0, 136.0, 112.0, 259.0, 164.0, 150.0, 167.0, 93.0, 178.0, 227.0, 236.0, 250.0, 218.0, 169.0, 145.0, 159.0, 41.0, 12.0, 144.0, 131.0, 298.0, 47.0, 201.0, 213.0, 176.0, 155.0, 238.0, 115.0, 219.0, 82.0, 216.0, 148.0, 201.0, 66.0, 180.0, 121.0, 247.0, 190.0, 66.0, 139.0, 139.0, 205.0, 215.0, 267.0, 62.0, 213.0, 155.0, 181.0, 58.0, 174.0, 305.0, 207.0, 155.0, 178.0, 107.0, 166.0, 213.0, 99.0, 152.0, 246.0, 167.0, 110.0, 155.0, 108.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [65.0, 55.0, 50.0, 65.0, 83.0, 92.0, 134.0, 136.0, 
59.0, 59.0, 44.0, 46.0, 107.0, 132.0, 129.0, 138.0, 93.0, 113.0, 119.0, 120.0, 122.0, 134.0, 36.0, 31.0, 36.0, 51.0, 45.0, 48.0, 25.0, 18.0, 113.0, 117.0, 72.0, 92.0, 90.0, 103.0, 82.0, 66.0, 123.0, 99.0, 82.0, 76.0, 52.0, 86.0, 72.0, 72.0, 84.0, 87.0, 93.0, 88.0, 130.0, 111.0, 138.0, 101.0, 45.0, 48.0, 79.0, 88.0, 66.0, 86.0, 79.0, 85.0, 138.0, 125.0, 113.0, 86.0, 94.0, 69.0, 62.0, 53.0, 107.0, 86.0, 103.0, 100.0, 70.0, 66.0, 69.0, 43.0, 134.0, 125.0, 81.0, 83.0, 62.0, 88.0, 71.0, 96.0, 39.0, 54.0, 89.0, 89.0, 104.0, 123.0, 124.0, 112.0, 119.0, 131.0, 111.0, 107.0, 73.0, 96.0, 74.0, 71.0, 60.0, 99.0, 19.0, 22.0, 6.0, 6.0, 77.0, 67.0, 66.0, 65.0, 161.0, 137.0, 9.0, 38.0, 92.0, 109.0, 106.0, 107.0, 79.0, 97.0, 74.0, 81.0, 107.0, 131.0, 56.0, 59.0, 106.0, 113.0, 40.0, 42.0, 111.0, 105.0, 55.0, 93.0, 95.0, 106.0, 37.0, 29.0, 94.0, 86.0, 62.0, 59.0, 113.0, 134.0, 76.0, 114.0, 26.0, 40.0, 57.0, 82.0, 57.0, 82.0, 99.0, 106.0, 100.0, 115.0, 121.0, 146.0, 25.0, 37.0, 108.0, 105.0, 76.0, 79.0, 95.0, 86.0, 22.0, 36.0, 85.0, 89.0, 148.0, 157.0, 103.0, 104.0, 71.0, 84.0, 95.0, 83.0, 56.0, 51.0, 85.0, 81.0, 111.0, 102.0, 45.0, 54.0, 70.0, 82.0, 105.0, 141.0, 76.0, 91.0, 65.0, 45.0, 68.0, 87.0, 64.0, 44.0]}, "sampler_perf": {"mean_env_wait_ms": 3.0715992457802104, "mean_processing_ms": 0.6800724399450163, "mean_inference_ms": 3.6184030024872835}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1392000, "num_steps_sampled": 742400, "sample_time_ms": 22627.432, "load_time_ms": 37.419, "grad_time_ms": 8703.731, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 1.387778801460961e-18, "cur_lr": 0.0010000000474974513, "total_loss": -0.004274281207472086, "policy_loss": -0.006773001980036497, "vf_loss": 32.668846130371094, "vf_explained_var": 0.6147891879081726, "kl": 0.0016452163690701127, "entropy": 1.5363364219665527, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, 
"timesteps_total": 742400, "episodes_total": 1856, "training_iteration": 58, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-13-46", "timestamp": 1660248826, "time_this_iter_s": 31.19256901741028, "time_total_s": 7246.525321006775, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": 
false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": 
"MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7246.525321006775, "timesteps_since_restore": 742400, "iterations_since_restore": 58, "perf": {"cpu_util_percent": 37.85, "ram_util_percent": 57.636363636363626}} -{"episode_reward_max": 347.0, "episode_reward_min": 9.0, "episode_reward_mean": 173.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 177.0}, "policy_reward_mean": {"ppo": 86.68}, "custom_metrics": {"sparse_reward_mean": 48.4, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 76.56, "shaped_reward_min": 9, "shaped_reward_max": 110, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.08, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.37, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.26, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.3, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.49, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.89, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.31, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 7.56, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.77, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.59, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.36, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.56, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.55, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.34, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.34, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.64, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.64, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 6.31, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 7.56, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.31, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 7.56, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [195.0, 147.0, 179.0, 305.0, 302.0, 199.0, 156.0, 201.0, 202.0, 53.0, 144.0, 131.0, 215.0, 347.0, 204.0, 241.0, 209.0, 112.0, 85.0, 238.0, 207.0, 158.0, 193.0, 185.0, 244.0, 185.0, 194.0, 193.0, 170.0, 9.0, 253.0, 115.0, 219.0, 82.0, 216.0, 148.0, 201.0, 66.0, 180.0, 121.0, 247.0, 190.0, 66.0, 139.0, 139.0, 205.0, 215.0, 267.0, 62.0, 213.0, 155.0, 181.0, 58.0, 174.0, 305.0, 207.0, 155.0, 178.0, 107.0, 166.0, 213.0, 99.0, 152.0, 246.0, 167.0, 110.0, 155.0, 108.0, 120.0, 115.0, 175.0, 270.0, 118.0, 90.0, 239.0, 267.0, 206.0, 239.0, 256.0, 67.0, 87.0, 93.0, 43.0, 230.0, 164.0, 193.0, 148.0, 222.0, 158.0, 138.0, 144.0, 171.0, 181.0, 241.0, 239.0, 93.0, 167.0, 152.0, 164.0, 263.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [87.0, 108.0, 90.0, 57.0, 94.0, 85.0, 140.0, 165.0, 
160.0, 142.0, 93.0, 106.0, 81.0, 75.0, 93.0, 108.0, 97.0, 105.0, 25.0, 28.0, 69.0, 75.0, 63.0, 68.0, 127.0, 88.0, 170.0, 177.0, 97.0, 107.0, 122.0, 119.0, 103.0, 106.0, 58.0, 54.0, 40.0, 45.0, 110.0, 128.0, 105.0, 102.0, 66.0, 92.0, 83.0, 110.0, 94.0, 91.0, 119.0, 125.0, 104.0, 81.0, 102.0, 92.0, 91.0, 102.0, 83.0, 87.0, 6.0, 3.0, 123.0, 130.0, 64.0, 51.0, 106.0, 113.0, 40.0, 42.0, 111.0, 105.0, 55.0, 93.0, 95.0, 106.0, 37.0, 29.0, 94.0, 86.0, 62.0, 59.0, 113.0, 134.0, 76.0, 114.0, 26.0, 40.0, 57.0, 82.0, 57.0, 82.0, 99.0, 106.0, 100.0, 115.0, 121.0, 146.0, 25.0, 37.0, 108.0, 105.0, 76.0, 79.0, 95.0, 86.0, 22.0, 36.0, 85.0, 89.0, 148.0, 157.0, 103.0, 104.0, 71.0, 84.0, 95.0, 83.0, 56.0, 51.0, 85.0, 81.0, 111.0, 102.0, 45.0, 54.0, 70.0, 82.0, 105.0, 141.0, 76.0, 91.0, 65.0, 45.0, 68.0, 87.0, 64.0, 44.0, 65.0, 55.0, 50.0, 65.0, 83.0, 92.0, 134.0, 136.0, 59.0, 59.0, 44.0, 46.0, 107.0, 132.0, 129.0, 138.0, 93.0, 113.0, 119.0, 120.0, 122.0, 134.0, 36.0, 31.0, 36.0, 51.0, 45.0, 48.0, 25.0, 18.0, 113.0, 117.0, 72.0, 92.0, 90.0, 103.0, 82.0, 66.0, 123.0, 99.0, 82.0, 76.0, 52.0, 86.0, 72.0, 72.0, 84.0, 87.0, 93.0, 88.0, 130.0, 111.0, 138.0, 101.0, 45.0, 48.0, 79.0, 88.0, 66.0, 86.0, 79.0, 85.0, 138.0, 125.0]}, "sampler_perf": {"mean_env_wait_ms": 3.027099137658147, "mean_processing_ms": 0.6712374159631969, "mean_inference_ms": 3.574879048855646}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1416000, "num_steps_sampled": 755200, "sample_time_ms": 22439.192, "load_time_ms": 37.393, "grad_time_ms": 8622.999, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 6.938894007304805e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.005061946343630552, "policy_loss": -0.0077269431203603745, "vf_loss": 34.30827713012695, "vf_explained_var": 0.6426100730895996, "kl": 0.0014075502986088395, "entropy": 1.5316654443740845, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, 
"timesteps_total": 755200, "episodes_total": 1888, "training_iteration": 59, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-14-19", "timestamp": 1660248859, "time_this_iter_s": 32.82003712654114, "time_total_s": 7279.345358133316, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": 
false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": 
"MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7279.345358133316, "timesteps_since_restore": 755200, "iterations_since_restore": 59, "perf": {"cpu_util_percent": 38.134782608695645, "ram_util_percent": 57.654347826086926}} -{"episode_reward_max": 347.0, "episode_reward_min": 9.0, "episode_reward_mean": 183.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 177.0}, "policy_reward_mean": {"ppo": 91.965}, "custom_metrics": {"sparse_reward_mean": 52.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 78.33, "shaped_reward_min": 9, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, 
"useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.49, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.58, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 7.62, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.41, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.48, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.88, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.99, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.64, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 7.61, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 14, "dish_pickup_agent_0_mean": 4.84, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.59, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, 
"useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.14, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.85, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.47, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.33, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.56, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.4, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 6.64, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 7.61, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 14, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.64, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 7.61, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 14, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, 
"catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [167.0, 264.0, 176.0, 241.0, 333.0, 82.0, 213.0, 145.0, 167.0, 188.0, 259.0, 307.0, 74.0, 213.0, 299.0, 284.0, 193.0, 136.0, 161.0, 188.0, 238.0, 79.0, 193.0, 241.0, 133.0, 172.0, 238.0, 168.0, 256.0, 213.0, 196.0, 212.0, 167.0, 110.0, 155.0, 108.0, 120.0, 115.0, 175.0, 270.0, 118.0, 90.0, 239.0, 267.0, 206.0, 239.0, 256.0, 67.0, 87.0, 93.0, 43.0, 230.0, 164.0, 193.0, 148.0, 222.0, 158.0, 138.0, 144.0, 171.0, 181.0, 241.0, 239.0, 93.0, 167.0, 152.0, 164.0, 263.0, 195.0, 147.0, 179.0, 305.0, 302.0, 199.0, 156.0, 201.0, 202.0, 53.0, 144.0, 131.0, 215.0, 347.0, 204.0, 241.0, 209.0, 112.0, 85.0, 238.0, 207.0, 158.0, 193.0, 185.0, 244.0, 185.0, 194.0, 193.0, 170.0, 9.0, 253.0, 115.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": 
[75.0, 92.0, 131.0, 133.0, 79.0, 97.0, 128.0, 113.0, 160.0, 173.0, 35.0, 47.0, 111.0, 102.0, 74.0, 71.0, 78.0, 89.0, 83.0, 105.0, 133.0, 126.0, 161.0, 146.0, 37.0, 37.0, 94.0, 119.0, 155.0, 144.0, 142.0, 142.0, 86.0, 107.0, 65.0, 71.0, 79.0, 82.0, 91.0, 97.0, 129.0, 109.0, 34.0, 45.0, 103.0, 90.0, 128.0, 113.0, 73.0, 60.0, 88.0, 84.0, 120.0, 118.0, 86.0, 82.0, 128.0, 128.0, 113.0, 100.0, 108.0, 88.0, 95.0, 117.0, 76.0, 91.0, 65.0, 45.0, 68.0, 87.0, 64.0, 44.0, 65.0, 55.0, 50.0, 65.0, 83.0, 92.0, 134.0, 136.0, 59.0, 59.0, 44.0, 46.0, 107.0, 132.0, 129.0, 138.0, 93.0, 113.0, 119.0, 120.0, 122.0, 134.0, 36.0, 31.0, 36.0, 51.0, 45.0, 48.0, 25.0, 18.0, 113.0, 117.0, 72.0, 92.0, 90.0, 103.0, 82.0, 66.0, 123.0, 99.0, 82.0, 76.0, 52.0, 86.0, 72.0, 72.0, 84.0, 87.0, 93.0, 88.0, 130.0, 111.0, 138.0, 101.0, 45.0, 48.0, 79.0, 88.0, 66.0, 86.0, 79.0, 85.0, 138.0, 125.0, 87.0, 108.0, 90.0, 57.0, 94.0, 85.0, 140.0, 165.0, 160.0, 142.0, 93.0, 106.0, 81.0, 75.0, 93.0, 108.0, 97.0, 105.0, 25.0, 28.0, 69.0, 75.0, 63.0, 68.0, 127.0, 88.0, 170.0, 177.0, 97.0, 107.0, 122.0, 119.0, 103.0, 106.0, 58.0, 54.0, 40.0, 45.0, 110.0, 128.0, 105.0, 102.0, 66.0, 92.0, 83.0, 110.0, 94.0, 91.0, 119.0, 125.0, 104.0, 81.0, 102.0, 92.0, 91.0, 102.0, 83.0, 87.0, 6.0, 3.0, 123.0, 130.0, 64.0, 51.0]}, "sampler_perf": {"mean_env_wait_ms": 2.9840674048083304, "mean_processing_ms": 0.6627022470508764, "mean_inference_ms": 3.532781736604636}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1440000, "num_steps_sampled": 768000, "sample_time_ms": 22393.025, "load_time_ms": 37.431, "grad_time_ms": 8732.011, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.4694470036524025e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.0032793853897601366, "policy_loss": -0.0061605386435985565, "vf_loss": 36.42392349243164, "vf_explained_var": 0.6542922854423523, "kl": 0.0015746770659461617, "entropy": 1.5224775075912476, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, 
"optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 768000, "episodes_total": 1920, "training_iteration": 60, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-14-51", "timestamp": 1660248891, "time_this_iter_s": 31.919984817504883, "time_total_s": 7311.265342950821, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, 
"NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7311.265342950821, "timesteps_since_restore": 768000, "iterations_since_restore": 60, "perf": {"cpu_util_percent": 38.15, "ram_util_percent": 57.70217391304345}} -{"episode_reward_max": 347.0, "episode_reward_min": 9.0, "episode_reward_mean": 199.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 185.0}, "policy_reward_mean": {"ppo": 99.975}, "custom_metrics": {"sparse_reward_mean": 59.6, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 80.75, "shaped_reward_min": 9, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, 
"useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.59, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.82, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 7.86, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.95, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.32, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.78, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.94, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 8.02, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 5.14, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.24, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.61, 
"useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.19, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.78, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.59, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.44, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.48, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.28, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 6.94, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 8.02, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.94, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 8.02, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, 
"catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [255.0, 247.0, 262.0, 209.0, 147.0, 316.0, 267.0, 155.0, 275.0, 139.0, 256.0, 341.0, 209.0, 204.0, 250.0, 164.0, 86.0, 301.0, 247.0, 249.0, 244.0, 136.0, 123.0, 185.0, 167.0, 99.0, 153.0, 264.0, 210.0, 298.0, 150.0, 241.0, 167.0, 152.0, 164.0, 263.0, 195.0, 147.0, 179.0, 305.0, 302.0, 199.0, 156.0, 201.0, 202.0, 53.0, 144.0, 131.0, 215.0, 347.0, 204.0, 241.0, 209.0, 112.0, 85.0, 238.0, 207.0, 158.0, 193.0, 185.0, 244.0, 185.0, 194.0, 193.0, 170.0, 9.0, 253.0, 115.0, 167.0, 264.0, 176.0, 241.0, 333.0, 82.0, 213.0, 145.0, 167.0, 188.0, 259.0, 307.0, 74.0, 213.0, 299.0, 284.0, 193.0, 136.0, 161.0, 188.0, 238.0, 79.0, 193.0, 241.0, 133.0, 172.0, 238.0, 168.0, 256.0, 213.0, 196.0, 212.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [115.0, 140.0, 119.0, 128.0, 140.0, 122.0, 100.0, 109.0, 68.0, 79.0, 153.0, 163.0, 127.0, 140.0, 69.0, 86.0, 150.0, 125.0, 78.0, 61.0, 138.0, 118.0, 185.0, 156.0, 118.0, 91.0, 101.0, 103.0, 120.0, 130.0, 91.0, 73.0, 53.0, 33.0, 141.0, 160.0, 135.0, 112.0, 116.0, 133.0, 108.0, 136.0, 63.0, 73.0, 66.0, 57.0, 97.0, 88.0, 87.0, 80.0, 36.0, 63.0, 84.0, 69.0, 127.0, 137.0, 114.0, 96.0, 136.0, 162.0, 76.0, 74.0, 128.0, 113.0, 79.0, 88.0, 66.0, 86.0, 79.0, 85.0, 138.0, 125.0, 87.0, 108.0, 90.0, 57.0, 94.0, 85.0, 140.0, 165.0, 160.0, 142.0, 93.0, 106.0, 81.0, 75.0, 93.0, 108.0, 97.0, 105.0, 25.0, 28.0, 69.0, 75.0, 63.0, 68.0, 127.0, 88.0, 170.0, 177.0, 97.0, 107.0, 122.0, 119.0, 103.0, 106.0, 58.0, 54.0, 40.0, 45.0, 110.0, 128.0, 105.0, 102.0, 66.0, 92.0, 83.0, 110.0, 94.0, 91.0, 119.0, 125.0, 104.0, 81.0, 102.0, 92.0, 91.0, 102.0, 83.0, 87.0, 6.0, 3.0, 123.0, 130.0, 64.0, 51.0, 75.0, 92.0, 131.0, 133.0, 79.0, 97.0, 128.0, 113.0, 160.0, 173.0, 35.0, 47.0, 111.0, 102.0, 74.0, 71.0, 78.0, 89.0, 83.0, 105.0, 133.0, 126.0, 161.0, 146.0, 37.0, 37.0, 94.0, 119.0, 155.0, 144.0, 142.0, 142.0, 86.0, 107.0, 65.0, 71.0, 79.0, 82.0, 91.0, 97.0, 129.0, 109.0, 34.0, 45.0, 103.0, 90.0, 128.0, 113.0, 73.0, 60.0, 88.0, 84.0, 120.0, 118.0, 86.0, 82.0, 128.0, 128.0, 113.0, 100.0, 108.0, 88.0, 95.0, 117.0]}, "sampler_perf": {"mean_env_wait_ms": 2.942382408677298, "mean_processing_ms": 0.6544447985694, "mean_inference_ms": 3.4920936282643287}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1464000, "num_steps_sampled": 780800, "sample_time_ms": 22508.979, "load_time_ms": 37.523, "grad_time_ms": 8924.852, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.7347235018262012e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013272188371047378, "policy_loss": -0.004394210409373045, "vf_loss": 38.1645622253418, "vf_explained_var": 0.6507807374000549, "kl": 
0.002042042789980769, "entropy": 1.4989361763000488, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 780800, "episodes_total": 1952, "training_iteration": 61, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-15-24", "timestamp": 1660248924, "time_this_iter_s": 33.02385997772217, "time_total_s": 7344.289202928543, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", 
{"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7344.289202928543, "timesteps_since_restore": 780800, "iterations_since_restore": 61, "perf": {"cpu_util_percent": 36.91521739130434, "ram_util_percent": 57.791304347826106}} -{"episode_reward_max": 341.0, "episode_reward_min": 9.0, "episode_reward_mean": 206.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 185.0}, "policy_reward_mean": {"ppo": 103.06}, "custom_metrics": {"sparse_reward_mean": 60.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 84.52, "shaped_reward_min": 9, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.75, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 10.13, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 8.0, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.26, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.63, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 7.2, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 8.37, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 5.08, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.7, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.62, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.3, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, 
"dish_drop_agent_1_mean": 1.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.3, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.86, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.76, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.44, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.2, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 7.2, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 8.37, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.2, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 8.37, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [176.0, 235.0, 158.0, 301.0, 201.0, 221.0, 176.0, 223.0, 169.0, 313.0, 196.0, 215.0, 118.0, 166.0, 290.0, 81.0, 145.0, 196.0, 161.0, 310.0, 256.0, 298.0, 244.0, 110.0, 319.0, 179.0, 152.0, 207.0, 301.0, 127.0, 307.0, 236.0, 170.0, 9.0, 253.0, 115.0, 167.0, 264.0, 176.0, 241.0, 333.0, 82.0, 213.0, 145.0, 167.0, 188.0, 259.0, 307.0, 74.0, 213.0, 299.0, 284.0, 193.0, 136.0, 161.0, 188.0, 238.0, 79.0, 193.0, 241.0, 133.0, 172.0, 238.0, 168.0, 256.0, 213.0, 196.0, 212.0, 255.0, 247.0, 262.0, 209.0, 147.0, 316.0, 267.0, 155.0, 275.0, 139.0, 256.0, 341.0, 209.0, 204.0, 250.0, 164.0, 86.0, 301.0, 247.0, 249.0, 244.0, 136.0, 123.0, 185.0, 167.0, 99.0, 153.0, 264.0, 210.0, 298.0, 150.0, 241.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [83.0, 93.0, 110.0, 125.0, 80.0, 78.0, 156.0, 145.0, 100.0, 101.0, 106.0, 115.0, 82.0, 94.0, 107.0, 116.0, 99.0, 70.0, 161.0, 152.0, 102.0, 94.0, 115.0, 100.0, 58.0, 60.0, 84.0, 82.0, 150.0, 140.0, 47.0, 34.0, 71.0, 74.0, 99.0, 97.0, 72.0, 89.0, 139.0, 171.0, 139.0, 117.0, 152.0, 146.0, 119.0, 125.0, 53.0, 57.0, 144.0, 175.0, 94.0, 85.0, 67.0, 85.0, 100.0, 107.0, 133.0, 168.0, 50.0, 77.0, 165.0, 142.0, 102.0, 134.0, 83.0, 87.0, 6.0, 3.0, 123.0, 130.0, 64.0, 51.0, 75.0, 92.0, 131.0, 133.0, 79.0, 97.0, 128.0, 113.0, 160.0, 173.0, 35.0, 47.0, 111.0, 102.0, 74.0, 71.0, 78.0, 89.0, 83.0, 105.0, 133.0, 126.0, 161.0, 146.0, 37.0, 37.0, 94.0, 119.0, 155.0, 144.0, 142.0, 142.0, 86.0, 107.0, 65.0, 71.0, 79.0, 82.0, 91.0, 97.0, 129.0, 109.0, 34.0, 45.0, 103.0, 90.0, 128.0, 113.0, 73.0, 60.0, 88.0, 84.0, 120.0, 118.0, 86.0, 82.0, 128.0, 128.0, 113.0, 100.0, 108.0, 88.0, 95.0, 117.0, 115.0, 140.0, 119.0, 128.0, 140.0, 122.0, 100.0, 109.0, 68.0, 79.0, 153.0, 163.0, 127.0, 140.0, 69.0, 86.0, 150.0, 125.0, 78.0, 61.0, 138.0, 118.0, 185.0, 156.0, 118.0, 91.0, 101.0, 103.0, 120.0, 130.0, 91.0, 73.0, 53.0, 33.0, 141.0, 160.0, 135.0, 112.0, 116.0, 133.0, 108.0, 136.0, 63.0, 73.0, 66.0, 57.0, 97.0, 88.0, 87.0, 80.0, 36.0, 63.0, 84.0, 69.0, 127.0, 137.0, 114.0, 96.0, 136.0, 162.0, 76.0, 74.0, 128.0, 113.0]}, "sampler_perf": {"mean_env_wait_ms": 2.902001918125836, "mean_processing_ms": 0.6464539836010921, "mean_inference_ms": 3.4531930074110506}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1488000, "num_steps_sampled": 793600, "sample_time_ms": 22714.251, "load_time_ms": 37.548, "grad_time_ms": 9167.09, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 8.673617509131006e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.0036004248540848494, "policy_loss": 
-0.006528293248265982, "vf_loss": 36.78936767578125, "vf_explained_var": 0.6745734810829163, "kl": 0.0014449331210926175, "entropy": 1.5021357536315918, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 793600, "episodes_total": 1984, "training_iteration": 62, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-15-59", "timestamp": 1660248959, "time_this_iter_s": 35.12303113937378, "time_total_s": 7379.412234067917, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, 
"eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, 
"multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7379.412234067917, "timesteps_since_restore": 793600, "iterations_since_restore": 62, "perf": {"cpu_util_percent": 35.66, "ram_util_percent": 57.732}} -{"episode_reward_max": 368.0, "episode_reward_min": 9.0, "episode_reward_mean": 220.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 186.0}, "policy_reward_mean": {"ppo": 110.09}, "custom_metrics": {"sparse_reward_mean": 65.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 88.58, "shaped_reward_min": 9, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.92, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 10.43, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 8.21, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.74, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.6, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 7.47, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 8.83, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 5.07, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.75, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.6, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.29, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.14, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.73, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.48, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.21, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 7.47, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 8.83, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.47, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 8.83, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [316.0, 212.0, 368.0, 304.0, 253.0, 351.0, 178.0, 9.0, 122.0, 253.0, 164.0, 253.0, 213.0, 250.0, 179.0, 219.0, 330.0, 169.0, 241.0, 207.0, 319.0, 241.0, 264.0, 284.0, 307.0, 139.0, 201.0, 267.0, 71.0, 267.0, 241.0, 313.0, 256.0, 213.0, 196.0, 212.0, 255.0, 247.0, 262.0, 209.0, 147.0, 316.0, 267.0, 155.0, 275.0, 139.0, 256.0, 341.0, 209.0, 204.0, 250.0, 164.0, 86.0, 301.0, 247.0, 249.0, 244.0, 136.0, 123.0, 185.0, 167.0, 99.0, 153.0, 264.0, 210.0, 298.0, 150.0, 241.0, 176.0, 235.0, 158.0, 301.0, 201.0, 221.0, 176.0, 223.0, 169.0, 313.0, 196.0, 215.0, 118.0, 166.0, 290.0, 81.0, 145.0, 196.0, 161.0, 310.0, 256.0, 298.0, 244.0, 110.0, 319.0, 179.0, 152.0, 207.0, 301.0, 127.0, 307.0, 236.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [149.0, 167.0, 107.0, 105.0, 182.0, 186.0, 140.0, 164.0, 124.0, 129.0, 185.0, 166.0, 85.0, 93.0, 6.0, 3.0, 52.0, 70.0, 111.0, 142.0, 55.0, 109.0, 116.0, 137.0, 97.0, 116.0, 129.0, 121.0, 80.0, 99.0, 120.0, 99.0, 166.0, 164.0, 80.0, 89.0, 108.0, 133.0, 98.0, 109.0, 170.0, 149.0, 124.0, 117.0, 125.0, 139.0, 135.0, 149.0, 151.0, 156.0, 71.0, 68.0, 97.0, 104.0, 128.0, 139.0, 34.0, 37.0, 122.0, 145.0, 131.0, 110.0, 163.0, 150.0, 128.0, 128.0, 113.0, 100.0, 108.0, 88.0, 95.0, 117.0, 115.0, 140.0, 119.0, 128.0, 140.0, 122.0, 100.0, 109.0, 68.0, 79.0, 153.0, 163.0, 127.0, 140.0, 69.0, 86.0, 150.0, 125.0, 78.0, 61.0, 138.0, 118.0, 185.0, 156.0, 118.0, 91.0, 101.0, 103.0, 120.0, 130.0, 91.0, 73.0, 53.0, 33.0, 141.0, 160.0, 135.0, 112.0, 116.0, 133.0, 108.0, 136.0, 63.0, 73.0, 66.0, 57.0, 97.0, 88.0, 87.0, 80.0, 36.0, 63.0, 84.0, 69.0, 127.0, 137.0, 114.0, 96.0, 136.0, 162.0, 76.0, 74.0, 128.0, 113.0, 83.0, 93.0, 110.0, 125.0, 80.0, 78.0, 156.0, 145.0, 100.0, 101.0, 106.0, 115.0, 82.0, 94.0, 107.0, 116.0, 99.0, 70.0, 161.0, 152.0, 102.0, 94.0, 115.0, 100.0, 58.0, 60.0, 84.0, 82.0, 150.0, 140.0, 47.0, 34.0, 71.0, 74.0, 99.0, 97.0, 72.0, 89.0, 139.0, 171.0, 139.0, 117.0, 152.0, 146.0, 119.0, 125.0, 53.0, 57.0, 144.0, 175.0, 94.0, 85.0, 67.0, 85.0, 100.0, 107.0, 133.0, 168.0, 50.0, 77.0, 165.0, 142.0, 102.0, 134.0]}, "sampler_perf": {"mean_env_wait_ms": 2.8628696517904486, "mean_processing_ms": 0.6387085923696654, "mean_inference_ms": 3.4155991200139666}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1512000, "num_steps_sampled": 806400, "sample_time_ms": 22536.552, "load_time_ms": 37.579, "grad_time_ms": 9310.446, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 
4.336808754565503e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014393635792657733, "policy_loss": -0.0051459651440382, "vf_loss": 44.491573333740234, "vf_explained_var": 0.6412068009376526, "kl": 0.001486484077759087, "entropy": 1.485115885734558, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 806400, "episodes_total": 2016, "training_iteration": 63, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-16-31", "timestamp": 1660248991, "time_this_iter_s": 32.43239998817444, "time_total_s": 7411.844634056091, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7411.844634056091, "timesteps_since_restore": 806400, "iterations_since_restore": 63, "perf": {"cpu_util_percent": 36.01739130434783, "ram_util_percent": 57.59347826086956}} -{"episode_reward_max": 368.0, "episode_reward_min": 9.0, "episode_reward_mean": 232.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 190.0}, "policy_reward_mean": {"ppo": 116.33}, "custom_metrics": {"sparse_reward_mean": 70.6, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 91.46, "shaped_reward_min": 9, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.53, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 10.35, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 8.77, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.57, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.64, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.08, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 8.75, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.7, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.59, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 
1.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.51, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.48, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.22, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.67, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 3.04, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.42, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.32, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 8.08, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 8.75, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.08, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 8.75, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [55.0, 264.0, 127.0, 313.0, 163.0, 287.0, 298.0, 216.0, 341.0, 287.0, 235.0, 298.0, 204.0, 295.0, 225.0, 218.0, 287.0, 215.0, 313.0, 255.0, 367.0, 239.0, 210.0, 367.0, 258.0, 252.0, 353.0, 128.0, 178.0, 284.0, 270.0, 273.0, 210.0, 298.0, 150.0, 241.0, 176.0, 235.0, 158.0, 301.0, 201.0, 221.0, 176.0, 223.0, 169.0, 313.0, 196.0, 215.0, 118.0, 166.0, 290.0, 81.0, 145.0, 196.0, 161.0, 310.0, 256.0, 298.0, 244.0, 110.0, 319.0, 179.0, 152.0, 207.0, 301.0, 127.0, 307.0, 236.0, 316.0, 212.0, 368.0, 304.0, 253.0, 351.0, 178.0, 9.0, 122.0, 253.0, 164.0, 253.0, 213.0, 250.0, 179.0, 219.0, 330.0, 169.0, 241.0, 207.0, 319.0, 241.0, 264.0, 284.0, 307.0, 139.0, 201.0, 267.0, 71.0, 267.0, 241.0, 313.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [26.0, 29.0, 134.0, 130.0, 63.0, 64.0, 144.0, 169.0, 76.0, 87.0, 142.0, 145.0, 131.0, 167.0, 113.0, 103.0, 152.0, 189.0, 161.0, 126.0, 111.0, 124.0, 164.0, 134.0, 105.0, 99.0, 133.0, 162.0, 119.0, 106.0, 110.0, 108.0, 153.0, 134.0, 100.0, 115.0, 147.0, 166.0, 129.0, 126.0, 190.0, 177.0, 122.0, 117.0, 107.0, 103.0, 189.0, 178.0, 118.0, 140.0, 117.0, 135.0, 188.0, 165.0, 60.0, 68.0, 86.0, 92.0, 143.0, 141.0, 128.0, 142.0, 131.0, 142.0, 114.0, 96.0, 136.0, 162.0, 76.0, 74.0, 128.0, 113.0, 83.0, 93.0, 110.0, 125.0, 80.0, 78.0, 156.0, 145.0, 100.0, 101.0, 106.0, 115.0, 82.0, 94.0, 107.0, 116.0, 99.0, 70.0, 161.0, 152.0, 102.0, 94.0, 115.0, 100.0, 58.0, 60.0, 84.0, 82.0, 150.0, 140.0, 47.0, 34.0, 71.0, 74.0, 99.0, 97.0, 72.0, 89.0, 139.0, 171.0, 139.0, 117.0, 152.0, 146.0, 119.0, 125.0, 53.0, 57.0, 144.0, 175.0, 94.0, 85.0, 67.0, 85.0, 100.0, 107.0, 133.0, 168.0, 50.0, 77.0, 165.0, 142.0, 102.0, 134.0, 149.0, 167.0, 107.0, 105.0, 182.0, 186.0, 140.0, 164.0, 124.0, 129.0, 185.0, 166.0, 85.0, 93.0, 6.0, 3.0, 52.0, 70.0, 111.0, 142.0, 55.0, 109.0, 116.0, 137.0, 97.0, 116.0, 129.0, 121.0, 80.0, 99.0, 120.0, 99.0, 166.0, 164.0, 80.0, 89.0, 108.0, 133.0, 98.0, 109.0, 170.0, 149.0, 124.0, 117.0, 125.0, 139.0, 135.0, 149.0, 151.0, 156.0, 71.0, 68.0, 97.0, 104.0, 128.0, 139.0, 34.0, 37.0, 122.0, 145.0, 131.0, 110.0, 163.0, 150.0]}, "sampler_perf": {"mean_env_wait_ms": 2.824862261482626, "mean_processing_ms": 0.6311660417813414, "mean_inference_ms": 3.3776913519982266}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1536000, "num_steps_sampled": 819200, "sample_time_ms": 21992.331, "load_time_ms": 37.617, 
"grad_time_ms": 9517.458, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.1684043772827515e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.0046628438867628574, "policy_loss": -0.008121621794998646, "vf_loss": 41.953346252441406, "vf_explained_var": 0.6826162934303284, "kl": 0.0015492010861635208, "entropy": 1.4731155633926392, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 819200, "episodes_total": 2048, "training_iteration": 64, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-17-00", "timestamp": 1660249020, "time_this_iter_s": 28.55878710746765, "time_total_s": 7440.403421163559, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], 
"same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": 
"sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7440.403421163559, "timesteps_since_restore": 819200, "iterations_since_restore": 64, "perf": {"cpu_util_percent": 38.065, "ram_util_percent": 57.504999999999995}} -{"episode_reward_max": 390.0, "episode_reward_min": 9.0, "episode_reward_mean": 243.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 201.0}, "policy_reward_mean": {"ppo": 121.615}, "custom_metrics": {"sparse_reward_mean": 74.8, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 93.63, "shaped_reward_min": 9, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.88, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 10.52, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 9.11, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.77, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.73, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.38, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 8.85, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.62, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.54, 
"useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.52, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.24, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.71, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 3.06, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.41, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.36, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 8.38, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 8.85, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.38, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 8.85, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [207.0, 187.0, 252.0, 141.0, 266.0, 299.0, 182.0, 344.0, 352.0, 232.0, 324.0, 260.0, 182.0, 336.0, 170.0, 193.0, 239.0, 255.0, 212.0, 295.0, 267.0, 166.0, 142.0, 65.0, 163.0, 330.0, 361.0, 390.0, 290.0, 259.0, 159.0, 252.0, 301.0, 127.0, 307.0, 236.0, 316.0, 212.0, 368.0, 304.0, 253.0, 351.0, 178.0, 9.0, 122.0, 253.0, 164.0, 253.0, 213.0, 250.0, 179.0, 219.0, 330.0, 169.0, 241.0, 207.0, 319.0, 241.0, 264.0, 284.0, 307.0, 139.0, 201.0, 267.0, 71.0, 267.0, 241.0, 313.0, 55.0, 264.0, 127.0, 313.0, 163.0, 287.0, 298.0, 216.0, 341.0, 287.0, 235.0, 298.0, 204.0, 295.0, 225.0, 218.0, 287.0, 215.0, 313.0, 255.0, 367.0, 239.0, 210.0, 367.0, 258.0, 252.0, 353.0, 128.0, 178.0, 284.0, 270.0, 273.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [99.0, 108.0, 93.0, 94.0, 123.0, 129.0, 76.0, 65.0, 128.0, 138.0, 144.0, 155.0, 95.0, 87.0, 161.0, 183.0, 182.0, 170.0, 124.0, 108.0, 170.0, 154.0, 119.0, 141.0, 89.0, 93.0, 165.0, 171.0, 97.0, 73.0, 100.0, 93.0, 134.0, 105.0, 127.0, 128.0, 103.0, 109.0, 155.0, 140.0, 118.0, 149.0, 82.0, 84.0, 61.0, 81.0, 32.0, 33.0, 89.0, 74.0, 155.0, 175.0, 168.0, 193.0, 201.0, 189.0, 137.0, 153.0, 121.0, 138.0, 84.0, 75.0, 121.0, 131.0, 133.0, 168.0, 50.0, 77.0, 165.0, 142.0, 102.0, 134.0, 149.0, 167.0, 107.0, 105.0, 182.0, 186.0, 140.0, 164.0, 124.0, 129.0, 185.0, 166.0, 85.0, 93.0, 6.0, 3.0, 52.0, 70.0, 111.0, 142.0, 55.0, 109.0, 116.0, 137.0, 97.0, 116.0, 129.0, 121.0, 80.0, 99.0, 120.0, 99.0, 166.0, 164.0, 80.0, 89.0, 108.0, 133.0, 98.0, 109.0, 170.0, 149.0, 124.0, 117.0, 125.0, 139.0, 135.0, 149.0, 151.0, 156.0, 71.0, 68.0, 97.0, 104.0, 128.0, 139.0, 34.0, 37.0, 122.0, 145.0, 131.0, 110.0, 163.0, 150.0, 26.0, 29.0, 134.0, 130.0, 63.0, 64.0, 144.0, 169.0, 76.0, 87.0, 142.0, 145.0, 131.0, 167.0, 113.0, 103.0, 152.0, 189.0, 161.0, 126.0, 111.0, 124.0, 164.0, 134.0, 105.0, 99.0, 133.0, 162.0, 119.0, 106.0, 110.0, 108.0, 153.0, 134.0, 100.0, 115.0, 147.0, 166.0, 129.0, 126.0, 190.0, 177.0, 122.0, 117.0, 107.0, 103.0, 189.0, 178.0, 118.0, 140.0, 117.0, 135.0, 188.0, 165.0, 60.0, 68.0, 86.0, 92.0, 143.0, 141.0, 128.0, 142.0, 131.0, 142.0]}, "sampler_perf": {"mean_env_wait_ms": 2.787957702502074, "mean_processing_ms": 0.623833190465171, "mean_inference_ms": 3.33934185787935}, "off_policy_estimator": 
{}, "info": {"num_steps_trained": 1560000, "num_steps_sampled": 832000, "sample_time_ms": 21829.313, "load_time_ms": 37.489, "grad_time_ms": 9503.726, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.0842021886413758e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.0026782825589179993, "policy_loss": -0.0062830038368701935, "vf_loss": 43.38319778442383, "vf_explained_var": 0.6999297738075256, "kl": 0.0015296392375603318, "entropy": 1.4671941995620728, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 832000, "episodes_total": 2080, "training_iteration": 65, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-17-27", "timestamp": 1660249047, "time_this_iter_s": 27.39732599258423, "time_total_s": 7467.800747156143, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, 
"mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7467.800747156143, "timesteps_since_restore": 832000, "iterations_since_restore": 65, "perf": {"cpu_util_percent": 36.88717948717949, "ram_util_percent": 57.59230769230768}} -{"episode_reward_max": 402.0, "episode_reward_min": 55.0, "episode_reward_mean": 253.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 26.0}, "policy_reward_max": {"ppo": 201.0}, "policy_reward_mean": {"ppo": 126.685}, "custom_metrics": {"sparse_reward_mean": 79.0, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 95.37, "shaped_reward_min": 31, "shaped_reward_max": 133, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.36, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 10.55, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 9.6, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 9.72, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.56, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.73, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 8.84, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 8.85, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 4.65, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, 
"dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.47, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.93, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.47, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.55, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.55, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.36, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.02, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.86, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.4, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.4, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 8.84, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 8.85, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.84, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 8.85, "viable_onion_potting_agent_1_min": 2, 
"viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [282.0, 319.0, 247.0, 267.0, 244.0, 256.0, 305.0, 402.0, 352.0, 210.0, 139.0, 185.0, 293.0, 324.0, 164.0, 244.0, 345.0, 293.0, 333.0, 339.0, 150.0, 206.0, 344.0, 132.0, 204.0, 319.0, 258.0, 307.0, 250.0, 282.0, 298.0, 305.0, 71.0, 267.0, 241.0, 313.0, 55.0, 264.0, 127.0, 313.0, 163.0, 287.0, 298.0, 216.0, 341.0, 287.0, 235.0, 298.0, 204.0, 295.0, 225.0, 218.0, 287.0, 215.0, 313.0, 255.0, 367.0, 239.0, 210.0, 367.0, 258.0, 252.0, 353.0, 128.0, 178.0, 284.0, 270.0, 273.0, 207.0, 187.0, 252.0, 141.0, 266.0, 299.0, 182.0, 344.0, 352.0, 232.0, 324.0, 260.0, 182.0, 336.0, 170.0, 193.0, 239.0, 255.0, 212.0, 295.0, 267.0, 
166.0, 142.0, 65.0, 163.0, 330.0, 361.0, 390.0, 290.0, 259.0, 159.0, 252.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [151.0, 131.0, 158.0, 161.0, 122.0, 125.0, 134.0, 133.0, 112.0, 132.0, 130.0, 126.0, 153.0, 152.0, 201.0, 201.0, 173.0, 179.0, 99.0, 111.0, 72.0, 67.0, 97.0, 88.0, 152.0, 141.0, 168.0, 156.0, 85.0, 79.0, 125.0, 119.0, 168.0, 177.0, 139.0, 154.0, 165.0, 168.0, 182.0, 157.0, 82.0, 68.0, 89.0, 117.0, 170.0, 174.0, 73.0, 59.0, 112.0, 92.0, 136.0, 183.0, 119.0, 139.0, 162.0, 145.0, 119.0, 131.0, 150.0, 132.0, 167.0, 131.0, 158.0, 147.0, 34.0, 37.0, 122.0, 145.0, 131.0, 110.0, 163.0, 150.0, 26.0, 29.0, 134.0, 130.0, 63.0, 64.0, 144.0, 169.0, 76.0, 87.0, 142.0, 145.0, 131.0, 167.0, 113.0, 103.0, 152.0, 189.0, 161.0, 126.0, 111.0, 124.0, 164.0, 134.0, 105.0, 99.0, 133.0, 162.0, 119.0, 106.0, 110.0, 108.0, 153.0, 134.0, 100.0, 115.0, 147.0, 166.0, 129.0, 126.0, 190.0, 177.0, 122.0, 117.0, 107.0, 103.0, 189.0, 178.0, 118.0, 140.0, 117.0, 135.0, 188.0, 165.0, 60.0, 68.0, 86.0, 92.0, 143.0, 141.0, 128.0, 142.0, 131.0, 142.0, 99.0, 108.0, 93.0, 94.0, 123.0, 129.0, 76.0, 65.0, 128.0, 138.0, 144.0, 155.0, 95.0, 87.0, 161.0, 183.0, 182.0, 170.0, 124.0, 108.0, 170.0, 154.0, 119.0, 141.0, 89.0, 93.0, 165.0, 171.0, 97.0, 73.0, 100.0, 93.0, 134.0, 105.0, 127.0, 128.0, 103.0, 109.0, 155.0, 140.0, 118.0, 149.0, 82.0, 84.0, 61.0, 81.0, 32.0, 33.0, 89.0, 74.0, 155.0, 175.0, 168.0, 193.0, 201.0, 189.0, 137.0, 153.0, 121.0, 138.0, 84.0, 75.0, 121.0, 131.0]}, "sampler_perf": 
{"mean_env_wait_ms": 2.7521510850720086, "mean_processing_ms": 0.6167185711534096, "mean_inference_ms": 3.3015786839102956}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1584000, "num_steps_sampled": 844800, "sample_time_ms": 21629.389, "load_time_ms": 37.73, "grad_time_ms": 9476.984, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.421010943206879e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.005265243351459503, "policy_loss": -0.009142073802649975, "vf_loss": 46.01101303100586, "vf_explained_var": 0.713275671005249, "kl": 0.001622045412659645, "entropy": 1.4485527276992798, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 844800, "episodes_total": 2112, "training_iteration": 66, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-17-56", "timestamp": 1660249076, "time_this_iter_s": 28.3277370929718, "time_total_s": 7496.128484249115, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 
3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": 
{}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7496.128484249115, "timesteps_since_restore": 844800, "iterations_since_restore": 66, "perf": {"cpu_util_percent": 32.035000000000004, "ram_util_percent": 57.5875}} -{"episode_reward_max": 421.0, "episode_reward_min": 23.0, "episode_reward_mean": 258.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 9.0}, "policy_reward_max": {"ppo": 211.0}, "policy_reward_mean": {"ppo": 129.21}, "custom_metrics": {"sparse_reward_mean": 81.6, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 95.22, "shaped_reward_min": 20, "shaped_reward_max": 144, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, 
"tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.07, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 10.87, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 9.37, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 10.02, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.75, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 8.65, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.14, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 
17, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.79, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.81, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.59, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.24, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.05, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.45, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.29, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 8.65, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.14, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.65, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 
16, "viable_onion_potting_agent_1_mean": 9.14, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [218.0, 187.0, 60.0, 267.0, 71.0, 244.0, 238.0, 379.0, 373.0, 258.0, 339.0, 198.0, 330.0, 309.0, 358.0, 267.0, 293.0, 298.0, 264.0, 356.0, 309.0, 253.0, 236.0, 421.0, 287.0, 296.0, 384.0, 254.0, 264.0, 208.0, 23.0, 225.0, 178.0, 284.0, 270.0, 273.0, 207.0, 187.0, 252.0, 141.0, 266.0, 299.0, 182.0, 344.0, 352.0, 232.0, 324.0, 260.0, 182.0, 336.0, 170.0, 193.0, 239.0, 255.0, 212.0, 295.0, 267.0, 166.0, 142.0, 65.0, 163.0, 330.0, 361.0, 390.0, 290.0, 259.0, 159.0, 252.0, 282.0, 319.0, 247.0, 267.0, 244.0, 256.0, 305.0, 402.0, 352.0, 
210.0, 139.0, 185.0, 293.0, 324.0, 164.0, 244.0, 345.0, 293.0, 333.0, 339.0, 150.0, 206.0, 344.0, 132.0, 204.0, 319.0, 258.0, 307.0, 250.0, 282.0, 298.0, 305.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [114.0, 104.0, 93.0, 94.0, 29.0, 31.0, 151.0, 116.0, 34.0, 37.0, 113.0, 131.0, 114.0, 124.0, 193.0, 186.0, 188.0, 185.0, 132.0, 126.0, 159.0, 180.0, 96.0, 102.0, 157.0, 173.0, 158.0, 151.0, 180.0, 178.0, 136.0, 131.0, 153.0, 140.0, 146.0, 152.0, 127.0, 137.0, 186.0, 170.0, 151.0, 158.0, 107.0, 146.0, 108.0, 128.0, 211.0, 210.0, 144.0, 143.0, 145.0, 151.0, 204.0, 180.0, 124.0, 130.0, 133.0, 131.0, 99.0, 109.0, 9.0, 14.0, 109.0, 116.0, 86.0, 92.0, 143.0, 141.0, 128.0, 142.0, 131.0, 142.0, 99.0, 108.0, 93.0, 94.0, 123.0, 129.0, 76.0, 65.0, 128.0, 138.0, 144.0, 155.0, 95.0, 87.0, 161.0, 183.0, 182.0, 170.0, 124.0, 108.0, 170.0, 154.0, 119.0, 141.0, 89.0, 93.0, 165.0, 171.0, 97.0, 73.0, 100.0, 93.0, 134.0, 105.0, 127.0, 128.0, 103.0, 109.0, 155.0, 140.0, 118.0, 149.0, 82.0, 84.0, 61.0, 81.0, 32.0, 33.0, 89.0, 74.0, 155.0, 175.0, 168.0, 193.0, 201.0, 189.0, 137.0, 153.0, 121.0, 138.0, 84.0, 75.0, 121.0, 131.0, 151.0, 131.0, 158.0, 161.0, 122.0, 125.0, 134.0, 133.0, 112.0, 132.0, 130.0, 126.0, 153.0, 152.0, 201.0, 201.0, 173.0, 179.0, 99.0, 111.0, 72.0, 67.0, 97.0, 88.0, 152.0, 141.0, 168.0, 156.0, 85.0, 79.0, 125.0, 119.0, 168.0, 177.0, 139.0, 154.0, 165.0, 168.0, 182.0, 157.0, 82.0, 68.0, 89.0, 117.0, 170.0, 174.0, 73.0, 59.0, 112.0, 92.0, 136.0, 183.0, 119.0, 139.0, 162.0, 
145.0, 119.0, 131.0, 150.0, 132.0, 167.0, 131.0, 158.0, 147.0]}, "sampler_perf": {"mean_env_wait_ms": 2.717456296666147, "mean_processing_ms": 0.6098308335159816, "mean_inference_ms": 3.265231126103296}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1608000, "num_steps_sampled": 857600, "sample_time_ms": 21370.114, "load_time_ms": 37.697, "grad_time_ms": 9313.497, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.7105054716034394e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019875967409461737, "policy_loss": -0.006022992078214884, "vf_loss": 47.62739562988281, "vf_explained_var": 0.6981029510498047, "kl": 0.0015933552058413625, "entropy": 1.4546891450881958, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 857600, "episodes_total": 2144, "training_iteration": 67, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-18-22", "timestamp": 1660249102, "time_this_iter_s": 26.67682385444641, "time_total_s": 7522.805308103561, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7522.805308103561, "timesteps_since_restore": 857600, "iterations_since_restore": 67, "perf": {"cpu_util_percent": 34.505405405405405, "ram_util_percent": 57.59189189189188}} -{"episode_reward_max": 421.0, "episode_reward_min": 23.0, "episode_reward_mean": 275.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 9.0}, "policy_reward_max": {"ppo": 211.0}, "policy_reward_mean": {"ppo": 137.5}, "custom_metrics": {"sparse_reward_mean": 88.8, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 97.4, "shaped_reward_min": 20, "shaped_reward_max": 
144, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.21, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 11.29, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 9.56, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 10.48, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.74, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 8.75, "potting_onion_agent_0_min": 3, 
"potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.63, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.81, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.75, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.54, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.73, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.19, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.17, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.46, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.29, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 8.75, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.63, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, 
"optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.75, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.63, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [330.0, 248.0, 387.0, 301.0, 298.0, 190.0, 359.0, 353.0, 296.0, 365.0, 293.0, 364.0, 236.0, 325.0, 321.0, 259.0, 303.0, 255.0, 273.0, 256.0, 261.0, 290.0, 307.0, 298.0, 359.0, 287.0, 261.0, 249.0, 255.0, 307.0, 176.0, 413.0, 290.0, 259.0, 159.0, 252.0, 282.0, 319.0, 247.0, 267.0, 244.0, 256.0, 305.0, 402.0, 352.0, 210.0, 139.0, 185.0, 293.0, 324.0, 164.0, 244.0, 345.0, 293.0, 
333.0, 339.0, 150.0, 206.0, 344.0, 132.0, 204.0, 319.0, 258.0, 307.0, 250.0, 282.0, 298.0, 305.0, 218.0, 187.0, 60.0, 267.0, 71.0, 244.0, 238.0, 379.0, 373.0, 258.0, 339.0, 198.0, 330.0, 309.0, 358.0, 267.0, 293.0, 298.0, 264.0, 356.0, 309.0, 253.0, 236.0, 421.0, 287.0, 296.0, 384.0, 254.0, 264.0, 208.0, 23.0, 225.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [151.0, 179.0, 118.0, 130.0, 193.0, 194.0, 162.0, 139.0, 139.0, 159.0, 105.0, 85.0, 180.0, 179.0, 172.0, 181.0, 159.0, 137.0, 168.0, 197.0, 137.0, 156.0, 180.0, 184.0, 114.0, 122.0, 171.0, 154.0, 173.0, 148.0, 135.0, 124.0, 149.0, 154.0, 124.0, 131.0, 148.0, 125.0, 111.0, 145.0, 139.0, 122.0, 144.0, 146.0, 153.0, 154.0, 152.0, 146.0, 182.0, 177.0, 156.0, 131.0, 109.0, 152.0, 121.0, 128.0, 130.0, 125.0, 140.0, 167.0, 85.0, 91.0, 207.0, 206.0, 137.0, 153.0, 121.0, 138.0, 84.0, 75.0, 121.0, 131.0, 151.0, 131.0, 158.0, 161.0, 122.0, 125.0, 134.0, 133.0, 112.0, 132.0, 130.0, 126.0, 153.0, 152.0, 201.0, 201.0, 173.0, 179.0, 99.0, 111.0, 72.0, 67.0, 97.0, 88.0, 152.0, 141.0, 168.0, 156.0, 85.0, 79.0, 125.0, 119.0, 168.0, 177.0, 139.0, 154.0, 165.0, 168.0, 182.0, 157.0, 82.0, 68.0, 89.0, 117.0, 170.0, 174.0, 73.0, 59.0, 112.0, 92.0, 136.0, 183.0, 119.0, 139.0, 162.0, 145.0, 119.0, 131.0, 150.0, 132.0, 167.0, 131.0, 158.0, 147.0, 114.0, 104.0, 93.0, 94.0, 29.0, 31.0, 151.0, 116.0, 34.0, 37.0, 113.0, 131.0, 114.0, 124.0, 193.0, 186.0, 188.0, 185.0, 132.0, 126.0, 159.0, 180.0, 96.0, 102.0, 157.0, 173.0, 158.0, 151.0, 180.0, 178.0, 
136.0, 131.0, 153.0, 140.0, 146.0, 152.0, 127.0, 137.0, 186.0, 170.0, 151.0, 158.0, 107.0, 146.0, 108.0, 128.0, 211.0, 210.0, 144.0, 143.0, 145.0, 151.0, 204.0, 180.0, 124.0, 130.0, 133.0, 131.0, 99.0, 109.0, 9.0, 14.0, 109.0, 116.0]}, "sampler_perf": {"mean_env_wait_ms": 2.683811918728699, "mean_processing_ms": 0.6031493203010873, "mean_inference_ms": 3.2301262692347574}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1632000, "num_steps_sampled": 870400, "sample_time_ms": 21057.05, "load_time_ms": 37.776, "grad_time_ms": 9180.759, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.3552527358017197e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.0049219937063753605, "policy_loss": -0.009040978737175465, "vf_loss": 48.340152740478516, "vf_explained_var": 0.6955335140228271, "kl": 0.0016705109737813473, "entropy": 1.430059552192688, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 870400, "episodes_total": 2176, "training_iteration": 68, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-18-49", "timestamp": 1660249129, "time_this_iter_s": 26.73872995376587, "time_total_s": 7549.544038057327, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7549.544038057327, "timesteps_since_restore": 870400, "iterations_since_restore": 68, "perf": {"cpu_util_percent": 34.623684210526314, "ram_util_percent": 57.58947368421052}} -{"episode_reward_max": 462.0, "episode_reward_min": 23.0, "episode_reward_mean": 287.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 9.0}, "policy_reward_max": {"ppo": 236.0}, "policy_reward_mean": {"ppo": 143.99}, 
"custom_metrics": {"sparse_reward_mean": 94.8, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 98.38, "shaped_reward_min": 20, "shaped_reward_max": 144, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.38, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 11.18, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 9.77, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 10.46, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 
0.59, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.86, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.74, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 16, "dish_pickup_agent_0_mean": 4.92, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.78, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.84, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.72, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.49, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.24, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.95, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.41, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.24, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 8.86, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.74, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 16, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.86, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.74, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 16, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [242.0, 301.0, 234.0, 344.0, 450.0, 312.0, 239.0, 344.0, 359.0, 253.0, 316.0, 255.0, 356.0, 171.0, 396.0, 398.0, 276.0, 402.0, 339.0, 338.0, 353.0, 462.0, 296.0, 237.0, 293.0, 298.0, 231.0, 250.0, 347.0, 63.0, 356.0, 210.0, 250.0, 282.0, 298.0, 305.0, 
218.0, 187.0, 60.0, 267.0, 71.0, 244.0, 238.0, 379.0, 373.0, 258.0, 339.0, 198.0, 330.0, 309.0, 358.0, 267.0, 293.0, 298.0, 264.0, 356.0, 309.0, 253.0, 236.0, 421.0, 287.0, 296.0, 384.0, 254.0, 264.0, 208.0, 23.0, 225.0, 330.0, 248.0, 387.0, 301.0, 298.0, 190.0, 359.0, 353.0, 296.0, 365.0, 293.0, 364.0, 236.0, 325.0, 321.0, 259.0, 303.0, 255.0, 273.0, 256.0, 261.0, 290.0, 307.0, 298.0, 359.0, 287.0, 261.0, 249.0, 255.0, 307.0, 176.0, 413.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [122.0, 120.0, 142.0, 159.0, 117.0, 117.0, 178.0, 166.0, 214.0, 236.0, 150.0, 162.0, 115.0, 124.0, 174.0, 170.0, 187.0, 172.0, 114.0, 139.0, 169.0, 147.0, 122.0, 133.0, 172.0, 184.0, 83.0, 88.0, 186.0, 210.0, 190.0, 208.0, 137.0, 139.0, 193.0, 209.0, 182.0, 157.0, 177.0, 161.0, 193.0, 160.0, 236.0, 226.0, 147.0, 149.0, 125.0, 112.0, 138.0, 155.0, 144.0, 154.0, 111.0, 120.0, 136.0, 114.0, 170.0, 177.0, 23.0, 40.0, 176.0, 180.0, 108.0, 102.0, 119.0, 131.0, 150.0, 132.0, 167.0, 131.0, 158.0, 147.0, 114.0, 104.0, 93.0, 94.0, 29.0, 31.0, 151.0, 116.0, 34.0, 37.0, 113.0, 131.0, 114.0, 124.0, 193.0, 186.0, 188.0, 185.0, 132.0, 126.0, 159.0, 180.0, 96.0, 102.0, 157.0, 173.0, 158.0, 151.0, 180.0, 178.0, 136.0, 131.0, 153.0, 140.0, 146.0, 152.0, 127.0, 137.0, 186.0, 170.0, 151.0, 158.0, 107.0, 146.0, 108.0, 128.0, 211.0, 210.0, 144.0, 143.0, 145.0, 151.0, 204.0, 180.0, 124.0, 130.0, 133.0, 131.0, 99.0, 109.0, 9.0, 14.0, 109.0, 116.0, 151.0, 179.0, 118.0, 130.0, 193.0, 194.0, 162.0, 139.0, 139.0, 159.0, 
105.0, 85.0, 180.0, 179.0, 172.0, 181.0, 159.0, 137.0, 168.0, 197.0, 137.0, 156.0, 180.0, 184.0, 114.0, 122.0, 171.0, 154.0, 173.0, 148.0, 135.0, 124.0, 149.0, 154.0, 124.0, 131.0, 148.0, 125.0, 111.0, 145.0, 139.0, 122.0, 144.0, 146.0, 153.0, 154.0, 152.0, 146.0, 182.0, 177.0, 156.0, 131.0, 109.0, 152.0, 121.0, 128.0, 130.0, 125.0, 140.0, 167.0, 85.0, 91.0, 207.0, 206.0]}, "sampler_perf": {"mean_env_wait_ms": 2.6511088970025942, "mean_processing_ms": 0.5966469783379228, "mean_inference_ms": 3.1956784822649578}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1656000, "num_steps_sampled": 883200, "sample_time_ms": 20666.066, "load_time_ms": 37.699, "grad_time_ms": 8951.146, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 6.776263679008599e-22, "cur_lr": 0.0010000000474974513, "total_loss": -0.007915745489299297, "policy_loss": -0.011840385384857655, "vf_loss": 46.363162994384766, "vf_explained_var": 0.7722532153129578, "kl": 0.0015700907679274678, "entropy": 1.423343300819397, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 883200, "episodes_total": 2208, "training_iteration": 69, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-19-16", "timestamp": 1660249156, "time_this_iter_s": 26.615740060806274, "time_total_s": 7576.1597781181335, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, 
"framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7576.1597781181335, "timesteps_since_restore": 883200, "iterations_since_restore": 69, "perf": {"cpu_util_percent": 33.539473684210535, "ram_util_percent": 57.605263157894726}} -{"episode_reward_max": 462.0, "episode_reward_min": 23.0, "episode_reward_mean": 296.87, 
"episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 9.0}, "policy_reward_max": {"ppo": 236.0}, "policy_reward_mean": {"ppo": 148.435}, "custom_metrics": {"sparse_reward_mean": 98.6, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 99.67, "shaped_reward_min": 23, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.41, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.35, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 9.87, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.64, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.02, "onion_drop_agent_1_min": 0, 
"onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.93, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.01, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.72, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.92, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.78, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.4, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.48, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.68, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.39, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.27, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 8.93, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 
10.01, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.93, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.01, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [249.0, 68.0, 319.0, 402.0, 327.0, 293.0, 350.0, 396.0, 396.0, 247.0, 387.0, 
134.0, 387.0, 301.0, 356.0, 365.0, 308.0, 344.0, 348.0, 333.0, 345.0, 359.0, 259.0, 307.0, 330.0, 269.0, 350.0, 247.0, 356.0, 302.0, 269.0, 68.0, 264.0, 208.0, 23.0, 225.0, 330.0, 248.0, 387.0, 301.0, 298.0, 190.0, 359.0, 353.0, 296.0, 365.0, 293.0, 364.0, 236.0, 325.0, 321.0, 259.0, 303.0, 255.0, 273.0, 256.0, 261.0, 290.0, 307.0, 298.0, 359.0, 287.0, 261.0, 249.0, 255.0, 307.0, 176.0, 413.0, 242.0, 301.0, 234.0, 344.0, 450.0, 312.0, 239.0, 344.0, 359.0, 253.0, 316.0, 255.0, 356.0, 171.0, 396.0, 398.0, 276.0, 402.0, 339.0, 338.0, 353.0, 462.0, 296.0, 237.0, 293.0, 298.0, 231.0, 250.0, 347.0, 63.0, 356.0, 210.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [129.0, 120.0, 28.0, 40.0, 165.0, 154.0, 191.0, 211.0, 161.0, 166.0, 152.0, 141.0, 179.0, 171.0, 222.0, 174.0, 205.0, 191.0, 131.0, 116.0, 188.0, 199.0, 60.0, 74.0, 212.0, 175.0, 147.0, 154.0, 170.0, 186.0, 185.0, 180.0, 146.0, 162.0, 167.0, 177.0, 177.0, 171.0, 153.0, 180.0, 169.0, 176.0, 189.0, 170.0, 133.0, 126.0, 139.0, 168.0, 176.0, 154.0, 142.0, 127.0, 183.0, 167.0, 124.0, 123.0, 176.0, 180.0, 142.0, 160.0, 133.0, 136.0, 28.0, 40.0, 133.0, 131.0, 99.0, 109.0, 9.0, 14.0, 109.0, 116.0, 151.0, 179.0, 118.0, 130.0, 193.0, 194.0, 162.0, 139.0, 139.0, 159.0, 105.0, 85.0, 180.0, 179.0, 172.0, 181.0, 159.0, 137.0, 168.0, 197.0, 137.0, 156.0, 180.0, 184.0, 114.0, 122.0, 171.0, 154.0, 173.0, 148.0, 135.0, 124.0, 149.0, 154.0, 124.0, 131.0, 148.0, 125.0, 111.0, 145.0, 139.0, 122.0, 144.0, 146.0, 153.0, 154.0, 152.0, 146.0, 182.0, 
177.0, 156.0, 131.0, 109.0, 152.0, 121.0, 128.0, 130.0, 125.0, 140.0, 167.0, 85.0, 91.0, 207.0, 206.0, 122.0, 120.0, 142.0, 159.0, 117.0, 117.0, 178.0, 166.0, 214.0, 236.0, 150.0, 162.0, 115.0, 124.0, 174.0, 170.0, 187.0, 172.0, 114.0, 139.0, 169.0, 147.0, 122.0, 133.0, 172.0, 184.0, 83.0, 88.0, 186.0, 210.0, 190.0, 208.0, 137.0, 139.0, 193.0, 209.0, 182.0, 157.0, 177.0, 161.0, 193.0, 160.0, 236.0, 226.0, 147.0, 149.0, 125.0, 112.0, 138.0, 155.0, 144.0, 154.0, 111.0, 120.0, 136.0, 114.0, 170.0, 177.0, 23.0, 40.0, 176.0, 180.0, 108.0, 102.0]}, "sampler_perf": {"mean_env_wait_ms": 2.619364465918477, "mean_processing_ms": 0.59033913982099, "mean_inference_ms": 3.1628679481393043}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1680000, "num_steps_sampled": 896000, "sample_time_ms": 20622.52, "load_time_ms": 37.691, "grad_time_ms": 8717.912, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.3881318395042993e-22, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007909121923148632, "policy_loss": -0.0035155529621988535, "vf_loss": 50.137577056884766, "vf_explained_var": 0.7450786232948303, "kl": 0.0021507267374545336, "entropy": 1.4145766496658325, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 896000, "episodes_total": 2240, "training_iteration": 70, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-19-45", "timestamp": 1660249185, "time_this_iter_s": 29.150850772857666, "time_total_s": 7605.310628890991, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7605.310628890991, "timesteps_since_restore": 896000, "iterations_since_restore": 70, "perf": {"cpu_util_percent": 
33.670731707317074, "ram_util_percent": 57.6219512195122}} -{"episode_reward_max": 462.0, "episode_reward_min": 63.0, "episode_reward_mean": 310.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 23.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 155.035}, "custom_metrics": {"sparse_reward_mean": 103.8, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 102.47, "shaped_reward_min": 23, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.6, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.29, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.1, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.61, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 17, 
"onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.51, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 9.29, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 9.98, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.56, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.96, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.34, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.4, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.65, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.23, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.85, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.33, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.23, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, 
"optimal_onion_potting_agent_0_mean": 9.29, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 9.98, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.29, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 9.98, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [367.0, 350.0, 230.0, 359.0, 362.0, 348.0, 408.0, 313.0, 345.0, 393.0, 399.0, 324.0, 293.0, 296.0, 396.0, 159.0, 236.0, 264.0, 408.0, 456.0, 270.0, 304.0, 356.0, 327.0, 213.0, 275.0, 350.0, 284.0, 390.0, 237.0, 402.0, 250.0, 255.0, 307.0, 176.0, 413.0, 242.0, 301.0, 234.0, 344.0, 450.0, 312.0, 239.0, 344.0, 359.0, 253.0, 316.0, 255.0, 356.0, 171.0, 396.0, 398.0, 276.0, 402.0, 339.0, 338.0, 353.0, 462.0, 296.0, 237.0, 293.0, 298.0, 231.0, 250.0, 347.0, 63.0, 356.0, 210.0, 249.0, 68.0, 319.0, 402.0, 327.0, 293.0, 350.0, 396.0, 396.0, 247.0, 387.0, 134.0, 387.0, 301.0, 356.0, 365.0, 308.0, 344.0, 348.0, 333.0, 345.0, 359.0, 259.0, 307.0, 330.0, 269.0, 350.0, 247.0, 356.0, 302.0, 269.0, 68.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [195.0, 172.0, 171.0, 179.0, 123.0, 107.0, 183.0, 176.0, 184.0, 178.0, 178.0, 170.0, 211.0, 197.0, 154.0, 159.0, 174.0, 171.0, 204.0, 189.0, 205.0, 194.0, 167.0, 157.0, 156.0, 137.0, 150.0, 146.0, 206.0, 190.0, 88.0, 71.0, 121.0, 115.0, 125.0, 139.0, 205.0, 203.0, 241.0, 215.0, 135.0, 135.0, 138.0, 166.0, 182.0, 174.0, 162.0, 165.0, 119.0, 94.0, 128.0, 147.0, 177.0, 173.0, 148.0, 136.0, 201.0, 189.0, 106.0, 131.0, 195.0, 207.0, 123.0, 127.0, 130.0, 125.0, 140.0, 167.0, 85.0, 91.0, 207.0, 206.0, 122.0, 120.0, 142.0, 159.0, 117.0, 117.0, 178.0, 166.0, 214.0, 236.0, 150.0, 162.0, 115.0, 124.0, 174.0, 170.0, 187.0, 172.0, 114.0, 139.0, 
169.0, 147.0, 122.0, 133.0, 172.0, 184.0, 83.0, 88.0, 186.0, 210.0, 190.0, 208.0, 137.0, 139.0, 193.0, 209.0, 182.0, 157.0, 177.0, 161.0, 193.0, 160.0, 236.0, 226.0, 147.0, 149.0, 125.0, 112.0, 138.0, 155.0, 144.0, 154.0, 111.0, 120.0, 136.0, 114.0, 170.0, 177.0, 23.0, 40.0, 176.0, 180.0, 108.0, 102.0, 129.0, 120.0, 28.0, 40.0, 165.0, 154.0, 191.0, 211.0, 161.0, 166.0, 152.0, 141.0, 179.0, 171.0, 222.0, 174.0, 205.0, 191.0, 131.0, 116.0, 188.0, 199.0, 60.0, 74.0, 212.0, 175.0, 147.0, 154.0, 170.0, 186.0, 185.0, 180.0, 146.0, 162.0, 167.0, 177.0, 177.0, 171.0, 153.0, 180.0, 169.0, 176.0, 189.0, 170.0, 133.0, 126.0, 139.0, 168.0, 176.0, 154.0, 142.0, 127.0, 183.0, 167.0, 124.0, 123.0, 176.0, 180.0, 142.0, 160.0, 133.0, 136.0, 28.0, 40.0]}, "sampler_perf": {"mean_env_wait_ms": 2.5885045396886737, "mean_processing_ms": 0.584211440514898, "mean_inference_ms": 3.1312910646882246}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1704000, "num_steps_sampled": 908800, "sample_time_ms": 20412.625, "load_time_ms": 37.645, "grad_time_ms": 8491.272, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.6940659197521496e-22, "cur_lr": 0.0010000000474974513, "total_loss": 9.037616109708324e-05, "policy_loss": -0.004211378749459982, "vf_loss": 49.97343826293945, "vf_explained_var": 0.7645077705383301, "kl": 0.0018662656657397747, "entropy": 1.391157627105713, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 908800, "episodes_total": 2272, "training_iteration": 71, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-20-14", "timestamp": 1660249214, "time_this_iter_s": 28.656519889831543, "time_total_s": 7633.967148780823, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7633.967148780823, "timesteps_since_restore": 908800, "iterations_since_restore": 71, "perf": {"cpu_util_percent": 34.982926829268294, "ram_util_percent": 57.707317073170735}} -{"episode_reward_max": 456.0, "episode_reward_min": 63.0, "episode_reward_mean": 316.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 23.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 158.135}, "custom_metrics": {"sparse_reward_mean": 105.8, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 104.67, "shaped_reward_min": 23, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.74, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.52, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 
10.21, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.81, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 9.4, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.18, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 4.86, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.41, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.04, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.36, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.75, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.22, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.28, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.84, "soup_delivery_agent_1_min": 0, 
"soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.38, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.29, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.4, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.18, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.4, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.18, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [408.0, 316.0, 234.0, 245.0, 373.0, 336.0, 450.0, 416.0, 413.0, 356.0, 247.0, 410.0, 182.0, 347.0, 233.0, 316.0, 284.0, 316.0, 242.0, 296.0, 396.0, 142.0, 324.0, 351.0, 359.0, 361.0, 399.0, 353.0, 344.0, 275.0, 410.0, 382.0, 347.0, 63.0, 356.0, 210.0, 249.0, 68.0, 319.0, 402.0, 327.0, 293.0, 350.0, 396.0, 396.0, 247.0, 387.0, 134.0, 387.0, 301.0, 356.0, 365.0, 308.0, 344.0, 348.0, 333.0, 345.0, 359.0, 259.0, 307.0, 330.0, 269.0, 350.0, 247.0, 356.0, 302.0, 269.0, 68.0, 367.0, 350.0, 230.0, 359.0, 362.0, 348.0, 408.0, 313.0, 345.0, 393.0, 399.0, 324.0, 293.0, 296.0, 396.0, 159.0, 236.0, 264.0, 408.0, 456.0, 270.0, 304.0, 356.0, 327.0, 213.0, 275.0, 350.0, 284.0, 390.0, 237.0, 402.0, 250.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [199.0, 209.0, 151.0, 165.0, 95.0, 139.0, 128.0, 117.0, 175.0, 198.0, 156.0, 180.0, 239.0, 211.0, 208.0, 208.0, 210.0, 203.0, 174.0, 182.0, 123.0, 124.0, 204.0, 206.0, 88.0, 94.0, 178.0, 169.0, 116.0, 117.0, 151.0, 165.0, 149.0, 135.0, 163.0, 153.0, 144.0, 98.0, 151.0, 145.0, 179.0, 217.0, 62.0, 80.0, 156.0, 168.0, 184.0, 167.0, 165.0, 194.0, 182.0, 179.0, 207.0, 192.0, 179.0, 174.0, 178.0, 166.0, 141.0, 134.0, 219.0, 191.0, 
188.0, 194.0, 170.0, 177.0, 23.0, 40.0, 176.0, 180.0, 108.0, 102.0, 129.0, 120.0, 28.0, 40.0, 165.0, 154.0, 191.0, 211.0, 161.0, 166.0, 152.0, 141.0, 179.0, 171.0, 222.0, 174.0, 205.0, 191.0, 131.0, 116.0, 188.0, 199.0, 60.0, 74.0, 212.0, 175.0, 147.0, 154.0, 170.0, 186.0, 185.0, 180.0, 146.0, 162.0, 167.0, 177.0, 177.0, 171.0, 153.0, 180.0, 169.0, 176.0, 189.0, 170.0, 133.0, 126.0, 139.0, 168.0, 176.0, 154.0, 142.0, 127.0, 183.0, 167.0, 124.0, 123.0, 176.0, 180.0, 142.0, 160.0, 133.0, 136.0, 28.0, 40.0, 195.0, 172.0, 171.0, 179.0, 123.0, 107.0, 183.0, 176.0, 184.0, 178.0, 178.0, 170.0, 211.0, 197.0, 154.0, 159.0, 174.0, 171.0, 204.0, 189.0, 205.0, 194.0, 167.0, 157.0, 156.0, 137.0, 150.0, 146.0, 206.0, 190.0, 88.0, 71.0, 121.0, 115.0, 125.0, 139.0, 205.0, 203.0, 241.0, 215.0, 135.0, 135.0, 138.0, 166.0, 182.0, 174.0, 162.0, 165.0, 119.0, 94.0, 128.0, 147.0, 177.0, 173.0, 148.0, 136.0, 201.0, 189.0, 106.0, 131.0, 195.0, 207.0, 123.0, 127.0]}, "sampler_perf": {"mean_env_wait_ms": 2.558557313555292, "mean_processing_ms": 0.5782709476223633, "mean_inference_ms": 3.1013750793848702}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1728000, "num_steps_sampled": 921600, "sample_time_ms": 20167.129, "load_time_ms": 37.249, "grad_time_ms": 8246.669, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 8.470329598760748e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.0024793706834316254, "policy_loss": -0.007412927225232124, "vf_loss": 56.26578903198242, "vf_explained_var": 0.7433841228485107, "kl": 0.0019004354253411293, "entropy": 1.3860511779785156, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 921600, "episodes_total": 2304, "training_iteration": 72, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-20-44", "timestamp": 1660249244, "time_this_iter_s": 30.219820022583008, "time_total_s": 7664.186968803406, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7664.186968803406, "timesteps_since_restore": 921600, "iterations_since_restore": 72, "perf": {"cpu_util_percent": 35.07380952380952, "ram_util_percent": 57.70714285714284}} -{"episode_reward_max": 465.0, "episode_reward_min": 68.0, "episode_reward_mean": 332.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 28.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 166.23}, "custom_metrics": {"sparse_reward_mean": 111.6, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 109.26, "shaped_reward_min": 28, "shaped_reward_max": 145, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.81, 
"onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 12.13, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.31, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 11.46, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.91, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.46, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 9.53, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.79, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 4.84, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.44, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.6, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.3, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.36, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.29, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.63, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.46, "soup_pickup_agent_1_min": 0, 
"soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.07, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.28, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.31, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.53, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.79, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.53, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.79, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [373.0, 465.0, 251.0, 348.0, 399.0, 284.0, 344.0, 459.0, 416.0, 410.0, 416.0, 358.0, 422.0, 402.0, 405.0, 422.0, 247.0, 296.0, 407.0, 246.0, 120.0, 310.0, 413.0, 338.0, 353.0, 405.0, 416.0, 365.0, 236.0, 287.0, 350.0, 408.0, 356.0, 302.0, 269.0, 68.0, 367.0, 350.0, 230.0, 359.0, 362.0, 348.0, 408.0, 313.0, 345.0, 393.0, 399.0, 324.0, 293.0, 296.0, 396.0, 159.0, 236.0, 264.0, 408.0, 456.0, 270.0, 304.0, 356.0, 327.0, 213.0, 275.0, 350.0, 284.0, 390.0, 237.0, 402.0, 250.0, 408.0, 316.0, 234.0, 245.0, 373.0, 336.0, 450.0, 416.0, 413.0, 356.0, 247.0, 410.0, 182.0, 347.0, 233.0, 316.0, 284.0, 316.0, 242.0, 296.0, 396.0, 142.0, 324.0, 351.0, 359.0, 361.0, 399.0, 353.0, 344.0, 275.0, 410.0, 382.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [170.0, 203.0, 226.0, 239.0, 131.0, 120.0, 162.0, 186.0, 205.0, 194.0, 132.0, 152.0, 166.0, 178.0, 235.0, 224.0, 205.0, 211.0, 188.0, 222.0, 197.0, 219.0, 177.0, 181.0, 212.0, 210.0, 196.0, 206.0, 213.0, 192.0, 199.0, 223.0, 111.0, 136.0, 147.0, 149.0, 193.0, 
214.0, 121.0, 125.0, 55.0, 65.0, 165.0, 145.0, 203.0, 210.0, 170.0, 168.0, 161.0, 192.0, 191.0, 214.0, 213.0, 203.0, 178.0, 187.0, 98.0, 138.0, 150.0, 137.0, 176.0, 174.0, 202.0, 206.0, 176.0, 180.0, 142.0, 160.0, 133.0, 136.0, 28.0, 40.0, 195.0, 172.0, 171.0, 179.0, 123.0, 107.0, 183.0, 176.0, 184.0, 178.0, 178.0, 170.0, 211.0, 197.0, 154.0, 159.0, 174.0, 171.0, 204.0, 189.0, 205.0, 194.0, 167.0, 157.0, 156.0, 137.0, 150.0, 146.0, 206.0, 190.0, 88.0, 71.0, 121.0, 115.0, 125.0, 139.0, 205.0, 203.0, 241.0, 215.0, 135.0, 135.0, 138.0, 166.0, 182.0, 174.0, 162.0, 165.0, 119.0, 94.0, 128.0, 147.0, 177.0, 173.0, 148.0, 136.0, 201.0, 189.0, 106.0, 131.0, 195.0, 207.0, 123.0, 127.0, 199.0, 209.0, 151.0, 165.0, 95.0, 139.0, 128.0, 117.0, 175.0, 198.0, 156.0, 180.0, 239.0, 211.0, 208.0, 208.0, 210.0, 203.0, 174.0, 182.0, 123.0, 124.0, 204.0, 206.0, 88.0, 94.0, 178.0, 169.0, 116.0, 117.0, 151.0, 165.0, 149.0, 135.0, 163.0, 153.0, 144.0, 98.0, 151.0, 145.0, 179.0, 217.0, 62.0, 80.0, 156.0, 168.0, 184.0, 167.0, 165.0, 194.0, 182.0, 179.0, 207.0, 192.0, 179.0, 174.0, 178.0, 166.0, 141.0, 134.0, 219.0, 191.0, 188.0, 194.0]}, "sampler_perf": {"mean_env_wait_ms": 2.5294421557916076, "mean_processing_ms": 0.5725031041756122, "mean_inference_ms": 3.0723080495869532}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1752000, "num_steps_sampled": 934400, "sample_time_ms": 20097.062, "load_time_ms": 37.14, "grad_time_ms": 8126.211, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 4.235164799380374e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.0012375875376164913, "policy_loss": -0.006141460034996271, "vf_loss": 55.8723258972168, "vf_explained_var": 0.7437755465507507, "kl": 0.0014161770232021809, "entropy": 1.3667305707931519, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 934400, "episodes_total": 2336, "training_iteration": 73, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-21-14", "timestamp": 1660249274, "time_this_iter_s": 30.526150941848755, "time_total_s": 7694.7131197452545, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7694.7131197452545, "timesteps_since_restore": 934400, "iterations_since_restore": 73, "perf": {"cpu_util_percent": 36.46279069767442, "ram_util_percent": 57.75116279069769}} -{"episode_reward_max": 465.0, "episode_reward_min": 120.0, "episode_reward_mean": 346.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 55.0}, "policy_reward_max": {"ppo": 239.0}, "policy_reward_mean": {"ppo": 173.395}, "custom_metrics": {"sparse_reward_mean": 117.2, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 112.39, "shaped_reward_min": 40, "shaped_reward_max": 147, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.26, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 12.36, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.7, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 11.66, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.6, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.69, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 9.87, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.09, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 4.89, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.77, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.43, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.37, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.39, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.72, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.58, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.36, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.29, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.24, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.87, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.09, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.87, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.09, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [358.0, 413.0, 455.0, 395.0, 402.0, 341.0, 290.0, 427.0, 413.0, 253.0, 355.0, 365.0, 316.0, 458.0, 196.0, 459.0, 398.0, 359.0, 407.0, 322.0, 410.0, 416.0, 390.0, 301.0, 416.0, 279.0, 301.0, 215.0, 359.0, 395.0, 310.0, 339.0, 390.0, 237.0, 402.0, 250.0, 408.0, 316.0, 234.0, 245.0, 373.0, 336.0, 450.0, 416.0, 413.0, 356.0, 247.0, 410.0, 182.0, 347.0, 233.0, 316.0, 284.0, 316.0, 242.0, 296.0, 396.0, 142.0, 324.0, 351.0, 359.0, 361.0, 399.0, 353.0, 344.0, 275.0, 410.0, 382.0, 373.0, 465.0, 251.0, 348.0, 399.0, 284.0, 344.0, 459.0, 416.0, 410.0, 416.0, 358.0, 422.0, 402.0, 405.0, 422.0, 247.0, 296.0, 407.0, 246.0, 120.0, 310.0, 413.0, 338.0, 353.0, 405.0, 416.0, 365.0, 236.0, 287.0, 350.0, 408.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [187.0, 171.0, 213.0, 200.0, 222.0, 
233.0, 208.0, 187.0, 203.0, 199.0, 173.0, 168.0, 128.0, 162.0, 217.0, 210.0, 211.0, 202.0, 119.0, 134.0, 180.0, 175.0, 169.0, 196.0, 160.0, 156.0, 235.0, 223.0, 107.0, 89.0, 229.0, 230.0, 188.0, 210.0, 179.0, 180.0, 199.0, 208.0, 157.0, 165.0, 225.0, 185.0, 208.0, 208.0, 187.0, 203.0, 157.0, 144.0, 204.0, 212.0, 151.0, 128.0, 142.0, 159.0, 108.0, 107.0, 182.0, 177.0, 195.0, 200.0, 144.0, 166.0, 171.0, 168.0, 201.0, 189.0, 106.0, 131.0, 195.0, 207.0, 123.0, 127.0, 199.0, 209.0, 151.0, 165.0, 95.0, 139.0, 128.0, 117.0, 175.0, 198.0, 156.0, 180.0, 239.0, 211.0, 208.0, 208.0, 210.0, 203.0, 174.0, 182.0, 123.0, 124.0, 204.0, 206.0, 88.0, 94.0, 178.0, 169.0, 116.0, 117.0, 151.0, 165.0, 149.0, 135.0, 163.0, 153.0, 144.0, 98.0, 151.0, 145.0, 179.0, 217.0, 62.0, 80.0, 156.0, 168.0, 184.0, 167.0, 165.0, 194.0, 182.0, 179.0, 207.0, 192.0, 179.0, 174.0, 178.0, 166.0, 141.0, 134.0, 219.0, 191.0, 188.0, 194.0, 170.0, 203.0, 226.0, 239.0, 131.0, 120.0, 162.0, 186.0, 205.0, 194.0, 132.0, 152.0, 166.0, 178.0, 235.0, 224.0, 205.0, 211.0, 188.0, 222.0, 197.0, 219.0, 177.0, 181.0, 212.0, 210.0, 196.0, 206.0, 213.0, 192.0, 199.0, 223.0, 111.0, 136.0, 147.0, 149.0, 193.0, 214.0, 121.0, 125.0, 55.0, 65.0, 165.0, 145.0, 203.0, 210.0, 170.0, 168.0, 161.0, 192.0, 191.0, 214.0, 213.0, 203.0, 178.0, 187.0, 98.0, 138.0, 150.0, 137.0, 176.0, 174.0, 202.0, 206.0]}, "sampler_perf": {"mean_env_wait_ms": 2.5011550162167318, "mean_processing_ms": 0.5669031638789668, "mean_inference_ms": 3.044470138401616}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1776000, "num_steps_sampled": 947200, "sample_time_ms": 20475.505, "load_time_ms": 36.886, "grad_time_ms": 8011.391, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.117582399690187e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.002805360360071063, "policy_loss": -0.007409963756799698, "vf_loss": 52.88139724731445, "vf_explained_var": 0.7572636008262634, "kl": 0.0014988663606345654, "entropy": 1.3671082258224487, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 947200, "episodes_total": 2368, "training_iteration": 74, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-21-46", "timestamp": 1660249306, "time_this_iter_s": 31.191842079162598, "time_total_s": 7725.904961824417, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7725.904961824417, "timesteps_since_restore": 947200, "iterations_since_restore": 74, "perf": {"cpu_util_percent": 34.40666666666667, "ram_util_percent": 57.844444444444456}} -{"episode_reward_max": 465.0, "episode_reward_min": 120.0, "episode_reward_mean": 364.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 55.0}, "policy_reward_max": {"ppo": 239.0}, "policy_reward_mean": {"ppo": 182.04}, "custom_metrics": {"sparse_reward_mean": 124.0, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 116.08, "shaped_reward_min": 40, "shaped_reward_max": 147, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.3, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 12.93, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.86, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 12.23, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.93, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.57, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.09, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.68, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.56, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.16, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.38, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.42, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.62, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.21, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 10.09, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.68, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.09, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.68, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [438.0, 390.0, 364.0, 396.0, 413.0, 456.0, 346.0, 453.0, 332.0, 413.0, 251.0, 379.0, 384.0, 464.0, 384.0, 416.0, 401.0, 345.0, 339.0, 344.0, 355.0, 398.0, 419.0, 387.0, 302.0, 299.0, 453.0, 230.0, 362.0, 399.0, 405.0, 396.0, 344.0, 275.0, 410.0, 382.0, 373.0, 465.0, 251.0, 348.0, 399.0, 284.0, 344.0, 459.0, 416.0, 410.0, 416.0, 358.0, 422.0, 402.0, 405.0, 422.0, 247.0, 296.0, 407.0, 246.0, 120.0, 310.0, 413.0, 338.0, 353.0, 405.0, 416.0, 365.0, 236.0, 287.0, 350.0, 408.0, 358.0, 413.0, 455.0, 395.0, 402.0, 341.0, 290.0, 427.0, 413.0, 253.0, 355.0, 365.0, 316.0, 458.0, 196.0, 459.0, 398.0, 359.0, 407.0, 322.0, 410.0, 416.0, 390.0, 301.0, 416.0, 279.0, 301.0, 215.0, 359.0, 395.0, 310.0, 339.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [216.0, 222.0, 208.0, 182.0, 173.0, 191.0, 206.0, 190.0, 213.0, 200.0, 219.0, 237.0, 169.0, 177.0, 239.0, 214.0, 169.0, 163.0, 209.0, 204.0, 133.0, 118.0, 191.0, 188.0, 176.0, 208.0, 233.0, 231.0, 204.0, 180.0, 211.0, 205.0, 196.0, 205.0, 179.0, 166.0, 156.0, 183.0, 167.0, 177.0, 174.0, 181.0, 192.0, 206.0, 201.0, 218.0, 171.0, 216.0, 149.0, 153.0, 153.0, 146.0, 236.0, 217.0, 111.0, 119.0, 183.0, 179.0, 207.0, 192.0, 197.0, 208.0, 202.0, 194.0, 178.0, 166.0, 141.0, 134.0, 219.0, 191.0, 188.0, 194.0, 170.0, 203.0, 226.0, 239.0, 131.0, 120.0, 162.0, 186.0, 205.0, 194.0, 132.0, 152.0, 166.0, 178.0, 235.0, 224.0, 205.0, 211.0, 188.0, 222.0, 197.0, 219.0, 177.0, 181.0, 212.0, 210.0, 196.0, 206.0, 213.0, 192.0, 199.0, 223.0, 111.0, 136.0, 147.0, 149.0, 193.0, 214.0, 121.0, 125.0, 55.0, 65.0, 165.0, 145.0, 203.0, 210.0, 170.0, 168.0, 161.0, 192.0, 191.0, 214.0, 213.0, 203.0, 178.0, 187.0, 98.0, 138.0, 150.0, 137.0, 176.0, 174.0, 202.0, 206.0, 187.0, 171.0, 213.0, 200.0, 222.0, 233.0, 208.0, 187.0, 203.0, 199.0, 173.0, 168.0, 128.0, 162.0, 217.0, 210.0, 211.0, 202.0, 119.0, 134.0, 180.0, 175.0, 169.0, 196.0, 160.0, 156.0, 235.0, 223.0, 107.0, 89.0, 229.0, 230.0, 188.0, 210.0, 179.0, 180.0, 199.0, 208.0, 157.0, 165.0, 225.0, 185.0, 208.0, 208.0, 187.0, 203.0, 157.0, 144.0, 204.0, 212.0, 151.0, 128.0, 142.0, 159.0, 108.0, 107.0, 182.0, 177.0, 195.0, 200.0, 144.0, 166.0, 171.0, 168.0]}, "sampler_perf": {"mean_env_wait_ms": 2.4736327447914648, "mean_processing_ms": 0.5614637333952731, "mean_inference_ms": 3.0177932889211685}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1800000, "num_steps_sampled": 960000, "sample_time_ms": 20800.663, "load_time_ms": 36.895, "grad_time_ms": 8168.473, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.0587911998450935e-23, 
"cur_lr": 0.0010000000474974513, "total_loss": -0.0010099885985255241, "policy_loss": -0.006211612839251757, "vf_loss": 58.7685546875, "vf_explained_var": 0.7208888530731201, "kl": 0.0020332669373601675, "entropy": 1.3504695892333984, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 960000, "episodes_total": 2400, "training_iteration": 75, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-22-18", "timestamp": 1660249338, "time_this_iter_s": 32.21927499771118, "time_total_s": 7758.124236822128, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": 
Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, 
"output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7758.124236822128, "timesteps_since_restore": 960000, "iterations_since_restore": 75, "perf": {"cpu_util_percent": 32.595555555555556, "ram_util_percent": 57.83555555555553}} -{"episode_reward_max": 507.0, "episode_reward_min": 196.0, "episode_reward_mean": 369.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 258.0}, "policy_reward_mean": {"ppo": 184.535}, "custom_metrics": {"sparse_reward_mean": 125.8, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 117.47, "shaped_reward_min": 70, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.42, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.2, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 10.97, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.42, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.9, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.54, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.26, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.75, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.64, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, 
"useful_dish_pickup_agent_1_mean": 2.24, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.45, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.4, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.53, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.18, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.26, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.75, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.26, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.75, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [272.0, 313.0, 408.0, 444.0, 304.0, 362.0, 304.0, 450.0, 408.0, 370.0, 476.0, 293.0, 453.0, 241.0, 410.0, 453.0, 405.0, 350.0, 350.0, 507.0, 427.0, 393.0, 267.0, 407.0, 419.0, 303.0, 410.0, 378.0, 350.0, 345.0, 376.0, 352.0, 236.0, 287.0, 350.0, 408.0, 358.0, 413.0, 455.0, 395.0, 402.0, 341.0, 290.0, 427.0, 413.0, 253.0, 355.0, 365.0, 316.0, 458.0, 196.0, 459.0, 398.0, 359.0, 407.0, 322.0, 410.0, 416.0, 390.0, 301.0, 416.0, 279.0, 301.0, 215.0, 359.0, 395.0, 310.0, 339.0, 438.0, 390.0, 364.0, 396.0, 413.0, 456.0, 346.0, 453.0, 332.0, 413.0, 251.0, 379.0, 384.0, 464.0, 384.0, 416.0, 401.0, 345.0, 339.0, 344.0, 355.0, 398.0, 419.0, 387.0, 302.0, 299.0, 453.0, 230.0, 362.0, 399.0, 405.0, 396.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [136.0, 136.0, 156.0, 157.0, 194.0, 214.0, 212.0, 232.0, 153.0, 151.0, 177.0, 185.0, 158.0, 146.0, 219.0, 231.0, 205.0, 203.0, 198.0, 172.0, 250.0, 226.0, 135.0, 158.0, 244.0, 209.0, 125.0, 116.0, 194.0, 216.0, 229.0, 224.0, 201.0, 204.0, 160.0, 190.0, 157.0, 193.0, 258.0, 249.0, 208.0, 219.0, 196.0, 197.0, 138.0, 129.0, 192.0, 215.0, 211.0, 208.0, 140.0, 163.0, 206.0, 204.0, 189.0, 189.0, 171.0, 179.0, 177.0, 168.0, 206.0, 170.0, 173.0, 179.0, 98.0, 138.0, 150.0, 137.0, 176.0, 174.0, 202.0, 206.0, 187.0, 171.0, 213.0, 200.0, 222.0, 233.0, 208.0, 187.0, 203.0, 199.0, 173.0, 168.0, 128.0, 162.0, 217.0, 210.0, 211.0, 202.0, 119.0, 134.0, 180.0, 175.0, 169.0, 196.0, 160.0, 156.0, 235.0, 223.0, 107.0, 89.0, 229.0, 230.0, 188.0, 210.0, 179.0, 180.0, 199.0, 208.0, 157.0, 165.0, 225.0, 185.0, 208.0, 208.0, 187.0, 203.0, 157.0, 144.0, 204.0, 212.0, 151.0, 128.0, 142.0, 159.0, 108.0, 107.0, 182.0, 177.0, 195.0, 200.0, 144.0, 166.0, 171.0, 168.0, 216.0, 222.0, 208.0, 182.0, 173.0, 191.0, 206.0, 190.0, 213.0, 200.0, 219.0, 237.0, 169.0, 177.0, 239.0, 214.0, 169.0, 163.0, 209.0, 204.0, 133.0, 118.0, 191.0, 188.0, 176.0, 208.0, 233.0, 231.0, 204.0, 180.0, 211.0, 205.0, 196.0, 205.0, 179.0, 166.0, 156.0, 183.0, 167.0, 177.0, 174.0, 181.0, 192.0, 206.0, 201.0, 218.0, 171.0, 216.0, 149.0, 153.0, 153.0, 146.0, 236.0, 217.0, 111.0, 119.0, 183.0, 179.0, 207.0, 192.0, 197.0, 208.0, 202.0, 194.0]}, "sampler_perf": {"mean_env_wait_ms": 2.4468834694970774, "mean_processing_ms": 0.5561781773126093, "mean_inference_ms": 2.992510104410383}, "off_policy_estimator": {}, "info": 
{"num_steps_trained": 1824000, "num_steps_sampled": 972800, "sample_time_ms": 21085.438, "load_time_ms": 36.907, "grad_time_ms": 8381.058, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 5.293955999225468e-24, "cur_lr": 0.0010000000474974513, "total_loss": -0.000600266270339489, "policy_loss": -0.005276266019791365, "vf_loss": 53.540836334228516, "vf_explained_var": 0.7716453671455383, "kl": 0.0016209534369409084, "entropy": 1.3561688661575317, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 972800, "episodes_total": 2432, "training_iteration": 76, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-22-51", "timestamp": 1660249371, "time_this_iter_s": 33.30055785179138, "time_total_s": 7791.42479467392, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": 
{"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7791.42479467392, "timesteps_since_restore": 972800, "iterations_since_restore": 76, "perf": {"cpu_util_percent": 33.693617021276594, "ram_util_percent": 57.704255319148906}} -{"episode_reward_max": 510.0, "episode_reward_min": 208.0, "episode_reward_mean": 377.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 104.0}, "policy_reward_max": {"ppo": 274.0}, "policy_reward_mean": {"ppo": 188.74}, "custom_metrics": {"sparse_reward_mean": 129.0, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 119.48, "shaped_reward_min": 70, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.62, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 13.48, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 11.01, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 12.72, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.63, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.34, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 12.03, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, 
"dish_pickup_agent_1_mean": 4.71, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.78, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.0, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.4, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.29, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.04, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.39, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.19, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.18, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.34, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 12.03, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.34, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 12.03, "viable_onion_potting_agent_1_min": 6, 
"viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [376.0, 305.0, 302.0, 355.0, 447.0, 413.0, 364.0, 453.0, 459.0, 393.0, 368.0, 399.0, 404.0, 402.0, 373.0, 399.0, 381.0, 395.0, 367.0, 404.0, 208.0, 459.0, 287.0, 510.0, 347.0, 324.0, 410.0, 453.0, 404.0, 408.0, 319.0, 344.0, 359.0, 395.0, 310.0, 339.0, 438.0, 390.0, 364.0, 396.0, 413.0, 456.0, 346.0, 453.0, 332.0, 413.0, 251.0, 379.0, 384.0, 464.0, 384.0, 416.0, 401.0, 345.0, 339.0, 344.0, 355.0, 398.0, 419.0, 387.0, 302.0, 299.0, 453.0, 230.0, 362.0, 399.0, 405.0, 396.0, 272.0, 313.0, 408.0, 444.0, 304.0, 362.0, 304.0, 450.0, 408.0, 370.0, 476.0, 293.0, 453.0, 241.0, 410.0, 453.0, 405.0, 350.0, 350.0, 507.0, 427.0, 
393.0, 267.0, 407.0, 419.0, 303.0, 410.0, 378.0, 350.0, 345.0, 376.0, 352.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [191.0, 185.0, 153.0, 152.0, 132.0, 170.0, 182.0, 173.0, 226.0, 221.0, 212.0, 201.0, 181.0, 183.0, 213.0, 240.0, 238.0, 221.0, 191.0, 202.0, 174.0, 194.0, 194.0, 205.0, 208.0, 196.0, 208.0, 194.0, 195.0, 178.0, 205.0, 194.0, 189.0, 192.0, 221.0, 174.0, 189.0, 178.0, 205.0, 199.0, 104.0, 104.0, 224.0, 235.0, 131.0, 156.0, 236.0, 274.0, 180.0, 167.0, 151.0, 173.0, 199.0, 211.0, 222.0, 231.0, 211.0, 193.0, 194.0, 214.0, 162.0, 157.0, 172.0, 172.0, 182.0, 177.0, 195.0, 200.0, 144.0, 166.0, 171.0, 168.0, 216.0, 222.0, 208.0, 182.0, 173.0, 191.0, 206.0, 190.0, 213.0, 200.0, 219.0, 237.0, 169.0, 177.0, 239.0, 214.0, 169.0, 163.0, 209.0, 204.0, 133.0, 118.0, 191.0, 188.0, 176.0, 208.0, 233.0, 231.0, 204.0, 180.0, 211.0, 205.0, 196.0, 205.0, 179.0, 166.0, 156.0, 183.0, 167.0, 177.0, 174.0, 181.0, 192.0, 206.0, 201.0, 218.0, 171.0, 216.0, 149.0, 153.0, 153.0, 146.0, 236.0, 217.0, 111.0, 119.0, 183.0, 179.0, 207.0, 192.0, 197.0, 208.0, 202.0, 194.0, 136.0, 136.0, 156.0, 157.0, 194.0, 214.0, 212.0, 232.0, 153.0, 151.0, 177.0, 185.0, 158.0, 146.0, 219.0, 231.0, 205.0, 203.0, 198.0, 172.0, 250.0, 226.0, 135.0, 158.0, 244.0, 209.0, 125.0, 116.0, 194.0, 216.0, 229.0, 224.0, 201.0, 204.0, 160.0, 190.0, 157.0, 193.0, 258.0, 249.0, 208.0, 219.0, 196.0, 197.0, 138.0, 129.0, 192.0, 215.0, 211.0, 208.0, 140.0, 163.0, 206.0, 204.0, 189.0, 189.0, 171.0, 179.0, 177.0, 168.0, 206.0, 
170.0, 173.0, 179.0]}, "sampler_perf": {"mean_env_wait_ms": 2.4209020546417674, "mean_processing_ms": 0.551048674766232, "mean_inference_ms": 2.9680431709223565}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1848000, "num_steps_sampled": 985600, "sample_time_ms": 21397.355, "load_time_ms": 37.117, "grad_time_ms": 8669.98, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.646977999612734e-24, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010796785354614258, "policy_loss": -0.006340180989354849, "vf_loss": 59.34244918823242, "vf_explained_var": 0.7488496899604797, "kl": 0.0016171737806871533, "entropy": 1.3474963903427124, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 985600, "episodes_total": 2464, "training_iteration": 77, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-23-24", "timestamp": 1660249404, "time_this_iter_s": 32.688453912734985, "time_total_s": 7824.113248586655, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 
0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7824.113248586655, "timesteps_since_restore": 985600, "iterations_since_restore": 77, "perf": {"cpu_util_percent": 40.12173913043479, "ram_util_percent": 58.68478260869566}} -{"episode_reward_max": 525.0, "episode_reward_min": 194.0, "episode_reward_mean": 380.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 274.0}, "policy_reward_mean": {"ppo": 190.065}, "custom_metrics": {"sparse_reward_mean": 129.4, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 121.33, "shaped_reward_min": 74, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.11, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 13.42, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 11.42, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 12.66, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.73, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.78, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 10.7, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 
11.85, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.71, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.94, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.92, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.34, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.26, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 3.93, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.73, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.73, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.45, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.23, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.7, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.85, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.7, 
"viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.85, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [416.0, 411.0, 419.0, 358.0, 367.0, 413.0, 427.0, 456.0, 450.0, 461.0, 461.0, 301.0, 405.0, 447.0, 293.0, 285.0, 399.0, 430.0, 422.0, 362.0, 416.0, 525.0, 194.0, 365.0, 407.0, 294.0, 239.0, 304.0, 296.0, 356.0, 387.0, 453.0, 362.0, 399.0, 405.0, 396.0, 272.0, 313.0, 408.0, 444.0, 304.0, 362.0, 304.0, 450.0, 408.0, 370.0, 476.0, 293.0, 453.0, 241.0, 410.0, 453.0, 405.0, 350.0, 350.0, 507.0, 427.0, 393.0, 267.0, 407.0, 419.0, 303.0, 410.0, 378.0, 350.0, 
345.0, 376.0, 352.0, 376.0, 305.0, 302.0, 355.0, 447.0, 413.0, 364.0, 453.0, 459.0, 393.0, 368.0, 399.0, 404.0, 402.0, 373.0, 399.0, 381.0, 395.0, 367.0, 404.0, 208.0, 459.0, 287.0, 510.0, 347.0, 324.0, 410.0, 453.0, 404.0, 408.0, 319.0, 344.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [205.0, 211.0, 205.0, 206.0, 200.0, 219.0, 176.0, 182.0, 191.0, 176.0, 204.0, 209.0, 219.0, 208.0, 225.0, 231.0, 219.0, 231.0, 221.0, 240.0, 231.0, 230.0, 152.0, 149.0, 205.0, 200.0, 234.0, 213.0, 143.0, 150.0, 143.0, 142.0, 202.0, 197.0, 220.0, 210.0, 200.0, 222.0, 185.0, 177.0, 215.0, 201.0, 254.0, 271.0, 100.0, 94.0, 186.0, 179.0, 205.0, 202.0, 151.0, 143.0, 108.0, 131.0, 161.0, 143.0, 143.0, 153.0, 171.0, 185.0, 201.0, 186.0, 227.0, 226.0, 183.0, 179.0, 207.0, 192.0, 197.0, 208.0, 202.0, 194.0, 136.0, 136.0, 156.0, 157.0, 194.0, 214.0, 212.0, 232.0, 153.0, 151.0, 177.0, 185.0, 158.0, 146.0, 219.0, 231.0, 205.0, 203.0, 198.0, 172.0, 250.0, 226.0, 135.0, 158.0, 244.0, 209.0, 125.0, 116.0, 194.0, 216.0, 229.0, 224.0, 201.0, 204.0, 160.0, 190.0, 157.0, 193.0, 258.0, 249.0, 208.0, 219.0, 196.0, 197.0, 138.0, 129.0, 192.0, 215.0, 211.0, 208.0, 140.0, 163.0, 206.0, 204.0, 189.0, 189.0, 171.0, 179.0, 177.0, 168.0, 206.0, 170.0, 173.0, 179.0, 191.0, 185.0, 153.0, 152.0, 132.0, 170.0, 182.0, 173.0, 226.0, 221.0, 212.0, 201.0, 181.0, 183.0, 213.0, 240.0, 238.0, 221.0, 191.0, 202.0, 174.0, 194.0, 194.0, 205.0, 208.0, 196.0, 208.0, 194.0, 195.0, 178.0, 205.0, 194.0, 189.0, 192.0, 221.0, 174.0, 189.0, 
178.0, 205.0, 199.0, 104.0, 104.0, 224.0, 235.0, 131.0, 156.0, 236.0, 274.0, 180.0, 167.0, 151.0, 173.0, 199.0, 211.0, 222.0, 231.0, 211.0, 193.0, 194.0, 214.0, 162.0, 157.0, 172.0, 172.0]}, "sampler_perf": {"mean_env_wait_ms": 2.3955189497497584, "mean_processing_ms": 0.5460179273755849, "mean_inference_ms": 2.943733434085924}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1872000, "num_steps_sampled": 998400, "sample_time_ms": 21499.137, "load_time_ms": 37.019, "grad_time_ms": 8919.956, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.323488999806367e-24, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033416959922760725, "policy_loss": -0.003114718245342374, "vf_loss": 71.20785522460938, "vf_explained_var": 0.7243476510047913, "kl": 0.001916095265187323, "entropy": 1.3287501335144043, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 998400, "episodes_total": 2496, "training_iteration": 78, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-23-54", "timestamp": 1660249434, "time_this_iter_s": 30.256299018859863, "time_total_s": 7854.3695476055145, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, 
"gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, 
"collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7854.3695476055145, "timesteps_since_restore": 998400, "iterations_since_restore": 78, "perf": {"cpu_util_percent": 32.448837209302326, "ram_util_percent": 58.1279069767442}} -{"episode_reward_max": 525.0, "episode_reward_min": 194.0, "episode_reward_mean": 380.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 274.0}, "policy_reward_mean": {"ppo": 190.095}, "custom_metrics": {"sparse_reward_mean": 
130.0, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 120.19, "shaped_reward_min": 74, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.34, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 13.71, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 11.45, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 16, "useful_onion_pickup_agent_1_mean": 12.72, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.47, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.97, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.84, "useful_onion_drop_agent_1_min": 
0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.48, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 11.9, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.1, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.51, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.92, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.22, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.22, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 3.88, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.69, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.26, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.48, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 11.9, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.48, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 11.9, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [408.0, 398.0, 294.0, 405.0, 291.0, 450.0, 350.0, 419.0, 465.0, 462.0, 312.0, 465.0, 370.0, 416.0, 510.0, 290.0, 237.0, 376.0, 404.0, 407.0, 344.0, 450.0, 462.0, 351.0, 405.0, 333.0, 344.0, 237.0, 288.0, 465.0, 384.0, 353.0, 350.0, 345.0, 376.0, 352.0, 376.0, 305.0, 302.0, 355.0, 447.0, 
413.0, 364.0, 453.0, 459.0, 393.0, 368.0, 399.0, 404.0, 402.0, 373.0, 399.0, 381.0, 395.0, 367.0, 404.0, 208.0, 459.0, 287.0, 510.0, 347.0, 324.0, 410.0, 453.0, 404.0, 408.0, 319.0, 344.0, 416.0, 411.0, 419.0, 358.0, 367.0, 413.0, 427.0, 456.0, 450.0, 461.0, 461.0, 301.0, 405.0, 447.0, 293.0, 285.0, 399.0, 430.0, 422.0, 362.0, 416.0, 525.0, 194.0, 365.0, 407.0, 294.0, 239.0, 304.0, 296.0, 356.0, 387.0, 453.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [199.0, 209.0, 194.0, 204.0, 159.0, 135.0, 208.0, 197.0, 136.0, 155.0, 228.0, 222.0, 162.0, 188.0, 209.0, 210.0, 231.0, 234.0, 217.0, 245.0, 140.0, 172.0, 236.0, 229.0, 184.0, 186.0, 226.0, 190.0, 267.0, 243.0, 150.0, 140.0, 117.0, 120.0, 187.0, 189.0, 197.0, 207.0, 203.0, 204.0, 164.0, 180.0, 221.0, 229.0, 227.0, 235.0, 171.0, 180.0, 199.0, 206.0, 159.0, 174.0, 159.0, 185.0, 119.0, 118.0, 140.0, 148.0, 229.0, 236.0, 180.0, 204.0, 175.0, 178.0, 171.0, 179.0, 177.0, 168.0, 206.0, 170.0, 173.0, 179.0, 191.0, 185.0, 153.0, 152.0, 132.0, 170.0, 182.0, 173.0, 226.0, 221.0, 212.0, 201.0, 181.0, 183.0, 213.0, 240.0, 238.0, 221.0, 191.0, 202.0, 174.0, 194.0, 194.0, 205.0, 208.0, 196.0, 208.0, 194.0, 195.0, 178.0, 205.0, 194.0, 189.0, 192.0, 221.0, 174.0, 189.0, 178.0, 205.0, 199.0, 104.0, 104.0, 224.0, 235.0, 131.0, 156.0, 236.0, 274.0, 180.0, 167.0, 151.0, 173.0, 199.0, 211.0, 222.0, 231.0, 211.0, 193.0, 194.0, 214.0, 162.0, 157.0, 172.0, 172.0, 205.0, 211.0, 205.0, 206.0, 200.0, 219.0, 176.0, 182.0, 191.0, 176.0, 204.0, 209.0, 219.0, 
208.0, 225.0, 231.0, 219.0, 231.0, 221.0, 240.0, 231.0, 230.0, 152.0, 149.0, 205.0, 200.0, 234.0, 213.0, 143.0, 150.0, 143.0, 142.0, 202.0, 197.0, 220.0, 210.0, 200.0, 222.0, 185.0, 177.0, 215.0, 201.0, 254.0, 271.0, 100.0, 94.0, 186.0, 179.0, 205.0, 202.0, 151.0, 143.0, 108.0, 131.0, 161.0, 143.0, 143.0, 153.0, 171.0, 185.0, 201.0, 186.0, 227.0, 226.0]}, "sampler_perf": {"mean_env_wait_ms": 2.3706807134794996, "mean_processing_ms": 0.5410767279997776, "mean_inference_ms": 2.9190331024760856}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1896000, "num_steps_sampled": 1011200, "sample_time_ms": 21546.342, "load_time_ms": 37.021, "grad_time_ms": 9020.101, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.617444999031835e-25, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019535624887794256, "policy_loss": -0.0075730024836957455, "vf_loss": 62.825687408447266, "vf_explained_var": 0.7674410939216614, "kl": 0.001638473360799253, "entropy": 1.3262617588043213, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1011200, "episodes_total": 2528, "training_iteration": 79, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-24-22", "timestamp": 1660249462, "time_this_iter_s": 28.0881450176239, "time_total_s": 7882.457692623138, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, 
"dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7882.457692623138, "timesteps_since_restore": 1011200, "iterations_since_restore": 79, "perf": {"cpu_util_percent": 32.120000000000005, "ram_util_percent": 58.1375}} -{"episode_reward_max": 525.0, "episode_reward_min": 9.0, "episode_reward_mean": 390.28, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 195.14}, "custom_metrics": {"sparse_reward_mean": 134.0, "sparse_reward_min": 0, "sparse_reward_max": 180, "shaped_reward_mean": 122.28, "shaped_reward_min": 9, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.84, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 13.96, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 11.85, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 12.82, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 
1.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.91, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.79, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 11.99, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 4.93, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.67, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.24, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.22, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 3.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.83, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.64, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.5, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.27, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 10.79, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 11.99, "optimal_onion_potting_agent_1_min": 2, 
"optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.79, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 11.99, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [453.0, 516.0, 456.0, 467.0, 470.0, 410.0, 9.0, 464.0, 390.0, 330.0, 456.0, 516.0, 297.0, 330.0, 450.0, 465.0, 288.0, 413.0, 
393.0, 410.0, 455.0, 456.0, 402.0, 455.0, 367.0, 516.0, 441.0, 459.0, 438.0, 408.0, 399.0, 410.0, 404.0, 408.0, 319.0, 344.0, 416.0, 411.0, 419.0, 358.0, 367.0, 413.0, 427.0, 456.0, 450.0, 461.0, 461.0, 301.0, 405.0, 447.0, 293.0, 285.0, 399.0, 430.0, 422.0, 362.0, 416.0, 525.0, 194.0, 365.0, 407.0, 294.0, 239.0, 304.0, 296.0, 356.0, 387.0, 453.0, 408.0, 398.0, 294.0, 405.0, 291.0, 450.0, 350.0, 419.0, 465.0, 462.0, 312.0, 465.0, 370.0, 416.0, 510.0, 290.0, 237.0, 376.0, 404.0, 407.0, 344.0, 450.0, 462.0, 351.0, 405.0, 333.0, 344.0, 237.0, 288.0, 465.0, 384.0, 353.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [222.0, 231.0, 269.0, 247.0, 214.0, 242.0, 239.0, 228.0, 228.0, 242.0, 198.0, 212.0, 3.0, 6.0, 232.0, 232.0, 188.0, 202.0, 163.0, 167.0, 234.0, 222.0, 253.0, 263.0, 162.0, 135.0, 159.0, 171.0, 221.0, 229.0, 230.0, 235.0, 145.0, 143.0, 202.0, 211.0, 201.0, 192.0, 191.0, 219.0, 225.0, 230.0, 231.0, 225.0, 190.0, 212.0, 219.0, 236.0, 179.0, 188.0, 270.0, 246.0, 227.0, 214.0, 226.0, 233.0, 207.0, 231.0, 198.0, 210.0, 196.0, 203.0, 203.0, 207.0, 211.0, 193.0, 194.0, 214.0, 162.0, 157.0, 172.0, 172.0, 205.0, 211.0, 205.0, 206.0, 200.0, 219.0, 176.0, 182.0, 191.0, 176.0, 204.0, 209.0, 219.0, 208.0, 225.0, 231.0, 219.0, 231.0, 221.0, 240.0, 231.0, 230.0, 152.0, 149.0, 205.0, 200.0, 234.0, 213.0, 143.0, 150.0, 143.0, 142.0, 202.0, 197.0, 220.0, 210.0, 200.0, 222.0, 185.0, 177.0, 215.0, 201.0, 254.0, 271.0, 100.0, 94.0, 186.0, 179.0, 205.0, 202.0, 151.0, 143.0, 108.0, 131.0, 
161.0, 143.0, 143.0, 153.0, 171.0, 185.0, 201.0, 186.0, 227.0, 226.0, 199.0, 209.0, 194.0, 204.0, 159.0, 135.0, 208.0, 197.0, 136.0, 155.0, 228.0, 222.0, 162.0, 188.0, 209.0, 210.0, 231.0, 234.0, 217.0, 245.0, 140.0, 172.0, 236.0, 229.0, 184.0, 186.0, 226.0, 190.0, 267.0, 243.0, 150.0, 140.0, 117.0, 120.0, 187.0, 189.0, 197.0, 207.0, 203.0, 204.0, 164.0, 180.0, 221.0, 229.0, 227.0, 235.0, 171.0, 180.0, 199.0, 206.0, 159.0, 174.0, 159.0, 185.0, 119.0, 118.0, 140.0, 148.0, 229.0, 236.0, 180.0, 204.0, 175.0, 178.0]}, "sampler_perf": {"mean_env_wait_ms": 2.3463084552547957, "mean_processing_ms": 0.5362072894726034, "mean_inference_ms": 2.894108730963018}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1920000, "num_steps_sampled": 1024000, "sample_time_ms": 21336.358, "load_time_ms": 36.943, "grad_time_ms": 8894.576, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.3087224995159173e-25, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009382636635564268, "policy_loss": -0.007344415877014399, "vf_loss": 70.56519317626953, "vf_explained_var": 0.7276310324668884, "kl": 0.001774398609995842, "entropy": 1.3007346391677856, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1024000, "episodes_total": 2560, "training_iteration": 80, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-24-48", "timestamp": 1660249488, "time_this_iter_s": 25.79700207710266, "time_total_s": 7908.254694700241, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7908.254694700241, "timesteps_since_restore": 1024000, "iterations_since_restore": 80, "perf": {"cpu_util_percent": 33.88055555555556, 
"ram_util_percent": 58.030555555555566}} -{"episode_reward_max": 522.0, "episode_reward_min": 9.0, "episode_reward_mean": 403.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 201.98}, "custom_metrics": {"sparse_reward_mean": 139.0, "sparse_reward_min": 0, "sparse_reward_max": 180, "shaped_reward_mean": 125.96, "shaped_reward_min": 9, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.08, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.26, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 12.09, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.06, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.53, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.96, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.9, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 11.08, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 12.3, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.64, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.08, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.23, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 3.96, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.91, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.62, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.23, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 11.08, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 12.3, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.08, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 12.3, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [459.0, 342.0, 513.0, 519.0, 410.0, 390.0, 424.0, 458.0, 362.0, 373.0, 399.0, 358.0, 456.0, 421.0, 459.0, 467.0, 419.0, 359.0, 327.0, 419.0, 393.0, 430.0, 447.0, 447.0, 522.0, 398.0, 405.0, 507.0, 468.0, 465.0, 410.0, 344.0, 296.0, 356.0, 387.0, 453.0, 408.0, 398.0, 294.0, 405.0, 291.0, 450.0, 350.0, 419.0, 465.0, 462.0, 312.0, 465.0, 370.0, 416.0, 510.0, 290.0, 237.0, 376.0, 404.0, 407.0, 344.0, 450.0, 462.0, 351.0, 405.0, 333.0, 344.0, 237.0, 288.0, 465.0, 384.0, 353.0, 453.0, 516.0, 456.0, 467.0, 470.0, 410.0, 9.0, 464.0, 390.0, 330.0, 456.0, 516.0, 297.0, 330.0, 450.0, 465.0, 288.0, 413.0, 393.0, 410.0, 455.0, 456.0, 402.0, 455.0, 367.0, 516.0, 441.0, 459.0, 438.0, 408.0, 399.0, 410.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [235.0, 224.0, 165.0, 177.0, 253.0, 260.0, 262.0, 257.0, 197.0, 213.0, 191.0, 199.0, 199.0, 225.0, 240.0, 218.0, 181.0, 181.0, 193.0, 180.0, 207.0, 192.0, 190.0, 168.0, 231.0, 225.0, 216.0, 205.0, 225.0, 234.0, 230.0, 237.0, 206.0, 213.0, 180.0, 179.0, 155.0, 172.0, 208.0, 211.0, 202.0, 191.0, 219.0, 211.0, 214.0, 233.0, 212.0, 235.0, 265.0, 257.0, 204.0, 194.0, 205.0, 200.0, 257.0, 250.0, 239.0, 229.0, 249.0, 216.0, 211.0, 199.0, 162.0, 182.0, 143.0, 153.0, 171.0, 185.0, 201.0, 186.0, 227.0, 226.0, 199.0, 209.0, 194.0, 204.0, 159.0, 135.0, 208.0, 197.0, 136.0, 155.0, 228.0, 222.0, 162.0, 188.0, 209.0, 210.0, 231.0, 234.0, 217.0, 245.0, 140.0, 172.0, 236.0, 229.0, 184.0, 
186.0, 226.0, 190.0, 267.0, 243.0, 150.0, 140.0, 117.0, 120.0, 187.0, 189.0, 197.0, 207.0, 203.0, 204.0, 164.0, 180.0, 221.0, 229.0, 227.0, 235.0, 171.0, 180.0, 199.0, 206.0, 159.0, 174.0, 159.0, 185.0, 119.0, 118.0, 140.0, 148.0, 229.0, 236.0, 180.0, 204.0, 175.0, 178.0, 222.0, 231.0, 269.0, 247.0, 214.0, 242.0, 239.0, 228.0, 228.0, 242.0, 198.0, 212.0, 3.0, 6.0, 232.0, 232.0, 188.0, 202.0, 163.0, 167.0, 234.0, 222.0, 253.0, 263.0, 162.0, 135.0, 159.0, 171.0, 221.0, 229.0, 230.0, 235.0, 145.0, 143.0, 202.0, 211.0, 201.0, 192.0, 191.0, 219.0, 225.0, 230.0, 231.0, 225.0, 190.0, 212.0, 219.0, 236.0, 179.0, 188.0, 270.0, 246.0, 227.0, 214.0, 226.0, 233.0, 207.0, 231.0, 198.0, 210.0, 196.0, 203.0, 203.0, 207.0]}, "sampler_perf": {"mean_env_wait_ms": 2.3225268766730203, "mean_processing_ms": 0.5314526800460904, "mean_inference_ms": 2.8694240871343926}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1944000, "num_steps_sampled": 1036800, "sample_time_ms": 21260.609, "load_time_ms": 36.811, "grad_time_ms": 8748.654, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.6543612497579586e-25, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011773156002163887, "policy_loss": -0.006681745406240225, "vf_loss": 61.517730712890625, "vf_explained_var": 0.7553827166557312, "kl": 0.0021572383120656013, "entropy": 1.2946891784667969, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1036800, "episodes_total": 2592, "training_iteration": 81, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-25-15", "timestamp": 1660249515, "time_this_iter_s": 26.438808917999268, "time_total_s": 7934.69350361824, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 
1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 
0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 7934.69350361824, "timesteps_since_restore": 1036800, "iterations_since_restore": 81, "perf": {"cpu_util_percent": 30.592105263157894, "ram_util_percent": 58.057894736842115}} -{"episode_reward_max": 522.0, "episode_reward_min": 9.0, "episode_reward_mean": 412.5, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 206.25}, "custom_metrics": {"sparse_reward_mean": 142.0, "sparse_reward_min": 0, "sparse_reward_max": 180, "shaped_reward_mean": 128.5, "shaped_reward_min": 9, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.49, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.04, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 12.53, "useful_onion_pickup_agent_0_min": 2, 
"useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.01, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.9, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.78, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.56, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 12.31, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.19, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.31, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.3, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.12, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.76, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.21, 
"soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.19, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 11.56, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 12.31, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.56, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 12.31, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [316.0, 235.0, 462.0, 501.0, 449.0, 411.0, 357.0, 401.0, 462.0, 408.0, 393.0, 410.0, 347.0, 401.0, 393.0, 444.0, 363.0, 470.0, 470.0, 413.0, 347.0, 459.0, 516.0, 456.0, 476.0, 398.0, 353.0, 401.0, 421.0, 413.0, 300.0, 355.0, 288.0, 465.0, 384.0, 353.0, 453.0, 516.0, 456.0, 467.0, 470.0, 410.0, 9.0, 464.0, 390.0, 330.0, 456.0, 516.0, 297.0, 330.0, 450.0, 465.0, 288.0, 413.0, 393.0, 410.0, 455.0, 456.0, 402.0, 455.0, 367.0, 516.0, 441.0, 459.0, 438.0, 408.0, 399.0, 410.0, 459.0, 342.0, 513.0, 519.0, 410.0, 390.0, 424.0, 458.0, 362.0, 373.0, 399.0, 358.0, 456.0, 421.0, 459.0, 467.0, 419.0, 359.0, 327.0, 419.0, 393.0, 430.0, 447.0, 447.0, 522.0, 398.0, 405.0, 507.0, 468.0, 465.0, 410.0, 344.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [165.0, 151.0, 116.0, 119.0, 239.0, 223.0, 252.0, 249.0, 229.0, 220.0, 204.0, 207.0, 190.0, 167.0, 210.0, 191.0, 228.0, 234.0, 200.0, 208.0, 214.0, 179.0, 203.0, 207.0, 179.0, 168.0, 176.0, 225.0, 185.0, 208.0, 216.0, 228.0, 160.0, 203.0, 233.0, 237.0, 228.0, 242.0, 204.0, 209.0, 186.0, 161.0, 218.0, 241.0, 251.0, 265.0, 230.0, 226.0, 234.0, 242.0, 185.0, 213.0, 186.0, 167.0, 208.0, 193.0, 225.0, 196.0, 218.0, 195.0, 152.0, 148.0, 188.0, 167.0, 140.0, 148.0, 229.0, 236.0, 180.0, 204.0, 175.0, 178.0, 222.0, 
231.0, 269.0, 247.0, 214.0, 242.0, 239.0, 228.0, 228.0, 242.0, 198.0, 212.0, 3.0, 6.0, 232.0, 232.0, 188.0, 202.0, 163.0, 167.0, 234.0, 222.0, 253.0, 263.0, 162.0, 135.0, 159.0, 171.0, 221.0, 229.0, 230.0, 235.0, 145.0, 143.0, 202.0, 211.0, 201.0, 192.0, 191.0, 219.0, 225.0, 230.0, 231.0, 225.0, 190.0, 212.0, 219.0, 236.0, 179.0, 188.0, 270.0, 246.0, 227.0, 214.0, 226.0, 233.0, 207.0, 231.0, 198.0, 210.0, 196.0, 203.0, 203.0, 207.0, 235.0, 224.0, 165.0, 177.0, 253.0, 260.0, 262.0, 257.0, 197.0, 213.0, 191.0, 199.0, 199.0, 225.0, 240.0, 218.0, 181.0, 181.0, 193.0, 180.0, 207.0, 192.0, 190.0, 168.0, 231.0, 225.0, 216.0, 205.0, 225.0, 234.0, 230.0, 237.0, 206.0, 213.0, 180.0, 179.0, 155.0, 172.0, 208.0, 211.0, 202.0, 191.0, 219.0, 211.0, 214.0, 233.0, 212.0, 235.0, 265.0, 257.0, 204.0, 194.0, 205.0, 200.0, 257.0, 250.0, 239.0, 229.0, 249.0, 216.0, 211.0, 199.0, 162.0, 182.0]}, "sampler_perf": {"mean_env_wait_ms": 2.299333204784243, "mean_processing_ms": 0.5268100926342811, "mean_inference_ms": 2.8452761548654255}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1968000, "num_steps_sampled": 1049600, "sample_time_ms": 21056.327, "load_time_ms": 36.751, "grad_time_ms": 8586.844, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 8.271806248789793e-26, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015234133461490273, "policy_loss": -0.007057014852762222, "vf_loss": 61.880123138427734, "vf_explained_var": 0.7578676342964172, "kl": 0.002027077367529273, "entropy": 1.3088246583938599, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1049600, "episodes_total": 2624, "training_iteration": 82, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-25-41", "timestamp": 1660249541, "time_this_iter_s": 26.555142879486084, "time_total_s": 7961.248646497726, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": 
"192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, 
"exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 
0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7961.248646497726, "timesteps_since_restore": 1049600, "iterations_since_restore": 82, "perf": {"cpu_util_percent": 33.34324324324324, "ram_util_percent": 58.07837837837838}} -{"episode_reward_max": 522.0, "episode_reward_min": 177.0, "episode_reward_mean": 411.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 205.56}, "custom_metrics": {"sparse_reward_mean": 141.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 127.92, "shaped_reward_min": 57, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.28, "onion_pickup_agent_0_min": 7, 
"onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.01, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 12.41, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.19, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.37, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.75, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.55, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 12.4, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.37, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, 
"soup_delivery_agent_0_mean": 3.91, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.2, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.11, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.55, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 12.4, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.55, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 12.4, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, 
"useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [350.0, 419.0, 422.0, 465.0, 177.0, 450.0, 415.0, 344.0, 282.0, 467.0, 407.0, 404.0, 516.0, 456.0, 396.0, 453.0, 458.0, 418.0, 295.0, 353.0, 406.0, 351.0, 516.0, 519.0, 444.0, 412.0, 444.0, 467.0, 287.0, 406.0, 237.0, 450.0, 438.0, 408.0, 399.0, 410.0, 459.0, 342.0, 513.0, 519.0, 410.0, 390.0, 424.0, 458.0, 362.0, 373.0, 399.0, 358.0, 456.0, 421.0, 459.0, 467.0, 419.0, 359.0, 327.0, 419.0, 393.0, 430.0, 447.0, 447.0, 522.0, 398.0, 405.0, 507.0, 468.0, 465.0, 410.0, 344.0, 316.0, 235.0, 462.0, 501.0, 449.0, 411.0, 357.0, 401.0, 462.0, 408.0, 393.0, 410.0, 347.0, 401.0, 393.0, 444.0, 363.0, 470.0, 470.0, 413.0, 347.0, 459.0, 516.0, 456.0, 476.0, 398.0, 353.0, 401.0, 421.0, 413.0, 300.0, 355.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [177.0, 173.0, 202.0, 217.0, 200.0, 222.0, 240.0, 225.0, 94.0, 83.0, 227.0, 223.0, 205.0, 210.0, 171.0, 173.0, 139.0, 143.0, 239.0, 228.0, 194.0, 213.0, 189.0, 215.0, 250.0, 266.0, 230.0, 226.0, 213.0, 183.0, 230.0, 223.0, 237.0, 221.0, 205.0, 213.0, 150.0, 145.0, 
159.0, 194.0, 206.0, 200.0, 179.0, 172.0, 251.0, 265.0, 255.0, 264.0, 210.0, 234.0, 199.0, 213.0, 233.0, 211.0, 233.0, 234.0, 143.0, 144.0, 212.0, 194.0, 120.0, 117.0, 226.0, 224.0, 207.0, 231.0, 198.0, 210.0, 196.0, 203.0, 203.0, 207.0, 235.0, 224.0, 165.0, 177.0, 253.0, 260.0, 262.0, 257.0, 197.0, 213.0, 191.0, 199.0, 199.0, 225.0, 240.0, 218.0, 181.0, 181.0, 193.0, 180.0, 207.0, 192.0, 190.0, 168.0, 231.0, 225.0, 216.0, 205.0, 225.0, 234.0, 230.0, 237.0, 206.0, 213.0, 180.0, 179.0, 155.0, 172.0, 208.0, 211.0, 202.0, 191.0, 219.0, 211.0, 214.0, 233.0, 212.0, 235.0, 265.0, 257.0, 204.0, 194.0, 205.0, 200.0, 257.0, 250.0, 239.0, 229.0, 249.0, 216.0, 211.0, 199.0, 162.0, 182.0, 165.0, 151.0, 116.0, 119.0, 239.0, 223.0, 252.0, 249.0, 229.0, 220.0, 204.0, 207.0, 190.0, 167.0, 210.0, 191.0, 228.0, 234.0, 200.0, 208.0, 214.0, 179.0, 203.0, 207.0, 179.0, 168.0, 176.0, 225.0, 185.0, 208.0, 216.0, 228.0, 160.0, 203.0, 233.0, 237.0, 228.0, 242.0, 204.0, 209.0, 186.0, 161.0, 218.0, 241.0, 251.0, 265.0, 230.0, 226.0, 234.0, 242.0, 185.0, 213.0, 186.0, 167.0, 208.0, 193.0, 225.0, 196.0, 218.0, 195.0, 152.0, 148.0, 188.0, 167.0]}, "sampler_perf": {"mean_env_wait_ms": 2.2767232665412354, "mean_processing_ms": 0.5222866661980672, "mean_inference_ms": 2.821797669244603}, "off_policy_estimator": {}, "info": {"num_steps_trained": 1992000, "num_steps_sampled": 1062400, "sample_time_ms": 20905.085, "load_time_ms": 36.646, "grad_time_ms": 8463.059, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.1359031243948966e-26, "cur_lr": 0.0010000000474974513, "total_loss": -0.00031348783522844315, "policy_loss": -0.006104966159909964, "vf_loss": 64.42190551757812, "vf_explained_var": 0.7651865482330322, "kl": 0.0017986185848712921, "entropy": 1.3014076948165894, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1062400, "episodes_total": 2656, "training_iteration": 83, 
"experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-26-09", "timestamp": 1660249569, "time_this_iter_s": 27.776076078414917, "time_total_s": 7989.024722576141, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": 
"deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 
0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 7989.024722576141, "timesteps_since_restore": 1062400, "iterations_since_restore": 83, "perf": {"cpu_util_percent": 33.82000000000001, "ram_util_percent": 58.82000000000001}} -{"episode_reward_max": 573.0, "episode_reward_min": 126.0, "episode_reward_mean": 408.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 204.43}, "custom_metrics": {"sparse_reward_mean": 141.0, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 126.86, "shaped_reward_min": 46, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.31, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.9, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 12.53, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.04, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.42, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.74, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.51, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 12.31, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.37, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.3, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.07, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.53, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.51, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 12.31, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.51, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 12.31, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [126.0, 424.0, 458.0, 418.0, 404.0, 465.0, 228.0, 353.0, 465.0, 350.0, 375.0, 441.0, 422.0, 418.0, 467.0, 441.0, 339.0, 370.0, 404.0, 467.0, 453.0, 458.0, 465.0, 345.0, 476.0, 459.0, 516.0, 573.0, 467.0, 459.0, 393.0, 413.0, 468.0, 465.0, 410.0, 344.0, 316.0, 235.0, 462.0, 501.0, 449.0, 411.0, 357.0, 401.0, 462.0, 408.0, 393.0, 410.0, 347.0, 401.0, 393.0, 444.0, 363.0, 470.0, 470.0, 413.0, 347.0, 459.0, 516.0, 456.0, 476.0, 398.0, 353.0, 401.0, 421.0, 413.0, 300.0, 355.0, 350.0, 419.0, 422.0, 465.0, 177.0, 450.0, 415.0, 344.0, 282.0, 467.0, 407.0, 404.0, 516.0, 456.0, 396.0, 453.0, 458.0, 418.0, 295.0, 353.0, 406.0, 351.0, 516.0, 519.0, 444.0, 412.0, 444.0, 467.0, 287.0, 406.0, 237.0, 450.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [63.0, 63.0, 224.0, 200.0, 224.0, 
234.0, 210.0, 208.0, 202.0, 202.0, 240.0, 225.0, 117.0, 111.0, 173.0, 180.0, 223.0, 242.0, 178.0, 172.0, 181.0, 194.0, 219.0, 222.0, 222.0, 200.0, 207.0, 211.0, 230.0, 237.0, 232.0, 209.0, 165.0, 174.0, 185.0, 185.0, 204.0, 200.0, 230.0, 237.0, 238.0, 215.0, 256.0, 202.0, 236.0, 229.0, 194.0, 151.0, 226.0, 250.0, 231.0, 228.0, 252.0, 264.0, 293.0, 280.0, 238.0, 229.0, 222.0, 237.0, 207.0, 186.0, 212.0, 201.0, 239.0, 229.0, 249.0, 216.0, 211.0, 199.0, 162.0, 182.0, 165.0, 151.0, 116.0, 119.0, 239.0, 223.0, 252.0, 249.0, 229.0, 220.0, 204.0, 207.0, 190.0, 167.0, 210.0, 191.0, 228.0, 234.0, 200.0, 208.0, 214.0, 179.0, 203.0, 207.0, 179.0, 168.0, 176.0, 225.0, 185.0, 208.0, 216.0, 228.0, 160.0, 203.0, 233.0, 237.0, 228.0, 242.0, 204.0, 209.0, 186.0, 161.0, 218.0, 241.0, 251.0, 265.0, 230.0, 226.0, 234.0, 242.0, 185.0, 213.0, 186.0, 167.0, 208.0, 193.0, 225.0, 196.0, 218.0, 195.0, 152.0, 148.0, 188.0, 167.0, 177.0, 173.0, 202.0, 217.0, 200.0, 222.0, 240.0, 225.0, 94.0, 83.0, 227.0, 223.0, 205.0, 210.0, 171.0, 173.0, 139.0, 143.0, 239.0, 228.0, 194.0, 213.0, 189.0, 215.0, 250.0, 266.0, 230.0, 226.0, 213.0, 183.0, 230.0, 223.0, 237.0, 221.0, 205.0, 213.0, 150.0, 145.0, 159.0, 194.0, 206.0, 200.0, 179.0, 172.0, 251.0, 265.0, 255.0, 264.0, 210.0, 234.0, 199.0, 213.0, 233.0, 211.0, 233.0, 234.0, 143.0, 144.0, 212.0, 194.0, 120.0, 117.0, 226.0, 224.0]}, "sampler_perf": {"mean_env_wait_ms": 2.254677940793998, "mean_processing_ms": 0.5178804318399313, "mean_inference_ms": 2.7992231247898705}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2016000, "num_steps_sampled": 1075200, "sample_time_ms": 20798.01, "load_time_ms": 36.647, "grad_time_ms": 8403.274, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.0679515621974483e-26, "cur_lr": 0.0010000000474974513, "total_loss": 0.001430995762348175, "policy_loss": -0.004144120961427689, "vf_loss": 62.17998123168945, "vf_explained_var": 0.801994264125824, "kl": 0.0024192428681999445, "entropy": 
1.2857705354690552, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1075200, "episodes_total": 2688, "training_iteration": 84, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-26-39", "timestamp": 1660249599, "time_this_iter_s": 29.525622129440308, "time_total_s": 8018.550344705582, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": 
{"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8018.550344705582, "timesteps_since_restore": 1075200, "iterations_since_restore": 84, "perf": {"cpu_util_percent": 28.77560975609756, "ram_util_percent": 58.31951219512194}} -{"episode_reward_max": 573.0, "episode_reward_min": 126.0, "episode_reward_mean": 415.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 207.665}, "custom_metrics": {"sparse_reward_mean": 143.2, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 128.93, "shaped_reward_min": 46, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.57, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.2, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.84, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.3, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.45, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.73, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.82, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.79, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 12.45, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.83, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.59, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.02, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.4, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.95, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.99, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.79, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 12.45, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.79, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 12.45, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 287.0, 462.0, 456.0, 449.0, 402.0, 272.0, 452.0, 407.0, 430.0, 450.0, 464.0, 465.0, 461.0, 407.0, 458.0, 404.0, 456.0, 310.0, 470.0, 352.0, 458.0, 376.0, 444.0, 501.0, 495.0, 458.0, 464.0, 447.0, 473.0, 516.0, 384.0, 421.0, 413.0, 300.0, 355.0, 350.0, 419.0, 422.0, 465.0, 177.0, 450.0, 415.0, 344.0, 282.0, 467.0, 407.0, 404.0, 516.0, 456.0, 396.0, 453.0, 458.0, 418.0, 295.0, 353.0, 406.0, 351.0, 516.0, 519.0, 444.0, 412.0, 444.0, 467.0, 287.0, 406.0, 237.0, 450.0, 126.0, 424.0, 458.0, 418.0, 404.0, 465.0, 228.0, 353.0, 465.0, 350.0, 375.0, 441.0, 422.0, 418.0, 467.0, 441.0, 339.0, 370.0, 404.0, 467.0, 453.0, 458.0, 465.0, 345.0, 476.0, 459.0, 516.0, 573.0, 467.0, 459.0, 393.0, 413.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 260.0, 150.0, 137.0, 229.0, 233.0, 230.0, 226.0, 203.0, 246.0, 189.0, 213.0, 146.0, 126.0, 210.0, 242.0, 200.0, 207.0, 221.0, 209.0, 225.0, 225.0, 224.0, 240.0, 231.0, 234.0, 242.0, 219.0, 208.0, 199.0, 220.0, 238.0, 193.0, 211.0, 231.0, 225.0, 140.0, 170.0, 230.0, 240.0, 178.0, 174.0, 224.0, 234.0, 180.0, 196.0, 228.0, 216.0, 252.0, 249.0, 239.0, 256.0, 223.0, 235.0, 242.0, 222.0, 225.0, 222.0, 226.0, 247.0, 249.0, 267.0, 197.0, 187.0, 225.0, 196.0, 218.0, 195.0, 152.0, 148.0, 188.0, 167.0, 177.0, 173.0, 202.0, 217.0, 200.0, 222.0, 240.0, 225.0, 94.0, 83.0, 227.0, 223.0, 205.0, 210.0, 171.0, 173.0, 139.0, 143.0, 239.0, 228.0, 194.0, 213.0, 189.0, 215.0, 250.0, 266.0, 230.0, 226.0, 213.0, 183.0, 230.0, 223.0, 237.0, 221.0, 205.0, 213.0, 150.0, 145.0, 159.0, 194.0, 206.0, 200.0, 179.0, 172.0, 251.0, 265.0, 255.0, 264.0, 210.0, 234.0, 199.0, 213.0, 233.0, 211.0, 233.0, 234.0, 143.0, 144.0, 212.0, 194.0, 120.0, 117.0, 226.0, 224.0, 63.0, 63.0, 224.0, 200.0, 224.0, 234.0, 210.0, 208.0, 202.0, 202.0, 240.0, 225.0, 117.0, 111.0, 173.0, 180.0, 223.0, 242.0, 178.0, 172.0, 181.0, 194.0, 219.0, 222.0, 222.0, 200.0, 207.0, 211.0, 230.0, 237.0, 232.0, 209.0, 165.0, 174.0, 185.0, 185.0, 204.0, 200.0, 230.0, 237.0, 238.0, 215.0, 256.0, 202.0, 236.0, 229.0, 194.0, 151.0, 226.0, 250.0, 231.0, 228.0, 252.0, 264.0, 293.0, 280.0, 238.0, 229.0, 222.0, 237.0, 207.0, 186.0, 212.0, 201.0]}, "sampler_perf": {"mean_env_wait_ms": 2.2331698554816866, "mean_processing_ms": 0.5135892745725562, "mean_inference_ms": 2.7773374770155983}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2040000, "num_steps_sampled": 1088000, "sample_time_ms": 20542.192, "load_time_ms": 36.513, "grad_time_ms": 8356.672, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.0339757810987241e-26, 
"cur_lr": 0.0010000000474974513, "total_loss": 9.005811443785205e-05, "policy_loss": -0.005502933170646429, "vf_loss": 62.30662536621094, "vf_explained_var": 0.7652042508125305, "kl": 0.0015233332524076104, "entropy": 1.275335431098938, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1088000, "episodes_total": 2720, "training_iteration": 85, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-27-08", "timestamp": 1660249628, "time_this_iter_s": 29.196897983551025, "time_total_s": 8047.747242689133, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": 
Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, 
"output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8047.747242689133, "timesteps_since_restore": 1088000, "iterations_since_restore": 85, "perf": {"cpu_util_percent": 31.616666666666664, "ram_util_percent": 58.38809523809524}} -{"episode_reward_max": 573.0, "episode_reward_min": 126.0, "episode_reward_mean": 429.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 214.53}, "custom_metrics": {"sparse_reward_mean": 148.0, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 133.06, "shaped_reward_min": 46, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.29, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.59, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.5, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.69, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.8, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.89, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.77, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.11, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.83, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.0, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, 
"useful_dish_pickup_agent_1_mean": 2.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.33, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.36, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.32, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.0, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.12, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.82, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.13, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 12.11, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.83, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.11, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.83, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [452.0, 461.0, 501.0, 241.0, 462.0, 459.0, 519.0, 413.0, 405.0, 302.0, 465.0, 519.0, 498.0, 570.0, 465.0, 444.0, 398.0, 479.0, 419.0, 516.0, 419.0, 522.0, 419.0, 516.0, 470.0, 470.0, 519.0, 418.0, 504.0, 444.0, 355.0, 324.0, 287.0, 406.0, 237.0, 450.0, 126.0, 424.0, 458.0, 418.0, 404.0, 465.0, 228.0, 353.0, 465.0, 350.0, 375.0, 441.0, 422.0, 418.0, 467.0, 441.0, 339.0, 370.0, 404.0, 467.0, 453.0, 458.0, 465.0, 345.0, 476.0, 459.0, 516.0, 573.0, 467.0, 459.0, 393.0, 413.0, 516.0, 287.0, 462.0, 456.0, 449.0, 402.0, 272.0, 452.0, 407.0, 430.0, 450.0, 464.0, 465.0, 461.0, 407.0, 458.0, 404.0, 456.0, 310.0, 470.0, 352.0, 458.0, 376.0, 444.0, 501.0, 495.0, 458.0, 464.0, 447.0, 473.0, 516.0, 384.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [230.0, 222.0, 235.0, 226.0, 254.0, 247.0, 114.0, 127.0, 234.0, 228.0, 215.0, 244.0, 256.0, 263.0, 208.0, 205.0, 208.0, 197.0, 151.0, 151.0, 239.0, 226.0, 254.0, 265.0, 248.0, 250.0, 279.0, 291.0, 226.0, 239.0, 214.0, 230.0, 200.0, 198.0, 239.0, 240.0, 201.0, 218.0, 250.0, 266.0, 212.0, 207.0, 265.0, 257.0, 208.0, 211.0, 264.0, 252.0, 234.0, 236.0, 238.0, 232.0, 262.0, 257.0, 204.0, 214.0, 240.0, 264.0, 238.0, 206.0, 185.0, 170.0, 157.0, 167.0, 143.0, 144.0, 212.0, 194.0, 120.0, 117.0, 226.0, 224.0, 63.0, 63.0, 224.0, 200.0, 224.0, 234.0, 210.0, 208.0, 202.0, 202.0, 240.0, 225.0, 117.0, 111.0, 173.0, 180.0, 223.0, 242.0, 178.0, 172.0, 181.0, 194.0, 219.0, 222.0, 222.0, 200.0, 207.0, 211.0, 230.0, 237.0, 232.0, 209.0, 165.0, 174.0, 185.0, 185.0, 204.0, 200.0, 230.0, 237.0, 238.0, 215.0, 256.0, 202.0, 236.0, 229.0, 194.0, 151.0, 226.0, 250.0, 231.0, 228.0, 252.0, 264.0, 293.0, 280.0, 238.0, 229.0, 222.0, 237.0, 207.0, 186.0, 212.0, 201.0, 256.0, 260.0, 150.0, 137.0, 229.0, 233.0, 230.0, 226.0, 203.0, 246.0, 189.0, 213.0, 146.0, 126.0, 210.0, 242.0, 200.0, 207.0, 221.0, 209.0, 225.0, 225.0, 224.0, 240.0, 231.0, 234.0, 242.0, 219.0, 208.0, 199.0, 220.0, 238.0, 193.0, 211.0, 231.0, 225.0, 140.0, 170.0, 230.0, 240.0, 178.0, 174.0, 224.0, 234.0, 180.0, 196.0, 228.0, 216.0, 252.0, 249.0, 239.0, 256.0, 223.0, 235.0, 242.0, 222.0, 225.0, 222.0, 226.0, 247.0, 249.0, 267.0, 197.0, 187.0]}, "sampler_perf": {"mean_env_wait_ms": 2.2121816621028128, "mean_processing_ms": 0.509404154557512, "mean_inference_ms": 2.7561899846531217}, "off_policy_estimator": {}, "info": 
{"num_steps_trained": 2064000, "num_steps_sampled": 1100800, "sample_time_ms": 20247.187, "load_time_ms": 36.147, "grad_time_ms": 8204.595, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 5.169878905493621e-27, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034205808769911528, "policy_loss": -0.0028535639867186546, "vf_loss": 69.0377197265625, "vf_explained_var": 0.760657787322998, "kl": 0.002043861197307706, "entropy": 1.2592506408691406, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1100800, "episodes_total": 2752, "training_iteration": 86, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-27-37", "timestamp": 1660249657, "time_this_iter_s": 28.82673192024231, "time_total_s": 8076.573974609375, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": 
{"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8076.573974609375, "timesteps_since_restore": 1100800, "iterations_since_restore": 86, "perf": {"cpu_util_percent": 33.515, "ram_util_percent": 58.42750000000001}} -{"episode_reward_max": 570.0, "episode_reward_min": 142.0, "episode_reward_mean": 435.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 70.0}, "policy_reward_max": {"ppo": 291.0}, "policy_reward_mean": {"ppo": 217.895}, "custom_metrics": {"sparse_reward_mean": 150.0, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 135.79, "shaped_reward_min": 62, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.42, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.29, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.49, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.24, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.9, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.97, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.92, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.14, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.18, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 
10, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.1, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 3.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.9, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.33, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.05, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.99, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.13, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.14, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.18, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.14, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.18, "viable_onion_potting_agent_1_min": 8, 
"viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [461.0, 404.0, 464.0, 393.0, 467.0, 459.0, 419.0, 142.0, 419.0, 456.0, 481.0, 507.0, 407.0, 470.0, 513.0, 467.0, 459.0, 419.0, 473.0, 462.0, 316.0, 294.0, 416.0, 513.0, 465.0, 465.0, 424.0, 401.0, 470.0, 419.0, 367.0, 341.0, 467.0, 459.0, 393.0, 413.0, 516.0, 287.0, 462.0, 456.0, 449.0, 402.0, 272.0, 452.0, 407.0, 430.0, 450.0, 464.0, 465.0, 461.0, 407.0, 458.0, 404.0, 456.0, 310.0, 470.0, 352.0, 458.0, 376.0, 444.0, 501.0, 495.0, 458.0, 464.0, 447.0, 473.0, 516.0, 384.0, 452.0, 461.0, 501.0, 241.0, 462.0, 459.0, 519.0, 413.0, 405.0, 302.0, 465.0, 519.0, 498.0, 570.0, 465.0, 444.0, 398.0, 479.0, 419.0, 516.0, 419.0, 
522.0, 419.0, 516.0, 470.0, 470.0, 519.0, 418.0, 504.0, 444.0, 355.0, 324.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [227.0, 234.0, 210.0, 194.0, 236.0, 228.0, 186.0, 207.0, 242.0, 225.0, 226.0, 233.0, 218.0, 201.0, 70.0, 72.0, 202.0, 217.0, 222.0, 234.0, 234.0, 247.0, 235.0, 272.0, 197.0, 210.0, 228.0, 242.0, 245.0, 268.0, 247.0, 220.0, 231.0, 228.0, 211.0, 208.0, 230.0, 243.0, 224.0, 238.0, 162.0, 154.0, 139.0, 155.0, 186.0, 230.0, 265.0, 248.0, 236.0, 229.0, 236.0, 229.0, 223.0, 201.0, 191.0, 210.0, 237.0, 233.0, 207.0, 212.0, 187.0, 180.0, 175.0, 166.0, 238.0, 229.0, 222.0, 237.0, 207.0, 186.0, 212.0, 201.0, 256.0, 260.0, 150.0, 137.0, 229.0, 233.0, 230.0, 226.0, 203.0, 246.0, 189.0, 213.0, 146.0, 126.0, 210.0, 242.0, 200.0, 207.0, 221.0, 209.0, 225.0, 225.0, 224.0, 240.0, 231.0, 234.0, 242.0, 219.0, 208.0, 199.0, 220.0, 238.0, 193.0, 211.0, 231.0, 225.0, 140.0, 170.0, 230.0, 240.0, 178.0, 174.0, 224.0, 234.0, 180.0, 196.0, 228.0, 216.0, 252.0, 249.0, 239.0, 256.0, 223.0, 235.0, 242.0, 222.0, 225.0, 222.0, 226.0, 247.0, 249.0, 267.0, 197.0, 187.0, 230.0, 222.0, 235.0, 226.0, 254.0, 247.0, 114.0, 127.0, 234.0, 228.0, 215.0, 244.0, 256.0, 263.0, 208.0, 205.0, 208.0, 197.0, 151.0, 151.0, 239.0, 226.0, 254.0, 265.0, 248.0, 250.0, 279.0, 291.0, 226.0, 239.0, 214.0, 230.0, 200.0, 198.0, 239.0, 240.0, 201.0, 218.0, 250.0, 266.0, 212.0, 207.0, 265.0, 257.0, 208.0, 211.0, 264.0, 252.0, 234.0, 236.0, 238.0, 232.0, 262.0, 257.0, 204.0, 214.0, 240.0, 264.0, 238.0, 206.0, 185.0, 
170.0, 157.0, 167.0]}, "sampler_perf": {"mean_env_wait_ms": 2.191700194557133, "mean_processing_ms": 0.5053220551327391, "mean_inference_ms": 2.7357250964199444}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2088000, "num_steps_sampled": 1113600, "sample_time_ms": 20142.29, "load_time_ms": 35.752, "grad_time_ms": 8101.845, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.5849394527468104e-27, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009223390952683985, "policy_loss": -0.005599660333245993, "vf_loss": 71.4854736328125, "vf_explained_var": 0.7612900733947754, "kl": 0.002260145964100957, "entropy": 1.2530813217163086, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1113600, "episodes_total": 2784, "training_iteration": 87, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-28-07", "timestamp": 1660249687, "time_this_iter_s": 30.60737180709839, "time_total_s": 8107.181346416473, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 
0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8107.181346416473, "timesteps_since_restore": 1113600, "iterations_since_restore": 87, "perf": {"cpu_util_percent": 30.168181818181814, "ram_util_percent": 58.34545454545453}} -{"episode_reward_max": 570.0, "episode_reward_min": 142.0, "episode_reward_mean": 443.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 70.0}, "policy_reward_max": {"ppo": 291.0}, "policy_reward_mean": {"ppo": 221.745}, "custom_metrics": {"sparse_reward_mean": 152.4, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 138.69, "shaped_reward_min": 62, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.81, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.03, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.9, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.16, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.91, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.94, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.86, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.54, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 
13.09, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.22, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.33, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 3.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.46, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.38, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.13, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.97, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.54, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.09, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.54, 
"viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.09, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [302.0, 366.0, 313.0, 504.0, 522.0, 530.0, 473.0, 468.0, 447.0, 403.0, 522.0, 519.0, 461.0, 516.0, 401.0, 516.0, 459.0, 464.0, 467.0, 373.0, 458.0, 525.0, 513.0, 273.0, 415.0, 461.0, 467.0, 473.0, 522.0, 406.0, 522.0, 467.0, 447.0, 473.0, 516.0, 384.0, 452.0, 461.0, 501.0, 241.0, 462.0, 459.0, 519.0, 413.0, 405.0, 302.0, 465.0, 519.0, 498.0, 570.0, 465.0, 444.0, 398.0, 479.0, 419.0, 516.0, 419.0, 522.0, 419.0, 516.0, 470.0, 470.0, 519.0, 418.0, 504.0, 
444.0, 355.0, 324.0, 461.0, 404.0, 464.0, 393.0, 467.0, 459.0, 419.0, 142.0, 419.0, 456.0, 481.0, 507.0, 407.0, 470.0, 513.0, 467.0, 459.0, 419.0, 473.0, 462.0, 316.0, 294.0, 416.0, 513.0, 465.0, 465.0, 424.0, 401.0, 470.0, 419.0, 367.0, 341.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [151.0, 151.0, 185.0, 181.0, 153.0, 160.0, 262.0, 242.0, 265.0, 257.0, 268.0, 262.0, 253.0, 220.0, 244.0, 224.0, 221.0, 226.0, 192.0, 211.0, 265.0, 257.0, 249.0, 270.0, 237.0, 224.0, 264.0, 252.0, 215.0, 186.0, 259.0, 257.0, 245.0, 214.0, 238.0, 226.0, 229.0, 238.0, 184.0, 189.0, 230.0, 228.0, 262.0, 263.0, 265.0, 248.0, 143.0, 130.0, 208.0, 207.0, 228.0, 233.0, 241.0, 226.0, 232.0, 241.0, 265.0, 257.0, 207.0, 199.0, 255.0, 267.0, 244.0, 223.0, 225.0, 222.0, 226.0, 247.0, 249.0, 267.0, 197.0, 187.0, 230.0, 222.0, 235.0, 226.0, 254.0, 247.0, 114.0, 127.0, 234.0, 228.0, 215.0, 244.0, 256.0, 263.0, 208.0, 205.0, 208.0, 197.0, 151.0, 151.0, 239.0, 226.0, 254.0, 265.0, 248.0, 250.0, 279.0, 291.0, 226.0, 239.0, 214.0, 230.0, 200.0, 198.0, 239.0, 240.0, 201.0, 218.0, 250.0, 266.0, 212.0, 207.0, 265.0, 257.0, 208.0, 211.0, 264.0, 252.0, 234.0, 236.0, 238.0, 232.0, 262.0, 257.0, 204.0, 214.0, 240.0, 264.0, 238.0, 206.0, 185.0, 170.0, 157.0, 167.0, 227.0, 234.0, 210.0, 194.0, 236.0, 228.0, 186.0, 207.0, 242.0, 225.0, 226.0, 233.0, 218.0, 201.0, 70.0, 72.0, 202.0, 217.0, 222.0, 234.0, 234.0, 247.0, 235.0, 272.0, 197.0, 210.0, 228.0, 242.0, 245.0, 268.0, 247.0, 220.0, 231.0, 228.0, 211.0, 208.0, 230.0, 
243.0, 224.0, 238.0, 162.0, 154.0, 139.0, 155.0, 186.0, 230.0, 265.0, 248.0, 236.0, 229.0, 236.0, 229.0, 223.0, 201.0, 191.0, 210.0, 237.0, 233.0, 207.0, 212.0, 187.0, 180.0, 175.0, 166.0]}, "sampler_perf": {"mean_env_wait_ms": 2.1716872563286627, "mean_processing_ms": 0.5013361308454948, "mean_inference_ms": 2.7160497473266743}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2112000, "num_steps_sampled": 1126400, "sample_time_ms": 20200.821, "load_time_ms": 36.161, "grad_time_ms": 8143.064, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.2924697263734052e-27, "cur_lr": 0.0010000000474974513, "total_loss": 0.004522919189184904, "policy_loss": -0.0018036967376247048, "vf_loss": 69.45938110351562, "vf_explained_var": 0.7786126732826233, "kl": 0.001827276311814785, "entropy": 1.2386289834976196, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1126400, "episodes_total": 2816, "training_iteration": 88, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-28-39", "timestamp": 1660249719, "time_this_iter_s": 31.2521071434021, "time_total_s": 8138.4334535598755, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, 
"gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, 
"collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8138.4334535598755, "timesteps_since_restore": 1126400, "iterations_since_restore": 88, "perf": {"cpu_util_percent": 29.57045454545455, "ram_util_percent": 58.37954545454544}} -{"episode_reward_max": 530.0, "episode_reward_min": 142.0, "episode_reward_mean": 443.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 70.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 221.975}, "custom_metrics": 
{"sparse_reward_mean": 152.6, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 138.75, "shaped_reward_min": 62, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.54, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.22, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.68, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.28, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.7, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 0.81, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.97, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 12.52, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 13.12, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.55, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.11, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.22, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.48, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.08, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.28, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.87, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.52, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 13.12, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.52, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 13.12, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [473.0, 468.0, 447.0, 473.0, 522.0, 513.0, 470.0, 464.0, 352.0, 504.0, 476.0, 398.0, 358.0, 410.0, 519.0, 405.0, 465.0, 416.0, 406.0, 525.0, 393.0, 459.0, 456.0, 408.0, 465.0, 438.0, 476.0, 462.0, 478.0, 479.0, 507.0, 522.0, 504.0, 444.0, 355.0, 
324.0, 461.0, 404.0, 464.0, 393.0, 467.0, 459.0, 419.0, 142.0, 419.0, 456.0, 481.0, 507.0, 407.0, 470.0, 513.0, 467.0, 459.0, 419.0, 473.0, 462.0, 316.0, 294.0, 416.0, 513.0, 465.0, 465.0, 424.0, 401.0, 470.0, 419.0, 367.0, 341.0, 302.0, 366.0, 313.0, 504.0, 522.0, 530.0, 473.0, 468.0, 447.0, 403.0, 522.0, 519.0, 461.0, 516.0, 401.0, 516.0, 459.0, 464.0, 467.0, 373.0, 458.0, 525.0, 513.0, 273.0, 415.0, 461.0, 467.0, 473.0, 522.0, 406.0, 522.0, 467.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [238.0, 235.0, 234.0, 234.0, 226.0, 221.0, 236.0, 237.0, 257.0, 265.0, 240.0, 273.0, 236.0, 234.0, 248.0, 216.0, 178.0, 174.0, 250.0, 254.0, 231.0, 245.0, 210.0, 188.0, 181.0, 177.0, 202.0, 208.0, 243.0, 276.0, 199.0, 206.0, 244.0, 221.0, 208.0, 208.0, 193.0, 213.0, 259.0, 266.0, 210.0, 183.0, 229.0, 230.0, 234.0, 222.0, 212.0, 196.0, 234.0, 231.0, 224.0, 214.0, 232.0, 244.0, 248.0, 214.0, 241.0, 237.0, 238.0, 241.0, 252.0, 255.0, 255.0, 267.0, 240.0, 264.0, 238.0, 206.0, 185.0, 170.0, 157.0, 167.0, 227.0, 234.0, 210.0, 194.0, 236.0, 228.0, 186.0, 207.0, 242.0, 225.0, 226.0, 233.0, 218.0, 201.0, 70.0, 72.0, 202.0, 217.0, 222.0, 234.0, 234.0, 247.0, 235.0, 272.0, 197.0, 210.0, 228.0, 242.0, 245.0, 268.0, 247.0, 220.0, 231.0, 228.0, 211.0, 208.0, 230.0, 243.0, 224.0, 238.0, 162.0, 154.0, 139.0, 155.0, 186.0, 230.0, 265.0, 248.0, 236.0, 229.0, 236.0, 229.0, 223.0, 201.0, 191.0, 210.0, 237.0, 233.0, 207.0, 212.0, 187.0, 180.0, 175.0, 166.0, 151.0, 151.0, 185.0, 181.0, 153.0, 160.0, 262.0, 
242.0, 265.0, 257.0, 268.0, 262.0, 253.0, 220.0, 244.0, 224.0, 221.0, 226.0, 192.0, 211.0, 265.0, 257.0, 249.0, 270.0, 237.0, 224.0, 264.0, 252.0, 215.0, 186.0, 259.0, 257.0, 245.0, 214.0, 238.0, 226.0, 229.0, 238.0, 184.0, 189.0, 230.0, 228.0, 262.0, 263.0, 265.0, 248.0, 143.0, 130.0, 208.0, 207.0, 228.0, 233.0, 241.0, 226.0, 232.0, 241.0, 265.0, 257.0, 207.0, 199.0, 255.0, 267.0, 244.0, 223.0]}, "sampler_perf": {"mean_env_wait_ms": 2.1521722183648633, "mean_processing_ms": 0.4974515268120716, "mean_inference_ms": 2.697593198173253}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2136000, "num_steps_sampled": 1139200, "sample_time_ms": 20564.193, "load_time_ms": 36.199, "grad_time_ms": 8238.148, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.462348631867026e-28, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003657890483736992, "policy_loss": -0.005678663495928049, "vf_loss": 66.65350341796875, "vf_explained_var": 0.7769116759300232, "kl": 0.0020363712683320045, "entropy": 1.2417923212051392, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1139200, "episodes_total": 2848, "training_iteration": 89, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-29-11", "timestamp": 1660249751, "time_this_iter_s": 32.67408323287964, "time_total_s": 8171.107536792755, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, 
"state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, 
"log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8171.107536792755, "timesteps_since_restore": 1139200, "iterations_since_restore": 89, "perf": {"cpu_util_percent": 27.073913043478264, "ram_util_percent": 58.2586956521739}} -{"episode_reward_max": 570.0, "episode_reward_min": 273.0, 
"episode_reward_mean": 452.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 130.0}, "policy_reward_max": {"ppo": 287.0}, "policy_reward_mean": {"ppo": 226.1}, "custom_metrics": {"sparse_reward_mean": 156.0, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 140.2, "shaped_reward_min": 73, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.85, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.29, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.98, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.39, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.79, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.63, 
"onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.98, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 12.77, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 13.33, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.36, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.61, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.19, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.47, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.72, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.91, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 4.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.73, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 12.77, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 17, 
"optimal_onion_potting_agent_1_mean": 13.33, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.77, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 13.33, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [398.0, 419.0, 462.0, 419.0, 339.0, 
519.0, 467.0, 522.0, 450.0, 449.0, 444.0, 495.0, 453.0, 438.0, 462.0, 476.0, 456.0, 413.0, 403.0, 430.0, 459.0, 476.0, 476.0, 458.0, 347.0, 424.0, 476.0, 570.0, 401.0, 516.0, 455.0, 516.0, 470.0, 419.0, 367.0, 341.0, 302.0, 366.0, 313.0, 504.0, 522.0, 530.0, 473.0, 468.0, 447.0, 403.0, 522.0, 519.0, 461.0, 516.0, 401.0, 516.0, 459.0, 464.0, 467.0, 373.0, 458.0, 525.0, 513.0, 273.0, 415.0, 461.0, 467.0, 473.0, 522.0, 406.0, 522.0, 467.0, 473.0, 468.0, 447.0, 473.0, 522.0, 513.0, 470.0, 464.0, 352.0, 504.0, 476.0, 398.0, 358.0, 410.0, 519.0, 405.0, 465.0, 416.0, 406.0, 525.0, 393.0, 459.0, 456.0, 408.0, 465.0, 438.0, 476.0, 462.0, 478.0, 479.0, 507.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [207.0, 191.0, 217.0, 202.0, 231.0, 231.0, 211.0, 208.0, 176.0, 163.0, 260.0, 259.0, 234.0, 233.0, 262.0, 260.0, 238.0, 212.0, 230.0, 219.0, 232.0, 212.0, 261.0, 234.0, 219.0, 234.0, 226.0, 212.0, 237.0, 225.0, 247.0, 229.0, 236.0, 220.0, 200.0, 213.0, 200.0, 203.0, 202.0, 228.0, 225.0, 234.0, 236.0, 240.0, 228.0, 248.0, 242.0, 216.0, 177.0, 170.0, 217.0, 207.0, 231.0, 245.0, 287.0, 283.0, 201.0, 200.0, 264.0, 252.0, 233.0, 222.0, 260.0, 256.0, 237.0, 233.0, 207.0, 212.0, 187.0, 180.0, 175.0, 166.0, 151.0, 151.0, 185.0, 181.0, 153.0, 160.0, 262.0, 242.0, 265.0, 257.0, 268.0, 262.0, 253.0, 220.0, 244.0, 224.0, 221.0, 226.0, 192.0, 211.0, 265.0, 257.0, 249.0, 270.0, 237.0, 224.0, 264.0, 252.0, 215.0, 186.0, 259.0, 257.0, 245.0, 214.0, 238.0, 226.0, 229.0, 238.0, 184.0, 189.0, 230.0, 
228.0, 262.0, 263.0, 265.0, 248.0, 143.0, 130.0, 208.0, 207.0, 228.0, 233.0, 241.0, 226.0, 232.0, 241.0, 265.0, 257.0, 207.0, 199.0, 255.0, 267.0, 244.0, 223.0, 238.0, 235.0, 234.0, 234.0, 226.0, 221.0, 236.0, 237.0, 257.0, 265.0, 240.0, 273.0, 236.0, 234.0, 248.0, 216.0, 178.0, 174.0, 250.0, 254.0, 231.0, 245.0, 210.0, 188.0, 181.0, 177.0, 202.0, 208.0, 243.0, 276.0, 199.0, 206.0, 244.0, 221.0, 208.0, 208.0, 193.0, 213.0, 259.0, 266.0, 210.0, 183.0, 229.0, 230.0, 234.0, 222.0, 212.0, 196.0, 234.0, 231.0, 224.0, 214.0, 232.0, 244.0, 248.0, 214.0, 241.0, 237.0, 238.0, 241.0, 252.0, 255.0, 255.0, 267.0]}, "sampler_perf": {"mean_env_wait_ms": 2.133109953902478, "mean_processing_ms": 0.4936599987837839, "mean_inference_ms": 2.6799343629485026}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2160000, "num_steps_sampled": 1152000, "sample_time_ms": 20913.282, "load_time_ms": 36.475, "grad_time_ms": 8548.769, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.231174315933513e-28, "cur_lr": 0.0010000000474974513, "total_loss": 0.007687473203986883, "policy_loss": 0.0007542042876593769, "vf_loss": 75.50032806396484, "vf_explained_var": 0.7544476985931396, "kl": 0.0026988324243575335, "entropy": 1.2335320711135864, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1152000, "episodes_total": 2880, "training_iteration": 90, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-29-44", "timestamp": 1660249784, "time_this_iter_s": 32.394510984420776, "time_total_s": 8203.502047777176, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": 
"tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8203.502047777176, "timesteps_since_restore": 1152000, 
"iterations_since_restore": 90, "perf": {"cpu_util_percent": 31.20434782608696, "ram_util_percent": 58.23260869565217}} -{"episode_reward_max": 570.0, "episode_reward_min": 339.0, "episode_reward_mean": 459.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 163.0}, "policy_reward_max": {"ppo": 287.0}, "policy_reward_mean": {"ppo": 229.53}, "custom_metrics": {"sparse_reward_mean": 158.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 142.26, "shaped_reward_min": 99, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.9, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.54, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.05, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.67, 
"useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 0.7, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.94, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 12.97, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 13.54, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.78, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.2, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.3, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.65, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.07, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.46, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.13, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, 
"soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 12.97, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 13.54, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.97, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 13.54, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 411.0, 418.0, 522.0, 465.0, 519.0, 465.0, 408.0, 473.0, 525.0, 522.0, 481.0, 447.0, 527.0, 476.0, 527.0, 449.0, 396.0, 453.0, 453.0, 470.0, 395.0, 481.0, 516.0, 516.0, 449.0, 516.0, 373.0, 416.0, 407.0, 392.0, 516.0, 522.0, 406.0, 522.0, 467.0, 473.0, 468.0, 447.0, 473.0, 522.0, 513.0, 470.0, 464.0, 352.0, 504.0, 476.0, 398.0, 358.0, 410.0, 519.0, 405.0, 465.0, 416.0, 406.0, 525.0, 393.0, 459.0, 456.0, 408.0, 465.0, 438.0, 476.0, 462.0, 478.0, 479.0, 507.0, 522.0, 398.0, 419.0, 462.0, 419.0, 339.0, 519.0, 467.0, 522.0, 450.0, 449.0, 444.0, 495.0, 453.0, 438.0, 462.0, 476.0, 456.0, 413.0, 403.0, 430.0, 459.0, 476.0, 476.0, 458.0, 347.0, 424.0, 476.0, 570.0, 401.0, 516.0, 455.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 245.0, 210.0, 201.0, 218.0, 200.0, 267.0, 255.0, 240.0, 225.0, 275.0, 244.0, 218.0, 247.0, 202.0, 206.0, 252.0, 221.0, 257.0, 268.0, 262.0, 260.0, 237.0, 244.0, 217.0, 230.0, 265.0, 262.0, 249.0, 227.0, 259.0, 268.0, 227.0, 222.0, 192.0, 204.0, 233.0, 220.0, 222.0, 231.0, 225.0, 245.0, 195.0, 200.0, 239.0, 242.0, 254.0, 262.0, 259.0, 257.0, 237.0, 212.0, 267.0, 249.0, 184.0, 189.0, 211.0, 205.0, 198.0, 209.0, 202.0, 190.0, 249.0, 267.0, 265.0, 257.0, 207.0, 199.0, 255.0, 267.0, 244.0, 223.0, 238.0, 235.0, 234.0, 234.0, 226.0, 221.0, 236.0, 
237.0, 257.0, 265.0, 240.0, 273.0, 236.0, 234.0, 248.0, 216.0, 178.0, 174.0, 250.0, 254.0, 231.0, 245.0, 210.0, 188.0, 181.0, 177.0, 202.0, 208.0, 243.0, 276.0, 199.0, 206.0, 244.0, 221.0, 208.0, 208.0, 193.0, 213.0, 259.0, 266.0, 210.0, 183.0, 229.0, 230.0, 234.0, 222.0, 212.0, 196.0, 234.0, 231.0, 224.0, 214.0, 232.0, 244.0, 248.0, 214.0, 241.0, 237.0, 238.0, 241.0, 252.0, 255.0, 255.0, 267.0, 207.0, 191.0, 217.0, 202.0, 231.0, 231.0, 211.0, 208.0, 176.0, 163.0, 260.0, 259.0, 234.0, 233.0, 262.0, 260.0, 238.0, 212.0, 230.0, 219.0, 232.0, 212.0, 261.0, 234.0, 219.0, 234.0, 226.0, 212.0, 237.0, 225.0, 247.0, 229.0, 236.0, 220.0, 200.0, 213.0, 200.0, 203.0, 202.0, 228.0, 225.0, 234.0, 236.0, 240.0, 228.0, 248.0, 242.0, 216.0, 177.0, 170.0, 217.0, 207.0, 231.0, 245.0, 287.0, 283.0, 201.0, 200.0, 264.0, 252.0, 233.0, 222.0, 260.0, 256.0]}, "sampler_perf": {"mean_env_wait_ms": 2.114456633491147, "mean_processing_ms": 0.4899465714962644, "mean_inference_ms": 2.662192402592387}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2184000, "num_steps_sampled": 1164800, "sample_time_ms": 20895.618, "load_time_ms": 36.529, "grad_time_ms": 8787.695, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.6155871579667565e-28, "cur_lr": 0.0010000000474974513, "total_loss": 0.003199361963197589, "policy_loss": -0.003974525723606348, "vf_loss": 77.90489959716797, "vf_explained_var": 0.7496511936187744, "kl": 0.001869131694547832, "entropy": 1.2332016229629517, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1164800, "episodes_total": 2912, "training_iteration": 91, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-30-12", "timestamp": 1660249812, "time_this_iter_s": 28.652703046798706, "time_total_s": 8232.154750823975, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, 
"num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8232.154750823975, "timesteps_since_restore": 1164800, "iterations_since_restore": 91, "perf": {"cpu_util_percent": 34.4075, "ram_util_percent": 58.25750000000001}} -{"episode_reward_max": 570.0, "episode_reward_min": 296.0, "episode_reward_mean": 465.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 133.0}, "policy_reward_max": {"ppo": 287.0}, "policy_reward_mean": {"ppo": 232.56}, "custom_metrics": {"sparse_reward_mean": 160.6, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 143.92, "shaped_reward_min": 96, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.61, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.05, "onion_pickup_agent_1_min": 
9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.75, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.24, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.71, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.81, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.71, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 14.12, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.98, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.72, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.31, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.77, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.5, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, 
"soup_delivery_agent_1_mean": 3.86, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.16, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.18, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 12.71, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 14.12, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.71, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 14.12, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [513.0, 453.0, 516.0, 533.0, 296.0, 453.0, 501.0, 422.0, 465.0, 395.0, 530.0, 473.0, 519.0, 458.0, 404.0, 487.0, 449.0, 516.0, 516.0, 510.0, 504.0, 516.0, 516.0, 473.0, 516.0, 519.0, 470.0, 387.0, 522.0, 453.0, 421.0, 438.0, 478.0, 479.0, 507.0, 522.0, 398.0, 419.0, 462.0, 419.0, 339.0, 519.0, 467.0, 522.0, 450.0, 449.0, 444.0, 495.0, 453.0, 438.0, 462.0, 476.0, 456.0, 413.0, 403.0, 430.0, 459.0, 476.0, 476.0, 458.0, 347.0, 424.0, 476.0, 570.0, 401.0, 516.0, 455.0, 516.0, 510.0, 411.0, 418.0, 522.0, 465.0, 519.0, 465.0, 408.0, 473.0, 525.0, 522.0, 481.0, 447.0, 527.0, 476.0, 527.0, 449.0, 396.0, 453.0, 453.0, 470.0, 395.0, 481.0, 516.0, 516.0, 449.0, 516.0, 373.0, 416.0, 407.0, 392.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 256.0, 218.0, 235.0, 254.0, 262.0, 273.0, 260.0, 133.0, 163.0, 219.0, 234.0, 235.0, 266.0, 207.0, 215.0, 239.0, 226.0, 195.0, 200.0, 269.0, 261.0, 230.0, 243.0, 263.0, 256.0, 229.0, 229.0, 202.0, 202.0, 247.0, 240.0, 225.0, 224.0, 248.0, 268.0, 250.0, 266.0, 252.0, 258.0, 249.0, 255.0, 272.0, 244.0, 254.0, 262.0, 232.0, 241.0, 260.0, 256.0, 250.0, 269.0, 233.0, 237.0, 189.0, 
198.0, 265.0, 257.0, 233.0, 220.0, 209.0, 212.0, 218.0, 220.0, 241.0, 237.0, 238.0, 241.0, 252.0, 255.0, 255.0, 267.0, 207.0, 191.0, 217.0, 202.0, 231.0, 231.0, 211.0, 208.0, 176.0, 163.0, 260.0, 259.0, 234.0, 233.0, 262.0, 260.0, 238.0, 212.0, 230.0, 219.0, 232.0, 212.0, 261.0, 234.0, 219.0, 234.0, 226.0, 212.0, 237.0, 225.0, 247.0, 229.0, 236.0, 220.0, 200.0, 213.0, 200.0, 203.0, 202.0, 228.0, 225.0, 234.0, 236.0, 240.0, 228.0, 248.0, 242.0, 216.0, 177.0, 170.0, 217.0, 207.0, 231.0, 245.0, 287.0, 283.0, 201.0, 200.0, 264.0, 252.0, 233.0, 222.0, 260.0, 256.0, 265.0, 245.0, 210.0, 201.0, 218.0, 200.0, 267.0, 255.0, 240.0, 225.0, 275.0, 244.0, 218.0, 247.0, 202.0, 206.0, 252.0, 221.0, 257.0, 268.0, 262.0, 260.0, 237.0, 244.0, 217.0, 230.0, 265.0, 262.0, 249.0, 227.0, 259.0, 268.0, 227.0, 222.0, 192.0, 204.0, 233.0, 220.0, 222.0, 231.0, 225.0, 245.0, 195.0, 200.0, 239.0, 242.0, 254.0, 262.0, 259.0, 257.0, 237.0, 212.0, 267.0, 249.0, 184.0, 189.0, 211.0, 205.0, 198.0, 209.0, 202.0, 190.0, 249.0, 267.0]}, "sampler_perf": {"mean_env_wait_ms": 2.0961781849266816, "mean_processing_ms": 0.48630585862921505, "mean_inference_ms": 2.6442580305818364}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2208000, "num_steps_sampled": 1177600, "sample_time_ms": 21020.172, "load_time_ms": 36.643, "grad_time_ms": 9041.599, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 8.077935789833782e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.009082547388970852, "policy_loss": 0.0023276470601558685, "vf_loss": 73.66332244873047, "vf_explained_var": 0.7556483149528503, "kl": 0.003871823428198695, "entropy": 1.2228628396987915, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1177600, "episodes_total": 2944, "training_iteration": 92, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-30-43", "timestamp": 1660249843, "time_this_iter_s": 
30.34039807319641, "time_total_s": 8262.495148897171, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, 
"use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, 
"shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8262.495148897171, "timesteps_since_restore": 1177600, "iterations_since_restore": 92, "perf": {"cpu_util_percent": 30.85116279069767, "ram_util_percent": 58.283720930232555}} -{"episode_reward_max": 573.0, "episode_reward_min": 230.0, "episode_reward_mean": 467.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 106.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 233.655}, "custom_metrics": {"sparse_reward_mean": 161.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 144.11, "shaped_reward_min": 70, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, 
"potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.51, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.93, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.74, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.27, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.56, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.68, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.66, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 14.2, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.04, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 0.96, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.35, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.62, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, 
"soup_pickup_agent_1_mean": 4.23, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.37, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.05, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 12.66, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 14.2, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.66, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 14.2, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, 
"catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [444.0, 410.0, 481.0, 411.0, 456.0, 461.0, 230.0, 370.0, 398.0, 473.0, 513.0, 507.0, 461.0, 522.0, 573.0, 495.0, 516.0, 390.0, 501.0, 519.0, 525.0, 524.0, 479.0, 447.0, 467.0, 464.0, 450.0, 392.0, 522.0, 416.0, 478.0, 510.0, 401.0, 516.0, 455.0, 516.0, 510.0, 411.0, 418.0, 522.0, 465.0, 519.0, 465.0, 408.0, 473.0, 525.0, 522.0, 481.0, 447.0, 527.0, 476.0, 527.0, 449.0, 396.0, 453.0, 453.0, 470.0, 395.0, 481.0, 516.0, 516.0, 449.0, 516.0, 373.0, 416.0, 407.0, 392.0, 516.0, 513.0, 453.0, 516.0, 533.0, 296.0, 453.0, 501.0, 422.0, 465.0, 395.0, 530.0, 473.0, 519.0, 458.0, 404.0, 487.0, 449.0, 516.0, 516.0, 510.0, 504.0, 516.0, 516.0, 473.0, 516.0, 519.0, 470.0, 387.0, 522.0, 453.0, 421.0, 438.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [224.0, 220.0, 205.0, 205.0, 229.0, 252.0, 210.0, 201.0, 236.0, 220.0, 223.0, 238.0, 124.0, 106.0, 174.0, 196.0, 191.0, 207.0, 241.0, 232.0, 260.0, 253.0, 268.0, 239.0, 227.0, 
234.0, 262.0, 260.0, 293.0, 280.0, 250.0, 245.0, 249.0, 267.0, 195.0, 195.0, 255.0, 246.0, 261.0, 258.0, 254.0, 271.0, 262.0, 262.0, 247.0, 232.0, 201.0, 246.0, 230.0, 237.0, 223.0, 241.0, 221.0, 229.0, 197.0, 195.0, 254.0, 268.0, 211.0, 205.0, 239.0, 239.0, 259.0, 251.0, 201.0, 200.0, 264.0, 252.0, 233.0, 222.0, 260.0, 256.0, 265.0, 245.0, 210.0, 201.0, 218.0, 200.0, 267.0, 255.0, 240.0, 225.0, 275.0, 244.0, 218.0, 247.0, 202.0, 206.0, 252.0, 221.0, 257.0, 268.0, 262.0, 260.0, 237.0, 244.0, 217.0, 230.0, 265.0, 262.0, 249.0, 227.0, 259.0, 268.0, 227.0, 222.0, 192.0, 204.0, 233.0, 220.0, 222.0, 231.0, 225.0, 245.0, 195.0, 200.0, 239.0, 242.0, 254.0, 262.0, 259.0, 257.0, 237.0, 212.0, 267.0, 249.0, 184.0, 189.0, 211.0, 205.0, 198.0, 209.0, 202.0, 190.0, 249.0, 267.0, 257.0, 256.0, 218.0, 235.0, 254.0, 262.0, 273.0, 260.0, 133.0, 163.0, 219.0, 234.0, 235.0, 266.0, 207.0, 215.0, 239.0, 226.0, 195.0, 200.0, 269.0, 261.0, 230.0, 243.0, 263.0, 256.0, 229.0, 229.0, 202.0, 202.0, 247.0, 240.0, 225.0, 224.0, 248.0, 268.0, 250.0, 266.0, 252.0, 258.0, 249.0, 255.0, 272.0, 244.0, 254.0, 262.0, 232.0, 241.0, 260.0, 256.0, 250.0, 269.0, 233.0, 237.0, 189.0, 198.0, 265.0, 257.0, 233.0, 220.0, 209.0, 212.0, 218.0, 220.0]}, "sampler_perf": {"mean_env_wait_ms": 2.0782697759005244, "mean_processing_ms": 0.48273945160484155, "mean_inference_ms": 2.6262237371699033}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2232000, "num_steps_sampled": 1190400, "sample_time_ms": 21149.002, "load_time_ms": 36.704, "grad_time_ms": 9227.888, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 4.038967894916891e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010848678648471832, "policy_loss": -0.005190685391426086, "vf_loss": 68.93277740478516, "vf_explained_var": 0.7608636021614075, "kl": 0.0020486123394221067, "entropy": 1.2354419231414795, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, 
"timesteps_total": 1190400, "episodes_total": 2976, "training_iteration": 93, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-31-14", "timestamp": 1660249874, "time_this_iter_s": 30.926449298858643, "time_total_s": 8293.42159819603, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": 
false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": 
"MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8293.42159819603, "timesteps_since_restore": 1190400, "iterations_since_restore": 93, "perf": {"cpu_util_percent": 31.343181818181815, "ram_util_percent": 58.222727272727276}} -{"episode_reward_max": 573.0, "episode_reward_min": 63.0, "episode_reward_mean": 462.26, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 231.13}, "custom_metrics": {"sparse_reward_mean": 160.2, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 141.86, "shaped_reward_min": 23, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, 
"useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.41, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.74, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.57, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.05, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.71, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.39, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 14.12, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.39, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, 
"useful_dish_drop_agent_1_mean": 0.41, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.52, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.22, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.11, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.19, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 12.39, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 14.12, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.39, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 14.12, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, 
"catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [482.0, 525.0, 507.0, 516.0, 478.0, 519.0, 516.0, 410.0, 513.0, 295.0, 401.0, 462.0, 501.0, 519.0, 63.0, 456.0, 381.0, 510.0, 464.0, 461.0, 473.0, 407.0, 570.0, 444.0, 444.0, 384.0, 467.0, 428.0, 456.0, 462.0, 513.0, 519.0, 416.0, 407.0, 392.0, 516.0, 513.0, 453.0, 516.0, 533.0, 296.0, 453.0, 501.0, 422.0, 465.0, 395.0, 530.0, 473.0, 519.0, 458.0, 404.0, 487.0, 449.0, 516.0, 516.0, 510.0, 504.0, 516.0, 516.0, 473.0, 516.0, 519.0, 470.0, 387.0, 522.0, 453.0, 421.0, 438.0, 444.0, 410.0, 481.0, 411.0, 456.0, 461.0, 230.0, 370.0, 398.0, 473.0, 513.0, 507.0, 461.0, 522.0, 573.0, 495.0, 516.0, 390.0, 501.0, 519.0, 525.0, 524.0, 479.0, 447.0, 467.0, 464.0, 450.0, 392.0, 522.0, 416.0, 478.0, 510.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], 
"policy_ppo_reward": [250.0, 232.0, 259.0, 266.0, 244.0, 263.0, 261.0, 255.0, 253.0, 225.0, 259.0, 260.0, 251.0, 265.0, 210.0, 200.0, 248.0, 265.0, 140.0, 155.0, 194.0, 207.0, 222.0, 240.0, 252.0, 249.0, 259.0, 260.0, 29.0, 34.0, 206.0, 250.0, 184.0, 197.0, 256.0, 254.0, 239.0, 225.0, 231.0, 230.0, 225.0, 248.0, 204.0, 203.0, 288.0, 282.0, 215.0, 229.0, 226.0, 218.0, 174.0, 210.0, 231.0, 236.0, 222.0, 206.0, 235.0, 221.0, 218.0, 244.0, 251.0, 262.0, 271.0, 248.0, 211.0, 205.0, 198.0, 209.0, 202.0, 190.0, 249.0, 267.0, 257.0, 256.0, 218.0, 235.0, 254.0, 262.0, 273.0, 260.0, 133.0, 163.0, 219.0, 234.0, 235.0, 266.0, 207.0, 215.0, 239.0, 226.0, 195.0, 200.0, 269.0, 261.0, 230.0, 243.0, 263.0, 256.0, 229.0, 229.0, 202.0, 202.0, 247.0, 240.0, 225.0, 224.0, 248.0, 268.0, 250.0, 266.0, 252.0, 258.0, 249.0, 255.0, 272.0, 244.0, 254.0, 262.0, 232.0, 241.0, 260.0, 256.0, 250.0, 269.0, 233.0, 237.0, 189.0, 198.0, 265.0, 257.0, 233.0, 220.0, 209.0, 212.0, 218.0, 220.0, 224.0, 220.0, 205.0, 205.0, 229.0, 252.0, 210.0, 201.0, 236.0, 220.0, 223.0, 238.0, 124.0, 106.0, 174.0, 196.0, 191.0, 207.0, 241.0, 232.0, 260.0, 253.0, 268.0, 239.0, 227.0, 234.0, 262.0, 260.0, 293.0, 280.0, 250.0, 245.0, 249.0, 267.0, 195.0, 195.0, 255.0, 246.0, 261.0, 258.0, 254.0, 271.0, 262.0, 262.0, 247.0, 232.0, 201.0, 246.0, 230.0, 237.0, 223.0, 241.0, 221.0, 229.0, 197.0, 195.0, 254.0, 268.0, 211.0, 205.0, 239.0, 239.0, 259.0, 251.0]}, "sampler_perf": {"mean_env_wait_ms": 2.060765834369586, "mean_processing_ms": 0.47925634757705055, "mean_inference_ms": 2.608906134199901}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2256000, "num_steps_sampled": 1203200, "sample_time_ms": 21186.356, "load_time_ms": 36.604, "grad_time_ms": 9318.639, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.0194839474584456e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.00026301448815502226, "policy_loss": -0.005903394427150488, "vf_loss": 67.8399658203125, "vf_explained_var": 
0.7958834171295166, "kl": 0.0017271721735596657, "entropy": 1.2351765632629395, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1203200, "episodes_total": 3008, "training_iteration": 94, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-31-44", "timestamp": 1660249904, "time_this_iter_s": 30.80340886116028, "time_total_s": 8324.22500705719, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), 
float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8324.22500705719, "timesteps_since_restore": 1203200, "iterations_since_restore": 94, "perf": {"cpu_util_percent": 33.47727272727272, "ram_util_percent": 58.21363636363637}} -{"episode_reward_max": 573.0, "episode_reward_min": 63.0, "episode_reward_mean": 461.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 230.965}, "custom_metrics": {"sparse_reward_mean": 160.2, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 141.53, "shaped_reward_min": 23, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 
0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.54, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.69, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.8, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.97, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.8, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.82, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.44, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 14.16, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.46, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, 
"dish_drop_agent_1_mean": 1.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.36, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.4, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.56, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.12, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.31, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.02, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.44, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 14.16, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.44, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 14.16, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 
0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [441.0, 453.0, 479.0, 405.0, 504.0, 456.0, 525.0, 519.0, 390.0, 379.0, 465.0, 522.0, 530.0, 359.0, 473.0, 453.0, 524.0, 441.0, 407.0, 525.0, 530.0, 453.0, 464.0, 525.0, 422.0, 492.0, 465.0, 398.0, 519.0, 513.0, 464.0, 513.0, 522.0, 453.0, 421.0, 438.0, 444.0, 410.0, 481.0, 411.0, 456.0, 461.0, 230.0, 370.0, 398.0, 473.0, 513.0, 507.0, 461.0, 522.0, 573.0, 495.0, 516.0, 390.0, 501.0, 519.0, 525.0, 524.0, 479.0, 447.0, 467.0, 464.0, 450.0, 392.0, 522.0, 416.0, 478.0, 510.0, 482.0, 525.0, 507.0, 516.0, 478.0, 519.0, 516.0, 410.0, 513.0, 295.0, 401.0, 462.0, 501.0, 519.0, 63.0, 456.0, 381.0, 510.0, 464.0, 461.0, 473.0, 407.0, 570.0, 444.0, 444.0, 384.0, 467.0, 428.0, 456.0, 462.0, 513.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [213.0, 228.0, 227.0, 226.0, 246.0, 233.0, 199.0, 206.0, 253.0, 251.0, 230.0, 226.0, 265.0, 260.0, 258.0, 261.0, 193.0, 197.0, 197.0, 182.0, 228.0, 237.0, 249.0, 273.0, 264.0, 266.0, 183.0, 176.0, 242.0, 231.0, 205.0, 248.0, 262.0, 262.0, 234.0, 207.0, 192.0, 215.0, 264.0, 261.0, 270.0, 260.0, 232.0, 221.0, 236.0, 228.0, 266.0, 259.0, 195.0, 227.0, 257.0, 235.0, 230.0, 235.0, 212.0, 186.0, 264.0, 255.0, 250.0, 263.0, 229.0, 235.0, 253.0, 260.0, 265.0, 257.0, 233.0, 220.0, 209.0, 212.0, 218.0, 220.0, 224.0, 220.0, 205.0, 205.0, 229.0, 252.0, 210.0, 201.0, 236.0, 220.0, 223.0, 238.0, 124.0, 106.0, 174.0, 196.0, 191.0, 207.0, 241.0, 232.0, 260.0, 253.0, 268.0, 239.0, 227.0, 234.0, 262.0, 260.0, 293.0, 280.0, 250.0, 245.0, 249.0, 267.0, 195.0, 195.0, 255.0, 246.0, 261.0, 258.0, 254.0, 271.0, 262.0, 262.0, 247.0, 232.0, 201.0, 246.0, 230.0, 237.0, 223.0, 241.0, 221.0, 229.0, 197.0, 195.0, 254.0, 268.0, 211.0, 205.0, 239.0, 239.0, 259.0, 251.0, 250.0, 232.0, 259.0, 266.0, 244.0, 263.0, 261.0, 255.0, 253.0, 225.0, 259.0, 260.0, 251.0, 265.0, 210.0, 200.0, 248.0, 265.0, 140.0, 155.0, 194.0, 207.0, 222.0, 240.0, 252.0, 249.0, 259.0, 260.0, 29.0, 34.0, 206.0, 250.0, 184.0, 197.0, 256.0, 254.0, 239.0, 225.0, 231.0, 230.0, 225.0, 248.0, 204.0, 203.0, 288.0, 282.0, 215.0, 229.0, 226.0, 218.0, 174.0, 210.0, 231.0, 236.0, 222.0, 206.0, 235.0, 221.0, 218.0, 244.0, 251.0, 262.0, 271.0, 248.0]}, "sampler_perf": {"mean_env_wait_ms": 2.043636062637732, "mean_processing_ms": 0.47584888992539853, "mean_inference_ms": 2.592145322205639}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2280000, "num_steps_sampled": 1216000, "sample_time_ms": 21306.082, "load_time_ms": 36.597, "grad_time_ms": 9453.053, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 
1.0097419737292228e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.001733560231514275, "policy_loss": -0.004548916593194008, "vf_loss": 68.90572357177734, "vf_explained_var": 0.7648184895515442, "kl": 0.0019422214245423675, "entropy": 1.216185212135315, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1216000, "episodes_total": 3040, "training_iteration": 95, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-32-16", "timestamp": 1660249936, "time_this_iter_s": 31.733500242233276, "time_total_s": 8355.958507299423, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8355.958507299423, "timesteps_since_restore": 1216000, "iterations_since_restore": 95, "perf": {"cpu_util_percent": 30.170454545454547, "ram_util_percent": 58.22272727272727}} -{"episode_reward_max": 576.0, "episode_reward_min": 63.0, "episode_reward_mean": 464.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 288.0}, "policy_reward_mean": {"ppo": 232.385}, "custom_metrics": {"sparse_reward_mean": 161.0, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 142.77, "shaped_reward_min": 23, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.82, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.62, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.01, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.82, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.84, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.65, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.83, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 13.99, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.41, "useful_dish_pickup_agent_0_min": 0, 
"useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.11, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.32, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.34, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.18, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.32, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.09, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.83, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 13.99, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.83, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 13.99, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [447.0, 513.0, 487.0, 413.0, 348.0, 492.0, 421.0, 470.0, 476.0, 447.0, 504.0, 412.0, 470.0, 519.0, 516.0, 533.0, 404.0, 441.0, 447.0, 576.0, 459.0, 510.0, 408.0, 510.0, 329.0, 450.0, 510.0, 516.0, 525.0, 510.0, 450.0, 484.0, 522.0, 416.0, 478.0, 510.0, 482.0, 525.0, 507.0, 516.0, 478.0, 519.0, 516.0, 410.0, 513.0, 295.0, 401.0, 462.0, 501.0, 519.0, 63.0, 456.0, 381.0, 510.0, 464.0, 461.0, 473.0, 407.0, 570.0, 444.0, 444.0, 384.0, 467.0, 428.0, 456.0, 462.0, 513.0, 519.0, 441.0, 453.0, 479.0, 405.0, 504.0, 456.0, 525.0, 519.0, 390.0, 379.0, 465.0, 522.0, 530.0, 359.0, 473.0, 453.0, 524.0, 441.0, 407.0, 525.0, 530.0, 453.0, 464.0, 525.0, 422.0, 492.0, 465.0, 398.0, 519.0, 513.0, 464.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [217.0, 230.0, 252.0, 261.0, 246.0, 241.0, 204.0, 209.0, 166.0, 182.0, 240.0, 252.0, 210.0, 211.0, 230.0, 240.0, 228.0, 248.0, 232.0, 215.0, 258.0, 246.0, 224.0, 188.0, 228.0, 242.0, 265.0, 254.0, 249.0, 267.0, 267.0, 266.0, 202.0, 202.0, 235.0, 206.0, 212.0, 235.0, 288.0, 288.0, 225.0, 234.0, 248.0, 262.0, 219.0, 189.0, 257.0, 253.0, 177.0, 152.0, 229.0, 221.0, 259.0, 251.0, 254.0, 262.0, 261.0, 264.0, 267.0, 243.0, 221.0, 229.0, 244.0, 240.0, 254.0, 268.0, 211.0, 205.0, 239.0, 239.0, 259.0, 251.0, 250.0, 232.0, 259.0, 266.0, 244.0, 263.0, 261.0, 255.0, 253.0, 225.0, 259.0, 260.0, 251.0, 265.0, 210.0, 200.0, 248.0, 265.0, 140.0, 155.0, 194.0, 207.0, 222.0, 240.0, 252.0, 249.0, 259.0, 260.0, 29.0, 34.0, 206.0, 250.0, 184.0, 197.0, 256.0, 254.0, 239.0, 225.0, 231.0, 230.0, 225.0, 248.0, 204.0, 203.0, 288.0, 282.0, 215.0, 229.0, 226.0, 218.0, 174.0, 210.0, 231.0, 236.0, 222.0, 206.0, 235.0, 221.0, 218.0, 244.0, 251.0, 262.0, 271.0, 248.0, 213.0, 228.0, 227.0, 226.0, 246.0, 233.0, 199.0, 206.0, 253.0, 251.0, 230.0, 226.0, 265.0, 260.0, 258.0, 261.0, 193.0, 197.0, 197.0, 182.0, 228.0, 237.0, 249.0, 273.0, 264.0, 266.0, 183.0, 176.0, 242.0, 231.0, 205.0, 248.0, 262.0, 262.0, 234.0, 207.0, 192.0, 215.0, 264.0, 261.0, 270.0, 260.0, 232.0, 221.0, 236.0, 228.0, 266.0, 259.0, 195.0, 227.0, 257.0, 235.0, 230.0, 235.0, 212.0, 186.0, 264.0, 255.0, 250.0, 263.0, 229.0, 235.0, 253.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 2.0268528055210124, "mean_processing_ms": 0.47250865851188434, "mean_inference_ms": 2.5756730471163247}, 
"off_policy_estimator": {}, "info": {"num_steps_trained": 2304000, "num_steps_sampled": 1228800, "sample_time_ms": 21320.865, "load_time_ms": 36.696, "grad_time_ms": 9593.729, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 5.048709868646114e-30, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013039499754086137, "policy_loss": -0.007722912821918726, "vf_loss": 70.26915740966797, "vf_explained_var": 0.7757861018180847, "kl": 0.001609964296221733, "entropy": 1.2159069776535034, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1228800, "episodes_total": 3072, "training_iteration": 96, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-32-47", "timestamp": 1660249967, "time_this_iter_s": 30.381797075271606, "time_total_s": 8386.340304374695, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": 
{"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8386.340304374695, "timesteps_since_restore": 1228800, "iterations_since_restore": 96, "perf": {"cpu_util_percent": 31.758139534883718, "ram_util_percent": 58.16046511627907}} -{"episode_reward_max": 579.0, "episode_reward_min": 128.0, "episode_reward_mean": 470.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 294.0}, "policy_reward_mean": {"ppo": 235.265}, "custom_metrics": {"sparse_reward_mean": 163.2, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 144.13, "shaped_reward_min": 48, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.93, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.66, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.25, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.85, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.47, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.72, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.73, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.11, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 14.05, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 
2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.12, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.32, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.55, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.16, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.4, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.08, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.11, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 14.05, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.11, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 14.05, 
"viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 522.0, 522.0, 462.0, 446.0, 519.0, 573.0, 352.0, 473.0, 519.0, 507.0, 524.0, 519.0, 467.0, 513.0, 522.0, 504.0, 519.0, 456.0, 453.0, 168.0, 516.0, 476.0, 470.0, 507.0, 570.0, 516.0, 444.0, 128.0, 510.0, 579.0, 317.0, 456.0, 462.0, 513.0, 519.0, 441.0, 453.0, 479.0, 405.0, 504.0, 456.0, 525.0, 519.0, 390.0, 379.0, 465.0, 522.0, 530.0, 359.0, 473.0, 453.0, 524.0, 441.0, 407.0, 525.0, 530.0, 453.0, 464.0, 525.0, 422.0, 492.0, 465.0, 398.0, 519.0, 513.0, 464.0, 513.0, 447.0, 513.0, 487.0, 413.0, 348.0, 492.0, 421.0, 470.0, 476.0, 447.0, 504.0, 412.0, 470.0, 519.0, 516.0, 
533.0, 404.0, 441.0, 447.0, 576.0, 459.0, 510.0, 408.0, 510.0, 329.0, 450.0, 510.0, 516.0, 525.0, 510.0, 450.0, 484.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 268.0, 252.0, 270.0, 258.0, 264.0, 238.0, 224.0, 227.0, 219.0, 246.0, 273.0, 285.0, 288.0, 168.0, 184.0, 240.0, 233.0, 266.0, 253.0, 258.0, 249.0, 249.0, 275.0, 259.0, 260.0, 240.0, 227.0, 246.0, 267.0, 267.0, 255.0, 256.0, 248.0, 269.0, 250.0, 225.0, 231.0, 225.0, 228.0, 85.0, 83.0, 265.0, 251.0, 232.0, 244.0, 222.0, 248.0, 253.0, 254.0, 286.0, 284.0, 256.0, 260.0, 223.0, 221.0, 65.0, 63.0, 256.0, 254.0, 294.0, 285.0, 162.0, 155.0, 235.0, 221.0, 218.0, 244.0, 251.0, 262.0, 271.0, 248.0, 213.0, 228.0, 227.0, 226.0, 246.0, 233.0, 199.0, 206.0, 253.0, 251.0, 230.0, 226.0, 265.0, 260.0, 258.0, 261.0, 193.0, 197.0, 197.0, 182.0, 228.0, 237.0, 249.0, 273.0, 264.0, 266.0, 183.0, 176.0, 242.0, 231.0, 205.0, 248.0, 262.0, 262.0, 234.0, 207.0, 192.0, 215.0, 264.0, 261.0, 270.0, 260.0, 232.0, 221.0, 236.0, 228.0, 266.0, 259.0, 195.0, 227.0, 257.0, 235.0, 230.0, 235.0, 212.0, 186.0, 264.0, 255.0, 250.0, 263.0, 229.0, 235.0, 253.0, 260.0, 217.0, 230.0, 252.0, 261.0, 246.0, 241.0, 204.0, 209.0, 166.0, 182.0, 240.0, 252.0, 210.0, 211.0, 230.0, 240.0, 228.0, 248.0, 232.0, 215.0, 258.0, 246.0, 224.0, 188.0, 228.0, 242.0, 265.0, 254.0, 249.0, 267.0, 267.0, 266.0, 202.0, 202.0, 235.0, 206.0, 212.0, 235.0, 288.0, 288.0, 225.0, 234.0, 248.0, 262.0, 219.0, 189.0, 257.0, 253.0, 177.0, 152.0, 229.0, 221.0, 259.0, 251.0, 254.0, 
262.0, 261.0, 264.0, 267.0, 243.0, 221.0, 229.0, 244.0, 240.0]}, "sampler_perf": {"mean_env_wait_ms": 2.0104424851575393, "mean_processing_ms": 0.4692484587452608, "mean_inference_ms": 2.5596152283112645}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2328000, "num_steps_sampled": 1241600, "sample_time_ms": 21335.822, "load_time_ms": 37.307, "grad_time_ms": 9680.502, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.524354934323057e-30, "cur_lr": 0.0010000000474974513, "total_loss": 0.001703931367956102, "policy_loss": -0.005316242575645447, "vf_loss": 76.20516204833984, "vf_explained_var": 0.7805307507514954, "kl": 0.002101513324305415, "entropy": 1.2007073163986206, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1241600, "episodes_total": 3104, "training_iteration": 97, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-33-18", "timestamp": 1660249998, "time_this_iter_s": 31.63303232192993, "time_total_s": 8417.973336696625, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8417.973336696625, "timesteps_since_restore": 1241600, "iterations_since_restore": 97, "perf": {"cpu_util_percent": 34.47777777777778, "ram_util_percent": 58.24}} -{"episode_reward_max": 579.0, "episode_reward_min": 128.0, "episode_reward_mean": 478.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 294.0}, "policy_reward_mean": {"ppo": 239.385}, "custom_metrics": {"sparse_reward_mean": 166.0, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 146.77, "shaped_reward_min": 48, "shaped_reward_max": 179, 
"tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.02, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.94, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.22, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.14, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.7, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.74, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.12, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 
19, "potting_onion_agent_1_mean": 14.32, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.63, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.32, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.99, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.29, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.25, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.44, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.13, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.12, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 14.32, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 13.12, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 14.32, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [473.0, 522.0, 510.0, 525.0, 513.0, 525.0, 459.0, 410.0, 504.0, 522.0, 522.0, 507.0, 237.0, 527.0, 525.0, 472.0, 501.0, 459.0, 450.0, 530.0, 519.0, 525.0, 530.0, 579.0, 522.0, 519.0, 479.0, 573.0, 530.0, 513.0, 344.0, 447.0, 519.0, 513.0, 464.0, 513.0, 447.0, 513.0, 487.0, 413.0, 348.0, 492.0, 421.0, 470.0, 476.0, 447.0, 504.0, 412.0, 470.0, 519.0, 516.0, 533.0, 404.0, 441.0, 447.0, 576.0, 459.0, 510.0, 408.0, 
510.0, 329.0, 450.0, 510.0, 516.0, 525.0, 510.0, 450.0, 484.0, 525.0, 522.0, 522.0, 462.0, 446.0, 519.0, 573.0, 352.0, 473.0, 519.0, 507.0, 524.0, 519.0, 467.0, 513.0, 522.0, 504.0, 519.0, 456.0, 453.0, 168.0, 516.0, 476.0, 470.0, 507.0, 570.0, 516.0, 444.0, 128.0, 510.0, 579.0, 317.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [232.0, 241.0, 262.0, 260.0, 252.0, 258.0, 255.0, 270.0, 260.0, 253.0, 259.0, 266.0, 227.0, 232.0, 195.0, 215.0, 254.0, 250.0, 258.0, 264.0, 254.0, 268.0, 248.0, 259.0, 117.0, 120.0, 261.0, 266.0, 259.0, 266.0, 227.0, 245.0, 248.0, 253.0, 232.0, 227.0, 233.0, 217.0, 267.0, 263.0, 265.0, 254.0, 264.0, 261.0, 265.0, 265.0, 291.0, 288.0, 262.0, 260.0, 259.0, 260.0, 244.0, 235.0, 290.0, 283.0, 255.0, 275.0, 253.0, 260.0, 175.0, 169.0, 217.0, 230.0, 264.0, 255.0, 250.0, 263.0, 229.0, 235.0, 253.0, 260.0, 217.0, 230.0, 252.0, 261.0, 246.0, 241.0, 204.0, 209.0, 166.0, 182.0, 240.0, 252.0, 210.0, 211.0, 230.0, 240.0, 228.0, 248.0, 232.0, 215.0, 258.0, 246.0, 224.0, 188.0, 228.0, 242.0, 265.0, 254.0, 249.0, 267.0, 267.0, 266.0, 202.0, 202.0, 235.0, 206.0, 212.0, 235.0, 288.0, 288.0, 225.0, 234.0, 248.0, 262.0, 219.0, 189.0, 257.0, 253.0, 177.0, 152.0, 229.0, 221.0, 259.0, 251.0, 254.0, 262.0, 261.0, 264.0, 267.0, 243.0, 221.0, 229.0, 244.0, 240.0, 257.0, 268.0, 252.0, 270.0, 258.0, 264.0, 238.0, 224.0, 227.0, 219.0, 246.0, 273.0, 285.0, 288.0, 168.0, 184.0, 240.0, 233.0, 266.0, 253.0, 258.0, 249.0, 249.0, 275.0, 259.0, 260.0, 240.0, 227.0, 246.0, 267.0, 267.0, 
255.0, 256.0, 248.0, 269.0, 250.0, 225.0, 231.0, 225.0, 228.0, 85.0, 83.0, 265.0, 251.0, 232.0, 244.0, 222.0, 248.0, 253.0, 254.0, 286.0, 284.0, 256.0, 260.0, 223.0, 221.0, 65.0, 63.0, 256.0, 254.0, 294.0, 285.0, 162.0, 155.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9943851070346994, "mean_processing_ms": 0.4660638204377501, "mean_inference_ms": 2.5438091293770433}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2352000, "num_steps_sampled": 1254400, "sample_time_ms": 21313.715, "load_time_ms": 36.778, "grad_time_ms": 9499.638, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.2621774671615285e-30, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019398670410737395, "policy_loss": -0.005300960969179869, "vf_loss": 78.3524398803711, "vf_explained_var": 0.7676915526390076, "kl": 0.0015995064750313759, "entropy": 1.188806414604187, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1254400, "episodes_total": 3136, "training_iteration": 98, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-33-47", "timestamp": 1660250027, "time_this_iter_s": 29.219672203063965, "time_total_s": 8447.193008899689, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8447.193008899689, "timesteps_since_restore": 1254400, "iterations_since_restore": 98, "perf": {"cpu_util_percent": 33.91190476190476, "ram_util_percent": 58.29285714285714}} -{"episode_reward_max": 579.0, "episode_reward_min": 128.0, "episode_reward_mean": 485.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 294.0}, "policy_reward_mean": {"ppo": 242.725}, 
"custom_metrics": {"sparse_reward_mean": 168.4, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 148.65, "shaped_reward_min": 48, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.54, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.76, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.85, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.04, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.69, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, 
"useful_onion_drop_agent_1_mean": 0.74, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.5, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.11, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.61, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.25, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.28, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.5, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.11, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.5, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.11, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [458.0, 473.0, 516.0, 522.0, 524.0, 570.0, 453.0, 476.0, 458.0, 522.0, 576.0, 498.0, 519.0, 406.0, 516.0, 533.0, 450.0, 473.0, 525.0, 525.0, 464.0, 447.0, 441.0, 479.0, 467.0, 504.0, 507.0, 513.0, 519.0, 406.0, 
467.0, 498.0, 525.0, 510.0, 450.0, 484.0, 525.0, 522.0, 522.0, 462.0, 446.0, 519.0, 573.0, 352.0, 473.0, 519.0, 507.0, 524.0, 519.0, 467.0, 513.0, 522.0, 504.0, 519.0, 456.0, 453.0, 168.0, 516.0, 476.0, 470.0, 507.0, 570.0, 516.0, 444.0, 128.0, 510.0, 579.0, 317.0, 473.0, 522.0, 510.0, 525.0, 513.0, 525.0, 459.0, 410.0, 504.0, 522.0, 522.0, 507.0, 237.0, 527.0, 525.0, 472.0, 501.0, 459.0, 450.0, 530.0, 519.0, 525.0, 530.0, 579.0, 522.0, 519.0, 479.0, 573.0, 530.0, 513.0, 344.0, 447.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [217.0, 241.0, 237.0, 236.0, 268.0, 248.0, 250.0, 272.0, 256.0, 268.0, 285.0, 285.0, 233.0, 220.0, 231.0, 245.0, 227.0, 231.0, 263.0, 259.0, 288.0, 288.0, 245.0, 253.0, 257.0, 262.0, 197.0, 209.0, 256.0, 260.0, 251.0, 282.0, 246.0, 204.0, 232.0, 241.0, 261.0, 264.0, 263.0, 262.0, 223.0, 241.0, 221.0, 226.0, 224.0, 217.0, 241.0, 238.0, 233.0, 234.0, 253.0, 251.0, 259.0, 248.0, 256.0, 257.0, 254.0, 265.0, 185.0, 221.0, 229.0, 238.0, 234.0, 264.0, 261.0, 264.0, 267.0, 243.0, 221.0, 229.0, 244.0, 240.0, 257.0, 268.0, 252.0, 270.0, 258.0, 264.0, 238.0, 224.0, 227.0, 219.0, 246.0, 273.0, 285.0, 288.0, 168.0, 184.0, 240.0, 233.0, 266.0, 253.0, 258.0, 249.0, 249.0, 275.0, 259.0, 260.0, 240.0, 227.0, 246.0, 267.0, 267.0, 255.0, 256.0, 248.0, 269.0, 250.0, 225.0, 231.0, 225.0, 228.0, 85.0, 83.0, 265.0, 251.0, 232.0, 244.0, 222.0, 248.0, 253.0, 254.0, 286.0, 284.0, 256.0, 260.0, 223.0, 221.0, 65.0, 63.0, 256.0, 254.0, 294.0, 285.0, 162.0, 155.0, 232.0, 241.0, 
262.0, 260.0, 252.0, 258.0, 255.0, 270.0, 260.0, 253.0, 259.0, 266.0, 227.0, 232.0, 195.0, 215.0, 254.0, 250.0, 258.0, 264.0, 254.0, 268.0, 248.0, 259.0, 117.0, 120.0, 261.0, 266.0, 259.0, 266.0, 227.0, 245.0, 248.0, 253.0, 232.0, 227.0, 233.0, 217.0, 267.0, 263.0, 265.0, 254.0, 264.0, 261.0, 265.0, 265.0, 291.0, 288.0, 262.0, 260.0, 259.0, 260.0, 244.0, 235.0, 290.0, 283.0, 255.0, 275.0, 253.0, 260.0, 175.0, 169.0, 217.0, 230.0]}, "sampler_perf": {"mean_env_wait_ms": 1.978664058751599, "mean_processing_ms": 0.4629441310230788, "mean_inference_ms": 2.5282732021268366}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2376000, "num_steps_sampled": 1267200, "sample_time_ms": 21031.143, "load_time_ms": 36.859, "grad_time_ms": 9358.331, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 6.3108873358076425e-31, "cur_lr": 0.0010000000474974513, "total_loss": 0.00523378886282444, "policy_loss": -0.0017726494697853923, "vf_loss": 76.06880950927734, "vf_explained_var": 0.753373920917511, "kl": 0.001648509525693953, "entropy": 1.2008789777755737, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1267200, "episodes_total": 3168, "training_iteration": 99, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-34-16", "timestamp": 1660250056, "time_this_iter_s": 28.433568000793457, "time_total_s": 8475.626576900482, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8475.626576900482, "timesteps_since_restore": 1267200, "iterations_since_restore": 99, "perf": {"cpu_util_percent": 33.795, "ram_util_percent": 58.2625}} -{"episode_reward_max": 582.0, "episode_reward_min": 128.0, 
"episode_reward_mean": 480.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 63.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 240.47}, "custom_metrics": {"sparse_reward_mean": 166.8, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 147.34, "shaped_reward_min": 48, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.37, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.81, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.59, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.1, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.32, 
"onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.69, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.32, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.21, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.45, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.92, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.27, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.36, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.23, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 13.32, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, 
"optimal_onion_potting_agent_1_mean": 14.21, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.32, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.21, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 504.0, 524.0, 513.0, 347.0, 
522.0, 355.0, 336.0, 533.0, 459.0, 522.0, 384.0, 576.0, 567.0, 177.0, 582.0, 467.0, 470.0, 519.0, 533.0, 398.0, 510.0, 513.0, 441.0, 398.0, 516.0, 409.0, 507.0, 525.0, 516.0, 530.0, 404.0, 128.0, 510.0, 579.0, 317.0, 473.0, 522.0, 510.0, 525.0, 513.0, 525.0, 459.0, 410.0, 504.0, 522.0, 522.0, 507.0, 237.0, 527.0, 525.0, 472.0, 501.0, 459.0, 450.0, 530.0, 519.0, 525.0, 530.0, 579.0, 522.0, 519.0, 479.0, 573.0, 530.0, 513.0, 344.0, 447.0, 458.0, 473.0, 516.0, 522.0, 524.0, 570.0, 453.0, 476.0, 458.0, 522.0, 576.0, 498.0, 519.0, 406.0, 516.0, 533.0, 450.0, 473.0, 525.0, 525.0, 464.0, 447.0, 441.0, 479.0, 467.0, 504.0, 507.0, 513.0, 519.0, 406.0, 467.0, 498.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [252.0, 273.0, 265.0, 239.0, 251.0, 273.0, 254.0, 259.0, 188.0, 159.0, 260.0, 262.0, 168.0, 187.0, 151.0, 185.0, 275.0, 258.0, 239.0, 220.0, 264.0, 258.0, 187.0, 197.0, 288.0, 288.0, 270.0, 297.0, 89.0, 88.0, 284.0, 298.0, 241.0, 226.0, 241.0, 229.0, 259.0, 260.0, 254.0, 279.0, 194.0, 204.0, 259.0, 251.0, 256.0, 257.0, 227.0, 214.0, 186.0, 212.0, 240.0, 276.0, 193.0, 216.0, 242.0, 265.0, 259.0, 266.0, 262.0, 254.0, 276.0, 254.0, 205.0, 199.0, 65.0, 63.0, 256.0, 254.0, 294.0, 285.0, 162.0, 155.0, 232.0, 241.0, 262.0, 260.0, 252.0, 258.0, 255.0, 270.0, 260.0, 253.0, 259.0, 266.0, 227.0, 232.0, 195.0, 215.0, 254.0, 250.0, 258.0, 264.0, 254.0, 268.0, 248.0, 259.0, 117.0, 120.0, 261.0, 266.0, 259.0, 266.0, 227.0, 245.0, 248.0, 253.0, 232.0, 227.0, 233.0, 217.0, 267.0, 263.0, 265.0, 
254.0, 264.0, 261.0, 265.0, 265.0, 291.0, 288.0, 262.0, 260.0, 259.0, 260.0, 244.0, 235.0, 290.0, 283.0, 255.0, 275.0, 253.0, 260.0, 175.0, 169.0, 217.0, 230.0, 217.0, 241.0, 237.0, 236.0, 268.0, 248.0, 250.0, 272.0, 256.0, 268.0, 285.0, 285.0, 233.0, 220.0, 231.0, 245.0, 227.0, 231.0, 263.0, 259.0, 288.0, 288.0, 245.0, 253.0, 257.0, 262.0, 197.0, 209.0, 256.0, 260.0, 251.0, 282.0, 246.0, 204.0, 232.0, 241.0, 261.0, 264.0, 263.0, 262.0, 223.0, 241.0, 221.0, 226.0, 224.0, 217.0, 241.0, 238.0, 233.0, 234.0, 253.0, 251.0, 259.0, 248.0, 256.0, 257.0, 254.0, 265.0, 185.0, 221.0, 229.0, 238.0, 234.0, 264.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9632342892537746, "mean_processing_ms": 0.4598755283783044, "mean_inference_ms": 2.5128753018749035}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2400000, "num_steps_sampled": 1280000, "sample_time_ms": 20819.102, "load_time_ms": 36.792, "grad_time_ms": 9258.115, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.1554436679038213e-31, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015751657774671912, "policy_loss": -0.008594638668000698, "vf_loss": 76.2179183959961, "vf_explained_var": 0.7723303437232971, "kl": 0.002320505678653717, "entropy": 1.2046717405319214, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1280000, "episodes_total": 3200, "training_iteration": 100, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-34-45", "timestamp": 1660250085, "time_this_iter_s": 29.270292043685913, "time_total_s": 8504.896868944168, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": 
"tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8504.896868944168, "timesteps_since_restore": 1280000, 
"iterations_since_restore": 100, "perf": {"cpu_util_percent": 32.380487804878044, "ram_util_percent": 58.27560975609755}} -{"episode_reward_max": 582.0, "episode_reward_min": 177.0, "episode_reward_mean": 476.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 238.185}, "custom_metrics": {"sparse_reward_mean": 165.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 145.97, "shaped_reward_min": 57, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.43, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.61, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.55, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.93, 
"useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.79, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 1.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.83, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.36, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.91, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.54, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.31, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, 
"soup_drop_agent_1_mean": 0.13, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 13.36, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.91, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.36, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.91, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [447.0, 450.0, 522.0, 513.0, 519.0, 351.0, 573.0, 425.0, 473.0, 530.0, 479.0, 404.0, 470.0, 522.0, 525.0, 516.0, 197.0, 458.0, 519.0, 464.0, 180.0, 299.0, 519.0, 527.0, 506.0, 516.0, 522.0, 473.0, 570.0, 504.0, 530.0, 513.0, 530.0, 513.0, 344.0, 447.0, 458.0, 473.0, 516.0, 522.0, 524.0, 570.0, 453.0, 476.0, 458.0, 522.0, 576.0, 498.0, 519.0, 406.0, 516.0, 533.0, 450.0, 473.0, 525.0, 525.0, 464.0, 447.0, 441.0, 479.0, 467.0, 504.0, 507.0, 513.0, 519.0, 406.0, 467.0, 498.0, 525.0, 504.0, 524.0, 513.0, 347.0, 522.0, 355.0, 336.0, 533.0, 459.0, 522.0, 384.0, 576.0, 567.0, 177.0, 582.0, 467.0, 470.0, 519.0, 533.0, 398.0, 510.0, 513.0, 441.0, 398.0, 516.0, 409.0, 507.0, 525.0, 516.0, 530.0, 404.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [225.0, 222.0, 232.0, 218.0, 254.0, 268.0, 254.0, 259.0, 258.0, 261.0, 180.0, 171.0, 288.0, 285.0, 221.0, 204.0, 256.0, 217.0, 269.0, 261.0, 236.0, 243.0, 197.0, 207.0, 242.0, 228.0, 260.0, 262.0, 262.0, 263.0, 270.0, 246.0, 102.0, 95.0, 217.0, 241.0, 268.0, 251.0, 217.0, 247.0, 86.0, 94.0, 143.0, 156.0, 262.0, 257.0, 259.0, 268.0, 261.0, 245.0, 241.0, 275.0, 260.0, 262.0, 220.0, 253.0, 285.0, 285.0, 240.0, 264.0, 272.0, 258.0, 244.0, 269.0, 255.0, 275.0, 253.0, 260.0, 175.0, 169.0, 217.0, 230.0, 217.0, 241.0, 237.0, 236.0, 268.0, 248.0, 250.0, 
272.0, 256.0, 268.0, 285.0, 285.0, 233.0, 220.0, 231.0, 245.0, 227.0, 231.0, 263.0, 259.0, 288.0, 288.0, 245.0, 253.0, 257.0, 262.0, 197.0, 209.0, 256.0, 260.0, 251.0, 282.0, 246.0, 204.0, 232.0, 241.0, 261.0, 264.0, 263.0, 262.0, 223.0, 241.0, 221.0, 226.0, 224.0, 217.0, 241.0, 238.0, 233.0, 234.0, 253.0, 251.0, 259.0, 248.0, 256.0, 257.0, 254.0, 265.0, 185.0, 221.0, 229.0, 238.0, 234.0, 264.0, 252.0, 273.0, 265.0, 239.0, 251.0, 273.0, 254.0, 259.0, 188.0, 159.0, 260.0, 262.0, 168.0, 187.0, 151.0, 185.0, 275.0, 258.0, 239.0, 220.0, 264.0, 258.0, 187.0, 197.0, 288.0, 288.0, 270.0, 297.0, 89.0, 88.0, 284.0, 298.0, 241.0, 226.0, 241.0, 229.0, 259.0, 260.0, 254.0, 279.0, 194.0, 204.0, 259.0, 251.0, 256.0, 257.0, 227.0, 214.0, 186.0, 212.0, 240.0, 276.0, 193.0, 216.0, 242.0, 265.0, 259.0, 266.0, 262.0, 254.0, 276.0, 254.0, 205.0, 199.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9480966486925166, "mean_processing_ms": 0.45685958478274097, "mean_inference_ms": 2.497748516500124}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2424000, "num_steps_sampled": 1292800, "sample_time_ms": 20962.686, "load_time_ms": 36.873, "grad_time_ms": 9154.171, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.5777218339519106e-31, "cur_lr": 0.0010000000474974513, "total_loss": 0.00011571295181056485, "policy_loss": -0.006676681339740753, "vf_loss": 73.92855834960938, "vf_explained_var": 0.7952176928520203, "kl": 0.0016933353617787361, "entropy": 1.2009243965148926, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1292800, "episodes_total": 3232, "training_iteration": 101, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-35-15", "timestamp": 1660250115, "time_this_iter_s": 29.051042795181274, "time_total_s": 8533.94791173935, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, 
"num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8533.94791173935, "timesteps_since_restore": 1292800, "iterations_since_restore": 101, "perf": {"cpu_util_percent": 29.43571428571429, "ram_util_percent": 58.27142857142857}} -{"episode_reward_max": 582.0, "episode_reward_min": 177.0, "episode_reward_mean": 479.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 239.545}, "custom_metrics": {"sparse_reward_mean": 166.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 147.09, "shaped_reward_min": 57, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.17, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.98, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.22, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.32, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.87, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.88, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.04, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.4, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.26, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.4, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.45, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.29, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.13, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 13.04, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.4, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.04, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.4, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 487.0, 513.0, 570.0, 513.0, 570.0, 522.0, 476.0, 446.0, 453.0, 459.0, 522.0, 504.0, 468.0, 516.0, 479.0, 464.0, 522.0, 516.0, 467.0, 444.0, 401.0, 522.0, 510.0, 530.0, 513.0, 482.0, 507.0, 516.0, 516.0, 513.0, 470.0, 519.0, 406.0, 467.0, 498.0, 525.0, 504.0, 524.0, 513.0, 347.0, 522.0, 355.0, 336.0, 533.0, 459.0, 522.0, 384.0, 576.0, 567.0, 177.0, 582.0, 467.0, 470.0, 519.0, 533.0, 398.0, 510.0, 513.0, 441.0, 398.0, 516.0, 409.0, 507.0, 525.0, 516.0, 530.0, 404.0, 447.0, 450.0, 522.0, 513.0, 519.0, 351.0, 573.0, 425.0, 473.0, 530.0, 479.0, 404.0, 470.0, 522.0, 525.0, 516.0, 197.0, 458.0, 519.0, 464.0, 180.0, 299.0, 519.0, 527.0, 506.0, 516.0, 522.0, 473.0, 570.0, 504.0, 530.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 269.0, 234.0, 253.0, 261.0, 252.0, 282.0, 288.0, 251.0, 262.0, 287.0, 283.0, 265.0, 257.0, 243.0, 233.0, 212.0, 234.0, 244.0, 209.0, 226.0, 233.0, 254.0, 268.0, 247.0, 257.0, 236.0, 232.0, 265.0, 251.0, 240.0, 239.0, 226.0, 238.0, 259.0, 263.0, 237.0, 279.0, 233.0, 234.0, 213.0, 231.0, 196.0, 205.0, 246.0, 276.0, 251.0, 259.0, 272.0, 
258.0, 256.0, 257.0, 226.0, 256.0, 244.0, 263.0, 267.0, 249.0, 260.0, 256.0, 251.0, 262.0, 233.0, 237.0, 254.0, 265.0, 185.0, 221.0, 229.0, 238.0, 234.0, 264.0, 252.0, 273.0, 265.0, 239.0, 251.0, 273.0, 254.0, 259.0, 188.0, 159.0, 260.0, 262.0, 168.0, 187.0, 151.0, 185.0, 275.0, 258.0, 239.0, 220.0, 264.0, 258.0, 187.0, 197.0, 288.0, 288.0, 270.0, 297.0, 89.0, 88.0, 284.0, 298.0, 241.0, 226.0, 241.0, 229.0, 259.0, 260.0, 254.0, 279.0, 194.0, 204.0, 259.0, 251.0, 256.0, 257.0, 227.0, 214.0, 186.0, 212.0, 240.0, 276.0, 193.0, 216.0, 242.0, 265.0, 259.0, 266.0, 262.0, 254.0, 276.0, 254.0, 205.0, 199.0, 225.0, 222.0, 232.0, 218.0, 254.0, 268.0, 254.0, 259.0, 258.0, 261.0, 180.0, 171.0, 288.0, 285.0, 221.0, 204.0, 256.0, 217.0, 269.0, 261.0, 236.0, 243.0, 197.0, 207.0, 242.0, 228.0, 260.0, 262.0, 262.0, 263.0, 270.0, 246.0, 102.0, 95.0, 217.0, 241.0, 268.0, 251.0, 217.0, 247.0, 86.0, 94.0, 143.0, 156.0, 262.0, 257.0, 259.0, 268.0, 261.0, 245.0, 241.0, 275.0, 260.0, 262.0, 220.0, 253.0, 285.0, 285.0, 240.0, 264.0, 272.0, 258.0, 244.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9332518889221049, "mean_processing_ms": 0.4539033753742656, "mean_inference_ms": 2.483090315311066}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2448000, "num_steps_sampled": 1305600, "sample_time_ms": 21004.371, "load_time_ms": 37.153, "grad_time_ms": 9035.152, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.888609169759553e-32, "cur_lr": 0.0010000000474974513, "total_loss": 0.00245770625770092, "policy_loss": -0.003976076375693083, "vf_loss": 70.27108764648438, "vf_explained_var": 0.7766384482383728, "kl": 0.001931712031364441, "entropy": 1.1866337060928345, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1305600, "episodes_total": 3264, "training_iteration": 102, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-35-45", "timestamp": 
1660250145, "time_this_iter_s": 29.56961703300476, "time_total_s": 8563.517528772354, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", 
"ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 
0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8563.517528772354, "timesteps_since_restore": 1305600, "iterations_since_restore": 102, "perf": {"cpu_util_percent": 29.607142857142858, "ram_util_percent": 58.27380952380951}} -{"episode_reward_max": 579.0, "episode_reward_min": 180.0, "episode_reward_mean": 488.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 296.0}, "policy_reward_mean": {"ppo": 244.08}, "custom_metrics": {"sparse_reward_mean": 169.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 149.76, "shaped_reward_min": 60, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 
0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.19, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.44, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.55, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.71, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.85, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.33, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 14.56, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.73, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.96, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.2, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.21, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.54, "soup_pickup_agent_0_min": 1, 
"soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.43, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.22, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.1, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 13.33, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 14.56, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.33, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 14.56, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 525.0, 396.0, 513.0, 479.0, 455.0, 525.0, 518.0, 519.0, 350.0, 579.0, 516.0, 519.0, 504.0, 455.0, 510.0, 384.0, 570.0, 498.0, 473.0, 519.0, 576.0, 516.0, 573.0, 465.0, 510.0, 533.0, 504.0, 525.0, 450.0, 482.0, 444.0, 525.0, 516.0, 530.0, 404.0, 447.0, 450.0, 522.0, 513.0, 519.0, 351.0, 573.0, 425.0, 473.0, 530.0, 479.0, 404.0, 470.0, 522.0, 525.0, 516.0, 197.0, 458.0, 519.0, 464.0, 180.0, 299.0, 519.0, 527.0, 506.0, 516.0, 522.0, 473.0, 570.0, 504.0, 530.0, 513.0, 530.0, 487.0, 513.0, 570.0, 513.0, 570.0, 522.0, 476.0, 446.0, 453.0, 459.0, 522.0, 504.0, 468.0, 516.0, 479.0, 464.0, 522.0, 516.0, 467.0, 444.0, 401.0, 522.0, 510.0, 530.0, 513.0, 482.0, 507.0, 516.0, 516.0, 513.0, 470.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [249.0, 270.0, 262.0, 263.0, 196.0, 200.0, 259.0, 254.0, 234.0, 245.0, 226.0, 229.0, 260.0, 265.0, 267.0, 251.0, 251.0, 268.0, 
176.0, 174.0, 285.0, 294.0, 259.0, 257.0, 262.0, 257.0, 260.0, 244.0, 219.0, 236.0, 251.0, 259.0, 179.0, 205.0, 277.0, 293.0, 250.0, 248.0, 244.0, 229.0, 256.0, 263.0, 280.0, 296.0, 254.0, 262.0, 290.0, 283.0, 242.0, 223.0, 264.0, 246.0, 260.0, 273.0, 245.0, 259.0, 267.0, 258.0, 222.0, 228.0, 236.0, 246.0, 240.0, 204.0, 259.0, 266.0, 262.0, 254.0, 276.0, 254.0, 205.0, 199.0, 225.0, 222.0, 232.0, 218.0, 254.0, 268.0, 254.0, 259.0, 258.0, 261.0, 180.0, 171.0, 288.0, 285.0, 221.0, 204.0, 256.0, 217.0, 269.0, 261.0, 236.0, 243.0, 197.0, 207.0, 242.0, 228.0, 260.0, 262.0, 262.0, 263.0, 270.0, 246.0, 102.0, 95.0, 217.0, 241.0, 268.0, 251.0, 217.0, 247.0, 86.0, 94.0, 143.0, 156.0, 262.0, 257.0, 259.0, 268.0, 261.0, 245.0, 241.0, 275.0, 260.0, 262.0, 220.0, 253.0, 285.0, 285.0, 240.0, 264.0, 272.0, 258.0, 244.0, 269.0, 261.0, 269.0, 234.0, 253.0, 261.0, 252.0, 282.0, 288.0, 251.0, 262.0, 287.0, 283.0, 265.0, 257.0, 243.0, 233.0, 212.0, 234.0, 244.0, 209.0, 226.0, 233.0, 254.0, 268.0, 247.0, 257.0, 236.0, 232.0, 265.0, 251.0, 240.0, 239.0, 226.0, 238.0, 259.0, 263.0, 237.0, 279.0, 233.0, 234.0, 213.0, 231.0, 196.0, 205.0, 246.0, 276.0, 251.0, 259.0, 272.0, 258.0, 256.0, 257.0, 226.0, 256.0, 244.0, 263.0, 267.0, 249.0, 260.0, 256.0, 251.0, 262.0, 233.0, 237.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9186906528346697, "mean_processing_ms": 0.45100660003140314, "mean_inference_ms": 2.4688879649561444}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2472000, "num_steps_sampled": 1318400, "sample_time_ms": 21037.799, "load_time_ms": 37.038, "grad_time_ms": 8875.986, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.9443045848797766e-32, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011967640602961183, "policy_loss": -0.0056260935962200165, "vf_loss": 74.17142486572266, "vf_explained_var": 0.7644608616828918, "kl": 0.0018772757612168789, "entropy": 1.188565731048584, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 
1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1318400, "episodes_total": 3296, "training_iteration": 103, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-36-14", "timestamp": 1660250174, "time_this_iter_s": 29.66763973236084, "time_total_s": 8593.185168504715, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8593.185168504715, "timesteps_since_restore": 1318400, "iterations_since_restore": 103, "perf": {"cpu_util_percent": 31.43809523809524, "ram_util_percent": 58.23095238095237}} -{"episode_reward_max": 579.0, "episode_reward_min": 350.0, "episode_reward_mean": 503.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 251.985}, "custom_metrics": {"sparse_reward_mean": 175.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 153.17, "shaped_reward_min": 104, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.39, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.84, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.53, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.76, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.51, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.58, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 14.95, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.84, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.79, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.24, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.71, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.62, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.34, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.58, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 14.95, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.58, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 14.95, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 573.0, 513.0, 519.0, 393.0, 564.0, 570.0, 570.0, 513.0, 513.0, 522.0, 492.0, 519.0, 413.0, 570.0, 482.0, 570.0, 510.0, 513.0, 462.0, 522.0, 498.0, 570.0, 498.0, 518.0, 510.0, 522.0, 504.0, 524.0, 536.0, 447.0, 452.0, 570.0, 504.0, 530.0, 513.0, 530.0, 487.0, 513.0, 570.0, 513.0, 570.0, 522.0, 476.0, 446.0, 453.0, 459.0, 522.0, 504.0, 468.0, 516.0, 479.0, 464.0, 522.0, 516.0, 467.0, 444.0, 401.0, 522.0, 510.0, 530.0, 513.0, 482.0, 507.0, 516.0, 516.0, 513.0, 470.0, 519.0, 525.0, 396.0, 513.0, 479.0, 455.0, 525.0, 518.0, 519.0, 350.0, 579.0, 516.0, 519.0, 504.0, 455.0, 510.0, 384.0, 570.0, 498.0, 473.0, 519.0, 576.0, 516.0, 573.0, 465.0, 510.0, 533.0, 504.0, 525.0, 450.0, 482.0, 444.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 280.0, 293.0, 280.0, 255.0, 258.0, 259.0, 260.0, 194.0, 199.0, 272.0, 292.0, 295.0, 275.0, 278.0, 292.0, 250.0, 263.0, 265.0, 248.0, 264.0, 258.0, 246.0, 246.0, 259.0, 260.0, 217.0, 196.0, 276.0, 294.0, 249.0, 233.0, 272.0, 298.0, 243.0, 267.0, 259.0, 254.0, 237.0, 225.0, 249.0, 273.0, 255.0, 243.0, 293.0, 277.0, 250.0, 248.0, 248.0, 270.0, 259.0, 251.0, 260.0, 262.0, 251.0, 253.0, 269.0, 255.0, 281.0, 255.0, 227.0, 220.0, 227.0, 225.0, 285.0, 285.0, 240.0, 264.0, 272.0, 258.0, 244.0, 269.0, 261.0, 269.0, 234.0, 253.0, 261.0, 252.0, 282.0, 288.0, 251.0, 262.0, 287.0, 283.0, 265.0, 257.0, 243.0, 233.0, 212.0, 234.0, 244.0, 209.0, 226.0, 233.0, 254.0, 268.0, 247.0, 257.0, 236.0, 232.0, 265.0, 251.0, 240.0, 239.0, 226.0, 238.0, 259.0, 263.0, 237.0, 279.0, 233.0, 234.0, 213.0, 231.0, 196.0, 205.0, 246.0, 276.0, 251.0, 259.0, 272.0, 258.0, 256.0, 257.0, 226.0, 256.0, 244.0, 263.0, 267.0, 249.0, 260.0, 256.0, 251.0, 262.0, 233.0, 237.0, 249.0, 270.0, 262.0, 263.0, 196.0, 200.0, 259.0, 254.0, 234.0, 245.0, 226.0, 229.0, 260.0, 265.0, 267.0, 251.0, 251.0, 268.0, 176.0, 174.0, 285.0, 294.0, 259.0, 257.0, 262.0, 257.0, 260.0, 244.0, 219.0, 236.0, 251.0, 259.0, 179.0, 205.0, 277.0, 293.0, 250.0, 248.0, 244.0, 229.0, 256.0, 263.0, 280.0, 296.0, 254.0, 262.0, 290.0, 283.0, 242.0, 223.0, 264.0, 246.0, 260.0, 273.0, 245.0, 259.0, 267.0, 258.0, 222.0, 228.0, 236.0, 246.0, 240.0, 204.0]}, "sampler_perf": {"mean_env_wait_ms": 1.9044104614216666, "mean_processing_ms": 0.44816608164827715, "mean_inference_ms": 2.4550939840364316}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2496000, "num_steps_sampled": 1331200, "sample_time_ms": 21059.641, "load_time_ms": 36.972, "grad_time_ms": 8872.978, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.9721522924398883e-32, "cur_lr": 0.0010000000474974513, "total_loss": 0.002359784208238125, "policy_loss": -0.004356598015874624, "vf_loss": 
73.04959106445312, "vf_explained_var": 0.7670376896858215, "kl": 0.0017897128127515316, "entropy": 1.1771515607833862, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1331200, "episodes_total": 3328, "training_iteration": 104, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-36-45", "timestamp": 1660250205, "time_this_iter_s": 30.991883993148804, "time_total_s": 8624.177052497864, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": 
{"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": 
{"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8624.177052497864, "timesteps_since_restore": 1331200, "iterations_since_restore": 104, "perf": {"cpu_util_percent": 35.49545454545455, "ram_util_percent": 58.338636363636354}} -{"episode_reward_max": 579.0, "episode_reward_min": 350.0, "episode_reward_mean": 506.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 253.335}, "custom_metrics": {"sparse_reward_mean": 176.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 153.87, "shaped_reward_min": 104, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 
0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.18, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.08, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.52, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.93, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.56, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.78, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.76, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.93, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.67, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.9, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.73, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.64, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.31, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.78, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.76, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.78, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.76, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [467.0, 525.0, 527.0, 464.0, 516.0, 459.0, 516.0, 570.0, 519.0, 522.0, 416.0, 504.0, 507.0, 513.0, 458.0, 489.0, 522.0, 519.0, 510.0, 519.0, 525.0, 576.0, 476.0, 522.0, 513.0, 530.0, 530.0, 459.0, 516.0, 498.0, 530.0, 576.0, 516.0, 516.0, 513.0, 470.0, 519.0, 525.0, 396.0, 513.0, 479.0, 455.0, 525.0, 518.0, 519.0, 350.0, 579.0, 516.0, 519.0, 504.0, 455.0, 510.0, 384.0, 570.0, 498.0, 473.0, 519.0, 576.0, 516.0, 573.0, 465.0, 510.0, 533.0, 504.0, 525.0, 450.0, 482.0, 444.0, 573.0, 573.0, 513.0, 519.0, 393.0, 564.0, 570.0, 570.0, 513.0, 513.0, 522.0, 492.0, 519.0, 413.0, 570.0, 482.0, 570.0, 510.0, 513.0, 462.0, 522.0, 498.0, 570.0, 498.0, 518.0, 510.0, 522.0, 504.0, 524.0, 536.0, 447.0, 452.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [237.0, 230.0, 259.0, 266.0, 271.0, 256.0, 211.0, 253.0, 256.0, 260.0, 232.0, 227.0, 267.0, 249.0, 280.0, 290.0, 262.0, 257.0, 259.0, 263.0, 210.0, 206.0, 247.0, 257.0, 254.0, 253.0, 252.0, 261.0, 231.0, 227.0, 240.0, 249.0, 273.0, 249.0, 259.0, 260.0, 253.0, 257.0, 270.0, 249.0, 255.0, 270.0, 284.0, 292.0, 247.0, 229.0, 265.0, 257.0, 245.0, 268.0, 260.0, 270.0, 273.0, 257.0, 214.0, 245.0, 260.0, 256.0, 251.0, 247.0, 262.0, 268.0, 291.0, 285.0, 267.0, 249.0, 260.0, 256.0, 251.0, 262.0, 233.0, 237.0, 249.0, 270.0, 262.0, 263.0, 196.0, 200.0, 259.0, 254.0, 234.0, 245.0, 226.0, 229.0, 260.0, 265.0, 267.0, 251.0, 251.0, 268.0, 176.0, 174.0, 285.0, 294.0, 259.0, 257.0, 262.0, 257.0, 260.0, 244.0, 219.0, 236.0, 251.0, 259.0, 179.0, 205.0, 277.0, 293.0, 250.0, 248.0, 244.0, 229.0, 256.0, 263.0, 280.0, 296.0, 254.0, 262.0, 290.0, 283.0, 242.0, 223.0, 264.0, 246.0, 260.0, 273.0, 245.0, 259.0, 267.0, 258.0, 222.0, 228.0, 236.0, 246.0, 240.0, 204.0, 293.0, 280.0, 293.0, 280.0, 255.0, 258.0, 259.0, 260.0, 194.0, 199.0, 272.0, 292.0, 295.0, 275.0, 278.0, 292.0, 250.0, 263.0, 265.0, 248.0, 264.0, 258.0, 246.0, 246.0, 259.0, 260.0, 217.0, 196.0, 276.0, 294.0, 249.0, 233.0, 272.0, 298.0, 243.0, 267.0, 259.0, 254.0, 237.0, 225.0, 249.0, 273.0, 255.0, 243.0, 293.0, 277.0, 250.0, 248.0, 248.0, 270.0, 259.0, 251.0, 260.0, 262.0, 251.0, 253.0, 269.0, 255.0, 281.0, 255.0, 227.0, 220.0, 227.0, 225.0]}, "sampler_perf": {"mean_env_wait_ms": 1.890394604549175, "mean_processing_ms": 0.4453764179105544, "mean_inference_ms": 2.441389523770417}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2520000, "num_steps_sampled": 1344000, "sample_time_ms": 20963.704, "load_time_ms": 37.317, "grad_time_ms": 
8768.329, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 9.860761462199441e-33, "cur_lr": 0.0010000000474974513, "total_loss": 0.002573954639956355, "policy_loss": -0.004460552707314491, "vf_loss": 76.1985855102539, "vf_explained_var": 0.7691051959991455, "kl": 0.002485529985278845, "entropy": 1.1707229614257812, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1344000, "episodes_total": 3360, "training_iteration": 105, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-37-15", "timestamp": 1660250235, "time_this_iter_s": 29.730670928955078, "time_total_s": 8653.907723426819, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], 
"same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": 
"sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8653.907723426819, "timesteps_since_restore": 1344000, "iterations_since_restore": 105, "perf": {"cpu_util_percent": 34.530952380952385, "ram_util_percent": 58.22619047619047}} -{"episode_reward_max": 576.0, "episode_reward_min": 390.0, "episode_reward_mean": 508.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 254.47}, "custom_metrics": {"sparse_reward_mean": 177.0, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 154.94, "shaped_reward_min": 110, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, 
"useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.12, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.03, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.45, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.97, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.46, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.88, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.73, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, 
"useful_dish_pickup_agent_0_mean": 4.11, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.71, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.79, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.27, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.76, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.19, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.88, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.73, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.88, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.73, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, 
"viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [462.0, 533.0, 546.0, 518.0, 522.0, 513.0, 492.0, 501.0, 525.0, 522.0, 467.0, 390.0, 522.0, 527.0, 473.0, 525.0, 519.0, 519.0, 468.0, 504.0, 564.0, 470.0, 513.0, 516.0, 522.0, 421.0, 525.0, 573.0, 525.0, 527.0, 525.0, 516.0, 525.0, 450.0, 482.0, 444.0, 573.0, 573.0, 513.0, 519.0, 393.0, 564.0, 570.0, 570.0, 513.0, 513.0, 522.0, 492.0, 519.0, 413.0, 570.0, 482.0, 570.0, 510.0, 513.0, 462.0, 522.0, 498.0, 570.0, 498.0, 518.0, 510.0, 522.0, 504.0, 524.0, 536.0, 447.0, 452.0, 467.0, 525.0, 527.0, 464.0, 516.0, 459.0, 516.0, 570.0, 519.0, 522.0, 416.0, 504.0, 507.0, 513.0, 458.0, 489.0, 522.0, 519.0, 510.0, 519.0, 525.0, 576.0, 476.0, 522.0, 513.0, 530.0, 530.0, 459.0, 516.0, 498.0, 530.0, 576.0], 
"episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [238.0, 224.0, 270.0, 263.0, 275.0, 271.0, 267.0, 251.0, 254.0, 268.0, 245.0, 268.0, 256.0, 236.0, 270.0, 231.0, 273.0, 252.0, 269.0, 253.0, 231.0, 236.0, 191.0, 199.0, 267.0, 255.0, 272.0, 255.0, 241.0, 232.0, 260.0, 265.0, 266.0, 253.0, 259.0, 260.0, 238.0, 230.0, 260.0, 244.0, 281.0, 283.0, 231.0, 239.0, 259.0, 254.0, 254.0, 262.0, 268.0, 254.0, 210.0, 211.0, 263.0, 262.0, 285.0, 288.0, 259.0, 266.0, 248.0, 279.0, 268.0, 257.0, 264.0, 252.0, 267.0, 258.0, 222.0, 228.0, 236.0, 246.0, 240.0, 204.0, 293.0, 280.0, 293.0, 280.0, 255.0, 258.0, 259.0, 260.0, 194.0, 199.0, 272.0, 292.0, 295.0, 275.0, 278.0, 292.0, 250.0, 263.0, 265.0, 248.0, 264.0, 258.0, 246.0, 246.0, 259.0, 260.0, 217.0, 196.0, 276.0, 294.0, 249.0, 233.0, 272.0, 298.0, 243.0, 267.0, 259.0, 254.0, 237.0, 225.0, 249.0, 273.0, 255.0, 243.0, 293.0, 277.0, 250.0, 248.0, 248.0, 270.0, 259.0, 251.0, 260.0, 262.0, 251.0, 253.0, 269.0, 255.0, 281.0, 255.0, 227.0, 220.0, 227.0, 225.0, 237.0, 230.0, 259.0, 266.0, 271.0, 256.0, 211.0, 253.0, 256.0, 260.0, 232.0, 227.0, 267.0, 249.0, 280.0, 290.0, 262.0, 257.0, 259.0, 263.0, 210.0, 206.0, 247.0, 257.0, 254.0, 253.0, 252.0, 261.0, 231.0, 227.0, 240.0, 249.0, 273.0, 249.0, 259.0, 260.0, 253.0, 257.0, 270.0, 249.0, 255.0, 270.0, 284.0, 292.0, 247.0, 229.0, 265.0, 257.0, 245.0, 268.0, 260.0, 270.0, 273.0, 257.0, 214.0, 245.0, 260.0, 256.0, 251.0, 247.0, 262.0, 268.0, 291.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 
1.8766403613441696, "mean_processing_ms": 0.44263891296008706, "mean_inference_ms": 2.427842279334728}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2544000, "num_steps_sampled": 1356800, "sample_time_ms": 20984.654, "load_time_ms": 37.36, "grad_time_ms": 8798.762, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 4.930380731099721e-33, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012900071451440454, "policy_loss": -0.00579724321141839, "vf_loss": 76.69783782958984, "vf_explained_var": 0.7642709612846375, "kl": 0.0020595567766577005, "entropy": 1.1650750637054443, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1356800, "episodes_total": 3392, "training_iteration": 106, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-37-46", "timestamp": 1660250266, "time_this_iter_s": 30.89556574821472, "time_total_s": 8684.803289175034, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": 
{}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8684.803289175034, "timesteps_since_restore": 1356800, "iterations_since_restore": 106, "perf": {"cpu_util_percent": 35.19772727272727, "ram_util_percent": 58.284090909090885}} -{"episode_reward_max": 576.0, "episode_reward_min": 390.0, "episode_reward_mean": 509.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 293.0}, "policy_reward_mean": {"ppo": 254.605}, "custom_metrics": {"sparse_reward_mean": 176.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 155.61, "shaped_reward_min": 110, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, 
"tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.78, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.97, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.83, 
"potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 4.17, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.17, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.24, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.76, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.15, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.97, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.83, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.97, 
"viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.83, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 576.0, 468.0, 510.0, 522.0, 510.0, 497.0, 453.0, 462.0, 519.0, 530.0, 522.0, 530.0, 527.0, 561.0, 525.0, 507.0, 522.0, 462.0, 501.0, 522.0, 522.0, 522.0, 530.0, 525.0, 516.0, 522.0, 522.0, 515.0, 573.0, 522.0, 404.0, 524.0, 536.0, 447.0, 452.0, 467.0, 525.0, 527.0, 464.0, 516.0, 459.0, 516.0, 570.0, 519.0, 522.0, 416.0, 504.0, 507.0, 513.0, 458.0, 489.0, 522.0, 519.0, 510.0, 519.0, 525.0, 576.0, 476.0, 522.0, 513.0, 530.0, 530.0, 459.0, 516.0, 
498.0, 530.0, 576.0, 462.0, 533.0, 546.0, 518.0, 522.0, 513.0, 492.0, 501.0, 525.0, 522.0, 467.0, 390.0, 522.0, 527.0, 473.0, 525.0, 519.0, 519.0, 468.0, 504.0, 564.0, 470.0, 513.0, 516.0, 522.0, 421.0, 525.0, 573.0, 525.0, 527.0, 525.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [263.0, 262.0, 290.0, 286.0, 236.0, 232.0, 242.0, 268.0, 268.0, 254.0, 271.0, 239.0, 248.0, 249.0, 214.0, 239.0, 230.0, 232.0, 258.0, 261.0, 264.0, 266.0, 267.0, 255.0, 273.0, 257.0, 251.0, 276.0, 293.0, 268.0, 264.0, 261.0, 244.0, 263.0, 267.0, 255.0, 239.0, 223.0, 244.0, 257.0, 273.0, 249.0, 256.0, 266.0, 262.0, 260.0, 252.0, 278.0, 262.0, 263.0, 263.0, 253.0, 251.0, 271.0, 254.0, 268.0, 270.0, 245.0, 288.0, 285.0, 262.0, 260.0, 222.0, 182.0, 269.0, 255.0, 281.0, 255.0, 227.0, 220.0, 227.0, 225.0, 237.0, 230.0, 259.0, 266.0, 271.0, 256.0, 211.0, 253.0, 256.0, 260.0, 232.0, 227.0, 267.0, 249.0, 280.0, 290.0, 262.0, 257.0, 259.0, 263.0, 210.0, 206.0, 247.0, 257.0, 254.0, 253.0, 252.0, 261.0, 231.0, 227.0, 240.0, 249.0, 273.0, 249.0, 259.0, 260.0, 253.0, 257.0, 270.0, 249.0, 255.0, 270.0, 284.0, 292.0, 247.0, 229.0, 265.0, 257.0, 245.0, 268.0, 260.0, 270.0, 273.0, 257.0, 214.0, 245.0, 260.0, 256.0, 251.0, 247.0, 262.0, 268.0, 291.0, 285.0, 238.0, 224.0, 270.0, 263.0, 275.0, 271.0, 267.0, 251.0, 254.0, 268.0, 245.0, 268.0, 256.0, 236.0, 270.0, 231.0, 273.0, 252.0, 269.0, 253.0, 231.0, 236.0, 191.0, 199.0, 267.0, 255.0, 272.0, 255.0, 241.0, 232.0, 260.0, 265.0, 266.0, 253.0, 259.0, 260.0, 238.0, 
230.0, 260.0, 244.0, 281.0, 283.0, 231.0, 239.0, 259.0, 254.0, 254.0, 262.0, 268.0, 254.0, 210.0, 211.0, 263.0, 262.0, 285.0, 288.0, 259.0, 266.0, 248.0, 279.0, 268.0, 257.0, 264.0, 252.0]}, "sampler_perf": {"mean_env_wait_ms": 1.863128513341346, "mean_processing_ms": 0.43994753351318544, "mean_inference_ms": 2.414282806471956}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2568000, "num_steps_sampled": 1369600, "sample_time_ms": 20842.684, "load_time_ms": 37.196, "grad_time_ms": 8837.48, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 2.4651903655498604e-33, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005029598250985146, "policy_loss": -0.0072616818360984325, "vf_loss": 83.45578002929688, "vf_explained_var": 0.7516160011291504, "kl": 0.0016769097419455647, "entropy": 1.1618729829788208, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1369600, "episodes_total": 3424, "training_iteration": 107, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-38-17", "timestamp": 1660250297, "time_this_iter_s": 30.596415996551514, "time_total_s": 8715.399705171585, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, 
"gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, 
"collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8715.399705171585, "timesteps_since_restore": 1369600, "iterations_since_restore": 107, "perf": {"cpu_util_percent": 35.45581395348837, "ram_util_percent": 58.237209302325574}} -{"episode_reward_max": 582.0, "episode_reward_min": 390.0, "episode_reward_mean": 513.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 297.0}, "policy_reward_mean": {"ppo": 256.905}, "custom_metrics": 
{"sparse_reward_mean": 178.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 157.41, "shaped_reward_min": 110, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.75, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.79, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.94, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.6, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.95, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.15, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 4.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.18, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.25, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.18, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.95, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.15, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.95, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.15, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [495.0, 525.0, 579.0, 513.0, 465.0, 519.0, 525.0, 525.0, 503.0, 522.0, 522.0, 533.0, 522.0, 468.0, 522.0, 525.0, 582.0, 573.0, 527.0, 519.0, 519.0, 570.0, 501.0, 519.0, 510.0, 467.0, 522.0, 522.0, 527.0, 533.0, 473.0, 465.0, 516.0, 498.0, 530.0, 
576.0, 462.0, 533.0, 546.0, 518.0, 522.0, 513.0, 492.0, 501.0, 525.0, 522.0, 467.0, 390.0, 522.0, 527.0, 473.0, 525.0, 519.0, 519.0, 468.0, 504.0, 564.0, 470.0, 513.0, 516.0, 522.0, 421.0, 525.0, 573.0, 525.0, 527.0, 525.0, 516.0, 525.0, 576.0, 468.0, 510.0, 522.0, 510.0, 497.0, 453.0, 462.0, 519.0, 530.0, 522.0, 530.0, 527.0, 561.0, 525.0, 507.0, 522.0, 462.0, 501.0, 522.0, 522.0, 522.0, 530.0, 525.0, 516.0, 522.0, 522.0, 515.0, 573.0, 522.0, 404.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [243.0, 252.0, 252.0, 273.0, 293.0, 286.0, 267.0, 246.0, 232.0, 233.0, 260.0, 259.0, 264.0, 261.0, 272.0, 253.0, 250.0, 253.0, 266.0, 256.0, 272.0, 250.0, 269.0, 264.0, 261.0, 261.0, 223.0, 245.0, 249.0, 273.0, 252.0, 273.0, 285.0, 297.0, 287.0, 286.0, 268.0, 259.0, 267.0, 252.0, 260.0, 259.0, 280.0, 290.0, 239.0, 262.0, 267.0, 252.0, 263.0, 247.0, 246.0, 221.0, 249.0, 273.0, 257.0, 265.0, 262.0, 265.0, 264.0, 269.0, 234.0, 239.0, 229.0, 236.0, 260.0, 256.0, 251.0, 247.0, 262.0, 268.0, 291.0, 285.0, 238.0, 224.0, 270.0, 263.0, 275.0, 271.0, 267.0, 251.0, 254.0, 268.0, 245.0, 268.0, 256.0, 236.0, 270.0, 231.0, 273.0, 252.0, 269.0, 253.0, 231.0, 236.0, 191.0, 199.0, 267.0, 255.0, 272.0, 255.0, 241.0, 232.0, 260.0, 265.0, 266.0, 253.0, 259.0, 260.0, 238.0, 230.0, 260.0, 244.0, 281.0, 283.0, 231.0, 239.0, 259.0, 254.0, 254.0, 262.0, 268.0, 254.0, 210.0, 211.0, 263.0, 262.0, 285.0, 288.0, 259.0, 266.0, 248.0, 279.0, 268.0, 257.0, 264.0, 252.0, 263.0, 262.0, 290.0, 286.0, 236.0, 232.0, 242.0, 
268.0, 268.0, 254.0, 271.0, 239.0, 248.0, 249.0, 214.0, 239.0, 230.0, 232.0, 258.0, 261.0, 264.0, 266.0, 267.0, 255.0, 273.0, 257.0, 251.0, 276.0, 293.0, 268.0, 264.0, 261.0, 244.0, 263.0, 267.0, 255.0, 239.0, 223.0, 244.0, 257.0, 273.0, 249.0, 256.0, 266.0, 262.0, 260.0, 252.0, 278.0, 262.0, 263.0, 263.0, 253.0, 251.0, 271.0, 254.0, 268.0, 270.0, 245.0, 288.0, 285.0, 262.0, 260.0, 222.0, 182.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8498555104022234, "mean_processing_ms": 0.43730168549860937, "mean_inference_ms": 2.400809449110995}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2592000, "num_steps_sampled": 1382400, "sample_time_ms": 20702.738, "load_time_ms": 37.249, "grad_time_ms": 9000.45, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.2325951827749302e-33, "cur_lr": 0.0010000000474974513, "total_loss": 0.004501763265579939, "policy_loss": -0.002659810474142432, "vf_loss": 77.439453125, "vf_explained_var": 0.7766797542572021, "kl": 0.002080060075968504, "entropy": 1.1647237539291382, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1382400, "episodes_total": 3456, "training_iteration": 108, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-38-46", "timestamp": 1660250326, "time_this_iter_s": 29.44796586036682, "time_total_s": 8744.847671031952, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, 
"state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, 
"log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8744.847671031952, "timesteps_since_restore": 1382400, "iterations_since_restore": 108, "perf": {"cpu_util_percent": 35.6452380952381, "ram_util_percent": 58.221428571428575}} -{"episode_reward_max": 582.0, "episode_reward_min": 404.0, 
"episode_reward_mean": 517.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 297.0}, "policy_reward_mean": {"ppo": 258.575}, "custom_metrics": {"sparse_reward_mean": 179.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 158.75, "shaped_reward_min": 121, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.45, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.7, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.93, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.97, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.14, 
"onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.45, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.06, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.24, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 4.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.36, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.06, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, 
"optimal_onion_potting_agent_1_mean": 15.24, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.06, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.24, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [498.0, 525.0, 519.0, 522.0, 522.0, 
516.0, 452.0, 530.0, 530.0, 522.0, 519.0, 473.0, 453.0, 570.0, 522.0, 441.0, 576.0, 476.0, 516.0, 507.0, 573.0, 530.0, 519.0, 579.0, 525.0, 473.0, 510.0, 567.0, 576.0, 516.0, 525.0, 524.0, 525.0, 527.0, 525.0, 516.0, 525.0, 576.0, 468.0, 510.0, 522.0, 510.0, 497.0, 453.0, 462.0, 519.0, 530.0, 522.0, 530.0, 527.0, 561.0, 525.0, 507.0, 522.0, 462.0, 501.0, 522.0, 522.0, 522.0, 530.0, 525.0, 516.0, 522.0, 522.0, 515.0, 573.0, 522.0, 404.0, 495.0, 525.0, 579.0, 513.0, 465.0, 519.0, 525.0, 525.0, 503.0, 522.0, 522.0, 533.0, 522.0, 468.0, 522.0, 525.0, 582.0, 573.0, 527.0, 519.0, 519.0, 570.0, 501.0, 519.0, 510.0, 467.0, 522.0, 522.0, 527.0, 533.0, 473.0, 465.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [246.0, 252.0, 261.0, 264.0, 269.0, 250.0, 258.0, 264.0, 264.0, 258.0, 272.0, 244.0, 239.0, 213.0, 263.0, 267.0, 263.0, 267.0, 268.0, 254.0, 267.0, 252.0, 240.0, 233.0, 221.0, 232.0, 284.0, 286.0, 264.0, 258.0, 225.0, 216.0, 292.0, 284.0, 245.0, 231.0, 245.0, 271.0, 258.0, 249.0, 290.0, 283.0, 266.0, 264.0, 251.0, 268.0, 296.0, 283.0, 263.0, 262.0, 219.0, 254.0, 264.0, 246.0, 291.0, 276.0, 282.0, 294.0, 248.0, 268.0, 252.0, 273.0, 265.0, 259.0, 259.0, 266.0, 248.0, 279.0, 268.0, 257.0, 264.0, 252.0, 263.0, 262.0, 290.0, 286.0, 236.0, 232.0, 242.0, 268.0, 268.0, 254.0, 271.0, 239.0, 248.0, 249.0, 214.0, 239.0, 230.0, 232.0, 258.0, 261.0, 264.0, 266.0, 267.0, 255.0, 273.0, 257.0, 251.0, 276.0, 293.0, 268.0, 264.0, 261.0, 244.0, 263.0, 267.0, 255.0, 239.0, 223.0, 244.0, 257.0, 273.0, 
249.0, 256.0, 266.0, 262.0, 260.0, 252.0, 278.0, 262.0, 263.0, 263.0, 253.0, 251.0, 271.0, 254.0, 268.0, 270.0, 245.0, 288.0, 285.0, 262.0, 260.0, 222.0, 182.0, 243.0, 252.0, 252.0, 273.0, 293.0, 286.0, 267.0, 246.0, 232.0, 233.0, 260.0, 259.0, 264.0, 261.0, 272.0, 253.0, 250.0, 253.0, 266.0, 256.0, 272.0, 250.0, 269.0, 264.0, 261.0, 261.0, 223.0, 245.0, 249.0, 273.0, 252.0, 273.0, 285.0, 297.0, 287.0, 286.0, 268.0, 259.0, 267.0, 252.0, 260.0, 259.0, 280.0, 290.0, 239.0, 262.0, 267.0, 252.0, 263.0, 247.0, 246.0, 221.0, 249.0, 273.0, 257.0, 265.0, 262.0, 265.0, 264.0, 269.0, 234.0, 239.0, 229.0, 236.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8368123627881316, "mean_processing_ms": 0.4346999202219803, "mean_inference_ms": 2.3873597355001146}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2616000, "num_steps_sampled": 1395200, "sample_time_ms": 20622.092, "load_time_ms": 37.297, "grad_time_ms": 9168.951, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.162975913874651e-34, "cur_lr": 0.0010000000474974513, "total_loss": -0.0004981299280188978, "policy_loss": -0.007735797669738531, "vf_loss": 78.24005889892578, "vf_explained_var": 0.7600134015083313, "kl": 0.0021366437431424856, "entropy": 1.1726828813552856, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1395200, "episodes_total": 3488, "training_iteration": 109, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-39-16", "timestamp": 1660250356, "time_this_iter_s": 29.312750816345215, "time_total_s": 8774.160421848297, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", 
"fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, 
"sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8774.160421848297, 
"timesteps_since_restore": 1395200, "iterations_since_restore": 109, "perf": {"cpu_util_percent": 34.91219512195122, "ram_util_percent": 58.29999999999999}} -{"episode_reward_max": 582.0, "episode_reward_min": 390.0, "episode_reward_mean": 513.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 256.885}, "custom_metrics": {"sparse_reward_mean": 177.8, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 158.17, "shaped_reward_min": 116, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.47, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.76, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, 
"useful_onion_pickup_agent_1_mean": 15.68, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.52, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 14.01, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.08, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.52, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.89, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.33, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.22, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.01, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.08, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.01, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.08, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 
0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [396.0, 516.0, 522.0, 465.0, 390.0, 522.0, 530.0, 570.0, 487.0, 519.0, 525.0, 527.0, 507.0, 522.0, 473.0, 519.0, 519.0, 573.0, 519.0, 422.0, 495.0, 525.0, 519.0, 473.0, 522.0, 530.0, 522.0, 510.0, 498.0, 510.0, 516.0, 522.0, 515.0, 573.0, 522.0, 404.0, 495.0, 525.0, 579.0, 513.0, 465.0, 519.0, 525.0, 525.0, 503.0, 522.0, 522.0, 533.0, 522.0, 468.0, 522.0, 525.0, 582.0, 573.0, 527.0, 519.0, 519.0, 570.0, 501.0, 519.0, 510.0, 467.0, 522.0, 522.0, 527.0, 533.0, 473.0, 465.0, 498.0, 525.0, 519.0, 522.0, 522.0, 516.0, 452.0, 530.0, 530.0, 522.0, 519.0, 473.0, 453.0, 570.0, 522.0, 441.0, 576.0, 476.0, 516.0, 507.0, 573.0, 530.0, 519.0, 579.0, 525.0, 473.0, 510.0, 567.0, 576.0, 516.0, 525.0, 524.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [203.0, 193.0, 260.0, 256.0, 268.0, 254.0, 236.0, 229.0, 203.0, 187.0, 261.0, 261.0, 271.0, 259.0, 272.0, 298.0, 236.0, 251.0, 260.0, 259.0, 269.0, 256.0, 258.0, 269.0, 256.0, 251.0, 270.0, 252.0, 252.0, 221.0, 270.0, 249.0, 256.0, 263.0, 285.0, 288.0, 270.0, 249.0, 203.0, 219.0, 254.0, 241.0, 250.0, 275.0, 273.0, 246.0, 236.0, 237.0, 250.0, 272.0, 270.0, 260.0, 262.0, 260.0, 243.0, 267.0, 243.0, 255.0, 266.0, 244.0, 269.0, 247.0, 249.0, 273.0, 270.0, 245.0, 288.0, 285.0, 262.0, 260.0, 222.0, 182.0, 243.0, 252.0, 252.0, 273.0, 293.0, 286.0, 
267.0, 246.0, 232.0, 233.0, 260.0, 259.0, 264.0, 261.0, 272.0, 253.0, 250.0, 253.0, 266.0, 256.0, 272.0, 250.0, 269.0, 264.0, 261.0, 261.0, 223.0, 245.0, 249.0, 273.0, 252.0, 273.0, 285.0, 297.0, 287.0, 286.0, 268.0, 259.0, 267.0, 252.0, 260.0, 259.0, 280.0, 290.0, 239.0, 262.0, 267.0, 252.0, 263.0, 247.0, 246.0, 221.0, 249.0, 273.0, 257.0, 265.0, 262.0, 265.0, 264.0, 269.0, 234.0, 239.0, 229.0, 236.0, 246.0, 252.0, 261.0, 264.0, 269.0, 250.0, 258.0, 264.0, 264.0, 258.0, 272.0, 244.0, 239.0, 213.0, 263.0, 267.0, 263.0, 267.0, 268.0, 254.0, 267.0, 252.0, 240.0, 233.0, 221.0, 232.0, 284.0, 286.0, 264.0, 258.0, 225.0, 216.0, 292.0, 284.0, 245.0, 231.0, 245.0, 271.0, 258.0, 249.0, 290.0, 283.0, 266.0, 264.0, 251.0, 268.0, 296.0, 283.0, 263.0, 262.0, 219.0, 254.0, 264.0, 246.0, 291.0, 276.0, 282.0, 294.0, 248.0, 268.0, 252.0, 273.0, 265.0, 259.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8240069603143507, "mean_processing_ms": 0.43214623654293904, "mean_inference_ms": 2.374213479802633}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2640000, "num_steps_sampled": 1408000, "sample_time_ms": 20611.05, "load_time_ms": 37.227, "grad_time_ms": 9292.4, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.0814879569373254e-34, "cur_lr": 0.0010000000474974513, "total_loss": 0.006001986563205719, "policy_loss": -0.0016462085768580437, "vf_loss": 82.35639953613281, "vf_explained_var": 0.7567899823188782, "kl": 0.0018497154815122485, "entropy": 1.174903154373169, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1408000, "episodes_total": 3520, "training_iteration": 110, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-39-46", "timestamp": 1660250386, "time_this_iter_s": 30.394602060317993, "time_total_s": 8804.555023908615, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 
0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8804.555023908615, "timesteps_since_restore": 1408000, "iterations_since_restore": 110, "perf": {"cpu_util_percent": 34.141860465116274, "ram_util_percent": 58.19302325581395}} -{"episode_reward_max": 579.0, "episode_reward_min": 390.0, "episode_reward_mean": 511.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 187.0}, "policy_reward_max": {"ppo": 299.0}, "policy_reward_mean": {"ppo": 255.505}, "custom_metrics": {"sparse_reward_mean": 177.0, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 157.01, "shaped_reward_min": 116, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.44, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.74, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.65, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.59, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.96, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 15.02, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.77, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.24, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.85, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.33, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.22, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.96, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 15.02, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.96, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 15.02, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 507.0, 513.0, 504.0, 462.0, 522.0, 498.0, 576.0, 516.0, 519.0, 519.0, 564.0, 519.0, 561.0, 401.0, 522.0, 519.0, 570.0, 501.0, 484.0, 519.0, 522.0, 507.0, 413.0, 516.0, 573.0, 516.0, 522.0, 522.0, 422.0, 465.0, 482.0, 527.0, 533.0, 473.0, 465.0, 498.0, 525.0, 519.0, 522.0, 522.0, 516.0, 452.0, 530.0, 530.0, 522.0, 519.0, 473.0, 453.0, 570.0, 522.0, 441.0, 576.0, 476.0, 516.0, 507.0, 573.0, 530.0, 519.0, 579.0, 525.0, 473.0, 510.0, 567.0, 576.0, 516.0, 525.0, 524.0, 396.0, 516.0, 522.0, 465.0, 390.0, 522.0, 530.0, 570.0, 487.0, 519.0, 525.0, 527.0, 507.0, 522.0, 473.0, 519.0, 519.0, 573.0, 519.0, 422.0, 495.0, 525.0, 519.0, 473.0, 522.0, 530.0, 522.0, 510.0, 498.0, 510.0, 516.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 287.0, 250.0, 257.0, 251.0, 262.0, 242.0, 262.0, 240.0, 222.0, 250.0, 272.0, 244.0, 254.0, 283.0, 293.0, 256.0, 260.0, 261.0, 258.0, 263.0, 256.0, 265.0, 299.0, 261.0, 258.0, 276.0, 285.0, 212.0, 189.0, 259.0, 263.0, 262.0, 257.0, 282.0, 288.0, 259.0, 242.0, 250.0, 234.0, 257.0, 262.0, 259.0, 263.0, 258.0, 249.0, 203.0, 210.0, 268.0, 
248.0, 298.0, 275.0, 265.0, 251.0, 260.0, 262.0, 267.0, 255.0, 203.0, 219.0, 230.0, 235.0, 247.0, 235.0, 262.0, 265.0, 264.0, 269.0, 234.0, 239.0, 229.0, 236.0, 246.0, 252.0, 261.0, 264.0, 269.0, 250.0, 258.0, 264.0, 264.0, 258.0, 272.0, 244.0, 239.0, 213.0, 263.0, 267.0, 263.0, 267.0, 268.0, 254.0, 267.0, 252.0, 240.0, 233.0, 221.0, 232.0, 284.0, 286.0, 264.0, 258.0, 225.0, 216.0, 292.0, 284.0, 245.0, 231.0, 245.0, 271.0, 258.0, 249.0, 290.0, 283.0, 266.0, 264.0, 251.0, 268.0, 296.0, 283.0, 263.0, 262.0, 219.0, 254.0, 264.0, 246.0, 291.0, 276.0, 282.0, 294.0, 248.0, 268.0, 252.0, 273.0, 265.0, 259.0, 203.0, 193.0, 260.0, 256.0, 268.0, 254.0, 236.0, 229.0, 203.0, 187.0, 261.0, 261.0, 271.0, 259.0, 272.0, 298.0, 236.0, 251.0, 260.0, 259.0, 269.0, 256.0, 258.0, 269.0, 256.0, 251.0, 270.0, 252.0, 252.0, 221.0, 270.0, 249.0, 256.0, 263.0, 285.0, 288.0, 270.0, 249.0, 203.0, 219.0, 254.0, 241.0, 250.0, 275.0, 273.0, 246.0, 236.0, 237.0, 250.0, 272.0, 270.0, 260.0, 262.0, 260.0, 243.0, 267.0, 243.0, 255.0, 266.0, 244.0, 269.0, 247.0, 249.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 1.8114390530893243, "mean_processing_ms": 0.4296416576287852, "mean_inference_ms": 2.361419613419488}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2664000, "num_steps_sampled": 1420800, "sample_time_ms": 20548.052, "load_time_ms": 37.151, "grad_time_ms": 9411.24, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.5407439784686627e-34, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037929911632090807, "policy_loss": -0.003666130593046546, "vf_loss": 80.49629974365234, "vf_explained_var": 0.7547799944877625, "kl": 0.002083237050101161, "entropy": 1.1810179948806763, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1420800, "episodes_total": 3552, "training_iteration": 111, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-40-16", 
"timestamp": 1660250416, "time_this_iter_s": 29.606700897216797, "time_total_s": 8834.161724805832, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", 
"ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 
0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8834.161724805832, "timesteps_since_restore": 1420800, "iterations_since_restore": 111, "perf": {"cpu_util_percent": 34.73809523809524, "ram_util_percent": 58.2547619047619}} -{"episode_reward_max": 576.0, "episode_reward_min": 390.0, "episode_reward_mean": 510.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 187.0}, "policy_reward_max": {"ppo": 299.0}, "policy_reward_mean": {"ppo": 255.4}, "custom_metrics": {"sparse_reward_mean": 177.0, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 156.8, "shaped_reward_min": 115, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.15, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.34, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.58, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.0, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.57, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.89, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 15.0, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.96, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.41, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.8, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.31, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 2, 
"soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.37, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.73, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.89, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 15.0, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.89, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 15.0, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [507.0, 527.0, 513.0, 516.0, 479.0, 516.0, 441.0, 573.0, 522.0, 516.0, 513.0, 395.0, 525.0, 519.0, 521.0, 522.0, 522.0, 525.0, 530.0, 525.0, 462.0, 522.0, 441.0, 530.0, 573.0, 525.0, 527.0, 525.0, 522.0, 513.0, 570.0, 525.0, 576.0, 516.0, 525.0, 524.0, 396.0, 516.0, 522.0, 465.0, 390.0, 522.0, 530.0, 570.0, 487.0, 519.0, 525.0, 527.0, 507.0, 522.0, 473.0, 519.0, 519.0, 573.0, 519.0, 422.0, 495.0, 525.0, 519.0, 473.0, 522.0, 530.0, 522.0, 510.0, 498.0, 510.0, 516.0, 522.0, 576.0, 507.0, 513.0, 504.0, 462.0, 522.0, 498.0, 576.0, 516.0, 519.0, 519.0, 564.0, 519.0, 561.0, 401.0, 522.0, 519.0, 570.0, 501.0, 484.0, 519.0, 522.0, 507.0, 413.0, 516.0, 573.0, 516.0, 522.0, 522.0, 422.0, 465.0, 482.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 245.0, 263.0, 264.0, 258.0, 255.0, 262.0, 254.0, 235.0, 244.0, 256.0, 260.0, 221.0, 220.0, 293.0, 280.0, 256.0, 266.0, 
273.0, 243.0, 262.0, 251.0, 195.0, 200.0, 272.0, 253.0, 253.0, 266.0, 258.0, 263.0, 263.0, 259.0, 262.0, 260.0, 262.0, 263.0, 255.0, 275.0, 252.0, 273.0, 226.0, 236.0, 274.0, 248.0, 233.0, 208.0, 270.0, 260.0, 288.0, 285.0, 249.0, 276.0, 265.0, 262.0, 259.0, 266.0, 261.0, 261.0, 254.0, 259.0, 277.0, 293.0, 255.0, 270.0, 282.0, 294.0, 248.0, 268.0, 252.0, 273.0, 265.0, 259.0, 203.0, 193.0, 260.0, 256.0, 268.0, 254.0, 236.0, 229.0, 203.0, 187.0, 261.0, 261.0, 271.0, 259.0, 272.0, 298.0, 236.0, 251.0, 260.0, 259.0, 269.0, 256.0, 258.0, 269.0, 256.0, 251.0, 270.0, 252.0, 252.0, 221.0, 270.0, 249.0, 256.0, 263.0, 285.0, 288.0, 270.0, 249.0, 203.0, 219.0, 254.0, 241.0, 250.0, 275.0, 273.0, 246.0, 236.0, 237.0, 250.0, 272.0, 270.0, 260.0, 262.0, 260.0, 243.0, 267.0, 243.0, 255.0, 266.0, 244.0, 269.0, 247.0, 249.0, 273.0, 289.0, 287.0, 250.0, 257.0, 251.0, 262.0, 242.0, 262.0, 240.0, 222.0, 250.0, 272.0, 244.0, 254.0, 283.0, 293.0, 256.0, 260.0, 261.0, 258.0, 263.0, 256.0, 265.0, 299.0, 261.0, 258.0, 276.0, 285.0, 212.0, 189.0, 259.0, 263.0, 262.0, 257.0, 282.0, 288.0, 259.0, 242.0, 250.0, 234.0, 257.0, 262.0, 259.0, 263.0, 258.0, 249.0, 203.0, 210.0, 268.0, 248.0, 298.0, 275.0, 265.0, 251.0, 260.0, 262.0, 267.0, 255.0, 203.0, 219.0, 230.0, 235.0, 247.0, 235.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7991027412203113, "mean_processing_ms": 0.4271859275092463, "mean_inference_ms": 2.3490714276117277}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2688000, "num_steps_sampled": 1433600, "sample_time_ms": 20492.657, "load_time_ms": 37.09, "grad_time_ms": 9588.563, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 7.703719892343314e-35, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006233404856175184, "policy_loss": -0.00705617293715477, "vf_loss": 82.67167663574219, "vf_explained_var": 0.7619187235832214, "kl": 0.0019442345947027206, "entropy": 1.175291895866394, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 1433600, "episodes_total": 3584, "training_iteration": 112, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-40-46", "timestamp": 1660250446, "time_this_iter_s": 30.788507223129272, "time_total_s": 8864.950232028961, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8864.950232028961, "timesteps_since_restore": 1433600, "iterations_since_restore": 112, "perf": {"cpu_util_percent": 33.402325581395345, "ram_util_percent": 58.2720930232558}} -{"episode_reward_max": 576.0, "episode_reward_min": 174.0, "episode_reward_mean": 509.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 85.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 254.9}, "custom_metrics": {"sparse_reward_mean": 177.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 155.8, "shaped_reward_min": 54, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.12, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.21, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.54, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.93, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.99, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.49, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.92, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.89, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.32, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.7, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.92, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.89, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.92, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.89, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 525.0, 361.0, 525.0, 524.0, 465.0, 461.0, 525.0, 174.0, 524.0, 570.0, 522.0, 519.0, 522.0, 444.0, 570.0, 570.0, 525.0, 519.0, 525.0, 516.0, 479.0, 522.0, 525.0, 513.0, 516.0, 570.0, 462.0, 573.0, 530.0, 492.0, 522.0, 498.0, 510.0, 516.0, 522.0, 576.0, 507.0, 513.0, 504.0, 462.0, 522.0, 498.0, 576.0, 516.0, 519.0, 519.0, 564.0, 519.0, 561.0, 401.0, 522.0, 519.0, 570.0, 501.0, 484.0, 519.0, 522.0, 507.0, 413.0, 516.0, 573.0, 516.0, 522.0, 522.0, 422.0, 465.0, 482.0, 507.0, 527.0, 513.0, 516.0, 479.0, 516.0, 441.0, 573.0, 522.0, 516.0, 513.0, 395.0, 525.0, 519.0, 521.0, 522.0, 522.0, 525.0, 530.0, 525.0, 462.0, 522.0, 441.0, 530.0, 573.0, 525.0, 527.0, 525.0, 522.0, 513.0, 570.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 282.0, 263.0, 262.0, 187.0, 174.0, 246.0, 279.0, 256.0, 268.0, 230.0, 235.0, 234.0, 227.0, 265.0, 260.0, 89.0, 85.0, 262.0, 262.0, 288.0, 282.0, 262.0, 260.0, 258.0, 261.0, 257.0, 265.0, 224.0, 220.0, 287.0, 283.0, 305.0, 265.0, 264.0, 261.0, 259.0, 260.0, 267.0, 258.0, 259.0, 257.0, 236.0, 243.0, 278.0, 244.0, 262.0, 263.0, 248.0, 265.0, 258.0, 258.0, 285.0, 285.0, 237.0, 225.0, 286.0, 287.0, 272.0, 258.0, 248.0, 244.0, 267.0, 255.0, 243.0, 255.0, 266.0, 244.0, 269.0, 247.0, 249.0, 273.0, 289.0, 287.0, 250.0, 257.0, 251.0, 262.0, 242.0, 262.0, 240.0, 222.0, 250.0, 272.0, 244.0, 254.0, 283.0, 293.0, 256.0, 260.0, 261.0, 258.0, 263.0, 256.0, 265.0, 299.0, 261.0, 258.0, 276.0, 285.0, 212.0, 189.0, 259.0, 263.0, 262.0, 257.0, 282.0, 288.0, 259.0, 242.0, 250.0, 234.0, 257.0, 262.0, 259.0, 263.0, 258.0, 249.0, 203.0, 210.0, 268.0, 248.0, 298.0, 275.0, 265.0, 251.0, 260.0, 262.0, 267.0, 255.0, 203.0, 219.0, 230.0, 235.0, 247.0, 235.0, 262.0, 245.0, 263.0, 264.0, 258.0, 255.0, 262.0, 254.0, 235.0, 244.0, 256.0, 260.0, 221.0, 220.0, 293.0, 280.0, 256.0, 266.0, 273.0, 243.0, 262.0, 251.0, 195.0, 200.0, 272.0, 253.0, 253.0, 266.0, 258.0, 263.0, 263.0, 259.0, 262.0, 260.0, 262.0, 263.0, 255.0, 275.0, 252.0, 273.0, 226.0, 236.0, 274.0, 248.0, 233.0, 208.0, 270.0, 260.0, 288.0, 285.0, 249.0, 276.0, 265.0, 262.0, 259.0, 266.0, 261.0, 261.0, 254.0, 259.0, 277.0, 293.0, 255.0, 270.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7869849940199092, "mean_processing_ms": 0.4247741951637306, "mean_inference_ms": 2.3369101082045303}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2712000, "num_steps_sampled": 1446400, "sample_time_ms": 20390.667, "load_time_ms": 37.347, "grad_time_ms": 9843.106, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 3.851859946171657e-35, "cur_lr": 0.0010000000474974513, "total_loss": 0.0046621630899608135, "policy_loss": -0.0027472442016005516, "vf_loss": 
80.03414916992188, "vf_explained_var": 0.7775616645812988, "kl": 0.0022294942755252123, "entropy": 1.1880191564559937, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1446400, "episodes_total": 3616, "training_iteration": 113, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-41-18", "timestamp": 1660250478, "time_this_iter_s": 31.195298194885254, "time_total_s": 8896.145530223846, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": 
{"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": 
{"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8896.145530223846, "timesteps_since_restore": 1446400, "iterations_since_restore": 113, "perf": {"cpu_util_percent": 34.73181818181818, "ram_util_percent": 58.20681818181818}} -{"episode_reward_max": 576.0, "episode_reward_min": 174.0, "episode_reward_mean": 507.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 85.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 253.95}, "custom_metrics": {"sparse_reward_mean": 175.8, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.3, "shaped_reward_min": 54, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.0, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.27, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.44, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.7, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.94, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.77, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.97, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.77, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.08, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.25, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.86, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.77, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.97, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.77, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.97, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 479.0, 516.0, 449.0, 507.0, 573.0, 504.0, 492.0, 424.0, 536.0, 573.0, 478.0, 576.0, 522.0, 522.0, 519.0, 567.0, 401.0, 490.0, 522.0, 513.0, 522.0, 245.0, 521.0, 533.0, 570.0, 573.0, 516.0, 464.0, 576.0, 573.0, 522.0, 522.0, 422.0, 465.0, 482.0, 507.0, 527.0, 513.0, 516.0, 479.0, 516.0, 441.0, 573.0, 522.0, 516.0, 513.0, 395.0, 525.0, 519.0, 521.0, 522.0, 522.0, 525.0, 530.0, 525.0, 462.0, 522.0, 441.0, 530.0, 573.0, 525.0, 527.0, 525.0, 522.0, 513.0, 570.0, 525.0, 570.0, 525.0, 361.0, 525.0, 524.0, 465.0, 461.0, 525.0, 174.0, 524.0, 570.0, 522.0, 519.0, 522.0, 444.0, 570.0, 570.0, 525.0, 519.0, 525.0, 516.0, 479.0, 522.0, 525.0, 513.0, 516.0, 570.0, 462.0, 573.0, 530.0, 492.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 265.0, 241.0, 238.0, 257.0, 259.0, 219.0, 230.0, 263.0, 244.0, 289.0, 284.0, 241.0, 263.0, 247.0, 245.0, 214.0, 210.0, 263.0, 273.0, 272.0, 301.0, 230.0, 248.0, 291.0, 285.0, 275.0, 247.0, 260.0, 262.0, 259.0, 260.0, 279.0, 288.0, 209.0, 192.0, 244.0, 246.0, 254.0, 268.0, 241.0, 272.0, 267.0, 255.0, 122.0, 123.0, 264.0, 257.0, 270.0, 263.0, 303.0, 267.0, 287.0, 286.0, 249.0, 267.0, 234.0, 230.0, 294.0, 282.0, 292.0, 281.0, 256.0, 266.0, 267.0, 255.0, 203.0, 219.0, 230.0, 235.0, 247.0, 235.0, 262.0, 245.0, 263.0, 264.0, 258.0, 255.0, 262.0, 254.0, 235.0, 244.0, 256.0, 260.0, 221.0, 220.0, 293.0, 280.0, 256.0, 266.0, 273.0, 243.0, 262.0, 251.0, 195.0, 200.0, 272.0, 253.0, 253.0, 266.0, 258.0, 263.0, 263.0, 259.0, 262.0, 260.0, 262.0, 263.0, 255.0, 275.0, 252.0, 273.0, 226.0, 236.0, 274.0, 248.0, 233.0, 208.0, 270.0, 260.0, 288.0, 285.0, 249.0, 276.0, 265.0, 262.0, 259.0, 266.0, 261.0, 261.0, 254.0, 259.0, 277.0, 293.0, 255.0, 270.0, 288.0, 282.0, 263.0, 262.0, 187.0, 174.0, 246.0, 279.0, 256.0, 268.0, 230.0, 235.0, 234.0, 227.0, 265.0, 260.0, 89.0, 85.0, 262.0, 262.0, 288.0, 282.0, 262.0, 260.0, 258.0, 261.0, 257.0, 265.0, 224.0, 220.0, 287.0, 283.0, 305.0, 265.0, 264.0, 261.0, 259.0, 260.0, 267.0, 258.0, 259.0, 257.0, 236.0, 243.0, 278.0, 244.0, 262.0, 263.0, 248.0, 265.0, 258.0, 258.0, 285.0, 285.0, 237.0, 225.0, 286.0, 287.0, 272.0, 258.0, 248.0, 244.0, 267.0, 255.0]}, "sampler_perf": {"mean_env_wait_ms": 1.775081398673085, "mean_processing_ms": 0.4224038975736352, "mean_inference_ms": 2.3249390744039835}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2736000, "num_steps_sampled": 1459200, "sample_time_ms": 20266.804, "load_time_ms": 37.384, "grad_time_ms": 
9843.514, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.9259299730858284e-35, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005095542292110622, "policy_loss": -0.007585855200886726, "vf_loss": 76.6465072631836, "vf_explained_var": 0.7633175849914551, "kl": 0.0019467826932668686, "entropy": 1.1766948699951172, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1459200, "episodes_total": 3648, "training_iteration": 114, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-41-47", "timestamp": 1660250507, "time_this_iter_s": 29.75877094268799, "time_total_s": 8925.904301166534, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], 
"same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": 
"sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8925.904301166534, "timesteps_since_restore": 1459200, "iterations_since_restore": 114, "perf": {"cpu_util_percent": 34.199999999999996, "ram_util_percent": 58.228571428571435}} -{"episode_reward_max": 576.0, "episode_reward_min": 174.0, "episode_reward_mean": 512.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 85.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 256.345}, "custom_metrics": {"sparse_reward_mean": 177.8, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 157.09, "shaped_reward_min": 54, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, 
"useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.25, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.43, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.59, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.87, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.56, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.9, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.09, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, 
"useful_dish_pickup_agent_0_mean": 4.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.72, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.36, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.71, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.33, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.9, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.09, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.9, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.09, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, 
"viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 516.0, 467.0, 530.0, 507.0, 513.0, 519.0, 558.0, 504.0, 522.0, 501.0, 525.0, 525.0, 524.0, 519.0, 513.0, 513.0, 525.0, 516.0, 567.0, 510.0, 521.0, 513.0, 522.0, 518.0, 576.0, 530.0, 527.0, 459.0, 525.0, 570.0, 525.0, 522.0, 513.0, 570.0, 525.0, 570.0, 525.0, 361.0, 525.0, 524.0, 465.0, 461.0, 525.0, 174.0, 524.0, 570.0, 522.0, 519.0, 522.0, 444.0, 570.0, 570.0, 525.0, 519.0, 525.0, 516.0, 479.0, 522.0, 525.0, 513.0, 516.0, 570.0, 462.0, 573.0, 530.0, 492.0, 522.0, 519.0, 479.0, 516.0, 449.0, 507.0, 573.0, 504.0, 492.0, 424.0, 536.0, 573.0, 478.0, 576.0, 522.0, 522.0, 519.0, 567.0, 401.0, 490.0, 522.0, 513.0, 522.0, 245.0, 521.0, 533.0, 570.0, 573.0, 516.0, 464.0, 576.0, 573.0, 522.0], 
"episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 260.0, 251.0, 265.0, 239.0, 228.0, 266.0, 264.0, 255.0, 252.0, 253.0, 260.0, 254.0, 265.0, 285.0, 273.0, 244.0, 260.0, 270.0, 252.0, 247.0, 254.0, 270.0, 255.0, 260.0, 265.0, 265.0, 259.0, 258.0, 261.0, 264.0, 249.0, 268.0, 245.0, 268.0, 257.0, 251.0, 265.0, 276.0, 291.0, 253.0, 257.0, 263.0, 258.0, 264.0, 249.0, 264.0, 258.0, 264.0, 254.0, 288.0, 288.0, 266.0, 264.0, 262.0, 265.0, 241.0, 218.0, 258.0, 267.0, 279.0, 291.0, 249.0, 276.0, 261.0, 261.0, 254.0, 259.0, 277.0, 293.0, 255.0, 270.0, 288.0, 282.0, 263.0, 262.0, 187.0, 174.0, 246.0, 279.0, 256.0, 268.0, 230.0, 235.0, 234.0, 227.0, 265.0, 260.0, 89.0, 85.0, 262.0, 262.0, 288.0, 282.0, 262.0, 260.0, 258.0, 261.0, 257.0, 265.0, 224.0, 220.0, 287.0, 283.0, 305.0, 265.0, 264.0, 261.0, 259.0, 260.0, 267.0, 258.0, 259.0, 257.0, 236.0, 243.0, 278.0, 244.0, 262.0, 263.0, 248.0, 265.0, 258.0, 258.0, 285.0, 285.0, 237.0, 225.0, 286.0, 287.0, 272.0, 258.0, 248.0, 244.0, 267.0, 255.0, 254.0, 265.0, 241.0, 238.0, 257.0, 259.0, 219.0, 230.0, 263.0, 244.0, 289.0, 284.0, 241.0, 263.0, 247.0, 245.0, 214.0, 210.0, 263.0, 273.0, 272.0, 301.0, 230.0, 248.0, 291.0, 285.0, 275.0, 247.0, 260.0, 262.0, 259.0, 260.0, 279.0, 288.0, 209.0, 192.0, 244.0, 246.0, 254.0, 268.0, 241.0, 272.0, 267.0, 255.0, 122.0, 123.0, 264.0, 257.0, 270.0, 263.0, 303.0, 267.0, 287.0, 286.0, 249.0, 267.0, 234.0, 230.0, 294.0, 282.0, 292.0, 281.0, 256.0, 266.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7633797791154415, 
"mean_processing_ms": 0.42007059281459463, "mean_inference_ms": 2.3128736955847145}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2760000, "num_steps_sampled": 1472000, "sample_time_ms": 20145.685, "load_time_ms": 37.355, "grad_time_ms": 9802.708, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 9.629649865429142e-36, "cur_lr": 0.0010000000474974513, "total_loss": -0.001834428054280579, "policy_loss": -0.008199676871299744, "vf_loss": 69.51961517333984, "vf_explained_var": 0.7745820879936218, "kl": 0.002087961183860898, "entropy": 1.1734023094177246, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1472000, "episodes_total": 3680, "training_iteration": 115, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-42-15", "timestamp": 1660250535, "time_this_iter_s": 28.112826824188232, "time_total_s": 8954.017127990723, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, 
"extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8954.017127990723, "timesteps_since_restore": 1472000, "iterations_since_restore": 115, "perf": {"cpu_util_percent": 35.5225, "ram_util_percent": 58.23499999999999}} -{"episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 512.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 75.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 256.435}, "custom_metrics": {"sparse_reward_mean": 178.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.47, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, 
"tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.4, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.79, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.8, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.94, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.04, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.97, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 
21, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.18, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.24, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.65, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.04, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.97, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.04, "viable_onion_potting_agent_0_min": 6, 
"viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.97, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 504.0, 522.0, 339.0, 465.0, 416.0, 516.0, 476.0, 573.0, 519.0, 530.0, 180.0, 570.0, 570.0, 470.0, 573.0, 456.0, 573.0, 513.0, 501.0, 567.0, 579.0, 519.0, 518.0, 582.0, 519.0, 564.0, 504.0, 582.0, 461.0, 456.0, 501.0, 573.0, 530.0, 492.0, 522.0, 519.0, 479.0, 516.0, 449.0, 507.0, 573.0, 504.0, 492.0, 424.0, 536.0, 573.0, 478.0, 576.0, 522.0, 522.0, 519.0, 567.0, 401.0, 490.0, 522.0, 513.0, 522.0, 245.0, 521.0, 533.0, 570.0, 573.0, 516.0, 464.0, 576.0, 573.0, 522.0, 522.0, 516.0, 467.0, 
530.0, 507.0, 513.0, 519.0, 558.0, 504.0, 522.0, 501.0, 525.0, 525.0, 524.0, 519.0, 513.0, 513.0, 525.0, 516.0, 567.0, 510.0, 521.0, 513.0, 522.0, 518.0, 576.0, 530.0, 527.0, 459.0, 525.0, 570.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 290.0, 254.0, 250.0, 251.0, 271.0, 161.0, 178.0, 228.0, 237.0, 211.0, 205.0, 248.0, 268.0, 241.0, 235.0, 300.0, 273.0, 271.0, 248.0, 269.0, 261.0, 105.0, 75.0, 287.0, 283.0, 289.0, 281.0, 248.0, 222.0, 283.0, 290.0, 216.0, 240.0, 289.0, 284.0, 261.0, 252.0, 248.0, 253.0, 266.0, 301.0, 284.0, 295.0, 268.0, 251.0, 245.0, 273.0, 291.0, 291.0, 257.0, 262.0, 277.0, 287.0, 258.0, 246.0, 287.0, 295.0, 234.0, 227.0, 233.0, 223.0, 248.0, 253.0, 286.0, 287.0, 272.0, 258.0, 248.0, 244.0, 267.0, 255.0, 254.0, 265.0, 241.0, 238.0, 257.0, 259.0, 219.0, 230.0, 263.0, 244.0, 289.0, 284.0, 241.0, 263.0, 247.0, 245.0, 214.0, 210.0, 263.0, 273.0, 272.0, 301.0, 230.0, 248.0, 291.0, 285.0, 275.0, 247.0, 260.0, 262.0, 259.0, 260.0, 279.0, 288.0, 209.0, 192.0, 244.0, 246.0, 254.0, 268.0, 241.0, 272.0, 267.0, 255.0, 122.0, 123.0, 264.0, 257.0, 270.0, 263.0, 303.0, 267.0, 287.0, 286.0, 249.0, 267.0, 234.0, 230.0, 294.0, 282.0, 292.0, 281.0, 256.0, 266.0, 262.0, 260.0, 251.0, 265.0, 239.0, 228.0, 266.0, 264.0, 255.0, 252.0, 253.0, 260.0, 254.0, 265.0, 285.0, 273.0, 244.0, 260.0, 270.0, 252.0, 247.0, 254.0, 270.0, 255.0, 260.0, 265.0, 265.0, 259.0, 258.0, 261.0, 264.0, 249.0, 268.0, 245.0, 268.0, 257.0, 251.0, 265.0, 276.0, 291.0, 253.0, 257.0, 263.0, 
258.0, 264.0, 249.0, 264.0, 258.0, 264.0, 254.0, 288.0, 288.0, 266.0, 264.0, 262.0, 265.0, 241.0, 218.0, 258.0, 267.0, 279.0, 291.0, 249.0, 276.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7518867216658078, "mean_processing_ms": 0.4177743622386954, "mean_inference_ms": 2.300854372548392}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2784000, "num_steps_sampled": 1484800, "sample_time_ms": 20021.401, "load_time_ms": 37.489, "grad_time_ms": 9776.788, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 4.814824932714571e-36, "cur_lr": 0.0010000000474974513, "total_loss": 0.00206244015134871, "policy_loss": -0.005156705155968666, "vf_loss": 78.12344360351562, "vf_explained_var": 0.7715883851051331, "kl": 0.0026745833456516266, "entropy": 1.1864006519317627, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1484800, "episodes_total": 3712, "training_iteration": 116, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-42-45", "timestamp": 1660250565, "time_this_iter_s": 29.3955659866333, "time_total_s": 8983.412693977356, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, 
"soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, 
"metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 8983.412693977356, "timesteps_since_restore": 1484800, "iterations_since_restore": 116, "perf": {"cpu_util_percent": 35.66904761904762, "ram_util_percent": 58.25}} -{"episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 513.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 75.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 256.76}, "custom_metrics": {"sparse_reward_mean": 178.4, "sparse_reward_min": 60, 
"sparse_reward_max": 200, "shaped_reward_mean": 156.72, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.34, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.93, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.72, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.54, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.11, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.0, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.81, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.76, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.19, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.67, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.53, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.11, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.0, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.11, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.0, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 530.0, 519.0, 455.0, 222.0, 524.0, 519.0, 525.0, 519.0, 576.0, 467.0, 522.0, 579.0, 570.0, 522.0, 512.0, 510.0, 495.0, 522.0, 573.0, 522.0, 573.0, 525.0, 576.0, 482.0, 522.0, 484.0, 525.0, 456.0, 461.0, 516.0, 522.0, 464.0, 576.0, 573.0, 522.0, 522.0, 516.0, 467.0, 530.0, 507.0, 
513.0, 519.0, 558.0, 504.0, 522.0, 501.0, 525.0, 525.0, 524.0, 519.0, 513.0, 513.0, 525.0, 516.0, 567.0, 510.0, 521.0, 513.0, 522.0, 518.0, 576.0, 530.0, 527.0, 459.0, 525.0, 570.0, 525.0, 573.0, 504.0, 522.0, 339.0, 465.0, 416.0, 516.0, 476.0, 573.0, 519.0, 530.0, 180.0, 570.0, 570.0, 470.0, 573.0, 456.0, 573.0, 513.0, 501.0, 567.0, 579.0, 519.0, 518.0, 582.0, 519.0, 564.0, 504.0, 582.0, 461.0, 456.0, 501.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 265.0, 273.0, 257.0, 254.0, 265.0, 220.0, 235.0, 105.0, 117.0, 259.0, 265.0, 265.0, 254.0, 260.0, 265.0, 261.0, 258.0, 278.0, 298.0, 227.0, 240.0, 253.0, 269.0, 286.0, 293.0, 290.0, 280.0, 266.0, 256.0, 252.0, 260.0, 252.0, 258.0, 237.0, 258.0, 264.0, 258.0, 277.0, 296.0, 268.0, 254.0, 285.0, 288.0, 270.0, 255.0, 279.0, 297.0, 231.0, 251.0, 257.0, 265.0, 237.0, 247.0, 263.0, 262.0, 221.0, 235.0, 234.0, 227.0, 260.0, 256.0, 262.0, 260.0, 234.0, 230.0, 294.0, 282.0, 292.0, 281.0, 256.0, 266.0, 262.0, 260.0, 251.0, 265.0, 239.0, 228.0, 266.0, 264.0, 255.0, 252.0, 253.0, 260.0, 254.0, 265.0, 285.0, 273.0, 244.0, 260.0, 270.0, 252.0, 247.0, 254.0, 270.0, 255.0, 260.0, 265.0, 265.0, 259.0, 258.0, 261.0, 264.0, 249.0, 268.0, 245.0, 268.0, 257.0, 251.0, 265.0, 276.0, 291.0, 253.0, 257.0, 263.0, 258.0, 264.0, 249.0, 264.0, 258.0, 264.0, 254.0, 288.0, 288.0, 266.0, 264.0, 262.0, 265.0, 241.0, 218.0, 258.0, 267.0, 279.0, 291.0, 249.0, 276.0, 283.0, 290.0, 254.0, 250.0, 251.0, 271.0, 161.0, 178.0, 228.0, 237.0, 211.0, 205.0, 248.0, 
268.0, 241.0, 235.0, 300.0, 273.0, 271.0, 248.0, 269.0, 261.0, 105.0, 75.0, 287.0, 283.0, 289.0, 281.0, 248.0, 222.0, 283.0, 290.0, 216.0, 240.0, 289.0, 284.0, 261.0, 252.0, 248.0, 253.0, 266.0, 301.0, 284.0, 295.0, 268.0, 251.0, 245.0, 273.0, 291.0, 291.0, 257.0, 262.0, 277.0, 287.0, 258.0, 246.0, 287.0, 295.0, 234.0, 227.0, 233.0, 223.0, 248.0, 253.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7406015862798738, "mean_processing_ms": 0.4155246745297136, "mean_inference_ms": 2.2890629120226706}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2808000, "num_steps_sampled": 1497600, "sample_time_ms": 20042.808, "load_time_ms": 37.283, "grad_time_ms": 9721.955, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 2.4074124663572855e-36, "cur_lr": 0.0010000000474974513, "total_loss": 0.00018632395949680358, "policy_loss": -0.007065422832965851, "vf_loss": 78.37664794921875, "vf_explained_var": 0.7705362439155579, "kl": 0.0016083299415186048, "entropy": 1.171847939491272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1497600, "episodes_total": 3744, "training_iteration": 117, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-43-15", "timestamp": 1660250595, "time_this_iter_s": 30.259077787399292, "time_total_s": 9013.671771764755, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, 
"dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9013.671771764755, "timesteps_since_restore": 1497600, "iterations_since_restore": 117, "perf": {"cpu_util_percent": 36.71627906976744, "ram_util_percent": 58.1906976744186}} -{"episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 512.86, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 75.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 256.43}, "custom_metrics": {"sparse_reward_mean": 178.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.86, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.66, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.99, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.63, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, 
"useful_onion_drop_agent_0_mean": 0.6, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 14.1, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.01, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.07, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.58, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.45, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.1, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.01, 
"optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.1, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.01, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 516.0, 510.0, 525.0, 570.0, 468.0, 495.0, 525.0, 519.0, 519.0, 522.0, 579.0, 
579.0, 501.0, 527.0, 530.0, 525.0, 573.0, 525.0, 522.0, 522.0, 527.0, 444.0, 515.0, 462.0, 533.0, 525.0, 519.0, 519.0, 527.0, 504.0, 515.0, 459.0, 525.0, 570.0, 525.0, 573.0, 504.0, 522.0, 339.0, 465.0, 416.0, 516.0, 476.0, 573.0, 519.0, 530.0, 180.0, 570.0, 570.0, 470.0, 573.0, 456.0, 573.0, 513.0, 501.0, 567.0, 579.0, 519.0, 518.0, 582.0, 519.0, 564.0, 504.0, 582.0, 461.0, 456.0, 501.0, 519.0, 530.0, 519.0, 455.0, 222.0, 524.0, 519.0, 525.0, 519.0, 576.0, 467.0, 522.0, 579.0, 570.0, 522.0, 512.0, 510.0, 495.0, 522.0, 573.0, 522.0, 573.0, 525.0, 576.0, 482.0, 522.0, 484.0, 525.0, 456.0, 461.0, 516.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [263.0, 267.0, 265.0, 251.0, 249.0, 261.0, 265.0, 260.0, 283.0, 287.0, 227.0, 241.0, 239.0, 256.0, 260.0, 265.0, 256.0, 263.0, 269.0, 250.0, 263.0, 259.0, 282.0, 297.0, 288.0, 291.0, 249.0, 252.0, 273.0, 254.0, 275.0, 255.0, 276.0, 249.0, 281.0, 292.0, 256.0, 269.0, 267.0, 255.0, 253.0, 269.0, 265.0, 262.0, 220.0, 224.0, 258.0, 257.0, 218.0, 244.0, 263.0, 270.0, 262.0, 263.0, 251.0, 268.0, 267.0, 252.0, 261.0, 266.0, 253.0, 251.0, 251.0, 264.0, 241.0, 218.0, 258.0, 267.0, 279.0, 291.0, 249.0, 276.0, 283.0, 290.0, 254.0, 250.0, 251.0, 271.0, 161.0, 178.0, 228.0, 237.0, 211.0, 205.0, 248.0, 268.0, 241.0, 235.0, 300.0, 273.0, 271.0, 248.0, 269.0, 261.0, 105.0, 75.0, 287.0, 283.0, 289.0, 281.0, 248.0, 222.0, 283.0, 290.0, 216.0, 240.0, 289.0, 284.0, 261.0, 252.0, 248.0, 253.0, 266.0, 301.0, 284.0, 295.0, 268.0, 251.0, 245.0, 273.0, 
291.0, 291.0, 257.0, 262.0, 277.0, 287.0, 258.0, 246.0, 287.0, 295.0, 234.0, 227.0, 233.0, 223.0, 248.0, 253.0, 254.0, 265.0, 273.0, 257.0, 254.0, 265.0, 220.0, 235.0, 105.0, 117.0, 259.0, 265.0, 265.0, 254.0, 260.0, 265.0, 261.0, 258.0, 278.0, 298.0, 227.0, 240.0, 253.0, 269.0, 286.0, 293.0, 290.0, 280.0, 266.0, 256.0, 252.0, 260.0, 252.0, 258.0, 237.0, 258.0, 264.0, 258.0, 277.0, 296.0, 268.0, 254.0, 285.0, 288.0, 270.0, 255.0, 279.0, 297.0, 231.0, 251.0, 257.0, 265.0, 237.0, 247.0, 263.0, 262.0, 221.0, 235.0, 234.0, 227.0, 260.0, 256.0, 262.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7295179891011523, "mean_processing_ms": 0.4133177002223833, "mean_inference_ms": 2.2776485040760637}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2832000, "num_steps_sampled": 1510400, "sample_time_ms": 20089.612, "load_time_ms": 37.29, "grad_time_ms": 9589.898, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 1.2037062331786428e-36, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031413733959198, "policy_loss": -0.00419240677729249, "vf_loss": 79.22246551513672, "vf_explained_var": 0.7654686570167542, "kl": 0.0017640552250668406, "entropy": 1.1769217252731323, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1510400, "episodes_total": 3776, "training_iteration": 118, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-43-44", "timestamp": 1660250624, "time_this_iter_s": 28.596869230270386, "time_total_s": 9042.268640995026, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": 
false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9042.268640995026, "timesteps_since_restore": 1510400, "iterations_since_restore": 118, "perf": {"cpu_util_percent": 
34.160000000000004, "ram_util_percent": 58.245000000000005}} -{"episode_reward_max": 582.0, "episode_reward_min": 222.0, "episode_reward_mean": 517.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 105.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 258.905}, "custom_metrics": {"sparse_reward_mean": 179.4, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 159.01, "shaped_reward_min": 62, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.89, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.83, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.07, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, 
"onion_drop_agent_0_mean": 1.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.64, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.8, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.55, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.87, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.84, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.46, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.7, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 13.8, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.55, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.8, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.55, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 573.0, 570.0, 525.0, 507.0, 558.0, 522.0, 465.0, 525.0, 522.0, 519.0, 507.0, 519.0, 525.0, 482.0, 576.0, 522.0, 484.0, 530.0, 507.0, 516.0, 462.0, 530.0, 525.0, 561.0, 519.0, 573.0, 530.0, 519.0, 464.0, 533.0, 525.0, 582.0, 461.0, 456.0, 501.0, 519.0, 530.0, 519.0, 455.0, 222.0, 524.0, 519.0, 525.0, 519.0, 576.0, 467.0, 522.0, 579.0, 570.0, 522.0, 512.0, 510.0, 495.0, 522.0, 573.0, 522.0, 573.0, 525.0, 576.0, 482.0, 522.0, 484.0, 525.0, 456.0, 461.0, 516.0, 522.0, 530.0, 516.0, 510.0, 525.0, 570.0, 468.0, 495.0, 525.0, 519.0, 519.0, 522.0, 579.0, 579.0, 501.0, 527.0, 530.0, 525.0, 573.0, 525.0, 522.0, 522.0, 527.0, 444.0, 515.0, 462.0, 533.0, 525.0, 519.0, 519.0, 527.0, 504.0, 515.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 294.0, 293.0, 280.0, 284.0, 286.0, 275.0, 250.0, 246.0, 261.0, 279.0, 279.0, 281.0, 241.0, 223.0, 242.0, 255.0, 270.0, 258.0, 264.0, 268.0, 251.0, 244.0, 263.0, 256.0, 263.0, 255.0, 270.0, 253.0, 229.0, 282.0, 294.0, 258.0, 264.0, 238.0, 246.0, 264.0, 266.0, 259.0, 248.0, 263.0, 253.0, 232.0, 230.0, 270.0, 260.0, 256.0, 269.0, 276.0, 285.0, 261.0, 258.0, 303.0, 270.0, 270.0, 260.0, 254.0, 265.0, 231.0, 233.0, 262.0, 271.0, 260.0, 265.0, 287.0, 295.0, 234.0, 227.0, 233.0, 223.0, 248.0, 253.0, 254.0, 265.0, 273.0, 257.0, 254.0, 265.0, 220.0, 235.0, 105.0, 117.0, 259.0, 265.0, 265.0, 254.0, 260.0, 265.0, 261.0, 258.0, 278.0, 
298.0, 227.0, 240.0, 253.0, 269.0, 286.0, 293.0, 290.0, 280.0, 266.0, 256.0, 252.0, 260.0, 252.0, 258.0, 237.0, 258.0, 264.0, 258.0, 277.0, 296.0, 268.0, 254.0, 285.0, 288.0, 270.0, 255.0, 279.0, 297.0, 231.0, 251.0, 257.0, 265.0, 237.0, 247.0, 263.0, 262.0, 221.0, 235.0, 234.0, 227.0, 260.0, 256.0, 262.0, 260.0, 263.0, 267.0, 265.0, 251.0, 249.0, 261.0, 265.0, 260.0, 283.0, 287.0, 227.0, 241.0, 239.0, 256.0, 260.0, 265.0, 256.0, 263.0, 269.0, 250.0, 263.0, 259.0, 282.0, 297.0, 288.0, 291.0, 249.0, 252.0, 273.0, 254.0, 275.0, 255.0, 276.0, 249.0, 281.0, 292.0, 256.0, 269.0, 267.0, 255.0, 253.0, 269.0, 265.0, 262.0, 220.0, 224.0, 258.0, 257.0, 218.0, 244.0, 263.0, 270.0, 262.0, 263.0, 251.0, 268.0, 267.0, 252.0, 261.0, 266.0, 253.0, 251.0, 251.0, 264.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7186253925564765, "mean_processing_ms": 0.4111520953158576, "mean_inference_ms": 2.2666653584244876}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2856000, "num_steps_sampled": 1523200, "sample_time_ms": 20187.204, "load_time_ms": 37.338, "grad_time_ms": 9575.103, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 6.018531165893214e-37, "cur_lr": 0.0010000000474974513, "total_loss": 0.002520867856219411, "policy_loss": -0.0053411815315485, "vf_loss": 84.43938446044922, "vf_explained_var": 0.7439851760864258, "kl": 0.0023064902052283287, "entropy": 1.1637717485427856, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1523200, "episodes_total": 3808, "training_iteration": 119, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-44-14", "timestamp": 1660250654, "time_this_iter_s": 30.140514850616455, "time_total_s": 9072.409155845642, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, 
"batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9072.409155845642, "timesteps_since_restore": 1523200, "iterations_since_restore": 119, "perf": {"cpu_util_percent": 32.737209302325574, "ram_util_percent": 58.15116279069766}} -{"episode_reward_max": 582.0, "episode_reward_min": 398.0, "episode_reward_mean": 521.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 260.715}, "custom_metrics": {"sparse_reward_mean": 180.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 159.83, "shaped_reward_min": 118, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.01, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, 
"useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.2, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 1.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 13.87, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.65, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.19, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.87, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.65, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.87, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.65, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 576.0, 507.0, 527.0, 570.0, 510.0, 501.0, 522.0, 570.0, 516.0, 467.0, 518.0, 507.0, 579.0, 530.0, 570.0, 398.0, 582.0, 573.0, 433.0, 530.0, 570.0, 516.0, 524.0, 399.0, 570.0, 533.0, 573.0, 522.0, 525.0, 501.0, 522.0, 456.0, 461.0, 516.0, 522.0, 530.0, 516.0, 510.0, 525.0, 570.0, 468.0, 495.0, 525.0, 519.0, 519.0, 522.0, 579.0, 579.0, 501.0, 527.0, 530.0, 525.0, 573.0, 525.0, 522.0, 522.0, 527.0, 444.0, 515.0, 462.0, 533.0, 525.0, 519.0, 519.0, 527.0, 504.0, 515.0, 570.0, 573.0, 570.0, 525.0, 507.0, 558.0, 522.0, 465.0, 525.0, 522.0, 519.0, 507.0, 519.0, 525.0, 482.0, 576.0, 522.0, 484.0, 530.0, 507.0, 516.0, 462.0, 530.0, 525.0, 561.0, 519.0, 573.0, 530.0, 519.0, 464.0, 533.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [249.0, 261.0, 286.0, 290.0, 251.0, 256.0, 280.0, 247.0, 287.0, 283.0, 259.0, 251.0, 249.0, 252.0, 257.0, 265.0, 286.0, 284.0, 256.0, 260.0, 236.0, 231.0, 265.0, 253.0, 262.0, 245.0, 280.0, 299.0, 276.0, 254.0, 301.0, 269.0, 205.0, 193.0, 291.0, 291.0, 292.0, 281.0, 216.0, 217.0, 283.0, 247.0, 292.0, 278.0, 272.0, 244.0, 255.0, 269.0, 208.0, 191.0, 274.0, 296.0, 271.0, 262.0, 278.0, 295.0, 252.0, 270.0, 263.0, 262.0, 255.0, 
246.0, 249.0, 273.0, 221.0, 235.0, 234.0, 227.0, 260.0, 256.0, 262.0, 260.0, 263.0, 267.0, 265.0, 251.0, 249.0, 261.0, 265.0, 260.0, 283.0, 287.0, 227.0, 241.0, 239.0, 256.0, 260.0, 265.0, 256.0, 263.0, 269.0, 250.0, 263.0, 259.0, 282.0, 297.0, 288.0, 291.0, 249.0, 252.0, 273.0, 254.0, 275.0, 255.0, 276.0, 249.0, 281.0, 292.0, 256.0, 269.0, 267.0, 255.0, 253.0, 269.0, 265.0, 262.0, 220.0, 224.0, 258.0, 257.0, 218.0, 244.0, 263.0, 270.0, 262.0, 263.0, 251.0, 268.0, 267.0, 252.0, 261.0, 266.0, 253.0, 251.0, 251.0, 264.0, 276.0, 294.0, 293.0, 280.0, 284.0, 286.0, 275.0, 250.0, 246.0, 261.0, 279.0, 279.0, 281.0, 241.0, 223.0, 242.0, 255.0, 270.0, 258.0, 264.0, 268.0, 251.0, 244.0, 263.0, 256.0, 263.0, 255.0, 270.0, 253.0, 229.0, 282.0, 294.0, 258.0, 264.0, 238.0, 246.0, 264.0, 266.0, 259.0, 248.0, 263.0, 253.0, 232.0, 230.0, 270.0, 260.0, 256.0, 269.0, 276.0, 285.0, 261.0, 258.0, 303.0, 270.0, 270.0, 260.0, 254.0, 265.0, 231.0, 233.0, 262.0, 271.0, 260.0, 265.0]}, "sampler_perf": {"mean_env_wait_ms": 1.7079238505690706, "mean_processing_ms": 0.40902061791269845, "mean_inference_ms": 2.2560343134520804}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2880000, "num_steps_sampled": 1536000, "sample_time_ms": 20247.355, "load_time_ms": 37.42, "grad_time_ms": 9479.74, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.009265582946607e-37, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019442923367023468, "policy_loss": -0.005335395690053701, "vf_loss": 78.6177749633789, "vf_explained_var": 0.7651795744895935, "kl": 0.0017719753086566925, "entropy": 1.164175033569336, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1536000, "episodes_total": 3840, "training_iteration": 120, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-44-44", "timestamp": 1660250684, "time_this_iter_s": 30.042346954345703, "time_total_s": 
9102.451502799988, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": 
false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, 
"num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9102.451502799988, "timesteps_since_restore": 1536000, "iterations_since_restore": 120, "perf": {"cpu_util_percent": 33.52142857142857, "ram_util_percent": 58.276190476190465}} -{"episode_reward_max": 582.0, "episode_reward_min": 365.0, "episode_reward_mean": 523.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 261.805}, "custom_metrics": {"sparse_reward_mean": 181.6, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 160.41, "shaped_reward_min": 118, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 15.47, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 1.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.93, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.7, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.61, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 3.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.25, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.28, 
"soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.16, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.93, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.7, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.93, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.7, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [564.0, 579.0, 507.0, 530.0, 525.0, 365.0, 518.0, 522.0, 501.0, 525.0, 515.0, 576.0, 582.0, 530.0, 507.0, 522.0, 530.0, 522.0, 456.0, 504.0, 582.0, 579.0, 519.0, 533.0, 573.0, 525.0, 498.0, 567.0, 573.0, 450.0, 479.0, 522.0, 519.0, 527.0, 504.0, 515.0, 570.0, 573.0, 570.0, 525.0, 507.0, 558.0, 522.0, 465.0, 525.0, 522.0, 519.0, 507.0, 519.0, 525.0, 482.0, 576.0, 522.0, 484.0, 530.0, 507.0, 516.0, 462.0, 530.0, 525.0, 561.0, 519.0, 573.0, 530.0, 519.0, 464.0, 533.0, 525.0, 510.0, 576.0, 507.0, 527.0, 570.0, 510.0, 501.0, 522.0, 570.0, 516.0, 467.0, 518.0, 507.0, 579.0, 530.0, 570.0, 398.0, 582.0, 573.0, 433.0, 530.0, 570.0, 516.0, 524.0, 399.0, 570.0, 533.0, 573.0, 522.0, 525.0, 501.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 272.0, 290.0, 289.0, 251.0, 256.0, 252.0, 278.0, 263.0, 262.0, 185.0, 180.0, 260.0, 258.0, 254.0, 268.0, 247.0, 254.0, 265.0, 260.0, 262.0, 253.0, 290.0, 286.0, 296.0, 286.0, 257.0, 273.0, 255.0, 252.0, 265.0, 
257.0, 265.0, 265.0, 274.0, 248.0, 218.0, 238.0, 260.0, 244.0, 291.0, 291.0, 293.0, 286.0, 260.0, 259.0, 270.0, 263.0, 285.0, 288.0, 279.0, 246.0, 252.0, 246.0, 275.0, 292.0, 287.0, 286.0, 218.0, 232.0, 248.0, 231.0, 260.0, 262.0, 267.0, 252.0, 261.0, 266.0, 253.0, 251.0, 251.0, 264.0, 276.0, 294.0, 293.0, 280.0, 284.0, 286.0, 275.0, 250.0, 246.0, 261.0, 279.0, 279.0, 281.0, 241.0, 223.0, 242.0, 255.0, 270.0, 258.0, 264.0, 268.0, 251.0, 244.0, 263.0, 256.0, 263.0, 255.0, 270.0, 253.0, 229.0, 282.0, 294.0, 258.0, 264.0, 238.0, 246.0, 264.0, 266.0, 259.0, 248.0, 263.0, 253.0, 232.0, 230.0, 270.0, 260.0, 256.0, 269.0, 276.0, 285.0, 261.0, 258.0, 303.0, 270.0, 270.0, 260.0, 254.0, 265.0, 231.0, 233.0, 262.0, 271.0, 260.0, 265.0, 249.0, 261.0, 286.0, 290.0, 251.0, 256.0, 280.0, 247.0, 287.0, 283.0, 259.0, 251.0, 249.0, 252.0, 257.0, 265.0, 286.0, 284.0, 256.0, 260.0, 236.0, 231.0, 265.0, 253.0, 262.0, 245.0, 280.0, 299.0, 276.0, 254.0, 301.0, 269.0, 205.0, 193.0, 291.0, 291.0, 292.0, 281.0, 216.0, 217.0, 283.0, 247.0, 292.0, 278.0, 272.0, 244.0, 255.0, 269.0, 208.0, 191.0, 274.0, 296.0, 271.0, 262.0, 278.0, 295.0, 252.0, 270.0, 263.0, 262.0, 255.0, 246.0, 249.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6974210688663596, "mean_processing_ms": 0.40692675131976414, "mean_inference_ms": 2.2458724940047134}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2904000, "num_steps_sampled": 1548800, "sample_time_ms": 20389.537, "load_time_ms": 37.524, "grad_time_ms": 9428.918, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.5046327914733034e-37, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008685672655701637, "policy_loss": -0.007173554971814156, "vf_loss": 86.26753997802734, "vf_explained_var": 0.7487472891807556, "kl": 0.001581608667038381, "entropy": 1.1692520380020142, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1548800, 
"episodes_total": 3872, "training_iteration": 121, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-45-15", "timestamp": 1660250715, "time_this_iter_s": 30.52119469642639, "time_total_s": 9132.972697496414, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, 
"clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, 
"policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9132.972697496414, "timesteps_since_restore": 1548800, "iterations_since_restore": 121, "perf": {"cpu_util_percent": 33.260465116279065, "ram_util_percent": 58.28837209302325}} -{"episode_reward_max": 582.0, "episode_reward_min": 123.0, "episode_reward_mean": 521.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 58.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 260.89}, "custom_metrics": {"sparse_reward_mean": 180.8, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 160.18, "shaped_reward_min": 43, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.91, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.02, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.32, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.7, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.67, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.12, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.46, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.01, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 3.78, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.17, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.12, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.46, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.12, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.46, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 522.0, 522.0, 482.0, 522.0, 579.0, 573.0, 527.0, 530.0, 522.0, 123.0, 519.0, 570.0, 525.0, 470.0, 522.0, 573.0, 492.0, 570.0, 576.0, 465.0, 411.0, 525.0, 525.0, 579.0, 510.0, 516.0, 573.0, 525.0, 536.0, 573.0, 573.0, 519.0, 464.0, 533.0, 525.0, 510.0, 576.0, 507.0, 527.0, 570.0, 510.0, 501.0, 522.0, 570.0, 516.0, 467.0, 518.0, 507.0, 579.0, 530.0, 570.0, 398.0, 582.0, 573.0, 433.0, 530.0, 570.0, 516.0, 524.0, 399.0, 570.0, 533.0, 573.0, 522.0, 525.0, 501.0, 522.0, 564.0, 579.0, 507.0, 530.0, 525.0, 365.0, 518.0, 522.0, 501.0, 525.0, 515.0, 576.0, 582.0, 530.0, 507.0, 522.0, 530.0, 522.0, 456.0, 504.0, 582.0, 579.0, 519.0, 533.0, 573.0, 525.0, 498.0, 567.0, 573.0, 450.0, 479.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 267.0, 255.0, 272.0, 
250.0, 246.0, 236.0, 273.0, 249.0, 283.0, 296.0, 298.0, 275.0, 264.0, 263.0, 266.0, 264.0, 255.0, 267.0, 58.0, 65.0, 249.0, 270.0, 298.0, 272.0, 261.0, 264.0, 245.0, 225.0, 252.0, 270.0, 276.0, 297.0, 243.0, 249.0, 273.0, 297.0, 292.0, 284.0, 231.0, 234.0, 201.0, 210.0, 269.0, 256.0, 262.0, 263.0, 287.0, 292.0, 265.0, 245.0, 248.0, 268.0, 292.0, 281.0, 259.0, 266.0, 273.0, 263.0, 271.0, 302.0, 295.0, 278.0, 254.0, 265.0, 231.0, 233.0, 262.0, 271.0, 260.0, 265.0, 249.0, 261.0, 286.0, 290.0, 251.0, 256.0, 280.0, 247.0, 287.0, 283.0, 259.0, 251.0, 249.0, 252.0, 257.0, 265.0, 286.0, 284.0, 256.0, 260.0, 236.0, 231.0, 265.0, 253.0, 262.0, 245.0, 280.0, 299.0, 276.0, 254.0, 301.0, 269.0, 205.0, 193.0, 291.0, 291.0, 292.0, 281.0, 216.0, 217.0, 283.0, 247.0, 292.0, 278.0, 272.0, 244.0, 255.0, 269.0, 208.0, 191.0, 274.0, 296.0, 271.0, 262.0, 278.0, 295.0, 252.0, 270.0, 263.0, 262.0, 255.0, 246.0, 249.0, 273.0, 292.0, 272.0, 290.0, 289.0, 251.0, 256.0, 252.0, 278.0, 263.0, 262.0, 185.0, 180.0, 260.0, 258.0, 254.0, 268.0, 247.0, 254.0, 265.0, 260.0, 262.0, 253.0, 290.0, 286.0, 296.0, 286.0, 257.0, 273.0, 255.0, 252.0, 265.0, 257.0, 265.0, 265.0, 274.0, 248.0, 218.0, 238.0, 260.0, 244.0, 291.0, 291.0, 293.0, 286.0, 260.0, 259.0, 270.0, 263.0, 285.0, 288.0, 279.0, 246.0, 252.0, 246.0, 275.0, 292.0, 287.0, 286.0, 218.0, 232.0, 248.0, 231.0, 260.0, 262.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6870995124483497, "mean_processing_ms": 0.4048687165306798, "mean_inference_ms": 2.2359032456104564}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2928000, "num_steps_sampled": 1561600, "sample_time_ms": 20397.642, "load_time_ms": 37.125, "grad_time_ms": 9297.193, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 7.523163957366517e-38, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016971243312582374, "policy_loss": -0.006050370167940855, "vf_loss": 83.2496109008789, "vf_explained_var": 0.7647652626037598, "kl": 0.0023221501614898443, "entropy": 
1.154932975769043, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1561600, "episodes_total": 3904, "training_iteration": 122, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-45-44", "timestamp": 1660250744, "time_this_iter_s": 29.548327922821045, "time_total_s": 9162.521025419235, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": 
{"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9162.521025419235, "timesteps_since_restore": 1561600, "iterations_since_restore": 122, "perf": {"cpu_util_percent": 34.20476190476191, "ram_util_percent": 58.335714285714296}} -{"episode_reward_max": 582.0, "episode_reward_min": 123.0, "episode_reward_mean": 527.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 58.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 263.885}, "custom_metrics": {"sparse_reward_mean": 182.6, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 162.57, "shaped_reward_min": 43, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.77, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.84, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 1.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.97, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.64, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.53, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.24, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.6, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.15, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.63, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.07, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.55, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.48, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.91, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.24, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.6, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.24, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.6, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [536.0, 573.0, 564.0, 532.0, 527.0, 579.0, 573.0, 519.0, 530.0, 576.0, 576.0, 576.0, 573.0, 576.0, 476.0, 516.0, 501.0, 522.0, 498.0, 576.0, 527.0, 482.0, 519.0, 576.0, 533.0, 570.0, 522.0, 527.0, 530.0, 536.0, 527.0, 573.0, 522.0, 525.0, 501.0, 522.0, 564.0, 579.0, 507.0, 530.0, 525.0, 365.0, 518.0, 522.0, 501.0, 525.0, 515.0, 576.0, 582.0, 530.0, 507.0, 522.0, 530.0, 522.0, 456.0, 504.0, 582.0, 579.0, 519.0, 533.0, 573.0, 525.0, 498.0, 567.0, 573.0, 450.0, 479.0, 522.0, 576.0, 522.0, 522.0, 482.0, 522.0, 579.0, 573.0, 527.0, 530.0, 522.0, 123.0, 519.0, 570.0, 525.0, 470.0, 522.0, 573.0, 492.0, 570.0, 576.0, 465.0, 411.0, 525.0, 525.0, 579.0, 510.0, 516.0, 573.0, 525.0, 536.0, 573.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 258.0, 292.0, 281.0, 269.0, 295.0, 260.0, 272.0, 253.0, 274.0, 305.0, 274.0, 287.0, 286.0, 256.0, 263.0, 262.0, 268.0, 287.0, 289.0, 281.0, 295.0, 285.0, 291.0, 278.0, 295.0, 293.0, 283.0, 248.0, 228.0, 251.0, 265.0, 249.0, 252.0, 268.0, 254.0, 251.0, 247.0, 293.0, 283.0, 270.0, 257.0, 235.0, 247.0, 258.0, 261.0, 294.0, 282.0, 265.0, 268.0, 285.0, 285.0, 271.0, 251.0, 259.0, 268.0, 257.0, 273.0, 270.0, 266.0, 259.0, 268.0, 285.0, 288.0, 252.0, 270.0, 263.0, 262.0, 255.0, 246.0, 249.0, 273.0, 292.0, 272.0, 290.0, 289.0, 251.0, 256.0, 252.0, 278.0, 263.0, 262.0, 185.0, 180.0, 260.0, 258.0, 254.0, 268.0, 247.0, 254.0, 265.0, 260.0, 262.0, 253.0, 290.0, 286.0, 296.0, 286.0, 257.0, 273.0, 255.0, 252.0, 265.0, 257.0, 265.0, 265.0, 274.0, 248.0, 218.0, 238.0, 260.0, 244.0, 291.0, 291.0, 293.0, 286.0, 260.0, 259.0, 270.0, 263.0, 285.0, 288.0, 279.0, 246.0, 252.0, 246.0, 275.0, 292.0, 287.0, 286.0, 218.0, 232.0, 248.0, 231.0, 260.0, 262.0, 285.0, 291.0, 267.0, 255.0, 272.0, 250.0, 246.0, 236.0, 273.0, 249.0, 283.0, 296.0, 298.0, 275.0, 264.0, 263.0, 266.0, 264.0, 255.0, 267.0, 58.0, 65.0, 249.0, 270.0, 298.0, 272.0, 261.0, 264.0, 245.0, 225.0, 252.0, 270.0, 276.0, 297.0, 243.0, 249.0, 273.0, 297.0, 292.0, 284.0, 231.0, 234.0, 201.0, 210.0, 269.0, 256.0, 262.0, 263.0, 287.0, 292.0, 265.0, 245.0, 248.0, 268.0, 292.0, 281.0, 259.0, 266.0, 273.0, 263.0, 271.0, 302.0, 295.0, 278.0]}, "sampler_perf": {"mean_env_wait_ms": 1.676944844024091, "mean_processing_ms": 0.4028482080008365, "mean_inference_ms": 2.2261191245028336}, "off_policy_estimator": {}, "info": {"num_steps_trained": 2952000, "num_steps_sampled": 1574400, "sample_time_ms": 20489.857, "load_time_ms": 37.388, "grad_time_ms": 9147.038, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 3.7615819786832586e-38, 
"cur_lr": 0.0010000000474974513, "total_loss": 0.004776147659868002, "policy_loss": -0.0032110288739204407, "vf_loss": 85.63726806640625, "vf_explained_var": 0.7386021614074707, "kl": 0.0019908936228603125, "entropy": 1.1530929803848267, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1574400, "episodes_total": 3936, "training_iteration": 123, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-46-15", "timestamp": 1660250775, "time_this_iter_s": 30.61848020553589, "time_total_s": 9193.139505624771, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": 
Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, 
"output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9193.139505624771, "timesteps_since_restore": 1574400, "iterations_since_restore": 123, "perf": {"cpu_util_percent": 33.09302325581395, "ram_util_percent": 58.90232558139535}} -{"episode_reward_max": 582.0, "episode_reward_min": 123.0, "episode_reward_mean": 532.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 58.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 266.085}, "custom_metrics": {"sparse_reward_mean": 184.2, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 163.77, "shaped_reward_min": 43, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.6, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.81, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.11, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.59, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.53, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.18, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.78, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.98, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.81, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, 
"useful_dish_pickup_agent_1_mean": 4.07, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.42, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.33, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.18, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.78, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.18, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.78, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 501.0, 519.0, 573.0, 579.0, 527.0, 567.0, 519.0, 579.0, 522.0, 519.0, 524.0, 570.0, 519.0, 573.0, 521.0, 576.0, 507.0, 576.0, 573.0, 455.0, 533.0, 525.0, 582.0, 579.0, 576.0, 516.0, 510.0, 522.0, 513.0, 513.0, 573.0, 573.0, 450.0, 479.0, 522.0, 576.0, 522.0, 522.0, 482.0, 522.0, 579.0, 573.0, 527.0, 530.0, 522.0, 123.0, 519.0, 570.0, 525.0, 470.0, 522.0, 573.0, 492.0, 570.0, 576.0, 465.0, 411.0, 525.0, 525.0, 579.0, 510.0, 516.0, 573.0, 525.0, 536.0, 573.0, 573.0, 536.0, 573.0, 564.0, 532.0, 527.0, 579.0, 573.0, 519.0, 530.0, 576.0, 576.0, 576.0, 573.0, 576.0, 476.0, 516.0, 501.0, 522.0, 498.0, 576.0, 527.0, 482.0, 519.0, 576.0, 533.0, 570.0, 522.0, 527.0, 530.0, 536.0, 527.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [270.0, 255.0, 250.0, 251.0, 260.0, 259.0, 289.0, 284.0, 296.0, 283.0, 279.0, 248.0, 288.0, 279.0, 256.0, 263.0, 298.0, 281.0, 264.0, 258.0, 267.0, 252.0, 260.0, 264.0, 272.0, 298.0, 253.0, 266.0, 289.0, 284.0, 271.0, 250.0, 297.0, 279.0, 243.0, 264.0, 290.0, 286.0, 288.0, 285.0, 219.0, 236.0, 270.0, 263.0, 280.0, 245.0, 293.0, 289.0, 294.0, 285.0, 270.0, 306.0, 255.0, 261.0, 263.0, 247.0, 251.0, 271.0, 255.0, 258.0, 267.0, 246.0, 284.0, 289.0, 287.0, 286.0, 218.0, 232.0, 248.0, 231.0, 260.0, 262.0, 285.0, 291.0, 267.0, 255.0, 272.0, 250.0, 246.0, 236.0, 273.0, 249.0, 283.0, 296.0, 298.0, 275.0, 264.0, 263.0, 266.0, 264.0, 255.0, 267.0, 58.0, 65.0, 249.0, 270.0, 298.0, 272.0, 261.0, 264.0, 245.0, 225.0, 252.0, 270.0, 276.0, 297.0, 243.0, 249.0, 273.0, 297.0, 292.0, 284.0, 231.0, 234.0, 201.0, 210.0, 269.0, 256.0, 262.0, 263.0, 287.0, 292.0, 265.0, 245.0, 248.0, 268.0, 292.0, 281.0, 259.0, 266.0, 273.0, 263.0, 271.0, 302.0, 295.0, 278.0, 278.0, 258.0, 292.0, 281.0, 269.0, 295.0, 260.0, 272.0, 253.0, 274.0, 305.0, 274.0, 287.0, 286.0, 256.0, 263.0, 262.0, 268.0, 287.0, 289.0, 281.0, 295.0, 285.0, 291.0, 278.0, 295.0, 293.0, 283.0, 248.0, 228.0, 251.0, 265.0, 249.0, 252.0, 268.0, 254.0, 251.0, 247.0, 293.0, 283.0, 270.0, 257.0, 235.0, 247.0, 258.0, 261.0, 294.0, 282.0, 265.0, 268.0, 285.0, 285.0, 271.0, 251.0, 259.0, 268.0, 257.0, 273.0, 270.0, 266.0, 259.0, 268.0, 285.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.666936267539323, "mean_processing_ms": 0.40085873681350664, "mean_inference_ms": 2.216239678267129}, "off_policy_estimator": {}, "info": 
{"num_steps_trained": 2976000, "num_steps_sampled": 1587200, "sample_time_ms": 20498.249, "load_time_ms": 37.366, "grad_time_ms": 8983.735, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 1.8807909893416293e-38, "cur_lr": 0.0010000000474974513, "total_loss": 0.001341886818408966, "policy_loss": -0.006108943372964859, "vf_loss": 80.26326751708984, "vf_explained_var": 0.763457715511322, "kl": 0.0015635616146028042, "entropy": 1.1509909629821777, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1587200, "episodes_total": 3968, "training_iteration": 124, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-46-43", "timestamp": 1660250803, "time_this_iter_s": 28.20863699913025, "time_total_s": 9221.348142623901, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": 
{"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9221.348142623901, "timesteps_since_restore": 1587200, "iterations_since_restore": 124, "perf": {"cpu_util_percent": 33.417500000000004, "ram_util_percent": 58.46}} -{"episode_reward_max": 582.0, "episode_reward_min": 450.0, "episode_reward_mean": 544.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 212.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 272.12}, "custom_metrics": {"sparse_reward_mean": 188.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.04, "shaped_reward_min": 130, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.82, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.58, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.42, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.37, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.29, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.25, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 
11, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.33, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.29, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.25, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.29, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.25, "viable_onion_potting_agent_1_min": 11, 
"viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 533.0, 573.0, 579.0, 564.0, 579.0, 579.0, 570.0, 567.0, 524.0, 573.0, 573.0, 516.0, 525.0, 522.0, 450.0, 576.0, 513.0, 476.0, 573.0, 576.0, 576.0, 579.0, 573.0, 522.0, 519.0, 579.0, 539.0, 570.0, 573.0, 507.0, 573.0, 525.0, 536.0, 573.0, 573.0, 536.0, 573.0, 564.0, 532.0, 527.0, 579.0, 573.0, 519.0, 530.0, 576.0, 576.0, 576.0, 573.0, 576.0, 476.0, 516.0, 501.0, 522.0, 498.0, 576.0, 527.0, 482.0, 519.0, 576.0, 533.0, 570.0, 522.0, 527.0, 530.0, 536.0, 527.0, 573.0, 525.0, 501.0, 519.0, 573.0, 579.0, 527.0, 567.0, 519.0, 579.0, 522.0, 519.0, 524.0, 570.0, 519.0, 573.0, 521.0, 576.0, 507.0, 576.0, 573.0, 455.0, 
533.0, 525.0, 582.0, 579.0, 576.0, 516.0, 510.0, 522.0, 513.0, 513.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 295.0, 270.0, 263.0, 304.0, 269.0, 285.0, 294.0, 286.0, 278.0, 279.0, 300.0, 290.0, 289.0, 284.0, 286.0, 297.0, 270.0, 281.0, 243.0, 283.0, 290.0, 288.0, 285.0, 261.0, 255.0, 257.0, 268.0, 258.0, 264.0, 212.0, 238.0, 292.0, 284.0, 259.0, 254.0, 239.0, 237.0, 290.0, 283.0, 279.0, 297.0, 288.0, 288.0, 280.0, 299.0, 289.0, 284.0, 259.0, 263.0, 274.0, 245.0, 299.0, 280.0, 261.0, 278.0, 295.0, 275.0, 285.0, 288.0, 240.0, 267.0, 288.0, 285.0, 259.0, 266.0, 273.0, 263.0, 271.0, 302.0, 295.0, 278.0, 278.0, 258.0, 292.0, 281.0, 269.0, 295.0, 260.0, 272.0, 253.0, 274.0, 305.0, 274.0, 287.0, 286.0, 256.0, 263.0, 262.0, 268.0, 287.0, 289.0, 281.0, 295.0, 285.0, 291.0, 278.0, 295.0, 293.0, 283.0, 248.0, 228.0, 251.0, 265.0, 249.0, 252.0, 268.0, 254.0, 251.0, 247.0, 293.0, 283.0, 270.0, 257.0, 235.0, 247.0, 258.0, 261.0, 294.0, 282.0, 265.0, 268.0, 285.0, 285.0, 271.0, 251.0, 259.0, 268.0, 257.0, 273.0, 270.0, 266.0, 259.0, 268.0, 285.0, 288.0, 270.0, 255.0, 250.0, 251.0, 260.0, 259.0, 289.0, 284.0, 296.0, 283.0, 279.0, 248.0, 288.0, 279.0, 256.0, 263.0, 298.0, 281.0, 264.0, 258.0, 267.0, 252.0, 260.0, 264.0, 272.0, 298.0, 253.0, 266.0, 289.0, 284.0, 271.0, 250.0, 297.0, 279.0, 243.0, 264.0, 290.0, 286.0, 288.0, 285.0, 219.0, 236.0, 270.0, 263.0, 280.0, 245.0, 293.0, 289.0, 294.0, 285.0, 270.0, 306.0, 255.0, 261.0, 263.0, 247.0, 251.0, 271.0, 255.0, 258.0, 267.0, 
246.0, 284.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6570861935641006, "mean_processing_ms": 0.3989029759372638, "mean_inference_ms": 2.206484585731059}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3000000, "num_steps_sampled": 1600000, "sample_time_ms": 20649.468, "load_time_ms": 37.298, "grad_time_ms": 8980.547, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001723404973745346, "policy_loss": -0.005563261453062296, "vf_loss": 78.65084075927734, "vf_explained_var": 0.7562505602836609, "kl": 0.00201344583183527, "entropy": 1.1568351984024048, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1600000, "episodes_total": 4000, "training_iteration": 125, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-47-13", "timestamp": 1660250833, "time_this_iter_s": 29.59022808074951, "time_total_s": 9250.93837070465, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9250.93837070465, "timesteps_since_restore": 1600000, "iterations_since_restore": 125, "perf": {"cpu_util_percent": 29.699999999999996, "ram_util_percent": 58.414285714285725}} -{"episode_reward_max": 627.0, "episode_reward_min": 288.0, "episode_reward_mean": 541.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 136.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 270.505}, "custom_metrics": {"sparse_reward_mean": 187.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 165.81, "shaped_reward_min": 88, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.05, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.41, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.46, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.28, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 
16.03, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.15, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.46, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.28, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.03, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.28, 
"viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.03, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [470.0, 522.0, 519.0, 570.0, 522.0, 522.0, 522.0, 627.0, 461.0, 484.0, 573.0, 573.0, 288.0, 561.0, 513.0, 576.0, 573.0, 576.0, 522.0, 525.0, 576.0, 519.0, 510.0, 576.0, 576.0, 522.0, 533.0, 530.0, 527.0, 579.0, 570.0, 522.0, 530.0, 536.0, 527.0, 573.0, 525.0, 501.0, 519.0, 573.0, 579.0, 527.0, 567.0, 519.0, 579.0, 522.0, 519.0, 524.0, 570.0, 519.0, 573.0, 521.0, 576.0, 507.0, 576.0, 573.0, 455.0, 533.0, 525.0, 582.0, 579.0, 576.0, 516.0, 510.0, 522.0, 
513.0, 513.0, 573.0, 579.0, 533.0, 573.0, 579.0, 564.0, 579.0, 579.0, 570.0, 567.0, 524.0, 573.0, 573.0, 516.0, 525.0, 522.0, 450.0, 576.0, 513.0, 476.0, 573.0, 576.0, 576.0, 579.0, 573.0, 522.0, 519.0, 579.0, 539.0, 570.0, 573.0, 507.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [234.0, 236.0, 268.0, 254.0, 259.0, 260.0, 285.0, 285.0, 260.0, 262.0, 261.0, 261.0, 253.0, 269.0, 316.0, 311.0, 216.0, 245.0, 240.0, 244.0, 301.0, 272.0, 287.0, 286.0, 136.0, 152.0, 282.0, 279.0, 255.0, 258.0, 282.0, 294.0, 284.0, 289.0, 275.0, 301.0, 249.0, 273.0, 274.0, 251.0, 299.0, 277.0, 271.0, 248.0, 258.0, 252.0, 285.0, 291.0, 282.0, 294.0, 252.0, 270.0, 256.0, 277.0, 265.0, 265.0, 248.0, 279.0, 296.0, 283.0, 277.0, 293.0, 278.0, 244.0, 257.0, 273.0, 270.0, 266.0, 259.0, 268.0, 285.0, 288.0, 270.0, 255.0, 250.0, 251.0, 260.0, 259.0, 289.0, 284.0, 296.0, 283.0, 279.0, 248.0, 288.0, 279.0, 256.0, 263.0, 298.0, 281.0, 264.0, 258.0, 267.0, 252.0, 260.0, 264.0, 272.0, 298.0, 253.0, 266.0, 289.0, 284.0, 271.0, 250.0, 297.0, 279.0, 243.0, 264.0, 290.0, 286.0, 288.0, 285.0, 219.0, 236.0, 270.0, 263.0, 280.0, 245.0, 293.0, 289.0, 294.0, 285.0, 270.0, 306.0, 255.0, 261.0, 263.0, 247.0, 251.0, 271.0, 255.0, 258.0, 267.0, 246.0, 284.0, 289.0, 284.0, 295.0, 270.0, 263.0, 304.0, 269.0, 285.0, 294.0, 286.0, 278.0, 279.0, 300.0, 290.0, 289.0, 284.0, 286.0, 297.0, 270.0, 281.0, 243.0, 283.0, 290.0, 288.0, 285.0, 261.0, 255.0, 257.0, 268.0, 258.0, 264.0, 212.0, 238.0, 292.0, 284.0, 259.0, 254.0, 239.0, 
237.0, 290.0, 283.0, 279.0, 297.0, 288.0, 288.0, 280.0, 299.0, 289.0, 284.0, 259.0, 263.0, 274.0, 245.0, 299.0, 280.0, 261.0, 278.0, 295.0, 275.0, 285.0, 288.0, 240.0, 267.0, 288.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6473988294860027, "mean_processing_ms": 0.3969770866429698, "mean_inference_ms": 2.1969066690858874}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3024000, "num_steps_sampled": 1612800, "sample_time_ms": 20818.492, "load_time_ms": 37.263, "grad_time_ms": 8921.308, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004004280548542738, "policy_loss": -0.004071192815899849, "vf_loss": 86.5199966430664, "vf_explained_var": 0.7602561116218567, "kl": 0.0020587241742759943, "entropy": 1.153051495552063, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1612800, "episodes_total": 4032, "training_iteration": 126, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-47-43", "timestamp": 1660250863, "time_this_iter_s": 30.492609977722168, "time_total_s": 9281.430980682373, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9281.430980682373, "timesteps_since_restore": 1612800, "iterations_since_restore": 126, "perf": {"cpu_util_percent": 30.204651162790697, "ram_util_percent": 58.4372093023256}} -{"episode_reward_max": 630.0, "episode_reward_min": 288.0, "episode_reward_mean": 546.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 136.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 273.07}, "custom_metrics": {"sparse_reward_mean": 190.0, 
"sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 166.14, "shaped_reward_min": 88, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.74, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.0, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.13, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.38, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.83, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.43, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.45, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.54, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.95, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.88, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.29, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.97, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.54, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.95, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.54, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.95, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 570.0, 576.0, 573.0, 510.0, 573.0, 570.0, 527.0, 543.0, 570.0, 546.0, 525.0, 573.0, 579.0, 573.0, 495.0, 576.0, 579.0, 576.0, 522.0, 579.0, 519.0, 506.0, 576.0, 579.0, 579.0, 498.0, 567.0, 576.0, 504.0, 630.0, 579.0, 522.0, 513.0, 513.0, 573.0, 579.0, 533.0, 573.0, 579.0, 564.0, 
579.0, 579.0, 570.0, 567.0, 524.0, 573.0, 573.0, 516.0, 525.0, 522.0, 450.0, 576.0, 513.0, 476.0, 573.0, 576.0, 576.0, 579.0, 573.0, 522.0, 519.0, 579.0, 539.0, 570.0, 573.0, 507.0, 573.0, 470.0, 522.0, 519.0, 570.0, 522.0, 522.0, 522.0, 627.0, 461.0, 484.0, 573.0, 573.0, 288.0, 561.0, 513.0, 576.0, 573.0, 576.0, 522.0, 525.0, 576.0, 519.0, 510.0, 576.0, 576.0, 522.0, 533.0, 530.0, 527.0, 579.0, 570.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 274.0, 296.0, 290.0, 286.0, 283.0, 290.0, 264.0, 246.0, 290.0, 283.0, 277.0, 293.0, 248.0, 279.0, 273.0, 270.0, 279.0, 291.0, 272.0, 274.0, 273.0, 252.0, 289.0, 284.0, 290.0, 289.0, 282.0, 291.0, 239.0, 256.0, 287.0, 289.0, 284.0, 295.0, 281.0, 295.0, 262.0, 260.0, 299.0, 280.0, 244.0, 275.0, 253.0, 253.0, 287.0, 289.0, 283.0, 296.0, 280.0, 299.0, 253.0, 245.0, 291.0, 276.0, 291.0, 285.0, 244.0, 260.0, 303.0, 327.0, 274.0, 305.0, 251.0, 271.0, 255.0, 258.0, 267.0, 246.0, 284.0, 289.0, 284.0, 295.0, 270.0, 263.0, 304.0, 269.0, 285.0, 294.0, 286.0, 278.0, 279.0, 300.0, 290.0, 289.0, 284.0, 286.0, 297.0, 270.0, 281.0, 243.0, 283.0, 290.0, 288.0, 285.0, 261.0, 255.0, 257.0, 268.0, 258.0, 264.0, 212.0, 238.0, 292.0, 284.0, 259.0, 254.0, 239.0, 237.0, 290.0, 283.0, 279.0, 297.0, 288.0, 288.0, 280.0, 299.0, 289.0, 284.0, 259.0, 263.0, 274.0, 245.0, 299.0, 280.0, 261.0, 278.0, 295.0, 275.0, 285.0, 288.0, 240.0, 267.0, 288.0, 285.0, 234.0, 236.0, 268.0, 254.0, 259.0, 260.0, 285.0, 285.0, 260.0, 262.0, 261.0, 261.0, 253.0, 
269.0, 316.0, 311.0, 216.0, 245.0, 240.0, 244.0, 301.0, 272.0, 287.0, 286.0, 136.0, 152.0, 282.0, 279.0, 255.0, 258.0, 282.0, 294.0, 284.0, 289.0, 275.0, 301.0, 249.0, 273.0, 274.0, 251.0, 299.0, 277.0, 271.0, 248.0, 258.0, 252.0, 285.0, 291.0, 282.0, 294.0, 252.0, 270.0, 256.0, 277.0, 265.0, 265.0, 248.0, 279.0, 296.0, 283.0, 277.0, 293.0, 278.0, 244.0]}, "sampler_perf": {"mean_env_wait_ms": 1.637864403277332, "mean_processing_ms": 0.39508190605522825, "mean_inference_ms": 2.1874563334987878}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3048000, "num_steps_sampled": 1625600, "sample_time_ms": 20804.416, "load_time_ms": 37.185, "grad_time_ms": 8880.278, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00042137285345233977, "policy_loss": -0.007074173539876938, "vf_loss": 72.28662872314453, "vf_explained_var": 0.7638903260231018, "kl": 0.0020576624665409327, "entropy": 1.1517143249511719, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1625600, "episodes_total": 4064, "training_iteration": 127, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-48-13", "timestamp": 1660250893, "time_this_iter_s": 29.709146738052368, "time_total_s": 9311.140127420425, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9311.140127420425, "timesteps_since_restore": 1625600, "iterations_since_restore": 127, "perf": {"cpu_util_percent": 33.61904761904762, "ram_util_percent": 58.37380952380953}} -{"episode_reward_max": 630.0, "episode_reward_min": 288.0, "episode_reward_mean": 546.4, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 136.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 273.2}, "custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 166.4, "shaped_reward_min": 88, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.0, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.98, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.33, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, 
"useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.44, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.97, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.72, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.44, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.96, 
"optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.44, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 567.0, 567.0, 558.0, 530.0, 573.0, 582.0, 582.0, 521.0, 576.0, 510.0, 
579.0, 579.0, 522.0, 573.0, 573.0, 576.0, 473.0, 419.0, 539.0, 573.0, 576.0, 570.0, 573.0, 484.0, 582.0, 576.0, 533.0, 396.0, 573.0, 570.0, 570.0, 573.0, 507.0, 573.0, 470.0, 522.0, 519.0, 570.0, 522.0, 522.0, 522.0, 627.0, 461.0, 484.0, 573.0, 573.0, 288.0, 561.0, 513.0, 576.0, 573.0, 576.0, 522.0, 525.0, 576.0, 519.0, 510.0, 576.0, 576.0, 522.0, 533.0, 530.0, 527.0, 579.0, 570.0, 522.0, 576.0, 570.0, 576.0, 573.0, 510.0, 573.0, 570.0, 527.0, 543.0, 570.0, 546.0, 525.0, 573.0, 579.0, 573.0, 495.0, 576.0, 579.0, 576.0, 522.0, 579.0, 519.0, 506.0, 576.0, 579.0, 579.0, 498.0, 567.0, 576.0, 504.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 282.0, 285.0, 291.0, 281.0, 286.0, 282.0, 285.0, 289.0, 269.0, 264.0, 266.0, 303.0, 270.0, 307.0, 275.0, 285.0, 297.0, 253.0, 268.0, 304.0, 272.0, 231.0, 279.0, 299.0, 280.0, 289.0, 290.0, 271.0, 251.0, 300.0, 273.0, 288.0, 285.0, 290.0, 286.0, 238.0, 235.0, 212.0, 207.0, 265.0, 274.0, 288.0, 285.0, 304.0, 272.0, 277.0, 293.0, 285.0, 288.0, 261.0, 223.0, 298.0, 284.0, 289.0, 287.0, 281.0, 252.0, 196.0, 200.0, 285.0, 288.0, 280.0, 290.0, 295.0, 275.0, 285.0, 288.0, 240.0, 267.0, 288.0, 285.0, 234.0, 236.0, 268.0, 254.0, 259.0, 260.0, 285.0, 285.0, 260.0, 262.0, 261.0, 261.0, 253.0, 269.0, 316.0, 311.0, 216.0, 245.0, 240.0, 244.0, 301.0, 272.0, 287.0, 286.0, 136.0, 152.0, 282.0, 279.0, 255.0, 258.0, 282.0, 294.0, 284.0, 289.0, 275.0, 301.0, 249.0, 273.0, 274.0, 251.0, 299.0, 277.0, 271.0, 248.0, 258.0, 252.0, 285.0, 291.0, 
282.0, 294.0, 252.0, 270.0, 256.0, 277.0, 265.0, 265.0, 248.0, 279.0, 296.0, 283.0, 277.0, 293.0, 278.0, 244.0, 286.0, 290.0, 274.0, 296.0, 290.0, 286.0, 283.0, 290.0, 264.0, 246.0, 290.0, 283.0, 277.0, 293.0, 248.0, 279.0, 273.0, 270.0, 279.0, 291.0, 272.0, 274.0, 273.0, 252.0, 289.0, 284.0, 290.0, 289.0, 282.0, 291.0, 239.0, 256.0, 287.0, 289.0, 284.0, 295.0, 281.0, 295.0, 262.0, 260.0, 299.0, 280.0, 244.0, 275.0, 253.0, 253.0, 287.0, 289.0, 283.0, 296.0, 280.0, 299.0, 253.0, 245.0, 291.0, 276.0, 291.0, 285.0, 244.0, 260.0, 303.0, 327.0, 274.0, 305.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6284902770333296, "mean_processing_ms": 0.39321693792454526, "mean_inference_ms": 2.178397267354796}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3072000, "num_steps_sampled": 1638400, "sample_time_ms": 20978.899, "load_time_ms": 37.247, "grad_time_ms": 8964.602, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014722892083227634, "policy_loss": -0.0057091922499239445, "vf_loss": 77.60167694091797, "vf_explained_var": 0.7587153315544128, "kl": 0.0015954332193359733, "entropy": 1.1573811769485474, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1638400, "episodes_total": 4096, "training_iteration": 128, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-48-44", "timestamp": 1660250924, "time_this_iter_s": 31.189378023147583, "time_total_s": 9342.329505443573, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9342.329505443573, "timesteps_since_restore": 1638400, "iterations_since_restore": 128, "perf": {"cpu_util_percent": 
28.313636363636366, "ram_util_percent": 58.377272727272725}} -{"episode_reward_max": 630.0, "episode_reward_min": 396.0, "episode_reward_mean": 548.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 274.355}, "custom_metrics": {"sparse_reward_mean": 191.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 166.71, "shaped_reward_min": 116, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.06, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.11, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.38, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, 
"onion_drop_agent_0_mean": 0.77, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.37, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.46, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.05, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.02, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.61, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.2, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.17, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 14.46, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.05, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.46, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.05, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [561.0, 510.0, 525.0, 576.0, 582.0, 573.0, 564.0, 516.0, 501.0, 567.0, 524.0, 570.0, 573.0, 530.0, 501.0, 444.0, 576.0, 476.0, 576.0, 507.0, 573.0, 519.0, 570.0, 530.0, 573.0, 512.0, 527.0, 570.0, 570.0, 487.0, 530.0, 582.0, 527.0, 579.0, 570.0, 522.0, 576.0, 570.0, 576.0, 573.0, 510.0, 573.0, 570.0, 527.0, 543.0, 570.0, 546.0, 525.0, 573.0, 579.0, 573.0, 495.0, 576.0, 579.0, 576.0, 522.0, 579.0, 519.0, 506.0, 576.0, 579.0, 579.0, 498.0, 567.0, 576.0, 504.0, 630.0, 579.0, 573.0, 576.0, 567.0, 567.0, 558.0, 530.0, 573.0, 582.0, 582.0, 521.0, 576.0, 510.0, 579.0, 579.0, 522.0, 573.0, 573.0, 576.0, 473.0, 419.0, 539.0, 573.0, 576.0, 570.0, 573.0, 484.0, 582.0, 576.0, 533.0, 396.0, 573.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 280.0, 253.0, 257.0, 267.0, 258.0, 296.0, 280.0, 283.0, 299.0, 275.0, 298.0, 287.0, 277.0, 267.0, 249.0, 247.0, 254.0, 284.0, 283.0, 267.0, 257.0, 279.0, 291.0, 287.0, 286.0, 272.0, 258.0, 234.0, 267.0, 218.0, 226.0, 275.0, 301.0, 242.0, 234.0, 289.0, 287.0, 240.0, 267.0, 274.0, 299.0, 263.0, 256.0, 270.0, 300.0, 268.0, 262.0, 274.0, 299.0, 255.0, 257.0, 271.0, 256.0, 286.0, 284.0, 269.0, 301.0, 232.0, 255.0, 270.0, 260.0, 296.0, 286.0, 248.0, 279.0, 296.0, 283.0, 277.0, 293.0, 278.0, 244.0, 286.0, 290.0, 274.0, 296.0, 290.0, 286.0, 283.0, 290.0, 264.0, 246.0, 290.0, 283.0, 277.0, 293.0, 248.0, 279.0, 273.0, 270.0, 279.0, 
291.0, 272.0, 274.0, 273.0, 252.0, 289.0, 284.0, 290.0, 289.0, 282.0, 291.0, 239.0, 256.0, 287.0, 289.0, 284.0, 295.0, 281.0, 295.0, 262.0, 260.0, 299.0, 280.0, 244.0, 275.0, 253.0, 253.0, 287.0, 289.0, 283.0, 296.0, 280.0, 299.0, 253.0, 245.0, 291.0, 276.0, 291.0, 285.0, 244.0, 260.0, 303.0, 327.0, 274.0, 305.0, 291.0, 282.0, 285.0, 291.0, 281.0, 286.0, 282.0, 285.0, 289.0, 269.0, 264.0, 266.0, 303.0, 270.0, 307.0, 275.0, 285.0, 297.0, 253.0, 268.0, 304.0, 272.0, 231.0, 279.0, 299.0, 280.0, 289.0, 290.0, 271.0, 251.0, 300.0, 273.0, 288.0, 285.0, 290.0, 286.0, 238.0, 235.0, 212.0, 207.0, 265.0, 274.0, 288.0, 285.0, 304.0, 272.0, 277.0, 293.0, 285.0, 288.0, 261.0, 223.0, 298.0, 284.0, 289.0, 287.0, 281.0, 252.0, 196.0, 200.0, 285.0, 288.0, 280.0, 290.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6192641289400398, "mean_processing_ms": 0.39137958658922545, "mean_inference_ms": 2.169420054882037}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3096000, "num_steps_sampled": 1651200, "sample_time_ms": 21000.262, "load_time_ms": 37.071, "grad_time_ms": 9066.202, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002854668302461505, "policy_loss": -0.004409888293594122, "vf_loss": 78.45098114013672, "vf_explained_var": 0.7681138515472412, "kl": 0.0020372606813907623, "entropy": 1.1610809564590454, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1651200, "episodes_total": 4128, "training_iteration": 129, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-49-15", "timestamp": 1660250955, "time_this_iter_s": 31.373005151748657, "time_total_s": 9373.702510595322, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9373.702510595322, "timesteps_since_restore": 1651200, "iterations_since_restore": 129, "perf": {"cpu_util_percent": 30.084444444444443, "ram_util_percent": 58.27111111111109}} -{"episode_reward_max": 630.0, "episode_reward_min": 396.0, "episode_reward_mean": 544.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 272.245}, "custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 166.09, "shaped_reward_min": 116, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.51, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, 
"useful_onion_pickup_agent_0_mean": 15.0, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.38, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.37, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.02, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.07, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.62, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.72, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.12, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.21, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.11, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.37, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.02, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.37, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.02, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 567.0, 516.0, 522.0, 573.0, 510.0, 479.0, 524.0, 501.0, 512.0, 573.0, 522.0, 582.0, 579.0, 576.0, 576.0, 513.0, 570.0, 456.0, 567.0, 539.0, 567.0, 570.0, 576.0, 522.0, 519.0, 525.0, 570.0, 579.0, 453.0, 515.0, 576.0, 504.0, 630.0, 579.0, 573.0, 576.0, 567.0, 567.0, 558.0, 530.0, 573.0, 582.0, 582.0, 521.0, 576.0, 510.0, 579.0, 579.0, 522.0, 573.0, 573.0, 576.0, 473.0, 419.0, 539.0, 573.0, 576.0, 570.0, 573.0, 484.0, 582.0, 576.0, 533.0, 396.0, 573.0, 570.0, 561.0, 510.0, 525.0, 576.0, 582.0, 573.0, 564.0, 516.0, 501.0, 567.0, 524.0, 570.0, 573.0, 530.0, 501.0, 444.0, 576.0, 476.0, 576.0, 507.0, 573.0, 519.0, 570.0, 530.0, 573.0, 512.0, 527.0, 570.0, 570.0, 487.0, 530.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 288.0, 288.0, 299.0, 268.0, 272.0, 244.0, 252.0, 270.0, 282.0, 291.0, 256.0, 254.0, 250.0, 229.0, 266.0, 258.0, 236.0, 265.0, 238.0, 274.0, 267.0, 306.0, 262.0, 260.0, 288.0, 294.0, 291.0, 288.0, 295.0, 281.0, 277.0, 299.0, 272.0, 241.0, 285.0, 285.0, 226.0, 230.0, 281.0, 286.0, 265.0, 274.0, 287.0, 280.0, 290.0, 280.0, 293.0, 283.0, 256.0, 266.0, 253.0, 266.0, 266.0, 259.0, 287.0, 283.0, 280.0, 299.0, 
236.0, 217.0, 268.0, 247.0, 291.0, 285.0, 244.0, 260.0, 303.0, 327.0, 274.0, 305.0, 291.0, 282.0, 285.0, 291.0, 281.0, 286.0, 282.0, 285.0, 289.0, 269.0, 264.0, 266.0, 303.0, 270.0, 307.0, 275.0, 285.0, 297.0, 253.0, 268.0, 304.0, 272.0, 231.0, 279.0, 299.0, 280.0, 289.0, 290.0, 271.0, 251.0, 300.0, 273.0, 288.0, 285.0, 290.0, 286.0, 238.0, 235.0, 212.0, 207.0, 265.0, 274.0, 288.0, 285.0, 304.0, 272.0, 277.0, 293.0, 285.0, 288.0, 261.0, 223.0, 298.0, 284.0, 289.0, 287.0, 281.0, 252.0, 196.0, 200.0, 285.0, 288.0, 280.0, 290.0, 281.0, 280.0, 253.0, 257.0, 267.0, 258.0, 296.0, 280.0, 283.0, 299.0, 275.0, 298.0, 287.0, 277.0, 267.0, 249.0, 247.0, 254.0, 284.0, 283.0, 267.0, 257.0, 279.0, 291.0, 287.0, 286.0, 272.0, 258.0, 234.0, 267.0, 218.0, 226.0, 275.0, 301.0, 242.0, 234.0, 289.0, 287.0, 240.0, 267.0, 274.0, 299.0, 263.0, 256.0, 270.0, 300.0, 268.0, 262.0, 274.0, 299.0, 255.0, 257.0, 271.0, 256.0, 286.0, 284.0, 269.0, 301.0, 232.0, 255.0, 270.0, 260.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.6101955338743283, "mean_processing_ms": 0.3895722397312522, "mean_inference_ms": 2.1607416335063014}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3120000, "num_steps_sampled": 1664000, "sample_time_ms": 21017.48, "load_time_ms": 36.902, "grad_time_ms": 9228.931, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00922582671046257, "policy_loss": 0.0016869133105501533, "vf_loss": 81.20984649658203, "vf_explained_var": 0.7594642043113708, "kl": 0.003354247659444809, "entropy": 1.164129376411438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1664000, "episodes_total": 4160, "training_iteration": 130, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-49-47", "timestamp": 1660250987, "time_this_iter_s": 31.841378211975098, "time_total_s": 9405.543888807297, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9405.543888807297, "timesteps_since_restore": 1664000, "iterations_since_restore": 130, "perf": {"cpu_util_percent": 32.67111111111111, "ram_util_percent": 58.35999999999998}} -{"episode_reward_max": 582.0, "episode_reward_min": 396.0, "episode_reward_mean": 538.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 269.245}, "custom_metrics": {"sparse_reward_mean": 187.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 164.09, "shaped_reward_min": 116, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 15.79, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.75, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.98, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.94, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.81, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.39, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.59, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.61, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.01, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.17, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.21, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.67, 
"soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.59, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.61, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.59, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.61, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 570.0, 516.0, 573.0, 570.0, 473.0, 573.0, 521.0, 570.0, 530.0, 582.0, 525.0, 579.0, 533.0, 564.0, 582.0, 461.0, 573.0, 524.0, 576.0, 516.0, 573.0, 573.0, 396.0, 576.0, 507.0, 467.0, 549.0, 527.0, 522.0, 418.0, 579.0, 533.0, 396.0, 573.0, 570.0, 561.0, 510.0, 525.0, 576.0, 582.0, 573.0, 564.0, 516.0, 501.0, 567.0, 524.0, 570.0, 573.0, 530.0, 501.0, 444.0, 576.0, 476.0, 576.0, 507.0, 573.0, 519.0, 570.0, 530.0, 573.0, 512.0, 527.0, 570.0, 570.0, 487.0, 530.0, 582.0, 582.0, 576.0, 567.0, 516.0, 522.0, 573.0, 510.0, 479.0, 524.0, 501.0, 512.0, 573.0, 522.0, 582.0, 579.0, 576.0, 576.0, 513.0, 570.0, 456.0, 567.0, 539.0, 567.0, 570.0, 576.0, 522.0, 519.0, 525.0, 570.0, 579.0, 453.0, 515.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [279.0, 294.0, 280.0, 290.0, 253.0, 263.0, 288.0, 285.0, 298.0, 272.0, 235.0, 238.0, 282.0, 291.0, 265.0, 256.0, 293.0, 277.0, 264.0, 266.0, 291.0, 291.0, 271.0, 254.0, 287.0, 292.0, 274.0, 259.0, 286.0, 278.0, 287.0, 
295.0, 215.0, 246.0, 288.0, 285.0, 268.0, 256.0, 288.0, 288.0, 276.0, 240.0, 292.0, 281.0, 300.0, 273.0, 191.0, 205.0, 290.0, 286.0, 250.0, 257.0, 241.0, 226.0, 274.0, 275.0, 272.0, 255.0, 262.0, 260.0, 207.0, 211.0, 284.0, 295.0, 281.0, 252.0, 196.0, 200.0, 285.0, 288.0, 280.0, 290.0, 281.0, 280.0, 253.0, 257.0, 267.0, 258.0, 296.0, 280.0, 283.0, 299.0, 275.0, 298.0, 287.0, 277.0, 267.0, 249.0, 247.0, 254.0, 284.0, 283.0, 267.0, 257.0, 279.0, 291.0, 287.0, 286.0, 272.0, 258.0, 234.0, 267.0, 218.0, 226.0, 275.0, 301.0, 242.0, 234.0, 289.0, 287.0, 240.0, 267.0, 274.0, 299.0, 263.0, 256.0, 270.0, 300.0, 268.0, 262.0, 274.0, 299.0, 255.0, 257.0, 271.0, 256.0, 286.0, 284.0, 269.0, 301.0, 232.0, 255.0, 270.0, 260.0, 296.0, 286.0, 294.0, 288.0, 288.0, 288.0, 299.0, 268.0, 272.0, 244.0, 252.0, 270.0, 282.0, 291.0, 256.0, 254.0, 250.0, 229.0, 266.0, 258.0, 236.0, 265.0, 238.0, 274.0, 267.0, 306.0, 262.0, 260.0, 288.0, 294.0, 291.0, 288.0, 295.0, 281.0, 277.0, 299.0, 272.0, 241.0, 285.0, 285.0, 226.0, 230.0, 281.0, 286.0, 265.0, 274.0, 287.0, 280.0, 290.0, 280.0, 293.0, 283.0, 256.0, 266.0, 253.0, 266.0, 266.0, 259.0, 287.0, 283.0, 280.0, 299.0, 236.0, 217.0, 268.0, 247.0]}, "sampler_perf": {"mean_env_wait_ms": 1.601265947983415, "mean_processing_ms": 0.38779407949911077, "mean_inference_ms": 2.1521646972676964}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3144000, "num_steps_sampled": 1676800, "sample_time_ms": 21002.089, "load_time_ms": 36.986, "grad_time_ms": 9370.108, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003206493565812707, "policy_loss": -0.00454886956140399, "vf_loss": 83.29342651367188, "vf_explained_var": 0.7723144888877869, "kl": 0.0017231384990736842, "entropy": 1.1479605436325073, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1676800, "episodes_total": 4192, 
"training_iteration": 131, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-50-19", "timestamp": 1660251019, "time_this_iter_s": 31.782477855682373, "time_total_s": 9437.32636666298, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9437.32636666298, "timesteps_since_restore": 1676800, "iterations_since_restore": 131, "perf": {"cpu_util_percent": 30.406818181818174, "ram_util_percent": 58.26818181818181}} -{"episode_reward_max": 587.0, "episode_reward_min": 396.0, "episode_reward_mean": 544.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 272.11}, "custom_metrics": {"sparse_reward_mean": 189.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 166.22, "shaped_reward_min": 116, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.9, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.98, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.5, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.29, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.95, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.82, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.38, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.73, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.85, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.15, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.73, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.85, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.73, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.85, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 504.0, 573.0, 561.0, 576.0, 582.0, 530.0, 498.0, 573.0, 573.0, 579.0, 579.0, 533.0, 522.0, 579.0, 576.0, 539.0, 519.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 495.0, 587.0, 570.0, 524.0, 579.0, 509.0, 527.0, 570.0, 487.0, 530.0, 582.0, 582.0, 576.0, 567.0, 516.0, 522.0, 573.0, 510.0, 479.0, 524.0, 501.0, 512.0, 573.0, 522.0, 582.0, 579.0, 576.0, 576.0, 513.0, 570.0, 456.0, 567.0, 539.0, 567.0, 570.0, 576.0, 522.0, 519.0, 525.0, 570.0, 579.0, 453.0, 515.0, 573.0, 570.0, 516.0, 573.0, 570.0, 473.0, 573.0, 521.0, 570.0, 530.0, 582.0, 525.0, 579.0, 533.0, 564.0, 582.0, 461.0, 573.0, 524.0, 576.0, 516.0, 573.0, 573.0, 396.0, 576.0, 507.0, 467.0, 549.0, 527.0, 522.0, 418.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 278.0, 232.0, 272.0, 
287.0, 286.0, 289.0, 272.0, 282.0, 294.0, 286.0, 296.0, 268.0, 262.0, 253.0, 245.0, 290.0, 283.0, 278.0, 295.0, 273.0, 306.0, 290.0, 289.0, 260.0, 273.0, 267.0, 255.0, 285.0, 294.0, 293.0, 283.0, 272.0, 267.0, 248.0, 271.0, 287.0, 286.0, 288.0, 285.0, 291.0, 285.0, 289.0, 284.0, 274.0, 299.0, 282.0, 291.0, 296.0, 277.0, 250.0, 245.0, 288.0, 299.0, 289.0, 281.0, 260.0, 264.0, 283.0, 296.0, 246.0, 263.0, 267.0, 260.0, 269.0, 301.0, 232.0, 255.0, 270.0, 260.0, 296.0, 286.0, 294.0, 288.0, 288.0, 288.0, 299.0, 268.0, 272.0, 244.0, 252.0, 270.0, 282.0, 291.0, 256.0, 254.0, 250.0, 229.0, 266.0, 258.0, 236.0, 265.0, 238.0, 274.0, 267.0, 306.0, 262.0, 260.0, 288.0, 294.0, 291.0, 288.0, 295.0, 281.0, 277.0, 299.0, 272.0, 241.0, 285.0, 285.0, 226.0, 230.0, 281.0, 286.0, 265.0, 274.0, 287.0, 280.0, 290.0, 280.0, 293.0, 283.0, 256.0, 266.0, 253.0, 266.0, 266.0, 259.0, 287.0, 283.0, 280.0, 299.0, 236.0, 217.0, 268.0, 247.0, 279.0, 294.0, 280.0, 290.0, 253.0, 263.0, 288.0, 285.0, 298.0, 272.0, 235.0, 238.0, 282.0, 291.0, 265.0, 256.0, 293.0, 277.0, 264.0, 266.0, 291.0, 291.0, 271.0, 254.0, 287.0, 292.0, 274.0, 259.0, 286.0, 278.0, 287.0, 295.0, 215.0, 246.0, 288.0, 285.0, 268.0, 256.0, 288.0, 288.0, 276.0, 240.0, 292.0, 281.0, 300.0, 273.0, 191.0, 205.0, 290.0, 286.0, 250.0, 257.0, 241.0, 226.0, 274.0, 275.0, 272.0, 255.0, 262.0, 260.0, 207.0, 211.0, 284.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5924734608627003, "mean_processing_ms": 0.38604375166496974, "mean_inference_ms": 2.1438413106785164}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3168000, "num_steps_sampled": 1689600, "sample_time_ms": 21092.562, "load_time_ms": 37.447, "grad_time_ms": 9515.71, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031676997896283865, "policy_loss": -0.004229032900184393, "vf_loss": 79.702880859375, "vf_explained_var": 0.7654879093170166, "kl": 0.0019305540481582284, "entropy": 1.1470965147018433, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1689600, "episodes_total": 4224, "training_iteration": 132, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-50-51", "timestamp": 1660251051, "time_this_iter_s": 31.913390159606934, "time_total_s": 9469.239756822586, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9469.239756822586, "timesteps_since_restore": 1689600, "iterations_since_restore": 132, "perf": {"cpu_util_percent": 33.193478260869554, "ram_util_percent": 58.276086956521716}} -{"episode_reward_max": 587.0, "episode_reward_min": 123.0, "episode_reward_mean": 540.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 270.16}, "custom_metrics": {"sparse_reward_mean": 187.6, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 165.12, "shaped_reward_min": 43, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.9, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.8, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.56, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.01, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.83, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.58, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.98, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.46, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.68, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.81, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.58, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.58, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [396.0, 576.0, 519.0, 461.0, 522.0, 570.0, 576.0, 501.0, 570.0, 536.0, 579.0, 576.0, 522.0, 573.0, 573.0, 530.0, 519.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 573.0, 522.0, 419.0, 123.0, 582.0, 576.0, 522.0, 579.0, 576.0, 570.0, 579.0, 453.0, 515.0, 573.0, 570.0, 516.0, 573.0, 570.0, 473.0, 573.0, 521.0, 570.0, 530.0, 582.0, 525.0, 579.0, 533.0, 564.0, 582.0, 461.0, 573.0, 524.0, 576.0, 516.0, 573.0, 573.0, 396.0, 576.0, 507.0, 467.0, 549.0, 527.0, 522.0, 418.0, 579.0, 570.0, 504.0, 573.0, 561.0, 576.0, 582.0, 530.0, 498.0, 573.0, 573.0, 579.0, 579.0, 533.0, 522.0, 579.0, 576.0, 539.0, 519.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 495.0, 587.0, 570.0, 524.0, 579.0, 509.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [203.0, 193.0, 298.0, 278.0, 254.0, 265.0, 231.0, 230.0, 259.0, 263.0, 283.0, 287.0, 287.0, 289.0, 244.0, 257.0, 282.0, 288.0, 271.0, 265.0, 287.0, 292.0, 281.0, 295.0, 263.0, 259.0, 293.0, 280.0, 281.0, 292.0, 273.0, 257.0, 252.0, 267.0, 290.0, 283.0, 265.0, 260.0, 296.0, 280.0, 297.0, 285.0, 298.0, 278.0, 298.0, 272.0, 298.0, 275.0, 256.0, 266.0, 215.0, 204.0, 60.0, 63.0, 287.0, 295.0, 281.0, 295.0, 256.0, 266.0, 287.0, 292.0, 288.0, 288.0, 287.0, 283.0, 280.0, 299.0, 236.0, 217.0, 268.0, 247.0, 279.0, 294.0, 280.0, 290.0, 253.0, 263.0, 288.0, 285.0, 298.0, 272.0, 235.0, 238.0, 282.0, 291.0, 265.0, 256.0, 293.0, 277.0, 264.0, 266.0, 291.0, 291.0, 271.0, 254.0, 287.0, 292.0, 274.0, 259.0, 286.0, 278.0, 287.0, 295.0, 215.0, 246.0, 288.0, 285.0, 268.0, 256.0, 288.0, 288.0, 276.0, 240.0, 292.0, 281.0, 300.0, 273.0, 191.0, 205.0, 290.0, 286.0, 250.0, 257.0, 241.0, 226.0, 274.0, 275.0, 272.0, 255.0, 262.0, 260.0, 207.0, 211.0, 284.0, 295.0, 292.0, 278.0, 232.0, 272.0, 287.0, 286.0, 289.0, 272.0, 282.0, 294.0, 286.0, 296.0, 268.0, 262.0, 253.0, 245.0, 290.0, 283.0, 278.0, 295.0, 273.0, 306.0, 290.0, 289.0, 260.0, 273.0, 267.0, 255.0, 285.0, 294.0, 293.0, 283.0, 272.0, 267.0, 248.0, 271.0, 287.0, 286.0, 288.0, 285.0, 291.0, 285.0, 289.0, 284.0, 274.0, 299.0, 282.0, 291.0, 296.0, 277.0, 250.0, 245.0, 288.0, 299.0, 289.0, 281.0, 260.0, 264.0, 283.0, 296.0, 246.0, 263.0, 267.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 1.583806400101539, "mean_processing_ms": 0.3843212568870559, "mean_inference_ms": 2.1355512648653474}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3192000, "num_steps_sampled": 1702400, "sample_time_ms": 21045.575, "load_time_ms": 37.092, "grad_time_ms": 9547.761, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.004417246673256159, "policy_loss": -0.0036684710066765547, "vf_loss": 86.54926300048828, "vf_explained_var": 0.7708062529563904, "kl": 0.0019647751469165087, "entropy": 1.1384211778640747, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1702400, "episodes_total": 4256, "training_iteration": 133, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-51-22", "timestamp": 1660251082, "time_this_iter_s": 30.465492963790894, "time_total_s": 9499.705249786377, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9499.705249786377, "timesteps_since_restore": 1702400, "iterations_since_restore": 133, "perf": {"cpu_util_percent": 27.94883720930233, "ram_util_percent": 58.35813953488371}} -{"episode_reward_max": 587.0, "episode_reward_min": 123.0, "episode_reward_mean": 547.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 273.85}, "custom_metrics": {"sparse_reward_mean": 189.8, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 168.1, "shaped_reward_min": 43, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, 
"tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.85, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.27, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.49, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.46, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.86, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.85, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.74, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.03, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.44, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.74, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.03, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.74, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.03, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 582.0, 582.0, 579.0, 579.0, 507.0, 576.0, 522.0, 516.0, 579.0, 582.0, 573.0, 576.0, 579.0, 530.0, 582.0, 522.0, 579.0, 576.0, 525.0, 519.0, 539.0, 570.0, 567.0, 570.0, 576.0, 579.0, 582.0, 573.0, 579.0, 582.0, 579.0, 527.0, 522.0, 418.0, 579.0, 570.0, 504.0, 573.0, 561.0, 576.0, 582.0, 530.0, 498.0, 573.0, 573.0, 579.0, 579.0, 533.0, 522.0, 579.0, 576.0, 539.0, 519.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 495.0, 587.0, 570.0, 524.0, 579.0, 509.0, 527.0, 396.0, 576.0, 519.0, 461.0, 522.0, 570.0, 576.0, 501.0, 570.0, 536.0, 579.0, 576.0, 522.0, 573.0, 573.0, 530.0, 519.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 573.0, 522.0, 419.0, 123.0, 582.0, 576.0, 522.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 263.0, 277.0, 305.0, 296.0, 286.0, 288.0, 291.0, 306.0, 273.0, 258.0, 249.0, 277.0, 299.0, 278.0, 244.0, 245.0, 271.0, 279.0, 300.0, 291.0, 291.0, 288.0, 285.0, 290.0, 286.0, 286.0, 293.0, 270.0, 260.0, 293.0, 289.0, 263.0, 259.0, 297.0, 282.0, 282.0, 294.0, 257.0, 268.0, 264.0, 255.0, 270.0, 269.0, 280.0, 290.0, 263.0, 304.0, 285.0, 285.0, 290.0, 286.0, 301.0, 278.0, 296.0, 286.0, 296.0, 277.0, 284.0, 295.0, 280.0, 302.0, 289.0, 290.0, 272.0, 255.0, 262.0, 260.0, 207.0, 211.0, 284.0, 295.0, 292.0, 278.0, 232.0, 272.0, 287.0, 286.0, 289.0, 272.0, 282.0, 294.0, 286.0, 296.0, 268.0, 262.0, 253.0, 245.0, 290.0, 283.0, 278.0, 295.0, 273.0, 306.0, 290.0, 289.0, 260.0, 273.0, 267.0, 255.0, 285.0, 294.0, 293.0, 283.0, 272.0, 267.0, 248.0, 271.0, 287.0, 286.0, 288.0, 285.0, 291.0, 285.0, 289.0, 284.0, 274.0, 299.0, 282.0, 291.0, 296.0, 277.0, 250.0, 245.0, 288.0, 299.0, 289.0, 281.0, 260.0, 264.0, 283.0, 296.0, 246.0, 263.0, 267.0, 260.0, 203.0, 193.0, 298.0, 278.0, 254.0, 265.0, 231.0, 230.0, 259.0, 263.0, 283.0, 287.0, 287.0, 289.0, 244.0, 257.0, 282.0, 288.0, 271.0, 265.0, 287.0, 292.0, 281.0, 295.0, 263.0, 259.0, 293.0, 280.0, 281.0, 292.0, 273.0, 257.0, 252.0, 267.0, 290.0, 283.0, 265.0, 260.0, 296.0, 280.0, 297.0, 285.0, 298.0, 278.0, 298.0, 272.0, 298.0, 275.0, 256.0, 266.0, 215.0, 204.0, 60.0, 63.0, 287.0, 295.0, 281.0, 295.0, 256.0, 266.0, 287.0, 292.0, 288.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5752673474082683, "mean_processing_ms": 0.382627028657334, "mean_inference_ms": 2.127290316097115}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3216000, 
"num_steps_sampled": 1715200, "sample_time_ms": 21118.905, "load_time_ms": 37.128, "grad_time_ms": 9783.423, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003744603367522359, "policy_loss": -0.004061851184815168, "vf_loss": 83.74505615234375, "vf_explained_var": 0.7541170120239258, "kl": 0.001809759414754808, "entropy": 1.1361082792282104, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1715200, "episodes_total": 4288, "training_iteration": 134, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-51-53", "timestamp": 1660251113, "time_this_iter_s": 31.301603078842163, "time_total_s": 9531.00685286522, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9531.00685286522, "timesteps_since_restore": 1715200, "iterations_since_restore": 134, "perf": {"cpu_util_percent": 35.08181818181818, "ram_util_percent": 58.252272727272725}} -{"episode_reward_max": 582.0, "episode_reward_min": 123.0, "episode_reward_mean": 546.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 273.49}, "custom_metrics": {"sparse_reward_mean": 189.4, "sparse_reward_min": 40, "sparse_reward_max": 200, "shaped_reward_mean": 168.18, "shaped_reward_min": 43, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.7, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.28, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.45, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.75, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.82, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 14.66, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.09, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 1, 
"dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.91, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.01, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.66, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.09, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.66, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.09, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 561.0, 533.0, 582.0, 573.0, 533.0, 581.0, 459.0, 570.0, 522.0, 579.0, 570.0, 573.0, 582.0, 576.0, 582.0, 579.0, 576.0, 576.0, 567.0, 582.0, 579.0, 564.0, 582.0, 570.0, 228.0, 479.0, 573.0, 516.0, 576.0, 582.0, 522.0, 524.0, 579.0, 509.0, 527.0, 396.0, 576.0, 519.0, 461.0, 522.0, 570.0, 576.0, 501.0, 570.0, 536.0, 579.0, 576.0, 522.0, 573.0, 573.0, 530.0, 519.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 573.0, 522.0, 419.0, 123.0, 582.0, 576.0, 522.0, 579.0, 576.0, 519.0, 582.0, 582.0, 579.0, 579.0, 507.0, 576.0, 522.0, 516.0, 579.0, 582.0, 573.0, 576.0, 579.0, 530.0, 582.0, 522.0, 579.0, 576.0, 525.0, 519.0, 539.0, 570.0, 567.0, 570.0, 576.0, 
579.0, 582.0, 573.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 278.0, 274.0, 287.0, 266.0, 267.0, 284.0, 298.0, 292.0, 281.0, 273.0, 260.0, 288.0, 293.0, 233.0, 226.0, 287.0, 283.0, 253.0, 269.0, 290.0, 289.0, 287.0, 283.0, 279.0, 294.0, 289.0, 293.0, 297.0, 279.0, 291.0, 291.0, 277.0, 302.0, 299.0, 277.0, 299.0, 277.0, 265.0, 302.0, 290.0, 292.0, 288.0, 291.0, 289.0, 275.0, 285.0, 297.0, 277.0, 293.0, 114.0, 114.0, 248.0, 231.0, 285.0, 288.0, 256.0, 260.0, 292.0, 284.0, 300.0, 282.0, 253.0, 269.0, 260.0, 264.0, 283.0, 296.0, 246.0, 263.0, 267.0, 260.0, 203.0, 193.0, 298.0, 278.0, 254.0, 265.0, 231.0, 230.0, 259.0, 263.0, 283.0, 287.0, 287.0, 289.0, 244.0, 257.0, 282.0, 288.0, 271.0, 265.0, 287.0, 292.0, 281.0, 295.0, 263.0, 259.0, 293.0, 280.0, 281.0, 292.0, 273.0, 257.0, 252.0, 267.0, 290.0, 283.0, 265.0, 260.0, 296.0, 280.0, 297.0, 285.0, 298.0, 278.0, 298.0, 272.0, 298.0, 275.0, 256.0, 266.0, 215.0, 204.0, 60.0, 63.0, 287.0, 295.0, 281.0, 295.0, 256.0, 266.0, 287.0, 292.0, 288.0, 288.0, 256.0, 263.0, 277.0, 305.0, 296.0, 286.0, 288.0, 291.0, 306.0, 273.0, 258.0, 249.0, 277.0, 299.0, 278.0, 244.0, 245.0, 271.0, 279.0, 300.0, 291.0, 291.0, 288.0, 285.0, 290.0, 286.0, 286.0, 293.0, 270.0, 260.0, 293.0, 289.0, 263.0, 259.0, 297.0, 282.0, 282.0, 294.0, 257.0, 268.0, 264.0, 255.0, 270.0, 269.0, 280.0, 290.0, 263.0, 304.0, 285.0, 285.0, 290.0, 286.0, 301.0, 278.0, 296.0, 286.0, 296.0, 277.0, 284.0, 295.0, 280.0, 302.0, 289.0, 290.0]}, 
"sampler_perf": {"mean_env_wait_ms": 1.5668312042090953, "mean_processing_ms": 0.38095382690757534, "mean_inference_ms": 2.118642826517617}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3240000, "num_steps_sampled": 1728000, "sample_time_ms": 20924.032, "load_time_ms": 37.391, "grad_time_ms": 9937.539, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035265153273940086, "policy_loss": -0.004777689930051565, "vf_loss": 88.75411224365234, "vf_explained_var": 0.7641527056694031, "kl": 0.002029512310400605, "entropy": 1.1424118280410767, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1728000, "episodes_total": 4320, "training_iteration": 135, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-52-22", "timestamp": 1660251142, "time_this_iter_s": 29.18880271911621, "time_total_s": 9560.195655584335, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9560.195655584335, "timesteps_since_restore": 1728000, "iterations_since_restore": 135, "perf": {"cpu_util_percent": 35.34146341463415, "ram_util_percent": 58.2390243902439}} -{"episode_reward_max": 582.0, "episode_reward_min": 228.0, "episode_reward_mean": 550.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 275.345}, "custom_metrics": {"sparse_reward_mean": 190.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 169.49, "shaped_reward_min": 68, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.86, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.31, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.55, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.44, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.71, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 
16.22, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.71, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.22, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.71, 
"viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.22, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 579.0, 510.0, 582.0, 573.0, 573.0, 470.0, 579.0, 530.0, 567.0, 573.0, 533.0, 582.0, 461.0, 496.0, 510.0, 513.0, 579.0, 530.0, 570.0, 525.0, 525.0, 573.0, 294.0, 576.0, 539.0, 522.0, 498.0, 573.0, 576.0, 579.0, 570.0, 576.0, 522.0, 579.0, 576.0, 519.0, 582.0, 582.0, 579.0, 579.0, 507.0, 576.0, 522.0, 516.0, 579.0, 582.0, 573.0, 576.0, 579.0, 530.0, 582.0, 522.0, 579.0, 576.0, 525.0, 519.0, 539.0, 570.0, 567.0, 570.0, 576.0, 579.0, 582.0, 573.0, 
579.0, 582.0, 579.0, 579.0, 561.0, 533.0, 582.0, 573.0, 533.0, 581.0, 459.0, 570.0, 522.0, 579.0, 570.0, 573.0, 582.0, 576.0, 582.0, 579.0, 576.0, 576.0, 567.0, 582.0, 579.0, 564.0, 582.0, 570.0, 228.0, 479.0, 573.0, 516.0, 576.0, 582.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 279.0, 270.0, 309.0, 255.0, 255.0, 282.0, 300.0, 275.0, 298.0, 293.0, 280.0, 239.0, 231.0, 293.0, 286.0, 260.0, 270.0, 287.0, 280.0, 273.0, 300.0, 275.0, 258.0, 285.0, 297.0, 229.0, 232.0, 252.0, 244.0, 270.0, 240.0, 241.0, 272.0, 288.0, 291.0, 249.0, 281.0, 287.0, 283.0, 257.0, 268.0, 258.0, 267.0, 279.0, 294.0, 142.0, 152.0, 285.0, 291.0, 281.0, 258.0, 262.0, 260.0, 261.0, 237.0, 292.0, 281.0, 287.0, 289.0, 285.0, 294.0, 279.0, 291.0, 281.0, 295.0, 256.0, 266.0, 287.0, 292.0, 288.0, 288.0, 256.0, 263.0, 277.0, 305.0, 296.0, 286.0, 288.0, 291.0, 306.0, 273.0, 258.0, 249.0, 277.0, 299.0, 278.0, 244.0, 245.0, 271.0, 279.0, 300.0, 291.0, 291.0, 288.0, 285.0, 290.0, 286.0, 286.0, 293.0, 270.0, 260.0, 293.0, 289.0, 263.0, 259.0, 297.0, 282.0, 282.0, 294.0, 257.0, 268.0, 264.0, 255.0, 270.0, 269.0, 280.0, 290.0, 263.0, 304.0, 285.0, 285.0, 290.0, 286.0, 301.0, 278.0, 296.0, 286.0, 296.0, 277.0, 284.0, 295.0, 280.0, 302.0, 289.0, 290.0, 301.0, 278.0, 274.0, 287.0, 266.0, 267.0, 284.0, 298.0, 292.0, 281.0, 273.0, 260.0, 288.0, 293.0, 233.0, 226.0, 287.0, 283.0, 253.0, 269.0, 290.0, 289.0, 287.0, 283.0, 279.0, 294.0, 289.0, 293.0, 297.0, 279.0, 291.0, 291.0, 277.0, 302.0, 299.0, 277.0, 299.0, 
277.0, 265.0, 302.0, 290.0, 292.0, 288.0, 291.0, 289.0, 275.0, 285.0, 297.0, 277.0, 293.0, 114.0, 114.0, 248.0, 231.0, 285.0, 288.0, 256.0, 260.0, 292.0, 284.0, 300.0, 282.0, 253.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5585142836902617, "mean_processing_ms": 0.37930323172658476, "mean_inference_ms": 2.109956620242284}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3264000, "num_steps_sampled": 1740800, "sample_time_ms": 20787.416, "load_time_ms": 37.15, "grad_time_ms": 10003.018, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003316950984299183, "policy_loss": -0.004835940897464752, "vf_loss": 87.2677993774414, "vf_explained_var": 0.7657222151756287, "kl": 0.0019325317116454244, "entropy": 1.1477751731872559, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1740800, "episodes_total": 4352, "training_iteration": 136, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-52-52", "timestamp": 1660251172, "time_this_iter_s": 29.77871298789978, "time_total_s": 9589.974368572235, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9589.974368572235, "timesteps_since_restore": 1740800, "iterations_since_restore": 136, "perf": {"cpu_util_percent": 32.416666666666664, "ram_util_percent": 58.35476190476191}} -{"episode_reward_max": 630.0, "episode_reward_min": 228.0, "episode_reward_mean": 546.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 273.105}, "custom_metrics": {"sparse_reward_mean": 189.2, 
"sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 167.81, "shaped_reward_min": 68, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.06, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.14, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.48, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.26, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.89, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.48, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.88, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.37, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.32, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.88, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.88, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 576.0, 479.0, 576.0, 570.0, 342.0, 470.0, 573.0, 510.0, 518.0, 570.0, 582.0, 539.0, 570.0, 510.0, 587.0, 579.0, 516.0, 516.0, 582.0, 576.0, 576.0, 570.0, 576.0, 519.0, 570.0, 581.0, 630.0, 573.0, 522.0, 573.0, 516.0, 573.0, 579.0, 582.0, 579.0, 579.0, 561.0, 533.0, 582.0, 573.0, 
533.0, 581.0, 459.0, 570.0, 522.0, 579.0, 570.0, 573.0, 582.0, 576.0, 582.0, 579.0, 576.0, 576.0, 567.0, 582.0, 579.0, 564.0, 582.0, 570.0, 228.0, 479.0, 573.0, 516.0, 576.0, 582.0, 522.0, 570.0, 579.0, 510.0, 582.0, 573.0, 573.0, 470.0, 579.0, 530.0, 567.0, 573.0, 533.0, 582.0, 461.0, 496.0, 510.0, 513.0, 579.0, 530.0, 570.0, 525.0, 525.0, 573.0, 294.0, 576.0, 539.0, 522.0, 498.0, 573.0, 576.0, 579.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [274.0, 251.0, 298.0, 278.0, 254.0, 225.0, 297.0, 279.0, 286.0, 284.0, 166.0, 176.0, 221.0, 249.0, 296.0, 277.0, 259.0, 251.0, 259.0, 259.0, 298.0, 272.0, 283.0, 299.0, 271.0, 268.0, 293.0, 277.0, 264.0, 246.0, 285.0, 302.0, 291.0, 288.0, 286.0, 230.0, 249.0, 267.0, 284.0, 298.0, 293.0, 283.0, 288.0, 288.0, 283.0, 287.0, 285.0, 291.0, 257.0, 262.0, 286.0, 284.0, 291.0, 290.0, 316.0, 314.0, 277.0, 296.0, 262.0, 260.0, 283.0, 290.0, 264.0, 252.0, 296.0, 277.0, 284.0, 295.0, 280.0, 302.0, 289.0, 290.0, 301.0, 278.0, 274.0, 287.0, 266.0, 267.0, 284.0, 298.0, 292.0, 281.0, 273.0, 260.0, 288.0, 293.0, 233.0, 226.0, 287.0, 283.0, 253.0, 269.0, 290.0, 289.0, 287.0, 283.0, 279.0, 294.0, 289.0, 293.0, 297.0, 279.0, 291.0, 291.0, 277.0, 302.0, 299.0, 277.0, 299.0, 277.0, 265.0, 302.0, 290.0, 292.0, 288.0, 291.0, 289.0, 275.0, 285.0, 297.0, 277.0, 293.0, 114.0, 114.0, 248.0, 231.0, 285.0, 288.0, 256.0, 260.0, 292.0, 284.0, 300.0, 282.0, 253.0, 269.0, 291.0, 279.0, 270.0, 309.0, 255.0, 255.0, 282.0, 300.0, 275.0, 298.0, 293.0, 280.0, 239.0, 
231.0, 293.0, 286.0, 260.0, 270.0, 287.0, 280.0, 273.0, 300.0, 275.0, 258.0, 285.0, 297.0, 229.0, 232.0, 252.0, 244.0, 270.0, 240.0, 241.0, 272.0, 288.0, 291.0, 249.0, 281.0, 287.0, 283.0, 257.0, 268.0, 258.0, 267.0, 279.0, 294.0, 142.0, 152.0, 285.0, 291.0, 281.0, 258.0, 262.0, 260.0, 261.0, 237.0, 292.0, 281.0, 287.0, 289.0, 285.0, 294.0, 279.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5503391624395824, "mean_processing_ms": 0.37768347850915746, "mean_inference_ms": 2.101391542622976}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3288000, "num_steps_sampled": 1753600, "sample_time_ms": 20872.885, "load_time_ms": 37.097, "grad_time_ms": 9999.872, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012538364389911294, "policy_loss": -0.005918627139180899, "vf_loss": 77.47673797607422, "vf_explained_var": 0.7781977653503418, "kl": 0.0019029680406674743, "entropy": 1.1504276990890503, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1753600, "episodes_total": 4384, "training_iteration": 137, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-53-22", "timestamp": 1660251202, "time_this_iter_s": 30.53275179862976, "time_total_s": 9620.507120370865, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9620.507120370865, "timesteps_since_restore": 1753600, "iterations_since_restore": 137, "perf": {"cpu_util_percent": 31.753488372093024, "ram_util_percent": 58.406976744186025}} -{"episode_reward_max": 630.0, "episode_reward_min": 294.0, "episode_reward_mean": 546.11, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 142.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 273.055}, "custom_metrics": {"sparse_reward_mean": 189.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 168.11, "shaped_reward_min": 94, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.28, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.22, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.76, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.22, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 1.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, 
"useful_onion_drop_agent_0_mean": 0.4, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.95, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.87, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.37, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.72, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.94, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.95, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.87, 
"optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.95, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.87, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 459.0, 573.0, 579.0, 527.0, 519.0, 579.0, 525.0, 516.0, 579.0, 579.0, 579.0, 
579.0, 573.0, 449.0, 579.0, 579.0, 576.0, 441.0, 576.0, 533.0, 516.0, 582.0, 516.0, 579.0, 579.0, 567.0, 587.0, 527.0, 579.0, 573.0, 579.0, 516.0, 576.0, 582.0, 522.0, 570.0, 579.0, 510.0, 582.0, 573.0, 573.0, 470.0, 579.0, 530.0, 567.0, 573.0, 533.0, 582.0, 461.0, 496.0, 510.0, 513.0, 579.0, 530.0, 570.0, 525.0, 525.0, 573.0, 294.0, 576.0, 539.0, 522.0, 498.0, 573.0, 576.0, 579.0, 570.0, 525.0, 576.0, 479.0, 576.0, 570.0, 342.0, 470.0, 573.0, 510.0, 518.0, 570.0, 582.0, 539.0, 570.0, 510.0, 587.0, 579.0, 516.0, 516.0, 582.0, 576.0, 576.0, 570.0, 576.0, 519.0, 570.0, 581.0, 630.0, 573.0, 522.0, 573.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 230.0, 229.0, 295.0, 278.0, 295.0, 284.0, 275.0, 252.0, 266.0, 253.0, 285.0, 294.0, 260.0, 265.0, 261.0, 255.0, 285.0, 294.0, 272.0, 307.0, 284.0, 295.0, 273.0, 306.0, 290.0, 283.0, 228.0, 221.0, 296.0, 283.0, 279.0, 300.0, 299.0, 277.0, 226.0, 215.0, 280.0, 296.0, 262.0, 271.0, 261.0, 255.0, 288.0, 294.0, 267.0, 249.0, 289.0, 290.0, 281.0, 298.0, 279.0, 288.0, 286.0, 301.0, 256.0, 271.0, 280.0, 299.0, 275.0, 298.0, 283.0, 296.0, 256.0, 260.0, 292.0, 284.0, 300.0, 282.0, 253.0, 269.0, 291.0, 279.0, 270.0, 309.0, 255.0, 255.0, 282.0, 300.0, 275.0, 298.0, 293.0, 280.0, 239.0, 231.0, 293.0, 286.0, 260.0, 270.0, 287.0, 280.0, 273.0, 300.0, 275.0, 258.0, 285.0, 297.0, 229.0, 232.0, 252.0, 244.0, 270.0, 240.0, 241.0, 272.0, 288.0, 291.0, 249.0, 281.0, 287.0, 283.0, 257.0, 268.0, 258.0, 267.0, 279.0, 294.0, 142.0, 152.0, 
285.0, 291.0, 281.0, 258.0, 262.0, 260.0, 261.0, 237.0, 292.0, 281.0, 287.0, 289.0, 285.0, 294.0, 279.0, 291.0, 274.0, 251.0, 298.0, 278.0, 254.0, 225.0, 297.0, 279.0, 286.0, 284.0, 166.0, 176.0, 221.0, 249.0, 296.0, 277.0, 259.0, 251.0, 259.0, 259.0, 298.0, 272.0, 283.0, 299.0, 271.0, 268.0, 293.0, 277.0, 264.0, 246.0, 285.0, 302.0, 291.0, 288.0, 286.0, 230.0, 249.0, 267.0, 284.0, 298.0, 293.0, 283.0, 288.0, 288.0, 283.0, 287.0, 285.0, 291.0, 257.0, 262.0, 286.0, 284.0, 291.0, 290.0, 316.0, 314.0, 277.0, 296.0, 262.0, 260.0, 283.0, 290.0, 264.0, 252.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5422943636827329, "mean_processing_ms": 0.376087334740523, "mean_inference_ms": 2.0930739405664296}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3312000, "num_steps_sampled": 1766400, "sample_time_ms": 20654.49, "load_time_ms": 37.188, "grad_time_ms": 9982.936, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023388864938169718, "policy_loss": -0.0055216290056705475, "vf_loss": 84.28978729248047, "vf_explained_var": 0.7621362209320068, "kl": 0.0017433507600799203, "entropy": 1.136921763420105, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1766400, "episodes_total": 4416, "training_iteration": 138, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-53-51", "timestamp": 1660251231, "time_this_iter_s": 28.834796905517578, "time_total_s": 9649.341917276382, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9649.341917276382, "timesteps_since_restore": 1766400, "iterations_since_restore": 138, "perf": {"cpu_util_percent": 
31.565853658536582, "ram_util_percent": 58.34634146341463}} -{"episode_reward_max": 630.0, "episode_reward_min": 342.0, "episode_reward_mean": 553.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 166.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 276.825}, "custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 170.05, "shaped_reward_min": 102, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.1, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.92, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.18, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, 
"onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.93, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.38, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.44, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.22, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.88, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.73, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 15.22, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.88, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.22, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.88, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 345.0, 465.0, 558.0, 570.0, 573.0, 582.0, 573.0, 576.0, 573.0, 573.0, 573.0, 582.0, 579.0, 582.0, 582.0, 567.0, 582.0, 576.0, 579.0, 587.0, 519.0, 570.0, 567.0, 579.0, 582.0, 533.0, 522.0, 587.0, 504.0, 630.0, 536.0, 573.0, 576.0, 579.0, 570.0, 525.0, 576.0, 479.0, 576.0, 570.0, 342.0, 470.0, 573.0, 510.0, 518.0, 570.0, 582.0, 539.0, 570.0, 510.0, 587.0, 579.0, 516.0, 516.0, 582.0, 576.0, 576.0, 570.0, 576.0, 519.0, 570.0, 581.0, 630.0, 573.0, 522.0, 573.0, 516.0, 630.0, 459.0, 573.0, 579.0, 527.0, 519.0, 579.0, 525.0, 516.0, 579.0, 579.0, 579.0, 579.0, 573.0, 449.0, 579.0, 579.0, 576.0, 441.0, 576.0, 533.0, 516.0, 582.0, 516.0, 579.0, 579.0, 567.0, 587.0, 527.0, 579.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 177.0, 168.0, 229.0, 236.0, 275.0, 283.0, 271.0, 299.0, 292.0, 281.0, 286.0, 296.0, 287.0, 286.0, 283.0, 293.0, 293.0, 280.0, 280.0, 293.0, 296.0, 277.0, 293.0, 289.0, 272.0, 307.0, 294.0, 288.0, 298.0, 284.0, 272.0, 295.0, 293.0, 289.0, 287.0, 289.0, 287.0, 292.0, 291.0, 296.0, 257.0, 262.0, 273.0, 297.0, 281.0, 286.0, 298.0, 281.0, 282.0, 300.0, 273.0, 260.0, 272.0, 250.0, 295.0, 292.0, 252.0, 252.0, 307.0, 323.0, 267.0, 269.0, 292.0, 281.0, 287.0, 289.0, 285.0, 294.0, 279.0, 291.0, 274.0, 251.0, 298.0, 278.0, 254.0, 225.0, 297.0, 279.0, 286.0, 284.0, 166.0, 176.0, 221.0, 249.0, 296.0, 277.0, 259.0, 251.0, 259.0, 
259.0, 298.0, 272.0, 283.0, 299.0, 271.0, 268.0, 293.0, 277.0, 264.0, 246.0, 285.0, 302.0, 291.0, 288.0, 286.0, 230.0, 249.0, 267.0, 284.0, 298.0, 293.0, 283.0, 288.0, 288.0, 283.0, 287.0, 285.0, 291.0, 257.0, 262.0, 286.0, 284.0, 291.0, 290.0, 316.0, 314.0, 277.0, 296.0, 262.0, 260.0, 283.0, 290.0, 264.0, 252.0, 313.0, 317.0, 230.0, 229.0, 295.0, 278.0, 295.0, 284.0, 275.0, 252.0, 266.0, 253.0, 285.0, 294.0, 260.0, 265.0, 261.0, 255.0, 285.0, 294.0, 272.0, 307.0, 284.0, 295.0, 273.0, 306.0, 290.0, 283.0, 228.0, 221.0, 296.0, 283.0, 279.0, 300.0, 299.0, 277.0, 226.0, 215.0, 280.0, 296.0, 262.0, 271.0, 261.0, 255.0, 288.0, 294.0, 267.0, 249.0, 289.0, 290.0, 281.0, 298.0, 279.0, 288.0, 286.0, 301.0, 256.0, 271.0, 280.0, 299.0, 275.0, 298.0, 283.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.534348979817542, "mean_processing_ms": 0.37451084317148714, "mean_inference_ms": 2.0846763834338202}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3336000, "num_steps_sampled": 1779200, "sample_time_ms": 20417.117, "load_time_ms": 37.045, "grad_time_ms": 9852.38, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006708970759063959, "policy_loss": -0.0014362437650561333, "vf_loss": 87.18399810791016, "vf_explained_var": 0.7458827495574951, "kl": 0.0019282657885923982, "entropy": 1.1463767290115356, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1779200, "episodes_total": 4448, "training_iteration": 139, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-54-19", "timestamp": 1660251259, "time_this_iter_s": 27.688152074813843, "time_total_s": 9677.030069351196, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9677.030069351196, "timesteps_since_restore": 1779200, "iterations_since_restore": 139, "perf": {"cpu_util_percent": 30.3025641025641, "ram_util_percent": 58.341025641025624}} -{"episode_reward_max": 630.0, "episode_reward_min": 345.0, "episode_reward_mean": 556.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 278.16}, "custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 170.72, "shaped_reward_min": 105, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.18, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.21, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.37, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.19, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.1, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.56, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.53, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.62, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.91, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.85, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.19, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.1, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.19, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.1, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 567.0, 530.0, 573.0, 576.0, 521.0, 570.0, 579.0, 501.0, 573.0, 522.0, 579.0, 630.0, 576.0, 570.0, 576.0, 579.0, 522.0, 576.0, 525.0, 576.0, 579.0, 567.0, 579.0, 576.0, 579.0, 582.0, 576.0, 504.0, 579.0, 573.0, 408.0, 573.0, 522.0, 573.0, 516.0, 630.0, 459.0, 573.0, 579.0, 527.0, 519.0, 579.0, 525.0, 516.0, 579.0, 579.0, 579.0, 579.0, 573.0, 449.0, 579.0, 579.0, 576.0, 441.0, 576.0, 533.0, 516.0, 582.0, 516.0, 579.0, 579.0, 567.0, 587.0, 527.0, 579.0, 573.0, 579.0, 576.0, 345.0, 465.0, 558.0, 570.0, 573.0, 582.0, 573.0, 576.0, 573.0, 573.0, 573.0, 582.0, 579.0, 582.0, 582.0, 567.0, 582.0, 576.0, 579.0, 587.0, 519.0, 570.0, 567.0, 579.0, 582.0, 533.0, 522.0, 587.0, 504.0, 630.0, 536.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 261.0, 280.0, 287.0, 249.0, 281.0, 293.0, 280.0, 288.0, 288.0, 251.0, 270.0, 280.0, 290.0, 277.0, 302.0, 242.0, 259.0, 295.0, 278.0, 258.0, 264.0, 285.0, 294.0, 320.0, 310.0, 284.0, 292.0, 287.0, 283.0, 296.0, 280.0, 285.0, 294.0, 257.0, 265.0, 297.0, 279.0, 262.0, 263.0, 288.0, 288.0, 298.0, 281.0, 293.0, 274.0, 301.0, 278.0, 293.0, 283.0, 298.0, 281.0, 291.0, 291.0, 266.0, 310.0, 236.0, 268.0, 285.0, 294.0, 290.0, 
283.0, 205.0, 203.0, 277.0, 296.0, 262.0, 260.0, 283.0, 290.0, 264.0, 252.0, 313.0, 317.0, 230.0, 229.0, 295.0, 278.0, 295.0, 284.0, 275.0, 252.0, 266.0, 253.0, 285.0, 294.0, 260.0, 265.0, 261.0, 255.0, 285.0, 294.0, 272.0, 307.0, 284.0, 295.0, 273.0, 306.0, 290.0, 283.0, 228.0, 221.0, 296.0, 283.0, 279.0, 300.0, 299.0, 277.0, 226.0, 215.0, 280.0, 296.0, 262.0, 271.0, 261.0, 255.0, 288.0, 294.0, 267.0, 249.0, 289.0, 290.0, 281.0, 298.0, 279.0, 288.0, 286.0, 301.0, 256.0, 271.0, 280.0, 299.0, 275.0, 298.0, 283.0, 296.0, 276.0, 300.0, 177.0, 168.0, 229.0, 236.0, 275.0, 283.0, 271.0, 299.0, 292.0, 281.0, 286.0, 296.0, 287.0, 286.0, 283.0, 293.0, 293.0, 280.0, 280.0, 293.0, 296.0, 277.0, 293.0, 289.0, 272.0, 307.0, 294.0, 288.0, 298.0, 284.0, 272.0, 295.0, 293.0, 289.0, 287.0, 289.0, 287.0, 292.0, 291.0, 296.0, 257.0, 262.0, 273.0, 297.0, 281.0, 286.0, 298.0, 281.0, 282.0, 300.0, 273.0, 260.0, 272.0, 250.0, 295.0, 292.0, 252.0, 252.0, 307.0, 323.0, 267.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5264986068464634, "mean_processing_ms": 0.37294987476110114, "mean_inference_ms": 2.0764572301568647}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3360000, "num_steps_sampled": 1792000, "sample_time_ms": 20405.098, "load_time_ms": 37.005, "grad_time_ms": 9490.101, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0018680343637242913, "policy_loss": -0.005901841446757317, "vf_loss": 83.4326400756836, "vf_explained_var": 0.7634987831115723, "kl": 0.002031019888818264, "entropy": 1.1467581987380981, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1792000, "episodes_total": 4480, "training_iteration": 140, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-54-47", "timestamp": 1660251287, "time_this_iter_s": 28.096507787704468, "time_total_s": 9705.1265771389, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9705.1265771389, "timesteps_since_restore": 1792000, "iterations_since_restore": 140, "perf": {"cpu_util_percent": 30.9875, "ram_util_percent": 58.3925}} -{"episode_reward_max": 630.0, "episode_reward_min": 345.0, "episode_reward_mean": 560.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 280.105}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 171.81, "shaped_reward_min": 105, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.15, 
"onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.17, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.73, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.41, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.21, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.23, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.53, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.75, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, 
"soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.77, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.21, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.23, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.21, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.23, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 522.0, 504.0, 573.0, 579.0, 530.0, 582.0, 576.0, 579.0, 579.0, 570.0, 525.0, 573.0, 582.0, 630.0, 558.0, 576.0, 576.0, 576.0, 533.0, 582.0, 582.0, 530.0, 530.0, 630.0, 576.0, 516.0, 476.0, 579.0, 582.0, 573.0, 576.0, 527.0, 579.0, 573.0, 579.0, 576.0, 345.0, 465.0, 558.0, 570.0, 573.0, 582.0, 573.0, 576.0, 573.0, 573.0, 573.0, 582.0, 579.0, 582.0, 582.0, 567.0, 582.0, 576.0, 579.0, 587.0, 519.0, 570.0, 567.0, 579.0, 582.0, 533.0, 522.0, 587.0, 504.0, 630.0, 536.0, 530.0, 567.0, 530.0, 573.0, 576.0, 521.0, 570.0, 579.0, 501.0, 573.0, 522.0, 579.0, 630.0, 576.0, 570.0, 576.0, 579.0, 522.0, 576.0, 525.0, 576.0, 579.0, 567.0, 579.0, 576.0, 579.0, 582.0, 576.0, 504.0, 579.0, 573.0, 408.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 280.0, 262.0, 260.0, 260.0, 244.0, 277.0, 296.0, 296.0, 283.0, 264.0, 266.0, 301.0, 281.0, 285.0, 291.0, 289.0, 290.0, 291.0, 288.0, 287.0, 283.0, 263.0, 262.0, 292.0, 281.0, 301.0, 281.0, 303.0, 327.0, 272.0, 
286.0, 296.0, 280.0, 282.0, 294.0, 282.0, 294.0, 268.0, 265.0, 298.0, 284.0, 298.0, 284.0, 255.0, 275.0, 253.0, 277.0, 311.0, 319.0, 288.0, 288.0, 250.0, 266.0, 238.0, 238.0, 292.0, 287.0, 309.0, 273.0, 286.0, 287.0, 289.0, 287.0, 256.0, 271.0, 280.0, 299.0, 275.0, 298.0, 283.0, 296.0, 276.0, 300.0, 177.0, 168.0, 229.0, 236.0, 275.0, 283.0, 271.0, 299.0, 292.0, 281.0, 286.0, 296.0, 287.0, 286.0, 283.0, 293.0, 293.0, 280.0, 280.0, 293.0, 296.0, 277.0, 293.0, 289.0, 272.0, 307.0, 294.0, 288.0, 298.0, 284.0, 272.0, 295.0, 293.0, 289.0, 287.0, 289.0, 287.0, 292.0, 291.0, 296.0, 257.0, 262.0, 273.0, 297.0, 281.0, 286.0, 298.0, 281.0, 282.0, 300.0, 273.0, 260.0, 272.0, 250.0, 295.0, 292.0, 252.0, 252.0, 307.0, 323.0, 267.0, 269.0, 269.0, 261.0, 280.0, 287.0, 249.0, 281.0, 293.0, 280.0, 288.0, 288.0, 251.0, 270.0, 280.0, 290.0, 277.0, 302.0, 242.0, 259.0, 295.0, 278.0, 258.0, 264.0, 285.0, 294.0, 320.0, 310.0, 284.0, 292.0, 287.0, 283.0, 296.0, 280.0, 285.0, 294.0, 257.0, 265.0, 297.0, 279.0, 262.0, 263.0, 288.0, 288.0, 298.0, 281.0, 293.0, 274.0, 301.0, 278.0, 293.0, 283.0, 298.0, 281.0, 291.0, 291.0, 266.0, 310.0, 236.0, 268.0, 285.0, 294.0, 290.0, 283.0, 205.0, 203.0]}, "sampler_perf": {"mean_env_wait_ms": 1.518751324463327, "mean_processing_ms": 0.37140910407762817, "mean_inference_ms": 2.0683253134575508}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3384000, "num_steps_sampled": 1804800, "sample_time_ms": 20217.154, "load_time_ms": 36.826, "grad_time_ms": 9453.413, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004852355923503637, "policy_loss": -0.003466278314590454, "vf_loss": 88.89630126953125, "vf_explained_var": 0.7491546273231506, "kl": 0.0020531185436993837, "entropy": 1.1419917345046997, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1804800, "episodes_total": 4512, 
"training_iteration": 141, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-55-17", "timestamp": 1660251317, "time_this_iter_s": 29.532893180847168, "time_total_s": 9734.659470319748, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9734.659470319748, "timesteps_since_restore": 1804800, "iterations_since_restore": 141, "perf": {"cpu_util_percent": 29.842857142857145, "ram_util_percent": 58.3642857142857}} -{"episode_reward_max": 630.0, "episode_reward_min": 123.0, "episode_reward_mean": 554.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 277.34}, "custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.68, "shaped_reward_min": 43, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.18, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.63, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.41, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.06, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.23, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.82, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.15, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.06, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.23, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.06, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.23, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 455.0, 582.0, 123.0, 476.0, 527.0, 579.0, 582.0, 567.0, 579.0, 579.0, 579.0, 579.0, 481.0, 576.0, 587.0, 582.0, 576.0, 579.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 413.0, 579.0, 570.0, 465.0, 579.0, 579.0, 573.0, 587.0, 504.0, 630.0, 536.0, 530.0, 567.0, 530.0, 573.0, 576.0, 521.0, 570.0, 579.0, 501.0, 573.0, 522.0, 579.0, 630.0, 576.0, 570.0, 576.0, 579.0, 522.0, 576.0, 525.0, 576.0, 579.0, 567.0, 579.0, 576.0, 579.0, 582.0, 576.0, 504.0, 579.0, 573.0, 408.0, 573.0, 522.0, 504.0, 573.0, 579.0, 530.0, 582.0, 576.0, 579.0, 579.0, 570.0, 525.0, 573.0, 582.0, 630.0, 558.0, 576.0, 576.0, 576.0, 533.0, 582.0, 582.0, 530.0, 530.0, 630.0, 576.0, 516.0, 476.0, 579.0, 582.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [248.0, 274.0, 248.0, 207.0, 
280.0, 302.0, 60.0, 63.0, 249.0, 227.0, 275.0, 252.0, 284.0, 295.0, 297.0, 285.0, 282.0, 285.0, 301.0, 278.0, 291.0, 288.0, 278.0, 301.0, 286.0, 293.0, 236.0, 245.0, 277.0, 299.0, 294.0, 293.0, 293.0, 289.0, 279.0, 297.0, 286.0, 293.0, 283.0, 287.0, 288.0, 291.0, 273.0, 309.0, 287.0, 286.0, 301.0, 278.0, 294.0, 285.0, 211.0, 202.0, 295.0, 284.0, 285.0, 285.0, 233.0, 232.0, 292.0, 287.0, 289.0, 290.0, 288.0, 285.0, 295.0, 292.0, 252.0, 252.0, 307.0, 323.0, 267.0, 269.0, 269.0, 261.0, 280.0, 287.0, 249.0, 281.0, 293.0, 280.0, 288.0, 288.0, 251.0, 270.0, 280.0, 290.0, 277.0, 302.0, 242.0, 259.0, 295.0, 278.0, 258.0, 264.0, 285.0, 294.0, 320.0, 310.0, 284.0, 292.0, 287.0, 283.0, 296.0, 280.0, 285.0, 294.0, 257.0, 265.0, 297.0, 279.0, 262.0, 263.0, 288.0, 288.0, 298.0, 281.0, 293.0, 274.0, 301.0, 278.0, 293.0, 283.0, 298.0, 281.0, 291.0, 291.0, 266.0, 310.0, 236.0, 268.0, 285.0, 294.0, 290.0, 283.0, 205.0, 203.0, 293.0, 280.0, 262.0, 260.0, 260.0, 244.0, 277.0, 296.0, 296.0, 283.0, 264.0, 266.0, 301.0, 281.0, 285.0, 291.0, 289.0, 290.0, 291.0, 288.0, 287.0, 283.0, 263.0, 262.0, 292.0, 281.0, 301.0, 281.0, 303.0, 327.0, 272.0, 286.0, 296.0, 280.0, 282.0, 294.0, 282.0, 294.0, 268.0, 265.0, 298.0, 284.0, 298.0, 284.0, 255.0, 275.0, 253.0, 277.0, 311.0, 319.0, 288.0, 288.0, 250.0, 266.0, 238.0, 238.0, 292.0, 287.0, 309.0, 273.0, 286.0, 287.0, 289.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5111310718572366, "mean_processing_ms": 0.36989117514475767, "mean_inference_ms": 2.0605337474583503}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3408000, "num_steps_sampled": 1817600, "sample_time_ms": 20052.563, "load_time_ms": 36.402, "grad_time_ms": 9487.641, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00461258739233017, "policy_loss": -0.0034613541793078184, "vf_loss": 86.5114974975586, "vf_explained_var": 0.770569384098053, "kl": 0.0022539596538990736, "entropy": 1.154403805732727, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1817600, "episodes_total": 4544, "training_iteration": 142, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-55-47", "timestamp": 1660251347, "time_this_iter_s": 30.608631134033203, "time_total_s": 9765.268101453781, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9765.268101453781, "timesteps_since_restore": 1817600, "iterations_since_restore": 142, "perf": {"cpu_util_percent": 30.76046511627907, "ram_util_percent": 58.4186046511628}} -{"episode_reward_max": 633.0, "episode_reward_min": 123.0, "episode_reward_mean": 553.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 276.845}, "custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.49, "shaped_reward_min": 43, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.23, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.87, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.68, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.13, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.76, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.01, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.6, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.73, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.59, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.01, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.01, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 533.0, 570.0, 633.0, 582.0, 579.0, 573.0, 582.0, 123.0, 573.0, 579.0, 576.0, 579.0, 582.0, 576.0, 524.0, 573.0, 584.0, 573.0, 582.0, 579.0, 518.0, 573.0, 573.0, 564.0, 576.0, 573.0, 567.0, 633.0, 527.0, 579.0, 630.0, 504.0, 579.0, 573.0, 408.0, 573.0, 522.0, 504.0, 573.0, 579.0, 530.0, 582.0, 576.0, 579.0, 579.0, 570.0, 525.0, 573.0, 582.0, 630.0, 558.0, 576.0, 576.0, 576.0, 533.0, 582.0, 582.0, 530.0, 530.0, 630.0, 576.0, 516.0, 476.0, 579.0, 582.0, 573.0, 576.0, 522.0, 455.0, 582.0, 123.0, 476.0, 527.0, 579.0, 582.0, 567.0, 579.0, 579.0, 579.0, 579.0, 481.0, 576.0, 587.0, 582.0, 576.0, 579.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 413.0, 579.0, 570.0, 465.0, 579.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 298.0, 259.0, 274.0, 285.0, 285.0, 302.0, 331.0, 280.0, 302.0, 281.0, 298.0, 286.0, 287.0, 286.0, 296.0, 60.0, 63.0, 288.0, 285.0, 295.0, 284.0, 275.0, 301.0, 280.0, 299.0, 303.0, 279.0, 289.0, 287.0, 275.0, 249.0, 292.0, 281.0, 291.0, 293.0, 292.0, 281.0, 291.0, 291.0, 288.0, 291.0, 235.0, 283.0, 293.0, 280.0, 284.0, 289.0, 276.0, 288.0, 284.0, 292.0, 294.0, 279.0, 278.0, 289.0, 301.0, 332.0, 260.0, 267.0, 278.0, 301.0, 314.0, 316.0, 236.0, 268.0, 285.0, 294.0, 290.0, 283.0, 205.0, 203.0, 293.0, 280.0, 262.0, 260.0, 260.0, 244.0, 277.0, 296.0, 296.0, 283.0, 264.0, 266.0, 301.0, 281.0, 285.0, 291.0, 289.0, 290.0, 291.0, 288.0, 287.0, 283.0, 263.0, 262.0, 292.0, 281.0, 301.0, 281.0, 303.0, 327.0, 272.0, 286.0, 296.0, 280.0, 282.0, 294.0, 282.0, 294.0, 268.0, 265.0, 298.0, 284.0, 298.0, 284.0, 255.0, 275.0, 253.0, 277.0, 311.0, 319.0, 288.0, 288.0, 250.0, 266.0, 238.0, 238.0, 292.0, 287.0, 309.0, 273.0, 286.0, 287.0, 289.0, 287.0, 248.0, 274.0, 248.0, 207.0, 280.0, 302.0, 60.0, 63.0, 249.0, 227.0, 275.0, 252.0, 284.0, 295.0, 297.0, 285.0, 282.0, 285.0, 301.0, 278.0, 291.0, 288.0, 278.0, 301.0, 286.0, 293.0, 236.0, 245.0, 277.0, 299.0, 294.0, 293.0, 293.0, 289.0, 279.0, 297.0, 286.0, 293.0, 283.0, 287.0, 288.0, 291.0, 273.0, 309.0, 287.0, 286.0, 301.0, 278.0, 294.0, 285.0, 211.0, 202.0, 295.0, 284.0, 285.0, 285.0, 233.0, 232.0, 292.0, 287.0, 289.0, 290.0, 288.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 1.5036560426122845, "mean_processing_ms": 0.36840756428267724, "mean_inference_ms": 2.053955603003225}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3432000, "num_steps_sampled": 1830400, "sample_time_ms": 20726.741, "load_time_ms": 36.338, "grad_time_ms": 9326.632, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.004940376617014408, "policy_loss": -0.002967018634080887, "vf_loss": 84.7812271118164, "vf_explained_var": 0.7767437100410461, "kl": 0.0015952900284901261, "entropy": 1.1414709091186523, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1830400, "episodes_total": 4576, "training_iteration": 143, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-56-23", "timestamp": 1660251383, "time_this_iter_s": 35.59740996360779, "time_total_s": 9800.865511417389, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9800.865511417389, "timesteps_since_restore": 1830400, "iterations_since_restore": 143, "perf": {"cpu_util_percent": 28.452, "ram_util_percent": 58.38199999999999}} -{"episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 555.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 277.9}, "custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 171.4, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, 
"tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.67, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.01, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.97, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.76, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.5, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 15.79, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.54, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.61, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.68, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.79, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.5, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 15.79, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.5, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 15.79, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 570.0, 582.0, 579.0, 582.0, 533.0, 495.0, 573.0, 533.0, 582.0, 582.0, 582.0, 522.0, 518.0, 524.0, 587.0, 579.0, 579.0, 579.0, 582.0, 579.0, 636.0, 510.0, 419.0, 570.0, 576.0, 579.0, 576.0, 573.0, 570.0, 587.0, 579.0, 582.0, 573.0, 576.0, 522.0, 455.0, 582.0, 123.0, 476.0, 527.0, 579.0, 582.0, 567.0, 579.0, 579.0, 579.0, 579.0, 481.0, 576.0, 587.0, 582.0, 576.0, 579.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 413.0, 579.0, 570.0, 465.0, 579.0, 579.0, 573.0, 579.0, 533.0, 570.0, 633.0, 582.0, 579.0, 573.0, 582.0, 123.0, 573.0, 579.0, 576.0, 579.0, 582.0, 576.0, 524.0, 573.0, 584.0, 573.0, 582.0, 579.0, 518.0, 573.0, 573.0, 564.0, 576.0, 573.0, 567.0, 633.0, 527.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 282.0, 294.0, 273.0, 297.0, 286.0, 296.0, 296.0, 283.0, 280.0, 302.0, 268.0, 265.0, 235.0, 260.0, 279.0, 294.0, 266.0, 267.0, 299.0, 283.0, 292.0, 290.0, 290.0, 292.0, 263.0, 259.0, 256.0, 262.0, 261.0, 263.0, 314.0, 273.0, 298.0, 281.0, 288.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 302.0, 320.0, 316.0, 261.0, 249.0, 211.0, 208.0, 299.0, 271.0, 283.0, 293.0, 280.0, 299.0, 296.0, 280.0, 288.0, 285.0, 301.0, 269.0, 278.0, 309.0, 292.0, 287.0, 309.0, 273.0, 286.0, 287.0, 289.0, 287.0, 248.0, 274.0, 248.0, 207.0, 280.0, 302.0, 60.0, 63.0, 249.0, 227.0, 275.0, 252.0, 284.0, 295.0, 297.0, 285.0, 282.0, 285.0, 301.0, 278.0, 291.0, 288.0, 278.0, 301.0, 286.0, 293.0, 236.0, 245.0, 277.0, 299.0, 294.0, 293.0, 293.0, 289.0, 279.0, 297.0, 286.0, 293.0, 283.0, 287.0, 288.0, 291.0, 273.0, 309.0, 287.0, 286.0, 301.0, 278.0, 294.0, 285.0, 211.0, 202.0, 295.0, 284.0, 285.0, 285.0, 233.0, 232.0, 292.0, 287.0, 289.0, 290.0, 288.0, 285.0, 281.0, 298.0, 259.0, 274.0, 285.0, 285.0, 302.0, 331.0, 280.0, 302.0, 281.0, 298.0, 286.0, 287.0, 286.0, 296.0, 60.0, 63.0, 288.0, 285.0, 295.0, 284.0, 275.0, 301.0, 280.0, 299.0, 303.0, 279.0, 289.0, 287.0, 275.0, 249.0, 292.0, 281.0, 291.0, 293.0, 292.0, 281.0, 291.0, 291.0, 288.0, 291.0, 235.0, 283.0, 293.0, 280.0, 284.0, 289.0, 276.0, 288.0, 284.0, 292.0, 294.0, 279.0, 278.0, 289.0, 301.0, 332.0, 260.0, 267.0, 278.0, 301.0, 314.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4962976590401342, "mean_processing_ms": 0.3669482669309259, "mean_inference_ms": 2.047611703358859}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3456000, 
"num_steps_sampled": 1843200, "sample_time_ms": 20697.837, "load_time_ms": 36.511, "grad_time_ms": 9260.983, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007136467844247818, "policy_loss": -0.0009602725622244179, "vf_loss": 86.6334457397461, "vf_explained_var": 0.7632217407226562, "kl": 0.0016821371391415596, "entropy": 1.1331907510757446, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1843200, "episodes_total": 4608, "training_iteration": 144, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-56-53", "timestamp": 1660251413, "time_this_iter_s": 30.354671239852905, "time_total_s": 9831.220182657242, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9831.220182657242, "timesteps_since_restore": 1843200, "iterations_since_restore": 144, "perf": {"cpu_util_percent": 32.744186046511636, "ram_util_percent": 58.41860465116278}} -{"episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 559.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 279.92}, "custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 171.84, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.68, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.06, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.75, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.39, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.61, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 15.8, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 1, 
"dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.52, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.69, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.61, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 15.8, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.61, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 15.8, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 501.0, 582.0, 573.0, 576.0, 582.0, 570.0, 530.0, 579.0, 234.0, 576.0, 567.0, 630.0, 573.0, 582.0, 530.0, 573.0, 579.0, 576.0, 570.0, 576.0, 579.0, 522.0, 570.0, 522.0, 552.0, 576.0, 576.0, 579.0, 579.0, 579.0, 465.0, 579.0, 579.0, 573.0, 579.0, 533.0, 570.0, 633.0, 582.0, 579.0, 573.0, 582.0, 123.0, 573.0, 579.0, 576.0, 579.0, 582.0, 576.0, 524.0, 573.0, 584.0, 573.0, 582.0, 579.0, 518.0, 573.0, 573.0, 564.0, 576.0, 573.0, 567.0, 633.0, 527.0, 579.0, 630.0, 579.0, 576.0, 570.0, 582.0, 579.0, 582.0, 533.0, 495.0, 573.0, 533.0, 582.0, 582.0, 582.0, 522.0, 518.0, 524.0, 587.0, 579.0, 579.0, 579.0, 582.0, 579.0, 636.0, 510.0, 419.0, 570.0, 
576.0, 579.0, 576.0, 573.0, 570.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [273.0, 306.0, 290.0, 286.0, 255.0, 246.0, 283.0, 299.0, 280.0, 293.0, 297.0, 279.0, 292.0, 290.0, 280.0, 290.0, 277.0, 253.0, 283.0, 296.0, 112.0, 122.0, 283.0, 293.0, 285.0, 282.0, 316.0, 314.0, 292.0, 281.0, 282.0, 300.0, 271.0, 259.0, 280.0, 293.0, 279.0, 300.0, 291.0, 285.0, 272.0, 298.0, 294.0, 282.0, 289.0, 290.0, 259.0, 263.0, 284.0, 286.0, 252.0, 270.0, 285.0, 267.0, 298.0, 278.0, 277.0, 299.0, 283.0, 296.0, 282.0, 297.0, 276.0, 303.0, 233.0, 232.0, 292.0, 287.0, 289.0, 290.0, 288.0, 285.0, 281.0, 298.0, 259.0, 274.0, 285.0, 285.0, 302.0, 331.0, 280.0, 302.0, 281.0, 298.0, 286.0, 287.0, 286.0, 296.0, 60.0, 63.0, 288.0, 285.0, 295.0, 284.0, 275.0, 301.0, 280.0, 299.0, 303.0, 279.0, 289.0, 287.0, 275.0, 249.0, 292.0, 281.0, 291.0, 293.0, 292.0, 281.0, 291.0, 291.0, 288.0, 291.0, 235.0, 283.0, 293.0, 280.0, 284.0, 289.0, 276.0, 288.0, 284.0, 292.0, 294.0, 279.0, 278.0, 289.0, 301.0, 332.0, 260.0, 267.0, 278.0, 301.0, 314.0, 316.0, 283.0, 296.0, 282.0, 294.0, 273.0, 297.0, 286.0, 296.0, 296.0, 283.0, 280.0, 302.0, 268.0, 265.0, 235.0, 260.0, 279.0, 294.0, 266.0, 267.0, 299.0, 283.0, 292.0, 290.0, 290.0, 292.0, 263.0, 259.0, 256.0, 262.0, 261.0, 263.0, 314.0, 273.0, 298.0, 281.0, 288.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 302.0, 320.0, 316.0, 261.0, 249.0, 211.0, 208.0, 299.0, 271.0, 283.0, 293.0, 280.0, 299.0, 296.0, 280.0, 288.0, 285.0, 301.0, 269.0, 278.0, 309.0]}, 
"sampler_perf": {"mean_env_wait_ms": 1.489071157531513, "mean_processing_ms": 0.36552392885765655, "mean_inference_ms": 2.041836182574639}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3480000, "num_steps_sampled": 1856000, "sample_time_ms": 21121.64, "load_time_ms": 36.034, "grad_time_ms": 9412.787, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004381807986646891, "policy_loss": -0.0034314494114369154, "vf_loss": 83.83314514160156, "vf_explained_var": 0.7805802226066589, "kl": 0.0022449749521911144, "entropy": 1.1401251554489136, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1856000, "episodes_total": 4640, "training_iteration": 145, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-57-28", "timestamp": 1660251448, "time_this_iter_s": 34.93755006790161, "time_total_s": 9866.157732725143, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9866.157732725143, "timesteps_since_restore": 1856000, "iterations_since_restore": 145, "perf": {"cpu_util_percent": 34.077999999999996, "ram_util_percent": 58.46000000000001}} -{"episode_reward_max": 636.0, "episode_reward_min": 234.0, "episode_reward_mean": 565.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 112.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 282.895}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.19, "shaped_reward_min": 74, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.64, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.05, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 
15.75, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.68, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.7, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 15.75, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, 
"viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 15.75, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 582.0, 522.0, 522.0, 579.0, 627.0, 582.0, 525.0, 627.0, 576.0, 579.0, 579.0, 530.0, 582.0, 582.0, 579.0, 582.0, 582.0, 519.0, 575.0, 576.0, 579.0, 582.0, 576.0, 587.0, 579.0, 633.0, 527.0, 579.0, 630.0, 579.0, 576.0, 570.0, 582.0, 579.0, 582.0, 533.0, 495.0, 573.0, 533.0, 582.0, 582.0, 582.0, 522.0, 518.0, 524.0, 587.0, 579.0, 579.0, 579.0, 582.0, 579.0, 636.0, 510.0, 419.0, 570.0, 576.0, 579.0, 576.0, 
573.0, 570.0, 587.0, 579.0, 576.0, 501.0, 582.0, 573.0, 576.0, 582.0, 570.0, 530.0, 579.0, 234.0, 576.0, 567.0, 630.0, 573.0, 582.0, 530.0, 573.0, 579.0, 576.0, 570.0, 576.0, 579.0, 522.0, 570.0, 522.0, 552.0, 576.0, 576.0, 579.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 296.0, 283.0, 274.0, 305.0, 286.0, 296.0, 293.0, 286.0, 285.0, 297.0, 296.0, 283.0, 280.0, 302.0, 269.0, 253.0, 273.0, 249.0, 281.0, 298.0, 321.0, 306.0, 283.0, 299.0, 249.0, 276.0, 320.0, 307.0, 293.0, 283.0, 299.0, 280.0, 283.0, 296.0, 270.0, 260.0, 296.0, 286.0, 297.0, 285.0, 294.0, 285.0, 289.0, 293.0, 279.0, 303.0, 258.0, 261.0, 296.0, 279.0, 293.0, 283.0, 292.0, 287.0, 279.0, 303.0, 292.0, 284.0, 287.0, 300.0, 291.0, 288.0, 301.0, 332.0, 260.0, 267.0, 278.0, 301.0, 314.0, 316.0, 283.0, 296.0, 282.0, 294.0, 273.0, 297.0, 286.0, 296.0, 296.0, 283.0, 280.0, 302.0, 268.0, 265.0, 235.0, 260.0, 279.0, 294.0, 266.0, 267.0, 299.0, 283.0, 292.0, 290.0, 290.0, 292.0, 263.0, 259.0, 256.0, 262.0, 261.0, 263.0, 314.0, 273.0, 298.0, 281.0, 288.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 302.0, 320.0, 316.0, 261.0, 249.0, 211.0, 208.0, 299.0, 271.0, 283.0, 293.0, 280.0, 299.0, 296.0, 280.0, 288.0, 285.0, 301.0, 269.0, 278.0, 309.0, 273.0, 306.0, 290.0, 286.0, 255.0, 246.0, 283.0, 299.0, 280.0, 293.0, 297.0, 279.0, 292.0, 290.0, 280.0, 290.0, 277.0, 253.0, 283.0, 296.0, 112.0, 122.0, 283.0, 293.0, 285.0, 282.0, 316.0, 314.0, 292.0, 281.0, 282.0, 300.0, 271.0, 259.0, 280.0, 293.0, 279.0, 
300.0, 291.0, 285.0, 272.0, 298.0, 294.0, 282.0, 289.0, 290.0, 259.0, 263.0, 284.0, 286.0, 252.0, 270.0, 285.0, 267.0, 298.0, 278.0, 277.0, 299.0, 283.0, 296.0, 282.0, 297.0, 276.0, 303.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4819236352558276, "mean_processing_ms": 0.3641093342877964, "mean_inference_ms": 2.0353369545862554}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3504000, "num_steps_sampled": 1868800, "sample_time_ms": 21304.847, "load_time_ms": 36.48, "grad_time_ms": 9579.852, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003786050481721759, "policy_loss": -0.004469693172723055, "vf_loss": 88.19132232666016, "vf_explained_var": 0.7629249095916748, "kl": 0.0019031836418434978, "entropy": 1.1267800331115723, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1868800, "episodes_total": 4672, "training_iteration": 146, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-58-02", "timestamp": 1660251482, "time_this_iter_s": 33.29244089126587, "time_total_s": 9899.45017361641, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9899.45017361641, "timesteps_since_restore": 1868800, "iterations_since_restore": 146, "perf": {"cpu_util_percent": 34.12765957446809, "ram_util_percent": 58.40212765957448}} -{"episode_reward_max": 630.0, "episode_reward_min": 234.0, "episode_reward_mean": 569.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 112.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.95}, "custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 
80, "sparse_reward_max": 220, "shaped_reward_mean": 175.5, "shaped_reward_min": 74, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.88, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.51, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.15, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.85, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.03, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 579.0, 579.0, 582.0, 582.0, 579.0, 582.0, 582.0, 576.0, 579.0, 587.0, 581.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 536.0, 579.0, 587.0, 510.0, 582.0, 582.0, 579.0, 582.0, 570.0, 573.0, 579.0, 576.0, 576.0, 576.0, 573.0, 570.0, 587.0, 579.0, 576.0, 501.0, 582.0, 573.0, 
576.0, 582.0, 570.0, 530.0, 579.0, 234.0, 576.0, 567.0, 630.0, 573.0, 582.0, 530.0, 573.0, 579.0, 576.0, 570.0, 576.0, 579.0, 522.0, 570.0, 522.0, 552.0, 576.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 582.0, 522.0, 522.0, 579.0, 627.0, 582.0, 525.0, 627.0, 576.0, 579.0, 579.0, 530.0, 582.0, 582.0, 579.0, 582.0, 582.0, 519.0, 575.0, 576.0, 579.0, 582.0, 576.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 315.0, 280.0, 299.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 288.0, 291.0, 283.0, 299.0, 288.0, 294.0, 294.0, 282.0, 288.0, 291.0, 304.0, 283.0, 282.0, 299.0, 270.0, 303.0, 289.0, 290.0, 278.0, 298.0, 289.0, 290.0, 288.0, 288.0, 278.0, 304.0, 296.0, 280.0, 279.0, 257.0, 297.0, 282.0, 288.0, 299.0, 240.0, 270.0, 302.0, 280.0, 293.0, 289.0, 286.0, 293.0, 301.0, 281.0, 285.0, 285.0, 296.0, 277.0, 280.0, 299.0, 288.0, 288.0, 290.0, 286.0, 296.0, 280.0, 288.0, 285.0, 301.0, 269.0, 278.0, 309.0, 273.0, 306.0, 290.0, 286.0, 255.0, 246.0, 283.0, 299.0, 280.0, 293.0, 297.0, 279.0, 292.0, 290.0, 280.0, 290.0, 277.0, 253.0, 283.0, 296.0, 112.0, 122.0, 283.0, 293.0, 285.0, 282.0, 316.0, 314.0, 292.0, 281.0, 282.0, 300.0, 271.0, 259.0, 280.0, 293.0, 279.0, 300.0, 291.0, 285.0, 272.0, 298.0, 294.0, 282.0, 289.0, 290.0, 259.0, 263.0, 284.0, 286.0, 252.0, 270.0, 285.0, 267.0, 298.0, 278.0, 277.0, 299.0, 283.0, 296.0, 282.0, 297.0, 276.0, 303.0, 291.0, 288.0, 296.0, 283.0, 274.0, 305.0, 286.0, 296.0, 293.0, 286.0, 285.0, 297.0, 296.0, 
283.0, 280.0, 302.0, 269.0, 253.0, 273.0, 249.0, 281.0, 298.0, 321.0, 306.0, 283.0, 299.0, 249.0, 276.0, 320.0, 307.0, 293.0, 283.0, 299.0, 280.0, 283.0, 296.0, 270.0, 260.0, 296.0, 286.0, 297.0, 285.0, 294.0, 285.0, 289.0, 293.0, 279.0, 303.0, 258.0, 261.0, 296.0, 279.0, 293.0, 283.0, 292.0, 287.0, 279.0, 303.0, 292.0, 284.0, 287.0, 300.0, 291.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4748545298111457, "mean_processing_ms": 0.362707314353448, "mean_inference_ms": 2.028542543914196}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3528000, "num_steps_sampled": 1881600, "sample_time_ms": 21110.787, "load_time_ms": 36.619, "grad_time_ms": 9537.52, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004449079744517803, "policy_loss": -0.0040388829074800014, "vf_loss": 90.50656127929688, "vf_explained_var": 0.7546594142913818, "kl": 0.0021286073606461287, "entropy": 1.1253728866577148, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1881600, "episodes_total": 4704, "training_iteration": 147, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-58-30", "timestamp": 1660251510, "time_this_iter_s": 28.167391061782837, "time_total_s": 9927.617564678192, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9927.617564678192, "timesteps_since_restore": 1881600, "iterations_since_restore": 147, "perf": {"cpu_util_percent": 35.05, "ram_util_percent": 58.895}} -{"episode_reward_max": 630.0, "episode_reward_min": 419.0, "episode_reward_mean": 573.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 206.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 286.76}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.12, "shaped_reward_min": 139, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.82, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.36, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, 
"useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.89, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.37, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.89, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.37, 
"optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.89, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.37, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 570.0, 579.0, 419.0, 567.0, 579.0, 579.0, 582.0, 579.0, 519.0, 
582.0, 510.0, 582.0, 579.0, 582.0, 573.0, 630.0, 579.0, 579.0, 579.0, 624.0, 518.0, 579.0, 519.0, 579.0, 579.0, 573.0, 581.0, 576.0, 587.0, 576.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 582.0, 522.0, 522.0, 579.0, 627.0, 582.0, 525.0, 627.0, 576.0, 579.0, 579.0, 530.0, 582.0, 582.0, 579.0, 582.0, 582.0, 519.0, 575.0, 576.0, 579.0, 582.0, 576.0, 587.0, 579.0, 627.0, 579.0, 579.0, 582.0, 582.0, 579.0, 582.0, 582.0, 576.0, 579.0, 587.0, 581.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 536.0, 579.0, 587.0, 510.0, 582.0, 582.0, 579.0, 582.0, 570.0, 573.0, 579.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 282.0, 289.0, 293.0, 290.0, 280.0, 291.0, 288.0, 206.0, 213.0, 283.0, 284.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 289.0, 290.0, 265.0, 254.0, 283.0, 299.0, 259.0, 251.0, 299.0, 283.0, 290.0, 289.0, 285.0, 297.0, 280.0, 293.0, 328.0, 302.0, 286.0, 293.0, 296.0, 283.0, 293.0, 286.0, 320.0, 304.0, 282.0, 236.0, 306.0, 273.0, 262.0, 257.0, 287.0, 292.0, 295.0, 284.0, 293.0, 280.0, 292.0, 289.0, 291.0, 285.0, 303.0, 284.0, 275.0, 301.0, 277.0, 299.0, 283.0, 296.0, 282.0, 297.0, 276.0, 303.0, 291.0, 288.0, 296.0, 283.0, 274.0, 305.0, 286.0, 296.0, 293.0, 286.0, 285.0, 297.0, 296.0, 283.0, 280.0, 302.0, 269.0, 253.0, 273.0, 249.0, 281.0, 298.0, 321.0, 306.0, 283.0, 299.0, 249.0, 276.0, 320.0, 307.0, 293.0, 283.0, 299.0, 280.0, 283.0, 296.0, 270.0, 260.0, 296.0, 286.0, 297.0, 285.0, 294.0, 285.0, 289.0, 293.0, 279.0, 
303.0, 258.0, 261.0, 296.0, 279.0, 293.0, 283.0, 292.0, 287.0, 279.0, 303.0, 292.0, 284.0, 287.0, 300.0, 291.0, 288.0, 312.0, 315.0, 280.0, 299.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 288.0, 291.0, 283.0, 299.0, 288.0, 294.0, 294.0, 282.0, 288.0, 291.0, 304.0, 283.0, 282.0, 299.0, 270.0, 303.0, 289.0, 290.0, 278.0, 298.0, 289.0, 290.0, 288.0, 288.0, 278.0, 304.0, 296.0, 280.0, 279.0, 257.0, 297.0, 282.0, 288.0, 299.0, 240.0, 270.0, 302.0, 280.0, 293.0, 289.0, 286.0, 293.0, 301.0, 281.0, 285.0, 285.0, 296.0, 277.0, 280.0, 299.0, 288.0, 288.0, 290.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4678650225470864, "mean_processing_ms": 0.36131377085229416, "mean_inference_ms": 2.021530568417585}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3552000, "num_steps_sampled": 1894400, "sample_time_ms": 21206.603, "load_time_ms": 36.564, "grad_time_ms": 9559.344, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005005656275898218, "policy_loss": -0.0032577281817793846, "vf_loss": 88.27960205078125, "vf_explained_var": 0.7724118232727051, "kl": 0.001774882897734642, "entropy": 1.1291688680648804, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1894400, "episodes_total": 4736, "training_iteration": 148, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-59-00", "timestamp": 1660251540, "time_this_iter_s": 30.007760047912598, "time_total_s": 9957.625324726105, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9957.625324726105, "timesteps_since_restore": 1894400, "iterations_since_restore": 148, "perf": {"cpu_util_percent": 
34.11190476190476, "ram_util_percent": 58.61190476190477}} -{"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 568.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 284.445}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 175.29, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.47, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.64, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.01, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.62, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, 
"onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.5, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.55, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.1, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, 
"optimal_onion_potting_agent_0_mean": 15.5, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.55, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.5, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.55, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 582.0, 462.0, 573.0, 564.0, 582.0, 579.0, 573.0, 180.0, 582.0, 579.0, 564.0, 579.0, 564.0, 579.0, 530.0, 579.0, 576.0, 579.0, 630.0, 582.0, 579.0, 582.0, 579.0, 560.0, 524.0, 576.0, 582.0, 576.0, 587.0, 579.0, 627.0, 579.0, 579.0, 582.0, 582.0, 579.0, 582.0, 582.0, 576.0, 579.0, 587.0, 581.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 536.0, 579.0, 587.0, 510.0, 582.0, 582.0, 579.0, 582.0, 570.0, 573.0, 579.0, 576.0, 576.0, 582.0, 582.0, 570.0, 579.0, 419.0, 567.0, 579.0, 579.0, 582.0, 579.0, 519.0, 582.0, 510.0, 582.0, 579.0, 582.0, 573.0, 630.0, 579.0, 579.0, 579.0, 624.0, 518.0, 579.0, 519.0, 579.0, 579.0, 573.0, 581.0, 576.0, 587.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 298.0, 281.0, 280.0, 299.0, 291.0, 291.0, 284.0, 292.0, 288.0, 294.0, 295.0, 287.0, 230.0, 232.0, 280.0, 293.0, 267.0, 297.0, 296.0, 286.0, 291.0, 288.0, 305.0, 268.0, 86.0, 94.0, 296.0, 286.0, 288.0, 291.0, 285.0, 279.0, 281.0, 298.0, 277.0, 287.0, 293.0, 286.0, 276.0, 254.0, 274.0, 305.0, 285.0, 291.0, 299.0, 280.0, 312.0, 318.0, 288.0, 294.0, 293.0, 286.0, 294.0, 288.0, 273.0, 306.0, 282.0, 278.0, 256.0, 268.0, 280.0, 296.0, 279.0, 303.0, 292.0, 284.0, 287.0, 300.0, 291.0, 288.0, 312.0, 315.0, 280.0, 299.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 288.0, 291.0, 283.0, 299.0, 288.0, 294.0, 294.0, 282.0, 288.0, 
291.0, 304.0, 283.0, 282.0, 299.0, 270.0, 303.0, 289.0, 290.0, 278.0, 298.0, 289.0, 290.0, 288.0, 288.0, 278.0, 304.0, 296.0, 280.0, 279.0, 257.0, 297.0, 282.0, 288.0, 299.0, 240.0, 270.0, 302.0, 280.0, 293.0, 289.0, 286.0, 293.0, 301.0, 281.0, 285.0, 285.0, 296.0, 277.0, 280.0, 299.0, 288.0, 288.0, 290.0, 286.0, 300.0, 282.0, 289.0, 293.0, 290.0, 280.0, 291.0, 288.0, 206.0, 213.0, 283.0, 284.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 289.0, 290.0, 265.0, 254.0, 283.0, 299.0, 259.0, 251.0, 299.0, 283.0, 290.0, 289.0, 285.0, 297.0, 280.0, 293.0, 328.0, 302.0, 286.0, 293.0, 296.0, 283.0, 293.0, 286.0, 320.0, 304.0, 282.0, 236.0, 306.0, 273.0, 262.0, 257.0, 287.0, 292.0, 295.0, 284.0, 293.0, 280.0, 292.0, 289.0, 291.0, 285.0, 303.0, 284.0, 275.0, 301.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4609623899935302, "mean_processing_ms": 0.3599358567345953, "mean_inference_ms": 2.0144329368432397}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3576000, "num_steps_sampled": 1907200, "sample_time_ms": 21431.224, "load_time_ms": 36.685, "grad_time_ms": 9588.21, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004353505093604326, "policy_loss": -0.003862809156998992, "vf_loss": 87.85071563720703, "vf_explained_var": 0.7780687212944031, "kl": 0.002437218790873885, "entropy": 1.137519359588623, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1907200, "episodes_total": 4768, "training_iteration": 149, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_21-59-30", "timestamp": 1660251570, "time_this_iter_s": 30.225661993026733, "time_total_s": 9987.850986719131, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 9987.850986719131, "timesteps_since_restore": 1907200, "iterations_since_restore": 149, "perf": {"cpu_util_percent": 31.69069767441861, "ram_util_percent": 58.481395348837225}} -{"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 565.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 282.965}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.93, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.19, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.63, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 
15.86, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.68, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.26, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.57, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.44, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.06, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.26, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.57, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.26, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.57, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 587.0, 573.0, 579.0, 522.0, 573.0, 579.0, 582.0, 570.0, 525.0, 579.0, 567.0, 576.0, 570.0, 576.0, 582.0, 582.0, 587.0, 576.0, 530.0, 630.0, 582.0, 579.0, 456.0, 579.0, 581.0, 573.0, 576.0, 516.0, 579.0, 582.0, 576.0, 573.0, 579.0, 576.0, 576.0, 582.0, 582.0, 570.0, 579.0, 419.0, 567.0, 579.0, 579.0, 582.0, 579.0, 519.0, 582.0, 510.0, 582.0, 579.0, 582.0, 573.0, 630.0, 579.0, 579.0, 579.0, 624.0, 518.0, 579.0, 519.0, 579.0, 579.0, 573.0, 581.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 582.0, 462.0, 573.0, 564.0, 582.0, 579.0, 573.0, 180.0, 582.0, 579.0, 564.0, 579.0, 564.0, 579.0, 530.0, 579.0, 576.0, 579.0, 630.0, 582.0, 579.0, 582.0, 579.0, 560.0, 524.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 275.0, 291.0, 296.0, 296.0, 277.0, 301.0, 278.0, 269.0, 253.0, 288.0, 285.0, 293.0, 286.0, 283.0, 299.0, 275.0, 295.0, 260.0, 265.0, 292.0, 287.0, 278.0, 289.0, 281.0, 295.0, 272.0, 298.0, 292.0, 284.0, 288.0, 294.0, 285.0, 297.0, 292.0, 295.0, 291.0, 285.0, 276.0, 254.0, 319.0, 311.0, 308.0, 274.0, 277.0, 302.0, 225.0, 231.0, 295.0, 284.0, 285.0, 296.0, 294.0, 279.0, 277.0, 299.0, 249.0, 267.0, 290.0, 289.0, 276.0, 
306.0, 282.0, 294.0, 296.0, 277.0, 280.0, 299.0, 288.0, 288.0, 290.0, 286.0, 300.0, 282.0, 289.0, 293.0, 290.0, 280.0, 291.0, 288.0, 206.0, 213.0, 283.0, 284.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 289.0, 290.0, 265.0, 254.0, 283.0, 299.0, 259.0, 251.0, 299.0, 283.0, 290.0, 289.0, 285.0, 297.0, 280.0, 293.0, 328.0, 302.0, 286.0, 293.0, 296.0, 283.0, 293.0, 286.0, 320.0, 304.0, 282.0, 236.0, 306.0, 273.0, 262.0, 257.0, 287.0, 292.0, 295.0, 284.0, 293.0, 280.0, 292.0, 289.0, 291.0, 285.0, 303.0, 284.0, 275.0, 301.0, 287.0, 292.0, 298.0, 281.0, 280.0, 299.0, 291.0, 291.0, 284.0, 292.0, 288.0, 294.0, 295.0, 287.0, 230.0, 232.0, 280.0, 293.0, 267.0, 297.0, 296.0, 286.0, 291.0, 288.0, 305.0, 268.0, 86.0, 94.0, 296.0, 286.0, 288.0, 291.0, 285.0, 279.0, 281.0, 298.0, 277.0, 287.0, 293.0, 286.0, 276.0, 254.0, 274.0, 305.0, 285.0, 291.0, 299.0, 280.0, 312.0, 318.0, 288.0, 294.0, 293.0, 286.0, 294.0, 288.0, 273.0, 306.0, 282.0, 278.0, 256.0, 268.0, 280.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4541632476370934, "mean_processing_ms": 0.35857867420290873, "mean_inference_ms": 2.0077131328660243}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3600000, "num_steps_sampled": 1920000, "sample_time_ms": 21437.009, "load_time_ms": 37.013, "grad_time_ms": 9883.239, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004526351112872362, "policy_loss": -0.0033871959894895554, "vf_loss": 84.81644439697266, "vf_explained_var": 0.7658727169036865, "kl": 0.002766131656244397, "entropy": 1.1361898183822632, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1920000, "episodes_total": 4800, "training_iteration": 150, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-00-01", "timestamp": 1660251601, "time_this_iter_s": 31.108325004577637, "time_total_s": 10018.95931172371, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10018.95931172371, "timesteps_since_restore": 1920000, "iterations_since_restore": 150, "perf": {"cpu_util_percent": 34.15227272727273, "ram_util_percent": 58.540909090909096}} -{"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 565.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 86.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 282.735}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.87, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.15, 
"onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.46, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.82, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.73, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.28, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, 
"soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.28, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.28, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 516.0, 425.0, 570.0, 584.0, 570.0, 573.0, 579.0, 579.0, 576.0, 579.0, 567.0, 579.0, 573.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 579.0, 464.0, 582.0, 582.0, 570.0, 579.0, 573.0, 579.0, 581.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 582.0, 462.0, 573.0, 564.0, 582.0, 579.0, 573.0, 180.0, 582.0, 579.0, 564.0, 579.0, 564.0, 579.0, 530.0, 579.0, 576.0, 579.0, 630.0, 582.0, 579.0, 582.0, 579.0, 560.0, 524.0, 576.0, 567.0, 587.0, 573.0, 579.0, 522.0, 573.0, 579.0, 582.0, 570.0, 525.0, 579.0, 567.0, 576.0, 570.0, 576.0, 582.0, 582.0, 587.0, 576.0, 530.0, 630.0, 582.0, 579.0, 456.0, 579.0, 581.0, 573.0, 576.0, 516.0, 579.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 297.0, 295.0, 275.0, 268.0, 248.0, 220.0, 205.0, 285.0, 285.0, 301.0, 283.0, 285.0, 285.0, 283.0, 290.0, 282.0, 297.0, 299.0, 280.0, 268.0, 308.0, 288.0, 291.0, 270.0, 297.0, 294.0, 285.0, 282.0, 291.0, 280.0, 299.0, 289.0, 293.0, 290.0, 286.0, 292.0, 
287.0, 288.0, 288.0, 300.0, 279.0, 302.0, 280.0, 286.0, 290.0, 286.0, 296.0, 283.0, 296.0, 232.0, 232.0, 285.0, 297.0, 302.0, 280.0, 278.0, 292.0, 291.0, 288.0, 292.0, 281.0, 299.0, 280.0, 292.0, 289.0, 291.0, 285.0, 303.0, 284.0, 275.0, 301.0, 287.0, 292.0, 298.0, 281.0, 280.0, 299.0, 291.0, 291.0, 284.0, 292.0, 288.0, 294.0, 295.0, 287.0, 230.0, 232.0, 280.0, 293.0, 267.0, 297.0, 296.0, 286.0, 291.0, 288.0, 305.0, 268.0, 86.0, 94.0, 296.0, 286.0, 288.0, 291.0, 285.0, 279.0, 281.0, 298.0, 277.0, 287.0, 293.0, 286.0, 276.0, 254.0, 274.0, 305.0, 285.0, 291.0, 299.0, 280.0, 312.0, 318.0, 288.0, 294.0, 293.0, 286.0, 294.0, 288.0, 273.0, 306.0, 282.0, 278.0, 256.0, 268.0, 280.0, 296.0, 292.0, 275.0, 291.0, 296.0, 296.0, 277.0, 301.0, 278.0, 269.0, 253.0, 288.0, 285.0, 293.0, 286.0, 283.0, 299.0, 275.0, 295.0, 260.0, 265.0, 292.0, 287.0, 278.0, 289.0, 281.0, 295.0, 272.0, 298.0, 292.0, 284.0, 288.0, 294.0, 285.0, 297.0, 292.0, 295.0, 291.0, 285.0, 276.0, 254.0, 319.0, 311.0, 308.0, 274.0, 277.0, 302.0, 225.0, 231.0, 295.0, 284.0, 285.0, 296.0, 294.0, 279.0, 277.0, 299.0, 249.0, 267.0, 290.0, 289.0, 276.0, 306.0, 282.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4474584223491644, "mean_processing_ms": 0.3572431217885297, "mean_inference_ms": 2.0013634824095012}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3624000, "num_steps_sampled": 1932800, "sample_time_ms": 21691.813, "load_time_ms": 37.111, "grad_time_ms": 9921.937, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037641674280166626, "policy_loss": -0.004076274111866951, "vf_loss": 84.02509307861328, "vf_explained_var": 0.7596387267112732, "kl": 0.001788324792869389, "entropy": 1.124145746231079, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1932800, "episodes_total": 4832, "training_iteration": 151, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-00-34", "timestamp": 1660251634, "time_this_iter_s": 32.471389293670654, "time_total_s": 10051.43070101738, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10051.43070101738, "timesteps_since_restore": 1932800, "iterations_since_restore": 151, "perf": {"cpu_util_percent": 34.02391304347826, "ram_util_percent": 58.56739130434782}} -{"episode_reward_max": 630.0, "episode_reward_min": 402.0, "episode_reward_mean": 568.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 189.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.035}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.27, "shaped_reward_min": 122, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.39, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.12, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.51, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.4, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.97, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.51, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.4, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.51, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.4, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 582.0, 576.0, 576.0, 582.0, 587.0, 402.0, 564.0, 582.0, 533.0, 525.0, 579.0, 576.0, 587.0, 579.0, 539.0, 582.0, 582.0, 582.0, 533.0, 570.0, 584.0, 579.0, 579.0, 582.0, 630.0, 579.0, 587.0, 582.0, 579.0, 579.0, 560.0, 524.0, 576.0, 567.0, 587.0, 573.0, 579.0, 522.0, 573.0, 579.0, 582.0, 570.0, 525.0, 579.0, 567.0, 576.0, 570.0, 576.0, 582.0, 582.0, 587.0, 576.0, 530.0, 630.0, 582.0, 579.0, 456.0, 579.0, 581.0, 573.0, 576.0, 516.0, 579.0, 582.0, 576.0, 582.0, 570.0, 516.0, 425.0, 570.0, 584.0, 570.0, 573.0, 579.0, 579.0, 576.0, 579.0, 567.0, 579.0, 573.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 579.0, 464.0, 582.0, 582.0, 570.0, 579.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 293.0, 286.0, 282.0, 
297.0, 276.0, 306.0, 285.0, 291.0, 285.0, 291.0, 291.0, 291.0, 289.0, 298.0, 189.0, 213.0, 290.0, 274.0, 293.0, 289.0, 272.0, 261.0, 262.0, 263.0, 293.0, 286.0, 275.0, 301.0, 294.0, 293.0, 280.0, 299.0, 264.0, 275.0, 281.0, 301.0, 299.0, 283.0, 281.0, 301.0, 263.0, 270.0, 276.0, 294.0, 301.0, 283.0, 296.0, 283.0, 295.0, 284.0, 288.0, 294.0, 321.0, 309.0, 288.0, 291.0, 286.0, 301.0, 286.0, 296.0, 304.0, 275.0, 273.0, 306.0, 282.0, 278.0, 256.0, 268.0, 280.0, 296.0, 292.0, 275.0, 291.0, 296.0, 296.0, 277.0, 301.0, 278.0, 269.0, 253.0, 288.0, 285.0, 293.0, 286.0, 283.0, 299.0, 275.0, 295.0, 260.0, 265.0, 292.0, 287.0, 278.0, 289.0, 281.0, 295.0, 272.0, 298.0, 292.0, 284.0, 288.0, 294.0, 285.0, 297.0, 292.0, 295.0, 291.0, 285.0, 276.0, 254.0, 319.0, 311.0, 308.0, 274.0, 277.0, 302.0, 225.0, 231.0, 295.0, 284.0, 285.0, 296.0, 294.0, 279.0, 277.0, 299.0, 249.0, 267.0, 290.0, 289.0, 276.0, 306.0, 282.0, 294.0, 285.0, 297.0, 295.0, 275.0, 268.0, 248.0, 220.0, 205.0, 285.0, 285.0, 301.0, 283.0, 285.0, 285.0, 283.0, 290.0, 282.0, 297.0, 299.0, 280.0, 268.0, 308.0, 288.0, 291.0, 270.0, 297.0, 294.0, 285.0, 282.0, 291.0, 280.0, 299.0, 289.0, 293.0, 290.0, 286.0, 292.0, 287.0, 288.0, 288.0, 300.0, 279.0, 302.0, 280.0, 286.0, 290.0, 286.0, 296.0, 283.0, 296.0, 232.0, 232.0, 285.0, 297.0, 302.0, 280.0, 278.0, 292.0, 291.0, 288.0, 292.0, 281.0, 299.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4408300434942674, "mean_processing_ms": 0.35592594931553234, "mean_inference_ms": 1.9949418939108405}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3648000, "num_steps_sampled": 1945600, "sample_time_ms": 21661.173, "load_time_ms": 37.278, "grad_time_ms": 9874.5, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006686341017484665, "policy_loss": -0.0018611648119986057, "vf_loss": 91.119873046875, "vf_explained_var": 0.7503556609153748, "kl": 0.002358483849093318, "entropy": 1.128965973854065, "entropy_coeff": 
0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1945600, "episodes_total": 4864, "training_iteration": 152, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-01-03", "timestamp": 1660251663, "time_this_iter_s": 29.826536893844604, "time_total_s": 10081.257237911224, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10081.257237911224, "timesteps_since_restore": 1945600, "iterations_since_restore": 152, "perf": {"cpu_util_percent": 34.71904761904763, "ram_util_percent": 58.37619047619047}} -{"episode_reward_max": 630.0, "episode_reward_min": 402.0, "episode_reward_mean": 569.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 189.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.96}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.32, "shaped_reward_min": 122, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.27, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.76, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.38, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.46, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.94, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.76, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.38, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.76, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.38, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 582.0, 510.0, 579.0, 579.0, 587.0, 576.0, 582.0, 579.0, 573.0, 582.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 587.0, 579.0, 576.0, 512.0, 579.0, 582.0, 587.0, 576.0, 573.0, 582.0, 582.0, 582.0, 582.0, 579.0, 516.0, 579.0, 582.0, 576.0, 582.0, 570.0, 516.0, 425.0, 570.0, 584.0, 570.0, 573.0, 579.0, 579.0, 576.0, 579.0, 567.0, 579.0, 573.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 579.0, 464.0, 582.0, 582.0, 570.0, 579.0, 573.0, 579.0, 579.0, 579.0, 579.0, 582.0, 576.0, 576.0, 582.0, 587.0, 402.0, 564.0, 582.0, 533.0, 525.0, 579.0, 576.0, 587.0, 579.0, 539.0, 582.0, 582.0, 582.0, 533.0, 570.0, 584.0, 579.0, 579.0, 582.0, 630.0, 579.0, 587.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 293.0, 289.0, 285.0, 297.0, 244.0, 266.0, 290.0, 289.0, 282.0, 297.0, 309.0, 278.0, 287.0, 289.0, 293.0, 289.0, 273.0, 306.0, 294.0, 279.0, 289.0, 293.0, 280.0, 293.0, 279.0, 303.0, 281.0, 298.0, 291.0, 285.0, 262.0, 260.0, 286.0, 296.0, 293.0, 294.0, 288.0, 291.0, 283.0, 293.0, 262.0, 250.0, 285.0, 294.0, 280.0, 302.0, 296.0, 291.0, 285.0, 291.0, 290.0, 283.0, 288.0, 294.0, 291.0, 291.0, 285.0, 297.0, 288.0, 294.0, 298.0, 281.0, 249.0, 267.0, 290.0, 289.0, 276.0, 306.0, 282.0, 294.0, 285.0, 297.0, 295.0, 275.0, 268.0, 248.0, 220.0, 205.0, 285.0, 285.0, 301.0, 283.0, 285.0, 285.0, 283.0, 290.0, 282.0, 297.0, 299.0, 280.0, 268.0, 308.0, 288.0, 291.0, 270.0, 297.0, 294.0, 285.0, 282.0, 291.0, 280.0, 299.0, 289.0, 293.0, 290.0, 286.0, 292.0, 287.0, 288.0, 288.0, 300.0, 279.0, 302.0, 280.0, 286.0, 290.0, 286.0, 296.0, 283.0, 296.0, 232.0, 232.0, 285.0, 297.0, 302.0, 280.0, 278.0, 292.0, 291.0, 288.0, 292.0, 281.0, 299.0, 280.0, 283.0, 296.0, 293.0, 286.0, 282.0, 297.0, 276.0, 306.0, 285.0, 291.0, 285.0, 291.0, 291.0, 291.0, 289.0, 298.0, 189.0, 213.0, 290.0, 274.0, 293.0, 289.0, 272.0, 261.0, 262.0, 263.0, 293.0, 286.0, 275.0, 301.0, 294.0, 293.0, 280.0, 299.0, 264.0, 275.0, 281.0, 301.0, 299.0, 283.0, 281.0, 301.0, 263.0, 270.0, 276.0, 294.0, 301.0, 283.0, 296.0, 283.0, 295.0, 284.0, 288.0, 294.0, 321.0, 309.0, 288.0, 291.0, 286.0, 301.0, 286.0, 296.0, 304.0, 275.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4342721716334546, "mean_processing_ms": 0.3546223616494384, "mean_inference_ms": 1.9881963342076971}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3672000, "num_steps_sampled": 1958400, "sample_time_ms": 20770.751, "load_time_ms": 37.491, "grad_time_ms": 9746.757, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.0023278051521629095, "policy_loss": -0.0060347276739776134, "vf_loss": 89.3071060180664, "vf_explained_var": 0.7670709490776062, "kl": 0.0017067408189177513, "entropy": 1.1363595724105835, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1958400, "episodes_total": 4896, "training_iteration": 153, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-01-29", "timestamp": 1660251689, "time_this_iter_s": 25.417139053344727, "time_total_s": 10106.67437696457, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10106.67437696457, "timesteps_since_restore": 1958400, "iterations_since_restore": 153, "perf": {"cpu_util_percent": 33.84166666666667, "ram_util_percent": 58.383333333333326}} -{"episode_reward_max": 630.0, "episode_reward_min": 402.0, "episode_reward_mean": 571.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 189.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 285.78}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.76, "shaped_reward_min": 122, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.4, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.56, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.91, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.58, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.6, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.66, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.1, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.94, 
"useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.58, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.6, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.58, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.6, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 582.0, 579.0, 522.0, 576.0, 581.0, 510.0, 527.0, 584.0, 576.0, 573.0, 582.0, 576.0, 522.0, 576.0, 561.0, 579.0, 579.0, 579.0, 579.0, 581.0, 579.0, 570.0, 579.0, 573.0, 579.0, 579.0, 579.0, 579.0, 582.0, 576.0, 576.0, 582.0, 587.0, 402.0, 564.0, 582.0, 533.0, 525.0, 579.0, 576.0, 587.0, 579.0, 539.0, 582.0, 582.0, 582.0, 533.0, 570.0, 584.0, 579.0, 579.0, 582.0, 630.0, 579.0, 587.0, 582.0, 579.0, 579.0, 582.0, 582.0, 510.0, 579.0, 579.0, 587.0, 576.0, 582.0, 579.0, 573.0, 582.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 587.0, 579.0, 576.0, 512.0, 579.0, 582.0, 587.0, 576.0, 573.0, 582.0, 582.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 278.0, 273.0, 309.0, 290.0, 286.0, 298.0, 278.0, 282.0, 297.0, 285.0, 297.0, 293.0, 286.0, 283.0, 290.0, 294.0, 282.0, 298.0, 284.0, 294.0, 285.0, 284.0, 298.0, 285.0, 294.0, 280.0, 242.0, 292.0, 284.0, 303.0, 278.0, 262.0, 248.0, 281.0, 246.0, 290.0, 294.0, 299.0, 277.0, 282.0, 291.0, 290.0, 292.0, 282.0, 294.0, 270.0, 252.0, 277.0, 299.0, 283.0, 278.0, 280.0, 299.0, 279.0, 300.0, 282.0, 297.0, 278.0, 301.0, 297.0, 284.0, 291.0, 288.0, 278.0, 292.0, 291.0, 288.0, 292.0, 281.0, 299.0, 280.0, 283.0, 296.0, 293.0, 286.0, 282.0, 297.0, 276.0, 306.0, 285.0, 291.0, 285.0, 291.0, 291.0, 291.0, 289.0, 298.0, 189.0, 213.0, 290.0, 274.0, 293.0, 289.0, 272.0, 261.0, 262.0, 263.0, 293.0, 286.0, 275.0, 301.0, 294.0, 293.0, 280.0, 299.0, 264.0, 275.0, 281.0, 301.0, 299.0, 283.0, 281.0, 301.0, 263.0, 270.0, 276.0, 294.0, 301.0, 283.0, 296.0, 283.0, 295.0, 284.0, 288.0, 294.0, 321.0, 309.0, 288.0, 291.0, 286.0, 301.0, 286.0, 296.0, 304.0, 275.0, 289.0, 290.0, 293.0, 289.0, 285.0, 297.0, 244.0, 266.0, 290.0, 289.0, 282.0, 297.0, 309.0, 278.0, 287.0, 289.0, 293.0, 289.0, 273.0, 306.0, 294.0, 279.0, 289.0, 293.0, 280.0, 293.0, 279.0, 303.0, 281.0, 298.0, 291.0, 285.0, 262.0, 260.0, 286.0, 296.0, 293.0, 294.0, 288.0, 291.0, 283.0, 293.0, 262.0, 250.0, 285.0, 294.0, 280.0, 302.0, 296.0, 291.0, 285.0, 291.0, 290.0, 283.0, 288.0, 294.0, 291.0, 291.0, 285.0, 297.0, 288.0, 294.0, 298.0, 281.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4277901540826492, "mean_processing_ms": 0.3533349618976105, "mean_inference_ms": 1.9812328755781439}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3696000, 
"num_steps_sampled": 1971200, "sample_time_ms": 20717.538, "load_time_ms": 37.296, "grad_time_ms": 9558.366, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0045716362074017525, "policy_loss": -0.004014193546026945, "vf_loss": 91.47116088867188, "vf_explained_var": 0.753397524356842, "kl": 0.001791521324776113, "entropy": 1.1225804090499878, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1971200, "episodes_total": 4928, "training_iteration": 154, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-01-57", "timestamp": 1660251717, "time_this_iter_s": 27.938206911087036, "time_total_s": 10134.612583875656, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10134.612583875656, "timesteps_since_restore": 1971200, "iterations_since_restore": 154, "perf": {"cpu_util_percent": 33.69230769230769, "ram_util_percent": 58.38717948717951}} -{"episode_reward_max": 627.0, "episode_reward_min": 510.0, "episode_reward_mean": 573.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.615}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.83, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.54, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.06, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.53, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.7, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.62, "dish_pickup_agent_1_min": 1, 
"dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.53, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.7, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.53, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.7, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 582.0, 582.0, 579.0, 576.0, 579.0, 567.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 570.0, 579.0, 576.0, 627.0, 587.0, 582.0, 512.0, 564.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 525.0, 582.0, 582.0, 587.0, 579.0, 587.0, 582.0, 579.0, 579.0, 582.0, 582.0, 510.0, 579.0, 579.0, 587.0, 576.0, 582.0, 579.0, 573.0, 582.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 587.0, 579.0, 576.0, 512.0, 579.0, 582.0, 587.0, 576.0, 573.0, 582.0, 582.0, 582.0, 582.0, 579.0, 570.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 582.0, 579.0, 522.0, 576.0, 581.0, 510.0, 527.0, 584.0, 576.0, 573.0, 582.0, 576.0, 522.0, 576.0, 561.0, 
579.0, 579.0, 579.0, 579.0, 581.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 277.0, 298.0, 278.0, 287.0, 295.0, 287.0, 295.0, 295.0, 284.0, 290.0, 286.0, 285.0, 294.0, 286.0, 281.0, 293.0, 283.0, 298.0, 281.0, 288.0, 291.0, 296.0, 283.0, 293.0, 286.0, 288.0, 291.0, 287.0, 283.0, 281.0, 298.0, 294.0, 282.0, 306.0, 321.0, 301.0, 286.0, 288.0, 294.0, 260.0, 252.0, 285.0, 279.0, 288.0, 294.0, 284.0, 295.0, 281.0, 301.0, 256.0, 271.0, 290.0, 280.0, 293.0, 286.0, 260.0, 265.0, 283.0, 299.0, 289.0, 293.0, 294.0, 293.0, 288.0, 291.0, 286.0, 301.0, 286.0, 296.0, 304.0, 275.0, 289.0, 290.0, 293.0, 289.0, 285.0, 297.0, 244.0, 266.0, 290.0, 289.0, 282.0, 297.0, 309.0, 278.0, 287.0, 289.0, 293.0, 289.0, 273.0, 306.0, 294.0, 279.0, 289.0, 293.0, 280.0, 293.0, 279.0, 303.0, 281.0, 298.0, 291.0, 285.0, 262.0, 260.0, 286.0, 296.0, 293.0, 294.0, 288.0, 291.0, 283.0, 293.0, 262.0, 250.0, 285.0, 294.0, 280.0, 302.0, 296.0, 291.0, 285.0, 291.0, 290.0, 283.0, 288.0, 294.0, 291.0, 291.0, 285.0, 297.0, 288.0, 294.0, 298.0, 281.0, 292.0, 278.0, 273.0, 309.0, 290.0, 286.0, 298.0, 278.0, 282.0, 297.0, 285.0, 297.0, 293.0, 286.0, 283.0, 290.0, 294.0, 282.0, 298.0, 284.0, 294.0, 285.0, 284.0, 298.0, 285.0, 294.0, 280.0, 242.0, 292.0, 284.0, 303.0, 278.0, 262.0, 248.0, 281.0, 246.0, 290.0, 294.0, 299.0, 277.0, 282.0, 291.0, 290.0, 292.0, 282.0, 294.0, 270.0, 252.0, 277.0, 299.0, 283.0, 278.0, 280.0, 299.0, 279.0, 300.0, 282.0, 297.0, 278.0, 301.0, 297.0, 284.0, 291.0, 288.0]}, 
"sampler_perf": {"mean_env_wait_ms": 1.4213938109933664, "mean_processing_ms": 0.35206327984916896, "mean_inference_ms": 1.974322466189253}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3720000, "num_steps_sampled": 1984000, "sample_time_ms": 20399.728, "load_time_ms": 37.504, "grad_time_ms": 9171.651, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0077364686876535416, "policy_loss": -0.000512867234647274, "vf_loss": 88.10808563232422, "vf_explained_var": 0.7624195218086243, "kl": 0.0021189304534345865, "entropy": 1.1229437589645386, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1984000, "episodes_total": 4960, "training_iteration": 155, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-02-25", "timestamp": 1660251745, "time_this_iter_s": 27.89369297027588, "time_total_s": 10162.506276845932, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10162.506276845932, "timesteps_since_restore": 1984000, "iterations_since_restore": 155, "perf": {"cpu_util_percent": 34.404999999999994, "ram_util_percent": 58.395}} -{"episode_reward_max": 627.0, "episode_reward_min": 462.0, "episode_reward_mean": 570.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 285.3}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.4, "shaped_reward_min": 142, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.04, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.7, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.71, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.95, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.17, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 
16.87, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.02, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.3, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.17, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.17, 
"viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 462.0, 578.0, 579.0, 519.0, 579.0, 582.0, 567.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 579.0, 564.0, 519.0, 567.0, 579.0, 570.0, 582.0, 573.0, 579.0, 527.0, 582.0, 576.0, 573.0, 570.0, 515.0, 576.0, 582.0, 579.0, 582.0, 582.0, 582.0, 579.0, 570.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 582.0, 579.0, 522.0, 576.0, 581.0, 510.0, 527.0, 584.0, 576.0, 573.0, 582.0, 576.0, 522.0, 576.0, 561.0, 579.0, 579.0, 579.0, 
579.0, 581.0, 579.0, 573.0, 576.0, 582.0, 582.0, 579.0, 576.0, 579.0, 567.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 570.0, 579.0, 576.0, 627.0, 587.0, 582.0, 512.0, 564.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 525.0, 582.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 236.0, 226.0, 295.0, 283.0, 287.0, 292.0, 268.0, 251.0, 295.0, 284.0, 285.0, 297.0, 281.0, 286.0, 293.0, 283.0, 289.0, 284.0, 285.0, 288.0, 290.0, 289.0, 280.0, 299.0, 286.0, 290.0, 295.0, 284.0, 285.0, 279.0, 264.0, 255.0, 279.0, 288.0, 295.0, 284.0, 273.0, 297.0, 286.0, 296.0, 290.0, 283.0, 284.0, 295.0, 249.0, 278.0, 288.0, 294.0, 277.0, 299.0, 282.0, 291.0, 290.0, 280.0, 260.0, 255.0, 278.0, 298.0, 280.0, 302.0, 294.0, 285.0, 291.0, 291.0, 285.0, 297.0, 288.0, 294.0, 298.0, 281.0, 292.0, 278.0, 273.0, 309.0, 290.0, 286.0, 298.0, 278.0, 282.0, 297.0, 285.0, 297.0, 293.0, 286.0, 283.0, 290.0, 294.0, 282.0, 298.0, 284.0, 294.0, 285.0, 284.0, 298.0, 285.0, 294.0, 280.0, 242.0, 292.0, 284.0, 303.0, 278.0, 262.0, 248.0, 281.0, 246.0, 290.0, 294.0, 299.0, 277.0, 282.0, 291.0, 290.0, 292.0, 282.0, 294.0, 270.0, 252.0, 277.0, 299.0, 283.0, 278.0, 280.0, 299.0, 279.0, 300.0, 282.0, 297.0, 278.0, 301.0, 297.0, 284.0, 291.0, 288.0, 296.0, 277.0, 298.0, 278.0, 287.0, 295.0, 287.0, 295.0, 295.0, 284.0, 290.0, 286.0, 285.0, 294.0, 286.0, 281.0, 293.0, 283.0, 298.0, 281.0, 288.0, 291.0, 296.0, 283.0, 293.0, 286.0, 288.0, 291.0, 287.0, 283.0, 281.0, 298.0, 294.0, 282.0, 306.0, 321.0, 301.0, 
286.0, 288.0, 294.0, 260.0, 252.0, 285.0, 279.0, 288.0, 294.0, 284.0, 295.0, 281.0, 301.0, 256.0, 271.0, 290.0, 280.0, 293.0, 286.0, 260.0, 265.0, 283.0, 299.0, 289.0, 293.0, 294.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4150911429857618, "mean_processing_ms": 0.3508138897349896, "mean_inference_ms": 1.9677135371948458}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3744000, "num_steps_sampled": 1996800, "sample_time_ms": 20238.789, "load_time_ms": 37.088, "grad_time_ms": 8942.988, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004439468961209059, "policy_loss": -0.0041490718722343445, "vf_loss": 91.5320053100586, "vf_explained_var": 0.7567749619483948, "kl": 0.001588103943504393, "entropy": 1.1293169260025024, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 1996800, "episodes_total": 4992, "training_iteration": 156, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-02-54", "timestamp": 1660251774, "time_this_iter_s": 29.386072158813477, "time_total_s": 10191.892349004745, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10191.892349004745, "timesteps_since_restore": 1996800, "iterations_since_restore": 156, "perf": {"cpu_util_percent": 34.03658536585366, "ram_util_percent": 58.353658536585364}} -{"episode_reward_max": 630.0, "episode_reward_min": 462.0, "episode_reward_mean": 569.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.82}, "custom_metrics": {"sparse_reward_mean": 197.4, 
"sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.84, "shaped_reward_min": 141, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.54, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.8, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.79, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.2, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.86, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.98, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.78, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 4.91, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.2, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.86, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.2, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.86, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 533.0, 564.0, 630.0, 579.0, 587.0, 579.0, 593.0, 582.0, 582.0, 584.0, 582.0, 573.0, 513.0, 570.0, 587.0, 507.0, 579.0, 564.0, 516.0, 579.0, 579.0, 576.0, 579.0, 579.0, 576.0, 579.0, 530.0, 573.0, 549.0, 501.0, 582.0, 579.0, 579.0, 581.0, 579.0, 573.0, 576.0, 582.0, 582.0, 579.0, 
576.0, 579.0, 567.0, 576.0, 579.0, 579.0, 579.0, 579.0, 579.0, 570.0, 579.0, 576.0, 627.0, 587.0, 582.0, 512.0, 564.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 525.0, 582.0, 582.0, 587.0, 587.0, 462.0, 578.0, 579.0, 519.0, 579.0, 582.0, 567.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 579.0, 564.0, 519.0, 567.0, 579.0, 570.0, 582.0, 573.0, 579.0, 527.0, 582.0, 576.0, 573.0, 570.0, 515.0, 576.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 283.0, 271.0, 262.0, 284.0, 280.0, 309.0, 321.0, 294.0, 285.0, 291.0, 296.0, 283.0, 296.0, 289.0, 304.0, 294.0, 288.0, 286.0, 296.0, 295.0, 289.0, 283.0, 299.0, 280.0, 293.0, 263.0, 250.0, 274.0, 296.0, 293.0, 294.0, 256.0, 251.0, 295.0, 284.0, 285.0, 279.0, 245.0, 271.0, 288.0, 291.0, 284.0, 295.0, 284.0, 292.0, 289.0, 290.0, 293.0, 286.0, 276.0, 300.0, 299.0, 280.0, 253.0, 277.0, 278.0, 295.0, 270.0, 279.0, 264.0, 237.0, 286.0, 296.0, 282.0, 297.0, 278.0, 301.0, 297.0, 284.0, 291.0, 288.0, 296.0, 277.0, 298.0, 278.0, 287.0, 295.0, 287.0, 295.0, 295.0, 284.0, 290.0, 286.0, 285.0, 294.0, 286.0, 281.0, 293.0, 283.0, 298.0, 281.0, 288.0, 291.0, 296.0, 283.0, 293.0, 286.0, 288.0, 291.0, 287.0, 283.0, 281.0, 298.0, 294.0, 282.0, 306.0, 321.0, 301.0, 286.0, 288.0, 294.0, 260.0, 252.0, 285.0, 279.0, 288.0, 294.0, 284.0, 295.0, 281.0, 301.0, 256.0, 271.0, 290.0, 280.0, 293.0, 286.0, 260.0, 265.0, 283.0, 299.0, 289.0, 293.0, 294.0, 293.0, 293.0, 294.0, 236.0, 226.0, 295.0, 283.0, 287.0, 292.0, 268.0, 251.0, 295.0, 284.0, 285.0, 
297.0, 281.0, 286.0, 293.0, 283.0, 289.0, 284.0, 285.0, 288.0, 290.0, 289.0, 280.0, 299.0, 286.0, 290.0, 295.0, 284.0, 285.0, 279.0, 264.0, 255.0, 279.0, 288.0, 295.0, 284.0, 273.0, 297.0, 286.0, 296.0, 290.0, 283.0, 284.0, 295.0, 249.0, 278.0, 288.0, 294.0, 277.0, 299.0, 282.0, 291.0, 290.0, 280.0, 260.0, 255.0, 278.0, 298.0, 280.0, 302.0, 294.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4088928730851003, "mean_processing_ms": 0.34958857715576797, "mean_inference_ms": 1.9613751884714845}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3768000, "num_steps_sampled": 2009600, "sample_time_ms": 20457.246, "load_time_ms": 36.907, "grad_time_ms": 8867.46, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003602199489250779, "policy_loss": -0.004857169929891825, "vf_loss": 90.2380599975586, "vf_explained_var": 0.7651500105857849, "kl": 0.0019707006867974997, "entropy": 1.1288973093032837, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2009600, "episodes_total": 5024, "training_iteration": 157, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-03-24", "timestamp": 1660251804, "time_this_iter_s": 29.596789121627808, "time_total_s": 10221.489138126373, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10221.489138126373, "timesteps_since_restore": 2009600, "iterations_since_restore": 157, "perf": {"cpu_util_percent": 35.76428571428571, "ram_util_percent": 58.59285714285714}} -{"episode_reward_max": 630.0, "episode_reward_min": 185.0, "episode_reward_mean": 563.17, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.585}, "custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 172.77, "shaped_reward_min": 65, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.97, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.41, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.66, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, 
"useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.12, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.65, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.07, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.77, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.12, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.65, 
"optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.12, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.65, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 573.0, 495.0, 579.0, 579.0, 582.0, 570.0, 576.0, 567.0, 579.0, 582.0, 582.0, 
579.0, 530.0, 573.0, 536.0, 533.0, 579.0, 185.0, 576.0, 582.0, 582.0, 544.0, 579.0, 576.0, 552.0, 570.0, 573.0, 630.0, 579.0, 495.0, 579.0, 525.0, 582.0, 582.0, 587.0, 587.0, 462.0, 578.0, 579.0, 519.0, 579.0, 582.0, 567.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 579.0, 564.0, 519.0, 567.0, 579.0, 570.0, 582.0, 573.0, 579.0, 527.0, 582.0, 576.0, 573.0, 570.0, 515.0, 576.0, 582.0, 579.0, 582.0, 533.0, 564.0, 630.0, 579.0, 587.0, 579.0, 593.0, 582.0, 582.0, 584.0, 582.0, 573.0, 513.0, 570.0, 587.0, 507.0, 579.0, 564.0, 516.0, 579.0, 579.0, 576.0, 579.0, 579.0, 576.0, 579.0, 530.0, 573.0, 549.0, 501.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 280.0, 293.0, 248.0, 247.0, 295.0, 284.0, 282.0, 297.0, 278.0, 304.0, 276.0, 294.0, 288.0, 288.0, 299.0, 268.0, 299.0, 280.0, 285.0, 297.0, 294.0, 288.0, 291.0, 288.0, 277.0, 253.0, 297.0, 276.0, 273.0, 263.0, 275.0, 258.0, 288.0, 291.0, 89.0, 96.0, 301.0, 275.0, 290.0, 292.0, 288.0, 294.0, 276.0, 268.0, 282.0, 297.0, 290.0, 286.0, 281.0, 271.0, 293.0, 277.0, 275.0, 298.0, 316.0, 314.0, 286.0, 293.0, 256.0, 239.0, 287.0, 292.0, 260.0, 265.0, 283.0, 299.0, 289.0, 293.0, 294.0, 293.0, 293.0, 294.0, 236.0, 226.0, 295.0, 283.0, 287.0, 292.0, 268.0, 251.0, 295.0, 284.0, 285.0, 297.0, 281.0, 286.0, 293.0, 283.0, 289.0, 284.0, 285.0, 288.0, 290.0, 289.0, 280.0, 299.0, 286.0, 290.0, 295.0, 284.0, 285.0, 279.0, 264.0, 255.0, 279.0, 288.0, 295.0, 284.0, 273.0, 297.0, 286.0, 296.0, 290.0, 283.0, 284.0, 295.0, 249.0, 278.0, 
288.0, 294.0, 277.0, 299.0, 282.0, 291.0, 290.0, 280.0, 260.0, 255.0, 278.0, 298.0, 280.0, 302.0, 294.0, 285.0, 299.0, 283.0, 271.0, 262.0, 284.0, 280.0, 309.0, 321.0, 294.0, 285.0, 291.0, 296.0, 283.0, 296.0, 289.0, 304.0, 294.0, 288.0, 286.0, 296.0, 295.0, 289.0, 283.0, 299.0, 280.0, 293.0, 263.0, 250.0, 274.0, 296.0, 293.0, 294.0, 256.0, 251.0, 295.0, 284.0, 285.0, 279.0, 245.0, 271.0, 288.0, 291.0, 284.0, 295.0, 284.0, 292.0, 289.0, 290.0, 293.0, 286.0, 276.0, 300.0, 299.0, 280.0, 253.0, 277.0, 278.0, 295.0, 270.0, 279.0, 264.0, 237.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.4027851249897816, "mean_processing_ms": 0.3483803806446401, "mean_inference_ms": 1.9551724322503146}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3792000, "num_steps_sampled": 2022400, "sample_time_ms": 20413.446, "load_time_ms": 37.245, "grad_time_ms": 8954.782, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005850760731846094, "policy_loss": -0.002336603356525302, "vf_loss": 87.48675537109375, "vf_explained_var": 0.7656591534614563, "kl": 0.0021419422701001167, "entropy": 1.1226191520690918, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2022400, "episodes_total": 5056, "training_iteration": 158, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-03-54", "timestamp": 1660251834, "time_this_iter_s": 30.44686508178711, "time_total_s": 10251.93600320816, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10251.93600320816, "timesteps_since_restore": 2022400, "iterations_since_restore": 158, "perf": {"cpu_util_percent": 
37.25348837209302, "ram_util_percent": 58.44883720930233}} -{"episode_reward_max": 630.0, "episode_reward_min": 185.0, "episode_reward_mean": 564.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 282.115}, "custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.03, "shaped_reward_min": 65, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.88, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.41, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.6, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.77, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, 
"onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.67, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.95, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.36, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.28, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 15.08, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.67, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.67, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 582.0, 573.0, 561.0, 584.0, 489.0, 573.0, 512.0, 582.0, 582.0, 576.0, 539.0, 570.0, 519.0, 627.0, 579.0, 518.0, 516.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 582.0, 573.0, 579.0, 579.0, 582.0, 570.0, 630.0, 515.0, 576.0, 582.0, 579.0, 582.0, 533.0, 564.0, 630.0, 579.0, 587.0, 579.0, 593.0, 582.0, 582.0, 584.0, 582.0, 573.0, 513.0, 570.0, 587.0, 507.0, 579.0, 564.0, 516.0, 579.0, 579.0, 576.0, 579.0, 579.0, 576.0, 579.0, 530.0, 573.0, 549.0, 501.0, 582.0, 576.0, 573.0, 495.0, 579.0, 579.0, 582.0, 570.0, 576.0, 567.0, 579.0, 582.0, 582.0, 579.0, 530.0, 573.0, 536.0, 533.0, 579.0, 185.0, 576.0, 582.0, 582.0, 544.0, 579.0, 576.0, 552.0, 570.0, 573.0, 630.0, 579.0, 495.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 278.0, 294.0, 288.0, 287.0, 286.0, 297.0, 264.0, 291.0, 293.0, 240.0, 249.0, 292.0, 281.0, 262.0, 250.0, 297.0, 285.0, 285.0, 297.0, 279.0, 297.0, 262.0, 277.0, 287.0, 283.0, 266.0, 253.0, 321.0, 306.0, 286.0, 293.0, 272.0, 246.0, 274.0, 242.0, 282.0, 294.0, 284.0, 298.0, 288.0, 294.0, 285.0, 302.0, 295.0, 287.0, 288.0, 294.0, 279.0, 297.0, 289.0, 293.0, 277.0, 296.0, 286.0, 293.0, 304.0, 275.0, 290.0, 292.0, 294.0, 276.0, 326.0, 304.0, 260.0, 255.0, 278.0, 298.0, 280.0, 302.0, 294.0, 285.0, 299.0, 283.0, 271.0, 262.0, 284.0, 280.0, 309.0, 321.0, 294.0, 285.0, 291.0, 296.0, 283.0, 296.0, 289.0, 304.0, 294.0, 288.0, 286.0, 
296.0, 295.0, 289.0, 283.0, 299.0, 280.0, 293.0, 263.0, 250.0, 274.0, 296.0, 293.0, 294.0, 256.0, 251.0, 295.0, 284.0, 285.0, 279.0, 245.0, 271.0, 288.0, 291.0, 284.0, 295.0, 284.0, 292.0, 289.0, 290.0, 293.0, 286.0, 276.0, 300.0, 299.0, 280.0, 253.0, 277.0, 278.0, 295.0, 270.0, 279.0, 264.0, 237.0, 286.0, 296.0, 276.0, 300.0, 280.0, 293.0, 248.0, 247.0, 295.0, 284.0, 282.0, 297.0, 278.0, 304.0, 276.0, 294.0, 288.0, 288.0, 299.0, 268.0, 299.0, 280.0, 285.0, 297.0, 294.0, 288.0, 291.0, 288.0, 277.0, 253.0, 297.0, 276.0, 273.0, 263.0, 275.0, 258.0, 288.0, 291.0, 89.0, 96.0, 301.0, 275.0, 290.0, 292.0, 288.0, 294.0, 276.0, 268.0, 282.0, 297.0, 290.0, 286.0, 281.0, 271.0, 293.0, 277.0, 275.0, 298.0, 316.0, 314.0, 286.0, 293.0, 256.0, 239.0, 287.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3967794054069242, "mean_processing_ms": 0.3471925853842631, "mean_inference_ms": 1.9496805791927851}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3816000, "num_steps_sampled": 2035200, "sample_time_ms": 20765.512, "load_time_ms": 37.179, "grad_time_ms": 8845.352, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005116061773151159, "policy_loss": -0.0030946088954806328, "vf_loss": 87.75751495361328, "vf_explained_var": 0.7570715546607971, "kl": 0.0022622861433774233, "entropy": 1.1301772594451904, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2035200, "episodes_total": 5088, "training_iteration": 159, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-04-27", "timestamp": 1660251867, "time_this_iter_s": 32.650943994522095, "time_total_s": 10284.586947202682, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10284.586947202682, "timesteps_since_restore": 2035200, "iterations_since_restore": 159, "perf": {"cpu_util_percent": 33.56739130434783, "ram_util_percent": 58.49565217391306}} -{"episode_reward_max": 630.0, "episode_reward_min": 185.0, "episode_reward_mean": 562.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 281.46}, "custom_metrics": {"sparse_reward_mean": 195.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 172.92, "shaped_reward_min": 65, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.87, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.29, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 
15.57, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.07, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.62, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.29, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.27, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.07, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.62, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.07, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.62, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [237.0, 573.0, 590.0, 573.0, 582.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 579.0, 576.0, 579.0, 521.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 579.0, 513.0, 582.0, 582.0, 582.0, 576.0, 582.0, 573.0, 549.0, 501.0, 582.0, 576.0, 573.0, 495.0, 579.0, 579.0, 582.0, 570.0, 576.0, 567.0, 579.0, 582.0, 582.0, 579.0, 530.0, 573.0, 536.0, 533.0, 579.0, 185.0, 576.0, 582.0, 582.0, 544.0, 579.0, 576.0, 552.0, 570.0, 573.0, 630.0, 579.0, 495.0, 579.0, 587.0, 582.0, 573.0, 561.0, 584.0, 489.0, 573.0, 512.0, 582.0, 582.0, 576.0, 539.0, 570.0, 519.0, 627.0, 579.0, 518.0, 516.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 582.0, 573.0, 579.0, 579.0, 582.0, 570.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [122.0, 115.0, 280.0, 293.0, 291.0, 299.0, 293.0, 280.0, 291.0, 291.0, 291.0, 285.0, 292.0, 287.0, 287.0, 289.0, 285.0, 302.0, 299.0, 283.0, 285.0, 297.0, 283.0, 296.0, 289.0, 287.0, 291.0, 288.0, 269.0, 252.0, 287.0, 295.0, 269.0, 301.0, 282.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 281.0, 298.0, 275.0, 307.0, 294.0, 285.0, 256.0, 257.0, 295.0, 287.0, 280.0, 302.0, 285.0, 297.0, 291.0, 
285.0, 302.0, 280.0, 278.0, 295.0, 270.0, 279.0, 264.0, 237.0, 286.0, 296.0, 276.0, 300.0, 280.0, 293.0, 248.0, 247.0, 295.0, 284.0, 282.0, 297.0, 278.0, 304.0, 276.0, 294.0, 288.0, 288.0, 299.0, 268.0, 299.0, 280.0, 285.0, 297.0, 294.0, 288.0, 291.0, 288.0, 277.0, 253.0, 297.0, 276.0, 273.0, 263.0, 275.0, 258.0, 288.0, 291.0, 89.0, 96.0, 301.0, 275.0, 290.0, 292.0, 288.0, 294.0, 276.0, 268.0, 282.0, 297.0, 290.0, 286.0, 281.0, 271.0, 293.0, 277.0, 275.0, 298.0, 316.0, 314.0, 286.0, 293.0, 256.0, 239.0, 287.0, 292.0, 309.0, 278.0, 294.0, 288.0, 287.0, 286.0, 297.0, 264.0, 291.0, 293.0, 240.0, 249.0, 292.0, 281.0, 262.0, 250.0, 297.0, 285.0, 285.0, 297.0, 279.0, 297.0, 262.0, 277.0, 287.0, 283.0, 266.0, 253.0, 321.0, 306.0, 286.0, 293.0, 272.0, 246.0, 274.0, 242.0, 282.0, 294.0, 284.0, 298.0, 288.0, 294.0, 285.0, 302.0, 295.0, 287.0, 288.0, 294.0, 279.0, 297.0, 289.0, 293.0, 277.0, 296.0, 286.0, 293.0, 304.0, 275.0, 290.0, 292.0, 294.0, 276.0, 326.0, 304.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3908344800853478, "mean_processing_ms": 0.3460119024024818, "mean_inference_ms": 1.9441766821864475}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3840000, "num_steps_sampled": 2048000, "sample_time_ms": 20695.053, "load_time_ms": 36.846, "grad_time_ms": 8667.722, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033892595674842596, "policy_loss": -0.004953155294060707, "vf_loss": 89.03093719482422, "vf_explained_var": 0.7680574059486389, "kl": 0.0018749010050669312, "entropy": 1.1213653087615967, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2048000, "episodes_total": 5120, "training_iteration": 160, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-04-56", "timestamp": 1660251896, "time_this_iter_s": 28.625488996505737, "time_total_s": 10313.212436199188, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10313.212436199188, "timesteps_since_restore": 2048000, "iterations_since_restore": 160, "perf": {"cpu_util_percent": 34.958536585365856, "ram_util_percent": 58.548780487804876}} -{"episode_reward_max": 630.0, "episode_reward_min": 237.0, "episode_reward_mean": 567.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 283.645}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.49, "shaped_reward_min": 77, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.94, 
"onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.55, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.56, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.83, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.49, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, 
"soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.83, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.83, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 567.0, 582.0, 582.0, 579.0, 576.0, 587.0, 504.0, 579.0, 522.0, 516.0, 579.0, 579.0, 584.0, 587.0, 525.0, 630.0, 582.0, 408.0, 582.0, 530.0, 630.0, 570.0, 579.0, 582.0, 576.0, 582.0, 573.0, 561.0, 582.0, 576.0, 573.0, 630.0, 579.0, 495.0, 579.0, 587.0, 582.0, 573.0, 561.0, 584.0, 489.0, 573.0, 512.0, 582.0, 582.0, 576.0, 539.0, 570.0, 519.0, 627.0, 579.0, 518.0, 516.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 582.0, 573.0, 579.0, 579.0, 582.0, 570.0, 630.0, 237.0, 573.0, 590.0, 573.0, 582.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 579.0, 576.0, 579.0, 521.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 579.0, 513.0, 582.0, 582.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 289.0, 277.0, 290.0, 291.0, 291.0, 279.0, 303.0, 295.0, 284.0, 294.0, 282.0, 295.0, 292.0, 257.0, 247.0, 289.0, 290.0, 250.0, 272.0, 249.0, 267.0, 290.0, 289.0, 302.0, 277.0, 299.0, 285.0, 304.0, 283.0, 267.0, 258.0, 299.0, 331.0, 285.0, 297.0, 205.0, 
203.0, 291.0, 291.0, 269.0, 261.0, 313.0, 317.0, 288.0, 282.0, 289.0, 290.0, 288.0, 294.0, 293.0, 283.0, 298.0, 284.0, 288.0, 285.0, 284.0, 277.0, 296.0, 286.0, 283.0, 293.0, 291.0, 282.0, 316.0, 314.0, 286.0, 293.0, 256.0, 239.0, 287.0, 292.0, 309.0, 278.0, 294.0, 288.0, 287.0, 286.0, 297.0, 264.0, 291.0, 293.0, 240.0, 249.0, 292.0, 281.0, 262.0, 250.0, 297.0, 285.0, 285.0, 297.0, 279.0, 297.0, 262.0, 277.0, 287.0, 283.0, 266.0, 253.0, 321.0, 306.0, 286.0, 293.0, 272.0, 246.0, 274.0, 242.0, 282.0, 294.0, 284.0, 298.0, 288.0, 294.0, 285.0, 302.0, 295.0, 287.0, 288.0, 294.0, 279.0, 297.0, 289.0, 293.0, 277.0, 296.0, 286.0, 293.0, 304.0, 275.0, 290.0, 292.0, 294.0, 276.0, 326.0, 304.0, 122.0, 115.0, 280.0, 293.0, 291.0, 299.0, 293.0, 280.0, 291.0, 291.0, 291.0, 285.0, 292.0, 287.0, 287.0, 289.0, 285.0, 302.0, 299.0, 283.0, 285.0, 297.0, 283.0, 296.0, 289.0, 287.0, 291.0, 288.0, 269.0, 252.0, 287.0, 295.0, 269.0, 301.0, 282.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 281.0, 298.0, 275.0, 307.0, 294.0, 285.0, 256.0, 257.0, 295.0, 287.0, 280.0, 302.0, 285.0, 297.0, 291.0, 285.0, 302.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3849427098912996, "mean_processing_ms": 0.3448405344667729, "mean_inference_ms": 1.9385368397952782}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3864000, "num_steps_sampled": 2060800, "sample_time_ms": 20359.83, "load_time_ms": 36.714, "grad_time_ms": 8574.991, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003171335905790329, "policy_loss": -0.005784957204014063, "vf_loss": 95.20501708984375, "vf_explained_var": 0.7632928490638733, "kl": 0.001863123499788344, "entropy": 1.1284128427505493, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2060800, "episodes_total": 5152, "training_iteration": 161, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-05-24", "timestamp": 1660251924, "time_this_iter_s": 28.188406705856323, "time_total_s": 10341.400842905045, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10341.400842905045, "timesteps_since_restore": 2060800, "iterations_since_restore": 161, "perf": {"cpu_util_percent": 36.3875, "ram_util_percent": 58.585}} -{"episode_reward_max": 630.0, "episode_reward_min": 237.0, "episode_reward_mean": 568.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 284.18}, "custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.16, "shaped_reward_min": 77, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.88, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.91, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.21, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.37, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.85, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 582.0, 587.0, 582.0, 573.0, 576.0, 579.0, 573.0, 627.0, 558.0, 570.0, 579.0, 579.0, 573.0, 581.0, 582.0, 539.0, 582.0, 579.0, 582.0, 579.0, 570.0, 567.0, 522.0, 587.0, 564.0, 584.0, 507.0, 493.0, 570.0, 579.0, 579.0, 582.0, 570.0, 630.0, 237.0, 573.0, 590.0, 573.0, 582.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 579.0, 576.0, 579.0, 521.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 579.0, 513.0, 582.0, 582.0, 582.0, 576.0, 582.0, 567.0, 567.0, 582.0, 582.0, 579.0, 576.0, 587.0, 504.0, 579.0, 522.0, 516.0, 579.0, 579.0, 584.0, 587.0, 525.0, 630.0, 582.0, 408.0, 582.0, 530.0, 630.0, 570.0, 579.0, 582.0, 576.0, 582.0, 573.0, 561.0, 582.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 285.0, 291.0, 288.0, 296.0, 286.0, 293.0, 294.0, 291.0, 291.0, 287.0, 
286.0, 282.0, 294.0, 290.0, 289.0, 283.0, 290.0, 313.0, 314.0, 289.0, 269.0, 284.0, 286.0, 295.0, 284.0, 283.0, 296.0, 284.0, 289.0, 284.0, 297.0, 283.0, 299.0, 268.0, 271.0, 302.0, 280.0, 294.0, 285.0, 297.0, 285.0, 293.0, 286.0, 282.0, 288.0, 302.0, 265.0, 252.0, 270.0, 291.0, 296.0, 284.0, 280.0, 285.0, 299.0, 249.0, 258.0, 250.0, 243.0, 279.0, 291.0, 280.0, 299.0, 304.0, 275.0, 290.0, 292.0, 294.0, 276.0, 326.0, 304.0, 122.0, 115.0, 280.0, 293.0, 291.0, 299.0, 293.0, 280.0, 291.0, 291.0, 291.0, 285.0, 292.0, 287.0, 287.0, 289.0, 285.0, 302.0, 299.0, 283.0, 285.0, 297.0, 283.0, 296.0, 289.0, 287.0, 291.0, 288.0, 269.0, 252.0, 287.0, 295.0, 269.0, 301.0, 282.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 281.0, 298.0, 275.0, 307.0, 294.0, 285.0, 256.0, 257.0, 295.0, 287.0, 280.0, 302.0, 285.0, 297.0, 291.0, 285.0, 302.0, 280.0, 278.0, 289.0, 277.0, 290.0, 291.0, 291.0, 279.0, 303.0, 295.0, 284.0, 294.0, 282.0, 295.0, 292.0, 257.0, 247.0, 289.0, 290.0, 250.0, 272.0, 249.0, 267.0, 290.0, 289.0, 302.0, 277.0, 299.0, 285.0, 304.0, 283.0, 267.0, 258.0, 299.0, 331.0, 285.0, 297.0, 205.0, 203.0, 291.0, 291.0, 269.0, 261.0, 313.0, 317.0, 288.0, 282.0, 289.0, 290.0, 288.0, 294.0, 293.0, 283.0, 298.0, 284.0, 288.0, 285.0, 284.0, 277.0, 296.0, 286.0, 283.0, 293.0, 291.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3791010078296764, "mean_processing_ms": 0.34367607505559905, "mean_inference_ms": 1.932410583140313}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3888000, "num_steps_sampled": 2073600, "sample_time_ms": 20389.495, "load_time_ms": 36.868, "grad_time_ms": 8516.869, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007552561815828085, "policy_loss": -0.0015357719967141747, "vf_loss": 96.43359375, "vf_explained_var": 0.7504541277885437, "kl": 0.0026693844702094793, "entropy": 1.110058307647705, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, 
"optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2073600, "episodes_total": 5184, "training_iteration": 162, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-05-53", "timestamp": 1660251953, "time_this_iter_s": 29.546289205551147, "time_total_s": 10370.947132110596, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, 
"NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10370.947132110596, "timesteps_since_restore": 2073600, "iterations_since_restore": 162, "perf": {"cpu_util_percent": 34.892682926829266, "ram_util_percent": 58.55365853658536}} -{"episode_reward_max": 630.0, "episode_reward_min": 345.0, "episode_reward_mean": 568.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 284.395}, "custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.79, "shaped_reward_min": 105, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, 
"useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.0, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.92, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.5, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.41, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.01, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.58, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 
0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.01, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.01, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, 
"catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 470.0, 564.0, 576.0, 576.0, 576.0, 582.0, 587.0, 579.0, 582.0, 579.0, 345.0, 570.0, 576.0, 582.0, 576.0, 576.0, 579.0, 576.0, 567.0, 567.0, 582.0, 579.0, 564.0, 570.0, 579.0, 582.0, 579.0, 576.0, 627.0, 582.0, 582.0, 576.0, 582.0, 567.0, 567.0, 582.0, 582.0, 579.0, 576.0, 587.0, 504.0, 579.0, 522.0, 516.0, 579.0, 579.0, 584.0, 587.0, 525.0, 630.0, 582.0, 408.0, 582.0, 530.0, 630.0, 570.0, 579.0, 582.0, 576.0, 582.0, 573.0, 561.0, 582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 587.0, 582.0, 573.0, 576.0, 579.0, 573.0, 627.0, 558.0, 570.0, 579.0, 579.0, 573.0, 581.0, 582.0, 539.0, 582.0, 579.0, 582.0, 579.0, 570.0, 567.0, 522.0, 587.0, 564.0, 584.0, 507.0, 493.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 301.0, 285.0, 294.0, 279.0, 303.0, 227.0, 243.0, 263.0, 301.0, 280.0, 296.0, 276.0, 300.0, 283.0, 293.0, 286.0, 296.0, 301.0, 286.0, 288.0, 291.0, 288.0, 294.0, 302.0, 277.0, 173.0, 172.0, 289.0, 281.0, 285.0, 291.0, 306.0, 276.0, 280.0, 296.0, 290.0, 286.0, 293.0, 286.0, 283.0, 293.0, 280.0, 287.0, 289.0, 278.0, 298.0, 284.0, 285.0, 294.0, 280.0, 284.0, 282.0, 288.0, 281.0, 298.0, 295.0, 287.0, 282.0, 297.0, 285.0, 291.0, 306.0, 321.0, 280.0, 302.0, 285.0, 297.0, 291.0, 285.0, 302.0, 280.0, 278.0, 289.0, 277.0, 290.0, 291.0, 291.0, 279.0, 303.0, 295.0, 284.0, 294.0, 282.0, 295.0, 292.0, 257.0, 247.0, 289.0, 290.0, 250.0, 272.0, 249.0, 267.0, 290.0, 289.0, 302.0, 277.0, 299.0, 285.0, 304.0, 283.0, 267.0, 258.0, 299.0, 331.0, 285.0, 297.0, 205.0, 203.0, 291.0, 291.0, 269.0, 261.0, 313.0, 317.0, 288.0, 282.0, 289.0, 290.0, 288.0, 294.0, 293.0, 283.0, 298.0, 284.0, 288.0, 285.0, 284.0, 277.0, 296.0, 286.0, 283.0, 293.0, 291.0, 282.0, 291.0, 285.0, 291.0, 288.0, 296.0, 286.0, 293.0, 294.0, 291.0, 291.0, 287.0, 286.0, 282.0, 294.0, 290.0, 289.0, 283.0, 290.0, 313.0, 314.0, 289.0, 269.0, 284.0, 286.0, 295.0, 284.0, 283.0, 296.0, 284.0, 289.0, 284.0, 297.0, 283.0, 299.0, 268.0, 271.0, 302.0, 280.0, 294.0, 285.0, 297.0, 285.0, 293.0, 286.0, 282.0, 288.0, 302.0, 265.0, 252.0, 270.0, 291.0, 296.0, 284.0, 280.0, 285.0, 299.0, 249.0, 258.0, 250.0, 243.0, 279.0, 291.0, 280.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3733429258954066, "mean_processing_ms": 0.3425317863430243, "mean_inference_ms": 1.9266299653449164}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3912000, "num_steps_sampled": 2086400, "sample_time_ms": 20824.632, "load_time_ms": 36.596, "grad_time_ms": 8758.37, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004362883511930704, "policy_loss": -0.003907353617250919, 
"vf_loss": 88.24420166015625, "vf_explained_var": 0.7741295695304871, "kl": 0.002105970401316881, "entropy": 1.1083542108535767, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2086400, "episodes_total": 5216, "training_iteration": 163, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-06-26", "timestamp": 1660251986, "time_this_iter_s": 32.18382000923157, "time_total_s": 10403.130952119827, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": 
{"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": 
{"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10403.130952119827, "timesteps_since_restore": 2086400, "iterations_since_restore": 163, "perf": {"cpu_util_percent": 33.87608695652174, "ram_util_percent": 58.582608695652176}} -{"episode_reward_max": 630.0, "episode_reward_min": 336.0, "episode_reward_mean": 565.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 162.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 282.99}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 173.98, "shaped_reward_min": 96, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 
0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.87, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.89, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.41, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.0, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.12, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.65, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.0, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.0, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 587.0, 501.0, 579.0, 573.0, 558.0, 582.0, 525.0, 579.0, 430.0, 576.0, 530.0, 519.0, 579.0, 587.0, 582.0, 336.0, 570.0, 465.0, 582.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 582.0, 627.0, 587.0, 630.0, 579.0, 579.0, 561.0, 582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 587.0, 582.0, 573.0, 576.0, 579.0, 573.0, 627.0, 558.0, 570.0, 579.0, 579.0, 573.0, 581.0, 582.0, 539.0, 582.0, 579.0, 582.0, 579.0, 570.0, 567.0, 522.0, 587.0, 564.0, 584.0, 507.0, 493.0, 570.0, 579.0, 582.0, 579.0, 582.0, 470.0, 564.0, 576.0, 576.0, 576.0, 582.0, 587.0, 579.0, 582.0, 579.0, 345.0, 570.0, 576.0, 582.0, 576.0, 576.0, 579.0, 576.0, 567.0, 567.0, 582.0, 579.0, 564.0, 570.0, 579.0, 582.0, 579.0, 576.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 295.0, 292.0, 255.0, 246.0, 279.0, 300.0, 283.0, 290.0, 285.0, 273.0, 301.0, 281.0, 264.0, 261.0, 285.0, 294.0, 218.0, 212.0, 286.0, 290.0, 263.0, 267.0, 272.0, 247.0, 300.0, 279.0, 304.0, 283.0, 301.0, 281.0, 162.0, 174.0, 279.0, 291.0, 231.0, 234.0, 276.0, 306.0, 286.0, 296.0, 281.0, 301.0, 288.0, 285.0, 290.0, 292.0, 291.0, 285.0, 298.0, 284.0, 296.0, 286.0, 311.0, 316.0, 290.0, 297.0, 319.0, 311.0, 289.0, 290.0, 293.0, 286.0, 284.0, 277.0, 296.0, 286.0, 283.0, 293.0, 291.0, 282.0, 291.0, 285.0, 291.0, 288.0, 296.0, 286.0, 293.0, 294.0, 291.0, 291.0, 287.0, 286.0, 282.0, 294.0, 290.0, 289.0, 283.0, 290.0, 313.0, 314.0, 289.0, 269.0, 284.0, 286.0, 295.0, 284.0, 283.0, 296.0, 284.0, 289.0, 284.0, 297.0, 283.0, 299.0, 268.0, 271.0, 302.0, 280.0, 294.0, 285.0, 297.0, 285.0, 293.0, 286.0, 282.0, 288.0, 302.0, 265.0, 252.0, 270.0, 291.0, 296.0, 284.0, 280.0, 285.0, 299.0, 249.0, 258.0, 250.0, 243.0, 279.0, 291.0, 280.0, 299.0, 281.0, 301.0, 285.0, 294.0, 279.0, 303.0, 227.0, 243.0, 263.0, 301.0, 280.0, 296.0, 276.0, 300.0, 283.0, 293.0, 286.0, 296.0, 301.0, 286.0, 288.0, 291.0, 288.0, 294.0, 302.0, 277.0, 173.0, 172.0, 289.0, 281.0, 285.0, 291.0, 306.0, 276.0, 280.0, 296.0, 290.0, 286.0, 293.0, 286.0, 283.0, 293.0, 280.0, 287.0, 289.0, 278.0, 298.0, 284.0, 285.0, 294.0, 280.0, 284.0, 282.0, 288.0, 281.0, 298.0, 295.0, 287.0, 282.0, 297.0, 285.0, 291.0, 306.0, 321.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3676747484724832, "mean_processing_ms": 0.341408599904806, "mean_inference_ms": 1.9212242933819834}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3936000, "num_steps_sampled": 2099200, "sample_time_ms": 20928.906, "load_time_ms": 37.45, "grad_time_ms": 
9167.09, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035528007429093122, "policy_loss": -0.005154869984835386, "vf_loss": 92.63870239257812, "vf_explained_var": 0.7672746181488037, "kl": 0.0020837958436459303, "entropy": 1.1124038696289062, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2099200, "episodes_total": 5248, "training_iteration": 164, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-06-59", "timestamp": 1660252019, "time_this_iter_s": 33.07875204086304, "time_total_s": 10436.20970416069, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": 
["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10436.20970416069, "timesteps_since_restore": 2099200, "iterations_since_restore": 164, "perf": {"cpu_util_percent": 35.41276595744681, "ram_util_percent": 58.536170212765946}} -{"episode_reward_max": 633.0, "episode_reward_min": 336.0, "episode_reward_mean": 565.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 162.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 282.81}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.02, "shaped_reward_min": 96, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.68, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.35, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.06, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.36, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.28, 
"useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.37, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.36, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.55, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.78, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.06, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.06, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 576.0, 573.0, 587.0, 573.0, 633.0, 630.0, 630.0, 573.0, 582.0, 582.0, 582.0, 516.0, 579.0, 582.0, 627.0, 576.0, 416.0, 582.0, 582.0, 579.0, 558.0, 582.0, 576.0, 576.0, 525.0, 579.0, 579.0, 513.0, 582.0, 582.0, 507.0, 493.0, 570.0, 579.0, 582.0, 579.0, 582.0, 470.0, 564.0, 576.0, 576.0, 576.0, 582.0, 587.0, 579.0, 582.0, 579.0, 345.0, 570.0, 576.0, 582.0, 576.0, 576.0, 579.0, 576.0, 567.0, 567.0, 582.0, 579.0, 564.0, 570.0, 579.0, 582.0, 579.0, 576.0, 627.0, 579.0, 587.0, 501.0, 579.0, 573.0, 558.0, 582.0, 525.0, 579.0, 430.0, 576.0, 530.0, 519.0, 579.0, 587.0, 582.0, 336.0, 570.0, 465.0, 582.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 582.0, 627.0, 587.0, 630.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 283.0, 296.0, 295.0, 281.0, 305.0, 268.0, 297.0, 290.0, 296.0, 277.0, 314.0, 319.0, 319.0, 311.0, 308.0, 322.0, 291.0, 282.0, 296.0, 286.0, 297.0, 285.0, 285.0, 297.0, 266.0, 250.0, 295.0, 284.0, 291.0, 291.0, 324.0, 303.0, 298.0, 278.0, 213.0, 203.0, 287.0, 295.0, 296.0, 286.0, 283.0, 296.0, 279.0, 279.0, 285.0, 297.0, 282.0, 294.0, 285.0, 291.0, 274.0, 251.0, 293.0, 286.0, 289.0, 290.0, 267.0, 246.0, 283.0, 299.0, 299.0, 283.0, 249.0, 258.0, 250.0, 243.0, 279.0, 291.0, 280.0, 299.0, 281.0, 301.0, 285.0, 294.0, 279.0, 303.0, 227.0, 243.0, 263.0, 301.0, 280.0, 296.0, 276.0, 300.0, 283.0, 293.0, 286.0, 296.0, 301.0, 286.0, 288.0, 291.0, 288.0, 294.0, 302.0, 277.0, 173.0, 172.0, 289.0, 281.0, 285.0, 291.0, 306.0, 276.0, 280.0, 296.0, 290.0, 286.0, 293.0, 286.0, 283.0, 293.0, 280.0, 287.0, 289.0, 278.0, 298.0, 284.0, 285.0, 294.0, 280.0, 284.0, 282.0, 288.0, 281.0, 298.0, 295.0, 287.0, 282.0, 297.0, 285.0, 291.0, 306.0, 321.0, 287.0, 292.0, 295.0, 292.0, 255.0, 246.0, 279.0, 300.0, 283.0, 290.0, 285.0, 273.0, 301.0, 281.0, 264.0, 261.0, 285.0, 294.0, 218.0, 212.0, 286.0, 290.0, 263.0, 267.0, 272.0, 247.0, 300.0, 279.0, 304.0, 283.0, 301.0, 281.0, 162.0, 174.0, 279.0, 291.0, 231.0, 234.0, 276.0, 306.0, 286.0, 296.0, 281.0, 301.0, 288.0, 285.0, 290.0, 292.0, 291.0, 285.0, 298.0, 284.0, 296.0, 286.0, 311.0, 316.0, 290.0, 297.0, 319.0, 311.0, 289.0, 290.0, 293.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3620936512767547, "mean_processing_ms": 0.34030575499793914, 
"mean_inference_ms": 1.9161448666876886}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3960000, "num_steps_sampled": 2112000, "sample_time_ms": 21120.168, "load_time_ms": 37.608, "grad_time_ms": 9339.122, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019619378726929426, "policy_loss": -0.006335819140076637, "vf_loss": 88.54428100585938, "vf_explained_var": 0.7676218152046204, "kl": 0.0017338074976578355, "entropy": 1.1133431196212769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2112000, "episodes_total": 5280, "training_iteration": 165, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-07-30", "timestamp": 1660252050, "time_this_iter_s": 31.535957098007202, "time_total_s": 10467.745661258698, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, 
"num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10467.745661258698, "timesteps_since_restore": 2112000, "iterations_since_restore": 165, "perf": {"cpu_util_percent": 34.425000000000004, "ram_util_percent": 58.65909090909092}} -{"episode_reward_max": 633.0, "episode_reward_min": 336.0, "episode_reward_mean": 569.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 162.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.81}, "custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.22, "shaped_reward_min": 96, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, 
"tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.54, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.19, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.24, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.44, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 
6.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.34, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, 
"viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 567.0, 536.0, 579.0, 582.0, 582.0, 567.0, 587.0, 522.0, 582.0, 579.0, 578.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 570.0, 573.0, 573.0, 564.0, 576.0, 576.0, 573.0, 570.0, 579.0, 573.0, 579.0, 570.0, 579.0, 582.0, 579.0, 576.0, 627.0, 579.0, 587.0, 501.0, 579.0, 573.0, 558.0, 582.0, 525.0, 579.0, 430.0, 576.0, 530.0, 519.0, 579.0, 587.0, 582.0, 336.0, 570.0, 465.0, 582.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 582.0, 627.0, 587.0, 630.0, 579.0, 579.0, 576.0, 579.0, 576.0, 573.0, 587.0, 573.0, 633.0, 630.0, 630.0, 
573.0, 582.0, 582.0, 582.0, 516.0, 579.0, 582.0, 627.0, 576.0, 416.0, 582.0, 582.0, 579.0, 558.0, 582.0, 576.0, 576.0, 525.0, 579.0, 579.0, 513.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 289.0, 284.0, 283.0, 261.0, 275.0, 289.0, 290.0, 283.0, 299.0, 299.0, 283.0, 275.0, 292.0, 298.0, 289.0, 252.0, 270.0, 293.0, 289.0, 303.0, 276.0, 290.0, 288.0, 293.0, 277.0, 278.0, 304.0, 285.0, 297.0, 301.0, 281.0, 297.0, 285.0, 281.0, 301.0, 283.0, 299.0, 283.0, 287.0, 292.0, 281.0, 287.0, 286.0, 273.0, 291.0, 296.0, 280.0, 281.0, 295.0, 290.0, 283.0, 296.0, 274.0, 297.0, 282.0, 297.0, 276.0, 303.0, 276.0, 292.0, 278.0, 284.0, 295.0, 295.0, 287.0, 282.0, 297.0, 285.0, 291.0, 306.0, 321.0, 287.0, 292.0, 295.0, 292.0, 255.0, 246.0, 279.0, 300.0, 283.0, 290.0, 285.0, 273.0, 301.0, 281.0, 264.0, 261.0, 285.0, 294.0, 218.0, 212.0, 286.0, 290.0, 263.0, 267.0, 272.0, 247.0, 300.0, 279.0, 304.0, 283.0, 301.0, 281.0, 162.0, 174.0, 279.0, 291.0, 231.0, 234.0, 276.0, 306.0, 286.0, 296.0, 281.0, 301.0, 288.0, 285.0, 290.0, 292.0, 291.0, 285.0, 298.0, 284.0, 296.0, 286.0, 311.0, 316.0, 290.0, 297.0, 319.0, 311.0, 289.0, 290.0, 293.0, 286.0, 286.0, 290.0, 283.0, 296.0, 295.0, 281.0, 305.0, 268.0, 297.0, 290.0, 296.0, 277.0, 314.0, 319.0, 319.0, 311.0, 308.0, 322.0, 291.0, 282.0, 296.0, 286.0, 297.0, 285.0, 285.0, 297.0, 266.0, 250.0, 295.0, 284.0, 291.0, 291.0, 324.0, 303.0, 298.0, 278.0, 213.0, 203.0, 287.0, 295.0, 296.0, 286.0, 283.0, 296.0, 279.0, 279.0, 285.0, 297.0, 282.0, 
294.0, 285.0, 291.0, 274.0, 251.0, 293.0, 286.0, 289.0, 290.0, 267.0, 246.0, 283.0, 299.0, 299.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3565665278792982, "mean_processing_ms": 0.33921520895760066, "mean_inference_ms": 1.9109392629722073}, "off_policy_estimator": {}, "info": {"num_steps_trained": 3984000, "num_steps_sampled": 2124800, "sample_time_ms": 21230.051, "load_time_ms": 38.109, "grad_time_ms": 9623.189, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006461843382567167, "policy_loss": -0.0018003573641180992, "vf_loss": 88.1545181274414, "vf_explained_var": 0.7546200752258301, "kl": 0.00197615590877831, "entropy": 1.106500267982483, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2124800, "episodes_total": 5312, "training_iteration": 166, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-08-04", "timestamp": 1660252084, "time_this_iter_s": 33.33124303817749, "time_total_s": 10501.076904296875, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10501.076904296875, "timesteps_since_restore": 2124800, "iterations_since_restore": 166, "perf": {"cpu_util_percent": 33.48510638297872, "ram_util_percent": 58.49574468085109}} -{"episode_reward_max": 633.0, "episode_reward_min": 416.0, "episode_reward_mean": 574.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 203.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.395}, "custom_metrics": {"sparse_reward_mean": 198.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.19, 
"shaped_reward_min": 136, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.54, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.32, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.86, 
"potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.42, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.78, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.32, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.3, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.25, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.86, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.42, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.86, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.42, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 576.0, 582.0, 573.0, 576.0, 576.0, 576.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 525.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 582.0, 522.0, 579.0, 587.0, 630.0, 579.0, 579.0, 576.0, 579.0, 576.0, 573.0, 587.0, 573.0, 633.0, 630.0, 630.0, 573.0, 582.0, 
582.0, 582.0, 516.0, 579.0, 582.0, 627.0, 576.0, 416.0, 582.0, 582.0, 579.0, 558.0, 582.0, 576.0, 576.0, 525.0, 579.0, 579.0, 513.0, 582.0, 582.0, 573.0, 567.0, 536.0, 579.0, 582.0, 582.0, 567.0, 587.0, 522.0, 582.0, 579.0, 578.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 570.0, 573.0, 573.0, 564.0, 576.0, 576.0, 573.0, 570.0, 579.0, 573.0, 579.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 301.0, 281.0, 293.0, 283.0, 294.0, 288.0, 288.0, 285.0, 292.0, 284.0, 282.0, 294.0, 291.0, 285.0, 293.0, 289.0, 292.0, 287.0, 288.0, 291.0, 301.0, 281.0, 295.0, 287.0, 291.0, 291.0, 297.0, 282.0, 294.0, 285.0, 293.0, 289.0, 269.0, 261.0, 279.0, 300.0, 298.0, 284.0, 270.0, 309.0, 288.0, 291.0, 263.0, 262.0, 293.0, 289.0, 303.0, 276.0, 286.0, 296.0, 301.0, 281.0, 286.0, 290.0, 296.0, 286.0, 300.0, 282.0, 272.0, 250.0, 303.0, 276.0, 290.0, 297.0, 319.0, 311.0, 289.0, 290.0, 293.0, 286.0, 286.0, 290.0, 283.0, 296.0, 295.0, 281.0, 305.0, 268.0, 297.0, 290.0, 296.0, 277.0, 314.0, 319.0, 319.0, 311.0, 308.0, 322.0, 291.0, 282.0, 296.0, 286.0, 297.0, 285.0, 285.0, 297.0, 266.0, 250.0, 295.0, 284.0, 291.0, 291.0, 324.0, 303.0, 298.0, 278.0, 213.0, 203.0, 287.0, 295.0, 296.0, 286.0, 283.0, 296.0, 279.0, 279.0, 285.0, 297.0, 282.0, 294.0, 285.0, 291.0, 274.0, 251.0, 293.0, 286.0, 289.0, 290.0, 267.0, 246.0, 283.0, 299.0, 299.0, 283.0, 284.0, 289.0, 284.0, 283.0, 261.0, 275.0, 289.0, 290.0, 283.0, 299.0, 299.0, 283.0, 275.0, 292.0, 298.0, 289.0, 252.0, 270.0, 293.0, 
289.0, 303.0, 276.0, 290.0, 288.0, 293.0, 277.0, 278.0, 304.0, 285.0, 297.0, 301.0, 281.0, 297.0, 285.0, 281.0, 301.0, 283.0, 299.0, 283.0, 287.0, 292.0, 281.0, 287.0, 286.0, 273.0, 291.0, 296.0, 280.0, 281.0, 295.0, 290.0, 283.0, 296.0, 274.0, 297.0, 282.0, 297.0, 276.0, 303.0, 276.0, 292.0, 278.0, 284.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.351083451607247, "mean_processing_ms": 0.3381304952990823, "mean_inference_ms": 1.9054854328203157}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4008000, "num_steps_sampled": 2137600, "sample_time_ms": 21012.721, "load_time_ms": 38.367, "grad_time_ms": 10014.737, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006168690975755453, "policy_loss": -0.002181840827688575, "vf_loss": 88.96065521240234, "vf_explained_var": 0.762434184551239, "kl": 0.0017693521222099662, "entropy": 1.0910512208938599, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2137600, "episodes_total": 5344, "training_iteration": 167, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-08-35", "timestamp": 1660252115, "time_this_iter_s": 31.34629511833191, "time_total_s": 10532.423199415207, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10532.423199415207, "timesteps_since_restore": 2137600, "iterations_since_restore": 167, "perf": {"cpu_util_percent": 33.334090909090904, "ram_util_percent": 58.479545454545466}} -{"episode_reward_max": 633.0, "episode_reward_min": 452.0, "episode_reward_mean": 571.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 223.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 285.905}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.21, "shaped_reward_min": 132, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.71, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.73, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.4, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.13, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.21, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.01, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.11, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.41, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.01, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.11, "optimal_onion_potting_agent_1_min": 8, 
"optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.01, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.11, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 525.0, 582.0, 579.0, 452.0, 579.0, 567.0, 530.0, 576.0, 582.0, 582.0, 525.0, 576.0, 627.0, 573.0, 573.0, 
527.0, 576.0, 579.0, 564.0, 633.0, 582.0, 567.0, 536.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 576.0, 579.0, 513.0, 582.0, 582.0, 573.0, 567.0, 536.0, 579.0, 582.0, 582.0, 567.0, 587.0, 522.0, 582.0, 579.0, 578.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 570.0, 573.0, 573.0, 564.0, 576.0, 576.0, 573.0, 570.0, 579.0, 573.0, 579.0, 570.0, 579.0, 579.0, 582.0, 576.0, 582.0, 573.0, 576.0, 576.0, 576.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 525.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 582.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 299.0, 292.0, 278.0, 255.0, 270.0, 287.0, 295.0, 292.0, 287.0, 223.0, 229.0, 274.0, 305.0, 272.0, 295.0, 272.0, 258.0, 280.0, 296.0, 291.0, 291.0, 288.0, 294.0, 269.0, 256.0, 294.0, 282.0, 312.0, 315.0, 291.0, 282.0, 302.0, 271.0, 261.0, 266.0, 281.0, 295.0, 298.0, 281.0, 280.0, 284.0, 326.0, 307.0, 296.0, 286.0, 288.0, 279.0, 267.0, 269.0, 298.0, 281.0, 288.0, 291.0, 291.0, 288.0, 294.0, 285.0, 275.0, 301.0, 294.0, 282.0, 303.0, 273.0, 289.0, 290.0, 267.0, 246.0, 283.0, 299.0, 299.0, 283.0, 284.0, 289.0, 284.0, 283.0, 261.0, 275.0, 289.0, 290.0, 283.0, 299.0, 299.0, 283.0, 275.0, 292.0, 298.0, 289.0, 252.0, 270.0, 293.0, 289.0, 303.0, 276.0, 290.0, 288.0, 293.0, 277.0, 278.0, 304.0, 285.0, 297.0, 301.0, 281.0, 297.0, 285.0, 281.0, 301.0, 283.0, 299.0, 283.0, 287.0, 292.0, 281.0, 287.0, 286.0, 273.0, 291.0, 296.0, 280.0, 281.0, 295.0, 290.0, 283.0, 296.0, 
274.0, 297.0, 282.0, 297.0, 276.0, 303.0, 276.0, 292.0, 278.0, 284.0, 295.0, 283.0, 296.0, 301.0, 281.0, 293.0, 283.0, 294.0, 288.0, 288.0, 285.0, 292.0, 284.0, 282.0, 294.0, 291.0, 285.0, 293.0, 289.0, 292.0, 287.0, 288.0, 291.0, 301.0, 281.0, 295.0, 287.0, 291.0, 291.0, 297.0, 282.0, 294.0, 285.0, 293.0, 289.0, 269.0, 261.0, 279.0, 300.0, 298.0, 284.0, 270.0, 309.0, 288.0, 291.0, 263.0, 262.0, 293.0, 289.0, 303.0, 276.0, 286.0, 296.0, 301.0, 281.0, 286.0, 290.0, 296.0, 286.0, 300.0, 282.0, 272.0, 250.0, 303.0, 276.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3456449831196562, "mean_processing_ms": 0.3370529700799381, "mean_inference_ms": 1.8997987412977424}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4032000, "num_steps_sampled": 2150400, "sample_time_ms": 20935.463, "load_time_ms": 38.009, "grad_time_ms": 9985.412, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029837340116500854, "policy_loss": -0.005580740049481392, "vf_loss": 91.1910629272461, "vf_explained_var": 0.7490768432617188, "kl": 0.0017398769268766046, "entropy": 1.1092572212219238, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2150400, "episodes_total": 5376, "training_iteration": 168, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-09-04", "timestamp": 1660252144, "time_this_iter_s": 29.37734818458557, "time_total_s": 10561.800547599792, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10561.800547599792, "timesteps_since_restore": 2150400, "iterations_since_restore": 168, "perf": {"cpu_util_percent": 32.31428571428572, 
"ram_util_percent": 58.38571428571428}} -{"episode_reward_max": 633.0, "episode_reward_min": 452.0, "episode_reward_mean": 570.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 285.355}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.91, "shaped_reward_min": 132, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.72, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.74, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.45, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.19, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, 
"onion_drop_agent_0_mean": 0.44, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.94, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.07, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.52, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.5, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 14.94, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.94, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 522.0, 576.0, 576.0, 582.0, 533.0, 536.0, 573.0, 570.0, 576.0, 579.0, 582.0, 576.0, 516.0, 579.0, 579.0, 579.0, 504.0, 582.0, 558.0, 579.0, 579.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 573.0, 579.0, 570.0, 579.0, 579.0, 582.0, 576.0, 582.0, 573.0, 576.0, 576.0, 576.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 525.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 582.0, 522.0, 579.0, 582.0, 570.0, 525.0, 582.0, 579.0, 452.0, 579.0, 567.0, 530.0, 576.0, 582.0, 582.0, 525.0, 576.0, 627.0, 573.0, 573.0, 527.0, 576.0, 579.0, 564.0, 633.0, 582.0, 567.0, 536.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 280.0, 299.0, 262.0, 260.0, 283.0, 293.0, 284.0, 292.0, 293.0, 289.0, 249.0, 284.0, 273.0, 263.0, 274.0, 299.0, 288.0, 282.0, 297.0, 279.0, 296.0, 283.0, 273.0, 309.0, 285.0, 291.0, 251.0, 265.0, 285.0, 294.0, 283.0, 296.0, 280.0, 299.0, 249.0, 255.0, 296.0, 286.0, 284.0, 274.0, 293.0, 286.0, 287.0, 292.0, 292.0, 284.0, 283.0, 299.0, 289.0, 293.0, 275.0, 298.0, 290.0, 283.0, 285.0, 297.0, 290.0, 286.0, 289.0, 290.0, 286.0, 293.0, 297.0, 276.0, 303.0, 276.0, 292.0, 278.0, 284.0, 295.0, 283.0, 296.0, 301.0, 281.0, 293.0, 283.0, 294.0, 288.0, 288.0, 285.0, 292.0, 284.0, 282.0, 294.0, 291.0, 285.0, 293.0, 289.0, 292.0, 
287.0, 288.0, 291.0, 301.0, 281.0, 295.0, 287.0, 291.0, 291.0, 297.0, 282.0, 294.0, 285.0, 293.0, 289.0, 269.0, 261.0, 279.0, 300.0, 298.0, 284.0, 270.0, 309.0, 288.0, 291.0, 263.0, 262.0, 293.0, 289.0, 303.0, 276.0, 286.0, 296.0, 301.0, 281.0, 286.0, 290.0, 296.0, 286.0, 300.0, 282.0, 272.0, 250.0, 303.0, 276.0, 283.0, 299.0, 292.0, 278.0, 255.0, 270.0, 287.0, 295.0, 292.0, 287.0, 223.0, 229.0, 274.0, 305.0, 272.0, 295.0, 272.0, 258.0, 280.0, 296.0, 291.0, 291.0, 288.0, 294.0, 269.0, 256.0, 294.0, 282.0, 312.0, 315.0, 291.0, 282.0, 302.0, 271.0, 261.0, 266.0, 281.0, 295.0, 298.0, 281.0, 280.0, 284.0, 326.0, 307.0, 296.0, 286.0, 288.0, 279.0, 267.0, 269.0, 298.0, 281.0, 288.0, 291.0, 291.0, 288.0, 294.0, 285.0, 275.0, 301.0, 294.0, 282.0, 303.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3402643108658208, "mean_processing_ms": 0.3359831351985412, "mean_inference_ms": 1.8939334033513233}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4056000, "num_steps_sampled": 2163200, "sample_time_ms": 20463.428, "load_time_ms": 38.087, "grad_time_ms": 10025.502, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005885738879442215, "policy_loss": -0.001977432519197464, "vf_loss": 84.17040252685547, "vf_explained_var": 0.7570996880531311, "kl": 0.0022582625970244408, "entropy": 1.1077399253845215, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2163200, "episodes_total": 5408, "training_iteration": 169, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-09-33", "timestamp": 1660252173, "time_this_iter_s": 28.335352182388306, "time_total_s": 10590.13589978218, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10590.13589978218, "timesteps_since_restore": 2163200, "iterations_since_restore": 169, "perf": {"cpu_util_percent": 29.0625, "ram_util_percent": 58.379999999999995}} -{"episode_reward_max": 633.0, "episode_reward_min": 422.0, "episode_reward_mean": 569.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 209.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 284.605}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.61, "shaped_reward_min": 132, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.25, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.33, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.01, 
"useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.65, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.51, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.4, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.63, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.4, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.63, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.4, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.63, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 536.0, 561.0, 576.0, 570.0, 587.0, 582.0, 573.0, 582.0, 576.0, 579.0, 576.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 579.0, 630.0, 536.0, 582.0, 587.0, 579.0, 582.0, 422.0, 587.0, 579.0, 576.0, 536.0, 573.0, 582.0, 582.0, 522.0, 579.0, 582.0, 570.0, 525.0, 582.0, 579.0, 452.0, 579.0, 567.0, 530.0, 576.0, 582.0, 582.0, 525.0, 576.0, 627.0, 573.0, 573.0, 527.0, 576.0, 579.0, 564.0, 633.0, 582.0, 567.0, 536.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 576.0, 579.0, 579.0, 522.0, 576.0, 576.0, 582.0, 533.0, 536.0, 573.0, 570.0, 576.0, 579.0, 582.0, 576.0, 516.0, 579.0, 579.0, 579.0, 504.0, 582.0, 558.0, 579.0, 579.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 293.0, 283.0, 250.0, 286.0, 275.0, 286.0, 282.0, 294.0, 276.0, 294.0, 296.0, 291.0, 283.0, 299.0, 280.0, 293.0, 286.0, 296.0, 295.0, 281.0, 288.0, 291.0, 293.0, 283.0, 292.0, 281.0, 292.0, 290.0, 281.0, 298.0, 299.0, 280.0, 293.0, 289.0, 286.0, 290.0, 293.0, 286.0, 311.0, 319.0, 254.0, 282.0, 279.0, 303.0, 296.0, 291.0, 278.0, 301.0, 294.0, 288.0, 209.0, 213.0, 282.0, 305.0, 287.0, 292.0, 291.0, 285.0, 258.0, 
278.0, 278.0, 295.0, 296.0, 286.0, 300.0, 282.0, 272.0, 250.0, 303.0, 276.0, 283.0, 299.0, 292.0, 278.0, 255.0, 270.0, 287.0, 295.0, 292.0, 287.0, 223.0, 229.0, 274.0, 305.0, 272.0, 295.0, 272.0, 258.0, 280.0, 296.0, 291.0, 291.0, 288.0, 294.0, 269.0, 256.0, 294.0, 282.0, 312.0, 315.0, 291.0, 282.0, 302.0, 271.0, 261.0, 266.0, 281.0, 295.0, 298.0, 281.0, 280.0, 284.0, 326.0, 307.0, 296.0, 286.0, 288.0, 279.0, 267.0, 269.0, 298.0, 281.0, 288.0, 291.0, 291.0, 288.0, 294.0, 285.0, 275.0, 301.0, 294.0, 282.0, 303.0, 273.0, 291.0, 288.0, 280.0, 299.0, 262.0, 260.0, 283.0, 293.0, 284.0, 292.0, 293.0, 289.0, 249.0, 284.0, 273.0, 263.0, 274.0, 299.0, 288.0, 282.0, 297.0, 279.0, 296.0, 283.0, 273.0, 309.0, 285.0, 291.0, 251.0, 265.0, 285.0, 294.0, 283.0, 296.0, 280.0, 299.0, 249.0, 255.0, 296.0, 286.0, 284.0, 274.0, 293.0, 286.0, 287.0, 292.0, 292.0, 284.0, 283.0, 299.0, 289.0, 293.0, 275.0, 298.0, 290.0, 283.0, 285.0, 297.0, 290.0, 286.0, 289.0, 290.0, 286.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.334950887862658, "mean_processing_ms": 0.3349244818889894, "mean_inference_ms": 1.8881606558059565}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4080000, "num_steps_sampled": 2176000, "sample_time_ms": 20370.044, "load_time_ms": 38.206, "grad_time_ms": 10005.991, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00501619465649128, "policy_loss": -0.0036706894170492887, "vf_loss": 92.4554214477539, "vf_explained_var": 0.7515974044799805, "kl": 0.0018303836695849895, "entropy": 1.1173133850097656, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2176000, "episodes_total": 5440, "training_iteration": 170, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-10-00", "timestamp": 1660252200, "time_this_iter_s": 27.505138874053955, "time_total_s": 10617.641038656235, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10617.641038656235, "timesteps_since_restore": 2176000, "iterations_since_restore": 170, "perf": {"cpu_util_percent": 30.13076923076923, "ram_util_percent": 58.446153846153834}} -{"episode_reward_max": 630.0, "episode_reward_min": 422.0, "episode_reward_mean": 571.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 209.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 285.645}, "custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.89, "shaped_reward_min": 141, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, 
"onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.26, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.6, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.46, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.52, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.52, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.97, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 1, 
"soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.74, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.52, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.52, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 579.0, 573.0, 587.0, 579.0, 576.0, 582.0, 587.0, 582.0, 582.0, 579.0, 579.0, 536.0, 582.0, 579.0, 576.0, 582.0, 579.0, 582.0, 582.0, 573.0, 501.0, 576.0, 522.0, 582.0, 582.0, 579.0, 576.0, 590.0, 539.0, 582.0, 582.0, 579.0, 576.0, 576.0, 576.0, 579.0, 579.0, 522.0, 576.0, 576.0, 582.0, 533.0, 536.0, 573.0, 570.0, 576.0, 579.0, 582.0, 576.0, 516.0, 579.0, 579.0, 579.0, 504.0, 582.0, 558.0, 579.0, 579.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 579.0, 576.0, 536.0, 561.0, 576.0, 570.0, 587.0, 582.0, 573.0, 582.0, 576.0, 579.0, 576.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 579.0, 630.0, 536.0, 582.0, 587.0, 579.0, 582.0, 422.0, 587.0, 579.0, 576.0, 536.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 287.0, 303.0, 276.0, 280.0, 293.0, 294.0, 293.0, 295.0, 284.0, 305.0, 271.0, 293.0, 289.0, 292.0, 295.0, 287.0, 295.0, 283.0, 299.0, 296.0, 283.0, 288.0, 291.0, 271.0, 265.0, 295.0, 287.0, 288.0, 291.0, 278.0, 298.0, 286.0, 296.0, 289.0, 290.0, 287.0, 
295.0, 297.0, 285.0, 279.0, 294.0, 269.0, 232.0, 287.0, 289.0, 254.0, 268.0, 291.0, 291.0, 276.0, 306.0, 295.0, 284.0, 278.0, 298.0, 293.0, 297.0, 271.0, 268.0, 299.0, 283.0, 290.0, 292.0, 294.0, 285.0, 275.0, 301.0, 294.0, 282.0, 303.0, 273.0, 291.0, 288.0, 280.0, 299.0, 262.0, 260.0, 283.0, 293.0, 284.0, 292.0, 293.0, 289.0, 249.0, 284.0, 273.0, 263.0, 274.0, 299.0, 288.0, 282.0, 297.0, 279.0, 296.0, 283.0, 273.0, 309.0, 285.0, 291.0, 251.0, 265.0, 285.0, 294.0, 283.0, 296.0, 280.0, 299.0, 249.0, 255.0, 296.0, 286.0, 284.0, 274.0, 293.0, 286.0, 287.0, 292.0, 292.0, 284.0, 283.0, 299.0, 289.0, 293.0, 275.0, 298.0, 290.0, 283.0, 285.0, 297.0, 290.0, 286.0, 289.0, 290.0, 286.0, 293.0, 293.0, 286.0, 293.0, 283.0, 250.0, 286.0, 275.0, 286.0, 282.0, 294.0, 276.0, 294.0, 296.0, 291.0, 283.0, 299.0, 280.0, 293.0, 286.0, 296.0, 295.0, 281.0, 288.0, 291.0, 293.0, 283.0, 292.0, 281.0, 292.0, 290.0, 281.0, 298.0, 299.0, 280.0, 293.0, 289.0, 286.0, 290.0, 293.0, 286.0, 311.0, 319.0, 254.0, 282.0, 279.0, 303.0, 296.0, 291.0, 278.0, 301.0, 294.0, 288.0, 209.0, 213.0, 282.0, 305.0, 287.0, 292.0, 291.0, 285.0, 258.0, 278.0, 278.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3296971139486637, "mean_processing_ms": 0.333876638718542, "mean_inference_ms": 1.8823863210387035}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4104000, "num_steps_sampled": 2188800, "sample_time_ms": 20376.689, "load_time_ms": 38.395, "grad_time_ms": 9898.441, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001497833989560604, "policy_loss": -0.006948364432901144, "vf_loss": 90.00249481201172, "vf_explained_var": 0.7635095119476318, "kl": 0.0017910072347149253, "entropy": 1.1081151962280273, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2188800, "episodes_total": 5472, "training_iteration": 171, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-10-27", "timestamp": 1660252227, "time_this_iter_s": 27.183032989501953, "time_total_s": 10644.824071645737, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10644.824071645737, "timesteps_since_restore": 2188800, "iterations_since_restore": 171, "perf": {"cpu_util_percent": 32.57105263157895, "ram_util_percent": 58.3842105263158}} -{"episode_reward_max": 630.0, "episode_reward_min": 422.0, "episode_reward_mean": 573.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 209.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.845}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 178.09, "shaped_reward_min": 141, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.25, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.64, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.86, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.51, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.01, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.86, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.51, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.86, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.51, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 576.0, 582.0, 582.0, 630.0, 573.0, 579.0, 530.0, 576.0, 627.0, 533.0, 530.0, 579.0, 579.0, 579.0, 525.0, 630.0, 519.0, 530.0, 582.0, 627.0, 576.0, 587.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 567.0, 579.0, 582.0, 576.0, 579.0, 579.0, 579.0, 576.0, 536.0, 561.0, 576.0, 570.0, 587.0, 582.0, 573.0, 582.0, 576.0, 579.0, 576.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 579.0, 630.0, 536.0, 582.0, 587.0, 579.0, 582.0, 422.0, 587.0, 579.0, 576.0, 536.0, 573.0, 587.0, 579.0, 573.0, 587.0, 579.0, 576.0, 582.0, 587.0, 582.0, 582.0, 579.0, 579.0, 536.0, 582.0, 579.0, 576.0, 582.0, 579.0, 582.0, 582.0, 573.0, 501.0, 576.0, 522.0, 582.0, 582.0, 579.0, 576.0, 590.0, 539.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 304.0, 286.0, 293.0, 290.0, 
286.0, 296.0, 286.0, 289.0, 293.0, 316.0, 314.0, 298.0, 275.0, 286.0, 293.0, 261.0, 269.0, 278.0, 298.0, 308.0, 319.0, 265.0, 268.0, 272.0, 258.0, 283.0, 296.0, 294.0, 285.0, 292.0, 287.0, 263.0, 262.0, 305.0, 325.0, 280.0, 239.0, 272.0, 258.0, 306.0, 276.0, 314.0, 313.0, 288.0, 288.0, 296.0, 291.0, 292.0, 290.0, 289.0, 293.0, 275.0, 304.0, 294.0, 288.0, 305.0, 277.0, 297.0, 285.0, 288.0, 279.0, 293.0, 286.0, 285.0, 297.0, 290.0, 286.0, 289.0, 290.0, 286.0, 293.0, 293.0, 286.0, 293.0, 283.0, 250.0, 286.0, 275.0, 286.0, 282.0, 294.0, 276.0, 294.0, 296.0, 291.0, 283.0, 299.0, 280.0, 293.0, 286.0, 296.0, 295.0, 281.0, 288.0, 291.0, 293.0, 283.0, 292.0, 281.0, 292.0, 290.0, 281.0, 298.0, 299.0, 280.0, 293.0, 289.0, 286.0, 290.0, 293.0, 286.0, 311.0, 319.0, 254.0, 282.0, 279.0, 303.0, 296.0, 291.0, 278.0, 301.0, 294.0, 288.0, 209.0, 213.0, 282.0, 305.0, 287.0, 292.0, 291.0, 285.0, 258.0, 278.0, 278.0, 295.0, 300.0, 287.0, 303.0, 276.0, 280.0, 293.0, 294.0, 293.0, 295.0, 284.0, 305.0, 271.0, 293.0, 289.0, 292.0, 295.0, 287.0, 295.0, 283.0, 299.0, 296.0, 283.0, 288.0, 291.0, 271.0, 265.0, 295.0, 287.0, 288.0, 291.0, 278.0, 298.0, 286.0, 296.0, 289.0, 290.0, 287.0, 295.0, 297.0, 285.0, 279.0, 294.0, 269.0, 232.0, 287.0, 289.0, 254.0, 268.0, 291.0, 291.0, 276.0, 306.0, 295.0, 284.0, 278.0, 298.0, 293.0, 297.0, 271.0, 268.0, 299.0, 283.0, 290.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3245227944112767, "mean_processing_ms": 0.33284763340426393, "mean_inference_ms": 1.876875864691374}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4128000, "num_steps_sampled": 2201600, "sample_time_ms": 20481.131, "load_time_ms": 38.136, "grad_time_ms": 9697.559, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0045767915435135365, "policy_loss": -0.0035035184118896723, "vf_loss": 86.42507934570312, "vf_explained_var": 0.7563931345939636, "kl": 0.002320564817637205, "entropy": 1.1244043111801147, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2201600, "episodes_total": 5504, "training_iteration": 172, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-10-56", "timestamp": 1660252256, "time_this_iter_s": 28.577091932296753, "time_total_s": 10673.401163578033, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10673.401163578033, "timesteps_since_restore": 2201600, "iterations_since_restore": 172, "perf": {"cpu_util_percent": 35.19024390243903, "ram_util_percent": 58.548780487804876}} -{"episode_reward_max": 630.0, "episode_reward_min": 496.0, "episode_reward_mean": 574.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 232.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 287.015}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.63, "shaped_reward_min": 138, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.48, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.83, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.69, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.78, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.31, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.85, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.69, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.69, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 579.0, 582.0, 498.0, 579.0, 579.0, 576.0, 561.0, 587.0, 570.0, 552.0, 587.0, 576.0, 579.0, 582.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 573.0, 570.0, 496.0, 579.0, 630.0, 576.0, 567.0, 582.0, 579.0, 576.0, 536.0, 573.0, 587.0, 579.0, 573.0, 587.0, 579.0, 576.0, 582.0, 587.0, 582.0, 582.0, 579.0, 579.0, 536.0, 582.0, 579.0, 576.0, 582.0, 579.0, 582.0, 582.0, 573.0, 501.0, 576.0, 522.0, 582.0, 582.0, 579.0, 576.0, 590.0, 539.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 582.0, 630.0, 573.0, 579.0, 530.0, 576.0, 627.0, 533.0, 530.0, 579.0, 579.0, 579.0, 525.0, 630.0, 519.0, 530.0, 582.0, 627.0, 576.0, 587.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 567.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 282.0, 297.0, 291.0, 291.0, 241.0, 257.0, 289.0, 290.0, 290.0, 289.0, 280.0, 296.0, 275.0, 286.0, 301.0, 286.0, 281.0, 289.0, 271.0, 281.0, 302.0, 285.0, 276.0, 300.0, 295.0, 284.0, 292.0, 290.0, 291.0, 285.0, 295.0, 292.0, 299.0, 277.0, 289.0, 290.0, 293.0, 286.0, 284.0, 295.0, 290.0, 292.0, 285.0, 288.0, 291.0, 279.0, 253.0, 243.0, 297.0, 282.0, 318.0, 312.0, 283.0, 293.0, 291.0, 276.0, 286.0, 296.0, 287.0, 292.0, 291.0, 285.0, 258.0, 278.0, 278.0, 295.0, 300.0, 287.0, 303.0, 276.0, 280.0, 293.0, 294.0, 293.0, 295.0, 284.0, 305.0, 271.0, 293.0, 289.0, 292.0, 295.0, 287.0, 295.0, 283.0, 299.0, 296.0, 283.0, 288.0, 291.0, 271.0, 265.0, 295.0, 287.0, 288.0, 291.0, 278.0, 298.0, 286.0, 296.0, 289.0, 290.0, 287.0, 295.0, 297.0, 285.0, 279.0, 294.0, 269.0, 232.0, 287.0, 289.0, 254.0, 268.0, 291.0, 291.0, 276.0, 306.0, 295.0, 284.0, 278.0, 298.0, 293.0, 297.0, 271.0, 268.0, 299.0, 283.0, 290.0, 292.0, 278.0, 304.0, 286.0, 293.0, 290.0, 286.0, 296.0, 286.0, 289.0, 293.0, 316.0, 314.0, 298.0, 275.0, 286.0, 293.0, 261.0, 269.0, 278.0, 298.0, 308.0, 319.0, 265.0, 268.0, 272.0, 258.0, 283.0, 296.0, 294.0, 285.0, 292.0, 287.0, 263.0, 262.0, 305.0, 325.0, 280.0, 239.0, 272.0, 258.0, 306.0, 276.0, 314.0, 313.0, 288.0, 288.0, 296.0, 291.0, 292.0, 290.0, 289.0, 293.0, 275.0, 304.0, 294.0, 288.0, 305.0, 277.0, 297.0, 285.0, 288.0, 279.0, 293.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3194261704658106, "mean_processing_ms": 0.3318357537689677, "mean_inference_ms": 1.8715822466645085}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4152000, "num_steps_sampled": 2214400, "sample_time_ms": 20271.412, "load_time_ms": 38.227, "grad_time_ms": 9546.44, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.0062603577971458435, "policy_loss": -0.0018654250307008624, "vf_loss": 86.83306121826172, "vf_explained_var": 0.7576972842216492, "kl": 0.0021647585090249777, "entropy": 1.1150306463241577, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2214400, "episodes_total": 5536, "training_iteration": 173, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-11-25", "timestamp": 1660252285, "time_this_iter_s": 28.57458209991455, "time_total_s": 10701.975745677948, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10701.975745677948, "timesteps_since_restore": 2214400, "iterations_since_restore": 173, "perf": {"cpu_util_percent": 30.26, "ram_util_percent": 58.657500000000006}} -{"episode_reward_max": 633.0, "episode_reward_min": 496.0, "episode_reward_mean": 576.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 239.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 288.29}, "custom_metrics": {"sparse_reward_mean": 199.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.78, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, 
"tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.4, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.65, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.13, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.01, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.53, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.86, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.64, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.38, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.53, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.86, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.53, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.86, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 579.0, 627.0, 573.0, 633.0, 582.0, 567.0, 530.0, 573.0, 564.0, 582.0, 573.0, 579.0, 582.0, 579.0, 558.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 627.0, 630.0, 582.0, 582.0, 582.0, 573.0, 536.0, 582.0, 579.0, 582.0, 590.0, 539.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 582.0, 630.0, 573.0, 579.0, 530.0, 576.0, 627.0, 533.0, 530.0, 579.0, 579.0, 579.0, 525.0, 630.0, 519.0, 530.0, 582.0, 627.0, 576.0, 587.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 567.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 498.0, 579.0, 579.0, 576.0, 561.0, 587.0, 570.0, 552.0, 587.0, 576.0, 579.0, 582.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 573.0, 570.0, 496.0, 579.0, 630.0, 576.0, 567.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 285.0, 290.0, 289.0, 298.0, 329.0, 270.0, 303.0, 308.0, 325.0, 301.0, 281.0, 279.0, 288.0, 268.0, 262.0, 296.0, 277.0, 286.0, 278.0, 289.0, 293.0, 285.0, 288.0, 294.0, 285.0, 301.0, 281.0, 295.0, 284.0, 281.0, 277.0, 291.0, 291.0, 308.0, 274.0, 286.0, 296.0, 300.0, 276.0, 284.0, 289.0, 289.0, 293.0, 316.0, 311.0, 321.0, 309.0, 286.0, 296.0, 297.0, 285.0, 298.0, 284.0, 283.0, 290.0, 270.0, 266.0, 285.0, 297.0, 292.0, 287.0, 293.0, 289.0, 293.0, 297.0, 271.0, 268.0, 299.0, 283.0, 290.0, 292.0, 278.0, 304.0, 286.0, 293.0, 290.0, 286.0, 296.0, 286.0, 289.0, 293.0, 316.0, 314.0, 298.0, 275.0, 286.0, 293.0, 261.0, 269.0, 278.0, 298.0, 308.0, 319.0, 265.0, 268.0, 272.0, 258.0, 283.0, 296.0, 294.0, 285.0, 292.0, 287.0, 263.0, 262.0, 305.0, 325.0, 280.0, 239.0, 272.0, 258.0, 306.0, 276.0, 314.0, 313.0, 288.0, 288.0, 296.0, 291.0, 292.0, 290.0, 289.0, 293.0, 275.0, 304.0, 294.0, 288.0, 305.0, 277.0, 297.0, 285.0, 288.0, 279.0, 293.0, 286.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 282.0, 297.0, 291.0, 291.0, 241.0, 257.0, 289.0, 290.0, 290.0, 289.0, 280.0, 296.0, 275.0, 286.0, 301.0, 286.0, 281.0, 289.0, 271.0, 281.0, 302.0, 285.0, 276.0, 300.0, 295.0, 284.0, 292.0, 290.0, 291.0, 285.0, 295.0, 292.0, 299.0, 277.0, 289.0, 290.0, 293.0, 286.0, 284.0, 295.0, 290.0, 292.0, 285.0, 288.0, 291.0, 279.0, 253.0, 243.0, 297.0, 282.0, 318.0, 312.0, 283.0, 293.0, 291.0, 276.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.3143927753552085, "mean_processing_ms": 0.33083733112110686, "mean_inference_ms": 1.8663998879774686}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4176000, 
"num_steps_sampled": 2227200, "sample_time_ms": 20085.918, "load_time_ms": 37.439, "grad_time_ms": 9177.291, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0001482805237174034, "policy_loss": -0.00769606651738286, "vf_loss": 81.13143920898438, "vf_explained_var": 0.764965295791626, "kl": 0.0018476974219083786, "entropy": 1.1307072639465332, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2227200, "episodes_total": 5568, "training_iteration": 174, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-11-52", "timestamp": 1660252312, "time_this_iter_s": 27.522704124450684, "time_total_s": 10729.498449802399, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": 
false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10729.498449802399, "timesteps_since_restore": 2227200, "iterations_since_restore": 174, "perf": {"cpu_util_percent": 34.294871794871796, "ram_util_percent": 58.587179487179476}} -{"episode_reward_max": 633.0, "episode_reward_min": 393.0, "episode_reward_mean": 573.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 286.51}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.82, "shaped_reward_min": 113, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 
0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.37, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.56, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.07, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.82, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.37, "dish_pickup_agent_1_min": 2, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.58, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.43, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [507.0, 579.0, 582.0, 576.0, 582.0, 393.0, 465.0, 579.0, 582.0, 570.0, 627.0, 579.0, 573.0, 576.0, 579.0, 504.0, 579.0, 579.0, 576.0, 527.0, 579.0, 519.0, 579.0, 587.0, 576.0, 633.0, 579.0, 576.0, 582.0, 579.0, 524.0, 627.0, 582.0, 582.0, 567.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 498.0, 579.0, 579.0, 576.0, 561.0, 587.0, 570.0, 552.0, 587.0, 576.0, 579.0, 582.0, 576.0, 587.0, 576.0, 579.0, 579.0, 579.0, 582.0, 573.0, 570.0, 496.0, 579.0, 630.0, 576.0, 567.0, 582.0, 567.0, 579.0, 627.0, 573.0, 633.0, 582.0, 567.0, 530.0, 573.0, 564.0, 582.0, 573.0, 579.0, 582.0, 579.0, 558.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 627.0, 630.0, 582.0, 582.0, 
582.0, 573.0, 536.0, 582.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [239.0, 268.0, 286.0, 293.0, 283.0, 299.0, 284.0, 292.0, 297.0, 285.0, 197.0, 196.0, 229.0, 236.0, 294.0, 285.0, 285.0, 297.0, 289.0, 281.0, 317.0, 310.0, 301.0, 278.0, 278.0, 295.0, 282.0, 294.0, 284.0, 295.0, 262.0, 242.0, 297.0, 282.0, 290.0, 289.0, 295.0, 281.0, 272.0, 255.0, 288.0, 291.0, 260.0, 259.0, 287.0, 292.0, 294.0, 293.0, 286.0, 290.0, 309.0, 324.0, 285.0, 294.0, 285.0, 291.0, 297.0, 285.0, 288.0, 291.0, 270.0, 254.0, 305.0, 322.0, 305.0, 277.0, 297.0, 285.0, 288.0, 279.0, 293.0, 286.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 282.0, 297.0, 291.0, 291.0, 241.0, 257.0, 289.0, 290.0, 290.0, 289.0, 280.0, 296.0, 275.0, 286.0, 301.0, 286.0, 281.0, 289.0, 271.0, 281.0, 302.0, 285.0, 276.0, 300.0, 295.0, 284.0, 292.0, 290.0, 291.0, 285.0, 295.0, 292.0, 299.0, 277.0, 289.0, 290.0, 293.0, 286.0, 284.0, 295.0, 290.0, 292.0, 285.0, 288.0, 291.0, 279.0, 253.0, 243.0, 297.0, 282.0, 318.0, 312.0, 283.0, 293.0, 291.0, 276.0, 286.0, 296.0, 282.0, 285.0, 290.0, 289.0, 298.0, 329.0, 270.0, 303.0, 308.0, 325.0, 301.0, 281.0, 279.0, 288.0, 268.0, 262.0, 296.0, 277.0, 286.0, 278.0, 289.0, 293.0, 285.0, 288.0, 294.0, 285.0, 301.0, 281.0, 295.0, 284.0, 281.0, 277.0, 291.0, 291.0, 308.0, 274.0, 286.0, 296.0, 300.0, 276.0, 284.0, 289.0, 289.0, 293.0, 316.0, 311.0, 321.0, 309.0, 286.0, 296.0, 297.0, 285.0, 298.0, 284.0, 283.0, 290.0, 270.0, 266.0, 285.0, 297.0, 292.0, 287.0, 293.0, 289.0]}, 
"sampler_perf": {"mean_env_wait_ms": 1.3094165014431445, "mean_processing_ms": 0.3298474823059415, "mean_inference_ms": 1.8613034748518011}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4200000, "num_steps_sampled": 2240000, "sample_time_ms": 20009.535, "load_time_ms": 37.081, "grad_time_ms": 9049.935, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033403884153813124, "policy_loss": -0.004778089467436075, "vf_loss": 86.8664779663086, "vf_explained_var": 0.7622640132904053, "kl": 0.0018111681565642357, "entropy": 1.1363428831100464, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2240000, "episodes_total": 5600, "training_iteration": 175, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-12-22", "timestamp": 1660252342, "time_this_iter_s": 29.488188982009888, "time_total_s": 10758.986638784409, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10758.986638784409, "timesteps_since_restore": 2240000, "iterations_since_restore": 175, "perf": {"cpu_util_percent": 31.97380952380952, "ram_util_percent": 58.61666666666667}} -{"episode_reward_max": 633.0, "episode_reward_min": 393.0, "episode_reward_mean": 573.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 286.835}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.47, "shaped_reward_min": 113, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.4, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.57, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 
16.55, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.77, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.57, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.55, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.57, 
"viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.55, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 573.0, 576.0, 579.0, 530.0, 576.0, 579.0, 576.0, 525.0, 576.0, 519.0, 530.0, 576.0, 579.0, 582.0, 630.0, 576.0, 567.0, 582.0, 567.0, 579.0, 627.0, 573.0, 633.0, 582.0, 567.0, 530.0, 573.0, 564.0, 582.0, 573.0, 579.0, 582.0, 579.0, 558.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 627.0, 630.0, 582.0, 582.0, 582.0, 573.0, 536.0, 
582.0, 579.0, 582.0, 507.0, 579.0, 582.0, 576.0, 582.0, 393.0, 465.0, 579.0, 582.0, 570.0, 627.0, 579.0, 573.0, 576.0, 579.0, 504.0, 579.0, 579.0, 576.0, 527.0, 579.0, 519.0, 579.0, 587.0, 576.0, 633.0, 579.0, 576.0, 582.0, 579.0, 524.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 302.0, 293.0, 289.0, 294.0, 288.0, 310.0, 317.0, 294.0, 288.0, 294.0, 288.0, 276.0, 311.0, 282.0, 297.0, 291.0, 288.0, 296.0, 286.0, 280.0, 299.0, 298.0, 281.0, 296.0, 286.0, 277.0, 299.0, 303.0, 279.0, 291.0, 288.0, 283.0, 299.0, 280.0, 299.0, 295.0, 278.0, 293.0, 283.0, 296.0, 283.0, 266.0, 264.0, 277.0, 299.0, 283.0, 296.0, 291.0, 285.0, 264.0, 261.0, 286.0, 290.0, 267.0, 252.0, 272.0, 258.0, 278.0, 298.0, 290.0, 289.0, 291.0, 291.0, 318.0, 312.0, 283.0, 293.0, 291.0, 276.0, 286.0, 296.0, 282.0, 285.0, 290.0, 289.0, 298.0, 329.0, 270.0, 303.0, 308.0, 325.0, 301.0, 281.0, 279.0, 288.0, 268.0, 262.0, 296.0, 277.0, 286.0, 278.0, 289.0, 293.0, 285.0, 288.0, 294.0, 285.0, 301.0, 281.0, 295.0, 284.0, 281.0, 277.0, 291.0, 291.0, 308.0, 274.0, 286.0, 296.0, 300.0, 276.0, 284.0, 289.0, 289.0, 293.0, 316.0, 311.0, 321.0, 309.0, 286.0, 296.0, 297.0, 285.0, 298.0, 284.0, 283.0, 290.0, 270.0, 266.0, 285.0, 297.0, 292.0, 287.0, 293.0, 289.0, 239.0, 268.0, 286.0, 293.0, 283.0, 299.0, 284.0, 292.0, 297.0, 285.0, 197.0, 196.0, 229.0, 236.0, 294.0, 285.0, 285.0, 297.0, 289.0, 281.0, 317.0, 310.0, 301.0, 278.0, 278.0, 295.0, 282.0, 294.0, 284.0, 295.0, 262.0, 242.0, 297.0, 282.0, 290.0, 289.0, 295.0, 
281.0, 272.0, 255.0, 288.0, 291.0, 260.0, 259.0, 287.0, 292.0, 294.0, 293.0, 286.0, 290.0, 309.0, 324.0, 285.0, 294.0, 285.0, 291.0, 297.0, 285.0, 288.0, 291.0, 270.0, 254.0, 305.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.304511305303639, "mean_processing_ms": 0.3288745301367266, "mean_inference_ms": 1.8566250484766516}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4224000, "num_steps_sampled": 2252800, "sample_time_ms": 20239.261, "load_time_ms": 36.617, "grad_time_ms": 8693.262, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0060581061989068985, "policy_loss": -0.0023995088413357735, "vf_loss": 90.20238494873047, "vf_explained_var": 0.7652048468589783, "kl": 0.0019277030369266868, "entropy": 1.1252202987670898, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2252800, "episodes_total": 5632, "training_iteration": 176, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-12-54", "timestamp": 1660252374, "time_this_iter_s": 32.0580530166626, "time_total_s": 10791.044691801071, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10791.044691801071, "timesteps_since_restore": 2252800, "iterations_since_restore": 176, "perf": {"cpu_util_percent": 31.34666666666667, "ram_util_percent": 58.57333333333334}} -{"episode_reward_max": 633.0, "episode_reward_min": 393.0, "episode_reward_mean": 571.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 196.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.7}, "custom_metrics": {"sparse_reward_mean": 197.6, 
"sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.2, "shaped_reward_min": 113, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.61, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.36, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.59, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.44, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.84, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.59, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.44, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.59, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.44, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 525.0, 627.0, 570.0, 504.0, 579.0, 582.0, 579.0, 576.0, 570.0, 579.0, 579.0, 576.0, 582.0, 519.0, 573.0, 579.0, 573.0, 582.0, 579.0, 582.0, 582.0, 587.0, 519.0, 573.0, 579.0, 633.0, 590.0, 579.0, 573.0, 587.0, 536.0, 582.0, 579.0, 582.0, 507.0, 579.0, 582.0, 576.0, 582.0, 
393.0, 465.0, 579.0, 582.0, 570.0, 627.0, 579.0, 573.0, 576.0, 579.0, 504.0, 579.0, 579.0, 576.0, 527.0, 579.0, 519.0, 579.0, 587.0, 576.0, 633.0, 579.0, 576.0, 582.0, 579.0, 524.0, 627.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 573.0, 576.0, 579.0, 530.0, 576.0, 579.0, 576.0, 525.0, 576.0, 519.0, 530.0, 576.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 318.0, 290.0, 292.0, 272.0, 253.0, 314.0, 313.0, 275.0, 295.0, 261.0, 243.0, 298.0, 281.0, 291.0, 291.0, 299.0, 280.0, 294.0, 282.0, 287.0, 283.0, 288.0, 291.0, 288.0, 291.0, 288.0, 288.0, 302.0, 280.0, 267.0, 252.0, 286.0, 287.0, 288.0, 291.0, 285.0, 288.0, 283.0, 299.0, 290.0, 289.0, 298.0, 284.0, 289.0, 293.0, 296.0, 291.0, 276.0, 243.0, 300.0, 273.0, 286.0, 293.0, 309.0, 324.0, 302.0, 288.0, 292.0, 287.0, 289.0, 284.0, 292.0, 295.0, 270.0, 266.0, 285.0, 297.0, 292.0, 287.0, 293.0, 289.0, 239.0, 268.0, 286.0, 293.0, 283.0, 299.0, 284.0, 292.0, 297.0, 285.0, 197.0, 196.0, 229.0, 236.0, 294.0, 285.0, 285.0, 297.0, 289.0, 281.0, 317.0, 310.0, 301.0, 278.0, 278.0, 295.0, 282.0, 294.0, 284.0, 295.0, 262.0, 242.0, 297.0, 282.0, 290.0, 289.0, 295.0, 281.0, 272.0, 255.0, 288.0, 291.0, 260.0, 259.0, 287.0, 292.0, 294.0, 293.0, 286.0, 290.0, 309.0, 324.0, 285.0, 294.0, 285.0, 291.0, 297.0, 285.0, 288.0, 291.0, 270.0, 254.0, 305.0, 322.0, 280.0, 302.0, 293.0, 289.0, 294.0, 288.0, 310.0, 317.0, 294.0, 288.0, 294.0, 288.0, 276.0, 
311.0, 282.0, 297.0, 291.0, 288.0, 296.0, 286.0, 280.0, 299.0, 298.0, 281.0, 296.0, 286.0, 277.0, 299.0, 303.0, 279.0, 291.0, 288.0, 283.0, 299.0, 280.0, 299.0, 295.0, 278.0, 293.0, 283.0, 296.0, 283.0, 266.0, 264.0, 277.0, 299.0, 283.0, 296.0, 291.0, 285.0, 264.0, 261.0, 286.0, 290.0, 267.0, 252.0, 272.0, 258.0, 278.0, 298.0, 290.0, 289.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2996756400527443, "mean_processing_ms": 0.3279171800323935, "mean_inference_ms": 1.852052219769451}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4248000, "num_steps_sampled": 2265600, "sample_time_ms": 20324.791, "load_time_ms": 36.647, "grad_time_ms": 8435.096, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008986306493170559, "policy_loss": -0.007334645837545395, "vf_loss": 87.94988250732422, "vf_explained_var": 0.7740858197212219, "kl": 0.001811654889024794, "entropy": 1.123410940170288, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2265600, "episodes_total": 5664, "training_iteration": 177, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-13-23", "timestamp": 1660252403, "time_this_iter_s": 29.61364197731018, "time_total_s": 10820.658333778381, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10820.658333778381, "timesteps_since_restore": 2265600, "iterations_since_restore": 177, "perf": {"cpu_util_percent": 35.069047619047616, "ram_util_percent": 58.67619047619048}} -{"episode_reward_max": 633.0, "episode_reward_min": 504.0, "episode_reward_mean": 576.0, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.0}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.0, "shaped_reward_min": 144, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.63, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, 
"useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.75, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.54, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.85, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.33, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.22, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.75, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.54, 
"optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.75, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.54, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 519.0, 579.0, 582.0, 627.0, 576.0, 582.0, 579.0, 573.0, 582.0, 
582.0, 527.0, 582.0, 579.0, 587.0, 630.0, 582.0, 576.0, 579.0, 570.0, 582.0, 582.0, 579.0, 525.0, 579.0, 576.0, 587.0, 584.0, 582.0, 573.0, 587.0, 582.0, 579.0, 524.0, 627.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 573.0, 576.0, 579.0, 530.0, 576.0, 579.0, 576.0, 525.0, 576.0, 519.0, 530.0, 576.0, 579.0, 582.0, 630.0, 582.0, 525.0, 627.0, 570.0, 504.0, 579.0, 582.0, 579.0, 576.0, 570.0, 579.0, 579.0, 576.0, 582.0, 519.0, 573.0, 579.0, 573.0, 582.0, 579.0, 582.0, 582.0, 587.0, 519.0, 573.0, 579.0, 633.0, 590.0, 579.0, 573.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 281.0, 289.0, 270.0, 249.0, 292.0, 287.0, 286.0, 296.0, 305.0, 322.0, 289.0, 287.0, 295.0, 287.0, 300.0, 279.0, 290.0, 283.0, 285.0, 297.0, 289.0, 293.0, 254.0, 273.0, 294.0, 288.0, 291.0, 288.0, 301.0, 286.0, 314.0, 316.0, 295.0, 287.0, 289.0, 287.0, 277.0, 302.0, 292.0, 278.0, 278.0, 304.0, 288.0, 294.0, 291.0, 288.0, 259.0, 266.0, 275.0, 304.0, 299.0, 277.0, 296.0, 291.0, 290.0, 294.0, 288.0, 294.0, 288.0, 285.0, 290.0, 297.0, 297.0, 285.0, 288.0, 291.0, 270.0, 254.0, 305.0, 322.0, 280.0, 302.0, 293.0, 289.0, 294.0, 288.0, 310.0, 317.0, 294.0, 288.0, 294.0, 288.0, 276.0, 311.0, 282.0, 297.0, 291.0, 288.0, 296.0, 286.0, 280.0, 299.0, 298.0, 281.0, 296.0, 286.0, 277.0, 299.0, 303.0, 279.0, 291.0, 288.0, 283.0, 299.0, 280.0, 299.0, 295.0, 278.0, 293.0, 283.0, 296.0, 283.0, 266.0, 264.0, 277.0, 299.0, 283.0, 
296.0, 291.0, 285.0, 264.0, 261.0, 286.0, 290.0, 267.0, 252.0, 272.0, 258.0, 278.0, 298.0, 290.0, 289.0, 291.0, 291.0, 312.0, 318.0, 290.0, 292.0, 272.0, 253.0, 314.0, 313.0, 275.0, 295.0, 261.0, 243.0, 298.0, 281.0, 291.0, 291.0, 299.0, 280.0, 294.0, 282.0, 287.0, 283.0, 288.0, 291.0, 288.0, 291.0, 288.0, 288.0, 302.0, 280.0, 267.0, 252.0, 286.0, 287.0, 288.0, 291.0, 285.0, 288.0, 283.0, 299.0, 290.0, 289.0, 298.0, 284.0, 289.0, 293.0, 296.0, 291.0, 276.0, 243.0, 300.0, 273.0, 286.0, 293.0, 309.0, 324.0, 302.0, 288.0, 292.0, 287.0, 289.0, 284.0, 292.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2948917928576587, "mean_processing_ms": 0.3269695972321587, "mean_inference_ms": 1.8475779345693215}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4272000, "num_steps_sampled": 2278400, "sample_time_ms": 20480.726, "load_time_ms": 37.228, "grad_time_ms": 8437.297, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004881067667156458, "policy_loss": -0.003187847323715687, "vf_loss": 86.31526947021484, "vf_explained_var": 0.7646486163139343, "kl": 0.0018008003244176507, "entropy": 1.125217080116272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2278400, "episodes_total": 5696, "training_iteration": 178, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-13-54", "timestamp": 1660252434, "time_this_iter_s": 30.965723037719727, "time_total_s": 10851.624056816101, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10851.624056816101, "timesteps_since_restore": 2278400, "iterations_since_restore": 178, "perf": {"cpu_util_percent": 
34.43636363636364, "ram_util_percent": 58.54318181818183}} -{"episode_reward_max": 633.0, "episode_reward_min": 504.0, "episode_reward_mean": 577.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.99}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.38, "shaped_reward_min": 144, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.65, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, 
"onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.73, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.62, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.95, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.28, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 15.73, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.62, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.73, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.62, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 561.0, 579.0, 582.0, 627.0, 527.0, 579.0, 576.0, 582.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 561.0, 576.0, 587.0, 630.0, 582.0, 627.0, 579.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 530.0, 576.0, 579.0, 582.0, 630.0, 582.0, 525.0, 627.0, 570.0, 504.0, 579.0, 582.0, 579.0, 576.0, 570.0, 579.0, 579.0, 576.0, 582.0, 519.0, 573.0, 579.0, 573.0, 582.0, 579.0, 582.0, 582.0, 587.0, 519.0, 573.0, 579.0, 633.0, 590.0, 579.0, 573.0, 587.0, 582.0, 570.0, 519.0, 579.0, 582.0, 627.0, 576.0, 582.0, 579.0, 573.0, 582.0, 582.0, 527.0, 582.0, 579.0, 587.0, 630.0, 582.0, 576.0, 579.0, 570.0, 582.0, 582.0, 579.0, 525.0, 579.0, 576.0, 587.0, 584.0, 582.0, 573.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 296.0, 273.0, 288.0, 294.0, 285.0, 285.0, 297.0, 318.0, 309.0, 264.0, 263.0, 287.0, 292.0, 280.0, 296.0, 288.0, 294.0, 293.0, 289.0, 288.0, 294.0, 300.0, 279.0, 294.0, 285.0, 288.0, 291.0, 294.0, 288.0, 278.0, 301.0, 279.0, 282.0, 290.0, 286.0, 302.0, 285.0, 320.0, 310.0, 297.0, 285.0, 315.0, 312.0, 295.0, 284.0, 304.0, 278.0, 278.0, 301.0, 289.0, 293.0, 290.0, 292.0, 275.0, 301.0, 293.0, 289.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 272.0, 258.0, 278.0, 298.0, 290.0, 289.0, 291.0, 291.0, 312.0, 318.0, 290.0, 292.0, 272.0, 253.0, 314.0, 313.0, 275.0, 295.0, 261.0, 243.0, 298.0, 281.0, 291.0, 291.0, 299.0, 280.0, 294.0, 
282.0, 287.0, 283.0, 288.0, 291.0, 288.0, 291.0, 288.0, 288.0, 302.0, 280.0, 267.0, 252.0, 286.0, 287.0, 288.0, 291.0, 285.0, 288.0, 283.0, 299.0, 290.0, 289.0, 298.0, 284.0, 289.0, 293.0, 296.0, 291.0, 276.0, 243.0, 300.0, 273.0, 286.0, 293.0, 309.0, 324.0, 302.0, 288.0, 292.0, 287.0, 289.0, 284.0, 292.0, 295.0, 289.0, 293.0, 281.0, 289.0, 270.0, 249.0, 292.0, 287.0, 286.0, 296.0, 305.0, 322.0, 289.0, 287.0, 295.0, 287.0, 300.0, 279.0, 290.0, 283.0, 285.0, 297.0, 289.0, 293.0, 254.0, 273.0, 294.0, 288.0, 291.0, 288.0, 301.0, 286.0, 314.0, 316.0, 295.0, 287.0, 289.0, 287.0, 277.0, 302.0, 292.0, 278.0, 278.0, 304.0, 288.0, 294.0, 291.0, 288.0, 259.0, 266.0, 275.0, 304.0, 299.0, 277.0, 296.0, 291.0, 290.0, 294.0, 288.0, 294.0, 288.0, 285.0, 290.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2901493032290514, "mean_processing_ms": 0.3260293499521716, "mean_inference_ms": 1.842905806043276}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4296000, "num_steps_sampled": 2291200, "sample_time_ms": 20658.749, "load_time_ms": 37.127, "grad_time_ms": 8627.523, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005556942895054817, "policy_loss": -0.0025492331478744745, "vf_loss": 86.67485809326172, "vf_explained_var": 0.7664775848388672, "kl": 0.0018904004245996475, "entropy": 1.1226133108139038, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2291200, "episodes_total": 5728, "training_iteration": 179, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-14-26", "timestamp": 1660252466, "time_this_iter_s": 32.01629400253296, "time_total_s": 10883.640350818634, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10883.640350818634, "timesteps_since_restore": 2291200, "iterations_since_restore": 179, "perf": {"cpu_util_percent": 31.479999999999997, "ram_util_percent": 58.526666666666664}} -{"episode_reward_max": 630.0, "episode_reward_min": 516.0, "episode_reward_mean": 578.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 289.07}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.54, "shaped_reward_min": 156, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.34, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.62, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.88, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.82, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.62, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.62, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 567.0, 530.0, 627.0, 582.0, 579.0, 587.0, 579.0, 579.0, 579.0, 579.0, 582.0, 582.0, 576.0, 576.0, 516.0, 579.0, 587.0, 582.0, 570.0, 570.0, 519.0, 573.0, 582.0, 576.0, 579.0, 579.0, 576.0, 530.0, 582.0, 624.0, 590.0, 579.0, 573.0, 587.0, 582.0, 570.0, 519.0, 579.0, 582.0, 627.0, 576.0, 582.0, 579.0, 573.0, 582.0, 582.0, 527.0, 582.0, 579.0, 587.0, 630.0, 582.0, 576.0, 579.0, 570.0, 582.0, 582.0, 579.0, 525.0, 579.0, 576.0, 587.0, 584.0, 582.0, 573.0, 587.0, 587.0, 561.0, 579.0, 582.0, 627.0, 527.0, 579.0, 576.0, 582.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 561.0, 576.0, 587.0, 630.0, 582.0, 627.0, 579.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 283.0, 289.0, 287.0, 288.0, 279.0, 270.0, 260.0, 309.0, 318.0, 298.0, 284.0, 276.0, 303.0, 291.0, 296.0, 298.0, 281.0, 283.0, 296.0, 284.0, 295.0, 277.0, 302.0, 297.0, 285.0, 294.0, 288.0, 293.0, 283.0, 284.0, 292.0, 263.0, 253.0, 290.0, 289.0, 302.0, 285.0, 297.0, 285.0, 291.0, 279.0, 294.0, 276.0, 261.0, 258.0, 282.0, 291.0, 293.0, 289.0, 293.0, 283.0, 280.0, 299.0, 282.0, 297.0, 298.0, 278.0, 269.0, 261.0, 285.0, 
297.0, 298.0, 326.0, 302.0, 288.0, 292.0, 287.0, 289.0, 284.0, 292.0, 295.0, 289.0, 293.0, 281.0, 289.0, 270.0, 249.0, 292.0, 287.0, 286.0, 296.0, 305.0, 322.0, 289.0, 287.0, 295.0, 287.0, 300.0, 279.0, 290.0, 283.0, 285.0, 297.0, 289.0, 293.0, 254.0, 273.0, 294.0, 288.0, 291.0, 288.0, 301.0, 286.0, 314.0, 316.0, 295.0, 287.0, 289.0, 287.0, 277.0, 302.0, 292.0, 278.0, 278.0, 304.0, 288.0, 294.0, 291.0, 288.0, 259.0, 266.0, 275.0, 304.0, 299.0, 277.0, 296.0, 291.0, 290.0, 294.0, 288.0, 294.0, 288.0, 285.0, 290.0, 297.0, 291.0, 296.0, 273.0, 288.0, 294.0, 285.0, 285.0, 297.0, 318.0, 309.0, 264.0, 263.0, 287.0, 292.0, 280.0, 296.0, 288.0, 294.0, 293.0, 289.0, 288.0, 294.0, 300.0, 279.0, 294.0, 285.0, 288.0, 291.0, 294.0, 288.0, 278.0, 301.0, 279.0, 282.0, 290.0, 286.0, 302.0, 285.0, 320.0, 310.0, 297.0, 285.0, 315.0, 312.0, 295.0, 284.0, 304.0, 278.0, 278.0, 301.0, 289.0, 293.0, 290.0, 292.0, 275.0, 301.0, 293.0, 289.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2854556266765977, "mean_processing_ms": 0.3250985864852822, "mean_inference_ms": 1.838409331377913}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4320000, "num_steps_sampled": 2304000, "sample_time_ms": 20831.137, "load_time_ms": 37.325, "grad_time_ms": 8787.85, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003588956082239747, "policy_loss": -0.004645919892936945, "vf_loss": 88.00481414794922, "vf_explained_var": 0.7581232190132141, "kl": 0.0017625847831368446, "entropy": 1.131211280822754, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2304000, "episodes_total": 5760, "training_iteration": 180, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-14-57", "timestamp": 1660252497, "time_this_iter_s": 30.824997186660767, "time_total_s": 10914.465348005295, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10914.465348005295, "timesteps_since_restore": 2304000, "iterations_since_restore": 180, "perf": {"cpu_util_percent": 31.02954545454545, "ram_util_percent": 58.488636363636374}} -{"episode_reward_max": 630.0, "episode_reward_min": 501.0, "episode_reward_mean": 575.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 246.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 287.89}, "custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.38, "shaped_reward_min": 141, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.23, 
"onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.73, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.91, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.04, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.39, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.59, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.39, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, 
"soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.39, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.39, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [521.0, 576.0, 573.0, 539.0, 579.0, 582.0, 536.0, 630.0, 579.0, 582.0, 582.0, 579.0, 576.0, 582.0, 558.0, 579.0, 552.0, 576.0, 579.0, 587.0, 579.0, 579.0, 587.0, 567.0, 582.0, 576.0, 501.0, 504.0, 573.0, 587.0, 587.0, 579.0, 584.0, 582.0, 573.0, 587.0, 587.0, 561.0, 579.0, 582.0, 627.0, 527.0, 579.0, 576.0, 582.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 561.0, 576.0, 587.0, 630.0, 582.0, 627.0, 579.0, 582.0, 579.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 579.0, 576.0, 567.0, 530.0, 627.0, 582.0, 579.0, 587.0, 579.0, 579.0, 579.0, 579.0, 582.0, 582.0, 576.0, 576.0, 516.0, 579.0, 587.0, 582.0, 570.0, 570.0, 519.0, 573.0, 582.0, 576.0, 579.0, 579.0, 576.0, 530.0, 582.0, 624.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 254.0, 283.0, 293.0, 285.0, 288.0, 275.0, 264.0, 288.0, 291.0, 304.0, 278.0, 267.0, 269.0, 323.0, 307.0, 298.0, 281.0, 303.0, 279.0, 297.0, 285.0, 280.0, 299.0, 277.0, 299.0, 286.0, 296.0, 286.0, 272.0, 308.0, 271.0, 275.0, 277.0, 292.0, 284.0, 276.0, 
303.0, 286.0, 301.0, 291.0, 288.0, 301.0, 278.0, 292.0, 295.0, 291.0, 276.0, 293.0, 289.0, 293.0, 283.0, 254.0, 247.0, 246.0, 258.0, 281.0, 292.0, 286.0, 301.0, 295.0, 292.0, 282.0, 297.0, 290.0, 294.0, 288.0, 294.0, 288.0, 285.0, 290.0, 297.0, 291.0, 296.0, 273.0, 288.0, 294.0, 285.0, 285.0, 297.0, 318.0, 309.0, 264.0, 263.0, 287.0, 292.0, 280.0, 296.0, 288.0, 294.0, 293.0, 289.0, 288.0, 294.0, 300.0, 279.0, 294.0, 285.0, 288.0, 291.0, 294.0, 288.0, 278.0, 301.0, 279.0, 282.0, 290.0, 286.0, 302.0, 285.0, 320.0, 310.0, 297.0, 285.0, 315.0, 312.0, 295.0, 284.0, 304.0, 278.0, 278.0, 301.0, 289.0, 293.0, 290.0, 292.0, 275.0, 301.0, 293.0, 289.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 296.0, 283.0, 289.0, 287.0, 288.0, 279.0, 270.0, 260.0, 309.0, 318.0, 298.0, 284.0, 276.0, 303.0, 291.0, 296.0, 298.0, 281.0, 283.0, 296.0, 284.0, 295.0, 277.0, 302.0, 297.0, 285.0, 294.0, 288.0, 293.0, 283.0, 284.0, 292.0, 263.0, 253.0, 290.0, 289.0, 302.0, 285.0, 297.0, 285.0, 291.0, 279.0, 294.0, 276.0, 261.0, 258.0, 282.0, 291.0, 293.0, 289.0, 293.0, 283.0, 280.0, 299.0, 282.0, 297.0, 298.0, 278.0, 269.0, 261.0, 285.0, 297.0, 298.0, 326.0]}, "sampler_perf": {"mean_env_wait_ms": 1.280816050662255, "mean_processing_ms": 0.3241780547884837, "mean_inference_ms": 1.833999332912814}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4344000, "num_steps_sampled": 2316800, "sample_time_ms": 21054.918, "load_time_ms": 37.113, "grad_time_ms": 8943.853, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029653788078576326, "policy_loss": -0.005187256261706352, "vf_loss": 87.18419647216797, "vf_explained_var": 0.7553746104240417, "kl": 0.0017378958873450756, "entropy": 1.13156259059906, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2316800, "episodes_total": 5792, "training_iteration": 181, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-15-28", "timestamp": 1660252528, "time_this_iter_s": 30.97549271583557, "time_total_s": 10945.44084072113, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, 
"monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": 
"ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10945.44084072113, "timesteps_since_restore": 2316800, "iterations_since_restore": 181, "perf": {"cpu_util_percent": 34.48409090909092, "ram_util_percent": 58.6068181818182}} -{"episode_reward_max": 630.0, "episode_reward_min": 465.0, "episode_reward_mean": 573.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.56}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.72, "shaped_reward_min": 141, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.86, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.71, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.03, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.22, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.99, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.22, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.99, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.22, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.99, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 530.0, 530.0, 627.0, 582.0, 579.0, 538.0, 576.0, 582.0, 579.0, 519.0, 582.0, 582.0, 582.0, 630.0, 579.0, 579.0, 576.0, 567.0, 576.0, 465.0, 587.0, 582.0, 587.0, 570.0, 582.0, 579.0, 573.0, 582.0, 584.0, 627.0, 576.0, 582.0, 579.0, 579.0, 576.0, 579.0, 576.0, 567.0, 530.0, 627.0, 582.0, 579.0, 587.0, 579.0, 579.0, 579.0, 579.0, 582.0, 582.0, 576.0, 576.0, 516.0, 579.0, 587.0, 582.0, 570.0, 570.0, 519.0, 573.0, 582.0, 576.0, 579.0, 579.0, 576.0, 530.0, 582.0, 624.0, 521.0, 576.0, 573.0, 539.0, 579.0, 582.0, 536.0, 630.0, 579.0, 582.0, 582.0, 579.0, 576.0, 582.0, 558.0, 579.0, 552.0, 576.0, 579.0, 587.0, 579.0, 579.0, 587.0, 567.0, 582.0, 576.0, 501.0, 504.0, 573.0, 587.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 272.0, 258.0, 270.0, 260.0, 313.0, 314.0, 281.0, 301.0, 285.0, 
294.0, 263.0, 275.0, 280.0, 296.0, 292.0, 290.0, 294.0, 285.0, 270.0, 249.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 318.0, 312.0, 295.0, 284.0, 291.0, 288.0, 288.0, 288.0, 291.0, 276.0, 278.0, 298.0, 227.0, 238.0, 308.0, 279.0, 298.0, 284.0, 313.0, 274.0, 284.0, 286.0, 285.0, 297.0, 270.0, 309.0, 282.0, 291.0, 284.0, 298.0, 295.0, 289.0, 313.0, 314.0, 285.0, 291.0, 293.0, 289.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 296.0, 283.0, 289.0, 287.0, 288.0, 279.0, 270.0, 260.0, 309.0, 318.0, 298.0, 284.0, 276.0, 303.0, 291.0, 296.0, 298.0, 281.0, 283.0, 296.0, 284.0, 295.0, 277.0, 302.0, 297.0, 285.0, 294.0, 288.0, 293.0, 283.0, 284.0, 292.0, 263.0, 253.0, 290.0, 289.0, 302.0, 285.0, 297.0, 285.0, 291.0, 279.0, 294.0, 276.0, 261.0, 258.0, 282.0, 291.0, 293.0, 289.0, 293.0, 283.0, 280.0, 299.0, 282.0, 297.0, 298.0, 278.0, 269.0, 261.0, 285.0, 297.0, 298.0, 326.0, 267.0, 254.0, 283.0, 293.0, 285.0, 288.0, 275.0, 264.0, 288.0, 291.0, 304.0, 278.0, 267.0, 269.0, 323.0, 307.0, 298.0, 281.0, 303.0, 279.0, 297.0, 285.0, 280.0, 299.0, 277.0, 299.0, 286.0, 296.0, 286.0, 272.0, 308.0, 271.0, 275.0, 277.0, 292.0, 284.0, 276.0, 303.0, 286.0, 301.0, 291.0, 288.0, 301.0, 278.0, 292.0, 295.0, 291.0, 276.0, 293.0, 289.0, 293.0, 283.0, 254.0, 247.0, 246.0, 258.0, 281.0, 292.0, 286.0, 301.0, 295.0, 292.0, 282.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2762174298521924, "mean_processing_ms": 0.3232647451131093, "mean_inference_ms": 1.8295321417191508}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4368000, "num_steps_sampled": 2329600, "sample_time_ms": 20991.973, "load_time_ms": 37.186, "grad_time_ms": 9226.22, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002576154889538884, "policy_loss": -0.005821262951940298, "vf_loss": 89.62581634521484, "vf_explained_var": 0.7608991265296936, "kl": 0.002179400995373726, "entropy": 1.1303036212921143, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, 
"optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2329600, "episodes_total": 5824, "training_iteration": 182, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-15-59", "timestamp": 1660252559, "time_this_iter_s": 30.775686264038086, "time_total_s": 10976.216526985168, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, 
"NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 10976.216526985168, "timesteps_since_restore": 2329600, "iterations_since_restore": 182, "perf": {"cpu_util_percent": 31.2, "ram_util_percent": 58.5}} -{"episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 575.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 287.805}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.61, "shaped_reward_min": 141, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, 
"useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.42, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.72, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.58, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.31, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 
0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.47, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.72, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.58, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.72, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.58, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, 
"catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 587.0, 627.0, 579.0, 576.0, 582.0, 576.0, 573.0, 576.0, 633.0, 525.0, 576.0, 579.0, 579.0, 582.0, 579.0, 536.0, 576.0, 627.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 576.0, 576.0, 630.0, 582.0, 582.0, 576.0, 530.0, 582.0, 624.0, 521.0, 576.0, 573.0, 539.0, 579.0, 582.0, 536.0, 630.0, 579.0, 582.0, 582.0, 579.0, 576.0, 582.0, 558.0, 579.0, 552.0, 576.0, 579.0, 587.0, 579.0, 579.0, 587.0, 567.0, 582.0, 576.0, 501.0, 504.0, 573.0, 587.0, 587.0, 579.0, 576.0, 530.0, 530.0, 627.0, 582.0, 579.0, 538.0, 576.0, 582.0, 579.0, 519.0, 582.0, 582.0, 582.0, 630.0, 579.0, 579.0, 576.0, 567.0, 576.0, 465.0, 587.0, 582.0, 587.0, 570.0, 582.0, 579.0, 573.0, 582.0, 584.0, 627.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 299.0, 283.0, 290.0, 297.0, 313.0, 314.0, 292.0, 287.0, 284.0, 292.0, 291.0, 291.0, 291.0, 285.0, 291.0, 282.0, 272.0, 304.0, 311.0, 322.0, 255.0, 270.0, 283.0, 293.0, 289.0, 290.0, 294.0, 285.0, 284.0, 298.0, 297.0, 282.0, 261.0, 275.0, 288.0, 288.0, 305.0, 322.0, 294.0, 285.0, 290.0, 286.0, 290.0, 292.0, 295.0, 287.0, 300.0, 279.0, 293.0, 280.0, 293.0, 294.0, 293.0, 283.0, 296.0, 280.0, 321.0, 309.0, 305.0, 277.0, 288.0, 294.0, 298.0, 278.0, 269.0, 261.0, 285.0, 297.0, 298.0, 326.0, 267.0, 254.0, 283.0, 293.0, 285.0, 288.0, 275.0, 264.0, 288.0, 291.0, 304.0, 278.0, 267.0, 269.0, 323.0, 307.0, 298.0, 281.0, 303.0, 279.0, 297.0, 285.0, 280.0, 299.0, 277.0, 299.0, 286.0, 296.0, 286.0, 272.0, 308.0, 271.0, 275.0, 277.0, 292.0, 284.0, 276.0, 303.0, 286.0, 301.0, 291.0, 288.0, 301.0, 278.0, 292.0, 295.0, 291.0, 276.0, 293.0, 289.0, 293.0, 283.0, 254.0, 247.0, 246.0, 258.0, 281.0, 292.0, 286.0, 301.0, 295.0, 292.0, 282.0, 297.0, 285.0, 291.0, 272.0, 258.0, 270.0, 260.0, 313.0, 314.0, 281.0, 301.0, 285.0, 294.0, 263.0, 275.0, 280.0, 296.0, 292.0, 290.0, 294.0, 285.0, 270.0, 249.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 318.0, 312.0, 295.0, 284.0, 291.0, 288.0, 288.0, 288.0, 291.0, 276.0, 278.0, 298.0, 227.0, 238.0, 308.0, 279.0, 298.0, 284.0, 313.0, 274.0, 284.0, 286.0, 285.0, 297.0, 270.0, 309.0, 282.0, 291.0, 284.0, 298.0, 295.0, 289.0, 313.0, 314.0, 285.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2716706020385649, "mean_processing_ms": 0.32236021091809197, "mean_inference_ms": 1.8252783373393515}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4392000, "num_steps_sampled": 2342400, "sample_time_ms": 21166.824, "load_time_ms": 37.427, "grad_time_ms": 9604.389, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001810177811421454, "policy_loss": 
-0.006374426186084747, "vf_loss": 87.48321533203125, "vf_explained_var": 0.7590639591217041, "kl": 0.00198071519844234, "entropy": 1.1274290084838867, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2342400, "episodes_total": 5856, "training_iteration": 183, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-16-33", "timestamp": 1660252593, "time_this_iter_s": 34.10594201087952, "time_total_s": 11010.322468996048, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, 
"eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, 
"multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11010.322468996048, "timesteps_since_restore": 2342400, "iterations_since_restore": 183, "perf": {"cpu_util_percent": 32.16041666666667, "ram_util_percent": 58.54791666666666}} -{"episode_reward_max": 636.0, "episode_reward_min": 465.0, "episode_reward_mean": 581.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 290.625}, "custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.65, "shaped_reward_min": 145, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, 
"tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.68, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.48, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.67, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.26, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.72, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, 
"dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 
0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 630.0, 579.0, 636.0, 582.0, 579.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 579.0, 582.0, 576.0, 587.0, 579.0, 582.0, 630.0, 590.0, 579.0, 561.0, 579.0, 573.0, 587.0, 587.0, 579.0, 576.0, 530.0, 530.0, 627.0, 582.0, 579.0, 538.0, 576.0, 582.0, 579.0, 519.0, 582.0, 582.0, 582.0, 630.0, 579.0, 579.0, 576.0, 567.0, 576.0, 465.0, 587.0, 582.0, 587.0, 570.0, 582.0, 579.0, 573.0, 582.0, 584.0, 627.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 576.0, 582.0, 576.0, 573.0, 576.0, 633.0, 525.0, 576.0, 579.0, 579.0, 582.0, 579.0, 536.0, 576.0, 627.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 576.0, 576.0, 630.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 288.0, 291.0, 318.0, 312.0, 289.0, 290.0, 316.0, 320.0, 296.0, 286.0, 290.0, 289.0, 290.0, 297.0, 286.0, 296.0, 293.0, 289.0, 314.0, 316.0, 301.0, 275.0, 293.0, 289.0, 278.0, 295.0, 318.0, 312.0, 286.0, 296.0, 276.0, 294.0, 298.0, 278.0, 295.0, 287.0, 296.0, 286.0, 293.0, 289.0, 292.0, 287.0, 296.0, 286.0, 282.0, 294.0, 307.0, 280.0, 288.0, 291.0, 290.0, 292.0, 316.0, 314.0, 298.0, 292.0, 293.0, 286.0, 278.0, 283.0, 299.0, 280.0, 281.0, 292.0, 286.0, 301.0, 295.0, 292.0, 282.0, 297.0, 285.0, 291.0, 272.0, 258.0, 270.0, 260.0, 313.0, 314.0, 281.0, 301.0, 285.0, 294.0, 263.0, 275.0, 280.0, 296.0, 292.0, 290.0, 294.0, 285.0, 270.0, 249.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 318.0, 312.0, 295.0, 284.0, 291.0, 288.0, 288.0, 288.0, 291.0, 276.0, 278.0, 298.0, 227.0, 238.0, 308.0, 279.0, 298.0, 284.0, 313.0, 274.0, 284.0, 286.0, 285.0, 297.0, 270.0, 309.0, 282.0, 291.0, 284.0, 298.0, 295.0, 289.0, 313.0, 314.0, 285.0, 291.0, 293.0, 289.0, 299.0, 283.0, 290.0, 297.0, 313.0, 314.0, 292.0, 287.0, 284.0, 292.0, 291.0, 291.0, 291.0, 285.0, 291.0, 282.0, 272.0, 304.0, 311.0, 322.0, 255.0, 270.0, 283.0, 293.0, 289.0, 290.0, 294.0, 285.0, 284.0, 298.0, 297.0, 282.0, 261.0, 275.0, 288.0, 288.0, 305.0, 322.0, 294.0, 285.0, 290.0, 286.0, 290.0, 292.0, 295.0, 287.0, 300.0, 279.0, 293.0, 280.0, 293.0, 294.0, 293.0, 283.0, 296.0, 280.0, 321.0, 309.0, 305.0, 277.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.26716887743788, "mean_processing_ms": 0.3214640615198408, "mean_inference_ms": 1.8211055910807965}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4416000, "num_steps_sampled": 2355200, "sample_time_ms": 21369.68, "load_time_ms": 37.437, "grad_time_ms": 
9828.237, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0049454327672719955, "policy_loss": -0.002732283202931285, "vf_loss": 82.44231414794922, "vf_explained_var": 0.771254301071167, "kl": 0.0019334623357281089, "entropy": 1.1330214738845825, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2355200, "episodes_total": 5888, "training_iteration": 184, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-17-05", "timestamp": 1660252625, "time_this_iter_s": 31.787577867507935, "time_total_s": 11042.110046863556, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": 
["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11042.110046863556, "timesteps_since_restore": 2355200, "iterations_since_restore": 184, "perf": {"cpu_util_percent": 30.99777777777778, "ram_util_percent": 58.44666666666665}} -{"episode_reward_max": 636.0, "episode_reward_min": 194.0, "episode_reward_mean": 578.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 93.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.095}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 178.59, "shaped_reward_min": 74, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.6, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.44, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.61, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.55, 
"useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.55, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.61, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.61, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 584.0, 576.0, 573.0, 579.0, 515.0, 576.0, 582.0, 576.0, 582.0, 587.0, 522.0, 573.0, 582.0, 519.0, 576.0, 582.0, 518.0, 570.0, 633.0, 582.0, 194.0, 582.0, 630.0, 587.0, 576.0, 579.0, 527.0, 576.0, 630.0, 587.0, 582.0, 584.0, 627.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 576.0, 582.0, 576.0, 573.0, 576.0, 633.0, 525.0, 576.0, 579.0, 579.0, 582.0, 579.0, 536.0, 576.0, 627.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 576.0, 576.0, 630.0, 582.0, 582.0, 573.0, 579.0, 630.0, 579.0, 636.0, 582.0, 579.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 579.0, 582.0, 576.0, 587.0, 579.0, 582.0, 630.0, 590.0, 579.0, 561.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 281.0, 286.0, 296.0, 283.0, 301.0, 286.0, 290.0, 282.0, 291.0, 292.0, 287.0, 260.0, 255.0, 275.0, 301.0, 294.0, 288.0, 280.0, 296.0, 291.0, 291.0, 288.0, 299.0, 259.0, 263.0, 272.0, 301.0, 303.0, 279.0, 261.0, 258.0, 278.0, 298.0, 286.0, 296.0, 268.0, 250.0, 287.0, 283.0, 313.0, 320.0, 283.0, 299.0, 93.0, 101.0, 287.0, 295.0, 315.0, 315.0, 300.0, 287.0, 279.0, 297.0, 288.0, 291.0, 262.0, 265.0, 277.0, 299.0, 306.0, 324.0, 285.0, 302.0, 284.0, 298.0, 295.0, 289.0, 313.0, 314.0, 285.0, 291.0, 293.0, 289.0, 299.0, 283.0, 290.0, 297.0, 313.0, 314.0, 292.0, 287.0, 284.0, 292.0, 291.0, 291.0, 291.0, 285.0, 291.0, 282.0, 272.0, 304.0, 311.0, 322.0, 255.0, 270.0, 283.0, 293.0, 289.0, 290.0, 294.0, 285.0, 284.0, 298.0, 297.0, 282.0, 261.0, 275.0, 288.0, 288.0, 305.0, 322.0, 294.0, 285.0, 290.0, 286.0, 290.0, 292.0, 295.0, 287.0, 300.0, 279.0, 293.0, 280.0, 293.0, 294.0, 293.0, 283.0, 296.0, 280.0, 321.0, 309.0, 305.0, 277.0, 288.0, 294.0, 288.0, 285.0, 288.0, 291.0, 318.0, 312.0, 289.0, 290.0, 316.0, 320.0, 296.0, 286.0, 290.0, 289.0, 290.0, 297.0, 286.0, 296.0, 293.0, 289.0, 314.0, 316.0, 301.0, 275.0, 293.0, 289.0, 278.0, 295.0, 318.0, 312.0, 286.0, 296.0, 276.0, 294.0, 298.0, 278.0, 295.0, 287.0, 296.0, 286.0, 293.0, 289.0, 292.0, 287.0, 296.0, 286.0, 282.0, 294.0, 307.0, 280.0, 288.0, 291.0, 290.0, 292.0, 316.0, 314.0, 298.0, 292.0, 293.0, 286.0, 278.0, 283.0, 299.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2627159762487399, "mean_processing_ms": 0.320576860334056, 
"mean_inference_ms": 1.8169666521005257}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4440000, "num_steps_sampled": 2368000, "sample_time_ms": 21316.526, "load_time_ms": 37.828, "grad_time_ms": 10200.15, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004557406529784203, "policy_loss": -0.004057899583131075, "vf_loss": 91.82827758789062, "vf_explained_var": 0.7658367156982422, "kl": 0.001969862962141633, "entropy": 1.135046362876892, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2368000, "episodes_total": 5920, "training_iteration": 185, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-17-38", "timestamp": 1660252658, "time_this_iter_s": 32.679043769836426, "time_total_s": 11074.789090633392, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, 
"num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11074.789090633392, "timesteps_since_restore": 2368000, "iterations_since_restore": 185, "perf": {"cpu_util_percent": 33.50425531914893, "ram_util_percent": 58.438297872340435}} -{"episode_reward_max": 636.0, "episode_reward_min": 194.0, "episode_reward_mean": 576.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 93.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.105}, "custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 177.81, "shaped_reward_min": 74, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, 
"tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.66, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.42, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.75, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, 
"dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.45, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.75, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.75, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 
22, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 627.0, 527.0, 582.0, 624.0, 581.0, 576.0, 576.0, 584.0, 576.0, 587.0, 527.0, 582.0, 576.0, 498.0, 579.0, 582.0, 518.0, 630.0, 582.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 468.0, 627.0, 576.0, 582.0, 582.0, 576.0, 630.0, 582.0, 582.0, 573.0, 579.0, 630.0, 579.0, 636.0, 582.0, 579.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 582.0, 579.0, 582.0, 576.0, 587.0, 579.0, 582.0, 630.0, 590.0, 579.0, 561.0, 579.0, 579.0, 582.0, 584.0, 576.0, 573.0, 579.0, 515.0, 576.0, 
582.0, 576.0, 582.0, 587.0, 522.0, 573.0, 582.0, 519.0, 576.0, 582.0, 518.0, 570.0, 633.0, 582.0, 194.0, 582.0, 630.0, 587.0, 576.0, 579.0, 527.0, 576.0, 630.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 284.0, 310.0, 317.0, 277.0, 250.0, 281.0, 301.0, 316.0, 308.0, 289.0, 292.0, 288.0, 288.0, 283.0, 293.0, 286.0, 298.0, 285.0, 291.0, 302.0, 285.0, 270.0, 257.0, 298.0, 284.0, 277.0, 299.0, 255.0, 243.0, 287.0, 292.0, 293.0, 289.0, 250.0, 268.0, 316.0, 314.0, 290.0, 292.0, 305.0, 271.0, 278.0, 309.0, 300.0, 282.0, 294.0, 288.0, 284.0, 298.0, 313.0, 314.0, 297.0, 285.0, 227.0, 241.0, 310.0, 317.0, 289.0, 287.0, 294.0, 288.0, 293.0, 289.0, 296.0, 280.0, 321.0, 309.0, 305.0, 277.0, 288.0, 294.0, 288.0, 285.0, 288.0, 291.0, 318.0, 312.0, 289.0, 290.0, 316.0, 320.0, 296.0, 286.0, 290.0, 289.0, 290.0, 297.0, 286.0, 296.0, 293.0, 289.0, 314.0, 316.0, 301.0, 275.0, 293.0, 289.0, 278.0, 295.0, 318.0, 312.0, 286.0, 296.0, 276.0, 294.0, 298.0, 278.0, 295.0, 287.0, 296.0, 286.0, 293.0, 289.0, 292.0, 287.0, 296.0, 286.0, 282.0, 294.0, 307.0, 280.0, 288.0, 291.0, 290.0, 292.0, 316.0, 314.0, 298.0, 292.0, 293.0, 286.0, 278.0, 283.0, 299.0, 280.0, 298.0, 281.0, 286.0, 296.0, 283.0, 301.0, 286.0, 290.0, 282.0, 291.0, 292.0, 287.0, 260.0, 255.0, 275.0, 301.0, 294.0, 288.0, 280.0, 296.0, 291.0, 291.0, 288.0, 299.0, 259.0, 263.0, 272.0, 301.0, 303.0, 279.0, 261.0, 258.0, 278.0, 298.0, 286.0, 296.0, 268.0, 250.0, 287.0, 283.0, 313.0, 320.0, 283.0, 299.0, 93.0, 101.0, 287.0, 295.0, 
315.0, 315.0, 300.0, 287.0, 279.0, 297.0, 288.0, 291.0, 262.0, 265.0, 277.0, 299.0, 306.0, 324.0, 285.0, 302.0]}, "sampler_perf": {"mean_env_wait_ms": 1.258340305403844, "mean_processing_ms": 0.31970797918049665, "mean_inference_ms": 1.8136714986418816}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4464000, "num_steps_sampled": 2380800, "sample_time_ms": 21817.869, "load_time_ms": 37.793, "grad_time_ms": 10598.659, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005312865134328604, "policy_loss": -0.0029478278011083603, "vf_loss": 88.26638793945312, "vf_explained_var": 0.762065589427948, "kl": 0.0017753179417923093, "entropy": 1.1319037675857544, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2380800, "episodes_total": 5952, "training_iteration": 186, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-18-19", "timestamp": 1660252699, "time_this_iter_s": 41.059054136276245, "time_total_s": 11115.848144769669, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11115.848144769669, "timesteps_since_restore": 2380800, "iterations_since_restore": 186, "perf": {"cpu_util_percent": 27.889655172413793, "ram_util_percent": 58.474137931034484}} -{"episode_reward_max": 633.0, "episode_reward_min": 194.0, "episode_reward_mean": 572.75, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 93.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.375}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 176.75, 
"shaped_reward_min": 74, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.81, 
"potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.41, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.56, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.81, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.41, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.41, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 582.0, 510.0, 579.0, 587.0, 582.0, 579.0, 627.0, 576.0, 576.0, 530.0, 579.0, 587.0, 582.0, 587.0, 461.0, 576.0, 582.0, 573.0, 630.0, 587.0, 582.0, 582.0, 627.0, 579.0, 573.0, 582.0, 630.0, 579.0, 576.0, 579.0, 576.0, 590.0, 579.0, 561.0, 579.0, 579.0, 582.0, 584.0, 576.0, 573.0, 579.0, 515.0, 576.0, 582.0, 576.0, 582.0, 
587.0, 522.0, 573.0, 582.0, 519.0, 576.0, 582.0, 518.0, 570.0, 633.0, 582.0, 194.0, 582.0, 630.0, 587.0, 576.0, 579.0, 527.0, 576.0, 630.0, 587.0, 570.0, 627.0, 527.0, 582.0, 624.0, 581.0, 576.0, 576.0, 584.0, 576.0, 587.0, 527.0, 582.0, 576.0, 498.0, 579.0, 582.0, 518.0, 630.0, 582.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 468.0, 627.0, 576.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 287.0, 295.0, 258.0, 252.0, 291.0, 288.0, 296.0, 291.0, 298.0, 284.0, 285.0, 294.0, 315.0, 312.0, 289.0, 287.0, 285.0, 291.0, 270.0, 260.0, 285.0, 294.0, 282.0, 305.0, 297.0, 285.0, 301.0, 286.0, 226.0, 235.0, 292.0, 284.0, 287.0, 295.0, 285.0, 288.0, 308.0, 322.0, 287.0, 300.0, 295.0, 287.0, 292.0, 290.0, 304.0, 323.0, 286.0, 293.0, 289.0, 284.0, 284.0, 298.0, 326.0, 304.0, 297.0, 282.0, 285.0, 291.0, 281.0, 298.0, 285.0, 291.0, 298.0, 292.0, 293.0, 286.0, 278.0, 283.0, 299.0, 280.0, 298.0, 281.0, 286.0, 296.0, 283.0, 301.0, 286.0, 290.0, 282.0, 291.0, 292.0, 287.0, 260.0, 255.0, 275.0, 301.0, 294.0, 288.0, 280.0, 296.0, 291.0, 291.0, 288.0, 299.0, 259.0, 263.0, 272.0, 301.0, 303.0, 279.0, 261.0, 258.0, 278.0, 298.0, 286.0, 296.0, 268.0, 250.0, 287.0, 283.0, 313.0, 320.0, 283.0, 299.0, 93.0, 101.0, 287.0, 295.0, 315.0, 315.0, 300.0, 287.0, 279.0, 297.0, 288.0, 291.0, 262.0, 265.0, 277.0, 299.0, 306.0, 324.0, 285.0, 302.0, 286.0, 284.0, 310.0, 317.0, 277.0, 250.0, 281.0, 301.0, 316.0, 308.0, 289.0, 292.0, 288.0, 288.0, 283.0, 293.0, 286.0, 298.0, 285.0, 
291.0, 302.0, 285.0, 270.0, 257.0, 298.0, 284.0, 277.0, 299.0, 255.0, 243.0, 287.0, 292.0, 293.0, 289.0, 250.0, 268.0, 316.0, 314.0, 290.0, 292.0, 305.0, 271.0, 278.0, 309.0, 300.0, 282.0, 294.0, 288.0, 284.0, 298.0, 313.0, 314.0, 297.0, 285.0, 227.0, 241.0, 310.0, 317.0, 289.0, 287.0, 294.0, 288.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2540029262486032, "mean_processing_ms": 0.31884364369781465, "mean_inference_ms": 1.8101035219779944}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4488000, "num_steps_sampled": 2393600, "sample_time_ms": 21725.743, "load_time_ms": 37.383, "grad_time_ms": 10590.941, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005064256023615599, "policy_loss": -0.0036211840342730284, "vf_loss": 92.49484252929688, "vf_explained_var": 0.7542417645454407, "kl": 0.001856558839790523, "entropy": 1.1280810832977295, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2393600, "episodes_total": 5984, "training_iteration": 187, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-18-47", "timestamp": 1660252727, "time_this_iter_s": 28.611520051956177, "time_total_s": 11144.459664821625, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11144.459664821625, "timesteps_since_restore": 2393600, "iterations_since_restore": 187, "perf": {"cpu_util_percent": 32.7725, "ram_util_percent": 58.567499999999995}} -{"episode_reward_max": 636.0, "episode_reward_min": 461.0, "episode_reward_mean": 580.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 
226.0}, "policy_reward_max": {"ppo": 337.0}, "policy_reward_mean": {"ppo": 290.15}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.1, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.74, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.64, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.31, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.48, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.08, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.48, "optimal_onion_potting_agent_1_min": 10, 
"optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.48, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 570.0, 582.0, 570.0, 579.0, 576.0, 593.0, 587.0, 582.0, 582.0, 579.0, 576.0, 633.0, 636.0, 636.0, 587.0, 579.0, 
582.0, 582.0, 519.0, 582.0, 570.0, 633.0, 579.0, 587.0, 581.0, 579.0, 582.0, 582.0, 630.0, 582.0, 567.0, 527.0, 576.0, 630.0, 587.0, 570.0, 627.0, 527.0, 582.0, 624.0, 581.0, 576.0, 576.0, 584.0, 576.0, 587.0, 527.0, 582.0, 576.0, 498.0, 579.0, 582.0, 518.0, 630.0, 582.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 468.0, 627.0, 576.0, 582.0, 582.0, 576.0, 582.0, 510.0, 579.0, 587.0, 582.0, 579.0, 627.0, 576.0, 576.0, 530.0, 579.0, 587.0, 582.0, 587.0, 461.0, 576.0, 582.0, 573.0, 630.0, 587.0, 582.0, 582.0, 627.0, 579.0, 573.0, 582.0, 630.0, 579.0, 576.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 281.0, 294.0, 276.0, 279.0, 303.0, 289.0, 281.0, 298.0, 281.0, 278.0, 298.0, 295.0, 298.0, 292.0, 295.0, 286.0, 296.0, 297.0, 285.0, 289.0, 290.0, 275.0, 301.0, 325.0, 308.0, 317.0, 319.0, 310.0, 326.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 301.0, 281.0, 259.0, 260.0, 292.0, 290.0, 279.0, 291.0, 296.0, 337.0, 283.0, 296.0, 289.0, 298.0, 295.0, 286.0, 292.0, 287.0, 281.0, 301.0, 306.0, 276.0, 308.0, 322.0, 288.0, 294.0, 290.0, 277.0, 262.0, 265.0, 277.0, 299.0, 306.0, 324.0, 285.0, 302.0, 286.0, 284.0, 310.0, 317.0, 277.0, 250.0, 281.0, 301.0, 316.0, 308.0, 289.0, 292.0, 288.0, 288.0, 283.0, 293.0, 286.0, 298.0, 285.0, 291.0, 302.0, 285.0, 270.0, 257.0, 298.0, 284.0, 277.0, 299.0, 255.0, 243.0, 287.0, 292.0, 293.0, 289.0, 250.0, 268.0, 316.0, 314.0, 290.0, 292.0, 305.0, 271.0, 278.0, 309.0, 300.0, 282.0, 294.0, 288.0, 284.0, 298.0, 313.0, 314.0, 297.0, 
285.0, 227.0, 241.0, 310.0, 317.0, 289.0, 287.0, 294.0, 288.0, 293.0, 289.0, 283.0, 293.0, 287.0, 295.0, 258.0, 252.0, 291.0, 288.0, 296.0, 291.0, 298.0, 284.0, 285.0, 294.0, 315.0, 312.0, 289.0, 287.0, 285.0, 291.0, 270.0, 260.0, 285.0, 294.0, 282.0, 305.0, 297.0, 285.0, 301.0, 286.0, 226.0, 235.0, 292.0, 284.0, 287.0, 295.0, 285.0, 288.0, 308.0, 322.0, 287.0, 300.0, 295.0, 287.0, 292.0, 290.0, 304.0, 323.0, 286.0, 293.0, 289.0, 284.0, 284.0, 298.0, 326.0, 304.0, 297.0, 282.0, 285.0, 291.0, 281.0, 298.0, 285.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2497077213230872, "mean_processing_ms": 0.3179872495448108, "mean_inference_ms": 1.8063396156782892}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4512000, "num_steps_sampled": 2406400, "sample_time_ms": 21491.638, "load_time_ms": 36.743, "grad_time_ms": 10320.581, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005896018352359533, "policy_loss": -0.002354246797040105, "vf_loss": 88.0772933959961, "vf_explained_var": 0.767683744430542, "kl": 0.0020883409306406975, "entropy": 1.1149283647537231, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2406400, "episodes_total": 6016, "training_iteration": 188, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-19-13", "timestamp": 1660252753, "time_this_iter_s": 25.91284203529358, "time_total_s": 11170.372506856918, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11170.372506856918, "timesteps_since_restore": 2406400, "iterations_since_restore": 188, "perf": {"cpu_util_percent": 33.778378378378385, 
"ram_util_percent": 58.56486486486485}} -{"episode_reward_max": 636.0, "episode_reward_min": 458.0, "episode_reward_mean": 582.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 337.0}, "policy_reward_mean": {"ppo": 291.145}, "custom_metrics": {"sparse_reward_mean": 201.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.89, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, 
"onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.4, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.32, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.97, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.36, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.4, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 15.4, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.32, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.4, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.32, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 458.0, 587.0, 579.0, 516.0, 582.0, 579.0, 590.0, 587.0, 582.0, 630.0, 533.0, 579.0, 630.0, 582.0, 630.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 584.0, 567.0, 582.0, 584.0, 576.0, 564.0, 582.0, 627.0, 633.0, 627.0, 576.0, 582.0, 582.0, 576.0, 582.0, 510.0, 579.0, 587.0, 582.0, 579.0, 627.0, 576.0, 576.0, 530.0, 579.0, 587.0, 582.0, 587.0, 461.0, 576.0, 582.0, 573.0, 630.0, 587.0, 582.0, 582.0, 627.0, 579.0, 573.0, 582.0, 630.0, 579.0, 576.0, 579.0, 576.0, 576.0, 570.0, 582.0, 570.0, 579.0, 576.0, 593.0, 587.0, 582.0, 582.0, 579.0, 576.0, 633.0, 636.0, 636.0, 587.0, 579.0, 582.0, 582.0, 519.0, 582.0, 570.0, 633.0, 579.0, 587.0, 581.0, 579.0, 582.0, 582.0, 630.0, 582.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 226.0, 232.0, 298.0, 289.0, 290.0, 289.0, 249.0, 267.0, 297.0, 285.0, 303.0, 276.0, 301.0, 289.0, 283.0, 304.0, 293.0, 289.0, 319.0, 311.0, 268.0, 265.0, 283.0, 296.0, 322.0, 308.0, 288.0, 294.0, 311.0, 319.0, 286.0, 290.0, 283.0, 299.0, 283.0, 299.0, 295.0, 292.0, 286.0, 296.0, 278.0, 304.0, 289.0, 287.0, 284.0, 300.0, 290.0, 277.0, 283.0, 299.0, 309.0, 275.0, 282.0, 294.0, 285.0, 279.0, 296.0, 286.0, 318.0, 309.0, 319.0, 314.0, 310.0, 317.0, 289.0, 287.0, 294.0, 288.0, 293.0, 289.0, 283.0, 293.0, 287.0, 295.0, 258.0, 252.0, 291.0, 288.0, 296.0, 291.0, 298.0, 284.0, 285.0, 294.0, 315.0, 312.0, 289.0, 287.0, 285.0, 
291.0, 270.0, 260.0, 285.0, 294.0, 282.0, 305.0, 297.0, 285.0, 301.0, 286.0, 226.0, 235.0, 292.0, 284.0, 287.0, 295.0, 285.0, 288.0, 308.0, 322.0, 287.0, 300.0, 295.0, 287.0, 292.0, 290.0, 304.0, 323.0, 286.0, 293.0, 289.0, 284.0, 284.0, 298.0, 326.0, 304.0, 297.0, 282.0, 285.0, 291.0, 281.0, 298.0, 285.0, 291.0, 295.0, 281.0, 294.0, 276.0, 279.0, 303.0, 289.0, 281.0, 298.0, 281.0, 278.0, 298.0, 295.0, 298.0, 292.0, 295.0, 286.0, 296.0, 297.0, 285.0, 289.0, 290.0, 275.0, 301.0, 325.0, 308.0, 317.0, 319.0, 310.0, 326.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 301.0, 281.0, 259.0, 260.0, 292.0, 290.0, 279.0, 291.0, 296.0, 337.0, 283.0, 296.0, 289.0, 298.0, 295.0, 286.0, 292.0, 287.0, 281.0, 301.0, 306.0, 276.0, 308.0, 322.0, 288.0, 294.0, 290.0, 277.0]}, "sampler_perf": {"mean_env_wait_ms": 1.245424462248642, "mean_processing_ms": 0.3171301897785842, "mean_inference_ms": 1.801636761317335}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4536000, "num_steps_sampled": 2419200, "sample_time_ms": 21345.619, "load_time_ms": 36.86, "grad_time_ms": 10234.708, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035058397334069014, "policy_loss": -0.0047208876349031925, "vf_loss": 87.84651947021484, "vf_explained_var": 0.7590529918670654, "kl": 0.0018027568003162742, "entropy": 1.1158560514450073, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2419200, "episodes_total": 6048, "training_iteration": 189, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-19-43", "timestamp": 1660252783, "time_this_iter_s": 29.704707860946655, "time_total_s": 11200.077214717865, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11200.077214717865, "timesteps_since_restore": 2419200, "iterations_since_restore": 189, "perf": {"cpu_util_percent": 33.13571428571428, "ram_util_percent": 58.52142857142859}} -{"episode_reward_max": 636.0, "episode_reward_min": 458.0, "episode_reward_mean": 581.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 337.0}, "policy_reward_mean": {"ppo": 290.745}, "custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.89, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.76, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, 
"useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.06, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.65, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.04, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.93, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.31, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.65, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.04, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.65, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.04, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 521.0, 582.0, 579.0, 582.0, 522.0, 582.0, 527.0, 576.0, 522.0, 582.0, 582.0, 627.0, 576.0, 587.0, 627.0, 582.0, 579.0, 587.0, 573.0, 584.0, 630.0, 576.0, 579.0, 579.0, 576.0, 579.0, 576.0, 576.0, 570.0, 582.0, 570.0, 579.0, 576.0, 593.0, 587.0, 582.0, 582.0, 579.0, 576.0, 633.0, 636.0, 636.0, 587.0, 579.0, 582.0, 582.0, 519.0, 582.0, 570.0, 633.0, 579.0, 587.0, 581.0, 579.0, 582.0, 582.0, 630.0, 582.0, 567.0, 579.0, 458.0, 587.0, 579.0, 516.0, 582.0, 579.0, 590.0, 587.0, 582.0, 630.0, 533.0, 579.0, 630.0, 582.0, 630.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 584.0, 567.0, 582.0, 584.0, 576.0, 564.0, 582.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 289.0, 288.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 288.0, 286.0, 296.0, 297.0, 285.0, 282.0, 297.0, 293.0, 289.0, 270.0, 251.0, 295.0, 287.0, 288.0, 291.0, 285.0, 297.0, 263.0, 259.0, 285.0, 297.0, 275.0, 252.0, 290.0, 286.0, 279.0, 243.0, 294.0, 288.0, 290.0, 292.0, 311.0, 316.0, 289.0, 287.0, 295.0, 292.0, 314.0, 313.0, 291.0, 291.0, 288.0, 291.0, 304.0, 283.0, 292.0, 281.0, 290.0, 294.0, 320.0, 310.0, 286.0, 
290.0, 283.0, 296.0, 297.0, 282.0, 285.0, 291.0, 281.0, 298.0, 285.0, 291.0, 295.0, 281.0, 294.0, 276.0, 279.0, 303.0, 289.0, 281.0, 298.0, 281.0, 278.0, 298.0, 295.0, 298.0, 292.0, 295.0, 286.0, 296.0, 297.0, 285.0, 289.0, 290.0, 275.0, 301.0, 325.0, 308.0, 317.0, 319.0, 310.0, 326.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 301.0, 281.0, 259.0, 260.0, 292.0, 290.0, 279.0, 291.0, 296.0, 337.0, 283.0, 296.0, 289.0, 298.0, 295.0, 286.0, 292.0, 287.0, 281.0, 301.0, 306.0, 276.0, 308.0, 322.0, 288.0, 294.0, 290.0, 277.0, 287.0, 292.0, 226.0, 232.0, 298.0, 289.0, 290.0, 289.0, 249.0, 267.0, 297.0, 285.0, 303.0, 276.0, 301.0, 289.0, 283.0, 304.0, 293.0, 289.0, 319.0, 311.0, 268.0, 265.0, 283.0, 296.0, 322.0, 308.0, 288.0, 294.0, 311.0, 319.0, 286.0, 290.0, 283.0, 299.0, 283.0, 299.0, 295.0, 292.0, 286.0, 296.0, 278.0, 304.0, 289.0, 287.0, 284.0, 300.0, 290.0, 277.0, 283.0, 299.0, 309.0, 275.0, 282.0, 294.0, 285.0, 279.0, 296.0, 286.0, 318.0, 309.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2411902050042325, "mean_processing_ms": 0.3162859757027142, "mean_inference_ms": 1.7969666432132458}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4560000, "num_steps_sampled": 2432000, "sample_time_ms": 21237.531, "load_time_ms": 36.84, "grad_time_ms": 10068.85, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006991778966039419, "policy_loss": -0.0012481998419389129, "vf_loss": 87.9997329711914, "vf_explained_var": 0.7513763904571533, "kl": 0.0021018313709646463, "entropy": 1.119996428489685, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2432000, "episodes_total": 6080, "training_iteration": 190, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-20-11", "timestamp": 1660252811, "time_this_iter_s": 28.08810520172119, "time_total_s": 11228.165319919586, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11228.165319919586, "timesteps_since_restore": 2432000, "iterations_since_restore": 190, "perf": {"cpu_util_percent": 33.77, "ram_util_percent": 58.345000000000006}} -{"episode_reward_max": 636.0, "episode_reward_min": 441.0, "episode_reward_mean": 580.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 290.06}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 178.92, "shaped_reward_min": 121, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, 
"onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.02, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.84, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.29, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.37, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.17, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.33, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 2, 
"soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.37, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.17, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.37, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.17, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 441.0, 582.0, 636.0, 582.0, 576.0, 582.0, 579.0, 621.0, 567.0, 579.0, 576.0, 567.0, 579.0, 582.0, 587.0, 573.0, 522.0, 581.0, 633.0, 627.0, 579.0, 587.0, 573.0, 579.0, 627.0, 587.0, 579.0, 582.0, 627.0, 522.0, 582.0, 630.0, 582.0, 567.0, 579.0, 458.0, 587.0, 579.0, 516.0, 582.0, 579.0, 590.0, 587.0, 582.0, 630.0, 533.0, 579.0, 630.0, 582.0, 630.0, 576.0, 582.0, 582.0, 587.0, 582.0, 582.0, 576.0, 584.0, 567.0, 582.0, 584.0, 576.0, 564.0, 582.0, 627.0, 633.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 521.0, 582.0, 579.0, 582.0, 522.0, 582.0, 527.0, 576.0, 522.0, 582.0, 582.0, 627.0, 576.0, 587.0, 627.0, 582.0, 579.0, 587.0, 573.0, 584.0, 630.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 287.0, 295.0, 284.0, 220.0, 221.0, 286.0, 296.0, 320.0, 316.0, 294.0, 288.0, 298.0, 278.0, 293.0, 289.0, 288.0, 291.0, 319.0, 302.0, 292.0, 275.0, 296.0, 283.0, 289.0, 287.0, 298.0, 269.0, 289.0, 290.0, 288.0, 294.0, 287.0, 300.0, 290.0, 283.0, 256.0, 
266.0, 299.0, 282.0, 321.0, 312.0, 311.0, 316.0, 296.0, 283.0, 303.0, 284.0, 282.0, 291.0, 286.0, 293.0, 321.0, 306.0, 295.0, 292.0, 285.0, 294.0, 285.0, 297.0, 303.0, 324.0, 261.0, 261.0, 306.0, 276.0, 308.0, 322.0, 288.0, 294.0, 290.0, 277.0, 287.0, 292.0, 226.0, 232.0, 298.0, 289.0, 290.0, 289.0, 249.0, 267.0, 297.0, 285.0, 303.0, 276.0, 301.0, 289.0, 283.0, 304.0, 293.0, 289.0, 319.0, 311.0, 268.0, 265.0, 283.0, 296.0, 322.0, 308.0, 288.0, 294.0, 311.0, 319.0, 286.0, 290.0, 283.0, 299.0, 283.0, 299.0, 295.0, 292.0, 286.0, 296.0, 278.0, 304.0, 289.0, 287.0, 284.0, 300.0, 290.0, 277.0, 283.0, 299.0, 309.0, 275.0, 282.0, 294.0, 285.0, 279.0, 296.0, 286.0, 318.0, 309.0, 319.0, 314.0, 290.0, 289.0, 288.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 288.0, 286.0, 296.0, 297.0, 285.0, 282.0, 297.0, 293.0, 289.0, 270.0, 251.0, 295.0, 287.0, 288.0, 291.0, 285.0, 297.0, 263.0, 259.0, 285.0, 297.0, 275.0, 252.0, 290.0, 286.0, 279.0, 243.0, 294.0, 288.0, 290.0, 292.0, 311.0, 316.0, 289.0, 287.0, 295.0, 292.0, 314.0, 313.0, 291.0, 291.0, 288.0, 291.0, 304.0, 283.0, 292.0, 281.0, 290.0, 294.0, 320.0, 310.0, 286.0, 290.0, 283.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.237012566934364, "mean_processing_ms": 0.3154539706719903, "mean_inference_ms": 1.7926176479402052}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4584000, "num_steps_sampled": 2444800, "sample_time_ms": 21212.772, "load_time_ms": 37.115, "grad_time_ms": 9943.576, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002480272436514497, "policy_loss": -0.005884131882339716, "vf_loss": 89.30957794189453, "vf_explained_var": 0.7648332118988037, "kl": 0.0016885297372937202, "entropy": 1.1330945491790771, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2444800, "episodes_total": 6112, "training_iteration": 191, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-20-41", "timestamp": 1660252841, "time_this_iter_s": 29.47701120376587, "time_total_s": 11257.642331123352, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11257.642331123352, "timesteps_since_restore": 2444800, "iterations_since_restore": 191, "perf": {"cpu_util_percent": 32.38536585365854, "ram_util_percent": 58.368292682926814}} -{"episode_reward_max": 636.0, "episode_reward_min": 441.0, "episode_reward_mean": 583.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 291.615}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.63, "shaped_reward_min": 121, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.94, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.69, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.33, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.67, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.33, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.33, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 587.0, 582.0, 630.0, 584.0, 579.0, 587.0, 584.0, 630.0, 522.0, 573.0, 630.0, 582.0, 570.0, 627.0, 627.0, 573.0, 579.0, 582.0, 582.0, 582.0, 633.0, 582.0, 576.0, 630.0, 582.0, 573.0, 579.0, 544.0, 579.0, 627.0, 564.0, 582.0, 627.0, 633.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 521.0, 582.0, 579.0, 582.0, 522.0, 582.0, 527.0, 576.0, 522.0, 582.0, 582.0, 627.0, 576.0, 587.0, 627.0, 582.0, 579.0, 587.0, 573.0, 584.0, 630.0, 576.0, 579.0, 579.0, 579.0, 441.0, 582.0, 636.0, 582.0, 576.0, 582.0, 579.0, 621.0, 567.0, 579.0, 576.0, 567.0, 579.0, 582.0, 587.0, 573.0, 522.0, 581.0, 633.0, 627.0, 579.0, 587.0, 573.0, 579.0, 627.0, 587.0, 579.0, 582.0, 627.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 288.0, 291.0, 289.0, 
298.0, 296.0, 286.0, 311.0, 319.0, 301.0, 283.0, 298.0, 281.0, 295.0, 292.0, 295.0, 289.0, 311.0, 319.0, 260.0, 262.0, 289.0, 284.0, 331.0, 299.0, 296.0, 286.0, 288.0, 282.0, 328.0, 299.0, 315.0, 312.0, 289.0, 284.0, 290.0, 289.0, 284.0, 298.0, 300.0, 282.0, 299.0, 283.0, 318.0, 315.0, 291.0, 291.0, 285.0, 291.0, 317.0, 313.0, 281.0, 301.0, 290.0, 283.0, 290.0, 289.0, 269.0, 275.0, 291.0, 288.0, 316.0, 311.0, 285.0, 279.0, 296.0, 286.0, 318.0, 309.0, 319.0, 314.0, 290.0, 289.0, 288.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 288.0, 286.0, 296.0, 297.0, 285.0, 282.0, 297.0, 293.0, 289.0, 270.0, 251.0, 295.0, 287.0, 288.0, 291.0, 285.0, 297.0, 263.0, 259.0, 285.0, 297.0, 275.0, 252.0, 290.0, 286.0, 279.0, 243.0, 294.0, 288.0, 290.0, 292.0, 311.0, 316.0, 289.0, 287.0, 295.0, 292.0, 314.0, 313.0, 291.0, 291.0, 288.0, 291.0, 304.0, 283.0, 292.0, 281.0, 290.0, 294.0, 320.0, 310.0, 286.0, 290.0, 283.0, 296.0, 292.0, 287.0, 295.0, 284.0, 220.0, 221.0, 286.0, 296.0, 320.0, 316.0, 294.0, 288.0, 298.0, 278.0, 293.0, 289.0, 288.0, 291.0, 319.0, 302.0, 292.0, 275.0, 296.0, 283.0, 289.0, 287.0, 298.0, 269.0, 289.0, 290.0, 288.0, 294.0, 287.0, 300.0, 290.0, 283.0, 256.0, 266.0, 299.0, 282.0, 321.0, 312.0, 311.0, 316.0, 296.0, 283.0, 303.0, 284.0, 282.0, 291.0, 286.0, 293.0, 321.0, 306.0, 295.0, 292.0, 285.0, 294.0, 285.0, 297.0, 303.0, 324.0, 261.0, 261.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2328827841512373, "mean_processing_ms": 0.3146313961274523, "mean_inference_ms": 1.788385259276164}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4608000, "num_steps_sampled": 2457600, "sample_time_ms": 21198.373, "load_time_ms": 37.126, "grad_time_ms": 9801.163, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001665265765041113, "policy_loss": -0.006540585309267044, "vf_loss": 87.64542388916016, "vf_explained_var": 0.7604849338531494, "kl": 0.0022042018827050924, "entropy": 1.1173783540725708, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2457600, "episodes_total": 6144, "training_iteration": 192, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-21-10", "timestamp": 1660252870, "time_this_iter_s": 29.205125331878662, "time_total_s": 11286.84745645523, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11286.84745645523, "timesteps_since_restore": 2457600, "iterations_since_restore": 192, "perf": {"cpu_util_percent": 31.859523809523814, "ram_util_percent": 58.37619047619048}} -{"episode_reward_max": 636.0, "episode_reward_min": 441.0, "episode_reward_mean": 586.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 293.055}, "custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 180.51, "shaped_reward_min": 121, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.32, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.98, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.55, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.55, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.55, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 587.0, 582.0, 636.0, 579.0, 570.0, 513.0, 579.0, 582.0, 587.0, 579.0, 582.0, 564.0, 633.0, 587.0, 630.0, 627.0, 579.0, 636.0, 582.0, 587.0, 584.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 579.0, 587.0, 579.0, 567.0, 584.0, 630.0, 576.0, 579.0, 579.0, 579.0, 441.0, 582.0, 636.0, 582.0, 576.0, 582.0, 579.0, 621.0, 567.0, 579.0, 576.0, 567.0, 579.0, 582.0, 587.0, 573.0, 522.0, 581.0, 633.0, 627.0, 579.0, 587.0, 573.0, 579.0, 627.0, 587.0, 579.0, 582.0, 627.0, 522.0, 579.0, 579.0, 587.0, 582.0, 630.0, 584.0, 579.0, 587.0, 584.0, 630.0, 522.0, 573.0, 630.0, 582.0, 570.0, 627.0, 627.0, 573.0, 579.0, 582.0, 582.0, 582.0, 633.0, 582.0, 576.0, 630.0, 582.0, 573.0, 579.0, 544.0, 579.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 296.0, 292.0, 295.0, 274.0, 308.0, 312.0, 324.0, 304.0, 275.0, 287.0, 283.0, 261.0, 252.0, 277.0, 302.0, 298.0, 284.0, 291.0, 296.0, 278.0, 301.0, 285.0, 297.0, 285.0, 279.0, 323.0, 310.0, 278.0, 309.0, 308.0, 322.0, 311.0, 316.0, 288.0, 291.0, 314.0, 322.0, 286.0, 296.0, 294.0, 293.0, 286.0, 298.0, 293.0, 289.0, 294.0, 288.0, 312.0, 315.0, 287.0, 295.0, 295.0, 284.0, 293.0, 286.0, 296.0, 283.0, 298.0, 289.0, 284.0, 295.0, 288.0, 279.0, 290.0, 294.0, 320.0, 310.0, 286.0, 290.0, 283.0, 296.0, 292.0, 287.0, 295.0, 284.0, 220.0, 221.0, 286.0, 296.0, 320.0, 316.0, 294.0, 288.0, 298.0, 278.0, 293.0, 289.0, 288.0, 291.0, 319.0, 302.0, 292.0, 275.0, 296.0, 283.0, 289.0, 287.0, 298.0, 269.0, 289.0, 290.0, 288.0, 294.0, 287.0, 300.0, 290.0, 283.0, 256.0, 266.0, 299.0, 282.0, 321.0, 312.0, 311.0, 316.0, 296.0, 283.0, 303.0, 284.0, 282.0, 291.0, 286.0, 293.0, 321.0, 306.0, 295.0, 292.0, 285.0, 294.0, 285.0, 297.0, 303.0, 324.0, 261.0, 261.0, 291.0, 288.0, 288.0, 291.0, 289.0, 298.0, 296.0, 286.0, 311.0, 319.0, 301.0, 283.0, 298.0, 281.0, 295.0, 292.0, 295.0, 289.0, 311.0, 319.0, 260.0, 262.0, 289.0, 284.0, 331.0, 299.0, 296.0, 286.0, 288.0, 282.0, 328.0, 299.0, 315.0, 312.0, 289.0, 284.0, 290.0, 289.0, 284.0, 298.0, 300.0, 282.0, 299.0, 283.0, 318.0, 315.0, 291.0, 291.0, 285.0, 291.0, 317.0, 313.0, 281.0, 301.0, 290.0, 283.0, 290.0, 289.0, 269.0, 275.0, 291.0, 288.0, 316.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2287946124429, "mean_processing_ms": 0.3138148366539807, "mean_inference_ms": 1.7841696712783897}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4632000, "num_steps_sampled": 2470400, "sample_time_ms": 20938.204, "load_time_ms": 37.066, "grad_time_ms": 9514.831, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.005852494388818741, "policy_loss": -0.0018877206603065133, "vf_loss": 83.014892578125, "vf_explained_var": 0.7724275588989258, "kl": 0.0019637763034552336, "entropy": 1.1225537061691284, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2470400, "episodes_total": 6176, "training_iteration": 193, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-21-39", "timestamp": 1660252899, "time_this_iter_s": 28.640799045562744, "time_total_s": 11315.488255500793, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11315.488255500793, "timesteps_since_restore": 2470400, "iterations_since_restore": 193, "perf": {"cpu_util_percent": 32.46, "ram_util_percent": 58.379999999999995}} -{"episode_reward_max": 636.0, "episode_reward_min": 513.0, "episode_reward_mean": 587.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 293.645}, "custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.69, "shaped_reward_min": 153, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, 
"tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.56, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.44, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.71, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.87, 
"useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.71, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.71, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 576.0, 582.0, 579.0, 636.0, 576.0, 582.0, 590.0, 584.0, 582.0, 573.0, 582.0, 525.0, 573.0, 587.0, 624.0, 576.0, 579.0, 582.0, 636.0, 582.0, 579.0, 581.0, 576.0, 587.0, 582.0, 582.0, 582.0, 582.0, 590.0, 582.0, 587.0, 579.0, 582.0, 627.0, 522.0, 579.0, 579.0, 587.0, 582.0, 630.0, 584.0, 579.0, 587.0, 584.0, 630.0, 522.0, 573.0, 630.0, 582.0, 570.0, 627.0, 627.0, 573.0, 579.0, 582.0, 582.0, 582.0, 633.0, 582.0, 576.0, 630.0, 582.0, 573.0, 579.0, 544.0, 579.0, 627.0, 587.0, 587.0, 582.0, 636.0, 579.0, 570.0, 513.0, 579.0, 582.0, 587.0, 579.0, 582.0, 564.0, 633.0, 587.0, 630.0, 627.0, 579.0, 636.0, 582.0, 587.0, 584.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 579.0, 587.0, 579.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 311.0, 295.0, 281.0, 288.0, 294.0, 303.0, 276.0, 317.0, 319.0, 288.0, 288.0, 290.0, 292.0, 291.0, 299.0, 292.0, 292.0, 290.0, 292.0, 284.0, 289.0, 293.0, 289.0, 260.0, 265.0, 272.0, 301.0, 286.0, 301.0, 314.0, 310.0, 280.0, 296.0, 275.0, 304.0, 281.0, 301.0, 316.0, 320.0, 292.0, 290.0, 300.0, 279.0, 297.0, 284.0, 285.0, 291.0, 294.0, 293.0, 291.0, 291.0, 293.0, 289.0, 287.0, 295.0, 292.0, 290.0, 301.0, 289.0, 286.0, 296.0, 294.0, 293.0, 285.0, 294.0, 285.0, 297.0, 303.0, 324.0, 261.0, 261.0, 291.0, 288.0, 288.0, 291.0, 289.0, 298.0, 296.0, 286.0, 311.0, 319.0, 301.0, 283.0, 298.0, 281.0, 295.0, 292.0, 295.0, 289.0, 311.0, 319.0, 260.0, 262.0, 289.0, 284.0, 331.0, 299.0, 296.0, 286.0, 288.0, 282.0, 328.0, 299.0, 315.0, 312.0, 289.0, 284.0, 290.0, 289.0, 284.0, 298.0, 300.0, 282.0, 299.0, 283.0, 318.0, 315.0, 291.0, 291.0, 285.0, 291.0, 317.0, 313.0, 281.0, 301.0, 290.0, 283.0, 290.0, 289.0, 269.0, 275.0, 291.0, 288.0, 316.0, 311.0, 291.0, 296.0, 292.0, 295.0, 274.0, 308.0, 312.0, 324.0, 304.0, 275.0, 287.0, 283.0, 261.0, 252.0, 277.0, 302.0, 298.0, 284.0, 291.0, 296.0, 278.0, 301.0, 285.0, 297.0, 285.0, 279.0, 323.0, 310.0, 278.0, 309.0, 308.0, 322.0, 311.0, 316.0, 288.0, 291.0, 314.0, 322.0, 286.0, 296.0, 294.0, 293.0, 286.0, 298.0, 293.0, 289.0, 294.0, 288.0, 312.0, 315.0, 287.0, 295.0, 295.0, 284.0, 293.0, 286.0, 296.0, 283.0, 298.0, 289.0, 284.0, 295.0, 288.0, 279.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2247614188456033, "mean_processing_ms": 0.3130098403023273, "mean_inference_ms": 1.7801660461855682}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4656000, 
"num_steps_sampled": 2483200, "sample_time_ms": 21025.315, "load_time_ms": 37.175, "grad_time_ms": 9483.469, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005179767496883869, "policy_loss": -0.003016052069142461, "vf_loss": 87.5873031616211, "vf_explained_var": 0.7668092250823975, "kl": 0.0019739444833248854, "entropy": 1.125815987586975, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2483200, "episodes_total": 6208, "training_iteration": 194, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-22-11", "timestamp": 1660252931, "time_this_iter_s": 32.347792863845825, "time_total_s": 11347.83604836464, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11347.83604836464, "timesteps_since_restore": 2483200, "iterations_since_restore": 194, "perf": {"cpu_util_percent": 33.310869565217395, "ram_util_percent": 58.36521739130432}} -{"episode_reward_max": 636.0, "episode_reward_min": 460.0, "episode_reward_mean": 584.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 292.085}, "custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 180.97, "shaped_reward_min": 140, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.83, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.02, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.24, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.83, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.18, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.74, "dish_pickup_agent_1_min": 2, 
"dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.27, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.83, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.18, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.83, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.18, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 579.0, 579.0, 579.0, 536.0, 636.0, 576.0, 579.0, 582.0, 590.0, 587.0, 512.0, 582.0, 576.0, 567.0, 579.0, 579.0, 576.0, 579.0, 582.0, 460.0, 582.0, 633.0, 582.0, 573.0, 630.0, 582.0, 576.0, 587.0, 579.0, 630.0, 579.0, 544.0, 579.0, 627.0, 587.0, 587.0, 582.0, 636.0, 579.0, 570.0, 513.0, 579.0, 582.0, 587.0, 579.0, 582.0, 564.0, 633.0, 587.0, 630.0, 627.0, 579.0, 636.0, 582.0, 587.0, 584.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 579.0, 587.0, 579.0, 567.0, 633.0, 576.0, 582.0, 579.0, 636.0, 576.0, 582.0, 590.0, 584.0, 582.0, 573.0, 582.0, 525.0, 573.0, 587.0, 624.0, 576.0, 579.0, 582.0, 636.0, 582.0, 579.0, 581.0, 576.0, 587.0, 582.0, 
582.0, 582.0, 582.0, 590.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 277.0, 290.0, 292.0, 286.0, 293.0, 288.0, 291.0, 285.0, 294.0, 266.0, 270.0, 326.0, 310.0, 290.0, 286.0, 293.0, 286.0, 291.0, 291.0, 303.0, 287.0, 295.0, 292.0, 247.0, 265.0, 283.0, 299.0, 298.0, 278.0, 289.0, 278.0, 283.0, 296.0, 293.0, 286.0, 301.0, 275.0, 287.0, 292.0, 291.0, 291.0, 229.0, 231.0, 281.0, 301.0, 313.0, 320.0, 290.0, 292.0, 281.0, 292.0, 322.0, 308.0, 301.0, 281.0, 288.0, 288.0, 306.0, 281.0, 290.0, 289.0, 322.0, 308.0, 290.0, 289.0, 269.0, 275.0, 291.0, 288.0, 316.0, 311.0, 291.0, 296.0, 292.0, 295.0, 274.0, 308.0, 312.0, 324.0, 304.0, 275.0, 287.0, 283.0, 261.0, 252.0, 277.0, 302.0, 298.0, 284.0, 291.0, 296.0, 278.0, 301.0, 285.0, 297.0, 285.0, 279.0, 323.0, 310.0, 278.0, 309.0, 308.0, 322.0, 311.0, 316.0, 288.0, 291.0, 314.0, 322.0, 286.0, 296.0, 294.0, 293.0, 286.0, 298.0, 293.0, 289.0, 294.0, 288.0, 312.0, 315.0, 287.0, 295.0, 295.0, 284.0, 293.0, 286.0, 296.0, 283.0, 298.0, 289.0, 284.0, 295.0, 288.0, 279.0, 322.0, 311.0, 295.0, 281.0, 288.0, 294.0, 303.0, 276.0, 317.0, 319.0, 288.0, 288.0, 290.0, 292.0, 291.0, 299.0, 292.0, 292.0, 290.0, 292.0, 284.0, 289.0, 293.0, 289.0, 260.0, 265.0, 272.0, 301.0, 286.0, 301.0, 314.0, 310.0, 280.0, 296.0, 275.0, 304.0, 281.0, 301.0, 316.0, 320.0, 292.0, 290.0, 300.0, 279.0, 297.0, 284.0, 285.0, 291.0, 294.0, 293.0, 291.0, 291.0, 293.0, 289.0, 287.0, 295.0, 292.0, 290.0, 301.0, 289.0, 286.0, 296.0, 294.0, 293.0]}, 
"sampler_perf": {"mean_env_wait_ms": 1.2207711535428367, "mean_processing_ms": 0.31221340698687855, "mean_inference_ms": 1.7762098630677763}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4680000, "num_steps_sampled": 2496000, "sample_time_ms": 21023.805, "load_time_ms": 36.814, "grad_time_ms": 9071.865, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004175250884145498, "policy_loss": -0.004759882111102343, "vf_loss": 95.0163803100586, "vf_explained_var": 0.7534318566322327, "kl": 0.0021568441297858953, "entropy": 1.1329950094223022, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2496000, "episodes_total": 6240, "training_iteration": 195, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-22-40", "timestamp": 1660252960, "time_this_iter_s": 28.54381275177002, "time_total_s": 11376.37986111641, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11376.37986111641, "timesteps_since_restore": 2496000, "iterations_since_restore": 195, "perf": {"cpu_util_percent": 32.82000000000001, "ram_util_percent": 58.44}} -{"episode_reward_max": 636.0, "episode_reward_min": 460.0, "episode_reward_mean": 583.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 291.515}, "custom_metrics": {"sparse_reward_mean": 201.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 180.23, "shaped_reward_min": 140, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.29, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.81, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.61, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.38, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 
17.52, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.38, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.52, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.38, 
"viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.52, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 519.0, 510.0, 584.0, 587.0, 630.0, 587.0, 582.0, 587.0, 579.0, 573.0, 579.0, 633.0, 579.0, 587.0, 633.0, 579.0, 582.0, 579.0, 570.0, 630.0, 630.0, 579.0, 573.0, 582.0, 573.0, 579.0, 567.0, 582.0, 573.0, 630.0, 582.0, 579.0, 587.0, 579.0, 567.0, 633.0, 576.0, 582.0, 579.0, 636.0, 576.0, 582.0, 590.0, 584.0, 582.0, 573.0, 582.0, 525.0, 573.0, 587.0, 624.0, 576.0, 579.0, 582.0, 636.0, 582.0, 579.0, 581.0, 576.0, 587.0, 582.0, 582.0, 582.0, 582.0, 
590.0, 582.0, 587.0, 573.0, 582.0, 579.0, 579.0, 579.0, 536.0, 636.0, 576.0, 579.0, 582.0, 590.0, 587.0, 512.0, 582.0, 576.0, 567.0, 579.0, 579.0, 576.0, 579.0, 582.0, 460.0, 582.0, 633.0, 582.0, 573.0, 630.0, 582.0, 576.0, 587.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 281.0, 257.0, 262.0, 242.0, 268.0, 289.0, 295.0, 298.0, 289.0, 313.0, 317.0, 285.0, 302.0, 286.0, 296.0, 285.0, 302.0, 283.0, 296.0, 289.0, 284.0, 296.0, 283.0, 314.0, 319.0, 285.0, 294.0, 293.0, 294.0, 306.0, 327.0, 287.0, 292.0, 298.0, 284.0, 298.0, 281.0, 281.0, 289.0, 316.0, 314.0, 311.0, 319.0, 293.0, 286.0, 282.0, 291.0, 292.0, 290.0, 288.0, 285.0, 285.0, 294.0, 274.0, 293.0, 293.0, 289.0, 277.0, 296.0, 321.0, 309.0, 288.0, 294.0, 296.0, 283.0, 298.0, 289.0, 284.0, 295.0, 288.0, 279.0, 322.0, 311.0, 295.0, 281.0, 288.0, 294.0, 303.0, 276.0, 317.0, 319.0, 288.0, 288.0, 290.0, 292.0, 291.0, 299.0, 292.0, 292.0, 290.0, 292.0, 284.0, 289.0, 293.0, 289.0, 260.0, 265.0, 272.0, 301.0, 286.0, 301.0, 314.0, 310.0, 280.0, 296.0, 275.0, 304.0, 281.0, 301.0, 316.0, 320.0, 292.0, 290.0, 300.0, 279.0, 297.0, 284.0, 285.0, 291.0, 294.0, 293.0, 291.0, 291.0, 293.0, 289.0, 287.0, 295.0, 292.0, 290.0, 301.0, 289.0, 286.0, 296.0, 294.0, 293.0, 296.0, 277.0, 290.0, 292.0, 286.0, 293.0, 288.0, 291.0, 285.0, 294.0, 266.0, 270.0, 326.0, 310.0, 290.0, 286.0, 293.0, 286.0, 291.0, 291.0, 303.0, 287.0, 295.0, 292.0, 247.0, 265.0, 283.0, 299.0, 298.0, 278.0, 289.0, 278.0, 283.0, 296.0, 293.0, 286.0, 301.0, 
275.0, 287.0, 292.0, 291.0, 291.0, 229.0, 231.0, 281.0, 301.0, 313.0, 320.0, 290.0, 292.0, 281.0, 292.0, 322.0, 308.0, 301.0, 281.0, 288.0, 288.0, 306.0, 281.0, 290.0, 289.0, 322.0, 308.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2168243946409252, "mean_processing_ms": 0.31142476273286085, "mean_inference_ms": 1.7722672001307933}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4704000, "num_steps_sampled": 2508800, "sample_time_ms": 20149.8, "load_time_ms": 36.819, "grad_time_ms": 8683.766, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005854760762304068, "policy_loss": -0.0029166024178266525, "vf_loss": 93.378173828125, "vf_explained_var": 0.7535201907157898, "kl": 0.00207762373611331, "entropy": 1.1329069137573242, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2508800, "episodes_total": 6272, "training_iteration": 196, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-23-08", "timestamp": 1660252988, "time_this_iter_s": 28.434014320373535, "time_total_s": 11404.813875436783, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11404.813875436783, "timesteps_since_restore": 2508800, "iterations_since_restore": 196, "perf": {"cpu_util_percent": 34.197500000000005, "ram_util_percent": 58.575}} -{"episode_reward_max": 636.0, "episode_reward_min": 460.0, "episode_reward_mean": 581.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 290.86}, "custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 160, 
"sparse_reward_max": 220, "shaped_reward_mean": 179.72, "shaped_reward_min": 140, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.38, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.95, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.52, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.23, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.05, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.52, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.23, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.52, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.23, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 587.0, 587.0, 570.0, 579.0, 564.0, 582.0, 627.0, 539.0, 582.0, 630.0, 582.0, 579.0, 587.0, 573.0, 579.0, 579.0, 633.0, 582.0, 584.0, 582.0, 582.0, 582.0, 587.0, 579.0, 582.0, 579.0, 627.0, 576.0, 582.0, 516.0, 503.0, 582.0, 590.0, 582.0, 587.0, 573.0, 582.0, 579.0, 579.0, 579.0, 
536.0, 636.0, 576.0, 579.0, 582.0, 590.0, 587.0, 512.0, 582.0, 576.0, 567.0, 579.0, 579.0, 576.0, 579.0, 582.0, 460.0, 582.0, 633.0, 582.0, 573.0, 630.0, 582.0, 576.0, 587.0, 579.0, 630.0, 579.0, 519.0, 510.0, 584.0, 587.0, 630.0, 587.0, 582.0, 587.0, 579.0, 573.0, 579.0, 633.0, 579.0, 587.0, 633.0, 579.0, 582.0, 579.0, 570.0, 630.0, 630.0, 579.0, 573.0, 582.0, 573.0, 579.0, 567.0, 582.0, 573.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 297.0, 290.0, 287.0, 300.0, 289.0, 281.0, 290.0, 289.0, 273.0, 291.0, 291.0, 291.0, 327.0, 300.0, 272.0, 267.0, 283.0, 299.0, 319.0, 311.0, 292.0, 290.0, 289.0, 290.0, 294.0, 293.0, 288.0, 285.0, 290.0, 289.0, 276.0, 303.0, 310.0, 323.0, 286.0, 296.0, 299.0, 285.0, 295.0, 287.0, 281.0, 301.0, 288.0, 294.0, 289.0, 298.0, 293.0, 286.0, 288.0, 294.0, 281.0, 298.0, 321.0, 306.0, 296.0, 280.0, 293.0, 289.0, 250.0, 266.0, 256.0, 247.0, 292.0, 290.0, 301.0, 289.0, 286.0, 296.0, 294.0, 293.0, 296.0, 277.0, 290.0, 292.0, 286.0, 293.0, 288.0, 291.0, 285.0, 294.0, 266.0, 270.0, 326.0, 310.0, 290.0, 286.0, 293.0, 286.0, 291.0, 291.0, 303.0, 287.0, 295.0, 292.0, 247.0, 265.0, 283.0, 299.0, 298.0, 278.0, 289.0, 278.0, 283.0, 296.0, 293.0, 286.0, 301.0, 275.0, 287.0, 292.0, 291.0, 291.0, 229.0, 231.0, 281.0, 301.0, 313.0, 320.0, 290.0, 292.0, 281.0, 292.0, 322.0, 308.0, 301.0, 281.0, 288.0, 288.0, 306.0, 281.0, 290.0, 289.0, 322.0, 308.0, 298.0, 281.0, 257.0, 262.0, 242.0, 268.0, 289.0, 295.0, 298.0, 289.0, 313.0, 317.0, 285.0, 
302.0, 286.0, 296.0, 285.0, 302.0, 283.0, 296.0, 289.0, 284.0, 296.0, 283.0, 314.0, 319.0, 285.0, 294.0, 293.0, 294.0, 306.0, 327.0, 287.0, 292.0, 298.0, 284.0, 298.0, 281.0, 281.0, 289.0, 316.0, 314.0, 311.0, 319.0, 293.0, 286.0, 282.0, 291.0, 292.0, 290.0, 288.0, 285.0, 285.0, 294.0, 274.0, 293.0, 293.0, 289.0, 277.0, 296.0, 321.0, 309.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2129132177848776, "mean_processing_ms": 0.31064427839869574, "mean_inference_ms": 1.768252985066466}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4728000, "num_steps_sampled": 2521600, "sample_time_ms": 20346.302, "load_time_ms": 37.457, "grad_time_ms": 8681.875, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004015960264950991, "policy_loss": -0.004478786140680313, "vf_loss": 90.65137481689453, "vf_explained_var": 0.7576496601104736, "kl": 0.0018428467446938157, "entropy": 1.140787959098816, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2521600, "episodes_total": 6304, "training_iteration": 197, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-23-39", "timestamp": 1660253019, "time_this_iter_s": 30.565216064453125, "time_total_s": 11435.379091501236, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11435.379091501236, "timesteps_since_restore": 2521600, "iterations_since_restore": 197, "perf": {"cpu_util_percent": 35.77272727272727, "ram_util_percent": 59.17272727272728}} -{"episode_reward_max": 633.0, "episode_reward_min": 498.0, "episode_reward_mean": 580.88, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 290.44}, "custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.28, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.22, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.24, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.92, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.51, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, 
"useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.19, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.43, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.19, 
"optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.19, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 498.0, 587.0, 573.0, 576.0, 587.0, 582.0, 587.0, 525.0, 576.0, 533.0, 
630.0, 576.0, 573.0, 582.0, 582.0, 579.0, 582.0, 527.0, 573.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 579.0, 579.0, 579.0, 582.0, 576.0, 587.0, 579.0, 630.0, 579.0, 519.0, 510.0, 584.0, 587.0, 630.0, 587.0, 582.0, 587.0, 579.0, 573.0, 579.0, 633.0, 579.0, 587.0, 633.0, 579.0, 582.0, 579.0, 570.0, 630.0, 630.0, 579.0, 573.0, 582.0, 573.0, 579.0, 567.0, 582.0, 573.0, 630.0, 582.0, 587.0, 587.0, 587.0, 570.0, 579.0, 564.0, 582.0, 627.0, 539.0, 582.0, 630.0, 582.0, 579.0, 587.0, 573.0, 579.0, 579.0, 633.0, 582.0, 584.0, 582.0, 582.0, 582.0, 587.0, 579.0, 582.0, 579.0, 627.0, 576.0, 582.0, 516.0, 503.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 321.0, 253.0, 245.0, 287.0, 300.0, 281.0, 292.0, 288.0, 288.0, 294.0, 293.0, 285.0, 297.0, 288.0, 299.0, 266.0, 259.0, 283.0, 293.0, 265.0, 268.0, 299.0, 331.0, 293.0, 283.0, 293.0, 280.0, 286.0, 296.0, 285.0, 297.0, 299.0, 280.0, 303.0, 279.0, 267.0, 260.0, 293.0, 280.0, 282.0, 294.0, 299.0, 283.0, 295.0, 284.0, 286.0, 293.0, 298.0, 284.0, 293.0, 286.0, 292.0, 284.0, 287.0, 292.0, 293.0, 286.0, 288.0, 291.0, 294.0, 285.0, 298.0, 284.0, 288.0, 288.0, 306.0, 281.0, 290.0, 289.0, 322.0, 308.0, 298.0, 281.0, 257.0, 262.0, 242.0, 268.0, 289.0, 295.0, 298.0, 289.0, 313.0, 317.0, 285.0, 302.0, 286.0, 296.0, 285.0, 302.0, 283.0, 296.0, 289.0, 284.0, 296.0, 283.0, 314.0, 319.0, 285.0, 294.0, 293.0, 294.0, 306.0, 327.0, 287.0, 292.0, 298.0, 284.0, 298.0, 281.0, 281.0, 289.0, 316.0, 314.0, 311.0, 319.0, 293.0, 286.0, 282.0, 
291.0, 292.0, 290.0, 288.0, 285.0, 285.0, 294.0, 274.0, 293.0, 293.0, 289.0, 277.0, 296.0, 321.0, 309.0, 288.0, 294.0, 296.0, 291.0, 297.0, 290.0, 287.0, 300.0, 289.0, 281.0, 290.0, 289.0, 273.0, 291.0, 291.0, 291.0, 327.0, 300.0, 272.0, 267.0, 283.0, 299.0, 319.0, 311.0, 292.0, 290.0, 289.0, 290.0, 294.0, 293.0, 288.0, 285.0, 290.0, 289.0, 276.0, 303.0, 310.0, 323.0, 286.0, 296.0, 299.0, 285.0, 295.0, 287.0, 281.0, 301.0, 288.0, 294.0, 289.0, 298.0, 293.0, 286.0, 288.0, 294.0, 281.0, 298.0, 321.0, 306.0, 296.0, 280.0, 293.0, 289.0, 250.0, 266.0, 256.0, 247.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2090434460421848, "mean_processing_ms": 0.3098731145088783, "mean_inference_ms": 1.7643973758778697}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4752000, "num_steps_sampled": 2534400, "sample_time_ms": 20635.573, "load_time_ms": 37.478, "grad_time_ms": 8930.84, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.008007452823221684, "policy_loss": -0.00045695496373809874, "vf_loss": 90.38675689697266, "vf_explained_var": 0.7534659504890442, "kl": 0.0025916944723576307, "entropy": 1.1485199928283691, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2534400, "episodes_total": 6336, "training_iteration": 198, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-24-10", "timestamp": 1660253050, "time_this_iter_s": 31.295607089996338, "time_total_s": 11466.674698591232, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": 
false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11466.674698591232, "timesteps_since_restore": 2534400, "iterations_since_restore": 198, "perf": {"cpu_util_percent": 
30.265909090909087, "ram_util_percent": 58.70227272727273}} -{"episode_reward_max": 633.0, "episode_reward_min": 498.0, "episode_reward_mean": 579.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 289.78}, "custom_metrics": {"sparse_reward_mean": 200.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.16, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.35, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.22, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.02, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, 
"onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.77, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, 
"optimal_onion_potting_agent_0_mean": 15.48, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 576.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 579.0, 576.0, 516.0, 579.0, 590.0, 573.0, 582.0, 587.0, 579.0, 576.0, 576.0, 582.0, 536.0, 582.0, 582.0, 633.0, 582.0, 582.0, 582.0, 579.0, 564.0, 579.0, 582.0, 582.0, 573.0, 630.0, 582.0, 587.0, 587.0, 587.0, 570.0, 579.0, 564.0, 582.0, 627.0, 539.0, 582.0, 630.0, 582.0, 579.0, 587.0, 573.0, 579.0, 579.0, 633.0, 582.0, 584.0, 582.0, 582.0, 582.0, 587.0, 579.0, 582.0, 579.0, 627.0, 576.0, 582.0, 516.0, 503.0, 630.0, 498.0, 587.0, 573.0, 576.0, 587.0, 582.0, 587.0, 525.0, 576.0, 533.0, 630.0, 576.0, 573.0, 582.0, 582.0, 579.0, 582.0, 527.0, 573.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 579.0, 579.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 278.0, 285.0, 297.0, 287.0, 289.0, 284.0, 292.0, 290.0, 292.0, 316.0, 311.0, 280.0, 302.0, 314.0, 316.0, 296.0, 283.0, 293.0, 286.0, 283.0, 293.0, 265.0, 251.0, 288.0, 291.0, 296.0, 294.0, 287.0, 286.0, 291.0, 291.0, 294.0, 293.0, 283.0, 296.0, 298.0, 278.0, 290.0, 286.0, 293.0, 289.0, 266.0, 270.0, 298.0, 284.0, 295.0, 287.0, 318.0, 315.0, 296.0, 286.0, 280.0, 302.0, 288.0, 294.0, 293.0, 286.0, 287.0, 277.0, 280.0, 299.0, 296.0, 286.0, 293.0, 289.0, 277.0, 296.0, 321.0, 309.0, 288.0, 294.0, 296.0, 291.0, 297.0, 290.0, 287.0, 300.0, 289.0, 281.0, 290.0, 289.0, 273.0, 291.0, 291.0, 291.0, 327.0, 300.0, 272.0, 267.0, 283.0, 
299.0, 319.0, 311.0, 292.0, 290.0, 289.0, 290.0, 294.0, 293.0, 288.0, 285.0, 290.0, 289.0, 276.0, 303.0, 310.0, 323.0, 286.0, 296.0, 299.0, 285.0, 295.0, 287.0, 281.0, 301.0, 288.0, 294.0, 289.0, 298.0, 293.0, 286.0, 288.0, 294.0, 281.0, 298.0, 321.0, 306.0, 296.0, 280.0, 293.0, 289.0, 250.0, 266.0, 256.0, 247.0, 309.0, 321.0, 253.0, 245.0, 287.0, 300.0, 281.0, 292.0, 288.0, 288.0, 294.0, 293.0, 285.0, 297.0, 288.0, 299.0, 266.0, 259.0, 283.0, 293.0, 265.0, 268.0, 299.0, 331.0, 293.0, 283.0, 293.0, 280.0, 286.0, 296.0, 285.0, 297.0, 299.0, 280.0, 303.0, 279.0, 267.0, 260.0, 293.0, 280.0, 282.0, 294.0, 299.0, 283.0, 295.0, 284.0, 286.0, 293.0, 298.0, 284.0, 293.0, 286.0, 292.0, 284.0, 287.0, 292.0, 293.0, 286.0, 288.0, 291.0, 294.0, 285.0, 298.0, 284.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2052169809302002, "mean_processing_ms": 0.3091122486533884, "mean_inference_ms": 1.7607399677301792}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4776000, "num_steps_sampled": 2547200, "sample_time_ms": 20764.539, "load_time_ms": 37.37, "grad_time_ms": 8925.603, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002967844484373927, "policy_loss": -0.005320979747921228, "vf_loss": 88.59487915039062, "vf_explained_var": 0.7679054141044617, "kl": 0.0019444593926891685, "entropy": 1.141340732574463, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2547200, "episodes_total": 6368, "training_iteration": 199, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-24-41", "timestamp": 1660253081, "time_this_iter_s": 30.933609008789062, "time_total_s": 11497.608307600021, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11497.608307600021, "timesteps_since_restore": 2547200, "iterations_since_restore": 199, "perf": {"cpu_util_percent": 31.409090909090903, "ram_util_percent": 58.724999999999994}} -{"episode_reward_max": 633.0, "episode_reward_min": 368.0, "episode_reward_mean": 574.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 179.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 287.24}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 177.28, "shaped_reward_min": 128, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.35, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, 
"useful_onion_pickup_agent_0_mean": 15.96, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.14, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.05, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.38, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.28, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.48, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 576.0, 587.0, 579.0, 579.0, 582.0, 576.0, 489.0, 530.0, 579.0, 570.0, 630.0, 582.0, 587.0, 587.0, 567.0, 587.0, 368.0, 579.0, 498.0, 587.0, 573.0, 582.0, 579.0, 633.0, 587.0, 525.0, 579.0, 567.0, 633.0, 582.0, 582.0, 576.0, 582.0, 516.0, 503.0, 630.0, 498.0, 587.0, 573.0, 576.0, 587.0, 582.0, 587.0, 525.0, 576.0, 533.0, 630.0, 576.0, 573.0, 582.0, 582.0, 579.0, 582.0, 527.0, 573.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 576.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 579.0, 576.0, 516.0, 579.0, 590.0, 573.0, 582.0, 587.0, 579.0, 576.0, 576.0, 582.0, 536.0, 582.0, 582.0, 633.0, 582.0, 582.0, 582.0, 579.0, 564.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 279.0, 297.0, 309.0, 278.0, 286.0, 293.0, 285.0, 294.0, 295.0, 287.0, 284.0, 292.0, 249.0, 240.0, 264.0, 266.0, 273.0, 306.0, 281.0, 289.0, 316.0, 314.0, 283.0, 299.0, 292.0, 295.0, 285.0, 302.0, 289.0, 278.0, 299.0, 288.0, 189.0, 179.0, 293.0, 286.0, 253.0, 245.0, 296.0, 291.0, 291.0, 282.0, 293.0, 289.0, 290.0, 289.0, 322.0, 311.0, 293.0, 294.0, 256.0, 269.0, 280.0, 299.0, 284.0, 283.0, 316.0, 317.0, 
291.0, 291.0, 291.0, 291.0, 296.0, 280.0, 293.0, 289.0, 250.0, 266.0, 256.0, 247.0, 309.0, 321.0, 253.0, 245.0, 287.0, 300.0, 281.0, 292.0, 288.0, 288.0, 294.0, 293.0, 285.0, 297.0, 288.0, 299.0, 266.0, 259.0, 283.0, 293.0, 265.0, 268.0, 299.0, 331.0, 293.0, 283.0, 293.0, 280.0, 286.0, 296.0, 285.0, 297.0, 299.0, 280.0, 303.0, 279.0, 267.0, 260.0, 293.0, 280.0, 282.0, 294.0, 299.0, 283.0, 295.0, 284.0, 286.0, 293.0, 298.0, 284.0, 293.0, 286.0, 292.0, 284.0, 287.0, 292.0, 293.0, 286.0, 288.0, 291.0, 294.0, 285.0, 298.0, 284.0, 301.0, 278.0, 285.0, 297.0, 287.0, 289.0, 284.0, 292.0, 290.0, 292.0, 316.0, 311.0, 280.0, 302.0, 314.0, 316.0, 296.0, 283.0, 293.0, 286.0, 283.0, 293.0, 265.0, 251.0, 288.0, 291.0, 296.0, 294.0, 287.0, 286.0, 291.0, 291.0, 294.0, 293.0, 283.0, 296.0, 298.0, 278.0, 290.0, 286.0, 293.0, 289.0, 266.0, 270.0, 298.0, 284.0, 295.0, 287.0, 318.0, 315.0, 296.0, 286.0, 280.0, 302.0, 288.0, 294.0, 293.0, 286.0, 287.0, 277.0, 280.0, 299.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.2014273105628746, "mean_processing_ms": 0.30835462294201915, "mean_inference_ms": 1.7571376095037237}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4800000, "num_steps_sampled": 2560000, "sample_time_ms": 20842.97, "load_time_ms": 37.195, "grad_time_ms": 9109.166, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005231037735939026, "policy_loss": -0.0033684810623526573, "vf_loss": 91.692626953125, "vf_explained_var": 0.7593931555747986, "kl": 0.002331085503101349, "entropy": 1.1394835710525513, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2560000, "episodes_total": 6400, "training_iteration": 200, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-25-12", "timestamp": 1660253112, "time_this_iter_s": 30.703901290893555, "time_total_s": 11528.312208890915, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11528.312208890915, "timesteps_since_restore": 2560000, "iterations_since_restore": 200, "perf": {"cpu_util_percent": 32.688372093023254, "ram_util_percent": 58.67906976744187}} -{"episode_reward_max": 633.0, "episode_reward_min": 299.0, "episode_reward_mean": 576.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 139.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 288.47}, "custom_metrics": {"sparse_reward_mean": 199.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 177.74, "shaped_reward_min": 99, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.98, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.07, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.66, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.75, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.63, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.9, 
"soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.63, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.63, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 587.0, 627.0, 579.0, 627.0, 579.0, 630.0, 630.0, 630.0, 579.0, 579.0, 299.0, 633.0, 582.0, 582.0, 522.0, 630.0, 582.0, 627.0, 567.0, 564.0, 582.0, 555.0, 582.0, 579.0, 576.0, 579.0, 533.0, 576.0, 576.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 576.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 579.0, 576.0, 516.0, 579.0, 590.0, 573.0, 582.0, 587.0, 579.0, 576.0, 576.0, 582.0, 536.0, 582.0, 582.0, 633.0, 582.0, 582.0, 582.0, 579.0, 564.0, 579.0, 582.0, 630.0, 576.0, 587.0, 579.0, 579.0, 582.0, 576.0, 489.0, 530.0, 579.0, 570.0, 630.0, 582.0, 587.0, 587.0, 567.0, 587.0, 368.0, 579.0, 498.0, 587.0, 573.0, 582.0, 579.0, 633.0, 587.0, 525.0, 579.0, 567.0, 633.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 287.0, 300.0, 309.0, 318.0, 298.0, 281.0, 304.0, 323.0, 297.0, 282.0, 322.0, 308.0, 305.0, 325.0, 308.0, 322.0, 277.0, 302.0, 284.0, 295.0, 160.0, 139.0, 322.0, 311.0, 289.0, 293.0, 304.0, 278.0, 253.0, 
269.0, 304.0, 326.0, 302.0, 280.0, 303.0, 324.0, 280.0, 287.0, 285.0, 279.0, 286.0, 296.0, 279.0, 276.0, 295.0, 287.0, 283.0, 296.0, 283.0, 293.0, 282.0, 297.0, 265.0, 268.0, 293.0, 283.0, 289.0, 287.0, 291.0, 291.0, 295.0, 287.0, 293.0, 286.0, 288.0, 291.0, 294.0, 285.0, 298.0, 284.0, 301.0, 278.0, 285.0, 297.0, 287.0, 289.0, 284.0, 292.0, 290.0, 292.0, 316.0, 311.0, 280.0, 302.0, 314.0, 316.0, 296.0, 283.0, 293.0, 286.0, 283.0, 293.0, 265.0, 251.0, 288.0, 291.0, 296.0, 294.0, 287.0, 286.0, 291.0, 291.0, 294.0, 293.0, 283.0, 296.0, 298.0, 278.0, 290.0, 286.0, 293.0, 289.0, 266.0, 270.0, 298.0, 284.0, 295.0, 287.0, 318.0, 315.0, 296.0, 286.0, 280.0, 302.0, 288.0, 294.0, 293.0, 286.0, 287.0, 277.0, 280.0, 299.0, 296.0, 286.0, 316.0, 314.0, 279.0, 297.0, 309.0, 278.0, 286.0, 293.0, 285.0, 294.0, 295.0, 287.0, 284.0, 292.0, 249.0, 240.0, 264.0, 266.0, 273.0, 306.0, 281.0, 289.0, 316.0, 314.0, 283.0, 299.0, 292.0, 295.0, 285.0, 302.0, 289.0, 278.0, 299.0, 288.0, 189.0, 179.0, 293.0, 286.0, 253.0, 245.0, 296.0, 291.0, 291.0, 282.0, 293.0, 289.0, 290.0, 289.0, 322.0, 311.0, 293.0, 294.0, 256.0, 269.0, 280.0, 299.0, 284.0, 283.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1976757528000228, "mean_processing_ms": 0.3076033986967843, "mean_inference_ms": 1.7536061115922081}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4824000, "num_steps_sampled": 2572800, "sample_time_ms": 20929.774, "load_time_ms": 36.844, "grad_time_ms": 9259.169, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019615469500422478, "policy_loss": -0.006233252584934235, "vf_loss": 87.63289642333984, "vf_explained_var": 0.7635285258293152, "kl": 0.0017622611485421658, "entropy": 1.1369844675064087, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2572800, "episodes_total": 6432, 
"training_iteration": 201, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-25-44", "timestamp": 1660253144, "time_this_iter_s": 31.842552185058594, "time_total_s": 11560.154761075974, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11560.154761075974, "timesteps_since_restore": 2572800, "iterations_since_restore": 201, "perf": {"cpu_util_percent": 24.447826086956525, "ram_util_percent": 58.667391304347845}} -{"episode_reward_max": 633.0, "episode_reward_min": 299.0, "episode_reward_mean": 573.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 139.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.725}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 177.05, "shaped_reward_min": 99, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.26, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.02, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.56, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.33, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.46, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.88, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.28, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.46, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.88, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.46, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.88, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 533.0, 576.0, 582.0, 579.0, 582.0, 579.0, 582.0, 582.0, 582.0, 479.0, 576.0, 519.0, 525.0, 627.0, 522.0, 582.0, 587.0, 587.0, 627.0, 582.0, 582.0, 582.0, 579.0, 582.0, 579.0, 579.0, 582.0, 475.0, 587.0, 582.0, 579.0, 579.0, 564.0, 579.0, 582.0, 630.0, 576.0, 587.0, 579.0, 579.0, 582.0, 576.0, 489.0, 530.0, 579.0, 570.0, 630.0, 582.0, 587.0, 587.0, 567.0, 587.0, 368.0, 579.0, 498.0, 587.0, 573.0, 582.0, 579.0, 633.0, 587.0, 525.0, 579.0, 567.0, 633.0, 582.0, 582.0, 576.0, 587.0, 627.0, 579.0, 627.0, 579.0, 630.0, 630.0, 630.0, 579.0, 579.0, 299.0, 633.0, 582.0, 582.0, 522.0, 630.0, 582.0, 627.0, 567.0, 564.0, 582.0, 555.0, 582.0, 579.0, 576.0, 579.0, 533.0, 576.0, 576.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 285.0, 261.0, 272.0, 283.0, 
293.0, 297.0, 285.0, 288.0, 291.0, 283.0, 299.0, 291.0, 288.0, 281.0, 301.0, 283.0, 299.0, 299.0, 283.0, 248.0, 231.0, 295.0, 281.0, 264.0, 255.0, 260.0, 265.0, 309.0, 318.0, 274.0, 248.0, 294.0, 288.0, 283.0, 304.0, 301.0, 286.0, 318.0, 309.0, 288.0, 294.0, 293.0, 289.0, 296.0, 286.0, 299.0, 280.0, 281.0, 301.0, 300.0, 279.0, 288.0, 291.0, 289.0, 293.0, 243.0, 232.0, 300.0, 287.0, 297.0, 285.0, 284.0, 295.0, 293.0, 286.0, 287.0, 277.0, 280.0, 299.0, 296.0, 286.0, 316.0, 314.0, 279.0, 297.0, 309.0, 278.0, 286.0, 293.0, 285.0, 294.0, 295.0, 287.0, 284.0, 292.0, 249.0, 240.0, 264.0, 266.0, 273.0, 306.0, 281.0, 289.0, 316.0, 314.0, 283.0, 299.0, 292.0, 295.0, 285.0, 302.0, 289.0, 278.0, 299.0, 288.0, 189.0, 179.0, 293.0, 286.0, 253.0, 245.0, 296.0, 291.0, 291.0, 282.0, 293.0, 289.0, 290.0, 289.0, 322.0, 311.0, 293.0, 294.0, 256.0, 269.0, 280.0, 299.0, 284.0, 283.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 309.0, 318.0, 298.0, 281.0, 304.0, 323.0, 297.0, 282.0, 322.0, 308.0, 305.0, 325.0, 308.0, 322.0, 277.0, 302.0, 284.0, 295.0, 160.0, 139.0, 322.0, 311.0, 289.0, 293.0, 304.0, 278.0, 253.0, 269.0, 304.0, 326.0, 302.0, 280.0, 303.0, 324.0, 280.0, 287.0, 285.0, 279.0, 286.0, 296.0, 279.0, 276.0, 295.0, 287.0, 283.0, 296.0, 283.0, 293.0, 282.0, 297.0, 265.0, 268.0, 293.0, 283.0, 289.0, 287.0, 291.0, 291.0, 295.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1939639805139826, "mean_processing_ms": 0.3068617868060299, "mean_inference_ms": 1.750206276185966}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4848000, "num_steps_sampled": 2585600, "sample_time_ms": 21104.737, "load_time_ms": 36.737, "grad_time_ms": 9324.388, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035205576568841934, "policy_loss": -0.004760665353387594, "vf_loss": 88.47342681884766, "vf_explained_var": 0.7671054005622864, "kl": 0.0017035487107932568, "entropy": 1.1322449445724487, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2585600, "episodes_total": 6464, "training_iteration": 202, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-26-16", "timestamp": 1660253176, "time_this_iter_s": 31.605774879455566, "time_total_s": 11591.760535955429, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11591.760535955429, "timesteps_since_restore": 2585600, "iterations_since_restore": 202, "perf": {"cpu_util_percent": 31.240000000000006, "ram_util_percent": 58.77555555555557}} -{"episode_reward_max": 633.0, "episode_reward_min": 299.0, "episode_reward_mean": 580.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 139.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 290.155}, "custom_metrics": {"sparse_reward_mean": 200.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 179.51, "shaped_reward_min": 99, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.11, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.71, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.64, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.53, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.34, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.27, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.33, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.44, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.34, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.27, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.34, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.27, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 564.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 630.0, 633.0, 579.0, 539.0, 536.0, 582.0, 582.0, 587.0, 627.0, 587.0, 579.0, 630.0, 584.0, 579.0, 579.0, 579.0, 579.0, 630.0, 633.0, 630.0, 582.0, 630.0, 567.0, 633.0, 582.0, 582.0, 576.0, 587.0, 627.0, 579.0, 627.0, 579.0, 630.0, 630.0, 630.0, 579.0, 579.0, 299.0, 633.0, 582.0, 582.0, 522.0, 630.0, 582.0, 627.0, 567.0, 564.0, 582.0, 555.0, 582.0, 579.0, 576.0, 579.0, 533.0, 576.0, 576.0, 582.0, 582.0, 579.0, 533.0, 576.0, 582.0, 579.0, 582.0, 579.0, 582.0, 582.0, 582.0, 479.0, 576.0, 519.0, 525.0, 627.0, 522.0, 582.0, 587.0, 587.0, 627.0, 582.0, 582.0, 582.0, 579.0, 582.0, 579.0, 579.0, 582.0, 475.0, 587.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 297.0, 286.0, 301.0, 280.0, 284.0, 288.0, 291.0, 291.0, 288.0, 299.0, 283.0, 301.0, 281.0, 292.0, 290.0, 291.0, 291.0, 293.0, 289.0, 307.0, 323.0, 318.0, 315.0, 294.0, 285.0, 270.0, 269.0, 266.0, 270.0, 291.0, 291.0, 302.0, 280.0, 290.0, 297.0, 316.0, 311.0, 302.0, 285.0, 291.0, 288.0, 321.0, 309.0, 291.0, 293.0, 293.0, 286.0, 286.0, 293.0, 294.0, 285.0, 290.0, 289.0, 320.0, 310.0, 313.0, 320.0, 311.0, 319.0, 305.0, 277.0, 313.0, 317.0, 284.0, 283.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 309.0, 318.0, 298.0, 281.0, 304.0, 323.0, 297.0, 282.0, 322.0, 308.0, 305.0, 325.0, 308.0, 322.0, 277.0, 302.0, 284.0, 295.0, 160.0, 139.0, 322.0, 311.0, 289.0, 293.0, 304.0, 278.0, 253.0, 269.0, 304.0, 326.0, 302.0, 280.0, 303.0, 324.0, 280.0, 287.0, 285.0, 279.0, 286.0, 296.0, 279.0, 276.0, 295.0, 287.0, 283.0, 296.0, 283.0, 293.0, 282.0, 297.0, 265.0, 268.0, 293.0, 283.0, 289.0, 287.0, 291.0, 291.0, 295.0, 287.0, 294.0, 285.0, 261.0, 272.0, 283.0, 293.0, 297.0, 285.0, 288.0, 291.0, 283.0, 299.0, 291.0, 288.0, 281.0, 301.0, 283.0, 299.0, 299.0, 283.0, 248.0, 231.0, 295.0, 281.0, 264.0, 255.0, 260.0, 265.0, 309.0, 318.0, 274.0, 248.0, 294.0, 288.0, 283.0, 304.0, 301.0, 286.0, 318.0, 309.0, 288.0, 294.0, 293.0, 289.0, 296.0, 286.0, 299.0, 280.0, 281.0, 301.0, 300.0, 279.0, 288.0, 291.0, 289.0, 293.0, 243.0, 232.0, 300.0, 287.0, 297.0, 285.0, 284.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.190287620141578, "mean_processing_ms": 0.3061308463803027, "mean_inference_ms": 1.7468323528445506}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4872000, "num_steps_sampled": 2598400, "sample_time_ms": 21208.335, "load_time_ms": 36.421, "grad_time_ms": 9429.96, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.0062708244659006596, "policy_loss": -0.0019446747610345483, "vf_loss": 87.80118560791016, "vf_explained_var": 0.7648043632507324, "kl": 0.001872226013801992, "entropy": 1.1292202472686768, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2598400, "episodes_total": 6496, "training_iteration": 203, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-26-47", "timestamp": 1660253207, "time_this_iter_s": 30.729102849960327, "time_total_s": 11622.48963880539, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11622.48963880539, "timesteps_since_restore": 2598400, "iterations_since_restore": 203, "perf": {"cpu_util_percent": 31.509302325581398, "ram_util_percent": 58.665116279069764}} -{"episode_reward_max": 633.0, "episode_reward_min": 475.0, "episode_reward_mean": 581.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 290.785}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 180.37, "shaped_reward_min": 155, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.37, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.05, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.54, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.55, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.19, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.45, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, 
"useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.55, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.19, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.55, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.19, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 587.0, 579.0, 633.0, 582.0, 576.0, 564.0, 633.0, 525.0, 582.0, 579.0, 630.0, 579.0, 573.0, 573.0, 579.0, 587.0, 579.0, 582.0, 579.0, 521.0, 576.0, 530.0, 584.0, 630.0, 587.0, 582.0, 579.0, 582.0, 630.0, 627.0, 582.0, 576.0, 576.0, 582.0, 582.0, 579.0, 533.0, 576.0, 582.0, 579.0, 582.0, 579.0, 582.0, 582.0, 582.0, 479.0, 576.0, 519.0, 525.0, 627.0, 522.0, 582.0, 587.0, 587.0, 627.0, 582.0, 582.0, 582.0, 579.0, 582.0, 579.0, 579.0, 582.0, 475.0, 587.0, 582.0, 579.0, 582.0, 587.0, 564.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 630.0, 633.0, 579.0, 539.0, 536.0, 582.0, 582.0, 587.0, 627.0, 587.0, 579.0, 630.0, 584.0, 579.0, 579.0, 579.0, 579.0, 630.0, 633.0, 630.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 285.0, 291.0, 296.0, 290.0, 289.0, 314.0, 319.0, 288.0, 294.0, 280.0, 296.0, 278.0, 286.0, 316.0, 317.0, 260.0, 265.0, 292.0, 290.0, 292.0, 287.0, 326.0, 304.0, 294.0, 285.0, 293.0, 280.0, 287.0, 286.0, 288.0, 291.0, 298.0, 289.0, 303.0, 276.0, 291.0, 291.0, 277.0, 302.0, 253.0, 268.0, 288.0, 288.0, 267.0, 263.0, 290.0, 294.0, 307.0, 323.0, 297.0, 290.0, 296.0, 286.0, 293.0, 286.0, 285.0, 297.0, 317.0, 313.0, 317.0, 310.0, 288.0, 294.0, 293.0, 283.0, 289.0, 287.0, 291.0, 291.0, 295.0, 287.0, 294.0, 285.0, 261.0, 272.0, 283.0, 293.0, 297.0, 285.0, 288.0, 291.0, 283.0, 299.0, 291.0, 288.0, 281.0, 301.0, 283.0, 299.0, 299.0, 283.0, 248.0, 231.0, 295.0, 281.0, 264.0, 255.0, 260.0, 265.0, 309.0, 318.0, 274.0, 248.0, 294.0, 288.0, 283.0, 304.0, 301.0, 286.0, 318.0, 309.0, 288.0, 294.0, 293.0, 289.0, 296.0, 286.0, 299.0, 280.0, 281.0, 301.0, 300.0, 279.0, 288.0, 291.0, 289.0, 293.0, 243.0, 232.0, 300.0, 287.0, 297.0, 285.0, 284.0, 295.0, 285.0, 297.0, 286.0, 301.0, 280.0, 284.0, 288.0, 291.0, 291.0, 288.0, 299.0, 283.0, 301.0, 281.0, 292.0, 290.0, 291.0, 291.0, 293.0, 289.0, 307.0, 323.0, 318.0, 315.0, 294.0, 285.0, 270.0, 269.0, 266.0, 270.0, 291.0, 291.0, 302.0, 280.0, 290.0, 297.0, 316.0, 311.0, 302.0, 285.0, 291.0, 288.0, 321.0, 309.0, 291.0, 293.0, 293.0, 286.0, 286.0, 293.0, 294.0, 285.0, 290.0, 289.0, 320.0, 310.0, 313.0, 320.0, 311.0, 319.0, 305.0, 277.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1866467197845827, "mean_processing_ms": 0.3054065028212096, "mean_inference_ms": 1.7433019705968333}, "off_policy_estimator": {}, "info": 
{"num_steps_trained": 4896000, "num_steps_sampled": 2611200, "sample_time_ms": 20995.947, "load_time_ms": 36.336, "grad_time_ms": 9492.439, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 3.609728810261004e-05, "policy_loss": -0.0082255182787776, "vf_loss": 88.31702423095703, "vf_explained_var": 0.7638809680938721, "kl": 0.0019561152439564466, "entropy": 1.140177845954895, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2611200, "episodes_total": 6528, "training_iteration": 204, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-27-18", "timestamp": 1660253238, "time_this_iter_s": 30.846153020858765, "time_total_s": 11653.335791826248, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": 
false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, 
"object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11653.335791826248, "timesteps_since_restore": 2611200, "iterations_since_restore": 204, "perf": {"cpu_util_percent": 30.906818181818178, "ram_util_percent": 58.60227272727274}} -{"episode_reward_max": 633.0, "episode_reward_min": 237.0, "episode_reward_mean": 583.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 291.985}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 180.37, "shaped_reward_min": 77, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, 
"useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.56, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.19, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.41, 
"dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.56, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.19, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.56, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.19, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [237.0, 576.0, 542.0, 573.0, 630.0, 624.0, 633.0, 582.0, 630.0, 579.0, 630.0, 582.0, 582.0, 567.0, 579.0, 618.0, 579.0, 627.0, 630.0, 582.0, 582.0, 576.0, 633.0, 627.0, 624.0, 582.0, 539.0, 567.0, 579.0, 582.0, 544.0, 573.0, 475.0, 587.0, 582.0, 579.0, 582.0, 587.0, 564.0, 579.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 630.0, 633.0, 579.0, 539.0, 536.0, 582.0, 582.0, 587.0, 627.0, 587.0, 579.0, 630.0, 584.0, 579.0, 579.0, 579.0, 579.0, 630.0, 633.0, 630.0, 582.0, 630.0, 576.0, 587.0, 579.0, 633.0, 582.0, 576.0, 564.0, 633.0, 525.0, 582.0, 579.0, 630.0, 579.0, 573.0, 573.0, 579.0, 587.0, 579.0, 582.0, 579.0, 521.0, 576.0, 530.0, 584.0, 630.0, 587.0, 
582.0, 579.0, 582.0, 630.0, 627.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [122.0, 115.0, 285.0, 291.0, 284.0, 258.0, 281.0, 292.0, 314.0, 316.0, 312.0, 312.0, 320.0, 313.0, 293.0, 289.0, 314.0, 316.0, 283.0, 296.0, 316.0, 314.0, 284.0, 298.0, 284.0, 298.0, 278.0, 289.0, 280.0, 299.0, 305.0, 313.0, 291.0, 288.0, 316.0, 311.0, 304.0, 326.0, 288.0, 294.0, 291.0, 291.0, 295.0, 281.0, 315.0, 318.0, 302.0, 325.0, 313.0, 311.0, 290.0, 292.0, 259.0, 280.0, 278.0, 289.0, 292.0, 287.0, 282.0, 300.0, 275.0, 269.0, 286.0, 287.0, 243.0, 232.0, 300.0, 287.0, 297.0, 285.0, 284.0, 295.0, 285.0, 297.0, 286.0, 301.0, 280.0, 284.0, 288.0, 291.0, 291.0, 288.0, 299.0, 283.0, 301.0, 281.0, 292.0, 290.0, 291.0, 291.0, 293.0, 289.0, 307.0, 323.0, 318.0, 315.0, 294.0, 285.0, 270.0, 269.0, 266.0, 270.0, 291.0, 291.0, 302.0, 280.0, 290.0, 297.0, 316.0, 311.0, 302.0, 285.0, 291.0, 288.0, 321.0, 309.0, 291.0, 293.0, 293.0, 286.0, 286.0, 293.0, 294.0, 285.0, 290.0, 289.0, 320.0, 310.0, 313.0, 320.0, 311.0, 319.0, 305.0, 277.0, 313.0, 317.0, 291.0, 285.0, 291.0, 296.0, 290.0, 289.0, 314.0, 319.0, 288.0, 294.0, 280.0, 296.0, 278.0, 286.0, 316.0, 317.0, 260.0, 265.0, 292.0, 290.0, 292.0, 287.0, 326.0, 304.0, 294.0, 285.0, 293.0, 280.0, 287.0, 286.0, 288.0, 291.0, 298.0, 289.0, 303.0, 276.0, 291.0, 291.0, 277.0, 302.0, 253.0, 268.0, 288.0, 288.0, 267.0, 263.0, 290.0, 294.0, 307.0, 323.0, 297.0, 290.0, 296.0, 286.0, 293.0, 286.0, 285.0, 297.0, 317.0, 313.0, 317.0, 310.0, 288.0, 294.0]}, 
"sampler_perf": {"mean_env_wait_ms": 1.1830353872295725, "mean_processing_ms": 0.3046858000734139, "mean_inference_ms": 1.7395956173502736}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4920000, "num_steps_sampled": 2624000, "sample_time_ms": 20976.168, "load_time_ms": 36.272, "grad_time_ms": 9605.806, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005292419344186783, "policy_loss": -0.002614696277305484, "vf_loss": 84.73992156982422, "vf_explained_var": 0.7728293538093567, "kl": 0.0027176842559129, "entropy": 1.1337858438491821, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2624000, "episodes_total": 6560, "training_iteration": 205, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-27-47", "timestamp": 1660253267, "time_this_iter_s": 29.478952169418335, "time_total_s": 11682.814743995667, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11682.814743995667, "timesteps_since_restore": 2624000, "iterations_since_restore": 205, "perf": {"cpu_util_percent": 32.5452380952381, "ram_util_percent": 58.56666666666667}} -{"episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 586.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 293.06}, "custom_metrics": {"sparse_reward_mean": 203.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 180.12, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.28, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.75, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.56, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 
17.23, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.56, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.23, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.56, 
"viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.23, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [621.0, 570.0, 587.0, 630.0, 633.0, 579.0, 630.0, 587.0, 582.0, 624.0, 624.0, 579.0, 636.0, 579.0, 576.0, 624.0, 525.0, 627.0, 579.0, 579.0, 630.0, 581.0, 465.0, 579.0, 582.0, 579.0, 573.0, 579.0, 582.0, 573.0, 579.0, 587.0, 633.0, 630.0, 582.0, 630.0, 576.0, 587.0, 579.0, 633.0, 582.0, 576.0, 564.0, 633.0, 525.0, 582.0, 579.0, 630.0, 579.0, 573.0, 573.0, 579.0, 587.0, 579.0, 582.0, 579.0, 521.0, 576.0, 530.0, 584.0, 630.0, 587.0, 582.0, 579.0, 582.0, 
630.0, 627.0, 582.0, 237.0, 576.0, 542.0, 573.0, 630.0, 624.0, 633.0, 582.0, 630.0, 579.0, 630.0, 582.0, 582.0, 567.0, 579.0, 618.0, 579.0, 627.0, 630.0, 582.0, 582.0, 576.0, 633.0, 627.0, 624.0, 582.0, 539.0, 567.0, 579.0, 582.0, 544.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [310.0, 311.0, 271.0, 299.0, 298.0, 289.0, 324.0, 306.0, 316.0, 317.0, 299.0, 280.0, 322.0, 308.0, 290.0, 297.0, 295.0, 287.0, 307.0, 317.0, 315.0, 309.0, 291.0, 288.0, 322.0, 314.0, 288.0, 291.0, 285.0, 291.0, 305.0, 319.0, 260.0, 265.0, 313.0, 314.0, 277.0, 302.0, 285.0, 294.0, 309.0, 321.0, 285.0, 296.0, 234.0, 231.0, 288.0, 291.0, 296.0, 286.0, 285.0, 294.0, 285.0, 288.0, 290.0, 289.0, 296.0, 286.0, 293.0, 280.0, 298.0, 281.0, 295.0, 292.0, 313.0, 320.0, 311.0, 319.0, 305.0, 277.0, 313.0, 317.0, 291.0, 285.0, 291.0, 296.0, 290.0, 289.0, 314.0, 319.0, 288.0, 294.0, 280.0, 296.0, 278.0, 286.0, 316.0, 317.0, 260.0, 265.0, 292.0, 290.0, 292.0, 287.0, 326.0, 304.0, 294.0, 285.0, 293.0, 280.0, 287.0, 286.0, 288.0, 291.0, 298.0, 289.0, 303.0, 276.0, 291.0, 291.0, 277.0, 302.0, 253.0, 268.0, 288.0, 288.0, 267.0, 263.0, 290.0, 294.0, 307.0, 323.0, 297.0, 290.0, 296.0, 286.0, 293.0, 286.0, 285.0, 297.0, 317.0, 313.0, 317.0, 310.0, 288.0, 294.0, 122.0, 115.0, 285.0, 291.0, 284.0, 258.0, 281.0, 292.0, 314.0, 316.0, 312.0, 312.0, 320.0, 313.0, 293.0, 289.0, 314.0, 316.0, 283.0, 296.0, 316.0, 314.0, 284.0, 298.0, 284.0, 298.0, 278.0, 289.0, 280.0, 299.0, 305.0, 313.0, 291.0, 288.0, 316.0, 311.0, 304.0, 
326.0, 288.0, 294.0, 291.0, 291.0, 295.0, 281.0, 315.0, 318.0, 302.0, 325.0, 313.0, 311.0, 290.0, 292.0, 259.0, 280.0, 278.0, 289.0, 292.0, 287.0, 282.0, 300.0, 275.0, 269.0, 286.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1794594867485337, "mean_processing_ms": 0.3039706521282891, "mean_inference_ms": 1.735821597361437}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4944000, "num_steps_sampled": 2636800, "sample_time_ms": 21024.303, "load_time_ms": 36.234, "grad_time_ms": 9682.83, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00302000530064106, "policy_loss": -0.005335552152246237, "vf_loss": 89.21270751953125, "vf_explained_var": 0.7561216354370117, "kl": 0.0017618268029764295, "entropy": 1.131414532661438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2636800, "episodes_total": 6592, "training_iteration": 206, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-28-17", "timestamp": 1660253297, "time_this_iter_s": 29.685957193374634, "time_total_s": 11712.500701189041, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11712.500701189041, "timesteps_since_restore": 2636800, "iterations_since_restore": 206, "perf": {"cpu_util_percent": 30.95714285714286, "ram_util_percent": 58.669047619047625}} -{"episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 589.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 115.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 294.77}, "custom_metrics": {"sparse_reward_mean": 204.6, 
"sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 180.34, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.12, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.54, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.25, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.62, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.54, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.25, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.54, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.25, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 530.0, 582.0, 630.0, 624.0, 633.0, 558.0, 570.0, 627.0, 624.0, 573.0, 590.0, 576.0, 573.0, 587.0, 627.0, 582.0, 576.0, 582.0, 579.0, 582.0, 564.0, 576.0, 627.0, 627.0, 579.0, 633.0, 630.0, 633.0, 579.0, 630.0, 627.0, 582.0, 630.0, 627.0, 582.0, 237.0, 576.0, 542.0, 573.0, 630.0, 
624.0, 633.0, 582.0, 630.0, 579.0, 630.0, 582.0, 582.0, 567.0, 579.0, 618.0, 579.0, 627.0, 630.0, 582.0, 582.0, 576.0, 633.0, 627.0, 624.0, 582.0, 539.0, 567.0, 579.0, 582.0, 544.0, 573.0, 621.0, 570.0, 587.0, 630.0, 633.0, 579.0, 630.0, 587.0, 582.0, 624.0, 624.0, 579.0, 636.0, 579.0, 576.0, 624.0, 525.0, 627.0, 579.0, 579.0, 630.0, 581.0, 465.0, 579.0, 582.0, 579.0, 573.0, 579.0, 582.0, 573.0, 579.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [274.0, 299.0, 263.0, 267.0, 289.0, 293.0, 321.0, 309.0, 309.0, 315.0, 309.0, 324.0, 271.0, 287.0, 293.0, 277.0, 318.0, 309.0, 313.0, 311.0, 290.0, 283.0, 291.0, 299.0, 288.0, 288.0, 300.0, 273.0, 286.0, 301.0, 308.0, 319.0, 296.0, 286.0, 285.0, 291.0, 293.0, 289.0, 288.0, 291.0, 292.0, 290.0, 281.0, 283.0, 289.0, 287.0, 314.0, 313.0, 313.0, 314.0, 289.0, 290.0, 324.0, 309.0, 313.0, 317.0, 316.0, 317.0, 288.0, 291.0, 311.0, 319.0, 326.0, 301.0, 285.0, 297.0, 317.0, 313.0, 317.0, 310.0, 288.0, 294.0, 122.0, 115.0, 285.0, 291.0, 284.0, 258.0, 281.0, 292.0, 314.0, 316.0, 312.0, 312.0, 320.0, 313.0, 293.0, 289.0, 314.0, 316.0, 283.0, 296.0, 316.0, 314.0, 284.0, 298.0, 284.0, 298.0, 278.0, 289.0, 280.0, 299.0, 305.0, 313.0, 291.0, 288.0, 316.0, 311.0, 304.0, 326.0, 288.0, 294.0, 291.0, 291.0, 295.0, 281.0, 315.0, 318.0, 302.0, 325.0, 313.0, 311.0, 290.0, 292.0, 259.0, 280.0, 278.0, 289.0, 292.0, 287.0, 282.0, 300.0, 275.0, 269.0, 286.0, 287.0, 310.0, 311.0, 271.0, 299.0, 298.0, 289.0, 324.0, 306.0, 316.0, 317.0, 299.0, 280.0, 322.0, 
308.0, 290.0, 297.0, 295.0, 287.0, 307.0, 317.0, 315.0, 309.0, 291.0, 288.0, 322.0, 314.0, 288.0, 291.0, 285.0, 291.0, 305.0, 319.0, 260.0, 265.0, 313.0, 314.0, 277.0, 302.0, 285.0, 294.0, 309.0, 321.0, 285.0, 296.0, 234.0, 231.0, 288.0, 291.0, 296.0, 286.0, 285.0, 294.0, 285.0, 288.0, 290.0, 289.0, 296.0, 286.0, 293.0, 280.0, 298.0, 281.0, 295.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1759167909615367, "mean_processing_ms": 0.30326280237978454, "mean_inference_ms": 1.7321279401839695}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4968000, "num_steps_sampled": 2649600, "sample_time_ms": 20977.446, "load_time_ms": 35.842, "grad_time_ms": 9697.985, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015924535691738129, "policy_loss": -0.005817517638206482, "vf_loss": 79.77727508544922, "vf_explained_var": 0.7645978927612305, "kl": 0.001973592210561037, "entropy": 1.1355053186416626, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2649600, "episodes_total": 6624, "training_iteration": 207, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-28-47", "timestamp": 1660253327, "time_this_iter_s": 30.242400884628296, "time_total_s": 11742.74310207367, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11742.74310207367, "timesteps_since_restore": 2649600, "iterations_since_restore": 207, "perf": {"cpu_util_percent": 33.359523809523814, "ram_util_percent": 58.971428571428575}} -{"episode_reward_max": 636.0, "episode_reward_min": 465.0, "episode_reward_mean": 592.22, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 296.11}, "custom_metrics": {"sparse_reward_mean": 205.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 181.02, "shaped_reward_min": 145, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.49, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.37, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.04, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, 
"useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.62, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.32, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 15.62, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.32, 
"optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.62, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.32, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 582.0, 630.0, 630.0, 582.0, 564.0, 576.0, 582.0, 630.0, 561.0, 633.0, 
587.0, 579.0, 579.0, 627.0, 627.0, 627.0, 518.0, 633.0, 633.0, 582.0, 630.0, 582.0, 582.0, 587.0, 573.0, 627.0, 582.0, 573.0, 582.0, 564.0, 573.0, 579.0, 582.0, 544.0, 573.0, 621.0, 570.0, 587.0, 630.0, 633.0, 579.0, 630.0, 587.0, 582.0, 624.0, 624.0, 579.0, 636.0, 579.0, 576.0, 624.0, 525.0, 627.0, 579.0, 579.0, 630.0, 581.0, 465.0, 579.0, 582.0, 579.0, 573.0, 579.0, 582.0, 573.0, 579.0, 587.0, 573.0, 530.0, 582.0, 630.0, 624.0, 633.0, 558.0, 570.0, 627.0, 624.0, 573.0, 590.0, 576.0, 573.0, 587.0, 627.0, 582.0, 576.0, 582.0, 579.0, 582.0, 564.0, 576.0, 627.0, 627.0, 579.0, 633.0, 630.0, 633.0, 579.0, 630.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 299.0, 296.0, 286.0, 311.0, 319.0, 306.0, 324.0, 291.0, 291.0, 283.0, 281.0, 289.0, 287.0, 286.0, 296.0, 315.0, 315.0, 277.0, 284.0, 321.0, 312.0, 290.0, 297.0, 296.0, 283.0, 284.0, 295.0, 310.0, 317.0, 313.0, 314.0, 326.0, 301.0, 252.0, 266.0, 319.0, 314.0, 323.0, 310.0, 296.0, 286.0, 315.0, 315.0, 290.0, 292.0, 283.0, 299.0, 292.0, 295.0, 283.0, 290.0, 308.0, 319.0, 279.0, 303.0, 279.0, 294.0, 290.0, 292.0, 283.0, 281.0, 278.0, 295.0, 292.0, 287.0, 282.0, 300.0, 275.0, 269.0, 286.0, 287.0, 310.0, 311.0, 271.0, 299.0, 298.0, 289.0, 324.0, 306.0, 316.0, 317.0, 299.0, 280.0, 322.0, 308.0, 290.0, 297.0, 295.0, 287.0, 307.0, 317.0, 315.0, 309.0, 291.0, 288.0, 322.0, 314.0, 288.0, 291.0, 285.0, 291.0, 305.0, 319.0, 260.0, 265.0, 313.0, 314.0, 277.0, 302.0, 285.0, 294.0, 309.0, 321.0, 285.0, 296.0, 234.0, 231.0, 288.0, 
291.0, 296.0, 286.0, 285.0, 294.0, 285.0, 288.0, 290.0, 289.0, 296.0, 286.0, 293.0, 280.0, 298.0, 281.0, 295.0, 292.0, 274.0, 299.0, 263.0, 267.0, 289.0, 293.0, 321.0, 309.0, 309.0, 315.0, 309.0, 324.0, 271.0, 287.0, 293.0, 277.0, 318.0, 309.0, 313.0, 311.0, 290.0, 283.0, 291.0, 299.0, 288.0, 288.0, 300.0, 273.0, 286.0, 301.0, 308.0, 319.0, 296.0, 286.0, 285.0, 291.0, 293.0, 289.0, 288.0, 291.0, 292.0, 290.0, 281.0, 283.0, 289.0, 287.0, 314.0, 313.0, 313.0, 314.0, 289.0, 290.0, 324.0, 309.0, 313.0, 317.0, 316.0, 317.0, 288.0, 291.0, 311.0, 319.0, 326.0, 301.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1724058892059666, "mean_processing_ms": 0.3025608135721768, "mean_inference_ms": 1.7284261128230096}, "off_policy_estimator": {}, "info": {"num_steps_trained": 4992000, "num_steps_sampled": 2662400, "sample_time_ms": 20797.656, "load_time_ms": 35.752, "grad_time_ms": 9645.805, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0059862625785171986, "policy_loss": -0.00204761722125113, "vf_loss": 86.01973724365234, "vf_explained_var": 0.7589619755744934, "kl": 0.0022174532059580088, "entropy": 1.136189579963684, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2662400, "episodes_total": 6656, "training_iteration": 208, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-29-16", "timestamp": 1660253356, "time_this_iter_s": 28.974893808364868, "time_total_s": 11771.717995882034, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11771.717995882034, "timesteps_since_restore": 2662400, "iterations_since_restore": 208, "perf": {"cpu_util_percent": 
35.22682926829268, "ram_util_percent": 58.67073170731708}} -{"episode_reward_max": 639.0, "episode_reward_min": 518.0, "episode_reward_mean": 593.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 296.525}, "custom_metrics": {"sparse_reward_mean": 205.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.45, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.33, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, 
"onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.76, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.22, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, 
"optimal_onion_potting_agent_0_mean": 15.76, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.22, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.76, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.22, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 627.0, 579.0, 582.0, 579.0, 618.0, 522.0, 581.0, 590.0, 587.0, 582.0, 582.0, 579.0, 564.0, 630.0, 582.0, 587.0, 639.0, 570.0, 576.0, 567.0, 576.0, 582.0, 630.0, 584.0, 587.0, 633.0, 582.0, 630.0, 582.0, 579.0, 582.0, 582.0, 573.0, 579.0, 587.0, 573.0, 530.0, 582.0, 630.0, 624.0, 633.0, 558.0, 570.0, 627.0, 624.0, 573.0, 590.0, 576.0, 573.0, 587.0, 627.0, 582.0, 576.0, 582.0, 579.0, 582.0, 564.0, 576.0, 627.0, 627.0, 579.0, 633.0, 630.0, 633.0, 579.0, 630.0, 627.0, 584.0, 582.0, 630.0, 630.0, 582.0, 564.0, 576.0, 582.0, 630.0, 561.0, 633.0, 587.0, 579.0, 579.0, 627.0, 627.0, 627.0, 518.0, 633.0, 633.0, 582.0, 630.0, 582.0, 582.0, 587.0, 573.0, 627.0, 582.0, 573.0, 582.0, 564.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 313.0, 315.0, 312.0, 290.0, 289.0, 297.0, 285.0, 281.0, 298.0, 313.0, 305.0, 255.0, 267.0, 287.0, 294.0, 304.0, 286.0, 298.0, 289.0, 287.0, 295.0, 288.0, 294.0, 289.0, 290.0, 286.0, 278.0, 314.0, 316.0, 285.0, 297.0, 300.0, 287.0, 316.0, 323.0, 273.0, 297.0, 286.0, 290.0, 278.0, 289.0, 291.0, 285.0, 293.0, 289.0, 321.0, 309.0, 287.0, 297.0, 286.0, 301.0, 321.0, 312.0, 291.0, 291.0, 316.0, 314.0, 286.0, 296.0, 302.0, 277.0, 294.0, 288.0, 296.0, 286.0, 293.0, 280.0, 298.0, 281.0, 295.0, 292.0, 274.0, 299.0, 263.0, 267.0, 289.0, 293.0, 321.0, 309.0, 309.0, 315.0, 309.0, 324.0, 271.0, 287.0, 293.0, 277.0, 318.0, 309.0, 313.0, 
311.0, 290.0, 283.0, 291.0, 299.0, 288.0, 288.0, 300.0, 273.0, 286.0, 301.0, 308.0, 319.0, 296.0, 286.0, 285.0, 291.0, 293.0, 289.0, 288.0, 291.0, 292.0, 290.0, 281.0, 283.0, 289.0, 287.0, 314.0, 313.0, 313.0, 314.0, 289.0, 290.0, 324.0, 309.0, 313.0, 317.0, 316.0, 317.0, 288.0, 291.0, 311.0, 319.0, 326.0, 301.0, 285.0, 299.0, 296.0, 286.0, 311.0, 319.0, 306.0, 324.0, 291.0, 291.0, 283.0, 281.0, 289.0, 287.0, 286.0, 296.0, 315.0, 315.0, 277.0, 284.0, 321.0, 312.0, 290.0, 297.0, 296.0, 283.0, 284.0, 295.0, 310.0, 317.0, 313.0, 314.0, 326.0, 301.0, 252.0, 266.0, 319.0, 314.0, 323.0, 310.0, 296.0, 286.0, 315.0, 315.0, 290.0, 292.0, 283.0, 299.0, 292.0, 295.0, 283.0, 290.0, 308.0, 319.0, 279.0, 303.0, 279.0, 294.0, 290.0, 292.0, 283.0, 281.0, 278.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.168925444559228, "mean_processing_ms": 0.30186423192914913, "mean_inference_ms": 1.7247612604215892}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5016000, "num_steps_sampled": 2675200, "sample_time_ms": 20704.168, "load_time_ms": 35.714, "grad_time_ms": 9573.901, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030685996171087027, "policy_loss": -0.005047030281275511, "vf_loss": 86.80921173095703, "vf_explained_var": 0.7612468600273132, "kl": 0.0021123213227838278, "entropy": 1.1305813789367676, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2675200, "episodes_total": 6688, "training_iteration": 209, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-29-45", "timestamp": 1660253385, "time_this_iter_s": 29.278584241867065, "time_total_s": 11800.996580123901, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11800.996580123901, "timesteps_since_restore": 2675200, "iterations_since_restore": 209, "perf": {"cpu_util_percent": 32.47857142857143, "ram_util_percent": 58.55952380952381}} -{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 592.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 296.49}, "custom_metrics": {"sparse_reward_mean": 205.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.18, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 16.12, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.66, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.39, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.85, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 15.66, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.39, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.66, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.39, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 570.0, 582.0, 576.0, 633.0, 582.0, 582.0, 582.0, 627.0, 633.0, 587.0, 579.0, 587.0, 587.0, 579.0, 582.0, 584.0, 513.0, 630.0, 582.0, 630.0, 630.0, 633.0, 590.0, 582.0, 582.0, 582.0, 579.0, 630.0, 582.0, 579.0, 633.0, 579.0, 630.0, 627.0, 584.0, 582.0, 630.0, 630.0, 582.0, 564.0, 576.0, 582.0, 630.0, 561.0, 633.0, 587.0, 579.0, 579.0, 627.0, 627.0, 627.0, 518.0, 633.0, 633.0, 582.0, 630.0, 582.0, 582.0, 587.0, 573.0, 627.0, 582.0, 573.0, 582.0, 564.0, 573.0, 630.0, 627.0, 579.0, 582.0, 579.0, 618.0, 522.0, 581.0, 590.0, 587.0, 582.0, 582.0, 579.0, 564.0, 630.0, 582.0, 587.0, 639.0, 570.0, 576.0, 567.0, 576.0, 582.0, 630.0, 584.0, 587.0, 633.0, 582.0, 630.0, 582.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 287.0, 292.0, 288.0, 282.0, 295.0, 287.0, 283.0, 293.0, 319.0, 314.0, 282.0, 300.0, 291.0, 291.0, 303.0, 279.0, 306.0, 321.0, 314.0, 319.0, 290.0, 297.0, 278.0, 301.0, 300.0, 287.0, 304.0, 283.0, 298.0, 281.0, 299.0, 283.0, 289.0, 295.0, 259.0, 254.0, 321.0, 309.0, 285.0, 297.0, 311.0, 319.0, 318.0, 312.0, 318.0, 315.0, 293.0, 297.0, 296.0, 286.0, 294.0, 288.0, 297.0, 285.0, 295.0, 284.0, 306.0, 324.0, 
293.0, 289.0, 286.0, 293.0, 316.0, 317.0, 288.0, 291.0, 311.0, 319.0, 326.0, 301.0, 285.0, 299.0, 296.0, 286.0, 311.0, 319.0, 306.0, 324.0, 291.0, 291.0, 283.0, 281.0, 289.0, 287.0, 286.0, 296.0, 315.0, 315.0, 277.0, 284.0, 321.0, 312.0, 290.0, 297.0, 296.0, 283.0, 284.0, 295.0, 310.0, 317.0, 313.0, 314.0, 326.0, 301.0, 252.0, 266.0, 319.0, 314.0, 323.0, 310.0, 296.0, 286.0, 315.0, 315.0, 290.0, 292.0, 283.0, 299.0, 292.0, 295.0, 283.0, 290.0, 308.0, 319.0, 279.0, 303.0, 279.0, 294.0, 290.0, 292.0, 283.0, 281.0, 278.0, 295.0, 317.0, 313.0, 315.0, 312.0, 290.0, 289.0, 297.0, 285.0, 281.0, 298.0, 313.0, 305.0, 255.0, 267.0, 287.0, 294.0, 304.0, 286.0, 298.0, 289.0, 287.0, 295.0, 288.0, 294.0, 289.0, 290.0, 286.0, 278.0, 314.0, 316.0, 285.0, 297.0, 300.0, 287.0, 316.0, 323.0, 273.0, 297.0, 286.0, 290.0, 278.0, 289.0, 291.0, 285.0, 293.0, 289.0, 321.0, 309.0, 287.0, 297.0, 286.0, 301.0, 321.0, 312.0, 291.0, 291.0, 316.0, 314.0, 286.0, 296.0, 302.0, 277.0, 294.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.165477981572783, "mean_processing_ms": 0.3011732242601824, "mean_inference_ms": 1.7211386547427134}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5040000, "num_steps_sampled": 2688000, "sample_time_ms": 20664.196, "load_time_ms": 35.919, "grad_time_ms": 9521.919, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002112786052748561, "policy_loss": -0.006137066055089235, "vf_loss": 88.17215728759766, "vf_explained_var": 0.7567508220672607, "kl": 0.0019861103501170874, "entropy": 1.1347342729568481, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2688000, "episodes_total": 6720, "training_iteration": 210, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-30-15", "timestamp": 1660253415, "time_this_iter_s": 29.789448976516724, "time_total_s": 11830.786029100418, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11830.786029100418, "timesteps_since_restore": 2688000, "iterations_since_restore": 210, "perf": {"cpu_util_percent": 30.638095238095236, "ram_util_percent": 58.67142857142858}} -{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 586.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 293.28}, "custom_metrics": {"sparse_reward_mean": 202.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 181.36, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 16.32, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.57, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.89, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.33, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.52, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.75, 
"soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.33, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.52, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.33, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.52, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 573.0, 579.0, 465.0, 579.0, 630.0, 582.0, 582.0, 536.0, 587.0, 587.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 579.0, 576.0, 582.0, 530.0, 484.0, 627.0, 582.0, 579.0, 584.0, 579.0, 590.0, 587.0, 587.0, 579.0, 636.0, 573.0, 582.0, 564.0, 573.0, 630.0, 627.0, 579.0, 582.0, 579.0, 618.0, 522.0, 581.0, 590.0, 587.0, 582.0, 582.0, 579.0, 564.0, 630.0, 582.0, 587.0, 639.0, 570.0, 576.0, 567.0, 576.0, 582.0, 630.0, 584.0, 587.0, 633.0, 582.0, 630.0, 582.0, 579.0, 582.0, 573.0, 579.0, 570.0, 582.0, 576.0, 633.0, 582.0, 582.0, 582.0, 627.0, 633.0, 587.0, 579.0, 587.0, 587.0, 579.0, 582.0, 584.0, 513.0, 630.0, 582.0, 630.0, 630.0, 633.0, 590.0, 582.0, 582.0, 582.0, 579.0, 630.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 298.0, 284.0, 289.0, 288.0, 291.0, 231.0, 234.0, 279.0, 300.0, 313.0, 317.0, 297.0, 285.0, 291.0, 291.0, 271.0, 265.0, 302.0, 285.0, 290.0, 297.0, 316.0, 314.0, 286.0, 293.0, 313.0, 317.0, 294.0, 285.0, 296.0, 
286.0, 287.0, 292.0, 288.0, 291.0, 290.0, 286.0, 293.0, 289.0, 260.0, 270.0, 234.0, 250.0, 308.0, 319.0, 294.0, 288.0, 290.0, 289.0, 296.0, 288.0, 292.0, 287.0, 301.0, 289.0, 301.0, 286.0, 300.0, 287.0, 300.0, 279.0, 320.0, 316.0, 279.0, 294.0, 290.0, 292.0, 283.0, 281.0, 278.0, 295.0, 317.0, 313.0, 315.0, 312.0, 290.0, 289.0, 297.0, 285.0, 281.0, 298.0, 313.0, 305.0, 255.0, 267.0, 287.0, 294.0, 304.0, 286.0, 298.0, 289.0, 287.0, 295.0, 288.0, 294.0, 289.0, 290.0, 286.0, 278.0, 314.0, 316.0, 285.0, 297.0, 300.0, 287.0, 316.0, 323.0, 273.0, 297.0, 286.0, 290.0, 278.0, 289.0, 291.0, 285.0, 293.0, 289.0, 321.0, 309.0, 287.0, 297.0, 286.0, 301.0, 321.0, 312.0, 291.0, 291.0, 316.0, 314.0, 286.0, 296.0, 302.0, 277.0, 294.0, 288.0, 288.0, 285.0, 287.0, 292.0, 288.0, 282.0, 295.0, 287.0, 283.0, 293.0, 319.0, 314.0, 282.0, 300.0, 291.0, 291.0, 303.0, 279.0, 306.0, 321.0, 314.0, 319.0, 290.0, 297.0, 278.0, 301.0, 300.0, 287.0, 304.0, 283.0, 298.0, 281.0, 299.0, 283.0, 289.0, 295.0, 259.0, 254.0, 321.0, 309.0, 285.0, 297.0, 311.0, 319.0, 318.0, 312.0, 318.0, 315.0, 293.0, 297.0, 296.0, 286.0, 294.0, 288.0, 297.0, 285.0, 295.0, 284.0, 306.0, 324.0, 293.0, 289.0, 286.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.162072675470566, "mean_processing_ms": 0.30049261071423955, "mean_inference_ms": 1.7176923877441694}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5064000, "num_steps_sampled": 2700800, "sample_time_ms": 20604.341, "load_time_ms": 36.396, "grad_time_ms": 9451.079, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014451188035309315, "policy_loss": -0.0075116343796253204, "vf_loss": 95.30281829833984, "vf_explained_var": 0.7530279755592346, "kl": 0.001810736837796867, "entropy": 1.147046446800232, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2700800, "episodes_total": 6752, 
"training_iteration": 211, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-30-46", "timestamp": 1660253446, "time_this_iter_s": 30.540673971176147, "time_total_s": 11861.326703071594, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11861.326703071594, "timesteps_since_restore": 2700800, "iterations_since_restore": 211, "perf": {"cpu_util_percent": 31.16511627906976, "ram_util_percent": 58.63023255813955}} -{"episode_reward_max": 636.0, "episode_reward_min": 465.0, "episode_reward_mean": 585.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 292.975}, "custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 181.55, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.41, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.19, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.82, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.36, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.48, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.36, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.36, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 627.0, 582.0, 542.0, 582.0, 579.0, 468.0, 539.0, 584.0, 587.0, 582.0, 633.0, 587.0, 579.0, 498.0, 627.0, 579.0, 630.0, 582.0, 630.0, 630.0, 582.0, 576.0, 587.0, 633.0, 582.0, 621.0, 627.0, 582.0, 587.0, 582.0, 525.0, 630.0, 582.0, 579.0, 582.0, 573.0, 579.0, 570.0, 582.0, 576.0, 633.0, 582.0, 582.0, 582.0, 627.0, 633.0, 587.0, 579.0, 587.0, 587.0, 579.0, 582.0, 584.0, 513.0, 630.0, 582.0, 630.0, 630.0, 633.0, 590.0, 582.0, 582.0, 582.0, 579.0, 630.0, 582.0, 579.0, 576.0, 573.0, 579.0, 465.0, 579.0, 630.0, 582.0, 582.0, 536.0, 587.0, 587.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 579.0, 576.0, 582.0, 530.0, 484.0, 627.0, 582.0, 579.0, 584.0, 579.0, 590.0, 587.0, 587.0, 579.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [303.0, 324.0, 318.0, 309.0, 
294.0, 288.0, 267.0, 275.0, 288.0, 294.0, 293.0, 286.0, 236.0, 232.0, 277.0, 262.0, 288.0, 296.0, 302.0, 285.0, 285.0, 297.0, 314.0, 319.0, 296.0, 291.0, 293.0, 286.0, 245.0, 253.0, 318.0, 309.0, 295.0, 284.0, 318.0, 312.0, 293.0, 289.0, 309.0, 321.0, 310.0, 320.0, 280.0, 302.0, 293.0, 283.0, 286.0, 301.0, 320.0, 313.0, 286.0, 296.0, 314.0, 307.0, 319.0, 308.0, 301.0, 281.0, 282.0, 305.0, 288.0, 294.0, 256.0, 269.0, 316.0, 314.0, 286.0, 296.0, 302.0, 277.0, 294.0, 288.0, 288.0, 285.0, 287.0, 292.0, 288.0, 282.0, 295.0, 287.0, 283.0, 293.0, 319.0, 314.0, 282.0, 300.0, 291.0, 291.0, 303.0, 279.0, 306.0, 321.0, 314.0, 319.0, 290.0, 297.0, 278.0, 301.0, 300.0, 287.0, 304.0, 283.0, 298.0, 281.0, 299.0, 283.0, 289.0, 295.0, 259.0, 254.0, 321.0, 309.0, 285.0, 297.0, 311.0, 319.0, 318.0, 312.0, 318.0, 315.0, 293.0, 297.0, 296.0, 286.0, 294.0, 288.0, 297.0, 285.0, 295.0, 284.0, 306.0, 324.0, 293.0, 289.0, 286.0, 293.0, 278.0, 298.0, 284.0, 289.0, 288.0, 291.0, 231.0, 234.0, 279.0, 300.0, 313.0, 317.0, 297.0, 285.0, 291.0, 291.0, 271.0, 265.0, 302.0, 285.0, 290.0, 297.0, 316.0, 314.0, 286.0, 293.0, 313.0, 317.0, 294.0, 285.0, 296.0, 286.0, 287.0, 292.0, 288.0, 291.0, 290.0, 286.0, 293.0, 289.0, 260.0, 270.0, 234.0, 250.0, 308.0, 319.0, 294.0, 288.0, 290.0, 289.0, 296.0, 288.0, 292.0, 287.0, 301.0, 289.0, 301.0, 286.0, 300.0, 287.0, 300.0, 279.0, 320.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 1.15872347920077, "mean_processing_ms": 0.2998198878857747, "mean_inference_ms": 1.7145174243808747}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5088000, "num_steps_sampled": 2713600, "sample_time_ms": 20641.235, "load_time_ms": 36.613, "grad_time_ms": 9485.245, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.010028759017586708, "policy_loss": 0.0009867753833532333, "vf_loss": 96.11052703857422, "vf_explained_var": 0.7489395141601562, "kl": 0.0021745015401393175, "entropy": 1.1381220817565918, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2713600, "episodes_total": 6784, "training_iteration": 212, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-31-18", "timestamp": 1660253478, "time_this_iter_s": 32.31651592254639, "time_total_s": 11893.64321899414, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11893.64321899414, "timesteps_since_restore": 2713600, "iterations_since_restore": 212, "perf": {"cpu_util_percent": 33.958695652173915, "ram_util_percent": 58.643478260869585}} -{"episode_reward_max": 636.0, "episode_reward_min": 465.0, "episode_reward_mean": 583.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 291.905}, "custom_metrics": {"sparse_reward_mean": 201.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 181.01, "shaped_reward_min": 138, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.71, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.22, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.72, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.08, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.13, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.44, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.72, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.08, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.72, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.08, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [478.0, 582.0, 582.0, 582.0, 576.0, 576.0, 627.0, 579.0, 630.0, 536.0, 579.0, 630.0, 582.0, 582.0, 579.0, 582.0, 570.0, 576.0, 630.0, 627.0, 536.0, 579.0, 582.0, 579.0, 587.0, 584.0, 630.0, 582.0, 630.0, 582.0, 582.0, 579.0, 579.0, 630.0, 582.0, 579.0, 576.0, 573.0, 579.0, 465.0, 579.0, 630.0, 582.0, 582.0, 536.0, 587.0, 587.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 579.0, 576.0, 582.0, 530.0, 484.0, 627.0, 582.0, 579.0, 584.0, 579.0, 590.0, 587.0, 587.0, 579.0, 636.0, 627.0, 627.0, 582.0, 542.0, 582.0, 579.0, 468.0, 539.0, 584.0, 587.0, 582.0, 633.0, 587.0, 579.0, 498.0, 627.0, 579.0, 630.0, 582.0, 630.0, 630.0, 582.0, 576.0, 587.0, 633.0, 582.0, 621.0, 627.0, 582.0, 587.0, 582.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [246.0, 232.0, 301.0, 281.0, 298.0, 284.0, 291.0, 291.0, 303.0, 273.0, 282.0, 294.0, 316.0, 311.0, 293.0, 286.0, 308.0, 322.0, 270.0, 266.0, 288.0, 291.0, 324.0, 306.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 293.0, 289.0, 296.0, 274.0, 297.0, 279.0, 315.0, 315.0, 308.0, 319.0, 266.0, 270.0, 283.0, 296.0, 290.0, 292.0, 283.0, 296.0, 296.0, 291.0, 293.0, 291.0, 316.0, 314.0, 281.0, 301.0, 316.0, 314.0, 290.0, 292.0, 289.0, 293.0, 286.0, 293.0, 295.0, 284.0, 306.0, 324.0, 293.0, 289.0, 286.0, 293.0, 278.0, 298.0, 284.0, 289.0, 288.0, 291.0, 231.0, 234.0, 279.0, 300.0, 313.0, 317.0, 297.0, 285.0, 291.0, 291.0, 271.0, 265.0, 302.0, 285.0, 290.0, 297.0, 316.0, 314.0, 286.0, 293.0, 313.0, 317.0, 294.0, 285.0, 296.0, 286.0, 287.0, 292.0, 288.0, 291.0, 290.0, 286.0, 293.0, 289.0, 260.0, 270.0, 234.0, 250.0, 308.0, 319.0, 294.0, 288.0, 290.0, 289.0, 296.0, 288.0, 292.0, 287.0, 301.0, 289.0, 301.0, 286.0, 300.0, 287.0, 300.0, 279.0, 320.0, 316.0, 303.0, 324.0, 318.0, 309.0, 294.0, 288.0, 267.0, 275.0, 288.0, 294.0, 293.0, 286.0, 236.0, 232.0, 277.0, 262.0, 288.0, 296.0, 302.0, 285.0, 285.0, 297.0, 314.0, 319.0, 296.0, 291.0, 293.0, 286.0, 245.0, 253.0, 318.0, 309.0, 295.0, 284.0, 318.0, 312.0, 293.0, 289.0, 309.0, 321.0, 310.0, 320.0, 280.0, 302.0, 293.0, 283.0, 286.0, 301.0, 320.0, 313.0, 286.0, 296.0, 314.0, 307.0, 319.0, 308.0, 301.0, 281.0, 282.0, 305.0, 288.0, 294.0, 256.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1554138563428238, "mean_processing_ms": 0.299155513024685, "mean_inference_ms": 1.7115208998476246}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5112000, "num_steps_sampled": 2726400, "sample_time_ms": 20734.364, "load_time_ms": 36.691, "grad_time_ms": 9415.374, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.0028692474588751793, "policy_loss": -0.0050502982921898365, "vf_loss": 84.87030029296875, "vf_explained_var": 0.7659473419189453, "kl": 0.0017100750701501966, "entropy": 1.134959101676941, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2726400, "episodes_total": 6816, "training_iteration": 213, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-31-49", "timestamp": 1660253509, "time_this_iter_s": 30.962037086486816, "time_total_s": 11924.605256080627, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11924.605256080627, "timesteps_since_restore": 2726400, "iterations_since_restore": 213, "perf": {"cpu_util_percent": 30.947727272727267, "ram_util_percent": 58.58863636363639}} -{"episode_reward_max": 636.0, "episode_reward_min": 345.0, "episode_reward_mean": 584.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 292.095}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.59, "shaped_reward_min": 105, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.49, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.08, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.65, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.44, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.56, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, 
"useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.85, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.56, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.56, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 587.0, 576.0, 582.0, 587.0, 630.0, 587.0, 630.0, 476.0, 579.0, 587.0, 584.0, 587.0, 627.0, 579.0, 558.0, 479.0, 630.0, 579.0, 630.0, 579.0, 630.0, 584.0, 576.0, 627.0, 627.0, 345.0, 579.0, 621.0, 582.0, 519.0, 587.0, 587.0, 579.0, 636.0, 627.0, 627.0, 582.0, 542.0, 582.0, 579.0, 468.0, 539.0, 584.0, 587.0, 582.0, 633.0, 587.0, 579.0, 498.0, 627.0, 579.0, 630.0, 582.0, 630.0, 630.0, 582.0, 576.0, 587.0, 633.0, 582.0, 621.0, 627.0, 582.0, 587.0, 582.0, 525.0, 478.0, 582.0, 582.0, 582.0, 576.0, 576.0, 627.0, 579.0, 630.0, 536.0, 579.0, 630.0, 582.0, 582.0, 579.0, 582.0, 570.0, 576.0, 630.0, 627.0, 536.0, 579.0, 582.0, 579.0, 587.0, 584.0, 630.0, 582.0, 630.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 283.0, 309.0, 321.0, 287.0, 300.0, 290.0, 286.0, 289.0, 293.0, 296.0, 291.0, 314.0, 316.0, 303.0, 284.0, 314.0, 316.0, 243.0, 233.0, 273.0, 306.0, 296.0, 291.0, 291.0, 293.0, 298.0, 289.0, 307.0, 320.0, 284.0, 295.0, 264.0, 294.0, 238.0, 241.0, 326.0, 304.0, 288.0, 291.0, 319.0, 311.0, 289.0, 290.0, 311.0, 319.0, 292.0, 292.0, 295.0, 281.0, 321.0, 306.0, 316.0, 311.0, 173.0, 172.0, 295.0, 284.0, 302.0, 319.0, 291.0, 291.0, 259.0, 260.0, 301.0, 286.0, 300.0, 287.0, 300.0, 279.0, 320.0, 316.0, 303.0, 324.0, 318.0, 309.0, 294.0, 288.0, 267.0, 275.0, 288.0, 294.0, 293.0, 286.0, 236.0, 232.0, 277.0, 262.0, 288.0, 296.0, 302.0, 285.0, 285.0, 297.0, 314.0, 319.0, 296.0, 291.0, 293.0, 286.0, 245.0, 253.0, 318.0, 309.0, 295.0, 284.0, 318.0, 312.0, 293.0, 289.0, 309.0, 321.0, 310.0, 320.0, 280.0, 302.0, 293.0, 283.0, 286.0, 301.0, 320.0, 313.0, 286.0, 296.0, 314.0, 307.0, 319.0, 308.0, 301.0, 281.0, 282.0, 305.0, 288.0, 294.0, 256.0, 269.0, 246.0, 232.0, 301.0, 281.0, 298.0, 284.0, 291.0, 291.0, 303.0, 273.0, 282.0, 294.0, 316.0, 311.0, 293.0, 286.0, 308.0, 322.0, 270.0, 266.0, 288.0, 291.0, 324.0, 306.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 293.0, 289.0, 296.0, 274.0, 297.0, 279.0, 315.0, 315.0, 308.0, 319.0, 266.0, 270.0, 283.0, 296.0, 290.0, 292.0, 283.0, 296.0, 296.0, 291.0, 293.0, 291.0, 316.0, 314.0, 281.0, 301.0, 316.0, 314.0, 290.0, 292.0, 289.0, 293.0, 286.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1521297133023998, "mean_processing_ms": 0.2984947227408811, "mean_inference_ms": 1.7084618155808986}, "off_policy_estimator": {}, "info": 
{"num_steps_trained": 5136000, "num_steps_sampled": 2739200, "sample_time_ms": 20745.898, "load_time_ms": 36.566, "grad_time_ms": 9296.474, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004741498734802008, "policy_loss": -0.003847965970635414, "vf_loss": 91.54241943359375, "vf_explained_var": 0.7623968124389648, "kl": 0.00236759171821177, "entropy": 1.1295729875564575, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2739200, "episodes_total": 6848, "training_iteration": 214, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-32-19", "timestamp": 1660253539, "time_this_iter_s": 29.774744749069214, "time_total_s": 11954.380000829697, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": 
false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, 
"object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11954.380000829697, "timesteps_since_restore": 2739200, "iterations_since_restore": 214, "perf": {"cpu_util_percent": 27.035714285714292, "ram_util_percent": 58.526190476190486}} -{"episode_reward_max": 636.0, "episode_reward_min": 345.0, "episode_reward_mean": 584.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 292.065}, "custom_metrics": {"sparse_reward_mean": 202.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.13, "shaped_reward_min": 105, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, 
"useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.77, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.7, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.86, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.22, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.38, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.22, 
"dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.22, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.38, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.22, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.38, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 558.0, 579.0, 576.0, 579.0, 536.0, 633.0, 584.0, 587.0, 576.0, 536.0, 627.0, 582.0, 627.0, 456.0, 584.0, 579.0, 582.0, 587.0, 621.0, 518.0, 567.0, 633.0, 627.0, 624.0, 582.0, 590.0, 621.0, 630.0, 630.0, 582.0, 636.0, 582.0, 587.0, 582.0, 525.0, 478.0, 582.0, 582.0, 582.0, 576.0, 576.0, 627.0, 579.0, 630.0, 536.0, 579.0, 630.0, 582.0, 582.0, 579.0, 582.0, 570.0, 576.0, 630.0, 627.0, 536.0, 579.0, 582.0, 579.0, 587.0, 584.0, 630.0, 582.0, 630.0, 582.0, 582.0, 579.0, 582.0, 630.0, 587.0, 576.0, 582.0, 587.0, 630.0, 587.0, 630.0, 476.0, 579.0, 587.0, 584.0, 587.0, 627.0, 579.0, 558.0, 479.0, 630.0, 579.0, 630.0, 579.0, 630.0, 584.0, 576.0, 627.0, 
627.0, 345.0, 579.0, 621.0, 582.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 281.0, 277.0, 292.0, 287.0, 287.0, 289.0, 286.0, 293.0, 262.0, 274.0, 319.0, 314.0, 297.0, 287.0, 288.0, 299.0, 285.0, 291.0, 278.0, 258.0, 308.0, 319.0, 304.0, 278.0, 326.0, 301.0, 225.0, 231.0, 295.0, 289.0, 300.0, 279.0, 301.0, 281.0, 293.0, 294.0, 315.0, 306.0, 247.0, 271.0, 280.0, 287.0, 326.0, 307.0, 311.0, 316.0, 317.0, 307.0, 286.0, 296.0, 290.0, 300.0, 313.0, 308.0, 313.0, 317.0, 318.0, 312.0, 296.0, 286.0, 321.0, 315.0, 301.0, 281.0, 282.0, 305.0, 288.0, 294.0, 256.0, 269.0, 246.0, 232.0, 301.0, 281.0, 298.0, 284.0, 291.0, 291.0, 303.0, 273.0, 282.0, 294.0, 316.0, 311.0, 293.0, 286.0, 308.0, 322.0, 270.0, 266.0, 288.0, 291.0, 324.0, 306.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 293.0, 289.0, 296.0, 274.0, 297.0, 279.0, 315.0, 315.0, 308.0, 319.0, 266.0, 270.0, 283.0, 296.0, 290.0, 292.0, 283.0, 296.0, 296.0, 291.0, 293.0, 291.0, 316.0, 314.0, 281.0, 301.0, 316.0, 314.0, 290.0, 292.0, 289.0, 293.0, 286.0, 293.0, 299.0, 283.0, 309.0, 321.0, 287.0, 300.0, 290.0, 286.0, 289.0, 293.0, 296.0, 291.0, 314.0, 316.0, 303.0, 284.0, 314.0, 316.0, 243.0, 233.0, 273.0, 306.0, 296.0, 291.0, 291.0, 293.0, 298.0, 289.0, 307.0, 320.0, 284.0, 295.0, 264.0, 294.0, 238.0, 241.0, 326.0, 304.0, 288.0, 291.0, 319.0, 311.0, 289.0, 290.0, 311.0, 319.0, 292.0, 292.0, 295.0, 281.0, 321.0, 306.0, 316.0, 311.0, 173.0, 172.0, 295.0, 284.0, 302.0, 319.0, 291.0, 291.0, 259.0, 260.0]}, 
"sampler_perf": {"mean_env_wait_ms": 1.1488626119910377, "mean_processing_ms": 0.297841305997747, "mean_inference_ms": 1.7053304969218863}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5160000, "num_steps_sampled": 2752000, "sample_time_ms": 20853.401, "load_time_ms": 36.595, "grad_time_ms": 9300.168, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004025696776807308, "policy_loss": -0.0038255956023931503, "vf_loss": 84.18643951416016, "vf_explained_var": 0.7665885090827942, "kl": 0.0019039264880120754, "entropy": 1.1346958875656128, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2752000, "episodes_total": 6880, "training_iteration": 215, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-32-49", "timestamp": 1660253569, "time_this_iter_s": 30.592424869537354, "time_total_s": 11984.972425699234, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 11984.972425699234, "timesteps_since_restore": 2752000, "iterations_since_restore": 215, "perf": {"cpu_util_percent": 30.46511627906977, "ram_util_percent": 58.576744186046504}} -{"episode_reward_max": 636.0, "episode_reward_min": 345.0, "episode_reward_mean": 589.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 294.55}, "custom_metrics": {"sparse_reward_mean": 204.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 181.1, "shaped_reward_min": 105, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.05, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.94, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.63, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.02, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.88, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.09, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 
17.64, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.8, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.09, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.64, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.09, 
"viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.64, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 630.0, 627.0, 582.0, 627.0, 579.0, 579.0, 567.0, 587.0, 587.0, 630.0, 579.0, 582.0, 576.0, 582.0, 630.0, 570.0, 582.0, 627.0, 587.0, 582.0, 582.0, 630.0, 582.0, 627.0, 582.0, 627.0, 587.0, 633.0, 587.0, 582.0, 630.0, 630.0, 582.0, 582.0, 579.0, 582.0, 630.0, 587.0, 576.0, 582.0, 587.0, 630.0, 587.0, 630.0, 476.0, 579.0, 587.0, 584.0, 587.0, 627.0, 579.0, 558.0, 479.0, 630.0, 579.0, 630.0, 579.0, 630.0, 584.0, 576.0, 627.0, 627.0, 345.0, 579.0, 
621.0, 582.0, 519.0, 636.0, 558.0, 579.0, 576.0, 579.0, 536.0, 633.0, 584.0, 587.0, 576.0, 536.0, 627.0, 582.0, 627.0, 456.0, 584.0, 579.0, 582.0, 587.0, 621.0, 518.0, 567.0, 633.0, 627.0, 624.0, 582.0, 590.0, 621.0, 630.0, 630.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 282.0, 317.0, 313.0, 309.0, 318.0, 296.0, 286.0, 316.0, 311.0, 288.0, 291.0, 285.0, 294.0, 286.0, 281.0, 298.0, 289.0, 298.0, 289.0, 314.0, 316.0, 290.0, 289.0, 289.0, 293.0, 291.0, 285.0, 285.0, 297.0, 306.0, 324.0, 298.0, 272.0, 283.0, 299.0, 311.0, 316.0, 293.0, 294.0, 300.0, 282.0, 293.0, 289.0, 319.0, 311.0, 285.0, 297.0, 310.0, 317.0, 293.0, 289.0, 310.0, 317.0, 300.0, 287.0, 316.0, 317.0, 298.0, 289.0, 283.0, 299.0, 319.0, 311.0, 316.0, 314.0, 290.0, 292.0, 289.0, 293.0, 286.0, 293.0, 299.0, 283.0, 309.0, 321.0, 287.0, 300.0, 290.0, 286.0, 289.0, 293.0, 296.0, 291.0, 314.0, 316.0, 303.0, 284.0, 314.0, 316.0, 243.0, 233.0, 273.0, 306.0, 296.0, 291.0, 291.0, 293.0, 298.0, 289.0, 307.0, 320.0, 284.0, 295.0, 264.0, 294.0, 238.0, 241.0, 326.0, 304.0, 288.0, 291.0, 319.0, 311.0, 289.0, 290.0, 311.0, 319.0, 292.0, 292.0, 295.0, 281.0, 321.0, 306.0, 316.0, 311.0, 173.0, 172.0, 295.0, 284.0, 302.0, 319.0, 291.0, 291.0, 259.0, 260.0, 314.0, 322.0, 281.0, 277.0, 292.0, 287.0, 287.0, 289.0, 286.0, 293.0, 262.0, 274.0, 319.0, 314.0, 297.0, 287.0, 288.0, 299.0, 285.0, 291.0, 278.0, 258.0, 308.0, 319.0, 304.0, 278.0, 326.0, 301.0, 225.0, 231.0, 295.0, 289.0, 300.0, 279.0, 301.0, 281.0, 293.0, 
294.0, 315.0, 306.0, 247.0, 271.0, 280.0, 287.0, 326.0, 307.0, 311.0, 316.0, 317.0, 307.0, 286.0, 296.0, 290.0, 300.0, 313.0, 308.0, 313.0, 317.0, 318.0, 312.0, 296.0, 286.0, 321.0, 315.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1456158536618073, "mean_processing_ms": 0.29719304474995795, "mean_inference_ms": 1.7020409366400755}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5184000, "num_steps_sampled": 2764800, "sample_time_ms": 20854.663, "load_time_ms": 36.839, "grad_time_ms": 9330.064, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0055513703264296055, "policy_loss": -0.0025626528076827526, "vf_loss": 86.77967071533203, "vf_explained_var": 0.7667043805122375, "kl": 0.00211916770786047, "entropy": 1.1278961896896362, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2764800, "episodes_total": 6912, "training_iteration": 216, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-33-19", "timestamp": 1660253599, "time_this_iter_s": 29.99899387359619, "time_total_s": 12014.97141957283, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12014.97141957283, "timesteps_since_restore": 2764800, "iterations_since_restore": 216, "perf": {"cpu_util_percent": 32.32380952380952, "ram_util_percent": 58.607142857142854}} -{"episode_reward_max": 639.0, "episode_reward_min": 456.0, "episode_reward_mean": 596.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 298.185}, "custom_metrics": {"sparse_reward_mean": 206.6, 
"sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 183.17, "shaped_reward_min": 136, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.25, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.9, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.82, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.07, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.31, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.48, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.77, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.31, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.31, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 636.0, 582.0, 584.0, 587.0, 636.0, 630.0, 573.0, 573.0, 630.0, 582.0, 627.0, 576.0, 633.0, 630.0, 633.0, 587.0, 633.0, 579.0, 579.0, 587.0, 582.0, 627.0, 630.0, 639.0, 587.0, 582.0, 579.0, 582.0, 576.0, 636.0, 579.0, 621.0, 582.0, 519.0, 636.0, 558.0, 579.0, 576.0, 579.0, 
536.0, 633.0, 584.0, 587.0, 576.0, 536.0, 627.0, 582.0, 627.0, 456.0, 584.0, 579.0, 582.0, 587.0, 621.0, 518.0, 567.0, 633.0, 627.0, 624.0, 582.0, 590.0, 621.0, 630.0, 630.0, 582.0, 636.0, 576.0, 630.0, 627.0, 582.0, 627.0, 579.0, 579.0, 567.0, 587.0, 587.0, 630.0, 579.0, 582.0, 576.0, 582.0, 630.0, 570.0, 582.0, 627.0, 587.0, 582.0, 582.0, 630.0, 582.0, 627.0, 582.0, 627.0, 587.0, 633.0, 587.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 329.0, 321.0, 309.0, 321.0, 315.0, 289.0, 293.0, 294.0, 290.0, 281.0, 306.0, 306.0, 330.0, 313.0, 317.0, 279.0, 294.0, 288.0, 285.0, 316.0, 314.0, 293.0, 289.0, 319.0, 308.0, 288.0, 288.0, 324.0, 309.0, 322.0, 308.0, 311.0, 322.0, 295.0, 292.0, 316.0, 317.0, 283.0, 296.0, 287.0, 292.0, 293.0, 294.0, 278.0, 304.0, 316.0, 311.0, 321.0, 309.0, 315.0, 324.0, 306.0, 281.0, 293.0, 289.0, 293.0, 286.0, 293.0, 289.0, 291.0, 285.0, 314.0, 322.0, 295.0, 284.0, 302.0, 319.0, 291.0, 291.0, 259.0, 260.0, 314.0, 322.0, 281.0, 277.0, 292.0, 287.0, 287.0, 289.0, 286.0, 293.0, 262.0, 274.0, 319.0, 314.0, 297.0, 287.0, 288.0, 299.0, 285.0, 291.0, 278.0, 258.0, 308.0, 319.0, 304.0, 278.0, 326.0, 301.0, 225.0, 231.0, 295.0, 289.0, 300.0, 279.0, 301.0, 281.0, 293.0, 294.0, 315.0, 306.0, 247.0, 271.0, 280.0, 287.0, 326.0, 307.0, 311.0, 316.0, 317.0, 307.0, 286.0, 296.0, 290.0, 300.0, 313.0, 308.0, 313.0, 317.0, 318.0, 312.0, 296.0, 286.0, 321.0, 315.0, 294.0, 282.0, 317.0, 313.0, 309.0, 318.0, 296.0, 286.0, 316.0, 311.0, 288.0, 291.0, 285.0, 
294.0, 286.0, 281.0, 298.0, 289.0, 298.0, 289.0, 314.0, 316.0, 290.0, 289.0, 289.0, 293.0, 291.0, 285.0, 285.0, 297.0, 306.0, 324.0, 298.0, 272.0, 283.0, 299.0, 311.0, 316.0, 293.0, 294.0, 300.0, 282.0, 293.0, 289.0, 319.0, 311.0, 285.0, 297.0, 310.0, 317.0, 293.0, 289.0, 310.0, 317.0, 300.0, 287.0, 316.0, 317.0, 298.0, 289.0, 283.0, 299.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1424035037874523, "mean_processing_ms": 0.29655443936404674, "mean_inference_ms": 1.6989240589354977}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5208000, "num_steps_sampled": 2777600, "sample_time_ms": 20971.783, "load_time_ms": 36.769, "grad_time_ms": 9428.242, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002736276714131236, "policy_loss": -0.004988871049135923, "vf_loss": 82.89418029785156, "vf_explained_var": 0.7724503874778748, "kl": 0.00226503680460155, "entropy": 1.1285419464111328, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2777600, "episodes_total": 6944, "training_iteration": 217, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-33-52", "timestamp": 1660253632, "time_this_iter_s": 32.39657115936279, "time_total_s": 12047.367990732193, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12047.367990732193, "timesteps_since_restore": 2777600, "iterations_since_restore": 217, "perf": {"cpu_util_percent": 33.11304347826087, "ram_util_percent": 58.56521739130436}} -{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 593.94, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 296.97}, "custom_metrics": {"sparse_reward_mean": 205.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 183.14, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.16, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.67, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, 
"useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.59, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.59, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.53, 
"optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.59, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 579.0, 587.0, 582.0, 630.0, 636.0, 579.0, 633.0, 579.0, 584.0, 
582.0, 579.0, 579.0, 579.0, 630.0, 630.0, 587.0, 587.0, 518.0, 633.0, 587.0, 582.0, 530.0, 582.0, 462.0, 582.0, 627.0, 291.0, 587.0, 579.0, 582.0, 630.0, 630.0, 582.0, 636.0, 576.0, 630.0, 627.0, 582.0, 627.0, 579.0, 579.0, 567.0, 587.0, 587.0, 630.0, 579.0, 582.0, 576.0, 582.0, 630.0, 570.0, 582.0, 627.0, 587.0, 582.0, 582.0, 630.0, 582.0, 627.0, 582.0, 627.0, 587.0, 633.0, 587.0, 582.0, 630.0, 627.0, 630.0, 636.0, 582.0, 584.0, 587.0, 636.0, 630.0, 573.0, 573.0, 630.0, 582.0, 627.0, 576.0, 633.0, 630.0, 633.0, 587.0, 633.0, 579.0, 579.0, 587.0, 582.0, 627.0, 630.0, 639.0, 587.0, 582.0, 579.0, 582.0, 576.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 306.0, 282.0, 297.0, 298.0, 281.0, 301.0, 286.0, 283.0, 299.0, 319.0, 311.0, 321.0, 315.0, 299.0, 280.0, 315.0, 318.0, 294.0, 285.0, 298.0, 286.0, 288.0, 294.0, 297.0, 282.0, 282.0, 297.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 296.0, 291.0, 291.0, 296.0, 262.0, 256.0, 317.0, 316.0, 294.0, 293.0, 296.0, 286.0, 262.0, 268.0, 289.0, 293.0, 228.0, 234.0, 288.0, 294.0, 313.0, 314.0, 145.0, 146.0, 291.0, 296.0, 293.0, 286.0, 286.0, 296.0, 313.0, 317.0, 318.0, 312.0, 296.0, 286.0, 321.0, 315.0, 294.0, 282.0, 317.0, 313.0, 309.0, 318.0, 296.0, 286.0, 316.0, 311.0, 288.0, 291.0, 285.0, 294.0, 286.0, 281.0, 298.0, 289.0, 298.0, 289.0, 314.0, 316.0, 290.0, 289.0, 289.0, 293.0, 291.0, 285.0, 285.0, 297.0, 306.0, 324.0, 298.0, 272.0, 283.0, 299.0, 311.0, 316.0, 293.0, 294.0, 300.0, 282.0, 293.0, 289.0, 319.0, 311.0, 285.0, 
297.0, 310.0, 317.0, 293.0, 289.0, 310.0, 317.0, 300.0, 287.0, 316.0, 317.0, 298.0, 289.0, 283.0, 299.0, 319.0, 311.0, 298.0, 329.0, 321.0, 309.0, 321.0, 315.0, 289.0, 293.0, 294.0, 290.0, 281.0, 306.0, 306.0, 330.0, 313.0, 317.0, 279.0, 294.0, 288.0, 285.0, 316.0, 314.0, 293.0, 289.0, 319.0, 308.0, 288.0, 288.0, 324.0, 309.0, 322.0, 308.0, 311.0, 322.0, 295.0, 292.0, 316.0, 317.0, 283.0, 296.0, 287.0, 292.0, 293.0, 294.0, 278.0, 304.0, 316.0, 311.0, 321.0, 309.0, 315.0, 324.0, 306.0, 281.0, 293.0, 289.0, 293.0, 286.0, 293.0, 289.0, 291.0, 285.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1392367943050357, "mean_processing_ms": 0.29592632211468906, "mean_inference_ms": 1.6959597907664128}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5232000, "num_steps_sampled": 2790400, "sample_time_ms": 21233.48, "load_time_ms": 36.919, "grad_time_ms": 9592.49, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006457938347011805, "policy_loss": -0.0026744985952973366, "vf_loss": 97.0146713256836, "vf_explained_var": 0.7470273375511169, "kl": 0.0016420072643086314, "entropy": 1.1380563974380493, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2790400, "episodes_total": 6976, "training_iteration": 218, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-34-25", "timestamp": 1660253665, "time_this_iter_s": 33.2370343208313, "time_total_s": 12080.605025053024, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12080.605025053024, "timesteps_since_restore": 2790400, "iterations_since_restore": 218, "perf": {"cpu_util_percent": 
35.75531914893618, "ram_util_percent": 58.63191489361703}} -{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 593.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 296.79}, "custom_metrics": {"sparse_reward_mean": 205.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 182.78, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.44, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, 
"onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.81, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.31, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 15.81, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.31, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.81, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.31, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 627.0, 555.0, 636.0, 627.0, 582.0, 630.0, 408.0, 582.0, 587.0, 570.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 587.0, 573.0, 630.0, 587.0, 576.0, 630.0, 576.0, 630.0, 630.0, 584.0, 582.0, 582.0, 567.0, 633.0, 582.0, 633.0, 587.0, 582.0, 630.0, 627.0, 630.0, 636.0, 582.0, 584.0, 587.0, 636.0, 630.0, 573.0, 573.0, 630.0, 582.0, 627.0, 576.0, 633.0, 630.0, 633.0, 587.0, 633.0, 579.0, 579.0, 587.0, 582.0, 627.0, 630.0, 639.0, 587.0, 582.0, 579.0, 582.0, 576.0, 636.0, 582.0, 579.0, 579.0, 587.0, 582.0, 630.0, 636.0, 579.0, 633.0, 579.0, 584.0, 582.0, 579.0, 579.0, 579.0, 630.0, 630.0, 587.0, 587.0, 518.0, 633.0, 587.0, 582.0, 530.0, 582.0, 462.0, 582.0, 627.0, 291.0, 587.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 318.0, 309.0, 269.0, 286.0, 324.0, 312.0, 319.0, 308.0, 291.0, 291.0, 318.0, 312.0, 202.0, 206.0, 292.0, 290.0, 293.0, 294.0, 278.0, 292.0, 289.0, 284.0, 332.0, 301.0, 322.0, 317.0, 311.0, 322.0, 326.0, 304.0, 317.0, 313.0, 301.0, 286.0, 277.0, 296.0, 316.0, 314.0, 296.0, 291.0, 272.0, 304.0, 319.0, 311.0, 291.0, 285.0, 313.0, 317.0, 316.0, 314.0, 290.0, 294.0, 299.0, 283.0, 285.0, 297.0, 281.0, 286.0, 317.0, 316.0, 287.0, 295.0, 316.0, 317.0, 298.0, 289.0, 283.0, 299.0, 319.0, 311.0, 298.0, 329.0, 321.0, 309.0, 321.0, 315.0, 289.0, 293.0, 294.0, 290.0, 281.0, 306.0, 306.0, 330.0, 313.0, 317.0, 279.0, 294.0, 288.0, 
285.0, 316.0, 314.0, 293.0, 289.0, 319.0, 308.0, 288.0, 288.0, 324.0, 309.0, 322.0, 308.0, 311.0, 322.0, 295.0, 292.0, 316.0, 317.0, 283.0, 296.0, 287.0, 292.0, 293.0, 294.0, 278.0, 304.0, 316.0, 311.0, 321.0, 309.0, 315.0, 324.0, 306.0, 281.0, 293.0, 289.0, 293.0, 286.0, 293.0, 289.0, 291.0, 285.0, 314.0, 322.0, 276.0, 306.0, 282.0, 297.0, 298.0, 281.0, 301.0, 286.0, 283.0, 299.0, 319.0, 311.0, 321.0, 315.0, 299.0, 280.0, 315.0, 318.0, 294.0, 285.0, 298.0, 286.0, 288.0, 294.0, 297.0, 282.0, 282.0, 297.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 296.0, 291.0, 291.0, 296.0, 262.0, 256.0, 317.0, 316.0, 294.0, 293.0, 296.0, 286.0, 262.0, 268.0, 289.0, 293.0, 228.0, 234.0, 288.0, 294.0, 313.0, 314.0, 145.0, 146.0, 291.0, 296.0, 293.0, 286.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1361092001504756, "mean_processing_ms": 0.29530600936138574, "mean_inference_ms": 1.6931104739373604}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5256000, "num_steps_sampled": 2803200, "sample_time_ms": 21336.415, "load_time_ms": 37.483, "grad_time_ms": 9561.229, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005191893433220685, "policy_loss": -0.0074623264372348785, "vf_loss": 85.4925765991211, "vf_explained_var": 0.7601101994514465, "kl": 0.0019686671439558268, "entropy": 1.1354910135269165, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2803200, "episodes_total": 7008, "training_iteration": 219, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-34-55", "timestamp": 1660253695, "time_this_iter_s": 30.000843048095703, "time_total_s": 12110.60586810112, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12110.60586810112, "timesteps_since_restore": 2803200, "iterations_since_restore": 219, "perf": {"cpu_util_percent": 30.46666666666667, "ram_util_percent": 58.68571428571429}} -{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 588.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 294.01}, "custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.62, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.07, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.97, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 
16.42, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.77, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.68, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.87, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.03, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 2, 
"soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.87, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.03, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.87, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.03, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 624.0, 582.0, 518.0, 582.0, 630.0, 576.0, 633.0, 587.0, 582.0, 587.0, 582.0, 584.0, 582.0, 627.0, 544.0, 579.0, 576.0, 487.0, 582.0, 582.0, 584.0, 636.0, 582.0, 633.0, 630.0, 539.0, 579.0, 579.0, 627.0, 630.0, 579.0, 582.0, 576.0, 636.0, 582.0, 579.0, 579.0, 587.0, 582.0, 630.0, 636.0, 579.0, 633.0, 579.0, 584.0, 582.0, 579.0, 579.0, 579.0, 630.0, 630.0, 587.0, 587.0, 518.0, 633.0, 587.0, 582.0, 530.0, 582.0, 462.0, 582.0, 627.0, 291.0, 587.0, 579.0, 582.0, 636.0, 627.0, 555.0, 636.0, 627.0, 582.0, 630.0, 408.0, 582.0, 587.0, 570.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 587.0, 573.0, 630.0, 587.0, 576.0, 630.0, 576.0, 630.0, 630.0, 584.0, 582.0, 582.0, 567.0, 633.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 322.0, 292.0, 290.0, 321.0, 303.0, 292.0, 290.0, 252.0, 266.0, 297.0, 285.0, 313.0, 317.0, 298.0, 278.0, 324.0, 309.0, 285.0, 302.0, 291.0, 291.0, 291.0, 296.0, 291.0, 291.0, 287.0, 297.0, 291.0, 291.0, 310.0, 317.0, 272.0, 272.0, 290.0, 289.0, 293.0, 283.0, 257.0, 230.0, 285.0, 297.0, 285.0, 297.0, 300.0, 284.0, 312.0, 324.0, 277.0, 305.0, 313.0, 320.0, 321.0, 309.0, 265.0, 274.0, 294.0, 285.0, 299.0, 280.0, 311.0, 
316.0, 309.0, 321.0, 293.0, 286.0, 293.0, 289.0, 291.0, 285.0, 314.0, 322.0, 276.0, 306.0, 282.0, 297.0, 298.0, 281.0, 301.0, 286.0, 283.0, 299.0, 319.0, 311.0, 321.0, 315.0, 299.0, 280.0, 315.0, 318.0, 294.0, 285.0, 298.0, 286.0, 288.0, 294.0, 297.0, 282.0, 282.0, 297.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 296.0, 291.0, 291.0, 296.0, 262.0, 256.0, 317.0, 316.0, 294.0, 293.0, 296.0, 286.0, 262.0, 268.0, 289.0, 293.0, 228.0, 234.0, 288.0, 294.0, 313.0, 314.0, 145.0, 146.0, 291.0, 296.0, 293.0, 286.0, 286.0, 296.0, 319.0, 317.0, 318.0, 309.0, 269.0, 286.0, 324.0, 312.0, 319.0, 308.0, 291.0, 291.0, 318.0, 312.0, 202.0, 206.0, 292.0, 290.0, 293.0, 294.0, 278.0, 292.0, 289.0, 284.0, 332.0, 301.0, 322.0, 317.0, 311.0, 322.0, 326.0, 304.0, 317.0, 313.0, 301.0, 286.0, 277.0, 296.0, 316.0, 314.0, 296.0, 291.0, 272.0, 304.0, 319.0, 311.0, 291.0, 285.0, 313.0, 317.0, 316.0, 314.0, 290.0, 294.0, 299.0, 283.0, 285.0, 297.0, 281.0, 286.0, 317.0, 316.0, 287.0, 295.0]}, "sampler_perf": {"mean_env_wait_ms": 1.133009827763421, "mean_processing_ms": 0.29469029655090995, "mean_inference_ms": 1.6902501086005228}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5280000, "num_steps_sampled": 2816000, "sample_time_ms": 21400.362, "load_time_ms": 37.346, "grad_time_ms": 9606.188, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002275400562211871, "policy_loss": -0.0062116296030581, "vf_loss": 90.5528793334961, "vf_explained_var": 0.7516798973083496, "kl": 0.0019114302704110742, "entropy": 1.1365100145339966, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2816000, "episodes_total": 7040, "training_iteration": 220, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-35-26", "timestamp": 1660253726, "time_this_iter_s": 30.873941659927368, "time_total_s": 12141.479809761047, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12141.479809761047, "timesteps_since_restore": 2816000, "iterations_since_restore": 220, "perf": {"cpu_util_percent": 31.518181818181816, "ram_util_percent": 58.6159090909091}} -{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 590.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 295.165}, "custom_metrics": {"sparse_reward_mean": 204.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.93, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, 
"onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.88, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.64, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.09, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.71, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.9, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.34, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, 
"soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.1, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.9, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.9, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 579.0, 573.0, 579.0, 627.0, 630.0, 582.0, 587.0, 627.0, 630.0, 636.0, 573.0, 582.0, 582.0, 579.0, 582.0, 627.0, 570.0, 582.0, 573.0, 593.0, 590.0, 582.0, 579.0, 555.0, 627.0, 539.0, 636.0, 582.0, 633.0, 582.0, 291.0, 587.0, 579.0, 582.0, 636.0, 627.0, 555.0, 636.0, 627.0, 582.0, 630.0, 408.0, 582.0, 587.0, 570.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 587.0, 573.0, 630.0, 587.0, 576.0, 630.0, 576.0, 630.0, 630.0, 584.0, 582.0, 582.0, 567.0, 633.0, 582.0, 630.0, 582.0, 624.0, 582.0, 518.0, 582.0, 630.0, 576.0, 633.0, 587.0, 582.0, 587.0, 582.0, 584.0, 582.0, 627.0, 544.0, 579.0, 576.0, 487.0, 582.0, 582.0, 584.0, 636.0, 582.0, 633.0, 630.0, 539.0, 579.0, 579.0, 627.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 306.0, 324.0, 302.0, 277.0, 288.0, 285.0, 284.0, 295.0, 322.0, 305.0, 313.0, 317.0, 289.0, 293.0, 287.0, 300.0, 310.0, 317.0, 308.0, 322.0, 314.0, 322.0, 291.0, 282.0, 298.0, 284.0, 301.0, 281.0, 285.0, 294.0, 300.0, 282.0, 299.0, 328.0, 284.0, 
286.0, 288.0, 294.0, 283.0, 290.0, 289.0, 304.0, 291.0, 299.0, 294.0, 288.0, 283.0, 296.0, 285.0, 270.0, 311.0, 316.0, 260.0, 279.0, 327.0, 309.0, 285.0, 297.0, 324.0, 309.0, 290.0, 292.0, 145.0, 146.0, 291.0, 296.0, 293.0, 286.0, 286.0, 296.0, 319.0, 317.0, 318.0, 309.0, 269.0, 286.0, 324.0, 312.0, 319.0, 308.0, 291.0, 291.0, 318.0, 312.0, 202.0, 206.0, 292.0, 290.0, 293.0, 294.0, 278.0, 292.0, 289.0, 284.0, 332.0, 301.0, 322.0, 317.0, 311.0, 322.0, 326.0, 304.0, 317.0, 313.0, 301.0, 286.0, 277.0, 296.0, 316.0, 314.0, 296.0, 291.0, 272.0, 304.0, 319.0, 311.0, 291.0, 285.0, 313.0, 317.0, 316.0, 314.0, 290.0, 294.0, 299.0, 283.0, 285.0, 297.0, 281.0, 286.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 292.0, 290.0, 321.0, 303.0, 292.0, 290.0, 252.0, 266.0, 297.0, 285.0, 313.0, 317.0, 298.0, 278.0, 324.0, 309.0, 285.0, 302.0, 291.0, 291.0, 291.0, 296.0, 291.0, 291.0, 287.0, 297.0, 291.0, 291.0, 310.0, 317.0, 272.0, 272.0, 290.0, 289.0, 293.0, 283.0, 257.0, 230.0, 285.0, 297.0, 285.0, 297.0, 300.0, 284.0, 312.0, 324.0, 277.0, 305.0, 313.0, 320.0, 321.0, 309.0, 265.0, 274.0, 294.0, 285.0, 299.0, 280.0, 311.0, 316.0, 309.0, 321.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1299217967931974, "mean_processing_ms": 0.29407401911825776, "mean_inference_ms": 1.6873151851132406}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5304000, "num_steps_sampled": 2828800, "sample_time_ms": 21404.207, "load_time_ms": 36.943, "grad_time_ms": 9639.523, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004844650160521269, "policy_loss": -0.004174739122390747, "vf_loss": 95.8445816040039, "vf_explained_var": 0.7459821701049805, "kl": 0.0019909220281988382, "entropy": 1.1301350593566895, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2828800, "episodes_total": 7072, "training_iteration": 221, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-35-57", "timestamp": 1660253757, "time_this_iter_s": 30.906293869018555, "time_total_s": 12172.386103630066, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12172.386103630066, "timesteps_since_restore": 2828800, "iterations_since_restore": 221, "perf": {"cpu_util_percent": 27.49545454545455, "ram_util_percent": 58.63636363636363}} -{"episode_reward_max": 639.0, "episode_reward_min": 487.0, "episode_reward_mean": 596.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 298.01}, "custom_metrics": {"sparse_reward_mean": 206.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 183.62, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.36, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.93, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.85, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.1, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.23, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.04, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.22, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.23, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.04, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.23, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.04, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 579.0, 630.0, 630.0, 627.0, 587.0, 639.0, 587.0, 630.0, 582.0, 633.0, 587.0, 627.0, 579.0, 576.0, 633.0, 582.0, 570.0, 630.0, 627.0, 579.0, 627.0, 582.0, 633.0, 633.0, 630.0, 579.0, 582.0, 627.0, 627.0, 576.0, 582.0, 567.0, 633.0, 582.0, 630.0, 582.0, 624.0, 582.0, 518.0, 582.0, 630.0, 576.0, 633.0, 587.0, 582.0, 587.0, 582.0, 584.0, 582.0, 627.0, 544.0, 579.0, 576.0, 487.0, 582.0, 582.0, 584.0, 636.0, 582.0, 633.0, 630.0, 539.0, 579.0, 579.0, 627.0, 630.0, 582.0, 630.0, 579.0, 573.0, 579.0, 627.0, 630.0, 582.0, 587.0, 627.0, 630.0, 636.0, 573.0, 582.0, 582.0, 579.0, 582.0, 627.0, 570.0, 582.0, 573.0, 593.0, 590.0, 582.0, 579.0, 555.0, 627.0, 539.0, 636.0, 582.0, 633.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 283.0, 296.0, 293.0, 
286.0, 310.0, 320.0, 318.0, 312.0, 318.0, 309.0, 290.0, 297.0, 325.0, 314.0, 290.0, 297.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 295.0, 292.0, 298.0, 329.0, 280.0, 299.0, 280.0, 296.0, 318.0, 315.0, 293.0, 289.0, 283.0, 287.0, 319.0, 311.0, 305.0, 322.0, 285.0, 294.0, 309.0, 318.0, 285.0, 297.0, 320.0, 313.0, 311.0, 322.0, 313.0, 317.0, 296.0, 283.0, 286.0, 296.0, 305.0, 322.0, 310.0, 317.0, 289.0, 287.0, 285.0, 297.0, 281.0, 286.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 292.0, 290.0, 321.0, 303.0, 292.0, 290.0, 252.0, 266.0, 297.0, 285.0, 313.0, 317.0, 298.0, 278.0, 324.0, 309.0, 285.0, 302.0, 291.0, 291.0, 291.0, 296.0, 291.0, 291.0, 287.0, 297.0, 291.0, 291.0, 310.0, 317.0, 272.0, 272.0, 290.0, 289.0, 293.0, 283.0, 257.0, 230.0, 285.0, 297.0, 285.0, 297.0, 300.0, 284.0, 312.0, 324.0, 277.0, 305.0, 313.0, 320.0, 321.0, 309.0, 265.0, 274.0, 294.0, 285.0, 299.0, 280.0, 311.0, 316.0, 309.0, 321.0, 290.0, 292.0, 306.0, 324.0, 302.0, 277.0, 288.0, 285.0, 284.0, 295.0, 322.0, 305.0, 313.0, 317.0, 289.0, 293.0, 287.0, 300.0, 310.0, 317.0, 308.0, 322.0, 314.0, 322.0, 291.0, 282.0, 298.0, 284.0, 301.0, 281.0, 285.0, 294.0, 300.0, 282.0, 299.0, 328.0, 284.0, 286.0, 288.0, 294.0, 283.0, 290.0, 289.0, 304.0, 291.0, 299.0, 294.0, 288.0, 283.0, 296.0, 285.0, 270.0, 311.0, 316.0, 260.0, 279.0, 327.0, 309.0, 285.0, 297.0, 324.0, 309.0, 290.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1268569615111614, "mean_processing_ms": 0.2934624000398477, "mean_inference_ms": 1.6844698808001166}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5328000, "num_steps_sampled": 2841600, "sample_time_ms": 21356.225, "load_time_ms": 36.973, "grad_time_ms": 9654.508, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037463188637048006, "policy_loss": -0.0037510646507143974, "vf_loss": 80.60189056396484, "vf_explained_var": 0.7646245360374451, "kl": 0.002355078933760524, "entropy": 1.125628113746643, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2841600, "episodes_total": 7104, "training_iteration": 222, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-36-29", "timestamp": 1660253789, "time_this_iter_s": 31.9894540309906, "time_total_s": 12204.375557661057, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12204.375557661057, "timesteps_since_restore": 2841600, "iterations_since_restore": 222, "perf": {"cpu_util_percent": 30.18222222222223, "ram_util_percent": 58.70444444444445}} -{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 601.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 300.665}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.13, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.26, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.21, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.46, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.21, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.21, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 633.0, 579.0, 582.0, 582.0, 633.0, 627.0, 630.0, 630.0, 636.0, 633.0, 579.0, 519.0, 552.0, 627.0, 582.0, 582.0, 627.0, 630.0, 582.0, 582.0, 627.0, 630.0, 582.0, 587.0, 630.0, 627.0, 582.0, 627.0, 576.0, 630.0, 630.0, 579.0, 579.0, 627.0, 630.0, 582.0, 630.0, 579.0, 573.0, 579.0, 627.0, 630.0, 582.0, 587.0, 627.0, 630.0, 636.0, 573.0, 582.0, 582.0, 579.0, 582.0, 627.0, 570.0, 582.0, 573.0, 593.0, 590.0, 582.0, 579.0, 555.0, 627.0, 539.0, 636.0, 582.0, 633.0, 582.0, 582.0, 579.0, 579.0, 630.0, 630.0, 627.0, 587.0, 639.0, 587.0, 630.0, 582.0, 633.0, 587.0, 627.0, 579.0, 576.0, 633.0, 582.0, 570.0, 630.0, 627.0, 579.0, 627.0, 582.0, 633.0, 633.0, 630.0, 579.0, 582.0, 627.0, 627.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 327.0, 306.0, 292.0, 287.0, 280.0, 302.0, 282.0, 300.0, 318.0, 315.0, 305.0, 322.0, 314.0, 316.0, 323.0, 307.0, 319.0, 317.0, 322.0, 311.0, 297.0, 282.0, 261.0, 258.0, 274.0, 278.0, 308.0, 319.0, 290.0, 292.0, 296.0, 286.0, 308.0, 319.0, 322.0, 308.0, 288.0, 294.0, 290.0, 292.0, 324.0, 303.0, 309.0, 321.0, 290.0, 292.0, 298.0, 289.0, 314.0, 316.0, 319.0, 308.0, 288.0, 294.0, 313.0, 314.0, 288.0, 288.0, 309.0, 321.0, 324.0, 306.0, 294.0, 285.0, 299.0, 280.0, 311.0, 316.0, 309.0, 321.0, 290.0, 292.0, 306.0, 324.0, 302.0, 277.0, 288.0, 285.0, 284.0, 295.0, 322.0, 305.0, 313.0, 317.0, 289.0, 293.0, 287.0, 300.0, 310.0, 317.0, 308.0, 322.0, 314.0, 322.0, 291.0, 282.0, 298.0, 284.0, 301.0, 281.0, 285.0, 294.0, 300.0, 282.0, 299.0, 328.0, 284.0, 286.0, 288.0, 294.0, 283.0, 290.0, 289.0, 304.0, 291.0, 299.0, 294.0, 288.0, 283.0, 296.0, 285.0, 270.0, 311.0, 316.0, 260.0, 279.0, 327.0, 309.0, 285.0, 297.0, 324.0, 309.0, 290.0, 292.0, 296.0, 286.0, 283.0, 296.0, 293.0, 286.0, 310.0, 320.0, 318.0, 312.0, 318.0, 309.0, 290.0, 297.0, 325.0, 314.0, 290.0, 297.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 295.0, 292.0, 298.0, 329.0, 280.0, 299.0, 280.0, 296.0, 318.0, 315.0, 293.0, 289.0, 283.0, 287.0, 319.0, 311.0, 305.0, 322.0, 285.0, 294.0, 309.0, 318.0, 285.0, 297.0, 320.0, 313.0, 311.0, 322.0, 313.0, 317.0, 296.0, 283.0, 286.0, 296.0, 305.0, 322.0, 310.0, 317.0, 289.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 1.123810088157581, "mean_processing_ms": 0.2928524721479363, "mean_inference_ms": 1.6814399707435803}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5352000, "num_steps_sampled": 2854400, "sample_time_ms": 21091.047, "load_time_ms": 36.918, "grad_time_ms": 9784.155, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": -5.6165892601711676e-05, "policy_loss": -0.007852478884160519, "vf_loss": 83.60053253173828, "vf_explained_var": 0.7575058937072754, "kl": 0.001709200325421989, "entropy": 1.1274746656417847, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2854400, "episodes_total": 7136, "training_iteration": 223, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-36-59", "timestamp": 1660253819, "time_this_iter_s": 29.606478929519653, "time_total_s": 12233.982036590576, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12233.982036590576, "timesteps_since_restore": 2854400, "iterations_since_restore": 223, "perf": {"cpu_util_percent": 31.040476190476188, "ram_util_percent": 58.538095238095245}} -{"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 595.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 297.745}, "custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 182.69, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.82, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 582.0, 636.0, 630.0, 66.0, 573.0, 633.0, 573.0, 570.0, 579.0, 582.0, 630.0, 633.0, 582.0, 582.0, 579.0, 572.0, 579.0, 582.0, 587.0, 587.0, 630.0, 630.0, 630.0, 582.0, 582.0, 579.0, 419.0, 587.0, 633.0, 587.0, 636.0, 582.0, 633.0, 582.0, 582.0, 579.0, 579.0, 630.0, 630.0, 627.0, 587.0, 639.0, 587.0, 630.0, 582.0, 633.0, 587.0, 627.0, 579.0, 576.0, 633.0, 582.0, 570.0, 630.0, 627.0, 579.0, 627.0, 582.0, 633.0, 633.0, 630.0, 579.0, 582.0, 627.0, 627.0, 576.0, 582.0, 633.0, 579.0, 582.0, 582.0, 633.0, 627.0, 630.0, 630.0, 636.0, 633.0, 579.0, 519.0, 552.0, 627.0, 582.0, 582.0, 627.0, 630.0, 582.0, 582.0, 627.0, 630.0, 582.0, 587.0, 630.0, 627.0, 582.0, 627.0, 576.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 295.0, 321.0, 309.0, 298.0, 284.0, 316.0, 320.0, 316.0, 314.0, 34.0, 32.0, 277.0, 296.0, 309.0, 324.0, 279.0, 294.0, 285.0, 285.0, 285.0, 294.0, 285.0, 297.0, 332.0, 298.0, 316.0, 317.0, 296.0, 286.0, 296.0, 286.0, 288.0, 291.0, 274.0, 298.0, 290.0, 289.0, 283.0, 299.0, 291.0, 296.0, 305.0, 282.0, 311.0, 319.0, 303.0, 327.0, 312.0, 318.0, 296.0, 286.0, 291.0, 291.0, 293.0, 286.0, 214.0, 205.0, 287.0, 300.0, 323.0, 310.0, 288.0, 299.0, 327.0, 309.0, 285.0, 297.0, 324.0, 309.0, 290.0, 292.0, 296.0, 286.0, 283.0, 296.0, 293.0, 286.0, 310.0, 320.0, 318.0, 312.0, 318.0, 309.0, 290.0, 297.0, 325.0, 314.0, 290.0, 297.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 295.0, 292.0, 298.0, 329.0, 280.0, 299.0, 280.0, 296.0, 318.0, 315.0, 293.0, 289.0, 283.0, 287.0, 319.0, 311.0, 305.0, 322.0, 285.0, 294.0, 309.0, 318.0, 285.0, 297.0, 320.0, 313.0, 311.0, 322.0, 313.0, 317.0, 296.0, 283.0, 286.0, 296.0, 305.0, 322.0, 310.0, 317.0, 289.0, 287.0, 290.0, 292.0, 327.0, 306.0, 292.0, 287.0, 280.0, 302.0, 282.0, 300.0, 318.0, 315.0, 305.0, 322.0, 314.0, 316.0, 323.0, 307.0, 319.0, 317.0, 322.0, 311.0, 297.0, 282.0, 261.0, 258.0, 274.0, 278.0, 308.0, 319.0, 290.0, 292.0, 296.0, 286.0, 308.0, 319.0, 322.0, 308.0, 288.0, 294.0, 290.0, 292.0, 324.0, 303.0, 309.0, 321.0, 290.0, 292.0, 298.0, 289.0, 314.0, 316.0, 319.0, 308.0, 288.0, 294.0, 313.0, 314.0, 288.0, 288.0, 309.0, 321.0, 324.0, 306.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1207907910306545, "mean_processing_ms": 0.2922476172198019, "mean_inference_ms": 1.6784104187428721}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5376000, 
"num_steps_sampled": 2867200, "sample_time_ms": 21165.857, "load_time_ms": 36.996, "grad_time_ms": 9704.336, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003032231703400612, "policy_loss": -0.005307988729327917, "vf_loss": 89.09744262695312, "vf_explained_var": 0.7809851765632629, "kl": 0.0017985772574320436, "entropy": 1.139058232307434, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2867200, "episodes_total": 7168, "training_iteration": 224, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-37-28", "timestamp": 1660253848, "time_this_iter_s": 29.72331213951111, "time_total_s": 12263.705348730087, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12263.705348730087, "timesteps_since_restore": 2867200, "iterations_since_restore": 224, "perf": {"cpu_util_percent": 31.121428571428574, "ram_util_percent": 58.642857142857146}} -{"episode_reward_max": 636.0, "episode_reward_min": 66.0, "episode_reward_mean": 594.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 297.15}, "custom_metrics": {"sparse_reward_mean": 206.0, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 182.3, "shaped_reward_min": 26, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.89, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 0, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.89, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.89, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 624.0, 582.0, 579.0, 630.0, 579.0, 636.0, 582.0, 630.0, 576.0, 621.0, 633.0, 582.0, 584.0, 636.0, 579.0, 576.0, 579.0, 636.0, 630.0, 633.0, 582.0, 570.0, 630.0, 582.0, 627.0, 627.0, 576.0, 582.0, 633.0, 579.0, 582.0, 582.0, 633.0, 627.0, 630.0, 630.0, 636.0, 633.0, 579.0, 519.0, 552.0, 627.0, 582.0, 582.0, 627.0, 630.0, 582.0, 582.0, 627.0, 630.0, 582.0, 587.0, 630.0, 627.0, 582.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 582.0, 636.0, 630.0, 66.0, 573.0, 633.0, 573.0, 570.0, 579.0, 582.0, 630.0, 633.0, 582.0, 582.0, 579.0, 572.0, 579.0, 582.0, 587.0, 587.0, 630.0, 630.0, 630.0, 582.0, 
582.0, 579.0, 419.0, 587.0, 633.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 289.0, 293.0, 283.0, 299.0, 306.0, 324.0, 293.0, 294.0, 323.0, 304.0, 284.0, 295.0, 319.0, 314.0, 283.0, 299.0, 322.0, 302.0, 294.0, 288.0, 284.0, 295.0, 319.0, 311.0, 291.0, 288.0, 316.0, 320.0, 294.0, 288.0, 326.0, 304.0, 292.0, 284.0, 314.0, 307.0, 319.0, 314.0, 286.0, 296.0, 285.0, 299.0, 324.0, 312.0, 297.0, 282.0, 291.0, 285.0, 287.0, 292.0, 321.0, 315.0, 313.0, 317.0, 317.0, 316.0, 296.0, 286.0, 292.0, 278.0, 316.0, 314.0, 286.0, 296.0, 305.0, 322.0, 310.0, 317.0, 289.0, 287.0, 290.0, 292.0, 327.0, 306.0, 292.0, 287.0, 280.0, 302.0, 282.0, 300.0, 318.0, 315.0, 305.0, 322.0, 314.0, 316.0, 323.0, 307.0, 319.0, 317.0, 322.0, 311.0, 297.0, 282.0, 261.0, 258.0, 274.0, 278.0, 308.0, 319.0, 290.0, 292.0, 296.0, 286.0, 308.0, 319.0, 322.0, 308.0, 288.0, 294.0, 290.0, 292.0, 324.0, 303.0, 309.0, 321.0, 290.0, 292.0, 298.0, 289.0, 314.0, 316.0, 319.0, 308.0, 288.0, 294.0, 313.0, 314.0, 288.0, 288.0, 309.0, 321.0, 324.0, 306.0, 287.0, 295.0, 321.0, 309.0, 298.0, 284.0, 316.0, 320.0, 316.0, 314.0, 34.0, 32.0, 277.0, 296.0, 309.0, 324.0, 279.0, 294.0, 285.0, 285.0, 285.0, 294.0, 285.0, 297.0, 332.0, 298.0, 316.0, 317.0, 296.0, 286.0, 296.0, 286.0, 288.0, 291.0, 274.0, 298.0, 290.0, 289.0, 283.0, 299.0, 291.0, 296.0, 305.0, 282.0, 311.0, 319.0, 303.0, 327.0, 312.0, 318.0, 296.0, 286.0, 291.0, 291.0, 293.0, 286.0, 214.0, 205.0, 287.0, 300.0, 323.0, 310.0, 288.0, 299.0]}, 
"sampler_perf": {"mean_env_wait_ms": 1.117796938264296, "mean_processing_ms": 0.29164906507723115, "mean_inference_ms": 1.675395869901085}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5400000, "num_steps_sampled": 2880000, "sample_time_ms": 21210.508, "load_time_ms": 36.928, "grad_time_ms": 9651.029, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003380303969606757, "policy_loss": -0.0046376134268939495, "vf_loss": 85.82404327392578, "vf_explained_var": 0.7595102190971375, "kl": 0.0017190409125760198, "entropy": 1.1289840936660767, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2880000, "episodes_total": 7200, "training_iteration": 225, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-37-59", "timestamp": 1660253879, "time_this_iter_s": 30.507438898086548, "time_total_s": 12294.212787628174, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12294.212787628174, "timesteps_since_restore": 2880000, "iterations_since_restore": 225, "perf": {"cpu_util_percent": 29.595348837209304, "ram_util_percent": 58.604651162790695}} -{"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 592.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 296.345}, "custom_metrics": {"sparse_reward_mean": 205.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 182.29, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.96, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.88, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.08, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 
17.1, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, 
"viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 575.0, 582.0, 627.0, 630.0, 627.0, 579.0, 579.0, 630.0, 630.0, 579.0, 633.0, 584.0, 567.0, 627.0, 627.0, 582.0, 627.0, 587.0, 587.0, 582.0, 576.0, 579.0, 633.0, 639.0, 582.0, 587.0, 579.0, 582.0, 533.0, 582.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 582.0, 636.0, 630.0, 66.0, 573.0, 633.0, 573.0, 570.0, 579.0, 582.0, 630.0, 633.0, 582.0, 582.0, 579.0, 572.0, 579.0, 582.0, 587.0, 587.0, 630.0, 630.0, 630.0, 582.0, 582.0, 579.0, 419.0, 587.0, 
633.0, 587.0, 582.0, 582.0, 582.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 624.0, 582.0, 579.0, 630.0, 579.0, 636.0, 582.0, 630.0, 576.0, 621.0, 633.0, 582.0, 584.0, 636.0, 579.0, 576.0, 579.0, 636.0, 630.0, 633.0, 582.0, 570.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 309.0, 288.0, 294.0, 284.0, 291.0, 293.0, 289.0, 311.0, 316.0, 314.0, 316.0, 313.0, 314.0, 290.0, 289.0, 290.0, 289.0, 324.0, 306.0, 311.0, 319.0, 292.0, 287.0, 319.0, 314.0, 291.0, 293.0, 296.0, 271.0, 323.0, 304.0, 309.0, 318.0, 284.0, 298.0, 309.0, 318.0, 282.0, 305.0, 297.0, 290.0, 287.0, 295.0, 291.0, 285.0, 287.0, 292.0, 316.0, 317.0, 317.0, 322.0, 294.0, 288.0, 301.0, 286.0, 282.0, 297.0, 298.0, 284.0, 270.0, 263.0, 288.0, 294.0, 313.0, 314.0, 288.0, 288.0, 309.0, 321.0, 324.0, 306.0, 287.0, 295.0, 321.0, 309.0, 298.0, 284.0, 316.0, 320.0, 316.0, 314.0, 34.0, 32.0, 277.0, 296.0, 309.0, 324.0, 279.0, 294.0, 285.0, 285.0, 285.0, 294.0, 285.0, 297.0, 332.0, 298.0, 316.0, 317.0, 296.0, 286.0, 296.0, 286.0, 288.0, 291.0, 274.0, 298.0, 290.0, 289.0, 283.0, 299.0, 291.0, 296.0, 305.0, 282.0, 311.0, 319.0, 303.0, 327.0, 312.0, 318.0, 296.0, 286.0, 291.0, 291.0, 293.0, 286.0, 214.0, 205.0, 287.0, 300.0, 323.0, 310.0, 288.0, 299.0, 294.0, 288.0, 289.0, 293.0, 283.0, 299.0, 306.0, 324.0, 293.0, 294.0, 323.0, 304.0, 284.0, 295.0, 319.0, 314.0, 283.0, 299.0, 322.0, 302.0, 294.0, 288.0, 284.0, 295.0, 319.0, 311.0, 291.0, 288.0, 316.0, 320.0, 294.0, 288.0, 326.0, 304.0, 292.0, 284.0, 314.0, 307.0, 
319.0, 314.0, 286.0, 296.0, 285.0, 299.0, 324.0, 312.0, 297.0, 282.0, 291.0, 285.0, 287.0, 292.0, 321.0, 315.0, 313.0, 317.0, 317.0, 316.0, 296.0, 286.0, 292.0, 278.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1148293138411964, "mean_processing_ms": 0.29105729699863353, "mean_inference_ms": 1.6724660361311725}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5424000, "num_steps_sampled": 2892800, "sample_time_ms": 21194.238, "load_time_ms": 37.016, "grad_time_ms": 9566.171, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004350067116320133, "policy_loss": -0.004312645178288221, "vf_loss": 92.26403045654297, "vf_explained_var": 0.7493538856506348, "kl": 0.0016388074727728963, "entropy": 1.1273828744888306, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2892800, "episodes_total": 7232, "training_iteration": 226, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-38-28", "timestamp": 1660253908, "time_this_iter_s": 28.989330291748047, "time_total_s": 12323.202117919922, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12323.202117919922, "timesteps_since_restore": 2892800, "iterations_since_restore": 226, "perf": {"cpu_util_percent": 31.509756097560977, "ram_util_percent": 58.60975609756099}} -{"episode_reward_max": 639.0, "episode_reward_min": 419.0, "episode_reward_mean": 596.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 205.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 298.35}, "custom_metrics": {"sparse_reward_mean": 206.4, 
"sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 183.9, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.96, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.09, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.38, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 
0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.6, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.91, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.04, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 582.0, 587.0, 624.0, 582.0, 618.0, 582.0, 539.0, 633.0, 582.0, 582.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 579.0, 630.0, 630.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 573.0, 582.0, 582.0, 587.0, 636.0, 419.0, 587.0, 633.0, 587.0, 582.0, 582.0, 582.0, 630.0, 587.0, 
627.0, 579.0, 633.0, 582.0, 624.0, 582.0, 579.0, 630.0, 579.0, 636.0, 582.0, 630.0, 576.0, 621.0, 633.0, 582.0, 584.0, 636.0, 579.0, 576.0, 579.0, 636.0, 630.0, 633.0, 582.0, 570.0, 630.0, 630.0, 582.0, 575.0, 582.0, 627.0, 630.0, 627.0, 579.0, 579.0, 630.0, 630.0, 579.0, 633.0, 584.0, 567.0, 627.0, 627.0, 582.0, 627.0, 587.0, 587.0, 582.0, 576.0, 579.0, 633.0, 639.0, 582.0, 587.0, 579.0, 582.0, 533.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 287.0, 292.0, 295.0, 287.0, 292.0, 295.0, 321.0, 303.0, 291.0, 291.0, 305.0, 313.0, 283.0, 299.0, 270.0, 269.0, 316.0, 317.0, 288.0, 294.0, 290.0, 292.0, 298.0, 284.0, 283.0, 299.0, 319.0, 317.0, 309.0, 324.0, 291.0, 291.0, 287.0, 295.0, 293.0, 286.0, 312.0, 318.0, 314.0, 316.0, 309.0, 278.0, 324.0, 306.0, 291.0, 291.0, 297.0, 279.0, 317.0, 310.0, 295.0, 284.0, 282.0, 291.0, 288.0, 294.0, 290.0, 292.0, 294.0, 293.0, 325.0, 311.0, 214.0, 205.0, 287.0, 300.0, 323.0, 310.0, 288.0, 299.0, 294.0, 288.0, 289.0, 293.0, 283.0, 299.0, 306.0, 324.0, 293.0, 294.0, 323.0, 304.0, 284.0, 295.0, 319.0, 314.0, 283.0, 299.0, 322.0, 302.0, 294.0, 288.0, 284.0, 295.0, 319.0, 311.0, 291.0, 288.0, 316.0, 320.0, 294.0, 288.0, 326.0, 304.0, 292.0, 284.0, 314.0, 307.0, 319.0, 314.0, 286.0, 296.0, 285.0, 299.0, 324.0, 312.0, 297.0, 282.0, 291.0, 285.0, 287.0, 292.0, 321.0, 315.0, 313.0, 317.0, 317.0, 316.0, 296.0, 286.0, 292.0, 278.0, 316.0, 314.0, 321.0, 309.0, 288.0, 294.0, 284.0, 291.0, 293.0, 289.0, 311.0, 316.0, 314.0, 316.0, 313.0, 
314.0, 290.0, 289.0, 290.0, 289.0, 324.0, 306.0, 311.0, 319.0, 292.0, 287.0, 319.0, 314.0, 291.0, 293.0, 296.0, 271.0, 323.0, 304.0, 309.0, 318.0, 284.0, 298.0, 309.0, 318.0, 282.0, 305.0, 297.0, 290.0, 287.0, 295.0, 291.0, 285.0, 287.0, 292.0, 316.0, 317.0, 317.0, 322.0, 294.0, 288.0, 301.0, 286.0, 282.0, 297.0, 298.0, 284.0, 270.0, 263.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.111882925643259, "mean_processing_ms": 0.2904711783343595, "mean_inference_ms": 1.6695128259184024}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5448000, "num_steps_sampled": 2905600, "sample_time_ms": 21046.741, "load_time_ms": 36.918, "grad_time_ms": 9303.678, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001480274717323482, "policy_loss": -0.006882220506668091, "vf_loss": 89.27208709716797, "vf_explained_var": 0.7621426582336426, "kl": 0.0023567674215883017, "entropy": 1.1294348239898682, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2905600, "episodes_total": 7264, "training_iteration": 227, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-38-56", "timestamp": 1660253936, "time_this_iter_s": 28.29434609413147, "time_total_s": 12351.496464014053, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12351.496464014053, "timesteps_since_restore": 2905600, "iterations_since_restore": 227, "perf": {"cpu_util_percent": 34.097500000000004, "ram_util_percent": 58.625000000000014}} -{"episode_reward_max": 639.0, "episode_reward_min": 533.0, "episode_reward_mean": 598.73, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 263.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 299.365}, "custom_metrics": {"sparse_reward_mean": 207.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.33, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.07, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.63, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 
4, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.27, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.9, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.08, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.27, 
"optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.27, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 633.0, 582.0, 630.0, 630.0, 579.0, 630.0, 587.0, 630.0, 582.0, 627.0, 
582.0, 627.0, 633.0, 541.0, 579.0, 582.0, 633.0, 630.0, 624.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 630.0, 630.0, 630.0, 633.0, 576.0, 582.0, 633.0, 582.0, 570.0, 630.0, 630.0, 582.0, 575.0, 582.0, 627.0, 630.0, 627.0, 579.0, 579.0, 630.0, 630.0, 579.0, 633.0, 584.0, 567.0, 627.0, 627.0, 582.0, 627.0, 587.0, 587.0, 582.0, 576.0, 579.0, 633.0, 639.0, 582.0, 587.0, 579.0, 582.0, 533.0, 582.0, 579.0, 579.0, 582.0, 587.0, 624.0, 582.0, 618.0, 582.0, 539.0, 633.0, 582.0, 582.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 579.0, 630.0, 630.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 573.0, 582.0, 582.0, 587.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [279.0, 297.0, 316.0, 317.0, 293.0, 289.0, 306.0, 324.0, 311.0, 319.0, 292.0, 287.0, 314.0, 316.0, 290.0, 297.0, 312.0, 318.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 313.0, 314.0, 317.0, 316.0, 266.0, 275.0, 289.0, 290.0, 279.0, 303.0, 322.0, 311.0, 303.0, 327.0, 308.0, 316.0, 291.0, 291.0, 288.0, 294.0, 285.0, 297.0, 280.0, 302.0, 289.0, 293.0, 291.0, 288.0, 317.0, 313.0, 317.0, 313.0, 319.0, 311.0, 317.0, 316.0, 266.0, 310.0, 294.0, 288.0, 317.0, 316.0, 296.0, 286.0, 292.0, 278.0, 316.0, 314.0, 321.0, 309.0, 288.0, 294.0, 284.0, 291.0, 293.0, 289.0, 311.0, 316.0, 314.0, 316.0, 313.0, 314.0, 290.0, 289.0, 290.0, 289.0, 324.0, 306.0, 311.0, 319.0, 292.0, 287.0, 319.0, 314.0, 291.0, 293.0, 296.0, 271.0, 323.0, 304.0, 309.0, 318.0, 284.0, 298.0, 309.0, 318.0, 282.0, 305.0, 297.0, 290.0, 287.0, 295.0, 291.0, 285.0, 287.0, 
292.0, 316.0, 317.0, 317.0, 322.0, 294.0, 288.0, 301.0, 286.0, 282.0, 297.0, 298.0, 284.0, 270.0, 263.0, 288.0, 294.0, 287.0, 292.0, 287.0, 292.0, 295.0, 287.0, 292.0, 295.0, 321.0, 303.0, 291.0, 291.0, 305.0, 313.0, 283.0, 299.0, 270.0, 269.0, 316.0, 317.0, 288.0, 294.0, 290.0, 292.0, 298.0, 284.0, 283.0, 299.0, 319.0, 317.0, 309.0, 324.0, 291.0, 291.0, 287.0, 295.0, 293.0, 286.0, 312.0, 318.0, 314.0, 316.0, 309.0, 278.0, 324.0, 306.0, 291.0, 291.0, 297.0, 279.0, 317.0, 310.0, 295.0, 284.0, 282.0, 291.0, 288.0, 294.0, 290.0, 292.0, 294.0, 293.0, 325.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.108955063453815, "mean_processing_ms": 0.2898914011168066, "mean_inference_ms": 1.6664528501755567}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5472000, "num_steps_sampled": 2918400, "sample_time_ms": 20842.466, "load_time_ms": 36.811, "grad_time_ms": 9088.977, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0036609917879104614, "policy_loss": -0.0044582299888134, "vf_loss": 86.8133316040039, "vf_explained_var": 0.7590463161468506, "kl": 0.0019074537558481097, "entropy": 1.124218463897705, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2918400, "episodes_total": 7296, "training_iteration": 228, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-39-25", "timestamp": 1660253965, "time_this_iter_s": 29.044671058654785, "time_total_s": 12380.541135072708, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12380.541135072708, "timesteps_since_restore": 2918400, "iterations_since_restore": 228, "perf": {"cpu_util_percent": 
34.02195121951219, "ram_util_percent": 58.739024390243905}} -{"episode_reward_max": 636.0, "episode_reward_min": 524.0, "episode_reward_mean": 595.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 297.94}, "custom_metrics": {"sparse_reward_mean": 206.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.88, "shaped_reward_min": 164, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, 
"onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.9, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.34, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.9, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 15.9, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.34, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.9, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.34, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 576.0, 627.0, 630.0, 582.0, 579.0, 582.0, 633.0, 525.0, 587.0, 573.0, 627.0, 587.0, 624.0, 524.0, 587.0, 633.0, 579.0, 536.0, 582.0, 587.0, 627.0, 579.0, 533.0, 582.0, 627.0, 587.0, 633.0, 627.0, 582.0, 573.0, 633.0, 579.0, 582.0, 533.0, 582.0, 579.0, 579.0, 582.0, 587.0, 624.0, 582.0, 618.0, 582.0, 539.0, 633.0, 582.0, 582.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 579.0, 630.0, 630.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 573.0, 582.0, 582.0, 587.0, 636.0, 576.0, 633.0, 582.0, 630.0, 630.0, 579.0, 630.0, 587.0, 630.0, 582.0, 627.0, 582.0, 627.0, 633.0, 541.0, 579.0, 582.0, 633.0, 630.0, 624.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 630.0, 630.0, 630.0, 633.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [320.0, 316.0, 287.0, 289.0, 312.0, 315.0, 316.0, 314.0, 296.0, 286.0, 286.0, 293.0, 285.0, 297.0, 320.0, 313.0, 259.0, 266.0, 291.0, 296.0, 292.0, 281.0, 310.0, 317.0, 297.0, 290.0, 317.0, 307.0, 253.0, 271.0, 280.0, 307.0, 319.0, 314.0, 284.0, 295.0, 267.0, 269.0, 290.0, 292.0, 298.0, 289.0, 310.0, 317.0, 288.0, 291.0, 275.0, 258.0, 294.0, 288.0, 313.0, 314.0, 283.0, 304.0, 322.0, 311.0, 316.0, 311.0, 300.0, 282.0, 278.0, 295.0, 319.0, 314.0, 282.0, 297.0, 298.0, 284.0, 270.0, 263.0, 288.0, 294.0, 287.0, 292.0, 287.0, 292.0, 295.0, 287.0, 292.0, 295.0, 321.0, 303.0, 291.0, 291.0, 305.0, 313.0, 283.0, 299.0, 270.0, 269.0, 316.0, 
317.0, 288.0, 294.0, 290.0, 292.0, 298.0, 284.0, 283.0, 299.0, 319.0, 317.0, 309.0, 324.0, 291.0, 291.0, 287.0, 295.0, 293.0, 286.0, 312.0, 318.0, 314.0, 316.0, 309.0, 278.0, 324.0, 306.0, 291.0, 291.0, 297.0, 279.0, 317.0, 310.0, 295.0, 284.0, 282.0, 291.0, 288.0, 294.0, 290.0, 292.0, 294.0, 293.0, 325.0, 311.0, 279.0, 297.0, 316.0, 317.0, 293.0, 289.0, 306.0, 324.0, 311.0, 319.0, 292.0, 287.0, 314.0, 316.0, 290.0, 297.0, 312.0, 318.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 313.0, 314.0, 317.0, 316.0, 266.0, 275.0, 289.0, 290.0, 279.0, 303.0, 322.0, 311.0, 303.0, 327.0, 308.0, 316.0, 291.0, 291.0, 288.0, 294.0, 285.0, 297.0, 280.0, 302.0, 289.0, 293.0, 291.0, 288.0, 317.0, 313.0, 317.0, 313.0, 319.0, 311.0, 317.0, 316.0, 266.0, 310.0, 294.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1060616527900746, "mean_processing_ms": 0.28931749831274756, "mean_inference_ms": 1.663577876904456}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5496000, "num_steps_sampled": 2931200, "sample_time_ms": 20902.121, "load_time_ms": 36.225, "grad_time_ms": 9147.239, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004833377432078123, "policy_loss": -0.003439890220761299, "vf_loss": 88.37776947021484, "vf_explained_var": 0.7585814595222473, "kl": 0.0015477427514269948, "entropy": 1.1290167570114136, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2931200, "episodes_total": 7328, "training_iteration": 229, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-39-56", "timestamp": 1660253996, "time_this_iter_s": 31.170966863632202, "time_total_s": 12411.71210193634, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12411.71210193634, "timesteps_since_restore": 2931200, "iterations_since_restore": 229, "perf": {"cpu_util_percent": 34.31136363636364, "ram_util_percent": 58.67272727272726}} -{"episode_reward_max": 636.0, "episode_reward_min": 524.0, "episode_reward_mean": 598.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 299.285}, "custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.57, "shaped_reward_min": 164, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.94, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, 
"useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.43, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.56, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.95, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.0, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.03, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 630.0, 579.0, 627.0, 579.0, 579.0, 587.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 576.0, 582.0, 539.0, 633.0, 587.0, 579.0, 630.0, 633.0, 633.0, 633.0, 636.0, 579.0, 582.0, 582.0, 587.0, 636.0, 576.0, 633.0, 582.0, 630.0, 630.0, 579.0, 630.0, 587.0, 630.0, 582.0, 627.0, 582.0, 627.0, 633.0, 541.0, 579.0, 582.0, 633.0, 630.0, 624.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 630.0, 630.0, 630.0, 633.0, 576.0, 582.0, 636.0, 576.0, 627.0, 630.0, 582.0, 579.0, 582.0, 633.0, 525.0, 587.0, 573.0, 627.0, 587.0, 624.0, 524.0, 587.0, 633.0, 579.0, 536.0, 582.0, 587.0, 627.0, 579.0, 533.0, 582.0, 627.0, 587.0, 633.0, 627.0, 582.0, 573.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 311.0, 316.0, 317.0, 296.0, 286.0, 303.0, 284.0, 316.0, 314.0, 313.0, 317.0, 317.0, 310.0, 293.0, 289.0, 314.0, 316.0, 285.0, 294.0, 316.0, 311.0, 286.0, 293.0, 283.0, 296.0, 289.0, 298.0, 285.0, 291.0, 287.0, 292.0, 297.0, 279.0, 294.0, 293.0, 288.0, 294.0, 284.0, 298.0, 287.0, 289.0, 290.0, 292.0, 273.0, 266.0, 310.0, 323.0, 295.0, 292.0, 294.0, 285.0, 321.0, 309.0, 321.0, 
312.0, 313.0, 320.0, 331.0, 302.0, 321.0, 315.0, 296.0, 283.0, 288.0, 294.0, 290.0, 292.0, 294.0, 293.0, 325.0, 311.0, 279.0, 297.0, 316.0, 317.0, 293.0, 289.0, 306.0, 324.0, 311.0, 319.0, 292.0, 287.0, 314.0, 316.0, 290.0, 297.0, 312.0, 318.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 313.0, 314.0, 317.0, 316.0, 266.0, 275.0, 289.0, 290.0, 279.0, 303.0, 322.0, 311.0, 303.0, 327.0, 308.0, 316.0, 291.0, 291.0, 288.0, 294.0, 285.0, 297.0, 280.0, 302.0, 289.0, 293.0, 291.0, 288.0, 317.0, 313.0, 317.0, 313.0, 319.0, 311.0, 317.0, 316.0, 266.0, 310.0, 294.0, 288.0, 320.0, 316.0, 287.0, 289.0, 312.0, 315.0, 316.0, 314.0, 296.0, 286.0, 286.0, 293.0, 285.0, 297.0, 320.0, 313.0, 259.0, 266.0, 291.0, 296.0, 292.0, 281.0, 310.0, 317.0, 297.0, 290.0, 317.0, 307.0, 253.0, 271.0, 280.0, 307.0, 319.0, 314.0, 284.0, 295.0, 267.0, 269.0, 290.0, 292.0, 298.0, 289.0, 310.0, 317.0, 288.0, 291.0, 275.0, 258.0, 294.0, 288.0, 313.0, 314.0, 283.0, 304.0, 322.0, 311.0, 316.0, 311.0, 300.0, 282.0, 278.0, 295.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.1032059640461922, "mean_processing_ms": 0.28875135486760906, "mean_inference_ms": 1.6608895336787544}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5520000, "num_steps_sampled": 2944000, "sample_time_ms": 20966.765, "load_time_ms": 36.396, "grad_time_ms": 9172.251, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005542902275919914, "policy_loss": -0.0032051329035311937, "vf_loss": 93.1218490600586, "vf_explained_var": 0.7535824775695801, "kl": 0.0018033984815701842, "entropy": 1.1283119916915894, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2944000, "episodes_total": 7360, "training_iteration": 230, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-40-28", "timestamp": 1660254028, "time_this_iter_s": 31.772056102752686, 
"time_total_s": 12443.484158039093, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": 
false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": 
true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12443.484158039093, "timesteps_since_restore": 2944000, "iterations_since_restore": 230, "perf": {"cpu_util_percent": 32.88666666666666, "ram_util_percent": 58.577777777777776}} -{"episode_reward_max": 636.0, "episode_reward_min": 444.0, "episode_reward_mean": 595.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 297.695}, "custom_metrics": {"sparse_reward_mean": 205.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 183.79, "shaped_reward_min": 124, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 
0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.34, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.99, 
"soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.96, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.34, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.34, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 582.0, 576.0, 587.0, 582.0, 633.0, 444.0, 630.0, 590.0, 633.0, 522.0, 636.0, 579.0, 587.0, 633.0, 627.0, 587.0, 630.0, 636.0, 587.0, 582.0, 582.0, 582.0, 627.0, 636.0, 582.0, 627.0, 582.0, 579.0, 579.0, 633.0, 536.0, 630.0, 633.0, 576.0, 582.0, 636.0, 576.0, 627.0, 630.0, 582.0, 579.0, 582.0, 633.0, 525.0, 587.0, 573.0, 627.0, 587.0, 624.0, 524.0, 587.0, 633.0, 579.0, 536.0, 582.0, 587.0, 627.0, 579.0, 533.0, 582.0, 627.0, 587.0, 633.0, 627.0, 582.0, 573.0, 633.0, 630.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 630.0, 579.0, 627.0, 579.0, 579.0, 587.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 576.0, 582.0, 539.0, 633.0, 587.0, 579.0, 630.0, 633.0, 633.0, 633.0, 636.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 265.0, 287.0, 295.0, 277.0, 299.0, 285.0, 302.0, 293.0, 289.0, 313.0, 320.0, 229.0, 215.0, 321.0, 309.0, 292.0, 298.0, 311.0, 322.0, 259.0, 263.0, 309.0, 327.0, 288.0, 291.0, 292.0, 295.0, 315.0, 318.0, 321.0, 
306.0, 295.0, 292.0, 318.0, 312.0, 313.0, 323.0, 293.0, 294.0, 294.0, 288.0, 285.0, 297.0, 291.0, 291.0, 322.0, 305.0, 329.0, 307.0, 291.0, 291.0, 313.0, 314.0, 281.0, 301.0, 296.0, 283.0, 291.0, 288.0, 316.0, 317.0, 273.0, 263.0, 319.0, 311.0, 317.0, 316.0, 266.0, 310.0, 294.0, 288.0, 320.0, 316.0, 287.0, 289.0, 312.0, 315.0, 316.0, 314.0, 296.0, 286.0, 286.0, 293.0, 285.0, 297.0, 320.0, 313.0, 259.0, 266.0, 291.0, 296.0, 292.0, 281.0, 310.0, 317.0, 297.0, 290.0, 317.0, 307.0, 253.0, 271.0, 280.0, 307.0, 319.0, 314.0, 284.0, 295.0, 267.0, 269.0, 290.0, 292.0, 298.0, 289.0, 310.0, 317.0, 288.0, 291.0, 275.0, 258.0, 294.0, 288.0, 313.0, 314.0, 283.0, 304.0, 322.0, 311.0, 316.0, 311.0, 300.0, 282.0, 278.0, 295.0, 319.0, 314.0, 319.0, 311.0, 316.0, 317.0, 296.0, 286.0, 303.0, 284.0, 316.0, 314.0, 313.0, 317.0, 317.0, 310.0, 293.0, 289.0, 314.0, 316.0, 285.0, 294.0, 316.0, 311.0, 286.0, 293.0, 283.0, 296.0, 289.0, 298.0, 285.0, 291.0, 287.0, 292.0, 297.0, 279.0, 294.0, 293.0, 288.0, 294.0, 284.0, 298.0, 287.0, 289.0, 290.0, 292.0, 273.0, 266.0, 310.0, 323.0, 295.0, 292.0, 294.0, 285.0, 321.0, 309.0, 321.0, 312.0, 313.0, 320.0, 331.0, 302.0, 321.0, 315.0, 296.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 1.100385875299439, "mean_processing_ms": 0.2881910582639845, "mean_inference_ms": 1.6584370926312206}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5544000, "num_steps_sampled": 2956800, "sample_time_ms": 21086.396, "load_time_ms": 36.806, "grad_time_ms": 9224.029, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0032783111091703176, "policy_loss": -0.005397517699748278, "vf_loss": 92.44506072998047, "vf_explained_var": 0.7564309239387512, "kl": 0.001717855571769178, "entropy": 1.137366771697998, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2956800, "episodes_total": 7392, 
"training_iteration": 231, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-41-01", "timestamp": 1660254061, "time_this_iter_s": 32.62551975250244, "time_total_s": 12476.109677791595, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12476.109677791595, "timesteps_since_restore": 2956800, "iterations_since_restore": 231, "perf": {"cpu_util_percent": 34.25652173913044, "ram_util_percent": 58.589130434782625}} -{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 597.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 298.635}, "custom_metrics": {"sparse_reward_mean": 206.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.07, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.1, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.42, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.96, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.42, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.42, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 582.0, 587.0, 630.0, 573.0, 627.0, 579.0, 576.0, 582.0, 630.0, 587.0, 590.0, 582.0, 630.0, 627.0, 582.0, 582.0, 582.0, 573.0, 576.0, 570.0, 582.0, 582.0, 579.0, 633.0, 627.0, 633.0, 627.0, 582.0, 573.0, 633.0, 630.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 630.0, 579.0, 627.0, 579.0, 579.0, 587.0, 576.0, 579.0, 576.0, 587.0, 582.0, 582.0, 576.0, 582.0, 539.0, 633.0, 587.0, 579.0, 630.0, 633.0, 633.0, 633.0, 636.0, 579.0, 527.0, 582.0, 576.0, 587.0, 582.0, 633.0, 444.0, 630.0, 590.0, 633.0, 522.0, 636.0, 579.0, 587.0, 633.0, 627.0, 587.0, 630.0, 636.0, 587.0, 582.0, 582.0, 582.0, 627.0, 636.0, 582.0, 627.0, 582.0, 579.0, 579.0, 633.0, 536.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 306.0, 316.0, 314.0, 
277.0, 302.0, 318.0, 321.0, 286.0, 296.0, 303.0, 330.0, 286.0, 296.0, 288.0, 299.0, 324.0, 306.0, 274.0, 299.0, 311.0, 316.0, 291.0, 288.0, 292.0, 284.0, 294.0, 288.0, 318.0, 312.0, 300.0, 287.0, 296.0, 294.0, 290.0, 292.0, 304.0, 326.0, 315.0, 312.0, 294.0, 288.0, 299.0, 283.0, 291.0, 291.0, 282.0, 291.0, 290.0, 286.0, 282.0, 288.0, 294.0, 288.0, 293.0, 289.0, 297.0, 282.0, 314.0, 319.0, 308.0, 319.0, 313.0, 320.0, 316.0, 311.0, 300.0, 282.0, 278.0, 295.0, 319.0, 314.0, 319.0, 311.0, 316.0, 317.0, 296.0, 286.0, 303.0, 284.0, 316.0, 314.0, 313.0, 317.0, 317.0, 310.0, 293.0, 289.0, 314.0, 316.0, 285.0, 294.0, 316.0, 311.0, 286.0, 293.0, 283.0, 296.0, 289.0, 298.0, 285.0, 291.0, 287.0, 292.0, 297.0, 279.0, 294.0, 293.0, 288.0, 294.0, 284.0, 298.0, 287.0, 289.0, 290.0, 292.0, 273.0, 266.0, 310.0, 323.0, 295.0, 292.0, 294.0, 285.0, 321.0, 309.0, 321.0, 312.0, 313.0, 320.0, 331.0, 302.0, 321.0, 315.0, 296.0, 283.0, 262.0, 265.0, 287.0, 295.0, 277.0, 299.0, 285.0, 302.0, 293.0, 289.0, 313.0, 320.0, 229.0, 215.0, 321.0, 309.0, 292.0, 298.0, 311.0, 322.0, 259.0, 263.0, 309.0, 327.0, 288.0, 291.0, 292.0, 295.0, 315.0, 318.0, 321.0, 306.0, 295.0, 292.0, 318.0, 312.0, 313.0, 323.0, 293.0, 294.0, 294.0, 288.0, 285.0, 297.0, 291.0, 291.0, 322.0, 305.0, 329.0, 307.0, 291.0, 291.0, 313.0, 314.0, 281.0, 301.0, 296.0, 283.0, 291.0, 288.0, 316.0, 317.0, 273.0, 263.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0975911945131436, "mean_processing_ms": 0.28763548597333904, "mean_inference_ms": 1.656000718644699}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5568000, "num_steps_sampled": 2969600, "sample_time_ms": 21056.358, "load_time_ms": 36.577, "grad_time_ms": 9266.344, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002132798545062542, "policy_loss": -0.0064245969988405704, "vf_loss": 91.22052001953125, "vf_explained_var": 0.7570000290870667, "kl": 0.002030483214184642, "entropy": 1.129306674003601, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2969600, "episodes_total": 7424, "training_iteration": 232, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-41-33", "timestamp": 1660254093, "time_this_iter_s": 32.109358072280884, "time_total_s": 12508.219035863876, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12508.219035863876, "timesteps_since_restore": 2969600, "iterations_since_restore": 232, "perf": {"cpu_util_percent": 33.97777777777779, "ram_util_percent": 58.6088888888889}} -{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 596.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 298.275}, "custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 183.75, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.46, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.26, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.11, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.17, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.11, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.17, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.11, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.17, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 576.0, 579.0, 573.0, 627.0, 579.0, 582.0, 630.0, 530.0, 630.0, 636.0, 627.0, 570.0, 582.0, 627.0, 582.0, 584.0, 582.0, 587.0, 587.0, 582.0, 576.0, 636.0, 579.0, 582.0, 579.0, 584.0, 630.0, 627.0, 630.0, 630.0, 633.0, 633.0, 636.0, 579.0, 527.0, 582.0, 576.0, 587.0, 582.0, 633.0, 444.0, 630.0, 590.0, 633.0, 522.0, 636.0, 579.0, 587.0, 633.0, 627.0, 587.0, 630.0, 636.0, 587.0, 582.0, 582.0, 582.0, 627.0, 636.0, 582.0, 627.0, 582.0, 579.0, 579.0, 633.0, 536.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 582.0, 587.0, 630.0, 573.0, 627.0, 579.0, 576.0, 582.0, 630.0, 587.0, 590.0, 582.0, 630.0, 627.0, 582.0, 582.0, 582.0, 573.0, 576.0, 570.0, 582.0, 582.0, 579.0, 633.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 284.0, 296.0, 286.0, 288.0, 288.0, 286.0, 293.0, 278.0, 295.0, 318.0, 309.0, 285.0, 294.0, 295.0, 287.0, 308.0, 322.0, 268.0, 262.0, 314.0, 316.0, 314.0, 322.0, 305.0, 322.0, 287.0, 283.0, 296.0, 286.0, 322.0, 305.0, 286.0, 296.0, 295.0, 289.0, 298.0, 284.0, 301.0, 286.0, 284.0, 303.0, 288.0, 294.0, 300.0, 276.0, 332.0, 304.0, 282.0, 297.0, 304.0, 278.0, 277.0, 302.0, 301.0, 283.0, 316.0, 314.0, 310.0, 317.0, 324.0, 306.0, 321.0, 309.0, 313.0, 320.0, 331.0, 302.0, 321.0, 315.0, 296.0, 283.0, 262.0, 265.0, 287.0, 295.0, 277.0, 299.0, 285.0, 302.0, 293.0, 289.0, 313.0, 320.0, 229.0, 215.0, 321.0, 309.0, 292.0, 298.0, 311.0, 322.0, 259.0, 263.0, 309.0, 327.0, 288.0, 291.0, 292.0, 295.0, 315.0, 318.0, 321.0, 306.0, 295.0, 292.0, 318.0, 312.0, 313.0, 323.0, 293.0, 294.0, 294.0, 288.0, 285.0, 297.0, 291.0, 291.0, 322.0, 305.0, 329.0, 307.0, 291.0, 291.0, 313.0, 314.0, 281.0, 301.0, 296.0, 283.0, 291.0, 288.0, 316.0, 317.0, 273.0, 263.0, 321.0, 306.0, 316.0, 314.0, 277.0, 302.0, 318.0, 321.0, 286.0, 296.0, 303.0, 330.0, 286.0, 296.0, 288.0, 299.0, 324.0, 306.0, 274.0, 299.0, 311.0, 316.0, 291.0, 288.0, 292.0, 284.0, 294.0, 288.0, 318.0, 312.0, 300.0, 287.0, 296.0, 294.0, 290.0, 292.0, 304.0, 326.0, 315.0, 312.0, 294.0, 288.0, 299.0, 283.0, 291.0, 291.0, 282.0, 291.0, 290.0, 286.0, 282.0, 288.0, 294.0, 288.0, 293.0, 289.0, 297.0, 282.0, 314.0, 319.0, 308.0, 319.0, 313.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0948199723537093, "mean_processing_ms": 0.2870853418047666, "mean_inference_ms": 1.653643049405544}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5592000, "num_steps_sampled": 2982400, "sample_time_ms": 21374.201, "load_time_ms": 36.582, "grad_time_ms": 9304.523, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.0033536478877067566, "policy_loss": -0.004937517922371626, "vf_loss": 88.6025161743164, "vf_explained_var": 0.7515634894371033, "kl": 0.0023627106565982103, "entropy": 1.138161540031433, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2982400, "episodes_total": 7456, "training_iteration": 233, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-42-06", "timestamp": 1660254126, "time_this_iter_s": 33.16590905189514, "time_total_s": 12541.384944915771, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12541.384944915771, "timesteps_since_restore": 2982400, "iterations_since_restore": 233, "perf": {"cpu_util_percent": 33.06170212765958, "ram_util_percent": 58.5808510638298}} -{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 598.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 262.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 299.29}, "custom_metrics": {"sparse_reward_mean": 207.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.18, "shaped_reward_min": 170, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.49, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.91, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.39, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.86, 
"useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.91, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.39, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.91, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.39, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 582.0, 630.0, 582.0, 627.0, 576.0, 576.0, 636.0, 630.0, 579.0, 630.0, 582.0, 582.0, 587.0, 636.0, 579.0, 581.0, 587.0, 582.0, 630.0, 639.0, 627.0, 639.0, 630.0, 630.0, 627.0, 570.0, 587.0, 579.0, 582.0, 576.0, 579.0, 579.0, 633.0, 536.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 582.0, 587.0, 630.0, 573.0, 627.0, 579.0, 576.0, 582.0, 630.0, 587.0, 590.0, 582.0, 630.0, 627.0, 582.0, 582.0, 582.0, 573.0, 576.0, 570.0, 582.0, 582.0, 579.0, 633.0, 627.0, 633.0, 579.0, 582.0, 576.0, 579.0, 573.0, 627.0, 579.0, 582.0, 630.0, 530.0, 630.0, 636.0, 627.0, 570.0, 582.0, 627.0, 582.0, 584.0, 582.0, 587.0, 587.0, 582.0, 576.0, 636.0, 579.0, 582.0, 579.0, 584.0, 630.0, 627.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [328.0, 302.0, 284.0, 298.0, 283.0, 299.0, 308.0, 322.0, 292.0, 290.0, 300.0, 327.0, 291.0, 285.0, 295.0, 281.0, 326.0, 310.0, 313.0, 317.0, 302.0, 277.0, 318.0, 312.0, 293.0, 289.0, 298.0, 284.0, 296.0, 291.0, 332.0, 304.0, 293.0, 286.0, 285.0, 296.0, 303.0, 284.0, 299.0, 283.0, 324.0, 306.0, 327.0, 312.0, 316.0, 311.0, 319.0, 320.0, 317.0, 313.0, 319.0, 311.0, 309.0, 318.0, 293.0, 277.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 290.0, 286.0, 296.0, 283.0, 291.0, 288.0, 316.0, 317.0, 273.0, 263.0, 321.0, 306.0, 316.0, 314.0, 277.0, 302.0, 318.0, 321.0, 286.0, 296.0, 303.0, 330.0, 286.0, 296.0, 288.0, 299.0, 324.0, 306.0, 274.0, 299.0, 311.0, 316.0, 291.0, 288.0, 292.0, 284.0, 294.0, 288.0, 318.0, 312.0, 300.0, 287.0, 296.0, 294.0, 290.0, 292.0, 304.0, 326.0, 315.0, 312.0, 294.0, 288.0, 299.0, 283.0, 291.0, 291.0, 282.0, 291.0, 290.0, 286.0, 282.0, 288.0, 294.0, 288.0, 293.0, 289.0, 297.0, 282.0, 314.0, 319.0, 308.0, 319.0, 313.0, 320.0, 295.0, 284.0, 296.0, 286.0, 288.0, 288.0, 286.0, 293.0, 278.0, 295.0, 318.0, 309.0, 285.0, 294.0, 295.0, 287.0, 308.0, 322.0, 268.0, 262.0, 314.0, 316.0, 314.0, 322.0, 305.0, 322.0, 287.0, 283.0, 296.0, 286.0, 322.0, 305.0, 286.0, 296.0, 295.0, 289.0, 298.0, 284.0, 301.0, 286.0, 284.0, 303.0, 288.0, 294.0, 300.0, 276.0, 332.0, 304.0, 282.0, 297.0, 304.0, 278.0, 277.0, 302.0, 301.0, 283.0, 316.0, 314.0, 310.0, 317.0, 324.0, 306.0, 321.0, 309.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0920719973611583, "mean_processing_ms": 0.2865396383505603, "mean_inference_ms": 1.6512949554166665}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5616000, 
"num_steps_sampled": 2995200, "sample_time_ms": 21497.979, "load_time_ms": 36.457, "grad_time_ms": 9378.106, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021230385173112154, "policy_loss": -0.0060439333319664, "vf_loss": 87.32781982421875, "vf_explained_var": 0.7546737194061279, "kl": 0.0017831752775236964, "entropy": 1.1316334009170532, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 2995200, "episodes_total": 7488, "training_iteration": 234, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-42-38", "timestamp": 1660254158, "time_this_iter_s": 31.695109128952026, "time_total_s": 12573.080054044724, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12573.080054044724, "timesteps_since_restore": 2995200, "iterations_since_restore": 234, "perf": {"cpu_util_percent": 33.72888888888888, "ram_util_percent": 58.67111111111112}} -{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 599.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 261.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 299.585}, "custom_metrics": {"sparse_reward_mean": 207.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.37, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.66, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.93, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 2, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.93, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.93, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 627.0, 630.0, 582.0, 582.0, 579.0, 582.0, 582.0, 590.0, 627.0, 579.0, 627.0, 576.0, 636.0, 579.0, 579.0, 582.0, 627.0, 561.0, 630.0, 587.0, 630.0, 582.0, 579.0, 639.0, 630.0, 579.0, 636.0, 584.0, 541.0, 579.0, 633.0, 627.0, 633.0, 579.0, 582.0, 576.0, 579.0, 573.0, 627.0, 579.0, 582.0, 630.0, 530.0, 630.0, 636.0, 627.0, 570.0, 582.0, 627.0, 582.0, 584.0, 582.0, 587.0, 587.0, 582.0, 576.0, 636.0, 579.0, 582.0, 579.0, 584.0, 630.0, 627.0, 630.0, 630.0, 630.0, 582.0, 582.0, 630.0, 582.0, 627.0, 576.0, 576.0, 636.0, 630.0, 579.0, 630.0, 582.0, 582.0, 587.0, 636.0, 579.0, 581.0, 587.0, 582.0, 630.0, 639.0, 627.0, 639.0, 630.0, 630.0, 
627.0, 570.0, 587.0, 579.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 298.0, 295.0, 284.0, 285.0, 297.0, 305.0, 322.0, 323.0, 307.0, 283.0, 299.0, 294.0, 288.0, 287.0, 292.0, 305.0, 277.0, 293.0, 289.0, 299.0, 291.0, 308.0, 319.0, 301.0, 278.0, 322.0, 305.0, 283.0, 293.0, 319.0, 317.0, 288.0, 291.0, 294.0, 285.0, 286.0, 296.0, 306.0, 321.0, 291.0, 270.0, 306.0, 324.0, 286.0, 301.0, 303.0, 327.0, 291.0, 291.0, 290.0, 289.0, 312.0, 327.0, 321.0, 309.0, 283.0, 296.0, 319.0, 317.0, 283.0, 301.0, 280.0, 261.0, 297.0, 282.0, 314.0, 319.0, 308.0, 319.0, 313.0, 320.0, 295.0, 284.0, 296.0, 286.0, 288.0, 288.0, 286.0, 293.0, 278.0, 295.0, 318.0, 309.0, 285.0, 294.0, 295.0, 287.0, 308.0, 322.0, 268.0, 262.0, 314.0, 316.0, 314.0, 322.0, 305.0, 322.0, 287.0, 283.0, 296.0, 286.0, 322.0, 305.0, 286.0, 296.0, 295.0, 289.0, 298.0, 284.0, 301.0, 286.0, 284.0, 303.0, 288.0, 294.0, 300.0, 276.0, 332.0, 304.0, 282.0, 297.0, 304.0, 278.0, 277.0, 302.0, 301.0, 283.0, 316.0, 314.0, 310.0, 317.0, 324.0, 306.0, 321.0, 309.0, 328.0, 302.0, 284.0, 298.0, 283.0, 299.0, 308.0, 322.0, 292.0, 290.0, 300.0, 327.0, 291.0, 285.0, 295.0, 281.0, 326.0, 310.0, 313.0, 317.0, 302.0, 277.0, 318.0, 312.0, 293.0, 289.0, 298.0, 284.0, 296.0, 291.0, 332.0, 304.0, 293.0, 286.0, 285.0, 296.0, 303.0, 284.0, 299.0, 283.0, 324.0, 306.0, 327.0, 312.0, 316.0, 311.0, 319.0, 320.0, 317.0, 313.0, 319.0, 311.0, 309.0, 318.0, 293.0, 277.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 290.0, 286.0]}, 
"sampler_perf": {"mean_env_wait_ms": 1.0893523875509898, "mean_processing_ms": 0.2860034188911333, "mean_inference_ms": 1.6490498343131736}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5640000, "num_steps_sampled": 3008000, "sample_time_ms": 21585.027, "load_time_ms": 36.709, "grad_time_ms": 9663.091, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033673776779323816, "policy_loss": -0.0045895627699792385, "vf_loss": 85.23816680908203, "vf_explained_var": 0.7584102749824524, "kl": 0.0018025357276201248, "entropy": 1.1337394714355469, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3008000, "episodes_total": 7520, "training_iteration": 235, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-43-12", "timestamp": 1660254192, "time_this_iter_s": 34.23338508605957, "time_total_s": 12607.313439130783, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12607.313439130783, "timesteps_since_restore": 3008000, "iterations_since_restore": 235, "perf": {"cpu_util_percent": 33.239583333333336, "ram_util_percent": 58.65}} -{"episode_reward_max": 639.0, "episode_reward_min": 541.0, "episode_reward_mean": 603.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 261.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 301.58}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.56, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.48, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.55, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.97, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.73, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.76, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 
17.71, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.86, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.76, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.71, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.76, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.76, 
"viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 582.0, 627.0, 584.0, 633.0, 579.0, 630.0, 582.0, 627.0, 582.0, 584.0, 587.0, 630.0, 582.0, 627.0, 630.0, 627.0, 587.0, 582.0, 639.0, 582.0, 582.0, 633.0, 582.0, 587.0, 627.0, 633.0, 582.0, 639.0, 579.0, 630.0, 630.0, 627.0, 630.0, 630.0, 630.0, 582.0, 582.0, 630.0, 582.0, 627.0, 576.0, 576.0, 636.0, 630.0, 579.0, 630.0, 582.0, 582.0, 587.0, 636.0, 579.0, 581.0, 587.0, 582.0, 630.0, 639.0, 627.0, 639.0, 630.0, 630.0, 627.0, 570.0, 587.0, 
579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 627.0, 630.0, 582.0, 582.0, 579.0, 582.0, 582.0, 590.0, 627.0, 579.0, 627.0, 576.0, 636.0, 579.0, 579.0, 582.0, 627.0, 561.0, 630.0, 587.0, 630.0, 582.0, 579.0, 639.0, 630.0, 579.0, 636.0, 584.0, 541.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 324.0, 306.0, 285.0, 297.0, 316.0, 311.0, 283.0, 301.0, 311.0, 322.0, 288.0, 291.0, 321.0, 309.0, 293.0, 289.0, 315.0, 312.0, 301.0, 281.0, 292.0, 292.0, 295.0, 292.0, 316.0, 314.0, 291.0, 291.0, 309.0, 318.0, 321.0, 309.0, 313.0, 314.0, 301.0, 286.0, 291.0, 291.0, 322.0, 317.0, 294.0, 288.0, 290.0, 292.0, 311.0, 322.0, 288.0, 294.0, 288.0, 299.0, 316.0, 311.0, 316.0, 317.0, 281.0, 301.0, 324.0, 315.0, 294.0, 285.0, 317.0, 313.0, 316.0, 314.0, 310.0, 317.0, 324.0, 306.0, 321.0, 309.0, 328.0, 302.0, 284.0, 298.0, 283.0, 299.0, 308.0, 322.0, 292.0, 290.0, 300.0, 327.0, 291.0, 285.0, 295.0, 281.0, 326.0, 310.0, 313.0, 317.0, 302.0, 277.0, 318.0, 312.0, 293.0, 289.0, 298.0, 284.0, 296.0, 291.0, 332.0, 304.0, 293.0, 286.0, 285.0, 296.0, 303.0, 284.0, 299.0, 283.0, 324.0, 306.0, 327.0, 312.0, 316.0, 311.0, 319.0, 320.0, 317.0, 313.0, 319.0, 311.0, 309.0, 318.0, 293.0, 277.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 290.0, 286.0, 284.0, 298.0, 295.0, 284.0, 285.0, 297.0, 305.0, 322.0, 323.0, 307.0, 283.0, 299.0, 294.0, 288.0, 287.0, 292.0, 305.0, 277.0, 293.0, 289.0, 299.0, 291.0, 308.0, 319.0, 301.0, 278.0, 322.0, 305.0, 283.0, 293.0, 319.0, 317.0, 288.0, 291.0, 294.0, 285.0, 286.0, 
296.0, 306.0, 321.0, 291.0, 270.0, 306.0, 324.0, 286.0, 301.0, 303.0, 327.0, 291.0, 291.0, 290.0, 289.0, 312.0, 327.0, 321.0, 309.0, 283.0, 296.0, 319.0, 317.0, 283.0, 301.0, 280.0, 261.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0866480049346467, "mean_processing_ms": 0.2854697984995614, "mean_inference_ms": 1.6467374423655963}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5664000, "num_steps_sampled": 3020800, "sample_time_ms": 21716.507, "load_time_ms": 36.498, "grad_time_ms": 9682.814, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004189230967313051, "policy_loss": -0.003748750314116478, "vf_loss": 85.03255462646484, "vf_explained_var": 0.76678067445755, "kl": 0.001733882469125092, "entropy": 1.130557656288147, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3020800, "episodes_total": 7552, "training_iteration": 236, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-43-43", "timestamp": 1660254223, "time_this_iter_s": 30.502008199691772, "time_total_s": 12637.815447330475, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12637.815447330475, "timesteps_since_restore": 3020800, "iterations_since_restore": 236, "perf": {"cpu_util_percent": 34.461363636363636, "ram_util_percent": 59.190909090909095}} -{"episode_reward_max": 639.0, "episode_reward_min": 515.0, "episode_reward_mean": 596.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 298.165}, "custom_metrics": {"sparse_reward_mean": 206.4, 
"sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.53, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.6, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.98, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.74, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.76, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.51, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.76, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.51, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.76, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.51, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 539.0, 630.0, 587.0, 630.0, 579.0, 579.0, 582.0, 627.0, 521.0, 636.0, 582.0, 582.0, 587.0, 576.0, 627.0, 527.0, 627.0, 567.0, 576.0, 570.0, 627.0, 538.0, 630.0, 636.0, 561.0, 515.0, 582.0, 630.0, 633.0, 558.0, 587.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 627.0, 630.0, 
582.0, 582.0, 579.0, 582.0, 582.0, 590.0, 627.0, 579.0, 627.0, 576.0, 636.0, 579.0, 579.0, 582.0, 627.0, 561.0, 630.0, 587.0, 630.0, 582.0, 579.0, 639.0, 630.0, 579.0, 636.0, 584.0, 541.0, 633.0, 630.0, 582.0, 627.0, 584.0, 633.0, 579.0, 630.0, 582.0, 627.0, 582.0, 584.0, 587.0, 630.0, 582.0, 627.0, 630.0, 627.0, 587.0, 582.0, 639.0, 582.0, 582.0, 633.0, 582.0, 587.0, 627.0, 633.0, 582.0, 639.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 295.0, 297.0, 282.0, 265.0, 274.0, 316.0, 314.0, 294.0, 293.0, 319.0, 311.0, 293.0, 286.0, 293.0, 286.0, 294.0, 288.0, 310.0, 317.0, 260.0, 261.0, 324.0, 312.0, 303.0, 279.0, 293.0, 289.0, 290.0, 297.0, 282.0, 294.0, 318.0, 309.0, 268.0, 259.0, 311.0, 316.0, 289.0, 278.0, 282.0, 294.0, 276.0, 294.0, 306.0, 321.0, 273.0, 265.0, 306.0, 324.0, 317.0, 319.0, 293.0, 268.0, 258.0, 257.0, 291.0, 291.0, 316.0, 314.0, 308.0, 325.0, 264.0, 294.0, 291.0, 296.0, 290.0, 289.0, 288.0, 294.0, 290.0, 286.0, 284.0, 298.0, 295.0, 284.0, 285.0, 297.0, 305.0, 322.0, 323.0, 307.0, 283.0, 299.0, 294.0, 288.0, 287.0, 292.0, 305.0, 277.0, 293.0, 289.0, 299.0, 291.0, 308.0, 319.0, 301.0, 278.0, 322.0, 305.0, 283.0, 293.0, 319.0, 317.0, 288.0, 291.0, 294.0, 285.0, 286.0, 296.0, 306.0, 321.0, 291.0, 270.0, 306.0, 324.0, 286.0, 301.0, 303.0, 327.0, 291.0, 291.0, 290.0, 289.0, 312.0, 327.0, 321.0, 309.0, 283.0, 296.0, 319.0, 317.0, 283.0, 301.0, 280.0, 261.0, 321.0, 312.0, 324.0, 306.0, 285.0, 297.0, 316.0, 311.0, 283.0, 301.0, 311.0, 322.0, 288.0, 
291.0, 321.0, 309.0, 293.0, 289.0, 315.0, 312.0, 301.0, 281.0, 292.0, 292.0, 295.0, 292.0, 316.0, 314.0, 291.0, 291.0, 309.0, 318.0, 321.0, 309.0, 313.0, 314.0, 301.0, 286.0, 291.0, 291.0, 322.0, 317.0, 294.0, 288.0, 290.0, 292.0, 311.0, 322.0, 288.0, 294.0, 288.0, 299.0, 316.0, 311.0, 316.0, 317.0, 281.0, 301.0, 324.0, 315.0, 294.0, 285.0, 317.0, 313.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0839571838028972, "mean_processing_ms": 0.28493791430015475, "mean_inference_ms": 1.6442666845730367}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5688000, "num_steps_sampled": 3033600, "sample_time_ms": 21735.725, "load_time_ms": 37.102, "grad_time_ms": 9981.103, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00033502434962429106, "policy_loss": -0.007877787575125694, "vf_loss": 87.85860443115234, "vf_explained_var": 0.7610828280448914, "kl": 0.0018075080588459969, "entropy": 1.1460970640182495, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3033600, "episodes_total": 7584, "training_iteration": 237, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-44-14", "timestamp": 1660254254, "time_this_iter_s": 31.47483992576599, "time_total_s": 12669.29028725624, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12669.29028725624, "timesteps_since_restore": 3033600, "iterations_since_restore": 237, "perf": {"cpu_util_percent": 33.54772727272728, "ram_util_percent": 58.545454545454554}} -{"episode_reward_max": 639.0, "episode_reward_min": 515.0, "episode_reward_mean": 598.41, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 299.205}, "custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.41, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.61, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.64, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.68, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 
4, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.82, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.56, 
"optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 579.0, 627.0, 582.0, 582.0, 544.0, 630.0, 587.0, 630.0, 584.0, 636.0, 
624.0, 582.0, 627.0, 579.0, 630.0, 633.0, 590.0, 633.0, 630.0, 636.0, 576.0, 633.0, 582.0, 579.0, 582.0, 544.0, 630.0, 636.0, 579.0, 587.0, 573.0, 579.0, 636.0, 584.0, 541.0, 633.0, 630.0, 582.0, 627.0, 584.0, 633.0, 579.0, 630.0, 582.0, 627.0, 582.0, 584.0, 587.0, 630.0, 582.0, 627.0, 630.0, 627.0, 587.0, 582.0, 639.0, 582.0, 582.0, 633.0, 582.0, 587.0, 627.0, 633.0, 582.0, 639.0, 579.0, 630.0, 582.0, 579.0, 539.0, 630.0, 587.0, 630.0, 579.0, 579.0, 582.0, 627.0, 521.0, 636.0, 582.0, 582.0, 587.0, 576.0, 627.0, 527.0, 627.0, 567.0, 576.0, 570.0, 627.0, 538.0, 630.0, 636.0, 561.0, 515.0, 582.0, 630.0, 633.0, 558.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 298.0, 281.0, 313.0, 314.0, 293.0, 289.0, 285.0, 297.0, 270.0, 274.0, 321.0, 309.0, 286.0, 301.0, 310.0, 320.0, 290.0, 294.0, 322.0, 314.0, 327.0, 297.0, 288.0, 294.0, 310.0, 317.0, 288.0, 291.0, 311.0, 319.0, 313.0, 320.0, 288.0, 302.0, 313.0, 320.0, 324.0, 306.0, 312.0, 324.0, 289.0, 287.0, 311.0, 322.0, 288.0, 294.0, 283.0, 296.0, 301.0, 281.0, 273.0, 271.0, 311.0, 319.0, 319.0, 317.0, 291.0, 288.0, 302.0, 285.0, 299.0, 274.0, 283.0, 296.0, 319.0, 317.0, 283.0, 301.0, 280.0, 261.0, 321.0, 312.0, 324.0, 306.0, 285.0, 297.0, 316.0, 311.0, 283.0, 301.0, 311.0, 322.0, 288.0, 291.0, 321.0, 309.0, 293.0, 289.0, 315.0, 312.0, 301.0, 281.0, 292.0, 292.0, 295.0, 292.0, 316.0, 314.0, 291.0, 291.0, 309.0, 318.0, 321.0, 309.0, 313.0, 314.0, 301.0, 286.0, 291.0, 291.0, 322.0, 317.0, 294.0, 288.0, 290.0, 292.0, 311.0, 
322.0, 288.0, 294.0, 288.0, 299.0, 316.0, 311.0, 316.0, 317.0, 281.0, 301.0, 324.0, 315.0, 294.0, 285.0, 317.0, 313.0, 287.0, 295.0, 297.0, 282.0, 265.0, 274.0, 316.0, 314.0, 294.0, 293.0, 319.0, 311.0, 293.0, 286.0, 293.0, 286.0, 294.0, 288.0, 310.0, 317.0, 260.0, 261.0, 324.0, 312.0, 303.0, 279.0, 293.0, 289.0, 290.0, 297.0, 282.0, 294.0, 318.0, 309.0, 268.0, 259.0, 311.0, 316.0, 289.0, 278.0, 282.0, 294.0, 276.0, 294.0, 306.0, 321.0, 273.0, 265.0, 306.0, 324.0, 317.0, 319.0, 293.0, 268.0, 258.0, 257.0, 291.0, 291.0, 316.0, 314.0, 308.0, 325.0, 264.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0812829517018399, "mean_processing_ms": 0.2844054156337277, "mean_inference_ms": 1.641726673758305}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5712000, "num_steps_sampled": 3046400, "sample_time_ms": 21856.898, "load_time_ms": 37.298, "grad_time_ms": 10099.958, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0027887988835573196, "policy_loss": -0.005770063493400812, "vf_loss": 91.25625610351562, "vf_explained_var": 0.7579948306083679, "kl": 0.001784983091056347, "entropy": 1.1335158348083496, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3046400, "episodes_total": 7616, "training_iteration": 238, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-44-46", "timestamp": 1660254286, "time_this_iter_s": 31.44696879386902, "time_total_s": 12700.73725605011, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12700.73725605011, "timesteps_since_restore": 3046400, "iterations_since_restore": 238, "perf": {"cpu_util_percent": 
33.757777777777775, "ram_util_percent": 58.49555555555556}} -{"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 593.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 296.655}, "custom_metrics": {"sparse_reward_mean": 205.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 182.91, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.34, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, 
"onion_drop_agent_0_mean": 0.76, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.22, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.02, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.22, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.22, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 524.0, 582.0, 636.0, 582.0, 630.0, 582.0, 633.0, 630.0, 630.0, 582.0, 582.0, 539.0, 579.0, 639.0, 579.0, 576.0, 582.0, 579.0, 636.0, 579.0, 633.0, 630.0, 587.0, 242.0, 573.0, 627.0, 582.0, 636.0, 587.0, 627.0, 636.0, 582.0, 639.0, 579.0, 630.0, 582.0, 579.0, 539.0, 630.0, 587.0, 630.0, 579.0, 579.0, 582.0, 627.0, 521.0, 636.0, 582.0, 582.0, 587.0, 576.0, 627.0, 527.0, 627.0, 567.0, 576.0, 570.0, 627.0, 538.0, 630.0, 636.0, 561.0, 515.0, 582.0, 630.0, 633.0, 558.0, 633.0, 579.0, 627.0, 582.0, 582.0, 544.0, 630.0, 587.0, 630.0, 584.0, 636.0, 624.0, 582.0, 627.0, 579.0, 630.0, 633.0, 590.0, 633.0, 630.0, 636.0, 576.0, 633.0, 582.0, 579.0, 582.0, 544.0, 630.0, 636.0, 579.0, 587.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 259.0, 265.0, 288.0, 294.0, 314.0, 322.0, 290.0, 292.0, 319.0, 311.0, 295.0, 287.0, 317.0, 316.0, 326.0, 304.0, 309.0, 321.0, 293.0, 289.0, 298.0, 284.0, 263.0, 276.0, 286.0, 293.0, 320.0, 319.0, 295.0, 284.0, 290.0, 286.0, 284.0, 298.0, 278.0, 301.0, 319.0, 317.0, 288.0, 291.0, 313.0, 320.0, 322.0, 308.0, 299.0, 288.0, 117.0, 125.0, 290.0, 283.0, 319.0, 308.0, 286.0, 296.0, 324.0, 312.0, 301.0, 286.0, 311.0, 316.0, 314.0, 322.0, 281.0, 301.0, 324.0, 315.0, 294.0, 285.0, 317.0, 313.0, 287.0, 295.0, 297.0, 282.0, 265.0, 274.0, 316.0, 314.0, 294.0, 293.0, 319.0, 311.0, 293.0, 286.0, 293.0, 286.0, 294.0, 288.0, 310.0, 
317.0, 260.0, 261.0, 324.0, 312.0, 303.0, 279.0, 293.0, 289.0, 290.0, 297.0, 282.0, 294.0, 318.0, 309.0, 268.0, 259.0, 311.0, 316.0, 289.0, 278.0, 282.0, 294.0, 276.0, 294.0, 306.0, 321.0, 273.0, 265.0, 306.0, 324.0, 317.0, 319.0, 293.0, 268.0, 258.0, 257.0, 291.0, 291.0, 316.0, 314.0, 308.0, 325.0, 264.0, 294.0, 321.0, 312.0, 298.0, 281.0, 313.0, 314.0, 293.0, 289.0, 285.0, 297.0, 270.0, 274.0, 321.0, 309.0, 286.0, 301.0, 310.0, 320.0, 290.0, 294.0, 322.0, 314.0, 327.0, 297.0, 288.0, 294.0, 310.0, 317.0, 288.0, 291.0, 311.0, 319.0, 313.0, 320.0, 288.0, 302.0, 313.0, 320.0, 324.0, 306.0, 312.0, 324.0, 289.0, 287.0, 311.0, 322.0, 288.0, 294.0, 283.0, 296.0, 301.0, 281.0, 273.0, 271.0, 311.0, 319.0, 319.0, 317.0, 291.0, 288.0, 302.0, 285.0, 299.0, 274.0]}, "sampler_perf": {"mean_env_wait_ms": 1.078628112981745, "mean_processing_ms": 0.2838764625844994, "mean_inference_ms": 1.6391328483371586}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5736000, "num_steps_sampled": 3059200, "sample_time_ms": 21753.38, "load_time_ms": 37.651, "grad_time_ms": 10111.988, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005386353936046362, "policy_loss": -0.003314490430057049, "vf_loss": 92.68680572509766, "vf_explained_var": 0.7602830529212952, "kl": 0.0021554683335125446, "entropy": 1.135677456855774, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3059200, "episodes_total": 7648, "training_iteration": 239, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-45-16", "timestamp": 1660254316, "time_this_iter_s": 30.261106967926025, "time_total_s": 12730.998363018036, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12730.998363018036, "timesteps_since_restore": 3059200, "iterations_since_restore": 239, "perf": {"cpu_util_percent": 33.04761904761905, "ram_util_percent": 58.58571428571428}} -{"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 598.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 299.29}, "custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 184.58, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.27, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 16.47, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.3, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.22, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.22, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.22, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 630.0, 587.0, 567.0, 587.0, 582.0, 624.0, 539.0, 633.0, 633.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 576.0, 636.0, 587.0, 630.0, 576.0, 584.0, 579.0, 579.0, 570.0, 630.0, 587.0, 582.0, 582.0, 630.0, 633.0, 558.0, 633.0, 579.0, 627.0, 582.0, 582.0, 544.0, 630.0, 587.0, 630.0, 584.0, 636.0, 624.0, 582.0, 627.0, 579.0, 630.0, 633.0, 590.0, 633.0, 630.0, 636.0, 576.0, 633.0, 582.0, 579.0, 582.0, 544.0, 630.0, 636.0, 579.0, 587.0, 573.0, 579.0, 524.0, 582.0, 636.0, 582.0, 630.0, 582.0, 633.0, 630.0, 630.0, 582.0, 582.0, 539.0, 579.0, 639.0, 579.0, 576.0, 582.0, 579.0, 636.0, 579.0, 633.0, 630.0, 587.0, 242.0, 573.0, 627.0, 582.0, 636.0, 587.0, 627.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 298.0, 327.0, 309.0, 319.0, 311.0, 301.0, 286.0, 284.0, 283.0, 286.0, 301.0, 289.0, 293.0, 311.0, 313.0, 273.0, 266.0, 317.0, 316.0, 306.0, 327.0, 313.0, 317.0, 324.0, 312.0, 309.0, 324.0, 319.0, 314.0, 313.0, 317.0, 327.0, 309.0, 309.0, 321.0, 321.0, 309.0, 292.0, 290.0, 293.0, 283.0, 322.0, 314.0, 283.0, 304.0, 322.0, 308.0, 283.0, 293.0, 291.0, 293.0, 285.0, 294.0, 296.0, 283.0, 282.0, 288.0, 315.0, 315.0, 298.0, 
289.0, 301.0, 281.0, 291.0, 291.0, 316.0, 314.0, 308.0, 325.0, 264.0, 294.0, 321.0, 312.0, 298.0, 281.0, 313.0, 314.0, 293.0, 289.0, 285.0, 297.0, 270.0, 274.0, 321.0, 309.0, 286.0, 301.0, 310.0, 320.0, 290.0, 294.0, 322.0, 314.0, 327.0, 297.0, 288.0, 294.0, 310.0, 317.0, 288.0, 291.0, 311.0, 319.0, 313.0, 320.0, 288.0, 302.0, 313.0, 320.0, 324.0, 306.0, 312.0, 324.0, 289.0, 287.0, 311.0, 322.0, 288.0, 294.0, 283.0, 296.0, 301.0, 281.0, 273.0, 271.0, 311.0, 319.0, 319.0, 317.0, 291.0, 288.0, 302.0, 285.0, 299.0, 274.0, 288.0, 291.0, 259.0, 265.0, 288.0, 294.0, 314.0, 322.0, 290.0, 292.0, 319.0, 311.0, 295.0, 287.0, 317.0, 316.0, 326.0, 304.0, 309.0, 321.0, 293.0, 289.0, 298.0, 284.0, 263.0, 276.0, 286.0, 293.0, 320.0, 319.0, 295.0, 284.0, 290.0, 286.0, 284.0, 298.0, 278.0, 301.0, 319.0, 317.0, 288.0, 291.0, 313.0, 320.0, 322.0, 308.0, 299.0, 288.0, 117.0, 125.0, 290.0, 283.0, 319.0, 308.0, 286.0, 296.0, 324.0, 312.0, 301.0, 286.0, 311.0, 316.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0759937679443485, "mean_processing_ms": 0.28335070451542615, "mean_inference_ms": 1.6365163711120108}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5760000, "num_steps_sampled": 3072000, "sample_time_ms": 21577.613, "load_time_ms": 37.508, "grad_time_ms": 10049.353, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0017478683730587363, "policy_loss": -0.006376888602972031, "vf_loss": 86.9516372680664, "vf_explained_var": 0.7652549743652344, "kl": 0.0021124929189682007, "entropy": 1.1408079862594604, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3072000, "episodes_total": 7680, "training_iteration": 240, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-45-45", "timestamp": 1660254345, "time_this_iter_s": 29.390948057174683, "time_total_s": 12760.38931107521, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12760.38931107521, "timesteps_since_restore": 3072000, "iterations_since_restore": 240, "perf": {"cpu_util_percent": 32.80238095238095, "ram_util_percent": 58.55238095238095}} -{"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 595.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 297.615}, "custom_metrics": {"sparse_reward_mean": 205.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 183.63, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.09, 
"onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.65, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.17, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 2, 
"soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.14, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.17, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.17, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 590.0, 627.0, 582.0, 639.0, 630.0, 576.0, 630.0, 458.0, 630.0, 550.0, 630.0, 587.0, 630.0, 582.0, 627.0, 570.0, 587.0, 630.0, 579.0, 558.0, 584.0, 538.0, 587.0, 587.0, 564.0, 630.0, 582.0, 633.0, 579.0, 539.0, 636.0, 579.0, 587.0, 573.0, 579.0, 524.0, 582.0, 636.0, 582.0, 630.0, 582.0, 633.0, 630.0, 630.0, 582.0, 582.0, 539.0, 579.0, 639.0, 579.0, 576.0, 582.0, 579.0, 636.0, 579.0, 633.0, 630.0, 587.0, 242.0, 573.0, 627.0, 582.0, 636.0, 587.0, 627.0, 636.0, 582.0, 636.0, 630.0, 587.0, 567.0, 587.0, 582.0, 624.0, 539.0, 633.0, 633.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 576.0, 636.0, 587.0, 630.0, 576.0, 584.0, 579.0, 579.0, 570.0, 630.0, 587.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 309.0, 305.0, 325.0, 294.0, 296.0, 316.0, 311.0, 292.0, 290.0, 317.0, 322.0, 311.0, 319.0, 276.0, 300.0, 314.0, 316.0, 230.0, 228.0, 326.0, 304.0, 276.0, 274.0, 328.0, 302.0, 294.0, 293.0, 306.0, 324.0, 286.0, 296.0, 310.0, 317.0, 294.0, 276.0, 286.0, 
301.0, 321.0, 309.0, 293.0, 286.0, 286.0, 272.0, 293.0, 291.0, 267.0, 271.0, 302.0, 285.0, 296.0, 291.0, 278.0, 286.0, 311.0, 319.0, 295.0, 287.0, 316.0, 317.0, 299.0, 280.0, 268.0, 271.0, 319.0, 317.0, 291.0, 288.0, 302.0, 285.0, 299.0, 274.0, 288.0, 291.0, 259.0, 265.0, 288.0, 294.0, 314.0, 322.0, 290.0, 292.0, 319.0, 311.0, 295.0, 287.0, 317.0, 316.0, 326.0, 304.0, 309.0, 321.0, 293.0, 289.0, 298.0, 284.0, 263.0, 276.0, 286.0, 293.0, 320.0, 319.0, 295.0, 284.0, 290.0, 286.0, 284.0, 298.0, 278.0, 301.0, 319.0, 317.0, 288.0, 291.0, 313.0, 320.0, 322.0, 308.0, 299.0, 288.0, 117.0, 125.0, 290.0, 283.0, 319.0, 308.0, 286.0, 296.0, 324.0, 312.0, 301.0, 286.0, 311.0, 316.0, 314.0, 322.0, 284.0, 298.0, 327.0, 309.0, 319.0, 311.0, 301.0, 286.0, 284.0, 283.0, 286.0, 301.0, 289.0, 293.0, 311.0, 313.0, 273.0, 266.0, 317.0, 316.0, 306.0, 327.0, 313.0, 317.0, 324.0, 312.0, 309.0, 324.0, 319.0, 314.0, 313.0, 317.0, 327.0, 309.0, 309.0, 321.0, 321.0, 309.0, 292.0, 290.0, 293.0, 283.0, 322.0, 314.0, 283.0, 304.0, 322.0, 308.0, 283.0, 293.0, 291.0, 293.0, 285.0, 294.0, 296.0, 283.0, 282.0, 288.0, 315.0, 315.0, 298.0, 289.0, 301.0, 281.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0733850104445417, "mean_processing_ms": 0.2828300453202057, "mean_inference_ms": 1.6339069456399316}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5784000, "num_steps_sampled": 3084800, "sample_time_ms": 21470.585, "load_time_ms": 37.079, "grad_time_ms": 10121.915, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020955076906830072, "policy_loss": -0.006229180842638016, "vf_loss": 88.9510269165039, "vf_explained_var": 0.7567486763000488, "kl": 0.0017531089251860976, "entropy": 1.140811562538147, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3084800, "episodes_total": 7712, "training_iteration": 241, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-46-18", "timestamp": 1660254378, "time_this_iter_s": 32.28085994720459, "time_total_s": 12792.670171022415, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12792.670171022415, "timesteps_since_restore": 3084800, "iterations_since_restore": 241, "perf": {"cpu_util_percent": 31.733333333333334, "ram_util_percent": 58.655555555555544}} -{"episode_reward_max": 639.0, "episode_reward_min": 458.0, "episode_reward_mean": 603.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 301.565}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.53, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.74, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.95, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.07, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 590.0, 639.0, 639.0, 627.0, 570.0, 582.0, 633.0, 590.0, 627.0, 633.0, 576.0, 576.0, 627.0, 582.0, 624.0, 630.0, 633.0, 630.0, 576.0, 633.0, 633.0, 590.0, 587.0, 639.0, 579.0, 582.0, 570.0, 630.0, 582.0, 636.0, 627.0, 636.0, 587.0, 627.0, 636.0, 582.0, 636.0, 630.0, 587.0, 567.0, 587.0, 582.0, 624.0, 539.0, 633.0, 633.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 576.0, 636.0, 587.0, 630.0, 576.0, 584.0, 579.0, 579.0, 570.0, 630.0, 587.0, 582.0, 627.0, 630.0, 590.0, 627.0, 582.0, 639.0, 630.0, 576.0, 630.0, 458.0, 630.0, 550.0, 630.0, 587.0, 630.0, 582.0, 627.0, 570.0, 587.0, 630.0, 579.0, 558.0, 584.0, 538.0, 587.0, 587.0, 564.0, 630.0, 582.0, 633.0, 579.0, 539.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 306.0, 294.0, 296.0, 322.0, 
317.0, 317.0, 322.0, 316.0, 311.0, 275.0, 295.0, 300.0, 282.0, 326.0, 307.0, 294.0, 296.0, 313.0, 314.0, 311.0, 322.0, 291.0, 285.0, 272.0, 304.0, 313.0, 314.0, 294.0, 288.0, 315.0, 309.0, 313.0, 317.0, 314.0, 319.0, 316.0, 314.0, 280.0, 296.0, 308.0, 325.0, 321.0, 312.0, 293.0, 297.0, 296.0, 291.0, 317.0, 322.0, 291.0, 288.0, 292.0, 290.0, 294.0, 276.0, 311.0, 319.0, 288.0, 294.0, 314.0, 322.0, 316.0, 311.0, 324.0, 312.0, 301.0, 286.0, 311.0, 316.0, 314.0, 322.0, 284.0, 298.0, 327.0, 309.0, 319.0, 311.0, 301.0, 286.0, 284.0, 283.0, 286.0, 301.0, 289.0, 293.0, 311.0, 313.0, 273.0, 266.0, 317.0, 316.0, 306.0, 327.0, 313.0, 317.0, 324.0, 312.0, 309.0, 324.0, 319.0, 314.0, 313.0, 317.0, 327.0, 309.0, 309.0, 321.0, 321.0, 309.0, 292.0, 290.0, 293.0, 283.0, 322.0, 314.0, 283.0, 304.0, 322.0, 308.0, 283.0, 293.0, 291.0, 293.0, 285.0, 294.0, 296.0, 283.0, 282.0, 288.0, 315.0, 315.0, 298.0, 289.0, 301.0, 281.0, 318.0, 309.0, 305.0, 325.0, 294.0, 296.0, 316.0, 311.0, 292.0, 290.0, 317.0, 322.0, 311.0, 319.0, 276.0, 300.0, 314.0, 316.0, 230.0, 228.0, 326.0, 304.0, 276.0, 274.0, 328.0, 302.0, 294.0, 293.0, 306.0, 324.0, 286.0, 296.0, 310.0, 317.0, 294.0, 276.0, 286.0, 301.0, 321.0, 309.0, 293.0, 286.0, 286.0, 272.0, 293.0, 291.0, 267.0, 271.0, 302.0, 285.0, 296.0, 291.0, 278.0, 286.0, 311.0, 319.0, 295.0, 287.0, 316.0, 317.0, 299.0, 280.0, 268.0, 271.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0708107094165102, "mean_processing_ms": 0.2823168700385721, "mean_inference_ms": 1.631412939127947}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5808000, "num_steps_sampled": 3097600, "sample_time_ms": 21506.392, "load_time_ms": 37.292, "grad_time_ms": 9971.815, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004354400560259819, "policy_loss": -0.0035108765587210655, "vf_loss": 84.29744720458984, "vf_explained_var": 0.7617435455322266, "kl": 0.0018548279767856002, "entropy": 1.1289268732070923, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3097600, "episodes_total": 7744, "training_iteration": 242, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-46-49", "timestamp": 1660254409, "time_this_iter_s": 30.967852115631104, "time_total_s": 12823.638023138046, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12823.638023138046, "timesteps_since_restore": 3097600, "iterations_since_restore": 242, "perf": {"cpu_util_percent": 31.486363636363638, "ram_util_percent": 58.63863636363636}} -{"episode_reward_max": 639.0, "episode_reward_min": 458.0, "episode_reward_mean": 602.75, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 301.375}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.15, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.82, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.93, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.78, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.93, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.93, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 630.0, 627.0, 627.0, 627.0, 582.0, 633.0, 630.0, 636.0, 584.0, 627.0, 582.0, 633.0, 573.0, 587.0, 627.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 639.0, 570.0, 579.0, 582.0, 633.0, 579.0, 633.0, 627.0, 544.0, 570.0, 630.0, 587.0, 582.0, 627.0, 630.0, 590.0, 627.0, 582.0, 639.0, 630.0, 576.0, 630.0, 458.0, 630.0, 550.0, 630.0, 587.0, 630.0, 582.0, 627.0, 570.0, 587.0, 630.0, 579.0, 558.0, 584.0, 538.0, 587.0, 587.0, 564.0, 630.0, 582.0, 633.0, 579.0, 539.0, 627.0, 590.0, 639.0, 639.0, 627.0, 570.0, 582.0, 633.0, 590.0, 627.0, 633.0, 576.0, 576.0, 627.0, 582.0, 624.0, 630.0, 633.0, 630.0, 576.0, 633.0, 633.0, 590.0, 587.0, 639.0, 579.0, 582.0, 570.0, 630.0, 582.0, 636.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 316.0, 317.0, 301.0, 329.0, 313.0, 314.0, 311.0, 316.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 311.0, 319.0, 324.0, 312.0, 286.0, 298.0, 324.0, 303.0, 278.0, 304.0, 307.0, 326.0, 288.0, 285.0, 298.0, 289.0, 308.0, 319.0, 293.0, 294.0, 326.0, 304.0, 295.0, 287.0, 280.0, 296.0, 315.0, 312.0, 289.0, 290.0, 322.0, 317.0, 288.0, 282.0, 283.0, 296.0, 299.0, 283.0, 309.0, 324.0, 282.0, 297.0, 312.0, 321.0, 305.0, 322.0, 278.0, 266.0, 282.0, 288.0, 315.0, 315.0, 298.0, 289.0, 301.0, 281.0, 318.0, 309.0, 305.0, 325.0, 294.0, 296.0, 316.0, 311.0, 292.0, 290.0, 317.0, 322.0, 311.0, 319.0, 276.0, 300.0, 314.0, 316.0, 230.0, 228.0, 326.0, 304.0, 276.0, 274.0, 328.0, 302.0, 294.0, 293.0, 306.0, 324.0, 286.0, 296.0, 310.0, 317.0, 294.0, 276.0, 286.0, 301.0, 321.0, 309.0, 293.0, 286.0, 286.0, 272.0, 293.0, 291.0, 267.0, 271.0, 302.0, 285.0, 296.0, 291.0, 278.0, 286.0, 311.0, 319.0, 295.0, 287.0, 316.0, 317.0, 299.0, 280.0, 268.0, 271.0, 321.0, 306.0, 294.0, 296.0, 322.0, 317.0, 317.0, 322.0, 316.0, 311.0, 275.0, 295.0, 300.0, 282.0, 326.0, 307.0, 294.0, 296.0, 313.0, 314.0, 311.0, 322.0, 291.0, 285.0, 272.0, 304.0, 313.0, 314.0, 294.0, 288.0, 315.0, 309.0, 313.0, 317.0, 314.0, 319.0, 316.0, 314.0, 280.0, 296.0, 308.0, 325.0, 321.0, 312.0, 293.0, 297.0, 296.0, 291.0, 317.0, 322.0, 291.0, 288.0, 292.0, 290.0, 294.0, 276.0, 311.0, 319.0, 288.0, 294.0, 314.0, 322.0, 316.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.068272032402952, "mean_processing_ms": 0.2818129859025947, "mean_inference_ms": 1.6291161273108918}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5832000, "num_steps_sampled": 3110400, "sample_time_ms": 21475.079, "load_time_ms": 37.422, "grad_time_ms": 9892.717, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.0034763356670737267, "policy_loss": -0.004455787595361471, "vf_loss": 84.99886322021484, "vf_explained_var": 0.7575659155845642, "kl": 0.0017217934364452958, "entropy": 1.135510802268982, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3110400, "episodes_total": 7776, "training_iteration": 243, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-47-21", "timestamp": 1660254441, "time_this_iter_s": 32.067052125930786, "time_total_s": 12855.705075263977, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12855.705075263977, "timesteps_since_restore": 3110400, "iterations_since_restore": 243, "perf": {"cpu_util_percent": 31.317777777777778, "ram_util_percent": 58.61555555555556}} -{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 602.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 301.15}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.7, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.09, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.57, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.35, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.02, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.07, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.35, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.35, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 539.0, 630.0, 549.0, 627.0, 564.0, 539.0, 636.0, 627.0, 587.0, 582.0, 636.0, 582.0, 630.0, 627.0, 576.0, 581.0, 627.0, 630.0, 582.0, 576.0, 579.0, 582.0, 627.0, 587.0, 587.0, 627.0, 582.0, 468.0, 636.0, 630.0, 504.0, 582.0, 633.0, 579.0, 539.0, 627.0, 590.0, 639.0, 639.0, 627.0, 570.0, 582.0, 633.0, 590.0, 627.0, 633.0, 576.0, 576.0, 627.0, 582.0, 624.0, 630.0, 633.0, 630.0, 576.0, 633.0, 633.0, 590.0, 587.0, 639.0, 579.0, 582.0, 570.0, 630.0, 582.0, 636.0, 627.0, 630.0, 633.0, 630.0, 627.0, 627.0, 627.0, 582.0, 633.0, 630.0, 636.0, 584.0, 627.0, 582.0, 633.0, 573.0, 587.0, 627.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 639.0, 570.0, 579.0, 582.0, 633.0, 579.0, 633.0, 627.0, 544.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 311.0, 269.0, 270.0, 311.0, 319.0, 275.0, 274.0, 308.0, 319.0, 292.0, 272.0, 271.0, 268.0, 324.0, 312.0, 313.0, 314.0, 302.0, 285.0, 299.0, 283.0, 322.0, 314.0, 293.0, 289.0, 310.0, 320.0, 319.0, 308.0, 281.0, 295.0, 282.0, 299.0, 307.0, 320.0, 321.0, 309.0, 283.0, 299.0, 277.0, 299.0, 293.0, 286.0, 275.0, 307.0, 311.0, 316.0, 285.0, 302.0, 299.0, 288.0, 316.0, 311.0, 299.0, 283.0, 229.0, 239.0, 318.0, 318.0, 308.0, 322.0, 238.0, 266.0, 295.0, 287.0, 316.0, 317.0, 299.0, 280.0, 268.0, 271.0, 321.0, 306.0, 294.0, 296.0, 322.0, 317.0, 317.0, 322.0, 316.0, 311.0, 275.0, 295.0, 300.0, 282.0, 326.0, 307.0, 294.0, 296.0, 313.0, 314.0, 311.0, 322.0, 291.0, 285.0, 272.0, 304.0, 313.0, 314.0, 294.0, 288.0, 315.0, 309.0, 313.0, 317.0, 314.0, 319.0, 316.0, 314.0, 280.0, 296.0, 308.0, 325.0, 321.0, 312.0, 293.0, 297.0, 296.0, 291.0, 317.0, 322.0, 291.0, 288.0, 292.0, 290.0, 294.0, 276.0, 311.0, 319.0, 288.0, 294.0, 314.0, 322.0, 316.0, 311.0, 313.0, 317.0, 316.0, 317.0, 301.0, 329.0, 313.0, 314.0, 311.0, 316.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 311.0, 319.0, 324.0, 312.0, 286.0, 298.0, 324.0, 303.0, 278.0, 304.0, 307.0, 326.0, 288.0, 285.0, 298.0, 289.0, 308.0, 319.0, 293.0, 294.0, 326.0, 304.0, 295.0, 287.0, 280.0, 296.0, 315.0, 312.0, 289.0, 290.0, 322.0, 317.0, 288.0, 282.0, 283.0, 296.0, 299.0, 283.0, 309.0, 324.0, 282.0, 297.0, 312.0, 321.0, 305.0, 322.0, 278.0, 266.0]}, "sampler_perf": {"mean_env_wait_ms": 1.065748558737823, "mean_processing_ms": 0.28131319823148404, "mean_inference_ms": 1.6267302844305909}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5856000, 
"num_steps_sampled": 3123200, "sample_time_ms": 21258.847, "load_time_ms": 37.597, "grad_time_ms": 9965.955, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00222679297439754, "policy_loss": -0.005516994744539261, "vf_loss": 83.11483764648438, "vf_explained_var": 0.7694733142852783, "kl": 0.002387256594374776, "entropy": 1.135390281677246, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3123200, "episodes_total": 7808, "training_iteration": 244, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-47-51", "timestamp": 1660254471, "time_this_iter_s": 30.266911029815674, "time_total_s": 12885.971986293793, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12885.971986293793, "timesteps_since_restore": 3123200, "iterations_since_restore": 244, "perf": {"cpu_util_percent": 32.02093023255814, "ram_util_percent": 58.54186046511629}} -{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 599.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 299.72}, "custom_metrics": {"sparse_reward_mean": 207.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.24, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.11, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.87, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.97, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.28, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, 
"dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.97, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.28, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.97, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.28, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 633.0, 570.0, 582.0, 593.0, 633.0, 636.0, 587.0, 579.0, 627.0, 579.0, 539.0, 587.0, 630.0, 636.0, 584.0, 579.0, 582.0, 630.0, 544.0, 584.0, 630.0, 587.0, 587.0, 573.0, 582.0, 573.0, 627.0, 527.0, 624.0, 587.0, 630.0, 582.0, 636.0, 627.0, 630.0, 633.0, 630.0, 627.0, 627.0, 627.0, 582.0, 633.0, 630.0, 636.0, 584.0, 627.0, 582.0, 633.0, 573.0, 587.0, 627.0, 587.0, 630.0, 582.0, 576.0, 627.0, 579.0, 639.0, 570.0, 579.0, 582.0, 633.0, 579.0, 633.0, 627.0, 544.0, 627.0, 539.0, 630.0, 549.0, 627.0, 564.0, 539.0, 636.0, 627.0, 587.0, 582.0, 636.0, 582.0, 630.0, 627.0, 576.0, 581.0, 627.0, 630.0, 582.0, 576.0, 579.0, 582.0, 627.0, 587.0, 587.0, 
627.0, 582.0, 468.0, 636.0, 630.0, 504.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 309.0, 318.0, 315.0, 322.0, 311.0, 290.0, 280.0, 285.0, 297.0, 302.0, 291.0, 322.0, 311.0, 324.0, 312.0, 304.0, 283.0, 293.0, 286.0, 321.0, 306.0, 285.0, 294.0, 275.0, 264.0, 293.0, 294.0, 318.0, 312.0, 319.0, 317.0, 277.0, 307.0, 291.0, 288.0, 288.0, 294.0, 319.0, 311.0, 268.0, 276.0, 300.0, 284.0, 314.0, 316.0, 290.0, 297.0, 299.0, 288.0, 291.0, 282.0, 287.0, 295.0, 287.0, 286.0, 313.0, 314.0, 259.0, 268.0, 311.0, 313.0, 288.0, 299.0, 311.0, 319.0, 288.0, 294.0, 314.0, 322.0, 316.0, 311.0, 313.0, 317.0, 316.0, 317.0, 301.0, 329.0, 313.0, 314.0, 311.0, 316.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 311.0, 319.0, 324.0, 312.0, 286.0, 298.0, 324.0, 303.0, 278.0, 304.0, 307.0, 326.0, 288.0, 285.0, 298.0, 289.0, 308.0, 319.0, 293.0, 294.0, 326.0, 304.0, 295.0, 287.0, 280.0, 296.0, 315.0, 312.0, 289.0, 290.0, 322.0, 317.0, 288.0, 282.0, 283.0, 296.0, 299.0, 283.0, 309.0, 324.0, 282.0, 297.0, 312.0, 321.0, 305.0, 322.0, 278.0, 266.0, 316.0, 311.0, 269.0, 270.0, 311.0, 319.0, 275.0, 274.0, 308.0, 319.0, 292.0, 272.0, 271.0, 268.0, 324.0, 312.0, 313.0, 314.0, 302.0, 285.0, 299.0, 283.0, 322.0, 314.0, 293.0, 289.0, 310.0, 320.0, 319.0, 308.0, 281.0, 295.0, 282.0, 299.0, 307.0, 320.0, 321.0, 309.0, 283.0, 299.0, 277.0, 299.0, 293.0, 286.0, 275.0, 307.0, 311.0, 316.0, 285.0, 302.0, 299.0, 288.0, 316.0, 311.0, 299.0, 283.0, 229.0, 239.0, 318.0, 318.0, 308.0, 322.0, 238.0, 266.0]}, 
"sampler_perf": {"mean_env_wait_ms": 1.0632381583217039, "mean_processing_ms": 0.2808155253638906, "mean_inference_ms": 1.6242198083388235}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5880000, "num_steps_sampled": 3136000, "sample_time_ms": 21038.899, "load_time_ms": 37.35, "grad_time_ms": 9776.148, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004664632026106119, "policy_loss": -0.003766902955248952, "vf_loss": 90.03823852539062, "vf_explained_var": 0.7575922012329102, "kl": 0.002137060509994626, "entropy": 1.1445802450180054, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3136000, "episodes_total": 7840, "training_iteration": 245, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-48-21", "timestamp": 1660254501, "time_this_iter_s": 30.129722118377686, "time_total_s": 12916.10170841217, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12916.10170841217, "timesteps_since_restore": 3136000, "iterations_since_restore": 245, "perf": {"cpu_util_percent": 34.127906976744185, "ram_util_percent": 58.56744186046512}} -{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 600.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 300.025}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.45, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.75, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.87, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.61, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, 
"potting_onion_agent_1_mean": 17.64, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.61, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.64, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 15.61, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.64, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 579.0, 630.0, 627.0, 633.0, 630.0, 630.0, 582.0, 639.0, 627.0, 587.0, 639.0, 636.0, 582.0, 627.0, 582.0, 630.0, 633.0, 636.0, 513.0, 582.0, 630.0, 570.0, 584.0, 630.0, 633.0, 582.0, 630.0, 582.0, 636.0, 630.0, 579.0, 633.0, 627.0, 544.0, 627.0, 539.0, 630.0, 549.0, 627.0, 564.0, 539.0, 636.0, 627.0, 587.0, 582.0, 636.0, 582.0, 630.0, 627.0, 576.0, 581.0, 627.0, 630.0, 582.0, 576.0, 579.0, 582.0, 
627.0, 587.0, 587.0, 627.0, 582.0, 468.0, 636.0, 630.0, 504.0, 627.0, 633.0, 633.0, 570.0, 582.0, 593.0, 633.0, 636.0, 587.0, 579.0, 627.0, 579.0, 539.0, 587.0, 630.0, 636.0, 584.0, 579.0, 582.0, 630.0, 544.0, 584.0, 630.0, 587.0, 587.0, 573.0, 582.0, 573.0, 627.0, 527.0, 624.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 311.0, 319.0, 290.0, 289.0, 321.0, 309.0, 309.0, 318.0, 329.0, 304.0, 323.0, 307.0, 318.0, 312.0, 293.0, 289.0, 314.0, 325.0, 314.0, 313.0, 297.0, 290.0, 314.0, 325.0, 311.0, 325.0, 290.0, 292.0, 303.0, 324.0, 296.0, 286.0, 314.0, 316.0, 315.0, 318.0, 324.0, 312.0, 261.0, 252.0, 293.0, 289.0, 321.0, 309.0, 288.0, 282.0, 285.0, 299.0, 315.0, 315.0, 315.0, 318.0, 285.0, 297.0, 319.0, 311.0, 292.0, 290.0, 319.0, 317.0, 319.0, 311.0, 282.0, 297.0, 312.0, 321.0, 305.0, 322.0, 278.0, 266.0, 316.0, 311.0, 269.0, 270.0, 311.0, 319.0, 275.0, 274.0, 308.0, 319.0, 292.0, 272.0, 271.0, 268.0, 324.0, 312.0, 313.0, 314.0, 302.0, 285.0, 299.0, 283.0, 322.0, 314.0, 293.0, 289.0, 310.0, 320.0, 319.0, 308.0, 281.0, 295.0, 282.0, 299.0, 307.0, 320.0, 321.0, 309.0, 283.0, 299.0, 277.0, 299.0, 293.0, 286.0, 275.0, 307.0, 311.0, 316.0, 285.0, 302.0, 299.0, 288.0, 316.0, 311.0, 299.0, 283.0, 229.0, 239.0, 318.0, 318.0, 308.0, 322.0, 238.0, 266.0, 318.0, 309.0, 318.0, 315.0, 322.0, 311.0, 290.0, 280.0, 285.0, 297.0, 302.0, 291.0, 322.0, 311.0, 324.0, 312.0, 304.0, 283.0, 293.0, 286.0, 321.0, 306.0, 285.0, 294.0, 275.0, 264.0, 293.0, 294.0, 318.0, 312.0, 319.0, 
317.0, 277.0, 307.0, 291.0, 288.0, 288.0, 294.0, 319.0, 311.0, 268.0, 276.0, 300.0, 284.0, 314.0, 316.0, 290.0, 297.0, 299.0, 288.0, 291.0, 282.0, 287.0, 295.0, 287.0, 286.0, 313.0, 314.0, 259.0, 268.0, 311.0, 313.0, 288.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0607418368814732, "mean_processing_ms": 0.280319937755019, "mean_inference_ms": 1.6216711984881527}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5904000, "num_steps_sampled": 3148800, "sample_time_ms": 21042.313, "load_time_ms": 37.258, "grad_time_ms": 9784.882, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003808257170021534, "policy_loss": -0.0040723783895373344, "vf_loss": 84.46407318115234, "vf_explained_var": 0.7558939456939697, "kl": 0.0020272734109312296, "entropy": 1.1315315961837769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3148800, "episodes_total": 7872, "training_iteration": 246, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-48-52", "timestamp": 1660254532, "time_this_iter_s": 30.6191668510437, "time_total_s": 12946.720875263214, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": 
null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, 
"compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12946.720875263214, "timesteps_since_restore": 3148800, "iterations_since_restore": 246, "perf": {"cpu_util_percent": 29.048837209302324, "ram_util_percent": 58.57906976744185}} -{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 601.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 300.71}, 
"custom_metrics": {"sparse_reward_mean": 208.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.02, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.83, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.97, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.91, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, 
"useful_onion_drop_agent_1_mean": 0.36, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.59, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.59, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.59, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 630.0, 576.0, 582.0, 633.0, 573.0, 587.0, 582.0, 630.0, 633.0, 582.0, 633.0, 530.0, 636.0, 582.0, 636.0, 584.0, 630.0, 633.0, 630.0, 582.0, 630.0, 521.0, 587.0, 627.0, 576.0, 630.0, 579.0, 
630.0, 582.0, 584.0, 468.0, 636.0, 630.0, 504.0, 627.0, 633.0, 633.0, 570.0, 582.0, 593.0, 633.0, 636.0, 587.0, 579.0, 627.0, 579.0, 539.0, 587.0, 630.0, 636.0, 584.0, 579.0, 582.0, 630.0, 544.0, 584.0, 630.0, 587.0, 587.0, 573.0, 582.0, 573.0, 627.0, 527.0, 624.0, 587.0, 627.0, 630.0, 579.0, 630.0, 627.0, 633.0, 630.0, 630.0, 582.0, 639.0, 627.0, 587.0, 639.0, 636.0, 582.0, 627.0, 582.0, 630.0, 633.0, 636.0, 513.0, 582.0, 630.0, 570.0, 584.0, 630.0, 633.0, 582.0, 630.0, 582.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 295.0, 287.0, 314.0, 316.0, 291.0, 285.0, 288.0, 294.0, 319.0, 314.0, 279.0, 294.0, 290.0, 297.0, 288.0, 294.0, 303.0, 327.0, 324.0, 309.0, 286.0, 296.0, 316.0, 317.0, 261.0, 269.0, 322.0, 314.0, 288.0, 294.0, 317.0, 319.0, 296.0, 288.0, 316.0, 314.0, 321.0, 312.0, 301.0, 329.0, 293.0, 289.0, 313.0, 317.0, 259.0, 262.0, 293.0, 294.0, 308.0, 319.0, 283.0, 293.0, 308.0, 322.0, 297.0, 282.0, 321.0, 309.0, 303.0, 279.0, 302.0, 282.0, 229.0, 239.0, 318.0, 318.0, 308.0, 322.0, 238.0, 266.0, 318.0, 309.0, 318.0, 315.0, 322.0, 311.0, 290.0, 280.0, 285.0, 297.0, 302.0, 291.0, 322.0, 311.0, 324.0, 312.0, 304.0, 283.0, 293.0, 286.0, 321.0, 306.0, 285.0, 294.0, 275.0, 264.0, 293.0, 294.0, 318.0, 312.0, 319.0, 317.0, 277.0, 307.0, 291.0, 288.0, 288.0, 294.0, 319.0, 311.0, 268.0, 276.0, 300.0, 284.0, 314.0, 316.0, 290.0, 297.0, 299.0, 288.0, 291.0, 282.0, 287.0, 295.0, 287.0, 286.0, 313.0, 314.0, 259.0, 268.0, 311.0, 313.0, 288.0, 299.0, 313.0, 
314.0, 311.0, 319.0, 290.0, 289.0, 321.0, 309.0, 309.0, 318.0, 329.0, 304.0, 323.0, 307.0, 318.0, 312.0, 293.0, 289.0, 314.0, 325.0, 314.0, 313.0, 297.0, 290.0, 314.0, 325.0, 311.0, 325.0, 290.0, 292.0, 303.0, 324.0, 296.0, 286.0, 314.0, 316.0, 315.0, 318.0, 324.0, 312.0, 261.0, 252.0, 293.0, 289.0, 321.0, 309.0, 288.0, 282.0, 285.0, 299.0, 315.0, 315.0, 315.0, 318.0, 285.0, 297.0, 319.0, 311.0, 292.0, 290.0, 319.0, 317.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0582836436059244, "mean_processing_ms": 0.2798326074687889, "mean_inference_ms": 1.6195165404601743}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5928000, "num_steps_sampled": 3161600, "sample_time_ms": 21417.89, "load_time_ms": 36.65, "grad_time_ms": 9717.385, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007093754131346941, "policy_loss": -0.0009618126205168664, "vf_loss": 86.259033203125, "vf_explained_var": 0.7558541893959045, "kl": 0.001976242521777749, "entropy": 1.140650749206543, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3161600, "episodes_total": 7904, "training_iteration": 247, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-49-26", "timestamp": 1660254566, "time_this_iter_s": 34.550382137298584, "time_total_s": 12981.271257400513, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 12981.271257400513, "timesteps_since_restore": 3161600, "iterations_since_restore": 247, "perf": {"cpu_util_percent": 29.197959183673472, "ram_util_percent": 58.64285714285715}} -{"episode_reward_max": 639.0, 
"episode_reward_min": 513.0, "episode_reward_mean": 608.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 304.075}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.55, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.87, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.14, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, 
"onion_drop_agent_1_mean": 0.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.82, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.82, "optimal_onion_potting_agent_0_min": 11, 
"optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [636.0, 636.0, 576.0, 633.0, 587.0, 633.0, 630.0, 636.0, 596.0, 576.0, 636.0, 633.0, 630.0, 582.0, 582.0, 627.0, 630.0, 627.0, 639.0, 573.0, 627.0, 633.0, 582.0, 582.0, 582.0, 561.0, 627.0, 636.0, 636.0, 636.0, 584.0, 633.0, 627.0, 527.0, 624.0, 587.0, 627.0, 630.0, 579.0, 630.0, 627.0, 633.0, 630.0, 630.0, 582.0, 639.0, 627.0, 587.0, 639.0, 636.0, 582.0, 627.0, 582.0, 630.0, 633.0, 636.0, 513.0, 582.0, 630.0, 570.0, 584.0, 630.0, 633.0, 582.0, 630.0, 582.0, 636.0, 630.0, 633.0, 582.0, 630.0, 576.0, 582.0, 633.0, 573.0, 587.0, 582.0, 630.0, 633.0, 582.0, 633.0, 530.0, 636.0, 582.0, 636.0, 584.0, 630.0, 633.0, 630.0, 582.0, 630.0, 521.0, 587.0, 627.0, 576.0, 630.0, 579.0, 630.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 309.0, 327.0, 270.0, 306.0, 305.0, 328.0, 286.0, 301.0, 322.0, 311.0, 308.0, 322.0, 314.0, 322.0, 299.0, 297.0, 293.0, 283.0, 316.0, 320.0, 319.0, 314.0, 311.0, 319.0, 288.0, 294.0, 298.0, 284.0, 312.0, 315.0, 324.0, 306.0, 311.0, 316.0, 322.0, 317.0, 297.0, 276.0, 307.0, 320.0, 321.0, 312.0, 280.0, 302.0, 304.0, 278.0, 300.0, 282.0, 266.0, 295.0, 303.0, 324.0, 324.0, 312.0, 324.0, 312.0, 318.0, 318.0, 296.0, 288.0, 323.0, 310.0, 313.0, 314.0, 259.0, 268.0, 311.0, 313.0, 288.0, 299.0, 313.0, 314.0, 311.0, 319.0, 290.0, 289.0, 321.0, 309.0, 309.0, 318.0, 329.0, 304.0, 323.0, 307.0, 318.0, 312.0, 293.0, 289.0, 314.0, 325.0, 314.0, 313.0, 297.0, 290.0, 314.0, 325.0, 311.0, 325.0, 290.0, 292.0, 303.0, 324.0, 296.0, 
286.0, 314.0, 316.0, 315.0, 318.0, 324.0, 312.0, 261.0, 252.0, 293.0, 289.0, 321.0, 309.0, 288.0, 282.0, 285.0, 299.0, 315.0, 315.0, 315.0, 318.0, 285.0, 297.0, 319.0, 311.0, 292.0, 290.0, 319.0, 317.0, 319.0, 311.0, 319.0, 314.0, 295.0, 287.0, 314.0, 316.0, 291.0, 285.0, 288.0, 294.0, 319.0, 314.0, 279.0, 294.0, 290.0, 297.0, 288.0, 294.0, 303.0, 327.0, 324.0, 309.0, 286.0, 296.0, 316.0, 317.0, 261.0, 269.0, 322.0, 314.0, 288.0, 294.0, 317.0, 319.0, 296.0, 288.0, 316.0, 314.0, 321.0, 312.0, 301.0, 329.0, 293.0, 289.0, 313.0, 317.0, 259.0, 262.0, 293.0, 294.0, 308.0, 319.0, 283.0, 293.0, 308.0, 322.0, 297.0, 282.0, 321.0, 309.0, 303.0, 279.0, 302.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 1.055854669948681, "mean_processing_ms": 0.27935130516970164, "mean_inference_ms": 1.6177570824217435}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5952000, "num_steps_sampled": 3174400, "sample_time_ms": 21678.331, "load_time_ms": 37.099, "grad_time_ms": 9688.848, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009431429207324982, "policy_loss": -0.008797372691333294, "vf_loss": 84.26534271240234, "vf_explained_var": 0.7609202265739441, "kl": 0.001977160107344389, "entropy": 1.14460289478302, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3174400, "episodes_total": 7936, "training_iteration": 248, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-50-00", "timestamp": 1660254600, "time_this_iter_s": 33.773277044296265, "time_total_s": 13015.044534444809, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
13015.044534444809, "timesteps_since_restore": 3174400, "iterations_since_restore": 248, "perf": {"cpu_util_percent": 27.302083333333332, "ram_util_percent": 58.68958333333333}} -{"episode_reward_max": 639.0, "episode_reward_min": 425.0, "episode_reward_mean": 606.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 211.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 303.215}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 186.43, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.3, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.63, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, 
"useful_onion_pickup_agent_1_mean": 17.93, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.82, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.6, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.14, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.6, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.6, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 
0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 627.0, 636.0, 627.0, 582.0, 582.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 582.0, 633.0, 630.0, 582.0, 425.0, 576.0, 578.0, 627.0, 587.0, 639.0, 636.0, 582.0, 630.0, 630.0, 525.0, 630.0, 579.0, 579.0, 630.0, 587.0, 630.0, 582.0, 636.0, 630.0, 633.0, 582.0, 630.0, 576.0, 582.0, 633.0, 573.0, 587.0, 582.0, 630.0, 633.0, 582.0, 633.0, 530.0, 636.0, 582.0, 636.0, 584.0, 630.0, 633.0, 630.0, 582.0, 630.0, 521.0, 587.0, 627.0, 576.0, 630.0, 579.0, 630.0, 582.0, 584.0, 636.0, 636.0, 576.0, 633.0, 587.0, 633.0, 630.0, 636.0, 596.0, 576.0, 636.0, 633.0, 630.0, 582.0, 582.0, 627.0, 630.0, 627.0, 639.0, 573.0, 627.0, 633.0, 582.0, 582.0, 582.0, 561.0, 627.0, 636.0, 636.0, 636.0, 584.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 306.0, 321.0, 317.0, 319.0, 308.0, 319.0, 286.0, 296.0, 296.0, 286.0, 301.0, 286.0, 309.0, 321.0, 313.0, 317.0, 327.0, 309.0, 320.0, 313.0, 319.0, 314.0, 293.0, 289.0, 319.0, 314.0, 314.0, 316.0, 293.0, 289.0, 214.0, 211.0, 299.0, 277.0, 295.0, 283.0, 308.0, 319.0, 298.0, 289.0, 330.0, 309.0, 316.0, 320.0, 291.0, 291.0, 314.0, 316.0, 311.0, 319.0, 260.0, 265.0, 318.0, 312.0, 291.0, 288.0, 295.0, 284.0, 309.0, 321.0, 295.0, 292.0, 319.0, 311.0, 292.0, 290.0, 319.0, 317.0, 319.0, 311.0, 319.0, 314.0, 295.0, 287.0, 314.0, 316.0, 
291.0, 285.0, 288.0, 294.0, 319.0, 314.0, 279.0, 294.0, 290.0, 297.0, 288.0, 294.0, 303.0, 327.0, 324.0, 309.0, 286.0, 296.0, 316.0, 317.0, 261.0, 269.0, 322.0, 314.0, 288.0, 294.0, 317.0, 319.0, 296.0, 288.0, 316.0, 314.0, 321.0, 312.0, 301.0, 329.0, 293.0, 289.0, 313.0, 317.0, 259.0, 262.0, 293.0, 294.0, 308.0, 319.0, 283.0, 293.0, 308.0, 322.0, 297.0, 282.0, 321.0, 309.0, 303.0, 279.0, 302.0, 282.0, 319.0, 317.0, 309.0, 327.0, 270.0, 306.0, 305.0, 328.0, 286.0, 301.0, 322.0, 311.0, 308.0, 322.0, 314.0, 322.0, 299.0, 297.0, 293.0, 283.0, 316.0, 320.0, 319.0, 314.0, 311.0, 319.0, 288.0, 294.0, 298.0, 284.0, 312.0, 315.0, 324.0, 306.0, 311.0, 316.0, 322.0, 317.0, 297.0, 276.0, 307.0, 320.0, 321.0, 312.0, 280.0, 302.0, 304.0, 278.0, 300.0, 282.0, 266.0, 295.0, 303.0, 324.0, 324.0, 312.0, 324.0, 312.0, 318.0, 318.0, 296.0, 288.0, 323.0, 310.0]}, "sampler_perf": {"mean_env_wait_ms": 1.053454756557531, "mean_processing_ms": 0.27887402151447716, "mean_inference_ms": 1.6161016017922192}, "off_policy_estimator": {}, "info": {"num_steps_trained": 5976000, "num_steps_sampled": 3187200, "sample_time_ms": 21826.606, "load_time_ms": 36.816, "grad_time_ms": 9749.499, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024596769362688065, "policy_loss": -0.005453174468129873, "vf_loss": 84.88723754882812, "vf_explained_var": 0.7672951221466064, "kl": 0.0021601892076432705, "entropy": 1.1517353057861328, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3187200, "episodes_total": 7968, "training_iteration": 249, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-50-33", "timestamp": 1660254633, "time_this_iter_s": 32.3484160900116, "time_total_s": 13047.39295053482, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, 
"num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13047.39295053482, "timesteps_since_restore": 3187200, "iterations_since_restore": 249, "perf": {"cpu_util_percent": 29.615217391304352, "ram_util_percent": 58.70434782608695}} -{"episode_reward_max": 639.0, "episode_reward_min": 425.0, "episode_reward_mean": 604.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 211.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 302.285}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.77, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.29, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.5, 
"onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.63, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.47, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.13, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.14, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.47, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.47, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 584.0, 624.0, 570.0, 579.0, 587.0, 567.0, 627.0, 587.0, 576.0, 636.0, 630.0, 539.0, 627.0, 639.0, 636.0, 587.0, 630.0, 570.0, 570.0, 584.0, 621.0, 582.0, 627.0, 582.0, 627.0, 579.0, 533.0, 630.0, 630.0, 579.0, 596.0, 579.0, 630.0, 582.0, 584.0, 636.0, 636.0, 576.0, 633.0, 587.0, 633.0, 630.0, 636.0, 596.0, 576.0, 636.0, 633.0, 630.0, 582.0, 582.0, 627.0, 630.0, 627.0, 639.0, 573.0, 627.0, 633.0, 582.0, 582.0, 582.0, 561.0, 627.0, 636.0, 636.0, 636.0, 584.0, 633.0, 633.0, 627.0, 636.0, 627.0, 582.0, 582.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 582.0, 633.0, 630.0, 582.0, 425.0, 576.0, 578.0, 627.0, 587.0, 639.0, 636.0, 582.0, 630.0, 630.0, 525.0, 630.0, 579.0, 579.0, 630.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 306.0, 291.0, 293.0, 310.0, 314.0, 280.0, 290.0, 296.0, 283.0, 285.0, 302.0, 287.0, 280.0, 318.0, 309.0, 277.0, 310.0, 292.0, 284.0, 314.0, 322.0, 306.0, 324.0, 278.0, 261.0, 309.0, 318.0, 319.0, 320.0, 322.0, 314.0, 286.0, 301.0, 326.0, 304.0, 295.0, 275.0, 292.0, 278.0, 295.0, 289.0, 308.0, 313.0, 290.0, 292.0, 305.0, 322.0, 291.0, 
291.0, 327.0, 300.0, 279.0, 300.0, 263.0, 270.0, 316.0, 314.0, 313.0, 317.0, 296.0, 283.0, 302.0, 294.0, 297.0, 282.0, 321.0, 309.0, 303.0, 279.0, 302.0, 282.0, 319.0, 317.0, 309.0, 327.0, 270.0, 306.0, 305.0, 328.0, 286.0, 301.0, 322.0, 311.0, 308.0, 322.0, 314.0, 322.0, 299.0, 297.0, 293.0, 283.0, 316.0, 320.0, 319.0, 314.0, 311.0, 319.0, 288.0, 294.0, 298.0, 284.0, 312.0, 315.0, 324.0, 306.0, 311.0, 316.0, 322.0, 317.0, 297.0, 276.0, 307.0, 320.0, 321.0, 312.0, 280.0, 302.0, 304.0, 278.0, 300.0, 282.0, 266.0, 295.0, 303.0, 324.0, 324.0, 312.0, 324.0, 312.0, 318.0, 318.0, 296.0, 288.0, 323.0, 310.0, 319.0, 314.0, 306.0, 321.0, 317.0, 319.0, 308.0, 319.0, 286.0, 296.0, 296.0, 286.0, 301.0, 286.0, 309.0, 321.0, 313.0, 317.0, 327.0, 309.0, 320.0, 313.0, 319.0, 314.0, 293.0, 289.0, 319.0, 314.0, 314.0, 316.0, 293.0, 289.0, 214.0, 211.0, 299.0, 277.0, 295.0, 283.0, 308.0, 319.0, 298.0, 289.0, 330.0, 309.0, 316.0, 320.0, 291.0, 291.0, 314.0, 316.0, 311.0, 319.0, 260.0, 265.0, 318.0, 312.0, 291.0, 288.0, 295.0, 284.0, 309.0, 321.0, 295.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0510667862182812, "mean_processing_ms": 0.27840025138599184, "mean_inference_ms": 1.6143808795038155}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6000000, "num_steps_sampled": 3200000, "sample_time_ms": 22115.848, "load_time_ms": 36.609, "grad_time_ms": 9825.426, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0038353295531123877, "policy_loss": -0.004546869080513716, "vf_loss": 89.5432357788086, "vf_explained_var": 0.7639234662055969, "kl": 0.002313032979145646, "entropy": 1.144262671470642, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3200000, "episodes_total": 8000, "training_iteration": 250, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-51-06", "timestamp": 1660254666, 
"time_this_iter_s": 33.03909492492676, "time_total_s": 13080.432045459747, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, 
"log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, 
"sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13080.432045459747, "timesteps_since_restore": 3200000, "iterations_since_restore": 250, "perf": {"cpu_util_percent": 30.089130434782607, "ram_util_percent": 58.62826086956523}} -{"episode_reward_max": 639.0, "episode_reward_min": 425.0, "episode_reward_mean": 605.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 211.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 302.845}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 186.09, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.03, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.8, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.08, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.77, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.94, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, 
"soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.94, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.94, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 627.0, 630.0, 633.0, 633.0, 587.0, 639.0, 582.0, 581.0, 587.0, 624.0, 633.0, 621.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 582.0, 582.0, 627.0, 639.0, 630.0, 630.0, 587.0, 630.0, 627.0, 630.0, 630.0, 582.0, 636.0, 636.0, 584.0, 633.0, 633.0, 627.0, 636.0, 627.0, 582.0, 582.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 582.0, 633.0, 630.0, 582.0, 425.0, 576.0, 578.0, 627.0, 587.0, 639.0, 636.0, 582.0, 630.0, 630.0, 525.0, 630.0, 579.0, 579.0, 630.0, 587.0, 627.0, 584.0, 624.0, 570.0, 579.0, 587.0, 567.0, 627.0, 587.0, 576.0, 636.0, 630.0, 539.0, 627.0, 639.0, 636.0, 587.0, 630.0, 570.0, 570.0, 584.0, 621.0, 582.0, 627.0, 582.0, 627.0, 579.0, 533.0, 630.0, 630.0, 579.0, 596.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 304.0, 275.0, 320.0, 307.0, 316.0, 314.0, 312.0, 321.0, 316.0, 317.0, 295.0, 292.0, 312.0, 327.0, 294.0, 288.0, 
284.0, 297.0, 295.0, 292.0, 302.0, 322.0, 316.0, 317.0, 308.0, 313.0, 291.0, 291.0, 322.0, 317.0, 319.0, 317.0, 291.0, 291.0, 290.0, 297.0, 307.0, 320.0, 290.0, 292.0, 280.0, 302.0, 319.0, 308.0, 316.0, 323.0, 313.0, 317.0, 305.0, 325.0, 296.0, 291.0, 313.0, 317.0, 309.0, 318.0, 311.0, 319.0, 308.0, 322.0, 289.0, 293.0, 324.0, 312.0, 318.0, 318.0, 296.0, 288.0, 323.0, 310.0, 319.0, 314.0, 306.0, 321.0, 317.0, 319.0, 308.0, 319.0, 286.0, 296.0, 296.0, 286.0, 301.0, 286.0, 309.0, 321.0, 313.0, 317.0, 327.0, 309.0, 320.0, 313.0, 319.0, 314.0, 293.0, 289.0, 319.0, 314.0, 314.0, 316.0, 293.0, 289.0, 214.0, 211.0, 299.0, 277.0, 295.0, 283.0, 308.0, 319.0, 298.0, 289.0, 330.0, 309.0, 316.0, 320.0, 291.0, 291.0, 314.0, 316.0, 311.0, 319.0, 260.0, 265.0, 318.0, 312.0, 291.0, 288.0, 295.0, 284.0, 309.0, 321.0, 295.0, 292.0, 321.0, 306.0, 291.0, 293.0, 310.0, 314.0, 280.0, 290.0, 296.0, 283.0, 285.0, 302.0, 287.0, 280.0, 318.0, 309.0, 277.0, 310.0, 292.0, 284.0, 314.0, 322.0, 306.0, 324.0, 278.0, 261.0, 309.0, 318.0, 319.0, 320.0, 322.0, 314.0, 286.0, 301.0, 326.0, 304.0, 295.0, 275.0, 292.0, 278.0, 295.0, 289.0, 308.0, 313.0, 290.0, 292.0, 305.0, 322.0, 291.0, 291.0, 327.0, 300.0, 279.0, 300.0, 263.0, 270.0, 316.0, 314.0, 313.0, 317.0, 296.0, 283.0, 302.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0486945411620672, "mean_processing_ms": 0.2779294488920563, "mean_inference_ms": 1.612487194421866}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6024000, "num_steps_sampled": 3212800, "sample_time_ms": 22207.762, "load_time_ms": 36.674, "grad_time_ms": 9691.515, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005205323453992605, "policy_loss": -0.0029822138603776693, "vf_loss": 87.57022857666016, "vf_explained_var": 0.7586490511894226, "kl": 0.0020639507565647364, "entropy": 1.1389611959457397, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 3212800, "episodes_total": 8032, "training_iteration": 251, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-51-37", "timestamp": 1660254697, "time_this_iter_s": 31.857529878616333, "time_total_s": 13112.289575338364, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13112.289575338364, "timesteps_since_restore": 3212800, "iterations_since_restore": 251, "perf": {"cpu_util_percent": 30.984444444444442, "ram_util_percent": 58.69777777777778}} -{"episode_reward_max": 639.0, "episode_reward_min": 518.0, "episode_reward_mean": 606.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.325}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.25, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.82, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.86, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.27, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.22, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.66, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.8, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.8, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.8, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 630.0, 633.0, 633.0, 627.0, 582.0, 579.0, 630.0, 633.0, 633.0, 587.0, 633.0, 633.0, 636.0, 582.0, 570.0, 590.0, 579.0, 576.0, 630.0, 581.0, 579.0, 518.0, 636.0, 636.0, 633.0, 576.0, 590.0, 633.0, 636.0, 633.0, 579.0, 579.0, 630.0, 587.0, 627.0, 584.0, 624.0, 570.0, 579.0, 587.0, 567.0, 627.0, 587.0, 576.0, 636.0, 630.0, 539.0, 627.0, 639.0, 636.0, 587.0, 630.0, 570.0, 570.0, 584.0, 621.0, 582.0, 627.0, 582.0, 627.0, 579.0, 533.0, 630.0, 630.0, 579.0, 596.0, 630.0, 579.0, 627.0, 630.0, 633.0, 633.0, 587.0, 639.0, 582.0, 581.0, 587.0, 624.0, 633.0, 621.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 582.0, 582.0, 627.0, 639.0, 630.0, 630.0, 587.0, 630.0, 627.0, 630.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 317.0, 313.0, 312.0, 318.0, 314.0, 319.0, 317.0, 316.0, 323.0, 304.0, 292.0, 290.0, 289.0, 290.0, 316.0, 314.0, 306.0, 327.0, 316.0, 317.0, 299.0, 288.0, 311.0, 322.0, 319.0, 314.0, 322.0, 314.0, 296.0, 286.0, 291.0, 279.0, 296.0, 294.0, 291.0, 288.0, 288.0, 288.0, 313.0, 317.0, 290.0, 291.0, 294.0, 285.0, 258.0, 260.0, 311.0, 325.0, 316.0, 320.0, 315.0, 318.0, 282.0, 294.0, 301.0, 289.0, 318.0, 315.0, 314.0, 322.0, 316.0, 317.0, 291.0, 288.0, 295.0, 284.0, 309.0, 321.0, 295.0, 292.0, 321.0, 306.0, 291.0, 293.0, 310.0, 314.0, 280.0, 290.0, 296.0, 283.0, 285.0, 302.0, 287.0, 280.0, 318.0, 309.0, 277.0, 310.0, 292.0, 284.0, 314.0, 322.0, 306.0, 324.0, 278.0, 261.0, 309.0, 318.0, 319.0, 320.0, 322.0, 314.0, 286.0, 301.0, 326.0, 304.0, 295.0, 275.0, 292.0, 278.0, 295.0, 289.0, 308.0, 313.0, 290.0, 292.0, 305.0, 322.0, 291.0, 291.0, 327.0, 300.0, 279.0, 300.0, 263.0, 270.0, 316.0, 314.0, 313.0, 317.0, 296.0, 283.0, 302.0, 294.0, 311.0, 319.0, 304.0, 275.0, 320.0, 307.0, 316.0, 314.0, 312.0, 321.0, 316.0, 317.0, 295.0, 292.0, 312.0, 327.0, 294.0, 288.0, 284.0, 297.0, 295.0, 292.0, 302.0, 322.0, 316.0, 317.0, 308.0, 313.0, 291.0, 291.0, 322.0, 317.0, 319.0, 317.0, 291.0, 291.0, 290.0, 297.0, 307.0, 320.0, 290.0, 292.0, 280.0, 302.0, 319.0, 308.0, 316.0, 323.0, 313.0, 317.0, 305.0, 325.0, 296.0, 291.0, 313.0, 317.0, 309.0, 318.0, 311.0, 319.0, 308.0, 322.0, 289.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0463435116537838, "mean_processing_ms": 0.277465645143399, "mean_inference_ms": 1.6108149086882515}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6048000, "num_steps_sampled": 3225600, "sample_time_ms": 22477.661, "load_time_ms": 36.527, "grad_time_ms": 9715.891, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003005747450515628, "policy_loss": -0.005394397769123316, "vf_loss": 89.67745208740234, 
"vf_explained_var": 0.7541216015815735, "kl": 0.0018617714522406459, "entropy": 1.1351839303970337, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3225600, "episodes_total": 8064, "training_iteration": 252, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-52-11", "timestamp": 1660254731, "time_this_iter_s": 33.91162323951721, "time_total_s": 13146.20119857788, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13146.20119857788, "timesteps_since_restore": 3225600, "iterations_since_restore": 252, "perf": {"cpu_util_percent": 29.32083333333333, "ram_util_percent": 58.725}} -{"episode_reward_max": 639.0, "episode_reward_min": 473.0, "episode_reward_mean": 603.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.78}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.96, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.78, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.23, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.71, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.66, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.78, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 567.0, 636.0, 570.0, 633.0, 633.0, 582.0, 582.0, 587.0, 579.0, 536.0, 587.0, 587.0, 567.0, 630.0, 587.0, 539.0, 582.0, 570.0, 473.0, 627.0, 627.0, 587.0, 587.0, 633.0, 581.0, 579.0, 576.0, 587.0, 636.0, 582.0, 582.0, 630.0, 630.0, 579.0, 596.0, 630.0, 579.0, 627.0, 630.0, 633.0, 633.0, 587.0, 639.0, 582.0, 581.0, 587.0, 624.0, 633.0, 621.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 582.0, 582.0, 627.0, 639.0, 630.0, 630.0, 587.0, 630.0, 627.0, 630.0, 630.0, 582.0, 636.0, 630.0, 630.0, 633.0, 633.0, 627.0, 582.0, 579.0, 630.0, 633.0, 633.0, 587.0, 633.0, 633.0, 636.0, 582.0, 570.0, 590.0, 579.0, 576.0, 630.0, 581.0, 579.0, 518.0, 636.0, 636.0, 633.0, 576.0, 590.0, 633.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 285.0, 282.0, 319.0, 317.0, 278.0, 292.0, 321.0, 312.0, 318.0, 315.0, 288.0, 294.0, 290.0, 292.0, 303.0, 284.0, 299.0, 280.0, 268.0, 268.0, 288.0, 299.0, 291.0, 296.0, 278.0, 289.0, 315.0, 315.0, 295.0, 292.0, 265.0, 274.0, 294.0, 288.0, 288.0, 282.0, 242.0, 231.0, 316.0, 311.0, 319.0, 308.0, 288.0, 299.0, 303.0, 284.0, 317.0, 316.0, 289.0, 292.0, 283.0, 296.0, 287.0, 289.0, 293.0, 294.0, 314.0, 322.0, 289.0, 293.0, 296.0, 286.0, 316.0, 314.0, 313.0, 317.0, 296.0, 283.0, 302.0, 294.0, 311.0, 319.0, 304.0, 275.0, 320.0, 307.0, 316.0, 314.0, 312.0, 321.0, 316.0, 317.0, 295.0, 292.0, 312.0, 327.0, 294.0, 288.0, 284.0, 297.0, 295.0, 292.0, 302.0, 322.0, 316.0, 317.0, 308.0, 313.0, 291.0, 291.0, 322.0, 317.0, 319.0, 317.0, 291.0, 291.0, 290.0, 297.0, 307.0, 320.0, 290.0, 292.0, 280.0, 302.0, 319.0, 308.0, 316.0, 323.0, 313.0, 317.0, 305.0, 325.0, 296.0, 291.0, 313.0, 317.0, 309.0, 318.0, 311.0, 319.0, 308.0, 322.0, 289.0, 293.0, 319.0, 317.0, 317.0, 313.0, 312.0, 318.0, 314.0, 319.0, 317.0, 316.0, 323.0, 304.0, 292.0, 290.0, 289.0, 290.0, 316.0, 314.0, 306.0, 327.0, 316.0, 317.0, 299.0, 288.0, 311.0, 322.0, 319.0, 314.0, 322.0, 314.0, 296.0, 286.0, 291.0, 279.0, 296.0, 294.0, 291.0, 288.0, 288.0, 288.0, 313.0, 317.0, 290.0, 291.0, 294.0, 285.0, 258.0, 260.0, 311.0, 325.0, 316.0, 320.0, 315.0, 318.0, 282.0, 294.0, 301.0, 289.0, 318.0, 315.0, 314.0, 322.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0439919117658958, "mean_processing_ms": 0.2769978233028824, "mean_inference_ms": 1.608730530277712}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6072000, "num_steps_sampled": 3238400, "sample_time_ms": 22102.374, "load_time_ms": 36.62, "grad_time_ms": 9528.927, "update_time_ms": 0.002, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007198518142104149, "policy_loss": -0.002070576651021838, "vf_loss": 98.38677215576172, "vf_explained_var": 0.7492752075195312, "kl": 0.0017245132476091385, "entropy": 1.1391605138778687, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3238400, "episodes_total": 8096, "training_iteration": 253, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-52-38", "timestamp": 1660254758, "time_this_iter_s": 26.443045139312744, "time_total_s": 13172.644243717194, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 
1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13172.644243717194, "timesteps_since_restore": 3238400, "iterations_since_restore": 253, "perf": {"cpu_util_percent": 30.831578947368424, "ram_util_percent": 58.665789473684214}} -{"episode_reward_max": 639.0, "episode_reward_min": 473.0, "episode_reward_mean": 603.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 301.77}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.54, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.7, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.96, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.71, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.87, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 2, 
"useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.87, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.87, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 584.0, 636.0, 630.0, 627.0, 630.0, 561.0, 633.0, 579.0, 627.0, 582.0, 579.0, 582.0, 627.0, 582.0, 636.0, 630.0, 630.0, 582.0, 633.0, 582.0, 636.0, 633.0, 627.0, 582.0, 630.0, 573.0, 587.0, 627.0, 633.0, 627.0, 639.0, 627.0, 630.0, 630.0, 582.0, 636.0, 630.0, 630.0, 633.0, 633.0, 627.0, 582.0, 579.0, 630.0, 633.0, 633.0, 587.0, 633.0, 633.0, 636.0, 582.0, 570.0, 590.0, 579.0, 576.0, 630.0, 581.0, 579.0, 518.0, 636.0, 636.0, 633.0, 576.0, 590.0, 633.0, 636.0, 633.0, 582.0, 567.0, 636.0, 570.0, 633.0, 633.0, 582.0, 582.0, 587.0, 579.0, 536.0, 587.0, 587.0, 567.0, 630.0, 587.0, 539.0, 582.0, 570.0, 473.0, 627.0, 627.0, 587.0, 587.0, 633.0, 581.0, 579.0, 576.0, 587.0, 636.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 304.0, 280.0, 327.0, 309.0, 316.0, 314.0, 316.0, 311.0, 316.0, 314.0, 269.0, 292.0, 316.0, 317.0, 285.0, 294.0, 313.0, 314.0, 288.0, 294.0, 298.0, 281.0, 293.0, 289.0, 324.0, 303.0, 286.0, 296.0, 316.0, 320.0, 316.0, 314.0, 313.0, 317.0, 290.0, 292.0, 314.0, 319.0, 285.0, 297.0, 317.0, 319.0, 324.0, 309.0, 310.0, 317.0, 299.0, 283.0, 323.0, 307.0, 282.0, 291.0, 294.0, 293.0, 308.0, 319.0, 332.0, 301.0, 309.0, 318.0, 314.0, 325.0, 309.0, 318.0, 311.0, 319.0, 308.0, 322.0, 289.0, 293.0, 319.0, 317.0, 317.0, 313.0, 312.0, 318.0, 314.0, 319.0, 317.0, 316.0, 323.0, 304.0, 292.0, 290.0, 289.0, 290.0, 316.0, 314.0, 306.0, 327.0, 316.0, 317.0, 299.0, 288.0, 311.0, 322.0, 319.0, 314.0, 322.0, 314.0, 296.0, 286.0, 291.0, 279.0, 296.0, 294.0, 291.0, 288.0, 288.0, 288.0, 313.0, 317.0, 290.0, 291.0, 294.0, 285.0, 258.0, 260.0, 311.0, 325.0, 316.0, 320.0, 315.0, 318.0, 282.0, 294.0, 301.0, 289.0, 318.0, 315.0, 314.0, 322.0, 316.0, 317.0, 288.0, 294.0, 285.0, 282.0, 319.0, 317.0, 278.0, 292.0, 321.0, 312.0, 318.0, 315.0, 288.0, 294.0, 290.0, 292.0, 303.0, 284.0, 299.0, 280.0, 268.0, 268.0, 288.0, 299.0, 291.0, 296.0, 278.0, 289.0, 315.0, 315.0, 295.0, 292.0, 265.0, 274.0, 294.0, 288.0, 288.0, 282.0, 242.0, 231.0, 316.0, 311.0, 319.0, 308.0, 288.0, 299.0, 303.0, 284.0, 317.0, 316.0, 289.0, 292.0, 283.0, 296.0, 287.0, 289.0, 293.0, 294.0, 314.0, 322.0, 289.0, 293.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 1.041648317929554, "mean_processing_ms": 0.27652981742042726, "mean_inference_ms": 
1.6063886219680592}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6096000, "num_steps_sampled": 3251200, "sample_time_ms": 22038.921, "load_time_ms": 36.872, "grad_time_ms": 9318.447, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028869707603007555, "policy_loss": -0.005314534064382315, "vf_loss": 87.72003936767578, "vf_explained_var": 0.7518091797828674, "kl": 0.0020599865820258856, "entropy": 1.1409815549850464, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3251200, "episodes_total": 8128, "training_iteration": 254, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-53-05", "timestamp": 1660254785, "time_this_iter_s": 27.532819986343384, "time_total_s": 13200.177063703537, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13200.177063703537, "timesteps_since_restore": 3251200, "iterations_since_restore": 254, "perf": {"cpu_util_percent": 32.46923076923077, "ram_util_percent": 58.67179487179486}} -{"episode_reward_max": 639.0, "episode_reward_min": 473.0, "episode_reward_mean": 606.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 303.43}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.06, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.02, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.74, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.94, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.69, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.43, 
"dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.49, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 21, 
"viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 639.0, 630.0, 570.0, 636.0, 584.0, 636.0, 633.0, 624.0, 579.0, 627.0, 630.0, 582.0, 639.0, 627.0, 630.0, 627.0, 627.0, 630.0, 630.0, 624.0, 587.0, 636.0, 633.0, 567.0, 630.0, 627.0, 630.0, 630.0, 636.0, 630.0, 636.0, 590.0, 633.0, 636.0, 633.0, 582.0, 567.0, 636.0, 570.0, 633.0, 633.0, 582.0, 582.0, 587.0, 579.0, 536.0, 587.0, 587.0, 567.0, 630.0, 587.0, 539.0, 582.0, 570.0, 473.0, 627.0, 627.0, 587.0, 587.0, 633.0, 581.0, 579.0, 576.0, 587.0, 636.0, 582.0, 582.0, 633.0, 584.0, 636.0, 630.0, 627.0, 630.0, 561.0, 633.0, 579.0, 
627.0, 582.0, 579.0, 582.0, 627.0, 582.0, 636.0, 630.0, 630.0, 582.0, 633.0, 582.0, 636.0, 633.0, 627.0, 582.0, 630.0, 573.0, 587.0, 627.0, 633.0, 627.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 298.0, 314.0, 325.0, 306.0, 324.0, 291.0, 279.0, 327.0, 309.0, 296.0, 288.0, 323.0, 313.0, 316.0, 317.0, 313.0, 311.0, 298.0, 281.0, 316.0, 311.0, 316.0, 314.0, 294.0, 288.0, 322.0, 317.0, 316.0, 311.0, 319.0, 311.0, 320.0, 307.0, 307.0, 320.0, 313.0, 317.0, 317.0, 313.0, 307.0, 317.0, 294.0, 293.0, 319.0, 317.0, 310.0, 323.0, 285.0, 282.0, 314.0, 316.0, 314.0, 313.0, 309.0, 321.0, 313.0, 317.0, 319.0, 317.0, 321.0, 309.0, 315.0, 321.0, 301.0, 289.0, 318.0, 315.0, 314.0, 322.0, 316.0, 317.0, 288.0, 294.0, 285.0, 282.0, 319.0, 317.0, 278.0, 292.0, 321.0, 312.0, 318.0, 315.0, 288.0, 294.0, 290.0, 292.0, 303.0, 284.0, 299.0, 280.0, 268.0, 268.0, 288.0, 299.0, 291.0, 296.0, 278.0, 289.0, 315.0, 315.0, 295.0, 292.0, 265.0, 274.0, 294.0, 288.0, 288.0, 282.0, 242.0, 231.0, 316.0, 311.0, 319.0, 308.0, 288.0, 299.0, 303.0, 284.0, 317.0, 316.0, 289.0, 292.0, 283.0, 296.0, 287.0, 289.0, 293.0, 294.0, 314.0, 322.0, 289.0, 293.0, 296.0, 286.0, 314.0, 319.0, 304.0, 280.0, 327.0, 309.0, 316.0, 314.0, 316.0, 311.0, 316.0, 314.0, 269.0, 292.0, 316.0, 317.0, 285.0, 294.0, 313.0, 314.0, 288.0, 294.0, 298.0, 281.0, 293.0, 289.0, 324.0, 303.0, 286.0, 296.0, 316.0, 320.0, 316.0, 314.0, 313.0, 317.0, 290.0, 292.0, 314.0, 319.0, 285.0, 297.0, 317.0, 319.0, 324.0, 309.0, 310.0, 317.0, 299.0, 
283.0, 323.0, 307.0, 282.0, 291.0, 294.0, 293.0, 308.0, 319.0, 332.0, 301.0, 309.0, 318.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0393196321657538, "mean_processing_ms": 0.2760628271968967, "mean_inference_ms": 1.603750642060521}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6120000, "num_steps_sampled": 3264000, "sample_time_ms": 22112.846, "load_time_ms": 37.19, "grad_time_ms": 9333.009, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005571722984313965, "policy_loss": -0.0022253356873989105, "vf_loss": 83.61035919189453, "vf_explained_var": 0.7589413523674011, "kl": 0.0018155118450522423, "entropy": 1.1279449462890625, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3264000, "episodes_total": 8160, "training_iteration": 255, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-53-36", "timestamp": 1660254816, "time_this_iter_s": 31.015226125717163, "time_total_s": 13231.192289829254, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13231.192289829254, "timesteps_since_restore": 3264000, "iterations_since_restore": 255, "perf": {"cpu_util_percent": 32.91162790697674, "ram_util_percent": 59.26976744186045}} -{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 612.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 265.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 306.185}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.17, 
"shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.01, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.92, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.1, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.12, 
"potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.53, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.12, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.12, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 639.0, 630.0, 639.0, 636.0, 569.0, 630.0, 579.0, 633.0, 576.0, 636.0, 579.0, 587.0, 627.0, 630.0, 579.0, 587.0, 633.0, 639.0, 630.0, 633.0, 636.0, 630.0, 630.0, 633.0, 576.0, 539.0, 630.0, 552.0, 590.0, 582.0, 630.0, 587.0, 636.0, 582.0, 582.0, 633.0, 584.0, 636.0, 630.0, 627.0, 630.0, 561.0, 633.0, 579.0, 627.0, 582.0, 
579.0, 582.0, 627.0, 582.0, 636.0, 630.0, 630.0, 582.0, 633.0, 582.0, 636.0, 633.0, 627.0, 582.0, 630.0, 573.0, 587.0, 627.0, 633.0, 627.0, 639.0, 576.0, 639.0, 630.0, 570.0, 636.0, 584.0, 636.0, 633.0, 624.0, 579.0, 627.0, 630.0, 582.0, 639.0, 627.0, 630.0, 627.0, 627.0, 630.0, 630.0, 624.0, 587.0, 636.0, 633.0, 567.0, 630.0, 627.0, 630.0, 630.0, 636.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 265.0, 319.0, 320.0, 319.0, 311.0, 317.0, 322.0, 319.0, 317.0, 282.0, 287.0, 311.0, 319.0, 285.0, 294.0, 321.0, 312.0, 285.0, 291.0, 312.0, 324.0, 290.0, 289.0, 296.0, 291.0, 316.0, 311.0, 322.0, 308.0, 293.0, 286.0, 285.0, 302.0, 319.0, 314.0, 320.0, 319.0, 310.0, 320.0, 313.0, 320.0, 317.0, 319.0, 319.0, 311.0, 308.0, 322.0, 310.0, 323.0, 288.0, 288.0, 266.0, 273.0, 310.0, 320.0, 278.0, 274.0, 299.0, 291.0, 295.0, 287.0, 323.0, 307.0, 293.0, 294.0, 314.0, 322.0, 289.0, 293.0, 296.0, 286.0, 314.0, 319.0, 304.0, 280.0, 327.0, 309.0, 316.0, 314.0, 316.0, 311.0, 316.0, 314.0, 269.0, 292.0, 316.0, 317.0, 285.0, 294.0, 313.0, 314.0, 288.0, 294.0, 298.0, 281.0, 293.0, 289.0, 324.0, 303.0, 286.0, 296.0, 316.0, 320.0, 316.0, 314.0, 313.0, 317.0, 290.0, 292.0, 314.0, 319.0, 285.0, 297.0, 317.0, 319.0, 324.0, 309.0, 310.0, 317.0, 299.0, 283.0, 323.0, 307.0, 282.0, 291.0, 294.0, 293.0, 308.0, 319.0, 332.0, 301.0, 309.0, 318.0, 314.0, 325.0, 278.0, 298.0, 314.0, 325.0, 306.0, 324.0, 291.0, 279.0, 327.0, 309.0, 296.0, 288.0, 323.0, 313.0, 316.0, 317.0, 313.0, 311.0, 298.0, 
281.0, 316.0, 311.0, 316.0, 314.0, 294.0, 288.0, 322.0, 317.0, 316.0, 311.0, 319.0, 311.0, 320.0, 307.0, 307.0, 320.0, 313.0, 317.0, 317.0, 313.0, 307.0, 317.0, 294.0, 293.0, 319.0, 317.0, 310.0, 323.0, 285.0, 282.0, 314.0, 316.0, 314.0, 313.0, 309.0, 321.0, 313.0, 317.0, 319.0, 317.0, 321.0, 309.0, 315.0, 321.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0370215932262006, "mean_processing_ms": 0.275602527422642, "mean_inference_ms": 1.6014130194901954}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6144000, "num_steps_sampled": 3276800, "sample_time_ms": 22185.118, "load_time_ms": 37.192, "grad_time_ms": 9265.444, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001450125128030777, "policy_loss": -0.006554553750902414, "vf_loss": 85.70040893554688, "vf_explained_var": 0.7625378966331482, "kl": 0.0019486347446218133, "entropy": 1.1307319402694702, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3276800, "episodes_total": 8192, "training_iteration": 256, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-54-07", "timestamp": 1660254847, "time_this_iter_s": 30.66650390625, "time_total_s": 13261.858793735504, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": 
null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13261.858793735504, "timesteps_since_restore": 3276800, "iterations_since_restore": 256, "perf": {"cpu_util_percent": 32.206818181818186, "ram_util_percent": 58.78863636363636}} -{"episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 611.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 236.0}, 
"policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 305.745}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.09, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.22, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.76, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.14, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.27, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.27, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 13, 
"optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.27, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 627.0, 579.0, 633.0, 627.0, 582.0, 579.0, 582.0, 576.0, 630.0, 579.0, 587.0, 630.0, 630.0, 582.0, 587.0, 576.0, 
587.0, 476.0, 636.0, 633.0, 636.0, 636.0, 630.0, 630.0, 639.0, 636.0, 576.0, 627.0, 579.0, 582.0, 633.0, 627.0, 633.0, 627.0, 639.0, 576.0, 639.0, 630.0, 570.0, 636.0, 584.0, 636.0, 633.0, 624.0, 579.0, 627.0, 630.0, 582.0, 639.0, 627.0, 630.0, 627.0, 627.0, 630.0, 630.0, 624.0, 587.0, 636.0, 633.0, 567.0, 630.0, 627.0, 630.0, 630.0, 636.0, 630.0, 636.0, 530.0, 639.0, 630.0, 639.0, 636.0, 569.0, 630.0, 579.0, 633.0, 576.0, 636.0, 579.0, 587.0, 627.0, 630.0, 579.0, 587.0, 633.0, 639.0, 630.0, 633.0, 636.0, 630.0, 630.0, 633.0, 576.0, 539.0, 630.0, 552.0, 590.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 309.0, 313.0, 314.0, 290.0, 289.0, 322.0, 311.0, 310.0, 317.0, 294.0, 288.0, 294.0, 285.0, 292.0, 290.0, 288.0, 288.0, 319.0, 311.0, 284.0, 295.0, 291.0, 296.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 297.0, 290.0, 292.0, 284.0, 287.0, 300.0, 236.0, 240.0, 316.0, 320.0, 310.0, 323.0, 317.0, 319.0, 324.0, 312.0, 311.0, 319.0, 316.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 291.0, 313.0, 314.0, 289.0, 290.0, 290.0, 292.0, 319.0, 314.0, 308.0, 319.0, 332.0, 301.0, 309.0, 318.0, 314.0, 325.0, 278.0, 298.0, 314.0, 325.0, 306.0, 324.0, 291.0, 279.0, 327.0, 309.0, 296.0, 288.0, 323.0, 313.0, 316.0, 317.0, 313.0, 311.0, 298.0, 281.0, 316.0, 311.0, 316.0, 314.0, 294.0, 288.0, 322.0, 317.0, 316.0, 311.0, 319.0, 311.0, 320.0, 307.0, 307.0, 320.0, 313.0, 317.0, 317.0, 313.0, 307.0, 317.0, 294.0, 293.0, 319.0, 317.0, 310.0, 323.0, 285.0, 282.0, 314.0, 316.0, 314.0, 
313.0, 309.0, 321.0, 313.0, 317.0, 319.0, 317.0, 321.0, 309.0, 315.0, 321.0, 265.0, 265.0, 319.0, 320.0, 319.0, 311.0, 317.0, 322.0, 319.0, 317.0, 282.0, 287.0, 311.0, 319.0, 285.0, 294.0, 321.0, 312.0, 285.0, 291.0, 312.0, 324.0, 290.0, 289.0, 296.0, 291.0, 316.0, 311.0, 322.0, 308.0, 293.0, 286.0, 285.0, 302.0, 319.0, 314.0, 320.0, 319.0, 310.0, 320.0, 313.0, 320.0, 317.0, 319.0, 319.0, 311.0, 308.0, 322.0, 310.0, 323.0, 288.0, 288.0, 266.0, 273.0, 310.0, 320.0, 278.0, 274.0, 299.0, 291.0, 295.0, 287.0, 323.0, 307.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0347705534764586, "mean_processing_ms": 0.2751542992936445, "mean_inference_ms": 1.5997483422105416}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6168000, "num_steps_sampled": 3289600, "sample_time_ms": 22403.591, "load_time_ms": 37.585, "grad_time_ms": 9159.695, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004127854947000742, "policy_loss": -0.004435718059539795, "vf_loss": 91.31246185302734, "vf_explained_var": 0.7648020386695862, "kl": 0.0019896463491022587, "entropy": 1.1353529691696167, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3289600, "episodes_total": 8224, "training_iteration": 257, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-54-43", "timestamp": 1660254883, "time_this_iter_s": 35.68418622016907, "time_total_s": 13297.542979955673, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13297.542979955673, "timesteps_since_restore": 3289600, "iterations_since_restore": 257, "perf": {"cpu_util_percent": 28.105999999999998, 
"ram_util_percent": 58.784}} -{"episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 605.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 236.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.555}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.51, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.34, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.68, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.67, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.82, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.32, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 
16.32, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.32, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [533.0, 627.0, 633.0, 573.0, 627.0, 630.0, 636.0, 633.0, 582.0, 633.0, 527.0, 633.0, 582.0, 627.0, 582.0, 636.0, 627.0, 582.0, 587.0, 582.0, 630.0, 570.0, 630.0, 573.0, 630.0, 627.0, 633.0, 530.0, 630.0, 513.0, 582.0, 558.0, 630.0, 636.0, 630.0, 636.0, 530.0, 639.0, 630.0, 639.0, 636.0, 569.0, 630.0, 579.0, 633.0, 576.0, 636.0, 579.0, 587.0, 627.0, 630.0, 579.0, 587.0, 633.0, 639.0, 630.0, 633.0, 636.0, 630.0, 630.0, 633.0, 576.0, 539.0, 630.0, 552.0, 590.0, 582.0, 630.0, 630.0, 627.0, 579.0, 633.0, 627.0, 582.0, 579.0, 582.0, 576.0, 630.0, 579.0, 587.0, 630.0, 630.0, 582.0, 587.0, 576.0, 587.0, 476.0, 636.0, 633.0, 636.0, 636.0, 630.0, 630.0, 639.0, 636.0, 576.0, 627.0, 579.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [266.0, 267.0, 308.0, 319.0, 309.0, 324.0, 288.0, 285.0, 313.0, 314.0, 311.0, 319.0, 322.0, 314.0, 314.0, 319.0, 298.0, 284.0, 319.0, 314.0, 265.0, 262.0, 316.0, 317.0, 284.0, 298.0, 316.0, 311.0, 284.0, 298.0, 309.0, 327.0, 313.0, 314.0, 299.0, 283.0, 287.0, 300.0, 290.0, 292.0, 316.0, 314.0, 289.0, 281.0, 316.0, 314.0, 281.0, 292.0, 310.0, 320.0, 306.0, 321.0, 326.0, 307.0, 270.0, 260.0, 319.0, 311.0, 260.0, 253.0, 289.0, 293.0, 289.0, 269.0, 313.0, 317.0, 319.0, 317.0, 321.0, 309.0, 315.0, 321.0, 265.0, 265.0, 319.0, 320.0, 319.0, 311.0, 317.0, 322.0, 319.0, 317.0, 282.0, 287.0, 311.0, 319.0, 285.0, 294.0, 321.0, 312.0, 285.0, 291.0, 312.0, 324.0, 290.0, 289.0, 296.0, 
291.0, 316.0, 311.0, 322.0, 308.0, 293.0, 286.0, 285.0, 302.0, 319.0, 314.0, 320.0, 319.0, 310.0, 320.0, 313.0, 320.0, 317.0, 319.0, 319.0, 311.0, 308.0, 322.0, 310.0, 323.0, 288.0, 288.0, 266.0, 273.0, 310.0, 320.0, 278.0, 274.0, 299.0, 291.0, 295.0, 287.0, 323.0, 307.0, 321.0, 309.0, 313.0, 314.0, 290.0, 289.0, 322.0, 311.0, 310.0, 317.0, 294.0, 288.0, 294.0, 285.0, 292.0, 290.0, 288.0, 288.0, 319.0, 311.0, 284.0, 295.0, 291.0, 296.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 297.0, 290.0, 292.0, 284.0, 287.0, 300.0, 236.0, 240.0, 316.0, 320.0, 310.0, 323.0, 317.0, 319.0, 324.0, 312.0, 311.0, 319.0, 316.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 291.0, 313.0, 314.0, 289.0, 290.0, 290.0, 292.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.032534462731249, "mean_processing_ms": 0.2747100807108193, "mean_inference_ms": 1.5981869464429628}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6192000, "num_steps_sampled": 3302400, "sample_time_ms": 22164.935, "load_time_ms": 37.303, "grad_time_ms": 9157.709, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004157478455454111, "policy_loss": -0.004339639097452164, "vf_loss": 90.65621948242188, "vf_explained_var": 0.7486104965209961, "kl": 0.0018090683734044433, "entropy": 1.1370199918746948, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3302400, "episodes_total": 8256, "training_iteration": 258, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-55-14", "timestamp": 1660254914, "time_this_iter_s": 31.362817764282227, "time_total_s": 13328.905797719955, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 13328.905797719955, "timesteps_since_restore": 3302400, "iterations_since_restore": 258, "perf": {"cpu_util_percent": 31.451111111111114, "ram_util_percent": 58.77555555555555}} -{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 602.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 301.23}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.46, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.44, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.78, "useful_onion_pickup_agent_0_min": 12, 
"useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.35, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.12, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 
0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.35, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.12, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.35, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.12, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 587.0, 573.0, 465.0, 581.0, 627.0, 633.0, 627.0, 576.0, 627.0, 639.0, 576.0, 636.0, 633.0, 555.0, 630.0, 636.0, 636.0, 627.0, 633.0, 582.0, 584.0, 584.0, 579.0, 633.0, 582.0, 627.0, 636.0, 627.0, 627.0, 587.0, 630.0, 552.0, 590.0, 582.0, 630.0, 630.0, 627.0, 579.0, 633.0, 627.0, 582.0, 579.0, 582.0, 576.0, 630.0, 579.0, 587.0, 630.0, 630.0, 582.0, 587.0, 576.0, 587.0, 476.0, 636.0, 633.0, 636.0, 636.0, 630.0, 630.0, 639.0, 636.0, 576.0, 627.0, 579.0, 582.0, 633.0, 533.0, 627.0, 633.0, 573.0, 627.0, 630.0, 636.0, 633.0, 582.0, 633.0, 527.0, 633.0, 582.0, 627.0, 582.0, 636.0, 627.0, 582.0, 587.0, 582.0, 630.0, 570.0, 630.0, 573.0, 630.0, 627.0, 633.0, 530.0, 630.0, 513.0, 582.0, 558.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 292.0, 296.0, 291.0, 283.0, 290.0, 234.0, 231.0, 290.0, 291.0, 321.0, 306.0, 305.0, 328.0, 316.0, 311.0, 271.0, 305.0, 311.0, 316.0, 311.0, 328.0, 287.0, 289.0, 316.0, 320.0, 313.0, 320.0, 281.0, 274.0, 316.0, 314.0, 324.0, 312.0, 325.0, 311.0, 313.0, 314.0, 314.0, 319.0, 287.0, 295.0, 303.0, 281.0, 290.0, 294.0, 293.0, 286.0, 321.0, 312.0, 297.0, 285.0, 320.0, 307.0, 314.0, 322.0, 313.0, 314.0, 311.0, 316.0, 301.0, 286.0, 319.0, 311.0, 278.0, 274.0, 299.0, 291.0, 295.0, 287.0, 323.0, 307.0, 321.0, 
309.0, 313.0, 314.0, 290.0, 289.0, 322.0, 311.0, 310.0, 317.0, 294.0, 288.0, 294.0, 285.0, 292.0, 290.0, 288.0, 288.0, 319.0, 311.0, 284.0, 295.0, 291.0, 296.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 297.0, 290.0, 292.0, 284.0, 287.0, 300.0, 236.0, 240.0, 316.0, 320.0, 310.0, 323.0, 317.0, 319.0, 324.0, 312.0, 311.0, 319.0, 316.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 291.0, 313.0, 314.0, 289.0, 290.0, 290.0, 292.0, 319.0, 314.0, 266.0, 267.0, 308.0, 319.0, 309.0, 324.0, 288.0, 285.0, 313.0, 314.0, 311.0, 319.0, 322.0, 314.0, 314.0, 319.0, 298.0, 284.0, 319.0, 314.0, 265.0, 262.0, 316.0, 317.0, 284.0, 298.0, 316.0, 311.0, 284.0, 298.0, 309.0, 327.0, 313.0, 314.0, 299.0, 283.0, 287.0, 300.0, 290.0, 292.0, 316.0, 314.0, 289.0, 281.0, 316.0, 314.0, 281.0, 292.0, 310.0, 320.0, 306.0, 321.0, 326.0, 307.0, 270.0, 260.0, 319.0, 311.0, 260.0, 253.0, 289.0, 293.0, 289.0, 269.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0303222005081312, "mean_processing_ms": 0.27427243552224245, "mean_inference_ms": 1.5968410183934156}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6216000, "num_steps_sampled": 3315200, "sample_time_ms": 22358.82, "load_time_ms": 37.268, "grad_time_ms": 9112.824, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012977579608559608, "policy_loss": -0.0070681399665772915, "vf_loss": 89.34113311767578, "vf_explained_var": 0.751798152923584, "kl": 0.0021080432925373316, "entropy": 1.1364187002182007, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3315200, "episodes_total": 8288, "training_iteration": 259, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-55-48", "timestamp": 1660254948, "time_this_iter_s": 33.836853981018066, "time_total_s": 13362.742651700974, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13362.742651700974, "timesteps_since_restore": 3315200, "iterations_since_restore": 259, "perf": {"cpu_util_percent": 29.470212765957445, "ram_util_percent": 58.776595744680854}} -{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 603.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 301.89}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.98, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.42, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, 
"onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.89, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.55, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.33, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, 
"soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 633.0, 544.0, 587.0, 587.0, 639.0, 587.0, 636.0, 633.0, 573.0, 587.0, 636.0, 567.0, 587.0, 633.0, 630.0, 633.0, 579.0, 627.0, 624.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 590.0, 630.0, 627.0, 630.0, 579.0, 627.0, 579.0, 582.0, 633.0, 533.0, 627.0, 633.0, 573.0, 627.0, 630.0, 636.0, 633.0, 582.0, 633.0, 527.0, 633.0, 582.0, 627.0, 582.0, 636.0, 627.0, 582.0, 587.0, 582.0, 630.0, 570.0, 630.0, 573.0, 630.0, 627.0, 633.0, 530.0, 630.0, 513.0, 582.0, 558.0, 587.0, 587.0, 573.0, 465.0, 581.0, 627.0, 633.0, 627.0, 576.0, 627.0, 639.0, 576.0, 636.0, 633.0, 555.0, 630.0, 636.0, 636.0, 627.0, 633.0, 582.0, 584.0, 584.0, 579.0, 633.0, 582.0, 627.0, 636.0, 627.0, 627.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 311.0, 317.0, 316.0, 309.0, 324.0, 268.0, 276.0, 288.0, 299.0, 299.0, 288.0, 322.0, 317.0, 288.0, 299.0, 319.0, 317.0, 319.0, 314.0, 290.0, 283.0, 287.0, 300.0, 311.0, 325.0, 285.0, 282.0, 290.0, 297.0, 319.0, 314.0, 311.0, 319.0, 312.0, 321.0, 277.0, 302.0, 320.0, 307.0, 319.0, 305.0, 314.0, 
316.0, 295.0, 287.0, 296.0, 283.0, 313.0, 314.0, 284.0, 298.0, 289.0, 290.0, 294.0, 296.0, 311.0, 319.0, 303.0, 324.0, 316.0, 314.0, 290.0, 289.0, 313.0, 314.0, 289.0, 290.0, 290.0, 292.0, 319.0, 314.0, 266.0, 267.0, 308.0, 319.0, 309.0, 324.0, 288.0, 285.0, 313.0, 314.0, 311.0, 319.0, 322.0, 314.0, 314.0, 319.0, 298.0, 284.0, 319.0, 314.0, 265.0, 262.0, 316.0, 317.0, 284.0, 298.0, 316.0, 311.0, 284.0, 298.0, 309.0, 327.0, 313.0, 314.0, 299.0, 283.0, 287.0, 300.0, 290.0, 292.0, 316.0, 314.0, 289.0, 281.0, 316.0, 314.0, 281.0, 292.0, 310.0, 320.0, 306.0, 321.0, 326.0, 307.0, 270.0, 260.0, 319.0, 311.0, 260.0, 253.0, 289.0, 293.0, 289.0, 269.0, 295.0, 292.0, 296.0, 291.0, 283.0, 290.0, 234.0, 231.0, 290.0, 291.0, 321.0, 306.0, 305.0, 328.0, 316.0, 311.0, 271.0, 305.0, 311.0, 316.0, 311.0, 328.0, 287.0, 289.0, 316.0, 320.0, 313.0, 320.0, 281.0, 274.0, 316.0, 314.0, 324.0, 312.0, 325.0, 311.0, 313.0, 314.0, 314.0, 319.0, 287.0, 295.0, 303.0, 281.0, 290.0, 294.0, 293.0, 286.0, 321.0, 312.0, 297.0, 285.0, 320.0, 307.0, 314.0, 322.0, 313.0, 314.0, 311.0, 316.0, 301.0, 286.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0281105190585473, "mean_processing_ms": 0.27383411395901525, "mean_inference_ms": 1.5952417170076567}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6240000, "num_steps_sampled": 3328000, "sample_time_ms": 22352.291, "load_time_ms": 37.439, "grad_time_ms": 9262.711, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002568518975749612, "policy_loss": -0.005389755126088858, "vf_loss": 85.3111801147461, "vf_explained_var": 0.7668444514274597, "kl": 0.0014818129129707813, "entropy": 1.145686149597168, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3328000, "episodes_total": 8320, "training_iteration": 260, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-11_22-56-23", "timestamp": 1660254983, "time_this_iter_s": 34.4713191986084, "time_total_s": 13397.213970899582, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13397.213970899582, "timesteps_since_restore": 3328000, "iterations_since_restore": 260, "perf": {"cpu_util_percent": 30.669387755102036, "ram_util_percent": 58.697959183673476}} -{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 602.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 301.485}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 184.57, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.18, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.11, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, 
"soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.11, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.11, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 630.0, 582.0, 567.0, 576.0, 633.0, 639.0, 579.0, 624.0, 633.0, 627.0, 627.0, 630.0, 633.0, 630.0, 582.0, 567.0, 351.0, 584.0, 558.0, 639.0, 584.0, 627.0, 633.0, 587.0, 633.0, 587.0, 630.0, 630.0, 582.0, 582.0, 630.0, 513.0, 582.0, 558.0, 587.0, 587.0, 573.0, 465.0, 581.0, 627.0, 633.0, 627.0, 576.0, 627.0, 639.0, 576.0, 636.0, 633.0, 555.0, 630.0, 636.0, 636.0, 627.0, 633.0, 582.0, 584.0, 584.0, 579.0, 633.0, 582.0, 627.0, 636.0, 627.0, 627.0, 587.0, 630.0, 627.0, 633.0, 633.0, 544.0, 587.0, 587.0, 639.0, 587.0, 636.0, 633.0, 573.0, 587.0, 636.0, 567.0, 587.0, 633.0, 630.0, 633.0, 579.0, 627.0, 624.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 590.0, 630.0, 627.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 315.0, 314.0, 316.0, 313.0, 317.0, 280.0, 302.0, 284.0, 283.0, 286.0, 290.0, 316.0, 317.0, 311.0, 328.0, 287.0, 292.0, 
312.0, 312.0, 311.0, 322.0, 313.0, 314.0, 313.0, 314.0, 306.0, 324.0, 319.0, 314.0, 317.0, 313.0, 293.0, 289.0, 285.0, 282.0, 168.0, 183.0, 289.0, 295.0, 277.0, 281.0, 327.0, 312.0, 285.0, 299.0, 318.0, 309.0, 313.0, 320.0, 288.0, 299.0, 319.0, 314.0, 294.0, 293.0, 316.0, 314.0, 316.0, 314.0, 298.0, 284.0, 278.0, 304.0, 319.0, 311.0, 260.0, 253.0, 289.0, 293.0, 289.0, 269.0, 295.0, 292.0, 296.0, 291.0, 283.0, 290.0, 234.0, 231.0, 290.0, 291.0, 321.0, 306.0, 305.0, 328.0, 316.0, 311.0, 271.0, 305.0, 311.0, 316.0, 311.0, 328.0, 287.0, 289.0, 316.0, 320.0, 313.0, 320.0, 281.0, 274.0, 316.0, 314.0, 324.0, 312.0, 325.0, 311.0, 313.0, 314.0, 314.0, 319.0, 287.0, 295.0, 303.0, 281.0, 290.0, 294.0, 293.0, 286.0, 321.0, 312.0, 297.0, 285.0, 320.0, 307.0, 314.0, 322.0, 313.0, 314.0, 311.0, 316.0, 301.0, 286.0, 319.0, 311.0, 316.0, 311.0, 317.0, 316.0, 309.0, 324.0, 268.0, 276.0, 288.0, 299.0, 299.0, 288.0, 322.0, 317.0, 288.0, 299.0, 319.0, 317.0, 319.0, 314.0, 290.0, 283.0, 287.0, 300.0, 311.0, 325.0, 285.0, 282.0, 290.0, 297.0, 319.0, 314.0, 311.0, 319.0, 312.0, 321.0, 277.0, 302.0, 320.0, 307.0, 319.0, 305.0, 314.0, 316.0, 295.0, 287.0, 296.0, 283.0, 313.0, 314.0, 284.0, 298.0, 289.0, 290.0, 294.0, 296.0, 311.0, 319.0, 303.0, 324.0, 316.0, 314.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0259226733727673, "mean_processing_ms": 0.27340278893355835, "mean_inference_ms": 1.5937276386512644}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6264000, "num_steps_sampled": 3340800, "sample_time_ms": 22448.254, "load_time_ms": 37.569, "grad_time_ms": 9264.987, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003604738973081112, "policy_loss": -0.004238134250044823, "vf_loss": 84.2165298461914, "vf_explained_var": 0.770367443561554, "kl": 0.002041497267782688, "entropy": 1.1575653553009033, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 3340800, "episodes_total": 8352, "training_iteration": 261, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-56-55", "timestamp": 1660255015, "time_this_iter_s": 32.84105324745178, "time_total_s": 13430.055024147034, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13430.055024147034, "timesteps_since_restore": 3340800, "iterations_since_restore": 261, "perf": {"cpu_util_percent": 32.742553191489364, "ram_util_percent": 58.7659574468085}} -{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 605.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 302.81}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 185.62, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.26, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.03, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 579.0, 582.0, 633.0, 633.0, 630.0, 636.0, 633.0, 576.0, 582.0, 627.0, 587.0, 587.0, 582.0, 633.0, 639.0, 579.0, 582.0, 576.0, 630.0, 630.0, 615.0, 570.0, 633.0, 579.0, 630.0, 633.0, 587.0, 581.0, 627.0, 582.0, 627.0, 627.0, 587.0, 630.0, 627.0, 633.0, 633.0, 544.0, 587.0, 587.0, 639.0, 587.0, 636.0, 633.0, 573.0, 587.0, 636.0, 567.0, 587.0, 633.0, 630.0, 633.0, 579.0, 627.0, 624.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 590.0, 630.0, 627.0, 630.0, 579.0, 639.0, 630.0, 630.0, 582.0, 567.0, 576.0, 633.0, 639.0, 579.0, 624.0, 633.0, 627.0, 627.0, 630.0, 633.0, 630.0, 582.0, 567.0, 351.0, 584.0, 558.0, 639.0, 584.0, 627.0, 633.0, 587.0, 633.0, 587.0, 630.0, 630.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [323.0, 310.0, 316.0, 317.0, 285.0, 294.0, 286.0, 296.0, 315.0, 318.0, 326.0, 307.0, 316.0, 314.0, 315.0, 321.0, 314.0, 319.0, 301.0, 275.0, 291.0, 291.0, 308.0, 319.0, 308.0, 279.0, 302.0, 285.0, 291.0, 291.0, 314.0, 319.0, 314.0, 325.0, 301.0, 278.0, 300.0, 282.0, 296.0, 280.0, 313.0, 317.0, 316.0, 314.0, 304.0, 311.0, 290.0, 280.0, 313.0, 320.0, 296.0, 283.0, 308.0, 322.0, 319.0, 314.0, 291.0, 296.0, 295.0, 286.0, 316.0, 311.0, 286.0, 296.0, 313.0, 314.0, 311.0, 316.0, 301.0, 286.0, 319.0, 311.0, 316.0, 311.0, 317.0, 316.0, 309.0, 324.0, 268.0, 276.0, 288.0, 299.0, 299.0, 288.0, 322.0, 317.0, 288.0, 299.0, 319.0, 317.0, 319.0, 314.0, 290.0, 283.0, 287.0, 300.0, 311.0, 325.0, 285.0, 282.0, 290.0, 297.0, 319.0, 314.0, 311.0, 319.0, 312.0, 321.0, 277.0, 302.0, 320.0, 307.0, 319.0, 305.0, 314.0, 316.0, 295.0, 287.0, 296.0, 283.0, 313.0, 314.0, 284.0, 298.0, 289.0, 290.0, 294.0, 296.0, 311.0, 319.0, 303.0, 324.0, 316.0, 314.0, 290.0, 289.0, 324.0, 315.0, 314.0, 316.0, 313.0, 317.0, 280.0, 302.0, 284.0, 283.0, 286.0, 290.0, 316.0, 317.0, 311.0, 328.0, 287.0, 292.0, 312.0, 312.0, 311.0, 322.0, 313.0, 314.0, 313.0, 314.0, 306.0, 324.0, 319.0, 314.0, 317.0, 313.0, 293.0, 289.0, 285.0, 282.0, 168.0, 183.0, 289.0, 295.0, 277.0, 281.0, 327.0, 312.0, 285.0, 299.0, 318.0, 309.0, 313.0, 320.0, 288.0, 299.0, 319.0, 314.0, 294.0, 293.0, 316.0, 314.0, 316.0, 314.0, 298.0, 284.0, 278.0, 304.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0237598370369554, "mean_processing_ms": 0.2729750841964574, "mean_inference_ms": 1.5924306371043695}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6288000, "num_steps_sampled": 3353600, "sample_time_ms": 22598.435, "load_time_ms": 37.639, "grad_time_ms": 9270.4, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0002879177627619356, "policy_loss": -0.007559783756732941, "vf_loss": 84.22747802734375, 
"vf_explained_var": 0.7608786225318909, "kl": 0.0017919730162248015, "entropy": 1.1500838994979858, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3353600, "episodes_total": 8384, "training_iteration": 262, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-57-31", "timestamp": 1660255051, "time_this_iter_s": 35.47017812728882, "time_total_s": 13465.525202274323, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13465.525202274323, "timesteps_since_restore": 3353600, "iterations_since_restore": 262, "perf": {"cpu_util_percent": 25.712, "ram_util_percent": 58.78}} -{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 606.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.415}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 185.23, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 
0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.88, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.69, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.89, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.76, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, 
"dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.89, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.76, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.89, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.76, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 630.0, 636.0, 581.0, 624.0, 633.0, 581.0, 579.0, 459.0, 630.0, 573.0, 633.0, 630.0, 633.0, 633.0, 627.0, 630.0, 627.0, 630.0, 627.0, 633.0, 636.0, 573.0, 579.0, 570.0, 624.0, 630.0, 636.0, 582.0, 636.0, 582.0, 636.0, 630.0, 627.0, 630.0, 579.0, 639.0, 630.0, 630.0, 582.0, 567.0, 576.0, 633.0, 639.0, 579.0, 624.0, 633.0, 627.0, 627.0, 630.0, 633.0, 630.0, 582.0, 567.0, 351.0, 584.0, 558.0, 639.0, 584.0, 627.0, 633.0, 587.0, 633.0, 587.0, 630.0, 630.0, 582.0, 582.0, 633.0, 633.0, 579.0, 582.0, 633.0, 633.0, 630.0, 636.0, 633.0, 576.0, 582.0, 627.0, 587.0, 587.0, 582.0, 633.0, 639.0, 579.0, 582.0, 576.0, 630.0, 630.0, 615.0, 570.0, 633.0, 579.0, 630.0, 633.0, 587.0, 581.0, 627.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 316.0, 314.0, 322.0, 314.0, 285.0, 296.0, 306.0, 318.0, 317.0, 316.0, 288.0, 293.0, 288.0, 291.0, 235.0, 224.0, 324.0, 306.0, 285.0, 288.0, 313.0, 320.0, 312.0, 318.0, 315.0, 318.0, 310.0, 323.0, 311.0, 316.0, 308.0, 322.0, 306.0, 321.0, 315.0, 315.0, 321.0, 306.0, 314.0, 319.0, 321.0, 315.0, 283.0, 290.0, 301.0, 278.0, 282.0, 288.0, 305.0, 319.0, 310.0, 320.0, 327.0, 309.0, 288.0, 294.0, 319.0, 317.0, 298.0, 284.0, 316.0, 320.0, 311.0, 319.0, 303.0, 324.0, 316.0, 314.0, 290.0, 289.0, 324.0, 315.0, 314.0, 316.0, 313.0, 317.0, 280.0, 302.0, 284.0, 283.0, 286.0, 290.0, 316.0, 317.0, 311.0, 328.0, 287.0, 292.0, 312.0, 312.0, 311.0, 322.0, 313.0, 314.0, 313.0, 314.0, 306.0, 324.0, 319.0, 314.0, 317.0, 313.0, 293.0, 289.0, 285.0, 282.0, 168.0, 183.0, 289.0, 295.0, 277.0, 281.0, 327.0, 312.0, 285.0, 299.0, 318.0, 309.0, 313.0, 320.0, 288.0, 299.0, 319.0, 314.0, 294.0, 293.0, 316.0, 314.0, 316.0, 314.0, 298.0, 284.0, 278.0, 304.0, 323.0, 310.0, 316.0, 317.0, 285.0, 294.0, 286.0, 296.0, 315.0, 318.0, 326.0, 307.0, 316.0, 314.0, 315.0, 321.0, 314.0, 319.0, 301.0, 275.0, 291.0, 291.0, 308.0, 319.0, 308.0, 279.0, 302.0, 285.0, 291.0, 291.0, 314.0, 319.0, 314.0, 325.0, 301.0, 278.0, 300.0, 282.0, 296.0, 280.0, 313.0, 317.0, 316.0, 314.0, 304.0, 311.0, 290.0, 280.0, 313.0, 320.0, 296.0, 283.0, 308.0, 322.0, 319.0, 314.0, 291.0, 296.0, 295.0, 286.0, 316.0, 311.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0216145913316899, "mean_processing_ms": 0.27255014515744136, "mean_inference_ms": 1.591234908963895}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6312000, "num_steps_sampled": 3366400, "sample_time_ms": 23169.124, "load_time_ms": 37.393, "grad_time_ms": 9419.779, "update_time_ms": 0.001, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005937855690717697, "policy_loss": -0.0020803138613700867, "vf_loss": 85.89215087890625, "vf_explained_var": 0.7588068842887878, "kl": 0.001875289366580546, "entropy": 1.142077088356018, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3366400, "episodes_total": 8416, "training_iteration": 263, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-58-05", "timestamp": 1660255085, "time_this_iter_s": 33.64332914352417, "time_total_s": 13499.168531417847, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13499.168531417847, "timesteps_since_restore": 3366400, "iterations_since_restore": 263, "perf": {"cpu_util_percent": 28.2468085106383, "ram_util_percent": 58.74042553191488}} -{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 605.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 224.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.84}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.28, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.71, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.72, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.61, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, 
"useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.06, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.61, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.61, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 627.0, 579.0, 573.0, 627.0, 579.0, 582.0, 587.0, 582.0, 579.0, 627.0, 582.0, 627.0, 579.0, 627.0, 525.0, 630.0, 587.0, 576.0, 633.0, 636.0, 519.0, 582.0, 576.0, 636.0, 636.0, 582.0, 630.0, 579.0, 633.0, 633.0, 630.0, 630.0, 582.0, 582.0, 633.0, 633.0, 579.0, 582.0, 633.0, 633.0, 630.0, 636.0, 633.0, 576.0, 582.0, 627.0, 587.0, 587.0, 582.0, 633.0, 639.0, 579.0, 582.0, 576.0, 630.0, 630.0, 615.0, 570.0, 633.0, 579.0, 630.0, 633.0, 587.0, 581.0, 627.0, 582.0, 630.0, 630.0, 636.0, 581.0, 624.0, 633.0, 581.0, 579.0, 459.0, 630.0, 573.0, 633.0, 630.0, 633.0, 633.0, 627.0, 630.0, 627.0, 630.0, 627.0, 633.0, 636.0, 573.0, 579.0, 570.0, 624.0, 630.0, 636.0, 582.0, 636.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 289.0, 319.0, 314.0, 308.0, 319.0, 283.0, 296.0, 290.0, 283.0, 316.0, 311.0, 291.0, 288.0, 291.0, 291.0, 294.0, 293.0, 291.0, 291.0, 295.0, 284.0, 313.0, 314.0, 295.0, 287.0, 313.0, 314.0, 291.0, 288.0, 313.0, 314.0, 264.0, 261.0, 304.0, 326.0, 283.0, 304.0, 292.0, 284.0, 309.0, 324.0, 314.0, 322.0, 242.0, 277.0, 294.0, 288.0, 287.0, 289.0, 317.0, 319.0, 320.0, 316.0, 291.0, 291.0, 316.0, 314.0, 283.0, 296.0, 319.0, 314.0, 319.0, 314.0, 316.0, 314.0, 316.0, 314.0, 298.0, 284.0, 278.0, 304.0, 323.0, 310.0, 316.0, 317.0, 285.0, 294.0, 286.0, 296.0, 315.0, 318.0, 326.0, 307.0, 316.0, 314.0, 315.0, 321.0, 314.0, 319.0, 301.0, 275.0, 291.0, 291.0, 308.0, 319.0, 308.0, 279.0, 302.0, 285.0, 291.0, 291.0, 314.0, 319.0, 314.0, 325.0, 301.0, 278.0, 300.0, 282.0, 296.0, 280.0, 313.0, 317.0, 316.0, 314.0, 304.0, 311.0, 290.0, 280.0, 313.0, 320.0, 296.0, 283.0, 308.0, 322.0, 319.0, 314.0, 291.0, 296.0, 295.0, 286.0, 316.0, 311.0, 286.0, 296.0, 313.0, 317.0, 316.0, 314.0, 322.0, 314.0, 285.0, 296.0, 306.0, 318.0, 317.0, 316.0, 288.0, 293.0, 288.0, 291.0, 235.0, 224.0, 324.0, 306.0, 285.0, 288.0, 313.0, 320.0, 312.0, 318.0, 315.0, 318.0, 310.0, 323.0, 311.0, 316.0, 308.0, 322.0, 306.0, 321.0, 315.0, 315.0, 321.0, 306.0, 314.0, 319.0, 321.0, 315.0, 283.0, 290.0, 301.0, 278.0, 282.0, 288.0, 305.0, 319.0, 310.0, 320.0, 327.0, 309.0, 288.0, 294.0, 319.0, 317.0, 298.0, 284.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0194825436603638, "mean_processing_ms": 0.27212618264491645, "mean_inference_ms": 
1.590112958914433}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6336000, "num_steps_sampled": 3379200, "sample_time_ms": 23605.349, "load_time_ms": 37.251, "grad_time_ms": 9861.879, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0006588895921595395, "policy_loss": -0.008630036376416683, "vf_loss": 85.43579864501953, "vf_explained_var": 0.770875871181488, "kl": 0.0017466336721554399, "entropy": 1.1448642015457153, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3379200, "episodes_total": 8448, "training_iteration": 264, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-58-41", "timestamp": 1660255121, "time_this_iter_s": 36.31201386451721, "time_total_s": 13535.480545282364, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13535.480545282364, "timesteps_since_restore": 3379200, "iterations_since_restore": 264, "perf": {"cpu_util_percent": 29.313461538461542, "ram_util_percent": 59.09038461538463}} -{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 605.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 224.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.72}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.04, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.23, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.66, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.4, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, 
"dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.74, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.21, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.4, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 20, 
"viable_onion_potting_agent_1_mean": 17.4, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 633.0, 636.0, 639.0, 633.0, 630.0, 630.0, 633.0, 582.0, 576.0, 579.0, 587.0, 587.0, 582.0, 582.0, 633.0, 582.0, 630.0, 630.0, 621.0, 621.0, 582.0, 630.0, 630.0, 630.0, 564.0, 624.0, 582.0, 627.0, 573.0, 630.0, 587.0, 581.0, 627.0, 582.0, 630.0, 630.0, 636.0, 581.0, 624.0, 633.0, 581.0, 579.0, 459.0, 630.0, 573.0, 633.0, 630.0, 633.0, 633.0, 627.0, 630.0, 627.0, 630.0, 627.0, 633.0, 636.0, 573.0, 579.0, 570.0, 624.0, 630.0, 636.0, 582.0, 636.0, 582.0, 636.0, 579.0, 633.0, 627.0, 579.0, 573.0, 627.0, 579.0, 582.0, 587.0, 
582.0, 579.0, 627.0, 582.0, 627.0, 579.0, 627.0, 525.0, 630.0, 587.0, 576.0, 633.0, 636.0, 519.0, 582.0, 576.0, 636.0, 636.0, 582.0, 630.0, 579.0, 633.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 283.0, 299.0, 316.0, 317.0, 316.0, 320.0, 319.0, 320.0, 324.0, 309.0, 321.0, 309.0, 309.0, 321.0, 324.0, 309.0, 289.0, 293.0, 292.0, 284.0, 291.0, 288.0, 293.0, 294.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 316.0, 317.0, 291.0, 291.0, 313.0, 317.0, 316.0, 314.0, 313.0, 308.0, 316.0, 305.0, 292.0, 290.0, 312.0, 318.0, 316.0, 314.0, 314.0, 316.0, 284.0, 280.0, 321.0, 303.0, 301.0, 281.0, 313.0, 314.0, 288.0, 285.0, 314.0, 316.0, 291.0, 296.0, 295.0, 286.0, 316.0, 311.0, 286.0, 296.0, 313.0, 317.0, 316.0, 314.0, 322.0, 314.0, 285.0, 296.0, 306.0, 318.0, 317.0, 316.0, 288.0, 293.0, 288.0, 291.0, 235.0, 224.0, 324.0, 306.0, 285.0, 288.0, 313.0, 320.0, 312.0, 318.0, 315.0, 318.0, 310.0, 323.0, 311.0, 316.0, 308.0, 322.0, 306.0, 321.0, 315.0, 315.0, 321.0, 306.0, 314.0, 319.0, 321.0, 315.0, 283.0, 290.0, 301.0, 278.0, 282.0, 288.0, 305.0, 319.0, 310.0, 320.0, 327.0, 309.0, 288.0, 294.0, 319.0, 317.0, 298.0, 284.0, 316.0, 320.0, 290.0, 289.0, 319.0, 314.0, 308.0, 319.0, 283.0, 296.0, 290.0, 283.0, 316.0, 311.0, 291.0, 288.0, 291.0, 291.0, 294.0, 293.0, 291.0, 291.0, 295.0, 284.0, 313.0, 314.0, 295.0, 287.0, 313.0, 314.0, 291.0, 288.0, 313.0, 314.0, 264.0, 261.0, 304.0, 326.0, 283.0, 304.0, 292.0, 284.0, 309.0, 324.0, 314.0, 322.0, 242.0, 277.0, 294.0, 288.0, 287.0, 
289.0, 317.0, 319.0, 320.0, 316.0, 291.0, 291.0, 316.0, 314.0, 283.0, 296.0, 319.0, 314.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0173660844031607, "mean_processing_ms": 0.27170594360174505, "mean_inference_ms": 1.5890528923225553}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6360000, "num_steps_sampled": 3392000, "sample_time_ms": 24130.635, "load_time_ms": 37.048, "grad_time_ms": 9999.028, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024043002631515265, "policy_loss": -0.005360407754778862, "vf_loss": 83.37548828125, "vf_explained_var": 0.7662093043327332, "kl": 0.001953211845830083, "entropy": 1.145652174949646, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3392000, "episodes_total": 8480, "training_iteration": 265, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-59-19", "timestamp": 1660255159, "time_this_iter_s": 37.638370990753174, "time_total_s": 13573.118916273117, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13573.118916273117, "timesteps_since_restore": 3392000, "iterations_since_restore": 265, "perf": {"cpu_util_percent": 30.500000000000004, "ram_util_percent": 58.69433962264149}} -{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 604.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 302.405}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.21, 
"shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.11, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.52, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.61, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.64, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.66, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.19, 
"potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 636.0, 579.0, 624.0, 576.0, 633.0, 630.0, 630.0, 576.0, 633.0, 636.0, 627.0, 582.0, 576.0, 621.0, 639.0, 582.0, 582.0, 630.0, 582.0, 579.0, 533.0, 579.0, 582.0, 630.0, 636.0, 582.0, 636.0, 582.0, 636.0, 579.0, 633.0, 627.0, 579.0, 573.0, 627.0, 579.0, 582.0, 587.0, 582.0, 579.0, 
627.0, 582.0, 627.0, 579.0, 627.0, 525.0, 630.0, 587.0, 576.0, 633.0, 636.0, 519.0, 582.0, 576.0, 636.0, 636.0, 582.0, 630.0, 579.0, 633.0, 633.0, 582.0, 582.0, 633.0, 636.0, 639.0, 633.0, 630.0, 630.0, 633.0, 582.0, 576.0, 579.0, 587.0, 587.0, 582.0, 582.0, 633.0, 582.0, 630.0, 630.0, 621.0, 621.0, 582.0, 630.0, 630.0, 630.0, 564.0, 624.0, 582.0, 627.0, 573.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 323.0, 304.0, 288.0, 294.0, 285.0, 297.0, 330.0, 297.0, 321.0, 309.0, 315.0, 315.0, 312.0, 324.0, 295.0, 284.0, 315.0, 309.0, 293.0, 283.0, 322.0, 311.0, 324.0, 306.0, 316.0, 314.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 295.0, 290.0, 286.0, 313.0, 308.0, 314.0, 325.0, 291.0, 291.0, 286.0, 296.0, 309.0, 321.0, 288.0, 294.0, 285.0, 294.0, 267.0, 266.0, 293.0, 286.0, 296.0, 286.0, 314.0, 316.0, 317.0, 319.0, 288.0, 294.0, 319.0, 317.0, 298.0, 284.0, 316.0, 320.0, 290.0, 289.0, 319.0, 314.0, 308.0, 319.0, 283.0, 296.0, 290.0, 283.0, 316.0, 311.0, 291.0, 288.0, 291.0, 291.0, 294.0, 293.0, 291.0, 291.0, 295.0, 284.0, 313.0, 314.0, 295.0, 287.0, 313.0, 314.0, 291.0, 288.0, 313.0, 314.0, 264.0, 261.0, 304.0, 326.0, 283.0, 304.0, 292.0, 284.0, 309.0, 324.0, 314.0, 322.0, 242.0, 277.0, 294.0, 288.0, 287.0, 289.0, 317.0, 319.0, 320.0, 316.0, 291.0, 291.0, 316.0, 314.0, 283.0, 296.0, 319.0, 314.0, 319.0, 314.0, 293.0, 289.0, 283.0, 299.0, 316.0, 317.0, 316.0, 320.0, 319.0, 320.0, 324.0, 309.0, 321.0, 309.0, 309.0, 321.0, 324.0, 309.0, 289.0, 
293.0, 292.0, 284.0, 291.0, 288.0, 293.0, 294.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 316.0, 317.0, 291.0, 291.0, 313.0, 317.0, 316.0, 314.0, 313.0, 308.0, 316.0, 305.0, 292.0, 290.0, 312.0, 318.0, 316.0, 314.0, 314.0, 316.0, 284.0, 280.0, 321.0, 303.0, 301.0, 281.0, 313.0, 314.0, 288.0, 285.0, 314.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0152699162864507, "mean_processing_ms": 0.27129002186513274, "mean_inference_ms": 1.588063531007753}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6384000, "num_steps_sampled": 3404800, "sample_time_ms": 24421.033, "load_time_ms": 37.381, "grad_time_ms": 10403.492, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023645039182156324, "policy_loss": -0.005783146247267723, "vf_loss": 87.20269775390625, "vf_explained_var": 0.7547242045402527, "kl": 0.0018250799039378762, "entropy": 1.1452516317367554, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3404800, "episodes_total": 8512, "training_iteration": 266, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_22-59-56", "timestamp": 1660255196, "time_this_iter_s": 37.62380003929138, "time_total_s": 13610.742716312408, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13610.742716312408, "timesteps_since_restore": 3404800, "iterations_since_restore": 266, "perf": {"cpu_util_percent": 28.747169811320756, "ram_util_percent": 58.75471698113208}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 608.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 260.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 304.32}, "custom_metrics": {"sparse_reward_mean": 211.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.24, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.18, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.56, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.66, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.65, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.59, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.59, "optimal_onion_potting_agent_1_min": 13, 
"optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.59, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 573.0, 542.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 522.0, 576.0, 636.0, 630.0, 587.0, 630.0, 581.0, 633.0, 
587.0, 569.0, 627.0, 636.0, 582.0, 633.0, 576.0, 584.0, 633.0, 636.0, 636.0, 624.0, 627.0, 639.0, 636.0, 630.0, 579.0, 633.0, 633.0, 582.0, 582.0, 633.0, 636.0, 639.0, 633.0, 630.0, 630.0, 633.0, 582.0, 576.0, 579.0, 587.0, 587.0, 582.0, 582.0, 633.0, 582.0, 630.0, 630.0, 621.0, 621.0, 582.0, 630.0, 630.0, 630.0, 564.0, 624.0, 582.0, 627.0, 573.0, 630.0, 630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 636.0, 579.0, 624.0, 576.0, 633.0, 630.0, 630.0, 576.0, 633.0, 636.0, 627.0, 582.0, 576.0, 621.0, 639.0, 582.0, 582.0, 630.0, 582.0, 579.0, 533.0, 579.0, 582.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 313.0, 274.0, 299.0, 276.0, 266.0, 306.0, 324.0, 321.0, 309.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 283.0, 299.0, 262.0, 260.0, 293.0, 283.0, 308.0, 328.0, 311.0, 319.0, 287.0, 300.0, 313.0, 317.0, 285.0, 296.0, 315.0, 318.0, 302.0, 285.0, 282.0, 287.0, 319.0, 308.0, 317.0, 319.0, 291.0, 291.0, 313.0, 320.0, 276.0, 300.0, 295.0, 289.0, 314.0, 319.0, 316.0, 320.0, 320.0, 316.0, 319.0, 305.0, 313.0, 314.0, 324.0, 315.0, 314.0, 322.0, 316.0, 314.0, 283.0, 296.0, 319.0, 314.0, 319.0, 314.0, 293.0, 289.0, 283.0, 299.0, 316.0, 317.0, 316.0, 320.0, 319.0, 320.0, 324.0, 309.0, 321.0, 309.0, 309.0, 321.0, 324.0, 309.0, 289.0, 293.0, 292.0, 284.0, 291.0, 288.0, 293.0, 294.0, 294.0, 293.0, 289.0, 293.0, 289.0, 293.0, 316.0, 317.0, 291.0, 291.0, 313.0, 317.0, 316.0, 314.0, 313.0, 308.0, 316.0, 305.0, 292.0, 290.0, 312.0, 318.0, 316.0, 314.0, 314.0, 316.0, 284.0, 
280.0, 321.0, 303.0, 301.0, 281.0, 313.0, 314.0, 288.0, 285.0, 314.0, 316.0, 313.0, 317.0, 323.0, 304.0, 288.0, 294.0, 285.0, 297.0, 330.0, 297.0, 321.0, 309.0, 315.0, 315.0, 312.0, 324.0, 295.0, 284.0, 315.0, 309.0, 293.0, 283.0, 322.0, 311.0, 324.0, 306.0, 316.0, 314.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 295.0, 290.0, 286.0, 313.0, 308.0, 314.0, 325.0, 291.0, 291.0, 286.0, 296.0, 309.0, 321.0, 288.0, 294.0, 285.0, 294.0, 267.0, 266.0, 293.0, 286.0, 296.0, 286.0, 314.0, 316.0, 317.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0132060848493245, "mean_processing_ms": 0.27088101809255155, "mean_inference_ms": 1.5871323468376288}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6408000, "num_steps_sampled": 3417600, "sample_time_ms": 24236.974, "load_time_ms": 37.027, "grad_time_ms": 10553.07, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002214438281953335, "policy_loss": -0.00573044503107667, "vf_loss": 85.19269561767578, "vf_explained_var": 0.7630549073219299, "kl": 0.0019984643440693617, "entropy": 1.1487520933151245, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3417600, "episodes_total": 8544, "training_iteration": 267, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-00-32", "timestamp": 1660255232, "time_this_iter_s": 35.333903789520264, "time_total_s": 13646.076620101929, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13646.076620101929, "timesteps_since_restore": 3417600, "iterations_since_restore": 267, "perf": {"cpu_util_percent": 32.378, 
"ram_util_percent": 58.867999999999995}} -{"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 603.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 301.95}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.1, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.06, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.73, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 25, 
"onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.31, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 576.0, 630.0, 636.0, 582.0, 522.0, 639.0, 587.0, 582.0, 473.0, 633.0, 630.0, 584.0, 581.0, 636.0, 587.0, 579.0, 402.0, 627.0, 630.0, 630.0, 636.0, 627.0, 633.0, 636.0, 582.0, 627.0, 462.0, 630.0, 630.0, 633.0, 582.0, 627.0, 573.0, 630.0, 630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 636.0, 579.0, 624.0, 576.0, 633.0, 630.0, 630.0, 576.0, 633.0, 636.0, 627.0, 582.0, 576.0, 621.0, 639.0, 582.0, 582.0, 630.0, 582.0, 579.0, 533.0, 579.0, 582.0, 630.0, 636.0, 630.0, 573.0, 542.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 522.0, 576.0, 636.0, 630.0, 587.0, 630.0, 581.0, 633.0, 587.0, 569.0, 627.0, 636.0, 582.0, 633.0, 576.0, 584.0, 633.0, 636.0, 636.0, 624.0, 627.0, 639.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 308.0, 322.0, 285.0, 291.0, 321.0, 309.0, 321.0, 315.0, 294.0, 288.0, 266.0, 256.0, 322.0, 317.0, 295.0, 292.0, 296.0, 286.0, 229.0, 244.0, 331.0, 302.0, 316.0, 314.0, 287.0, 297.0, 296.0, 285.0, 327.0, 309.0, 291.0, 296.0, 284.0, 295.0, 202.0, 200.0, 308.0, 319.0, 311.0, 319.0, 322.0, 308.0, 316.0, 320.0, 306.0, 321.0, 317.0, 316.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 242.0, 220.0, 313.0, 317.0, 324.0, 306.0, 322.0, 311.0, 301.0, 281.0, 313.0, 314.0, 288.0, 285.0, 314.0, 316.0, 313.0, 317.0, 323.0, 304.0, 288.0, 294.0, 285.0, 297.0, 330.0, 297.0, 321.0, 309.0, 315.0, 315.0, 312.0, 324.0, 295.0, 284.0, 315.0, 
309.0, 293.0, 283.0, 322.0, 311.0, 324.0, 306.0, 316.0, 314.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 295.0, 290.0, 286.0, 313.0, 308.0, 314.0, 325.0, 291.0, 291.0, 286.0, 296.0, 309.0, 321.0, 288.0, 294.0, 285.0, 294.0, 267.0, 266.0, 293.0, 286.0, 296.0, 286.0, 314.0, 316.0, 317.0, 319.0, 317.0, 313.0, 274.0, 299.0, 276.0, 266.0, 306.0, 324.0, 321.0, 309.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 283.0, 299.0, 262.0, 260.0, 293.0, 283.0, 308.0, 328.0, 311.0, 319.0, 287.0, 300.0, 313.0, 317.0, 285.0, 296.0, 315.0, 318.0, 302.0, 285.0, 282.0, 287.0, 319.0, 308.0, 317.0, 319.0, 291.0, 291.0, 313.0, 320.0, 276.0, 300.0, 295.0, 289.0, 314.0, 319.0, 316.0, 320.0, 320.0, 316.0, 319.0, 305.0, 313.0, 314.0, 324.0, 315.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0111539095910416, "mean_processing_ms": 0.2704732512620114, "mean_inference_ms": 1.5859630571947259}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6432000, "num_steps_sampled": 3430400, "sample_time_ms": 24395.085, "load_time_ms": 36.597, "grad_time_ms": 10568.153, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002145373960956931, "policy_loss": -0.006650958210229874, "vf_loss": 93.7173843383789, "vf_explained_var": 0.7514896392822266, "kl": 0.0022997509222477674, "entropy": 1.150799036026001, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3430400, "episodes_total": 8576, "training_iteration": 268, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-01-05", "timestamp": 1660255265, "time_this_iter_s": 33.08881878852844, "time_total_s": 13679.165438890457, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13679.165438890457, "timesteps_since_restore": 3430400, "iterations_since_restore": 268, "perf": {"cpu_util_percent": 29.461702127659574, "ram_util_percent": 58.71702127659575}} -{"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 603.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 301.685}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 184.97, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.71, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, 
"useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.93, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.77, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.87, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.16, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.47, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.05, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.16, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.47, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.16, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.47, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 561.0, 582.0, 627.0, 627.0, 633.0, 630.0, 636.0, 633.0, 558.0, 582.0, 579.0, 639.0, 636.0, 570.0, 582.0, 587.0, 639.0, 627.0, 584.0, 587.0, 579.0, 582.0, 624.0, 579.0, 636.0, 633.0, 633.0, 582.0, 627.0, 576.0, 570.0, 579.0, 582.0, 630.0, 636.0, 630.0, 573.0, 542.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 522.0, 576.0, 636.0, 630.0, 587.0, 630.0, 581.0, 633.0, 587.0, 569.0, 627.0, 636.0, 582.0, 633.0, 576.0, 584.0, 633.0, 636.0, 636.0, 624.0, 627.0, 639.0, 636.0, 579.0, 630.0, 576.0, 630.0, 636.0, 582.0, 522.0, 639.0, 587.0, 582.0, 473.0, 633.0, 630.0, 584.0, 581.0, 636.0, 587.0, 579.0, 402.0, 627.0, 630.0, 630.0, 636.0, 627.0, 633.0, 636.0, 582.0, 627.0, 462.0, 630.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 289.0, 272.0, 286.0, 296.0, 317.0, 310.0, 313.0, 314.0, 324.0, 309.0, 313.0, 317.0, 312.0, 324.0, 311.0, 322.0, 276.0, 282.0, 293.0, 289.0, 293.0, 286.0, 316.0, 323.0, 305.0, 331.0, 291.0, 279.0, 290.0, 292.0, 292.0, 295.0, 317.0, 322.0, 303.0, 324.0, 298.0, 286.0, 293.0, 294.0, 293.0, 286.0, 290.0, 292.0, 308.0, 316.0, 278.0, 301.0, 314.0, 322.0, 312.0, 321.0, 316.0, 317.0, 285.0, 297.0, 313.0, 314.0, 
280.0, 296.0, 288.0, 282.0, 293.0, 286.0, 296.0, 286.0, 314.0, 316.0, 317.0, 319.0, 317.0, 313.0, 274.0, 299.0, 276.0, 266.0, 306.0, 324.0, 321.0, 309.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 283.0, 299.0, 262.0, 260.0, 293.0, 283.0, 308.0, 328.0, 311.0, 319.0, 287.0, 300.0, 313.0, 317.0, 285.0, 296.0, 315.0, 318.0, 302.0, 285.0, 282.0, 287.0, 319.0, 308.0, 317.0, 319.0, 291.0, 291.0, 313.0, 320.0, 276.0, 300.0, 295.0, 289.0, 314.0, 319.0, 316.0, 320.0, 320.0, 316.0, 319.0, 305.0, 313.0, 314.0, 324.0, 315.0, 314.0, 322.0, 288.0, 291.0, 308.0, 322.0, 285.0, 291.0, 321.0, 309.0, 321.0, 315.0, 294.0, 288.0, 266.0, 256.0, 322.0, 317.0, 295.0, 292.0, 296.0, 286.0, 229.0, 244.0, 331.0, 302.0, 316.0, 314.0, 287.0, 297.0, 296.0, 285.0, 327.0, 309.0, 291.0, 296.0, 284.0, 295.0, 202.0, 200.0, 308.0, 319.0, 311.0, 319.0, 322.0, 308.0, 316.0, 320.0, 306.0, 321.0, 317.0, 316.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 242.0, 220.0, 313.0, 317.0, 324.0, 306.0, 322.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0091026125530362, "mean_processing_ms": 0.27006446196399875, "mean_inference_ms": 1.584328225519923}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6456000, "num_steps_sampled": 3443200, "sample_time_ms": 23964.302, "load_time_ms": 37.107, "grad_time_ms": 10520.809, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005307864397764206, "policy_loss": -0.0031857620924711227, "vf_loss": 90.64007568359375, "vf_explained_var": 0.7599647641181946, "kl": 0.0021453702356666327, "entropy": 1.14076566696167, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3443200, "episodes_total": 8608, "training_iteration": 269, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-01-34", "timestamp": 1660255294, "time_this_iter_s": 29.06058406829834, "time_total_s": 13708.226022958755, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13708.226022958755, "timesteps_since_restore": 3443200, "iterations_since_restore": 269, "perf": {"cpu_util_percent": 32.897560975609764, "ram_util_percent": 58.64878048780488}} -{"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 601.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 300.96}, "custom_metrics": {"sparse_reward_mean": 208.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.12, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 17.36, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.66, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.57, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.97, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.11, 
"soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 584.0, 587.0, 636.0, 564.0, 630.0, 587.0, 582.0, 639.0, 630.0, 587.0, 579.0, 639.0, 587.0, 636.0, 530.0, 636.0, 630.0, 633.0, 530.0, 633.0, 639.0, 636.0, 582.0, 587.0, 582.0, 627.0, 587.0, 576.0, 639.0, 590.0, 624.0, 627.0, 639.0, 636.0, 579.0, 630.0, 576.0, 630.0, 636.0, 582.0, 522.0, 639.0, 587.0, 582.0, 473.0, 633.0, 630.0, 584.0, 581.0, 636.0, 587.0, 579.0, 402.0, 627.0, 630.0, 630.0, 636.0, 627.0, 633.0, 636.0, 582.0, 627.0, 462.0, 630.0, 630.0, 633.0, 633.0, 561.0, 582.0, 627.0, 627.0, 633.0, 630.0, 636.0, 633.0, 558.0, 582.0, 579.0, 639.0, 636.0, 570.0, 582.0, 587.0, 639.0, 627.0, 584.0, 587.0, 579.0, 582.0, 624.0, 579.0, 636.0, 633.0, 633.0, 582.0, 627.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 294.0, 282.0, 293.0, 291.0, 296.0, 291.0, 315.0, 321.0, 282.0, 282.0, 322.0, 308.0, 308.0, 279.0, 288.0, 294.0, 314.0, 325.0, 311.0, 319.0, 293.0, 294.0, 288.0, 291.0, 319.0, 320.0, 288.0, 299.0, 319.0, 
317.0, 268.0, 262.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 258.0, 272.0, 314.0, 319.0, 325.0, 314.0, 321.0, 315.0, 283.0, 299.0, 293.0, 294.0, 288.0, 294.0, 316.0, 311.0, 290.0, 297.0, 285.0, 291.0, 322.0, 317.0, 291.0, 299.0, 319.0, 305.0, 313.0, 314.0, 324.0, 315.0, 314.0, 322.0, 288.0, 291.0, 308.0, 322.0, 285.0, 291.0, 321.0, 309.0, 321.0, 315.0, 294.0, 288.0, 266.0, 256.0, 322.0, 317.0, 295.0, 292.0, 296.0, 286.0, 229.0, 244.0, 331.0, 302.0, 316.0, 314.0, 287.0, 297.0, 296.0, 285.0, 327.0, 309.0, 291.0, 296.0, 284.0, 295.0, 202.0, 200.0, 308.0, 319.0, 311.0, 319.0, 322.0, 308.0, 316.0, 320.0, 306.0, 321.0, 317.0, 316.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 242.0, 220.0, 313.0, 317.0, 324.0, 306.0, 322.0, 311.0, 321.0, 312.0, 289.0, 272.0, 286.0, 296.0, 317.0, 310.0, 313.0, 314.0, 324.0, 309.0, 313.0, 317.0, 312.0, 324.0, 311.0, 322.0, 276.0, 282.0, 293.0, 289.0, 293.0, 286.0, 316.0, 323.0, 305.0, 331.0, 291.0, 279.0, 290.0, 292.0, 292.0, 295.0, 317.0, 322.0, 303.0, 324.0, 298.0, 286.0, 293.0, 294.0, 293.0, 286.0, 290.0, 292.0, 308.0, 316.0, 278.0, 301.0, 314.0, 322.0, 312.0, 321.0, 316.0, 317.0, 285.0, 297.0, 313.0, 314.0, 280.0, 296.0, 288.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0070340183284676, "mean_processing_ms": 0.26964921546752857, "mean_inference_ms": 1.5821965593270972}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6480000, "num_steps_sampled": 3456000, "sample_time_ms": 23540.277, "load_time_ms": 37.062, "grad_time_ms": 10283.314, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001998053165152669, "policy_loss": -0.00615869602188468, "vf_loss": 87.24394989013672, "vf_explained_var": 0.7729328274726868, "kl": 0.00186056864913553, "entropy": 1.1353095769882202, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3456000, "episodes_total": 8640, 
"training_iteration": 270, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-02-02", "timestamp": 1660255322, "time_this_iter_s": 27.856099128723145, "time_total_s": 13736.082122087479, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13736.082122087479, "timesteps_since_restore": 3456000, "iterations_since_restore": 270, "perf": {"cpu_util_percent": 34.1025641025641, "ram_util_percent": 58.69999999999998}} -{"episode_reward_max": 639.0, "episode_reward_min": 436.0, "episode_reward_mean": 604.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 211.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 302.35}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 186.3, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.38, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.65, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.82, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.91, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.47, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 
0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.47, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.47, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, 
"catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 627.0, 636.0, 633.0, 636.0, 627.0, 633.0, 579.0, 582.0, 573.0, 633.0, 570.0, 624.0, 627.0, 639.0, 636.0, 633.0, 636.0, 630.0, 579.0, 576.0, 436.0, 639.0, 587.0, 636.0, 636.0, 639.0, 587.0, 636.0, 587.0, 639.0, 462.0, 630.0, 630.0, 633.0, 633.0, 561.0, 582.0, 627.0, 627.0, 633.0, 630.0, 636.0, 633.0, 558.0, 582.0, 579.0, 639.0, 636.0, 570.0, 582.0, 587.0, 639.0, 627.0, 584.0, 587.0, 579.0, 582.0, 624.0, 579.0, 636.0, 633.0, 633.0, 582.0, 627.0, 576.0, 570.0, 582.0, 576.0, 584.0, 587.0, 636.0, 564.0, 630.0, 587.0, 582.0, 639.0, 630.0, 587.0, 579.0, 639.0, 587.0, 636.0, 530.0, 636.0, 630.0, 633.0, 530.0, 633.0, 639.0, 636.0, 582.0, 587.0, 582.0, 627.0, 587.0, 576.0, 639.0, 590.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], 
"policy_ppo_reward": [294.0, 288.0, 284.0, 303.0, 315.0, 312.0, 314.0, 322.0, 311.0, 322.0, 319.0, 317.0, 310.0, 317.0, 324.0, 309.0, 289.0, 290.0, 294.0, 288.0, 284.0, 289.0, 303.0, 330.0, 277.0, 293.0, 316.0, 308.0, 321.0, 306.0, 314.0, 325.0, 319.0, 317.0, 314.0, 319.0, 331.0, 305.0, 321.0, 309.0, 293.0, 286.0, 288.0, 288.0, 211.0, 225.0, 312.0, 327.0, 293.0, 294.0, 314.0, 322.0, 317.0, 319.0, 314.0, 325.0, 302.0, 285.0, 314.0, 322.0, 293.0, 294.0, 317.0, 322.0, 242.0, 220.0, 313.0, 317.0, 324.0, 306.0, 322.0, 311.0, 321.0, 312.0, 289.0, 272.0, 286.0, 296.0, 317.0, 310.0, 313.0, 314.0, 324.0, 309.0, 313.0, 317.0, 312.0, 324.0, 311.0, 322.0, 276.0, 282.0, 293.0, 289.0, 293.0, 286.0, 316.0, 323.0, 305.0, 331.0, 291.0, 279.0, 290.0, 292.0, 292.0, 295.0, 317.0, 322.0, 303.0, 324.0, 298.0, 286.0, 293.0, 294.0, 293.0, 286.0, 290.0, 292.0, 308.0, 316.0, 278.0, 301.0, 314.0, 322.0, 312.0, 321.0, 316.0, 317.0, 285.0, 297.0, 313.0, 314.0, 280.0, 296.0, 288.0, 282.0, 293.0, 289.0, 294.0, 282.0, 293.0, 291.0, 296.0, 291.0, 315.0, 321.0, 282.0, 282.0, 322.0, 308.0, 308.0, 279.0, 288.0, 294.0, 314.0, 325.0, 311.0, 319.0, 293.0, 294.0, 288.0, 291.0, 319.0, 320.0, 288.0, 299.0, 319.0, 317.0, 268.0, 262.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 258.0, 272.0, 314.0, 319.0, 325.0, 314.0, 321.0, 315.0, 283.0, 299.0, 293.0, 294.0, 288.0, 294.0, 316.0, 311.0, 290.0, 297.0, 285.0, 291.0, 322.0, 317.0, 291.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0049654393783378, "mean_processing_ms": 0.26923341324594424, "mean_inference_ms": 1.5798214340992636}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6504000, "num_steps_sampled": 3468800, "sample_time_ms": 23269.747, "load_time_ms": 36.886, "grad_time_ms": 10264.548, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00183187669608742, "policy_loss": -0.006337564438581467, "vf_loss": 87.34710693359375, "vf_explained_var": 0.7639560103416443, "kl": 
0.0018626012606546283, "entropy": 1.1305490732192993, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3468800, "episodes_total": 8672, "training_iteration": 271, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-02-32", "timestamp": 1660255352, "time_this_iter_s": 29.946385145187378, "time_total_s": 13766.028507232666, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), 
float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13766.028507232666, "timesteps_since_restore": 3468800, "iterations_since_restore": 271, "perf": {"cpu_util_percent": 32.345238095238095, "ram_util_percent": 58.68095238095239}} -{"episode_reward_max": 639.0, "episode_reward_min": 371.0, "episode_reward_mean": 604.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 302.17}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.14, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.6, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.6, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.61, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.77, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.58, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.38, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.58, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.58, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 627.0, 636.0, 636.0, 633.0, 582.0, 582.0, 639.0, 639.0, 582.0, 639.0, 425.0, 587.0, 630.0, 636.0, 582.0, 636.0, 582.0, 371.0, 579.0, 582.0, 636.0, 636.0, 636.0, 636.0, 636.0, 627.0, 579.0, 639.0, 636.0, 582.0, 636.0, 582.0, 627.0, 576.0, 570.0, 582.0, 576.0, 584.0, 587.0, 636.0, 564.0, 630.0, 587.0, 582.0, 639.0, 630.0, 587.0, 579.0, 639.0, 587.0, 636.0, 530.0, 636.0, 630.0, 633.0, 530.0, 633.0, 639.0, 636.0, 582.0, 587.0, 582.0, 627.0, 587.0, 576.0, 639.0, 590.0, 582.0, 587.0, 627.0, 636.0, 633.0, 636.0, 627.0, 633.0, 579.0, 582.0, 573.0, 633.0, 570.0, 624.0, 627.0, 639.0, 636.0, 633.0, 636.0, 630.0, 579.0, 576.0, 436.0, 639.0, 587.0, 636.0, 636.0, 639.0, 587.0, 636.0, 587.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 322.0, 321.0, 306.0, 319.0, 317.0, 324.0, 312.0, 309.0, 324.0, 293.0, 289.0, 291.0, 291.0, 317.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 320.0, 206.0, 219.0, 301.0, 286.0, 314.0, 316.0, 314.0, 322.0, 285.0, 297.0, 316.0, 320.0, 288.0, 294.0, 198.0, 173.0, 291.0, 288.0, 295.0, 287.0, 321.0, 315.0, 311.0, 325.0, 314.0, 322.0, 322.0, 314.0, 326.0, 310.0, 314.0, 313.0, 286.0, 293.0, 319.0, 320.0, 317.0, 319.0, 291.0, 291.0, 309.0, 327.0, 285.0, 297.0, 313.0, 314.0, 280.0, 296.0, 288.0, 282.0, 293.0, 289.0, 294.0, 282.0, 293.0, 291.0, 296.0, 291.0, 315.0, 321.0, 282.0, 282.0, 322.0, 308.0, 308.0, 279.0, 288.0, 294.0, 314.0, 325.0, 311.0, 319.0, 293.0, 294.0, 288.0, 291.0, 319.0, 320.0, 288.0, 299.0, 319.0, 317.0, 268.0, 262.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 258.0, 272.0, 314.0, 319.0, 325.0, 314.0, 321.0, 315.0, 283.0, 299.0, 293.0, 294.0, 288.0, 294.0, 316.0, 311.0, 290.0, 297.0, 285.0, 291.0, 322.0, 317.0, 291.0, 299.0, 294.0, 288.0, 284.0, 303.0, 315.0, 312.0, 314.0, 322.0, 311.0, 322.0, 319.0, 317.0, 310.0, 317.0, 324.0, 309.0, 289.0, 290.0, 294.0, 288.0, 284.0, 289.0, 303.0, 330.0, 277.0, 293.0, 316.0, 308.0, 321.0, 306.0, 314.0, 325.0, 319.0, 317.0, 314.0, 319.0, 331.0, 305.0, 321.0, 309.0, 293.0, 286.0, 288.0, 288.0, 211.0, 225.0, 312.0, 327.0, 293.0, 294.0, 314.0, 322.0, 317.0, 319.0, 314.0, 325.0, 302.0, 285.0, 314.0, 322.0, 293.0, 294.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 1.002917566902111, "mean_processing_ms": 0.26882194776394586, "mean_inference_ms": 1.577540173061489}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6528000, "num_steps_sampled": 3481600, "sample_time_ms": 22780.285, "load_time_ms": 36.737, "grad_time_ms": 10254.225, "update_time_ms": 0.002, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0052925958298146725, "policy_loss": -0.002937593497335911, "vf_loss": 88.02587890625, "vf_explained_var": 0.7725896835327148, "kl": 0.0019184405682608485, "entropy": 1.144766926765442, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3481600, "episodes_total": 8704, "training_iteration": 272, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-03-02", "timestamp": 1660255382, "time_this_iter_s": 30.47255301475525, "time_total_s": 13796.501060247421, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13796.501060247421, "timesteps_since_restore": 3481600, "iterations_since_restore": 272, "perf": {"cpu_util_percent": 32.334090909090904, "ram_util_percent": 58.665909090909096}} -{"episode_reward_max": 639.0, "episode_reward_min": 371.0, "episode_reward_mean": 611.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 331.0}, "policy_reward_mean": {"ppo": 305.985}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.77, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.15, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.89, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.97, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.55, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.91, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.69, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 3, 
"useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.91, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.91, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 615.0, 636.0, 630.0, 636.0, 576.0, 633.0, 627.0, 639.0, 582.0, 633.0, 582.0, 633.0, 627.0, 630.0, 636.0, 582.0, 633.0, 627.0, 639.0, 639.0, 627.0, 633.0, 587.0, 576.0, 639.0, 590.0, 582.0, 587.0, 627.0, 636.0, 633.0, 636.0, 627.0, 633.0, 579.0, 582.0, 573.0, 633.0, 570.0, 624.0, 627.0, 639.0, 636.0, 633.0, 636.0, 630.0, 579.0, 576.0, 436.0, 639.0, 587.0, 636.0, 636.0, 639.0, 587.0, 636.0, 587.0, 639.0, 633.0, 627.0, 636.0, 636.0, 633.0, 582.0, 582.0, 639.0, 639.0, 582.0, 639.0, 425.0, 587.0, 630.0, 636.0, 582.0, 636.0, 582.0, 371.0, 579.0, 582.0, 636.0, 636.0, 636.0, 636.0, 636.0, 627.0, 579.0, 639.0, 636.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 298.0, 318.0, 312.0, 318.0, 312.0, 324.0, 309.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 314.0, 322.0, 316.0, 317.0, 322.0, 314.0, 305.0, 310.0, 325.0, 311.0, 321.0, 309.0, 319.0, 317.0, 296.0, 280.0, 317.0, 316.0, 321.0, 306.0, 324.0, 315.0, 296.0, 286.0, 321.0, 312.0, 288.0, 294.0, 318.0, 315.0, 316.0, 311.0, 320.0, 310.0, 317.0, 319.0, 294.0, 288.0, 318.0, 315.0, 316.0, 311.0, 322.0, 317.0, 327.0, 312.0, 316.0, 311.0, 308.0, 325.0, 290.0, 297.0, 285.0, 291.0, 322.0, 317.0, 291.0, 299.0, 294.0, 288.0, 284.0, 303.0, 315.0, 312.0, 314.0, 322.0, 311.0, 322.0, 319.0, 317.0, 310.0, 317.0, 324.0, 309.0, 289.0, 290.0, 294.0, 288.0, 284.0, 289.0, 303.0, 330.0, 277.0, 293.0, 316.0, 308.0, 321.0, 306.0, 314.0, 325.0, 319.0, 317.0, 314.0, 319.0, 331.0, 305.0, 321.0, 309.0, 293.0, 286.0, 288.0, 288.0, 211.0, 225.0, 312.0, 327.0, 293.0, 294.0, 314.0, 322.0, 317.0, 319.0, 314.0, 325.0, 302.0, 285.0, 314.0, 322.0, 293.0, 294.0, 317.0, 322.0, 311.0, 322.0, 321.0, 306.0, 319.0, 317.0, 324.0, 312.0, 309.0, 324.0, 293.0, 289.0, 291.0, 291.0, 317.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 320.0, 206.0, 219.0, 301.0, 286.0, 314.0, 316.0, 314.0, 322.0, 285.0, 297.0, 316.0, 320.0, 288.0, 294.0, 198.0, 173.0, 291.0, 288.0, 295.0, 287.0, 321.0, 315.0, 311.0, 325.0, 314.0, 322.0, 322.0, 314.0, 326.0, 310.0, 314.0, 313.0, 286.0, 293.0, 319.0, 320.0, 317.0, 319.0, 291.0, 291.0, 309.0, 327.0]}, "sampler_perf": {"mean_env_wait_ms": 1.0008984560803753, "mean_processing_ms": 0.26841904280245515, "mean_inference_ms": 
1.5754578335271856}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6552000, "num_steps_sampled": 3494400, "sample_time_ms": 22479.799, "load_time_ms": 36.968, "grad_time_ms": 10345.078, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.007351151201874018, "policy_loss": -0.0004584121925290674, "vf_loss": 83.76141357421875, "vf_explained_var": 0.7616392970085144, "kl": 0.0025154289323836565, "entropy": 1.1331415176391602, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3494400, "episodes_total": 8736, "training_iteration": 273, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-03-34", "timestamp": 1660255414, "time_this_iter_s": 31.54677987098694, "time_total_s": 13828.047840118408, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13828.047840118408, "timesteps_since_restore": 3494400, "iterations_since_restore": 273, "perf": {"cpu_util_percent": 36.990909090909085, "ram_util_percent": 59.28409090909091}} -{"episode_reward_max": 639.0, "episode_reward_min": 371.0, "episode_reward_mean": 613.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 306.765}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.73, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.81, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.5, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.98, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.28, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.34, 
"dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, 
"viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 630.0, 630.0, 582.0, 522.0, 587.0, 630.0, 627.0, 633.0, 633.0, 630.0, 587.0, 582.0, 633.0, 636.0, 636.0, 621.0, 627.0, 587.0, 573.0, 630.0, 639.0, 630.0, 630.0, 636.0, 567.0, 579.0, 582.0, 579.0, 633.0, 639.0, 587.0, 636.0, 587.0, 639.0, 633.0, 627.0, 636.0, 636.0, 633.0, 582.0, 582.0, 639.0, 639.0, 582.0, 639.0, 425.0, 587.0, 630.0, 636.0, 582.0, 636.0, 582.0, 371.0, 579.0, 582.0, 636.0, 636.0, 636.0, 636.0, 636.0, 627.0, 579.0, 639.0, 636.0, 582.0, 636.0, 587.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 636.0, 633.0, 
636.0, 615.0, 636.0, 630.0, 636.0, 576.0, 633.0, 627.0, 639.0, 582.0, 633.0, 582.0, 633.0, 627.0, 630.0, 636.0, 582.0, 633.0, 627.0, 639.0, 639.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 320.0, 316.0, 314.0, 316.0, 313.0, 317.0, 286.0, 296.0, 260.0, 262.0, 293.0, 294.0, 311.0, 319.0, 321.0, 306.0, 318.0, 315.0, 314.0, 319.0, 319.0, 311.0, 301.0, 286.0, 286.0, 296.0, 311.0, 322.0, 319.0, 317.0, 322.0, 314.0, 305.0, 316.0, 316.0, 311.0, 298.0, 289.0, 279.0, 294.0, 324.0, 306.0, 321.0, 318.0, 319.0, 311.0, 316.0, 314.0, 321.0, 315.0, 281.0, 286.0, 282.0, 297.0, 280.0, 302.0, 291.0, 288.0, 303.0, 330.0, 322.0, 317.0, 302.0, 285.0, 314.0, 322.0, 293.0, 294.0, 317.0, 322.0, 311.0, 322.0, 321.0, 306.0, 319.0, 317.0, 324.0, 312.0, 309.0, 324.0, 293.0, 289.0, 291.0, 291.0, 317.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 320.0, 206.0, 219.0, 301.0, 286.0, 314.0, 316.0, 314.0, 322.0, 285.0, 297.0, 316.0, 320.0, 288.0, 294.0, 198.0, 173.0, 291.0, 288.0, 295.0, 287.0, 321.0, 315.0, 311.0, 325.0, 314.0, 322.0, 322.0, 314.0, 326.0, 310.0, 314.0, 313.0, 286.0, 293.0, 319.0, 320.0, 317.0, 319.0, 291.0, 291.0, 309.0, 327.0, 289.0, 298.0, 318.0, 312.0, 318.0, 312.0, 324.0, 309.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 314.0, 322.0, 316.0, 317.0, 322.0, 314.0, 305.0, 310.0, 325.0, 311.0, 321.0, 309.0, 319.0, 317.0, 296.0, 280.0, 317.0, 316.0, 321.0, 306.0, 324.0, 315.0, 296.0, 286.0, 321.0, 312.0, 288.0, 294.0, 318.0, 315.0, 316.0, 311.0, 320.0, 310.0, 317.0, 
319.0, 294.0, 288.0, 318.0, 315.0, 316.0, 311.0, 322.0, 317.0, 327.0, 312.0, 316.0, 311.0, 308.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9989007894762186, "mean_processing_ms": 0.2680204764201923, "mean_inference_ms": 1.5734861760997552}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6576000, "num_steps_sampled": 3507200, "sample_time_ms": 22242.32, "load_time_ms": 36.671, "grad_time_ms": 10040.843, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004968150169588625, "policy_loss": -0.007466705050319433, "vf_loss": 85.29949188232422, "vf_explained_var": 0.7543535232543945, "kl": 0.0017724571516737342, "entropy": 1.1328660249710083, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3507200, "episodes_total": 8768, "training_iteration": 274, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-04-05", "timestamp": 1660255445, "time_this_iter_s": 30.891488075256348, "time_total_s": 13858.939328193665, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13858.939328193665, "timesteps_since_restore": 3507200, "iterations_since_restore": 274, "perf": {"cpu_util_percent": 32.32954545454545, "ram_util_percent": 58.86590909090909}} -{"episode_reward_max": 639.0, "episode_reward_min": 362.0, "episode_reward_mean": 610.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 305.345}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.09, 
"shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.19, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.76, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.45, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.88, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.57, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.31, 
"potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.31, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 522.0, 390.0, 582.0, 516.0, 630.0, 627.0, 587.0, 633.0, 582.0, 636.0, 582.0, 447.0, 630.0, 639.0, 636.0, 636.0, 633.0, 362.0, 627.0, 636.0, 630.0, 639.0, 579.0, 621.0, 639.0, 639.0, 587.0, 636.0, 587.0, 630.0, 639.0, 636.0, 582.0, 636.0, 587.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 615.0, 
636.0, 630.0, 636.0, 576.0, 633.0, 627.0, 639.0, 582.0, 633.0, 582.0, 633.0, 627.0, 630.0, 636.0, 582.0, 633.0, 627.0, 639.0, 639.0, 627.0, 633.0, 633.0, 636.0, 630.0, 630.0, 582.0, 522.0, 587.0, 630.0, 627.0, 633.0, 633.0, 630.0, 587.0, 582.0, 633.0, 636.0, 636.0, 621.0, 627.0, 587.0, 573.0, 630.0, 639.0, 630.0, 630.0, 636.0, 567.0, 579.0, 582.0, 579.0, 633.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 314.0, 322.0, 260.0, 262.0, 197.0, 193.0, 291.0, 291.0, 256.0, 260.0, 314.0, 316.0, 308.0, 319.0, 286.0, 301.0, 311.0, 322.0, 289.0, 293.0, 322.0, 314.0, 288.0, 294.0, 221.0, 226.0, 318.0, 312.0, 317.0, 322.0, 317.0, 319.0, 318.0, 318.0, 309.0, 324.0, 182.0, 180.0, 316.0, 311.0, 316.0, 320.0, 319.0, 311.0, 319.0, 320.0, 294.0, 285.0, 301.0, 320.0, 327.0, 312.0, 314.0, 325.0, 281.0, 306.0, 316.0, 320.0, 301.0, 286.0, 316.0, 314.0, 319.0, 320.0, 317.0, 319.0, 291.0, 291.0, 309.0, 327.0, 289.0, 298.0, 318.0, 312.0, 318.0, 312.0, 324.0, 309.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 314.0, 322.0, 316.0, 317.0, 322.0, 314.0, 305.0, 310.0, 325.0, 311.0, 321.0, 309.0, 319.0, 317.0, 296.0, 280.0, 317.0, 316.0, 321.0, 306.0, 324.0, 315.0, 296.0, 286.0, 321.0, 312.0, 288.0, 294.0, 318.0, 315.0, 316.0, 311.0, 320.0, 310.0, 317.0, 319.0, 294.0, 288.0, 318.0, 315.0, 316.0, 311.0, 322.0, 317.0, 327.0, 312.0, 316.0, 311.0, 308.0, 325.0, 316.0, 317.0, 320.0, 316.0, 314.0, 316.0, 313.0, 317.0, 286.0, 296.0, 260.0, 262.0, 293.0, 294.0, 311.0, 319.0, 321.0, 306.0, 318.0, 
315.0, 314.0, 319.0, 319.0, 311.0, 301.0, 286.0, 286.0, 296.0, 311.0, 322.0, 319.0, 317.0, 322.0, 314.0, 305.0, 316.0, 316.0, 311.0, 298.0, 289.0, 279.0, 294.0, 324.0, 306.0, 321.0, 318.0, 319.0, 311.0, 316.0, 314.0, 321.0, 315.0, 281.0, 286.0, 282.0, 297.0, 280.0, 302.0, 291.0, 288.0, 303.0, 330.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9969205996800542, "mean_processing_ms": 0.26762558358937055, "mean_inference_ms": 1.571468951953135}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6600000, "num_steps_sampled": 3520000, "sample_time_ms": 21659.121, "load_time_ms": 36.576, "grad_time_ms": 9832.58, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005525531247258186, "policy_loss": -0.002899330807849765, "vf_loss": 89.90011596679688, "vf_explained_var": 0.7821382880210876, "kl": 0.002336545381695032, "entropy": 1.130285382270813, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3520000, "episodes_total": 8800, "training_iteration": 275, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-04-34", "timestamp": 1660255474, "time_this_iter_s": 29.723124265670776, "time_total_s": 13888.662452459335, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13888.662452459335, "timesteps_since_restore": 3520000, "iterations_since_restore": 275, "perf": {"cpu_util_percent": 34.31666666666666, "ram_util_percent": 58.82619047619047}} -{"episode_reward_max": 639.0, "episode_reward_min": 362.0, "episode_reward_mean": 609.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 180.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 304.695}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.59, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.44, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.64, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.65, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.32, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.43, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.5, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.31, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.43, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.5, "optimal_onion_potting_agent_1_min": 11, 
"optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.43, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.5, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 639.0, 630.0, 587.0, 582.0, 639.0, 636.0, 639.0, 582.0, 630.0, 627.0, 627.0, 633.0, 639.0, 570.0, 579.0, 
633.0, 627.0, 627.0, 636.0, 636.0, 630.0, 633.0, 633.0, 621.0, 573.0, 636.0, 627.0, 530.0, 627.0, 636.0, 639.0, 639.0, 627.0, 633.0, 633.0, 636.0, 630.0, 630.0, 582.0, 522.0, 587.0, 630.0, 627.0, 633.0, 633.0, 630.0, 587.0, 582.0, 633.0, 636.0, 636.0, 621.0, 627.0, 587.0, 573.0, 630.0, 639.0, 630.0, 630.0, 636.0, 567.0, 579.0, 582.0, 579.0, 633.0, 639.0, 633.0, 636.0, 522.0, 390.0, 582.0, 516.0, 630.0, 627.0, 587.0, 633.0, 582.0, 636.0, 582.0, 447.0, 630.0, 639.0, 636.0, 636.0, 633.0, 362.0, 627.0, 636.0, 630.0, 639.0, 579.0, 621.0, 639.0, 639.0, 587.0, 636.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [325.0, 314.0, 309.0, 321.0, 315.0, 324.0, 310.0, 320.0, 297.0, 290.0, 284.0, 298.0, 322.0, 317.0, 319.0, 317.0, 319.0, 320.0, 283.0, 299.0, 314.0, 316.0, 314.0, 313.0, 308.0, 319.0, 322.0, 311.0, 321.0, 318.0, 287.0, 283.0, 283.0, 296.0, 319.0, 314.0, 311.0, 316.0, 314.0, 313.0, 322.0, 314.0, 317.0, 319.0, 318.0, 312.0, 309.0, 324.0, 319.0, 314.0, 297.0, 324.0, 279.0, 294.0, 316.0, 320.0, 318.0, 309.0, 251.0, 279.0, 317.0, 310.0, 319.0, 317.0, 322.0, 317.0, 327.0, 312.0, 316.0, 311.0, 308.0, 325.0, 316.0, 317.0, 320.0, 316.0, 314.0, 316.0, 313.0, 317.0, 286.0, 296.0, 260.0, 262.0, 293.0, 294.0, 311.0, 319.0, 321.0, 306.0, 318.0, 315.0, 314.0, 319.0, 319.0, 311.0, 301.0, 286.0, 286.0, 296.0, 311.0, 322.0, 319.0, 317.0, 322.0, 314.0, 305.0, 316.0, 316.0, 311.0, 298.0, 289.0, 279.0, 294.0, 324.0, 306.0, 321.0, 318.0, 319.0, 311.0, 316.0, 314.0, 321.0, 315.0, 281.0, 
286.0, 282.0, 297.0, 280.0, 302.0, 291.0, 288.0, 303.0, 330.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 260.0, 262.0, 197.0, 193.0, 291.0, 291.0, 256.0, 260.0, 314.0, 316.0, 308.0, 319.0, 286.0, 301.0, 311.0, 322.0, 289.0, 293.0, 322.0, 314.0, 288.0, 294.0, 221.0, 226.0, 318.0, 312.0, 317.0, 322.0, 317.0, 319.0, 318.0, 318.0, 309.0, 324.0, 182.0, 180.0, 316.0, 311.0, 316.0, 320.0, 319.0, 311.0, 319.0, 320.0, 294.0, 285.0, 301.0, 320.0, 327.0, 312.0, 314.0, 325.0, 281.0, 306.0, 316.0, 320.0, 301.0, 286.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9949570051845532, "mean_processing_ms": 0.2672332587743119, "mean_inference_ms": 1.569562529998314}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6624000, "num_steps_sampled": 3532800, "sample_time_ms": 21368.312, "load_time_ms": 36.431, "grad_time_ms": 9398.907, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024656467139720917, "policy_loss": -0.005234332289546728, "vf_loss": 82.6478500366211, "vf_explained_var": 0.7628920078277588, "kl": 0.001980842323973775, "entropy": 1.1296080350875854, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3532800, "episodes_total": 8832, "training_iteration": 276, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-05-05", "timestamp": 1660255505, "time_this_iter_s": 30.375401973724365, "time_total_s": 13919.03785443306, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": 
true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13919.03785443306, "timesteps_since_restore": 3532800, "iterations_since_restore": 276, "perf": {"cpu_util_percent": 33.07209302325582, 
"ram_util_percent": 58.767441860465105}} -{"episode_reward_max": 639.0, "episode_reward_min": 362.0, "episode_reward_mean": 610.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 305.36}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.12, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.44, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.89, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.9, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, 
"onion_drop_agent_0_mean": 0.76, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.85, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 639.0, 636.0, 633.0, 633.0, 636.0, 627.0, 627.0, 587.0, 636.0, 552.0, 636.0, 636.0, 636.0, 639.0, 636.0, 587.0, 636.0, 621.0, 630.0, 636.0, 633.0, 527.0, 636.0, 633.0, 630.0, 587.0, 630.0, 627.0, 582.0, 630.0, 639.0, 582.0, 579.0, 633.0, 639.0, 633.0, 636.0, 522.0, 390.0, 582.0, 516.0, 630.0, 627.0, 587.0, 633.0, 582.0, 636.0, 582.0, 447.0, 630.0, 639.0, 636.0, 636.0, 633.0, 362.0, 627.0, 636.0, 630.0, 639.0, 579.0, 621.0, 639.0, 639.0, 587.0, 636.0, 587.0, 630.0, 639.0, 630.0, 639.0, 630.0, 587.0, 582.0, 639.0, 636.0, 639.0, 582.0, 630.0, 627.0, 627.0, 633.0, 639.0, 570.0, 579.0, 633.0, 627.0, 627.0, 636.0, 636.0, 630.0, 633.0, 633.0, 621.0, 573.0, 636.0, 627.0, 530.0, 627.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 297.0, 319.0, 320.0, 321.0, 315.0, 311.0, 322.0, 317.0, 316.0, 316.0, 320.0, 323.0, 304.0, 316.0, 311.0, 291.0, 296.0, 324.0, 312.0, 274.0, 278.0, 324.0, 312.0, 325.0, 311.0, 311.0, 325.0, 314.0, 325.0, 323.0, 313.0, 293.0, 294.0, 309.0, 327.0, 310.0, 311.0, 318.0, 312.0, 322.0, 314.0, 304.0, 329.0, 266.0, 261.0, 329.0, 307.0, 311.0, 322.0, 316.0, 314.0, 294.0, 293.0, 316.0, 314.0, 314.0, 313.0, 296.0, 286.0, 314.0, 316.0, 322.0, 317.0, 280.0, 302.0, 291.0, 288.0, 303.0, 330.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 260.0, 262.0, 197.0, 193.0, 291.0, 291.0, 256.0, 260.0, 314.0, 316.0, 308.0, 319.0, 286.0, 301.0, 311.0, 
322.0, 289.0, 293.0, 322.0, 314.0, 288.0, 294.0, 221.0, 226.0, 318.0, 312.0, 317.0, 322.0, 317.0, 319.0, 318.0, 318.0, 309.0, 324.0, 182.0, 180.0, 316.0, 311.0, 316.0, 320.0, 319.0, 311.0, 319.0, 320.0, 294.0, 285.0, 301.0, 320.0, 327.0, 312.0, 314.0, 325.0, 281.0, 306.0, 316.0, 320.0, 301.0, 286.0, 316.0, 314.0, 325.0, 314.0, 309.0, 321.0, 315.0, 324.0, 310.0, 320.0, 297.0, 290.0, 284.0, 298.0, 322.0, 317.0, 319.0, 317.0, 319.0, 320.0, 283.0, 299.0, 314.0, 316.0, 314.0, 313.0, 308.0, 319.0, 322.0, 311.0, 321.0, 318.0, 287.0, 283.0, 283.0, 296.0, 319.0, 314.0, 311.0, 316.0, 314.0, 313.0, 322.0, 314.0, 317.0, 319.0, 318.0, 312.0, 309.0, 324.0, 319.0, 314.0, 297.0, 324.0, 279.0, 294.0, 316.0, 320.0, 318.0, 309.0, 251.0, 279.0, 317.0, 310.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.993008865355136, "mean_processing_ms": 0.26684329670316986, "mean_inference_ms": 1.5676868637222179}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6648000, "num_steps_sampled": 3545600, "sample_time_ms": 21068.026, "load_time_ms": 36.686, "grad_time_ms": 9153.242, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004478854592889547, "policy_loss": -0.0033003378193825483, "vf_loss": 83.42573547363281, "vf_explained_var": 0.7645106911659241, "kl": 0.002364285057410598, "entropy": 1.1267634630203247, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3545600, "episodes_total": 8864, "training_iteration": 277, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-05-35", "timestamp": 1660255535, "time_this_iter_s": 29.876389980316162, "time_total_s": 13948.914244413376, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13948.914244413376, "timesteps_since_restore": 3545600, "iterations_since_restore": 277, "perf": {"cpu_util_percent": 33.73571428571429, "ram_util_percent": 58.82857142857141}} -{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 617.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.89}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.38, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.68, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 19.09, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 25, 
"useful_onion_pickup_agent_0_mean": 16.87, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.09, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.92, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.85, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.47, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.87, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.25, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.18, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.47, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.87, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.47, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.87, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 587.0, 627.0, 576.0, 544.0, 636.0, 630.0, 633.0, 633.0, 615.0, 582.0, 636.0, 639.0, 636.0, 633.0, 579.0, 630.0, 633.0, 633.0, 636.0, 639.0, 579.0, 636.0, 630.0, 639.0, 633.0, 582.0, 630.0, 627.0, 516.0, 587.0, 639.0, 587.0, 636.0, 587.0, 630.0, 639.0, 630.0, 639.0, 630.0, 587.0, 582.0, 639.0, 636.0, 639.0, 582.0, 630.0, 627.0, 627.0, 633.0, 639.0, 570.0, 579.0, 633.0, 627.0, 627.0, 636.0, 636.0, 630.0, 633.0, 633.0, 621.0, 573.0, 636.0, 627.0, 530.0, 627.0, 636.0, 584.0, 639.0, 636.0, 633.0, 633.0, 636.0, 627.0, 627.0, 587.0, 636.0, 552.0, 636.0, 636.0, 636.0, 639.0, 636.0, 587.0, 636.0, 621.0, 630.0, 636.0, 633.0, 527.0, 636.0, 633.0, 630.0, 587.0, 630.0, 627.0, 582.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 291.0, 296.0, 321.0, 306.0, 288.0, 288.0, 266.0, 278.0, 320.0, 316.0, 316.0, 314.0, 319.0, 314.0, 316.0, 317.0, 310.0, 305.0, 298.0, 284.0, 316.0, 320.0, 327.0, 312.0, 319.0, 317.0, 319.0, 314.0, 282.0, 297.0, 310.0, 320.0, 316.0, 317.0, 322.0, 311.0, 308.0, 328.0, 325.0, 314.0, 296.0, 283.0, 314.0, 322.0, 320.0, 310.0, 317.0, 322.0, 308.0, 325.0, 290.0, 292.0, 313.0, 317.0, 308.0, 319.0, 243.0, 273.0, 
291.0, 296.0, 320.0, 319.0, 281.0, 306.0, 316.0, 320.0, 301.0, 286.0, 316.0, 314.0, 325.0, 314.0, 309.0, 321.0, 315.0, 324.0, 310.0, 320.0, 297.0, 290.0, 284.0, 298.0, 322.0, 317.0, 319.0, 317.0, 319.0, 320.0, 283.0, 299.0, 314.0, 316.0, 314.0, 313.0, 308.0, 319.0, 322.0, 311.0, 321.0, 318.0, 287.0, 283.0, 283.0, 296.0, 319.0, 314.0, 311.0, 316.0, 314.0, 313.0, 322.0, 314.0, 317.0, 319.0, 318.0, 312.0, 309.0, 324.0, 319.0, 314.0, 297.0, 324.0, 279.0, 294.0, 316.0, 320.0, 318.0, 309.0, 251.0, 279.0, 317.0, 310.0, 319.0, 317.0, 287.0, 297.0, 319.0, 320.0, 321.0, 315.0, 311.0, 322.0, 317.0, 316.0, 316.0, 320.0, 323.0, 304.0, 316.0, 311.0, 291.0, 296.0, 324.0, 312.0, 274.0, 278.0, 324.0, 312.0, 325.0, 311.0, 311.0, 325.0, 314.0, 325.0, 323.0, 313.0, 293.0, 294.0, 309.0, 327.0, 310.0, 311.0, 318.0, 312.0, 322.0, 314.0, 304.0, 329.0, 266.0, 261.0, 329.0, 307.0, 311.0, 322.0, 316.0, 314.0, 294.0, 293.0, 316.0, 314.0, 314.0, 313.0, 296.0, 286.0, 314.0, 316.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9910719747703968, "mean_processing_ms": 0.26645500095607105, "mean_inference_ms": 1.5658690640351203}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6672000, "num_steps_sampled": 3558400, "sample_time_ms": 20841.044, "load_time_ms": 37.005, "grad_time_ms": 9278.753, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004964211490005255, "policy_loss": -0.0031354122329503298, "vf_loss": 86.61837768554688, "vf_explained_var": 0.7653247714042664, "kl": 0.0020841285586357117, "entropy": 1.1244021654129028, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3558400, "episodes_total": 8896, "training_iteration": 278, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-06-07", "timestamp": 1660255567, "time_this_iter_s": 32.07894992828369, "time_total_s": 13980.99319434166, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 13980.99319434166, "timesteps_since_restore": 3558400, "iterations_since_restore": 278, "perf": {"cpu_util_percent": 33.79111111111111, "ram_util_percent": 58.81111111111109}} -{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 617.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.905}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.41, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 17.73, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.95, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.9, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.11, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.86, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.26, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.22, 
"soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.6, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 567.0, 627.0, 633.0, 584.0, 639.0, 582.0, 633.0, 636.0, 579.0, 636.0, 627.0, 630.0, 582.0, 633.0, 639.0, 627.0, 582.0, 630.0, 633.0, 633.0, 636.0, 578.0, 633.0, 633.0, 639.0, 633.0, 630.0, 587.0, 633.0, 639.0, 627.0, 530.0, 627.0, 636.0, 584.0, 639.0, 636.0, 633.0, 633.0, 636.0, 627.0, 627.0, 587.0, 636.0, 552.0, 636.0, 636.0, 636.0, 639.0, 636.0, 587.0, 636.0, 621.0, 630.0, 636.0, 633.0, 527.0, 636.0, 633.0, 630.0, 587.0, 630.0, 627.0, 582.0, 630.0, 639.0, 633.0, 587.0, 627.0, 576.0, 544.0, 636.0, 630.0, 633.0, 633.0, 615.0, 582.0, 636.0, 639.0, 636.0, 633.0, 579.0, 630.0, 633.0, 633.0, 636.0, 639.0, 579.0, 636.0, 630.0, 639.0, 633.0, 582.0, 630.0, 627.0, 516.0, 587.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [307.0, 326.0, 304.0, 326.0, 293.0, 274.0, 305.0, 322.0, 319.0, 314.0, 293.0, 291.0, 319.0, 320.0, 292.0, 290.0, 324.0, 309.0, 319.0, 317.0, 293.0, 286.0, 320.0, 316.0, 306.0, 321.0, 319.0, 311.0, 291.0, 291.0, 309.0, 
324.0, 325.0, 314.0, 314.0, 313.0, 301.0, 281.0, 306.0, 324.0, 306.0, 327.0, 319.0, 314.0, 324.0, 312.0, 287.0, 291.0, 319.0, 314.0, 319.0, 314.0, 324.0, 315.0, 317.0, 316.0, 311.0, 319.0, 305.0, 282.0, 315.0, 318.0, 322.0, 317.0, 318.0, 309.0, 251.0, 279.0, 317.0, 310.0, 319.0, 317.0, 287.0, 297.0, 319.0, 320.0, 321.0, 315.0, 311.0, 322.0, 317.0, 316.0, 316.0, 320.0, 323.0, 304.0, 316.0, 311.0, 291.0, 296.0, 324.0, 312.0, 274.0, 278.0, 324.0, 312.0, 325.0, 311.0, 311.0, 325.0, 314.0, 325.0, 323.0, 313.0, 293.0, 294.0, 309.0, 327.0, 310.0, 311.0, 318.0, 312.0, 322.0, 314.0, 304.0, 329.0, 266.0, 261.0, 329.0, 307.0, 311.0, 322.0, 316.0, 314.0, 294.0, 293.0, 316.0, 314.0, 314.0, 313.0, 296.0, 286.0, 314.0, 316.0, 322.0, 317.0, 319.0, 314.0, 291.0, 296.0, 321.0, 306.0, 288.0, 288.0, 266.0, 278.0, 320.0, 316.0, 316.0, 314.0, 319.0, 314.0, 316.0, 317.0, 310.0, 305.0, 298.0, 284.0, 316.0, 320.0, 327.0, 312.0, 319.0, 317.0, 319.0, 314.0, 282.0, 297.0, 310.0, 320.0, 316.0, 317.0, 322.0, 311.0, 308.0, 328.0, 325.0, 314.0, 296.0, 283.0, 314.0, 322.0, 320.0, 310.0, 317.0, 322.0, 308.0, 325.0, 290.0, 292.0, 313.0, 317.0, 308.0, 319.0, 243.0, 273.0, 291.0, 296.0, 320.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9891622022532315, "mean_processing_ms": 0.26607283169040735, "mean_inference_ms": 1.56436366595401}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6696000, "num_steps_sampled": 3571200, "sample_time_ms": 21399.6, "load_time_ms": 36.9, "grad_time_ms": 9533.999, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006114859133958817, "policy_loss": -0.007230747956782579, "vf_loss": 84.06954956054688, "vf_explained_var": 0.7658140063285828, "kl": 0.0017542889108881354, "entropy": 1.129442572593689, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3571200, "episodes_total": 8928, 
"training_iteration": 279, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-06-44", "timestamp": 1660255604, "time_this_iter_s": 37.19960618019104, "time_total_s": 14018.19280052185, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14018.19280052185, "timesteps_since_restore": 3571200, "iterations_since_restore": 279, "perf": {"cpu_util_percent": 33.281132075471696, "ram_util_percent": 58.9301886792453}} -{"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 612.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 146.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.095}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 187.79, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.72, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.89, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.73, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 18, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 14, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.6, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 630.0, 633.0, 633.0, 639.0, 639.0, 582.0, 294.0, 636.0, 587.0, 579.0, 636.0, 450.0, 579.0, 573.0, 582.0, 582.0, 582.0, 630.0, 636.0, 636.0, 636.0, 579.0, 561.0, 633.0, 636.0, 630.0, 630.0, 630.0, 639.0, 630.0, 627.0, 582.0, 630.0, 639.0, 633.0, 587.0, 627.0, 576.0, 544.0, 636.0, 630.0, 633.0, 633.0, 615.0, 582.0, 636.0, 639.0, 636.0, 633.0, 579.0, 630.0, 633.0, 633.0, 636.0, 639.0, 579.0, 636.0, 630.0, 639.0, 633.0, 582.0, 630.0, 627.0, 516.0, 587.0, 639.0, 633.0, 630.0, 567.0, 627.0, 633.0, 584.0, 639.0, 582.0, 633.0, 636.0, 579.0, 636.0, 627.0, 630.0, 582.0, 633.0, 639.0, 627.0, 582.0, 630.0, 633.0, 633.0, 636.0, 578.0, 633.0, 633.0, 639.0, 633.0, 630.0, 587.0, 633.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 319.0, 324.0, 315.0, 311.0, 
319.0, 308.0, 325.0, 311.0, 322.0, 319.0, 320.0, 319.0, 320.0, 286.0, 296.0, 148.0, 146.0, 321.0, 315.0, 294.0, 293.0, 288.0, 291.0, 323.0, 313.0, 224.0, 226.0, 296.0, 283.0, 290.0, 283.0, 289.0, 293.0, 290.0, 292.0, 293.0, 289.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 290.0, 289.0, 279.0, 282.0, 313.0, 320.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 317.0, 313.0, 322.0, 317.0, 311.0, 319.0, 314.0, 313.0, 296.0, 286.0, 314.0, 316.0, 322.0, 317.0, 319.0, 314.0, 291.0, 296.0, 321.0, 306.0, 288.0, 288.0, 266.0, 278.0, 320.0, 316.0, 316.0, 314.0, 319.0, 314.0, 316.0, 317.0, 310.0, 305.0, 298.0, 284.0, 316.0, 320.0, 327.0, 312.0, 319.0, 317.0, 319.0, 314.0, 282.0, 297.0, 310.0, 320.0, 316.0, 317.0, 322.0, 311.0, 308.0, 328.0, 325.0, 314.0, 296.0, 283.0, 314.0, 322.0, 320.0, 310.0, 317.0, 322.0, 308.0, 325.0, 290.0, 292.0, 313.0, 317.0, 308.0, 319.0, 243.0, 273.0, 291.0, 296.0, 320.0, 319.0, 307.0, 326.0, 304.0, 326.0, 293.0, 274.0, 305.0, 322.0, 319.0, 314.0, 293.0, 291.0, 319.0, 320.0, 292.0, 290.0, 324.0, 309.0, 319.0, 317.0, 293.0, 286.0, 320.0, 316.0, 306.0, 321.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 325.0, 314.0, 314.0, 313.0, 301.0, 281.0, 306.0, 324.0, 306.0, 327.0, 319.0, 314.0, 324.0, 312.0, 287.0, 291.0, 319.0, 314.0, 319.0, 314.0, 324.0, 315.0, 317.0, 316.0, 311.0, 319.0, 305.0, 282.0, 315.0, 318.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.987279141463972, "mean_processing_ms": 0.26569893386655014, "mean_inference_ms": 1.5631172104354278}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6720000, "num_steps_sampled": 3584000, "sample_time_ms": 21992.586, "load_time_ms": 36.83, "grad_time_ms": 9822.342, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033407146111130714, "policy_loss": -0.005221154540777206, "vf_loss": 91.33563232421875, "vf_explained_var": 0.7713200449943542, "kl": 0.001954694977030158, "entropy": 1.143385887145996, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3584000, "episodes_total": 8960, "training_iteration": 280, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-07-21", "timestamp": 1660255641, "time_this_iter_s": 36.67114806175232, "time_total_s": 14054.863948583603, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14054.863948583603, "timesteps_since_restore": 3584000, "iterations_since_restore": 280, "perf": {"cpu_util_percent": 33.917307692307695, "ram_util_percent": 58.82115384615383}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 606.73, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.365}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.33, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.42, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.59, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.51, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.66, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.66, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 18, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 14, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.38, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 636.0, 633.0, 636.0, 579.0, 579.0, 573.0, 180.0, 587.0, 636.0, 630.0, 579.0, 639.0, 570.0, 639.0, 633.0, 636.0, 582.0, 582.0, 633.0, 627.0, 630.0, 624.0, 624.0, 536.0, 636.0, 636.0, 636.0, 587.0, 639.0, 639.0, 627.0, 516.0, 587.0, 639.0, 633.0, 630.0, 567.0, 627.0, 633.0, 584.0, 639.0, 582.0, 633.0, 636.0, 579.0, 636.0, 627.0, 630.0, 582.0, 633.0, 639.0, 627.0, 582.0, 630.0, 633.0, 633.0, 636.0, 578.0, 633.0, 633.0, 639.0, 633.0, 630.0, 587.0, 633.0, 639.0, 636.0, 639.0, 630.0, 633.0, 633.0, 639.0, 639.0, 582.0, 294.0, 636.0, 587.0, 579.0, 636.0, 450.0, 579.0, 573.0, 582.0, 582.0, 582.0, 630.0, 636.0, 636.0, 636.0, 579.0, 561.0, 633.0, 636.0, 630.0, 630.0, 630.0, 639.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 319.0, 322.0, 317.0, 319.0, 317.0, 321.0, 312.0, 313.0, 323.0, 277.0, 302.0, 290.0, 289.0, 291.0, 282.0, 91.0, 89.0, 296.0, 291.0, 322.0, 314.0, 316.0, 314.0, 298.0, 281.0, 314.0, 325.0, 284.0, 286.0, 311.0, 328.0, 308.0, 325.0, 311.0, 325.0, 294.0, 288.0, 293.0, 289.0, 321.0, 312.0, 316.0, 311.0, 317.0, 313.0, 327.0, 297.0, 321.0, 303.0, 275.0, 261.0, 314.0, 322.0, 314.0, 322.0, 316.0, 320.0, 303.0, 284.0, 314.0, 325.0, 319.0, 320.0, 308.0, 319.0, 243.0, 273.0, 291.0, 296.0, 320.0, 319.0, 307.0, 326.0, 304.0, 326.0, 293.0, 274.0, 305.0, 322.0, 319.0, 314.0, 293.0, 291.0, 319.0, 320.0, 292.0, 290.0, 324.0, 309.0, 319.0, 317.0, 293.0, 286.0, 320.0, 316.0, 306.0, 321.0, 319.0, 311.0, 291.0, 291.0, 309.0, 324.0, 325.0, 314.0, 314.0, 313.0, 301.0, 281.0, 306.0, 324.0, 306.0, 327.0, 319.0, 314.0, 324.0, 312.0, 287.0, 291.0, 319.0, 314.0, 319.0, 314.0, 324.0, 315.0, 317.0, 316.0, 311.0, 319.0, 305.0, 282.0, 315.0, 318.0, 322.0, 317.0, 317.0, 319.0, 324.0, 315.0, 311.0, 319.0, 308.0, 325.0, 311.0, 322.0, 319.0, 320.0, 319.0, 320.0, 286.0, 296.0, 148.0, 146.0, 321.0, 315.0, 294.0, 293.0, 288.0, 291.0, 323.0, 313.0, 224.0, 226.0, 296.0, 283.0, 290.0, 283.0, 289.0, 293.0, 290.0, 292.0, 293.0, 289.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 290.0, 289.0, 279.0, 282.0, 313.0, 320.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 317.0, 313.0, 322.0, 317.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.985427132070836, "mean_processing_ms": 0.26533447466026966, "mean_inference_ms": 1.5623431092422566}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6744000, "num_steps_sampled": 3596800, "sample_time_ms": 22602.144, "load_time_ms": 37.106, "grad_time_ms": 10160.693, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.006504642311483622, "policy_loss": -0.002157183364033699, "vf_loss": 92.3310546875, "vf_explained_var": 0.768465518951416, "kl": 0.002224028343334794, "entropy": 1.1425694227218628, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3596800, "episodes_total": 8992, "training_iteration": 281, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-08-00", "timestamp": 1660255680, "time_this_iter_s": 39.430299043655396, "time_total_s": 14094.294247627258, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14094.294247627258, "timesteps_since_restore": 3596800, "iterations_since_restore": 281, "perf": {"cpu_util_percent": 32.93571428571428, "ram_util_percent": 58.800000000000004}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 606.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 303.4}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.8, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, 
"tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.25, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.74, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.76, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.72, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 18, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, 
"useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 14, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.28, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 596.0, 587.0, 627.0, 633.0, 636.0, 636.0, 636.0, 636.0, 636.0, 582.0, 630.0, 633.0, 636.0, 630.0, 590.0, 636.0, 633.0, 573.0, 558.0, 636.0, 636.0, 636.0, 630.0, 639.0, 636.0, 522.0, 587.0, 636.0, 636.0, 587.0, 639.0, 630.0, 587.0, 633.0, 639.0, 636.0, 639.0, 630.0, 633.0, 633.0, 639.0, 639.0, 582.0, 294.0, 636.0, 587.0, 579.0, 636.0, 450.0, 579.0, 573.0, 582.0, 582.0, 582.0, 630.0, 636.0, 636.0, 636.0, 579.0, 561.0, 633.0, 636.0, 630.0, 630.0, 630.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 636.0, 579.0, 579.0, 573.0, 180.0, 587.0, 636.0, 630.0, 579.0, 639.0, 570.0, 639.0, 633.0, 636.0, 582.0, 582.0, 633.0, 627.0, 630.0, 624.0, 624.0, 536.0, 636.0, 636.0, 636.0, 587.0, 639.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 300.0, 296.0, 297.0, 290.0, 315.0, 312.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 314.0, 322.0, 322.0, 314.0, 317.0, 319.0, 294.0, 288.0, 306.0, 324.0, 311.0, 322.0, 316.0, 320.0, 316.0, 314.0, 304.0, 286.0, 324.0, 312.0, 317.0, 316.0, 286.0, 287.0, 278.0, 280.0, 319.0, 317.0, 329.0, 307.0, 311.0, 325.0, 318.0, 312.0, 312.0, 327.0, 319.0, 317.0, 262.0, 260.0, 294.0, 293.0, 319.0, 317.0, 322.0, 314.0, 288.0, 299.0, 322.0, 317.0, 311.0, 319.0, 305.0, 282.0, 315.0, 318.0, 322.0, 317.0, 317.0, 319.0, 324.0, 315.0, 311.0, 319.0, 308.0, 325.0, 311.0, 322.0, 319.0, 320.0, 319.0, 320.0, 286.0, 296.0, 148.0, 146.0, 321.0, 315.0, 294.0, 293.0, 288.0, 291.0, 323.0, 313.0, 224.0, 226.0, 296.0, 283.0, 290.0, 283.0, 289.0, 293.0, 290.0, 292.0, 293.0, 289.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 290.0, 289.0, 279.0, 282.0, 313.0, 320.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 317.0, 313.0, 322.0, 317.0, 311.0, 319.0, 317.0, 319.0, 322.0, 317.0, 319.0, 317.0, 321.0, 312.0, 313.0, 323.0, 277.0, 302.0, 290.0, 289.0, 291.0, 282.0, 91.0, 89.0, 296.0, 291.0, 322.0, 314.0, 316.0, 314.0, 298.0, 281.0, 314.0, 325.0, 284.0, 286.0, 311.0, 328.0, 308.0, 325.0, 311.0, 325.0, 294.0, 288.0, 293.0, 289.0, 321.0, 312.0, 316.0, 311.0, 317.0, 313.0, 327.0, 297.0, 321.0, 303.0, 275.0, 261.0, 314.0, 322.0, 314.0, 322.0, 316.0, 320.0, 303.0, 284.0, 314.0, 325.0, 319.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9835752103024169, "mean_processing_ms": 0.26496930123552576, "mean_inference_ms": 1.5613527088644699}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6768000, 
"num_steps_sampled": 3609600, "sample_time_ms": 22700.667, "load_time_ms": 36.992, "grad_time_ms": 10562.273, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004271908197551966, "policy_loss": -0.0035006285179406404, "vf_loss": 83.40963745117188, "vf_explained_var": 0.7725582718849182, "kl": 0.0017563734436407685, "entropy": 1.1368495225906372, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3609600, "episodes_total": 9024, "training_iteration": 282, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-08-36", "timestamp": 1660255716, "time_this_iter_s": 35.46651792526245, "time_total_s": 14129.76076555252, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": 
false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14129.76076555252, "timesteps_since_restore": 3609600, "iterations_since_restore": 282, "perf": {"cpu_util_percent": 33.821999999999996, "ram_util_percent": 59.328}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 614.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.035}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.87, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.29, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.89, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.45, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.45, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.95, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 3, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.45, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.95, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.45, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.95, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 573.0, 636.0, 630.0, 636.0, 630.0, 633.0, 579.0, 636.0, 639.0, 630.0, 633.0, 639.0, 639.0, 582.0, 636.0, 633.0, 630.0, 567.0, 633.0, 627.0, 627.0, 582.0, 639.0, 633.0, 636.0, 579.0, 630.0, 636.0, 636.0, 587.0, 639.0, 630.0, 630.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 636.0, 579.0, 579.0, 573.0, 180.0, 587.0, 636.0, 630.0, 579.0, 639.0, 570.0, 639.0, 633.0, 636.0, 582.0, 582.0, 633.0, 627.0, 630.0, 624.0, 624.0, 536.0, 636.0, 636.0, 636.0, 587.0, 639.0, 639.0, 579.0, 596.0, 587.0, 627.0, 633.0, 636.0, 636.0, 636.0, 636.0, 636.0, 582.0, 630.0, 633.0, 636.0, 630.0, 590.0, 636.0, 633.0, 573.0, 558.0, 636.0, 636.0, 636.0, 630.0, 639.0, 636.0, 
522.0, 587.0, 636.0, 636.0, 587.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 322.0, 292.0, 281.0, 308.0, 328.0, 304.0, 326.0, 317.0, 319.0, 319.0, 311.0, 311.0, 322.0, 284.0, 295.0, 313.0, 323.0, 319.0, 320.0, 319.0, 311.0, 316.0, 317.0, 319.0, 320.0, 321.0, 318.0, 283.0, 299.0, 316.0, 320.0, 316.0, 317.0, 316.0, 314.0, 279.0, 288.0, 313.0, 320.0, 313.0, 314.0, 311.0, 316.0, 288.0, 294.0, 319.0, 320.0, 314.0, 319.0, 319.0, 317.0, 294.0, 285.0, 318.0, 312.0, 317.0, 319.0, 322.0, 314.0, 298.0, 289.0, 322.0, 317.0, 311.0, 319.0, 317.0, 313.0, 322.0, 317.0, 311.0, 319.0, 317.0, 319.0, 322.0, 317.0, 319.0, 317.0, 321.0, 312.0, 313.0, 323.0, 277.0, 302.0, 290.0, 289.0, 291.0, 282.0, 91.0, 89.0, 296.0, 291.0, 322.0, 314.0, 316.0, 314.0, 298.0, 281.0, 314.0, 325.0, 284.0, 286.0, 311.0, 328.0, 308.0, 325.0, 311.0, 325.0, 294.0, 288.0, 293.0, 289.0, 321.0, 312.0, 316.0, 311.0, 317.0, 313.0, 327.0, 297.0, 321.0, 303.0, 275.0, 261.0, 314.0, 322.0, 314.0, 322.0, 316.0, 320.0, 303.0, 284.0, 314.0, 325.0, 319.0, 320.0, 283.0, 296.0, 300.0, 296.0, 297.0, 290.0, 315.0, 312.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 314.0, 322.0, 322.0, 314.0, 317.0, 319.0, 294.0, 288.0, 306.0, 324.0, 311.0, 322.0, 316.0, 320.0, 316.0, 314.0, 304.0, 286.0, 324.0, 312.0, 317.0, 316.0, 286.0, 287.0, 278.0, 280.0, 319.0, 317.0, 329.0, 307.0, 311.0, 325.0, 318.0, 312.0, 312.0, 327.0, 319.0, 317.0, 262.0, 260.0, 294.0, 293.0, 319.0, 317.0, 322.0, 314.0, 288.0, 299.0, 322.0, 317.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.9817221783710357, "mean_processing_ms": 0.2646010611587108, "mean_inference_ms": 1.5601389392518195}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6792000, "num_steps_sampled": 3622400, "sample_time_ms": 22788.728, "load_time_ms": 36.79, "grad_time_ms": 10687.472, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030612878035753965, "policy_loss": -0.0045290542766451836, "vf_loss": 81.5626449584961, "vf_explained_var": 0.7761082053184509, "kl": 0.0021392148919403553, "entropy": 1.131847858428955, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3622400, "episodes_total": 9056, "training_iteration": 283, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-09-09", "timestamp": 1660255749, "time_this_iter_s": 33.67844009399414, "time_total_s": 14163.439205646515, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14163.439205646515, "timesteps_since_restore": 3622400, "iterations_since_restore": 283, "perf": {"cpu_util_percent": 34.11489361702128, "ram_util_percent": 59.04893617021279}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 617.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.985}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.97, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.9, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.58, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.13, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.53, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.56, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, 
"potting_onion_agent_1_mean": 17.97, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.56, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.97, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 16.56, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.97, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 579.0, 582.0, 636.0, 575.0, 587.0, 582.0, 630.0, 630.0, 636.0, 633.0, 627.0, 627.0, 633.0, 593.0, 587.0, 636.0, 630.0, 627.0, 630.0, 636.0, 633.0, 630.0, 630.0, 573.0, 627.0, 579.0, 596.0, 579.0, 630.0, 633.0, 636.0, 587.0, 639.0, 639.0, 579.0, 596.0, 587.0, 627.0, 633.0, 636.0, 636.0, 636.0, 636.0, 636.0, 582.0, 630.0, 633.0, 636.0, 630.0, 590.0, 636.0, 633.0, 573.0, 558.0, 636.0, 636.0, 636.0, 
630.0, 639.0, 636.0, 522.0, 587.0, 636.0, 636.0, 587.0, 639.0, 639.0, 573.0, 636.0, 630.0, 636.0, 630.0, 633.0, 579.0, 636.0, 639.0, 630.0, 633.0, 639.0, 639.0, 582.0, 636.0, 633.0, 630.0, 567.0, 633.0, 627.0, 627.0, 582.0, 639.0, 633.0, 636.0, 579.0, 630.0, 636.0, 636.0, 587.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 311.0, 319.0, 288.0, 291.0, 286.0, 296.0, 311.0, 325.0, 291.0, 284.0, 306.0, 281.0, 294.0, 288.0, 314.0, 316.0, 311.0, 319.0, 319.0, 317.0, 316.0, 317.0, 305.0, 322.0, 308.0, 319.0, 314.0, 319.0, 288.0, 305.0, 299.0, 288.0, 319.0, 317.0, 316.0, 314.0, 318.0, 309.0, 311.0, 319.0, 319.0, 317.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 287.0, 286.0, 311.0, 316.0, 291.0, 288.0, 294.0, 302.0, 291.0, 288.0, 314.0, 316.0, 311.0, 322.0, 316.0, 320.0, 303.0, 284.0, 314.0, 325.0, 319.0, 320.0, 283.0, 296.0, 300.0, 296.0, 297.0, 290.0, 315.0, 312.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 314.0, 322.0, 322.0, 314.0, 317.0, 319.0, 294.0, 288.0, 306.0, 324.0, 311.0, 322.0, 316.0, 320.0, 316.0, 314.0, 304.0, 286.0, 324.0, 312.0, 317.0, 316.0, 286.0, 287.0, 278.0, 280.0, 319.0, 317.0, 329.0, 307.0, 311.0, 325.0, 318.0, 312.0, 312.0, 327.0, 319.0, 317.0, 262.0, 260.0, 294.0, 293.0, 319.0, 317.0, 322.0, 314.0, 288.0, 299.0, 322.0, 317.0, 317.0, 322.0, 292.0, 281.0, 308.0, 328.0, 304.0, 326.0, 317.0, 319.0, 319.0, 311.0, 311.0, 322.0, 284.0, 295.0, 313.0, 323.0, 319.0, 320.0, 319.0, 311.0, 316.0, 317.0, 319.0, 320.0, 321.0, 318.0, 283.0, 299.0, 316.0, 
320.0, 316.0, 317.0, 316.0, 314.0, 279.0, 288.0, 313.0, 320.0, 313.0, 314.0, 311.0, 316.0, 288.0, 294.0, 319.0, 320.0, 314.0, 319.0, 319.0, 317.0, 294.0, 285.0, 318.0, 312.0, 317.0, 319.0, 322.0, 314.0, 298.0, 289.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9798702213008988, "mean_processing_ms": 0.26423169692571163, "mean_inference_ms": 1.5586086454060646}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6816000, "num_steps_sampled": 3635200, "sample_time_ms": 22897.149, "load_time_ms": 36.929, "grad_time_ms": 10771.125, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005141934845596552, "policy_loss": -0.003231912851333618, "vf_loss": 89.45598602294922, "vf_explained_var": 0.7527138590812683, "kl": 0.0021111962851136923, "entropy": 1.1434991359710693, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3635200, "episodes_total": 9088, "training_iteration": 284, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-09-42", "timestamp": 1660255782, "time_this_iter_s": 32.81455707550049, "time_total_s": 14196.253762722015, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14196.253762722015, "timesteps_since_restore": 3635200, "iterations_since_restore": 284, "perf": {"cpu_util_percent": 32.710638297872336, "ram_util_percent": 58.93617021276594}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 616.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 308.355}, 
"custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 189.51, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.26, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.86, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.6, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.07, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.56, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, 
"useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.46, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.94, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.46, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.94, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.46, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.94, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 633.0, 639.0, 630.0, 636.0, 636.0, 636.0, 627.0, 633.0, 582.0, 636.0, 627.0, 180.0, 639.0, 633.0, 639.0, 636.0, 636.0, 633.0, 633.0, 539.0, 630.0, 636.0, 639.0, 636.0, 630.0, 587.0, 633.0, 
636.0, 636.0, 636.0, 636.0, 636.0, 587.0, 639.0, 639.0, 573.0, 636.0, 630.0, 636.0, 630.0, 633.0, 579.0, 636.0, 639.0, 630.0, 633.0, 639.0, 639.0, 582.0, 636.0, 633.0, 630.0, 567.0, 633.0, 627.0, 627.0, 582.0, 639.0, 633.0, 636.0, 579.0, 630.0, 636.0, 636.0, 587.0, 639.0, 633.0, 630.0, 579.0, 582.0, 636.0, 575.0, 587.0, 582.0, 630.0, 630.0, 636.0, 633.0, 627.0, 627.0, 633.0, 593.0, 587.0, 636.0, 630.0, 627.0, 630.0, 636.0, 633.0, 630.0, 630.0, 573.0, 627.0, 579.0, 596.0, 579.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 317.0, 319.0, 317.0, 316.0, 322.0, 317.0, 316.0, 314.0, 327.0, 309.0, 316.0, 320.0, 326.0, 310.0, 316.0, 311.0, 326.0, 307.0, 288.0, 294.0, 311.0, 325.0, 308.0, 319.0, 91.0, 89.0, 317.0, 322.0, 324.0, 309.0, 324.0, 315.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 314.0, 319.0, 268.0, 271.0, 311.0, 319.0, 314.0, 322.0, 322.0, 317.0, 322.0, 314.0, 329.0, 301.0, 296.0, 291.0, 314.0, 319.0, 306.0, 330.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 322.0, 314.0, 288.0, 299.0, 322.0, 317.0, 317.0, 322.0, 292.0, 281.0, 308.0, 328.0, 304.0, 326.0, 317.0, 319.0, 319.0, 311.0, 311.0, 322.0, 284.0, 295.0, 313.0, 323.0, 319.0, 320.0, 319.0, 311.0, 316.0, 317.0, 319.0, 320.0, 321.0, 318.0, 283.0, 299.0, 316.0, 320.0, 316.0, 317.0, 316.0, 314.0, 279.0, 288.0, 313.0, 320.0, 313.0, 314.0, 311.0, 316.0, 288.0, 294.0, 319.0, 320.0, 314.0, 319.0, 319.0, 317.0, 294.0, 285.0, 318.0, 312.0, 317.0, 319.0, 322.0, 314.0, 298.0, 289.0, 322.0, 317.0, 321.0, 
312.0, 311.0, 319.0, 288.0, 291.0, 286.0, 296.0, 311.0, 325.0, 291.0, 284.0, 306.0, 281.0, 294.0, 288.0, 314.0, 316.0, 311.0, 319.0, 319.0, 317.0, 316.0, 317.0, 305.0, 322.0, 308.0, 319.0, 314.0, 319.0, 288.0, 305.0, 299.0, 288.0, 319.0, 317.0, 316.0, 314.0, 318.0, 309.0, 311.0, 319.0, 319.0, 317.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 287.0, 286.0, 311.0, 316.0, 291.0, 288.0, 294.0, 302.0, 291.0, 288.0, 314.0, 316.0, 311.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.978026720054701, "mean_processing_ms": 0.2638645692972578, "mean_inference_ms": 1.5570587102664553}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6840000, "num_steps_sampled": 3648000, "sample_time_ms": 23082.722, "load_time_ms": 36.933, "grad_time_ms": 10916.497, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002097133779898286, "policy_loss": -0.005807385314255953, "vf_loss": 84.70693969726562, "vf_explained_var": 0.7814067006111145, "kl": 0.0015371787594631314, "entropy": 1.1323403120040894, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3648000, "episodes_total": 9120, "training_iteration": 285, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-10-15", "timestamp": 1660255815, "time_this_iter_s": 33.031522035598755, "time_total_s": 14229.285284757614, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14229.285284757614, "timesteps_since_restore": 3648000, "iterations_since_restore": 285, "perf": {"cpu_util_percent": 32.80434782608696, "ram_util_percent": 58.88260869565216}} -{"episode_reward_max": 639.0, "episode_reward_min": 
180.0, "episode_reward_mean": 616.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 308.135}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 189.47, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.33, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.63, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.8, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.55, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 
0.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.5, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, 
"optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 624.0, 636.0, 630.0, 
630.0, 633.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 639.0, 639.0, 630.0, 639.0, 579.0, 633.0, 582.0, 587.0, 636.0, 639.0, 587.0, 636.0, 578.0, 639.0, 630.0, 576.0, 630.0, 582.0, 636.0, 636.0, 587.0, 639.0, 633.0, 630.0, 579.0, 582.0, 636.0, 575.0, 587.0, 582.0, 630.0, 630.0, 636.0, 633.0, 627.0, 627.0, 633.0, 593.0, 587.0, 636.0, 630.0, 627.0, 630.0, 636.0, 633.0, 630.0, 630.0, 573.0, 627.0, 579.0, 596.0, 579.0, 630.0, 633.0, 582.0, 636.0, 633.0, 639.0, 630.0, 636.0, 636.0, 636.0, 627.0, 633.0, 582.0, 636.0, 627.0, 180.0, 639.0, 633.0, 639.0, 636.0, 636.0, 633.0, 633.0, 539.0, 630.0, 636.0, 639.0, 636.0, 630.0, 587.0, 633.0, 636.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 311.0, 296.0, 286.0, 313.0, 311.0, 314.0, 322.0, 309.0, 321.0, 319.0, 311.0, 318.0, 315.0, 325.0, 311.0, 314.0, 322.0, 312.0, 324.0, 319.0, 311.0, 313.0, 320.0, 325.0, 305.0, 324.0, 306.0, 319.0, 320.0, 317.0, 322.0, 313.0, 317.0, 316.0, 323.0, 288.0, 291.0, 319.0, 314.0, 296.0, 286.0, 295.0, 292.0, 332.0, 304.0, 319.0, 320.0, 296.0, 291.0, 316.0, 320.0, 282.0, 296.0, 314.0, 325.0, 316.0, 314.0, 293.0, 283.0, 323.0, 307.0, 291.0, 291.0, 317.0, 319.0, 322.0, 314.0, 298.0, 289.0, 322.0, 317.0, 321.0, 312.0, 311.0, 319.0, 288.0, 291.0, 286.0, 296.0, 311.0, 325.0, 291.0, 284.0, 306.0, 281.0, 294.0, 288.0, 314.0, 316.0, 311.0, 319.0, 319.0, 317.0, 316.0, 317.0, 305.0, 322.0, 308.0, 319.0, 314.0, 319.0, 288.0, 305.0, 299.0, 288.0, 319.0, 317.0, 316.0, 314.0, 318.0, 309.0, 311.0, 
319.0, 319.0, 317.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 287.0, 286.0, 311.0, 316.0, 291.0, 288.0, 294.0, 302.0, 291.0, 288.0, 314.0, 316.0, 311.0, 322.0, 288.0, 294.0, 317.0, 319.0, 317.0, 316.0, 322.0, 317.0, 316.0, 314.0, 327.0, 309.0, 316.0, 320.0, 326.0, 310.0, 316.0, 311.0, 326.0, 307.0, 288.0, 294.0, 311.0, 325.0, 308.0, 319.0, 91.0, 89.0, 317.0, 322.0, 324.0, 309.0, 324.0, 315.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 314.0, 319.0, 268.0, 271.0, 311.0, 319.0, 314.0, 322.0, 322.0, 317.0, 322.0, 314.0, 329.0, 301.0, 296.0, 291.0, 314.0, 319.0, 306.0, 330.0, 314.0, 322.0, 324.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9761954337234573, "mean_processing_ms": 0.26349990327404404, "mean_inference_ms": 1.5554986152813894}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6864000, "num_steps_sampled": 3660800, "sample_time_ms": 23037.067, "load_time_ms": 36.944, "grad_time_ms": 11090.889, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016592548927292228, "policy_loss": -0.006142645608633757, "vf_loss": 83.6804428100586, "vf_explained_var": 0.7674832344055176, "kl": 0.0020798875484615564, "entropy": 1.1322760581970215, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3660800, "episodes_total": 9152, "training_iteration": 286, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-10-47", "timestamp": 1660255847, "time_this_iter_s": 31.660379886627197, "time_total_s": 14260.945664644241, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", 
"fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14260.945664644241, "timesteps_since_restore": 3660800, 
"iterations_since_restore": 286, "perf": {"cpu_util_percent": 33.97111111111111, "ram_util_percent": 58.86666666666669}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 618.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 309.115}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 189.83, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.54, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.68, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.87, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.89, 
"useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.63, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.86, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 
0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.63, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.86, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.63, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.86, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, 
"useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 579.0, 627.0, 630.0, 636.0, 639.0, 633.0, 633.0, 636.0, 627.0, 636.0, 630.0, 582.0, 627.0, 636.0, 636.0, 636.0, 587.0, 636.0, 633.0, 636.0, 633.0, 633.0, 630.0, 633.0, 636.0, 627.0, 579.0, 639.0, 582.0, 546.0, 636.0, 596.0, 579.0, 630.0, 633.0, 582.0, 636.0, 633.0, 639.0, 630.0, 636.0, 636.0, 636.0, 627.0, 633.0, 582.0, 636.0, 627.0, 180.0, 639.0, 633.0, 639.0, 636.0, 636.0, 633.0, 633.0, 539.0, 630.0, 636.0, 639.0, 636.0, 630.0, 587.0, 633.0, 636.0, 636.0, 636.0, 633.0, 582.0, 624.0, 636.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 639.0, 639.0, 630.0, 639.0, 579.0, 633.0, 582.0, 587.0, 636.0, 639.0, 587.0, 636.0, 578.0, 639.0, 630.0, 576.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 323.0, 299.0, 280.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 319.0, 314.0, 316.0, 317.0, 314.0, 322.0, 324.0, 303.0, 307.0, 329.0, 307.0, 323.0, 293.0, 289.0, 311.0, 316.0, 314.0, 322.0, 319.0, 317.0, 313.0, 323.0, 283.0, 304.0, 319.0, 317.0, 319.0, 314.0, 314.0, 322.0, 311.0, 322.0, 311.0, 322.0, 314.0, 316.0, 322.0, 311.0, 319.0, 317.0, 313.0, 314.0, 282.0, 297.0, 317.0, 322.0, 293.0, 289.0, 283.0, 263.0, 323.0, 313.0, 294.0, 302.0, 291.0, 288.0, 314.0, 316.0, 311.0, 322.0, 288.0, 294.0, 317.0, 319.0, 317.0, 316.0, 322.0, 317.0, 316.0, 314.0, 327.0, 309.0, 316.0, 
320.0, 326.0, 310.0, 316.0, 311.0, 326.0, 307.0, 288.0, 294.0, 311.0, 325.0, 308.0, 319.0, 91.0, 89.0, 317.0, 322.0, 324.0, 309.0, 324.0, 315.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 314.0, 319.0, 268.0, 271.0, 311.0, 319.0, 314.0, 322.0, 322.0, 317.0, 322.0, 314.0, 329.0, 301.0, 296.0, 291.0, 314.0, 319.0, 306.0, 330.0, 314.0, 322.0, 324.0, 312.0, 322.0, 311.0, 296.0, 286.0, 313.0, 311.0, 314.0, 322.0, 309.0, 321.0, 319.0, 311.0, 318.0, 315.0, 325.0, 311.0, 314.0, 322.0, 312.0, 324.0, 319.0, 311.0, 313.0, 320.0, 325.0, 305.0, 324.0, 306.0, 319.0, 320.0, 317.0, 322.0, 313.0, 317.0, 316.0, 323.0, 288.0, 291.0, 319.0, 314.0, 296.0, 286.0, 295.0, 292.0, 332.0, 304.0, 319.0, 320.0, 296.0, 291.0, 316.0, 320.0, 282.0, 296.0, 314.0, 325.0, 316.0, 314.0, 293.0, 283.0, 323.0, 307.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9743700047220856, "mean_processing_ms": 0.2631349992390798, "mean_inference_ms": 1.553816906350355}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6888000, "num_steps_sampled": 3673600, "sample_time_ms": 22960.493, "load_time_ms": 36.753, "grad_time_ms": 11340.647, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0018661068752408028, "policy_loss": -0.005918534938246012, "vf_loss": 83.52860260009766, "vf_explained_var": 0.7654721140861511, "kl": 0.0018988008378073573, "entropy": 1.136439561843872, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3673600, "episodes_total": 9184, "training_iteration": 287, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-11-18", "timestamp": 1660255878, "time_this_iter_s": 31.607279777526855, "time_total_s": 14292.552944421768, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, 
"sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, 
"in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14292.552944421768, "timesteps_since_restore": 3673600, "iterations_since_restore": 287, "perf": {"cpu_util_percent": 34.857777777777784, "ram_util_percent": 58.80666666666665}} -{"episode_reward_max": 639.0, "episode_reward_min": 546.0, "episode_reward_mean": 622.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 263.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 311.005}, "custom_metrics": {"sparse_reward_mean": 215.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 190.81, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.43, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.79, "onion_pickup_agent_1_min": 13, 
"onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.78, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.05, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.61, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.06, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, 
"soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.61, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.06, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.61, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.06, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 627.0, 633.0, 627.0, 584.0, 633.0, 636.0, 639.0, 639.0, 630.0, 587.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 630.0, 624.0, 636.0, 633.0, 587.0, 639.0, 630.0, 639.0, 587.0, 639.0, 636.0, 630.0, 630.0, 636.0, 636.0, 633.0, 636.0, 636.0, 636.0, 633.0, 582.0, 624.0, 636.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 639.0, 639.0, 630.0, 639.0, 579.0, 633.0, 582.0, 587.0, 636.0, 639.0, 587.0, 636.0, 578.0, 639.0, 630.0, 576.0, 630.0, 582.0, 636.0, 579.0, 627.0, 630.0, 636.0, 639.0, 633.0, 633.0, 636.0, 627.0, 636.0, 630.0, 582.0, 627.0, 636.0, 636.0, 636.0, 587.0, 636.0, 633.0, 636.0, 633.0, 633.0, 630.0, 633.0, 636.0, 627.0, 579.0, 639.0, 582.0, 546.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 299.0, 316.0, 311.0, 324.0, 309.0, 318.0, 309.0, 289.0, 295.0, 314.0, 319.0, 316.0, 320.0, 319.0, 320.0, 317.0, 322.0, 314.0, 316.0, 293.0, 294.0, 318.0, 312.0, 288.0, 294.0, 289.0, 293.0, 292.0, 292.0, 315.0, 315.0, 324.0, 312.0, 324.0, 306.0, 313.0, 311.0, 314.0, 322.0, 311.0, 322.0, 295.0, 292.0, 322.0, 317.0, 308.0, 322.0, 314.0, 325.0, 293.0, 294.0, 317.0, 322.0, 318.0, 
318.0, 319.0, 311.0, 314.0, 316.0, 319.0, 317.0, 314.0, 322.0, 314.0, 319.0, 306.0, 330.0, 314.0, 322.0, 324.0, 312.0, 322.0, 311.0, 296.0, 286.0, 313.0, 311.0, 314.0, 322.0, 309.0, 321.0, 319.0, 311.0, 318.0, 315.0, 325.0, 311.0, 314.0, 322.0, 312.0, 324.0, 319.0, 311.0, 313.0, 320.0, 325.0, 305.0, 324.0, 306.0, 319.0, 320.0, 317.0, 322.0, 313.0, 317.0, 316.0, 323.0, 288.0, 291.0, 319.0, 314.0, 296.0, 286.0, 295.0, 292.0, 332.0, 304.0, 319.0, 320.0, 296.0, 291.0, 316.0, 320.0, 282.0, 296.0, 314.0, 325.0, 316.0, 314.0, 293.0, 283.0, 323.0, 307.0, 291.0, 291.0, 313.0, 323.0, 299.0, 280.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 319.0, 314.0, 316.0, 317.0, 314.0, 322.0, 324.0, 303.0, 307.0, 329.0, 307.0, 323.0, 293.0, 289.0, 311.0, 316.0, 314.0, 322.0, 319.0, 317.0, 313.0, 323.0, 283.0, 304.0, 319.0, 317.0, 319.0, 314.0, 314.0, 322.0, 311.0, 322.0, 311.0, 322.0, 314.0, 316.0, 322.0, 311.0, 319.0, 317.0, 313.0, 314.0, 282.0, 297.0, 317.0, 322.0, 293.0, 289.0, 283.0, 263.0, 323.0, 313.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9725594290722099, "mean_processing_ms": 0.2627719670023304, "mean_inference_ms": 1.5521036278405136}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6912000, "num_steps_sampled": 3686400, "sample_time_ms": 23043.972, "load_time_ms": 36.653, "grad_time_ms": 11285.002, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023610887583345175, "policy_loss": -0.005447230767458677, "vf_loss": 83.72765350341797, "vf_explained_var": 0.7662909030914307, "kl": 0.001831754925660789, "entropy": 1.128881573677063, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3686400, "episodes_total": 9216, "training_iteration": 288, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-11-51", "timestamp": 1660255911, "time_this_iter_s": 32.35726475715637, 
"time_total_s": 14324.910209178925, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": 
false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": 
true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14324.910209178925, "timesteps_since_restore": 3686400, "iterations_since_restore": 288, "perf": {"cpu_util_percent": 32.42, "ram_util_percent": 58.875555555555565}} -{"episode_reward_max": 639.0, "episode_reward_min": 546.0, "episode_reward_mean": 621.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 263.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 310.955}, "custom_metrics": {"sparse_reward_mean": 215.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 190.71, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 17.17, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.98, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.24, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.49, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.24, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, 
"soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.71, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.38, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.24, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.24, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 633.0, 636.0, 639.0, 627.0, 627.0, 639.0, 639.0, 636.0, 630.0, 636.0, 630.0, 633.0, 639.0, 579.0, 587.0, 630.0, 633.0, 630.0, 639.0, 582.0, 630.0, 639.0, 636.0, 630.0, 573.0, 582.0, 630.0, 582.0, 636.0, 639.0, 630.0, 576.0, 630.0, 582.0, 636.0, 579.0, 627.0, 630.0, 636.0, 639.0, 633.0, 633.0, 636.0, 627.0, 636.0, 630.0, 582.0, 627.0, 636.0, 636.0, 636.0, 587.0, 636.0, 633.0, 636.0, 633.0, 633.0, 630.0, 633.0, 636.0, 627.0, 579.0, 639.0, 582.0, 546.0, 636.0, 584.0, 627.0, 633.0, 627.0, 584.0, 633.0, 636.0, 639.0, 639.0, 630.0, 587.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 630.0, 624.0, 636.0, 633.0, 587.0, 639.0, 630.0, 639.0, 587.0, 639.0, 636.0, 630.0, 630.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 312.0, 319.0, 317.0, 321.0, 312.0, 318.0, 318.0, 319.0, 320.0, 305.0, 322.0, 310.0, 317.0, 319.0, 320.0, 314.0, 325.0, 321.0, 315.0, 311.0, 319.0, 311.0, 325.0, 311.0, 319.0, 311.0, 322.0, 319.0, 320.0, 285.0, 
294.0, 298.0, 289.0, 319.0, 311.0, 313.0, 320.0, 308.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 311.0, 324.0, 315.0, 314.0, 322.0, 311.0, 319.0, 276.0, 297.0, 286.0, 296.0, 308.0, 322.0, 286.0, 296.0, 324.0, 312.0, 321.0, 318.0, 316.0, 314.0, 293.0, 283.0, 323.0, 307.0, 291.0, 291.0, 313.0, 323.0, 299.0, 280.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 319.0, 314.0, 316.0, 317.0, 314.0, 322.0, 324.0, 303.0, 307.0, 329.0, 307.0, 323.0, 293.0, 289.0, 311.0, 316.0, 314.0, 322.0, 319.0, 317.0, 313.0, 323.0, 283.0, 304.0, 319.0, 317.0, 319.0, 314.0, 314.0, 322.0, 311.0, 322.0, 311.0, 322.0, 314.0, 316.0, 322.0, 311.0, 319.0, 317.0, 313.0, 314.0, 282.0, 297.0, 317.0, 322.0, 293.0, 289.0, 283.0, 263.0, 323.0, 313.0, 285.0, 299.0, 316.0, 311.0, 324.0, 309.0, 318.0, 309.0, 289.0, 295.0, 314.0, 319.0, 316.0, 320.0, 319.0, 320.0, 317.0, 322.0, 314.0, 316.0, 293.0, 294.0, 318.0, 312.0, 288.0, 294.0, 289.0, 293.0, 292.0, 292.0, 315.0, 315.0, 324.0, 312.0, 324.0, 306.0, 313.0, 311.0, 314.0, 322.0, 311.0, 322.0, 295.0, 292.0, 322.0, 317.0, 308.0, 322.0, 314.0, 325.0, 293.0, 294.0, 317.0, 322.0, 318.0, 318.0, 319.0, 311.0, 314.0, 316.0, 319.0, 317.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9707646787589033, "mean_processing_ms": 0.26241176618575823, "mean_inference_ms": 1.5504455690384487}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6936000, "num_steps_sampled": 3699200, "sample_time_ms": 22728.739, "load_time_ms": 36.546, "grad_time_ms": 11195.454, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004752982931677252, "policy_loss": -0.00697875814512372, "vf_loss": 80.24703979492188, "vf_explained_var": 0.7700864672660828, "kl": 0.001980138709768653, "entropy": 1.1412941217422485, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3699200, "episodes_total": 9248, 
"training_iteration": 289, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-12-24", "timestamp": 1660255944, "time_this_iter_s": 33.15079879760742, "time_total_s": 14358.061007976532, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14358.061007976532, "timesteps_since_restore": 3699200, "iterations_since_restore": 289, "perf": {"cpu_util_percent": 32.6936170212766, "ram_util_percent": 58.93829787234045}} -{"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 617.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 308.815}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.63, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 19.11, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.35, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.45, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.37, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.69, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.81, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.76, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.37, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.37, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [590.0, 633.0, 584.0, 636.0, 630.0, 630.0, 633.0, 587.0, 636.0, 630.0, 582.0, 405.0, 633.0, 630.0, 627.0, 579.0, 576.0, 587.0, 582.0, 636.0, 636.0, 618.0, 636.0, 630.0, 636.0, 624.0, 639.0, 633.0, 630.0, 587.0, 587.0, 630.0, 639.0, 582.0, 546.0, 636.0, 584.0, 627.0, 633.0, 627.0, 584.0, 633.0, 636.0, 639.0, 639.0, 630.0, 587.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 630.0, 624.0, 636.0, 633.0, 587.0, 639.0, 630.0, 639.0, 587.0, 639.0, 636.0, 630.0, 630.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 639.0, 627.0, 627.0, 639.0, 639.0, 636.0, 630.0, 636.0, 630.0, 633.0, 639.0, 579.0, 587.0, 630.0, 633.0, 630.0, 639.0, 582.0, 630.0, 639.0, 636.0, 630.0, 573.0, 582.0, 630.0, 582.0, 636.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 297.0, 316.0, 317.0, 
290.0, 294.0, 317.0, 319.0, 311.0, 319.0, 323.0, 307.0, 316.0, 317.0, 292.0, 295.0, 319.0, 317.0, 313.0, 317.0, 281.0, 301.0, 198.0, 207.0, 313.0, 320.0, 310.0, 320.0, 311.0, 316.0, 282.0, 297.0, 285.0, 291.0, 309.0, 278.0, 293.0, 289.0, 311.0, 325.0, 321.0, 315.0, 302.0, 316.0, 314.0, 322.0, 311.0, 319.0, 319.0, 317.0, 319.0, 305.0, 319.0, 320.0, 322.0, 311.0, 326.0, 304.0, 301.0, 286.0, 293.0, 294.0, 313.0, 317.0, 317.0, 322.0, 293.0, 289.0, 283.0, 263.0, 323.0, 313.0, 285.0, 299.0, 316.0, 311.0, 324.0, 309.0, 318.0, 309.0, 289.0, 295.0, 314.0, 319.0, 316.0, 320.0, 319.0, 320.0, 317.0, 322.0, 314.0, 316.0, 293.0, 294.0, 318.0, 312.0, 288.0, 294.0, 289.0, 293.0, 292.0, 292.0, 315.0, 315.0, 324.0, 312.0, 324.0, 306.0, 313.0, 311.0, 314.0, 322.0, 311.0, 322.0, 295.0, 292.0, 322.0, 317.0, 308.0, 322.0, 314.0, 325.0, 293.0, 294.0, 317.0, 322.0, 318.0, 318.0, 319.0, 311.0, 314.0, 316.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 321.0, 312.0, 318.0, 318.0, 319.0, 320.0, 305.0, 322.0, 310.0, 317.0, 319.0, 320.0, 314.0, 325.0, 321.0, 315.0, 311.0, 319.0, 311.0, 325.0, 311.0, 319.0, 311.0, 322.0, 319.0, 320.0, 285.0, 294.0, 298.0, 289.0, 319.0, 311.0, 313.0, 320.0, 308.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 311.0, 324.0, 315.0, 314.0, 322.0, 311.0, 319.0, 276.0, 297.0, 286.0, 296.0, 308.0, 322.0, 286.0, 296.0, 324.0, 312.0, 321.0, 318.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9689852254621897, "mean_processing_ms": 0.26205467993702586, "mean_inference_ms": 1.548820818018191}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6960000, "num_steps_sampled": 3712000, "sample_time_ms": 22386.717, "load_time_ms": 37.444, "grad_time_ms": 11069.639, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020440255757421255, "policy_loss": -0.0058852084912359715, "vf_loss": 84.9912338256836, "vf_explained_var": 0.7650973200798035, "kl": 0.0021299307700246572, "entropy": 1.1397589445114136, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3712000, "episodes_total": 9280, "training_iteration": 290, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-12-56", "timestamp": 1660255976, "time_this_iter_s": 32.003324031829834, "time_total_s": 14390.064332008362, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14390.064332008362, "timesteps_since_restore": 3712000, "iterations_since_restore": 290, "perf": {"cpu_util_percent": 30.039130434782606, "ram_util_percent": 58.817391304347815}} -{"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 614.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 307.38}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.76, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.96, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 19.15, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.34, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.2, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.35, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.58, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.76, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.69, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.2, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.2, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [544.0, 633.0, 627.0, 618.0, 636.0, 627.0, 581.0, 582.0, 639.0, 587.0, 639.0, 573.0, 633.0, 636.0, 587.0, 639.0, 564.0, 639.0, 630.0, 633.0, 558.0, 582.0, 630.0, 630.0, 587.0, 624.0, 636.0, 582.0, 587.0, 633.0, 587.0, 576.0, 630.0, 630.0, 636.0, 636.0, 636.0, 636.0, 633.0, 636.0, 639.0, 627.0, 627.0, 639.0, 639.0, 636.0, 630.0, 636.0, 630.0, 633.0, 639.0, 579.0, 587.0, 630.0, 633.0, 630.0, 639.0, 582.0, 630.0, 639.0, 636.0, 630.0, 573.0, 582.0, 630.0, 582.0, 636.0, 639.0, 590.0, 633.0, 584.0, 636.0, 630.0, 630.0, 633.0, 587.0, 636.0, 630.0, 582.0, 405.0, 633.0, 630.0, 627.0, 579.0, 576.0, 587.0, 582.0, 636.0, 636.0, 618.0, 636.0, 630.0, 636.0, 624.0, 639.0, 633.0, 630.0, 587.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 277.0, 319.0, 314.0, 311.0, 316.0, 315.0, 303.0, 309.0, 327.0, 305.0, 322.0, 284.0, 297.0, 290.0, 292.0, 311.0, 328.0, 288.0, 299.0, 317.0, 322.0, 292.0, 281.0, 316.0, 317.0, 324.0, 312.0, 293.0, 294.0, 322.0, 317.0, 287.0, 277.0, 319.0, 320.0, 318.0, 312.0, 322.0, 311.0, 292.0, 266.0, 288.0, 294.0, 310.0, 320.0, 314.0, 316.0, 293.0, 294.0, 311.0, 313.0, 319.0, 317.0, 289.0, 293.0, 295.0, 292.0, 316.0, 317.0, 306.0, 281.0, 283.0, 293.0, 319.0, 311.0, 314.0, 316.0, 319.0, 317.0, 314.0, 322.0, 324.0, 312.0, 319.0, 317.0, 321.0, 312.0, 318.0, 318.0, 319.0, 320.0, 305.0, 322.0, 310.0, 317.0, 319.0, 320.0, 314.0, 325.0, 321.0, 315.0, 311.0, 319.0, 311.0, 325.0, 311.0, 319.0, 311.0, 322.0, 319.0, 320.0, 285.0, 294.0, 298.0, 289.0, 319.0, 311.0, 313.0, 320.0, 308.0, 322.0, 319.0, 320.0, 293.0, 289.0, 319.0, 311.0, 324.0, 315.0, 314.0, 322.0, 311.0, 319.0, 276.0, 297.0, 286.0, 296.0, 308.0, 322.0, 286.0, 296.0, 324.0, 312.0, 321.0, 318.0, 293.0, 297.0, 316.0, 317.0, 290.0, 294.0, 317.0, 319.0, 311.0, 319.0, 323.0, 307.0, 316.0, 317.0, 292.0, 295.0, 319.0, 317.0, 313.0, 317.0, 281.0, 301.0, 198.0, 207.0, 313.0, 320.0, 310.0, 320.0, 311.0, 316.0, 282.0, 297.0, 285.0, 291.0, 309.0, 278.0, 293.0, 289.0, 311.0, 325.0, 321.0, 315.0, 302.0, 316.0, 314.0, 322.0, 311.0, 319.0, 319.0, 317.0, 319.0, 305.0, 319.0, 320.0, 322.0, 311.0, 326.0, 304.0, 301.0, 286.0, 293.0, 294.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.967235186119729, "mean_processing_ms": 0.26170596959071135, "mean_inference_ms": 1.5471921568344904}, "off_policy_estimator": {}, "info": {"num_steps_trained": 6984000, "num_steps_sampled": 3724800, "sample_time_ms": 21912.426, "load_time_ms": 37.292, "grad_time_ms": 10653.236, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.003775561461225152, "policy_loss": -0.004369485657662153, "vf_loss": 87.11421966552734, "vf_explained_var": 0.7634318470954895, "kl": 0.0017795447492972016, "entropy": 1.1327377557754517, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3724800, "episodes_total": 9312, "training_iteration": 291, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-13-27", "timestamp": 1660256007, "time_this_iter_s": 30.522319793701172, "time_total_s": 14420.586651802063, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14420.586651802063, "timesteps_since_restore": 3724800, "iterations_since_restore": 291, "perf": {"cpu_util_percent": 38.19767441860465, "ram_util_percent": 59.255813953488385}} -{"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 610.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.465}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 187.73, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.16, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.04, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.5, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.25, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.77, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.23, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, 
"useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.23, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.23, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 639.0, 633.0, 633.0, 633.0, 567.0, 636.0, 633.0, 633.0, 636.0, 636.0, 558.0, 584.0, 639.0, 639.0, 633.0, 567.0, 590.0, 636.0, 633.0, 636.0, 630.0, 582.0, 630.0, 636.0, 558.0, 639.0, 584.0, 587.0, 544.0, 636.0, 633.0, 630.0, 582.0, 636.0, 639.0, 590.0, 633.0, 584.0, 636.0, 630.0, 630.0, 633.0, 587.0, 636.0, 630.0, 582.0, 405.0, 633.0, 630.0, 627.0, 579.0, 576.0, 587.0, 582.0, 636.0, 636.0, 618.0, 636.0, 630.0, 636.0, 624.0, 639.0, 633.0, 630.0, 587.0, 587.0, 630.0, 544.0, 633.0, 627.0, 618.0, 636.0, 627.0, 581.0, 582.0, 639.0, 587.0, 639.0, 573.0, 633.0, 636.0, 587.0, 639.0, 564.0, 639.0, 630.0, 633.0, 558.0, 582.0, 630.0, 630.0, 587.0, 624.0, 636.0, 582.0, 587.0, 633.0, 587.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 325.0, 314.0, 319.0, 314.0, 309.0, 324.0, 311.0, 322.0, 277.0, 290.0, 316.0, 320.0, 325.0, 308.0, 312.0, 321.0, 327.0, 309.0, 318.0, 318.0, 274.0, 284.0, 288.0, 296.0, 324.0, 315.0, 319.0, 320.0, 324.0, 309.0, 279.0, 288.0, 301.0, 289.0, 312.0, 324.0, 311.0, 322.0, 319.0, 317.0, 321.0, 309.0, 291.0, 291.0, 319.0, 311.0, 317.0, 319.0, 273.0, 285.0, 322.0, 317.0, 299.0, 285.0, 295.0, 292.0, 275.0, 269.0, 319.0, 317.0, 317.0, 316.0, 308.0, 322.0, 286.0, 296.0, 324.0, 312.0, 321.0, 318.0, 293.0, 297.0, 316.0, 317.0, 290.0, 294.0, 317.0, 319.0, 311.0, 319.0, 323.0, 307.0, 316.0, 317.0, 292.0, 295.0, 319.0, 317.0, 313.0, 317.0, 281.0, 301.0, 198.0, 207.0, 313.0, 320.0, 310.0, 320.0, 311.0, 316.0, 282.0, 297.0, 285.0, 291.0, 309.0, 278.0, 293.0, 289.0, 311.0, 325.0, 321.0, 315.0, 302.0, 316.0, 314.0, 322.0, 311.0, 319.0, 319.0, 317.0, 319.0, 305.0, 319.0, 320.0, 322.0, 311.0, 326.0, 304.0, 301.0, 286.0, 293.0, 294.0, 313.0, 317.0, 267.0, 277.0, 319.0, 314.0, 311.0, 316.0, 315.0, 303.0, 309.0, 327.0, 305.0, 322.0, 284.0, 297.0, 290.0, 292.0, 311.0, 328.0, 288.0, 299.0, 317.0, 322.0, 292.0, 281.0, 316.0, 317.0, 324.0, 312.0, 293.0, 294.0, 322.0, 317.0, 287.0, 277.0, 319.0, 320.0, 318.0, 312.0, 322.0, 311.0, 292.0, 266.0, 288.0, 294.0, 310.0, 320.0, 314.0, 316.0, 293.0, 294.0, 311.0, 313.0, 319.0, 317.0, 289.0, 293.0, 295.0, 292.0, 316.0, 317.0, 306.0, 281.0, 283.0, 293.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9654876642180179, "mean_processing_ms": 0.26135729355980103, "mean_inference_ms": 1.545357165029807}, "off_policy_estimator": {}, "info": 
{"num_steps_trained": 7008000, "num_steps_sampled": 3737600, "sample_time_ms": 21647.86, "load_time_ms": 37.443, "grad_time_ms": 10160.016, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012383932480588555, "policy_loss": -0.006961038801819086, "vf_loss": 87.61873626708984, "vf_explained_var": 0.757759153842926, "kl": 0.001912236213684082, "entropy": 1.1248730421066284, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3737600, "episodes_total": 9344, "training_iteration": 292, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-13-55", "timestamp": 1660256035, "time_this_iter_s": 27.889997720718384, "time_total_s": 14448.476649522781, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": 
{"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14448.476649522781, "timesteps_since_restore": 3737600, "iterations_since_restore": 292, "perf": {"cpu_util_percent": 31.551282051282044, "ram_util_percent": 59.09743589743588}} -{"episode_reward_max": 639.0, "episode_reward_min": 339.0, "episode_reward_mean": 610.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 167.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 305.435}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.67, "shaped_reward_min": 99, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.18, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.39, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.87, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.29, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.96, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 
11, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.29, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.96, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.29, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.96, "viable_onion_potting_agent_1_min": 12, 
"viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [555.0, 633.0, 564.0, 639.0, 581.0, 636.0, 630.0, 639.0, 630.0, 590.0, 639.0, 636.0, 639.0, 627.0, 630.0, 639.0, 639.0, 636.0, 633.0, 630.0, 636.0, 639.0, 630.0, 639.0, 584.0, 633.0, 633.0, 636.0, 339.0, 579.0, 579.0, 587.0, 630.0, 587.0, 587.0, 630.0, 544.0, 633.0, 627.0, 618.0, 636.0, 627.0, 581.0, 582.0, 639.0, 587.0, 639.0, 573.0, 633.0, 636.0, 587.0, 639.0, 564.0, 639.0, 630.0, 633.0, 558.0, 582.0, 630.0, 630.0, 587.0, 624.0, 636.0, 582.0, 587.0, 633.0, 587.0, 576.0, 582.0, 639.0, 633.0, 633.0, 633.0, 567.0, 636.0, 633.0, 633.0, 636.0, 636.0, 558.0, 584.0, 639.0, 639.0, 633.0, 567.0, 590.0, 636.0, 633.0, 636.0, 
630.0, 582.0, 630.0, 636.0, 558.0, 639.0, 584.0, 587.0, 544.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 270.0, 316.0, 317.0, 293.0, 271.0, 311.0, 328.0, 295.0, 286.0, 324.0, 312.0, 321.0, 309.0, 316.0, 323.0, 311.0, 319.0, 299.0, 291.0, 319.0, 320.0, 314.0, 322.0, 321.0, 318.0, 316.0, 311.0, 311.0, 319.0, 315.0, 324.0, 314.0, 325.0, 316.0, 320.0, 311.0, 322.0, 300.0, 330.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 314.0, 325.0, 288.0, 296.0, 318.0, 315.0, 319.0, 314.0, 316.0, 320.0, 167.0, 172.0, 288.0, 291.0, 285.0, 294.0, 293.0, 294.0, 326.0, 304.0, 301.0, 286.0, 293.0, 294.0, 313.0, 317.0, 267.0, 277.0, 319.0, 314.0, 311.0, 316.0, 315.0, 303.0, 309.0, 327.0, 305.0, 322.0, 284.0, 297.0, 290.0, 292.0, 311.0, 328.0, 288.0, 299.0, 317.0, 322.0, 292.0, 281.0, 316.0, 317.0, 324.0, 312.0, 293.0, 294.0, 322.0, 317.0, 287.0, 277.0, 319.0, 320.0, 318.0, 312.0, 322.0, 311.0, 292.0, 266.0, 288.0, 294.0, 310.0, 320.0, 314.0, 316.0, 293.0, 294.0, 311.0, 313.0, 319.0, 317.0, 289.0, 293.0, 295.0, 292.0, 316.0, 317.0, 306.0, 281.0, 283.0, 293.0, 289.0, 293.0, 325.0, 314.0, 319.0, 314.0, 309.0, 324.0, 311.0, 322.0, 277.0, 290.0, 316.0, 320.0, 325.0, 308.0, 312.0, 321.0, 327.0, 309.0, 318.0, 318.0, 274.0, 284.0, 288.0, 296.0, 324.0, 315.0, 319.0, 320.0, 324.0, 309.0, 279.0, 288.0, 301.0, 289.0, 312.0, 324.0, 311.0, 322.0, 319.0, 317.0, 321.0, 309.0, 291.0, 291.0, 319.0, 311.0, 317.0, 319.0, 273.0, 285.0, 322.0, 317.0, 299.0, 285.0, 295.0, 292.0, 275.0, 269.0, 319.0, 
317.0, 317.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9637668687735891, "mean_processing_ms": 0.2610164264718474, "mean_inference_ms": 1.5436541723929016}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7032000, "num_steps_sampled": 3750400, "sample_time_ms": 21753.951, "load_time_ms": 37.441, "grad_time_ms": 10065.741, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0023420238867402077, "policy_loss": -0.005699212197214365, "vf_loss": 86.0804214477539, "vf_explained_var": 0.7711854577064514, "kl": 0.0016376747516915202, "entropy": 1.1336089372634888, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3750400, "episodes_total": 9376, "training_iteration": 293, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-14-28", "timestamp": 1660256068, "time_this_iter_s": 33.79537034034729, "time_total_s": 14482.272019863129, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 
0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14482.272019863129, "timesteps_since_restore": 3750400, "iterations_since_restore": 293, "perf": {"cpu_util_percent": 29.666666666666668, "ram_util_percent": 58.67083333333335}} -{"episode_reward_max": 639.0, "episode_reward_min": 339.0, "episode_reward_mean": 615.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 167.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 307.66}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.52, "shaped_reward_min": 99, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 
0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.26, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.3, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.68, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.56, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.62, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.84, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.24, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, 
"potting_onion_agent_1_mean": 18.12, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.37, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.12, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.12, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 633.0, 636.0, 636.0, 630.0, 636.0, 639.0, 630.0, 630.0, 630.0, 633.0, 630.0, 630.0, 582.0, 627.0, 633.0, 636.0, 636.0, 576.0, 630.0, 627.0, 627.0, 636.0, 561.0, 579.0, 636.0, 639.0, 639.0, 633.0, 630.0, 569.0, 587.0, 633.0, 587.0, 576.0, 582.0, 639.0, 633.0, 633.0, 633.0, 567.0, 636.0, 633.0, 633.0, 636.0, 636.0, 558.0, 584.0, 639.0, 639.0, 633.0, 567.0, 590.0, 636.0, 633.0, 636.0, 630.0, 582.0, 
630.0, 636.0, 558.0, 639.0, 584.0, 587.0, 544.0, 636.0, 633.0, 555.0, 633.0, 564.0, 639.0, 581.0, 636.0, 630.0, 639.0, 630.0, 590.0, 639.0, 636.0, 639.0, 627.0, 630.0, 639.0, 639.0, 636.0, 633.0, 630.0, 636.0, 639.0, 630.0, 639.0, 584.0, 633.0, 633.0, 636.0, 339.0, 579.0, 579.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 306.0, 330.0, 327.0, 306.0, 319.0, 317.0, 324.0, 312.0, 318.0, 312.0, 314.0, 322.0, 314.0, 325.0, 319.0, 311.0, 313.0, 317.0, 316.0, 314.0, 313.0, 320.0, 313.0, 317.0, 316.0, 314.0, 299.0, 283.0, 320.0, 307.0, 312.0, 321.0, 322.0, 314.0, 319.0, 317.0, 288.0, 288.0, 312.0, 318.0, 316.0, 311.0, 313.0, 314.0, 314.0, 322.0, 279.0, 282.0, 299.0, 280.0, 314.0, 322.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 318.0, 312.0, 288.0, 281.0, 295.0, 292.0, 316.0, 317.0, 306.0, 281.0, 283.0, 293.0, 289.0, 293.0, 325.0, 314.0, 319.0, 314.0, 309.0, 324.0, 311.0, 322.0, 277.0, 290.0, 316.0, 320.0, 325.0, 308.0, 312.0, 321.0, 327.0, 309.0, 318.0, 318.0, 274.0, 284.0, 288.0, 296.0, 324.0, 315.0, 319.0, 320.0, 324.0, 309.0, 279.0, 288.0, 301.0, 289.0, 312.0, 324.0, 311.0, 322.0, 319.0, 317.0, 321.0, 309.0, 291.0, 291.0, 319.0, 311.0, 317.0, 319.0, 273.0, 285.0, 322.0, 317.0, 299.0, 285.0, 295.0, 292.0, 275.0, 269.0, 319.0, 317.0, 317.0, 316.0, 285.0, 270.0, 316.0, 317.0, 293.0, 271.0, 311.0, 328.0, 295.0, 286.0, 324.0, 312.0, 321.0, 309.0, 316.0, 323.0, 311.0, 319.0, 299.0, 291.0, 319.0, 320.0, 314.0, 322.0, 321.0, 318.0, 316.0, 311.0, 311.0, 319.0, 315.0, 
324.0, 314.0, 325.0, 316.0, 320.0, 311.0, 322.0, 300.0, 330.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 314.0, 325.0, 288.0, 296.0, 318.0, 315.0, 319.0, 314.0, 316.0, 320.0, 167.0, 172.0, 288.0, 291.0, 285.0, 294.0, 293.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9620401402665292, "mean_processing_ms": 0.2606729789750365, "mean_inference_ms": 1.5418645190919325}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7056000, "num_steps_sampled": 3763200, "sample_time_ms": 21541.312, "load_time_ms": 37.573, "grad_time_ms": 10164.498, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031338699627667665, "policy_loss": -0.004722007550299168, "vf_loss": 84.24658966064453, "vf_explained_var": 0.7650328278541565, "kl": 0.0023102371487766504, "entropy": 1.1375713348388672, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3763200, "episodes_total": 9408, "training_iteration": 294, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-15-00", "timestamp": 1660256100, "time_this_iter_s": 31.67550492286682, "time_total_s": 14513.947524785995, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14513.947524785995, "timesteps_since_restore": 3763200, "iterations_since_restore": 294, "perf": {"cpu_util_percent": 30.09333333333334, "ram_util_percent": 58.68444444444443}} -{"episode_reward_max": 639.0, "episode_reward_min": 339.0, "episode_reward_mean": 616.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 167.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 308.195}, 
"custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 189.19, "shaped_reward_min": 99, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.11, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.41, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.64, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.79, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.21, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.18, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.3, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.48, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.85, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.79, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.18, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.3, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.18, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.3, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 633.0, 573.0, 596.0, 636.0, 633.0, 630.0, 627.0, 636.0, 630.0, 639.0, 630.0, 539.0, 633.0, 633.0, 630.0, 639.0, 630.0, 587.0, 587.0, 633.0, 633.0, 639.0, 639.0, 413.0, 633.0, 636.0, 636.0, 587.0, 
636.0, 627.0, 633.0, 587.0, 544.0, 636.0, 633.0, 555.0, 633.0, 564.0, 639.0, 581.0, 636.0, 630.0, 639.0, 630.0, 590.0, 639.0, 636.0, 639.0, 627.0, 630.0, 639.0, 639.0, 636.0, 633.0, 630.0, 636.0, 639.0, 630.0, 639.0, 584.0, 633.0, 633.0, 636.0, 339.0, 579.0, 579.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 630.0, 636.0, 639.0, 630.0, 630.0, 630.0, 633.0, 630.0, 630.0, 582.0, 627.0, 633.0, 636.0, 636.0, 576.0, 630.0, 627.0, 627.0, 636.0, 561.0, 579.0, 636.0, 639.0, 639.0, 633.0, 630.0, 569.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 320.0, 317.0, 316.0, 288.0, 285.0, 293.0, 303.0, 324.0, 312.0, 321.0, 312.0, 322.0, 308.0, 313.0, 314.0, 316.0, 320.0, 321.0, 309.0, 325.0, 314.0, 317.0, 313.0, 265.0, 274.0, 313.0, 320.0, 319.0, 314.0, 317.0, 313.0, 317.0, 322.0, 321.0, 309.0, 306.0, 281.0, 292.0, 295.0, 310.0, 323.0, 319.0, 314.0, 319.0, 320.0, 319.0, 320.0, 199.0, 214.0, 321.0, 312.0, 319.0, 317.0, 319.0, 317.0, 295.0, 292.0, 322.0, 314.0, 308.0, 319.0, 317.0, 316.0, 295.0, 292.0, 275.0, 269.0, 319.0, 317.0, 317.0, 316.0, 285.0, 270.0, 316.0, 317.0, 293.0, 271.0, 311.0, 328.0, 295.0, 286.0, 324.0, 312.0, 321.0, 309.0, 316.0, 323.0, 311.0, 319.0, 299.0, 291.0, 319.0, 320.0, 314.0, 322.0, 321.0, 318.0, 316.0, 311.0, 311.0, 319.0, 315.0, 324.0, 314.0, 325.0, 316.0, 320.0, 311.0, 322.0, 300.0, 330.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 314.0, 325.0, 288.0, 296.0, 318.0, 315.0, 319.0, 314.0, 316.0, 320.0, 167.0, 172.0, 288.0, 291.0, 285.0, 294.0, 293.0, 294.0, 316.0, 
314.0, 306.0, 330.0, 327.0, 306.0, 319.0, 317.0, 324.0, 312.0, 318.0, 312.0, 314.0, 322.0, 314.0, 325.0, 319.0, 311.0, 313.0, 317.0, 316.0, 314.0, 313.0, 320.0, 313.0, 317.0, 316.0, 314.0, 299.0, 283.0, 320.0, 307.0, 312.0, 321.0, 322.0, 314.0, 319.0, 317.0, 288.0, 288.0, 312.0, 318.0, 316.0, 311.0, 313.0, 314.0, 314.0, 322.0, 279.0, 282.0, 299.0, 280.0, 314.0, 322.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 318.0, 312.0, 288.0, 281.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9603539643629071, "mean_processing_ms": 0.26034177347662363, "mean_inference_ms": 1.5408082131746255}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7080000, "num_steps_sampled": 3776000, "sample_time_ms": 22187.522, "load_time_ms": 38.247, "grad_time_ms": 10375.626, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0001668116747168824, "policy_loss": -0.007916351780295372, "vf_loss": 83.1385726928711, "vf_explained_var": 0.7759819626808167, "kl": 0.0019673772621899843, "entropy": 1.1286202669143677, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3776000, "episodes_total": 9440, "training_iteration": 295, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-15-42", "timestamp": 1660256142, "time_this_iter_s": 41.61074709892273, "time_total_s": 14555.558271884918, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14555.558271884918, "timesteps_since_restore": 3776000, "iterations_since_restore": 295, "perf": {"cpu_util_percent": 32.182758620689654, "ram_util_percent": 58.76206896551724}} -{"episode_reward_max": 639.0, 
"episode_reward_min": 339.0, "episode_reward_mean": 614.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 167.0}, "policy_reward_max": {"ppo": 333.0}, "policy_reward_mean": {"ppo": 307.085}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.57, "shaped_reward_min": 99, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.06, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 19.21, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.39, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, 
"onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.18, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.14, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.47, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.83, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.72, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.18, "optimal_onion_potting_agent_0_min": 8, 
"optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.14, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.18, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.14, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [618.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 587.0, 633.0, 639.0, 587.0, 630.0, 575.0, 473.0, 633.0, 627.0, 636.0, 639.0, 636.0, 636.0, 633.0, 633.0, 639.0, 630.0, 433.0, 630.0, 633.0, 636.0, 633.0, 639.0, 576.0, 582.0, 339.0, 579.0, 579.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 630.0, 636.0, 639.0, 630.0, 630.0, 630.0, 633.0, 630.0, 630.0, 582.0, 627.0, 633.0, 636.0, 636.0, 576.0, 630.0, 627.0, 627.0, 636.0, 561.0, 579.0, 636.0, 639.0, 639.0, 633.0, 630.0, 569.0, 639.0, 633.0, 573.0, 596.0, 636.0, 633.0, 630.0, 627.0, 636.0, 630.0, 639.0, 630.0, 539.0, 633.0, 633.0, 630.0, 639.0, 630.0, 587.0, 587.0, 633.0, 633.0, 639.0, 639.0, 413.0, 633.0, 636.0, 636.0, 587.0, 636.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 318.0, 317.0, 322.0, 324.0, 309.0, 316.0, 314.0, 322.0, 317.0, 316.0, 314.0, 320.0, 316.0, 290.0, 297.0, 319.0, 314.0, 317.0, 322.0, 293.0, 294.0, 312.0, 318.0, 284.0, 291.0, 237.0, 236.0, 319.0, 314.0, 317.0, 310.0, 322.0, 314.0, 306.0, 333.0, 319.0, 317.0, 316.0, 320.0, 316.0, 317.0, 318.0, 315.0, 317.0, 322.0, 318.0, 312.0, 218.0, 215.0, 319.0, 311.0, 314.0, 319.0, 316.0, 320.0, 319.0, 314.0, 319.0, 320.0, 286.0, 290.0, 295.0, 287.0, 167.0, 172.0, 288.0, 291.0, 285.0, 294.0, 293.0, 294.0, 316.0, 314.0, 306.0, 330.0, 327.0, 306.0, 319.0, 317.0, 324.0, 312.0, 318.0, 312.0, 314.0, 322.0, 314.0, 325.0, 319.0, 311.0, 313.0, 317.0, 316.0, 314.0, 313.0, 320.0, 313.0, 317.0, 316.0, 314.0, 299.0, 283.0, 320.0, 307.0, 312.0, 
321.0, 322.0, 314.0, 319.0, 317.0, 288.0, 288.0, 312.0, 318.0, 316.0, 311.0, 313.0, 314.0, 314.0, 322.0, 279.0, 282.0, 299.0, 280.0, 314.0, 322.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 318.0, 312.0, 288.0, 281.0, 319.0, 320.0, 317.0, 316.0, 288.0, 285.0, 293.0, 303.0, 324.0, 312.0, 321.0, 312.0, 322.0, 308.0, 313.0, 314.0, 316.0, 320.0, 321.0, 309.0, 325.0, 314.0, 317.0, 313.0, 265.0, 274.0, 313.0, 320.0, 319.0, 314.0, 317.0, 313.0, 317.0, 322.0, 321.0, 309.0, 306.0, 281.0, 292.0, 295.0, 310.0, 323.0, 319.0, 314.0, 319.0, 320.0, 319.0, 320.0, 199.0, 214.0, 321.0, 312.0, 319.0, 317.0, 319.0, 317.0, 295.0, 292.0, 322.0, 314.0, 308.0, 319.0, 317.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9586694910648496, "mean_processing_ms": 0.26000987158476824, "mean_inference_ms": 1.539770678675762}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7104000, "num_steps_sampled": 3788800, "sample_time_ms": 22271.193, "load_time_ms": 38.04, "grad_time_ms": 10225.976, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004963720217347145, "policy_loss": -0.003322723088786006, "vf_loss": 88.4856948852539, "vf_explained_var": 0.7634937167167664, "kl": 0.0021246925462037325, "entropy": 1.1242562532424927, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3788800, "episodes_total": 9472, "training_iteration": 296, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-16-13", "timestamp": 1660256173, "time_this_iter_s": 30.998157024383545, "time_total_s": 14586.556428909302, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
14586.556428909302, "timesteps_since_restore": 3788800, "iterations_since_restore": 296, "perf": {"cpu_util_percent": 31.388636363636365, "ram_util_percent": 58.774999999999984}} -{"episode_reward_max": 639.0, "episode_reward_min": 413.0, "episode_reward_mean": 616.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 333.0}, "policy_reward_mean": {"ppo": 308.395}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.99, "shaped_reward_min": 133, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.23, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 19.2, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 16.8, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, 
"useful_onion_pickup_agent_1_mean": 18.4, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.34, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.16, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.77, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.78, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.34, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.16, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.34, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.16, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 
0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 579.0, 639.0, 636.0, 633.0, 630.0, 582.0, 636.0, 633.0, 636.0, 636.0, 639.0, 579.0, 584.0, 630.0, 639.0, 627.0, 527.0, 630.0, 582.0, 630.0, 639.0, 636.0, 633.0, 639.0, 639.0, 627.0, 579.0, 627.0, 639.0, 639.0, 639.0, 639.0, 633.0, 630.0, 569.0, 639.0, 633.0, 573.0, 596.0, 636.0, 633.0, 630.0, 627.0, 636.0, 630.0, 639.0, 630.0, 539.0, 633.0, 633.0, 630.0, 639.0, 630.0, 587.0, 587.0, 633.0, 633.0, 639.0, 639.0, 413.0, 633.0, 636.0, 636.0, 587.0, 636.0, 627.0, 633.0, 618.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 587.0, 633.0, 639.0, 587.0, 630.0, 575.0, 473.0, 633.0, 627.0, 636.0, 639.0, 636.0, 636.0, 633.0, 633.0, 639.0, 630.0, 433.0, 630.0, 633.0, 636.0, 633.0, 639.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 278.0, 301.0, 316.0, 323.0, 317.0, 319.0, 307.0, 326.0, 315.0, 315.0, 290.0, 292.0, 325.0, 311.0, 316.0, 317.0, 314.0, 322.0, 316.0, 320.0, 313.0, 326.0, 291.0, 288.0, 294.0, 290.0, 310.0, 320.0, 320.0, 319.0, 314.0, 313.0, 258.0, 269.0, 317.0, 313.0, 296.0, 286.0, 321.0, 309.0, 312.0, 327.0, 317.0, 319.0, 321.0, 312.0, 314.0, 325.0, 314.0, 325.0, 316.0, 311.0, 298.0, 281.0, 322.0, 305.0, 317.0, 322.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 316.0, 317.0, 318.0, 312.0, 288.0, 281.0, 319.0, 320.0, 317.0, 316.0, 288.0, 285.0, 
293.0, 303.0, 324.0, 312.0, 321.0, 312.0, 322.0, 308.0, 313.0, 314.0, 316.0, 320.0, 321.0, 309.0, 325.0, 314.0, 317.0, 313.0, 265.0, 274.0, 313.0, 320.0, 319.0, 314.0, 317.0, 313.0, 317.0, 322.0, 321.0, 309.0, 306.0, 281.0, 292.0, 295.0, 310.0, 323.0, 319.0, 314.0, 319.0, 320.0, 319.0, 320.0, 199.0, 214.0, 321.0, 312.0, 319.0, 317.0, 319.0, 317.0, 295.0, 292.0, 322.0, 314.0, 308.0, 319.0, 317.0, 316.0, 300.0, 318.0, 317.0, 322.0, 324.0, 309.0, 316.0, 314.0, 322.0, 317.0, 316.0, 314.0, 320.0, 316.0, 290.0, 297.0, 319.0, 314.0, 317.0, 322.0, 293.0, 294.0, 312.0, 318.0, 284.0, 291.0, 237.0, 236.0, 319.0, 314.0, 317.0, 310.0, 322.0, 314.0, 306.0, 333.0, 319.0, 317.0, 316.0, 320.0, 316.0, 317.0, 318.0, 315.0, 317.0, 322.0, 318.0, 312.0, 218.0, 215.0, 319.0, 311.0, 314.0, 319.0, 316.0, 320.0, 319.0, 314.0, 319.0, 320.0, 286.0, 290.0, 295.0, 287.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9570006029896723, "mean_processing_ms": 0.25968048709196123, "mean_inference_ms": 1.5389220446317904}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7128000, "num_steps_sampled": 3801600, "sample_time_ms": 22480.496, "load_time_ms": 38.114, "grad_time_ms": 10354.328, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004928060807287693, "policy_loss": -0.0034768336918205023, "vf_loss": 89.6873550415039, "vf_explained_var": 0.7655234336853027, "kl": 0.0019178093643859029, "entropy": 1.1276906728744507, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3801600, "episodes_total": 9504, "training_iteration": 297, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-16-48", "timestamp": 1660256208, "time_this_iter_s": 34.984565019607544, "time_total_s": 14621.54099392891, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, 
"num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14621.54099392891, "timesteps_since_restore": 3801600, "iterations_since_restore": 297, "perf": {"cpu_util_percent": 30.266, "ram_util_percent": 58.788000000000004}} -{"episode_reward_max": 639.0, "episode_reward_min": 433.0, "episode_reward_mean": 615.99, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 333.0}, "policy_reward_mean": {"ppo": 307.995}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.59, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.45, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.92, 
"onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.96, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.16, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.47, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.99, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.47, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.99, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.47, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.99, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 624.0, 576.0, 639.0, 636.0, 630.0, 522.0, 587.0, 639.0, 578.0, 636.0, 584.0, 639.0, 522.0, 630.0, 633.0, 633.0, 576.0, 567.0, 633.0, 636.0, 636.0, 587.0, 627.0, 636.0, 627.0, 639.0, 636.0, 587.0, 630.0, 633.0, 636.0, 587.0, 636.0, 627.0, 633.0, 618.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 587.0, 633.0, 639.0, 587.0, 630.0, 575.0, 473.0, 633.0, 627.0, 636.0, 639.0, 636.0, 636.0, 633.0, 633.0, 639.0, 630.0, 433.0, 630.0, 633.0, 636.0, 633.0, 639.0, 576.0, 582.0, 587.0, 579.0, 639.0, 636.0, 633.0, 630.0, 582.0, 636.0, 633.0, 636.0, 636.0, 639.0, 579.0, 584.0, 630.0, 639.0, 627.0, 527.0, 630.0, 582.0, 630.0, 639.0, 636.0, 633.0, 639.0, 639.0, 627.0, 579.0, 627.0, 639.0, 639.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 320.0, 312.0, 312.0, 296.0, 280.0, 317.0, 322.0, 313.0, 323.0, 311.0, 319.0, 269.0, 253.0, 296.0, 291.0, 319.0, 320.0, 313.0, 265.0, 319.0, 317.0, 293.0, 291.0, 314.0, 325.0, 254.0, 268.0, 321.0, 309.0, 309.0, 324.0, 314.0, 319.0, 279.0, 297.0, 286.0, 281.0, 311.0, 322.0, 317.0, 319.0, 317.0, 319.0, 286.0, 301.0, 313.0, 314.0, 319.0, 
317.0, 316.0, 311.0, 321.0, 318.0, 317.0, 319.0, 291.0, 296.0, 307.0, 323.0, 313.0, 320.0, 323.0, 313.0, 295.0, 292.0, 322.0, 314.0, 308.0, 319.0, 317.0, 316.0, 300.0, 318.0, 317.0, 322.0, 324.0, 309.0, 316.0, 314.0, 322.0, 317.0, 316.0, 314.0, 320.0, 316.0, 290.0, 297.0, 319.0, 314.0, 317.0, 322.0, 293.0, 294.0, 312.0, 318.0, 284.0, 291.0, 237.0, 236.0, 319.0, 314.0, 317.0, 310.0, 322.0, 314.0, 306.0, 333.0, 319.0, 317.0, 316.0, 320.0, 316.0, 317.0, 318.0, 315.0, 317.0, 322.0, 318.0, 312.0, 218.0, 215.0, 319.0, 311.0, 314.0, 319.0, 316.0, 320.0, 319.0, 314.0, 319.0, 320.0, 286.0, 290.0, 295.0, 287.0, 293.0, 294.0, 278.0, 301.0, 316.0, 323.0, 317.0, 319.0, 307.0, 326.0, 315.0, 315.0, 290.0, 292.0, 325.0, 311.0, 316.0, 317.0, 314.0, 322.0, 316.0, 320.0, 313.0, 326.0, 291.0, 288.0, 294.0, 290.0, 310.0, 320.0, 320.0, 319.0, 314.0, 313.0, 258.0, 269.0, 317.0, 313.0, 296.0, 286.0, 321.0, 309.0, 312.0, 327.0, 317.0, 319.0, 321.0, 312.0, 314.0, 325.0, 314.0, 325.0, 316.0, 311.0, 298.0, 281.0, 322.0, 305.0, 317.0, 322.0, 319.0, 320.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9553216188480552, "mean_processing_ms": 0.25934597014735267, "mean_inference_ms": 1.5375890964227674}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7152000, "num_steps_sampled": 3814400, "sample_time_ms": 22450.487, "load_time_ms": 38.169, "grad_time_ms": 10228.148, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004324051551520824, "policy_loss": -0.003937617409974337, "vf_loss": 88.24027252197266, "vf_explained_var": 0.7693286538124084, "kl": 0.00227510672993958, "entropy": 1.1247196197509766, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3814400, "episodes_total": 9536, "training_iteration": 298, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-17-18", "timestamp": 1660256238, 
"time_this_iter_s": 30.79404616355896, "time_total_s": 14652.335040092468, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, 
"log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, 
"sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14652.335040092468, "timesteps_since_restore": 3814400, "iterations_since_restore": 298, "perf": {"cpu_util_percent": 31.34418604651162, "ram_util_percent": 58.76279069767441}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 615.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.765}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.53, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.58, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 19.1, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.95, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.27, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.85, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.43, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.28, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.73, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, 
"soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.43, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.43, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 630.0, 636.0, 573.0, 630.0, 587.0, 639.0, 582.0, 624.0, 630.0, 639.0, 587.0, 590.0, 639.0, 639.0, 636.0, 633.0, 579.0, 633.0, 633.0, 587.0, 630.0, 630.0, 525.0, 633.0, 636.0, 582.0, 633.0, 581.0, 582.0, 636.0, 633.0, 639.0, 576.0, 582.0, 587.0, 579.0, 639.0, 636.0, 633.0, 630.0, 582.0, 636.0, 633.0, 636.0, 636.0, 639.0, 579.0, 584.0, 630.0, 639.0, 627.0, 527.0, 630.0, 582.0, 630.0, 639.0, 636.0, 633.0, 639.0, 639.0, 627.0, 579.0, 627.0, 639.0, 639.0, 639.0, 639.0, 624.0, 576.0, 639.0, 636.0, 630.0, 522.0, 587.0, 639.0, 578.0, 636.0, 584.0, 639.0, 522.0, 630.0, 633.0, 633.0, 576.0, 567.0, 633.0, 636.0, 636.0, 587.0, 627.0, 636.0, 627.0, 639.0, 636.0, 587.0, 630.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 313.0, 323.0, 313.0, 317.0, 319.0, 317.0, 293.0, 280.0, 316.0, 314.0, 289.0, 298.0, 319.0, 320.0, 288.0, 294.0, 
313.0, 311.0, 310.0, 320.0, 321.0, 318.0, 291.0, 296.0, 296.0, 294.0, 312.0, 327.0, 317.0, 322.0, 329.0, 307.0, 319.0, 314.0, 285.0, 294.0, 316.0, 317.0, 316.0, 317.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 258.0, 267.0, 311.0, 322.0, 314.0, 322.0, 291.0, 291.0, 324.0, 309.0, 291.0, 290.0, 291.0, 291.0, 319.0, 317.0, 319.0, 314.0, 319.0, 320.0, 286.0, 290.0, 295.0, 287.0, 293.0, 294.0, 278.0, 301.0, 316.0, 323.0, 317.0, 319.0, 307.0, 326.0, 315.0, 315.0, 290.0, 292.0, 325.0, 311.0, 316.0, 317.0, 314.0, 322.0, 316.0, 320.0, 313.0, 326.0, 291.0, 288.0, 294.0, 290.0, 310.0, 320.0, 320.0, 319.0, 314.0, 313.0, 258.0, 269.0, 317.0, 313.0, 296.0, 286.0, 321.0, 309.0, 312.0, 327.0, 317.0, 319.0, 321.0, 312.0, 314.0, 325.0, 314.0, 325.0, 316.0, 311.0, 298.0, 281.0, 322.0, 305.0, 317.0, 322.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 312.0, 312.0, 296.0, 280.0, 317.0, 322.0, 313.0, 323.0, 311.0, 319.0, 269.0, 253.0, 296.0, 291.0, 319.0, 320.0, 313.0, 265.0, 319.0, 317.0, 293.0, 291.0, 314.0, 325.0, 254.0, 268.0, 321.0, 309.0, 309.0, 324.0, 314.0, 319.0, 279.0, 297.0, 286.0, 281.0, 311.0, 322.0, 317.0, 319.0, 317.0, 319.0, 286.0, 301.0, 313.0, 314.0, 319.0, 317.0, 316.0, 311.0, 321.0, 318.0, 317.0, 319.0, 291.0, 296.0, 307.0, 323.0, 313.0, 320.0, 323.0, 313.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9536494258770276, "mean_processing_ms": 0.25901133252507974, "mean_inference_ms": 1.536150189883149}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7176000, "num_steps_sampled": 3827200, "sample_time_ms": 22404.117, "load_time_ms": 38.01, "grad_time_ms": 10081.123, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014618774875998497, "policy_loss": -0.00924667902290821, "vf_loss": 83.49740600585938, "vf_explained_var": 0.7685635685920715, "kl": 0.0018500644946470857, "entropy": 1.12986421585083, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 3827200, "episodes_total": 9568, "training_iteration": 299, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-17-50", "timestamp": 1660256270, "time_this_iter_s": 31.213135242462158, "time_total_s": 14683.54817533493, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14683.54817533493, "timesteps_since_restore": 3827200, "iterations_since_restore": 299, "perf": {"cpu_util_percent": 30.386363636363637, "ram_util_percent": 58.77272727272726}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 616.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.425}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.65, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.79, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.78, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 17.19, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.8, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.68, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.69, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.07, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.33, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.27, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.69, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.69, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 636.0, 579.0, 636.0, 636.0, 582.0, 633.0, 639.0, 582.0, 587.0, 639.0, 630.0, 639.0, 636.0, 627.0, 600.0, 639.0, 630.0, 630.0, 627.0, 639.0, 582.0, 627.0, 587.0, 636.0, 633.0, 636.0, 636.0, 633.0, 630.0, 579.0, 627.0, 639.0, 639.0, 639.0, 639.0, 624.0, 576.0, 639.0, 636.0, 630.0, 522.0, 587.0, 639.0, 578.0, 636.0, 584.0, 639.0, 522.0, 630.0, 633.0, 633.0, 576.0, 567.0, 633.0, 636.0, 636.0, 587.0, 627.0, 636.0, 627.0, 639.0, 636.0, 587.0, 630.0, 633.0, 636.0, 630.0, 636.0, 630.0, 636.0, 573.0, 630.0, 587.0, 639.0, 582.0, 624.0, 630.0, 639.0, 587.0, 590.0, 639.0, 639.0, 636.0, 633.0, 579.0, 633.0, 633.0, 587.0, 630.0, 630.0, 525.0, 633.0, 636.0, 582.0, 633.0, 581.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 314.0, 322.0, 314.0, 322.0, 278.0, 301.0, 314.0, 322.0, 316.0, 320.0, 291.0, 291.0, 314.0, 319.0, 319.0, 320.0, 286.0, 296.0, 283.0, 304.0, 320.0, 319.0, 324.0, 306.0, 322.0, 317.0, 318.0, 318.0, 308.0, 319.0, 299.0, 301.0, 321.0, 318.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 320.0, 319.0, 288.0, 294.0, 319.0, 308.0, 289.0, 298.0, 314.0, 322.0, 317.0, 316.0, 316.0, 320.0, 312.0, 324.0, 319.0, 314.0, 311.0, 319.0, 283.0, 296.0, 322.0, 305.0, 317.0, 322.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 312.0, 312.0, 296.0, 280.0, 317.0, 322.0, 313.0, 323.0, 311.0, 319.0, 269.0, 253.0, 296.0, 291.0, 319.0, 320.0, 313.0, 265.0, 319.0, 317.0, 293.0, 291.0, 314.0, 325.0, 254.0, 268.0, 321.0, 309.0, 309.0, 324.0, 314.0, 319.0, 279.0, 297.0, 286.0, 281.0, 311.0, 322.0, 317.0, 319.0, 317.0, 319.0, 286.0, 301.0, 313.0, 314.0, 319.0, 317.0, 316.0, 311.0, 321.0, 318.0, 317.0, 319.0, 291.0, 296.0, 307.0, 323.0, 313.0, 320.0, 323.0, 313.0, 316.0, 314.0, 313.0, 323.0, 313.0, 317.0, 319.0, 317.0, 293.0, 280.0, 316.0, 314.0, 289.0, 298.0, 319.0, 320.0, 288.0, 294.0, 313.0, 311.0, 310.0, 320.0, 321.0, 318.0, 291.0, 296.0, 296.0, 294.0, 312.0, 327.0, 317.0, 322.0, 329.0, 307.0, 319.0, 314.0, 285.0, 294.0, 316.0, 317.0, 316.0, 317.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 258.0, 267.0, 311.0, 322.0, 314.0, 322.0, 291.0, 291.0, 324.0, 309.0, 291.0, 290.0, 291.0, 291.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9519895892120465, "mean_processing_ms": 0.2586797182005827, "mean_inference_ms": 1.5346818190110434}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7200000, "num_steps_sampled": 3840000, "sample_time_ms": 22522.727, "load_time_ms": 37.151, "grad_time_ms": 10013.326, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0025208042934536934, "policy_loss": -0.005239995662122965, "vf_loss": 83.27434539794922, 
"vf_explained_var": 0.7729237675666809, "kl": 0.0018271300941705704, "entropy": 1.133251667022705, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3840000, "episodes_total": 9600, "training_iteration": 300, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-18-22", "timestamp": 1660256302, "time_this_iter_s": 32.498526096343994, "time_total_s": 14716.046701431274, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14716.046701431274, "timesteps_since_restore": 3840000, "iterations_since_restore": 300, "perf": {"cpu_util_percent": 33.44782608695652, "ram_util_percent": 58.9717391304348}} -{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 617.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.925}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.85, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.53, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.86, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 17.02, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.7, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.58, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.31, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.58, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.58, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 639.0, 630.0, 587.0, 627.0, 639.0, 633.0, 573.0, 627.0, 633.0, 576.0, 639.0, 630.0, 564.0, 633.0, 627.0, 636.0, 636.0, 627.0, 633.0, 579.0, 639.0, 636.0, 639.0, 576.0, 639.0, 636.0, 639.0, 587.0, 581.0, 582.0, 639.0, 587.0, 630.0, 633.0, 636.0, 630.0, 636.0, 630.0, 636.0, 573.0, 630.0, 587.0, 639.0, 582.0, 624.0, 630.0, 639.0, 587.0, 590.0, 639.0, 639.0, 636.0, 633.0, 579.0, 633.0, 633.0, 587.0, 630.0, 630.0, 525.0, 633.0, 636.0, 582.0, 633.0, 581.0, 582.0, 636.0, 587.0, 636.0, 636.0, 579.0, 636.0, 636.0, 582.0, 633.0, 639.0, 582.0, 587.0, 639.0, 630.0, 639.0, 636.0, 627.0, 600.0, 639.0, 630.0, 630.0, 627.0, 639.0, 582.0, 627.0, 587.0, 636.0, 633.0, 636.0, 636.0, 633.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 322.0, 317.0, 316.0, 314.0, 288.0, 299.0, 305.0, 322.0, 319.0, 320.0, 329.0, 304.0, 278.0, 295.0, 311.0, 316.0, 316.0, 317.0, 290.0, 286.0, 317.0, 322.0, 321.0, 309.0, 280.0, 284.0, 322.0, 311.0, 314.0, 313.0, 326.0, 310.0, 314.0, 322.0, 311.0, 316.0, 316.0, 317.0, 288.0, 291.0, 311.0, 328.0, 317.0, 319.0, 319.0, 320.0, 289.0, 287.0, 317.0, 322.0, 320.0, 316.0, 327.0, 312.0, 277.0, 310.0, 298.0, 283.0, 293.0, 289.0, 317.0, 322.0, 291.0, 296.0, 307.0, 323.0, 313.0, 320.0, 323.0, 313.0, 316.0, 314.0, 313.0, 323.0, 313.0, 317.0, 319.0, 317.0, 293.0, 280.0, 316.0, 314.0, 289.0, 298.0, 319.0, 320.0, 288.0, 294.0, 313.0, 311.0, 310.0, 320.0, 321.0, 318.0, 291.0, 296.0, 296.0, 294.0, 312.0, 327.0, 317.0, 322.0, 329.0, 307.0, 319.0, 314.0, 285.0, 294.0, 316.0, 317.0, 316.0, 317.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 258.0, 267.0, 311.0, 322.0, 314.0, 322.0, 291.0, 291.0, 324.0, 309.0, 291.0, 290.0, 291.0, 291.0, 319.0, 317.0, 296.0, 291.0, 314.0, 322.0, 314.0, 322.0, 278.0, 301.0, 314.0, 322.0, 316.0, 320.0, 291.0, 291.0, 314.0, 319.0, 319.0, 320.0, 286.0, 296.0, 283.0, 304.0, 320.0, 319.0, 324.0, 306.0, 322.0, 317.0, 318.0, 318.0, 308.0, 319.0, 299.0, 301.0, 321.0, 318.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 320.0, 319.0, 288.0, 294.0, 319.0, 308.0, 289.0, 298.0, 314.0, 322.0, 317.0, 316.0, 316.0, 320.0, 312.0, 324.0, 319.0, 314.0, 311.0, 319.0, 283.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9503424273012522, "mean_processing_ms": 0.25835032935285485, "mean_inference_ms": 1.5332766289322552}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7224000, "num_steps_sampled": 3852800, "sample_time_ms": 22579.338, "load_time_ms": 37.118, "grad_time_ms": 10147.106, "update_time_ms": 0.002, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0018557598814368248, "policy_loss": -0.005808284040540457, "vf_loss": 82.31928253173828, "vf_explained_var": 0.7716462016105652, "kl": 0.001915976870805025, "entropy": 1.1357669830322266, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3852800, "episodes_total": 9632, "training_iteration": 301, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-18-56", "timestamp": 1660256336, "time_this_iter_s": 32.42415189743042, "time_total_s": 14748.470853328705, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14748.470853328705, "timesteps_since_restore": 3852800, "iterations_since_restore": 301, "perf": {"cpu_util_percent": 32.14468085106383, "ram_util_percent": 59.210638297872315}} -{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 616.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 232.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 308.055}, "custom_metrics": {"sparse_reward_mean": 213.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.51, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.56, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.65, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 17.05, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.71, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.01, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 1, 
"useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.41, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.33, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.71, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.71, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 621.0, 582.0, 636.0, 630.0, 633.0, 527.0, 627.0, 582.0, 633.0, 603.0, 630.0, 639.0, 624.0, 630.0, 636.0, 639.0, 465.0, 639.0, 636.0, 570.0, 630.0, 567.0, 633.0, 582.0, 630.0, 633.0, 636.0, 563.0, 636.0, 582.0, 636.0, 633.0, 581.0, 582.0, 636.0, 587.0, 636.0, 636.0, 579.0, 636.0, 636.0, 582.0, 633.0, 639.0, 582.0, 587.0, 639.0, 630.0, 639.0, 636.0, 627.0, 600.0, 639.0, 630.0, 630.0, 627.0, 639.0, 582.0, 627.0, 587.0, 636.0, 633.0, 636.0, 636.0, 633.0, 630.0, 579.0, 630.0, 639.0, 630.0, 587.0, 627.0, 639.0, 633.0, 573.0, 627.0, 633.0, 576.0, 639.0, 630.0, 564.0, 633.0, 627.0, 636.0, 636.0, 627.0, 633.0, 579.0, 639.0, 636.0, 639.0, 576.0, 639.0, 636.0, 639.0, 587.0, 581.0, 582.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 324.0, 318.0, 303.0, 290.0, 292.0, 314.0, 322.0, 317.0, 313.0, 314.0, 319.0, 259.0, 268.0, 305.0, 322.0, 281.0, 301.0, 314.0, 319.0, 302.0, 301.0, 309.0, 321.0, 319.0, 320.0, 311.0, 313.0, 310.0, 320.0, 304.0, 332.0, 325.0, 314.0, 232.0, 233.0, 314.0, 325.0, 311.0, 325.0, 291.0, 279.0, 318.0, 312.0, 277.0, 290.0, 315.0, 318.0, 298.0, 284.0, 309.0, 321.0, 311.0, 322.0, 319.0, 317.0, 279.0, 284.0, 319.0, 317.0, 291.0, 291.0, 324.0, 312.0, 324.0, 309.0, 291.0, 290.0, 291.0, 291.0, 319.0, 317.0, 296.0, 291.0, 314.0, 322.0, 314.0, 322.0, 278.0, 301.0, 314.0, 322.0, 316.0, 320.0, 291.0, 291.0, 314.0, 319.0, 319.0, 320.0, 286.0, 296.0, 283.0, 304.0, 320.0, 319.0, 324.0, 306.0, 322.0, 317.0, 318.0, 318.0, 308.0, 319.0, 299.0, 301.0, 321.0, 318.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 320.0, 319.0, 288.0, 294.0, 319.0, 308.0, 289.0, 298.0, 314.0, 322.0, 317.0, 316.0, 316.0, 320.0, 312.0, 324.0, 319.0, 314.0, 311.0, 319.0, 283.0, 296.0, 313.0, 317.0, 322.0, 317.0, 316.0, 314.0, 288.0, 299.0, 305.0, 322.0, 319.0, 320.0, 329.0, 304.0, 278.0, 295.0, 311.0, 316.0, 316.0, 317.0, 290.0, 286.0, 317.0, 322.0, 321.0, 309.0, 280.0, 284.0, 322.0, 311.0, 314.0, 313.0, 326.0, 310.0, 314.0, 322.0, 311.0, 316.0, 316.0, 317.0, 288.0, 291.0, 311.0, 328.0, 317.0, 319.0, 319.0, 320.0, 289.0, 287.0, 317.0, 322.0, 320.0, 316.0, 327.0, 312.0, 277.0, 310.0, 298.0, 283.0, 293.0, 289.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9487147885298458, "mean_processing_ms": 0.25802520052917743, "mean_inference_ms": 
1.5320224616406188}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7248000, "num_steps_sampled": 3865600, "sample_time_ms": 22988.548, "load_time_ms": 36.983, "grad_time_ms": 10329.984, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00113882205914706, "policy_loss": -0.0069201975129544735, "vf_loss": 86.32308959960938, "vf_explained_var": 0.7628341317176819, "kl": 0.0021745546255260706, "entropy": 1.146581768989563, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3865600, "episodes_total": 9664, "training_iteration": 302, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-19-29", "timestamp": 1660256369, "time_this_iter_s": 33.80998110771179, "time_total_s": 14782.280834436417, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14782.280834436417, "timesteps_since_restore": 3865600, "iterations_since_restore": 302, "perf": {"cpu_util_percent": 31.58125, "ram_util_percent": 58.88958333333333}} -{"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 610.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 305.09}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 186.58, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.47, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 17.0, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.61, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.97, "dish_pickup_agent_0_min": 
1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.34, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.61, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.61, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.46, 
"viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 633.0, 576.0, 630.0, 633.0, 621.0, 633.0, 633.0, 582.0, 123.0, 636.0, 633.0, 582.0, 627.0, 639.0, 633.0, 639.0, 587.0, 633.0, 630.0, 633.0, 621.0, 633.0, 630.0, 630.0, 633.0, 376.0, 630.0, 636.0, 582.0, 633.0, 636.0, 633.0, 630.0, 579.0, 630.0, 639.0, 630.0, 587.0, 627.0, 639.0, 633.0, 573.0, 627.0, 633.0, 576.0, 639.0, 630.0, 564.0, 633.0, 627.0, 636.0, 636.0, 627.0, 633.0, 579.0, 639.0, 636.0, 639.0, 576.0, 639.0, 636.0, 639.0, 587.0, 581.0, 582.0, 639.0, 630.0, 621.0, 582.0, 636.0, 630.0, 633.0, 527.0, 627.0, 582.0, 633.0, 603.0, 630.0, 639.0, 624.0, 630.0, 
636.0, 639.0, 465.0, 639.0, 636.0, 570.0, 630.0, 567.0, 633.0, 582.0, 630.0, 633.0, 636.0, 563.0, 636.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 325.0, 309.0, 321.0, 315.0, 318.0, 280.0, 296.0, 311.0, 319.0, 321.0, 312.0, 308.0, 313.0, 317.0, 316.0, 316.0, 317.0, 292.0, 290.0, 60.0, 63.0, 314.0, 322.0, 324.0, 309.0, 291.0, 291.0, 316.0, 311.0, 319.0, 320.0, 304.0, 329.0, 319.0, 320.0, 283.0, 304.0, 318.0, 315.0, 321.0, 309.0, 321.0, 312.0, 321.0, 300.0, 314.0, 319.0, 319.0, 311.0, 311.0, 319.0, 316.0, 317.0, 182.0, 194.0, 319.0, 311.0, 324.0, 312.0, 291.0, 291.0, 316.0, 317.0, 312.0, 324.0, 319.0, 314.0, 311.0, 319.0, 283.0, 296.0, 313.0, 317.0, 322.0, 317.0, 316.0, 314.0, 288.0, 299.0, 305.0, 322.0, 319.0, 320.0, 329.0, 304.0, 278.0, 295.0, 311.0, 316.0, 316.0, 317.0, 290.0, 286.0, 317.0, 322.0, 321.0, 309.0, 280.0, 284.0, 322.0, 311.0, 314.0, 313.0, 326.0, 310.0, 314.0, 322.0, 311.0, 316.0, 316.0, 317.0, 288.0, 291.0, 311.0, 328.0, 317.0, 319.0, 319.0, 320.0, 289.0, 287.0, 317.0, 322.0, 320.0, 316.0, 327.0, 312.0, 277.0, 310.0, 298.0, 283.0, 293.0, 289.0, 317.0, 322.0, 306.0, 324.0, 318.0, 303.0, 290.0, 292.0, 314.0, 322.0, 317.0, 313.0, 314.0, 319.0, 259.0, 268.0, 305.0, 322.0, 281.0, 301.0, 314.0, 319.0, 302.0, 301.0, 309.0, 321.0, 319.0, 320.0, 311.0, 313.0, 310.0, 320.0, 304.0, 332.0, 325.0, 314.0, 232.0, 233.0, 314.0, 325.0, 311.0, 325.0, 291.0, 279.0, 318.0, 312.0, 277.0, 290.0, 315.0, 318.0, 298.0, 284.0, 309.0, 321.0, 311.0, 322.0, 319.0, 
317.0, 279.0, 284.0, 319.0, 317.0, 291.0, 291.0, 324.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9470934663079638, "mean_processing_ms": 0.2577011854235747, "mean_inference_ms": 1.5308098128397853}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7272000, "num_steps_sampled": 3878400, "sample_time_ms": 22956.932, "load_time_ms": 37.245, "grad_time_ms": 10308.821, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009572577546350658, "policy_loss": -0.00890685711055994, "vf_loss": 85.18466186523438, "vf_explained_var": 0.7909882068634033, "kl": 0.00206771120429039, "entropy": 1.137712836265564, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3878400, "episodes_total": 9696, "training_iteration": 303, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-20-03", "timestamp": 1660256403, "time_this_iter_s": 33.27155518531799, "time_total_s": 14815.552389621735, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, 
"timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14815.552389621735, "timesteps_since_restore": 3878400, "iterations_since_restore": 303, "perf": {"cpu_util_percent": 32.59574468085106, "ram_util_percent": 58.840425531914875}} -{"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 606.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 303.28}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 185.76, "shaped_reward_min": 43, "shaped_reward_max": 199, 
"tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.34, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.78, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.45, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, 
"potting_onion_agent_1_mean": 17.42, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.39, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.32, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.5, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.42, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.42, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 510.0, 587.0, 564.0, 579.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 636.0, 630.0, 569.0, 633.0, 639.0, 582.0, 633.0, 633.0, 636.0, 587.0, 633.0, 636.0, 639.0, 633.0, 530.0, 636.0, 627.0, 582.0, 633.0, 636.0, 636.0, 587.0, 581.0, 582.0, 639.0, 630.0, 621.0, 582.0, 636.0, 630.0, 633.0, 527.0, 627.0, 582.0, 633.0, 603.0, 630.0, 639.0, 624.0, 630.0, 636.0, 639.0, 465.0, 639.0, 636.0, 570.0, 630.0, 567.0, 
633.0, 582.0, 630.0, 633.0, 636.0, 563.0, 636.0, 582.0, 636.0, 639.0, 630.0, 633.0, 576.0, 630.0, 633.0, 621.0, 633.0, 633.0, 582.0, 123.0, 636.0, 633.0, 582.0, 627.0, 639.0, 633.0, 639.0, 587.0, 633.0, 630.0, 633.0, 621.0, 633.0, 630.0, 630.0, 633.0, 376.0, 630.0, 636.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 256.0, 254.0, 286.0, 301.0, 276.0, 288.0, 290.0, 289.0, 289.0, 298.0, 296.0, 286.0, 295.0, 292.0, 313.0, 320.0, 318.0, 312.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 283.0, 286.0, 319.0, 314.0, 319.0, 320.0, 294.0, 288.0, 324.0, 309.0, 315.0, 318.0, 325.0, 311.0, 294.0, 293.0, 313.0, 320.0, 320.0, 316.0, 316.0, 323.0, 316.0, 317.0, 265.0, 265.0, 317.0, 319.0, 317.0, 310.0, 288.0, 294.0, 316.0, 317.0, 322.0, 314.0, 314.0, 322.0, 277.0, 310.0, 298.0, 283.0, 293.0, 289.0, 317.0, 322.0, 306.0, 324.0, 318.0, 303.0, 290.0, 292.0, 314.0, 322.0, 317.0, 313.0, 314.0, 319.0, 259.0, 268.0, 305.0, 322.0, 281.0, 301.0, 314.0, 319.0, 302.0, 301.0, 309.0, 321.0, 319.0, 320.0, 311.0, 313.0, 310.0, 320.0, 304.0, 332.0, 325.0, 314.0, 232.0, 233.0, 314.0, 325.0, 311.0, 325.0, 291.0, 279.0, 318.0, 312.0, 277.0, 290.0, 315.0, 318.0, 298.0, 284.0, 309.0, 321.0, 311.0, 322.0, 319.0, 317.0, 279.0, 284.0, 319.0, 317.0, 291.0, 291.0, 324.0, 312.0, 314.0, 325.0, 309.0, 321.0, 315.0, 318.0, 280.0, 296.0, 311.0, 319.0, 321.0, 312.0, 308.0, 313.0, 317.0, 316.0, 316.0, 317.0, 292.0, 290.0, 60.0, 63.0, 314.0, 322.0, 324.0, 309.0, 291.0, 291.0, 316.0, 311.0, 319.0, 
320.0, 304.0, 329.0, 319.0, 320.0, 283.0, 304.0, 318.0, 315.0, 321.0, 309.0, 321.0, 312.0, 321.0, 300.0, 314.0, 319.0, 319.0, 311.0, 311.0, 319.0, 316.0, 317.0, 182.0, 194.0, 319.0, 311.0, 324.0, 312.0, 291.0, 291.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9454753487433492, "mean_processing_ms": 0.2573778744956284, "mean_inference_ms": 1.529523880785963}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7296000, "num_steps_sampled": 3891200, "sample_time_ms": 23052.875, "load_time_ms": 37.015, "grad_time_ms": 10279.094, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008152422960847616, "policy_loss": -0.008779828436672688, "vf_loss": 85.31393432617188, "vf_explained_var": 0.7709566950798035, "kl": 0.0019413350382819772, "entropy": 1.1336184740066528, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3891200, "episodes_total": 9728, "training_iteration": 304, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-20-35", "timestamp": 1660256435, "time_this_iter_s": 32.335684061050415, "time_total_s": 14847.888073682785, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14847.888073682785, "timesteps_since_restore": 3891200, "iterations_since_restore": 304, "perf": {"cpu_util_percent": 32.43260869565216, "ram_util_percent": 58.8478260869565}} -{"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 608.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 304.03}, 
"custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 186.86, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.32, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.81, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.5, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.5, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.5, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.5, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 633.0, 633.0, 639.0, 636.0, 587.0, 587.0, 636.0, 633.0, 636.0, 587.0, 587.0, 630.0, 587.0, 582.0, 630.0, 636.0, 581.0, 630.0, 630.0, 639.0, 590.0, 618.0, 627.0, 633.0, 575.0, 627.0, 630.0, 636.0, 636.0, 
582.0, 582.0, 563.0, 636.0, 582.0, 636.0, 639.0, 630.0, 633.0, 576.0, 630.0, 633.0, 621.0, 633.0, 633.0, 582.0, 123.0, 636.0, 633.0, 582.0, 627.0, 639.0, 633.0, 639.0, 587.0, 633.0, 630.0, 633.0, 621.0, 633.0, 630.0, 630.0, 633.0, 376.0, 630.0, 636.0, 582.0, 633.0, 630.0, 510.0, 587.0, 564.0, 579.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 636.0, 630.0, 569.0, 633.0, 639.0, 582.0, 633.0, 633.0, 636.0, 587.0, 633.0, 636.0, 639.0, 633.0, 530.0, 636.0, 627.0, 582.0, 633.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 288.0, 309.0, 324.0, 315.0, 318.0, 312.0, 327.0, 317.0, 319.0, 290.0, 297.0, 298.0, 289.0, 320.0, 316.0, 313.0, 320.0, 321.0, 315.0, 288.0, 299.0, 293.0, 294.0, 318.0, 312.0, 294.0, 293.0, 286.0, 296.0, 318.0, 312.0, 319.0, 317.0, 290.0, 291.0, 320.0, 310.0, 314.0, 316.0, 324.0, 315.0, 295.0, 295.0, 314.0, 304.0, 320.0, 307.0, 319.0, 314.0, 285.0, 290.0, 316.0, 311.0, 313.0, 317.0, 320.0, 316.0, 327.0, 309.0, 288.0, 294.0, 293.0, 289.0, 279.0, 284.0, 319.0, 317.0, 291.0, 291.0, 324.0, 312.0, 314.0, 325.0, 309.0, 321.0, 315.0, 318.0, 280.0, 296.0, 311.0, 319.0, 321.0, 312.0, 308.0, 313.0, 317.0, 316.0, 316.0, 317.0, 292.0, 290.0, 60.0, 63.0, 314.0, 322.0, 324.0, 309.0, 291.0, 291.0, 316.0, 311.0, 319.0, 320.0, 304.0, 329.0, 319.0, 320.0, 283.0, 304.0, 318.0, 315.0, 321.0, 309.0, 321.0, 312.0, 321.0, 300.0, 314.0, 319.0, 319.0, 311.0, 311.0, 319.0, 316.0, 317.0, 182.0, 194.0, 319.0, 311.0, 324.0, 312.0, 291.0, 291.0, 316.0, 317.0, 316.0, 314.0, 
256.0, 254.0, 286.0, 301.0, 276.0, 288.0, 290.0, 289.0, 289.0, 298.0, 296.0, 286.0, 295.0, 292.0, 313.0, 320.0, 318.0, 312.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 283.0, 286.0, 319.0, 314.0, 319.0, 320.0, 294.0, 288.0, 324.0, 309.0, 315.0, 318.0, 325.0, 311.0, 294.0, 293.0, 313.0, 320.0, 320.0, 316.0, 316.0, 323.0, 316.0, 317.0, 265.0, 265.0, 317.0, 319.0, 317.0, 310.0, 288.0, 294.0, 316.0, 317.0, 322.0, 314.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9438635758880244, "mean_processing_ms": 0.25705717388077914, "mean_inference_ms": 1.528223444830069}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7320000, "num_steps_sampled": 3904000, "sample_time_ms": 22489.17, "load_time_ms": 36.377, "grad_time_ms": 10072.238, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005805303808301687, "policy_loss": -0.002536727814003825, "vf_loss": 89.14191436767578, "vf_explained_var": 0.7592394948005676, "kl": 0.0022071560379117727, "entropy": 1.1443239450454712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3904000, "episodes_total": 9760, "training_iteration": 305, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-21-09", "timestamp": 1660256469, "time_this_iter_s": 33.905731201171875, "time_total_s": 14881.793804883957, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14881.793804883957, "timesteps_since_restore": 3904000, "iterations_since_restore": 305, "perf": {"cpu_util_percent": 30.185416666666665, "ram_util_percent": 58.73750000000001}} -{"episode_reward_max": 639.0, 
"episode_reward_min": 351.0, "episode_reward_mean": 612.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 306.015}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.43, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.14, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.82, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.57, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.05, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.54, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, 
"onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.96, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.36, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.66, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 12, 
"optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.96, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.96, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [636.0, 351.0, 633.0, 582.0, 633.0, 636.0, 639.0, 630.0, 639.0, 639.0, 582.0, 579.0, 627.0, 636.0, 639.0, 633.0, 627.0, 630.0, 579.0, 636.0, 587.0, 587.0, 636.0, 579.0, 633.0, 630.0, 627.0, 639.0, 636.0, 636.0, 630.0, 536.0, 630.0, 636.0, 582.0, 633.0, 630.0, 510.0, 587.0, 564.0, 579.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 636.0, 630.0, 569.0, 633.0, 639.0, 582.0, 633.0, 633.0, 636.0, 587.0, 633.0, 636.0, 639.0, 633.0, 530.0, 636.0, 627.0, 582.0, 633.0, 636.0, 636.0, 587.0, 633.0, 633.0, 639.0, 636.0, 587.0, 587.0, 636.0, 633.0, 636.0, 587.0, 587.0, 630.0, 587.0, 582.0, 630.0, 636.0, 581.0, 630.0, 630.0, 639.0, 590.0, 618.0, 627.0, 633.0, 575.0, 627.0, 630.0, 636.0, 636.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 172.0, 179.0, 313.0, 320.0, 280.0, 302.0, 318.0, 315.0, 322.0, 314.0, 324.0, 315.0, 316.0, 314.0, 322.0, 317.0, 319.0, 320.0, 294.0, 288.0, 290.0, 289.0, 310.0, 317.0, 319.0, 317.0, 314.0, 325.0, 316.0, 317.0, 316.0, 311.0, 315.0, 315.0, 285.0, 294.0, 319.0, 317.0, 283.0, 304.0, 285.0, 302.0, 332.0, 304.0, 284.0, 295.0, 316.0, 317.0, 311.0, 319.0, 323.0, 304.0, 319.0, 320.0, 317.0, 319.0, 320.0, 316.0, 308.0, 322.0, 263.0, 273.0, 319.0, 311.0, 324.0, 312.0, 291.0, 291.0, 316.0, 317.0, 316.0, 314.0, 256.0, 254.0, 286.0, 301.0, 276.0, 288.0, 290.0, 289.0, 289.0, 298.0, 296.0, 286.0, 295.0, 292.0, 313.0, 320.0, 318.0, 312.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 283.0, 286.0, 319.0, 314.0, 319.0, 320.0, 294.0, 
288.0, 324.0, 309.0, 315.0, 318.0, 325.0, 311.0, 294.0, 293.0, 313.0, 320.0, 320.0, 316.0, 316.0, 323.0, 316.0, 317.0, 265.0, 265.0, 317.0, 319.0, 317.0, 310.0, 288.0, 294.0, 316.0, 317.0, 322.0, 314.0, 314.0, 322.0, 299.0, 288.0, 309.0, 324.0, 315.0, 318.0, 312.0, 327.0, 317.0, 319.0, 290.0, 297.0, 298.0, 289.0, 320.0, 316.0, 313.0, 320.0, 321.0, 315.0, 288.0, 299.0, 293.0, 294.0, 318.0, 312.0, 294.0, 293.0, 286.0, 296.0, 318.0, 312.0, 319.0, 317.0, 290.0, 291.0, 320.0, 310.0, 314.0, 316.0, 324.0, 315.0, 295.0, 295.0, 314.0, 304.0, 320.0, 307.0, 319.0, 314.0, 285.0, 290.0, 316.0, 311.0, 313.0, 317.0, 320.0, 316.0, 327.0, 309.0, 288.0, 294.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9422548001581954, "mean_processing_ms": 0.2567378512099078, "mean_inference_ms": 1.5267428709470499}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7344000, "num_steps_sampled": 3916800, "sample_time_ms": 22275.217, "load_time_ms": 36.758, "grad_time_ms": 10257.278, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0026395271997898817, "policy_loss": -0.005618779454380274, "vf_loss": 88.24600219726562, "vf_explained_var": 0.7727122902870178, "kl": 0.0020911165047436953, "entropy": 1.1326097249984741, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3916800, "episodes_total": 9792, "training_iteration": 306, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-21-40", "timestamp": 1660256500, "time_this_iter_s": 30.71598792076111, "time_total_s": 14912.509792804718, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
14912.509792804718, "timesteps_since_restore": 3916800, "iterations_since_restore": 306, "perf": {"cpu_util_percent": 32.81395348837209, "ram_util_percent": 58.667441860465125}} -{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 617.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 308.845}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 190.49, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.18, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.9, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.65, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, 
"useful_onion_pickup_agent_1_mean": 18.12, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.51, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.16, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.41, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.16, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.16, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 
0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 633.0, 587.0, 627.0, 639.0, 633.0, 639.0, 633.0, 636.0, 639.0, 587.0, 636.0, 596.0, 639.0, 630.0, 596.0, 630.0, 639.0, 624.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 639.0, 636.0, 624.0, 639.0, 582.0, 633.0, 636.0, 636.0, 587.0, 633.0, 633.0, 639.0, 636.0, 587.0, 587.0, 636.0, 633.0, 636.0, 587.0, 587.0, 630.0, 587.0, 582.0, 630.0, 636.0, 581.0, 630.0, 630.0, 639.0, 590.0, 618.0, 627.0, 633.0, 575.0, 627.0, 630.0, 636.0, 636.0, 582.0, 582.0, 636.0, 351.0, 633.0, 582.0, 633.0, 636.0, 639.0, 630.0, 639.0, 639.0, 582.0, 579.0, 627.0, 636.0, 639.0, 633.0, 627.0, 630.0, 579.0, 636.0, 587.0, 587.0, 636.0, 579.0, 633.0, 630.0, 627.0, 639.0, 636.0, 636.0, 630.0, 536.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 319.0, 315.0, 324.0, 318.0, 315.0, 292.0, 295.0, 302.0, 325.0, 327.0, 312.0, 311.0, 322.0, 327.0, 312.0, 311.0, 322.0, 314.0, 322.0, 319.0, 320.0, 296.0, 291.0, 315.0, 321.0, 294.0, 302.0, 319.0, 320.0, 310.0, 320.0, 304.0, 292.0, 316.0, 314.0, 317.0, 322.0, 313.0, 311.0, 316.0, 317.0, 314.0, 325.0, 319.0, 317.0, 297.0, 282.0, 319.0, 317.0, 304.0, 323.0, 321.0, 315.0, 314.0, 322.0, 322.0, 317.0, 314.0, 322.0, 308.0, 316.0, 320.0, 319.0, 288.0, 294.0, 316.0, 317.0, 322.0, 314.0, 314.0, 322.0, 299.0, 288.0, 309.0, 324.0, 315.0, 318.0, 
312.0, 327.0, 317.0, 319.0, 290.0, 297.0, 298.0, 289.0, 320.0, 316.0, 313.0, 320.0, 321.0, 315.0, 288.0, 299.0, 293.0, 294.0, 318.0, 312.0, 294.0, 293.0, 286.0, 296.0, 318.0, 312.0, 319.0, 317.0, 290.0, 291.0, 320.0, 310.0, 314.0, 316.0, 324.0, 315.0, 295.0, 295.0, 314.0, 304.0, 320.0, 307.0, 319.0, 314.0, 285.0, 290.0, 316.0, 311.0, 313.0, 317.0, 320.0, 316.0, 327.0, 309.0, 288.0, 294.0, 293.0, 289.0, 314.0, 322.0, 172.0, 179.0, 313.0, 320.0, 280.0, 302.0, 318.0, 315.0, 322.0, 314.0, 324.0, 315.0, 316.0, 314.0, 322.0, 317.0, 319.0, 320.0, 294.0, 288.0, 290.0, 289.0, 310.0, 317.0, 319.0, 317.0, 314.0, 325.0, 316.0, 317.0, 316.0, 311.0, 315.0, 315.0, 285.0, 294.0, 319.0, 317.0, 283.0, 304.0, 285.0, 302.0, 332.0, 304.0, 284.0, 295.0, 316.0, 317.0, 311.0, 319.0, 323.0, 304.0, 319.0, 320.0, 317.0, 319.0, 320.0, 316.0, 308.0, 322.0, 263.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9406496030307164, "mean_processing_ms": 0.25641787950901196, "mean_inference_ms": 1.5250430030640023}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7368000, "num_steps_sampled": 3929600, "sample_time_ms": 21842.439, "load_time_ms": 36.991, "grad_time_ms": 10104.518, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00218362919986248, "policy_loss": -0.005569128785282373, "vf_loss": 83.15591430664062, "vf_explained_var": 0.7728936076164246, "kl": 0.0017335275188088417, "entropy": 1.1256619691848755, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3929600, "episodes_total": 9824, "training_iteration": 307, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-22-09", "timestamp": 1660256529, "time_this_iter_s": 29.1308012008667, "time_total_s": 14941.640594005585, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, 
"num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14941.640594005585, "timesteps_since_restore": 3929600, "iterations_since_restore": 307, "perf": {"cpu_util_percent": 35.333333333333336, "ram_util_percent": 58.726190476190474}} -{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 620.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 310.09}, "custom_metrics": {"sparse_reward_mean": 214.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 190.58, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 19.12, 
"onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.45, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.43, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.42, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.78, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.37, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.43, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.43, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 639.0, 630.0, 627.0, 636.0, 633.0, 630.0, 630.0, 630.0, 630.0, 561.0, 627.0, 627.0, 639.0, 633.0, 579.0, 633.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 639.0, 639.0, 630.0, 633.0, 576.0, 579.0, 636.0, 633.0, 636.0, 636.0, 582.0, 582.0, 636.0, 351.0, 633.0, 582.0, 633.0, 636.0, 639.0, 630.0, 639.0, 639.0, 582.0, 579.0, 627.0, 636.0, 639.0, 633.0, 627.0, 630.0, 579.0, 636.0, 587.0, 587.0, 636.0, 579.0, 633.0, 630.0, 627.0, 639.0, 636.0, 636.0, 630.0, 536.0, 636.0, 639.0, 633.0, 587.0, 627.0, 639.0, 633.0, 639.0, 633.0, 636.0, 639.0, 587.0, 636.0, 596.0, 639.0, 630.0, 596.0, 630.0, 639.0, 624.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 639.0, 636.0, 624.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [310.0, 323.0, 316.0, 320.0, 317.0, 322.0, 310.0, 320.0, 311.0, 316.0, 319.0, 317.0, 306.0, 327.0, 321.0, 309.0, 313.0, 317.0, 311.0, 319.0, 308.0, 322.0, 278.0, 283.0, 318.0, 309.0, 315.0, 312.0, 327.0, 312.0, 316.0, 317.0, 282.0, 297.0, 314.0, 319.0, 316.0, 317.0, 290.0, 292.0, 314.0, 316.0, 313.0, 323.0, 316.0, 320.0, 313.0, 314.0, 319.0, 
320.0, 323.0, 316.0, 316.0, 314.0, 314.0, 319.0, 293.0, 283.0, 296.0, 283.0, 316.0, 320.0, 313.0, 320.0, 320.0, 316.0, 327.0, 309.0, 288.0, 294.0, 293.0, 289.0, 314.0, 322.0, 172.0, 179.0, 313.0, 320.0, 280.0, 302.0, 318.0, 315.0, 322.0, 314.0, 324.0, 315.0, 316.0, 314.0, 322.0, 317.0, 319.0, 320.0, 294.0, 288.0, 290.0, 289.0, 310.0, 317.0, 319.0, 317.0, 314.0, 325.0, 316.0, 317.0, 316.0, 311.0, 315.0, 315.0, 285.0, 294.0, 319.0, 317.0, 283.0, 304.0, 285.0, 302.0, 332.0, 304.0, 284.0, 295.0, 316.0, 317.0, 311.0, 319.0, 323.0, 304.0, 319.0, 320.0, 317.0, 319.0, 320.0, 316.0, 308.0, 322.0, 263.0, 273.0, 317.0, 319.0, 315.0, 324.0, 318.0, 315.0, 292.0, 295.0, 302.0, 325.0, 327.0, 312.0, 311.0, 322.0, 327.0, 312.0, 311.0, 322.0, 314.0, 322.0, 319.0, 320.0, 296.0, 291.0, 315.0, 321.0, 294.0, 302.0, 319.0, 320.0, 310.0, 320.0, 304.0, 292.0, 316.0, 314.0, 317.0, 322.0, 313.0, 311.0, 316.0, 317.0, 314.0, 325.0, 319.0, 317.0, 297.0, 282.0, 319.0, 317.0, 304.0, 323.0, 321.0, 315.0, 314.0, 322.0, 322.0, 317.0, 314.0, 322.0, 308.0, 316.0, 320.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9390469484848887, "mean_processing_ms": 0.2560959206110981, "mean_inference_ms": 1.5230623769962466}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7392000, "num_steps_sampled": 3942400, "sample_time_ms": 21606.28, "load_time_ms": 36.86, "grad_time_ms": 10018.576, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012893896782770753, "policy_loss": -0.006317433435469866, "vf_loss": 81.66983795166016, "vf_explained_var": 0.7689216732978821, "kl": 0.0018363837152719498, "entropy": 1.1203217506408691, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3942400, "episodes_total": 9856, "training_iteration": 308, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-22-36", "timestamp": 1660256556, 
"time_this_iter_s": 27.574139833450317, "time_total_s": 14969.214733839035, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, 
"log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, 
"sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14969.214733839035, "timesteps_since_restore": 3942400, "iterations_since_restore": 308, "perf": {"cpu_util_percent": 34.44871794871795, "ram_util_percent": 58.748717948717946}} -{"episode_reward_max": 639.0, "episode_reward_min": 533.0, "episode_reward_mean": 625.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 312.605}, "custom_metrics": {"sparse_reward_mean": 216.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 191.61, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.26, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.85, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.25, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.64, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.2, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.96, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.23, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, 
"soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.31, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.28, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.64, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.2, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.64, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.2, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 639.0, 630.0, 587.0, 639.0, 624.0, 636.0, 639.0, 639.0, 630.0, 630.0, 633.0, 606.0, 627.0, 627.0, 636.0, 630.0, 633.0, 627.0, 587.0, 639.0, 627.0, 639.0, 636.0, 639.0, 636.0, 636.0, 627.0, 636.0, 627.0, 533.0, 636.0, 636.0, 630.0, 536.0, 636.0, 639.0, 633.0, 587.0, 627.0, 639.0, 633.0, 639.0, 633.0, 636.0, 639.0, 587.0, 636.0, 596.0, 639.0, 630.0, 596.0, 630.0, 639.0, 624.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 639.0, 636.0, 624.0, 639.0, 633.0, 636.0, 639.0, 630.0, 627.0, 636.0, 633.0, 630.0, 630.0, 630.0, 630.0, 561.0, 627.0, 627.0, 639.0, 633.0, 579.0, 633.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 639.0, 639.0, 630.0, 633.0, 576.0, 579.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 322.0, 311.0, 322.0, 317.0, 311.0, 319.0, 288.0, 299.0, 319.0, 320.0, 311.0, 313.0, 319.0, 317.0, 330.0, 309.0, 
317.0, 322.0, 314.0, 316.0, 311.0, 319.0, 319.0, 314.0, 294.0, 312.0, 310.0, 317.0, 309.0, 318.0, 320.0, 316.0, 314.0, 316.0, 309.0, 324.0, 313.0, 314.0, 291.0, 296.0, 329.0, 310.0, 321.0, 306.0, 323.0, 316.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 319.0, 317.0, 314.0, 313.0, 312.0, 324.0, 313.0, 314.0, 260.0, 273.0, 317.0, 319.0, 320.0, 316.0, 308.0, 322.0, 263.0, 273.0, 317.0, 319.0, 315.0, 324.0, 318.0, 315.0, 292.0, 295.0, 302.0, 325.0, 327.0, 312.0, 311.0, 322.0, 327.0, 312.0, 311.0, 322.0, 314.0, 322.0, 319.0, 320.0, 296.0, 291.0, 315.0, 321.0, 294.0, 302.0, 319.0, 320.0, 310.0, 320.0, 304.0, 292.0, 316.0, 314.0, 317.0, 322.0, 313.0, 311.0, 316.0, 317.0, 314.0, 325.0, 319.0, 317.0, 297.0, 282.0, 319.0, 317.0, 304.0, 323.0, 321.0, 315.0, 314.0, 322.0, 322.0, 317.0, 314.0, 322.0, 308.0, 316.0, 320.0, 319.0, 310.0, 323.0, 316.0, 320.0, 317.0, 322.0, 310.0, 320.0, 311.0, 316.0, 319.0, 317.0, 306.0, 327.0, 321.0, 309.0, 313.0, 317.0, 311.0, 319.0, 308.0, 322.0, 278.0, 283.0, 318.0, 309.0, 315.0, 312.0, 327.0, 312.0, 316.0, 317.0, 282.0, 297.0, 314.0, 319.0, 316.0, 317.0, 290.0, 292.0, 314.0, 316.0, 313.0, 323.0, 316.0, 320.0, 313.0, 314.0, 319.0, 320.0, 323.0, 316.0, 316.0, 314.0, 314.0, 319.0, 293.0, 283.0, 296.0, 283.0, 316.0, 320.0, 313.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9374628711409474, "mean_processing_ms": 0.2557781242683051, "mean_inference_ms": 1.5211277740560474}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7416000, "num_steps_sampled": 3955200, "sample_time_ms": 21548.086, "load_time_ms": 36.789, "grad_time_ms": 9960.253, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0017820480279624462, "policy_loss": -0.005541125778108835, "vf_loss": 78.87618255615234, "vf_explained_var": 0.777707040309906, "kl": 0.0019577995408326387, "entropy": 1.1288975477218628, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 3955200, "episodes_total": 9888, "training_iteration": 309, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-23-06", "timestamp": 1660256586, "time_this_iter_s": 30.049942016601562, "time_total_s": 14999.264675855637, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 14999.264675855637, "timesteps_since_restore": 3955200, "iterations_since_restore": 309, "perf": {"cpu_util_percent": 34.733333333333334, "ram_util_percent": 58.76428571428571}} -{"episode_reward_max": 639.0, "episode_reward_min": 533.0, "episode_reward_mean": 625.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 312.6}, "custom_metrics": {"sparse_reward_mean": 217.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 191.2, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.32, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.98, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.36, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.65, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.07, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 
0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.24, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.65, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.07, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.65, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.07, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 639.0, 587.0, 636.0, 593.0, 633.0, 627.0, 639.0, 627.0, 639.0, 633.0, 636.0, 630.0, 630.0, 630.0, 636.0, 633.0, 636.0, 582.0, 627.0, 639.0, 639.0, 582.0, 636.0, 627.0, 587.0, 636.0, 633.0, 633.0, 624.0, 636.0, 639.0, 636.0, 624.0, 639.0, 633.0, 636.0, 639.0, 630.0, 627.0, 636.0, 633.0, 630.0, 630.0, 630.0, 630.0, 561.0, 627.0, 627.0, 639.0, 633.0, 579.0, 633.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 639.0, 639.0, 630.0, 633.0, 576.0, 579.0, 636.0, 633.0, 636.0, 633.0, 639.0, 630.0, 587.0, 639.0, 624.0, 636.0, 639.0, 639.0, 630.0, 630.0, 633.0, 606.0, 627.0, 627.0, 636.0, 630.0, 633.0, 627.0, 587.0, 639.0, 627.0, 639.0, 636.0, 639.0, 636.0, 636.0, 627.0, 636.0, 627.0, 533.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 296.0, 286.0, 317.0, 322.0, 296.0, 291.0, 321.0, 315.0, 302.0, 291.0, 308.0, 325.0, 318.0, 309.0, 322.0, 317.0, 315.0, 312.0, 322.0, 317.0, 309.0, 324.0, 321.0, 315.0, 322.0, 308.0, 313.0, 317.0, 317.0, 313.0, 322.0, 314.0, 320.0, 313.0, 319.0, 317.0, 293.0, 289.0, 316.0, 311.0, 322.0, 317.0, 317.0, 322.0, 291.0, 291.0, 318.0, 318.0, 305.0, 322.0, 296.0, 291.0, 311.0, 325.0, 309.0, 324.0, 319.0, 314.0, 326.0, 298.0, 317.0, 319.0, 322.0, 317.0, 314.0, 322.0, 308.0, 316.0, 320.0, 319.0, 310.0, 323.0, 316.0, 320.0, 317.0, 322.0, 310.0, 320.0, 311.0, 316.0, 319.0, 317.0, 306.0, 327.0, 321.0, 309.0, 313.0, 317.0, 311.0, 319.0, 308.0, 322.0, 278.0, 283.0, 318.0, 309.0, 315.0, 312.0, 327.0, 312.0, 316.0, 317.0, 282.0, 297.0, 314.0, 319.0, 316.0, 317.0, 290.0, 292.0, 314.0, 316.0, 313.0, 323.0, 316.0, 320.0, 313.0, 314.0, 319.0, 320.0, 323.0, 316.0, 316.0, 314.0, 314.0, 319.0, 293.0, 283.0, 296.0, 283.0, 316.0, 320.0, 313.0, 320.0, 314.0, 322.0, 322.0, 311.0, 322.0, 317.0, 311.0, 319.0, 288.0, 299.0, 319.0, 320.0, 311.0, 313.0, 319.0, 317.0, 330.0, 309.0, 317.0, 322.0, 314.0, 316.0, 311.0, 319.0, 319.0, 314.0, 294.0, 312.0, 310.0, 317.0, 309.0, 318.0, 320.0, 316.0, 314.0, 316.0, 309.0, 324.0, 313.0, 314.0, 291.0, 296.0, 329.0, 310.0, 321.0, 306.0, 323.0, 316.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 319.0, 317.0, 314.0, 313.0, 312.0, 324.0, 313.0, 314.0, 260.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9359034441976002, "mean_processing_ms": 0.2554667399859708, "mean_inference_ms": 1.5193431419939385}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7440000, "num_steps_sampled": 3968000, "sample_time_ms": 21381.604, "load_time_ms": 36.734, "grad_time_ms": 9849.343, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021060302387923002, "policy_loss": -0.005640763323754072, "vf_loss": 83.09170532226562, 
"vf_explained_var": 0.7722363471984863, "kl": 0.0021093024406582117, "entropy": 1.12474524974823, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3968000, "episodes_total": 9920, "training_iteration": 310, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-23-36", "timestamp": 1660256616, "time_this_iter_s": 29.72802186012268, "time_total_s": 15028.99269771576, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15028.99269771576, "timesteps_since_restore": 3968000, "iterations_since_restore": 310, "perf": {"cpu_util_percent": 37.352380952380955, "ram_util_percent": 59.30714285714285}} -{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 623.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 311.605}, "custom_metrics": {"sparse_reward_mean": 216.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 191.21, "shaped_reward_min": 166, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.42, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.99, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.88, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.26, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.78, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.78, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.29, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.78, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.78, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 582.0, 630.0, 573.0, 636.0, 630.0, 639.0, 639.0, 636.0, 639.0, 630.0, 587.0, 630.0, 627.0, 633.0, 630.0, 636.0, 636.0, 587.0, 639.0, 633.0, 639.0, 587.0, 630.0, 633.0, 530.0, 630.0, 639.0, 633.0, 582.0, 633.0, 576.0, 579.0, 636.0, 633.0, 636.0, 633.0, 639.0, 630.0, 587.0, 639.0, 624.0, 636.0, 639.0, 639.0, 630.0, 630.0, 633.0, 606.0, 627.0, 627.0, 636.0, 630.0, 633.0, 627.0, 587.0, 639.0, 627.0, 639.0, 636.0, 639.0, 636.0, 636.0, 627.0, 636.0, 627.0, 533.0, 630.0, 582.0, 639.0, 587.0, 636.0, 593.0, 633.0, 627.0, 639.0, 627.0, 639.0, 633.0, 636.0, 630.0, 630.0, 630.0, 636.0, 633.0, 636.0, 582.0, 627.0, 639.0, 639.0, 582.0, 636.0, 627.0, 587.0, 636.0, 633.0, 633.0, 624.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 319.0, 311.0, 285.0, 288.0, 322.0, 314.0, 311.0, 319.0, 322.0, 317.0, 320.0, 319.0, 309.0, 327.0, 319.0, 320.0, 313.0, 317.0, 296.0, 291.0, 311.0, 319.0, 308.0, 319.0, 314.0, 319.0, 319.0, 311.0, 315.0, 321.0, 312.0, 324.0, 296.0, 291.0, 322.0, 317.0, 319.0, 314.0, 319.0, 320.0, 296.0, 291.0, 303.0, 327.0, 309.0, 324.0, 258.0, 272.0, 321.0, 309.0, 319.0, 320.0, 324.0, 309.0, 287.0, 295.0, 319.0, 314.0, 293.0, 283.0, 296.0, 283.0, 316.0, 320.0, 313.0, 320.0, 314.0, 322.0, 322.0, 311.0, 322.0, 317.0, 311.0, 319.0, 288.0, 299.0, 319.0, 320.0, 311.0, 313.0, 319.0, 317.0, 330.0, 309.0, 317.0, 322.0, 314.0, 316.0, 311.0, 319.0, 319.0, 314.0, 294.0, 312.0, 310.0, 317.0, 309.0, 318.0, 320.0, 316.0, 314.0, 316.0, 309.0, 324.0, 313.0, 314.0, 291.0, 296.0, 329.0, 310.0, 321.0, 306.0, 323.0, 316.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 319.0, 317.0, 314.0, 313.0, 312.0, 324.0, 313.0, 314.0, 260.0, 273.0, 316.0, 314.0, 296.0, 286.0, 317.0, 322.0, 296.0, 291.0, 321.0, 315.0, 302.0, 291.0, 308.0, 325.0, 318.0, 309.0, 322.0, 317.0, 315.0, 312.0, 322.0, 317.0, 309.0, 324.0, 321.0, 315.0, 322.0, 308.0, 313.0, 317.0, 317.0, 313.0, 322.0, 314.0, 320.0, 313.0, 319.0, 317.0, 293.0, 289.0, 316.0, 311.0, 322.0, 317.0, 317.0, 322.0, 291.0, 291.0, 318.0, 318.0, 305.0, 322.0, 296.0, 291.0, 311.0, 325.0, 309.0, 324.0, 319.0, 314.0, 326.0, 298.0, 317.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9343560450317893, "mean_processing_ms": 0.25515871155662695, "mean_inference_ms": 1.5176487497740194}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7464000, "num_steps_sampled": 3980800, "sample_time_ms": 21156.178, "load_time_ms": 36.97, "grad_time_ms": 9869.74, "update_time_ms": 0.002, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005363213713280857, "policy_loss": -0.0070701222866773605, "vf_loss": 81.7235336303711, "vf_explained_var": 0.7686123847961426, "kl": 0.0019356707343831658, "entropy": 1.131825566291809, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3980800, "episodes_total": 9952, "training_iteration": 311, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-24-07", "timestamp": 1660256647, "time_this_iter_s": 30.375731229782104, "time_total_s": 15059.368428945541, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 
1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15059.368428945541, "timesteps_since_restore": 3980800, "iterations_since_restore": 311, "perf": {"cpu_util_percent": 34.15581395348838, "ram_util_percent": 58.86046511627907}} -{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 621.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 310.815}, "custom_metrics": {"sparse_reward_mean": 215.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 190.43, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.35, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.84, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.8, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.14, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.65, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.82, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, 
"useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.3, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.34, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.31, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.65, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.82, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.65, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.82, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 624.0, 630.0, 633.0, 636.0, 636.0, 618.0, 636.0, 627.0, 567.0, 636.0, 636.0, 582.0, 633.0, 561.0, 582.0, 582.0, 630.0, 627.0, 587.0, 630.0, 639.0, 630.0, 633.0, 633.0, 633.0, 633.0, 639.0, 630.0, 630.0, 630.0, 627.0, 627.0, 636.0, 627.0, 533.0, 630.0, 582.0, 639.0, 587.0, 636.0, 593.0, 633.0, 627.0, 639.0, 627.0, 639.0, 633.0, 636.0, 630.0, 630.0, 630.0, 636.0, 633.0, 636.0, 582.0, 627.0, 639.0, 639.0, 582.0, 636.0, 627.0, 587.0, 636.0, 633.0, 633.0, 624.0, 636.0, 636.0, 633.0, 582.0, 630.0, 573.0, 636.0, 630.0, 639.0, 639.0, 636.0, 639.0, 630.0, 587.0, 630.0, 627.0, 633.0, 630.0, 636.0, 636.0, 587.0, 639.0, 633.0, 639.0, 587.0, 630.0, 633.0, 530.0, 630.0, 639.0, 633.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 313.0, 311.0, 316.0, 314.0, 319.0, 314.0, 312.0, 324.0, 316.0, 320.0, 305.0, 313.0, 309.0, 327.0, 309.0, 318.0, 273.0, 294.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 311.0, 322.0, 280.0, 281.0, 290.0, 292.0, 291.0, 291.0, 311.0, 319.0, 313.0, 314.0, 296.0, 291.0, 308.0, 322.0, 322.0, 317.0, 320.0, 310.0, 318.0, 315.0, 319.0, 314.0, 317.0, 316.0, 316.0, 317.0, 317.0, 322.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 310.0, 317.0, 314.0, 313.0, 312.0, 324.0, 313.0, 314.0, 260.0, 273.0, 316.0, 314.0, 296.0, 286.0, 317.0, 322.0, 296.0, 291.0, 321.0, 315.0, 302.0, 291.0, 308.0, 325.0, 318.0, 309.0, 322.0, 317.0, 315.0, 312.0, 322.0, 317.0, 309.0, 324.0, 321.0, 315.0, 322.0, 308.0, 313.0, 317.0, 317.0, 313.0, 322.0, 314.0, 320.0, 313.0, 319.0, 317.0, 293.0, 289.0, 316.0, 311.0, 322.0, 317.0, 317.0, 322.0, 291.0, 291.0, 318.0, 318.0, 305.0, 322.0, 296.0, 291.0, 311.0, 325.0, 309.0, 324.0, 319.0, 314.0, 326.0, 298.0, 317.0, 319.0, 311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 319.0, 311.0, 285.0, 288.0, 322.0, 314.0, 311.0, 319.0, 322.0, 317.0, 320.0, 319.0, 309.0, 327.0, 319.0, 320.0, 313.0, 317.0, 296.0, 291.0, 311.0, 319.0, 308.0, 319.0, 314.0, 319.0, 319.0, 311.0, 315.0, 321.0, 312.0, 324.0, 296.0, 291.0, 322.0, 317.0, 319.0, 314.0, 319.0, 320.0, 296.0, 291.0, 303.0, 327.0, 309.0, 324.0, 258.0, 272.0, 321.0, 309.0, 319.0, 320.0, 324.0, 309.0, 287.0, 295.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9328126111792946, "mean_processing_ms": 0.25484999330507013, "mean_inference_ms": 
1.5158702163945572}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7488000, "num_steps_sampled": 3993600, "sample_time_ms": 20762.299, "load_time_ms": 37.78, "grad_time_ms": 9801.464, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010485065868124366, "policy_loss": -0.00627841567620635, "vf_loss": 78.91202545166016, "vf_explained_var": 0.7650337219238281, "kl": 0.0021341259125620127, "entropy": 1.1285619735717773, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 3993600, "episodes_total": 9984, "training_iteration": 312, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-24-36", "timestamp": 1660256676, "time_this_iter_s": 29.196868896484375, "time_total_s": 15088.565297842026, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15088.565297842026, "timesteps_since_restore": 3993600, "iterations_since_restore": 312, "perf": {"cpu_util_percent": 34.93170731707317, "ram_util_percent": 58.778048780487794}} -{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 620.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 310.045}, "custom_metrics": {"sparse_reward_mean": 215.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.69, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.75, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 16.86, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.98, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 16.63, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.09, 
"dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.36, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.34, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.63, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.63, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, 
"viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 639.0, 630.0, 570.0, 633.0, 639.0, 636.0, 582.0, 576.0, 630.0, 639.0, 636.0, 639.0, 636.0, 630.0, 590.0, 639.0, 582.0, 621.0, 573.0, 627.0, 516.0, 639.0, 572.0, 639.0, 579.0, 636.0, 630.0, 639.0, 627.0, 630.0, 633.0, 633.0, 624.0, 636.0, 636.0, 633.0, 582.0, 630.0, 573.0, 636.0, 630.0, 639.0, 639.0, 636.0, 639.0, 630.0, 587.0, 630.0, 627.0, 633.0, 630.0, 636.0, 636.0, 587.0, 639.0, 633.0, 639.0, 587.0, 630.0, 633.0, 530.0, 630.0, 639.0, 633.0, 582.0, 633.0, 636.0, 624.0, 630.0, 633.0, 636.0, 636.0, 618.0, 636.0, 627.0, 
567.0, 636.0, 636.0, 582.0, 633.0, 561.0, 582.0, 582.0, 630.0, 627.0, 587.0, 630.0, 639.0, 630.0, 633.0, 633.0, 633.0, 633.0, 639.0, 630.0, 630.0, 630.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 323.0, 307.0, 282.0, 288.0, 319.0, 314.0, 317.0, 322.0, 319.0, 317.0, 294.0, 288.0, 298.0, 278.0, 310.0, 320.0, 324.0, 315.0, 317.0, 319.0, 322.0, 317.0, 316.0, 320.0, 313.0, 317.0, 291.0, 299.0, 317.0, 322.0, 271.0, 311.0, 316.0, 305.0, 278.0, 295.0, 315.0, 312.0, 249.0, 267.0, 317.0, 322.0, 277.0, 295.0, 319.0, 320.0, 297.0, 282.0, 317.0, 319.0, 313.0, 317.0, 316.0, 323.0, 316.0, 311.0, 321.0, 309.0, 309.0, 324.0, 319.0, 314.0, 326.0, 298.0, 317.0, 319.0, 311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 319.0, 311.0, 285.0, 288.0, 322.0, 314.0, 311.0, 319.0, 322.0, 317.0, 320.0, 319.0, 309.0, 327.0, 319.0, 320.0, 313.0, 317.0, 296.0, 291.0, 311.0, 319.0, 308.0, 319.0, 314.0, 319.0, 319.0, 311.0, 315.0, 321.0, 312.0, 324.0, 296.0, 291.0, 322.0, 317.0, 319.0, 314.0, 319.0, 320.0, 296.0, 291.0, 303.0, 327.0, 309.0, 324.0, 258.0, 272.0, 321.0, 309.0, 319.0, 320.0, 324.0, 309.0, 287.0, 295.0, 319.0, 314.0, 319.0, 317.0, 313.0, 311.0, 316.0, 314.0, 319.0, 314.0, 312.0, 324.0, 316.0, 320.0, 305.0, 313.0, 309.0, 327.0, 309.0, 318.0, 273.0, 294.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 311.0, 322.0, 280.0, 281.0, 290.0, 292.0, 291.0, 291.0, 311.0, 319.0, 313.0, 314.0, 296.0, 291.0, 308.0, 322.0, 322.0, 317.0, 320.0, 310.0, 318.0, 315.0, 319.0, 
314.0, 317.0, 316.0, 316.0, 317.0, 317.0, 322.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 310.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9312771580791189, "mean_processing_ms": 0.25454145444446286, "mean_inference_ms": 1.514166938443501}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7512000, "num_steps_sampled": 4006400, "sample_time_ms": 20653.913, "load_time_ms": 37.661, "grad_time_ms": 9796.127, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016649666940793395, "policy_loss": -0.005544379819184542, "vf_loss": 77.78628540039062, "vf_explained_var": 0.7735397815704346, "kl": 0.0018068948993459344, "entropy": 1.1385550498962402, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4006400, "episodes_total": 10016, "training_iteration": 313, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-25-08", "timestamp": 1660256708, "time_this_iter_s": 32.13484477996826, "time_total_s": 15120.700142621994, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15120.700142621994, "timesteps_since_restore": 4006400, "iterations_since_restore": 313, "perf": {"cpu_util_percent": 34.30434782608695, "ram_util_percent": 58.8478260869565}} -{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 617.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.91}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.42, 
"shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.24, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.59, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.8, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.92, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.53, 
"potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.22, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.28, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.53, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.53, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 587.0, 639.0, 587.0, 630.0, 582.0, 636.0, 636.0, 627.0, 639.0, 639.0, 584.0, 587.0, 639.0, 582.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 587.0, 630.0, 633.0, 630.0, 630.0, 593.0, 582.0, 639.0, 633.0, 582.0, 633.0, 636.0, 624.0, 630.0, 633.0, 636.0, 636.0, 618.0, 636.0, 627.0, 567.0, 636.0, 
636.0, 582.0, 633.0, 561.0, 582.0, 582.0, 630.0, 627.0, 587.0, 630.0, 639.0, 630.0, 633.0, 633.0, 633.0, 633.0, 639.0, 630.0, 630.0, 630.0, 627.0, 630.0, 636.0, 639.0, 630.0, 570.0, 633.0, 639.0, 636.0, 582.0, 576.0, 630.0, 639.0, 636.0, 639.0, 636.0, 630.0, 590.0, 639.0, 582.0, 621.0, 573.0, 627.0, 516.0, 639.0, 572.0, 639.0, 579.0, 636.0, 630.0, 639.0, 627.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 292.0, 295.0, 314.0, 325.0, 301.0, 286.0, 311.0, 319.0, 295.0, 287.0, 319.0, 317.0, 317.0, 319.0, 305.0, 322.0, 315.0, 324.0, 317.0, 322.0, 298.0, 286.0, 298.0, 289.0, 319.0, 320.0, 286.0, 296.0, 309.0, 321.0, 296.0, 283.0, 318.0, 315.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 298.0, 289.0, 319.0, 311.0, 316.0, 317.0, 323.0, 307.0, 315.0, 315.0, 297.0, 296.0, 291.0, 291.0, 319.0, 320.0, 324.0, 309.0, 287.0, 295.0, 319.0, 314.0, 319.0, 317.0, 313.0, 311.0, 316.0, 314.0, 319.0, 314.0, 312.0, 324.0, 316.0, 320.0, 305.0, 313.0, 309.0, 327.0, 309.0, 318.0, 273.0, 294.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 311.0, 322.0, 280.0, 281.0, 290.0, 292.0, 291.0, 291.0, 311.0, 319.0, 313.0, 314.0, 296.0, 291.0, 308.0, 322.0, 322.0, 317.0, 320.0, 310.0, 318.0, 315.0, 319.0, 314.0, 317.0, 316.0, 316.0, 317.0, 317.0, 322.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 310.0, 317.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 323.0, 307.0, 282.0, 288.0, 319.0, 314.0, 317.0, 322.0, 319.0, 317.0, 294.0, 288.0, 298.0, 
278.0, 310.0, 320.0, 324.0, 315.0, 317.0, 319.0, 322.0, 317.0, 316.0, 320.0, 313.0, 317.0, 291.0, 299.0, 317.0, 322.0, 271.0, 311.0, 316.0, 305.0, 278.0, 295.0, 315.0, 312.0, 249.0, 267.0, 317.0, 322.0, 277.0, 295.0, 319.0, 320.0, 297.0, 282.0, 317.0, 319.0, 313.0, 317.0, 316.0, 323.0, 316.0, 311.0, 321.0, 309.0]}, "sampler_perf": {"mean_env_wait_ms": 0.929753206610253, "mean_processing_ms": 0.25423624145174695, "mean_inference_ms": 1.5125268663026497}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7536000, "num_steps_sampled": 4019200, "sample_time_ms": 20614.803, "load_time_ms": 37.576, "grad_time_ms": 9935.143, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005392418708652258, "policy_loss": -0.002403073711320758, "vf_loss": 83.61144256591797, "vf_explained_var": 0.7692582011222839, "kl": 0.0021780512761324644, "entropy": 1.131287932395935, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4019200, "episodes_total": 10048, "training_iteration": 314, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-25-41", "timestamp": 1660256741, "time_this_iter_s": 33.338226318359375, "time_total_s": 15154.038368940353, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15154.038368940353, "timesteps_since_restore": 4019200, "iterations_since_restore": 314, "perf": {"cpu_util_percent": 34.01914893617022, "ram_util_percent": 58.87021276595746}} -{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 614.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 228.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 307.41}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.82, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.53, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.65, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.42, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.09, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.46, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.7, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.03, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.24, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.24, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.46, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.7, "optimal_onion_potting_agent_1_min": 13, 
"optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.46, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.7, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 590.0, 633.0, 633.0, 630.0, 630.0, 627.0, 582.0, 573.0, 630.0, 630.0, 582.0, 636.0, 639.0, 630.0, 587.0, 639.0, 
630.0, 627.0, 630.0, 630.0, 639.0, 459.0, 582.0, 627.0, 582.0, 627.0, 636.0, 627.0, 633.0, 530.0, 639.0, 630.0, 630.0, 630.0, 627.0, 630.0, 636.0, 639.0, 630.0, 570.0, 633.0, 639.0, 636.0, 582.0, 576.0, 630.0, 639.0, 636.0, 639.0, 636.0, 630.0, 590.0, 639.0, 582.0, 621.0, 573.0, 627.0, 516.0, 639.0, 572.0, 639.0, 579.0, 636.0, 630.0, 639.0, 627.0, 630.0, 630.0, 587.0, 639.0, 587.0, 630.0, 582.0, 636.0, 636.0, 627.0, 639.0, 639.0, 584.0, 587.0, 639.0, 582.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 587.0, 630.0, 633.0, 630.0, 630.0, 593.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 297.0, 294.0, 296.0, 327.0, 306.0, 314.0, 319.0, 308.0, 322.0, 319.0, 311.0, 311.0, 316.0, 294.0, 288.0, 282.0, 291.0, 318.0, 312.0, 309.0, 321.0, 291.0, 291.0, 318.0, 318.0, 317.0, 322.0, 316.0, 314.0, 296.0, 291.0, 318.0, 321.0, 313.0, 317.0, 318.0, 309.0, 311.0, 319.0, 306.0, 324.0, 317.0, 322.0, 231.0, 228.0, 288.0, 294.0, 315.0, 312.0, 294.0, 288.0, 308.0, 319.0, 316.0, 320.0, 324.0, 303.0, 311.0, 322.0, 265.0, 265.0, 314.0, 325.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 310.0, 317.0, 316.0, 314.0, 319.0, 317.0, 320.0, 319.0, 323.0, 307.0, 282.0, 288.0, 319.0, 314.0, 317.0, 322.0, 319.0, 317.0, 294.0, 288.0, 298.0, 278.0, 310.0, 320.0, 324.0, 315.0, 317.0, 319.0, 322.0, 317.0, 316.0, 320.0, 313.0, 317.0, 291.0, 299.0, 317.0, 322.0, 271.0, 311.0, 316.0, 305.0, 278.0, 295.0, 315.0, 312.0, 249.0, 267.0, 317.0, 322.0, 277.0, 295.0, 319.0, 320.0, 297.0, 
282.0, 317.0, 319.0, 313.0, 317.0, 316.0, 323.0, 316.0, 311.0, 321.0, 309.0, 316.0, 314.0, 292.0, 295.0, 314.0, 325.0, 301.0, 286.0, 311.0, 319.0, 295.0, 287.0, 319.0, 317.0, 317.0, 319.0, 305.0, 322.0, 315.0, 324.0, 317.0, 322.0, 298.0, 286.0, 298.0, 289.0, 319.0, 320.0, 286.0, 296.0, 309.0, 321.0, 296.0, 283.0, 318.0, 315.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 298.0, 289.0, 319.0, 311.0, 316.0, 317.0, 323.0, 307.0, 315.0, 315.0, 297.0, 296.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9282592182519096, "mean_processing_ms": 0.253938832905362, "mean_inference_ms": 1.511735993488809}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7560000, "num_steps_sampled": 4032000, "sample_time_ms": 21363.9, "load_time_ms": 37.576, "grad_time_ms": 10037.47, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003922081959899515, "policy_loss": -0.007403677329421043, "vf_loss": 83.57759857177734, "vf_explained_var": 0.7612032294273376, "kl": 0.001659790868870914, "entropy": 1.12375009059906, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4032000, "episodes_total": 10080, "training_iteration": 315, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-26-24", "timestamp": 1660256784, "time_this_iter_s": 42.419737100601196, "time_total_s": 15196.458106040955, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": 
true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15196.458106040955, "timesteps_since_restore": 4032000, "iterations_since_restore": 315, "perf": {"cpu_util_percent": 30.92, 
"ram_util_percent": 58.89833333333333}} -{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 612.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.395}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.39, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.82, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.71, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.34, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.12, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, 
"onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.14, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 630.0, 587.0, 630.0, 587.0, 564.0, 576.0, 627.0, 636.0, 630.0, 526.0, 587.0, 582.0, 627.0, 633.0, 633.0, 630.0, 621.0, 579.0, 587.0, 582.0, 633.0, 636.0, 633.0, 576.0, 636.0, 633.0, 587.0, 639.0, 582.0, 636.0, 630.0, 639.0, 627.0, 630.0, 630.0, 587.0, 639.0, 587.0, 630.0, 582.0, 636.0, 636.0, 627.0, 639.0, 639.0, 584.0, 587.0, 639.0, 582.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 587.0, 630.0, 633.0, 630.0, 630.0, 593.0, 582.0, 587.0, 590.0, 633.0, 633.0, 630.0, 630.0, 627.0, 582.0, 573.0, 630.0, 630.0, 582.0, 636.0, 639.0, 630.0, 587.0, 639.0, 630.0, 627.0, 630.0, 630.0, 639.0, 459.0, 582.0, 627.0, 582.0, 627.0, 636.0, 627.0, 633.0, 530.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [315.0, 315.0, 316.0, 317.0, 306.0, 324.0, 290.0, 297.0, 316.0, 314.0, 298.0, 289.0, 286.0, 278.0, 287.0, 289.0, 299.0, 328.0, 313.0, 323.0, 321.0, 309.0, 261.0, 265.0, 280.0, 307.0, 291.0, 291.0, 313.0, 314.0, 314.0, 319.0, 318.0, 315.0, 311.0, 319.0, 318.0, 303.0, 291.0, 288.0, 293.0, 294.0, 286.0, 296.0, 321.0, 312.0, 316.0, 320.0, 311.0, 322.0, 288.0, 288.0, 324.0, 312.0, 309.0, 324.0, 300.0, 287.0, 322.0, 317.0, 294.0, 288.0, 322.0, 314.0, 313.0, 317.0, 316.0, 323.0, 316.0, 311.0, 321.0, 309.0, 316.0, 314.0, 292.0, 295.0, 314.0, 325.0, 301.0, 286.0, 311.0, 319.0, 295.0, 287.0, 319.0, 317.0, 317.0, 319.0, 305.0, 322.0, 315.0, 
324.0, 317.0, 322.0, 298.0, 286.0, 298.0, 289.0, 319.0, 320.0, 286.0, 296.0, 309.0, 321.0, 296.0, 283.0, 318.0, 315.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 298.0, 289.0, 319.0, 311.0, 316.0, 317.0, 323.0, 307.0, 315.0, 315.0, 297.0, 296.0, 291.0, 291.0, 290.0, 297.0, 294.0, 296.0, 327.0, 306.0, 314.0, 319.0, 308.0, 322.0, 319.0, 311.0, 311.0, 316.0, 294.0, 288.0, 282.0, 291.0, 318.0, 312.0, 309.0, 321.0, 291.0, 291.0, 318.0, 318.0, 317.0, 322.0, 316.0, 314.0, 296.0, 291.0, 318.0, 321.0, 313.0, 317.0, 318.0, 309.0, 311.0, 319.0, 306.0, 324.0, 317.0, 322.0, 231.0, 228.0, 288.0, 294.0, 315.0, 312.0, 294.0, 288.0, 308.0, 319.0, 316.0, 320.0, 324.0, 303.0, 311.0, 322.0, 265.0, 265.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9267730740231102, "mean_processing_ms": 0.2536434066530759, "mean_inference_ms": 1.510930494877483}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7584000, "num_steps_sampled": 4044800, "sample_time_ms": 21428.224, "load_time_ms": 37.331, "grad_time_ms": 10025.054, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037510674446821213, "policy_loss": -0.004000961780548096, "vf_loss": 83.20941925048828, "vf_explained_var": 0.7631545066833496, "kl": 0.0021077950950711966, "entropy": 1.1378254890441895, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4044800, "episodes_total": 10112, "training_iteration": 316, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-26-55", "timestamp": 1660256815, "time_this_iter_s": 31.23423171043396, "time_total_s": 15227.692337751389, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15227.692337751389, "timesteps_since_restore": 4044800, "iterations_since_restore": 316, "perf": {"cpu_util_percent": 34.638636363636365, "ram_util_percent": 58.979545454545466}} -{"episode_reward_max": 639.0, "episode_reward_min": 342.0, "episode_reward_mean": 610.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 166.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 305.135}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.07, "shaped_reward_min": 102, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.92, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.76, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.02, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.76, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.76, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 587.0, 630.0, 627.0, 627.0, 630.0, 624.0, 587.0, 627.0, 639.0, 627.0, 630.0, 582.0, 633.0, 593.0, 633.0, 639.0, 639.0, 630.0, 639.0, 582.0, 342.0, 630.0, 564.0, 639.0, 587.0, 587.0, 627.0, 633.0, 627.0, 627.0, 627.0, 630.0, 630.0, 593.0, 582.0, 587.0, 590.0, 633.0, 633.0, 630.0, 630.0, 627.0, 582.0, 573.0, 630.0, 630.0, 582.0, 636.0, 639.0, 630.0, 587.0, 639.0, 630.0, 627.0, 630.0, 630.0, 639.0, 459.0, 582.0, 627.0, 582.0, 627.0, 636.0, 627.0, 633.0, 530.0, 639.0, 630.0, 633.0, 630.0, 587.0, 630.0, 587.0, 564.0, 576.0, 627.0, 636.0, 630.0, 526.0, 587.0, 582.0, 627.0, 633.0, 633.0, 630.0, 621.0, 579.0, 587.0, 582.0, 633.0, 636.0, 633.0, 576.0, 636.0, 633.0, 587.0, 639.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 295.0, 292.0, 311.0, 319.0, 311.0, 316.0, 308.0, 319.0, 322.0, 308.0, 319.0, 305.0, 285.0, 302.0, 306.0, 321.0, 319.0, 320.0, 321.0, 306.0, 321.0, 309.0, 295.0, 287.0, 317.0, 316.0, 299.0, 294.0, 319.0, 314.0, 329.0, 310.0, 319.0, 320.0, 324.0, 306.0, 314.0, 325.0, 292.0, 290.0, 166.0, 176.0, 311.0, 319.0, 265.0, 299.0, 322.0, 317.0, 294.0, 293.0, 287.0, 300.0, 313.0, 314.0, 309.0, 324.0, 316.0, 311.0, 
315.0, 312.0, 319.0, 308.0, 323.0, 307.0, 315.0, 315.0, 297.0, 296.0, 291.0, 291.0, 290.0, 297.0, 294.0, 296.0, 327.0, 306.0, 314.0, 319.0, 308.0, 322.0, 319.0, 311.0, 311.0, 316.0, 294.0, 288.0, 282.0, 291.0, 318.0, 312.0, 309.0, 321.0, 291.0, 291.0, 318.0, 318.0, 317.0, 322.0, 316.0, 314.0, 296.0, 291.0, 318.0, 321.0, 313.0, 317.0, 318.0, 309.0, 311.0, 319.0, 306.0, 324.0, 317.0, 322.0, 231.0, 228.0, 288.0, 294.0, 315.0, 312.0, 294.0, 288.0, 308.0, 319.0, 316.0, 320.0, 324.0, 303.0, 311.0, 322.0, 265.0, 265.0, 314.0, 325.0, 315.0, 315.0, 316.0, 317.0, 306.0, 324.0, 290.0, 297.0, 316.0, 314.0, 298.0, 289.0, 286.0, 278.0, 287.0, 289.0, 299.0, 328.0, 313.0, 323.0, 321.0, 309.0, 261.0, 265.0, 280.0, 307.0, 291.0, 291.0, 313.0, 314.0, 314.0, 319.0, 318.0, 315.0, 311.0, 319.0, 318.0, 303.0, 291.0, 288.0, 293.0, 294.0, 286.0, 296.0, 321.0, 312.0, 316.0, 320.0, 311.0, 322.0, 288.0, 288.0, 324.0, 312.0, 309.0, 324.0, 300.0, 287.0, 322.0, 317.0, 294.0, 288.0, 322.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9252951052976854, "mean_processing_ms": 0.2533493735803219, "mean_inference_ms": 1.5101286664706006}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7608000, "num_steps_sampled": 4057600, "sample_time_ms": 21664.809, "load_time_ms": 37.646, "grad_time_ms": 10004.469, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0035793918650597334, "policy_loss": -0.004444916266947985, "vf_loss": 85.9527359008789, "vf_explained_var": 0.7614016532897949, "kl": 0.0019710592459887266, "entropy": 1.1419222354888916, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4057600, "episodes_total": 10144, "training_iteration": 317, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-27-26", "timestamp": 1660256846, "time_this_iter_s": 31.29483914375305, "time_total_s": 15258.987176895142, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15258.987176895142, "timesteps_since_restore": 4057600, "iterations_since_restore": 317, "perf": {"cpu_util_percent": 35.325, "ram_util_percent": 58.888636363636344}} -{"episode_reward_max": 639.0, "episode_reward_min": 342.0, "episode_reward_mean": 610.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 166.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 305.105}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.41, "shaped_reward_min": 102, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 
16.61, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.6, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.99, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.31, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.99, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 3, 
"soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.99, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.99, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 587.0, 639.0, 636.0, 639.0, 630.0, 639.0, 566.0, 639.0, 541.0, 590.0, 587.0, 627.0, 584.0, 639.0, 639.0, 518.0, 582.0, 590.0, 633.0, 630.0, 633.0, 627.0, 636.0, 630.0, 587.0, 633.0, 564.0, 639.0, 630.0, 633.0, 630.0, 627.0, 633.0, 530.0, 639.0, 630.0, 633.0, 630.0, 587.0, 630.0, 587.0, 564.0, 576.0, 627.0, 636.0, 630.0, 526.0, 587.0, 582.0, 627.0, 633.0, 633.0, 630.0, 621.0, 579.0, 587.0, 582.0, 633.0, 636.0, 633.0, 576.0, 636.0, 633.0, 587.0, 639.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 627.0, 630.0, 624.0, 587.0, 627.0, 639.0, 627.0, 630.0, 582.0, 633.0, 593.0, 633.0, 639.0, 639.0, 630.0, 639.0, 582.0, 342.0, 630.0, 564.0, 639.0, 587.0, 587.0, 627.0, 633.0, 627.0, 627.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 291.0, 296.0, 319.0, 320.0, 319.0, 317.0, 317.0, 322.0, 308.0, 322.0, 325.0, 314.0, 290.0, 276.0, 319.0, 320.0, 277.0, 264.0, 306.0, 284.0, 285.0, 302.0, 318.0, 309.0, 294.0, 290.0, 320.0, 319.0, 327.0, 312.0, 246.0, 272.0, 291.0, 291.0, 296.0, 
294.0, 311.0, 322.0, 316.0, 314.0, 326.0, 307.0, 308.0, 319.0, 322.0, 314.0, 310.0, 320.0, 293.0, 294.0, 319.0, 314.0, 277.0, 287.0, 319.0, 320.0, 316.0, 314.0, 321.0, 312.0, 308.0, 322.0, 324.0, 303.0, 311.0, 322.0, 265.0, 265.0, 314.0, 325.0, 315.0, 315.0, 316.0, 317.0, 306.0, 324.0, 290.0, 297.0, 316.0, 314.0, 298.0, 289.0, 286.0, 278.0, 287.0, 289.0, 299.0, 328.0, 313.0, 323.0, 321.0, 309.0, 261.0, 265.0, 280.0, 307.0, 291.0, 291.0, 313.0, 314.0, 314.0, 319.0, 318.0, 315.0, 311.0, 319.0, 318.0, 303.0, 291.0, 288.0, 293.0, 294.0, 286.0, 296.0, 321.0, 312.0, 316.0, 320.0, 311.0, 322.0, 288.0, 288.0, 324.0, 312.0, 309.0, 324.0, 300.0, 287.0, 322.0, 317.0, 294.0, 288.0, 322.0, 314.0, 319.0, 314.0, 295.0, 292.0, 311.0, 319.0, 311.0, 316.0, 308.0, 319.0, 322.0, 308.0, 319.0, 305.0, 285.0, 302.0, 306.0, 321.0, 319.0, 320.0, 321.0, 306.0, 321.0, 309.0, 295.0, 287.0, 317.0, 316.0, 299.0, 294.0, 319.0, 314.0, 329.0, 310.0, 319.0, 320.0, 324.0, 306.0, 314.0, 325.0, 292.0, 290.0, 166.0, 176.0, 311.0, 319.0, 265.0, 299.0, 322.0, 317.0, 294.0, 293.0, 287.0, 300.0, 313.0, 314.0, 309.0, 324.0, 316.0, 311.0, 315.0, 312.0, 319.0, 308.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9238104630131461, "mean_processing_ms": 0.2530528359146936, "mean_inference_ms": 1.5086822690810806}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7632000, "num_steps_sampled": 4070400, "sample_time_ms": 21837.173, "load_time_ms": 37.475, "grad_time_ms": 10172.214, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033606337383389473, "policy_loss": -0.004180160816758871, "vf_loss": 81.06964111328125, "vf_explained_var": 0.7688854336738586, "kl": 0.0033983252942562103, "entropy": 1.1323426961898804, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4070400, "episodes_total": 10176, "training_iteration": 318, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-27-57", "timestamp": 1660256877, "time_this_iter_s": 30.971107959747314, "time_total_s": 15289.958284854889, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15289.958284854889, "timesteps_since_restore": 4070400, "iterations_since_restore": 318, "perf": {"cpu_util_percent": 34.67727272727273, "ram_util_percent": 58.75454545454544}} -{"episode_reward_max": 639.0, "episode_reward_min": 342.0, "episode_reward_mean": 610.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 166.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 305.2}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.6, "shaped_reward_min": 102, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.79, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.67, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.13, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 627.0, 630.0, 633.0, 579.0, 530.0, 587.0, 570.0, 636.0, 627.0, 579.0, 636.0, 633.0, 630.0, 636.0, 624.0, 587.0, 621.0, 630.0, 582.0, 636.0, 630.0, 633.0, 596.0, 630.0, 541.0, 636.0, 630.0, 627.0, 582.0, 582.0, 587.0, 639.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 627.0, 630.0, 624.0, 587.0, 627.0, 639.0, 627.0, 630.0, 582.0, 633.0, 593.0, 633.0, 639.0, 639.0, 630.0, 639.0, 582.0, 342.0, 630.0, 564.0, 639.0, 587.0, 587.0, 627.0, 633.0, 627.0, 627.0, 627.0, 579.0, 587.0, 639.0, 636.0, 639.0, 630.0, 639.0, 566.0, 639.0, 541.0, 590.0, 587.0, 627.0, 584.0, 639.0, 639.0, 518.0, 582.0, 590.0, 633.0, 630.0, 633.0, 627.0, 636.0, 630.0, 587.0, 633.0, 564.0, 639.0, 630.0, 633.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 320.0, 310.0, 313.0, 
314.0, 316.0, 314.0, 319.0, 314.0, 291.0, 288.0, 256.0, 274.0, 283.0, 304.0, 286.0, 284.0, 319.0, 317.0, 312.0, 315.0, 291.0, 288.0, 319.0, 317.0, 319.0, 314.0, 308.0, 322.0, 317.0, 319.0, 314.0, 310.0, 293.0, 294.0, 307.0, 314.0, 313.0, 317.0, 291.0, 291.0, 312.0, 324.0, 313.0, 317.0, 324.0, 309.0, 304.0, 292.0, 308.0, 322.0, 272.0, 269.0, 309.0, 327.0, 306.0, 324.0, 318.0, 309.0, 293.0, 289.0, 291.0, 291.0, 300.0, 287.0, 322.0, 317.0, 294.0, 288.0, 322.0, 314.0, 319.0, 314.0, 295.0, 292.0, 311.0, 319.0, 311.0, 316.0, 308.0, 319.0, 322.0, 308.0, 319.0, 305.0, 285.0, 302.0, 306.0, 321.0, 319.0, 320.0, 321.0, 306.0, 321.0, 309.0, 295.0, 287.0, 317.0, 316.0, 299.0, 294.0, 319.0, 314.0, 329.0, 310.0, 319.0, 320.0, 324.0, 306.0, 314.0, 325.0, 292.0, 290.0, 166.0, 176.0, 311.0, 319.0, 265.0, 299.0, 322.0, 317.0, 294.0, 293.0, 287.0, 300.0, 313.0, 314.0, 309.0, 324.0, 316.0, 311.0, 315.0, 312.0, 319.0, 308.0, 297.0, 282.0, 291.0, 296.0, 319.0, 320.0, 319.0, 317.0, 317.0, 322.0, 308.0, 322.0, 325.0, 314.0, 290.0, 276.0, 319.0, 320.0, 277.0, 264.0, 306.0, 284.0, 285.0, 302.0, 318.0, 309.0, 294.0, 290.0, 320.0, 319.0, 327.0, 312.0, 246.0, 272.0, 291.0, 291.0, 296.0, 294.0, 311.0, 322.0, 316.0, 314.0, 326.0, 307.0, 308.0, 319.0, 322.0, 314.0, 310.0, 320.0, 293.0, 294.0, 319.0, 314.0, 277.0, 287.0, 319.0, 320.0, 316.0, 314.0, 321.0, 312.0, 308.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9223351501134641, "mean_processing_ms": 0.2527584584336856, "mean_inference_ms": 1.5072340124708836}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7656000, "num_steps_sampled": 4083200, "sample_time_ms": 21920.847, "load_time_ms": 37.336, "grad_time_ms": 10402.485, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003446048649493605, "policy_loss": -0.007382390554994345, "vf_loss": 82.95357513427734, "vf_explained_var": 0.759884774684906, "kl": 0.0017484420677646995, "entropy": 1.1367279291152954, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4083200, "episodes_total": 10208, "training_iteration": 319, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-28-31", "timestamp": 1660256911, "time_this_iter_s": 33.19297218322754, "time_total_s": 15323.151257038116, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15323.151257038116, "timesteps_since_restore": 4083200, "iterations_since_restore": 319, "perf": {"cpu_util_percent": 34.074468085106375, "ram_util_percent": 59.221276595744676}} -{"episode_reward_max": 639.0, "episode_reward_min": 518.0, "episode_reward_mean": 612.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 246.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.085}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.17, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.56, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.53, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.91, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.41, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.58, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.13, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 630.0, 639.0, 633.0, 627.0, 590.0, 587.0, 633.0, 630.0, 536.0, 624.0, 630.0, 575.0, 636.0, 587.0, 590.0, 630.0, 587.0, 633.0, 587.0, 633.0, 633.0, 630.0, 570.0, 630.0, 587.0, 633.0, 582.0, 627.0, 633.0, 636.0, 633.0, 627.0, 627.0, 627.0, 579.0, 587.0, 639.0, 636.0, 639.0, 630.0, 639.0, 566.0, 639.0, 541.0, 590.0, 587.0, 627.0, 584.0, 639.0, 639.0, 518.0, 582.0, 590.0, 633.0, 630.0, 633.0, 627.0, 636.0, 630.0, 587.0, 633.0, 564.0, 639.0, 630.0, 633.0, 630.0, 582.0, 630.0, 627.0, 630.0, 633.0, 579.0, 530.0, 587.0, 570.0, 636.0, 627.0, 579.0, 636.0, 633.0, 630.0, 636.0, 624.0, 587.0, 621.0, 630.0, 582.0, 636.0, 630.0, 633.0, 596.0, 630.0, 541.0, 636.0, 630.0, 627.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 311.0, 319.0, 313.0, 317.0, 316.0, 323.0, 314.0, 319.0, 311.0, 316.0, 299.0, 291.0, 296.0, 291.0, 311.0, 322.0, 313.0, 317.0, 257.0, 279.0, 318.0, 306.0, 308.0, 322.0, 281.0, 294.0, 314.0, 322.0, 291.0, 296.0, 298.0, 292.0, 309.0, 321.0, 290.0, 297.0, 313.0, 320.0, 293.0, 294.0, 321.0, 312.0, 319.0, 314.0, 314.0, 316.0, 288.0, 282.0, 315.0, 315.0, 291.0, 296.0, 319.0, 314.0, 302.0, 280.0, 313.0, 314.0, 314.0, 319.0, 313.0, 323.0, 309.0, 324.0, 316.0, 311.0, 315.0, 312.0, 319.0, 308.0, 297.0, 282.0, 291.0, 296.0, 319.0, 320.0, 319.0, 317.0, 317.0, 322.0, 308.0, 322.0, 325.0, 314.0, 290.0, 276.0, 319.0, 320.0, 277.0, 264.0, 306.0, 284.0, 285.0, 302.0, 318.0, 309.0, 294.0, 290.0, 320.0, 319.0, 327.0, 312.0, 246.0, 272.0, 291.0, 291.0, 296.0, 294.0, 311.0, 322.0, 316.0, 314.0, 326.0, 307.0, 308.0, 319.0, 322.0, 314.0, 310.0, 320.0, 293.0, 294.0, 319.0, 314.0, 277.0, 287.0, 319.0, 320.0, 316.0, 314.0, 321.0, 312.0, 308.0, 322.0, 292.0, 290.0, 320.0, 310.0, 313.0, 314.0, 316.0, 314.0, 319.0, 314.0, 291.0, 288.0, 256.0, 274.0, 283.0, 304.0, 286.0, 284.0, 319.0, 317.0, 312.0, 315.0, 291.0, 288.0, 319.0, 317.0, 319.0, 314.0, 308.0, 322.0, 317.0, 319.0, 314.0, 310.0, 293.0, 294.0, 307.0, 314.0, 313.0, 317.0, 291.0, 291.0, 312.0, 324.0, 313.0, 317.0, 324.0, 309.0, 304.0, 292.0, 308.0, 322.0, 272.0, 269.0, 309.0, 327.0, 306.0, 324.0, 318.0, 309.0, 293.0, 289.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9208740877938348, "mean_processing_ms": 0.25246719663876194, "mean_inference_ms": 1.5059256221319566}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7680000, "num_steps_sampled": 4096000, "sample_time_ms": 22117.687, "load_time_ms": 37.357, "grad_time_ms": 10501.528, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.0030171778053045273, "policy_loss": -0.004927590023726225, "vf_loss": 85.14810943603516, "vf_explained_var": 0.76070237159729, "kl": 0.002144080586731434, "entropy": 1.140079379081726, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4096000, "episodes_total": 10240, "training_iteration": 320, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-29-03", "timestamp": 1660256943, "time_this_iter_s": 32.6832230091095, "time_total_s": 15355.834480047226, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15355.834480047226, "timesteps_since_restore": 4096000, "iterations_since_restore": 320, "perf": {"cpu_util_percent": 32.95652173913044, "ram_util_percent": 59.06739130434784}} -{"episode_reward_max": 639.0, "episode_reward_min": 530.0, "episode_reward_mean": 613.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.82}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.44, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.75, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.81, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.11, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.37, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.04, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.47, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, 
"useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.06, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.04, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.04, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 582.0, 576.0, 587.0, 630.0, 636.0, 630.0, 636.0, 630.0, 636.0, 624.0, 639.0, 582.0, 624.0, 582.0, 633.0, 587.0, 636.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 636.0, 633.0, 633.0, 627.0, 587.0, 633.0, 582.0, 579.0, 639.0, 630.0, 633.0, 630.0, 582.0, 630.0, 627.0, 630.0, 633.0, 579.0, 530.0, 587.0, 570.0, 636.0, 627.0, 579.0, 636.0, 633.0, 630.0, 636.0, 624.0, 587.0, 621.0, 630.0, 582.0, 636.0, 630.0, 633.0, 596.0, 630.0, 541.0, 636.0, 630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 639.0, 633.0, 627.0, 590.0, 587.0, 633.0, 630.0, 536.0, 624.0, 630.0, 575.0, 636.0, 587.0, 590.0, 630.0, 587.0, 633.0, 587.0, 633.0, 633.0, 630.0, 570.0, 630.0, 587.0, 633.0, 582.0, 627.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 282.0, 287.0, 295.0, 289.0, 287.0, 299.0, 288.0, 326.0, 304.0, 321.0, 315.0, 311.0, 319.0, 314.0, 322.0, 317.0, 313.0, 322.0, 314.0, 311.0, 313.0, 317.0, 322.0, 287.0, 295.0, 313.0, 311.0, 302.0, 280.0, 314.0, 319.0, 295.0, 292.0, 322.0, 314.0, 316.0, 317.0, 324.0, 312.0, 294.0, 293.0, 314.0, 322.0, 311.0, 322.0, 324.0, 309.0, 316.0, 320.0, 318.0, 315.0, 315.0, 318.0, 307.0, 320.0, 296.0, 291.0, 313.0, 320.0, 293.0, 289.0, 288.0, 291.0, 319.0, 320.0, 316.0, 314.0, 321.0, 312.0, 308.0, 322.0, 292.0, 290.0, 320.0, 310.0, 313.0, 314.0, 316.0, 314.0, 319.0, 314.0, 291.0, 288.0, 256.0, 274.0, 283.0, 304.0, 286.0, 284.0, 319.0, 317.0, 312.0, 315.0, 291.0, 288.0, 319.0, 317.0, 319.0, 314.0, 308.0, 322.0, 317.0, 319.0, 314.0, 310.0, 293.0, 294.0, 307.0, 314.0, 313.0, 317.0, 291.0, 291.0, 312.0, 324.0, 313.0, 317.0, 324.0, 309.0, 304.0, 292.0, 308.0, 322.0, 272.0, 269.0, 309.0, 327.0, 306.0, 324.0, 318.0, 309.0, 293.0, 289.0, 291.0, 291.0, 313.0, 314.0, 311.0, 319.0, 313.0, 317.0, 316.0, 323.0, 314.0, 319.0, 311.0, 316.0, 299.0, 291.0, 296.0, 291.0, 311.0, 322.0, 313.0, 317.0, 257.0, 279.0, 318.0, 306.0, 308.0, 322.0, 281.0, 294.0, 314.0, 322.0, 291.0, 296.0, 298.0, 292.0, 309.0, 321.0, 290.0, 297.0, 313.0, 320.0, 293.0, 294.0, 321.0, 312.0, 319.0, 314.0, 314.0, 316.0, 288.0, 282.0, 315.0, 315.0, 291.0, 296.0, 319.0, 314.0, 302.0, 280.0, 313.0, 314.0, 314.0, 319.0, 313.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9194369912164846, "mean_processing_ms": 0.25218136491616727, "mean_inference_ms": 1.5049782377407859}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7704000, 
"num_steps_sampled": 4108800, "sample_time_ms": 22673.048, "load_time_ms": 37.017, "grad_time_ms": 10506.62, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004381106700748205, "policy_loss": -0.0034678278025239706, "vf_loss": 84.1359634399414, "vf_explained_var": 0.762717068195343, "kl": 0.0020634233951568604, "entropy": 1.12932288646698, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4108800, "episodes_total": 10272, "training_iteration": 321, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-29-39", "timestamp": 1660256979, "time_this_iter_s": 35.97740912437439, "time_total_s": 15391.8118891716, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15391.8118891716, "timesteps_since_restore": 4108800, "iterations_since_restore": 321, "perf": {"cpu_util_percent": 32.44705882352941, "ram_util_percent": 58.78039215686273}} -{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 611.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.78}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.96, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.75, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.77, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.12, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.12, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 3, 
"dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.12, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.12, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 587.0, 636.0, 627.0, 582.0, 582.0, 579.0, 630.0, 636.0, 573.0, 639.0, 633.0, 578.0, 564.0, 579.0, 630.0, 639.0, 587.0, 636.0, 627.0, 636.0, 633.0, 579.0, 636.0, 536.0, 630.0, 633.0, 584.0, 587.0, 567.0, 627.0, 636.0, 630.0, 627.0, 582.0, 582.0, 627.0, 630.0, 630.0, 639.0, 633.0, 627.0, 590.0, 587.0, 633.0, 630.0, 536.0, 624.0, 630.0, 575.0, 636.0, 587.0, 590.0, 630.0, 587.0, 633.0, 587.0, 633.0, 633.0, 630.0, 570.0, 630.0, 587.0, 633.0, 582.0, 627.0, 633.0, 636.0, 567.0, 582.0, 576.0, 587.0, 630.0, 636.0, 630.0, 636.0, 630.0, 636.0, 624.0, 639.0, 582.0, 624.0, 582.0, 633.0, 587.0, 636.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 636.0, 633.0, 
633.0, 627.0, 587.0, 633.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 295.0, 285.0, 302.0, 316.0, 320.0, 321.0, 306.0, 292.0, 290.0, 285.0, 297.0, 289.0, 290.0, 319.0, 311.0, 322.0, 314.0, 284.0, 289.0, 322.0, 317.0, 305.0, 328.0, 301.0, 277.0, 287.0, 277.0, 295.0, 284.0, 306.0, 324.0, 324.0, 315.0, 285.0, 302.0, 322.0, 314.0, 308.0, 319.0, 316.0, 320.0, 314.0, 319.0, 291.0, 288.0, 319.0, 317.0, 262.0, 274.0, 308.0, 322.0, 316.0, 317.0, 292.0, 292.0, 290.0, 297.0, 285.0, 282.0, 314.0, 313.0, 319.0, 317.0, 306.0, 324.0, 318.0, 309.0, 293.0, 289.0, 291.0, 291.0, 313.0, 314.0, 311.0, 319.0, 313.0, 317.0, 316.0, 323.0, 314.0, 319.0, 311.0, 316.0, 299.0, 291.0, 296.0, 291.0, 311.0, 322.0, 313.0, 317.0, 257.0, 279.0, 318.0, 306.0, 308.0, 322.0, 281.0, 294.0, 314.0, 322.0, 291.0, 296.0, 298.0, 292.0, 309.0, 321.0, 290.0, 297.0, 313.0, 320.0, 293.0, 294.0, 321.0, 312.0, 319.0, 314.0, 314.0, 316.0, 288.0, 282.0, 315.0, 315.0, 291.0, 296.0, 319.0, 314.0, 302.0, 280.0, 313.0, 314.0, 314.0, 319.0, 313.0, 323.0, 285.0, 282.0, 287.0, 295.0, 289.0, 287.0, 299.0, 288.0, 326.0, 304.0, 321.0, 315.0, 311.0, 319.0, 314.0, 322.0, 317.0, 313.0, 322.0, 314.0, 311.0, 313.0, 317.0, 322.0, 287.0, 295.0, 313.0, 311.0, 302.0, 280.0, 314.0, 319.0, 295.0, 292.0, 322.0, 314.0, 316.0, 317.0, 324.0, 312.0, 294.0, 293.0, 314.0, 322.0, 311.0, 322.0, 324.0, 309.0, 316.0, 320.0, 318.0, 315.0, 315.0, 318.0, 307.0, 320.0, 296.0, 291.0, 313.0, 320.0, 293.0, 289.0, 288.0, 291.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.9180140863586184, "mean_processing_ms": 0.2518987033904737, "mean_inference_ms": 1.5041907922787607}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7728000, "num_steps_sampled": 4121600, "sample_time_ms": 23101.321, "load_time_ms": 36.599, "grad_time_ms": 10589.639, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00461611757054925, "policy_loss": -0.003138140542432666, "vf_loss": 83.23612213134766, "vf_explained_var": 0.7696110606193542, "kl": 0.0018815431976690888, "entropy": 1.1387158632278442, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4121600, "episodes_total": 10304, "training_iteration": 322, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-30-14", "timestamp": 1660257014, "time_this_iter_s": 34.30680704116821, "time_total_s": 15426.118696212769, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15426.118696212769, "timesteps_since_restore": 4121600, "iterations_since_restore": 322, "perf": {"cpu_util_percent": 32.239583333333336, "ram_util_percent": 58.845833333333324}} -{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 612.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 262.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.44}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.08, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.77, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.03, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, 
"potting_onion_agent_1_mean": 18.01, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.28, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.01, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.01, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 630.0, 630.0, 627.0, 630.0, 636.0, 636.0, 636.0, 639.0, 536.0, 627.0, 630.0, 633.0, 639.0, 573.0, 636.0, 636.0, 561.0, 636.0, 582.0, 587.0, 633.0, 561.0, 639.0, 587.0, 633.0, 627.0, 570.0, 633.0, 579.0, 639.0, 582.0, 627.0, 633.0, 636.0, 567.0, 582.0, 576.0, 587.0, 630.0, 636.0, 630.0, 636.0, 630.0, 636.0, 624.0, 639.0, 582.0, 624.0, 582.0, 633.0, 587.0, 636.0, 633.0, 636.0, 587.0, 636.0, 633.0, 
633.0, 636.0, 633.0, 633.0, 627.0, 587.0, 633.0, 582.0, 579.0, 587.0, 587.0, 636.0, 627.0, 582.0, 582.0, 579.0, 630.0, 636.0, 573.0, 639.0, 633.0, 578.0, 564.0, 579.0, 630.0, 639.0, 587.0, 636.0, 627.0, 636.0, 633.0, 579.0, 636.0, 536.0, 630.0, 633.0, 584.0, 587.0, 567.0, 627.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 314.0, 322.0, 316.0, 314.0, 319.0, 311.0, 308.0, 319.0, 318.0, 312.0, 320.0, 316.0, 319.0, 317.0, 317.0, 319.0, 319.0, 320.0, 262.0, 274.0, 311.0, 316.0, 319.0, 311.0, 322.0, 311.0, 319.0, 320.0, 291.0, 282.0, 318.0, 318.0, 316.0, 320.0, 279.0, 282.0, 324.0, 312.0, 295.0, 287.0, 293.0, 294.0, 319.0, 314.0, 283.0, 278.0, 319.0, 320.0, 288.0, 299.0, 319.0, 314.0, 316.0, 311.0, 276.0, 294.0, 324.0, 309.0, 288.0, 291.0, 319.0, 320.0, 302.0, 280.0, 313.0, 314.0, 314.0, 319.0, 313.0, 323.0, 285.0, 282.0, 287.0, 295.0, 289.0, 287.0, 299.0, 288.0, 326.0, 304.0, 321.0, 315.0, 311.0, 319.0, 314.0, 322.0, 317.0, 313.0, 322.0, 314.0, 311.0, 313.0, 317.0, 322.0, 287.0, 295.0, 313.0, 311.0, 302.0, 280.0, 314.0, 319.0, 295.0, 292.0, 322.0, 314.0, 316.0, 317.0, 324.0, 312.0, 294.0, 293.0, 314.0, 322.0, 311.0, 322.0, 324.0, 309.0, 316.0, 320.0, 318.0, 315.0, 315.0, 318.0, 307.0, 320.0, 296.0, 291.0, 313.0, 320.0, 293.0, 289.0, 288.0, 291.0, 292.0, 295.0, 285.0, 302.0, 316.0, 320.0, 321.0, 306.0, 292.0, 290.0, 285.0, 297.0, 289.0, 290.0, 319.0, 311.0, 322.0, 314.0, 284.0, 289.0, 322.0, 317.0, 305.0, 328.0, 301.0, 277.0, 287.0, 277.0, 295.0, 284.0, 306.0, 
324.0, 324.0, 315.0, 285.0, 302.0, 322.0, 314.0, 308.0, 319.0, 316.0, 320.0, 314.0, 319.0, 291.0, 288.0, 319.0, 317.0, 262.0, 274.0, 308.0, 322.0, 316.0, 317.0, 292.0, 292.0, 290.0, 297.0, 285.0, 282.0, 314.0, 313.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9165951119530592, "mean_processing_ms": 0.25161700044619856, "mean_inference_ms": 1.5033026848004383}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7752000, "num_steps_sampled": 4134400, "sample_time_ms": 23061.265, "load_time_ms": 36.453, "grad_time_ms": 10583.338, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006471332162618637, "policy_loss": -0.0015162205090746284, "vf_loss": 85.53211212158203, "vf_explained_var": 0.7684184908866882, "kl": 0.002009378978982568, "entropy": 1.1313238143920898, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4134400, "episodes_total": 10336, "training_iteration": 323, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-30-45", "timestamp": 1660257045, "time_this_iter_s": 31.667726039886475, "time_total_s": 15457.786422252655, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15457.786422252655, "timesteps_since_restore": 4134400, "iterations_since_restore": 323, "perf": {"cpu_util_percent": 32.73111111111111, "ram_util_percent": 58.83555555555553}} -{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 610.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 262.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.28}, 
"custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.36, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.82, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [596.0, 636.0, 639.0, 633.0, 633.0, 584.0, 582.0, 579.0, 636.0, 639.0, 587.0, 579.0, 558.0, 627.0, 639.0, 573.0, 630.0, 627.0, 581.0, 639.0, 639.0, 639.0, 636.0, 636.0, 567.0, 576.0, 573.0, 570.0, 621.0, 
636.0, 630.0, 630.0, 587.0, 633.0, 582.0, 579.0, 587.0, 587.0, 636.0, 627.0, 582.0, 582.0, 579.0, 630.0, 636.0, 573.0, 639.0, 633.0, 578.0, 564.0, 579.0, 630.0, 639.0, 587.0, 636.0, 627.0, 636.0, 633.0, 579.0, 636.0, 536.0, 630.0, 633.0, 584.0, 587.0, 567.0, 627.0, 636.0, 633.0, 636.0, 630.0, 630.0, 627.0, 630.0, 636.0, 636.0, 636.0, 639.0, 536.0, 627.0, 630.0, 633.0, 639.0, 573.0, 636.0, 636.0, 561.0, 636.0, 582.0, 587.0, 633.0, 561.0, 639.0, 587.0, 633.0, 627.0, 570.0, 633.0, 579.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 299.0, 319.0, 317.0, 319.0, 320.0, 317.0, 316.0, 317.0, 316.0, 304.0, 280.0, 288.0, 294.0, 289.0, 290.0, 322.0, 314.0, 317.0, 322.0, 288.0, 299.0, 290.0, 289.0, 274.0, 284.0, 316.0, 311.0, 322.0, 317.0, 290.0, 283.0, 319.0, 311.0, 314.0, 313.0, 288.0, 293.0, 319.0, 320.0, 322.0, 317.0, 325.0, 314.0, 314.0, 322.0, 319.0, 317.0, 296.0, 271.0, 281.0, 295.0, 277.0, 296.0, 287.0, 283.0, 307.0, 314.0, 314.0, 322.0, 313.0, 317.0, 316.0, 314.0, 296.0, 291.0, 313.0, 320.0, 293.0, 289.0, 288.0, 291.0, 292.0, 295.0, 285.0, 302.0, 316.0, 320.0, 321.0, 306.0, 292.0, 290.0, 285.0, 297.0, 289.0, 290.0, 319.0, 311.0, 322.0, 314.0, 284.0, 289.0, 322.0, 317.0, 305.0, 328.0, 301.0, 277.0, 287.0, 277.0, 295.0, 284.0, 306.0, 324.0, 324.0, 315.0, 285.0, 302.0, 322.0, 314.0, 308.0, 319.0, 316.0, 320.0, 314.0, 319.0, 291.0, 288.0, 319.0, 317.0, 262.0, 274.0, 308.0, 322.0, 316.0, 317.0, 292.0, 292.0, 290.0, 297.0, 285.0, 282.0, 314.0, 313.0, 319.0, 317.0, 316.0, 
317.0, 314.0, 322.0, 316.0, 314.0, 319.0, 311.0, 308.0, 319.0, 318.0, 312.0, 320.0, 316.0, 319.0, 317.0, 317.0, 319.0, 319.0, 320.0, 262.0, 274.0, 311.0, 316.0, 319.0, 311.0, 322.0, 311.0, 319.0, 320.0, 291.0, 282.0, 318.0, 318.0, 316.0, 320.0, 279.0, 282.0, 324.0, 312.0, 295.0, 287.0, 293.0, 294.0, 319.0, 314.0, 283.0, 278.0, 319.0, 320.0, 288.0, 299.0, 319.0, 314.0, 316.0, 311.0, 276.0, 294.0, 324.0, 309.0, 288.0, 291.0, 319.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9151684979449644, "mean_processing_ms": 0.2513316747000567, "mean_inference_ms": 1.5020585872568248}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7776000, "num_steps_sampled": 4147200, "sample_time_ms": 23007.183, "load_time_ms": 36.594, "grad_time_ms": 10309.706, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001403640490025282, "policy_loss": -0.009379498660564423, "vf_loss": 85.44359588623047, "vf_explained_var": 0.7652726769447327, "kl": 0.0018997077131643891, "entropy": 1.136988639831543, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4147200, "episodes_total": 10368, "training_iteration": 324, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-31-15", "timestamp": 1660257075, "time_this_iter_s": 30.05816674232483, "time_total_s": 15487.84458899498, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15487.84458899498, "timesteps_since_restore": 4147200, "iterations_since_restore": 324, "perf": {"cpu_util_percent": 31.88372093023256, "ram_util_percent": 58.74418604651163}} -{"episode_reward_max": 639.0, "episode_reward_min": 
465.0, "episode_reward_mean": 611.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.985}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.17, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.44, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.37, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.97, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, 
"onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.63, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.55, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.24, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.63, "optimal_onion_potting_agent_0_min": 12, 
"optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.55, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.63, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.55, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [630.0, 582.0, 630.0, 630.0, 630.0, 579.0, 636.0, 633.0, 593.0, 630.0, 587.0, 575.0, 633.0, 470.0, 633.0, 465.0, 633.0, 498.0, 633.0, 627.0, 636.0, 639.0, 627.0, 630.0, 630.0, 636.0, 627.0, 636.0, 636.0, 627.0, 630.0, 639.0, 587.0, 567.0, 627.0, 636.0, 633.0, 636.0, 630.0, 630.0, 627.0, 630.0, 636.0, 636.0, 636.0, 639.0, 536.0, 627.0, 630.0, 633.0, 639.0, 573.0, 636.0, 636.0, 561.0, 636.0, 582.0, 587.0, 633.0, 561.0, 639.0, 587.0, 633.0, 627.0, 570.0, 633.0, 579.0, 639.0, 596.0, 636.0, 639.0, 633.0, 633.0, 584.0, 582.0, 579.0, 636.0, 639.0, 587.0, 579.0, 558.0, 627.0, 639.0, 573.0, 630.0, 627.0, 581.0, 639.0, 639.0, 639.0, 636.0, 636.0, 567.0, 576.0, 573.0, 570.0, 621.0, 636.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 296.0, 286.0, 321.0, 309.0, 307.0, 323.0, 321.0, 309.0, 285.0, 294.0, 327.0, 309.0, 314.0, 319.0, 293.0, 300.0, 309.0, 321.0, 299.0, 288.0, 285.0, 290.0, 323.0, 310.0, 231.0, 239.0, 309.0, 324.0, 236.0, 229.0, 307.0, 326.0, 250.0, 248.0, 319.0, 314.0, 308.0, 319.0, 319.0, 317.0, 319.0, 320.0, 312.0, 315.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 305.0, 322.0, 321.0, 315.0, 313.0, 323.0, 316.0, 311.0, 312.0, 318.0, 319.0, 320.0, 290.0, 297.0, 285.0, 282.0, 314.0, 313.0, 319.0, 317.0, 316.0, 317.0, 314.0, 322.0, 316.0, 314.0, 319.0, 311.0, 308.0, 319.0, 318.0, 312.0, 320.0, 316.0, 319.0, 317.0, 317.0, 319.0, 319.0, 320.0, 262.0, 274.0, 311.0, 316.0, 319.0, 311.0, 322.0, 311.0, 319.0, 320.0, 291.0, 282.0, 318.0, 
318.0, 316.0, 320.0, 279.0, 282.0, 324.0, 312.0, 295.0, 287.0, 293.0, 294.0, 319.0, 314.0, 283.0, 278.0, 319.0, 320.0, 288.0, 299.0, 319.0, 314.0, 316.0, 311.0, 276.0, 294.0, 324.0, 309.0, 288.0, 291.0, 319.0, 320.0, 297.0, 299.0, 319.0, 317.0, 319.0, 320.0, 317.0, 316.0, 317.0, 316.0, 304.0, 280.0, 288.0, 294.0, 289.0, 290.0, 322.0, 314.0, 317.0, 322.0, 288.0, 299.0, 290.0, 289.0, 274.0, 284.0, 316.0, 311.0, 322.0, 317.0, 290.0, 283.0, 319.0, 311.0, 314.0, 313.0, 288.0, 293.0, 319.0, 320.0, 322.0, 317.0, 325.0, 314.0, 314.0, 322.0, 319.0, 317.0, 296.0, 271.0, 281.0, 295.0, 277.0, 296.0, 287.0, 283.0, 307.0, 314.0, 314.0, 322.0, 313.0, 317.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9137344731192653, "mean_processing_ms": 0.2510432909241615, "mean_inference_ms": 1.5004598778963754}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7800000, "num_steps_sampled": 4160000, "sample_time_ms": 21864.49, "load_time_ms": 36.474, "grad_time_ms": 10036.502, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015124385245144367, "policy_loss": -0.006040909793227911, "vf_loss": 81.20950317382812, "vf_explained_var": 0.7680754661560059, "kl": 0.0019407202489674091, "entropy": 1.135194182395935, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4160000, "episodes_total": 10400, "training_iteration": 325, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-31-44", "timestamp": 1660257104, "time_this_iter_s": 28.25439429283142, "time_total_s": 15516.098983287811, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
15516.098983287811, "timesteps_since_restore": 4160000, "iterations_since_restore": 325, "perf": {"cpu_util_percent": 34.2675, "ram_util_percent": 58.785000000000004}} -{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 613.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.815}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.63, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.51, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 17.01, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, 
"useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.55, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.64, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.15, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.31, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.24, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.64, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.64, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 630.0, 587.0, 639.0, 587.0, 633.0, 630.0, 639.0, 633.0, 582.0, 567.0, 630.0, 630.0, 633.0, 636.0, 576.0, 633.0, 587.0, 624.0, 636.0, 639.0, 639.0, 636.0, 630.0, 627.0, 612.0, 633.0, 570.0, 636.0, 630.0, 636.0, 570.0, 633.0, 579.0, 639.0, 596.0, 636.0, 639.0, 633.0, 633.0, 584.0, 582.0, 579.0, 636.0, 639.0, 587.0, 579.0, 558.0, 627.0, 639.0, 573.0, 630.0, 627.0, 581.0, 639.0, 639.0, 639.0, 636.0, 636.0, 567.0, 576.0, 573.0, 570.0, 621.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 630.0, 630.0, 579.0, 636.0, 633.0, 593.0, 630.0, 587.0, 575.0, 633.0, 470.0, 633.0, 465.0, 633.0, 498.0, 633.0, 627.0, 636.0, 639.0, 627.0, 630.0, 630.0, 636.0, 627.0, 636.0, 636.0, 627.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 312.0, 322.0, 314.0, 310.0, 320.0, 286.0, 301.0, 319.0, 320.0, 290.0, 297.0, 314.0, 319.0, 311.0, 319.0, 321.0, 318.0, 314.0, 319.0, 289.0, 293.0, 291.0, 276.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 314.0, 322.0, 288.0, 288.0, 317.0, 316.0, 298.0, 289.0, 316.0, 308.0, 319.0, 317.0, 316.0, 323.0, 317.0, 322.0, 311.0, 325.0, 303.0, 327.0, 305.0, 322.0, 310.0, 302.0, 316.0, 317.0, 285.0, 285.0, 319.0, 317.0, 312.0, 318.0, 314.0, 322.0, 276.0, 294.0, 324.0, 309.0, 288.0, 291.0, 319.0, 320.0, 297.0, 
299.0, 319.0, 317.0, 319.0, 320.0, 317.0, 316.0, 317.0, 316.0, 304.0, 280.0, 288.0, 294.0, 289.0, 290.0, 322.0, 314.0, 317.0, 322.0, 288.0, 299.0, 290.0, 289.0, 274.0, 284.0, 316.0, 311.0, 322.0, 317.0, 290.0, 283.0, 319.0, 311.0, 314.0, 313.0, 288.0, 293.0, 319.0, 320.0, 322.0, 317.0, 325.0, 314.0, 314.0, 322.0, 319.0, 317.0, 296.0, 271.0, 281.0, 295.0, 277.0, 296.0, 287.0, 283.0, 307.0, 314.0, 314.0, 322.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 296.0, 286.0, 321.0, 309.0, 307.0, 323.0, 321.0, 309.0, 285.0, 294.0, 327.0, 309.0, 314.0, 319.0, 293.0, 300.0, 309.0, 321.0, 299.0, 288.0, 285.0, 290.0, 323.0, 310.0, 231.0, 239.0, 309.0, 324.0, 236.0, 229.0, 307.0, 326.0, 250.0, 248.0, 319.0, 314.0, 308.0, 319.0, 319.0, 317.0, 319.0, 320.0, 312.0, 315.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 305.0, 322.0, 321.0, 315.0, 313.0, 323.0, 316.0, 311.0, 312.0, 318.0, 319.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9123094574385331, "mean_processing_ms": 0.25075636823694536, "mean_inference_ms": 1.49889087709384}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7824000, "num_steps_sampled": 4172800, "sample_time_ms": 21942.597, "load_time_ms": 36.578, "grad_time_ms": 9969.611, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004511403385549784, "policy_loss": -0.0029741593170911074, "vf_loss": 80.4991683959961, "vf_explained_var": 0.7677297592163086, "kl": 0.002298202132806182, "entropy": 1.1287130117416382, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4172800, "episodes_total": 10432, "training_iteration": 326, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-32-15", "timestamp": 1660257135, "time_this_iter_s": 31.342418909072876, "time_total_s": 15547.441402196884, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15547.441402196884, "timesteps_since_restore": 4172800, "iterations_since_restore": 326, "perf": {"cpu_util_percent": 35.46363636363637, "ram_util_percent": 58.74999999999999}} -{"episode_reward_max": 644.0, "episode_reward_min": 354.0, "episode_reward_mean": 615.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 307.655}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.11, "shaped_reward_min": 114, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.28, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, 
"onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.8, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.84, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.35, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, 
"soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 627.0, 354.0, 644.0, 639.0, 636.0, 627.0, 630.0, 630.0, 630.0, 579.0, 627.0, 636.0, 636.0, 636.0, 633.0, 587.0, 636.0, 627.0, 636.0, 633.0, 630.0, 636.0, 587.0, 633.0, 636.0, 576.0, 590.0, 627.0, 636.0, 636.0, 621.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 630.0, 630.0, 579.0, 636.0, 633.0, 593.0, 630.0, 587.0, 575.0, 633.0, 470.0, 633.0, 465.0, 633.0, 498.0, 633.0, 627.0, 636.0, 639.0, 627.0, 630.0, 630.0, 636.0, 627.0, 636.0, 636.0, 627.0, 630.0, 639.0, 636.0, 636.0, 630.0, 587.0, 639.0, 587.0, 633.0, 630.0, 639.0, 633.0, 582.0, 567.0, 630.0, 630.0, 633.0, 636.0, 576.0, 633.0, 587.0, 624.0, 636.0, 639.0, 639.0, 636.0, 630.0, 627.0, 612.0, 633.0, 570.0, 636.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 297.0, 282.0, 316.0, 311.0, 179.0, 175.0, 330.0, 314.0, 317.0, 322.0, 317.0, 319.0, 311.0, 316.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 293.0, 286.0, 308.0, 319.0, 316.0, 320.0, 330.0, 306.0, 317.0, 319.0, 319.0, 314.0, 299.0, 288.0, 319.0, 317.0, 318.0, 309.0, 324.0, 312.0, 319.0, 
314.0, 314.0, 316.0, 324.0, 312.0, 304.0, 283.0, 321.0, 312.0, 319.0, 317.0, 279.0, 297.0, 294.0, 296.0, 318.0, 309.0, 321.0, 315.0, 316.0, 320.0, 307.0, 314.0, 314.0, 322.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 296.0, 286.0, 321.0, 309.0, 307.0, 323.0, 321.0, 309.0, 285.0, 294.0, 327.0, 309.0, 314.0, 319.0, 293.0, 300.0, 309.0, 321.0, 299.0, 288.0, 285.0, 290.0, 323.0, 310.0, 231.0, 239.0, 309.0, 324.0, 236.0, 229.0, 307.0, 326.0, 250.0, 248.0, 319.0, 314.0, 308.0, 319.0, 319.0, 317.0, 319.0, 320.0, 312.0, 315.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 305.0, 322.0, 321.0, 315.0, 313.0, 323.0, 316.0, 311.0, 312.0, 318.0, 319.0, 320.0, 324.0, 312.0, 322.0, 314.0, 310.0, 320.0, 286.0, 301.0, 319.0, 320.0, 290.0, 297.0, 314.0, 319.0, 311.0, 319.0, 321.0, 318.0, 314.0, 319.0, 289.0, 293.0, 291.0, 276.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 314.0, 322.0, 288.0, 288.0, 317.0, 316.0, 298.0, 289.0, 316.0, 308.0, 319.0, 317.0, 316.0, 323.0, 317.0, 322.0, 311.0, 325.0, 303.0, 327.0, 305.0, 322.0, 310.0, 302.0, 316.0, 317.0, 285.0, 285.0, 319.0, 317.0, 312.0, 318.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9108943505628629, "mean_processing_ms": 0.25047292677413735, "mean_inference_ms": 1.4973202589318924}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7848000, "num_steps_sampled": 4185600, "sample_time_ms": 21889.076, "load_time_ms": 35.811, "grad_time_ms": 9803.849, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004534369800239801, "policy_loss": -0.0032402947545051575, "vf_loss": 83.42310333251953, "vf_explained_var": 0.7677843570709229, "kl": 0.0018213322618976235, "entropy": 1.135262131690979, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4185600, "episodes_total": 10464, "training_iteration": 327, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-11_23-32-44", "timestamp": 1660257164, "time_this_iter_s": 29.095314025878906, "time_total_s": 15576.536716222763, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15576.536716222763, "timesteps_since_restore": 4185600, "iterations_since_restore": 327, "perf": {"cpu_util_percent": 33.5219512195122, "ram_util_percent": 58.87073170731708}} -{"episode_reward_max": 644.0, "episode_reward_min": 354.0, "episode_reward_mean": 615.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 307.57}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 189.14, "shaped_reward_min": 114, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.23, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.78, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.42, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.52, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.25, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, 
"soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.52, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.52, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 630.0, 630.0, 636.0, 465.0, 639.0, 639.0, 587.0, 639.0, 633.0, 582.0, 587.0, 639.0, 633.0, 636.0, 636.0, 596.0, 587.0, 639.0, 630.0, 633.0, 639.0, 582.0, 636.0, 479.0, 587.0, 630.0, 639.0, 639.0, 516.0, 582.0, 633.0, 636.0, 627.0, 630.0, 639.0, 636.0, 636.0, 630.0, 587.0, 639.0, 587.0, 633.0, 630.0, 639.0, 633.0, 582.0, 567.0, 630.0, 630.0, 633.0, 636.0, 576.0, 633.0, 587.0, 624.0, 636.0, 639.0, 639.0, 636.0, 630.0, 627.0, 612.0, 633.0, 570.0, 636.0, 630.0, 636.0, 573.0, 579.0, 627.0, 354.0, 644.0, 639.0, 636.0, 627.0, 630.0, 630.0, 630.0, 579.0, 627.0, 636.0, 636.0, 636.0, 633.0, 587.0, 636.0, 627.0, 636.0, 633.0, 630.0, 636.0, 587.0, 633.0, 636.0, 576.0, 590.0, 627.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [303.0, 327.0, 312.0, 318.0, 317.0, 313.0, 314.0, 322.0, 234.0, 231.0, 322.0, 317.0, 322.0, 317.0, 288.0, 299.0, 317.0, 322.0, 
307.0, 326.0, 290.0, 292.0, 295.0, 292.0, 311.0, 328.0, 306.0, 327.0, 322.0, 314.0, 314.0, 322.0, 294.0, 302.0, 289.0, 298.0, 321.0, 318.0, 313.0, 317.0, 310.0, 323.0, 316.0, 323.0, 291.0, 291.0, 316.0, 320.0, 234.0, 245.0, 293.0, 294.0, 316.0, 314.0, 327.0, 312.0, 324.0, 315.0, 265.0, 251.0, 288.0, 294.0, 314.0, 319.0, 313.0, 323.0, 316.0, 311.0, 312.0, 318.0, 319.0, 320.0, 324.0, 312.0, 322.0, 314.0, 310.0, 320.0, 286.0, 301.0, 319.0, 320.0, 290.0, 297.0, 314.0, 319.0, 311.0, 319.0, 321.0, 318.0, 314.0, 319.0, 289.0, 293.0, 291.0, 276.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 314.0, 322.0, 288.0, 288.0, 317.0, 316.0, 298.0, 289.0, 316.0, 308.0, 319.0, 317.0, 316.0, 323.0, 317.0, 322.0, 311.0, 325.0, 303.0, 327.0, 305.0, 322.0, 310.0, 302.0, 316.0, 317.0, 285.0, 285.0, 319.0, 317.0, 312.0, 318.0, 314.0, 322.0, 288.0, 285.0, 297.0, 282.0, 316.0, 311.0, 179.0, 175.0, 330.0, 314.0, 317.0, 322.0, 317.0, 319.0, 311.0, 316.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 293.0, 286.0, 308.0, 319.0, 316.0, 320.0, 330.0, 306.0, 317.0, 319.0, 319.0, 314.0, 299.0, 288.0, 319.0, 317.0, 318.0, 309.0, 324.0, 312.0, 319.0, 314.0, 314.0, 316.0, 324.0, 312.0, 304.0, 283.0, 321.0, 312.0, 319.0, 317.0, 279.0, 297.0, 294.0, 296.0, 318.0, 309.0, 321.0, 315.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9094951926222233, "mean_processing_ms": 0.25019344496392953, "mean_inference_ms": 1.495850505323708}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7872000, "num_steps_sampled": 4198400, "sample_time_ms": 21863.329, "load_time_ms": 36.113, "grad_time_ms": 9649.438, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0027417896781116724, "policy_loss": -0.00593235669657588, "vf_loss": 92.42369842529297, "vf_explained_var": 0.7659929394721985, "kl": 0.0018632843857631087, "entropy": 1.1364426612854004, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 4198400, "episodes_total": 10496, "training_iteration": 328, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-33-13", "timestamp": 1660257193, "time_this_iter_s": 29.17238187789917, "time_total_s": 15605.709098100662, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15605.709098100662, "timesteps_since_restore": 4198400, "iterations_since_restore": 328, "perf": {"cpu_util_percent": 35.3780487804878, "ram_util_percent": 58.91219512195122}} -{"episode_reward_max": 644.0, "episode_reward_min": 354.0, "episode_reward_mean": 609.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 304.77}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.54, "shaped_reward_min": 114, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.91, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.53, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 633.0, 582.0, 627.0, 582.0, 639.0, 582.0, 582.0, 630.0, 630.0, 633.0, 636.0, 630.0, 584.0, 530.0, 587.0, 636.0, 573.0, 630.0, 624.0, 627.0, 627.0, 630.0, 549.0, 633.0, 633.0, 482.0, 630.0, 579.0, 636.0, 579.0, 630.0, 570.0, 636.0, 630.0, 636.0, 573.0, 579.0, 627.0, 354.0, 644.0, 639.0, 636.0, 627.0, 630.0, 630.0, 630.0, 579.0, 627.0, 636.0, 636.0, 636.0, 633.0, 587.0, 636.0, 627.0, 636.0, 633.0, 630.0, 636.0, 587.0, 633.0, 636.0, 576.0, 590.0, 627.0, 636.0, 636.0, 630.0, 630.0, 630.0, 636.0, 465.0, 639.0, 639.0, 587.0, 639.0, 633.0, 582.0, 587.0, 639.0, 633.0, 636.0, 636.0, 596.0, 587.0, 639.0, 630.0, 633.0, 639.0, 582.0, 636.0, 479.0, 587.0, 630.0, 639.0, 639.0, 516.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 299.0, 317.0, 316.0, 291.0, 291.0, 321.0, 306.0, 296.0, 286.0, 316.0, 323.0, 293.0, 289.0, 295.0, 287.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 319.0, 317.0, 314.0, 316.0, 288.0, 296.0, 265.0, 265.0, 292.0, 295.0, 313.0, 323.0, 291.0, 282.0, 323.0, 307.0, 310.0, 314.0, 309.0, 318.0, 324.0, 303.0, 316.0, 314.0, 275.0, 274.0, 318.0, 315.0, 322.0, 311.0, 245.0, 237.0, 313.0, 317.0, 293.0, 286.0, 314.0, 322.0, 290.0, 289.0, 316.0, 314.0, 285.0, 285.0, 319.0, 317.0, 312.0, 318.0, 314.0, 322.0, 288.0, 285.0, 297.0, 282.0, 316.0, 311.0, 179.0, 175.0, 330.0, 314.0, 317.0, 322.0, 317.0, 319.0, 311.0, 316.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 293.0, 286.0, 308.0, 319.0, 316.0, 320.0, 330.0, 306.0, 317.0, 319.0, 319.0, 314.0, 299.0, 288.0, 319.0, 317.0, 318.0, 309.0, 324.0, 312.0, 319.0, 314.0, 314.0, 316.0, 324.0, 312.0, 304.0, 283.0, 321.0, 312.0, 319.0, 317.0, 279.0, 297.0, 294.0, 296.0, 318.0, 309.0, 321.0, 315.0, 316.0, 320.0, 303.0, 327.0, 312.0, 318.0, 317.0, 313.0, 314.0, 322.0, 234.0, 231.0, 322.0, 317.0, 322.0, 317.0, 288.0, 299.0, 317.0, 322.0, 307.0, 326.0, 290.0, 292.0, 295.0, 292.0, 311.0, 328.0, 306.0, 327.0, 322.0, 314.0, 314.0, 322.0, 294.0, 302.0, 289.0, 298.0, 321.0, 318.0, 313.0, 317.0, 310.0, 323.0, 316.0, 323.0, 291.0, 291.0, 316.0, 320.0, 234.0, 245.0, 293.0, 294.0, 316.0, 314.0, 327.0, 312.0, 324.0, 315.0, 265.0, 251.0, 288.0, 294.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.908108829511899, "mean_processing_ms": 0.24991601885712256, "mean_inference_ms": 1.4943968227370834}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7896000, "num_steps_sampled": 4211200, "sample_time_ms": 21816.04, "load_time_ms": 36.245, "grad_time_ms": 9396.983, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00012729612353723496, "policy_loss": -0.007717677857726812, "vf_loss": 81.6099853515625, 
"vf_explained_var": 0.7742553353309631, "kl": 0.0021955876145511866, "entropy": 1.1412299871444702, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4211200, "episodes_total": 10528, "training_iteration": 329, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-33-44", "timestamp": 1660257224, "time_this_iter_s": 30.190826892852783, "time_total_s": 15635.899924993515, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": 
[null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15635.899924993515, "timesteps_since_restore": 4211200, "iterations_since_restore": 329, "perf": {"cpu_util_percent": 32.737209302325574, "ram_util_percent": 59.3720930232558}} -{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 611.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.805}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.01, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.06, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.61, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.35, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.41, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.35, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.35, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 633.0, 525.0, 630.0, 587.0, 630.0, 630.0, 615.0, 633.0, 630.0, 636.0, 627.0, 584.0, 633.0, 639.0, 582.0, 630.0, 587.0, 630.0, 630.0, 579.0, 630.0, 633.0, 621.0, 630.0, 636.0, 639.0, 636.0, 639.0, 582.0, 627.0, 630.0, 590.0, 627.0, 636.0, 636.0, 630.0, 630.0, 630.0, 636.0, 465.0, 639.0, 639.0, 587.0, 639.0, 633.0, 582.0, 587.0, 639.0, 633.0, 636.0, 636.0, 596.0, 587.0, 639.0, 630.0, 633.0, 639.0, 582.0, 636.0, 479.0, 587.0, 630.0, 639.0, 639.0, 516.0, 582.0, 633.0, 587.0, 633.0, 582.0, 627.0, 582.0, 639.0, 582.0, 582.0, 630.0, 630.0, 633.0, 636.0, 630.0, 584.0, 530.0, 587.0, 636.0, 573.0, 630.0, 624.0, 627.0, 627.0, 630.0, 549.0, 633.0, 633.0, 482.0, 630.0, 579.0, 636.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 323.0, 309.0, 324.0, 258.0, 267.0, 314.0, 316.0, 291.0, 296.0, 314.0, 316.0, 313.0, 317.0, 307.0, 308.0, 322.0, 311.0, 321.0, 309.0, 317.0, 319.0, 310.0, 317.0, 285.0, 299.0, 319.0, 314.0, 322.0, 317.0, 294.0, 288.0, 321.0, 309.0, 286.0, 301.0, 319.0, 311.0, 314.0, 316.0, 291.0, 288.0, 319.0, 311.0, 316.0, 317.0, 311.0, 310.0, 316.0, 314.0, 319.0, 317.0, 324.0, 315.0, 319.0, 317.0, 319.0, 320.0, 278.0, 304.0, 316.0, 311.0, 316.0, 314.0, 294.0, 296.0, 318.0, 309.0, 321.0, 315.0, 316.0, 320.0, 303.0, 327.0, 312.0, 318.0, 317.0, 313.0, 314.0, 322.0, 234.0, 231.0, 322.0, 317.0, 322.0, 317.0, 288.0, 299.0, 317.0, 322.0, 307.0, 326.0, 290.0, 292.0, 295.0, 292.0, 311.0, 328.0, 306.0, 327.0, 322.0, 314.0, 314.0, 322.0, 294.0, 302.0, 289.0, 298.0, 321.0, 318.0, 313.0, 317.0, 310.0, 323.0, 316.0, 323.0, 291.0, 291.0, 316.0, 320.0, 234.0, 245.0, 293.0, 294.0, 316.0, 314.0, 327.0, 312.0, 324.0, 315.0, 265.0, 251.0, 288.0, 294.0, 314.0, 319.0, 288.0, 299.0, 317.0, 316.0, 291.0, 291.0, 321.0, 306.0, 296.0, 286.0, 316.0, 323.0, 293.0, 289.0, 295.0, 287.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 319.0, 317.0, 314.0, 316.0, 288.0, 296.0, 265.0, 265.0, 292.0, 295.0, 313.0, 323.0, 291.0, 282.0, 323.0, 307.0, 310.0, 314.0, 309.0, 318.0, 324.0, 303.0, 316.0, 314.0, 275.0, 274.0, 318.0, 315.0, 322.0, 311.0, 245.0, 237.0, 313.0, 317.0, 293.0, 286.0, 314.0, 322.0, 290.0, 289.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9067267612642047, "mean_processing_ms": 0.2496377464704088, "mean_inference_ms": 1.4929051187388651}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7920000, "num_steps_sampled": 4224000, "sample_time_ms": 21531.777, "load_time_ms": 36.36, "grad_time_ms": 
9360.92, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021365683060139418, "policy_loss": -0.005692864302545786, "vf_loss": 83.99735260009766, "vf_explained_var": 0.7644996643066406, "kl": 0.0020622028969228268, "entropy": 1.140602469444275, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4224000, "episodes_total": 10560, "training_iteration": 330, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-34-13", "timestamp": 1660257253, "time_this_iter_s": 29.480799913406372, "time_total_s": 15665.380724906921, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": 
["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15665.380724906921, "timesteps_since_restore": 4224000, "iterations_since_restore": 330, "perf": {"cpu_util_percent": 35.38095238095239, "ram_util_percent": 58.940476190476204}} -{"episode_reward_max": 639.0, "episode_reward_min": 482.0, "episode_reward_mean": 614.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 307.18}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.96, "shaped_reward_min": 149, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.19, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.43, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.36, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, 
"useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 567.0, 587.0, 639.0, 627.0, 627.0, 639.0, 636.0, 636.0, 630.0, 636.0, 636.0, 627.0, 636.0, 584.0, 630.0, 627.0, 630.0, 633.0, 630.0, 579.0, 633.0, 633.0, 627.0, 639.0, 633.0, 633.0, 639.0, 633.0, 576.0, 636.0, 582.0, 639.0, 516.0, 582.0, 633.0, 587.0, 633.0, 582.0, 627.0, 582.0, 639.0, 582.0, 582.0, 630.0, 630.0, 633.0, 636.0, 630.0, 584.0, 530.0, 587.0, 636.0, 573.0, 630.0, 624.0, 627.0, 627.0, 630.0, 549.0, 633.0, 633.0, 482.0, 630.0, 579.0, 636.0, 579.0, 630.0, 639.0, 633.0, 525.0, 630.0, 587.0, 630.0, 630.0, 615.0, 633.0, 630.0, 636.0, 627.0, 584.0, 633.0, 639.0, 582.0, 630.0, 587.0, 630.0, 630.0, 579.0, 630.0, 633.0, 621.0, 630.0, 636.0, 639.0, 636.0, 639.0, 582.0, 627.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 285.0, 285.0, 282.0, 289.0, 298.0, 317.0, 322.0, 315.0, 312.0, 321.0, 306.0, 314.0, 325.0, 314.0, 322.0, 308.0, 328.0, 319.0, 311.0, 322.0, 314.0, 319.0, 317.0, 308.0, 319.0, 316.0, 320.0, 295.0, 289.0, 319.0, 311.0, 327.0, 300.0, 318.0, 312.0, 311.0, 322.0, 321.0, 309.0, 280.0, 299.0, 310.0, 323.0, 312.0, 321.0, 308.0, 319.0, 318.0, 321.0, 315.0, 318.0, 321.0, 312.0, 318.0, 321.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 290.0, 292.0, 324.0, 315.0, 265.0, 251.0, 288.0, 294.0, 314.0, 319.0, 288.0, 299.0, 317.0, 316.0, 291.0, 291.0, 321.0, 306.0, 296.0, 286.0, 316.0, 323.0, 293.0, 289.0, 295.0, 287.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 319.0, 317.0, 314.0, 316.0, 288.0, 296.0, 265.0, 265.0, 292.0, 295.0, 313.0, 323.0, 291.0, 282.0, 323.0, 307.0, 310.0, 314.0, 309.0, 318.0, 324.0, 303.0, 316.0, 314.0, 275.0, 274.0, 318.0, 315.0, 322.0, 311.0, 245.0, 237.0, 313.0, 317.0, 293.0, 286.0, 314.0, 322.0, 290.0, 289.0, 316.0, 314.0, 316.0, 323.0, 309.0, 324.0, 258.0, 267.0, 314.0, 316.0, 291.0, 296.0, 314.0, 316.0, 313.0, 317.0, 307.0, 308.0, 322.0, 311.0, 321.0, 309.0, 317.0, 319.0, 310.0, 317.0, 285.0, 299.0, 319.0, 314.0, 322.0, 317.0, 294.0, 288.0, 321.0, 309.0, 286.0, 301.0, 319.0, 311.0, 314.0, 316.0, 291.0, 288.0, 319.0, 311.0, 316.0, 317.0, 311.0, 310.0, 316.0, 314.0, 319.0, 317.0, 324.0, 315.0, 319.0, 317.0, 319.0, 320.0, 278.0, 304.0, 316.0, 311.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9053530084533031, "mean_processing_ms": 0.24936183583486593, 
"mean_inference_ms": 1.491518263590993}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7944000, "num_steps_sampled": 4236800, "sample_time_ms": 21154.165, "load_time_ms": 36.748, "grad_time_ms": 9274.795, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -8.082172280410305e-05, "policy_loss": -0.00758820166811347, "vf_loss": 80.76020050048828, "vf_explained_var": 0.765857994556427, "kl": 0.001791672664694488, "entropy": 1.137281060218811, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4236800, "episodes_total": 10592, "training_iteration": 331, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-34-44", "timestamp": 1660257284, "time_this_iter_s": 31.342971086502075, "time_total_s": 15696.723695993423, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, 
"num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15696.723695993423, "timesteps_since_restore": 4236800, "iterations_since_restore": 331, "perf": {"cpu_util_percent": 33.804545454545455, "ram_util_percent": 58.92272727272726}} -{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 616.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 308.005}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.81, "shaped_reward_min": 165, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, 
"tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.56, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.07, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.05, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.34, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.74, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.52, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, 
"dish_pickup_agent_0_mean": 6.03, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.33, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.74, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.52, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.74, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 
21, "viable_onion_potting_agent_1_mean": 17.52, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 573.0, 633.0, 567.0, 630.0, 576.0, 639.0, 587.0, 630.0, 582.0, 636.0, 639.0, 633.0, 630.0, 636.0, 579.0, 576.0, 627.0, 587.0, 587.0, 579.0, 582.0, 636.0, 636.0, 579.0, 639.0, 587.0, 633.0, 630.0, 582.0, 633.0, 587.0, 579.0, 636.0, 579.0, 630.0, 639.0, 633.0, 525.0, 630.0, 587.0, 630.0, 630.0, 615.0, 633.0, 630.0, 636.0, 627.0, 584.0, 633.0, 639.0, 582.0, 630.0, 587.0, 630.0, 630.0, 579.0, 630.0, 633.0, 621.0, 630.0, 636.0, 639.0, 636.0, 639.0, 582.0, 627.0, 630.0, 582.0, 567.0, 587.0, 639.0, 627.0, 627.0, 639.0, 636.0, 
636.0, 630.0, 636.0, 636.0, 627.0, 636.0, 584.0, 630.0, 627.0, 630.0, 633.0, 630.0, 579.0, 633.0, 633.0, 627.0, 639.0, 633.0, 633.0, 639.0, 633.0, 576.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 287.0, 286.0, 316.0, 317.0, 280.0, 287.0, 313.0, 317.0, 281.0, 295.0, 319.0, 320.0, 295.0, 292.0, 311.0, 319.0, 284.0, 298.0, 320.0, 316.0, 316.0, 323.0, 317.0, 316.0, 319.0, 311.0, 324.0, 312.0, 281.0, 298.0, 285.0, 291.0, 311.0, 316.0, 288.0, 299.0, 297.0, 290.0, 288.0, 291.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 280.0, 299.0, 317.0, 322.0, 298.0, 289.0, 314.0, 319.0, 316.0, 314.0, 291.0, 291.0, 312.0, 321.0, 298.0, 289.0, 293.0, 286.0, 314.0, 322.0, 290.0, 289.0, 316.0, 314.0, 316.0, 323.0, 309.0, 324.0, 258.0, 267.0, 314.0, 316.0, 291.0, 296.0, 314.0, 316.0, 313.0, 317.0, 307.0, 308.0, 322.0, 311.0, 321.0, 309.0, 317.0, 319.0, 310.0, 317.0, 285.0, 299.0, 319.0, 314.0, 322.0, 317.0, 294.0, 288.0, 321.0, 309.0, 286.0, 301.0, 319.0, 311.0, 314.0, 316.0, 291.0, 288.0, 319.0, 311.0, 316.0, 317.0, 311.0, 310.0, 316.0, 314.0, 319.0, 317.0, 324.0, 315.0, 319.0, 317.0, 319.0, 320.0, 278.0, 304.0, 316.0, 311.0, 316.0, 314.0, 297.0, 285.0, 285.0, 282.0, 289.0, 298.0, 317.0, 322.0, 315.0, 312.0, 321.0, 306.0, 314.0, 325.0, 314.0, 322.0, 308.0, 328.0, 319.0, 311.0, 322.0, 314.0, 319.0, 317.0, 308.0, 319.0, 316.0, 320.0, 295.0, 289.0, 319.0, 311.0, 327.0, 300.0, 318.0, 312.0, 311.0, 322.0, 321.0, 309.0, 280.0, 299.0, 310.0, 323.0, 312.0, 321.0, 308.0, 319.0, 
318.0, 321.0, 315.0, 318.0, 321.0, 312.0, 318.0, 321.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 290.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9039942028756937, "mean_processing_ms": 0.24909087688920636, "mean_inference_ms": 1.490273663500262}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7968000, "num_steps_sampled": 4249600, "sample_time_ms": 21096.339, "load_time_ms": 36.622, "grad_time_ms": 9224.002, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002680680714547634, "policy_loss": -0.005275225732475519, "vf_loss": 85.20003509521484, "vf_explained_var": 0.7707304954528809, "kl": 0.00193729845341295, "entropy": 1.128203272819519, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4249600, "episodes_total": 10624, "training_iteration": 332, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-35-18", "timestamp": 1660257318, "time_this_iter_s": 33.216859102249146, "time_total_s": 15729.940555095673, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15729.940555095673, "timesteps_since_restore": 4249600, "iterations_since_restore": 332, "perf": {"cpu_util_percent": 34.92765957446809, "ram_util_percent": 58.93617021276598}} -{"episode_reward_max": 639.0, "episode_reward_min": 354.0, "episode_reward_mean": 613.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.97}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.74, 
"shaped_reward_min": 114, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.39, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.05, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.95, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.29, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.6, 
"potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.21, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.6, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 354.0, 587.0, 636.0, 579.0, 636.0, 639.0, 587.0, 468.0, 636.0, 624.0, 630.0, 579.0, 630.0, 584.0, 633.0, 636.0, 636.0, 630.0, 639.0, 639.0, 639.0, 636.0, 639.0, 639.0, 633.0, 636.0, 630.0, 582.0, 636.0, 630.0, 633.0, 639.0, 582.0, 627.0, 630.0, 582.0, 567.0, 587.0, 639.0, 627.0, 627.0, 639.0, 636.0, 636.0, 630.0, 636.0, 
636.0, 627.0, 636.0, 584.0, 630.0, 627.0, 630.0, 633.0, 630.0, 579.0, 633.0, 633.0, 627.0, 639.0, 633.0, 633.0, 639.0, 633.0, 576.0, 636.0, 582.0, 633.0, 573.0, 633.0, 567.0, 630.0, 576.0, 639.0, 587.0, 630.0, 582.0, 636.0, 639.0, 633.0, 630.0, 636.0, 579.0, 576.0, 627.0, 587.0, 587.0, 579.0, 582.0, 636.0, 636.0, 579.0, 639.0, 587.0, 633.0, 630.0, 582.0, 633.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 314.0, 174.0, 180.0, 288.0, 299.0, 316.0, 320.0, 291.0, 288.0, 326.0, 310.0, 322.0, 317.0, 290.0, 297.0, 225.0, 243.0, 314.0, 322.0, 321.0, 303.0, 311.0, 319.0, 293.0, 286.0, 317.0, 313.0, 301.0, 283.0, 319.0, 314.0, 314.0, 322.0, 324.0, 312.0, 316.0, 314.0, 319.0, 320.0, 317.0, 322.0, 312.0, 327.0, 319.0, 317.0, 320.0, 319.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 313.0, 317.0, 290.0, 292.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 319.0, 320.0, 278.0, 304.0, 316.0, 311.0, 316.0, 314.0, 297.0, 285.0, 285.0, 282.0, 289.0, 298.0, 317.0, 322.0, 315.0, 312.0, 321.0, 306.0, 314.0, 325.0, 314.0, 322.0, 308.0, 328.0, 319.0, 311.0, 322.0, 314.0, 319.0, 317.0, 308.0, 319.0, 316.0, 320.0, 295.0, 289.0, 319.0, 311.0, 327.0, 300.0, 318.0, 312.0, 311.0, 322.0, 321.0, 309.0, 280.0, 299.0, 310.0, 323.0, 312.0, 321.0, 308.0, 319.0, 318.0, 321.0, 315.0, 318.0, 321.0, 312.0, 318.0, 321.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 290.0, 292.0, 319.0, 314.0, 287.0, 286.0, 316.0, 317.0, 280.0, 287.0, 313.0, 317.0, 281.0, 295.0, 319.0, 320.0, 295.0, 292.0, 311.0, 319.0, 284.0, 
298.0, 320.0, 316.0, 316.0, 323.0, 317.0, 316.0, 319.0, 311.0, 324.0, 312.0, 281.0, 298.0, 285.0, 291.0, 311.0, 316.0, 288.0, 299.0, 297.0, 290.0, 288.0, 291.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 280.0, 299.0, 317.0, 322.0, 298.0, 289.0, 314.0, 319.0, 316.0, 314.0, 291.0, 291.0, 312.0, 321.0, 298.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9026482821594658, "mean_processing_ms": 0.24882351646773487, "mean_inference_ms": 1.4891609969323358}, "off_policy_estimator": {}, "info": {"num_steps_trained": 7992000, "num_steps_sampled": 4262400, "sample_time_ms": 21161.346, "load_time_ms": 36.685, "grad_time_ms": 9083.544, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006233640480786562, "policy_loss": -0.0020334760192781687, "vf_loss": 88.32830047607422, "vf_explained_var": 0.7596514821052551, "kl": 0.0019263379508629441, "entropy": 1.1314295530319214, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4262400, "episodes_total": 10656, "training_iteration": 333, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-35-49", "timestamp": 1660257349, "time_this_iter_s": 30.911512851715088, "time_total_s": 15760.852067947388, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15760.852067947388, "timesteps_since_restore": 4262400, "iterations_since_restore": 333, "perf": {"cpu_util_percent": 33.54318181818183, "ram_util_percent": 59.031818181818196}} -{"episode_reward_max": 639.0, "episode_reward_min": 354.0, "episode_reward_mean": 609.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 174.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.7}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.4, "shaped_reward_min": 114, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.43, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.91, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.98, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.13, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.66, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.4, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.89, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.94, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.38, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.29, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.66, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.4, "optimal_onion_potting_agent_1_min": 9, 
"optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.66, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.4, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 633.0, 582.0, 365.0, 627.0, 633.0, 639.0, 639.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 576.0, 579.0, 
633.0, 582.0, 633.0, 633.0, 636.0, 630.0, 587.0, 618.0, 633.0, 636.0, 627.0, 561.0, 639.0, 624.0, 587.0, 633.0, 576.0, 636.0, 582.0, 633.0, 573.0, 633.0, 567.0, 630.0, 576.0, 639.0, 587.0, 630.0, 582.0, 636.0, 639.0, 633.0, 630.0, 636.0, 579.0, 576.0, 627.0, 587.0, 587.0, 579.0, 582.0, 636.0, 636.0, 579.0, 639.0, 587.0, 633.0, 630.0, 582.0, 633.0, 587.0, 636.0, 354.0, 587.0, 636.0, 579.0, 636.0, 639.0, 587.0, 468.0, 636.0, 624.0, 630.0, 579.0, 630.0, 584.0, 633.0, 636.0, 636.0, 630.0, 639.0, 639.0, 639.0, 636.0, 639.0, 639.0, 633.0, 636.0, 630.0, 582.0, 636.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 286.0, 296.0, 313.0, 320.0, 288.0, 294.0, 186.0, 179.0, 308.0, 319.0, 310.0, 323.0, 317.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 290.0, 289.0, 311.0, 319.0, 302.0, 274.0, 285.0, 294.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 286.0, 301.0, 313.0, 305.0, 317.0, 316.0, 325.0, 311.0, 308.0, 319.0, 287.0, 274.0, 322.0, 317.0, 300.0, 324.0, 296.0, 291.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 290.0, 292.0, 319.0, 314.0, 287.0, 286.0, 316.0, 317.0, 280.0, 287.0, 313.0, 317.0, 281.0, 295.0, 319.0, 320.0, 295.0, 292.0, 311.0, 319.0, 284.0, 298.0, 320.0, 316.0, 316.0, 323.0, 317.0, 316.0, 319.0, 311.0, 324.0, 312.0, 281.0, 298.0, 285.0, 291.0, 311.0, 316.0, 288.0, 299.0, 297.0, 290.0, 288.0, 291.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 280.0, 299.0, 317.0, 322.0, 298.0, 
289.0, 314.0, 319.0, 316.0, 314.0, 291.0, 291.0, 312.0, 321.0, 298.0, 289.0, 322.0, 314.0, 174.0, 180.0, 288.0, 299.0, 316.0, 320.0, 291.0, 288.0, 326.0, 310.0, 322.0, 317.0, 290.0, 297.0, 225.0, 243.0, 314.0, 322.0, 321.0, 303.0, 311.0, 319.0, 293.0, 286.0, 317.0, 313.0, 301.0, 283.0, 319.0, 314.0, 314.0, 322.0, 324.0, 312.0, 316.0, 314.0, 319.0, 320.0, 317.0, 322.0, 312.0, 327.0, 319.0, 317.0, 320.0, 319.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 313.0, 317.0, 290.0, 292.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.9013131460094013, "mean_processing_ms": 0.24855772824532033, "mean_inference_ms": 1.488151121252854}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8016000, "num_steps_sampled": 4275200, "sample_time_ms": 21421.637, "load_time_ms": 36.594, "grad_time_ms": 9205.096, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013225991278886795, "policy_loss": -0.00687911594286561, "vf_loss": 87.6913833618164, "vf_explained_var": 0.7619670033454895, "kl": 0.0022256800439208746, "entropy": 1.1348274946212769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4275200, "episodes_total": 10688, "training_iteration": 334, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-36-22", "timestamp": 1660257382, "time_this_iter_s": 33.877387046813965, "time_total_s": 15794.729454994202, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15794.729454994202, "timesteps_since_restore": 4275200, "iterations_since_restore": 334, "perf": {"cpu_util_percent": 34.24791666666667, 
"ram_util_percent": 59.02708333333334}} -{"episode_reward_max": 639.0, "episode_reward_min": 354.0, "episode_reward_mean": 608.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.3}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.6, "shaped_reward_min": 114, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.21, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.75, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 
0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.44, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.5, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.44, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.5, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.44, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.5, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 627.0, 582.0, 636.0, 627.0, 633.0, 518.0, 587.0, 633.0, 582.0, 582.0, 582.0, 558.0, 561.0, 633.0, 582.0, 579.0, 579.0, 633.0, 639.0, 633.0, 630.0, 624.0, 633.0, 579.0, 636.0, 564.0, 627.0, 630.0, 639.0, 633.0, 584.0, 630.0, 582.0, 633.0, 587.0, 636.0, 354.0, 587.0, 636.0, 579.0, 636.0, 639.0, 587.0, 468.0, 636.0, 624.0, 630.0, 579.0, 630.0, 584.0, 633.0, 636.0, 636.0, 630.0, 639.0, 639.0, 639.0, 636.0, 639.0, 639.0, 633.0, 636.0, 630.0, 582.0, 636.0, 630.0, 633.0, 633.0, 582.0, 633.0, 582.0, 365.0, 627.0, 633.0, 639.0, 639.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 576.0, 579.0, 633.0, 582.0, 633.0, 633.0, 636.0, 630.0, 587.0, 618.0, 633.0, 636.0, 627.0, 561.0, 639.0, 624.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 308.0, 319.0, 293.0, 289.0, 317.0, 319.0, 316.0, 311.0, 319.0, 314.0, 266.0, 252.0, 299.0, 288.0, 321.0, 312.0, 295.0, 287.0, 279.0, 303.0, 285.0, 297.0, 287.0, 271.0, 284.0, 277.0, 319.0, 314.0, 301.0, 281.0, 294.0, 285.0, 290.0, 289.0, 318.0, 315.0, 314.0, 325.0, 314.0, 319.0, 319.0, 311.0, 305.0, 319.0, 316.0, 317.0, 291.0, 288.0, 320.0, 316.0, 277.0, 287.0, 306.0, 321.0, 317.0, 313.0, 314.0, 325.0, 317.0, 316.0, 287.0, 297.0, 316.0, 314.0, 291.0, 291.0, 312.0, 321.0, 298.0, 289.0, 322.0, 314.0, 174.0, 180.0, 288.0, 299.0, 316.0, 320.0, 291.0, 288.0, 326.0, 310.0, 322.0, 317.0, 290.0, 297.0, 225.0, 243.0, 314.0, 322.0, 321.0, 303.0, 311.0, 319.0, 293.0, 
286.0, 317.0, 313.0, 301.0, 283.0, 319.0, 314.0, 314.0, 322.0, 324.0, 312.0, 316.0, 314.0, 319.0, 320.0, 317.0, 322.0, 312.0, 327.0, 319.0, 317.0, 320.0, 319.0, 322.0, 317.0, 314.0, 319.0, 314.0, 322.0, 313.0, 317.0, 290.0, 292.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 314.0, 319.0, 286.0, 296.0, 313.0, 320.0, 288.0, 294.0, 186.0, 179.0, 308.0, 319.0, 310.0, 323.0, 317.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 290.0, 289.0, 311.0, 319.0, 302.0, 274.0, 285.0, 294.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 286.0, 301.0, 313.0, 305.0, 317.0, 316.0, 325.0, 311.0, 308.0, 319.0, 287.0, 274.0, 322.0, 317.0, 300.0, 324.0, 296.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8999851577327678, "mean_processing_ms": 0.24829403649185813, "mean_inference_ms": 1.4872182879909173}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8040000, "num_steps_sampled": 4288000, "sample_time_ms": 21915.303, "load_time_ms": 37.24, "grad_time_ms": 9393.89, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024422004353255033, "policy_loss": -0.005287020932883024, "vf_loss": 82.97665405273438, "vf_explained_var": 0.7636620402336121, "kl": 0.001807666034437716, "entropy": 1.136885643005371, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4288000, "episodes_total": 10720, "training_iteration": 335, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-36-58", "timestamp": 1660257418, "time_this_iter_s": 35.08472490310669, "time_total_s": 15829.814179897308, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 15829.814179897308, "timesteps_since_restore": 4288000, "iterations_since_restore": 335, "perf": {"cpu_util_percent": 30.30408163265306, "ram_util_percent": 58.94081632653061}} -{"episode_reward_max": 639.0, "episode_reward_min": 365.0, "episode_reward_mean": 608.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 179.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.165}, "custom_metrics": {"sparse_reward_mean": 211.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 185.93, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.68, "useful_onion_pickup_agent_0_min": 11, 
"useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.45, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.81, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.36, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, 
"soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.45, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.45, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 465.0, 636.0, 633.0, 582.0, 579.0, 582.0, 633.0, 516.0, 582.0, 587.0, 630.0, 630.0, 633.0, 630.0, 624.0, 633.0, 630.0, 627.0, 630.0, 576.0, 582.0, 630.0, 639.0, 639.0, 633.0, 576.0, 633.0, 633.0, 576.0, 633.0, 633.0, 582.0, 636.0, 630.0, 633.0, 633.0, 582.0, 633.0, 582.0, 365.0, 627.0, 633.0, 639.0, 639.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 576.0, 579.0, 633.0, 582.0, 633.0, 633.0, 636.0, 630.0, 587.0, 618.0, 633.0, 636.0, 627.0, 561.0, 639.0, 624.0, 587.0, 633.0, 627.0, 582.0, 636.0, 627.0, 633.0, 518.0, 587.0, 633.0, 582.0, 582.0, 582.0, 558.0, 561.0, 633.0, 582.0, 579.0, 579.0, 633.0, 639.0, 633.0, 630.0, 624.0, 633.0, 579.0, 636.0, 564.0, 627.0, 630.0, 639.0, 633.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 312.0, 221.0, 244.0, 316.0, 320.0, 319.0, 314.0, 290.0, 292.0, 288.0, 291.0, 283.0, 299.0, 314.0, 319.0, 256.0, 260.0, 283.0, 299.0, 291.0, 296.0, 317.0, 313.0, 311.0, 319.0, 314.0, 319.0, 326.0, 304.0, 308.0, 316.0, 321.0, 312.0, 321.0, 309.0, 314.0, 313.0, 317.0, 313.0, 288.0, 288.0, 286.0, 296.0, 318.0, 312.0, 322.0, 317.0, 327.0, 312.0, 316.0, 317.0, 291.0, 285.0, 319.0, 314.0, 318.0, 315.0, 288.0, 288.0, 324.0, 309.0, 322.0, 311.0, 290.0, 292.0, 324.0, 312.0, 316.0, 314.0, 314.0, 319.0, 314.0, 
319.0, 286.0, 296.0, 313.0, 320.0, 288.0, 294.0, 186.0, 179.0, 308.0, 319.0, 310.0, 323.0, 317.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 290.0, 289.0, 311.0, 319.0, 302.0, 274.0, 285.0, 294.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 286.0, 301.0, 313.0, 305.0, 317.0, 316.0, 325.0, 311.0, 308.0, 319.0, 287.0, 274.0, 322.0, 317.0, 300.0, 324.0, 296.0, 291.0, 316.0, 317.0, 308.0, 319.0, 293.0, 289.0, 317.0, 319.0, 316.0, 311.0, 319.0, 314.0, 266.0, 252.0, 299.0, 288.0, 321.0, 312.0, 295.0, 287.0, 279.0, 303.0, 285.0, 297.0, 287.0, 271.0, 284.0, 277.0, 319.0, 314.0, 301.0, 281.0, 294.0, 285.0, 290.0, 289.0, 318.0, 315.0, 314.0, 325.0, 314.0, 319.0, 319.0, 311.0, 305.0, 319.0, 316.0, 317.0, 291.0, 288.0, 320.0, 316.0, 277.0, 287.0, 306.0, 321.0, 317.0, 313.0, 314.0, 325.0, 317.0, 316.0, 287.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 0.898665270354117, "mean_processing_ms": 0.24803277399210055, "mean_inference_ms": 1.4863570559836363}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8064000, "num_steps_sampled": 4300800, "sample_time_ms": 22027.266, "load_time_ms": 37.353, "grad_time_ms": 9612.106, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008330469136126339, "policy_loss": -0.006751233246177435, "vf_loss": 81.52507781982422, "vf_explained_var": 0.7658050656318665, "kl": 0.001944715972058475, "entropy": 1.1364573240280151, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4300800, "episodes_total": 10752, "training_iteration": 336, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-37-32", "timestamp": 1660257452, "time_this_iter_s": 34.64702320098877, "time_total_s": 15864.461203098297, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15864.461203098297, "timesteps_since_restore": 4300800, "iterations_since_restore": 336, "perf": {"cpu_util_percent": 29.189795918367345, "ram_util_percent": 58.936734693877554}} -{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 608.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 221.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.205}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.41, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.94, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.46, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.76, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.31, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, 
"soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 524.0, 630.0, 636.0, 636.0, 633.0, 639.0, 584.0, 636.0, 587.0, 636.0, 587.0, 639.0, 636.0, 587.0, 639.0, 627.0, 630.0, 636.0, 582.0, 590.0, 573.0, 636.0, 587.0, 633.0, 633.0, 578.0, 630.0, 573.0, 573.0, 624.0, 561.0, 639.0, 624.0, 587.0, 633.0, 627.0, 582.0, 636.0, 627.0, 633.0, 518.0, 587.0, 633.0, 582.0, 582.0, 582.0, 558.0, 561.0, 633.0, 582.0, 579.0, 579.0, 633.0, 639.0, 633.0, 630.0, 624.0, 633.0, 579.0, 636.0, 564.0, 627.0, 630.0, 639.0, 633.0, 584.0, 630.0, 465.0, 636.0, 633.0, 582.0, 579.0, 582.0, 633.0, 516.0, 582.0, 587.0, 630.0, 630.0, 633.0, 630.0, 624.0, 633.0, 630.0, 627.0, 630.0, 576.0, 582.0, 630.0, 639.0, 639.0, 633.0, 576.0, 633.0, 633.0, 576.0, 633.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 286.0, 314.0, 322.0, 264.0, 260.0, 316.0, 314.0, 319.0, 317.0, 314.0, 322.0, 316.0, 317.0, 319.0, 320.0, 288.0, 296.0, 319.0, 317.0, 306.0, 281.0, 316.0, 320.0, 299.0, 288.0, 325.0, 314.0, 317.0, 319.0, 293.0, 294.0, 319.0, 320.0, 318.0, 309.0, 316.0, 314.0, 319.0, 317.0, 296.0, 286.0, 296.0, 
294.0, 293.0, 280.0, 319.0, 317.0, 283.0, 304.0, 309.0, 324.0, 311.0, 322.0, 282.0, 296.0, 316.0, 314.0, 280.0, 293.0, 285.0, 288.0, 312.0, 312.0, 287.0, 274.0, 322.0, 317.0, 300.0, 324.0, 296.0, 291.0, 316.0, 317.0, 308.0, 319.0, 293.0, 289.0, 317.0, 319.0, 316.0, 311.0, 319.0, 314.0, 266.0, 252.0, 299.0, 288.0, 321.0, 312.0, 295.0, 287.0, 279.0, 303.0, 285.0, 297.0, 287.0, 271.0, 284.0, 277.0, 319.0, 314.0, 301.0, 281.0, 294.0, 285.0, 290.0, 289.0, 318.0, 315.0, 314.0, 325.0, 314.0, 319.0, 319.0, 311.0, 305.0, 319.0, 316.0, 317.0, 291.0, 288.0, 320.0, 316.0, 277.0, 287.0, 306.0, 321.0, 317.0, 313.0, 314.0, 325.0, 317.0, 316.0, 287.0, 297.0, 318.0, 312.0, 221.0, 244.0, 316.0, 320.0, 319.0, 314.0, 290.0, 292.0, 288.0, 291.0, 283.0, 299.0, 314.0, 319.0, 256.0, 260.0, 283.0, 299.0, 291.0, 296.0, 317.0, 313.0, 311.0, 319.0, 314.0, 319.0, 326.0, 304.0, 308.0, 316.0, 321.0, 312.0, 321.0, 309.0, 314.0, 313.0, 317.0, 313.0, 288.0, 288.0, 286.0, 296.0, 318.0, 312.0, 322.0, 317.0, 327.0, 312.0, 316.0, 317.0, 291.0, 285.0, 319.0, 314.0, 318.0, 315.0, 288.0, 288.0, 324.0, 309.0, 322.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8973618509446918, "mean_processing_ms": 0.2477754616951261, "mean_inference_ms": 1.4857111837613974}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8088000, "num_steps_sampled": 4313600, "sample_time_ms": 22572.125, "load_time_ms": 37.728, "grad_time_ms": 9903.43, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030143249314278364, "policy_loss": -0.004624274093657732, "vf_loss": 82.12947845458984, "vf_explained_var": 0.7718231081962585, "kl": 0.0020513928029686213, "entropy": 1.1487096548080444, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4313600, "episodes_total": 10784, "training_iteration": 337, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-11_23-38-10", "timestamp": 1660257490, "time_this_iter_s": 37.458003759384155, "time_total_s": 15901.919206857681, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15901.919206857681, "timesteps_since_restore": 4313600, "iterations_since_restore": 337, "perf": {"cpu_util_percent": 29.675471698113206, "ram_util_percent": 58.94905660377358}} -{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 610.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 221.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.345}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.49, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.44, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.34, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.93, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.6, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, 
"soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 570.0, 633.0, 639.0, 582.0, 630.0, 630.0, 530.0, 633.0, 516.0, 633.0, 636.0, 627.0, 633.0, 587.0, 636.0, 633.0, 573.0, 579.0, 582.0, 633.0, 636.0, 627.0, 587.0, 582.0, 630.0, 630.0, 639.0, 633.0, 584.0, 630.0, 465.0, 636.0, 633.0, 582.0, 579.0, 582.0, 633.0, 516.0, 582.0, 587.0, 630.0, 630.0, 633.0, 630.0, 624.0, 633.0, 630.0, 627.0, 630.0, 576.0, 582.0, 630.0, 639.0, 639.0, 633.0, 576.0, 633.0, 633.0, 576.0, 633.0, 633.0, 587.0, 636.0, 524.0, 630.0, 636.0, 636.0, 633.0, 639.0, 584.0, 636.0, 587.0, 636.0, 587.0, 639.0, 636.0, 587.0, 639.0, 627.0, 630.0, 636.0, 582.0, 590.0, 573.0, 636.0, 587.0, 633.0, 633.0, 578.0, 630.0, 573.0, 573.0, 624.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 319.0, 311.0, 316.0, 314.0, 321.0, 309.0, 317.0, 319.0, 317.0, 316.0, 298.0, 284.0, 277.0, 293.0, 319.0, 314.0, 
322.0, 317.0, 291.0, 291.0, 321.0, 309.0, 316.0, 314.0, 257.0, 273.0, 321.0, 312.0, 256.0, 260.0, 311.0, 322.0, 316.0, 320.0, 311.0, 316.0, 316.0, 317.0, 293.0, 294.0, 314.0, 322.0, 316.0, 317.0, 276.0, 297.0, 288.0, 291.0, 288.0, 294.0, 318.0, 315.0, 324.0, 312.0, 313.0, 314.0, 293.0, 294.0, 291.0, 291.0, 316.0, 314.0, 317.0, 313.0, 314.0, 325.0, 317.0, 316.0, 287.0, 297.0, 318.0, 312.0, 221.0, 244.0, 316.0, 320.0, 319.0, 314.0, 290.0, 292.0, 288.0, 291.0, 283.0, 299.0, 314.0, 319.0, 256.0, 260.0, 283.0, 299.0, 291.0, 296.0, 317.0, 313.0, 311.0, 319.0, 314.0, 319.0, 326.0, 304.0, 308.0, 316.0, 321.0, 312.0, 321.0, 309.0, 314.0, 313.0, 317.0, 313.0, 288.0, 288.0, 286.0, 296.0, 318.0, 312.0, 322.0, 317.0, 327.0, 312.0, 316.0, 317.0, 291.0, 285.0, 319.0, 314.0, 318.0, 315.0, 288.0, 288.0, 324.0, 309.0, 322.0, 311.0, 301.0, 286.0, 314.0, 322.0, 264.0, 260.0, 316.0, 314.0, 319.0, 317.0, 314.0, 322.0, 316.0, 317.0, 319.0, 320.0, 288.0, 296.0, 319.0, 317.0, 306.0, 281.0, 316.0, 320.0, 299.0, 288.0, 325.0, 314.0, 317.0, 319.0, 293.0, 294.0, 319.0, 320.0, 318.0, 309.0, 316.0, 314.0, 319.0, 317.0, 296.0, 286.0, 296.0, 294.0, 293.0, 280.0, 319.0, 317.0, 283.0, 304.0, 309.0, 324.0, 311.0, 322.0, 282.0, 296.0, 316.0, 314.0, 280.0, 293.0, 285.0, 288.0, 312.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8960540337881886, "mean_processing_ms": 0.24751510966747803, "mean_inference_ms": 1.4847704330515064}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8112000, "num_steps_sampled": 4326400, "sample_time_ms": 22480.697, "load_time_ms": 37.651, "grad_time_ms": 10202.093, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001484702923335135, "policy_loss": -0.006018726620823145, "vf_loss": 80.70446014404297, "vf_explained_var": 0.7642549872398376, "kl": 0.0017236651619896293, "entropy": 1.1340447664260864, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 4326400, "episodes_total": 10816, "training_iteration": 338, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-38-41", "timestamp": 1660257521, "time_this_iter_s": 31.244572162628174, "time_total_s": 15933.16377902031, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15933.16377902031, "timesteps_since_restore": 4326400, "iterations_since_restore": 338, "perf": {"cpu_util_percent": 27.328888888888894, "ram_util_percent": 59.27555555555555}} -{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 612.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 306.0}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.4, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.62, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.03, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.16, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.95, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.49, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.16, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.95, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.16, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.95, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 630.0, 633.0, 639.0, 630.0, 587.0, 636.0, 582.0, 636.0, 576.0, 584.0, 630.0, 639.0, 639.0, 633.0, 627.0, 582.0, 582.0, 630.0, 630.0, 633.0, 522.0, 587.0, 593.0, 633.0, 630.0, 582.0, 636.0, 630.0, 587.0, 636.0, 633.0, 576.0, 633.0, 633.0, 587.0, 636.0, 524.0, 630.0, 636.0, 636.0, 633.0, 639.0, 584.0, 636.0, 587.0, 636.0, 587.0, 639.0, 636.0, 587.0, 639.0, 627.0, 630.0, 636.0, 582.0, 590.0, 573.0, 636.0, 587.0, 633.0, 633.0, 578.0, 630.0, 573.0, 573.0, 624.0, 633.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 570.0, 633.0, 639.0, 582.0, 630.0, 630.0, 530.0, 633.0, 516.0, 633.0, 636.0, 627.0, 633.0, 587.0, 636.0, 633.0, 573.0, 579.0, 582.0, 633.0, 636.0, 627.0, 587.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 299.0, 322.0, 314.0, 316.0, 314.0, 319.0, 314.0, 322.0, 317.0, 316.0, 314.0, 291.0, 296.0, 314.0, 322.0, 285.0, 297.0, 325.0, 311.0, 288.0, 288.0, 295.0, 289.0, 326.0, 304.0, 317.0, 322.0, 322.0, 317.0, 316.0, 317.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 306.0, 324.0, 316.0, 314.0, 311.0, 322.0, 262.0, 260.0, 289.0, 298.0, 296.0, 297.0, 322.0, 311.0, 313.0, 317.0, 293.0, 289.0, 319.0, 317.0, 313.0, 317.0, 309.0, 278.0, 314.0, 322.0, 318.0, 315.0, 288.0, 288.0, 324.0, 309.0, 322.0, 311.0, 301.0, 286.0, 314.0, 322.0, 264.0, 260.0, 316.0, 314.0, 319.0, 317.0, 314.0, 322.0, 316.0, 317.0, 319.0, 320.0, 288.0, 296.0, 319.0, 317.0, 306.0, 281.0, 316.0, 320.0, 299.0, 288.0, 325.0, 314.0, 317.0, 319.0, 293.0, 294.0, 319.0, 320.0, 318.0, 309.0, 316.0, 314.0, 319.0, 317.0, 296.0, 286.0, 296.0, 294.0, 293.0, 280.0, 319.0, 317.0, 283.0, 304.0, 309.0, 324.0, 311.0, 322.0, 282.0, 296.0, 316.0, 314.0, 280.0, 293.0, 285.0, 288.0, 312.0, 312.0, 319.0, 314.0, 319.0, 311.0, 316.0, 314.0, 321.0, 309.0, 317.0, 319.0, 317.0, 316.0, 298.0, 284.0, 277.0, 293.0, 319.0, 314.0, 322.0, 317.0, 291.0, 291.0, 321.0, 309.0, 316.0, 314.0, 257.0, 273.0, 321.0, 312.0, 256.0, 260.0, 311.0, 322.0, 316.0, 320.0, 311.0, 316.0, 316.0, 317.0, 293.0, 294.0, 314.0, 322.0, 316.0, 317.0, 276.0, 297.0, 288.0, 291.0, 288.0, 294.0, 318.0, 315.0, 324.0, 312.0, 313.0, 314.0, 293.0, 294.0, 291.0, 291.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8947470159821566, "mean_processing_ms": 0.2472537044161699, "mean_inference_ms": 1.483669994240604}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8136000, "num_steps_sampled": 4339200, "sample_time_ms": 22385.99, "load_time_ms": 37.99, "grad_time_ms": 10282.952, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0002800325455609709, "policy_loss": -0.007263503968715668, "vf_loss": 81.1025161743164, 
"vf_explained_var": 0.7635498642921448, "kl": 0.0021122132893651724, "entropy": 1.1334240436553955, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4339200, "episodes_total": 10848, "training_iteration": 339, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-39-11", "timestamp": 1660257551, "time_this_iter_s": 30.060129165649414, "time_total_s": 15963.223908185959, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": 
[null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15963.223908185959, "timesteps_since_restore": 4339200, "iterations_since_restore": 339, "perf": {"cpu_util_percent": 32.38333333333334, "ram_util_percent": 58.776190476190465}} -{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 611.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 305.805}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.01, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.45, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.7, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.11, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.02, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.4, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.48, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.07, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.02, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.02, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 590.0, 639.0, 636.0, 582.0, 624.0, 633.0, 587.0, 630.0, 582.0, 630.0, 630.0, 630.0, 636.0, 636.0, 579.0, 587.0, 587.0, 636.0, 584.0, 578.0, 630.0, 636.0, 630.0, 582.0, 579.0, 636.0, 627.0, 582.0, 633.0, 582.0, 630.0, 630.0, 573.0, 573.0, 624.0, 633.0, 630.0, 630.0, 630.0, 636.0, 633.0, 582.0, 570.0, 633.0, 639.0, 582.0, 630.0, 630.0, 530.0, 633.0, 516.0, 633.0, 636.0, 627.0, 633.0, 587.0, 636.0, 633.0, 573.0, 579.0, 582.0, 633.0, 636.0, 627.0, 587.0, 582.0, 630.0, 587.0, 636.0, 630.0, 633.0, 639.0, 630.0, 587.0, 636.0, 582.0, 636.0, 576.0, 584.0, 630.0, 639.0, 639.0, 633.0, 627.0, 582.0, 582.0, 630.0, 630.0, 633.0, 522.0, 587.0, 593.0, 633.0, 630.0, 582.0, 636.0, 630.0, 587.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [323.0, 307.0, 304.0, 286.0, 320.0, 319.0, 316.0, 320.0, 296.0, 286.0, 313.0, 311.0, 311.0, 322.0, 291.0, 296.0, 314.0, 316.0, 296.0, 286.0, 311.0, 319.0, 321.0, 309.0, 316.0, 314.0, 319.0, 317.0, 317.0, 319.0, 293.0, 286.0, 301.0, 286.0, 298.0, 289.0, 314.0, 322.0, 288.0, 296.0, 286.0, 292.0, 318.0, 312.0, 312.0, 324.0, 313.0, 317.0, 288.0, 294.0, 295.0, 284.0, 314.0, 322.0, 316.0, 311.0, 291.0, 291.0, 316.0, 317.0, 285.0, 297.0, 313.0, 317.0, 316.0, 314.0, 280.0, 293.0, 285.0, 288.0, 312.0, 312.0, 319.0, 314.0, 319.0, 311.0, 316.0, 314.0, 321.0, 309.0, 317.0, 319.0, 317.0, 316.0, 298.0, 284.0, 277.0, 293.0, 319.0, 314.0, 322.0, 317.0, 291.0, 291.0, 321.0, 309.0, 316.0, 314.0, 257.0, 273.0, 321.0, 312.0, 256.0, 260.0, 311.0, 322.0, 316.0, 320.0, 311.0, 316.0, 316.0, 317.0, 293.0, 294.0, 314.0, 322.0, 316.0, 317.0, 276.0, 297.0, 288.0, 291.0, 288.0, 294.0, 318.0, 315.0, 324.0, 312.0, 313.0, 314.0, 293.0, 294.0, 291.0, 291.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 316.0, 314.0, 319.0, 314.0, 322.0, 317.0, 316.0, 314.0, 291.0, 296.0, 314.0, 322.0, 285.0, 297.0, 325.0, 311.0, 288.0, 288.0, 295.0, 289.0, 326.0, 304.0, 317.0, 322.0, 322.0, 317.0, 316.0, 317.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 306.0, 324.0, 316.0, 314.0, 311.0, 322.0, 262.0, 260.0, 289.0, 298.0, 296.0, 297.0, 322.0, 311.0, 313.0, 317.0, 293.0, 289.0, 319.0, 317.0, 313.0, 317.0, 309.0, 278.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8934318351724546, "mean_processing_ms": 0.24699024834277958, "mean_inference_ms": 1.4821846503211202}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8160000, "num_steps_sampled": 4352000, "sample_time_ms": 22391.649, "load_time_ms": 38.471, "grad_time_ms": 
10443.746, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004414581228047609, "policy_loss": -0.003194813383743167, "vf_loss": 81.79281616210938, "vf_explained_var": 0.764918863773346, "kl": 0.0018889306811615825, "entropy": 1.1397979259490967, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4352000, "episodes_total": 10880, "training_iteration": 340, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-39-42", "timestamp": 1660257582, "time_this_iter_s": 31.150686979293823, "time_total_s": 15994.374595165253, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": 
["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 15994.374595165253, "timesteps_since_restore": 4352000, "iterations_since_restore": 340, "perf": {"cpu_util_percent": 27.049999999999997, "ram_util_percent": 58.795454545454525}} -{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 610.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.27}, "custom_metrics": {"sparse_reward_mean": 211.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.14, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.6, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.97, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.94, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.31, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.79, 
"useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.13, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.94, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.94, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 462.0, 636.0, 639.0, 587.0, 576.0, 639.0, 633.0, 633.0, 633.0, 633.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 582.0, 573.0, 630.0, 582.0, 570.0, 587.0, 573.0, 544.0, 582.0, 630.0, 630.0, 582.0, 630.0, 633.0, 639.0, 627.0, 587.0, 582.0, 630.0, 587.0, 636.0, 630.0, 633.0, 639.0, 630.0, 587.0, 636.0, 582.0, 636.0, 576.0, 584.0, 630.0, 639.0, 639.0, 633.0, 627.0, 582.0, 582.0, 630.0, 630.0, 633.0, 522.0, 587.0, 593.0, 633.0, 630.0, 582.0, 636.0, 630.0, 587.0, 636.0, 630.0, 590.0, 639.0, 636.0, 582.0, 624.0, 633.0, 587.0, 630.0, 582.0, 630.0, 630.0, 630.0, 636.0, 636.0, 579.0, 587.0, 587.0, 636.0, 584.0, 578.0, 630.0, 636.0, 630.0, 582.0, 579.0, 636.0, 627.0, 582.0, 633.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 312.0, 228.0, 234.0, 314.0, 322.0, 327.0, 312.0, 293.0, 294.0, 290.0, 286.0, 320.0, 319.0, 322.0, 311.0, 324.0, 309.0, 314.0, 319.0, 317.0, 316.0, 308.0, 322.0, 287.0, 292.0, 314.0, 319.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 296.0, 286.0, 296.0, 277.0, 321.0, 309.0, 288.0, 294.0, 285.0, 285.0, 285.0, 302.0, 287.0, 286.0, 275.0, 269.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 301.0, 281.0, 315.0, 315.0, 318.0, 315.0, 316.0, 323.0, 313.0, 314.0, 293.0, 294.0, 291.0, 291.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 316.0, 314.0, 319.0, 314.0, 322.0, 317.0, 316.0, 314.0, 291.0, 296.0, 314.0, 322.0, 285.0, 297.0, 325.0, 311.0, 288.0, 288.0, 295.0, 289.0, 326.0, 304.0, 317.0, 322.0, 322.0, 317.0, 316.0, 317.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 306.0, 324.0, 316.0, 314.0, 311.0, 322.0, 262.0, 260.0, 289.0, 298.0, 296.0, 297.0, 322.0, 311.0, 313.0, 317.0, 293.0, 289.0, 319.0, 317.0, 313.0, 317.0, 309.0, 278.0, 314.0, 322.0, 323.0, 307.0, 304.0, 286.0, 320.0, 319.0, 316.0, 320.0, 296.0, 286.0, 313.0, 311.0, 311.0, 322.0, 291.0, 296.0, 314.0, 316.0, 296.0, 286.0, 311.0, 319.0, 321.0, 309.0, 316.0, 314.0, 319.0, 317.0, 317.0, 319.0, 293.0, 286.0, 301.0, 286.0, 298.0, 289.0, 314.0, 322.0, 288.0, 296.0, 286.0, 292.0, 318.0, 312.0, 312.0, 324.0, 313.0, 317.0, 288.0, 294.0, 295.0, 284.0, 314.0, 322.0, 316.0, 311.0, 291.0, 291.0, 316.0, 317.0, 285.0, 297.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8921345690140595, "mean_processing_ms": 0.24673048959961144, 
"mean_inference_ms": 1.4810833291212553}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8184000, "num_steps_sampled": 4364800, "sample_time_ms": 22778.758, "load_time_ms": 38.164, "grad_time_ms": 10656.478, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005775378551334143, "policy_loss": -0.00215825904160738, "vf_loss": 85.0276870727539, "vf_explained_var": 0.7658646106719971, "kl": 0.0019542332738637924, "entropy": 1.1382619142532349, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4364800, "episodes_total": 10912, "training_iteration": 341, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-40-20", "timestamp": 1660257620, "time_this_iter_s": 37.338398933410645, "time_total_s": 16031.712994098663, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, 
"num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16031.712994098663, "timesteps_since_restore": 4364800, "iterations_since_restore": 341, "perf": {"cpu_util_percent": 27.592452830188673, "ram_util_percent": 58.783018867924525}} -{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 610.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.22}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.64, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, 
"tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.62, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.96, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.93, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, 
"dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.58, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.93, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 
21, "viable_onion_potting_agent_1_mean": 17.93, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 573.0, 627.0, 630.0, 636.0, 636.0, 636.0, 582.0, 624.0, 576.0, 575.0, 633.0, 587.0, 630.0, 630.0, 636.0, 624.0, 633.0, 636.0, 590.0, 630.0, 584.0, 576.0, 582.0, 633.0, 633.0, 636.0, 582.0, 582.0, 639.0, 579.0, 564.0, 636.0, 630.0, 587.0, 636.0, 630.0, 590.0, 639.0, 636.0, 582.0, 624.0, 633.0, 587.0, 630.0, 582.0, 630.0, 630.0, 630.0, 636.0, 636.0, 579.0, 587.0, 587.0, 636.0, 584.0, 578.0, 630.0, 636.0, 630.0, 582.0, 579.0, 636.0, 627.0, 582.0, 633.0, 582.0, 630.0, 633.0, 462.0, 636.0, 639.0, 587.0, 576.0, 639.0, 633.0, 
633.0, 633.0, 633.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 582.0, 573.0, 630.0, 582.0, 570.0, 587.0, 573.0, 544.0, 582.0, 630.0, 630.0, 582.0, 630.0, 633.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 316.0, 288.0, 285.0, 305.0, 322.0, 319.0, 311.0, 314.0, 322.0, 319.0, 317.0, 317.0, 319.0, 287.0, 295.0, 308.0, 316.0, 283.0, 293.0, 290.0, 285.0, 322.0, 311.0, 285.0, 302.0, 318.0, 312.0, 314.0, 316.0, 314.0, 322.0, 311.0, 313.0, 311.0, 322.0, 314.0, 322.0, 285.0, 305.0, 316.0, 314.0, 301.0, 283.0, 295.0, 281.0, 290.0, 292.0, 314.0, 319.0, 311.0, 322.0, 317.0, 319.0, 291.0, 291.0, 286.0, 296.0, 322.0, 317.0, 299.0, 280.0, 281.0, 283.0, 319.0, 317.0, 313.0, 317.0, 309.0, 278.0, 314.0, 322.0, 323.0, 307.0, 304.0, 286.0, 320.0, 319.0, 316.0, 320.0, 296.0, 286.0, 313.0, 311.0, 311.0, 322.0, 291.0, 296.0, 314.0, 316.0, 296.0, 286.0, 311.0, 319.0, 321.0, 309.0, 316.0, 314.0, 319.0, 317.0, 317.0, 319.0, 293.0, 286.0, 301.0, 286.0, 298.0, 289.0, 314.0, 322.0, 288.0, 296.0, 286.0, 292.0, 318.0, 312.0, 312.0, 324.0, 313.0, 317.0, 288.0, 294.0, 295.0, 284.0, 314.0, 322.0, 316.0, 311.0, 291.0, 291.0, 316.0, 317.0, 285.0, 297.0, 313.0, 317.0, 321.0, 312.0, 228.0, 234.0, 314.0, 322.0, 327.0, 312.0, 293.0, 294.0, 290.0, 286.0, 320.0, 319.0, 322.0, 311.0, 324.0, 309.0, 314.0, 319.0, 317.0, 316.0, 308.0, 322.0, 287.0, 292.0, 314.0, 319.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 296.0, 286.0, 296.0, 277.0, 321.0, 309.0, 288.0, 294.0, 285.0, 285.0, 285.0, 302.0, 287.0, 286.0, 
275.0, 269.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 301.0, 281.0, 315.0, 315.0, 318.0, 315.0, 316.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8908455849362553, "mean_processing_ms": 0.24647214238020762, "mean_inference_ms": 1.4799101322874493}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8208000, "num_steps_sampled": 4377600, "sample_time_ms": 22385.575, "load_time_ms": 38.145, "grad_time_ms": 10547.897, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005002778489142656, "policy_loss": -0.002570929704234004, "vf_loss": 81.44794464111328, "vf_explained_var": 0.765848696231842, "kl": 0.002198006259277463, "entropy": 1.1421762704849243, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4377600, "episodes_total": 10944, "training_iteration": 342, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-40-48", "timestamp": 1660257648, "time_this_iter_s": 28.1991069316864, "time_total_s": 16059.91210103035, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16059.91210103035, "timesteps_since_restore": 4377600, "iterations_since_restore": 342, "perf": {"cpu_util_percent": 30.8525, "ram_util_percent": 58.825}} -{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 608.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 304.36}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.12, "shaped_reward_min": 142, 
"shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.47, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.92, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 12, 
"potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.37, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, 
"optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 630.0, 582.0, 587.0, 582.0, 582.0, 630.0, 530.0, 633.0, 633.0, 639.0, 579.0, 627.0, 633.0, 636.0, 633.0, 587.0, 627.0, 627.0, 630.0, 630.0, 587.0, 582.0, 582.0, 582.0, 582.0, 596.0, 636.0, 633.0, 630.0, 576.0, 582.0, 633.0, 582.0, 630.0, 633.0, 462.0, 636.0, 639.0, 587.0, 576.0, 639.0, 633.0, 633.0, 633.0, 633.0, 630.0, 579.0, 633.0, 636.0, 633.0, 636.0, 
582.0, 573.0, 630.0, 582.0, 570.0, 587.0, 573.0, 544.0, 582.0, 630.0, 630.0, 582.0, 630.0, 633.0, 639.0, 630.0, 573.0, 627.0, 630.0, 636.0, 636.0, 636.0, 582.0, 624.0, 576.0, 575.0, 633.0, 587.0, 630.0, 630.0, 636.0, 624.0, 633.0, 636.0, 590.0, 630.0, 584.0, 576.0, 582.0, 633.0, 633.0, 636.0, 582.0, 582.0, 639.0, 579.0, 564.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 319.0, 324.0, 309.0, 311.0, 319.0, 296.0, 286.0, 296.0, 291.0, 288.0, 294.0, 293.0, 289.0, 321.0, 309.0, 259.0, 271.0, 319.0, 314.0, 329.0, 304.0, 314.0, 325.0, 283.0, 296.0, 308.0, 319.0, 314.0, 319.0, 319.0, 317.0, 317.0, 316.0, 288.0, 299.0, 308.0, 319.0, 311.0, 316.0, 316.0, 314.0, 308.0, 322.0, 296.0, 291.0, 288.0, 294.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 301.0, 295.0, 321.0, 315.0, 319.0, 314.0, 316.0, 314.0, 296.0, 280.0, 291.0, 291.0, 316.0, 317.0, 285.0, 297.0, 313.0, 317.0, 321.0, 312.0, 228.0, 234.0, 314.0, 322.0, 327.0, 312.0, 293.0, 294.0, 290.0, 286.0, 320.0, 319.0, 322.0, 311.0, 324.0, 309.0, 314.0, 319.0, 317.0, 316.0, 308.0, 322.0, 287.0, 292.0, 314.0, 319.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 296.0, 286.0, 296.0, 277.0, 321.0, 309.0, 288.0, 294.0, 285.0, 285.0, 285.0, 302.0, 287.0, 286.0, 275.0, 269.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 301.0, 281.0, 315.0, 315.0, 318.0, 315.0, 316.0, 323.0, 314.0, 316.0, 288.0, 285.0, 305.0, 322.0, 319.0, 311.0, 314.0, 322.0, 319.0, 317.0, 317.0, 319.0, 287.0, 295.0, 308.0, 316.0, 283.0, 293.0, 290.0, 285.0, 322.0, 311.0, 285.0, 
302.0, 318.0, 312.0, 314.0, 316.0, 314.0, 322.0, 311.0, 313.0, 311.0, 322.0, 314.0, 322.0, 285.0, 305.0, 316.0, 314.0, 301.0, 283.0, 295.0, 281.0, 290.0, 292.0, 314.0, 319.0, 311.0, 322.0, 317.0, 319.0, 291.0, 291.0, 286.0, 296.0, 322.0, 317.0, 299.0, 280.0, 281.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8895732427757973, "mean_processing_ms": 0.24621711936805896, "mean_inference_ms": 1.4789333750052633}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8232000, "num_steps_sampled": 4390400, "sample_time_ms": 22479.914, "load_time_ms": 38.369, "grad_time_ms": 10420.997, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0025722135324031115, "policy_loss": -0.00497409887611866, "vf_loss": 81.19109344482422, "vf_explained_var": 0.7659382820129395, "kl": 0.0019239649409428239, "entropy": 1.1455968618392944, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4390400, "episodes_total": 10976, "training_iteration": 343, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-41-18", "timestamp": 1660257678, "time_this_iter_s": 30.595246076583862, "time_total_s": 16090.507347106934, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, 
"custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16090.507347106934, "timesteps_since_restore": 4390400, "iterations_since_restore": 343, "perf": {"cpu_util_percent": 28.927906976744183, "ram_util_percent": 58.81860465116278}} -{"episode_reward_max": 639.0, "episode_reward_min": 521.0, "episode_reward_mean": 607.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 329.0}, 
"policy_reward_mean": {"ppo": 303.955}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.11, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.38, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.68, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.19, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.04, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, 
"useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.9, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.34, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.9, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.9, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 582.0, 630.0, 587.0, 630.0, 587.0, 579.0, 581.0, 630.0, 587.0, 576.0, 521.0, 587.0, 639.0, 587.0, 630.0, 630.0, 639.0, 630.0, 630.0, 587.0, 582.0, 582.0, 
630.0, 587.0, 639.0, 582.0, 582.0, 630.0, 630.0, 590.0, 630.0, 582.0, 630.0, 633.0, 639.0, 630.0, 573.0, 627.0, 630.0, 636.0, 636.0, 636.0, 582.0, 624.0, 576.0, 575.0, 633.0, 587.0, 630.0, 630.0, 636.0, 624.0, 633.0, 636.0, 590.0, 630.0, 584.0, 576.0, 582.0, 633.0, 633.0, 636.0, 582.0, 582.0, 639.0, 579.0, 564.0, 627.0, 633.0, 630.0, 582.0, 587.0, 582.0, 582.0, 630.0, 530.0, 633.0, 633.0, 639.0, 579.0, 627.0, 633.0, 636.0, 633.0, 587.0, 627.0, 627.0, 630.0, 630.0, 587.0, 582.0, 582.0, 582.0, 582.0, 596.0, 636.0, 633.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 289.0, 290.0, 292.0, 321.0, 309.0, 288.0, 299.0, 318.0, 312.0, 288.0, 299.0, 287.0, 292.0, 284.0, 297.0, 323.0, 307.0, 288.0, 299.0, 287.0, 289.0, 260.0, 261.0, 296.0, 291.0, 317.0, 322.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 322.0, 317.0, 311.0, 319.0, 319.0, 311.0, 285.0, 302.0, 299.0, 283.0, 291.0, 291.0, 306.0, 324.0, 296.0, 291.0, 324.0, 315.0, 293.0, 289.0, 298.0, 284.0, 311.0, 319.0, 316.0, 314.0, 296.0, 294.0, 318.0, 312.0, 301.0, 281.0, 315.0, 315.0, 318.0, 315.0, 316.0, 323.0, 314.0, 316.0, 288.0, 285.0, 305.0, 322.0, 319.0, 311.0, 314.0, 322.0, 319.0, 317.0, 317.0, 319.0, 287.0, 295.0, 308.0, 316.0, 283.0, 293.0, 290.0, 285.0, 322.0, 311.0, 285.0, 302.0, 318.0, 312.0, 314.0, 316.0, 314.0, 322.0, 311.0, 313.0, 311.0, 322.0, 314.0, 322.0, 285.0, 305.0, 316.0, 314.0, 301.0, 283.0, 295.0, 281.0, 290.0, 292.0, 314.0, 319.0, 311.0, 322.0, 317.0, 319.0, 291.0, 291.0, 286.0, 296.0, 322.0, 
317.0, 299.0, 280.0, 281.0, 283.0, 308.0, 319.0, 324.0, 309.0, 311.0, 319.0, 296.0, 286.0, 296.0, 291.0, 288.0, 294.0, 293.0, 289.0, 321.0, 309.0, 259.0, 271.0, 319.0, 314.0, 329.0, 304.0, 314.0, 325.0, 283.0, 296.0, 308.0, 319.0, 314.0, 319.0, 319.0, 317.0, 317.0, 316.0, 288.0, 299.0, 308.0, 319.0, 311.0, 316.0, 316.0, 314.0, 308.0, 322.0, 296.0, 291.0, 288.0, 294.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 301.0, 295.0, 321.0, 315.0, 319.0, 314.0, 316.0, 314.0, 296.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 0.888300023019413, "mean_processing_ms": 0.24596248593117787, "mean_inference_ms": 1.4777042667426168}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8256000, "num_steps_sampled": 4403200, "sample_time_ms": 22324.862, "load_time_ms": 38.863, "grad_time_ms": 10570.078, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001806688029319048, "policy_loss": -0.005986546631902456, "vf_loss": 83.65050506591797, "vf_explained_var": 0.7647177577018738, "kl": 0.002452569780871272, "entropy": 1.1436399221420288, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4403200, "episodes_total": 11008, "training_iteration": 344, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-41-52", "timestamp": 1660257712, "time_this_iter_s": 33.82224774360657, "time_total_s": 16124.32959485054, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 
20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 
2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16124.32959485054, "timesteps_since_restore": 4403200, "iterations_since_restore": 344, "perf": {"cpu_util_percent": 29.40625, "ram_util_percent": 58.83958333333334}} -{"episode_reward_max": 639.0, 
"episode_reward_min": 521.0, "episode_reward_mean": 603.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 301.93}, "custom_metrics": {"sparse_reward_mean": 208.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.26, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.41, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, 
"onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.95, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.28, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.95, "optimal_onion_potting_agent_0_min": 11, 
"optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.95, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [582.0, 627.0, 633.0, 636.0, 582.0, 633.0, 535.0, 621.0, 579.0, 624.0, 573.0, 579.0, 633.0, 587.0, 636.0, 579.0, 576.0, 636.0, 630.0, 587.0, 522.0, 582.0, 579.0, 627.0, 639.0, 627.0, 627.0, 541.0, 639.0, 639.0, 587.0, 582.0, 582.0, 639.0, 579.0, 564.0, 627.0, 633.0, 630.0, 582.0, 587.0, 582.0, 582.0, 630.0, 530.0, 633.0, 633.0, 639.0, 579.0, 627.0, 633.0, 636.0, 633.0, 587.0, 627.0, 627.0, 630.0, 630.0, 587.0, 582.0, 582.0, 582.0, 582.0, 596.0, 636.0, 633.0, 630.0, 576.0, 567.0, 582.0, 630.0, 587.0, 630.0, 587.0, 579.0, 581.0, 630.0, 587.0, 576.0, 521.0, 587.0, 639.0, 587.0, 630.0, 630.0, 639.0, 630.0, 630.0, 587.0, 582.0, 582.0, 630.0, 587.0, 639.0, 582.0, 582.0, 630.0, 630.0, 590.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 309.0, 318.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 323.0, 310.0, 273.0, 262.0, 310.0, 311.0, 285.0, 294.0, 322.0, 302.0, 292.0, 281.0, 283.0, 296.0, 317.0, 316.0, 288.0, 299.0, 316.0, 320.0, 288.0, 291.0, 280.0, 296.0, 319.0, 317.0, 314.0, 316.0, 283.0, 304.0, 267.0, 255.0, 289.0, 293.0, 294.0, 285.0, 313.0, 314.0, 319.0, 320.0, 321.0, 306.0, 313.0, 314.0, 271.0, 270.0, 314.0, 325.0, 324.0, 315.0, 297.0, 290.0, 299.0, 283.0, 286.0, 296.0, 322.0, 317.0, 299.0, 280.0, 281.0, 283.0, 308.0, 319.0, 324.0, 309.0, 311.0, 319.0, 296.0, 286.0, 296.0, 291.0, 288.0, 294.0, 293.0, 289.0, 321.0, 309.0, 259.0, 271.0, 319.0, 314.0, 329.0, 304.0, 314.0, 325.0, 283.0, 296.0, 308.0, 319.0, 314.0, 319.0, 319.0, 317.0, 317.0, 
316.0, 288.0, 299.0, 308.0, 319.0, 311.0, 316.0, 316.0, 314.0, 308.0, 322.0, 296.0, 291.0, 288.0, 294.0, 296.0, 286.0, 286.0, 296.0, 282.0, 300.0, 301.0, 295.0, 321.0, 315.0, 319.0, 314.0, 316.0, 314.0, 296.0, 280.0, 278.0, 289.0, 290.0, 292.0, 321.0, 309.0, 288.0, 299.0, 318.0, 312.0, 288.0, 299.0, 287.0, 292.0, 284.0, 297.0, 323.0, 307.0, 288.0, 299.0, 287.0, 289.0, 260.0, 261.0, 296.0, 291.0, 317.0, 322.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 322.0, 317.0, 311.0, 319.0, 319.0, 311.0, 285.0, 302.0, 299.0, 283.0, 291.0, 291.0, 306.0, 324.0, 296.0, 291.0, 324.0, 315.0, 293.0, 289.0, 298.0, 284.0, 311.0, 319.0, 316.0, 314.0, 296.0, 294.0, 318.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8870510518721875, "mean_processing_ms": 0.24571562902687222, "mean_inference_ms": 1.477060351190489}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8280000, "num_steps_sampled": 4416000, "sample_time_ms": 22762.706, "load_time_ms": 38.824, "grad_time_ms": 10571.457, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005129000172019005, "policy_loss": -0.0031147233676165342, "vf_loss": 88.14037322998047, "vf_explained_var": 0.763336181640625, "kl": 0.0019052595598623157, "entropy": 1.1406329870224, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4416000, "episodes_total": 11040, "training_iteration": 345, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-42-32", "timestamp": 1660257752, "time_this_iter_s": 39.47760009765625, "time_total_s": 16163.807194948196, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
16163.807194948196, "timesteps_since_restore": 4416000, "iterations_since_restore": 345, "perf": {"cpu_util_percent": 25.21272727272727, "ram_util_percent": 58.801818181818156}} -{"episode_reward_max": 639.0, "episode_reward_min": 521.0, "episode_reward_mean": 602.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 301.185}, "custom_metrics": {"sparse_reward_mean": 208.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.57, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.45, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.19, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, 
"useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.95, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.4, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.95, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.95, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 587.0, 569.0, 555.0, 570.0, 639.0, 522.0, 633.0, 579.0, 639.0, 582.0, 627.0, 639.0, 579.0, 639.0, 630.0, 636.0, 639.0, 630.0, 627.0, 579.0, 579.0, 630.0, 573.0, 570.0, 579.0, 630.0, 581.0, 639.0, 587.0, 587.0, 541.0, 636.0, 633.0, 630.0, 576.0, 567.0, 582.0, 630.0, 587.0, 630.0, 587.0, 579.0, 581.0, 630.0, 587.0, 576.0, 521.0, 587.0, 639.0, 587.0, 630.0, 630.0, 639.0, 630.0, 630.0, 587.0, 582.0, 582.0, 630.0, 587.0, 639.0, 582.0, 582.0, 630.0, 630.0, 590.0, 630.0, 582.0, 627.0, 633.0, 636.0, 582.0, 633.0, 535.0, 621.0, 579.0, 624.0, 573.0, 579.0, 633.0, 587.0, 636.0, 579.0, 576.0, 636.0, 630.0, 587.0, 522.0, 582.0, 579.0, 627.0, 639.0, 627.0, 627.0, 541.0, 639.0, 639.0, 587.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [315.0, 312.0, 299.0, 288.0, 285.0, 284.0, 279.0, 276.0, 287.0, 283.0, 322.0, 317.0, 257.0, 265.0, 319.0, 314.0, 285.0, 294.0, 319.0, 320.0, 291.0, 291.0, 316.0, 311.0, 320.0, 319.0, 298.0, 281.0, 317.0, 322.0, 308.0, 322.0, 319.0, 317.0, 319.0, 320.0, 326.0, 304.0, 318.0, 309.0, 293.0, 286.0, 285.0, 294.0, 321.0, 309.0, 294.0, 279.0, 291.0, 279.0, 283.0, 296.0, 314.0, 316.0, 294.0, 287.0, 322.0, 317.0, 292.0, 295.0, 294.0, 293.0, 260.0, 281.0, 321.0, 315.0, 319.0, 314.0, 316.0, 314.0, 296.0, 280.0, 278.0, 
289.0, 290.0, 292.0, 321.0, 309.0, 288.0, 299.0, 318.0, 312.0, 288.0, 299.0, 287.0, 292.0, 284.0, 297.0, 323.0, 307.0, 288.0, 299.0, 287.0, 289.0, 260.0, 261.0, 296.0, 291.0, 317.0, 322.0, 291.0, 296.0, 316.0, 314.0, 313.0, 317.0, 322.0, 317.0, 311.0, 319.0, 319.0, 311.0, 285.0, 302.0, 299.0, 283.0, 291.0, 291.0, 306.0, 324.0, 296.0, 291.0, 324.0, 315.0, 293.0, 289.0, 298.0, 284.0, 311.0, 319.0, 316.0, 314.0, 296.0, 294.0, 318.0, 312.0, 289.0, 293.0, 309.0, 318.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 323.0, 310.0, 273.0, 262.0, 310.0, 311.0, 285.0, 294.0, 322.0, 302.0, 292.0, 281.0, 283.0, 296.0, 317.0, 316.0, 288.0, 299.0, 316.0, 320.0, 288.0, 291.0, 280.0, 296.0, 319.0, 317.0, 314.0, 316.0, 283.0, 304.0, 267.0, 255.0, 289.0, 293.0, 294.0, 285.0, 313.0, 314.0, 319.0, 320.0, 321.0, 306.0, 313.0, 314.0, 271.0, 270.0, 314.0, 325.0, 324.0, 315.0, 297.0, 290.0, 299.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.885814344606481, "mean_processing_ms": 0.24547265426163356, "mean_inference_ms": 1.476598213105171}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8304000, "num_steps_sampled": 4428800, "sample_time_ms": 22961.91, "load_time_ms": 38.412, "grad_time_ms": 10610.867, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004017222672700882, "policy_loss": -0.007523353677242994, "vf_loss": 84.93380737304688, "vf_explained_var": 0.7618634104728699, "kl": 0.0019265868468210101, "entropy": 1.1366103887557983, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4428800, "episodes_total": 11072, "training_iteration": 346, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-43-09", "timestamp": 1660257789, "time_this_iter_s": 37.03333592414856, "time_total_s": 16200.840530872345, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16200.840530872345, "timesteps_since_restore": 4428800, "iterations_since_restore": 346, "perf": {"cpu_util_percent": 30.683018867924527, "ram_util_percent": 58.82264150943394}} -{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 600.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.28}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.96, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, 
"onion_pickup_agent_1_mean": 17.88, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.34, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.39, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, 
"soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.08, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.39, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.39, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 587.0, 630.0, 579.0, 582.0, 519.0, 587.0, 630.0, 582.0, 582.0, 582.0, 630.0, 582.0, 570.0, 587.0, 636.0, 630.0, 587.0, 579.0, 530.0, 587.0, 630.0, 630.0, 582.0, 527.0, 582.0, 630.0, 627.0, 576.0, 636.0, 636.0, 630.0, 630.0, 630.0, 590.0, 630.0, 582.0, 627.0, 633.0, 636.0, 582.0, 633.0, 535.0, 621.0, 579.0, 624.0, 573.0, 579.0, 633.0, 587.0, 636.0, 579.0, 576.0, 636.0, 630.0, 587.0, 522.0, 582.0, 579.0, 627.0, 639.0, 627.0, 627.0, 541.0, 639.0, 639.0, 587.0, 582.0, 627.0, 587.0, 569.0, 555.0, 570.0, 639.0, 522.0, 633.0, 579.0, 639.0, 582.0, 627.0, 639.0, 579.0, 639.0, 630.0, 636.0, 639.0, 630.0, 627.0, 579.0, 579.0, 630.0, 573.0, 570.0, 579.0, 630.0, 581.0, 639.0, 587.0, 587.0, 541.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 318.0, 287.0, 300.0, 316.0, 314.0, 283.0, 296.0, 285.0, 297.0, 270.0, 249.0, 287.0, 300.0, 319.0, 311.0, 285.0, 297.0, 291.0, 291.0, 288.0, 294.0, 311.0, 319.0, 291.0, 291.0, 295.0, 275.0, 285.0, 302.0, 309.0, 327.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 262.0, 268.0, 293.0, 294.0, 316.0, 
314.0, 311.0, 319.0, 286.0, 296.0, 265.0, 262.0, 287.0, 295.0, 316.0, 314.0, 321.0, 306.0, 287.0, 289.0, 316.0, 320.0, 322.0, 314.0, 313.0, 317.0, 311.0, 319.0, 316.0, 314.0, 296.0, 294.0, 318.0, 312.0, 289.0, 293.0, 309.0, 318.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 323.0, 310.0, 273.0, 262.0, 310.0, 311.0, 285.0, 294.0, 322.0, 302.0, 292.0, 281.0, 283.0, 296.0, 317.0, 316.0, 288.0, 299.0, 316.0, 320.0, 288.0, 291.0, 280.0, 296.0, 319.0, 317.0, 314.0, 316.0, 283.0, 304.0, 267.0, 255.0, 289.0, 293.0, 294.0, 285.0, 313.0, 314.0, 319.0, 320.0, 321.0, 306.0, 313.0, 314.0, 271.0, 270.0, 314.0, 325.0, 324.0, 315.0, 297.0, 290.0, 299.0, 283.0, 315.0, 312.0, 299.0, 288.0, 285.0, 284.0, 279.0, 276.0, 287.0, 283.0, 322.0, 317.0, 257.0, 265.0, 319.0, 314.0, 285.0, 294.0, 319.0, 320.0, 291.0, 291.0, 316.0, 311.0, 320.0, 319.0, 298.0, 281.0, 317.0, 322.0, 308.0, 322.0, 319.0, 317.0, 319.0, 320.0, 326.0, 304.0, 318.0, 309.0, 293.0, 286.0, 285.0, 294.0, 321.0, 309.0, 294.0, 279.0, 291.0, 279.0, 283.0, 296.0, 314.0, 316.0, 294.0, 287.0, 322.0, 317.0, 292.0, 295.0, 294.0, 293.0, 260.0, 281.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8845895698087, "mean_processing_ms": 0.24523366351665968, "mean_inference_ms": 1.4761306525958986}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8328000, "num_steps_sampled": 4441600, "sample_time_ms": 22533.553, "load_time_ms": 38.187, "grad_time_ms": 10390.855, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007659171824343503, "policy_loss": -0.008885729126632214, "vf_loss": 86.87432861328125, "vf_explained_var": 0.7518642544746399, "kl": 0.0019218157976865768, "entropy": 1.1352366209030151, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4441600, "episodes_total": 11104, "training_iteration": 347, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-11_23-43-40", "timestamp": 1660257820, "time_this_iter_s": 30.97221803665161, "time_total_s": 16231.812748908997, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16231.812748908997, "timesteps_since_restore": 4441600, "iterations_since_restore": 347, "perf": {"cpu_util_percent": 33.49999999999999, "ram_util_percent": 59.33636363636363}} -{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 602.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.26}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.32, "shaped_reward_min": 155, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.02, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.48, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.6, 
"soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.04, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.48, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.48, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 633.0, 627.0, 633.0, 627.0, 576.0, 590.0, 633.0, 582.0, 633.0, 573.0, 576.0, 633.0, 582.0, 633.0, 630.0, 630.0, 587.0, 630.0, 633.0, 630.0, 582.0, 627.0, 582.0, 627.0, 636.0, 587.0, 633.0, 573.0, 633.0, 576.0, 639.0, 639.0, 587.0, 582.0, 627.0, 587.0, 569.0, 555.0, 570.0, 639.0, 522.0, 633.0, 579.0, 639.0, 582.0, 627.0, 639.0, 579.0, 639.0, 630.0, 636.0, 639.0, 630.0, 627.0, 579.0, 579.0, 630.0, 573.0, 570.0, 579.0, 630.0, 581.0, 639.0, 587.0, 587.0, 541.0, 630.0, 587.0, 630.0, 579.0, 582.0, 519.0, 587.0, 630.0, 582.0, 582.0, 582.0, 630.0, 582.0, 570.0, 587.0, 636.0, 630.0, 587.0, 579.0, 530.0, 587.0, 630.0, 630.0, 582.0, 527.0, 582.0, 630.0, 627.0, 576.0, 636.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 286.0, 293.0, 316.0, 317.0, 308.0, 319.0, 316.0, 317.0, 308.0, 319.0, 299.0, 277.0, 288.0, 302.0, 324.0, 309.0, 
291.0, 291.0, 319.0, 314.0, 291.0, 282.0, 290.0, 286.0, 322.0, 311.0, 293.0, 289.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 294.0, 293.0, 316.0, 314.0, 311.0, 322.0, 322.0, 308.0, 288.0, 294.0, 313.0, 314.0, 297.0, 285.0, 313.0, 314.0, 319.0, 317.0, 290.0, 297.0, 316.0, 317.0, 285.0, 288.0, 316.0, 317.0, 285.0, 291.0, 314.0, 325.0, 324.0, 315.0, 297.0, 290.0, 299.0, 283.0, 315.0, 312.0, 299.0, 288.0, 285.0, 284.0, 279.0, 276.0, 287.0, 283.0, 322.0, 317.0, 257.0, 265.0, 319.0, 314.0, 285.0, 294.0, 319.0, 320.0, 291.0, 291.0, 316.0, 311.0, 320.0, 319.0, 298.0, 281.0, 317.0, 322.0, 308.0, 322.0, 319.0, 317.0, 319.0, 320.0, 326.0, 304.0, 318.0, 309.0, 293.0, 286.0, 285.0, 294.0, 321.0, 309.0, 294.0, 279.0, 291.0, 279.0, 283.0, 296.0, 314.0, 316.0, 294.0, 287.0, 322.0, 317.0, 292.0, 295.0, 294.0, 293.0, 260.0, 281.0, 312.0, 318.0, 287.0, 300.0, 316.0, 314.0, 283.0, 296.0, 285.0, 297.0, 270.0, 249.0, 287.0, 300.0, 319.0, 311.0, 285.0, 297.0, 291.0, 291.0, 288.0, 294.0, 311.0, 319.0, 291.0, 291.0, 295.0, 275.0, 285.0, 302.0, 309.0, 327.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 262.0, 268.0, 293.0, 294.0, 316.0, 314.0, 311.0, 319.0, 286.0, 296.0, 265.0, 262.0, 287.0, 295.0, 316.0, 314.0, 321.0, 306.0, 287.0, 289.0, 316.0, 320.0, 322.0, 314.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8833591807666914, "mean_processing_ms": 0.24499142247523886, "mean_inference_ms": 1.4752658321565872}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8352000, "num_steps_sampled": 4454400, "sample_time_ms": 22743.076, "load_time_ms": 38.058, "grad_time_ms": 10274.124, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003061985597014427, "policy_loss": -0.004540739115327597, "vf_loss": 81.69985961914062, "vf_explained_var": 0.7570112347602844, "kl": 0.0020776980090886354, "entropy": 1.1345181465148926, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 4454400, "episodes_total": 11136, "training_iteration": 348, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-44-12", "timestamp": 1660257852, "time_this_iter_s": 32.17093515396118, "time_total_s": 16263.983684062958, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16263.983684062958, "timesteps_since_restore": 4454400, "iterations_since_restore": 348, "perf": {"cpu_util_percent": 33.459999999999994, "ram_util_percent": 58.76888888888889}} -{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 601.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.855}, "custom_metrics": {"sparse_reward_mean": 208.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.71, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.56, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.99, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.73, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 
0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 639.0, 636.0, 579.0, 587.0, 587.0, 581.0, 639.0, 576.0, 630.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 582.0, 576.0, 630.0, 590.0, 582.0, 582.0, 587.0, 582.0, 627.0, 582.0, 582.0, 527.0, 630.0, 639.0, 630.0, 582.0, 639.0, 587.0, 587.0, 541.0, 630.0, 587.0, 630.0, 579.0, 582.0, 519.0, 587.0, 630.0, 582.0, 582.0, 582.0, 630.0, 582.0, 570.0, 587.0, 636.0, 630.0, 587.0, 579.0, 530.0, 587.0, 630.0, 630.0, 582.0, 527.0, 582.0, 630.0, 627.0, 576.0, 636.0, 636.0, 630.0, 582.0, 579.0, 633.0, 627.0, 633.0, 627.0, 576.0, 590.0, 633.0, 582.0, 633.0, 573.0, 576.0, 633.0, 582.0, 633.0, 630.0, 630.0, 587.0, 630.0, 633.0, 630.0, 582.0, 627.0, 582.0, 627.0, 636.0, 587.0, 633.0, 573.0, 633.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 313.0, 322.0, 317.0, 317.0, 319.0, 291.0, 288.0, 291.0, 296.0, 299.0, 288.0, 296.0, 285.0, 317.0, 322.0, 291.0, 285.0, 319.0, 311.0, 311.0, 319.0, 283.0, 299.0, 293.0, 289.0, 288.0, 296.0, 314.0, 316.0, 319.0, 317.0, 291.0, 291.0, 285.0, 291.0, 310.0, 320.0, 291.0, 299.0, 290.0, 292.0, 283.0, 299.0, 293.0, 294.0, 298.0, 284.0, 320.0, 307.0, 291.0, 291.0, 288.0, 294.0, 256.0, 271.0, 319.0, 311.0, 315.0, 324.0, 316.0, 314.0, 291.0, 291.0, 322.0, 317.0, 292.0, 295.0, 294.0, 293.0, 260.0, 281.0, 312.0, 318.0, 287.0, 300.0, 316.0, 314.0, 283.0, 296.0, 285.0, 297.0, 270.0, 249.0, 287.0, 300.0, 319.0, 311.0, 285.0, 297.0, 291.0, 291.0, 288.0, 294.0, 311.0, 319.0, 291.0, 291.0, 295.0, 275.0, 285.0, 302.0, 309.0, 327.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 262.0, 268.0, 293.0, 294.0, 316.0, 314.0, 311.0, 319.0, 286.0, 296.0, 265.0, 262.0, 287.0, 295.0, 316.0, 314.0, 321.0, 306.0, 287.0, 289.0, 316.0, 320.0, 322.0, 314.0, 313.0, 317.0, 294.0, 288.0, 286.0, 293.0, 316.0, 317.0, 308.0, 319.0, 316.0, 317.0, 308.0, 319.0, 299.0, 277.0, 288.0, 302.0, 324.0, 309.0, 291.0, 291.0, 319.0, 314.0, 291.0, 282.0, 290.0, 286.0, 322.0, 311.0, 293.0, 289.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 294.0, 293.0, 316.0, 314.0, 311.0, 322.0, 322.0, 308.0, 288.0, 294.0, 313.0, 314.0, 297.0, 285.0, 313.0, 314.0, 319.0, 317.0, 290.0, 297.0, 316.0, 317.0, 285.0, 288.0, 316.0, 317.0, 285.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8821320498213592, "mean_processing_ms": 0.24474905703796943, "mean_inference_ms": 1.4742298243357532}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8376000, "num_steps_sampled": 4467200, "sample_time_ms": 22958.396, "load_time_ms": 37.77, "grad_time_ms": 10335.342, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004589398857206106, "policy_loss": -0.003508263034746051, "vf_loss": 86.6531982421875, 
"vf_explained_var": 0.7629675269126892, "kl": 0.0021643172949552536, "entropy": 1.1353298425674438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4467200, "episodes_total": 11168, "training_iteration": 349, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-44-45", "timestamp": 1660257885, "time_this_iter_s": 32.81572699546814, "time_total_s": 16296.799411058426, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": 
[null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16296.799411058426, "timesteps_since_restore": 4467200, "iterations_since_restore": 349, "perf": {"cpu_util_percent": 33.702173913043474, "ram_util_percent": 58.7978260869565}} -{"episode_reward_max": 639.0, "episode_reward_min": 527.0, "episode_reward_mean": 611.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 305.705}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.81, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.28, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.23, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.78, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.61, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.7, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.23, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.78, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.23, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.78, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 627.0, 633.0, 579.0, 633.0, 633.0, 636.0, 630.0, 627.0, 633.0, 630.0, 630.0, 630.0, 630.0, 636.0, 630.0, 587.0, 627.0, 633.0, 630.0, 587.0, 633.0, 581.0, 630.0, 633.0, 593.0, 579.0, 636.0, 633.0, 633.0, 636.0, 636.0, 576.0, 636.0, 636.0, 630.0, 582.0, 579.0, 633.0, 627.0, 633.0, 627.0, 576.0, 590.0, 633.0, 582.0, 633.0, 573.0, 576.0, 633.0, 582.0, 633.0, 630.0, 630.0, 587.0, 630.0, 633.0, 630.0, 582.0, 627.0, 582.0, 627.0, 636.0, 587.0, 633.0, 573.0, 633.0, 576.0, 627.0, 639.0, 636.0, 579.0, 587.0, 587.0, 581.0, 639.0, 576.0, 630.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 582.0, 576.0, 630.0, 590.0, 582.0, 582.0, 587.0, 582.0, 627.0, 582.0, 582.0, 527.0, 630.0, 639.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 320.0, 313.0, 314.0, 319.0, 314.0, 280.0, 299.0, 317.0, 316.0, 322.0, 311.0, 321.0, 315.0, 316.0, 314.0, 318.0, 309.0, 319.0, 314.0, 321.0, 309.0, 319.0, 311.0, 314.0, 316.0, 318.0, 312.0, 314.0, 322.0, 316.0, 314.0, 299.0, 288.0, 308.0, 319.0, 316.0, 317.0, 306.0, 324.0, 296.0, 291.0, 314.0, 319.0, 295.0, 286.0, 308.0, 322.0, 319.0, 314.0, 296.0, 297.0, 297.0, 282.0, 311.0, 325.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 314.0, 322.0, 287.0, 289.0, 316.0, 320.0, 322.0, 314.0, 313.0, 317.0, 294.0, 288.0, 286.0, 293.0, 316.0, 317.0, 308.0, 319.0, 316.0, 317.0, 308.0, 319.0, 299.0, 277.0, 288.0, 302.0, 324.0, 309.0, 291.0, 291.0, 319.0, 314.0, 291.0, 282.0, 290.0, 286.0, 322.0, 311.0, 293.0, 289.0, 321.0, 312.0, 316.0, 314.0, 311.0, 319.0, 294.0, 293.0, 316.0, 314.0, 311.0, 322.0, 322.0, 308.0, 288.0, 294.0, 313.0, 314.0, 297.0, 285.0, 313.0, 314.0, 319.0, 317.0, 290.0, 297.0, 316.0, 317.0, 285.0, 288.0, 316.0, 317.0, 285.0, 291.0, 314.0, 313.0, 322.0, 317.0, 317.0, 319.0, 291.0, 288.0, 291.0, 296.0, 299.0, 288.0, 296.0, 285.0, 317.0, 322.0, 291.0, 285.0, 319.0, 311.0, 311.0, 319.0, 283.0, 299.0, 293.0, 289.0, 288.0, 296.0, 314.0, 316.0, 319.0, 317.0, 291.0, 291.0, 285.0, 291.0, 310.0, 320.0, 291.0, 299.0, 290.0, 292.0, 283.0, 299.0, 293.0, 294.0, 298.0, 284.0, 320.0, 307.0, 291.0, 291.0, 288.0, 294.0, 256.0, 271.0, 319.0, 311.0, 315.0, 324.0, 316.0, 314.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8809197598148512, "mean_processing_ms": 0.2445094292995116, "mean_inference_ms": 1.4734182783968888}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8400000, "num_steps_sampled": 4480000, "sample_time_ms": 23484.152, "load_time_ms": 37.181, "grad_time_ms": 
10284.073, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0043442933820188046, "policy_loss": -0.002717310329899192, "vf_loss": 76.19442749023438, "vf_explained_var": 0.7705621719360352, "kl": 0.0019369550282135606, "entropy": 1.1156750917434692, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4480000, "episodes_total": 11200, "training_iteration": 350, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-45-21", "timestamp": 1660257921, "time_this_iter_s": 35.891582012176514, "time_total_s": 16332.690993070602, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": 
["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16332.690993070602, "timesteps_since_restore": 4480000, "iterations_since_restore": 350, "perf": {"cpu_util_percent": 33.81372549019608, "ram_util_percent": 58.90000000000001}} -{"episode_reward_max": 639.0, "episode_reward_min": 524.0, "episode_reward_mean": 609.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.785}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.57, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.75, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.79, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, 
"useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.79, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.79, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 587.0, 587.0, 593.0, 630.0, 627.0, 524.0, 630.0, 582.0, 576.0, 630.0, 579.0, 567.0, 579.0, 590.0, 630.0, 630.0, 627.0, 633.0, 636.0, 582.0, 582.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 587.0, 573.0, 630.0, 582.0, 633.0, 573.0, 633.0, 576.0, 627.0, 639.0, 636.0, 579.0, 587.0, 587.0, 581.0, 639.0, 576.0, 630.0, 630.0, 582.0, 582.0, 584.0, 630.0, 636.0, 582.0, 576.0, 630.0, 590.0, 582.0, 582.0, 587.0, 582.0, 627.0, 582.0, 582.0, 527.0, 630.0, 639.0, 630.0, 582.0, 636.0, 627.0, 633.0, 579.0, 633.0, 633.0, 636.0, 630.0, 627.0, 633.0, 630.0, 630.0, 630.0, 630.0, 636.0, 630.0, 587.0, 627.0, 633.0, 630.0, 587.0, 633.0, 581.0, 630.0, 633.0, 593.0, 579.0, 636.0, 633.0, 633.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 299.0, 288.0, 301.0, 286.0, 302.0, 291.0, 314.0, 316.0, 319.0, 308.0, 253.0, 271.0, 313.0, 317.0, 283.0, 299.0, 277.0, 299.0, 318.0, 312.0, 288.0, 291.0, 274.0, 293.0, 291.0, 288.0, 290.0, 300.0, 318.0, 312.0, 311.0, 319.0, 313.0, 314.0, 316.0, 317.0, 314.0, 322.0, 290.0, 292.0, 300.0, 282.0, 319.0, 314.0, 319.0, 317.0, 291.0, 296.0, 324.0, 312.0, 316.0, 317.0, 316.0, 317.0, 299.0, 288.0, 279.0, 294.0, 311.0, 319.0, 291.0, 291.0, 316.0, 317.0, 285.0, 288.0, 316.0, 317.0, 285.0, 291.0, 314.0, 313.0, 322.0, 317.0, 317.0, 319.0, 291.0, 288.0, 291.0, 296.0, 299.0, 288.0, 296.0, 285.0, 317.0, 322.0, 291.0, 285.0, 319.0, 311.0, 311.0, 319.0, 283.0, 299.0, 293.0, 289.0, 288.0, 296.0, 314.0, 316.0, 319.0, 317.0, 291.0, 291.0, 285.0, 291.0, 310.0, 320.0, 291.0, 299.0, 290.0, 292.0, 283.0, 299.0, 293.0, 294.0, 298.0, 284.0, 320.0, 307.0, 291.0, 291.0, 288.0, 294.0, 256.0, 271.0, 319.0, 311.0, 315.0, 324.0, 316.0, 314.0, 291.0, 291.0, 316.0, 320.0, 313.0, 314.0, 319.0, 314.0, 280.0, 299.0, 317.0, 316.0, 322.0, 311.0, 321.0, 315.0, 316.0, 314.0, 318.0, 309.0, 319.0, 314.0, 321.0, 309.0, 319.0, 311.0, 314.0, 316.0, 318.0, 312.0, 314.0, 322.0, 316.0, 314.0, 299.0, 288.0, 308.0, 319.0, 316.0, 317.0, 306.0, 324.0, 296.0, 291.0, 314.0, 319.0, 295.0, 286.0, 308.0, 322.0, 319.0, 314.0, 296.0, 297.0, 297.0, 282.0, 311.0, 325.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.879725637823849, "mean_processing_ms": 0.24427457411119732, 
"mean_inference_ms": 1.472868024343041}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8424000, "num_steps_sampled": 4492800, "sample_time_ms": 23466.741, "load_time_ms": 37.693, "grad_time_ms": 10196.154, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034591767471283674, "policy_loss": -0.0040799533016979694, "vf_loss": 81.06632232666016, "vf_explained_var": 0.7659358978271484, "kl": 0.0018826290033757687, "entropy": 1.135020136833191, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4492800, "episodes_total": 11232, "training_iteration": 351, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-45-57", "timestamp": 1660257957, "time_this_iter_s": 36.290544748306274, "time_total_s": 16368.981537818909, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, 
"num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16368.981537818909, "timesteps_since_restore": 4492800, "iterations_since_restore": 351, "perf": {"cpu_util_percent": 32.60196078431373, "ram_util_percent": 58.86274509803921}} -{"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 613.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 247.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.59}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.98, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, 
"tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.86, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, 
"dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.21, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 11, 
"viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 633.0, 587.0, 516.0, 587.0, 633.0, 609.0, 633.0, 582.0, 633.0, 570.0, 633.0, 579.0, 639.0, 627.0, 636.0, 533.0, 579.0, 633.0, 630.0, 633.0, 582.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0, 630.0, 639.0, 630.0, 582.0, 636.0, 627.0, 633.0, 579.0, 633.0, 633.0, 636.0, 630.0, 627.0, 633.0, 630.0, 630.0, 630.0, 630.0, 636.0, 630.0, 587.0, 627.0, 633.0, 630.0, 587.0, 633.0, 581.0, 630.0, 633.0, 593.0, 579.0, 636.0, 633.0, 633.0, 636.0, 636.0, 636.0, 587.0, 587.0, 
593.0, 630.0, 627.0, 524.0, 630.0, 582.0, 576.0, 630.0, 579.0, 567.0, 579.0, 590.0, 630.0, 630.0, 627.0, 633.0, 636.0, 582.0, 582.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 587.0, 573.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 279.0, 318.0, 315.0, 292.0, 295.0, 269.0, 247.0, 286.0, 301.0, 319.0, 314.0, 304.0, 305.0, 306.0, 327.0, 291.0, 291.0, 309.0, 324.0, 282.0, 288.0, 319.0, 314.0, 288.0, 291.0, 317.0, 322.0, 321.0, 306.0, 319.0, 317.0, 267.0, 266.0, 295.0, 284.0, 326.0, 307.0, 311.0, 319.0, 319.0, 314.0, 288.0, 294.0, 313.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 323.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 316.0, 296.0, 286.0, 319.0, 311.0, 319.0, 311.0, 315.0, 324.0, 316.0, 314.0, 291.0, 291.0, 316.0, 320.0, 313.0, 314.0, 319.0, 314.0, 280.0, 299.0, 317.0, 316.0, 322.0, 311.0, 321.0, 315.0, 316.0, 314.0, 318.0, 309.0, 319.0, 314.0, 321.0, 309.0, 319.0, 311.0, 314.0, 316.0, 318.0, 312.0, 314.0, 322.0, 316.0, 314.0, 299.0, 288.0, 308.0, 319.0, 316.0, 317.0, 306.0, 324.0, 296.0, 291.0, 314.0, 319.0, 295.0, 286.0, 308.0, 322.0, 319.0, 314.0, 296.0, 297.0, 297.0, 282.0, 311.0, 325.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 314.0, 322.0, 314.0, 322.0, 299.0, 288.0, 301.0, 286.0, 302.0, 291.0, 314.0, 316.0, 319.0, 308.0, 253.0, 271.0, 313.0, 317.0, 283.0, 299.0, 277.0, 299.0, 318.0, 312.0, 288.0, 291.0, 274.0, 293.0, 291.0, 288.0, 290.0, 300.0, 318.0, 312.0, 311.0, 319.0, 313.0, 314.0, 316.0, 317.0, 314.0, 322.0, 290.0, 292.0, 300.0, 
282.0, 319.0, 314.0, 319.0, 317.0, 291.0, 296.0, 324.0, 312.0, 316.0, 317.0, 316.0, 317.0, 299.0, 288.0, 279.0, 294.0, 311.0, 319.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.878542017918733, "mean_processing_ms": 0.2440427958629624, "mean_inference_ms": 1.4723767990503938}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8448000, "num_steps_sampled": 4505600, "sample_time_ms": 23873.275, "load_time_ms": 37.543, "grad_time_ms": 10389.923, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013407707447186112, "policy_loss": -0.006131558213382959, "vf_loss": 80.36180877685547, "vf_explained_var": 0.7696139812469482, "kl": 0.0018947357311844826, "entropy": 1.127698540687561, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4505600, "episodes_total": 11264, "training_iteration": 352, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-46-31", "timestamp": 1660257991, "time_this_iter_s": 34.20055317878723, "time_total_s": 16403.182090997696, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, 
"no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, 
"remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16403.182090997696, "timesteps_since_restore": 4505600, "iterations_since_restore": 352, "perf": {"cpu_util_percent": 33.57142857142857, "ram_util_percent": 58.8061224489796}} -{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 602.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.135}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 160, "sparse_reward_max": 220, 
"shaped_reward_mean": 185.07, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.25, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.53, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, 
"potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.04, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 
0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 582.0, 630.0, 582.0, 582.0, 576.0, 582.0, 582.0, 582.0, 579.0, 567.0, 633.0, 459.0, 630.0, 582.0, 579.0, 582.0, 636.0, 584.0, 587.0, 627.0, 579.0, 630.0, 639.0, 530.0, 582.0, 630.0, 582.0, 636.0, 513.0, 630.0, 579.0, 633.0, 633.0, 636.0, 636.0, 636.0, 587.0, 587.0, 593.0, 630.0, 627.0, 524.0, 630.0, 582.0, 576.0, 
630.0, 579.0, 567.0, 579.0, 590.0, 630.0, 630.0, 627.0, 633.0, 636.0, 582.0, 582.0, 633.0, 636.0, 587.0, 636.0, 633.0, 633.0, 587.0, 573.0, 630.0, 582.0, 567.0, 633.0, 587.0, 516.0, 587.0, 633.0, 609.0, 633.0, 582.0, 633.0, 570.0, 633.0, 579.0, 639.0, 627.0, 636.0, 533.0, 579.0, 633.0, 630.0, 633.0, 582.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 255.0, 289.0, 293.0, 314.0, 316.0, 296.0, 286.0, 294.0, 288.0, 283.0, 293.0, 286.0, 296.0, 295.0, 287.0, 296.0, 286.0, 285.0, 294.0, 287.0, 280.0, 311.0, 322.0, 237.0, 222.0, 311.0, 319.0, 285.0, 297.0, 295.0, 284.0, 296.0, 286.0, 316.0, 320.0, 298.0, 286.0, 298.0, 289.0, 305.0, 322.0, 285.0, 294.0, 320.0, 310.0, 322.0, 317.0, 270.0, 260.0, 291.0, 291.0, 316.0, 314.0, 298.0, 284.0, 311.0, 325.0, 259.0, 254.0, 314.0, 316.0, 302.0, 277.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 314.0, 322.0, 314.0, 322.0, 299.0, 288.0, 301.0, 286.0, 302.0, 291.0, 314.0, 316.0, 319.0, 308.0, 253.0, 271.0, 313.0, 317.0, 283.0, 299.0, 277.0, 299.0, 318.0, 312.0, 288.0, 291.0, 274.0, 293.0, 291.0, 288.0, 290.0, 300.0, 318.0, 312.0, 311.0, 319.0, 313.0, 314.0, 316.0, 317.0, 314.0, 322.0, 290.0, 292.0, 300.0, 282.0, 319.0, 314.0, 319.0, 317.0, 291.0, 296.0, 324.0, 312.0, 316.0, 317.0, 316.0, 317.0, 299.0, 288.0, 279.0, 294.0, 311.0, 319.0, 291.0, 291.0, 288.0, 279.0, 318.0, 315.0, 292.0, 295.0, 269.0, 247.0, 286.0, 301.0, 319.0, 314.0, 304.0, 305.0, 306.0, 327.0, 291.0, 291.0, 
309.0, 324.0, 282.0, 288.0, 319.0, 314.0, 288.0, 291.0, 317.0, 322.0, 321.0, 306.0, 319.0, 317.0, 267.0, 266.0, 295.0, 284.0, 326.0, 307.0, 311.0, 319.0, 319.0, 314.0, 288.0, 294.0, 313.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 323.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 316.0, 296.0, 286.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8773547258582282, "mean_processing_ms": 0.24380845402332824, "mean_inference_ms": 1.471635610090532}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8472000, "num_steps_sampled": 4518400, "sample_time_ms": 23706.56, "load_time_ms": 37.425, "grad_time_ms": 10690.015, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011104041477665305, "policy_loss": -0.00902702659368515, "vf_loss": 84.78372955322266, "vf_explained_var": 0.7670722007751465, "kl": 0.0019426337676122785, "entropy": 1.1235073804855347, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4518400, "episodes_total": 11296, "training_iteration": 353, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-47-03", "timestamp": 1660258023, "time_this_iter_s": 31.92423105239868, "time_total_s": 16435.106322050095, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16435.106322050095, "timesteps_since_restore": 4518400, "iterations_since_restore": 353, "perf": {"cpu_util_percent": 34.76222222222222, "ram_util_percent": 59.38222222222222}} -{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 600.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 222.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.055}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.51, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.56, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.23, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.3, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.54, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.02, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 11, 
"optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 627.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 573.0, 633.0, 630.0, 582.0, 639.0, 576.0, 582.0, 639.0, 633.0, 
587.0, 627.0, 636.0, 636.0, 630.0, 627.0, 579.0, 536.0, 636.0, 627.0, 582.0, 633.0, 579.0, 582.0, 582.0, 587.0, 573.0, 630.0, 582.0, 567.0, 633.0, 587.0, 516.0, 587.0, 633.0, 609.0, 633.0, 582.0, 633.0, 570.0, 633.0, 579.0, 639.0, 627.0, 636.0, 533.0, 579.0, 633.0, 630.0, 633.0, 582.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0, 519.0, 582.0, 630.0, 582.0, 582.0, 576.0, 582.0, 582.0, 582.0, 579.0, 567.0, 633.0, 459.0, 630.0, 582.0, 579.0, 582.0, 636.0, 584.0, 587.0, 627.0, 579.0, 630.0, 639.0, 530.0, 582.0, 630.0, 582.0, 636.0, 513.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 293.0, 313.0, 314.0, 287.0, 292.0, 291.0, 291.0, 285.0, 294.0, 296.0, 286.0, 321.0, 312.0, 293.0, 289.0, 280.0, 293.0, 324.0, 309.0, 314.0, 316.0, 298.0, 284.0, 317.0, 322.0, 283.0, 293.0, 301.0, 281.0, 322.0, 317.0, 311.0, 322.0, 293.0, 294.0, 311.0, 316.0, 319.0, 317.0, 322.0, 314.0, 318.0, 312.0, 313.0, 314.0, 288.0, 291.0, 274.0, 262.0, 316.0, 320.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 289.0, 290.0, 288.0, 294.0, 291.0, 291.0, 299.0, 288.0, 279.0, 294.0, 311.0, 319.0, 291.0, 291.0, 288.0, 279.0, 318.0, 315.0, 292.0, 295.0, 269.0, 247.0, 286.0, 301.0, 319.0, 314.0, 304.0, 305.0, 306.0, 327.0, 291.0, 291.0, 309.0, 324.0, 282.0, 288.0, 319.0, 314.0, 288.0, 291.0, 317.0, 322.0, 321.0, 306.0, 319.0, 317.0, 267.0, 266.0, 295.0, 284.0, 326.0, 307.0, 311.0, 319.0, 319.0, 314.0, 288.0, 294.0, 313.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 323.0, 319.0, 
317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 316.0, 296.0, 286.0, 319.0, 311.0, 264.0, 255.0, 289.0, 293.0, 314.0, 316.0, 296.0, 286.0, 294.0, 288.0, 283.0, 293.0, 286.0, 296.0, 295.0, 287.0, 296.0, 286.0, 285.0, 294.0, 287.0, 280.0, 311.0, 322.0, 237.0, 222.0, 311.0, 319.0, 285.0, 297.0, 295.0, 284.0, 296.0, 286.0, 316.0, 320.0, 298.0, 286.0, 298.0, 289.0, 305.0, 322.0, 285.0, 294.0, 320.0, 310.0, 322.0, 317.0, 270.0, 260.0, 291.0, 291.0, 316.0, 314.0, 298.0, 284.0, 311.0, 325.0, 259.0, 254.0, 314.0, 316.0, 302.0, 277.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8761658335288303, "mean_processing_ms": 0.24357312011055882, "mean_inference_ms": 1.4706370891819096}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8496000, "num_steps_sampled": 4531200, "sample_time_ms": 23737.686, "load_time_ms": 37.309, "grad_time_ms": 10476.005, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014189122011885047, "policy_loss": -0.006124518811702728, "vf_loss": 81.1131591796875, "vf_explained_var": 0.7619540095329285, "kl": 0.002155100228264928, "entropy": 1.1357545852661133, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4531200, "episodes_total": 11328, "training_iteration": 354, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-47-35", "timestamp": 1660258055, "time_this_iter_s": 31.991327047348022, "time_total_s": 16467.097649097443, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16467.097649097443, "timesteps_since_restore": 4531200, "iterations_since_restore": 354, "perf": {"cpu_util_percent": 34.031111111111116, 
"ram_util_percent": 59.01333333333334}} -{"episode_reward_max": 639.0, "episode_reward_min": 459.0, "episode_reward_mean": 601.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 300.845}, "custom_metrics": {"sparse_reward_mean": 208.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.29, "shaped_reward_min": 139, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.61, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, 
"onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.02, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 567.0, 570.0, 639.0, 636.0, 587.0, 630.0, 630.0, 636.0, 633.0, 587.0, 639.0, 570.0, 630.0, 636.0, 582.0, 587.0, 633.0, 639.0, 630.0, 633.0, 582.0, 639.0, 579.0, 587.0, 630.0, 587.0, 633.0, 627.0, 576.0, 630.0, 636.0, 630.0, 582.0, 630.0, 519.0, 582.0, 630.0, 582.0, 582.0, 576.0, 582.0, 582.0, 582.0, 579.0, 567.0, 633.0, 459.0, 630.0, 582.0, 579.0, 582.0, 636.0, 584.0, 587.0, 627.0, 579.0, 630.0, 639.0, 530.0, 582.0, 630.0, 582.0, 636.0, 513.0, 630.0, 579.0, 587.0, 627.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 573.0, 633.0, 630.0, 582.0, 639.0, 576.0, 582.0, 639.0, 633.0, 587.0, 627.0, 636.0, 636.0, 630.0, 627.0, 579.0, 536.0, 636.0, 627.0, 582.0, 633.0, 579.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 283.0, 316.0, 320.0, 284.0, 283.0, 278.0, 292.0, 317.0, 322.0, 316.0, 320.0, 293.0, 294.0, 311.0, 319.0, 323.0, 307.0, 311.0, 325.0, 319.0, 314.0, 296.0, 291.0, 324.0, 315.0, 285.0, 285.0, 323.0, 307.0, 324.0, 312.0, 296.0, 286.0, 289.0, 298.0, 319.0, 314.0, 322.0, 317.0, 326.0, 304.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 291.0, 291.0, 296.0, 314.0, 316.0, 299.0, 288.0, 308.0, 325.0, 313.0, 314.0, 286.0, 290.0, 324.0, 306.0, 319.0, 317.0, 314.0, 316.0, 296.0, 286.0, 319.0, 311.0, 264.0, 255.0, 289.0, 293.0, 314.0, 316.0, 296.0, 286.0, 294.0, 288.0, 283.0, 293.0, 286.0, 296.0, 295.0, 287.0, 296.0, 286.0, 285.0, 
294.0, 287.0, 280.0, 311.0, 322.0, 237.0, 222.0, 311.0, 319.0, 285.0, 297.0, 295.0, 284.0, 296.0, 286.0, 316.0, 320.0, 298.0, 286.0, 298.0, 289.0, 305.0, 322.0, 285.0, 294.0, 320.0, 310.0, 322.0, 317.0, 270.0, 260.0, 291.0, 291.0, 316.0, 314.0, 298.0, 284.0, 311.0, 325.0, 259.0, 254.0, 314.0, 316.0, 302.0, 277.0, 294.0, 293.0, 313.0, 314.0, 287.0, 292.0, 291.0, 291.0, 285.0, 294.0, 296.0, 286.0, 321.0, 312.0, 293.0, 289.0, 280.0, 293.0, 324.0, 309.0, 314.0, 316.0, 298.0, 284.0, 317.0, 322.0, 283.0, 293.0, 301.0, 281.0, 322.0, 317.0, 311.0, 322.0, 293.0, 294.0, 311.0, 316.0, 319.0, 317.0, 322.0, 314.0, 318.0, 312.0, 313.0, 314.0, 288.0, 291.0, 274.0, 262.0, 316.0, 320.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 289.0, 290.0, 288.0, 294.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.874971949450553, "mean_processing_ms": 0.24333505417716514, "mean_inference_ms": 1.4694074728869129}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8520000, "num_steps_sampled": 4544000, "sample_time_ms": 22906.057, "load_time_ms": 36.777, "grad_time_ms": 10398.662, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002145820064470172, "policy_loss": -0.005216358229517937, "vf_loss": 79.28690338134766, "vf_explained_var": 0.7675671577453613, "kl": 0.0018057804554700851, "entropy": 1.1330245733261108, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4544000, "episodes_total": 11360, "training_iteration": 355, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-48-06", "timestamp": 1660258086, "time_this_iter_s": 30.381834983825684, "time_total_s": 16497.47948408127, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16497.47948408127, "timesteps_since_restore": 4544000, "iterations_since_restore": 355, "perf": {"cpu_util_percent": 34.25348837209302, "ram_util_percent": 58.85813953488373}} -{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 605.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 302.665}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.53, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.29, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.74, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.87, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.54, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.72, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.86, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.87, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.87, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 633.0, 582.0, 590.0, 636.0, 627.0, 587.0, 630.0, 636.0, 630.0, 630.0, 630.0, 582.0, 582.0, 539.0, 582.0, 582.0, 576.0, 590.0, 627.0, 582.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 576.0, 579.0, 582.0, 584.0, 636.0, 513.0, 630.0, 579.0, 587.0, 627.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 573.0, 633.0, 630.0, 582.0, 639.0, 576.0, 582.0, 639.0, 633.0, 587.0, 627.0, 636.0, 636.0, 630.0, 627.0, 579.0, 536.0, 636.0, 627.0, 582.0, 633.0, 579.0, 582.0, 582.0, 582.0, 636.0, 567.0, 570.0, 639.0, 636.0, 587.0, 630.0, 630.0, 636.0, 633.0, 587.0, 639.0, 570.0, 630.0, 636.0, 582.0, 587.0, 633.0, 639.0, 630.0, 633.0, 582.0, 639.0, 579.0, 587.0, 630.0, 587.0, 633.0, 627.0, 576.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 294.0, 288.0, 316.0, 317.0, 293.0, 289.0, 288.0, 302.0, 319.0, 317.0, 311.0, 316.0, 301.0, 286.0, 318.0, 312.0, 316.0, 320.0, 316.0, 314.0, 316.0, 314.0, 316.0, 314.0, 289.0, 293.0, 288.0, 294.0, 265.0, 274.0, 283.0, 299.0, 288.0, 294.0, 274.0, 302.0, 296.0, 294.0, 321.0, 306.0, 294.0, 288.0, 308.0, 319.0, 321.0, 309.0, 287.0, 292.0, 320.0, 319.0, 301.0, 281.0, 318.0, 315.0, 293.0, 283.0, 290.0, 289.0, 
291.0, 291.0, 296.0, 288.0, 311.0, 325.0, 259.0, 254.0, 314.0, 316.0, 302.0, 277.0, 294.0, 293.0, 313.0, 314.0, 287.0, 292.0, 291.0, 291.0, 285.0, 294.0, 296.0, 286.0, 321.0, 312.0, 293.0, 289.0, 280.0, 293.0, 324.0, 309.0, 314.0, 316.0, 298.0, 284.0, 317.0, 322.0, 283.0, 293.0, 301.0, 281.0, 322.0, 317.0, 311.0, 322.0, 293.0, 294.0, 311.0, 316.0, 319.0, 317.0, 322.0, 314.0, 318.0, 312.0, 313.0, 314.0, 288.0, 291.0, 274.0, 262.0, 316.0, 320.0, 311.0, 316.0, 286.0, 296.0, 314.0, 319.0, 289.0, 290.0, 288.0, 294.0, 291.0, 291.0, 299.0, 283.0, 316.0, 320.0, 284.0, 283.0, 278.0, 292.0, 317.0, 322.0, 316.0, 320.0, 293.0, 294.0, 311.0, 319.0, 323.0, 307.0, 311.0, 325.0, 319.0, 314.0, 296.0, 291.0, 324.0, 315.0, 285.0, 285.0, 323.0, 307.0, 324.0, 312.0, 296.0, 286.0, 289.0, 298.0, 319.0, 314.0, 322.0, 317.0, 326.0, 304.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 291.0, 291.0, 296.0, 314.0, 316.0, 299.0, 288.0, 308.0, 325.0, 313.0, 314.0, 286.0, 290.0, 324.0, 306.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8737833276000079, "mean_processing_ms": 0.243097780431969, "mean_inference_ms": 1.4681762334073296}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8544000, "num_steps_sampled": 4556800, "sample_time_ms": 22541.516, "load_time_ms": 36.891, "grad_time_ms": 10138.825, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00407541124150157, "policy_loss": -0.0034555860329419374, "vf_loss": 80.97249603271484, "vf_explained_var": 0.7684476375579834, "kl": 0.001921386457979679, "entropy": 1.1324900388717651, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4556800, "episodes_total": 11392, "training_iteration": 356, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-48-36", "timestamp": 1660258116, "time_this_iter_s": 30.78407096862793, "time_total_s": 16528.263555049896, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16528.263555049896, "timesteps_since_restore": 4556800, "iterations_since_restore": 356, "perf": {"cpu_util_percent": 32.61818181818182, "ram_util_percent": 59.45681818181819}} -{"episode_reward_max": 639.0, "episode_reward_min": 539.0, "episode_reward_mean": 609.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 265.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 304.845}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.69, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 16.62, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.37, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.91, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.16, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.96, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.69, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, 
"soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.16, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.96, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.16, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.96, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 636.0, 639.0, 627.0, 630.0, 582.0, 633.0, 639.0, 573.0, 636.0, 579.0, 633.0, 636.0, 633.0, 636.0, 579.0, 582.0, 579.0, 630.0, 587.0, 636.0, 582.0, 636.0, 633.0, 627.0, 630.0, 639.0, 584.0, 627.0, 627.0, 576.0, 633.0, 579.0, 582.0, 582.0, 582.0, 636.0, 567.0, 570.0, 639.0, 636.0, 587.0, 630.0, 630.0, 636.0, 633.0, 587.0, 639.0, 570.0, 630.0, 636.0, 582.0, 587.0, 633.0, 639.0, 630.0, 633.0, 582.0, 639.0, 579.0, 587.0, 630.0, 587.0, 633.0, 627.0, 576.0, 630.0, 630.0, 582.0, 633.0, 582.0, 590.0, 636.0, 627.0, 587.0, 630.0, 636.0, 630.0, 630.0, 630.0, 582.0, 582.0, 539.0, 582.0, 582.0, 576.0, 590.0, 627.0, 582.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 576.0, 579.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 316.0, 317.0, 319.0, 317.0, 325.0, 314.0, 313.0, 314.0, 321.0, 309.0, 296.0, 286.0, 313.0, 320.0, 317.0, 322.0, 287.0, 286.0, 312.0, 324.0, 286.0, 293.0, 321.0, 312.0, 314.0, 322.0, 314.0, 319.0, 316.0, 
320.0, 291.0, 288.0, 293.0, 289.0, 292.0, 287.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 286.0, 296.0, 316.0, 320.0, 314.0, 319.0, 313.0, 314.0, 316.0, 314.0, 322.0, 317.0, 290.0, 294.0, 311.0, 316.0, 319.0, 308.0, 293.0, 283.0, 314.0, 319.0, 289.0, 290.0, 288.0, 294.0, 291.0, 291.0, 299.0, 283.0, 316.0, 320.0, 284.0, 283.0, 278.0, 292.0, 317.0, 322.0, 316.0, 320.0, 293.0, 294.0, 311.0, 319.0, 323.0, 307.0, 311.0, 325.0, 319.0, 314.0, 296.0, 291.0, 324.0, 315.0, 285.0, 285.0, 323.0, 307.0, 324.0, 312.0, 296.0, 286.0, 289.0, 298.0, 319.0, 314.0, 322.0, 317.0, 326.0, 304.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 291.0, 291.0, 296.0, 314.0, 316.0, 299.0, 288.0, 308.0, 325.0, 313.0, 314.0, 286.0, 290.0, 324.0, 306.0, 313.0, 317.0, 294.0, 288.0, 316.0, 317.0, 293.0, 289.0, 288.0, 302.0, 319.0, 317.0, 311.0, 316.0, 301.0, 286.0, 318.0, 312.0, 316.0, 320.0, 316.0, 314.0, 316.0, 314.0, 316.0, 314.0, 289.0, 293.0, 288.0, 294.0, 265.0, 274.0, 283.0, 299.0, 288.0, 294.0, 274.0, 302.0, 296.0, 294.0, 321.0, 306.0, 294.0, 288.0, 308.0, 319.0, 321.0, 309.0, 287.0, 292.0, 320.0, 319.0, 301.0, 281.0, 318.0, 315.0, 293.0, 283.0, 290.0, 289.0, 291.0, 291.0, 296.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8725992979631204, "mean_processing_ms": 0.24286049780551075, "mean_inference_ms": 1.4669091441555409}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8568000, "num_steps_sampled": 4569600, "sample_time_ms": 22487.797, "load_time_ms": 37.05, "grad_time_ms": 10111.337, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030587399378418922, "policy_loss": -0.004343332722783089, "vf_loss": 79.63693237304688, "vf_explained_var": 0.7643921971321106, "kl": 0.0021028893534094095, "entropy": 1.1232417821884155, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4569600, "episodes_total": 11424, 
"training_iteration": 357, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-49-07", "timestamp": 1660258147, "time_this_iter_s": 30.161853790283203, "time_total_s": 16558.42540884018, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16558.42540884018, "timesteps_since_restore": 4569600, "iterations_since_restore": 357, "perf": {"cpu_util_percent": 33.550000000000004, "ram_util_percent": 58.85714285714285}} -{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 606.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.095}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.19, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.4, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.42, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.86, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.9, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.9, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.9, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 582.0, 627.0, 639.0, 579.0, 579.0, 587.0, 630.0, 579.0, 627.0, 582.0, 636.0, 630.0, 624.0, 518.0, 570.0, 636.0, 576.0, 587.0, 624.0, 636.0, 587.0, 627.0, 630.0, 630.0, 584.0, 579.0, 636.0, 576.0, 630.0, 578.0, 467.0, 633.0, 627.0, 576.0, 630.0, 630.0, 582.0, 633.0, 582.0, 590.0, 636.0, 627.0, 587.0, 630.0, 636.0, 630.0, 630.0, 630.0, 582.0, 582.0, 539.0, 582.0, 582.0, 576.0, 590.0, 627.0, 582.0, 627.0, 630.0, 579.0, 639.0, 582.0, 633.0, 576.0, 579.0, 582.0, 584.0, 636.0, 633.0, 636.0, 639.0, 627.0, 630.0, 582.0, 633.0, 639.0, 573.0, 636.0, 579.0, 633.0, 636.0, 633.0, 636.0, 579.0, 582.0, 579.0, 630.0, 587.0, 636.0, 582.0, 636.0, 633.0, 627.0, 630.0, 639.0, 584.0, 627.0, 627.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 288.0, 293.0, 289.0, 313.0, 
314.0, 322.0, 317.0, 296.0, 283.0, 290.0, 289.0, 288.0, 299.0, 316.0, 314.0, 291.0, 288.0, 318.0, 309.0, 288.0, 294.0, 324.0, 312.0, 316.0, 314.0, 302.0, 322.0, 247.0, 271.0, 285.0, 285.0, 319.0, 317.0, 291.0, 285.0, 296.0, 291.0, 313.0, 311.0, 321.0, 315.0, 301.0, 286.0, 308.0, 319.0, 322.0, 308.0, 312.0, 318.0, 286.0, 298.0, 288.0, 291.0, 319.0, 317.0, 285.0, 291.0, 318.0, 312.0, 292.0, 286.0, 223.0, 244.0, 308.0, 325.0, 313.0, 314.0, 286.0, 290.0, 324.0, 306.0, 313.0, 317.0, 294.0, 288.0, 316.0, 317.0, 293.0, 289.0, 288.0, 302.0, 319.0, 317.0, 311.0, 316.0, 301.0, 286.0, 318.0, 312.0, 316.0, 320.0, 316.0, 314.0, 316.0, 314.0, 316.0, 314.0, 289.0, 293.0, 288.0, 294.0, 265.0, 274.0, 283.0, 299.0, 288.0, 294.0, 274.0, 302.0, 296.0, 294.0, 321.0, 306.0, 294.0, 288.0, 308.0, 319.0, 321.0, 309.0, 287.0, 292.0, 320.0, 319.0, 301.0, 281.0, 318.0, 315.0, 293.0, 283.0, 290.0, 289.0, 291.0, 291.0, 296.0, 288.0, 319.0, 317.0, 316.0, 317.0, 319.0, 317.0, 325.0, 314.0, 313.0, 314.0, 321.0, 309.0, 296.0, 286.0, 313.0, 320.0, 317.0, 322.0, 287.0, 286.0, 312.0, 324.0, 286.0, 293.0, 321.0, 312.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 291.0, 288.0, 293.0, 289.0, 292.0, 287.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 286.0, 296.0, 316.0, 320.0, 314.0, 319.0, 313.0, 314.0, 316.0, 314.0, 322.0, 317.0, 290.0, 294.0, 311.0, 316.0, 319.0, 308.0, 293.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.871423269952275, "mean_processing_ms": 0.24262389296401352, "mean_inference_ms": 1.4655941462374882}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8592000, "num_steps_sampled": 4582400, "sample_time_ms": 22265.267, "load_time_ms": 37.124, "grad_time_ms": 9998.565, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00020107599266339093, "policy_loss": -0.007807094603776932, "vf_loss": 81.7130355834961, "vf_explained_var": 0.7695291638374329, "kl": 0.0018338669324293733, "entropy": 1.1305631399154663, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4582400, "episodes_total": 11456, "training_iteration": 358, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-49-36", "timestamp": 1660258176, "time_this_iter_s": 28.821206092834473, "time_total_s": 16587.246614933014, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16587.246614933014, "timesteps_since_restore": 4582400, "iterations_since_restore": 358, "perf": {"cpu_util_percent": 32.69024390243902, "ram_util_percent": 58.856097560975606}} -{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 604.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.47}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.74, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.18, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.29, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.59, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.81, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.29, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.59, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.29, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.59, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 630.0, 615.0, 579.0, 579.0, 587.0, 633.0, 630.0, 636.0, 584.0, 582.0, 636.0, 633.0, 576.0, 630.0, 581.0, 579.0, 579.0, 636.0, 630.0, 587.0, 576.0, 584.0, 630.0, 587.0, 576.0, 633.0, 582.0, 636.0, 579.0, 582.0, 576.0, 579.0, 582.0, 584.0, 636.0, 633.0, 636.0, 639.0, 627.0, 630.0, 582.0, 633.0, 639.0, 573.0, 636.0, 579.0, 633.0, 636.0, 633.0, 636.0, 579.0, 582.0, 579.0, 630.0, 587.0, 636.0, 582.0, 636.0, 633.0, 627.0, 630.0, 639.0, 584.0, 627.0, 627.0, 576.0, 570.0, 582.0, 627.0, 639.0, 579.0, 579.0, 587.0, 630.0, 579.0, 627.0, 582.0, 636.0, 630.0, 624.0, 518.0, 570.0, 636.0, 576.0, 587.0, 624.0, 636.0, 587.0, 627.0, 630.0, 630.0, 584.0, 579.0, 636.0, 576.0, 630.0, 578.0, 467.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 280.0, 306.0, 324.0, 311.0, 319.0, 308.0, 307.0, 288.0, 291.0, 288.0, 291.0, 288.0, 299.0, 308.0, 325.0, 314.0, 316.0, 319.0, 317.0, 290.0, 294.0, 291.0, 291.0, 321.0, 315.0, 319.0, 314.0, 286.0, 290.0, 306.0, 324.0, 288.0, 293.0, 290.0, 289.0, 299.0, 280.0, 322.0, 314.0, 313.0, 317.0, 283.0, 304.0, 285.0, 291.0, 293.0, 291.0, 324.0, 306.0, 288.0, 299.0, 280.0, 296.0, 314.0, 319.0, 291.0, 291.0, 317.0, 319.0, 294.0, 285.0, 296.0, 286.0, 293.0, 283.0, 290.0, 289.0, 291.0, 291.0, 296.0, 288.0, 319.0, 317.0, 316.0, 317.0, 319.0, 317.0, 325.0, 314.0, 313.0, 314.0, 321.0, 309.0, 296.0, 286.0, 313.0, 320.0, 317.0, 322.0, 287.0, 286.0, 312.0, 324.0, 286.0, 293.0, 321.0, 312.0, 314.0, 322.0, 314.0, 319.0, 316.0, 320.0, 291.0, 288.0, 293.0, 289.0, 292.0, 287.0, 316.0, 314.0, 288.0, 299.0, 322.0, 314.0, 286.0, 296.0, 316.0, 320.0, 314.0, 319.0, 313.0, 314.0, 316.0, 314.0, 322.0, 317.0, 290.0, 294.0, 311.0, 316.0, 319.0, 308.0, 293.0, 283.0, 282.0, 288.0, 293.0, 289.0, 313.0, 314.0, 322.0, 317.0, 296.0, 283.0, 290.0, 289.0, 288.0, 299.0, 316.0, 314.0, 291.0, 288.0, 318.0, 309.0, 288.0, 294.0, 324.0, 312.0, 316.0, 314.0, 302.0, 322.0, 247.0, 271.0, 285.0, 285.0, 319.0, 317.0, 291.0, 285.0, 296.0, 291.0, 313.0, 311.0, 321.0, 315.0, 301.0, 286.0, 308.0, 319.0, 322.0, 308.0, 312.0, 318.0, 286.0, 298.0, 288.0, 291.0, 319.0, 317.0, 285.0, 291.0, 318.0, 312.0, 292.0, 286.0, 223.0, 244.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8702563301594608, "mean_processing_ms": 0.24238978711918313, "mean_inference_ms": 1.4642680017402931}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8616000, "num_steps_sampled": 4595200, "sample_time_ms": 22073.451, "load_time_ms": 37.129, "grad_time_ms": 9911.472, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.0011308585526421666, "policy_loss": -0.006429137196391821, "vf_loss": 81.29705047607422, "vf_explained_var": 0.7726876139640808, "kl": 0.0017355438321828842, "entropy": 1.139426350593567, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4595200, "episodes_total": 11488, "training_iteration": 359, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-50-06", "timestamp": 1660258206, "time_this_iter_s": 30.0307719707489, "time_total_s": 16617.277386903763, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16617.277386903763, "timesteps_since_restore": 4595200, "iterations_since_restore": 359, "perf": {"cpu_util_percent": 36.002325581395354, "ram_util_percent": 58.82325581395349}} -{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 605.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.73}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.86, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.09, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.61, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.33, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.59, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.91, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, 
"useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.33, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.59, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.33, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.59, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 593.0, 633.0, 639.0, 627.0, 582.0, 587.0, 587.0, 639.0, 636.0, 582.0, 630.0, 633.0, 633.0, 630.0, 582.0, 627.0, 633.0, 639.0, 636.0, 639.0, 521.0, 633.0, 636.0, 630.0, 582.0, 558.0, 576.0, 639.0, 636.0, 624.0, 636.0, 584.0, 627.0, 627.0, 576.0, 570.0, 582.0, 627.0, 639.0, 579.0, 579.0, 587.0, 630.0, 579.0, 627.0, 582.0, 636.0, 630.0, 624.0, 518.0, 570.0, 636.0, 576.0, 587.0, 624.0, 636.0, 587.0, 627.0, 630.0, 630.0, 584.0, 579.0, 636.0, 576.0, 630.0, 578.0, 467.0, 579.0, 630.0, 630.0, 615.0, 579.0, 579.0, 587.0, 633.0, 630.0, 636.0, 584.0, 582.0, 636.0, 633.0, 576.0, 630.0, 581.0, 579.0, 579.0, 636.0, 630.0, 587.0, 576.0, 584.0, 630.0, 587.0, 576.0, 633.0, 582.0, 636.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 293.0, 300.0, 311.0, 322.0, 322.0, 317.0, 316.0, 311.0, 289.0, 293.0, 294.0, 293.0, 288.0, 299.0, 324.0, 315.0, 319.0, 317.0, 296.0, 286.0, 309.0, 321.0, 319.0, 314.0, 320.0, 313.0, 319.0, 311.0, 293.0, 289.0, 318.0, 309.0, 317.0, 316.0, 324.0, 315.0, 314.0, 322.0, 319.0, 320.0, 256.0, 265.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 294.0, 288.0, 281.0, 277.0, 276.0, 300.0, 319.0, 320.0, 314.0, 322.0, 310.0, 314.0, 316.0, 320.0, 290.0, 294.0, 311.0, 316.0, 319.0, 308.0, 293.0, 283.0, 282.0, 288.0, 293.0, 289.0, 313.0, 314.0, 322.0, 317.0, 296.0, 283.0, 290.0, 289.0, 288.0, 299.0, 316.0, 314.0, 291.0, 288.0, 318.0, 309.0, 288.0, 294.0, 324.0, 312.0, 316.0, 314.0, 302.0, 322.0, 247.0, 271.0, 285.0, 285.0, 319.0, 317.0, 291.0, 285.0, 296.0, 291.0, 313.0, 311.0, 321.0, 315.0, 301.0, 286.0, 308.0, 319.0, 322.0, 308.0, 312.0, 318.0, 286.0, 298.0, 288.0, 291.0, 319.0, 317.0, 285.0, 291.0, 318.0, 312.0, 292.0, 286.0, 223.0, 244.0, 299.0, 280.0, 306.0, 324.0, 311.0, 319.0, 308.0, 307.0, 288.0, 291.0, 288.0, 291.0, 288.0, 299.0, 308.0, 325.0, 314.0, 316.0, 319.0, 317.0, 290.0, 294.0, 291.0, 291.0, 321.0, 315.0, 319.0, 314.0, 286.0, 290.0, 306.0, 324.0, 288.0, 293.0, 290.0, 289.0, 299.0, 280.0, 322.0, 314.0, 313.0, 317.0, 283.0, 304.0, 285.0, 291.0, 293.0, 291.0, 324.0, 306.0, 288.0, 299.0, 280.0, 296.0, 314.0, 319.0, 291.0, 291.0, 317.0, 319.0, 294.0, 285.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 0.86909953447402, "mean_processing_ms": 0.24215862097735263, "mean_inference_ms": 1.4630909533739367}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8640000, 
"num_steps_sampled": 4608000, "sample_time_ms": 21874.868, "load_time_ms": 37.48, "grad_time_ms": 9746.645, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013312319060787559, "policy_loss": -0.005779942963272333, "vf_loss": 76.8069839477539, "vf_explained_var": 0.7698413729667664, "kl": 0.001875889953225851, "entropy": 1.1390520334243774, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4608000, "episodes_total": 11520, "training_iteration": 360, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-50-38", "timestamp": 1660258238, "time_this_iter_s": 32.26046180725098, "time_total_s": 16649.537848711014, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16649.537848711014, "timesteps_since_restore": 4608000, "iterations_since_restore": 360, "perf": {"cpu_util_percent": 32.559999999999995, "ram_util_percent": 58.84666666666664}} -{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 605.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.965}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.33, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 
0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.6, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 4, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 630.0, 633.0, 582.0, 636.0, 590.0, 630.0, 587.0, 636.0, 576.0, 630.0, 630.0, 582.0, 576.0, 630.0, 582.0, 587.0, 633.0, 582.0, 636.0, 630.0, 582.0, 587.0, 630.0, 573.0, 630.0, 630.0, 525.0, 579.0, 627.0, 633.0, 576.0, 630.0, 578.0, 467.0, 579.0, 630.0, 630.0, 615.0, 579.0, 579.0, 587.0, 633.0, 630.0, 636.0, 584.0, 582.0, 636.0, 633.0, 576.0, 630.0, 581.0, 579.0, 579.0, 636.0, 630.0, 587.0, 576.0, 584.0, 630.0, 587.0, 576.0, 633.0, 582.0, 636.0, 579.0, 582.0, 636.0, 593.0, 633.0, 639.0, 627.0, 582.0, 587.0, 587.0, 639.0, 636.0, 582.0, 630.0, 633.0, 633.0, 630.0, 582.0, 627.0, 633.0, 639.0, 636.0, 639.0, 521.0, 633.0, 636.0, 630.0, 582.0, 
558.0, 576.0, 639.0, 636.0, 624.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 282.0, 297.0, 319.0, 311.0, 321.0, 312.0, 286.0, 296.0, 314.0, 322.0, 296.0, 294.0, 311.0, 319.0, 298.0, 289.0, 322.0, 314.0, 285.0, 291.0, 313.0, 317.0, 316.0, 314.0, 291.0, 291.0, 298.0, 278.0, 308.0, 322.0, 293.0, 289.0, 293.0, 294.0, 316.0, 317.0, 291.0, 291.0, 319.0, 317.0, 321.0, 309.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 265.0, 260.0, 287.0, 292.0, 314.0, 313.0, 316.0, 317.0, 285.0, 291.0, 318.0, 312.0, 292.0, 286.0, 223.0, 244.0, 299.0, 280.0, 306.0, 324.0, 311.0, 319.0, 308.0, 307.0, 288.0, 291.0, 288.0, 291.0, 288.0, 299.0, 308.0, 325.0, 314.0, 316.0, 319.0, 317.0, 290.0, 294.0, 291.0, 291.0, 321.0, 315.0, 319.0, 314.0, 286.0, 290.0, 306.0, 324.0, 288.0, 293.0, 290.0, 289.0, 299.0, 280.0, 322.0, 314.0, 313.0, 317.0, 283.0, 304.0, 285.0, 291.0, 293.0, 291.0, 324.0, 306.0, 288.0, 299.0, 280.0, 296.0, 314.0, 319.0, 291.0, 291.0, 317.0, 319.0, 294.0, 285.0, 296.0, 286.0, 314.0, 322.0, 293.0, 300.0, 311.0, 322.0, 322.0, 317.0, 316.0, 311.0, 289.0, 293.0, 294.0, 293.0, 288.0, 299.0, 324.0, 315.0, 319.0, 317.0, 296.0, 286.0, 309.0, 321.0, 319.0, 314.0, 320.0, 313.0, 319.0, 311.0, 293.0, 289.0, 318.0, 309.0, 317.0, 316.0, 324.0, 315.0, 314.0, 322.0, 319.0, 320.0, 256.0, 265.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 294.0, 288.0, 281.0, 277.0, 276.0, 300.0, 319.0, 320.0, 314.0, 322.0, 310.0, 314.0, 316.0, 320.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.8679613450602781, "mean_processing_ms": 0.24193358188716718, "mean_inference_ms": 1.4622622611677922}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8664000, "num_steps_sampled": 4620800, "sample_time_ms": 21811.818, "load_time_ms": 37.251, "grad_time_ms": 9734.507, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007378067821264267, "policy_loss": -0.006473819259554148, "vf_loss": 77.80099487304688, "vf_explained_var": 0.7715656161308289, "kl": 0.0017216805135831237, "entropy": 1.1369411945343018, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4620800, "episodes_total": 11552, "training_iteration": 361, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-51-13", "timestamp": 1660258273, "time_this_iter_s": 35.53581404685974, "time_total_s": 16685.073662757874, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16685.073662757874, "timesteps_since_restore": 4620800, "iterations_since_restore": 361, "perf": {"cpu_util_percent": 32.418, "ram_util_percent": 58.85999999999999}} -{"episode_reward_max": 639.0, "episode_reward_min": 521.0, "episode_reward_mean": 608.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.125}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.05, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.58, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, 
"potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.74, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.21, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [624.0, 587.0, 582.0, 582.0, 579.0, 636.0, 630.0, 567.0, 630.0, 633.0, 636.0, 582.0, 621.0, 630.0, 587.0, 587.0, 639.0, 630.0, 587.0, 587.0, 587.0, 630.0, 636.0, 627.0, 561.0, 627.0, 630.0, 636.0, 582.0, 582.0, 579.0, 587.0, 582.0, 636.0, 579.0, 582.0, 636.0, 593.0, 633.0, 639.0, 627.0, 582.0, 587.0, 587.0, 639.0, 636.0, 582.0, 630.0, 633.0, 633.0, 630.0, 582.0, 627.0, 633.0, 639.0, 636.0, 639.0, 521.0, 633.0, 
636.0, 630.0, 582.0, 558.0, 576.0, 639.0, 636.0, 624.0, 636.0, 579.0, 579.0, 630.0, 633.0, 582.0, 636.0, 590.0, 630.0, 587.0, 636.0, 576.0, 630.0, 630.0, 582.0, 576.0, 630.0, 582.0, 587.0, 633.0, 582.0, 636.0, 630.0, 582.0, 587.0, 630.0, 573.0, 630.0, 630.0, 525.0, 579.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 318.0, 296.0, 291.0, 288.0, 294.0, 287.0, 295.0, 288.0, 291.0, 319.0, 317.0, 316.0, 314.0, 281.0, 286.0, 321.0, 309.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 307.0, 314.0, 308.0, 322.0, 291.0, 296.0, 285.0, 302.0, 314.0, 325.0, 313.0, 317.0, 295.0, 292.0, 293.0, 294.0, 294.0, 293.0, 311.0, 319.0, 321.0, 315.0, 316.0, 311.0, 284.0, 277.0, 316.0, 311.0, 311.0, 319.0, 320.0, 316.0, 294.0, 288.0, 297.0, 285.0, 282.0, 297.0, 304.0, 283.0, 291.0, 291.0, 317.0, 319.0, 294.0, 285.0, 296.0, 286.0, 314.0, 322.0, 293.0, 300.0, 311.0, 322.0, 322.0, 317.0, 316.0, 311.0, 289.0, 293.0, 294.0, 293.0, 288.0, 299.0, 324.0, 315.0, 319.0, 317.0, 296.0, 286.0, 309.0, 321.0, 319.0, 314.0, 320.0, 313.0, 319.0, 311.0, 293.0, 289.0, 318.0, 309.0, 317.0, 316.0, 324.0, 315.0, 314.0, 322.0, 319.0, 320.0, 256.0, 265.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 294.0, 288.0, 281.0, 277.0, 276.0, 300.0, 319.0, 320.0, 314.0, 322.0, 310.0, 314.0, 316.0, 320.0, 288.0, 291.0, 282.0, 297.0, 319.0, 311.0, 321.0, 312.0, 286.0, 296.0, 314.0, 322.0, 296.0, 294.0, 311.0, 319.0, 298.0, 289.0, 322.0, 314.0, 285.0, 291.0, 313.0, 317.0, 316.0, 314.0, 291.0, 291.0, 298.0, 278.0, 308.0, 
322.0, 293.0, 289.0, 293.0, 294.0, 316.0, 317.0, 291.0, 291.0, 319.0, 317.0, 321.0, 309.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 265.0, 260.0, 287.0, 292.0, 314.0, 313.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8668385855769685, "mean_processing_ms": 0.2417133493697533, "mean_inference_ms": 1.4618269528034153}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8688000, "num_steps_sampled": 4633600, "sample_time_ms": 22112.322, "load_time_ms": 37.291, "grad_time_ms": 9542.038, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006689311121590436, "policy_loss": -0.006603518966585398, "vf_loss": 78.373291015625, "vf_explained_var": 0.7716686129570007, "kl": 0.0016899490728974342, "entropy": 1.1297602653503418, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4633600, "episodes_total": 11584, "training_iteration": 362, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-51-49", "timestamp": 1660258309, "time_this_iter_s": 35.281026124954224, "time_total_s": 16720.354688882828, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": 
null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, 
"compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16720.354688882828, "timesteps_since_restore": 4633600, "iterations_since_restore": 362, "perf": {"cpu_util_percent": 29.87, "ram_util_percent": 58.83999999999998}} -{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 605.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.605}, "custom_metrics": 
{"sparse_reward_mean": 209.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.01, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.54, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.27, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.91, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.69, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.91, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.91, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 582.0, 579.0, 633.0, 579.0, 582.0, 582.0, 582.0, 627.0, 582.0, 584.0, 633.0, 630.0, 587.0, 546.0, 587.0, 576.0, 630.0, 636.0, 630.0, 630.0, 573.0, 587.0, 636.0, 590.0, 579.0, 582.0, 633.0, 639.0, 576.0, 630.0, 639.0, 636.0, 624.0, 
636.0, 579.0, 579.0, 630.0, 633.0, 582.0, 636.0, 590.0, 630.0, 587.0, 636.0, 576.0, 630.0, 630.0, 582.0, 576.0, 630.0, 582.0, 587.0, 633.0, 582.0, 636.0, 630.0, 582.0, 587.0, 630.0, 573.0, 630.0, 630.0, 525.0, 579.0, 627.0, 633.0, 624.0, 587.0, 582.0, 582.0, 579.0, 636.0, 630.0, 567.0, 630.0, 633.0, 636.0, 582.0, 621.0, 630.0, 587.0, 587.0, 639.0, 630.0, 587.0, 587.0, 587.0, 630.0, 636.0, 627.0, 561.0, 627.0, 630.0, 636.0, 582.0, 582.0, 579.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 313.0, 317.0, 291.0, 291.0, 282.0, 297.0, 314.0, 319.0, 285.0, 294.0, 291.0, 291.0, 288.0, 294.0, 291.0, 291.0, 313.0, 314.0, 293.0, 289.0, 285.0, 299.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 273.0, 273.0, 296.0, 291.0, 287.0, 289.0, 316.0, 314.0, 319.0, 317.0, 318.0, 312.0, 316.0, 314.0, 283.0, 290.0, 299.0, 288.0, 321.0, 315.0, 302.0, 288.0, 286.0, 293.0, 288.0, 294.0, 314.0, 319.0, 316.0, 323.0, 288.0, 288.0, 321.0, 309.0, 319.0, 320.0, 314.0, 322.0, 310.0, 314.0, 316.0, 320.0, 288.0, 291.0, 282.0, 297.0, 319.0, 311.0, 321.0, 312.0, 286.0, 296.0, 314.0, 322.0, 296.0, 294.0, 311.0, 319.0, 298.0, 289.0, 322.0, 314.0, 285.0, 291.0, 313.0, 317.0, 316.0, 314.0, 291.0, 291.0, 298.0, 278.0, 308.0, 322.0, 293.0, 289.0, 293.0, 294.0, 316.0, 317.0, 291.0, 291.0, 319.0, 317.0, 321.0, 309.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 265.0, 260.0, 287.0, 292.0, 314.0, 313.0, 316.0, 317.0, 306.0, 318.0, 296.0, 291.0, 288.0, 294.0, 287.0, 
295.0, 288.0, 291.0, 319.0, 317.0, 316.0, 314.0, 281.0, 286.0, 321.0, 309.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 307.0, 314.0, 308.0, 322.0, 291.0, 296.0, 285.0, 302.0, 314.0, 325.0, 313.0, 317.0, 295.0, 292.0, 293.0, 294.0, 294.0, 293.0, 311.0, 319.0, 321.0, 315.0, 316.0, 311.0, 284.0, 277.0, 316.0, 311.0, 311.0, 319.0, 320.0, 316.0, 294.0, 288.0, 297.0, 285.0, 282.0, 297.0, 304.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8657202120154665, "mean_processing_ms": 0.2414933019778603, "mean_inference_ms": 1.4613549029197088}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8712000, "num_steps_sampled": 4646400, "sample_time_ms": 22230.157, "load_time_ms": 37.429, "grad_time_ms": 9536.697, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00019748850900214165, "policy_loss": -0.00744326738640666, "vf_loss": 82.0882568359375, "vf_explained_var": 0.7681159377098083, "kl": 0.0019463537028059363, "entropy": 1.1361408233642578, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4646400, "episodes_total": 11616, "training_iteration": 363, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-52-22", "timestamp": 1660258342, "time_this_iter_s": 33.05159020423889, "time_total_s": 16753.406279087067, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": 
null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16753.406279087067, "timesteps_since_restore": 4646400, "iterations_since_restore": 363, "perf": {"cpu_util_percent": 32.35531914893617, "ram_util_percent": 58.848936170212795}} -{"episode_reward_max": 639.0, "episode_reward_min": 353.0, "episode_reward_mean": 605.42, 
"episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 302.71}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.22, "shaped_reward_min": 113, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.62, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.37, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.68, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, 
"onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.82, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.08, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 
17.82, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.82, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 579.0, 630.0, 639.0, 
579.0, 636.0, 353.0, 639.0, 587.0, 630.0, 633.0, 630.0, 587.0, 582.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 630.0, 621.0, 582.0, 630.0, 636.0, 525.0, 579.0, 627.0, 633.0, 624.0, 587.0, 582.0, 582.0, 579.0, 636.0, 630.0, 567.0, 630.0, 633.0, 636.0, 582.0, 621.0, 630.0, 587.0, 587.0, 639.0, 630.0, 587.0, 587.0, 587.0, 630.0, 636.0, 627.0, 561.0, 627.0, 630.0, 636.0, 582.0, 582.0, 579.0, 587.0, 582.0, 630.0, 582.0, 579.0, 633.0, 579.0, 582.0, 582.0, 582.0, 627.0, 582.0, 584.0, 633.0, 630.0, 587.0, 546.0, 587.0, 576.0, 630.0, 636.0, 630.0, 630.0, 573.0, 587.0, 636.0, 590.0, 579.0, 582.0, 633.0, 639.0, 576.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 309.0, 283.0, 304.0, 293.0, 289.0, 317.0, 319.0, 311.0, 319.0, 322.0, 314.0, 325.0, 311.0, 290.0, 289.0, 308.0, 322.0, 314.0, 325.0, 288.0, 291.0, 322.0, 314.0, 175.0, 178.0, 327.0, 312.0, 290.0, 297.0, 313.0, 317.0, 316.0, 317.0, 311.0, 319.0, 298.0, 289.0, 286.0, 296.0, 316.0, 320.0, 319.0, 314.0, 312.0, 327.0, 313.0, 323.0, 280.0, 302.0, 319.0, 317.0, 309.0, 330.0, 318.0, 312.0, 302.0, 319.0, 294.0, 288.0, 309.0, 321.0, 319.0, 317.0, 265.0, 260.0, 287.0, 292.0, 314.0, 313.0, 316.0, 317.0, 306.0, 318.0, 296.0, 291.0, 288.0, 294.0, 287.0, 295.0, 288.0, 291.0, 319.0, 317.0, 316.0, 314.0, 281.0, 286.0, 321.0, 309.0, 316.0, 317.0, 319.0, 317.0, 294.0, 288.0, 307.0, 314.0, 308.0, 322.0, 291.0, 296.0, 285.0, 302.0, 314.0, 325.0, 313.0, 317.0, 295.0, 292.0, 293.0, 294.0, 294.0, 293.0, 311.0, 319.0, 321.0, 315.0, 
316.0, 311.0, 284.0, 277.0, 316.0, 311.0, 311.0, 319.0, 320.0, 316.0, 294.0, 288.0, 297.0, 285.0, 282.0, 297.0, 304.0, 283.0, 296.0, 286.0, 313.0, 317.0, 291.0, 291.0, 282.0, 297.0, 314.0, 319.0, 285.0, 294.0, 291.0, 291.0, 288.0, 294.0, 291.0, 291.0, 313.0, 314.0, 293.0, 289.0, 285.0, 299.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 273.0, 273.0, 296.0, 291.0, 287.0, 289.0, 316.0, 314.0, 319.0, 317.0, 318.0, 312.0, 316.0, 314.0, 283.0, 290.0, 299.0, 288.0, 321.0, 315.0, 302.0, 288.0, 286.0, 293.0, 288.0, 294.0, 314.0, 319.0, 316.0, 323.0, 288.0, 288.0, 321.0, 309.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8646062243279308, "mean_processing_ms": 0.24127430142106637, "mean_inference_ms": 1.4608685672633468}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8736000, "num_steps_sampled": 4659200, "sample_time_ms": 22504.157, "load_time_ms": 37.127, "grad_time_ms": 9578.876, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00030447044991888106, "policy_loss": -0.007189334835857153, "vf_loss": 80.5628433227539, "vf_explained_var": 0.7757663130760193, "kl": 0.002137100091204047, "entropy": 1.1249442100524902, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4659200, "episodes_total": 11648, "training_iteration": 364, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-52-57", "timestamp": 1660258377, "time_this_iter_s": 35.15313506126404, "time_total_s": 16788.55941414833, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": 
false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16788.55941414833, "timesteps_since_restore": 4659200, "iterations_since_restore": 364, "perf": {"cpu_util_percent": 
30.822448979591837, "ram_util_percent": 58.930612244897965}} -{"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 600.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 138.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 300.455}, "custom_metrics": {"sparse_reward_mean": 208.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 184.91, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.5, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.13, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.16, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.5, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, 
"onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 16.09, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 579.0, 633.0, 579.0, 582.0, 627.0, 582.0, 636.0, 590.0, 294.0, 584.0, 630.0, 621.0, 576.0, 630.0, 582.0, 582.0, 584.0, 582.0, 627.0, 579.0, 576.0, 633.0, 590.0, 630.0, 630.0, 579.0, 627.0, 579.0, 630.0, 621.0, 582.0, 582.0, 579.0, 587.0, 582.0, 630.0, 582.0, 579.0, 633.0, 579.0, 582.0, 582.0, 582.0, 627.0, 582.0, 584.0, 633.0, 630.0, 587.0, 546.0, 587.0, 576.0, 630.0, 636.0, 630.0, 630.0, 573.0, 587.0, 636.0, 590.0, 579.0, 582.0, 633.0, 639.0, 576.0, 630.0, 633.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 579.0, 630.0, 639.0, 579.0, 636.0, 353.0, 639.0, 587.0, 630.0, 633.0, 630.0, 587.0, 582.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 630.0, 621.0, 582.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 319.0, 291.0, 291.0, 288.0, 291.0, 319.0, 314.0, 285.0, 294.0, 285.0, 297.0, 319.0, 308.0, 292.0, 290.0, 319.0, 317.0, 291.0, 299.0, 138.0, 156.0, 291.0, 293.0, 311.0, 319.0, 313.0, 308.0, 294.0, 282.0, 316.0, 314.0, 291.0, 291.0, 299.0, 283.0, 293.0, 291.0, 298.0, 284.0, 317.0, 310.0, 296.0, 283.0, 286.0, 290.0, 316.0, 317.0, 296.0, 294.0, 314.0, 316.0, 319.0, 311.0, 291.0, 288.0, 313.0, 314.0, 285.0, 294.0, 311.0, 319.0, 304.0, 317.0, 294.0, 288.0, 297.0, 285.0, 282.0, 297.0, 304.0, 283.0, 296.0, 286.0, 313.0, 317.0, 291.0, 291.0, 282.0, 297.0, 314.0, 319.0, 285.0, 294.0, 291.0, 291.0, 288.0, 294.0, 291.0, 291.0, 313.0, 
314.0, 293.0, 289.0, 285.0, 299.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 273.0, 273.0, 296.0, 291.0, 287.0, 289.0, 316.0, 314.0, 319.0, 317.0, 318.0, 312.0, 316.0, 314.0, 283.0, 290.0, 299.0, 288.0, 321.0, 315.0, 302.0, 288.0, 286.0, 293.0, 288.0, 294.0, 314.0, 319.0, 316.0, 323.0, 288.0, 288.0, 321.0, 309.0, 324.0, 309.0, 283.0, 304.0, 293.0, 289.0, 317.0, 319.0, 311.0, 319.0, 322.0, 314.0, 325.0, 311.0, 290.0, 289.0, 308.0, 322.0, 314.0, 325.0, 288.0, 291.0, 322.0, 314.0, 175.0, 178.0, 327.0, 312.0, 290.0, 297.0, 313.0, 317.0, 316.0, 317.0, 311.0, 319.0, 298.0, 289.0, 286.0, 296.0, 316.0, 320.0, 319.0, 314.0, 312.0, 327.0, 313.0, 323.0, 280.0, 302.0, 319.0, 317.0, 309.0, 330.0, 318.0, 312.0, 302.0, 319.0, 294.0, 288.0, 309.0, 321.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8635010378078678, "mean_processing_ms": 0.24105756914616358, "mean_inference_ms": 1.4603867065067317}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8760000, "num_steps_sampled": 4672000, "sample_time_ms": 23125.247, "load_time_ms": 37.852, "grad_time_ms": 9835.319, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003108972916379571, "policy_loss": -0.004532767925411463, "vf_loss": 82.0846176147461, "vf_explained_var": 0.7727766036987305, "kl": 0.00231738924048841, "entropy": 1.1334295272827148, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4672000, "episodes_total": 11680, "training_iteration": 365, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-53-36", "timestamp": 1660258416, "time_this_iter_s": 39.16720676422119, "time_total_s": 16827.726620912552, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16827.726620912552, "timesteps_since_restore": 4672000, "iterations_since_restore": 365, "perf": {"cpu_util_percent": 31.412499999999998, "ram_util_percent": 59.01071428571428}} -{"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 599.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 138.0}, "policy_reward_max": {"ppo": 330.0}, "policy_reward_mean": {"ppo": 299.95}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 184.3, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.92, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 
16.17, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.4, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.08, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 582.0, 582.0, 639.0, 579.0, 630.0, 576.0, 630.0, 402.0, 639.0, 633.0, 627.0, 587.0, 630.0, 465.0, 582.0, 581.0, 576.0, 582.0, 561.0, 633.0, 465.0, 633.0, 633.0, 633.0, 582.0, 627.0, 636.0, 630.0, 582.0, 582.0, 633.0, 639.0, 576.0, 630.0, 633.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 579.0, 630.0, 639.0, 579.0, 636.0, 353.0, 639.0, 587.0, 630.0, 633.0, 630.0, 587.0, 582.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 630.0, 621.0, 582.0, 630.0, 636.0, 627.0, 582.0, 579.0, 633.0, 579.0, 582.0, 627.0, 582.0, 636.0, 590.0, 294.0, 584.0, 630.0, 621.0, 576.0, 630.0, 582.0, 582.0, 584.0, 582.0, 627.0, 579.0, 576.0, 633.0, 590.0, 630.0, 630.0, 579.0, 627.0, 579.0, 630.0, 621.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 291.0, 291.0, 319.0, 320.0, 296.0, 283.0, 316.0, 314.0, 287.0, 289.0, 315.0, 315.0, 198.0, 204.0, 324.0, 315.0, 316.0, 317.0, 311.0, 316.0, 288.0, 299.0, 314.0, 316.0, 231.0, 234.0, 291.0, 291.0, 279.0, 302.0, 296.0, 280.0, 288.0, 294.0, 288.0, 273.0, 321.0, 312.0, 231.0, 234.0, 316.0, 317.0, 313.0, 320.0, 313.0, 320.0, 288.0, 294.0, 308.0, 319.0, 320.0, 316.0, 311.0, 319.0, 296.0, 
286.0, 291.0, 291.0, 314.0, 319.0, 316.0, 323.0, 288.0, 288.0, 321.0, 309.0, 324.0, 309.0, 283.0, 304.0, 293.0, 289.0, 317.0, 319.0, 311.0, 319.0, 322.0, 314.0, 325.0, 311.0, 290.0, 289.0, 308.0, 322.0, 314.0, 325.0, 288.0, 291.0, 322.0, 314.0, 175.0, 178.0, 327.0, 312.0, 290.0, 297.0, 313.0, 317.0, 316.0, 317.0, 311.0, 319.0, 298.0, 289.0, 286.0, 296.0, 316.0, 320.0, 319.0, 314.0, 312.0, 327.0, 313.0, 323.0, 280.0, 302.0, 319.0, 317.0, 309.0, 330.0, 318.0, 312.0, 302.0, 319.0, 294.0, 288.0, 309.0, 321.0, 319.0, 317.0, 308.0, 319.0, 291.0, 291.0, 288.0, 291.0, 319.0, 314.0, 285.0, 294.0, 285.0, 297.0, 319.0, 308.0, 292.0, 290.0, 319.0, 317.0, 291.0, 299.0, 138.0, 156.0, 291.0, 293.0, 311.0, 319.0, 313.0, 308.0, 294.0, 282.0, 316.0, 314.0, 291.0, 291.0, 299.0, 283.0, 293.0, 291.0, 298.0, 284.0, 317.0, 310.0, 296.0, 283.0, 286.0, 290.0, 316.0, 317.0, 296.0, 294.0, 314.0, 316.0, 319.0, 311.0, 291.0, 288.0, 313.0, 314.0, 285.0, 294.0, 311.0, 319.0, 304.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8623994673017462, "mean_processing_ms": 0.24084175116520762, "mean_inference_ms": 1.4598463496186935}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8784000, "num_steps_sampled": 4684800, "sample_time_ms": 23187.668, "load_time_ms": 38.066, "grad_time_ms": 10176.48, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020015325862914324, "policy_loss": -0.005890776868909597, "vf_loss": 84.59882354736328, "vf_explained_var": 0.7721861004829407, "kl": 0.002045721048489213, "entropy": 1.1351399421691895, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4684800, "episodes_total": 11712, "training_iteration": 366, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-54-11", "timestamp": 1660258451, "time_this_iter_s": 34.82252907752991, "time_total_s": 16862.54914999008, "pid": 
66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": 
false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": 
null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16862.54914999008, "timesteps_since_restore": 4684800, "iterations_since_restore": 366, "perf": {"cpu_util_percent": 28.122448979591837, "ram_util_percent": 58.88775510204081}} -{"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 598.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 138.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 299.415}, "custom_metrics": {"sparse_reward_mean": 207.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 183.63, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, 
"onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.72, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.1, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.36, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, 
"soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.08, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 633.0, 636.0, 627.0, 630.0, 627.0, 630.0, 627.0, 587.0, 630.0, 633.0, 630.0, 582.0, 582.0, 587.0, 582.0, 627.0, 627.0, 587.0, 576.0, 633.0, 573.0, 636.0, 630.0, 587.0, 582.0, 636.0, 587.0, 573.0, 627.0, 636.0, 587.0, 621.0, 582.0, 630.0, 636.0, 627.0, 582.0, 579.0, 633.0, 579.0, 582.0, 627.0, 582.0, 636.0, 590.0, 294.0, 584.0, 630.0, 621.0, 576.0, 630.0, 582.0, 582.0, 584.0, 582.0, 627.0, 579.0, 576.0, 633.0, 590.0, 630.0, 630.0, 579.0, 627.0, 579.0, 630.0, 621.0, 630.0, 636.0, 582.0, 582.0, 639.0, 579.0, 630.0, 576.0, 630.0, 402.0, 639.0, 633.0, 627.0, 587.0, 630.0, 465.0, 582.0, 581.0, 576.0, 582.0, 561.0, 633.0, 465.0, 633.0, 633.0, 633.0, 582.0, 627.0, 636.0, 630.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 254.0, 314.0, 319.0, 322.0, 314.0, 311.0, 316.0, 312.0, 318.0, 324.0, 303.0, 321.0, 309.0, 313.0, 314.0, 288.0, 299.0, 311.0, 319.0, 310.0, 323.0, 306.0, 324.0, 293.0, 289.0, 288.0, 294.0, 282.0, 305.0, 294.0, 288.0, 309.0, 318.0, 313.0, 314.0, 291.0, 
296.0, 295.0, 281.0, 316.0, 317.0, 287.0, 286.0, 318.0, 318.0, 311.0, 319.0, 288.0, 299.0, 294.0, 288.0, 319.0, 317.0, 296.0, 291.0, 277.0, 296.0, 311.0, 316.0, 319.0, 317.0, 288.0, 299.0, 302.0, 319.0, 294.0, 288.0, 309.0, 321.0, 319.0, 317.0, 308.0, 319.0, 291.0, 291.0, 288.0, 291.0, 319.0, 314.0, 285.0, 294.0, 285.0, 297.0, 319.0, 308.0, 292.0, 290.0, 319.0, 317.0, 291.0, 299.0, 138.0, 156.0, 291.0, 293.0, 311.0, 319.0, 313.0, 308.0, 294.0, 282.0, 316.0, 314.0, 291.0, 291.0, 299.0, 283.0, 293.0, 291.0, 298.0, 284.0, 317.0, 310.0, 296.0, 283.0, 286.0, 290.0, 316.0, 317.0, 296.0, 294.0, 314.0, 316.0, 319.0, 311.0, 291.0, 288.0, 313.0, 314.0, 285.0, 294.0, 311.0, 319.0, 304.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 291.0, 291.0, 319.0, 320.0, 296.0, 283.0, 316.0, 314.0, 287.0, 289.0, 315.0, 315.0, 198.0, 204.0, 324.0, 315.0, 316.0, 317.0, 311.0, 316.0, 288.0, 299.0, 314.0, 316.0, 231.0, 234.0, 291.0, 291.0, 279.0, 302.0, 296.0, 280.0, 288.0, 294.0, 288.0, 273.0, 321.0, 312.0, 231.0, 234.0, 316.0, 317.0, 313.0, 320.0, 313.0, 320.0, 288.0, 294.0, 308.0, 319.0, 320.0, 316.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8612908936403926, "mean_processing_ms": 0.24062264484082838, "mean_inference_ms": 1.4590268461349842}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8808000, "num_steps_sampled": 4697600, "sample_time_ms": 23071.295, "load_time_ms": 37.913, "grad_time_ms": 10615.56, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0038227650802582502, "policy_loss": -0.003672233084216714, "vf_loss": 80.5904312133789, "vf_explained_var": 0.7626190185546875, "kl": 0.0024631840642541647, "entropy": 1.128088116645813, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4697600, "episodes_total": 11744, "training_iteration": 367, "experiment_id": 
"76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-54-44", "timestamp": 1660258484, "time_this_iter_s": 33.39541292190552, "time_total_s": 16895.944562911987, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 
0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", 
"policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16895.944562911987, "timesteps_since_restore": 4697600, "iterations_since_restore": 367, "perf": {"cpu_util_percent": 29.602127659574467, "ram_util_percent": 58.99148936170216}} -{"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 603.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 301.92}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.44, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.99, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.28, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.07, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 636.0, 639.0, 579.0, 639.0, 636.0, 525.0, 581.0, 639.0, 573.0, 582.0, 587.0, 630.0, 636.0, 639.0, 633.0, 587.0, 582.0, 636.0, 636.0, 582.0, 636.0, 633.0, 627.0, 587.0, 516.0, 630.0, 633.0, 630.0, 639.0, 573.0, 582.0, 627.0, 579.0, 630.0, 621.0, 630.0, 636.0, 582.0, 582.0, 639.0, 579.0, 630.0, 576.0, 630.0, 402.0, 639.0, 633.0, 627.0, 587.0, 630.0, 465.0, 582.0, 581.0, 576.0, 582.0, 561.0, 633.0, 465.0, 633.0, 633.0, 633.0, 582.0, 627.0, 636.0, 630.0, 582.0, 582.0, 519.0, 633.0, 636.0, 627.0, 630.0, 627.0, 630.0, 627.0, 587.0, 630.0, 633.0, 630.0, 582.0, 582.0, 587.0, 582.0, 627.0, 627.0, 587.0, 576.0, 633.0, 573.0, 636.0, 630.0, 587.0, 582.0, 636.0, 587.0, 573.0, 627.0, 636.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 309.0, 316.0, 320.0, 319.0, 
320.0, 286.0, 293.0, 319.0, 320.0, 322.0, 314.0, 260.0, 265.0, 288.0, 293.0, 322.0, 317.0, 285.0, 288.0, 293.0, 289.0, 282.0, 305.0, 306.0, 324.0, 314.0, 322.0, 322.0, 317.0, 323.0, 310.0, 293.0, 294.0, 290.0, 292.0, 319.0, 317.0, 319.0, 317.0, 288.0, 294.0, 314.0, 322.0, 316.0, 317.0, 313.0, 314.0, 290.0, 297.0, 259.0, 257.0, 316.0, 314.0, 307.0, 326.0, 321.0, 309.0, 321.0, 318.0, 277.0, 296.0, 285.0, 297.0, 313.0, 314.0, 285.0, 294.0, 311.0, 319.0, 304.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 291.0, 291.0, 319.0, 320.0, 296.0, 283.0, 316.0, 314.0, 287.0, 289.0, 315.0, 315.0, 198.0, 204.0, 324.0, 315.0, 316.0, 317.0, 311.0, 316.0, 288.0, 299.0, 314.0, 316.0, 231.0, 234.0, 291.0, 291.0, 279.0, 302.0, 296.0, 280.0, 288.0, 294.0, 288.0, 273.0, 321.0, 312.0, 231.0, 234.0, 316.0, 317.0, 313.0, 320.0, 313.0, 320.0, 288.0, 294.0, 308.0, 319.0, 320.0, 316.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 265.0, 254.0, 314.0, 319.0, 322.0, 314.0, 311.0, 316.0, 312.0, 318.0, 324.0, 303.0, 321.0, 309.0, 313.0, 314.0, 288.0, 299.0, 311.0, 319.0, 310.0, 323.0, 306.0, 324.0, 293.0, 289.0, 288.0, 294.0, 282.0, 305.0, 294.0, 288.0, 309.0, 318.0, 313.0, 314.0, 291.0, 296.0, 295.0, 281.0, 316.0, 317.0, 287.0, 286.0, 318.0, 318.0, 311.0, 319.0, 288.0, 299.0, 294.0, 288.0, 319.0, 317.0, 296.0, 291.0, 277.0, 296.0, 311.0, 316.0, 319.0, 317.0, 288.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8601760204307758, "mean_processing_ms": 0.2404010276903208, "mean_inference_ms": 1.4579011173262801}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8832000, "num_steps_sampled": 4710400, "sample_time_ms": 23247.48, "load_time_ms": 37.926, "grad_time_ms": 10791.679, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004444511607289314, "policy_loss": -0.0034118040930479765, "vf_loss": 84.17324829101562, "vf_explained_var": 0.7645478844642639, "kl": 0.0020590554922819138, "entropy": 1.1220086812973022, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4710400, "episodes_total": 11776, "training_iteration": 368, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-55-17", "timestamp": 1660258517, "time_this_iter_s": 32.34189581871033, "time_total_s": 16928.286458730698, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16928.286458730698, "timesteps_since_restore": 4710400, "iterations_since_restore": 368, "perf": {"cpu_util_percent": 31.186956521739134, "ram_util_percent": 58.830434782608684}} -{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 607.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 303.555}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.71, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.57, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.13, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 579.0, 584.0, 636.0, 627.0, 624.0, 636.0, 587.0, 582.0, 636.0, 581.0, 587.0, 630.0, 636.0, 633.0, 630.0, 630.0, 636.0, 630.0, 584.0, 516.0, 587.0, 630.0, 513.0, 587.0, 630.0, 633.0, 627.0, 633.0, 582.0, 519.0, 627.0, 636.0, 630.0, 582.0, 582.0, 519.0, 633.0, 636.0, 627.0, 630.0, 627.0, 630.0, 627.0, 587.0, 630.0, 633.0, 630.0, 582.0, 582.0, 587.0, 582.0, 627.0, 627.0, 587.0, 576.0, 633.0, 573.0, 636.0, 630.0, 587.0, 582.0, 636.0, 587.0, 573.0, 627.0, 636.0, 587.0, 633.0, 636.0, 639.0, 579.0, 639.0, 636.0, 525.0, 581.0, 639.0, 573.0, 582.0, 587.0, 630.0, 636.0, 639.0, 633.0, 587.0, 582.0, 636.0, 636.0, 582.0, 636.0, 633.0, 627.0, 587.0, 516.0, 630.0, 633.0, 630.0, 639.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 289.0, 285.0, 294.0, 288.0, 296.0, 316.0, 320.0, 316.0, 311.0, 308.0, 316.0, 316.0, 320.0, 294.0, 293.0, 301.0, 281.0, 314.0, 322.0, 297.0, 284.0, 299.0, 288.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 324.0, 306.0, 318.0, 312.0, 319.0, 317.0, 319.0, 311.0, 282.0, 302.0, 256.0, 260.0, 288.0, 299.0, 316.0, 314.0, 255.0, 258.0, 293.0, 294.0, 316.0, 314.0, 318.0, 315.0, 311.0, 316.0, 321.0, 312.0, 288.0, 294.0, 268.0, 251.0, 311.0, 316.0, 320.0, 316.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 265.0, 254.0, 314.0, 319.0, 322.0, 314.0, 311.0, 316.0, 312.0, 318.0, 324.0, 303.0, 321.0, 309.0, 313.0, 314.0, 288.0, 299.0, 311.0, 319.0, 310.0, 323.0, 306.0, 324.0, 293.0, 289.0, 288.0, 294.0, 282.0, 305.0, 294.0, 288.0, 309.0, 318.0, 313.0, 314.0, 291.0, 296.0, 295.0, 281.0, 316.0, 317.0, 287.0, 286.0, 318.0, 318.0, 311.0, 319.0, 288.0, 299.0, 294.0, 288.0, 319.0, 317.0, 296.0, 291.0, 277.0, 296.0, 311.0, 316.0, 319.0, 317.0, 288.0, 299.0, 324.0, 309.0, 316.0, 320.0, 319.0, 320.0, 286.0, 293.0, 319.0, 320.0, 322.0, 314.0, 260.0, 265.0, 288.0, 293.0, 322.0, 317.0, 285.0, 288.0, 293.0, 289.0, 282.0, 305.0, 306.0, 324.0, 314.0, 322.0, 322.0, 317.0, 323.0, 310.0, 293.0, 294.0, 290.0, 292.0, 319.0, 317.0, 319.0, 317.0, 288.0, 294.0, 314.0, 322.0, 316.0, 317.0, 313.0, 314.0, 290.0, 297.0, 259.0, 257.0, 316.0, 314.0, 307.0, 326.0, 321.0, 309.0, 321.0, 318.0, 277.0, 296.0, 285.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8590666727700321, "mean_processing_ms": 0.24018037596280067, "mean_inference_ms": 1.4567772377738835}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8856000, "num_steps_sampled": 4723200, "sample_time_ms": 23393.671, "load_time_ms": 38.324, "grad_time_ms": 11132.048, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": -0.0011790187563747168, "policy_loss": -0.008629883639514446, "vf_loss": 80.15734100341797, "vf_explained_var": 0.7653172016143799, "kl": 0.001749455346725881, "entropy": 1.129709243774414, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4723200, "episodes_total": 11808, "training_iteration": 369, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-55-52", "timestamp": 1660258552, "time_this_iter_s": 34.8981990814209, "time_total_s": 16963.18465781212, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16963.18465781212, "timesteps_since_restore": 4723200, "iterations_since_restore": 369, "perf": {"cpu_util_percent": 32.48979591836735, "ram_util_percent": 58.86734693877551}} -{"episode_reward_max": 639.0, "episode_reward_min": 453.0, "episode_reward_mean": 604.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 302.025}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.65, "shaped_reward_min": 133, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.13, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.16, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.95, 
"useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 636.0, 630.0, 584.0, 636.0, 465.0, 627.0, 633.0, 624.0, 633.0, 465.0, 582.0, 587.0, 630.0, 627.0, 639.0, 630.0, 633.0, 453.0, 639.0, 630.0, 630.0, 627.0, 636.0, 579.0, 587.0, 636.0, 510.0, 587.0, 624.0, 630.0, 539.0, 573.0, 627.0, 636.0, 587.0, 633.0, 636.0, 639.0, 579.0, 639.0, 636.0, 525.0, 581.0, 639.0, 573.0, 582.0, 587.0, 630.0, 636.0, 639.0, 633.0, 587.0, 582.0, 636.0, 636.0, 582.0, 636.0, 633.0, 627.0, 587.0, 516.0, 630.0, 633.0, 630.0, 639.0, 573.0, 582.0, 587.0, 579.0, 584.0, 636.0, 627.0, 624.0, 636.0, 587.0, 582.0, 636.0, 581.0, 587.0, 630.0, 636.0, 633.0, 630.0, 630.0, 636.0, 630.0, 584.0, 516.0, 587.0, 630.0, 513.0, 587.0, 630.0, 633.0, 627.0, 633.0, 582.0, 519.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 322.0, 314.0, 316.0, 314.0, 298.0, 286.0, 313.0, 323.0, 222.0, 243.0, 318.0, 309.0, 321.0, 312.0, 310.0, 314.0, 319.0, 314.0, 220.0, 245.0, 297.0, 285.0, 294.0, 293.0, 318.0, 312.0, 311.0, 316.0, 317.0, 322.0, 316.0, 314.0, 319.0, 314.0, 222.0, 231.0, 319.0, 320.0, 322.0, 308.0, 322.0, 308.0, 311.0, 316.0, 319.0, 317.0, 294.0, 285.0, 283.0, 304.0, 324.0, 312.0, 259.0, 251.0, 301.0, 286.0, 305.0, 319.0, 313.0, 317.0, 271.0, 268.0, 277.0, 296.0, 311.0, 316.0, 319.0, 317.0, 288.0, 299.0, 324.0, 309.0, 316.0, 320.0, 319.0, 320.0, 286.0, 293.0, 319.0, 320.0, 322.0, 314.0, 260.0, 265.0, 288.0, 293.0, 322.0, 317.0, 285.0, 288.0, 293.0, 289.0, 282.0, 305.0, 306.0, 324.0, 314.0, 322.0, 322.0, 317.0, 323.0, 310.0, 293.0, 294.0, 290.0, 292.0, 319.0, 317.0, 319.0, 317.0, 288.0, 294.0, 314.0, 322.0, 316.0, 317.0, 313.0, 314.0, 290.0, 297.0, 259.0, 257.0, 316.0, 314.0, 307.0, 326.0, 321.0, 309.0, 321.0, 318.0, 277.0, 296.0, 285.0, 297.0, 298.0, 289.0, 285.0, 294.0, 288.0, 296.0, 316.0, 320.0, 316.0, 311.0, 308.0, 316.0, 316.0, 320.0, 294.0, 293.0, 301.0, 281.0, 314.0, 322.0, 297.0, 284.0, 299.0, 288.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 324.0, 306.0, 318.0, 312.0, 319.0, 317.0, 319.0, 311.0, 282.0, 302.0, 256.0, 260.0, 288.0, 299.0, 316.0, 314.0, 255.0, 258.0, 293.0, 294.0, 316.0, 314.0, 318.0, 315.0, 311.0, 316.0, 321.0, 312.0, 288.0, 294.0, 268.0, 251.0, 311.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8579708589489354, "mean_processing_ms": 0.23996389125057788, "mean_inference_ms": 1.455828460743175}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8880000, 
"num_steps_sampled": 4736000, "sample_time_ms": 23349.429, "load_time_ms": 38.506, "grad_time_ms": 11333.701, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00012880750000476837, "policy_loss": -0.007648926693946123, "vf_loss": 83.42855072021484, "vf_explained_var": 0.7715353965759277, "kl": 0.0017624356551095843, "entropy": 1.1302567720413208, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4736000, "episodes_total": 11840, "training_iteration": 370, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-56-26", "timestamp": 1660258586, "time_this_iter_s": 33.842254877090454, "time_total_s": 16997.02691268921, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": 
false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 16997.02691268921, "timesteps_since_restore": 4736000, "iterations_since_restore": 370, "perf": {"cpu_util_percent": 32.75416666666667, "ram_util_percent": 58.89374999999999}} -{"episode_reward_max": 639.0, "episode_reward_min": 453.0, "episode_reward_mean": 602.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 301.425}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.85, "shaped_reward_min": 133, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.99, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.99, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.5, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 4, 
"dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.99, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.5, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.99, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.5, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 633.0, 630.0, 582.0, 582.0, 630.0, 627.0, 633.0, 630.0, 627.0, 579.0, 633.0, 587.0, 579.0, 582.0, 576.0, 579.0, 627.0, 627.0, 636.0, 639.0, 630.0, 630.0, 582.0, 618.0, 630.0, 587.0, 473.0, 587.0, 624.0, 587.0, 630.0, 639.0, 573.0, 582.0, 587.0, 579.0, 584.0, 636.0, 627.0, 624.0, 636.0, 587.0, 582.0, 636.0, 581.0, 587.0, 630.0, 636.0, 633.0, 630.0, 630.0, 636.0, 630.0, 584.0, 516.0, 587.0, 630.0, 513.0, 587.0, 630.0, 633.0, 627.0, 633.0, 582.0, 519.0, 627.0, 579.0, 636.0, 630.0, 584.0, 636.0, 465.0, 627.0, 633.0, 624.0, 633.0, 465.0, 582.0, 587.0, 630.0, 627.0, 639.0, 630.0, 633.0, 453.0, 639.0, 630.0, 630.0, 627.0, 636.0, 579.0, 587.0, 
636.0, 510.0, 587.0, 624.0, 630.0, 539.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 288.0, 291.0, 317.0, 316.0, 311.0, 319.0, 289.0, 293.0, 290.0, 292.0, 314.0, 316.0, 319.0, 308.0, 311.0, 322.0, 314.0, 316.0, 310.0, 317.0, 302.0, 277.0, 319.0, 314.0, 299.0, 288.0, 293.0, 286.0, 283.0, 299.0, 297.0, 279.0, 291.0, 288.0, 319.0, 308.0, 314.0, 313.0, 313.0, 323.0, 319.0, 320.0, 310.0, 320.0, 319.0, 311.0, 286.0, 296.0, 314.0, 304.0, 311.0, 319.0, 293.0, 294.0, 231.0, 242.0, 291.0, 296.0, 308.0, 316.0, 293.0, 294.0, 321.0, 309.0, 321.0, 318.0, 277.0, 296.0, 285.0, 297.0, 298.0, 289.0, 285.0, 294.0, 288.0, 296.0, 316.0, 320.0, 316.0, 311.0, 308.0, 316.0, 316.0, 320.0, 294.0, 293.0, 301.0, 281.0, 314.0, 322.0, 297.0, 284.0, 299.0, 288.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 324.0, 306.0, 318.0, 312.0, 319.0, 317.0, 319.0, 311.0, 282.0, 302.0, 256.0, 260.0, 288.0, 299.0, 316.0, 314.0, 255.0, 258.0, 293.0, 294.0, 316.0, 314.0, 318.0, 315.0, 311.0, 316.0, 321.0, 312.0, 288.0, 294.0, 268.0, 251.0, 311.0, 316.0, 283.0, 296.0, 322.0, 314.0, 316.0, 314.0, 298.0, 286.0, 313.0, 323.0, 222.0, 243.0, 318.0, 309.0, 321.0, 312.0, 310.0, 314.0, 319.0, 314.0, 220.0, 245.0, 297.0, 285.0, 294.0, 293.0, 318.0, 312.0, 311.0, 316.0, 317.0, 322.0, 316.0, 314.0, 319.0, 314.0, 222.0, 231.0, 319.0, 320.0, 322.0, 308.0, 322.0, 308.0, 311.0, 316.0, 319.0, 317.0, 294.0, 285.0, 283.0, 304.0, 324.0, 312.0, 259.0, 251.0, 301.0, 286.0, 305.0, 319.0, 313.0, 317.0, 271.0, 268.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.8568823921280486, "mean_processing_ms": 0.23974756126791116, "mean_inference_ms": 1.4548800404150943}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8904000, "num_steps_sampled": 4748800, "sample_time_ms": 22963.635, "load_time_ms": 38.114, "grad_time_ms": 11201.711, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006220227223820984, "policy_loss": -0.007045889273285866, "vf_loss": 82.31112670898438, "vf_explained_var": 0.7558401226997375, "kl": 0.002209648722782731, "entropy": 1.1263946294784546, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4748800, "episodes_total": 11872, "training_iteration": 371, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-56-56", "timestamp": 1660258616, "time_this_iter_s": 30.35482382774353, "time_total_s": 17027.381736516953, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17027.381736516953, "timesteps_since_restore": 4748800, "iterations_since_restore": 371, "perf": {"cpu_util_percent": 31.702325581395346, "ram_util_percent": 58.86744186046512}} -{"episode_reward_max": 639.0, "episode_reward_min": 453.0, "episode_reward_mean": 601.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 300.635}, "custom_metrics": {"sparse_reward_mean": 208.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 184.47, "shaped_reward_min": 133, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, 
"potting_onion_agent_1_mean": 17.47, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.04, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.47, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.47, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 633.0, 579.0, 633.0, 584.0, 633.0, 636.0, 561.0, 573.0, 570.0, 582.0, 627.0, 633.0, 587.0, 636.0, 579.0, 630.0, 627.0, 525.0, 576.0, 636.0, 636.0, 587.0, 587.0, 633.0, 633.0, 636.0, 630.0, 519.0, 587.0, 587.0, 633.0, 582.0, 519.0, 627.0, 579.0, 636.0, 630.0, 584.0, 636.0, 465.0, 627.0, 633.0, 624.0, 633.0, 465.0, 582.0, 587.0, 630.0, 627.0, 639.0, 630.0, 633.0, 453.0, 639.0, 630.0, 630.0, 627.0, 
636.0, 579.0, 587.0, 636.0, 510.0, 587.0, 624.0, 630.0, 539.0, 630.0, 579.0, 633.0, 630.0, 582.0, 582.0, 630.0, 627.0, 633.0, 630.0, 627.0, 579.0, 633.0, 587.0, 579.0, 582.0, 576.0, 579.0, 627.0, 627.0, 636.0, 639.0, 630.0, 630.0, 582.0, 618.0, 630.0, 587.0, 473.0, 587.0, 624.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 298.0, 289.0, 311.0, 322.0, 296.0, 283.0, 319.0, 314.0, 295.0, 289.0, 317.0, 316.0, 314.0, 322.0, 281.0, 280.0, 285.0, 288.0, 287.0, 283.0, 290.0, 292.0, 313.0, 314.0, 314.0, 319.0, 298.0, 289.0, 319.0, 317.0, 286.0, 293.0, 311.0, 319.0, 320.0, 307.0, 262.0, 263.0, 285.0, 291.0, 320.0, 316.0, 319.0, 317.0, 288.0, 299.0, 301.0, 286.0, 322.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 311.0, 264.0, 255.0, 293.0, 294.0, 293.0, 294.0, 321.0, 312.0, 288.0, 294.0, 268.0, 251.0, 311.0, 316.0, 283.0, 296.0, 322.0, 314.0, 316.0, 314.0, 298.0, 286.0, 313.0, 323.0, 222.0, 243.0, 318.0, 309.0, 321.0, 312.0, 310.0, 314.0, 319.0, 314.0, 220.0, 245.0, 297.0, 285.0, 294.0, 293.0, 318.0, 312.0, 311.0, 316.0, 317.0, 322.0, 316.0, 314.0, 319.0, 314.0, 222.0, 231.0, 319.0, 320.0, 322.0, 308.0, 322.0, 308.0, 311.0, 316.0, 319.0, 317.0, 294.0, 285.0, 283.0, 304.0, 324.0, 312.0, 259.0, 251.0, 301.0, 286.0, 305.0, 319.0, 313.0, 317.0, 271.0, 268.0, 313.0, 317.0, 288.0, 291.0, 317.0, 316.0, 311.0, 319.0, 289.0, 293.0, 290.0, 292.0, 314.0, 316.0, 319.0, 308.0, 311.0, 322.0, 314.0, 316.0, 310.0, 317.0, 302.0, 277.0, 319.0, 314.0, 299.0, 288.0, 293.0, 286.0, 283.0, 
299.0, 297.0, 279.0, 291.0, 288.0, 319.0, 308.0, 314.0, 313.0, 313.0, 323.0, 319.0, 320.0, 310.0, 320.0, 319.0, 311.0, 286.0, 296.0, 314.0, 304.0, 311.0, 319.0, 293.0, 294.0, 231.0, 242.0, 291.0, 296.0, 308.0, 316.0, 293.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8557975282130863, "mean_processing_ms": 0.23953222980731334, "mean_inference_ms": 1.453823461548284}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8928000, "num_steps_sampled": 4761600, "sample_time_ms": 22359.762, "load_time_ms": 38.161, "grad_time_ms": 11242.342, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00044215377420186996, "policy_loss": -0.007962713949382305, "vf_loss": 80.8259506225586, "vf_explained_var": 0.7670674920082092, "kl": 0.0015741548268124461, "entropy": 1.1240602731704712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4761600, "episodes_total": 11904, "training_iteration": 372, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-57-26", "timestamp": 1660258646, "time_this_iter_s": 29.648212909698486, "time_total_s": 17057.02994942665, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17057.02994942665, "timesteps_since_restore": 4761600, "iterations_since_restore": 372, "perf": {"cpu_util_percent": 34.21904761904762, "ram_util_percent": 58.84523809523809}} -{"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 602.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 301.07}, 
"custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 184.94, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.54, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.13, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.01, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.01, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.01, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 582.0, 630.0, 630.0, 636.0, 633.0, 584.0, 630.0, 633.0, 408.0, 582.0, 630.0, 584.0, 630.0, 639.0, 630.0, 561.0, 630.0, 633.0, 587.0, 633.0, 630.0, 587.0, 587.0, 636.0, 636.0, 633.0, 390.0, 
636.0, 579.0, 579.0, 587.0, 624.0, 630.0, 539.0, 630.0, 579.0, 633.0, 630.0, 582.0, 582.0, 630.0, 627.0, 633.0, 630.0, 627.0, 579.0, 633.0, 587.0, 579.0, 582.0, 576.0, 579.0, 627.0, 627.0, 636.0, 639.0, 630.0, 630.0, 582.0, 618.0, 630.0, 587.0, 473.0, 587.0, 624.0, 587.0, 582.0, 587.0, 633.0, 579.0, 633.0, 584.0, 633.0, 636.0, 561.0, 573.0, 570.0, 582.0, 627.0, 633.0, 587.0, 636.0, 579.0, 630.0, 627.0, 525.0, 576.0, 636.0, 636.0, 587.0, 587.0, 633.0, 633.0, 636.0, 630.0, 519.0, 587.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 316.0, 314.0, 280.0, 302.0, 319.0, 311.0, 319.0, 311.0, 314.0, 322.0, 316.0, 317.0, 293.0, 291.0, 318.0, 312.0, 319.0, 314.0, 205.0, 203.0, 285.0, 297.0, 308.0, 322.0, 292.0, 292.0, 306.0, 324.0, 319.0, 320.0, 326.0, 304.0, 285.0, 276.0, 311.0, 319.0, 313.0, 320.0, 301.0, 286.0, 311.0, 322.0, 313.0, 317.0, 293.0, 294.0, 301.0, 286.0, 316.0, 320.0, 319.0, 317.0, 316.0, 317.0, 193.0, 197.0, 322.0, 314.0, 288.0, 291.0, 281.0, 298.0, 301.0, 286.0, 305.0, 319.0, 313.0, 317.0, 271.0, 268.0, 313.0, 317.0, 288.0, 291.0, 317.0, 316.0, 311.0, 319.0, 289.0, 293.0, 290.0, 292.0, 314.0, 316.0, 319.0, 308.0, 311.0, 322.0, 314.0, 316.0, 310.0, 317.0, 302.0, 277.0, 319.0, 314.0, 299.0, 288.0, 293.0, 286.0, 283.0, 299.0, 297.0, 279.0, 291.0, 288.0, 319.0, 308.0, 314.0, 313.0, 313.0, 323.0, 319.0, 320.0, 310.0, 320.0, 319.0, 311.0, 286.0, 296.0, 314.0, 304.0, 311.0, 319.0, 293.0, 294.0, 231.0, 242.0, 291.0, 296.0, 308.0, 316.0, 293.0, 294.0, 290.0, 
292.0, 298.0, 289.0, 311.0, 322.0, 296.0, 283.0, 319.0, 314.0, 295.0, 289.0, 317.0, 316.0, 314.0, 322.0, 281.0, 280.0, 285.0, 288.0, 287.0, 283.0, 290.0, 292.0, 313.0, 314.0, 314.0, 319.0, 298.0, 289.0, 319.0, 317.0, 286.0, 293.0, 311.0, 319.0, 320.0, 307.0, 262.0, 263.0, 285.0, 291.0, 320.0, 316.0, 319.0, 317.0, 288.0, 299.0, 301.0, 286.0, 322.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 311.0, 264.0, 255.0, 293.0, 294.0, 293.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8547135741769734, "mean_processing_ms": 0.23931513380752317, "mean_inference_ms": 1.4526421752907723}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8952000, "num_steps_sampled": 4774400, "sample_time_ms": 22196.797, "load_time_ms": 37.846, "grad_time_ms": 11312.767, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003103644819930196, "policy_loss": -0.004869487602263689, "vf_loss": 85.35115814208984, "vf_explained_var": 0.7750833630561829, "kl": 0.0021017238032072783, "entropy": 1.123950481414795, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4774400, "episodes_total": 11936, "training_iteration": 373, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-57-58", "timestamp": 1660258678, "time_this_iter_s": 32.121092796325684, "time_total_s": 17089.151042222977, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17089.151042222977, "timesteps_since_restore": 4774400, "iterations_since_restore": 373, "perf": {"cpu_util_percent": 33.684444444444445, "ram_util_percent": 58.78888888888888}} -{"episode_reward_max": 639.0, 
"episode_reward_min": 390.0, "episode_reward_mean": 602.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 301.475}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.75, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.66, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, 
"onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.59, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.09, "optimal_onion_potting_agent_0_min": 10, 
"optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [582.0, 636.0, 636.0, 576.0, 636.0, 639.0, 590.0, 633.0, 636.0, 522.0, 633.0, 578.0, 544.0, 636.0, 630.0, 636.0, 636.0, 627.0, 621.0, 636.0, 582.0, 582.0, 636.0, 587.0, 587.0, 636.0, 636.0, 582.0, 630.0, 636.0, 627.0, 581.0, 473.0, 587.0, 624.0, 587.0, 582.0, 587.0, 633.0, 579.0, 633.0, 584.0, 633.0, 636.0, 561.0, 573.0, 570.0, 582.0, 627.0, 633.0, 587.0, 636.0, 579.0, 630.0, 627.0, 525.0, 576.0, 636.0, 636.0, 587.0, 587.0, 633.0, 633.0, 636.0, 630.0, 519.0, 587.0, 587.0, 587.0, 630.0, 582.0, 630.0, 630.0, 636.0, 633.0, 584.0, 630.0, 633.0, 408.0, 582.0, 630.0, 584.0, 630.0, 639.0, 630.0, 561.0, 630.0, 633.0, 587.0, 633.0, 630.0, 587.0, 587.0, 636.0, 636.0, 633.0, 390.0, 636.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 319.0, 317.0, 320.0, 316.0, 288.0, 288.0, 329.0, 307.0, 324.0, 315.0, 296.0, 294.0, 316.0, 317.0, 319.0, 317.0, 268.0, 254.0, 315.0, 318.0, 285.0, 293.0, 270.0, 274.0, 320.0, 316.0, 315.0, 315.0, 322.0, 314.0, 327.0, 309.0, 314.0, 313.0, 313.0, 308.0, 317.0, 319.0, 296.0, 286.0, 293.0, 289.0, 311.0, 325.0, 291.0, 296.0, 295.0, 292.0, 324.0, 312.0, 319.0, 317.0, 294.0, 288.0, 319.0, 311.0, 314.0, 322.0, 314.0, 313.0, 282.0, 299.0, 231.0, 242.0, 291.0, 296.0, 308.0, 316.0, 293.0, 294.0, 290.0, 292.0, 298.0, 289.0, 311.0, 322.0, 296.0, 283.0, 319.0, 314.0, 295.0, 289.0, 317.0, 316.0, 314.0, 322.0, 281.0, 280.0, 285.0, 288.0, 287.0, 283.0, 290.0, 292.0, 313.0, 314.0, 314.0, 319.0, 298.0, 289.0, 319.0, 317.0, 286.0, 
293.0, 311.0, 319.0, 320.0, 307.0, 262.0, 263.0, 285.0, 291.0, 320.0, 316.0, 319.0, 317.0, 288.0, 299.0, 301.0, 286.0, 322.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 311.0, 264.0, 255.0, 293.0, 294.0, 293.0, 294.0, 296.0, 291.0, 316.0, 314.0, 280.0, 302.0, 319.0, 311.0, 319.0, 311.0, 314.0, 322.0, 316.0, 317.0, 293.0, 291.0, 318.0, 312.0, 319.0, 314.0, 205.0, 203.0, 285.0, 297.0, 308.0, 322.0, 292.0, 292.0, 306.0, 324.0, 319.0, 320.0, 326.0, 304.0, 285.0, 276.0, 311.0, 319.0, 313.0, 320.0, 301.0, 286.0, 311.0, 322.0, 313.0, 317.0, 293.0, 294.0, 301.0, 286.0, 316.0, 320.0, 319.0, 317.0, 316.0, 317.0, 193.0, 197.0, 322.0, 314.0, 288.0, 291.0, 281.0, 298.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8536367311955897, "mean_processing_ms": 0.23909908370976882, "mean_inference_ms": 1.4514727184055203}, "off_policy_estimator": {}, "info": {"num_steps_trained": 8976000, "num_steps_sampled": 4787200, "sample_time_ms": 21894.527, "load_time_ms": 38.299, "grad_time_ms": 11412.728, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0030573883559554815, "policy_loss": -0.004609658382833004, "vf_loss": 82.26570892333984, "vf_explained_var": 0.7665780186653137, "kl": 0.002119669923558831, "entropy": 1.119057059288025, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4787200, "episodes_total": 11968, "training_iteration": 374, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-58-31", "timestamp": 1660258711, "time_this_iter_s": 33.1311149597168, "time_total_s": 17122.282157182693, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
17122.282157182693, "timesteps_since_restore": 4787200, "iterations_since_restore": 374, "perf": {"cpu_util_percent": 34.03404255319149, "ram_util_percent": 59.29999999999999}} -{"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 608.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 304.09}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 186.98, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.45, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, 
"useful_onion_pickup_agent_1_mean": 17.7, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 630.0, 636.0, 630.0, 636.0, 564.0, 633.0, 633.0, 627.0, 630.0, 587.0, 627.0, 621.0, 576.0, 582.0, 630.0, 582.0, 639.0, 636.0, 636.0, 582.0, 630.0, 633.0, 575.0, 630.0, 587.0, 582.0, 639.0, 639.0, 587.0, 630.0, 630.0, 519.0, 587.0, 587.0, 587.0, 630.0, 582.0, 630.0, 630.0, 636.0, 633.0, 584.0, 630.0, 633.0, 408.0, 582.0, 630.0, 584.0, 630.0, 639.0, 630.0, 561.0, 630.0, 633.0, 587.0, 633.0, 630.0, 587.0, 587.0, 636.0, 636.0, 633.0, 390.0, 636.0, 579.0, 579.0, 582.0, 636.0, 636.0, 576.0, 636.0, 639.0, 590.0, 633.0, 636.0, 522.0, 633.0, 578.0, 544.0, 636.0, 630.0, 636.0, 636.0, 627.0, 621.0, 636.0, 582.0, 582.0, 636.0, 587.0, 587.0, 636.0, 636.0, 582.0, 630.0, 636.0, 627.0, 581.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 316.0, 320.0, 313.0, 317.0, 314.0, 322.0, 275.0, 289.0, 313.0, 320.0, 321.0, 312.0, 308.0, 319.0, 316.0, 314.0, 294.0, 293.0, 314.0, 313.0, 302.0, 319.0, 285.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 294.0, 317.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 316.0, 314.0, 319.0, 314.0, 287.0, 288.0, 314.0, 316.0, 288.0, 299.0, 288.0, 294.0, 317.0, 322.0, 317.0, 322.0, 296.0, 291.0, 311.0, 319.0, 319.0, 311.0, 264.0, 255.0, 293.0, 294.0, 293.0, 294.0, 296.0, 
291.0, 316.0, 314.0, 280.0, 302.0, 319.0, 311.0, 319.0, 311.0, 314.0, 322.0, 316.0, 317.0, 293.0, 291.0, 318.0, 312.0, 319.0, 314.0, 205.0, 203.0, 285.0, 297.0, 308.0, 322.0, 292.0, 292.0, 306.0, 324.0, 319.0, 320.0, 326.0, 304.0, 285.0, 276.0, 311.0, 319.0, 313.0, 320.0, 301.0, 286.0, 311.0, 322.0, 313.0, 317.0, 293.0, 294.0, 301.0, 286.0, 316.0, 320.0, 319.0, 317.0, 316.0, 317.0, 193.0, 197.0, 322.0, 314.0, 288.0, 291.0, 281.0, 298.0, 296.0, 286.0, 319.0, 317.0, 320.0, 316.0, 288.0, 288.0, 329.0, 307.0, 324.0, 315.0, 296.0, 294.0, 316.0, 317.0, 319.0, 317.0, 268.0, 254.0, 315.0, 318.0, 285.0, 293.0, 270.0, 274.0, 320.0, 316.0, 315.0, 315.0, 322.0, 314.0, 327.0, 309.0, 314.0, 313.0, 313.0, 308.0, 317.0, 319.0, 296.0, 286.0, 293.0, 289.0, 311.0, 325.0, 291.0, 296.0, 295.0, 292.0, 324.0, 312.0, 319.0, 317.0, 294.0, 288.0, 319.0, 311.0, 314.0, 322.0, 314.0, 313.0, 282.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8525659073078762, "mean_processing_ms": 0.23888337042997632, "mean_inference_ms": 1.4503574621583197}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9000000, "num_steps_sampled": 4800000, "sample_time_ms": 21378.834, "load_time_ms": 37.98, "grad_time_ms": 11184.622, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034743063151836395, "policy_loss": -0.004098345525562763, "vf_loss": 81.30432891845703, "vf_explained_var": 0.7632368206977844, "kl": 0.0018746949499472976, "entropy": 1.1155738830566406, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4800000, "episodes_total": 12000, "training_iteration": 375, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-11_23-59-03", "timestamp": 1660258743, "time_this_iter_s": 31.725862979888916, "time_total_s": 17154.008020162582, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17154.008020162582, "timesteps_since_restore": 4800000, "iterations_since_restore": 375, "perf": {"cpu_util_percent": 34.54888888888888, "ram_util_percent": 59.05333333333331}} -{"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 611.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 305.835}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.07, "shaped_reward_min": 110, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.66, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.58, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.24, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, 
"soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.17, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.58, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.58, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 639.0, 636.0, 636.0, 636.0, 633.0, 633.0, 636.0, 590.0, 639.0, 582.0, 587.0, 627.0, 636.0, 581.0, 579.0, 582.0, 627.0, 639.0, 576.0, 630.0, 633.0, 633.0, 570.0, 630.0, 639.0, 639.0, 630.0, 582.0, 581.0, 630.0, 390.0, 636.0, 579.0, 579.0, 582.0, 636.0, 636.0, 576.0, 636.0, 639.0, 590.0, 633.0, 636.0, 522.0, 633.0, 578.0, 544.0, 636.0, 630.0, 636.0, 636.0, 627.0, 621.0, 636.0, 582.0, 582.0, 636.0, 587.0, 587.0, 636.0, 636.0, 582.0, 630.0, 636.0, 627.0, 581.0, 636.0, 630.0, 630.0, 636.0, 630.0, 636.0, 564.0, 633.0, 633.0, 627.0, 630.0, 587.0, 627.0, 621.0, 576.0, 582.0, 630.0, 582.0, 639.0, 636.0, 636.0, 582.0, 630.0, 633.0, 575.0, 630.0, 587.0, 582.0, 639.0, 639.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 301.0, 321.0, 309.0, 317.0, 322.0, 311.0, 325.0, 317.0, 319.0, 319.0, 317.0, 319.0, 314.0, 321.0, 312.0, 311.0, 325.0, 293.0, 297.0, 320.0, 319.0, 288.0, 294.0, 291.0, 296.0, 316.0, 311.0, 314.0, 322.0, 281.0, 300.0, 296.0, 283.0, 288.0, 294.0, 319.0, 308.0, 314.0, 325.0, 288.0, 288.0, 316.0, 
314.0, 309.0, 324.0, 316.0, 317.0, 283.0, 287.0, 314.0, 316.0, 314.0, 325.0, 319.0, 320.0, 318.0, 312.0, 293.0, 289.0, 299.0, 282.0, 316.0, 314.0, 193.0, 197.0, 322.0, 314.0, 288.0, 291.0, 281.0, 298.0, 296.0, 286.0, 319.0, 317.0, 320.0, 316.0, 288.0, 288.0, 329.0, 307.0, 324.0, 315.0, 296.0, 294.0, 316.0, 317.0, 319.0, 317.0, 268.0, 254.0, 315.0, 318.0, 285.0, 293.0, 270.0, 274.0, 320.0, 316.0, 315.0, 315.0, 322.0, 314.0, 327.0, 309.0, 314.0, 313.0, 313.0, 308.0, 317.0, 319.0, 296.0, 286.0, 293.0, 289.0, 311.0, 325.0, 291.0, 296.0, 295.0, 292.0, 324.0, 312.0, 319.0, 317.0, 294.0, 288.0, 319.0, 311.0, 314.0, 322.0, 314.0, 313.0, 282.0, 299.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 316.0, 320.0, 313.0, 317.0, 314.0, 322.0, 275.0, 289.0, 313.0, 320.0, 321.0, 312.0, 308.0, 319.0, 316.0, 314.0, 294.0, 293.0, 314.0, 313.0, 302.0, 319.0, 285.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 294.0, 317.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 316.0, 314.0, 319.0, 314.0, 287.0, 288.0, 314.0, 316.0, 288.0, 299.0, 288.0, 294.0, 317.0, 322.0, 317.0, 322.0, 296.0, 291.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8515001170833599, "mean_processing_ms": 0.2386703929555994, "mean_inference_ms": 1.4492475264918965}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9024000, "num_steps_sampled": 4812800, "sample_time_ms": 21252.385, "load_time_ms": 37.854, "grad_time_ms": 10740.605, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007883608341217041, "policy_loss": -0.006749347317963839, "vf_loss": 80.9527359008789, "vf_explained_var": 0.7635239958763123, "kl": 0.0017555366503074765, "entropy": 1.115132212638855, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4812800, "episodes_total": 12032, "training_iteration": 376, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-11_23-59-32", "timestamp": 1660258772, "time_this_iter_s": 29.115790128707886, "time_total_s": 17183.12381029129, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17183.12381029129, "timesteps_since_restore": 4812800, "iterations_since_restore": 376, "perf": {"cpu_util_percent": 34.046341463414635, "ram_util_percent": 58.78536585365854}} -{"episode_reward_max": 639.0, "episode_reward_min": 558.0, "episode_reward_mean": 615.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 273.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.97}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 189.14, "shaped_reward_min": 158, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.19, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.07, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.69, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.1, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.29, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.54, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.48, 
"soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.33, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.69, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.69, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 581.0, 630.0, 630.0, 587.0, 636.0, 639.0, 558.0, 633.0, 582.0, 636.0, 582.0, 639.0, 633.0, 636.0, 587.0, 636.0, 636.0, 582.0, 630.0, 584.0, 636.0, 587.0, 633.0, 639.0, 564.0, 639.0, 630.0, 636.0, 582.0, 630.0, 639.0, 630.0, 636.0, 627.0, 581.0, 636.0, 630.0, 630.0, 636.0, 630.0, 636.0, 564.0, 633.0, 633.0, 627.0, 630.0, 587.0, 627.0, 621.0, 576.0, 582.0, 630.0, 582.0, 639.0, 636.0, 636.0, 582.0, 630.0, 633.0, 575.0, 630.0, 587.0, 582.0, 639.0, 639.0, 587.0, 630.0, 582.0, 630.0, 639.0, 636.0, 636.0, 636.0, 633.0, 633.0, 636.0, 590.0, 639.0, 582.0, 587.0, 627.0, 636.0, 581.0, 579.0, 582.0, 627.0, 639.0, 576.0, 630.0, 633.0, 633.0, 570.0, 630.0, 639.0, 639.0, 630.0, 582.0, 581.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 298.0, 283.0, 305.0, 325.0, 314.0, 316.0, 292.0, 295.0, 319.0, 317.0, 319.0, 320.0, 279.0, 279.0, 316.0, 317.0, 
293.0, 289.0, 317.0, 319.0, 296.0, 286.0, 320.0, 319.0, 311.0, 322.0, 317.0, 319.0, 280.0, 307.0, 314.0, 322.0, 317.0, 319.0, 293.0, 289.0, 316.0, 314.0, 302.0, 282.0, 316.0, 320.0, 285.0, 302.0, 317.0, 316.0, 322.0, 317.0, 291.0, 273.0, 325.0, 314.0, 319.0, 311.0, 324.0, 312.0, 296.0, 286.0, 315.0, 315.0, 314.0, 325.0, 319.0, 311.0, 314.0, 322.0, 314.0, 313.0, 282.0, 299.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 316.0, 320.0, 313.0, 317.0, 314.0, 322.0, 275.0, 289.0, 313.0, 320.0, 321.0, 312.0, 308.0, 319.0, 316.0, 314.0, 294.0, 293.0, 314.0, 313.0, 302.0, 319.0, 285.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 294.0, 317.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 316.0, 314.0, 319.0, 314.0, 287.0, 288.0, 314.0, 316.0, 288.0, 299.0, 288.0, 294.0, 317.0, 322.0, 317.0, 322.0, 296.0, 291.0, 311.0, 319.0, 281.0, 301.0, 321.0, 309.0, 317.0, 322.0, 311.0, 325.0, 317.0, 319.0, 319.0, 317.0, 319.0, 314.0, 321.0, 312.0, 311.0, 325.0, 293.0, 297.0, 320.0, 319.0, 288.0, 294.0, 291.0, 296.0, 316.0, 311.0, 314.0, 322.0, 281.0, 300.0, 296.0, 283.0, 288.0, 294.0, 319.0, 308.0, 314.0, 325.0, 288.0, 288.0, 316.0, 314.0, 309.0, 324.0, 316.0, 317.0, 283.0, 287.0, 314.0, 316.0, 314.0, 325.0, 319.0, 320.0, 318.0, 312.0, 293.0, 289.0, 299.0, 282.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8504378248511929, "mean_processing_ms": 0.23845860870627447, "mean_inference_ms": 1.448146633207985}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9048000, "num_steps_sampled": 4825600, "sample_time_ms": 21404.603, "load_time_ms": 37.763, "grad_time_ms": 10323.133, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002214103704318404, "policy_loss": -0.005506592337042093, "vf_loss": 82.8126449584961, "vf_explained_var": 0.766756534576416, "kl": 0.0020635148975998163, "entropy": 1.1211366653442383, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 4825600, "episodes_total": 12064, "training_iteration": 377, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-00-03", "timestamp": 1660258803, "time_this_iter_s": 30.737117767333984, "time_total_s": 17213.860928058624, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17213.860928058624, "timesteps_since_restore": 4825600, "iterations_since_restore": 377, "perf": {"cpu_util_percent": 36.85227272727273, "ram_util_percent": 58.872727272727275}} -{"episode_reward_max": 639.0, "episode_reward_min": 546.0, "episode_reward_mean": 616.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 264.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 308.28}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 189.76, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.81, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.7, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.06, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.3, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.24, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.7, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.7, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 587.0, 639.0, 576.0, 639.0, 633.0, 639.0, 636.0, 630.0, 636.0, 633.0, 633.0, 587.0, 636.0, 582.0, 593.0, 546.0, 579.0, 633.0, 639.0, 573.0, 630.0, 636.0, 633.0, 587.0, 590.0, 627.0, 630.0, 639.0, 636.0, 636.0, 639.0, 639.0, 587.0, 630.0, 582.0, 630.0, 639.0, 636.0, 636.0, 636.0, 633.0, 633.0, 636.0, 590.0, 639.0, 582.0, 587.0, 627.0, 636.0, 581.0, 579.0, 582.0, 627.0, 639.0, 576.0, 630.0, 633.0, 633.0, 570.0, 630.0, 639.0, 639.0, 630.0, 582.0, 581.0, 630.0, 630.0, 581.0, 630.0, 630.0, 587.0, 636.0, 639.0, 558.0, 633.0, 582.0, 636.0, 582.0, 639.0, 633.0, 636.0, 587.0, 636.0, 636.0, 582.0, 630.0, 584.0, 636.0, 587.0, 633.0, 639.0, 564.0, 639.0, 630.0, 636.0, 582.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 308.0, 322.0, 290.0, 297.0, 322.0, 317.0, 288.0, 288.0, 320.0, 319.0, 314.0, 319.0, 325.0, 314.0, 312.0, 324.0, 316.0, 314.0, 316.0, 320.0, 311.0, 322.0, 319.0, 314.0, 291.0, 296.0, 319.0, 317.0, 288.0, 294.0, 294.0, 299.0, 264.0, 282.0, 288.0, 291.0, 319.0, 314.0, 316.0, 323.0, 279.0, 294.0, 321.0, 309.0, 317.0, 319.0, 316.0, 317.0, 293.0, 294.0, 296.0, 294.0, 310.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 317.0, 316.0, 320.0, 317.0, 322.0, 317.0, 322.0, 296.0, 291.0, 311.0, 319.0, 281.0, 301.0, 321.0, 309.0, 317.0, 322.0, 311.0, 325.0, 317.0, 319.0, 319.0, 317.0, 319.0, 314.0, 321.0, 312.0, 311.0, 325.0, 293.0, 297.0, 320.0, 319.0, 288.0, 294.0, 291.0, 296.0, 316.0, 311.0, 314.0, 322.0, 281.0, 300.0, 296.0, 283.0, 288.0, 294.0, 319.0, 308.0, 314.0, 325.0, 288.0, 288.0, 316.0, 314.0, 309.0, 324.0, 316.0, 317.0, 283.0, 287.0, 314.0, 316.0, 314.0, 325.0, 319.0, 320.0, 318.0, 312.0, 293.0, 289.0, 299.0, 282.0, 316.0, 314.0, 316.0, 314.0, 298.0, 283.0, 305.0, 325.0, 314.0, 316.0, 292.0, 295.0, 319.0, 317.0, 319.0, 320.0, 279.0, 279.0, 316.0, 317.0, 293.0, 289.0, 317.0, 319.0, 296.0, 286.0, 320.0, 319.0, 311.0, 322.0, 317.0, 319.0, 280.0, 307.0, 314.0, 322.0, 317.0, 319.0, 293.0, 289.0, 316.0, 314.0, 302.0, 282.0, 316.0, 320.0, 285.0, 302.0, 317.0, 316.0, 322.0, 317.0, 291.0, 273.0, 325.0, 314.0, 319.0, 311.0, 324.0, 312.0, 296.0, 286.0, 315.0, 315.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8493908771025369, "mean_processing_ms": 0.23825289023894292, "mean_inference_ms": 1.447125693575985}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9072000, "num_steps_sampled": 4838400, "sample_time_ms": 21529.609, "load_time_ms": 37.587, "grad_time_ms": 10211.527, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015308427391573787, "policy_loss": -0.006051002535969019, "vf_loss": 81.36373901367188, 
"vf_explained_var": 0.7675411701202393, "kl": 0.00216054730117321, "entropy": 1.1090576648712158, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4838400, "episodes_total": 12096, "training_iteration": 378, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-00-35", "timestamp": 1660258835, "time_this_iter_s": 32.476667165756226, "time_total_s": 17246.33759522438, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17246.33759522438, "timesteps_since_restore": 4838400, "iterations_since_restore": 378, "perf": {"cpu_util_percent": 34.55434782608695, "ram_util_percent": 59.1304347826087}} -{"episode_reward_max": 639.0, "episode_reward_min": 468.0, "episode_reward_mean": 614.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.265}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.93, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.14, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.82, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.64, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.91, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.55, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.25, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.64, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.64, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 639.0, 633.0, 636.0, 630.0, 570.0, 630.0, 633.0, 579.0, 630.0, 639.0, 593.0, 633.0, 636.0, 627.0, 579.0, 590.0, 636.0, 468.0, 587.0, 576.0, 582.0, 582.0, 630.0, 636.0, 636.0, 633.0, 633.0, 630.0, 633.0, 630.0, 630.0, 630.0, 582.0, 581.0, 630.0, 630.0, 581.0, 630.0, 630.0, 587.0, 636.0, 639.0, 558.0, 633.0, 582.0, 636.0, 582.0, 639.0, 633.0, 636.0, 587.0, 636.0, 636.0, 582.0, 630.0, 584.0, 636.0, 587.0, 633.0, 639.0, 564.0, 639.0, 630.0, 636.0, 582.0, 630.0, 639.0, 633.0, 630.0, 587.0, 639.0, 576.0, 639.0, 633.0, 639.0, 636.0, 630.0, 636.0, 633.0, 633.0, 587.0, 636.0, 582.0, 593.0, 546.0, 579.0, 633.0, 639.0, 573.0, 630.0, 636.0, 633.0, 587.0, 590.0, 627.0, 630.0, 639.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 322.0, 317.0, 316.0, 317.0, 316.0, 320.0, 311.0, 319.0, 282.0, 288.0, 316.0, 314.0, 319.0, 314.0, 294.0, 285.0, 314.0, 316.0, 319.0, 320.0, 307.0, 286.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 292.0, 288.0, 302.0, 324.0, 312.0, 237.0, 231.0, 291.0, 296.0, 282.0, 294.0, 291.0, 291.0, 289.0, 293.0, 319.0, 311.0, 314.0, 322.0, 324.0, 312.0, 314.0, 319.0, 314.0, 319.0, 314.0, 316.0, 316.0, 317.0, 308.0, 322.0, 319.0, 311.0, 318.0, 312.0, 293.0, 289.0, 299.0, 282.0, 316.0, 314.0, 316.0, 314.0, 298.0, 283.0, 305.0, 325.0, 314.0, 316.0, 292.0, 295.0, 319.0, 317.0, 319.0, 320.0, 279.0, 279.0, 316.0, 317.0, 293.0, 289.0, 317.0, 319.0, 296.0, 286.0, 320.0, 319.0, 311.0, 322.0, 317.0, 319.0, 280.0, 307.0, 314.0, 322.0, 317.0, 319.0, 293.0, 289.0, 316.0, 314.0, 302.0, 282.0, 316.0, 320.0, 285.0, 302.0, 317.0, 316.0, 322.0, 317.0, 291.0, 273.0, 325.0, 314.0, 319.0, 311.0, 324.0, 312.0, 296.0, 286.0, 315.0, 315.0, 314.0, 325.0, 316.0, 317.0, 308.0, 322.0, 290.0, 297.0, 322.0, 317.0, 288.0, 288.0, 320.0, 319.0, 314.0, 319.0, 325.0, 314.0, 312.0, 324.0, 316.0, 314.0, 316.0, 320.0, 311.0, 322.0, 319.0, 314.0, 291.0, 296.0, 319.0, 317.0, 288.0, 294.0, 294.0, 299.0, 264.0, 282.0, 288.0, 291.0, 319.0, 314.0, 316.0, 323.0, 279.0, 294.0, 321.0, 309.0, 317.0, 319.0, 316.0, 317.0, 293.0, 294.0, 296.0, 294.0, 310.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 317.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.84835648025371, "mean_processing_ms": 0.23804887644488537, "mean_inference_ms": 1.446224605883411}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9096000, "num_steps_sampled": 4851200, "sample_time_ms": 21545.942, "load_time_ms": 37.194, "grad_time_ms": 9930.811, "update_time_ms": 0.002, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008399917860515416, "policy_loss": -0.0065034665167331696, "vf_loss": 79.03890991210938, "vf_explained_var": 0.7710984349250793, "kl": 0.0017613372765481472, "entropy": 1.1208573579788208, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4851200, "episodes_total": 12128, "training_iteration": 379, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-01-07", "timestamp": 1660258867, "time_this_iter_s": 32.251976013183594, "time_total_s": 17278.589571237564, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 
1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17278.589571237564, "timesteps_since_restore": 4851200, "iterations_since_restore": 379, "perf": {"cpu_util_percent": 33.40222222222222, "ram_util_percent": 58.955555555555534}} -{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 614.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.18}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.76, "shaped_reward_min": 146, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.93, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.33, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.65, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.28, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, 
"useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.41, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 636.0, 467.0, 576.0, 639.0, 582.0, 582.0, 627.0, 636.0, 639.0, 627.0, 579.0, 587.0, 636.0, 579.0, 633.0, 636.0, 582.0, 639.0, 630.0, 633.0, 633.0, 633.0, 630.0, 627.0, 633.0, 627.0, 582.0, 633.0, 633.0, 636.0, 636.0, 582.0, 630.0, 639.0, 633.0, 630.0, 587.0, 639.0, 576.0, 639.0, 633.0, 639.0, 636.0, 630.0, 636.0, 633.0, 633.0, 587.0, 636.0, 582.0, 593.0, 546.0, 579.0, 633.0, 639.0, 573.0, 630.0, 636.0, 633.0, 587.0, 590.0, 627.0, 630.0, 639.0, 636.0, 636.0, 573.0, 639.0, 633.0, 636.0, 630.0, 570.0, 630.0, 633.0, 579.0, 630.0, 639.0, 593.0, 633.0, 636.0, 627.0, 579.0, 590.0, 636.0, 468.0, 587.0, 576.0, 582.0, 582.0, 630.0, 636.0, 636.0, 633.0, 633.0, 630.0, 633.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 319.0, 286.0, 296.0, 319.0, 317.0, 230.0, 237.0, 287.0, 289.0, 317.0, 322.0, 291.0, 291.0, 288.0, 294.0, 310.0, 317.0, 314.0, 322.0, 322.0, 317.0, 308.0, 319.0, 290.0, 289.0, 298.0, 289.0, 324.0, 312.0, 291.0, 288.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 319.0, 320.0, 311.0, 319.0, 311.0, 322.0, 319.0, 314.0, 318.0, 315.0, 316.0, 314.0, 316.0, 311.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 311.0, 322.0, 321.0, 312.0, 319.0, 317.0, 324.0, 312.0, 296.0, 286.0, 315.0, 315.0, 314.0, 325.0, 316.0, 317.0, 308.0, 322.0, 290.0, 297.0, 322.0, 317.0, 288.0, 288.0, 320.0, 319.0, 314.0, 319.0, 325.0, 314.0, 312.0, 324.0, 316.0, 314.0, 316.0, 320.0, 311.0, 322.0, 319.0, 314.0, 291.0, 296.0, 319.0, 317.0, 288.0, 294.0, 294.0, 299.0, 264.0, 282.0, 288.0, 291.0, 319.0, 314.0, 316.0, 323.0, 279.0, 294.0, 321.0, 309.0, 317.0, 319.0, 316.0, 317.0, 293.0, 294.0, 296.0, 294.0, 310.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 317.0, 316.0, 320.0, 285.0, 288.0, 322.0, 317.0, 316.0, 317.0, 316.0, 320.0, 311.0, 319.0, 282.0, 288.0, 316.0, 314.0, 319.0, 314.0, 294.0, 285.0, 314.0, 316.0, 319.0, 320.0, 307.0, 286.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 292.0, 288.0, 302.0, 324.0, 312.0, 237.0, 231.0, 291.0, 296.0, 282.0, 294.0, 291.0, 291.0, 289.0, 293.0, 319.0, 311.0, 314.0, 322.0, 324.0, 312.0, 314.0, 319.0, 314.0, 319.0, 314.0, 316.0, 316.0, 317.0, 308.0, 322.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8473261545871941, "mean_processing_ms": 0.23784601732362667, "mean_inference_ms": 
1.4452831057564066}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9120000, "num_steps_sampled": 4864000, "sample_time_ms": 21329.654, "load_time_ms": 36.87, "grad_time_ms": 9824.642, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010081265354529023, "policy_loss": -0.006387272384017706, "vf_loss": 79.55323028564453, "vf_explained_var": 0.7746841311454773, "kl": 0.001845820457674563, "entropy": 1.1198536157608032, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4864000, "episodes_total": 12160, "training_iteration": 380, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-01-38", "timestamp": 1660258898, "time_this_iter_s": 30.608419179916382, "time_total_s": 17309.19799041748, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17309.19799041748, "timesteps_since_restore": 4864000, "iterations_since_restore": 380, "perf": {"cpu_util_percent": 34.48139534883721, "ram_util_percent": 58.923255813953475}} -{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 615.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.95}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 189.1, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.11, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.87, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.83, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.55, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.08, 
"dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.55, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.55, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, 
"viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 587.0, 636.0, 639.0, 639.0, 633.0, 630.0, 587.0, 636.0, 636.0, 627.0, 639.0, 636.0, 633.0, 569.0, 633.0, 522.0, 630.0, 636.0, 633.0, 633.0, 630.0, 633.0, 579.0, 636.0, 633.0, 633.0, 639.0, 584.0, 633.0, 633.0, 630.0, 630.0, 639.0, 636.0, 636.0, 573.0, 639.0, 633.0, 636.0, 630.0, 570.0, 630.0, 633.0, 579.0, 630.0, 639.0, 593.0, 633.0, 636.0, 627.0, 579.0, 590.0, 636.0, 468.0, 587.0, 576.0, 582.0, 582.0, 630.0, 636.0, 636.0, 633.0, 633.0, 630.0, 633.0, 630.0, 630.0, 627.0, 582.0, 636.0, 467.0, 576.0, 639.0, 582.0, 582.0, 627.0, 
636.0, 639.0, 627.0, 579.0, 587.0, 636.0, 579.0, 633.0, 636.0, 582.0, 639.0, 630.0, 633.0, 633.0, 633.0, 630.0, 627.0, 633.0, 627.0, 582.0, 633.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 293.0, 294.0, 313.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 314.0, 316.0, 314.0, 293.0, 294.0, 316.0, 320.0, 314.0, 322.0, 311.0, 316.0, 319.0, 320.0, 314.0, 322.0, 314.0, 319.0, 287.0, 282.0, 311.0, 322.0, 264.0, 258.0, 311.0, 319.0, 319.0, 317.0, 319.0, 314.0, 317.0, 316.0, 321.0, 309.0, 319.0, 314.0, 288.0, 291.0, 319.0, 317.0, 311.0, 322.0, 319.0, 314.0, 322.0, 317.0, 294.0, 290.0, 321.0, 312.0, 319.0, 314.0, 311.0, 319.0, 314.0, 316.0, 319.0, 320.0, 319.0, 317.0, 316.0, 320.0, 285.0, 288.0, 322.0, 317.0, 316.0, 317.0, 316.0, 320.0, 311.0, 319.0, 282.0, 288.0, 316.0, 314.0, 319.0, 314.0, 294.0, 285.0, 314.0, 316.0, 319.0, 320.0, 307.0, 286.0, 314.0, 319.0, 314.0, 322.0, 313.0, 314.0, 287.0, 292.0, 288.0, 302.0, 324.0, 312.0, 237.0, 231.0, 291.0, 296.0, 282.0, 294.0, 291.0, 291.0, 289.0, 293.0, 319.0, 311.0, 314.0, 322.0, 324.0, 312.0, 314.0, 319.0, 314.0, 319.0, 314.0, 316.0, 316.0, 317.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 286.0, 296.0, 319.0, 317.0, 230.0, 237.0, 287.0, 289.0, 317.0, 322.0, 291.0, 291.0, 288.0, 294.0, 310.0, 317.0, 314.0, 322.0, 322.0, 317.0, 308.0, 319.0, 290.0, 289.0, 298.0, 289.0, 324.0, 312.0, 291.0, 288.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 319.0, 320.0, 311.0, 319.0, 311.0, 322.0, 319.0, 314.0, 318.0, 315.0, 316.0, 
314.0, 316.0, 311.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 311.0, 322.0, 321.0, 312.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8462923346422779, "mean_processing_ms": 0.23764003789944027, "mean_inference_ms": 1.4442668898213196}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9144000, "num_steps_sampled": 4876800, "sample_time_ms": 21323.81, "load_time_ms": 36.975, "grad_time_ms": 9811.843, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005420349538326263, "policy_loss": -0.0019105566898360848, "vf_loss": 78.93695068359375, "vf_explained_var": 0.772759735584259, "kl": 0.0018517466960474849, "entropy": 1.1255789995193481, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4876800, "episodes_total": 12192, "training_iteration": 381, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-02-08", "timestamp": 1660258928, "time_this_iter_s": 30.169427156448364, "time_total_s": 17339.36741757393, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17339.36741757393, "timesteps_since_restore": 4876800, "iterations_since_restore": 381, "perf": {"cpu_util_percent": 34.1, "ram_util_percent": 58.95116279069769}} -{"episode_reward_max": 639.0, "episode_reward_min": 467.0, "episode_reward_mean": 615.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.735}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.67, 
"shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.69, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.01, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.36, 
"potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.93, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.19, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.93, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.93, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 633.0, 630.0, 633.0, 636.0, 567.0, 627.0, 582.0, 630.0, 630.0, 582.0, 633.0, 636.0, 579.0, 636.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 633.0, 630.0, 578.0, 587.0, 530.0, 587.0, 630.0, 630.0, 522.0, 582.0, 630.0, 633.0, 630.0, 630.0, 627.0, 582.0, 636.0, 467.0, 576.0, 639.0, 582.0, 582.0, 627.0, 636.0, 639.0, 
627.0, 579.0, 587.0, 636.0, 579.0, 633.0, 636.0, 582.0, 639.0, 630.0, 633.0, 633.0, 633.0, 630.0, 627.0, 633.0, 627.0, 582.0, 633.0, 633.0, 636.0, 579.0, 587.0, 636.0, 639.0, 639.0, 633.0, 630.0, 587.0, 636.0, 636.0, 627.0, 639.0, 636.0, 633.0, 569.0, 633.0, 522.0, 630.0, 636.0, 633.0, 633.0, 630.0, 633.0, 579.0, 636.0, 633.0, 633.0, 639.0, 584.0, 633.0, 633.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 324.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 316.0, 317.0, 319.0, 317.0, 282.0, 285.0, 329.0, 298.0, 291.0, 291.0, 308.0, 322.0, 308.0, 322.0, 291.0, 291.0, 309.0, 324.0, 319.0, 317.0, 290.0, 289.0, 319.0, 317.0, 316.0, 320.0, 326.0, 304.0, 316.0, 314.0, 314.0, 316.0, 290.0, 292.0, 314.0, 316.0, 319.0, 314.0, 318.0, 312.0, 293.0, 285.0, 296.0, 291.0, 271.0, 259.0, 293.0, 294.0, 316.0, 314.0, 313.0, 317.0, 265.0, 257.0, 293.0, 289.0, 314.0, 316.0, 316.0, 317.0, 308.0, 322.0, 319.0, 311.0, 308.0, 319.0, 286.0, 296.0, 319.0, 317.0, 230.0, 237.0, 287.0, 289.0, 317.0, 322.0, 291.0, 291.0, 288.0, 294.0, 310.0, 317.0, 314.0, 322.0, 322.0, 317.0, 308.0, 319.0, 290.0, 289.0, 298.0, 289.0, 324.0, 312.0, 291.0, 288.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 319.0, 320.0, 311.0, 319.0, 311.0, 322.0, 319.0, 314.0, 318.0, 315.0, 316.0, 314.0, 316.0, 311.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 311.0, 322.0, 321.0, 312.0, 319.0, 317.0, 288.0, 291.0, 293.0, 294.0, 313.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 314.0, 316.0, 314.0, 293.0, 294.0, 316.0, 320.0, 314.0, 
322.0, 311.0, 316.0, 319.0, 320.0, 314.0, 322.0, 314.0, 319.0, 287.0, 282.0, 311.0, 322.0, 264.0, 258.0, 311.0, 319.0, 319.0, 317.0, 319.0, 314.0, 317.0, 316.0, 321.0, 309.0, 319.0, 314.0, 288.0, 291.0, 319.0, 317.0, 311.0, 322.0, 319.0, 314.0, 322.0, 317.0, 294.0, 290.0, 321.0, 312.0, 319.0, 314.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.845260231721656, "mean_processing_ms": 0.23743439147057216, "mean_inference_ms": 1.443180349457874}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9168000, "num_steps_sampled": 4889600, "sample_time_ms": 21404.999, "load_time_ms": 36.99, "grad_time_ms": 9811.226, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002549513941630721, "policy_loss": -0.004884073510766029, "vf_loss": 79.93880462646484, "vf_explained_var": 0.7685417532920837, "kl": 0.002187439240515232, "entropy": 1.1205859184265137, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4889600, "episodes_total": 12224, "training_iteration": 382, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-02-39", "timestamp": 1660258959, "time_this_iter_s": 30.454362154006958, "time_total_s": 17369.821779727936, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17369.821779727936, "timesteps_since_restore": 4889600, "iterations_since_restore": 382, "perf": {"cpu_util_percent": 33.88139534883721, "ram_util_percent": 58.944186046511625}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 617.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 257.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.555}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.11, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.09, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.61, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.82, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.05, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.46, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.46, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 14, 
"optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.46, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 587.0, 630.0, 582.0, 582.0, 633.0, 627.0, 576.0, 587.0, 624.0, 636.0, 633.0, 633.0, 636.0, 630.0, 630.0, 
633.0, 630.0, 636.0, 584.0, 570.0, 630.0, 639.0, 630.0, 630.0, 630.0, 639.0, 582.0, 633.0, 636.0, 633.0, 582.0, 633.0, 633.0, 636.0, 579.0, 587.0, 636.0, 639.0, 639.0, 633.0, 630.0, 587.0, 636.0, 636.0, 627.0, 639.0, 636.0, 633.0, 569.0, 633.0, 522.0, 630.0, 636.0, 633.0, 633.0, 630.0, 633.0, 579.0, 636.0, 633.0, 633.0, 639.0, 584.0, 633.0, 633.0, 630.0, 633.0, 633.0, 633.0, 630.0, 633.0, 636.0, 567.0, 627.0, 582.0, 630.0, 630.0, 582.0, 633.0, 636.0, 579.0, 636.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 633.0, 630.0, 578.0, 587.0, 530.0, 587.0, 630.0, 630.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 319.0, 314.0, 291.0, 296.0, 313.0, 317.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 314.0, 313.0, 291.0, 285.0, 296.0, 291.0, 304.0, 320.0, 314.0, 322.0, 316.0, 317.0, 318.0, 315.0, 319.0, 317.0, 311.0, 319.0, 311.0, 319.0, 317.0, 316.0, 311.0, 319.0, 317.0, 319.0, 299.0, 285.0, 285.0, 285.0, 308.0, 322.0, 319.0, 320.0, 316.0, 314.0, 316.0, 314.0, 308.0, 322.0, 319.0, 320.0, 288.0, 294.0, 317.0, 316.0, 318.0, 318.0, 319.0, 314.0, 291.0, 291.0, 311.0, 322.0, 321.0, 312.0, 319.0, 317.0, 288.0, 291.0, 293.0, 294.0, 313.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 314.0, 316.0, 314.0, 293.0, 294.0, 316.0, 320.0, 314.0, 322.0, 311.0, 316.0, 319.0, 320.0, 314.0, 322.0, 314.0, 319.0, 287.0, 282.0, 311.0, 322.0, 264.0, 258.0, 311.0, 319.0, 319.0, 317.0, 319.0, 314.0, 317.0, 316.0, 321.0, 309.0, 319.0, 314.0, 288.0, 291.0, 319.0, 317.0, 311.0, 322.0, 319.0, 
314.0, 322.0, 317.0, 294.0, 290.0, 321.0, 312.0, 319.0, 314.0, 311.0, 319.0, 309.0, 324.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 316.0, 317.0, 319.0, 317.0, 282.0, 285.0, 329.0, 298.0, 291.0, 291.0, 308.0, 322.0, 308.0, 322.0, 291.0, 291.0, 309.0, 324.0, 319.0, 317.0, 290.0, 289.0, 319.0, 317.0, 316.0, 320.0, 326.0, 304.0, 316.0, 314.0, 314.0, 316.0, 290.0, 292.0, 314.0, 316.0, 319.0, 314.0, 318.0, 312.0, 293.0, 285.0, 296.0, 291.0, 271.0, 259.0, 293.0, 294.0, 316.0, 314.0, 313.0, 317.0, 265.0, 257.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8442347187033086, "mean_processing_ms": 0.23722937400390215, "mean_inference_ms": 1.442092522505549}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9192000, "num_steps_sampled": 4902400, "sample_time_ms": 21412.011, "load_time_ms": 37.029, "grad_time_ms": 9601.556, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016393003752455115, "policy_loss": -0.005780236795544624, "vf_loss": 79.79308319091797, "vf_explained_var": 0.7686330676078796, "kl": 0.001640369649976492, "entropy": 1.1195478439331055, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4902400, "episodes_total": 12256, "training_iteration": 383, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-03-09", "timestamp": 1660258989, "time_this_iter_s": 30.096380949020386, "time_total_s": 17399.918160676956, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17399.918160676956, "timesteps_since_restore": 4902400, "iterations_since_restore": 383, "perf": {"cpu_util_percent": 34.86279069767443, 
"ram_util_percent": 58.95348837209304}} -{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 614.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.08}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.16, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.96, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.63, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, 
"onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 16.37, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 639.0, 584.0, 507.0, 573.0, 630.0, 582.0, 630.0, 633.0, 579.0, 582.0, 564.0, 633.0, 636.0, 582.0, 639.0, 582.0, 621.0, 636.0, 630.0, 633.0, 639.0, 579.0, 639.0, 639.0, 639.0, 639.0, 579.0, 639.0, 579.0, 636.0, 584.0, 633.0, 633.0, 630.0, 633.0, 633.0, 633.0, 630.0, 633.0, 636.0, 567.0, 627.0, 582.0, 630.0, 630.0, 582.0, 633.0, 636.0, 579.0, 636.0, 636.0, 630.0, 630.0, 630.0, 582.0, 630.0, 633.0, 630.0, 578.0, 587.0, 530.0, 587.0, 630.0, 630.0, 522.0, 582.0, 630.0, 633.0, 587.0, 630.0, 582.0, 582.0, 633.0, 627.0, 576.0, 587.0, 624.0, 636.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 630.0, 636.0, 584.0, 570.0, 630.0, 639.0, 630.0, 630.0, 630.0, 639.0, 582.0, 633.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 316.0, 306.0, 324.0, 314.0, 325.0, 293.0, 291.0, 253.0, 254.0, 291.0, 282.0, 314.0, 316.0, 288.0, 294.0, 319.0, 311.0, 324.0, 309.0, 287.0, 292.0, 293.0, 289.0, 274.0, 290.0, 322.0, 311.0, 316.0, 320.0, 288.0, 294.0, 316.0, 323.0, 293.0, 289.0, 308.0, 313.0, 319.0, 317.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 293.0, 286.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 319.0, 320.0, 290.0, 289.0, 317.0, 322.0, 279.0, 300.0, 322.0, 314.0, 294.0, 290.0, 321.0, 312.0, 319.0, 314.0, 311.0, 319.0, 309.0, 324.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 316.0, 317.0, 319.0, 317.0, 282.0, 285.0, 329.0, 298.0, 291.0, 291.0, 308.0, 
322.0, 308.0, 322.0, 291.0, 291.0, 309.0, 324.0, 319.0, 317.0, 290.0, 289.0, 319.0, 317.0, 316.0, 320.0, 326.0, 304.0, 316.0, 314.0, 314.0, 316.0, 290.0, 292.0, 314.0, 316.0, 319.0, 314.0, 318.0, 312.0, 293.0, 285.0, 296.0, 291.0, 271.0, 259.0, 293.0, 294.0, 316.0, 314.0, 313.0, 317.0, 265.0, 257.0, 293.0, 289.0, 316.0, 314.0, 319.0, 314.0, 291.0, 296.0, 313.0, 317.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 314.0, 313.0, 291.0, 285.0, 296.0, 291.0, 304.0, 320.0, 314.0, 322.0, 316.0, 317.0, 318.0, 315.0, 319.0, 317.0, 311.0, 319.0, 311.0, 319.0, 317.0, 316.0, 311.0, 319.0, 317.0, 319.0, 299.0, 285.0, 285.0, 285.0, 308.0, 322.0, 319.0, 320.0, 316.0, 314.0, 316.0, 314.0, 308.0, 322.0, 319.0, 320.0, 288.0, 294.0, 317.0, 316.0, 318.0, 318.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8432201147992054, "mean_processing_ms": 0.23702861990301977, "mean_inference_ms": 1.4411244641965177}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9216000, "num_steps_sampled": 4915200, "sample_time_ms": 21556.377, "load_time_ms": 37.038, "grad_time_ms": 9524.16, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002193765016272664, "policy_loss": -0.0053141750395298, "vf_loss": 80.70391082763672, "vf_explained_var": 0.7705557942390442, "kl": 0.0018228074768558145, "entropy": 1.124890685081482, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4915200, "episodes_total": 12288, "training_iteration": 384, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-03-43", "timestamp": 1660259023, "time_this_iter_s": 33.80204796791077, "time_total_s": 17433.720208644867, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17433.720208644867, "timesteps_since_restore": 4915200, "iterations_since_restore": 384, "perf": {"cpu_util_percent": 36.197872340425526, "ram_util_percent": 59.73617021276596}} -{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 611.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 305.76}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.52, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.93, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.88, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.34, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.77, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, 
"soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.34, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.77, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.34, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.77, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 630.0, 582.0, 633.0, 639.0, 630.0, 633.0, 630.0, 587.0, 567.0, 633.0, 582.0, 639.0, 582.0, 630.0, 582.0, 582.0, 630.0, 582.0, 582.0, 630.0, 630.0, 576.0, 636.0, 573.0, 573.0, 633.0, 582.0, 587.0, 636.0, 636.0, 579.0, 630.0, 630.0, 522.0, 582.0, 630.0, 633.0, 587.0, 630.0, 582.0, 582.0, 633.0, 627.0, 576.0, 587.0, 624.0, 636.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 630.0, 636.0, 584.0, 570.0, 630.0, 639.0, 630.0, 630.0, 630.0, 639.0, 582.0, 633.0, 636.0, 633.0, 633.0, 630.0, 639.0, 584.0, 507.0, 573.0, 630.0, 582.0, 630.0, 633.0, 579.0, 582.0, 564.0, 633.0, 636.0, 582.0, 639.0, 582.0, 621.0, 636.0, 630.0, 633.0, 639.0, 579.0, 639.0, 639.0, 639.0, 639.0, 579.0, 639.0, 579.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 316.0, 314.0, 283.0, 299.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 282.0, 285.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 317.0, 313.0, 290.0, 292.0, 288.0, 294.0, 308.0, 322.0, 292.0, 290.0, 297.0, 285.0, 313.0, 317.0, 316.0, 314.0, 285.0, 291.0, 322.0, 314.0, 288.0, 285.0, 284.0, 289.0, 319.0, 314.0, 282.0, 300.0, 283.0, 304.0, 319.0, 317.0, 
314.0, 322.0, 288.0, 291.0, 316.0, 314.0, 313.0, 317.0, 265.0, 257.0, 293.0, 289.0, 316.0, 314.0, 319.0, 314.0, 291.0, 296.0, 313.0, 317.0, 291.0, 291.0, 285.0, 297.0, 319.0, 314.0, 314.0, 313.0, 291.0, 285.0, 296.0, 291.0, 304.0, 320.0, 314.0, 322.0, 316.0, 317.0, 318.0, 315.0, 319.0, 317.0, 311.0, 319.0, 311.0, 319.0, 317.0, 316.0, 311.0, 319.0, 317.0, 319.0, 299.0, 285.0, 285.0, 285.0, 308.0, 322.0, 319.0, 320.0, 316.0, 314.0, 316.0, 314.0, 308.0, 322.0, 319.0, 320.0, 288.0, 294.0, 317.0, 316.0, 318.0, 318.0, 319.0, 314.0, 317.0, 316.0, 306.0, 324.0, 314.0, 325.0, 293.0, 291.0, 253.0, 254.0, 291.0, 282.0, 314.0, 316.0, 288.0, 294.0, 319.0, 311.0, 324.0, 309.0, 287.0, 292.0, 293.0, 289.0, 274.0, 290.0, 322.0, 311.0, 316.0, 320.0, 288.0, 294.0, 316.0, 323.0, 293.0, 289.0, 308.0, 313.0, 319.0, 317.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 293.0, 286.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 319.0, 320.0, 290.0, 289.0, 317.0, 322.0, 279.0, 300.0, 322.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8422101834912255, "mean_processing_ms": 0.23682867408475425, "mean_inference_ms": 1.4402525240623634}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9240000, "num_steps_sampled": 4928000, "sample_time_ms": 21695.699, "load_time_ms": 36.564, "grad_time_ms": 9521.463, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004186244681477547, "policy_loss": -0.0031982522923499346, "vf_loss": 79.4544906616211, "vf_explained_var": 0.7711065411567688, "kl": 0.002083372324705124, "entropy": 1.1219121217727661, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4928000, "episodes_total": 12320, "training_iteration": 385, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-04-16", "timestamp": 1660259056, "time_this_iter_s": 33.08577585220337, "time_total_s": 17466.80598449707, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17466.80598449707, "timesteps_since_restore": 4928000, "iterations_since_restore": 385, "perf": {"cpu_util_percent": 33.6468085106383, "ram_util_percent": 59.0808510638298}} -{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 609.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.845}, "custom_metrics": {"sparse_reward_mean": 211.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.29, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.76, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.23, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.94, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.55, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, 
"soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.41, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 590.0, 630.0, 636.0, 525.0, 530.0, 582.0, 587.0, 633.0, 627.0, 639.0, 624.0, 582.0, 630.0, 639.0, 630.0, 582.0, 636.0, 582.0, 522.0, 636.0, 633.0, 630.0, 627.0, 519.0, 639.0, 627.0, 636.0, 633.0, 636.0, 636.0, 627.0, 582.0, 633.0, 636.0, 633.0, 633.0, 630.0, 639.0, 584.0, 507.0, 573.0, 630.0, 582.0, 630.0, 633.0, 579.0, 582.0, 564.0, 633.0, 636.0, 582.0, 639.0, 582.0, 621.0, 636.0, 630.0, 633.0, 639.0, 579.0, 639.0, 639.0, 639.0, 639.0, 579.0, 639.0, 579.0, 636.0, 573.0, 630.0, 582.0, 633.0, 639.0, 630.0, 633.0, 630.0, 587.0, 567.0, 633.0, 582.0, 639.0, 582.0, 630.0, 582.0, 582.0, 630.0, 582.0, 582.0, 630.0, 630.0, 576.0, 636.0, 573.0, 573.0, 633.0, 582.0, 587.0, 636.0, 636.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 296.0, 294.0, 314.0, 316.0, 322.0, 314.0, 262.0, 263.0, 281.0, 249.0, 288.0, 294.0, 294.0, 293.0, 316.0, 317.0, 316.0, 311.0, 319.0, 320.0, 308.0, 316.0, 291.0, 291.0, 308.0, 322.0, 319.0, 320.0, 322.0, 
308.0, 288.0, 294.0, 317.0, 319.0, 296.0, 286.0, 249.0, 273.0, 314.0, 322.0, 316.0, 317.0, 308.0, 322.0, 311.0, 316.0, 268.0, 251.0, 322.0, 317.0, 316.0, 311.0, 317.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 316.0, 311.0, 288.0, 294.0, 317.0, 316.0, 318.0, 318.0, 319.0, 314.0, 317.0, 316.0, 306.0, 324.0, 314.0, 325.0, 293.0, 291.0, 253.0, 254.0, 291.0, 282.0, 314.0, 316.0, 288.0, 294.0, 319.0, 311.0, 324.0, 309.0, 287.0, 292.0, 293.0, 289.0, 274.0, 290.0, 322.0, 311.0, 316.0, 320.0, 288.0, 294.0, 316.0, 323.0, 293.0, 289.0, 308.0, 313.0, 319.0, 317.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 293.0, 286.0, 319.0, 320.0, 317.0, 322.0, 319.0, 320.0, 319.0, 320.0, 290.0, 289.0, 317.0, 322.0, 279.0, 300.0, 322.0, 314.0, 285.0, 288.0, 316.0, 314.0, 283.0, 299.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 282.0, 285.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 317.0, 313.0, 290.0, 292.0, 288.0, 294.0, 308.0, 322.0, 292.0, 290.0, 297.0, 285.0, 313.0, 317.0, 316.0, 314.0, 285.0, 291.0, 322.0, 314.0, 288.0, 285.0, 284.0, 289.0, 319.0, 314.0, 282.0, 300.0, 283.0, 304.0, 319.0, 317.0, 314.0, 322.0, 288.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8412065546272243, "mean_processing_ms": 0.23663028686685655, "mean_inference_ms": 1.4395227599407487}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9264000, "num_steps_sampled": 4940800, "sample_time_ms": 21927.826, "load_time_ms": 36.438, "grad_time_ms": 9711.085, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002509244019165635, "policy_loss": -0.005174743477255106, "vf_loss": 82.38806915283203, "vf_explained_var": 0.7595655918121338, "kl": 0.0020332231651991606, "entropy": 1.1096264123916626, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4940800, "episodes_total": 12352, 
"training_iteration": 386, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-04-49", "timestamp": 1660259089, "time_this_iter_s": 33.33205199241638, "time_total_s": 17500.138036489487, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17500.138036489487, "timesteps_since_restore": 4940800, "iterations_since_restore": 386, "perf": {"cpu_util_percent": 33.47234042553192, "ram_util_percent": 59.114893617021295}} -{"episode_reward_max": 639.0, "episode_reward_min": 365.0, "episode_reward_mean": 607.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.515}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.63, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.05, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.39, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.39, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.39, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 582.0, 633.0, 633.0, 630.0, 582.0, 627.0, 630.0, 636.0, 506.0, 630.0, 639.0, 630.0, 582.0, 630.0, 590.0, 584.0, 584.0, 365.0, 639.0, 582.0, 639.0, 587.0, 633.0, 633.0, 627.0, 579.0, 639.0, 579.0, 636.0, 573.0, 630.0, 582.0, 633.0, 639.0, 630.0, 633.0, 630.0, 587.0, 567.0, 633.0, 582.0, 639.0, 582.0, 630.0, 582.0, 582.0, 630.0, 582.0, 582.0, 630.0, 630.0, 576.0, 636.0, 573.0, 573.0, 633.0, 582.0, 587.0, 636.0, 636.0, 579.0, 636.0, 590.0, 630.0, 636.0, 525.0, 530.0, 582.0, 587.0, 633.0, 627.0, 639.0, 624.0, 582.0, 630.0, 639.0, 630.0, 582.0, 636.0, 582.0, 522.0, 636.0, 633.0, 630.0, 627.0, 519.0, 639.0, 627.0, 636.0, 633.0, 636.0, 636.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 285.0, 297.0, 
315.0, 324.0, 313.0, 320.0, 319.0, 317.0, 322.0, 317.0, 282.0, 276.0, 296.0, 286.0, 319.0, 314.0, 317.0, 316.0, 302.0, 328.0, 291.0, 291.0, 312.0, 315.0, 303.0, 327.0, 314.0, 322.0, 249.0, 257.0, 316.0, 314.0, 317.0, 322.0, 316.0, 314.0, 287.0, 295.0, 319.0, 311.0, 299.0, 291.0, 293.0, 291.0, 296.0, 288.0, 183.0, 182.0, 320.0, 319.0, 288.0, 294.0, 319.0, 320.0, 296.0, 291.0, 316.0, 317.0, 317.0, 316.0, 308.0, 319.0, 290.0, 289.0, 317.0, 322.0, 279.0, 300.0, 322.0, 314.0, 285.0, 288.0, 316.0, 314.0, 283.0, 299.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 293.0, 294.0, 282.0, 285.0, 316.0, 317.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 317.0, 313.0, 290.0, 292.0, 288.0, 294.0, 308.0, 322.0, 292.0, 290.0, 297.0, 285.0, 313.0, 317.0, 316.0, 314.0, 285.0, 291.0, 322.0, 314.0, 288.0, 285.0, 284.0, 289.0, 319.0, 314.0, 282.0, 300.0, 283.0, 304.0, 319.0, 317.0, 314.0, 322.0, 288.0, 291.0, 319.0, 317.0, 296.0, 294.0, 314.0, 316.0, 322.0, 314.0, 262.0, 263.0, 281.0, 249.0, 288.0, 294.0, 294.0, 293.0, 316.0, 317.0, 316.0, 311.0, 319.0, 320.0, 308.0, 316.0, 291.0, 291.0, 308.0, 322.0, 319.0, 320.0, 322.0, 308.0, 288.0, 294.0, 317.0, 319.0, 296.0, 286.0, 249.0, 273.0, 314.0, 322.0, 316.0, 317.0, 308.0, 322.0, 311.0, 316.0, 268.0, 251.0, 322.0, 317.0, 316.0, 311.0, 317.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 316.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.840202535823751, "mean_processing_ms": 0.2364293047857562, "mean_inference_ms": 1.4386641797535789}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9288000, "num_steps_sampled": 4953600, "sample_time_ms": 21835.055, "load_time_ms": 36.557, "grad_time_ms": 9742.403, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00015397991228383034, "policy_loss": -0.007473704870790243, "vf_loss": 81.87383270263672, "vf_explained_var": 0.7745316028594971, "kl": 0.0020445811096578836, "entropy": 1.1193923950195312, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4953600, "episodes_total": 12384, "training_iteration": 387, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-05-19", "timestamp": 1660259119, "time_this_iter_s": 30.124536752700806, "time_total_s": 17530.262573242188, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17530.262573242188, "timesteps_since_restore": 4953600, "iterations_since_restore": 387, "perf": {"cpu_util_percent": 31.948837209302326, "ram_util_percent": 58.95581395348838}} -{"episode_reward_max": 639.0, "episode_reward_min": 365.0, "episode_reward_mean": 611.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 305.82}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 187.64, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.96, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.89, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.31, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.55, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.51, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.09, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.41, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.55, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.51, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.55, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.51, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 639.0, 639.0, 627.0, 627.0, 633.0, 627.0, 639.0, 636.0, 636.0, 584.0, 573.0, 636.0, 630.0, 573.0, 630.0, 633.0, 630.0, 627.0, 636.0, 630.0, 636.0, 630.0, 630.0, 627.0, 636.0, 630.0, 630.0, 587.0, 579.0, 570.0, 633.0, 587.0, 636.0, 636.0, 579.0, 636.0, 590.0, 630.0, 636.0, 525.0, 530.0, 582.0, 587.0, 633.0, 627.0, 639.0, 624.0, 582.0, 630.0, 639.0, 630.0, 582.0, 636.0, 582.0, 522.0, 636.0, 633.0, 630.0, 627.0, 519.0, 639.0, 627.0, 636.0, 633.0, 636.0, 636.0, 627.0, 630.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 582.0, 633.0, 633.0, 630.0, 582.0, 627.0, 630.0, 636.0, 506.0, 630.0, 639.0, 630.0, 582.0, 630.0, 590.0, 584.0, 584.0, 365.0, 639.0, 582.0, 639.0, 587.0, 633.0, 633.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 317.0, 322.0, 317.0, 322.0, 311.0, 316.0, 308.0, 319.0, 319.0, 314.0, 314.0, 313.0, 316.0, 323.0, 314.0, 322.0, 319.0, 317.0, 291.0, 293.0, 298.0, 275.0, 324.0, 312.0, 323.0, 307.0, 281.0, 292.0, 308.0, 322.0, 317.0, 316.0, 311.0, 319.0, 313.0, 314.0, 319.0, 317.0, 314.0, 316.0, 319.0, 317.0, 316.0, 314.0, 316.0, 314.0, 316.0, 311.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 293.0, 294.0, 288.0, 291.0, 295.0, 275.0, 319.0, 314.0, 283.0, 304.0, 319.0, 317.0, 314.0, 322.0, 288.0, 291.0, 319.0, 317.0, 296.0, 294.0, 314.0, 316.0, 322.0, 314.0, 262.0, 263.0, 281.0, 249.0, 288.0, 294.0, 294.0, 293.0, 316.0, 317.0, 316.0, 311.0, 319.0, 320.0, 308.0, 316.0, 291.0, 291.0, 308.0, 322.0, 319.0, 320.0, 322.0, 308.0, 288.0, 294.0, 317.0, 319.0, 296.0, 286.0, 249.0, 273.0, 314.0, 322.0, 316.0, 317.0, 308.0, 322.0, 311.0, 316.0, 268.0, 251.0, 322.0, 317.0, 316.0, 311.0, 317.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 316.0, 311.0, 316.0, 314.0, 285.0, 297.0, 315.0, 324.0, 313.0, 320.0, 319.0, 317.0, 322.0, 317.0, 282.0, 276.0, 296.0, 286.0, 319.0, 314.0, 317.0, 316.0, 302.0, 328.0, 291.0, 291.0, 312.0, 315.0, 303.0, 327.0, 314.0, 322.0, 249.0, 257.0, 316.0, 314.0, 317.0, 322.0, 316.0, 314.0, 287.0, 295.0, 319.0, 311.0, 299.0, 291.0, 293.0, 291.0, 296.0, 288.0, 183.0, 182.0, 320.0, 319.0, 288.0, 294.0, 319.0, 320.0, 296.0, 291.0, 316.0, 317.0, 317.0, 316.0, 308.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8391970766904484, "mean_processing_ms": 0.23622675772030782, "mean_inference_ms": 1.4376630323224628}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9312000, "num_steps_sampled": 4966400, "sample_time_ms": 21599.042, "load_time_ms": 36.591, "grad_time_ms": 9790.428, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.0022788590285927057, "policy_loss": -0.00476012472063303, "vf_loss": 75.92620849609375, "vf_explained_var": 0.7665655016899109, "kl": 0.0023904216941446066, "entropy": 1.1072710752487183, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4966400, "episodes_total": 12416, "training_iteration": 388, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-05-50", "timestamp": 1660259150, "time_this_iter_s": 30.593504667282104, "time_total_s": 17560.85607790947, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17560.85607790947, "timesteps_since_restore": 4966400, "iterations_since_restore": 388, "perf": {"cpu_util_percent": 33.502325581395354, "ram_util_percent": 59.08372093023256}} -{"episode_reward_max": 639.0, "episode_reward_min": 365.0, "episode_reward_mean": 614.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 182.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 307.46}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 188.12, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.31, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 17.01, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.31, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.61, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, 
"useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.21, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.61, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.61, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 639.0, 579.0, 582.0, 633.0, 579.0, 633.0, 636.0, 630.0, 633.0, 609.0, 582.0, 636.0, 630.0, 636.0, 576.0, 636.0, 633.0, 630.0, 636.0, 582.0, 582.0, 636.0, 630.0, 633.0, 630.0, 636.0, 579.0, 633.0, 570.0, 633.0, 633.0, 636.0, 636.0, 627.0, 630.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 582.0, 633.0, 633.0, 630.0, 582.0, 627.0, 630.0, 636.0, 506.0, 630.0, 639.0, 630.0, 582.0, 630.0, 590.0, 584.0, 584.0, 365.0, 639.0, 582.0, 639.0, 587.0, 633.0, 633.0, 627.0, 582.0, 639.0, 639.0, 627.0, 627.0, 633.0, 627.0, 639.0, 636.0, 636.0, 584.0, 573.0, 636.0, 630.0, 573.0, 630.0, 633.0, 630.0, 627.0, 636.0, 630.0, 636.0, 630.0, 630.0, 627.0, 636.0, 630.0, 630.0, 587.0, 579.0, 570.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 322.0, 319.0, 311.0, 317.0, 322.0, 282.0, 297.0, 294.0, 288.0, 316.0, 317.0, 288.0, 291.0, 311.0, 322.0, 314.0, 322.0, 313.0, 317.0, 317.0, 316.0, 310.0, 299.0, 293.0, 289.0, 317.0, 319.0, 322.0, 308.0, 319.0, 317.0, 288.0, 288.0, 314.0, 322.0, 316.0, 317.0, 314.0, 316.0, 322.0, 314.0, 285.0, 297.0, 294.0, 288.0, 314.0, 322.0, 321.0, 309.0, 316.0, 317.0, 311.0, 319.0, 311.0, 325.0, 282.0, 297.0, 319.0, 314.0, 287.0, 283.0, 319.0, 314.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 316.0, 311.0, 316.0, 314.0, 285.0, 297.0, 315.0, 324.0, 313.0, 320.0, 319.0, 317.0, 322.0, 317.0, 282.0, 276.0, 296.0, 286.0, 319.0, 314.0, 317.0, 316.0, 302.0, 328.0, 291.0, 291.0, 312.0, 315.0, 303.0, 327.0, 314.0, 322.0, 249.0, 257.0, 316.0, 314.0, 317.0, 322.0, 316.0, 314.0, 287.0, 295.0, 319.0, 311.0, 299.0, 291.0, 293.0, 291.0, 296.0, 288.0, 183.0, 182.0, 320.0, 319.0, 288.0, 294.0, 319.0, 320.0, 296.0, 291.0, 316.0, 317.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 317.0, 322.0, 317.0, 322.0, 311.0, 316.0, 308.0, 319.0, 319.0, 314.0, 314.0, 313.0, 316.0, 323.0, 314.0, 322.0, 319.0, 317.0, 291.0, 293.0, 298.0, 275.0, 324.0, 312.0, 323.0, 307.0, 281.0, 292.0, 308.0, 322.0, 317.0, 316.0, 311.0, 319.0, 313.0, 314.0, 319.0, 317.0, 314.0, 316.0, 319.0, 317.0, 316.0, 314.0, 316.0, 314.0, 316.0, 311.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 293.0, 294.0, 288.0, 291.0, 295.0, 275.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.838196919665678, "mean_processing_ms": 0.23602508803676062, "mean_inference_ms": 1.4365150313753652}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9336000, 
"num_steps_sampled": 4979200, "sample_time_ms": 21400.03, "load_time_ms": 36.799, "grad_time_ms": 9763.193, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015943764010444283, "policy_loss": -0.005665285978466272, "vf_loss": 78.14656829833984, "vf_explained_var": 0.7691711783409119, "kl": 0.0016816608840599656, "entropy": 1.1099879741668701, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4979200, "episodes_total": 12448, "training_iteration": 389, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-06-20", "timestamp": 1660259180, "time_this_iter_s": 29.990082025527954, "time_total_s": 17590.846159934998, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": 
false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17590.846159934998, "timesteps_since_restore": 4979200, "iterations_since_restore": 389, "perf": {"cpu_util_percent": 36.06279069767442, "ram_util_percent": 59.181395348837206}} -{"episode_reward_max": 639.0, "episode_reward_min": 567.0, "episode_reward_mean": 617.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 275.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 308.805}, "custom_metrics": {"sparse_reward_mean": 214.4, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 188.81, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 
0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.3, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.97, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.76, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.68, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.18, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 3, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.24, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.22, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.68, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.68, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 633.0, 633.0, 582.0, 579.0, 582.0, 633.0, 579.0, 633.0, 633.0, 576.0, 567.0, 636.0, 576.0, 636.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 630.0, 630.0, 633.0, 633.0, 633.0, 579.0, 639.0, 633.0, 579.0, 587.0, 633.0, 633.0, 627.0, 582.0, 639.0, 639.0, 627.0, 627.0, 633.0, 627.0, 639.0, 636.0, 636.0, 584.0, 573.0, 636.0, 630.0, 573.0, 630.0, 633.0, 630.0, 627.0, 636.0, 630.0, 636.0, 630.0, 630.0, 627.0, 636.0, 630.0, 630.0, 587.0, 579.0, 570.0, 633.0, 633.0, 630.0, 639.0, 579.0, 582.0, 633.0, 579.0, 633.0, 636.0, 630.0, 633.0, 609.0, 582.0, 636.0, 630.0, 636.0, 576.0, 636.0, 633.0, 630.0, 636.0, 582.0, 582.0, 636.0, 630.0, 633.0, 
630.0, 636.0, 579.0, 633.0, 570.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 319.0, 314.0, 311.0, 322.0, 285.0, 297.0, 287.0, 292.0, 291.0, 291.0, 316.0, 317.0, 288.0, 291.0, 310.0, 323.0, 317.0, 316.0, 298.0, 278.0, 282.0, 285.0, 314.0, 322.0, 292.0, 284.0, 316.0, 320.0, 318.0, 315.0, 317.0, 322.0, 319.0, 317.0, 288.0, 291.0, 321.0, 315.0, 308.0, 319.0, 314.0, 322.0, 314.0, 322.0, 319.0, 311.0, 312.0, 318.0, 316.0, 317.0, 316.0, 317.0, 319.0, 314.0, 285.0, 294.0, 317.0, 322.0, 311.0, 322.0, 288.0, 291.0, 296.0, 291.0, 316.0, 317.0, 317.0, 316.0, 308.0, 319.0, 291.0, 291.0, 317.0, 322.0, 317.0, 322.0, 311.0, 316.0, 308.0, 319.0, 319.0, 314.0, 314.0, 313.0, 316.0, 323.0, 314.0, 322.0, 319.0, 317.0, 291.0, 293.0, 298.0, 275.0, 324.0, 312.0, 323.0, 307.0, 281.0, 292.0, 308.0, 322.0, 317.0, 316.0, 311.0, 319.0, 313.0, 314.0, 319.0, 317.0, 314.0, 316.0, 319.0, 317.0, 316.0, 314.0, 316.0, 314.0, 316.0, 311.0, 314.0, 322.0, 314.0, 316.0, 316.0, 314.0, 293.0, 294.0, 288.0, 291.0, 295.0, 275.0, 319.0, 314.0, 311.0, 322.0, 319.0, 311.0, 317.0, 322.0, 282.0, 297.0, 294.0, 288.0, 316.0, 317.0, 288.0, 291.0, 311.0, 322.0, 314.0, 322.0, 313.0, 317.0, 317.0, 316.0, 310.0, 299.0, 293.0, 289.0, 317.0, 319.0, 322.0, 308.0, 319.0, 317.0, 288.0, 288.0, 314.0, 322.0, 316.0, 317.0, 314.0, 316.0, 322.0, 314.0, 285.0, 297.0, 294.0, 288.0, 314.0, 322.0, 321.0, 309.0, 316.0, 317.0, 311.0, 319.0, 311.0, 325.0, 282.0, 297.0, 319.0, 314.0, 287.0, 283.0, 319.0, 314.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.8371997037946857, "mean_processing_ms": 0.23582575562090372, "mean_inference_ms": 1.4353288532965658}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9360000, "num_steps_sampled": 4992000, "sample_time_ms": 21348.738, "load_time_ms": 36.486, "grad_time_ms": 9719.814, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004423701611813158, "policy_loss": -0.007186357397586107, "vf_loss": 81.85875701904297, "vf_explained_var": 0.7682639956474304, "kl": 0.002242224058136344, "entropy": 1.114312767982483, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 4992000, "episodes_total": 12480, "training_iteration": 390, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-06-49", "timestamp": 1660259209, "time_this_iter_s": 29.65726089477539, "time_total_s": 17620.503420829773, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17620.503420829773, "timesteps_since_restore": 4992000, "iterations_since_restore": 390, "perf": {"cpu_util_percent": 34.275609756097566, "ram_util_percent": 59.390243902439025}} -{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 614.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 307.385}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.37, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.52, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.03, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.45, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 
18.02, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.45, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.02, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.45, 
"viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.02, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [593.0, 636.0, 633.0, 630.0, 630.0, 573.0, 630.0, 630.0, 630.0, 516.0, 636.0, 630.0, 633.0, 639.0, 630.0, 582.0, 633.0, 636.0, 636.0, 639.0, 639.0, 636.0, 567.0, 636.0, 636.0, 582.0, 636.0, 639.0, 636.0, 408.0, 636.0, 636.0, 587.0, 579.0, 570.0, 633.0, 633.0, 630.0, 639.0, 579.0, 582.0, 633.0, 579.0, 633.0, 636.0, 630.0, 633.0, 609.0, 582.0, 636.0, 630.0, 636.0, 576.0, 636.0, 633.0, 630.0, 636.0, 582.0, 582.0, 636.0, 630.0, 633.0, 630.0, 636.0, 579.0, 
633.0, 570.0, 633.0, 582.0, 633.0, 633.0, 582.0, 579.0, 582.0, 633.0, 579.0, 633.0, 633.0, 576.0, 567.0, 636.0, 576.0, 636.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 630.0, 630.0, 633.0, 633.0, 633.0, 579.0, 639.0, 633.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 297.0, 322.0, 314.0, 316.0, 317.0, 316.0, 314.0, 312.0, 318.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 308.0, 322.0, 254.0, 262.0, 324.0, 312.0, 310.0, 320.0, 313.0, 320.0, 319.0, 320.0, 321.0, 309.0, 291.0, 291.0, 316.0, 317.0, 319.0, 317.0, 314.0, 322.0, 320.0, 319.0, 319.0, 320.0, 319.0, 317.0, 274.0, 293.0, 309.0, 327.0, 319.0, 317.0, 288.0, 294.0, 322.0, 314.0, 319.0, 320.0, 319.0, 317.0, 198.0, 210.0, 314.0, 322.0, 313.0, 323.0, 293.0, 294.0, 288.0, 291.0, 295.0, 275.0, 319.0, 314.0, 311.0, 322.0, 319.0, 311.0, 317.0, 322.0, 282.0, 297.0, 294.0, 288.0, 316.0, 317.0, 288.0, 291.0, 311.0, 322.0, 314.0, 322.0, 313.0, 317.0, 317.0, 316.0, 310.0, 299.0, 293.0, 289.0, 317.0, 319.0, 322.0, 308.0, 319.0, 317.0, 288.0, 288.0, 314.0, 322.0, 316.0, 317.0, 314.0, 316.0, 322.0, 314.0, 285.0, 297.0, 294.0, 288.0, 314.0, 322.0, 321.0, 309.0, 316.0, 317.0, 311.0, 319.0, 311.0, 325.0, 282.0, 297.0, 319.0, 314.0, 287.0, 283.0, 319.0, 314.0, 296.0, 286.0, 319.0, 314.0, 311.0, 322.0, 285.0, 297.0, 287.0, 292.0, 291.0, 291.0, 316.0, 317.0, 288.0, 291.0, 310.0, 323.0, 317.0, 316.0, 298.0, 278.0, 282.0, 285.0, 314.0, 322.0, 292.0, 284.0, 316.0, 320.0, 318.0, 315.0, 317.0, 322.0, 319.0, 317.0, 288.0, 
291.0, 321.0, 315.0, 308.0, 319.0, 314.0, 322.0, 314.0, 322.0, 319.0, 311.0, 312.0, 318.0, 316.0, 317.0, 316.0, 317.0, 319.0, 314.0, 285.0, 294.0, 317.0, 322.0, 311.0, 322.0, 288.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.83620884479323, "mean_processing_ms": 0.23562873648902904, "mean_inference_ms": 1.4341204983718234}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9384000, "num_steps_sampled": 5004800, "sample_time_ms": 21231.946, "load_time_ms": 36.725, "grad_time_ms": 9739.689, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004321941174566746, "policy_loss": -0.0030343374237418175, "vf_loss": 79.1146011352539, "vf_explained_var": 0.7782495021820068, "kl": 0.0022527193650603294, "entropy": 1.1103630065917969, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5004800, "episodes_total": 12512, "training_iteration": 391, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-07-19", "timestamp": 1660259239, "time_this_iter_s": 29.202332973480225, "time_total_s": 17649.705753803253, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17649.705753803253, "timesteps_since_restore": 5004800, "iterations_since_restore": 391, "perf": {"cpu_util_percent": 32.102380952380955, "ram_util_percent": 59.785714285714285}} -{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 613.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.805}, "custom_metrics": {"sparse_reward_mean": 212.6, 
"sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.41, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.84, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.63, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 
0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.1, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.45, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.31, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.1, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.1, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 636.0, 639.0, 587.0, 636.0, 630.0, 633.0, 639.0, 639.0, 639.0, 587.0, 582.0, 525.0, 579.0, 636.0, 582.0, 636.0, 627.0, 630.0, 630.0, 587.0, 639.0, 633.0, 570.0, 630.0, 630.0, 639.0, 633.0, 627.0, 582.0, 576.0, 579.0, 633.0, 570.0, 633.0, 582.0, 633.0, 633.0, 582.0, 579.0, 
582.0, 633.0, 579.0, 633.0, 633.0, 576.0, 567.0, 636.0, 576.0, 636.0, 633.0, 639.0, 636.0, 579.0, 636.0, 627.0, 636.0, 636.0, 630.0, 630.0, 633.0, 633.0, 633.0, 579.0, 639.0, 633.0, 579.0, 593.0, 636.0, 633.0, 630.0, 630.0, 573.0, 630.0, 630.0, 630.0, 516.0, 636.0, 630.0, 633.0, 639.0, 630.0, 582.0, 633.0, 636.0, 636.0, 639.0, 639.0, 636.0, 567.0, 636.0, 636.0, 582.0, 636.0, 639.0, 636.0, 408.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 290.0, 286.0, 322.0, 314.0, 322.0, 317.0, 299.0, 288.0, 314.0, 322.0, 313.0, 317.0, 321.0, 312.0, 314.0, 325.0, 319.0, 320.0, 317.0, 322.0, 293.0, 294.0, 288.0, 294.0, 270.0, 255.0, 289.0, 290.0, 322.0, 314.0, 286.0, 296.0, 319.0, 317.0, 313.0, 314.0, 316.0, 314.0, 316.0, 314.0, 290.0, 297.0, 319.0, 320.0, 319.0, 314.0, 279.0, 291.0, 315.0, 315.0, 316.0, 314.0, 317.0, 322.0, 317.0, 316.0, 300.0, 327.0, 301.0, 281.0, 285.0, 291.0, 282.0, 297.0, 319.0, 314.0, 287.0, 283.0, 319.0, 314.0, 296.0, 286.0, 319.0, 314.0, 311.0, 322.0, 285.0, 297.0, 287.0, 292.0, 291.0, 291.0, 316.0, 317.0, 288.0, 291.0, 310.0, 323.0, 317.0, 316.0, 298.0, 278.0, 282.0, 285.0, 314.0, 322.0, 292.0, 284.0, 316.0, 320.0, 318.0, 315.0, 317.0, 322.0, 319.0, 317.0, 288.0, 291.0, 321.0, 315.0, 308.0, 319.0, 314.0, 322.0, 314.0, 322.0, 319.0, 311.0, 312.0, 318.0, 316.0, 317.0, 316.0, 317.0, 319.0, 314.0, 285.0, 294.0, 317.0, 322.0, 311.0, 322.0, 288.0, 291.0, 296.0, 297.0, 322.0, 314.0, 316.0, 317.0, 316.0, 314.0, 312.0, 318.0, 287.0, 286.0, 313.0, 
317.0, 318.0, 312.0, 308.0, 322.0, 254.0, 262.0, 324.0, 312.0, 310.0, 320.0, 313.0, 320.0, 319.0, 320.0, 321.0, 309.0, 291.0, 291.0, 316.0, 317.0, 319.0, 317.0, 314.0, 322.0, 320.0, 319.0, 319.0, 320.0, 319.0, 317.0, 274.0, 293.0, 309.0, 327.0, 319.0, 317.0, 288.0, 294.0, 322.0, 314.0, 319.0, 320.0, 319.0, 317.0, 198.0, 210.0, 314.0, 322.0, 313.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8352162722416216, "mean_processing_ms": 0.2354315996686975, "mean_inference_ms": 1.4328297296809587}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9408000, "num_steps_sampled": 5017600, "sample_time_ms": 20999.824, "load_time_ms": 36.668, "grad_time_ms": 9737.715, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033923883456736803, "policy_loss": -0.004631926771253347, "vf_loss": 85.770751953125, "vf_explained_var": 0.7699734568595886, "kl": 0.0020241406746208668, "entropy": 1.1055186986923218, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5017600, "episodes_total": 12544, "training_iteration": 392, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-07-47", "timestamp": 1660259267, "time_this_iter_s": 28.112272024154663, "time_total_s": 17677.818025827408, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17677.818025827408, "timesteps_since_restore": 5017600, "iterations_since_restore": 392, "perf": {"cpu_util_percent": 32.03076923076923, "ram_util_percent": 59.29230769230767}} -{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 615.62, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.81}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.22, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.94, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.53, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.04, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, 
"useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.01, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.47, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.01, 
"optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.01, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 639.0, 582.0, 630.0, 633.0, 630.0, 579.0, 633.0, 630.0, 633.0, 
630.0, 633.0, 630.0, 636.0, 633.0, 582.0, 633.0, 636.0, 636.0, 633.0, 582.0, 579.0, 627.0, 633.0, 587.0, 587.0, 581.0, 639.0, 630.0, 636.0, 639.0, 579.0, 639.0, 633.0, 579.0, 593.0, 636.0, 633.0, 630.0, 630.0, 573.0, 630.0, 630.0, 630.0, 516.0, 636.0, 630.0, 633.0, 639.0, 630.0, 582.0, 633.0, 636.0, 636.0, 639.0, 639.0, 636.0, 567.0, 636.0, 636.0, 582.0, 636.0, 639.0, 636.0, 408.0, 636.0, 636.0, 579.0, 576.0, 636.0, 639.0, 587.0, 636.0, 630.0, 633.0, 639.0, 639.0, 639.0, 587.0, 582.0, 525.0, 579.0, 636.0, 582.0, 636.0, 627.0, 630.0, 630.0, 587.0, 639.0, 633.0, 570.0, 630.0, 630.0, 639.0, 633.0, 627.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 319.0, 314.0, 319.0, 320.0, 288.0, 294.0, 313.0, 317.0, 314.0, 319.0, 319.0, 311.0, 288.0, 291.0, 309.0, 324.0, 309.0, 321.0, 316.0, 317.0, 313.0, 317.0, 314.0, 319.0, 314.0, 316.0, 329.0, 307.0, 314.0, 319.0, 285.0, 297.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 321.0, 312.0, 291.0, 291.0, 287.0, 292.0, 308.0, 319.0, 314.0, 319.0, 283.0, 304.0, 293.0, 294.0, 291.0, 290.0, 322.0, 317.0, 316.0, 314.0, 322.0, 314.0, 317.0, 322.0, 285.0, 294.0, 317.0, 322.0, 311.0, 322.0, 288.0, 291.0, 296.0, 297.0, 322.0, 314.0, 316.0, 317.0, 316.0, 314.0, 312.0, 318.0, 287.0, 286.0, 313.0, 317.0, 318.0, 312.0, 308.0, 322.0, 254.0, 262.0, 324.0, 312.0, 310.0, 320.0, 313.0, 320.0, 319.0, 320.0, 321.0, 309.0, 291.0, 291.0, 316.0, 317.0, 319.0, 317.0, 314.0, 322.0, 320.0, 319.0, 319.0, 320.0, 319.0, 317.0, 274.0, 293.0, 309.0, 
327.0, 319.0, 317.0, 288.0, 294.0, 322.0, 314.0, 319.0, 320.0, 319.0, 317.0, 198.0, 210.0, 314.0, 322.0, 313.0, 323.0, 291.0, 288.0, 290.0, 286.0, 322.0, 314.0, 322.0, 317.0, 299.0, 288.0, 314.0, 322.0, 313.0, 317.0, 321.0, 312.0, 314.0, 325.0, 319.0, 320.0, 317.0, 322.0, 293.0, 294.0, 288.0, 294.0, 270.0, 255.0, 289.0, 290.0, 322.0, 314.0, 286.0, 296.0, 319.0, 317.0, 313.0, 314.0, 316.0, 314.0, 316.0, 314.0, 290.0, 297.0, 319.0, 320.0, 319.0, 314.0, 279.0, 291.0, 315.0, 315.0, 316.0, 314.0, 317.0, 322.0, 317.0, 316.0, 300.0, 327.0, 301.0, 281.0, 285.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8342365209622892, "mean_processing_ms": 0.23523671051615616, "mean_inference_ms": 1.4317637574949895}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9432000, "num_steps_sampled": 5030400, "sample_time_ms": 21299.857, "load_time_ms": 37.172, "grad_time_ms": 9869.636, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 4.354631528258324e-05, "policy_loss": -0.0072962199337780476, "vf_loss": 78.87313079833984, "vf_explained_var": 0.7646682262420654, "kl": 0.0020736621227115393, "entropy": 1.0950974225997925, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5030400, "episodes_total": 12576, "training_iteration": 393, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-08-21", "timestamp": 1660259301, "time_this_iter_s": 34.420122146606445, "time_total_s": 17712.238147974014, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": 
false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17712.238147974014, "timesteps_since_restore": 5030400, "iterations_since_restore": 393, "perf": {"cpu_util_percent": 
32.710204081632654, "ram_util_percent": 59.30816326530613}} -{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 614.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 307.225}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.85, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.29, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, 
"onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.59, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.28, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.5, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.18, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.59, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.59, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 624.0, 576.0, 576.0, 590.0, 590.0, 630.0, 633.0, 582.0, 636.0, 639.0, 633.0, 639.0, 639.0, 630.0, 627.0, 636.0, 636.0, 636.0, 587.0, 581.0, 636.0, 621.0, 639.0, 630.0, 633.0, 621.0, 636.0, 573.0, 636.0, 633.0, 522.0, 636.0, 408.0, 636.0, 636.0, 579.0, 576.0, 636.0, 639.0, 587.0, 636.0, 630.0, 633.0, 639.0, 639.0, 639.0, 587.0, 582.0, 525.0, 579.0, 636.0, 582.0, 636.0, 627.0, 630.0, 630.0, 587.0, 639.0, 633.0, 570.0, 630.0, 630.0, 639.0, 633.0, 627.0, 582.0, 576.0, 633.0, 633.0, 639.0, 582.0, 630.0, 633.0, 630.0, 579.0, 633.0, 630.0, 633.0, 630.0, 633.0, 630.0, 636.0, 633.0, 582.0, 633.0, 636.0, 636.0, 633.0, 582.0, 579.0, 627.0, 633.0, 587.0, 587.0, 581.0, 639.0, 630.0, 636.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 312.0, 312.0, 290.0, 286.0, 287.0, 289.0, 297.0, 293.0, 294.0, 296.0, 316.0, 314.0, 319.0, 314.0, 286.0, 296.0, 311.0, 325.0, 319.0, 320.0, 314.0, 319.0, 320.0, 319.0, 322.0, 317.0, 316.0, 314.0, 308.0, 319.0, 322.0, 314.0, 314.0, 322.0, 309.0, 327.0, 296.0, 291.0, 290.0, 291.0, 316.0, 320.0, 309.0, 312.0, 317.0, 322.0, 316.0, 314.0, 316.0, 317.0, 313.0, 308.0, 319.0, 317.0, 280.0, 293.0, 317.0, 319.0, 314.0, 319.0, 262.0, 260.0, 319.0, 317.0, 198.0, 210.0, 314.0, 322.0, 313.0, 323.0, 291.0, 288.0, 290.0, 286.0, 322.0, 314.0, 322.0, 317.0, 299.0, 288.0, 314.0, 322.0, 313.0, 317.0, 321.0, 312.0, 314.0, 325.0, 319.0, 
320.0, 317.0, 322.0, 293.0, 294.0, 288.0, 294.0, 270.0, 255.0, 289.0, 290.0, 322.0, 314.0, 286.0, 296.0, 319.0, 317.0, 313.0, 314.0, 316.0, 314.0, 316.0, 314.0, 290.0, 297.0, 319.0, 320.0, 319.0, 314.0, 279.0, 291.0, 315.0, 315.0, 316.0, 314.0, 317.0, 322.0, 317.0, 316.0, 300.0, 327.0, 301.0, 281.0, 285.0, 291.0, 319.0, 314.0, 319.0, 314.0, 319.0, 320.0, 288.0, 294.0, 313.0, 317.0, 314.0, 319.0, 319.0, 311.0, 288.0, 291.0, 309.0, 324.0, 309.0, 321.0, 316.0, 317.0, 313.0, 317.0, 314.0, 319.0, 314.0, 316.0, 329.0, 307.0, 314.0, 319.0, 285.0, 297.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 321.0, 312.0, 291.0, 291.0, 287.0, 292.0, 308.0, 319.0, 314.0, 319.0, 283.0, 304.0, 293.0, 294.0, 291.0, 290.0, 322.0, 317.0, 316.0, 314.0, 322.0, 314.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8332639627657553, "mean_processing_ms": 0.23504271675970742, "mean_inference_ms": 1.430763529891338}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9456000, "num_steps_sampled": 5043200, "sample_time_ms": 21101.644, "load_time_ms": 36.639, "grad_time_ms": 9913.059, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006912912358529866, "policy_loss": -0.006923032458871603, "vf_loss": 81.60092163085938, "vf_explained_var": 0.7633647918701172, "kl": 0.001780605292879045, "entropy": 1.0915051698684692, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5043200, "episodes_total": 12608, "training_iteration": 394, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-08-53", "timestamp": 1660259333, "time_this_iter_s": 32.24967384338379, "time_total_s": 17744.487821817398, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17744.487821817398, "timesteps_since_restore": 5043200, "iterations_since_restore": 394, "perf": {"cpu_util_percent": 33.69347826086956, "ram_util_percent": 59.52391304347825}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 617.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 329.0}, "policy_reward_mean": {"ppo": 308.91}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.82, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.15, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 16.88, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.74, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.9, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.33, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.32, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.74, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.74, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 633.0, 630.0, 576.0, 639.0, 639.0, 627.0, 636.0, 630.0, 636.0, 633.0, 587.0, 633.0, 579.0, 587.0, 630.0, 579.0, 633.0, 587.0, 636.0, 639.0, 627.0, 639.0, 630.0, 633.0, 639.0, 584.0, 590.0, 630.0, 636.0, 582.0, 633.0, 627.0, 582.0, 576.0, 633.0, 633.0, 639.0, 582.0, 630.0, 633.0, 630.0, 579.0, 633.0, 630.0, 633.0, 630.0, 633.0, 630.0, 636.0, 633.0, 582.0, 633.0, 636.0, 636.0, 633.0, 582.0, 579.0, 627.0, 633.0, 587.0, 587.0, 581.0, 639.0, 630.0, 636.0, 639.0, 579.0, 624.0, 576.0, 576.0, 590.0, 590.0, 630.0, 633.0, 582.0, 636.0, 639.0, 633.0, 639.0, 639.0, 630.0, 627.0, 636.0, 636.0, 636.0, 587.0, 581.0, 636.0, 621.0, 639.0, 630.0, 633.0, 621.0, 636.0, 573.0, 636.0, 633.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [324.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 314.0, 285.0, 291.0, 322.0, 317.0, 319.0, 320.0, 316.0, 311.0, 317.0, 319.0, 308.0, 322.0, 317.0, 319.0, 314.0, 319.0, 296.0, 291.0, 317.0, 316.0, 281.0, 298.0, 293.0, 294.0, 311.0, 319.0, 290.0, 289.0, 316.0, 317.0, 299.0, 288.0, 319.0, 317.0, 319.0, 320.0, 308.0, 319.0, 319.0, 320.0, 311.0, 319.0, 314.0, 319.0, 322.0, 317.0, 293.0, 
291.0, 294.0, 296.0, 316.0, 314.0, 319.0, 317.0, 299.0, 283.0, 317.0, 316.0, 300.0, 327.0, 301.0, 281.0, 285.0, 291.0, 319.0, 314.0, 319.0, 314.0, 319.0, 320.0, 288.0, 294.0, 313.0, 317.0, 314.0, 319.0, 319.0, 311.0, 288.0, 291.0, 309.0, 324.0, 309.0, 321.0, 316.0, 317.0, 313.0, 317.0, 314.0, 319.0, 314.0, 316.0, 329.0, 307.0, 314.0, 319.0, 285.0, 297.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 321.0, 312.0, 291.0, 291.0, 287.0, 292.0, 308.0, 319.0, 314.0, 319.0, 283.0, 304.0, 293.0, 294.0, 291.0, 290.0, 322.0, 317.0, 316.0, 314.0, 322.0, 314.0, 317.0, 322.0, 288.0, 291.0, 312.0, 312.0, 290.0, 286.0, 287.0, 289.0, 297.0, 293.0, 294.0, 296.0, 316.0, 314.0, 319.0, 314.0, 286.0, 296.0, 311.0, 325.0, 319.0, 320.0, 314.0, 319.0, 320.0, 319.0, 322.0, 317.0, 316.0, 314.0, 308.0, 319.0, 322.0, 314.0, 314.0, 322.0, 309.0, 327.0, 296.0, 291.0, 290.0, 291.0, 316.0, 320.0, 309.0, 312.0, 317.0, 322.0, 316.0, 314.0, 316.0, 317.0, 313.0, 308.0, 319.0, 317.0, 280.0, 293.0, 317.0, 319.0, 314.0, 319.0, 262.0, 260.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8323023893129786, "mean_processing_ms": 0.23485230859207035, "mean_inference_ms": 1.4300119711867751}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9480000, "num_steps_sampled": 5056000, "sample_time_ms": 21142.709, "load_time_ms": 37.292, "grad_time_ms": 10082.816, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00028009479865431786, "policy_loss": -0.007545720785856247, "vf_loss": 83.69845581054688, "vf_explained_var": 0.7653185725212097, "kl": 0.001969197066500783, "entropy": 1.0880564451217651, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5056000, "episodes_total": 12640, "training_iteration": 395, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-09-29", "timestamp": 1660259369, "time_this_iter_s": 
35.19951057434082, "time_total_s": 17779.68733239174, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, 
"use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, 
"shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17779.68733239174, "timesteps_since_restore": 5056000, "iterations_since_restore": 395, "perf": {"cpu_util_percent": 30.6265306122449, "ram_util_percent": 59.11836734693878}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 618.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 309.44}, "custom_metrics": {"sparse_reward_mean": 214.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 190.08, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, 
"potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.01, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.44, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.79, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.88, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.7, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.91, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, 
"soup_pickup_agent_1_mean": 5.27, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.7, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.7, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, 
"catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 633.0, 633.0, 636.0, 582.0, 633.0, 630.0, 633.0, 630.0, 636.0, 636.0, 633.0, 576.0, 633.0, 582.0, 582.0, 627.0, 639.0, 639.0, 633.0, 636.0, 630.0, 560.0, 636.0, 636.0, 636.0, 582.0, 633.0, 636.0, 582.0, 578.0, 639.0, 630.0, 636.0, 639.0, 579.0, 624.0, 576.0, 576.0, 590.0, 590.0, 630.0, 633.0, 582.0, 636.0, 639.0, 633.0, 639.0, 639.0, 630.0, 627.0, 636.0, 636.0, 636.0, 587.0, 581.0, 636.0, 621.0, 639.0, 630.0, 633.0, 621.0, 636.0, 573.0, 636.0, 633.0, 522.0, 636.0, 633.0, 633.0, 630.0, 576.0, 639.0, 639.0, 627.0, 636.0, 630.0, 636.0, 633.0, 587.0, 633.0, 579.0, 587.0, 630.0, 579.0, 633.0, 587.0, 636.0, 639.0, 627.0, 639.0, 630.0, 633.0, 639.0, 584.0, 590.0, 630.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 325.0, 316.0, 314.0, 317.0, 316.0, 314.0, 319.0, 319.0, 317.0, 289.0, 293.0, 321.0, 312.0, 311.0, 319.0, 313.0, 320.0, 313.0, 317.0, 321.0, 315.0, 319.0, 317.0, 319.0, 
314.0, 285.0, 291.0, 311.0, 322.0, 285.0, 297.0, 291.0, 291.0, 313.0, 314.0, 327.0, 312.0, 324.0, 315.0, 319.0, 314.0, 314.0, 322.0, 321.0, 309.0, 279.0, 281.0, 319.0, 317.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 311.0, 322.0, 319.0, 317.0, 291.0, 291.0, 290.0, 288.0, 322.0, 317.0, 316.0, 314.0, 322.0, 314.0, 317.0, 322.0, 288.0, 291.0, 312.0, 312.0, 290.0, 286.0, 287.0, 289.0, 297.0, 293.0, 294.0, 296.0, 316.0, 314.0, 319.0, 314.0, 286.0, 296.0, 311.0, 325.0, 319.0, 320.0, 314.0, 319.0, 320.0, 319.0, 322.0, 317.0, 316.0, 314.0, 308.0, 319.0, 322.0, 314.0, 314.0, 322.0, 309.0, 327.0, 296.0, 291.0, 290.0, 291.0, 316.0, 320.0, 309.0, 312.0, 317.0, 322.0, 316.0, 314.0, 316.0, 317.0, 313.0, 308.0, 319.0, 317.0, 280.0, 293.0, 317.0, 319.0, 314.0, 319.0, 262.0, 260.0, 324.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 314.0, 285.0, 291.0, 322.0, 317.0, 319.0, 320.0, 316.0, 311.0, 317.0, 319.0, 308.0, 322.0, 317.0, 319.0, 314.0, 319.0, 296.0, 291.0, 317.0, 316.0, 281.0, 298.0, 293.0, 294.0, 311.0, 319.0, 290.0, 289.0, 316.0, 317.0, 299.0, 288.0, 319.0, 317.0, 319.0, 320.0, 308.0, 319.0, 319.0, 320.0, 311.0, 319.0, 314.0, 319.0, 322.0, 317.0, 293.0, 291.0, 294.0, 296.0, 316.0, 314.0, 319.0, 317.0, 299.0, 283.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8313395238935175, "mean_processing_ms": 0.2346607427713282, "mean_inference_ms": 1.4290797727914242}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9504000, "num_steps_sampled": 5068800, "sample_time_ms": 20854.832, "load_time_ms": 37.297, "grad_time_ms": 9880.508, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009566675289534032, "policy_loss": -0.0062157814390957355, "vf_loss": 77.21820068359375, "vf_explained_var": 0.7768221497535706, "kl": 0.0020733082201331854, "entropy": 1.0987348556518555, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, 
"timesteps_total": 5068800, "episodes_total": 12672, "training_iteration": 396, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-09-57", "timestamp": 1660259397, "time_this_iter_s": 28.42993927001953, "time_total_s": 17808.11727166176, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": 
false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": 
"MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17808.11727166176, "timesteps_since_restore": 5068800, "iterations_since_restore": 396, "perf": {"cpu_util_percent": 34.480487804878045, "ram_util_percent": 59.09024390243902}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 616.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.265}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.33, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, 
"useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.89, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.64, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.64, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.93, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.48, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.03, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.93, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, 
"useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.48, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.03, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.48, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.03, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, 
"catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 639.0, 627.0, 582.0, 636.0, 630.0, 630.0, 636.0, 582.0, 587.0, 579.0, 639.0, 582.0, 578.0, 618.0, 570.0, 636.0, 567.0, 630.0, 582.0, 630.0, 582.0, 636.0, 639.0, 633.0, 582.0, 636.0, 636.0, 633.0, 636.0, 582.0, 573.0, 636.0, 633.0, 522.0, 636.0, 633.0, 633.0, 630.0, 576.0, 639.0, 639.0, 627.0, 636.0, 630.0, 636.0, 633.0, 587.0, 633.0, 579.0, 587.0, 630.0, 579.0, 633.0, 587.0, 636.0, 639.0, 627.0, 639.0, 630.0, 633.0, 639.0, 584.0, 590.0, 630.0, 636.0, 582.0, 636.0, 630.0, 633.0, 633.0, 636.0, 582.0, 633.0, 630.0, 633.0, 630.0, 636.0, 636.0, 633.0, 576.0, 633.0, 582.0, 582.0, 627.0, 639.0, 639.0, 633.0, 636.0, 630.0, 560.0, 636.0, 636.0, 636.0, 582.0, 633.0, 636.0, 582.0, 578.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], 
"policy_ppo_reward": [319.0, 317.0, 316.0, 317.0, 322.0, 317.0, 313.0, 314.0, 291.0, 291.0, 321.0, 315.0, 321.0, 309.0, 316.0, 314.0, 316.0, 320.0, 285.0, 297.0, 294.0, 293.0, 288.0, 291.0, 319.0, 320.0, 289.0, 293.0, 293.0, 285.0, 313.0, 305.0, 276.0, 294.0, 319.0, 317.0, 279.0, 288.0, 316.0, 314.0, 291.0, 291.0, 313.0, 317.0, 295.0, 287.0, 319.0, 317.0, 324.0, 315.0, 313.0, 320.0, 293.0, 289.0, 316.0, 320.0, 320.0, 316.0, 308.0, 325.0, 314.0, 322.0, 285.0, 297.0, 280.0, 293.0, 317.0, 319.0, 314.0, 319.0, 262.0, 260.0, 324.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 314.0, 285.0, 291.0, 322.0, 317.0, 319.0, 320.0, 316.0, 311.0, 317.0, 319.0, 308.0, 322.0, 317.0, 319.0, 314.0, 319.0, 296.0, 291.0, 317.0, 316.0, 281.0, 298.0, 293.0, 294.0, 311.0, 319.0, 290.0, 289.0, 316.0, 317.0, 299.0, 288.0, 319.0, 317.0, 319.0, 320.0, 308.0, 319.0, 319.0, 320.0, 311.0, 319.0, 314.0, 319.0, 322.0, 317.0, 293.0, 291.0, 294.0, 296.0, 316.0, 314.0, 319.0, 317.0, 299.0, 283.0, 311.0, 325.0, 316.0, 314.0, 317.0, 316.0, 314.0, 319.0, 319.0, 317.0, 289.0, 293.0, 321.0, 312.0, 311.0, 319.0, 313.0, 320.0, 313.0, 317.0, 321.0, 315.0, 319.0, 317.0, 319.0, 314.0, 285.0, 291.0, 311.0, 322.0, 285.0, 297.0, 291.0, 291.0, 313.0, 314.0, 327.0, 312.0, 324.0, 315.0, 319.0, 314.0, 314.0, 322.0, 321.0, 309.0, 279.0, 281.0, 319.0, 317.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 311.0, 322.0, 319.0, 317.0, 291.0, 291.0, 290.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8303852563726812, "mean_processing_ms": 0.2344708309042675, "mean_inference_ms": 1.4281787126468246}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9528000, "num_steps_sampled": 5081600, "sample_time_ms": 20985.415, "load_time_ms": 37.239, "grad_time_ms": 9808.735, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003967406693845987, "policy_loss": -0.0038024026434868574, "vf_loss": 83.1785888671875, "vf_explained_var": 0.769153892993927, "kl": 
0.0019865171052515507, "entropy": 1.0961049795150757, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5081600, "episodes_total": 12704, "training_iteration": 397, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-10-28", "timestamp": 1660259428, "time_this_iter_s": 30.70757508277893, "time_total_s": 17838.824846744537, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), 
float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17838.824846744537, "timesteps_since_restore": 5081600, "iterations_since_restore": 397, "perf": {"cpu_util_percent": 33.08837209302326, "ram_util_percent": 59.13023255813955}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 617.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 308.56}, "custom_metrics": {"sparse_reward_mean": 213.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.52, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.65, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.54, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.17, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.33, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.56, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.17, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.17, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 576.0, 636.0, 636.0, 630.0, 582.0, 582.0, 639.0, 593.0, 639.0, 630.0, 630.0, 636.0, 633.0, 636.0, 630.0, 636.0, 636.0, 522.0, 633.0, 636.0, 636.0, 633.0, 636.0, 582.0, 576.0, 636.0, 627.0, 633.0, 636.0, 587.0, 633.0, 590.0, 630.0, 636.0, 582.0, 636.0, 630.0, 633.0, 633.0, 636.0, 582.0, 633.0, 630.0, 633.0, 630.0, 636.0, 636.0, 633.0, 576.0, 633.0, 582.0, 582.0, 627.0, 639.0, 639.0, 633.0, 636.0, 630.0, 560.0, 636.0, 636.0, 636.0, 582.0, 633.0, 636.0, 582.0, 578.0, 636.0, 633.0, 639.0, 627.0, 582.0, 636.0, 630.0, 630.0, 636.0, 582.0, 587.0, 579.0, 639.0, 582.0, 578.0, 618.0, 570.0, 636.0, 567.0, 630.0, 582.0, 630.0, 582.0, 636.0, 639.0, 633.0, 582.0, 636.0, 636.0, 633.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 282.0, 294.0, 308.0, 328.0, 314.0, 322.0, 318.0, 312.0, 287.0, 295.0, 291.0, 291.0, 317.0, 322.0, 299.0, 294.0, 317.0, 322.0, 311.0, 319.0, 317.0, 313.0, 319.0, 317.0, 316.0, 317.0, 321.0, 315.0, 316.0, 314.0, 318.0, 318.0, 322.0, 314.0, 257.0, 265.0, 319.0, 314.0, 317.0, 319.0, 322.0, 314.0, 314.0, 319.0, 314.0, 322.0, 289.0, 293.0, 282.0, 294.0, 316.0, 320.0, 311.0, 316.0, 314.0, 319.0, 319.0, 317.0, 299.0, 288.0, 316.0, 317.0, 294.0, 296.0, 316.0, 314.0, 319.0, 317.0, 299.0, 283.0, 311.0, 325.0, 316.0, 314.0, 317.0, 316.0, 314.0, 319.0, 319.0, 317.0, 289.0, 293.0, 321.0, 312.0, 311.0, 319.0, 313.0, 320.0, 313.0, 317.0, 321.0, 315.0, 319.0, 317.0, 319.0, 314.0, 285.0, 291.0, 311.0, 322.0, 285.0, 297.0, 291.0, 291.0, 313.0, 314.0, 327.0, 312.0, 324.0, 315.0, 319.0, 314.0, 314.0, 322.0, 321.0, 309.0, 279.0, 281.0, 319.0, 317.0, 316.0, 320.0, 314.0, 322.0, 291.0, 291.0, 311.0, 322.0, 319.0, 317.0, 291.0, 291.0, 290.0, 288.0, 319.0, 317.0, 316.0, 317.0, 322.0, 317.0, 313.0, 314.0, 291.0, 291.0, 321.0, 315.0, 321.0, 309.0, 316.0, 314.0, 316.0, 320.0, 285.0, 297.0, 294.0, 293.0, 288.0, 291.0, 319.0, 320.0, 289.0, 293.0, 293.0, 285.0, 313.0, 305.0, 276.0, 294.0, 319.0, 317.0, 279.0, 288.0, 316.0, 314.0, 291.0, 291.0, 313.0, 317.0, 295.0, 287.0, 319.0, 317.0, 324.0, 315.0, 313.0, 320.0, 293.0, 289.0, 316.0, 320.0, 320.0, 316.0, 308.0, 325.0, 314.0, 322.0, 285.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8294402136883693, "mean_processing_ms": 0.23428178262037597, "mean_inference_ms": 1.4273260562333872}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9552000, "num_steps_sampled": 5094400, "sample_time_ms": 21312.43, "load_time_ms": 37.532, "grad_time_ms": 9807.592, "update_time_ms": 0.001, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002816990716382861, "policy_loss": -0.004787659738212824, "vf_loss": 81.49095916748047, "vf_explained_var": 0.7696583867073059, "kl": 0.0025824178010225296, "entropy": 1.0888774394989014, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5094400, "episodes_total": 12736, "training_iteration": 398, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-11-02", "timestamp": 1660259462, "time_this_iter_s": 33.86050295829773, "time_total_s": 17872.685349702835, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 
1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17872.685349702835, "timesteps_since_restore": 5094400, "iterations_since_restore": 398, "perf": {"cpu_util_percent": 30.977083333333336, "ram_util_percent": 59.083333333333336}} -{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 609.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 304.595}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 187.59, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.27, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, 
"useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.27, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.27, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [197.0, 582.0, 636.0, 630.0, 636.0, 579.0, 636.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 633.0, 633.0, 399.0, 636.0, 639.0, 636.0, 587.0, 576.0, 630.0, 582.0, 630.0, 587.0, 633.0, 636.0, 544.0, 624.0, 636.0, 581.0, 636.0, 633.0, 636.0, 582.0, 578.0, 636.0, 633.0, 639.0, 627.0, 582.0, 636.0, 630.0, 630.0, 636.0, 582.0, 587.0, 579.0, 639.0, 582.0, 578.0, 618.0, 570.0, 636.0, 567.0, 630.0, 582.0, 630.0, 582.0, 636.0, 639.0, 633.0, 582.0, 636.0, 636.0, 633.0, 636.0, 582.0, 627.0, 576.0, 636.0, 636.0, 630.0, 582.0, 582.0, 639.0, 593.0, 639.0, 630.0, 630.0, 636.0, 633.0, 636.0, 630.0, 636.0, 636.0, 522.0, 633.0, 636.0, 636.0, 633.0, 636.0, 582.0, 576.0, 636.0, 627.0, 633.0, 636.0, 587.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [100.0, 97.0, 296.0, 286.0, 319.0, 317.0, 311.0, 319.0, 316.0, 320.0, 291.0, 288.0, 317.0, 319.0, 319.0, 317.0, 317.0, 322.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 313.0, 317.0, 311.0, 322.0, 308.0, 325.0, 193.0, 206.0, 321.0, 315.0, 322.0, 317.0, 322.0, 314.0, 288.0, 299.0, 284.0, 292.0, 311.0, 319.0, 291.0, 291.0, 319.0, 311.0, 288.0, 299.0, 324.0, 309.0, 319.0, 317.0, 279.0, 265.0, 311.0, 313.0, 313.0, 323.0, 294.0, 287.0, 314.0, 322.0, 311.0, 322.0, 319.0, 317.0, 291.0, 291.0, 290.0, 288.0, 319.0, 317.0, 316.0, 317.0, 322.0, 317.0, 313.0, 314.0, 291.0, 291.0, 321.0, 315.0, 321.0, 309.0, 316.0, 314.0, 316.0, 320.0, 285.0, 297.0, 294.0, 293.0, 288.0, 291.0, 319.0, 320.0, 289.0, 293.0, 293.0, 285.0, 313.0, 305.0, 276.0, 294.0, 319.0, 317.0, 279.0, 288.0, 316.0, 314.0, 291.0, 291.0, 313.0, 317.0, 295.0, 287.0, 319.0, 317.0, 324.0, 315.0, 313.0, 320.0, 293.0, 289.0, 316.0, 320.0, 320.0, 316.0, 308.0, 325.0, 314.0, 322.0, 285.0, 297.0, 313.0, 314.0, 282.0, 294.0, 308.0, 328.0, 314.0, 322.0, 318.0, 312.0, 287.0, 295.0, 291.0, 291.0, 317.0, 322.0, 299.0, 294.0, 317.0, 322.0, 311.0, 319.0, 317.0, 313.0, 319.0, 317.0, 316.0, 317.0, 321.0, 315.0, 316.0, 314.0, 318.0, 318.0, 322.0, 314.0, 257.0, 265.0, 319.0, 314.0, 317.0, 319.0, 322.0, 314.0, 314.0, 319.0, 314.0, 322.0, 289.0, 293.0, 282.0, 294.0, 316.0, 320.0, 311.0, 316.0, 314.0, 319.0, 319.0, 317.0, 299.0, 288.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.82850652599667, "mean_processing_ms": 0.23409592110050972, "mean_inference_ms": 1.426659038222931}, 
"off_policy_estimator": {}, "info": {"num_steps_trained": 9576000, "num_steps_sampled": 5107200, "sample_time_ms": 21603.638, "load_time_ms": 37.362, "grad_time_ms": 9958.009, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00230390764772892, "policy_loss": -0.00582013139501214, "vf_loss": 86.7406005859375, "vf_explained_var": 0.7753866314888, "kl": 0.0019396115094423294, "entropy": 1.1000421047210693, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5107200, "episodes_total": 12768, "training_iteration": 399, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-11-36", "timestamp": 1660259496, "time_this_iter_s": 34.40714716911316, "time_total_s": 17907.09249687195, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, 
"mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17907.09249687195, "timesteps_since_restore": 5107200, "iterations_since_restore": 399, "perf": {"cpu_util_percent": 33.638775510204084, "ram_util_percent": 59.13265306122449}} -{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 612.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 306.07}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.54, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.01, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.18, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.55, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 
10, "dish_pickup_agent_1_mean": 5.92, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.17, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.55, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.55, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 7, 
"viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 587.0, 636.0, 636.0, 633.0, 579.0, 633.0, 582.0, 627.0, 630.0, 590.0, 639.0, 633.0, 579.0, 630.0, 630.0, 636.0, 639.0, 633.0, 630.0, 630.0, 630.0, 630.0, 639.0, 636.0, 633.0, 636.0, 582.0, 636.0, 582.0, 579.0, 636.0, 633.0, 636.0, 582.0, 627.0, 576.0, 636.0, 636.0, 630.0, 582.0, 582.0, 639.0, 593.0, 639.0, 630.0, 630.0, 636.0, 633.0, 636.0, 630.0, 636.0, 636.0, 522.0, 633.0, 636.0, 636.0, 633.0, 636.0, 582.0, 576.0, 636.0, 627.0, 633.0, 636.0, 587.0, 633.0, 197.0, 582.0, 636.0, 630.0, 636.0, 579.0, 636.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 633.0, 633.0, 399.0, 636.0, 639.0, 636.0, 587.0, 576.0, 
630.0, 582.0, 630.0, 587.0, 633.0, 636.0, 544.0, 624.0, 636.0, 581.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 314.0, 316.0, 294.0, 293.0, 316.0, 320.0, 319.0, 317.0, 317.0, 316.0, 289.0, 290.0, 319.0, 314.0, 293.0, 289.0, 319.0, 308.0, 316.0, 314.0, 293.0, 297.0, 322.0, 317.0, 319.0, 314.0, 294.0, 285.0, 321.0, 309.0, 319.0, 311.0, 319.0, 317.0, 317.0, 322.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 308.0, 322.0, 313.0, 317.0, 314.0, 325.0, 317.0, 319.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 314.0, 322.0, 291.0, 291.0, 290.0, 289.0, 320.0, 316.0, 308.0, 325.0, 314.0, 322.0, 285.0, 297.0, 313.0, 314.0, 282.0, 294.0, 308.0, 328.0, 314.0, 322.0, 318.0, 312.0, 287.0, 295.0, 291.0, 291.0, 317.0, 322.0, 299.0, 294.0, 317.0, 322.0, 311.0, 319.0, 317.0, 313.0, 319.0, 317.0, 316.0, 317.0, 321.0, 315.0, 316.0, 314.0, 318.0, 318.0, 322.0, 314.0, 257.0, 265.0, 319.0, 314.0, 317.0, 319.0, 322.0, 314.0, 314.0, 319.0, 314.0, 322.0, 289.0, 293.0, 282.0, 294.0, 316.0, 320.0, 311.0, 316.0, 314.0, 319.0, 319.0, 317.0, 299.0, 288.0, 316.0, 317.0, 100.0, 97.0, 296.0, 286.0, 319.0, 317.0, 311.0, 319.0, 316.0, 320.0, 291.0, 288.0, 317.0, 319.0, 319.0, 317.0, 317.0, 322.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 313.0, 317.0, 311.0, 322.0, 308.0, 325.0, 193.0, 206.0, 321.0, 315.0, 322.0, 317.0, 322.0, 314.0, 288.0, 299.0, 284.0, 292.0, 311.0, 319.0, 291.0, 291.0, 319.0, 311.0, 288.0, 299.0, 324.0, 309.0, 319.0, 317.0, 279.0, 265.0, 311.0, 313.0, 313.0, 323.0, 294.0, 
287.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8275880756644836, "mean_processing_ms": 0.23391670126994718, "mean_inference_ms": 1.4262180371248092}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9600000, "num_steps_sampled": 5120000, "sample_time_ms": 22135.485, "load_time_ms": 37.429, "grad_time_ms": 9991.096, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019203064730390906, "policy_loss": -0.005054800305515528, "vf_loss": 75.28291320800781, "vf_explained_var": 0.7728467583656311, "kl": 0.00209710281342268, "entropy": 1.106364130973816, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5120000, "episodes_total": 12800, "training_iteration": 400, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-12-12", "timestamp": 1660259532, "time_this_iter_s": 35.30730485916138, "time_total_s": 17942.39980173111, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 
3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17942.39980173111, "timesteps_since_restore": 5120000, "iterations_since_restore": 400, "perf": {"cpu_util_percent": 33.525999999999996, "ram_util_percent": 59.168}} -{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 613.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 306.635}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.47, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.0, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.22, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.7, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.48, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 
17.76, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.92, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.48, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.76, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.48, 
"viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.76, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 624.0, 633.0, 630.0, 633.0, 636.0, 639.0, 582.0, 630.0, 627.0, 633.0, 630.0, 633.0, 630.0, 567.0, 630.0, 539.0, 630.0, 630.0, 633.0, 630.0, 633.0, 630.0, 639.0, 587.0, 633.0, 636.0, 587.0, 633.0, 197.0, 582.0, 636.0, 630.0, 636.0, 579.0, 636.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 633.0, 633.0, 399.0, 636.0, 639.0, 636.0, 587.0, 576.0, 630.0, 582.0, 630.0, 587.0, 633.0, 636.0, 544.0, 624.0, 
636.0, 581.0, 636.0, 636.0, 630.0, 587.0, 636.0, 636.0, 633.0, 579.0, 633.0, 582.0, 627.0, 630.0, 590.0, 639.0, 633.0, 579.0, 630.0, 630.0, 636.0, 639.0, 633.0, 630.0, 630.0, 630.0, 630.0, 639.0, 636.0, 633.0, 636.0, 582.0, 636.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 318.0, 309.0, 294.0, 288.0, 316.0, 317.0, 319.0, 320.0, 305.0, 319.0, 319.0, 314.0, 307.0, 323.0, 310.0, 323.0, 311.0, 325.0, 316.0, 323.0, 285.0, 297.0, 323.0, 307.0, 316.0, 311.0, 319.0, 314.0, 318.0, 312.0, 314.0, 319.0, 322.0, 308.0, 283.0, 284.0, 312.0, 318.0, 277.0, 262.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 318.0, 312.0, 311.0, 322.0, 311.0, 319.0, 319.0, 320.0, 291.0, 296.0, 314.0, 319.0, 319.0, 317.0, 299.0, 288.0, 316.0, 317.0, 100.0, 97.0, 296.0, 286.0, 319.0, 317.0, 311.0, 319.0, 316.0, 320.0, 291.0, 288.0, 317.0, 319.0, 319.0, 317.0, 317.0, 322.0, 311.0, 319.0, 296.0, 286.0, 291.0, 291.0, 313.0, 317.0, 311.0, 322.0, 308.0, 325.0, 193.0, 206.0, 321.0, 315.0, 322.0, 317.0, 322.0, 314.0, 288.0, 299.0, 284.0, 292.0, 311.0, 319.0, 291.0, 291.0, 319.0, 311.0, 288.0, 299.0, 324.0, 309.0, 319.0, 317.0, 279.0, 265.0, 311.0, 313.0, 313.0, 323.0, 294.0, 287.0, 314.0, 322.0, 314.0, 322.0, 314.0, 316.0, 294.0, 293.0, 316.0, 320.0, 319.0, 317.0, 317.0, 316.0, 289.0, 290.0, 319.0, 314.0, 293.0, 289.0, 319.0, 308.0, 316.0, 314.0, 293.0, 297.0, 322.0, 317.0, 319.0, 314.0, 294.0, 285.0, 321.0, 309.0, 319.0, 311.0, 319.0, 317.0, 317.0, 
322.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 308.0, 322.0, 313.0, 317.0, 314.0, 325.0, 317.0, 319.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 314.0, 322.0, 291.0, 291.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8266799330355084, "mean_processing_ms": 0.23374167551235864, "mean_inference_ms": 1.4257962598910456}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9624000, "num_steps_sampled": 5132800, "sample_time_ms": 22522.212, "load_time_ms": 37.519, "grad_time_ms": 10359.995, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000927128829061985, "policy_loss": -0.006055487785488367, "vf_loss": 75.37408447265625, "vf_explained_var": 0.7751708030700684, "kl": 0.0019053876167163253, "entropy": 1.1095930337905884, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5132800, "episodes_total": 12832, "training_iteration": 401, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-12-49", "timestamp": 1660259569, "time_this_iter_s": 36.76053810119629, "time_total_s": 17979.160339832306, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 17979.160339832306, "timesteps_since_restore": 5132800, "iterations_since_restore": 401, "perf": {"cpu_util_percent": 34.715094339622645, "ram_util_percent": 59.21886792452831}} -{"episode_reward_max": 639.0, "episode_reward_min": 527.0, "episode_reward_mean": 619.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 309.825}, "custom_metrics": {"sparse_reward_mean": 214.8, 
"sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 190.05, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.07, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.47, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.77, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.95, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 
0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.54, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.6, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 636.0, 633.0, 636.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 587.0, 627.0, 624.0, 636.0, 633.0, 587.0, 630.0, 639.0, 636.0, 578.0, 633.0, 639.0, 630.0, 639.0, 633.0, 636.0, 582.0, 636.0, 584.0, 630.0, 581.0, 624.0, 636.0, 581.0, 636.0, 636.0, 630.0, 587.0, 636.0, 636.0, 
633.0, 579.0, 633.0, 582.0, 627.0, 630.0, 590.0, 639.0, 633.0, 579.0, 630.0, 630.0, 636.0, 639.0, 633.0, 630.0, 630.0, 630.0, 630.0, 639.0, 636.0, 633.0, 636.0, 582.0, 636.0, 582.0, 579.0, 630.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 624.0, 633.0, 630.0, 633.0, 636.0, 639.0, 582.0, 630.0, 627.0, 633.0, 630.0, 633.0, 630.0, 567.0, 630.0, 539.0, 630.0, 630.0, 633.0, 630.0, 633.0, 630.0, 639.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [273.0, 254.0, 316.0, 320.0, 316.0, 317.0, 316.0, 320.0, 319.0, 314.0, 291.0, 291.0, 301.0, 286.0, 314.0, 316.0, 321.0, 309.0, 318.0, 309.0, 296.0, 286.0, 291.0, 296.0, 318.0, 309.0, 308.0, 316.0, 316.0, 320.0, 316.0, 317.0, 301.0, 286.0, 314.0, 316.0, 319.0, 320.0, 320.0, 316.0, 295.0, 283.0, 314.0, 319.0, 319.0, 320.0, 311.0, 319.0, 322.0, 317.0, 314.0, 319.0, 319.0, 317.0, 288.0, 294.0, 311.0, 325.0, 290.0, 294.0, 313.0, 317.0, 296.0, 285.0, 311.0, 313.0, 313.0, 323.0, 294.0, 287.0, 314.0, 322.0, 314.0, 322.0, 314.0, 316.0, 294.0, 293.0, 316.0, 320.0, 319.0, 317.0, 317.0, 316.0, 289.0, 290.0, 319.0, 314.0, 293.0, 289.0, 319.0, 308.0, 316.0, 314.0, 293.0, 297.0, 322.0, 317.0, 319.0, 314.0, 294.0, 285.0, 321.0, 309.0, 319.0, 311.0, 319.0, 317.0, 317.0, 322.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 308.0, 322.0, 313.0, 317.0, 314.0, 325.0, 317.0, 319.0, 316.0, 317.0, 319.0, 317.0, 296.0, 286.0, 314.0, 322.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 318.0, 309.0, 294.0, 288.0, 316.0, 
317.0, 319.0, 320.0, 305.0, 319.0, 319.0, 314.0, 307.0, 323.0, 310.0, 323.0, 311.0, 325.0, 316.0, 323.0, 285.0, 297.0, 323.0, 307.0, 316.0, 311.0, 319.0, 314.0, 318.0, 312.0, 314.0, 319.0, 322.0, 308.0, 283.0, 284.0, 312.0, 318.0, 277.0, 262.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 318.0, 312.0, 311.0, 322.0, 311.0, 319.0, 319.0, 320.0, 291.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8257902245780139, "mean_processing_ms": 0.23357282477056074, "mean_inference_ms": 1.425400068641748}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9648000, "num_steps_sampled": 5145600, "sample_time_ms": 23036.82, "load_time_ms": 37.721, "grad_time_ms": 10540.081, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013805682538077235, "policy_loss": -0.006049450021237135, "vf_loss": 79.8260269165039, "vf_explained_var": 0.7674198746681213, "kl": 0.002044239779934287, "entropy": 1.1051733493804932, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5145600, "episodes_total": 12864, "training_iteration": 402, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-13-24", "timestamp": 1660259604, "time_this_iter_s": 35.064194202423096, "time_total_s": 18014.22453403473, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18014.22453403473, "timesteps_since_restore": 5145600, "iterations_since_restore": 402, "perf": {"cpu_util_percent": 38.665306122448975, "ram_util_percent": 59.40408163265307}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 614.54, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.27}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.14, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.59, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 18.08, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 
2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.02, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.72, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.28, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.02, 
"optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.02, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 582.0, 630.0, 639.0, 630.0, 582.0, 522.0, 621.0, 582.0, 564.0, 636.0, 
630.0, 630.0, 630.0, 579.0, 633.0, 627.0, 636.0, 630.0, 633.0, 567.0, 582.0, 630.0, 639.0, 636.0, 636.0, 633.0, 567.0, 582.0, 627.0, 630.0, 576.0, 582.0, 636.0, 582.0, 579.0, 630.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 624.0, 633.0, 630.0, 633.0, 636.0, 639.0, 582.0, 630.0, 627.0, 633.0, 630.0, 633.0, 630.0, 567.0, 630.0, 539.0, 630.0, 630.0, 633.0, 630.0, 633.0, 630.0, 639.0, 587.0, 527.0, 636.0, 633.0, 636.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 587.0, 627.0, 624.0, 636.0, 633.0, 587.0, 630.0, 639.0, 636.0, 578.0, 633.0, 639.0, 630.0, 639.0, 633.0, 636.0, 582.0, 636.0, 584.0, 630.0, 581.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 269.0, 291.0, 291.0, 322.0, 308.0, 319.0, 320.0, 313.0, 317.0, 288.0, 294.0, 268.0, 254.0, 310.0, 311.0, 288.0, 294.0, 268.0, 296.0, 313.0, 323.0, 318.0, 312.0, 315.0, 315.0, 313.0, 317.0, 285.0, 294.0, 318.0, 315.0, 321.0, 306.0, 317.0, 319.0, 313.0, 317.0, 314.0, 319.0, 282.0, 285.0, 294.0, 288.0, 314.0, 316.0, 322.0, 317.0, 312.0, 324.0, 319.0, 317.0, 313.0, 320.0, 290.0, 277.0, 299.0, 283.0, 308.0, 319.0, 319.0, 311.0, 291.0, 285.0, 296.0, 286.0, 314.0, 322.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 318.0, 309.0, 294.0, 288.0, 316.0, 317.0, 319.0, 320.0, 305.0, 319.0, 319.0, 314.0, 307.0, 323.0, 310.0, 323.0, 311.0, 325.0, 316.0, 323.0, 285.0, 297.0, 323.0, 307.0, 316.0, 311.0, 319.0, 314.0, 318.0, 312.0, 314.0, 319.0, 322.0, 308.0, 283.0, 284.0, 312.0, 318.0, 277.0, 
262.0, 319.0, 311.0, 319.0, 311.0, 319.0, 314.0, 318.0, 312.0, 311.0, 322.0, 311.0, 319.0, 319.0, 320.0, 291.0, 296.0, 273.0, 254.0, 316.0, 320.0, 316.0, 317.0, 316.0, 320.0, 319.0, 314.0, 291.0, 291.0, 301.0, 286.0, 314.0, 316.0, 321.0, 309.0, 318.0, 309.0, 296.0, 286.0, 291.0, 296.0, 318.0, 309.0, 308.0, 316.0, 316.0, 320.0, 316.0, 317.0, 301.0, 286.0, 314.0, 316.0, 319.0, 320.0, 320.0, 316.0, 295.0, 283.0, 314.0, 319.0, 319.0, 320.0, 311.0, 319.0, 322.0, 317.0, 314.0, 319.0, 319.0, 317.0, 288.0, 294.0, 311.0, 325.0, 290.0, 294.0, 313.0, 317.0, 296.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8249056294785048, "mean_processing_ms": 0.2334031065407326, "mean_inference_ms": 1.4249795896358415}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9672000, "num_steps_sampled": 5158400, "sample_time_ms": 23134.408, "load_time_ms": 37.402, "grad_time_ms": 10502.988, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0036110735964030027, "policy_loss": -0.003584003308787942, "vf_loss": 77.51012420654297, "vf_explained_var": 0.770778238773346, "kl": 0.00190709566231817, "entropy": 1.1118710041046143, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5158400, "episodes_total": 12896, "training_iteration": 403, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-13-59", "timestamp": 1660259639, "time_this_iter_s": 35.02220106124878, "time_total_s": 18049.246735095978, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18049.246735095978, "timesteps_since_restore": 5158400, "iterations_since_restore": 403, "perf": {"cpu_util_percent": 
34.604, "ram_util_percent": 59.326}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 614.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.125}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.25, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.51, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.91, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, 
"onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.95, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.61, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.75, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 16.37, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.95, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.95, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 587.0, 636.0, 633.0, 582.0, 578.0, 612.0, 630.0, 633.0, 633.0, 582.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 536.0, 582.0, 636.0, 630.0, 639.0, 630.0, 639.0, 582.0, 633.0, 630.0, 639.0, 587.0, 527.0, 636.0, 633.0, 636.0, 633.0, 582.0, 587.0, 630.0, 630.0, 627.0, 582.0, 587.0, 627.0, 624.0, 636.0, 633.0, 587.0, 630.0, 639.0, 636.0, 578.0, 633.0, 639.0, 630.0, 639.0, 633.0, 636.0, 582.0, 636.0, 584.0, 630.0, 581.0, 527.0, 582.0, 630.0, 639.0, 630.0, 582.0, 522.0, 621.0, 582.0, 564.0, 636.0, 630.0, 630.0, 630.0, 579.0, 633.0, 627.0, 636.0, 630.0, 633.0, 567.0, 582.0, 630.0, 639.0, 636.0, 636.0, 633.0, 567.0, 582.0, 627.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 310.0, 320.0, 293.0, 294.0, 324.0, 312.0, 319.0, 314.0, 288.0, 294.0, 301.0, 277.0, 307.0, 305.0, 321.0, 309.0, 317.0, 316.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 313.0, 317.0, 319.0, 317.0, 314.0, 316.0, 314.0, 316.0, 283.0, 299.0, 319.0, 317.0, 305.0, 322.0, 317.0, 319.0, 319.0, 311.0, 323.0, 310.0, 324.0, 312.0, 268.0, 268.0, 287.0, 295.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 316.0, 314.0, 317.0, 322.0, 288.0, 294.0, 311.0, 322.0, 311.0, 319.0, 319.0, 320.0, 291.0, 296.0, 273.0, 254.0, 316.0, 320.0, 316.0, 317.0, 316.0, 320.0, 319.0, 314.0, 291.0, 291.0, 301.0, 286.0, 314.0, 316.0, 321.0, 309.0, 318.0, 
309.0, 296.0, 286.0, 291.0, 296.0, 318.0, 309.0, 308.0, 316.0, 316.0, 320.0, 316.0, 317.0, 301.0, 286.0, 314.0, 316.0, 319.0, 320.0, 320.0, 316.0, 295.0, 283.0, 314.0, 319.0, 319.0, 320.0, 311.0, 319.0, 322.0, 317.0, 314.0, 319.0, 319.0, 317.0, 288.0, 294.0, 311.0, 325.0, 290.0, 294.0, 313.0, 317.0, 296.0, 285.0, 258.0, 269.0, 291.0, 291.0, 322.0, 308.0, 319.0, 320.0, 313.0, 317.0, 288.0, 294.0, 268.0, 254.0, 310.0, 311.0, 288.0, 294.0, 268.0, 296.0, 313.0, 323.0, 318.0, 312.0, 315.0, 315.0, 313.0, 317.0, 285.0, 294.0, 318.0, 315.0, 321.0, 306.0, 317.0, 319.0, 313.0, 317.0, 314.0, 319.0, 282.0, 285.0, 294.0, 288.0, 314.0, 316.0, 322.0, 317.0, 312.0, 324.0, 319.0, 317.0, 313.0, 320.0, 290.0, 277.0, 299.0, 283.0, 308.0, 319.0, 319.0, 311.0, 291.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8240164523926264, "mean_processing_ms": 0.23323028854357722, "mean_inference_ms": 1.4244623501474682}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9696000, "num_steps_sampled": 5171200, "sample_time_ms": 23257.709, "load_time_ms": 37.644, "grad_time_ms": 10547.246, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014424080727621913, "policy_loss": -0.00573391281068325, "vf_loss": 77.33064270019531, "vf_explained_var": 0.7716807723045349, "kl": 0.0015154121210798621, "entropy": 1.1135029792785645, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5171200, "episodes_total": 12928, "training_iteration": 404, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-14-33", "timestamp": 1660259673, "time_this_iter_s": 33.92467999458313, "time_total_s": 18083.17141509056, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18083.17141509056, "timesteps_since_restore": 5171200, "iterations_since_restore": 404, "perf": {"cpu_util_percent": 34.637499999999996, "ram_util_percent": 58.86041666666667}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 612.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 306.235}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.07, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.66, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.56, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 16.23, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.83, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.09, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.77, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.21, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.09, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.09, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 587.0, 582.0, 627.0, 539.0, 636.0, 633.0, 639.0, 630.0, 582.0, 579.0, 639.0, 636.0, 590.0, 627.0, 633.0, 639.0, 579.0, 587.0, 579.0, 636.0, 636.0, 630.0, 636.0, 633.0, 636.0, 584.0, 584.0, 633.0, 590.0, 630.0, 636.0, 584.0, 630.0, 581.0, 527.0, 582.0, 630.0, 639.0, 630.0, 582.0, 522.0, 621.0, 582.0, 564.0, 636.0, 630.0, 630.0, 630.0, 579.0, 633.0, 627.0, 636.0, 630.0, 633.0, 567.0, 582.0, 630.0, 639.0, 636.0, 636.0, 633.0, 567.0, 582.0, 627.0, 630.0, 576.0, 636.0, 630.0, 587.0, 636.0, 633.0, 582.0, 578.0, 612.0, 630.0, 633.0, 633.0, 582.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 536.0, 582.0, 636.0, 630.0, 639.0, 630.0, 639.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 319.0, 314.0, 302.0, 285.0, 290.0, 292.0, 313.0, 314.0, 265.0, 274.0, 322.0, 314.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 288.0, 294.0, 292.0, 287.0, 322.0, 317.0, 324.0, 312.0, 299.0, 291.0, 308.0, 319.0, 315.0, 318.0, 319.0, 320.0, 285.0, 294.0, 293.0, 294.0, 293.0, 286.0, 313.0, 323.0, 316.0, 320.0, 316.0, 314.0, 316.0, 320.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 299.0, 285.0, 319.0, 314.0, 
296.0, 294.0, 319.0, 311.0, 311.0, 325.0, 290.0, 294.0, 313.0, 317.0, 296.0, 285.0, 258.0, 269.0, 291.0, 291.0, 322.0, 308.0, 319.0, 320.0, 313.0, 317.0, 288.0, 294.0, 268.0, 254.0, 310.0, 311.0, 288.0, 294.0, 268.0, 296.0, 313.0, 323.0, 318.0, 312.0, 315.0, 315.0, 313.0, 317.0, 285.0, 294.0, 318.0, 315.0, 321.0, 306.0, 317.0, 319.0, 313.0, 317.0, 314.0, 319.0, 282.0, 285.0, 294.0, 288.0, 314.0, 316.0, 322.0, 317.0, 312.0, 324.0, 319.0, 317.0, 313.0, 320.0, 290.0, 277.0, 299.0, 283.0, 308.0, 319.0, 319.0, 311.0, 291.0, 285.0, 319.0, 317.0, 310.0, 320.0, 293.0, 294.0, 324.0, 312.0, 319.0, 314.0, 288.0, 294.0, 301.0, 277.0, 307.0, 305.0, 321.0, 309.0, 317.0, 316.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 313.0, 317.0, 319.0, 317.0, 314.0, 316.0, 314.0, 316.0, 283.0, 299.0, 319.0, 317.0, 305.0, 322.0, 317.0, 319.0, 319.0, 311.0, 323.0, 310.0, 324.0, 312.0, 268.0, 268.0, 287.0, 295.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 316.0, 314.0, 317.0, 322.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8231192050910455, "mean_processing_ms": 0.23305365514326026, "mean_inference_ms": 1.4238788752206395}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9720000, "num_steps_sampled": 5184000, "sample_time_ms": 23218.954, "load_time_ms": 37.026, "grad_time_ms": 10357.169, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004148914944380522, "policy_loss": -0.003635302884504199, "vf_loss": 83.42072296142578, "vf_explained_var": 0.7650599479675293, "kl": 0.001778147299773991, "entropy": 1.115702509880066, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5184000, "episodes_total": 12960, "training_iteration": 405, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-15-06", "timestamp": 1660259706, "time_this_iter_s": 32.90920972824097, "time_total_s": 18116.080624818802, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18116.080624818802, "timesteps_since_restore": 5184000, "iterations_since_restore": 405, "perf": {"cpu_util_percent": 30.800000000000004, "ram_util_percent": 58.806521739130446}} -{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 615.11, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 265.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 307.555}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.11, "shaped_reward_min": 172, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.51, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.7, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.03, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.96, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.66, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.78, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, 
"soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.31, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.03, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.03, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 636.0, 579.0, 630.0, 630.0, 630.0, 630.0, 582.0, 630.0, 576.0, 636.0, 624.0, 630.0, 639.0, 624.0, 636.0, 576.0, 587.0, 639.0, 630.0, 633.0, 630.0, 593.0, 633.0, 579.0, 630.0, 639.0, 587.0, 582.0, 636.0, 582.0, 582.0, 627.0, 630.0, 576.0, 636.0, 630.0, 587.0, 636.0, 633.0, 582.0, 578.0, 612.0, 630.0, 633.0, 633.0, 582.0, 633.0, 630.0, 636.0, 630.0, 630.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 536.0, 582.0, 636.0, 630.0, 639.0, 630.0, 639.0, 582.0, 579.0, 633.0, 587.0, 582.0, 627.0, 539.0, 636.0, 633.0, 639.0, 630.0, 582.0, 579.0, 639.0, 636.0, 590.0, 627.0, 633.0, 639.0, 579.0, 587.0, 579.0, 636.0, 636.0, 630.0, 636.0, 633.0, 636.0, 584.0, 584.0, 633.0, 590.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 308.0, 319.0, 314.0, 314.0, 322.0, 285.0, 294.0, 316.0, 314.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 298.0, 284.0, 314.0, 316.0, 290.0, 286.0, 316.0, 320.0, 314.0, 310.0, 316.0, 314.0, 322.0, 317.0, 310.0, 
314.0, 319.0, 317.0, 291.0, 285.0, 293.0, 294.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 302.0, 291.0, 316.0, 317.0, 299.0, 280.0, 316.0, 314.0, 319.0, 320.0, 296.0, 291.0, 291.0, 291.0, 308.0, 328.0, 293.0, 289.0, 299.0, 283.0, 308.0, 319.0, 319.0, 311.0, 291.0, 285.0, 319.0, 317.0, 310.0, 320.0, 293.0, 294.0, 324.0, 312.0, 319.0, 314.0, 288.0, 294.0, 301.0, 277.0, 307.0, 305.0, 321.0, 309.0, 317.0, 316.0, 319.0, 314.0, 283.0, 299.0, 314.0, 319.0, 313.0, 317.0, 319.0, 317.0, 314.0, 316.0, 314.0, 316.0, 283.0, 299.0, 319.0, 317.0, 305.0, 322.0, 317.0, 319.0, 319.0, 311.0, 323.0, 310.0, 324.0, 312.0, 268.0, 268.0, 287.0, 295.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 316.0, 314.0, 317.0, 322.0, 288.0, 294.0, 291.0, 288.0, 319.0, 314.0, 302.0, 285.0, 290.0, 292.0, 313.0, 314.0, 265.0, 274.0, 322.0, 314.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 288.0, 294.0, 292.0, 287.0, 322.0, 317.0, 324.0, 312.0, 299.0, 291.0, 308.0, 319.0, 315.0, 318.0, 319.0, 320.0, 285.0, 294.0, 293.0, 294.0, 293.0, 286.0, 313.0, 323.0, 316.0, 320.0, 316.0, 314.0, 316.0, 320.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 299.0, 285.0, 319.0, 314.0, 296.0, 294.0, 319.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8222123786920608, "mean_processing_ms": 0.23287368102080933, "mean_inference_ms": 1.4230607054783406}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9744000, "num_steps_sampled": 5196800, "sample_time_ms": 23260.688, "load_time_ms": 37.047, "grad_time_ms": 10547.131, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006928029470145702, "policy_loss": -0.006872573401778936, "vf_loss": 81.25198364257812, "vf_explained_var": 0.7684532999992371, "kl": 0.0019740292336791754, "entropy": 1.1196430921554565, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5196800, "episodes_total": 12992, 
"training_iteration": 406, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-15-37", "timestamp": 1660259737, "time_this_iter_s": 30.75086998939514, "time_total_s": 18146.831494808197, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18146.831494808197, "timesteps_since_restore": 5196800, "iterations_since_restore": 406, "perf": {"cpu_util_percent": 34.53863636363637, "ram_util_percent": 58.81590909090908}} -{"episode_reward_max": 639.0, "episode_reward_min": 3.0, "episode_reward_mean": 605.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 302.91}, "custom_metrics": {"sparse_reward_mean": 209.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 187.02, "shaped_reward_min": 3, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.56, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.1, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.67, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.95, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.8, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.96, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.95, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.95, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 633.0, 587.0, 544.0, 630.0, 587.0, 587.0, 582.0, 633.0, 3.0, 579.0, 582.0, 587.0, 579.0, 627.0, 633.0, 633.0, 582.0, 582.0, 636.0, 567.0, 587.0, 636.0, 630.0, 636.0, 576.0, 636.0, 582.0, 587.0, 636.0, 636.0, 639.0, 630.0, 639.0, 582.0, 579.0, 633.0, 587.0, 582.0, 627.0, 539.0, 636.0, 633.0, 639.0, 630.0, 582.0, 579.0, 639.0, 636.0, 590.0, 627.0, 633.0, 639.0, 579.0, 587.0, 579.0, 636.0, 636.0, 630.0, 636.0, 633.0, 636.0, 584.0, 584.0, 633.0, 590.0, 630.0, 627.0, 633.0, 636.0, 579.0, 630.0, 630.0, 630.0, 630.0, 582.0, 630.0, 576.0, 636.0, 624.0, 630.0, 639.0, 624.0, 636.0, 576.0, 587.0, 639.0, 630.0, 633.0, 630.0, 593.0, 633.0, 579.0, 630.0, 639.0, 587.0, 582.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 324.0, 316.0, 317.0, 319.0, 
314.0, 296.0, 291.0, 270.0, 274.0, 306.0, 324.0, 299.0, 288.0, 296.0, 291.0, 298.0, 284.0, 316.0, 317.0, 0.0, 3.0, 287.0, 292.0, 292.0, 290.0, 293.0, 294.0, 299.0, 280.0, 316.0, 311.0, 319.0, 314.0, 322.0, 311.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 281.0, 286.0, 289.0, 298.0, 316.0, 320.0, 316.0, 314.0, 324.0, 312.0, 280.0, 296.0, 316.0, 320.0, 301.0, 281.0, 290.0, 297.0, 319.0, 317.0, 319.0, 317.0, 319.0, 320.0, 316.0, 314.0, 317.0, 322.0, 288.0, 294.0, 291.0, 288.0, 319.0, 314.0, 302.0, 285.0, 290.0, 292.0, 313.0, 314.0, 265.0, 274.0, 322.0, 314.0, 316.0, 317.0, 319.0, 320.0, 316.0, 314.0, 288.0, 294.0, 292.0, 287.0, 322.0, 317.0, 324.0, 312.0, 299.0, 291.0, 308.0, 319.0, 315.0, 318.0, 319.0, 320.0, 285.0, 294.0, 293.0, 294.0, 293.0, 286.0, 313.0, 323.0, 316.0, 320.0, 316.0, 314.0, 316.0, 320.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 299.0, 285.0, 319.0, 314.0, 296.0, 294.0, 319.0, 311.0, 319.0, 308.0, 319.0, 314.0, 314.0, 322.0, 285.0, 294.0, 316.0, 314.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 298.0, 284.0, 314.0, 316.0, 290.0, 286.0, 316.0, 320.0, 314.0, 310.0, 316.0, 314.0, 322.0, 317.0, 310.0, 314.0, 319.0, 317.0, 291.0, 285.0, 293.0, 294.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 302.0, 291.0, 316.0, 317.0, 299.0, 280.0, 316.0, 314.0, 319.0, 320.0, 296.0, 291.0, 291.0, 291.0, 308.0, 328.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8213047734882832, "mean_processing_ms": 0.2326926130887054, "mean_inference_ms": 1.4220964273978254}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9768000, "num_steps_sampled": 5209600, "sample_time_ms": 23078.335, "load_time_ms": 37.045, "grad_time_ms": 10614.832, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037456925492733717, "policy_loss": -0.004354165401309729, "vf_loss": 86.5316162109375, "vf_explained_var": 0.7801554799079895, "kl": 0.0024353403132408857, "entropy": 1.1066083908081055, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5209600, "episodes_total": 13024, "training_iteration": 407, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-16-07", "timestamp": 1660259767, "time_this_iter_s": 29.56272530555725, "time_total_s": 18176.394220113754, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18176.394220113754, "timesteps_since_restore": 5209600, "iterations_since_restore": 407, "perf": {"cpu_util_percent": 33.98809523809524, "ram_util_percent": 58.745238095238086}} -{"episode_reward_max": 639.0, "episode_reward_min": 3.0, "episode_reward_mean": 607.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.94}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 187.08, "shaped_reward_min": 3, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 
0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.2, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, 
"useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.2, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.2, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 
0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 636.0, 636.0, 633.0, 618.0, 582.0, 630.0, 636.0, 636.0, 633.0, 636.0, 639.0, 630.0, 576.0, 582.0, 633.0, 639.0, 579.0, 636.0, 636.0, 639.0, 579.0, 636.0, 630.0, 633.0, 636.0, 579.0, 630.0, 633.0, 516.0, 633.0, 584.0, 633.0, 590.0, 630.0, 627.0, 633.0, 636.0, 579.0, 630.0, 630.0, 630.0, 630.0, 582.0, 630.0, 576.0, 636.0, 624.0, 630.0, 639.0, 624.0, 636.0, 576.0, 587.0, 639.0, 630.0, 633.0, 630.0, 593.0, 633.0, 579.0, 630.0, 639.0, 587.0, 582.0, 636.0, 582.0, 633.0, 633.0, 633.0, 587.0, 544.0, 630.0, 587.0, 587.0, 582.0, 633.0, 3.0, 579.0, 582.0, 587.0, 579.0, 627.0, 633.0, 633.0, 582.0, 582.0, 636.0, 567.0, 587.0, 636.0, 630.0, 636.0, 576.0, 636.0, 582.0, 587.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 314.0, 315.0, 321.0, 314.0, 322.0, 319.0, 317.0, 311.0, 322.0, 312.0, 306.0, 294.0, 288.0, 310.0, 320.0, 314.0, 322.0, 314.0, 322.0, 316.0, 317.0, 316.0, 320.0, 317.0, 322.0, 319.0, 311.0, 281.0, 295.0, 293.0, 289.0, 316.0, 317.0, 322.0, 317.0, 293.0, 286.0, 316.0, 320.0, 325.0, 311.0, 319.0, 320.0, 293.0, 286.0, 319.0, 317.0, 319.0, 311.0, 317.0, 316.0, 314.0, 322.0, 291.0, 288.0, 314.0, 316.0, 313.0, 320.0, 265.0, 251.0, 321.0, 312.0, 299.0, 285.0, 319.0, 314.0, 296.0, 294.0, 319.0, 311.0, 319.0, 308.0, 319.0, 314.0, 314.0, 322.0, 285.0, 294.0, 316.0, 314.0, 313.0, 317.0, 316.0, 314.0, 313.0, 317.0, 298.0, 284.0, 314.0, 316.0, 290.0, 286.0, 316.0, 320.0, 314.0, 310.0, 316.0, 314.0, 322.0, 317.0, 310.0, 314.0, 319.0, 317.0, 291.0, 285.0, 293.0, 294.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 316.0, 314.0, 302.0, 291.0, 316.0, 317.0, 299.0, 280.0, 316.0, 314.0, 319.0, 320.0, 296.0, 291.0, 291.0, 291.0, 308.0, 328.0, 293.0, 289.0, 309.0, 324.0, 316.0, 317.0, 319.0, 314.0, 296.0, 291.0, 270.0, 274.0, 306.0, 324.0, 299.0, 288.0, 296.0, 291.0, 298.0, 284.0, 316.0, 317.0, 0.0, 3.0, 287.0, 292.0, 292.0, 290.0, 293.0, 294.0, 299.0, 280.0, 316.0, 311.0, 319.0, 314.0, 322.0, 311.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 281.0, 286.0, 289.0, 298.0, 316.0, 320.0, 316.0, 314.0, 324.0, 312.0, 280.0, 296.0, 316.0, 320.0, 301.0, 281.0, 290.0, 297.0, 319.0, 317.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8203957464749945, "mean_processing_ms": 0.2325105755388932, "mean_inference_ms": 1.4210101321453532}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9792000, "num_steps_sampled": 5222400, "sample_time_ms": 22766.104, "load_time_ms": 36.963, "grad_time_ms": 10377.417, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0032091455068439245, "policy_loss": -0.004078669007867575, 
"vf_loss": 78.43866729736328, "vf_explained_var": 0.7684862613677979, "kl": 0.00216904329136014, "entropy": 1.1121129989624023, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5222400, "episodes_total": 13056, "training_iteration": 408, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-16-35", "timestamp": 1660259795, "time_this_iter_s": 28.361918210983276, "time_total_s": 18204.756138324738, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": 
{"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": 
{"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18204.756138324738, "timesteps_since_restore": 5222400, "iterations_since_restore": 408, "perf": {"cpu_util_percent": 30.642500000000002, "ram_util_percent": 58.74749999999999}} -{"episode_reward_max": 639.0, "episode_reward_min": 3.0, "episode_reward_mean": 606.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 328.0}, "policy_reward_mean": {"ppo": 303.12}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 186.64, "shaped_reward_min": 3, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.61, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.16, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.69, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.15, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.76, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.15, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.15, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 570.0, 633.0, 633.0, 584.0, 621.0, 636.0, 584.0, 630.0, 576.0, 587.0, 630.0, 630.0, 630.0, 636.0, 587.0, 582.0, 630.0, 639.0, 633.0, 633.0, 465.0, 636.0, 587.0, 630.0, 630.0, 587.0, 636.0, 630.0, 630.0, 636.0, 587.0, 582.0, 636.0, 582.0, 633.0, 633.0, 633.0, 587.0, 544.0, 630.0, 587.0, 587.0, 582.0, 633.0, 3.0, 579.0, 582.0, 587.0, 579.0, 627.0, 633.0, 633.0, 582.0, 582.0, 636.0, 567.0, 587.0, 636.0, 630.0, 636.0, 576.0, 636.0, 582.0, 587.0, 636.0, 636.0, 636.0, 636.0, 636.0, 636.0, 633.0, 618.0, 582.0, 630.0, 636.0, 636.0, 633.0, 636.0, 639.0, 630.0, 576.0, 582.0, 633.0, 639.0, 579.0, 636.0, 636.0, 639.0, 579.0, 636.0, 630.0, 633.0, 636.0, 579.0, 630.0, 633.0, 516.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 322.0, 305.0, 325.0, 279.0, 291.0, 319.0, 314.0, 316.0, 317.0, 287.0, 297.0, 311.0, 310.0, 321.0, 315.0, 299.0, 285.0, 314.0, 316.0, 282.0, 294.0, 296.0, 291.0, 318.0, 312.0, 316.0, 314.0, 314.0, 316.0, 316.0, 320.0, 299.0, 288.0, 294.0, 288.0, 313.0, 317.0, 319.0, 320.0, 314.0, 319.0, 317.0, 316.0, 239.0, 226.0, 319.0, 317.0, 301.0, 286.0, 321.0, 309.0, 316.0, 314.0, 293.0, 294.0, 317.0, 319.0, 313.0, 317.0, 311.0, 319.0, 321.0, 315.0, 296.0, 291.0, 291.0, 291.0, 308.0, 328.0, 293.0, 289.0, 309.0, 324.0, 316.0, 317.0, 319.0, 314.0, 296.0, 291.0, 270.0, 274.0, 306.0, 324.0, 299.0, 288.0, 296.0, 291.0, 298.0, 284.0, 316.0, 317.0, 0.0, 3.0, 287.0, 292.0, 292.0, 290.0, 293.0, 294.0, 299.0, 280.0, 316.0, 311.0, 319.0, 314.0, 322.0, 311.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 281.0, 286.0, 289.0, 298.0, 316.0, 320.0, 316.0, 314.0, 324.0, 312.0, 280.0, 296.0, 316.0, 320.0, 301.0, 281.0, 290.0, 297.0, 319.0, 317.0, 319.0, 317.0, 322.0, 314.0, 315.0, 321.0, 314.0, 322.0, 319.0, 317.0, 311.0, 322.0, 312.0, 306.0, 294.0, 288.0, 310.0, 320.0, 314.0, 322.0, 314.0, 322.0, 316.0, 317.0, 316.0, 320.0, 317.0, 322.0, 319.0, 311.0, 281.0, 295.0, 293.0, 289.0, 316.0, 317.0, 322.0, 317.0, 293.0, 286.0, 316.0, 320.0, 325.0, 311.0, 319.0, 320.0, 293.0, 286.0, 319.0, 317.0, 319.0, 311.0, 317.0, 316.0, 314.0, 322.0, 291.0, 288.0, 314.0, 316.0, 313.0, 320.0, 265.0, 251.0, 321.0, 312.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8194928769561236, "mean_processing_ms": 0.23232969366347853, "mean_inference_ms": 1.4199784153489992}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9816000, "num_steps_sampled": 5235200, "sample_time_ms": 22579.389, "load_time_ms": 36.726, "grad_time_ms": 
10063.447, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015366753796115518, "policy_loss": -0.006077593192458153, "vf_loss": 81.69181060791016, "vf_explained_var": 0.7707114219665527, "kl": 0.001978269312530756, "entropy": 1.1098326444625854, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5235200, "episodes_total": 13088, "training_iteration": 409, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-17-04", "timestamp": 1660259824, "time_this_iter_s": 29.396647930145264, "time_total_s": 18234.152786254883, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": 
["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18234.152786254883, "timesteps_since_restore": 5235200, "iterations_since_restore": 409, "perf": {"cpu_util_percent": 30.553658536585367, "ram_util_percent": 58.824390243902435}} -{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 616.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.15}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 189.1, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.79, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.64, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.48, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.97, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 18.03, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.6, 
"useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 18.03, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 18.03, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 624.0, 636.0, 630.0, 627.0, 633.0, 587.0, 633.0, 639.0, 581.0, 630.0, 570.0, 636.0, 636.0, 587.0, 633.0, 627.0, 587.0, 633.0, 627.0, 582.0, 636.0, 633.0, 627.0, 587.0, 572.0, 582.0, 630.0, 630.0, 639.0, 636.0, 636.0, 582.0, 587.0, 636.0, 636.0, 636.0, 636.0, 636.0, 636.0, 633.0, 618.0, 582.0, 630.0, 636.0, 636.0, 633.0, 636.0, 639.0, 630.0, 576.0, 582.0, 633.0, 639.0, 579.0, 636.0, 636.0, 639.0, 579.0, 636.0, 630.0, 633.0, 636.0, 579.0, 630.0, 633.0, 516.0, 633.0, 633.0, 630.0, 570.0, 633.0, 633.0, 584.0, 621.0, 636.0, 584.0, 630.0, 576.0, 587.0, 630.0, 630.0, 630.0, 636.0, 587.0, 582.0, 630.0, 639.0, 633.0, 633.0, 465.0, 636.0, 587.0, 630.0, 630.0, 587.0, 636.0, 630.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 310.0, 314.0, 321.0, 315.0, 316.0, 314.0, 305.0, 322.0, 322.0, 311.0, 298.0, 289.0, 316.0, 317.0, 312.0, 327.0, 287.0, 294.0, 316.0, 314.0, 299.0, 271.0, 312.0, 324.0, 314.0, 322.0, 301.0, 286.0, 321.0, 312.0, 316.0, 311.0, 293.0, 294.0, 313.0, 320.0, 315.0, 312.0, 286.0, 296.0, 317.0, 319.0, 319.0, 314.0, 310.0, 317.0, 293.0, 294.0, 284.0, 288.0, 287.0, 295.0, 316.0, 314.0, 319.0, 311.0, 314.0, 325.0, 319.0, 317.0, 317.0, 319.0, 301.0, 281.0, 290.0, 297.0, 319.0, 317.0, 319.0, 317.0, 322.0, 314.0, 315.0, 321.0, 314.0, 322.0, 319.0, 317.0, 311.0, 322.0, 312.0, 306.0, 294.0, 288.0, 310.0, 320.0, 314.0, 322.0, 314.0, 322.0, 316.0, 317.0, 316.0, 320.0, 317.0, 322.0, 319.0, 311.0, 281.0, 295.0, 293.0, 289.0, 316.0, 317.0, 322.0, 317.0, 293.0, 286.0, 316.0, 320.0, 325.0, 311.0, 319.0, 320.0, 293.0, 286.0, 319.0, 317.0, 319.0, 311.0, 317.0, 316.0, 314.0, 322.0, 291.0, 288.0, 314.0, 316.0, 313.0, 320.0, 265.0, 251.0, 321.0, 312.0, 311.0, 322.0, 305.0, 325.0, 279.0, 291.0, 319.0, 314.0, 316.0, 317.0, 287.0, 297.0, 311.0, 310.0, 321.0, 315.0, 299.0, 285.0, 314.0, 316.0, 282.0, 294.0, 296.0, 291.0, 318.0, 312.0, 316.0, 314.0, 314.0, 316.0, 316.0, 320.0, 299.0, 288.0, 294.0, 288.0, 313.0, 317.0, 319.0, 320.0, 314.0, 319.0, 317.0, 316.0, 239.0, 226.0, 319.0, 317.0, 301.0, 286.0, 321.0, 309.0, 316.0, 314.0, 293.0, 294.0, 317.0, 319.0, 313.0, 317.0, 311.0, 319.0, 321.0, 315.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8185939281976542, "mean_processing_ms": 0.23214957034967199, 
"mean_inference_ms": 1.4189514044158715}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9840000, "num_steps_sampled": 5248000, "sample_time_ms": 22039.582, "load_time_ms": 37.045, "grad_time_ms": 9893.172, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014178849523887038, "policy_loss": -0.008465434424579144, "vf_loss": 76.02017974853516, "vf_explained_var": 0.7725793719291687, "kl": 0.0019942354410886765, "entropy": 1.1089389324188232, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5248000, "episodes_total": 13120, "training_iteration": 410, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-17-33", "timestamp": 1660259853, "time_this_iter_s": 28.213119983673096, "time_total_s": 18262.365906238556, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, 
"num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18262.365906238556, "timesteps_since_restore": 5248000, "iterations_since_restore": 410, "perf": {"cpu_util_percent": 35.04, "ram_util_percent": 58.745000000000005}} -{"episode_reward_max": 639.0, "episode_reward_min": 465.0, "episode_reward_mean": 615.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 307.625}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.45, "shaped_reward_min": 145, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 
0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.79, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.29, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 18.07, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.11, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.55, 
"dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.84, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.77, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.11, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, 
"viable_onion_potting_agent_1_mean": 18.11, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 633.0, 576.0, 630.0, 587.0, 630.0, 639.0, 627.0, 636.0, 627.0, 630.0, 630.0, 521.0, 636.0, 576.0, 633.0, 633.0, 636.0, 584.0, 624.0, 633.0, 630.0, 633.0, 584.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 630.0, 579.0, 630.0, 633.0, 516.0, 633.0, 633.0, 630.0, 570.0, 633.0, 633.0, 584.0, 621.0, 636.0, 584.0, 630.0, 576.0, 587.0, 630.0, 630.0, 630.0, 636.0, 587.0, 582.0, 630.0, 639.0, 633.0, 633.0, 465.0, 636.0, 587.0, 630.0, 630.0, 587.0, 636.0, 630.0, 630.0, 636.0, 587.0, 624.0, 636.0, 630.0, 627.0, 633.0, 587.0, 633.0, 639.0, 
581.0, 630.0, 570.0, 636.0, 636.0, 587.0, 633.0, 627.0, 587.0, 633.0, 627.0, 582.0, 636.0, 633.0, 627.0, 587.0, 572.0, 582.0, 630.0, 630.0, 639.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 314.0, 319.0, 286.0, 290.0, 319.0, 311.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 310.0, 317.0, 316.0, 320.0, 313.0, 314.0, 324.0, 306.0, 320.0, 310.0, 254.0, 267.0, 319.0, 317.0, 295.0, 281.0, 319.0, 314.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 316.0, 308.0, 316.0, 317.0, 311.0, 319.0, 313.0, 320.0, 293.0, 291.0, 317.0, 316.0, 314.0, 322.0, 314.0, 319.0, 326.0, 307.0, 321.0, 315.0, 319.0, 317.0, 319.0, 311.0, 290.0, 289.0, 314.0, 316.0, 313.0, 320.0, 265.0, 251.0, 321.0, 312.0, 311.0, 322.0, 305.0, 325.0, 279.0, 291.0, 319.0, 314.0, 316.0, 317.0, 287.0, 297.0, 311.0, 310.0, 321.0, 315.0, 299.0, 285.0, 314.0, 316.0, 282.0, 294.0, 296.0, 291.0, 318.0, 312.0, 316.0, 314.0, 314.0, 316.0, 316.0, 320.0, 299.0, 288.0, 294.0, 288.0, 313.0, 317.0, 319.0, 320.0, 314.0, 319.0, 317.0, 316.0, 239.0, 226.0, 319.0, 317.0, 301.0, 286.0, 321.0, 309.0, 316.0, 314.0, 293.0, 294.0, 317.0, 319.0, 313.0, 317.0, 311.0, 319.0, 321.0, 315.0, 293.0, 294.0, 310.0, 314.0, 321.0, 315.0, 316.0, 314.0, 305.0, 322.0, 322.0, 311.0, 298.0, 289.0, 316.0, 317.0, 312.0, 327.0, 287.0, 294.0, 316.0, 314.0, 299.0, 271.0, 312.0, 324.0, 314.0, 322.0, 301.0, 286.0, 321.0, 312.0, 316.0, 311.0, 293.0, 294.0, 313.0, 320.0, 315.0, 312.0, 286.0, 296.0, 317.0, 319.0, 319.0, 314.0, 310.0, 317.0, 293.0, 
294.0, 284.0, 288.0, 287.0, 295.0, 316.0, 314.0, 319.0, 311.0, 314.0, 325.0, 319.0, 317.0, 317.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8176992911646718, "mean_processing_ms": 0.23196962818643072, "mean_inference_ms": 1.417972483148229}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9864000, "num_steps_sampled": 5260800, "sample_time_ms": 21770.07, "load_time_ms": 36.824, "grad_time_ms": 9519.356, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001577894203364849, "policy_loss": -0.005276820156723261, "vf_loss": 74.05913543701172, "vf_explained_var": 0.7708218693733215, "kl": 0.002156370086595416, "entropy": 1.1023942232131958, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5260800, "episodes_total": 13152, "training_iteration": 411, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-18-03", "timestamp": 1660259883, "time_this_iter_s": 30.32603693008423, "time_total_s": 18292.69194316864, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18292.69194316864, "timesteps_since_restore": 5260800, "iterations_since_restore": 411, "perf": {"cpu_util_percent": 34.49999999999999, "ram_util_percent": 58.81162790697674}} -{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 608.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.465}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.93, 
"shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.53, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.57, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.04, 
"potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.69, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.04, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 630.0, 582.0, 633.0, 633.0, 639.0, 584.0, 639.0, 197.0, 579.0, 465.0, 579.0, 636.0, 630.0, 639.0, 630.0, 630.0, 413.0, 582.0, 630.0, 576.0, 633.0, 639.0, 582.0, 633.0, 630.0, 630.0, 584.0, 587.0, 522.0, 636.0, 630.0, 636.0, 630.0, 630.0, 636.0, 587.0, 624.0, 636.0, 630.0, 627.0, 633.0, 587.0, 633.0, 639.0, 581.0, 630.0, 
570.0, 636.0, 636.0, 587.0, 633.0, 627.0, 587.0, 633.0, 627.0, 582.0, 636.0, 633.0, 627.0, 587.0, 572.0, 582.0, 630.0, 630.0, 639.0, 636.0, 636.0, 582.0, 633.0, 576.0, 630.0, 587.0, 630.0, 639.0, 627.0, 636.0, 627.0, 630.0, 630.0, 521.0, 636.0, 576.0, 633.0, 633.0, 636.0, 584.0, 624.0, 633.0, 630.0, 633.0, 584.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 314.0, 319.0, 311.0, 322.0, 317.0, 322.0, 293.0, 291.0, 316.0, 323.0, 100.0, 97.0, 294.0, 285.0, 229.0, 236.0, 288.0, 291.0, 321.0, 315.0, 311.0, 319.0, 317.0, 322.0, 319.0, 311.0, 311.0, 319.0, 207.0, 206.0, 293.0, 289.0, 311.0, 319.0, 285.0, 291.0, 311.0, 322.0, 319.0, 320.0, 296.0, 286.0, 318.0, 315.0, 321.0, 309.0, 316.0, 314.0, 287.0, 297.0, 296.0, 291.0, 262.0, 260.0, 319.0, 317.0, 313.0, 317.0, 317.0, 319.0, 313.0, 317.0, 311.0, 319.0, 321.0, 315.0, 293.0, 294.0, 310.0, 314.0, 321.0, 315.0, 316.0, 314.0, 305.0, 322.0, 322.0, 311.0, 298.0, 289.0, 316.0, 317.0, 312.0, 327.0, 287.0, 294.0, 316.0, 314.0, 299.0, 271.0, 312.0, 324.0, 314.0, 322.0, 301.0, 286.0, 321.0, 312.0, 316.0, 311.0, 293.0, 294.0, 313.0, 320.0, 315.0, 312.0, 286.0, 296.0, 317.0, 319.0, 319.0, 314.0, 310.0, 317.0, 293.0, 294.0, 284.0, 288.0, 287.0, 295.0, 316.0, 314.0, 319.0, 311.0, 314.0, 325.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 314.0, 319.0, 286.0, 290.0, 319.0, 311.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 310.0, 317.0, 316.0, 320.0, 313.0, 
314.0, 324.0, 306.0, 320.0, 310.0, 254.0, 267.0, 319.0, 317.0, 295.0, 281.0, 319.0, 314.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 316.0, 308.0, 316.0, 317.0, 311.0, 319.0, 313.0, 320.0, 293.0, 291.0, 317.0, 316.0, 314.0, 322.0, 314.0, 319.0, 326.0, 307.0, 321.0, 315.0, 319.0, 317.0, 319.0, 311.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8168045018063097, "mean_processing_ms": 0.23178953609537822, "mean_inference_ms": 1.4169011715931346}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9888000, "num_steps_sampled": 5273600, "sample_time_ms": 21329.9, "load_time_ms": 36.743, "grad_time_ms": 9279.775, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034279574174433947, "policy_loss": -0.004527573008090258, "vf_loss": 85.10655975341797, "vf_explained_var": 0.7758853435516357, "kl": 0.0018181651830673218, "entropy": 1.1102546453475952, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5273600, "episodes_total": 13184, "training_iteration": 412, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-18-31", "timestamp": 1660259911, "time_this_iter_s": 28.26536202430725, "time_total_s": 18320.957305192947, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18320.957305192947, "timesteps_since_restore": 5273600, "iterations_since_restore": 412, "perf": {"cpu_util_percent": 35.269999999999996, "ram_util_percent": 59.315}} -{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 608.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 
97.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 304.08}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.96, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.28, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.02, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.7, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.7, "optimal_onion_potting_agent_1_min": 7, 
"optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.7, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 636.0, 587.0, 621.0, 630.0, 582.0, 636.0, 587.0, 633.0, 630.0, 633.0, 627.0, 587.0, 633.0, 627.0, 630.0, 633.0, 
587.0, 587.0, 633.0, 587.0, 587.0, 587.0, 636.0, 579.0, 627.0, 633.0, 582.0, 630.0, 630.0, 590.0, 633.0, 630.0, 639.0, 636.0, 636.0, 582.0, 633.0, 576.0, 630.0, 587.0, 630.0, 639.0, 627.0, 636.0, 627.0, 630.0, 630.0, 521.0, 636.0, 576.0, 633.0, 633.0, 636.0, 584.0, 624.0, 633.0, 630.0, 633.0, 584.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 630.0, 579.0, 630.0, 630.0, 582.0, 633.0, 633.0, 639.0, 584.0, 639.0, 197.0, 579.0, 465.0, 579.0, 636.0, 630.0, 639.0, 630.0, 630.0, 413.0, 582.0, 630.0, 576.0, 633.0, 639.0, 582.0, 633.0, 630.0, 630.0, 584.0, 587.0, 522.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 319.0, 317.0, 298.0, 289.0, 317.0, 304.0, 317.0, 313.0, 286.0, 296.0, 319.0, 317.0, 288.0, 299.0, 320.0, 313.0, 316.0, 314.0, 316.0, 317.0, 322.0, 305.0, 285.0, 302.0, 316.0, 317.0, 314.0, 313.0, 314.0, 316.0, 317.0, 316.0, 290.0, 297.0, 299.0, 288.0, 316.0, 317.0, 288.0, 299.0, 288.0, 299.0, 288.0, 299.0, 322.0, 314.0, 284.0, 295.0, 316.0, 311.0, 317.0, 316.0, 285.0, 297.0, 313.0, 317.0, 311.0, 319.0, 299.0, 291.0, 318.0, 315.0, 319.0, 311.0, 314.0, 325.0, 319.0, 317.0, 317.0, 319.0, 294.0, 288.0, 314.0, 319.0, 286.0, 290.0, 319.0, 311.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 310.0, 317.0, 316.0, 320.0, 313.0, 314.0, 324.0, 306.0, 320.0, 310.0, 254.0, 267.0, 319.0, 317.0, 295.0, 281.0, 319.0, 314.0, 316.0, 317.0, 319.0, 317.0, 291.0, 293.0, 316.0, 308.0, 316.0, 317.0, 311.0, 319.0, 313.0, 320.0, 293.0, 291.0, 317.0, 316.0, 314.0, 322.0, 314.0, 
319.0, 326.0, 307.0, 321.0, 315.0, 319.0, 317.0, 319.0, 311.0, 290.0, 289.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 314.0, 319.0, 311.0, 322.0, 317.0, 322.0, 293.0, 291.0, 316.0, 323.0, 100.0, 97.0, 294.0, 285.0, 229.0, 236.0, 288.0, 291.0, 321.0, 315.0, 311.0, 319.0, 317.0, 322.0, 319.0, 311.0, 311.0, 319.0, 207.0, 206.0, 293.0, 289.0, 311.0, 319.0, 285.0, 291.0, 311.0, 322.0, 319.0, 320.0, 296.0, 286.0, 318.0, 315.0, 321.0, 309.0, 316.0, 314.0, 287.0, 297.0, 296.0, 291.0, 262.0, 260.0, 319.0, 317.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8159183949251133, "mean_processing_ms": 0.23161113375748543, "mean_inference_ms": 1.4159039621746354}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9912000, "num_steps_sampled": 5286400, "sample_time_ms": 21002.373, "load_time_ms": 36.818, "grad_time_ms": 9265.226, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002250772900879383, "policy_loss": -0.005318752024322748, "vf_loss": 81.240478515625, "vf_explained_var": 0.7617523074150085, "kl": 0.0018393909558653831, "entropy": 1.1090354919433594, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5286400, "episodes_total": 13216, "training_iteration": 413, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-19-03", "timestamp": 1660259943, "time_this_iter_s": 31.600411891937256, "time_total_s": 18352.557717084885, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18352.557717084885, "timesteps_since_restore": 5286400, "iterations_since_restore": 413, "perf": {"cpu_util_percent": 31.806666666666665, 
"ram_util_percent": 58.973333333333315}} -{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 608.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 304.405}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 187.21, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 
0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.2, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.7, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.56, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.13, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.15, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 
16.2, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.7, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.2, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.7, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 633.0, 639.0, 530.0, 630.0, 582.0, 582.0, 624.0, 582.0, 630.0, 630.0, 630.0, 630.0, 587.0, 636.0, 639.0, 636.0, 627.0, 633.0, 627.0, 633.0, 636.0, 630.0, 630.0, 630.0, 630.0, 636.0, 639.0, 582.0, 636.0, 639.0, 636.0, 636.0, 636.0, 630.0, 579.0, 630.0, 630.0, 582.0, 633.0, 633.0, 639.0, 584.0, 639.0, 197.0, 579.0, 465.0, 579.0, 636.0, 630.0, 639.0, 630.0, 630.0, 413.0, 582.0, 630.0, 576.0, 633.0, 639.0, 582.0, 633.0, 630.0, 630.0, 584.0, 587.0, 522.0, 636.0, 630.0, 627.0, 636.0, 587.0, 621.0, 630.0, 582.0, 636.0, 587.0, 633.0, 630.0, 633.0, 627.0, 587.0, 633.0, 627.0, 630.0, 633.0, 587.0, 587.0, 633.0, 587.0, 587.0, 587.0, 636.0, 579.0, 627.0, 633.0, 582.0, 630.0, 630.0, 590.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 316.0, 314.0, 319.0, 324.0, 315.0, 260.0, 270.0, 313.0, 317.0, 290.0, 292.0, 286.0, 296.0, 313.0, 311.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 314.0, 316.0, 310.0, 320.0, 288.0, 299.0, 319.0, 317.0, 319.0, 320.0, 314.0, 322.0, 307.0, 320.0, 316.0, 317.0, 311.0, 316.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 313.0, 317.0, 316.0, 314.0, 316.0, 314.0, 314.0, 322.0, 319.0, 320.0, 290.0, 292.0, 319.0, 317.0, 322.0, 317.0, 321.0, 315.0, 321.0, 315.0, 319.0, 317.0, 319.0, 311.0, 290.0, 289.0, 314.0, 316.0, 311.0, 319.0, 291.0, 291.0, 314.0, 319.0, 311.0, 322.0, 317.0, 322.0, 293.0, 291.0, 316.0, 323.0, 100.0, 97.0, 294.0, 285.0, 229.0, 236.0, 288.0, 291.0, 321.0, 
315.0, 311.0, 319.0, 317.0, 322.0, 319.0, 311.0, 311.0, 319.0, 207.0, 206.0, 293.0, 289.0, 311.0, 319.0, 285.0, 291.0, 311.0, 322.0, 319.0, 320.0, 296.0, 286.0, 318.0, 315.0, 321.0, 309.0, 316.0, 314.0, 287.0, 297.0, 296.0, 291.0, 262.0, 260.0, 319.0, 317.0, 313.0, 317.0, 313.0, 314.0, 319.0, 317.0, 298.0, 289.0, 317.0, 304.0, 317.0, 313.0, 286.0, 296.0, 319.0, 317.0, 288.0, 299.0, 320.0, 313.0, 316.0, 314.0, 316.0, 317.0, 322.0, 305.0, 285.0, 302.0, 316.0, 317.0, 314.0, 313.0, 314.0, 316.0, 317.0, 316.0, 290.0, 297.0, 299.0, 288.0, 316.0, 317.0, 288.0, 299.0, 288.0, 299.0, 288.0, 299.0, 322.0, 314.0, 284.0, 295.0, 316.0, 311.0, 317.0, 316.0, 285.0, 297.0, 313.0, 317.0, 311.0, 319.0, 299.0, 291.0, 318.0, 315.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8150462375742643, "mean_processing_ms": 0.23143722383890097, "mean_inference_ms": 1.4151601741062898}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9936000, "num_steps_sampled": 5299200, "sample_time_ms": 21315.468, "load_time_ms": 36.748, "grad_time_ms": 9192.863, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0032192638609558344, "policy_loss": -0.004382268991321325, "vf_loss": 81.57144927978516, "vf_explained_var": 0.7626829147338867, "kl": 0.001976991770789027, "entropy": 1.1112231016159058, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5299200, "episodes_total": 13248, "training_iteration": 414, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-19-39", "timestamp": 1660259979, "time_this_iter_s": 36.33256697654724, "time_total_s": 18388.890284061432, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, 
"train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, 
"custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, 
"_fake_gpus": false}, "time_since_restore": 18388.890284061432, "timesteps_since_restore": 5299200, "iterations_since_restore": 414, "perf": {"cpu_util_percent": 31.756862745098033, "ram_util_percent": 59.57450980392157}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 614.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.4}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.4, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.55, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 13, 
"useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.79, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.43, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.33, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.82, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.69, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.1, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 
0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.33, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.82, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.33, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.82, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 570.0, 633.0, 582.0, 636.0, 630.0, 639.0, 639.0, 584.0, 570.0, 636.0, 627.0, 627.0, 630.0, 630.0, 573.0, 630.0, 582.0, 587.0, 633.0, 582.0, 633.0, 633.0, 630.0, 525.0, 582.0, 633.0, 627.0, 633.0, 627.0, 561.0, 587.0, 522.0, 636.0, 630.0, 627.0, 636.0, 587.0, 621.0, 630.0, 582.0, 636.0, 587.0, 633.0, 630.0, 633.0, 627.0, 587.0, 633.0, 627.0, 630.0, 633.0, 587.0, 587.0, 633.0, 587.0, 587.0, 587.0, 636.0, 579.0, 627.0, 633.0, 582.0, 630.0, 630.0, 590.0, 633.0, 627.0, 633.0, 639.0, 530.0, 630.0, 582.0, 582.0, 624.0, 582.0, 630.0, 630.0, 630.0, 630.0, 587.0, 636.0, 639.0, 636.0, 627.0, 633.0, 627.0, 633.0, 636.0, 630.0, 630.0, 630.0, 630.0, 636.0, 639.0, 582.0, 636.0, 639.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 317.0, 316.0, 284.0, 286.0, 316.0, 317.0, 291.0, 291.0, 314.0, 322.0, 316.0, 314.0, 324.0, 315.0, 319.0, 320.0, 287.0, 297.0, 287.0, 283.0, 321.0, 315.0, 313.0, 314.0, 316.0, 311.0, 319.0, 311.0, 311.0, 319.0, 285.0, 288.0, 318.0, 312.0, 286.0, 296.0, 291.0, 296.0, 314.0, 319.0, 280.0, 302.0, 314.0, 319.0, 316.0, 317.0, 319.0, 311.0, 265.0, 260.0, 293.0, 289.0, 314.0, 319.0, 310.0, 317.0, 314.0, 319.0, 318.0, 309.0, 285.0, 276.0, 296.0, 291.0, 262.0, 260.0, 319.0, 317.0, 313.0, 317.0, 313.0, 
314.0, 319.0, 317.0, 298.0, 289.0, 317.0, 304.0, 317.0, 313.0, 286.0, 296.0, 319.0, 317.0, 288.0, 299.0, 320.0, 313.0, 316.0, 314.0, 316.0, 317.0, 322.0, 305.0, 285.0, 302.0, 316.0, 317.0, 314.0, 313.0, 314.0, 316.0, 317.0, 316.0, 290.0, 297.0, 299.0, 288.0, 316.0, 317.0, 288.0, 299.0, 288.0, 299.0, 288.0, 299.0, 322.0, 314.0, 284.0, 295.0, 316.0, 311.0, 317.0, 316.0, 285.0, 297.0, 313.0, 317.0, 311.0, 319.0, 299.0, 291.0, 318.0, 315.0, 311.0, 316.0, 314.0, 319.0, 324.0, 315.0, 260.0, 270.0, 313.0, 317.0, 290.0, 292.0, 286.0, 296.0, 313.0, 311.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 314.0, 316.0, 310.0, 320.0, 288.0, 299.0, 319.0, 317.0, 319.0, 320.0, 314.0, 322.0, 307.0, 320.0, 316.0, 317.0, 311.0, 316.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 313.0, 317.0, 316.0, 314.0, 316.0, 314.0, 314.0, 322.0, 319.0, 320.0, 290.0, 292.0, 319.0, 317.0, 322.0, 317.0, 321.0, 315.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8141797334624801, "mean_processing_ms": 0.23126474228719665, "mean_inference_ms": 1.4144802295158576}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9960000, "num_steps_sampled": 5312000, "sample_time_ms": 21138.109, "load_time_ms": 36.708, "grad_time_ms": 9062.52, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00037816105759702623, "policy_loss": -0.006607938092201948, "vf_loss": 75.41075897216797, "vf_explained_var": 0.7763264775276184, "kl": 0.0018363663693889976, "entropy": 1.1099668741226196, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5312000, "episodes_total": 13280, "training_iteration": 415, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-20-09", "timestamp": 1660260009, "time_this_iter_s": 29.82709288597107, "time_total_s": 18418.717376947403, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18418.717376947403, "timesteps_since_restore": 5312000, "iterations_since_restore": 415, "perf": {"cpu_util_percent": 32.99761904761905, "ram_util_percent": 59.095238095238095}} -{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 615.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.985}, "custom_metrics": {"sparse_reward_mean": 213.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.37, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, 
"onion_pickup_agent_1_mean": 18.66, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.96, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.99, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.94, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, 
"soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.99, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.99, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 552.0, 630.0, 633.0, 579.0, 636.0, 587.0, 579.0, 630.0, 630.0, 633.0, 579.0, 630.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 582.0, 633.0, 633.0, 587.0, 636.0, 584.0, 579.0, 630.0, 582.0, 627.0, 633.0, 630.0, 630.0, 590.0, 633.0, 627.0, 633.0, 639.0, 530.0, 630.0, 582.0, 582.0, 624.0, 582.0, 630.0, 630.0, 630.0, 630.0, 587.0, 636.0, 639.0, 636.0, 627.0, 633.0, 627.0, 633.0, 636.0, 630.0, 630.0, 630.0, 630.0, 636.0, 639.0, 582.0, 636.0, 639.0, 636.0, 630.0, 633.0, 570.0, 633.0, 582.0, 636.0, 630.0, 639.0, 639.0, 584.0, 570.0, 636.0, 627.0, 627.0, 630.0, 630.0, 573.0, 630.0, 582.0, 587.0, 633.0, 582.0, 633.0, 633.0, 630.0, 525.0, 582.0, 633.0, 627.0, 633.0, 627.0, 561.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 314.0, 281.0, 271.0, 311.0, 319.0, 322.0, 311.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 279.0, 300.0, 313.0, 317.0, 316.0, 314.0, 319.0, 314.0, 282.0, 297.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 313.0, 320.0, 296.0, 286.0, 319.0, 311.0, 314.0, 322.0, 314.0, 322.0, 306.0, 321.0, 289.0, 
293.0, 311.0, 322.0, 319.0, 314.0, 285.0, 302.0, 319.0, 317.0, 290.0, 294.0, 288.0, 291.0, 311.0, 319.0, 295.0, 287.0, 308.0, 319.0, 316.0, 317.0, 313.0, 317.0, 311.0, 319.0, 299.0, 291.0, 318.0, 315.0, 311.0, 316.0, 314.0, 319.0, 324.0, 315.0, 260.0, 270.0, 313.0, 317.0, 290.0, 292.0, 286.0, 296.0, 313.0, 311.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 314.0, 316.0, 310.0, 320.0, 288.0, 299.0, 319.0, 317.0, 319.0, 320.0, 314.0, 322.0, 307.0, 320.0, 316.0, 317.0, 311.0, 316.0, 316.0, 317.0, 314.0, 322.0, 311.0, 319.0, 313.0, 317.0, 316.0, 314.0, 316.0, 314.0, 314.0, 322.0, 319.0, 320.0, 290.0, 292.0, 319.0, 317.0, 322.0, 317.0, 321.0, 315.0, 316.0, 314.0, 317.0, 316.0, 284.0, 286.0, 316.0, 317.0, 291.0, 291.0, 314.0, 322.0, 316.0, 314.0, 324.0, 315.0, 319.0, 320.0, 287.0, 297.0, 287.0, 283.0, 321.0, 315.0, 313.0, 314.0, 316.0, 311.0, 319.0, 311.0, 311.0, 319.0, 285.0, 288.0, 318.0, 312.0, 286.0, 296.0, 291.0, 296.0, 314.0, 319.0, 280.0, 302.0, 314.0, 319.0, 316.0, 317.0, 319.0, 311.0, 265.0, 260.0, 293.0, 289.0, 314.0, 319.0, 310.0, 317.0, 314.0, 319.0, 318.0, 309.0, 285.0, 276.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8133221505395263, "mean_processing_ms": 0.23109594102314795, "mean_inference_ms": 1.413916507245515}, "off_policy_estimator": {}, "info": {"num_steps_trained": 9984000, "num_steps_sampled": 5324800, "sample_time_ms": 21409.91, "load_time_ms": 36.506, "grad_time_ms": 8913.404, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003510029288008809, "policy_loss": -0.0037950894329696894, "vf_loss": 78.6290054321289, "vf_explained_var": 0.7686605453491211, "kl": 0.0018828777829185128, "entropy": 1.1155847311019897, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5324800, "episodes_total": 13312, "training_iteration": 416, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-12_00-20-41", "timestamp": 1660260041, "time_this_iter_s": 31.975250005722046, "time_total_s": 18450.692626953125, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18450.692626953125, "timesteps_since_restore": 5324800, "iterations_since_restore": 416, "perf": {"cpu_util_percent": 31.96888888888889, "ram_util_percent": 59.13111111111111}} -{"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 613.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 306.745}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.49, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.4, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.76, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.33, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.86, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.57, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 
5.59, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.33, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.86, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.33, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.86, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 
0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 627.0, 584.0, 633.0, 630.0, 573.0, 636.0, 582.0, 579.0, 633.0, 630.0, 636.0, 633.0, 573.0, 587.0, 579.0, 627.0, 630.0, 564.0, 636.0, 582.0, 636.0, 633.0, 579.0, 633.0, 582.0, 636.0, 639.0, 636.0, 630.0, 633.0, 570.0, 633.0, 582.0, 636.0, 630.0, 639.0, 639.0, 584.0, 570.0, 636.0, 627.0, 627.0, 630.0, 630.0, 573.0, 630.0, 582.0, 587.0, 633.0, 582.0, 633.0, 633.0, 630.0, 525.0, 582.0, 633.0, 627.0, 633.0, 627.0, 561.0, 636.0, 552.0, 630.0, 633.0, 579.0, 636.0, 587.0, 579.0, 630.0, 630.0, 633.0, 579.0, 630.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 582.0, 633.0, 633.0, 587.0, 636.0, 584.0, 579.0, 630.0, 582.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 289.0, 319.0, 317.0, 319.0, 317.0, 314.0, 322.0, 313.0, 317.0, 311.0, 322.0, 314.0, 316.0, 319.0, 311.0, 313.0, 314.0, 
290.0, 294.0, 316.0, 317.0, 314.0, 316.0, 283.0, 290.0, 322.0, 314.0, 297.0, 285.0, 291.0, 288.0, 318.0, 315.0, 314.0, 316.0, 322.0, 314.0, 314.0, 319.0, 296.0, 277.0, 293.0, 294.0, 291.0, 288.0, 305.0, 322.0, 314.0, 316.0, 279.0, 285.0, 314.0, 322.0, 288.0, 294.0, 319.0, 317.0, 321.0, 312.0, 291.0, 288.0, 319.0, 314.0, 290.0, 292.0, 319.0, 317.0, 322.0, 317.0, 321.0, 315.0, 316.0, 314.0, 317.0, 316.0, 284.0, 286.0, 316.0, 317.0, 291.0, 291.0, 314.0, 322.0, 316.0, 314.0, 324.0, 315.0, 319.0, 320.0, 287.0, 297.0, 287.0, 283.0, 321.0, 315.0, 313.0, 314.0, 316.0, 311.0, 319.0, 311.0, 311.0, 319.0, 285.0, 288.0, 318.0, 312.0, 286.0, 296.0, 291.0, 296.0, 314.0, 319.0, 280.0, 302.0, 314.0, 319.0, 316.0, 317.0, 319.0, 311.0, 265.0, 260.0, 293.0, 289.0, 314.0, 319.0, 310.0, 317.0, 314.0, 319.0, 318.0, 309.0, 285.0, 276.0, 322.0, 314.0, 281.0, 271.0, 311.0, 319.0, 322.0, 311.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 279.0, 300.0, 313.0, 317.0, 316.0, 314.0, 319.0, 314.0, 282.0, 297.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 313.0, 320.0, 296.0, 286.0, 319.0, 311.0, 314.0, 322.0, 314.0, 322.0, 306.0, 321.0, 289.0, 293.0, 311.0, 322.0, 319.0, 314.0, 285.0, 302.0, 319.0, 317.0, 290.0, 294.0, 288.0, 291.0, 311.0, 319.0, 295.0, 287.0, 308.0, 319.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.812458332690278, "mean_processing_ms": 0.23092571272245643, "mean_inference_ms": 1.413108766021267}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10008000, "num_steps_sampled": 5337600, "sample_time_ms": 21466.731, "load_time_ms": 36.538, "grad_time_ms": 8937.935, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011007506400346756, "policy_loss": -0.005807527806609869, "vf_loss": 74.63658905029297, "vf_explained_var": 0.7761281132698059, "kl": 0.0020840545184910297, "entropy": 1.110751986503601, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 5337600, "episodes_total": 13344, "training_iteration": 417, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-21-11", "timestamp": 1660260071, "time_this_iter_s": 30.376654863357544, "time_total_s": 18481.069281816483, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18481.069281816483, "timesteps_since_restore": 5337600, "iterations_since_restore": 417, "perf": {"cpu_util_percent": 32.944186046511625, "ram_util_percent": 59.151162790697676}} -{"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 613.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 261.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 306.775}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.55, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.0, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.31, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.55, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.04, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 639.0, 576.0, 630.0, 633.0, 582.0, 587.0, 618.0, 633.0, 582.0, 621.0, 636.0, 587.0, 636.0, 582.0, 636.0, 630.0, 582.0, 633.0, 581.0, 630.0, 636.0, 630.0, 627.0, 633.0, 636.0, 582.0, 627.0, 536.0, 633.0, 627.0, 627.0, 633.0, 627.0, 561.0, 636.0, 552.0, 630.0, 633.0, 579.0, 636.0, 587.0, 579.0, 630.0, 630.0, 633.0, 579.0, 630.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 636.0, 627.0, 582.0, 633.0, 633.0, 587.0, 636.0, 584.0, 579.0, 630.0, 582.0, 627.0, 633.0, 567.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 627.0, 584.0, 633.0, 630.0, 573.0, 636.0, 582.0, 579.0, 633.0, 630.0, 636.0, 633.0, 573.0, 587.0, 579.0, 627.0, 630.0, 564.0, 636.0, 582.0, 636.0, 633.0, 579.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 288.0, 288.0, 316.0, 314.0, 316.0, 317.0, 291.0, 291.0, 293.0, 294.0, 308.0, 310.0, 314.0, 319.0, 289.0, 293.0, 318.0, 303.0, 314.0, 322.0, 298.0, 289.0, 324.0, 312.0, 290.0, 292.0, 314.0, 322.0, 313.0, 317.0, 288.0, 294.0, 322.0, 311.0, 289.0, 292.0, 314.0, 316.0, 314.0, 322.0, 314.0, 316.0, 312.0, 315.0, 311.0, 322.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 275.0, 261.0, 319.0, 314.0, 314.0, 313.0, 310.0, 317.0, 314.0, 319.0, 318.0, 309.0, 285.0, 276.0, 322.0, 314.0, 281.0, 271.0, 311.0, 319.0, 322.0, 311.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 279.0, 300.0, 313.0, 317.0, 316.0, 314.0, 319.0, 314.0, 282.0, 297.0, 311.0, 319.0, 319.0, 317.0, 319.0, 317.0, 313.0, 320.0, 296.0, 286.0, 319.0, 311.0, 314.0, 322.0, 314.0, 322.0, 306.0, 321.0, 289.0, 293.0, 311.0, 322.0, 319.0, 314.0, 285.0, 302.0, 319.0, 317.0, 290.0, 294.0, 288.0, 291.0, 311.0, 319.0, 295.0, 287.0, 308.0, 319.0, 316.0, 317.0, 278.0, 289.0, 319.0, 317.0, 319.0, 317.0, 314.0, 322.0, 313.0, 317.0, 311.0, 322.0, 314.0, 316.0, 319.0, 311.0, 313.0, 314.0, 290.0, 294.0, 316.0, 317.0, 314.0, 316.0, 283.0, 290.0, 322.0, 314.0, 297.0, 285.0, 291.0, 288.0, 318.0, 315.0, 314.0, 316.0, 322.0, 314.0, 314.0, 319.0, 296.0, 277.0, 293.0, 294.0, 291.0, 288.0, 305.0, 322.0, 314.0, 316.0, 279.0, 285.0, 314.0, 322.0, 288.0, 294.0, 319.0, 317.0, 321.0, 312.0, 291.0, 288.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8116024056297, "mean_processing_ms": 0.23075773519114987, "mean_inference_ms": 1.4123586541833584}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10032000, "num_steps_sampled": 5350400, "sample_time_ms": 21668.296, "load_time_ms": 36.478, "grad_time_ms": 9333.309, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028228743467479944, "policy_loss": -0.004008984658867121, "vf_loss": 73.87229919433594, 
"vf_explained_var": 0.7751579284667969, "kl": 0.0019005200592800975, "entropy": 1.110758900642395, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5350400, "episodes_total": 13376, "training_iteration": 418, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-21-46", "timestamp": 1660260106, "time_this_iter_s": 34.32990908622742, "time_total_s": 18515.39919090271, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18515.39919090271, "timesteps_since_restore": 5350400, "iterations_since_restore": 418, "perf": {"cpu_util_percent": 29.667346938775513, "ram_util_percent": 59.18979591836735}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 609.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.74}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.68, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.72, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.97, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 639.0, 630.0, 579.0, 582.0, 636.0, 180.0, 582.0, 579.0, 636.0, 582.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 633.0, 630.0, 630.0, 630.0, 579.0, 636.0, 630.0, 582.0, 633.0, 587.0, 587.0, 630.0, 627.0, 639.0, 630.0, 582.0, 627.0, 633.0, 567.0, 636.0, 636.0, 636.0, 630.0, 633.0, 630.0, 630.0, 627.0, 584.0, 633.0, 630.0, 573.0, 636.0, 582.0, 579.0, 633.0, 630.0, 636.0, 633.0, 573.0, 587.0, 579.0, 627.0, 630.0, 564.0, 636.0, 582.0, 636.0, 633.0, 579.0, 633.0, 587.0, 630.0, 639.0, 576.0, 630.0, 633.0, 582.0, 587.0, 618.0, 633.0, 582.0, 621.0, 636.0, 587.0, 636.0, 582.0, 636.0, 630.0, 582.0, 633.0, 581.0, 630.0, 636.0, 630.0, 627.0, 633.0, 636.0, 582.0, 627.0, 536.0, 633.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 316.0, 314.0, 324.0, 315.0, 311.0, 319.0, 288.0, 291.0, 294.0, 288.0, 319.0, 317.0, 92.0, 88.0, 293.0, 289.0, 287.0, 292.0, 319.0, 317.0, 289.0, 293.0, 284.0, 289.0, 316.0, 317.0, 322.0, 317.0, 311.0, 322.0, 316.0, 314.0, 316.0, 314.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 317.0, 313.0, 288.0, 291.0, 319.0, 317.0, 311.0, 319.0, 289.0, 293.0, 319.0, 314.0, 296.0, 291.0, 291.0, 296.0, 313.0, 317.0, 311.0, 316.0, 314.0, 325.0, 311.0, 319.0, 295.0, 287.0, 308.0, 319.0, 316.0, 317.0, 278.0, 289.0, 319.0, 317.0, 319.0, 317.0, 314.0, 322.0, 313.0, 317.0, 311.0, 322.0, 314.0, 316.0, 319.0, 311.0, 313.0, 314.0, 290.0, 294.0, 316.0, 317.0, 314.0, 316.0, 283.0, 290.0, 322.0, 314.0, 297.0, 285.0, 291.0, 288.0, 318.0, 315.0, 314.0, 316.0, 322.0, 314.0, 314.0, 319.0, 296.0, 277.0, 293.0, 294.0, 291.0, 288.0, 305.0, 322.0, 314.0, 316.0, 279.0, 285.0, 314.0, 322.0, 288.0, 294.0, 319.0, 317.0, 321.0, 312.0, 291.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 288.0, 288.0, 316.0, 314.0, 316.0, 317.0, 291.0, 291.0, 293.0, 294.0, 308.0, 310.0, 314.0, 319.0, 289.0, 293.0, 318.0, 303.0, 314.0, 322.0, 298.0, 289.0, 324.0, 312.0, 290.0, 292.0, 314.0, 322.0, 313.0, 317.0, 288.0, 294.0, 322.0, 311.0, 289.0, 292.0, 314.0, 316.0, 314.0, 322.0, 314.0, 316.0, 312.0, 315.0, 311.0, 322.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 275.0, 261.0, 319.0, 314.0, 314.0, 313.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8107503957777891, "mean_processing_ms": 0.2305898995792267, "mean_inference_ms": 1.4116530949413433}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10056000, "num_steps_sampled": 5363200, "sample_time_ms": 21868.435, "load_time_ms": 36.628, "grad_time_ms": 9569.244, "update_time_ms": 0.001, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002508052857592702, "policy_loss": -0.004472339991480112, "vf_loss": 75.3826904296875, "vf_explained_var": 0.7911410927772522, "kl": 0.0020311845000833273, "entropy": 1.1157482862472534, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5363200, "episodes_total": 13408, "training_iteration": 419, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-22-20", "timestamp": 1660260140, "time_this_iter_s": 33.75737500190735, "time_total_s": 18549.156565904617, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18549.156565904617, "timesteps_since_restore": 5363200, "iterations_since_restore": 419, "perf": {"cpu_util_percent": 32.68936170212766, "ram_util_percent": 59.19574468085105}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 609.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.805}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.81, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.29, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.55, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, 
"useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.28, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 630.0, 576.0, 630.0, 516.0, 630.0, 636.0, 639.0, 630.0, 582.0, 633.0, 576.0, 582.0, 633.0, 587.0, 627.0, 630.0, 633.0, 630.0, 582.0, 582.0, 633.0, 633.0, 633.0, 630.0, 627.0, 636.0, 579.0, 630.0, 636.0, 633.0, 627.0, 636.0, 633.0, 579.0, 633.0, 587.0, 630.0, 639.0, 576.0, 630.0, 633.0, 582.0, 587.0, 618.0, 633.0, 582.0, 621.0, 636.0, 587.0, 636.0, 582.0, 636.0, 630.0, 582.0, 633.0, 581.0, 630.0, 636.0, 630.0, 627.0, 633.0, 636.0, 582.0, 627.0, 536.0, 633.0, 627.0, 579.0, 630.0, 639.0, 630.0, 579.0, 582.0, 636.0, 180.0, 582.0, 579.0, 636.0, 582.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 633.0, 630.0, 630.0, 630.0, 579.0, 636.0, 630.0, 582.0, 633.0, 587.0, 587.0, 630.0, 627.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 317.0, 313.0, 285.0, 291.0, 314.0, 316.0, 254.0, 262.0, 319.0, 311.0, 317.0, 319.0, 319.0, 320.0, 316.0, 314.0, 290.0, 292.0, 314.0, 319.0, 285.0, 291.0, 290.0, 292.0, 316.0, 317.0, 291.0, 296.0, 310.0, 317.0, 316.0, 314.0, 316.0, 317.0, 314.0, 316.0, 288.0, 294.0, 291.0, 291.0, 311.0, 322.0, 313.0, 320.0, 316.0, 317.0, 311.0, 319.0, 315.0, 312.0, 316.0, 320.0, 291.0, 288.0, 308.0, 322.0, 311.0, 325.0, 314.0, 319.0, 313.0, 314.0, 319.0, 317.0, 321.0, 312.0, 291.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 314.0, 319.0, 320.0, 288.0, 288.0, 316.0, 314.0, 316.0, 317.0, 291.0, 291.0, 293.0, 294.0, 308.0, 310.0, 314.0, 319.0, 289.0, 293.0, 318.0, 303.0, 314.0, 322.0, 298.0, 289.0, 324.0, 312.0, 290.0, 292.0, 314.0, 322.0, 313.0, 317.0, 288.0, 294.0, 322.0, 311.0, 289.0, 292.0, 314.0, 316.0, 314.0, 322.0, 314.0, 316.0, 312.0, 315.0, 311.0, 322.0, 314.0, 322.0, 294.0, 288.0, 316.0, 311.0, 275.0, 261.0, 319.0, 314.0, 314.0, 313.0, 291.0, 288.0, 316.0, 314.0, 324.0, 315.0, 311.0, 319.0, 288.0, 291.0, 294.0, 288.0, 319.0, 317.0, 92.0, 88.0, 293.0, 289.0, 287.0, 292.0, 319.0, 317.0, 289.0, 293.0, 284.0, 289.0, 316.0, 317.0, 322.0, 317.0, 311.0, 322.0, 316.0, 314.0, 316.0, 314.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 317.0, 313.0, 288.0, 291.0, 319.0, 317.0, 311.0, 319.0, 289.0, 293.0, 319.0, 314.0, 296.0, 291.0, 291.0, 296.0, 313.0, 317.0, 311.0, 316.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8099141458256162, "mean_processing_ms": 0.23042638477560978, "mean_inference_ms": 1.411223574078033}, 
"off_policy_estimator": {}, "info": {"num_steps_trained": 10080000, "num_steps_sampled": 5376000, "sample_time_ms": 22436.969, "load_time_ms": 36.407, "grad_time_ms": 9832.43, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009076216374523938, "policy_loss": -0.0059606158174574375, "vf_loss": 74.23489379882812, "vf_explained_var": 0.7686769366264343, "kl": 0.00233254861086607, "entropy": 1.1105002164840698, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5376000, "episodes_total": 13440, "training_iteration": 420, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-22-56", "timestamp": 1660260176, "time_this_iter_s": 36.52545118331909, "time_total_s": 18585.682017087936, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, 
"mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18585.682017087936, "timesteps_since_restore": 5376000, "iterations_since_restore": 420, "perf": {"cpu_util_percent": 30.815384615384616, "ram_util_percent": 59.175}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 604.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.165}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 185.13, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.5, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.15, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.55, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.58, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, 
"dish_pickup_agent_1_mean": 6.15, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 17, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.08, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.55, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.55, "viable_onion_potting_agent_1_min": 4, 
"viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 630.0, 579.0, 576.0, 582.0, 639.0, 630.0, 518.0, 582.0, 582.0, 624.0, 624.0, 633.0, 630.0, 584.0, 633.0, 633.0, 627.0, 630.0, 633.0, 579.0, 633.0, 582.0, 582.0, 627.0, 633.0, 519.0, 582.0, 579.0, 470.0, 587.0, 579.0, 627.0, 536.0, 633.0, 627.0, 579.0, 630.0, 639.0, 630.0, 579.0, 582.0, 636.0, 180.0, 582.0, 579.0, 636.0, 582.0, 573.0, 633.0, 639.0, 633.0, 630.0, 630.0, 633.0, 630.0, 630.0, 630.0, 579.0, 636.0, 630.0, 582.0, 633.0, 587.0, 587.0, 630.0, 627.0, 639.0, 576.0, 630.0, 576.0, 630.0, 516.0, 630.0, 636.0, 639.0, 630.0, 582.0, 633.0, 576.0, 582.0, 633.0, 587.0, 627.0, 630.0, 633.0, 630.0, 582.0, 582.0, 
633.0, 633.0, 633.0, 630.0, 627.0, 636.0, 579.0, 630.0, 636.0, 633.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 316.0, 314.0, 288.0, 291.0, 283.0, 293.0, 289.0, 293.0, 317.0, 322.0, 316.0, 314.0, 279.0, 239.0, 293.0, 289.0, 293.0, 289.0, 310.0, 314.0, 304.0, 320.0, 311.0, 322.0, 313.0, 317.0, 295.0, 289.0, 314.0, 319.0, 317.0, 316.0, 313.0, 314.0, 316.0, 314.0, 321.0, 312.0, 281.0, 298.0, 316.0, 317.0, 293.0, 289.0, 287.0, 295.0, 316.0, 311.0, 316.0, 317.0, 261.0, 258.0, 285.0, 297.0, 287.0, 292.0, 226.0, 244.0, 299.0, 288.0, 297.0, 282.0, 316.0, 311.0, 275.0, 261.0, 319.0, 314.0, 314.0, 313.0, 291.0, 288.0, 316.0, 314.0, 324.0, 315.0, 311.0, 319.0, 288.0, 291.0, 294.0, 288.0, 319.0, 317.0, 92.0, 88.0, 293.0, 289.0, 287.0, 292.0, 319.0, 317.0, 289.0, 293.0, 284.0, 289.0, 316.0, 317.0, 322.0, 317.0, 311.0, 322.0, 316.0, 314.0, 316.0, 314.0, 322.0, 311.0, 319.0, 311.0, 311.0, 319.0, 317.0, 313.0, 288.0, 291.0, 319.0, 317.0, 311.0, 319.0, 289.0, 293.0, 319.0, 314.0, 296.0, 291.0, 291.0, 296.0, 313.0, 317.0, 311.0, 316.0, 314.0, 325.0, 276.0, 300.0, 317.0, 313.0, 285.0, 291.0, 314.0, 316.0, 254.0, 262.0, 319.0, 311.0, 317.0, 319.0, 319.0, 320.0, 316.0, 314.0, 290.0, 292.0, 314.0, 319.0, 285.0, 291.0, 290.0, 292.0, 316.0, 317.0, 291.0, 296.0, 310.0, 317.0, 316.0, 314.0, 316.0, 317.0, 314.0, 316.0, 288.0, 294.0, 291.0, 291.0, 311.0, 322.0, 313.0, 320.0, 316.0, 317.0, 311.0, 319.0, 315.0, 312.0, 316.0, 320.0, 291.0, 288.0, 308.0, 322.0, 311.0, 325.0, 314.0, 
319.0, 313.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8090875663620912, "mean_processing_ms": 0.23026522516611045, "mean_inference_ms": 1.4109073671228203}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10104000, "num_steps_sampled": 5388800, "sample_time_ms": 22748.439, "load_time_ms": 36.384, "grad_time_ms": 10256.981, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004364584165159613, "policy_loss": -0.007198403123766184, "vf_loss": 81.89620208740234, "vf_explained_var": 0.7658551335334778, "kl": 0.0018362547270953655, "entropy": 1.1095339059829712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5388800, "episodes_total": 13472, "training_iteration": 421, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-23-34", "timestamp": 1660260214, "time_this_iter_s": 37.69177174568176, "time_total_s": 18623.373788833618, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 
0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18623.373788833618, "timesteps_since_restore": 5388800, "iterations_since_restore": 421, "perf": {"cpu_util_percent": 34.76037735849056, "ram_util_percent": 59.533962264150944}} -{"episode_reward_max": 639.0, "episode_reward_min": 470.0, "episode_reward_mean": 605.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 302.835}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.67, "shaped_reward_min": 150, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 
0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.61, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, 
"potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.4, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.3, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 17, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.06, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 587.0, 630.0, 579.0, 633.0, 582.0, 579.0, 576.0, 630.0, 624.0, 630.0, 579.0, 636.0, 522.0, 636.0, 630.0, 587.0, 633.0, 582.0, 630.0, 630.0, 579.0, 573.0, 579.0, 582.0, 582.0, 636.0, 636.0, 633.0, 579.0, 587.0, 579.0, 587.0, 630.0, 627.0, 639.0, 576.0, 630.0, 576.0, 630.0, 516.0, 630.0, 636.0, 639.0, 630.0, 582.0, 633.0, 576.0, 582.0, 633.0, 587.0, 627.0, 630.0, 633.0, 630.0, 582.0, 582.0, 633.0, 633.0, 
633.0, 630.0, 627.0, 636.0, 579.0, 630.0, 636.0, 633.0, 627.0, 627.0, 630.0, 579.0, 576.0, 582.0, 639.0, 630.0, 518.0, 582.0, 582.0, 624.0, 624.0, 633.0, 630.0, 584.0, 633.0, 633.0, 627.0, 630.0, 633.0, 579.0, 633.0, 582.0, 582.0, 627.0, 633.0, 519.0, 582.0, 579.0, 470.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 322.0, 286.0, 301.0, 316.0, 314.0, 290.0, 289.0, 316.0, 317.0, 293.0, 289.0, 288.0, 291.0, 293.0, 283.0, 313.0, 317.0, 308.0, 316.0, 319.0, 311.0, 286.0, 293.0, 316.0, 320.0, 262.0, 260.0, 319.0, 317.0, 316.0, 314.0, 288.0, 299.0, 314.0, 319.0, 298.0, 284.0, 316.0, 314.0, 321.0, 309.0, 287.0, 292.0, 285.0, 288.0, 290.0, 289.0, 286.0, 296.0, 289.0, 293.0, 316.0, 320.0, 319.0, 317.0, 322.0, 311.0, 291.0, 288.0, 291.0, 296.0, 288.0, 291.0, 291.0, 296.0, 313.0, 317.0, 311.0, 316.0, 314.0, 325.0, 276.0, 300.0, 317.0, 313.0, 285.0, 291.0, 314.0, 316.0, 254.0, 262.0, 319.0, 311.0, 317.0, 319.0, 319.0, 320.0, 316.0, 314.0, 290.0, 292.0, 314.0, 319.0, 285.0, 291.0, 290.0, 292.0, 316.0, 317.0, 291.0, 296.0, 310.0, 317.0, 316.0, 314.0, 316.0, 317.0, 314.0, 316.0, 288.0, 294.0, 291.0, 291.0, 311.0, 322.0, 313.0, 320.0, 316.0, 317.0, 311.0, 319.0, 315.0, 312.0, 316.0, 320.0, 291.0, 288.0, 308.0, 322.0, 311.0, 325.0, 314.0, 319.0, 313.0, 314.0, 313.0, 314.0, 316.0, 314.0, 288.0, 291.0, 283.0, 293.0, 289.0, 293.0, 317.0, 322.0, 316.0, 314.0, 279.0, 239.0, 293.0, 289.0, 293.0, 289.0, 310.0, 314.0, 304.0, 320.0, 311.0, 322.0, 313.0, 317.0, 295.0, 289.0, 314.0, 
319.0, 317.0, 316.0, 313.0, 314.0, 316.0, 314.0, 321.0, 312.0, 281.0, 298.0, 316.0, 317.0, 293.0, 289.0, 287.0, 295.0, 316.0, 311.0, 316.0, 317.0, 261.0, 258.0, 285.0, 297.0, 287.0, 292.0, 226.0, 244.0, 299.0, 288.0, 297.0, 282.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8082635107475633, "mean_processing_ms": 0.23010486784203785, "mean_inference_ms": 1.4106251231358269}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10128000, "num_steps_sampled": 5401600, "sample_time_ms": 23169.092, "load_time_ms": 36.151, "grad_time_ms": 10580.278, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029069948941469193, "policy_loss": -0.004262510221451521, "vf_loss": 77.2344970703125, "vf_explained_var": 0.7713862061500549, "kl": 0.001992677804082632, "entropy": 1.1078964471817017, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5401600, "episodes_total": 13504, "training_iteration": 422, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-24-10", "timestamp": 1660260250, "time_this_iter_s": 35.70268106460571, "time_total_s": 18659.076469898224, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, 
"custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18659.076469898224, "timesteps_since_restore": 5401600, "iterations_since_restore": 422, "perf": {"cpu_util_percent": 33.46078431372549, "ram_util_percent": 59.009803921568626}} -{"episode_reward_max": 639.0, "episode_reward_min": 470.0, "episode_reward_mean": 603.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.715}, 
"custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.43, "shaped_reward_min": 150, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.19, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.22, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 17, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.81, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 636.0, 576.0, 639.0, 579.0, 636.0, 630.0, 579.0, 582.0, 582.0, 624.0, 581.0, 636.0, 630.0, 582.0, 582.0, 633.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 633.0, 633.0, 587.0, 582.0, 582.0, 
530.0, 633.0, 630.0, 630.0, 636.0, 633.0, 627.0, 627.0, 630.0, 579.0, 576.0, 582.0, 639.0, 630.0, 518.0, 582.0, 582.0, 624.0, 624.0, 633.0, 630.0, 584.0, 633.0, 633.0, 627.0, 630.0, 633.0, 579.0, 633.0, 582.0, 582.0, 627.0, 633.0, 519.0, 582.0, 579.0, 470.0, 587.0, 579.0, 639.0, 587.0, 630.0, 579.0, 633.0, 582.0, 579.0, 576.0, 630.0, 624.0, 630.0, 579.0, 636.0, 522.0, 636.0, 630.0, 587.0, 633.0, 582.0, 630.0, 630.0, 579.0, 573.0, 579.0, 582.0, 582.0, 636.0, 636.0, 633.0, 579.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 282.0, 294.0, 309.0, 327.0, 288.0, 288.0, 322.0, 317.0, 282.0, 297.0, 319.0, 317.0, 316.0, 314.0, 288.0, 291.0, 291.0, 291.0, 288.0, 294.0, 313.0, 311.0, 292.0, 289.0, 319.0, 317.0, 316.0, 314.0, 288.0, 294.0, 292.0, 290.0, 314.0, 319.0, 302.0, 280.0, 316.0, 320.0, 311.0, 322.0, 293.0, 294.0, 316.0, 314.0, 313.0, 314.0, 322.0, 311.0, 316.0, 317.0, 285.0, 302.0, 286.0, 296.0, 297.0, 285.0, 270.0, 260.0, 316.0, 317.0, 316.0, 314.0, 308.0, 322.0, 311.0, 325.0, 314.0, 319.0, 313.0, 314.0, 313.0, 314.0, 316.0, 314.0, 288.0, 291.0, 283.0, 293.0, 289.0, 293.0, 317.0, 322.0, 316.0, 314.0, 279.0, 239.0, 293.0, 289.0, 293.0, 289.0, 310.0, 314.0, 304.0, 320.0, 311.0, 322.0, 313.0, 317.0, 295.0, 289.0, 314.0, 319.0, 317.0, 316.0, 313.0, 314.0, 316.0, 314.0, 321.0, 312.0, 281.0, 298.0, 316.0, 317.0, 293.0, 289.0, 287.0, 295.0, 316.0, 311.0, 316.0, 317.0, 261.0, 258.0, 285.0, 297.0, 287.0, 292.0, 226.0, 244.0, 299.0, 288.0, 297.0, 282.0, 317.0, 
322.0, 286.0, 301.0, 316.0, 314.0, 290.0, 289.0, 316.0, 317.0, 293.0, 289.0, 288.0, 291.0, 293.0, 283.0, 313.0, 317.0, 308.0, 316.0, 319.0, 311.0, 286.0, 293.0, 316.0, 320.0, 262.0, 260.0, 319.0, 317.0, 316.0, 314.0, 288.0, 299.0, 314.0, 319.0, 298.0, 284.0, 316.0, 314.0, 321.0, 309.0, 287.0, 292.0, 285.0, 288.0, 290.0, 289.0, 286.0, 296.0, 289.0, 293.0, 316.0, 320.0, 319.0, 317.0, 322.0, 311.0, 291.0, 288.0, 291.0, 296.0, 288.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8074351528225807, "mean_processing_ms": 0.2299430227686211, "mean_inference_ms": 1.4101585420796834}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10152000, "num_steps_sampled": 5414400, "sample_time_ms": 23238.003, "load_time_ms": 36.154, "grad_time_ms": 10699.531, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004093436989933252, "policy_loss": -0.003705031471326947, "vf_loss": 83.51854705810547, "vf_explained_var": 0.7628346085548401, "kl": 0.001839231583289802, "entropy": 1.1067644357681274, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5414400, "episodes_total": 13536, "training_iteration": 423, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-24-43", "timestamp": 1660260283, "time_this_iter_s": 33.482574224472046, "time_total_s": 18692.559044122696, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18692.559044122696, "timesteps_since_restore": 5414400, "iterations_since_restore": 423, "perf": {"cpu_util_percent": 33.295744680851065, "ram_util_percent": 58.97021276595746}} -{"episode_reward_max": 639.0, 
"episode_reward_min": 470.0, "episode_reward_mean": 602.74, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.37}, "custom_metrics": {"sparse_reward_mean": 208.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 185.94, "shaped_reward_min": 150, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.04, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, 
"onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.71, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.04, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 17, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.94, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 11, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.07, "optimal_onion_potting_agent_0_min": 12, 
"optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [582.0, 627.0, 630.0, 582.0, 636.0, 582.0, 587.0, 636.0, 627.0, 636.0, 633.0, 633.0, 630.0, 579.0, 579.0, 587.0, 636.0, 582.0, 639.0, 630.0, 627.0, 582.0, 582.0, 584.0, 639.0, 582.0, 639.0, 630.0, 522.0, 587.0, 584.0, 579.0, 579.0, 470.0, 587.0, 579.0, 639.0, 587.0, 630.0, 579.0, 633.0, 582.0, 579.0, 576.0, 630.0, 624.0, 630.0, 579.0, 636.0, 522.0, 636.0, 630.0, 587.0, 633.0, 582.0, 630.0, 630.0, 579.0, 573.0, 579.0, 582.0, 582.0, 636.0, 636.0, 633.0, 579.0, 587.0, 579.0, 582.0, 576.0, 636.0, 576.0, 639.0, 579.0, 636.0, 630.0, 579.0, 582.0, 582.0, 624.0, 581.0, 636.0, 630.0, 582.0, 582.0, 633.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 633.0, 633.0, 587.0, 582.0, 582.0, 530.0, 633.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 313.0, 314.0, 316.0, 314.0, 291.0, 291.0, 317.0, 319.0, 293.0, 289.0, 290.0, 297.0, 319.0, 317.0, 311.0, 316.0, 314.0, 322.0, 316.0, 317.0, 321.0, 312.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 304.0, 283.0, 319.0, 317.0, 293.0, 289.0, 319.0, 320.0, 314.0, 316.0, 304.0, 323.0, 291.0, 291.0, 291.0, 291.0, 293.0, 291.0, 319.0, 320.0, 291.0, 291.0, 324.0, 315.0, 319.0, 311.0, 268.0, 254.0, 298.0, 289.0, 293.0, 291.0, 293.0, 286.0, 287.0, 292.0, 226.0, 244.0, 299.0, 288.0, 297.0, 282.0, 317.0, 322.0, 286.0, 301.0, 316.0, 314.0, 290.0, 289.0, 316.0, 317.0, 293.0, 289.0, 288.0, 291.0, 293.0, 283.0, 313.0, 317.0, 308.0, 316.0, 319.0, 311.0, 286.0, 293.0, 316.0, 320.0, 262.0, 260.0, 319.0, 317.0, 316.0, 314.0, 288.0, 
299.0, 314.0, 319.0, 298.0, 284.0, 316.0, 314.0, 321.0, 309.0, 287.0, 292.0, 285.0, 288.0, 290.0, 289.0, 286.0, 296.0, 289.0, 293.0, 316.0, 320.0, 319.0, 317.0, 322.0, 311.0, 291.0, 288.0, 291.0, 296.0, 288.0, 291.0, 296.0, 286.0, 282.0, 294.0, 309.0, 327.0, 288.0, 288.0, 322.0, 317.0, 282.0, 297.0, 319.0, 317.0, 316.0, 314.0, 288.0, 291.0, 291.0, 291.0, 288.0, 294.0, 313.0, 311.0, 292.0, 289.0, 319.0, 317.0, 316.0, 314.0, 288.0, 294.0, 292.0, 290.0, 314.0, 319.0, 302.0, 280.0, 316.0, 320.0, 311.0, 322.0, 293.0, 294.0, 316.0, 314.0, 313.0, 314.0, 322.0, 311.0, 316.0, 317.0, 285.0, 302.0, 286.0, 296.0, 297.0, 285.0, 270.0, 260.0, 316.0, 317.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8066043021110226, "mean_processing_ms": 0.22977916427475648, "mean_inference_ms": 1.4095576278285673}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10176000, "num_steps_sampled": 5427200, "sample_time_ms": 22903.737, "load_time_ms": 36.042, "grad_time_ms": 10790.567, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005737189203500748, "policy_loss": -0.006916053593158722, "vf_loss": 80.42852783203125, "vf_explained_var": 0.7650584578514099, "kl": 0.0017220000736415386, "entropy": 1.1061476469039917, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5427200, "episodes_total": 13568, "training_iteration": 424, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-25-17", "timestamp": 1660260317, "time_this_iter_s": 33.90529203414917, "time_total_s": 18726.464336156845, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
18726.464336156845, "timesteps_since_restore": 5427200, "iterations_since_restore": 424, "perf": {"cpu_util_percent": 32.64166666666667, "ram_util_percent": 58.96875}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 606.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.09}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.58, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.13, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, 
"useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.77, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 630.0, 639.0, 582.0, 582.0, 633.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 636.0, 630.0, 579.0, 582.0, 587.0, 633.0, 630.0, 633.0, 587.0, 636.0, 582.0, 579.0, 576.0, 630.0, 561.0, 627.0, 627.0, 630.0, 564.0, 633.0, 579.0, 587.0, 579.0, 582.0, 576.0, 636.0, 576.0, 639.0, 579.0, 636.0, 630.0, 579.0, 582.0, 582.0, 624.0, 581.0, 636.0, 630.0, 582.0, 582.0, 633.0, 582.0, 636.0, 633.0, 587.0, 630.0, 627.0, 633.0, 633.0, 587.0, 582.0, 582.0, 530.0, 633.0, 630.0, 582.0, 627.0, 630.0, 582.0, 636.0, 582.0, 587.0, 636.0, 627.0, 636.0, 633.0, 633.0, 630.0, 579.0, 579.0, 587.0, 636.0, 582.0, 639.0, 630.0, 627.0, 582.0, 582.0, 584.0, 639.0, 582.0, 639.0, 630.0, 522.0, 587.0, 584.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 317.0, 316.0, 313.0, 317.0, 319.0, 320.0, 293.0, 289.0, 293.0, 289.0, 314.0, 319.0, 319.0, 311.0, 316.0, 317.0, 286.0, 293.0, 319.0, 317.0, 293.0, 289.0, 290.0, 289.0, 317.0, 319.0, 319.0, 311.0, 291.0, 288.0, 290.0, 292.0, 293.0, 294.0, 316.0, 317.0, 319.0, 311.0, 316.0, 317.0, 288.0, 299.0, 319.0, 317.0, 291.0, 291.0, 288.0, 291.0, 288.0, 288.0, 309.0, 321.0, 278.0, 283.0, 316.0, 311.0, 313.0, 314.0, 319.0, 311.0, 284.0, 280.0, 322.0, 311.0, 291.0, 288.0, 291.0, 296.0, 288.0, 291.0, 296.0, 
286.0, 282.0, 294.0, 309.0, 327.0, 288.0, 288.0, 322.0, 317.0, 282.0, 297.0, 319.0, 317.0, 316.0, 314.0, 288.0, 291.0, 291.0, 291.0, 288.0, 294.0, 313.0, 311.0, 292.0, 289.0, 319.0, 317.0, 316.0, 314.0, 288.0, 294.0, 292.0, 290.0, 314.0, 319.0, 302.0, 280.0, 316.0, 320.0, 311.0, 322.0, 293.0, 294.0, 316.0, 314.0, 313.0, 314.0, 322.0, 311.0, 316.0, 317.0, 285.0, 302.0, 286.0, 296.0, 297.0, 285.0, 270.0, 260.0, 316.0, 317.0, 316.0, 314.0, 293.0, 289.0, 313.0, 314.0, 316.0, 314.0, 291.0, 291.0, 317.0, 319.0, 293.0, 289.0, 290.0, 297.0, 319.0, 317.0, 311.0, 316.0, 314.0, 322.0, 316.0, 317.0, 321.0, 312.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 304.0, 283.0, 319.0, 317.0, 293.0, 289.0, 319.0, 320.0, 314.0, 316.0, 304.0, 323.0, 291.0, 291.0, 291.0, 291.0, 293.0, 291.0, 319.0, 320.0, 291.0, 291.0, 324.0, 315.0, 319.0, 311.0, 268.0, 254.0, 298.0, 289.0, 293.0, 291.0, 293.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8057716704864768, "mean_processing_ms": 0.22961462225140888, "mean_inference_ms": 1.4087586857000423}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10200000, "num_steps_sampled": 5440000, "sample_time_ms": 22855.818, "load_time_ms": 36.111, "grad_time_ms": 10847.011, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001967804506421089, "policy_loss": -0.00539380731061101, "vf_loss": 79.134033203125, "vf_explained_var": 0.7726359963417053, "kl": 0.0021053599193692207, "entropy": 1.1035689115524292, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5440000, "episodes_total": 13600, "training_iteration": 425, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-25-47", "timestamp": 1660260347, "time_this_iter_s": 29.913795948028564, "time_total_s": 18756.378132104874, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", 
"config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18756.378132104874, "timesteps_since_restore": 5440000, "iterations_since_restore": 425, "perf": {"cpu_util_percent": 31.83333333333333, "ram_util_percent": 58.778571428571425}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 610.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 305.285}, "custom_metrics": {"sparse_reward_mean": 211.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.37, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, 
"onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.96, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.87, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.01, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.7, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.89, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.68, 
"soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.96, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.96, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 624.0, 636.0, 627.0, 582.0, 636.0, 636.0, 630.0, 573.0, 630.0, 630.0, 579.0, 633.0, 636.0, 630.0, 639.0, 627.0, 582.0, 639.0, 587.0, 630.0, 636.0, 584.0, 633.0, 630.0, 579.0, 618.0, 630.0, 627.0, 636.0, 587.0, 636.0, 582.0, 530.0, 633.0, 630.0, 582.0, 627.0, 630.0, 582.0, 636.0, 582.0, 587.0, 636.0, 627.0, 636.0, 633.0, 633.0, 630.0, 579.0, 579.0, 587.0, 636.0, 582.0, 639.0, 630.0, 627.0, 582.0, 582.0, 584.0, 639.0, 582.0, 639.0, 630.0, 522.0, 587.0, 584.0, 579.0, 633.0, 633.0, 630.0, 639.0, 582.0, 582.0, 633.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 636.0, 630.0, 579.0, 582.0, 587.0, 633.0, 630.0, 633.0, 587.0, 636.0, 582.0, 579.0, 576.0, 630.0, 561.0, 627.0, 627.0, 630.0, 564.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 320.0, 304.0, 319.0, 317.0, 318.0, 309.0, 296.0, 286.0, 317.0, 319.0, 319.0, 317.0, 313.0, 317.0, 284.0, 289.0, 323.0, 307.0, 311.0, 319.0, 298.0, 281.0, 316.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 313.0, 314.0, 286.0, 296.0, 322.0, 317.0, 283.0, 304.0, 316.0, 314.0, 314.0, 
322.0, 295.0, 289.0, 319.0, 314.0, 314.0, 316.0, 288.0, 291.0, 299.0, 319.0, 316.0, 314.0, 311.0, 316.0, 313.0, 323.0, 293.0, 294.0, 316.0, 320.0, 297.0, 285.0, 270.0, 260.0, 316.0, 317.0, 316.0, 314.0, 293.0, 289.0, 313.0, 314.0, 316.0, 314.0, 291.0, 291.0, 317.0, 319.0, 293.0, 289.0, 290.0, 297.0, 319.0, 317.0, 311.0, 316.0, 314.0, 322.0, 316.0, 317.0, 321.0, 312.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 304.0, 283.0, 319.0, 317.0, 293.0, 289.0, 319.0, 320.0, 314.0, 316.0, 304.0, 323.0, 291.0, 291.0, 291.0, 291.0, 293.0, 291.0, 319.0, 320.0, 291.0, 291.0, 324.0, 315.0, 319.0, 311.0, 268.0, 254.0, 298.0, 289.0, 293.0, 291.0, 293.0, 286.0, 314.0, 319.0, 317.0, 316.0, 313.0, 317.0, 319.0, 320.0, 293.0, 289.0, 293.0, 289.0, 314.0, 319.0, 319.0, 311.0, 316.0, 317.0, 286.0, 293.0, 319.0, 317.0, 293.0, 289.0, 290.0, 289.0, 317.0, 319.0, 319.0, 311.0, 291.0, 288.0, 290.0, 292.0, 293.0, 294.0, 316.0, 317.0, 319.0, 311.0, 316.0, 317.0, 288.0, 299.0, 319.0, 317.0, 291.0, 291.0, 288.0, 291.0, 288.0, 288.0, 309.0, 321.0, 278.0, 283.0, 316.0, 311.0, 313.0, 314.0, 319.0, 311.0, 284.0, 280.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8049370656544826, "mean_processing_ms": 0.22944824169786282, "mean_inference_ms": 1.4078260577315087}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10224000, "num_steps_sampled": 5452800, "sample_time_ms": 22520.554, "load_time_ms": 36.416, "grad_time_ms": 10937.296, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 9.403874173585791e-06, "policy_loss": -0.006973860785365105, "vf_loss": 75.33930969238281, "vf_explained_var": 0.7694594264030457, "kl": 0.00176583684515208, "entropy": 1.1013368368148804, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5452800, "episodes_total": 13632, "training_iteration": 426, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": 
"2022-08-12_00-26-16", "timestamp": 1660260376, "time_this_iter_s": 29.526015043258667, "time_total_s": 18785.904147148132, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", 
"callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": 
true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18785.904147148132, "timesteps_since_restore": 5452800, "iterations_since_restore": 426, "perf": {"cpu_util_percent": 31.057142857142853, "ram_util_percent": 58.84047619047618}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 614.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.115}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.83, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.74, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.35, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.7, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.04, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 
5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.35, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.35, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 
0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 633.0, 573.0, 582.0, 579.0, 639.0, 636.0, 636.0, 633.0, 579.0, 630.0, 630.0, 630.0, 582.0, 627.0, 582.0, 630.0, 621.0, 630.0, 630.0, 633.0, 636.0, 627.0, 636.0, 584.0, 636.0, 633.0, 636.0, 630.0, 627.0, 633.0, 636.0, 522.0, 587.0, 584.0, 579.0, 633.0, 633.0, 630.0, 639.0, 582.0, 582.0, 633.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 636.0, 630.0, 579.0, 582.0, 587.0, 633.0, 630.0, 633.0, 587.0, 636.0, 582.0, 579.0, 576.0, 630.0, 561.0, 627.0, 627.0, 630.0, 564.0, 630.0, 624.0, 636.0, 627.0, 582.0, 636.0, 636.0, 630.0, 573.0, 630.0, 630.0, 579.0, 633.0, 636.0, 630.0, 639.0, 627.0, 582.0, 639.0, 587.0, 630.0, 636.0, 584.0, 633.0, 630.0, 579.0, 618.0, 630.0, 627.0, 636.0, 587.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 317.0, 316.0, 289.0, 284.0, 293.0, 289.0, 287.0, 292.0, 317.0, 322.0, 314.0, 322.0, 316.0, 320.0, 316.0, 317.0, 
297.0, 282.0, 317.0, 313.0, 314.0, 316.0, 324.0, 306.0, 286.0, 296.0, 318.0, 309.0, 297.0, 285.0, 311.0, 319.0, 319.0, 302.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 316.0, 320.0, 319.0, 308.0, 319.0, 317.0, 290.0, 294.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 314.0, 316.0, 316.0, 311.0, 317.0, 316.0, 319.0, 317.0, 268.0, 254.0, 298.0, 289.0, 293.0, 291.0, 293.0, 286.0, 314.0, 319.0, 317.0, 316.0, 313.0, 317.0, 319.0, 320.0, 293.0, 289.0, 293.0, 289.0, 314.0, 319.0, 319.0, 311.0, 316.0, 317.0, 286.0, 293.0, 319.0, 317.0, 293.0, 289.0, 290.0, 289.0, 317.0, 319.0, 319.0, 311.0, 291.0, 288.0, 290.0, 292.0, 293.0, 294.0, 316.0, 317.0, 319.0, 311.0, 316.0, 317.0, 288.0, 299.0, 319.0, 317.0, 291.0, 291.0, 288.0, 291.0, 288.0, 288.0, 309.0, 321.0, 278.0, 283.0, 316.0, 311.0, 313.0, 314.0, 319.0, 311.0, 284.0, 280.0, 313.0, 317.0, 320.0, 304.0, 319.0, 317.0, 318.0, 309.0, 296.0, 286.0, 317.0, 319.0, 319.0, 317.0, 313.0, 317.0, 284.0, 289.0, 323.0, 307.0, 311.0, 319.0, 298.0, 281.0, 316.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 313.0, 314.0, 286.0, 296.0, 322.0, 317.0, 283.0, 304.0, 316.0, 314.0, 314.0, 322.0, 295.0, 289.0, 319.0, 314.0, 314.0, 316.0, 288.0, 291.0, 299.0, 319.0, 316.0, 314.0, 311.0, 316.0, 313.0, 323.0, 293.0, 294.0, 316.0, 320.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8041036336722438, "mean_processing_ms": 0.2292823831802886, "mean_inference_ms": 1.4068110858832141}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10248000, "num_steps_sampled": 5465600, "sample_time_ms": 22514.038, "load_time_ms": 36.353, "grad_time_ms": 10948.965, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00023957279336173087, "policy_loss": -0.006577346473932266, "vf_loss": 73.66693878173828, "vf_explained_var": 0.7691845297813416, "kl": 0.001824389211833477, "entropy": 1.099536418914795, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 5465600, "episodes_total": 13664, "training_iteration": 427, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-26-47", "timestamp": 1660260407, "time_this_iter_s": 30.428364992141724, "time_total_s": 18816.332512140274, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18816.332512140274, "timesteps_since_restore": 5465600, "iterations_since_restore": 427, "perf": {"cpu_util_percent": 33.06976744186046, "ram_util_percent": 59.05581395348838}} -{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 614.26, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.13}, "custom_metrics": {"sparse_reward_mean": 213.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.86, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.72, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.42, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.32, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.88, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.79, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.03, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.32, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.88, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.32, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.88, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 582.0, 582.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 465.0, 630.0, 584.0, 570.0, 587.0, 627.0, 633.0, 630.0, 587.0, 636.0, 639.0, 462.0, 639.0, 633.0, 567.0, 582.0, 576.0, 582.0, 636.0, 633.0, 590.0, 630.0, 579.0, 627.0, 627.0, 630.0, 564.0, 630.0, 624.0, 636.0, 627.0, 582.0, 636.0, 636.0, 630.0, 573.0, 630.0, 630.0, 579.0, 633.0, 636.0, 630.0, 639.0, 627.0, 582.0, 639.0, 587.0, 630.0, 636.0, 584.0, 633.0, 630.0, 579.0, 618.0, 630.0, 627.0, 636.0, 587.0, 636.0, 630.0, 633.0, 573.0, 582.0, 579.0, 639.0, 636.0, 636.0, 633.0, 579.0, 630.0, 630.0, 630.0, 582.0, 627.0, 582.0, 630.0, 621.0, 630.0, 630.0, 633.0, 636.0, 627.0, 636.0, 584.0, 636.0, 633.0, 636.0, 630.0, 627.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 296.0, 286.0, 289.0, 293.0, 316.0, 317.0, 322.0, 317.0, 321.0, 312.0, 316.0, 320.0, 319.0, 317.0, 314.0, 319.0, 231.0, 234.0, 317.0, 313.0, 288.0, 296.0, 294.0, 276.0, 293.0, 294.0, 321.0, 306.0, 314.0, 319.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 316.0, 323.0, 230.0, 232.0, 319.0, 320.0, 311.0, 322.0, 276.0, 291.0, 288.0, 294.0, 293.0, 283.0, 280.0, 302.0, 317.0, 319.0, 314.0, 319.0, 301.0, 289.0, 316.0, 314.0, 290.0, 289.0, 316.0, 311.0, 313.0, 314.0, 319.0, 311.0, 284.0, 280.0, 313.0, 317.0, 320.0, 304.0, 319.0, 317.0, 318.0, 309.0, 296.0, 286.0, 317.0, 319.0, 319.0, 317.0, 313.0, 317.0, 284.0, 289.0, 323.0, 307.0, 311.0, 319.0, 298.0, 281.0, 316.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 313.0, 314.0, 286.0, 296.0, 322.0, 317.0, 283.0, 304.0, 316.0, 314.0, 314.0, 322.0, 295.0, 289.0, 319.0, 314.0, 314.0, 316.0, 288.0, 291.0, 299.0, 319.0, 316.0, 314.0, 311.0, 316.0, 313.0, 323.0, 293.0, 294.0, 316.0, 320.0, 311.0, 319.0, 317.0, 316.0, 289.0, 284.0, 293.0, 289.0, 287.0, 292.0, 317.0, 322.0, 314.0, 322.0, 316.0, 320.0, 316.0, 317.0, 297.0, 282.0, 317.0, 313.0, 314.0, 316.0, 324.0, 306.0, 286.0, 296.0, 318.0, 309.0, 297.0, 285.0, 311.0, 319.0, 319.0, 302.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 316.0, 320.0, 319.0, 308.0, 319.0, 317.0, 290.0, 294.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 314.0, 316.0, 316.0, 311.0, 317.0, 316.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.803277493605749, "mean_processing_ms": 0.22911824330865546, "mean_inference_ms": 1.4058271454809743}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10272000, "num_steps_sampled": 5478400, "sample_time_ms": 22384.398, "load_time_ms": 36.404, "grad_time_ms": 10797.09, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016918530454859138, "policy_loss": -0.005912030581384897, "vf_loss": 81.57828521728516, 
"vf_explained_var": 0.7692078948020935, "kl": 0.0022940493654459715, "entropy": 1.1078943014144897, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5478400, "episodes_total": 13696, "training_iteration": 428, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-27-18", "timestamp": 1660260438, "time_this_iter_s": 31.51498508453369, "time_total_s": 18847.847497224808, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": 
[null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18847.847497224808, "timesteps_since_restore": 5478400, "iterations_since_restore": 428, "perf": {"cpu_util_percent": 32.73555555555556, "ram_util_percent": 59.61555555555556}} -{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 610.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 305.475}, "custom_metrics": {"sparse_reward_mean": 211.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.35, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.98, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.44, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.66, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.73, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.91, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.99, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.44, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.66, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.44, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.66, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 570.0, 639.0, 590.0, 579.0, 579.0, 639.0, 636.0, 584.0, 522.0, 582.0, 627.0, 633.0, 618.0, 627.0, 584.0, 633.0, 639.0, 576.0, 582.0, 636.0, 630.0, 522.0, 627.0, 636.0, 630.0, 630.0, 525.0, 630.0, 630.0, 633.0, 627.0, 636.0, 587.0, 636.0, 630.0, 633.0, 573.0, 582.0, 579.0, 639.0, 636.0, 636.0, 633.0, 579.0, 630.0, 630.0, 630.0, 582.0, 627.0, 582.0, 630.0, 621.0, 630.0, 630.0, 633.0, 636.0, 627.0, 636.0, 584.0, 636.0, 633.0, 636.0, 630.0, 627.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 465.0, 630.0, 584.0, 570.0, 587.0, 627.0, 633.0, 630.0, 587.0, 636.0, 639.0, 462.0, 639.0, 633.0, 567.0, 582.0, 576.0, 582.0, 636.0, 633.0, 590.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 320.0, 319.0, 320.0, 290.0, 280.0, 324.0, 315.0, 293.0, 297.0, 297.0, 282.0, 285.0, 294.0, 319.0, 320.0, 314.0, 322.0, 293.0, 291.0, 265.0, 257.0, 297.0, 285.0, 316.0, 311.0, 317.0, 316.0, 307.0, 311.0, 323.0, 304.0, 299.0, 285.0, 319.0, 314.0, 319.0, 320.0, 285.0, 291.0, 291.0, 291.0, 322.0, 314.0, 314.0, 316.0, 267.0, 255.0, 313.0, 314.0, 319.0, 317.0, 319.0, 311.0, 313.0, 317.0, 265.0, 260.0, 311.0, 319.0, 319.0, 311.0, 314.0, 319.0, 311.0, 316.0, 313.0, 323.0, 293.0, 294.0, 316.0, 320.0, 311.0, 319.0, 317.0, 316.0, 289.0, 284.0, 293.0, 289.0, 287.0, 292.0, 317.0, 322.0, 314.0, 322.0, 316.0, 320.0, 316.0, 317.0, 297.0, 282.0, 317.0, 313.0, 314.0, 316.0, 324.0, 306.0, 286.0, 296.0, 318.0, 309.0, 297.0, 285.0, 311.0, 319.0, 319.0, 302.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 316.0, 320.0, 319.0, 308.0, 319.0, 317.0, 290.0, 294.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 314.0, 316.0, 316.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 296.0, 286.0, 289.0, 293.0, 316.0, 317.0, 322.0, 317.0, 321.0, 312.0, 316.0, 320.0, 319.0, 317.0, 314.0, 319.0, 231.0, 234.0, 317.0, 313.0, 288.0, 296.0, 294.0, 276.0, 293.0, 294.0, 321.0, 306.0, 314.0, 319.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 316.0, 323.0, 230.0, 232.0, 319.0, 320.0, 311.0, 322.0, 276.0, 291.0, 288.0, 294.0, 293.0, 283.0, 280.0, 302.0, 317.0, 319.0, 314.0, 319.0, 301.0, 289.0, 316.0, 314.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8024594853265918, "mean_processing_ms": 0.2289562714759904, "mean_inference_ms": 1.4049375994877125}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10296000, "num_steps_sampled": 5491200, "sample_time_ms": 22179.532, "load_time_ms": 37.018, "grad_time_ms": 
10788.772, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015958556905388832, "policy_loss": -0.010051627643406391, "vf_loss": 90.0771713256836, "vf_explained_var": 0.7485197186470032, "kl": 0.0020946140866726637, "entropy": 1.1038951873779297, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5491200, "episodes_total": 13728, "training_iteration": 429, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-27-50", "timestamp": 1660260470, "time_this_iter_s": 31.638920783996582, "time_total_s": 18879.486418008804, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": 
["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18879.486418008804, "timesteps_since_restore": 5491200, "iterations_since_restore": 429, "perf": {"cpu_util_percent": 30.328888888888887, "ram_util_percent": 59.14666666666665}} -{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 605.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 302.585}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 185.97, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.35, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.61, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.68, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.17, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, 
"useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.99, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.61, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.61, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 630.0, 630.0, 351.0, 633.0, 636.0, 582.0, 582.0, 587.0, 576.0, 582.0, 633.0, 633.0, 633.0, 633.0, 544.0, 633.0, 579.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 576.0, 636.0, 627.0, 576.0, 636.0, 587.0, 630.0, 630.0, 627.0, 633.0, 636.0, 636.0, 582.0, 582.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 465.0, 630.0, 584.0, 570.0, 587.0, 627.0, 633.0, 630.0, 587.0, 636.0, 639.0, 462.0, 639.0, 633.0, 567.0, 582.0, 576.0, 582.0, 636.0, 633.0, 590.0, 630.0, 579.0, 636.0, 639.0, 570.0, 639.0, 590.0, 579.0, 579.0, 639.0, 636.0, 584.0, 522.0, 582.0, 627.0, 633.0, 618.0, 627.0, 584.0, 633.0, 639.0, 576.0, 582.0, 636.0, 630.0, 522.0, 627.0, 636.0, 630.0, 630.0, 525.0, 630.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 293.0, 286.0, 311.0, 319.0, 313.0, 317.0, 177.0, 174.0, 314.0, 319.0, 319.0, 317.0, 294.0, 288.0, 286.0, 296.0, 293.0, 294.0, 282.0, 294.0, 288.0, 294.0, 316.0, 317.0, 316.0, 317.0, 324.0, 309.0, 316.0, 317.0, 270.0, 274.0, 311.0, 322.0, 288.0, 291.0, 286.0, 296.0, 317.0, 322.0, 319.0, 314.0, 319.0, 317.0, 322.0, 317.0, 276.0, 282.0, 285.0, 291.0, 319.0, 317.0, 313.0, 314.0, 288.0, 288.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 314.0, 316.0, 316.0, 311.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 296.0, 286.0, 289.0, 293.0, 316.0, 317.0, 322.0, 317.0, 321.0, 312.0, 316.0, 320.0, 319.0, 317.0, 314.0, 319.0, 231.0, 234.0, 317.0, 313.0, 288.0, 296.0, 294.0, 276.0, 293.0, 294.0, 321.0, 306.0, 314.0, 319.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 316.0, 323.0, 230.0, 232.0, 319.0, 320.0, 311.0, 322.0, 276.0, 291.0, 288.0, 294.0, 293.0, 283.0, 280.0, 302.0, 317.0, 319.0, 314.0, 319.0, 301.0, 289.0, 316.0, 314.0, 290.0, 289.0, 316.0, 320.0, 319.0, 320.0, 290.0, 280.0, 324.0, 315.0, 293.0, 297.0, 297.0, 282.0, 285.0, 294.0, 319.0, 320.0, 314.0, 322.0, 293.0, 291.0, 265.0, 257.0, 297.0, 285.0, 316.0, 311.0, 317.0, 316.0, 307.0, 311.0, 323.0, 304.0, 299.0, 285.0, 319.0, 314.0, 319.0, 320.0, 285.0, 291.0, 291.0, 291.0, 322.0, 314.0, 314.0, 316.0, 267.0, 255.0, 313.0, 314.0, 319.0, 317.0, 319.0, 311.0, 313.0, 317.0, 265.0, 260.0, 311.0, 319.0, 319.0, 311.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.801647414418135, "mean_processing_ms": 0.22879516876474576, 
"mean_inference_ms": 1.404093752736684}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10320000, "num_steps_sampled": 5504000, "sample_time_ms": 21723.978, "load_time_ms": 37.011, "grad_time_ms": 10801.836, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -4.737731069326401e-05, "policy_loss": -0.00790297333151102, "vf_loss": 84.07501220703125, "vf_explained_var": 0.7714950442314148, "kl": 0.0018305158009752631, "entropy": 1.1038156747817993, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5504000, "episodes_total": 13760, "training_iteration": 430, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-28-22", "timestamp": 1660260502, "time_this_iter_s": 32.1021990776062, "time_total_s": 18911.58861708641, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, 
"num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18911.58861708641, "timesteps_since_restore": 5504000, "iterations_since_restore": 430, "perf": {"cpu_util_percent": 34.26, "ram_util_percent": 59.419999999999995}} -{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 607.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.515}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.23, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 
0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.77, 
"dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.44, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.54, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, 
"viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 624.0, 630.0, 630.0, 633.0, 587.0, 630.0, 633.0, 630.0, 639.0, 639.0, 630.0, 584.0, 564.0, 533.0, 636.0, 561.0, 630.0, 627.0, 522.0, 630.0, 636.0, 636.0, 624.0, 630.0, 587.0, 582.0, 636.0, 587.0, 587.0, 624.0, 633.0, 590.0, 630.0, 579.0, 636.0, 639.0, 570.0, 639.0, 590.0, 579.0, 579.0, 639.0, 636.0, 584.0, 522.0, 582.0, 627.0, 633.0, 618.0, 627.0, 584.0, 633.0, 639.0, 576.0, 582.0, 636.0, 630.0, 522.0, 627.0, 636.0, 630.0, 630.0, 525.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 630.0, 351.0, 633.0, 636.0, 582.0, 582.0, 
587.0, 576.0, 582.0, 633.0, 633.0, 633.0, 633.0, 544.0, 633.0, 579.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 576.0, 636.0, 627.0, 576.0, 636.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 314.0, 322.0, 306.0, 318.0, 319.0, 311.0, 321.0, 309.0, 319.0, 314.0, 296.0, 291.0, 319.0, 311.0, 314.0, 319.0, 314.0, 316.0, 319.0, 320.0, 325.0, 314.0, 316.0, 314.0, 293.0, 291.0, 288.0, 276.0, 259.0, 274.0, 314.0, 322.0, 279.0, 282.0, 316.0, 314.0, 311.0, 316.0, 257.0, 265.0, 317.0, 313.0, 314.0, 322.0, 317.0, 319.0, 316.0, 308.0, 311.0, 319.0, 288.0, 299.0, 288.0, 294.0, 324.0, 312.0, 296.0, 291.0, 286.0, 301.0, 313.0, 311.0, 314.0, 319.0, 301.0, 289.0, 316.0, 314.0, 290.0, 289.0, 316.0, 320.0, 319.0, 320.0, 290.0, 280.0, 324.0, 315.0, 293.0, 297.0, 297.0, 282.0, 285.0, 294.0, 319.0, 320.0, 314.0, 322.0, 293.0, 291.0, 265.0, 257.0, 297.0, 285.0, 316.0, 311.0, 317.0, 316.0, 307.0, 311.0, 323.0, 304.0, 299.0, 285.0, 319.0, 314.0, 319.0, 320.0, 285.0, 291.0, 291.0, 291.0, 322.0, 314.0, 314.0, 316.0, 267.0, 255.0, 313.0, 314.0, 319.0, 317.0, 319.0, 311.0, 313.0, 317.0, 265.0, 260.0, 311.0, 319.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 293.0, 286.0, 311.0, 319.0, 313.0, 317.0, 177.0, 174.0, 314.0, 319.0, 319.0, 317.0, 294.0, 288.0, 286.0, 296.0, 293.0, 294.0, 282.0, 294.0, 288.0, 294.0, 316.0, 317.0, 316.0, 317.0, 324.0, 309.0, 316.0, 317.0, 270.0, 274.0, 311.0, 322.0, 288.0, 291.0, 286.0, 296.0, 317.0, 322.0, 319.0, 314.0, 319.0, 317.0, 322.0, 317.0, 276.0, 
282.0, 285.0, 291.0, 319.0, 317.0, 313.0, 314.0, 288.0, 288.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8008314445541891, "mean_processing_ms": 0.2286322337318132, "mean_inference_ms": 1.4031615335106213}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10344000, "num_steps_sampled": 5516800, "sample_time_ms": 21234.37, "load_time_ms": 37.138, "grad_time_ms": 10446.294, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004487487021833658, "policy_loss": -0.002838247222825885, "vf_loss": 78.8043441772461, "vf_explained_var": 0.7659228444099426, "kl": 0.0018056267872452736, "entropy": 1.109397053718567, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5516800, "episodes_total": 13792, "training_iteration": 431, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-28-51", "timestamp": 1660260531, "time_this_iter_s": 29.235426902770996, "time_total_s": 18940.82404398918, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18940.82404398918, "timesteps_since_restore": 5516800, "iterations_since_restore": 431, "perf": {"cpu_util_percent": 33.543902439024386, "ram_util_percent": 59.482926829268294}} -{"episode_reward_max": 639.0, "episode_reward_min": 351.0, "episode_reward_mean": 606.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.24}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 186.08, 
"shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.91, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.95, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, 
"potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.75, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.45, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.37, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 630.0, 636.0, 587.0, 579.0, 633.0, 630.0, 587.0, 630.0, 576.0, 582.0, 630.0, 627.0, 525.0, 582.0, 630.0, 582.0, 582.0, 582.0, 633.0, 624.0, 582.0, 636.0, 522.0, 633.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0, 525.0, 630.0, 630.0, 633.0, 630.0, 579.0, 630.0, 630.0, 351.0, 633.0, 636.0, 582.0, 582.0, 587.0, 576.0, 
582.0, 633.0, 633.0, 633.0, 633.0, 544.0, 633.0, 579.0, 582.0, 639.0, 633.0, 636.0, 639.0, 558.0, 576.0, 636.0, 627.0, 576.0, 636.0, 587.0, 630.0, 630.0, 636.0, 624.0, 630.0, 630.0, 633.0, 587.0, 630.0, 633.0, 630.0, 639.0, 639.0, 630.0, 584.0, 564.0, 533.0, 636.0, 561.0, 630.0, 627.0, 522.0, 630.0, 636.0, 636.0, 624.0, 630.0, 587.0, 582.0, 636.0, 587.0, 587.0, 624.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 289.0, 316.0, 317.0, 311.0, 319.0, 314.0, 322.0, 288.0, 299.0, 288.0, 291.0, 311.0, 322.0, 316.0, 314.0, 301.0, 286.0, 311.0, 319.0, 305.0, 271.0, 286.0, 296.0, 313.0, 317.0, 316.0, 311.0, 265.0, 260.0, 291.0, 291.0, 313.0, 317.0, 287.0, 295.0, 285.0, 297.0, 293.0, 289.0, 319.0, 314.0, 316.0, 308.0, 291.0, 291.0, 319.0, 317.0, 267.0, 255.0, 321.0, 312.0, 314.0, 322.0, 317.0, 319.0, 317.0, 319.0, 316.0, 314.0, 298.0, 284.0, 316.0, 314.0, 265.0, 260.0, 311.0, 319.0, 319.0, 311.0, 314.0, 319.0, 311.0, 319.0, 293.0, 286.0, 311.0, 319.0, 313.0, 317.0, 177.0, 174.0, 314.0, 319.0, 319.0, 317.0, 294.0, 288.0, 286.0, 296.0, 293.0, 294.0, 282.0, 294.0, 288.0, 294.0, 316.0, 317.0, 316.0, 317.0, 324.0, 309.0, 316.0, 317.0, 270.0, 274.0, 311.0, 322.0, 288.0, 291.0, 286.0, 296.0, 317.0, 322.0, 319.0, 314.0, 319.0, 317.0, 322.0, 317.0, 276.0, 282.0, 285.0, 291.0, 319.0, 317.0, 313.0, 314.0, 288.0, 288.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 311.0, 319.0, 314.0, 322.0, 306.0, 318.0, 319.0, 311.0, 321.0, 309.0, 319.0, 314.0, 296.0, 291.0, 319.0, 311.0, 314.0, 319.0, 314.0, 
316.0, 319.0, 320.0, 325.0, 314.0, 316.0, 314.0, 293.0, 291.0, 288.0, 276.0, 259.0, 274.0, 314.0, 322.0, 279.0, 282.0, 316.0, 314.0, 311.0, 316.0, 257.0, 265.0, 317.0, 313.0, 314.0, 322.0, 317.0, 319.0, 316.0, 308.0, 311.0, 319.0, 288.0, 299.0, 288.0, 294.0, 324.0, 312.0, 296.0, 291.0, 286.0, 301.0, 313.0, 311.0]}, "sampler_perf": {"mean_env_wait_ms": 0.8000283085438272, "mean_processing_ms": 0.22847315169555785, "mean_inference_ms": 1.4024220813317556}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10368000, "num_steps_sampled": 5529600, "sample_time_ms": 21335.175, "load_time_ms": 37.375, "grad_time_ms": 10351.537, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001511982991360128, "policy_loss": -0.006039683241397142, "vf_loss": 81.0816650390625, "vf_explained_var": 0.766996443271637, "kl": 0.0019059469923377037, "entropy": 1.1129895448684692, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5529600, "episodes_total": 13824, "training_iteration": 432, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-29-27", "timestamp": 1660260567, "time_this_iter_s": 35.764232873916626, "time_total_s": 18976.588276863098, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 18976.588276863098, "timesteps_since_restore": 5529600, "iterations_since_restore": 432, "perf": {"cpu_util_percent": 31.023529411764706, "ram_util_percent": 59.011764705882364}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 609.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 255.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.97}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.14, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.04, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.89, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.51, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.86, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.37, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.24, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.51, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 14, 
"optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.51, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 579.0, 636.0, 579.0, 582.0, 576.0, 587.0, 627.0, 630.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 624.0, 
630.0, 582.0, 633.0, 630.0, 633.0, 582.0, 636.0, 582.0, 630.0, 630.0, 633.0, 636.0, 582.0, 582.0, 582.0, 576.0, 636.0, 587.0, 630.0, 630.0, 636.0, 624.0, 630.0, 630.0, 633.0, 587.0, 630.0, 633.0, 630.0, 639.0, 639.0, 630.0, 584.0, 564.0, 533.0, 636.0, 561.0, 630.0, 627.0, 522.0, 630.0, 636.0, 636.0, 624.0, 630.0, 587.0, 582.0, 636.0, 587.0, 587.0, 624.0, 579.0, 633.0, 630.0, 636.0, 587.0, 579.0, 633.0, 630.0, 587.0, 630.0, 576.0, 582.0, 630.0, 627.0, 525.0, 582.0, 630.0, 582.0, 582.0, 582.0, 633.0, 624.0, 582.0, 636.0, 522.0, 633.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 286.0, 296.0, 288.0, 291.0, 319.0, 317.0, 293.0, 286.0, 286.0, 296.0, 292.0, 284.0, 296.0, 291.0, 313.0, 314.0, 316.0, 314.0, 293.0, 294.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 310.0, 314.0, 322.0, 308.0, 290.0, 292.0, 316.0, 317.0, 316.0, 314.0, 317.0, 316.0, 293.0, 289.0, 317.0, 319.0, 289.0, 293.0, 316.0, 314.0, 306.0, 324.0, 314.0, 319.0, 317.0, 319.0, 293.0, 289.0, 289.0, 293.0, 288.0, 294.0, 288.0, 288.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 311.0, 319.0, 314.0, 322.0, 306.0, 318.0, 319.0, 311.0, 321.0, 309.0, 319.0, 314.0, 296.0, 291.0, 319.0, 311.0, 314.0, 319.0, 314.0, 316.0, 319.0, 320.0, 325.0, 314.0, 316.0, 314.0, 293.0, 291.0, 288.0, 276.0, 259.0, 274.0, 314.0, 322.0, 279.0, 282.0, 316.0, 314.0, 311.0, 316.0, 257.0, 265.0, 317.0, 313.0, 314.0, 322.0, 317.0, 319.0, 316.0, 308.0, 311.0, 319.0, 288.0, 
299.0, 288.0, 294.0, 324.0, 312.0, 296.0, 291.0, 286.0, 301.0, 313.0, 311.0, 290.0, 289.0, 316.0, 317.0, 311.0, 319.0, 314.0, 322.0, 288.0, 299.0, 288.0, 291.0, 311.0, 322.0, 316.0, 314.0, 301.0, 286.0, 311.0, 319.0, 305.0, 271.0, 286.0, 296.0, 313.0, 317.0, 316.0, 311.0, 265.0, 260.0, 291.0, 291.0, 313.0, 317.0, 287.0, 295.0, 285.0, 297.0, 293.0, 289.0, 319.0, 314.0, 316.0, 308.0, 291.0, 291.0, 319.0, 317.0, 267.0, 255.0, 321.0, 312.0, 314.0, 322.0, 317.0, 319.0, 317.0, 319.0, 316.0, 314.0, 298.0, 284.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7992234752910505, "mean_processing_ms": 0.2283129313200136, "mean_inference_ms": 1.4016145546737357}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10392000, "num_steps_sampled": 5542400, "sample_time_ms": 21105.654, "load_time_ms": 37.195, "grad_time_ms": 10237.421, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00017731667321640998, "policy_loss": -0.007162818219512701, "vf_loss": 79.00240325927734, "vf_explained_var": 0.7636518478393555, "kl": 0.0019576705526560545, "entropy": 1.1202179193496704, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5542400, "episodes_total": 13856, "training_iteration": 433, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-29-57", "timestamp": 1660260597, "time_this_iter_s": 30.04263925552368, "time_total_s": 19006.630916118622, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19006.630916118622, "timesteps_since_restore": 5542400, "iterations_since_restore": 433, "perf": {"cpu_util_percent": 28.414285714285718, 
"ram_util_percent": 59.08571428571428}} -{"episode_reward_max": 636.0, "episode_reward_min": 522.0, "episode_reward_mean": 608.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 304.035}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.27, "shaped_reward_min": 162, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.01, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, 
"onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.62, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.75, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.1, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.38, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.62, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.62, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 582.0, 582.0, 633.0, 636.0, 582.0, 630.0, 630.0, 636.0, 587.0, 633.0, 633.0, 636.0, 636.0, 570.0, 576.0, 624.0, 587.0, 636.0, 582.0, 587.0, 582.0, 576.0, 579.0, 582.0, 630.0, 627.0, 633.0, 590.0, 636.0, 539.0, 636.0, 587.0, 587.0, 624.0, 579.0, 633.0, 630.0, 636.0, 587.0, 579.0, 633.0, 630.0, 587.0, 630.0, 576.0, 582.0, 630.0, 627.0, 525.0, 582.0, 630.0, 582.0, 582.0, 582.0, 633.0, 624.0, 582.0, 636.0, 522.0, 633.0, 636.0, 636.0, 636.0, 630.0, 582.0, 630.0, 633.0, 582.0, 579.0, 636.0, 579.0, 582.0, 576.0, 587.0, 627.0, 630.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 624.0, 630.0, 582.0, 633.0, 630.0, 633.0, 582.0, 636.0, 582.0, 630.0, 630.0, 633.0, 636.0, 582.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 322.0, 314.0, 293.0, 289.0, 288.0, 294.0, 319.0, 314.0, 317.0, 319.0, 294.0, 288.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 314.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 277.0, 293.0, 295.0, 281.0, 308.0, 316.0, 296.0, 291.0, 319.0, 317.0, 296.0, 286.0, 291.0, 296.0, 288.0, 294.0, 290.0, 286.0, 293.0, 286.0, 296.0, 286.0, 316.0, 314.0, 313.0, 314.0, 319.0, 314.0, 299.0, 291.0, 319.0, 317.0, 268.0, 271.0, 324.0, 312.0, 296.0, 291.0, 286.0, 301.0, 313.0, 311.0, 290.0, 289.0, 316.0, 317.0, 311.0, 319.0, 314.0, 322.0, 288.0, 299.0, 288.0, 291.0, 311.0, 322.0, 316.0, 314.0, 301.0, 286.0, 311.0, 
319.0, 305.0, 271.0, 286.0, 296.0, 313.0, 317.0, 316.0, 311.0, 265.0, 260.0, 291.0, 291.0, 313.0, 317.0, 287.0, 295.0, 285.0, 297.0, 293.0, 289.0, 319.0, 314.0, 316.0, 308.0, 291.0, 291.0, 319.0, 317.0, 267.0, 255.0, 321.0, 312.0, 314.0, 322.0, 317.0, 319.0, 317.0, 319.0, 316.0, 314.0, 298.0, 284.0, 316.0, 314.0, 314.0, 319.0, 286.0, 296.0, 288.0, 291.0, 319.0, 317.0, 293.0, 286.0, 286.0, 296.0, 292.0, 284.0, 296.0, 291.0, 313.0, 314.0, 316.0, 314.0, 293.0, 294.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 310.0, 314.0, 322.0, 308.0, 290.0, 292.0, 316.0, 317.0, 316.0, 314.0, 317.0, 316.0, 293.0, 289.0, 317.0, 319.0, 289.0, 293.0, 316.0, 314.0, 306.0, 324.0, 314.0, 319.0, 317.0, 319.0, 293.0, 289.0, 289.0, 293.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7984267196562377, "mean_processing_ms": 0.2281543074598729, "mean_inference_ms": 1.4008634634421164}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10416000, "num_steps_sampled": 5555200, "sample_time_ms": 20963.356, "load_time_ms": 37.144, "grad_time_ms": 9878.857, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020503299310803413, "policy_loss": -0.0053678578697144985, "vf_loss": 79.79612731933594, "vf_explained_var": 0.7709012627601624, "kl": 0.0022744529414922, "entropy": 1.1228529214859009, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5555200, "episodes_total": 13888, "training_iteration": 434, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-30-26", "timestamp": 1660260626, "time_this_iter_s": 28.891623735427856, "time_total_s": 19035.52253985405, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19035.52253985405, "timesteps_since_restore": 5555200, "iterations_since_restore": 434, "perf": {"cpu_util_percent": 35.10975609756097, "ram_util_percent": 59.18292682926829}} -{"episode_reward_max": 639.0, "episode_reward_min": 539.0, "episode_reward_mean": 609.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 268.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 304.505}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.81, "shaped_reward_min": 170, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.97, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 20, 
"useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.39, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.58, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.62, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.39, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.58, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.39, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.58, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 630.0, 587.0, 582.0, 639.0, 587.0, 584.0, 584.0, 633.0, 633.0, 582.0, 587.0, 630.0, 633.0, 633.0, 582.0, 582.0, 630.0, 630.0, 630.0, 633.0, 590.0, 633.0, 630.0, 630.0, 587.0, 630.0, 584.0, 587.0, 576.0, 582.0, 636.0, 630.0, 582.0, 630.0, 633.0, 582.0, 579.0, 636.0, 579.0, 582.0, 576.0, 587.0, 627.0, 630.0, 587.0, 630.0, 636.0, 633.0, 636.0, 636.0, 624.0, 630.0, 582.0, 633.0, 630.0, 633.0, 582.0, 636.0, 582.0, 630.0, 630.0, 633.0, 636.0, 582.0, 582.0, 582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 636.0, 582.0, 630.0, 630.0, 636.0, 587.0, 633.0, 633.0, 636.0, 636.0, 570.0, 576.0, 624.0, 587.0, 636.0, 582.0, 587.0, 582.0, 576.0, 579.0, 582.0, 630.0, 627.0, 633.0, 590.0, 636.0, 539.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 311.0, 319.0, 321.0, 309.0, 291.0, 296.0, 301.0, 281.0, 319.0, 320.0, 293.0, 294.0, 286.0, 298.0, 288.0, 296.0, 314.0, 319.0, 319.0, 314.0, 293.0, 289.0, 293.0, 294.0, 314.0, 316.0, 313.0, 320.0, 316.0, 317.0, 291.0, 291.0, 288.0, 294.0, 316.0, 314.0, 316.0, 314.0, 319.0, 311.0, 324.0, 309.0, 296.0, 294.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 301.0, 286.0, 319.0, 311.0, 293.0, 291.0, 299.0, 288.0, 
274.0, 302.0, 286.0, 296.0, 317.0, 319.0, 316.0, 314.0, 298.0, 284.0, 316.0, 314.0, 314.0, 319.0, 286.0, 296.0, 288.0, 291.0, 319.0, 317.0, 293.0, 286.0, 286.0, 296.0, 292.0, 284.0, 296.0, 291.0, 313.0, 314.0, 316.0, 314.0, 293.0, 294.0, 314.0, 316.0, 324.0, 312.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 310.0, 314.0, 322.0, 308.0, 290.0, 292.0, 316.0, 317.0, 316.0, 314.0, 317.0, 316.0, 293.0, 289.0, 317.0, 319.0, 289.0, 293.0, 316.0, 314.0, 306.0, 324.0, 314.0, 319.0, 317.0, 319.0, 293.0, 289.0, 289.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 293.0, 289.0, 288.0, 294.0, 319.0, 314.0, 317.0, 319.0, 294.0, 288.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 314.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 277.0, 293.0, 295.0, 281.0, 308.0, 316.0, 296.0, 291.0, 319.0, 317.0, 296.0, 286.0, 291.0, 296.0, 288.0, 294.0, 290.0, 286.0, 293.0, 286.0, 296.0, 286.0, 316.0, 314.0, 313.0, 314.0, 319.0, 314.0, 299.0, 291.0, 319.0, 317.0, 268.0, 271.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7976245186877899, "mean_processing_ms": 0.22799297194787038, "mean_inference_ms": 1.3999329365484723}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10440000, "num_steps_sampled": 5568000, "sample_time_ms": 21006.175, "load_time_ms": 37.426, "grad_time_ms": 9977.404, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013511897996068, "policy_loss": -0.006047597620636225, "vf_loss": 79.5729751586914, "vf_explained_var": 0.7702791094779968, "kl": 0.001890461309812963, "entropy": 1.1170209646224976, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5568000, "episodes_total": 13920, "training_iteration": 435, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-30-58", "timestamp": 1660260658, "time_this_iter_s": 31.331193923950195, "time_total_s": 19066.853733778, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19066.853733778, "timesteps_since_restore": 5568000, "iterations_since_restore": 435, "perf": {"cpu_util_percent": 33.626666666666665, "ram_util_percent": 58.966666666666676}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 605.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 250.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 302.93}, "custom_metrics": {"sparse_reward_mean": 209.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.46, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.0, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.61, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, 
"soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.31, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.61, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.61, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 630.0, 582.0, 630.0, 587.0, 630.0, 522.0, 636.0, 590.0, 633.0, 582.0, 636.0, 636.0, 639.0, 582.0, 582.0, 633.0, 579.0, 579.0, 536.0, 582.0, 636.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 582.0, 582.0, 636.0, 582.0, 582.0, 582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 636.0, 582.0, 630.0, 630.0, 636.0, 587.0, 633.0, 633.0, 636.0, 636.0, 570.0, 576.0, 624.0, 587.0, 636.0, 582.0, 587.0, 582.0, 576.0, 579.0, 582.0, 630.0, 627.0, 633.0, 590.0, 636.0, 539.0, 582.0, 630.0, 630.0, 587.0, 582.0, 639.0, 587.0, 584.0, 584.0, 633.0, 633.0, 582.0, 587.0, 630.0, 633.0, 633.0, 582.0, 582.0, 630.0, 630.0, 630.0, 633.0, 590.0, 633.0, 630.0, 630.0, 587.0, 630.0, 584.0, 587.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 316.0, 296.0, 283.0, 316.0, 314.0, 293.0, 289.0, 316.0, 314.0, 301.0, 286.0, 314.0, 316.0, 272.0, 250.0, 319.0, 317.0, 296.0, 294.0, 311.0, 322.0, 299.0, 283.0, 319.0, 317.0, 322.0, 314.0, 319.0, 320.0, 294.0, 
288.0, 289.0, 293.0, 321.0, 312.0, 286.0, 293.0, 288.0, 291.0, 267.0, 269.0, 293.0, 289.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 285.0, 302.0, 288.0, 299.0, 321.0, 318.0, 316.0, 314.0, 314.0, 319.0, 294.0, 288.0, 283.0, 299.0, 317.0, 319.0, 293.0, 289.0, 289.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 293.0, 289.0, 288.0, 294.0, 319.0, 314.0, 317.0, 319.0, 294.0, 288.0, 313.0, 317.0, 316.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 314.0, 319.0, 314.0, 319.0, 317.0, 317.0, 319.0, 277.0, 293.0, 295.0, 281.0, 308.0, 316.0, 296.0, 291.0, 319.0, 317.0, 296.0, 286.0, 291.0, 296.0, 288.0, 294.0, 290.0, 286.0, 293.0, 286.0, 296.0, 286.0, 316.0, 314.0, 313.0, 314.0, 319.0, 314.0, 299.0, 291.0, 319.0, 317.0, 268.0, 271.0, 288.0, 294.0, 311.0, 319.0, 321.0, 309.0, 291.0, 296.0, 301.0, 281.0, 319.0, 320.0, 293.0, 294.0, 286.0, 298.0, 288.0, 296.0, 314.0, 319.0, 319.0, 314.0, 293.0, 289.0, 293.0, 294.0, 314.0, 316.0, 313.0, 320.0, 316.0, 317.0, 291.0, 291.0, 288.0, 294.0, 316.0, 314.0, 316.0, 314.0, 319.0, 311.0, 324.0, 309.0, 296.0, 294.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 301.0, 286.0, 319.0, 311.0, 293.0, 291.0, 299.0, 288.0, 274.0, 302.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7968373127766308, "mean_processing_ms": 0.22783629500024907, "mean_inference_ms": 1.3993492052434942}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10464000, "num_steps_sampled": 5580800, "sample_time_ms": 21691.27, "load_time_ms": 37.308, "grad_time_ms": 10128.337, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0022821128368377686, "policy_loss": -0.005102970637381077, "vf_loss": 79.3483657836914, "vf_explained_var": 0.7713219523429871, "kl": 0.0023417342454195023, "entropy": 1.0995064973831177, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5580800, "episodes_total": 13952, 
"training_iteration": 436, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-31-35", "timestamp": 1660260695, "time_this_iter_s": 37.88511109352112, "time_total_s": 19104.73884487152, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19104.73884487152, "timesteps_since_restore": 5580800, "iterations_since_restore": 436, "perf": {"cpu_util_percent": 29.92452830188679, "ram_util_percent": 59.107547169811326}} -{"episode_reward_max": 639.0, "episode_reward_min": 311.0, "episode_reward_mean": 607.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.775}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 187.95, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.55, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.66, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 639.0, 627.0, 639.0, 633.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 630.0, 582.0, 573.0, 639.0, 587.0, 636.0, 633.0, 630.0, 581.0, 633.0, 582.0, 633.0, 311.0, 636.0, 630.0, 633.0, 636.0, 584.0, 636.0, 636.0, 633.0, 590.0, 636.0, 539.0, 582.0, 630.0, 630.0, 587.0, 582.0, 639.0, 587.0, 584.0, 584.0, 633.0, 633.0, 582.0, 587.0, 630.0, 633.0, 633.0, 582.0, 582.0, 630.0, 630.0, 630.0, 633.0, 590.0, 633.0, 630.0, 630.0, 587.0, 630.0, 584.0, 587.0, 576.0, 582.0, 630.0, 579.0, 630.0, 582.0, 630.0, 587.0, 630.0, 522.0, 636.0, 590.0, 633.0, 582.0, 636.0, 636.0, 639.0, 582.0, 582.0, 633.0, 579.0, 579.0, 536.0, 582.0, 636.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 311.0, 322.0, 317.0, 
313.0, 314.0, 325.0, 314.0, 314.0, 319.0, 321.0, 309.0, 316.0, 320.0, 288.0, 299.0, 301.0, 286.0, 317.0, 322.0, 319.0, 311.0, 316.0, 317.0, 316.0, 314.0, 279.0, 303.0, 289.0, 284.0, 319.0, 320.0, 291.0, 296.0, 321.0, 315.0, 316.0, 317.0, 316.0, 314.0, 293.0, 288.0, 316.0, 317.0, 293.0, 289.0, 316.0, 317.0, 151.0, 160.0, 319.0, 317.0, 316.0, 314.0, 319.0, 314.0, 319.0, 317.0, 291.0, 293.0, 311.0, 325.0, 319.0, 317.0, 319.0, 314.0, 299.0, 291.0, 319.0, 317.0, 268.0, 271.0, 288.0, 294.0, 311.0, 319.0, 321.0, 309.0, 291.0, 296.0, 301.0, 281.0, 319.0, 320.0, 293.0, 294.0, 286.0, 298.0, 288.0, 296.0, 314.0, 319.0, 319.0, 314.0, 293.0, 289.0, 293.0, 294.0, 314.0, 316.0, 313.0, 320.0, 316.0, 317.0, 291.0, 291.0, 288.0, 294.0, 316.0, 314.0, 316.0, 314.0, 319.0, 311.0, 324.0, 309.0, 296.0, 294.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 301.0, 286.0, 319.0, 311.0, 293.0, 291.0, 299.0, 288.0, 274.0, 302.0, 286.0, 296.0, 314.0, 316.0, 296.0, 283.0, 316.0, 314.0, 293.0, 289.0, 316.0, 314.0, 301.0, 286.0, 314.0, 316.0, 272.0, 250.0, 319.0, 317.0, 296.0, 294.0, 311.0, 322.0, 299.0, 283.0, 319.0, 317.0, 322.0, 314.0, 319.0, 320.0, 294.0, 288.0, 289.0, 293.0, 321.0, 312.0, 286.0, 293.0, 288.0, 291.0, 267.0, 269.0, 293.0, 289.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 285.0, 302.0, 288.0, 299.0, 321.0, 318.0, 316.0, 314.0, 314.0, 319.0, 294.0, 288.0, 283.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.796054313311972, "mean_processing_ms": 0.22768002877826965, "mean_inference_ms": 1.3988094341608432}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10488000, "num_steps_sampled": 5593600, "sample_time_ms": 21763.453, "load_time_ms": 37.236, "grad_time_ms": 10078.856, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00570017471909523, "policy_loss": -0.002029874362051487, "vf_loss": 82.7793960571289, "vf_explained_var": 0.7683680653572083, "kl": 0.002793658524751663, "entropy": 1.095770001411438, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5593600, "episodes_total": 13984, "training_iteration": 437, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-32-06", "timestamp": 1660260726, "time_this_iter_s": 30.656537771224976, "time_total_s": 19135.395382642746, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19135.395382642746, "timesteps_since_restore": 5593600, "iterations_since_restore": 437, "perf": {"cpu_util_percent": 32.4046511627907, "ram_util_percent": 59.06744186046511}} -{"episode_reward_max": 639.0, "episode_reward_min": 311.0, "episode_reward_mean": 609.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.745}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 187.89, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.41, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.23, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.77, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.04, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.75, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.94, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.65, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.1, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.04, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.04, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 639.0, 630.0, 636.0, 633.0, 636.0, 587.0, 639.0, 579.0, 636.0, 630.0, 627.0, 633.0, 639.0, 636.0, 564.0, 630.0, 630.0, 636.0, 636.0, 636.0, 639.0, 636.0, 633.0, 630.0, 633.0, 579.0, 627.0, 408.0, 579.0, 630.0, 582.0, 584.0, 587.0, 576.0, 582.0, 630.0, 579.0, 630.0, 582.0, 630.0, 587.0, 630.0, 522.0, 636.0, 590.0, 633.0, 582.0, 636.0, 636.0, 639.0, 582.0, 582.0, 633.0, 579.0, 579.0, 536.0, 582.0, 636.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 582.0, 582.0, 627.0, 639.0, 627.0, 639.0, 633.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 630.0, 582.0, 573.0, 639.0, 587.0, 636.0, 633.0, 630.0, 581.0, 633.0, 582.0, 633.0, 311.0, 636.0, 630.0, 633.0, 636.0, 584.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 318.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 285.0, 302.0, 319.0, 320.0, 285.0, 294.0, 319.0, 317.0, 311.0, 319.0, 313.0, 314.0, 313.0, 320.0, 319.0, 320.0, 324.0, 312.0, 274.0, 290.0, 315.0, 315.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 285.0, 294.0, 316.0, 311.0, 210.0, 198.0, 283.0, 296.0, 316.0, 314.0, 290.0, 292.0, 293.0, 291.0, 299.0, 288.0, 274.0, 302.0, 286.0, 296.0, 314.0, 316.0, 296.0, 283.0, 316.0, 314.0, 293.0, 289.0, 316.0, 314.0, 301.0, 286.0, 314.0, 316.0, 272.0, 250.0, 319.0, 317.0, 296.0, 294.0, 311.0, 322.0, 299.0, 283.0, 319.0, 317.0, 322.0, 314.0, 319.0, 320.0, 294.0, 288.0, 289.0, 293.0, 321.0, 312.0, 286.0, 293.0, 288.0, 291.0, 267.0, 269.0, 293.0, 289.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 285.0, 302.0, 288.0, 299.0, 321.0, 318.0, 316.0, 314.0, 314.0, 319.0, 294.0, 288.0, 283.0, 299.0, 316.0, 311.0, 322.0, 317.0, 313.0, 314.0, 325.0, 314.0, 314.0, 319.0, 321.0, 309.0, 316.0, 320.0, 288.0, 299.0, 301.0, 286.0, 317.0, 322.0, 319.0, 311.0, 316.0, 317.0, 316.0, 314.0, 279.0, 303.0, 289.0, 284.0, 319.0, 320.0, 291.0, 296.0, 321.0, 315.0, 316.0, 317.0, 316.0, 314.0, 293.0, 288.0, 316.0, 317.0, 293.0, 289.0, 316.0, 317.0, 151.0, 160.0, 319.0, 317.0, 316.0, 314.0, 319.0, 314.0, 319.0, 317.0, 291.0, 293.0, 311.0, 325.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7952795152199544, "mean_processing_ms": 0.22752629484006134, "mean_inference_ms": 1.3983582965229155}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10512000, "num_steps_sampled": 5606400, "sample_time_ms": 21892.052, "load_time_ms": 37.192, "grad_time_ms": 10267.0, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": -0.0008096967940218747, "policy_loss": -0.008205131627619267, "vf_loss": 79.47277069091797, "vf_explained_var": 0.777022659778595, "kl": 0.002324033295735717, "entropy": 1.1036995649337769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5606400, "episodes_total": 14016, "training_iteration": 438, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-32-41", "timestamp": 1660260761, "time_this_iter_s": 34.68048119544983, "time_total_s": 19170.075863838196, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19170.075863838196, "timesteps_since_restore": 5606400, "iterations_since_restore": 438, "perf": {"cpu_util_percent": 33.92857142857143, "ram_util_percent": 59.06734693877551}} -{"episode_reward_max": 639.0, "episode_reward_min": 311.0, "episode_reward_mean": 616.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 308.265}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 189.33, "shaped_reward_min": 111, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.54, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.7, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.11, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.12, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.34, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.91, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.99, 
"useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.77, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.12, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.34, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.12, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.34, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 639.0, 636.0, 630.0, 579.0, 587.0, 627.0, 636.0, 633.0, 633.0, 636.0, 636.0, 636.0, 630.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 639.0, 630.0, 627.0, 636.0, 579.0, 636.0, 590.0, 636.0, 570.0, 636.0, 579.0, 639.0, 630.0, 633.0, 582.0, 582.0, 627.0, 639.0, 627.0, 639.0, 633.0, 630.0, 636.0, 587.0, 587.0, 639.0, 630.0, 633.0, 630.0, 582.0, 573.0, 639.0, 587.0, 636.0, 633.0, 630.0, 581.0, 633.0, 582.0, 633.0, 311.0, 636.0, 630.0, 633.0, 636.0, 584.0, 636.0, 636.0, 627.0, 639.0, 630.0, 636.0, 633.0, 636.0, 587.0, 639.0, 579.0, 636.0, 630.0, 627.0, 633.0, 639.0, 636.0, 564.0, 630.0, 630.0, 636.0, 636.0, 636.0, 639.0, 636.0, 633.0, 630.0, 633.0, 579.0, 627.0, 408.0, 579.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 311.0, 322.0, 317.0, 319.0, 317.0, 318.0, 312.0, 277.0, 302.0, 291.0, 296.0, 313.0, 314.0, 319.0, 317.0, 315.0, 318.0, 314.0, 319.0, 316.0, 320.0, 324.0, 312.0, 319.0, 317.0, 321.0, 309.0, 316.0, 314.0, 311.0, 319.0, 316.0, 317.0, 319.0, 317.0, 324.0, 312.0, 316.0, 320.0, 319.0, 320.0, 319.0, 311.0, 310.0, 317.0, 319.0, 317.0, 293.0, 286.0, 322.0, 314.0, 296.0, 294.0, 314.0, 322.0, 290.0, 280.0, 311.0, 325.0, 288.0, 291.0, 319.0, 320.0, 316.0, 314.0, 314.0, 319.0, 294.0, 288.0, 283.0, 299.0, 316.0, 311.0, 322.0, 317.0, 313.0, 314.0, 325.0, 314.0, 314.0, 319.0, 321.0, 309.0, 316.0, 320.0, 288.0, 299.0, 301.0, 286.0, 317.0, 322.0, 319.0, 311.0, 316.0, 317.0, 316.0, 314.0, 279.0, 303.0, 289.0, 284.0, 319.0, 320.0, 291.0, 296.0, 321.0, 315.0, 316.0, 317.0, 316.0, 314.0, 293.0, 288.0, 316.0, 317.0, 293.0, 289.0, 316.0, 317.0, 151.0, 160.0, 319.0, 317.0, 316.0, 314.0, 319.0, 314.0, 319.0, 317.0, 291.0, 293.0, 311.0, 325.0, 319.0, 317.0, 309.0, 318.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 285.0, 302.0, 319.0, 320.0, 285.0, 294.0, 319.0, 317.0, 311.0, 319.0, 313.0, 314.0, 313.0, 320.0, 319.0, 320.0, 324.0, 312.0, 274.0, 290.0, 315.0, 315.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 285.0, 294.0, 316.0, 311.0, 210.0, 198.0, 283.0, 296.0, 316.0, 314.0, 290.0, 292.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7945004798552335, "mean_processing_ms": 0.22737090000954274, "mean_inference_ms": 1.3975837411593142}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10536000, 
"num_steps_sampled": 5619200, "sample_time_ms": 21744.649, "load_time_ms": 36.586, "grad_time_ms": 10113.951, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006775472429580986, "policy_loss": -0.006301699206233025, "vf_loss": 75.32054138183594, "vf_explained_var": 0.7757834792137146, "kl": 0.0017814143793657422, "entropy": 1.1056231260299683, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5619200, "episodes_total": 14048, "training_iteration": 439, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-33-09", "timestamp": 1660260789, "time_this_iter_s": 28.622015953063965, "time_total_s": 19198.69787979126, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": 
false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19198.69787979126, "timesteps_since_restore": 5619200, "iterations_since_restore": 439, "perf": {"cpu_util_percent": 31.939024390243897, "ram_util_percent": 59.1219512195122}} -{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 620.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 310.075}, "custom_metrics": {"sparse_reward_mean": 215.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 190.15, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.78, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.29, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.46, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.77, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 3, 
"dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.72, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.46, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.46, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 633.0, 627.0, 582.0, 630.0, 636.0, 636.0, 630.0, 639.0, 627.0, 579.0, 624.0, 636.0, 630.0, 582.0, 636.0, 639.0, 633.0, 633.0, 639.0, 576.0, 636.0, 573.0, 630.0, 587.0, 633.0, 630.0, 636.0, 639.0, 633.0, 630.0, 587.0, 636.0, 584.0, 636.0, 636.0, 627.0, 639.0, 630.0, 636.0, 633.0, 636.0, 587.0, 639.0, 579.0, 636.0, 630.0, 627.0, 633.0, 639.0, 636.0, 564.0, 630.0, 630.0, 636.0, 636.0, 636.0, 639.0, 636.0, 633.0, 630.0, 633.0, 579.0, 627.0, 408.0, 579.0, 630.0, 582.0, 627.0, 639.0, 636.0, 630.0, 579.0, 587.0, 627.0, 636.0, 633.0, 633.0, 636.0, 636.0, 636.0, 630.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 639.0, 630.0, 627.0, 636.0, 579.0, 636.0, 
590.0, 636.0, 570.0, 636.0, 579.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 294.0, 319.0, 314.0, 311.0, 316.0, 288.0, 294.0, 314.0, 316.0, 319.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 306.0, 321.0, 293.0, 286.0, 307.0, 317.0, 319.0, 317.0, 311.0, 319.0, 293.0, 289.0, 321.0, 315.0, 322.0, 317.0, 317.0, 316.0, 311.0, 322.0, 322.0, 317.0, 293.0, 283.0, 319.0, 317.0, 279.0, 294.0, 308.0, 322.0, 285.0, 302.0, 316.0, 317.0, 305.0, 325.0, 319.0, 317.0, 319.0, 320.0, 319.0, 314.0, 318.0, 312.0, 288.0, 299.0, 319.0, 317.0, 291.0, 293.0, 311.0, 325.0, 319.0, 317.0, 309.0, 318.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 285.0, 302.0, 319.0, 320.0, 285.0, 294.0, 319.0, 317.0, 311.0, 319.0, 313.0, 314.0, 313.0, 320.0, 319.0, 320.0, 324.0, 312.0, 274.0, 290.0, 315.0, 315.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 319.0, 317.0, 319.0, 320.0, 319.0, 317.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 285.0, 294.0, 316.0, 311.0, 210.0, 198.0, 283.0, 296.0, 316.0, 314.0, 290.0, 292.0, 316.0, 311.0, 322.0, 317.0, 319.0, 317.0, 318.0, 312.0, 277.0, 302.0, 291.0, 296.0, 313.0, 314.0, 319.0, 317.0, 315.0, 318.0, 314.0, 319.0, 316.0, 320.0, 324.0, 312.0, 319.0, 317.0, 321.0, 309.0, 316.0, 314.0, 311.0, 319.0, 316.0, 317.0, 319.0, 317.0, 324.0, 312.0, 316.0, 320.0, 319.0, 320.0, 319.0, 311.0, 310.0, 317.0, 319.0, 317.0, 293.0, 286.0, 322.0, 314.0, 296.0, 294.0, 314.0, 322.0, 290.0, 280.0, 311.0, 325.0, 288.0, 291.0, 319.0, 320.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.7937258148671773, "mean_processing_ms": 0.22721751341177562, "mean_inference_ms": 1.3967610737623508}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10560000, "num_steps_sampled": 5632000, "sample_time_ms": 21732.229, "load_time_ms": 36.475, "grad_time_ms": 9880.218, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00015868060290813446, "policy_loss": -0.007246671710163355, "vf_loss": 76.3777847290039, "vf_explained_var": 0.7677585482597351, "kl": 0.0021524711046367884, "entropy": 1.099584698677063, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5632000, "episodes_total": 14080, "training_iteration": 440, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-33-39", "timestamp": 1660260819, "time_this_iter_s": 29.637184143066406, "time_total_s": 19228.335063934326, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19228.335063934326, "timesteps_since_restore": 5632000, "iterations_since_restore": 440, "perf": {"cpu_util_percent": 33.75, "ram_util_percent": 59.899999999999984}} -{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 617.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 198.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 308.88}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 189.36, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.6, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 18.01, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 
18.27, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.84, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.16, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.69, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.97, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.75, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.73, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.27, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, 
"viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.27, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 627.0, 627.0, 636.0, 582.0, 630.0, 633.0, 639.0, 570.0, 582.0, 576.0, 636.0, 639.0, 636.0, 582.0, 630.0, 627.0, 624.0, 587.0, 630.0, 633.0, 582.0, 630.0, 636.0, 582.0, 576.0, 633.0, 633.0, 630.0, 639.0, 636.0, 408.0, 579.0, 630.0, 582.0, 627.0, 639.0, 636.0, 630.0, 579.0, 587.0, 627.0, 636.0, 633.0, 633.0, 636.0, 636.0, 636.0, 630.0, 630.0, 630.0, 633.0, 636.0, 636.0, 636.0, 639.0, 630.0, 627.0, 636.0, 579.0, 636.0, 590.0, 636.0, 570.0, 
636.0, 579.0, 639.0, 584.0, 633.0, 627.0, 582.0, 630.0, 636.0, 636.0, 630.0, 639.0, 627.0, 579.0, 624.0, 636.0, 630.0, 582.0, 636.0, 639.0, 633.0, 633.0, 639.0, 576.0, 636.0, 573.0, 630.0, 587.0, 633.0, 630.0, 636.0, 639.0, 633.0, 630.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 316.0, 311.0, 320.0, 316.0, 288.0, 294.0, 311.0, 319.0, 319.0, 314.0, 319.0, 320.0, 289.0, 281.0, 291.0, 291.0, 288.0, 288.0, 316.0, 320.0, 319.0, 320.0, 319.0, 317.0, 295.0, 287.0, 313.0, 317.0, 319.0, 308.0, 310.0, 314.0, 296.0, 291.0, 314.0, 316.0, 313.0, 320.0, 301.0, 281.0, 316.0, 314.0, 314.0, 322.0, 291.0, 291.0, 288.0, 288.0, 316.0, 317.0, 319.0, 314.0, 316.0, 314.0, 320.0, 319.0, 319.0, 317.0, 210.0, 198.0, 283.0, 296.0, 316.0, 314.0, 290.0, 292.0, 316.0, 311.0, 322.0, 317.0, 319.0, 317.0, 318.0, 312.0, 277.0, 302.0, 291.0, 296.0, 313.0, 314.0, 319.0, 317.0, 315.0, 318.0, 314.0, 319.0, 316.0, 320.0, 324.0, 312.0, 319.0, 317.0, 321.0, 309.0, 316.0, 314.0, 311.0, 319.0, 316.0, 317.0, 319.0, 317.0, 324.0, 312.0, 316.0, 320.0, 319.0, 320.0, 319.0, 311.0, 310.0, 317.0, 319.0, 317.0, 293.0, 286.0, 322.0, 314.0, 296.0, 294.0, 314.0, 322.0, 290.0, 280.0, 311.0, 325.0, 288.0, 291.0, 319.0, 320.0, 290.0, 294.0, 319.0, 314.0, 311.0, 316.0, 288.0, 294.0, 314.0, 316.0, 319.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 306.0, 321.0, 293.0, 286.0, 307.0, 317.0, 319.0, 317.0, 311.0, 319.0, 293.0, 289.0, 321.0, 315.0, 322.0, 317.0, 317.0, 316.0, 311.0, 
322.0, 322.0, 317.0, 293.0, 283.0, 319.0, 317.0, 279.0, 294.0, 308.0, 322.0, 285.0, 302.0, 316.0, 317.0, 305.0, 325.0, 319.0, 317.0, 319.0, 320.0, 319.0, 314.0, 318.0, 312.0, 288.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7929580463765498, "mean_processing_ms": 0.22706572704499173, "mean_inference_ms": 1.3960311893008097}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10584000, "num_steps_sampled": 5644800, "sample_time_ms": 22196.121, "load_time_ms": 36.22, "grad_time_ms": 10110.176, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019730820786207914, "policy_loss": -0.005001601297408342, "vf_loss": 75.2809066772461, "vf_explained_var": 0.770819902420044, "kl": 0.0019049126422032714, "entropy": 1.106797695159912, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5644800, "episodes_total": 14112, "training_iteration": 441, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-34-15", "timestamp": 1660260855, "time_this_iter_s": 36.16889190673828, "time_total_s": 19264.503955841064, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19264.503955841064, "timesteps_since_restore": 5644800, "iterations_since_restore": 441, "perf": {"cpu_util_percent": 30.368627450980394, "ram_util_percent": 59.160784313725486}} -{"episode_reward_max": 639.0, "episode_reward_min": 564.0, "episode_reward_mean": 618.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 279.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 309.15}, "custom_metrics": {"sparse_reward_mean": 214.6, 
"sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 189.1, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.51, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.85, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 
0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.09, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.74, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.31, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.91, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.09, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.09, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 624.0, 582.0, 636.0, 630.0, 630.0, 633.0, 587.0, 567.0, 630.0, 639.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 639.0, 636.0, 630.0, 630.0, 630.0, 627.0, 630.0, 582.0, 582.0, 587.0, 630.0, 630.0, 564.0, 576.0, 636.0, 570.0, 636.0, 579.0, 639.0, 584.0, 633.0, 627.0, 582.0, 630.0, 
636.0, 636.0, 630.0, 639.0, 627.0, 579.0, 624.0, 636.0, 630.0, 582.0, 636.0, 639.0, 633.0, 633.0, 639.0, 576.0, 636.0, 573.0, 630.0, 587.0, 633.0, 630.0, 636.0, 639.0, 633.0, 630.0, 587.0, 633.0, 633.0, 627.0, 627.0, 636.0, 582.0, 630.0, 633.0, 639.0, 570.0, 582.0, 576.0, 636.0, 639.0, 636.0, 582.0, 630.0, 627.0, 624.0, 587.0, 630.0, 633.0, 582.0, 630.0, 636.0, 582.0, 576.0, 633.0, 633.0, 630.0, 639.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 316.0, 313.0, 311.0, 291.0, 291.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 290.0, 297.0, 282.0, 285.0, 319.0, 311.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 319.0, 314.0, 311.0, 322.0, 314.0, 316.0, 314.0, 322.0, 322.0, 317.0, 319.0, 317.0, 319.0, 311.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 316.0, 314.0, 291.0, 291.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 316.0, 314.0, 281.0, 283.0, 290.0, 286.0, 319.0, 317.0, 290.0, 280.0, 311.0, 325.0, 288.0, 291.0, 319.0, 320.0, 290.0, 294.0, 319.0, 314.0, 311.0, 316.0, 288.0, 294.0, 314.0, 316.0, 319.0, 317.0, 319.0, 317.0, 316.0, 314.0, 319.0, 320.0, 306.0, 321.0, 293.0, 286.0, 307.0, 317.0, 319.0, 317.0, 311.0, 319.0, 293.0, 289.0, 321.0, 315.0, 322.0, 317.0, 317.0, 316.0, 311.0, 322.0, 322.0, 317.0, 293.0, 283.0, 319.0, 317.0, 279.0, 294.0, 308.0, 322.0, 285.0, 302.0, 316.0, 317.0, 305.0, 325.0, 319.0, 317.0, 319.0, 320.0, 319.0, 314.0, 318.0, 312.0, 288.0, 299.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 316.0, 311.0, 320.0, 316.0, 288.0, 294.0, 311.0, 
319.0, 319.0, 314.0, 319.0, 320.0, 289.0, 281.0, 291.0, 291.0, 288.0, 288.0, 316.0, 320.0, 319.0, 320.0, 319.0, 317.0, 295.0, 287.0, 313.0, 317.0, 319.0, 308.0, 310.0, 314.0, 296.0, 291.0, 314.0, 316.0, 313.0, 320.0, 301.0, 281.0, 316.0, 314.0, 314.0, 322.0, 291.0, 291.0, 288.0, 288.0, 316.0, 317.0, 319.0, 314.0, 316.0, 314.0, 320.0, 319.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7921926702018212, "mean_processing_ms": 0.22691451487922393, "mean_inference_ms": 1.3953653520568468}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10608000, "num_steps_sampled": 5657600, "sample_time_ms": 21841.972, "load_time_ms": 35.995, "grad_time_ms": 10215.543, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0027040394488722086, "policy_loss": -0.004097369499504566, "vf_loss": 73.54324340820312, "vf_explained_var": 0.7729549407958984, "kl": 0.0019481302006170154, "entropy": 1.105837106704712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5657600, "episodes_total": 14144, "training_iteration": 442, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-34-49", "timestamp": 1660260889, "time_this_iter_s": 33.27770400047302, "time_total_s": 19297.781659841537, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19297.781659841537, "timesteps_since_restore": 5657600, "iterations_since_restore": 442, "perf": {"cpu_util_percent": 29.43404255319148, "ram_util_percent": 59.22553191489361}} -{"episode_reward_max": 639.0, "episode_reward_min": 564.0, "episode_reward_mean": 615.91, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 280.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.955}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 188.71, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 
2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.7, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 6.2, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.92, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.42, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.85, 
"optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 579.0, 633.0, 636.0, 636.0, 636.0, 567.0, 630.0, 587.0, 587.0, 587.0, 
630.0, 639.0, 579.0, 636.0, 630.0, 630.0, 587.0, 633.0, 579.0, 587.0, 587.0, 582.0, 630.0, 630.0, 579.0, 636.0, 633.0, 627.0, 630.0, 587.0, 579.0, 639.0, 633.0, 630.0, 587.0, 633.0, 633.0, 627.0, 627.0, 636.0, 582.0, 630.0, 633.0, 639.0, 570.0, 582.0, 576.0, 636.0, 639.0, 636.0, 582.0, 630.0, 627.0, 624.0, 587.0, 630.0, 633.0, 582.0, 630.0, 636.0, 582.0, 576.0, 633.0, 633.0, 630.0, 639.0, 636.0, 627.0, 624.0, 582.0, 636.0, 630.0, 630.0, 633.0, 587.0, 567.0, 630.0, 639.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 639.0, 636.0, 630.0, 630.0, 630.0, 627.0, 630.0, 582.0, 582.0, 587.0, 630.0, 630.0, 564.0, 576.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 296.0, 283.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 319.0, 317.0, 280.0, 287.0, 311.0, 319.0, 288.0, 299.0, 291.0, 296.0, 296.0, 291.0, 316.0, 314.0, 314.0, 325.0, 285.0, 294.0, 319.0, 317.0, 314.0, 316.0, 319.0, 311.0, 293.0, 294.0, 314.0, 319.0, 283.0, 296.0, 288.0, 299.0, 301.0, 286.0, 296.0, 286.0, 316.0, 314.0, 316.0, 314.0, 298.0, 281.0, 319.0, 317.0, 317.0, 316.0, 308.0, 319.0, 311.0, 319.0, 295.0, 292.0, 290.0, 289.0, 319.0, 320.0, 319.0, 314.0, 318.0, 312.0, 288.0, 299.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 316.0, 311.0, 320.0, 316.0, 288.0, 294.0, 311.0, 319.0, 319.0, 314.0, 319.0, 320.0, 289.0, 281.0, 291.0, 291.0, 288.0, 288.0, 316.0, 320.0, 319.0, 320.0, 319.0, 317.0, 295.0, 287.0, 313.0, 317.0, 319.0, 308.0, 310.0, 314.0, 296.0, 291.0, 314.0, 316.0, 313.0, 320.0, 301.0, 281.0, 316.0, 
314.0, 314.0, 322.0, 291.0, 291.0, 288.0, 288.0, 316.0, 317.0, 319.0, 314.0, 316.0, 314.0, 320.0, 319.0, 319.0, 317.0, 311.0, 316.0, 313.0, 311.0, 291.0, 291.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 290.0, 297.0, 282.0, 285.0, 319.0, 311.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 319.0, 314.0, 311.0, 322.0, 314.0, 316.0, 314.0, 322.0, 322.0, 317.0, 319.0, 317.0, 319.0, 311.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 316.0, 314.0, 291.0, 291.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 316.0, 314.0, 281.0, 283.0, 290.0, 286.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7914282474248366, "mean_processing_ms": 0.22676310167632485, "mean_inference_ms": 1.3947257800951314}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10632000, "num_steps_sampled": 5670400, "sample_time_ms": 21982.654, "load_time_ms": 36.099, "grad_time_ms": 10196.436, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0026817016769200563, "policy_loss": -0.004366503097116947, "vf_loss": 76.00869750976562, "vf_explained_var": 0.7792714238166809, "kl": 0.0014469980960711837, "entropy": 1.1053307056427002, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5670400, "episodes_total": 14176, "training_iteration": 443, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-35-20", "timestamp": 1660260920, "time_this_iter_s": 31.264520168304443, "time_total_s": 19329.046180009842, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": 
false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": 
true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19329.046180009842, "timesteps_since_restore": 5670400, "iterations_since_restore": 443, "perf": {"cpu_util_percent": 
30.41136363636364, "ram_util_percent": 59.12272727272728}} -{"episode_reward_max": 639.0, "episode_reward_min": 479.0, "episode_reward_mean": 614.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 239.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.27}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.54, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.25, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.54, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, 
"onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.47, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.11, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 16.47, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.47, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 636.0, 633.0, 624.0, 636.0, 630.0, 633.0, 639.0, 570.0, 636.0, 630.0, 636.0, 582.0, 587.0, 579.0, 584.0, 633.0, 582.0, 587.0, 636.0, 633.0, 621.0, 630.0, 633.0, 584.0, 627.0, 630.0, 633.0, 479.0, 587.0, 587.0, 633.0, 630.0, 639.0, 636.0, 627.0, 624.0, 582.0, 636.0, 630.0, 630.0, 633.0, 587.0, 567.0, 630.0, 639.0, 630.0, 636.0, 633.0, 633.0, 630.0, 636.0, 639.0, 636.0, 630.0, 630.0, 630.0, 627.0, 630.0, 582.0, 582.0, 587.0, 630.0, 630.0, 564.0, 576.0, 636.0, 633.0, 579.0, 633.0, 636.0, 636.0, 636.0, 567.0, 630.0, 587.0, 587.0, 587.0, 630.0, 639.0, 579.0, 636.0, 630.0, 630.0, 587.0, 633.0, 579.0, 587.0, 587.0, 582.0, 630.0, 630.0, 579.0, 636.0, 633.0, 627.0, 630.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 311.0, 319.0, 314.0, 322.0, 314.0, 319.0, 314.0, 310.0, 314.0, 322.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 288.0, 282.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 298.0, 289.0, 294.0, 285.0, 288.0, 296.0, 314.0, 319.0, 296.0, 286.0, 299.0, 288.0, 319.0, 317.0, 316.0, 317.0, 310.0, 311.0, 319.0, 311.0, 322.0, 311.0, 288.0, 296.0, 310.0, 317.0, 313.0, 317.0, 316.0, 317.0, 239.0, 240.0, 293.0, 294.0, 296.0, 291.0, 319.0, 314.0, 316.0, 314.0, 320.0, 319.0, 319.0, 317.0, 311.0, 316.0, 313.0, 311.0, 291.0, 291.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 290.0, 297.0, 282.0, 285.0, 319.0, 
311.0, 319.0, 320.0, 316.0, 314.0, 314.0, 322.0, 319.0, 314.0, 311.0, 322.0, 314.0, 316.0, 314.0, 322.0, 322.0, 317.0, 319.0, 317.0, 319.0, 311.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 316.0, 314.0, 291.0, 291.0, 290.0, 292.0, 296.0, 291.0, 316.0, 314.0, 316.0, 314.0, 281.0, 283.0, 290.0, 286.0, 319.0, 317.0, 314.0, 319.0, 296.0, 283.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 319.0, 317.0, 280.0, 287.0, 311.0, 319.0, 288.0, 299.0, 291.0, 296.0, 296.0, 291.0, 316.0, 314.0, 314.0, 325.0, 285.0, 294.0, 319.0, 317.0, 314.0, 316.0, 319.0, 311.0, 293.0, 294.0, 314.0, 319.0, 283.0, 296.0, 288.0, 299.0, 301.0, 286.0, 296.0, 286.0, 316.0, 314.0, 316.0, 314.0, 298.0, 281.0, 319.0, 317.0, 317.0, 316.0, 308.0, 319.0, 311.0, 319.0, 295.0, 292.0, 290.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7906650471868468, "mean_processing_ms": 0.2266115620582618, "mean_inference_ms": 1.3940369234530334}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10656000, "num_steps_sampled": 5683200, "sample_time_ms": 22188.83, "load_time_ms": 36.224, "grad_time_ms": 10331.481, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0037986973766237497, "policy_loss": -0.0035794072318822145, "vf_loss": 79.28974151611328, "vf_explained_var": 0.7626357078552246, "kl": 0.0019579820800572634, "entropy": 1.1017413139343262, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5683200, "episodes_total": 14208, "training_iteration": 444, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-35-52", "timestamp": 1660260952, "time_this_iter_s": 32.303210973739624, "time_total_s": 19361.34939098358, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19361.34939098358, "timesteps_since_restore": 5683200, "iterations_since_restore": 444, "perf": {"cpu_util_percent": 30.893478260869564, "ram_util_percent": 59.16521739130435}} -{"episode_reward_max": 639.0, "episode_reward_min": 479.0, "episode_reward_mean": 610.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 239.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.46}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.12, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.18, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 16.15, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.42, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.37, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 582.0, 582.0, 582.0, 630.0, 578.0, 576.0, 587.0, 587.0, 630.0, 636.0, 630.0, 639.0, 636.0, 636.0, 579.0, 630.0, 636.0, 627.0, 636.0, 578.0, 633.0, 633.0, 582.0, 636.0, 636.0, 587.0, 590.0, 639.0, 630.0, 630.0, 582.0, 630.0, 564.0, 576.0, 636.0, 633.0, 579.0, 633.0, 636.0, 636.0, 636.0, 567.0, 630.0, 587.0, 587.0, 587.0, 630.0, 639.0, 579.0, 636.0, 630.0, 630.0, 587.0, 633.0, 579.0, 587.0, 587.0, 582.0, 630.0, 630.0, 579.0, 636.0, 633.0, 627.0, 630.0, 587.0, 579.0, 636.0, 630.0, 636.0, 633.0, 624.0, 636.0, 630.0, 633.0, 639.0, 570.0, 636.0, 630.0, 636.0, 582.0, 587.0, 579.0, 584.0, 633.0, 582.0, 587.0, 636.0, 633.0, 621.0, 630.0, 633.0, 584.0, 627.0, 630.0, 633.0, 479.0, 587.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 305.0, 291.0, 291.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 284.0, 294.0, 295.0, 281.0, 299.0, 288.0, 288.0, 299.0, 308.0, 322.0, 319.0, 317.0, 311.0, 319.0, 319.0, 320.0, 319.0, 317.0, 317.0, 319.0, 288.0, 291.0, 316.0, 314.0, 327.0, 309.0, 315.0, 312.0, 319.0, 317.0, 280.0, 298.0, 311.0, 322.0, 319.0, 314.0, 293.0, 289.0, 319.0, 317.0, 319.0, 317.0, 293.0, 294.0, 291.0, 
299.0, 319.0, 320.0, 316.0, 314.0, 311.0, 319.0, 291.0, 291.0, 316.0, 314.0, 281.0, 283.0, 290.0, 286.0, 319.0, 317.0, 314.0, 319.0, 296.0, 283.0, 314.0, 319.0, 319.0, 317.0, 317.0, 319.0, 319.0, 317.0, 280.0, 287.0, 311.0, 319.0, 288.0, 299.0, 291.0, 296.0, 296.0, 291.0, 316.0, 314.0, 314.0, 325.0, 285.0, 294.0, 319.0, 317.0, 314.0, 316.0, 319.0, 311.0, 293.0, 294.0, 314.0, 319.0, 283.0, 296.0, 288.0, 299.0, 301.0, 286.0, 296.0, 286.0, 316.0, 314.0, 316.0, 314.0, 298.0, 281.0, 319.0, 317.0, 317.0, 316.0, 308.0, 319.0, 311.0, 319.0, 295.0, 292.0, 290.0, 289.0, 319.0, 317.0, 311.0, 319.0, 314.0, 322.0, 314.0, 319.0, 314.0, 310.0, 314.0, 322.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 288.0, 282.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 298.0, 289.0, 294.0, 285.0, 288.0, 296.0, 314.0, 319.0, 296.0, 286.0, 299.0, 288.0, 319.0, 317.0, 316.0, 317.0, 310.0, 311.0, 319.0, 311.0, 322.0, 311.0, 288.0, 296.0, 310.0, 317.0, 313.0, 317.0, 316.0, 317.0, 239.0, 240.0, 293.0, 294.0, 296.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7899054631660849, "mean_processing_ms": 0.22646096684642672, "mean_inference_ms": 1.3933342141949396}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10680000, "num_steps_sampled": 5696000, "sample_time_ms": 22242.608, "load_time_ms": 35.979, "grad_time_ms": 10328.703, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013677343958988786, "policy_loss": -0.006408216897398233, "vf_loss": 83.29845428466797, "vf_explained_var": 0.7579674124717712, "kl": 0.001941792550496757, "entropy": 1.1077739000320435, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5696000, "episodes_total": 14240, "training_iteration": 445, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-36-24", "timestamp": 1660260984, "time_this_iter_s": 
31.836724996566772, "time_total_s": 19393.18611598015, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": 
true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, 
"shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19393.18611598015, "timesteps_since_restore": 5696000, "iterations_since_restore": 445, "perf": {"cpu_util_percent": 30.162222222222226, "ram_util_percent": 59.18000000000001}} -{"episode_reward_max": 639.0, "episode_reward_min": 456.0, "episode_reward_mean": 608.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 304.41}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.22, "shaped_reward_min": 136, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, 
"potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.19, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.99, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.77, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, 
"soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, 
"catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 579.0, 570.0, 582.0, 636.0, 636.0, 630.0, 587.0, 630.0, 630.0, 456.0, 573.0, 630.0, 579.0, 576.0, 633.0, 587.0, 633.0, 630.0, 630.0, 630.0, 633.0, 582.0, 579.0, 630.0, 639.0, 630.0, 639.0, 587.0, 627.0, 522.0, 582.0, 627.0, 630.0, 587.0, 579.0, 636.0, 630.0, 636.0, 633.0, 624.0, 636.0, 630.0, 633.0, 639.0, 570.0, 636.0, 630.0, 636.0, 582.0, 587.0, 579.0, 584.0, 633.0, 582.0, 587.0, 636.0, 633.0, 621.0, 630.0, 633.0, 584.0, 627.0, 630.0, 633.0, 479.0, 587.0, 587.0, 587.0, 582.0, 582.0, 582.0, 630.0, 578.0, 576.0, 587.0, 587.0, 630.0, 636.0, 630.0, 639.0, 636.0, 636.0, 579.0, 630.0, 636.0, 627.0, 636.0, 578.0, 633.0, 633.0, 582.0, 636.0, 636.0, 587.0, 590.0, 639.0, 630.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [310.0, 317.0, 290.0, 289.0, 288.0, 282.0, 291.0, 291.0, 324.0, 312.0, 314.0, 322.0, 314.0, 316.0, 299.0, 288.0, 316.0, 314.0, 316.0, 314.0, 228.0, 228.0, 281.0, 292.0, 316.0, 
314.0, 293.0, 286.0, 288.0, 288.0, 316.0, 317.0, 293.0, 294.0, 314.0, 319.0, 321.0, 309.0, 316.0, 314.0, 322.0, 308.0, 319.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 319.0, 320.0, 321.0, 309.0, 319.0, 320.0, 299.0, 288.0, 319.0, 308.0, 251.0, 271.0, 296.0, 286.0, 308.0, 319.0, 311.0, 319.0, 295.0, 292.0, 290.0, 289.0, 319.0, 317.0, 311.0, 319.0, 314.0, 322.0, 314.0, 319.0, 314.0, 310.0, 314.0, 322.0, 311.0, 319.0, 316.0, 317.0, 317.0, 322.0, 288.0, 282.0, 319.0, 317.0, 311.0, 319.0, 319.0, 317.0, 291.0, 291.0, 298.0, 289.0, 294.0, 285.0, 288.0, 296.0, 314.0, 319.0, 296.0, 286.0, 299.0, 288.0, 319.0, 317.0, 316.0, 317.0, 310.0, 311.0, 319.0, 311.0, 322.0, 311.0, 288.0, 296.0, 310.0, 317.0, 313.0, 317.0, 316.0, 317.0, 239.0, 240.0, 293.0, 294.0, 296.0, 291.0, 282.0, 305.0, 291.0, 291.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 284.0, 294.0, 295.0, 281.0, 299.0, 288.0, 288.0, 299.0, 308.0, 322.0, 319.0, 317.0, 311.0, 319.0, 319.0, 320.0, 319.0, 317.0, 317.0, 319.0, 288.0, 291.0, 316.0, 314.0, 327.0, 309.0, 315.0, 312.0, 319.0, 317.0, 280.0, 298.0, 311.0, 322.0, 319.0, 314.0, 293.0, 289.0, 319.0, 317.0, 319.0, 317.0, 293.0, 294.0, 291.0, 299.0, 319.0, 320.0, 316.0, 314.0, 311.0, 319.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7891523423128755, "mean_processing_ms": 0.22631170679234558, "mean_inference_ms": 1.3926725732710878}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10704000, "num_steps_sampled": 5708800, "sample_time_ms": 21771.335, "load_time_ms": 36.027, "grad_time_ms": 10385.96, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0026401570066809654, "policy_loss": -0.0047949193976819515, "vf_loss": 79.88745880126953, "vf_explained_var": 0.7707352638244629, "kl": 0.0022178192157298326, "entropy": 1.107340693473816, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, 
"timesteps_total": 5708800, "episodes_total": 14272, "training_iteration": 446, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-36-58", "timestamp": 1660261018, "time_this_iter_s": 33.74682116508484, "time_total_s": 19426.932937145233, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", 
"normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": 
false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19426.932937145233, "timesteps_since_restore": 5708800, "iterations_since_restore": 446, "perf": {"cpu_util_percent": 32.958333333333336, "ram_util_percent": 59.18541666666667}} -{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 602.99, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 202.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 301.495}, "custom_metrics": {"sparse_reward_mean": 208.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.79, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.04, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.62, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.44, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.81, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.76, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 
0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.62, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.62, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 627.0, 587.0, 582.0, 636.0, 587.0, 627.0, 636.0, 630.0, 630.0, 587.0, 621.0, 575.0, 639.0, 633.0, 636.0, 584.0, 582.0, 573.0, 498.0, 587.0, 579.0, 408.0, 636.0, 587.0, 587.0, 633.0, 573.0, 636.0, 587.0, 636.0, 582.0, 633.0, 479.0, 587.0, 587.0, 587.0, 582.0, 582.0, 582.0, 630.0, 578.0, 576.0, 587.0, 587.0, 630.0, 636.0, 630.0, 639.0, 636.0, 636.0, 579.0, 630.0, 636.0, 627.0, 636.0, 578.0, 633.0, 633.0, 582.0, 636.0, 636.0, 587.0, 590.0, 639.0, 630.0, 630.0, 582.0, 627.0, 579.0, 570.0, 582.0, 636.0, 636.0, 630.0, 587.0, 630.0, 630.0, 456.0, 573.0, 630.0, 579.0, 576.0, 633.0, 587.0, 633.0, 630.0, 630.0, 630.0, 633.0, 582.0, 579.0, 630.0, 639.0, 630.0, 639.0, 587.0, 627.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 308.0, 319.0, 286.0, 301.0, 293.0, 289.0, 316.0, 320.0, 291.0, 296.0, 313.0, 314.0, 314.0, 322.0, 311.0, 319.0, 314.0, 316.0, 288.0, 299.0, 310.0, 311.0, 288.0, 287.0, 317.0, 322.0, 321.0, 312.0, 319.0, 317.0, 302.0, 282.0, 291.0, 291.0, 282.0, 291.0, 253.0, 245.0, 290.0, 297.0, 288.0, 291.0, 206.0, 202.0, 319.0, 317.0, 293.0, 294.0, 288.0, 299.0, 316.0, 317.0, 285.0, 288.0, 319.0, 317.0, 299.0, 288.0, 319.0, 317.0, 293.0, 289.0, 316.0, 317.0, 239.0, 240.0, 293.0, 294.0, 296.0, 291.0, 282.0, 305.0, 291.0, 291.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 284.0, 294.0, 295.0, 281.0, 299.0, 288.0, 288.0, 299.0, 308.0, 322.0, 319.0, 317.0, 311.0, 319.0, 319.0, 320.0, 319.0, 317.0, 317.0, 319.0, 288.0, 291.0, 316.0, 314.0, 327.0, 309.0, 315.0, 312.0, 319.0, 317.0, 280.0, 298.0, 311.0, 322.0, 319.0, 314.0, 293.0, 289.0, 319.0, 317.0, 319.0, 317.0, 293.0, 294.0, 291.0, 299.0, 319.0, 320.0, 316.0, 314.0, 311.0, 319.0, 291.0, 291.0, 310.0, 317.0, 290.0, 289.0, 288.0, 282.0, 291.0, 291.0, 324.0, 312.0, 314.0, 322.0, 314.0, 316.0, 299.0, 288.0, 316.0, 314.0, 316.0, 314.0, 228.0, 228.0, 281.0, 292.0, 316.0, 314.0, 293.0, 286.0, 288.0, 288.0, 316.0, 317.0, 293.0, 294.0, 314.0, 319.0, 321.0, 309.0, 316.0, 314.0, 322.0, 308.0, 319.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 319.0, 320.0, 321.0, 309.0, 319.0, 320.0, 299.0, 288.0, 319.0, 308.0, 251.0, 271.0, 296.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7883964067869212, "mean_processing_ms": 0.22616136742936413, "mean_inference_ms": 1.39185520221877}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10728000, "num_steps_sampled": 5721600, "sample_time_ms": 21601.13, "load_time_ms": 36.066, "grad_time_ms": 10530.811, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004505176562815905, "policy_loss": -0.00313469092361629, "vf_loss": 81.9230728149414, 
"vf_explained_var": 0.7751343250274658, "kl": 0.002040610648691654, "entropy": 1.1048672199249268, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5721600, "episodes_total": 14304, "training_iteration": 447, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-37-28", "timestamp": 1660261048, "time_this_iter_s": 30.400289058685303, "time_total_s": 19457.33322620392, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19457.33322620392, "timesteps_since_restore": 5721600, "iterations_since_restore": 447, "perf": {"cpu_util_percent": 36.12558139534883, "ram_util_percent": 59.20697674418605}} -{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 604.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 202.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 302.49}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.38, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.19, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.39, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.85, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 630.0, 630.0, 630.0, 627.0, 630.0, 606.0, 633.0, 519.0, 633.0, 636.0, 573.0, 630.0, 636.0, 636.0, 584.0, 636.0, 627.0, 630.0, 636.0, 633.0, 633.0, 579.0, 633.0, 582.0, 582.0, 590.0, 627.0, 576.0, 570.0, 582.0, 639.0, 630.0, 630.0, 582.0, 627.0, 579.0, 570.0, 582.0, 636.0, 636.0, 630.0, 587.0, 630.0, 630.0, 456.0, 573.0, 630.0, 579.0, 576.0, 633.0, 587.0, 633.0, 630.0, 630.0, 630.0, 633.0, 582.0, 579.0, 630.0, 639.0, 630.0, 639.0, 587.0, 627.0, 522.0, 582.0, 636.0, 627.0, 587.0, 582.0, 636.0, 587.0, 627.0, 636.0, 630.0, 630.0, 587.0, 621.0, 575.0, 639.0, 633.0, 636.0, 584.0, 582.0, 573.0, 498.0, 587.0, 579.0, 408.0, 636.0, 587.0, 587.0, 633.0, 573.0, 636.0, 587.0, 636.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 297.0, 316.0, 314.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 313.0, 314.0, 316.0, 314.0, 304.0, 302.0, 316.0, 317.0, 267.0, 252.0, 322.0, 311.0, 313.0, 323.0, 278.0, 295.0, 316.0, 314.0, 314.0, 322.0, 319.0, 317.0, 298.0, 286.0, 316.0, 320.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 314.0, 319.0, 299.0, 280.0, 319.0, 314.0, 286.0, 296.0, 288.0, 294.0, 296.0, 294.0, 308.0, 319.0, 285.0, 291.0, 272.0, 298.0, 282.0, 300.0, 319.0, 320.0, 316.0, 314.0, 311.0, 319.0, 291.0, 291.0, 310.0, 317.0, 290.0, 289.0, 288.0, 282.0, 291.0, 291.0, 324.0, 312.0, 314.0, 322.0, 314.0, 316.0, 299.0, 288.0, 316.0, 314.0, 316.0, 314.0, 228.0, 228.0, 281.0, 292.0, 316.0, 314.0, 293.0, 286.0, 288.0, 288.0, 316.0, 317.0, 293.0, 294.0, 314.0, 319.0, 321.0, 309.0, 316.0, 314.0, 322.0, 308.0, 319.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 319.0, 320.0, 321.0, 309.0, 319.0, 320.0, 299.0, 288.0, 319.0, 308.0, 251.0, 271.0, 296.0, 286.0, 319.0, 317.0, 308.0, 319.0, 286.0, 301.0, 293.0, 289.0, 316.0, 320.0, 291.0, 296.0, 313.0, 314.0, 314.0, 322.0, 311.0, 319.0, 314.0, 316.0, 288.0, 299.0, 310.0, 311.0, 288.0, 287.0, 317.0, 322.0, 321.0, 312.0, 319.0, 317.0, 302.0, 282.0, 291.0, 291.0, 282.0, 291.0, 253.0, 245.0, 290.0, 297.0, 288.0, 291.0, 206.0, 202.0, 319.0, 317.0, 293.0, 294.0, 288.0, 299.0, 316.0, 317.0, 285.0, 288.0, 319.0, 317.0, 299.0, 288.0, 319.0, 317.0, 293.0, 289.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7876488216946843, "mean_processing_ms": 0.22601276823059913, "mean_inference_ms": 1.3911503009579902}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10752000, "num_steps_sampled": 5734400, "sample_time_ms": 21718.783, "load_time_ms": 35.962, "grad_time_ms": 10435.936, "update_time_ms": 0.002, 
"learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016340842703357339, "policy_loss": -0.0055215489119291306, "vf_loss": 77.1473388671875, "vf_explained_var": 0.7692286968231201, "kl": 0.001808720058761537, "entropy": 1.1181970834732056, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5734400, "episodes_total": 14336, "training_iteration": 448, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-38-03", "timestamp": 1660261083, "time_this_iter_s": 34.906923055648804, "time_total_s": 19492.240149259567, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": 
{"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], 
"postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19492.240149259567, "timesteps_since_restore": 5734400, "iterations_since_restore": 448, "perf": {"cpu_util_percent": 33.91428571428571, "ram_util_percent": 59.09591836734693}} -{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 605.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 202.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 302.61}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 185.62, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 
0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.6, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.2, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.89, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 0, 
"useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 587.0, 579.0, 636.0, 582.0, 606.0, 636.0, 587.0, 633.0, 630.0, 630.0, 636.0, 633.0, 633.0, 624.0, 587.0, 627.0, 576.0, 584.0, 570.0, 582.0, 630.0, 579.0, 636.0, 633.0, 576.0, 587.0, 636.0, 582.0, 630.0, 636.0, 587.0, 627.0, 522.0, 582.0, 636.0, 627.0, 587.0, 582.0, 636.0, 587.0, 627.0, 636.0, 630.0, 630.0, 587.0, 621.0, 575.0, 639.0, 633.0, 636.0, 584.0, 582.0, 573.0, 498.0, 587.0, 579.0, 408.0, 636.0, 587.0, 587.0, 633.0, 573.0, 636.0, 587.0, 636.0, 582.0, 587.0, 630.0, 630.0, 630.0, 630.0, 627.0, 630.0, 606.0, 633.0, 519.0, 633.0, 636.0, 573.0, 630.0, 636.0, 636.0, 584.0, 636.0, 627.0, 630.0, 636.0, 633.0, 633.0, 579.0, 633.0, 582.0, 582.0, 590.0, 627.0, 576.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 322.0, 314.0, 290.0, 297.0, 283.0, 296.0, 324.0, 312.0, 285.0, 297.0, 302.0, 304.0, 319.0, 317.0, 287.0, 300.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 319.0, 317.0, 317.0, 316.0, 316.0, 317.0, 313.0, 311.0, 298.0, 289.0, 313.0, 314.0, 294.0, 282.0, 296.0, 288.0, 279.0, 291.0, 301.0, 281.0, 316.0, 314.0, 293.0, 286.0, 319.0, 317.0, 316.0, 317.0, 285.0, 291.0, 302.0, 285.0, 319.0, 317.0, 288.0, 294.0, 319.0, 311.0, 319.0, 317.0, 299.0, 288.0, 319.0, 308.0, 251.0, 271.0, 296.0, 286.0, 319.0, 317.0, 308.0, 319.0, 286.0, 301.0, 293.0, 289.0, 316.0, 320.0, 291.0, 296.0, 313.0, 314.0, 314.0, 322.0, 311.0, 319.0, 314.0, 316.0, 288.0, 299.0, 310.0, 311.0, 288.0, 287.0, 317.0, 322.0, 321.0, 312.0, 319.0, 317.0, 302.0, 282.0, 291.0, 291.0, 282.0, 291.0, 253.0, 245.0, 290.0, 297.0, 288.0, 291.0, 206.0, 202.0, 319.0, 317.0, 293.0, 294.0, 288.0, 299.0, 316.0, 317.0, 285.0, 288.0, 319.0, 317.0, 299.0, 288.0, 319.0, 317.0, 293.0, 289.0, 290.0, 297.0, 316.0, 314.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 313.0, 314.0, 316.0, 314.0, 304.0, 302.0, 316.0, 317.0, 267.0, 252.0, 322.0, 311.0, 313.0, 323.0, 278.0, 295.0, 316.0, 314.0, 314.0, 322.0, 319.0, 317.0, 298.0, 286.0, 316.0, 320.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 314.0, 319.0, 299.0, 280.0, 319.0, 314.0, 286.0, 296.0, 288.0, 294.0, 296.0, 294.0, 308.0, 319.0, 285.0, 291.0, 272.0, 298.0, 282.0, 300.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7869063801847372, "mean_processing_ms": 0.22586438145730278, "mean_inference_ms": 
1.3903532948966142}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10776000, "num_steps_sampled": 5747200, "sample_time_ms": 21734.341, "load_time_ms": 36.357, "grad_time_ms": 10633.962, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015590289840474725, "policy_loss": -0.005610723048448563, "vf_loss": 77.2466812133789, "vf_explained_var": 0.7718032002449036, "kl": 0.0017093941569328308, "entropy": 1.1098432540893555, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5747200, "episodes_total": 14368, "training_iteration": 449, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-38-34", "timestamp": 1660261114, "time_this_iter_s": 30.762639045715332, "time_total_s": 19523.002788305283, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19523.002788305283, "timesteps_since_restore": 5747200, "iterations_since_restore": 449, "perf": {"cpu_util_percent": 34.67441860465116, "ram_util_percent": 60.09767441860465}} -{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 607.51, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.755}, "custom_metrics": {"sparse_reward_mean": 210.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.31, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.27, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.83, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.87, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.74, 
"dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.77, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.87, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.87, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, 
"viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 579.0, 633.0, 630.0, 587.0, 639.0, 582.0, 582.0, 522.0, 633.0, 630.0, 582.0, 633.0, 630.0, 579.0, 519.0, 627.0, 636.0, 627.0, 587.0, 587.0, 630.0, 573.0, 639.0, 630.0, 582.0, 627.0, 627.0, 587.0, 582.0, 587.0, 636.0, 587.0, 636.0, 582.0, 587.0, 630.0, 630.0, 630.0, 630.0, 627.0, 630.0, 606.0, 633.0, 519.0, 633.0, 636.0, 573.0, 630.0, 636.0, 636.0, 584.0, 636.0, 627.0, 630.0, 636.0, 633.0, 633.0, 579.0, 633.0, 582.0, 582.0, 590.0, 627.0, 576.0, 570.0, 582.0, 582.0, 636.0, 587.0, 579.0, 636.0, 582.0, 606.0, 636.0, 587.0, 
633.0, 630.0, 630.0, 636.0, 633.0, 633.0, 624.0, 587.0, 627.0, 576.0, 584.0, 570.0, 582.0, 630.0, 579.0, 636.0, 633.0, 576.0, 587.0, 636.0, 582.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 291.0, 289.0, 293.0, 287.0, 292.0, 319.0, 314.0, 311.0, 319.0, 290.0, 297.0, 319.0, 320.0, 299.0, 283.0, 290.0, 292.0, 264.0, 258.0, 316.0, 317.0, 321.0, 309.0, 291.0, 291.0, 314.0, 319.0, 316.0, 314.0, 296.0, 283.0, 270.0, 249.0, 316.0, 311.0, 311.0, 325.0, 315.0, 312.0, 290.0, 297.0, 295.0, 292.0, 321.0, 309.0, 293.0, 280.0, 322.0, 317.0, 316.0, 314.0, 293.0, 289.0, 308.0, 319.0, 311.0, 316.0, 296.0, 291.0, 291.0, 291.0, 301.0, 286.0, 319.0, 317.0, 299.0, 288.0, 319.0, 317.0, 293.0, 289.0, 290.0, 297.0, 316.0, 314.0, 318.0, 312.0, 316.0, 314.0, 319.0, 311.0, 313.0, 314.0, 316.0, 314.0, 304.0, 302.0, 316.0, 317.0, 267.0, 252.0, 322.0, 311.0, 313.0, 323.0, 278.0, 295.0, 316.0, 314.0, 314.0, 322.0, 319.0, 317.0, 298.0, 286.0, 316.0, 320.0, 313.0, 314.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 314.0, 319.0, 299.0, 280.0, 319.0, 314.0, 286.0, 296.0, 288.0, 294.0, 296.0, 294.0, 308.0, 319.0, 285.0, 291.0, 272.0, 298.0, 282.0, 300.0, 288.0, 294.0, 322.0, 314.0, 290.0, 297.0, 283.0, 296.0, 324.0, 312.0, 285.0, 297.0, 302.0, 304.0, 319.0, 317.0, 287.0, 300.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 319.0, 317.0, 317.0, 316.0, 316.0, 317.0, 313.0, 311.0, 298.0, 289.0, 313.0, 314.0, 294.0, 282.0, 296.0, 288.0, 279.0, 291.0, 301.0, 281.0, 316.0, 314.0, 293.0, 286.0, 319.0, 
317.0, 316.0, 317.0, 285.0, 291.0, 302.0, 285.0, 319.0, 317.0, 288.0, 294.0, 319.0, 311.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7861726618280127, "mean_processing_ms": 0.22571819485718037, "mean_inference_ms": 1.389515005345911}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10800000, "num_steps_sampled": 5760000, "sample_time_ms": 21545.479, "load_time_ms": 37.135, "grad_time_ms": 10871.751, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00040180576615966856, "policy_loss": -0.007139734923839569, "vf_loss": 80.9995346069336, "vf_explained_var": 0.7637953758239746, "kl": 0.0017641382291913033, "entropy": 1.1168159246444702, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5760000, "episodes_total": 14400, "training_iteration": 450, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-39-04", "timestamp": 1660261144, "time_this_iter_s": 30.14027214050293, "time_total_s": 19553.143060445786, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19553.143060445786, "timesteps_since_restore": 5760000, "iterations_since_restore": 450, "perf": {"cpu_util_percent": 35.460465116279074, "ram_util_percent": 59.4627906976744}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 599.51, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 299.755}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 183.91, 
"shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.25, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.17, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.82, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.82, 
"potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.82, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 
0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.82, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.65, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 576.0, 630.0, 408.0, 582.0, 582.0, 636.0, 636.0, 582.0, 627.0, 582.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 636.0, 180.0, 630.0, 510.0, 633.0, 582.0, 630.0, 579.0, 636.0, 582.0, 582.0, 630.0, 639.0, 630.0, 570.0, 627.0, 576.0, 570.0, 582.0, 582.0, 636.0, 587.0, 579.0, 636.0, 582.0, 606.0, 636.0, 587.0, 633.0, 630.0, 630.0, 636.0, 633.0, 633.0, 624.0, 587.0, 
627.0, 576.0, 584.0, 570.0, 582.0, 630.0, 579.0, 636.0, 633.0, 576.0, 587.0, 636.0, 582.0, 630.0, 636.0, 573.0, 582.0, 579.0, 633.0, 630.0, 587.0, 639.0, 582.0, 582.0, 522.0, 633.0, 630.0, 582.0, 633.0, 630.0, 579.0, 519.0, 627.0, 636.0, 627.0, 587.0, 587.0, 630.0, 573.0, 639.0, 630.0, 582.0, 627.0, 627.0, 587.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 308.0, 285.0, 291.0, 311.0, 319.0, 213.0, 195.0, 288.0, 294.0, 294.0, 288.0, 321.0, 315.0, 319.0, 317.0, 291.0, 291.0, 311.0, 316.0, 301.0, 281.0, 316.0, 314.0, 321.0, 309.0, 327.0, 306.0, 291.0, 291.0, 311.0, 319.0, 311.0, 319.0, 311.0, 325.0, 91.0, 89.0, 311.0, 319.0, 247.0, 263.0, 311.0, 322.0, 283.0, 299.0, 316.0, 314.0, 288.0, 291.0, 316.0, 320.0, 293.0, 289.0, 291.0, 291.0, 306.0, 324.0, 317.0, 322.0, 314.0, 316.0, 282.0, 288.0, 308.0, 319.0, 285.0, 291.0, 272.0, 298.0, 282.0, 300.0, 288.0, 294.0, 322.0, 314.0, 290.0, 297.0, 283.0, 296.0, 324.0, 312.0, 285.0, 297.0, 302.0, 304.0, 319.0, 317.0, 287.0, 300.0, 314.0, 319.0, 316.0, 314.0, 311.0, 319.0, 319.0, 317.0, 317.0, 316.0, 316.0, 317.0, 313.0, 311.0, 298.0, 289.0, 313.0, 314.0, 294.0, 282.0, 296.0, 288.0, 279.0, 291.0, 301.0, 281.0, 316.0, 314.0, 293.0, 286.0, 319.0, 317.0, 316.0, 317.0, 285.0, 291.0, 302.0, 285.0, 319.0, 317.0, 288.0, 294.0, 319.0, 311.0, 319.0, 317.0, 282.0, 291.0, 289.0, 293.0, 287.0, 292.0, 319.0, 314.0, 311.0, 319.0, 290.0, 297.0, 319.0, 320.0, 299.0, 283.0, 290.0, 292.0, 264.0, 258.0, 316.0, 317.0, 321.0, 309.0, 291.0, 
291.0, 314.0, 319.0, 316.0, 314.0, 296.0, 283.0, 270.0, 249.0, 316.0, 311.0, 311.0, 325.0, 315.0, 312.0, 290.0, 297.0, 295.0, 292.0, 321.0, 309.0, 293.0, 280.0, 322.0, 317.0, 316.0, 314.0, 293.0, 289.0, 308.0, 319.0, 311.0, 316.0, 296.0, 291.0, 291.0, 291.0, 301.0, 286.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7854523352273275, "mean_processing_ms": 0.22557596598023225, "mean_inference_ms": 1.3886629869315275}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10824000, "num_steps_sampled": 5772800, "sample_time_ms": 21458.474, "load_time_ms": 37.866, "grad_time_ms": 10821.902, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004755727481096983, "policy_loss": -0.00296382955275476, "vf_loss": 82.76275634765625, "vf_explained_var": 0.7805452942848206, "kl": 0.0020347917452454567, "entropy": 1.1134214401245117, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5772800, "episodes_total": 14432, "training_iteration": 451, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-39-39", "timestamp": 1660261179, "time_this_iter_s": 34.819623947143555, "time_total_s": 19587.96268439293, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, 
"custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19587.96268439293, "timesteps_since_restore": 5772800, "iterations_since_restore": 451, "perf": {"cpu_util_percent": 35.726, "ram_util_percent": 59.13399999999999}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 600.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": 
{"ppo": 300.195}, "custom_metrics": {"sparse_reward_mean": 208.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 184.39, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.35, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.9, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.4, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.87, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.71, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.87, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.87, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 582.0, 576.0, 609.0, 582.0, 630.0, 636.0, 630.0, 630.0, 633.0, 587.0, 584.0, 582.0, 633.0, 587.0, 636.0, 639.0, 630.0, 630.0, 522.0, 636.0, 587.0, 633.0, 579.0, 633.0, 582.0, 630.0, 582.0, 
633.0, 633.0, 582.0, 636.0, 582.0, 630.0, 636.0, 573.0, 582.0, 579.0, 633.0, 630.0, 587.0, 639.0, 582.0, 582.0, 522.0, 633.0, 630.0, 582.0, 633.0, 630.0, 579.0, 519.0, 627.0, 636.0, 627.0, 587.0, 587.0, 630.0, 573.0, 639.0, 630.0, 582.0, 627.0, 627.0, 587.0, 582.0, 587.0, 627.0, 576.0, 630.0, 408.0, 582.0, 582.0, 636.0, 636.0, 582.0, 627.0, 582.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 636.0, 180.0, 630.0, 510.0, 633.0, 582.0, 630.0, 579.0, 636.0, 582.0, 582.0, 630.0, 639.0, 630.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 309.0, 288.0, 294.0, 294.0, 288.0, 290.0, 286.0, 302.0, 307.0, 291.0, 291.0, 311.0, 319.0, 324.0, 312.0, 313.0, 317.0, 311.0, 319.0, 311.0, 322.0, 288.0, 299.0, 283.0, 301.0, 293.0, 289.0, 319.0, 314.0, 283.0, 304.0, 314.0, 322.0, 319.0, 320.0, 316.0, 314.0, 321.0, 309.0, 257.0, 265.0, 321.0, 315.0, 288.0, 299.0, 311.0, 322.0, 291.0, 288.0, 309.0, 324.0, 285.0, 297.0, 316.0, 314.0, 293.0, 289.0, 319.0, 314.0, 316.0, 317.0, 286.0, 296.0, 319.0, 317.0, 288.0, 294.0, 319.0, 311.0, 319.0, 317.0, 282.0, 291.0, 289.0, 293.0, 287.0, 292.0, 319.0, 314.0, 311.0, 319.0, 290.0, 297.0, 319.0, 320.0, 299.0, 283.0, 290.0, 292.0, 264.0, 258.0, 316.0, 317.0, 321.0, 309.0, 291.0, 291.0, 314.0, 319.0, 316.0, 314.0, 296.0, 283.0, 270.0, 249.0, 316.0, 311.0, 311.0, 325.0, 315.0, 312.0, 290.0, 297.0, 295.0, 292.0, 321.0, 309.0, 293.0, 280.0, 322.0, 317.0, 316.0, 314.0, 293.0, 289.0, 308.0, 319.0, 311.0, 316.0, 296.0, 291.0, 291.0, 291.0, 301.0, 286.0, 319.0, 
308.0, 285.0, 291.0, 311.0, 319.0, 213.0, 195.0, 288.0, 294.0, 294.0, 288.0, 321.0, 315.0, 319.0, 317.0, 291.0, 291.0, 311.0, 316.0, 301.0, 281.0, 316.0, 314.0, 321.0, 309.0, 327.0, 306.0, 291.0, 291.0, 311.0, 319.0, 311.0, 319.0, 311.0, 325.0, 91.0, 89.0, 311.0, 319.0, 247.0, 263.0, 311.0, 322.0, 283.0, 299.0, 316.0, 314.0, 288.0, 291.0, 316.0, 320.0, 293.0, 289.0, 291.0, 291.0, 306.0, 324.0, 317.0, 322.0, 314.0, 316.0, 282.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7847405370697927, "mean_processing_ms": 0.22543724156360243, "mean_inference_ms": 1.3878004584531793}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10848000, "num_steps_sampled": 5785600, "sample_time_ms": 21368.51, "load_time_ms": 38.205, "grad_time_ms": 10677.018, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004785877245012671, "policy_loss": -0.0067210569977760315, "vf_loss": 77.5384750366211, "vf_explained_var": 0.777080774307251, "kl": 0.0022153640165925026, "entropy": 1.108397126197815, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5785600, "episodes_total": 14464, "training_iteration": 452, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-40-10", "timestamp": 1660261210, "time_this_iter_s": 30.929455280303955, "time_total_s": 19618.892139673233, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19618.892139673233, "timesteps_since_restore": 5785600, "iterations_since_restore": 452, "perf": {"cpu_util_percent": 34.12045454545454, "ram_util_percent": 59.23863636363635}} -{"episode_reward_max": 639.0, "episode_reward_min": 
180.0, "episode_reward_mean": 601.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 300.725}, "custom_metrics": {"sparse_reward_mean": 208.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 185.05, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 17.97, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.3, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 
0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.54, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.1, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, 
"optimal_onion_potting_agent_1_mean": 17.54, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.54, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 636.0, 636.0, 636.0, 
636.0, 582.0, 633.0, 587.0, 581.0, 633.0, 582.0, 630.0, 630.0, 587.0, 582.0, 636.0, 582.0, 633.0, 536.0, 582.0, 636.0, 639.0, 627.0, 582.0, 624.0, 579.0, 573.0, 633.0, 633.0, 587.0, 633.0, 627.0, 587.0, 582.0, 587.0, 627.0, 576.0, 630.0, 408.0, 582.0, 582.0, 636.0, 636.0, 582.0, 627.0, 582.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 636.0, 180.0, 630.0, 510.0, 633.0, 582.0, 630.0, 579.0, 636.0, 582.0, 582.0, 630.0, 639.0, 630.0, 570.0, 630.0, 582.0, 582.0, 576.0, 609.0, 582.0, 630.0, 636.0, 630.0, 630.0, 633.0, 587.0, 584.0, 582.0, 633.0, 587.0, 636.0, 639.0, 630.0, 630.0, 522.0, 636.0, 587.0, 633.0, 579.0, 633.0, 582.0, 630.0, 582.0, 633.0, 633.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 319.0, 314.0, 288.0, 299.0, 290.0, 291.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 319.0, 311.0, 296.0, 291.0, 291.0, 291.0, 322.0, 314.0, 290.0, 292.0, 316.0, 317.0, 257.0, 279.0, 288.0, 294.0, 311.0, 325.0, 322.0, 317.0, 319.0, 308.0, 293.0, 289.0, 312.0, 312.0, 296.0, 283.0, 287.0, 286.0, 316.0, 317.0, 316.0, 317.0, 290.0, 297.0, 314.0, 319.0, 311.0, 316.0, 296.0, 291.0, 291.0, 291.0, 301.0, 286.0, 319.0, 308.0, 285.0, 291.0, 311.0, 319.0, 213.0, 195.0, 288.0, 294.0, 294.0, 288.0, 321.0, 315.0, 319.0, 317.0, 291.0, 291.0, 311.0, 316.0, 301.0, 281.0, 316.0, 314.0, 321.0, 309.0, 327.0, 306.0, 291.0, 291.0, 311.0, 319.0, 311.0, 319.0, 311.0, 325.0, 91.0, 89.0, 311.0, 319.0, 247.0, 
263.0, 311.0, 322.0, 283.0, 299.0, 316.0, 314.0, 288.0, 291.0, 316.0, 320.0, 293.0, 289.0, 291.0, 291.0, 306.0, 324.0, 317.0, 322.0, 314.0, 316.0, 282.0, 288.0, 321.0, 309.0, 288.0, 294.0, 294.0, 288.0, 290.0, 286.0, 302.0, 307.0, 291.0, 291.0, 311.0, 319.0, 324.0, 312.0, 313.0, 317.0, 311.0, 319.0, 311.0, 322.0, 288.0, 299.0, 283.0, 301.0, 293.0, 289.0, 319.0, 314.0, 283.0, 304.0, 314.0, 322.0, 319.0, 320.0, 316.0, 314.0, 321.0, 309.0, 257.0, 265.0, 321.0, 315.0, 288.0, 299.0, 311.0, 322.0, 291.0, 288.0, 309.0, 324.0, 285.0, 297.0, 316.0, 314.0, 293.0, 289.0, 319.0, 314.0, 316.0, 317.0, 286.0, 296.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7840324712041147, "mean_processing_ms": 0.22529998529372405, "mean_inference_ms": 1.3869181064814406}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10872000, "num_steps_sampled": 5798400, "sample_time_ms": 21127.393, "load_time_ms": 38.987, "grad_time_ms": 10639.287, "update_time_ms": 0.003, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0022095281165093184, "policy_loss": -0.005497789476066828, "vf_loss": 82.6261215209961, "vf_explained_var": 0.7598109245300293, "kl": 0.0015994912246242166, "entropy": 1.110588550567627, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5798400, "episodes_total": 14496, "training_iteration": 453, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-40-38", "timestamp": 1660261238, "time_this_iter_s": 28.482766151428223, "time_total_s": 19647.37490582466, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", 
"fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19647.37490582466, "timesteps_since_restore": 5798400, 
"iterations_since_restore": 453, "perf": {"cpu_util_percent": 34.01, "ram_util_percent": 58.98499999999999}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 609.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.53}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.46, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.62, 
"useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.29, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.14, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.49, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.19, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, 
"soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.31, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 587.0, 627.0, 630.0, 630.0, 627.0, 582.0, 633.0, 582.0, 582.0, 636.0, 630.0, 579.0, 627.0, 627.0, 579.0, 587.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 633.0, 587.0, 633.0, 579.0, 636.0, 582.0, 582.0, 627.0, 636.0, 630.0, 639.0, 630.0, 570.0, 630.0, 582.0, 582.0, 576.0, 609.0, 582.0, 630.0, 636.0, 630.0, 630.0, 633.0, 587.0, 584.0, 582.0, 633.0, 587.0, 636.0, 639.0, 630.0, 630.0, 522.0, 636.0, 587.0, 633.0, 579.0, 633.0, 582.0, 630.0, 582.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 636.0, 636.0, 636.0, 582.0, 633.0, 587.0, 581.0, 633.0, 582.0, 630.0, 630.0, 587.0, 582.0, 636.0, 582.0, 633.0, 536.0, 582.0, 636.0, 639.0, 627.0, 582.0, 624.0, 579.0, 573.0, 633.0, 633.0, 587.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 320.0, 288.0, 299.0, 313.0, 314.0, 306.0, 324.0, 311.0, 319.0, 316.0, 311.0, 296.0, 286.0, 322.0, 311.0, 296.0, 286.0, 298.0, 284.0, 314.0, 322.0, 311.0, 319.0, 277.0, 302.0, 319.0, 308.0, 316.0, 311.0, 290.0, 289.0, 298.0, 289.0, 316.0, 317.0, 314.0, 319.0, 289.0, 293.0, 299.0, 283.0, 286.0, 296.0, 314.0, 322.0, 318.0, 315.0, 291.0, 296.0, 316.0, 317.0, 287.0, 292.0, 314.0, 322.0, 293.0, 289.0, 291.0, 291.0, 316.0, 311.0, 319.0, 317.0, 306.0, 324.0, 317.0, 322.0, 314.0, 316.0, 282.0, 288.0, 321.0, 309.0, 288.0, 294.0, 294.0, 288.0, 290.0, 
286.0, 302.0, 307.0, 291.0, 291.0, 311.0, 319.0, 324.0, 312.0, 313.0, 317.0, 311.0, 319.0, 311.0, 322.0, 288.0, 299.0, 283.0, 301.0, 293.0, 289.0, 319.0, 314.0, 283.0, 304.0, 314.0, 322.0, 319.0, 320.0, 316.0, 314.0, 321.0, 309.0, 257.0, 265.0, 321.0, 315.0, 288.0, 299.0, 311.0, 322.0, 291.0, 288.0, 309.0, 324.0, 285.0, 297.0, 316.0, 314.0, 293.0, 289.0, 319.0, 314.0, 316.0, 317.0, 286.0, 296.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 319.0, 314.0, 288.0, 299.0, 290.0, 291.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 319.0, 311.0, 296.0, 291.0, 291.0, 291.0, 322.0, 314.0, 290.0, 292.0, 316.0, 317.0, 257.0, 279.0, 288.0, 294.0, 311.0, 325.0, 322.0, 317.0, 319.0, 308.0, 293.0, 289.0, 312.0, 312.0, 296.0, 283.0, 287.0, 286.0, 316.0, 317.0, 316.0, 317.0, 290.0, 297.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.783318071764331, "mean_processing_ms": 0.22516025845999352, "mean_inference_ms": 1.3858608320735286}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10896000, "num_steps_sampled": 5811200, "sample_time_ms": 20778.477, "load_time_ms": 39.252, "grad_time_ms": 10489.319, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007634037174284458, "policy_loss": -0.006680456455796957, "vf_loss": 80.01913452148438, "vf_explained_var": 0.7667891383171082, "kl": 0.0017371875001117587, "entropy": 1.1161128282546997, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5811200, "episodes_total": 14528, "training_iteration": 454, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-41-06", "timestamp": 1660261266, "time_this_iter_s": 27.318589210510254, "time_total_s": 19674.69349503517, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, 
"num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19674.69349503517, "timesteps_since_restore": 5811200, "iterations_since_restore": 454, "perf": {"cpu_util_percent": 37.051282051282044, "ram_util_percent": 58.9923076923077}} -{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 606.28, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.14}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.68, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.2, 
"onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.27, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 4, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.27, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.27, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 582.0, 579.0, 582.0, 633.0, 630.0, 606.0, 639.0, 630.0, 639.0, 633.0, 636.0, 587.0, 582.0, 582.0, 582.0, 579.0, 630.0, 462.0, 573.0, 579.0, 582.0, 624.0, 636.0, 587.0, 579.0, 579.0, 636.0, 627.0, 587.0, 624.0, 582.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 636.0, 636.0, 636.0, 582.0, 633.0, 587.0, 581.0, 633.0, 582.0, 630.0, 630.0, 587.0, 582.0, 636.0, 582.0, 633.0, 536.0, 582.0, 636.0, 639.0, 627.0, 582.0, 624.0, 579.0, 573.0, 633.0, 633.0, 587.0, 633.0, 639.0, 587.0, 627.0, 630.0, 630.0, 627.0, 582.0, 633.0, 582.0, 582.0, 636.0, 630.0, 579.0, 627.0, 627.0, 579.0, 587.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 633.0, 587.0, 633.0, 579.0, 636.0, 582.0, 582.0, 627.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 296.0, 283.0, 286.0, 296.0, 311.0, 322.0, 316.0, 314.0, 300.0, 306.0, 319.0, 320.0, 319.0, 311.0, 319.0, 320.0, 316.0, 317.0, 319.0, 317.0, 296.0, 291.0, 299.0, 283.0, 291.0, 291.0, 291.0, 291.0, 291.0, 288.0, 308.0, 322.0, 228.0, 234.0, 279.0, 294.0, 283.0, 296.0, 288.0, 294.0, 313.0, 311.0, 319.0, 
317.0, 291.0, 296.0, 291.0, 288.0, 287.0, 292.0, 319.0, 317.0, 316.0, 311.0, 296.0, 291.0, 316.0, 308.0, 293.0, 289.0, 319.0, 314.0, 316.0, 317.0, 286.0, 296.0, 293.0, 289.0, 291.0, 291.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 314.0, 322.0, 288.0, 294.0, 319.0, 314.0, 288.0, 299.0, 290.0, 291.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 319.0, 311.0, 296.0, 291.0, 291.0, 291.0, 322.0, 314.0, 290.0, 292.0, 316.0, 317.0, 257.0, 279.0, 288.0, 294.0, 311.0, 325.0, 322.0, 317.0, 319.0, 308.0, 293.0, 289.0, 312.0, 312.0, 296.0, 283.0, 287.0, 286.0, 316.0, 317.0, 316.0, 317.0, 290.0, 297.0, 314.0, 319.0, 319.0, 320.0, 288.0, 299.0, 313.0, 314.0, 306.0, 324.0, 311.0, 319.0, 316.0, 311.0, 296.0, 286.0, 322.0, 311.0, 296.0, 286.0, 298.0, 284.0, 314.0, 322.0, 311.0, 319.0, 277.0, 302.0, 319.0, 308.0, 316.0, 311.0, 290.0, 289.0, 298.0, 289.0, 316.0, 317.0, 314.0, 319.0, 289.0, 293.0, 299.0, 283.0, 286.0, 296.0, 314.0, 322.0, 318.0, 315.0, 291.0, 296.0, 316.0, 317.0, 287.0, 292.0, 314.0, 322.0, 293.0, 289.0, 291.0, 291.0, 316.0, 311.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7826053116086107, "mean_processing_ms": 0.22502024237804114, "mean_inference_ms": 1.384772842121544}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10920000, "num_steps_sampled": 5824000, "sample_time_ms": 20652.138, "load_time_ms": 39.437, "grad_time_ms": 10376.522, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003411610086914152, "policy_loss": -0.0072668264620006084, "vf_loss": 81.6093978881836, "vf_explained_var": 0.7669034600257874, "kl": 0.0018620697082951665, "entropy": 1.1059015989303589, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5824000, "episodes_total": 14560, "training_iteration": 455, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-41-35", "timestamp": 
1660261295, "time_this_iter_s": 29.449601650238037, "time_total_s": 19704.14309668541, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", 
"ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 
0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19704.14309668541, "timesteps_since_restore": 5824000, "iterations_since_restore": 455, "perf": {"cpu_util_percent": 37.61666666666666, "ram_util_percent": 59.06190476190477}} -{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 607.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.67}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.54, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 
0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.63, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.61, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.2, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 4, 
"soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.2, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.2, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 624.0, 587.0, 573.0, 633.0, 636.0, 582.0, 630.0, 633.0, 639.0, 633.0, 633.0, 582.0, 630.0, 633.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 587.0, 573.0, 576.0, 582.0, 636.0, 525.0, 633.0, 579.0, 633.0, 527.0, 633.0, 633.0, 587.0, 633.0, 639.0, 587.0, 627.0, 630.0, 630.0, 627.0, 582.0, 633.0, 582.0, 582.0, 636.0, 630.0, 579.0, 627.0, 627.0, 579.0, 587.0, 633.0, 633.0, 582.0, 582.0, 582.0, 636.0, 633.0, 587.0, 633.0, 579.0, 636.0, 582.0, 582.0, 627.0, 636.0, 633.0, 582.0, 582.0, 579.0, 582.0, 633.0, 630.0, 606.0, 639.0, 630.0, 639.0, 633.0, 636.0, 587.0, 582.0, 582.0, 582.0, 579.0, 630.0, 462.0, 573.0, 579.0, 582.0, 624.0, 636.0, 587.0, 579.0, 579.0, 636.0, 627.0, 587.0, 624.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 313.0, 317.0, 316.0, 308.0, 301.0, 286.0, 290.0, 283.0, 319.0, 314.0, 324.0, 312.0, 291.0, 291.0, 315.0, 315.0, 
317.0, 316.0, 314.0, 325.0, 316.0, 317.0, 316.0, 317.0, 293.0, 289.0, 316.0, 314.0, 321.0, 312.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 288.0, 294.0, 313.0, 317.0, 311.0, 319.0, 288.0, 299.0, 282.0, 291.0, 285.0, 291.0, 286.0, 296.0, 311.0, 325.0, 268.0, 257.0, 316.0, 317.0, 296.0, 283.0, 320.0, 313.0, 254.0, 273.0, 316.0, 317.0, 316.0, 317.0, 290.0, 297.0, 314.0, 319.0, 319.0, 320.0, 288.0, 299.0, 313.0, 314.0, 306.0, 324.0, 311.0, 319.0, 316.0, 311.0, 296.0, 286.0, 322.0, 311.0, 296.0, 286.0, 298.0, 284.0, 314.0, 322.0, 311.0, 319.0, 277.0, 302.0, 319.0, 308.0, 316.0, 311.0, 290.0, 289.0, 298.0, 289.0, 316.0, 317.0, 314.0, 319.0, 289.0, 293.0, 299.0, 283.0, 286.0, 296.0, 314.0, 322.0, 318.0, 315.0, 291.0, 296.0, 316.0, 317.0, 287.0, 292.0, 314.0, 322.0, 293.0, 289.0, 291.0, 291.0, 316.0, 311.0, 319.0, 317.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 296.0, 283.0, 286.0, 296.0, 311.0, 322.0, 316.0, 314.0, 300.0, 306.0, 319.0, 320.0, 319.0, 311.0, 319.0, 320.0, 316.0, 317.0, 319.0, 317.0, 296.0, 291.0, 299.0, 283.0, 291.0, 291.0, 291.0, 291.0, 291.0, 288.0, 308.0, 322.0, 228.0, 234.0, 279.0, 294.0, 283.0, 296.0, 288.0, 294.0, 313.0, 311.0, 319.0, 317.0, 291.0, 296.0, 291.0, 288.0, 287.0, 292.0, 319.0, 317.0, 316.0, 311.0, 296.0, 291.0, 316.0, 308.0]}, "sampler_perf": {"mean_env_wait_ms": 0.781893848476972, "mean_processing_ms": 0.22487934380632985, "mean_inference_ms": 1.3836766382532326}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10944000, "num_steps_sampled": 5836800, "sample_time_ms": 20326.683, "load_time_ms": 39.601, "grad_time_ms": 10224.263, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009388479520566761, "policy_loss": -0.008330571465194225, "vf_loss": 79.4411849975586, "vf_explained_var": 0.7653481960296631, "kl": 0.0017687659710645676, "entropy": 1.1048110723495483, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 5836800, "episodes_total": 14592, "training_iteration": 456, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-42-04", "timestamp": 1660261324, "time_this_iter_s": 28.971395254135132, "time_total_s": 19733.114491939545, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19733.114491939545, "timesteps_since_restore": 5836800, "iterations_since_restore": 456, "perf": {"cpu_util_percent": 36.80731707317073, "ram_util_percent": 59.075609756097556}} -{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 598.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 98.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 299.33}, "custom_metrics": {"sparse_reward_mean": 207.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 184.26, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.33, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.96, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.64, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.54, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.74, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.96, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.64, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.64, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 630.0, 579.0, 579.0, 576.0, 575.0, 582.0, 579.0, 197.0, 639.0, 582.0, 636.0, 587.0, 579.0, 582.0, 636.0, 582.0, 582.0, 630.0, 582.0, 582.0, 570.0, 633.0, 627.0, 582.0, 587.0, 630.0, 639.0, 590.0, 633.0, 582.0, 630.0, 582.0, 582.0, 627.0, 636.0, 633.0, 582.0, 582.0, 579.0, 582.0, 633.0, 630.0, 606.0, 639.0, 630.0, 639.0, 633.0, 636.0, 587.0, 582.0, 582.0, 582.0, 579.0, 630.0, 462.0, 573.0, 579.0, 582.0, 624.0, 636.0, 587.0, 579.0, 579.0, 636.0, 627.0, 587.0, 624.0, 636.0, 630.0, 624.0, 587.0, 573.0, 633.0, 636.0, 582.0, 630.0, 633.0, 639.0, 633.0, 633.0, 582.0, 630.0, 633.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 587.0, 573.0, 576.0, 582.0, 636.0, 525.0, 633.0, 579.0, 633.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [248.0, 271.0, 311.0, 319.0, 288.0, 291.0, 285.0, 294.0, 290.0, 286.0, 289.0, 286.0, 288.0, 294.0, 285.0, 294.0, 99.0, 98.0, 322.0, 317.0, 288.0, 294.0, 319.0, 317.0, 298.0, 289.0, 288.0, 291.0, 299.0, 283.0, 314.0, 322.0, 296.0, 286.0, 291.0, 291.0, 311.0, 319.0, 290.0, 292.0, 288.0, 294.0, 277.0, 293.0, 316.0, 317.0, 316.0, 311.0, 294.0, 288.0, 290.0, 297.0, 316.0, 314.0, 319.0, 320.0, 294.0, 296.0, 316.0, 317.0, 296.0, 286.0, 311.0, 319.0, 293.0, 289.0, 291.0, 291.0, 316.0, 311.0, 319.0, 317.0, 316.0, 317.0, 291.0, 291.0, 291.0, 291.0, 296.0, 283.0, 286.0, 296.0, 311.0, 322.0, 316.0, 314.0, 300.0, 306.0, 319.0, 320.0, 319.0, 311.0, 319.0, 320.0, 316.0, 317.0, 319.0, 317.0, 296.0, 291.0, 299.0, 283.0, 291.0, 291.0, 291.0, 291.0, 291.0, 288.0, 308.0, 322.0, 228.0, 234.0, 279.0, 294.0, 283.0, 296.0, 288.0, 294.0, 313.0, 311.0, 319.0, 317.0, 291.0, 296.0, 291.0, 288.0, 287.0, 292.0, 319.0, 317.0, 316.0, 311.0, 296.0, 291.0, 316.0, 308.0, 319.0, 317.0, 313.0, 317.0, 316.0, 308.0, 301.0, 286.0, 290.0, 283.0, 319.0, 314.0, 324.0, 312.0, 291.0, 291.0, 315.0, 315.0, 317.0, 316.0, 314.0, 325.0, 316.0, 317.0, 316.0, 317.0, 293.0, 289.0, 316.0, 314.0, 321.0, 312.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 288.0, 294.0, 313.0, 317.0, 311.0, 319.0, 288.0, 299.0, 282.0, 291.0, 285.0, 291.0, 286.0, 296.0, 311.0, 325.0, 268.0, 257.0, 316.0, 317.0, 296.0, 283.0, 320.0, 313.0, 254.0, 273.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7811987617860328, "mean_processing_ms": 0.2247433395983137, "mean_inference_ms": 1.3827621285902887}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10968000, "num_steps_sampled": 5849600, "sample_time_ms": 20643.82, "load_time_ms": 39.721, "grad_time_ms": 10128.234, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00031386129558086395, "policy_loss": -0.007833792828023434, "vf_loss": 87.0155258178711, 
"vf_explained_var": 0.759077787399292, "kl": 0.0023228460922837257, "entropy": 1.10780668258667, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5849600, "episodes_total": 14624, "training_iteration": 457, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-42-37", "timestamp": 1660261357, "time_this_iter_s": 32.612699031829834, "time_total_s": 19765.727190971375, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19765.727190971375, "timesteps_since_restore": 5849600, "iterations_since_restore": 457, "perf": {"cpu_util_percent": 35.49347826086956, "ram_util_percent": 59.11739130434784}} -{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 600.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 98.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 300.29}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 184.98, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.3, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.91, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.9, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.66, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.9, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.9, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 606.0, 582.0, 633.0, 579.0, 587.0, 639.0, 567.0, 630.0, 630.0, 582.0, 627.0, 636.0, 587.0, 636.0, 639.0, 582.0, 587.0, 587.0, 630.0, 633.0, 630.0, 579.0, 587.0, 576.0, 633.0, 587.0, 587.0, 630.0, 579.0, 630.0, 636.0, 627.0, 587.0, 624.0, 636.0, 630.0, 624.0, 587.0, 573.0, 633.0, 636.0, 582.0, 630.0, 633.0, 639.0, 633.0, 633.0, 582.0, 630.0, 633.0, 630.0, 630.0, 633.0, 582.0, 630.0, 630.0, 587.0, 573.0, 576.0, 582.0, 636.0, 525.0, 633.0, 579.0, 633.0, 527.0, 519.0, 630.0, 579.0, 579.0, 576.0, 575.0, 582.0, 579.0, 197.0, 639.0, 582.0, 636.0, 587.0, 579.0, 582.0, 636.0, 582.0, 582.0, 630.0, 582.0, 582.0, 570.0, 633.0, 627.0, 582.0, 587.0, 630.0, 639.0, 590.0, 633.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 291.0, 296.0, 301.0, 305.0, 286.0, 296.0, 319.0, 314.0, 290.0, 289.0, 290.0, 297.0, 319.0, 320.0, 282.0, 285.0, 310.0, 320.0, 319.0, 311.0, 288.0, 294.0, 313.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 317.0, 319.0, 320.0, 293.0, 289.0, 299.0, 288.0, 290.0, 297.0, 310.0, 320.0, 324.0, 309.0, 311.0, 319.0, 291.0, 288.0, 299.0, 288.0, 286.0, 290.0, 317.0, 316.0, 296.0, 291.0, 293.0, 294.0, 316.0, 314.0, 294.0, 285.0, 326.0, 304.0, 319.0, 317.0, 316.0, 311.0, 296.0, 291.0, 316.0, 308.0, 319.0, 317.0, 313.0, 317.0, 316.0, 308.0, 301.0, 286.0, 290.0, 283.0, 319.0, 314.0, 324.0, 312.0, 291.0, 291.0, 315.0, 315.0, 317.0, 316.0, 314.0, 325.0, 316.0, 317.0, 316.0, 317.0, 293.0, 289.0, 316.0, 314.0, 321.0, 312.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 288.0, 294.0, 313.0, 317.0, 311.0, 319.0, 288.0, 299.0, 282.0, 291.0, 285.0, 291.0, 286.0, 296.0, 311.0, 325.0, 268.0, 257.0, 316.0, 317.0, 296.0, 283.0, 320.0, 313.0, 254.0, 273.0, 248.0, 271.0, 311.0, 319.0, 288.0, 291.0, 285.0, 294.0, 290.0, 286.0, 289.0, 286.0, 288.0, 294.0, 285.0, 294.0, 99.0, 98.0, 322.0, 317.0, 288.0, 294.0, 319.0, 317.0, 298.0, 289.0, 288.0, 291.0, 299.0, 283.0, 314.0, 322.0, 296.0, 286.0, 291.0, 291.0, 311.0, 319.0, 290.0, 292.0, 288.0, 294.0, 277.0, 293.0, 316.0, 317.0, 316.0, 311.0, 294.0, 288.0, 290.0, 297.0, 316.0, 314.0, 319.0, 320.0, 294.0, 296.0, 316.0, 317.0, 296.0, 286.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7805088940621119, "mean_processing_ms": 0.22460838488183463, "mean_inference_ms": 1.3819062694297763}, "off_policy_estimator": {}, "info": {"num_steps_trained": 10992000, "num_steps_sampled": 5862400, "sample_time_ms": 20388.555, "load_time_ms": 39.901, "grad_time_ms": 10047.414, "update_time_ms": 0.004, "learner": 
{"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00028213454061187804, "policy_loss": -0.007530031260102987, "vf_loss": 78.06029510498047, "vf_explained_var": 0.7737483382225037, "kl": 0.0017934959614649415, "entropy": 1.1162586212158203, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5862400, "episodes_total": 14656, "training_iteration": 458, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-43-08", "timestamp": 1660261388, "time_this_iter_s": 31.54933786392212, "time_total_s": 19797.276528835297, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 
1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19797.276528835297, "timesteps_since_restore": 5862400, "iterations_since_restore": 458, "perf": {"cpu_util_percent": 36.01111111111111, "ram_util_percent": 59.15777777777779}} -{"episode_reward_max": 639.0, "episode_reward_min": 197.0, "episode_reward_mean": 599.73, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 98.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 299.865}, "custom_metrics": {"sparse_reward_mean": 207.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 184.93, "shaped_reward_min": 77, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.45, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.04, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.53, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.54, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.05, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 2, 
"useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.02, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.04, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.53, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.04, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.53, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 636.0, 639.0, 630.0, 630.0, 582.0, 636.0, 581.0, 636.0, 630.0, 630.0, 590.0, 633.0, 636.0, 636.0, 630.0, 582.0, 582.0, 630.0, 584.0, 579.0, 627.0, 504.0, 630.0, 582.0, 633.0, 630.0, 633.0, 587.0, 582.0, 582.0, 633.0, 633.0, 579.0, 633.0, 527.0, 519.0, 630.0, 579.0, 579.0, 576.0, 575.0, 582.0, 579.0, 197.0, 639.0, 582.0, 636.0, 587.0, 579.0, 582.0, 636.0, 582.0, 582.0, 630.0, 582.0, 582.0, 570.0, 633.0, 627.0, 582.0, 587.0, 630.0, 639.0, 590.0, 633.0, 582.0, 630.0, 582.0, 587.0, 606.0, 582.0, 633.0, 579.0, 587.0, 639.0, 567.0, 630.0, 630.0, 582.0, 627.0, 636.0, 587.0, 636.0, 639.0, 582.0, 587.0, 587.0, 630.0, 633.0, 630.0, 579.0, 587.0, 576.0, 633.0, 587.0, 587.0, 630.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 300.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 311.0, 319.0, 293.0, 289.0, 319.0, 317.0, 290.0, 291.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 296.0, 294.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 314.0, 316.0, 298.0, 286.0, 288.0, 291.0, 308.0, 319.0, 254.0, 250.0, 311.0, 319.0, 288.0, 294.0, 313.0, 320.0, 314.0, 316.0, 314.0, 319.0, 291.0, 296.0, 293.0, 289.0, 289.0, 293.0, 317.0, 316.0, 316.0, 317.0, 296.0, 283.0, 320.0, 313.0, 254.0, 273.0, 248.0, 271.0, 311.0, 319.0, 288.0, 291.0, 285.0, 294.0, 290.0, 286.0, 289.0, 286.0, 288.0, 294.0, 285.0, 294.0, 99.0, 98.0, 322.0, 317.0, 288.0, 294.0, 319.0, 317.0, 298.0, 289.0, 288.0, 291.0, 299.0, 283.0, 314.0, 322.0, 296.0, 286.0, 291.0, 291.0, 311.0, 319.0, 290.0, 292.0, 288.0, 294.0, 277.0, 293.0, 316.0, 317.0, 316.0, 311.0, 294.0, 288.0, 290.0, 297.0, 316.0, 314.0, 319.0, 320.0, 294.0, 296.0, 316.0, 317.0, 296.0, 286.0, 311.0, 319.0, 289.0, 293.0, 291.0, 296.0, 301.0, 305.0, 286.0, 296.0, 319.0, 314.0, 290.0, 289.0, 290.0, 297.0, 319.0, 320.0, 282.0, 285.0, 310.0, 320.0, 319.0, 311.0, 288.0, 294.0, 313.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 317.0, 319.0, 320.0, 293.0, 289.0, 299.0, 288.0, 290.0, 297.0, 310.0, 320.0, 324.0, 309.0, 311.0, 319.0, 291.0, 288.0, 299.0, 288.0, 286.0, 290.0, 317.0, 316.0, 296.0, 291.0, 293.0, 294.0, 316.0, 314.0, 294.0, 285.0, 326.0, 304.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7798432151982481, "mean_processing_ms": 0.22448068931407403, "mean_inference_ms": 1.3813562407452884}, 
"off_policy_estimator": {}, "info": {"num_steps_trained": 11016000, "num_steps_sampled": 5875200, "sample_time_ms": 20860.599, "load_time_ms": 39.533, "grad_time_ms": 10085.729, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -4.9240032240049914e-05, "policy_loss": -0.007299743592739105, "vf_loss": 78.08226776123047, "vf_explained_var": 0.7718666195869446, "kl": 0.001796315424144268, "entropy": 1.1154268980026245, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5875200, "episodes_total": 14688, "training_iteration": 459, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-43-44", "timestamp": 1660261424, "time_this_iter_s": 35.86376190185547, "time_total_s": 19833.140290737152, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 
400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, 
"num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19833.140290737152, "timesteps_since_restore": 5875200, "iterations_since_restore": 459, "perf": {"cpu_util_percent": 35.09411764705882, "ram_util_percent": 59.11176470588236}} -{"episode_reward_max": 639.0, "episode_reward_min": 504.0, "episode_reward_mean": 606.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 250.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.165}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.73, "shaped_reward_min": 144, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.61, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.11, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.18, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.65, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.58, "dish_pickup_agent_0_min": 5, 
"dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.13, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.5, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.18, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.65, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.18, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.65, 
"viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 582.0, 639.0, 582.0, 636.0, 582.0, 627.0, 582.0, 582.0, 582.0, 582.0, 579.0, 636.0, 587.0, 636.0, 630.0, 633.0, 627.0, 582.0, 639.0, 627.0, 570.0, 633.0, 582.0, 579.0, 636.0, 630.0, 587.0, 582.0, 582.0, 573.0, 590.0, 633.0, 582.0, 630.0, 582.0, 587.0, 606.0, 582.0, 633.0, 579.0, 587.0, 639.0, 567.0, 630.0, 630.0, 582.0, 627.0, 636.0, 587.0, 636.0, 639.0, 582.0, 587.0, 587.0, 630.0, 633.0, 630.0, 579.0, 587.0, 576.0, 633.0, 587.0, 587.0, 630.0, 579.0, 630.0, 582.0, 636.0, 639.0, 630.0, 630.0, 582.0, 636.0, 581.0, 636.0, 630.0, 630.0, 590.0, 633.0, 636.0, 636.0, 
630.0, 582.0, 582.0, 630.0, 584.0, 579.0, 627.0, 504.0, 630.0, 582.0, 633.0, 630.0, 633.0, 587.0, 582.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 314.0, 322.0, 291.0, 291.0, 316.0, 311.0, 291.0, 291.0, 293.0, 289.0, 282.0, 300.0, 296.0, 286.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 319.0, 317.0, 319.0, 311.0, 311.0, 322.0, 316.0, 311.0, 287.0, 295.0, 322.0, 317.0, 316.0, 311.0, 282.0, 288.0, 319.0, 314.0, 295.0, 287.0, 288.0, 291.0, 327.0, 309.0, 316.0, 314.0, 293.0, 294.0, 294.0, 288.0, 290.0, 292.0, 285.0, 288.0, 294.0, 296.0, 316.0, 317.0, 296.0, 286.0, 311.0, 319.0, 289.0, 293.0, 291.0, 296.0, 301.0, 305.0, 286.0, 296.0, 319.0, 314.0, 290.0, 289.0, 290.0, 297.0, 319.0, 320.0, 282.0, 285.0, 310.0, 320.0, 319.0, 311.0, 288.0, 294.0, 313.0, 314.0, 319.0, 317.0, 293.0, 294.0, 319.0, 317.0, 319.0, 320.0, 293.0, 289.0, 299.0, 288.0, 290.0, 297.0, 310.0, 320.0, 324.0, 309.0, 311.0, 319.0, 291.0, 288.0, 299.0, 288.0, 286.0, 290.0, 317.0, 316.0, 296.0, 291.0, 293.0, 294.0, 316.0, 314.0, 294.0, 285.0, 326.0, 304.0, 282.0, 300.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 311.0, 319.0, 293.0, 289.0, 319.0, 317.0, 290.0, 291.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 296.0, 294.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 314.0, 316.0, 298.0, 286.0, 288.0, 291.0, 308.0, 319.0, 254.0, 250.0, 311.0, 319.0, 288.0, 294.0, 313.0, 320.0, 314.0, 316.0, 314.0, 
319.0, 291.0, 296.0, 293.0, 289.0, 289.0, 293.0, 317.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7791820626361985, "mean_processing_ms": 0.22435411593063023, "mean_inference_ms": 1.3809254829785487}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11040000, "num_steps_sampled": 5888000, "sample_time_ms": 21384.075, "load_time_ms": 38.923, "grad_time_ms": 10181.416, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033431891351938248, "policy_loss": -0.0041669332422316074, "vf_loss": 80.67221069335938, "vf_explained_var": 0.7671453356742859, "kl": 0.0021624856162816286, "entropy": 1.1142171621322632, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5888000, "episodes_total": 14720, "training_iteration": 460, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-44-21", "timestamp": 1660261461, "time_this_iter_s": 36.32121300697327, "time_total_s": 19869.461503744125, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19869.461503744125, "timesteps_since_restore": 5888000, "iterations_since_restore": 460, "perf": {"cpu_util_percent": 35.05294117647058, "ram_util_percent": 59.078431372549026}} -{"episode_reward_max": 639.0, "episode_reward_min": 504.0, "episode_reward_mean": 604.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 250.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.105}, "custom_metrics": {"sparse_reward_mean": 209.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.21, "shaped_reward_min": 144, 
"shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.49, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.93, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 12, 
"potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.03, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, 
"optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 579.0, 630.0, 527.0, 587.0, 636.0, 584.0, 587.0, 579.0, 570.0, 582.0, 579.0, 636.0, 627.0, 573.0, 633.0, 582.0, 630.0, 582.0, 582.0, 630.0, 636.0, 582.0, 579.0, 582.0, 587.0, 636.0, 587.0, 627.0, 587.0, 633.0, 587.0, 630.0, 579.0, 630.0, 582.0, 636.0, 639.0, 630.0, 630.0, 582.0, 636.0, 581.0, 636.0, 630.0, 630.0, 590.0, 633.0, 636.0, 636.0, 630.0, 582.0, 
582.0, 630.0, 584.0, 579.0, 627.0, 504.0, 630.0, 582.0, 633.0, 630.0, 633.0, 587.0, 582.0, 582.0, 633.0, 627.0, 582.0, 582.0, 639.0, 582.0, 636.0, 582.0, 627.0, 582.0, 582.0, 582.0, 582.0, 579.0, 636.0, 587.0, 636.0, 630.0, 633.0, 627.0, 582.0, 639.0, 627.0, 570.0, 633.0, 582.0, 579.0, 636.0, 630.0, 587.0, 582.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 273.0, 254.0, 291.0, 296.0, 316.0, 320.0, 290.0, 294.0, 290.0, 297.0, 293.0, 286.0, 285.0, 285.0, 294.0, 288.0, 290.0, 289.0, 314.0, 322.0, 313.0, 314.0, 273.0, 300.0, 319.0, 314.0, 299.0, 283.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 293.0, 289.0, 293.0, 286.0, 285.0, 297.0, 293.0, 294.0, 319.0, 317.0, 296.0, 291.0, 313.0, 314.0, 296.0, 291.0, 311.0, 322.0, 293.0, 294.0, 316.0, 314.0, 294.0, 285.0, 326.0, 304.0, 282.0, 300.0, 319.0, 317.0, 319.0, 320.0, 319.0, 311.0, 311.0, 319.0, 293.0, 289.0, 319.0, 317.0, 290.0, 291.0, 319.0, 317.0, 316.0, 314.0, 311.0, 319.0, 296.0, 294.0, 317.0, 316.0, 319.0, 317.0, 319.0, 317.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 314.0, 316.0, 298.0, 286.0, 288.0, 291.0, 308.0, 319.0, 254.0, 250.0, 311.0, 319.0, 288.0, 294.0, 313.0, 320.0, 314.0, 316.0, 314.0, 319.0, 291.0, 296.0, 293.0, 289.0, 289.0, 293.0, 317.0, 316.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 314.0, 322.0, 291.0, 291.0, 316.0, 311.0, 291.0, 291.0, 293.0, 289.0, 282.0, 300.0, 296.0, 286.0, 288.0, 
291.0, 319.0, 317.0, 296.0, 291.0, 319.0, 317.0, 319.0, 311.0, 311.0, 322.0, 316.0, 311.0, 287.0, 295.0, 322.0, 317.0, 316.0, 311.0, 282.0, 288.0, 319.0, 314.0, 295.0, 287.0, 288.0, 291.0, 327.0, 309.0, 316.0, 314.0, 293.0, 294.0, 294.0, 288.0, 290.0, 292.0, 285.0, 288.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7785286320058347, "mean_processing_ms": 0.2242302416319241, "mean_inference_ms": 1.3806320563992234}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11064000, "num_steps_sampled": 5900800, "sample_time_ms": 21464.8, "load_time_ms": 38.628, "grad_time_ms": 10142.381, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011879469966515899, "policy_loss": -0.00630860636010766, "vf_loss": 80.56136322021484, "vf_explained_var": 0.7605991363525391, "kl": 0.002013101242482662, "entropy": 1.1191506385803223, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5900800, "episodes_total": 14752, "training_iteration": 461, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-44-56", "timestamp": 1660261496, "time_this_iter_s": 35.2242169380188, "time_total_s": 19904.685720682144, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, 
"custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19904.685720682144, "timesteps_since_restore": 5900800, "iterations_since_restore": 461, "perf": {"cpu_util_percent": 34.286, "ram_util_percent": 59.076}} -{"episode_reward_max": 639.0, "episode_reward_min": 527.0, "episode_reward_mean": 606.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 
303.04}, "custom_metrics": {"sparse_reward_mean": 209.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.88, "shaped_reward_min": 167, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.23, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.08, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, 
"useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.13, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.53, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 6.02, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 4, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.13, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.13, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 636.0, 639.0, 639.0, 582.0, 627.0, 576.0, 587.0, 636.0, 633.0, 636.0, 630.0, 587.0, 633.0, 582.0, 579.0, 573.0, 636.0, 627.0, 633.0, 636.0, 633.0, 587.0, 573.0, 636.0, 630.0, 636.0, 633.0, 
636.0, 630.0, 633.0, 587.0, 582.0, 582.0, 633.0, 627.0, 582.0, 582.0, 639.0, 582.0, 636.0, 582.0, 627.0, 582.0, 582.0, 582.0, 582.0, 579.0, 636.0, 587.0, 636.0, 630.0, 633.0, 627.0, 582.0, 639.0, 627.0, 570.0, 633.0, 582.0, 579.0, 636.0, 630.0, 587.0, 582.0, 582.0, 573.0, 630.0, 582.0, 579.0, 630.0, 527.0, 587.0, 636.0, 584.0, 587.0, 579.0, 570.0, 582.0, 579.0, 636.0, 627.0, 573.0, 633.0, 582.0, 630.0, 582.0, 582.0, 630.0, 636.0, 582.0, 579.0, 582.0, 587.0, 636.0, 587.0, 627.0, 587.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 316.0, 314.0, 316.0, 320.0, 317.0, 322.0, 324.0, 315.0, 291.0, 291.0, 313.0, 314.0, 296.0, 280.0, 296.0, 291.0, 314.0, 322.0, 313.0, 320.0, 314.0, 322.0, 319.0, 311.0, 291.0, 296.0, 316.0, 317.0, 291.0, 291.0, 290.0, 289.0, 288.0, 285.0, 309.0, 327.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 317.0, 288.0, 299.0, 288.0, 285.0, 314.0, 322.0, 313.0, 317.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 319.0, 314.0, 291.0, 296.0, 293.0, 289.0, 289.0, 293.0, 317.0, 316.0, 308.0, 319.0, 296.0, 286.0, 288.0, 294.0, 319.0, 320.0, 288.0, 294.0, 314.0, 322.0, 291.0, 291.0, 316.0, 311.0, 291.0, 291.0, 293.0, 289.0, 282.0, 300.0, 296.0, 286.0, 288.0, 291.0, 319.0, 317.0, 296.0, 291.0, 319.0, 317.0, 319.0, 311.0, 311.0, 322.0, 316.0, 311.0, 287.0, 295.0, 322.0, 317.0, 316.0, 311.0, 282.0, 288.0, 319.0, 314.0, 295.0, 287.0, 288.0, 291.0, 327.0, 309.0, 316.0, 314.0, 293.0, 294.0, 294.0, 288.0, 290.0, 292.0, 285.0, 288.0, 316.0, 
314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 273.0, 254.0, 291.0, 296.0, 316.0, 320.0, 290.0, 294.0, 290.0, 297.0, 293.0, 286.0, 285.0, 285.0, 294.0, 288.0, 290.0, 289.0, 314.0, 322.0, 313.0, 314.0, 273.0, 300.0, 319.0, 314.0, 299.0, 283.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 293.0, 289.0, 293.0, 286.0, 285.0, 297.0, 293.0, 294.0, 319.0, 317.0, 296.0, 291.0, 313.0, 314.0, 296.0, 291.0, 311.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7778691140112235, "mean_processing_ms": 0.22410468367497743, "mean_inference_ms": 1.3802370398093593}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11088000, "num_steps_sampled": 5913600, "sample_time_ms": 21647.909, "load_time_ms": 38.304, "grad_time_ms": 10153.272, "update_time_ms": 0.004, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001676362007856369, "policy_loss": -0.005163781810551882, "vf_loss": 73.99588012695312, "vf_explained_var": 0.7706634402275085, "kl": 0.00203719618730247, "entropy": 1.1188966035842896, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5913600, "episodes_total": 14784, "training_iteration": 462, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-45-29", "timestamp": 1660261529, "time_this_iter_s": 32.866820096969604, "time_total_s": 19937.552540779114, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 19937.552540779114, "timesteps_since_restore": 5913600, "iterations_since_restore": 462, "perf": {"cpu_util_percent": 36.0304347826087, "ram_util_percent": 59.14782608695653}} -{"episode_reward_max": 639.0, "episode_reward_min": 
527.0, "episode_reward_mean": 606.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.275}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.95, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.1, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, 
"onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.21, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.71, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.21, "optimal_onion_potting_agent_0_min": 12, 
"optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.71, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.71, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": 
{"episode_reward": [636.0, 630.0, 636.0, 576.0, 587.0, 579.0, 633.0, 579.0, 630.0, 636.0, 636.0, 569.0, 582.0, 584.0, 633.0, 633.0, 627.0, 582.0, 582.0, 587.0, 564.0, 582.0, 639.0, 587.0, 582.0, 627.0, 636.0, 587.0, 630.0, 582.0, 630.0, 639.0, 587.0, 582.0, 582.0, 573.0, 630.0, 582.0, 579.0, 630.0, 527.0, 587.0, 636.0, 584.0, 587.0, 579.0, 570.0, 582.0, 579.0, 636.0, 627.0, 573.0, 633.0, 582.0, 630.0, 582.0, 582.0, 630.0, 636.0, 582.0, 579.0, 582.0, 587.0, 636.0, 587.0, 627.0, 587.0, 633.0, 582.0, 630.0, 636.0, 639.0, 639.0, 582.0, 627.0, 576.0, 587.0, 636.0, 633.0, 636.0, 630.0, 587.0, 633.0, 582.0, 579.0, 573.0, 636.0, 627.0, 633.0, 636.0, 633.0, 587.0, 573.0, 636.0, 630.0, 636.0, 633.0, 636.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 319.0, 311.0, 319.0, 317.0, 282.0, 294.0, 291.0, 296.0, 294.0, 285.0, 314.0, 319.0, 291.0, 288.0, 316.0, 314.0, 314.0, 322.0, 322.0, 314.0, 281.0, 288.0, 292.0, 290.0, 285.0, 299.0, 321.0, 312.0, 319.0, 314.0, 305.0, 322.0, 293.0, 289.0, 288.0, 294.0, 293.0, 294.0, 276.0, 288.0, 296.0, 286.0, 317.0, 322.0, 293.0, 294.0, 296.0, 286.0, 319.0, 308.0, 316.0, 320.0, 286.0, 301.0, 316.0, 314.0, 293.0, 289.0, 319.0, 311.0, 316.0, 323.0, 293.0, 294.0, 294.0, 288.0, 290.0, 292.0, 285.0, 288.0, 316.0, 314.0, 293.0, 289.0, 288.0, 291.0, 316.0, 314.0, 273.0, 254.0, 291.0, 296.0, 316.0, 320.0, 290.0, 294.0, 290.0, 297.0, 293.0, 286.0, 285.0, 285.0, 294.0, 288.0, 290.0, 289.0, 314.0, 322.0, 313.0, 314.0, 273.0, 300.0, 319.0, 
314.0, 299.0, 283.0, 311.0, 319.0, 288.0, 294.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 293.0, 289.0, 293.0, 286.0, 285.0, 297.0, 293.0, 294.0, 319.0, 317.0, 296.0, 291.0, 313.0, 314.0, 296.0, 291.0, 311.0, 322.0, 291.0, 291.0, 316.0, 314.0, 316.0, 320.0, 317.0, 322.0, 324.0, 315.0, 291.0, 291.0, 313.0, 314.0, 296.0, 280.0, 296.0, 291.0, 314.0, 322.0, 313.0, 320.0, 314.0, 322.0, 319.0, 311.0, 291.0, 296.0, 316.0, 317.0, 291.0, 291.0, 290.0, 289.0, 288.0, 285.0, 309.0, 327.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 317.0, 288.0, 299.0, 288.0, 285.0, 314.0, 322.0, 313.0, 317.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7772074101048227, "mean_processing_ms": 0.22397937610682028, "mean_inference_ms": 1.379755174295214}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11112000, "num_steps_sampled": 5926400, "sample_time_ms": 22034.25, "load_time_ms": 37.344, "grad_time_ms": 10407.822, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004309405107051134, "policy_loss": -0.0030093893874436617, "vf_loss": 78.81652069091797, "vf_explained_var": 0.7744302749633789, "kl": 0.0022333713714033365, "entropy": 1.1256990432739258, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5926400, "episodes_total": 14816, "training_iteration": 463, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-46-04", "timestamp": 1660261564, "time_this_iter_s": 34.88473105430603, "time_total_s": 19972.43727183342, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, 
"conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", 
"use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 
19972.43727183342, "timesteps_since_restore": 5926400, "iterations_since_restore": 463, "perf": {"cpu_util_percent": 36.355999999999995, "ram_util_percent": 59.1}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 613.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.575}, "custom_metrics": {"sparse_reward_mean": 212.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.35, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 14, "useful_onion_pickup_agent_0_max": 19, 
"useful_onion_pickup_agent_1_mean": 17.47, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 14, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.19, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.84, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 14, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 14, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 
0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 633.0, 633.0, 630.0, 630.0, 630.0, 630.0, 627.0, 587.0, 630.0, 576.0, 627.0, 587.0, 630.0, 630.0, 633.0, 582.0, 522.0, 630.0, 633.0, 587.0, 582.0, 639.0, 582.0, 630.0, 633.0, 582.0, 630.0, 633.0, 636.0, 636.0, 587.0, 627.0, 587.0, 633.0, 582.0, 630.0, 636.0, 639.0, 639.0, 582.0, 627.0, 576.0, 587.0, 636.0, 633.0, 636.0, 630.0, 587.0, 633.0, 582.0, 579.0, 573.0, 636.0, 627.0, 633.0, 636.0, 633.0, 587.0, 573.0, 636.0, 630.0, 636.0, 633.0, 636.0, 630.0, 633.0, 636.0, 630.0, 636.0, 576.0, 587.0, 579.0, 633.0, 579.0, 630.0, 636.0, 636.0, 569.0, 582.0, 584.0, 633.0, 633.0, 627.0, 582.0, 582.0, 587.0, 564.0, 582.0, 639.0, 587.0, 582.0, 627.0, 636.0, 587.0, 630.0, 582.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 309.0, 321.0, 319.0, 314.0, 319.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 314.0, 316.0, 314.0, 308.0, 319.0, 293.0, 294.0, 311.0, 319.0, 288.0, 288.0, 313.0, 314.0, 291.0, 296.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 257.0, 265.0, 316.0, 314.0, 324.0, 309.0, 291.0, 296.0, 288.0, 294.0, 319.0, 320.0, 290.0, 292.0, 313.0, 317.0, 319.0, 314.0, 290.0, 292.0, 311.0, 319.0, 316.0, 317.0, 311.0, 325.0, 319.0, 317.0, 296.0, 291.0, 313.0, 314.0, 296.0, 291.0, 311.0, 322.0, 291.0, 291.0, 316.0, 314.0, 316.0, 320.0, 
317.0, 322.0, 324.0, 315.0, 291.0, 291.0, 313.0, 314.0, 296.0, 280.0, 296.0, 291.0, 314.0, 322.0, 313.0, 320.0, 314.0, 322.0, 319.0, 311.0, 291.0, 296.0, 316.0, 317.0, 291.0, 291.0, 290.0, 289.0, 288.0, 285.0, 309.0, 327.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 317.0, 288.0, 299.0, 288.0, 285.0, 314.0, 322.0, 313.0, 317.0, 314.0, 322.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 319.0, 314.0, 319.0, 317.0, 319.0, 311.0, 319.0, 317.0, 282.0, 294.0, 291.0, 296.0, 294.0, 285.0, 314.0, 319.0, 291.0, 288.0, 316.0, 314.0, 314.0, 322.0, 322.0, 314.0, 281.0, 288.0, 292.0, 290.0, 285.0, 299.0, 321.0, 312.0, 319.0, 314.0, 305.0, 322.0, 293.0, 289.0, 288.0, 294.0, 293.0, 294.0, 276.0, 288.0, 296.0, 286.0, 317.0, 322.0, 293.0, 294.0, 296.0, 286.0, 319.0, 308.0, 316.0, 320.0, 286.0, 301.0, 316.0, 314.0, 293.0, 289.0, 319.0, 311.0, 316.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7765494210947501, "mean_processing_ms": 0.2238566721049911, "mean_inference_ms": 1.3793130627015395}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11136000, "num_steps_sampled": 5939200, "sample_time_ms": 22597.793, "load_time_ms": 37.078, "grad_time_ms": 10833.136, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004432214889675379, "policy_loss": -0.0028620418161153793, "vf_loss": 78.4912338256836, "vf_explained_var": 0.7597255110740662, "kl": 0.0021421227138489485, "entropy": 1.10971999168396, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5939200, "episodes_total": 14848, "training_iteration": 464, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-46-41", "timestamp": 1660261601, "time_this_iter_s": 37.22005105018616, "time_total_s": 20009.657322883606, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, 
"num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20009.657322883606, "timesteps_since_restore": 5939200, "iterations_since_restore": 464, "perf": {"cpu_util_percent": 33.75576923076923, "ram_util_percent": 59.192307692307686}} -{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 609.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.695}, "custom_metrics": {"sparse_reward_mean": 211.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.39, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.17, 
"onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.12, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.2, "potting_onion_agent_0_min": 14, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.6, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.12, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.2, "optimal_onion_potting_agent_0_min": 14, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.2, "viable_onion_potting_agent_0_min": 14, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 522.0, 636.0, 573.0, 636.0, 636.0, 582.0, 582.0, 633.0, 633.0, 587.0, 633.0, 633.0, 582.0, 590.0, 582.0, 579.0, 630.0, 627.0, 587.0, 462.0, 639.0, 630.0, 579.0, 584.0, 630.0, 587.0, 579.0, 587.0, 630.0, 633.0, 633.0, 636.0, 630.0, 633.0, 636.0, 630.0, 636.0, 576.0, 587.0, 579.0, 633.0, 579.0, 630.0, 636.0, 636.0, 569.0, 582.0, 584.0, 633.0, 633.0, 627.0, 582.0, 582.0, 587.0, 564.0, 582.0, 639.0, 587.0, 582.0, 627.0, 636.0, 587.0, 630.0, 582.0, 630.0, 639.0, 633.0, 630.0, 633.0, 633.0, 630.0, 630.0, 630.0, 630.0, 627.0, 587.0, 630.0, 576.0, 627.0, 587.0, 630.0, 630.0, 633.0, 582.0, 522.0, 630.0, 633.0, 587.0, 582.0, 639.0, 582.0, 630.0, 633.0, 582.0, 630.0, 633.0, 636.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 322.0, 316.0, 314.0, 268.0, 254.0, 319.0, 317.0, 299.0, 274.0, 314.0, 322.0, 313.0, 323.0, 288.0, 294.0, 293.0, 289.0, 324.0, 309.0, 316.0, 317.0, 298.0, 289.0, 319.0, 314.0, 316.0, 317.0, 293.0, 289.0, 296.0, 294.0, 288.0, 294.0, 288.0, 291.0, 316.0, 314.0, 307.0, 320.0, 296.0, 291.0, 223.0, 239.0, 324.0, 315.0, 313.0, 317.0, 290.0, 
289.0, 293.0, 291.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 298.0, 289.0, 316.0, 314.0, 314.0, 319.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 319.0, 314.0, 319.0, 317.0, 319.0, 311.0, 319.0, 317.0, 282.0, 294.0, 291.0, 296.0, 294.0, 285.0, 314.0, 319.0, 291.0, 288.0, 316.0, 314.0, 314.0, 322.0, 322.0, 314.0, 281.0, 288.0, 292.0, 290.0, 285.0, 299.0, 321.0, 312.0, 319.0, 314.0, 305.0, 322.0, 293.0, 289.0, 288.0, 294.0, 293.0, 294.0, 276.0, 288.0, 296.0, 286.0, 317.0, 322.0, 293.0, 294.0, 296.0, 286.0, 319.0, 308.0, 316.0, 320.0, 286.0, 301.0, 316.0, 314.0, 293.0, 289.0, 319.0, 311.0, 316.0, 323.0, 316.0, 317.0, 309.0, 321.0, 319.0, 314.0, 319.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 314.0, 316.0, 314.0, 308.0, 319.0, 293.0, 294.0, 311.0, 319.0, 288.0, 288.0, 313.0, 314.0, 291.0, 296.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 257.0, 265.0, 316.0, 314.0, 324.0, 309.0, 291.0, 296.0, 288.0, 294.0, 319.0, 320.0, 290.0, 292.0, 313.0, 317.0, 319.0, 314.0, 290.0, 292.0, 311.0, 319.0, 316.0, 317.0, 311.0, 325.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7758921958877883, "mean_processing_ms": 0.22373444703892312, "mean_inference_ms": 1.3788740058001947}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11160000, "num_steps_sampled": 5952000, "sample_time_ms": 22835.117, "load_time_ms": 36.936, "grad_time_ms": 10943.452, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008901534602046013, "policy_loss": -0.006549746263772249, "vf_loss": 80.0201416015625, "vf_explained_var": 0.7684802412986755, "kl": 0.0019470960833132267, "entropy": 1.1242157220840454, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5952000, "episodes_total": 14880, "training_iteration": 465, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-47-14", "timestamp": 
1660261634, "time_this_iter_s": 32.92273998260498, "time_total_s": 20042.58006286621, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", 
"ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 
0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20042.58006286621, "timesteps_since_restore": 5952000, "iterations_since_restore": 465, "perf": {"cpu_util_percent": 33.19787234042553, "ram_util_percent": 58.98936170212765}} -{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 611.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 305.81}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 187.62, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 
0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.66, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.35, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.01, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.63, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.13, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.9, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, 
"soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 630.0, 579.0, 639.0, 636.0, 630.0, 636.0, 633.0, 636.0, 582.0, 587.0, 624.0, 639.0, 630.0, 582.0, 636.0, 633.0, 633.0, 630.0, 630.0, 579.0, 555.0, 630.0, 576.0, 630.0, 627.0, 587.0, 639.0, 627.0, 627.0, 633.0, 630.0, 582.0, 630.0, 639.0, 633.0, 630.0, 633.0, 633.0, 630.0, 630.0, 630.0, 630.0, 627.0, 587.0, 630.0, 576.0, 627.0, 587.0, 630.0, 630.0, 633.0, 582.0, 522.0, 630.0, 633.0, 587.0, 582.0, 639.0, 582.0, 630.0, 633.0, 582.0, 630.0, 633.0, 636.0, 636.0, 639.0, 630.0, 522.0, 636.0, 573.0, 636.0, 636.0, 582.0, 582.0, 633.0, 633.0, 587.0, 633.0, 633.0, 582.0, 590.0, 582.0, 579.0, 630.0, 627.0, 587.0, 462.0, 639.0, 630.0, 579.0, 584.0, 630.0, 587.0, 579.0, 587.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 288.0, 294.0, 316.0, 314.0, 288.0, 291.0, 317.0, 322.0, 319.0, 317.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 
314.0, 322.0, 291.0, 291.0, 280.0, 307.0, 316.0, 308.0, 324.0, 315.0, 317.0, 313.0, 283.0, 299.0, 321.0, 315.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 311.0, 319.0, 294.0, 285.0, 270.0, 285.0, 316.0, 314.0, 281.0, 295.0, 310.0, 320.0, 313.0, 314.0, 288.0, 299.0, 322.0, 317.0, 313.0, 314.0, 321.0, 306.0, 316.0, 317.0, 316.0, 314.0, 293.0, 289.0, 319.0, 311.0, 316.0, 323.0, 316.0, 317.0, 309.0, 321.0, 319.0, 314.0, 319.0, 314.0, 316.0, 314.0, 321.0, 309.0, 316.0, 314.0, 316.0, 314.0, 308.0, 319.0, 293.0, 294.0, 311.0, 319.0, 288.0, 288.0, 313.0, 314.0, 291.0, 296.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 257.0, 265.0, 316.0, 314.0, 324.0, 309.0, 291.0, 296.0, 288.0, 294.0, 319.0, 320.0, 290.0, 292.0, 313.0, 317.0, 319.0, 314.0, 290.0, 292.0, 311.0, 319.0, 316.0, 317.0, 311.0, 325.0, 319.0, 317.0, 317.0, 322.0, 316.0, 314.0, 268.0, 254.0, 319.0, 317.0, 299.0, 274.0, 314.0, 322.0, 313.0, 323.0, 288.0, 294.0, 293.0, 289.0, 324.0, 309.0, 316.0, 317.0, 298.0, 289.0, 319.0, 314.0, 316.0, 317.0, 293.0, 289.0, 296.0, 294.0, 288.0, 294.0, 288.0, 291.0, 316.0, 314.0, 307.0, 320.0, 296.0, 291.0, 223.0, 239.0, 324.0, 315.0, 313.0, 317.0, 290.0, 289.0, 293.0, 291.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 298.0, 289.0, 316.0, 314.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7752302665514498, "mean_processing_ms": 0.22360935089938724, "mean_inference_ms": 1.378262918190576}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11184000, "num_steps_sampled": 5964800, "sample_time_ms": 22879.712, "load_time_ms": 37.04, "grad_time_ms": 10842.802, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008535028900951147, "policy_loss": -0.007580641657114029, "vf_loss": 72.87356567382812, "vf_explained_var": 0.7752940058708191, "kl": 0.0019255572697147727, "entropy": 1.12042236328125, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 5964800, "episodes_total": 14912, "training_iteration": 466, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-47-42", "timestamp": 1660261662, "time_this_iter_s": 28.411120176315308, "time_total_s": 20070.991183042526, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20070.991183042526, "timesteps_since_restore": 5964800, "iterations_since_restore": 466, "perf": {"cpu_util_percent": 30.5725, "ram_util_percent": 58.955}} -{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 607.06, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.53}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.26, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.71, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.11, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.33, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.67, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.24, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.44, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.96, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 
0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.14, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 584.0, 579.0, 582.0, 630.0, 627.0, 573.0, 582.0, 636.0, 587.0, 639.0, 636.0, 539.0, 615.0, 579.0, 582.0, 633.0, 630.0, 582.0, 582.0, 576.0, 636.0, 627.0, 630.0, 507.0, 633.0, 564.0, 627.0, 582.0, 630.0, 630.0, 582.0, 630.0, 633.0, 636.0, 636.0, 639.0, 630.0, 522.0, 636.0, 573.0, 636.0, 636.0, 582.0, 582.0, 633.0, 633.0, 587.0, 633.0, 633.0, 582.0, 590.0, 582.0, 579.0, 630.0, 627.0, 587.0, 462.0, 639.0, 630.0, 579.0, 584.0, 630.0, 587.0, 579.0, 587.0, 630.0, 633.0, 579.0, 582.0, 630.0, 579.0, 639.0, 636.0, 630.0, 636.0, 633.0, 636.0, 582.0, 587.0, 624.0, 639.0, 630.0, 582.0, 636.0, 633.0, 633.0, 630.0, 630.0, 579.0, 555.0, 630.0, 576.0, 630.0, 627.0, 587.0, 639.0, 627.0, 627.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 296.0, 290.0, 294.0, 288.0, 291.0, 291.0, 291.0, 316.0, 314.0, 321.0, 306.0, 288.0, 285.0, 288.0, 294.0, 322.0, 314.0, 287.0, 300.0, 314.0, 325.0, 314.0, 322.0, 271.0, 268.0, 302.0, 313.0, 285.0, 294.0, 288.0, 294.0, 314.0, 319.0, 316.0, 314.0, 288.0, 294.0, 291.0, 291.0, 288.0, 288.0, 317.0, 319.0, 316.0, 311.0, 311.0, 319.0, 251.0, 256.0, 314.0, 319.0, 281.0, 283.0, 308.0, 319.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 297.0, 285.0, 311.0, 319.0, 316.0, 317.0, 311.0, 325.0, 319.0, 317.0, 317.0, 322.0, 316.0, 314.0, 268.0, 254.0, 319.0, 317.0, 299.0, 274.0, 314.0, 322.0, 313.0, 323.0, 288.0, 294.0, 293.0, 289.0, 324.0, 309.0, 316.0, 317.0, 298.0, 289.0, 319.0, 314.0, 316.0, 317.0, 293.0, 289.0, 296.0, 294.0, 288.0, 294.0, 288.0, 291.0, 316.0, 314.0, 307.0, 320.0, 296.0, 291.0, 223.0, 239.0, 324.0, 315.0, 313.0, 317.0, 290.0, 289.0, 293.0, 291.0, 311.0, 319.0, 296.0, 291.0, 288.0, 291.0, 298.0, 289.0, 316.0, 314.0, 314.0, 319.0, 291.0, 288.0, 288.0, 294.0, 316.0, 314.0, 288.0, 291.0, 317.0, 322.0, 319.0, 317.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 291.0, 291.0, 280.0, 307.0, 316.0, 308.0, 324.0, 315.0, 317.0, 313.0, 283.0, 299.0, 321.0, 315.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 311.0, 319.0, 294.0, 285.0, 270.0, 285.0, 316.0, 314.0, 281.0, 295.0, 310.0, 320.0, 313.0, 314.0, 288.0, 299.0, 322.0, 317.0, 313.0, 314.0, 321.0, 306.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7745646022660153, "mean_processing_ms": 0.2234810440216946, "mean_inference_ms": 1.3774520581026746}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11208000, "num_steps_sampled": 5977600, "sample_time_ms": 22667.166, "load_time_ms": 37.563, "grad_time_ms": 10782.505, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003179629857186228, "policy_loss": -0.007398936897516251, "vf_loss": 76.44898986816406, 
"vf_explained_var": 0.7757420539855957, "kl": 0.0019861727487295866, "entropy": 1.127841830253601, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5977600, "episodes_total": 14944, "training_iteration": 467, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-48-12", "timestamp": 1660261692, "time_this_iter_s": 29.89157724380493, "time_total_s": 20100.88276028633, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, 
inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20100.88276028633, "timesteps_since_restore": 5977600, "iterations_since_restore": 467, "perf": {"cpu_util_percent": 33.63333333333334, "ram_util_percent": 58.976190476190474}} -{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 606.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 303.42}, "custom_metrics": {"sparse_reward_mean": 210.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.04, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.12, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.18, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.58, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.7, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.42, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.38, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.0, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.58, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.58, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 581.0, 582.0, 582.0, 636.0, 633.0, 633.0, 561.0, 630.0, 578.0, 633.0, 582.0, 582.0, 582.0, 579.0, 584.0, 639.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 567.0, 587.0, 636.0, 630.0, 582.0, 639.0, 633.0, 630.0, 576.0, 579.0, 587.0, 630.0, 633.0, 579.0, 582.0, 630.0, 579.0, 639.0, 636.0, 630.0, 636.0, 633.0, 636.0, 582.0, 587.0, 624.0, 639.0, 630.0, 582.0, 636.0, 633.0, 633.0, 630.0, 630.0, 579.0, 555.0, 630.0, 576.0, 630.0, 627.0, 587.0, 639.0, 627.0, 627.0, 633.0, 582.0, 584.0, 579.0, 582.0, 630.0, 627.0, 573.0, 582.0, 636.0, 587.0, 639.0, 636.0, 539.0, 615.0, 579.0, 582.0, 633.0, 630.0, 582.0, 582.0, 576.0, 636.0, 627.0, 630.0, 507.0, 633.0, 564.0, 627.0, 582.0, 630.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 290.0, 291.0, 293.0, 289.0, 296.0, 286.0, 321.0, 315.0, 319.0, 314.0, 319.0, 314.0, 285.0, 276.0, 316.0, 314.0, 295.0, 283.0, 319.0, 314.0, 288.0, 294.0, 294.0, 288.0, 283.0, 299.0, 281.0, 298.0, 288.0, 296.0, 317.0, 322.0, 296.0, 291.0, 289.0, 293.0, 288.0, 299.0, 321.0, 312.0, 313.0, 317.0, 319.0, 311.0, 284.0, 283.0, 302.0, 285.0, 314.0, 322.0, 313.0, 317.0, 291.0, 291.0, 317.0, 322.0, 309.0, 324.0, 316.0, 314.0, 291.0, 285.0, 288.0, 291.0, 298.0, 289.0, 316.0, 314.0, 314.0, 319.0, 291.0, 288.0, 288.0, 294.0, 316.0, 314.0, 288.0, 291.0, 317.0, 322.0, 319.0, 317.0, 313.0, 317.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 291.0, 291.0, 280.0, 307.0, 316.0, 308.0, 324.0, 315.0, 317.0, 313.0, 283.0, 299.0, 321.0, 315.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 311.0, 319.0, 294.0, 285.0, 270.0, 285.0, 316.0, 314.0, 281.0, 295.0, 310.0, 320.0, 313.0, 314.0, 288.0, 299.0, 322.0, 317.0, 313.0, 314.0, 321.0, 306.0, 316.0, 317.0, 286.0, 296.0, 290.0, 294.0, 288.0, 291.0, 291.0, 291.0, 316.0, 314.0, 321.0, 306.0, 288.0, 285.0, 288.0, 294.0, 322.0, 314.0, 287.0, 300.0, 314.0, 325.0, 314.0, 322.0, 271.0, 268.0, 302.0, 313.0, 285.0, 294.0, 288.0, 294.0, 314.0, 319.0, 316.0, 314.0, 288.0, 294.0, 291.0, 291.0, 288.0, 288.0, 317.0, 319.0, 316.0, 311.0, 311.0, 319.0, 251.0, 256.0, 314.0, 319.0, 281.0, 283.0, 308.0, 319.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 297.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7738952247411988, "mean_processing_ms": 0.22335063238074299, "mean_inference_ms": 1.3764749095367632}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11232000, "num_steps_sampled": 5990400, "sample_time_ms": 22503.528, "load_time_ms": 37.786, "grad_time_ms": 10902.652, "update_time_ms": 0.002, 
"learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012515783309936523, "policy_loss": -0.006079933140426874, "vf_loss": 78.91991424560547, "vf_explained_var": 0.7658045887947083, "kl": 0.0020609761122614145, "entropy": 1.1209732294082642, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 5990400, "episodes_total": 14976, "training_iteration": 468, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-48-43", "timestamp": 1660261723, "time_this_iter_s": 31.115761756896973, "time_total_s": 20131.998522043228, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": 
{"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], 
"postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20131.998522043228, "timesteps_since_restore": 5990400, "iterations_since_restore": 468, "perf": {"cpu_util_percent": 34.11818181818182, "ram_util_percent": 59.265909090909076}} -{"episode_reward_max": 639.0, "episode_reward_min": 507.0, "episode_reward_mean": 609.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.625}, "custom_metrics": {"sparse_reward_mean": 211.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 186.45, "shaped_reward_min": 147, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.39, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.84, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.35, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.5, 
"useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 624.0, 627.0, 639.0, 636.0, 636.0, 636.0, 587.0, 633.0, 582.0, 630.0, 636.0, 633.0, 624.0, 630.0, 624.0, 582.0, 633.0, 570.0, 579.0, 633.0, 630.0, 633.0, 630.0, 587.0, 570.0, 633.0, 633.0, 633.0, 627.0, 633.0, 627.0, 639.0, 627.0, 627.0, 633.0, 582.0, 584.0, 579.0, 582.0, 630.0, 627.0, 573.0, 582.0, 636.0, 587.0, 639.0, 636.0, 539.0, 615.0, 579.0, 582.0, 633.0, 630.0, 582.0, 582.0, 576.0, 636.0, 627.0, 630.0, 507.0, 633.0, 564.0, 627.0, 582.0, 630.0, 630.0, 582.0, 630.0, 581.0, 582.0, 582.0, 636.0, 633.0, 633.0, 561.0, 630.0, 578.0, 633.0, 582.0, 582.0, 582.0, 579.0, 584.0, 639.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 567.0, 587.0, 636.0, 630.0, 582.0, 639.0, 633.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 313.0, 311.0, 316.0, 311.0, 319.0, 320.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 295.0, 292.0, 316.0, 317.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 316.0, 308.0, 311.0, 319.0, 315.0, 309.0, 288.0, 294.0, 316.0, 317.0, 282.0, 288.0, 294.0, 285.0, 319.0, 314.0, 316.0, 314.0, 313.0, 320.0, 316.0, 314.0, 291.0, 296.0, 296.0, 274.0, 321.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 311.0, 316.0, 317.0, 311.0, 316.0, 322.0, 317.0, 313.0, 314.0, 321.0, 306.0, 316.0, 317.0, 286.0, 296.0, 290.0, 294.0, 288.0, 291.0, 291.0, 291.0, 316.0, 314.0, 321.0, 306.0, 288.0, 285.0, 288.0, 294.0, 322.0, 314.0, 287.0, 300.0, 314.0, 325.0, 314.0, 322.0, 271.0, 268.0, 302.0, 313.0, 285.0, 294.0, 288.0, 294.0, 314.0, 319.0, 316.0, 314.0, 288.0, 294.0, 291.0, 291.0, 288.0, 288.0, 317.0, 319.0, 316.0, 311.0, 311.0, 319.0, 251.0, 256.0, 314.0, 319.0, 281.0, 283.0, 308.0, 319.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 297.0, 285.0, 316.0, 314.0, 290.0, 291.0, 293.0, 289.0, 296.0, 286.0, 321.0, 315.0, 319.0, 314.0, 319.0, 314.0, 285.0, 276.0, 316.0, 314.0, 295.0, 283.0, 319.0, 314.0, 288.0, 294.0, 294.0, 288.0, 283.0, 299.0, 281.0, 298.0, 288.0, 296.0, 317.0, 322.0, 296.0, 291.0, 289.0, 293.0, 288.0, 299.0, 321.0, 312.0, 313.0, 317.0, 319.0, 311.0, 284.0, 283.0, 302.0, 285.0, 314.0, 322.0, 313.0, 317.0, 291.0, 291.0, 317.0, 322.0, 309.0, 324.0, 316.0, 314.0, 291.0, 285.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7732342684064082, "mean_processing_ms": 0.22322351749716166, 
"mean_inference_ms": 1.3755872244816174}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11256000, "num_steps_sampled": 6003200, "sample_time_ms": 22143.958, "load_time_ms": 38.351, "grad_time_ms": 11066.072, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00040481146425008774, "policy_loss": -0.006932735443115234, "vf_loss": 70.86636352539062, "vf_explained_var": 0.7707180976867676, "kl": 0.0017913728952407837, "entropy": 1.1174226999282837, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6003200, "episodes_total": 15008, "training_iteration": 469, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-49-17", "timestamp": 1660261757, "time_this_iter_s": 33.90879726409912, "time_total_s": 20165.907319307327, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, 
"num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20165.907319307327, "timesteps_since_restore": 6003200, "iterations_since_restore": 469, "perf": {"cpu_util_percent": 34.28125, "ram_util_percent": 58.99583333333334}} -{"episode_reward_max": 639.0, "episode_reward_min": 552.0, "episode_reward_mean": 612.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 274.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 306.385}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 187.17, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, 
"tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.2, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.28, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, 
"dish_pickup_agent_0_mean": 6.73, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.28, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.92, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.28, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.28, "viable_onion_potting_agent_0_min": 12, 
"viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 633.0, 582.0, 639.0, 579.0, 579.0, 630.0, 630.0, 624.0, 636.0, 582.0, 630.0, 630.0, 633.0, 615.0, 627.0, 630.0, 587.0, 630.0, 630.0, 630.0, 636.0, 630.0, 636.0, 621.0, 587.0, 633.0, 582.0, 579.0, 633.0, 630.0, 552.0, 582.0, 630.0, 630.0, 582.0, 630.0, 581.0, 582.0, 582.0, 636.0, 633.0, 633.0, 561.0, 630.0, 578.0, 633.0, 582.0, 582.0, 582.0, 579.0, 584.0, 639.0, 587.0, 582.0, 587.0, 633.0, 630.0, 630.0, 567.0, 587.0, 636.0, 630.0, 582.0, 639.0, 633.0, 630.0, 576.0, 630.0, 624.0, 627.0, 
639.0, 636.0, 636.0, 636.0, 587.0, 633.0, 582.0, 630.0, 636.0, 633.0, 624.0, 630.0, 624.0, 582.0, 633.0, 570.0, 579.0, 633.0, 630.0, 633.0, 630.0, 587.0, 570.0, 633.0, 633.0, 633.0, 627.0, 633.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 298.0, 314.0, 319.0, 294.0, 288.0, 317.0, 322.0, 297.0, 282.0, 285.0, 294.0, 311.0, 319.0, 319.0, 311.0, 315.0, 309.0, 319.0, 317.0, 291.0, 291.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 307.0, 308.0, 319.0, 308.0, 316.0, 314.0, 296.0, 291.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 313.0, 308.0, 291.0, 296.0, 311.0, 322.0, 293.0, 289.0, 279.0, 300.0, 311.0, 322.0, 316.0, 314.0, 276.0, 276.0, 289.0, 293.0, 318.0, 312.0, 311.0, 319.0, 297.0, 285.0, 316.0, 314.0, 290.0, 291.0, 293.0, 289.0, 296.0, 286.0, 321.0, 315.0, 319.0, 314.0, 319.0, 314.0, 285.0, 276.0, 316.0, 314.0, 295.0, 283.0, 319.0, 314.0, 288.0, 294.0, 294.0, 288.0, 283.0, 299.0, 281.0, 298.0, 288.0, 296.0, 317.0, 322.0, 296.0, 291.0, 289.0, 293.0, 288.0, 299.0, 321.0, 312.0, 313.0, 317.0, 319.0, 311.0, 284.0, 283.0, 302.0, 285.0, 314.0, 322.0, 313.0, 317.0, 291.0, 291.0, 317.0, 322.0, 309.0, 324.0, 316.0, 314.0, 291.0, 285.0, 316.0, 314.0, 313.0, 311.0, 316.0, 311.0, 319.0, 320.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 295.0, 292.0, 316.0, 317.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 316.0, 308.0, 311.0, 319.0, 315.0, 309.0, 288.0, 294.0, 316.0, 317.0, 282.0, 288.0, 294.0, 285.0, 319.0, 314.0, 316.0, 
314.0, 313.0, 320.0, 316.0, 314.0, 291.0, 296.0, 296.0, 274.0, 321.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 311.0, 316.0, 317.0, 311.0, 316.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7725761312773625, "mean_processing_ms": 0.2230967942471684, "mean_inference_ms": 1.3747759311690726}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11280000, "num_steps_sampled": 6016000, "sample_time_ms": 21879.975, "load_time_ms": 38.961, "grad_time_ms": 10890.541, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007060763309709728, "policy_loss": -0.0072075664065778255, "vf_loss": 70.60037231445312, "vf_explained_var": 0.7745871543884277, "kl": 0.0018414078513160348, "entropy": 1.1170852184295654, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6016000, "episodes_total": 15040, "training_iteration": 470, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-49-49", "timestamp": 1660261789, "time_this_iter_s": 31.930355072021484, "time_total_s": 20197.83767437935, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, 
"no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, 
"remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20197.83767437935, "timesteps_since_restore": 6016000, "iterations_since_restore": 470, "perf": {"cpu_util_percent": 29.486666666666665, "ram_util_percent": 59.02666666666667}} -{"episode_reward_max": 639.0, "episode_reward_min": 552.0, "episode_reward_mean": 617.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 274.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 308.805}, "custom_metrics": {"sparse_reward_mean": 214.6, "sparse_reward_min": 200, "sparse_reward_max": 220, 
"shaped_reward_mean": 188.41, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.03, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.33, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, 
"potting_onion_agent_0_mean": 16.42, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.8, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 6.14, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.42, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 
0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.42, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 587.0, 624.0, 633.0, 636.0, 630.0, 633.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 584.0, 582.0, 636.0, 618.0, 636.0, 633.0, 627.0, 582.0, 582.0, 582.0, 633.0, 636.0, 630.0, 582.0, 639.0, 633.0, 630.0, 576.0, 630.0, 624.0, 627.0, 639.0, 636.0, 636.0, 636.0, 587.0, 633.0, 582.0, 
630.0, 636.0, 633.0, 624.0, 630.0, 624.0, 582.0, 633.0, 570.0, 579.0, 633.0, 630.0, 633.0, 630.0, 587.0, 570.0, 633.0, 633.0, 633.0, 627.0, 633.0, 627.0, 582.0, 633.0, 582.0, 639.0, 579.0, 579.0, 630.0, 630.0, 624.0, 636.0, 582.0, 630.0, 630.0, 633.0, 615.0, 627.0, 630.0, 587.0, 630.0, 630.0, 630.0, 636.0, 630.0, 636.0, 621.0, 587.0, 633.0, 582.0, 579.0, 633.0, 630.0, 552.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 319.0, 311.0, 288.0, 299.0, 311.0, 313.0, 311.0, 322.0, 317.0, 319.0, 313.0, 317.0, 316.0, 317.0, 321.0, 312.0, 317.0, 322.0, 314.0, 319.0, 324.0, 312.0, 324.0, 312.0, 321.0, 312.0, 308.0, 319.0, 296.0, 286.0, 314.0, 319.0, 319.0, 320.0, 296.0, 288.0, 286.0, 296.0, 319.0, 317.0, 308.0, 310.0, 317.0, 319.0, 319.0, 314.0, 321.0, 306.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 291.0, 291.0, 317.0, 322.0, 309.0, 324.0, 316.0, 314.0, 291.0, 285.0, 316.0, 314.0, 313.0, 311.0, 316.0, 311.0, 319.0, 320.0, 319.0, 317.0, 314.0, 322.0, 319.0, 317.0, 295.0, 292.0, 316.0, 317.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 316.0, 308.0, 311.0, 319.0, 315.0, 309.0, 288.0, 294.0, 316.0, 317.0, 282.0, 288.0, 294.0, 285.0, 319.0, 314.0, 316.0, 314.0, 313.0, 320.0, 316.0, 314.0, 291.0, 296.0, 296.0, 274.0, 321.0, 312.0, 316.0, 317.0, 317.0, 316.0, 316.0, 311.0, 316.0, 317.0, 311.0, 316.0, 284.0, 298.0, 314.0, 319.0, 294.0, 288.0, 317.0, 322.0, 297.0, 282.0, 285.0, 294.0, 311.0, 319.0, 319.0, 311.0, 315.0, 309.0, 
319.0, 317.0, 291.0, 291.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 307.0, 308.0, 319.0, 308.0, 316.0, 314.0, 296.0, 291.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 313.0, 308.0, 291.0, 296.0, 311.0, 322.0, 293.0, 289.0, 279.0, 300.0, 311.0, 322.0, 316.0, 314.0, 276.0, 276.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7719288658166613, "mean_processing_ms": 0.22297329438160504, "mean_inference_ms": 1.3741316094375031}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11304000, "num_steps_sampled": 6028800, "sample_time_ms": 21780.977, "load_time_ms": 38.854, "grad_time_ms": 10976.386, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016910168342292309, "policy_loss": -0.008838978596031666, "vf_loss": 77.08248901367188, "vf_explained_var": 0.7691299319267273, "kl": 0.0020619730930775404, "entropy": 1.1205838918685913, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6028800, "episodes_total": 15072, "training_iteration": 471, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-50-24", "timestamp": 1660261824, "time_this_iter_s": 35.09460806846619, "time_total_s": 20232.932282447815, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": 
true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20232.932282447815, "timesteps_since_restore": 6028800, "iterations_since_restore": 471, "perf": {"cpu_util_percent": 30.86, "ram_util_percent": 59.02799999999999}} -{"episode_reward_max": 639.0, "episode_reward_min": 552.0, "episode_reward_mean": 617.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 
276.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.91}, "custom_metrics": {"sparse_reward_mean": 214.6, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 188.62, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.21, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.51, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.49, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.59, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.78, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.62, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.19, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.59, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.78, "optimal_onion_potting_agent_1_min": 15, 
"optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.59, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.78, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 582.0, 630.0, 624.0, 615.0, 582.0, 582.0, 633.0, 630.0, 633.0, 639.0, 582.0, 633.0, 621.0, 633.0, 636.0, 
636.0, 636.0, 633.0, 633.0, 581.0, 633.0, 630.0, 582.0, 627.0, 584.0, 636.0, 582.0, 630.0, 636.0, 639.0, 633.0, 627.0, 633.0, 627.0, 582.0, 633.0, 582.0, 639.0, 579.0, 579.0, 630.0, 630.0, 624.0, 636.0, 582.0, 630.0, 630.0, 633.0, 615.0, 627.0, 630.0, 587.0, 630.0, 630.0, 630.0, 636.0, 630.0, 636.0, 621.0, 587.0, 633.0, 582.0, 579.0, 633.0, 630.0, 552.0, 579.0, 630.0, 587.0, 624.0, 633.0, 636.0, 630.0, 633.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 584.0, 582.0, 636.0, 618.0, 636.0, 633.0, 627.0, 582.0, 582.0, 582.0, 633.0, 636.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 316.0, 308.0, 305.0, 310.0, 291.0, 291.0, 287.0, 295.0, 311.0, 322.0, 316.0, 314.0, 317.0, 316.0, 319.0, 320.0, 293.0, 289.0, 316.0, 317.0, 310.0, 311.0, 314.0, 319.0, 309.0, 327.0, 319.0, 317.0, 314.0, 322.0, 311.0, 322.0, 319.0, 314.0, 293.0, 288.0, 317.0, 316.0, 309.0, 321.0, 296.0, 286.0, 314.0, 313.0, 298.0, 286.0, 317.0, 319.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 323.0, 317.0, 316.0, 316.0, 311.0, 316.0, 317.0, 311.0, 316.0, 284.0, 298.0, 314.0, 319.0, 294.0, 288.0, 317.0, 322.0, 297.0, 282.0, 285.0, 294.0, 311.0, 319.0, 319.0, 311.0, 315.0, 309.0, 319.0, 317.0, 291.0, 291.0, 316.0, 314.0, 319.0, 311.0, 316.0, 317.0, 307.0, 308.0, 319.0, 308.0, 316.0, 314.0, 296.0, 291.0, 311.0, 319.0, 313.0, 317.0, 311.0, 319.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 313.0, 308.0, 291.0, 296.0, 311.0, 
322.0, 293.0, 289.0, 279.0, 300.0, 311.0, 322.0, 316.0, 314.0, 276.0, 276.0, 286.0, 293.0, 319.0, 311.0, 288.0, 299.0, 311.0, 313.0, 311.0, 322.0, 317.0, 319.0, 313.0, 317.0, 316.0, 317.0, 321.0, 312.0, 317.0, 322.0, 314.0, 319.0, 324.0, 312.0, 324.0, 312.0, 321.0, 312.0, 308.0, 319.0, 296.0, 286.0, 314.0, 319.0, 319.0, 320.0, 296.0, 288.0, 286.0, 296.0, 319.0, 317.0, 308.0, 310.0, 317.0, 319.0, 319.0, 314.0, 321.0, 306.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 291.0, 291.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7712881392644176, "mean_processing_ms": 0.2228504081573419, "mean_inference_ms": 1.3735625457302265}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11328000, "num_steps_sampled": 6041600, "sample_time_ms": 21798.833, "load_time_ms": 39.052, "grad_time_ms": 10918.523, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015959719894453883, "policy_loss": -0.005134529899805784, "vf_loss": 72.90253448486328, "vf_explained_var": 0.7736382484436035, "kl": 0.00227816472761333, "entropy": 1.1195167303085327, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6041600, "episodes_total": 15104, "training_iteration": 472, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-50-57", "timestamp": 1660261857, "time_this_iter_s": 32.47214722633362, "time_total_s": 20265.40442967415, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20265.40442967415, "timesteps_since_restore": 6041600, "iterations_since_restore": 472, "perf": {"cpu_util_percent": 31.686956521739138, 
"ram_util_percent": 58.99347826086958}} -{"episode_reward_max": 639.0, "episode_reward_min": 498.0, "episode_reward_mean": 616.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.235}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.07, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.18, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.27, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.56, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 21, 
"onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.6, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.78, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.16, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.91, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.6, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.78, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.6, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.78, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 582.0, 630.0, 609.0, 630.0, 636.0, 582.0, 636.0, 633.0, 633.0, 558.0, 630.0, 633.0, 630.0, 630.0, 636.0, 630.0, 636.0, 582.0, 587.0, 627.0, 639.0, 636.0, 587.0, 630.0, 627.0, 630.0, 498.0, 579.0, 630.0, 630.0, 579.0, 633.0, 630.0, 552.0, 579.0, 630.0, 587.0, 624.0, 633.0, 636.0, 630.0, 633.0, 633.0, 639.0, 633.0, 636.0, 636.0, 633.0, 627.0, 582.0, 633.0, 639.0, 584.0, 582.0, 636.0, 618.0, 636.0, 633.0, 627.0, 582.0, 582.0, 582.0, 633.0, 636.0, 630.0, 582.0, 633.0, 633.0, 582.0, 630.0, 624.0, 615.0, 582.0, 582.0, 633.0, 630.0, 633.0, 639.0, 582.0, 633.0, 621.0, 633.0, 636.0, 636.0, 636.0, 633.0, 633.0, 581.0, 633.0, 630.0, 582.0, 627.0, 584.0, 636.0, 582.0, 630.0, 636.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 311.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 304.0, 305.0, 316.0, 314.0, 319.0, 317.0, 286.0, 296.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 276.0, 282.0, 316.0, 314.0, 314.0, 319.0, 324.0, 306.0, 319.0, 311.0, 319.0, 317.0, 316.0, 314.0, 314.0, 322.0, 294.0, 288.0, 296.0, 291.0, 308.0, 319.0, 319.0, 320.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 322.0, 305.0, 316.0, 314.0, 242.0, 256.0, 288.0, 291.0, 313.0, 317.0, 316.0, 314.0, 279.0, 300.0, 311.0, 322.0, 316.0, 314.0, 276.0, 276.0, 286.0, 293.0, 319.0, 311.0, 288.0, 299.0, 311.0, 313.0, 311.0, 322.0, 317.0, 319.0, 313.0, 317.0, 316.0, 317.0, 321.0, 312.0, 317.0, 
322.0, 314.0, 319.0, 324.0, 312.0, 324.0, 312.0, 321.0, 312.0, 308.0, 319.0, 296.0, 286.0, 314.0, 319.0, 319.0, 320.0, 296.0, 288.0, 286.0, 296.0, 319.0, 317.0, 308.0, 310.0, 317.0, 319.0, 319.0, 314.0, 321.0, 306.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 291.0, 291.0, 314.0, 319.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 316.0, 308.0, 305.0, 310.0, 291.0, 291.0, 287.0, 295.0, 311.0, 322.0, 316.0, 314.0, 317.0, 316.0, 319.0, 320.0, 293.0, 289.0, 316.0, 317.0, 310.0, 311.0, 314.0, 319.0, 309.0, 327.0, 319.0, 317.0, 314.0, 322.0, 311.0, 322.0, 319.0, 314.0, 293.0, 288.0, 317.0, 316.0, 309.0, 321.0, 296.0, 286.0, 314.0, 313.0, 298.0, 286.0, 317.0, 319.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 323.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7706549385151754, "mean_processing_ms": 0.22272985988411015, "mean_inference_ms": 1.3730067691447254}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11352000, "num_steps_sampled": 6054400, "sample_time_ms": 21729.717, "load_time_ms": 39.098, "grad_time_ms": 10717.617, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005160535220056772, "policy_loss": -0.007616788614541292, "vf_loss": 76.61554718017578, "vf_explained_var": 0.7677715420722961, "kl": 0.0017990797059610486, "entropy": 1.1216602325439453, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6054400, "episodes_total": 15136, "training_iteration": 473, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-51-29", "timestamp": 1660261889, "time_this_iter_s": 32.178860902786255, "time_total_s": 20297.583290576935, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20297.583290576935, "timesteps_since_restore": 6054400, "iterations_since_restore": 473, "perf": {"cpu_util_percent": 29.955555555555556, "ram_util_percent": 59.05111111111111}} -{"episode_reward_max": 639.0, "episode_reward_min": 498.0, "episode_reward_mean": 616.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 242.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.225}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 188.05, "shaped_reward_min": 138, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.93, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.94, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.67, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.89, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, 
"soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.41, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.94, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.94, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 636.0, 639.0, 633.0, 582.0, 579.0, 627.0, 636.0, 639.0, 636.0, 630.0, 573.0, 558.0, 573.0, 636.0, 579.0, 630.0, 633.0, 630.0, 633.0, 579.0, 636.0, 630.0, 630.0, 630.0, 587.0, 624.0, 633.0, 639.0, 582.0, 636.0, 633.0, 636.0, 630.0, 582.0, 633.0, 633.0, 582.0, 630.0, 624.0, 615.0, 582.0, 582.0, 633.0, 630.0, 633.0, 639.0, 582.0, 633.0, 621.0, 633.0, 636.0, 636.0, 636.0, 633.0, 633.0, 581.0, 633.0, 630.0, 582.0, 627.0, 584.0, 636.0, 582.0, 630.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 609.0, 630.0, 636.0, 582.0, 636.0, 633.0, 633.0, 558.0, 630.0, 633.0, 630.0, 630.0, 636.0, 630.0, 636.0, 582.0, 587.0, 627.0, 639.0, 636.0, 587.0, 630.0, 627.0, 630.0, 498.0, 579.0, 630.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [321.0, 309.0, 285.0, 294.0, 311.0, 325.0, 319.0, 320.0, 319.0, 314.0, 291.0, 291.0, 287.0, 292.0, 313.0, 314.0, 324.0, 312.0, 324.0, 315.0, 319.0, 317.0, 311.0, 319.0, 282.0, 291.0, 278.0, 280.0, 280.0, 293.0, 319.0, 317.0, 288.0, 291.0, 311.0, 319.0, 319.0, 314.0, 317.0, 313.0, 316.0, 317.0, 293.0, 286.0, 314.0, 322.0, 321.0, 309.0, 326.0, 304.0, 316.0, 314.0, 293.0, 294.0, 305.0, 319.0, 316.0, 317.0, 314.0, 325.0, 
285.0, 297.0, 319.0, 317.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 291.0, 291.0, 314.0, 319.0, 314.0, 319.0, 294.0, 288.0, 316.0, 314.0, 316.0, 308.0, 305.0, 310.0, 291.0, 291.0, 287.0, 295.0, 311.0, 322.0, 316.0, 314.0, 317.0, 316.0, 319.0, 320.0, 293.0, 289.0, 316.0, 317.0, 310.0, 311.0, 314.0, 319.0, 309.0, 327.0, 319.0, 317.0, 314.0, 322.0, 311.0, 322.0, 319.0, 314.0, 293.0, 288.0, 317.0, 316.0, 309.0, 321.0, 296.0, 286.0, 314.0, 313.0, 298.0, 286.0, 317.0, 319.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 323.0, 319.0, 311.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 304.0, 305.0, 316.0, 314.0, 319.0, 317.0, 286.0, 296.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 276.0, 282.0, 316.0, 314.0, 314.0, 319.0, 324.0, 306.0, 319.0, 311.0, 319.0, 317.0, 316.0, 314.0, 314.0, 322.0, 294.0, 288.0, 296.0, 291.0, 308.0, 319.0, 319.0, 320.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 322.0, 305.0, 316.0, 314.0, 242.0, 256.0, 288.0, 291.0, 313.0, 317.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7700230204234435, "mean_processing_ms": 0.22260903764962753, "mean_inference_ms": 1.3724136807505178}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11376000, "num_steps_sampled": 6067200, "sample_time_ms": 21448.766, "load_time_ms": 39.222, "grad_time_ms": 10552.154, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001221023383550346, "policy_loss": -0.005727085750550032, "vf_loss": 75.09713745117188, "vf_explained_var": 0.7705094218254089, "kl": 0.002081832615658641, "entropy": 1.1232246160507202, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6067200, "episodes_total": 15168, "training_iteration": 474, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-52-02", "timestamp": 1660261922, "time_this_iter_s": 32.74073004722595, "time_total_s": 20330.32402062416, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20330.32402062416, "timesteps_since_restore": 6067200, "iterations_since_restore": 474, "perf": {"cpu_util_percent": 29.70652173913044, "ram_util_percent": 59.01521739130436}} -{"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 616.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 308.16}, "custom_metrics": {"sparse_reward_mean": 214.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 188.32, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 16.74, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.52, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.67, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.22, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.12, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.83, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.73, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.04, 
"soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.7, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.22, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.12, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.22, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.12, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 587.0, 633.0, 633.0, 630.0, 639.0, 639.0, 624.0, 630.0, 582.0, 408.0, 636.0, 579.0, 636.0, 633.0, 633.0, 636.0, 639.0, 627.0, 633.0, 636.0, 630.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 639.0, 630.0, 633.0, 633.0, 582.0, 630.0, 636.0, 639.0, 630.0, 582.0, 582.0, 630.0, 609.0, 630.0, 636.0, 582.0, 636.0, 633.0, 633.0, 558.0, 630.0, 633.0, 630.0, 630.0, 636.0, 630.0, 636.0, 582.0, 587.0, 627.0, 639.0, 636.0, 587.0, 630.0, 627.0, 630.0, 498.0, 579.0, 630.0, 630.0, 630.0, 579.0, 636.0, 639.0, 633.0, 582.0, 579.0, 627.0, 636.0, 639.0, 636.0, 630.0, 573.0, 558.0, 573.0, 636.0, 579.0, 630.0, 633.0, 630.0, 633.0, 579.0, 636.0, 630.0, 630.0, 630.0, 587.0, 624.0, 633.0, 639.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 317.0, 293.0, 294.0, 313.0, 320.0, 319.0, 314.0, 316.0, 314.0, 322.0, 317.0, 322.0, 317.0, 313.0, 311.0, 311.0, 319.0, 291.0, 291.0, 208.0, 200.0, 314.0, 322.0, 284.0, 295.0, 319.0, 317.0, 313.0, 320.0, 311.0, 
322.0, 319.0, 317.0, 322.0, 317.0, 313.0, 314.0, 322.0, 311.0, 319.0, 317.0, 321.0, 309.0, 319.0, 320.0, 318.0, 312.0, 293.0, 294.0, 311.0, 319.0, 319.0, 317.0, 288.0, 299.0, 322.0, 317.0, 316.0, 314.0, 317.0, 316.0, 319.0, 314.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 316.0, 323.0, 319.0, 311.0, 291.0, 291.0, 294.0, 288.0, 316.0, 314.0, 304.0, 305.0, 316.0, 314.0, 319.0, 317.0, 286.0, 296.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 276.0, 282.0, 316.0, 314.0, 314.0, 319.0, 324.0, 306.0, 319.0, 311.0, 319.0, 317.0, 316.0, 314.0, 314.0, 322.0, 294.0, 288.0, 296.0, 291.0, 308.0, 319.0, 319.0, 320.0, 319.0, 317.0, 293.0, 294.0, 311.0, 319.0, 322.0, 305.0, 316.0, 314.0, 242.0, 256.0, 288.0, 291.0, 313.0, 317.0, 316.0, 314.0, 321.0, 309.0, 285.0, 294.0, 311.0, 325.0, 319.0, 320.0, 319.0, 314.0, 291.0, 291.0, 287.0, 292.0, 313.0, 314.0, 324.0, 312.0, 324.0, 315.0, 319.0, 317.0, 311.0, 319.0, 282.0, 291.0, 278.0, 280.0, 280.0, 293.0, 319.0, 317.0, 288.0, 291.0, 311.0, 319.0, 319.0, 314.0, 317.0, 313.0, 316.0, 317.0, 293.0, 286.0, 314.0, 322.0, 321.0, 309.0, 326.0, 304.0, 316.0, 314.0, 293.0, 294.0, 305.0, 319.0, 316.0, 317.0, 314.0, 325.0, 285.0, 297.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7693923762268673, "mean_processing_ms": 0.22248851172311768, "mean_inference_ms": 1.3718337576179396}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11400000, "num_steps_sampled": 6080000, "sample_time_ms": 21471.253, "load_time_ms": 39.181, "grad_time_ms": 10674.814, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001056800247170031, "policy_loss": -0.005863674450665712, "vf_loss": 74.7898178100586, "vf_explained_var": 0.7796471118927002, "kl": 0.002407137770205736, "entropy": 1.1170334815979004, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6080000, "episodes_total": 15200, 
"training_iteration": 475, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-52-36", "timestamp": 1660261956, "time_this_iter_s": 34.37432289123535, "time_total_s": 20364.698343515396, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20364.698343515396, "timesteps_since_restore": 6080000, "iterations_since_restore": 475, "perf": {"cpu_util_percent": 34.573469387755104, "ram_util_percent": 58.995918367346945}} -{"episode_reward_max": 639.0, "episode_reward_min": 291.0, "episode_reward_mean": 611.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 305.56}, "custom_metrics": {"sparse_reward_mean": 212.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 187.12, "shaped_reward_min": 91, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.5, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.97, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.55, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.07, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.06, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.82, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.66, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.69, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.07, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.06, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.07, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.06, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 
0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 582.0, 636.0, 633.0, 636.0, 291.0, 639.0, 576.0, 587.0, 630.0, 587.0, 633.0, 621.0, 582.0, 630.0, 584.0, 465.0, 636.0, 582.0, 630.0, 636.0, 636.0, 630.0, 636.0, 624.0, 579.0, 624.0, 633.0, 636.0, 582.0, 636.0, 498.0, 579.0, 630.0, 630.0, 630.0, 579.0, 636.0, 639.0, 633.0, 582.0, 579.0, 627.0, 636.0, 639.0, 636.0, 630.0, 573.0, 558.0, 573.0, 636.0, 579.0, 630.0, 633.0, 630.0, 633.0, 579.0, 636.0, 630.0, 630.0, 630.0, 587.0, 624.0, 633.0, 639.0, 582.0, 636.0, 633.0, 587.0, 633.0, 633.0, 630.0, 639.0, 639.0, 624.0, 630.0, 582.0, 408.0, 636.0, 579.0, 636.0, 633.0, 633.0, 636.0, 639.0, 627.0, 633.0, 636.0, 630.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 639.0, 630.0, 633.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 319.0, 324.0, 312.0, 
294.0, 288.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 145.0, 146.0, 319.0, 320.0, 292.0, 284.0, 293.0, 294.0, 313.0, 317.0, 296.0, 291.0, 318.0, 315.0, 313.0, 308.0, 296.0, 286.0, 314.0, 316.0, 298.0, 286.0, 228.0, 237.0, 319.0, 317.0, 296.0, 286.0, 313.0, 317.0, 327.0, 309.0, 322.0, 314.0, 316.0, 314.0, 319.0, 317.0, 311.0, 313.0, 283.0, 296.0, 310.0, 314.0, 316.0, 317.0, 319.0, 317.0, 289.0, 293.0, 319.0, 317.0, 242.0, 256.0, 288.0, 291.0, 313.0, 317.0, 316.0, 314.0, 321.0, 309.0, 285.0, 294.0, 311.0, 325.0, 319.0, 320.0, 319.0, 314.0, 291.0, 291.0, 287.0, 292.0, 313.0, 314.0, 324.0, 312.0, 324.0, 315.0, 319.0, 317.0, 311.0, 319.0, 282.0, 291.0, 278.0, 280.0, 280.0, 293.0, 319.0, 317.0, 288.0, 291.0, 311.0, 319.0, 319.0, 314.0, 317.0, 313.0, 316.0, 317.0, 293.0, 286.0, 314.0, 322.0, 321.0, 309.0, 326.0, 304.0, 316.0, 314.0, 293.0, 294.0, 305.0, 319.0, 316.0, 317.0, 314.0, 325.0, 285.0, 297.0, 319.0, 317.0, 316.0, 317.0, 293.0, 294.0, 313.0, 320.0, 319.0, 314.0, 316.0, 314.0, 322.0, 317.0, 322.0, 317.0, 313.0, 311.0, 311.0, 319.0, 291.0, 291.0, 208.0, 200.0, 314.0, 322.0, 284.0, 295.0, 319.0, 317.0, 313.0, 320.0, 311.0, 322.0, 319.0, 317.0, 322.0, 317.0, 313.0, 314.0, 322.0, 311.0, 319.0, 317.0, 321.0, 309.0, 319.0, 320.0, 318.0, 312.0, 293.0, 294.0, 311.0, 319.0, 319.0, 317.0, 288.0, 299.0, 322.0, 317.0, 316.0, 314.0, 317.0, 316.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7687649833136294, "mean_processing_ms": 0.2223678201524863, "mean_inference_ms": 1.3712901278888552}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11424000, "num_steps_sampled": 6092800, "sample_time_ms": 21822.996, "load_time_ms": 39.16, "grad_time_ms": 10774.782, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006356429657898843, "policy_loss": -0.006720335688441992, "vf_loss": 79.15064239501953, "vf_explained_var": 0.7751259207725525, "kl": 0.0025446319486945868, "entropy": 1.1181851625442505, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6092800, "episodes_total": 15232, "training_iteration": 476, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-53-09", "timestamp": 1660261989, "time_this_iter_s": 32.92729115486145, "time_total_s": 20397.625634670258, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20397.625634670258, "timesteps_since_restore": 6092800, "iterations_since_restore": 476, "perf": {"cpu_util_percent": 31.27608695652173, "ram_util_percent": 59.04347826086958}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 607.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.615}, "custom_metrics": {"sparse_reward_mean": 210.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.83, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.48, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.44, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.99, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.43, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.02, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.76, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.62, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.55, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.69, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.02, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.02, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 636.0, 578.0, 639.0, 582.0, 576.0, 587.0, 636.0, 582.0, 533.0, 633.0, 636.0, 630.0, 630.0, 579.0, 587.0, 630.0, 639.0, 639.0, 579.0, 636.0, 180.0, 587.0, 633.0, 630.0, 627.0, 587.0, 633.0, 633.0, 582.0, 630.0, 633.0, 639.0, 582.0, 636.0, 633.0, 587.0, 633.0, 633.0, 630.0, 639.0, 639.0, 624.0, 630.0, 582.0, 408.0, 636.0, 579.0, 636.0, 633.0, 633.0, 636.0, 639.0, 627.0, 633.0, 636.0, 630.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 639.0, 630.0, 633.0, 633.0, 630.0, 636.0, 582.0, 636.0, 633.0, 636.0, 291.0, 639.0, 576.0, 587.0, 630.0, 587.0, 633.0, 621.0, 582.0, 630.0, 584.0, 465.0, 636.0, 582.0, 630.0, 636.0, 636.0, 630.0, 636.0, 624.0, 579.0, 624.0, 633.0, 636.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 319.0, 317.0, 316.0, 320.0, 279.0, 299.0, 319.0, 320.0, 293.0, 289.0, 291.0, 285.0, 293.0, 294.0, 314.0, 322.0, 288.0, 294.0, 254.0, 279.0, 319.0, 314.0, 319.0, 317.0, 317.0, 313.0, 311.0, 319.0, 282.0, 297.0, 295.0, 292.0, 316.0, 314.0, 319.0, 320.0, 316.0, 323.0, 288.0, 291.0, 324.0, 312.0, 91.0, 89.0, 296.0, 291.0, 319.0, 314.0, 311.0, 319.0, 316.0, 311.0, 306.0, 281.0, 321.0, 312.0, 316.0, 317.0, 288.0, 294.0, 311.0, 319.0, 316.0, 317.0, 314.0, 325.0, 285.0, 297.0, 319.0, 317.0, 316.0, 317.0, 293.0, 294.0, 313.0, 320.0, 319.0, 314.0, 316.0, 314.0, 322.0, 317.0, 322.0, 317.0, 313.0, 311.0, 311.0, 319.0, 291.0, 291.0, 208.0, 200.0, 314.0, 322.0, 284.0, 295.0, 319.0, 317.0, 313.0, 320.0, 311.0, 322.0, 319.0, 317.0, 322.0, 317.0, 313.0, 314.0, 322.0, 311.0, 319.0, 317.0, 321.0, 309.0, 319.0, 320.0, 318.0, 312.0, 293.0, 294.0, 311.0, 319.0, 319.0, 317.0, 288.0, 299.0, 322.0, 317.0, 316.0, 314.0, 317.0, 316.0, 319.0, 314.0, 311.0, 319.0, 324.0, 312.0, 294.0, 288.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 145.0, 146.0, 319.0, 320.0, 292.0, 284.0, 293.0, 294.0, 313.0, 317.0, 296.0, 291.0, 318.0, 315.0, 313.0, 308.0, 296.0, 286.0, 314.0, 316.0, 298.0, 286.0, 228.0, 237.0, 319.0, 317.0, 296.0, 286.0, 313.0, 317.0, 327.0, 309.0, 322.0, 314.0, 316.0, 314.0, 319.0, 317.0, 311.0, 313.0, 283.0, 296.0, 310.0, 314.0, 316.0, 317.0, 319.0, 317.0, 289.0, 293.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7681470637203944, "mean_processing_ms": 0.22224947461581387, "mean_inference_ms": 1.3708820792528533}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11448000, "num_steps_sampled": 6105600, "sample_time_ms": 22237.355, "load_time_ms": 38.878, "grad_time_ms": 10934.933, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.003096085973083973, "policy_loss": -0.004263754468411207, "vf_loss": 79.16039276123047, "vf_explained_var": 0.7912160754203796, "kl": 0.001874853391200304, "entropy": 1.1124080419540405, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6105600, "episodes_total": 15264, "training_iteration": 477, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-53-45", "timestamp": 1660262025, "time_this_iter_s": 35.63289189338684, "time_total_s": 20433.258526563644, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20433.258526563644, "timesteps_since_restore": 6105600, "iterations_since_restore": 477, "perf": {"cpu_util_percent": 34.068627450980394, "ram_util_percent": 59.11960784313726}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 606.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 303.34}, "custom_metrics": {"sparse_reward_mean": 210.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.68, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.64, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.34, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.09, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.87, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.63, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.84, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, 
"useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.87, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.87, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 627.0, 633.0, 561.0, 636.0, 630.0, 525.0, 633.0, 636.0, 636.0, 582.0, 636.0, 636.0, 630.0, 633.0, 636.0, 639.0, 630.0, 636.0, 587.0, 582.0, 636.0, 544.0, 633.0, 636.0, 630.0, 639.0, 584.0, 636.0, 579.0, 576.0, 633.0, 639.0, 630.0, 633.0, 633.0, 630.0, 636.0, 582.0, 636.0, 633.0, 636.0, 291.0, 639.0, 576.0, 587.0, 630.0, 587.0, 633.0, 621.0, 582.0, 630.0, 584.0, 465.0, 636.0, 582.0, 630.0, 636.0, 636.0, 630.0, 636.0, 624.0, 579.0, 624.0, 633.0, 636.0, 582.0, 636.0, 630.0, 636.0, 636.0, 578.0, 639.0, 582.0, 576.0, 587.0, 636.0, 582.0, 533.0, 633.0, 636.0, 630.0, 630.0, 579.0, 587.0, 630.0, 639.0, 639.0, 579.0, 636.0, 180.0, 587.0, 633.0, 630.0, 627.0, 587.0, 633.0, 633.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [316.0, 314.0, 319.0, 308.0, 316.0, 317.0, 279.0, 282.0, 316.0, 320.0, 316.0, 314.0, 259.0, 266.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 291.0, 291.0, 319.0, 317.0, 319.0, 317.0, 319.0, 311.0, 313.0, 320.0, 316.0, 320.0, 317.0, 322.0, 305.0, 325.0, 313.0, 323.0, 294.0, 293.0, 291.0, 291.0, 319.0, 317.0, 271.0, 273.0, 311.0, 322.0, 314.0, 322.0, 316.0, 314.0, 320.0, 319.0, 285.0, 299.0, 321.0, 315.0, 294.0, 285.0, 285.0, 291.0, 319.0, 314.0, 322.0, 317.0, 316.0, 314.0, 317.0, 316.0, 319.0, 314.0, 311.0, 319.0, 324.0, 312.0, 294.0, 288.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 145.0, 146.0, 319.0, 320.0, 292.0, 284.0, 293.0, 294.0, 313.0, 317.0, 296.0, 291.0, 318.0, 315.0, 313.0, 308.0, 296.0, 286.0, 314.0, 316.0, 298.0, 286.0, 228.0, 237.0, 319.0, 317.0, 296.0, 286.0, 313.0, 317.0, 327.0, 309.0, 322.0, 314.0, 316.0, 314.0, 319.0, 317.0, 311.0, 313.0, 283.0, 296.0, 310.0, 314.0, 316.0, 317.0, 319.0, 317.0, 289.0, 293.0, 319.0, 317.0, 313.0, 317.0, 319.0, 317.0, 316.0, 320.0, 279.0, 299.0, 319.0, 320.0, 293.0, 289.0, 291.0, 285.0, 293.0, 294.0, 314.0, 322.0, 288.0, 294.0, 254.0, 279.0, 319.0, 314.0, 319.0, 317.0, 317.0, 313.0, 311.0, 319.0, 282.0, 297.0, 295.0, 292.0, 316.0, 314.0, 319.0, 320.0, 316.0, 323.0, 288.0, 291.0, 324.0, 312.0, 91.0, 89.0, 296.0, 291.0, 319.0, 314.0, 311.0, 319.0, 316.0, 311.0, 306.0, 281.0, 321.0, 312.0, 316.0, 317.0, 288.0, 294.0, 311.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7675359476043465, "mean_processing_ms": 0.22213291684157827, "mean_inference_ms": 1.3705566142110346}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11472000, 
"num_steps_sampled": 6118400, "sample_time_ms": 22722.857, "load_time_ms": 39.067, "grad_time_ms": 10783.352, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003133426944259554, "policy_loss": -0.007279651705175638, "vf_loss": 75.2169418334961, "vf_explained_var": 0.7742903232574463, "kl": 0.0020874079782515764, "entropy": 1.110769271850586, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6118400, "episodes_total": 15296, "training_iteration": 478, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-54-19", "timestamp": 1660262059, "time_this_iter_s": 34.45740509033203, "time_total_s": 20467.715931653976, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": 
false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20467.715931653976, "timesteps_since_restore": 6118400, "iterations_since_restore": 478, "perf": {"cpu_util_percent": 28.777083333333334, "ram_util_percent": 59.04374999999999}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 608.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 304.12}, "custom_metrics": {"sparse_reward_mean": 210.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 186.64, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.8, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.19, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.45, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.96, "dish_pickup_agent_1_min": 3, 
"dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [606.0, 582.0, 636.0, 563.0, 615.0, 627.0, 582.0, 633.0, 587.0, 627.0, 630.0, 579.0, 630.0, 633.0, 462.0, 630.0, 582.0, 633.0, 630.0, 633.0, 636.0, 636.0, 584.0, 633.0, 582.0, 636.0, 633.0, 630.0, 582.0, 630.0, 573.0, 627.0, 633.0, 636.0, 582.0, 636.0, 630.0, 636.0, 636.0, 578.0, 639.0, 582.0, 576.0, 587.0, 636.0, 582.0, 533.0, 633.0, 636.0, 630.0, 630.0, 579.0, 587.0, 630.0, 639.0, 639.0, 579.0, 636.0, 180.0, 587.0, 633.0, 630.0, 627.0, 587.0, 633.0, 633.0, 582.0, 630.0, 630.0, 627.0, 633.0, 561.0, 636.0, 630.0, 525.0, 633.0, 636.0, 636.0, 582.0, 636.0, 636.0, 630.0, 633.0, 636.0, 639.0, 630.0, 636.0, 587.0, 582.0, 636.0, 544.0, 633.0, 636.0, 630.0, 
639.0, 584.0, 636.0, 579.0, 576.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [302.0, 304.0, 289.0, 293.0, 319.0, 317.0, 284.0, 279.0, 305.0, 310.0, 319.0, 308.0, 294.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 311.0, 321.0, 309.0, 283.0, 296.0, 311.0, 319.0, 314.0, 319.0, 239.0, 223.0, 316.0, 314.0, 288.0, 294.0, 319.0, 314.0, 319.0, 311.0, 311.0, 322.0, 316.0, 320.0, 314.0, 322.0, 307.0, 277.0, 314.0, 319.0, 288.0, 294.0, 319.0, 317.0, 316.0, 317.0, 313.0, 317.0, 288.0, 294.0, 310.0, 320.0, 292.0, 281.0, 321.0, 306.0, 316.0, 317.0, 319.0, 317.0, 289.0, 293.0, 319.0, 317.0, 313.0, 317.0, 319.0, 317.0, 316.0, 320.0, 279.0, 299.0, 319.0, 320.0, 293.0, 289.0, 291.0, 285.0, 293.0, 294.0, 314.0, 322.0, 288.0, 294.0, 254.0, 279.0, 319.0, 314.0, 319.0, 317.0, 317.0, 313.0, 311.0, 319.0, 282.0, 297.0, 295.0, 292.0, 316.0, 314.0, 319.0, 320.0, 316.0, 323.0, 288.0, 291.0, 324.0, 312.0, 91.0, 89.0, 296.0, 291.0, 319.0, 314.0, 311.0, 319.0, 316.0, 311.0, 306.0, 281.0, 321.0, 312.0, 316.0, 317.0, 288.0, 294.0, 311.0, 319.0, 316.0, 314.0, 319.0, 308.0, 316.0, 317.0, 279.0, 282.0, 316.0, 320.0, 316.0, 314.0, 259.0, 266.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 291.0, 291.0, 319.0, 317.0, 319.0, 317.0, 319.0, 311.0, 313.0, 320.0, 316.0, 320.0, 317.0, 322.0, 305.0, 325.0, 313.0, 323.0, 294.0, 293.0, 291.0, 291.0, 319.0, 317.0, 271.0, 273.0, 311.0, 322.0, 314.0, 322.0, 316.0, 314.0, 320.0, 319.0, 285.0, 299.0, 321.0, 315.0, 294.0, 285.0, 285.0, 291.0, 319.0, 314.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.7669199085026867, "mean_processing_ms": 0.22201572097443972, "mean_inference_ms": 1.3701586168616826}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11496000, "num_steps_sampled": 6131200, "sample_time_ms": 22699.68, "load_time_ms": 39.016, "grad_time_ms": 10734.597, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008129358175210655, "policy_loss": -0.006242450326681137, "vf_loss": 76.11907196044922, "vf_explained_var": 0.7632293701171875, "kl": 0.0021639217156916857, "entropy": 1.1130343675613403, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6131200, "episodes_total": 15328, "training_iteration": 479, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-54-53", "timestamp": 1660262093, "time_this_iter_s": 33.185157775878906, "time_total_s": 20500.901089429855, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20500.901089429855, "timesteps_since_restore": 6131200, "iterations_since_restore": 479, "perf": {"cpu_util_percent": 30.472340425531915, "ram_util_percent": 58.97021276595746}} -{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 613.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 306.835}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.07, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.41, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.34, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.56, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.41, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 
17.9, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.05, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.41, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.9, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.41, 
"viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.9, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 630.0, 579.0, 639.0, 636.0, 639.0, 639.0, 587.0, 636.0, 587.0, 627.0, 633.0, 576.0, 582.0, 633.0, 636.0, 630.0, 630.0, 633.0, 636.0, 525.0, 630.0, 582.0, 587.0, 639.0, 627.0, 582.0, 582.0, 633.0, 636.0, 630.0, 633.0, 633.0, 582.0, 630.0, 630.0, 627.0, 633.0, 561.0, 636.0, 630.0, 525.0, 633.0, 636.0, 636.0, 582.0, 636.0, 636.0, 630.0, 633.0, 636.0, 639.0, 630.0, 636.0, 587.0, 582.0, 636.0, 544.0, 633.0, 636.0, 630.0, 639.0, 584.0, 636.0, 
579.0, 576.0, 633.0, 606.0, 582.0, 636.0, 563.0, 615.0, 627.0, 582.0, 633.0, 587.0, 627.0, 630.0, 579.0, 630.0, 633.0, 462.0, 630.0, 582.0, 633.0, 630.0, 633.0, 636.0, 636.0, 584.0, 633.0, 582.0, 636.0, 633.0, 630.0, 582.0, 630.0, 573.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 296.0, 283.0, 319.0, 320.0, 316.0, 320.0, 317.0, 322.0, 319.0, 320.0, 288.0, 299.0, 316.0, 320.0, 301.0, 286.0, 313.0, 314.0, 317.0, 316.0, 291.0, 285.0, 286.0, 296.0, 319.0, 314.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 314.0, 322.0, 257.0, 268.0, 316.0, 314.0, 296.0, 286.0, 296.0, 291.0, 319.0, 320.0, 305.0, 322.0, 293.0, 289.0, 294.0, 288.0, 308.0, 325.0, 319.0, 317.0, 313.0, 317.0, 321.0, 312.0, 316.0, 317.0, 288.0, 294.0, 311.0, 319.0, 316.0, 314.0, 319.0, 308.0, 316.0, 317.0, 279.0, 282.0, 316.0, 320.0, 316.0, 314.0, 259.0, 266.0, 314.0, 319.0, 319.0, 317.0, 322.0, 314.0, 291.0, 291.0, 319.0, 317.0, 319.0, 317.0, 319.0, 311.0, 313.0, 320.0, 316.0, 320.0, 317.0, 322.0, 305.0, 325.0, 313.0, 323.0, 294.0, 293.0, 291.0, 291.0, 319.0, 317.0, 271.0, 273.0, 311.0, 322.0, 314.0, 322.0, 316.0, 314.0, 320.0, 319.0, 285.0, 299.0, 321.0, 315.0, 294.0, 285.0, 285.0, 291.0, 319.0, 314.0, 302.0, 304.0, 289.0, 293.0, 319.0, 317.0, 284.0, 279.0, 305.0, 310.0, 319.0, 308.0, 294.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 311.0, 321.0, 309.0, 283.0, 296.0, 311.0, 319.0, 314.0, 319.0, 239.0, 223.0, 316.0, 314.0, 288.0, 294.0, 319.0, 314.0, 319.0, 
311.0, 311.0, 322.0, 316.0, 320.0, 314.0, 322.0, 307.0, 277.0, 314.0, 319.0, 288.0, 294.0, 319.0, 317.0, 316.0, 317.0, 313.0, 317.0, 288.0, 294.0, 310.0, 320.0, 292.0, 281.0, 321.0, 306.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7662995807176626, "mean_processing_ms": 0.22189831707550936, "mean_inference_ms": 1.3696437835161013}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11520000, "num_steps_sampled": 6144000, "sample_time_ms": 22732.007, "load_time_ms": 38.46, "grad_time_ms": 10636.502, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033598102163523436, "policy_loss": -0.003906731028109789, "vf_loss": 78.25418090820312, "vf_explained_var": 0.768868625164032, "kl": 0.0016973327146843076, "entropy": 1.117727279663086, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6144000, "episodes_total": 15360, "training_iteration": 480, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-55-24", "timestamp": 1660262124, "time_this_iter_s": 31.27005410194397, "time_total_s": 20532.1711435318, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, 
"horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 
180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20532.1711435318, "timesteps_since_restore": 6144000, "iterations_since_restore": 480, "perf": {"cpu_util_percent": 31.795555555555556, "ram_util_percent": 59.01333333333335}} -{"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 611.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 305.56}, "custom_metrics": {"sparse_reward_mean": 212.2, 
"sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 186.72, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.38, "onion_pickup_agent_1_min": 14, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.18, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 
0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.29, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.89, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.52, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.29, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.89, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.29, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.89, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [578.0, 582.0, 630.0, 587.0, 570.0, 587.0, 630.0, 612.0, 624.0, 633.0, 570.0, 587.0, 582.0, 630.0, 587.0, 636.0, 636.0, 582.0, 552.0, 636.0, 584.0, 633.0, 590.0, 624.0, 630.0, 630.0, 636.0, 609.0, 630.0, 633.0, 633.0, 636.0, 636.0, 579.0, 576.0, 633.0, 606.0, 582.0, 636.0, 563.0, 615.0, 
627.0, 582.0, 633.0, 587.0, 627.0, 630.0, 579.0, 630.0, 633.0, 462.0, 630.0, 582.0, 633.0, 630.0, 633.0, 636.0, 636.0, 584.0, 633.0, 582.0, 636.0, 633.0, 630.0, 582.0, 630.0, 573.0, 627.0, 633.0, 633.0, 630.0, 579.0, 639.0, 636.0, 639.0, 639.0, 587.0, 636.0, 587.0, 627.0, 633.0, 576.0, 582.0, 633.0, 636.0, 630.0, 630.0, 633.0, 636.0, 525.0, 630.0, 582.0, 587.0, 639.0, 627.0, 582.0, 582.0, 633.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 283.0, 288.0, 294.0, 313.0, 317.0, 290.0, 297.0, 284.0, 286.0, 299.0, 288.0, 315.0, 315.0, 304.0, 308.0, 317.0, 307.0, 311.0, 322.0, 295.0, 275.0, 296.0, 291.0, 293.0, 289.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 314.0, 322.0, 291.0, 291.0, 279.0, 273.0, 324.0, 312.0, 291.0, 293.0, 319.0, 314.0, 296.0, 294.0, 311.0, 313.0, 311.0, 319.0, 313.0, 317.0, 319.0, 317.0, 301.0, 308.0, 316.0, 314.0, 311.0, 322.0, 316.0, 317.0, 319.0, 317.0, 321.0, 315.0, 294.0, 285.0, 285.0, 291.0, 319.0, 314.0, 302.0, 304.0, 289.0, 293.0, 319.0, 317.0, 284.0, 279.0, 305.0, 310.0, 319.0, 308.0, 294.0, 288.0, 319.0, 314.0, 293.0, 294.0, 316.0, 311.0, 321.0, 309.0, 283.0, 296.0, 311.0, 319.0, 314.0, 319.0, 239.0, 223.0, 316.0, 314.0, 288.0, 294.0, 319.0, 314.0, 319.0, 311.0, 311.0, 322.0, 316.0, 320.0, 314.0, 322.0, 307.0, 277.0, 314.0, 319.0, 288.0, 294.0, 319.0, 317.0, 316.0, 317.0, 313.0, 317.0, 288.0, 294.0, 310.0, 320.0, 292.0, 281.0, 321.0, 306.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 296.0, 283.0, 319.0, 320.0, 316.0, 320.0, 317.0, 
322.0, 319.0, 320.0, 288.0, 299.0, 316.0, 320.0, 301.0, 286.0, 313.0, 314.0, 317.0, 316.0, 291.0, 285.0, 286.0, 296.0, 319.0, 314.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 314.0, 322.0, 257.0, 268.0, 316.0, 314.0, 296.0, 286.0, 296.0, 291.0, 319.0, 320.0, 305.0, 322.0, 293.0, 289.0, 294.0, 288.0, 308.0, 325.0, 319.0, 317.0, 313.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7656785099973358, "mean_processing_ms": 0.2217805887765104, "mean_inference_ms": 1.3690072686883668}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11544000, "num_steps_sampled": 6156800, "sample_time_ms": 22631.205, "load_time_ms": 38.401, "grad_time_ms": 10438.368, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -1.2905181392852683e-05, "policy_loss": -0.007116043474525213, "vf_loss": 76.6054458618164, "vf_explained_var": 0.7700133323669434, "kl": 0.0019200993701815605, "entropy": 1.1147748231887817, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6156800, "episodes_total": 15392, "training_iteration": 481, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-55-56", "timestamp": 1660262156, "time_this_iter_s": 32.10103392601013, "time_total_s": 20564.27217745781, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, 
"grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20564.27217745781, "timesteps_since_restore": 6156800, "iterations_since_restore": 481, "perf": {"cpu_util_percent": 31.702222222222222, "ram_util_percent": 59.035555555555575}} -{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 613.16, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 306.58}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.56, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.59, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.53, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.04, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, 
"useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.08, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.7, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 0.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.08, 
"optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.08, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 630.0, 587.0, 630.0, 621.0, 639.0, 636.0, 630.0, 630.0, 639.0, 561.0, 
633.0, 630.0, 627.0, 582.0, 582.0, 636.0, 587.0, 636.0, 630.0, 636.0, 513.0, 576.0, 633.0, 633.0, 630.0, 636.0, 582.0, 587.0, 633.0, 624.0, 633.0, 582.0, 630.0, 573.0, 627.0, 633.0, 633.0, 630.0, 579.0, 639.0, 636.0, 639.0, 639.0, 587.0, 636.0, 587.0, 627.0, 633.0, 576.0, 582.0, 633.0, 636.0, 630.0, 630.0, 633.0, 636.0, 525.0, 630.0, 582.0, 587.0, 639.0, 627.0, 582.0, 582.0, 633.0, 636.0, 630.0, 578.0, 582.0, 630.0, 587.0, 570.0, 587.0, 630.0, 612.0, 624.0, 633.0, 570.0, 587.0, 582.0, 630.0, 587.0, 636.0, 636.0, 582.0, 552.0, 636.0, 584.0, 633.0, 590.0, 624.0, 630.0, 630.0, 636.0, 609.0, 630.0, 633.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 323.0, 313.0, 317.0, 293.0, 294.0, 319.0, 311.0, 308.0, 313.0, 319.0, 320.0, 314.0, 322.0, 316.0, 314.0, 311.0, 319.0, 317.0, 322.0, 273.0, 288.0, 316.0, 317.0, 316.0, 314.0, 313.0, 314.0, 291.0, 291.0, 291.0, 291.0, 314.0, 322.0, 298.0, 289.0, 314.0, 322.0, 316.0, 314.0, 324.0, 312.0, 249.0, 264.0, 280.0, 296.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 319.0, 317.0, 288.0, 294.0, 296.0, 291.0, 316.0, 317.0, 308.0, 316.0, 319.0, 314.0, 288.0, 294.0, 310.0, 320.0, 292.0, 281.0, 321.0, 306.0, 311.0, 322.0, 319.0, 314.0, 308.0, 322.0, 296.0, 283.0, 319.0, 320.0, 316.0, 320.0, 317.0, 322.0, 319.0, 320.0, 288.0, 299.0, 316.0, 320.0, 301.0, 286.0, 313.0, 314.0, 317.0, 316.0, 291.0, 285.0, 286.0, 296.0, 319.0, 314.0, 319.0, 317.0, 316.0, 314.0, 313.0, 317.0, 316.0, 317.0, 314.0, 322.0, 257.0, 268.0, 316.0, 314.0, 296.0, 
286.0, 296.0, 291.0, 319.0, 320.0, 305.0, 322.0, 293.0, 289.0, 294.0, 288.0, 308.0, 325.0, 319.0, 317.0, 313.0, 317.0, 295.0, 283.0, 288.0, 294.0, 313.0, 317.0, 290.0, 297.0, 284.0, 286.0, 299.0, 288.0, 315.0, 315.0, 304.0, 308.0, 317.0, 307.0, 311.0, 322.0, 295.0, 275.0, 296.0, 291.0, 293.0, 289.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 314.0, 322.0, 291.0, 291.0, 279.0, 273.0, 324.0, 312.0, 291.0, 293.0, 319.0, 314.0, 296.0, 294.0, 311.0, 313.0, 311.0, 319.0, 313.0, 317.0, 319.0, 317.0, 301.0, 308.0, 316.0, 314.0, 311.0, 322.0, 316.0, 317.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7650611031190503, "mean_processing_ms": 0.2216638185152556, "mean_inference_ms": 1.3683447229242562}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11568000, "num_steps_sampled": 6169600, "sample_time_ms": 22476.544, "load_time_ms": 38.386, "grad_time_ms": 10500.152, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 5.255569703876972e-05, "policy_loss": -0.006667418871074915, "vf_loss": 72.75797271728516, "vf_explained_var": 0.775852620601654, "kl": 0.0019747635815292597, "entropy": 1.111660122871399, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6169600, "episodes_total": 15424, "training_iteration": 482, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-56-28", "timestamp": 1660262188, "time_this_iter_s": 31.53903889656067, "time_total_s": 20595.81121635437, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20595.81121635437, "timesteps_since_restore": 6169600, "iterations_since_restore": 482, "perf": {"cpu_util_percent": 
34.43555555555556, "ram_util_percent": 58.98222222222224}} -{"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 614.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.265}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 187.73, "shaped_reward_min": 152, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.81, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.41, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.57, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, 
"onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.31, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.68, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.41, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, 
"optimal_onion_potting_agent_0_mean": 16.31, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.31, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 582.0, 627.0, 633.0, 636.0, 630.0, 582.0, 627.0, 630.0, 579.0, 630.0, 630.0, 633.0, 630.0, 630.0, 633.0, 530.0, 636.0, 633.0, 633.0, 576.0, 633.0, 636.0, 630.0, 587.0, 633.0, 633.0, 581.0, 587.0, 633.0, 630.0, 636.0, 582.0, 633.0, 636.0, 630.0, 578.0, 582.0, 630.0, 587.0, 570.0, 587.0, 630.0, 612.0, 624.0, 633.0, 570.0, 587.0, 582.0, 630.0, 587.0, 636.0, 636.0, 582.0, 552.0, 636.0, 584.0, 633.0, 590.0, 624.0, 630.0, 630.0, 636.0, 609.0, 630.0, 633.0, 633.0, 636.0, 636.0, 630.0, 587.0, 630.0, 621.0, 639.0, 636.0, 630.0, 630.0, 639.0, 561.0, 633.0, 630.0, 627.0, 582.0, 582.0, 636.0, 587.0, 636.0, 630.0, 636.0, 513.0, 576.0, 633.0, 633.0, 630.0, 636.0, 582.0, 587.0, 633.0, 624.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 314.0, 293.0, 289.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 281.0, 301.0, 313.0, 314.0, 322.0, 308.0, 291.0, 288.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 311.0, 319.0, 316.0, 314.0, 316.0, 317.0, 268.0, 262.0, 316.0, 320.0, 316.0, 317.0, 309.0, 324.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 296.0, 291.0, 318.0, 315.0, 318.0, 315.0, 296.0, 285.0, 293.0, 294.0, 309.0, 324.0, 316.0, 314.0, 319.0, 317.0, 294.0, 288.0, 308.0, 325.0, 319.0, 317.0, 313.0, 317.0, 295.0, 283.0, 288.0, 294.0, 313.0, 317.0, 290.0, 297.0, 284.0, 286.0, 299.0, 288.0, 315.0, 315.0, 304.0, 308.0, 317.0, 307.0, 311.0, 
322.0, 295.0, 275.0, 296.0, 291.0, 293.0, 289.0, 316.0, 314.0, 288.0, 299.0, 319.0, 317.0, 314.0, 322.0, 291.0, 291.0, 279.0, 273.0, 324.0, 312.0, 291.0, 293.0, 319.0, 314.0, 296.0, 294.0, 311.0, 313.0, 311.0, 319.0, 313.0, 317.0, 319.0, 317.0, 301.0, 308.0, 316.0, 314.0, 311.0, 322.0, 316.0, 317.0, 319.0, 317.0, 313.0, 323.0, 313.0, 317.0, 293.0, 294.0, 319.0, 311.0, 308.0, 313.0, 319.0, 320.0, 314.0, 322.0, 316.0, 314.0, 311.0, 319.0, 317.0, 322.0, 273.0, 288.0, 316.0, 317.0, 316.0, 314.0, 313.0, 314.0, 291.0, 291.0, 291.0, 291.0, 314.0, 322.0, 298.0, 289.0, 314.0, 322.0, 316.0, 314.0, 324.0, 312.0, 249.0, 264.0, 280.0, 296.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 319.0, 317.0, 288.0, 294.0, 296.0, 291.0, 316.0, 317.0, 308.0, 316.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.764446149993761, "mean_processing_ms": 0.22154654084728279, "mean_inference_ms": 1.3676326972645958}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11592000, "num_steps_sampled": 6182400, "sample_time_ms": 22364.392, "load_time_ms": 38.739, "grad_time_ms": 10348.578, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000403035432100296, "policy_loss": -0.006678896490484476, "vf_loss": 76.43026733398438, "vf_explained_var": 0.76324862241745, "kl": 0.0020988413598388433, "entropy": 1.1221919059753418, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6182400, "episodes_total": 15456, "training_iteration": 483, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-56-57", "timestamp": 1660262217, "time_this_iter_s": 29.545005083084106, "time_total_s": 20625.356221437454, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": 
"truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, 
"evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 
0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20625.356221437454, "timesteps_since_restore": 6182400, "iterations_since_restore": 483, "perf": {"cpu_util_percent": 31.23658536585366, "ram_util_percent": 59.02682926829268}} -{"episode_reward_max": 639.0, "episode_reward_min": 120.0, "episode_reward_mean": 614.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 54.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 307.045}, "custom_metrics": {"sparse_reward_mean": 213.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 188.09, "shaped_reward_min": 40, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 
16.39, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.35, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.38, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.59, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.91, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.16, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.38, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.38, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 630.0, 636.0, 627.0, 633.0, 587.0, 633.0, 639.0, 630.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 630.0, 636.0, 120.0, 636.0, 630.0, 633.0, 633.0, 633.0, 630.0, 630.0, 630.0, 639.0, 630.0, 633.0, 633.0, 636.0, 636.0, 630.0, 587.0, 630.0, 621.0, 639.0, 636.0, 630.0, 630.0, 639.0, 561.0, 633.0, 630.0, 627.0, 582.0, 582.0, 636.0, 587.0, 636.0, 630.0, 636.0, 513.0, 576.0, 633.0, 633.0, 630.0, 636.0, 582.0, 587.0, 633.0, 624.0, 633.0, 636.0, 582.0, 627.0, 633.0, 636.0, 630.0, 582.0, 627.0, 630.0, 579.0, 630.0, 630.0, 633.0, 630.0, 630.0, 633.0, 530.0, 636.0, 633.0, 633.0, 576.0, 633.0, 636.0, 630.0, 587.0, 633.0, 633.0, 581.0, 587.0, 633.0, 630.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 301.0, 286.0, 324.0, 312.0, 289.0, 293.0, 291.0, 291.0, 313.0, 320.0, 321.0, 309.0, 319.0, 317.0, 316.0, 311.0, 319.0, 314.0, 291.0, 296.0, 319.0, 314.0, 319.0, 320.0, 316.0, 314.0, 291.0, 296.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 320.0, 314.0, 322.0, 319.0, 311.0, 319.0, 317.0, 66.0, 54.0, 314.0, 322.0, 310.0, 320.0, 314.0, 319.0, 319.0, 314.0, 322.0, 311.0, 308.0, 322.0, 311.0, 319.0, 311.0, 
319.0, 317.0, 322.0, 316.0, 314.0, 311.0, 322.0, 316.0, 317.0, 319.0, 317.0, 313.0, 323.0, 313.0, 317.0, 293.0, 294.0, 319.0, 311.0, 308.0, 313.0, 319.0, 320.0, 314.0, 322.0, 316.0, 314.0, 311.0, 319.0, 317.0, 322.0, 273.0, 288.0, 316.0, 317.0, 316.0, 314.0, 313.0, 314.0, 291.0, 291.0, 291.0, 291.0, 314.0, 322.0, 298.0, 289.0, 314.0, 322.0, 316.0, 314.0, 324.0, 312.0, 249.0, 264.0, 280.0, 296.0, 316.0, 317.0, 316.0, 317.0, 311.0, 319.0, 319.0, 317.0, 288.0, 294.0, 296.0, 291.0, 316.0, 317.0, 308.0, 316.0, 319.0, 314.0, 322.0, 314.0, 293.0, 289.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 281.0, 301.0, 313.0, 314.0, 322.0, 308.0, 291.0, 288.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 311.0, 319.0, 316.0, 314.0, 316.0, 317.0, 268.0, 262.0, 316.0, 320.0, 316.0, 317.0, 309.0, 324.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 296.0, 291.0, 318.0, 315.0, 318.0, 315.0, 296.0, 285.0, 293.0, 294.0, 309.0, 324.0, 316.0, 314.0, 319.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7638319008211926, "mean_processing_ms": 0.22142837816064478, "mean_inference_ms": 1.3669160139559398}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11616000, "num_steps_sampled": 6195200, "sample_time_ms": 22342.562, "load_time_ms": 38.969, "grad_time_ms": 10165.472, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003674185834825039, "policy_loss": -0.0030232470016926527, "vf_loss": 72.50147247314453, "vf_explained_var": 0.7972453236579895, "kl": 0.002131336135789752, "entropy": 1.1054468154907227, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6195200, "episodes_total": 15488, "training_iteration": 484, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-57-28", "timestamp": 1660262248, "time_this_iter_s": 30.70011305809021, "time_total_s": 20656.056334495544, 
"pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, 
"eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 
8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20656.056334495544, "timesteps_since_restore": 6195200, "iterations_since_restore": 484, "perf": {"cpu_util_percent": 29.970454545454547, "ram_util_percent": 59.07727272727273}} -{"episode_reward_max": 639.0, "episode_reward_min": 120.0, "episode_reward_mean": 616.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 54.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.095}, "custom_metrics": {"sparse_reward_mean": 213.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 188.99, "shaped_reward_min": 40, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 16.99, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.25, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.54, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.01, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.76, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, 
"soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.54, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.54, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 636.0, 639.0, 590.0, 630.0, 639.0, 639.0, 630.0, 636.0, 636.0, 639.0, 633.0, 636.0, 582.0, 636.0, 621.0, 630.0, 633.0, 630.0, 633.0, 576.0, 582.0, 636.0, 630.0, 630.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 633.0, 587.0, 633.0, 624.0, 633.0, 636.0, 582.0, 627.0, 633.0, 636.0, 630.0, 582.0, 627.0, 630.0, 579.0, 630.0, 630.0, 633.0, 630.0, 630.0, 633.0, 530.0, 636.0, 633.0, 633.0, 576.0, 633.0, 636.0, 630.0, 587.0, 633.0, 633.0, 581.0, 587.0, 633.0, 630.0, 636.0, 582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 630.0, 636.0, 627.0, 633.0, 587.0, 633.0, 639.0, 630.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 630.0, 636.0, 120.0, 636.0, 630.0, 633.0, 633.0, 633.0, 630.0, 630.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 294.0, 319.0, 317.0, 319.0, 320.0, 299.0, 291.0, 316.0, 314.0, 319.0, 320.0, 324.0, 315.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 317.0, 322.0, 314.0, 319.0, 314.0, 322.0, 286.0, 296.0, 317.0, 319.0, 308.0, 
313.0, 313.0, 317.0, 319.0, 314.0, 321.0, 309.0, 319.0, 314.0, 299.0, 277.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 314.0, 288.0, 299.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 327.0, 306.0, 296.0, 291.0, 316.0, 317.0, 308.0, 316.0, 319.0, 314.0, 322.0, 314.0, 293.0, 289.0, 313.0, 314.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 281.0, 301.0, 313.0, 314.0, 322.0, 308.0, 291.0, 288.0, 311.0, 319.0, 318.0, 312.0, 319.0, 314.0, 311.0, 319.0, 316.0, 314.0, 316.0, 317.0, 268.0, 262.0, 316.0, 320.0, 316.0, 317.0, 309.0, 324.0, 288.0, 288.0, 314.0, 319.0, 314.0, 322.0, 311.0, 319.0, 296.0, 291.0, 318.0, 315.0, 318.0, 315.0, 296.0, 285.0, 293.0, 294.0, 309.0, 324.0, 316.0, 314.0, 319.0, 317.0, 290.0, 292.0, 301.0, 286.0, 324.0, 312.0, 289.0, 293.0, 291.0, 291.0, 313.0, 320.0, 321.0, 309.0, 319.0, 317.0, 316.0, 311.0, 319.0, 314.0, 291.0, 296.0, 319.0, 314.0, 319.0, 320.0, 316.0, 314.0, 291.0, 296.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 320.0, 314.0, 322.0, 319.0, 311.0, 319.0, 317.0, 66.0, 54.0, 314.0, 322.0, 310.0, 320.0, 314.0, 319.0, 319.0, 314.0, 322.0, 311.0, 308.0, 322.0, 311.0, 319.0, 311.0, 319.0, 317.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7632241989034774, "mean_processing_ms": 0.22131123162111455, "mean_inference_ms": 1.3662700284091551}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11640000, "num_steps_sampled": 6208000, "sample_time_ms": 22316.513, "load_time_ms": 39.137, "grad_time_ms": 10131.169, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029844159726053476, "policy_loss": -0.003889852436259389, "vf_loss": 74.29019165039062, "vf_explained_var": 0.7697036862373352, "kl": 0.0019323105225339532, "entropy": 1.1094969511032104, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6208000, "episodes_total": 15520, 
"training_iteration": 485, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-58-02", "timestamp": 1660262282, "time_this_iter_s": 33.77160096168518, "time_total_s": 20689.82793545723, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, 
"preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": 
".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20689.82793545723, "timesteps_since_restore": 6208000, "iterations_since_restore": 485, "perf": {"cpu_util_percent": 30.51875, "ram_util_percent": 58.97291666666666}} -{"episode_reward_max": 639.0, "episode_reward_min": 120.0, "episode_reward_mean": 613.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 54.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.905}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 188.61, "shaped_reward_min": 40, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 
0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.93, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.16, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.21, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.61, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 6.02, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.79, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.16, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.61, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.61, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 576.0, 636.0, 621.0, 636.0, 636.0, 636.0, 576.0, 587.0, 633.0, 633.0, 582.0, 522.0, 633.0, 579.0, 636.0, 639.0, 582.0, 633.0, 576.0, 627.0, 636.0, 587.0, 639.0, 630.0, 636.0, 579.0, 582.0, 636.0, 582.0, 579.0, 633.0, 587.0, 633.0, 630.0, 636.0, 582.0, 587.0, 636.0, 582.0, 582.0, 633.0, 630.0, 636.0, 627.0, 633.0, 587.0, 633.0, 639.0, 630.0, 587.0, 582.0, 636.0, 630.0, 636.0, 636.0, 630.0, 636.0, 120.0, 636.0, 630.0, 633.0, 633.0, 633.0, 630.0, 630.0, 630.0, 639.0, 579.0, 636.0, 639.0, 590.0, 630.0, 639.0, 639.0, 630.0, 636.0, 636.0, 639.0, 633.0, 636.0, 582.0, 636.0, 621.0, 630.0, 633.0, 630.0, 633.0, 576.0, 582.0, 636.0, 630.0, 630.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 317.0, 294.0, 282.0, 317.0, 
319.0, 310.0, 311.0, 319.0, 317.0, 319.0, 317.0, 309.0, 327.0, 293.0, 283.0, 293.0, 294.0, 311.0, 322.0, 314.0, 319.0, 290.0, 292.0, 262.0, 260.0, 316.0, 317.0, 286.0, 293.0, 317.0, 319.0, 322.0, 317.0, 291.0, 291.0, 319.0, 314.0, 282.0, 294.0, 313.0, 314.0, 320.0, 316.0, 296.0, 291.0, 322.0, 317.0, 306.0, 324.0, 314.0, 322.0, 285.0, 294.0, 289.0, 293.0, 319.0, 317.0, 285.0, 297.0, 283.0, 296.0, 314.0, 319.0, 293.0, 294.0, 309.0, 324.0, 316.0, 314.0, 319.0, 317.0, 290.0, 292.0, 301.0, 286.0, 324.0, 312.0, 289.0, 293.0, 291.0, 291.0, 313.0, 320.0, 321.0, 309.0, 319.0, 317.0, 316.0, 311.0, 319.0, 314.0, 291.0, 296.0, 319.0, 314.0, 319.0, 320.0, 316.0, 314.0, 291.0, 296.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 320.0, 314.0, 322.0, 319.0, 311.0, 319.0, 317.0, 66.0, 54.0, 314.0, 322.0, 310.0, 320.0, 314.0, 319.0, 319.0, 314.0, 322.0, 311.0, 308.0, 322.0, 311.0, 319.0, 311.0, 319.0, 317.0, 322.0, 285.0, 294.0, 319.0, 317.0, 319.0, 320.0, 299.0, 291.0, 316.0, 314.0, 319.0, 320.0, 324.0, 315.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 317.0, 322.0, 314.0, 319.0, 314.0, 322.0, 286.0, 296.0, 317.0, 319.0, 308.0, 313.0, 313.0, 317.0, 319.0, 314.0, 321.0, 309.0, 319.0, 314.0, 299.0, 277.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 314.0, 288.0, 299.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 327.0, 306.0]}, "sampler_perf": {"mean_env_wait_ms": 0.762623780459851, "mean_processing_ms": 0.2211964092332032, "mean_inference_ms": 1.3657033505370475}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11664000, "num_steps_sampled": 6220800, "sample_time_ms": 22297.013, "load_time_ms": 39.012, "grad_time_ms": 10179.524, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004206617828458548, "policy_loss": -0.003324081189930439, "vf_loss": 80.87010192871094, "vf_explained_var": 0.7655022740364075, "kl": 0.001813961542211473, "entropy": 1.1126155853271484, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6220800, "episodes_total": 15552, "training_iteration": 486, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-58-35", "timestamp": 1660262315, "time_this_iter_s": 33.220837116241455, "time_total_s": 20723.04877257347, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 
5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, 
"NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20723.04877257347, "timesteps_since_restore": 6220800, "iterations_since_restore": 486, "perf": {"cpu_util_percent": 32.0404255319149, "ram_util_percent": 59.24255319148937}} -{"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 618.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 309.46}, "custom_metrics": {"sparse_reward_mean": 214.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.72, "shaped_reward_min": 161, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.48, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.4, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.42, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.07, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.76, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 639.0, 633.0, 636.0, 630.0, 587.0, 633.0, 630.0, 630.0, 636.0, 636.0, 587.0, 636.0, 630.0, 633.0, 582.0, 633.0, 633.0, 636.0, 561.0, 630.0, 630.0, 567.0, 639.0, 630.0, 627.0, 627.0, 630.0, 587.0, 633.0, 633.0, 582.0, 630.0, 630.0, 630.0, 639.0, 579.0, 636.0, 639.0, 590.0, 630.0, 639.0, 639.0, 630.0, 636.0, 636.0, 639.0, 633.0, 636.0, 582.0, 636.0, 621.0, 630.0, 633.0, 630.0, 633.0, 576.0, 582.0, 636.0, 630.0, 630.0, 587.0, 630.0, 630.0, 636.0, 633.0, 633.0, 633.0, 630.0, 576.0, 636.0, 621.0, 636.0, 636.0, 636.0, 576.0, 587.0, 633.0, 633.0, 582.0, 522.0, 633.0, 579.0, 636.0, 639.0, 582.0, 633.0, 576.0, 627.0, 636.0, 587.0, 639.0, 630.0, 636.0, 579.0, 582.0, 636.0, 582.0, 579.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 319.0, 320.0, 316.0, 317.0, 322.0, 314.0, 318.0, 312.0, 296.0, 291.0, 316.0, 317.0, 316.0, 314.0, 308.0, 322.0, 319.0, 317.0, 314.0, 322.0, 299.0, 288.0, 324.0, 312.0, 313.0, 317.0, 308.0, 325.0, 288.0, 294.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 277.0, 284.0, 316.0, 314.0, 316.0, 314.0, 278.0, 289.0, 319.0, 320.0, 316.0, 314.0, 305.0, 322.0, 310.0, 317.0, 316.0, 314.0, 296.0, 291.0, 316.0, 317.0, 316.0, 317.0, 288.0, 294.0, 308.0, 322.0, 311.0, 319.0, 311.0, 319.0, 317.0, 322.0, 285.0, 294.0, 319.0, 317.0, 319.0, 320.0, 299.0, 291.0, 316.0, 314.0, 319.0, 320.0, 324.0, 315.0, 316.0, 314.0, 314.0, 322.0, 324.0, 312.0, 317.0, 322.0, 314.0, 319.0, 314.0, 322.0, 286.0, 296.0, 317.0, 319.0, 308.0, 313.0, 313.0, 317.0, 319.0, 314.0, 321.0, 309.0, 319.0, 314.0, 299.0, 277.0, 293.0, 289.0, 314.0, 322.0, 311.0, 319.0, 316.0, 314.0, 288.0, 299.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 327.0, 306.0, 313.0, 317.0, 294.0, 282.0, 317.0, 319.0, 310.0, 311.0, 319.0, 317.0, 319.0, 317.0, 309.0, 327.0, 293.0, 283.0, 293.0, 294.0, 311.0, 322.0, 314.0, 319.0, 290.0, 292.0, 262.0, 260.0, 316.0, 317.0, 286.0, 293.0, 317.0, 319.0, 322.0, 317.0, 291.0, 291.0, 319.0, 314.0, 282.0, 294.0, 313.0, 314.0, 320.0, 316.0, 296.0, 291.0, 322.0, 317.0, 306.0, 324.0, 314.0, 322.0, 285.0, 294.0, 289.0, 293.0, 319.0, 317.0, 285.0, 297.0, 283.0, 296.0, 314.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.762033130729778, "mean_processing_ms": 0.22108464880455408, "mean_inference_ms": 1.3652218358269517}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11688000, "num_steps_sampled": 6233600, "sample_time_ms": 22173.711, "load_time_ms": 39.002, "grad_time_ms": 10239.525, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 
0.0010000000474974513, "total_loss": 0.0017484220443293452, "policy_loss": -0.005304198246449232, "vf_loss": 76.0898666381836, "vf_explained_var": 0.767227828502655, "kl": 0.0019641267135739326, "entropy": 1.1127411127090454, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6233600, "episodes_total": 15584, "training_iteration": 487, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-59-10", "timestamp": 1660262350, "time_this_iter_s": 34.99830985069275, "time_total_s": 20758.047082424164, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20758.047082424164, "timesteps_since_restore": 6233600, "iterations_since_restore": 487, "perf": {"cpu_util_percent": 33.78367346938776, "ram_util_percent": 59.60612244897959}} -{"episode_reward_max": 644.0, "episode_reward_min": 522.0, "episode_reward_mean": 619.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 309.9}, "custom_metrics": {"sparse_reward_mean": 215.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.8, "shaped_reward_min": 160, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.48, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.5, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.03, "potting_onion_agent_1_min": 15, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.38, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 6.09, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.15, 
"useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.63, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.03, "optimal_onion_potting_agent_1_min": 15, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.03, "viable_onion_potting_agent_1_min": 15, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 639.0, 639.0, 600.0, 630.0, 639.0, 644.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 582.0, 630.0, 633.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 582.0, 636.0, 630.0, 633.0, 630.0, 627.0, 639.0, 582.0, 633.0, 636.0, 636.0, 633.0, 633.0, 633.0, 630.0, 576.0, 636.0, 621.0, 636.0, 636.0, 636.0, 576.0, 587.0, 633.0, 633.0, 582.0, 522.0, 633.0, 579.0, 636.0, 639.0, 582.0, 633.0, 576.0, 627.0, 636.0, 587.0, 639.0, 630.0, 636.0, 579.0, 582.0, 636.0, 582.0, 579.0, 633.0, 636.0, 639.0, 633.0, 636.0, 630.0, 587.0, 633.0, 630.0, 630.0, 636.0, 636.0, 587.0, 636.0, 630.0, 633.0, 582.0, 633.0, 633.0, 636.0, 561.0, 630.0, 630.0, 567.0, 639.0, 630.0, 627.0, 627.0, 630.0, 587.0, 633.0, 633.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 322.0, 317.0, 319.0, 320.0, 301.0, 299.0, 321.0, 309.0, 317.0, 322.0, 319.0, 325.0, 319.0, 320.0, 316.0, 317.0, 316.0, 314.0, 314.0, 325.0, 314.0, 316.0, 314.0, 322.0, 291.0, 291.0, 313.0, 317.0, 314.0, 319.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 319.0, 317.0, 311.0, 319.0, 314.0, 319.0, 316.0, 314.0, 310.0, 317.0, 322.0, 317.0, 288.0, 294.0, 316.0, 317.0, 317.0, 319.0, 319.0, 317.0, 319.0, 314.0, 316.0, 317.0, 327.0, 306.0, 313.0, 317.0, 294.0, 282.0, 317.0, 319.0, 310.0, 311.0, 319.0, 317.0, 319.0, 317.0, 309.0, 327.0, 293.0, 283.0, 293.0, 294.0, 311.0, 322.0, 314.0, 319.0, 290.0, 292.0, 262.0, 260.0, 316.0, 317.0, 286.0, 293.0, 317.0, 319.0, 322.0, 317.0, 291.0, 291.0, 319.0, 314.0, 282.0, 294.0, 313.0, 314.0, 320.0, 316.0, 296.0, 291.0, 322.0, 317.0, 306.0, 324.0, 314.0, 322.0, 285.0, 294.0, 289.0, 293.0, 319.0, 317.0, 285.0, 297.0, 283.0, 296.0, 314.0, 319.0, 319.0, 317.0, 319.0, 320.0, 316.0, 317.0, 322.0, 314.0, 318.0, 312.0, 296.0, 291.0, 316.0, 317.0, 316.0, 314.0, 308.0, 322.0, 319.0, 317.0, 314.0, 322.0, 299.0, 288.0, 324.0, 312.0, 313.0, 317.0, 308.0, 325.0, 288.0, 294.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 277.0, 284.0, 316.0, 314.0, 316.0, 314.0, 278.0, 289.0, 319.0, 320.0, 316.0, 314.0, 305.0, 322.0, 310.0, 317.0, 316.0, 314.0, 296.0, 291.0, 316.0, 317.0, 316.0, 317.0, 288.0, 294.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7614456124606471, "mean_processing_ms": 0.22097346860872114, "mean_inference_ms": 1.3647307081670101}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11712000, 
"num_steps_sampled": 6246400, "sample_time_ms": 21961.201, "load_time_ms": 38.731, "grad_time_ms": 10224.967, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00313456985168159, "policy_loss": -0.003917869180440903, "vf_loss": 76.04095458984375, "vf_explained_var": 0.7662898898124695, "kl": 0.0023237813729792833, "entropy": 1.1033259630203247, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6246400, "episodes_total": 15616, "training_iteration": 488, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_00-59-42", "timestamp": 1660262382, "time_this_iter_s": 32.1832549571991, "time_total_s": 20790.230337381363, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, 
"memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20790.230337381363, "timesteps_since_restore": 6246400, "iterations_since_restore": 488, "perf": {"cpu_util_percent": 31.12391304347826, "ram_util_percent": 59.10434782608694}} -{"episode_reward_max": 644.0, "episode_reward_min": 561.0, "episode_reward_mean": 624.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 277.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 312.325}, "custom_metrics": {"sparse_reward_mean": 216.8, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 191.05, "shaped_reward_min": 160, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 18.65, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.52, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 17.98, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.54, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.21, "potting_onion_agent_1_min": 16, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.33, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 6.0, "dish_pickup_agent_1_min": 4, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 5.68, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.16, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.54, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.21, "optimal_onion_potting_agent_1_min": 16, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.54, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.21, "viable_onion_potting_agent_1_min": 16, "viable_onion_potting_agent_1_max": 21, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 633.0, 636.0, 636.0, 639.0, 639.0, 639.0, 633.0, 639.0, 624.0, 624.0, 636.0, 630.0, 582.0, 636.0, 582.0, 576.0, 639.0, 630.0, 627.0, 630.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 639.0, 636.0, 636.0, 636.0, 630.0, 636.0, 582.0, 579.0, 633.0, 636.0, 639.0, 633.0, 636.0, 630.0, 587.0, 633.0, 630.0, 630.0, 636.0, 636.0, 587.0, 636.0, 630.0, 633.0, 582.0, 633.0, 633.0, 636.0, 561.0, 630.0, 630.0, 567.0, 639.0, 630.0, 627.0, 627.0, 630.0, 587.0, 633.0, 633.0, 582.0, 579.0, 639.0, 639.0, 600.0, 630.0, 639.0, 644.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 582.0, 630.0, 633.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 582.0, 636.0, 630.0, 633.0, 
630.0, 627.0, 639.0, 582.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 319.0, 318.0, 315.0, 319.0, 317.0, 314.0, 322.0, 322.0, 317.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 327.0, 312.0, 314.0, 310.0, 316.0, 308.0, 316.0, 320.0, 308.0, 322.0, 291.0, 291.0, 319.0, 317.0, 294.0, 288.0, 285.0, 291.0, 319.0, 320.0, 311.0, 319.0, 308.0, 319.0, 319.0, 311.0, 313.0, 314.0, 319.0, 311.0, 316.0, 314.0, 322.0, 317.0, 319.0, 317.0, 316.0, 320.0, 319.0, 320.0, 317.0, 319.0, 324.0, 312.0, 319.0, 317.0, 316.0, 314.0, 319.0, 317.0, 285.0, 297.0, 283.0, 296.0, 314.0, 319.0, 319.0, 317.0, 319.0, 320.0, 316.0, 317.0, 322.0, 314.0, 318.0, 312.0, 296.0, 291.0, 316.0, 317.0, 316.0, 314.0, 308.0, 322.0, 319.0, 317.0, 314.0, 322.0, 299.0, 288.0, 324.0, 312.0, 313.0, 317.0, 308.0, 325.0, 288.0, 294.0, 316.0, 317.0, 319.0, 314.0, 319.0, 317.0, 277.0, 284.0, 316.0, 314.0, 316.0, 314.0, 278.0, 289.0, 319.0, 320.0, 316.0, 314.0, 305.0, 322.0, 310.0, 317.0, 316.0, 314.0, 296.0, 291.0, 316.0, 317.0, 316.0, 317.0, 288.0, 294.0, 293.0, 286.0, 322.0, 317.0, 319.0, 320.0, 301.0, 299.0, 321.0, 309.0, 317.0, 322.0, 319.0, 325.0, 319.0, 320.0, 316.0, 317.0, 316.0, 314.0, 314.0, 325.0, 314.0, 316.0, 314.0, 322.0, 291.0, 291.0, 313.0, 317.0, 314.0, 319.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 319.0, 317.0, 311.0, 319.0, 314.0, 319.0, 316.0, 314.0, 310.0, 317.0, 322.0, 317.0, 288.0, 294.0, 316.0, 317.0, 317.0, 319.0]}, 
"sampler_perf": {"mean_env_wait_ms": 0.7608592858442179, "mean_processing_ms": 0.22086239768906624, "mean_inference_ms": 1.3642056926460657}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11736000, "num_steps_sampled": 6259200, "sample_time_ms": 22026.8, "load_time_ms": 38.825, "grad_time_ms": 10224.17, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021181919146329165, "policy_loss": -0.004830162972211838, "vf_loss": 74.9923324584961, "vf_explained_var": 0.7754970192909241, "kl": 0.002056455472484231, "entropy": 1.1017627716064453, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6259200, "episodes_total": 15648, "training_iteration": 489, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-00-16", "timestamp": 1660262416, "time_this_iter_s": 33.8461229801178, "time_total_s": 20824.07646036148, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, 
"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20824.07646036148, "timesteps_since_restore": 6259200, "iterations_since_restore": 489, "perf": {"cpu_util_percent": 34.53541666666667, "ram_util_percent": 59.26458333333333}} -{"episode_reward_max": 644.0, "episode_reward_min": 575.0, "episode_reward_mean": 626.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 285.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 313.145}, "custom_metrics": {"sparse_reward_mean": 217.2, "sparse_reward_min": 200, "sparse_reward_max": 220, "shaped_reward_mean": 191.89, "shaped_reward_min": 160, "shaped_reward_max": 204, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.1, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.53, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.99, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.72, "potting_onion_agent_0_min": 14, "potting_onion_agent_0_max": 20, 
"potting_onion_agent_1_mean": 18.17, "potting_onion_agent_1_min": 16, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.43, "dish_pickup_agent_0_min": 5, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.97, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.24, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.56, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.24, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.21, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.72, "optimal_onion_potting_agent_0_min": 14, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.17, "optimal_onion_potting_agent_1_min": 16, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 16.72, "viable_onion_potting_agent_0_min": 14, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.17, "viable_onion_potting_agent_1_min": 16, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 633.0, 575.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 636.0, 639.0, 579.0, 636.0, 636.0, 624.0, 630.0, 624.0, 639.0, 630.0, 633.0, 633.0, 639.0, 630.0, 633.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 587.0, 633.0, 633.0, 582.0, 579.0, 639.0, 639.0, 600.0, 630.0, 639.0, 644.0, 639.0, 633.0, 630.0, 639.0, 630.0, 636.0, 582.0, 630.0, 633.0, 633.0, 633.0, 636.0, 630.0, 630.0, 633.0, 582.0, 
636.0, 630.0, 633.0, 630.0, 627.0, 639.0, 582.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 639.0, 639.0, 639.0, 633.0, 639.0, 624.0, 624.0, 636.0, 630.0, 582.0, 636.0, 582.0, 576.0, 639.0, 630.0, 627.0, 630.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 639.0, 636.0, 636.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 288.0, 287.0, 319.0, 317.0, 319.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 297.0, 319.0, 317.0, 317.0, 322.0, 314.0, 322.0, 317.0, 322.0, 288.0, 291.0, 317.0, 319.0, 314.0, 322.0, 310.0, 314.0, 314.0, 316.0, 316.0, 308.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 314.0, 319.0, 319.0, 320.0, 318.0, 312.0, 313.0, 320.0, 315.0, 324.0, 319.0, 311.0, 288.0, 299.0, 316.0, 314.0, 314.0, 322.0, 290.0, 297.0, 296.0, 291.0, 316.0, 317.0, 316.0, 317.0, 288.0, 294.0, 293.0, 286.0, 322.0, 317.0, 319.0, 320.0, 301.0, 299.0, 321.0, 309.0, 317.0, 322.0, 319.0, 325.0, 319.0, 320.0, 316.0, 317.0, 316.0, 314.0, 314.0, 325.0, 314.0, 316.0, 314.0, 322.0, 291.0, 291.0, 313.0, 317.0, 314.0, 319.0, 314.0, 319.0, 316.0, 317.0, 319.0, 317.0, 311.0, 319.0, 313.0, 317.0, 319.0, 314.0, 291.0, 291.0, 319.0, 317.0, 311.0, 319.0, 314.0, 319.0, 316.0, 314.0, 310.0, 317.0, 322.0, 317.0, 288.0, 294.0, 316.0, 317.0, 317.0, 319.0, 314.0, 319.0, 318.0, 315.0, 319.0, 317.0, 314.0, 322.0, 322.0, 317.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 327.0, 312.0, 314.0, 310.0, 316.0, 308.0, 316.0, 320.0, 308.0, 322.0, 291.0, 291.0, 319.0, 317.0, 294.0, 
288.0, 285.0, 291.0, 319.0, 320.0, 311.0, 319.0, 308.0, 319.0, 319.0, 311.0, 313.0, 314.0, 319.0, 311.0, 316.0, 314.0, 322.0, 317.0, 319.0, 317.0, 316.0, 320.0, 319.0, 320.0, 317.0, 319.0, 324.0, 312.0, 319.0, 317.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.760275311275054, "mean_processing_ms": 0.22075194950197713, "mean_inference_ms": 1.36361755561513}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11760000, "num_steps_sampled": 6272000, "sample_time_ms": 22027.209, "load_time_ms": 38.894, "grad_time_ms": 10417.749, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028506640810519457, "policy_loss": -0.003879321739077568, "vf_loss": 72.80782318115234, "vf_explained_var": 0.7767069935798645, "kl": 0.0016769763315096498, "entropy": 1.101601243019104, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6272000, "episodes_total": 15680, "training_iteration": 490, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-00-49", "timestamp": 1660262449, "time_this_iter_s": 33.20848989486694, "time_total_s": 20857.284950256348, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": 
null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, 
"compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20857.284950256348, "timesteps_since_restore": 6272000, "iterations_since_restore": 490, "perf": {"cpu_util_percent": 36.26808510638298, "ram_util_percent": 59.0851063829787}} -{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 624.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 312.42}, 
"custom_metrics": {"sparse_reward_mean": 216.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 191.24, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.13, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.83, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.73, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.06, "potting_onion_agent_1_min": 14, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.39, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.28, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.73, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.06, "optimal_onion_potting_agent_1_min": 14, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.73, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.06, "viable_onion_potting_agent_1_min": 14, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 627.0, 633.0, 636.0, 627.0, 633.0, 630.0, 627.0, 579.0, 630.0, 636.0, 639.0, 633.0, 630.0, 579.0, 639.0, 633.0, 615.0, 636.0, 630.0, 579.0, 582.0, 639.0, 636.0, 639.0, 444.0, 630.0, 633.0, 624.0, 
630.0, 639.0, 639.0, 639.0, 582.0, 633.0, 636.0, 633.0, 633.0, 636.0, 636.0, 639.0, 639.0, 639.0, 633.0, 639.0, 624.0, 624.0, 636.0, 630.0, 582.0, 636.0, 582.0, 576.0, 639.0, 630.0, 627.0, 630.0, 627.0, 630.0, 630.0, 639.0, 636.0, 636.0, 639.0, 636.0, 636.0, 636.0, 630.0, 636.0, 636.0, 633.0, 575.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 636.0, 639.0, 579.0, 636.0, 636.0, 624.0, 630.0, 624.0, 639.0, 630.0, 633.0, 633.0, 639.0, 630.0, 633.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 321.0, 311.0, 316.0, 316.0, 317.0, 319.0, 317.0, 311.0, 316.0, 311.0, 322.0, 311.0, 319.0, 307.0, 320.0, 288.0, 291.0, 317.0, 313.0, 319.0, 317.0, 319.0, 320.0, 314.0, 319.0, 314.0, 316.0, 291.0, 288.0, 319.0, 320.0, 316.0, 317.0, 307.0, 308.0, 314.0, 322.0, 311.0, 319.0, 289.0, 290.0, 291.0, 291.0, 316.0, 323.0, 314.0, 322.0, 322.0, 317.0, 222.0, 222.0, 311.0, 319.0, 319.0, 314.0, 313.0, 311.0, 311.0, 319.0, 322.0, 317.0, 314.0, 325.0, 322.0, 317.0, 288.0, 294.0, 316.0, 317.0, 317.0, 319.0, 314.0, 319.0, 318.0, 315.0, 319.0, 317.0, 314.0, 322.0, 322.0, 317.0, 319.0, 320.0, 319.0, 320.0, 316.0, 317.0, 327.0, 312.0, 314.0, 310.0, 316.0, 308.0, 316.0, 320.0, 308.0, 322.0, 291.0, 291.0, 319.0, 317.0, 294.0, 288.0, 285.0, 291.0, 319.0, 320.0, 311.0, 319.0, 308.0, 319.0, 319.0, 311.0, 313.0, 314.0, 319.0, 311.0, 316.0, 314.0, 322.0, 317.0, 319.0, 317.0, 316.0, 320.0, 319.0, 320.0, 317.0, 319.0, 324.0, 312.0, 319.0, 317.0, 316.0, 314.0, 319.0, 
317.0, 319.0, 317.0, 316.0, 317.0, 288.0, 287.0, 319.0, 317.0, 319.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 297.0, 319.0, 317.0, 317.0, 322.0, 314.0, 322.0, 317.0, 322.0, 288.0, 291.0, 317.0, 319.0, 314.0, 322.0, 310.0, 314.0, 314.0, 316.0, 316.0, 308.0, 319.0, 320.0, 316.0, 314.0, 316.0, 317.0, 314.0, 319.0, 319.0, 320.0, 318.0, 312.0, 313.0, 320.0, 315.0, 324.0, 319.0, 311.0, 288.0, 299.0, 316.0, 314.0, 314.0, 322.0, 290.0, 297.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7596883332679625, "mean_processing_ms": 0.22064007492809667, "mean_inference_ms": 1.3629331607275919}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11784000, "num_steps_sampled": 6284800, "sample_time_ms": 21863.417, "load_time_ms": 38.973, "grad_time_ms": 10519.041, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0017596340039744973, "policy_loss": -0.008380659855902195, "vf_loss": 71.76502227783203, "vf_explained_var": 0.787438690662384, "kl": 0.0020715922582894564, "entropy": 1.110949158668518, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6284800, "episodes_total": 15712, "training_iteration": 491, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-01-21", "timestamp": 1660262481, "time_this_iter_s": 31.476083278656006, "time_total_s": 20888.761033535004, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20888.761033535004, "timesteps_since_restore": 6284800, "iterations_since_restore": 491, "perf": {"cpu_util_percent": 35.37777777777777, "ram_util_percent": 59.10222222222222}} -{"episode_reward_max": 639.0, "episode_reward_min": 
444.0, "episode_reward_mean": 619.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 309.65}, "custom_metrics": {"sparse_reward_mean": 215.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 189.3, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.17, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.3, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.53, "useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 
0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.68, "potting_onion_agent_0_min": 13, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.82, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.3, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 6.1, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.69, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.6, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.29, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.68, "optimal_onion_potting_agent_0_min": 13, "optimal_onion_potting_agent_0_max": 20, 
"optimal_onion_potting_agent_1_mean": 17.82, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.68, "viable_onion_potting_agent_0_min": 13, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.82, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 570.0, 582.0, 636.0, 627.0, 
636.0, 630.0, 633.0, 636.0, 630.0, 576.0, 587.0, 579.0, 633.0, 636.0, 615.0, 633.0, 630.0, 633.0, 573.0, 579.0, 633.0, 582.0, 636.0, 525.0, 621.0, 627.0, 587.0, 630.0, 639.0, 630.0, 579.0, 636.0, 636.0, 636.0, 630.0, 636.0, 636.0, 633.0, 575.0, 636.0, 633.0, 639.0, 636.0, 582.0, 636.0, 639.0, 636.0, 639.0, 579.0, 636.0, 636.0, 624.0, 630.0, 624.0, 639.0, 630.0, 633.0, 633.0, 639.0, 630.0, 633.0, 639.0, 630.0, 587.0, 630.0, 636.0, 587.0, 639.0, 627.0, 633.0, 636.0, 627.0, 633.0, 630.0, 627.0, 579.0, 630.0, 636.0, 639.0, 633.0, 630.0, 579.0, 639.0, 633.0, 615.0, 636.0, 630.0, 579.0, 582.0, 639.0, 636.0, 639.0, 444.0, 630.0, 633.0, 624.0, 630.0, 639.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 293.0, 277.0, 290.0, 292.0, 317.0, 319.0, 314.0, 313.0, 311.0, 325.0, 313.0, 317.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 296.0, 280.0, 293.0, 294.0, 293.0, 286.0, 316.0, 317.0, 314.0, 322.0, 307.0, 308.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 282.0, 291.0, 288.0, 291.0, 319.0, 314.0, 291.0, 291.0, 316.0, 320.0, 263.0, 262.0, 306.0, 315.0, 311.0, 316.0, 293.0, 294.0, 311.0, 319.0, 319.0, 320.0, 311.0, 319.0, 280.0, 299.0, 317.0, 319.0, 324.0, 312.0, 319.0, 317.0, 316.0, 314.0, 319.0, 317.0, 319.0, 317.0, 316.0, 317.0, 288.0, 287.0, 319.0, 317.0, 319.0, 314.0, 319.0, 320.0, 316.0, 320.0, 285.0, 297.0, 319.0, 317.0, 317.0, 322.0, 314.0, 322.0, 317.0, 322.0, 288.0, 291.0, 317.0, 319.0, 314.0, 322.0, 310.0, 314.0, 314.0, 316.0, 316.0, 308.0, 319.0, 320.0, 316.0, 
314.0, 316.0, 317.0, 314.0, 319.0, 319.0, 320.0, 318.0, 312.0, 313.0, 320.0, 315.0, 324.0, 319.0, 311.0, 288.0, 299.0, 316.0, 314.0, 314.0, 322.0, 290.0, 297.0, 318.0, 321.0, 311.0, 316.0, 316.0, 317.0, 319.0, 317.0, 311.0, 316.0, 311.0, 322.0, 311.0, 319.0, 307.0, 320.0, 288.0, 291.0, 317.0, 313.0, 319.0, 317.0, 319.0, 320.0, 314.0, 319.0, 314.0, 316.0, 291.0, 288.0, 319.0, 320.0, 316.0, 317.0, 307.0, 308.0, 314.0, 322.0, 311.0, 319.0, 289.0, 290.0, 291.0, 291.0, 316.0, 323.0, 314.0, 322.0, 322.0, 317.0, 222.0, 222.0, 311.0, 319.0, 319.0, 314.0, 313.0, 311.0, 311.0, 319.0, 322.0, 317.0, 314.0, 325.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7590978232117643, "mean_processing_ms": 0.22052625115278437, "mean_inference_ms": 1.3621520810826262}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11808000, "num_steps_sampled": 6297600, "sample_time_ms": 21732.6, "load_time_ms": 39.061, "grad_time_ms": 10636.833, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013283310690894723, "policy_loss": -0.00562013266608119, "vf_loss": 75.04083251953125, "vf_explained_var": 0.7633475661277771, "kl": 0.002308204537257552, "entropy": 1.1112407445907593, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6297600, "episodes_total": 15744, "training_iteration": 492, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-01-52", "timestamp": 1660262512, "time_this_iter_s": 31.412389039993286, "time_total_s": 20920.173422574997, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", 
"fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, 
"observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20920.173422574997, "timesteps_since_restore": 6297600, 
"iterations_since_restore": 492, "perf": {"cpu_util_percent": 32.93999999999999, "ram_util_percent": 59.11333333333331}} -{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 615.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 307.99}, "custom_metrics": {"sparse_reward_mean": 213.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.38, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 13, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.42, 
"useful_onion_pickup_agent_1_min": 13, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.52, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 17.86, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 6.27, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 6.05, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.33, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.64, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.17, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, 
"soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.52, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 17.86, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.52, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 17.86, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 630.0, 639.0, 627.0, 636.0, 582.0, 630.0, 593.0, 582.0, 627.0, 630.0, 530.0, 639.0, 636.0, 636.0, 519.0, 627.0, 636.0, 636.0, 633.0, 630.0, 627.0, 581.0, 636.0, 633.0, 633.0, 633.0, 633.0, 633.0, 582.0, 636.0, 633.0, 587.0, 630.0, 636.0, 587.0, 639.0, 627.0, 633.0, 636.0, 627.0, 633.0, 630.0, 627.0, 579.0, 630.0, 636.0, 639.0, 633.0, 630.0, 579.0, 639.0, 633.0, 615.0, 636.0, 630.0, 579.0, 582.0, 639.0, 636.0, 639.0, 444.0, 630.0, 633.0, 624.0, 630.0, 639.0, 639.0, 573.0, 570.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 630.0, 576.0, 587.0, 579.0, 633.0, 636.0, 615.0, 633.0, 630.0, 633.0, 573.0, 579.0, 633.0, 582.0, 636.0, 525.0, 621.0, 627.0, 587.0, 630.0, 639.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [322.0, 317.0, 311.0, 319.0, 314.0, 325.0, 308.0, 319.0, 316.0, 320.0, 291.0, 291.0, 311.0, 319.0, 291.0, 302.0, 296.0, 286.0, 316.0, 311.0, 318.0, 312.0, 253.0, 277.0, 322.0, 317.0, 319.0, 317.0, 319.0, 317.0, 264.0, 255.0, 314.0, 313.0, 319.0, 317.0, 319.0, 317.0, 315.0, 318.0, 311.0, 319.0, 316.0, 311.0, 295.0, 286.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 319.0, 314.0, 319.0, 314.0, 321.0, 312.0, 293.0, 289.0, 311.0, 325.0, 316.0, 317.0, 288.0, 299.0, 316.0, 314.0, 314.0, 322.0, 290.0, 297.0, 318.0, 321.0, 311.0, 316.0, 316.0, 317.0, 319.0, 
317.0, 311.0, 316.0, 311.0, 322.0, 311.0, 319.0, 307.0, 320.0, 288.0, 291.0, 317.0, 313.0, 319.0, 317.0, 319.0, 320.0, 314.0, 319.0, 314.0, 316.0, 291.0, 288.0, 319.0, 320.0, 316.0, 317.0, 307.0, 308.0, 314.0, 322.0, 311.0, 319.0, 289.0, 290.0, 291.0, 291.0, 316.0, 323.0, 314.0, 322.0, 322.0, 317.0, 222.0, 222.0, 311.0, 319.0, 319.0, 314.0, 313.0, 311.0, 311.0, 319.0, 322.0, 317.0, 314.0, 325.0, 285.0, 288.0, 293.0, 277.0, 290.0, 292.0, 317.0, 319.0, 314.0, 313.0, 311.0, 325.0, 313.0, 317.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 296.0, 280.0, 293.0, 294.0, 293.0, 286.0, 316.0, 317.0, 314.0, 322.0, 307.0, 308.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 282.0, 291.0, 288.0, 291.0, 319.0, 314.0, 291.0, 291.0, 316.0, 320.0, 263.0, 262.0, 306.0, 315.0, 311.0, 316.0, 293.0, 294.0, 311.0, 319.0, 319.0, 320.0, 311.0, 319.0, 280.0, 299.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7584994121289265, "mean_processing_ms": 0.22040925813735274, "mean_inference_ms": 1.3613199972214534}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11832000, "num_steps_sampled": 6310400, "sample_time_ms": 21713.634, "load_time_ms": 39.62, "grad_time_ms": 11039.119, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016831206157803535, "policy_loss": -0.005174629390239716, "vf_loss": 74.1271743774414, "vf_explained_var": 0.7761192321777344, "kl": 0.002214068779721856, "entropy": 1.1099259853363037, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6310400, "episodes_total": 15776, "training_iteration": 493, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-02-26", "timestamp": 1660262546, "time_this_iter_s": 33.39047908782959, "time_total_s": 20953.563901662827, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, 
"num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, 
"evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 
0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20953.563901662827, "timesteps_since_restore": 6310400, "iterations_since_restore": 493, "perf": {"cpu_util_percent": 34.62978723404255, "ram_util_percent": 59.10212765957445}} -{"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 617.75, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 308.875}, "custom_metrics": {"sparse_reward_mean": 214.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 189.35, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.12, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.53, 
"onion_pickup_agent_1_min": 15, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.51, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.61, "useful_onion_pickup_agent_1_min": 14, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.32, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 18.07, "potting_onion_agent_1_min": 13, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.41, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 4, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.5, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 18.07, "optimal_onion_potting_agent_1_min": 13, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 18.07, "viable_onion_potting_agent_1_min": 13, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 567.0, 639.0, 630.0, 639.0, 587.0, 627.0, 639.0, 636.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 639.0, 564.0, 636.0, 636.0, 633.0, 587.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 633.0, 582.0, 639.0, 636.0, 633.0, 624.0, 630.0, 639.0, 639.0, 573.0, 570.0, 582.0, 636.0, 627.0, 636.0, 630.0, 633.0, 636.0, 630.0, 576.0, 587.0, 579.0, 633.0, 636.0, 615.0, 633.0, 630.0, 633.0, 573.0, 579.0, 633.0, 582.0, 636.0, 525.0, 621.0, 627.0, 587.0, 630.0, 639.0, 630.0, 579.0, 639.0, 630.0, 639.0, 627.0, 636.0, 582.0, 630.0, 593.0, 582.0, 627.0, 630.0, 530.0, 639.0, 636.0, 636.0, 519.0, 627.0, 636.0, 636.0, 633.0, 630.0, 627.0, 581.0, 636.0, 633.0, 633.0, 633.0, 633.0, 633.0, 582.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 284.0, 283.0, 319.0, 320.0, 316.0, 314.0, 322.0, 317.0, 294.0, 293.0, 313.0, 314.0, 319.0, 320.0, 314.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 317.0, 322.0, 317.0, 309.0, 327.0, 316.0, 317.0, 319.0, 320.0, 275.0, 289.0, 319.0, 317.0, 321.0, 315.0, 316.0, 317.0, 286.0, 301.0, 319.0, 317.0, 319.0, 317.0, 319.0, 314.0, 296.0, 
286.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 288.0, 294.0, 317.0, 322.0, 316.0, 320.0, 319.0, 314.0, 313.0, 311.0, 311.0, 319.0, 322.0, 317.0, 314.0, 325.0, 285.0, 288.0, 293.0, 277.0, 290.0, 292.0, 317.0, 319.0, 314.0, 313.0, 311.0, 325.0, 313.0, 317.0, 321.0, 312.0, 319.0, 317.0, 313.0, 317.0, 296.0, 280.0, 293.0, 294.0, 293.0, 286.0, 316.0, 317.0, 314.0, 322.0, 307.0, 308.0, 314.0, 319.0, 316.0, 314.0, 319.0, 314.0, 282.0, 291.0, 288.0, 291.0, 319.0, 314.0, 291.0, 291.0, 316.0, 320.0, 263.0, 262.0, 306.0, 315.0, 311.0, 316.0, 293.0, 294.0, 311.0, 319.0, 319.0, 320.0, 311.0, 319.0, 280.0, 299.0, 322.0, 317.0, 311.0, 319.0, 314.0, 325.0, 308.0, 319.0, 316.0, 320.0, 291.0, 291.0, 311.0, 319.0, 291.0, 302.0, 296.0, 286.0, 316.0, 311.0, 318.0, 312.0, 253.0, 277.0, 322.0, 317.0, 319.0, 317.0, 319.0, 317.0, 264.0, 255.0, 314.0, 313.0, 319.0, 317.0, 319.0, 317.0, 315.0, 318.0, 311.0, 319.0, 316.0, 311.0, 295.0, 286.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 319.0, 314.0, 319.0, 314.0, 321.0, 312.0, 293.0, 289.0, 311.0, 325.0, 316.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7579059857165342, "mean_processing_ms": 0.22029379571210359, "mean_inference_ms": 1.3605505316111535}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11856000, "num_steps_sampled": 6323200, "sample_time_ms": 21716.733, "load_time_ms": 39.394, "grad_time_ms": 11172.934, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015474725514650345, "policy_loss": -0.005591364111751318, "vf_loss": 76.93659210205078, "vf_explained_var": 0.7724745869636536, "kl": 0.002216791734099388, "entropy": 1.1096433401107788, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6323200, "episodes_total": 15808, "training_iteration": 494, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-02-58", "timestamp": 
1660262578, "time_this_iter_s": 32.0608389377594, "time_total_s": 20985.624740600586, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", 
"ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 
0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 20985.624740600586, "timesteps_since_restore": 6323200, "iterations_since_restore": 494, "perf": {"cpu_util_percent": 31.900000000000002, "ram_util_percent": 59.13260869565216}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 615.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 307.915}, "custom_metrics": {"sparse_reward_mean": 213.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 189.03, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 
0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.02, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.42, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.43, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.54, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.45, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.04, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.49, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.66, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 2, 
"soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.45, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.04, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.45, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.04, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 636.0, 636.0, 636.0, 627.0, 636.0, 579.0, 639.0, 633.0, 639.0, 576.0, 630.0, 573.0, 630.0, 636.0, 630.0, 519.0, 633.0, 633.0, 636.0, 636.0, 630.0, 639.0, 630.0, 639.0, 587.0, 584.0, 180.0, 630.0, 630.0, 627.0, 630.0, 639.0, 630.0, 579.0, 639.0, 630.0, 639.0, 627.0, 636.0, 582.0, 630.0, 593.0, 582.0, 627.0, 630.0, 530.0, 639.0, 636.0, 636.0, 519.0, 627.0, 636.0, 636.0, 633.0, 630.0, 627.0, 581.0, 636.0, 633.0, 633.0, 633.0, 633.0, 633.0, 582.0, 636.0, 633.0, 582.0, 567.0, 639.0, 630.0, 639.0, 587.0, 627.0, 639.0, 636.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 639.0, 564.0, 636.0, 636.0, 633.0, 587.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 633.0, 582.0, 639.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 311.0, 291.0, 288.0, 319.0, 317.0, 304.0, 332.0, 324.0, 312.0, 321.0, 306.0, 324.0, 312.0, 285.0, 294.0, 322.0, 317.0, 
316.0, 317.0, 320.0, 319.0, 288.0, 288.0, 321.0, 309.0, 288.0, 285.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 256.0, 263.0, 314.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 312.0, 327.0, 296.0, 291.0, 296.0, 288.0, 91.0, 89.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 311.0, 319.0, 319.0, 320.0, 311.0, 319.0, 280.0, 299.0, 322.0, 317.0, 311.0, 319.0, 314.0, 325.0, 308.0, 319.0, 316.0, 320.0, 291.0, 291.0, 311.0, 319.0, 291.0, 302.0, 296.0, 286.0, 316.0, 311.0, 318.0, 312.0, 253.0, 277.0, 322.0, 317.0, 319.0, 317.0, 319.0, 317.0, 264.0, 255.0, 314.0, 313.0, 319.0, 317.0, 319.0, 317.0, 315.0, 318.0, 311.0, 319.0, 316.0, 311.0, 295.0, 286.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 319.0, 314.0, 319.0, 314.0, 321.0, 312.0, 293.0, 289.0, 311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 284.0, 283.0, 319.0, 320.0, 316.0, 314.0, 322.0, 317.0, 294.0, 293.0, 313.0, 314.0, 319.0, 320.0, 314.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 317.0, 322.0, 317.0, 309.0, 327.0, 316.0, 317.0, 319.0, 320.0, 275.0, 289.0, 319.0, 317.0, 321.0, 315.0, 316.0, 317.0, 286.0, 301.0, 319.0, 317.0, 319.0, 317.0, 319.0, 314.0, 296.0, 286.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 288.0, 294.0, 317.0, 322.0, 316.0, 320.0, 319.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7573280240404361, "mean_processing_ms": 0.22018399055339372, "mean_inference_ms": 1.359923906771843}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11880000, "num_steps_sampled": 6336000, "sample_time_ms": 21753.166, "load_time_ms": 39.587, "grad_time_ms": 11166.874, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002439265139400959, "policy_loss": -0.004945265594869852, "vf_loss": 79.36029815673828, "vf_explained_var": 0.7895925045013428, "kl": 0.0015944234328344464, "entropy": 1.1029914617538452, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, 
"timesteps_this_iter": 12800, "done": false, "timesteps_total": 6336000, "episodes_total": 15840, "training_iteration": 495, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-03-32", "timestamp": 1660262612, "time_this_iter_s": 34.07752990722656, "time_total_s": 21019.702270507812, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": 
"overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, 
"CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21019.702270507812, "timesteps_since_restore": 6336000, "iterations_since_restore": 495, "perf": {"cpu_util_percent": 32.84375, "ram_util_percent": 59.13125}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 614.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 307.225}, "custom_metrics": {"sparse_reward_mean": 212.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.85, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.45, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.3, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 18.06, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.65, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.67, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.3, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 18.06, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.3, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 18.06, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 633.0, 636.0, 627.0, 579.0, 630.0, 636.0, 633.0, 582.0, 639.0, 624.0, 630.0, 587.0, 582.0, 582.0, 639.0, 522.0, 639.0, 639.0, 639.0, 636.0, 630.0, 624.0, 639.0, 587.0, 633.0, 636.0, 582.0, 630.0, 579.0, 582.0, 630.0, 633.0, 582.0, 636.0, 633.0, 582.0, 567.0, 639.0, 630.0, 639.0, 587.0, 627.0, 639.0, 636.0, 639.0, 630.0, 636.0, 639.0, 636.0, 633.0, 639.0, 564.0, 636.0, 636.0, 633.0, 587.0, 636.0, 636.0, 633.0, 582.0, 630.0, 636.0, 633.0, 582.0, 639.0, 636.0, 633.0, 630.0, 579.0, 636.0, 636.0, 636.0, 627.0, 636.0, 579.0, 639.0, 633.0, 639.0, 576.0, 630.0, 573.0, 630.0, 636.0, 630.0, 519.0, 633.0, 633.0, 636.0, 636.0, 630.0, 639.0, 630.0, 639.0, 587.0, 584.0, 180.0, 630.0, 630.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 314.0, 319.0, 319.0, 317.0, 318.0, 309.0, 294.0, 285.0, 311.0, 319.0, 319.0, 317.0, 314.0, 319.0, 290.0, 292.0, 319.0, 320.0, 307.0, 317.0, 318.0, 312.0, 296.0, 291.0, 291.0, 291.0, 296.0, 286.0, 319.0, 320.0, 260.0, 262.0, 316.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 317.0, 316.0, 314.0, 315.0, 309.0, 322.0, 317.0, 296.0, 291.0, 316.0, 317.0, 324.0, 312.0, 294.0, 288.0, 316.0, 314.0, 282.0, 297.0, 288.0, 294.0, 316.0, 314.0, 321.0, 312.0, 293.0, 289.0, 311.0, 325.0, 316.0, 317.0, 288.0, 294.0, 284.0, 283.0, 319.0, 320.0, 316.0, 314.0, 322.0, 317.0, 294.0, 293.0, 313.0, 314.0, 319.0, 320.0, 314.0, 322.0, 319.0, 320.0, 314.0, 316.0, 319.0, 317.0, 322.0, 317.0, 309.0, 327.0, 316.0, 317.0, 319.0, 320.0, 275.0, 289.0, 319.0, 317.0, 321.0, 315.0, 316.0, 317.0, 286.0, 301.0, 319.0, 317.0, 319.0, 317.0, 319.0, 314.0, 296.0, 286.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 288.0, 294.0, 317.0, 322.0, 316.0, 320.0, 319.0, 314.0, 319.0, 311.0, 291.0, 288.0, 319.0, 317.0, 304.0, 332.0, 324.0, 312.0, 321.0, 306.0, 324.0, 312.0, 285.0, 294.0, 322.0, 317.0, 316.0, 317.0, 320.0, 319.0, 288.0, 288.0, 321.0, 309.0, 288.0, 285.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 256.0, 263.0, 314.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 312.0, 327.0, 296.0, 291.0, 296.0, 288.0, 91.0, 89.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7567700204261155, "mean_processing_ms": 0.22008117197400467, "mean_inference_ms": 1.3596748332774586}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11904000, "num_steps_sampled": 6348800, "sample_time_ms": 22347.681, "load_time_ms": 39.524, "grad_time_ms": 11120.722, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003682489274069667, "policy_loss": -0.0037913068663328886, "vf_loss": 80.29010772705078, 
"vf_explained_var": 0.7666907906532288, "kl": 0.0018009584164246917, "entropy": 1.1104191541671753, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6348800, "episodes_total": 15872, "training_iteration": 496, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-04-10", "timestamp": 1660262650, "time_this_iter_s": 38.697832107543945, "time_total_s": 21058.400102615356, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": 
[null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21058.400102615356, "timesteps_since_restore": 6348800, "iterations_since_restore": 496, "perf": {"cpu_util_percent": 33.66909090909091, "ram_util_percent": 59.103636363636355}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 612.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 332.0}, "policy_reward_mean": {"ppo": 306.34}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.28, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.76, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.26, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.32, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.77, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.73, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 16.32, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.32, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 636.0, 633.0, 636.0, 633.0, 630.0, 630.0, 627.0, 582.0, 630.0, 630.0, 633.0, 636.0, 579.0, 587.0, 633.0, 582.0, 582.0, 636.0, 579.0, 633.0, 633.0, 630.0, 630.0, 582.0, 630.0, 639.0, 579.0, 587.0, 582.0, 639.0, 582.0, 639.0, 636.0, 633.0, 630.0, 579.0, 636.0, 636.0, 636.0, 627.0, 636.0, 579.0, 639.0, 633.0, 639.0, 576.0, 630.0, 573.0, 630.0, 636.0, 630.0, 519.0, 633.0, 633.0, 636.0, 636.0, 630.0, 639.0, 630.0, 639.0, 587.0, 584.0, 180.0, 630.0, 630.0, 627.0, 587.0, 633.0, 636.0, 627.0, 579.0, 630.0, 636.0, 633.0, 582.0, 639.0, 624.0, 630.0, 587.0, 582.0, 582.0, 639.0, 522.0, 639.0, 639.0, 639.0, 636.0, 630.0, 624.0, 639.0, 587.0, 633.0, 636.0, 582.0, 630.0, 579.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 316.0, 317.0, 316.0, 314.0, 316.0, 314.0, 311.0, 316.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 312.0, 324.0, 288.0, 291.0, 290.0, 297.0, 314.0, 319.0, 291.0, 291.0, 293.0, 289.0, 319.0, 317.0, 291.0, 288.0, 314.0, 319.0, 319.0, 314.0, 316.0, 314.0, 313.0, 317.0, 293.0, 289.0, 321.0, 309.0, 317.0, 322.0, 293.0, 286.0, 291.0, 296.0, 290.0, 292.0, 322.0, 317.0, 288.0, 294.0, 317.0, 322.0, 316.0, 320.0, 319.0, 314.0, 319.0, 311.0, 291.0, 288.0, 319.0, 317.0, 304.0, 332.0, 324.0, 312.0, 321.0, 306.0, 324.0, 312.0, 285.0, 294.0, 322.0, 317.0, 316.0, 317.0, 320.0, 319.0, 288.0, 288.0, 321.0, 309.0, 288.0, 285.0, 311.0, 319.0, 314.0, 322.0, 313.0, 317.0, 256.0, 263.0, 314.0, 319.0, 311.0, 322.0, 319.0, 317.0, 319.0, 317.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 312.0, 327.0, 296.0, 291.0, 296.0, 288.0, 91.0, 89.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 293.0, 294.0, 314.0, 319.0, 319.0, 317.0, 318.0, 309.0, 294.0, 285.0, 311.0, 319.0, 319.0, 317.0, 314.0, 319.0, 290.0, 292.0, 319.0, 320.0, 307.0, 317.0, 318.0, 312.0, 296.0, 291.0, 291.0, 291.0, 296.0, 286.0, 319.0, 320.0, 260.0, 262.0, 316.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 317.0, 316.0, 314.0, 315.0, 309.0, 322.0, 317.0, 296.0, 291.0, 316.0, 317.0, 324.0, 312.0, 294.0, 288.0, 316.0, 314.0, 282.0, 297.0, 288.0, 294.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7562172148453271, "mean_processing_ms": 0.21998071761796992, "mean_inference_ms": 1.359487549617304}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11928000, "num_steps_sampled": 6361600, "sample_time_ms": 22295.567, "load_time_ms": 39.346, "grad_time_ms": 
11042.076, "update_time_ms": 0.001, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008341053617186844, "policy_loss": -0.007899199612438679, "vf_loss": 76.18367767333984, "vf_explained_var": 0.7670480608940125, "kl": 0.0021500647999346256, "entropy": 1.106536865234375, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6361600, "episodes_total": 15904, "training_iteration": 497, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-04-44", "timestamp": 1660262684, "time_this_iter_s": 33.68950605392456, "time_total_s": 21092.08960866928, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": 
["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21092.08960866928, "timesteps_since_restore": 6361600, "iterations_since_restore": 497, "perf": {"cpu_util_percent": 35.32553191489361, "ram_util_percent": 59.131914893617}} -{"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 612.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 306.32}, "custom_metrics": {"sparse_reward_mean": 212.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 188.24, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.69, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 18.39, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.22, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.23, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.91, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 6.5, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.7, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.66, 
"useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.51, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.66, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 16.23, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.91, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.23, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.91, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 587.0, 630.0, 624.0, 582.0, 636.0, 582.0, 579.0, 636.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 587.0, 630.0, 587.0, 582.0, 582.0, 630.0, 633.0, 630.0, 624.0, 639.0, 630.0, 630.0, 636.0, 633.0, 639.0, 624.0, 636.0, 180.0, 630.0, 630.0, 627.0, 587.0, 633.0, 636.0, 627.0, 579.0, 630.0, 636.0, 633.0, 582.0, 639.0, 624.0, 630.0, 587.0, 582.0, 582.0, 639.0, 522.0, 639.0, 639.0, 639.0, 636.0, 630.0, 624.0, 639.0, 587.0, 633.0, 636.0, 582.0, 630.0, 579.0, 582.0, 630.0, 636.0, 633.0, 636.0, 633.0, 636.0, 633.0, 630.0, 630.0, 627.0, 582.0, 630.0, 630.0, 633.0, 636.0, 579.0, 587.0, 633.0, 582.0, 582.0, 636.0, 579.0, 633.0, 633.0, 630.0, 630.0, 582.0, 630.0, 639.0, 579.0, 587.0, 582.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 322.0, 296.0, 291.0, 316.0, 314.0, 308.0, 316.0, 288.0, 294.0, 317.0, 319.0, 291.0, 291.0, 290.0, 289.0, 324.0, 312.0, 288.0, 294.0, 314.0, 322.0, 319.0, 317.0, 319.0, 317.0, 321.0, 312.0, 311.0, 319.0, 290.0, 297.0, 316.0, 314.0, 293.0, 294.0, 288.0, 294.0, 296.0, 286.0, 311.0, 319.0, 316.0, 317.0, 316.0, 314.0, 324.0, 300.0, 322.0, 317.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 319.0, 320.0, 310.0, 314.0, 314.0, 322.0, 91.0, 89.0, 316.0, 314.0, 311.0, 319.0, 308.0, 319.0, 293.0, 294.0, 314.0, 319.0, 319.0, 317.0, 318.0, 309.0, 294.0, 285.0, 311.0, 319.0, 319.0, 317.0, 314.0, 319.0, 290.0, 292.0, 319.0, 320.0, 307.0, 317.0, 318.0, 312.0, 296.0, 291.0, 291.0, 291.0, 296.0, 286.0, 319.0, 320.0, 260.0, 262.0, 316.0, 323.0, 319.0, 320.0, 319.0, 320.0, 319.0, 317.0, 316.0, 314.0, 315.0, 309.0, 322.0, 317.0, 296.0, 291.0, 316.0, 317.0, 324.0, 312.0, 294.0, 288.0, 316.0, 314.0, 282.0, 297.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 316.0, 317.0, 316.0, 314.0, 316.0, 314.0, 311.0, 316.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 312.0, 324.0, 288.0, 291.0, 290.0, 297.0, 314.0, 319.0, 291.0, 291.0, 293.0, 289.0, 319.0, 317.0, 291.0, 288.0, 314.0, 319.0, 319.0, 314.0, 316.0, 314.0, 313.0, 317.0, 293.0, 289.0, 321.0, 309.0, 317.0, 322.0, 293.0, 286.0, 291.0, 296.0, 290.0, 292.0, 322.0, 317.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7556558584426459, "mean_processing_ms": 0.21987743266667745, 
"mean_inference_ms": 1.3592501447797594}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11952000, "num_steps_sampled": 6374400, "sample_time_ms": 22219.899, "load_time_ms": 39.602, "grad_time_ms": 11084.562, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010325837647542357, "policy_loss": -0.007863701321184635, "vf_loss": 73.90652465820312, "vf_explained_var": 0.7726984024047852, "kl": 0.0017767212120816112, "entropy": 1.1190696954727173, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6374400, "episodes_total": 15936, "training_iteration": 498, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-05-16", "timestamp": 1660262716, "time_this_iter_s": 31.8541898727417, "time_total_s": 21123.943798542023, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, 
"num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21123.943798542023, "timesteps_since_restore": 6374400, "iterations_since_restore": 498, "perf": {"cpu_util_percent": 35.686666666666675, "ram_util_percent": 59.20666666666665}} -{"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 613.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 221.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 306.735}, "custom_metrics": {"sparse_reward_mean": 212.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 188.27, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, 
"tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.05, "onion_pickup_agent_0_min": 14, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 16, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.31, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.53, "useful_onion_pickup_agent_1_min": 15, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 14, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 14, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.37, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 16, "potting_onion_agent_1_max": 20, 
"dish_pickup_agent_0_mean": 6.37, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.83, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.65, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.61, "soup_pickup_agent_0_min": 4, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.57, "soup_delivery_agent_0_min": 4, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.06, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 16.37, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 16, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.37, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 
20, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 16, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 578.0, 587.0, 570.0, 587.0, 582.0, 587.0, 582.0, 573.0, 636.0, 630.0, 636.0, 633.0, 600.0, 633.0, 444.0, 630.0, 636.0, 636.0, 633.0, 636.0, 630.0, 582.0, 584.0, 639.0, 633.0, 630.0, 587.0, 587.0, 582.0, 633.0, 630.0, 630.0, 579.0, 582.0, 630.0, 636.0, 633.0, 636.0, 633.0, 636.0, 633.0, 630.0, 630.0, 627.0, 582.0, 630.0, 630.0, 633.0, 636.0, 579.0, 587.0, 633.0, 582.0, 582.0, 636.0, 579.0, 633.0, 633.0, 630.0, 630.0, 582.0, 630.0, 639.0, 579.0, 587.0, 582.0, 639.0, 636.0, 587.0, 630.0, 624.0, 582.0, 636.0, 582.0, 579.0, 
636.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 587.0, 630.0, 587.0, 582.0, 582.0, 630.0, 633.0, 630.0, 624.0, 639.0, 630.0, 630.0, 636.0, 633.0, 639.0, 624.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 290.0, 288.0, 288.0, 299.0, 285.0, 285.0, 296.0, 291.0, 291.0, 291.0, 288.0, 299.0, 291.0, 291.0, 285.0, 288.0, 324.0, 312.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 301.0, 299.0, 311.0, 322.0, 221.0, 223.0, 308.0, 322.0, 319.0, 317.0, 317.0, 319.0, 319.0, 314.0, 309.0, 327.0, 314.0, 316.0, 286.0, 296.0, 293.0, 291.0, 324.0, 315.0, 319.0, 314.0, 314.0, 316.0, 296.0, 291.0, 293.0, 294.0, 296.0, 286.0, 314.0, 319.0, 316.0, 314.0, 316.0, 314.0, 282.0, 297.0, 288.0, 294.0, 316.0, 314.0, 319.0, 317.0, 314.0, 319.0, 314.0, 322.0, 316.0, 317.0, 314.0, 322.0, 316.0, 317.0, 316.0, 314.0, 316.0, 314.0, 311.0, 316.0, 291.0, 291.0, 316.0, 314.0, 316.0, 314.0, 316.0, 317.0, 312.0, 324.0, 288.0, 291.0, 290.0, 297.0, 314.0, 319.0, 291.0, 291.0, 293.0, 289.0, 319.0, 317.0, 291.0, 288.0, 314.0, 319.0, 319.0, 314.0, 316.0, 314.0, 313.0, 317.0, 293.0, 289.0, 321.0, 309.0, 317.0, 322.0, 293.0, 286.0, 291.0, 296.0, 290.0, 292.0, 322.0, 317.0, 314.0, 322.0, 296.0, 291.0, 316.0, 314.0, 308.0, 316.0, 288.0, 294.0, 317.0, 319.0, 291.0, 291.0, 290.0, 289.0, 324.0, 312.0, 288.0, 294.0, 314.0, 322.0, 319.0, 317.0, 319.0, 317.0, 321.0, 312.0, 311.0, 319.0, 290.0, 297.0, 316.0, 314.0, 293.0, 294.0, 288.0, 294.0, 296.0, 286.0, 311.0, 319.0, 316.0, 317.0, 316.0, 314.0, 324.0, 300.0, 
322.0, 317.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 319.0, 320.0, 310.0, 314.0, 314.0, 322.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7550828949863245, "mean_processing_ms": 0.21976950142528295, "mean_inference_ms": 1.358698286827804}, "off_policy_estimator": {}, "info": {"num_steps_trained": 11976000, "num_steps_sampled": 6387200, "sample_time_ms": 22150.153, "load_time_ms": 39.223, "grad_time_ms": 10918.039, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016517750918865204, "policy_loss": -0.00551184406504035, "vf_loss": 77.22379302978516, "vf_explained_var": 0.7703518271446228, "kl": 0.002163690747693181, "entropy": 1.1175265312194824, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6387200, "episodes_total": 15968, "training_iteration": 499, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-05-48", "timestamp": 1660262748, "time_this_iter_s": 31.469547986984253, "time_total_s": 21155.413346529007, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, 
"env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": 
false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21155.413346529007, "timesteps_since_restore": 6387200, "iterations_since_restore": 499, "perf": {"cpu_util_percent": 36.49555555555556, "ram_util_percent": 59.12888888888887}} -{"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 605.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 302.84}, "custom_metrics": {"sparse_reward_mean": 209.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 186.08, 
"shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.84, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.11, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.05, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 14, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 14, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.21, 
"potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.69, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.45, "dish_pickup_agent_0_min": 4, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.81, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.52, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.21, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.69, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.21, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.69, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 633.0, 639.0, 582.0, 636.0, 633.0, 66.0, 576.0, 630.0, 636.0, 582.0, 582.0, 627.0, 636.0, 639.0, 627.0, 421.0, 587.0, 582.0, 633.0, 627.0, 636.0, 582.0, 627.0, 630.0, 630.0, 633.0, 633.0, 636.0, 582.0, 633.0, 570.0, 579.0, 587.0, 582.0, 639.0, 636.0, 587.0, 630.0, 624.0, 582.0, 636.0, 582.0, 579.0, 636.0, 582.0, 636.0, 636.0, 
636.0, 633.0, 630.0, 587.0, 630.0, 587.0, 582.0, 582.0, 630.0, 633.0, 630.0, 624.0, 639.0, 630.0, 630.0, 636.0, 633.0, 639.0, 624.0, 636.0, 636.0, 578.0, 587.0, 570.0, 587.0, 582.0, 587.0, 582.0, 573.0, 636.0, 630.0, 636.0, 633.0, 600.0, 633.0, 444.0, 630.0, 636.0, 636.0, 633.0, 636.0, 630.0, 582.0, 584.0, 639.0, 633.0, 630.0, 587.0, 587.0, 582.0, 633.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 325.0, 314.0, 319.0, 317.0, 322.0, 290.0, 292.0, 319.0, 317.0, 312.0, 321.0, 34.0, 32.0, 288.0, 288.0, 316.0, 314.0, 322.0, 314.0, 293.0, 289.0, 293.0, 289.0, 316.0, 311.0, 316.0, 320.0, 317.0, 322.0, 304.0, 323.0, 202.0, 219.0, 294.0, 293.0, 288.0, 294.0, 319.0, 314.0, 322.0, 305.0, 319.0, 317.0, 291.0, 291.0, 308.0, 319.0, 314.0, 316.0, 311.0, 319.0, 316.0, 317.0, 316.0, 317.0, 319.0, 317.0, 288.0, 294.0, 316.0, 317.0, 280.0, 290.0, 293.0, 286.0, 291.0, 296.0, 290.0, 292.0, 322.0, 317.0, 314.0, 322.0, 296.0, 291.0, 316.0, 314.0, 308.0, 316.0, 288.0, 294.0, 317.0, 319.0, 291.0, 291.0, 290.0, 289.0, 324.0, 312.0, 288.0, 294.0, 314.0, 322.0, 319.0, 317.0, 319.0, 317.0, 321.0, 312.0, 311.0, 319.0, 290.0, 297.0, 316.0, 314.0, 293.0, 294.0, 288.0, 294.0, 296.0, 286.0, 311.0, 319.0, 316.0, 317.0, 316.0, 314.0, 324.0, 300.0, 322.0, 317.0, 311.0, 319.0, 316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 319.0, 320.0, 310.0, 314.0, 314.0, 322.0, 319.0, 317.0, 290.0, 288.0, 288.0, 299.0, 285.0, 285.0, 296.0, 291.0, 291.0, 291.0, 288.0, 299.0, 291.0, 291.0, 285.0, 288.0, 324.0, 312.0, 
316.0, 314.0, 319.0, 317.0, 316.0, 317.0, 301.0, 299.0, 311.0, 322.0, 221.0, 223.0, 308.0, 322.0, 319.0, 317.0, 317.0, 319.0, 319.0, 314.0, 309.0, 327.0, 314.0, 316.0, 286.0, 296.0, 293.0, 291.0, 324.0, 315.0, 319.0, 314.0, 314.0, 316.0, 296.0, 291.0, 293.0, 294.0, 296.0, 286.0, 314.0, 319.0, 316.0, 314.0]}, "sampler_perf": {"mean_env_wait_ms": 0.7545081908133792, "mean_processing_ms": 0.21966006909098515, "mean_inference_ms": 1.3581228643420677}, "off_policy_estimator": {}, "info": {"num_steps_trained": 12000000, "num_steps_sampled": 6400000, "sample_time_ms": 22245.607, "load_time_ms": 39.734, "grad_time_ms": 10935.257, "update_time_ms": 0.002, "learner": {"ppo": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013599375961348414, "policy_loss": -0.008364356122910976, "vf_loss": 75.6484375, "vf_explained_var": 0.7971202731132507, "kl": 0.002114498522132635, "entropy": 1.1208560466766357, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "optimizer_steps_this_iter": 1, "timesteps_this_iter": 12800, "done": false, "timesteps_total": 6400000, "episodes_total": 16000, "training_iteration": 500, "experiment_id": "76af80a55f3b41ff9ef5f6d5d2b34a64", "date": "2022-08-12_01-06-22", "timestamp": 1660262782, "time_this_iter_s": 34.34099221229553, "time_total_s": 21189.754338741302, "pid": 66422, "hostname": "Alexanders-MacBook-Pro-2.local", "node_ip": "192.168.1.9", "config": {"num_workers": 0, "num_envs_per_worker": 1, "rollout_fragment_length": 400, "sample_batch_size": -1, "batch_mode": "truncate_episodes", "num_gpus": 1, "train_batch_size": 12800, "model": {"conv_filters": null, "conv_activation": "relu", "fcnet_activation": "tanh", "fcnet_hiddens": [256, 256], "free_log_std": false, "no_final_linear": false, "vf_share_layers": true, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action_reward": false, "state_shape": null, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": 
null, "custom_action_dist": null, "custom_options": {}, "custom_preprocessor": null}, "optimizer": {}, "gamma": 0.99, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "env": "overcooked_multi_agent", "normalize_actions": false, "clip_rewards": null, "clip_actions": true, "preprocessor_pref": "deepmind", "lr": 0.001, "monitor": false, "log_level": "WARN", "callbacks": "", "ignore_worker_failures": false, "log_sys_usage": true, "use_pytorch": false, "eager": false, "eager_tracing": false, "no_eager_on_workers": false, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "evaluation_interval": 100, "evaluation_num_episodes": 10, "in_evaluation": false, "evaluation_config": {}, "evaluation_num_workers": 0, "custom_eval_function": "._evaluate at 0x7f89e25784d0>", "use_exec_api": false, "sample_async": false, "observation_filter": "NoFilter", "synchronize_filters": true, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "compress_observations": false, "collect_metrics_timeout": 180, "metrics_smoothing_episodes": 100, "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "min_iter_time_s": 0, "timesteps_per_iteration": 0, "seed": 11, "extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "custom_resources_per_worker": {}, "num_cpus_for_driver": 1, "memory": 0, "object_store_memory": 0, "memory_per_worker": 0, "object_store_memory_per_worker": 0, "input": "sampler", "input_evaluation": ["is", "wis"], "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "multiagent": {"policies": {"ppo": [null, "Box(0.0, inf, (5, 4, 26), float32)", "Discrete(6)", {"model": {"custom_options": {"use_lstm": false, "NUM_HIDDEN_LAYERS": 3, "SIZE_HIDDEN_LAYERS": 64, "NUM_FILTERS": 25, "NUM_CONV_LAYERS": 3, "CELL_SIZE": 256, "D2RL": false}, "custom_model": "MyPPOModel"}}]}, "policy_mapping_fn": ".select_policy at 0x7f89e2578170>", "policies_to_train": "ppo"}, "use_critic": true, "use_gae": true, "lambda": 0.98, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "shuffle_sequences": true, "num_sgd_iter": 8, "lr_schedule": null, "vf_share_layers": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "simple_optimizer": false, "_fake_gpus": false}, "time_since_restore": 21189.754338741302, "timesteps_since_restore": 6400000, "iterations_since_restore": 500, "perf": {"cpu_util_percent": 34.074999999999996, "ram_util_percent": 59.19375}} diff --git a/human_aware_rl/ppo/trained_example/checkpoint_000500/.is_checkpoint b/human_aware_rl/ppo/trained_example/checkpoint_000500/.is_checkpoint new file mode 100644 index 00000000..e69de29b 
diff --git a/human_aware_rl/ppo/trained_example/checkpoint_000500/.tune_metadata b/human_aware_rl/ppo/trained_example/checkpoint_000500/.tune_metadata new file mode 100644 index 0000000000000000000000000000000000000000..1cb609180e0541d45df640dfe4b966df758b3d89 GIT binary patch literal 34495 zcmeHwdw68WRcE)}uhB@F_ly(Eb{sT@ku|a>*u=4A1y9HY5=g>?Kp+Vi10f^^J|<3xlaC}gA#9cp0yccGe4G8S_uGWc1{U^r z>UHmx`nG1g`(Ik$%&FI@s#B*L+*7HSYb)c~s_TrGk-2EQuDufQE-!1BY&Upsz&iyB z+?>Bsb10@Qe?zdb{&;XtFgDxH(>Dc~Oy;itz@GrKDL+`4?vcDHP6dwvkMBYuR>84L zeqqHrQ+1ad*9&G-qJ9vQbsgJxa*>oeAI#Q0zgo7+j_($;o@mxcjC>^7m7p+K*gf5I z9sT`zK~*=r39l&7hIv~tLV00NBf7aHJSFMgT#$V~)pzxD&tJ#TM!bSu1O1Y%QM#Ud z_Pb;E?#mmx;&_rS6t@;DZg^91rq<_TXa~U_+pk)+Vs@!sv(UYezKLk<3SBkkKmxv@ z;^NQ^HF5NY)EvD;YJh(!)Brt!nk(v_ldqRBdM=4Yqs_A=(6&vkS3qJDzBQ{XnSI;j zN+5B+Vqea!UW9z@Ylxk|v&P65Pe&6DH7GinF^!QE=(s#C9v8=!S2xR)z@4UfE}6I{ zyh^SpnPpA$BoKF~R`va2Wl;~vZ6~XPNf{D2mWY2z1U2DQ;|h|wl!%`|(1B_Nxj2Q z#<2Cg)ws3c`v2d5;?u8wbV*aB$7$nShDUM2nrAZf#aY#az8J?I{Wb{fya@V|8P|lq zDF61v__t58AZez0$L^Yfd@g86O^iok7WnqHYKm-Fe3U=&@t=fl zsELoP{kfu7Xfj=6KEjyat!UZT5ZmyjJw9FT#99eR3dNP9TI@6EFQTH90rP zlE9HBISgasbtI7$<4-(GQYFc3YLX*?to>dU8~)^d|Gz*}eD=G$^q6mmZFsb4%r`_f zbZnY?wjwv+j2=(>8e(5O9Z~7&R!o^hNYl|AIgo&#KuZ#O z0#_PR6XOeM&wbx)N=;N;;yu)mnq=PWm2JJK9sHgKxP~WjF>i!w_z_3lLP3ldaojD% zM)|NkY61mUR%Mx(R=`uQ-{>7nL`G=&=~Hl4LMRcH{k&lskdf7nk{!)7^yI7g2RGyDu(t0^L_) z(@=6e?^1=D?vtA3OW^T#*-G#d3=)YB8-w7roAw?Ft5%S`h)?3RjPU(rQ_N>e|>o2 z-|*VJm&q0r_9ke->ThrcI!l#gsq#o(ElTA7!ryEdnWV^W;Nz4V}Fy+$lK zm93ZdD-w(S3_6p&7r;~tGn7Knv&fhwm>uQAs`oXiAV!O-G}I7eomYk$-WGf`Z7pDb zi5ep46TYn8<0G4jr`E*qWuulAe&Mpht9T#i^CN>p8_;MYn70RGGgA-k7lh4BJ@`+` zV`3f>5h>tI=p^kqE4hrMw`@)_*&lwNN-`y%Eofw2sX-2_&`A$IuaF;pT6w}sx8mr! 
z&dPI|2+gMUtyMIjM>o(^8d9h$H8Cdp*-_mtll^--M!4YN>RA=@@E5g5x68zY6zW{2 zuF#{aFtsa*$G~_&m25gLB5c=mo>j;?F<1r=jRHJxi1Ws<^<(=9!F zgU!_Ka%JmYC%iRFd(Sa@_lLF5yDpYFZjI&nu5R zp*_MyyOz)Cm?7oyrnKi}<*D7LJXP%p%P|FN&+>VNY)Ey_>6jOk$DLQ6nog=s>7-HZ z8Pc99?HN@b?}GN6SDug_5z(-}Oz#-vny#XDL6u|ZY0^@ER3UrME6?&-roiPF6mmE&rc{ipJw~*apVmodwP#d&rj*C@X|1MWm~g?m!g&=F z(xXXrhg6cOs{zM!Ij$jNROd1=_vx7P%HsUQoG=Cv3$9 zm1L?2IX|zGeBD6f@y@Cw)8u8{imfHro>$1#Ddq9bY0r7(F(Of(QZfF7@_5=~`aPV! zh7^uFuRL4lF?UqsXyn=jjeMW-7`B+&%O>f9!ZFM=L&mh-^o1eAa3joRTA@oeQ-Sxg zs=_<3Jr}e`$84=4oX2Vkc`JX+fUW5&8qhPOvYSC<2y@TrlJC~39!X+=o$ zsLCFetjpPovzbDt&Ik1V3fB60>>S4UK{6t@T>FewUGS`$Tg~Ehhzu3WV)F~r@^4gc zf8vU^yHeQRvM(9m7laSHY4)7<@yluJ9PX>traQ8|oJ&n^MpE17I8vcQ zZqyG^f$B+x+@#0x@G3%>%x`{qA&BK&6pdF3LP$hCBo*e-h9w?nKuBh$KJm~0y!(L{ z?@$LZk%V(Ub@$)CKKC|0Bu;2hxm&F-7HV}LnV$RBsbBonzdyzIcq^4`!8+p<7YjZ% z)AZy+lzPqLOCS85BZGC`ldaY(*j@`#SOd;A;&7dJlHF?#g$+ zDa(fh+2$a?BCcF^mk>%mAk*kgi-_0nSlVl-^{%TvbtuJq3RTaq*kx3>$2(mfU$Ber z_=1C1A|+>K+{x8JT?NN8RrJ!SkGI-Np10#YR;lIPc0j;^`2sT)_54JizIgzjb;RjddWRu7&6< zq%YXOv5Sa+5f&7DigrNZZoD4KTOLhZXtwPxN~yi9h!Zi}g6TdU{n(`v%p<%qNugus zkgJUo0!mLt=ttv_Op_QD$p=tM^^CRXR?ql_VBO=pOVvdy=bSELCN#|enFA_Lh%V)9 zpPqjsScV+32(Oy=BRSUVd{3#0H%C^3hOkMROnTUL!m0CibZo%)R?(l?YIO-Gh2rUw zJa!B4_6gHyL@4^oImrPGD;msrEVWXt`<4{AfvgKTrnfuQY8jn@gDZ=k-_T$uwy8|J z=TF=^eSGeC=9G13#+twTly%SXHwSqBG;-UWC-1Q4@0^}}<2=RPde3bl`sCdRZ;I~Z zdj&SYyajOT*39&ASx|}(QK6e7hea`)V(WF;#t3=cg{oV8fS!__OY{D6$u38UAHm76 z>svs2b6<`3gPLQ;RW4CAgX`6>Dnq?ICrzQgvZ@QhdO%WHDZ_u8?$igYSk7 zn=kHt&!=`pVFKxo7O9|bJ(c}iZReCqDm5}eJ--l*vjLv zp_tKdnvA0yA(nj$wvUyL&eJvBS$rurf({9zltT+$P%=QgqxA8 z(2Ye}sx+4@J5T)#sxX(*dmB{T{wd?a%+on&rL zwpvD6boCj^xL|?mNk4xSmoHA%5=%^J1(Y0$s>|{S-Ezt{nhcsUymQemmu(E*m!5(G zH1ia1!;R9+E%OU=U;iH;`!{F>q;;uI^FU1i4UT;t3Lj0w2Q3=MO6kC4EchobVTh;$ zmL3u@W?)`M;z5(BMrOWb`@U0=gJwrQTXB4=P|W2VEL)fh=2B_=bNG-`vO(TW@p5{lGL^1rGt9r3{VAv5qIi-F@izgBq#S)&@t{aSC${R9BE%o_ z&W;c#S1bp!Q+!t#L%HzemsE{39|_|r4jl;eQGPItE@SY+cGK*vg4s!aU`w(PDe7mW 
zMkoaPi(&;jQ$#b$rJAnbvap0AsD|oNw-#x>SSZ@0rK*h+kKhp_XwPPthz6t*5F|GR zLLcTldckdHmoVDeMFUUq-bKvA^q3l|IPZPHajWujjn)A5zFFKmX85jXm?jme)bbG( zuS6Zg!pG3l*wN8}=tGT!KpzE*=h=1MD-8G`HbNWjru$aRji7l%dgbC563`*uee;o% zel=TwRZuZ|lI}yP6FfdptYAMAA@`7^6E}ht{G&9p0FvyB@uMa;nNG64%PZ9Ld8|eY zaA=t(p$5~tu$ItvDZ_V2U! zpe;%V-xV&9APLhc78T6bvG`nFz15%X%5Qskl zhaDj>E~2jmoZZbP;9x$MN_Lx6vfJ48B9^@l&br5b6wU^_LB{e6nUJ$?JDLDxU=o-PU+l5PA8isC&f-kXHGcvI?KRAfR?9I}7t8{)64m4ACFI+0%r{Eg3lUWqfDki%Rp$%pOV)ekk z#!4k~fRJH%K=wBPb+#xX3IY}dWPlrNN%%{E>#QtdDgsskaY9lfC(V}OFrPpjV1u0& z@C+al?}x*OX$>zqQLTwRAh6y+SShv&XPvEysQ)D3I|V!os5KDwpon@%z=r|ZBLp5L z@EC!|0oK_%;0D_e@Cm>Sdl#Tt>lqgzwbrwDBNSQP*^>xwuyX>wN5G!}1S{2A%mM`2 z&l31KfDHS2^3rn2-VcD@`$ZG+0r)o92L=3)fWIW*c|Zv8hvA@9_RAvjBLe=4fWIo> zM*+dWUn6IV{kjZ&jGR&SadIZvC#3TmGV(X!n3aJ&MUg4?The(N&N};T3Y=oU180N% zu88_1V21r3AcorSi@>J^{0yL+dD$~E^$+2!vpQV=F!p*c(%s@Aq(iHE(n+%_oXTWBVFqRhx8nqR&I4OkqV=xbuVV5$#JI*Qcn&uBv;F*PI$cpcU9c-T7&Kx#2Bp(S2>n1xDr~-D zAGEC(a?kgLoi9{L&1sd46L|53 z7t1i+^o1@UiM6u)oy1+@x9TLFOZ7e!E?K@30hQbz)iq>6BQ>{(LlPgV0e#B_tz1#a zJH-huxoxLuu3DiC~!}o8k!?H`Yf~~1;kiOvfI(ct{ zWSFR_%$p?@R8(F1?1D;9l-A9N$pX6>b4V(AVHc~a`b$vb^wA6sNl2^C5^u8*8gYcP zXp^w^7&FD0TfQV7g8u>^@fLBW2lYqp_vIF6>T*6n{_H#XGJpH9xe=QDx0|Cj}!qL!cACPJ5RLZyptG&j z{uT#X9B6T%#eo(FS{z8?Kp>vLX%!+=E;%^^ZJ$}zgX-0hK+m3IJp5pz4eEXdzP6@OFd?$sc@6Nm_lC^jG_=ywNN#uvw zU7B~?HhIs9V583HqMG;3t>D0ZPM99mVc6*LUFzV!Y}c%;4a?V}ze1OvL$P>V=-^gT zKk^5zu#<)vEI&jy1z8w>=Eyd<j*Gf}ce13xOpfAN_VcjmSSAw}WwN3Y=(tYU;{0|NvrgK4(`F;y_ z(b*+XWmy5=9j`-L?m`kRVWv(P2OD8T@Pt*96|i3h8|(OG3vvE9+N4)t@d{HiK4Rrz zi7T#kJ;#>=4)-@tp_nx>@;(k9`v`T;fMqS$?GhQz6Gh_~vC>WnSFd#aq71#qZxW{b z-p!a9qMYD6Ez7P{@DhY-^H2nA3kEG~>5Q@hr6?H4TG}d9hWjkC8HR;7QrRbnj&5L7 zvjQ1hgwT1g<`uzx6w)F{7P~U1ux1D9VyU_S+jgMhJ0Fh)kKHq&WEnSR#^@?|62eG# zXXp4}#8g%qd-r3Cq>EiLRK=|m404?sy~V=)o3f5qFOC)<3B6>qEbjUPnBBn$u(Uuz zrCKq3N||^~n*_{^yX9}8kfkwMMyO{iFymC#4s_9KdMsc8_%X(Qp5Sa7#!CNe-|VjG zXV$yj0Ux8~;dDB+o?dTTY+Gyd)2!`$`zppvIs;%~wXL=-b-C+)=X%F_`@?N(-K*WKYa{h|Dxc1^=Q#DZ#Tr=J`=i1#&dH6c@x|Vc#w!k_!GSbp 
zi(S)w>zFN8Q8LX7kK!Ry9=|YC#~TS){?VF3w*QzG#Igd11`Y4IN#3~L9L#+^J)Oen zTd;9!jh+Ce7`OZ;T-0I;fK|P)XfYiTioCXTal$B5EK$e1@oZ(G>d~x+XB_lY0Cd4@ zvC!{hR_Mh2KFLyB1y7y2^kUns?zxeJ2v9BO{S**1;woKO+`#}5@_`@zfpB)vYe7tC zW4v3k3`_e+cu#LE-PuuU{Ggm3Crsfg;;4z(sWFI^G#a>yIck}hZcPo}iKiJM&k4I* zTLi&AM49aPiu1jOYr))S)6-)qOqM1-3m?zmEi0~f*s%G@bW~c^;NsiWDHV9hcAyT>Kp((r)6H8Mh74rXN?HMfz zvuTa&-}Q5nUNQ#30|a^ihfxLXP1x(e${If#h2?`aPCH@1#2}`w5mIFo@ zSYQ8{6BCq?B6MnV*eo>%>IZ=3)n{VwG-$7SZAGp8>|a( zwx4y2K-9#uM9;w*F6ampGuh-lfGv^YQE$poX-WCc^%Xa1mOE_AsZE>K*ffff^9B6UiQaF%s-Nmd%szi88*19ol@uDRI zEg5LZKuZQ%GSHF%l7S>uB1fu(Lm*FJkpP)|uqA*^mM*Loi4&KPbUd42+roSxH!y8s zb;RSq{?`D@t2A~4fg1_@7yvFv@X<^9m?l2vijNc0x4dvGgfIT+?@#UvAC<>ZU@WxXyxzx; zf5h_7eFxvygCR&6zrRTZpulUsvO=Gx;~nOkvO~D8vFN&oK39+>+W`4v7V@`m$K{Vx z@F9FJs1erWb@X?&#KNca`eDOi;nJ8s?g-PU$#G$xh0mhlqgV9JG1%l}mugj98^itr g#xAf!Ffm7tO~90Ehi9M0`&rmS<%(G!MnL2L7yE@Q^8f$< literal 0 HcmV?d00001 diff --git a/human_aware_rl/ppo/trained_example/checkpoint_000500/checkpoint-500 b/human_aware_rl/ppo/trained_example/checkpoint_000500/checkpoint-500 new file mode 100644 index 0000000000000000000000000000000000000000..9e9c1bf4e4a6ddd53cf47ac2da80f4f464ce6504 GIT binary patch literal 568778 zcmX7Pd0bD=_kWxA4IxQ8vZWHL*PKzbi&DxGDJ@Ebijq)i-=rccBqduUq*b-?mF|o+?VIcuw8^!0GsgMZHiC#6$Z++bsM%A z`K=ENFcO^xga!MFx>y+)wtn5JO@V;{VWKV!_rCq}e_tRLxAH8igw3VT1vi<5%33~P zu^XN)69dx_9e9028h*BzkVY#faK8TlVlDpF%?mW5&GN_KZreuC%6ko_;m_F7pEX1? 
zY9GeBg@I$y61vw~o=T322iXi0E~}kMd7c9NoqiZk*z~X`WslMOt0tV3DZ!kdW$e!U z2sj_}iajZQj1w2hKuNVW_*VMi(>{OxHslFjdD#j1N_Xot6&TUmT+2MQM)S$F%tjNL2|QqRu|_y|?t`}f@w+-+V9OaAH56|(y+QbAz@yc+e2Yss zmdokWs?(BOGS!Ptntl)ilooJz$fB#(Eb(Qc62{uyGrY z+vrbxEDuBG3vbNJ_km@eBcK9XLu$Ej3l_ep}1gWqCB6$ zU3o&nGVIVS#akME;Q#P3=)Ek*=@rLOa@2H~mtMwC`y228i3L0C_v=-q}OLeM|c^vb0 z9fZkmz6xb+o50@cBY4On7{))t?!I2(@XRDS$Ht64Ye>SL*Gq9tQxl3GRY9iA1CPSCc;eEz8;McLyG)uzQbaaqm*g+0yTb_GTr{&jc|k=UwdElZ7B{ z`iI25e+gTc$l_R^O=QT&QdGJ%8E(y9#cr#dK&834M9ZZTqP5nc>(zV2xbQZfSCSN- zm?qCl)AYf^s~1Uc0A7&EfTKcp;#`!3f1)P9htql3up^sH6(0hwGTJ0QKcAeOluZoV z``Pd*_XV-b_Og}5^{hnXYa<+wFFyljfaz#c6ePXiVR!+k=0J@K<%8nn5D6vRLqwIt*m!2M{h6N z@0=}It?>x8roUzW7mkog<2BfY(I=qg;9dy5@<_1xZ+rEvar0{{<41zgr5U&!CgSjq zzPQiRkD%BVY}X<1?Xx$!9;pzn%Du&Si8+>J{9qe8>PYd#%Yw!^QS4Zk6^eb;Az@u< z==x$RY3P;}+6;!m-l?~k%B`iCw<;LE=)VzsJGdTI>v{zZ4%Nb*U(Ku}NeR?{uZH3S zXIb~E9qj3dv&?wm2GEQ9%ZNu6QB`tV<@FN-P`u8%+QUko-YL7D(8lsr@)H?EKDiledXKPmrKy_~_ zi8Y*K8C&y^bvI^_V$TJTy(Iwy=M6*4%vk8V_>e@safgBXDd4U@7DwvH0(4m5o?LHq z@M|SGmhwm@=`x>HcLZg3Q*hM1G{hE7UTKIp+q4UPsy{>B$UN38WeDw=LV_%G*YCVNe^QeXWla}CVBh=7glNlYpD}a8~%clK3Cufa` zs26AyPlng?@3XszCTEDD zc?NNt<%;pTGLUvA6qo;W!g-~hLW61<)Qgc61RTxBbF+p)d}JYZ6qX6;@)PLtWDjO+ zyKQ-xo^EDJAZu~ z{CWBtuH86*PV&du+fz(1sO?_-M^ix_!3^m&^+0X9ecMb;HKf!RL|uK}H2w*SE2RZ92Fvrync^$M6;D-_XR} zhyH!;0^aAQVCvNs^x%&V{P502Ft_9=wpJ9uMx#2k-IGka^cA50t0BL6YdYRslFIK{ zsesA^BRGF8japgBQkmp7^6SPP{A0L*E1VJV^xPf%<_R^J8CS|wpH@JHeIg&3dX5*2 z+{0fD>mdtPyYmY>(!oY>79MWvVqtIQ@XZfr@}**5U};!7T`8f(R4%4-l4=5_Fb(V` z#G$_3F5r`Hfbz24JSFb{TkTSV(y~+d((sd9tZkEJ-@4(DlWh+#tyQr*?IZIXKL!5k zYT_XMK~znHpmvfpI$Vy2fZ-Gm4NgIyd@j6b7m01zVjwmcVqsO`D6~2gFI*_=j_2j> zlSOLY==R(SqW1k`QSPN=Vzn-x?xew!pc@^0cf&ZxdWg*E29>f*%wMcd-~C#PL^q0W zuRIQ$r1Z%CaRTJ!$@uD29GKoo7amZGC9(>4$?07iEyErJ!lJ4fFms_i^yN>23XcP9 z-IQn|t5I&LkTsM(^hkm~m1kh~>k?G4?}Fv2<9JfoScs^4MI75d!h`QVJY!U@;E}^> z2r2mo5#wd)!Zhrku(>5Rt%>qTfXAT`h|4e-6h=lp9$TyKL>rYYIyzce!S(*NzsH4U{ZS-lVdZf zM29v11=>97rV*D6H0KKap9RBvz3C4bMc(|d5(b`h5~lnSeuqz^Wy_TDu;XTU9jZ!~ 
z{K{atccl0>J1MH>vJ*>s>cG>bAEsJ1V|pe$@_}^b-FmL` zXAyMz1k%Y>j`Vx?MbNnF!9(w_r>npC^Z8?9@o@Dz`fB`MqFa51d+Rpi*mafg_Jt$$ z*zua4|EI>?UmPck!;bUR=?C~O=T>-iB9Y23?g7SYQE5Xi_HKNNo8xBlrsWZQUVkU7 z{##C}itEAdy*vDT{sDbwB=f@P8>pW4kCdD=fai*T!O?h(C`Vt!rZp|4&lJS@#~43Y zzuXU{{o-MR!Cfp5j^~wo4f)n9qxhxE-u#5eUCYn!-@udRUHHayE&C9-6)n3Jxx|kk zZeX{YzrCP>n&Jw~a%>JfI5ZV>qFnG>kO9=CzK1mHL3XcH7U~)jkk3;`^^Unj#y^-} z`;ZL>hVEpp-!BlKff-mOZcR5`+dwPUeZrwSTj1(2pS_ zX;9fU{^Xbmeo1Zt*%P~I_2vCgyfFyQJ&Gg2YL{Sn2jw;=)%fjmVO0LgGd%Wb68q`6 zgLQt+$9E|~yz#jfwIAIIwtLo4`|R1gYK<(@tT}_jW{jXxZndmqfeu}`YYytH*u;0J zCkcmr&Vu@63-0YC#wFLS!jRVS@FnLOZmCRy(?eTeU!X0<#~0$&;kLwKMJ%lpugBup zPeK=iAEdW75kl`);_X{|$i>H1IH+a;f3Htv>}e?pxp=L%=)NjGf4vh<30iSZ10`Km zKIqk(4D0e8(Bx$r4jGex8o4$ozc?N0PT0e+ofpx!@G7JRj3VDW%P}PFG@CnP1MZeT zLM~s4XHlQ`31&z33MO1UjP+&5;EL2iR8l|7Mrl_PzsTz-Ie$2y@Dn+j5hUpBT*$tp zm9l#C>GM1+50tYq#hlIEKaigHn;dYe11nG}zc7RvDAzg0B< z%XUb4_6vr-(WUM8R$x+R6Ed;yb&X{OMC!p_k~Pr|ebkcSXu}vR4VlZLWz_NRpfp4o zyJ5_L9f&UG7pW@-{GFTMBXTOft8x; z;c_`fY+WSK+t&)ZBE$qqH(W8LrIWatj$qmDxq=t65-|8A67I$>2Zy_cFwi}Pr8s;N z4y&07=E26I`LTw@$iHT9r_MzkO;L=P$U*moV0Qm?0@`(H!{|emm`E!v7M4v$b<=j? 
z;LjYUSCj~&)u&ioSE4u}B!k%vX~YGsp2Fsl>a58^i`_eUntj(xLHp`QSaX=LLs_q| z%q|7p47OOR9NxkFKQa>d{g5!iJD;4^w8U52yx3K0#zVKh#uF0rSa41eEm=a~Me96X z^v9j6u4;qR_UE97Jf2l=u!SK-2tIZ!ZG7nEFIK%>bWG`D!p zYRc`o^qU~=Gc}aI-{VU^rhY&}^;`UMU@s$EjOiplS?==2f%>FQ!Q6M>q4sA!d2Tw1 z?s45u#Ko@Q`lT^c*{^`eJXPS⁣~5%tXzY1(XM!!FL*xJS=Jl zFSuNQ8?tBdy^=fl%JE^``b##B3@hSeR?ViJKaBYP!Rx$Fev{yc_Mh4zy`NBHUIa>> z@J62lM@ekn1Yx?#a%^zzsB3mphM*6+%y=mhd70_pV=;ncB%UET^8%q~);hdwq6}6y zG=&49xy1jM7ndyGQ zNtVudIRI%NFOY|dN7(h_V}w>}L-^On)4=e!CVW-u!RsSVl8xmRkQtW&;`d{5?X$(i zWAKa@=Wei0n2O+Xb3a#CTakr=Y*fsCrZ2hWoJRrUvcK*yps-Qug zu@ii5cYx}LT#}REj+w-T76u$537)_o6xi_mH`A%NRtBGcWN>&??rtMS{th_OHHM{^{6wh@uULK4M>Jh+gdUTJQ0WA3{&IsfR}KuI z(uTLW{F4i`V%Tkznz)mmkJ6+k?@lCN-`nukEedpE^l9$=sS{+%$8)b!`uw=1HI;r` zL-*P4;Wnq+iCsn^1dO%g?fR;GyOR?yw%r5UPj4Y}ow`AK-76TaG!xf)a+slj2%e9Df-g9B=rU4Uk&B5wOi=(WA6OL^*HXlaS^Wn;Z7^| z9AaAG#vmK{9xSI*Zq=+z&vlo>`0;jF?B`gH-7^F9BJn?nb}g9tTtP&w_m58Q${a z3=ZFR2X;ygrPV*Jp!-xEF;$SE`^F2IMXe9K(!K$U*JraEQhS)|=XAlujx}^wM+x{Z zzXBIN2)NbAPSg>Nd)u0+JiE$~znXd&;@gMPPoX=ZaG(`OF1iSf^S_~2{z5DY+zI82 zj`581*Wrwv5*?Cj&iovAfyAUG+-P4Y_p*5p?#tHG%|ri#;jdNU-sL@5C_9xpw~nGc z5nq}1(Z%SoMwyqjX5$?7$->Vr9ayhcAn;k(0XJk8qH>Wpb9rtL*+I+jcF04ld@TWH zR-UvF8P+D*@nUU%98!25hgi69rxOEP?luZ-T4aQT}vx15U7Wp(%xZ z5MbtuN<5YipM6W9sbWpF-z$+@L0j3$RkF-i=QtSV$B}lMk8JO^Ulu*9{^EyYxooeW zJ)oaI^n>d>DajV z09ZWH0sH)5vcGi_di-*P+X3Y$EGZ+OlWyaz@!#2yQ|dS_I0YqtC8D&8NPAL#Pi&%N zQKmfs)dC-|UBwenZBPkzZdXO+EBnxKVLG{8(ZVV_{NdDt268>>4jWvNL;Vpdb^ z1bubGff%pAeM!Ty%E$;~-}_ja>u|CvAs&|Bp9Mkh{t7?M4?=@C-YiFbHuRP0Ve_BK zkY#TSnhuIEkQq)syp6*=g9K*WtOL5ix#V+#gJ8b-X83Iti@8VUqha4yTzt2mC}n*o zp9^=9+`I2!ooxYj9oULP)Gy-x|LR$OdI5XIrQ!PoWxQoM)4dgY>iID z_Me4}z8Q_DEZW(Zco+1{(u10(FW8qSexT~S9*UR7kw1|hDCxThrMw@K?ky%vEb@Y7 zp2;TkZmh!g5M|8vG-gR<;q=G+2)<-9 zl03_tim!s>+&C`3VFGW8`6`?_!Wt(oH~_BdBj`9U4KO!2%8u5)Le08k%(&+n``(s} z8q+tTVWS3d=bbkNvwyhm;pa2luhZ^P{2iP&}xL@T@sb1s~>xGpRYfrD1I=tU?G4v^qhLZOa!0gUKX6WTZY`#Z>;)OV(R-_32HA&bR zl>riG)8XvhZD{QBRnRyg4uTfRv$QZVj7bq^G53R5-?v9ZK30??pG_6T{~vPfrYzA8 
z3PI-Yi$8R0$+|nQSmH%j@Rh)_9)8ojC$Q-( zK)qsfYE_m4uL2kI^wQltx=9CL?pK0WU+>XHvw!lj+M~E@$r%D>qIGYW42X;b3Eqnl_!{#dS4E6I)1ZnI}Y_6sCE5~*R%0PuJLjF}rR%h)$GUa=$^$dLCiXbhpJNS%!x!_jLnhLUP)BGm zT#wh*?%`ay9I8I@PxA-?9C8q%osBaDnDN)J~Cp$tWPeWY8wd}HxseV^9Iu) zp=2O=BXc%N0ofx{5V{l5*x{d`GGYSh@>PLh`YXZV>I>l!7J}`kV^B=>kNIU6S=4#u zE-;n|gh2D%@UFER#Vt6F+F34Ig9@4M`B;*}rsG5rzNXHy!fKYu#-Ed-#$9Xh&e(UT zp1L0`sVZ$>dz_q{7Y?KR6d_<+Fb3%w!MFP(@YFFAa;JL|oVY(9lYYD5SyxBef8c~r zzibxzZr_GwTO;_(?{}Egv`RQ4;x*$Y+(4z~D{$Iy7PX&q7Jt`u!SOwNFmh1I;t;_*I+}K zGkhFo%$w)U$8*=rXt!((SG?{-e`O5A$f;NPoazj|cD^dzwPPGgh`q)+GfzX9tq!_d z8=;@K3-8hT!*-0+r-pYoVdIf#8vP*_r4mZ$)b<3r(If#P+*Lr?p$eCYYK!f^H}OKT zk&q`h3k1{T=!{Rp=zq6!=;@y0INv4_mz4h};sUdI`}P}9elZ0|d>a2D%h;+vHLzf> z8@I6PL2t8CT;#SF{vP*%k)vHu(y$0eEu2huHRB|46zgyNX1#gb^JD@V zxm+f{O-27qRJRb0n;?)%5Y>nCmeA_f794){Hjd8P$z`m*998bX5;y_^@y^ zI4gt|4wvblZ3?t~U?Uc&i1HnOmHPX>uKbZ6<@X<)6tF_ z>0Hy>ggdU<&CBQYV#A+0U_JHB$!j~m@ zU+B@*69b0@4DVh^5XYS4Dp24X)KT8?f@ z$6>#Xa3EQV`P6u^IirnmFnJq`U79IaCfv;GX(r6+ZNe?9TZJJOmr!$6qh;lpQe5oY z$)-;DUS|>MfVqZWhXm|#1TOgw=jBG!=C)8Yh8mMK`1 zyN{hY^GdKfeg@9BeNWP>f5A?jR{T?Y8Xip_O{yQ&;^pEPJo<1S`F!gy_W3Qu*7UWo z!*xCzbrIpQ*iR-Gp@6rNgv_))5Z6>c74gj#Wbn^vGDt^5#>ls%r&0|whx*{V^>5+d z=3ykd%mj5zBH4$VK`1u*L#@q`aL_!y2HtlWftBtka>~D(8Shg;F&4V!l`A3x@vWtRq|c%487%Qwiw>0l2i9&!mHbUWF4qxbAy zpe!EPX-Kwc7qHU0ZZgt(GVxiS#riK5;1x}C7WAeBqNpz(^H~5d zI-$|4Huj|$Si6@Vm~P2Hn~YBOEKU*q+qXbA+b{IgHimthyNLOh<3uODU4X{^_*TmV zz8w3==4kqWos0z5Jllem3HJn3a)*Jj(IPTnCWkc@JFy`4aGF-)~Pe-)PA?`%cmrR zfvq!i*hzz7xF)-syc16BQGlrJZme9D_OG(S3&Z;cquT1+%S zXCFbc5M5rJo4~a{ED(yf{DTW7{#4WMDdt?Q0>z~D^rXsW%zix;4V^=vFJTp~&Gv(a z{{2D~g(NgknFLl1mE=q0die30pv}UWtYP~rCU%DkbrSxO)7r`qSE!F#_OVdyy#~FP zs-f)V64Jk04a}lvK~e7|Bh^GhxLQXM`74 zuE6(87sIE36=0?LTG%;Ghy0zV4Gs6U3RnK8k5Mj(WJ-cAJGyBqEPuTL#vh!F*KYyH zj10sLb7rFR@pSAC83X+-AMoSY#rX2hO1kgY3LYalkFK524^QVGA)%{Q;LVe>xb0sR zTKU$AD+asKNV(WCUl;~g&uO8I+dl}cAI(>a+P>Wv!oz%5(N?>e=sx5DS-$c)kaGgq6VT3m z#8s(DYc>rwGTmFEmyg-`&TvS5&dR* 
zFkX!g6raKF;hRa2Tpy~>+{~ND&f-h<2l35U#`3f3-(k0>BR}}66oz~&l^NtJ3X0PMvW06+J=)#nL$;{@6E~t(a1F4_K zSbzw^G^VV8W6kf_oT6pex#KhIo1+TC$*H)UM8PphWiTyVg$Kh*S%RuHb2fZN45XWg z7~jpJhfASOy#_q~YzZ4YdP&u4E$mT~r|G>EYp+XVo>>TYw%y0gVQ=xvJvx0}rZqgU0n>uzZC(>>>z-2FmG-win7 zNf=sK%k#87S|H8xEbk~q!8zG2aO%xuD%Vj4(lXJs(r*L5ef=yRnCZq0@$m24a9_cJIBd8MABE+jeDjKM zrJ*%eNDaZk8S~-eat+#jdk-nPyc32!6{8(D^m*5lR`@z85uB@MQhC{0a0$}El7*gR z@<}_M->b`e^VE5#)e^jOpbfn5{bZI=gY>c8J19QhhOadhx%P-k3_D-T3=HD9zd<&t z-@1WfQ#ZlHSaH0twv>B%OYmJ^#?F5B;{z_+u?NpBvVj%76nty5A^*`TD%&eXU*3wO z$6GA9TgG;-ojDxO&(J`HHP)>2xhg*x;>X_&r17#(pGa%#J5;UELC?9v=?uA5OugnK zZc`pfhl%w5>q`9?q125RaUY$$sSccmJS06D{gAP{fn4)Tgy@(!I@5m-jPA){Cx5)a ziG4r8@>l`O+c6X>%RB^qsfF~DzC7<$+s>7)Ns^G;l@R~P@+ma%WyuA`Qu7!-1o81>t;jX##;VH?kx08NdT+c5LorHR`1BmzN zNpw)!552^XVt1xEkF6RbSpUs|wtds!+CAI&J6)K`=_+(Q%WVOYhy5m`}E(j-n)nYVKvXU;$^yA5DCVcF=2duH=0w&m;p}~r~ z@LbCgnjqhgzSh3DXm}>Io!JQkUeQ#mScG*I3iUSqo;>v92T`p%fx?KHAYLOu7d%Sm zQ)X}Ca~Dmft!`B`RU(_t{*(tJJjPMSnIovYiY?tc%Za{yRLNg%+=8A5T=`Y|aa`6s zmOS?}p!*ibPW}YpBHKf5)nMJVu zW-K(AW4oU*5Kt9MK&Vdf)$Gcyex>uT8o^ZFe02P-gpkeb+EKC2(t}zST(y;#D=8wEIN+=2sY} zGY9f6JR_ktQ!vW=2dTU#BdSHmfZz9B^$j`vFk&cu;DXdL3a5febf(C zYk#rJHP&cd7l_V!aget$1;@+lqUP}ta4^k(-;hUj)n!n6=p5_0@|Z|0 zea5~T5V&)DI1c2k0YUm?Yz!+TErqTqyC#^F-dTapO3%qJGd~E7f6ZvC0R$cYL?m`4 zq2l#;h&PSJnjd4Z;d~UFAK{EO=6=|z@RcaWhQVv|FqAJzg6-WWgy~iHaF}8iM8}Py zBQH7AfT{DLNYR0g9Ejz)yDYfgs11Aye+Ta-?S+snaZG;tJ$(0jJT>voB#Y!$(fj+; z*h3=^=>2&LZd**m2QxkB9v^$KUmb(*=Jzpmk34wi+XDyY&VeIkTc}jW+Zv0+u z$QDmpA13oL(`0D*qC_^?F&Va&?qEm$%LP9V zJ$@@)5fq%nsG&+Zc1$`AJr|J63bR4pqa5<0PN98AIO|Z_M&lN)=8|~@_-l#^ZD*Za zIYEwhuX4aIW|r{%N;56p=gmit-@{#)KMg;mPRmA523Wn0fBIQThnBk2^D*Bcx>KC? 
zIO;;fYxR}H}J>Kf4esDUnxewGJ4JBe@YaJp}H75h`1L%zglalqUq*S=QXRlBtN{nkCh{Kmd%5l@i2AE@;08$2BP|%yt zzm*)|$+Kh``SJ%%LqEWOy65qwbP_Q%a)K7;A{I5{8ir&i;l~aqG@VrdNAFs~zSqgH zWzRoh+lcdUQNa=VW~E{EUpL;oBocPbFoqk-4~4Vkn@RiOi)bE3sZZY&XtzECyJBt; ziO6t_^(;bpug`cuZ#VywHy#4C(!e&~m9z9^RN=-WaLaVzU-D=2Hk;e5RQ3S6Wa-gu zBA3 zYIF)8L)ABiapRNQsnUBtUR<<-9JnVzJ;W!`jWhG1D*hO56ANJni;MB2;eK4zw3=&* zaQdcFHdRrMq3amX5$k{9+YlXo=|}<}oB10*W=rz7ev&k>>J}E<9S8Ta9qFobS$x9o z97z2Z!1dTYh%9v@w%UU*w__>yD$0Q;S5L7^hT(X%b2!X&e}SDIW2x~q6>9&?fXRj@yH9^CIp#qZ-?xy<`f zywK_{>}oi~qh6R((+@N8zq99r=UvuAN9%HIZavG1#Rp6_TSG@3jU$;RPjGO41y&dg zp*!!qz;OZ7_zksZIG8D*dsfTg(!%Gczr~QB-7CTec2p=QzZfM}hLP#x#HskjU2vtZ z6(pB-u#>4>u<^n;B7UHS2tSm8&x2ins>M(*(vMXg57K<)T6}ozxxh)?o!A7%lIGoq zK~YK&=9oEL_Ax|9RmVsa|H*`T?bX{354Dc0k3| zslpGX&+x*6mzZ)^m;X67jRy6x&|2r)$p%5mezgB=7(+^=F)gWp@3v1ryVSf8gH!)h} zg6F+fgGPNR$jtC$E5+5Ju{svl&xjNFmAqqVBhNr$U@?w|XIJT#95KiLg}zeYu_0Uf0)@a{2ln6q>Qrbnk@ z_CSx|)JS)Dkd}hxM^i}A915>nHDTHBSa6%@jnXUJum*97MqL-G9i>15yEu{fqO7cTlQhVK^x zaIBIp>9G4u+$Z0Gxuc)qns3T9>UJC4*mwYT1yq6S#t`t-D2EXt5>O?YFWdgTVM~wH zz?q@j(0lG@oS$igTVMHd>F;V_+_95){Y!xteQ7YKEsv!H=!0R>exf?*h49ycLu~NS z6LP)83w6Fpu?$l;h?1T`bcnSuUyy`tjhSroritit+@2|SYGOmJHECHH20cA@*u3{1 zqB)lU>T8z4j@8lVBGo4F(LBq(UTwexg%mKHw-Lk=KC`z96R}EE-`=S63E8Jx!W*$a z$$IB@;y*A6FK5ojx=9Bt_MDX_iupQ%PfGtt!O?82!J+WiSPy>Sb<{U(B?Zn~QKiNX z=X_kq?`ZwN=ZWUHa|(NFD5tyidkl$)FozN~q88(R@$!b9O3j#nbq6dYBKr^o*?(?M)nSJd9Y6@t}Wa^q`a7 zBsOiM25k}TJAHmdn*T0M;1_k*@}dWU^swd-{&#~hZS432Cub%>=35zV(rH2W|0jhK z@B7)i0@2=$vQqBnHweR$UEZNlQjekX&SmQKj z(tX*UAJy(dP^crXLu7f;fgbjDwLBG;<>S0vYjJk07Dm%xR;=1bri3N(`R|2L6gZ40 ziS`-gY1ZT6Q48T=<}Tc-ph7y%^fAS7!fWp*V3nyX$_0<3{X-71fZXG7{lP)@NA4_G zjyn%#PZq=CiDN+W`90_bhU=aevi|Eo@WyQy(zMDBjAm`eU85=?YK;zEqFaT#U!2Bq zwg&uGel?rhdk#A!ZE>CDd3@Hj8?8mLP}d-ZJFLu^PW2SBt$!S!@WqUuSh`8r&|M3r z8_t+#SK9>n;Lu%952kPmKK zE5b?v6%!NO=-oLTRo5;qC8iFpUryG`ph%AO+E zw(O**tlnYV6E`#nm!_wGh|z=L+o=7MGPM1@PUOwhp-yhu_%>XTMBC_6JAYGpx8fj% zJ|9QtZEk~`Rx%J-N^0t=N<^7e>~+gNV)M)b;l~x@(Fd-9LB{{w>Xb 
z(7_>Ut6?4KuZ@T)B%a7uC~eeR)`Sv04(!he^>5n{I=q*kK-h>;`I7U9)_6JOih_ zA4xYD{XdG%JFe&Ni{qsT}PPhLKPtA!KERkR(aFv==JTG)h83^||Mg zgv?5ktn3sDDeKGc{rk5+d-VBq?m5rb^QFQSyR3MGwKrY(`~Y5!ZN@1^3;D6G$1u6Y zjY?!IlaS8>FB>xpjd#9+eF33Zlr)8U6MDW*$JLuc>gRa0{Q67zQfZ0kE>-8b~Gdg0w4RJ1Sq|>}wO~ z=6Xs8mG+X++k?5cl{WJ{b{WnLF{YpE2hyMe{h<7#iI35gq(+fZToy<1Mcv2Q-eW&V zdx|sPa#z^RJ__YFI-G5M;tkLJR<4VEd$VxznprlKBb=#4~+Fjby=x&RY=z0hM!0xLNy4Usj6 z*p4-KSx((Z@zXzD*tYf-HnneuyHoYxym%?L_>Y5xC+QIPbQU=?Di~{*9feOxUA7*X zw)kOED%ldLk6&M>kQGX8;%|HFaZX6Q_{lH8H+!9Mi+3WEaxBAleSu8*vm;wIBN5$R zgun;sN_aT;0Q1>(bUTZvbrr`QgR|nX+>aX;6{|l(jnd(Q&~!v59w(; zO8kB-LF$#jo@U=?5)YlKU;dE71iJv1Qp}mc$VgOGzE9qc8HuYlj)WI13Rk79fi1J6 zV1Sf0L|&Z+rDhGpBlIL6vBwDZ99?8(en^M7` zw(=+Iap@KXYwv}>1$iv=*|}+#8nAv%?>j9bbu)w|!#+6xDI;k1enzWIE3LvIXA6s=~aH zlh7*MpI$4QOgD%VXnp=C(D*cs7wkl+68eebsRs7QZ9dj+vF9q{A=tb(9-2-4m{0F_ z_9Ji@IX685C-sJ+-?BwSL&gccp6-N%$SQJ{?<5soD(r6MGBhy=!k|k6YcOpNj2L2r zg{NfE@Ww`x>FbBf%_vA$U1fhQHK1dgIh-i+gRaVpP+tEQ7vzmcpOkp=GpUET*5!+i z+lCXj4I6lJcZ6ucpm)T5i8oj-{)XRP-9V{y6Lhc|#C&)y9(XkX>N6i<`?4tf8#e+E z9@c}19z{?!I)$~5WVqeWGI8^(B}~;MRBV6Q1~N@c$(p+1@Jl|HdCyn_;l+O$wbukS zkF^jx#TGC9NAb_wgUl|n&z6Wa!EniB!KL8`V~ggplTY4>Pa0MeRf&mYqsIm;n!Fk& zTnxwd9|26!Y9K89VFkbb{S;sQHU;!n$^p3u@Kt3HoGfDaQ0Fq6n5Br*R@joHTgH&T zcmjEHli>WM-*}+<0W?+4hN=Zm*e8#8d|H%5E?!uSiP=Yqi{l{3Z#0AT(V`PTRso3emi1p9RQv zsqjgAx54An3;Dd$<$PrRZw%h-&0jkFCaR0Vxl8VKT)Q*?bWbM|=jnPZR6m!W5OxZR zF#=0dI-jRk4T8f1^U*dz2TDeIb1j<&ei=>S#=>~e@mb49&1j@QKN#_2Wr@Ubgf87; z|B;uDuE*m#vvK<+A~+~saLJWwyh<;F_AQLzinSKFJgN#uTr=jG8rfXSaS!(jokdkU zM)QYfBj|1uOWIOi3rj;E;A)jN=(hbr>_24To|zf6ttk-2?|Q(=K8ZU~4IFQu!58Sh zz^iwkz-jM5YPayT$nWPin6StI#!PNvE^ps6e@8QjkX1ngu`Jq+$R>vS^hD0NfWF1{ z=y%oz%Rk28O(i|j=A;TT8@wU7D+-yW74~I}gC9F)6I&B=j8thOZ1+g~b2W{Oniqw= zH)U|dWfPpT?kVgl&R{>I&p_m~^SJ23DU$K_I@va;5yjt7^kc?$YzYXHo;j>R^*Ww+{J<{k3&K#{ zcy{kyJeJ&SBX%dcQEPE0K0T8P0h{uwEJL;Fcwtrzd>M-y6IJ<$iV{c^CxKZ?`+BX!|JIcb(yZeBY?-iMMXyW+RXBaVWFrV*HOtjBGg}D6{ zEPJ9=m3!TL@Sm3sx38qY@LE}}pu8DxwG8C$kA~6LFJGgD#8+_Y7)uSTMO^8D@SW=O 
z8RcS|xRXx+uhohci;F+P@!X3zHQ#~;nf%5Fnmf6Bhrm%OxO}crFTg3-`DP7)4e?MYx-XzUayRK8TawU3N>oHY9S1diDt)V z2z$egM|fJ(T(G_S0%s;3V+WRCWD&%$YZ=Dci{ciDo!jdi0fUYD!p zE}TPMSKIJItKCVXYC9}F^h$iCtA==GO{Q+kM)K)bi@{Us3&N^0v^u+iE;2TPNwu4q z(_mv%oID1f%INW_n@(WSh=n}beh&ZP9}Kk#Co!ct6<=R9WpQ)$*z@c}R&h7c_70tn zTCXpYCFV!D>fX7$Zo~j?8s$%~TvwrC%|OfwqPTpA6Er;AhFRiWyk&wH=uC?v%gQDB zfM*%Fdr1j~N$Kwi8gLB3l zBah$RCxr*}xy!e4I9+}=$V?kfQ-b%h*48CBGd_xTFTB9+P7Xt#>2g$IeGQDeR9L+! z!;(4}j^Q@*Mnhv;Je>HFA?~_Wh*F2*MRNzg5s$D}MTcign5@Q1cz!gST+e?3Uip^v zL_!o%JRk?%1H<`0pCD*ku!eutK8>Fh#4ziw7bwg&;^%!1iOh2D*lu_qM2*5q=(``S zFi1w3H_mQ>G3_2WJmfrF>Dj@2ety9f=Dl!ld>Rj3{f(%EG{Ew=duheNJR92-FPL;h zjXjqpv^H=HG0m{yV@FHUbA_^e{p|7d(aREXyUR3w{1L5?|v?F-jN5DYV)aD zw+a4PwHdU99JS~8qj>mcp2%)S9EDpteDug{Zuq(l+JxKJ`U-bAz41TMM4pcsf>+>= z#V&DJs4RfS5%H;i?npvTgX3ajC=dK08b2|Gy)Ii#nm?6_*YzL3>;exQz3V0%GSkO5 zU;9X(%HL}Lh9hY2)XknXYQT<^;qWa!msszPu>H$kFyqscVHiFYC0`l>bve_4?)!ir z*NVu&=K1(`H!yZJ(|_zTa$*rxM0N|XF+8V;}~riv_BO7pQX(UnkqW# z>W-nbmw8=Wz-FEa5jehhap;>7;z2d>EKp`Sbk&T->bb&gerp&SuKPsh%XhK0bS|Dv zTTbGJ=0RrmJM#HIdt7_il0<6dqr}D>)G^A38>5H7&Xg|FeQygM`|mXw3azN2da^n~ z^qRcpM~LS915D+l7&>xvG2QfzHBb0YymLShOSY{dYa}W2&`H31GgCoQ;V?jL1)Hjt zj+u^Y$op>^V88wqW^9WCg)hOFyCsecTG}Pfy%`NbJ{#HkzjBxW#W=U+F8ENuu%&A- zbmkwT=ursQ-%HTtQ*RNk`}vUe<}fsjo6hCr+ys8-8RjTz(2v`1!~OdEbYP<+7&kOq zTW#6Ew5#;#(NpT+Sknd}o})nb&_=W>uY{>?32^FCFHGy)#%F&24J$UR#>??#*w$2x z9+EnoMszT-Torn)+bwuw3W)iR703fLFxn^@t%OXG#4OF~f=)?XeQYHv?AVJ}g#44@ zd1n}-Tgjw9c!0Ho6I3qPgZg9Uft*!A|I$}#5*pBh-+@X0M*|H+|XhTDYyQEr-tB+2y&>j^bMdo_IsJGKKIr)LliQv&77L)o*;f!K>9n6{^>Rix|;ds>$M3Xo-y9#E6$j>l5EQ zM)+9G04#nD;x=0O@G#ne%(t1umqqO3`IlluB@^nPvaB2Chb@ED`BMDC!YFohM?LX+ z;?IdN3s^oMi!SpFsk+@-y8UXJ*ydgsR};9fUowN~3BBuZ_(~HVx>18iO|AICkW>8d zkmLNnye~L9x{mLFPS)Fb1Fmck(PP6qaKjN-`t?RQZ#`~`24BVK`m2Iy80FGYRjKH? 
zUyhoL$^*0dQ9SDKGba5dnR^v|!Z~f<&}qXf9Cj^$FKbPQNEHjPsn|k)>=jW$!|2JO zCDceRn?`m8()q##qp34_M zdrH)#hB24#>q*V4Ef6mAgpc~S6D+pp;;-})u43)Se_V*5?dhALeT|CPdFcwlS2YE_ z4MsA(;uyTTqMW_FJ`?X<6ntoX^zOJavM~Sd4!sGI)VSah7CJk;_)@Iv^`sw_KbEG8+Mjsx^@LR+}FpL z3Kcr#h7>>Z;1PU|N`uqR`uymlR1hc5#Gy-VaY&XTb$AJ^RxD3P+twhRE5{GL`iC0@ zhL(sHQ19krJfug4MhrVm_g=n$mJjC9-6M5~jQ=kTHVvUaBPHQ(R1chqDuEYob@<}t zPngCL4;r_mio1;~#x0G>xM^S@+`E;+i~Le>SW*%<^f<{2_73Gk`sDe$FHYzhUPIzL zPO*@}PFyo^B<>g=&cB4a(9Xc?P;qKL7(XyV<5(wNQM!kU(%+!+k%g4MdW%VSJMiSJ zhw$%2k+8qY!}GV>p-O!$O}A9w>g+VPey>c=eqI9Z9csL;ql{I4kwbC$0KV?D7frmp z6sm82z(MWTq1e)xUW(Bpea9*MP%7jju8G-YVa71nSO<3-OnI%hJ)11tXP(^HfOW+- zJa*tI+%=Dav+XS`I_iS#LMn_sCy!wrbMWAaFxvU}8ChL)4}KV>fsWrU@qqa~_{nN5 zKe}d#usf8a9(>`o7nDHHaREQSa11X?F9%J@r+EF+BqpC(!hE7Ix7-c)}?7`gwo9v!M}pjpO{UmAP>XS*Lk zvrDG%Bv<$x8CmdgPju;TWjT7{QW7!?TW}b;LcB6qi+a_JrkOh_@A~UXQ>v6~0b6)L85tlVCx zFWpY(ZrcFCOKQQk`UEDcs$y=}JUY<9p5EWR35|rQHv(B3vWaG!JRduq!Uca}``>1OodN$y=-hMU zM*LXtE1fRf-wv^)-?P!mq89TkH88w>DLHtc2Hy$0xZ%sJV4Yt%)VWFH>Eu8dY_SX- zbH|YY2X(k@@dckhl@)moUPZj67K>%aN231YldS#M0+M$yT9i3h7kbktfr-2m=^mDV znPZ}e-YE$*%-ALN!g#RFjE5E>3w&{1A}bI4!X(b?z_0xgXzu0;*6KRs%7bY{BH0br zhuXo#X_^>)X9TX7%R&>!05~0;gQOV*em$KP_lAPXY(sD>)L|1kK8SnL&oZCZGthAM z2hr4I3cI=N z2aB@X%YKUOiIkE&em$gxS~b3;Qpih7xE&MKXa8Y*(P^B#?-*QPxdQT{ACZkOobi{H z2Y5e{gx06`ShMm`67zi`_C5##mm3aXoqd5AOtQnC)1%l};l4Zm@M_df`^tiEd&Bjb zV)o}^5Tv}kOIkI*u;RNrvHh?v_+B6o@bv@xQyYm_9ygGKaZzBn@`>Q{oCUsv-w{Q_ zd{W}&hki>pV5z4cObC|;i6iyZGuKLE(a&EbZ+xvdG~tEl&Nv^qqT4RIx>yw&Yxkpc zjhv9@oQu^ilX30NL3s9o8Vk&d@JsD`=*DxS@a44~ym+TGh&5axvO$^O zy!Q+aN~H5ENfYQ)#bQ`&wi^3R#?pONv*1DfEnfP5uSmoB4ga}Y;2r|}z&t_<4kQ}$ zPktiqv(4M)u5>vr74W5`y_4z3u%~!px(%N(xex@qM8ViAs2nZrI@jmH@=6kW^RVEJclNQrvH zR9}t3?Fs7Ot#VxyI^76MH*5vZ%24>RY!wi#^Q^#9gnz7qn0eJ03>`I$-I0{XT8Hrh z*GHi;SA>~D-hca#aL5;Kv0Zk;TwEE!e(4q4#MG#BwZrA8v3VH|oBIc|3`1dN+Ue@l zu_v(p#WJ`XED2Xyy5W+*?Ht_MNRCc8P(3gw6Pf=ncsBkfZm^xmpG{H5)~k!DSU6O?`yoFU>-+&cd{(>^?k50X2N#|9R%{0BYIc>F7D8+0BPH;tf8 zZCk6w#v_Hd*IyDjAqeWlzu;)XbDWRrUe!?iDJp~Ab06aDslcv3 
zcjw2ZbLfkI0vhu5up!TjuD80*()(AVhxuLZnKz10*m8xX_r%bdZ!gnZ>6>`9>Uut` z`z~?qZ{n)$#gN<62Xzj0c*r1)jIK7~ho35OhoKYMPtEV-rh_@xsfdHEx|{gqgMgzX z{<64*nf$$bD7F2(kDBhA1C6N*p;)&TyW4cRESbT3Z!Ly9hNp4Iko8bmS4FOF&SaTB zji^vq4ECokp>)m#bY}vWKg$4DICtSY^{&(!KkK3a>nv#wH|HS> z!uYsBD%4Vc1@}CCht2oF zb`P?d=Rq=9f(}m7e9RL~>*k@Y~c_ z*hcokn?+Ok>z9snU(I^DFsBp76gg128VkC1R|^y@ErMA$eEFM@EO;Z40HZ_i!@`QC z=-KZ?b*hKrs(%r@D}65e;1CCEmMTG|`7QWv5eZHMl5kuJ!{QzBWY2AR>~I)CTb`|{ z-o0CoAKE<_-^2 zeTy4S9|&2j7x?O0k$pk38_o==fF6%mSY{cIY0pQ}leb%0Y0+(rHk?HNB{=g> z?~-vy>0D~jd4~2J-$U;{)Wq33quFq&MKGkJg&nFp2mf@M@#yZU*eel1|7^(@UmE4d z7gbKCmsDp_`|K7Tt8dCRzGiUC-(vD2^orQ!q#3v>X0oF-aoBxC1uG6KfXn+W5GXkV z?|CVs_U%jzukn!4_2V%igu!Qq@9!x>n4>ow(MJsFGg&Y!+G^?;G5-vyWa*u%YMOk(&d1H z0ULArCxQF&kEA2skeO{5gx6GqFlE&aXwKaU(Pv`e@BqidukQU&(qf2mO|>NNV-&0K86kLlib&3b z!Hn+|+|CQNu}C(XoPB42H3O88`)Xp2b2{<5z6)kvzfVHdD~Lp3v`DU^h6Mk&o^`xb zLhaaW=JP&-dA$o}+MBmSru-3dZ=4j4*4lvgtpBjimTb{O=>iz*B)Dd-mR7sBrjh=o zk8NL^5aV~XSh(w7f)n>3x*L6}8gQV4Y*T2&&vl*T)QUnF>^=ZDIa!F;=tZ(@>1$Z+ zEHFp|Tv(M!9G>-kPmY@IW_?nVz(G_&f&vOz{N?eWeC8pJ`W6N&BA?^dz}sxEk_$}E z6|*@53~{E^BW54@jttRkB$2gU)i?j%z>k6R;lAZJ@@}((C}DOY>V6vtt$k-%V$WP? 
z6{nzNLlXENl>+a~t?*IEofWAk;Bfbq!slo%NKRA6&^(iBpU465<%S!|TZN+PnR4;^ zdJX6qpiatS!%@vCm$e2i#FCw=VE3X|w7$X-C9aMVnSNW0+I3;!Yg)5e=kfudHOK~CeJ-<^(+ptgBo*|$VGeBi zcC?#B#B&Gxq0w+dTv4|e?Sg{Q@KiLL-xvWM=O%(oRiie(X4FHtwXQKe z$6^%X$nTpkneIo-lJrQ2=_WGEEgVB9`iuWwih&7rmMHx|9j{qFCkAD* zn0dqy&(5_b!LnbO%bgH-JT(e6TD9B}13M&CN9==Ts!|HhE7}>z3jAW1E@DdvgqFoCBd#CxXeeO)%1D zH&}O7u%|U2h;5)5D$WW5=LhkiyKf5GpYq0A%M#H0u^W^4Jq7GeaW*Zab?l{KW<`xqQuT#8XQE8$7< zLjKJCEv|cL%~h}0f&YlrnC`L*`i4rdVdr{8C4P(Gi$osyo>LZFSW#$uMHar^BXpmu z8=4$D$KCy=^ObgW*l}kvEQ_uppGuF?tI?9&PkJpq(5_9_jIXagZxRb*D5PG9^IL<6Y+*}kw^8zEOtHC@R zWoF0iXQ}gdUD>3_`95R_=dc*37etUrgXO?;qS#fdNw0S)ON$r-f99?PuljYUmUoA_ z^a=j`LOs&yN6{;0GxIhxCFO60pR0{$MH}`Ayn;5l66__`P$JONKbQRpuO~iwlHhu8 z6s$MzWmhuhk;tB4JP`4QgkE&O2eUV_TasP)H%$gJr8HT8O(vUc>kV7oE|KjsjfA<% z702w^gO_OlXcRxj2|tG8_jO|Y6C_E?k4+@&mbAe3QFY?V)}PF|uO1`*9f0{mG~sr- z4C<%0;!dQ+RqAL0}0$SZjfVa~m49P2#H8PGffMaei<8T2|+C z8~U6s!&f~IO4aj-fol#wbxM)1S$L5IxQInR9_!7Eo0C{IJQ-M|A%obmE3O6wM_hS-!q z-t})6@ALD5UyjFksZuZuQu~2R>|KeTyD;OOljr7R8${9JpYWmnX?j{}Jl*o2H#z*k z18nS)KyBx1_)kTS1}Yz6izd$Jrb@0bVEba)HSqv8?Y>L24a&&!;6flLovBq>GVV=3 zOS4AKrCnJ$f}5_C(x5Bk^z%6KpRp^bP08k;ox-U3FfXq5Jc6I|xF+y1S@=^agbnRJ zfU>5manB!R%mj)N=i&rEyB#*_pAmX7v}p63`08iBBiWiPeXP@ejY(C`&={IS=1yLY zMSfcZuY};%Hn-=ysWScX(T9AJpGsvsRrvE)L(wbpB`ork<*G-z@lwhYn0R3wK25W& zifsv|N^e%d&b9y9?)guNesa7GOPsT4`coVJY(fR@T>BbCRgqLac_J)QlL3p@8)!gZ z9v&Rk&(;N=gDY}-_~CzZ0oqyTlfA_GjT{*~3_wbPyKj>q5!n?CNh; z^;OZn4me*gl#KuL5pS68fY(hIn6q#Lla1Ua&h}Omsos$SeOWVn{&+Yn-k>Euyu5_X z?)}DII}Aq`Zwaopbrz04UyfSO&yXoID5#&)0bP~(@b36omb&mbNQIS&p7fq!N4~Gd zm{(_*_D56PwPREDh7Cmmq-ccpH*CoH?qYbeR{{K3K>t&{6MT-W6 z2;RU0dwXm?$V>sgI18%3KVqKlAIXngee{#m1c^nT*q>7;i8<2*zrX~dyK^IStPUX? 
z7k_3a*Y}dgBb3oF`XULp4aI}{1?=ReY1lDRib-gPTeoWj;>C!)m?!gyOi`GJg(gwx z=kH7kCrrV)8S%hBJELJ`hN#2E0ws2E40cCe}QM=F^%*RN=Aio%x-h6;vnY0t0 z-i&5HT)e^6Ya3X=agxrkKC1usI8-j1fWBco!;z063aHuQd`#ElCBizz& z7CvH%ZDC;7d8yjF&lc@MJ`;^j8*nLKjyjh#Av;kXir)>zv-=I8A+1E@->d@WvCr7p zI$?HhoPz4tl`yA71$#b!7frTa46&bd;Nz!H#4{v_G#!}2Ur%LVRJe&xD)5ISTO*b@ z+y}L$*1)Xa3S4jZQ?TIC;mO|feQ)u`2Cr*1SgB5k_ps#o=CLMjwwAzHeim4f5 ziFwuH;TBeK)OR2H2Kqv{%@gKlVS;{>#bRdvoHT3~W~zPO_-Eh=vg(Kw+%=hmR~oe8 z_wWEzz9GU17p(F4+5Kp#ZbbQWMgBhVEBqU+#_dbT@~Nv=@Kdd&qH9wudF2FYto1!k zI=-%h&M%4V;Dn)ENyuM+w3DRo|5Kpd2j+3xV;K-`zW|MY`QU}x^I&s-IUYEtgE4=d z@lAdt44-9((Qa#@g~fwa@eAU5V*!c$I1Ga8>sY8{n61u&l@L5AhP2jgA{Trk(Iqq< zZoXa%zVSE6#ZfE3yg(Cs>sPYa4bRCn&D$jJuP|46h2q(&%i`>)A=q=n0NA$S`1@!J z8b_?c#hp{gu~SJT(RvW-8w??34IhNTxd{8G3;i3C!RX)`&j!cULT_J`?VI)%k^7|u z7%yIoS(3P1$hXbD^N0*7XdpqJnsn~Y6u269Q*?WWA#Kz6fKrF8 zc-xo-I8i=`Vx#aqCGgX?B46PJ!IAMMI8#)0g0f*rLug=iFt_&DKn>|9cB&zQez{o< zNlj_cbp03`8gPmCue8CYwrJioRfY$DOog0BDZKDs0@LYqK%K(VpuS&==RH!V>BGlB zgOEkAJhXzJH~kE{ZTj@~t=+aB-RHTVK`PdF{~@>T9fsiW1kSrg(weXVm%lsZa1|d7 z+NnAfW$UfP%OXwjtnf~s_d%2YJ)+AE2BgBL+C$Z5vj*~;U7>tu!co4iwGrl}q`G(N4h2`m3z?HY%#7omd_V8bs z|3|bRiUmKXF}D%;w|i{lMt^_(xh_Bzu=Ij9?%#(9VQyi!~tiNaAQsh82vR6xRsG~ zP(n1go?k=fPdbhH8V)$j*9sTx3&A$h!wTdt3Hj*3)wM62z%b`MYdI+Ff~F(d6)oo; z|`kG@p3-B|frx1K;9g2v?s-QfI59jG{42_E}8oLtepc_m^1F zlw-SFL&%hOcv1!HyOxP~Kp)yk%EPQr<`2V)%FT!!ImORr|H{{!8 zrOE8Dq1^eF7SxW+$NRxC;F9N0V}@DKC{0Rq^>OUe&2+(` zz4&t9+cT}gqj3`Iyv@eco*eCOS%_C{;tf96uS`PAA5<5^&#TlnOY{BL3#~`gE0pFhr z6Q5O0!6U2zRpx5(mfJVM(sdhm8L$Yac=@u?`;OD&GnPTji$nZRsXAZel25JMj^P7= zXMUw_$M>9&3MX1O3jT>HS1x&>dui)lc2u6RW9Mr6W=tqb{{p)D*t zu?3E9Q{)FqF2bsf*H}(pJoKHKKsT|mZ1SHd^crQ$Eu2Ep>3uvXr@mraYja`4P(LT|DqbRXHAEW3C5VSdnmVCs)YqVb@`)B23%R-f%mUH0!F3b zeAnku{F1Fazxy&2lO8tX`~P}Lg4}Cxm^lQ0p9;dJ1#@Ae-BMoi$bgi*u0@x`2>$x$ zczSui36um%^Er_|bjBZbe(i=QhM_-?`t?k7C-)7zHgGcNtnC-8RzwQ^*+_C>(pcj3 zJPJ0S7PEWf0x{2N7(~Ku%#Dm>jZ2MTNd9*ASiKZK_zlCZEDKUDn#|^pixSwAp&;Hm z5Qlu#g2t{=sO;Z~1+_=m;!Tp=mgGRr{^#Q1nu=KK?jcq^aEv&bBZl!scyz53-1b;4 
zI5v{e?@uDT{T`s)>k)I+;^djPjmYNKUv_kVK8|o~5Gky+WZCO(+3wCVAZNplu_@Og zNcfo;kiFLgLn9usYun?pB>f++e#B$785vGj5EN zgX@DAp@YK|9JV}+8RQDS>+VXnV|6~Rbvc4|pEjcTouOQF%wM+D%z$M|8iA#7M`>`` zFOF_rfr-M~<6coH$nIH&TTElwmCTB2!x%fFTx$%Wzq$BI6qy&=w!Iqf*}7V?318NaIAUygyNQx&1QdK{LY8ID)Y zL=gJ@Y_-Xv{cvQJI=brQh?lGGXZi_4(C(NE{><2j*S&S&L(T~FxfF&^lLZG~#!XS* zHBAVAEI4UjP84^%-c3$U-VCbWa@de=5A>Qm6Aut_?(5UvK9!xVoX^c-M;DcuRo z#T4M9Ydf)XEUxAb3NW&<10R}JiNgOKA}2@PVNDaY@XuitIJtQdOnomV%@2hxnS}z= zH!_Ppaeoh1&9;1-dLtJ2t+Oq;Y{YZWo+pcKaM>dd{CQf9{-+y5H@If7g^#ac2g-1- z&PL)Ju?Kn#mf&6QIe5`G8v5$f+3}W1xGG`+oD9w({(b#~o|}Lj8W4JX zA%uFJt{xMphbh87IMTU>m_!YNj~BItJNggXo}3|spIV14!BKFiIv!8Y65bk?f_uGf zJ9c)?7Cx=IXji?8oop*1Q5D|gt6LT(eo2Rqt4i?P_KEC`Pz=~PQ64YVY{HTrTRc>o zzEWR z9{t`%S6}}fCuI5Jp(JY_&n#pZH-A5I9&;Te(+&8vC8ppiWG2Sn`yftIQH2{7sj%N{ z9a|hKqS88Iex~mt&dhj-J1>u+%|f?<-R-5+D?g2{cd_JE8I5>s{wyKqznK1w4By(_0JSx1&9`=#0ko zQNOS|4teQABN}>Mjt}YdA?{B`(>9mUP%Rg7fw1;3mj$ zA9%T{H46ugpT*PfxpBMJ_b}_RCpKISC6-^lk?u(eIQ{-4oO#qn=q#BgeL1xkZIW6Fz2{F1o?4S!|}T--_6 zT=I)}uKU8`r@ked6^B9g*CRxBbQTVtm5X}ihcN!)7tB2&4?dS1dDNG7c-t>z)U2nW zdHw@@xMKiaqOM4{7a8#J^KXI7iTSK;tu8OqX+@uLGx@)Z+xhFU1Ni=HO~M?yOSJKn zKeopvv0Vmo)Ky-GEjeJr2X^(~)-hYKD~;f`=uP;cW*+NQ$cGU{DcCUS1j#>l6enMO zh$nxBllR8wKx$?wdwB5>`KNCr_}26|SdXXEy;g`ktlyER{iFCjr5e#PsUjHiScPAC zH3E_!WkRyxoJ}?x%ugmtk<^p&Ja7qQ_8p zNP=2U8_V%ZF{*UgaQCv?=#ih#9vcqBt&+3Qr8Av7JXYtL)~lJpjEi*YUZGbxTAM8z zIi7utp2XYgu0ns8K2a!qjJ_`^RVa@aI>dgVv!5yLw=%)#^v~$^qqQneCzdNzPK9>| zrt$L!boos^Pk!BW3c6nH#%&ujY>j6*aPiMb8asa{|8)KoP8c?tOZp0(CxHrF)I^2u zlu;->wjUDB?jSwbfjWQggA`nXeLLmox}jmvmU;re$9I#k6cNP2H&Cb_ioTf-aOb~w zAW<9(PA+_dkRNW-urn!PgissNa-R~f)(3d{ld4?|daTN#Ug~0flJCLY)5F>3& zV9s%0w2*mU{d%}M_0Sde!~&mkvo?kWc=y1-O$G3tJ44eUN*(^ZCO7_`z&`=Hc&vUu zDvlk?4X+D#kl%%vTQ!U+DaP@?LE1EH<2+O_o)6_8BG{iiJ5Wmhiwkn<;kvF0k#$q1 zT@|;<{PXuvfwsYgN1oz*l{I3MFk^1yuEXUDbZ~CmFOg|s0V*wzhm4lJaJaA4c5aU{ zZBQG-<%%-F+vPNFm~RFT^52M7PP&Vyg??zYd16R@BV>CVCFt0xZsefW0xC189&fJ8 zqI2KhCmGA5asRPx{EWb^f3ExqkI#KZ-<55|t#cjG`S%?Lwts^ckL;-K@rSS|;22!U 
zF5=JfD&cZPA!I3M;L8Lj?*6orxTL1gb<6=SE&GZC-hX3h1Fr%!32s`A5xh^K2IepI zBGoBEe@Dk=_&Rb6`DZl)at^*0U&<34ezM2VcW*ZStybV_B#*5=z7<}oj-lV?45l;B z)<9~|4zhld7fcyu4ZpIM;X$iKm@~c{dPx{I{9b@cS##lI@MTb1;tGEz-A5eifX99% zV~VE@{tCSeRgul&@yn`k;H23gnY$Pj`(>D|ZXV9|S`5pjeQ~?_O>{L^$3MI6@#BpP zV#O2xnEQ)d%=mngEIR86_oIZK_wrn*RGG!L?VpD)F6=_n!b7Mevlo9@D`3FuOE~DI z9_cqz$1_bgP{Va1TlHBEjrv*j=NxOoz74?0bZs!m?-gslZG!h_6?oU&uOdlRN&Hk` z3k&uwVD0nwz@5D^Z4S9~@iH#_as`R9 zGVY!+heeDDgTo^g`ehwdAF~Pl!-ldH|0F`RO(1YpBgxx3LhSu+D;zx~ zh3(V2h{H93Evn5H2Mk+|Kib!z>c98oN_MPBaZW1JDKG{95FJ<(@PRxKoQs3*nu6-B zI9wy}l=arxMASb7^5pAm9y>;h{xmIM)=BfQ{YVuwJdFZ*7kgul!nasj?}xXV-qN#X0g z*|5cQ8N5AoM6@+!E6d!xf>E#6;O>|P7mh}eu1{0&R_ZZu*Z+#+tE{mpSe9ph{DK#a zXHpcLcovoRT%lnL4UC!xt?kVuZTU+)6EJ`;R&7GBkOaE&u@PUjv6Z=~2>fg9en|bh zfn5$dhNB+GGe2>kSeiAG?ypg-VftR0>%1R(1Dc3m!8Bs>-WC4tpN2-GB5;n9CYsNV zgZ5#oMAFOs5d@w9X8U4r-+8i7V;O3{8wy@F4lr`MD$9uHq%7DTbDj2Mp6m{CDjo2LW75ddoz(2KwJnS^$;{6j~lj=!k zsx=NQW}86K9e=oV@qZMZhhNR#AIEPRDwQHCv`d4A6xBWNQxSyC)dt(u^DG3k=7F}j&+L_VXRIg&VC{A z&WyL=f76N}%J(9Md0b`JwvXj2og(S?^%wDbc_yCD+W-$|jO549+t3~Ng6NIyb7~u9G{4rH1(W9=5#N693mX@XV>J)_d0&T^tB-@fNev(LHkJRyO>)#mI zUx5z~kimT`GN{+Zvv}}&A@6W3$I#$CShC|1o|NfB%}4JsM@ET9cH7bvnE*<)bMa)Z z&=)i0EHAmWk>6Vr$zQm*(}@RFc~X;@!FO!96z zzJCL(Ri4BhwfzLYwBUofAnf&cAEYk4$2M5>LT1bWY8~^Lcbx9Sio2t^!jc;(ud^DK zcdh1nhrIaR!Uv%Ft{TUVP@tjNI^4HCn|;1H2-jpDXKlgF>nH zQa_Y2^g#zTfiE;Qhj@esVL;_CG9bR3oiRp9u7Vm2Y;!`d|4K2l>L=E`91b2?(db#c z10yxR;RDS+cr#@rmk4YP{h3!mbEpiTI%FrVWKMjo?G2dRt3f+<7f9BQIfNE--O=y3 zI;Cpb>#;5DW8G718@30pbxPA4h2~&zXcWy^R0{fDS0rn% z4kE+N8JgA$nqHqg+JG$y`CJC77t3JJZ4ExzJxP4tI|_MW6pkNPf+x-P@Qv5ZY0igJ zjMV;Ruo~LLiNxH30BJT{NKCzN|)8;~nQbaJ8UvER5_21CX z{{Ye6tn^8e&iJAjx*xE_8pzHaPNkNekieZ(Y{& zUpDsbEpPZ3cp8-k42LqId(VIE6|wz%d%oUd0p0F$3d#TP^C#C)E}F&-9Jj!zZ9%;6 zb~bjaE`X_f&O_U|-!N}u5j?0c;DHn7!D#!tU{`+@%HM70sfXwCy6L*~-Ld`LcuF@E z8LQH)17aH5y^Bw3vf#r$Qr=^;k?%hjhVr$_^!&gE$TqtQJ0>jP-KVdxr`0mFUx5cv z+GfHp7`?(3GYy$jQx9fu9mK!f7)tjltl>w)vvK{Pr-Yx{P2IiD;kfmb9eyS 
z%wljbrm%Fh()RjP4h$!Qi5l8A{?-=TVOB1nsTSoMZnRI+LzX_n!9A{j!n+oh?z zO)i>LSkvG4-Ld??P`Da47fY_4#g7$txuE%iaBhNIWezE6ct`4n?Zf=#7U+HRr$oHK zo#+lV!RXwHWZEw?`YviVmuXAp$EVA2lOfLZ^`aXPnK6*xtt|q#3VABBcB=1Jodo4J z&q%}&4_IoghGs$@<9z56^mShXuBXqCO-r9Mz2{t9EPaaD-Wr7iO*UX<(*yRhDGod@ zj>Y4{reNdS_YE=QOC?u6%wwNrqtG!R03P1l0xxGu3)+hzHie&Vn6=_AiO`#f8}oG7 z_bp@C@Ix<{dQUx*N%+fR?_XiMBLcAL@>P=epE26~r;Oyw4i>o4542vm;-u%e}ly|6WbzQtjf z@G=q^dr4x8OSOJZSMGc=u{tz4cr(*naXJMDT-68+bavWNx4@W0m zVj+dC%p$hp{gr1(`KwjLeBvNTxw}lzh*Bl~R34-MnL)Xw0ckrR z7Z@HhK6k=?r(lKjF@&aRx*sC;FgSvDEDP#|8OoAfZ9;gLVnhi0rX$>1|8wvVn5=qjn7Iz`!drgy@r5 zAfI^~XO(3!8}(V_NW#>H=oU z=l}PUE&9^HcKqiCY0_OxUwe+YNg)cy>du45N;d5ASraI25_wMbH!GK)BC+hUfwQ0IVrjc2 zW;NPD;kVAzP`MD%y) zKI0Q9=x2yy(-iSWg*O;_hGGh}W@$&#LFb(zcs$kxQ>ijiK5q^hY5X8_C4LyQ`3z}K zk%I4U>e$rTTJUmMGRBPjBk9aKMv~@sGr8(lOeIPm*7yqC<+n#jXa8-)?6NAxnZ;te zk2XY}7)3JkJaI<56Vxn!#6kw?v#%zBaPwF^xqq~niS&BMw@hm#Dn0g4B$LaQI8B1` z!GeeOyC=q6%#&ouXOn#4bGE43LWZD=J^DVFe;)jwq`}}Q1WP+`)yQU;|H=U310{IO za}WM^+l!~ncEw$1p5R)o8({lzy5#Y+v1k*a2-%N|;cHA($2WK1dDarwKtiaJ( zP~=Q~!~4PUGmH6v{Zpyllr*m2yo7%V+RT-1mh zC>0+_rQarm!FpltvkY~Eh;{quyrKXMHf`i8AG2wWPDO*7O(3@E-$l@#i`|QT@bEt~ zj8xNS=eBrb#=vy&FLFWAy?Al_YIE`V!MSXP^m>@F`iZ!&nXv-%cxaDHhGCcr;a^m- zYp5y22lR)$YpLXeSrW9&od@xMwPCX0sfwT3OnhzR;q-1@sNAH)FXk8nduoL9Rth_h z;Pp6Yz<96@y$N2zIlyN7qba$Cxv4eal>8Pq9B9IkS4zY^TP)%5(hOF=VhkR?LU8!1 z@%U%+ILW6S=fr31Ut(+i3tV|w$m8Wr!2-`FSllt1rgdlHnGZ{8```&Vj6;PpA*mVDdA5!ieubh+Ob#s4E}AXC8Ecy(OODP!vvi zpe8>uQv?0YsxUouFjo)Ez`5hq@a8TnY9?!sUowlZVQCb&L>|Vp&*S)k-cCtp;SnK! 
zu?_Z(K1MC-Cei^xEzB|?iAH%F(OY*$!0FEh{CD#x{;$WAX4m(k{k;SlxscJ-UyHH% z+i|hCw<&0A%JP3pdx^q?Gw9QjNJple;VbpC_)3Y0-!1)#6Wed{_^e4h>yj=D>u7?* z!K1lf)e73#o5H`eNyMJ&#DPBCwo;Us%35G87>1!Ad_mroT&UO*~xJrx9RNM)t z)stC8jW^sjpAR?8DkRZ|Z^P5e6`<6R0nMMbQu%5Z>U+-^0)zg8U5$h<(^uhvTK(xL zg@O3qF_qjOKZpm$N7GN)!o1ez%R9|?(}}+t_~td)bnE+Ix^Lzf-ej?z?1@UluG1P+ z*)fw#|H|hoOX}hAgr-h%RgW%%>-7<~#Ot_Do zxiNf3Oq;+KnFtNx2eCuC8v^@p#5p%hNF|Et*Un6u5VMr{nQ!9O8%o*iH}*7ozNPw*wU)^{dn7#O#E+mC|wt)E_mdlKx$nXPT0JUq^2yyw3pjxNwWfzU42BnHnu-E zwVc3yrX|r-tHV&2EEYU6TkxFK9<+Oh%+%l|GgkY7a~E@rlY50n zrDYOKyE>Rx?hSw#|I5s2&N^PRz>0?scSGMPh1_X+3fU$4g2v}|L1E8jJTqAu)_BH2 zpmRPqDeK2w3NGS@wk14I_7419@`C5uydzot6Cm~NBb1oSoZSxkR4=U$Vyb%4<&+bz zn?De4S+{^?l{TC+E5^Z!fmAcf9<0MAqR~=4tdx4jW(_tWq2GUszoHA&q(70IkQonE z%U`o~`{IQx=u=X8zKj?(v@@B@;b5pq&}L;5Q#I@&F~)XK=%$YM=_u%(xDNA!?C@bJ zL9_mTI9ex+eA?U0rWoF2`@rP>WR7dJk0?;1>VYl#Af!?bQp##h{remqA|Vg7FoXd0nv{ihKOyf@sK^fvl}t8*Q$iWI|@ooT}FBHWJ)r?9S>=`icfA4%gJXR+wle?(+-%kj$o zE|TT4f#~V=lZaZ6JIt7IiyYG3CUjnn#F*PNV0xuITI)vO@827ta*rJpmnMVi=2OhQ z!iQB&3ud!ga)_|Q#@}(nnf1YHQlnu^MjQ(yx2|l$^N05_TMs?(9pMj$<~6V}IffV~ z|5Y+cG*MzN?}s&4^~ec>9T=y5ggkl_!p@oxz~8k75~<_@Ha?{uOa6J2>-dgMzg0rU zhu9+vTmau6SCGG3M-ulh&a}Qg)K{bAx6m#b-a`y>rpzk%^83fYVVQ1YuusQEDeUK8-ZTSI(7nI!8K-K+o zNlk+>=#}}1Up|O|ntnHlbLL<|*^ zSBCV$)h3ka>W`=Yy)%VVIZ@bYA4FelSLWX}M6_(~J$SBY#cCpFao561Fg{vFZcZ5s zEwgiZvxy449+U=o8;^?3|7M}9f;P`k&LXa=cbHnf2F4c;5%OEw*x56cJ?-~`NeTMw zrHM1JTFooe zI_U=K9$|%frftl!*%`S6$zP)lYON91YsR# z-$Urpp=fYIg$D^)oB7JaN$^{F+T6PxPYulKQ8BtWT(d}+s|4%W#$i`VM~4&`WmyYm=*cIqjqj&$Hg0dCxKj0N$q zwW3oReq!|@5#9|3zOQfuU9<8R3|RUOmMLFAskv3u&{6Q0zmwwbd1I+)Q539_j-sWH zEg*5kKyK)9Mch?9n6!>Cq_i^_oNJbd^?LmXo4k}&%nrry?+dYfs+uHZ&n#92SDD+a ztytb9MPj=1nQ7@(cBm`>3UmQlo~wcH^W`u{%?+E^J7Gb3IZ;_Q6e@kC;D^A;+c+he zShx^8uxC7^&S6YEbS|s$nTwZ?%)(JRBY3sj70fw022Aq&QGJhIgZA3#{HC@CDt>Qk zu)AoBeO4iudvdL#AN~UOBMvCmuyfopN9dO~Hp1Km=NcZ09I$59JxFeujVg(Ec?tc0JtC!kQi z2ZJj{5;M(Lp!#zsyj8pbvm)~(D%IW0PGbO#UVcJ6s(LSq?>bU7_jzFRr3$*X>Pn=U zZbM4v1@wUk)8X@I^u0Y)ag`FEI7wA}E5J+A 
zG*QsNJTk#P#S?iSmfzGY<@a89;Dx+U96a+Y{#2MmPq?>o<#F+Jh+7Pu z5t~YfvswI>(B<6%6xf&NMqPXEb8E|q{8w8d54uA6?`)yprQKL;RSn;(j^Tz$*|<176sm%w9JM#3aM5xnF4bR(AE_8h zH={QXogXi}S8qY4g9@GH?F3Uj%J7q}EAaF@fzgwMe{}P(ZH*f%Hebza6&B%^m;Gs@ z;1?e!JWGo21h%uT2lfnY;OcQD(0Oq=-o4n0j@kYAfMc<|a6&NMa=BL0K4YxlnX`hf z$Z%LNbPem$Ux5fM;c{{imR`7ui^h7=PNT~hRAP!#UX+9L z+k>D~Fdf9oBXM1=EFaQl1{Ryg(T1B%%>CeQ82GCKwj2&eJ?TwQa_AWZ-4XE>9qM$t z^?2@d*N1An2&dlXqI{ybe+g?aX zZI1X|a{0(+v`$MAo2bjAtji;)4QS0KZe7n;Pvij6t(s5nTQne;h zpS=#;Pw=MZgdK&H7vp%{Hd)D;c~SUma2gHrUPGU6K1LsRDxvq5$v8D)CA<}wESN|cinS1Z_e6-=6T6Pc5EGfwCE-;7yU)grlHhn^-bRVY%mY&`vcb>hmrjk zx4_=Sxh$uomE7&Fjz4)rwki+DR}(4m&;1X*96pkT zcT^-_|E!1Lvr1$^;#abKvyiV$(_;7ZkHIYWR;V7>j$RQ@aQf4Y?AfXUh}@aY_8P6j z$p#^kH@iNAtEWv!!9O!Qqiybj1iR~SET(vNlgf5IA*6v$aqsM;@FWWs)wR$OgbIlp$ zM%6O%r-sBYTQ06X6awWN3fUm@gDle4g2*&Pv6Efq=z8}7J8*p~%9lmrjHjExzrh=R z>#T*B`IA62wnCf)8Ia^WRA99|WTJ7=pgzX{S#K=X)XGBR>kf7)VibHoeT|9S+Q|Lh zL!{<`ArYxZlTT@HNl?#N=%^hp2^%{EZscf!O!{>a`r<2lNjW2)MQnDCkT+BMN!lHU zL&zI3i1dy*bUvwLk;Bc&g0Z&v_s?RyzuF3YW7o2f@eeTMsXw)gSWEK%+EVAtW%QD@ z9^3hQGIw_gq0`4~=c7#aF=d@{tQRs}j+ffN%{K&HGIHpx-_CSr*D(z7*pEveKux`x(@tcuod4irqYouGQ^vmOHuN`o)v#Kq|<{wlKBHK;AXR7I8lI8 z#=jHl7m|X&`(!dI2>m73;(~Hh#)-^^4y(Qu3uIj}^=IXE`a132r_8t6^-AvUKASmY2pJf&a_ zu~*DlS8t*)LrbH2wFd#*H_>`tZ} zFF*xh-Wflwh8-!pi-jiAY}DjXnw+u5gA#~lXDTR`tO_o3cvP1>sL0w?Rd$=xOw}=TH}|OzL2u#AT&b=;UAW)IQ`2EL*0}HB(<=z&bas zeq|zMch~XFry2wnp$7f7ArlQwR=~)Yw^+*^N{!OT!kJm}kPxuj?$V$Yg13Bl)(O&^`5iu$NQt#dhwC5Z$ECxF_fIFd zIZN>Tj|(8yOJwnW-V*4z*DJ2MRW5d)vI@fjW<&J^0-}y6`-v9bD6>u%+nd8MD{d{m zYo3ca3P$k7Fqg&r8O|5UIW1Md%cswZMFqv|_zxrO?N*?; z#E`ouit%rz1vdPxfT*u)nD&Eu{JFk>otD$2dkz>0y-|1Bi~&Q~*6e7ERUXR=lW$7i zFREw6=R5qUjD>>G_vn5)1P&_?g&`)g)U0|oNZz#L-;D39#`gf<|9UyqcP@hr$5*J> zd13$F;_w2taNI7-H_Kj zzC?z%qzYAU?dUeEl%EZ%Al^5_$c(Pt)ep(P~nZ1RUZ@DtjARymm0v z-`B;~TNi=MkImdXv<})8`=Ni%RKD7LEl-|sOl;qwLqBLKaiu~fHm&~(p5&6kd!w6K zlK^Id#(LcRcsc!;n~4KwJMo=r7W|Q{4PI23O$UnfxcAQhoV>w^eq5Qr2g=+4J1@dq ztR8^pyzMaMpchG1i{tTM5IWwdLAdN(G~o(C?>@Lz;yf5Mdo=l{Zf@NUd$TaHxPY3_bl&8TF 
z<8b7yMO;2inogG8ib8@0tz>n$>w!7!-oqhu_;to#_Kc-}{)u6J*<7ePph<6q8PZEp zE`akVz%Rc%zVY%0wqMN#%jE&-?a+RUhD9&34Af!=pI zLHzbBaU0YlWco&9y76S%)p#BqYg)#=V)wwxS5qXD|LvyV_$V}v&Lt1-oyL&v@w98*P@ZXV z8J6v@#NW?~Y4rA+NPzO^>?5-c$3Tz3?)$*sTHfHi*1U-YF?mJA&(GN8ux% z1K8K?PR89%W!m-9aKyfZ-EsYd!whX;*co|rEI7wvrFM}kr%a*ky9)l!9}h~}&%wc0 z@Hg)L$_Dx+oSEWWBL3#Im$=puLYj++*V7$1spSC5ja-BJixZf|m_pRtT1})D&BAy3 z3fxGpkSskg1m@43hYQtBk-Z*;?>DKF-yM?(xKD+;Gc(B5!HYrZ@J=Qp(E=J?f#pK} zDOkCOee-yNIV--SopAnTZ4QHBUzFJ51Cz+#12uwIV+*_fY93i;r3CA5v_NI`8@Baj z9@x6xLak;_WR#N8OG6jmxlf1P&z<1-*7Nmy6|C!J5-))L)(-N?_P6+F`ylka5{B*# z%_Q`tz}eQ>1fqn!4IPKNC3olOL$hcc7+)3ov!?B3drt~lZJ;ZJ3h#IK{+=wf#kU}Ta$z?OOn`Dydtgw7c9l!6({Hkn%Nx(!ZsVSoK?EecsrQIE*-}jtsgmlGgrkK zUz1?|*PpC=@mg51zliAtN3)`@&LAo+ca*Ce4E@r#vnx?{EMw#ftU2_GoiXudvo4vF ztVRNr1^#&d;A_(8*N?P)AkZFrmAO}4B+lcPh>u%r!6ns8AV*{(^nRtomk(+ndVkTO zqo!cu9kQmT!C%Gp1(H# z>NOoL1Q)=6=T6~`*T(4MH%jnzG!iy7gm#i@awO>l1oW&#(S&44%$;Nyv)c*-N^JPG z0s1`FBp%+5v*okbKEhIWIc7527e;tk!koif!FsX|iUvM#Sdq3K`!!!;t|~KdkdqiM zoVRDrTM|G|y^JLFrou~KRmi^-&qVVNIHVjEd>rouALa6yLMB}eZPP9Ae!3jX$Sfhg zKLRjWXE5eX{X-U)>?dL6cO@E&+UoW8MPpokCTM^8a&ti@La>aLN?GqbKwI=aPYT@^f67uTzUef6Ffb`qYC>dQL#WxPRS}$AX z!<1iL#n{zJ4ILME^1QjbD4#HZ4LfZ@^RMlOyWL9Upsg(2Rg%W$-9qN97pTRCAo`>M zXr0X}uHZf!YOd&_jBf|%mX-1D+0#JLbQ&+VJc0>ITX^w1cky?1XPCB(@P>+Vs=T-u zcGoYZNB-=_yFS+iul7xDE-)XGUt96TZ~OS`_PgjgrG!V;?xC@VPm--K*6{ref&7=# zNfOtLBwn!O`^SlB$}0`NQ1c~=l+xsji_`JE$|IqhxSi(&bzyK%gOG>U=J)-*sou}w zoL)IC9%cE6{8e$K(;X9Vn$%u$-Oq^z{E)y<8DHul-UzElcfpBqe!PJj@b1dTymtQ% zex_D~KN|H7?&>OnrfDn7NRXvNzX}|cw|9xg$9QC0&k=V8Ihd_B2-lAv0eVVtU^>W} z#Xd5H(e*z_kc=g0Dh|PBm7(A-6^iW<`v_T`2&JDVg8SK zRC;&PMmp?;B6SN^rQ6&qxZEgHnzuiSnSJo1xwihaw7eJ}M`_S4tHqKBAM`QY&>S>B zt>G@yf^bG#2H2f?FR?4y#iM@rpyd2YV)r5iN`hkOlZ|5oSFMNEqgnVodoNM@eji=E za&d<2UFcflA!swYe8J|OxSk6cuQU-&oZiYh9(%&o{wt{B-!53US(o|<7F2vvh6$g`q25WE{@e2awXCvv!PrTdwL60{Wdpi)=uBo2>5j+! 
zHVeL`42AIfT;FDemw=No^^}Zz9|A7as?DgZXvWEbxjHI!z z*ReOEQhrFth$qdJr~B62LW7(Axbvtw-e7VK6&l9S*3>QZjoW&f7bx7H%uVTmBTkv_73t4;Y84flaK>IJSWIJ<~ zv%EWV@b7_vsOP3ho2@>vun9tDX{j8KIC&eSEwlNtizoQ{50mIE)m%8yGn58zI?Y$% zGgP|TO%_^SK$*PB{ONFgNVi@_*M6Qu$4!^zd!1KvrQ(^~f-d6fRyQO;eG=TXH53}= zE#~%1+DQ2o1AOzj4IZAofX9@_;J%hKc;wPs@&2pXWX%zOJYkj39Y2qRW*vKI2wTIo zugBopY;~&II)ZOBv4N-W^T`Iyi{Kt)iTe}Yu_R1}D7813I41`NJe1+)QcGb{@V-Ad-91#*T4|bRU*}A0QH}@@#h;duz2Khv?)Exwkep=S3-8i>z5aqNZ7cmvFvEbOWxhw=m( z%qQMT#r8oDuqx#vp0SCg7Y!8HSF1ndKgi%yZV%%Yf=8!#XM*rt-$27@Uf9;Jgs`@^ z5`&_R&>gyitUCWp%=cNcPe+fFR_So~?cvO1toz}Nb(W-~GeGk8!YX_+(-Q`-DQ3;b z&y$FP6ed}l2Y0oF`}l^XaClJ)ewx|H<~}?C(J?}1UAPBs&}xyS^zMYFg}JySVKSQ5 z975Z|1u(E892aGn5!II$Fm#28OnQ^givE~_S@0Mb8#n~apU)E8bp;XQ-*fG!PLoAl zQ9illIp6W`vxX=0BL{6`?d98Xu|_#o zZru(S;)H(K<=5fUovlpTN*A9sOeZRI8cghGNM=si4-Lth_;RZQ@zZ}r?0bwLvF8o3 z)!4xPcHf6x7sX8VLK2EPZrjU;E+(wz66v>l4k-D~hQ@_Sz(3A`RdsW~e@`8|am@xI zUtJ?fp&wb|ssT`tbC2v>AB1|^rQ)02*NA9eA6d0%7kPZm7xvyt#;SZ}m=U^@*dN*m zq4jr|<-SXJVf=YWmMjMk!-2T@feb`yUcrJFD)3@QqT{PIYd}RU52`<^h%5F+z_-*# zu>I6@xGH-G&pFHEx@Vc}q_8t3=ckDie`$%OU-UzJuRr4E^(XN6yiqW6^FzGgewMYa ziiOjwWy#H$QruBHfQh19iTd{gWaB3da_v$DF)vF6*`%3pUf`R_58FkS{P@nUuPw)A z;}u{}wE@1haf0!w+nHgeH?9gwWW_P}9Es}$d>y)2T(aj240#&?+Xq}j3yTHpqsD8= zB?$?~utO5zZeOOni@o7gWS1ni@=;m5%UlGY-y-itEGm5gY}8$S_suIZzw zcM%MVN(CC}2RCmQ5|7kvB=+<){5mxP@$ z)?}i0$5!l{EM&!N`iQe_w)pY2{k7&-NIB-J|-02+-j!z8X zXk`zZFf$B_g}n8{KFZEHJtoO+`fU0IS&Zp5f!Tug??tQ7XWUj6;~$3Bmj&BjbSrnj>UQue&(W-^=TT&qrg4o1*E8-5M(^5QKQ-o2+?BS{ZV2qbjVzweDJYK7XM~fx6!Jm_`Q15NUwv-uw$zj^~L z)#ACkQx+LCM4b-)&hYEzRCsT;8%vAypy}Zvep+uZI?NeMql&7~Df};}&IqRIElR?D z=m^u?ErDeH7@-GQmd;B$iAQKBw#o`z%z5RQw(ld(zvE3-$t%%6PMXxLcsR@Y^dIKl zxdEFBC60kM?%YLx2Ucnfr|wQkIHP|i>x?|h?!bBcH82awl|+yqXN^HwcD(qPG})>A z6|Xvtq;jwRKv}5JR@I!l7dAe~d!nd`t ze63L$FI!+k_E)&j6#+7|aMx)H3@d!&gRy>~?X_xYSt?z&-$qGDy22|40;x+19`Gqnm`aw~PuQ=ZZI-9nW z`{AYZk%}$8&`y(BS&ZRVa~0{O;Yoary#!tIR`X>!T4ztaGo~wrUHTZ&W*V>aQKFSI zo<6f|!Uq#Slg*&UlGLGhMUrgQ_?`&6OwOm%K~LLOcZ>%c)RzBEa*3Qz1S#e$1@tn&!M@2*04 
zb5e@_uzl~?HSP$Kf&?z-7Yt@!E$Io@9%f*h3R9F4KxDZPEyPQxrn?<~Uh$NS_PPNs zM+fj{|E`0==TSK8*>&up=(n!luz!~TOK^OS zH~I@X`3;J2T6+;)w7nng3T(ra$fLMcW(hs5xdUe%DT8CV97a6o&!e}UVn2S2qjC#! zxxw!Tc;}@vHQ5kGOiTiJR>n1Qdw40{*&fcHI;4~BE#pa*>rE(g-wjW8xbn(LJ>dUs zDdvdM$TqL(^o-B)hKm0c@j!iG}1l$`_o=eSq#e@ zh~1O!!!r5L_%>S~sN>YPlm^S<#Ax@QTf`;KDv??1Abx9v<6Y5;YWiy-Dtvn0alCJS0A z`0@NyVbLI0bm(x!iv8hib-Eh_bTjO&No8d+x{?IZIpRMxQ(&5G5l2s6hTj~sNYJ8q zwx(BxI6Rn$POFP?e}pT{F0*2<6bd8>IFDFqYQd_%yKvHeB?!v>$1+XlKzG$t98tcE zEud07=*2DQOd5@&-5(IIc0qO<|A77I)WRp)324|R zLYcC^FzCfU$^J!m$b8uk;#G04Noee8IOCmz=WO1KXW#wEddJCwD36J4cQvti{XDVl z%ZGv|>NPnuQW4hogyIEZrrKXKAI^M8h7QtBL`#LPjF-a%-`qv9|AH~lcws(sFA*~7 zdAg{+!WYjt>65Fcld&=6msl@jmAEF%2-jV zKiln}9Mi)WG?Mt~hI75m9nhgPN#ucuwF6oBpCuCw9O?ZwSP$ZkEU_ z4#1*XD^L+;uE|G#GLd?|W94CYHb1|VmB%QG=V*vw#Gc8};Z-B~(q9f^Rpp@Qsv#6S zo=J+$C8Tm>AZxD}hlzpn;OKKdSf48V-W#pacS96>J-rNTzB>?)o+sqT#9K`LC;^=w zFSw#JnUwcl631V@<+wk|3N8&B03vx$`$%^mto*)`$%w`YUVzDb4!s83H(KG`32B%( z;0l{eL)y7Wl$kdHE(rcGQS0@FO;et-crRZT?Y+;3;2B4_?LH+%1SyE09kb1=+jPG}Z)Lh78 z1*xIQ$Ds z9H+r1U$)~$_d6tqORDk1c2A)@B@`}yZ9x1i@LA@o(Nn5oUb`;^Bd-Wu8`8z(oUs$# z7nXr?6UtG0%13aiNfeKf%Oyp#YHo zU;Gr7zZ*@fy846+ga>_aYb^1L^28)2)u)#~v}I=Wci< zbuJ8&(ZzVXJB}9uPZABmD-$wWp5;H8D0w90h`>&Sp?A(WDhv9H&dc%mTQ{A2on42E z|BJ)te%g5cfdkeKDZzxVn(!olk2v8`Do&a21%Hz*$(nOvY>wEI{f$&ax<>>5)W_nE zQ8nN=@UXbi`4nClJsf|=O~Bw^lb~j56&bmGDhw}P0$*0XhtDHM@U6oW$-`3*;ZnUl z_mV4smA{`0|FNAk@Z3c()bN zL9#x$bKfLz_Y%A`^{-i8)+Ly3xCw{vz7v{tpz-GyAe&}U3`ff~w zgMoV>W~&das`)@WXLN(E?oK|Tbtfy`pG0d544|p>28ajLqxV&FXdV>B_r-3!d4_R*UsSNL*eZleKW$b@F4ue%p!Q|?Cv`H<&r2M5^ zB!7o&&1ry|Z6fL&w;$sKuUS@!6y$q981J6^rqPxlpVZ~R^D%<|8mW#0pEb`2B$#w zVKO~&H5MWBAUIgIQfUWUT(V>h)lsTOHIHlv-_wd2Nv<^J?KX_h^5$=Hf8e9D)+qMe z%nV(YgL-$5xOQzEelqd}?eKK4&L4_zMqL4eL9+$^^+cXOcmquxi84dRg2h+Fuv*?)gGCsdN z1Dj{-fX>)gxTP@#o}3sWF1|jGzpQk|{6in%WI8BvcYoX+}cMzBd|Z`j!TzQfXFc)}-n8 zyzjf#?^ug}tmRq9InUYq-k;BPDX&@p-{jvx!=xd+Z1)+gRU0f?A?1Ucg~ESDs<6hr 
zV$Vlyw&P;MFy8Wgp}^5fF5SeUi7q(yBrtJfiIQdQ^j^sVdfYp1@PaXM0ODwp;8MM_>2;o^mn}I`4jfQHeWB5)xiyGfpjZ0^ZgldfEr3hyY-gvPrgfyR6BG!pMMH&|mfNsSB z;^w8*HoDJ4Nc@?HWTV3>>~%|k$hl`&#O#yIOD~758KMUjBbK8{ybBIcm51IwAAuzo zEzCV4Sft(~_D6Xn&T({S(x-cvLv*%i;$J(wnqN+w4jzE&O~bMObD1c8%WhF~i3Fr6 zwvxq;Zur4dIExd`I@3*tW08k7Sx|Nb-ujl~|Ku3AgtW2~(z4LCG7k&fQp8JleHK-} zF()Hm&&Db5CPA^>Ez;>>POPK@An0ZgJnuF{*HfpMljLQwZe5{h#)#3(&-n=nw6P(jNc8tM=1H$} zSy^xvSrX8IAEW*Q3;At$PJbDx3_8jZFQ^K+_tAK8#cy(=bOyRiKijy;OJF}}kH&cm z3Q_-)1FHYb!sX@1S;4XY*b#>dYz>`>`aU60={69LxO)(f(gu>WX%E^xjwO+i&Cnd7 z%ACuVVyEd#rfliYWNa)@;$({0>Y%`ho}de!t8Wv(0tcL79s>8jD}kT?T5Rq(MotWn z2G4&3h{TU_Q9+*pR9Z`u-@OiaK65GSy%qyk&!mtV>r!_0=xnU;*2f9XS4h~l6jAAi z?W|_(Vc__frggDT=u>iWx zrDKS-_A<@WzZ@aRknsy)++MPF}1)pcDUWg>JX4c_5EtNjpK zHiOo=KSYJZ960{p5E_15gML=;hmR-a=$i@k#Mjh>f1Q#9A8R*b?ssQ+sbdC-2_xa+ zzf(+Cb`vR5y-6zhS8~ikf=vK3>oD#E&-1nsNXe^1bkHpDm zx8kq+257lI5$yUCNXA@a5P{Hl*<}v~p%G}8_n7F1xqM8|OA-~Y7Fd1t37f!{k=1hjY)d^*!}EC{;WU=!91Ig}@HgZOBp1M( zyXthUkPr8`kVf`s=kup_sZd`nbkR0`#{EkQiR6efuD`YkpEs1jwY2T1ZZG8a%=RL7 zoK;2of>Qp4$0&amBJ6d@W-jzUCE3)O!a(&%tH1_}4nT5fT7HYsR97!e?mO ztx2<uRr{SZJM(j7p*j>$7A&cChbO%^14G}%PkU^|1J!z79lBd z6<84;FEO*EL#%RdD#!+ZBPp+DqWj(gfn)0n!{er7B69_&ifcso!$r}1$;HSvW|Aoe zpNM0J7EX!xVGT}y3GL7YRh=*5uTM>AQEhV*Wu?XxGNL{yk^xa>|9S z{3qrhJ&9lbJpp`g_u~CMm+*R_kXx7E#~!$z0Qn(zS+L(du;`fpHhT6vK6DA|h}? 
zkAEYF`3Vr^6P#>%iR}Hm9YZsBir1JPC;1cfcxBHRVfM(Vxp@)`tZyZ2WGS8P_Z2L6 zs&f}fN6X1^{QUkJ0JJ0sT6ABSUE_aGneQ5;5fI-_~Q&@J?; zfpD4K7!mPWOY=Q`{) zdY~_N54J>2>%|-foxv%NseGiWA6LSoY+=Y*-r(&(MU$TKdi%kALBuTXWn^f(Vu&r5 z8(2dlKUMMd?zV7#zc0P?W*9ZmTFVPU^DtBIwb)6cfR5BirQY6(wnrvM^QW+p*KYq# zcNWUiFD-FGXU2}kzG{H*=rFF*r^4ys2k;~IrZ~At3^S8v!0wiKrl~xUu3xW8t(wAO0>aJ+%Y@v)fNUfqJ3v4Za{OA+jf-Dw}(g%2j$w9`m}e_8$#OXqKAUZ1tu zpfo)&4eMvW)y?S+k1_O0=yZDd@eqq%Jjny9NL2p2q%rn?!z1a>My5?)>P# zU@Ru1c(ZsrY>OU94O*3WP>vHn7q$*p1aeWfTq-r{y)AYa6hc=nddsGmE#@oE{ACl< z3TaUATCO-EonI6G2erRH3mGL<`orfOY;e=HUF+}}6e5Coq@ERjYrd2|(aoV3XOH18 z7uM5R@6N%yvoh30u>!^)X1u6tCZ9OlnOH1I5h zrZ=GR#BDTK?=wms^y5=kP2kTv*P}(_PKX@mz*YZdvTKbGNQ8R{Iq9iHEj}k=bmK#a z+95~A34HUq6ZPO#Gm?C$4#o7-E?ie|_}ur4WJAV}CzD19Zuc97v?wqcmrSalN!yh0 z`|kHFX^$yR=v^myxrFoQKVvePqAS~ zIW|?wgMoE6D#)H;lMPmb$MJMxyh8(qKRyI+tB;Yku?uj)C?%+2Q}KjmIv#(&6Ca1I zz%&nmGgW^SroXVky_0W~k~cnN5fs6^$6Lugk2PZV#7|YHXEz&&0#0Le2O@ zs9UT96Z+~fUau5S_>F=BRS!5aa5pSCYyiXMjG(1%H8y8hGM!loFuWiLPCN>N?(K8P z-&}!vmS4r%Q+2>z!X5Kkcd_zSGay4v1D4-Q6#2iTijXns2cjXulE7q$XfLnTxUx*Vs}6f4mtXc)*HA6B#!}kT{pX+WTGM znBqA2U8)J4pNd#_VI#Zq+z^fO6R}eCnRU-~WaLT`>K9IelRpo_$0d^7L@ytn*;S)$ zWCZPfaF$(>(&5Xd*>bh1eQr=--0w zBU9Mu87}-s(J*Sid?t@wr+}pfl{iXM*z*_2LRR5F_R+$KX>MKy%e5@=>L5Lgzaq(c z;_s5cpMsyJ?=eYkor0b(0-40-+Qym(j~VSv#H|MTq$tA+hrSJhzJwG^T%QiE3SroJYSlzEg(AAQpUFz~Yw*MC!F3ctx^8EM(S0i4( za3}tm@f=;;>d2c|RXAIJ1WQIu=WWHmpxJ0AS|wkE9A-y{U2nnPO;0iBh!@^H*iF9e zlcIe~{v+XkQ{i^^2-wq+h~XwbnX9B9>m0Wg>^&_ZZ1NDMA3ha74j4_^TQ7-&&Z+X4VZM%6s>lYqR|gkNHcO0U#S-Q;8Fv?KhYc=wQDizRe^Y1y%c>l zS^>XwN0N66I*?{{y>Y9nFFxFD&aN7N!~X5o{KT{8Sa-CEALIh}=<7(dF&jtAnvUU# z8=BN7`U_T@EanTpcfgVj&%to}ME=HmB3@kMPBs^)gNs=3m1-C9)Uh3?eaeSByh?@s z%4m2nbUV-8ozLSW#`A$n19|q@Z33t628%r`bmf@PmkKqd%I_1!KLmb-q|gQO7M+HQ zfGu2ho-x&Y@d7=ED$;$O_eioBK%z~C8(%Xg)@BiO*zuA4#=+UtpumvMO3M|ug+4>i zaW_cT0ZowHFpXQ*Kfx(RC&fo2$MG*qa_FGQ`(Uc;E3i5|sOqv3&@5TU^-su9Z*e)s z%u@w35SZF3ePr@>b#$Njj}=|Ffq6FX$+46uxHeb8IV~mbns`l zvl`!W*r2UUGwxTiZO1iWcj*O`{%4OKQ>B+^B1bTQ+hI=H1q?$&u_xg*JqgOpeZ!###pGi 
zJ(wO^aaXJuD&o_g4xk(E90I-QQ8>#xgijXyZ#A=Y`5l$xU?tfOHTSi+!QNc_*tiYb z{7qO)jx@hxT7>&N22xwWZQsA85y~tF(pc}~kb2VzR+xLk?ch|nwA!0%O#4L2Gk0@K zw|bb_yBYTXFkq)>H+0yFS?3jVI=0>o>=QOX|KTg(p{a@TPud|{ zLI%e5J|Nqz+sVf{|6$3CUyyVtlfM$WvGae_@zpv9sZZ1%Hh!x+Omowu#;6FLli%@0 zg(G;++nv<6qzN@?5)@Xo3Tu@${B_|6R2KT$1z%pUMROYAodvVe`uL8uL=@l_4<}sg zK8^o9zJcG4d<#Vq()59Ip6Jw>WQfj-MY-f+zE(n$emfXX?VD7o|2tQ1(7TV0dzOtS z*EQkOq!?24tN~N@jibK~1&L!e`_g~sZeXXq!0q7i;HW1_$N8u7ML!16T}f4V_Kp zHMxr`b?)MZ=Nj}>Z8GMzC2*hMCP-@fuPmGKLCa@i!R=B~(N z9-ian7NJ~jVhtFFb&;&K>!Ex7G-~ual`F>7!SiWb#b%{CG~=@}pJqFQ_fPGkOZvuh z>6|?DYhMZpwX#4f9mowOvgxQD-$4{LliwUZ7}X3{;?h0i=uiJ!D6ehw3W-KI1&o_vYJ#J^C{uvjEL;}Te#kE5kUQT)$G5uXwDN#yVAjYidG)VudFR#@zW z;}vr-?y^07VCzWwmkuDAlTXp-;j-MW$(k-@MzrJBUC`18ZoS@wJ_;AskoKc!O~W-f zX>k)`_g}(k<~d}m+!&Z7>~-p#W?1>onad~NxCjrYY=MVL4yf2Q(E5>r&$_Y*j_RF)@C~b3 z-$ysvm6O8@?Vh6XRKZXB+>D-(kSEoiPF#J65*^{ThR(Hiq=tTH$jUYQ_y~P}5sZEa zX0wO!|2}jAzfs5aJSXvyrQ>02)qXlKy$x@^9f+y|17@1@7IY{)gL90k@%n-lqT7$S zI9PKAB<~*qE1Z>a&fQe@{#%JCprVQ;8E9aSYa(m-p~NrAR|Y z_5oh#(`tevQlhcQzntjR3NGFKQ}K?5CLZ-P#to+|(5`))sM%6p%!1R%aR=f3*?pS) zJ~tGOG!B56_K~b~n1MJe&>WJL!_Z)}0nRCEAb~xxBClJ*{NB(M9_k6apJXSzm}`$_ zpDIXat~xQB&&8c;=SV?I5_=;RgptV^&^A63!(8)43hSoAyquNfwwk7JpYXw=CA-NT z<3zNdVhueXVp*%NF7&nQF-!fO%uMYBJ0BJVriB@-#vP?;tt~uDprpr3BV{U(Nv~DsTA6f^$ZyCUiCVxzS9*eTIgJDs91MHir z#4n!Z*w>s32g2>Bhk5{CopFO|hiH)vqH?%emMhNB7)kw)C(~sz8F=)*7nTgD!6Gvo zR_Y!vN?frR3>D50m*vsuyww!cqclJ@F`bd-Tf}Q(1@qZ^oJ}abPX1QzWCzar(jD-ruc|-Eq?zPFJwal z$SH++7^*#%|6X5?&ulYLVfj!D>6!s?r{(CJi~53FC!2k2Y$q~zLhz{6im7?xX}hBRj-5{+YsD%L;bn;urDU=CwGde*$WGXOiIb-6&ss8O1L7 zxNXWFVtr78ZP(nx!p8l=lkWzhr;5Pu^v{R4rHe_%c{71G;YucIA7No#qeYdrgK>ZE zUUA{se^~L+1x%(GLZ_o5ESq{6hifgPXGaCW7MD2q?=vUsk4)ohGO{2vbtrD0BSi}f zCeUN4CoyiLGaZ~J>qjvuV{g!Z#KbbI(ZIG(?n9N8#EuX>!~pEjPuCOP1K>M5{ikw1HIz=><0 zo6KWw#-PTL8K`}H6i$pXhFh;s(0+(Ij!>R-E!F}ak!lgWWq-{;dm?`p~1 zW$FBz-bk9OF_5Nj9?chi%*9P*!zirK<=tn(!TIwX>~@bPb?2h7{dgeW-!=g%x_%L^ 
zWP!ge2Vn8+VzN0=aLol7120Qsmi{{MhB$)x-W}}x`9kL7YX`Ni(QrGdm=*SU2keQ{iWc6UT!ZJ*5RQLn$Mg|b$vd$IA6{b#E*JFq)r;2rq1g_U zRDT1K*A9^Cn0(P=<9-O3yg+cd=fLtM?l9ipiW+>>0QY(6#AWmo@;pYE!=BTI@_XlS z2jOC;-NhKKuEZ_ZU4T%RqgWcHMkAF?socEV*dH{WR_KOP>GA78lRUv8#=_dgYb72# z>j#Gdgn4vT9qx6$3D1=(@awr{Xdv?9hZK$y-IhSn+(pS;{CNcMTj|*5Dflv9E+au2 z572VLOLl$nR#DKxzgV&O2f5|)5?1Rx#Pi`<+@+@t7pt1n_#LfGwyPc5OrGI<^FEws zmWBdJhD)le^Vdt2Vg0!bcGp6S{5)hsYc?Fljcq0zuBC|cE?l|zXUnl#so#oOzWn2cnvlvfbP8iOEt*YUW zKZh~3T21gpjttt`fL_DX`Ag*{Q0dOXee06={Ec%viqibvv;CGs^*D(#j zhHLYv&e#?rRi!`;im%iC$`N#QiIA&PT1W?K?V^6ASxmk(m?}JU;CCR)r`PXN{ z`tgAdyQ@2mB}wpfOLz9ECpjPqIPu|MKx-G;ntRT|h-Pp2g#S6IJZIEALUNWjne zGx7V3F7o=RCjU>e7na{m0a^bX!ngO3$Ko3###&$j*{7fR8K-ov>Qabnr6RY>>8 zaP{9uzj~kfjMaHDG3O)X$B12YjD*h{6xT0a%ZgnB z=*K)c-e;!|(^4{E>Fs#j?X`)Hsq=z+uj}CZT7c(WTiF5ovv6{JG%wki&YZs3kkoIp zXlU#T(bW|LY0Y0#{-0+zCOfZ$vD1FAlI#^Y&2$%ZwdM00zkfi|0&nq>gMxG8_6%M# zZY-C3tH4_aTG8>}WVwFJPpoR&i|%hEsM~IJz_J)Jc3};!(3PcQzOBZ*$7jXeKSqd* z!p3p_OOt;J+QZ#%&!sV9TjICPmOefB8P{x*ZrrJo%?3{^09AbrNGNpTVO5jqLZ5I5 z+_*~U?=(?IPYs&qdXld0?ZT!OQ~IO$BzBcQ7u~q$ffWJc`8uiDv_4=sT$v{@n0&;< z`+yXW(Z9@`kBHDx(}8y^e<{q65}={Knf$f60q5rJW$6J&8XsJrD4O?LxG%p7ASvy} z@ZzNy-aZ%D{(I%L!P!X{jt@?pE}f^n?u>Jr^0@1`xPsoF%M(8r>2f06^4sni9lk`0g0YS>_?S>g7(!GlaFh;Uk}z)ADO@(9o2^UW=WtLTrPTi@G-eEXd^wp_7Pee_u!EAfqbs%DB4rAT$tnd(O$n6 zvE-%k5c)erv@TC@ZU!EJRnGlBGINH3jA}F1l=45IwJ=1;L3$y`SBed-ujk}HG2y)mTS-lZX$lX?+PsbdIUZ8 zx^ta>5_G<&C;7yd;b&QIo<2|k=X(Y6fjj`)CwX9`P79M9atw1jp5nBhqv=tpSiBK6 zhR4=jgb9{g;mFM)U^O=mbUFi|=Z_niVBblyEH}WgZyWH)B7LZweNk+mRlts&oyM+N zszB{mOR)9}Li^A$pt!^vRAp5##Ki&rs-F^!!Xcnn+rnNNOXKe;8=?KkW0LyJ6Fp)h zVOR78$eW&lF+Y3Qm)Ganl^d^cX!mgbeQG!EcxVBZ2PiHmo&eK|@3Jd1H!xb&K{mI< zL7w{#m~lEwG+^Q-wmo$L`+SM9izg-dmNiQ-_Hw)Eb7MOBY-kUOO7_&~_DHcD&ekVJNoYy&LF1^ zr#_!1(3F|F{DSH~{Ir|%k(qWh`(hM*Y3a=W?8t)&)fN2v5`Q|fM!1$M18`MO2OLVW z1K-yix`N~2`>81YE`ZRwM+0!u-4Q&l^%2a^Z6v<7)kJ#YbS|B;lexT}#ltl?(Y&>T z-YZ;0Z!9W+f{T_Uu|k{flPUv`%zoq}UlGS~)8PAxi>NT`BB>EOlIP8myumk*Z?Aod z|8~n_X7_NE6gLM 
zDKVnlE5>kp+B>3fqlJ{Jd6U*f|CpxtLNxQufCg(*Fw4sk#qW-=vDloCx@(gmW9~6< z(pkcms^7!;v&O@THMhhMX9j?su>Sivw+ODj(4wDG;z3K;Q(IRfvC*tXt?pp-UJ%MU ze0*Tz*9vq>+JQ>)i*WLQOU&-U7}Wi|0q?rhVexMTswVJUwEODOwey}ZBiR8v{tW29n`Fzb0=PFNoq0t|;gZL4^j7Fh zl4rG>)hhj9OXlCfmy2CVB{b{2tDiIcc)T^BQ6GKu@hO7VGd(tMVJJbaw|kaWi0 z#_WQ(I5tp*rk*;;XNOPao!M7FvdWj+e$oMjAHi&-%?=(>xqy!z=mJvUK???tqM1EQ zXv8~XTI4lHJW-+wG%T0UwY#nOmTR);By$;rwk$$q0#HT-f2kZ_8<}9z%m6x!493(?j!jG3&%%=wDgEcAPh- zS7W4T)Wd%9XHcZE%U4jdWwW5Q2pb2_UV}5M#jrr+LVYGx!ePFFNYd^shUy zPEQr&_XYFf22)!6I|vH;3t4^ZRBjiQ0*kE^;N@4h$^ zmIw}&=a##|Puu}6f;3RaIS00Bv|(R}8Gz|}tb4K@oaiDrGT4Z_?l}r; z8VbNL;1M>#Gz!x@Nc6aBUgo9?!N+^iYp#j7_vJ%4Dx*eQ>oZ`zFlPzwI?U`YoD}c& zRHr$w*MsfoNP1YJ2h?|L5j@1zV(nq(IM?7j#t5BdFS3trDARx|k5y=!Y$HgHcEe#) z{J7_^1@z0*Y#MmkkIFmyK)CciHraOoIyGnUKZOISXIGEN$B8nHvHjqB_b^Bv`^aMF z1<~@vElgcH6Q^lCfzTr+;4*U}U)MPwORvj{JNBY2X{+U(v z6pM-*)dUyuV`7*SO*+>LUXDJ&)js`9M)F&7y7At(b7c7q$v>Nt3YY z%`%TE9c7fm30U1y^9pKgJfUkSP|w~*G_>*C#6 zS7BA`7P7de3VP2jWa9_kB8e~6FjHX%z`=iP^_+)n={`@ARg(w^1+mR4G|Rgy3dldkhFa|QCGN=SPLJWzQTi!!Zq1mGhatmKNCUUjkRnFB7{H z39N<;z}yor$aEpU7QZ7w9M$;(!gmjWS?1ed(8@0MY}6xiDB&>s{nQm@uB)ShX(kz> zW(^CTgh9{EZ|tbkT{17%6)JxZ6`1ea*yC^{?USF0CJq}*TtW`R$(~l+osvNcuFZ!R zcTHhV+XqHdrLfbwkSw_9gLd;Lh%5Flg^Rn)A?9Nc#FhPH21TY&^NiwFFGB?>EXeZ!@(jV9c%Y#vTM;(ad~qhC`K!R>+6}UHsCV*IOecOb(rwK zOfF=#qcgC_yn`fm>*LE`Mi}Te17a$>i0aoSR(T_iJQC)y@ds_8&!T`hZ=MP5l6EBU zhXjVbUd=RZouRVLj!ga&h+U6{qQpld>w{kfPUU7PI3J$@?#f?bw38Qhos}aSch$il zBf&wte>Ac`inMOFEgu#U!z<)uaQ=!+9A$oh4JfT4MT0(LfX8UmEVYF9CtHYL>PXx+ zt%ma!RrGA0iL2@@K{iW;2XrRDq}9G+-OjUY%33=p^>{1J_OORSOL>%QPKKb>5^%!o zAG?;QA^0bREMTq!6s_qZ7&r&@|Jn&Y?r-Gc#!)0hVIBuUd$uMkg6xn7pfGS#N z!{^hKK6&&V_D*QP=F)sPkh_cO-BY6#;{-4Gk-_-uZ48ZDn9Gvy?z3sS)XeUe_Oc<4 zQuN-%BsyYq7+3rzFp>RC+1L4#`Lk*wv3|8+8hQe?GNK^`|-KrNLQc7TC0>qDY-W;H&^X zNTWmOTuIW0_au1SVJEt7)1aonu`=SSRWo7eqZG(~Si*`Rh0W07 zt>y&6Vk*h8UBfYc^(KhEvJBq*o`!Z0Zizd4Z1BzQb8L4-Jd1yIM-&w+T-O{MR`PEl z1pH?K5muh~IeZ6G%DV`OM+U*a{u{(s=_s_VV?^g@Jt@&^hnx{J_^Cg!@cD`q4_ib`kKQBN{&+J!y;hF-gHH2y9di&DyO906 
z+OTNh8S-$mGcUDJqMvhhnA?w)c+b)pe8#>Mxoy=4(eStEFIR?6eY^Olv^4lID~Ug? z-9Z$cO2mF%2K3l49oiP1D0=2p4g;pI;%;60$fSb+TOzLGzT;&uYwZNO=CD5vh*joG z60XCdJ3H9O9xH&}e>;j^-+YSQa0}zL$%2=$ z;5@JUHIgd%AEleu$H0$E>D=K~GB4|W1P-$|L)-b8^o@cS*o@_H{>yPJx)V;f`JN`r z&zK1e+fwXa$N9RREqvmrxjc@pgwGE$c(VO4x@c1%&g-n<4?3KAA1UO@AEj`}f0-av zm4+(IrD1_V4<9=3J2@^M%3DSVIa?PguCk(pe-RyHx!L;U(VcFX@i3PqVf2sJSx_ArP6;_(Klbt;pC#Fd_(dhzIA?*cw^)TNJ{rZQP}|Q5LNEbB#(XBjEj7#$h$=g#p7o0hBPMR;5I!) zW&Nitxu_l|u3tuvmF^eSd)v_yqm+ei{A*(Ubvz#$-vBqKz7_V)x5OqlGN{A;46v`E z^vJoN&?XfKp+7!ifMqZ=Uq8i1@87}V%6G%dVja{PA>=fN6k^XPd3t71BC$%=pj-2U zVX*dVqOseE-yZjn%P$$(v?Od4@Ae%;i*nB4HLdq(>pU7_bS7|TnYYYj{V8H?wH;q7 zwTl}sdhC$m{0Rc-K^FWZ#DG!w10UMHR5&+(g2#J<@&QM4d#bPq(2Kd?k((ak?c3(lE9zm~Kr0!Pv|HI0-{->f=s**@RjHw_91Sfx zigGvq;=#%+o>XYad%E|DZW+q(8%-0b@3V2VtK|mlHA)iwlN(4veEsEnn)ry@665FBXG?dHEi}h-6*@y0z&Q&hJxo>Xy3U3bKeeR zzpq!56<&VW@;L$HTi!QDw+m+(4RYvr`#X~@G$x9x1P<(wg(Ul}Eh-+@!c`N}MULfj z8Xr|B-~p{OWX3sTyfyASyRH0BJm>RvynS;t(Jp$xEH=gB&soyY_i_kESl=Ul7E911 zJCjVuUNoM!l}JDH!C%`QQ98F7u4t!$$3kCJ7|vO?(0~1HJrv42{;;?cv%t%L3cA1W zgFU;9(QHpX(>y zFe_IBw!#yq_6cy6(;b8f&Vr6sbByo!EcV*01;zGCux9WvVtRiQ5=U8f{e>D{yQG7P z-FmP!PDfy4i%>b`xmc!09=hIUfT!Ub=CxoVzR?~A`KMlzr{l&z##mj*yD$LGYn#LD z&}a;tuSQ&JipczBb5U>4VvH{m+r-y~5UW>xq%=Jah8^34OFPDb!hfOg*?K#f|2Gq< z|94?XdWYGJL|Anpr6iVv#V~d+&a<* zZfcb?PpPdSmt%)|_Rbg;k_^q+xg^)t4my*T6T7;mMyGyi*?(DJR zxse;NxNZYPbpK>d=7Y({1-nG=61=f+U;s*#EQ7G_U1X7pA7)2KVNCfyCgJu(v|K3` z+xv>iqUj0f+GsTl zIMb#81B*VQrpbP0ch?spC+vpyrLE-AsTQnu_JnxhIW50$O1q}chtRKv{MWS+E`gXE<`T?{GItuFVs^D*LFs!_8E822+AdHVG#vvyG4=%{3 zUAM)sx$Qf<>8Ami?&+e}`#H{ky`JBS)8etA8$mIk1AEp*!KWsDoN}%S9y{*hscCgQ zf&s0mpnSxm0BX=130EDKQS2B^{TDhxgv@cSveJW2i9yKDf6vdX9wxB#fGbwZ@NVTQ zK4eN6%KsP)r4@SYn1%~>?>I01+c}pX(N{*V-pWQvQzXk1+@MX;s?ljh9dA1za61Rg zg$Wr$A?sNbNa&4eywk0OJ%{69UWAw6HW-VOveYq9WX!hqYmpP)GGMl~MO1p(g#-v$ zo)E!R6*oc)e%(;OlnH_x-2RS@SC%|}oO6+lUN#T=XX=A%%_&SNJqXlI2@l-#L_dew z{NtsqsFJcBD+1 zm2c=WFiVo;ySuwP{}Uen`^`lnB0tE`MOye8?xGcupCvgxKSKE3tYKkcp%a&chD0t} 
z8L~2B<>ECHgH|qHpC7DdSg=&${~j&9zu3rmV{mH1b)iN$6ywNDl(G!QEsj>~+7fF_ zoVbGx*}Dk!9_+!W^lQwtDH7F>PQ#v46S05Md-hDT2n`JXvL|o6Sm2{|SRP2xG-M}R zlq4zEew0UOTN}#{7A`9#KX~N-UX;rJzbLbbtCxj`EL-!x7xdis|1N0dt`;&y^oA7P zd`5c5>+xsnZ1t9Xgg6wqqefhscfFzJXsj8`d#znyZ8Dc%W~ZXC&H zY0IFa-zfowv5DqBx5fjLuE3TgE$;tSm1-J|;ZYRjK+D!|$9Xw+e8Uw#jIWf$Z7bri-f0SL ztQMh^i3GQ3wWAw@+QBzA30q9Yk!>?iV)&^+bY0#A(EE~t?XymZRgXom+cA3lLQ(|O zzRzVZN7t|^5wF8q8gk&Z1dvfIoz4x%*8 zR5)HgPy_!)T@~xt-^a@d|5#bLEuVU9H_mI!0&7VnxNu&dx(=gMX`nRkm7a^r8PYs` znJkQ5(~NiSZGddKYFsXCt0LAOBI(7cB&qraoK$VX8+o3{t{;OH-x8R^*D+|&L--nT zE{hM@PvZT}sDwBOe}8nr3m2pXd`lI(IY*hbns`C(xsG zO?%^!!8TPnWqCUnrdrF{CK zhfFQe8t-ru)M`k@lNSH*j?pTjHJC$aju|}8*QN#!N-^lrel|(%EW5S1N5Eq*CNT=nJkaMut`?z0kEZy`QlVZsWdd5< zx1n=s9)D1N0wnJIVi)!2bKRgpT>iuzRGTBo=WH2^YujXK!<7`s+reO9nKu{?JdE#0 zDDsgK^1Ni~LzsS4nQu5FPkzoR#dXc{FkAT$C{MX8j9uQbU+V~el{SY23(C*Sxyd-Z zbq;u37)Kk!XR(@b57GB>AxY?1iKlZ<3rq0pqRzMlw zgfK2p$VBY7qGM)m!~?OvAie)3Ia`t@QdD?^A^ml*`=%aLtI5)Y-{rh=dIrSYEN076 z=fLq=1z31-DE}LG6{BnJqg=2zFMT_L95t7u@~sgx+o2GiiO2ER`EuNa58?Cf>(SP{ zAbR%cO5!pjmyNMZ7O%VTKs@NMHV^L`$FHrKCkli#8c_5e>+)Lh$L595Z8nQ6QBuW! zTP?Wb95+_=ZUe44{1fIDS)<`mMfyVNy7=I`UFh(i4PE-ylc`*xuu3nNUB0!FTc|nk zWB~!|+mM6r8U(!SJ6m+EmErz&rj%h|UYhiwWHYT<*Z z=fA1+vg>3hnD|t*qGBBXX4{Lcch?A-BLbFqNc3f!0`_bwwwe0e8~!ZJ#1=tmqO0r4 z58E4Z_cKHJojYD!^XW)9zwQj_>r$b&7dQzTwXLXr*@!-!e;z}|?#9-A4qW&|M2SN? 
zM5iOqlb;=0bZXmHh_f-F4Y||!^t@h}YS=FAyn!eZqR=syZJl_qH-(hu*^FucqX=S3cAIy9#&M zSr7}0Ma0rRo%dza08W1XoJlIuAWeU~AXw=$9(0*N)4rdiHHkyG!=8_?)Ng{f{&~z8 zWFWD9F0IhtiT5P`!xnl8A1Pb00cw;jyLbW%N+{en;1Gu2V<7ieChV5bCzd%^*|UL@ z>CEegSP-fLkyruT=f8?=i*CbZH{#&v_hn@Bjs_5Q3Bxz9zOwJSeAs348PwX;4%g>* zqfNd8-qx*PwDg{X`j%w~dw0<+DhCDpwb-cG$264=qm4G@SMKI4c5{}H39{m}b&Du5 zJD^9El0|Wv%3C;So`(TFv-qr-Eh-t0!9r&-8usKhwAjxjV=E@mS6Li}RP+-6)x|Jo zEqCuWT5FK?u}ZW>U7AVk7AB6HYw*Jf#EAzkK<>yTY*-it!!8Ao7n8v3{vL{=6NRCy z{403)FJRscs-m!NE`~?+a5nEyuqbGUi13LxWAqi1uJjOt-EH|zrv&h(XDREa$9+1M zqM6kTNWs0hHUA^jd+mS;N)t)8Z#lLYJ!Cqr_u=u=OZXEOQrx&|H|X7I$HccA>1c-% zIpi>xHVNC)#Ke1et!)cCYvC~}?{paBjIC-yY7F?f>{Irrg$xyQYDfFSLpUBZlVqr^ zAX*c;*#}j%V4ArER_&Lk3nuH3?Jrjmoh>=m-%EDEm9c8LxXPAIGfZO3W3;J*jT^h+ zlN6ZzIf*`dT!{BTEi7uugYT>)(JL1rZm!eln4uf3(m6t1_e*mto&dbQH62%E9bqqJ z7r-;^Q%vlT7y0!fmv1=y5yW97N(5w81?FgRYrhZ7iDO*!`NU#aqAy2}%07aQcWTTd zb1`D@wT$R>FQg59T{t|%(%~dy{O_?g&1MFyvu2o4-JH4P>2k^(daOzH)Xp#q?c?#? zs%Px`!GoABr-P55U%M1TLm!|KUPIV)0GmV^dmA@ti$6F1F+B#1DHZKzI+{?!$f*W|5 z9Lm*vz5seSyR(NrNfFNl6vp=H(Nu0Fk+cA};rz zvGX=v!WA#NaDL=AIPwdbCo_#=I}?IhS?{#p8SNeQ4>hn7kg_L$636 zbF(OiIi1>#rrg=Kqm$2^DX=4&M&0;oRU;(Hr}9|IStPk#oa)$ipu*=qPHK7wFMN+D z4~L8JN@FD>ZMBe^eNMtPRbe0=C=3QEGUVFH5b|qIHEdPeijnee&@+1v4SK$Y+GJ~y18rsxdoC$RUjSQ z55&P%kN3DoYc-p{-3FC-*YV?PT@wENFv7tnOvMIn4_tSgI=Wo8_KrRdwM*rQ@{VQ1 zWX3h@%eceT^(SDE@gd&8$Z4GT;soqI{DJAM(ZyMI<9H-o7h3*Jrp`hyncX`k6VWg^ z#{HiZ-~X%(*@H@&!EPbrwGrJ_AXu2XLS-4!mq@p|#ALo=lL$ z6VtfJgfA0GyU+&QFeRY6o~;6*J_5qeq_Mukh(^Xg$HTb_#7fwOR8Jabv?AuYU`(Zw3@9okWqm8z35D%Q|*%W>+>e!&9-7y#7=N zGFH;UnkyTEWTrmt9q)(N_4;(@WO3Tidx6R1@TZCy8sy2b9-Qg^nr|#9Mjw}-#q$Cd zRQ~5-oa@vF6Vi3q^wVPaN%k}QdWbtiM@o4w%cs!|sd9wxX29}`_1V#rso;}miVJ_& z@tqro;li&u^jpYMYRx+WgGX8+FySs2Q|S!h94<|?(U+6?tb)5YR>QTLE(|Da#kn8M zsg#u%`LimRY_iqE$p>H zDEZH0HhD=RXyIN1D*oLMPuW+3OMd~{3GtYDc#@s-NP?HQ(GeqZp5WawHDZ@HABUQ~ z+3z~FDt3~#m~MLPdLU>Vm_76_0>nbXM16()4Y&fRd@T979-Z3VTS ztxeOP*24RCA=+%93GBsc3~7x92Yms0bX7jfMoW{S)8WKs)hVpE3C17N`b58I5EqKL z(p&pPAW%xCbQ7B5Ey 
z7kB!6raGoZMUv?r>q(w#9;-e_0<&xL;9j&kb-l3{C#hYB5(ha{Z(L9A9oUAlo1)QO zgTtfmKaW3GHZX_eL!mN8m|<5;WipG8utQ^_XuS0jyk!bOAXS-2O#OmKEk5DVkAl=H zw1&}nzmZ9yHbm#!bvW9*giaZH0H+)Oa&fWyxF~=I=-d#4U+)=_sY35yUq=Ec9@#=4 zY}iO!b1$MtFZVakzM#Xv@Nn!Cj&r!j^9j$; zJ-Pzs)V#qwB@r;ZHjN)WV9HdD4B>?*oou$DFcGky1Qk|(1ilQi<%99$=<4@;8*Kwv zvLuq}d~XI4Jr10`Cl7B<{{#PRdJmg3tE!nbzN~cf5z^-?Ovftb5<#z6@~Pn~OtcpQ zE%~po-&~7^h}Lp8m=$=GAC4bHm%zrD>-^JiN1$8b7|hu($=*mgifaV!faVH576+fg zqJiriVzC@NH9j#O6BeQ2=nu?XasaBXwsR84m5jEXDfW)cAXe=x7G2)~_NDJ|=3fh3 zlrM(O|GFVkM}tgu;Pby}FQl_LjM71Ny=?aI6*=Z{z8kfW{Dg9*mr?JK8LZlT z0Q|Pfk^7uY_~~W^axyfUdhd*Lm1ts}iP8txFbyFMOud)_S$bHB;>p>};$RipuCNt~zpWw$zXj-94*jgO z^8yq!7-NU+v23RV^gkKERgT|biTQ4FZ$=e+dzTEo_|O9r&mMy$@|lSi{*HS? z>sc}Xat>u(g~ijC!P1BCp_W@OOzDyUJ>OJFkzR;{!nr*EBN|+^h6voBtjUbe>_E5o zD$JiH?zlVE4D|}*VcukQIyg_AAFw5jF216UNp79k^On0?OOn7;dl+P%pJAXU6&p0y zK$Ny5owe7VrrvP3K6Up4U-4NH9=vEw_>F1MzuUxa{*z`tPhL-WsPU=IX@XdPg7r9Q+HL94Awc#%Q=HWdlBkj^aJPTFBeH zoWANgiGSR*z{Io));ck4O!YQ;jrW25q~6Xfv|CI+wQ-UJ=Oj>&u!gpE>crq*C|xIM z&$=gylcLmkW}}rWNjCRp&nTwTznr}y@p>CPb$yKPN&fiqxEchEXmc@!vF!G>IW$D) zG}YpesOL(wL0ab$F8>n8i~41VX_a#1(NlABJUpDd5e&z%8M$Eo=ONsWh=*oRTin3m zS4qHid{-$<`R+T|=#Me*-5`vXd%Mti&Pp^W_c<$ku>v3OYR3C`0SBxrU|)|o-L3Wn zTsK=nj4O{!yI?`P6HilOc>`B8)G;fg-!MNeatO+Qsi3`wvyq=)O%`Q$!N0v5N%)jg ztf+`8y!)j^r~W-j=ABiCs72!t7*Y#Kj<2j6);&P+XWdZf-Uy#+x!KO)P4ruzC^=MJ zh+^COQ0|)snR!r%%45+4h6$dz#qwo+qKs(tx%ZNx=TO zV&tWcBC#=7zyK#V`g6r8>4o_!^@tx#C zXy&8k@a?EQy1H*5v;Ab?Tt*|VmkcA%|@GknNdP{b}hMG+Hw>Tv82 zZ22gPt9WWu{|+~YyVg@PPBU@uuNlN7PldF}Wx*Q%NQ`iefUb+BsOdGpUtOAulS+CZ zr36{a^BKHZopl_7K!{3+BoltGIC*-&7jM4Fz!!=##9dE;-s@e>h_!eSA@iMZet#|e zW+s!s=@NLF*6|NDR1t=pp7vqdEK0-sU2z zldVEr(&9ng^#<^6sep`wKK%Oik-de*Y=&Di-aNDt+s7vm=8z?6Th~g{4?0l^6Dg27 z$DzvB2oafs+^nTJol%&46&y?aaowC?O8?5y#ZtR@lkb?(6I|Wz-g5`a4o)XoAAe)p zZGR%^%$+HRUC1%k4L$0-Ntk94ky_sZDn1K9#NZ*WoY8LeV2Uoro6aMhFHFg_+!Ey1 zNO6&PZ&2FKkXZD4Kx$eIDEDQ9w2c+`EYTt&Rky&kbQ}J2Q6>*YPqFC@oP=%jS2%vU z4n_)o;-vOT)FDp|!wg=*wHbRLRoRvLOr64)OliV<+FJ1TmnHlsR>F+^+fMrb+=hVT 
z{cw7k4wERx#d#FP!ltip@vL(xp4;lizW3CoqQ=WPx5Z5i`}!GBHHzsfw?n&+&eYU2 zn(_N)0iOOvX#S}ki7_`zE}H}{CG+VGUI{K*w1gV=uEalcl;RIn* zx>_!o6;{!rMx#>9VbvS#NSzG2#}~4XmBaY%9v`5m@q;zfu@O%^HzGyd;biZZM%a>h z7OlPA*~gqEWZkeL{;T?iYXuz0uKDA*Je`vymIf`!Hhg*O%!dJm=*hI%d1xS;A=wi!Y09?e`|b&5KUYg+d}(EiUzWh$z5d*sPLy^yg|jLr0#J4344Ch76#tI^ma|T-4zvu1 zebWwdm=k}vGO-`l-PQaoR(M$Rpt1@U_X$PPyeU&~|(J3W=< zYnT9Sn+=5ylS$9CFkCg_ifL2k5J!cMnhTBNoPB`H(!I6@K3`u6uMS5*R;>eBb5xyP zdY6tGi|ipND1*=2HXqL})M6j{IMKMP+9;Wm;vmftWQs!3Q;rt9C6}n1Zg{I2baTqrR{$H-PJ!hQ@ z>Puy~$i`FjqSj9Ka&ZWm^w|QPdzMinu_D$oZy}kx>plD9awnTG$q0>dmeW%MvXq|n zW!^DWxaQnAt~wS8enWd`w^9Z-Pj{lP7ngxaK@@tltVOc634f)(U~V00z#~K(C!O*I z&+ZCbZND1TElV+F#&I-p8-|Aab7-QZ0nynXPOHq4ShE`!(RQ~Hvu1`V{VM$dpFMht z5@R2jvcvW`ep-*dS1(2XQwlhEUYG9QvW4VMQDoltIPe#-HP9#jf!`y&8$bWmK$hEzpF5C&j?T68o_Uy-m&rr98mKI%kEQX zLuP6$j{2L?om$G|N`eekPP)Z7Z`Pm&afUboBGmtk4x7EJ0NMg-k^cbc-RxM1_FqSP zyl1e}Me5iMDkpG(aXeVr-(i~^reM|O8fGSSL75FjxY*|)HvLl|_m#ql?6OB-x_K=o z`48jKugc7vel9+yd>z?rWkq9OanZ+zM{volH_R^^4QgR~lK*Y68RC@taAMd))Z_X; zhZ{9``(6#oG#zJ?EmuHo&}p*!@LJOMOp@O0NQR5KY1BzN0Ls3LQOhDOM)!dMv;WsR zDt6U?c|Gid>ORw{ZS+m{L%cFsby%C&$?l?F4nM&{Qxxn$n9kOaGqEo zX>grLbkjLm$j*b*IcN%wP0^r_Utea2eFO3Ct8cuC4#wpEyB}y~Crz>h@_0kW;;4Dm zk=}4J1*b#R7;$|fNeP%uyq23oPF)ea_mL;v3BeHgt{QF}o=9l12&64ZCe3$EL2^Kw zd<*6A)xOwJvC(>{%(=rJEZsw%EiWR=gnjtC^;5VwTv4)?i{9L`e>&YNZba^wEP$BZ zo7g=9L(H=svq{qscTV@2(kFkn@MZ4-Xlxl{?~rx;pR!x1kx>lwykSBGYGv@l<4(}( zXh3F;Jhbg^fWO0wu|u#H|20X|r$+^GbZ#eWxxE>(-hTvFr5s#2+a9~_-{mzt4j{H$ z%*lz*Gw_v)1e`v0fw{8Y3^X^C@Jb{n(cXm*Vam$3nnPCkFg5cXWXXL7vpG|#*7zUH zlM-Zf)h?l5P&w8u?_#!0A7?)r3p3K*$#BwmE3S@^qRm6G=&NN)KXJ9>KQDc{%fE%L z9?wAM3+L&9IVo(&QXg7Y$<5B1mtkkobpVGPeD6%zPH_w1V782}*An)9;2&m(>OI`F z^BtQR%vmbh+VRxES+w&aWxKwnV>~NG;=Z+jub(6_ifF=+o2JBHg1Zw0&*K9<1@`$Z zE$S7>S-uLVL(SbP=Ag|UA{FzRzrcrkMk6n*^BccI*3M?8=~OecOghUZd&zNG}Ygo%`VG52iu-_ z!sMLgMCt4iG@2F9?DFTVeoP|9JpPGqex-n=VJ|c>bBMhEX-s{RjJDU3a3?EG6kkeG zXN}oZHIc(W%x?n)l?}9BzY1SvOH-dR8=~~)6jT=_VO^~S3F7Rr6S-K2$e;yK{MwvY 
zcu~w}-!Kg%68g6^F|O=PvS3Fw(|S3CR(z<(t%;lr{p@Sl6e$Sy#|e-{hUw@n-U_J&>iA{94W*HaR1sV7Z>S>` zlq^Kvlb!4w#~S46NfZD5W^~^>eR^0}orb?urwu$D0eN?G<Lmn&&$LEz1SY27eGaLKF{Ikdg zy~*?GN--&De9;1pGK9A0oX6~6A^!}>2d8$lk^X&Or3 z%ZpHXrBE!Bb>fw3D3dnN@7B$Nv#7z)Y_ilTirvO7)_!lD3vYIB#^hP8&}+7c@~YMn zJLe=6Gg$!-<68L+{v(W!M-4jWRDk_rWAc7OGqZl(7fjR?fax7FxQ^e;=6sn8^Zeqe z_N^q=IB`1VT&&fWu{AKMrwlq*zh$>&+ytB7`|;v7J{o-1q<8xysQ!c#9Huc57AA5r zQ3o@qSc@xtv`GhES8C#}QC+HBAV;;AMPkUwQl413Cyf}fpb0C=tbDHbLBe1$xNYQi z2(Jj{x$Z!XJ9}#Mj5>J>9vD&WfAffk!yWd1$WwOS>SMs0SO;1cG~jUPC78SR5;$gA zlMe-paq03(*1jhntcwlFsy(x4RpcMo9>1SaSJQ;!Qx8xr&mGwBKb_wHJc{dGdJ(iZ z>8e%SAPt`KT5`^k0uZJJ>95L^lL4S;&V^-1-cocF5W(~Vxz^E2T z?KI$G&;FvyJSzyCCcvyaRSuGQCe$j^ghW~mV&8HR@?%pXmS#M$et$@o)Tt`h_$I4T z+w3&F|F8qT2x}0%H+}eeo(qbf@ntu}y3if>2idHS2hjJ;hPykCLeH=a{p;Jyj}3Dq zH%6tQc>gft^VO3Q>AB3YCtPGq%t!F-FT}I=8*8p#;_k>tbLlGoO;n@(B~~w#B^Mvd zkXo)c#_(v8#UZZ0kL5G*r(58fyY4g0?5}}7;vZp zG~%3CrLCp>EteW_OeYAZlzMUan-cz;kC_5NzZ$(~ZcOuK z`Uii&ae=wyx%L;BA#?Cn7_ z7f`tdFz3Y#XtEGuhULZ3@aH4mq&-@+B6cE;SvtlD?RfxU`>sKM+kbGL(D&f?sr1uVTA z4xf^`87=o6(8Rw18)iRZ4o~XiT?)DZB_;Os$`t{a_s0{a-d;-Xt#yJQ`wxQ+pR@MP z`N!N?m4KJy*U*0N5VVe~f(6Fq=pA|oAGFKxugNcS7;H5ffE4&;G~vmiSE=ZSYLD^MhS zA_22hsOElurgMVPE`2dx^!EG~q#C40ZkQDK3~~h$X3i z*z2#I>1MDdQt5)kLA0FZueL= zdurnSqFtouiW%vN8o_8uXDaD%gxoQVqAv3{!t=wosa%8v&3&Rq-!@L5^{p|~D*qTg zIgDH{=8k*1Vqv_b9v@w4#F%>nFUL|nM+B@oQ6UfJH>n$pQ3)8|8FHZsJfrT z_zz;N=>+nps~z^{OK*)b8srY{|7Gi8Vb8m%B*XUeBN@ zO+n<&zY)ma9?2~DJ&bKxE)e244CN|rpteyQoIfG_*OY`obw}|0q~-XJi-!p@DreFL zW<%_YSWP8E-2FMoPLVi=L2uKoAg)N%<+{We4Hm@FyZ8bj4j%F*-{`b5C(CFu2@qRMs7)YIT4 z8W;$W=Sv^L#0~SvPr0v*_{&ttyeA0ep3||yEgVbf0?r;dg=WpmgvHf+Ft_0v90gf2 zWF7@SEq3zvS(#AxOH--k=tJzE5|2aNS1`~Ng5&4>tWJl4h37v5)oUa=xK zySQ^`S_4zi`vd>uy<)AT+d+DFC^wG~AV#9+!9rJ(m{fB(5Y4|h_gxa4spM==cRS(S zw^RJig-&$gi;5GT4F3_B}*I`A8fsZQKR{A8ZaX1|Za4F4?5tnGj+j#A+8hI^k1>;FG5!$RS0 zG$K^jx+8Qd%?dT8Ni$s;NRTAk4!5xT9?U|4`)^r+CJPoO^e44eY7?FDu+#WI?a{{N`+5)5$i$)4UaXUD%}h 
z641Iomz@e5P%bFV*q1+r?(>F3GkG>0U8YKk&$s9oh>_WVf``m^T@?7y-O?#OPZnWw&C{(vp5E}M*7LM*K3*K;^mYYH_v^7NvU5G>9pLd$S%rsBI7%B|KXy`#5q#Az;k zk4nJ`hp=FB{^=m_dKS^D zoIm{NNx#9SPK6vx6oj0&*RUos3lij%aiHZ8>CIdO(wPkq?ks_UPCsDjHx5gGaXx9A z5dk3ux%{YkiC`Vg?TMNPaqWN0*|$qf>6Qv(lBBMRcbwCZx1bdz7sxZK<@<1Fls;9; zu!e4P0jlC(07d+*^!@M@>LQ;&)}4%I5=sE8HEQABe@pO2$9Fs(sLpoIdCA(IRiWyf zF70807u^2v6P_Io0t;1f`k$-{xo+9QZ1xy{LoHKb|8WKQ_C$tCpXq^+Cl*vHeie># z_uAdG^N=nP3V{PRu&r8^zPzIeNjoLcgsb5e4&B2UA^;_Wzwz0rzt|V+L2nODAR0$G z>uQ`WEj^k5g%7>(P*przdGa)rhAPsOpBeZyIEw0C;Os2i?rxTe4N*&#qM{yr=AYV1 z`XJ{Vg!Wr79a*bDUgreHp&B_Zz|@_uC6 z2SFOy@QkPU!WZ_K?P1M{8H^-1qP*=ZMo_7rjrb!>w8S4kWF?^%c2%I(8V>&*nZW5Q z)u_`_72@Hug44nkFSuucis`s;~uLtBk>cAL*cXii_@= zsYOoD@qh~fS5RcnQso)NbEtyT%yZ*wPp^99c z@PKLj&k5_hzOV=NSCgyD&cmIjMx4ddfTrfuKrx4$GuzG0Dme`m!c_(PIE#-F1cTHR)Iv6^3oYlJrS^ z0?HXwfTp)2UDjwufB#X!nO2m2uBt&!t+pfXEX&2uQ|y|5gdJSl$js^0rj?HC*|EIY zM17+>eOxR`>&jP=<;%X{l@z0`Z z4_F*jDdn@%n&EC_yVcSQA3?%0fGWwwz_gwhXk#jZ5>qeSd+%H}1W7 zoDNZsX0rvfg>@IdhAGLlEPtXAvvl=1yHM>NM%mA!j;qcwb}9aJ=TbphH$#lLFgu|y zB?i|oaAO-PFVL~s-(bpPTc%DU4aV=iM~TufxRg(rS&zJ7!m>|LTv&tWman0+JzhhP zxH>f*4Q7qRmH{{{Cc+~5X3%v^-U9~GG`1@3GCFBdy> z8zC~<9&-4aWUAjN$dn1u(b-b0;J#b9a(@b@>vki1ZUDOjC7B)0r>vK_|70~TZ-7lc z_OP>Z8Ka=Nf^J%rK{xuJ1rK2ZRNXVe-E}rhc(EY!a8!aW&%F+{F8fG-uPFb5(I0Sa z_kv%`#lcYF4>W%2goNai@FMsI`X!YyzYZ0FcugQ(dqs#1jc&#XwI=ZO_9?`W$<~MW z?1i(D%UI2^ZvKafMeHx>=XhN<4Nb)_krQ>XObgx`Ij=oI5{0ddl zzj+adz!akk8*1659Okm@p*dzt2lAUq8wFzCV<9@MjWWx2j<*Wv8*kb#>a&7 zVsdsc`&6&PkFB;eZ(1@~R7b)34jD3QZ8~nXmB+gFdM@rYn2Te6#(aiNjIB!?6L&ci zthfHc<39p%XNduu@5NwMas^uZ{YEpr2;R<%e7=nHKX{(wL3DN~(XOM$P@9;5*^_JW zxu_SuxHcPkay}UOOqJg5&I3D93mOr2*(3rXd@+bnOQ2uZmeNIdV(ArP1H z4XRZ04BI|>luvX=A6!U%gChC4W#f< z63m?z2_tE;I4?n&Fxu~#7>SQ4Z4}G4?0nAE@A+IzNg(BKcjpDyIFJ$@Cn!BIo$H&I z)6@yVsG<1*W@&`772~4hY=kK_(&lqJyF%DvQ4GxmM`+0FPQ)KU(DgR}4%`XH%I44b zDXN*t&`jg*>?_b)=m0{Gg-N^lR>p^?L?>&@V0XPBjFf(X+=Or%Rk#_S%sPvA%4X8~ zvm!KWk}&b9ai_^6Z#Z0fE&>;08}M}|#`qYp zXTi~>+3?BdD0TGv47<} 
zZxBAJMVhzR(~nW-z;J3Q+^VXiLSr-Wzg;5q*Wd>FMqHY_ye3Dc>n~s)YF%Jv2%1pa z`a8^qbRBxxHwylauOT&?b9ufAV#K~F74z#|NwByIJ(b=8ZJ(Oa;`3K{Mjh#-*;9z- zPGNGp%begEZua5E#TuM)B%Vz($=!?)GP*$#PKXbngI6`WzS>3(TM!zX{0K|t)Z;bF zXtw@wE{ra40wb?QSQP8VR3=p5?}cknNyv?C>#j-Dsn4+@O@pB&?bvu|0{||xM#A#OrKhrTxZTSS>mSHl{6rb({vAu z(m(spV$D)x^6~K}xD&Sk^*p-)I3z{y=x0=0zYBamFG9<8wqS5Mp3dkOp}zNOd53#= zR4XTzIAHRR3M}|F!Wt&LVBq0PtPqoXcseY^|g=ig+MIW3uxKngq1>IL7rnz6pL86H;& z(2kQ<=$E;i37#fFngcYs{~b41)J@>HVVTgEe1&J<_ynF!+z$30Whi>%AzWGF4iX#R zp@Uc~XkJn$o43S(^pEBAU0n%oU7F8!9R(U{C`Y0V7qga*42{qMdhmTDJ@9fhteE@= zW2WoTgX*$qvrmN{pT3p(Gd+(@v~kB-%VJ@c(O#O_A&BlT_cAp)IwY)1pWaK8B_8ft z=(k`etlRdPZ4jM;o*R%Ul==)oV_z5&%Fvdh3N$d{BtGT(!K{crHsIe(e(QGuvS8*M zYBfO%x>ktcoxESDT>KELBv;dvTX)$0-bw6>g~>qEK7-7G2vTpmo zcJR3%{Zc_jQjZ*B5 zaD=ujKCHhaK!VdawC>IfWUl-{_hT{OQW*_n76~XGo(%^CTu7naXNWs4Mn^7hC#1g{ zjN@g9g1k5+ZyCjv+sbiWrZ0MlS<+!qW!~$o82okpF6YrZ2dCc|k;1=AIZf6t_V0!W z+I9OI?rB&+G#p*A`cM`Y1Yg0=8ULVG;XQMx_XCqJQ_kG8ZDkjXC{wpMD{#=;j}6K{ zu{B){4QgT_`vce8aP{RPhiy=je*qQJHQ;KoA{D%6jecJKT<4#Q^XxLYXw{YU**S5F zDm8fcu>g-hxro7|1E|(eDnQ1L@D6~tN9vRJK#^)V?Pv>lL+{7D85)JU2 zWE-d9;Ua-{2jjOToNc~bmj6Ur4Ck3%1x1l>81?TdE?gLg*OCp1lTsIdC3(V5ZBSS2S{UC23v5CKeaYNJbNG74*3U}mvW+uIU2S0^_vT^vHBn${7?z#t@Mbqd@7`{vi zeb)t$P*Z|un*C_}6f1JY^BVIhd@7{XsiU2~2>dH;|t>;4yLzhpg?xov{&+LxKZtY4Vt=}4N3qv1?Q4CvSOv-K|t)~}6Z%paU#^-qao zkeV;Xto8%Xaz%VoR0(Bg9dVm}HNF;=Aiv5Jc+E@BGYb3O!NdF$B<*xHBx+jFnG1?Q zZ-o&F`>IKM{l9`n*)rC2sTMS(FoF?++nnG?R>6y~Ot5M4N3nWm=3z?+Aspg3s@8>!9ExC~{`;Jfl;)Rf@!K9xW9WpFSaQZ&I>714asV}-&KN&3oM|zSc4AeyuiHqiuBpq z39$Qz0Nwp4+d4Qyj;=XlN9Wkd(5D|PaJ%qws22Ooo>uB*tvGa+&&VYDdx;!A$yA|! 
zD()z9TaNF3KL+ob51?`Z1HyJ6p(Tbj-(8a57;ri*GOj(~2QT!7NCYxrEkO%Q-?ve|| zdpa1u*D5rP^VDnH4})zfE9vijPK=A_9FpX<1H}S;t)D&%B3qvLFmuMcAY8|qI8;>f zmP-~w-fT-+I$NF?xZHx}&fdJ!=YB%mWkLGak>T6?PNQ!Gr0JmE7#pGen>}W%%IcX+ zq45hiSWV_^CVUMS`Tctu?pb(-`68}J?q?>kzxHU5j%Hby<;ce|!5LJlQW7uKT!6?D z3rgj$V1i);>UxGt@b7;i(?;y}s!tPO4 z;t-yH!S~BvdihT}ymDEG(c2^#jqS^53nN3{{HIJGGsQO#F(%a0(U|Tzzmm2^Y=&&d zgXs2H8xN(J?kAtuCMQQc1^zKxZxo>Pe-Cj`EQ8l) zw;W;vuEE_~Jd*izB2Ra_H1U}hgG0to7{lAr#Heu%-60&rygFb&GdKJ|8~;`Ow|Um| zUH)&3o_q;PE-K*y%LFtkn9Mf+h)0dHV-Q(wiq{kB@u6E41}#^iEf0D4>>=yZUd6mxvj|T7_mOv{whuM;9iSimXR*I#rn5E`#{#DtA&hwmUnVBYO+Lwd3<)vxQK@E&~{s($T{4mm6 znf}Ya2c;*PV9DxGe#OQJ_!h}&g&9#gCwe-%-Bu+lE8}o`hZtG(^)!vUYE9+2nc)5W z9W<)3gj)V?#jV`DW3KH)a_WgO1}=ESn$wPB|52Wv;Th5)pfA?;Jn)qbMbo>wGZrefg>?3&L>10gmNQLX$ zr_j6XLtGOhKn6Z+q?2Wf&~+?>n1zqRWeG**X7w&ovciIH=i*ZJ3x&Bom@-}HaTuDn ze}mQbF^pH?Uncg%B2w-rPJUc-p~qW8`JT%?`6JKG;CDs`WZVA0`VmE9f7^lVP3y)X zPkoqLuR?Mwq@a9`09_d+OM(T3;is?^<*Ue1Z(~hD>io!O(~0Et>P_%rxd7GD<$MeN zrD$8YgIak6^IyE2h_f#G^S><{XTFVAV8FLxSpH3fe7Ii(zm97{L>Gtrbe=?J8?VQU z`hS^)O0S^j_6=vVx3d~69!I+gd@wq}0J^7xiIa?NzaFtg4 zbK@?4ty@W}%~I&l#{$OKY$^;zg)piZiE&y1_{Z8DTf9oDY<0KcCWz{`9=l6f@{+kAgA`mxFI+u|^Nc3S{e zfftAs9OOK#lThn&J|j}M3a8E54i%z_Em$-tgtEC!hDU_z^MXp(y> z3Ufa7)jCt@+R!W%*;NkB>l~rV#TpkGMN(I}F#6!O5$)W1grs>-#*@>7u|>KY$DR7Y z#MX|^P*k9TZBk6G!e>@J^gHe_utd!fTN*g=1PpxdM13OzdNp5;kAI)?)Ze{EbFo|O zfXruh|DKt6du1S*cJ~83aC0VeyayT8ns{uuvxRttJJRgMJf`gR3_=~==( zHD8Qrf-2`#DYRe%CJsV_mppZl;W!KvmRd_V{Q@@RETm1k%{we@3*YKKL#_4;;JxFc zy^6PGK8rP{b zXi6*0?3ZT`4-Bz_ck?)w&U8}b8jC%}x-|Z!8*x&gY~SZev;*$Lyb>ciVJw~)pPfVQ z>pH=INiSLNgD-GJL_doDipD#4ezAi~g-MT76B=*M;B9j@BGO@#@v}OgiP+SIZ;qOi zKmZJ4fRpj||z3&VYdh_8WEVAFkX8+J7mwf@( zxcg#j@mKb(jv-a8bz_=VMl$|qhL{`&L7IP2kfz>ILMd;S+fDc~R1XPzT0u$WJ)WNx&whJf&8+$?NXtzA@H6hL#oOI{K6$E3Chjz+ z700JgFLzFRuNe!T%EGiRwY=uRqG|jvEnTb*vbN6L@fLX@QK-G{I}Qv8(2CQsSpWSQ zyYSp!bh|eW6Xc^vOIt1Tjq#@kb<5cs8hRMF))20pm8V_Y?ynd)pU$J*w8-0r*u|Yf z->wHttYr?4JrIW7B4SA=ZkAZYv;g+=!AJPw;xkQBw6{3udN}9aqc~odJlm-nNXb{b%M5*6B_pbZLUF+Vv{O?`sUGLdv 
z@BMtgpHJH;7YzJl3bIzGFwWQoH66~2S{qkkQBot$(Yp!T+sefQ&t2f}fFj#1;PK?V zr{MVD|4`<{N`6|0pw6orSrxVDC>loSz{$v7_`P8T-(I{4ybCo2sFWivTN}p(i-PIb z$Wlx`y$RL(3UL2>YjL-}C4XQ8s3#wVvFXUtLEJcMeTzz0g+05l+M@`oPMnS17N z89ifqz|D0(NVv;@a=0Zq$?WIb<;Hl_O^$z7urlGSY4YTP_?7M$ zmfA5Mg8L4kXX6XpT zk+DGzywnZEzDa%D1;1Z-uOy+D(3AizG?@Qe}0gB52`i4U8V4&R6!^vr6;Z(6gutvRduLi|76Z%eiW}J}ey; z9h-p?0*hggRw~$!2!iM@xA3z4NcO->f}hi1L&xl*anx-E>bp{in}btuZ*LRo?YsdM z!-RZe*){C)dj%J7hk>cR!2NjU%kFkI;#bFh&O)P*^L}|44i&x!o0fHG<@pD~lk%`} z{S6GEs~B4%AoMsb*6?P8Ff0Cv`Iq9*xk`z~&yHdr77Ko)zSW>v`%yUPb>f9lO5E7n zg9RPa}kNqK897n9Ez?|ue9RXYH42*AP*=OJ0>i8|bW z0R?T0UsK)um6I|>clk3@gGpk?lCdnT@*V6Ltja33hQsW} z-4q}&DAq}P(f!EzAggb}1Vpx&mFm%mh!)(qVIq~Ax{=IX85Y*IkoXm;!u^mho0Rd6 z+r7;n;vZyG^_NSKY|?T5ay19Zp}J zChXfgC0x}O!&KwtnUE)ynknszp%)WkZ5NyGj<{R+Jq#Ew!#z(w!W;)*-;7IBT&vC82552vO7`I|Qc6`4G zg^Sgw;B`CrdM~CF&pfCUJ>oCA{16W}^`}uC1a7PO~giK=r@l z2CgjxH;J91z|U&zZ`(yQ8?+j&KDw|QUb7%PrV4rKDa_ILG+y}Y0JiPIUF>rUr|<4v z;)**5pwA`~oh<#Z_Rbsb)0S$CXqIQsh4an>E(fN1$6)5b7MJ9(R5=` z9hY+Z2hW7Hdzr~Ml=}D$OhK;yZ-MbHc1Ni#Fu6;orP}&p3np?BikAGZB%8c<2zxt`b%&L z8%V3l_o3=Bf&Ekz#r8xV#EZg3)4zY=9wujrJe7x&`!sc2 zH*guJ$+=V8HUqlwtGen!Py<98FNER8j>6Gw18~0ULpD{mu-U*~e)$tgyB_5{TREB08!aH@UVZX&5+P`uleXr6& z)f0xK^Qw{$IT(jW6aMfqflBz?a4y+o+L9>52_>3)`R3i0@cWG^n>TeJ=sUTwI~kSu zCV3(y3@a4hERf+GFEbD;DX_V*8c>i!UvdTa`!qd`pWRR>m>OTgrxkyyKXAy+wY1nj&X zhbxN}VCldAxG}XpoML$-H@{w$g+B-tFG?2nS|`gfX@VMBAG*t54Sc~j?eD>WG1Y9q zI}ru^{f?@pLLT~`1AMw~%=&cwnYUR4e4g_Tb{7w0avO5dH~BU8_7tP@;~g06uSvDu zQ@A}dH0YSzGJIPyfX#pCO0pAdKrwa>+rPOD&p#>exEp%b+1gq5ff&> zk(NGC?^R<-mOPHExy)1Mbb4Z)fK%q&hm@z~@X_uj`t8)C6;~I4bD27(3iAs`{|vbM zM7UWOGUk(NuE4PKQ)ttWBfNoQJTtH{g;{^R*@qEIOI}Gs}D0--U78||~fSc_nVW}6u*ta4uPaTIT zYFDvB_9B$cTjTusVH~(!&?Up<*D%>%kMeKy3LT=`xMNK|Jk*_nCoHaT&U5>@3g0r^ zw>*zpd?hI3_H}OH4+%DHx(z+8)rYMyVi0}bDwa2h6Th|dBb9D*y28J}*G0m+;OB7W zt=A6Q^mkEztiV!>97N0GDtO;BQCOru73auWvv2dHg=et~H=N03A@AhCc1tdv-26+t z+N~2(J+@%=JAeL)|2vTR^o6@=<%pLK2s7u*TJ%#SuEZ-<%wIZ!T5-2gE_fIf1RlnF 
zhc1D|aSIsl9?ly-{{xdZ^@|dA>ao(SomipK%GCs@&{`)8O2=l{w6PZ+CnUmkulpGG zGKqgysm(6D^q`kssl3FvZTP3rk=?54!|>H1_}~m+hjVON31L*?s)vqsiR%&9CvYuWgQa9196&I}Ah_dpNaih{q z?CgBaTTh$I4?VI7UFUn?thHOv<=P5dZhjaacNpXUk5}yQ|Kb&E6^6rG9Y>VfE{{EU z1glSX^8a$a;*9D!G%4XHq)VB=GDk~>;{`pMp&33@5%!lhVQ|+!kl*DH%FfL@38gQh zA#CdbymU~D0*-mI`H?YviheAdJ1hexXT5QcUb{$U#%(S!bu>m#p2kbfx`6`*??KJ^ z=OE6&jD2~cP5wEH+3AUX;81MAkM54fv!A}9x$z`;tUrobO6rsO^TpyHJvMY=VJHq5 z63h=&x-R5QvSE(;Zsubxbk~ZsDD&DK9CJf~m3{|s)|h&}Tcdd}5VQe182P|CGff*&NU zIDwbP?Z!Zx&iOa`x!u`V3S}(RVHHy--Z*$sa zW^BgCEI2!5FbkHm!^)5oqJw>ZVTYO_D-lMVvNKPDOn)=i>?Oqxy*U6MCS1haUuU7& zKTW)9`8ODL&Yi3pufn((Q+nO$B=p1IfMK9B3yS{=9|K3@0ACH(GYH#sYVU-)7icj8Dhzczgqs&CbS>VWO^Z^khE zXF8tgS9Wt3HyyxVPm|g7zE7y#w92Kn`#Cgu--WYvaiH6%K>vQNqaSlJ@rm3wT(`yp zJ*LUAoOdI*J2eZa>6RiJcx5PcRAvjj{flQ8an0cr~Hgbex~OWJP2Lw&L>MF$GH!lOF; z`%h9d`}s*ayjB$!KG$V4=S#5mx5`wJenUK^#GHO_ScQiYKjX-W)k1G`0ctJ##Vyh| zr7gS4Ay!GS%~qt~sWpG_Z*vv*RA)W)4=Bca2I{QyN-|rhkVa~HWn!_Q%~_BWhaY+$ z!_*QRTK9bcTd|~13Z%{&aPuM z47uLG|4|oiY1fN+&!A=W<>yfhvUgx3-l@Uxt;v*9Wdklh!`QRI`^6zE%$e(@>1=Ak zLHy5|(bvNhMI|wB&@wZgtIh}~MXsZnvmEcn~wFe!wN_K6Tmqaup2r zdxBrH4nm4ogUeO%L^NpJ!1~3j_)XiTsH;zrtdlCS>_G*5`4vpJ4lF@wS6z7ipbZ@J z_u@b2!4N*D4p+@D#OxYbCavejGFPppZo3>P{A9y>`M<&4>vqG)Q-}DMb_)ESf4K3$Z{R`)c%lh!#5ml0S zDaBkHck}wc2GJ8{O!q_Yar0F?Y4z8+tk}v0N6Q*f&?hmCE znzIo9DjU5ThQh0DCFnGCYL%o?JfH0EO-eH+iFS5f6B}z3OXa!bfl>p}jE|7}UWq#1PoiM&S5$f$qIlF!Hpnvo#XyIz%Y0pcDO%9-< z+HEjXn01;;+wr%ImavmH2eHGa9^5Zk)A9%ldg-D@_mTyU|7v?G>3f6KX;auB`GHil zIi8CTY~=POiAZ~p8CZC{#<}*H@av<%p+E5zbp)+4EiV9s+(5|^rb5|zIXU~}qTgK>v~OH$Ajka;A}rdy1Fg^up9 zWmYTKd1oli`@2YS1L=L)5{iBLgn!Yc|YoYQnh}PvQJe zqabAEZOB~u20li9M2}r(Vc)55+`6Ne_*DiHB>JF4?c>F8si9jG<(0=b57uSw52EqF zyQyR)$-{kq2b%Sqg`FBD{P_`wNb6J>^?D^iQjoBRJy`(Zf}(zGfF);tJDNt7b2K>c z7M|0{1;c{r7-6)EefNF}Thq4itv#jqV?h?&SJ@4FZ_NX>{BiWp;x^wpvzzlD`V1DP zGw5IZAAhFmAcQT(16ESXH9^1@xgg#omnbsCDw zZeacaBWlF*aU`jEeZJ8F9aGk0ZEYx8>C{3{R~F1DaW{B+U|7WAAUJv@C6 zLh1GKVDwp*8uVvTXvuE6qcxK9Ev(_gNN?6UG7Afg>Y&IeUTnIk$mP+v;oy;qU|ugJ 
zu;hN@hsZI^+0vCJ-8~NVvxT$t)=ivu!vu(L-NU3P8)hwa#qR$a@lky{OwiiGw(3e? z%hxy;>wS@|e%n$AxU>bI^{xZG>CX7pTZ!yGcuuP`pYM%Eh) zdiNT6?>{&q)SPC!AA(=j`&svHX=bY`Lz{SAtk}Wm$^BuZwxE$CZ;py`F2n4iPX5AT z1#W=uNwDIU;bZq&eB5~f6;@`D1$&38|K+g0E>qF8+0_`oa|3@mu?#9KpF$w!^OaG3 zSkbRYzmETbDcujC*FFvBUTpw}h8XUQpqqJRoQlqNv#8R|5Y_dn@n^RxrX5Pf!A24= zdyXpCvr>wkp4tI}GH&yJCaGX1c@m!=3nMV*QDM;yNbd*Lkz9!!$c29R{CoL+&wLvs}`Dx4OeT?G!qN*P=Cc4 zIcC8@kQZ`~mhi)WHz=Ik3XhEsf>YZ_?tZqQv)j`QEJ%*z?S63fH?AC{Gww zDxA~b{Q($!R4Y#jICY?i}s#_;zN~iIKl%rA2lSum+v5LOOlYAE9NSt6S#9Wb707v zdC)%pDcmo$CY7B*U^!*Ic&(BLsyEMONiTQqym966vz!58N@Z( z&SAr@Ex=2Hv!ZisgV0NGBh9s|xlOHlzXOiV7(g$SOod!d22M@?B+jo(gIGN~7`mwpqBFIjP1TNS{2f7f zEAV!m3ha1(J;IuykmRk!AKCbudpT%08}n))>}a1w*9G7BA?fH4*Vl(kY8&oC^6T=zz%)r@xI6hT}eS-*(2huUw>46NEyuSZha5m zD>^~pT@>U!k)%Wub4bj7j+cb~@jgQv5Z7oj(_L=`PQErZyC#E!<6-E3L%8f&0OK4> z;aM{Z6c@00DHI+!j@q}3*x!jGsQj5sju!^+vE$O?GA$`^97hA0V39)0d_;8mI6cj8EeT%E$m(*s_MwWzk zJF_9zBnmJ&Z^h?h-=H+S*gQ_ixJJPqfw@xJV8@?M z9>cZPPlk!2Sy<8j3{NlmfJqxPSc1^mNc+zO^-rb2h0mMB*8)_TX4-G@Sm!0&lCEp8 zul5(T9F@bc_HX>pl?&ik#T+>6odSKvU(qwljK&AJ;_CyII9}*YWC^naW6~C9?Z3w# z$akQCX|`-a$XxO0N5-tA83fPD4!oi{gkGt1;q}?-%r&G2Moo~XYUh`{_ht!PPbWcU6X#V_Vc;<@I8m32DT6dgQ7Q-Y95zGXbR{~RwT)!{X^14`GN3s`4ewet zaJt{zv8#0m+gQIF)z4)^^MF(`w0y@03w_0wDO>rIAMbmh*`znUb1q1~0o=9eVnQ(4nVbF%}MVL~$Oa2sx-*(=8zN z>|gZyDLfy8Wyna&5*3Bt*>rjjmwunXfz|m`d}Eq;@NqXNuwvjKfSE~Q@ThNB1MUHUffp>HW_ zcveN2$6f2hnv+2^*tQ7YLJ|MeSd;y7%;4V1#d9C5Y+22eUDzPZ+II{Zjlb%o$n~Az zywQ`Vn;WKLVcl}F{MCk*sRdYR`vT5;O2eIcfx8lzgmO>P#4ARr($d$X(Za+Gx?_f; zwB|MJt~^Uezu2Qu9Z!Ig5+lDUg-fa(!WN*ZkOI>P6E@`Wp=J4ms07C7A;NR9T_ zF(^TanR{**c*~*WtvEu!#;4-b%s}AI#Aw#$FD!)tX>KQ3WqP=!fZxXJd$bF>P#?Wcg_(Twbys{dyb)`^>vx`|6R@ zXV;J4)kW~>fik=;&xPg<_1ufVIzH*2iAYsV$jv4i;knL7Q2u2veCpgptA$+W$%G}; zs+CGp?1O1wq9n?0FeM9((_kuf2y_z82@aD;;rnvNjdO3neOEhHn_@)`4b7;KdYs?d zPY{`R7+X&aV`5q1*-Y60T}7X`?1d}1<+)aDN~SLO{@Kraw!MdiJL4eyrxu9UXTjK= z6WRQbPLX)FANzhth4Sj}f#*2E$JMPs$$fiCx||8jE<>h~vWo%aV|#!BrQcbDBf`3c zJNzMR*Nb@W$ow%d_Q4qPyY5dx2HF&R(jVKD)3M*O4l6$zG2NgVF2}5hdwu^zRk+h) 
zy7^96pI)tDqKV$L-(+ExwZAa@oR ze?pl~R|e!CT@7Khn+EppVH(8;F@m1*p>|qqr<4+PTov9ou8D&4AfDS(BHXc@x1x+c zTlwR=c`RJC7L+84`Du&wVbkGS+_A!u%DlHR*6W!tCu=vitL;7? zu_lymov6Vn7uTV&TRF6;Po+`C&Eoo=>HO|Ver(7R8J79!A74}2!he_=O#ck>vA1li z_@F^B9a0@hyTs{mG<+M~%v&zDcQ{^EVVK4=_DyHI+P1(m2^rKFcM^A0t|XtF9FhKt zZ162SEB0(}MT@oLap>JFQSnC+%RDb+Y*W&}{mnUkrIH_WtI3BINqceR0XKGdYo#!= z(&KWb#k21I0Jg06AU{qzR@{AA$Ta0!b5fCc&~1MJ3;ISvdQ&v5_FsdC-X7yydk2!C z{Z!Hvy{tMh^$6Y!SK?EfUu6dF9IN%aLcG z-yF_M>E&Qfk`k>}PzG`8S`|~m$h)mj6b}{>mfEe4Vr$NMKdQ}M;^4 z;aFC^t{TIBxe;e;Ez&hRfkz{h*#7?>!cjv<@+so5qel+zhj#GQi*@|yEwx+u>L5k>;?u`3IyDJp^1N92@*i+#*m2l2DTE!SHax5EOFDk1 zxcxE$C)&kDm=zD8!C3+yQOJsa%C0`hF^V%f9|e_Rdv>E@0tl{e^8zI&19sp{uNx;eh4b@9OrYO9m6Cv zsPMHjE891NQyexFoF>l($HE?vow^4@*ErG>)Yf3n?(D;w$~ojysRfma9{khtn_xfbA-7JU7gPkkz|+7KtXuF3Ms=J4 zgCXnmfNI%j{X}qQbXWf64yR@d8cM`flarIsJ!}k_0u4)C_xpFA3 zUg&)0QK@K!ObLu!UJUsG^*kSJKoz-~&|K98UJfl>)!C)+E4KrNEmOubhjMU1ybG+D zuf%q47{X|t6MZvMp`(dQgt>VUJoq<)(z9OS{#J&vx1NcX8cwE>qqL~=st@XR7jsjJ zo4APJ7tVv`*pSOo1Bm)Q5|@sSg8?_I_?mD@*t30`sN~!~kwTaj%YI>xsg>!tXhboN z?CRj+xIS(}X2l2wINBpNFN=!b!3PfpPEV*PKsaYnYq?4== z3b4e8-%>a%{2h9TblEo1H&pr~ct$efAYMKNlR6Esd$BW}HNGSA%8KQ3Ht^`KxTOaYNo0Ro<(VDTcOO_Z)Wf|=XGG>w_O;2!3K_-l!0>lKYq-~ zRFU$$Tb(;h1syg32z!@9tJ71IX*MJitU?M%sGx+j3Gwtg7h~7D`!ll z98jT&Quc7LFb+R0S|oaX{SbAXb!1^D5H92?(fKV6yk4>mxOblxwA6d({UI~X;nged z!V?u%Ufay4J;>q%)mE^%mL9_T{TttmcOk#vKzw|D5Zk@63ciOO!q0yc>F%;8;Ar=q zcPJYOCdn_V4tUD5-d8?UXKKf~d&~GxH8GCzZUT=@8}M{qns5elAO~Sy*SSs?Ysa3! 
z@Xv#I=U5}=;$Y8e1olAY6(#C>VaSelETL;k94PHoqqG~P+&jO6P}2AjW&gW}$ByaZ zQ(h8wUfD-cb?US;>p9wIlwr7mI@K_qR|+b|MsFYH#j^O*4~~luKYRpZ@04DiqZ7%z zFEoigFCM{`jWaO)6r-BQhakFg0{zMQ0-@J5*^c+SXn`ijq;Cuo_%$u)8M%#uM~;IF z0l_GxbesEZ*CRHn`~bf9N^tp9Q<}t|f%L~3EZfQyIKP`1@nM|+}DNm^q|IHae6CoTnr(NM@ww3c^cD@lWS9gc>bG!NZ^>XY~k{JKG%Cl?yXR)uJ zg^XE;_RrrOqUMPT*90YZgDhybN?bAK{z7r?B7D zlXKfUOuRTb8I5izv*qJzz<6LC?|w=T@PjeLuF@vU&7R_VhvC!_^qv12p)JDD0rPUsn!W2nlCW@u9B+~K&#bUr&e zrx~Bm{SK#x#?gOUPe5c+Vby?mZP;_UkXEOD7O%UcO_6DLxY6bNarMwR{&=-4b^WY> zzoqf;dS@-HJ9iv{))KtSdv8 z`|)%KcT2HHyyUev`Ut(|CqpdohWRzvzODho!`s39>2w@(_bsUAm*RilKk!jMjad6p zPnRI8`*7@yJq`Nu4(mp6tSIjk?9BD%X0P7MhRpT{?<<`+{n2skTK||Y(dE(lhAO>( zBLiPYO@nEhJ)mYkaj%uC;ELKg&T>%@pPp2P1161w?9FD36gZms%AXQ9#N+L4JNWJz z6K-GXdB{$+V|87QP&EB6FMK|>YYTkRg|c+rz)G0M9-#O#F<<+FQO+Ps?0m0}YtBkS zYe+sPvpksT8;7woEkfQPd%K5J^UQ8eC_UCoxDxi1H^xCsZL zIzNYBQf!Rb?Q7sp?*ew`_Asg%|BC;zSsCWo<-%`OZ5FAQjEad*x#~ZPEU8S1?nI~4 z(c;s%u~-v458AL2b9?sLJrl=zwP4C{NABJ_M|?kMp^!0j!jCt$atrMa!h;gAs7vV> zMxR`Qla2FepY~C{^0zBnTk@HcziW*Up4B;BA<*CjsRYdom;FopgG%chROYISB zj*Wn*__YwVUYhnM#Zpy~DcYBtki_mOP;+Mhs~>j)`VH*ZQ2Xa%oSBc~_SnGAnk4A< z>g7(yjU`PdD@gh>4aGMa@TgQAc+Z@SExw<@ZHfw|4;90n^~XV>s*`s${mxD4*^MVS z3FbaZhhHgM2aOh0=vDS!`2R{Q{+lGq?0f||qzZd)tig{R=h3(IEFUt&1H;P;hxT)?LX%;aaWC94YQ>!ePa!NIf(!a1bYB!h>GDEPHq+mk z{Hl*}U%ven&Y`pT#zPe_VT&dEe7phk!Y#l*loxAU6?*+P?x3#b%#tyQ;gD1OKraO< z5EMu~&76xt9}WstWm^KZ*b!F1`Oq>ESXP|j$`Pz ziZSUN_NHTVV##H#8@|0~%?~&x5_xKBMWBA+WJ_x>K!CSoIIP}y%#w*3wq4|^;RFVoIaYH7gjJeCr zzW!eDo)4n$#whfeP56%k3`pTqB0iBH!d>6MF&}jq__tS*-IY_PE!N>E-@g=Rg)YMc z8D~D_YZxv6+ke@-T$XOnR-hXTf52-4f%o~qpI|^5d~(a6fz!k3hx=6?m zD*M73hyA$H|1fsWoP_iCc=9-GfN&nOVMd2%vjxL?`Hl5aRioZ0(y{Cq-tfXW&SU=y z^m^|`9$!vl*aLO?l)9dRdpDEb7#H*|a|Ou{H({`UGKSd}!l0kVv{1;l{5FbV>NgCj z{e(J=Y1U@<-uB|y+YV%LTm?UmJ4B(E4H+n^vKiNZ;`K3|n6prpmCmilpW!lWL|z`* z{*r?p=Xs1Qhmqgv!Jry4mB~JEX2C6YxEqB*sBL3FDt(v4tCy*hteYE*UA!K4`P}1< zm&SlryA+gWdseN>e#MnvXn}?w2DEfw0n`*u<0q_)hu}N@pc@wqcKdWdWzArb#u^0{ 
zJ)}T1bZIJt&K=Jd7+u3p(I@c1a8H_DG@1KsE-*yLzZcbJyy4>>Zo@rx!>DQeXlze> z%^#9dWYd%y!O(LutXN@4i>uxFJ3?-#$;+N%s&b(=RSv%&90Q{iJn`-qMb6DLkGpwx z7|Z?s4;nQN!JMaM&|V-#ALK*H%feRJ7jTp+bmm9;`g0ourk-u!(>3Y1#e|v_qYLIz)#K_kQHI|4x7AkG_jY77p0{1Z60^HLw79Oe}7ks|8FyFimf~IJ}soQ?6VpAUE4hI%H<{hNhH{-Sw^D%eQ1PI{? zimC>)gmGmMBXF$J`~<$~h7=t9Uk5C57g64IHCFY(nB|NwgTTnaw9aD~u3V^0B^pbG zyu)8^;EPgdxhlh&Vn$=%$ng|DKE%FA#!ZV7I7O>eaq~$t zc3N!$<*a(nC#&A)jonnar3L=1_}fG1x0hsxw7%lmOH%Z1=}6cndja1i+{DWI5#aSk znzR~pIF~<+W&32%rSg?%k*dgg6EiS5$dtc3&4vC`JIXK4{K|dy_T}Dwn@L#MX8gLDw^>`SbLC6XYLzd-hNRUY6 zKMAvpdlT1S&=nQ9d(Ipj#}@Kt9~`-5&%fcjG&wq^l>$DXMr#*MVMXo@VER&rC8WK= z@{gsEBY5K`#teW@N2akq&J3q3?>nB4@cx=!@|$*-258?qtVlts_fe^ zRG1x!zVyMLib7aEP=X3f&){o!JBl32V{XM0yeWDKJL_}6?DJvBkzFjBdUYn=|MC+2 zq=!<`kzm&QxdL?rM$x@I4Ki&1h(W_rs|GhG;ZEL?t!s{jz{(;BQj`?j8pj~LIs>Ns zm%(`*`i0+Xb-9kKg|OX6pFNOV$p#wL@n$E=xSF>&fq$IIt#Vw2OO8FlAlLEYBK7Yxg9K--(;+`m~%q37u)sL?He z-iuW{J$50R*%5T(RvQ=2C&GjTJ65Bm&+3OK!=8t2`~#~@xHwvY)WVV=XX0f{@;?oG ztk%=8)r0WBi4@eSsK_vIvcb~#*nR&d4kp1ny(Jubji_j96 zjP8GjQ|-AZ&=T(Ihd*`T_5aqPjm}4y_*$M`4V*>uLN|l+;7e#-`=si?KtE1qiZUMA zWJ!ZY4}zIr7&-*88a6c(z%2}T=V+%ztRI0Fsrc4<|b&9^IZ5l?IY$Zw6 zw!)VohAhEXU>p1~z}NS~=xFp+-r@3LoL;vN7hS0k&x(-5$oS79`Kjh$mUbD2>iltz zGu?#`_rAp$SMpKf=Sm9kY=X7hyU}fS4DS*95-x0+h4z(ltd4xit-J{nZ~jEJcp-mu zsSlP3T>2~Lg=g7Bg{?M>hp`*}ak09B3#gKjp79WzIPx6)CmzR{EwtgIJ7%KR=mPkY zc15_aoy|wzD!`D7yHLhPkG?<7qt$WcXgbl3{_csugXQXEwQUmaADu*fZ39S7;}ni* zZ|7YLh3}+h5pyhA18Nsd=-Oo!h!s$#Z@&or$Dkpw)+LCFmY;^h))}l)y$O$ZXwi(I zcjDhuEnwz}uke0sKB@Y0tg>039{87_fGJ@!PDMlc7-zDVuq4->OL$vx8NSSrqQeKf z&}~vUyjZmpH%>jw87#^s#e)Osq|HA5UD_Uelp4a?)||q~#^HSUW@pi+RvF;_0&nW_ zn|t{CHheu{iubH)VcD=;Cbh^HrS|END>sTBx;;gI^8w%)kPe~y6`-hC9;{2ou;1)F zv>a~-_@DiiMD8yU*^U_)L(S^ULv4!IZ`uoAnwN!?3-qevCh}IncMVf^!q$KFnu_$ z=jD%yevA11XAgL8wVcCnDU|lT_3LRPe#J(Rc zUI{WZ{D3yOoD89%OLx*@;hhvTq>uX@SdI|`%&6hqYVN|-3>0s)CYfyu*p`lb{QX%4 zN}rjbT6#Q$)~HZ^cqls=mfGI58%v6p7I*?qO|*4L-=x#_QuJ;GJ8S&@WM&J=;HlR>$sOD{BN+-8DZL zF=8*Y)XMUkhk9}Oo2MIuo$lpzwD2a!l+YC?vP6rtzr9VHS;(ufR+Oi9KN_x$c(cip?z 
z{lC@fX`Sbs^V$3Te!XHHr)?0hNH~m@#%Re$M)Z`6fTO@Le5vu2g}R9kvktSM!^{ zrRdDry^`f^pP0a$hP(Ks_zc{xd;(wX7C}<5B(?3B%5Fb0gV6Pg^f2x&q~vEqTdxjk z`x(+)IZ4`n;UxE7m_O=&=s-W~aS%9Elgb~vvTs*~J#)VmYw0&*yQUVRky$zN?N9NG z_b%ujKZP}!hC=R7VGcCFo-(E#;wB2`(S2Km{N}_~NUs=<;rf%oJo_K7ER)1VzcFRH zTPlH>6rf^IlX%t>XNpvo2lYz_L=wLP$;DU}H+~;Xng8OUSyl&p6EZQYsSs+Gz5@AQ z+iA4-BJynMK@G!F`Ye*yttE z^cgG|vfDl=XdQB0LSE?eY$ntKXjT#o^L20~9u@!G5 zcfA7hO>{~gpSPzS<+nRk6-rgfkWYnbnM0t_^4(;S)YCQ z&6hXq?%TfNwF0O2OGgx!6l<}Ykhv{mN z+WItDKj{ZF?>`2Xc$vGOW5PYG8jTkmA3^*sIS9vWe*N(kyrXrLko&S=KUTzvD;+{H z{_9v+cuAjKNwcFLAzL}K7nAAYO?&ab5zFYlU%Ck3{!C+Rtk{bo z*Vc3MVwA)i`mXUU`;#em+I={4wG3ZLJ%JB_pYVd^VyO6_!}6zDz@pVBaATX0HClQU zt!BJ{2P16Y_?Si0fbK;D$LZciYi)VV9VA{$opbV zEh%-}OVi7I^?NZ^mLBAG-IF56yh3r;;z-oI4HS|23??gF$2*O|wAelt$@m%`Fg}ax z_Uh1&Q31T|?{T7xF$v;57c?QD*@7M@DjVaIR8#alOP<~(@moRq^zqx-RO}_OK zWaD*6x>bugo>QeswTN@qrenzXon&w$ob9&C=ACZOho_b<7*NpyYhDR?|7*SQ+DRK; zTvnx>NB^T~*Ub3ulk71p(F<=ctirrp1OBDb2Kuuj7em&hz#aQb{H>qu+=E^n=offu zd#c8gm!;rBTU{2~RcL7RS!Cq#crP@!Z&9Q2c8XKK(Gj zzR5t9%GOBoxeB9LRJSaqM&#nnB3sdb0X?|u@EkV%PavosnvZ{+-B_QHdwvjnm$vlp zhmSS+SUr6(Ys)QWV-1I~NzT1|%8&o}i!KG28rsOa$Nk|Shb`eP7D`b`+FVkev68bY z+s3|KI}Bdk-J*ZZ`RM8U1Qj}bpmq9WR`OYzA33ZCq&sX_`pLWKs(Vh9GVedS|NRS& zFL#8KLzN-qfde-`_LWF`domQUpZo+PSMhhhNuU#wh1cXh@yj=xGvA4iLHE-t>Uvy_ zW<3`naK?D@lG{WNh8fXAo5$$mnk+8yk)`Ti7W`rI0o}=upk%lzJ$Sc_W*?6gZT0&B zbyME*aiOlfqwgSUJgm*`Hy7b=uO@8mHDr5_=Rw4j~Ob5NQvq5`n9=EB`lG$E)4?dyBud@=_A?JI%EB6o#JX;$0}` zpB`9oNz8i2H}Ff*!KtC)T+G`Mn4v!(6dVt7n>%KKwy_3=L?%PvgduG9lgapB!*e_u z=7Ci|1h4kf(JWK{1-Rs&5Lem{=j%6o0n=Z{xqoTNm?e7}UeC;iFwIHg@^S;xnHB?A zH(QbBK6%EcFJRNewSw>44@>%UV9J$KSTn8?Zr0~gcXkIiYJLyYE!Km?-XEB?M&LOd zP=KPTv$@LVADH<)h!6d*2G&<(!Qv_ttVsLAMcD+S)n{R^o7Tu*UDgHqN917zW>WWR zGg4oDg&k^`gW=gTP{G~=elC$=hIc+=v4jB&4^w29y<%cbyQ;V^o2V2QXl)(>uH=JCfD z3})e$E&RR_UQFw^JZS~2#UXRF$yi{2+df*w_$LOU8xC34Pwf)(FvZ z2K3oDlFOeF43|R;D0h?^xA^@~mQyY%&WM;r9xI*!Z!1r+8-4iV7m2vJcQdz2;9o=~ z595|jDd1lxMsr>1hV0i-fjujv<#Zm&vcor}L}Pu|)7uI|nD^nVcynDe?mP7s=LLM` 
zz6M*+>)UDkwefHH3xUlz^3Y(~yZH)SOUw{<=z7d}f*l3ArBXqgGJipEwYcp5i;Xwd zz>sUt@U^NhrEc%SHjY4E;=~je)bnlz zJPautPJb60Qc=`vZdaoQj!OB8W1cpGjHD~}t4YwqPlKs>*-1W6elP4^pvdsjW{_OZ zL9j<4JS+VQYv+vT5U-!;0;@vzqQAWbtl3x2t4(^r8EHPm_u2MvMAsYQ)7s!_#B4ydn9}I;NlRyLJ$KzPCZZ=@uMlHJJIPTmiKOE3sgy;ODcl zW91=UZ~|}P=iINN(~j|QbMhM6Qy9w%I2*{RwZLw4!5M?3A#11yY2WD(y&bqw*wapd zXQM`N8#k_E6+J@6?%8r~P)syVOdiE@YCm(O<%-<=VKdkRi5mWOuLXQG^q|IJ%1}Nw z2kS)&jGZ4u2CZ9otAy)Zj<>LDB7m6xKERh{um+YP2)NO6l%M|rCF>t>OJ2^S);pczsdl5;mgITNVW$q6Yn!s^>$Ax|a0I2zoJM{J5_qkp z6R`ilXplHofkRvZ$o}pE9Nqs3e+ths(fc_1x4%TlqC5rh#72~?-G^o=^^ws*1`kxVRX2#PrO#S z2S=Aa2mMJ`IAF=};*Srl*|QTI4DX6}O534Tun!tK082>b;lz*$tl^y&89ix$Lo&hq z!@;}2V(=MW`rT;Sd*K9hy-9`osRyAp)RoLfPUef0?MX}U_^nzRLdgy>czJ#;KC=>d z;9XK|LDdMZ=ITt^eFs?Oejk#&^aEdM`ci=HUHow;l2x5lL*La(Y{WoAj5w=?bIz%= zoSXZ3!HIVNrr_SnTLAeagQa%DuA-W;>^BB(-K0TEjfKP%T&9Ju^X!j-SzahZ#&UJ51c)S$JS~K6Y+e zExf15vqzJ{%92kxKy30|@Xs2}wBDK0jYLE2PkaU5Z_L@TlZP-mJsKXW4-n?UvXp#6 z=+${d^Nwd0m=Yf-fR%?19#tvE>05_(YS(rkWaHfWX%WTOnK!Pe0am%p)Lf#ZanX-x$d zt-JvR&w8MBLOZA9b^}9?4yN*_)!5pS2O|ZyN;~@^E?Jf;TKI1W8kIW;OqwQ``A3-! 
zK6e$Fp0{R&^1Yn4MmF5k`h_X)nsJTZ6pFn1nLl?!jp9@5AY)D|XSS#ww*RMs6I(aI z=(Lf|7xB>@xoV-z_oyrZX1p_JPbQbFNjw!Got)==A7b{YiKu68X9*m zXYXRB;Zxr}Rze+Hgn0`Xyus@~dqgX1h0Oe!V(!q3ZFI_6INRNh<~wzI;bZk= znuS}iz34e^n&XIu&m35{)gzExJ6Z5{UPqI6uVBE`WsvpP2$jC>hD&C#U|ev8Hb!Ki zhp8{KbPs|rTbpo$gA{uba)+z*{J1P- z27JT~!!}bDuMbw$Avo=&wCLSZ<2D(z@;Q|k;QNRX>v>ah8a;i=uSWm)aT8Cx6SJXR(Uh38V-ZFXCfLgls^X_93Ei|0$I!gXgFTEx@OUf5KF@Zi<&}{%I#`Wb_X*C1)B8m64`i96 z^cfshd6)Y_d*I2rlbB{djaAE@M(;m4T$diBl1Gu4x#kz%i@c6Me8xZVa?{=gDToNK=m#@WlV;1%h(~oF zlaI0OJMnPBatz2^%1fk; z?+)|VYOum@|KQ-$7ziw@!mi1~KzZ%}oGh&%csqUF&;*&Odec&DaHVZ%jbFJ0nzmz{;!weg`4RV?MLPbg7{p{+>q z;Tf0_m`|4as_etC0d#fo7^bpWa7#Me;XW@?Vvd_nau=Vf@^8d$a6U`$^%Ms}_lS3* z?cNXZhW{jXZA~RO$f?q%C3>X4b{*L)jTO2k*%xr>$CpsHd?VYX9F2qi?L(Ez6HsZd8uQt3045!q&V|j2gXJ62 zc#p|Xp#4ZZU7V(mpFTatTP^L7Mbhk`rY+?fwc>?Cr>RN5T;M-E=BJDs4mI@!_{v&} zs^yMQqrEjMU-4!2T_X4z(IRjpwP54*GQ1`54f6J0z?df^_}wGS*?p_SxNNoHigeZNw=zL6+HOde$3>^DFJxrD?BVwY4nNsmSr_k0-&K zt*>Fiq9Xj$GX^bY9OUZD#8{{z$+^S`-tL2=sVi?Rjp^o4f4-2h#D4J8R-+*ES^Nu4 zFPPvGhOPn^;&xRWf5B)2QD zKU3l#AE+^yW}do;@jYkYqDCpMdGiR=M#f^uD{D@5>JJQ5t%c$ZPSBs_L$^QJp=-oT z{>r6T{?8-?_HPC7R|>FTK_y>x<^&wqJ`FoQDPOx8JR9A-hKhGojRL9m5zNm>fj+mK zM7K>xY02tk<81`L8vxdvFlDd+Q{>Mdk+2u{KzxcO9~48lpjJBzH%yBYA5iDbMaj8Thu#o z2W}n~bH(RBp=PI$?W&Jp3p^9KF=MBqSJDu?*4mE0q&7i}@>Xu@;2E%SZzJX|8wN@f z%$Uyk-8eI=4g6}CvmZvWF!om%mw5d?-Yb41_SyXlLgM@2gf>TIdJC|%RTVtf4q_1v zaX2aI7QexC9#*XJL*7o2H3?^884U^Y`g;b{=NH4(1VwV&y$7?0^J4DmJ{by{>)Y2Y7ZXpl86FhOaQl9vUl#Tt;9mCN0 zd?l7yoP8D=xR)1N-~VLbcx}G>%HbV~^MHk!|g8F?AHZ3LeT@Wpm)yP$#it?jf~Kjk9`E#_fGnQ>ru7=-hX$}e+9qIR7%hGlkinPV8_#w!YdTza1?odaQB&p>{`FSC zSC`6h@Y*9}w!wsEr^<`7`cCi`8Z&60*(YwY(1Ti@tsuIsu$o(Q-Z-qZ;Mfof)V;>f z)3Al^1MQsB#$7ny{1z^i@8Rzye?;5He^@YgJF8x&Nmguz-~!VV+%!+%+S}{!=EVdS zXr##8SM9{&2#$Sr=)|9cZs5KAD>zHhmc{yNGl}>VxH6@Y>wUnBrx@+W;#wwM=lqHXkhyLl<_o-Xf3ImI z^W6dKZ+X*wU15GMWCEV=&4U--l8j4y!Y#_&1w%BAz%f^mKY8>eyt&)JAMAR70e_V! 
zGkhWTRcVo?@m!R=-HE@Ko`U~QOX9?(f}cP}p4CQ0(Qz&Zw~XHk8?@u_UhVpO1pqD++PhXrPmG{=!FD z7rKxTg~~HxDf<39)UwLKN-q(u(p)3bs8kmGFxj9!NP-^4$KZ|IGpO&TE1al$B1$W{ z46X}hAak)2S17n(eKrW*re&u=e=H+uS6*<*ujd=@k0(=OcYJAk8-I^Hi2o^T(~$Bf zxIik1d%ocpn(d#(e*9M9W4>KR_xmL<(%y*HN8SRNkfBtzQRv>JcH>5cM|i@dg39H3 zpy@=qeP@Oa=Ew~PqqTB4CAS7NvU+j6hb>%?(Sxf!pP+7FTSU^e+wO=wi%gB}9pC z9rA>!>2LY3-^Rkxm)7JzM~Ss%U&m$}KyB}RnA>i_B}|dvyI&P>19vuKLRKxG<2Qm> zos)R+*2}zY!BA$?cmzX=CW1t>kTFVcf>%$6h+aO}34V1KAtT}j?0e#m?^HBljA$^O zojjA%3miZa2`+GU_ZzOk{wZv0)1Z>xV_ec?SNs?0O+({yF>ClsG#=tWGY2%|(a)3c z%$Ehs(P%iD<&J0HM?K`)dIhg$Qv&uWWMC&Zo9xFHalPvg{|^xO|A#5IviyHA#n)}l z3N5zZc*~$R46sMrE%xvub=7d*g z|KMFKW|W;>e2?$`T_g$+h2qzX&HT2s!{WMokz7q&I$BL zVa5MRDVE6i-;8P(C*o&cz;P@$%{LsCgJu z9sU+#yGk%?x-9cpY{Cj6N6|gQw=lkaHQO-i6*ph_nKSDuAT}rhWpb|bN;!S_pIHq2<+0hysU;b>$XKYq(Bk?n`$(73-7R=M=y zL72vscSZ53&A|e>{utA>yvlvuvK?X$g+owyGQ>(Z;Q_;9;DWcVe~#!%$+mrpai3Y`I7d_#Z~X?hE7j~P?he)*dq zms}2(Rk!%vLfbiNs&GQQ8^Am&qwS?%4y5`Adsw&MHi|a?1>nCChF-I#bxqRr*h>c7 zja2B8|9i+?aSpC*+{F2+UxMx9&DawCR4)6W3Tv>N#YP+7N6UNJuupqCnCnWR$*>hP zGI89zYv}r}2h~GE@R;>p=sdZAW^CC*e+^+I&)ib^D}Q@ z0vN&vu|?#cypE~@KH(wHS77Mj!(|+s!X!fzFmvKK+9+_8jt?+k+c&78u{f4o7dp{P zf!vq8U_Da|yut@fm1D)Xs^OO~q4M64O|dWEV`_~74*B^VBMg-w(AJHXx?0fS`*KX# z%o&#tNkQjfL3BM&iY`wP!Rl*ESz*mr*lNFn)i@V3Pa#R2I(7q7lorU?HujLl`*HpT zVVwP-u@ID5EE+Yv2|t;8vDl;1R8=EGfzhehJ7^vq=y8GNvXaaw$c(~ll;}#PGMOJ( zML)(2W`_&XX}B)dA%4_$D5nZpv04tj347 zAHnd!Cv-G0W~a|cQcIIENV!Y1oFK%UANDXo?KVkz{lRnR!ogCkN&Cv9*?ZdysGjSQ)~clNz0t1R zx|&jP;N@(J#$Fe~09m?IoIc=lo$ND24Sch{|{ z`{6-!pXC7l72{#Mu@MaT`xBBzJcO11#*)SEk+3MsfPIaK5t}_f#IMV*gj8D}b|@wn z&L(Xoe@At(WULupO;siRgHveVv@&>Gqr#StoyD#%Nr$6cCVkN{W)q9^xPh~`!|z`M z==v$-&Erhzlc9)}&RhV_TP4_#i{>1wpTSREmcc7u$hY71Qj5wZrSVkB6CB@NO2>DM zV_NIST^qJTmPQ2VbA4{5>7t1ryr zq zFj&!vC2JL*6L(tj#7^wdheP`#v z{T$(QJ=ufHZ_Ps?&16Yx;DJ~#7MYjG>++|7W1}EV@PsW5Odc&1&bkp_Yuml>qKAhg>ie)2 zqbNGD2r_n>GhMZKz9d(N2J}DYEl)MTv(8zf($f~GRl~8{^*N;RZ5UVxwy}Fh72unn z1&&;tiMtF$$o(}$o*l%cD$!uIO`3^{JMr)7c*<5xC)@25b$Xy+`Ke| 
z-MP7vubNmuO&tj=rpA-p+uOvxDM?_qNsspp?ckSgB(5(+nW^>kAi3@2U)^6LUVpm; zs@!Vr9PjJ_m(lzAfqj2bMqe;vY0iPg2Oq-rkHY<=HAWoK!l*2wf%C6B3tz(9`TF)4 zCRw_esXmAj-?1A;UrM8ev_&MPR2Q;i^Q-vXx0OlWSImE0KM+l1Y}u@FQ{m|2447k{ zKu?szI3H@p8m)t3(_eL%w^EPk{|?4X^%(KO-=}C>WER_=F_D5-CE%dBO=8u7G0e2Z zoDQZ+(VeVwV7Vv|Y9GZ>jdKO2?><9sRWGru`&ZDu%oF~_5KaYO_RgQ9ikWU?tTBz= zi&7}7L6&Vh7R!!Lc}N#xhLNtU1k-!4P}piHbHzaxw723aoqjo=mG@uAfjTGH8GA8z zENKrU%yt5&A%}!Uyc-VdR%BkOxlFS!n_oD634eb(qbjX|?1KIld_CNf3LYM39~Y$D8SK*~hYFzip{I=P1nGY|5w3HKaXRGici11#E+!BIm8_jndOqS&`LR zRQu=1)B`_b^z-R(F=jk}{>V`F;B*iCc9NhGLwk4~^B6(mAx!fXwy?-=iIn(Gj#WQ4 zWQX!RS&zFCXpNF)1NEZF&!V5(l$wb+$Y)55Y2T zDkKn-;m_CCD6l)xzki890GmS#&pWURXAib`P9Wy*JFCsI*zSG;8(!4Gym%dEA^R4T9;&iZ6%!UC>q(zh#lf*pLgV@UOeoMaU=5d6 zK=r%~@~WK3`OHkF0>iIdiP(qP6;B}bwdA3%Mh-nq1bC>EU&g%UoG#0?;j_cQ|FFBa%!~1VmK-1jlij!=3v|lIog|+ zhtzxvQwr~k(zH(CtW=Z96EZ3bz6UVLN`Y!E;2rWhNt*PVLgFFkiu`QIHU!}>$t`nxZ zUtMT*|5X?{RSt4&+OfRo3^!){P`C-2Tu%Qc{=OAWe3I>6>nWwTBpEUlfxdcU6HOoHNxOB==|NPVWY-8-4S zycOp(<0mw`Sg;_caJ)26n)>Cfsc?lFw<=nibVAqQMX9Oy;K^TzuTSE>WY3^Ob^?(x z)Padch~Vh;H{!q5M<}}g8k}3<1(j3>-gRbdQgaLx8d|WS>POLLW(kCxz6P^y?BwbT zcnq&xCB$q(=y*viOb3p*c3x3#vz$&M`iPxq$Km~H@h4|UL9V+UoM zEwRz8m|yg22O8ZvM*hRSz<5cjV4QpnE(Trjq&fNOWj6;pHhUd7>3qhC+r!XF`8}xQ z{ShtHf5(3}cW2-FRcJKX(W#)8q+p+A9z%Wfk7GjE`+*nzIf zis5>SV9RkIK;6~yZ1XdLMA0${_O(o*Af0?XmF@9&&|s%Ru&7IF?jaWfSPaQ%8EdE`eoC90;d`d-P!4CotgAWeAzH zm#xcY)NY;(Z`Z8Et3slAzV9HW^;MDiPLm~dEg6=Pun|fkN7AZsf*D=30Y1*;*t8|d zxP0jrevtQah%z0^HWUQof|k=*a9f5<4@Sa=iBkAg(3zetccXR}PG~8Q#Wk}exkFn- z{3nT6a%+4fau5>r_Znv7%H31o=`usu^jw#HnqbZRR!Pxniv)3&hcW){mt%%nkGZiQ z7qb-8WAyN09asv+=`l)kd54xCoSOS*4AN7eiKQk~8$Aif4$HzJ&vRIRP$~Sp^o?)o zZU!T*FsXPj%Wyu*P24_(d06#=ia;w*niIx94)=$7 zK7Yho`d-v6p^2;ad$6r1YD7hzkMYOcCESAcP+ncB6}#`8Ksw;Vx=M%B%N0umONIm8 zinF0vQT^yzAkR#$9AW`7MAmOlQpd|#@MX_@c%v@KUP_qYqJR{|vSwW>gMLUekk4AwSlIYX7xkaydQMmKt~8}Glsl&|XSy-p6A1=-@ttL3PgXwD)E zN^#DqBwjD&9)D$sKks-|1@_-Ifvx{0vfatm_;jNhlXja;YrW5LdV(D_q%Rc4J9pvT z4SR8;-!u4}z8`N1MrAph;WR+M7+unz;`3iltm#=C1SBca-HRpASGtX6?$D#{KJHwy 
zaw(jN$ikVun(TOW4Xl|iLDOT6*xM;WyHKu_AHKc=?=;mRfAukVJsHIuE(T#tgbkZ9 zrVjKcsL`tC;p}tzMw~z8EQAHf;iThx@U-qF{IaMBt_#-V!f@f)R+0s~EH2pleSD7G zF2SaJd-drG*sFV!r1e)m;?Z+dKHV+; zEVGBD%9OzswS6qCM3)?HK7jU@Zmi9*Luh}Fq<_Z;!DF#BD>>5y^{;dZl$0>N204wb zR_xDTLngIUpOt86(7c9Ye9wy+?AI}QRyI8lSJ#YSLsU0_?c+G^$%#~U!bqP4r7c&o zH3+md?P!tbAolCV0x&z9$W2Qg#|)!F;gnnlud>SyCq5{~zch@-Q zgYcaQ|B5>v??t`ewRrxu0uu@v0K>+CYD7Bi?-0@Pywu_BVspWw~LHl8jvU~8(q_D`O=3+ zM5UH$Z2aWGut{hMjtU84YYq>Ci(dDjvkyTgb{PNc_8crdZbUiG1-1sqihC!rZH z{uZJ^)#}lBaA*Z*T``wT8>+cRsaRgrHIO+y)+UX~L)e#lWw<8P1f$GdSXf~-b`1^@ zt^ozs*0v0O>F#0PK{MdHuL3*xcmV&W{s`11+0ulkmpFskskBDVgt-*j(o{twmcHT) zMwrc|RT7~P5TZc(@A~=qLd)UgzAs?=Xgn#H6yYn?%TN}RVOJpU1>O_)z{n6g%xjs> z>sx=pT7ThvcHRf^$b0WV;gbt($_yi;bLOmkaWS~7YH~AUzJZ}YfK9ZEWcOc3L3&&h z78SR{?)Da(yLAY~Ii%p94^7<7$Zf3UYC7DXT8>#P9I|@+am8v8z3qw@c`maB%R|R# ztM41|o8rfm9$N8_YJ+i$WioRg*nl~uitI#3EM(8Jq~`YJwCwL~2tW3ne=C0gP6iET z?v5*1%7+!4v6?x2aN)Uw#*N|!zSVF*Yd;Lwd5Qa^`5H#X2wU{sn<)t`1*)nkH|UuP zEwHf9BEt)Mry+5aJtZf8Fx;!@9JpKeWxJ0A(*rmZ92id`L~%i+gyOp z+6t+u^~&oq2ygD<~}D3b##0%QD-Nsul6L@(>HP3jR)dmx`Nf_6%+p+ z>rWfyQZT4uCN)H6G8xqxjQKK76y7-+e;Rauq2jOcpH~ph(>t1ZwPW6M9ul_*TiA z_P1w=)`lI2F*B`L_g*#pw=a!PH@9P}_T7T^-U3>BO9?M|jD^!LqR1)#9{!yY&C<`r za#N&D@S5LMcq`BntG^!P(&OD}!uB=b;uZ@w8Y9p*wTRnf)y%bAIl{--gkVt0C6TWE zNt~Y=4OdgPv#5`;xO@i3`ExR2y+h`>_Qz0E^jCz^*Wu`QdA}A@eMGa}m;$alqxfk5- z_|v~vs_aUmH$LrEqG{o+T=+PS&Stiwe33S5=tmqPXUaMXw~2-uFT;$R#_U|F6pM2m z#fExIvrk`qFgi7gA3X0o&Of9{dBsuqcfE-2*Jj}9pGiUjY&?Vu?}01cZf5_=#t0i{ zb21uv5KinjWfMM2gNot~zOuy~2i(nJclDODTOUH`Q{$yy*}jH``c170+9ZEn=2@w7vuL}v39=*EgD+L1AW%_-RfcGiR8rWZ%n zGbHHVka!&JqQEt}U*~Rh#jw19G(P(M6A1OJo3{TC#d%L-j|wUVHKj0P<2 zNCT(giR|1&XKVy{Zgs;Sest0-oZh2?v+DH7q)(2`D-m-8+&|&uv%|r|cNAV~HY^c~FTHr%h%ON=J}#^{7TyEG~5&4+EUl ziBhAf?b2-=J#RgK=EEJncv}l+uvLxLOpy~>wA%dG?-!V6&?)Gz(W4t#Z_wlW2{@7b zfKx2dVJq{xP=4ic%pXv~h5xH3n)4p|glj={co;o##OR??Y0h*@N zhAAssdD*c&y!zXJu-7~u9xtrp{BneI&cl4FygQ7$5oJKp9T&J@&oaEfb03Wz^dHsb zUL#Gx_NDk^GXag`dP-br`i=o?{NR1~(bk-gzLdvjU0N^tIM_gFrWb%ziW+sF{)zp! 
zt>|y9KI>e1hLec;h~bXmVyl+|Df-7nlq%RRzSE&h8vE5LnreC7gZo**muuXww0X2) zj{~#*Z$Auec>?T_E@k%yup1r!GxysCV>~QqJalrEL1Xy#`E%%XtOF;hIhM(|PhvH{ zJz>0%VqfR4&wOrfB&Xx)s9YR?I#X_mJ{QCZb#*IpvY5#B8jqmmk2ay)j5qu#;Xe6J zBMd)$J7%AA*qwg%q>@}&F=l@Ch5v@%z){C#*`!}`_^VY!<1O8AAOaP9s&E{k`Ac-haS--}`!<^L#!YLA%h@>M8^c9EpuhTD+>>4;SD5 zL02?x)6jx_V%?HB!KjN5=I*^atn-62#dO^I86<)Yk zEbMz-O8pLbz+Uf*G^2JiUf7ocCh8k0-CP~?%^pI1P#GPcP$+zI&lFb1rgB!~JD?e{ zaC^^jS{-ajdN0&r>3}nEGV?WJ%*g>2U+zf`ZorN;F8rF>WhNByk z!6PY;ZI<-|bIlXf?_*CIyf>OJd>%m_&QhXk&Q!{=cq$&!?ZgwL`Aq5XuKf03IlgL( zfagiOIjra+g`BLU$>~ne4HR*pQz#_bz7)2meSxj>LwS&*6_E__iS=1weWeV277HZU zf2H_$sr0<>5jwj-g^mZQuvf!bA!EQ%^7@wx3Za`IZFnJ1E;|4p<$8FAM#>{^%;$HB z-=R&K^Le3tFc^o2*p%$KTo5xBG5! zgtjub4T~dJ_kEPMrycBvpAnrihl@5^jbthj-L5rkK(*}de6_7vuygX|?3d3e=Yz3u z&vJlR`O1SjUKr1hf|7(CZcnQ^74HYPeZBah?>z|j%;S+hLwMG(t}?gP6R_XZAnclY zQdU~+FLwLo4n;Hj@TyniIeBtAtQ@d`=a~1x=1ITk=9n+Ezw1A6a8QA`g{tJYVIjN7 z9MSx1E8RA=5=(B`p`C3j>5tZs?pkrwedIvyES=SjmSs`jX^&~W(rPr{;l;~-tq@I> z>S&c0@ZCZW{*2N*T%yKB9FK<1yT3sF-3GGRl>|439)`}=svJ2080YnXfUz}T(BF>j zOqU54YMB<)J{NAf{eff4RJm@VJ-Sr?5xks-^2pSMLP@SKy_&TMEAHjPid~^>y<(l< z*BT=Fe;Y5xTzu)j^VGkY74z4Td#bVi}GTN$Vx3SlpPFEtPWkFFX_^sq4;6HG2I&@?f9*B!21p7 zsYNN0PM)?v%h7L1-qaP_7mQ((S<+m&i)6G~mI=PY>`B(}7{-^(<=J1Nx%PkzD^0u6 zEl>nMi(T~J#5Vc7otr5}q9Qt8wZk(Q#95bZa7TouJoRJ-dE|GZn*nP0xF`VszP~H% z{n;cdlVyyiwW>&D@Y!IYD! 
z%$pV*;WhP7U{ll`>Lxcs&)^vpykaKDj<`-{r~C6x*X^)6KNTMp?__J29)kPXPTV}T zR)}8yjULR6;xrQzUfQ)6HoV${kD7~PL7JfK`NtpP-dH#J#06_u?RT8mP^H0hC-#S0d+E;Ekcu}N z5Am(2Y|PSfMfdtv@%Efn@yFqD{AScjN*Q#EwyNboSBYYFXVnjyr4x+lw$*TVh8Gs< zPs7?f4Pw2s1MeCR{CTo1g)gwcI?oolzg8yQ-BDkaookCrXds2&EvMs|pDAMLAFy4V zC+xDyq=+FVqM~(2{FQKB+99RF#77NqJUW=J{pyKr<6jD68_q%c--q-pXg(*N&*34T zBhc>P1Gtb@$`?+a1hrS0eEexP_xt7vJ6o*z-0l|mFlS6<^|dZIxHO$K68FgNhIYXz z`U7c6v>SG<+s^CWY+y+XZt3IR2wRL)6xmEr2A6JaDC3OKgI<&PSMa)6Qp)h1=LO7B%R?bxpqPtZM~pO zxBE=vNf$hMla!(}3e(}r+Eywr-@sX^kKsb=d~BI066z{p{P29(+Bk^MY~4?z?`?yE z;TG)G5=u9YBUZ1qCmr*xl%F z&XQe8s`vuR3qLcvEV8x}wIlHer&qy96{SK7KeQJ2DsRBB+De?PW8>m8>ksZA* z?HuC<3#Bjo`P2LcuuISvmmN4K7Pe$^gYbup?+EC6ld0_ac!^lHK~#9A!p4gpil@~K zxLGTfMjq_Sre+q{J9rc~RAKlmPthpI6K{PKY&Pi~K)8=W%5H{C|j zkTh#-=n~9F8V7T2MK=7bGKQ2}Sn#?h+rHnhHG8l~LupGV#r>m!h3Url=rM&7z-P1X5fNN3di)(7m?IN;NfC2-=SBY70(!Pkrk9IRRhlO|`% zSB9umr7a)I@9yN_%svZwcE{hOa9xcx(>iiNxi%|!ACl;RX|jQ16~Q#$1F+Lk@ncaZ zTKo;g!Ed6)yV4$MQb>2c|D+W9m~ZCrh8uAGV=SIZSuJcDAYS|14nrQc5+ArH zJx2y|^-jOxlAy`&Y#XKf)MT-3t0qn}zDn{7yM)~jrL_8-5=a@nQ`mW4;5S8f;B+?# z_pdFYZ0ldb^}o_R)T>ycbZo@^@#U~N$OgBqC}b_eL`aAlkJU>9xa5EdFR%AyB^?*x zu|!On@n)ku@s>BJ>@9{#$vg`2MaJk^#s~Wm!rHehzn~CmqGGVM~2fVi6G03A= z2{P{h{Px9HZ0n!K4reb4%br%zsNXFRXgl{#=^{NUazd-U5`|R9 zpBL)4QD3K0QKNYqytckZyL&tqoIkvQuPyy?o~9%%L-&nJ?MeEjL2;$zD8y-OqRi(FCAqLxe$e1EX#MvPo>l#(%^Nr3yJizKG1&r9HHPdFnF591 zr}O9H1z;T+$iX&pFiKqmO`-yb}8%zd6 zM34vHOq8r^{Wf4mugUD^{emt;*U-TecB$@1;;70M)$3C~i~!vP$lvsoN7+k&60 zk!&j3ny9^D5}y0<94c~>1jBBHJa*tEGJX}$Gd#Uv&hvegsL+Fkm^EU+zGY}JDOLP1 zbD-!H)rEt<_UEi2_wW-)yKtQ(oO&<~|Czld=Ku%v`^w~Zbpx$=I8@BN-xEj86~xzd zXK0H=J%wBcyp|9PVW#}ZA=zQS{@2>h94Av+XQ2q ze;h40P{o!0X+qbBbFg)s31?T1;A3EcYV-X0LfRSpwxc_*njXa^nI1g!%OzZ-X2q%Q zCpg87og6++CY^)kVxlmX4^~J#c(B!HHtWl2?MqvnBNI$bLAV5Xjr3 zf}q>WNO5MuJ5b1oXN74O1)Yqq^nRi_PVoB#R;P8T)cG{^*`v<=B9rlKf)`I*G8BWV zm*MV1vv}ad3hb2sR`l0T5DZ2x%Sq9=)2lxYh<^bU`M+sZt3T&zZGcJ>!hn7@qHWDtYOhH~ z@{Z=4vo68*w@0w;j1uw)$&69QU>em*^1<`ivF98ib>$?Ocy~X)ycJCLpI_4RDk;bC 
zxIY`)+t50#ksMwXi7J2VX!+-<==kIaNB@z|#&0b6qEHHt7X@KcSyoj|y(Mc7=_S6f zG(i8Y6Imry6q-BV7v5aX;%YTVOb$6gy~iwvIoHn!u3HqiSb8SY|4hg0?^pA2c?9nV(I zz~fa(P;=8>Sm_$bpQc?W8_9|^aF80lD{G^B<=QyWa08w7T~w(g5q}5k)`E}20P5Ga z3p?F8O*9RnRlvT!`fU( zcv&ufzn6usJBJHn&g~FR9!kK-F5#Sc%@fCZ2MGRuRVZdn2H5zRa>e=>=&GDX3gh<+ zD!US~LMvYgshG~IzE6W;mVS`)$QtjwGUjh;6~fa4aWpjD23HIog4Zu+;;T>g7%UNU zd+t0yWfN=VI|~{2JYbcgIY?$3K-Qe6%voC@mM8Qf+8T@NijHe~D~S>&i`07X*iB zQ>>q$&pSt0al3yqM~q7GJif)=qJmr8)f1f zlPMHzeMWY)_9=NtE#mcSRdBZBJ@_-a5qw=wiD&i7c!GNeG))~%I{qGLGFY-CSk#Fq zWWt;8a#6F(Fj_m&g;x~Zr^t`qbUHtPTUL9rTR+LP+v_u&F&l}~!pCC9g%999#6xB) zkx#x_W_v`$gYnqqE$SqpA$mEzr*5_RdY zKaa{LQo^BRR!~DD8h1hTcVjGA^bi6!XkpKq7Dyc)L%88Aos3kA4y|<$k zigCPIbsX+&o(w5xv*A%|ocJU1AJuqSfvrL`hCjH#V*@VI#cYw5JTn2Wk|HrIs{`(g zE9CM(dtQ-yf+kAeP{S=-1j9?__;6*rC<~j6P#-QXF&RXMXX)~bIS1J&%@GO>7&^F~N~~{zmh&lE5pB+MrpC~#pN4$k({?h=m&iWG$k93LQCqIdL))D& z?vfgA{kym_dPIr6b z!N8H|)9VY&iC1(!cld}X*Y1GN{{dEvyy$E^<^~kD_hrvh#&|8O8q}8Fq#g&OJ|-WEV7`~@y7dr7y7XNb!>sS9r5n`xWhRbktvYqa!dfzY*l36EHOQYIR`r75S|(gd94pPd?OEYaBY6cL z6~lg6@7HW-ZOc86m;VD2=D>d}oS7dk_``zDgtmr(h#S_UPxcC449Ka2AB%Qx4@h|{Gd8yyjmm!0;n(d`)c24pTEFj)QkRv>GfnxZPhUv; z*sKMn{)^@*fWc^orTbjpjY^_g51-)-jdR@tB}}`XNC46MFb7 zntM72!RDkD;Awjj636S|PV4pJ=lj3tWa3MZ&F@SpvopksSqehy#7l7Lx+N!7{RH(G zCpaq2vo@<{Q`-Oq^c=ew7XGjz3%h+1Sylxyds#|Wjy{|!+W`9;_tE#PFVt^!5+*;A zY%9r2u)A7!tk|9l&c}1$%>xCoSvidUIxdC3Oa1Y0uz(A%#qovwIIuV}3Fd#wgMn%; ze0%u`KDgowO_**$R>qgbbjjj=#_1EJpH8Abx*Mz9BbHG4zJ zz2X|LLJYcl4BnMp76X@zk_DWu#^ljn0Rk z$%kS%ZVJ~z(<7NM)+$DPTNq5G#fH?&X*7PD<;y1E4xynL^uS;^++MMnLkuGY7u_MK zANm%K>FDB$HfB_4#^G z{3+3>j^>)mehyRSNZn1iW%@DU$K^zE>{KJ3>8Xw%!VmDsZ_Buww*ennvXHXh%%tt^ zmN+%S9aICA=)lbRH_kr)Q_-PK_)86BJ>C-dt z_!-5$>zCsJr|z`5AerY+iUFJ(%L}G=fyp-MxYh0rea$(*f8*S_&flG{FE|2mBTfpd zwod0|-#$Z{-()Or9e`N@qptSqM6kMZo*YI-zbM3f(PCaO?et;_=7MG|Hz){_6HbaJ+R^82-}~-}<@Z z(o@6n(uz9J=zBzb_G&Km(fUHIpO$fs-DQfonl2RgZWVLPO!(*PJYiFW)U)XMUY2yc zlX&Hl4feFOq!*j+i=(gHr2V=r6dI&LmVQ0p*~g)H=HF#-KZi8<_CP%STM4hJWPn$0 
z6b83ibLT5-@ZHW)=)Pbf*8jLh3cc1!nbJfkh>|Rs2P|c47BmTO!miLzwG?r@X^iai zk+(ux;BUd1rc>jhF=();iLT~*W3992EXmu)mNG^9JTX%~1nZo0bF5 z7w;$czQcKje5;VC5rs42@lZiZgFjJQU3 zH@w=C%lk$Rq*L>ziN;YyP&P6TVse{Kdao6R1vG|2`u$w49J~Q6x=D63T@%cRH$+7f zUryL4Wps+A%y&>8&aSD339=wOXdX&^^*hjLuYs6;p++41LzgSwJp>%&K}BAGT`7(X zb^CLwmcOVI+Cmct+y|e7hUA=EflZo5c=?Y8Z#h{^iT}odq)Vrxt+Awcd@l`2xi6n6 z?N-e5%Y=KvI{J3urD(mN1EejF5uPPF<281`JWdh@OR2=`Z#6^A6P3`WQHQE81Z%wGVa2IG)ONN?1 zlOec4pN$tNVae=&aNPPj)pb1$^#eB1f3YKQ`qF*y-@)#@=R|+f3mN zb!DPWH)meP^mSOOIA{}9^aHq*qQWjIG8o`S>D#Y^cCn7huF&vtnYUn?!x zwnIl8_pXY24w%NQxLyqM+|C~}Y~kagpU^lv3uz}H9-_jdJSKlQg#&UGO@ zfLXBPKnQl9xJSywk;wDn@OEW5bPWF^Mvb0^*^dv>_qA>~t4)R9^m3!Ra6@U`AbF@ zO%M7ky%US!CY&9cgl4B2M5XjwR26SWa~H42V|5{ve!vOeCLE_V(XVLG)%SGpswxB; zt7A$~J-tYHPqOE|@C3(FQHZkarnDnFBjtvcU(S>0vZW9fb6@uUZ71yWc@+=6_XYlG z`pV0uy%WX%mgD`~cj$oMQ`+-sAGs;!lS9B(*y68?!D*2|Q*H6{{ef_Ps?;LeSt>^5 z7UNl&BAY9DLf)9C@U>W%JGI^w+vESits9f*)O|P5{U}kn^BzFUlt6wnHy(z5_2en% zXF!u`C+cdmky@*I3;bsryj<(gn+L|pir4C(Vp==E+&r@Cqzv}4Wi-p%kHhx$fX2Q{ zQB`>fZT8HCr|!L|`310%wK-)Nnu$H9PJvlTsT@%>g!ikCz{kNs-1q1}vg$E`Cl>0# zhL(7|A$R4X^`lW|evKJlyG0A*??nDF!vD_zI+)q{3K@-d0qsy7uBk|YtFh~8-dJ0XD$|!~YYoHPwIS3) zSw-gB)tK*Se+ z$7I9Pu4m|Nt_$W&s+KEtmVuV7WKf%9#vSgbK-b8T+<&4HC-<4c!>nQ{WxpPf)Gau+ zDT~jzj>Y&(rZl{)m}b_#q#Nz6WR#Ich5zjl@@7^Den)L zNqbI30rt;5fbWzd&{Ne@%9wiKq7n~sTWlw?nF0!WQ3ov}^5tVZg<`94lGB8$vsx-Q*pD&a^RDcVZBj$VcOK%UfVomXxarFVoAe3_o2HeO+m(&zol{KDW;8SRbkmzJ3ooI@NoPGp% z#{Mf3`EXjkaI*A1EYUNdz}X2rZQeEEP?MXWFmndp?O`o!w)+QXyCn11HPtZgnKznjG(<;|%^nC7r&}!w;NZ0O5Q2xKCK^oDtSqIid%4&V<@QIxCyf-|DbiQO`>uK75-te z95g1cCu;|P{<}^C+iOO_Yu9u1iM8=T*-bdr%apvA&Y+3MdT?gHo~-}6h(G_B(RpU2pP)8J8Zkz80>B(^2@=lfnt0?3nKhxTH=GU*?g zK^QOj6;Hn#ma<~j9U*0m)KT1{jIS=+^UFbH!iM{b*wQ+VhfGn&5c3D(%kw3|tt;WQ z;*uX^8fnWOJ7Xt@!7TVFIW}?t;$CS5oP_-l+A)3Kx61@bo(-sCd1R9@&;rtJQfi!7mkOq<6!$ z9!rFAC%k!5TLGz_D#S}Sq~4oPrEIK451d-1imA8@m8#DQ$K>(i^n^)V`LGbSHeZE0 zpGqNX64CS*_3>z=o~IY8P|6Xj9X4 z59}eAP%r1=06*T9wd!`0XLZcN&V?4V;O$aK-jqOYRo3*eZVl)hm<+`!>a@XXJN+PK 
ze7i0l2e?~`yC>PvZa*1EIQ)hi5pGa-+?>_I?u*YG7T_0@GA*Dd574X=s&fUtnXMy8 zJ{)Y{+>56xSz^DoPOz(8pNBSdgfT0Qf`0vNnv&F2>|1<`UW6GlD6f|YPRHTg?I(0~ zRW!YgxIyAUWzJMf6`~j9!tnRqsN>j`;ykM^Vu(=}R8uuVU5lYuxi&#GFY*zehxvoD zZeRJ$=@C`0^N+*9U8~^3fOHIhUP-qu_23+bFZ5`ER4AUkU8q05mM<1Ar~lM@(x>~r zvVK0<$=4mHx3rXh7pN^1?@)U~vF%Rno{~|@VQ1L*9KhO8EVmqVNv{2$1y>QB= zu=d_8{}T%;QV&+w$Wb07o#&J?v`JZG4ko_{l<3pi-1SvYF7lGBv-LjQ8u3l&SwBqJ z@YEHjYixj(EB?Utpb(zLYP_Xh866vXL&=q%*g5t-ZO@3|{w6vQ)p3`wF*ut}Kk(xF zzc-MM$9LgL`C;MM-Cq1S_Mgn{T_~Qv*)HrFco_DbImBVzFUyR|ok%TV8~QFi38|qf z`0x37*~zt?QNPRpUo>>bGT0&0`X!R{CtcJ_X%>?eP>kM1*<1bjSYQBsyR%hRRnV2bm3X60@@a~ndx>Nk8dWmKg;YA-47b|rLgDSs zbSPoG{N46C(R@e~m6a93e+i+OSfPrQ32oxiNNLa7{F4I1yYb@myL65M(Z%aKDC%4h z%DY*LW2_C);l~a1tUN`7j&#CT?+e7Qm#RRvyMu7DwpQH!eF&_6yISn>s0*qW4CC^* zo5d5-`95y(EXa${A@#Qt;P#09JRr%54-5(g7qfgQ4jzW?n>|FAkCo)SFd3(79|lFU zjyTXr;Q#j8@_64jU~RXNw4QCiTc0I9L!%!|sQv-Lnty4uqblmz9)-M=0qp+Bnj3Zx zhDY|&xAJ{|&gpzdR2cG;#w64WwTqjfYTs$>p%=$c=E-)~IS4JG0W6xc7n{xb|>q19q@94DWG!5?%%l(dBqfji9-!mLVk$co} z-Ax@{Kj4*+zPXOa4XG2a>wKW1^Uq+aiK6(XC`r8FoiA!P-+?jR=fPYH>6yv=LiZzE z}VUFe-_140_Ai;xd-^0C93WXdbCZOO+4*!-A(Ac^XASwNgYRe0$H*l%C@#;Y zx=T4ive<=v_dcUr%Uw{XIUVA3rL+1o7j_Bqhfzi?u>JICd~<6YCg}x{`|2A~ zZ4`v-Ix*roRX+?)aEAV|ejFp^dBr)gLO@JM_OjUr=4(dM=W#J$xhx8&t6dTE8oY6P z=P9sz;|NN*w}>J;HGpBa4OL;=PnD`M&V0S~IF%#h{~)>wJL8$A;?3G*PI+MikQ(DEo@!P&{^YcK<%j`d`jx+=!Y z^1-pRlbAAHCfnDj!efF);@0y~r11P2t#;jmPSUq=a>Gn9*yaG()~(0iEALYI?*iyP zBb@go{|81ZN1|q{H%(i)0bY|_j2P9CCLe!7rxm3C!Ou?kTH&Cq=BOhL*jFN1u69ah zI}^CqcoC;dRQs?mCPJrR5j@%-)5g4Iy!U1)?*80|ysoJ52~S(T6Dz%IYgglhFa^1W z)XQ?KNrIkXuc+K`Ab(3PqU%0cxZUFfD#cd`ZQhP#t~QPh_GrU^q(m;~=Tv>cqQ?|*%c9Bbeq|XmUcZ#zsTulU-~O` zkX*iN(7ib!@*dKR?zNXGb5l9&c0VD^8n&BW&$~s-D$Mx$^m=%lwm`CJt^}73<1u4E zEwrkyrn%=ILezCP$%63$E)^)UN68$hzC9dlKTSZtv}CUBJr=jzJVyJ!j^GM~K&+ph zE+%g%q4=Bz`Gp_rx!{vN>djn6etimPX7xB;IQX*MYSadbvsQ)+HE-y{A31OS(;`b* zdKOj+U zO4eIv4O<^p&}P``9Fw}7U421x`i%;%`OU}URxFPZ_&WIRl$5dD;p-b&Jh1|)PWQm7uK!N$BRc?dEAB) 
zVPHsEDAlkkG#b@}kc|`F3-haX6dCyC(`7yY)53iMAhi5eBxZ;5sQ+*K+a*_0R}_-O~5rddfg9=xZlCeb^V{j{PS- zXzu|Rc`W)Ty@q>9Teu`%3HPUaaGzdj+?Y@(-WW9>SAL7+QuP^6}HK|G@QA7g>W^GJtbOe0=1kY*~kQbVo8J^cklK z>Fd<--J=9}y5yMJn3rQx-VamZ&rML|SZaFIc z(3m8iY0jf#Z%Sm}B*w;)nxPV#VjXs!T|;a2a;W>;bXvKho%4m(Dc@4%KGQbI^DD8!` zf;`FKq{`F!#?TP?F}kI0$OZLN=+}@TsClJGT$iJQ-FJIJezOvf^4&(KBYZf|y&n%5 zIE?@D8shCs$h+$snDJ);#!ZRf$1Y{?Ks}KSlTVR)uPnGhd06my7oGg-%b(8v5gu*1 zF4HNC$LS|_Q+T>7>qzIcs3)epZtN=PupxTEIT~%FPks*?MmXi-*C@= zq&UCkfp9pq2CD58@n!?S${VBMzqoZUV%}k~_e*W|`TCxMdyWGkGKYU|wZXxPo}8lH z5j)o1gWWGY`P(;=HTz7%NTbm_&Ali8mM`UZ50YVDmn^up*cXeG7BZY31b3sO;9czw zI9TR_%hHd+1v(2O9eeW9bp`PE-#O^0Z~`=^NNBmrqtyQR2Sh%!hK>$u*eU0ccx~WI z!m_J^d9pRW5mmU~ja%?II-4I_PvT`MMby6Pp71bZDBK=33(hv(665yt#wFjEz_jLh zH0ZM}I!4;Fm0>@ee^0X6rVhiVUlHP{RegAn#OZr{B8z(%4&qx+HCVY(AEti30Jnbz zKvb3$zihC?S%VG$A~T z-~XOOS*a~#vG@}B^%@G-r#*lXp{k(&aSe=Ie4w(rFrOU1s$unnQepL1Bg~JU#eP#g zL{E!pWN~D(Fyq=uD*qbDd#7%dIE|z5Ma@Zw(0C5n(USOOq^ymj8?7h zg!4v62vz4k$ybitLY@nIa>C}1kXwJ6@^5_82_pD^)x> zxf?1SUW32p4Tf_p84$nD$MPJB;BG8qr&@hX6H3MAjIMm}>@DF-T>_0+(M0p6sIJuD(gj+kAkq;MKX1y7*fF1J~%)5Yi0Pz zPF$y)0eQb#Aimv!3zW*K-D(pB#b}_}#vu6Bz&&Rf40s8mqsXtWOF7%2(;;s85}tKg;)h(kC5{{EO|QEJ z!!gwY@-Rrk2c{RGEo>#V&g+3!tg`6-N)1|)^_Pxj8e;$4yRgY@rm*l~ID6m!3EOl= z!Ph%Opk+Y|Wu^|MD=YWW^M-*mT5}dVTS?#PJDqWBr6>L>n1sep*2CKKo{&~K3~MC2 zTf_Mhc;%BIc+^j%556({Wq>ov9w_no5=U;(RpPjcesJhgqwqm(01qqM1h(T-IZEng zMEBC(?s4 zlCdkf%TgP*{nR2HsR<`5X$SQF;ZV-(+CnK27vcKKWIAM>AZkrXlkfzM1^ ze7C(4RI2mo?Y&)M;i5kHdzC6T2k4+x)DWH*@LuZgjAHE{Q8-|Rvk=uhV%V69m*hI9 z5AJi9iTjGoP`fcsB81PAw?+37Yv=poQ=@@gyuS-8t~?71#`h(Qaqk7cJ0@&1EEf8X z(Z?~prr_}_mUv*KGnxfXqynq)xWg@llh$6L8ztYNQ}Y>6cE}|iAVd|LQwu-0y<9=c|(~Wel{{k{R!EWzCWKIpIHv;c`5ik zxI-O=riec-T%>+Ottq167o?25NS7W|P{X*HyrZ}Yw)#7Q*#={-SpEu%&-h~ZQN}dx z!Fa|?mtf| zv3%LK4tU1lo;07=rsChHss85|3f8pY9-%*Bg+UTkjI9HcOA}=7>zw#>th9TKlWH3g zhp|YbWza|JrW_wK7fXHr z3ZKF>Ie$SjC4Rm`>BB;}V05v3%91vCWu4?aJ!>oHmOQ6Dy$-}@a5h}0I!fEdnNY{5b{A>)O9>p>UP&KJs-x!BR9Y>NlR*jsR=7%HZ#Ddq z>yA_0JBHx#QBRZcjFF4^smy5ADZx 
zS35xd+-_2yYaji~R>$+{@oeXHh)%L(_j8IvFv}nWgU@th<*-w9?nF8@)do=U z`!2(7?Ys}W9Qu=&^?3P>tR}ElkP-)q@zlCl8D}Ofk~g-P(ch~YxY4+T3eUe1|1?Tm z;L(aa=yEqae=8MIpLHOE;@-GJ%z!l1l4KJ4tCL?Y-1}{qEo6zWoJU9*^sKzt4HS zp3gOMm?AO2GL0tEgdZv5mp+p?G(`bDKNZ5gIRS!mz6qbHH^C*QI@nX<4!>(Q!ggT= zqk^rVQRBmA{?4fX@F7G?cEtEQTKr(te(}rper&g*iaJHO3C$|wuwYmyYvmWh_>?Z( zw&WdT_FshuzL?_>HAVJRoDHJabIRD5!|6Z0pnbPFb&o5Cd1QgBpKjv0vyajH|0ang zRdX@8Uj~h=a|Kz9I>tBLfabROu&wYiC1ff<>D>oH>6CYLe6ItZ)jcb$`T(RLaid4B z?Z;a`?MK6@A&?Sq5Xb2+!w;>`>HN^k;tkF2+`83Ae)hk0LUoM5t(+TfQ*hIC+M*Qq++yd^7m8 zkn&#YsaXW^jstTc+-N2gOqq>AJ5AYdem#sFt`9|7DO9mLTsZez8Ah8wA;mFgY2_4K z{9UDl$MaSA%Yz^c3YdUv!}~G~NuduWfYz53v88bfWi3!bdbC;8Nt#5KF)cJgVvAcy zv+rVQF0iiI5f{Dh4(*dX$aC~J=xvZH>s6_Q2ZdaWzcZK~o$JB8_a&qUzNBvhmx5Pa z1Vq2j<^c~o^XS2`Y#22Le-Dy&oozX?H|c$FcgZ+mV71{{+IFu3-RvE}prjjXJKlx3`LlU;k`jJzRin;+9xTr|4ay%CdHT@| zQG&_ysW=s3kXkv6mcAj+YR-yDkxn!ten0*G+moKACy_9D6uQc%@bC>&`Thh865x=0 zy-F~y(V4-?r{;p*Nrt~eT47hU3Z`ugLWAd;m}sER^S_RTU*S#i#H_WP|4|t~j@^v4 z_8ss>bv8VDv;ZT{pA?)EBUwwF!5508{KC~Fj%+XIU*V60%t&8(*4(j})Jxh!4c5i# zy{)V=Ft)MiCjpwW4%0u`y)P}W*oxE(hi}YRX7u5zw_yWZQN;8^Crd;yq z3+O56a+OA+#Nv(>Vt$O`)csfKQuSVnnD0z(8j^vkQi;Dc?-5Ok6)<#g67-WY*)^s| z(D$mGlvQ?8*3->G^<7)}lBdS-Xa7pkcSbtQGP@*;c8$d=J+fGJfC+j!Jd^YJLKi5FQ8#qE7kVSBDbgyv}=_Ex^%ik?m?B{d}zE&Kg&Cme|a1K9seHwsQ!@h zd!2YdiyF_@+ayM~&Sc%pL1?hQ7Plm3^12bqIP{1nE?B$*GeYWN=THlt^5ILhS-mS3 z?lt4-3peAshc4WB!IO5E*}~b4Zgf^nn{sW`>Cx=IY&-ohUH3|5{W=@E*Tex zSa%pJ^-EUU7;>Ru4P}JK3XLbO!WX@KXzpUcYpo1X%Po+Dp7n<%S_w4S)(ouHv6v|lc!N$;oe*(O^)^m$Jx2oiW%)dX?w)A1Ov7vai>8HFTXc2~O3grEnhHU=hFsz)n8yX_M z(VRKG`0Jh*)yr}YLtZyM`j=TvDyJ(c#(N}h_?ih$-6pU>n-7QS2)yBBy(l-Ih)d?_ z!2Y*msrMiy{#6_a;vUKAFg1~1CA3ickhAo%;I1rJ>mEgW#es|edny0r%CZ4vLWsI4 z54Q8ib>rOY{=bI(=_Nkz=evNSL$69`r=mdwi$3n=-XX23;HXM=eDBTYi!<9o7^z3UcoJ(sH z>dN%E-1mW46jcKT$J%Iopgp@Bn8Z~E_44crV|KnS8Ix1i6IqoBj!spe@jQo}f2^m? 
z`X(^=aXxu3{0&|Ao)c>VZ{r=iqu_2n83Ozp?%~@kvBOM zq+ALy6VrriR$Z|r;ugiX@DETqVv)B5$qk)GEC zt2rMjZhwRLLbU>-QC(T|d}u7Vj!6{DvwMhIU+sj~^R@Vc z#z9!LaG_*V-^aD`S3=%$CEor{4O`c>i}D*&dEKtN;)1(Y`1pGY-TbJ{KBKol`A=z1 zD)HnRf~QfeMU60JMJBw7O2B^RV`;`p6*yW~0!NVi z>8b02;{LO+<53Nzjj>}Ra{`T~L>lHBPm2xPDd);hDA#tTeoqJUtczLOT~tMdg3Z=QSVETxCdgXSty;=D93a(GCR`HukiM+e#ANF~RNvGce~mT97a!z8*t%#=y|$k|CmZvW8gou5 zdrrL~q&#T<-*hu|9WKzF0H2fILe~EKl2N%o+N*pNx`xM5*9li(u4E0?ezt?!*L|Vi zYu2E4vl4C?-wXc*ccTrRG}tpvAFQPJqjUREEHzVxJ%d$%{S5eX=nb(o;4FVpx=P~) zBX0ExMelk?&OI`LJsLWY%3Kc-dYzyluMXlup_ZCA)k&-yeS9+V8mal)v-$HgFsx=F zajgXeP8o*pedgf3U0tzeWEOPnZjCv56LC?mL)0&E1*j=F;vsiMC{#1!6{Gr8d!6yY zRdIiXpl_;YoNV`z34Wl7`ygW<9?(d0FarVBsZ`O0j7_j$ z^*n4(pF?kET?K=4rub;@FKK7;nL1~TmER5-#gi9Wqw@VAiT^T{G`_8fEpx_`YxX$J?g=}#7WlzQXG zP&3e&r^4wS&B*#!CY)St&bur9accWL+V`r7F86*2dEx)jcojW*)!hK19VSYw`koLw z%$DEA`_Y+r5~pi<^2j`89zT0IzTEW`Ub3~{C(%BRWbwK@MBeGGLn zy5Vl;OAz1wik2KM;8T~GdN1|CM-NR!vl<5qQQ64L!dv`m{zbdtGtoWwMLRxkYW-zyj|$b`ckE%}w+QAoI@ zElypt7-i41#Qgpm*k<5{5cLK+`P$&Aeni#bV|h!TL6UK>7kdAi$WanPBz%e*2JY<) z7K@$=CHD2Ocy>oIqoyM??I?h&%AJw4-ouT~h8(Z(o0cE`3(oPMVA|bs`Ss`zq*S&{ zEa~1zwzhpyt=0{1ZhS#+gbY}?&WG>W*kDX#COz<$_ANzvVrPRraKtT$`#ifRyVQRL z&&oqQvrK~qxp#t~O*Y`UM{-)c9?!+*XDEI|B0hDk6OQ@Lfv2s`qC=Wws~oWi+}fUj;mSQyvWXDxKQ~uES0T!fD2ZbZT4g&WVNk*icglzH5`|xw`Z< zF^#1D-<|pL<&R_)a1=(rmbh8|wUqRsKToKsq+c8NiK5B^?$@Cf+RYk8ZwFmYw&{j{ zmdDVKS*c*UeluA)_o9;J+OX|cnvlE3m5oR3Bt6Flfp;58ztdZ|=krx={HKn7Bw9(f z&QD+{Me7nT<%#a0IusW@;*^!yKDd1Ux45RPgrk3k$v%CxLtR~MY(559*11JUI#6>wRnf>V~X6OnR*b-s!8 zKwS;Tj{G8)FIrmd_TN)zJoipGR29NIovg*_o=$XY_Xc=5-IS+zTEbfYKA@BdSo~-Z z$9i<Dp0jww&|T0#Xw&G5ugJx<(K2Wp#t(azzvbVTagpLg&RZuR|3%_fOt z?;3*N|Bhh_ca?W4FNFR5kBGaj-hi@m1G%Q`7N>q4NZB$0AHQGEhed7Zv_x`BBq~tG zHdQ!iuE23RUhsReKAKb+p^v*aPJ569-CxGSuo31MJX$iYe;ErMuDL<&y@Pn8Vlj`_ zJ_Lmm>&d;x5;6YHEV39^4#wYdY1$%*9uuo~UQkW;~3@iY*f5(MJYkySy z?;#mCNgS~cT72?-5H{t+aj)Yusrim7`rOKdRCQCdAG8RzMW3gk!zEkqj@ew^Fou*5 zM~lXdIi#u(3Q?IGxnry*>Rca*V$m8l_tZn{6NowwB=+9HA$;R!v(UU@6uv?YHtus4 
z4lP|rvAsR%#FuFBIF})xwEv&DO3GK(#vdS`b|VOO+Y4!{4d_taJU)D*k(6vKInC%9 z*t<25LiR}L^l%*v*H?zSlBw6r-CI1_^pPx;=do^?0^i;gM|$Hr%SNcg(~zOs_(v|8 z&&SHZp`S5W4!uV{S3}8ma4F?TM)C2PM8$^FIb_Ftm|#AHqtDoI_}(v|6z58bHUvX1 z{eVODyUBB-B{r>;x~Y>*Mc2a_*tfD(G+SH)YpS|I?xG?g@~jHp8Kwr8wUHG97qi+$ z4Yn=YBJJCIk#%_`KYJ+gjddMacl`#Mapm093_YaR?ZeU5}GM|k;nH5BxAfs98L zg71nl*g1PYb#YiID1Lh^PAn^-?;7sX|70q?KN7+oR+8)Hx)z$&cjo?Q6yypIH}KQz ziS(o0h0jYS;HA$bUQwz7M*Q;Pge-NuGxq@nI7N_8(_rfPN1Yc`d0?Sz0T1@P4%M>~ zWuVd*nLdhx6$o?hIC>!kPN?N$3IC{pURpOARVW9#Y1+G09X z1MstZcX+?k6BRpnz>sy3@JROw4Y0|l>&-*ic*1(>{2>vn``3s|5;h92s{HWB+a2`m z^B4M-6-(2NQ*dvHKjG`F_IJYFYSO;H zu8Ml@Jw&NZ)9}paKQ0A#!+F-AtK#sfinwCtQ6W^`2jdsah6Sa{9HyX(IdKheWcC2s zylS;*8Ee1`rJZYFa20)YE92j$AH>1tx#Aay#V%C?#JJ#A*mQXp-TrNc@jG4E^OG-M zwC>44dx!F{hr@CA4_nMSHdA)g(wXCKei!;hbi?0LUuB+j4!U`NA)j2+DEjS7ps2bd zVhi*~zgZ{6-LjjIeq|q2q!`GObJxO@;?dCARfaFhkc3ZHSn1a=PTzT=w!%*FAB>O#^1}5d&WH6xynpLNs z+7Ouo4HmuWZI~)L3?~XQv&GdvG(gw91Mbj>mJsAtxNLeLJNDlxE)1yP)7R^TV_)2O z>Ge}^4KLB~7)|(6en$xC?!hiIy0XWY2wvwo3xlk>@Utw*V=(s!G#Bj=3)dbLD|hO_ z1+x~}+C_c%=IJ!*ekhgN2hO5iO%Ioxy+w=%>!}VsW@?R-byx@d`i^Xb_rS)o5)Vd8h%`; zftlZo*}cq&f?+1k%K0IrNgk^0{Q~%vYA>#R@Kku8cMV#87{f5nJMijBHs8}PgRc#7 zeC|t6G}v5@E~=Ypiu){xiTfpGtDD77pX&v)uUW8aT9f#F@=Ypi?+fAQ2IJN~=2T!V zjg5cZ61P=_^1Bp&w*GjL)D=xR?5+&Q7)5c4XoYv4t-x(dHa4V^pzYs>f9>GnG zJ{)rXC>M8Gh?kZ2%k8ZKXyu<1lt2%}DNp}{aW991-PL$8y+;A)$hPo+XZ2K;W{A&E zbm6FNN|0+)BLu%xg|lXjB;OkVsWJ^L9$hc5AEw78sDzIV+*tK@3Hv0&y@2wRK{RZKC^&GA{s=*Ugp2=6u9YOV^3ZXxhar>xI zXuHA{N-GrbVdoIm|J+KSO4`MmD1EGp19@nmG;0ohLLbiA!Kqu}oRc+y_uQ8;Y>N}8 zZ0pG1hK;6z>InF6>SMa{d?_b=R>F>p8)%$w8T30Q&21J>fCa5KsM^|{uNPmUOyf~- z*M24aYYBi(RUIj#W;q+gPJl4&iP-#WI_6|co4EAryy9^k?agkW84qO`U(y|`=OwfA z{!zTkR*wwc%%{ogdUK7Wbm*e6Uc5JUvS{D%0$jZMv5vJij+xm)VY%6KyX?5orS_NL z<|p-ZEv?uoaw>ny@)zqCs_@eh)A{G^pThCSH{s8j3|>{Wj3-{o6=SDM+1*3teCX;E z@O$;0CbpK5&sGOqaM*;Eo4*PEf6wB>mj^Je`a0|`lQ_b4>GYqa4H(g)_4 z<2V7^?~dYJWfxpqe^Gu}G~>d14d9=p$w7Go+5Ds*_f>Gl1NQxB#Kjm;RJsE{W0wz2 z)6ry$4_<6_Gzl-kJ6K*2Eajv9Xv=g9>>8=ZVKKUNw>1HuY%PX>f1aGwIho%MD-eg8 
zx`XSgjXcOe>J$%t4$W?-V6l2S^Hx(D*p|o_vOD1xEftvlsVmk8zNfu;7Od8Ds_@41 zcJ=y?F05wv2>zWZ1;>WV)PKk(v~V_o@MRaFK7y4aX`;%$FnLpsRL=PV~Z3Gh= zKa#!tPu3-?8+QMg2I1kBX&EhtX zYB>HR8e|27vFz&@dJ}veJo}r-s=T5^$2RGA8f3{YB4=`nPzJh(JL00d&g`PANdX!! z#h1B7)I7G-rKo*94D0y~?C(6GyH*Ej%(&UGq2LWIno&ln?_a={v^8*PwmM#MRH0L& zTByf^7_`iG*45pm5VrA^=X#i=QYmQuI7wT# z7@|WQL^1sz8G|no5E*lR~=<9Rh{IdfwqoEF_UmHq2m)r50 zuKTIxYd8AgC3PG#(>c=YyZlGu0dbi59a+#ZiQm3stoWsIIq$rE9d4i7!A++=$&F4Q zq1=go;HZ@a*FKLI9@n{`>YO=vJJb}%thN!VGPI#Je>d!y`c<^d^W~b7d6eW`C!TCv zfhLxZh2^3-KD9}eI$x@gWW5XC2tTQHo(*V>`7JNjpTO^e<7vXg(Hs${OE=4{@WYW> zdEUy`;+1)lOEtU*c6Hq*9k@%#JK>f*{YEB_@KU62WQ?yKs`HW4=jp|W&l1~pisS_w zfZ-!Q!uJJ#;9rq6BQDnzI?wwFqw6-1>KZNda0wBsq6qA|-H~~@$HBzChPb-94HhTP zu2xk3EXVRYuy@4}VRDfYKY3$>=>>-&F*X(dOzpv)tUo}Z!A$fV=PsW*;s;!aI7_>H zRe7u56-aXK$E~xZK7LR?d~G}fl~jZH>dn`}%e$el>EAEN4g5_y4)*A&(@akrdP3nN zZRq>rteD?<7@xcI52CE!3y+TI!Bww~@T>U%l(@$5*K;Mb{?1zA<$@2iR&I@|5$}cK zkpIM3-&klqSSl`EkW7xp15f3?y1?;WuR?UZ0_b1zLZ8q2_`)FyF5bL^fAs6cac(2H zEq{^lr28JRY?$P6JZr*Hr;DZRn=+JY-4kRXgJf!N^w`__2Sh*87N*;H zO}=wT)G>S^EKcvttNlOHFZ0W!e%%nA^m?Jf^P}QoWnWD3orjOg&A9GU5X>JkU2+CG zk+otYl!kq#Nzd}>w##!8JCusfiR!|pF`r<)&VGg|62IG&S81xwq4b7EBru4;;mv7NgZ3EbGxfr^SzX|`I=iH9uZ!`&w8(Wi=OhtP+>$^aWk3 z1TuZB58kue$kk>hXObm0Ydj#ALx!Tt#fd!c#7`(wa6*ed9H`LSE4Vf9MK-(ihT)M*yCYE`k%ga5yNQ}km z;)Qf9H&@(idtRJ)=q7bj)`q8lAHg!cQ(|L52O9M5BjopSp_wV8C9m8d_HG!5){=8! 
zR;CrW8S5~M2dRg9xm?{M5+a5Spsc$)+45c?>SV6s(((_KaOOSziSGKFye2EBNo`&#kH<&kLg zMjMPLMsnc(c-~qTfMti3(Kj?sd|Rk5j%7|#_O|i@Rc+vv$YdrcTVIXkMGOeN*r-+yfLXJtQJ@PI7I(B>cOl+O>|Z(g@T~1 z*cxF%ChAwHU`IF>RD7k068B6s#|N^12J!1aBRn0Z&rv_+6hNzA*iddaG8*9-mR+(1CYg zSl2+z>l8z-tG+_=A0?dYb60lN#S(5in6ci;z3@CP5vQ#@zTCCd8=3or(}{yQwyK$aN&U=yo15Ta)=x57tjjkf zK1tuF|HR%tgD_^70S|w-OMH5?Gmp<`1t;IzARDDcEgD|@#oGyOhG?PYi#g~z+gosG zma;$>jtS#`7r@B3eeh4kZSi303*m!(h!Alj8h%a<+i`Q4%Js8vQVCo%+X`T6jH z%xYovizATSSy7q=Y!~$}PUY5N;gIwUX#YVQadOOhcJABnFI2h0nNRN7B96N{fN#c!fqtGRPkMWXVq;Fh#4j7D;Fmw% zPSL{8tA5hpkNz~^R1(ksRVd8q+liNt+<=)~gK*_YReC53;L7R>=(!^v7OXr=8!nBb z3#w}T4ys6=5sRqlBM&x+!HMk>KlfK3ju@tgC;VM8yU-Ul{qg4}Fyuc23~|ZKMD@uO)4{0xrjqZI*|>4o zCOl?w>ha45sY58jv zFnQi8{#sfrxo{4ELpVaOHii?89=s%173N+`LtXElpt5f!b~teYa#p9w=iJ&zE_r5j zzAzX1SHubVNAk(fZy4py$fD@MOe}rByYT^`e|aQcTs%!!IDWdYU%OE3)$Yjw^J9cF zMoKWK*H^k~Y=@Hv*~0I#o$xIzo%#+<6DI^+k#Y%T^wC`hKPiu*--S=0b;KfEbFU*m zNOXfilRH4TPXo-a*iLE3#-T6lp;w7z*|7T)i2!Z2!u;~^kjEaG< zNs*lD;K)lFCSl^eNPZ^J9J6O6Es1?-RG}ZPv|fVU(icMFk$1AR-F-P@b0=7|qZ{|QGansqD?(X=9!{v< z2Hh4+$Bgnk@%ow7kRKDs)~jRa!HYQY%&%L*a*vfXc5fUPRMk_HFbj;NedP4P^T}j; zPe>YNjxFDALe{DWbfEDhb$ELXb{1LU?Y+yvB16RTW1ac^M`f^Fr-J`UnYT9)8zo^E z(8p&1qRU1N*cPe`Pk&A2txcQ2N*pC5WTasEbOk)LuZ{xD)%dcO4u825$}Y;2Fnhd! 
zKh`;t5aWpY9cppi`Y`^d7D~$5J!!GTEn7eXMOC%$;%tiN-|ap$)a@?3%1%SNMH zurjo*vu5|VzVOKV1+7uKEKWRnMm%6rPdB9JbYR30P=DV_6<=Jipz|{@*mN4Kcbc(N zSv;NUlgA-18Yn{+B_EQUMA^n8Fg|lT&-wRWEd1RO7aB)lUsV;fOdpG**Zw7ie7O)` z`bB=Scs^PG`%NzAV#MWrqG{2rWw36{V$wA2#q&>%rzx{HQ}@~=d|E#Q>dFqG^0h;x ze4rD*iii=54=VBS4myneDmnT_M|NBhiP!BL;bL(r|J!v^XiD;BzkXTt{8$pTAI%ZA zb?t_iRB|}eOdB1%9*}*y5&R5N!-nd9XtK9A1dJ~cB7eoB@uJgo(zFAoOFY#pzG_(c z(|~^lXh^fSH85o6L)kp9QDQ+CCH}3Z%}@UAq;bUuAf;nHR6L#m7iS))XR8L{cf%W^ z5WbTm-8;aQUM)0m{1O@(-hpfGeG=BlRoSk?Mi^uyii7Mrp;Cn;kxLSdKQ|7}lG5lA7}a z;rM$^K0R^|O)8j=7i&Z4YH1n$-C7~H*OTVYpMr5(t23^i7bT9vL|nRY1RjmNB`A0q zz|BGh>=bOlqZdr%sqTMiQu;P=!yRpIc`n_xcCO@_Kl4#Ttv|KQ*hERD1+efz5q7Mr z6Vze~SjS%zPV9-1yh$otU)_-xUN&OkLY;hTGsE{?lPP^5V9KxyRKBYkdf!dO;{1U$ zd0H60K?fA}x6y%)B4s&+(cCxTd?a}Vf89NVZTKK;T5*tC!q&kx&sHdHx8YL3F%ROr%4o*7c#Ho=zP-|*upCD9y}selQw zHI%UI9GFISfybYgf$e|SsJGTe{&jpTCnQMz$L0{2EfQsam?kq#8jD+BTk!C)RZuR? zvuEh9zG)dEIAe{PD?AAsR$*TC7r13x#4pc>iQ6*E*TVc`F;;;*jcp(;fn z|D%tej^+u|nUN7k`E)g@j3kP zuSC`6Wb#M%D(=A%SQuAM{zcGdoD4{bNcYCZqoO;a2oee8^E9L zC-J+h7wMi}xe%AzA{YfN#L!!l1XJl9y~jt1|2TO=Y=SoHcLw;q)kjuQ=9M4E3WF=dW_qwG`a^y%0DJayO%Y!t2wJ^`DA2OSpjin0~>$~28v zDc6#N`%Lb6K7*n{l+oZw7!N(8#mmz#P^IE}_%PEQQx2}j)n)G7C(#_{^_U?i)xgm9=*fYnJBK>m2#;Ua(-`bU89zF)Q zp7zkUawna=uEq@mnC?~ef(wd)cx$-|Xty>~{r6tjf3ge`K2{3$ZGT{Q^-_ikPwZu| z9>&~irZU6d5D{XI+EqF@eo6w2&(Pya{nw(+&NM2NSau(0^uZmMvjyvZB3yZOhU>x)Rp8R;SU`ZRFhqBr8LIkEieUL_>l?ab*9r_kP(X4$6p(xKU#dW%W* zR`|f)PxMr|L`~j{xZjm)aCwFSCyqS;_NlQ@u}cvGFMXgsvMJ({sNJBG5sy!$*+y=( z6Ds^JC6^6fh4b5zg|Kud!6A!> z8&l}Z8C2?=ESu)A3^TvK5kIje0C=7#$4!rC!{P9i>}=%9+YAcO@6SZanq%mn?eCV5HK+Hotl+HF&Wjc;w4NEh zU}?DZjJea7X3f#-pRLeA;s5@m_}kNYuUo>PQ(f5i(phlNUO?fE7V=3ee~1btsnUMi zRJ675f~T94M3=O5xN%^KOBV$NvA&xEDH|Hm1Gfg@!QAcg*6a$wGe3d;^l<>+{gFcB zy$NzJuVQ-k?Xu|gv4O4?uCA_$y9<5Cxr=6f1j@6^6N>fs(RVFlsNJj%C(>Sv*DNOza%J zAC{fkA#MmcB__h*Kg>-y5}_yaZ|Hz}zbA_FFV3tR zWzD!Rn*V27WIQN{SHF%!`^%Ee*`paw46YFL7o}s@_g96PS~K~TRWuu`ZsCy6*Q#&t z>Bk3DM`M8eDA?591oOMcD2V#{GT7$cKThcJ+ 
zJzzLZ69a!g6vyT*kf`((ut>=S!%_gnSxNLkb0D`o8h{P+ZQ1Nk3R#Xe0KYle(D$hm zpIuW*`Y!uNlLyd~rTTvA<09j54EaGrn< zW&}u+!pqw^NzjXnK=npUNmR@WixSp zFNtPj)+#IA(-}QK$J6udcXF9(M?R;KC)9>+fHRLa^DHSv+xts8uXzY?Ql`z?%ZBmL zzuoYyRu*rqvBeqNd-K-D3B0VsRhN(jYw(ACSKb$E#T)N+_%m*DfZqH(qGyY z5BJ^1+cub@^bU}Z+cJg+hpnb-6MNEEmt|bP<+AujP{O)ijl$T6FX)P%3y+QaNx}Yp z_(k)sl*Ckr@lrFk&dL-&U%CmC?ewUw@)bBZ1+$YhjZ<88nqC>cqyhHFL3g|rpULqj zAzc;qe}v-F4~D!qe=^VeFbZ!gI19taO~ZxrU0~=RW3K#qj7k1%bdl5wknAuTyz=@`Y~d7s=;99R--^PW6Qg*LL<)YHo{lZ)EwDMW3Kk@HVhv?I_-|D$ z7*0^fKN-`wWQHlH-qu79+jP!yT8=ixb8w5ek#m;!$I~7B@SMYY=yl&yI9hpFc=dh` zbqdhnpnZL@qHaAjIK|Nw^?X_*nP=-0bHFBhE-tsKhvxl`G|xeq@7(Rri=*~(tJLJK z4Sq~_zXpMyheVIq-9QO%e58bXCGL2-R3aBt(*AX8Xll6B5I(h+YnLYQ!2(AZlJX9Y zOmh(4>ine7>&nH>1%j01+s$Xv4e>wc1a?}uP^hwp{Axw*dSWucv&sWqfw3L_Iw*k>kf56r!g;p+k22098r+7807!G`>=F#_-Qn1U^a&tbR6Ui@M77siL27gt6u;UDwX^RK(R zDBGe`h%uc;|MKUdalAL?a5m1GQiNywx-&&2L)faVlyq_m3DZuKZuWojqc(@|v(X9g z{SyL3!H&WUzqx$3ERCOCSHL%tRr%&*Cs4H*A%+F2Q1CeuKJU8}?9wvOz~vf^eI(sX zKg{Lyp8Z%;qFaxOcn_=YZ^gfVM$rD*XK|1D4b=JD9gCyhk`3tNf^m*`JHUyL8t&%1 z4NI|7cOkEi-5~z0*oJL4EZBMW8rl>sk<_wRq4|;xBJ^GhH(G3{A*rv_^2;P|Gaq!b ze2evsK(}WcgkaN0)Y$R?{I^b~VF&bh`_EkL5*-B#t5V_C(E>< z2=XQ0sm{GSM8>Uw2ae33wduK;Z7FBqtYa{GG>xh%o zX27IYE1~-E25LJbQe%)eTb1k<|DE*0%(`G!Y@8tYq;KFU0XYz7Siow$9gigV!_KXf z*}!-q=hZn=!?Xs{k-jBYmTnQ|NJ)Q}nyDxYz6<*+qy(WfsV$kKD0cid8ho4jqq;*T zBwS9wYu_)@z9o_DIMtBfBq!s&rPCoUFCB#lFTSd)MX$F{;4%{jYJ023YXbWqyge+B zK4-+&24-Q9oey%d4|UCzjD{<1xI}*xmB(suVfakgj$!alGS3D7PJqW*X;?nv0-Y&1 z1ylTn<9G8Q`1r3DURbukLHIBWkFEb)J;F*kH&%Htq&0o{}pBlLRRvH{w{9N$bI~ZB{4m>_O9K8agv2@Z?Sk!qs zPEuCnNdG(H;vH(@rU|W-Gw?ArN?Y0C&pY#SkMY>0&4q7+4|Z?#z|M_h5c;k{&6m>O zI-!_u{`@3{?3>Q&O(~LbQJvP8tK+WgYlMsAV&UdJBMOiGLn|AzdD`7|Jh^8Kw3j-7 znS2AH(Pmz~%muA%1Atw}(bgSD;nEBp+&BNUVBIN~l0CO_TdTKx&!s$m=NC(*qiZ0u zG7d6(Ci3sq8F;xpkv3Y5h7BH9DCl@74S6z)zg^hKQ;R%01t-vGOzNJ!GeeZ8ibaRfvH`-#0?)Xe&Gx?aY5fWB!%v#v2~p5jtqT z6*c~Cg3U``i0YdRX{|>ne|9ey9(m-mZm+qF2hIzIj;96NRAruLAra6il=GvS#q`f< 
z;>?c9*vUVPZv=Ydx>1wytIQcr^l(PyeS;wEM7(5e9LE>;4kWJw<7jEc2psF|8}ABtgbjw zO1v3mrK5I@6~`7KV{bjU?Bv1Uf+b?CN)g$#p923?$A$7+z^lDia>}Ho)kCeFdG@<} zVB=D-uIf$lanj)^){noo*OGG8I%=^@p)QNssQZfl;EqJl=;NmflW(U9M=no+1;1wT zjuVO8WM70gf)i_xv?%=0Yz&volAxR6XTfvvdLeVwYw&JZPGM=LxMI{U&h`<&aIYV%=wc29AqKqi zef0mK>rBIP`lElXqFED-L@3HoLa1l2)gVG-C^BWrtdc^8(5OL_G$|sKh@zz7*=r%w zFJxAdsff&yIm6li>%2MFbwVt^0KSS(REYWD5?8g43 zdQ|&yseC8?rSiZdaA@#7N_Kn-(-dxUrFv(Sf4fa4O^7l7lIVlX6h6JOgf~qaqzO(x z463UUJZk#lr=2qdDEdvsci+)7eFb!|T+BJiHz7IaAmu*W4|(T`#EHMhle=OpeBBX& z`Z1}{s^bEduOe9cV=gWGag+=vO@^g$KggiOiQ8-(7=AcaUfL{KrkD4Yu11I=oNoTTJ$TU!3VZ92K?_Y>57z*v1oblfD zJyKFA8$FHAQIN|}wlFq?<135lLdi6k*R+Lt)mDnjw(q6m6C=gDk~M9ycNjfiohg`< z$8-NnN1$bM1{_V^0;xCl3Fq4$i>G=o6#bjzG~(0*t{9cVF)sG7d||58XftD#+uOnG znHv7YjePmzHVzzO&nd$Oz(|7}c%Qb_q3Qa4y0%ACI352^Tp2x={tD0G$xeyBc~_60 z584Y&Yyqzw8Ds-Ki|2NBq$7`yaO5;E*?EO+ynd?->l(-MC7m?3+t(_z_*#hy?NjMm zVLFE?4uhY5KSb+AA84}19{LgZm_o*Q$z(^u1uxfiylYn{Hrb%V+V9u0+ut-!4N|Ne zyzU|NSmQ|INhQ9fjJ*5uPfAh`;We%?pz!!0ot!&{V(q)*^y)oa7_1`;Ny|oE^Kh)u zeJ7lXoy70k)oGqx6nFg746CnSq)`_S;tbsex-wS4{wMU%bH4=|*XZMR&3iOhbjK4x z%lNUL5q5ad8-7do#;KBdd_vF=F=6g@_`4w<(taJ2-S6jvg;L6)ysnwDekjn_9c%d7 zA}6qyCaD8fEkaYZ{ud#Xw%AFx?F-p9K7;+XFGm;IPAvSBjPgl}cy-$V_}lUn zPL9kW`_vya?c^5R-@QoebfYhxx{)kRrox1e+q%MQ^&m_gV2=+UCP9{23KhIo=4tC^ z@vZ5mWTE1O5mRl^U`j6K&YH+S`c%Qn97`x()`fdgXKtI|#kD^wxGFS;4LfawopaYx zy!Tujobt5t#LGF5?zxA(R=T6CVg}T$pMZ0YKNa`I74wPbYS?=z69y&2`oMWy-uEB; zwpoM^trECsdM6Cp^&PD2TI8yqV}u|2f2pxYEOs_xj`ZAYV7dUiaSQ0mm0@)V0bQ@d)}IWDq#h5 zZIP6!uIn#truXx2RVQ|Gj^|lLEs!&N0+!8|NKo;;gN(RTq5x9TO-hA;d4sI6rrumjK{YZfzE~l{Cf6$apLNDD!O?Il>cid z+i*?VYN|v}GCXkjv^ucZaRL9E?29XkBFMka6(2T_Jbi9-gs$b%{p~el< zyb2_X@GTGybwv050>)1f*hJOr~v?hrCbscHru3+we?J3;6(St>! 
z04Y`R9is0!l6ze*`nT^6-IwgXua^Qj|B2Vw%eYal$-0?%Kz!C!9E#lQHCnoj1E z?t%S0c-K{Woj8yTgT_EpL^&Xd|p0W0nCEt9rnVpr6h!=();hXAR zXie=%w6%8QZOdNDQ#;<`iz{%qh;n|D%RmaU+qG#59Vwc%c3knHXA+q8JfWLECr!tsTNshdwJ zZIwO~lM`&_R{0u;|FqCV;WY?>VemsUx<095w%97 z@x5S2-nY7e)J)W%_+y4}x7>!)_N?IM*M1!Qb2;B_x6ch;ypcdEmnj zv}R2usvP?WdV23f+4=8uro;%V_qEC2WtRvK@`}aRc?P^~n3hDviW0hR*}@7D`XmqY zBfWoWcS%lv<$tHWWO(&dVf9D18{zmE`lo9p3U+dWjiZZHSf?Bosa zo(UNe?aumQA3T!&N%We(5;F=lSpH$J?1L-~QZB8gF8;lEmAnV1?|DP3nx-P9E``CF zZPc@)5-RRhBlWvdDs8|CuC`eWt2_$$dyNyP?$bt}Y)y*TNMctbfxp%*z}f$*;PdRo zs4*%678{?Yy0)1#Ve2~1iaW=}cg@>#Okb9 zGLxzV@$SXr!oAoyPW|;rxDb!ZOXxiO!&Ty29G%RltJrPv{)tceVZ z4q#i)fjHFE2frSei|>Qt`M*O;MV+0FxTNU3_-%`1ll>8eoxIM&#VZGq16Dx6#Vm~5 zYXm#yUJxFc>;%)fy{TTY8J3)>61q?QN~O8MJkKN$mMNFg*_UzLWOf)%)Ob_A!Aocw z-;dP_&(MRbr{H0QI#*A+4Bd^gX;{Jw!Bc9>`dDdJbmBC4ajy^O^B1vYqB=Dc-9R5} zGu+hgB4#X!=7$~iQD$o-_K?U=7J~zMTTLHOhI`zeRYA`O$D-lxS+uAqS^Bsw#?YLN z+!A|&lcaXD+jbk)sCp;({<HXgHGf~2S-gH1_V zRA}ynb(hrHC21F2E?mPWG&T8el_y#cUyYp#eo-78@)ADx8^t5*j`Ga8akSL_B9%5=B)6kq zz~xzjpsuZmW23^*dFvj^J1*Ik*Y{wjP&p`i&E(Qn1=c&G$E!`HL}Ro>#4}RIz1=$U zB3~7*oHUD1W`Bf&;h)I1j{+Wg6M$Ft<&wr83ry_eCC#(r@xsScT2VTTSIrKWzK*6> zaWf$4x+;6G?!l|(oTEObDZJ`Y7>;S)F2;%@P)|(*OOw1fUAB?_IgP+E-FCsiBd*8^ z%Gm3M5$dXprM{!gd0Jh#%pyP$Lx=dln4}V_yp<0@8ROx^6&t?#Y!IzHHI=*8nn1K= zSKf0-iTe+DB^1m|qKn2^T#i3z?%{OsnIa|p10!&tWFK_c*_T&k?B$h3C49%%82fkE z;m?+fq&7w`+@fj-r|dVv#`AAs?}bXT_Ie~HOgT+ev99>hu1#pE7)d9MGsKoAD>j~L z&6|QYK)`+zT9_TfUIzx_mSNF+!CHoS^+PLlJU76&mfJAR*o9-q4d+8gH}Kst+vvoz zLF_#LHZ6U!iMC6ouBfE$=;`P{xBoTL@=KR#)!O0mhyN|&4Ob8G)$#L$;bBDJ=4_@Xw+c2u08 z^2jI-8*R+pVpH&A+8KI&SOs$|lF-993(lRmSo!OED}3wv39NeGanRiwChib}x@@-YMM+09uTZ9JQ;>MVHm>>*fGKMkVIHsH*vMEV!j zhwV?Qa)C!ME0p;|fctSOw=v)ieYW7xtKHD7dpQgU&xY@J2cq6(CEmGjE^ZGrhV`hS4iY_P?o2bBc&H=gsfDBO>~<-cNYzu?iPamYL|-j$J*)s6c=8s5hDNlx*RTtUX=GX9L)EB z8FESZHg3&d0YPyIFzj0ybRVzB!)CeQvX(rGO)jS4ZK~3>p~mGZN%XD5W*X#rod#Vg z2Hs=GD$NTxP*K1MO1{)mQ!yMXFc z^w|9AS$HB}fvWrez{7Jrg;UerIrg(QdP{TRPZFsx!G9bK3$TS^qb@w|)^>amz=F8o zuZ%7%fHYG*T&6G`-!)&QBj4hN-4Lv^|)$n 
zi%f5hw4a;v74U{5T==ZPALj+)4VUix<4y-^^Xh?O{Bdmld6uq7P4W?q_uz0J$(prG zmmcou#xKw3;;Q)bVy9LSI*pny+!>OE8XprW@yJ}n>(S`!`&V3D&|W$8MHd*Yp(|aF zui#X$m0b)JmSpFf#JkTA-vH++RfZM zLDq)baFtG?V0cd8lr1`JKO`Gw>Gh(Va|^^3Dp_=NRXP~ONJ+W0?yNj|GU)w&A(}tx zK>-aJa7i;4+!fp)#Qq+9QMbisA-Y&qpCEMAwiCzNC(3Q->*MjZP1MiYMi%jmg?JlV z^j^1&phGvIHu;h4L~m_r#`sxuRUOC*gSE)FRtp0*D&Uqqadh^P2CiRjz!%Jh^6e4U zqQQ27<`u`mui;ymXZYZ^x*G6}9LD!Y6q8M#0Z^IHjl4=tq`lKX=yvsju)fO$VUxun zR%%cbHPt>qgXtS_Z~v+I(#RX)UH##B@jFPEv={q~DHU8RKMH-P#S5`5lVS42e)7;y z2@un|AF5vYVaSE+;=L)|C z*HdZh?FG5>G1Y8^LjMION z;Dw#E=&fYUI*~Az)=uid)AI&Ggn1R*f31Mk#%p+fZ6v!JjfMj)`Dh@$cP+ma3EC&e zqI_2temfk>d;j&qTN97cLY;7))vFs`xNj~T?C6G%mnldTn8QqU{#?1n2-lS>aPEQg zeCSY5d|2m*DYH9pN2}W~EATz(tqkVXAB-^4_JBP4gcYmp&ZJ9zN9DctzM~VLB{K1n zZ4_QCQNRMj_{~g-WNO?*_?jGjKN^Eqe%2vzOuM1KaV2yB#+9P36mjUHPa&F0GMh zw6_obq^-xz8CFE{nCgkRW`smrw(BE|sryWNk_m23^=wwoUV)ERt;Z+(Jt3|0T6z2w z9js4$Dl}<&^8+2G&`rTSD)BL$?KThM+$AGP{5?27#DJc~E92$VSJaqcN501wp!)A$ zU~Hu!l*(4(fv<~k{-jFU7rdX+t%2<-Wu0#OAr%iI{xS(P=4q8y!P2UF|Xks zDO88ho91Pp;+-cvy5E~`N83S4wJz31=CRT>XW?|U8GovY!8vP;*;d*)=uDYPDIdCX z_e_Zfci9NLUpg$A?Si?_es^|#eFV-<4#d?}*P$}VR~Dw#3~i>C40?B{<){t!vkv1~ zp{wAK^-uV*`8(+}_243pSJ3S>z|wDT;94CAjd^ePxv7w_DJ7(GKTBI$+&kD-4oVQ2yG1l=*Eaqw87zqxcqH z7Z2r$?V8wo{(a%wxVs9o|-Tr+zk zmyMc4b9@0DOs@K!eg?|`68M4hSM>rLFavIp?u>o#P}JVcusu< z`ur0F*WvSdgLfD4*XcwUYr9z@IW0$hWjB7{u?Bj6HRff?!+EJ=He5gPhSm=p$`4Fy z(ZNB$$+#PL%v9jjjne%q=Q{0idnEXs_$$9wwqCq^yiNRIz7$IyC$N!i0=lc^P-m#8 z!Uw}i<-iVE&&Y5{Uv^Zec#sVh(z9pm^HwqF=RKk^dwJ6%6@IPz2R>KUfWL(weDRUm zOv7jL?br*@b-Oj5nRWq|xt7uTHN&~{H$NyD&eKGaAkrE7dLlAlaP9t9Xpx}_Z}ge$F^J%ZHSxg zC9-*m2F@=^li&xO2^R_(qR45plny`UxSKJ`Y z6AP+LsmrgPcw@kRI`F11WUXOYn8{i`DqWX_&7Jwum$ULa1|={${{Tk@r|``mtNCA} zwAYuKP@yYo;8CYC3Q9XJRqtc@LC9_Lj=o0Coohh<%zDB0nkuPd7TG-2$DAvrXj3ko z?;HP#np!f7tJk2A=S#RxwJWTd@L7zp9L{C8Ux81)6|G*^8E~@=Lu)3q>099R{RTYi zl?9tTkZi5Hb6IK5QPH{6D!NniL!$IYuuqT0_-BbS_>HoGP_0}jK6#Z4M^wv3PfX;C zzb9evklV1+f1qIDV-A=z)V z$=%h1j8(sbaJwJ>UD5$d6pzE#mucKI?Ii7aA5MWyJ#fp*D*Aii3Z>n?N3*kx_)l;Y 
zd(E82t=q$3q}e!r9AbeX{UT}N=$Wkdx}D--8CJwypo=peL*3P(=%ewE*3Gyn-2C>N zURq~D*w??{n`uouLam{*PaLo9T>&FHeW8Ny&xCL8l10TWMaa`k5!PKD4pUCm!qjzh zVT$$LN`PdzMp9xjT$f)CtcT)>T7p@S)Lh;Z&BoWwQCHgquZ4%P`p+1?wMd3f3+-Xg z;iWui!y*h&@4^3OB%_OOA*>tuO&l<|GwwR6L(dhm(4(>mzSsEEz}RTP?@6u9F~Wv# zwW@J*>_yhFOa}X4H#T{!h&OZ#_zx+&F>tM7eCQ+^~_ z(Kdjpdm3*KorMD$#L!*+8;VN-xS+u=n6J(N{MVAG+V6)7V`w z_l}dy&r#4>Ax0Q7ttSKyEGO}?7T#2x0q%Q9XzZ&4QMs4k?%*9%rSZwWz~v3y|7AuF zHnTZ(G02m(l|jC^PNLO$bLEe8Jm|O2po?6ly&BNHAZZ!JVG=;G3BbC}!DyPFk{43|H~te6z`DSu$9D*FuJt@^G%{ zwuL)f{ttZDeubmskHO+j-e~jVG>_Wji5A(ZoP`rn?bt;2%S&R%*V-6TyBuGy`zAei zt~$)gIw{n7ZsR)rPoi049QzF{rJ1|uvBd~``1U`oTf=TJt1*_Yz34=F!-w;GrL$bE zbd`?%_CPBwQ#L5os2o4XhNsQY$26;4zEzheOp3WiTM8bNdxZ|aT&cwBhSYxP5X`qKwb=G#DdauMaotI>9P7woQ|MsL@Qrx%}+rToiP`F)JV zYeOUO>7fj2{p(F(rr*Vy_wS+HWH%YjRHh>uVRSd1gw@B|L0{jTmUW(mC40`%bf1&B zb?ivC3K<33JRZ~E>2uU}A1n{}1MAMZ!s7$uA!_j)etcp$J}ETA6-hC|$Zcu7@M-|6 z8|TBxeG4#n{8Wx}?M1OKdU5wjTK0RpSV()JObIsGmtO}A#{*{3I66z64J>xU%FuhD z^V5LuJsjrjhx2Q1fINQMi0 ziZ3O*l+pYp>|j2YGfo+CHF$Hz+&O$@^nB5=eh3D()PQpzf39B;$75Ccl9rdW>w34E z)^*u0|7w&C;tfM=Hhm&~_-_mk*w6xFey$N>mu#ShYAs=}xQMRxen_}S;HyEM@rt!M z4)5#8{UoS81c}0R$5dhbjQw1?cOhk+9>VR0!L-O=4POay z!dDfqr@hLImJnOzarH95U0sL+^7V(LZwv zYuebMgK-II59tUI29_XKS%fR^E~3C`N_eiR1OE51QS?vGW92P2uw(F3C|cWvU4FNN zj&>@)SvDTE!!LnZ;39lo@5)Yhs>QSUQ&{De2JZQm#1rK@n13rAhp!sRgBNWTlp=z- z!=@_X&pTgX!RLCi?pq1j9jBvjQ87)ZHlV!^!#H(UGxZ6w#w*&BaZaN;o(W!oCnD=; zrNTpEe?w37iy?gj1FJ zvBCZTq0PM)tX7wvMWeUjwC#cHU+@y_4TAZvX9K;wSqg(I9q?N6d!cx^9z>_x3KNnv z`0{skw37ap^;dS#=jokUeTEJ0JyHepaus;zkU0MHc|W;b+koFLETm}oMmN%Ci1DHW zx?WU6#}q@pFy$3ZJeEx@1^-~&4lQ9)>`MExL*`2|sXK#L{+#5|Cl=86Njh*!z8-&VnaNt4 z?Kr>d1L-^%i~m}T$nU}_uwRje2WxfVVen(|6`o=%vmEfamL=OAn82a+SII0QPVCdc zoDVPV$WuO42*o|;@zhoGF>q`QjR^|nxVjY9>21dOmr{AC(^o<5q6-bFPQa=h5pQkj z#^FOt$okGkKH;bZ=>vYszuocV-@`^h%H^?Km3oj-+G9GLKMIRq=1RLHE3TMzfECgb zxKC0Fd-%(FaA@{D)(#InG z&GF&p_zdy$k_#}q#s~iz`thih1IT+%D^-{;g0R~&V7bu*+<7FNtAmUowCe!z!-d71 z-A|%uthx$`&g$Tn4_MY`Jxnznj;i$qSo>)mtyZcQ7ar-x^O8c~j(-{SGfxt;>?Lbq 
zt3RjI#`Dmh)~wZ#20rcSIHgIGrcSotj54{ndb%+-FK}YLB~h?>O%GN$lPD_QI)L6P z2jN5M1}M$H4fDblv$p3b>b7AYUC>X(IcCZ7Nj1ALCqIlj&z5KgfwS?RvpF`^YH|G1 z61M5Ik4A5LM&kpz!ux9kTL(=ci}VdF^wo#9uPv~=(+4W?3PFv2^ReiC80xhAgh}s) zgY0_%+>zg;)z??@@I;B!dfOQ4V>NknuTgB(t_stKAa6IhMC9!)*~;qJtLHX~nzn?0 z??Mn72Qo{0)iVldou2i9I$k40LNvDVZOhE-psgJ=8E_=L4s6>$JQ?Ox9V=Oobm zbS1P7-zqDa|3bR{uW|5uXFlfkL9`gXg7pWEWw(4KjtKe+4OBb9I zF$hD(?uS_`3L!M`JiH6K2OkKj`D5Nwiu$R?y`|>g;Q3*6;M_#~FsB2heEv>o zpbUq{3Ov7qDgNHthj-;Zfj$A6(rhsvt!IzmLnV_aHL{cgmU{5CisNK`E)lb4|EBb^ zq4=)J5-xNN6K7Z`aQC5FSZbq&PMea%fjyGZAx{wxKK8(8e)C0bM`uc`$st{-h3+P0 zq`FzO!Gi${IVR>K-B=mNS&eeMd3+*$xMB!5iuCZ(jUp)jt-vGIcEZn5^|EdkyQ1ga z23ba+8l&0+Ds(T0oDDO$%d}M574A=3OM9X2e`o3Xax00nVk*&L*5YsDy=eG)5m@TR z(ypT&u&84Z-#Y~s>_m-@ajvLtY=E`;tK&Q`Ow zk*A*m7n}{_W2cvKiJkddAfn>W)zC=E2a=61|M}E7z1HK9z%x8i_ zaQrtbd?L-CZy%3h7aI?}ySNPQM*=9Wn$Ksote~EV`S8`TBDZByG zlNEyMoiwO8kVYr_=YfKkGQL>tCd@Dc*~RZq$m(>Cu>N5WbQ^1l=TEBQo#15IU6gF6 z`Mt5ICv_K)(um{e3iG*{!;o37g^)`e=D)w zj_e0hi5Y=$6S$z$82S}^QF1|44h3>5W-lX%;-^H2ct^5Epp8p2Jm)lYKi!Ev2s~3V+AEDCb z7s6`UVOqL2f!bSUW0%ewWDhbHqVj-!V&th0U{Y+#Hy$mP=G=G5&iV=YI-P@GQZC73 zW4`doc>(+W4#c&$obcMCn>6S_8osc|#$V6e#l4>=a#DdKWL>L=7sKvTkK81|Gus={ z@(`RgQ^cnQ9qG}M5yGA;ZScED2Q#PElVMyQ{jRwUb5D2R(sdSsQdzaE;>+vGZ+RU^ zwM3uR85|(H{Aqk;r8FlvY|33)hayi1h4lUcT4OHE?>(O`ZMZ^bWtrrwmIRqvR;`=U7q2_{W9sEjym#3(stf-?k4K8s*0vOTUfhlumYwBItv=v3AxN?w?}lD0 zs>H$f)P)Jf#;9F7n6Hu-zV#`<8Cq)iRoje`*WD0|?kZ!8lv9)K?kFg3{3Co$E#rQR z$I*=K+sN4JC3O3zj)qklsDI5?)|6&Nn%%!sR^do)nfhCHXHbG{%-n}`GtUS=?3P2X zmI$F_Y#1J$k`4wD4MKy0BOe?xim$C$jH)@~=-V_cg4a7`nt>5K#J3+7b$?7{zYbGg z=rmrk?TZk0_!VteY=qWHC&6*5l({~5SuoO+%n}byK)^gFR;m&~({w*)C`7a6j#9zX z*M&o*cN!ZVOBcl1s*V=k~+?*!0Cxj82|Q;`0mAS zxt+><^b=h9@<3%C@imN&JFDYk4P9OzE|F&+2XhyRfcI0{r#cnnQ-Io2VY@1WUEN9{ zN56z_yk91Ke`Jb958`3baRWYJC!p!bY%I?mO>>+SX{JQeFshfDu%X>K@59-OS#@XV z?O^8I2xGhwW(<4!e1(gq(G=@!3;}rppWp1t9faN-KEi@tWIKu9H?F0Y01~I3RAG~m z4y3G`4_{VJ#dU{!i++Q)u*%hX>a1~<3Z?7#)aL?OQ%kO7IP8x_qAL2umxz1x;`s8x 
z{WS5hw{*Yw09hkPqVCiL7}&KVn^_g$i_}_S%G1%{S1r+?`?P_3pB!v5v|w4;B=(Y5 ziRt>PXnt`&p1v0&;=mg0aaj=)gYS@aV*-47mxkYl?5FV>!-d!Pj$upfOz~WwLK${7!oSko+m1DY-tO1wW&n=QLEX%%ogh$r>$1}=EBYfurX0eWM0Yoqt0(t@;YO>^;t2CYLyO_-E)= zc%OU%GC%sc}YJqNdrNR*=XjoJc!=zh`w80i3G6ZZ7ai>@?a9s@ zlJY>VJn!x)2d@c*a9)~CeD2Z-=ZL--?skB++TA4sgAX>gob9| zI7f_oOxIhtVQs&1T>7Mz&Yayr*#S{x;#a_>x1=3Hqc%DpD2A6Vhau^05gZtjfaYDV zQ@;`2`RT@!ti56xS$Q0WVXj-j%-0apla^8a2`6lhmS_T>rhrY#cKkQF9fpqI$5Sic z2u(}7@zNfhV65de;r!&)Jm^BcIIg2V9{D^2Uc4U9ts#%V)Sr=iF5iMY*Q0Z}4?ZQK1`IH%Z3IYkmNq(%?gK1JY~TSNKOco`RWk@1zS z?GO~-AAfB)O@mb@)3kf4{JpF{k@eniWQ$JAV>;S*S?F=E*d7dMdq{kOrO7%b;s#V+^0M3q2=4 zrax;XQ|R(9RK+nA{o;U-WBGuTO9EkC%quz@7>y}$UqI0z6m_@Sl2L&t)+dL-fXXp= z%5@a~J#(2fpWY`Om67;fcQxwP48+pVP^k9M!0cs5ag*vyKC)|@Z1wR2u>3(jw#7>G z9h0-N&HagNhh;*iq&?zBqouU{;1-x=r$EokK9-_$8qO?vPEgD3za0^r0U~ zd5{#F592x-Lzv2N{J5VX&w4l9%>N7gE?5kC{-&}m-kv}S(ks^Wd?Dp^4!}Vi#yo{$e&wSg)oj>R5LZHWnnwCAlv>&)5iG_I{M@96yL=-phlu zKWjuV@RcXE4i|8e8aOE=2zT%96(^f@hW{^QVaWdrvala)u==nko8^qayNYFW+}~WL zIpUiX$Ti0<0qyYSNg17;`xzqR?Xl_hUr8q zQp0C5+JjvC@39ymTJpv*0#8`KmzFCFkLvLAr}psSdKx{w zJ``j39fzt^UmVo0jU0!mi)~()=xO~KG55AUK6{bI@oQ{&$DBygo0mo=T7jTDYBnYh z4;S5n;>l-aC_HwUO(mo=#^kQPW?VnP3I z3N-6{6v%rGzsm69HA&yWwN)K$JDH1fUml{eV;THBK>8a$Iz-LBWwgdC3gKS?8D!I*7LHPBM*QSWxhSBFkdnZR;^=Y%+osy7M_cNTa2gq+ohs$!5?_yITNdvB*LoHg`nB1 zTjg8bW(utJ!k%BZ^QHbrXt9LxoD!G~?TKG#&#pMyv3V>zXcmEe7egM=A(|De?9pJN z1Fm&YH>rIE~S}sI|@hh z9I*eGOLVpQgD_w5I~Xhcqbv5EFfLRT`gd<2vjscFV^P*n8|#X;BUDjEc__bGogk*( zpU5rpBuWpK3a6v5QqAmsQpipP4X}`AIQ^o+kh9>4J*YW!33gbmz(%s&Lhq6g+}yAf zH+rjynHN{%)(b1>i4?l7UL1}0QvETpL5mFT?dLsHZ&5}{yxg;C2=naM&~!Q*&J^~f z=H#t#T4@3{NL{JBMLoGlJ{%{+&SkAGW*E{KK`Wk)WM%gO+%;uCoz2$7OFvyC1fPb4 zMS4#kV)tS7Aa8Wjp3L%dB{<>TFNmq01&1DP#rx-@ATu;e$dpQ`?xp{zXlVe)^AYi6 zK^ab}&gN0la>;VhDmr7{5k0$2z!!?DkbOam3sW=c;pKnSVmVe&_c?)a3pVpV37bA9 z;0X0OzD?NUa+>!_OT`?G2#iZ#g%M-s@aZqg)N|l27;*bLY&a6Z=4L^5t4sp1TZ_ z9mZl8U8#6;E(JE+(3cHS>5t>a&BqUQ3sJYd3eOMMU$pv6rdDU=$tK41lzc0tVhnNADZc(4`MyTvs_ZUHT(7&iqfpZ=J;B 
zGqZ(r&F3+)*a0j(_2A~a-5CDgZOT;8z^H~!xU2l8U|1c^b|=om&NnkKU0 z=rJ@tTSP^IFOK@LjZf*A(r?Eu`0UtwNH=t*mAeC(s!O~Z+V8H=@zOh!Fw;$et9W?|>O@2dN_tc2z!?m&dj#p$;GmWJS7dj3b z&6VkI$s(}{9+&rp+mCP3t4*=s`DidzW-r3)?Ux}*?F4;pEv0iyB}A^>KWH$?i;@wA<5b)m=jM$_MN=Zg|;!Y%m#h)Xa?^4mW-kyJ{DDx6;Wxm>H3TKDhfftPr z$bNn`JqWL%D^p@5tnMOu_IU_KeD1{SoK>l8@O8@3*~}v@-k|%P?fFXZfARp`XP`d9 zLg=2Cjzi2$+4EwUXeJ$^^4@%a2l`8>b$b8d-K#~5S#AwRUR-HW?&li6qB zUDG&VaJ;FfuhU}Z`N{9zWvlLLN0`MprADVxjPB9LBK#OG$ z_77KNt$hgJj%{$*A8Ltx5Bjld`4+CVnutmbX0YW;g*ZS8QC=N0SJYEoiUBbyq*}iP z&EKX`OP?%GOcSI9;3n93uZe=?%LzRf$=1%6VcOG4{JVWEeu_85zawX(PuE=0v$l|q ztxe=rb(ctWjU8#Mc}_nzTJoClr(lJz4X3)zl8TBAu-Vm|PrWolZK>2R?sVaP-M2yA zca#bGy1Y=rA0CKXj+V+Tw5+8gcY5xC^*i-(ysAC9ojoU>uJz8prneFxTx?v9J>!|Ye zIy?Tjbr5R(mV)q;Wcc}#gbrVEAB?_jp{1u&DZ*$IKb*aWjdef5`-D2tCHncu@4d=B+e_9%N>VrMw+XE&%lf>SL(BCcrMOy)F%u3 zr!?5%I&C-~298H+gths9p?7r#t}0I>vql3*#B4b?QoWO}C>3skGdYQ?a z0XY4;4xbwOPyE!-0X?cDJ?E_*@ZEDbmyf?qqf{JGspB2Go&dsx!gPFJ^G(oQG7x`0 z*Q55$nb=MG-XFbn$ot@Q+^G3R$bzLfK=Yx{CD@gxEvrD;4G9h197oPm)o`Mjghx|1 zKvSE3yveLKUA#!zOfdCFqbA2=z+C+Uwol&!(Qn%)Nj)+?hvXY>*hI-FOMEUQU49#K|}VVSHo8P@3IdJ z>D`g4c&) zoz?}|*~c6ec4=Zt_5{dzW=Vc>W8ORd9+mDegjr()Sv_$DT`DZZ$ftkl*xhXUerSsX zvFe74@H0h>na)a4w(O#r4mIHoRQ9AdSFSh-;~vcyZ%AeAsS{+pUML3PN(uZoyc_Pz zS;6KZI=oq1Uo1MW#EUG~VR`#?^7v(ji-sC;kH1q;$a4UBAEa%H{pE4zq@QxB5srqO zro3Ce^0XOBWcH?&9Hnz!SLSPVPer>xht zOSJaFJ2>0ujblD8$C1)fq;O3?{40wVOTOQQFz1(~kx*T^?_3_tZGH{kB{XM`M`z&D z^o7_-XA1qvUV;u2MH(m$1d9bz!9sB!8)fz8$PLP9^mQVl?51E6UJkOE58+6vHR+UJ zrr62f;6Zl*@6Yz*mWk78=dHQy{!ie(@6+Kzi=?c&^3uV2<`#0Z{wS18@}_!cJGw@n zgu|bQ$%Di*eCyM3Y#pmf>mpLANq3MqqpLt``|ZW0W3EzS-d$mkwFMjt3d5~wijFT< zpZ;I$y?Hd1VfgQD$QY72(uh(Ng}vF&eIZduBc-S)W0_JaN}1=(nx#Za8c6nLKleq0 z5|w62g9Z&6HAqFh&+q(x=RNOP?|IicYn^{zYgyK^_I{qdpXt8u>-v5_Uy>g+4DR*D z(=2xm@$AgN<#)GffCd*MU=YGC^5*dHE=ZIXuVCuk^=Z?!K)gHDBv8mmCj)05(t%@Y zL}!#Tsk<&mYdLK3VU=g>!0qu+-e$pkoBxBvUwwr_Q%BGgbkRE>{OOfjPw42@5`jTn zKN+jwjZVJpf;)QSa9+qR4%K{x>Pn=Ou!b=tf8hZrS^a>zy8UJLJlapD>8BCb=F{|t z2Bo(`JLp*LLL#S>M(0dvBl`@_lAwQY=dPF>MqjG 
znVrodW}`0`Yjn{l9hdJ3#$<6Sy1rR}Wk-g?=xN&2Yq>J9cb$!5pW8&DwHmIL&Vbb^ zl63Tk>#WxTRmwVzK|{l3DE%Q7elNI0`tOH9N%dTIoa1{lc1@R)JzA4%E?fl}w&3g;C#E66-2wRNR*gx5jfOaLrQoR_0nV zuOSNG8pQ$8sHeVW>+puU1ojCo(rPvt#($Vjo-7^?JMKi199Pt~?Se+&(csL-?don2DA0?(jHkG`{Vf1-0LI;@)ln-ne>_ zru~totu-HMWJN5avCkZ5kYD6py*>U)cub|?b6FC93fvmyX+YX^a`A=^%&yu^s``zwYd#00QP##;5_wEXs0Ow?lcsSF z`8Zs2CdRFhp^EKtpm=f>L^W;z%}2YbrQ{oG@b?SJ=$Hj@(n`32i(x#XKAv^n+fEaw z_i@I2eTZMBi54EluweaK=MRe@X6Y1A{yYP&op2y9G(?A~rjVwbcEQLQt8tT3 z7JXbb1?G#CaomtA{kH_buHhH$@k__N{%dq7))l%76j8+_6uMN^;r@Ys_V!CjC|x=h zx9(u^*E0t&9+84Wtu?gFshfnfF2oihi^UhF!lqIQcv*LteYN@m(>1*aB#%+{H3x(6 zc&7w1asbnIn#0$k9)=kmhH^^}(5VK08m@A&n^9Aiu$9TvzR)jd@lvR)6e z_G;*i?@8iGcWfB@oIG%wMxv9K!oIFX=du^R9A#5!tr3VG!bXbaD@AuiIo@r zogQhU#JOk{DYH3iT z8LIktz>z6kWWkPi^w9b^=zBF0T|FgW#)US>%Uuj9R%wVm^N5iaOLTD+N_m)rhx2G0 zY}kp~jzU^(vmiKJ7Qt>IRe!zpZ$6RSeN;+(u zbsh_%8p-iaQ!4u+56#lV?7I2}x;V=LPMCdXJEfLjXw3@P79lIn3n>(g=vfc_xnp6d zJsRe#t1*FsaFU-f1ER#o$;IiD@we9-vgeW})ePFtL`IC_gl)^{ox#Oesjh-s>aR1M z3Pbear(!g@VTR8wuaoCPTUzpRiD#XscTUIo-PaV1vF zZlY3rgVp)|kZH9kp;xAc&}$Ppr1L(mu@*BQKf)6lR-lD*lfDzb_#}*dxd{A=4x)fx zMjgFv@o$nJUf3&5cN$4C7k=`Y@^BHO5y{Pe-&c_r#*@k9^+$-rg;KB>VUDMcPGw^5 zABAk6KV)i&16KNXQl7FZJ+JMDBOW-x`M4T#yWIz8&c8)(@0$#^s(Z76pn{m4B9C}uJ7|uDo5{5F2 zX}NSM7cG~Du_qlFyY5_cQm@3N+x=L%S!+RD8IM7oi`e7I+KjAi64wBVqS9WIaM*7N z45|4@CUF(w>}wgYE%yK;k~sp-fzqh=t&Q;850mr`8BAS1$XMMB#Ao^DAnR`n)_)3V z)nYHGwW%jzYCo7I$r&g$b1`JaCergABXG$61=%Z*z_cSq1WM)MtG_$hK0ASEbaKt^ zXZOTD6OJ;9>&L;Wu(4#$tZsoR?<2cv_&d^E<^mqM5*&!LR=m080*Sd;!LUL4pp@Yc z3Y$6Li2q)CalaDSPLLL?@O{l(-@pTXmnwR#-~f3c69g@H8sMv>R$liEk#6EdNUY%xnlS755%)So5pwCqDR(Uq95bA zII_xQs2o#5rQRCSfXn%C=frgAv%khjZu5kKZ|3A-b{R=}yBVKFO=J8kn(5%(P&Pay z7H(TAz^2=G7_$x^+!inwg?Yua!ZVYaO1xq-9p)iSze|O0A3}?30dz}`#xQ+CXN=cn z7CCuvke_jQ$cY2bc4?3)3s&M?8Feh5w+PBMAA#c!HqenT73rrION?|FBAE^Kg1O7; z>9&pP(7K($oPqIFcTNOz^kx-!UVBce{yig@IT25+?4)eIJZj6C(9bVb#Qm?Lu%bDD z_DH(p68BROy=DSDO7J08#^ab@n*zW{e;B;r#F+t0<>6bvQ97A31Z(#jU{@D+PS-ob 
z-x>RGdbbk}c}!;$x)+e^#fz!n=?Y?LJp$y^Y;m9YMWQ<|mo!JL!k)U*0=-)kQGCaN zs8`D3y-^9I?CU&8Iu=ap7TM58-IqyLhXg`LEp^_zi>$R4(Xf}UxRMilsMhegwRA9c zu42gL`MtDjxdL4km`N5Wz9U_NT(G^cgF5U=1$O2mvhh^={1|lwqHNb1owr-;@uNVAnok{ zm6Q#qKDVn$(WZHDa^xQn+-9L-y%Ss+b`pd}VVnUv|Y_lFj z`|jdo``)lf;S0_1`$k4{LdmZ!O11^n&&ei{7L47JMvJfKV45h6krQuYBc{G&Of>z# zL7)L!xdu0n6U=8t8KLN{7c@p!LP*Fxm>#HY8@;_AlFD9_mMf3RvXoA^!b>M!axKLD z=tVkHHw|QdAE2gE=fO>r6V5j&i+x-yV7-4Jw24zdw%vjKJ>3Yy2F+;i)l?=t^$ebF zj3Wlmv^lWcVP;*;1!5ikmJAvEq$~c2Ny3%6~^5?gG~+2 z$~@2$`rsCcQjD7rMx8vbk=(oAXeD>vk5^^E$7_zbz>*P2+;*cQFHFSo3mRxgaUR6K zzR8@(cfgzHbE)r&*EIjsAblCm(!5vWpeE9rNdMbKrw^9F0!s>KO-I4(2@`u)Y(6b(6xcAC(9v)r%O-N6rjFKJx2R;nDTNN^l*Mfx6E?JV!N@Zp{LI1w=X2o zezz%qO(h&o55(HPdN}lp&zbHrQS*m6`|~^xBG1jCtFC@#oWG33{C9q!lIl-Jma9VF z>o3gXk=N*nS5?II&Jxs;asutdG`jX&9-jPLNiXdavNha%e70XPX6C3sQK})@X?j!b z=5kVan|lU*Fa|5{$?S%4F>uB#401*$(zjha6lWYEu9!pLZ6H-jAHXkLn%wY^$r~*4n&B)#N`)N;w0{Pj0 zl=`%!(z^jHKL1lp=NeCfIrKa>RJpVT!`DUR&s0O1+aKSC0k04lHcQ5 zI?!E5_cU9;@eMcW+B1C8p12pnq`YX@vRN3t#~M@`i)f?!Hfme>omi55@U7Rz{^E)Qi1O z9?}NVa?u(t#YE$+=Y@2G^8@zO{R^}!EC8iO<7p?}mcC)}@;4h_5C!$X7cM{(zi(T)YH5@$lj5YXug*0tX zAPf1LC{EskSNxs0h@I!O-E2Qo9Gpp3?At^-bNyg@P#WPEZ$(uu3U&bDU#61C6*ME%e^>u(U9eYp{o@RH1ed3}r8C1OQi7B^?A!&IdW43h*=zAQX z%YK-V(>(`ag~=NHwqYSXSFDVEYogeoVol%`cQVrJ!^!5`X86NV8^c;BGXtjYiQz9U zN@vSEvgXcQI5BT82b@l!`yU!&@Ah6&eB=nc%HNKrJEnrlfl~0d6rox)ZE;+6TL>l4j#ZV@tu7U$PU4{0Q!?-#oS8MrPMLr#o z0^ubS(tPL_+1-^c=KEU#+V#^*ehHA3g)uy6@MIWg>+}%(+|2hpgZ3;hyg+zPFGKjo0g;-U# zQ$t%0Za3frN|OHY{l$6WHjoeIqxRqzSy>$A8%E|-pMzuhpXqq7Ex7c80o`2V4`y6l zxMYzUKF%K^MwSNDW9JXDc+z5wb8jGVhCfO6Oka>U_(`U>%!go}ABb}7>C2Z3;Qf)6 ztleV^@-?oMlpbwjE%z2t11}d?(7ubF8erh%Knc{@Wf7OFNie5I2_96rg5i-sl&Dz^ zvnsX7kD_oeDBMho8b1g&@sy!tdLZ4XKLub5lW z4&cgx7VlNQVGeH$hnzWcn6-=V(hklTEE6FQ`9zsXnsJZ#E}cU(ROY~iH?72{cOA&~ zSJPd4W-&ei`qF{Zl64 zXN&Lj*X*~{TH_g0)4mzJ`0p7T#S|DgIv3st+QXY=oWbgcI_~q9VPbr*i!Zc2Ak&q& zT4Bh2a$&6;Oh{3J21j2=Yng#bpS{pj(wjWrF_xqqv|$s!hQp4b)ifu>16O?YVnQa5 zMT4I%WZ{NK0?YDL$Q_YP4YFs$gVr7RDCP)#e!vON$WFqrd9K8^Tn-7oAexS|u>5le 
znbP@+y)Si*{S@3xXKI|Iq0`@!>iJb<^iA&hVX~8qa-M;0I~1Ui6VrV;v54;M>ZJ?i zq~T@ySXy#+JsceInQhFR0K)dmf}v|8pncXNT-;HF%DG-*H?1w;lcUEm0e=YIa^w6u zO*stx7l$+7{t}Cva%tz+RI*Wf5soa>!06jUG{|BvXHaL}w*^Khe!4#`4_{K>O9~?eGhK*i>qw=N@H~W9YYy=necI7GEntYOp zSQ`glPqvafvkw#5vnsgYlbA8J`$+zrNx`v8AJXoGc&NS0MPJc3L^{J4qzAaZ)wFw5 zTKzS-ro}~>E~`h;GjkZR=M1x`XFmC}HJf}ZH-c+_`ss~vU8L^aeB6Fm53WQEvOcSx zQq?nW>Az)f=>~^sq+`lwBAQD;t$PwUk9)!n-dQic^m;F3s~O^1qJ$AXBk7pEeDHd= z9%?-$;q{BL*i`nJ(K2-t-*cD&BYmHfnhee~c&3aVoSF!#R&p?zs^ZdH^GI?)8yO0V zBs-NalL-@Ih`(tXTzz2z#@e5Vc<3~-iyac{yqHaN7HpsapMTL+9Du{>qb-=+nu0!m z80afmM`r~(3CyqTfOE1LWUhk`JPen_t0zA)nQyOf(T|5fa<&GZESU(4q~^j`<2dLN zO~n;GBbf`gMuXfEDJ~u}j>h>;f^j9`ko#>MIR0?Q&nn-To=H~pkh}v)F#AUpYz!c4 zh6420`a@^dXky;Jj@p=J!`_LXnR5MApscnO0&dR6BtH{;{`Vg@H>qP~zAixB5#Dre z)i(ku)ug*(3RVtRr82`;z@+3rEa{nz%PKC@HRq?p37_d0I!YQgR*eT!^?0J#o5e-h zifN+7NlY1g0{I0SK%>=<(=()?b%_KA(+i|J#qN-^DUM8xuVLfII^ii3clyV36yzMz zBL`%s({HDT==}rZ;fk&Y*Yi%npDR*0=&T|H{c2;%HJ{UO!DF$cD4I5|w*((2Nix5e z3!E_7Or29W`1+kuuyANBQCzP>o;*>9oo?pnJ52>{iMYCTP9$zKS_rR>a&^qU6EsOG z84otpvp@Tiaoi0gZA%x^*uu}uxOdyc0|%zTQ;&n#@^&UIs!Yee3&rRZd|oVUx^2VM zeWqD6OTgsQ0#r<+;*a;ZSlB^XIR0FViGLBsddDiDf#EfDpUFeZG|tp|?<{*M`w+g7 zH3i4S%}EO}C)c zC$>?M#C`R4>NDmJDPEz7UL*Ek^P~&nli9yX-qoArt*DmH{b)#Ku3e#xM>$|Hw`N+? zxrf*o#5ZK79!Gek#z@ zpW-mP`8#Q!t%T*yTcN9kgF(`1?5x+rVeVc|3o@;Ys?6t1kB_IqB7qMa()&s*qb7j& z_`U3*ffTyk*$GuYE&!FaY4ny(0j8K7g%>YIfR#HEowat@`?Hs5?z~D*zx_+qdn}-I z<35nO+Cr5ZIRLrpDNrudAaZxNVQ9ArwAjQ844%$|xEKy{dBGA3E{y<(ycQbc%RP%F z&(ZyZJkSb0gEu0Q;L!nfh}p#%ZN^zb-02jmX0eU#s5T;FH>nc!kmFR*1JK(-ji@@W zW2QemL?#$0!KF_UWO!B){T4YuhPO!2!QmZrT;)r$#wv=s+)8J@1q@OJz5!mkItu#l zW|7JN#2~!23(6Ws((NiP;?U6(QEj9HTY6_IhdwQ*RprmA@2O&#dSwmn+Mxx*R%F96 z{wwzFhHEtO?_7rWfIxfTG<4Ll$GL4M7)z^0>f67Nd|S(s^l3{l;Yc-EYpaPi9dGEW zmHx0QTo%@T+6)R~=7HJf>FlKuse0;!4d&)!J1@;3!kR*e zKBoa$rZq%&x*hAiTaP+rJt8BkjBx3mg``L|UMzVhl|3mOpf?i7;)hN9aD06TyleeL zetca@TKW?~Lq(4+n3G3(>zv`ee-^d#*pGBf1vyq-M}D3uhdZMf{5d0zLbNMX4XHx? 
zin-9vK`PwVx)YT(#vtT0ts!|n%!{;E8pG-Tez?kmiRmCSLQ8<9?^mKn^-?spn+#hg zpTn}cbR1qg4wwI)1b>c8lH{61h^=0Sp6!$Hm|F;Ez%}ADIW;u4I|7ntn!>GOYqWLN zgyok_QT5tcV*Rg;Exc)prH*=7b$1@T6f}V#^(w8C+X<7ZcaaO{CgI68FD!1=rHd@z zu$o#PXMcpq;qf(3=%)v#&`eGTrMqh>BT-*B>m#zgZWdW$|sil-|QWN%>Ci z?hmHkZ!ZCFTLI~Ht0UX{i$E)H5`3SS2^)_k!J7dA8sE)8S17@GDd%yTc|Dd@A7Z79 zm%(q9r*!nK^WdMHL3J%npgPYV7w9>mhsAJQ|7kIlq{S1--%r_<3mqu9og!8;hqyTF zY>@BZVKW!|d|;0SjjY}ZEl~%klHq(vpHhqR$zqxkt%5OCZ|RpegT!yXCcK?u0rmIP z!0yr}y8S>Z*{Obnv?|-;(q?z~c*!!>5=i2N{g4UqC}*SCUzs zf5}711EhUrIJs)xOm4(FVjicxG`lYa(TZ&(+#gflwj?QaC~;? zJE^m1p%RUJIQCwHje4^XLp^7q{;4v$>@)|VFp@|8>Rnj*VGjDKHek$8101(>8DiQ* z>T!Q9_KulBW}W{*(#I#DlXWUC=ho4Q#{1Fi-%2Puev}wljlmM33#!lifP%dNSkA#S zcFD=mg_^_BzpD_XPx#=zw}*(lq8ZM+qsv~a8%J(Ltwi5!Ioz`Q6QezT3wg6tj=F9% z!6m83aOrUim{@57YfR*EPiQ=*;WE&C8OL7Ic7kI?t>U>8Mv=>A-L&HT0?2xzfsT{5 zqi<(6t?x7dE!!6~$8rxC?JpqrQXh~P-`C^Vok=u%&i(o~53bO!Ne6Jzcw|opjl)Mc z9M$c-@S(pg%i2uE`PnfTRxL#1?@vhQUvupEZY9>w4CG)e8JJs@2Aez&q2v8#qO$xs z`*vFv3xV2b;;e#yyN9vYs4<{hI6h9eM_rVoQMbpC=;m(1g)_KNgC0}bazTR};>>#Y zgWT!!uqoWN`;(T0kHO#%$7!Oe9EiI77~AzpbZCbaU8(SyO7XlQBi4~R1#F{PCF%5o z&PHma^OAZuexSM;1!zfasaySJc5+4p`udj8alzJf!?}@==oyP!em`o6zB-#2ntft2 zzb(Lwl$%VqNh&%%EE2EUe+o)B|D)lX1oX?uNob?34{XE|su)D6OX^f`;|y@megkRv zbet`Hz7?Xm+JiE8?*uj(L(s~P9MDP?ChyFEPeVJ&!uy6~Q(iSubY20yo~9re|A4ja z3&5nY2@siK#$kTAx#X@%Fs(fj1FmjC@jV?p>Rb+$jS9BrE0@stZZnA7s7Nf=?S$W# z3c!?$oj0*d#otbef*gK2xw3f!475(i@7!8?W#uwdnOXzG+p@_gE?BIhm!(}zLRei> z0oF1_ByU$a9@RC*sDNbpAwHj04NQS6mR_hDF&iQVb#eEol}y4V6HFOWhJ|zy;sld2Ul$r@xM2FNtHi!u1;)AC zfs0U@B+UWxKHnVsFU}$_K4ie*-3I8SRYfNUm|@osZRqTbVCqW#vZm|q5n<~rcIeA0 zY~07K{S{x+29rVv@OeZ9o26-uJ{MK2z7D(g9srZ)@mMW03a`4Sg7S`;SR~M(1Z6?v zQ?TIM;(2IG1jH}k7?o4*BL_H;-q|NVNRXW=Q5iFpst1RlQoA%0SG1kgD3sK#vBnYe1$5q#DIiG_Gz@?Scrdjcx<+JRI-7B0QF5F-L3 z=$EO|SdM2Q;1h$5E){gy(F3%)L63Pl${&8TSI{Q2gEXIKj|aVV$t%lo>`FP7R@_iy z{p3tw$tWvE^FbgMJxPU@b_bZDn2nz@&8Wp_WiABh9Zj;IFNmMc(wS2qQ&w{cSG(zl z6^7N~?6@SlWi1yl!$qubm{14%^we=)nh6=%xLBYuWdz>b@Q+@0Izlo}yK@07ez+&o 
z82H|cp)yPbuA44H!6X@6w^9+G#~&la&XtCLBu|wlW8%gM_QTR=5Ivp!90lTHNK*87=Z=AnE zMriz`GDYrq{nRuXkz7Y_KjHe;gHu2|q>%j6j;00joKyD35}F>emAQR$IJ``n1q%0c zId-`;3@&UX{uw!le<+RW-AlgKO@Zr@rg%o}A2$ZqLE`+0SiZ~>Uq$5F_~m?KZsc&< zi{*i++LVEFTs%NmDwICDo=;CN4kWt5-Dni;!4<_(y4J`|Jk#nG*(D=~o_AKk>8jzJp{d3{fa@>2XuzIHyU3p+YrHclossP5r!&$W;7-aVGIDMficgKBwTUv^ zvptJ$=GJ-VIy`aS(l|G{9SPKU2lNQaX*R8JXnAk}He$ za=^h|5Lnp3n>6J$!RS zL_Xe6g}ME1u#W5P4USm~M-MN+Ifc97u+LA*=ZgC67Zq&pv>%7lOf``ZDbjfzcbGVh zX~2)Mr{X**{8PZA8PR36qiCfK~V#yWq?X)9#q@quH*?Pf}N2#dfxB(hUp3}aT3S!?E zgWnUai1cv{(C2`~LvP;Dj$xW;?2rT>^^Cz9E%3wX0$3`ojx$sE_-a%Ut+`K-Z>2}~ z?{UJsoL2FsdE0SfvON0!VTfQ>F7r5E1>KU`***1%%$OI`;Oy^cypXO7b63Su*VDu4 z=~w~M#mhh<=QO%?LLy7tG@_ztO|kUI*JRGm=b8#NtO{VR!Yk|KWatYc2q zRxuBc?h;sO43PstS`Z#G559>k$kyN%DtsLe+a}n90Rzx27-75N*>u~_fj^<(MGa&x z-v;3U3Pc9ZbHEEOh2&8J{)o(>j_xZ-{KUh=<6RZ3ZZ{)~o$4?pbSmtK(}RH57fFZj zDaPQ@W$JRQg4oQAB5}H|Xt3)68D%~U&93NB(`O|p_c0!S%o_B z<%o^c>T7hIr!-{!9z~KhrE%kBN38DiCYg-}VCkPr*Zv&=bw#h}zfb0*eX$(c1Zm<) z5ne@LmrE@%&G9<$TZ+x8u8_J4ck@q{Y`q&M+ z?(egP*+zkQWXwO3BMgQaVaYh8I){{h{Y?VdSbVMv)aQo^L>O}5+xyBiw0(fSd~g;k zBXe+Kk|xe|>}3*Y5OuENf^YvxqLvDvQg=1&-xZJVzNcXK_#Aq&d?xGyFt;|;YwSwvIMiY-I3&~3b4!CI2K&F*sLB&r>9Ny)y(~65oow^SVOisXM zYpqdY>j$d6=`qoHo662UVvPr4mt%3rcOoh64>mcmH1h2fY!SaC7dQarw%s{2zIQJb zWvW5$B7mi84P{;FlVAdfT>uy{$fY+)+hRHLg{wuhJXr+&YTsx>w-yvP_@ns471G&ZgSpiKD2`Adw=FA}o(6xY zUCs5nxO4c8yO!7MSwNWG5~y!n0_D*bxHnJ%PES^$yYsJ!6K^g@Z*E{!lrM#2{U<~X z_S;chW<^`}cQXmGli7QRY@uzxJ0{D1p~+c;M4@aYY`km;kAE6rX^1m~g^nX9Bnn8* zU>3+qaNurl4iX&RBz~5$n_T=bob36SfwKE{;>Sk`&{DGtOXPqV>8e2`tLMX>{Zp`1 zKLhH2Dbk~x6Y+3O0(@LrMAqNWpo<_ss%h;aAD+J=8tF0krGw=>5I^Ym za4&om{)hZLC=b8A4zuzJ_Ihhw@1Q zJ^YFu+I$$yxBy5Q4*D)tXb!{2n27r<)5SrpsbGK0m8Rv?!4fw=)Jfq2Y`EU%1$vuI zYEZ%EHRU9v{yrD@wG9WC-KJ{W&y(t>ip-hY(@B1D6PZ}51&Rk8@o}p$nYzA*X>aaj zmFMk4i_ic@ym<*@-#QXxLc1HQ|msH zIiB^%CVrtIG!J`C=MOVt^_ZWe_k9^V?(iCP>^;P^<*tCcW}0Mhi#(ni?69%T-cPrl z>|}->O2NO6V%T24jG-r&AnsWYQk&1vudRh(zDN&r`obxX7f22cexy@2&ZFUbmch|G z=b`jb1Z4jyC(>T~=-v@=@VK`O(s-=`ds!j5d8dXZZ@)lpEp+E}%`fQRXe%OWh-Y*p 
zBj9!Ha@w8Dxm~|30HVB*G)?)(MjOirf}?Uk!BLJ4*Kwf^FLyJQIk%WO#vjPL(G$43 z`b@%CN#w>z9rpZgDe#dQi=!qqkTp?viNQK8{KAD9NWHMZxt5fsXC%@K!#Qvr*H3h> z)dAO#QkXbf1=meYf#YLRNoV|T;+%m@kQNuFHD@(wrD}oRk~+%wb7r5W?geK-CmFtV z8FT9F6zJk~Mz(V0MEP4v!{M^1`mM9IxVq4JqVRn=-TZJARXvhOl&&k_$AUa|9dLmO zCc$uQtt@%@J%aD{8; z)E-SR-h3_|;`U+b6;E3Q^O+L~96(xj5zdlsBF`P-&}a5yST2{&IL|$U5gO<4%$$Qz z^IZ=g#b&jxi)AN!cfVCc|YS};owZ^@bA^?MC)J202IW55bT z#V6p3hrYNlbQ~oV*&d{pDwy9CXOK)aARA!z-4VAdW@Y8{ZlqG z4=(&;?VFdQ=C1)7yJjE#Z?u5_HCtiE^#2!Ip|^Pgx&Ln$7a@>G=F#)4Cg(mY{47sM z>{DXCtxDjnc8*+9Sw!}Yn8j}XRYAmawTP%$RbZzwfvj7&p4qf-DKkoC2XpB1Yo^Uc zhqT8|C5C^^sk&1$)3P9)%q;pMSY~=l>??Vb>sWNN(lJF$`amoBRlbXS;YX18kUX-$ z+?h-f9%NkaKW6;ncaWwubCP%NAYBvsjoE1?BF#rP5(CFfvha=;F}%_*4mvfHRNY%j z&V7$3b%PB=_op7Ij7TIi+|HB2`M%`Q;9Bx$?;}Fo2btu&nPl6mOmg$R3vqeiOdk0d z(n9(5%&Z+EvhUYZoBvO?!p#4PtspD$-+m4URneYv`fRjCA?*!K2J2-d^ym8`Ear3! z`}FRi^Uyb@%lZaLuG$ZmHfzGH+!CgE<75bV-AyC@?gpK^2{hyD0V=F21NWV^&~*6? z)mgX%?}=jJ&_pl%zIqR_Op_qbbc{g5>=6BTb%=dnHLp<_c9XI9GC_PNik=#OnG5q( zguj)`v2?T$r_NpjUNI|S&D~7+sI>yGC?&z`6$2o4I!oLXlF+N#3bsFtPLPG}ow6`x*jn6j=_m~K9>=Q%_vuTF$1SsW0khZ@{5t&LMD9o$ zwEZ+mo-vP_S~_8!X)1ZHxCho1sgbwuG;ytZ3jA0+qS57R2v=K(fu05t7TBia^ik1p zao!%Xz&jhV0&j`$g__xpJZ+9@{9`z4Ng!yQUk`Kr_rv>?Jjf|6Aa5R>ql@*D(ZM={ zzBW8dG>%?j-R+G*zpaG6FIK>9o=-SHO#u~7j)cDDe(**xLr^Eu}x)!XdkPJgirl4XWt{I54A_HdM6G&qkR_WKjec2BvY9DRaw*%1IT(U3wP$jyXVQy~;-O zl?S2q+dKR+c^6)vE+8w{q!FXvFNjP<7NmYUj%jJx;(EA7GYsbR-hTAP za6=f#>7>t62XIq!3o~N&0-9{J9+zG{OdbfF@XO*wD0`!dZtoq31%I>P7za4l+Mx){ zfIkO_lz_jSPSRIq7`<_@9+MxH!JR3S;6SbbU-e#K&Vm`Nv^@#`cHAVLODOu3^^pbp zgF$dG97?rwao^7MaMET1Hb3|YcXlknr!&XUowegZiA$mC+15ji$AmCPIZyCWqej}b z%LMzP#?lL=acCkt7K>AqiO0hnEO=wd)v?5IDI}Tmg0H}BCw*Y{&7Clgc6P~S5Ws*0PDO}|PcZ*L%B^Gs=6TLvz|au~m4kmmPWL&)k-i0$BdQ7a~p zgot9X{*6fVQy2$p54)iL2q`eTsYp!q;z1+!5^npr9B#}a?|JOdP=`RaML@tQhack~9_(Uby9nKJY&jfWxr@W#QV!y9|E`A@45Y)*;@k#_R8b460Vm&$p>7EAETsOBWb{AWNGm- z_;Ek*=+!~rldm2@16K<`*2`xdcvDb9_-U!MI(x2 z;PJ*$?Tmgam~XBGJ*>eIqP``Zf%l;&*8hWBH1mIqtvdfXSV-)Ce_K4{vOw_u+d;vO4nvarNS}6_PZIpcR%Da^`!>}6 
z2m8q4e;RZClawS;Np$S$)#`}Nn@YR!3mLl_(FVI$2dCN5`+IF)eo<+xa2cZQ8Bc7V zRI1oMEInoS@yr~%)?=^HDoDkyyOXgK&GBgJW-r-xPItyfA{)EnLM^+vqnjIpY@%%= zW|rBVuDMThe)a^7ioEkfC@Uu2Fh&9%0PrVH(=Zl~DIU{~5*s5k@T+NrHm z>J7Vn^Pe?__T@LNDwVVA8o1L~!xq8#%-gn(%3*fX@~CZQRG!^8@fbS~->$~i>D`Sr zTQckvzku!I6%tL$W^8O6=jCOWyffY|a7enT+w7cO<>17|y^d*iBBK>{5uu%pZ;!az zrl+RZjac}g@js8bKJ34adFF>%cK`7X$^U+Qqq0T>hJ>&24OzV@IMjdR zCjW>+mvKujNn~lP4v*me6&&Lqxq7p2WU%j=5dV#ZK`O?J3nyod_>Ysa#($Hq`Ra)9 zE&h>(S(;%{p%Ky3)`my=PYaC-*%a&>8R;8cm?gV$t#62LWMKifSBL-hDGF2mkS~;7o+k8`)2sI_7*juz2^Fnf?JdkY62ygC z@I}Ml%&Q+|k|;VZagcx4#D-sM%@>_&pT>bYqJ-8PlIr!R-4UHr*;U^xUoJAv7mK=Y z&*vvB59S4`5AZV@1U&hGWPbEG1yP2Xl6CnVq0s5HG5>Slu=+m#QzDODb^I~9vqfJv zX9}Nh3Fl9WO%sZT>_j_jnnV(RMZyA!$NXap^sUSPP3B4Xuc$K~67lnQ$O+p@)$6y} zdWa^4rSoST(-cOI59O!(Sc`O*Df7*vy?JXwj6`Co-6HV2CpzhHTqNS><@}P1DWbC;FY7)fZWUELIwLx%G=lH7H&H0w zwO@2^NRRhtu$eb4Vp(0Alnw93_zS{2tJm{<3%;L!eo#&{ftP1Bc3X~cOP7(TQMO3f z-`pZJ=^5f#c1Kz*6pyhk8R;bad#FN`w?F2*Zmqp&_I7FB<$d!+3VtH10EsyMlAcqw zf2?K+(@u9x_Q0Tmg>*Pi{j5@@V&lG z;APD<(5oE3U4dBa=t)>HJW z&xikd;SJ%PzNdWid-D8Mm(TM2&i4!3&e;pYUNh$p2XJ4!@xAc0)Udie6+#g#lH@=A zb5+!*a7lD)zmNy^xjX|KL#y@!6+-b?Ml{pvCC{KGT2wnzn}4g2FO+y?&a-Oa^V+`T zT3xfcWhL_?u1-qhQr#pQ<2utFA`K&J1y__dweXGuFU=Kg9XN#53 zMNrnb)D^1ZtLbHnN!KOQ^ZW81=0OO6&+2j&S)rjDxXel|jwQIjv+w^~&c^XshD z$eM6r#`c4}96f#B?Y7~f>|a}X&oU0zDql?CUEMXpYPec9zt&&KJKe+=Svwh8TlzCXYMueTI@Z9hqs^R5ldeo|JFNRM5dJWmb+z^>TYlNWnTIGDmf*7?z$qO@?{Nv zO<11jU3(Jmx@^4Y)|kVh<3XkTfIK_V8kZE_VUth7pMJr7`^LwjO@C+BB~9(*@13}h zZ*c4zUrJTXJNcE=y~>lYPL^~LDIZ=S3f#V>{@cO3=cOP0<{y36$@lGh!q59-!k0bu zlD9tX5ARQgiuI`@4Wi6wGhw5CwQ#`)8NNbn3{T~*9N+!t2)=xauE^88jh7P=CfXk{ zfp=EAM-;s>x_(EJAK&iyBjLH8@z(2<+O4;08a3R{Y2poJhg$#ES!nHZB-XlW$(;HE zj|0{Z^DIO^uZ**i3vsko+;y=&%&gDaK}lC+*REiFW%6)q@2=0*?`zVnKc3iY{l&FQ zWa*P{U7>foe)LiwYu%)+_1TY3@t3W6C%k`oP}tSo#a~_|EedTuT%T_>(l*1s=89Z$xA5jY zJ6M0tPTBg9@z#2IM$G@ONB;l+=YQh}C{D8#zSoPhwqG??G@x6`Pc3oc)%hTQqpYO0 zT~0UeMOZjL#quJ5_Wj|)e6{C1^+yK$f@$4Ch2a$SF=RX|HjAoKYj_CKNM|FR%ysKFXN~2r}2yKWLsPQo>XsK 
zmRx5A9oAl>0_vA<=-}N}8e89S^|ZBh&U2CVlo)HHAG-B&>(7d&fAkTKtkNvCD352SsoAOCbu{o5ohYhn25`mzNh8}80KT>nJIz@~aHE4r#RP_K2lSJcf< zXjn_v+4$)b{xv^U>%n`K{31PhG4pzijnDON>kf%*A$jiqf3f%GQ8h;I-+u!Q8Ym5# z2P%qE>FjHtq(X*JLK#BI5JH(lG^EmJymv70_E~08dR!11r_HQyYY~-a{_Lm6VZ1+;iin{#3$gk_Y(svqzB3nD z>WXFLe#{S&+%$||9iRyBtmGi=cqGb(-yvhBw~oo3+6gaPj=_8X=Yr!mr1;^sEPQ>< zk2tyNizJ>%LgLCQ5s#2#o%KCzzqx=54-7+%hvUe)SM|_zB!hwIxJWT)CCjy63N`aSUVa8&b?DWm^y@xF>68RKznQqNXJLf-Xiq{9slIsjkxqqwB<!YdggV8-VbGWEq``sGNsUAQ=3EYtDeYP^MY z|L6zh1EcVLjXVAd*@kbkKI3A&7Q1YDJxDqKj0c)YlH|ZQV4v$xb2dstsEIP{Rt({; zp~<}Z&UU(Mwj_Tz=MEZt+eZ6#sPVf+ad7mVA|0ZzkDhUxMyp*7=pXg{=vo;Ae)E(0 zENdAWt22xjusism(-*JH1Fz`54j(p}VsQVWyNX3!^? z@bx@Cdh5un8_wc$nI;$>o{Fn|!{B3!FZw6%Vdu6fu`^MZShSl9QC)h5ZO}3kxhbZi z@1JArjj1;n_H6^J%wgCb9l%bf-DV5d?Bf-l(InaY7I@g`(%NgqQ1JX9p(o04@WZXB z-;zaQcA9bYF~PHU9N^OKVcbOKlEC(eHCCw9f~##WD6buj{;%S3L3tv~fAELheU(i1 zHK_6otJkDM-dA8>XAJgEcG$IH5KKv44|9V(1Zk=5;NoYF#*?@4*`H!zsROV%-)(S2 z-2yPYHyQ7IwquSLGs$DC!F0j-eV{v_KOA3Wi8B^95vS}4+&|X|D~hwwv^<~Gzd2V) z%v11f<`nR}H4oi3Ccxiod4iYfBSp$d$imI z8tu_!$>Q^(_sKt4totU=Hj5-i`%BrM85$@PDg%e*(kj2Nlg4sk2+Vbs)CIxfW7j|>M@^)a-wN;NXW&z7VB3`^;KIS@@J>%O z#11NDZ~PpfWqb~IU1iP#&ClRc)19c&p#gzEMJRPJmb92?fSqKPD9W}UYI!UJ84VS> zJzSgG9~}#d5=UwOQ6aQvYa9$$(cq3#;;4PTBeoh3$Jdi9=*VklsI&3|N;{^~5!nG% zk%qPC5q$+Z+A_hmCWsG|a;1feI>MS9AK~}s9ki=`dno4)wZlk>AU^N(XH^4~bvmss9uU`deZ$OnjV5 zBgeF2qV0YB-2W52z8gy~XFP^k(qBlBn1}Z28PmS~rMz?4WO`IFmHcnKj{n~w|2YFz z^=-sg<^rf|hU2MsM^WWhp6Dd~j47Ut?CVo)`s%bH{8S%~KR>NxLltCbdFwt_)U7~+ zhj_rFwS)zCl?ys|-C{ngHMzsa`>fWe7y|z;q&9&SP~vYw=XPHFC;95hg2TXf=i!9u z7F^}kBC8I+WZ}E%a

xdSu<~DH0ynNmuK~q0^tn;o5y+_~M$w=+}ouXiHZOmRQq-#SN>8 zlH?3BPIZF8jgR0qZEmC*be}eTm4sCOTL9m$aUdz(N6~X_Z9JxQh%Wmbi4_0JLaCIU z*w|$rdA7BlG&zJox%EZN&Gbn|VnGWm%KlDgY}IGB8;Nly*%5%FP^*DFX4WEbMeX?G;FX<^4 z90AM;i$Hg`gcHk(t*9-6U%3NYOszv4AAnw3okpb{ zCMajyO?Y$M2^MgFfmgiCVeX2X@Z~agr>`=|`E~gjJXo$q*C^$X?JM(O&BhdZRPhQP z`Zj^Rp6(*?o_q1*l~J6T-5j{{V;o7tKWMjpUOZZRkXE$o#FAT+5x!Il<4e?0g1!^G z*NVU?6?!CW@gewj1iB>xdy48;34`U@#Nz3#f($SVl)!9naSu&Mm!H+VjimodP6`X zrNKQ!^LF>)Rcic5c~K}dNqLJ;eU65~-_nuQ)x%I-qkuRY*Aq_TH~jg`PAt3QD&k2i zfgAZEpr?ohY7IU`tbV27XSJ2s(a4_UjT#~6yaLX=dpWqrdlB&(oWYHxlX?>;M6}Pi zlX%`Zn6u{){xH%{q+_D#+u6@ikd-|1yt%?yn`;@tUr8CeAxqbarqaRmgBE7O9slaULFYR_Xfe>wwe z&VnD>+G_FCxl z9UIgeoPtI#HXyh4KIE@xJe0P(OBbYsp!$=uP@Tm(Ps+N%%|*ksg!MmsQsF0@>RyRt zBLs-*jnyP=*94&+Y=ys9XfqQH%SmSIM`&`s2%3aHrSH%=c!`n{UFcxmSIO0+z3w*Js3fU~VjUV!7<2$vfaL0=QXmoBd%zsu5 zM`PsBnFR+?=GVhePF0Sa`fNdFXL;$Xyyn=qJ{5f%62py3+DO2PCS|&!gvT`&Ez}P} z&!%~iefe*^D0m%wF`x;HKMO&gAL(%GAAU6Ri;EmsauoviOfvBB2U8Lx0;`$`-L@^C zNoorrLeZY&ZZ!usWi()TR@O+m$`$Lp?IC@NY;4FmOKQ)PNm#zb7>!shMOV8VabYn- zU)s%$u+~S~_^=RmhFkGK$0a=WViw-_*o#juxJXYou>DcP7T7<00c$B`kUC>sbi&Al zs6BCq`$i=xyT_8a5Uxia+JE8LcV)6Ge>>9n$#Mo`f=EKb8~UTkA~>XqVb!T9lGo)= zMtrK^N~t5rN_P&g+v)*Nj?F;F%2Q~Y-WW{1ZH^Z{jK{^RXX#EUE`mJ{!Mz>H2!+_; z?we!yUXB`RJ21$RezFQb8k9#1ce0$YP%o%j8G>hm`fv!}3i9VuF;1%Ai+nCCU@x5k z7RWL9jKVhS6DNNBhO>X+@VgC&R zWVfg=QGa1Vn$`vE(kd*5UHed9Mc|0jz%<4k<~#B!8%+YmmtE2nL5>O=biWkyl; zEgcq|iEVBTaE_n(LGK?cfU*O>nCsyX=d3m(Q4d)>ZuTtRaafF?>pQW7@*(8o#7nNo z=flto8F(d*Am7+o(jrX)z2If*`m;pz?W`aPw2p_)4F}NvUya!G*9DTd3kA7-w)3a^Q(pxL*X(8qrXu|2O$pPZA!YYzw0S6lgv z!ha1lbzN~rk6hviTjN2#(30Pk81ui>UM&I^Ac=e)sQngSS6$w_s zFAsX4&qfD^H%k@%onl~=?i^EfS_t#%uEqMTrK}X96OB6mhvee|(Ru4!e94bOv1JnE z+RO?ZJNFH?dm59q*DsJAqzJB2i9oS@UL+?_7QS(N%{f<8N7934@V_K~_)O&((({;r zcfV>Qj}%?qeT7aFemqI0;J>=W*I#{>05c4I> zC#Te=p!UsQ&@46zwW&vvD!#W2&$>cbcixp4_B6rU#@A676M>4o+~CB4T2ijk&e-c; z!QLrtgvYR)O#6;N`zVbFeP=EjUkN0SVxW zAXCBlNN(dxXzXl9zNc2Ah}rY_)`|i8cEEP>LbnGkrKFL+E)V&Un}Opy-jh0u7?Qcf 
z2tP3DLGw(W(Y5zS;onQ_e><&%mhde?qo>nRr8PHaecUocwtt3mWw~&*^?o#1SV+!X zaVOv3>_vWC%NWfam+*ewt)SA+jIJ-#Cnvn)@DH~`DCNd;ntND|%$Dcl4O{roqD|VE zW_M54fg zhWnH>vEmOy+~yic8?QzpN<;9icoKX(R6stD-ohCJ7tmruUn1t2gz}{|A>#?rHor(x zzDo@)vrEChE>@D6(Ol@#I0KEk7NY;$FOsmP8;rs|V=OwyOV@68C0!Q-(3c&J=;Y}^ zloMKxHy+Qyag{DGk)we!r*rAgqN|Yi`}_3RSp`yUZiB7z^I*(pmUF1XjcjE{vD#;f zYzX-XTl!p@%!Xs&ef1J7*r^8}ZXAX?q*`F!qyZFJvyHTc3lYAIAo%lhDI6W*C8071 zZDapK;K*X=<^G7~16kN6Edd8=ttGw3B%q#M1d3$e#Z~tn&~8_^;yaU)q`1h3ls(O0 z7F`N~KF+e_wZkX;YON7^yCxa?{<#6mMU$X>=rX4Htt=5*WQ3E#_jBy@HWH<}>87F; zk&Hl%3Um0hIK~I+@N07yG89k*q87?oQ4h)*xnnp76?5 z3pBsPh56~=im%s1qtO4P(ZjdfNPC_ULP8zz%3C)a6%@!^ddG`0fHdLBpTl}zzJb}E z34Cpy6e&9)L@s^biB^OJ?>q$et1v2yHr3< zXgBAzxCX)wN!WWYMU3~@;yYj8asG@|lJk8foUX?U(Nc3)^5l#d`u^!6d~xgmaTiY~ zT6G6upo0nweNqJfTe6Swi>e^eU%n%~l@TP$bQc-B<3_Z9Eyv*PA8eW8O9BUq;lk&6 zkc=52+lO*w^C}wEch_L|)cK@aPlqgK`F`)8EJS}32~%o+7ryw+OL&i8#idf08Ow#1 z#0Dl|CM5&C==lrfq~oD)??(aJ_$v@=+c80lM* z%8ooZ99@Zd0y|)$`ut{AsE04G^NWcPA5z;A0@F9}lh%VD@VhUf{d;Rb}KRn0r{0G-@ANSsSt#h60{3r{0VWO!d{muEaHK7&wmf2k@JJ%ft zL|?`;m&Vh%3tH)jq~WPhA$kR=yg}UaoFo zJJN*TFQ}Lr#MfiblZ3?QeBj(;9^jykK`gb)m>uXFiWg?>VY)-|_^$`%lN*z&u&Puh zdDOa)mJCrM#RfT~eNi9n&^BYuiP3ER)pc~siEMf_5)mqshgI!|utJ@B4yFk{6^)7P zi=jD5J8_$73?Hgc%x=6{jV1dR^Qrwxq{?s&_4u^|Yt;0VH~$7ERW7!sw3QM-E=?=*f9%Tyep4lTe(Cw+~5ko`E@7m z3sffk-(8vGyd(S-m3aEiQj!`Z?Z?YkF30p(B4@g4K6z$(8&@~EQ%kK9`qW60Z4A6c zo8GPCWVJ`r5BL||&&l(vWj(R2w?2Lle2eSPr2H%=%;Iz=@vGOUag%N~QJv&-`0`x? 
zezpBv>|R0F)!J^yn~eO31=1xe3r>>~XKyY`Ql4#{AWvpqy^PI% zlnC?87CP&{)hx*=i_TwNOD{Vq7mu}{jvODpZe=dYAtjAOD-6Nj) zKe3I*EJEJ9^9749uMRIQe~omGL-?s}8i%Xjb@`xq8i z152HpL^c~RJX&WT3tTZBpL+Tq+o0shjS2fjmpW`A`I1^(;3i==$w8frIFZ2;rx-C& z+61cP62R~vM?E9jxfjtpNn&&d{djwTAH47me;L`vjg{VspPneJ4e`E7mwfhR;X8bA zi$Ms@8TpFV`l%4P^#k~t#7LT9xro~TjUuz{u3_0Z@qA;*BC0tCXoV9cg+U%zW&S*3 z`e!s*wf{Kv(=@_0gJJBDk1(f}8N&t_oTo*lp47Ez67!OeV!qWEuvFn$`sR8cmuv)R0zrGO{HFUG}RY!Qk>tlpEES=Gj@94|N2o0DAW1s$HvF1uW z`nu*Dp7!t(@#%iTzwsD`FOMk_TcxCsQH!6D@sr+>7sn3r(dCL{SyT&E?#QMQOQcEv zUtN+i%836Du5k-Awz9?b$8fGeEdB)Q>}cIx{K@kcJ!T!#3-1#_McX%~B z@!=eE`dQ34*Bmy%)5GHCYFqr}c{1{>29$?wd&Ozrv(a}h72=#btH zI_k$V@%@x*{Gsmaw7~2O_M2kM-nErxO_|U3t)G4lx*FCMuZE4s+;y(UFMe{Sl z@e0XWYH{}px3=*lowN4`o$rq0r(ME3JHBw|B7NgWo$!XGnM@Dh|eG4$UFJlizHj)d-6$DSP=%Vxg0B=-ONkWTY#XBMF*L}d--CO*z(Rx2~u zj+I*Mg6bIN9piz&(J12EIg7a_2a;-jC{?VxM5BVovN@m6;gm3Y;$EGJ^ZzX5H+xs% zY2OvtqCZZ|NG*~kr_W?9(GKK(bUiyEn?siE@+RX$m2tUqD&1@8%gU$~JG^EOQ@CVI zp0CnJwZq1fv*LSX_HcI+cvqgdSo730Rf>4bb;4ibq^ZqcQ?fB{DnEJuDg4RdER#>v zA-T$f_)PL#EU7nyy)G{1gRYMyLBmt%x;$If)IFAsvVw`#qOY;%< zN2?W{nS5G^4(-Mo#}r7>9UIoSSq`UFW^<D?26e< zbfDaTUtjM|uk1g@9`y$D>SzCQ_Wug_$+uS#{~?yd;e7zTl&{EbEzl)%Jp`S|ERI`b z--gfIEhc7bU*My&W{@J!S=i`XCt=4f<9iCP@wzGIRH|Q&n3{@+`>tqw!X%Eqo?psu zdhdtzy7ZWd`dBj0;ytyweN?=|{CbS`L8Z~9Ir&$q!!}BB0@i8?885noT9Ur$MEZ4 zPQ(62_T>GYo%D6`Lhck-M;}dcrjJgJBztPD@T-<~@#~^b^y!V4SVnFK^Lm_5pDrH3 z(+p)YG;XK(`n|V!)R_7FzAL9`fX@vY*IY^s)=FV}sSotQ%^~D@0U{R;enh7ohcM@< zsrc)K12m*-64{E)@&3bNx|t-=q5TCYz{U|f1stRE3L3a8uLIcJI$gSOSTY@w7{k}C ze8>BShuS1uGVu%d-yA!<>Fh`Pc9{jc&FG&Km-*&sBfj%~ zD&Kw);UnX%n4Zl7rqV6Np1*lWR}J!XtL$$)a&{tj^ok8D`DH{d z_#Em3Wi%0s@ujjTs)pCm1uN2d!8VFb*LdPbGCQcdOcMI|>?+^3awD!!UC$P{$Fu1t zb%mV*5k2L&h<^BJD|SxOA&ZUG$R`;Y^3Z6ocDT`G(mdaTWi36y#cSN=+m^-Av18XV zDI-Jj=;tId>N}5vL-&zCfnhAZdo5|PGA3%zKVjG6Zfw+~K%Dj5ut7`$BY~y(_LJRM z+;j>T&r@MKZcoLkIcwSL*T4DnmMD60%@OM3-N{S0w_vxYk;LYR0UI@}jMb|e;<~B? 
z{NHwa_OYgs@68HjW;ae?pJ6jdM3E(VFls)xE3W{XzJ9_z9de$$|CKX~MUgoOop=LOZ=BHA9pAh=?n-i8zW>{rmBi=mbARBse zq+PqmZu}%q#5PGeu}j?^EMR0j4p!DB`+RH3yeZX0=909X#cdnvDW^^%lho;&pbTPg zPN-uJMH08tY?7)@sp<7UWLd&(JYXHp{C8`T(F*Trl*v?fy~T(1ey*e_Y9*b0W)ZV` z%+o}g$)sC~LN9bXoU_c+>g3f z+O+86>xEc4W)X1~5=C>aWD*T?LdUu3lFhjlw9wgx&b$5hpc zjFb`!d8|pCe%rFBlsa;yX9NHIY7(}1;lS8$RZ{uiFxD3*%U>b>Bs_Z+@69*So{%=Q zxjUWOR+n<6`Qu22?{M5Z^(MBsnZ*2KJg9ccdLsS$k2qDLojCW*q+Pet8FNXa<`sXr z(>_0NLuV+Ai{46KP_MH_XLn8P+I4rgn29}=H`m%+Y^WLS`gaBp5MB(+6d zIM1$=FRacXv;JzaRQ-np$qkUgVN-;jVjq4G5Qn!Mv!daKl~~o(kaR`tXA&uqc-6*g zZeZaO^4VuO zzF6hcFS=b}A>FWBjhlQ?p1t)f=Q1}8XRbega3z92_(kPBGAptV|4nrzt@mEi={J>e zhO_}TSt7$CbMysY-e>Mn(@lJLFqobadon}W)#RvGJdu^OAir-!QQpmiok*<3j%LPq zRZA;QvEPihN{nHrydBtPPfEr*oI)(r}*uh@|+JSh!nvY_yPvprppQ1=g(1 z^#a}}Q9-hgm$NU?skri~K0cZChNge*;)6Hovy2l7_(@D3U9osBDc^Wm{5!gW+)itu zLByElED2&N#}M&r?jYOshu~SgnfT+Wsm$=RCpmiP5;lx6V_Pc>nCbE5EPrJU8R1(Rp*oh~vju(7NLoUA(~$T;E43<<3*v7NMWz&j@>1{k*M>HHP)6Snu5k{Aa{uqCV7$m?hOx)h8#|mkcKsmy1}=f^eLw7D~pe zISSey!#`}-kplr6>D8D!xZw|?P>oQsLR^W$39-GD_aGZEj zZ3g~k_JiXEU)76EfAOrQ1KfgVzXb2Qi+E4G;1M`7mOIlsgHvmpOWc}c@hZ*X?2+13 zvNPU-C0R}+Cf5$rbKVcR%t)cH9%o0S1NPG7aiz@Q%T?T-9)@4}>>xMi4Q1`m%-NJJ z92Q&3V!2PcY<#0USrj0{xStYi!PaKFp~jqu?w8})RF#HBxiY`{_w-ZtTO7Fj4FBb_ zB}v?~mTWVXVkM=a6lFHBUYSgmSN#lcdhi@)33Hb-nal8^?RKnqsxAMZ=rrx^>EXAl znR7p0sggBQ;_wWqx!A0i(xj|(;`?7dU(nNwC(Vf>Ho6Mr9C4(}wA1;q3H!O@6%Y70 zsp+_4`cTqa+fPqTGbTA>d$_3!rqlfcDYQZLGCDUfnxDGm0o5C;N!8X&#yih{6HECA z(*BD=)@%>fIwmET^k9%?*rf|kINOm8J)N}kng%xPnv5rGe@+cns1W^dPuwWzt<|zh%nh6wR&MfP=qDvfu6dMZ+K(U-_tr zpFE#P_uLwSTrOTkd)msmLpMTbrC%Q%Vxi7f^gqR4Ppi?(?#J`Wpjc;nun;pD)p68cbn8rInBE`D}*4>nnOoMzqBW|M!8 zVhJ0JsZo_AQ=R;cK6iB!?)BSL>OU#cGC_iMl@;Jmp(bnv-;LLB>xg6KXu7%5lOAgk z@`=JBJa9)_Tmq;+N6H@U0H%+^k2^ zHq4~|EqO%8PaJ{O{z@~|ha5}U>_z=AB-8op_3%^=J3PGMAm?bRMRGRXN1}rTRKNEo zwUpAshwn_~K7RJ5W_F2$zcP$UH3@j&VPlLQgOMi?s{~ z@uuK9YAXK5&KC|N%U)Nou=XxyeS0-CbeTq8sN0fF5dqk@McQtqqZu3ZXO!LA+d5cc zRtdfK+=cAEbddBsFec)Mb+p4M1+VBPwcP{zG^2@46hZ 
z)%cO(&LW|smPQ!Un4`#hmn+xazy#-BxZHWfQ22FCN(i_ z?0Kvg(;Rt*dG}PY+ZWff>9&%jB=tC6qrZ`?7xZi8)efX3aV<96Dos>S4ZS4)uy%I) zJwfkygf>@M5`6zG?nu8!WqyCe7i?}5afmnTYD~oGT6vVebf0fOIf@NSm_V{s#|!Ud zDjhJLLF!9uaB)d7J(l0VK4sgma$3R<>AA{xI(xJE?J?Xt{{_T7bR1W{XbUM(D50_Q z^VowOOW3oC-mK;FIY>AvR2_o@qze+5O=zuO6bWFQ}I%i!$qe?5!-|6;jbg-(f6AT$o#$Yh`ii5=Hs4^rB)4PVKVc{^Co2?lJrEE68fpD%M0## z)*oDUM)=0i)i~y&8#(`IJiRbXihY*7Mb*PhAVGTjc)n5?FWszk3UuhxzR-($z z{IFm@He?Z(Hyr=Q-LiwV13$eMk7ajI0g+@6!(9dnAWXT96Qs8xuZ_+~8@at*r zwCQi$`E!u|_*Fs%TPms4(tp@IyFwi6vXAoA5pU~nqIcqN;30JjiF@#5;SO>o{T0=; zblV=XQdI|cl#io#^g^j|&RyJX^a5-BP9&9j(xg&ZgPSo%pR9QiOm&pw+cBxR}H)IVhn?HrztgPl$`-7Ng&}<<=I*!Df`7+bO;WVz)mMVOEPw_4V zYLNDa2dlF<>9sPgDBg*kR-O>DHV$!0eP8hNJt=(fvn`mrBo@zUpUX7jFXH{L1Gz05 zWZAK0tLa9o5Hep%fkbY3!sJ{us8{tpJpI-ODxXt`SLh{?)orCz)v5+fF_B>9)4tG$ z$;+5bw>n+i7*1LhJ~7+K2=+Gdxj6bECc*C`>BJ`A(B=$$(8*i^;!>tBn z$=k8)5d-?waytDIq0dt8-li{ConcqZ^XQr7a-?e0S88lI0n^(tR1v8Y{_G0o-JZdo zJS!vz6y2%kQ7@do!j2qskY#If7vPEke}1=~COH6}IG`(7&~bnBTQY{S`VlMfdF{_M z)-{Nkd~juv+g;cJS3f%5<{F-~GZoWoNmwaRlODgNO8Vb_riN7`SXikG&6oT@>n~Hh zI#WZuyV{Q(61)U!`V8=un~S;2qPI9oVt_u$>A*I+YGkw0BAl_hpVs&s_rTM74MoTG`3ifqli z0CD;Ojrn~IW2U<<@!w}jaBc_l$$92Q#8=(<_R-;--83cgMk5-xG|gcDg=6kdmmB>O zGmG98@`Mxd&W0h{Z$xPmzJZMkFHLu3e zDft6C$a6R+nml5H#YXl zas1+5vKWt!qEj5B$hDql_)n+_{~HuY$L7s!(Zw9TZdD^)G~JG_Zt}+ZuE%4E84Q>3^g7$k4AZPqfg4esL!M1^ z{XJO&2NX!M5yrK+VN)^Pr$3sF?;J_J_FNG^%PXVLU)JE=a|6(Pc?*1N!6-WBr4;RZ zcLF~tnt>-i%)t@UyZPSEIQqFiUz|PrG~U}0gS$dTVy*F-cy31!+rOxpeh8p=;d%?k zH73w02TJL$+XYzZ*&Jf4Sx0N#wy-NFC1^|aUJ`N1TbL7skfrixgv=HVK6U#t&hg`7 zaa?o)DcGw+X8!EL;WAF-#$5xNXQacP_r9i&#|pV?`ttNeoe9f*wh9kFdjhwxYV2|^ zohq5hvC+x5sU6N?p)!M9)Cn2(wjvb&u+reWZ}ec7hCIACFA{rHmGBRJQtNmZ2Cyd@KDdr0+j?uTf^vJn#AN)?}f5zS&&+`4E@vSjtEM~M0`H`ZH`~C}H z71f8=yQMSnQj#Wmo$vV8I88jLT}9vYYtvd`ulx1MS9Jd!9rBib!)>oVaSLV} zF_lzF!9T7~Y;2>L`*#sPL6|=~l^tSUv;9d^`8c*iW<4Eq2-po-JHn^RlJ!}mu-=CZ zVmsNFy{N3fRxV30=Q*CPo@B=z=o-qz538`rWHZ(pk;9)Am&j(VlZ`J$Ov$-0H^ zs*J(*|EkExylbpREeE;kj;00&qlsl&3>oGX##B6R<6x7&^z`lXsQOqknXqUbKkl(T 
zK9_V7=Lp)BqPP@04>^T@`J5If?#`k`9Z5L+Y7zZ(zl57Ky^BV8lyNKMV`*sge&!|K z&-M%Z@mrvTIvu-1hE_N5t8>bjPft8mYg|K;bp2?;3nhMD5mfcAkz}ctsx$aO4wpedDeX`D;`!?5w(5Pv&_{SVJ*epV#r2+K7yMBCV zxdCxf%*0PEOxSb(m$a?#uK3-`fBc4x(@9Tb6s|qeMz32m@%yqP$&!b?)Tr+PeeEcs z-@azh7`r`m)xiO#t8|k)qfv!SO%)y+u$A8e*ww8jY=X2Z8CX+@r}*w7e`cgpnM0=7SvQOosCRKa z;r_U^O`hm4x=F`J%Mt&qL;OnJy2>2bQM+Ma74IL^<|cVd5%!nj>N z92e5Lo;JANqHnIL5VtZrmO90m%~|xA9$h>K2OYS`UoQAZ_oQo+EzgA9h9NaHX2Cl8 zel?K4-y3PYf)yKTY)eO8x1@(+k~lo{8NC)Zl!lzpChwF+5OTklYOSiE)@H>xdeRcA z=|}0>)6Ohwl?}hXt%~lNzKmV>O-EC{E~MTUa>OS`xw3?B6o2h#6;Jbvpizn=$^1rb zvM)}D96#quS1U@eSeY5P&fx);;VyDojY|Cg8L6!MzoLu(S3vOp4=?_oh~R$x64(_o z1$t^8!^P-%a6xh<%=*?2wQD)(a`b}2lIO5Npk$;xF+yMM1gcT|YWOtG2!@0j3X~*8 zR5Yg^`~-qSr>;9VB??5oPw@~~(gQDLAHop#MKEP(4V!0P!Gk+GdX;?cSVU4A(*<#w<@ zfD-8S6N*4NSrM&_9RR&Ahaq;wSY+MW2!C?QVCk_Q@+K1K;2nZG9~Hvm z^?yM|FAQac?MI{3??H`C9C(6wV8Z~F)F+y{{1 z^-y8jX86~!3_V^q1MD@^J!*XeFc>n}LpRJfs?gf$QRY_`50` zOiDFjeyBPe|8+xjv3N2>DXfQv4f|n)PYlGJ4+oj8dm%^TE;PjjgPOS&6keDKHo@N@ zSRm{e+KoWtPBeg9?qpPOPY*ry4n|&l0W7NDjs7LSf}K}8(DbAFi))WZ@KsPPO z39?3GOot(CdmbLNJ%;0}=c2Y8Z8X7Z9dfCcM~b3nV14H$+M7)v=wL29e=`QM#eGoN z{|mJIuS0z9P1tJc2gzn4ShcYbf=^ur#{nUoeViPK;H$`?wohazSqyuc1&aOJA5iPG z6Q221!+#qxVdH|gkf}cdXwDCjmvTMW*a>7}6?JfJw}N~bcPP#{03jtU5F^&JPQzFR~pQ_8^oM+Ri@cA)1m2?U5P*bC|7d1di1 z=2ifBrAb4m!covaav$d3*ael2b?|cQ1sJK&1Hr*VP?XemP???$sp@w?Ex8x|ixi>E zuZQ8OXa*c~%7wU-Peik{^Wa#_DTumR3KP#b!p%#Uz(x8w9FJ5**Y@ZkOO0@_i_HVx zu~&3>-D#26`{9soD+{%ec97NN4|yLYU_(u>=*y#@qRd|*kXpY4CcPL6dBGi`Qz3^$ zCo(fdfzd|bHGL{r7n#CxlQ*KxKNdmL2p7m$v;@MA*}}=YEuy*&wh(QV12eAV!>BqF zB$<33k^@wc(l0Y);Cun%s!E}4*In?d5hC|SPD0IEFQ7YBA4H%2iY#O-!CChgJV<*5 zM|^p>kn|ZG-rJxD3M}Oygjh~P|v=H9*?uYsxm2HK-d6MX6`5>4iNunHSS9ERcEVSrIHC$X4D^PU8 z;6ctF7%M3+kh?ns@^&4352=FZ|K-Azi)QG;#eHb>^y{#9?=-aS&Q&-Xoe9b(pFysr z2`ZPBLXxTjaQdGv(t0LPc~_|*jpxa*8waDOeZ?RhGgKfEhM|Gr0?7Gs0M5$$2xN`t zprbF1qSjo4T>GKOWT*lfoQILItqXegTN<5Ra2%pu5p>&jKiDSN!McVEU|W+3+CR3y z)I|fZ>{>kR?w<$VvvR=srWzW3@Fwh&-V29U{Asw*!m>|?tG7f zQ+L;ZhoKbG@wy8M%Z8yJEy)W;e 
zimO0dt$Wa;yOP}HHFu$^`W8I3*Fjm=M?sN-6IwgX4h619XuMMtx*4H~au?*Huelm% zUzk2O;8F?iv+R+@^#+-uZDzsow|h*LGFKfWx`oHBtoABuEW zWWnSGV?f?vI_#-`C2ATx2TqeN!QE|jkWr}%w)KZ#+_n^$`$iI3eK;qO*!KhPTmgFo z0^2Ya4oV?1$ZW9++P@?gyi`V`Yq=kw!L=EDqpM+l(^a@|shp3dx{g3PKMauL*LWCz{0973rwX&B0>RfZ}s5Ks>*OKafZzEaq>Py9E(dNeoVgKK+&@wC*WW6uIy#Lft=gFT?+AW7P_AY_A zMGAltrb6|M>yVpP21Dg-(AO7^0kf}YBEgx7Y+tIO5zZ7WaiMdOqSy-^kupUxeNW)b7&R2HnFgDN3ncT2P4MEq5qgo53enE8XhUNM z!ml2p$FW}M{#kw0<#!6aqqjlClmSS#_z6*av{0nD1mfQnfZgLpP^)T#cdeZ;J-Q7l zwmb*#P;Inj(?Jj&y9GyoT!kBh>R>+Z3M>kJ3uW<^Xxifhh@7p67B77V)joqzAN2q} z{g@5sJq*x0fpDM038c*Kn~>osk7PCtL2A?D&~0a3w0>z9+&Mc5tsNJOLQO2tPd5#; za6JzifrsE?trVPBABkqFU?iF=(EA_&CMM`3i0T#)ZjS+HSE6!;$~Bn=R69vFXy4u(;pRg{f4-GkI~??KG^v17KrUq;gY#EBn_TM zb=MWSEg8d5lKBNF#;?It`3Q=fVvZ#KZbg@nCfe&i05|<+p{}LfuxPC{dah!G1m_iM z3(7~oBW%$)HyLComO?EO)+peuE4t#Miz2$OLG&g`R2gOtz2DQ&+`5TKVWA4jfAI`R zLNDBM@IX2JS5VTfSTwe87o64@i%KRvhLby`>2;c(lgs60B5{L3=h!!Mu<< z$Q*kbJYWg>G-(A|{p%W(t+zvS?n|LzGfhyx@P;@k20_V*Yw&etB`E1hBIDS4SPM}g zo8Asje7Ygyp#%C-s0Sb8T40mRTQC<$g6T)i(G1=k-p|5tyWI=c&pQHVn)T7$Bmbaw zeJYd;eF+n*euDpYE#&U)i2QWt3nU^j?0?q`dgiL|clklcDpf>>W;3wq@9>IOvYh=ci)B4MU| z5?FlKKyvRVg3qdIxcoF3Y91TFsDda+U&uj4n>E~a*$ckg{Q+P7C2}7V19V0L%n@kk z<%`$B_EiS3tm%cwBJQw=4>JPYUH`$6D0>(zKLDBk1RA*ZEpVSR4dxA|LgvrwqKe)a z@XHZM(B_4Z7;FP`8sET$8OkVHDiLgzw4t2u3e{z(+j}H8xlwgG2SuzgIZF zeg3c@av1u$5}`crTDYwE8r-&p!mdxoC@XU)GSayPX=`#}_(c^IxIq&YIDQnRb=$!9 zo#!FDq78IMXru0J3h;VsAS{z#1lvWkk;#lm6r$G-3Vkc!aDyjYzrIm4N2bb;x(7f{#c!t{xsO4_>AkgpXZeoPam7uZTk3hFL9r6x~Vb`t4 z@NvJDKs`MV7hMBjUG!BrIoBWl*0w`l|5M1l+7Bmuo53cn5hfm$MiDL3(aT0>l({Ah zoKD1mT+ukx5gLQyAIwCTCbz)-aYm?qWH^#rSBy5wb-=}lROC6JgQNq7bMIWoazCe! 
zMuQm^kYMy5#7t;_-H#rl6YF%)lY{5r(J&wMtEB==&mKhuHbc13`s*OdZbiD5sh~e) zI4FLZ1BEaKLdv$o50wkBLWP6>Zj1tLxfFQ4ITa4wKLvYo*9mi9Ifyk8iw^sb03Q{B z2&gz7jF%NcX3#qD+1LaRTpHoq=rkx&O$JZHT`*y%3h?ROuwZ>D?D&C%H{J?uSZ0K( z4=+LnvzlSV$uOwY^g*qGyHMvy9WMH-GhwBk#QhG#BeY2*&B&WVzp3O@ozXbzZ%TO8=we> z1IX!h7MzEVQIiIJ3H{Xb9^7i-ZKYI(7{Vj(G<&D~sWf(3|alkO4>K z!r|<@7jQk;04;uBED(#^U|P^yxZfvuLoKI-Xi0#VEx z_^Galmb@0j;!7o9)KLKnnY-bdRSFcOt%X_Hx8UYNfy9_`7gjrTK;3Q?bbpTrx??#4 zG}ITuf3}LyJgAF)*#*K2_ZzT4I|%f!pa8@kRLq^)hzFT@^SBAuCpXs z>}iiae0&QV4V6&zVp;T0pcbDEkU~`hRWP?o1$0g?1i#UlkoosDM1nO6j*WxAheZ&Y zngd;VRbVw>i<$-M@Sp;NXPXF^4@`z}`UfDmPoQ_23Us~1V{qK}9YjvagG&~YDDme2 zgrAjy7tZToT>3&(dA$O@6uy8dQyzn6%z1FTF&mw3I}Rh$4ne{*g8tPSBZu)>=*p!& zn6`W|iglGhc@}fPO7k$hw{$}xS548sErqc7*d8R#PDN#*7AQq86+N4^7u*dNgL8VaG=)etv zYRNPdJv$BU3SW#Swkx6b!6ulIF%w-dw?Y4{{RK_~Z{XjgO88L<;IREOGEtp}WDk0y z^2;3%xq(9+4`Wf%qD(Z}+Z0`jGe&;@>`}yk1#I1TO7y8l3f>&khpT(vi7dnz;*~X^ zCTIxA1w9efxGKWNLt$XBjf1qs*F}SgXGFgyGSQYe;Y}!#g{qlWU^B8;)I4-3Y|l1= z){ADac#$*|paGFfG!v^fW@w;w%v_zLz{ zKM?J|b4+CV=MuGpJ?wwTaY?u2(A4BW&DdT)b-DSz~3K5mnEo3Le&|L z`}snvyD=OwHGxkd6JX=N1gLBvFwbf>sM!A&J=VzshY*33iv~qm4Xfeu8%GE*ZiLUf z9)Q(QfcSa8MIG+eP^&2k_HX9FQ1kV0^Oh`p@@W#;?i~ej_8#n;f3W*S?c3m_>sY>+p zSvDZE2k<&XpdP-_hRDhG5Z)9isyMSr)RwCOJz>MayIBW}k19d^MhDpcs1!8PoZ;l? zeGqk*7wK3VKwNmYXm@A0Xy#aT@aq@{3-fwK4Lk=;w}*qB3Ai}& zk?70fa#7~E0FjczW7`?6-$mnQN`rHp670o$Mft;ih~`Y}5E-W_LGvY5@Q*DO4X;;) zD|$94M9`!aVib{6S0BtV-+|sORppFtzXsKW7-)zS>iBJAk(1_0RO9~?R>z-%rTa8s zr`AL`J-T*92xC=jj8X=1vedxh=Ke#Ya8>P0MfX0RHpuZ&? 
zo+_t+tDX+LpPUP7qlDV4e=g9d=P+-5I*1=BA(;(gFc_&1NxUq$%z7iT7p(zBH!oOt zcN+{pIu^QOc<8>~0dCq2@NHN>6dzQB4QJdSbk`Y_KT-jG#R_Qd1s$Yy{u#7?I)JRk zorOaom%-veAX4d^f}R{J1+jiJ5+CVC8Y3d0Nx4I0Sh^C)S`I;X%4ASt>PL94-3Yy- zPod4bW6^@o@1dc&2R7gw7%Z9qFM589(v|$+ldBs9)+)i5b2$+9c@1=KTMFZS(;=bI zOCS<2hZB{Dh9`f5 zVU^}3(TG2tq5vC75KX)Rp^9mc5*7ezPB9SZH5ToW{0F&fw9)j*A4M->Ou$6Y<6v$hoLjk4h3(VjB;g;Ap@6W zcsQyIMun~b5p5Pp-u8!xds*Z8sX zb3yJnhh7I{puW@$v~CB3vG$6{s!SCg^dEz&;$u*M%lrS)_1;lE{_+2RrJ*SrS{jnd zXu0l>*Q=7zFpDG^C96&)~k}pEEz4MB$AP`M=JDv{>JD0&hLB9 zm%p6Dak*UA^?W?;_uKvPz`I9V&}I;dC8IJhy7(-VRz^TDst1=<7pF$okx=T1_hGs zYs7D()98F%h{tDd;i`xVJt!}P>vr~S7Mme{jR94!evLHE7KkZ-$AX6j^u?

I8+Q9@w9jZc`kc{;;#MJ9KQ_Kf1>bx zf-I$5ra(u!Xlh{`29bFKcABPztad@G~G>sF<@Q5;k;FBWL z%BN9o9n*=He@5-ijnEGMhsZ%EnsKHN8&>Ycw&5_Crk7)N{%4d*)}UHnnlhyZknh&d zwoQl9fmNf(XQu+0%BWK9{ZS;G$@ILL5;V;@3c_kK7&%i7>vkWf(JdcnQAr=xF^_~7 z@f7PD7m-w@6{QE=250;O%Pwlsm}{>w*G3M%%7SsVf$5w_gTYTQrh?oI>|JFHecF%X zX){Us%O{isdBHiph?`?Hz$vaR<)XZcabRT$rr2JF2jbC~n}p7**Pv7o2)^=c96CQ4 zYRqT0m6YO*JS$H_=3}V+LnJuma}d?(!$R9v&|7f@>ux&JnK{4ldA<>?h}=bch38PE z`3lG-A4G2YJf5$(GufL?Cnb|_5HDrkT1Y(QG;Jiuc01ZAKAA3N4Uaz(I9f+R?zRxZDI=(@qqw zJdW0~n3{CJ z(U|pDM{&Y`J3ek_+pw#$aCLzhlBM2qx^6LCnv^ppRE40g+XIGcb&&BS6z|6$!t;>@ zP~E*Blm2MptED46Sudoi>H_bVM=>qzC_+8FQC$-bld;+Oup;)Wr|1EyRfTm7EE7;q1JjT=3C0*dGsx$4hBJ4D-`>tsAIL^}2zCuIFI(To+!{3Zd>~V|5;q!h8p_LA=_{w<(e>$S3;YXZkMO5duI5% zUy1%!F@0ah7}LI7#g$Lx7`Mm{uQ~@Hn;eBcekwkWxd`vsrEu)MhL4L5gQU;lU+NP4 z^wGqD_EJ14I0K2!qnI6Qf!icQXCH4T%Sn|u*qZ^7YfR5@JBIadhwy8`2j~to;ZeCN zo%+3cOlMgkc)C4ZK@Sc&ar9*2 zS7aWnh1!K8G+t4ons2GN;8B3>n~0J&3}8=nCWMqNXn%qg#hlWm5drmdI(a|s^X5~% z@D_?STuWLi8MML3i7HRKkb|`@nZ@#PE-8bq?w?PmMU!Z$xI3LZy^v<$5V0wpv}kxY zsSb%zs<;8Q#-D<*Rw-<96VM)Z54+?~fG=Z6Z}}6*XR#e}-mF45yVs&a$MMC;kpjmA z;*rP%+*z{>?QwmGc$SLyLYWv?%;C|3Cs=onJuhpF$yYiKH|=?-4q?xPdpk{O*iK;w zM)8c2r;=WyA59FC;GI}InWk*iAS>4>DiMh$*JT6LojgdVQ-a9)-(Tzx6``pk$I_q0 zb9j;s(`lQOBCVXbmj*<<$x>wKwCbtI>Rtf6NR1!mmqw754wdrxI-m9oOF`|Xdn+64WU@KAs;{7nFbnK%6xd{ zXCHlr6Lv4LRE%#C^g zu@N$48FCh_vu)wDmhnHql5lz=#9N{90{UVvu+g9m$;lk8mdvArFXYL{SA?eYsFC^P zXAs`<7v0Of@$<}6EPg3T2hLQYorG!h<4d^0c_Q~xF~0IN=*4ygIvjEk6YuZG_&-*t zHF&@odcMKJbmnR7dyjp#=P;)`51NXKH1|mY;#l1gwICixgbeAT|6epahGX*J1vvSy zCFcPx`YV`D6*;3R;-wgkUGNb0mAyz33ZmcdThW=SL#P-*M#k&Wf6AJC{2FP8L^!SY z_oC&WCs1<74wCCPq6;_M@bB_pAnYQwugJiY{>AClMb2fd;&AK(X_Efn0#E0knP)Jl$DwX8>WjMFB$+B`55LEE%;!& z9*5FGaJ#0S^GXuXdwngk50R&z&3h4d&X|UdxX{^ek>uJ#Bx{n1zm`(;?(GwNv5Cg| zhpW)1B1&i~fJjLj&V&r(=n>}a_?WPjEPW8w|BSyE{vf*b1w<_iv1O+@W3c8>nV%ND ztWu?>s_U3HlJy>E&tl}l6{H(<2dPdcQLPY$0*zq$FoQ?7+ltVytxFD%)=-zL8%2#R zMUjXPtl!S1PgSGnGB=O%icHDr`ZPLWuR^09_8>R7n?B7u&OC1s3aoUZ(>ZE%e_#nM 
zdXr3BJNfkAon}Pbjw0Koj}h~qHB`A@(5!2~E3dQIQ7%kZ4_?BwQ+YTfeG^Z1U&pt9 z^|1PI85eY7A>R{;hBZ>Sv-Li5++Rbof@y=Bd+>twKQZ}lk!5L0Ngm}W4R-}MYZ@(C zH;x{}FTu?=6`Gg+7Wr}An6I*$H1C=-FQyF6JtlO>sTuD1XRv<8I7;1Jj$p6D2u|LB zlr>Uhx%&sMeG;Wj8QU>-ecIT4CjkFP*_up`zQX=Zew+FFO;D>w~XoWw`kg>GMjX!rqG0yTWDbF zCYl!zPZsa==z`uBTDPbKrlykg;^`#1tEEAc^xxu0(@1)>Z2_rB_>!H=WGa3fP4mCm zF&$o>9_mgY4Vfg;|>w?uaOH>rd(71~-G`eg%Jz}1m!6qAOyB9zkwCzbn zcoP|PK0)uvd9*I14@!@WDOCLjoc^_=>3j=52Thci8R5eCUh{~XTarvdjkTl_WJTKHCCu{^C-*vKN*vUnljrTJ#;FYeoWV55>U!Q9d-U%FvA(4O$c#1DA>^g#Phl zziBb~d|!dVI&;KE>d}ig#uT3L3^mg~BIUCawAA+@oeM?3JdfIr#p27B%@8`#jL%+Q zk!tBl&57IbfE(tr+&4nF{|+hle8uj1J~&)gwygQ@#Z;JCkxRwhxcg6 z8ivV?PMi~rCztJ&5UOcHi;W>w%8e$+0+7LhE09i7Aw6CJOqnO$;b;l{wh(+cVuCtT zX z;c$4AgZQ;-aG%R`y8V|iL$?#96OLekV=Gr=CXPiFCRl6RhpW_#GW`YY9*n>Z#g|yG zwHKqmmLYnwB5bM^@oem5`tTqJA%kWJtPKH{{DS)%XJ$`BqUUWf}sJ7sx_vL=mazk)N}vV z#d8lD@1kN-4mMsp0|kpLXnLn3H)0&sHEYw6fcdbw{Ft$??r6>Eg}6Y7);7JzE|F~H zIhG)+!-GoeR*{UDJiW;sO>H^Vc&R2!ZLzVGGkYA)Z_mQ4#IyL^^b3FG#7Us0OIJ3? z(l7SiJ@0EzEAAeo9Hz}ri621^cL`7(W=rcOgYl@-j+Qtm(6YYYh{{<&xxNFqDm9r7 z#jixl>UxCAD3e&SFsYb0(~~U~@Lzihi<7@W*{1}&^X&KSPDRzEYe@amja51?^Zf-eF30Czq;^9rosj4K;?npB4Ql#-s#Zb4I zj;7ccf3vQrZE=L z-cg*4KPgkDh%`Mkxe3y?!9;)BBFgmr6C>${U<*}9SkngER$Q6>0;)=F z7|HyR?|M;GdM_9MK80YKQX{^e8$n59Ihq{a!#K^`7@N7AJpGhtL_#2$<}uHA-3{Pl z1Rjj3rI-vKk{$IDceGdyKJEcpCk^23^%$BJk&9|!13KMO3%!qtRP$Yow{+|{S}T+c zgDFQz(PtzCkszxD*1iI*)6i&JgObB5q0>)ASb8 z%1f-y5E@M`ugod@dJ=v0kE120523Nel&-W+rGWb9cxS@A%?2e39w|vKb|5C3avb%k8K{e`hkNB4{M^cVlow;De|HEHT}|oPDgz2v=jd;j z3Y}|x2|G>3s!#Poz>E!ar@R?fhPG7qM4X07f1^ErHbts0qdgkmaL^}+vEQ{!>rSL% zmvMA;)DNt`XwCF#Z4#?pL>pZVDO-uL=z9KiP-hNZeOg3wB|~Y+)Cn{-<_li`l%fY+ z`6N5<5>*O4q!g$BNO^P!xzrw`(^#1 z;qLH#WK#mdhV~$AVliGNU&WIhqExNA4ae+u;?BHQSldvowZsRv>v)|C3a5MKGg z&@d9A#Mg{B_j-)g&(~Az{q6KYildx-C2G$!APJiid|0SSixs4E*cH zjR~dRCQ-|%0`?b4&7`l4W6l`%HT34iK1Ir@3zTU$9)lh^+!C8ncufp)L z+mN!j4u=_;@O>SE#$atyyOobkS&T_#?DqK56QsT^9dSuRn7E<{vF)wsn8>4FJw*^Y z!!ijAwdhH%6TNKFB1`i+EV%F!noyt}6Fag0=MFMiaSt!0nMYdw8lTO^)1Mq3rQ6=7 
z&MZqhcI-S3&!~cl{A&99MugV+N5gGF3gc34L##)Ng5#<%Vf9lSnf?>rM@Lb7#WKeF z6_SnmIC9v;*x*m~a0x0UzaYjaw1kqZOcI>$v|#pMOZs&qns%OP#ls2#zDHcbHmMCX z@VE&M)yeqsMwEnB4MV1PCF%XOV$WhfS{oP>>I~;G|qQ?is4lSmn_9(Qw)_~cSQf;> z{8KfwR*6vg^lkX{eG*M;Ixp%RuHvL}V;N)#L!PeNmIVDRQ1j#=xW zvNVj$l5;S2hB7@}?u5czSC~I}kLfvG+!Omu9GBSx?ODA@{`v~l@B84f=^B1ntJ9pR zN2%_;1>T#y!QYGeSpBb)3;G}g9|Ip~)Y&oayAjT_52NF0F_-?_hOuxSnD^co(z`w3 zdHgqb)p8!*UNEmmyti3B8_PgHM=6|2;ShU)?I` z%}=2G`dGZye}v|(vnkho4|)3>A#Dj)QoXU2ocE4_V$%_%e3z$r&o#;VN-k`#Ji(Fk zwlrdC9whbN5>H_+9shP0%{8JFCMil6*X_cKsMB!dqM#bJ5R2w;G~z}XS`WD5ZI2C7 z6#}tPfql0#UcibR8h+qq^AeyLd8kaGPftMX;UcS=vkEJ8I%2&aC&~$nSOo_ zCBJ8qxJenb|H)BWcI7(dSKmecqS3T-=@3RG>+&?8kEiEO<(OCd>i)EDv*SxVT?J{@#L=|^+ZxV8^Z(&|g zG8{GPv3%wh_mc0$QGX6Bwx^8i$b8c}`&Oz8@-;3OzQRosC!%nx0SjxPpSH69S zS|osi+gbe7JA#0Y!{onQn1rj2QlQm4Y#eGwS@lB-@DZgwUl=>RVHtJ#w$i7PF{Get zNtr5=G(<;0n@m-TNZXKbD!EYpQ-2=bFCsBCH1oBD~Q1v|ym3E^^qTP&Uev+pvH>Z-8 z&Nhm#V;V`g1ch%1fW3MzyqzY|nC&jOzUMadeL^8?dH~Va2XSBgIV_i5fPK#-YI)QE z1z|(Vu=|X7%QP4*N`&~)7u@%A)hL;n3CE&K*uP;h4$dBg!Tvwkwa1GdEEJ((<`efy zDUhc0H%y-9fNF!Km>Kj5&cO+=5^2TBjw6W3SODp#rchY%4ZF%D$l@ap3SZAc$VnOd zj-+Go{Ta-?&X`D%htRt3hn&N$?94a>HTM$;R$l^@Vl})5(eSLw5z2~9r&C>H1z!j<=ArMwLJ*ValQ!Em`&rO$5N)TGhLj@ z>TK>QX7IO?N#ivd8k8f7TZ6@Y(HIIij?lddkP8sU)`C1Nl}W_j{1W(N+QH*~i|I3-vwRCMP$Ls~hfhJj0HaGyGn8A$H7ethnNb9T7#? 
z^JYFNH0qM0@DohjUxMPm66h!;KrBd^aF9oPO?Sf5s|)raxkyPGhdC@~Xr#V?ycVaS z>v1T){t+Vm5(#=^U`~!Lxsb9^g9*zPt(n$?jO(MZ@%}S(F80HNGtZ#$UY@%4SEEWK z2(~Y!>Fv4iIJ;Akpc@5E#%OpX96|z)peFM(auzYao3TsN-_;@`#)Ay}*OMK~lR4EZ z(b*ps2sStY@yrqopIn2$5FdCu&&SY`%V_?X3#BhCKRId=;`ilu)Fpd^J*$k8@+|5 zT)B;nb2DM5t3v6rJPf_4LU6S-sU9{XiQo0`XBy_4v#060hc0bjV?&|OgYluM2__L6 z(6?uR>t@g9v0~5N14A>KZZsPO2P8v60A6+kDe|6q2kd_JU`5X=DK&>!!L43 z)Ck3k&k@X{x5j!MVHgV5AoQ;yEr0S7x%n>GexsdRJm(QQ79U}Z=qQ}Av_au>Iozn2 zi!s+%%EPXfyMP+kvIn)|mhEf>%=ZnK7kFdCBB6{3S zurA&VpGGf3ar1cuXLfQTW504~1C87z9b-DV*9`Y<;&BE)xbgQGUud=&r#;eecB&z+ ztWv?;NpeWJ=MIN)(cHD4?VOW#J$x?C2kI?RR5Tl1H}x?tF#xK)L)_d|j#w){77wEo z@pAb`uHPh{)7iKjshtNgE^rxC^R9ChH3}6@Cof$BpZ2G48lO6^+-zo&LiR_KRoz*D;7TG-33Pi@4e!gyA*vlwk1(YpRD3 z(A)-P)oO^uJwck?mr)dFf0TM+!%1Y965YQ13=7BpV7aFl80wBAFRgg`aB2zJHYd?y z#m(gXsD)9Ne<2m15lA?~wUUUKCVWNNHNbx#jESUox>=mqTgVsxh?ACc2{Kxl9T zRk1v(*;H3@um6O>g@@tyb1&v?c*9M9t3e^A6`1z)BnDdA@J4$RT2G(A-x?Wuqqm6) zw|nB_=+{trs>kD39ic{HVe(ZlCBB?0-Q0SJMjc&7?N-b)Hrho8pD&~4P+6XeQ8QUS zu%To133Q|K3mq7?8pka=!bH(t4SXy!1C!g>0v{1^zT+-x%J(qU|^o zssrPKNC++jV*+Ec%o^3ndtCvho>>eTy9B7_&c)-1kW0poP1Iq)xLYLK2wEfe-(Sjnl zjO)hY(g!HrPz^_q$4Hp3PWwi#po#OpLb#(9Jk>wwP-WVS@-DkO8E3!+D* zX||dXZSLPnyz?_@=+tin8tkRV@(y&cgYgyzMp2jZ42)W}luYZFBWsfh1UcT&NZx_f zT`V6qT*erLBe*+j8^~=nJ}$|@vcqF(;1WGL7t>2OVL&jR@Qt{$RUfE;=y&Dlm0`w=8a9NhkFiD<3t!Ixy zsOdCJT4ON(n&xMm?5@C0r|qbT>_BYQ8A$EV!LYFv zIK``2nW9LpKUtr^{*GUL>DZ=n8^<>0;f|#{C9xiMao~9bPU?c0YcC8e7=t%lkFGnx z2xN7u`Zybyo<2bf=fv8$Hjh@A+lCAwq#J`PzDbq0T4C~PvX&>6y@sIg>r*X`q4jUpKVV7+Vne+a@ zlHH5j-$&BCnQmnBFpkcj5ht(r5@cYOhKq;9DNdmSOYV3uHu(WewLjqN7iIdC!C1s@ zUz!zOjDXGx2+vGrwIbtfUHSBeWqTj8IVjQ!z`tMXX->Zt<+feJ#I2Ty+5HanaV(1y z@|8|A&17>`D-368(+MMg(jHR}H=~<)CzOw0u3J!H#eBvey)fOJ%jTk7!VjZtIP3V6 z<_8D8 zeiwP?(jlASh1JUW(4OcEjW>;$=EK8}Cmb5hUvRfCKj-uxU%>b;LPR1-xa=2*Gb#^| zdbJwd#tO7t`T-e{*(k_93hB}_?7Lt9-?!%&zc?3HoFmz65q4HhF2S|sf4Dh$tG#Orq%3Qy}p{_{7^!lH<~*Ln^s^qAJt)r9`BEAV{nB8<7xf>^`jn5(Hz>*mR0 
zVU`~ro*PZ+#*1)deHzx>P^CV-SCIMTkFddqtT#B1ldJDS=>Bzt>1Lqg=6A-+eZm0)%9SiA=(?ub$P-;3xT+DJKwft;=aC9-^K(XlmHt33kAGBJ43 z;sP!6xeyjzh&^}nF-Gwo7y3g0v93Az*6EJm-LtW}ZI~PT@-Am)`ia}_>jxX9U055o z8DFj&vpPZ*H`)B1OnpVh#^1-nWzC$|7akOu-uh@|JJ&mLAB0;^!k+QX!|QK>cYhy_ z_RPhM02fTnKMaFy&!JHq15H-jBi#~OkE9tNc^U5qs^IqU6!Pw?!=33#TkkO5L2DAl zGA(iJ8%y{<3We7CT4-6#!~LAyxDfLc=h8Of++C&(&SqNXabb%7kJWMq+3b#MAJA4+ z3YT9Icz8sT3P!eY6)I&g+{-fC&3cq;poqQM44`9qP zRZ^{6NRID5;8Wil+?lIGPp+m=<{2m2`Ad~tSTA_HY7AaV3?cERHKj%ULqJUd(=`n# zZ0BtHG{&2TnI0}yzXrdUFMa982l}vl2(89`WUpjTV+VbyZg~K$S!qX#>ki|(Xb6Q* zKSJ&Wr|9{%bV`X-C#?xr@Mw7?{wzK~lCc~~#_Xn2Ss7ZcIgZNi#na}^OXt1+aN3{RuW_}4IwnjF(8>q#^AOC`ej zz$LWr6~$J!WZd5;NBZ|Gas1e1^sxS0*-e?8t3=7p<{RGW97j*`JM6ry%A0s*6FFYC zr+wdbsWqpDBHhDi{azc&J9ie|-8aZ|w*iTT`q7UyUnuq8ShBEtMh+xSpFha)V%E9R z<&B=C@kEJgLL=ycXCux0V8Cl0T11~#XwbCntCwP6Dd*XS- zMP^g#H)E1nzmUECgr zfZQ9HFl81#rhvd9g*%uj;YsXiobJcs>?!N?n6 z?83~cw9cfEWq9JTnX|+`X*OHQNd&fm0_-bgd9}M|k=~~dwXF|OqCSG=1tucWKOGN? zJz#n=7}Ktslaf;V)Q$Z+Md8NYb{}w5;|wo*H`L;nu$hV?Fh{xKykc zx5m);SUgTBgy@M6Q1y(%q_u|7`Fj*5*+~$%UShM**nFvvnuz`(hM{y(_8u{I&vP5R zZO$WR@&pv^sKbPQrr)>!#5Q+FYzdo-P3Zw}sab{2WjfgW$`92e)8H6c1W7hC#pGEQ zqUz^iU|TKf`gY@>g(6M-5QL>-Q=z&!1Obvdu&PhT^g}FvwC4!R8_dQo&nld5P@vbF zq9Cnag6IrwSUvxW*G2j?w5kPNo!{ZMM4DcEC{kOyJ0&W9#*Wx#$ZhY2s(|&~Ri{XA zj}BQ>_hT;0M~Q9pz?T`0)ZQFMp5OE-?^PG}d4=Ii;!0ATIfM+&V7RJ$!L*!k`Vv@$ z{o^b!bKz>7`pfb%J*Ai3!Ur!5g;h{ci7Jo->MNC4LRDGmjO?=8iZ7C#Jl$jZ1GD!uAdPm1<^Zr;V40S z{R(hSyaL-sKgN>ELM-koL(JuaP&=Rn)gLjq@m!2bm(;^^Is1mR4xr5M0ft>3piMpw zDNTcT@AnzX%lDvanKVtW)qv^qVcZS;1+&Bzq-1=lc8Ab zTZWvAM{s>dA!1GiAEB>w&~;^ydNdhQq& z=s2PM!3b)d`5XJ4e{!=v>C>`z%1{jo=QLG2xGXlSO*ljdCF>P&VW~Y-?n>b4*9n+z z;sBY#sW4k2k1U6Bjy2)9bKIGmc;YZ7`#GY$VKY__Ug0Je{^L}nUGN}Z52g;|G5o5H zGcRr7zL`vdtbYP$VXcJGLT*^vq=ZE^0r|sU4At_ugzV8!*|Zpw-pql>9y=UOPT(9{W~0dDCw~0R!l~;I zP_}Y2J#Ohlp7lRmJ^PuyepM#<-$8gL8w)X+G4y!acWQc)K!tmjBcYn*O(we0cB?t` zYOsN%jlQGSY98rDKE%WFGMeQ7hyDMub1*0aHH$LPtJTXH|4M?m`)T}gw?$Fb8f=VP 
zk5=h<7&&(W8n5aj`_vKq-g^wYMSpUUh+{VvgUaQab=K=96I|OXFC`hKl&gY{Hsf9+q3cN-7&24F@{^=5H>i-uw3y7YP@g^ z-Wdn*bo_U;u*LQEzg@`awHfLK2e5MMC&*pUCe?;BOuz9bgL}Vlq)(j|-pZrZb6(?Y z^>cWNgg}RJiD}*Wbd7nxb9djsDluP7bu4Eya@NwTnhoS5w;tws#gsS{gGJ4|DdSWR zzE-b*zT#q9D76pC2KlJ(kAOx|8cu61K&J((eKg+SKpLTVlRuW7^k=gj69AUSs2916 zYnxqQ5if_0EGHCQ(}|a}USsoy2>jZ13hA>~;^n0(1SIT-m0cA3#;TCP+^bk!p9B~E zI*iR9Nm|d{aPz;J^!}h49nNNRZHfn2j^36rxiz??oq{`0S^m~?HdLE_aS|66JG5ZY{p(U{+7ldeB>Cc zE;|Ur*_zlqKNhuTjzHAEi3{=f!(Ww=@LT>7N4i4s;rKDk)YXIi#eMj73+xTgM|?u|RAHh2 z>&Ip_gM}m5Pt}F0yW%%%?uvJck5KvFW(BYJTDQ}9w#T}y@tYM45}+rs-Ry5c-48$&+vR;71V^p?n4{ECaOkRErl!h2Gld3L*~#a~Z7 zi-6@@_&?`ZRb*PNHV+&*)*@-*CG-Ae4=jxR4x7te>ZtTgyHatzcBIA9ost%8l<`8< z(EqtEwg1y~{l628|L4j=TE_7M_$|DXlVtf4w&!`_6^4AL*q!{rO#KmSQ>t_D{|H@SV=gK@hx9#*a_VM)F=DFqnyJbR&|Ia<^o-fLOWxugv#LTli zzwG(uhuRkKXUt!3uHCxVe7=n&PyV(xFQ&6ZAV|Ab?i=aD?;933|Ihb(spl?rUbgJi ziXS&)%?mXr2<*GX1m?RV_@Vme%yX<7E8I>jsTjJ`UY-}=$)Eo-kbk2fm%m>kir*jA zCQz%IC>VVGN-*^5Kl9SJ?&i&6nSwD|UwCHwOwH?3WzButua&Rkv;~(ANAMG}9m@-c zCshp3yjh+jtfL;N#eM_OH=`s+z~&EMPRo5$7*&c9m4 zcbnqJdt%+jyMMot|1WPzg>BTNinPQwf!9?d{x&BKe#R?rzWJY%<%XB0u!)Gm75^1y zmybEIzTCTTn_yYkLB87vJ@XlR*=BE#4E{izO8FU=zp76E1)0)XJ}}xX*L#bP&v5WzQ4T zUEnLGN(i2x5UyAnFIm1yW(x1(t3saHn`A-fYH|L(?>_vujbePg3E{kiY#sh0{nPwj zMH9iQtCi)am3m4q3nudZT+}Fc8=uAZ>^fY2pm7#2qvIAi+<9zU>Mt&sw5nZu~5g=X0rrue-y$ z>}r*hpt(+z|4;Y@udHHr#l0;vd2uTf%hyyM<4<m~n_)MWmG=5Yc+W^DQL`eSTNW(dEIgaqpLRb1hP zCcdkFGJnnvVK%n#SXn`a9RIrbVZqQhC!Shdh+s!h6n{pV2``9EJD;tr!naWJ5?F6u zTi$G!N~xFB1k==B^7_@;=CZGu{Nz=t{6k~h1S{XYD#rQWp5;-{Zxsb>n|1P2si1#t3f2r3uWw z&*o<&Xwbm8gXQ_^Yx(n5KP`JN{hn`Iakl)Ioi4xY+N<(|4WWWwg<-*C+wbMwnzPGi z?b%xH=xf6F>WdQe{g&l5J+0%V|MlR%-TQ@KTpUq8_NYBCsb0V0YiK#&WrRF`X_20w z*2s?z?X)(xS;XT#^AzI$YjfuRGt{Jtw{Mf;8>o}{UC0RP)3SN(f@eQ5+({Z@eh|g3TCS0bIv^^}4IYTM_NlXE^_rAC*(Mqa9H`PNbVtMbYM@7U7vXcs}L z^jAB9%ERlt!2C$wnfh#=LxzE1eS(~zXU2c!#q)ob-wqWiuW0k2_nXi2?#26*-!)a_ zi~IQs4AqVc{@l#s&yWxZMhV_h(xACu^tp`k8OJU74I@)|1Ii->xi&laCnmfu*SMI@ zv)&QUyQ%c9{8H=N@^wl>W&I<6lh&0$S}xbkEgUtOuN}66_a)OoaKLvXTPY+EtZo=l 
z{*i6X-q-P!r?AXl@V%TpyFwO%T%Q?&l;NW zuJ=06bMDW&KP9@Twf!Mgk8)s+*Qer{o@{!KX(TRr?`UgEEL~R_fxSiX$h=g9K5c&- ze{+s_JwL-c|5OA1>LtN@WgEyk#6VZ5F)j*_q+QAUX6l+B#8*cSny(b&d(USWY`Py0 z{~ll`KMaOL{**i^*#K^@o9SvDVP??%0$3Ld;n|1?EUN!N1HxswVnqoiH9&_Ne9ORV zGSS$#@dRicH>DZ|58&k?E!elf0p-?YlVMK)*Ymyz9>vzKW|C>B#v9TC(Ryg3o z5GAg*=@;GU_8zBSyTVx6i9mHo7nL101KCgy=HQnk%#-y)*&c@Tk-HAk^Hm|*Vg$a4 zh|vFn1i*jAYf#NUjFOBRYD`70?;cz<~|>QQ=)U$baNVbtGePKw60oJAET|i>H?*UT&lR9Tz5_tcrQv#j2>B zHw}In7vp`0TAslVc_t}*m>w#ciwEUi5-0hkR8QpvSt`b{!-vB0YMcO8xyfRW%R(JJ%?#QM6C2V#U{8Uv`K{VS5VClANdR&g!j zw@5?4A4pV`;54r1;i=hv%vs0HczE4yI=^8XMi@zQxhbLOBExSdo&DY{Y+DWP8vH|b zS6FcolN-Sb<)jQB-38Y#=Wq3A+= zHuQpyC4*j1*OSj*_uw9t=X7>t09o{&Un%|PJf^$thVh3s*s!{i#9zyS(3OUqQvMZm zy?PVcecH+Nkm<~lODpLjrUJ`vN#U&a6I9SO684qdM3Km!=;bB?dgkhk@}b8N?wdsJ z`ln%b^eX01;5{DQHG!*DzW`a~a^NLj&;JI?F?;G#cvpSII8_@V`0?>IxUJLTrhk|P zp5HW}z2ynJ{@Eqmzw`-KwrX$&wtvYMeGZqXT?UUeAJL(O|L(31!tD#BxS{xE+;HMB ztnm^?+Y5r+NBv?Fzj+rvd3ToR-OB+g@DHBv`odmgHJP9jmF({R6cR`LaQpkubVc+E zEP5u!T~ywQN#oLtw~`?)>%V}z{=R_Ulcizr_hHGn5*QkCl zlKCj1M5<$ixJ%s$yx_qZ@Z@qRW~YRc6A`0S>5u@kQFkUbpEqUZ@Ec;xl~gf&qcDc8 zA0c^j_)X8PdvT`uD(H_dMe9YqaMw?P*)iXOc(ux5DZ`PcO68cFE(I2!gK5X4a67b&>GAyvWyg17t)MKY-Qq-J5FlUxHoX zRamlOJKd_U!|aurL{c=GdHwTJ@nPZ&u6v0kS;U+oTEA`*+Y?4Kt8oIi#dSH;;~7Ky zTZGU%zktR>T*jQ+FGz{sUApSd0h;Mh42^lZ_`C2t6uq>d+Z|2d!>j;0#t6_senaqS6h~LLm2lLy1AlkLqu8Iz#Jwh-=FPYUYaZ=D1JSlPj6UaQaly?SF2`oH zIr|sEY;zUtS9*-b6%BCRCkW~dYH-CHM{safrRm2cSj!+4W~hD=$hRJ*_O|EQWauS- zmy42J{-&tn6iM$tHRi(HdvTA3JWkbf0p-jroGNO`Bt$9m%G9c9W}zki9!!LRt!c2< z;ufB%ox=#vkYv6IuH_!7m9rlY>ET?1O>FYOQCyxLM}0H05km6Ft(u=Wr1c+-j9pD1 zDDo|KZ%vLnWN>a(X|P%0771(k#cx?FhSCISEDQNS(-|_S}yT${)aL z7d@swa|xaIxd!G+Uqh8}A6(PC(=69ZnO<8K2Ip>G#2KH`X-?CA*uS=cB;Ii04fSxi zqB{`xzYXQ~4<9DoR$uV?F+qm6Y#oRVg<5djH95C|Btu$3#S!bj1QxTp$Uf$*MT@V>WDgJ)djv;x`meD<{MD6-d?2GS0JX zh<8afT>CK@SNg_ces~i4ui-a48wN8mH*;V#ERR^~JwUU2#*BF5QM}u$z}PBf;-L*~ z@PC|r7VVk1@BQcyMDf@8A5dp`h4 z)`xsEYU~Eseq5cYnz04{vx>kL>x{A$<%Bj=9V3IzTjA}6cxG%<7^KdrCanjp%c5cz 
zz_0xQq-OO1ZZ7U+c@{~yyw4y1#mCVa<}$LOnJ_)Q~* z)(>hk|CO$$)~V}wgJGq-xrtWH&h8efn$2%@ET4l{G>xFW=O$$CN`u2Zj@(y%28q85 zNnO*MvV}&kK+dV$m zzfJ%Q9vsHe+g_w>rspymzfMq(bVDtNLXf3b`>uMTlL-SGAPu&F!J!&!O7M~zK zavRPIh4atlLbQG~sZ6OMn*8}8%+0zv0Gir0IC+^AClq0d6N_v(*Ey1)tk^(JKDd*O zj0VORX5*P9Pss0;iC7aB4PV8V!Lp5eDLdmj`F=Ycbl=^jCAB3m#8{HJ#1T{*Ort9$ zCvrcEFH%FZ`()E^Z#+{o9|Iri!So+RD7iwIYuDJ1-aq}|!-JWOaaJodIPOOa^J%o= zYyb@COF;Z_S*Fly0a_H!2Gt~46#4H0E%`PB1~=N6j;8W|f8GUovTqKcuHE@992vV$>&M=b#8p+-0J9YXxZ46yiwM zUgB`9g5AC?8?u(};V16i*)NzccIIJMP*O#!b;*Y?>!5w6tVk++oOv9j}7i@Oi88k1P$Za}eM|&0k zKAbiPb`gJR()E?xZLJb|J}?k@Qwn+hVP^q{WT-R0xx8tPF=X$}g#@Y9JVmKAGOcPF zqr3MHoPAse+b_<;D{VYZ`Fsg|dP9uo_U09y-rR;FCm6=PF#?{ynhD+?Zev5AIhZus zq16T-ZfxlR?v!mP8u770TWK^Iew4%Re%S=wyH+qeFDBCUW?9%-Aj65BRf9H<*RXAN zBdQL(W%M?ZW{@3ldC1+S5_024yMAA&RguiU)|&p&jHGdfm?e0I^!9rfxEsJV&6CZ z4BN7niXKTTOS7z`{x9FLmxkVe!X6bQ_w&s*S{;TTH|H`PgGg#cgJ}1b3-}>#2@0h; zqu1n6&{KYfkDjeUw}SJ`m%LDvJ5ABo`6`6=Ji<$NhsjJXlYQWL2FFIDfu1hlZTbbw ztzb)dc`t@K#Oc6BYDlf}Ho~G`>zRosLTIs$Go#jTfp*6$x%;_gaNI{7b3Uyg;xpqw z*prfkX>;)wGYb1mb7`mAWgOlzN=?tWQq$1O)Y1ATJDl1{Rp&I&U#0FaG;JYiZ+k}E z#y3Ok_*!<@F&8otZxXYHzZi8c7S%HcY51vCSTZ>s`qV7&n58oL@ni_J(oHeYO&piR zX+VbSAn3NL!jIaQX6^wK>EN;ejJJ#+t2a+Xm#Ho^=B*&=@v-4te;Kkpa2*MYSxP6G zZb2i>A=YC`DrWBtf{^&HM9z9KY^zJBQx*U69{l}HZ%+>bFT)RHj=C&-IGjhbF2v%L zziOBhRfi=rHk0_>jxgUKkQDT1!(>BiXq`C^K3^YT-P5h{$IeM;R$pZrb-IOjC*uXV zF}9mGmHs5@Minq}0|qO%OC2%?qN}w zD*J)n9-9k-7e1Cn9=i?I4f!PWLj?7BTSiPwZRl)+D4MOk4WIEQG1*_@aAIOS@8IIk z%IJT5$xdm1 zL9S<0%;0yEL_~GrrXR*Id)|9!bsR#S6^I%2JZy_xjw;+?{=yjnP+5Bee=iL~VNE;u z@>QG>ox;b`sV8Wht0H4Oq|MyaGAHxE8r?GuxU(m=V0C^Ba(M#aU}1>Hf6730UO&`H z#Fqt!2j}x3H-~*ow3|t;Vj(&bX+LNuJbe=JE76>vWE>jq1+i;Sib{(&_ z=yFQ$_kgFzIMnxUCr)2xa~5r%QI~&{35I46edio_^7j?q&ur#xI44CmsLf*!M=CIx zzt=Mp{zih(1RDtU;bALZlL*SQ1l`d~0C#8M;Mu$EkFTPf^LasL-FP|%jsyc;8^t@~ zy%0DX5w7#Z4!n1IoSf>ALC1gd!6tM$?3J;DPoEXQ$S#`RPIux&mySZe)_=T^b(_jG z4}Rg@U+fBN^#}x?Hio!ULY#p5Byes!fPb`)p!dUabXQ#hI~%vq_zr!Xt~VPMhl26a 
z{+~3EzfzAkb0HD9xfp)8JHW3b6F77un3~cZDDu&UstP>AMn!iJzZ`>U6IPoU?7IRH zIlDo?wjIs${ZZ@41Pp#P6+GKiP%rK#_Md)B8z&rOR_6`UF}3?(6EBZz)FwdXT`fHO z{yY_yeo6&cdz@DEimuB1%l@F(*q#hM{M-|TmAMh{MH6sS`zJa@yq-?{9RVJ9YRN!j z4p|j56TW>>X3Po$OD@-q&_|mbVDI)TaQx3(Y*D;{?@q46?x>q|fe4E?+azJx&}-uH z`yKQM+QMo6`XI`Rb6E;AK=jKE`eLUzSV_OYdC*C%R(${opEKxv?=M@Cb^*=b>M|Rz zeK-4*W5ZrhZy=@HJ>c8=-T3|EM!5Q9H*RX~BVXE=V$TsHCP$!#_ai=ycW7-HS(0go zE>Vv7V|pCBB_JMF-CGGeHW8=#QDf;c9Z&MiaIBvIhskjftyfI^6$&Snke&Td^Kk+lLy4&2)M>OQ!7=LpQT* z-s5l~IC1L@F7}sWwl=WsL8b^9gOul^+5doqDZU6})pB2zi*XWnpmV1#iiIShMd@tfN#V)(j? z737#31NprQvs})g&B704Pmd_kKhkQ(9ZKXZMz(Sd$thT|q!gQbOo*(X26}%sL1HYy z6nxzU()YzV*R9vtYzH3wlH^V&$FC$cfp=k#zb(^VU&v&7e<#=eo5d}C`~as#zoj)k z!dz&@cO3fEMYWkbG34H7aS#&kNW zz|m0!cs&H@XB9+*&I(cO#BusuOA21tgwQ(IyEO6cI%+sQ26{i}V)etx_&hy?#@f2W z{W(E&cY71wZy6#DcdxT5O+NJ4#jE&F^9HF}GD6l150Y+43!IuE4`Ms!LLm(!Z&zx< zmCF%qE$=D>IER5AA0IV!N6`Z+Z_7Ua^q|2D7X!b$9X~xSDhqbHOYhAuhRur3$X?B% zebOS_{IibGml{Q$z48E2e=)qCK7rZNxE>{Qjp*LoY&7D%!?QWjgnk}?9=mRMIKGkU ztq_ER8^fV0V>%UlV!*^a(%|N{Cc*24#t_eoBRrajME@0=vaA-wugEZBdmOnG;oGz@ zT#HHhJ%W-I9^`!20IV`K$eZb(Oi;k9=1WZhQw*(^l8B`m~ro!H(Flx8W?0r)-9*TU+F4))!yZ=U_*8FDZDoq8;*H>_AtO%1h zSqIVthQYgA8N%o-bZ$F~GRD*3nAj{>`Ti{QKdyp1P497uMkILOo{qNHjG&=RoO@@d z0AUA{!EKuf=+rIZ4u-wsWriexe9KbMw9X)7o278%{1nJfY6hXP5tl8J2}2hgZ4}e!*`RH5^uj4 zHg)6_Rq#7V_83e@m)o=HRvkUu;t@*3x?_2+PO{|k&B>&6P7fhwis;~NNKgA!5YJdM zxYux(w&$tg#qW#3!9|5I_cX9X=&tE#-zltn&r*wp3t*x5Ni5txh2PM<&FuWiEmZX4 zP9lF>44sCxScx~GL}+3KbqJn@eR}e=Bw-VA%1tFPyOAt3mJR2V$F8PE0E2LRM`Q828KI zUCVm@S=mG{*&v$C9>GvwUGCMEGsMtDgmi!Q1gfS1mMtvUTHPVf{biY@edR332yzqk zlc0Un9HbvR<9Ug*;4*t7to2>ToH;1KL*i(-W5uX_?AfYC}93DRhiAve{Uh5iF4YG#o$G_p{$vF@c z7s%K>youf65%^N>1@G*_|A=dF2C(mk=v)h3d_Vl06zsA<4U>MHk62Y2@HG} zp9;4+KH^wiAm%m1fDEr41;3xA9-nmRz-lpuZ2Sgg20~~aXvD{X=1}&$m!x~Rex$9 zObq}%nFCl)<5-i0`Ix7eiR*!~bE(~ljYzNcki!~CDT!2G9h z&HWKL_WI+lL$}FcPYrxvah`nPsX=VE2xAc9MFs;$$nal9?r(M$PRz+?pY+a!Z@-P; zO1n5WYt3YCy6CI2(BJcM`hz^wjt=2zg>J_M^D}rQJH23T@f&jWVG26?C)4v|E%3s` 
z9x4W&kkuMRc*ijpf4tK`sij}oK$95qXT}(H2-ynd$5a^s4;!*gTMC&%Cwl7F3#z{- zA9W0pY1TCcYwXvd{I)?-k>UaO4*A2{ZR<|8!FE{5V~P2?kA?@l*;P4?Z8#*@BIuz7<3JPzQuHxlni)x`Juv|)A%b$hxzoO1Sg5`Ih~;QC^aX7 z2JF}hz9Q$SyGk()R?`B(E{@8te}>Ys86;SHGwgJ4B%2QJN7biG%+Bq*LylU;@Lazb z!LNW2vqR;tSZ3BaXr{->dkL&XL^G~V zrgcmwZny3uGwhd<-xFKNcu5IKTmy-SY<)Ft?$CLq(BmNIU6!d4Y~EpIQ*az zizj!5LDR=)BzY~5-FH0=%hTd`o>?k%9`lEa$W4X~hdz^^%i~Dv@&a1ms)cd?>EQT* zWO`V(o%i^77RS16(Ht^6c|pJh$-f= zXf|yRoD8}RORPWPtH;y98qCq9eH*!$xR4P{E+n7q%+b-=ll)W<1YTYa`fBgM>>n4n zMW0of=hLs?X^2H}{axh#=usjd_YhqAp9Rg%+$U#*zsu^B^ zmhu-stwsjaJtlE%_Y>Om^ASnp&+Up@2dQUk60Q_4f#~->+{+S5YNuR8$Aw=ZWq~kk zQ=h;E-usCray971Gogt}jqKjrwV0>f0G?M$z}HWbxm>UtjN1fI&c_OGojXU|KP1D* z7(fo>%^&hw8f9OuvESMvFhpSUm*rFM&W1;^p5eV(AT z;W67W(+~$!*OC3RCs8@=NmyUw!V_6AQfA{YfrNy~&>Wjpa3N8O_`X|*9gfM=an?dG z|7Q)sLU-A!`artJ_7{6BFp~UQd;#7rylocb_Kds>t)&k-RAKI55H3`>ghP)e;~Bv! z+Q?nz`PoQe$;yRfSNv3v9t`V?k<8zTV= zco^c6!Hz^Zvh#n3vtM3L#NOz?=-)D%Di;KpJ#V{B;}NUyi;WLODSyb$J@KXl3AY zW-Ih*hEa>zF60XNjDb<}Fg`gFu8N=xF(*514Sd$s39Bv^{mHvGY05kew2D~YX#Zioj~Q6{h?#0g2ACZ zie!CXNn`h)!e3O8Y&s?mFDJjEIcW=-kH(Lvwy7HYH>kp`-*lOJNL~j&{n@zoqbgoB z0q}GSf}?#oaAET&+TrF&Z7mIn^ymyU8114`=9EUgx(-!O1(_4PXzsXG5qWAH1?D9K zm{X0oP_mn850d4XXCH=`#9!d;XhpA%41mYPQl{uk1CHhIW;Q65QWd^V@F2Jz!BT|D z8s&dum#k*=zw2`w1VSGzDS~Nsw_q+$n)#A>2=(^(18-Ldu6w=&`a(O2 zuEA?oID9vVZCwXj5`5s-MpcZF)(4~F`6yIf$3`>?GXiU@NXw5I+=(lrRA%90a$svP zJg5pmAOCWQ-TDAbkEEgAPCIU5f{{-*W)Oz$$yMkXn?S(Dy z$}F%xlFV7Z5HD5Mn}z*c4)-raklM3tY|x~)G~F@@2hRi(I28}SWD?+aQU{ECjKb0` z1e=~iyso*)sC(fqyEw!bB;N;;Jo82jGc-W`vr4%Ara1NS%pjXfYN71&Q6e}qpES){ zf&NEg3Drs`fd)gw;!YCQ`JIA~hSk)wQHmcX`Ao^Fxis^v2H8F#9lC5B@o#t++eisi z$E4CrUVJ{@`vtMR9fhKKwNQN1kz9z{PA`@QkYiO|tl6o%^v{yr)b7(wp2gO^P9QBM;L*WGu&;m4+wU4f`-E=tEPI5&x3?ZF{v5@vv$mA_dByUm&lXakX@EKDPH^Lw z0o+g?Atk$iVrP~r<6yFYS#@+H7tuc*bjwU}&zo1U;#M%Go8*vZZxV@jN(=4C&!hTd z9n_<^6El?`;Ow(!pseIK4VxCst$OJ}4Mrwlo`ww^6m+7JMRI^OE%3Na2Amc>C1sBK ztfcA+P`_ct^gZz4{oehCl;z){h2PKO$&36|k1Makk}W$i_+1gk{EG(nFRS2}tO6wL 
znT8JHt!8^GW7*I(C7fR(V17&>PMh7!_gY^9zoZu!zilR?8xzb3*~*|%oeK9waT53M zh(wv-QE5i)_#7JasJ`rZx-3i*;2<+xfN5S_0x@IfnGBnA;QMm~%l*<|^51yAWVeS~ zwR;sU3yh@q9N&YYPA>G$JdQ^Nv*2smO6YUY0G+4;K4xi$ykTL?aqA+BEdrTGburu{ z_X6lOP-ViU{9r7qjO1SXM*kdggg5D-a9DOCHIUa9^W^0r-$$LZb^T1}j2uXA(_su`1eh`X zXJ~r76Gdjm;ux8Xk|GK779-_wO16|hCw24`NHWy0;QgMRv28M+H+*!8DUdY`sb5?AXTpce(@iJS%~b>ndDvMG4B6#zMyZ ztGr|F4^h17C|S6E2!g_v;W0&L&URFVlL@=T8?sqWdQF~?wU<&bIyWDlYyW_z1WDA; z<#0k#KX%aq@SQKm1ga0?yM9g1QD_n~+S&;3Uw${^9GAdGxA|C+B!!hUo7j98;81yp zq?GAEzVRM-utXJ#>`$U{);TyaIv4zm7sJCaOSbyh479ayf|i7Q@Kh2(CE;xFz8;72 zBn4DYyK@4C8k~=cFWGoCoo-uL#ZFPZ4)J{c`IWgCbL_D{Vfy?yXJsL{c@_SvA>mANOW7ZW8$QpL)(GK!` z2-UEXCj@>14jBA(!qnn=!Y00Cjqk>5pTP;IP~a{>hAj*0IvE!%^P>;TBwT$%|I5ONZ*A-FV^w!yNpm!tr9XqLd zS{m?N)S=nRoPI=GY{XisFXjo?reu)C0Uzk}RUL4}pc?ZZzJ;yB|7dM^AO-DHv_a~o z8T`3|aAF2$TsHyC-{Y4b=SMG|+$9OctG7gInTaVfS&q zS9QP*wmI|QnACrup4EkEJ8T%=F9O&!{}lcSH3n<>G`tf%jg)*m2Euh-xb9p47I_MD zrt@dRhGt3PpZAMC9p8Yqjj^~(d>%B(AErtHOW>JN1TXWxB2#d;4~4T^pr>B}3>I-D z_TFVO?KdI1sv2DX(sL*|X2!G%s-Rur5_0#9IurTkPFeRx4YZn?4{D+zn3&T5KTH5lG2ZkL}IqCh9X!7hX1jU^PU+s$^vd*4; z<-U(K(rch1dSmD`qn504n!~La4rLtp8gg4{GMD_-m1jF!ni>9=iIYw1QBPEa*{CFr zNsrdRqDEEhkIlrRgD-e}kGfDrCl5ueGEor{VXgNtYGW99ep(KpQzW>v)i;NkwZuGWsF;}B=@3P5gpGLK;X+kHoo{PP4){WMXnoB@71>^Rrb%LMAYZ&>O+nWC?;8nOI?&WorN*v z=7(Hr_mHnwdMR)UCbQuBeP?uDDTo)LOO=RKCAxz7*d*LrtM zJEM=S~kYzmlyYUgE{ zqBbQ6^Gahi_IH!$jAHV6aX&^2#*itUleo80ny_+MidWz~f>rMJWP9&peDa3S3sbHV z_kYFYWnv$!n@M0n$qTq0D9oJZ>osQ$1duB(qot**SeGa1MDm&xxiQ#`U3E%u$DtaI z4GR$O0Tp67+XC*|`=Wqd8d<-28M^I%Lx;+u(Fmr~(s{aA#pf@-w>jY7+yn4B?j^{L zPX)PbX)elb3Jx#KAj=L&lYdWh31i3qc6_Qtfz^*7cf5gY_~?sAUs7;hwH@5c%27Y5 zf$iJ*iB=b6!=CYXq$K1HMn4Q=_rH&VFJp>0QTGfANeAHO76w?4KWts8F~-N_;liNp z@VfE$ON zaCMqK(mEOLPrEn^cLyL~&Msh^B5`%ubNGTkpgi>=I6a$N{P%ki9G~}@RCC@I<#vB`9>zgVYnN zc^egX!1}eDF=?GM4c?{#?#mj<#e#JFP&kVWz21T@r?N;!dm~=9pNx&Q=h5M56r}je zgZG#!ebpa|+Am&_ea?SK;Uzb0AD;vDJy(gmSOJDeiDPVKJ-I$&Nw)p7rSI+bWALdv zY*3^!@lSQ8i;`Z_m3xax)C^se(>8{y0XMM0*$skL-h&%6qrlzX1Sff{K+<}KsE?mO 
z&PAMVdy|0qf(>|UmMmH&)=|BKuHdcvoPJF$<(0bsM=T!*;q8DT@Lwdr9dWw}GGj~O z-%t%6%fCgNQzPNwk502I%2wRy)&O)9&xWZk`*?MyHz2&dhfcML^qsyLl+Nf+xsaUX_x29x|Wn(tTJQ}J; z#sAu-k-e@L$&<26DBGLP)W_*VSg0mfQgJ>hW|iv zQwlYkTT9x8ukiKAEIc}Y7Ne9MNiqcqnf&cLCe95oy(Z6~q8(qu+G&OrSNDKufI1#I zzZ!mUW8_D@3t8uLndA*7VEa`=Mj*SCOcxX7=?GOo))Xg9nAAifbsq5gmmVW~Cmsj2 z?1_v(Jck*7ydjmWAulpRP+fiz{)!)FL*{JbeGvV`vlisvtAQzS@4hmsus3+g`tu>@ zmJ58@BFL-WufnWpk>u_r)REO)_2ByE3cFsdoJx5CPW3OxKeNkejDsOu0X5R^BM)O9 zg>cu42hU6+(Zw#0+&DJ}53S=c^FMXaj=#k^h<~Twx)jL!sxCsF$f4)O1ROhmk(Qg( z(9nMy;m4668sKLJ<&lS2fn6iSwzh^|&fkjPt5Q(XMGR*D9HB3}>~LUO3^m#Cot&eImO(y)e9IVXx=|H{`Ro)-LKZpFB{E|567EUDD+r+ucQ#4R7 z%>Y+kilc>>&f(^D3nA&`MOq{oiFqbn^o_g^Rr%|Oys+E6Wh(-B-^Cfy){;Sv6lT+? zH$9NNc!bQ~;>v2JN-?GtJK$IOA};G&29lk1a0VskL64l^V*?9zJiXGa@<2Mu-1diov1E8+(@S2opT-MCi_kPo zlH1+(2LD^qLnizTg2W@DSfeP$)J{SO4t~YYU^c^^+bYCeUX)wfbrpIOpJVsEd))Ve z2_Qe&1_OJ6nIIv@3>?Y>lcsja%FI9wzE|jGoCxFY|BhsdZ|A&wvw=6ai1#>0k9*Uj z2sPE75a3?`Gq(wG%0;W`zrBBE5c_Tx&X!Y#@VX>OS6+mzEfnT8%*FB(+N?v49a~DR&`9Go9dD@w9l5U% zEaLzH&3vDtXcqi(N&>44G4ku)6i!8|ma1Nnp=*C{h95(o*z+r&7H`gkq~10XcR~pZ zZp=kJzHYR>E*F+RK9BKyzIFGGozSS|#QF9I^Cb8)k&9C;Y(4*nyq&Y0uFlRUhi>)q z>h2TL8J|zKw9R9SYKQTO_d$$5>juxqleovLZiD`cWw2ak5 z*|EnO|Jj8Qh0kx{Rf-mK;En=XyIq2E5qosqQiaE6#KV!s3=$ux06mXu=)Q>Mq3Vo>rzur$L1JJ5$K^B9|X9j z8ye{FVSr4&cY$qtxe8|nAkWSvn~aq_quVOHux_OqW6nRrZh4AuO7;Mr6TZdnJ{dz* zdDm!E7~i=QWwcAl$n{mUnpNP6H)*4) zMgnd*`G)jg)g=|iV%*m8b71#Q5ELF(vQfUDc)m~C=*tsw3^QW^D1NvPwv7e+JZ~X( zOqPVf+^?)jM*wV}a2NKaO5qmC3pn^;1DLmKBe^z+_A1v&t=n0)KPn0)N}h&OG7wnO%9 zNzywfHR#4k3aG5BLbd+I-~r|V`8d6S*E#nz*&9oNNkCLtcOFr43FrUbo#(YxgL%3> z1B>nAP+d-(5eqxZ_ncRg{Dfv&HC2$iml90DXB?f1x5Hhb%jj`|(0Ld5yzF&;?#TB( z9rM_ZJN*M0*O_iGZeflRj+1fxiU4HQ8-Sfn0miaAc%hk}(9ozq$2pPozfK!yoWBM3UmSzq z_kxJmhXDMx`Vk5{x6r-sc9Qeo_Tu#Ynw-vs0H|`Vh7Ki35V4B}Ya+wxvii)j8V=44 z0#`U?oD$EaWZs$sxVCZtZ#^8KLNV41vsajFiOJ*s+3GW<)@xBka~W=2qe6NwF!afW zmn1Jkl&i9O3i-bhG0WJ4SX+z0p^|YjEE$e+`7z{adlmEjjyA1asfPNaVzi^{5oq}y 
z25G+6EJK3QrC;vSO9wVk`5;Y9h#NtB)#W7cPA}bhV+G3ZE@X|&;^=Ij-Pk{YF!9TZXKk0m@!-pCkxc0u2PN0f#>!8dLa+^s5GMznP%M@^=2 zPanpSbLS^BQ(R)eeED+degoX3vh|>~qnD~?UdE>fH{+S&Vb-lvg#)gK>APM;>hI;^ zs$XrmJnAzi={$_n4OKwcY%7Xhc#V>)tBHFM;m(cS;i~>b(7S@KuTtbk6TaE-#cUp*E4VsSoz*xzk>OO{GdI7P zakcqt{zuVy_;dBXVcZrK4SOU+DT(lT?jwbULXw6Or6EeGq(oZwNLIE&8nUy_b00*7 zP)Sq>)wfBhq&@XJzrO%4=bY!c@9VnW7uI)QHw}mk2J7>?urS1*6CGDUV_Guc!iRKn z>EjSat!9B)G6TNojG@15nxO8lJe(=Ii*-(=FuXkncPHC0Q%B9&@Z(uHKBoc;R{TN( z-*5D9+gftrQZ8QB(_p5~{0x-OCGA-e1QXnrnTkuc;KBWm@X`Bla5^=fjto5`>N=e` zA?^Uy{EH*oe%j!XkxL{^m5}A^50ZJ#7L0O4X|?<^2)=Wa_iDJp>ocu_BD2%r&-Buu zwGnXDcoB>gULeDv;+&EHF|-S{gXgh3SsU3(VtM{Ftkm{E6?t=FUOI)g=k?(2V^Ks} zOcuP`Hlg)eN4PqMcQccnP}tu?*Gx;pH-)kAw=x`dg!5a~xQ`^QUlaG893}s~y-IHD z`{T=CNxVB`1G#S+aN#E&fh=ndM{m`Tx$*~z-vc+)8c~Mwx#LOypand(cv2a+rV0P* zPr&IW1K?HPPKRd)3p{UCquj|6l-soim#vBir>qJz3!F{GHzDk}c80#r_(XqiRL7;G zRk&j6KGQQoid<(z5dS-;Vdm&uv_5wkw{3_vo!~cAwednO?z{3EUR=2>@cX?U#;QK1 zZaac$xI+%vATtT8^))e5XMmPYi6tuak3lX~3qKr4!>@*?;hw1>?@iwZF9!4{=j zvJQMy66G$sP9~8*UkSE11!H@dCA2*dfr~Er|Ai4JlPI6U*w2xP*(E2{MUFH$3 zD-UPwX1&9samD1l$8-2<{0w#HeIt9r{NdPgBUqO9fT#pX(i?0RDEqWwrV>o(D0B+C<;HOUC3?H_?ID+FeSrNw&}u6ji)P)5_PP^1w`FzR%)4vtM{k z`#0)Mp2w`WRc2P$IN|vl<3QCqo?g7y1JXiA=*(Zv82)V@e15wM8!NW5fD$6D^Zt`-tFhBtQ=^nT&_z#h-n+9`k24Lc?4l;ND zTs*I-2AgBHK_s86`BeEAvOY;+@9)#F``263g^C`O<|otVhi&0&V+yPno5S^9uM#Y~ zI~z;q2gBKo%0z5`7TMON2wP+n;q_kzUkZw7|KpALakmJ(8UK~seKrL`eCL2@{vQ(Z zBT`TcGr)b|G^Q#G6Q#X3iLb&b>dDUn%Z#p&Q)NSB6H&mI<0o;R&*IVKwLH9dY>wQ9 z$Hd;GgI1jBgq?BaB(XY{GzER2DS7FvfjvKm%<$y3@pkmnxknZV8Gvz=Ev^4{hW+EV z7ZzV0%Z<_9hth8|Aw6n3XRnz{sy%C%qZR#Rb6-3}zO2IPl!p+d+s%7)ZJ12wdiwU; zR`$u!rzn~@0>2BN2|Nbkh%TP8Sv_$J|qVZvA2W|VQ@zjj1(7vn!P&e z6;+^&(Q@ciaDvCkb( zPFJ2EbD6KtJ(mfiO*iP5b}{a$_hk5@y%?ZNiR(AHO~Pa(nSuKm0z2QG@OtYu@KC;k zV?;eTS??26Vn7U+;6!3+U|Tl&WtsbztY|ilPr3(5HPYUDp}F z8l4p7(#II0rdd46U-q61ZKx;4rl(LSAr%t)3IurIARW2;o*Wx{4@w%GVUu$&8$C4+ zx;kEgcJED3Qk6RD zF++&zy}ST_)`Zbp{Lj&R&>^_cUkp{+NRFA@qV7{pu*3e2Wck*6aEHIwnWc)6?(1{e 
zwf0M4RZs;ycGE}c+wVxKi$6>cK8+a$9;EI2Tv#{wnjYtMf6E`)WWJj-NEltB#}mph zXfOna+C<=2!vs7%OCKV#_&oVVPiRo*ccbIKv*@ z4i@v-v)!uBc`ZBUzMjaueMNJ(zTT#5bBXwiLO(>XEPQ0jXLQ?ymN`bBHFtl0@9uYM{wedAX6>-GRKna{B~ zU&0sW-%J(8M_}guLMZOM#Cx0#K0#7T01S&tfXz{HjwD=wE31nk zJzECh(f~=n^&OO}(=mCW84_t;$Nu*j-WE(@zFcv{H)8_Xr8bpx%7q5-;d7urYxiUI zrE$2(=^lH$Y&lnPHdSzLS1Uv>UPf;n|3zm$F=foxhI2Yf?%*P32iX^nkabHoGeTwY zxW6w5UmcJE%e9vwL_Y)i)}BGXv6{Ho&j?mBt!O=G0$#|9N0r0dxedQF*<(AlqIB#S zEbKW;yk`ud^tKOdQeP5tWSk58;&~(#pUuPpqu+Gx!*TSalM-j~`3Ic_uaH}>4YmiK z(*-|j*t`Q0pwa4y-bdFm_g$OO?#p;AGj#`xI$eA;?mUrrYiqir_8_jxE+HBk7eS*o z7#^2(laM>E=sSzgPPRQDoCES6A4%$c{UJI3Y#JI>2+`fP`IsDNgt9WcUOK%LELU9w zo3QbabZ#fi*rADkm-NyzI<18K`NoP&*5h+*C3MzYJ3JM4n;n0>h|bzoNQAD(K>6P~ zc#<`kmu(TIsOR|V-GjW|v2CK29!f{|2M%jRJ! zjF-K}j@h1tA*o`NjUB|X4!_Cm531bq(_N&e#t*AS)4`B`zQUs~qTZ8O5`RXAJM-`| z*1c}RpHvs-3OcK#(#+6!ivz52K>ri{a#@AzCy9JtI%2fg8moX4kGn7v(sT`=x6 zW>>vK>n(kPi3ZEDU0n$8>Y4%%yZZ+}1BSCb&hAvDq=^$IKtza9QU*RbeWoT-dSJmcjfNPsCQ{~!f zIvUHfdw9o%a$8?PT8qDNpvx}@}PnF>PE|$8r2BMeJVz!yjkgnIBLj7v_-N(sKWMkJd zcFmAFX!Z(0Q`;jd*r~w9Kk&h5pA|&v+jrP{%nR(coh0k;ZATd!GkUh>DqFm6BB=2C z_m=P&_>ZqEODjqNOED{YuYM<0d)h*N=-kHTd(H5*LOvK61VPPSBec%QqY+w(UqrFTWGD;IBsKE5NPdmqDQQkf|!FYI3!2m=T$T4m+`%z zq!C1{uGQ1deF|KHj6Z+o*if6_t|*u^kJHE%gHsEZLAJ9B9x<3oKI*0LIT>AMa(^>* z3bm!rqMu>XP!_mflVHdEizYqULfqP0NKOS9@|@EmSRQ&F()X-Im#JoWSty2Wnx2ba z%!To{%K^svv@5L6aEHVh(>R~d zx^7%oZ50Up{Z3q;^^tgCfZVc^VDUPYh)XLs0wn6(`Ue%~ZzQqACa z;v$Gl6=G_{gYaIpI~0n^<8~Em;7VFq{i`FiVai@87^{n=<}zq|FBo1Nm^LGU2h3!Q6(<|Qmb`sC~j6lSS>74L$3;cAoPjGAW1h`Kp zXjrYlW=k2+{Y@dzoLW!+Q{BkE5Jci{k0{e2|8t=0Kn2yKFKO?W5~zoIKA)~Z6^a~j zQw0N2r=Nn#svJD~xrV5z{2(e-ZS;v;szBD`sz5(99R8E0IHzSJG)-QH3WWi@*APfs z{W`ida6kN)`*cW^ zNgl7Y1z@YrYofO*5BKd9BGS*&$f~kQ5XtAp6LyqV^~Lwm#_WACqv#a=?e-vBZ?$9O zRtM_!Jp;D>Hh~tCJAzI98q8ghDEwWTELi( zOE#3q>zC5S(~aQhB?HdK(1fX|y#~grvGB#e1?zOwVO`98X2aq~(74qIQqt;)kGl@D zx8xvwuUJE_t&A2FncT(kk$cgprvxHBLgA~u3c3Ux#!O!|P~S9xULVXPK~dEZ_hl?x zelwZ7HPlWl59dI1dN_L9EUXOMyBz{zPC{S$U9u%;HI%Gq!Iu}6U}5n|x^c@{x<_rf 
zvC3*4_FT(f`uW8bQq22b)*86Ne_}^yMw<;xmOO+vC4Z2&y|VD%cR8$ZO+?oj{bXH7 zH$DyoT9^8Uq&!nV@386Iz|vIeG*OS5+Z*CI?G!p>atW*V*J%qUOZV{48De-2{1;v=|Tv>LuAPKK5(O62+l0$y%2V7J?U7;bqUqaC|wP01ZT zzg|f`b)3d;85L;p*bw%N=eeNe47`*#z-smzX);M;Pyg1!_;>s1q-zmc}~mq}9{>=!T(+KzBmo(JMBvp_2b&E~;fL-K{PDAr^-ntuD%oy$pMUmFSvoQb{4OCV z>k4swQbq>C?i0c8MWAuB1y7XQAtJ$q~gD%zgM=B2wx*d=u);6@2{_fAAZg-@vF zavm+}S1~Jxk6@CuBA3xT5$aFMF^kp1V5#$5p1WYmeUVaQ{)mg=^d-}{4#hI6^J5;E z*2Lg&MhT`otR}s>N${mniYs}S3Aq-h7`(a!Gcr;j{YMmieYt}i-#L-ba8CsTc`vqe zi4WE$g=0od4|W7h!HsgiV1(z=32!hLI3`PQ8&}1!FQ==4yyiGY$`#o)+7g`Vn@?oq z*Aen#TS(PQ*HN_i6%Jc=^YK3ZEJ*q^DDXK)Or4(mfqB+Z>@k{1i=-Bl!O48~Ywaam z=yHVDRpg`!mRk8miqRoj7jFs(0oU%d}vMT+lpzsz{sLtm$@Vf@1 zHC4<%VQoQrMLkS+8N|oQkKmc71uG}|iO3B6V;lX1VEl)jWWLC5W=on0itbzn4a<8; z%!FKK{TLB0d`=-sce#-rVhMak&4YR+g+k)NTh!`oBg_cBD@c;w&ZW#qCJRGUneP0_ z-2O?21lG%E!-UV@S?!1w*in88w|(d5Fq3HV+c3A4=EIK2?Ev@f^(!^lg{=JX1nkzA1-lxE3>31~V=f0_L-D!NT zD+cGLR^xz{DBf^Ug^?BC=;LM|xZEy@SGHH;nUX}oSX*Oe^HDcQa~UzU@9HEDwf3a3 zND6lfU4=@)cy79>Kl!P1jMs2_A+<^rzc|&vhB_hGv;PC6{MF}L>;9pk@ndrNC7<^* z)?iy}`Tg*c4M-(yP|t83n~<~%Z#7l0_x>wmC2W4MS8qNc#nB%G++quw;JFZm?FR91 zZ4l(SXhV9q8C5Wu!@hNziSczvqOY`@eslf;@>`?OwM-U=hR4Dft4P=!s7VKQslfMI zU)aKHn=-rqzf;p<1WTpBwK#_U{QCCG3Azu@EtaUzndetO=b_t(BNFK&e%oOvf^;$8Vj~1SHLacDp;3Jf~f_$#0Hx1 z`nzUwEYA{Ss`sK1`w=W8g2|4?0l4F$fIpAP;-5Q}^hTEzw3PIA7phU#<%8O`DLPL{t)1AUN zG-YC}{9H&;p2NJ!zKH)xjbX;$5rZq6i*e=Q`(#_B0MG1ui6V#k;n~Vxpv>SPXG!@1< ztM<~W>z(LvJp$P5V0y?a0C$=X(%N+q=%=^}!c2Z(&Xq^xl+k!5Fya#K?l}nQ3jJh; zA&*Qs*$R)s9nkvvG*GLY1}VdUEw99(azi~V=J^}F4z`rNBEf{|#zCEi2fUH*#fsX4 zFuEt3t}l3oe+wh%m&NgT_IwGB@)>7IIF7jb2phiL#J#6uv3NlpBs`S{XcJ5tyfU5xcOjI4iQ9ZdUw(^WP4^K+hq@)-Z`p`O%9WQeH&#>2&r&!&kbv zERqd)D+F_dNP+S!Rz5e2xz zA|DS8wb1$+GpcVe7p-Ce7~##BB;yPgo)5?}e`DO)cY?`zT23yMRiKHQ0S&xw$2`?4 zM?I}euzB8TY>la;uQzm(SrkIwJA&-=qd0P|f@85w~;H9Lv)+*BHQQw>ghs~}H&D#=FYuXtq_0|j2k z>5e5UnVgzGq;{Gyb|jqm|GtT2N}i{0R(+-;fy?=Pd@PC_J_<_a9Ie~>oo?6r0B@En z!Z%Gmrf?>Jm=%nGCZ7-fIGBkEhHt@B-Hx%g$l&hunt+VhFZN+;JbUM@De7LCf=({m zFzm|t{TyMP 
zdH-MzM5bB8S+lKz*UR)UqvJ5zKi|SNt5PO26 zWuZYJ-kuI~)no;^^UKNTst~-l=Ph}wQZ1M}A_>j~{-m?xF?lUD$VzB`BlDZQK_+|> zEu|4)F^lJ?#>wKI?;04C=n9j}w-Il{AY73jS$Qf#K+Z*s!S}qDuvTa`*}lk~+~EJ$ zgL`#Qzxy$XJ^G#`&v2%Pv=-nAn=bm}@({V{8Ap4Zn|N01SbE7y0XX++qUH0EeIlF= zvyT*GQNdCeZ7HManLkYzzB~nc#P{GX|0!fjt1Exih+vby|AcpEWN_W4I$~tDhTf3h ziW{+!xTmV&I>(v#e$>WiVxJ8LOU`gm>O< zM&tZ8(l4fsGkrFo`yG3zofJ)sZ-0Z&+qFP3#~O;ecz^Uc5xR2)izjMM;)dz|rhoOL zs=@Hfmg5LRBK82y=o>-kG;yZ$}B*MZ@oBfHGlTZb_ZAa@3>zs z5-ii>z~=Ti!MYby(Ulb7#+qAje^m`xu3!eA>?Db2n;g!aGfJ5EZ=h_wCimkz1)sQh zcJtg!P$*0Xk*+b^;)hBg|15^#IbrB-mgTL+0HZfUXNy zAyHfc-t&8~4dtr1HCqmjrt;j62YKW|Y9igNr_9az;KFX0Fdcj&-jF#>?*aZ9kjkz* zFb`MZy<@xZ#D^NX-#Fa#mF6#+u&5ocsHafZQ$cXIa|gchdxE)d55Xxx6umKd4_dXw z2&T3kKwX`yf@?8aaO7A6CTQlPrJK9!Ts>?fID)w!#(vT$$YC7ONb^Ws@^V9@I*_uYR08(&U9 zXz_zoi#inAnL`x!%_q|DL+R>Ea(MBG6p0RYrT%v}Fiv8FWUr|i2)$VhNo6H8HElI^ zjd_C=ypL*9loKRAK7t>fn6i?*f8+jyF1l~=bN1`HNZ4CSDPEn#X=aK+YE~yc@>oSn zq-MgePiigwDKEzcz8a|8h znZhN5)Gefn2F-a)T!v0Sx9WC0qk5}y<%@%YEvy>Oa2cY%U(AKP>~8u(UW5Fb7K(aV zMo@S?nx-C@LTBxW!uh*>;n3+CngQ#n+J%|$^pG?iJfjR_9%d4S=h7hIVu*Qm>ZEsc zF-hY0{vCWHMX08&V5BpX94V5g&doOHeY2MLQ_O&@(+W7i&Y{=tMpE%5(?Q|a5c$2W z1dI2cWW|z0>4H8zn7Pdcm$tUEVv?tkch(mK&VMHe zx4jv@#|0Lk%ZgHSTW-*q$9x^s_3_TDrj}|izXyDpn+^+Zt zQ}!j3^XW5To768soo+bMKi^2*pNzq+dW&IeTp#(Xb`&qyFCYglY~h^08ZxSl^<-#M z4ZM816MxKWBtI{|f@`&wWZcZdC^zs*P-ZI5H!c6hWAox@?(!25pnneE_Xsgj^VX0F z=H-Ip{S8uSqfo$z7`(7J`3-=hN0+-SLpcrpCDQ?9>b>!asT){ zy!Q?b8o@J3+boKS##ABRBlMIRk$(j9GQvPRUyloX6~wI0%)l3)c0t%aHRk;65XdN3 zfpg=;F=$OMEB<*g*=cZ)?s%R?4C1nJ!h#USLg_5EX{sTvoEIn^{KR|H=X2_U3v}v~ z7?kGkU$2tyV%}_uzdxMf^j~}frKT51hW%jo8$&v;e}wF{n~OQTSC=1>xm=q7W=Zuj z=Hp@kg+y(-o%EN;L-`53x)YBN}Ua^-uSD3*UZM_wfUp^{36fIApLJ-gq*jv_r` zDe;xmsId5E$e245y_V#bo&eJ)sLF}pwtv?_Ka0{2 z((g!I!YMe~|B3Aso`*ZX7%^3IX2NKz4m{Ws&pH(xrxUk0!SdT>^k-rKHMTiOSE(;W zj~!2|s>ZE^;zSADvmz2~L)YUApC6`8b=J5dCk4O7x#7l~i#W^kA>E`n7as1Hg?yem z7sG!~OUh2sr4`}u%DRsjJ?mkm>MsNHO%7CSP7tkxFp_yJnihN&f>}Cm=(ON?bkJmJ z!b5dDKkCC)b?m{04f$1XMkisz`CnBdyPkoq{6o@hGoM6E?J$|NR)t+#S;d}+c~0}D 
z`x7PKN!0(UJEl6lCR$665tBe0XxO6y279$|Np3oOG~_JNl9prpes4m_U@JQLlmceQ z8{${-E}}N%LqA?y4>RU|66}7|Mwf->v9-W#^SGfnbelUA#z9!c2`Km*=I%x7Q zO;8ri6O5&s1+7!$Avxa<14U!V-r`^6=FU7KVWdT8FQ0{6e~#dBfD*)e8ddIJbqb`M zD@oAgSLonpELhqbPQR_X1hd{Bft!N$G}C$~D^(&-Bja=-$yN!x7EHpkjsJ+F)jbln zIe`7xc9>e&OTz~bJ#dR|rTeDdB#c;sAVRJTR_eOYr2F^igtfIKdc>n;K4~E0)oa4lj~9YZ1L9 zt0ZXuxf;_GTFK@Ycd+866t>1Hqo0E%mW;aq*K{-l(k>Bf%KT=MWqXV2|Gguql`f@E zwdBD;;WN;wSTw zZJ~Aa`&SQGG=Db?H>DAg#&Z6?;6-+gIYZZN3a8a8#v}SR(oN6jkX&6U5D$!_1;#~! zhhwtf#h5s@I^};BMBmZ=@p7;}&KR^K7SSj88ig~klLTIjvL9vTCcaBNQl>G`=1ofpO9cy>H@Tjv&xx4tgudb<~^ z=|aqvROfw2>0mx~fOe?w5j1~WhW+>F!p|#PG4=R;^zC>6TP+Q6PJt}WUa|~ji5Jw} z8;`3tWRQ&g6Y#$e>)}uSN5R>PYH*Z)#}>(5BMaQbxYDoAQ24DG?|w0bkN^`1F_z{c z|0yvKt0h6}<`eq!vo2Nb+JQPr8!_T;7^vw*3r2371KH|F#Elffs?f(MH)bsL+dY<9 zkbXmuV+q@HF|@c-fHV2u=h)?J z)}-PsuZQb!P714yO&b&hr`?5_!rdpSZ7$(t=8PepnvpQ+&J0MLu#c=d+zj`=3;12G z0GD=E!*jz_`fQv5QxbShpfPC*%urLu>ZEu~4vK*z->;D$rxVfZwG#Bbze;v(YoiiQ zTJU#DI4ztsN_(G=u+AwwVRtD=CWvt8R zNZgQEL93$%$%8B;v%i+|*|%LFvSEPgXI=sEu5A8YNub?-wZKDXfXKwe(}%~4>4C$u zXb1#`$sL^GU$J$>wv5-pry{8 zWMl+rYa-_sDIWds+t^f^PzILx|JdyA=LdEN08rI1!bD(PZ_sLV@U~2^f>_ zkFsWfw)X{K)MiL+^S9wYF$2iZ>ZI2WM5EJAeK>pm0*hzIn#Kj(M;q=J{ag|Qe*Lnj z=Vl9ydVD6qJ(Z~JOopJ$RjAX@A!x8#2`^sNRM}h%7HmJSLnbWyLFSdn(RIA8>^G^D zOcasA-+#Z;tm6`}MtU-N=X4FFT33>WN3(dXzKA%N@%xwf%h<8}0jMphh6wE(s?hfj zjqRS|49k~fheRAyI_6d0v-e}`Ze#)E_y9bAQT6-J1DMyJ4XVDeq%7hyTK{b`%{=a2V)%&ibTAfp^(3^?oJBybxW^ z*m}j|!e26+$HI6Nnlg^*e989*?Z{)>?O%}G8L9N0v^_}9`ppiDzQ&BS$9z`sF1x

3oSPRg(nn>QmJv`|zhnLdQV3vm{iF_7SrEb*-^IlY9 z!@*%tPA{UbMq{hKx{Jeti#@DH_H#JbxDi>a2IBK77S>rN;fRzBtr**cliKsC;r=%u z|FDc6H0x)z`*!07p920pPbIHj?PfN23b>2$essTi6|+a>0$O`&!UAOpiHIHdyg}bb97>Yg;MS`|STshL&t<1U$w)2f8CuN9xK1JM z#ZU0SX;r#A_zX6#SEg^C^8JXGO?dOmIEbI)fGG>_p!k0+#Aq-MM&nmtg}*do zQ}S^KO;Zsj#lJ~=Jff$=^R*k{zrKlAy?op&dm?j>#&{2|An-e#C|&{i+QG!9*?}|!Nnn7rD!vW7N>2P1Me!fo*}>;L z@3Y_+hUx6bmCwH5AMZ&19lJ?&6d8fx^KfjKwHB1h;#eEicY@y7P}DE71(%v2@cSsSf#lg?v@V-$MCa4Ufwv8~jk9e~3Ug0?L>}|R&R}$N|#G-9;1-@4lp%V_Q zfjoZxaCK8<6NS#2N;uXLdU7*sur?T8PnU$r8+Y?Oj#sp7?>4XrJ`EEV9YgS~L$2M(iW9eJ|LEfaNe(eId>^ z`%5Ym?~uB08I>%^AzLdilB|k+T;+a| zdl)TD?)ABYxp5NpDA0lZH?CnzT{up*RA(?&6>8cyqOi?2CQB!eS?72TZ1n;mrS>d6 ze@BJeRrQH{G|52Y+o{xaVG2Hzzd(QN3V>f9XJP-w)%41=gV^y~6HC=nxP;dIoSBdZ zS}+2prn8slC>erz_G9>OIm_Ir3L#70$#Z=*3_70wgHB5(ahL7nn0*(#&~@qQst6Nn zW^~^&Cb=O8KJ@RvEsPD3xqA=Cvid|Pq7s@O+O=6w(&%)-(ZrIc&PbVpF zg~Lx7EWWWH0(FwB&Ln-MPG5L$!Ndw!tX_bQ;h{J%$AJVl?0ucH4^yi8xag>QFYlA+Me43 z_l9TkoxV}<=-yxYB4Q!m!L@@vgh%-Eo-lpkFhZZbmg4F>3W;oaIG+4lPQ4r_6FCVx zV!0{>@7wbCPv#ueacd(dG{2FZ#pl>%%9>>HeFJb?QA35YGw3muucW0xh)hqqNJXyr z5ZT;%TI@nV%V~)IdoN1Y&C!7?mpy3)KYMlTxJu7ml>r;`5b`Anu)8#xo-h`Iy}7oy zV}lFo%t)jrC%H+o#fpHsgqlg|(na&7EiK zw373|GjYXe1~$95P|3`2`sCOW2(UkmndPahr;G{oY*r%O841+&LkoTNu!L*Y&p9i()TiQ1nf^F4=>>Da)L||(RxRYJAFN<5 zrz~RzKka2^A2Y--Ne8$scbw}E3t~EW{>PATIJ4uYFBrVN&Nzyk;yP#Pa|WCW^R`Ev z5$QOKM>U)9i5jn6yi%h|!$oxap>QsVab~QRS2F*3OL8T)G0^p*0RMG9hm5Xlbd251 z8BSjevBIAqL6XnhZ|-IScstnFOL4;b>zQ-2nz?q>ea!o^YvlRhJ6biNm-(rk zh^zz*K;qx)@A*$i{EwIA1~n+O0^SWb(hYVcadK8xrKJdd}oV? 
z5gq&T0GU;;4?oJEVE(%ztWA+1`W3(EYo}E7zp`5J@(D|CZnx$JR%F6J$w%z&)?~~= zOX*L^42VxFqAD?$vDR@r>T30pxMq&5+;I*^-fyStKUv}DUkQ8{ps7=lv>oteNe+*E^U=A(CcasjZbfC@OBGPkCR6O7=P4k9m$3 z&D@Ds_{@39O=Bo>?Rj$Qk2j~cr-w_)ZccJ+q<8b22DGvGJ(@o30p)FdwD-YlwDZ0T`EtRS5?qD1Y(g+Z z<_+H7qRMDi#DHZ)Bwh9WJ_*bz!{O>FTn-Z@Fx@T9EjNip3&SjYSjclF=j$>tWr47B zrWmvO@c_}Y(_lVt3xt&^O|V41o3fwN;IO+A4#geBxZrnmym%(oDtEB5a(pMHLMBXZ zD8Og;hlzh$67m3E}G%< zx-EF9gW%nHt6}qkBjEdH3t1d1&J;!tLbdHRT)wmh=auB~yn@|i|KmJ-xbB3hlAJ2@ zbTi_jX|Y6)=dF9ipF@*dnS>7n;)Y-$ycoT<>PxjDER$`fomG5C?&kx*E}w%6KVqnC ze*}78Izi{U_hE9#L9E;6g*CTt(`6Ym;hc#BOx@oA!;>26gCmAas&E#uH=YKcjDF(I zP(CN#uSQ&?lQ8Oy15VLb$IuJPc>QxK&b0qVW-rY_P5lCjE;^hz-@^fo>JZ#+XhqoDmqKKdRtWcJ)KX9i~aayy(uiFLmbjb1DV`R<8TEk?$) zqTmnx6P->>Yel&uh2I5>HZMf3LmGtf4E27LL$`iC!2r|N4ILKCbZ0x;MBiM;Buqwn0#gmh|iyf z*ODc1H69@u^Mt`;-4Gr0T_f1AX9*@ar*gi>66x#o`?#^sgXzXwK;9N9ccuVMN*H@y6@hyK-Y zW>(%58Ov?9 zx=rfTihtrqWT%P1{EJd3 zz2p|Pvu~jEbv9lUtYmktKaE0>uZew4ES@?4loeOxa|*q8@l$COt}D}qgB!AO?|NZk zHyjS#n{@H>v1r`Ad?&0CZ6L*t2~|7tOcE9-;U!v*#&1J|aiNlLEPpH+=$!NT7F(;B&jx9Ww-E+lJ5_OID zB-=A+FwjW7*=_XQ@t-{ZN*XfDY#HG#m8=B+jGJm~L3M9cE?*=Z7ldqveWQ|8_5N*m z)}smTiISlGS{E+PmbwRp486$){Mc77KuZqzmBRzXsix z9iz4V-2#6}X*emY2MabxagKFU;No^+W?@ShO$hIV`^S6emRIXh-Od-zIt%a{&rMR< z>n4Cyk3jZn-jRD3=X>@~=qf{CL>=W-m63NHUizmQru~ zOLSAC9oau%1_$P~z+tt00{s}it2NOP1FN>;xJSPP2X0Dmcipmi--8f6Xgn7#D!PEo z_I}gFpWowuGTxO5%6=HVsTJiO|AKil-&8GGKb=PDWL2Fyun;p^mqXEgaS$yx;EbHJ zNxQEIepx3$7)=*!ZQf0j!}#3%$$A>EaeHE_-Kqv`o-Lwcse5axNdpGoesu#qg>nqd zb_Yc#4`4Kl$b&ft(09}VwmPq1x_u~i7&QuJ#2%wxJ^zw#N|J1j=q2(pZzD|ZSc{GR z_gUs3pRu2=O+$g_;mz6)UZWockCZ!bbEp{}VvO1FZB=B)UmJX2m#IF`5nZ`j!w-&>;O%bOiqdTl3g4fY z#vRya!FRW1a~lU=o+HSWVTx!P&Cy(Od*LU*Y=Gbs?#!;NsmsP^s z_V@Wpb+Mi_YZ$+5TeUD^oe6(;h&^{)>7lUs_E_s-9iz*o!-_;P(tm{ucWn|5jC)mD z`!`wW8=oNXz0@w88tBMrIMj<&lEvXqC81n|gDKy(<{9_vMk#l2+#{|ibcM+BPBve8 z)mg}QPD%)a2HPX54rdEqHj4dF5Io!qK9iF2;F&Ue@^6-Eye-ycU`3T8^b;3QNO zxtwK(xy5;xh25Wi^PTUyIJ1#H&PJ5y@x>xc41LMUSgi z@cAc|gx{twE^YW^SQfVNe%Z&0&1Lsi-4f2W_2OpAO7pv$-U|M%i4zP79?oyuH&--k 
z0eG)(0!;L=;*{4ZaIFIv_fVWA@w%o{cv^Zi_wM-~{_nh}f=F9azO{A=?-+TNGyOMG z6k$D)Z{I7!Kd#_;-DBarU!bILQ~Q0<>hY&U+ugvMwgmDXryuh->~y)PN*UgFo}6`% zORm^oWY0BEQW9RQhp6GQGB@LQhNxfs7Ef)00dF>^RrF`2pU~H7mdJkL5y7}zdwzRy zq9C3UIaifkeD3%Tk<70l{D9r&T;y*zuKwK({`w17QR&%_BKtW9*ztSK+%YHwfHP?juHCLQ7u-iej`okAd z>cH#5x`u(=Z5IoETdy4d;@d8v`{vEu?~oDv8=awCc|o%g{R|%JCjwnB)l_Hc+{!^y0CrhBXWdYZ8PL`Jmcj00?OSmaAI|K_R=axy`+A7L- z94Bbcw-gQ<^PlLv#tmW8-9cR9V{LBk7)h>b)VOlNm9N~#`{}~cT4ho3u5N+*Po&r<7qp_8re z{nNIVXt>Y!E4K+f4Ym2_y0QG^jr;k}&GG!`vR&NPk}6&xUL(oY-h8DOxaXoVTtM7GNsQ_n9irlAGSf`35@i;@WM|rJwt_Q2$%Pcm)rNfO$iRVrkdGVV^$cp52CW`(VTL~K*P_7>- z&RES=5M6sVjejtFkWjzkpvZkdtRP6d=C|G+EgHJ;dRgk!??T&iHKKD9mh#V_E-cbI zDhlo>5n;DY=@R*y!WR`DoYS52{IKM~A|J*4vQtZEadWy#MNJN>qSt{M+!zU~^7=u; zg$k_}{Awv3ZmjznVeNy<;+d?7UpRObKWX$((W8C^u3YAY2vh$Ey{+0s@1ELlSE41k zC#nL`G=sT(uIt4z`|*{cjT@(M7$e2G%HH9ll(hK+i}OU<{#W>K?HSz56X%6LMuQi< zj1tndgFGkp?R%9Lh-?H=e65ZncX+};o)%5umL7U4vJ%De5&p^imOOE+z4btTfs7I_ zc~zVXGPbQu4CE!>uW=~><==0$;Bk$5XE|-MgBz2-Dp2QeMX1yl~x(w9h1t% z3B$O*-2=-<%=nL+yx=bPBE*mX^j(Hi3XT;zemcV)YLyfvI6meQcOK(rO6zeF;=0&tO3S&Mfu{w>agoWeiRHL~`y%o1r;-6#_$fC*oFoaFy=JjBmD zd5qT&J|uE5Il<{=$nhDPU1e*RM~Sr3thmCyY~k;u=bX;7Zc%q{hL8>z3G;4*a*6&I zM0poW%3hls5Y^nO7w2aE5U!O;<-E_E@aa0o#a)Kt^PzVYKjGigvM-hSe0ISIey!1S z{=(KRe24K|&ftDJ|L}A)H)_5Mhs3I4bN6S$+4`#7{iip0=U-n%64xFG`MPJq&xgK? 
zvQUy*T6%4LOQ9Xv`4uylbbw;VYvzQBA+TF!RM&9IY7W8sC zwK~={kF~h=_1Xc!mT$c5;WmNFNp(SSkOqG;Y9{Z| zry-KhJt!Et`HN8Z{ycucG%ew_W3xpXkuK%t|I3~|8^?Qey|a!~F6PMMr|8>|2<}$L zc%>KZ_V#m|AN~U7*{^VAd3r#I>L8)PvVa~n$4w%b5R_PL%B;wq1mM=k@BK=uh7+UfR)EB!910>kD73WM&uBlSw-e&T$ucfsC_5YFs?wJ1F}S(NguS8%yRhgW`5Cc4`+ zm7l#VhVM8lEws3<$(1_|<(vasxRv+b@Eslx1eR=E`MY)hiGrtG=EB}{!fxq10tN4T z-1(K0_)}Yy#l4iS!rBxS(TEAj+|AmIGV>3i+|$p-{7usle8i*&WfN6*30+MxMcbr3 zt-i>Za69(S7A4ziiV^>6Hy41w)$gl80IYapK89eti^`WpM?-+mM@o28Y*NW3rcMxn?X3Cjd9alc9 zni;t)}xKXNSn-wWGEryC?6{+O%n-`TxFGd;4$oHuv-1 z;_toQeA4Pw8&+5*uT?cZBymXM|G$KDYOq*8k2D@{gtMDG=^ROiTKHwU#}UWVKC80jFSW zSvmm<+t#2s?;JbdoyVNi%9zJPD?~KLG2z->cv@L+^`_T@)ivH=R|5BA$sw(>5I0r6DZv@Lgu;RK=E_a)*Cr z9SmQsO=+WOm0ag29k@F9Yq1wb6_ma|xG`3{hNle}R24+61>FmZ1 ze6s68;_hI`_w0r5lvYGI`cbvV3G5|C$x9BhQIj>Ga4r%T*b7pcn@fV~ENs5K2ND@E zNMi-qTU3MtUUD@1&BS=%I}B&N`Dvs4_d=wiVq6tFv61*Ih?m`#!%_RRXt#-vPxnxhtb2~GM_cLKfw zCmeKdW?wvf*q4H6wkhNGn&Z*<}-ERh1)b-yPB^(jzfQ!mG*6 z7t%2H=Pit_4`7X^YiO~`O)Tguz^tkJadg;KMoE3l>0~sjHW}dUvwaY~cS4M9112{9 zg|dApWY;ah+m#~_R}{wntjwi>-YtmRB7$f2V2T|i4}D*0(wX!PGhdY<*d>Y0`?VP! 
zmY?90Z$SRN1!$DM3Db(z7(HPL&U{RT&$+8eyK6+-etA<%Tqg6A--Lzh4Cv6|ae@w= ze##f#f#_S(i?xb9hUK50(SNFI=t|oe%&~b+ zIW7A1e#2%`>-VCmZ%awD(;f|q-FUysfGkR+1z1~wp8-FyO7|7|?M1lfKZm-rM3j49 zA0K~e(7?q?%&u+*?2|PS>R5xgta&uKJC<3!Ze~R@6wxWChurx`*`NP%uHmB zn&2`DPu$1b%;Rj1U^PsAj*HcR1bX}J4038i;C&F-+&c)x11-?GB^f%?)N!yh54x>p zEUTW;W`j6b?cIZ8&MVQkHk>uw%wVFu3ix8Kg137k@kZ_|6V%vZ@5OE;O`MLj4UNok zSu7+@J2T}I2DI%2j~erGOx5d$%=e>g`IB@!Er`XTOYyidSBjYUz79x7v16yj@ z8_XBvthYS9*@1?AtV)ogzLrq%&agFB^7ij^UAAK0D-}ODm<` z;2)O?@6qRw@S}#s`PZ>uT1znZ$Q%UA9mV*H1l%($huSz$3!j4j#*W2FpMhxG7|-IY ze^bebdhoB>q3dEui!;W-@|O|Gi|-x%>)JHK@fzDPF9kd5_2^}j8l~PHNH!PrNcboS zX5R>nda1BetH*{zO)y{SNegU_vHbf>(EIWSYC}TkX7d9oox2iGH1A1^NK{1%T-WNN@GdeIii=uj_(^JtSbo=+>T%iX2$}fY2 z-cs^DG?qrK5uw_#mgT)2BUlntgxrXQr1qs4A8TTuv~&T!9nz;w(=MX!t0c|%xEA9t zRnv>$LG(0J`<2wvUDk74C+rvpo&X=NsAK{xy2%K69@g}U1rkgNKnbw5Wsp2zrCNQaOKkikG zpgqG9DB|Y;j1`~tQj-&~|Is`se{5u?yA>%~*%l`wQ=$JunPyHmqW07xRuDHHl{b&$ z)`C(RKK~EYZVp4}bqj2DJCEezI#%0x4U3PR!^XZTcyl9(9dp`G>c7Uq-1`pvZ8k$y zp_<)UbBQ%KZh>`%3)-8LA(G%L8%6eIF}H5n$du(9e+XaGx=h_zBnGp}7KTTV*g#Z3#A; zI#Q{30;X)+LU+%nu}Xz_TxCaKbMFPknOX`KRlj0SrjLc&v73h=Jk z<(d?ea+dW(s|(HyOCptxFEDwV9pd%MndB57Tu6S0jU$qf5IUT;-z&o0@~bcysz&F| z4y5C&PhiH?a6E77W;@zXlf{*Oj8&dPtF#ko*3-*yT^2@Xj*q6|v#!)_UeEUD^g!l( z62&{0!|eAVcm|o#*Wv=0PC1TL#Syelc^Ql!*kP)h7VX%R&Tb}};bEo+wtiM8wW?yu z+S|l*v^b>8dSdudGsNeQ0jWvTpICDoIeZvL)z!rN$&_xt{>n~e2cW%aKPF1mQi<6v z`gbt`!<7?p?v@QIRUWXch5w=N^lB)N+JTpGVyx1cNv^fWQPpw*TV_Usi@eTgRTWdn zbU;hDF>*b#@Zj}yRINFU>6gFY$Z=0BZ`6fe&rf#AZLt_19H1=!)A-NjCDg4oX>3gs z>#leOLSu0^Sya8u?&mr089u_rxM8B^emR=HIiEs{v965}WoFeMe zUgBZOIp%lV4#Fjp6gy3qp1ji$oJzmK8g2CvHbR8^gC}6$qYn16`6#S{lyTPCA5WSZ zk@ifA5?-XUUoT1#GY9M?Cn%v$f%8P&g$3`4ikQKQ4y} zd1IQiFaQ@_L@?~XkDDRI?Dt>^1lyRyL`M8OES+(Czj#0N)nK%ZF-=L2K&i1j-1Z+} zio$UAbe^<8)As~c%T7Vll`%Bh{{oAjPz00w7f`a?jaMpHSd!0m$jyBKoe6=^N-06z zeK9_G1dP#D7AGR)z*nORV*dnv-t?UIwOL@pUBYK~NjlOoMPPI6Hr0>ughlmajCqoT zD z)H)a!&A_QVKa%Y_0jaf@pgwdA0^f`goDO%V!iX4h+~$roi!LBWGZ+4HZnVKuk}`K@ 
zk?ri^$lS?j^p;jE8=rviRV(pf@*n1~=L@sb+C{g0FVefS*YNkSte|Ij3ru&kVMfhZ zI+A&e)N2RBZssR=&JogyaZPC96{z5jG)cV5M(@L&u(;lZijOL!k*q=nH*(0k{Tf?7 z;TWF1*hKF>$_iRah6+ZgCLy#k3wGrVNPAQZW%ZSq+!sxQt5RVhD@#jzV<_|5G@7+K z0$pa!c=Fnj&gIDo{QTF`_8c85jM1STclu!1b^`AF1bS_xLW9ofP^r`;WR3bwfnDk( z^CF6Nza2u{4JqnB*M^}=I`s3xX{4pN&=A8=Or3g^itOi+y0ZulH}oke%o`b--Jn=K zSa4#*3F=!@j@T#l6tv|PoZ@Y$__PDv@^hm~@6T+o%U}vP6hodB>69L(MzhLH=#obP zmY+_+tA`V*|8WgD&f3S!f)n6>FW>5iUIo)q?qa%C-q@9ENxj=gGZpm|wrr{-st%rK z<~L?Ci#3)|FFnroovdcv2C?EuJ z7ENcft>thp^A1aIk;8#?1KF?PN>s46pGoXe#(B?=tn~%howt^3#ly4g?1LmGX(ElG zi!QUd`Px`?dKO$ed)SxuC2XhD8pbX+S)KeNL319NpuTr36fcB9vDAXSz3z-Z0~i8F zcd~U}l~#$xWz27|KVChU4)fXL(d-on(f8wQX2D_Rk#~d5bi8tU8yXRk&Kw%LZm7rI%Z&T%&XM-h3; z8j^hWd0aA|MvwIvX8M+6?{GcZrI3%#Py3mxavFZx#*x|bn>f@XO~!VLbgMZBhjVQ) z^7>^I54wcqKa8OMW+0vW;LDtu3I4s0g6h+8WRZ55j;sG<_H8R+JbW{TJXs7?l`kyt z-CGpPSYxMZ5GIuUMRJ1-dF|?C8*4Yi?ri~n+UHZCnmm1;n}?7OG00OP#EE*?hcUr$ ze7qNnh3S|#a|c^}e-i~Nm7?scI3#D}I>dR&!TQ@TmN9-a-e{;JVQV}duGWFWi>nAd zB|`}h!f-px7!?&JXj-s`MSjhoMEPp?7Mu{{BssF!(ZTqxW_%of1)F3=kP;nWf2>#G z=kX?-T5d?D6}b@nxsK4|QxT!=j7-^W5EuSZNo52=$AuH;YRyuj=R;3?Ph1EQ&uTu( z0`I-`>`UJ%^DCcX-D)dj{jehacTsq`vjsg@4^ZPVq8UnFKQC>D(@@q{%S(d0ekH9 zGDgDt!>E#7f^$9N>HNhIDE?8QiH+v;pj?iQS6sp~od9?gYvPIg5t`|!Lg)J@(43AS zTKK{S+q}lm57|ky*`yik+#a*{FAFi)WeH7|t)L5DOGv4IHT5=TK}t(f%zd0eo%SAT z|9cR3Ym@{tOGDY!xpL&TJzadBn$p|Nb>!1J6#ot%!%z(=8oFr^6$g&Smt9gc`nn`7 zG<$>7v0;?%agvOE#dvn%cg${bqGP?s1@Yw>&LMT+=4 zgzWc6z<84bIagX>;hkVQ+&Yhp?%t&EY7qr}ddz+=Sx&Fd#}UVnn}2I4xvwk7 zj4pkFf6;Nuwwp^U0_$MBFn~Vb4k|uK(%Ro*UgvWv%&(tf%X^DfLuX7mYseQtsA)diG3 z$%JgG<=UubhQXk<*z`pCUPLTaLuuC@B3h#@{=hEaJ>Z z_VCShcH-6`T(VY!qNs}LjrN7gLm3)>Pz{s!{9(3-V%fDNer)oYFiNo-hel;bOt|w9 zlABJm$cTkFd?o_Rx42`|AqAHDWfR7V@m1m400<5&#{LEAxMnYlKK%!5=}!-sk6(a( z)5%CYRE)ItFYM;kE@t~;28Qcf(kA{mZ45NWrScy${4a{n|5b1&f#_QImYlI)E>urk%>n3?_#74hHjE;){Q3q!E(S}caY(xKL_ zO~_8jgmd~r{Ik%(Sqekt1d#f4J)F##MH)qJ?8$x?6wRNGZHHCpb8}21z_tUmx(eNIXiOd!~M2DoXHG)jG{n&Vft2rQe zPz$V*l8{jo0O`fwF(hd(j;vJ1#;?t6icB)QZ@Ynd(g%>)CR4g^sZYb_USiv>s*=*@ 
zW>lw_;bfDJ9e;5}btaw%Y!tooRF}!k%%8RC#X;rb&2U zjHrygTMiT$6tUc$;jrB5ne@pUpulz(IKlTC4{Q69bS zGtg<##ni>P(W5JheI76#j)f}`^6dydRhO{+dV_Fg%t2hYk)Y11rFgP47t89ZSYMa~ zWUhT?X={?OlCwvGS|W1lr?I#9ZzJ#iLiEOEp-grtlJkaPWq2X8eXfY*8GY=gq!Ug% z2QkY>amZclhnEL#G9^!2SiRWD99j(7W`l>=_~aP2KYzybH9lg+FEy-m&A^B9iPJuQy9@vE>GPUm}m`h0Ab$%vh?uo{r@w50LDr>F9r{ zhlRiY!>KE(f|#s2Dr)|U&mjkKOKf@X9&sNTPU(=Am_aKC&IQ#JKtrR7ZjPEjn=J@h zbC2M*P@7bII%!qtF0v?GL-*X~V}rF5JS)fJZ}M|^WqwDt_ilQ-UlGkCQ>kN&B-yRC zq`uAwq{JUZcI+({Q8kL3*KEYQx#OwWGXm4oj$*O6bm9K01lY|t$EH!zu(~eRfj&f0 z``U2&@k5tlRRgH`Z8i4(+D`*~$C9gl1x@FwSdd*h5*?K(%`c72^$IY`U4qc(gTb+t zcs0Ra+$VVxU;g`teV$4Joss{+VcC7u_;z4~qod%4w1mKAX@q#*;NkT6DQ+khqsbv3 z#c|@Ft?(U}2SIX`riJgkGtz=hnxke4V0jWm0=ibVu#)biuBjE1$C`O@5VLzTQx zxMm4i488(8LnU0j5P&^(uNf^!VpC=(;Y93M=p;|U+wY9&^th0mv;x}KdSbK3KNe+n zgW0`{WYg2s@gTp2jXdE3)hoAH_{(&RlI9WdaTs>(_{db!6PS{4I9t=FLk_|KtgDy? zfu97WX6$9Vj?IU!t39NijzFlAfW2*f%bveZLEy?11e{MrqDdNDsGq4;l(C5CBk)@- z6$=a0DOaM7oCM>b*A{`pyfp2c`je(kk{1Nr+l}fWNm#Tb8!r_jTJMs`q>MoACDS`myR4t64vUWEL-= zMaKS=siIHI-b&K;=xhW=C*f0A3%nQ2C$*q7xcM9)<=yV=QG*kv`n!VL>rLH~JLvfy zQ|#P704mkC_-~&O8YhR~zT^aI-7_*_%=nNzcNei6gOZLOs*1 z6yf*m(I{GLLt!mbu%OqFx~Et{<3Tw3RI+ojGT8Ac z6~SeN5SO2@Piu!Gb=5r78oH83X9E@w9E8CAk7i-YKQnmr&mh+$qBPXAiDQ74^Pkf(U$*`*eNGdve+Aqc$*(^&5XkU#lhHbT*73Z3Ob?+mNk_U6KrT= z%~I-{@j#48-@$+63EccOmUbKZ!8G>`f(B2+ke8?MJZ(1mTYo`AGXWX1&8gA4m}NFU zgXHn&=u3P+8m3x;1f7`}mY9Go-r2b29*w4;h4>xeOK%s3p<8wU)s#=9BxNT`{W%II z-`g-ISd9#fv;|r(Tu4Q%89o|cjN3cou{{IuZz#d~(vOgBxR2Xi@7VGF^VIRlh5E*~ z!1+0=D_jjj?_!wCBq6W35jnFGpzy9&jD?KoNOBRLZwbev*i#6({f4bq9VA$i zX-*$18jzehot(EO!@YAl2@Z^+k6obe9rD=quL4<9t;Fx2k5g8X21yJsC#jN0IQuXh zeZ~zaxvEPZE55MDTk3H8l>|HQ(as8Qj6}lS9_%aRX#K3c?8E5+xO!Op0{6KAZl2lC zvKNWxm<^xUg%#FFEHB2-?PYL35Y2`xS%^lb06ZJGgC3tf3H^oBV6lEI8Xpc6`!t@g z-@Eo>RFDDc2e}~9LC7{~=uul?05lD3V0M2J4mIVokewUY%u5nD;_{RIo3#e#0@|5+ zOfp{M0PJrW!)kyYhW_YbuksC9%>^C0bLTKrj&H#p*&8^TaDqL2lz~{KMG*dQ7Judw zS;(P&HXwK{rs@tNq3Ldz)rqlI>TBk{Od5?g%Gfn&2&KMkXAP07aOCR30 
z9-Rzk{~Ru}6Rm$(O`{BcZ_$N>SO>M;8if<_u}IQ;&93R`V5R&{me|CYiP}60`#1%i zD>~VZ;B1UoU(0;UR^$5m8A$DLf%zmO#{GE3QfJp-(Y3?aEbR)XV}`KaYX#4aJU01g z1ipPRf}`3P_!<~c-;vQQ&~*q>TQ;Kma39`l2Gi|DB4&JP1_a`}eqxK5KeIf~wvNg| z(j6rvs%^*QT}m`_+Ffjys%Ab3n;<3{aqvMB<+Q!SnV){h&z}G{4|C-C3!yUl7!nL- zVC|(6Y_ZtLl0Syiw~)PP+kO_`azc@*`<*?Ke!_B0mtwb`A?DwWgHzE^@L!MN)I}9C z3p2p%EGsCBXQ1O9Cz<&g0d^cEPotAI#cBZ zuOwEY^=2Mk$0s3jRSDjRvF6vcH)+HgPYhf-jlLJIqh6`AbgO{SQaJ~6Bj?iU(Y}J` z>l~=ynIgre2SMk_byQX@fcDIj81NS~z{!Xltd()A_z=lOk0AHhhq(34mUR6uv&vJF zn9D_@eO?1v&EDbpGEc(OJNTTPKwj5A;521nz_;I6ku?B|#+eHO1Ue)?)|tXysM3x_ zim zCy;v0#!+p(J8oUqr<~{>sK|~JOpMB-GOHSVU9lg63q7z{w-eg=sK=Y&>h&I=ytE**coZf3Rq*USA zYb%`ku7{;_ECeqrXArk12DaBm(){QQ=u5suUm6jw(}fcck2B#`OUnLSL{{tlXv{+? z5*nR>>thEj$UcCrv7>3u7V+$+(T@M7m6GuS1t^xyrZ-*Xw5`fYp!LvHIQ?rIo>vW| zIXCl3ag_@dw=JQ4J#Q(#Ir~7MM1phd2*d!M7)|5xjP?$l#D#xytjwE z6^!Zed0To?7ei)cakTAYGd-@GL`!%*I<~bFO4*08`&*u%bYDDK6s6Kl`6?=tmZy8# zih|+mdFpVfAoEB5kWY%GWjVfr35s=8gd_}qGi|HE zBt6@UJ-?d7vh+v7Ev*B?^%l{GMG^>-9|othLg;0$#V^aBta#KR1Z>vFs_0?RI+}&E zwj=sG(f(e7~qIInTtO^HX&%85isL6W_pBIkNPru{CbrV+A zt4^c*MxalTN7#Ek{9+aCT&n|n=4Fc4J;QKr-&wKe>;>z!Jp-SW$!Liiio+&P*u1I9 zY`thOduaI=78b*4#g;j!=>LMi^z&@r+agTumm)HFi&pQQEWK5L*Ue{8^Us!)rtQZj z7K)b7ZLHqR0_SG_W`CZ1f4?0#G? 
zi!vI3rp@JSuG0)G9Jm0wO+&FvvYcI)^JHqrhp_d3!boz|0z`(MWyTu&F#SR@8z4Os zYhD`QW$I8ocbLKY_q}6^(FI7D?1#c&Q+(XK5XZtRnR9|a>nvM<@HM*Fut@CF$oHiR zEorm_=Of`kDES*H3cfq;5?uPIhV!>V5wlI1+8ZuF^P1R$lzfR)Uy8k7F9r1c^KzQ( z>_hI!0Vuq*ilW|MfZ>!lL9vmJz@YIXX5HCK8kyTvOZ zb!{2$Donyl3r*T`V*n06D~1QJOgARTQDmPi1iuUL_iicHwj7~C15?^xAVG7pLr_|o zjoE|k&{f|H#LPybWISG7{Up{ePLueB8_qu*NJGjhu+2kU^E$PY{lG5xY&e46C3#}q zTMoHeQgrpvJFH#l2CKdK;s};s?03uwc4KKMC8Tx1Y-I>qcPysr<#X^py8t)t8_}mV zQgorVgk5qCKd=51WiwW5(*7vQ-+C$1e56kw3rm@a zTme$WSi|IL1&MY`)7nF_lyYl7a?H$-Tw;TLqFvs3a>oKY0AX~U?Ct=26ypv1B z;Nds0uBerjxIADVBJ6P}(gx8>-{Eu7Y~*adh6Nkzu~oPiQtPK8-*GOUyKP|4S31+6 z9v%8R<2XhvWOUJ}igDX7;%&`492%1gOTh%@uy`i4$K)Z>fFs=;Lt4_+g0o&DQJ=XC zV*>3F*&{_U-SJG@FcC`>uUXZt31HiXUS;VSC!ibN1h<$V_U%MC`}*cHTmI`Fi;tMW z<{X&{ZP!5jLruJgwMu`laF@j7<0_A+xaRf9uBD+Vg;#lW;POe6m`Td^sf{ii;c z8NS&MPgf(%+^PZ3<+)aqjkPJwRU6A8v%qoJH} z%}OD?k@Y+_#L13%(2O{Sf-lRt{k>|<$g+k|P``w--3$$a9hpz8Y% zC-ZDb>sUYCUHhD^y^sl=&HErNvlN9rIK(SvxX_XyG$Mz#dG#_2`YQD z9ZCU5a3eSzj}yf250oMyC(6X~Wir?@Okl*^{H0tL$X zFb>~-j;D!BcH&jSS@i7rgAF(C;En1t7GY6~tCK;!8g8`0C>>+}ioM2*v&?5+Nw7|uzM4I?GhF1F-;Pv?xh_cSat&8XIB`Y4OH8PYP z5sHz4i{ab8m;QSBkjdrc(0%tB6NVX*qqn`FX-x#Zexpfav}4eeA@(d8D5L*N6JB4F zpv%I2lsCSJz55+aUSb-xYU%@A)=xl|WIGeb(y{lIxp<+q80lVLaJqdzOl?XKUw@Qt z{d)jm#sc)H7o*%Vn$^Dup@!ITv~au&=}51ns@st;YKbF*B1QVemeFqW1QzA;0IOe? 
zVw#gJb#~@rp0y&i-Pwft7oGUM$RD}pIq17)N>)qKsJG}MYw9RP`{xjvKHvuxTIvdF z2B>1lrh~Y=w;i6(uHm+g3erzsAXm%t$gZ>@=Lcfl?eRcbE_DG79qUM;brPj_P7nlM z)fSu#R-xM(-sD;I2=lvMV`zgXCB3hN`#>M6cJjq@x#fcGK?&qGBZj)Gl*#zb7bF$7 z!q`fe-Yb8_%Wff6rbfX%D~ObDP9`S@RgxMqoNs7iX#w+nph zUGCC8z%Jj~&FUI966fed=Vt#!dtwDWxfzV9=bBMD^C3=6x`<3A1$?$Y4E~fH$<9!t z&Hbu0eL*Q+f2>BA@e&v}%24<^J(_sM13T-MlCh08+d5SdlQeC_>-;vXo~@(NNoCC4 zM+FzFCw(c(5r3 zGdH%PzS0JI$4BFD)ETsHH$=(u>oD>j&nyL#sUdYCe8xoM$d!2zHauZVOe9cnKL*+7a3_NNhul{NTrlfN*aDisLV1lA{8l7BBByX zMv^2+g(zuJX-L}7_5AG*FLmG7eXeufpZBRiyxMNu*ki|x+}t7c?H0~wJmIqgDbkoV zfDl_v?6p{l@hSwtq)Yhr`7$g!4v}hR2U{3xik~@s(ET1qHR>g5faZlh1Z^B62HBb+BfcjomLy7y@P0^yNcla3`u%gXGL4Po}rT0 z90gu{R&&ae()GVX=}$g=e>)y0MqQz0$$n^9dyvlS%F$)@_wX^5M%mp;)aIQ>d!iA& zlh}v*w*qKpR}yU+h=S4T`Sfr3I=EbS#4y(ivZ;)xdl4V-S0jnGF7$^|pEW7TU#6rn z$H->9E%HN$QRlvqlzu{3u)Q}QGfgdMqemFh|FuEn%~#ZG7zh$`DzM6jXDR*-u$Inf zRDU@Q-C03r1KW^N<&Ftf3u4n{1-u9i_b%E zbRxNCKj!b{3GD10hh4o=n} z_}pxgE}KegOAnJ-M;iJUB$C_i3>rH!nS}J`@w00jH3vwN_jOf)Sdh@=Lr$^F7DEOjD|BY+K8M{@uI1rD@&VzX9+KE@e`N${<#OyiJ zq^qn!e@#+h&=ZZS=s$c0!h4lv(gLOTv&eM+V)Qyh(Dsef@j|KvB7WvHw!9iMrFvPm zoD3~$NTz>>zu;8WS&S;bh2SlVFtknwnhPh;L(5?lvM~*jce<#}Koh+M58-|%0rt9= z$^X=Kx|4hc>OKb`%XymjONLWd>{(i-j4Mp&eg(Dq=DU^glh~J^F);&N#dlc?jTTxt^ zIr}@*gl=$VR@mO#2gt8)-Yyytf~9Wf92L9!)n7IkEfcvWVS$1+&A?kg9t*B?+amoAI(3 zT;PJ#EtZ&k_YCuUAWv@9>PWZE#PKH@w6`q`r$`vx*M^Y&uuMeWJ4rkpmLpFy2VMQ?m{t&po?8kK66s^7SSD%7i&4ZT0*jENf4)bMEs_kkfb&q~ z9QLTd{Y-r+?~CsEic+i9)VHb;YtF`_L1{EBFbyYvbaJ+i&+iVok>>(y!3^24h|fKR z-DzLoHFCe8T(O+0cZpNN<$64leS%>@=Je7?n`V@c6KFLqCA&apD!-;Jn0GspWQ}{V zZjl3hSy9FBWR`+zH&ua5Oep>Jy-VlPj$&*38PpCYQpkw~q}9M#e4)>*`jfEW&XfH# zbagouX#c{3eN$*$ViGbv@@WLWBj-m*3#4rFAg$3%bM$A^vhuNH@}U8lm4^_~sS3@% z$H-^M1i>O&M^R#P1jo9KNoQ*!ZJHfI4*pVtpd&|l29rN?TbI-21wHi9WglsHJ*5n# zr`WR2h&u9=NX7gOZP_^rF@6mwdF{ot@_AjKu>&>EZ5ZvHL@)1^v(oJbxZTB2qqh)w zS9u>iqXAAARS>JX5&udbVy|%*LJdB!03#*L3FiG@*C5JNe~r~wBVan&0Lx^7%%1OT za#Av89|^#O8T>vQ?Z>P7@&&^&rf<@CfZ;nJkc>dpjreLp2~56OV6Z z@$DP1W!C_b1D&X~ 
zn9+v>zEi?UtavmjEULnfxnc0U+Rfj+-E_*l2D;L9l-gLtW{AuaEc39V#0*W^weBj` z&G^V>YmbH6mOP|XN8|E|1GM0I44kfXmV;8H~EVV_}9X3B8jf ziNXl@tq4L+ehM9^mu0*6b+G)nK8VIx(X|Dl2ncF@EWvp1f^(_dNej~782|F@jI z?ikNv{=tTlYC7=Kfl6L)K!)N$ycSj@8?Rc@J5)%Ovp1lo>lZfgyy>B;iF~d+nZ}hq z!<^Mw7#+~U>Nji_%&3on*+LysT)h|$8>d2Irx45%-r>@iqfpJ&BkAgR+*4Re+SPTK z*Tc`jIS27;MK>!O-NmXRH_+1pJqnZ>PP68xQ^u*Eto>OD%tSdWzQzZmtkYOskr`61 zJJQ|rzc8)afehcSrS@TkIQDZR@@MZrfb3klJ|c@fT6rCp>h2ghY!78smM_?y4^e1y z5vEy@RxHCJn0+`}!3vu4S!v&5CS*SzTb^8G4u3AQ#OJY?VB(KfId3+_`W18it%`Iz zP=ydbWMv0fhHww-iOFRv`r_H&e;SBtzsFAW8o_7Nb)$7RwJ9Y?5h*_En7S{IEwIvN z3f{I%`1)g(GIl%bcs3I=bwU{ZkVnwH)tEfLo?Sh)gPoh4$YMUeG5Rw7Jf;>c$A4fmYI&FOSD>7=${>@7I=i<-%tr%!E#L-|2yecndzn4E`d3F_S z^ix%6B$YE|&Z(a8dT-R|e~V=W=pc~y6RT}TQ;G0qw*7knOIR@!-YO!v9-hc%`|amh zr~AzF{8$`WbsK|{k=SgP%ao>QV4~Ik=RHoeVMGH)?UF#^eI3X*++s)OUS!l0iiVL& z82U8{Y0f(A-qs1^)cl6si1Ned`EQu$`4lFxb|3SLZDUH+y$lnT@cGth_WMaNYV}@Y zDn7IEZyTAv*$Jj;CBs|}|HX|U{!Mydh>*fZoJ*R|Zk#v9=cjI1dv`V(@?@Bnt2}gE zR-)cB1aW5WsNdp>nvD%?LDvhm$6^!`&3EGb?Ql3GsZhpXCM#__fdA^RW4c-)eXHLq zcqGHw0G}yP_xKI9mPF*2dhpr*Fv>JEg%zI(7w;WQyLirFm}&|y^7#yQ_YfK;ZzNd! zJ%zmeUjk-_FiH0&*bQ|&-#dWFgZZd%UO+=nUSd=7^J$g164iyu(pAm>Fh``G4Ju~g z$HWZWUG)^BW;Vg6j%SDd%R|(zXfpit4+s0QaIS4EOyY&GQuG(O9}}k7hd1&1AZTSy zKBC0MNq)!-;=5pI20O!)qUMr&_J6dxMvP3tnqY310j=KS5YdXqg|kFupAum|;y6@q z@zU+tZ`Pu*72!t4v}-)S*Jo`O+}LIen~)ULNDGl-Rw1tbJ`S%B^7QMvFxn=Zh7CoK z)wIp@xGxdye}5yzO_^riR24A)c)HL#gr+JVK<_bmT2(#^@ue!1+;s*g3+(B={5K}@ zqJ`SFenN@A9wpyD4x2OX$a|213k}VP`0tK7 z@|vfhQ1%hsNSjB|8ab#^wkD0!vG{XKkwRpWXyA$~-Q5404P8uB`ZR}YvZ zPmd0@`=Ex;OGR7SF`K{twu;@%Yj+JIiYKrn*B|Ug&;*EYx`<`Gcd#z#c@JK`gcuhp&avZK}q?ibb%Gf6pdP zmB7qzpP1s_v*^+@fXd>x{LU=Q3a-o23G>NN>PRJ1YdWL|gbOjfYEM6z3qNAnwlRYQK2z(IbsL ztu%s}juy|*yO8|DWOiJ;j!mCv4b!<}(Wa|_jeD|?vi=9lsoH}4&QhMe%s`r37wgin z#2yPbv_CVa8Aspn)~*VQO;_OX{NLygOTx!6D}v7Mzl2Erri7OR#_#ofFx+)7u#N;VumGj}qH! 
zl+XC#alkGpOk0c0Oh!_gA3=S`qV$mt)Zb|!C;SYneiTj1trW;{JD64y)kpDrL{(Sak78$9~ zKd4D@%M_^m^CcWF?Sj&~EiijN6Nl$+g=cFH{0)xcpxG%3fBlpF{4ZBU$@i@nd?c_!1Jng<%8@MZ!HoI6JkRdcAPayZ&rj?&iKiIleZ zf*}2163tqoMloaa@nqc*RFwrFP$(3^<@)sGg)Xhse9uzX9i~&)TBz<)BVuZ&(#OUt z%ui$+)Q_d(6Zf&mUzVamkzdfMNx-Q0$7y++J0<_!jW+K%lD;wo^LIH3GT*q9nA0Iz z%{PW0u$Li0OdF;9+mm&q7pe1ah49%gcz^dGS^cLj$j-5#iJSw_8FL38hKQ4wv^Xg670+5iF8%6L|X^qki`y8gb5r{@qd|XFFxV zNP}Q{)7?zzy*=>X)+6wgOd?Aw4*~pDsEA#mJ^2YFBXXP0ol7Pi@tgF@w-t4()CCS* z7f7ggG#Sp%qRUmG@G#-om8H&tjaNMc{{45+c_2j)AQ?t$G{Wc_J*4g#i8Pl#?>m-Q zQ}B~9g7y_M0^LK$>Bd4=ihH$@!cB+M1zRBjTPq`2JWUSkeRw}8dNGT=+QSq@6!6`1 z4E3wer$iX=);usbPXf0W{~HgKoklw(*}W+`g>9 zN-rM@ly1Q5m#XM7UyVxD$4q%cG#jI}7#DoiA!?+H*()Y9C&M{3#y1h+<}REc|I3!n zPh+0r6Pf680ZO)hVwWpCFul2-WgpuGnM-@|L{ks3B}J^F_XayK%!J)GSEomVyr%D) zit>T?m@4;+nHz3`ATStX)`w!AyaRigB!b5AJP)j#fVmm}VO(ww?q-X^QRgV?v)X@P_}MX(_wpP2mUjf-*;%}fils?Qz9PEzF^2msN6)CO+(qMqSnpOGPl|(3 zYXla#N3#;?Fp~Zrhxxo8CE4&2+oL8jl?_?Ny9+| zk)NEgCTs=FI~_4}=3kb?8Ozk80)&6wgITA0vBFc7`gJ5R&%y#r^IqfTrxWydmo(iU zdYr$zX&7T}iHu@xC~IAZQ}%K=Og@hFUt-xKgXJWroCd`gw=hoG3mcdPrsub@@Jl?O zt89WZImy`DK&bAmhgs-LRL7g(_F^lnYVBa2kw@A6H-V&A&+DDc>F_}r3ah}=728|VGF}1y#HS_xKSO&iz-sIokxGA`3mcsfz z?_rZ3$x$X}skW|JfJxzMbThe|CHMs4$fnB>kxa)+^YyI%u zC@Sw~BN`i-e*aoH`3>jZoR>J#U_{Th%w_Y-q;XD6f<8%y&>Ne3lr^rK$;W7;*R%-L zBTEsJvjd@rw$srp4;X9^rlI4OQf`+ql{`F+Bc8WVbF>G)Y~tzZ_OsM5X9LbW9ZQan z(oy9<3B%M^(_`P?c<^it>8m%gX|D_f>7_5B)2l~5`}DBq@&t%V7(i-W1tw; z<=qEEqoFuSAYF~!Y{-udojCQyX$0tJZv8iR(A2IkUw6c;)8J8QymuuOJp|)q zEYR~K3adseq1v*1I5}wG;Co9vR-b@5S+AJ<(+ik#rH5rJ_hOj#R3_CfNd>K=;56hU zp5*>ybGRSOdB;`e;y4n+%A~l@WD4GtG%}B@O?WRPLTQS<{GO%=+f(_>xa;U zwg0~}>5#_#VkUp-8rd6&Q)k5>{~gDX5#|X8-A;DwbuqjRBQbFE0|tL>#+zN&q3h~M zKQ(X6RAnEnbNf4<^bK)GlhKkIJDS*!YcKA^2+Wa z#n4$0y%mMPO;@m0XCJ0YFU4}7M|52{6*FcxL*lRoUHyH4emOZpEa(~X)~QpL;2-s< zIn$LX`>8Cr60trWFe$V^(P?v>}P9@AO5a zMhO{~x-y}Jrx@7t12J}D6%260JFDo$LzI%EXgk)C0MXWK&KbS z(ag!uXkYwLvRflUSB9rzyXISTXixv$`rt7)Y2r)^7IO*cq)lO!5?;`ccS&CtLdVl=5%}#hc1{zf 
zqOdM_uDZ{>ye{Hip&KdQnM(3|8?a^QMZ7=Hv+##JvB&T-B!yBaYmYv33@X7lT(GOt z3s}63CLSy8r0v&iDMx5Hd}YU=k#i89FD5`o`~sU+bsUacD%e6(OJoX9pdhI`a4+{| zrs{K``7{Kg#Vg46%{Q#xunWcZ3s4^Vhp}}zEbiGtG>Yh8XG;!BYHe5npC|2*;xnny zL6A7B2Is0Oc79$Fd)P7&Dc3r=cfb+G&&6?Ja4v*{N?^nDM0b-Kn5|d_3sV`-%KI$n zuyhJECb^yb6R0v~2a(iWaGlT0*+>R&(Z^Qy&x2WzqT zKoeY-&w|9_8+erw!`Q=Ex;A$@f;C>iRNWr>2K~%fDVAj?k3*f|I2?DYgz23z2-{x+ z*?G^nH-q~ldG6@jh$7ZM_6lo}(5LpOO6YBxhkue1q~n;+?#&3rzKz19S{9D33`P&UmcXrdNH+-%f{2AviDPY|NWvF^zgXzUW zIyy{9VC5gf=UP5s$)Pa06N)Fpm*e*>VcNa+GmDBSN6Yhgk`fgsiE$EW7{{4Uxd-T~ zh@>UXoA|s=k{&)gh~r*-X0=xu%5`yYHouN1HL_$`qr>mF&b0Ml6Fj!7lLB{0%rH5C zOVUQT5togBLEEr;xeuzpTA<-l9VS~&p*E4(uuKfVJIR;KYDG5tdAXdTjfLozbrNJA z?xqc^c_zza0&V>I4jC32WT|tGeHt2t;vd7Qb(wOtZMIn&*f zhroN9d|oshAKeyVL$ZqCi`q7N6Z8r{?g$HxCiC9cX&WkZb)vrmX|zkn2K{>aq|_Bl z54=Kn{^2k8IzE9x^gUd>bO5!wr^!KvXzbY}QtHqaNPX&Jv-Y-NpNfKD`h-pzK1;}W zVr&Us-c=@v6Irxus4rOy4W;#slLc!v97+0bI=znKbFyjLf|6T1NvTbOI$B=R#c4%? z$BPODk9^gsAoeC5@4i6&247%%Q%~S%o=+cS_R*M|7h$|}x^cn588las^WuY5P?IpD zMSklkaY`>WDrVAvSOcX8@1lUOMuLKhR9f>Xkgm^IPuq*;!f*OHTphMtaH8H+5YFc$ zhka%UoNh_ctsEy3bIzn8!^Y8r&TV+86iv;+2L;C0-Dvc84Z(8g3Iqqz_|Lc(>1{N% zNQ(=y_kUvF!W)r2V98SY-muz#lBk+p4HM2MuW9yVKU1n$YTjt*r%PkOib}S~BNS1s zLot~9k;U03;rpCqj9!?@hF>>;qwz`v++0q>c&6Jrbrznym4foPdiMHj7kl?H6z&ds zct1QF4}ylX&D;Z%{B;z*=mtY}*$7yT4`y+(zHG^wNyt9g%DUrR5wrgW`?GKt+WGA2 z_*+Tr|5(WU!$sk#ros-UeS>LnF&uAk=cM9A6!e5JL8LvlR7E3<=VeOe28|wiszD}n z51cQZM`Pw*lv(Y8`K-@OW{3#7Ux_2d!3p&47u!|O`HNG+>|%2QQ#m>g&T9*BKfs23 z9ar)UVIFhbA%vn8FPOusRJQP2B-ZFYV~R2w=)GEw^Y4S8qrlI0;Yi zyN?h$DzCAiInxpSw}^GQ@5hpiVnPizTxmd2`t^hUMDtmz* z`PEENCBU)gn_x3sfX=r|*!KA&Fg1hsM<=JD^pFKa(+|KfyN>m3DrFY=KbX{obeM_c zp{_NPdlT)L8!ds=N+~jI@*zFFd^-N6fvGncK_YY&icKtGJn(^4y-FkPq9QoU_Tx{$ zGz9_4vx&N)tUOoMA`Bt{qZ zuzM{DEKIF}hS*3@j&%a6ah4jB$KYl{0r<)^_>~s%9A6`QY#xi&&DP|Zc@JW_a#X%) zC@l@-J&#}d*mU(RHWoj^Q9kqMdw)rav!@+jxqGd58^l-3ka6l6YF~Vv4Raiaa_v

sKJcM;opGtTF5UI#whcPU)*3;oycWyceBBl~t$N^;3_yBefcR z+86kJc`GZUAlPs=>9oTzLUt;e|7BsdJ>U?w3S!sdQTMu^f7^oLl@&~_{(Bgg)6hI0 z6D(Y_kdFCF3It6Gh~l~Ql3QC)ofm_EV@f!2V>XG^I>UIQT!)Vv9#dJU>mA(YMMw|Z)w3ypb%={UG z-%Dd?_KHz7P_UowkBor__jrs=QlyKIN70jF8 zF3;3P!8sH9x8OB9d2JF-o`^+2NC62JG*S14g?KNq5ogCo;MVaVJh{G-^D|$eB5sZq zCj&9S83f5s3e>rzo23uS!#af){{QoZ9xw5y*MrIEw!etm-E(nl1?O0;iZCaKyVSS? z;c{6jGx#!-YWd70(EkTqQsZEHo1w981ZEfUtfTZ$?gp2qj)Q9RSV?X2iZ&>Hss<`th)X zw*SyX&XL2YRvF+P$0Y*YH)SNRd^US8wP>k{4&0aZ zk!0T@n$np^3Fal7sU1TFS95UYZ4lk(PN)~E>9i+z7lz*7L`kbw)7m|k&?1(H+zs4E znk|C35z!Q(_@16NstCmdU#m=T@+q`Jmc0GzO za;DX}MQ}ID$F5|44!3zw+B`Lkddl<4Mk?&$-%o6Phb^X?g8o}Gh0o^yV|)oD`_!wC zj~cy9x;lh)sHMT@TRTfDm4kfZb%b+&`lBr+?8~SB@HWyL54LTfp>F-499=ArpNKl(&fhPKK(v%OZu>OjiQ;#*ygCw{P214a zlL+0_6PZev92O!71J@eSxn?cmO8LEN=pcI^`Hc5Dw?p!*J4&V~;fq-b?1Z;6w*x|` ztNsnklP__{Cx-sbJH`H+F2LiT_9)L)MdXyPY%TYxUG=`mzIYtL#$GG({=vUr(XZH* z<{0jiJPc`(>olx(6W!+X@{pfA#}pHdvo~L{z@}uJbTomH`gWM*9b+RVo#MMAmLX&3 z6Es{sg2Pc#aIuxa>eneb12ToI8X$2o@)FgF;+@x7S!eiB|y z6~^#=yf-;e2ftxgF(hI$1wQUz27j%wGyV{oEK_KQ(MJmSz5y#$C9ylg1~p;B(Hw1v zcc0f$z1L@U|7#*HS@K%$PASfQ|HeeEy-@tA0)3@Czd7qU&$|%4%9;x8Hg$;ENa7-A zm(7n(N2vG~C@z)9W;#PV|1KfvB}#Ou%pM*4GjQKn4M_*`aWNzx-^TITx>X_jRhEsH z2Zz(MsJXav(gC7A+u+~5m&v8CqED7uR65)NE=xU0=|UwGf)Y`|UA|jS$&#k;Wwsao z&^;|n(~3(m_-z4lswKQ`Lj8A1aW7QU@}jcdbfno(bhJWQMChaxR>K*@LjSU zIZg0kkvNP7HejsS7u28F3$ut}IQcx3u4Ju5{N7rqNrupa$G@@8IvcA$eurtLHr=)z zCeV@mN#|2;z<#kR9eGfO>k>N<{?nGTb+O3L`;19R)9})BrQmq8Iu$y`Q1N$9WLn)v zO^gI>RWP8hZ39>|z`w8DZ@)rr68)NFO6yPa?@K}%9<^#=_0I{gU)4{66JqEP_nql~ zEu%bx3`mBQ@%)YiVL>0-j8j?G8eRIJpGmKjCXmP#IeKI$M_vVOoaOVwbvGG$b-|M6 zJ)et`5EXi`rG;HxFcL#_mSIuREXtdDjgDIXVHz)YVodBd5cj=pZ2rgo`ToKxUxHr1 zZfO3RM-8nyq|n*KoHPSayQ~`BbI(!#XB)cnF9J_OIB$DO3x1NV%r$vE0!4S@=*JSI z+U{V9Mw{v8$0OL*bO!r{U2!Z_9Ky!UY<;Rb9;nKpY(xq^RXN~H%|*D573UAra>Snz zgYu9z_T{G|BijT@=6Q!ZdoJMf&rvk1wTWeK7NWO%&cpG3BqEMEvFU+JK}Bt-c|V2f z6$)_RDreuP&%~Pg#kj;~!TQ-V?EO>A6h`~wSzM=)(7Xs%qsix8dgtK&U5Hd?MzU9j z)0mNm5O?ZdWzUz|vioux++TK#nT!8nv)32FHXss?&t|jIRoB^DM*%|h#c1u=5S%o< 
z#mb6=5o0O>5yu#26Cj1r&u_AYSL>ONTep#_hd4zblSNm}!N=M%7BD%ST`u0r7G%C+ zC5kKA!a+H_dsE9c2TaCu8B=6l3}#tP#calK1*SocY-xW} z4ei_?dSJ~pb}M)Z`ddd*-&0Sz+j^HxI>YxPe2{^Jv@8Pebh3~myU`(@!d|Z*flSU@ z`4yi)lWHbATsayGOhVA|VH#cG8x)6i%s~78DRBMzfE`yq#p2ww;cm(IBs|@Y8MYqG zZ`UaDmp8%ljP-Es`p1<2^s}ZpiEQ6hJzS4|!-9(qF!Nvy%WZPRBkt^Rd-j11-`B@8 zjkj4?z((fcF^l4N^1TBK4RLROE)>k_*sY2qe9QV8{FihX`tJ?dwn>xVu_+8PABRxz zk&&1;As>IZCwb^?X-NO~oBc|u#dB8?+Sr^-TZf9G=Yj>EHq|5CzJ*RlnhRWxq@gYM z3dVTcLwxxNvU0zG4=1we%d=X%?tO*WDhHmmwH8D%_8xm!^t7rfRarOVEMxEP zYC3dz(sO((cSYIGS)|}315x+uFq|z)%)JALRQlMO#-ysxq{}JC{~kpnbjq6&l5kZ_jT=|=0wg3eXv7|fY3R~8IOlO zVN;fzhsBF)tYN$pM(IqW{=fBbJ>JG{b{N8AWd?erw9N%fg`e4$P9~8Jsv-9E^UAJL(B=j=2rXt4U1L zllxR7_}+k&g_Q8w4oCmFA^SrS`|Q2|V>Sw4Wbqh9k8|0Qg9p%l<&e?b)0XVi(hw$> zVh7jQ2;2;nVGDG}vht0~n8j8bR+4GK7G0c+7D*qrdRhb1YO&;bbaBLsim+`Zq3r!A zA&jt%N8_G_IA&DN^uJ8Rxfh$+xvGh*e*bsYXKKsT6}hKL`m|B=irrYeB$IW{dc(Bb zT$p!_HY<89&T5`Uvxg^LSW~emZaC|+lWL-9-1UytN=#u{Qw*53=r^O}_ASn{Z9PN6xXTaQNSd8?dW4d3qokp z1YfK^at57)jToZ!18%k>aX)MhTp|ikw8)>c%F{71Z<8dW3gdDfpLf$n7aXLRC)186-kAZAtg*1k^KIZIu z782q{(vg`b0eMaC+f_qKofWLY3-Dq7den})h`G;Bu@2i8Y|=l@0Tv&j;*4wLwDk!S zN*@B>(w+DqsfoWWH(7oDWvE%*U=Kw$;ss}_H*r?P`b-b|x2_4pcg4WRo^rg29msYoveeQ4sx_WiNTG} z(R~(GpunpX-VPe9Xm1QYo~Y&i!(Pb71Y*JAl`tt&LU!p-R^D?C>%7Atu|gP|)`sIr z_!px+6JpqZ7E>`+VIEEG981S)0$IW8HkKa#j5!92py^&Hi|zJ7clmW@__Uf8C33Ij z*TeAeImTKN9Pz1SC)!46Q|r5RSQ-17Z8TTM2TO6-=@zhJwJ0cW8(=Z9YoNEL)ktO8 z1!PZl;X9;uL4T;fd;(HRN%|hkw)o|D;3|YIYY&jfR zQNw^yUzs%dER9Fkf}gDBP8N5M7qXX?b_hS{hy7MDI2|O!GJ{6rmF`;XY$${F@`bqM z#`pI8t!6fLADOQD3?!vahcRU^xfS8K?kUIgk`A-ac1i5i;|v_*?#98$Sxj}+4rYq` zOt$+5^FEQnZy3E`=@3Y`VIv*V@nO4x~)^-ND%gR#foj4tdbfeYL)*>?`+8m~vs=9%o;7EgTW z4u;+DIdFIEGWtB%9@lit(Y#v|MK)?UP-=i_7yH?=x6fF>joVDDZxp1@zhHCrWubC; zozb+R+t^g^A-K|X6$55tDZwOyU1`Z@7gGA!uiee8dq_5`{~HDCgj;O6f-K5@reaS| z0-iogV-ov#2K<{IM7Vp-RI35?-2dzJaXOX_sbS{xlG&cSS_m!Y-ngl}k6A@(mwZ}g%hWzjy z(BbpAu8+CQv-vMOetaqJR!o6tiUgTVvS&NAM}nW?*f?w&iHRH{SIwb(Z;UYsiUHy`=5;2xUCCIz`eEm#W*C%aIOd{&ER^DM`a%%qc|F 
zj>L5F|L0{yspB%=8zLtU3KYlgxl38+g($kNRR;5vR`dm_(1B)QJgDe_&5@fJvG)qX z*#TBRX$!7jNx;8$A=-5~0SZ-jad^vY-hWj`rQ9-DX7LPcd^cLYIa9hrFpFtk2Nkzp z_$L=i2Uj(bao-ZG5B9BiOi)q{MWOfA#a1|m%$LV6kk{YJ=@dxM6 zobh*y6ZRWPl5|=F21?tQ?Kv&@&O3vVNu%lfjwY30n9r$tT`8 zddChE7Y}8NUPM#rTYjz`xqw}H6|767nR6Yh*{!TYC``4$dDBe%IeLp(oT))+FRyV{ z3E}L6XY8z9I2(}_%a&b!kN39*G4RL;syu&MUf;_~1Ng2*{Y2D!jKpWut+8ty86Dgvh^Wh~s~ zBJ-+jVSVF7(CMrX&){OVVkqAS)Rn}2wUT(M%za1-H<fsVg7PN;sxWFtGgT;_&{E-#o##S3OBX^Tf+vvCI@ zn4hRbk59NV!Rd!A=)X2b7Tc)WLyvYWu!iW+QJCDWg;|T|@Ndctj7`)ghiyY)Cvh5% zTlFZWyBaV47~`f{G;FSO&#(Fnia6{;U)SZMZ{i?!j;Lg7)|N1>E}o^R$bhfWHM~CR z$Cg{zQN2Y3j$KQE(0oIjIv@wJ)VtW%q zQh7G}rlrk{u1M2R?s1sA{x#ezmXO%~X-Kl*`$sqCLU^Mg6-v~x-6A6}Le3tV34amD zwxOr}6uxcx%`WXU!BM7ykSA$aw7nj5>^|*#u@Fq5C`=mk{(mW1oHReO<5X!6Q9( z4HcP>p9_%Fo=3=H2+NL0O9G{>O2~20d8EHf8E@%*gbZ8KQ1an6(IYS#y&5aB6SwLi zsjN~IEy+VK5~5&V`9CzVx)lA1S3-SxCvQcipcDQ$YI(v8EA_{sQhaz;- zU`yIg6jw8j{ns#_-6k#a#(us)ja)c7bB0Ax;y=THqXLrmABEbdl!MW52b6r~J=FixL{R+$*&dK%_wDh6^B?a( zu9nEz>C9t~Z>>XHCL`qetQB;tRgtyg4*2S}A0>pWL}EfXidr&6^qZ|>BXy+E^RaW# z_}6b>aM%v;x}pRBtS-UpfA`?@I}cPmRuhr2GtgME=YD_fA3Twkz=6|&Fvi~;4vHb# ze^X1)*h_h0rsN6qN7RAQTMg*y>_}9%XA_cy+dwI&iw22$nCStBQF>V`+U;=zeLMRB z#;i0%)^{(%<EAjLME8<|VTm+`2}?_^+|h z8d1(X9Il9N7iYjj%?V)oM;mtSiDg>f&SHEl)X|TGi*VrAXzR zhVjB!ClQbG+vU*lrENm z`27x$JyjNDj2|*f@2mog*$0_ljl;ku^8t7Sh%>F(l$rih3BqkXVU@Z8db*(s?D8$( zzj0IG@f$O6-qgsCrpG7u}Mse$yBt$4?q$g3)jTCy3MY)%$8;PplG8D zrs6E(=%$HaCX>bF)MY2$BY?s2zaSFP;EO0M~$^1dRH=R|3{hcE^Um#Nlh4L^&VnO6d>O2 zC44tJRKod{#( zO5lY~8F=o=Vz#%g1UnaZAp53JeECW@ZN8tYOsnC1j zC38rwf>{@-z}(85h$bf|!J@a$aO!3qT-n>p{F6z75$%3ps}m0v7d4q1KFYAnG!t0y znspR>VOh>*c-!30M027a)scw0pCur5_ZRa|vkFeO>|s{uL^0aY2H>Lm4;pMdk!G{N z3=u}a6UWJ5lcWI~AEz=?#eH7p>@$p2qa0*DRX~pOTH(^YQf894o9HW!fz`*xp}>m2 z;J;)O1m2kqn|@qndc$`!zrqmY@gpJHG6xQ)j$piZ=p!!&J;*3^gR;0gj5c?dnU;Kt zS^segjGTXgL8|kB(dcDTK8aarmx^Hc24&b+ah~~c&y9)i7{SDi8;07vd|=qF#c*qR zHKa!6Fm|u=;ZeB{xGS%NLTAL-i#me5_esDnkwFq}G?c1^Lg>SK=AKyNE=}uY%xvQz zVESn!J?J!>!>@!byAx1kkQ7_+E|hgh+>fU@=%CwP8<63Lq3n^i9<;nbWNzrkvi95n 
zI|+3 zb(5cHqNqpHQ4PdrI^w)-CZn(%OB19a9>|E?cL}>P9!C`TF%}+uPSjr)DPnis**TkJXt+QJIX^3o#qz z#trCW5|I0ePUg^yPfVY=4)_&|J3}*3dzfDd5IYz)xf;O+^LUUMI0aKSwlZm~3VeST z0QPs>(1^-l$h%h$dl(&9xmOub*H6a5Z8_*w8N!&nNC+Rgh6!74iDahDhKEw8;Gg*r zcu8epLd|bx)ZLXZB3u!C-?@Rx-y&w@W0Cp7Hi6uvsW8;V0wy$8F!%C5m}&XEgZ1Bz zf^N_ZXw7~NogxRYIMokcx-@_gB))%qteB$skzl-_4su`1qFK{TL9$fLsB|9$@Biyz z+{a9UqRan*)L#cAvp|Pk=I#xNS`&e~Jp`wl8te|s8dQ=c`qy8D0Y5twq(m+a-kAbD zhC1xW#YaGTgch2dunT4OYof&>BWT6-CO9fGE~dC;BJFoa(41W-(O+?Iy0BscR5Tm} z3-xfsTn~e?^QCa#a2@Qb$Ku~fMyaFo(QlP>ND>)zOq@M9zh8?!drw4jKF6bdi;4K| zG(ejVJE7e0y|8J_0a$Ho2c{dYGM8(ePzW?2pTY-V;vvgwSdB;CO>e=>aSGZLb{MUA zrUIG9(O^Ac9Xjr0j5d|aqIwM{^wZ)T)RgRpjj44IGjSp+EV=GCHQ<1jQSLQ}~D>4k!L26^2 z(B9sBFy0vho1*qYuT&v8pAj=DP$v`$+eE)~A1LoIM{gvt;N&L2X3sS+dX?zYYrlb< z%+1igm|__1w-3z`Gd^r9$D)WOD(J=52}oh_C8jL%EZnRcg@QWL(8lqv;bQ-N*t8=T z#<*^S*YlOo_vo=m+h8X6N@k%92xmfinwjtKO(8711ie9`W+W^eynGG;J>es=ZMH*l z{|LCR)e1S^OrU9Q3V41GL%|>Zz{7%XjFbF9*jfA2CtZ;VT>~Av8I-hZ6QtOEh6|^jLQ2yT zG{nAvvG{2TG~^VF(U^t|8w;VoO%Anwn+Ux%?(lN&4A4_Kd~@_#wv4>+Z+dL5~jkMjthY2SfU#5U63){Q_O7`4@&*1%)22c zk$+1W82E_UycOCg?As_X9W(?bPX7u%>75XJ;wbZ6I|UXwcEX$=c4%r!4g`+s0}t_? 
zH~h>rh}bAvetV#Vv#CUuMpB-hui2OyjIT>uCgDU%1V+eb$`!^aLEoPc0_aGf( z6*i`L3c}u#;r6L=G?_1Di`S*2MMf*x*I@zd^rj*d{9q$`+F*txFY3^^;On9eKn?|T zL^Ac$rNH}#sLv?2M|bb&uwtub3|r?XUbf($+HqQrxDcQFdD5+*(~z7Dnaj@$kSOm1Fd;>6Q0fx^{b}J@LKyY zvYDR&79%#HgpxGa`|t$3zy~1Fz7#^%=RxmRksoBg1bmlUp$mbgD0|~NXkK*#XvY_( zKDCTl^&}Dh&6??oT}9^mP`P z^=pHcrsyBSS&aP8gRuEgGpt!J2evITkOBvo726e=!6#KvZ$%bV5q(JARR@DD1V-(B zH0-cRhVgs=*bSS_s8fJ{tTQb5bPRTH4Th!VSD-On)a@TS$2>nG&NY%ZLwsfjBd2-= zQZ8yUE-C|zV517nVomdp_D1}cNX8_mjajr#8|E$QVf2p|F;ZbgaCZ1x=FK`2aehz^ zbWA54dY{IiwHT)TwTDHTheYph3v}0Og5Pyh&@I2sXc)yZQ(Q-aVyp~&PZ=ST zPfbwzKWFHOm=1FFDzH+^nh~zWGsc!;-jK)_iU^tmi7hJ7_;(Rh!$~lAkOj_JU}BdS zFaxvJGGWQi$i6WK7O3xp(7GHrHsKJHZ=V9Sr+i^b*JcR%tj`o~)P>@qDPXT1266sw z@LtqBYYbI^{S)pnJ$V|ivTGq|r95G>PKQFe<~_6EUvC74y5cjAQn- z>|#O|YJlm7i_AC1lnF1I1y#ODjI*hjEuy*|)>Lf;-Y=P%r0EYYQbt0(+$Qu)^ilp; z-NoFy)XSJGZDMjxtY&sCG6$)x&CE(KJ<-!SiV1hsKn4Rmw(87t;uywMSMXmJ|V%*O$wW3BeCtb|RDs2b{t9&)r9m@8AG#l% zW=!X-W|A%(VyfidF^;tdnQJ{pFv2T`k$lpDfZy{$|J!AzD?W?awZ($zQ2StJr}kUS zDOwE1^A+Ij{_Rkavxk|p4Z~LBdC>BBFDP_Wm>ui>#vEVl3VMZ`q1R0wGSx*76Uk)~ zwGtTTE$^6683b7~y}<9b63SYAozXbd&B*&tgH)RnNV_%~o%Wdsj-p=dOPd#L(cBL? 
z&quz?r+lM=wRlD{o|G&K33T-^Ykr(Tie# zZvpfF%ZL3{zrc06IdnB^LK_H7@l`t}KhYkkIp2khm(?)3XdKGrM?l?jRb+3J00AEI zh@0cW1TJTxQPg(zJ{7$fo@Zf5+I0xOrUfMuPmncXVUU|o)g;Wyy#Ies|bQXwHJU6?}bb8 zHyL5+JLY*+E=;_+6O9C2AkXxEd71hQaDIJ!ayx(P-SK znUFX)4Xm^W!^aX6D5R=je$x{sPg4c?QzwB5{lqAU*>G7sAz)wnhDkZ41V%;&n83*l zvo=r-eI{?^@mKD>pGAX8540W@+g-aLGC4#IX~P6YW0>Q=Q)qjy&6Y0Ao(lI+js$DCmSL5 z$q-}`DT79zT!eCFFNOB0qtGOq&&cE1ezey=26j58Kv=U6YPvj}{VjVEx!vty3Df6&f`)n?^7TcQ&Y=fT z@o6Lq(W-@8%W~1iUNG7l4${U|Xx6wq^w;DX6dT<_f|$3HBhF@5 zPd$dxE}udZ0xiL_NFI5ftV4HhWr)mbB~+6Ti-ygTMaAkxP^oE(l>mtk?tgJ-d6KoqW9ZY9<1WSV4zwb!ks+eeW)t4 zE5s4~$(t+gUcBMW(wG*PNj#aFYku7mR!bU=N_2(x(G&p*bP3u28@Qkw8#fZgg(*t zt#&6C+Rhcg&V8?#?(-_JV9z*FQ|KaIyOMa;&_Lt&rZWvk4B<1<20>VXwyivjXp<4# zbC>}hf)fnd=U*%sgU$3r)jt6(Zx3wPiefbRC z^fZ(uE`W_AwP4WqUgpn{vyhOI1fK&&qqT2X#`l#fI(|F@uFUU(wgMM;v9yaBWsuLT zSMq~y?la?Yy#o5goW4PZ*|2u33K}K$G9~KInRm~J!1FnT*~A_~dFkWO$gE2EqtYwR zO8+uuilfk%xqZ+asgGW~3}DoR&G0iS9Y9g!5^r&b7+JAa*s}0#=?d5p%YnY0$UaG| z1XGv9MjavpAr^wXGP4W6SfZ>A$Ps{*jM(?Z&dPs2Hntq>XX4Kl5t!Q3DH zaPD^l5% z=fw9NaMnc&2A`^CcD#NddaVpmZ0Sf2FJ?lzf_nAyF8|x4_zI+RZGqaJ&J}snI7>kA;Fd zeI)MYE$2tT-K~#6XLk%p62+X??vqF^YCK%rD}~VRQRu7jWOQGg(Vm;5jt=B%A?t&` zP}}%R=vDtb#PE+qCdFkKv;>2U^BN>q?hljv39U__;2K zF}n(`*QAk^n0M_SJ{&#FTmyQcJ>U~&iqyn9wszPd7^Y+cySM#gPFc#Z>ytu}^`s&Q zNvJ{bxt}2Lh!j#7q=z2woQZ4``kB)wRM4WM2awU`(I_xQ9t{o9L@_1V(AvEg!q=;# zbt^_AwNryp+eCS^GUNnPnl}kTcG;l-xQTRh{-RcOQ<(4(iP`Uu_1y2%D zzyvYt@v{}Ob)A4RR85fisXO4Zu^kdB|G}P*Nyx3L6lqV&2DQIdNYZo`P|qeXly*e* zkHs86!Xn)PWpFVc#~RnjpyZO3NaAxCI;TWHh9nuzE%^wq&Gpd5>sn}Mf*%Bjr=Vx+ zq)?^u4mhz|5gk?d$Q%ea5wkTOp*uNCQBkD=YCm%wja_g8;;v6XCpXVQ26u;{T1Rt8 zeDM)Xb7rEUA|LYK^czssR|0x-Tj6p2N+`>8LwQj)X!ZjwxDe9?9-#@$-Ay_WE^>Az zDGfu8qCa{4#M_LM$P4(v>A>}-v0%TunYoF3KGx%y7};akHJN*QteJ za@!%HAP2Q3XrX=k^x)z09JqC22qb!|foI)rxFW9!`NOI~a%Kl}S#cvWCCeeQuuSCC zBe2Z)k4f|EVUz|~SX89|ucjV?D_;u4T)`ah+CLM0uLF@mE=I=4yk=G{U(JL~4Mp+A zd%>y73oKp?K_7aLGM^u;f;8O>*l}9S-FTwK+?Cw{qgJHC`q?7;MbvZt9_$3I`({C( z<#=c`iG%Up`$Z3<9IRV$8FKo6n*DR@Vr-(cU|QI7@Vzq=`KRSFueBPPe};X`g+_U} 
z;giRVGpz$ZF%PCSL-e-3KOy#e`LNBSh%s##1YIH*fBCz~;v6^!2Fsg6&^&!uzr2en z_j|=WP;rBMaaNGpTmo#k z?F^IOOkjeiiQLLTX`pXripsnjLC>=c!dFj#wVnz)5b?2we zpBa&|ab!Ts;*>aTKfi5L|L14Dm+ua*sh&O_JEq#K_t=`UaoG4YssHa+q_!kpkz8GT zRN&t&;v(gqaie1+gsC;MoFqHeJW{iSq?x=YSt;9vKh7VyWjm&FL*n(!6*FUmKVz*q zediOxkA(*2@6YB5SB@X!D(j`p$DW^W{^pkLc{i;HLBB@Le4>RTml-@lxaL&O>a0j2 zjt@@K>7{vO_{1ckd#!?bS70vpu6za7$Mi*9>xdOXb2(%!u*_(!uiXodgcbjL%DAM+qB1Ym$~hD9id+YdCn@4hn|bK}zhxu-FSl&X z^#66!oO1rM@-M#N@2eHr=ffz=6imhQat3g~v_0f~QzB~;_>^4}I}R^DuY`4)FR+$z z7xCy?SuEW;5tFi`hSK zoN;v4cpN{~6+7ilWXG-zBwCgW@IkE|xTALvHX9z!uIWl+zxNO<_otivt+4|u8!2I? zJq%xJJ%isa9ECl8-e4;-a#?H=Y(b9NwNp= z?g3pkW!o6M^;Zkt=2?iz_b}YJIRnqhQp7=XzOmU)MqsOiQ2f7dn%@7qX=|oW-R$M> z>$TKXxWmg zHFN3t53~5PrY5{;nI0YC630%uRff0he1R^_tEHK1EOAZl397f~kECJyOwu&uD(3&C zk%&7{wCTeVGJdJN@YUchJAVC9@^CZ5bxxU07Di|AcHV|0VUrBzG=>WX?~AEjMH^Y@ zGn0Sv?I^LJbGbED74%HeOX4!rQ*yAdA8TYB#$y|Y2p1C#h>>>&&g`B`N{ZLey}o+% z{=p>rRCyn*`+gVu+c=T2DosM5<3W7OI)bdaEJH^IXplqAnS9O6Cj54D0U!BpfF%05 zlh?y;RO^iNAvRlXkR~pP!#6L}U$eIg(bCrZnyw>UWkfYiFVqnl_!{Ba(`u|T5JW$I zAIBwrlon3?eMcUh^btH9b?H9&HgdLIj?gQzICDmvU=_Md@;mh@SCBiN7AKz-#@VzJ z=&KN-_Wa^+E^u!EgC1Q-*VTdyDuD#YJ>#L<{fG=*!pKRiU{?>DB5g|I&}QiC^}F7f>4*JC~T9}rSjD!cvc`sweQ`il(G^q&5FY|DQY}xu0mxi zRJn$rEhN%m4VC)5o#^|Hpz4b@= z<|48A0U!KGu?y!_mD6tz4q*P~6Umrsuf!UmNO+}oyvF1tKfB6=2-4B`ipvuF0AilZ$6wuU#3(Ni=qSUy0T2%G<_*i znJ|%fs}~c!z{`@8f4`Df%`rq?4-4Z?RaC`y=n1!%_s}5on}VvcHXV`f!Zr8Ip-K0m zIP;ltv`2di$(R0$XIO=E$s6}@AM+LwvrUOO*QK9ydvlzUoG-4vxtEvUnn~h(&XI8g zM+j3hNqF0NPq2PIm}~BJ;CI`NBfb`j^vBgMw%Mg#*s38z3cjS!p)D7P-5h0nz&Dky zSJD!`4&BHVG};UCW1IxrqdH{!&u)7B!%ymD#0ld@N@2Niqq$1~JIJwPp%e!=3Dqmj z_!C~Ev8!J!S@g1njEIopdT(Cg-A`!=qaT#hHcwCNJ~&0_KDkc#{>Fwc9b3k3D=Ful zU-=W$PlaUR^vn2Ll|El!*hSjR!l>R%CxQEtOaFu{z~{bn(#JRK=zj{WSZ9e0-EDo0 z#LkK)7Vo9Fyy=VRw%XNn_#$a;+rduZ+Nc6ZwWEyzNxIi`RA*s(dO0E!nqZvpE)3;XUht6R&JsHUOSOvdGg$X^hBDv zw4l0kdnRk$Zc2}6rQqOdJHB_SAvdnDi9Wg zxyV12aF^Q2!KuT^u0l87;=o+qaHoPW{P$Ek+aIwxi|T!B{33jLo0eVW)^)nVj7t?{4F_HI+*w-P38lGq=mj4wPc~8BMsP=&JX#a 
zF9hu&w5vu=Fv;)anQg!L`GcR3gWm+YN#7D3vX91aXt0o^M7ZzEJL%ycv&e;zbc%1x z=Ets86qNs@;roNS@LSJw)FF1UFy>e@shHr7Z}>0gZ)z3MqrRg^wpu)WKUSA;KN85B zu3A2=tB`aCC1F*$diLnQ(|F-l#p8 z-aB=cs(if1yjIU9jc)@aI-Bi@r@ubUoVJZBdKySpU4Mm(FAotk&01-+{58zHkflSE z{i#f*4DYvJijPh;pr_ZjVK?p`@7s|{vg?nM=MKsE(c{(F-ogQUXlW3uu6fksYXWXs zdzN1Qq(#GO#!_vkldOf=Ey?l&fmHFw0o=Q7Jz45sz&30;PnnTwWWlr*^u{4Kobxl5 zYM&V(>@`eMN?+gy?v_MlOCNuFrvsbGGg#I)maM$!O&yEp5^48&BqPIVpS< zPVN&EjH-u|{zLZsuQ`!8;P+0t=Fu0v=jURwK}(jN<3p++3o*Fk_Aasm9p$Z4ZG~wW zSBTHW{rKv>8~D(PLwG)~gLj>KKx#LiV%sx4S&zXb{NSBUG|ke96wUk0cbB{1xv}L~ zfiI!<8w&B8-*FO|C0B7>uraw|_N;nSaS{%gbqb#-{lt6PZkNP19;1`kE9AegTgfG< z{gRbRYl%~vNQEA2i<|t@P;sU=QGd0XCM(Y;Uh}1e!8^y3#m5Wi$xliAH>1^5wS5#@ z7IlZMNOPn2AIb35?;>fhY6A^-D5YA6KB&ngKr7^i{oQC5Bn_cep3x4I=Vu=PK4lPtIp*r zC6IuoRN=SvM&Za=A7bESN!NWj%Qa^C@*C80iLt#meROvn_n^;}6yA3r#t#a~VKI?d zKS)K`r>DXNNVf>jJ4*=vqiWCJ4jMClRvfEIpC6P6+sQNN{{F!Q(=dXv0P~ z$#Bwyt;>A4bE-D{lm&g=HF{>)xzi8yu?yYsVj-z9mwXcM5mMF=m8=2 zN&^k)n8L-)-$xzpAjnF;C$9S+Q|GCB$&UCc>T$h} z{cHL4?UQj@suW%)u)>;|g$IEH9S@pRPF|H#MD(xkgTkzC*H zNJ>AtlG^04+{~B5@ZY{ZBB~Q`*3x9I`B^0Oa@i!ztk5L0UtHie>!wo6rQ^6w!}klT z$}`EZ`?f;Cfwjcv!8l>r`4Vb-_zsbNm`&QXd~v@~3f)wgLCde}3TwK<1(uxVER!_& z&`fpV(uB8o|LZxz9CC?9DR&AorHZ7g|2@557Rmpbbc~*@+JwKXJxBr=B{sL`IC*v9 z8;!bDDJf@r_@oU)c=T(eV6SUQPd>8}?s=zE*ikG@6BhCgCyIqzPmgd97c$g!YXouV ztsskxSMwhuTWGFd0R6KSb5iX`NTaP671GOToyL7Up<@A^<61!*vv1I%o-XS4;w5jg zUV&@Wy+~E`yvSvXSYkR{om`O`O}m#2=6)GF3fU9x@I&itxH&%}$n|@-Xsk{zzSMo4 zO1UNSwnv5uS+R2Dn&ndzL`JZ+(WA)H$`m^8(O=13;S#4<@Pp{wBGhkIDd*{Qi2HfH zoGT+mZ0k=jyq{{1_5a-82pn|~dnQj`2h%bWXnAhCkq zlBdt!s|zCc5?Aw2AMx1bQZ2r$0i=-kmW0GevnKIHGOtH*s5a7K?x z<=n^pF~|8<$8VBFWF{SzphA8%yk)Jr-iZp^csh7YCAF?RNUG2G;&BsYe6&}Tv+ z9@w45a}{m)>FPm(t5F`2t#9RP@_kU$9y22QypvdUp5>>!-A`9u5#^qtWALOC@FC5pxuR8V)v>?)(gJd!I$bv;-vE8IypA+}BHly3A3; zQD%_v*0YXGSf(aS8i~o{>XAZ}y)5>wRH8qAuElC6!x3ArFKl;SOD}I<#eJM^NiY1} zEd;%|M~d%H5^hZIiXJdPS9e&r3rr%Yi&2ET(3JewAczmTczkcBS8OzAA#bfyas}?d?-iZEjdqi^8 z{;>DfeCH_{BmO6T?Kdb%+7~#=eE7sFaWvB#l$X-)98{eJX!Qom-??MkCAr(v*((R6}Zo 
za6Yk&Olpat-{29|i&hsB$6v;i7C6y$4>r>>PK&+{_vIiql)e8mg8%E?PX1c1;B2B* zxnukl-okSQ_iar)xto1ZVC$FA;w^_s=wfp+dh;OejO;mf&g8>_OXnDJcU(9B+iRqt z-ueWk&RW2Y^?E>-uAVLs$8b8oM2#l+hw`~Hj{JguWATg1!QAtu6(o9B8a?!F8C{=v ziNeSlk}uank5Db*x3w8J)cWwpjc-9NJ6K24SGD3T|xiWa;pH=b6S%X50c_jtWi-*7{g9i4A; z7{4DL%6AQmz^!`eB#y2iS2gESm-PooP^}?-99U0IC_R8#2ADX0d5pea@8?Z_?ZTH2 z4C9(CXOp$7ZdXaIVQE!J5iVbLg~V*NVc#$B!Vg0W=4VMvym8W99Js3*ZyO~| z)pc(1itg6j88;bvGqH@CZMh_vf6Czsa}3Ytp1(B7#xBZTAsv2>;4U_tkIpb)&RUkHlTA}dqY(IwgCxTkH8@c!Ks+Jk1&K=ot1`fVj{ z;!H&$Df=4&!Ik&m2+!ob#Vn*D9NL98>aBZ z%Qw;RFAK4D#St2tJX{!CxQcw!%IB@zRQSjZ7prB4zNZa-Ay{wx3Q{9^LKlpcua3HO zgM>}b!-vDyp)H42lC;^=>B1Ln66tY4Vrzej=pK5{emeh^-<73J)Y^|dzjt`ZXkiTTCP7{Sw?LOpEz z{SPUR96=H`T%+-y8_E2qN?enJ2Pv`N%_Vt;k+$fGT=T7sG<3h6aH_kQl=)=~;|=!E z4LjcOo-Xt0PzNho)>ug2#rz`w-14aTyb7GT_b=ToZATPT6UZDHB`ULLzOXIGm%QPk zsBX$#E-}egqI)-x%=ZqZs%f1hP&`NU?itB#$=xno&@|+33+B{zj4j?ZP)cm(G;`Mu zxe11&4|0p|SP0Sq3REL4j|=`@!>Qd%=lt&}3QA)h^FR8A^6qDj3!i3OMl0SFk}~xo zQZ(}_DJ(7^%ND!PrPsRHslG!v-%w{RC_0HG?tMif{`qtDp)$DR_cD5~)0l?0MTk9K zE1B8DjHcW}5_=GCA;Hx3m zXUdUVXPU{1AJ55thbS_`q+C+>@gFrc3BxqzHuqZQ7PasV=d`Xm30e0;@yhb2bVv@* z7Y)Bbp5$nAtk+1|x-gk2uP-JyFSygYzyHv6XG`gNd0Anp>^H6=;-H{V;UL_#ouI_iJaMZGmU0d^l z_K!0Y)bVVJ4-FxP<`={{qYioU?4hvt@f~U}olgfVw6MaWMZ)m~nuM;8BxB1Rg&$iC zgt5}IdGx21@C%yBRKKUh>-GWOdC)k1_qutktnEnhEcpofVizN^Y@I^%%)_zsvs@bd zS)1=^8A?7MF(X=QKGAi)0{Q*SNcelEhw8cFulnTDI9!@N)iJ zjaRIvR)vSj<@AMwTXYy3MQ76OyJP6I(?;adq$%9yX}SDXHjb3-l;vz6_7H%zMEc!1 zZq8s+vY;uSM!V>c5p(|#qriFe)rLL14QtFTHQyq+yv>m|3``+TuL5cR^CqnFC6)ep zewD6YDa)DLIgpTAPhz|~lzd;XN|JTWm|a+sL7k#@8%!&D!ldoOPz-ow*r(aeYZ%Li)Yq=pp9I{X+vW6DXxMH3tl=PLBw(h)uC zBTn|MC3=?^l5V{Kdd1=r&hU_@lPqJ1*{R#Osvw!ze;7f2E;N-apYf{NRR0#~{x+1Z zoi{*ZN3JEsbz|s^RtF)tIhAg&&KJ)*WkmX*99`e7&UGp@^8O3^cnDleI;2cE?$|97 z-&}zcjdJLhixS?wCjqYvc!KvwXyIm)7_6P0M6JblPF-vppZ_UHID7L4&t31q%BO}< z99BgYL$=Z(FCB%>P*twKA)9vUhLEjKCXkII2h%rtt`g-<)TnB%U3bw0?&*om(4E4{6^K<8fQqG969Q;}=IqEq^R| 
zwk@LiP5N^b>!2?DzCMYa^5#A@N*>M8Xl15%Y?Yw&?}XR-6>IkY5Lf-NVlZUWwBUQE6^{wDCw_DLd7a*~?8SjX{BiqwUMe=2k1yyTFPv84 zF`>PZLC>a==M|5sm60rov?%5$o&U(6dlEro)}A6aYC~y2`ZH?T>;-)%e-rxo5AVxP za_Na+J!e_~qo-OY-edXhffu+@&g^vH_HA4uY_+;pRd zKe_RfM;g(#3nNHL(Lt<|@PJM<8iPNp$_diJWkgTgk?--o%HNRvPGkqg^26R9BNGn4 zs7^@7{N|r`NtDb_-o!eDs7WuR&ewMF{?4lO*M-aM2k+1PjQmi^ujdoU`b*QO%C8uD zzsFH%);)o>>*|T&15M#(NQUsAY(L5BzDi^|BdJ{aQtsuwX=Lc(Ws)oNF^QD1=6d(l zl20|6c<8D1q>qNu-*0U=gYA20ukK?SctHjaeE*D(jpZ>B_Y_Y8HwiyI<;k0=9MMBn zB;iV|a3X84FzS(?U}jf~*J;luA&q&Y@t-<7TJr}DH%O$4@gqpT&1I_4^@|$*2`0yi zSm6k_k~cZIMUb^n;CpSK5y$B^?6V|S?zXb6Lk5(@@v5RDsTqDX3 zO+r|(v0%9pbAwWj)4ck*blezqn!L4+^qh+0-KPB}cZ+q-J zwA=ZI+cNka8yhLM>!V+7HVRJ@k8;gZeK@A8fo^O$h4*zjkv_3`tW6#&G)&XM{Y8KA zg@K90MdK&EYFNqQhLN;>|96S)z-c10M_riuJdz~69wTUf51}fVse(t$VVu4yQi!{> zSoo^GPpEsFN{vaJaBzMiUppy)e%}0>FDi8B|LV6>`I*o75ei8n(&`W1>3f{o-MC0z zC^kv{n`F;x`VYr5yCwYV)lvAzx<~wxiN|94u8UrO?%8=j5|vjnQ3Iw(Q+K* z{uIwkFcEO{Zo2X7J|bBjP9tX+kf~d}C_6NfxUQ*X`^S}%(SI8y(OuO<^UN|@G4ePW z>Z(95ox6{3eXk(bw`fuQtb8JLgz_E-7tvO)Hi`N%mB-<2c5QQgOd5KqM zbl0(x~#i%Y3M+0qtM+8n1KNOCp2P`2g#4{NN=9 zOURB#$LYo*mOfqn07vY}qx*tqkhuj{>6W52KBW1ZxaXQgH~sx2S$Jxi@W@?RFqO-o z0U8bbc49z{Zr9Q`OOBEQWmD))`?NBVx|j?o@|Ei4E=={yw{`F z`wQ8njb~_(Ln4iHM)ZKIwBR%03h~PfBxhHgq=P;U7Y^8LBG&>apUt-6wjE(a_4!_E zdiXuQ_52RsJNys-Ym7VAJ|{)hf8UcNNDZSU#DTICo%*+U^wCembzdKmnpH^`J{U$U3{MlEK3(EJbvCd1 zFPP{|mm@=7-63`##|Z-)6G_HRIjS^{$2;>oX~F9RV&5r?LQn&?Uv-%8Qv|wA>R?st zg-#lAQxn&%C;UboPrSe~gABT{kN>>j7kfuG9=8=*k>wFhl1W;T{M`L!NZ#l&ANjA3 z$n-K~be=Vr{$eh14O_`oE_+Nmo<(q4zmw^zGoyr;CKribOS@21&_%C#UBRi&!-a46 zl1Opy1Yvw~G|9j3C-EBHLr(gS5~S`Y;K^I1xw6TZQCL#3u=;BR@4ja%^}qgtyWn>P zyBu>Pb5b>gTaT@|kIG-jo4ZwHp4wW0{c{x$TydfkY;Ea}Ss7%+r6O)I`-G~79O1^M zl?s0%m(dIB3b_?|Ux~b5D`&S~T?h{kl;|8&A;t35H1TILt7SHWw@9!i19tADH{DLq zz8*t3btR!fX#-chisjmSL&>f^RwNJ}6Pu7*SjKUbF!<$jvSrIKGA(>3ue|diPAPm# zGag>SCAU=sgMA(R+Q|DPE$=Y-G?(KReYK>Q;@4wO)pz*(wa=8(yh?P8ALAo)X3_5w zdA51McG~Q$M~*gS6Ca@+;tL^BF>Su%cJlPI+ROHuOLeXE>nv;{n#YmlZJkX=L7P+ 
zNmNuMxsRvNteFXPLS-$xK(C3-G;bvX9~FuG1aVGuES>26KYX2OSWRE}ur<)6P=rz{ z6=`li&)RD%Q6XhWsAS4KM1;(tfkGP5pfsUmE_L?WQbJ`;#thL;TVKc0!4a$2{182+rx8alP8HbrxzCxbq#GZI@|Agu?SZi4bo#bgo-@ra`N6%)a>2jCX|MiQH>^E3Y*tU*(>wKhv zukN$#j;pl$3)_;}oyx*XA)I}fq#&HRGEhvtbX|-%lu2*T)E0KMOlLJ2eT4CAjildw z1?Jv=s!(egBvdXer4}tanYnz{%a|HLWMKV|^=cyyFy; zBg5G2+3K`$iW%jaS5q6U6uMX*gv?p=O5E}-ms#BV$$U#&`8fCfvg3Cb@fA(0*x9eD zY(dpMnd#y(KIKs~-zoab%tLHM>47spvb|h9*zdXM9GyTLrmW*rXAI@1#1CMT^dE@h z6+VkwFJI+9+0>S-3*+fEK~*xorOOZOe^`9;a2&lm+n@ReTxM3X*{pA51naTahxxVd zqah_`Y)4h9O!e?We&S|pF?GulezM6mvD0Kb3!%2ue`XO0`sK$~q^+PM+=uf0DyLDC zx50QJ{WP8OyOCdRF_8JqJj4q6>5Gd0ylI5p7`|@Yc=3+?H8#rY3s%RSkUg8EAq+U! z#Sapl>D}jPyjRFsv8S;I?>=!Qy>LKAcW=t0(x`Nr@gYg*mwS|l(+BuW-`y+-O34N5 z9A1{QUm6lLlkNFiEj#XS%q%~jq`$rESjD@kVz*`8i`%C@p(}>${Qe=FJi_bZ5{>OL)o4>k7Tx{D@z+ zuP-w+?@x;cj15LUadW{vj^sPeHWVz56# zTKdDC)~U+7CwW8Z)2dv)M@KBZJyoAx-j&3fW!LC>_nYF8<^<~0QO!D%4$~UD!EE-R z4AwO=LP}~YqY87*G8fAX`r2-?5Zql~Dnmo*W5Yc5uhmxg+Lc?A5SeT{^N6>6!OPjJ>k`y@??$^HnNJyLfX3a4BP1T zm&I;15uRQ0WQWb;*<0s+(yIbaN{O+fqu<`9F7dtD>Qy~Ozr(xetLkH{VMYa$cBx5j zX}Q85lPKYv(49W9rzrcpd4x8vziheX+rMT6Lx(|W8aR6{apr|e&Hl2|c% z5QZSaI_(w-*hdfqQ+&GxUL!@lHkhVp$>%)BIFYR_P&qsE;rC(c=Oi?i?#yQ)(mW`XtjGroALP zd7U;lQ-dx%b$}&jw$YeUWj5Sx60I71oNt-+i8jpD7RX==>gHBJXY5Ip8R%*78+`8b z-mdHT&S7RWvtE-r__>Plk6+P3_Y!t%l`C}}n?qhmUs&*gdGvijG;jRpF~9KCXdiVH#3(uBZ>OR_(X+DvTN=$dR(Ksr1|(MD;6iPf8NDR$FW^}x-g376fIan=l1X{znk=9a5(SuGn$P{IxoJup~yBpi()=!8%2e+duYdr*W$Pc zEp~QJEPK^^l~642VLR-4jh&v>LaEvmp>%4v&=T(=zI+@)8`bym@o(lx&c>HmW|qBh zdw2q0*ykX%FgKRXIiXBfO{5Pt-C@1E8}PnWZKBK8;j*N!>q|D9ZD*%y&$C;{ z?8W%Kk-TK|R6KeofV!2;mJNMiOY;UVqDRB?SaQxqvMW`SMSbJxi_NiO@SGgRdI}=T zaAi&*{YjndEqVTLrC4tsN7mTp$n+k&(p}4*iBe}C^O5Zm3uZXUj*iX{jEhS6k-c*H zgIDIs>vyrd$M0S&G1o-!{aDGibzY+vQ$qM@+G(O}<6gRC)Cn5n^HH=62&Z2RGwGb_ z(X1l*ji|TgmMEP^!Od$Xd*%C_9zCbcc8=*KE=#>lTP(Ixv{t0P7I#I5FrLkM7DU_1 zTEt(OxA|DV<+RP0u!UZRLO0J=T6$4IJ|9w0tWljqCvH+^%87@VbxjOi(7lM>X&EBv zjk?T+U)fD(8v4_~)cfL-F<(S)c`ws$SiNkM#Y8@Kakv+JdHrv%in$I-n-= 
zwsu?w-Eqx_Wo6Exu}|Fu+Xj>@E}Bcx8t2%#yH@gd%w&On+$)x(*07j7OKHve7@8<$ zivNx7&en_?DLo$1eZ=r~GvW2?PVvT|MuDEF7PQ+XA+hgg@j?6qVP!`q-O+koIja2$GR}#@1lay^_mHN^EI)FP1fEG1L4zkKb;a$GX4WA?nwN z%wc~jCB0<)vrpzUyiALBTOiMgTWVNfh#4&?FQWPL>%?$d2fDIiI6KlqPuTY$o*Jsp z5C^?nPp|mAVC3Z_VXeF`aV3JvEF!xz6aNpCt~|^ON(-5D#ueVaHJ&!UFctK(Q8MhO zAq{;J$a>${B2JH+EzOL)$k$Fj!4{nU#!`&B#J(=h#U9goOE0bo(##J%=;nvYv~~1j zc0S3F3i}FKPQ_5RQga@S$TXtM@_ULS#w64GG5IWfQVr|*SYNpRM1d_CX33&*+^B=b z40&8v$sU_UNsc2Qi);EEqoi#BJv6i*J=sehANwBWz2;T$(r|yae^okNwbNDBcZnU{ zZcrpDTrn4%ru1gVelHM{4i93V`fBhc>zl-XJEpT?dIP1i|IMOLX8sm;A2Fm!K0~Gc zceJGEKi0FIk}b{WB3RtILS`p(q0Up2*|*QVq}}Vsh-!Ju`AwG#>E8%jYN@STLPAso z!v&S>gY7ixz5NVr{kfL*lX+6ZU6J(p^X_z|o)*))Rm&@=QaZ*bSTx=AlV9m_UX((6 zNJGBg7j>@nW2t@jv4qL{_`a)C`3bMjh|lupFtxlCDZkfUy5Fz2^usSyx*RxMI^Z^y zTFq3X;mQU=P0nOtV^ApVY|2Bsh!pBRXqWh0J)64u{p6p{%M}N{KEw?F1Tg1@CH($f z6WHOfNi<;m4`%bUJ5`Gj#YOcw(#P+&=&foM>EUV*>XJP}IvZ&#rK@$Z;J8QhpVmVL z`7!L%Yo3h@C-)8zk$o|CW zK;J!L?}D3R-qbMRm_db%ywszTx4qEK^fTMMYd0Nv`gQ2$#iS+ z=$i@jzz`Yl{%)b*x%dawDaj`4X0`P4s!eq6j#A-4Krnlxzf{t;GGXxt^My@96*GFN zLf@ZVC&`}q%XE|{Qhwt#TJpqPa+v2w4c&sISx-#av{SZpXuoeVmFbNv;k3F`xxk7Q zg-6d&PUU@BI_zX{Nzyf;A=cGuiaR zKT3Kk-(bakKsvi}m2m7sjFi#qRLL%V4Z(Dz5+m!C#YYR?Q-(pY!w|AB_(71qp`@~5- z)@TYgTdP^0Fr-F5{KdbuJp`9u17$nvt=Ll!A9hxXXVy1DCDpWd!Ys!T!gte4)XhkP z4lo`l)phq~4`;_qGWjN3*f14om}4{>Z?S~YoNmJPfW35!VI;qOaSp#NsE?GLHd=mX zA7cXt-e8-ezd}iT75@Gj|j-2<2PT3v9 z6hBUq40qcLhff{gd$`52y-uo<$Ax0*x+F;$Tl+=Skk3MB>L=3PKa1Ehzeq+Fuawqi zfMk7qEGtt}Wp-idjP?7>vc|@+mnyMD@kj`*NVAb9KlB&WR)(|0-5SyaqeNNXqVux9 z`8%aFwGnv1H%e?hZ^$4Y&D`;jiM?XHh&aWDEL|B=;g8k_gPEjpcT%{Dsb{Ng2 z_G|Y_W5OTG_)+q_S{y2lZP8&37ddI;bq%)s*;KZALIhoUAy}r=&?aUv1Nu8Mh<|bR z7mM4lir&7sMLKz{jb`mjp*Pp?+)GU02wAgt%)f`qREl zW(hkJ6X{>uNPc3j7MoZzjP_YRm74xKD7HQq=`#KIqFIAG@y%6d7CB8MTGfi)o4!;$ zEk6$iVJ5;hi$>A>Z(qs%%vtGv@EE1tlYO%h^xQC4%MHLRIfd!0Z&XuINDXPS6-KBq0J{L zdbS_EGtWp;9ywgd6~>8YXUr9@+%uLIR}U3#sEwn;MjJ6{T^7rGbbIqYRuPm*;9#6H>@}-#VPEuRLFKM0gL~+3rAL+4039Gu8#QwdED_*zwApLgbyzG*E 
z2DB_sN7AgZ6vEftWc!;(N+&N*l9~?bilcvINs@C5^KHK_bw8`fn(JmspPdJb!+iG_ zzvFt!V`X=C@az&6U4E417pCBgu(j0RdN1qoM@85}TV+$Ul*JM6hZFB8`HWz{ZM0b- zl5YrWS{b4&os51*H@o-dH+fhKmhxE^ z`ri?HX5&6~V)!9!{%*$-mmA@-ubXL5^*!0r&}iyxY$H|&-)2Q-R_xAZKOw|WN4ny2 zi5{GFfR%dp6}Amhp(_^1Yo=dQ`Kxm#vbAn%lC}13UUT1W(I9v{{rzGje}2_Uw$E=R z-+1pBUwL(?nDpi;->X5sClc(>+!F`M@HXW?jxDDHCs&a5tA{cdMP1oM?<@3=f0y{) zZIbv(b1!|AkS9+3P|CNzna-bkm`R+j*U94kWs~n7-6ZFcR&1JD5W70Sl}&SfAx4kQ z<@4EiRLD4@NdjI9SqsU@D#8qRrOu)_ky;FFSJN3?-Wj_>9+1yvKm8vL3$b!i54C_NTLr zPq>=I`)u1UW{-%VzLnqki+A_X2Ui`%ZBzeZ*LxWaTzi$zFf`%Mg%M)*NfTDxZ6E*8#6zST|M0Vp=>e=aq?WxsiKo2w9WidTIIwBIc;#%B zcF5XR3A3PE3eY(W(FW+#`woB~ZvC(Wzrvv?^zLjLV zg^Ar~Hi;2Nykt5imVcdIAN>r?=Dkt(!&v^j2o}A(RdX43Pc~?kfd% z_ooh)C+NT#2{h>K9p<8w!%SBuOC#eV>4&)oS%&E;x8Rf3qHi{`}+vhSI6P*RlbRRAfh1T@%wyJca2od&QJhdP3_~ zYc{LfVCh$t#A;Hz(bse4(B&_S#JkUDO6xoqiA}s3TQPsDFx+2DJ~N=pu8+)N%16$z zFN6BBsW)s{k9l3pXhVazaQ_8%$wrN3@r=LJJVuzV94YCoTZg^e&7^O)H#56S$u#ER zctJA$L1m3U+11BQ^ybP2TDqBvbE=lHQT997W2;?sSI~DF6Y`kFKJa9r4tq)daaA$8 zJwnLreVm{5s*#Rpccl9)e!HBCY?Io;KMp{^2SfLd9*ckce*6DIw`Qj-wI^k8?9JrUMN36 zr&@G2%px608Pv5?iCj%RMhCduX3NJmkeE~TqQkf*UVW-BofUAH3YBFWDDMS&R6s!g^Uq=_r$){?7YnW&79`Ac=YS&kaZAo8C zZjHUf{-$hXa6lgKB^B_i8V}MH6OK{qwzbSAA)MNMxFeq1c3uoiR;B$7W>B?rMWVr~ z7^WQQFLpdy&X%}I{E-|P`#WSh)n4}JFmpH z%vi|BZTU_b%U98%Z9!t%mq1y-!v@-aTL-t6*!gY8ja~if${Q(t(7lG-1yX_Ulj&>a2N>-S?PFUGHCKccYFo9REQ~s;?~W z8+sI!%jUnfrOu6(!j7B$n8zPuN%dbL9rJak^!!sgQ}4Hku05q9ysZyp zf2}_9j*IQt;0wE@iBg={Z|*{2^o4$)WYqxxWy!k&3&K26qjEY4-CN9pInqSVjgE3Z%YWDwEcTFn zmc9*~C~0<25*9}|^P%$DjvkBuW5-t~(}I$-w0-4JLBn>nu%J>BC&<6oNQK|@j=VOJ zXWWC$dD&AkKIchs)gr;CW(tkZiYLFj-8`o6W1vrM55Q_z{!h>4|OfnNOVtad5>{ zR^RVCE3=!-Vw(>OY8@Snck_^LbRDEe_9sh&MisDU+jFEunTt3#{TEwrDnHr$Ir(m2 z1z+2X@!9#V{G2PsH0Jtf=9Ku4p4vW8>{X{CC9E4ky+vO(A#R|wc->q!D*HR@_Nj}+ zZC%c0&lpO747$MDpq=k;c$`oFSH#jz_mEOgJQw|pAMhFm3e3Pjhu-(x#xU+R@4L~6 z-t?$u4c~gxu|*Sz`1F*xd%+H|?&(%KU_&o)lwBx4(*&2J{9x))_j&CA)n?fj9D*v&!OmL2~#N2 z5_Yc$BmWK6N_ z=&9x+vFp$@`s1VrnVAy7Y~Fa#^|yohw6qEGStoy%U)IU{MvUW4w7(Oroi)_|Ym)eS 
z?^$uo{q8h&_;MO)&_dKbcJN0+Id*mD6&f)3l{oz=M=Pyv$ofAv;-7lhQZxB`;+A{| zWLR`5Um--WnPcUhQiWU3gZoc^Y zYH`MC2liI@!fPv}GxO}%yoUCC+25=o(%pu^e8z%9qUNwd`gmHDcsu5+xW!Y0wW{3W zJKQ^&W%^UT_%&nm*=?En!{cJz{n5;D$^`a*u>t=BP+{=DfCK(NapC`=1I8+FsAf9= zXRcQU-zgGC&ho=g{XJ0jMJ^C;7>0%>vCyugN%kHng8$CSfz_*$KtWcAYxFkYofYNa zvTFcjhH$vj;R84kAFk@&YbaZFms^?>h}Lzkpub`#W(W5s-R>J=OmZxq>7j$ebcdsD zk=np!=NzC-#Y`J3>*WxhX$co^Bxno zT9a8CW;pJ{UaUIV;c(fa0~bwF!Z&|I@!O6!99J)a-H&99dcG9>g57XsM^98pF~r>6 zW}yEzQ7)g_Ef@VY!R1@G;mVmBgIk0KJT27=8uUqR!Q6kM_L0I8j>i=%`XVDrN9NAg5? zE|+~8znH~s7klB>`2Qe$tS9Px9SXA#JO!5tYI5+BIi7p zqHHT+iP{W!;4wnJW@ZUj>aKHY)}x?)%LQ)62^EM6Du;8bJ#dZ9eQ+N550>iO;zrmd zK-GVU@4hTUr8r#>ZCjzMy4fM+rY=q|+6{HZ1<=EN8W$Sg$gO$Q0QbJuz>ECtV0keP zKFn){g&J$P=@SN`;txGkKH3lFoYMlmr~UEYyPddW`yu=pU<%L2r?xn7w1NL2U_r+gAbsG z%PPVJ?Z+3tEXb}CyYTnoIwD=YjYh&sEY#~s!ndemqwRjwvYS9g40?poe;d)c&kUk{ zED~DoY{6F-*OBor9*}@D@9=2FLQ*)mfIHM}0ogLE2BX&g!$%*VK`QDMB`1lq$jqrf`mOWwd%n|VWm@7Brya=)d zX<)SJ4A+^_#yKz84%^KlKpTcaAJ6Sv^*wboJJ}y&$L@tvrG4DFq`6>V@|kmPRY&z+ z@3`s_I-Ig`e^7S(2G-imaBx5lXFbY?vl-z zFLW{Xc@K0|2nT~C23n(K(5)dAmW0VATSIO@s<{dzpPR}lzOCX&V=?zPtc9z|Hv@-T zJ#oz9Cm?T&qimTSoEUcozRq8ReG>QJK~Bc43HS-Ioiz>(UfwvMIUK5O)KDw8)M38% zBd%fieQ5lf3N8)(aC@Jz!23>zoAWZbYxx7Px0_tDXVbEQOi z83Va?eWB$_GiXk8<6>vMhT&(Ha(3m8xbo2>xtiArgEW@PfxbFedU`Z=jaR}wrB9%` zmny1mHp0llo^aITGiQI;N-jjw1A}(C;Chk~9$Bx8Z@y{cK9!wlK3^YCzLJZs)gR&8 zt4DC+c|~}frG$-6C%E8^e%QaWJMv8&dNu9g*4o!{eXP%b-k2@e6txl{WG6U;)Whi* z4Ny6yf*GKJzjhgcs#f;y^eF~dJ88`q`_w0M%cdD5Sm|Qz->Ea znEdP~{Pe8nHf(&JSo#u8d zY~j}4kASX*G;p7K1B(0B0RNK*7#IbSoz56?*a=Hbv>>4K2weQ~z@ao)1?yXUpm**h8Rnfr~1Wj)B2%mhA|=o0^dMtI%h41UrG=ha6x zlAG^_k&eG_KubJ8oHQqr?$%lmrQ(456f^NP%OGLbFXO_4%KW$9h5YH$d&pb!m*lUZ z3z=(^OY&NE$b!vN`2xi_Vz#&siRpiWSUm4Tf-lS>PPK(($Y)hzKemhXz3j^;-;;~O zFQgJ9y(0c+uXO(7zXRk^jC>Vi+Gx-n?89H3Ye#awb>pk9`k^@a0WMMPAiH!=5uJ)i zk`lOqB*#4@N^;Tpm&0~^(t=U^-LKkYs>ww1W{nGPSo;o6alu#?v>kR7rjjqprljrn zQZjl#Z`>Jp7MISk$Hrwx@y?FZ5VxQQbl>|A#Et~qQB{VCM}NZA|KjlUR2S@c87@kfyvnQfSdzcQ~98<`q;o!OH(|6PZZ2Djj$MTL-m 
zC+5~wUUeHD>DLd>Z_)=70~t1i1mRs*p8LBt5^78o(R1Tx%u|)XCJD);o1 z_^YUxP=z^LkK$sVF#M^%0t2?(L(|^VaL$Sncoe$<^KSWJU6))|J7*cLn!FbuEcYU9 zJ<>_cj@uyg(j(R{l=(@wB{JcG9FRBNnzZk(;9jRylBW{`aKU3YLhF^`*rl(qx>q-{ zE%FZXDbFx->L}FgaR$9ABcZ&yFZn)QiIi3Hcq``wMx}7%nnr~j$Qg*g42t0b>q#7Y z{l*ol`|wp{48FK@64$?&hneBg#3*Ps+B~y|wG-Ze)y4hfu%AB3`s9WqzS==+=Q%7} zd<%D2MRIL!`5-CQ!8Dy}yceGeYaEMlT-QOe=S~{#DY}Alb143eI)*3o?C_tF2FY?x z!%ch5(R6n#YSs?K9mP7h-D3gTc2!}$wkkQcXCxVJycDvu(=q$VadOe8mMF^QAlH2I z$k^y|Zo1DxGTYA%yS048!agogap8v?!rp?T2G!&7>%qAA%{;sxU5W>mO@m=!fyA(* z9hZ((#T7ey<0YddWCqcPzg|xvYQkt^DBUJm}<43@V=PUF*cZrfLLctW4bfrFI}H+)w?$AU?mU%58EDNW~+ zme}Eegjn=-50MYl4#M2rRPfNNfq-lRBVV3JlRw|MV$#4pDKdi==lh(l{Ww_o?=&n> zR>tlnGAKTzisjWmx#?=5V8hRn$IApQSDC}si4728r_8zT`VFmDNOzjAiK)ocMSyB>nV6H(mJsxfGj>xEUP6yd^y?oh#6!LUt}jP`nmv+sO{ zQ%??&*k>VR>4xpZ$e;j!D0mSFN<$LYlMhQS#82UJVdh|CoRamBd^rb! zO|~U9ZDHh4`B6@qsE^G)qp;@yH1>m9kmDUi!IjY6Kt?`g$Mhhx%=&VpwH<-(3m2Dho&MdN>~HqgLGk+M-jS>RDsYFKImUPi5pfN zfw}?jxEmYy;?KfM=+bouN{`&eS%vL zA0w~-;f|l03ZJHSKxK10x;2%8?ti_|wYmhS%K~7}nKZo9n1avh(qPiTEa>ya4R<)N zLQ|uo7?61ZylmzFPi+$EP%R+y{!Yciztl;Sjt6d3zK$yT{qg#(iD2FPH1zH&$NQTt zNvV@DuGhSPdygfd?VKY-r9A05nP75)usssFZ4=cxpldaKjaMhnI@@1GZ z*;s8&a6=s~STKSFe{6;R^B%+U8H@3cVIv&Roq*CLZSwKC89za9I2o1eNRsT%Va_rw z;#wU});pgi3YB-^U3XLR%d(3joeAXPe?DY#UkRV?T>{QmhGOjWRR!Nsf_K3=pDd@NI7`s*Wb{9q4MELjQpzYlU<*>c(T z`YE_!$p`o|{Uv8_bB{ADo-NOle!|zjcB)9&tJ#IRYz;&MPg3UA(Cv5ov zi!IgB_`lVl@0$%XH-ZvN70}3ye=8TWISj+qIi~0&YlK8E z9dL}2z`RDAXwNv0iNA(`*5-FuI(I2|@?`=Jve*Thyj=91#c|mxh1@#vE{wdQje67V z;LC_vpch>ZFU}q0YKa%Nz3h)gc5;}p92WhlstK~~8!#)Y2z6iNg4TznWY+fEczD}8 z+`RfK9;sC(?(HMdRO=owGqgY{WIC9JH^VH$DiRtth&VkC!$Zr{;lK1x`1)cbUP*k! 
zC0j1V_H}1qVBv3c=o*3}|5*`s;sGhvc#g|1`;mZKWq9xI9o)2z!;4B@WR%i<^iX!i z=GGSUm;v}Q{4^SRD$4KZYBc&Wm7MOMOfm*O0!u-Wyv$MK$G?3|Mw$P>S!-XTkN0Zs z*1Q8GMGmQ7xx$}p3`>Dk2R(6dtvnudQ6%kHgAE%*yjH0|f`5I2lW&I*gPlQS0Xd9T z7R5O8Wh^n_Hq&}IRV zmh;g&^et{pRU>!HR$$Gu_vk8J#Ya!HNv0e|?NIuGeB-mBxVjhrt-}O6+V7y|A~WLb z^&3~&JS6=-Z6ukSrVyc^3Zr5_VBAk-wEQlD>HD>~B#cL&HSw4jAmY}m-TA0f%SgdH zHL^O$hVb7g8D(^WOf-2wGTWzsOZr*-D(b+(=5P|AK8jpv9Yn(JkAMqEFs65Z@>X{f z*;Jj5Q-+jd#D5=gxloO_p3J~k_lwcgaxxjc(j9kSmy6^y=E4qhxwubb8Mk0ke@qzs z3Q}X0@kDhG80dK#epu_l-}UM^<(m%-UDuu4w^9c}f;7=;el4UOe$NHYWZatr%b?Px z6e2?k9AvweLw#H^cUR31Ba|~aqUnJCF8eT8bsKyp-{k^{Dp0Wth3{62p<8P(mK;)p zq`=!;O#jL7dUPUpNuFP;>`DRF)&n=S9)_{Xa$!haBv=~nfzeAtaLwgaXnpZ1mvcQF zr*To-z|aDWN~i#j;#L^%@5nv0`pQ{Lzo5ND0sDHFK>kN(C>>o3E^%?(r_oce&sYsC zS}d1()VM$o<8FAVd>;5#U4ilIPH-c}Zoua|yW_#dV{tYMh4(gVF#3cUjNcTEcEU|g zw{-&dXE6b>WhK7*bQhjScXAsS6mZv#*klC*PJFCUDgES6mwy7*#T(w{tw@tFo(N-+o0r~8}3xw3(5Ye_{Py4lcrw; ztv{C7@NPZMyqm#|8r%q-XU34xR++fCS`Y8E$!kl;>K(TA&cl4)Ghj7vBRZecgu=@n zz=Sc#qQ_wFDg!*Vdo39D`VTBye{n&~3E!`m*Mq(vgx2!~FzcB$E_Tb8cex_b$!QX# z9esqOEl!|nb20WBcNkZFjKv#2df}j6)5sxp0oRrH0u^`&5I;x$dg{QzU;1PD+1ZfO zBN5LmFTyR0<+V{SQ*1O!fRvNZu|(JfA9^y>TDFUfQrV8BgHBpTP%T z3sNZr;bKEQtQj^9T@*8*vPOhlG7kOvm7rdBAwD#(#E&yi!KS1jTsy&og!ev4#K1Wi z_^*I0o7cp7U$rAykpk*=p2f!+qrhupJ`8ZUj46B>Wz)c;8%ZzSRL(GJ$b3p0q-N z{}Jwp;eQU_Tcfz9PY2-Mr)YT&zng2i>diGKIDm2aeRy!y!y%$i3wN&BUJmJ=gHwJj zl-I`UVcZN|3PLrs+T(adD9I&rp3aDqXqD;E)gD%NrQqb2Apx3JU0D)9$tPu#jQT=3>(@yp_k$a zc>gCHCI;^2>IEI#oAexdzwU(J+N!W?O*J&-TfhpNp7?gfE3SUoQ0|415ttAA4Ug~S z1GiG*6s{F>DZWGHQt<|;%1Pv2jW35#|4p2mACEg{pWr;rO)=0u9GzxFf!&7n*uF9o z-aRabiAiCwRYuX{gC=aa^pxv0-v`#nf|he~;M4?Zn7K{{{=4EmS@ zEfM)x=)DWO9#}yv(Zlbnb-1_M_IS;AhCE;R2-;2ITt~oXE@#Jim|foqgU6J?=U`t@ zn^6F{r}Mabjswv9jeuhY&H9HwVi5^!`&}(In|5c!LS}Me*0WZz8%j!5X{k4 zEZ`1qo`aWSE^=YNrsLJ1aD23&04iqwk$dC*H;lCxIS+LK^hVUeMeztW)n~x@DJLMzXC$Unc5pcz8*q`rI-EbC44xVpLQaqo z#;6QMAJtQ6aQG%14n0Tak`<)(^98b?(F(hjYLX(q6)4vxVBL*sXjsu7mQFkd{*EbV zSKEVl4H}5DQE8Z$=!kLYiNt?LKDZ8>Px^k}3jyxOh+>ag7&NUfsoA}q9R2bcPtA%W 
zFOPn~mnvh)jerWdq%x2wjkyE8oNJ&xa2)=c-w2mFM`1$5K(e$=UW=MGlBhpiLo(jy zVcJ4Vl0CwosD)<{T$B%&M;yed)~C2Z|6IxSr^CtbzD1~#a}I(B55j$pHJBA2O&)$T z#Ao+=lZ{ck$k6Hg(L2kJEX}qhs^_B7t-2PJV(rP;JHw#z$XHl)KaF#-U4qtzr=f1h zcj&{bL;t5I!Dm-0OnRz`4o@q&yIR`ZE+~dQ8Vo*(x$xKO1((*Y0*j{yz_5>d;8wjo zrxs{|3r|kvcH>wqzS+nvTJ4RdA-%C&HydiFB;em;>UieGacDCSg}PsVVa%cq?yr0* z+BnA-9A)+}cX}u1IqWce+5AH;RceGKF-oY|M;kla?!bZQ1-L);2(m3bAgnwQtSs+4 zSoX@n85)h?Hgqy7S@h!GQeP+-@f#EzjBwGFfp|2*7tRgZ35QQU;Uq2%oZC;sd)YAv zN!|iKH`-$2%v{dgyPmuAY!K%;em`_yPT=ElKPZz+g4CP(L9XgSF7d%#aDVT>rC027 z=(jb9iwQXhx8+)uj3e{7pZ8U{$F&+@;Gzu`F(Vyry~*di*9_%ajxPka5eD3+vm(Sc ziEw4`C$9a$Ita~}05N@(VBUbkT*~Zl==@pEjb7@>6==%1tk~sT$Gwl-@kh$wZDSAl z?+?Q@hcK|;JCxITVgT1V9k5SR2Q1kd$!RU$4}Tm7JB*)l0rs0zajIhv!pHN>VeEe; z+@+DTA-k^t*G?@1SoVigiP43N#of8X=jOqIh&`}nT?w}q5$0r2-`k6fyctb8lnC;T%A_G^7#_*$Lz-JC4*NA94o~_6108yh2_J9Z`DuMI?ne<+ zni*oD_Fyta+6p!o6LIBGE%^F=6B+V514imulD{U!WZ8{W>}%7H1E`s-z#^bQcc<|ZX32Nj$I_eK1v-ebB#~T^WUSo!W!$I7>eIbdSzmJ3s zvBZxNJn6g75Hx=)6KC6*xM*1x-sqnUBVH+>s$wag^|+1UCw=g_`e96;FoNh@6@h#T zBX`{+$(tqxTzY|Ec(x^RUvVA`Dn{a*->Oi}9ma+K(lL2vC{|8%0?kBy91JO-9JCl0 zxcb7~eMh(_>nFgcu(cR-)flIG4+izXaM++Ag2uO=*ld`_DP_pxy|!cA8q^?$F_Xah z2an6T%lnIt1M!oRKbrMZ$BW>Msyh03-25=^c^e5cuc$y+|0pokwuO5uR>H?VJ+M-t z61P6wh<7|(@xqf6@Iplu#~pDcRhN2`Hpi*(?8|$6a78Y_v-QU0-*@0rtv+6Ua*m6e zX(t!a*y7!g1(ye)-k_b$V2ooTpE!<6aeh*ng+%62_?W?I!Oi&Jrn|&Aeh5t6bO_Em`Er#J@*MtBCA>CMz>dVLTrJxH zB_GamTJk(jySae7H@nus{`g03T}=Zt{5cBVAKr3yQIT9($5L4NFb+m{JLKRX?`J)I zn9Hqy+6#9)TF8x2AB({p;`E8bVbF|+u-sgNh69sf>rD=BHz}j{?^fVDU19eN`+4XSlyLluiA0Lbho5$d9P=`f+gHZc;o`d4ESos;< z4UManaoPOQ+_aSOa6GdIZmg6bO}8H^FB}7r9?`Hg*nv~#u0l=XH)swh1nYN^5S{b_ zdKL}9`Z@h^T9q;+NyS(?{~x?}I*81u4y19uIQGVV7^tmGv`lm0oM$TM^d}gu%kxM@ zxx_3d?hHiN*K;>S4xIczu>*8r!QVK{d2s<}kADpxzM12WdByl-em)#AcY)A#V>y4dVkqK4xLpXOL%pUEn|H6a9GhA5T$=s^wwXoymKS+C;;t-Vm zj!VeT=Ym6{<-L9%&VG74yfJe}^=ePhTo(>K0?k2RF2>O?3xYY(Z=vRtG3ZVEKYYD; zR8DXB_MPUDl9W&?jij`%X-9*KOy342Q;1|t#$;@s2Z~fw(x}j=?tSe@5sD;KG7n86 
zgveaaetzqD*Lv2w*8BF){dfDm_jR4;aUSRKInLzSO#}%iM9k9#&CIcGYtS%H4_0U= zfshUjcwR1q33GP_Gxn^69kMxaTdfS(92o+G{(iNH%wVA5syU$KV-g^KPb@~q2#oKrK;y41*zA@G{v4XZ?EL8iug*&VqffLluk+q9J6C(dD+R5vvONRVuc-qw z8f=ly`?v71iUwLg|2EX?mqVW}ctG|-BT|YngEi6zK%UbTAiPQFE!IGxEeGMYJUwu~ zx(SZ?lmUJ3E@w=ZRSC6yHZVCMjb_?Qz-!@3=(nINIxlzyZA+Cz?$ z*Zu%5-}ep%Rk<;K6*0)*lOyz~azsrZcY|fCG+=IlCgScsfk$r?!Z(#sy)IYp*Q(>i^J{+4G2>cJuf=kxE1}~1@ z0b&1?;p}%B@XxH<@Sxgf*nNB=c$vBx&X1gj9IIW?TKf?Y^4b6$%$v)s6Q7MbFB`)r ziAUj4RL0z_ss+Qpav=G56n;K99u5~ygOAG7;oImZOo{+j!%>ycFsv2)OXvZY8wX(V zVWEzC%frROJC@L4hl+F>VgK$+=tYV=4DS8|Z#@nKC;zs<0atsVY|_Xy?l}aOdb|U9 zQNhr2X9CE2tq$iZ?Euit5QW?Wz<%{PXp0%Z8cajwVXMHtzgEb}#1{>j%z@j@Hlp$S zQlUoM5X`J`fEOJTP}#~{0c3av#LH3gv*$isIoAnknkIH3*b?X!lT0vmu`ZV@#9*b3GP5bnawjo?2466PkS30_+E zGA2`^LD;>SAiPfKr7b(aDUH+cMgMnjZJsC0d2S95wR{3X@Gdl8lnr|nTA5p=CBSHF zhlROpK0NzS2UdJDgcX*P7!$GwY;^1e8!!1oWfx!I_N*4V96;jaM45^nRH}Br3sT!jGR6E1t~n#^aI?LxQOf?T?G#V z>zLJ@U7B1QWU}p-#32keYuBE)7R8dz>HBU2+e+PCWs}j#R^$7ev5ogFN~% z5{CBKFN0N9Ay7j11YG#D5_T;YgTbBh=+>JPuzX8DkTh_Ci+rzu>7~U$ohHHQN^XK# zQWhaT&}{ZUi?b)16`G%0buOWSDS%2~etC0eZn8INzp$bi=2k@R)00!Bm1O&Mrkea`VvSaT;iR<|gDJ z?E&|YJ`@?B1RoX^f%0+(h*ngfiG?1h-S0SD-xdaHuKb2s_J84r>Di3buNq+<@(j4W zUxZ${X~0nZ4ajrh3~c2$06oOzk=Be1_|IP(S#8@61LyriLBfn~J{G|(Hh-aEr4>AP z|3BE7rHkOf$8a>Af>vs8LC0NBfm6Hl;QNVBkkixA*yP=4#9V1Z^EQnG9Zhj)RM|yX zmu-k5173oKUYbx^*%?iHq=;tHq;xjdDQF?JUZln+T0m^^lK_H?S$%A%r_EM2A^vG(Dh7 zfP>1Te|MtM`HB01bZ;pt3v)!`^8#Rtc{+^N?m` z)~0TNxf>QTV|&YC*zeW0|29!#@Qg-TIDV4vzwu&H)5s47Td z#yXnlXlRqYv-owiZ-OS$(Ke+GE zYv{gH3`*9#fhQ)3LyH;V@Q#)^OkOUA)Ml_?SlR$QIbRG`J3M7F(t?2d?_E$!#|ACV zH-e`#LZSJQNDy^Q6&fc8!<5$-1dqr}@OEGf9Ii&p>~-6q&7#?`!~Yq`TRFfOZI1)3 z;~>23cMGEOOF;cb9CVhMirCo+Ai@71|v?c(p6m?*^eJ`+Uj)7Z)g<0HR zMI>=q@FH8?26Zpmz}KB=Kz(uv=#u^dZt6@$^K)XMUwH}a8Xp0;>}k;Iz8MER^rt-#)T3hFan0KMH3VgJW(Ohy?COy3kjk118~)u$OSa^E0GO7eh_}=*XF(5Fv(PO~M;AtMUh!+Mx<%+%Ll@oeof( zn+nw*LKJO!1pOD13j*V|m=eCQ*L5{-p6o6XQG zoggTtd=1)~d;!{Bok-9lBa6%wJx9hmmjAp(!gR5wF6b 
z0asHbGF^=-KJ>w$1yhiQy*cEo1)%(e1Xy-tIy~(Z0nKWE!WTBGIPP2&vKqGnseI5y zyBwCG7dwj4*=x@c(7Fv8&q$!oSpbNyJcgdxTOhPa9gQ7|1TL}`&~M))^us#?O_5p- zKP!o$6+?DNOLqj$EFOf%Z^)uCiPGrlQe7yewi@kxF&bvwl0=OjI`Dz1EV|DSA0LdVUI2xY7<&q<6vQkS&1AoCUQXRs*lEuVI$U zCeVNOHykHuo14a}q0!2Uu>C_g@?DXSj)qS~2iAuPp8O}U`eiRnej<$?9Zf{_UtWVE z!9TYE=RpnP4YL>DhMq;j+_!o>D*vK^%&zT6(|%;b)NUCxPel#Cx^x_E{pt(@^%aqh z?^JNF+XUS=`U3y3W08&27jV}-6U>>L4=aA|MG+Y)uqH_ZMsL{;Yog@gCo^@lZsRq? zypV+7@|r9GyQ_@_kH$c@ZYrx3;q4t;Lzp6Op?+KFgElfQ|;>n zdD#f)XFD5O+|Yn=_F3T1pC_O>x*8b%lz_MN=7Jl)%>h5=0MM>I2WF2B7Ch|Aa7l6) zU}Q4k#pAWm)7l+;9kPasB}dR~*=qReaE##J{|Lr?)MxgY7QyFb+d$KleehA{RwlBy zjFCPs)H0h!!<>3y7OUzCn!6tZN%ynNFLNRJd%<#e>BA~v_rM?M$ZNv0rMa-gMGG}Z zn8WOK;`nu1A(H#*i=<9%gdV%a5UX4b-%rA*?EWq|PAUPIEpvmnoTj6r({#|{pBb?K zl{AXGE{+Z+FGso;=EAse3A||D8?fq-D(3oEz*{boNO5f!ECNB7b*pCrN{z5!j z?Zu+^U!2jkh3@drhe>eRy)=04&_?Jzs{xj*n}Is=X5f{9d8jNr3Z3>`gobZLp+Zvy z+|jOp6JuY2o;MTGC69P;{AUVsIvaq_mf4||ikE@XY+>$s+!|f+x`UML<6*j*EIKuY zLG`;vqxtfq(4SUAWbu0zI^Xgi%n#g%2IWVC=Ckv_Wd18-KNmn3lU(r3KuqvNykhQZ zRDz{?IpFcO0pRvom;?GqF#lb@2(rxU!3M>nAf>318Q*OLc**JD=$b-s(8rV6Qs4#s zq!%)GU(Sb`u#?f~n<{vpA+%R)1osmwpp@4HXt=H%gqBr+wSm9DHxp@4mDa$FHeLiO zFF|mpuRtSs?S|Dsb263lkDw4Yuq~1znp@0wD$o zEE02rtzX_U>s=Co-pui!S4kN{$D2UDKpn178x3YEoCMG2MS<8w^*~Lzh`G?QoQYF7 z2l7|M0k|~?ytTi_JX>Y~(&A*n&7yuVIrcFVx{!gc5^sqkX%QxMtM3wjT{2h+R1F$VK^#vM-rPU+TwU91BBjYz|WjlaR}N=@kW zZwKsnAr3LG3Hh3CsJ|cwqDB$p<5>*mcFQmeFX%(D_zyrl(;en#g)$!lp9*5!dGJ+n zH9YE(4~CQ7!Qn+2!upUZ=$myH3?sXGIQ)0koB6@dO< zpv#vxag*{F^| zO}RaYIiqRuZPqEq(#3+Y$&Loc{)95)m(7I=OI0B#nE}F$b_$-WaG+qj1FZQy7Q76d z2;0MdGM5yq8G~{;!0ygtp7?%e4%;3EBRC0MVLHJm?K&{ecMFs3?Fe@DtcRj|(_o$U zO{R8kIyiPO*5Z&-9f)nv0i$BGfM<>jQ=?wbl)9Y&vxRxmr1L3Y=GEmu(k}q4iS=M^ z#^eL71POTjbSzVJ#03nWNC$HcXo5IFGn~Jwo(Uh6fGZUqfjbB0!A$+};Fa}BaOGkM z*nGGfcwYX>to`7?SYDq74zu+jIs7O%IIe*?eSIo$bT$FzmqlPj-U{Yy83UIHe0N2^ zz3}M$Pt1#zf}eHP3K(fx3vL9*!in9dKlQQIFo4{s_h6}u zJRHbc!4#E;r5D+s;29@64gaYq`Fe^OYfrE?Rg1XAD=wsy|Sg=zby{(;qJ#LLc=VtfA zTbEPdf2Xa`F_jkh!@dStl6NrX%^|1}Q3oeFOoaaJli-cj%IMAE-RNtyAzB)nh>W{Z 
zK%@K>xP-n%ekBA4sSH5JTPD!0E$DAC-DBw7g^uP#sHEslF zoH8sT>YTx~--z)(EeFjT_b|bmhQP9G<6%;RE>H=OgfBK-1jkaMz_TJVFh9Q=OlU7= z)VGB)?dhX|?%F~|anvZV{G1!uFsBM^yK@xyGv|TqEG;l5o&^RXb7&~&0kNlEGYWeO zLDw!!5nzJ5EdxdbUAvru z-Gh@s%*GWU>Df(i+f~pCoC9Fi{>i{DbQ1a*lmq*la$w7^Z16xo3Z^$M1^o#%(4zhp z*x)q^EHuajpC;`P`f4BOJL@&rbEW|l3@?JoO6IWQ*IlOjdmao-9Sc?uu0;t4Fxc`b z8h+jW8}3-30vT6v^iAFaIu31vtLKPAqvvu+G$t0znG+18J=(y*bv6J@&I7+6%EG?q z(&)C+0azgH%uML{2&S$X4<`k+!SB90XyKf@pvG+oD(PHgByGRJ7jXt~;a+oCRriJo zVG@8Sr~uZMXTdp1iST``A?&mbfbWZ)!5_`5uti2!*u!jq1;?sEdZRchZrTDiejWf? zD4J1|G=Q&jW#ElCKX}rji#c-m26&dK3Y?4kfM1X@c(1O@1pjPi;`1JW3*SVb`AHS? zuG$z#bR7X^9j<%@$7WuEbB5`0kC&oEKiqrmY?-}m63lM*PrI}0Q<-Uf%SCqWRTNU8RolNxHG`U7c@$)1g-V2X{HXVJI5cT4x~iD~LL6?Rp@&CM>bx0n{M=Vy zecc)~TkjcY8J!HLW^v$2hc+7N$$?M5^+Dy}K~QSp35U0M!T1mP@Qu$sP*EcX)c1P8 zg>MRA)ZC>|b7K>*ojV^MTGay1+_?d3Q^n!!4Z+A|k|rn(pO27ME6})F4@--5(ft{X z@cE1?bbY5b8n^cc3`!Mh+ZR<(t>Xhw9Eku5$`j$GoeEHY5W#sxmtn;ORdi&y88(F* zpuXfmc*AfMvdcYzriVtL+JkXm;oN9A&u=2AF*8JqyTy_EfFdd=i2_A}uFM65!r$4> zC{iE76fYCFc-J-<{be3J@Ovw)jg>`v4g|vRw_~C2%>g)a_8(YT`we)@&IE&gX)x=x z8Eg=Cz~9f(hbBdHU~!BrT>tV8OwP*zU0{Go_p1fBt8cQt{W_yQ_Z_e{ zegXP@t(l8uTfmfra^}YZC3wYJ1JtiB5I+CGF|oEV^ZshMEk+97+o%a|y2Jsc(#J4I z@G3rP=Yh1j7z|512MQjWf#$<{@JgKsuuZ4oTe=ip{S*!SB@AKkE^V}X^)#e&UlF?R zegLnHS-?ykr-J@l{2WMohrw@V&CKUNMWCW@J7g`LVX^E4xaN2rIR1PrblFkJ%-Np; zZ5_(g%{x8jZ;jw*(PCk=q$(`34(Uk3gE&~;Vz4Q$C%ic zK(X~7nb8skOi05#aP#L=@Xeos4^ujrXp1bSCqE6m6XpaXQ&TKf6q|!?(|G24iZEYP zy2DgSWWd9v5IVRyfyXyA;pq}P80kC-`YBN$naRRgSABqL>vhH)X@YHHN15KYmzl}; zyFn;y1O<*-@cgq0u;%m~#wt4-oLFuM$GL~YZ)sye+txzxrMT8Y+O-8dxGL=25DYIx z6*BkUC4rOcQ{k>)4cIuY80NyMP-Ha(T1XrNmkoeWz6PxG{tra$91q*#rVD#9nu~1z zujqgY|L^F4^;_2MF`Kht-S*_oszxbdqs0E_&*}otcCRa;7r$L4z7K<`Jlsg`PuNi1 zEPa|c9X-PCFV>|8wOZ(F<9NQH-Gtq{`67Kdu9h8;@nto`+y%q(W;V5|pFN(O$6}LS zffaYAYHWTeNn2yU{@K?>UgarN7anNgV~qc?)>)hI*_v_uxb^R;6IV!6gOsbkSy}O) zf9BGF9V_S*rx^M-ZZiK_zU|;GB08AQOI=dnKTlPz-m*lFg_aW4 zN#Z&DgUG+Uc7Q8S%{ux2d2A8Mo3-{PFG-G{`2YFP{-3mk|MS=!$5fz#*bx*3|3JJc 
z4O`0|!JAYb;j3rEP{2QDoHjiaseX*dhyGiRZZ$;1-u>o?`EM3}H9QMnjC4S2Pc1|}9lAh`{%+a!j|N38Mi`EK+k zu>-4@FGoR*4B~4mQNeeFF83Woo2_PG^`R>4(6|_{OE``1D8`^If!dgHXvDSFgIG6k zC;Bb36YE-=!J*Ar_|4%-ICP^f=0zODGh9%A+k70`xeKQaK15Yjiny$26Ee<5JPI9kU4N6%CqA@wrjsOZWm6lWmGdoQ5G)rBFUlg0Qymdizl ze*=8k z`B&lQe*tuOMIN0w>nr*0AddMoFaFs8LUTTOP)8>_wzo-}w=g}A*%N1|t6MksXtO%p zd2WD~y|JbBUrx}n234xH$c&Q2V_boQ0WlX@@aD5x(QS_y9Jyo~QImUv^!k_3G_(7n zo|pRcdWS5n;1A(yXL0geYCKQeI_cj0ELs@$4f{Jf(moR{JpHIIeK2E)dJN}^zHjtl z3u3O*)vGGWm?}NC!EQgTb}>ZzWPc#32Ua{IZOxm_Izy-FRgz1;j?<04W}@UVg>=VC zdlLO2QuOLsk7)S>VQw6?gw#G27;f8K*~}UBWP0~nu5WKV)#(w5%=ew(I`?PMW}jTt zA2*Gka=?%%`^m9;M7!u$gLEP$%Hnzslu`YPX8h~IAyL|pAGfJzDd}=tNE#$#d7FAl z3pg2CCgh8S8HD1laqr3IJE5e=Nu3{m`~}UL^Nc2U3S14j)41t)m&p2X6n8uPD%s<* z6E_T&@GI=^6T1z7=Kg7eE1Tr#jW6{+Qn zB343rX~*go?&^RuFL{4H{ZB7}ICjM#|Lk~fJFTWT^(?(~!j^Pvtfsk}_kdgfs_2P_ zH5}QY!TQ;0un(3#=7xJCFuhzoXOU= zMk2kI-Skq}B+m5M7i4YGOIr_QaPgcibyK}cUDl4`SN1?sGq@SApHm>Z&?ZI<*Spev zrI>#09i-tO-_fC9wxU(zB-L5mE>dYa zTbLooAJ$B!J}wD-&9xbp@_I?!l#6rNfw_jPbh@K494--e$)( zR_~uW&6VvS%UZSB7gHnYBSQmXr(#ST)TZ-hOE>dQR^9YNbOibKy^FKiv=KWCx5jiA zJG$P7DEu!i7#MszZ0-InfM{}2Od|6l+mG&mMx=DCoOGdTamBYb&e1Uq*RaxGuR^FwMksOJSqcJSpiYJY7!yAdj|Yd1vDhV_;BZuJZjupkFPW zc_hzyEHAxtJXO;(X79B8p(WeH$oi`viTaz#wD{N!8uM}%(u3;!g(Eur=`k5psY#Ng zP5R9xJzN6UjmS^|yN2(usvrg=7BDoN70bP zeDWkM3Ag7*@j2Vh(+w9dqTuA4r0$U}QAzA1_~?CVI76I&-2IilIF?VNfXX%Z^;Xzmxqb5@RLnqS$G?0{?qwGL}Dij7rzdWl!3)6IftP`(Gq-rtDN! 
zrz@WC)Xk%+#oI`D<7zg;wv4>cx{beX5JzL#39OH|>!t;Vv^339A=-4s6Tnm_;3CWh4Y9p{Fo1kot3 zDfH29UA|n*f=vr`B-dtNB1&_HaiG{MdU1Li$;)2PFR1@a2liy}?d)21nyL(c^?W!< zjGc*90_O8`1^(1-o%8&vQ8QQxj$vb(V`;%)K0V{#$1PmsPLIA{Ox$kjat>izIMawy zqP6j@XvvoISj8uiUcB%I1(fcp+!4OKYK!eHa(t;6T~X1_we5H1>MzVEpCu#d>_egS z_2C2Dz__RQ;F?k7zkWh$H<@#rb1bP^zb3k!aU73+`;^|kkc0H;M75MPl>D+DG zT&}Ho2&+rf!s*M^NpQR}Eozge)5mHPjigE({&5Yrx_1^y+HeOoWZxssv57P@HiBy% z9gCaPZsGf)y?Ekw1w7~3g-bccGf9(`4L&m9i}#&-VUZs9H;$;LP150QM8AnvwOEx9{# zhq~&x@Rv%avijkJT--rR!B4Nm{b#377s*JlAAMGm#mD;z?-0Qm8)ecb#|+4#ZOeJ> zN=fuz@kBmTT7uNA`cAVOmLb!1vSjzHc>dNV75>D<2>!&hx1>*@3eo)!McKwj=*FiP zXoQ0&Z77nc%FV08Q$}atz8paoc-mBD_2V)8Q4s{AM7JzkeA5#NFe>vo=f|G%_Lp*jTA)7;i<`w$%*kDq+Ki= z?Hu!m6l$KJYr}5~9NlSTcy}o|s-w#KZaIZdUz$(-ZGuHlN_Fwh*0GoN1dXE?U9D)) za6b7lX04DH{+vYYI*qg!=5g_5BWU^6Xw(v2Le2%>pcPsVQShKS=Pho4^}c@=l};qw z`}tm6%~Y1osjnxJUlquncZ+DCx(j{4O%vTRo==Lls1w&G*YTT8EHNDS55u$m#Q(q) z>UJQCuGDp)>a*wKHD14~41Y`0(0Faa80FLBqg`<$zn7-=`QSJHbIAAYkyPpMR*G-f z<1PI&L_hM?@cdhIarn&&yd^RdCl$JihCdD9%Nf~3xwVuV*p!R|cTS-PKTf7mYl29r z#Bn746Oy#sqlsj}m#Y4QONjp6)0k~lpjNggxCbkGxZcxdcQ$(SCr(X$9_?CO%(X5Q;ayxQDR{Pvv|4m= zF`}ECXMNd(28uq-UYQ0YY=cyja z{dXr&V2|y`rvqM9ezl``t9UB*S+f(5r0CHdy8pOko$kFc)S>jwGx)`tumwY-*KcxrJ!FvuQTKt5_C@q7mZ(dK@hf=vye{P86 zkE`N)r)=oswa!Am{#tfh`D&T+>S=9Qt92SY~M&D>uzfaiO zeAh|H&oEjV{g+#9T#gq{)#0Z^9AuwmYw%%4mVDhbHGYwr8YeFFYHvpj$VTrB-bJWC zMNdcbh2aLc!*D*^JL3~EG0mgjw8D7);S_e%`TMlavyIl$cKWZqkIJ@7u-d0bahAG& ziQP^;67*7>AEP*#EReM1Po7`EOHcRZ*X#*n6)u_ZbG+1u{<`1zQCJkOeaMo(?w-O= zmEXlSM?WDaCvK%3s>-}qpF35x451Q|YUH2sD*CZZn=KE>qRM{`uwk)h^r^ZF|K||?c2v_*<43@BMr9KmM`82x)e7dbUf90wn>rKVj(~!kW;x1D9U_VWq zu3uHwG>aHHpA&5f+(>1<_VB)oCzG!-lDwyKu#kzbNzQHlgTMUvOcz?L;X`NtWB(ec z@sU%N*~pT|#Qh5(Z3_&E#G=ERL;+sHZPK#* zV37&_*B_2EgnKw9UmzH>L)=7aM;h(}MsYdawE+J%vd^tTj*FeZszeD8mt=St|32b;_ z620k{$NFrK;{67{a38%75`A?y*4@8`JerPp+uS~K*XAneYMI6lmb6hzzYz{Ue@rIo z-5>_57voja-cZq$&myl|SEy<5eztkuE?SrLgvuA$@h6`J;{sKN_qe;CJNaQ9FS-9d zKJnR}Cdk|+1D-&3#a 
zx5@SsH?UgzQ#_+VlW*0`!KO*SIR7i5bn^FPdLylrY6f9aS{lKw@LkUCyLOMdMERge zajUpd&Od2;?4v5V*O5X$`y7vLktAIknrYe;$lm?4hfU!_=+TOwR7K!^uaH*5*W2z; z@8yNmX1tR~eCQw_vAcj@FeR6qpZ1XZI{qB7T0e?4Zk|F0cAE2@ANKPe@CYw+d^>qE zWJRyb$k5Ga<=896GOX#Mcq)GOA~A4Er-uVas@$HMa-rQ?)Da16jCVcAiCIB5xw}#M zA%Vl(Wy|RgM{;S07vfRHvb5oq42{YdMWnMAkbew^EAcU;urZ8Vrrn0?ZtNl!pp&q%y9RcX(Y{ZG~OqK5my!{QMAjRGg_)fl?STuts}vtcegIRZ9IybaQFz3 zJT#1Ky>&_K8c5^JWaz26$yF0quRxz?@S-L8MO;~n5fNK{82!APh#MtRxHAS-L}JL5 zTXd@lKTgyq(G7dKsd0|v{;UB`D|ZVy7*z-UTCB#-Rz|qVW-h*XBZcd`eub-*m*JJw zs`xjH7%H8zf~X&U!8Lg4lh46V=-kyWMOkXL)MXpMJY)U0wF>5;c zp?!#3((6Gt3{KES5R?f}YN3QDl zVW-3etet@-dv0b0HHbgQjlI;z$q4uR-8*$u=7WbQ(5H)guy`TyKPiLz3+`gQ&lB1A z-|uibb!{{|WEqiN9W2`IU&KB4jv+VhJCnI*R`46MuhSW=z1Vd*i#K*q@;%}%c~ts? zIQ`P(qlAoWCp?;;uU$r(lh29VZ4Gh4bB27GTR|FLXOKQgd49QH5$W14OFEW$lQJfg z#C$r5pFEVLZUG5YJZCMLDN#Tiw2MgHoEhXm(?el{>>e4ma3z-I&78zGUAi*$4`-m! zj5R)W;}*9)$bO0sohTrvgWn#Zj+QrQ#Pbid?B_bF`p1|k_>~j8n(gG4@pyXD>p9+* zKEkC1w9`joMI=%*mEg2T*!HOKRoGFXIDT=leYB|1*cvTDu>wJNTVj ze^1Dgm-9ibvtr1LMdPW|yFL8FH)@=Js1m!|@h17%;ZBam9HS>XF5_()$~64@SYjM4 zNklq#iGmJCo=Eyp$wX69FkFFd?LI~$!-7Ns1v#7-6r+i6cXLH<3Y`hsjqr^C} zj`*}HV9AMViHFxMqSCjH9zUhXo{qmq6E!1fb*UP^U?C(KtJ`Ucmpf_B@Zw+9U8Q?Z zH`7rE>wwnqH=^6nK=yhl@#*f$JY@cohIwVk;!F`OaI~b;W@*q&nN%`nc|p~k*GgMb{@>;3T#u@NcIHx?D#pf4F-t z8(#T<$iCT0`V}8gEstm_)g(&;eFnLl%%fb|u(QAx!>r!AK5EmHK?>~^$%85T(2wF- zG=4({mA^iROskvCE8FIwj*EoQ4dbfLY?Y_4KFp``R^1}y!M)tSM?C`51dxeSZ*fDt zdbEG{B$B*UkGR`iz)e=hbg*F_`8R#8(8paNMSH`@nj>qer=Sl{ZyzR1uvZ#yzJ zd@GspHx3WYW610C(da*y7s#gm0RAd|mKxdqp^d8vJtNc{!IQ?5HwwzsuUMX)Yb8sr zYuzK;rWRBkx+K)@Um5y0v={%nIh*D;oafHAXtFVZC8EH*>A1J<8QHPR2cH~TNaoNp zBJCU7sll%;Tvl&4_IWjmecHH)ytsEq)VgIEVGX8GGksa|%JMP3WM#~ali_LO@>L{m zeG}D^%ICHm)1bfAZjm2rJjv~rbgCW{LY`Tsk*j~zXjWE2rIFSVdiakuMJbdxo=hNx z!a|$nT3!4lM1hk(+Db)+PiUvM22zq$VXNg_$g%^qbe-KDdfqadZV!wgCQhUHDVyDp z_1}$j?}8n)^>+{G5i;B7+&w@HLWKn!eMhpzC9GnTn;LESd6R0cP-VBR8G|P$Yv3-o z2gsPqr)BL@)S*O^97rf2uk__viDNlP%)Xv8Fg#A0XS#7;tfKL;D`N$os)@i>(dKVY 
z9^j62NU)Qelu6&>LZThtM(?h^&RvWy!C{tR*PYMeLK;X-}lIZy07@d!F0Og zz-L-(U4(Oor;uge!|)2dbLi&4Dq5-*Od5SVsmB;u?EmZ%-EOZzxoy*^seLF7Iv_IhHbZ~RLdd+fCbEcnbD3kLsOPFNbbO06?;oB;r(5O`n@Kg? ztC*ws^p*rlUdOk<`je zyaFUseLW2_DW-;tc)A;9*FPs3YGY{VDS0yOkQZEJmqxGDE#jsqIC6udhehjtO-8E* z2%hoYoL3C7q3It~$m1CtVT!s*W9khu-Q^^To7g~KWrfprfdk^|piCYnDdQ?{MFd*Z zv39UJX`B~F3+{B_z=`>^BxW|+S}~gJyBx`VieJj8%+?dm!#Q|v$sVpbsG2LOG$5a( z+(bKLG{{>kIb3Sq#*Id+@uGnHB2(Fds)fHaNn+dvykzHWB0u;Szgo7Ecw{7STNJN| zlzxZM0GrX|*Bg7%l--RTiqg18S}Js4=?412eh1Ho=(`62Fo_80Ki>^5$_N_W;gAKH6z6_Zx9e}?slOo1j zl}MUlBkpaJWNU`AXh_FvTwghh_E?Q4?543O;IIzYa(h_x`m`9Ck@AIGs34*q?T+}7 zqBMVj3S5I3ok*k27uCMe;XnUlc|Wme*73%xtjvTY>N9y2aVguvn$|0l z*O#u)+VC^<^Xq0TnR1DiA%;w_ujOoIRQaLxKSjn;LPl@dFZx^YD5PN%mXMWjcJhf=>RB!aMeT zq}FQ{@WMrimM;0uiH}yqUSa0M`9`QnGryYmeBVK?Y_;RZJ0;MD>fcV4%a)*sO3hdPz{kNV?zXW3PBjQ(WyU%(*E{ZPfZ6c)3~>JrF? zi7)97$qQV>q%Aa~OW;@yuO$hC)2ZwQ1>Uk`6lt{cV7cYNc=Yfe+}|5QZsf(#Zy|R0 zrF%5te6;xdh%dw;=e+1^n-Q;#lKIpvN;GW7HEzA2yUC9Qq-)7O-X>!c7ngjD@87c& zd(V&M2D%!!CdKdEu;&>*}6duHBSZo`vxM8w-AkA& z-xf#prBacS+D*D(K_KlvxRg6R(+A6&M3Q^Tmh2uRO{8ST@nvG|T+9Ij>h$R}UBA6c zcz-L&ON&*!%$jiWU-Bh#=Eq$UdAdaO>4P=hfp5{u>rJG<$p-ZZ{O_>-Kw>%~PwJB* z`20>I-mol2S109bu(S@mGkK&{`wO9&~+VA6D-oL?;ZBdj;H}5v&)pg5weWw>xIO;^g>)h#<)}`#tnhRvEl>+UQ z-_6wHcaW_0`^6fbA|2+S%~mL7l9U@ar0O^N$i09W)b$J^cm8VAp~#BGxn_{)t>t9$ z?EU=81U|0Ve@!khHqpAXGw-iv9$dva*>YP!N`Bz2$TM^ctQ zAd~Ve`H71W`!I1TAM@vt^!xGKgiNf*x6KpTJ#>+t&=i9gR}Cbs8={EjFF&@f{sjLh z(v8_a{L0T<7sk;3{nCHBIYj!sAN%%743s=}j?cZ5%V!l{#mBwQ6Z3$!>Q0#(y#1}U zG-&r9e12dsA9v{o`E=Qy<+c^@)sMAl##(!+`!XwPym_6}Zv1L`sab*lK0XTKH$Q}|@gH=A^|mc+;f5T^sg`#B#Z?W-qiTCeq(ZNB(TfOraP|l~^t_p{cXgyN zr~3s_0|8FXP6@WIemj9Z1glbBXl#% zO{);54jXSLgJr0#b%-S2Q%$nyR4mnc5HFED7Hjiq=7OO~|C(5v`GWuYagy|nadhhr zHL5W`OYmHAhg2$rv##|f!i(@RLSKWrP;GvgIyt?T)XzOHaed({1eR^0N&h%LZ)%hf zopzI6a5UpX?~D>wZ`eyutZ)^aV?Rg^ua-(0pU4RJ`nC%d=Fj+Z4l8Nujx=HC0wcC( z_N zu=%g-Bt6YllJkQKn2MG`jas$nbo1dSt7IfYPamhb%DED$+5omZu9Z1kPola*<>}mw4ua}7LptWGy|njFh4A)pjBsd2 
zAbp0O^Gjx5C9)M-lJ)yySofoeLfV*1Ldgan;b_kavaPO;bv4i7SGnFHcF~5CSIRs1 zC!!-8ajT8|%XOz;|E_1hs&|mV&-U`hZtrdov@Do8B@V8ymylK=_=q$p82riL$9hg+YKdS_U~pH z7Gv31BrkbZd7Vsi+((j&P^tJs6w&qp< zDIIZ;C~BtR64jXX&7CorHjRr4J64u_fC9)Q4+4QG@>{snlSnih2WCl&==gVHFeqN7gV$T{n zJ=Kkk47A|?HAj*^hh1s-!%FtFeFE)?Nu(P7kE$~!yeH!l2QUvOf$j9z%P&%wktp^n zCrv-Cu(QE%+VAKQqOIn|hW}QVgzr4h_nRtV$)j|H?14pic5wjNT~aB!j@_bBYr;R) zK28fOL7Fr{97!F7CHiCU(6FD%eDT~Qw#HKQ8mAm!B{frs)uA(VlUoyYTr4MY zN##cwp|or_n>xCUu3dA5KN#_ruZVumPf{JgCWhq;>l)88l@YP@xuGtt%2Z$xANvbE z%e;hbkw!E^m$Cij8j|rhWQAG9)%=ww;gac(8tJm?oBRy@t!(1wiOg%<3>LKAg-$)) z%uXJQXPdnyG37gBB|iJdk(RIhC2e`l^u5(4wq9#1>#v~B_M0mU%L8umD%uJxX`Q*C zc3PRO2RphhRG@fQ4!>UBk=C8I6t;dhX9~^o!oiq-tg3Yio3HeRy*vxN^&xwHldCfw zXTE_RdFCQGj5$tQo90V;WtL)>J0m39K5k=4&&DthaXv%+@OY}bPEWGkSt2RU94Bdc z+|9;+cA%g4I8*-@$*g<%NfPZdLz?dGCZ4Y=*vOQVL?Kz0JbELrW8WQFnrjj-XRwp) zyBJK&AM0XC7bkf(OqPDnyvY~Kog%Bcf3ZQ=)(~s2ELt67O_L||69)c`p_^u>)6(yG zR7O{UcmBPat=o_X<+&+)Qz>C>@O@I`-5p2 zXA{R2!1lMT9o9GfK$vqnf(hbh@9m4-2w z3$Iv`tGhJxM1*8aMt@0U?Q=55RYKpn?vm(6=LjVwmZW`2IFs?95_Ll}iP@K5*yR2P zqA1kUq%9ly9kDzws$=>hDWvYg=i?*}hU<=O3)N7QQbbcu@QLWy(lR~ox@ zs4#PMu%te?LC})UpoUkU3g3QJOBzB>v8w?xlrtS7%%nGk?1Y_yR>-g#W%-km-q&~O z;@~s1GTl*_sw`zu+L&z%*uyV@P~^CGrZDZ$CGum}3?W1zRpQxjf|NfR&+LmQGM9cD z_-(PG#Hg#5muYfhwzVwexku<*^Cn_{LG+ZfkP!QE zESWdt0<|n468p(kl+F-)GtYI|q{IX|<3ct6=BNp8Wo^QAZsf6wCV6IDbVr)M_7hc2 z4Hmq@AJE}x52aJ8_R+f==h6+g-2}bkM(nQULFT!sSgQR`L!vxgNBVJ|sw8pQefoad z6FP12QucYCA1$-m$gFpbRGb%#FaYgQr}2Tat!G* zn_2wqj3IoF#W<>P_zE2qBE!tS_lb2~EgIMV0%@o#qD3_ynVab|Ug&KQXRUkqt8>Ru z&urW3iFPUM*9=$oDAZ6Irg)SE%F5D>4?KC<`_*h%@kY9Og$zHiw-pNJyRxzU##Bv^DIiOi#P>Iv5e|V zs`^fetq)A1{Z7=;6>ppQoaS3_w{e0rGMy)%BS)|ihR1mK%@=tCMF;AB(2Rvd8PO@N zC)r?qc_A};1^IOBCcoP^gVMeLmOdp2r`5$Xp{0?fzpW-Gxl?$(Z|MERq+q4R#MMR0rc{qcP#&&EeRFt zT%Sz#^LdlvS?9levQ*uHE&Lu!vHCSuR+mY)1->CQwbqhnbM$CBiK8PPBvNaO5H@RJ zJK6k#V?%QdY18;;{Pfk^r7}Yd`5R;%eMI|9WL-5SWzPHQ{Y&b?hNzKrUH^CD{N7v| z|2&;}SEvcHr_}|Ms~zNGR6MD3jAZ(kl$eu#9SL|hi2m->qkH`?@M8mW_!p}RSn@F) 
ziP_(|B%mvpdG;SD=+1l0YK~lBa3+!^RJXDj+((*zDvPZtJj={0zw)*D1vtia0Y!BY zEL+xJvLNFap7F$#tMnl&-2{jW<>ieO{B9J+xqFLhQvF)q9>Bzl3GS zd?$R;Mm}@R0A?l5!iS8vr+baxvxhetq-#hd>GJWSEeFCF|7{RWC`L?4r;rs7abjgz zyZG8C@qBD=1Rv@BoIX-hVng3P273AsnWm{rk%tYNoBNh|UN2y_k8VoS6=GO)Rw0e* zxJ5le9homgk>QmMBx_|6OL7Pew#va#lM6^EUsYY$#3|~%5GM}V+Qr^ z)YSr5t3f8+6Qd{zh;wGqA9Ptp_cFGn(w-!j9_6npM$;$H14(peKf!uN2Jtq}4(^_gOe6D&$DttB&v)wt|i?n(Cm?x82|G-E5raBWg_v|-P@pde$U;KxT zQ5s4Ae!Pdj)#E*)BwBu;sU4H(ebp}V%JMH6SRKal!ZPUUVsZa_Z#U~RtL0Ok znA6XWXQUost}JGe3(b4Uu}Jqkvd88-^*VHc=x$gj?koEN%E$DLNM0!Zooy477NFAEBBxfr|uqD<{*vRJwZ0V7m3_rb1H{>OfYsy#XZ-+`c ze6%Xwa8F8R<@_Ss8=RQMpZkHLVr2_i-tkU#7~N zo>1lc|4F7Qp#r&ld?3A7KZ+V>dQcQ7VeQX)X~ZdO>KLg)7mXDAjnYQ`@5df1%!C@N`O!LL{vg+H#b=e156lRs@LbU?C{y!3OIKA9|K z83x<=b$%|qmTe-P?PN~n`})(5?wzFfUI8nby_kPdx|-#RcW8pM`qQ%50&JooD@{G3 zO&@D}@?INDsZw(Wzs%T_`USNSZR1zcq!;7KE6$Fl*e;_TPHk*qM*|s?aF&1OY01#pPg{8NQfo_Hg45pWI6po*2T0R7pVb_bEECyFg z8X4?$RuK&!U_*odl+vlq^J!2`g7kyB3_Uj5fwY*m@&6KZS&9F2nh`6`-z{26gKla_ zevT67(ag0ZZ^n&gyA&&FoaP=@eMo_P+P0T+XOt!BRVhTP(t;1n){xw{>LY;%R?|iq zWmFE_4*iAV41`J@$4t;xT8Q^(|oAyvil@Q?DHAF zHNmAu^J(NLWj1`7H4AKBL=&79>7Rea^zr%cMET?hVRLsC)y!xhhHZD5=i+?6??)fm zyzC%Zu^-WOHH~D@dSyDcwF|xUf5NI8&+%iA*$VfAqF88!JofE0kbDeisebt@gC*wr zQae0=PM$1Fp8Oq!m5=SmA1|xXJin*xW!1o%0WEp7=9GodtJ22GTD+;=>Tue(tdmXn zl_;#;JjG7;zJ*YC;1XR@YA4Z1ccoALVyWpaWvXN8AcUPWtVt`06LP%WC3@cDsosBO ze7J4APM_UgYDwk6%BCtC99hQgyX+i8WwG(1PTur^&_*3m-G>gU4Rx8*b_u7UmcVm|A1 zwvluP1xkz^=JM}^>vY7jU2ID2VVrz!0*(B3rB`pMS|WMU_)MbW$q!T9=Kxs!v;uf09R+;^@7LZe-f_t*p3y zgtRtu3VZyHr)nEo=#Wq&cBnC)jWmd74^OHyn0uD;r@iUmW^ZxURDpz!jK}v^v{$*n zaB0KsVRYP5B_UjKd?K29U&ygZ_oPi0di;0#kiRTr00*V}ST;H#c;-x_ z=JV48x3DN-|6P5yF=!Vxb&L_GuC@sGt`D}WuSgSocjr^)=OOu(VZ&yiR#xSZ1`|VL zg@fnSYvkr^78Ht->5@bZS`?+m+FN!A;os*7J0nGd9rqV5AKoO{ao1W%n&?RtzkiaL z>6=RQmIMjY?tfvrTS^BX@^P)So^HKA7s=sgq?%!`B}sWYjyv$y1AYd-NzHj~`6m@ItInIv3X z(I|+We_`LTP%>9RMld|-%U=CRqt9L_N!-3ENm4qRsPx2N+8!`mDDpkaX6IK+YYhwN zY!e;k^X?40@b?#QBCY15>>HTVH%sPyV;1Xtc$H;Gq>z%uUwQlaXR4!p-toc7ZOmg- 
z0L|D~!ET-LVjaKDgwMm*^Q{KieAp~CiNksc>mO&t@B49w*SV$3zR$`eLn2m@0=p%Z z^(9p-a-M=@kog0q?mK{nZVzQ=bVkyeHFKoFljGQOt$Au4Hri?Tp(O|M@C}#vA@8 z;erk5P)##x_2~^cQy5I%w8&DM=#%W^R-Qx$HL%I)7Bu#uk#xU{87;QF#@=^o61m~l zbgYH0FyFb6_CD!Pzt-fji`6=@j@6Flbvq$+f0(84tKAGiE&aV+H zdByn6Y`K>P-JcYU$85A`p=Vu~k^O!$px-!Za=}RGTX2;$*M!rqaSzb`02$#IKbCGE z_mBkIo}+TRWa+J_Aby9TB8@$%!p2UTMo){sMHkYHS-(t-?#w8auxenH|e$ z+^ZdA$(UHG|M4*Iy2+4UxE#V?Nz!5ylveWXtM0Is9)6P3^PaM&K67Y}^&7nOwF^7s z?ap3%ZDzZJt7+$vdgiiaEdOQeY5rip17ffKAX#`Pf$u*hP+HG*kb?u%=!QFU=&WlW z$g;1oEMUqZ(sF+S^8^&mT>7w2Os@Uk+!6-r311x$VfXCa^U0% z+L5J26KAN9>=7a4-h~F5ZC}EVOPfyY5)TlS+@I1MOIhkxxP{+4CYgTjKa7U!`QcLw z;_1_%2t27}IMW*z!R&u`lQCtrtT8H^Bn%BEF6)h{)o*=jToAyDf(wc6{pIXQvwGza zOL;nNq%H0BQ)B-2Ds;jFBkH-WzS^O0IyKt*OnPk`&o?R_Bzobmi0-e4MC(KeQNE`_ z)+aRZ-%=Oi%a%vj{A;%KL&`9ELU9=1oZgMsJUuL(I;MsET(3c8+eDDo@fiv62*h1? zchS*?n?Paq8#ep)F5+PMfKbbItpCaiX=cR;^7ivo{!5N6jgbWM50@&?QjJF>smh(f zAUX2z{6dx&s7aqTO3C@~UDQT%U-d90Ice3uK&iptBC>vz0e`&ak2E4Sia5#T;bG2p zbi8UOea^ek;wuSa|8gh)srWd&6X(L$)t#l?)6UZ7&N0HNcNw(h_d<5+iUm^;5z4w5 zL)d@M75G_W4J4*|VTmb3Y4yp@za)w?F z5%m6c6C1I;v48m)+HvU;pW4r2WS7ySt${wb__V1wmj~7s3 z@JSe4{*IUXQbJwJW(fnFQg3*M1+J>2THd?ZvtLeZV%te}E#)))JYt|E>+dyc$Quhj zpWe~qQ+CA;SIvYtjC}n=DqUcJ5ch|KE@bqyL?^@V|ls{wHza|3?R$Ja!wbHs^us9CbY4 zBaaV?nUvP`chIc%rTT|a^2`-@!LT06Ku z*$9Ro^F<3fcY|Y_B0l%(EZWfd5XDZ8z^9&`LyMP9fsKoeVOp6yjA*(AgBvfwcc%nc ztK^EEn+Bq7>bCgOUM=9~4#%deyU<_HOR#(S9;iLtimsOL#Pw;}5XsbGK-eA_eDMy7 zmk4n8%2ZsmYKQn*O%QH#1YdJ|izcbHA+_s&AhOvX&SZLn_TeztRIH5oW8-1W@hcEs zk_JXRkM8PU=H7q*5AA<_7P2;^z`s+m(64zNgx!e3*`^EN?C@mRwIT>6+V+E*#iGrv zu7Wcm3%i5HLAG5D=(|I3vpf}99UK5(Jubpfaf(O5OC79S#SEc2gV4cg=g~8r zJuq551uQMI(2Cq7(dN32E}s31n#Z05g+HpGloSm5FB;MA_oLzB*f%KUxGOx2yn;?; z1%Te)WOV684P2a>2*S{KxYI?UPhA0K%-;@+Vicfke=wMy$O41oO7Ol-%!b|`503-( zp&`+C;g_#2Y`&%dU-tRJpwMa1zuX9iT+J*h(mDJ-elsen2R$G3?zGohT|pr`@rR+ zEryC^Wb4O#yn%m#4fX_MjIP3o)bsd|+HIIso(EYKU3k^Z42bZf*#B5DESq15-rmUu z`3NoS8Bl~bH7|k_-KDrUA__Lf_(Q$t9eh+j6}NBv0ts3Fz*Thb+S`7@BhwpTyQ&RN 
zZfOAD3R^JMc#nO}&G4vMDrBm-q^!QS1E>0zVBNm6@Zp**4nom5W#UPQdR~jo<>bjx z10I&{?ZR$5df>su0eJ4)T(Ak@;)$cHVKyhm*Gud1xz!a36EZsL-OtvJVSIY&xN+YnDJ*pIb9Rtm0v+n z$UbD9y#ub^1{|;B0FKAcz?nHED5j^9JG1l)TE1lyT+~p+JJX8Lwrjf~L(KRuS2&Bj z)-A)2`u4)WfI`T>^c8)`eTPcl`-0=nVem^s0h-Z6kWnx~HK2&!Y^a2GwNKFGU=Qxb z1E5Frb>_Fq!1XytLAP}+X!u-0I?NT<=^0~>1IG!$xX?SZFeLm|s=3goU$ z!zbRpf*A^fP|c)&oX?TRoLq7jx}uW>vgcaR^ZYYtcSJ9`y5tT z+Cjwfm*~x!H2Bwh8jZK`18+|~XdLznwdjw5WOWZNC&&gOoY%n4wMFRYqBf-3)QH}S z?upX1ooM!_Q1FzMz>TR6$WKoJG8*DU%jG?KZIlR)^_0d*UT3IcOrkSzy;&y0f*vJ$;$$U-qXYT%idfTkT6U8wNU;57ULdU~e@ zvacw^&)^Zr+`t58UTs0D`Esz(=RXLV`#~HRc?;3va)0uYJ(^Zs$9?-goO@Ne8g1Sm zhZ0VCAgvXv(YoWo$aOe^o8jK*biH{YS5!4*jTmkhjIHUV|(3#d@-0CLPb ziu8;{ODIv6>sr``uFumJvplDv_ych${zMeocFCEW+CCALY#RzE%Pyh)Vy|r9rd`PW zo<3Uppb~v7)q;dBOEh!yEwrGxn7d(=iPHFuXpQC%^dI+#GqYL-OMLSBB}+NlH``om{1>zzU}S3VKPq9e)q z>n}<5aPcao%V8oLKAm=tEyLfIpTf$2@`#hH9=TK~OSZOD5~ZFKDPv z38>`XIWIY%24Zk%-#cb{}|@0LEt zE55A8{xAEHbIz4y)`pQdW#(`=Z{&p+$!@}X#l)!q7h|02s*V5lj3C2(hT|~XLhO|P z7CXGViZ1<2#;XU+!V#iJKAt(^wniPYaako4S&zZ$A(6!6@@tqrX&nA;Y=Jd4%*Pu% zl}OUN8F-k-0PKEpAc?p;9`;6>l0|Wrc(k`8DI2bZ_f7qR?^U|t1##bD!oFBC`GF_y zTcd;Lu8hN13@z}|Y5Oq!EJp@c`I7NkjX3XWA}Nef!j2YlIBx709Di{G&b>VaZ|@yQ zw6=W0>Hes?pVkd@Wbxa_>G0_Oyn7V&3nI&e>>2atR97JC3deuSTmM{zDfv zC@2liK*tyCgypA>A(;*{bi`Bwzsm~HuhkpTJ5MR9*LsEaU$%vlt|oA=zzN})T+XB8 zukC-UmC!Ap(PB3JQe^X4%(NSof;wHS;cuWiYOFOtBc%4|ewrq_(Q1Wed`m{R_6VGZ z;~jMQ&LH&5Q3q)pT#Bm9T)0NV51iR%2Q;Zz!hsV-4-K~=3+ly9v^vVozuAjIeHuB> zxvAW&)#hk>?-}mkrlIKZ)~RUN{b49hIS+ZZPvtIOrbt#s%CR5i=z?k~7xcaWtrvrS zT59(;`jX&JYUD0Uvo%5)EL4UMRB6`dw#;7ARh1<33 zD)Q-XgJxV)K<(~dxq{A(=$6zK#k@&GyYT|)tf@+`2SGW%W3Fen>Q-ybVK#Kl97e_KsYpC6>WZAgE|}|(3#D%(6|v7#lM$w z8D|jMG2{udmHo!;NZ5ijQ!xrp9m;*E?d9&?yN=Y$w<66gom`;(CS)=7A9v{L0+c_q z2N@ojhc5T86kW>QoE%z=#!79Gxm6K5rgE24mx>PCknLR0(Q5Q@?>{sp?i^b2Yy$e~ z?~9&hk3n5p%7{82M;*ookT82DdKPO9TRrR13;jxN^W!#j!rccwDp`v19_FG=xBbv% zi@&JA>jYX^xQ~+>PUFzQLfgMaRooAEGZZxLJB(Rg3}n>+Tt4XlURmf5liC{4{2jq? 
zDwqRheGJ>aXG2S^Hh5=^#B-lY;MUAIh@AL9^n@|$745|t$r12lP9o~*ngB1}bbyO) zEgEFiDLzw?xJ*_9+UIp5>sMRgn#%@=Z7hW2WwB5w`e+|F-iMn%V^D~U6mBfHg^3|r zSaq+8IG3;uj{HOL?Zpc42^@vTADkze0@FZi*+fW>%K~!63Ht2L!kK@1`1t)Ic7ugxV|Ogze+-u?8XkstH_q&-FSP?DtuYafR4*i zA$s$YFupN}tX-6Z7ysl){?wDC_tsAmo4t^UJg8Nn?Ur(?O-$TB3w)Sj^^f^LCR?-z<7!kx~$a! zou6)^rZ@$hI9ovb*N%h91LQ$-)>-80w>EmxbuhHX| zgRzx?GPqZ3iRYpo3PBYqNuz1;?ex0G;Y6TtkbJMgK+X>j&R9{8Lh*fMoAS{gn86#1>-dLt5* zy{i$;CR<$Ibq6g7SA$IZBe-hm8?@Is5Y|Vo15dP~JzEFD!UIzxk>Cz^M*`WX*)!U(+KC9}XJ`~}eRwt<3qAgk?7r^1xcT}(0 z$@M!>k7ib0hMhm}gUS7axOY%3Jko2y_anc7e0c!AIG4e(b@K2}Z!KuQyn(xuF2mG6 zbFlie<1qN9In;KIz$;UKp`%*{g1317nxW8*`mZ?$3L8z~oT@TPNtJ_Trib9)rFXF0 z;4+Hb_6@GSd4z_W_MrUhPB`tq-!OXeG>~hzfT_jS@JK%hHimk^x|@!0`}IuF@0W|N z&M?7W$JT;W8jQ0aSS&Iv~vCYwt6_f;vvv;Fm<$DpNRH9b! zh3Ll5oZ-m7+>W&zGEgIEM2-{EA^Xt!-*PDO$tNVt8IDG-HAJSbMSHZL2A3WdjLv5- zL!T@Qxb>H(B8UEkC@_-a99*t)H+$3|sZR?vy;MaN-HOQ1|2ZeUR*0sS?M1mV1dZmu zqH~URob*5q+IpZLD&O@3t*EHyjH=F~FLjEj7LP<{{ykg(4KnwA72mzSce_c|M09;|}y)eT0sU31YZ>ufZqZx@R4PeUh{ zsUan!t=xoD`)xfRxpEt4h;B%jBDR~Qi$i+*;ZxJQ@L2!-u)+I1F#ZsnGMNJU5$Vts zH34_8D}uV$-dOJ7PEZb81*Eq>)*e@f+C^JZ%6|ccD?EC3V+o9l>lWRtY;>j681j-F z@TU10(A8uQ20IVJJqd!!*L9Hk>Kf$bC&P%d$~f?CCrS^L#VW7ILCQC6F<0La23K`!w83SVn3sBKg3kUzCfvJZYK2qd{wLQ*)!=D&D$@>cU$(w-h zj*+v%ErxEa1Lcj6(4&=Oq2aM@ZD`1m@+%A>*f?*zzN zG7Me}nFik1_n~l&fyg~7nX}vQg=4`V(YVGs(Z0?AZp$6CF^+-V`y+@;#bE5(hw|jd zp@Z+XBBP0GK)w1gx*{7RdN3c*`{EqbF52$*J+E>XrtLxcV-BOyGb*_Wiv3~nPI0dl zbeU@#s)X)p2ZF8dD%5m)2&!7gP{kB=bbnbYDoF8x#_FMv|J@WEJ+5;R#f?a*!3@=D z42FBtywN8)Inj;^L)Lbg=*bsPFw+Hi=az?Nif7gR%16+~W7Q~i+b{?)-vCcVllHE- zAERo2QEE{r(%*gn{=OIOi=us~|J*}J+2jSPHF%89jns$KxM;E7`Gm7^ePp}tUNbj3 zzZ&v~>5vEgc4K}4CP~YmlAOmL*h$A7a<#*;L(XRWVb2!4b(sF5cVMO$q~JhoWnf(M??g6*-cc*lBMvSapBxc6l^&iBe9rR5LdXv7@6SbhdR z7`s}G{?H{0l3nnnE&Z@Wha<}rgW$Uaksgc5xH*3TnaP{tlBTP8q;D)fR;qy~30cI? 
z#2qI-8jlU<@5NDjLU54PX)L<8zy zVK6zmcL3=cH33g?QzSxo4p!YJkAu7v;c9Y*m`OhxEBaBGo<0tq?VSf_xo((clZAS@ zt!Vkj0(Aau9vXkV0&UT;hSY=&$nZ%#uvi;hZlB6&EeJ*ZCq6{S@}F?` zj4z=Hb{v$RxXnFVlW}Yi1vMwS%DqnLsFVyTN(c zxWOjtWK@UO03GjzT>Y;jx3xLQ?`s*#cVSSgs0Dp~tC07q_h^aWDEgvrw#cSwZtuO9*^ijI109=!PCf2?qxt_I(65 zHsU9@O+0_4$lK#78PB0vw3D}|jwE(2sh~Tp9l3|x1+{ZpxW3^R3`*MrSi1~HP27iH zWMzR$k18IwaW-DuV~0-u^uW`q-$0AIBlI^dhnwDU7^;oHC_4?ZKi|QsLL$tPG=Xz; z0vvgr09OVj;GCVJt9oQ7{LnjyXQ^kQej9e*d>D6J8OwB>gcUjZI5;~2?~c5HKMtIZtAdnBZ)iIFa`J;ow|wy)n?_it zp9S02J7ASC7d%?*?d&<@h-CtWb<(mbO9J5m+fYtGC(`i`5PkRG=tRzc;5XqA z@|~#-A!GKUrQ7Xc#^NLtqpA%5MmZxr>&;-^86`evYe3pD3Y6Xqgw2+bkk36rPfd)W zKsOpT^u9)%+$}(tXMpU1LnyTED%1!4K+B}3k%p-T7>*t(%9 z)1SRW;|4gO^l;HY_&gdmb?_)(YaonSh>_gTHk2e+h}b9uF@^qU#Ee9^`^5w)H^#!_ z+XhhRKOOEK@JGteV^ERNFj!V5`pLhwU~AC;xS6C1$-3#V=Rq|(+GL5+lKaD{=xX$; zrUVHRFEEq}C?@9=?3*i5_Zhfxv)Rw7N4LE%Z-7hpc1K6g`O0ZP{9qNhr7sVon(g6C$u?Bz zA&(CJGvKza?%+O&^^Y6hH^I-)8rW678fHIOg>6h3oPY8FCUl$v*P-cP^`sPPcOJu$ zcgvtnRtBFCZ9Ato7{YjY>?Tb|`#?ruNpahPe zABH2Un$Y#03m|!^A-3H6L!Ch!wpT3$yM|Ke=b;bzi{v3^OcE3yZ~&=|ExdawgIC&8 zbocQDT;g8{iR}Zi?`mf#46y~jb9FE|xd%a^6tp&P0(UDt2v{{A&rP(z(y(fzqxu{| zjGSRuNEJLAk`A9=xj}PQCH9{=6sMjUh!(t>F8)Sd=6DMSNHCfOe>>#yzC-zVyHg8T z<-ErhwI+CxLM)!Er6Zo7A3@A(d2G0OFzMAX$L*SdIM3G_zx$L1F2S2|T%Rq}q@9E0 z&?>BFdkB_m+hEIxf5L2Tw4AP>DSriYEZM9|Xgc&f;3nv3PMr z2e<{L!rF9qNM70nbGM~~t@0kQ3p2+BJ&6dMD)7qx{=lh(W7yvR8o27W!n`jNF*BG1 zDw+fF?u%C;oXY{{?fdcingdwzBn9mg;dq4DkL>!ViU%D&3!3TqV4$W<7DX20mQ@@& zKISGGdBqS_K0OZuwBEpv2vek!-hq6BDSDe`44KDtkeZnd@{^x|cGw?9e^;iW^p&OP z&)TP)%M8(zTl|GPYY~qoD6d8j^YppY$U^j}YddlZzQ8H0vP4EncF3#Qjw}1}jSEw* z677kRsMUEnS6fGr@8n~gsgDEF5o=bd|5>7D!^OzKZvuDe%}iwHaf#azoP#2Mui(yI z84L$6gmV0yG3espQuO6Q43hito1@4Sv5S|vN%nH+@4-nZStSP@k`c`j$6BO*cRSJ^ zQO@O6%fZ@19thjK;C}xej#^E%(a3SR2%E-UkS9-wv zxBKCgVh&u6bj3SOSAoi_@i4N!24Yroxa8;%yh3LSE)xGowQA(>Rku%Yd|5eMP#lJ{ zEp^etp9*-g!Zw`lTMqxC6>-R?6j*6$hEzJv!3b4zQ8pNb)#oe%?`eBsHA?`6$$B^| zHXa8Yr~EwI(u(}xPV?|eZ4Z;`1HSX)! 
z65Puk!bVFe$|wh<>!gEj!Z6tT@Bj>Ypb1|;SimCPf$&cD3*26;018@X(ZFB|ON8C9 z+-ofiZEJ#?e)2$LVnOxmG`O2P4dTT0_P$mv7cuWJuvJp9s8ohdA1AnCJOm`GUUM_= z_o3Rm#qi*85~QC!jB2kBfgzUF$mjMJIB=A~ANPUa@i_yg4D?1rLtJ3%>5*X2T?T&o z3{AdmitE$$(W)KQ(7dP`TKf*b)9tyaXx~p%Tr>rOm)=8lmo~v$(UrP+)D<#A)v#P^ zENnWt33F~7{{UB~!qBsRFVXb> z1bn%Q;3tzm<3m+qoN1mc{_mn9Sgsn3kIDc}IyV}Jy0^o>{)Twh8a1-7=nFg)X5uO0 znD0*eYV>$y07lJvxW^;_ekChoBk4Q5VQw^Z%07XDg?6M(V;>m!8sqyfd%XsHsO$`2%oi@!mq(|$Zp*95Ely$#84+{o8oqp(@d zOZY11f#vp$#Bt40c&*z*eCfv!GQ++Wo8Fp5X!jYAZ*KtG3q`ng$!PpWSp`3R+Jr|g zt-*fNbn$~5C-BdBiibO%hDMDF1czq8l2kNBDr|v*W2TyS+WBzjTt%uQ&Z9BO^2i4K_ zi(cqi;!y5W70(@}yU-QC7&PzNORiprAlsRbxj9lm7wi&HkrzcPmK;ImziK(NE3;6> zRR!eMz7Li4WOF_ZOHjv@U0e&Fj?Bh1qTY|kP|>MrXvvXv=vY!YniTt!%U`tzZJb^# z*8eAX^uG2a-1FX7ag_M0_#o5+@y5i-pqP# zdmcA)E-)BLS;F7Z}I0heTHgTTffMJ!l|ly~ex_ZPk5bnFiy z`#A&9*T%7^@?AbEeDv6MN^cCZQOHKgE3RQFSnc0~$ zr_agmZu>GA_QIQ**L@b?xt}Ss`d2}lzKPK0DY(|ZobuS|15HOeqhkLg(1h%yZphof zNQEuTn$`qn1g(YERl6V*co0_YhIpo7(1^E)n$~$P1aIsFcc(Uo$o4BidN&SE58ny5 zWwT-YhC@_SQ7CmIu*i3mxr(~j^d(gg7l!toAwS8Kc%!`MfjHvhEYvKy2vt|kLDr#r z(50BYsN}&U+_7Xa+TKWu);OO>?=GgI6@gz-$c(9Q^4=NL&T%mI4w-`TbC;s}Ew_U8 z<1f1KMhA4#gVqU{1lWaVju`sQV! z*!7c9$yrCFdE$#N5BW)TpE3v=znY3(7&&9)?}@BFxgzVw`%sgvvi3=6cTgD`=&=!bB{fIk8C7_~C_~)YcqP@y*A2|+`_vQh zP}JnOHCnnn0e?1#rZ;8su=@Q#dbLc6pC+5qriC3a`Z57++iZ`IOd3T0dM&}`A8%p# z!ze6}<{+=_dof8*cP|G{@Wg1wYCk+{(Fh##)(6SE zsBkxf{^JfJl^*MMF2@=~P8}l`I z=ermj5`P1)E$@bB9*o691QcG;b1z!LZ;8!Il(^gSMJO?LHBLNZMF%`jLPHhX@KQ}7 zdU&xT9_>|*ia$L?dq*F`T?M!BYbQ74zb_q)SeDCsOvA=! z=h2SzJbdShH9f(0AwHnkh$JO*@vIdI^ogaraJ2nFJp4)_9vu1r*-YGxCrV84#+dVH znwv3wu-z8ap;t>>H2XEOS#u1=Y~P5oE8Vc!=xXvRw^=BuXd@nUZ8}PmE@P!Ho4IwDH+Hl;5*4zKYMG%6M};pav5jr|G0E=XKxOem+WUm_>pW^d=DKri4g=S4^ zhFq&BA&;Kjkl@ODC|o-jMG8Dns$VF&^F)S*w2Ogy_VbXN;czq}d?#%1+X+jD_CedX zg+Nxn(=gWc8cJH!7}@=t4zdz2bezaCgLfgw2YaFuKQ}_-)iLPy_sKBvv={0*b}uY5 zt46|6o4~-q5X(g|Fw1!m+8(n6m5=g5vB#UEf^83AkcAwDyx0RKWtON%n|O2$JE7B` zAEDrYX2`^154>?zp>s1B>GK-Id?-NGdCTF=3_oyAD5HuG&w#Fb z7lHeNL2%ra95I~TxYm;tQXzYXBZ0C+k! 
z5WW!G@UgEoHO(s%G~dZS^T|)thpU4jzdR9!^=U+94R?X;O_$(K8$J}fn8KS}S8(5N z2~NT#aBOfj6xvV_`r`mJv6oWWjjv^BhU*Fj>8`I`O)K*%C zDhAC)DH*5XrQdF}PsZS%Ka0_n;3a6q;f1K}Pg6A6VgaIjjZkoqDSEr)9a{NR3x~+Q zeL6+HtJd!qRo-h4_}>;F_eNzXw@*?2CRO&cJo>JDgI?nL)LhM_m3#v@aW2Rd+! zd~40o0GY1NKxtLQ5Y@v6ZLtu5(X=l}IA9VQ)#5OEEuW6+DdSPc^QB1Oat9ugy`uPI z1!&jBtI)W6Yy5P>O0X2aq=4rnnO178}PfK4&|;i|VgI=I0dZYN!bz#h#( zdBFj>N-%n`;{=>C^+p3pEV;3V8QQS67%G-sgP+U2$X?rY7|~<_YT@RFdYx?sH$L`5 zE^YQg^L5+c&Z(R5g4mUJj`EQIt%0b5d_VfM(RS*L$#lvi@irA7N4|BMe438h5=}?7 zGGv|J)4shCJu_cSZ*FviUb^Bm z`fe3Zm!I^c+pS5#ZJ#uvhlKy4+fSZ>?}&});$G3r@=Ggm#TIvZa?^VBrz~$;bK@F) zZ=5r2_x>zKV8RT#?};}*{YCHcccOcqX~(qu`W?@X*+!QYlbGm>3pj1?K*q(|lXeO+ zp)JB==+|31&|_~+pdByWq)Y6)m|xSj)999vS#w|1j0-vaUc6`!zV;b(Z!`XD5_cq0EJJ5cBSa_AoH z0L87g!^T@HAg0xNuo-m-nzcI$%y~b$62cSBnT>!UUJK#moSE_7B z4(twXjt-3??|Ay23_FL7LB(&o!O2KDv`j95PtW>5Y~NNWd;dBZ*gX{{>}-T+W(oDK z142bVRzP!AFzOuI9}aE`K<7L?p?=TRkea#-{pxfRqSy6;;0{~iBR&L2>XpOpnPkt| zI1yoi7wR^1I!d@V0h+HSaoelmX!7}e@U6`>@JbzmPB8D`@f$6idD|5&8PXT_nxCag zPuf#feT?C|Rt$ZOOwhAY^-#%lL-gt6di4F|Z18x!0Pd7;guww>(7^9FpwYf)XLk`; zT_xXaoLU0;-_}6Z)yAl*!!jy)-6G&GvPJ8nU0~Pi#b8i%8=45psOttH@H0FCohe%j zpJeSIyIB!f*B=S0P5a^dO$+1}x*Xc!dgy5ICrbS8Eevlq2qyUZqP!O#FyxjQ3RvO| z3um8(39I9fz~U<$o;ePNnU})E^lYH*^Pp`@Gt|-C7tzm(p?Jtl^izHvS}-lZGs+bW zE?EK>+~>g{l7Hy<@B;+#+o84ZJkZjE;V|hd`8N7TJ1U^R8T9Q|0FHh~VbS*^(0SA) z*s?Yg*={5T>!iV9&jXcER-83LtN=SX!GbCSnd_W z=uKv5=!MPHpp&=Z!kSokzo#R5^s5c$nk5q3v6TZ;r+)< z*i$Km?p_B$u+AQx3SR|B8%;pxiLL9M<_O!D*dkZ=S@7bK8a7%nsB!8UIDWA{%JVWr zU-P^mp}}0L=};4Df9Nf0T}(EZXT*|y+fqm!D}lJT6e@q&5Gr<3(EZ^em?AWSh7rSH zD)EJScO&+Cr!A0us6YJjSpgxV3gO&TUn=jaBRG-#&4~@CQ$G@&U{&Qv;Kfa%?oR(s zwR^iBvNm|Z^7*$Zqwn3}Wor{qDx;wMel|EiXan2(=fL31fs}oY0S@r(pgtTPp9RYV1<+e+3c2rB14g^yQXdiQ5t^a>#|OeS z|54DGeCyb)i3-w5uHo<(b3lNjp!sJFxbC|P9ed8CCcC$$p7hZA(teAm$lj(f)7K95 zCAuQfyaL`{*o&UzUx1J%cj5j0PUy?+9dJhb0va}6g*KX92f=~nXs53svP*jdSJ6vY zG5H;}lXnI-i~6CCmpee0-s#}fV>;};@qvw}l$KIZB@`D-{eVGgO?i_{-9qPf5`A%qZ@_uTMM1an*x8URV5afDD 
z4RdB40b3F)%T5S@VEhRTIvJy+QN^$!ZYt4>S~wWI6wW!2*zt--(Bnojl<! zZx5o8w~A2fxYg8#hO?-kX91Mw*%Yu&3Ij1*C;Ocy$nmo&+IE?I$Mqs& z_~#~=Jbwq2Jq|)$Pl>^2aTiod7X!0x1|99PLiSL*7$Bc!?a}+0YoXQJ5abYbl%hL2pvPnL zz}2=Z>g(zQ0Vmy&qIG9@-~B3_N}GeGyV@Z0>wRJTon7G4KN^NSSq9%s15umy{wU7=2nh1BUb`zR<9*kp1#L@Nc-I@N1UnRdf){y7r{xA_va?1CBsXC9}feEAIiX2YmK#rI&4 zb0!poZG{-W_OLX-5L#pwf=yoo$PSB#`Y#j^+|m=)cv_+4h4FA{+YG9}wH&%N3k07B zMqt``K70%Dh5nYo$jXZR9_3e0%H7e2>MgzRyYqrAG@nvVb-daihLw(mtg;Na*rXL& zIqW(${reT_bVnzc{@fE@_3?o%W%-a+Nc`{J3t&fkL)h1(C%iA5440E1`Z5DsLpa?V zuI$UC9%eOyil?Jt*~5-h2--`idYpQ-tUD}{o1rN|J1FUh7SQUs zAL`L94Zb}d4%_DD!ug{Jn8ugk!8#S(zaImx?=2CNv4rZWehaJFP@vl)lz*Z*ShV;K z%Tn9H(ZoXNWi9`aJoO5vsKH2^~eDpjmkQ=?BQV-*W6N zI)i+A+Th&H9q{Ow&SqLMy$s3KW~K=y?oKIBi8V`c^S$SZ9-r2NY2*6Xw=^GEZUryh1U=I0zzIE zyj_D)NSr6~bL~Suh!BFxrkkUUkIe7{^9D#dv?n@Xp+O@Wy`$C)NJ7#1gOIX#CX{}6 zM=zN($bB~tuJ*qNQq&$Fq@IDjg&SH?&j|Hj`U|qtD$$MBk!V=TJLtdXGx8sgVXNpk zy86%rz07Zn7v+6|!U0>+_aBW=R7rg_+Y{q?<`JmhuNRPME<*dKbwr_7gVCKWm(i19 zR(RYMvM>FrKOWn#DZ1PL6~qlJMeW{pKx@9)p!(VhloqoI-6Qe$Vf>}2jk^cZc3uhd zJ87x2fy3am!z(!Twl)3|Kz=)5Zi~AeZc5*Yi@?>=*?8dodN{`32+wNlfrndnqRYMg z@%(nv@fgRx*zV32wEV_2yje{?$TIdm`cZis+b%qg+{b$2lG&k1;hX^@weyk1pk`QE zGy_M<$v2`KrsLQJIp~yc7INiZ#k_=`cmy`VCp~r`%UhX9GWtC_b$u<`@@X?tz74?N z+B|}*kMH2aE1#nDo_q27Bn#Ysg9$F0I212yJqFFc-V6t~w!zWk7Nfl7yYPlzTk-x8 zp=d{J7M^!-FIr6U`Xb)FL9II3 z*xe0%jIl$~2m8^95|S6!P7B={T!Jpv5bAZ|A$y0Z$m`lS z_^|Fgx)j}&`0WT@-@i!NpBzrfJ+h$UZaHkps6aP+%|W90J?Ir4i@OXkK?T8^;bx=e z=vuS4P%Z3&a+;SQ(?PdkV5BkrF}wwuv$h$!vxD%>=NFXAk2`RM9*Ay~g}~!c{m?y< zSLs_F3Xdi&gYG2%xM<~T5O%x|w>#EH5oRM$c(bXkNo4=+v=r7pJ%$V< zcL6v1hDufYp>xYN==yPc)KAd>wOQI4zfliBPd?b9-hy6eVXF$*mKTM_`kY2_CU$u9 zXdxPqH4qn{bw}TJWkiSkxTk<(2QQ!MF+=I6{q5uGU1BxfY7l&z*> zBw4V%#1AdXnTw)7W}^HtT6EzN`6TjV!skuTf&Yg%)NsKDl5;r^P43hOeV%WJ?i83~ zzd;V@>GZKEx|J6BFS|nZGTViQy_=05O*{jAmNSTYeH(30&w>MQe2{dK4|eeGhy}k0XzM7v<7PM1{`oeLBwk0k zvBBtO%cf}k?0gi}ZWY?(xE?KOuR%Y@HbviuZHH-{4B&3-2>9^O4Xt&yL&JyUpa#vI zkub{|hgMaA(T@|b()~FIUE83gu1@H|jG^eNs|gw`NkG-iCUB@UL-e8%$h@kQ8XtTJ 
z9c(-hU2L-rzM+L6&h3PX*K4Rya}wt$U5qXmp8!Pk`M~IVAgmVz1~Ex!k-!kyL{-5i zX9f+Q=>k(50}Adq8B{?V(HY`k+FotZMA0xvafj#TSNu2E%E zck3n#;(GiK;*}YFN%rKP!oqVK#yeEuz8UuitgSCc%xdQ{p|6?WU(cuUB)%_DTa%zYjeih+yCm-S_c^Zf~N#gP@6L3Y3P3YwYD?DR` z5#8u1dF_AOf%v>{1ikhC9dvhO2i)qIFFx8X7cZV|MQ0Qn<8+7f$fW!hHd@seRlT*M zy}xz9>hg~CPi%)1L`L+9im&L>)b@C8jEr7!t3Q4}*Mj7d#p8C*hZ6f|GX4;@35%L| z(0c`k@D^1+de<^zJUgA_xfb8SY178z?}MZ9_vdkRf%P7I(Ap2DO?rc6Gkx&K#%odY z%=_>>U_RPcx)rJCj0`r={Y!EQ>mJrUTbgL1E3~0@uh>m?TbX`_X>@SL zV7ArmLE6Dxz1ZF5-Ppp}d0O9hW!hEqe`;0>w$iIFI-l-KF9>MP0I$hIbbCuRL zg{ts&v()UI6u{oU6~%TUuYb$mF;~-IUxKF5lKafuRSUIypF18lfP2qRk}%g#hZMlbL<}N`Ix>M&*xV(+j(C!(wCbxyxd##kKvaX z-K!U-)*`LR|3q5tYZq$JmbBlRexPxsn?7}>=cSj^pHzjo{mDhRP0M|F_l;xt?N72? zn+MRP?sV>(;r?O#bK4c%u1tyf3yreR}66Tz!wG-!FZO#!R-PXEG{$ttgDX zJEI4_yCjsR_qV}aZ`{N9a5hc%vZKcqj3HZo+pu|)8F+1@1^A#nd0SgZXS#Qbqd4p5 zTDpCpHC_F|7XO^)Mqj_S276u6;K*`o8c+Oz54Gt8b5?rwiKN39)|Bchj^sqlUAw>W)JI=ge7ajhr-5EUB@6fT8RmYe8pfgvj(l#33#&6iZ$MmPR zSqv|(m1b=BJ=*AA&zO>#J=lAKOQ@NZk@mtHDXS5l)%p%NuCYDPOM7YJ^vc(L7igYe ze$94MZ_reA+fj915bbx&z>KwDW2G&c|AA>W!@_UY^Y&G?RvY|Q#RoG_j%RDP?KG$= zi)WZmk#cPpA1ym}$2WZG-Fj_#Gizq1uNOO#Z>-g{?c&#_Ux+rewaV{E<*uqR%FN0R z4Te^2x?JW5jc+j9Z%o#1UC_L$+C$3jSl6D3j^3v&9W37hcA*!Bv<-bxJu(*g# zu5`qqZEdxCd#tP+9X3#V$#DoX-Kmwfb;d#VMZW=>cjp4C6yv6_3Ga)U(4Qfiu?Gs+ z)H%UyeA+r~PH_pl^}sakw(8k_Yxuk>{H(Nsay721A};{d>~Ur+zWQi;pF6`2vTDsf zr}wgElUT3$Us34t z`)tXRNUhhQLE4m?$Fy%>*VhgWb!QLCE^9^jcCytA5HvCo(n&!#fp+Q~IdW))~Vu6&}=JSo#m2y*v3 z?f*!#*ZDo0bG}-8B5b<0wA9njWmblEg@ahzRa#ON5q`oi<{6JYSX!pdclfFe=~Kea zr=PP4-s7}^v8$LJPqs7ToyxS+9(!v#3no-;oxZ})Zed?`{_U@t?)}ZGE_7?jrniz- zxpzwUv8>dH7*f7P6PLa`lI^{8zA>JW>2PSLE`xCpkzNyMeK6+S$uo8S*MqpY3qIbJfGxN^;of(4!LEiZB{N~d$r49 ziflKtIksoCm(T9gn1AMLmlmyN2O3Y&hGkg#MWh>6O^*G-JTa)QnsL6wui1bgcIm(^ z+UBde(}{ztSf9dh)v`@{#b&$M%Elv#*&*}%wXp*fY}8h=W^$HcPg`o0j*fm) z<5z3vt=_~8v(akzjB~1VHosdbvwp%J3vtk%Fgu}Lx}||%+=cJ>klPlz6|K?u9yiju zs6v>yF&(unI=s@Dp4`GHrCXTii?W${;UBe=8ujqIKDAuy<+`4oJY%)?(SRAu;k(&d zr{FV8&Q_XP{>4MvZptclSYmrk^391=V*+RUopiOX@|`=}FE$~rYGLka_QT%m+DA(* 
z{cdgNR3RUfA8u1vVrl)3Mk$c~j&Xq+ue*#qRIj=g98s4O-N*Q~jlLm#Rx zp&Kj<)MRKxZ0>{h+OVkE%&Sig*lBOK)2oKA*EBn9uC)reP-!)P3A2920!@U?OvY`{ zGEK$o2TW$vHI4p7M`pi`l)qkd^bf^Rt8r^o$GF%mCTyvc*)l4GSvq(cle}salY1hF z9&!CBetXiCz7akFpZ~fZ7a4Y;yX@{sce~J;?Y14U#eL2&;$GIQy7>o&cWOC}cbl^P zCgd{jXY!e6T_!OGQG1zPjyLJ>Z7u!m%09E7TYO@h-Lvr99P*yMld%*%erib9UpkfU zxhoTkT=w8K-+XB4nn1c+*(7|ag%bN8Z-kdmi^a!srs9WT{&-730X88o7Z`eL3HCm4 z8h5^X3i3A37nTn#$j9~K?Oso?6)`o26=Cvf|<&W?5#Q~EU(e-QC?@>NI(=vn^ zb}5f})pjxt;`PUS)&(QO+aAO)Ex>D?cj3@`)_%pl&sejJt8A+oFWH56FSF}H(^1~Y zmdu2ClbBemGW@hjIQ^;3D8}Hz0_Mb-rgRT+A>HmnV|w*sKK(s(03AMf5^m_(iS2u^ z6}#n2W43f$Q+C5H3l=gPqYlTP;oZ-U;x2olap3Wz*m>P2T&!tM4~tF1Q|=tZB`0jK z+BXpQ895y{AKwU1s&3BgvA;~Gv=pGNZH{95Y2DZaZ&0PAfW$<7N5u&z@=6{&`n+ zQ4(Tp^DWtAV@LK)DpF>|7AZ835dNP-c zr!sMqCoy9?jA1t8^Jx0dCFGsoubAlia(Y_-4$QGZChW3d#OOWOk&eB!j?M_+V^vOX zYwlk4EF0cc;EM&`itzZKVR|@%lF=2enuhR*+^_c^%=FIrj=1gFq7I`K)&_$9N zc>nyCcu`O+F79tcUmTf;Z*-W?PEZbI*KEpVZFWy*hqB|?y<-FLsr`4c>Em3y>~tZb z2u=H#&BY5JWTKEWi?D2`GhOC4kbJjbG4AJVKs&A5h)WE^82JEqhVJFT7|NrVvcOoz zJERZ2Jfe)*zVQM12HZw^W1%hc=&BLB{8&BK;Mxdga|iqL?Lud7H)FcD9L0R|*~l38>dCrK zXu+Bf>&YHjJ(Mla@na*hjL?HA-RbW=HsH*9uK396P%OCClfLfu8vmL)fW7^r4?Fgm zJKOhtF#D`t8@9v>q0Pra*pOy7m`T>=tYOo7?9p~E>;V@)RCGO#9or&~Juxs9l zmQM|2PYiwlvchV{-64-TzPNsB!M)fPq?WsbW99Af&sD`wonM=^8WZD7Jr7cgr( zHK5<%)y!L%#gy1gppz3W(LX#Znb|QF%*evNbVS<_y32s;m>q0PGl|}G)VA|zMW=oA z>`!~>W|B^H`^!UU>)rF|w{H*O9z}QY2X7bpL-#VQiZ!DbI2h2{kfFF&ee!*SWzjTG z9EnQ{X5e-YgXmeSFW^-R-RJ{rqwtem2e9j4DK>a=4{O+GXh7r6%$s&in5>{`+CuEb zT)Jqi)$x~qOZoiMzip&1 z$n`o+WcI)OzB--NXrbEfWYl41z^v`ekB$_xXS;$~`d}(EZsJaw8{Plx4*t_0s&OMP zoE*6A>G8x0`6-IZ?{NVoGuLqSm8aY4uaB)bdu%5xJ1#CiR$WywP`1y<>zEq8DHLrAh$k82vvKu-;EIa*kvJi% zVdc|fabSO@0$*Qns6wF4FK3(F#GkkCuDIuT8D*LsLq<(2FyH+%@*MFBxjMZ?TZV3^ z=$-F`FS(g)EAX|bJpQ^IAF)@LFA{gg!i`h$5}bqs(K_5?YzbODd`QKbPpvDQ4<4`R z^Y!7juPtr1m9H;HZ7()OMFWE?{GDDQJNH{?EXh)dNb6N$+G$VuL{-%`{h}KFFMHr0 zh^w^+YEXCf$`!BVm$$rDJ<9ZMX=UYoV%ZpP+uUrgZ*6nEHdt-$&>D~Mri5kQ)%j;S zkaSA#A6J%npP+2L*?z~oCXO%mI`gQRS489`uk~#@dattC+TmTtAzq`e4fb-s+|{f8 
zJ`?X}mp^#WO+@O4fLvtw(uF}@9MR3;81U`rzgF&Z4A5{bSd;o^j+mO#yHn&{E9_h z%0oT9wj4U`_1U4?o4n1z>-N=DZ-)`C-u2Hf^vX_M(80W(h4;%Tz$RMafH+@B}hpLV`Nj?Z^V|-@-J7$|hAN z5ed?Hid3OMLBNxYRSo6*bZ@ytB8v0YSJs@=Dyt|Ra6ba8o~`zrOyE6!_aKn)6$-V= z{ZDJYBvB?8DU+lMZxYJk{_~UxJbj;Pl}%JspZ>i%>ANJ|sp`w(6hir^n&GGN1wyr| zUShh`Tfto`yyIkJ)GFfua;$xJQiG(1Dx(BY=jG;*Fq=o1aZ;1W$jIKm$Uog7%s{D* zQ`yGJ(o*7u@w_;pL^fI%akDVeFM;8||Ng}Q!GM}0q_|Fh2uWp_q5e-&3NsBe33JJ* zjZpq3EPkA9l<*&fRm0)`p8g-lfLe@+j{J*m%y^0dUsT-EIFg1wnUVYKtjO0lvJLapAs*Vl87xA zolR4UBnoeZP#7;tNmQ%aMC&Re)1+}id9W}=NaE2lxi>Gd_L3(c_guW!G%7F{gL*27 z+;VYb+c=T(57h7KBnwiRWRYJ#s*U}#JLmhI$*AbK#D3-D z6Ub5$M2TEv-c%}!7fN)KcNC;4lrkw#S3<({H6R{j>=QY#PE*Ukpzv@W*IpfA zV5l7IgZOm^>Lwhcl=I139+XLB_d%F4 zN+g%1D0m4nIWJx=8by##X8eBv8ZDEPL`4qBA|*}AOH55usKX3Zj@*ra-0)I`a$XHk zZ8-<@k8%S-p9%bAp}u*Zs!7d6Ozx8IiyO+)`FDV9Z`_c6Cz1ht4yML z1cs?qwn9E386GJXaD<(wa?lq@`C|x2aK%D0LMN3S+0#%66$)oOu7u(U zHe{*1RKA=qkq9Lsg_NgEk_-9qq?LsZfb{;q0j$a7NHs^NT$hAqB^-Aq%4MUKN$OmM z%1I(i+SHT@$fk#qYiS%9HGrU!w9t2SVw8%|UIZI>$zJ%MH z`FkQ_6&Jm<5Etl9X)0r~uxeEU677}?WU^$kry@<0C`CMiD)KOu6{)UE6jsuBJqDzD zv{*~{>9RB>FNH599A+bs^QEbz54nt!CJu7pXkB2Gpj(Hwnnq4h;XML_B7!3O^Fkwe zWYdTj9W+Qs`8`AXh47+61H%SHab-QCd+HgY54r!V*i_X(2Tcx9JaXvYBQh{Z-;sgJ zp6m4Q!utKVZvHAwBf^E7Be$wG!!u7Jn_OdMDe8zBhAL~RgfFdI>!1@BJOV-xq}$q3 zaBDQ0PlTOrIfU`#-d49*gdP%Qe2GFG%i)GwK2j1dm!)zA^=b=JMS|osT~U1nK_d|v zI$5S~nM(%fP-jW%4#k<^moJb1jp76%1&JgM_J6H+GgS@#0;ru>@WfgOb50tiCW*+9 zt>Xkdg&;{7pC%EK@p+KHFoQ4yAvynsC%3{hl{Hy$G7){qJ5psQ97SlY4q=Fw5aLP` zdvF3$%;SzKYYxXsz9N~IkfM%MS&{wAc+v$&d<1y{iHPVR-DDJ`wtk9CF6B!^V}(4u zTvw=bYk+F+^Hfemu=C?2Laq-U8MYu<&(zkOe9?;)!k6}Xc|&0SAt{Kwu_w4rCCvpg z!pu6nkyrEm-Y4azl%`u<@81AKCyI1U2CfXoH_ehR%8p=r%j)fBB{6wALk_b~0iA3P2&nk|u6eQ`C2vN>Td0L8q$TXfX zRiq%on{3V!79->$teG%}3$D*bHKiPC^gZavlh8L&o~_8#MDbiVHCW>D6S(Chqaq?R zO{Yi+z8i=V$uiUe5g{y+q!FpB8?lgRRIUTAkZ^k~w)0fY2?VE0Yio#Zsns0Z)VS83 zI{8nYB@_@tW7K~4LxHjBgJ#p`s`W22LBwbYxZs+S_;#miC%c_>Nu9KY31 z<*XbTk>CoVpU4fdY%Jo1!c?h<)T7e%Dpnx_9(=4M! 
zWExUQYV8AD|MVatqb4Rv8ZVLKZ!YOe9o)SpXsN0L#~~txO{+b2dEY7MD|z_ zAt&WTgdmylnXzty{Z*EUMDQwz!6GCmwHzyy%Xm^DKSiA@9>cM}Sf=9xl|zl~S3}8! z>Xn4EY7OnW#7~Nw{I`{^(J^XWDoL8klC#F7gxhphs<}SdMo0$cP&e;}(07e_Z=y^m zd%VOxSIPv$AyJA1{Wu-MVVdOKh*DCC6#oO=L_baiSuOI%)%{j$9BJ#=qef)u>p5n2 zR3xP(BoMU}M^2oM;&8k?vb|EN7OBqAhBoz|Jh1llAU~W*S-9O9&1C<+JGMW${$JKnIY3l$I1wh0tw2{H6Lq9>S87nht98nV?;#?&T6wD0WX#-Ut%4_utTRqH zPvpOK*|HX%n^Y~rbsm~RDw8RbbTS}b)rcrTy&%&YK_arUIvf#lYN4ysX&>$h3Z*dh zZy-W`Ejg$JiD!xmY^$Kqlv?wYEghM3E{>PPj{iqllZVG9dViD>vYgL;+>1Mnn5|xk;|BnbnA@RfbDP$cu)uG@t!Xr*2 zh#%|pLX+PrkW<;j<{-!C{#ksTn6Jfj zO|?tyD*Q(J@3pAy;D4c3-@hG)U2aio7OA#X&FT|jsHch=5~!)Rt}7z+9XX`RMO=a& z5gNn?6Ovh`*8tl|i1y^Th%-&(gsSxo>g$2(8Xo!z;yG}wIenVODH=|xaz<&Ss?mQw zgZMqEN&MPT|CjnWp|ZBR!GEbHBv;e;Z%QSjkja%C8S97jKdJR^!2eCKe?!{qW%HjU zu6dNTgh=07cZhf_oW>JyxD^nh(YZ~De2!Qx{$yeL*JNR;92AMfk0w$}Z+z>`kM7M? zHXNUFb=+ceL4*3W;=UHoocU=>EF6xGb+#i(?(}!Tcb~ZidXWGIi9yIx(`zgAf`yn( z4gOK_XV}D`BY$BA!L=jPIdeT6`shS%uzz>G`6(2ttaL7z&Iu+DT&vF})s3WXMfcV< zvW(N%@jR}IKx=EDEeR$FaU}|sq;Lvng7{*CeYu&*#iWvW9v z(M+{$uCmg3x%#*RXZ2d>LQ*2)a&Sqk;ufSMeS)gEHMtbGI5$%);10>$ zK|&5(T+6X0t|jHC@&tSm9TX|kIgMujk8-ligtF`ZDNE%zg?Or}CY(zsApTiA=cP-8 zb+3u!B5ImA6P$GAECb^5S%vE++Mm1S;-DVlkt8uyWf4f+wLgVn#>BHWA;)2XZW#v4 z4dM-p4CVFW4f`4B?u^KtYrIiEgLs2}2I@$X!D|#JQ0I`|^~^h(q~Ma?35s;s;)#fd zV_qYa)G9N*5+WDLBsxICS0s$n)h9_N75a#Y-KCJFr1MjJlG3D{2qX|fo?P6KPP!S-4OE;@uEbf~%GmIDBh&a2E`G(0$eV<{^cDw^#tCu~z|{wayu0WkYKm?o zE?yTCDmxyJpOPZ0i3E`@h`CXZ$4eev&bT1c&U8aEDe0wELV zfC&}}#VLrlt-Ec=-PO_`k9gGjmb#!40Zo+1;z)>zfR0zEWvjEJJJt{~;hf**le&K|b>eX>*^^q9s8bQywlWLVU@s@?DTx^C5{^=uBL}PF^K^!3iOD>?GyHykGnHcPn zIF6!frcl3sjina&yMl0pQ%$UZYf3b4P3V}oTs77YCwPA^ms+eO$nX%4=8j=TgiEZ$ ztil`vt8&fdHcG-LS%!uNxrVt$iALj%l!juX<;EF=TMP{{jAD$&8;>_3{9}?~Tx2-j zSa)SYu1w+$Obo`GjyH=p$}mbcljkZ;b4_xMvy8@@XPAr4@(gke5)9*w<4vYmjJL=z ziZ^X+5O0)hBp;JuG2WEio7LRsl6y0yc`n!5EWWH(3fd+){;`l^`k!2uYI*wI!A)=ZvKlxpN&bb*6bDjDf-d9BkxOvoCmURkpqmcMbS zj@WA$=wU#Fr-iYFp$$1&8Cn@x7+MfvYDVsDNDcXK(#Y7v(7}+*&7pQwH42J^>Buzo 
zg|%ZRR3KN2OE}%PheMLJj<58>OD*2bmG2-pAfu@yr>R6KM61O)|p$n+#Yw@BsxkF!$W^-R)A{Dd6p2;2)?nVK9_?f$=0o?1Lt*Nb>Q zIYo*W=#C4?Q71RWi*)yk$+2b{R{CkwOd!da)579cq`uB2rvkXhA_=jI*ON1Mzmc5t z#HG69CUT5bi_7$Vuh5@Xawk*qD(++; zUadc^(VhO;6A`VcwKg^v__gFLMG`aQcBBZGkywY$m?z;8va3iGL%fLduB>ay^%49( zLn8vBX41Lb6Ig9HV z6WlrLdOS$(|A*4nZ5pe^90bv{hq#8EUejI8R;SfyCQDuXhci$J`zuL|_Rnr$oqfeH N!?ZN-kwyyd{{u9`YP|pe literal 0 HcmV?d00001 diff --git a/human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/config.pkl b/human_aware_rl/ppo/trained_example/config.pkl similarity index 54% rename from human_aware_rl/ppo/trained_example/PPO_cramped_room_False_nw=0_vf=0.000100_es=0.200000_en=0.000500_kl=0.200000/config.pkl rename to human_aware_rl/ppo/trained_example/config.pkl index b2a1fc0f5da7e00f65b2cbf789760ee444439b7c..183882db00937b3e47a1be5268e8299788bf2def 100644 GIT binary patch delta 710 zcmZ8f%We}f6wL%kCZQDtLZu>#SRj!uNK}c3Sg--9n>3-Sco+g0XU3Uv<9S@$6EeUM zo^7+Mm|tMa0`UQ`;4k0@*zpN?To8zrt#i*k=lJ^mJpQh+_sv@P`2DkWbiPz7QQ}d7 zl^_lay-)AU{6OQHhTDD=(DoK_MluRh66O~wYgf^sKMinmmL#a?mp&2H#c{-$lftcC zbWDR`5)_gw)S8$nDkR*_KU7Xm?__|Qs8-0tnA6ZxKHTl@Sq3V)JQ7d|_c|7;YLd%j z%t;FUnIT9O35s1N;C>Gs{#(bf&)~rmvYQppoZE_b(n6Y0AZLf*o&5zpR>AmRgf>pSQx2tIob#QsA=HCl?BPMpfZb;1Y(6Hk`h5;30?vIoVE5dlQ70{GN2J= z0~lCpH#g?p5(A1(nd$%0{7Ayk53`tXOvd`U+gb{cYgmfLsqGq~sWOs^({|}10uVbO zZAv@r52ixC4VlJ`b5};z8~^Jx?kwFrTB%MsusJ`kAn(I-{h@L8#<OV diff --git a/human_aware_rl/ppo/trained_example/result.json b/human_aware_rl/ppo/trained_example/result.json new file mode 100644 index 00000000..eab61ab6 --- /dev/null +++ b/human_aware_rl/ppo/trained_example/result.json @@ -0,0 +1,500 @@ +{"custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.03125, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, 
"tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.125, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.75, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.8125, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.625, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 4.09375, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 4.125, "onion_drop_agent_1_min": 1, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.0625, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.09375, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 0.59375, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.09375, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, 
"dish_pickup_agent_0_mean": 2.5, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.5625, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.125, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.1875, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 1.90625, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 1.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 1.125, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.3125, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.1875, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 1, "soup_delivery_agent_0_mean": 0.0625, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.03125, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.15625, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.0625, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 0.59375, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.09375, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 0.59375, "viable_onion_potting_agent_0_min": 0, 
"viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.09375, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.20000000298023224, "cur_lr": 0.0010000000474974513, "total_loss": -0.359554648399353, "policy_loss": -0.0014348567929118872, "vf_loss": 0.5835446119308472, "vf_explained_var": 0.0011739898473024368, "kl": 0.0003632040461525321, "entropy": 1.7912538051605225, "entropy_coeff": 0.20000000298023224, "model": {}}}}, "num_env_steps_sampled": 12800, "num_env_steps_trained": 12800, "num_agent_steps_sampled": 25600, 
"num_agent_steps_trained": 25600}, "sampler_results": {"episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.03125, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 20.0}, "policy_reward_mean": {"ppo": 4.015625}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.03125, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.125, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.75, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.8125, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.625, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, 
"onion_drop_agent_0_mean": 4.09375, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 4.125, "onion_drop_agent_1_min": 1, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.0625, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.09375, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 0.59375, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.09375, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.5, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.5625, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.125, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.1875, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 1.90625, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 1.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 1.125, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.3125, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.1875, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 1, "soup_delivery_agent_0_mean": 0.0625, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.03125, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.15625, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.0625, "soup_drop_agent_1_min": 0, 
"soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 0.59375, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.09375, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 0.59375, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.09375, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 
0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [28.0, 6.0, 6.0, 6.0, 3.0, 6.0, 9.0, 6.0, 8.0, 3.0, 11.0, 3.0, 8.0, 11.0, 3.0, 20.0, 8.0, 11.0, 9.0, 9.0, 6.0, 11.0, 8.0, 12.0, 6.0, 11.0, 6.0, 14.0, 0.0, 0.0, 9.0, 0.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 8.0, 3.0, 3.0, 0.0, 6.0, 0.0, 6.0, 3.0, 0.0, 0.0, 6.0, 3.0, 6.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 5.0, 6.0, 0.0, 3.0, 5.0, 3.0, 8.0, 3.0, 3.0, 0.0, 14.0, 6.0, 5.0, 3.0, 3.0, 8.0, 3.0, 6.0, 0.0, 9.0, 3.0, 3.0, 0.0, 11.0, 8.0, 0.0, 0.0, 12.0, 3.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 11.0, 0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 0.0, 0.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6989158420229374, "mean_inference_ms": 1.2856450085336348, "mean_action_processing_ms": 0.1339475425441613, "mean_env_wait_ms": 0.8226258031437905, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.03125, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 20.0}, "policy_reward_mean": {"ppo": 4.015625}, "hist_stats": {"episode_reward": [28.0, 6.0, 6.0, 6.0, 3.0, 6.0, 9.0, 6.0, 8.0, 3.0, 11.0, 3.0, 8.0, 11.0, 3.0, 20.0, 8.0, 11.0, 9.0, 9.0, 6.0, 11.0, 8.0, 12.0, 6.0, 11.0, 6.0, 14.0, 0.0, 0.0, 9.0, 0.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 8.0, 3.0, 3.0, 0.0, 6.0, 0.0, 6.0, 3.0, 0.0, 0.0, 6.0, 3.0, 6.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 5.0, 6.0, 0.0, 3.0, 5.0, 3.0, 8.0, 3.0, 3.0, 0.0, 14.0, 6.0, 5.0, 3.0, 3.0, 8.0, 3.0, 6.0, 0.0, 9.0, 3.0, 3.0, 0.0, 11.0, 8.0, 0.0, 0.0, 12.0, 3.0, 3.0, 0.0, 11.0, 
3.0, 3.0, 3.0, 11.0, 0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 0.0, 0.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6989158420229374, "mean_inference_ms": 1.2856450085336348, "mean_action_processing_ms": 0.1339475425441613, "mean_env_wait_ms": 0.8226258031437905, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 25600, "num_agent_steps_trained": 25600, "num_env_steps_sampled": 12800, "num_env_steps_trained": 12800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 12800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 25600, "timers": {"training_iteration_time_ms": 4486.776, "learn_time_ms": 1795.153, "learn_throughput": 7130.31, "synch_weights_time_ms": 46.233}, "counters": {"num_env_steps_sampled": 12800, "num_env_steps_trained": 12800, "num_agent_steps_sampled": 25600, "num_agent_steps_trained": 25600}, "done": false, "episodes_total": 32, "training_iteration": 1, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_19-59-58", "timestamp": 1666580398, "time_this_iter_s": 4.5041868686676025, "time_total_s": 4.5041868686676025, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], 
"post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": 
{"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", 
"post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, 
"_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 4.5041868686676025, "timesteps_since_restore": 0, "iterations_since_restore": 1, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.328571428571426, "ram_util_percent": 10.442857142857141}} +{"custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.0625, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, 
"tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.0625, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.28125, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.65625, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.015625, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.796875, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.796875, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.171875, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.046875, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 0.796875, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.9375, 
"potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.59375, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.765625, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.125, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.203125, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.0625, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.359375, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.96875, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 1.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.265625, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.203125, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 2, "soup_delivery_agent_0_mean": 0.046875, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.0625, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.125, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.078125, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 0.796875, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.9375, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 0.796875, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.9375, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.10000000149011612, "cur_lr": 0.0010000000474974513, "total_loss": -0.33062273263931274, "policy_loss": -0.0030750420410186052, "vf_loss": 0.559157133102417, "vf_explained_var": 0.001139489933848381, "kl": 0.00039957917761057615, "entropy": 1.7906370162963867, "entropy_coeff": 0.18297599256038666, "model": {}}}}, 
"num_env_steps_sampled": 25600, "num_env_steps_trained": 25600, "num_agent_steps_sampled": 51200, "num_agent_steps_trained": 51200}, "sampler_results": {"episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.0625, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 20.0}, "policy_reward_mean": {"ppo": 4.03125}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.0625, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.0625, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.28125, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.65625, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, 
"useful_onion_pickup_agent_1_mean": 4.015625, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.796875, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.796875, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.171875, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.046875, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 0.796875, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.9375, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.59375, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.765625, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.125, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.203125, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.0625, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.359375, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.96875, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 1.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.265625, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.203125, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 2, "soup_delivery_agent_0_mean": 0.046875, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.0625, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, 
"soup_drop_agent_0_mean": 0.125, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.078125, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 0.796875, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.9375, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 0.796875, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.9375, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, 
"useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [28.0, 6.0, 6.0, 6.0, 3.0, 6.0, 9.0, 6.0, 8.0, 3.0, 11.0, 3.0, 8.0, 11.0, 3.0, 20.0, 8.0, 11.0, 9.0, 9.0, 6.0, 11.0, 8.0, 12.0, 6.0, 11.0, 6.0, 14.0, 0.0, 0.0, 9.0, 0.0, 9.0, 6.0, 17.0, 6.0, 9.0, 6.0, 3.0, 9.0, 16.0, 6.0, 3.0, 11.0, 11.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 6.0, 8.0, 3.0, 6.0, 14.0, 6.0, 14.0, 12.0, 9.0, 3.0, 11.0, 6.0, 28.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 8.0, 3.0, 3.0, 0.0, 6.0, 0.0, 6.0, 3.0, 0.0, 0.0, 6.0, 3.0, 6.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 5.0, 6.0, 0.0, 3.0, 5.0, 3.0, 8.0, 3.0, 3.0, 0.0, 14.0, 6.0, 5.0, 3.0, 3.0, 8.0, 3.0, 6.0, 0.0, 9.0, 3.0, 3.0, 0.0, 11.0, 8.0, 0.0, 0.0, 12.0, 3.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 11.0, 0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 0.0, 0.0, 9.0, 0.0, 6.0, 0.0, 6.0, 11.0, 3.0, 3.0, 6.0, 3.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 5.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 0.0, 3.0, 6.0, 0.0, 0.0, 3.0, 0.0, 0.0, 3.0, 3.0, 0.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 6.0, 0.0, 3.0, 11.0, 0.0, 6.0, 8.0, 6.0, 9.0, 3.0, 3.0, 6.0, 3.0, 0.0, 8.0, 3.0, 6.0, 0.0, 11.0, 17.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028658943640999, "mean_inference_ms": 1.2718671499974072, "mean_action_processing_ms": 0.13463472279710942, "mean_env_wait_ms": 0.8300894303223083, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.0625, 
"episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 20.0}, "policy_reward_mean": {"ppo": 4.03125}, "hist_stats": {"episode_reward": [28.0, 6.0, 6.0, 6.0, 3.0, 6.0, 9.0, 6.0, 8.0, 3.0, 11.0, 3.0, 8.0, 11.0, 3.0, 20.0, 8.0, 11.0, 9.0, 9.0, 6.0, 11.0, 8.0, 12.0, 6.0, 11.0, 6.0, 14.0, 0.0, 0.0, 9.0, 0.0, 9.0, 6.0, 17.0, 6.0, 9.0, 6.0, 3.0, 9.0, 16.0, 6.0, 3.0, 11.0, 11.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 6.0, 8.0, 3.0, 6.0, 14.0, 6.0, 14.0, 12.0, 9.0, 3.0, 11.0, 6.0, 28.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 8.0, 3.0, 3.0, 0.0, 6.0, 0.0, 6.0, 3.0, 0.0, 0.0, 6.0, 3.0, 6.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 5.0, 6.0, 0.0, 3.0, 5.0, 3.0, 8.0, 3.0, 3.0, 0.0, 14.0, 6.0, 5.0, 3.0, 3.0, 8.0, 3.0, 6.0, 0.0, 9.0, 3.0, 3.0, 0.0, 11.0, 8.0, 0.0, 0.0, 12.0, 3.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 11.0, 0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 0.0, 0.0, 9.0, 0.0, 6.0, 0.0, 6.0, 11.0, 3.0, 3.0, 6.0, 3.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 5.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 0.0, 3.0, 6.0, 0.0, 0.0, 3.0, 0.0, 0.0, 3.0, 3.0, 0.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 6.0, 0.0, 3.0, 11.0, 0.0, 6.0, 8.0, 6.0, 9.0, 3.0, 3.0, 6.0, 3.0, 0.0, 8.0, 3.0, 6.0, 0.0, 11.0, 17.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028658943640999, "mean_inference_ms": 1.2718671499974072, "mean_action_processing_ms": 0.13463472279710942, "mean_env_wait_ms": 0.8300894303223083, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 51200, "num_agent_steps_trained": 51200, "num_env_steps_sampled": 25600, "num_env_steps_trained": 25600, "num_env_steps_sampled_this_iter": 
12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 25600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 51200, "timers": {"training_iteration_time_ms": 4118.887, "learn_time_ms": 1423.073, "learn_throughput": 8994.618, "synch_weights_time_ms": 29.77}, "counters": {"num_env_steps_sampled": 25600, "num_env_steps_trained": 25600, "num_agent_steps_sampled": 51200, "num_agent_steps_trained": 51200}, "done": false, "episodes_total": 64, "training_iteration": 2, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-02", "timestamp": 1666580402, "time_this_iter_s": 3.7993717193603516, "time_total_s": 8.303558588027954, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 
0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 
2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], 
[Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, 
"attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, 
"min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, 
"prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 8.303558588027954, "timesteps_since_restore": 0, "iterations_since_restore": 2, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.78333333333333, "ram_util_percent": 10.566666666666668}} +{"custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.375, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.010416666666667, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.1875, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.5520833333333335, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.7604166666666665, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.5833333333333335, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.7604166666666665, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.1354166666666667, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.2083333333333333, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 0.9166666666666666, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.875, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.78125, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.7395833333333335, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.125, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, 
"useful_dish_pickup_agent_1_mean": 0.19791666666666666, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.28125, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.2708333333333335, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.03125, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.8958333333333334, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.3020833333333333, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.28125, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.0625, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.08333333333333333, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.16666666666666666, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.15625, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 0.9166666666666666, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.875, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 0.9166666666666666, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.875, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, 
"viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.05000000074505806, "cur_lr": 0.0010000000474974513, "total_loss": -0.3004382848739624, "policy_loss": -0.003537239972501993, "vf_loss": 0.6018714904785156, "vf_explained_var": 0.00017357245087623596, "kl": 0.0004423881764523685, "entropy": 1.7895737886428833, "entropy_coeff": 0.16595199704170227, "model": {}}}}, "num_env_steps_sampled": 38400, "num_env_steps_trained": 38400, "num_agent_steps_sampled": 76800, "num_agent_steps_trained": 76800}, "sampler_results": {"episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.375, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 20.0}, "policy_reward_mean": {"ppo": 4.1875}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.375, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.010416666666667, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.1875, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.5520833333333335, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.7604166666666665, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 3.5833333333333335, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.7604166666666665, "onion_drop_agent_1_min": 0, 
"onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.1354166666666667, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.2083333333333333, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 0.9166666666666666, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.875, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.78125, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.7395833333333335, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.125, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.19791666666666666, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.28125, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.2708333333333335, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.03125, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.8958333333333334, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.3020833333333333, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.28125, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.0625, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.08333333333333333, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.16666666666666666, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.15625, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, 
"optimal_onion_potting_agent_0_mean": 0.9166666666666666, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.875, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 0.9166666666666666, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.875, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [28.0, 6.0, 6.0, 6.0, 3.0, 6.0, 9.0, 6.0, 8.0, 3.0, 11.0, 3.0, 8.0, 11.0, 3.0, 20.0, 8.0, 11.0, 9.0, 9.0, 6.0, 11.0, 8.0, 12.0, 6.0, 11.0, 6.0, 14.0, 0.0, 0.0, 9.0, 0.0, 9.0, 6.0, 17.0, 6.0, 9.0, 6.0, 3.0, 9.0, 16.0, 6.0, 3.0, 11.0, 11.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 6.0, 8.0, 3.0, 6.0, 14.0, 6.0, 14.0, 12.0, 9.0, 3.0, 11.0, 6.0, 28.0, 8.0, 3.0, 6.0, 9.0, 17.0, 6.0, 11.0, 11.0, 14.0, 8.0, 17.0, 3.0, 11.0, 9.0, 14.0, 3.0, 11.0, 16.0, 9.0, 0.0, 6.0, 12.0, 3.0, 23.0, 6.0, 6.0, 14.0, 14.0, 6.0, 3.0, 3.0, 6.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 8.0, 3.0, 3.0, 0.0, 6.0, 0.0, 6.0, 3.0, 0.0, 0.0, 6.0, 3.0, 6.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 5.0, 6.0, 0.0, 3.0, 5.0, 3.0, 8.0, 3.0, 3.0, 0.0, 14.0, 6.0, 5.0, 3.0, 3.0, 8.0, 3.0, 6.0, 0.0, 9.0, 3.0, 3.0, 0.0, 11.0, 8.0, 0.0, 0.0, 12.0, 3.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 11.0, 0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 0.0, 0.0, 9.0, 0.0, 6.0, 0.0, 6.0, 11.0, 3.0, 3.0, 6.0, 3.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 5.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 0.0, 3.0, 6.0, 0.0, 0.0, 3.0, 0.0, 0.0, 3.0, 3.0, 0.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 6.0, 0.0, 3.0, 11.0, 0.0, 6.0, 8.0, 6.0, 9.0, 3.0, 3.0, 6.0, 3.0, 0.0, 8.0, 3.0, 6.0, 0.0, 11.0, 17.0, 8.0, 0.0, 3.0, 0.0, 3.0, 3.0, 3.0, 6.0, 11.0, 6.0, 6.0, 0.0, 0.0, 11.0, 0.0, 11.0, 14.0, 0.0, 0.0, 8.0, 6.0, 11.0, 3.0, 0.0, 6.0, 5.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 5.0, 6.0, 5.0, 11.0, 3.0, 6.0, 
0.0, 0.0, 0.0, 6.0, 9.0, 3.0, 3.0, 0.0, 15.0, 8.0, 3.0, 3.0, 3.0, 3.0, 6.0, 8.0, 6.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028905627927106, "mean_inference_ms": 1.2622640403172178, "mean_action_processing_ms": 0.13486106382724458, "mean_env_wait_ms": 0.8341695324982732, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.375, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 20.0}, "policy_reward_mean": {"ppo": 4.1875}, "hist_stats": {"episode_reward": [28.0, 6.0, 6.0, 6.0, 3.0, 6.0, 9.0, 6.0, 8.0, 3.0, 11.0, 3.0, 8.0, 11.0, 3.0, 20.0, 8.0, 11.0, 9.0, 9.0, 6.0, 11.0, 8.0, 12.0, 6.0, 11.0, 6.0, 14.0, 0.0, 0.0, 9.0, 0.0, 9.0, 6.0, 17.0, 6.0, 9.0, 6.0, 3.0, 9.0, 16.0, 6.0, 3.0, 11.0, 11.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 6.0, 8.0, 3.0, 6.0, 14.0, 6.0, 14.0, 12.0, 9.0, 3.0, 11.0, 6.0, 28.0, 8.0, 3.0, 6.0, 9.0, 17.0, 6.0, 11.0, 11.0, 14.0, 8.0, 17.0, 3.0, 11.0, 9.0, 14.0, 3.0, 11.0, 16.0, 9.0, 0.0, 6.0, 12.0, 3.0, 23.0, 6.0, 6.0, 14.0, 14.0, 6.0, 3.0, 3.0, 6.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 8.0, 3.0, 3.0, 0.0, 6.0, 0.0, 6.0, 3.0, 0.0, 0.0, 6.0, 3.0, 6.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 5.0, 6.0, 0.0, 3.0, 5.0, 3.0, 8.0, 3.0, 3.0, 0.0, 14.0, 6.0, 5.0, 3.0, 3.0, 8.0, 3.0, 6.0, 0.0, 9.0, 3.0, 3.0, 0.0, 11.0, 8.0, 0.0, 0.0, 12.0, 3.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 11.0, 0.0, 0.0, 
0.0, 0.0, 3.0, 6.0, 0.0, 0.0, 9.0, 0.0, 6.0, 0.0, 6.0, 11.0, 3.0, 3.0, 6.0, 3.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 5.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 0.0, 3.0, 6.0, 0.0, 0.0, 3.0, 0.0, 0.0, 3.0, 3.0, 0.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 6.0, 0.0, 3.0, 11.0, 0.0, 6.0, 8.0, 6.0, 9.0, 3.0, 3.0, 6.0, 3.0, 0.0, 8.0, 3.0, 6.0, 0.0, 11.0, 17.0, 8.0, 0.0, 3.0, 0.0, 3.0, 3.0, 3.0, 6.0, 11.0, 6.0, 6.0, 0.0, 0.0, 11.0, 0.0, 11.0, 14.0, 0.0, 0.0, 8.0, 6.0, 11.0, 3.0, 0.0, 6.0, 5.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 5.0, 6.0, 5.0, 11.0, 3.0, 6.0, 0.0, 0.0, 0.0, 6.0, 9.0, 3.0, 3.0, 0.0, 15.0, 8.0, 3.0, 3.0, 3.0, 3.0, 6.0, 8.0, 6.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028905627927106, "mean_inference_ms": 1.2622640403172178, "mean_action_processing_ms": 0.13486106382724458, "mean_env_wait_ms": 0.8341695324982732, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 76800, "num_agent_steps_trained": 76800, "num_env_steps_sampled": 38400, "num_env_steps_trained": 38400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 38400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 76800, "timers": {"training_iteration_time_ms": 3969.635, "learn_time_ms": 1296.093, "learn_throughput": 9875.832, "synch_weights_time_ms": 24.94}, "counters": {"num_env_steps_sampled": 38400, "num_env_steps_trained": 38400, "num_agent_steps_sampled": 76800, "num_agent_steps_trained": 76800}, "done": false, "episodes_total": 96, "training_iteration": 3, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-06", "timestamp": 1666580406, "time_this_iter_s": 3.7223825454711914, "time_total_s": 12.025941133499146, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, 
"extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, 
"restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": 
{"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, 
"recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": 
false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": 
false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": 
"independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 12.025941133499146, "timesteps_since_restore": 0, "iterations_since_restore": 3, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.360000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.83, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.87, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 4.89, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 3.48, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 8, "useful_onion_pickup_agent_1_mean": 3.39, 
"useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 3.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 14, "useful_onion_drop_agent_0_mean": 1.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 1.04, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.79, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 3.06, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.99, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.14, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.9, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.32, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 2, "soup_pickup_agent_1_mean": 0.39, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.08, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.12, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 
0.24, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.04, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.79, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.04, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.79, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, 
"useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.02500000037252903, "cur_lr": 0.0010000000474974513, "total_loss": -0.2698257565498352, "policy_loss": -0.003558500437065959, "vf_loss": 0.6820269823074341, "vf_explained_var": -0.001748034730553627, "kl": 0.0005089464830234647, "entropy": 1.788435697555542, "entropy_coeff": 0.14892800152301788, "model": {}}}}, "num_env_steps_sampled": 51200, "num_env_steps_trained": 51200, "num_agent_steps_sampled": 102400, "num_agent_steps_trained": 102400}, "sampler_results": {"episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.83, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 17.0}, "policy_reward_mean": {"ppo": 4.415}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 8.83, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.87, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 4.89, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 3.48, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 8, "useful_onion_pickup_agent_1_mean": 3.39, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 3.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 14, "useful_onion_drop_agent_0_mean": 1.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 1.04, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.79, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 3.06, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.99, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.14, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.9, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.32, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 2, "soup_pickup_agent_1_mean": 0.39, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.08, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.12, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.24, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.04, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.79, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.04, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.79, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [0.0, 0.0, 9.0, 0.0, 9.0, 6.0, 17.0, 6.0, 9.0, 6.0, 3.0, 9.0, 16.0, 6.0, 3.0, 11.0, 11.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 6.0, 8.0, 3.0, 6.0, 14.0, 6.0, 14.0, 12.0, 9.0, 3.0, 11.0, 6.0, 28.0, 8.0, 3.0, 6.0, 9.0, 17.0, 6.0, 11.0, 11.0, 14.0, 8.0, 17.0, 3.0, 11.0, 9.0, 14.0, 3.0, 11.0, 16.0, 9.0, 0.0, 6.0, 12.0, 3.0, 23.0, 6.0, 6.0, 14.0, 14.0, 6.0, 3.0, 3.0, 6.0, 8.0, 17.0, 6.0, 6.0, 14.0, 14.0, 14.0, 11.0, 11.0, 17.0, 9.0, 14.0, 3.0, 17.0, 6.0, 19.0, 11.0, 11.0, 3.0, 0.0, 3.0, 25.0, 20.0, 11.0, 9.0, 0.0, 14.0, 6.0, 0.0, 14.0, 3.0, 11.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 0.0, 0.0, 9.0, 0.0, 6.0, 0.0, 6.0, 11.0, 3.0, 3.0, 6.0, 3.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 5.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 0.0, 3.0, 6.0, 
0.0, 0.0, 3.0, 0.0, 0.0, 3.0, 3.0, 0.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 6.0, 0.0, 3.0, 11.0, 0.0, 6.0, 8.0, 6.0, 9.0, 3.0, 3.0, 6.0, 3.0, 0.0, 8.0, 3.0, 6.0, 0.0, 11.0, 17.0, 8.0, 0.0, 3.0, 0.0, 3.0, 3.0, 3.0, 6.0, 11.0, 6.0, 6.0, 0.0, 0.0, 11.0, 0.0, 11.0, 14.0, 0.0, 0.0, 8.0, 6.0, 11.0, 3.0, 0.0, 6.0, 5.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 5.0, 6.0, 5.0, 11.0, 3.0, 6.0, 0.0, 0.0, 0.0, 6.0, 9.0, 3.0, 3.0, 0.0, 15.0, 8.0, 3.0, 3.0, 3.0, 3.0, 6.0, 8.0, 6.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 3.0, 0.0, 8.0, 11.0, 6.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 6.0, 8.0, 8.0, 6.0, 3.0, 8.0, 5.0, 6.0, 11.0, 6.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 12.0, 5.0, 3.0, 3.0, 10.0, 9.0, 3.0, 8.0, 8.0, 3.0, 3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 8.0, 17.0, 6.0, 14.0, 6.0, 5.0, 9.0, 0.0, 0.0, 0.0, 3.0, 11.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7017628475225973, "mean_inference_ms": 1.2446256924059935, "mean_action_processing_ms": 0.13484534254964872, "mean_env_wait_ms": 0.8348638696126399, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 8.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 17.0}, "policy_reward_mean": {"ppo": 4.415}, "hist_stats": {"episode_reward": [0.0, 0.0, 9.0, 0.0, 9.0, 6.0, 17.0, 6.0, 9.0, 6.0, 3.0, 9.0, 16.0, 6.0, 3.0, 11.0, 11.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 6.0, 8.0, 3.0, 6.0, 14.0, 6.0, 14.0, 12.0, 9.0, 3.0, 11.0, 6.0, 28.0, 8.0, 3.0, 6.0, 9.0, 17.0, 6.0, 11.0, 11.0, 14.0, 8.0, 17.0, 3.0, 11.0, 9.0, 14.0, 3.0, 11.0, 16.0, 9.0, 0.0, 6.0, 12.0, 3.0, 23.0, 6.0, 6.0, 14.0, 14.0, 6.0, 3.0, 3.0, 6.0, 8.0, 17.0, 6.0, 6.0, 14.0, 14.0, 14.0, 11.0, 11.0, 17.0, 9.0, 14.0, 3.0, 17.0, 6.0, 19.0, 11.0, 11.0, 3.0, 0.0, 3.0, 25.0, 20.0, 11.0, 9.0, 0.0, 14.0, 6.0, 0.0, 14.0, 3.0, 11.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 0.0, 0.0, 0.0, 3.0, 6.0, 0.0, 0.0, 9.0, 0.0, 6.0, 0.0, 6.0, 11.0, 3.0, 3.0, 6.0, 3.0, 3.0, 3.0, 3.0, 0.0, 3.0, 6.0, 5.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 3.0, 8.0, 3.0, 0.0, 3.0, 6.0, 0.0, 0.0, 3.0, 0.0, 0.0, 3.0, 3.0, 0.0, 3.0, 3.0, 3.0, 5.0, 0.0, 3.0, 6.0, 0.0, 3.0, 11.0, 0.0, 6.0, 8.0, 6.0, 9.0, 3.0, 3.0, 6.0, 3.0, 0.0, 8.0, 3.0, 6.0, 0.0, 11.0, 17.0, 8.0, 0.0, 3.0, 0.0, 3.0, 3.0, 3.0, 6.0, 11.0, 6.0, 6.0, 0.0, 0.0, 11.0, 0.0, 11.0, 14.0, 0.0, 0.0, 8.0, 6.0, 11.0, 3.0, 0.0, 6.0, 5.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 5.0, 6.0, 5.0, 11.0, 3.0, 6.0, 0.0, 0.0, 0.0, 6.0, 9.0, 3.0, 3.0, 0.0, 15.0, 8.0, 3.0, 3.0, 3.0, 3.0, 6.0, 8.0, 6.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 3.0, 0.0, 8.0, 11.0, 6.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 6.0, 8.0, 8.0, 6.0, 3.0, 8.0, 5.0, 6.0, 11.0, 6.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 12.0, 5.0, 3.0, 3.0, 10.0, 9.0, 3.0, 8.0, 8.0, 3.0, 3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 8.0, 17.0, 6.0, 14.0, 6.0, 5.0, 9.0, 0.0, 0.0, 0.0, 3.0, 11.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7017628475225973, "mean_inference_ms": 1.2446256924059935, "mean_action_processing_ms": 0.13484534254964872, "mean_env_wait_ms": 0.8348638696126399, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 102400, "num_agent_steps_trained": 102400, "num_env_steps_sampled": 51200, "num_env_steps_trained": 51200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 51200, 
"num_steps_trained_this_iter": 12800, "agent_timesteps_total": 102400, "timers": {"training_iteration_time_ms": 3883.504, "learn_time_ms": 1240.403, "learn_throughput": 10319.223, "synch_weights_time_ms": 22.878}, "counters": {"num_env_steps_sampled": 51200, "num_env_steps_trained": 51200, "num_agent_steps_sampled": 102400, "num_agent_steps_trained": 102400}, "done": false, "episodes_total": 128, "training_iteration": 4, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-10", "timestamp": 1666580410, "time_this_iter_s": 3.6808454990386963, "time_total_s": 15.706786632537842, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, 
"attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, 
"eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, 
"attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, 
"min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, 
"prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 15.706786632537842, "timesteps_since_restore": 0, "iterations_since_restore": 4, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.53333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 9.37, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.15, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.25, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 3.41, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.33, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 3.56, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.94, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 14, "useful_onion_drop_agent_0_mean": 1.41, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 1.05, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.81, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.75, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.64, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.22, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, 
"dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.76, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 2, "soup_pickup_agent_1_mean": 0.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.13, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.23, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.28, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.05, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.81, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.05, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.81, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.012500000186264515, "cur_lr": 0.0010000000474974513, "total_loss": -0.23854368925094604, "policy_loss": -0.0029007629491388798, "vf_loss": 0.5773570537567139, "vf_explained_var": -0.0037215352058410645, "kl": 0.0004978743381798267, "entropy": 1.7869577407836914, "entropy_coeff": 0.1319040060043335, "model": {}}}}, "num_env_steps_sampled": 64000, "num_env_steps_trained": 64000, "num_agent_steps_sampled": 128000, "num_agent_steps_trained": 128000}, "sampler_results": {"episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 9.37, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 17.0}, "policy_reward_mean": {"ppo": 4.685}, "custom_metrics": {"sparse_reward_mean": 0.0, "sparse_reward_min": 0, "sparse_reward_max": 0, "shaped_reward_mean": 9.37, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.15, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.25, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 3.41, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.33, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 3.56, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.94, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 14, "useful_onion_drop_agent_0_mean": 1.41, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 1.05, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.81, 
"potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.75, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.64, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.22, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.76, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 0.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 2, "soup_pickup_agent_1_mean": 0.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.13, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.23, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.28, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.05, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.81, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.05, 
"viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.81, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [3.0, 11.0, 6.0, 28.0, 8.0, 3.0, 6.0, 9.0, 17.0, 6.0, 11.0, 11.0, 14.0, 8.0, 17.0, 3.0, 11.0, 9.0, 14.0, 3.0, 11.0, 16.0, 9.0, 0.0, 6.0, 12.0, 3.0, 23.0, 6.0, 6.0, 14.0, 14.0, 6.0, 3.0, 3.0, 6.0, 8.0, 17.0, 6.0, 6.0, 14.0, 14.0, 14.0, 11.0, 11.0, 17.0, 9.0, 14.0, 3.0, 17.0, 6.0, 19.0, 11.0, 11.0, 3.0, 0.0, 3.0, 25.0, 20.0, 11.0, 9.0, 0.0, 14.0, 6.0, 0.0, 14.0, 3.0, 11.0, 3.0, 6.0, 3.0, 9.0, 8.0, 11.0, 14.0, 3.0, 17.0, 17.0, 3.0, 11.0, 6.0, 20.0, 11.0, 6.0, 
9.0, 3.0, 3.0, 11.0, 11.0, 6.0, 6.0, 8.0, 11.0, 3.0, 12.0, 14.0, 3.0, 6.0, 11.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 0.0, 8.0, 3.0, 6.0, 0.0, 11.0, 17.0, 8.0, 0.0, 3.0, 0.0, 3.0, 3.0, 3.0, 6.0, 11.0, 6.0, 6.0, 0.0, 0.0, 11.0, 0.0, 11.0, 14.0, 0.0, 0.0, 8.0, 6.0, 11.0, 3.0, 0.0, 6.0, 5.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 5.0, 6.0, 5.0, 11.0, 3.0, 6.0, 0.0, 0.0, 0.0, 6.0, 9.0, 3.0, 3.0, 0.0, 15.0, 8.0, 3.0, 3.0, 3.0, 3.0, 6.0, 8.0, 6.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 3.0, 0.0, 8.0, 11.0, 6.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 6.0, 8.0, 8.0, 6.0, 3.0, 8.0, 5.0, 6.0, 11.0, 6.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 12.0, 5.0, 3.0, 3.0, 10.0, 9.0, 3.0, 8.0, 8.0, 3.0, 3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 8.0, 17.0, 6.0, 14.0, 6.0, 5.0, 9.0, 0.0, 0.0, 0.0, 3.0, 11.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 6.0, 3.0, 0.0, 8.0, 5.0, 6.0, 9.0, 5.0, 0.0, 3.0, 6.0, 11.0, 11.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 0.0, 14.0, 6.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 8.0, 3.0, 8.0, 0.0, 6.0, 3.0, 3.0, 8.0, 0.0, 0.0, 11.0, 0.0, 3.0, 6.0, 6.0, 5.0, 9.0, 3.0, 0.0, 6.0, 0.0, 11.0, 0.0, 6.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6993358203765415, "mean_inference_ms": 1.234363948034564, "mean_action_processing_ms": 0.13489033625826413, "mean_env_wait_ms": 0.8388656832452103, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 28.0, "episode_reward_min": 0.0, "episode_reward_mean": 9.37, 
"episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 17.0}, "policy_reward_mean": {"ppo": 4.685}, "hist_stats": {"episode_reward": [3.0, 11.0, 6.0, 28.0, 8.0, 3.0, 6.0, 9.0, 17.0, 6.0, 11.0, 11.0, 14.0, 8.0, 17.0, 3.0, 11.0, 9.0, 14.0, 3.0, 11.0, 16.0, 9.0, 0.0, 6.0, 12.0, 3.0, 23.0, 6.0, 6.0, 14.0, 14.0, 6.0, 3.0, 3.0, 6.0, 8.0, 17.0, 6.0, 6.0, 14.0, 14.0, 14.0, 11.0, 11.0, 17.0, 9.0, 14.0, 3.0, 17.0, 6.0, 19.0, 11.0, 11.0, 3.0, 0.0, 3.0, 25.0, 20.0, 11.0, 9.0, 0.0, 14.0, 6.0, 0.0, 14.0, 3.0, 11.0, 3.0, 6.0, 3.0, 9.0, 8.0, 11.0, 14.0, 3.0, 17.0, 17.0, 3.0, 11.0, 6.0, 20.0, 11.0, 6.0, 9.0, 3.0, 3.0, 11.0, 11.0, 6.0, 6.0, 8.0, 11.0, 3.0, 12.0, 14.0, 3.0, 6.0, 11.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 0.0, 8.0, 3.0, 6.0, 0.0, 11.0, 17.0, 8.0, 0.0, 3.0, 0.0, 3.0, 3.0, 3.0, 6.0, 11.0, 6.0, 6.0, 0.0, 0.0, 11.0, 0.0, 11.0, 14.0, 0.0, 0.0, 8.0, 6.0, 11.0, 3.0, 0.0, 6.0, 5.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 5.0, 6.0, 5.0, 11.0, 3.0, 6.0, 0.0, 0.0, 0.0, 6.0, 9.0, 3.0, 3.0, 0.0, 15.0, 8.0, 3.0, 3.0, 3.0, 3.0, 6.0, 8.0, 6.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 3.0, 0.0, 8.0, 11.0, 6.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 6.0, 8.0, 8.0, 6.0, 3.0, 8.0, 5.0, 6.0, 11.0, 6.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 12.0, 5.0, 3.0, 3.0, 10.0, 9.0, 3.0, 8.0, 8.0, 3.0, 3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 8.0, 17.0, 6.0, 14.0, 6.0, 5.0, 9.0, 0.0, 0.0, 0.0, 3.0, 11.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 0.0, 6.0, 
0.0, 0.0, 3.0, 6.0, 3.0, 0.0, 8.0, 5.0, 6.0, 9.0, 5.0, 0.0, 3.0, 6.0, 11.0, 11.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 0.0, 14.0, 6.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 8.0, 3.0, 8.0, 0.0, 6.0, 3.0, 3.0, 8.0, 0.0, 0.0, 11.0, 0.0, 3.0, 6.0, 6.0, 5.0, 9.0, 3.0, 0.0, 6.0, 0.0, 11.0, 0.0, 6.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6993358203765415, "mean_inference_ms": 1.234363948034564, "mean_action_processing_ms": 0.13489033625826413, "mean_env_wait_ms": 0.8388656832452103, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 128000, "num_agent_steps_trained": 128000, "num_env_steps_sampled": 64000, "num_env_steps_trained": 64000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 64000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 128000, "timers": {"training_iteration_time_ms": 3867.837, "learn_time_ms": 1225.733, "learn_throughput": 10442.73, "synch_weights_time_ms": 22.151}, "counters": {"num_env_steps_sampled": 64000, "num_env_steps_trained": 64000, "num_agent_steps_sampled": 128000, "num_agent_steps_trained": 128000}, "done": false, "episodes_total": 160, "training_iteration": 5, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-14", "timestamp": 1666580414, "time_this_iter_s": 3.8698205947875977, "time_total_s": 19.57660722732544, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, 
"device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": 
{"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, 
"log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, 
"train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 
100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 19.57660722732544, "timesteps_since_restore": 0, "iterations_since_restore": 5, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.500000000000004, "ram_util_percent": 10.6}} 
+{"custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 10.13, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.19, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.45, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 3.45, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.73, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 3.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 4.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 14, "useful_onion_drop_agent_0_mean": 1.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.5, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 1.07, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.92, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.87, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.82, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.21, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 2.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 1.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 1.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 8, "soup_pickup_agent_0_mean": 0.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.5, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.14, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.23, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.3, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.07, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.92, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.07, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.92, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0062500000931322575, "cur_lr": 0.0010000000474974513, "total_loss": -0.2105298936367035, "policy_loss": -0.005385499447584152, "vf_loss": 0.8951879143714905, 
"vf_explained_var": 0.004527205601334572, "kl": 0.0004916870966553688, "entropy": 1.7865335941314697, "entropy_coeff": 0.11488000303506851, "model": {}}}}, "num_env_steps_sampled": 76800, "num_env_steps_trained": 76800, "num_agent_steps_sampled": 153600, "num_agent_steps_trained": 153600}, "sampler_results": {"episode_reward_max": 54.0, "episode_reward_min": 0.0, "episode_reward_mean": 10.53, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 31.0}, "policy_reward_mean": {"ppo": 5.265}, "custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 10.13, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.19, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.45, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 
16, "useful_onion_pickup_agent_0_mean": 3.45, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.73, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 3.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 4.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 14, "useful_onion_drop_agent_0_mean": 1.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.5, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 1.07, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 0.92, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 3, "dish_pickup_agent_0_mean": 2.87, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.82, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 1, "useful_dish_pickup_agent_1_mean": 0.21, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 2.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 1.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 1.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 8, "soup_pickup_agent_0_mean": 0.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.5, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.14, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.23, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.3, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.07, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 0.92, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 3, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.07, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 0.92, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 3, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [6.0, 3.0, 3.0, 6.0, 8.0, 17.0, 6.0, 6.0, 14.0, 14.0, 14.0, 11.0, 11.0, 17.0, 9.0, 14.0, 3.0, 17.0, 6.0, 19.0, 11.0, 11.0, 3.0, 0.0, 3.0, 25.0, 20.0, 11.0, 9.0, 0.0, 14.0, 6.0, 0.0, 14.0, 3.0, 11.0, 3.0, 6.0, 3.0, 9.0, 8.0, 11.0, 14.0, 3.0, 17.0, 17.0, 3.0, 11.0, 6.0, 20.0, 11.0, 6.0, 9.0, 3.0, 3.0, 11.0, 11.0, 6.0, 6.0, 8.0, 11.0, 3.0, 12.0, 14.0, 3.0, 6.0, 11.0, 9.0, 14.0, 3.0, 17.0, 9.0, 20.0, 22.0, 20.0, 8.0, 8.0, 17.0, 11.0, 3.0, 11.0, 3.0, 12.0, 3.0, 22.0, 8.0, 17.0, 3.0, 22.0, 12.0, 8.0, 14.0, 17.0, 19.0, 3.0, 54.0, 28.0, 14.0, 12.0, 0.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 3.0, 0.0, 8.0, 11.0, 6.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 6.0, 8.0, 8.0, 6.0, 3.0, 8.0, 5.0, 6.0, 11.0, 6.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 12.0, 5.0, 3.0, 3.0, 10.0, 9.0, 3.0, 8.0, 8.0, 3.0, 3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 8.0, 17.0, 6.0, 14.0, 6.0, 5.0, 9.0, 0.0, 0.0, 0.0, 3.0, 11.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 6.0, 3.0, 0.0, 8.0, 5.0, 6.0, 9.0, 5.0, 0.0, 3.0, 6.0, 11.0, 11.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 0.0, 14.0, 6.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 8.0, 3.0, 
8.0, 0.0, 6.0, 3.0, 3.0, 8.0, 0.0, 0.0, 11.0, 0.0, 3.0, 6.0, 6.0, 5.0, 9.0, 3.0, 0.0, 6.0, 0.0, 11.0, 0.0, 6.0, 3.0, 8.0, 6.0, 0.0, 3.0, 6.0, 11.0, 3.0, 6.0, 14.0, 6.0, 8.0, 14.0, 12.0, 8.0, 0.0, 8.0, 5.0, 3.0, 8.0, 9.0, 11.0, 0.0, 3.0, 0.0, 3.0, 8.0, 0.0, 3.0, 6.0, 6.0, 0.0, 3.0, 8.0, 14.0, 8.0, 0.0, 12.0, 5.0, 0.0, 3.0, 6.0, 16.0, 3.0, 9.0, 0.0, 8.0, 11.0, 3.0, 6.0, 11.0, 3.0, 16.0, 3.0, 0.0, 31.0, 23.0, 14.0, 14.0, 3.0, 11.0, 12.0, 0.0, 0.0, 0.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6987260941959224, "mean_inference_ms": 1.2293985973594628, "mean_action_processing_ms": 0.13473542591379423, "mean_env_wait_ms": 0.840520264494256, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 54.0, "episode_reward_min": 0.0, "episode_reward_mean": 10.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 31.0}, "policy_reward_mean": {"ppo": 5.265}, "hist_stats": {"episode_reward": [6.0, 3.0, 3.0, 6.0, 8.0, 17.0, 6.0, 6.0, 14.0, 14.0, 14.0, 11.0, 11.0, 17.0, 9.0, 14.0, 3.0, 17.0, 6.0, 19.0, 11.0, 11.0, 3.0, 0.0, 3.0, 25.0, 20.0, 11.0, 9.0, 0.0, 14.0, 6.0, 0.0, 14.0, 3.0, 11.0, 3.0, 6.0, 3.0, 9.0, 8.0, 11.0, 14.0, 3.0, 17.0, 17.0, 3.0, 11.0, 6.0, 20.0, 11.0, 6.0, 9.0, 3.0, 3.0, 11.0, 11.0, 6.0, 6.0, 8.0, 11.0, 3.0, 12.0, 14.0, 3.0, 6.0, 11.0, 9.0, 14.0, 3.0, 17.0, 9.0, 20.0, 22.0, 20.0, 8.0, 8.0, 17.0, 11.0, 3.0, 11.0, 3.0, 12.0, 3.0, 22.0, 8.0, 17.0, 3.0, 22.0, 12.0, 8.0, 14.0, 17.0, 19.0, 3.0, 54.0, 28.0, 14.0, 12.0, 0.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 0.0, 3.0, 0.0, 3.0, 0.0, 3.0, 3.0, 0.0, 8.0, 11.0, 6.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 6.0, 8.0, 8.0, 6.0, 3.0, 8.0, 5.0, 6.0, 11.0, 6.0, 3.0, 6.0, 11.0, 3.0, 3.0, 0.0, 12.0, 5.0, 3.0, 3.0, 10.0, 9.0, 3.0, 8.0, 8.0, 3.0, 3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 8.0, 17.0, 6.0, 14.0, 6.0, 5.0, 9.0, 0.0, 0.0, 0.0, 3.0, 11.0, 6.0, 0.0, 0.0, 0.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 6.0, 3.0, 0.0, 8.0, 5.0, 6.0, 9.0, 5.0, 0.0, 3.0, 6.0, 11.0, 11.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 0.0, 14.0, 6.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 8.0, 3.0, 8.0, 0.0, 6.0, 3.0, 3.0, 8.0, 0.0, 0.0, 11.0, 0.0, 3.0, 6.0, 6.0, 5.0, 9.0, 3.0, 0.0, 6.0, 0.0, 11.0, 0.0, 6.0, 3.0, 8.0, 6.0, 0.0, 3.0, 6.0, 11.0, 3.0, 6.0, 14.0, 6.0, 8.0, 14.0, 12.0, 8.0, 0.0, 8.0, 5.0, 3.0, 8.0, 9.0, 11.0, 0.0, 3.0, 0.0, 3.0, 8.0, 0.0, 3.0, 6.0, 6.0, 0.0, 3.0, 8.0, 14.0, 8.0, 0.0, 12.0, 5.0, 0.0, 3.0, 6.0, 16.0, 3.0, 9.0, 0.0, 8.0, 11.0, 3.0, 6.0, 11.0, 3.0, 16.0, 3.0, 0.0, 31.0, 23.0, 14.0, 14.0, 3.0, 11.0, 12.0, 0.0, 0.0, 0.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6987260941959224, "mean_inference_ms": 1.2293985973594628, "mean_action_processing_ms": 0.13473542591379423, "mean_env_wait_ms": 0.840520264494256, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 153600, "num_agent_steps_trained": 153600, "num_env_steps_sampled": 76800, "num_env_steps_trained": 76800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 76800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 153600, "timers": {"training_iteration_time_ms": 3871.15, "learn_time_ms": 1229.232, "learn_throughput": 10413.005, "synch_weights_time_ms": 21.063}, "counters": {"num_env_steps_sampled": 76800, "num_env_steps_trained": 76800, "num_agent_steps_sampled": 153600, "num_agent_steps_trained": 
153600}, "done": false, "episodes_total": 192, "training_iteration": 6, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-18", "timestamp": 1666580418, "time_this_iter_s": 3.949986696243286, "time_total_s": 23.526593923568726, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, 
"disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, 
"actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 23.526593923568726, "timesteps_since_restore": 0, "iterations_since_restore": 6, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.883333333333336, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 11.08, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.11, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.53, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.36, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.8, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.55, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.97, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.46, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.5, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 1.07, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.02, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 4, "dish_pickup_agent_0_mean": 2.79, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.63, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.96, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 1.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 8, "soup_pickup_agent_0_mean": 0.54, 
"soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 3, "soup_delivery_agent_0_mean": 0.13, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.13, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.3, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.32, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 1.07, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.02, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 4, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.07, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.02, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 4, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0031250000465661287, "cur_lr": 0.0010000000474974513, "total_loss": -0.1758706122636795, "policy_loss": -0.001237577642314136, "vf_loss": 0.8410797119140625, "vf_explained_var": 0.007221847772598267, "kl": 0.0006372901843860745, "entropy": 1.7854719161987305, "entropy_coeff": 0.09785600006580353, "model": {}}}}, "num_env_steps_sampled": 89600, "num_env_steps_trained": 89600, "num_agent_steps_sampled": 179200, "num_agent_steps_trained": 179200}, "sampler_results": {"episode_reward_max": 54.0, "episode_reward_min": 0.0, "episode_reward_mean": 11.48, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 31.0}, "policy_reward_mean": {"ppo": 5.74}, "custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 11.08, "shaped_reward_min": 0, "shaped_reward_max": 28, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.11, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.53, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.36, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.8, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.55, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.97, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.46, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.5, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 1.07, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.02, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 4, "dish_pickup_agent_0_mean": 2.79, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.63, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.23, "useful_dish_pickup_agent_0_min": 
0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.96, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 1.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 8, "soup_pickup_agent_0_mean": 0.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 3, "soup_delivery_agent_0_mean": 0.13, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.13, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.3, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.32, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 1.07, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.02, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 4, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.07, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.02, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 4, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [0.0, 14.0, 3.0, 11.0, 3.0, 6.0, 3.0, 9.0, 8.0, 11.0, 14.0, 3.0, 17.0, 17.0, 3.0, 11.0, 6.0, 20.0, 11.0, 6.0, 9.0, 3.0, 3.0, 11.0, 11.0, 6.0, 6.0, 8.0, 11.0, 3.0, 12.0, 14.0, 3.0, 6.0, 11.0, 9.0, 14.0, 3.0, 17.0, 9.0, 20.0, 22.0, 20.0, 8.0, 8.0, 17.0, 11.0, 3.0, 11.0, 3.0, 12.0, 3.0, 22.0, 8.0, 17.0, 3.0, 22.0, 12.0, 8.0, 14.0, 17.0, 19.0, 3.0, 54.0, 28.0, 14.0, 12.0, 0.0, 11.0, 17.0, 26.0, 11.0, 3.0, 11.0, 8.0, 3.0, 14.0, 14.0, 19.0, 11.0, 11.0, 6.0, 3.0, 11.0, 8.0, 14.0, 11.0, 23.0, 17.0, 16.0, 25.0, 14.0, 8.0, 14.0, 20.0, 14.0, 3.0, 3.0, 17.0, 26.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 0.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 6.0, 3.0, 0.0, 8.0, 5.0, 6.0, 9.0, 5.0, 0.0, 3.0, 6.0, 11.0, 11.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 0.0, 14.0, 6.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 8.0, 3.0, 8.0, 0.0, 6.0, 3.0, 3.0, 8.0, 0.0, 0.0, 11.0, 0.0, 3.0, 6.0, 6.0, 5.0, 9.0, 3.0, 0.0, 6.0, 0.0, 11.0, 0.0, 6.0, 3.0, 8.0, 6.0, 0.0, 3.0, 6.0, 11.0, 3.0, 6.0, 14.0, 6.0, 8.0, 14.0, 12.0, 8.0, 0.0, 8.0, 5.0, 3.0, 8.0, 9.0, 11.0, 0.0, 3.0, 0.0, 3.0, 8.0, 0.0, 3.0, 6.0, 6.0, 0.0, 3.0, 8.0, 14.0, 8.0, 0.0, 12.0, 5.0, 0.0, 3.0, 6.0, 16.0, 3.0, 9.0, 0.0, 8.0, 11.0, 3.0, 6.0, 11.0, 3.0, 16.0, 3.0, 0.0, 31.0, 23.0, 14.0, 14.0, 3.0, 11.0, 12.0, 0.0, 0.0, 0.0, 6.0, 5.0, 6.0, 11.0, 12.0, 14.0, 3.0, 8.0, 0.0, 3.0, 3.0, 8.0, 8.0, 0.0, 0.0, 3.0, 9.0, 5.0, 3.0, 11.0, 16.0, 3.0, 8.0, 3.0, 6.0, 5.0, 6.0, 0.0, 3.0, 0.0, 8.0, 3.0, 3.0, 5.0, 11.0, 3.0, 5.0, 6.0, 15.0, 8.0, 8.0, 9.0, 8.0, 8.0, 11.0, 14.0, 5.0, 9.0, 0.0, 8.0, 11.0, 3.0, 11.0, 9.0, 9.0, 5.0, 3.0, 0.0, 0.0, 3.0, 6.0, 11.0, 11.0, 15.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.701101003459848, "mean_inference_ms": 1.2283407266417963, "mean_action_processing_ms": 0.13500840795895466, "mean_env_wait_ms": 0.8444517091174393, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 54.0, "episode_reward_min": 0.0, "episode_reward_mean": 11.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 31.0}, "policy_reward_mean": {"ppo": 5.74}, "hist_stats": {"episode_reward": [0.0, 14.0, 3.0, 11.0, 3.0, 6.0, 3.0, 9.0, 8.0, 11.0, 14.0, 3.0, 17.0, 17.0, 3.0, 11.0, 6.0, 20.0, 11.0, 6.0, 
9.0, 3.0, 3.0, 11.0, 11.0, 6.0, 6.0, 8.0, 11.0, 3.0, 12.0, 14.0, 3.0, 6.0, 11.0, 9.0, 14.0, 3.0, 17.0, 9.0, 20.0, 22.0, 20.0, 8.0, 8.0, 17.0, 11.0, 3.0, 11.0, 3.0, 12.0, 3.0, 22.0, 8.0, 17.0, 3.0, 22.0, 12.0, 8.0, 14.0, 17.0, 19.0, 3.0, 54.0, 28.0, 14.0, 12.0, 0.0, 11.0, 17.0, 26.0, 11.0, 3.0, 11.0, 8.0, 3.0, 14.0, 14.0, 19.0, 11.0, 11.0, 6.0, 3.0, 11.0, 8.0, 14.0, 11.0, 23.0, 17.0, 16.0, 25.0, 14.0, 8.0, 14.0, 20.0, 14.0, 3.0, 3.0, 17.0, 26.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 0.0, 11.0, 3.0, 3.0, 0.0, 3.0, 8.0, 3.0, 0.0, 6.0, 0.0, 0.0, 3.0, 6.0, 3.0, 0.0, 8.0, 5.0, 6.0, 9.0, 5.0, 0.0, 3.0, 6.0, 11.0, 11.0, 6.0, 0.0, 3.0, 3.0, 8.0, 6.0, 0.0, 14.0, 6.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 3.0, 0.0, 0.0, 3.0, 3.0, 8.0, 3.0, 8.0, 0.0, 6.0, 3.0, 3.0, 8.0, 0.0, 0.0, 11.0, 0.0, 3.0, 6.0, 6.0, 5.0, 9.0, 3.0, 0.0, 6.0, 0.0, 11.0, 0.0, 6.0, 3.0, 8.0, 6.0, 0.0, 3.0, 6.0, 11.0, 3.0, 6.0, 14.0, 6.0, 8.0, 14.0, 12.0, 8.0, 0.0, 8.0, 5.0, 3.0, 8.0, 9.0, 11.0, 0.0, 3.0, 0.0, 3.0, 8.0, 0.0, 3.0, 6.0, 6.0, 0.0, 3.0, 8.0, 14.0, 8.0, 0.0, 12.0, 5.0, 0.0, 3.0, 6.0, 16.0, 3.0, 9.0, 0.0, 8.0, 11.0, 3.0, 6.0, 11.0, 3.0, 16.0, 3.0, 0.0, 31.0, 23.0, 14.0, 14.0, 3.0, 11.0, 12.0, 0.0, 0.0, 0.0, 6.0, 5.0, 6.0, 11.0, 12.0, 14.0, 3.0, 8.0, 0.0, 3.0, 3.0, 8.0, 8.0, 0.0, 0.0, 3.0, 9.0, 5.0, 3.0, 11.0, 16.0, 3.0, 8.0, 3.0, 6.0, 5.0, 6.0, 0.0, 3.0, 0.0, 8.0, 3.0, 3.0, 5.0, 11.0, 3.0, 5.0, 6.0, 15.0, 8.0, 8.0, 9.0, 8.0, 8.0, 11.0, 14.0, 5.0, 9.0, 0.0, 8.0, 11.0, 3.0, 11.0, 9.0, 9.0, 5.0, 3.0, 0.0, 
0.0, 3.0, 6.0, 11.0, 11.0, 15.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.701101003459848, "mean_inference_ms": 1.2283407266417963, "mean_action_processing_ms": 0.13500840795895466, "mean_env_wait_ms": 0.8444517091174393, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 179200, "num_agent_steps_trained": 179200, "num_env_steps_sampled": 89600, "num_env_steps_trained": 89600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 89600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 179200, "timers": {"training_iteration_time_ms": 3869.742, "learn_time_ms": 1221.337, "learn_throughput": 10480.315, "synch_weights_time_ms": 19.85}, "counters": {"num_env_steps_sampled": 89600, "num_env_steps_trained": 89600, "num_agent_steps_sampled": 179200, "num_agent_steps_trained": 179200}, "done": false, "episodes_total": 224, "training_iteration": 7, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-22", "timestamp": 1666580422, "time_this_iter_s": 3.916262149810791, "time_total_s": 27.442856073379517, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", 
"free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], 
"post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, 
"_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 27.442856073379517, "timesteps_since_restore": 0, "iterations_since_restore": 7, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 22.03333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 12.78, "shaped_reward_min": 0, "shaped_reward_max": 33, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, 
"tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.96, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.41, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.43, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.92, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.75, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 1.24, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.12, "potting_onion_agent_1_min": 0, 
"potting_onion_agent_1_max": 4, "dish_pickup_agent_0_mean": 3.17, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.26, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 2.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.96, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 1.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 8, "soup_pickup_agent_0_mean": 0.56, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 3, "soup_delivery_agent_0_mean": 0.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.14, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.25, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.38, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 1.24, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.12, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 4, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.24, "viable_onion_potting_agent_0_min": 0, 
"viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.12, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 4, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0015625000232830644, "cur_lr": 0.0010000000474974513, "total_loss": -0.14512300491333008, "policy_loss": -0.0010335970437154174, "vf_loss": 0.9326763153076172, "vf_explained_var": 0.009893229231238365, "kl": 0.0006305932183749974, "entropy": 1.7837448120117188, "entropy_coeff": 0.08083199709653854, "model": {}}}}, "num_env_steps_sampled": 102400, "num_env_steps_trained": 102400, "num_agent_steps_sampled": 204800, 
"num_agent_steps_trained": 204800}, "sampler_results": {"episode_reward_max": 54.0, "episode_reward_min": 0.0, "episode_reward_mean": 13.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 31.0}, "policy_reward_mean": {"ppo": 6.59}, "custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 12.78, "shaped_reward_min": 0, "shaped_reward_max": 33, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.96, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.41, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.43, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.92, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, 
"onion_drop_agent_0_mean": 3.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.75, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 1.24, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.12, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 4, "dish_pickup_agent_0_mean": 3.17, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 2.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.26, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 11, "dish_drop_agent_1_mean": 2.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.96, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 1.13, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 8, "soup_pickup_agent_0_mean": 0.56, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 3, "soup_delivery_agent_0_mean": 0.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.14, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.25, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.38, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, 
"optimal_onion_potting_agent_0_mean": 1.24, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.12, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 4, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.24, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.12, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 4, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 
0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [3.0, 6.0, 11.0, 9.0, 14.0, 3.0, 17.0, 9.0, 20.0, 22.0, 20.0, 8.0, 8.0, 17.0, 11.0, 3.0, 11.0, 3.0, 12.0, 3.0, 22.0, 8.0, 17.0, 3.0, 22.0, 12.0, 8.0, 14.0, 17.0, 19.0, 3.0, 54.0, 28.0, 14.0, 12.0, 0.0, 11.0, 17.0, 26.0, 11.0, 3.0, 11.0, 8.0, 3.0, 14.0, 14.0, 19.0, 11.0, 11.0, 6.0, 3.0, 11.0, 8.0, 14.0, 11.0, 23.0, 17.0, 16.0, 25.0, 14.0, 8.0, 14.0, 20.0, 14.0, 3.0, 3.0, 17.0, 26.0, 14.0, 17.0, 17.0, 14.0, 17.0, 33.0, 24.0, 17.0, 6.0, 0.0, 11.0, 14.0, 20.0, 11.0, 25.0, 11.0, 9.0, 14.0, 14.0, 14.0, 9.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 16.0, 6.0, 17.0, 14.0, 3.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 0.0, 6.0, 0.0, 11.0, 0.0, 6.0, 3.0, 8.0, 6.0, 0.0, 3.0, 6.0, 11.0, 3.0, 6.0, 14.0, 6.0, 8.0, 14.0, 12.0, 8.0, 0.0, 8.0, 5.0, 3.0, 8.0, 9.0, 11.0, 0.0, 3.0, 0.0, 3.0, 8.0, 0.0, 3.0, 6.0, 6.0, 0.0, 3.0, 8.0, 14.0, 8.0, 0.0, 12.0, 5.0, 0.0, 3.0, 6.0, 16.0, 3.0, 9.0, 0.0, 8.0, 11.0, 3.0, 6.0, 11.0, 3.0, 16.0, 3.0, 0.0, 31.0, 23.0, 14.0, 14.0, 3.0, 11.0, 12.0, 0.0, 0.0, 0.0, 6.0, 5.0, 6.0, 11.0, 12.0, 14.0, 3.0, 8.0, 0.0, 3.0, 3.0, 8.0, 8.0, 0.0, 0.0, 3.0, 9.0, 5.0, 3.0, 11.0, 16.0, 3.0, 8.0, 3.0, 6.0, 5.0, 6.0, 0.0, 3.0, 0.0, 8.0, 3.0, 3.0, 5.0, 11.0, 3.0, 5.0, 6.0, 15.0, 8.0, 8.0, 9.0, 8.0, 8.0, 11.0, 14.0, 5.0, 9.0, 0.0, 8.0, 11.0, 3.0, 11.0, 9.0, 9.0, 5.0, 3.0, 0.0, 0.0, 3.0, 6.0, 11.0, 11.0, 15.0, 14.0, 0.0, 11.0, 6.0, 6.0, 11.0, 8.0, 6.0, 6.0, 11.0, 24.0, 9.0, 11.0, 13.0, 14.0, 3.0, 3.0, 
3.0, 0.0, 0.0, 0.0, 11.0, 6.0, 8.0, 12.0, 8.0, 3.0, 8.0, 11.0, 14.0, 8.0, 3.0, 6.0, 3.0, 9.0, 5.0, 3.0, 11.0, 8.0, 6.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 6.0, 5.0, 11.0, 3.0, 0.0, 6.0, 6.0, 8.0, 6.0, 10.0, 0.0, 6.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7037344718234254, "mean_inference_ms": 1.2277625253300022, "mean_action_processing_ms": 0.13515547152110657, "mean_env_wait_ms": 0.8473578368718657, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 54.0, "episode_reward_min": 0.0, "episode_reward_mean": 13.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 31.0}, "policy_reward_mean": {"ppo": 6.59}, "hist_stats": {"episode_reward": [3.0, 6.0, 11.0, 9.0, 14.0, 3.0, 17.0, 9.0, 20.0, 22.0, 20.0, 8.0, 8.0, 17.0, 11.0, 3.0, 11.0, 3.0, 12.0, 3.0, 22.0, 8.0, 17.0, 3.0, 22.0, 12.0, 8.0, 14.0, 17.0, 19.0, 3.0, 54.0, 28.0, 14.0, 12.0, 0.0, 11.0, 17.0, 26.0, 11.0, 3.0, 11.0, 8.0, 3.0, 14.0, 14.0, 19.0, 11.0, 11.0, 6.0, 3.0, 11.0, 8.0, 14.0, 11.0, 23.0, 17.0, 16.0, 25.0, 14.0, 8.0, 14.0, 20.0, 14.0, 3.0, 3.0, 17.0, 26.0, 14.0, 17.0, 17.0, 14.0, 17.0, 33.0, 24.0, 17.0, 6.0, 0.0, 11.0, 14.0, 20.0, 11.0, 25.0, 11.0, 9.0, 14.0, 14.0, 14.0, 9.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 16.0, 6.0, 17.0, 14.0, 3.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 0.0, 6.0, 0.0, 11.0, 0.0, 6.0, 3.0, 8.0, 6.0, 0.0, 3.0, 6.0, 11.0, 3.0, 6.0, 14.0, 6.0, 8.0, 14.0, 
12.0, 8.0, 0.0, 8.0, 5.0, 3.0, 8.0, 9.0, 11.0, 0.0, 3.0, 0.0, 3.0, 8.0, 0.0, 3.0, 6.0, 6.0, 0.0, 3.0, 8.0, 14.0, 8.0, 0.0, 12.0, 5.0, 0.0, 3.0, 6.0, 16.0, 3.0, 9.0, 0.0, 8.0, 11.0, 3.0, 6.0, 11.0, 3.0, 16.0, 3.0, 0.0, 31.0, 23.0, 14.0, 14.0, 3.0, 11.0, 12.0, 0.0, 0.0, 0.0, 6.0, 5.0, 6.0, 11.0, 12.0, 14.0, 3.0, 8.0, 0.0, 3.0, 3.0, 8.0, 8.0, 0.0, 0.0, 3.0, 9.0, 5.0, 3.0, 11.0, 16.0, 3.0, 8.0, 3.0, 6.0, 5.0, 6.0, 0.0, 3.0, 0.0, 8.0, 3.0, 3.0, 5.0, 11.0, 3.0, 5.0, 6.0, 15.0, 8.0, 8.0, 9.0, 8.0, 8.0, 11.0, 14.0, 5.0, 9.0, 0.0, 8.0, 11.0, 3.0, 11.0, 9.0, 9.0, 5.0, 3.0, 0.0, 0.0, 3.0, 6.0, 11.0, 11.0, 15.0, 14.0, 0.0, 11.0, 6.0, 6.0, 11.0, 8.0, 6.0, 6.0, 11.0, 24.0, 9.0, 11.0, 13.0, 14.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 11.0, 6.0, 8.0, 12.0, 8.0, 3.0, 8.0, 11.0, 14.0, 8.0, 3.0, 6.0, 3.0, 9.0, 5.0, 3.0, 11.0, 8.0, 6.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 6.0, 5.0, 11.0, 3.0, 0.0, 6.0, 6.0, 8.0, 6.0, 10.0, 0.0, 6.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7037344718234254, "mean_inference_ms": 1.2277625253300022, "mean_action_processing_ms": 0.13515547152110657, "mean_env_wait_ms": 0.8473578368718657, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 204800, "num_agent_steps_trained": 204800, "num_env_steps_sampled": 102400, "num_env_steps_trained": 102400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 102400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 204800, "timers": {"training_iteration_time_ms": 3844.573, "learn_time_ms": 1202.47, "learn_throughput": 10644.757, "synch_weights_time_ms": 18.757}, "counters": {"num_env_steps_sampled": 102400, "num_env_steps_trained": 102400, "num_agent_steps_sampled": 204800, "num_agent_steps_trained": 204800}, "done": false, "episodes_total": 256, "training_iteration": 8, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": 
"2022-10-23_20-00-26", "timestamp": 1666580426, "time_this_iter_s": 3.7250826358795166, "time_total_s": 31.167938709259033, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, 
"rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], 
"output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": 
false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", 
"new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": 
{"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 31.167938709259033, "timesteps_since_restore": 0, "iterations_since_restore": 8, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.683333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 13.53, "shaped_reward_min": 0, "shaped_reward_max": 33, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.95, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.16, 
"onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.47, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.64, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.3, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.18, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 3.15, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.69, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.25, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.18, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.88, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.87, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.74, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.18, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.12, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.28, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.5, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.3, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.18, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.3, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.18, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0007812500116415322, "cur_lr": 0.0010000000474974513, "total_loss": -0.11943156272172928, "policy_loss": -0.00582084758207202, "vf_loss": 0.9689017534255981, "vf_explained_var": 0.007223993539810181, "kl": 0.0006363544380292296, "entropy": 1.7820351123809814, "entropy_coeff": 0.06380800157785416, "model": {}}}}, "num_env_steps_sampled": 115200, "num_env_steps_trained": 115200, "num_agent_steps_sampled": 230400, "num_agent_steps_trained": 230400}, "sampler_results": {"episode_reward_max": 57.0, "episode_reward_min": 0.0, "episode_reward_mean": 13.93, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 31.0}, "policy_reward_mean": {"ppo": 6.965}, "custom_metrics": {"sparse_reward_mean": 0.2, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 13.53, "shaped_reward_min": 0, "shaped_reward_max": 33, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.95, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.16, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.47, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.64, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.51, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.3, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.18, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 3.15, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.69, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.25, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.18, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.49, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.88, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.87, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 0.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 4, "soup_pickup_agent_1_mean": 0.74, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 4, "soup_delivery_agent_0_mean": 0.18, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.12, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.28, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.5, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.3, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.18, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.3, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.18, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [28.0, 14.0, 12.0, 0.0, 11.0, 17.0, 26.0, 11.0, 3.0, 11.0, 8.0, 3.0, 14.0, 14.0, 19.0, 11.0, 11.0, 6.0, 3.0, 11.0, 8.0, 14.0, 11.0, 23.0, 17.0, 16.0, 25.0, 14.0, 8.0, 14.0, 20.0, 14.0, 3.0, 3.0, 17.0, 26.0, 14.0, 17.0, 17.0, 14.0, 17.0, 33.0, 24.0, 17.0, 6.0, 0.0, 11.0, 14.0, 20.0, 11.0, 25.0, 11.0, 9.0, 14.0, 14.0, 14.0, 9.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 16.0, 6.0, 17.0, 14.0, 3.0, 3.0, 14.0, 16.0, 11.0, 14.0, 16.0, 22.0, 3.0, 26.0, 3.0, 20.0, 57.0, 14.0, 27.0, 3.0, 6.0, 11.0, 3.0, 20.0, 9.0, 16.0, 19.0, 17.0, 22.0, 14.0, 25.0, 11.0, 14.0, 20.0, 11.0, 3.0, 14.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [14.0, 14.0, 3.0, 11.0, 12.0, 0.0, 0.0, 0.0, 6.0, 5.0, 6.0, 11.0, 12.0, 14.0, 3.0, 8.0, 0.0, 3.0, 3.0, 8.0, 8.0, 0.0, 0.0, 3.0, 9.0, 5.0, 3.0, 11.0, 16.0, 3.0, 8.0, 3.0, 6.0, 5.0, 6.0, 0.0, 3.0, 0.0, 8.0, 3.0, 3.0, 5.0, 11.0, 3.0, 5.0, 6.0, 15.0, 8.0, 8.0, 9.0, 8.0, 8.0, 11.0, 14.0, 5.0, 9.0, 0.0, 8.0, 11.0, 3.0, 11.0, 9.0, 9.0, 5.0, 3.0, 0.0, 0.0, 3.0, 6.0, 11.0, 11.0, 15.0, 14.0, 0.0, 11.0, 6.0, 6.0, 11.0, 8.0, 6.0, 6.0, 11.0, 24.0, 9.0, 11.0, 13.0, 14.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 11.0, 6.0, 8.0, 12.0, 8.0, 3.0, 8.0, 11.0, 14.0, 8.0, 3.0, 6.0, 3.0, 9.0, 5.0, 3.0, 11.0, 8.0, 6.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 6.0, 5.0, 11.0, 3.0, 0.0, 6.0, 6.0, 8.0, 6.0, 10.0, 0.0, 6.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 8.0, 6.0, 13.0, 3.0, 6.0, 5.0, 8.0, 6.0, 3.0, 13.0, 8.0, 14.0, 0.0, 3.0, 11.0, 15.0, 0.0, 3.0, 9.0, 11.0, 26.0, 31.0, 3.0, 11.0, 13.0, 14.0, 3.0, 0.0, 0.0, 6.0, 11.0, 0.0, 3.0, 0.0, 12.0, 8.0, 9.0, 0.0, 3.0, 13.0, 11.0, 8.0, 14.0, 3.0, 6.0, 16.0, 3.0, 11.0, 12.0, 13.0, 3.0, 8.0, 3.0, 11.0, 11.0, 9.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.704873639212576, "mean_inference_ms": 1.227418086105394, "mean_action_processing_ms": 0.13525291976515277, "mean_env_wait_ms": 0.8505155902591237, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 57.0, "episode_reward_min": 0.0, "episode_reward_mean": 13.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 31.0}, "policy_reward_mean": {"ppo": 6.965}, "hist_stats": {"episode_reward": [28.0, 14.0, 12.0, 0.0, 11.0, 17.0, 26.0, 11.0, 3.0, 11.0, 8.0, 3.0, 14.0, 14.0, 19.0, 11.0, 11.0, 6.0, 3.0, 11.0, 8.0, 14.0, 11.0, 23.0, 17.0, 16.0, 25.0, 14.0, 8.0, 14.0, 20.0, 14.0, 3.0, 3.0, 17.0, 26.0, 14.0, 17.0, 17.0, 14.0, 17.0, 33.0, 24.0, 17.0, 6.0, 0.0, 11.0, 14.0, 20.0, 11.0, 
25.0, 11.0, 9.0, 14.0, 14.0, 14.0, 9.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 16.0, 6.0, 17.0, 14.0, 3.0, 3.0, 14.0, 16.0, 11.0, 14.0, 16.0, 22.0, 3.0, 26.0, 3.0, 20.0, 57.0, 14.0, 27.0, 3.0, 6.0, 11.0, 3.0, 20.0, 9.0, 16.0, 19.0, 17.0, 22.0, 14.0, 25.0, 11.0, 14.0, 20.0, 11.0, 3.0, 14.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [14.0, 14.0, 3.0, 11.0, 12.0, 0.0, 0.0, 0.0, 6.0, 5.0, 6.0, 11.0, 12.0, 14.0, 3.0, 8.0, 0.0, 3.0, 3.0, 8.0, 8.0, 0.0, 0.0, 3.0, 9.0, 5.0, 3.0, 11.0, 16.0, 3.0, 8.0, 3.0, 6.0, 5.0, 6.0, 0.0, 3.0, 0.0, 8.0, 3.0, 3.0, 5.0, 11.0, 3.0, 5.0, 6.0, 15.0, 8.0, 8.0, 9.0, 8.0, 8.0, 11.0, 14.0, 5.0, 9.0, 0.0, 8.0, 11.0, 3.0, 11.0, 9.0, 9.0, 5.0, 3.0, 0.0, 0.0, 3.0, 6.0, 11.0, 11.0, 15.0, 14.0, 0.0, 11.0, 6.0, 6.0, 11.0, 8.0, 6.0, 6.0, 11.0, 24.0, 9.0, 11.0, 13.0, 14.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 11.0, 6.0, 8.0, 12.0, 8.0, 3.0, 8.0, 11.0, 14.0, 8.0, 3.0, 6.0, 3.0, 9.0, 5.0, 3.0, 11.0, 8.0, 6.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 6.0, 5.0, 11.0, 3.0, 0.0, 6.0, 6.0, 8.0, 6.0, 10.0, 0.0, 6.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 8.0, 6.0, 13.0, 3.0, 6.0, 5.0, 8.0, 6.0, 3.0, 13.0, 8.0, 14.0, 0.0, 3.0, 11.0, 15.0, 0.0, 3.0, 9.0, 11.0, 26.0, 31.0, 3.0, 11.0, 13.0, 14.0, 3.0, 0.0, 0.0, 6.0, 11.0, 0.0, 3.0, 0.0, 12.0, 8.0, 9.0, 0.0, 3.0, 13.0, 11.0, 8.0, 14.0, 3.0, 6.0, 16.0, 3.0, 11.0, 12.0, 13.0, 3.0, 8.0, 3.0, 11.0, 11.0, 9.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.704873639212576, "mean_inference_ms": 1.227418086105394, 
"mean_action_processing_ms": 0.13525291976515277, "mean_env_wait_ms": 0.8505155902591237, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 230400, "num_agent_steps_trained": 230400, "num_env_steps_sampled": 115200, "num_env_steps_trained": 115200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 115200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 230400, "timers": {"training_iteration_time_ms": 3837.567, "learn_time_ms": 1197.115, "learn_throughput": 10692.377, "synch_weights_time_ms": 18.365}, "counters": {"num_env_steps_sampled": 115200, "num_env_steps_trained": 115200, "num_agent_steps_sampled": 230400, "num_agent_steps_trained": 230400}, "done": false, "episodes_total": 288, "training_iteration": 9, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-30", "timestamp": 1666580430, "time_this_iter_s": 3.8528952598571777, "time_total_s": 35.02083396911621, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 
256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, 
"POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, 
"lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, 
"metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": 
true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 35.02083396911621, "timesteps_since_restore": 0, "iterations_since_restore": 9, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.45, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 0.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 13.91, "shaped_reward_min": 0, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, 
"useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.03, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.25, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.37, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.53, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.33, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.28, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 3.12, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 2.64, 
"dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.73, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 0.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 0.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.18, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.33, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.51, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.33, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.28, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.33, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.28, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0003906250058207661, "cur_lr": 0.0010000000474974513, "total_loss": -0.08926716446876526, "policy_loss": -0.006043273024260998, "vf_loss": 1.001132607460022, "vf_explained_var": 0.03307230770587921, "kl": 0.0006396523676812649, "entropy": 1.7810418605804443, "entropy_coeff": 0.04678399860858917, "model": {}}}}, "num_env_steps_sampled": 128000, "num_env_steps_trained": 128000, "num_agent_steps_sampled": 256000, "num_agent_steps_trained": 256000}, "sampler_results": {"episode_reward_max": 60.0, "episode_reward_min": 0.0, "episode_reward_mean": 14.71, "episode_len_mean": 400.0, 
"episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 35.0}, "policy_reward_mean": {"ppo": 7.355}, "custom_metrics": {"sparse_reward_mean": 0.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 13.91, "shaped_reward_min": 0, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.03, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.25, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.37, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.53, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 3.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 
8, "useful_onion_drop_agent_0_mean": 1.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.33, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.28, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 3.12, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 2.64, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.51, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.73, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 0.6, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 0.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 1, "soup_delivery_agent_1_mean": 0.18, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 1, "soup_drop_agent_0_mean": 0.33, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.51, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.33, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.28, 
"optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.33, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.28, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [3.0, 3.0, 17.0, 26.0, 14.0, 17.0, 17.0, 14.0, 17.0, 33.0, 24.0, 17.0, 6.0, 0.0, 11.0, 
14.0, 20.0, 11.0, 25.0, 11.0, 9.0, 14.0, 14.0, 14.0, 9.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 16.0, 6.0, 17.0, 14.0, 3.0, 3.0, 14.0, 16.0, 11.0, 14.0, 16.0, 22.0, 3.0, 26.0, 3.0, 20.0, 57.0, 14.0, 27.0, 3.0, 6.0, 11.0, 3.0, 20.0, 9.0, 16.0, 19.0, 17.0, 22.0, 14.0, 25.0, 11.0, 14.0, 20.0, 11.0, 3.0, 14.0, 6.0, 20.0, 25.0, 9.0, 11.0, 25.0, 20.0, 3.0, 31.0, 60.0, 17.0, 17.0, 3.0, 9.0, 6.0, 11.0, 8.0, 14.0, 25.0, 11.0, 6.0, 9.0, 14.0, 17.0, 22.0, 22.0, 11.0, 6.0, 9.0, 3.0, 36.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 0.0, 0.0, 3.0, 6.0, 11.0, 11.0, 15.0, 14.0, 0.0, 11.0, 6.0, 6.0, 11.0, 8.0, 6.0, 6.0, 11.0, 24.0, 9.0, 11.0, 13.0, 14.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 11.0, 6.0, 8.0, 12.0, 8.0, 3.0, 8.0, 11.0, 14.0, 8.0, 3.0, 6.0, 3.0, 9.0, 5.0, 3.0, 11.0, 8.0, 6.0, 6.0, 3.0, 3.0, 14.0, 11.0, 3.0, 6.0, 5.0, 11.0, 3.0, 0.0, 6.0, 6.0, 8.0, 6.0, 10.0, 0.0, 6.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 8.0, 6.0, 13.0, 3.0, 6.0, 5.0, 8.0, 6.0, 3.0, 13.0, 8.0, 14.0, 0.0, 3.0, 11.0, 15.0, 0.0, 3.0, 9.0, 11.0, 26.0, 31.0, 3.0, 11.0, 13.0, 14.0, 3.0, 0.0, 0.0, 6.0, 11.0, 0.0, 3.0, 0.0, 12.0, 8.0, 9.0, 0.0, 3.0, 13.0, 11.0, 8.0, 14.0, 3.0, 6.0, 16.0, 3.0, 11.0, 12.0, 13.0, 3.0, 8.0, 3.0, 11.0, 11.0, 9.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 6.0, 14.0, 17.0, 8.0, 6.0, 3.0, 5.0, 6.0, 17.0, 8.0, 12.0, 8.0, 3.0, 0.0, 17.0, 14.0, 25.0, 35.0, 0.0, 17.0, 6.0, 11.0, 0.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 8.0, 5.0, 3.0, 8.0, 6.0, 9.0, 16.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 9.0, 5.0, 9.0, 
8.0, 13.0, 9.0, 6.0, 16.0, 3.0, 8.0, 0.0, 6.0, 6.0, 3.0, 3.0, 0.0, 19.0, 17.0, 3.0, 6.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.704291722769332, "mean_inference_ms": 1.226455267948142, "mean_action_processing_ms": 0.13521600805286188, "mean_env_wait_ms": 0.8528073075048549, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 60.0, "episode_reward_min": 0.0, "episode_reward_mean": 14.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 35.0}, "policy_reward_mean": {"ppo": 7.355}, "hist_stats": {"episode_reward": [3.0, 3.0, 17.0, 26.0, 14.0, 17.0, 17.0, 14.0, 17.0, 33.0, 24.0, 17.0, 6.0, 0.0, 11.0, 14.0, 20.0, 11.0, 25.0, 11.0, 9.0, 14.0, 14.0, 14.0, 9.0, 17.0, 14.0, 11.0, 14.0, 6.0, 14.0, 16.0, 6.0, 17.0, 14.0, 3.0, 3.0, 14.0, 16.0, 11.0, 14.0, 16.0, 22.0, 3.0, 26.0, 3.0, 20.0, 57.0, 14.0, 27.0, 3.0, 6.0, 11.0, 3.0, 20.0, 9.0, 16.0, 19.0, 17.0, 22.0, 14.0, 25.0, 11.0, 14.0, 20.0, 11.0, 3.0, 14.0, 6.0, 20.0, 25.0, 9.0, 11.0, 25.0, 20.0, 3.0, 31.0, 60.0, 17.0, 17.0, 3.0, 9.0, 6.0, 11.0, 8.0, 14.0, 25.0, 11.0, 6.0, 9.0, 14.0, 17.0, 22.0, 22.0, 11.0, 6.0, 9.0, 3.0, 36.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 0.0, 0.0, 3.0, 6.0, 11.0, 11.0, 15.0, 14.0, 0.0, 11.0, 6.0, 6.0, 11.0, 8.0, 6.0, 6.0, 11.0, 24.0, 9.0, 11.0, 13.0, 14.0, 3.0, 3.0, 3.0, 0.0, 0.0, 0.0, 11.0, 6.0, 8.0, 12.0, 8.0, 3.0, 8.0, 11.0, 14.0, 8.0, 3.0, 6.0, 3.0, 9.0, 5.0, 3.0, 11.0, 8.0, 6.0, 6.0, 
3.0, 3.0, 14.0, 11.0, 3.0, 6.0, 5.0, 11.0, 3.0, 0.0, 6.0, 6.0, 8.0, 6.0, 10.0, 0.0, 6.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 8.0, 6.0, 13.0, 3.0, 6.0, 5.0, 8.0, 6.0, 3.0, 13.0, 8.0, 14.0, 0.0, 3.0, 11.0, 15.0, 0.0, 3.0, 9.0, 11.0, 26.0, 31.0, 3.0, 11.0, 13.0, 14.0, 3.0, 0.0, 0.0, 6.0, 11.0, 0.0, 3.0, 0.0, 12.0, 8.0, 9.0, 0.0, 3.0, 13.0, 11.0, 8.0, 14.0, 3.0, 6.0, 16.0, 3.0, 11.0, 12.0, 13.0, 3.0, 8.0, 3.0, 11.0, 11.0, 9.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 6.0, 14.0, 17.0, 8.0, 6.0, 3.0, 5.0, 6.0, 17.0, 8.0, 12.0, 8.0, 3.0, 0.0, 17.0, 14.0, 25.0, 35.0, 0.0, 17.0, 6.0, 11.0, 0.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 8.0, 5.0, 3.0, 8.0, 6.0, 9.0, 16.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 9.0, 5.0, 9.0, 8.0, 13.0, 9.0, 6.0, 16.0, 3.0, 8.0, 0.0, 6.0, 6.0, 3.0, 3.0, 0.0, 19.0, 17.0, 3.0, 6.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.704291722769332, "mean_inference_ms": 1.226455267948142, "mean_action_processing_ms": 0.13521600805286188, "mean_env_wait_ms": 0.8528073075048549, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 256000, "num_agent_steps_trained": 256000, "num_env_steps_sampled": 128000, "num_env_steps_trained": 128000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 128000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 256000, "timers": {"training_iteration_time_ms": 3826.008, "learn_time_ms": 1193.503, "learn_throughput": 10724.728, "synch_weights_time_ms": 18.438}, "counters": {"num_env_steps_sampled": 128000, "num_env_steps_trained": 128000, "num_agent_steps_sampled": 256000, "num_agent_steps_trained": 256000}, "done": false, "episodes_total": 320, "training_iteration": 10, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-34", "timestamp": 1666580434, "time_this_iter_s": 3.78620982170105, "time_total_s": 38.80704379081726, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 38.80704379081726, "timesteps_since_restore": 0, "iterations_since_restore": 10, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.04, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 0.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 14.8, "shaped_reward_min": 3, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.04, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 5.17, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.39, 
"useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.48, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.45, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.32, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.42, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 3.02, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 2.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.26, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.83, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 0.77, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.23, "soup_delivery_agent_1_min": 0, 
"soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.49, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.47, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.32, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.42, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.32, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.42, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, 
"useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.00019531250291038305, "cur_lr": 0.0010000000474974513, "total_loss": -0.056794971227645874, "policy_loss": -0.003975650295615196, "vf_loss": 1.1292062997817993, "vf_explained_var": 0.03024188242852688, "kl": 0.0006997665041126311, "entropy": 1.7786418199539185, "entropy_coeff": 0.029759999364614487, "model": {}}}}, "num_env_steps_sampled": 140800, "num_env_steps_trained": 140800, "num_agent_steps_sampled": 281600, "num_agent_steps_trained": 281600}, "sampler_results": {"episode_reward_max": 65.0, "episode_reward_min": 3.0, "episode_reward_mean": 16.0, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 39.0}, "policy_reward_mean": {"ppo": 8.0}, "custom_metrics": {"sparse_reward_mean": 0.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 14.8, "shaped_reward_min": 3, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, 
"useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.04, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 5.17, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.39, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 3.48, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.45, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.32, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 4, "potting_onion_agent_1_mean": 1.42, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 3.02, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 2.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.26, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, 
"useful_dish_drop_agent_0_mean": 1.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.83, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 0.77, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.23, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.49, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.47, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.32, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 4, "optimal_onion_potting_agent_1_mean": 1.42, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.32, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 4, "viable_onion_potting_agent_1_mean": 1.42, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 
0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [6.0, 17.0, 14.0, 3.0, 3.0, 14.0, 16.0, 11.0, 14.0, 16.0, 22.0, 3.0, 26.0, 3.0, 20.0, 57.0, 14.0, 27.0, 3.0, 6.0, 11.0, 3.0, 20.0, 9.0, 16.0, 19.0, 17.0, 22.0, 14.0, 25.0, 11.0, 14.0, 20.0, 11.0, 3.0, 14.0, 6.0, 20.0, 25.0, 9.0, 11.0, 25.0, 20.0, 3.0, 31.0, 60.0, 17.0, 17.0, 3.0, 9.0, 6.0, 11.0, 8.0, 14.0, 25.0, 11.0, 6.0, 9.0, 14.0, 17.0, 22.0, 22.0, 11.0, 6.0, 9.0, 3.0, 36.0, 9.0, 20.0, 14.0, 6.0, 9.0, 14.0, 14.0, 25.0, 14.0, 20.0, 3.0, 25.0, 6.0, 28.0, 11.0, 16.0, 22.0, 11.0, 9.0, 17.0, 9.0, 11.0, 25.0, 17.0, 25.0, 14.0, 65.0, 25.0, 31.0, 11.0, 14.0, 17.0, 33.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 6.0, 8.0, 9.0, 
8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 8.0, 6.0, 13.0, 3.0, 6.0, 5.0, 8.0, 6.0, 3.0, 13.0, 8.0, 14.0, 0.0, 3.0, 11.0, 15.0, 0.0, 3.0, 9.0, 11.0, 26.0, 31.0, 3.0, 11.0, 13.0, 14.0, 3.0, 0.0, 0.0, 6.0, 11.0, 0.0, 3.0, 0.0, 12.0, 8.0, 9.0, 0.0, 3.0, 13.0, 11.0, 8.0, 14.0, 3.0, 6.0, 16.0, 3.0, 11.0, 12.0, 13.0, 3.0, 8.0, 3.0, 11.0, 11.0, 9.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 6.0, 14.0, 17.0, 8.0, 6.0, 3.0, 5.0, 6.0, 17.0, 8.0, 12.0, 8.0, 3.0, 0.0, 17.0, 14.0, 25.0, 35.0, 0.0, 17.0, 6.0, 11.0, 0.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 8.0, 5.0, 3.0, 8.0, 6.0, 9.0, 16.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 9.0, 5.0, 9.0, 8.0, 13.0, 9.0, 6.0, 16.0, 3.0, 8.0, 0.0, 6.0, 6.0, 3.0, 3.0, 0.0, 19.0, 17.0, 3.0, 6.0, 11.0, 9.0, 9.0, 5.0, 6.0, 0.0, 0.0, 9.0, 11.0, 3.0, 8.0, 6.0, 16.0, 9.0, 6.0, 8.0, 8.0, 12.0, 3.0, 0.0, 14.0, 11.0, 6.0, 0.0, 11.0, 17.0, 8.0, 3.0, 8.0, 8.0, 13.0, 9.0, 6.0, 5.0, 6.0, 3.0, 8.0, 9.0, 3.0, 6.0, 3.0, 8.0, 11.0, 14.0, 3.0, 14.0, 11.0, 14.0, 8.0, 6.0, 39.0, 26.0, 6.0, 19.0, 12.0, 19.0, 3.0, 8.0, 8.0, 6.0, 9.0, 8.0, 19.0, 14.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7029510683441907, "mean_inference_ms": 1.2249036275862428, "mean_action_processing_ms": 0.13510709218572114, "mean_env_wait_ms": 0.854133319135467, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 65.0, "episode_reward_min": 3.0, "episode_reward_mean": 16.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 39.0}, "policy_reward_mean": {"ppo": 8.0}, "hist_stats": {"episode_reward": [6.0, 17.0, 14.0, 3.0, 3.0, 14.0, 16.0, 11.0, 14.0, 16.0, 22.0, 3.0, 26.0, 3.0, 20.0, 57.0, 14.0, 27.0, 3.0, 6.0, 11.0, 3.0, 20.0, 9.0, 16.0, 19.0, 17.0, 22.0, 14.0, 25.0, 11.0, 14.0, 20.0, 11.0, 3.0, 14.0, 6.0, 20.0, 25.0, 9.0, 11.0, 25.0, 20.0, 3.0, 31.0, 60.0, 17.0, 17.0, 3.0, 9.0, 6.0, 11.0, 8.0, 14.0, 25.0, 11.0, 6.0, 9.0, 14.0, 17.0, 22.0, 22.0, 11.0, 6.0, 9.0, 3.0, 36.0, 9.0, 20.0, 14.0, 6.0, 9.0, 14.0, 14.0, 25.0, 
14.0, 20.0, 3.0, 25.0, 6.0, 28.0, 11.0, 16.0, 22.0, 11.0, 9.0, 17.0, 9.0, 11.0, 25.0, 17.0, 25.0, 14.0, 65.0, 25.0, 31.0, 11.0, 14.0, 17.0, 33.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [0.0, 6.0, 8.0, 9.0, 8.0, 6.0, 0.0, 3.0, 0.0, 3.0, 8.0, 6.0, 13.0, 3.0, 6.0, 5.0, 8.0, 6.0, 3.0, 13.0, 8.0, 14.0, 0.0, 3.0, 11.0, 15.0, 0.0, 3.0, 9.0, 11.0, 26.0, 31.0, 3.0, 11.0, 13.0, 14.0, 3.0, 0.0, 0.0, 6.0, 11.0, 0.0, 3.0, 0.0, 12.0, 8.0, 9.0, 0.0, 3.0, 13.0, 11.0, 8.0, 14.0, 3.0, 6.0, 16.0, 3.0, 11.0, 12.0, 13.0, 3.0, 8.0, 3.0, 11.0, 11.0, 9.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 6.0, 14.0, 17.0, 8.0, 6.0, 3.0, 5.0, 6.0, 17.0, 8.0, 12.0, 8.0, 3.0, 0.0, 17.0, 14.0, 25.0, 35.0, 0.0, 17.0, 6.0, 11.0, 0.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 8.0, 5.0, 3.0, 8.0, 6.0, 9.0, 16.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 9.0, 5.0, 9.0, 8.0, 13.0, 9.0, 6.0, 16.0, 3.0, 8.0, 0.0, 6.0, 6.0, 3.0, 3.0, 0.0, 19.0, 17.0, 3.0, 6.0, 11.0, 9.0, 9.0, 5.0, 6.0, 0.0, 0.0, 9.0, 11.0, 3.0, 8.0, 6.0, 16.0, 9.0, 6.0, 8.0, 8.0, 12.0, 3.0, 0.0, 14.0, 11.0, 6.0, 0.0, 11.0, 17.0, 8.0, 3.0, 8.0, 8.0, 13.0, 9.0, 6.0, 5.0, 6.0, 3.0, 8.0, 9.0, 3.0, 6.0, 3.0, 8.0, 11.0, 14.0, 3.0, 14.0, 11.0, 14.0, 8.0, 6.0, 39.0, 26.0, 6.0, 19.0, 12.0, 19.0, 3.0, 8.0, 8.0, 6.0, 9.0, 8.0, 19.0, 14.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7029510683441907, "mean_inference_ms": 1.2249036275862428, "mean_action_processing_ms": 0.13510709218572114, "mean_env_wait_ms": 0.854133319135467, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 
0, "num_healthy_workers": 16, "num_agent_steps_sampled": 281600, "num_agent_steps_trained": 281600, "num_env_steps_sampled": 140800, "num_env_steps_trained": 140800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 140800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 281600, "timers": {"training_iteration_time_ms": 3748.712, "learn_time_ms": 1130.334, "learn_throughput": 11324.089, "synch_weights_time_ms": 14.789}, "counters": {"num_env_steps_sampled": 140800, "num_env_steps_trained": 140800, "num_agent_steps_sampled": 281600, "num_agent_steps_trained": 281600}, "done": false, "episodes_total": 352, "training_iteration": 11, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-38", "timestamp": 1666580438, "time_this_iter_s": 3.776998996734619, "time_total_s": 42.58404278755188, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], 
"counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 
1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": 
-1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, 
"min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 42.58404278755188, "timesteps_since_restore": 0, "iterations_since_restore": 11, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.116666666666667, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 1.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 15.87, "shaped_reward_min": 3, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.09, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 5.43, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.32, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.47, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.35, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.41, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 1.46, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 2.94, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.07, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.34, 
"useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.89, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 9, "soup_pickup_agent_0_mean": 0.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.82, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.25, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.23, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.5, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.47, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.41, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 1.46, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.41, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 1.46, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 9.765625145519152e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.02605297975242138, "policy_loss": -0.003543408587574959, "vf_loss": 1.2588311433792114, "vf_explained_var": 0.027413932606577873, "kl": 0.0007508020498789847, "entropy": 1.7772871255874634, "entropy_coeff": 0.012736000120639801, "model": {}}}}, "num_env_steps_sampled": 153600, "num_env_steps_trained": 153600, "num_agent_steps_sampled": 307200, "num_agent_steps_trained": 307200}, "sampler_results": {"episode_reward_max": 74.0, "episode_reward_min": 3.0, "episode_reward_mean": 17.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 
0.0}, "policy_reward_max": {"ppo": 40.0}, "policy_reward_mean": {"ppo": 8.935}, "custom_metrics": {"sparse_reward_mean": 1.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 15.87, "shaped_reward_min": 3, "shaped_reward_max": 36, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.09, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 5.43, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 11, "useful_onion_pickup_agent_0_mean": 3.32, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.47, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.35, "useful_onion_drop_agent_0_min": 
0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.41, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 1.46, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 2.94, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.07, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.89, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 9, "soup_pickup_agent_0_mean": 0.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.82, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.25, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.23, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.5, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.47, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.41, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 1.46, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.41, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 1.46, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [20.0, 11.0, 3.0, 14.0, 6.0, 20.0, 25.0, 9.0, 11.0, 25.0, 20.0, 3.0, 31.0, 60.0, 17.0, 17.0, 3.0, 9.0, 6.0, 11.0, 8.0, 14.0, 25.0, 11.0, 6.0, 9.0, 14.0, 17.0, 22.0, 
22.0, 11.0, 6.0, 9.0, 3.0, 36.0, 9.0, 20.0, 14.0, 6.0, 9.0, 14.0, 14.0, 25.0, 14.0, 20.0, 3.0, 25.0, 6.0, 28.0, 11.0, 16.0, 22.0, 11.0, 9.0, 17.0, 9.0, 11.0, 25.0, 17.0, 25.0, 14.0, 65.0, 25.0, 31.0, 11.0, 14.0, 17.0, 33.0, 22.0, 14.0, 14.0, 23.0, 20.0, 19.0, 8.0, 25.0, 9.0, 8.0, 27.0, 14.0, 74.0, 71.0, 63.0, 19.0, 17.0, 14.0, 14.0, 9.0, 34.0, 3.0, 28.0, 25.0, 14.0, 3.0, 6.0, 23.0, 9.0, 6.0, 19.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 9.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 6.0, 14.0, 17.0, 8.0, 6.0, 3.0, 5.0, 6.0, 17.0, 8.0, 12.0, 8.0, 3.0, 0.0, 17.0, 14.0, 25.0, 35.0, 0.0, 17.0, 6.0, 11.0, 0.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 8.0, 5.0, 3.0, 8.0, 6.0, 9.0, 16.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 9.0, 5.0, 9.0, 8.0, 13.0, 9.0, 6.0, 16.0, 3.0, 8.0, 0.0, 6.0, 6.0, 3.0, 3.0, 0.0, 19.0, 17.0, 3.0, 6.0, 11.0, 9.0, 9.0, 5.0, 6.0, 0.0, 0.0, 9.0, 11.0, 3.0, 8.0, 6.0, 16.0, 9.0, 6.0, 8.0, 8.0, 12.0, 3.0, 0.0, 14.0, 11.0, 6.0, 0.0, 11.0, 17.0, 8.0, 3.0, 8.0, 8.0, 13.0, 9.0, 6.0, 5.0, 6.0, 3.0, 8.0, 9.0, 3.0, 6.0, 3.0, 8.0, 11.0, 14.0, 3.0, 14.0, 11.0, 14.0, 8.0, 6.0, 39.0, 26.0, 6.0, 19.0, 12.0, 19.0, 3.0, 8.0, 8.0, 6.0, 9.0, 8.0, 19.0, 14.0, 11.0, 11.0, 3.0, 11.0, 14.0, 0.0, 12.0, 11.0, 14.0, 6.0, 14.0, 5.0, 0.0, 8.0, 16.0, 9.0, 6.0, 3.0, 8.0, 0.0, 21.0, 6.0, 0.0, 14.0, 40.0, 34.0, 34.0, 37.0, 25.0, 38.0, 6.0, 13.0, 3.0, 14.0, 6.0, 8.0, 3.0, 11.0, 9.0, 0.0, 15.0, 19.0, 0.0, 3.0, 19.0, 9.0, 10.0, 15.0, 8.0, 6.0, 3.0, 0.0, 6.0, 0.0, 12.0, 11.0, 6.0, 3.0, 0.0, 6.0, 
11.0, 8.0, 3.0, 6.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7022611330961541, "mean_inference_ms": 1.2233884271505058, "mean_action_processing_ms": 0.13504361218921432, "mean_env_wait_ms": 0.8554002743051271, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 74.0, "episode_reward_min": 3.0, "episode_reward_mean": 17.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 40.0}, "policy_reward_mean": {"ppo": 8.935}, "hist_stats": {"episode_reward": [20.0, 11.0, 3.0, 14.0, 6.0, 20.0, 25.0, 9.0, 11.0, 25.0, 20.0, 3.0, 31.0, 60.0, 17.0, 17.0, 3.0, 9.0, 6.0, 11.0, 8.0, 14.0, 25.0, 11.0, 6.0, 9.0, 14.0, 17.0, 22.0, 22.0, 11.0, 6.0, 9.0, 3.0, 36.0, 9.0, 20.0, 14.0, 6.0, 9.0, 14.0, 14.0, 25.0, 14.0, 20.0, 3.0, 25.0, 6.0, 28.0, 11.0, 16.0, 22.0, 11.0, 9.0, 17.0, 9.0, 11.0, 25.0, 17.0, 25.0, 14.0, 65.0, 25.0, 31.0, 11.0, 14.0, 17.0, 33.0, 22.0, 14.0, 14.0, 23.0, 20.0, 19.0, 8.0, 25.0, 9.0, 8.0, 27.0, 14.0, 74.0, 71.0, 63.0, 19.0, 17.0, 14.0, 14.0, 9.0, 34.0, 3.0, 28.0, 25.0, 14.0, 3.0, 6.0, 23.0, 9.0, 6.0, 19.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 9.0, 3.0, 8.0, 3.0, 0.0, 11.0, 3.0, 3.0, 3.0, 6.0, 14.0, 17.0, 8.0, 6.0, 3.0, 5.0, 6.0, 17.0, 8.0, 12.0, 8.0, 3.0, 0.0, 17.0, 14.0, 25.0, 35.0, 0.0, 17.0, 6.0, 11.0, 0.0, 3.0, 9.0, 0.0, 3.0, 3.0, 3.0, 8.0, 5.0, 3.0, 8.0, 6.0, 9.0, 16.0, 8.0, 3.0, 0.0, 6.0, 3.0, 6.0, 9.0, 5.0, 9.0, 8.0, 13.0, 9.0, 6.0, 16.0, 3.0, 8.0, 
0.0, 6.0, 6.0, 3.0, 3.0, 0.0, 19.0, 17.0, 3.0, 6.0, 11.0, 9.0, 9.0, 5.0, 6.0, 0.0, 0.0, 9.0, 11.0, 3.0, 8.0, 6.0, 16.0, 9.0, 6.0, 8.0, 8.0, 12.0, 3.0, 0.0, 14.0, 11.0, 6.0, 0.0, 11.0, 17.0, 8.0, 3.0, 8.0, 8.0, 13.0, 9.0, 6.0, 5.0, 6.0, 3.0, 8.0, 9.0, 3.0, 6.0, 3.0, 8.0, 11.0, 14.0, 3.0, 14.0, 11.0, 14.0, 8.0, 6.0, 39.0, 26.0, 6.0, 19.0, 12.0, 19.0, 3.0, 8.0, 8.0, 6.0, 9.0, 8.0, 19.0, 14.0, 11.0, 11.0, 3.0, 11.0, 14.0, 0.0, 12.0, 11.0, 14.0, 6.0, 14.0, 5.0, 0.0, 8.0, 16.0, 9.0, 6.0, 3.0, 8.0, 0.0, 21.0, 6.0, 0.0, 14.0, 40.0, 34.0, 34.0, 37.0, 25.0, 38.0, 6.0, 13.0, 3.0, 14.0, 6.0, 8.0, 3.0, 11.0, 9.0, 0.0, 15.0, 19.0, 0.0, 3.0, 19.0, 9.0, 10.0, 15.0, 8.0, 6.0, 3.0, 0.0, 6.0, 0.0, 12.0, 11.0, 6.0, 3.0, 0.0, 6.0, 11.0, 8.0, 3.0, 6.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7022611330961541, "mean_inference_ms": 1.2233884271505058, "mean_action_processing_ms": 0.13504361218921432, "mean_env_wait_ms": 0.8554002743051271, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 307200, "num_agent_steps_trained": 307200, "num_env_steps_sampled": 153600, "num_env_steps_trained": 153600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 153600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 307200, "timers": {"training_iteration_time_ms": 3741.977, "learn_time_ms": 1130.394, "learn_throughput": 11323.486, "synch_weights_time_ms": 14.516}, "counters": {"num_env_steps_sampled": 153600, "num_env_steps_trained": 153600, "num_agent_steps_sampled": 307200, "num_agent_steps_trained": 307200}, "done": false, "episodes_total": 384, "training_iteration": 12, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-42", "timestamp": 1666580442, "time_this_iter_s": 3.747410535812378, "time_total_s": 46.33145332336426, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": 
{"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, 
"recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": 
false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": 
false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, 
"evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, 
"callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 46.33145332336426, "timesteps_since_restore": 0, "iterations_since_restore": 12, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.68, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 1.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 16.88, "shaped_reward_min": 3, "shaped_reward_max": 39, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.85, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 5.36, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.13, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, 
"useful_onion_pickup_agent_1_mean": 3.48, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.95, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.45, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 1.53, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.15, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.02, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.9, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 9, "soup_pickup_agent_0_mean": 0.92, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.87, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.26, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.5, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.47, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.45, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 1.53, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.45, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 1.53, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.882812572759576e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.005024836398661137, "policy_loss": -0.004262564238160849, "vf_loss": 1.2552039623260498, "vf_explained_var": 0.04432354122400284, "kl": 0.0007258389960043132, "entropy": 1.7756567001342773, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 166400, "num_env_steps_trained": 166400, "num_agent_steps_sampled": 332800, "num_agent_steps_trained": 332800}, "sampler_results": {"episode_reward_max": 79.0, "episode_reward_min": 3.0, "episode_reward_mean": 19.68, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 46.0}, "policy_reward_mean": {"ppo": 9.84}, "custom_metrics": {"sparse_reward_mean": 1.4, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 16.88, "shaped_reward_min": 3, "shaped_reward_max": 39, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 4.85, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 14, "onion_pickup_agent_1_mean": 5.36, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.13, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.48, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.95, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 11, "onion_drop_agent_1_mean": 3.4, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.45, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 1.53, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.15, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 3.02, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 1.04, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.9, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 9, "soup_pickup_agent_0_mean": 0.92, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 0.87, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.26, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.5, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.47, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.45, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 1.53, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.45, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 1.53, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 3.0, 36.0, 9.0, 20.0, 14.0, 6.0, 9.0, 14.0, 14.0, 25.0, 14.0, 20.0, 3.0, 25.0, 6.0, 28.0, 11.0, 16.0, 22.0, 11.0, 9.0, 17.0, 9.0, 11.0, 25.0, 17.0, 25.0, 14.0, 65.0, 25.0, 31.0, 11.0, 14.0, 17.0, 33.0, 22.0, 14.0, 14.0, 23.0, 20.0, 19.0, 8.0, 25.0, 9.0, 8.0, 27.0, 14.0, 74.0, 71.0, 63.0, 19.0, 17.0, 14.0, 14.0, 9.0, 34.0, 3.0, 28.0, 25.0, 14.0, 3.0, 6.0, 23.0, 9.0, 6.0, 19.0, 9.0, 39.0, 22.0, 22.0, 11.0, 14.0, 3.0, 20.0, 19.0, 14.0, 14.0, 19.0, 9.0, 60.0, 14.0, 79.0, 22.0, 22.0, 16.0, 16.0, 22.0, 19.0, 23.0, 11.0, 16.0, 11.0, 3.0, 6.0, 3.0, 14.0, 16.0, 20.0, 68.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 3.0, 0.0, 19.0, 17.0, 3.0, 6.0, 11.0, 9.0, 9.0, 5.0, 
6.0, 0.0, 0.0, 9.0, 11.0, 3.0, 8.0, 6.0, 16.0, 9.0, 6.0, 8.0, 8.0, 12.0, 3.0, 0.0, 14.0, 11.0, 6.0, 0.0, 11.0, 17.0, 8.0, 3.0, 8.0, 8.0, 13.0, 9.0, 6.0, 5.0, 6.0, 3.0, 8.0, 9.0, 3.0, 6.0, 3.0, 8.0, 11.0, 14.0, 3.0, 14.0, 11.0, 14.0, 8.0, 6.0, 39.0, 26.0, 6.0, 19.0, 12.0, 19.0, 3.0, 8.0, 8.0, 6.0, 9.0, 8.0, 19.0, 14.0, 11.0, 11.0, 3.0, 11.0, 14.0, 0.0, 12.0, 11.0, 14.0, 6.0, 14.0, 5.0, 0.0, 8.0, 16.0, 9.0, 6.0, 3.0, 8.0, 0.0, 21.0, 6.0, 0.0, 14.0, 40.0, 34.0, 34.0, 37.0, 25.0, 38.0, 6.0, 13.0, 3.0, 14.0, 6.0, 8.0, 3.0, 11.0, 9.0, 0.0, 15.0, 19.0, 0.0, 3.0, 19.0, 9.0, 10.0, 15.0, 8.0, 6.0, 3.0, 0.0, 6.0, 0.0, 12.0, 11.0, 6.0, 3.0, 0.0, 6.0, 11.0, 8.0, 3.0, 6.0, 17.0, 22.0, 22.0, 0.0, 5.0, 17.0, 3.0, 8.0, 8.0, 6.0, 0.0, 3.0, 14.0, 6.0, 8.0, 11.0, 6.0, 8.0, 6.0, 8.0, 8.0, 11.0, 6.0, 3.0, 31.0, 29.0, 3.0, 11.0, 46.0, 33.0, 14.0, 8.0, 14.0, 8.0, 10.0, 6.0, 10.0, 6.0, 16.0, 6.0, 16.0, 3.0, 11.0, 12.0, 5.0, 6.0, 3.0, 13.0, 3.0, 8.0, 0.0, 3.0, 0.0, 6.0, 3.0, 0.0, 6.0, 8.0, 8.0, 8.0, 8.0, 12.0, 34.0, 34.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7019351354387914, "mean_inference_ms": 1.2218941189577206, "mean_action_processing_ms": 0.13495204944654685, "mean_env_wait_ms": 0.8561416115977619, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 79.0, "episode_reward_min": 3.0, "episode_reward_mean": 19.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 46.0}, "policy_reward_mean": {"ppo": 9.84}, "hist_stats": {"episode_reward": [9.0, 3.0, 36.0, 9.0, 20.0, 14.0, 6.0, 9.0, 14.0, 14.0, 25.0, 14.0, 20.0, 3.0, 25.0, 6.0, 28.0, 11.0, 16.0, 22.0, 11.0, 9.0, 17.0, 9.0, 11.0, 25.0, 17.0, 25.0, 14.0, 65.0, 25.0, 31.0, 11.0, 14.0, 17.0, 33.0, 22.0, 14.0, 14.0, 23.0, 20.0, 19.0, 8.0, 25.0, 9.0, 8.0, 27.0, 14.0, 74.0, 71.0, 63.0, 19.0, 17.0, 14.0, 14.0, 9.0, 34.0, 3.0, 28.0, 25.0, 14.0, 3.0, 6.0, 23.0, 9.0, 6.0, 19.0, 9.0, 39.0, 22.0, 22.0, 11.0, 14.0, 3.0, 20.0, 19.0, 14.0, 14.0, 
19.0, 9.0, 60.0, 14.0, 79.0, 22.0, 22.0, 16.0, 16.0, 22.0, 19.0, 23.0, 11.0, 16.0, 11.0, 3.0, 6.0, 3.0, 14.0, 16.0, 20.0, 68.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 3.0, 0.0, 19.0, 17.0, 3.0, 6.0, 11.0, 9.0, 9.0, 5.0, 6.0, 0.0, 0.0, 9.0, 11.0, 3.0, 8.0, 6.0, 16.0, 9.0, 6.0, 8.0, 8.0, 12.0, 3.0, 0.0, 14.0, 11.0, 6.0, 0.0, 11.0, 17.0, 8.0, 3.0, 8.0, 8.0, 13.0, 9.0, 6.0, 5.0, 6.0, 3.0, 8.0, 9.0, 3.0, 6.0, 3.0, 8.0, 11.0, 14.0, 3.0, 14.0, 11.0, 14.0, 8.0, 6.0, 39.0, 26.0, 6.0, 19.0, 12.0, 19.0, 3.0, 8.0, 8.0, 6.0, 9.0, 8.0, 19.0, 14.0, 11.0, 11.0, 3.0, 11.0, 14.0, 0.0, 12.0, 11.0, 14.0, 6.0, 14.0, 5.0, 0.0, 8.0, 16.0, 9.0, 6.0, 3.0, 8.0, 0.0, 21.0, 6.0, 0.0, 14.0, 40.0, 34.0, 34.0, 37.0, 25.0, 38.0, 6.0, 13.0, 3.0, 14.0, 6.0, 8.0, 3.0, 11.0, 9.0, 0.0, 15.0, 19.0, 0.0, 3.0, 19.0, 9.0, 10.0, 15.0, 8.0, 6.0, 3.0, 0.0, 6.0, 0.0, 12.0, 11.0, 6.0, 3.0, 0.0, 6.0, 11.0, 8.0, 3.0, 6.0, 17.0, 22.0, 22.0, 0.0, 5.0, 17.0, 3.0, 8.0, 8.0, 6.0, 0.0, 3.0, 14.0, 6.0, 8.0, 11.0, 6.0, 8.0, 6.0, 8.0, 8.0, 11.0, 6.0, 3.0, 31.0, 29.0, 3.0, 11.0, 46.0, 33.0, 14.0, 8.0, 14.0, 8.0, 10.0, 6.0, 10.0, 6.0, 16.0, 6.0, 16.0, 3.0, 11.0, 12.0, 5.0, 6.0, 3.0, 13.0, 3.0, 8.0, 0.0, 3.0, 0.0, 6.0, 3.0, 0.0, 6.0, 8.0, 8.0, 8.0, 8.0, 12.0, 34.0, 34.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7019351354387914, "mean_inference_ms": 1.2218941189577206, "mean_action_processing_ms": 0.13495204944654685, "mean_env_wait_ms": 0.8561416115977619, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 
0, "num_healthy_workers": 16, "num_agent_steps_sampled": 332800, "num_agent_steps_trained": 332800, "num_env_steps_sampled": 166400, "num_env_steps_trained": 166400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 166400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 332800, "timers": {"training_iteration_time_ms": 3744.604, "learn_time_ms": 1137.722, "learn_throughput": 11250.556, "synch_weights_time_ms": 14.033}, "counters": {"num_env_steps_sampled": 166400, "num_env_steps_trained": 166400, "num_agent_steps_sampled": 332800, "num_agent_steps_trained": 332800}, "done": false, "episodes_total": 416, "training_iteration": 13, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-46", "timestamp": 1666580446, "time_this_iter_s": 3.7613608837127686, "time_total_s": 50.092814207077026, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], 
"counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 
1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": 
-1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, 
"min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 50.092814207077026, "timesteps_since_restore": 0, "iterations_since_restore": 13, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.98333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 1.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 17.55, "shaped_reward_min": 3, "shaped_reward_max": 41, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.06, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.41, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.06, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.32, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 8, "onion_drop_agent_0_mean": 3.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.76, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.52, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 1.57, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 2.89, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.6, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.35, "useful_dish_pickup_agent_0_min": 
0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.65, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 9, "soup_pickup_agent_0_mean": 0.98, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.05, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.55, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.66, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.52, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 1.57, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.52, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 1.57, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.441406286379788e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.005547315813601017, "policy_loss": -0.004789750557392836, "vf_loss": 1.2801625728607178, "vf_explained_var": 0.0378531739115715, "kl": 0.0008417462231591344, "entropy": 1.7712035179138184, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 179200, "num_env_steps_trained": 179200, "num_agent_steps_sampled": 358400, "num_agent_steps_trained": 358400}, "sampler_results": {"episode_reward_max": 79.0, "episode_reward_min": 3.0, "episode_reward_mean": 20.75, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 46.0}, 
"policy_reward_mean": {"ppo": 10.375}, "custom_metrics": {"sparse_reward_mean": 1.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 17.55, "shaped_reward_min": 3, "shaped_reward_max": 41, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.06, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.41, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 10, "useful_onion_pickup_agent_0_mean": 3.06, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 3.32, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 8, "onion_drop_agent_0_mean": 3.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, 
"useful_onion_drop_agent_1_mean": 1.76, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.52, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 1.57, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 2.89, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.6, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.35, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 2, "dish_drop_agent_0_mean": 2.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.78, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.65, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 9, "soup_pickup_agent_0_mean": 0.98, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.05, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.55, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.66, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.52, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 1.57, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.52, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 1.57, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [11.0, 14.0, 17.0, 33.0, 22.0, 14.0, 14.0, 23.0, 20.0, 19.0, 8.0, 25.0, 9.0, 8.0, 27.0, 14.0, 74.0, 71.0, 63.0, 19.0, 17.0, 14.0, 14.0, 9.0, 34.0, 3.0, 28.0, 25.0, 14.0, 3.0, 6.0, 23.0, 9.0, 6.0, 19.0, 9.0, 39.0, 
22.0, 22.0, 11.0, 14.0, 3.0, 20.0, 19.0, 14.0, 14.0, 19.0, 9.0, 60.0, 14.0, 79.0, 22.0, 22.0, 16.0, 16.0, 22.0, 19.0, 23.0, 11.0, 16.0, 11.0, 3.0, 6.0, 3.0, 14.0, 16.0, 20.0, 68.0, 9.0, 22.0, 22.0, 28.0, 9.0, 22.0, 17.0, 22.0, 14.0, 12.0, 3.0, 11.0, 25.0, 20.0, 14.0, 62.0, 60.0, 23.0, 31.0, 17.0, 28.0, 41.0, 20.0, 8.0, 36.0, 17.0, 3.0, 12.0, 22.0, 12.0, 9.0, 19.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 8.0, 8.0, 6.0, 9.0, 8.0, 19.0, 14.0, 11.0, 11.0, 3.0, 11.0, 14.0, 0.0, 12.0, 11.0, 14.0, 6.0, 14.0, 5.0, 0.0, 8.0, 16.0, 9.0, 6.0, 3.0, 8.0, 0.0, 21.0, 6.0, 0.0, 14.0, 40.0, 34.0, 34.0, 37.0, 25.0, 38.0, 6.0, 13.0, 3.0, 14.0, 6.0, 8.0, 3.0, 11.0, 9.0, 0.0, 15.0, 19.0, 0.0, 3.0, 19.0, 9.0, 10.0, 15.0, 8.0, 6.0, 3.0, 0.0, 6.0, 0.0, 12.0, 11.0, 6.0, 3.0, 0.0, 6.0, 11.0, 8.0, 3.0, 6.0, 17.0, 22.0, 22.0, 0.0, 5.0, 17.0, 3.0, 8.0, 8.0, 6.0, 0.0, 3.0, 14.0, 6.0, 8.0, 11.0, 6.0, 8.0, 6.0, 8.0, 8.0, 11.0, 6.0, 3.0, 31.0, 29.0, 3.0, 11.0, 46.0, 33.0, 14.0, 8.0, 14.0, 8.0, 10.0, 6.0, 10.0, 6.0, 16.0, 6.0, 16.0, 3.0, 11.0, 12.0, 5.0, 6.0, 3.0, 13.0, 3.0, 8.0, 0.0, 3.0, 0.0, 6.0, 3.0, 0.0, 6.0, 8.0, 8.0, 8.0, 8.0, 12.0, 34.0, 34.0, 3.0, 6.0, 9.0, 13.0, 9.0, 13.0, 17.0, 11.0, 6.0, 3.0, 11.0, 11.0, 9.0, 8.0, 11.0, 11.0, 3.0, 11.0, 9.0, 3.0, 0.0, 3.0, 0.0, 11.0, 6.0, 19.0, 14.0, 6.0, 3.0, 11.0, 33.0, 29.0, 29.0, 31.0, 12.0, 11.0, 17.0, 14.0, 14.0, 3.0, 14.0, 14.0, 19.0, 22.0, 8.0, 12.0, 8.0, 0.0, 19.0, 17.0, 3.0, 14.0, 0.0, 3.0, 6.0, 6.0, 16.0, 6.0, 6.0, 6.0, 3.0, 6.0, 3.0, 16.0]}, 
"sampler_perf": {"mean_raw_obs_processing_ms": 0.7012446935769026, "mean_inference_ms": 1.2207221630940694, "mean_action_processing_ms": 0.13481071649814982, "mean_env_wait_ms": 0.8625244321628243, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 79.0, "episode_reward_min": 3.0, "episode_reward_mean": 20.75, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 46.0}, "policy_reward_mean": {"ppo": 10.375}, "hist_stats": {"episode_reward": [11.0, 14.0, 17.0, 33.0, 22.0, 14.0, 14.0, 23.0, 20.0, 19.0, 8.0, 25.0, 9.0, 8.0, 27.0, 14.0, 74.0, 71.0, 63.0, 19.0, 17.0, 14.0, 14.0, 9.0, 34.0, 3.0, 28.0, 25.0, 14.0, 3.0, 6.0, 23.0, 9.0, 6.0, 19.0, 9.0, 39.0, 22.0, 22.0, 11.0, 14.0, 3.0, 20.0, 19.0, 14.0, 14.0, 19.0, 9.0, 60.0, 14.0, 79.0, 22.0, 22.0, 16.0, 16.0, 22.0, 19.0, 23.0, 11.0, 16.0, 11.0, 3.0, 6.0, 3.0, 14.0, 16.0, 20.0, 68.0, 9.0, 22.0, 22.0, 28.0, 9.0, 22.0, 17.0, 22.0, 14.0, 12.0, 3.0, 11.0, 25.0, 20.0, 14.0, 62.0, 60.0, 23.0, 31.0, 17.0, 28.0, 41.0, 20.0, 8.0, 36.0, 17.0, 3.0, 12.0, 22.0, 12.0, 9.0, 19.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 8.0, 8.0, 6.0, 9.0, 8.0, 19.0, 14.0, 11.0, 11.0, 3.0, 11.0, 14.0, 0.0, 12.0, 11.0, 14.0, 6.0, 14.0, 5.0, 0.0, 8.0, 16.0, 9.0, 6.0, 3.0, 8.0, 0.0, 21.0, 6.0, 0.0, 14.0, 40.0, 34.0, 34.0, 37.0, 25.0, 38.0, 6.0, 13.0, 3.0, 14.0, 6.0, 8.0, 3.0, 11.0, 9.0, 0.0, 15.0, 19.0, 0.0, 3.0, 19.0, 9.0, 10.0, 15.0, 8.0, 6.0, 3.0, 0.0, 6.0, 0.0, 
12.0, 11.0, 6.0, 3.0, 0.0, 6.0, 11.0, 8.0, 3.0, 6.0, 17.0, 22.0, 22.0, 0.0, 5.0, 17.0, 3.0, 8.0, 8.0, 6.0, 0.0, 3.0, 14.0, 6.0, 8.0, 11.0, 6.0, 8.0, 6.0, 8.0, 8.0, 11.0, 6.0, 3.0, 31.0, 29.0, 3.0, 11.0, 46.0, 33.0, 14.0, 8.0, 14.0, 8.0, 10.0, 6.0, 10.0, 6.0, 16.0, 6.0, 16.0, 3.0, 11.0, 12.0, 5.0, 6.0, 3.0, 13.0, 3.0, 8.0, 0.0, 3.0, 0.0, 6.0, 3.0, 0.0, 6.0, 8.0, 8.0, 8.0, 8.0, 12.0, 34.0, 34.0, 3.0, 6.0, 9.0, 13.0, 9.0, 13.0, 17.0, 11.0, 6.0, 3.0, 11.0, 11.0, 9.0, 8.0, 11.0, 11.0, 3.0, 11.0, 9.0, 3.0, 0.0, 3.0, 0.0, 11.0, 6.0, 19.0, 14.0, 6.0, 3.0, 11.0, 33.0, 29.0, 29.0, 31.0, 12.0, 11.0, 17.0, 14.0, 14.0, 3.0, 14.0, 14.0, 19.0, 22.0, 8.0, 12.0, 8.0, 0.0, 19.0, 17.0, 3.0, 14.0, 0.0, 3.0, 6.0, 6.0, 16.0, 6.0, 6.0, 6.0, 3.0, 6.0, 3.0, 16.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7012446935769026, "mean_inference_ms": 1.2207221630940694, "mean_action_processing_ms": 0.13481071649814982, "mean_env_wait_ms": 0.8625244321628243, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 358400, "num_agent_steps_trained": 358400, "num_env_steps_sampled": 179200, "num_env_steps_trained": 179200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 179200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 358400, "timers": {"training_iteration_time_ms": 3767.559, "learn_time_ms": 1141.018, "learn_throughput": 11218.054, "synch_weights_time_ms": 13.84}, "counters": {"num_env_steps_sampled": 179200, "num_env_steps_trained": 179200, "num_agent_steps_sampled": 358400, "num_agent_steps_trained": 358400}, "done": false, "episodes_total": 448, "training_iteration": 14, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-50", "timestamp": 1666580450, "time_this_iter_s": 3.917694330215454, "time_total_s": 54.01050853729248, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", 
"config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, 
"recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": 
false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": 
false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, 
"evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, 
"callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 54.01050853729248, "timesteps_since_restore": 0, "iterations_since_restore": 14, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 22.016666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 1.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 18.04, "shaped_reward_min": 3, "shaped_reward_max": 41, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.3, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.26, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.11, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, 
"useful_onion_pickup_agent_1_mean": 3.34, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 8, "onion_drop_agent_0_mean": 3.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.72, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.67, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.63, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 1.65, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 2.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.31, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.29, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.99, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.73, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.77, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 9, "soup_pickup_agent_0_mean": 0.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.05, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.26, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.27, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.49, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.68, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.63, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 1.65, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.63, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 1.65, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.220703143189894e-05, "cur_lr": 0.0010000000474974513, "total_loss": -0.0029025180265307426, "policy_loss": -0.0021586534567177296, "vf_loss": 1.3945486545562744, "vf_explained_var": 0.05108204483985901, "kl": 0.0008824581163935363, "entropy": 1.766658902168274, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 192000, "num_env_steps_trained": 192000, "num_agent_steps_sampled": 384000, "num_agent_steps_trained": 384000}, "sampler_results": {"episode_reward_max": 79.0, "episode_reward_min": 3.0, "episode_reward_mean": 21.24, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 46.0}, "policy_reward_mean": {"ppo": 10.62}, "custom_metrics": {"sparse_reward_mean": 1.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 18.04, "shaped_reward_min": 3, "shaped_reward_max": 41, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.3, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.26, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.11, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.34, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 8, "onion_drop_agent_0_mean": 3.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.72, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.67, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.63, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 1.65, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 2.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 2.8, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.31, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.29, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 1, "dish_drop_agent_0_mean": 2.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.99, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.73, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.77, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 9, "soup_pickup_agent_0_mean": 0.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.05, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.26, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.27, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.49, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.68, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.63, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 1.65, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.63, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 1.65, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 6.0, 19.0, 9.0, 39.0, 22.0, 22.0, 11.0, 14.0, 3.0, 20.0, 19.0, 14.0, 14.0, 19.0, 9.0, 60.0, 14.0, 79.0, 22.0, 22.0, 16.0, 16.0, 22.0, 19.0, 23.0, 11.0, 16.0, 11.0, 3.0, 6.0, 3.0, 14.0, 16.0, 20.0, 68.0, 9.0, 22.0, 22.0, 28.0, 9.0, 22.0, 17.0, 22.0, 14.0, 12.0, 3.0, 11.0, 25.0, 20.0, 14.0, 62.0, 60.0, 23.0, 31.0, 17.0, 28.0, 41.0, 20.0, 8.0, 36.0, 17.0, 3.0, 12.0, 22.0, 12.0, 9.0, 19.0, 20.0, 9.0, 19.0, 6.0, 9.0, 6.0, 22.0, 25.0, 31.0, 36.0, 17.0, 25.0, 25.0, 23.0, 14.0, 9.0, 37.0, 65.0, 25.0, 24.0, 14.0, 11.0, 20.0, 63.0, 22.0, 9.0, 17.0, 25.0, 25.0, 14.0, 68.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 0.0, 6.0, 11.0, 8.0, 3.0, 6.0, 17.0, 22.0, 22.0, 
0.0, 5.0, 17.0, 3.0, 8.0, 8.0, 6.0, 0.0, 3.0, 14.0, 6.0, 8.0, 11.0, 6.0, 8.0, 6.0, 8.0, 8.0, 11.0, 6.0, 3.0, 31.0, 29.0, 3.0, 11.0, 46.0, 33.0, 14.0, 8.0, 14.0, 8.0, 10.0, 6.0, 10.0, 6.0, 16.0, 6.0, 16.0, 3.0, 11.0, 12.0, 5.0, 6.0, 3.0, 13.0, 3.0, 8.0, 0.0, 3.0, 0.0, 6.0, 3.0, 0.0, 6.0, 8.0, 8.0, 8.0, 8.0, 12.0, 34.0, 34.0, 3.0, 6.0, 9.0, 13.0, 9.0, 13.0, 17.0, 11.0, 6.0, 3.0, 11.0, 11.0, 9.0, 8.0, 11.0, 11.0, 3.0, 11.0, 9.0, 3.0, 0.0, 3.0, 0.0, 11.0, 6.0, 19.0, 14.0, 6.0, 3.0, 11.0, 33.0, 29.0, 29.0, 31.0, 12.0, 11.0, 17.0, 14.0, 14.0, 3.0, 14.0, 14.0, 19.0, 22.0, 8.0, 12.0, 8.0, 0.0, 19.0, 17.0, 3.0, 14.0, 0.0, 3.0, 6.0, 6.0, 16.0, 6.0, 6.0, 6.0, 3.0, 6.0, 3.0, 16.0, 11.0, 9.0, 6.0, 3.0, 6.0, 13.0, 6.0, 0.0, 0.0, 9.0, 3.0, 3.0, 12.0, 10.0, 16.0, 9.0, 9.0, 22.0, 6.0, 30.0, 11.0, 6.0, 12.0, 13.0, 9.0, 16.0, 8.0, 15.0, 6.0, 8.0, 6.0, 3.0, 23.0, 14.0, 34.0, 31.0, 19.0, 6.0, 11.0, 13.0, 0.0, 14.0, 11.0, 0.0, 3.0, 17.0, 32.0, 31.0, 11.0, 11.0, 6.0, 3.0, 17.0, 0.0, 14.0, 11.0, 13.0, 12.0, 6.0, 8.0, 28.0, 40.0, 3.0, 6.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7002415829283244, "mean_inference_ms": 1.219227588262156, "mean_action_processing_ms": 0.13465034540587548, "mean_env_wait_ms": 0.8677968402407349, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 79.0, "episode_reward_min": 3.0, "episode_reward_mean": 21.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 46.0}, "policy_reward_mean": {"ppo": 10.62}, "hist_stats": {"episode_reward": [9.0, 6.0, 19.0, 9.0, 39.0, 22.0, 22.0, 11.0, 14.0, 3.0, 20.0, 19.0, 14.0, 14.0, 19.0, 9.0, 60.0, 14.0, 79.0, 22.0, 22.0, 16.0, 16.0, 22.0, 19.0, 23.0, 11.0, 16.0, 11.0, 3.0, 6.0, 3.0, 14.0, 16.0, 20.0, 68.0, 9.0, 22.0, 22.0, 28.0, 9.0, 22.0, 17.0, 22.0, 14.0, 12.0, 3.0, 11.0, 25.0, 20.0, 14.0, 62.0, 60.0, 23.0, 31.0, 17.0, 28.0, 41.0, 20.0, 8.0, 36.0, 17.0, 3.0, 12.0, 22.0, 12.0, 9.0, 19.0, 20.0, 9.0, 19.0, 6.0, 9.0, 6.0, 22.0, 
25.0, 31.0, 36.0, 17.0, 25.0, 25.0, 23.0, 14.0, 9.0, 37.0, 65.0, 25.0, 24.0, 14.0, 11.0, 20.0, 63.0, 22.0, 9.0, 17.0, 25.0, 25.0, 14.0, 68.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 3.0, 0.0, 6.0, 11.0, 8.0, 3.0, 6.0, 17.0, 22.0, 22.0, 0.0, 5.0, 17.0, 3.0, 8.0, 8.0, 6.0, 0.0, 3.0, 14.0, 6.0, 8.0, 11.0, 6.0, 8.0, 6.0, 8.0, 8.0, 11.0, 6.0, 3.0, 31.0, 29.0, 3.0, 11.0, 46.0, 33.0, 14.0, 8.0, 14.0, 8.0, 10.0, 6.0, 10.0, 6.0, 16.0, 6.0, 16.0, 3.0, 11.0, 12.0, 5.0, 6.0, 3.0, 13.0, 3.0, 8.0, 0.0, 3.0, 0.0, 6.0, 3.0, 0.0, 6.0, 8.0, 8.0, 8.0, 8.0, 12.0, 34.0, 34.0, 3.0, 6.0, 9.0, 13.0, 9.0, 13.0, 17.0, 11.0, 6.0, 3.0, 11.0, 11.0, 9.0, 8.0, 11.0, 11.0, 3.0, 11.0, 9.0, 3.0, 0.0, 3.0, 0.0, 11.0, 6.0, 19.0, 14.0, 6.0, 3.0, 11.0, 33.0, 29.0, 29.0, 31.0, 12.0, 11.0, 17.0, 14.0, 14.0, 3.0, 14.0, 14.0, 19.0, 22.0, 8.0, 12.0, 8.0, 0.0, 19.0, 17.0, 3.0, 14.0, 0.0, 3.0, 6.0, 6.0, 16.0, 6.0, 6.0, 6.0, 3.0, 6.0, 3.0, 16.0, 11.0, 9.0, 6.0, 3.0, 6.0, 13.0, 6.0, 0.0, 0.0, 9.0, 3.0, 3.0, 12.0, 10.0, 16.0, 9.0, 9.0, 22.0, 6.0, 30.0, 11.0, 6.0, 12.0, 13.0, 9.0, 16.0, 8.0, 15.0, 6.0, 8.0, 6.0, 3.0, 23.0, 14.0, 34.0, 31.0, 19.0, 6.0, 11.0, 13.0, 0.0, 14.0, 11.0, 0.0, 3.0, 17.0, 32.0, 31.0, 11.0, 11.0, 6.0, 3.0, 17.0, 0.0, 14.0, 11.0, 13.0, 12.0, 6.0, 8.0, 28.0, 40.0, 3.0, 6.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7002415829283244, "mean_inference_ms": 1.219227588262156, "mean_action_processing_ms": 0.13465034540587548, "mean_env_wait_ms": 0.8677968402407349, 
"mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 384000, "num_agent_steps_trained": 384000, "num_env_steps_sampled": 192000, "num_env_steps_trained": 192000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 192000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 384000, "timers": {"training_iteration_time_ms": 3756.788, "learn_time_ms": 1136.217, "learn_throughput": 11265.456, "synch_weights_time_ms": 12.889}, "counters": {"num_env_steps_sampled": 192000, "num_env_steps_trained": 192000, "num_agent_steps_sampled": 384000, "num_agent_steps_trained": 384000}, "done": false, "episodes_total": 480, "training_iteration": 15, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-54", "timestamp": 1666580454, "time_this_iter_s": 3.7644498348236084, "time_total_s": 57.77495837211609, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": 
false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, 
"use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": 
{"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, 
"_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, 
"collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, 
"metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 57.77495837211609, "timesteps_since_restore": 0, "iterations_since_restore": 15, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.28, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 2.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 20.16, "shaped_reward_min": 3, "shaped_reward_max": 50, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, 
"useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.64, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.44, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.3, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.38, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 8, "onion_drop_agent_0_mean": 3.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.86, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.75, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.86, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 1.79, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 2.62, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.68, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 
0.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.63, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.62, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.04, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.17, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.35, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.53, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.79, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.86, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 1.79, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.86, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 1.79, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 6.10351571594947e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.001889559207484126, "policy_loss": -0.0011786052491515875, "vf_loss": 1.7099534273147583, "vf_explained_var": 0.04326528683304787, "kl": 0.0008639540756121278, "entropy": 1.7639117240905762, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 204800, "num_env_steps_trained": 204800, "num_agent_steps_sampled": 409600, "num_agent_steps_trained": 409600}, "sampler_results": {"episode_reward_max": 90.0, "episode_reward_min": 3.0, "episode_reward_mean": 24.16, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 
0.0}, "policy_reward_max": {"ppo": 56.0}, "policy_reward_mean": {"ppo": 12.08}, "custom_metrics": {"sparse_reward_mean": 2.0, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 20.16, "shaped_reward_min": 3, "shaped_reward_max": 50, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.64, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.44, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.3, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.38, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 8, "onion_drop_agent_0_mean": 3.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 3.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.86, "useful_onion_drop_agent_0_min": 0, 
"useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.75, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.86, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 1.79, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 2.62, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.68, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.73, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.63, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.62, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.04, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.17, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.35, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.53, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.79, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.86, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 1.79, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.86, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 1.79, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [14.0, 16.0, 20.0, 68.0, 9.0, 22.0, 22.0, 28.0, 9.0, 22.0, 17.0, 22.0, 14.0, 12.0, 3.0, 11.0, 25.0, 20.0, 14.0, 62.0, 60.0, 23.0, 31.0, 17.0, 28.0, 41.0, 20.0, 8.0, 
36.0, 17.0, 3.0, 12.0, 22.0, 12.0, 9.0, 19.0, 20.0, 9.0, 19.0, 6.0, 9.0, 6.0, 22.0, 25.0, 31.0, 36.0, 17.0, 25.0, 25.0, 23.0, 14.0, 9.0, 37.0, 65.0, 25.0, 24.0, 14.0, 11.0, 20.0, 63.0, 22.0, 9.0, 17.0, 25.0, 25.0, 14.0, 68.0, 9.0, 36.0, 14.0, 42.0, 90.0, 44.0, 36.0, 11.0, 25.0, 9.0, 6.0, 12.0, 23.0, 73.0, 17.0, 77.0, 9.0, 17.0, 66.0, 14.0, 29.0, 9.0, 36.0, 33.0, 12.0, 20.0, 14.0, 25.0, 38.0, 6.0, 19.0, 3.0, 19.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 8.0, 8.0, 8.0, 8.0, 12.0, 34.0, 34.0, 3.0, 6.0, 9.0, 13.0, 9.0, 13.0, 17.0, 11.0, 6.0, 3.0, 11.0, 11.0, 9.0, 8.0, 11.0, 11.0, 3.0, 11.0, 9.0, 3.0, 0.0, 3.0, 0.0, 11.0, 6.0, 19.0, 14.0, 6.0, 3.0, 11.0, 33.0, 29.0, 29.0, 31.0, 12.0, 11.0, 17.0, 14.0, 14.0, 3.0, 14.0, 14.0, 19.0, 22.0, 8.0, 12.0, 8.0, 0.0, 19.0, 17.0, 3.0, 14.0, 0.0, 3.0, 6.0, 6.0, 16.0, 6.0, 6.0, 6.0, 3.0, 6.0, 3.0, 16.0, 11.0, 9.0, 6.0, 3.0, 6.0, 13.0, 6.0, 0.0, 0.0, 9.0, 3.0, 3.0, 12.0, 10.0, 16.0, 9.0, 9.0, 22.0, 6.0, 30.0, 11.0, 6.0, 12.0, 13.0, 9.0, 16.0, 8.0, 15.0, 6.0, 8.0, 6.0, 3.0, 23.0, 14.0, 34.0, 31.0, 19.0, 6.0, 11.0, 13.0, 0.0, 14.0, 11.0, 0.0, 3.0, 17.0, 32.0, 31.0, 11.0, 11.0, 6.0, 3.0, 17.0, 0.0, 14.0, 11.0, 13.0, 12.0, 6.0, 8.0, 28.0, 40.0, 3.0, 6.0, 25.0, 11.0, 14.0, 0.0, 23.0, 19.0, 34.0, 56.0, 23.0, 21.0, 17.0, 19.0, 6.0, 5.0, 8.0, 17.0, 6.0, 3.0, 3.0, 3.0, 6.0, 6.0, 14.0, 9.0, 36.0, 37.0, 12.0, 5.0, 45.0, 32.0, 6.0, 3.0, 3.0, 14.0, 38.0, 28.0, 8.0, 6.0, 14.0, 15.0, 3.0, 6.0, 22.0, 14.0, 5.0, 28.0, 6.0, 6.0, 11.0, 9.0, 6.0, 8.0, 
11.0, 14.0, 6.0, 32.0, 3.0, 3.0, 11.0, 8.0, 0.0, 3.0, 11.0, 8.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6990310521400235, "mean_inference_ms": 1.2183144516569073, "mean_action_processing_ms": 0.13449427945373685, "mean_env_wait_ms": 0.8729521794459285, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 90.0, "episode_reward_min": 3.0, "episode_reward_mean": 24.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 56.0}, "policy_reward_mean": {"ppo": 12.08}, "hist_stats": {"episode_reward": [14.0, 16.0, 20.0, 68.0, 9.0, 22.0, 22.0, 28.0, 9.0, 22.0, 17.0, 22.0, 14.0, 12.0, 3.0, 11.0, 25.0, 20.0, 14.0, 62.0, 60.0, 23.0, 31.0, 17.0, 28.0, 41.0, 20.0, 8.0, 36.0, 17.0, 3.0, 12.0, 22.0, 12.0, 9.0, 19.0, 20.0, 9.0, 19.0, 6.0, 9.0, 6.0, 22.0, 25.0, 31.0, 36.0, 17.0, 25.0, 25.0, 23.0, 14.0, 9.0, 37.0, 65.0, 25.0, 24.0, 14.0, 11.0, 20.0, 63.0, 22.0, 9.0, 17.0, 25.0, 25.0, 14.0, 68.0, 9.0, 36.0, 14.0, 42.0, 90.0, 44.0, 36.0, 11.0, 25.0, 9.0, 6.0, 12.0, 23.0, 73.0, 17.0, 77.0, 9.0, 17.0, 66.0, 14.0, 29.0, 9.0, 36.0, 33.0, 12.0, 20.0, 14.0, 25.0, 38.0, 6.0, 19.0, 3.0, 19.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 8.0, 8.0, 8.0, 8.0, 12.0, 34.0, 34.0, 3.0, 6.0, 9.0, 13.0, 9.0, 13.0, 17.0, 11.0, 6.0, 3.0, 11.0, 11.0, 9.0, 8.0, 11.0, 11.0, 3.0, 11.0, 9.0, 3.0, 0.0, 3.0, 0.0, 11.0, 6.0, 19.0, 14.0, 6.0, 3.0, 11.0, 33.0, 29.0, 29.0, 31.0, 12.0, 11.0, 17.0, 14.0, 14.0, 3.0, 14.0, 
14.0, 19.0, 22.0, 8.0, 12.0, 8.0, 0.0, 19.0, 17.0, 3.0, 14.0, 0.0, 3.0, 6.0, 6.0, 16.0, 6.0, 6.0, 6.0, 3.0, 6.0, 3.0, 16.0, 11.0, 9.0, 6.0, 3.0, 6.0, 13.0, 6.0, 0.0, 0.0, 9.0, 3.0, 3.0, 12.0, 10.0, 16.0, 9.0, 9.0, 22.0, 6.0, 30.0, 11.0, 6.0, 12.0, 13.0, 9.0, 16.0, 8.0, 15.0, 6.0, 8.0, 6.0, 3.0, 23.0, 14.0, 34.0, 31.0, 19.0, 6.0, 11.0, 13.0, 0.0, 14.0, 11.0, 0.0, 3.0, 17.0, 32.0, 31.0, 11.0, 11.0, 6.0, 3.0, 17.0, 0.0, 14.0, 11.0, 13.0, 12.0, 6.0, 8.0, 28.0, 40.0, 3.0, 6.0, 25.0, 11.0, 14.0, 0.0, 23.0, 19.0, 34.0, 56.0, 23.0, 21.0, 17.0, 19.0, 6.0, 5.0, 8.0, 17.0, 6.0, 3.0, 3.0, 3.0, 6.0, 6.0, 14.0, 9.0, 36.0, 37.0, 12.0, 5.0, 45.0, 32.0, 6.0, 3.0, 3.0, 14.0, 38.0, 28.0, 8.0, 6.0, 14.0, 15.0, 3.0, 6.0, 22.0, 14.0, 5.0, 28.0, 6.0, 6.0, 11.0, 9.0, 6.0, 8.0, 11.0, 14.0, 6.0, 32.0, 3.0, 3.0, 11.0, 8.0, 0.0, 3.0, 11.0, 8.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6990310521400235, "mean_inference_ms": 1.2183144516569073, "mean_action_processing_ms": 0.13449427945373685, "mean_env_wait_ms": 0.8729521794459285, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 409600, "num_agent_steps_trained": 409600, "num_env_steps_sampled": 204800, "num_env_steps_trained": 204800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 204800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 409600, "timers": {"training_iteration_time_ms": 3728.323, "learn_time_ms": 1113.304, "learn_throughput": 11497.313, "synch_weights_time_ms": 12.737}, "counters": {"num_env_steps_sampled": 204800, "num_env_steps_trained": 204800, "num_agent_steps_sampled": 409600, "num_agent_steps_trained": 409600}, "done": false, "episodes_total": 512, "training_iteration": 16, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-00-58", "timestamp": 1666580458, "time_this_iter_s": 3.6585028171539307, "time_total_s": 61.43346118927002, "pid": 
1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 61.43346118927002, "timesteps_since_restore": 0, "iterations_since_restore": 16, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.316666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 2.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 21.0, "shaped_reward_min": 3, "shaped_reward_max": 50, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.51, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.71, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 15, 
"useful_onion_pickup_agent_0_mean": 3.38, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.65, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 3.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.79, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.63, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.84, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 1.96, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 2.88, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.76, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.68, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.3, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.01, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.43, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.3, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.71, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.64, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.84, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 1.96, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.84, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 1.96, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.051757857974735e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.004403861239552498, "policy_loss": -0.0036931377835571766, "vf_loss": 1.6866822242736816, "vf_explained_var": 0.03509732335805893, "kl": 0.0009732272010296583, "entropy": 1.758791446685791, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 217600, "num_env_steps_trained": 217600, "num_agent_steps_sampled": 435200, "num_agent_steps_trained": 435200}, "sampler_results": {"episode_reward_max": 90.0, "episode_reward_min": 3.0, "episode_reward_mean": 26.2, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 56.0}, "policy_reward_mean": {"ppo": 13.1}, "custom_metrics": {"sparse_reward_mean": 2.6, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 21.0, "shaped_reward_min": 3, "shaped_reward_max": 50, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.51, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.71, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.38, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.65, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 3.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 3.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 12, "useful_onion_drop_agent_0_mean": 1.79, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.63, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 1.84, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 1.96, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 2.88, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 2.76, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.98, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.82, "dish_drop_agent_1_min": 0, 
"dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.68, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.3, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.01, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.43, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.3, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.71, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.64, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 1.84, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 1.96, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.84, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 1.96, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, 
"catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [22.0, 12.0, 9.0, 19.0, 20.0, 9.0, 19.0, 6.0, 9.0, 6.0, 22.0, 25.0, 31.0, 36.0, 17.0, 25.0, 25.0, 23.0, 14.0, 9.0, 37.0, 65.0, 25.0, 24.0, 14.0, 11.0, 20.0, 63.0, 22.0, 9.0, 17.0, 25.0, 25.0, 14.0, 68.0, 9.0, 36.0, 14.0, 42.0, 90.0, 44.0, 36.0, 11.0, 25.0, 9.0, 6.0, 12.0, 23.0, 73.0, 17.0, 77.0, 9.0, 17.0, 66.0, 14.0, 29.0, 9.0, 36.0, 33.0, 12.0, 20.0, 14.0, 25.0, 38.0, 6.0, 19.0, 3.0, 19.0, 11.0, 22.0, 36.0, 6.0, 42.0, 65.0, 25.0, 12.0, 17.0, 73.0, 28.0, 66.0, 85.0, 28.0, 6.0, 14.0, 57.0, 6.0, 28.0, 31.0, 57.0, 17.0, 27.0, 9.0, 17.0, 22.0, 27.0, 12.0, 22.0, 19.0, 20.0, 23.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400], "policy_ppo_reward": [16.0, 6.0, 6.0, 6.0, 3.0, 6.0, 3.0, 16.0, 11.0, 9.0, 6.0, 3.0, 6.0, 13.0, 6.0, 0.0, 0.0, 9.0, 3.0, 3.0, 12.0, 10.0, 16.0, 9.0, 9.0, 22.0, 6.0, 30.0, 11.0, 6.0, 12.0, 13.0, 9.0, 16.0, 8.0, 15.0, 6.0, 8.0, 6.0, 3.0, 23.0, 14.0, 34.0, 31.0, 19.0, 6.0, 11.0, 13.0, 0.0, 14.0, 11.0, 0.0, 3.0, 17.0, 32.0, 31.0, 11.0, 11.0, 6.0, 3.0, 17.0, 0.0, 14.0, 11.0, 13.0, 12.0, 6.0, 8.0, 28.0, 40.0, 3.0, 6.0, 25.0, 11.0, 14.0, 0.0, 23.0, 19.0, 34.0, 56.0, 23.0, 21.0, 17.0, 19.0, 6.0, 5.0, 8.0, 17.0, 6.0, 3.0, 3.0, 3.0, 6.0, 6.0, 14.0, 9.0, 36.0, 37.0, 12.0, 5.0, 45.0, 32.0, 6.0, 3.0, 3.0, 14.0, 38.0, 28.0, 8.0, 6.0, 14.0, 15.0, 3.0, 6.0, 22.0, 14.0, 5.0, 28.0, 6.0, 6.0, 11.0, 9.0, 6.0, 8.0, 11.0, 14.0, 6.0, 32.0, 3.0, 3.0, 11.0, 8.0, 0.0, 3.0, 11.0, 8.0, 8.0, 3.0, 11.0, 11.0, 19.0, 17.0, 3.0, 3.0, 28.0, 14.0, 30.0, 35.0, 13.0, 12.0, 9.0, 3.0, 5.0, 12.0, 50.0, 23.0, 14.0, 14.0, 32.0, 34.0, 49.0, 36.0, 16.0, 12.0, 0.0, 6.0, 3.0, 11.0, 34.0, 23.0, 3.0, 3.0, 19.0, 9.0, 19.0, 12.0, 28.0, 29.0, 9.0, 8.0, 13.0, 14.0, 3.0, 6.0, 8.0, 9.0, 11.0, 11.0, 12.0, 15.0, 0.0, 12.0, 16.0, 6.0, 13.0, 6.0, 6.0, 14.0, 6.0, 17.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.698113649734605, "mean_inference_ms": 1.2171757494183797, "mean_action_processing_ms": 0.13440137759174353, "mean_env_wait_ms": 0.8729869098636517, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 90.0, "episode_reward_min": 3.0, "episode_reward_mean": 26.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 56.0}, "policy_reward_mean": {"ppo": 13.1}, "hist_stats": {"episode_reward": [22.0, 12.0, 9.0, 19.0, 20.0, 9.0, 19.0, 6.0, 9.0, 6.0, 22.0, 25.0, 31.0, 36.0, 17.0, 25.0, 25.0, 23.0, 14.0, 9.0, 37.0, 65.0, 25.0, 24.0, 14.0, 11.0, 20.0, 63.0, 22.0, 9.0, 17.0, 25.0, 25.0, 14.0, 68.0, 9.0, 36.0, 14.0, 42.0, 90.0, 44.0, 36.0, 11.0, 25.0, 9.0, 6.0, 12.0, 23.0, 73.0, 17.0, 77.0, 9.0, 17.0, 66.0, 14.0, 29.0, 
9.0, 36.0, 33.0, 12.0, 20.0, 14.0, 25.0, 38.0, 6.0, 19.0, 3.0, 19.0, 11.0, 22.0, 36.0, 6.0, 42.0, 65.0, 25.0, 12.0, 17.0, 73.0, 28.0, 66.0, 85.0, 28.0, 6.0, 14.0, 57.0, 6.0, 28.0, 31.0, 57.0, 17.0, 27.0, 9.0, 17.0, 22.0, 27.0, 12.0, 22.0, 19.0, 20.0, 23.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [16.0, 6.0, 6.0, 6.0, 3.0, 6.0, 3.0, 16.0, 11.0, 9.0, 6.0, 3.0, 6.0, 13.0, 6.0, 0.0, 0.0, 9.0, 3.0, 3.0, 12.0, 10.0, 16.0, 9.0, 9.0, 22.0, 6.0, 30.0, 11.0, 6.0, 12.0, 13.0, 9.0, 16.0, 8.0, 15.0, 6.0, 8.0, 6.0, 3.0, 23.0, 14.0, 34.0, 31.0, 19.0, 6.0, 11.0, 13.0, 0.0, 14.0, 11.0, 0.0, 3.0, 17.0, 32.0, 31.0, 11.0, 11.0, 6.0, 3.0, 17.0, 0.0, 14.0, 11.0, 13.0, 12.0, 6.0, 8.0, 28.0, 40.0, 3.0, 6.0, 25.0, 11.0, 14.0, 0.0, 23.0, 19.0, 34.0, 56.0, 23.0, 21.0, 17.0, 19.0, 6.0, 5.0, 8.0, 17.0, 6.0, 3.0, 3.0, 3.0, 6.0, 6.0, 14.0, 9.0, 36.0, 37.0, 12.0, 5.0, 45.0, 32.0, 6.0, 3.0, 3.0, 14.0, 38.0, 28.0, 8.0, 6.0, 14.0, 15.0, 3.0, 6.0, 22.0, 14.0, 5.0, 28.0, 6.0, 6.0, 11.0, 9.0, 6.0, 8.0, 11.0, 14.0, 6.0, 32.0, 3.0, 3.0, 11.0, 8.0, 0.0, 3.0, 11.0, 8.0, 8.0, 3.0, 11.0, 11.0, 19.0, 17.0, 3.0, 3.0, 28.0, 14.0, 30.0, 35.0, 13.0, 12.0, 9.0, 3.0, 5.0, 12.0, 50.0, 23.0, 14.0, 14.0, 32.0, 34.0, 49.0, 36.0, 16.0, 12.0, 0.0, 6.0, 3.0, 11.0, 34.0, 23.0, 3.0, 3.0, 19.0, 9.0, 19.0, 12.0, 28.0, 29.0, 9.0, 8.0, 13.0, 14.0, 3.0, 6.0, 8.0, 9.0, 11.0, 11.0, 12.0, 15.0, 0.0, 12.0, 16.0, 6.0, 13.0, 6.0, 6.0, 14.0, 6.0, 17.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.698113649734605, "mean_inference_ms": 
1.2171757494183797, "mean_action_processing_ms": 0.13440137759174353, "mean_env_wait_ms": 0.8729869098636517, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 435200, "num_agent_steps_trained": 435200, "num_env_steps_sampled": 217600, "num_env_steps_trained": 217600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 217600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 435200, "timers": {"training_iteration_time_ms": 3712.225, "learn_time_ms": 1107.87, "learn_throughput": 11553.697, "synch_weights_time_ms": 12.655}, "counters": {"num_env_steps_sampled": 217600, "num_env_steps_trained": 217600, "num_agent_steps_sampled": 435200, "num_agent_steps_trained": 435200}, "done": false, "episodes_total": 544, "training_iteration": 17, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-02", "timestamp": 1666580462, "time_this_iter_s": 3.7620363235473633, "time_total_s": 65.19549751281738, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, 
"POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 
20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, 
"use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, 
"evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, 
"_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 65.19549751281738, "timesteps_since_restore": 0, "iterations_since_restore": 17, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.080000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 2.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 21.62, "shaped_reward_min": 3, "shaped_reward_max": 50, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.22, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.71, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.17, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.54, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.84, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 3.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.66, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.84, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.03, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 3.23, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, 
"dish_pickup_agent_1_mean": 2.61, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.37, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.88, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.4, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.46, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.33, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.79, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.7, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.84, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.03, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.84, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.03, "viable_onion_potting_agent_1_min": 0, 
"viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.5258789289873675e-06, "cur_lr": 0.0010000000474974513, "total_loss": -0.0026260181330144405, "policy_loss": -0.0018984454218298197, "vf_loss": 1.5118250846862793, "vf_explained_var": 0.10999676585197449, "kl": 0.0011376581387594342, "entropy": 1.757513403892517, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 230400, "num_env_steps_trained": 230400, "num_agent_steps_sampled": 460800, "num_agent_steps_trained": 460800}, "sampler_results": {"episode_reward_max": 90.0, "episode_reward_min": 3.0, 
"episode_reward_mean": 27.22, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 56.0}, "policy_reward_mean": {"ppo": 13.61}, "custom_metrics": {"sparse_reward_mean": 2.8, "sparse_reward_min": 0, "sparse_reward_max": 20, "shaped_reward_mean": 21.62, "shaped_reward_min": 3, "shaped_reward_max": 50, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.22, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 11, "onion_pickup_agent_1_mean": 5.71, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.17, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.54, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.84, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 3.15, 
"onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.66, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.84, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.03, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 5, "dish_pickup_agent_0_mean": 3.23, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.61, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.37, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.88, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.4, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.46, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.33, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.79, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.7, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 1.84, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, 
"optimal_onion_potting_agent_1_mean": 2.03, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 5, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.84, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.03, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 5, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [25.0, 14.0, 68.0, 9.0, 36.0, 14.0, 42.0, 
90.0, 44.0, 36.0, 11.0, 25.0, 9.0, 6.0, 12.0, 23.0, 73.0, 17.0, 77.0, 9.0, 17.0, 66.0, 14.0, 29.0, 9.0, 36.0, 33.0, 12.0, 20.0, 14.0, 25.0, 38.0, 6.0, 19.0, 3.0, 19.0, 11.0, 22.0, 36.0, 6.0, 42.0, 65.0, 25.0, 12.0, 17.0, 73.0, 28.0, 66.0, 85.0, 28.0, 6.0, 14.0, 57.0, 6.0, 28.0, 31.0, 57.0, 17.0, 27.0, 9.0, 17.0, 22.0, 27.0, 12.0, 22.0, 19.0, 20.0, 23.0, 9.0, 9.0, 22.0, 17.0, 19.0, 24.0, 3.0, 36.0, 28.0, 71.0, 22.0, 20.0, 23.0, 22.0, 30.0, 30.0, 3.0, 9.0, 9.0, 23.0, 28.0, 17.0, 36.0, 17.0, 9.0, 9.0, 20.0, 22.0, 33.0, 62.0, 42.0, 68.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [13.0, 12.0, 6.0, 8.0, 28.0, 40.0, 3.0, 6.0, 25.0, 11.0, 14.0, 0.0, 23.0, 19.0, 34.0, 56.0, 23.0, 21.0, 17.0, 19.0, 6.0, 5.0, 8.0, 17.0, 6.0, 3.0, 3.0, 3.0, 6.0, 6.0, 14.0, 9.0, 36.0, 37.0, 12.0, 5.0, 45.0, 32.0, 6.0, 3.0, 3.0, 14.0, 38.0, 28.0, 8.0, 6.0, 14.0, 15.0, 3.0, 6.0, 22.0, 14.0, 5.0, 28.0, 6.0, 6.0, 11.0, 9.0, 6.0, 8.0, 11.0, 14.0, 6.0, 32.0, 3.0, 3.0, 11.0, 8.0, 0.0, 3.0, 11.0, 8.0, 8.0, 3.0, 11.0, 11.0, 19.0, 17.0, 3.0, 3.0, 28.0, 14.0, 30.0, 35.0, 13.0, 12.0, 9.0, 3.0, 5.0, 12.0, 50.0, 23.0, 14.0, 14.0, 32.0, 34.0, 49.0, 36.0, 16.0, 12.0, 0.0, 6.0, 3.0, 11.0, 34.0, 23.0, 3.0, 3.0, 19.0, 9.0, 19.0, 12.0, 28.0, 29.0, 9.0, 8.0, 13.0, 14.0, 3.0, 6.0, 8.0, 9.0, 11.0, 11.0, 12.0, 15.0, 0.0, 12.0, 16.0, 6.0, 13.0, 6.0, 6.0, 14.0, 6.0, 17.0, 0.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 9.0, 9.0, 10.0, 3.0, 21.0, 3.0, 0.0, 20.0, 16.0, 25.0, 3.0, 42.0, 29.0, 11.0, 11.0, 6.0, 14.0, 6.0, 17.0, 11.0, 11.0, 
16.0, 14.0, 13.0, 17.0, 0.0, 3.0, 9.0, 0.0, 6.0, 3.0, 14.0, 9.0, 15.0, 13.0, 6.0, 11.0, 13.0, 23.0, 8.0, 9.0, 3.0, 6.0, 3.0, 6.0, 6.0, 14.0, 16.0, 6.0, 24.0, 9.0, 26.0, 36.0, 19.0, 23.0, 40.0, 28.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6973220319660581, "mean_inference_ms": 1.2163561047550697, "mean_action_processing_ms": 0.1342786781324832, "mean_env_wait_ms": 0.872687151210557, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 90.0, "episode_reward_min": 3.0, "episode_reward_mean": 27.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 56.0}, "policy_reward_mean": {"ppo": 13.61}, "hist_stats": {"episode_reward": [25.0, 14.0, 68.0, 9.0, 36.0, 14.0, 42.0, 90.0, 44.0, 36.0, 11.0, 25.0, 9.0, 6.0, 12.0, 23.0, 73.0, 17.0, 77.0, 9.0, 17.0, 66.0, 14.0, 29.0, 9.0, 36.0, 33.0, 12.0, 20.0, 14.0, 25.0, 38.0, 6.0, 19.0, 3.0, 19.0, 11.0, 22.0, 36.0, 6.0, 42.0, 65.0, 25.0, 12.0, 17.0, 73.0, 28.0, 66.0, 85.0, 28.0, 6.0, 14.0, 57.0, 6.0, 28.0, 31.0, 57.0, 17.0, 27.0, 9.0, 17.0, 22.0, 27.0, 12.0, 22.0, 19.0, 20.0, 23.0, 9.0, 9.0, 22.0, 17.0, 19.0, 24.0, 3.0, 36.0, 28.0, 71.0, 22.0, 20.0, 23.0, 22.0, 30.0, 30.0, 3.0, 9.0, 9.0, 23.0, 28.0, 17.0, 36.0, 17.0, 9.0, 9.0, 20.0, 22.0, 33.0, 62.0, 42.0, 68.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [13.0, 12.0, 6.0, 8.0, 28.0, 40.0, 3.0, 6.0, 25.0, 11.0, 14.0, 0.0, 23.0, 19.0, 34.0, 56.0, 23.0, 21.0, 17.0, 19.0, 6.0, 5.0, 8.0, 17.0, 6.0, 
3.0, 3.0, 3.0, 6.0, 6.0, 14.0, 9.0, 36.0, 37.0, 12.0, 5.0, 45.0, 32.0, 6.0, 3.0, 3.0, 14.0, 38.0, 28.0, 8.0, 6.0, 14.0, 15.0, 3.0, 6.0, 22.0, 14.0, 5.0, 28.0, 6.0, 6.0, 11.0, 9.0, 6.0, 8.0, 11.0, 14.0, 6.0, 32.0, 3.0, 3.0, 11.0, 8.0, 0.0, 3.0, 11.0, 8.0, 8.0, 3.0, 11.0, 11.0, 19.0, 17.0, 3.0, 3.0, 28.0, 14.0, 30.0, 35.0, 13.0, 12.0, 9.0, 3.0, 5.0, 12.0, 50.0, 23.0, 14.0, 14.0, 32.0, 34.0, 49.0, 36.0, 16.0, 12.0, 0.0, 6.0, 3.0, 11.0, 34.0, 23.0, 3.0, 3.0, 19.0, 9.0, 19.0, 12.0, 28.0, 29.0, 9.0, 8.0, 13.0, 14.0, 3.0, 6.0, 8.0, 9.0, 11.0, 11.0, 12.0, 15.0, 0.0, 12.0, 16.0, 6.0, 13.0, 6.0, 6.0, 14.0, 6.0, 17.0, 0.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 9.0, 9.0, 10.0, 3.0, 21.0, 3.0, 0.0, 20.0, 16.0, 25.0, 3.0, 42.0, 29.0, 11.0, 11.0, 6.0, 14.0, 6.0, 17.0, 11.0, 11.0, 16.0, 14.0, 13.0, 17.0, 0.0, 3.0, 9.0, 0.0, 6.0, 3.0, 14.0, 9.0, 15.0, 13.0, 6.0, 11.0, 13.0, 23.0, 8.0, 9.0, 3.0, 6.0, 3.0, 6.0, 6.0, 14.0, 16.0, 6.0, 24.0, 9.0, 26.0, 36.0, 19.0, 23.0, 40.0, 28.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6973220319660581, "mean_inference_ms": 1.2163561047550697, "mean_action_processing_ms": 0.1342786781324832, "mean_env_wait_ms": 0.872687151210557, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 460800, "num_agent_steps_trained": 460800, "num_env_steps_sampled": 230400, "num_env_steps_trained": 230400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 230400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 460800, "timers": {"training_iteration_time_ms": 3716.285, "learn_time_ms": 1113.511, "learn_throughput": 11495.167, "synch_weights_time_ms": 13.323}, "counters": {"num_env_steps_sampled": 230400, "num_env_steps_trained": 230400, "num_agent_steps_sampled": 460800, "num_agent_steps_trained": 460800}, "done": false, "episodes_total": 576, "training_iteration": 18, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", 
"date": "2022-10-23_20-01-06", "timestamp": 1666580466, "time_this_iter_s": 3.773197650909424, "time_total_s": 68.9686951637268, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, 
"rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], 
"output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": 
false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", 
"new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": 
{"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 68.9686951637268, "timesteps_since_restore": 0, "iterations_since_restore": 18, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.066666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 4.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 22.6, "shaped_reward_min": 3, "shaped_reward_max": 51, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.15, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.87, 
"onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.06, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.71, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.75, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.92, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.11, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.57, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.62, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.39, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.87, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.57, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 1.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.23, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.42, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.85, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.72, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 1.92, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.11, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.92, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.11, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 7.629394644936838e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.005456134676933289, "policy_loss": -0.004799796734005213, "vf_loss": 2.1967272758483887, "vf_explained_var": 0.07287011295557022, "kl": 0.0010914739686995745, "entropy": 1.7520215511322021, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 243200, "num_env_steps_trained": 243200, "num_agent_steps_sampled": 486400, "num_agent_steps_trained": 486400}, "sampler_results": {"episode_reward_max": 119.0, "episode_reward_min": 3.0, "episode_reward_mean": 31.0, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 61.0}, "policy_reward_mean": {"ppo": 15.5}, "custom_metrics": {"sparse_reward_mean": 4.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 22.6, "shaped_reward_min": 3, "shaped_reward_max": 51, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.15, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.87, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 3.06, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.71, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.75, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 1.92, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 6, "potting_onion_agent_1_mean": 2.11, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.57, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.62, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.39, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.46, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.62, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.87, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.57, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 1.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.23, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.85, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.72, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 1.92, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 6, "optimal_onion_potting_agent_1_mean": 2.11, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 1.92, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 6, "viable_onion_potting_agent_1_mean": 2.11, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [6.0, 19.0, 3.0, 19.0, 11.0, 22.0, 36.0, 6.0, 42.0, 65.0, 25.0, 12.0, 17.0, 73.0, 28.0, 66.0, 85.0, 28.0, 6.0, 14.0, 57.0, 6.0, 28.0, 31.0, 57.0, 17.0, 27.0, 9.0, 17.0, 22.0, 27.0, 12.0, 22.0, 19.0, 20.0, 23.0, 9.0, 9.0, 22.0, 17.0, 19.0, 24.0, 3.0, 36.0, 28.0, 71.0, 22.0, 20.0, 23.0, 22.0, 30.0, 30.0, 3.0, 9.0, 9.0, 23.0, 28.0, 17.0, 36.0, 17.0, 9.0, 9.0, 20.0, 22.0, 33.0, 62.0, 42.0, 68.0, 60.0, 28.0, 30.0, 36.0, 11.0, 28.0, 9.0, 14.0, 31.0, 91.0, 34.0, 62.0, 71.0, 68.0, 119.0, 66.0, 17.0, 9.0, 9.0, 33.0, 71.0, 20.0, 37.0, 28.0, 22.0, 60.0, 11.0, 42.0, 76.0, 31.0, 50.0, 57.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 3.0, 11.0, 8.0, 0.0, 3.0, 11.0, 8.0, 8.0, 3.0, 11.0, 11.0, 19.0, 17.0, 3.0, 3.0, 28.0, 14.0, 30.0, 35.0, 13.0, 12.0, 9.0, 3.0, 5.0, 12.0, 50.0, 23.0, 14.0, 14.0, 32.0, 34.0, 49.0, 36.0, 16.0, 12.0, 0.0, 6.0, 3.0, 11.0, 34.0, 23.0, 3.0, 3.0, 19.0, 9.0, 19.0, 12.0, 28.0, 29.0, 9.0, 8.0, 13.0, 14.0, 3.0, 6.0, 8.0, 9.0, 11.0, 11.0, 12.0, 15.0, 0.0, 12.0, 16.0, 6.0, 13.0, 6.0, 6.0, 14.0, 6.0, 17.0, 0.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 9.0, 9.0, 10.0, 3.0, 21.0, 3.0, 0.0, 20.0, 16.0, 25.0, 3.0, 42.0, 29.0, 11.0, 11.0, 6.0, 14.0, 6.0, 17.0, 11.0, 11.0, 16.0, 14.0, 13.0, 17.0, 0.0, 3.0, 9.0, 0.0, 6.0, 3.0, 14.0, 9.0, 15.0, 13.0, 6.0, 11.0, 13.0, 23.0, 8.0, 9.0, 3.0, 6.0, 3.0, 6.0, 6.0, 14.0, 16.0, 6.0, 24.0, 9.0, 26.0, 36.0, 19.0, 23.0, 40.0, 28.0, 29.0, 31.0, 9.0, 19.0, 14.0, 16.0, 14.0, 22.0, 8.0, 3.0, 14.0, 14.0, 3.0, 6.0, 6.0, 8.0, 20.0, 11.0, 40.0, 51.0, 22.0, 12.0, 23.0, 39.0, 31.0, 40.0, 37.0, 31.0, 61.0, 58.0, 32.0, 34.0, 11.0, 6.0, 3.0, 6.0, 3.0, 6.0, 18.0, 15.0, 34.0, 37.0, 14.0, 6.0, 22.0, 15.0, 14.0, 14.0, 8.0, 14.0, 31.0, 29.0, 8.0, 3.0, 20.0, 22.0, 37.0, 39.0, 25.0, 6.0, 25.0, 25.0, 23.0, 34.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6967297222460701, "mean_inference_ms": 1.2153942149654409, "mean_action_processing_ms": 0.13419810737632373, "mean_env_wait_ms": 0.8722581256551439, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 119.0, "episode_reward_min": 3.0, "episode_reward_mean": 31.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 61.0}, "policy_reward_mean": {"ppo": 15.5}, "hist_stats": {"episode_reward": [6.0, 19.0, 3.0, 19.0, 11.0, 22.0, 36.0, 6.0, 42.0, 65.0, 25.0, 12.0, 17.0, 73.0, 28.0, 66.0, 85.0, 28.0, 6.0, 14.0, 57.0, 6.0, 28.0, 31.0, 57.0, 17.0, 27.0, 9.0, 17.0, 22.0, 27.0, 12.0, 22.0, 19.0, 20.0, 23.0, 
9.0, 9.0, 22.0, 17.0, 19.0, 24.0, 3.0, 36.0, 28.0, 71.0, 22.0, 20.0, 23.0, 22.0, 30.0, 30.0, 3.0, 9.0, 9.0, 23.0, 28.0, 17.0, 36.0, 17.0, 9.0, 9.0, 20.0, 22.0, 33.0, 62.0, 42.0, 68.0, 60.0, 28.0, 30.0, 36.0, 11.0, 28.0, 9.0, 14.0, 31.0, 91.0, 34.0, 62.0, 71.0, 68.0, 119.0, 66.0, 17.0, 9.0, 9.0, 33.0, 71.0, 20.0, 37.0, 28.0, 22.0, 60.0, 11.0, 42.0, 76.0, 31.0, 50.0, 57.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 3.0, 11.0, 8.0, 0.0, 3.0, 11.0, 8.0, 8.0, 3.0, 11.0, 11.0, 19.0, 17.0, 3.0, 3.0, 28.0, 14.0, 30.0, 35.0, 13.0, 12.0, 9.0, 3.0, 5.0, 12.0, 50.0, 23.0, 14.0, 14.0, 32.0, 34.0, 49.0, 36.0, 16.0, 12.0, 0.0, 6.0, 3.0, 11.0, 34.0, 23.0, 3.0, 3.0, 19.0, 9.0, 19.0, 12.0, 28.0, 29.0, 9.0, 8.0, 13.0, 14.0, 3.0, 6.0, 8.0, 9.0, 11.0, 11.0, 12.0, 15.0, 0.0, 12.0, 16.0, 6.0, 13.0, 6.0, 6.0, 14.0, 6.0, 17.0, 0.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 9.0, 9.0, 10.0, 3.0, 21.0, 3.0, 0.0, 20.0, 16.0, 25.0, 3.0, 42.0, 29.0, 11.0, 11.0, 6.0, 14.0, 6.0, 17.0, 11.0, 11.0, 16.0, 14.0, 13.0, 17.0, 0.0, 3.0, 9.0, 0.0, 6.0, 3.0, 14.0, 9.0, 15.0, 13.0, 6.0, 11.0, 13.0, 23.0, 8.0, 9.0, 3.0, 6.0, 3.0, 6.0, 6.0, 14.0, 16.0, 6.0, 24.0, 9.0, 26.0, 36.0, 19.0, 23.0, 40.0, 28.0, 29.0, 31.0, 9.0, 19.0, 14.0, 16.0, 14.0, 22.0, 8.0, 3.0, 14.0, 14.0, 3.0, 6.0, 6.0, 8.0, 20.0, 11.0, 40.0, 51.0, 22.0, 12.0, 23.0, 39.0, 31.0, 40.0, 37.0, 31.0, 61.0, 58.0, 32.0, 34.0, 11.0, 6.0, 3.0, 6.0, 3.0, 6.0, 18.0, 15.0, 34.0, 37.0, 14.0, 6.0, 22.0, 15.0, 14.0, 14.0, 8.0, 14.0, 31.0, 29.0, 8.0, 3.0, 20.0, 22.0, 37.0, 
39.0, 25.0, 6.0, 25.0, 25.0, 23.0, 34.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6967297222460701, "mean_inference_ms": 1.2153942149654409, "mean_action_processing_ms": 0.13419810737632373, "mean_env_wait_ms": 0.8722581256551439, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 486400, "num_agent_steps_trained": 486400, "num_env_steps_sampled": 243200, "num_env_steps_trained": 243200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 243200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 486400, "timers": {"training_iteration_time_ms": 3696.48, "learn_time_ms": 1100.823, "learn_throughput": 11627.666, "synch_weights_time_ms": 12.828}, "counters": {"num_env_steps_sampled": 243200, "num_env_steps_trained": 243200, "num_agent_steps_sampled": 486400, "num_agent_steps_trained": 486400}, "done": false, "episodes_total": 608, "training_iteration": 19, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-09", "timestamp": 1666580469, "time_this_iter_s": 3.6425294876098633, "time_total_s": 72.61122465133667, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], 
"post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": 
{"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", 
"post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, 
"_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 72.61122465133667, "timesteps_since_restore": 0, "iterations_since_restore": 19, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.14, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 3.6, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 22.58, "shaped_reward_min": 3, "shaped_reward_max": 51, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, 
"tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.13, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.72, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.07, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 3.65, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.66, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.79, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.02, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.12, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, 
"dish_pickup_agent_0_mean": 3.59, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.62, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.95, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.6, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.19, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.22, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.37, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.68, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.69, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.02, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.12, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.02, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, 
"viable_onion_potting_agent_1_mean": 2.12, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.814697322468419e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.0051202354952692986, "policy_loss": -0.0043845875188708305, "vf_loss": 1.4147591590881348, "vf_explained_var": 0.15972867608070374, "kl": 0.000972163223195821, "entropy": 1.7542475461959839, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 256000, "num_env_steps_trained": 256000, "num_agent_steps_sampled": 512000, "num_agent_steps_trained": 512000}, 
"sampler_results": {"episode_reward_max": 119.0, "episode_reward_min": 3.0, "episode_reward_mean": 29.78, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 61.0}, "policy_reward_mean": {"ppo": 14.89}, "custom_metrics": {"sparse_reward_mean": 3.6, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 22.58, "shaped_reward_min": 3, "shaped_reward_max": 51, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.13, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.72, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.07, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 3.65, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.66, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 3.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.79, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.02, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.12, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.59, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.62, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 0.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.95, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.6, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.19, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.22, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.37, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.68, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.69, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.02, 
"optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.12, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.02, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.12, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 
0}, "hist_stats": {"episode_reward": [22.0, 19.0, 20.0, 23.0, 9.0, 9.0, 22.0, 17.0, 19.0, 24.0, 3.0, 36.0, 28.0, 71.0, 22.0, 20.0, 23.0, 22.0, 30.0, 30.0, 3.0, 9.0, 9.0, 23.0, 28.0, 17.0, 36.0, 17.0, 9.0, 9.0, 20.0, 22.0, 33.0, 62.0, 42.0, 68.0, 60.0, 28.0, 30.0, 36.0, 11.0, 28.0, 9.0, 14.0, 31.0, 91.0, 34.0, 62.0, 71.0, 68.0, 119.0, 66.0, 17.0, 9.0, 9.0, 33.0, 71.0, 20.0, 37.0, 28.0, 22.0, 60.0, 11.0, 42.0, 76.0, 31.0, 50.0, 57.0, 9.0, 20.0, 12.0, 9.0, 74.0, 30.0, 6.0, 9.0, 28.0, 14.0, 12.0, 34.0, 8.0, 9.0, 25.0, 26.0, 25.0, 25.0, 25.0, 28.0, 17.0, 20.0, 9.0, 28.0, 28.0, 20.0, 20.0, 82.0, 76.0, 14.0, 20.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [16.0, 6.0, 13.0, 6.0, 6.0, 14.0, 6.0, 17.0, 0.0, 9.0, 9.0, 0.0, 11.0, 11.0, 8.0, 9.0, 9.0, 10.0, 3.0, 21.0, 3.0, 0.0, 20.0, 16.0, 25.0, 3.0, 42.0, 29.0, 11.0, 11.0, 6.0, 14.0, 6.0, 17.0, 11.0, 11.0, 16.0, 14.0, 13.0, 17.0, 0.0, 3.0, 9.0, 0.0, 6.0, 3.0, 14.0, 9.0, 15.0, 13.0, 6.0, 11.0, 13.0, 23.0, 8.0, 9.0, 3.0, 6.0, 3.0, 6.0, 6.0, 14.0, 16.0, 6.0, 24.0, 9.0, 26.0, 36.0, 19.0, 23.0, 40.0, 28.0, 29.0, 31.0, 9.0, 19.0, 14.0, 16.0, 14.0, 22.0, 8.0, 3.0, 14.0, 14.0, 3.0, 6.0, 6.0, 8.0, 20.0, 11.0, 40.0, 51.0, 22.0, 12.0, 23.0, 39.0, 31.0, 40.0, 37.0, 31.0, 61.0, 58.0, 32.0, 34.0, 11.0, 6.0, 3.0, 6.0, 3.0, 6.0, 18.0, 15.0, 34.0, 37.0, 14.0, 6.0, 22.0, 15.0, 14.0, 14.0, 8.0, 14.0, 31.0, 29.0, 8.0, 3.0, 20.0, 22.0, 37.0, 39.0, 25.0, 6.0, 25.0, 25.0, 23.0, 34.0, 0.0, 9.0, 11.0, 9.0, 6.0, 6.0, 3.0, 6.0, 40.0, 34.0, 9.0, 21.0, 3.0, 
3.0, 3.0, 6.0, 14.0, 14.0, 11.0, 3.0, 3.0, 9.0, 18.0, 16.0, 8.0, 0.0, 0.0, 9.0, 13.0, 12.0, 12.0, 14.0, 11.0, 14.0, 14.0, 11.0, 16.0, 9.0, 17.0, 11.0, 12.0, 5.0, 11.0, 9.0, 9.0, 0.0, 9.0, 19.0, 9.0, 19.0, 6.0, 14.0, 17.0, 3.0, 34.0, 48.0, 36.0, 40.0, 11.0, 3.0, 8.0, 12.0, 6.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6964587722357374, "mean_inference_ms": 1.21476526717065, "mean_action_processing_ms": 0.13413883320263154, "mean_env_wait_ms": 0.8720606761944739, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 119.0, "episode_reward_min": 3.0, "episode_reward_mean": 29.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 61.0}, "policy_reward_mean": {"ppo": 14.89}, "hist_stats": {"episode_reward": [22.0, 19.0, 20.0, 23.0, 9.0, 9.0, 22.0, 17.0, 19.0, 24.0, 3.0, 36.0, 28.0, 71.0, 22.0, 20.0, 23.0, 22.0, 30.0, 30.0, 3.0, 9.0, 9.0, 23.0, 28.0, 17.0, 36.0, 17.0, 9.0, 9.0, 20.0, 22.0, 33.0, 62.0, 42.0, 68.0, 60.0, 28.0, 30.0, 36.0, 11.0, 28.0, 9.0, 14.0, 31.0, 91.0, 34.0, 62.0, 71.0, 68.0, 119.0, 66.0, 17.0, 9.0, 9.0, 33.0, 71.0, 20.0, 37.0, 28.0, 22.0, 60.0, 11.0, 42.0, 76.0, 31.0, 50.0, 57.0, 9.0, 20.0, 12.0, 9.0, 74.0, 30.0, 6.0, 9.0, 28.0, 14.0, 12.0, 34.0, 8.0, 9.0, 25.0, 26.0, 25.0, 25.0, 25.0, 28.0, 17.0, 20.0, 9.0, 28.0, 28.0, 20.0, 20.0, 82.0, 76.0, 14.0, 20.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [16.0, 6.0, 13.0, 6.0, 6.0, 14.0, 6.0, 17.0, 0.0, 9.0, 
9.0, 0.0, 11.0, 11.0, 8.0, 9.0, 9.0, 10.0, 3.0, 21.0, 3.0, 0.0, 20.0, 16.0, 25.0, 3.0, 42.0, 29.0, 11.0, 11.0, 6.0, 14.0, 6.0, 17.0, 11.0, 11.0, 16.0, 14.0, 13.0, 17.0, 0.0, 3.0, 9.0, 0.0, 6.0, 3.0, 14.0, 9.0, 15.0, 13.0, 6.0, 11.0, 13.0, 23.0, 8.0, 9.0, 3.0, 6.0, 3.0, 6.0, 6.0, 14.0, 16.0, 6.0, 24.0, 9.0, 26.0, 36.0, 19.0, 23.0, 40.0, 28.0, 29.0, 31.0, 9.0, 19.0, 14.0, 16.0, 14.0, 22.0, 8.0, 3.0, 14.0, 14.0, 3.0, 6.0, 6.0, 8.0, 20.0, 11.0, 40.0, 51.0, 22.0, 12.0, 23.0, 39.0, 31.0, 40.0, 37.0, 31.0, 61.0, 58.0, 32.0, 34.0, 11.0, 6.0, 3.0, 6.0, 3.0, 6.0, 18.0, 15.0, 34.0, 37.0, 14.0, 6.0, 22.0, 15.0, 14.0, 14.0, 8.0, 14.0, 31.0, 29.0, 8.0, 3.0, 20.0, 22.0, 37.0, 39.0, 25.0, 6.0, 25.0, 25.0, 23.0, 34.0, 0.0, 9.0, 11.0, 9.0, 6.0, 6.0, 3.0, 6.0, 40.0, 34.0, 9.0, 21.0, 3.0, 3.0, 3.0, 6.0, 14.0, 14.0, 11.0, 3.0, 3.0, 9.0, 18.0, 16.0, 8.0, 0.0, 0.0, 9.0, 13.0, 12.0, 12.0, 14.0, 11.0, 14.0, 14.0, 11.0, 16.0, 9.0, 17.0, 11.0, 12.0, 5.0, 11.0, 9.0, 9.0, 0.0, 9.0, 19.0, 9.0, 19.0, 6.0, 14.0, 17.0, 3.0, 34.0, 48.0, 36.0, 40.0, 11.0, 3.0, 8.0, 12.0, 6.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6964587722357374, "mean_inference_ms": 1.21476526717065, "mean_action_processing_ms": 0.13413883320263154, "mean_env_wait_ms": 0.8720606761944739, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 512000, "num_agent_steps_trained": 512000, "num_env_steps_sampled": 256000, "num_env_steps_trained": 256000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 256000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 512000, "timers": {"training_iteration_time_ms": 3678.582, "learn_time_ms": 1080.587, "learn_throughput": 11845.408, "synch_weights_time_ms": 11.897}, "counters": {"num_env_steps_sampled": 256000, "num_env_steps_trained": 256000, "num_agent_steps_sampled": 512000, "num_agent_steps_trained": 512000}, "done": false, "episodes_total": 640, 
"training_iteration": 20, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-13", "timestamp": 1666580473, "time_this_iter_s": 3.591038703918457, "time_total_s": 76.20226335525513, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, 
"num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, 
"num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, 
"postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, 
"lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 76.20226335525513, "timesteps_since_restore": 0, "iterations_since_restore": 20, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.21666666666667, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 5.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 24.63, "shaped_reward_min": 6, "shaped_reward_max": 53, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 5.35, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.68, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.31, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 3.72, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.25, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.22, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.24, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.89, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.7, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.3, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.26, 
"soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.62, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.25, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.22, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.25, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.22, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.9073486612342094e-07, "cur_lr": 0.0010000000474974513, "total_loss": -0.003267324063926935, "policy_loss": -0.002591660711914301, "vf_loss": 1.9827957153320312, "vf_explained_var": 0.10969623923301697, "kl": 0.0012384429574012756, "entropy": 1.7478868961334229, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 268800, "num_env_steps_trained": 268800, "num_agent_steps_sampled": 537600, "num_agent_steps_trained": 537600}, "sampler_results": {"episode_reward_max": 119.0, "episode_reward_min": 6.0, "episode_reward_mean": 35.03, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 61.0}, "policy_reward_mean": {"ppo": 17.515}, "custom_metrics": {"sparse_reward_mean": 5.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 24.63, "shaped_reward_min": 6, "shaped_reward_max": 53, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.35, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.68, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.31, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 3.72, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.25, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.22, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.24, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.89, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 0.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.51, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.7, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 8, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.3, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.62, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 2.25, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.22, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.25, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.22, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [33.0, 62.0, 42.0, 68.0, 60.0, 28.0, 30.0, 36.0, 11.0, 28.0, 9.0, 14.0, 31.0, 91.0, 34.0, 62.0, 71.0, 68.0, 119.0, 66.0, 17.0, 9.0, 9.0, 33.0, 71.0, 20.0, 37.0, 28.0, 22.0, 60.0, 11.0, 42.0, 76.0, 31.0, 50.0, 57.0, 9.0, 20.0, 12.0, 9.0, 74.0, 30.0, 6.0, 9.0, 28.0, 14.0, 12.0, 34.0, 8.0, 9.0, 25.0, 26.0, 25.0, 25.0, 25.0, 28.0, 17.0, 20.0, 9.0, 28.0, 28.0, 20.0, 20.0, 82.0, 76.0, 14.0, 20.0, 9.0, 12.0, 28.0, 90.0, 31.0, 20.0, 33.0, 38.0, 70.0, 93.0, 14.0, 23.0, 23.0, 33.0, 9.0, 14.0, 9.0, 20.0, 71.0, 41.0, 12.0, 22.0, 60.0, 26.0, 31.0, 9.0, 34.0, 20.0, 68.0, 57.0, 82.0, 65.0, 38.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [24.0, 9.0, 26.0, 36.0, 19.0, 23.0, 40.0, 28.0, 29.0, 31.0, 9.0, 19.0, 14.0, 16.0, 14.0, 22.0, 8.0, 3.0, 14.0, 14.0, 3.0, 6.0, 6.0, 8.0, 20.0, 11.0, 40.0, 51.0, 22.0, 12.0, 23.0, 39.0, 31.0, 40.0, 37.0, 31.0, 61.0, 58.0, 32.0, 34.0, 11.0, 6.0, 3.0, 6.0, 3.0, 6.0, 18.0, 15.0, 34.0, 37.0, 14.0, 6.0, 22.0, 15.0, 14.0, 14.0, 8.0, 14.0, 31.0, 29.0, 8.0, 3.0, 20.0, 22.0, 37.0, 39.0, 25.0, 6.0, 25.0, 25.0, 23.0, 34.0, 0.0, 9.0, 11.0, 9.0, 6.0, 6.0, 3.0, 6.0, 40.0, 34.0, 9.0, 21.0, 3.0, 3.0, 3.0, 6.0, 14.0, 14.0, 11.0, 3.0, 3.0, 9.0, 18.0, 16.0, 8.0, 0.0, 0.0, 9.0, 13.0, 12.0, 12.0, 14.0, 11.0, 14.0, 14.0, 11.0, 16.0, 9.0, 17.0, 11.0, 12.0, 5.0, 11.0, 9.0, 9.0, 0.0, 9.0, 19.0, 9.0, 19.0, 6.0, 14.0, 17.0, 3.0, 34.0, 48.0, 36.0, 40.0, 11.0, 3.0, 8.0, 12.0, 6.0, 3.0, 0.0, 12.0, 11.0, 17.0, 39.0, 51.0, 22.0, 9.0, 6.0, 14.0, 22.0, 11.0, 25.0, 13.0, 34.0, 36.0, 40.0, 53.0, 3.0, 11.0, 11.0, 12.0, 12.0, 11.0, 14.0, 19.0, 6.0, 3.0, 11.0, 3.0, 9.0, 0.0, 14.0, 6.0, 37.0, 34.0, 25.0, 16.0, 6.0, 6.0, 11.0, 11.0, 37.0, 23.0, 15.0, 11.0, 12.0, 19.0, 3.0, 6.0, 12.0, 22.0, 8.0, 12.0, 37.0, 31.0, 31.0, 26.0, 39.0, 43.0, 29.0, 36.0, 15.0, 23.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6960931178672759, "mean_inference_ms": 1.214054826686882, "mean_action_processing_ms": 0.13411483082756873, "mean_env_wait_ms": 0.8720779914357558, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 119.0, "episode_reward_min": 6.0, "episode_reward_mean": 35.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 61.0}, "policy_reward_mean": {"ppo": 17.515}, "hist_stats": {"episode_reward": [33.0, 62.0, 42.0, 68.0, 60.0, 28.0, 30.0, 36.0, 11.0, 28.0, 9.0, 
14.0, 31.0, 91.0, 34.0, 62.0, 71.0, 68.0, 119.0, 66.0, 17.0, 9.0, 9.0, 33.0, 71.0, 20.0, 37.0, 28.0, 22.0, 60.0, 11.0, 42.0, 76.0, 31.0, 50.0, 57.0, 9.0, 20.0, 12.0, 9.0, 74.0, 30.0, 6.0, 9.0, 28.0, 14.0, 12.0, 34.0, 8.0, 9.0, 25.0, 26.0, 25.0, 25.0, 25.0, 28.0, 17.0, 20.0, 9.0, 28.0, 28.0, 20.0, 20.0, 82.0, 76.0, 14.0, 20.0, 9.0, 12.0, 28.0, 90.0, 31.0, 20.0, 33.0, 38.0, 70.0, 93.0, 14.0, 23.0, 23.0, 33.0, 9.0, 14.0, 9.0, 20.0, 71.0, 41.0, 12.0, 22.0, 60.0, 26.0, 31.0, 9.0, 34.0, 20.0, 68.0, 57.0, 82.0, 65.0, 38.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [24.0, 9.0, 26.0, 36.0, 19.0, 23.0, 40.0, 28.0, 29.0, 31.0, 9.0, 19.0, 14.0, 16.0, 14.0, 22.0, 8.0, 3.0, 14.0, 14.0, 3.0, 6.0, 6.0, 8.0, 20.0, 11.0, 40.0, 51.0, 22.0, 12.0, 23.0, 39.0, 31.0, 40.0, 37.0, 31.0, 61.0, 58.0, 32.0, 34.0, 11.0, 6.0, 3.0, 6.0, 3.0, 6.0, 18.0, 15.0, 34.0, 37.0, 14.0, 6.0, 22.0, 15.0, 14.0, 14.0, 8.0, 14.0, 31.0, 29.0, 8.0, 3.0, 20.0, 22.0, 37.0, 39.0, 25.0, 6.0, 25.0, 25.0, 23.0, 34.0, 0.0, 9.0, 11.0, 9.0, 6.0, 6.0, 3.0, 6.0, 40.0, 34.0, 9.0, 21.0, 3.0, 3.0, 3.0, 6.0, 14.0, 14.0, 11.0, 3.0, 3.0, 9.0, 18.0, 16.0, 8.0, 0.0, 0.0, 9.0, 13.0, 12.0, 12.0, 14.0, 11.0, 14.0, 14.0, 11.0, 16.0, 9.0, 17.0, 11.0, 12.0, 5.0, 11.0, 9.0, 9.0, 0.0, 9.0, 19.0, 9.0, 19.0, 6.0, 14.0, 17.0, 3.0, 34.0, 48.0, 36.0, 40.0, 11.0, 3.0, 8.0, 12.0, 6.0, 3.0, 0.0, 12.0, 11.0, 17.0, 39.0, 51.0, 22.0, 9.0, 6.0, 14.0, 22.0, 11.0, 25.0, 13.0, 34.0, 36.0, 40.0, 53.0, 3.0, 11.0, 11.0, 12.0, 12.0, 11.0, 14.0, 19.0, 6.0, 3.0, 11.0, 
3.0, 9.0, 0.0, 14.0, 6.0, 37.0, 34.0, 25.0, 16.0, 6.0, 6.0, 11.0, 11.0, 37.0, 23.0, 15.0, 11.0, 12.0, 19.0, 3.0, 6.0, 12.0, 22.0, 8.0, 12.0, 37.0, 31.0, 31.0, 26.0, 39.0, 43.0, 29.0, 36.0, 15.0, 23.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6960931178672759, "mean_inference_ms": 1.214054826686882, "mean_action_processing_ms": 0.13411483082756873, "mean_env_wait_ms": 0.8720779914357558, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 537600, "num_agent_steps_trained": 537600, "num_env_steps_sampled": 268800, "num_env_steps_trained": 268800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 268800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 537600, "timers": {"training_iteration_time_ms": 3666.172, "learn_time_ms": 1066.079, "learn_throughput": 12006.614, "synch_weights_time_ms": 11.8}, "counters": {"num_env_steps_sampled": 268800, "num_env_steps_trained": 268800, "num_agent_steps_sampled": 537600, "num_agent_steps_trained": 537600}, "done": false, "episodes_total": 672, "training_iteration": 21, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-17", "timestamp": 1666580477, "time_this_iter_s": 3.6518807411193848, "time_total_s": 79.85414409637451, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": 
false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": 
"WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 79.85414409637451, "timesteps_since_restore": 0, "iterations_since_restore": 21, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.16, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 5.0, "sparse_reward_min": 0, "sparse_reward_max": 40, 
"shaped_reward_mean": 24.74, "shaped_reward_min": 6, "shaped_reward_max": 53, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.18, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.3, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.38, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 3.66, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.48, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.45, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 
2.27, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.28, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.16, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.07, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.29, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.37, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.65, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.27, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.28, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.27, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.28, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 9.536743306171047e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.0045935483649373055, "policy_loss": -0.003943379037082195, "vf_loss": 2.199497699737549, "vf_explained_var": 0.14554493129253387, "kl": 0.0012895276304334402, "entropy": 
1.740237832069397, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 281600, "num_env_steps_trained": 281600, "num_agent_steps_sampled": 563200, "num_agent_steps_trained": 563200}, "sampler_results": {"episode_reward_max": 108.0, "episode_reward_min": 6.0, "episode_reward_mean": 34.74, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 60.0}, "policy_reward_mean": {"ppo": 17.37}, "custom_metrics": {"sparse_reward_mean": 5.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 24.74, "shaped_reward_min": 6, "shaped_reward_max": 53, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.18, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 5.3, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 12, "useful_onion_pickup_agent_0_mean": 3.38, 
"useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 3.66, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.48, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.5, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.45, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.27, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 5, "potting_onion_agent_1_mean": 2.28, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 3.16, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.07, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.7, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.29, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.37, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, 
"soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.65, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.27, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 5, "optimal_onion_potting_agent_1_mean": 2.28, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.27, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 5, "viable_onion_potting_agent_1_mean": 2.28, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, 
"useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [76.0, 31.0, 50.0, 57.0, 9.0, 20.0, 12.0, 9.0, 74.0, 30.0, 6.0, 9.0, 28.0, 14.0, 12.0, 34.0, 8.0, 9.0, 25.0, 26.0, 25.0, 25.0, 25.0, 28.0, 17.0, 20.0, 9.0, 28.0, 28.0, 20.0, 20.0, 82.0, 76.0, 14.0, 20.0, 9.0, 12.0, 28.0, 90.0, 31.0, 20.0, 33.0, 38.0, 70.0, 93.0, 14.0, 23.0, 23.0, 33.0, 9.0, 14.0, 9.0, 20.0, 71.0, 41.0, 12.0, 22.0, 60.0, 26.0, 31.0, 9.0, 34.0, 20.0, 68.0, 57.0, 82.0, 65.0, 38.0, 9.0, 25.0, 9.0, 82.0, 25.0, 20.0, 38.0, 66.0, 66.0, 82.0, 57.0, 20.0, 9.0, 19.0, 23.0, 57.0, 12.0, 63.0, 50.0, 39.0, 8.0, 20.0, 25.0, 19.0, 108.0, 82.0, 30.0, 20.0, 47.0, 50.0, 93.0, 20.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [37.0, 39.0, 25.0, 6.0, 25.0, 25.0, 23.0, 34.0, 0.0, 9.0, 11.0, 9.0, 6.0, 6.0, 3.0, 6.0, 40.0, 34.0, 9.0, 21.0, 3.0, 3.0, 3.0, 6.0, 14.0, 14.0, 11.0, 3.0, 3.0, 9.0, 18.0, 16.0, 8.0, 0.0, 0.0, 9.0, 13.0, 12.0, 12.0, 14.0, 11.0, 14.0, 14.0, 11.0, 16.0, 9.0, 17.0, 11.0, 12.0, 5.0, 11.0, 9.0, 9.0, 0.0, 9.0, 19.0, 9.0, 19.0, 6.0, 14.0, 17.0, 3.0, 34.0, 48.0, 36.0, 40.0, 11.0, 3.0, 8.0, 12.0, 6.0, 3.0, 0.0, 12.0, 11.0, 17.0, 39.0, 51.0, 22.0, 9.0, 6.0, 14.0, 22.0, 11.0, 25.0, 13.0, 34.0, 36.0, 40.0, 53.0, 3.0, 11.0, 11.0, 12.0, 12.0, 11.0, 14.0, 19.0, 6.0, 3.0, 11.0, 3.0, 9.0, 0.0, 
14.0, 6.0, 37.0, 34.0, 25.0, 16.0, 6.0, 6.0, 11.0, 11.0, 37.0, 23.0, 15.0, 11.0, 12.0, 19.0, 3.0, 6.0, 12.0, 22.0, 8.0, 12.0, 37.0, 31.0, 31.0, 26.0, 39.0, 43.0, 29.0, 36.0, 15.0, 23.0, 6.0, 3.0, 17.0, 8.0, 0.0, 9.0, 42.0, 40.0, 11.0, 14.0, 9.0, 11.0, 18.0, 20.0, 40.0, 26.0, 34.0, 32.0, 38.0, 44.0, 34.0, 23.0, 11.0, 9.0, 0.0, 9.0, 14.0, 5.0, 9.0, 14.0, 23.0, 34.0, 6.0, 6.0, 31.0, 32.0, 28.0, 22.0, 12.0, 27.0, 5.0, 3.0, 3.0, 17.0, 13.0, 12.0, 3.0, 16.0, 48.0, 60.0, 45.0, 37.0, 21.0, 9.0, 12.0, 8.0, 20.0, 27.0, 27.0, 23.0, 42.0, 51.0, 14.0, 6.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6957187968137986, "mean_inference_ms": 1.2132320138272397, "mean_action_processing_ms": 0.13407466594363157, "mean_env_wait_ms": 0.8722917900578605, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 108.0, "episode_reward_min": 6.0, "episode_reward_mean": 34.74, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 60.0}, "policy_reward_mean": {"ppo": 17.37}, "hist_stats": {"episode_reward": [76.0, 31.0, 50.0, 57.0, 9.0, 20.0, 12.0, 9.0, 74.0, 30.0, 6.0, 9.0, 28.0, 14.0, 12.0, 34.0, 8.0, 9.0, 25.0, 26.0, 25.0, 25.0, 25.0, 28.0, 17.0, 20.0, 9.0, 28.0, 28.0, 20.0, 20.0, 82.0, 76.0, 14.0, 20.0, 9.0, 12.0, 28.0, 90.0, 31.0, 20.0, 33.0, 38.0, 70.0, 93.0, 14.0, 23.0, 23.0, 33.0, 9.0, 14.0, 9.0, 20.0, 71.0, 41.0, 12.0, 22.0, 60.0, 26.0, 31.0, 9.0, 34.0, 20.0, 68.0, 57.0, 82.0, 65.0, 38.0, 9.0, 25.0, 9.0, 82.0, 25.0, 20.0, 38.0, 66.0, 66.0, 82.0, 57.0, 20.0, 9.0, 19.0, 23.0, 57.0, 12.0, 63.0, 50.0, 39.0, 8.0, 20.0, 25.0, 19.0, 108.0, 82.0, 30.0, 20.0, 47.0, 50.0, 93.0, 20.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [37.0, 39.0, 25.0, 6.0, 25.0, 25.0, 23.0, 34.0, 0.0, 9.0, 11.0, 9.0, 6.0, 6.0, 3.0, 6.0, 40.0, 34.0, 9.0, 21.0, 3.0, 3.0, 3.0, 6.0, 14.0, 14.0, 11.0, 3.0, 3.0, 9.0, 18.0, 16.0, 8.0, 0.0, 0.0, 9.0, 13.0, 12.0, 12.0, 14.0, 11.0, 14.0, 14.0, 11.0, 16.0, 9.0, 17.0, 11.0, 12.0, 5.0, 11.0, 9.0, 9.0, 0.0, 9.0, 19.0, 9.0, 19.0, 6.0, 14.0, 17.0, 3.0, 34.0, 48.0, 36.0, 40.0, 11.0, 3.0, 8.0, 12.0, 6.0, 3.0, 0.0, 12.0, 11.0, 17.0, 39.0, 51.0, 22.0, 9.0, 6.0, 14.0, 22.0, 11.0, 25.0, 13.0, 34.0, 36.0, 40.0, 53.0, 3.0, 11.0, 11.0, 12.0, 12.0, 11.0, 14.0, 19.0, 6.0, 3.0, 11.0, 3.0, 9.0, 0.0, 14.0, 6.0, 37.0, 34.0, 25.0, 16.0, 6.0, 6.0, 11.0, 11.0, 37.0, 23.0, 15.0, 11.0, 12.0, 19.0, 3.0, 6.0, 12.0, 22.0, 8.0, 12.0, 37.0, 31.0, 31.0, 26.0, 39.0, 43.0, 29.0, 36.0, 15.0, 23.0, 6.0, 3.0, 17.0, 8.0, 0.0, 9.0, 42.0, 40.0, 11.0, 14.0, 9.0, 11.0, 18.0, 20.0, 40.0, 26.0, 34.0, 32.0, 38.0, 44.0, 34.0, 23.0, 11.0, 9.0, 0.0, 9.0, 14.0, 5.0, 9.0, 14.0, 23.0, 34.0, 6.0, 6.0, 31.0, 32.0, 28.0, 22.0, 12.0, 27.0, 5.0, 3.0, 3.0, 17.0, 13.0, 12.0, 3.0, 16.0, 48.0, 60.0, 45.0, 37.0, 21.0, 9.0, 12.0, 8.0, 20.0, 27.0, 27.0, 23.0, 42.0, 51.0, 14.0, 6.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6957187968137986, "mean_inference_ms": 1.2132320138272397, "mean_action_processing_ms": 0.13407466594363157, "mean_env_wait_ms": 0.8722917900578605, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 563200, "num_agent_steps_trained": 563200, "num_env_steps_sampled": 281600, "num_env_steps_trained": 281600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 281600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 563200, "timers": {"training_iteration_time_ms": 3668.242, 
"learn_time_ms": 1072.27, "learn_throughput": 11937.289, "synch_weights_time_ms": 11.797}, "counters": {"num_env_steps_sampled": 281600, "num_env_steps_trained": 281600, "num_agent_steps_sampled": 563200, "num_agent_steps_trained": 563200}, "done": false, "episodes_total": 704, "training_iteration": 22, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-21", "timestamp": 1666580481, "time_this_iter_s": 3.760704517364502, "time_total_s": 83.61484861373901, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, 
"custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, 
"sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": 
-1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 83.61484861373901, "timesteps_since_restore": 0, "iterations_since_restore": 22, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.1, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 6.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 27.32, "shaped_reward_min": 8, "shaped_reward_max": 54, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.53, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.42, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.77, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.84, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.46, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 2.59, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 2.89, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.15, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.73, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.32, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.39, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.76, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.9, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.46, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 2.59, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.46, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 2.59, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.7683716530855236e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.0024071987718343735, "policy_loss": -0.0017509105382487178, "vf_loss": 2.1249547004699707, "vf_explained_var": 0.18562087416648865, "kl": 0.001308967126533389, "entropy": 1.737565279006958, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 294400, "num_env_steps_trained": 294400, "num_agent_steps_sampled": 588800, "num_agent_steps_trained": 588800}, "sampler_results": {"episode_reward_max": 108.0, "episode_reward_min": 8.0, "episode_reward_mean": 39.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 60.0}, "policy_reward_mean": {"ppo": 19.66}, "custom_metrics": {"sparse_reward_mean": 6.0, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 27.32, "shaped_reward_min": 8, "shaped_reward_max": 54, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.53, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.42, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.77, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 3.84, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 2.46, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 2.59, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 6, "dish_pickup_agent_0_mean": 2.89, "dish_pickup_agent_0_min": 0, 
"dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.15, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 2, "useful_dish_pickup_agent_1_mean": 0.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.73, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.32, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.39, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.76, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.9, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.46, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 2.59, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 6, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.46, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 2.59, 
"viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 6, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [76.0, 14.0, 20.0, 9.0, 12.0, 28.0, 90.0, 31.0, 20.0, 33.0, 38.0, 70.0, 93.0, 14.0, 23.0, 23.0, 33.0, 9.0, 14.0, 9.0, 20.0, 71.0, 41.0, 12.0, 22.0, 60.0, 26.0, 31.0, 9.0, 34.0, 20.0, 68.0, 57.0, 82.0, 65.0, 38.0, 9.0, 25.0, 9.0, 82.0, 25.0, 20.0, 38.0, 66.0, 66.0, 82.0, 57.0, 20.0, 9.0, 19.0, 23.0, 57.0, 12.0, 63.0, 50.0, 39.0, 8.0, 20.0, 25.0, 19.0, 108.0, 82.0, 30.0, 20.0, 47.0, 50.0, 93.0, 20.0, 76.0, 31.0, 66.0, 34.0, 20.0, 20.0, 45.0, 94.0, 28.0, 87.0, 12.0, 74.0, 40.0, 9.0, 34.0, 39.0, 23.0, 88.0, 45.0, 40.0, 68.0, 9.0, 31.0, 36.0, 77.0, 36.0, 74.0, 17.0, 23.0, 9.0, 30.0, 
9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [36.0, 40.0, 11.0, 3.0, 8.0, 12.0, 6.0, 3.0, 0.0, 12.0, 11.0, 17.0, 39.0, 51.0, 22.0, 9.0, 6.0, 14.0, 22.0, 11.0, 25.0, 13.0, 34.0, 36.0, 40.0, 53.0, 3.0, 11.0, 11.0, 12.0, 12.0, 11.0, 14.0, 19.0, 6.0, 3.0, 11.0, 3.0, 9.0, 0.0, 14.0, 6.0, 37.0, 34.0, 25.0, 16.0, 6.0, 6.0, 11.0, 11.0, 37.0, 23.0, 15.0, 11.0, 12.0, 19.0, 3.0, 6.0, 12.0, 22.0, 8.0, 12.0, 37.0, 31.0, 31.0, 26.0, 39.0, 43.0, 29.0, 36.0, 15.0, 23.0, 6.0, 3.0, 17.0, 8.0, 0.0, 9.0, 42.0, 40.0, 11.0, 14.0, 9.0, 11.0, 18.0, 20.0, 40.0, 26.0, 34.0, 32.0, 38.0, 44.0, 34.0, 23.0, 11.0, 9.0, 0.0, 9.0, 14.0, 5.0, 9.0, 14.0, 23.0, 34.0, 6.0, 6.0, 31.0, 32.0, 28.0, 22.0, 12.0, 27.0, 5.0, 3.0, 3.0, 17.0, 13.0, 12.0, 3.0, 16.0, 48.0, 60.0, 45.0, 37.0, 21.0, 9.0, 12.0, 8.0, 20.0, 27.0, 27.0, 23.0, 42.0, 51.0, 14.0, 6.0, 25.0, 51.0, 20.0, 11.0, 29.0, 37.0, 17.0, 17.0, 15.0, 5.0, 20.0, 0.0, 25.0, 20.0, 49.0, 45.0, 14.0, 14.0, 42.0, 45.0, 6.0, 6.0, 37.0, 37.0, 6.0, 34.0, 3.0, 6.0, 11.0, 23.0, 17.0, 22.0, 9.0, 14.0, 40.0, 48.0, 17.0, 28.0, 14.0, 26.0, 28.0, 40.0, 0.0, 9.0, 14.0, 17.0, 17.0, 19.0, 40.0, 37.0, 14.0, 22.0, 42.0, 32.0, 6.0, 11.0, 9.0, 14.0, 6.0, 3.0, 16.0, 14.0, 6.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6953188897289305, "mean_inference_ms": 1.2124798978585105, "mean_action_processing_ms": 0.13404729178491362, "mean_env_wait_ms": 0.8727043294602006, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 108.0, "episode_reward_min": 
8.0, "episode_reward_mean": 39.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 60.0}, "policy_reward_mean": {"ppo": 19.66}, "hist_stats": {"episode_reward": [76.0, 14.0, 20.0, 9.0, 12.0, 28.0, 90.0, 31.0, 20.0, 33.0, 38.0, 70.0, 93.0, 14.0, 23.0, 23.0, 33.0, 9.0, 14.0, 9.0, 20.0, 71.0, 41.0, 12.0, 22.0, 60.0, 26.0, 31.0, 9.0, 34.0, 20.0, 68.0, 57.0, 82.0, 65.0, 38.0, 9.0, 25.0, 9.0, 82.0, 25.0, 20.0, 38.0, 66.0, 66.0, 82.0, 57.0, 20.0, 9.0, 19.0, 23.0, 57.0, 12.0, 63.0, 50.0, 39.0, 8.0, 20.0, 25.0, 19.0, 108.0, 82.0, 30.0, 20.0, 47.0, 50.0, 93.0, 20.0, 76.0, 31.0, 66.0, 34.0, 20.0, 20.0, 45.0, 94.0, 28.0, 87.0, 12.0, 74.0, 40.0, 9.0, 34.0, 39.0, 23.0, 88.0, 45.0, 40.0, 68.0, 9.0, 31.0, 36.0, 77.0, 36.0, 74.0, 17.0, 23.0, 9.0, 30.0, 9.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [36.0, 40.0, 11.0, 3.0, 8.0, 12.0, 6.0, 3.0, 0.0, 12.0, 11.0, 17.0, 39.0, 51.0, 22.0, 9.0, 6.0, 14.0, 22.0, 11.0, 25.0, 13.0, 34.0, 36.0, 40.0, 53.0, 3.0, 11.0, 11.0, 12.0, 12.0, 11.0, 14.0, 19.0, 6.0, 3.0, 11.0, 3.0, 9.0, 0.0, 14.0, 6.0, 37.0, 34.0, 25.0, 16.0, 6.0, 6.0, 11.0, 11.0, 37.0, 23.0, 15.0, 11.0, 12.0, 19.0, 3.0, 6.0, 12.0, 22.0, 8.0, 12.0, 37.0, 31.0, 31.0, 26.0, 39.0, 43.0, 29.0, 36.0, 15.0, 23.0, 6.0, 3.0, 17.0, 8.0, 0.0, 9.0, 42.0, 40.0, 11.0, 14.0, 9.0, 11.0, 18.0, 20.0, 40.0, 26.0, 34.0, 32.0, 38.0, 44.0, 34.0, 23.0, 11.0, 9.0, 0.0, 9.0, 14.0, 5.0, 9.0, 14.0, 23.0, 34.0, 6.0, 6.0, 31.0, 32.0, 28.0, 22.0, 12.0, 
27.0, 5.0, 3.0, 3.0, 17.0, 13.0, 12.0, 3.0, 16.0, 48.0, 60.0, 45.0, 37.0, 21.0, 9.0, 12.0, 8.0, 20.0, 27.0, 27.0, 23.0, 42.0, 51.0, 14.0, 6.0, 25.0, 51.0, 20.0, 11.0, 29.0, 37.0, 17.0, 17.0, 15.0, 5.0, 20.0, 0.0, 25.0, 20.0, 49.0, 45.0, 14.0, 14.0, 42.0, 45.0, 6.0, 6.0, 37.0, 37.0, 6.0, 34.0, 3.0, 6.0, 11.0, 23.0, 17.0, 22.0, 9.0, 14.0, 40.0, 48.0, 17.0, 28.0, 14.0, 26.0, 28.0, 40.0, 0.0, 9.0, 14.0, 17.0, 17.0, 19.0, 40.0, 37.0, 14.0, 22.0, 42.0, 32.0, 6.0, 11.0, 9.0, 14.0, 6.0, 3.0, 16.0, 14.0, 6.0, 3.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6953188897289305, "mean_inference_ms": 1.2124798978585105, "mean_action_processing_ms": 0.13404729178491362, "mean_env_wait_ms": 0.8727043294602006, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 588800, "num_agent_steps_trained": 588800, "num_env_steps_sampled": 294400, "num_env_steps_trained": 294400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 294400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 588800, "timers": {"training_iteration_time_ms": 3682.352, "learn_time_ms": 1083.207, "learn_throughput": 11816.758, "synch_weights_time_ms": 11.952}, "counters": {"num_env_steps_sampled": 294400, "num_env_steps_trained": 294400, "num_agent_steps_sampled": 588800, "num_agent_steps_trained": 588800}, "done": false, "episodes_total": 736, "training_iteration": 23, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-25", "timestamp": 1666580485, "time_this_iter_s": 3.901500701904297, "time_total_s": 87.51634931564331, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", 
"eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": 
"NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": 
false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 
87.51634931564331, "timesteps_since_restore": 0, "iterations_since_restore": 23, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.016666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 7.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 29.4, "shaped_reward_min": 8, "shaped_reward_max": 59, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.68, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.85, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.66, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.07, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.48, 
"onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.6, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.48, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.46, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 2.86, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.03, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.87, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.56, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.67, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.65, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 1.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.58, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.5, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.94, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 2.46, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, 
"optimal_onion_potting_agent_1_mean": 2.86, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.46, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 2.86, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": 
{"learner_stats": {"cur_kl_coeff": 2.3841858265427618e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.0055618928745388985, "policy_loss": -0.004936683923006058, "vf_loss": 2.4280309677124023, "vf_explained_var": 0.1790190190076828, "kl": 0.0011535612866282463, "entropy": 1.7360193729400635, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 307200, "num_env_steps_trained": 307200, "num_agent_steps_sampled": 614400, "num_agent_steps_trained": 614400}, "sampler_results": {"episode_reward_max": 128.0, "episode_reward_min": 8.0, "episode_reward_mean": 43.8, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 21.9}, "custom_metrics": {"sparse_reward_mean": 7.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 29.4, "shaped_reward_min": 8, "shaped_reward_max": 59, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, 
"potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.68, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 5.85, "onion_pickup_agent_1_min": 0, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.66, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.07, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 9, "onion_drop_agent_0_mean": 2.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.6, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.48, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.46, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 2.86, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.03, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.87, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.56, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.67, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.65, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.49, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 1.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, 
"soup_pickup_agent_1_mean": 1.58, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.5, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.94, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 2.46, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 2.86, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.46, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 2.86, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, 
"catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [57.0, 82.0, 65.0, 38.0, 9.0, 25.0, 9.0, 82.0, 25.0, 20.0, 38.0, 66.0, 66.0, 82.0, 57.0, 20.0, 9.0, 19.0, 23.0, 57.0, 12.0, 63.0, 50.0, 39.0, 8.0, 20.0, 25.0, 19.0, 108.0, 82.0, 30.0, 20.0, 47.0, 50.0, 93.0, 20.0, 76.0, 31.0, 66.0, 34.0, 20.0, 20.0, 45.0, 94.0, 28.0, 87.0, 12.0, 74.0, 40.0, 9.0, 34.0, 39.0, 23.0, 88.0, 45.0, 40.0, 68.0, 9.0, 31.0, 36.0, 77.0, 36.0, 74.0, 17.0, 23.0, 9.0, 30.0, 9.0, 20.0, 56.0, 88.0, 20.0, 17.0, 128.0, 31.0, 11.0, 23.0, 82.0, 42.0, 14.0, 66.0, 25.0, 22.0, 99.0, 23.0, 45.0, 82.0, 76.0, 31.0, 31.0, 9.0, 22.0, 82.0, 71.0, 71.0, 85.0, 28.0, 63.0, 33.0, 25.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [31.0, 26.0, 39.0, 43.0, 29.0, 36.0, 15.0, 23.0, 6.0, 3.0, 17.0, 8.0, 0.0, 9.0, 42.0, 40.0, 11.0, 14.0, 9.0, 11.0, 18.0, 20.0, 40.0, 26.0, 34.0, 32.0, 38.0, 44.0, 34.0, 23.0, 11.0, 9.0, 0.0, 9.0, 14.0, 5.0, 9.0, 14.0, 23.0, 34.0, 6.0, 6.0, 31.0, 32.0, 28.0, 22.0, 12.0, 27.0, 5.0, 
3.0, 3.0, 17.0, 13.0, 12.0, 3.0, 16.0, 48.0, 60.0, 45.0, 37.0, 21.0, 9.0, 12.0, 8.0, 20.0, 27.0, 27.0, 23.0, 42.0, 51.0, 14.0, 6.0, 25.0, 51.0, 20.0, 11.0, 29.0, 37.0, 17.0, 17.0, 15.0, 5.0, 20.0, 0.0, 25.0, 20.0, 49.0, 45.0, 14.0, 14.0, 42.0, 45.0, 6.0, 6.0, 37.0, 37.0, 6.0, 34.0, 3.0, 6.0, 11.0, 23.0, 17.0, 22.0, 9.0, 14.0, 40.0, 48.0, 17.0, 28.0, 14.0, 26.0, 28.0, 40.0, 0.0, 9.0, 14.0, 17.0, 17.0, 19.0, 40.0, 37.0, 14.0, 22.0, 42.0, 32.0, 6.0, 11.0, 9.0, 14.0, 6.0, 3.0, 16.0, 14.0, 6.0, 3.0, 14.0, 6.0, 28.0, 28.0, 44.0, 44.0, 14.0, 6.0, 11.0, 6.0, 60.0, 68.0, 12.0, 19.0, 8.0, 3.0, 9.0, 14.0, 37.0, 45.0, 25.0, 17.0, 9.0, 5.0, 32.0, 34.0, 16.0, 9.0, 11.0, 11.0, 50.0, 49.0, 0.0, 23.0, 20.0, 25.0, 40.0, 42.0, 47.0, 29.0, 19.0, 12.0, 14.0, 17.0, 6.0, 3.0, 16.0, 6.0, 42.0, 40.0, 36.0, 35.0, 37.0, 34.0, 48.0, 37.0, 16.0, 12.0, 20.0, 43.0, 25.0, 8.0, 11.0, 14.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6950401586137636, "mean_inference_ms": 1.2118232379219347, "mean_action_processing_ms": 0.1340076109085824, "mean_env_wait_ms": 0.8730247289869201, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 128.0, "episode_reward_min": 8.0, "episode_reward_mean": 43.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 21.9}, "hist_stats": {"episode_reward": [57.0, 82.0, 65.0, 38.0, 9.0, 25.0, 9.0, 82.0, 25.0, 20.0, 38.0, 66.0, 66.0, 82.0, 57.0, 20.0, 9.0, 19.0, 23.0, 57.0, 12.0, 63.0, 50.0, 39.0, 8.0, 20.0, 25.0, 19.0, 108.0, 82.0, 30.0, 20.0, 47.0, 50.0, 93.0, 20.0, 76.0, 31.0, 66.0, 34.0, 20.0, 20.0, 45.0, 94.0, 28.0, 87.0, 12.0, 74.0, 40.0, 9.0, 34.0, 39.0, 23.0, 88.0, 45.0, 40.0, 68.0, 9.0, 31.0, 36.0, 77.0, 36.0, 74.0, 17.0, 23.0, 9.0, 30.0, 9.0, 20.0, 56.0, 88.0, 20.0, 17.0, 128.0, 31.0, 11.0, 23.0, 82.0, 42.0, 14.0, 66.0, 25.0, 22.0, 99.0, 23.0, 45.0, 82.0, 76.0, 31.0, 31.0, 9.0, 22.0, 82.0, 71.0, 71.0, 85.0, 28.0, 63.0, 33.0, 25.0], 
"episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [31.0, 26.0, 39.0, 43.0, 29.0, 36.0, 15.0, 23.0, 6.0, 3.0, 17.0, 8.0, 0.0, 9.0, 42.0, 40.0, 11.0, 14.0, 9.0, 11.0, 18.0, 20.0, 40.0, 26.0, 34.0, 32.0, 38.0, 44.0, 34.0, 23.0, 11.0, 9.0, 0.0, 9.0, 14.0, 5.0, 9.0, 14.0, 23.0, 34.0, 6.0, 6.0, 31.0, 32.0, 28.0, 22.0, 12.0, 27.0, 5.0, 3.0, 3.0, 17.0, 13.0, 12.0, 3.0, 16.0, 48.0, 60.0, 45.0, 37.0, 21.0, 9.0, 12.0, 8.0, 20.0, 27.0, 27.0, 23.0, 42.0, 51.0, 14.0, 6.0, 25.0, 51.0, 20.0, 11.0, 29.0, 37.0, 17.0, 17.0, 15.0, 5.0, 20.0, 0.0, 25.0, 20.0, 49.0, 45.0, 14.0, 14.0, 42.0, 45.0, 6.0, 6.0, 37.0, 37.0, 6.0, 34.0, 3.0, 6.0, 11.0, 23.0, 17.0, 22.0, 9.0, 14.0, 40.0, 48.0, 17.0, 28.0, 14.0, 26.0, 28.0, 40.0, 0.0, 9.0, 14.0, 17.0, 17.0, 19.0, 40.0, 37.0, 14.0, 22.0, 42.0, 32.0, 6.0, 11.0, 9.0, 14.0, 6.0, 3.0, 16.0, 14.0, 6.0, 3.0, 14.0, 6.0, 28.0, 28.0, 44.0, 44.0, 14.0, 6.0, 11.0, 6.0, 60.0, 68.0, 12.0, 19.0, 8.0, 3.0, 9.0, 14.0, 37.0, 45.0, 25.0, 17.0, 9.0, 5.0, 32.0, 34.0, 16.0, 9.0, 11.0, 11.0, 50.0, 49.0, 0.0, 23.0, 20.0, 25.0, 40.0, 42.0, 47.0, 29.0, 19.0, 12.0, 14.0, 17.0, 6.0, 3.0, 16.0, 6.0, 42.0, 40.0, 36.0, 35.0, 37.0, 34.0, 48.0, 37.0, 16.0, 12.0, 20.0, 43.0, 25.0, 8.0, 11.0, 14.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6950401586137636, "mean_inference_ms": 1.2118232379219347, "mean_action_processing_ms": 0.1340076109085824, "mean_env_wait_ms": 0.8730247289869201, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 
614400, "num_agent_steps_trained": 614400, "num_env_steps_sampled": 307200, "num_env_steps_trained": 307200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 307200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 614400, "timers": {"training_iteration_time_ms": 3677.506, "learn_time_ms": 1091.276, "learn_throughput": 11729.393, "synch_weights_time_ms": 11.902}, "counters": {"num_env_steps_sampled": 307200, "num_env_steps_trained": 307200, "num_agent_steps_sampled": 614400, "num_agent_steps_trained": 614400}, "done": false, "episodes_total": 768, "training_iteration": 24, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-29", "timestamp": 1666580489, "time_this_iter_s": 3.8679988384246826, "time_total_s": 91.384348154068, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": 
true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, 
"attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], 
"same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, 
"attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, 
"replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": 
-1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 91.384348154068, "timesteps_since_restore": 0, "iterations_since_restore": 24, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.520000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 7.4, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 31.3, "shaped_reward_min": 9, "shaped_reward_max": 59, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.83, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.01, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.82, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.2, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.65, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.01, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 2.93, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.85, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.64, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 
0.75, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.59, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.69, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.62, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.88, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 1.09, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 2.65, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.01, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.65, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.01, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.1920929132713809e-08, "cur_lr": 0.0010000000474974513, "total_loss": -0.0027541820891201496, "policy_loss": -0.0021353354677557945, "vf_loss": 2.474787473678589, "vf_explained_var": 0.18035045266151428, "kl": 0.0013497013133019209, "entropy": 1.7326486110687256, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 320000, "num_env_steps_trained": 320000, "num_agent_steps_sampled": 640000, "num_agent_steps_trained": 640000}, "sampler_results": {"episode_reward_max": 133.0, "episode_reward_min": 9.0, "episode_reward_mean": 46.1, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 23.05}, "custom_metrics": {"sparse_reward_mean": 
7.4, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 31.3, "shaped_reward_min": 9, "shaped_reward_max": 59, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.83, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.01, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.82, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 9, "useful_onion_pickup_agent_1_mean": 4.2, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.42, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.65, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 7, "potting_onion_agent_1_mean": 3.01, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 2.93, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 2.85, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.64, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.75, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.7, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.61, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 1.59, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 1.69, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.62, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 2, "soup_drop_agent_0_mean": 0.88, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 1.09, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 2.65, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 7, "optimal_onion_potting_agent_1_mean": 3.01, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.65, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 7, "viable_onion_potting_agent_1_mean": 3.01, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [47.0, 50.0, 93.0, 20.0, 76.0, 31.0, 66.0, 34.0, 20.0, 20.0, 45.0, 94.0, 28.0, 87.0, 12.0, 74.0, 40.0, 9.0, 34.0, 39.0, 23.0, 88.0, 45.0, 40.0, 68.0, 9.0, 31.0, 36.0, 77.0, 36.0, 74.0, 17.0, 23.0, 9.0, 30.0, 9.0, 20.0, 56.0, 88.0, 20.0, 17.0, 128.0, 31.0, 11.0, 23.0, 82.0, 42.0, 14.0, 66.0, 
25.0, 22.0, 99.0, 23.0, 45.0, 82.0, 76.0, 31.0, 31.0, 9.0, 22.0, 82.0, 71.0, 71.0, 85.0, 28.0, 63.0, 33.0, 25.0, 82.0, 93.0, 56.0, 25.0, 79.0, 20.0, 77.0, 79.0, 69.0, 23.0, 12.0, 28.0, 60.0, 29.0, 133.0, 85.0, 19.0, 66.0, 31.0, 17.0, 20.0, 12.0, 34.0, 20.0, 68.0, 28.0, 82.0, 48.0, 65.0, 26.0, 36.0, 33.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 27.0, 27.0, 23.0, 42.0, 51.0, 14.0, 6.0, 25.0, 51.0, 20.0, 11.0, 29.0, 37.0, 17.0, 17.0, 15.0, 5.0, 20.0, 0.0, 25.0, 20.0, 49.0, 45.0, 14.0, 14.0, 42.0, 45.0, 6.0, 6.0, 37.0, 37.0, 6.0, 34.0, 3.0, 6.0, 11.0, 23.0, 17.0, 22.0, 9.0, 14.0, 40.0, 48.0, 17.0, 28.0, 14.0, 26.0, 28.0, 40.0, 0.0, 9.0, 14.0, 17.0, 17.0, 19.0, 40.0, 37.0, 14.0, 22.0, 42.0, 32.0, 6.0, 11.0, 9.0, 14.0, 6.0, 3.0, 16.0, 14.0, 6.0, 3.0, 14.0, 6.0, 28.0, 28.0, 44.0, 44.0, 14.0, 6.0, 11.0, 6.0, 60.0, 68.0, 12.0, 19.0, 8.0, 3.0, 9.0, 14.0, 37.0, 45.0, 25.0, 17.0, 9.0, 5.0, 32.0, 34.0, 16.0, 9.0, 11.0, 11.0, 50.0, 49.0, 0.0, 23.0, 20.0, 25.0, 40.0, 42.0, 47.0, 29.0, 19.0, 12.0, 14.0, 17.0, 6.0, 3.0, 16.0, 6.0, 42.0, 40.0, 36.0, 35.0, 37.0, 34.0, 48.0, 37.0, 16.0, 12.0, 20.0, 43.0, 25.0, 8.0, 11.0, 14.0, 37.0, 45.0, 46.0, 47.0, 28.0, 28.0, 17.0, 8.0, 33.0, 46.0, 11.0, 9.0, 34.0, 43.0, 34.0, 45.0, 34.0, 35.0, 17.0, 6.0, 3.0, 9.0, 17.0, 11.0, 23.0, 37.0, 15.0, 14.0, 65.0, 68.0, 39.0, 46.0, 14.0, 5.0, 35.0, 31.0, 14.0, 17.0, 11.0, 6.0, 11.0, 9.0, 3.0, 9.0, 17.0, 17.0, 9.0, 11.0, 37.0, 31.0, 18.0, 10.0, 34.0, 48.0, 25.0, 23.0, 36.0, 29.0, 12.0, 14.0, 16.0, 20.0, 
8.0, 25.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6947941057045602, "mean_inference_ms": 1.2115752069605927, "mean_action_processing_ms": 0.13399848174796214, "mean_env_wait_ms": 0.8736724055725108, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 133.0, "episode_reward_min": 9.0, "episode_reward_mean": 46.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 23.05}, "hist_stats": {"episode_reward": [47.0, 50.0, 93.0, 20.0, 76.0, 31.0, 66.0, 34.0, 20.0, 20.0, 45.0, 94.0, 28.0, 87.0, 12.0, 74.0, 40.0, 9.0, 34.0, 39.0, 23.0, 88.0, 45.0, 40.0, 68.0, 9.0, 31.0, 36.0, 77.0, 36.0, 74.0, 17.0, 23.0, 9.0, 30.0, 9.0, 20.0, 56.0, 88.0, 20.0, 17.0, 128.0, 31.0, 11.0, 23.0, 82.0, 42.0, 14.0, 66.0, 25.0, 22.0, 99.0, 23.0, 45.0, 82.0, 76.0, 31.0, 31.0, 9.0, 22.0, 82.0, 71.0, 71.0, 85.0, 28.0, 63.0, 33.0, 25.0, 82.0, 93.0, 56.0, 25.0, 79.0, 20.0, 77.0, 79.0, 69.0, 23.0, 12.0, 28.0, 60.0, 29.0, 133.0, 85.0, 19.0, 66.0, 31.0, 17.0, 20.0, 12.0, 34.0, 20.0, 68.0, 28.0, 82.0, 48.0, 65.0, 26.0, 36.0, 33.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 27.0, 27.0, 23.0, 42.0, 51.0, 14.0, 6.0, 25.0, 51.0, 20.0, 11.0, 29.0, 37.0, 17.0, 17.0, 15.0, 5.0, 20.0, 0.0, 25.0, 20.0, 49.0, 45.0, 14.0, 14.0, 42.0, 45.0, 6.0, 6.0, 37.0, 37.0, 6.0, 34.0, 3.0, 6.0, 11.0, 23.0, 17.0, 22.0, 9.0, 14.0, 40.0, 48.0, 17.0, 28.0, 14.0, 26.0, 28.0, 40.0, 0.0, 9.0, 14.0, 
17.0, 17.0, 19.0, 40.0, 37.0, 14.0, 22.0, 42.0, 32.0, 6.0, 11.0, 9.0, 14.0, 6.0, 3.0, 16.0, 14.0, 6.0, 3.0, 14.0, 6.0, 28.0, 28.0, 44.0, 44.0, 14.0, 6.0, 11.0, 6.0, 60.0, 68.0, 12.0, 19.0, 8.0, 3.0, 9.0, 14.0, 37.0, 45.0, 25.0, 17.0, 9.0, 5.0, 32.0, 34.0, 16.0, 9.0, 11.0, 11.0, 50.0, 49.0, 0.0, 23.0, 20.0, 25.0, 40.0, 42.0, 47.0, 29.0, 19.0, 12.0, 14.0, 17.0, 6.0, 3.0, 16.0, 6.0, 42.0, 40.0, 36.0, 35.0, 37.0, 34.0, 48.0, 37.0, 16.0, 12.0, 20.0, 43.0, 25.0, 8.0, 11.0, 14.0, 37.0, 45.0, 46.0, 47.0, 28.0, 28.0, 17.0, 8.0, 33.0, 46.0, 11.0, 9.0, 34.0, 43.0, 34.0, 45.0, 34.0, 35.0, 17.0, 6.0, 3.0, 9.0, 17.0, 11.0, 23.0, 37.0, 15.0, 14.0, 65.0, 68.0, 39.0, 46.0, 14.0, 5.0, 35.0, 31.0, 14.0, 17.0, 11.0, 6.0, 11.0, 9.0, 3.0, 9.0, 17.0, 17.0, 9.0, 11.0, 37.0, 31.0, 18.0, 10.0, 34.0, 48.0, 25.0, 23.0, 36.0, 29.0, 12.0, 14.0, 16.0, 20.0, 8.0, 25.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6947941057045602, "mean_inference_ms": 1.2115752069605927, "mean_action_processing_ms": 0.13399848174796214, "mean_env_wait_ms": 0.8736724055725108, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 640000, "num_agent_steps_trained": 640000, "num_env_steps_sampled": 320000, "num_env_steps_trained": 320000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 320000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 640000, "timers": {"training_iteration_time_ms": 3680.121, "learn_time_ms": 1087.853, "learn_throughput": 11766.301, "synch_weights_time_ms": 11.846}, "counters": {"num_env_steps_sampled": 320000, "num_env_steps_trained": 320000, "num_agent_steps_sampled": 640000, "num_agent_steps_trained": 640000}, "done": false, "episodes_total": 800, "training_iteration": 25, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-33", "timestamp": 1666580493, "time_this_iter_s": 3.779752731323242, "time_total_s": 
95.16410088539124, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", 
"remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, 
"evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", 
"policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, 
"policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 95.16410088539124, "timesteps_since_restore": 0, "iterations_since_restore": 25, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.183333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 7.6, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 32.38, "shaped_reward_min": 9, "shaped_reward_max": 59, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.68, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.13, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, 
"useful_onion_pickup_agent_0_mean": 3.66, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.35, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.36, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.69, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.05, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.07, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.18, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.55, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.74, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.65, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.74, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.59, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.84, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.92, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 2.69, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.05, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.69, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.05, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 5.9604645663569045e-09, "cur_lr": 0.0010000000474974513, "total_loss": 8.647807408124208e-05, "policy_loss": 0.0006866119801998138, "vf_loss": 2.619206666946411, "vf_explained_var": 0.18624652922153473, "kl": 0.0013201497495174408, "entropy": 1.724108338356018, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 332800, "num_env_steps_trained": 332800, "num_agent_steps_sampled": 665600, "num_agent_steps_trained": 665600}, "sampler_results": {"episode_reward_max": 133.0, "episode_reward_min": 9.0, "episode_reward_mean": 47.58, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 23.79}, "custom_metrics": {"sparse_reward_mean": 7.6, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 32.38, "shaped_reward_min": 9, "shaped_reward_max": 59, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.68, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.13, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.66, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.35, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.36, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.69, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.05, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 8, "dish_pickup_agent_0_mean": 3.07, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.18, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.93, "dish_drop_agent_1_min": 0, 
"dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.55, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.74, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.65, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 0.74, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.84, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.92, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 2.69, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.05, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 8, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.69, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.05, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 8, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, 
"catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [23.0, 9.0, 30.0, 9.0, 20.0, 56.0, 88.0, 20.0, 17.0, 128.0, 31.0, 11.0, 23.0, 82.0, 42.0, 14.0, 66.0, 25.0, 22.0, 99.0, 23.0, 45.0, 82.0, 76.0, 31.0, 31.0, 9.0, 22.0, 82.0, 71.0, 71.0, 85.0, 28.0, 63.0, 33.0, 25.0, 82.0, 93.0, 56.0, 25.0, 79.0, 20.0, 77.0, 79.0, 69.0, 23.0, 12.0, 28.0, 60.0, 29.0, 133.0, 85.0, 19.0, 66.0, 31.0, 17.0, 20.0, 12.0, 34.0, 20.0, 68.0, 28.0, 82.0, 48.0, 65.0, 26.0, 36.0, 33.0, 31.0, 31.0, 42.0, 28.0, 66.0, 42.0, 20.0, 31.0, 96.0, 87.0, 35.0, 73.0, 44.0, 82.0, 33.0, 41.0, 23.0, 25.0, 47.0, 85.0, 93.0, 9.0, 43.0, 28.0, 90.0, 53.0, 20.0, 38.0, 73.0, 92.0, 71.0, 39.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400], "policy_ppo_reward": [9.0, 14.0, 6.0, 3.0, 16.0, 14.0, 6.0, 3.0, 14.0, 6.0, 28.0, 28.0, 44.0, 44.0, 14.0, 6.0, 11.0, 6.0, 60.0, 68.0, 12.0, 19.0, 8.0, 3.0, 9.0, 14.0, 37.0, 45.0, 25.0, 17.0, 9.0, 5.0, 32.0, 34.0, 16.0, 9.0, 11.0, 11.0, 50.0, 49.0, 0.0, 23.0, 20.0, 25.0, 40.0, 42.0, 47.0, 29.0, 19.0, 12.0, 14.0, 17.0, 6.0, 3.0, 16.0, 6.0, 42.0, 40.0, 36.0, 35.0, 37.0, 34.0, 48.0, 37.0, 16.0, 12.0, 20.0, 43.0, 25.0, 8.0, 11.0, 14.0, 37.0, 45.0, 46.0, 47.0, 28.0, 28.0, 17.0, 8.0, 33.0, 46.0, 11.0, 9.0, 34.0, 43.0, 34.0, 45.0, 34.0, 35.0, 17.0, 6.0, 3.0, 9.0, 17.0, 11.0, 23.0, 37.0, 15.0, 14.0, 65.0, 68.0, 39.0, 46.0, 14.0, 5.0, 35.0, 31.0, 14.0, 17.0, 11.0, 6.0, 11.0, 9.0, 3.0, 9.0, 17.0, 17.0, 9.0, 11.0, 37.0, 31.0, 18.0, 10.0, 34.0, 48.0, 25.0, 23.0, 36.0, 29.0, 12.0, 14.0, 16.0, 20.0, 8.0, 25.0, 12.0, 19.0, 8.0, 23.0, 15.0, 27.0, 19.0, 9.0, 26.0, 40.0, 32.0, 10.0, 9.0, 11.0, 13.0, 18.0, 43.0, 53.0, 42.0, 45.0, 19.0, 16.0, 36.0, 37.0, 21.0, 23.0, 40.0, 42.0, 16.0, 17.0, 21.0, 20.0, 14.0, 9.0, 6.0, 19.0, 24.0, 23.0, 42.0, 43.0, 56.0, 37.0, 3.0, 6.0, 28.0, 15.0, 17.0, 11.0, 42.0, 48.0, 28.0, 25.0, 9.0, 11.0, 22.0, 16.0, 36.0, 37.0, 44.0, 48.0, 34.0, 37.0, 24.0, 15.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6945319759848374, "mean_inference_ms": 1.211253690174925, "mean_action_processing_ms": 0.13399742937214265, "mean_env_wait_ms": 0.874078587435082, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 133.0, "episode_reward_min": 9.0, "episode_reward_mean": 47.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 68.0}, "policy_reward_mean": {"ppo": 23.79}, "hist_stats": {"episode_reward": [23.0, 9.0, 30.0, 9.0, 20.0, 56.0, 88.0, 20.0, 17.0, 128.0, 31.0, 11.0, 23.0, 82.0, 42.0, 14.0, 66.0, 25.0, 22.0, 99.0, 23.0, 45.0, 82.0, 76.0, 31.0, 31.0, 9.0, 22.0, 82.0, 71.0, 71.0, 85.0, 28.0, 63.0, 33.0, 25.0, 82.0, 93.0, 56.0, 25.0, 79.0, 20.0, 77.0, 
79.0, 69.0, 23.0, 12.0, 28.0, 60.0, 29.0, 133.0, 85.0, 19.0, 66.0, 31.0, 17.0, 20.0, 12.0, 34.0, 20.0, 68.0, 28.0, 82.0, 48.0, 65.0, 26.0, 36.0, 33.0, 31.0, 31.0, 42.0, 28.0, 66.0, 42.0, 20.0, 31.0, 96.0, 87.0, 35.0, 73.0, 44.0, 82.0, 33.0, 41.0, 23.0, 25.0, 47.0, 85.0, 93.0, 9.0, 43.0, 28.0, 90.0, 53.0, 20.0, 38.0, 73.0, 92.0, 71.0, 39.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [9.0, 14.0, 6.0, 3.0, 16.0, 14.0, 6.0, 3.0, 14.0, 6.0, 28.0, 28.0, 44.0, 44.0, 14.0, 6.0, 11.0, 6.0, 60.0, 68.0, 12.0, 19.0, 8.0, 3.0, 9.0, 14.0, 37.0, 45.0, 25.0, 17.0, 9.0, 5.0, 32.0, 34.0, 16.0, 9.0, 11.0, 11.0, 50.0, 49.0, 0.0, 23.0, 20.0, 25.0, 40.0, 42.0, 47.0, 29.0, 19.0, 12.0, 14.0, 17.0, 6.0, 3.0, 16.0, 6.0, 42.0, 40.0, 36.0, 35.0, 37.0, 34.0, 48.0, 37.0, 16.0, 12.0, 20.0, 43.0, 25.0, 8.0, 11.0, 14.0, 37.0, 45.0, 46.0, 47.0, 28.0, 28.0, 17.0, 8.0, 33.0, 46.0, 11.0, 9.0, 34.0, 43.0, 34.0, 45.0, 34.0, 35.0, 17.0, 6.0, 3.0, 9.0, 17.0, 11.0, 23.0, 37.0, 15.0, 14.0, 65.0, 68.0, 39.0, 46.0, 14.0, 5.0, 35.0, 31.0, 14.0, 17.0, 11.0, 6.0, 11.0, 9.0, 3.0, 9.0, 17.0, 17.0, 9.0, 11.0, 37.0, 31.0, 18.0, 10.0, 34.0, 48.0, 25.0, 23.0, 36.0, 29.0, 12.0, 14.0, 16.0, 20.0, 8.0, 25.0, 12.0, 19.0, 8.0, 23.0, 15.0, 27.0, 19.0, 9.0, 26.0, 40.0, 32.0, 10.0, 9.0, 11.0, 13.0, 18.0, 43.0, 53.0, 42.0, 45.0, 19.0, 16.0, 36.0, 37.0, 21.0, 23.0, 40.0, 42.0, 16.0, 17.0, 21.0, 20.0, 14.0, 9.0, 6.0, 19.0, 24.0, 23.0, 42.0, 43.0, 56.0, 37.0, 3.0, 6.0, 28.0, 15.0, 17.0, 11.0, 42.0, 48.0, 28.0, 25.0, 9.0, 11.0, 22.0, 16.0, 
36.0, 37.0, 44.0, 48.0, 34.0, 37.0, 24.0, 15.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6945319759848374, "mean_inference_ms": 1.211253690174925, "mean_action_processing_ms": 0.13399742937214265, "mean_env_wait_ms": 0.874078587435082, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 665600, "num_agent_steps_trained": 665600, "num_env_steps_sampled": 332800, "num_env_steps_trained": 332800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 332800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 665600, "timers": {"training_iteration_time_ms": 3688.029, "learn_time_ms": 1098.54, "learn_throughput": 11651.827, "synch_weights_time_ms": 11.813}, "counters": {"num_env_steps_sampled": 332800, "num_env_steps_trained": 332800, "num_agent_steps_sampled": 665600, "num_agent_steps_trained": 665600}, "done": false, "episodes_total": 832, "training_iteration": 26, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-37", "timestamp": 1666580497, "time_this_iter_s": 3.7483062744140625, "time_total_s": 98.9124071598053, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], 
"post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": 
{"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", 
"post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, 
"_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 98.9124071598053, "timesteps_since_restore": 0, "iterations_since_restore": 26, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.216666666666665, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 8.4, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 34.33, "shaped_reward_min": 9, "shaped_reward_max": 58, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, 
"tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.33, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.17, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.7, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.62, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.72, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.37, "potting_onion_agent_1_min": 0, 
"potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.39, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.38, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.62, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.72, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.76, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.82, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.8, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.86, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.98, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 2.72, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.37, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.72, "viable_onion_potting_agent_0_min": 0, 
"viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.37, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.9802322831784522e-09, "cur_lr": 0.0010000000474974513, "total_loss": -0.007030028849840164, "policy_loss": -0.006458994001150131, "vf_loss": 2.8966987133026123, "vf_explained_var": 0.18293237686157227, "kl": 0.0012218665797263384, "entropy": 1.7214064598083496, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 345600, "num_env_steps_trained": 345600, "num_agent_steps_sampled": 691200, 
"num_agent_steps_trained": 691200}, "sampler_results": {"episode_reward_max": 133.0, "episode_reward_min": 9.0, "episode_reward_mean": 51.13, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 70.0}, "policy_reward_mean": {"ppo": 25.565}, "custom_metrics": {"sparse_reward_mean": 8.4, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 34.33, "shaped_reward_min": 9, "shaped_reward_max": 58, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.33, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.17, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.7, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.62, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 10, 
"onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 8, "useful_onion_drop_agent_1_mean": 1.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.72, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.37, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.39, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 3.38, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.62, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.98, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.64, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.72, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.76, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 5, "soup_pickup_agent_1_mean": 1.82, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 0.8, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 2, "soup_delivery_agent_1_mean": 0.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.86, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.98, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, 
"optimal_onion_potting_agent_0_mean": 2.72, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.37, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.72, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.37, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 
0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [28.0, 63.0, 33.0, 25.0, 82.0, 93.0, 56.0, 25.0, 79.0, 20.0, 77.0, 79.0, 69.0, 23.0, 12.0, 28.0, 60.0, 29.0, 133.0, 85.0, 19.0, 66.0, 31.0, 17.0, 20.0, 12.0, 34.0, 20.0, 68.0, 28.0, 82.0, 48.0, 65.0, 26.0, 36.0, 33.0, 31.0, 31.0, 42.0, 28.0, 66.0, 42.0, 20.0, 31.0, 96.0, 87.0, 35.0, 73.0, 44.0, 82.0, 33.0, 41.0, 23.0, 25.0, 47.0, 85.0, 93.0, 9.0, 43.0, 28.0, 90.0, 53.0, 20.0, 38.0, 73.0, 92.0, 71.0, 39.0, 39.0, 87.0, 85.0, 125.0, 47.0, 28.0, 12.0, 53.0, 50.0, 78.0, 20.0, 34.0, 73.0, 70.0, 23.0, 20.0, 44.0, 31.0, 130.0, 58.0, 73.0, 85.0, 68.0, 12.0, 90.0, 60.0, 49.0, 45.0, 34.0, 25.0, 111.0, 39.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [16.0, 12.0, 20.0, 43.0, 25.0, 8.0, 11.0, 14.0, 37.0, 45.0, 46.0, 47.0, 28.0, 28.0, 17.0, 8.0, 33.0, 46.0, 11.0, 9.0, 34.0, 43.0, 34.0, 45.0, 34.0, 35.0, 17.0, 6.0, 3.0, 9.0, 17.0, 11.0, 23.0, 37.0, 15.0, 14.0, 65.0, 68.0, 39.0, 46.0, 14.0, 5.0, 35.0, 31.0, 14.0, 17.0, 11.0, 6.0, 11.0, 9.0, 3.0, 9.0, 17.0, 17.0, 9.0, 11.0, 37.0, 31.0, 18.0, 10.0, 34.0, 48.0, 25.0, 23.0, 36.0, 29.0, 12.0, 14.0, 16.0, 20.0, 8.0, 25.0, 12.0, 19.0, 8.0, 23.0, 15.0, 27.0, 19.0, 9.0, 26.0, 40.0, 32.0, 10.0, 9.0, 11.0, 13.0, 18.0, 43.0, 53.0, 42.0, 45.0, 19.0, 16.0, 36.0, 37.0, 21.0, 23.0, 40.0, 42.0, 16.0, 17.0, 21.0, 20.0, 14.0, 9.0, 6.0, 19.0, 24.0, 23.0, 42.0, 43.0, 56.0, 37.0, 3.0, 6.0, 28.0, 15.0, 17.0, 11.0, 42.0, 48.0, 28.0, 25.0, 9.0, 11.0, 22.0, 16.0, 36.0, 37.0, 44.0, 48.0, 
34.0, 37.0, 24.0, 15.0, 24.0, 15.0, 50.0, 37.0, 39.0, 46.0, 62.0, 63.0, 30.0, 17.0, 14.0, 14.0, 6.0, 6.0, 12.0, 41.0, 37.0, 13.0, 41.0, 37.0, 6.0, 14.0, 15.0, 19.0, 31.0, 42.0, 30.0, 40.0, 3.0, 20.0, 12.0, 8.0, 17.0, 27.0, 13.0, 18.0, 60.0, 70.0, 22.0, 36.0, 31.0, 42.0, 44.0, 41.0, 26.0, 42.0, 6.0, 6.0, 47.0, 43.0, 26.0, 34.0, 27.0, 22.0, 20.0, 25.0, 17.0, 17.0, 8.0, 17.0, 45.0, 66.0, 25.0, 14.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6941808593457511, "mean_inference_ms": 1.2108438805531228, "mean_action_processing_ms": 0.13398529337925486, "mean_env_wait_ms": 0.8743727041239987, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 133.0, "episode_reward_min": 9.0, "episode_reward_mean": 51.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 70.0}, "policy_reward_mean": {"ppo": 25.565}, "hist_stats": {"episode_reward": [28.0, 63.0, 33.0, 25.0, 82.0, 93.0, 56.0, 25.0, 79.0, 20.0, 77.0, 79.0, 69.0, 23.0, 12.0, 28.0, 60.0, 29.0, 133.0, 85.0, 19.0, 66.0, 31.0, 17.0, 20.0, 12.0, 34.0, 20.0, 68.0, 28.0, 82.0, 48.0, 65.0, 26.0, 36.0, 33.0, 31.0, 31.0, 42.0, 28.0, 66.0, 42.0, 20.0, 31.0, 96.0, 87.0, 35.0, 73.0, 44.0, 82.0, 33.0, 41.0, 23.0, 25.0, 47.0, 85.0, 93.0, 9.0, 43.0, 28.0, 90.0, 53.0, 20.0, 38.0, 73.0, 92.0, 71.0, 39.0, 39.0, 87.0, 85.0, 125.0, 47.0, 28.0, 12.0, 53.0, 50.0, 78.0, 20.0, 34.0, 73.0, 70.0, 23.0, 20.0, 44.0, 31.0, 130.0, 58.0, 73.0, 85.0, 68.0, 12.0, 90.0, 60.0, 49.0, 45.0, 34.0, 25.0, 111.0, 39.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [16.0, 12.0, 20.0, 43.0, 25.0, 8.0, 11.0, 14.0, 37.0, 45.0, 46.0, 47.0, 28.0, 28.0, 17.0, 8.0, 33.0, 46.0, 11.0, 9.0, 34.0, 43.0, 34.0, 45.0, 34.0, 35.0, 17.0, 6.0, 3.0, 9.0, 17.0, 11.0, 23.0, 37.0, 15.0, 14.0, 65.0, 68.0, 39.0, 46.0, 14.0, 5.0, 35.0, 31.0, 14.0, 17.0, 11.0, 6.0, 11.0, 9.0, 3.0, 9.0, 17.0, 17.0, 9.0, 11.0, 37.0, 31.0, 18.0, 10.0, 34.0, 48.0, 25.0, 23.0, 36.0, 29.0, 12.0, 14.0, 16.0, 20.0, 8.0, 25.0, 12.0, 19.0, 8.0, 23.0, 15.0, 27.0, 19.0, 9.0, 26.0, 40.0, 32.0, 10.0, 9.0, 11.0, 13.0, 18.0, 43.0, 53.0, 42.0, 45.0, 19.0, 16.0, 36.0, 37.0, 21.0, 23.0, 40.0, 42.0, 16.0, 17.0, 21.0, 20.0, 14.0, 9.0, 6.0, 19.0, 24.0, 23.0, 42.0, 43.0, 56.0, 37.0, 3.0, 6.0, 28.0, 15.0, 17.0, 11.0, 42.0, 48.0, 28.0, 25.0, 9.0, 11.0, 22.0, 16.0, 36.0, 37.0, 44.0, 48.0, 34.0, 37.0, 24.0, 15.0, 24.0, 15.0, 50.0, 37.0, 39.0, 46.0, 62.0, 63.0, 30.0, 17.0, 14.0, 14.0, 6.0, 6.0, 12.0, 41.0, 37.0, 13.0, 41.0, 37.0, 6.0, 14.0, 15.0, 19.0, 31.0, 42.0, 30.0, 40.0, 3.0, 20.0, 12.0, 8.0, 17.0, 27.0, 13.0, 18.0, 60.0, 70.0, 22.0, 36.0, 31.0, 42.0, 44.0, 41.0, 26.0, 42.0, 6.0, 6.0, 47.0, 43.0, 26.0, 34.0, 27.0, 22.0, 20.0, 25.0, 17.0, 17.0, 8.0, 17.0, 45.0, 66.0, 25.0, 14.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6941808593457511, "mean_inference_ms": 1.2108438805531228, "mean_action_processing_ms": 0.13398529337925486, "mean_env_wait_ms": 0.8743727041239987, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 691200, "num_agent_steps_trained": 691200, "num_env_steps_sampled": 345600, "num_env_steps_trained": 345600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 345600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 691200, "timers": {"training_iteration_time_ms": 3679.065, "learn_time_ms": 1091.01, "learn_throughput": 11732.251, "synch_weights_time_ms": 11.709}, 
"counters": {"num_env_steps_sampled": 345600, "num_env_steps_trained": 345600, "num_agent_steps_sampled": 691200, "num_agent_steps_trained": 691200}, "done": false, "episodes_total": 864, "training_iteration": 27, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-41", "timestamp": 1666580501, "time_this_iter_s": 3.674090623855591, "time_total_s": 102.58649778366089, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": 
null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, 
"lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, 
"custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, 
"vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 
0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 102.58649778366089, "timesteps_since_restore": 0, "iterations_since_restore": 27, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.1, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 9.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 34.85, "shaped_reward_min": 9, "shaped_reward_max": 59, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, 
"potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.18, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.32, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.5, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.6, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.33, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.59, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.5, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.57, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.39, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.94, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.55, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.67, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.74, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.83, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.91, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.77, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.71, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.9, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 2.59, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.5, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.59, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.5, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.4901161415892261e-09, "cur_lr": 0.0010000000474974513, "total_loss": -0.003428479889407754, "policy_loss": -0.0028413136024028063, "vf_loss": 2.718903064727783, "vf_explained_var": 0.2129439413547516, "kl": 0.0011951376218348742, "entropy": 1.7181081771850586, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 358400, "num_env_steps_trained": 358400, "num_agent_steps_sampled": 716800, "num_agent_steps_trained": 716800}, "sampler_results": {"episode_reward_max": 131.0, "episode_reward_min": 9.0, "episode_reward_mean": 53.25, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 73.0}, "policy_reward_mean": {"ppo": 26.625}, "custom_metrics": {"sparse_reward_mean": 9.2, "sparse_reward_min": 0, "sparse_reward_max": 40, "shaped_reward_mean": 34.85, "shaped_reward_min": 9, "shaped_reward_max": 59, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.18, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.32, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 13, "useful_onion_pickup_agent_0_mean": 3.5, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.6, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.33, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.59, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.5, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 7, "dish_pickup_agent_0_mean": 3.57, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.39, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 
9, "useful_dish_pickup_agent_0_mean": 0.67, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.94, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.55, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.67, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 7, "soup_pickup_agent_0_mean": 1.74, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.83, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 0.91, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.77, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.71, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.9, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 2.59, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.5, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 7, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.59, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.5, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 7, "viable_tomato_potting_agent_0_mean": 0.0, 
"viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [65.0, 26.0, 36.0, 33.0, 31.0, 31.0, 42.0, 28.0, 66.0, 42.0, 20.0, 31.0, 96.0, 87.0, 35.0, 73.0, 44.0, 82.0, 33.0, 41.0, 23.0, 25.0, 47.0, 85.0, 93.0, 9.0, 43.0, 28.0, 90.0, 53.0, 20.0, 38.0, 73.0, 92.0, 71.0, 39.0, 39.0, 87.0, 85.0, 125.0, 47.0, 28.0, 12.0, 53.0, 50.0, 78.0, 20.0, 34.0, 73.0, 70.0, 23.0, 20.0, 44.0, 31.0, 130.0, 58.0, 73.0, 85.0, 68.0, 12.0, 90.0, 60.0, 49.0, 45.0, 34.0, 25.0, 111.0, 39.0, 71.0, 9.0, 76.0, 81.0, 111.0, 34.0, 53.0, 36.0, 79.0, 70.0, 39.0, 131.0, 77.0, 9.0, 25.0, 22.0, 23.0, 20.0, 66.0, 99.0, 76.0, 79.0, 95.0, 87.0, 31.0, 81.0, 17.0, 12.0, 31.0, 76.0, 17.0, 23.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [36.0, 29.0, 12.0, 14.0, 16.0, 20.0, 8.0, 25.0, 12.0, 19.0, 8.0, 23.0, 15.0, 27.0, 19.0, 9.0, 26.0, 40.0, 32.0, 10.0, 9.0, 11.0, 13.0, 18.0, 43.0, 53.0, 42.0, 45.0, 19.0, 16.0, 36.0, 37.0, 21.0, 23.0, 40.0, 42.0, 16.0, 17.0, 21.0, 20.0, 14.0, 9.0, 6.0, 19.0, 24.0, 23.0, 42.0, 43.0, 56.0, 37.0, 3.0, 6.0, 28.0, 15.0, 17.0, 11.0, 42.0, 48.0, 28.0, 25.0, 9.0, 11.0, 22.0, 16.0, 36.0, 37.0, 44.0, 48.0, 34.0, 37.0, 24.0, 15.0, 24.0, 15.0, 50.0, 37.0, 39.0, 46.0, 62.0, 63.0, 30.0, 17.0, 14.0, 14.0, 6.0, 6.0, 12.0, 41.0, 37.0, 13.0, 41.0, 37.0, 6.0, 14.0, 15.0, 19.0, 31.0, 42.0, 30.0, 40.0, 3.0, 20.0, 12.0, 8.0, 17.0, 27.0, 13.0, 18.0, 60.0, 70.0, 22.0, 36.0, 31.0, 42.0, 44.0, 41.0, 26.0, 42.0, 6.0, 6.0, 47.0, 43.0, 26.0, 34.0, 27.0, 22.0, 20.0, 25.0, 17.0, 17.0, 8.0, 17.0, 45.0, 66.0, 25.0, 14.0, 37.0, 34.0, 3.0, 6.0, 47.0, 29.0, 39.0, 42.0, 56.0, 55.0, 16.0, 18.0, 30.0, 23.0, 8.0, 28.0, 36.0, 43.0, 34.0, 36.0, 19.0, 20.0, 58.0, 73.0, 37.0, 40.0, 3.0, 6.0, 3.0, 22.0, 8.0, 14.0, 14.0, 9.0, 11.0, 9.0, 37.0, 29.0, 51.0, 48.0, 44.0, 32.0, 39.0, 40.0, 53.0, 42.0, 42.0, 45.0, 14.0, 17.0, 28.0, 53.0, 12.0, 5.0, 9.0, 3.0, 17.0, 14.0, 31.0, 45.0, 9.0, 8.0, 9.0, 14.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6937747578849255, "mean_inference_ms": 1.2103427215659897, "mean_action_processing_ms": 0.13392971773962517, "mean_env_wait_ms": 0.8742234499034806, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 131.0, "episode_reward_min": 9.0, "episode_reward_mean": 53.25, "episode_len_mean": 400.0, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 73.0}, "policy_reward_mean": {"ppo": 26.625}, "hist_stats": {"episode_reward": [65.0, 26.0, 36.0, 33.0, 31.0, 31.0, 42.0, 28.0, 66.0, 42.0, 20.0, 31.0, 96.0, 87.0, 35.0, 73.0, 44.0, 82.0, 33.0, 41.0, 23.0, 25.0, 47.0, 85.0, 93.0, 9.0, 43.0, 28.0, 90.0, 53.0, 20.0, 38.0, 73.0, 92.0, 71.0, 39.0, 39.0, 87.0, 85.0, 125.0, 47.0, 28.0, 12.0, 53.0, 50.0, 78.0, 20.0, 34.0, 73.0, 70.0, 23.0, 20.0, 44.0, 31.0, 130.0, 58.0, 73.0, 85.0, 68.0, 12.0, 90.0, 60.0, 49.0, 45.0, 34.0, 25.0, 111.0, 39.0, 71.0, 9.0, 76.0, 81.0, 111.0, 34.0, 53.0, 36.0, 79.0, 70.0, 39.0, 131.0, 77.0, 9.0, 25.0, 22.0, 23.0, 20.0, 66.0, 99.0, 76.0, 79.0, 95.0, 87.0, 31.0, 81.0, 17.0, 12.0, 31.0, 76.0, 17.0, 23.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [36.0, 29.0, 12.0, 14.0, 16.0, 20.0, 8.0, 25.0, 12.0, 19.0, 8.0, 23.0, 15.0, 27.0, 19.0, 9.0, 26.0, 40.0, 32.0, 10.0, 9.0, 11.0, 13.0, 18.0, 43.0, 53.0, 42.0, 45.0, 19.0, 16.0, 36.0, 37.0, 21.0, 23.0, 40.0, 42.0, 16.0, 17.0, 21.0, 20.0, 14.0, 9.0, 6.0, 19.0, 24.0, 23.0, 42.0, 43.0, 56.0, 37.0, 3.0, 6.0, 28.0, 15.0, 17.0, 11.0, 42.0, 48.0, 28.0, 25.0, 9.0, 11.0, 22.0, 16.0, 36.0, 37.0, 44.0, 48.0, 34.0, 37.0, 24.0, 15.0, 24.0, 15.0, 50.0, 37.0, 39.0, 46.0, 62.0, 63.0, 30.0, 17.0, 14.0, 14.0, 6.0, 6.0, 12.0, 41.0, 37.0, 13.0, 41.0, 37.0, 6.0, 14.0, 15.0, 19.0, 31.0, 42.0, 30.0, 40.0, 3.0, 20.0, 12.0, 8.0, 17.0, 27.0, 13.0, 18.0, 60.0, 70.0, 22.0, 36.0, 31.0, 42.0, 44.0, 41.0, 
26.0, 42.0, 6.0, 6.0, 47.0, 43.0, 26.0, 34.0, 27.0, 22.0, 20.0, 25.0, 17.0, 17.0, 8.0, 17.0, 45.0, 66.0, 25.0, 14.0, 37.0, 34.0, 3.0, 6.0, 47.0, 29.0, 39.0, 42.0, 56.0, 55.0, 16.0, 18.0, 30.0, 23.0, 8.0, 28.0, 36.0, 43.0, 34.0, 36.0, 19.0, 20.0, 58.0, 73.0, 37.0, 40.0, 3.0, 6.0, 3.0, 22.0, 8.0, 14.0, 14.0, 9.0, 11.0, 9.0, 37.0, 29.0, 51.0, 48.0, 44.0, 32.0, 39.0, 40.0, 53.0, 42.0, 42.0, 45.0, 14.0, 17.0, 28.0, 53.0, 12.0, 5.0, 9.0, 3.0, 17.0, 14.0, 31.0, 45.0, 9.0, 8.0, 9.0, 14.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6937747578849255, "mean_inference_ms": 1.2103427215659897, "mean_action_processing_ms": 0.13392971773962517, "mean_env_wait_ms": 0.8742234499034806, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 716800, "num_agent_steps_trained": 716800, "num_env_steps_sampled": 358400, "num_env_steps_trained": 358400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 358400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 716800, "timers": {"training_iteration_time_ms": 3676.763, "learn_time_ms": 1091.56, "learn_throughput": 11726.336, "synch_weights_time_ms": 11.323}, "counters": {"num_env_steps_sampled": 358400, "num_env_steps_trained": 358400, "num_agent_steps_sampled": 716800, "num_agent_steps_trained": 716800}, "done": false, "episodes_total": 896, "training_iteration": 28, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-45", "timestamp": 1666580505, "time_this_iter_s": 3.7481324672698975, "time_total_s": 106.33463025093079, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, 
"eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", 
"synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": 
"NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": 
false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 
106.33463025093079, "timesteps_since_restore": 0, "iterations_since_restore": 28, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.183333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 12.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 36.41, "shaped_reward_min": 6, "shaped_reward_max": 78, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.45, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.62, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.77, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.66, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.47, 
"onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.68, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.74, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.64, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.28, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.69, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.5, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.74, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 1.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 1.03, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.57, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.79, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 2.68, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, 
"optimal_onion_potting_agent_1_mean": 3.74, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.68, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.74, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": 
{"learner_stats": {"cur_kl_coeff": 7.450580707946131e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.00494603905826807, "policy_loss": -0.004427894949913025, "vf_loss": 3.332706928253174, "vf_explained_var": 0.22580432891845703, "kl": 0.0014256751164793968, "entropy": 1.7028231620788574, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 371200, "num_env_steps_trained": 371200, "num_agent_steps_sampled": 742400, "num_agent_steps_trained": 742400}, "sampler_results": {"episode_reward_max": 179.0, "episode_reward_min": 6.0, "episode_reward_mean": 60.41, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 93.0}, "policy_reward_mean": {"ppo": 30.205}, "custom_metrics": {"sparse_reward_mean": 12.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 36.41, "shaped_reward_min": 6, "shaped_reward_max": 78, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, 
"potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.45, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.62, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.77, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.66, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 8, "potting_onion_agent_0_mean": 2.68, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 8, "potting_onion_agent_1_mean": 3.74, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.64, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 3.28, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.69, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.5, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.74, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, 
"soup_pickup_agent_1_mean": 1.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 1.03, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 0.57, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.79, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 2.68, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 8, "optimal_onion_potting_agent_1_mean": 3.74, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.68, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 8, "viable_onion_potting_agent_1_mean": 3.74, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, 
"catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [73.0, 92.0, 71.0, 39.0, 39.0, 87.0, 85.0, 125.0, 47.0, 28.0, 12.0, 53.0, 50.0, 78.0, 20.0, 34.0, 73.0, 70.0, 23.0, 20.0, 44.0, 31.0, 130.0, 58.0, 73.0, 85.0, 68.0, 12.0, 90.0, 60.0, 49.0, 45.0, 34.0, 25.0, 111.0, 39.0, 71.0, 9.0, 76.0, 81.0, 111.0, 34.0, 53.0, 36.0, 79.0, 70.0, 39.0, 131.0, 77.0, 9.0, 25.0, 22.0, 23.0, 20.0, 66.0, 99.0, 76.0, 79.0, 95.0, 87.0, 31.0, 81.0, 17.0, 12.0, 31.0, 76.0, 17.0, 23.0, 125.0, 145.0, 9.0, 71.0, 85.0, 48.0, 55.0, 36.0, 92.0, 44.0, 128.0, 74.0, 96.0, 53.0, 31.0, 14.0, 90.0, 40.0, 179.0, 53.0, 74.0, 90.0, 20.0, 87.0, 80.0, 25.0, 118.0, 9.0, 6.0, 71.0, 131.0, 33.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [36.0, 37.0, 44.0, 48.0, 34.0, 37.0, 24.0, 15.0, 24.0, 15.0, 50.0, 37.0, 39.0, 46.0, 62.0, 63.0, 30.0, 17.0, 14.0, 14.0, 6.0, 6.0, 12.0, 41.0, 37.0, 13.0, 41.0, 37.0, 6.0, 14.0, 15.0, 19.0, 31.0, 42.0, 30.0, 40.0, 3.0, 20.0, 12.0, 8.0, 17.0, 27.0, 13.0, 18.0, 60.0, 70.0, 
22.0, 36.0, 31.0, 42.0, 44.0, 41.0, 26.0, 42.0, 6.0, 6.0, 47.0, 43.0, 26.0, 34.0, 27.0, 22.0, 20.0, 25.0, 17.0, 17.0, 8.0, 17.0, 45.0, 66.0, 25.0, 14.0, 37.0, 34.0, 3.0, 6.0, 47.0, 29.0, 39.0, 42.0, 56.0, 55.0, 16.0, 18.0, 30.0, 23.0, 8.0, 28.0, 36.0, 43.0, 34.0, 36.0, 19.0, 20.0, 58.0, 73.0, 37.0, 40.0, 3.0, 6.0, 3.0, 22.0, 8.0, 14.0, 14.0, 9.0, 11.0, 9.0, 37.0, 29.0, 51.0, 48.0, 44.0, 32.0, 39.0, 40.0, 53.0, 42.0, 42.0, 45.0, 14.0, 17.0, 28.0, 53.0, 12.0, 5.0, 9.0, 3.0, 17.0, 14.0, 31.0, 45.0, 9.0, 8.0, 9.0, 14.0, 58.0, 67.0, 70.0, 75.0, 3.0, 6.0, 33.0, 38.0, 43.0, 42.0, 26.0, 22.0, 21.0, 34.0, 9.0, 27.0, 52.0, 40.0, 22.0, 22.0, 60.0, 68.0, 49.0, 25.0, 45.0, 51.0, 25.0, 28.0, 11.0, 20.0, 5.0, 9.0, 56.0, 34.0, 14.0, 26.0, 86.0, 93.0, 15.0, 38.0, 34.0, 40.0, 50.0, 40.0, 0.0, 20.0, 34.0, 53.0, 45.0, 35.0, 6.0, 19.0, 53.0, 65.0, 3.0, 6.0, 6.0, 0.0, 37.0, 34.0, 62.0, 69.0, 10.0, 23.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6931727354048686, "mean_inference_ms": 1.2108588597383083, "mean_action_processing_ms": 0.13381849352361283, "mean_env_wait_ms": 0.8752686389591946, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 179.0, "episode_reward_min": 6.0, "episode_reward_mean": 60.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 93.0}, "policy_reward_mean": {"ppo": 30.205}, "hist_stats": {"episode_reward": [73.0, 92.0, 71.0, 39.0, 39.0, 87.0, 85.0, 125.0, 47.0, 28.0, 12.0, 53.0, 50.0, 78.0, 20.0, 34.0, 73.0, 70.0, 23.0, 20.0, 44.0, 31.0, 130.0, 58.0, 73.0, 85.0, 68.0, 12.0, 90.0, 60.0, 49.0, 45.0, 34.0, 25.0, 111.0, 39.0, 71.0, 9.0, 76.0, 81.0, 111.0, 34.0, 53.0, 36.0, 79.0, 70.0, 39.0, 131.0, 77.0, 9.0, 25.0, 22.0, 23.0, 20.0, 66.0, 99.0, 76.0, 79.0, 95.0, 87.0, 31.0, 81.0, 17.0, 12.0, 31.0, 76.0, 17.0, 23.0, 125.0, 145.0, 9.0, 71.0, 85.0, 48.0, 55.0, 36.0, 92.0, 44.0, 128.0, 74.0, 96.0, 53.0, 31.0, 14.0, 90.0, 40.0, 179.0, 53.0, 74.0, 90.0, 20.0, 87.0, 80.0, 
25.0, 118.0, 9.0, 6.0, 71.0, 131.0, 33.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [36.0, 37.0, 44.0, 48.0, 34.0, 37.0, 24.0, 15.0, 24.0, 15.0, 50.0, 37.0, 39.0, 46.0, 62.0, 63.0, 30.0, 17.0, 14.0, 14.0, 6.0, 6.0, 12.0, 41.0, 37.0, 13.0, 41.0, 37.0, 6.0, 14.0, 15.0, 19.0, 31.0, 42.0, 30.0, 40.0, 3.0, 20.0, 12.0, 8.0, 17.0, 27.0, 13.0, 18.0, 60.0, 70.0, 22.0, 36.0, 31.0, 42.0, 44.0, 41.0, 26.0, 42.0, 6.0, 6.0, 47.0, 43.0, 26.0, 34.0, 27.0, 22.0, 20.0, 25.0, 17.0, 17.0, 8.0, 17.0, 45.0, 66.0, 25.0, 14.0, 37.0, 34.0, 3.0, 6.0, 47.0, 29.0, 39.0, 42.0, 56.0, 55.0, 16.0, 18.0, 30.0, 23.0, 8.0, 28.0, 36.0, 43.0, 34.0, 36.0, 19.0, 20.0, 58.0, 73.0, 37.0, 40.0, 3.0, 6.0, 3.0, 22.0, 8.0, 14.0, 14.0, 9.0, 11.0, 9.0, 37.0, 29.0, 51.0, 48.0, 44.0, 32.0, 39.0, 40.0, 53.0, 42.0, 42.0, 45.0, 14.0, 17.0, 28.0, 53.0, 12.0, 5.0, 9.0, 3.0, 17.0, 14.0, 31.0, 45.0, 9.0, 8.0, 9.0, 14.0, 58.0, 67.0, 70.0, 75.0, 3.0, 6.0, 33.0, 38.0, 43.0, 42.0, 26.0, 22.0, 21.0, 34.0, 9.0, 27.0, 52.0, 40.0, 22.0, 22.0, 60.0, 68.0, 49.0, 25.0, 45.0, 51.0, 25.0, 28.0, 11.0, 20.0, 5.0, 9.0, 56.0, 34.0, 14.0, 26.0, 86.0, 93.0, 15.0, 38.0, 34.0, 40.0, 50.0, 40.0, 0.0, 20.0, 34.0, 53.0, 45.0, 35.0, 6.0, 19.0, 53.0, 65.0, 3.0, 6.0, 6.0, 0.0, 37.0, 34.0, 62.0, 69.0, 10.0, 23.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6931727354048686, "mean_inference_ms": 1.2108588597383083, "mean_action_processing_ms": 0.13381849352361283, "mean_env_wait_ms": 0.8752686389591946, "mean_env_render_ms": 0.0}, 
"num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 742400, "num_agent_steps_trained": 742400, "num_env_steps_sampled": 371200, "num_env_steps_trained": 371200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 371200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 742400, "timers": {"training_iteration_time_ms": 3711.502, "learn_time_ms": 1094.516, "learn_throughput": 11694.668, "synch_weights_time_ms": 11.285}, "counters": {"num_env_steps_sampled": 371200, "num_env_steps_trained": 371200, "num_agent_steps_sampled": 742400, "num_agent_steps_trained": 742400}, "done": false, "episodes_total": 928, "training_iteration": 29, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-49", "timestamp": 1666580509, "time_this_iter_s": 3.9900293350219727, "time_total_s": 110.32465958595276, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, 
"wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": 
false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, 
"prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 110.32465958595276, "timesteps_since_restore": 0, "iterations_since_restore": 29, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.96666666666667, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 12.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 37.76, "shaped_reward_min": 6, "shaped_reward_max": 78, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, 
"useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.76, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.67, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.98, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.73, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 2.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 2.93, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 3.81, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.63, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.31, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, 
"useful_dish_pickup_agent_0_mean": 0.72, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.54, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.61, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.91, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 1.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 1.0, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.75, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 2.93, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 3.81, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.93, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 3.81, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, 
"viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.7252903539730653e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.0029865906108170748, "policy_loss": -0.0024586068466305733, "vf_loss": 3.2020277976989746, "vf_explained_var": 0.23653024435043335, "kl": 0.0015087344218045473, "entropy": 1.6963691711425781, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 384000, "num_env_steps_trained": 384000, "num_agent_steps_sampled": 768000, "num_agent_steps_trained": 768000}, "sampler_results": {"episode_reward_max": 179.0, "episode_reward_min": 6.0, "episode_reward_mean": 62.16, "episode_len_mean": 400.0, "episode_media": {}, 
"episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 93.0}, "policy_reward_mean": {"ppo": 31.08}, "custom_metrics": {"sparse_reward_mean": 12.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 37.76, "shaped_reward_min": 6, "shaped_reward_max": 78, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 5.76, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 12, "onion_pickup_agent_1_mean": 6.67, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 3.98, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 4.73, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 10, "onion_drop_agent_0_mean": 2.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 2.44, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, 
"useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 2.93, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 3.81, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.63, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.31, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.72, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.54, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.61, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 1.91, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 1.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 5, "soup_delivery_agent_0_mean": 1.0, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.87, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 0.77, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.75, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 2.93, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 3.81, 
"optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 2.93, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 3.81, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [34.0, 25.0, 111.0, 39.0, 71.0, 9.0, 76.0, 81.0, 111.0, 34.0, 53.0, 36.0, 79.0, 70.0, 
39.0, 131.0, 77.0, 9.0, 25.0, 22.0, 23.0, 20.0, 66.0, 99.0, 76.0, 79.0, 95.0, 87.0, 31.0, 81.0, 17.0, 12.0, 31.0, 76.0, 17.0, 23.0, 125.0, 145.0, 9.0, 71.0, 85.0, 48.0, 55.0, 36.0, 92.0, 44.0, 128.0, 74.0, 96.0, 53.0, 31.0, 14.0, 90.0, 40.0, 179.0, 53.0, 74.0, 90.0, 20.0, 87.0, 80.0, 25.0, 118.0, 9.0, 6.0, 71.0, 131.0, 33.0, 12.0, 87.0, 37.0, 95.0, 102.0, 88.0, 20.0, 147.0, 45.0, 61.0, 79.0, 113.0, 31.0, 39.0, 39.0, 91.0, 68.0, 44.0, 52.0, 85.0, 44.0, 87.0, 20.0, 79.0, 122.0, 50.0, 82.0, 23.0, 26.0, 23.0, 23.0, 125.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [17.0, 17.0, 8.0, 17.0, 45.0, 66.0, 25.0, 14.0, 37.0, 34.0, 3.0, 6.0, 47.0, 29.0, 39.0, 42.0, 56.0, 55.0, 16.0, 18.0, 30.0, 23.0, 8.0, 28.0, 36.0, 43.0, 34.0, 36.0, 19.0, 20.0, 58.0, 73.0, 37.0, 40.0, 3.0, 6.0, 3.0, 22.0, 8.0, 14.0, 14.0, 9.0, 11.0, 9.0, 37.0, 29.0, 51.0, 48.0, 44.0, 32.0, 39.0, 40.0, 53.0, 42.0, 42.0, 45.0, 14.0, 17.0, 28.0, 53.0, 12.0, 5.0, 9.0, 3.0, 17.0, 14.0, 31.0, 45.0, 9.0, 8.0, 9.0, 14.0, 58.0, 67.0, 70.0, 75.0, 3.0, 6.0, 33.0, 38.0, 43.0, 42.0, 26.0, 22.0, 21.0, 34.0, 9.0, 27.0, 52.0, 40.0, 22.0, 22.0, 60.0, 68.0, 49.0, 25.0, 45.0, 51.0, 25.0, 28.0, 11.0, 20.0, 5.0, 9.0, 56.0, 34.0, 14.0, 26.0, 86.0, 93.0, 15.0, 38.0, 34.0, 40.0, 50.0, 40.0, 0.0, 20.0, 34.0, 53.0, 45.0, 35.0, 6.0, 19.0, 53.0, 65.0, 3.0, 6.0, 6.0, 0.0, 37.0, 34.0, 62.0, 69.0, 10.0, 23.0, 6.0, 6.0, 26.0, 61.0, 28.0, 9.0, 41.0, 54.0, 50.0, 52.0, 34.0, 54.0, 3.0, 17.0, 83.0, 64.0, 22.0, 23.0, 36.0, 25.0, 37.0, 42.0, 43.0, 70.0, 
11.0, 20.0, 25.0, 14.0, 20.0, 19.0, 49.0, 42.0, 45.0, 23.0, 30.0, 14.0, 31.0, 21.0, 49.0, 36.0, 14.0, 30.0, 39.0, 48.0, 11.0, 9.0, 43.0, 36.0, 52.0, 70.0, 27.0, 23.0, 39.0, 43.0, 0.0, 23.0, 11.0, 15.0, 12.0, 11.0, 14.0, 9.0, 62.0, 63.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6924762482736284, "mean_inference_ms": 1.2114109394813388, "mean_action_processing_ms": 0.13370852013253243, "mean_env_wait_ms": 0.8763468400843789, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 179.0, "episode_reward_min": 6.0, "episode_reward_mean": 62.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 93.0}, "policy_reward_mean": {"ppo": 31.08}, "hist_stats": {"episode_reward": [34.0, 25.0, 111.0, 39.0, 71.0, 9.0, 76.0, 81.0, 111.0, 34.0, 53.0, 36.0, 79.0, 70.0, 39.0, 131.0, 77.0, 9.0, 25.0, 22.0, 23.0, 20.0, 66.0, 99.0, 76.0, 79.0, 95.0, 87.0, 31.0, 81.0, 17.0, 12.0, 31.0, 76.0, 17.0, 23.0, 125.0, 145.0, 9.0, 71.0, 85.0, 48.0, 55.0, 36.0, 92.0, 44.0, 128.0, 74.0, 96.0, 53.0, 31.0, 14.0, 90.0, 40.0, 179.0, 53.0, 74.0, 90.0, 20.0, 87.0, 80.0, 25.0, 118.0, 9.0, 6.0, 71.0, 131.0, 33.0, 12.0, 87.0, 37.0, 95.0, 102.0, 88.0, 20.0, 147.0, 45.0, 61.0, 79.0, 113.0, 31.0, 39.0, 39.0, 91.0, 68.0, 44.0, 52.0, 85.0, 44.0, 87.0, 20.0, 79.0, 122.0, 50.0, 82.0, 23.0, 26.0, 23.0, 23.0, 125.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [17.0, 17.0, 8.0, 17.0, 45.0, 66.0, 25.0, 14.0, 37.0, 34.0, 3.0, 6.0, 
47.0, 29.0, 39.0, 42.0, 56.0, 55.0, 16.0, 18.0, 30.0, 23.0, 8.0, 28.0, 36.0, 43.0, 34.0, 36.0, 19.0, 20.0, 58.0, 73.0, 37.0, 40.0, 3.0, 6.0, 3.0, 22.0, 8.0, 14.0, 14.0, 9.0, 11.0, 9.0, 37.0, 29.0, 51.0, 48.0, 44.0, 32.0, 39.0, 40.0, 53.0, 42.0, 42.0, 45.0, 14.0, 17.0, 28.0, 53.0, 12.0, 5.0, 9.0, 3.0, 17.0, 14.0, 31.0, 45.0, 9.0, 8.0, 9.0, 14.0, 58.0, 67.0, 70.0, 75.0, 3.0, 6.0, 33.0, 38.0, 43.0, 42.0, 26.0, 22.0, 21.0, 34.0, 9.0, 27.0, 52.0, 40.0, 22.0, 22.0, 60.0, 68.0, 49.0, 25.0, 45.0, 51.0, 25.0, 28.0, 11.0, 20.0, 5.0, 9.0, 56.0, 34.0, 14.0, 26.0, 86.0, 93.0, 15.0, 38.0, 34.0, 40.0, 50.0, 40.0, 0.0, 20.0, 34.0, 53.0, 45.0, 35.0, 6.0, 19.0, 53.0, 65.0, 3.0, 6.0, 6.0, 0.0, 37.0, 34.0, 62.0, 69.0, 10.0, 23.0, 6.0, 6.0, 26.0, 61.0, 28.0, 9.0, 41.0, 54.0, 50.0, 52.0, 34.0, 54.0, 3.0, 17.0, 83.0, 64.0, 22.0, 23.0, 36.0, 25.0, 37.0, 42.0, 43.0, 70.0, 11.0, 20.0, 25.0, 14.0, 20.0, 19.0, 49.0, 42.0, 45.0, 23.0, 30.0, 14.0, 31.0, 21.0, 49.0, 36.0, 14.0, 30.0, 39.0, 48.0, 11.0, 9.0, 43.0, 36.0, 52.0, 70.0, 27.0, 23.0, 39.0, 43.0, 0.0, 23.0, 11.0, 15.0, 12.0, 11.0, 14.0, 9.0, 62.0, 63.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6924762482736284, "mean_inference_ms": 1.2114109394813388, "mean_action_processing_ms": 0.13370852013253243, "mean_env_wait_ms": 0.8763468400843789, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 768000, "num_agent_steps_trained": 768000, "num_env_steps_sampled": 384000, "num_env_steps_trained": 384000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 384000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 768000, "timers": {"training_iteration_time_ms": 3716.909, "learn_time_ms": 1106.67, "learn_throughput": 11566.226, "synch_weights_time_ms": 11.25}, "counters": {"num_env_steps_sampled": 384000, "num_env_steps_trained": 384000, "num_agent_steps_sampled": 768000, "num_agent_steps_trained": 768000}, "done": 
false, "episodes_total": 960, "training_iteration": 30, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-53", "timestamp": 1666580513, "time_this_iter_s": 3.6705687046051025, "time_total_s": 113.99522829055786, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, 
"disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, 
"actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 113.99522829055786, "timesteps_since_restore": 0, "iterations_since_restore": 30, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.2, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 12.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 41.39, "shaped_reward_min": 6, "shaped_reward_max": 78, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.19, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.89, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.23, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 5.01, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.47, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.5, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 3.24, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.1, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.82, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.47, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.8, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.62, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.62, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.24, "soup_pickup_agent_0_min": 0, 
"soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 1.92, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.05, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.94, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 1.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.83, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 3.24, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.1, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.24, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.1, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, 
"catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.8626451769865326e-10, "cur_lr": 0.0010000000474974513, "total_loss": -0.0069397566840052605, "policy_loss": -0.006435990799218416, "vf_loss": 3.3930177688598633, "vf_explained_var": 0.253547728061676, "kl": 0.0013529944699257612, "entropy": 1.6861307621002197, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 396800, "num_env_steps_trained": 396800, "num_agent_steps_sampled": 793600, "num_agent_steps_trained": 793600}, "sampler_results": {"episode_reward_max": 179.0, "episode_reward_min": 6.0, "episode_reward_mean": 66.59, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 93.0}, "policy_reward_mean": {"ppo": 33.295}, "custom_metrics": {"sparse_reward_mean": 12.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 41.39, "shaped_reward_min": 6, "shaped_reward_max": 78, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.19, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.89, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.23, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 10, "useful_onion_pickup_agent_1_mean": 5.01, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.47, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 1.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.5, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 3.24, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.1, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.82, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.47, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.8, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, 
"useful_dish_pickup_agent_1_mean": 0.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.62, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.62, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.24, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 1.92, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.05, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 0.94, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 3, "soup_drop_agent_0_mean": 1.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 0.83, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 3.24, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.1, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.24, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.1, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [31.0, 76.0, 17.0, 23.0, 125.0, 145.0, 9.0, 71.0, 85.0, 48.0, 55.0, 36.0, 92.0, 44.0, 128.0, 74.0, 96.0, 53.0, 31.0, 14.0, 90.0, 40.0, 179.0, 53.0, 74.0, 90.0, 20.0, 87.0, 80.0, 25.0, 118.0, 9.0, 6.0, 71.0, 131.0, 33.0, 12.0, 87.0, 37.0, 95.0, 102.0, 88.0, 20.0, 147.0, 45.0, 61.0, 79.0, 113.0, 31.0, 39.0, 39.0, 91.0, 68.0, 44.0, 52.0, 85.0, 44.0, 87.0, 20.0, 79.0, 122.0, 50.0, 82.0, 23.0, 26.0, 23.0, 23.0, 125.0, 65.0, 90.0, 73.0, 106.0, 80.0, 28.0, 9.0, 95.0, 96.0, 51.0, 110.0, 147.0, 28.0, 39.0, 9.0, 150.0, 88.0, 71.0, 77.0, 67.0, 48.0, 107.0, 79.0, 84.0, 39.0, 34.0, 98.0, 64.0, 83.0, 98.0, 9.0, 39.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [17.0, 14.0, 31.0, 45.0, 9.0, 8.0, 9.0, 14.0, 58.0, 67.0, 70.0, 75.0, 3.0, 6.0, 33.0, 38.0, 43.0, 42.0, 26.0, 22.0, 21.0, 34.0, 9.0, 27.0, 52.0, 40.0, 22.0, 22.0, 60.0, 68.0, 49.0, 25.0, 45.0, 51.0, 25.0, 28.0, 11.0, 20.0, 5.0, 9.0, 56.0, 34.0, 14.0, 26.0, 86.0, 93.0, 15.0, 38.0, 34.0, 40.0, 50.0, 40.0, 0.0, 20.0, 34.0, 53.0, 45.0, 35.0, 6.0, 19.0, 53.0, 65.0, 3.0, 6.0, 6.0, 0.0, 37.0, 34.0, 62.0, 69.0, 10.0, 23.0, 6.0, 6.0, 26.0, 61.0, 28.0, 9.0, 41.0, 54.0, 50.0, 52.0, 34.0, 54.0, 3.0, 17.0, 83.0, 64.0, 22.0, 23.0, 36.0, 25.0, 37.0, 42.0, 43.0, 70.0, 11.0, 20.0, 25.0, 14.0, 20.0, 19.0, 49.0, 42.0, 45.0, 23.0, 30.0, 14.0, 31.0, 21.0, 49.0, 36.0, 14.0, 30.0, 39.0, 48.0, 11.0, 9.0, 43.0, 36.0, 52.0, 70.0, 27.0, 23.0, 39.0, 43.0, 0.0, 23.0, 11.0, 15.0, 12.0, 11.0, 14.0, 9.0, 62.0, 63.0, 31.0, 34.0, 45.0, 45.0, 42.0, 31.0, 56.0, 50.0, 41.0, 39.0, 19.0, 9.0, 0.0, 9.0, 53.0, 42.0, 53.0, 43.0, 22.0, 29.0, 53.0, 57.0, 71.0, 76.0, 9.0, 19.0, 14.0, 25.0, 6.0, 3.0, 67.0, 83.0, 50.0, 38.0, 37.0, 34.0, 36.0, 41.0, 33.0, 34.0, 33.0, 15.0, 53.0, 54.0, 34.0, 45.0, 41.0, 43.0, 22.0, 17.0, 6.0, 28.0, 44.0, 54.0, 28.0, 36.0, 45.0, 38.0, 36.0, 62.0, 3.0, 6.0, 22.0, 17.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6918396883946886, "mean_inference_ms": 1.2118290938548686, "mean_action_processing_ms": 0.13359040886880258, "mean_env_wait_ms": 0.8774454909009484, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 179.0, "episode_reward_min": 6.0, "episode_reward_mean": 66.59, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 93.0}, "policy_reward_mean": {"ppo": 
33.295}, "hist_stats": {"episode_reward": [31.0, 76.0, 17.0, 23.0, 125.0, 145.0, 9.0, 71.0, 85.0, 48.0, 55.0, 36.0, 92.0, 44.0, 128.0, 74.0, 96.0, 53.0, 31.0, 14.0, 90.0, 40.0, 179.0, 53.0, 74.0, 90.0, 20.0, 87.0, 80.0, 25.0, 118.0, 9.0, 6.0, 71.0, 131.0, 33.0, 12.0, 87.0, 37.0, 95.0, 102.0, 88.0, 20.0, 147.0, 45.0, 61.0, 79.0, 113.0, 31.0, 39.0, 39.0, 91.0, 68.0, 44.0, 52.0, 85.0, 44.0, 87.0, 20.0, 79.0, 122.0, 50.0, 82.0, 23.0, 26.0, 23.0, 23.0, 125.0, 65.0, 90.0, 73.0, 106.0, 80.0, 28.0, 9.0, 95.0, 96.0, 51.0, 110.0, 147.0, 28.0, 39.0, 9.0, 150.0, 88.0, 71.0, 77.0, 67.0, 48.0, 107.0, 79.0, 84.0, 39.0, 34.0, 98.0, 64.0, 83.0, 98.0, 9.0, 39.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [17.0, 14.0, 31.0, 45.0, 9.0, 8.0, 9.0, 14.0, 58.0, 67.0, 70.0, 75.0, 3.0, 6.0, 33.0, 38.0, 43.0, 42.0, 26.0, 22.0, 21.0, 34.0, 9.0, 27.0, 52.0, 40.0, 22.0, 22.0, 60.0, 68.0, 49.0, 25.0, 45.0, 51.0, 25.0, 28.0, 11.0, 20.0, 5.0, 9.0, 56.0, 34.0, 14.0, 26.0, 86.0, 93.0, 15.0, 38.0, 34.0, 40.0, 50.0, 40.0, 0.0, 20.0, 34.0, 53.0, 45.0, 35.0, 6.0, 19.0, 53.0, 65.0, 3.0, 6.0, 6.0, 0.0, 37.0, 34.0, 62.0, 69.0, 10.0, 23.0, 6.0, 6.0, 26.0, 61.0, 28.0, 9.0, 41.0, 54.0, 50.0, 52.0, 34.0, 54.0, 3.0, 17.0, 83.0, 64.0, 22.0, 23.0, 36.0, 25.0, 37.0, 42.0, 43.0, 70.0, 11.0, 20.0, 25.0, 14.0, 20.0, 19.0, 49.0, 42.0, 45.0, 23.0, 30.0, 14.0, 31.0, 21.0, 49.0, 36.0, 14.0, 30.0, 39.0, 48.0, 11.0, 9.0, 43.0, 36.0, 52.0, 70.0, 27.0, 23.0, 39.0, 43.0, 0.0, 23.0, 11.0, 15.0, 12.0, 11.0, 14.0, 9.0, 62.0, 63.0, 31.0, 
34.0, 45.0, 45.0, 42.0, 31.0, 56.0, 50.0, 41.0, 39.0, 19.0, 9.0, 0.0, 9.0, 53.0, 42.0, 53.0, 43.0, 22.0, 29.0, 53.0, 57.0, 71.0, 76.0, 9.0, 19.0, 14.0, 25.0, 6.0, 3.0, 67.0, 83.0, 50.0, 38.0, 37.0, 34.0, 36.0, 41.0, 33.0, 34.0, 33.0, 15.0, 53.0, 54.0, 34.0, 45.0, 41.0, 43.0, 22.0, 17.0, 6.0, 28.0, 44.0, 54.0, 28.0, 36.0, 45.0, 38.0, 36.0, 62.0, 3.0, 6.0, 22.0, 17.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6918396883946886, "mean_inference_ms": 1.2118290938548686, "mean_action_processing_ms": 0.13359040886880258, "mean_env_wait_ms": 0.8774454909009484, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 793600, "num_agent_steps_trained": 793600, "num_env_steps_sampled": 396800, "num_env_steps_trained": 396800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 396800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 793600, "timers": {"training_iteration_time_ms": 3721.315, "learn_time_ms": 1112.039, "learn_throughput": 11510.387, "synch_weights_time_ms": 11.618}, "counters": {"num_env_steps_sampled": 396800, "num_env_steps_trained": 396800, "num_agent_steps_sampled": 793600, "num_agent_steps_trained": 793600}, "done": false, "episodes_total": 992, "training_iteration": 31, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-01-57", "timestamp": 1666580517, "time_this_iter_s": 3.689429759979248, "time_total_s": 117.68465805053711, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, 
"gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, 
"min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, 
"metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 117.68465805053711, "timesteps_since_restore": 0, "iterations_since_restore": 31, "warmup_time": 13.401320934295654, "perf": 
{"cpu_util_percent": 20.766666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 13.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 44.58, "shaped_reward_min": 6, "shaped_reward_max": 76, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.52, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.69, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.5, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.04, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.44, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.67, "useful_onion_drop_agent_0_min": 0, 
"useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 3.67, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.08, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.72, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.8, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.81, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.63, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.51, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.06, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 1.2, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.31, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.67, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.08, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.67, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.08, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 9.313225884932663e-11, "cur_lr": 0.0010000000474974513, "total_loss": 
-0.002310357056558132, "policy_loss": -0.0018515328411012888, "vf_loss": 3.8169312477111816, "vf_explained_var": 0.21377882361412048, "kl": 0.0015562805347144604, "entropy": 1.6810286045074463, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 409600, "num_env_steps_trained": 409600, "num_agent_steps_sampled": 819200, "num_agent_steps_trained": 819200}, "sampler_results": {"episode_reward_max": 173.0, "episode_reward_min": 6.0, "episode_reward_mean": 70.98, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 89.0}, "policy_reward_mean": {"ppo": 35.49}, "custom_metrics": {"sparse_reward_mean": 13.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 44.58, "shaped_reward_min": 6, "shaped_reward_max": 76, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.52, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 
13, "onion_pickup_agent_1_mean": 6.69, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.5, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.04, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.44, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.67, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 3.67, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.08, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 9, "dish_pickup_agent_0_mean": 3.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.72, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.8, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.81, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.63, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.51, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.06, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 3, "soup_delivery_agent_1_mean": 1.2, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.31, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.67, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.08, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 9, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.67, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.08, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 9, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [6.0, 71.0, 131.0, 33.0, 12.0, 87.0, 37.0, 95.0, 102.0, 88.0, 20.0, 147.0, 45.0, 61.0, 79.0, 113.0, 31.0, 39.0, 39.0, 91.0, 68.0, 44.0, 52.0, 85.0, 44.0, 87.0, 20.0, 79.0, 122.0, 50.0, 82.0, 23.0, 26.0, 23.0, 23.0, 125.0, 65.0, 90.0, 73.0, 106.0, 80.0, 28.0, 9.0, 95.0, 96.0, 51.0, 110.0, 147.0, 28.0, 39.0, 9.0, 150.0, 88.0, 71.0, 77.0, 67.0, 48.0, 107.0, 79.0, 84.0, 39.0, 34.0, 98.0, 64.0, 83.0, 98.0, 9.0, 39.0, 92.0, 87.0, 56.0, 9.0, 144.0, 153.0, 51.0, 50.0, 52.0, 156.0, 136.0, 45.0, 48.0, 59.0, 42.0, 95.0, 46.0, 117.0, 96.0, 93.0, 31.0, 44.0, 94.0, 53.0, 50.0, 45.0, 44.0, 84.0, 133.0, 173.0, 87.0, 92.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 0.0, 37.0, 34.0, 62.0, 69.0, 10.0, 23.0, 6.0, 6.0, 26.0, 61.0, 28.0, 9.0, 41.0, 54.0, 50.0, 52.0, 34.0, 54.0, 3.0, 17.0, 83.0, 64.0, 22.0, 23.0, 36.0, 25.0, 37.0, 42.0, 43.0, 70.0, 11.0, 20.0, 25.0, 14.0, 20.0, 19.0, 49.0, 42.0, 45.0, 23.0, 30.0, 14.0, 31.0, 21.0, 49.0, 36.0, 14.0, 30.0, 39.0, 48.0, 11.0, 9.0, 43.0, 36.0, 52.0, 70.0, 27.0, 23.0, 39.0, 43.0, 0.0, 23.0, 11.0, 15.0, 
12.0, 11.0, 14.0, 9.0, 62.0, 63.0, 31.0, 34.0, 45.0, 45.0, 42.0, 31.0, 56.0, 50.0, 41.0, 39.0, 19.0, 9.0, 0.0, 9.0, 53.0, 42.0, 53.0, 43.0, 22.0, 29.0, 53.0, 57.0, 71.0, 76.0, 9.0, 19.0, 14.0, 25.0, 6.0, 3.0, 67.0, 83.0, 50.0, 38.0, 37.0, 34.0, 36.0, 41.0, 33.0, 34.0, 33.0, 15.0, 53.0, 54.0, 34.0, 45.0, 41.0, 43.0, 22.0, 17.0, 6.0, 28.0, 44.0, 54.0, 28.0, 36.0, 45.0, 38.0, 36.0, 62.0, 3.0, 6.0, 22.0, 17.0, 56.0, 36.0, 35.0, 52.0, 24.0, 32.0, 6.0, 3.0, 62.0, 82.0, 77.0, 76.0, 24.0, 27.0, 23.0, 27.0, 27.0, 25.0, 77.0, 79.0, 57.0, 79.0, 22.0, 23.0, 29.0, 19.0, 35.0, 24.0, 31.0, 11.0, 46.0, 49.0, 20.0, 26.0, 51.0, 66.0, 57.0, 39.0, 50.0, 43.0, 8.0, 23.0, 13.0, 31.0, 37.0, 57.0, 31.0, 22.0, 34.0, 16.0, 15.0, 30.0, 22.0, 22.0, 38.0, 46.0, 71.0, 62.0, 89.0, 84.0, 46.0, 41.0, 45.0, 47.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.691321764992291, "mean_inference_ms": 1.2113081635303937, "mean_action_processing_ms": 0.13352380109990097, "mean_env_wait_ms": 0.8776338249946448, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 173.0, "episode_reward_min": 6.0, "episode_reward_mean": 70.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 89.0}, "policy_reward_mean": {"ppo": 35.49}, "hist_stats": {"episode_reward": [6.0, 71.0, 131.0, 33.0, 12.0, 87.0, 37.0, 95.0, 102.0, 88.0, 20.0, 147.0, 45.0, 61.0, 79.0, 113.0, 31.0, 39.0, 39.0, 91.0, 68.0, 44.0, 52.0, 85.0, 44.0, 87.0, 20.0, 79.0, 122.0, 50.0, 82.0, 23.0, 26.0, 23.0, 23.0, 125.0, 65.0, 90.0, 73.0, 106.0, 80.0, 28.0, 9.0, 95.0, 96.0, 51.0, 110.0, 147.0, 28.0, 39.0, 9.0, 150.0, 88.0, 71.0, 77.0, 67.0, 48.0, 107.0, 79.0, 84.0, 39.0, 34.0, 98.0, 64.0, 83.0, 98.0, 9.0, 39.0, 92.0, 87.0, 56.0, 9.0, 144.0, 153.0, 51.0, 50.0, 52.0, 156.0, 136.0, 45.0, 48.0, 59.0, 42.0, 95.0, 46.0, 117.0, 96.0, 93.0, 31.0, 44.0, 94.0, 53.0, 50.0, 45.0, 44.0, 84.0, 133.0, 173.0, 87.0, 92.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 0.0, 37.0, 34.0, 62.0, 69.0, 10.0, 23.0, 6.0, 6.0, 26.0, 61.0, 28.0, 9.0, 41.0, 54.0, 50.0, 52.0, 34.0, 54.0, 3.0, 17.0, 83.0, 64.0, 22.0, 23.0, 36.0, 25.0, 37.0, 42.0, 43.0, 70.0, 11.0, 20.0, 25.0, 14.0, 20.0, 19.0, 49.0, 42.0, 45.0, 23.0, 30.0, 14.0, 31.0, 21.0, 49.0, 36.0, 14.0, 30.0, 39.0, 48.0, 11.0, 9.0, 43.0, 36.0, 52.0, 70.0, 27.0, 23.0, 39.0, 43.0, 0.0, 23.0, 11.0, 15.0, 12.0, 11.0, 14.0, 9.0, 62.0, 63.0, 31.0, 34.0, 45.0, 45.0, 42.0, 31.0, 56.0, 50.0, 41.0, 39.0, 19.0, 9.0, 0.0, 9.0, 53.0, 42.0, 53.0, 43.0, 22.0, 29.0, 53.0, 57.0, 71.0, 76.0, 9.0, 19.0, 14.0, 25.0, 6.0, 3.0, 67.0, 83.0, 50.0, 38.0, 37.0, 34.0, 36.0, 41.0, 33.0, 34.0, 33.0, 15.0, 53.0, 54.0, 34.0, 45.0, 41.0, 43.0, 22.0, 17.0, 6.0, 28.0, 44.0, 54.0, 28.0, 36.0, 45.0, 38.0, 36.0, 62.0, 3.0, 6.0, 22.0, 17.0, 56.0, 36.0, 35.0, 52.0, 24.0, 32.0, 6.0, 3.0, 62.0, 82.0, 77.0, 76.0, 24.0, 27.0, 23.0, 27.0, 27.0, 25.0, 77.0, 79.0, 57.0, 79.0, 22.0, 23.0, 29.0, 19.0, 35.0, 24.0, 31.0, 11.0, 46.0, 49.0, 20.0, 26.0, 51.0, 66.0, 57.0, 39.0, 50.0, 43.0, 8.0, 23.0, 13.0, 31.0, 37.0, 57.0, 31.0, 22.0, 34.0, 16.0, 15.0, 30.0, 22.0, 22.0, 38.0, 46.0, 71.0, 62.0, 89.0, 84.0, 46.0, 41.0, 45.0, 47.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.691321764992291, "mean_inference_ms": 1.2113081635303937, "mean_action_processing_ms": 0.13352380109990097, "mean_env_wait_ms": 0.8776338249946448, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 819200, 
"num_agent_steps_trained": 819200, "num_env_steps_sampled": 409600, "num_env_steps_trained": 409600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 409600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 819200, "timers": {"training_iteration_time_ms": 3726.338, "learn_time_ms": 1112.85, "learn_throughput": 11502.002, "synch_weights_time_ms": 11.967}, "counters": {"num_env_steps_sampled": 409600, "num_env_steps_trained": 409600, "num_agent_steps_sampled": 819200, "num_agent_steps_trained": 819200}, "done": false, "episodes_total": 1024, "training_iteration": 32, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-01", "timestamp": 1666580521, "time_this_iter_s": 3.81624174118042, "time_total_s": 121.50089979171753, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, 
"multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 
32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": 
true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, 
"attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, 
"prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, 
"learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 121.50089979171753, "timesteps_since_restore": 0, "iterations_since_restore": 32, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.22, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 15.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 46.47, "shaped_reward_min": 9, "shaped_reward_max": 76, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, 
"tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.46, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.97, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.62, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.34, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 3.75, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.34, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.25, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.72, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.84, 
"useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.77, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.8, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.45, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.44, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.38, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.15, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.75, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.34, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.75, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.34, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.6566129424663316e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.005534800700843334, "policy_loss": -0.0050851586274802685, "vf_loss": 3.892998695373535, "vf_explained_var": 0.28922930359840393, "kl": 0.001608746824786067, "entropy": 1.677882194519043, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 422400, "num_env_steps_trained": 422400, "num_agent_steps_sampled": 844800, "num_agent_steps_trained": 844800}, "sampler_results": {"episode_reward_max": 173.0, "episode_reward_min": 9.0, "episode_reward_mean": 76.47, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 89.0}, "policy_reward_mean": {"ppo": 38.235}, "custom_metrics": {"sparse_reward_mean": 
15.0, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 46.47, "shaped_reward_min": 9, "shaped_reward_max": 76, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.46, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 6.97, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 4.62, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 5.34, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 9, "useful_onion_drop_agent_1_mean": 1.34, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 3.75, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.34, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.25, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 3.72, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.77, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.8, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.45, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.44, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.38, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.15, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.25, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 3.75, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.34, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 3.75, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.34, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [26.0, 23.0, 23.0, 125.0, 65.0, 90.0, 73.0, 106.0, 80.0, 28.0, 9.0, 95.0, 96.0, 51.0, 110.0, 147.0, 28.0, 39.0, 9.0, 150.0, 88.0, 71.0, 77.0, 67.0, 48.0, 107.0, 79.0, 84.0, 39.0, 34.0, 98.0, 64.0, 83.0, 98.0, 9.0, 39.0, 92.0, 87.0, 56.0, 9.0, 144.0, 153.0, 51.0, 50.0, 52.0, 156.0, 136.0, 
45.0, 48.0, 59.0, 42.0, 95.0, 46.0, 117.0, 96.0, 93.0, 31.0, 44.0, 94.0, 53.0, 50.0, 45.0, 44.0, 84.0, 133.0, 173.0, 87.0, 92.0, 98.0, 133.0, 73.0, 122.0, 23.0, 62.0, 91.0, 80.0, 115.0, 25.0, 66.0, 110.0, 30.0, 145.0, 26.0, 109.0, 73.0, 101.0, 23.0, 102.0, 110.0, 60.0, 56.0, 101.0, 77.0, 96.0, 113.0, 66.0, 28.0, 149.0, 87.0, 82.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 15.0, 12.0, 11.0, 14.0, 9.0, 62.0, 63.0, 31.0, 34.0, 45.0, 45.0, 42.0, 31.0, 56.0, 50.0, 41.0, 39.0, 19.0, 9.0, 0.0, 9.0, 53.0, 42.0, 53.0, 43.0, 22.0, 29.0, 53.0, 57.0, 71.0, 76.0, 9.0, 19.0, 14.0, 25.0, 6.0, 3.0, 67.0, 83.0, 50.0, 38.0, 37.0, 34.0, 36.0, 41.0, 33.0, 34.0, 33.0, 15.0, 53.0, 54.0, 34.0, 45.0, 41.0, 43.0, 22.0, 17.0, 6.0, 28.0, 44.0, 54.0, 28.0, 36.0, 45.0, 38.0, 36.0, 62.0, 3.0, 6.0, 22.0, 17.0, 56.0, 36.0, 35.0, 52.0, 24.0, 32.0, 6.0, 3.0, 62.0, 82.0, 77.0, 76.0, 24.0, 27.0, 23.0, 27.0, 27.0, 25.0, 77.0, 79.0, 57.0, 79.0, 22.0, 23.0, 29.0, 19.0, 35.0, 24.0, 31.0, 11.0, 46.0, 49.0, 20.0, 26.0, 51.0, 66.0, 57.0, 39.0, 50.0, 43.0, 8.0, 23.0, 13.0, 31.0, 37.0, 57.0, 31.0, 22.0, 34.0, 16.0, 15.0, 30.0, 22.0, 22.0, 38.0, 46.0, 71.0, 62.0, 89.0, 84.0, 46.0, 41.0, 45.0, 47.0, 34.0, 64.0, 68.0, 65.0, 39.0, 34.0, 60.0, 62.0, 8.0, 15.0, 40.0, 22.0, 41.0, 50.0, 42.0, 38.0, 63.0, 52.0, 13.0, 12.0, 23.0, 43.0, 47.0, 63.0, 11.0, 19.0, 74.0, 71.0, 17.0, 9.0, 54.0, 55.0, 37.0, 36.0, 47.0, 54.0, 11.0, 12.0, 53.0, 49.0, 62.0, 48.0, 20.0, 40.0, 23.0, 33.0, 55.0, 46.0, 44.0, 33.0, 42.0, 54.0, 54.0, 
59.0, 35.0, 31.0, 20.0, 8.0, 72.0, 77.0, 48.0, 39.0, 34.0, 48.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6909394660361795, "mean_inference_ms": 1.2106474727147383, "mean_action_processing_ms": 0.1334392124001721, "mean_env_wait_ms": 0.8773286615961038, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 173.0, "episode_reward_min": 9.0, "episode_reward_mean": 76.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 89.0}, "policy_reward_mean": {"ppo": 38.235}, "hist_stats": {"episode_reward": [26.0, 23.0, 23.0, 125.0, 65.0, 90.0, 73.0, 106.0, 80.0, 28.0, 9.0, 95.0, 96.0, 51.0, 110.0, 147.0, 28.0, 39.0, 9.0, 150.0, 88.0, 71.0, 77.0, 67.0, 48.0, 107.0, 79.0, 84.0, 39.0, 34.0, 98.0, 64.0, 83.0, 98.0, 9.0, 39.0, 92.0, 87.0, 56.0, 9.0, 144.0, 153.0, 51.0, 50.0, 52.0, 156.0, 136.0, 45.0, 48.0, 59.0, 42.0, 95.0, 46.0, 117.0, 96.0, 93.0, 31.0, 44.0, 94.0, 53.0, 50.0, 45.0, 44.0, 84.0, 133.0, 173.0, 87.0, 92.0, 98.0, 133.0, 73.0, 122.0, 23.0, 62.0, 91.0, 80.0, 115.0, 25.0, 66.0, 110.0, 30.0, 145.0, 26.0, 109.0, 73.0, 101.0, 23.0, 102.0, 110.0, 60.0, 56.0, 101.0, 77.0, 96.0, 113.0, 66.0, 28.0, 149.0, 87.0, 82.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [11.0, 15.0, 12.0, 11.0, 14.0, 9.0, 62.0, 63.0, 31.0, 34.0, 45.0, 45.0, 42.0, 31.0, 56.0, 50.0, 41.0, 39.0, 19.0, 9.0, 0.0, 9.0, 53.0, 42.0, 53.0, 43.0, 22.0, 29.0, 53.0, 57.0, 71.0, 76.0, 9.0, 19.0, 14.0, 25.0, 6.0, 3.0, 67.0, 83.0, 
50.0, 38.0, 37.0, 34.0, 36.0, 41.0, 33.0, 34.0, 33.0, 15.0, 53.0, 54.0, 34.0, 45.0, 41.0, 43.0, 22.0, 17.0, 6.0, 28.0, 44.0, 54.0, 28.0, 36.0, 45.0, 38.0, 36.0, 62.0, 3.0, 6.0, 22.0, 17.0, 56.0, 36.0, 35.0, 52.0, 24.0, 32.0, 6.0, 3.0, 62.0, 82.0, 77.0, 76.0, 24.0, 27.0, 23.0, 27.0, 27.0, 25.0, 77.0, 79.0, 57.0, 79.0, 22.0, 23.0, 29.0, 19.0, 35.0, 24.0, 31.0, 11.0, 46.0, 49.0, 20.0, 26.0, 51.0, 66.0, 57.0, 39.0, 50.0, 43.0, 8.0, 23.0, 13.0, 31.0, 37.0, 57.0, 31.0, 22.0, 34.0, 16.0, 15.0, 30.0, 22.0, 22.0, 38.0, 46.0, 71.0, 62.0, 89.0, 84.0, 46.0, 41.0, 45.0, 47.0, 34.0, 64.0, 68.0, 65.0, 39.0, 34.0, 60.0, 62.0, 8.0, 15.0, 40.0, 22.0, 41.0, 50.0, 42.0, 38.0, 63.0, 52.0, 13.0, 12.0, 23.0, 43.0, 47.0, 63.0, 11.0, 19.0, 74.0, 71.0, 17.0, 9.0, 54.0, 55.0, 37.0, 36.0, 47.0, 54.0, 11.0, 12.0, 53.0, 49.0, 62.0, 48.0, 20.0, 40.0, 23.0, 33.0, 55.0, 46.0, 44.0, 33.0, 42.0, 54.0, 54.0, 59.0, 35.0, 31.0, 20.0, 8.0, 72.0, 77.0, 48.0, 39.0, 34.0, 48.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6909394660361795, "mean_inference_ms": 1.2106474727147383, "mean_action_processing_ms": 0.1334392124001721, "mean_env_wait_ms": 0.8773286615961038, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 844800, "num_agent_steps_trained": 844800, "num_env_steps_sampled": 422400, "num_env_steps_trained": 422400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 422400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 844800, "timers": {"training_iteration_time_ms": 3705.997, "learn_time_ms": 1102.749, "learn_throughput": 11607.351, "synch_weights_time_ms": 12.176}, "counters": {"num_env_steps_sampled": 422400, "num_env_steps_trained": 422400, "num_agent_steps_sampled": 844800, "num_agent_steps_trained": 844800}, "done": false, "episodes_total": 1056, "training_iteration": 33, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": 
"2022-10-23_20-02-04", "timestamp": 1666580524, "time_this_iter_s": 3.704005479812622, "time_total_s": 125.20490527153015, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, 
"rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], 
"output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": 
false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", 
"new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": 
{"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 125.20490527153015, "timesteps_since_restore": 0, "iterations_since_restore": 33, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.06666666666667, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 17.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 50.11, "shaped_reward_min": 9, "shaped_reward_max": 83, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.67, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.06, 
"onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.04, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.52, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.07, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.53, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.07, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 3.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.92, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.88, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.7, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.5, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.56, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.27, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 1.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.07, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.53, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.07, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.53, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.3283064712331658e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.0033294381573796272, "policy_loss": -0.0029248581267893314, "vf_loss": 4.272636890411377, "vf_explained_var": 0.2562987506389618, "kl": 0.001298167509958148, "entropy": 1.6636810302734375, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 435200, "num_env_steps_trained": 435200, "num_agent_steps_sampled": 870400, "num_agent_steps_trained": 870400}, "sampler_results": {"episode_reward_max": 193.0, "episode_reward_min": 9.0, "episode_reward_mean": 84.51, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 108.0}, "policy_reward_mean": {"ppo": 42.255}, "custom_metrics": {"sparse_reward_mean": 17.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 50.11, "shaped_reward_min": 9, "shaped_reward_max": 83, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 
0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.67, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.06, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.04, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.52, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 2.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.07, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.53, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.07, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 3.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 0.92, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 1.88, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.7, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.5, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.56, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.36, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 1.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.07, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.53, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.07, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.53, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [83.0, 98.0, 9.0, 39.0, 92.0, 87.0, 56.0, 9.0, 144.0, 153.0, 51.0, 50.0, 52.0, 156.0, 136.0, 45.0, 48.0, 59.0, 42.0, 95.0, 46.0, 117.0, 96.0, 93.0, 31.0, 44.0, 94.0, 53.0, 50.0, 45.0, 44.0, 84.0, 133.0, 173.0, 87.0, 92.0, 98.0, 133.0, 73.0, 122.0, 23.0, 62.0, 91.0, 80.0, 115.0, 25.0, 66.0, 110.0, 30.0, 145.0, 26.0, 109.0, 73.0, 101.0, 23.0, 102.0, 110.0, 60.0, 56.0, 101.0, 77.0, 96.0, 113.0, 66.0, 28.0, 149.0, 87.0, 82.0, 102.0, 39.0, 101.0, 193.0, 93.0, 50.0, 110.0, 161.0, 139.0, 70.0, 14.0, 93.0, 66.0, 101.0, 179.0, 144.0, 84.0, 33.0, 188.0, 53.0, 123.0, 23.0, 66.0, 130.0, 136.0, 48.0, 42.0, 110.0, 96.0, 103.0, 28.0, 115.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [45.0, 38.0, 36.0, 62.0, 3.0, 6.0, 22.0, 17.0, 56.0, 36.0, 35.0, 52.0, 24.0, 32.0, 6.0, 3.0, 62.0, 82.0, 77.0, 76.0, 24.0, 27.0, 23.0, 27.0, 27.0, 25.0, 77.0, 79.0, 57.0, 79.0, 22.0, 23.0, 29.0, 19.0, 35.0, 24.0, 31.0, 11.0, 46.0, 49.0, 20.0, 26.0, 51.0, 66.0, 57.0, 39.0, 50.0, 43.0, 8.0, 23.0, 13.0, 31.0, 37.0, 57.0, 31.0, 22.0, 34.0, 16.0, 15.0, 30.0, 22.0, 22.0, 38.0, 46.0, 71.0, 62.0, 89.0, 84.0, 46.0, 41.0, 45.0, 47.0, 34.0, 64.0, 68.0, 65.0, 39.0, 34.0, 60.0, 62.0, 8.0, 15.0, 40.0, 22.0, 41.0, 50.0, 42.0, 38.0, 63.0, 52.0, 13.0, 12.0, 23.0, 43.0, 47.0, 63.0, 11.0, 19.0, 74.0, 71.0, 17.0, 9.0, 54.0, 55.0, 37.0, 36.0, 47.0, 54.0, 11.0, 12.0, 53.0, 49.0, 62.0, 48.0, 20.0, 40.0, 23.0, 33.0, 55.0, 46.0, 44.0, 33.0, 42.0, 54.0, 54.0, 59.0, 35.0, 31.0, 20.0, 8.0, 72.0, 77.0, 48.0, 39.0, 34.0, 48.0, 48.0, 54.0, 19.0, 20.0, 56.0, 45.0, 89.0, 104.0, 44.0, 49.0, 17.0, 33.0, 66.0, 44.0, 83.0, 78.0, 71.0, 68.0, 48.0, 22.0, 11.0, 3.0, 47.0, 46.0, 26.0, 40.0, 41.0, 60.0, 71.0, 108.0, 63.0, 81.0, 36.0, 48.0, 25.0, 8.0, 89.0, 99.0, 28.0, 25.0, 63.0, 60.0, 20.0, 3.0, 40.0, 26.0, 64.0, 66.0, 66.0, 70.0, 30.0, 18.0, 22.0, 20.0, 64.0, 46.0, 61.0, 35.0, 43.0, 60.0, 8.0, 20.0, 53.0, 62.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6906616942712467, "mean_inference_ms": 1.2101625683690138, "mean_action_processing_ms": 0.13338849796915966, "mean_env_wait_ms": 0.8773535896870084, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 193.0, "episode_reward_min": 9.0, "episode_reward_mean": 84.51, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 108.0}, "policy_reward_mean": {"ppo": 42.255}, "hist_stats": {"episode_reward": [83.0, 98.0, 9.0, 39.0, 92.0, 87.0, 56.0, 9.0, 144.0, 153.0, 51.0, 50.0, 52.0, 156.0, 136.0, 
45.0, 48.0, 59.0, 42.0, 95.0, 46.0, 117.0, 96.0, 93.0, 31.0, 44.0, 94.0, 53.0, 50.0, 45.0, 44.0, 84.0, 133.0, 173.0, 87.0, 92.0, 98.0, 133.0, 73.0, 122.0, 23.0, 62.0, 91.0, 80.0, 115.0, 25.0, 66.0, 110.0, 30.0, 145.0, 26.0, 109.0, 73.0, 101.0, 23.0, 102.0, 110.0, 60.0, 56.0, 101.0, 77.0, 96.0, 113.0, 66.0, 28.0, 149.0, 87.0, 82.0, 102.0, 39.0, 101.0, 193.0, 93.0, 50.0, 110.0, 161.0, 139.0, 70.0, 14.0, 93.0, 66.0, 101.0, 179.0, 144.0, 84.0, 33.0, 188.0, 53.0, 123.0, 23.0, 66.0, 130.0, 136.0, 48.0, 42.0, 110.0, 96.0, 103.0, 28.0, 115.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [45.0, 38.0, 36.0, 62.0, 3.0, 6.0, 22.0, 17.0, 56.0, 36.0, 35.0, 52.0, 24.0, 32.0, 6.0, 3.0, 62.0, 82.0, 77.0, 76.0, 24.0, 27.0, 23.0, 27.0, 27.0, 25.0, 77.0, 79.0, 57.0, 79.0, 22.0, 23.0, 29.0, 19.0, 35.0, 24.0, 31.0, 11.0, 46.0, 49.0, 20.0, 26.0, 51.0, 66.0, 57.0, 39.0, 50.0, 43.0, 8.0, 23.0, 13.0, 31.0, 37.0, 57.0, 31.0, 22.0, 34.0, 16.0, 15.0, 30.0, 22.0, 22.0, 38.0, 46.0, 71.0, 62.0, 89.0, 84.0, 46.0, 41.0, 45.0, 47.0, 34.0, 64.0, 68.0, 65.0, 39.0, 34.0, 60.0, 62.0, 8.0, 15.0, 40.0, 22.0, 41.0, 50.0, 42.0, 38.0, 63.0, 52.0, 13.0, 12.0, 23.0, 43.0, 47.0, 63.0, 11.0, 19.0, 74.0, 71.0, 17.0, 9.0, 54.0, 55.0, 37.0, 36.0, 47.0, 54.0, 11.0, 12.0, 53.0, 49.0, 62.0, 48.0, 20.0, 40.0, 23.0, 33.0, 55.0, 46.0, 44.0, 33.0, 42.0, 54.0, 54.0, 59.0, 35.0, 31.0, 20.0, 8.0, 72.0, 77.0, 48.0, 39.0, 34.0, 48.0, 48.0, 54.0, 19.0, 20.0, 56.0, 45.0, 89.0, 104.0, 44.0, 49.0, 17.0, 33.0, 66.0, 44.0, 83.0, 78.0, 71.0, 
68.0, 48.0, 22.0, 11.0, 3.0, 47.0, 46.0, 26.0, 40.0, 41.0, 60.0, 71.0, 108.0, 63.0, 81.0, 36.0, 48.0, 25.0, 8.0, 89.0, 99.0, 28.0, 25.0, 63.0, 60.0, 20.0, 3.0, 40.0, 26.0, 64.0, 66.0, 66.0, 70.0, 30.0, 18.0, 22.0, 20.0, 64.0, 46.0, 61.0, 35.0, 43.0, 60.0, 8.0, 20.0, 53.0, 62.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6906616942712467, "mean_inference_ms": 1.2101625683690138, "mean_action_processing_ms": 0.13338849796915966, "mean_env_wait_ms": 0.8773535896870084, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 870400, "num_agent_steps_trained": 870400, "num_env_steps_sampled": 435200, "num_env_steps_trained": 435200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 435200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 870400, "timers": {"training_iteration_time_ms": 3703.091, "learn_time_ms": 1099.905, "learn_throughput": 11637.363, "synch_weights_time_ms": 11.648}, "counters": {"num_env_steps_sampled": 435200, "num_env_steps_trained": 435200, "num_agent_steps_sampled": 870400, "num_agent_steps_trained": 870400}, "done": false, "episodes_total": 1088, "training_iteration": 34, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-08", "timestamp": 1666580528, "time_this_iter_s": 3.8429672718048096, "time_total_s": 129.04787254333496, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": 
{"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": 
{"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, 
"log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, 
"train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 
100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 129.04787254333496, "timesteps_since_restore": 0, "iterations_since_restore": 34, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.316666666666666, "ram_util_percent": 10.6}} 
+{"custom_metrics": {"sparse_reward_mean": 19.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 51.76, "shaped_reward_min": 14, "shaped_reward_max": 86, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.54, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.34, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.17, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.74, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 
1.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.11, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.8, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.42, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.05, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.99, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.96, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.66, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.76, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 1.48, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 9, "optimal_onion_potting_agent_0_mean": 4.11, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.8, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.11, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.8, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.1641532356165829e-11, "cur_lr": 0.0010000000474974513, "total_loss": -0.005998552311211824, "policy_loss": -0.005601278506219387, "vf_loss": 4.330240726470947, 
"vf_explained_var": 0.26162025332450867, "kl": 0.0014254730194807053, "entropy": 1.6605894565582275, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 448000, "num_env_steps_trained": 448000, "num_agent_steps_sampled": 896000, "num_agent_steps_trained": 896000}, "sampler_results": {"episode_reward_max": 203.0, "episode_reward_min": 14.0, "episode_reward_mean": 90.96, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 108.0}, "policy_reward_mean": {"ppo": 45.48}, "custom_metrics": {"sparse_reward_mean": 19.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 51.76, "shaped_reward_min": 14, "shaped_reward_max": 86, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.54, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.34, "onion_pickup_agent_1_min": 1, 
"onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.17, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 5.74, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 12, "onion_drop_agent_0_mean": 2.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 2.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.11, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 10, "potting_onion_agent_1_mean": 4.8, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.42, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.05, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.99, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.96, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 7, "useful_dish_drop_agent_1_mean": 0.66, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.76, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 1.48, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, 
"soup_delivery_agent_1_mean": 1.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 9, "optimal_onion_potting_agent_0_mean": 4.11, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 10, "optimal_onion_potting_agent_1_mean": 4.8, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.11, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 10, "viable_onion_potting_agent_1_mean": 4.8, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [133.0, 173.0, 87.0, 92.0, 98.0, 133.0, 73.0, 122.0, 23.0, 62.0, 91.0, 80.0, 115.0, 25.0, 66.0, 110.0, 30.0, 145.0, 26.0, 109.0, 73.0, 101.0, 23.0, 102.0, 110.0, 60.0, 56.0, 101.0, 77.0, 96.0, 113.0, 66.0, 28.0, 149.0, 87.0, 82.0, 102.0, 39.0, 101.0, 193.0, 93.0, 50.0, 110.0, 161.0, 139.0, 70.0, 14.0, 93.0, 66.0, 101.0, 179.0, 144.0, 84.0, 33.0, 188.0, 53.0, 123.0, 23.0, 66.0, 130.0, 136.0, 48.0, 42.0, 110.0, 96.0, 103.0, 28.0, 115.0, 45.0, 86.0, 36.0, 88.0, 52.0, 155.0, 76.0, 47.0, 45.0, 23.0, 135.0, 150.0, 39.0, 23.0, 103.0, 67.0, 93.0, 122.0, 133.0, 82.0, 85.0, 119.0, 104.0, 90.0, 147.0, 87.0, 55.0, 96.0, 182.0, 31.0, 147.0, 203.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [71.0, 62.0, 89.0, 84.0, 46.0, 41.0, 45.0, 47.0, 34.0, 64.0, 68.0, 65.0, 39.0, 34.0, 60.0, 62.0, 8.0, 15.0, 40.0, 22.0, 41.0, 50.0, 42.0, 38.0, 63.0, 52.0, 13.0, 12.0, 23.0, 43.0, 47.0, 63.0, 11.0, 19.0, 74.0, 71.0, 17.0, 9.0, 54.0, 55.0, 37.0, 36.0, 47.0, 54.0, 11.0, 12.0, 53.0, 49.0, 62.0, 48.0, 20.0, 40.0, 23.0, 33.0, 55.0, 46.0, 44.0, 33.0, 42.0, 54.0, 54.0, 59.0, 35.0, 31.0, 20.0, 8.0, 72.0, 77.0, 48.0, 39.0, 34.0, 48.0, 48.0, 54.0, 19.0, 
20.0, 56.0, 45.0, 89.0, 104.0, 44.0, 49.0, 17.0, 33.0, 66.0, 44.0, 83.0, 78.0, 71.0, 68.0, 48.0, 22.0, 11.0, 3.0, 47.0, 46.0, 26.0, 40.0, 41.0, 60.0, 71.0, 108.0, 63.0, 81.0, 36.0, 48.0, 25.0, 8.0, 89.0, 99.0, 28.0, 25.0, 63.0, 60.0, 20.0, 3.0, 40.0, 26.0, 64.0, 66.0, 66.0, 70.0, 30.0, 18.0, 22.0, 20.0, 64.0, 46.0, 61.0, 35.0, 43.0, 60.0, 8.0, 20.0, 53.0, 62.0, 9.0, 36.0, 57.0, 29.0, 14.0, 22.0, 45.0, 43.0, 28.0, 24.0, 78.0, 77.0, 36.0, 40.0, 17.0, 30.0, 29.0, 16.0, 17.0, 6.0, 75.0, 60.0, 71.0, 79.0, 14.0, 25.0, 12.0, 11.0, 57.0, 46.0, 36.0, 31.0, 44.0, 49.0, 61.0, 61.0, 66.0, 67.0, 37.0, 45.0, 46.0, 39.0, 48.0, 71.0, 40.0, 64.0, 37.0, 53.0, 90.0, 57.0, 50.0, 37.0, 28.0, 27.0, 44.0, 52.0, 83.0, 99.0, 14.0, 17.0, 73.0, 74.0, 95.0, 108.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6906168886604556, "mean_inference_ms": 1.2098430190540526, "mean_action_processing_ms": 0.13335690940844777, "mean_env_wait_ms": 0.8774501217181921, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 203.0, "episode_reward_min": 14.0, "episode_reward_mean": 90.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 108.0}, "policy_reward_mean": {"ppo": 45.48}, "hist_stats": {"episode_reward": [133.0, 173.0, 87.0, 92.0, 98.0, 133.0, 73.0, 122.0, 23.0, 62.0, 91.0, 80.0, 115.0, 25.0, 66.0, 110.0, 30.0, 145.0, 26.0, 109.0, 73.0, 101.0, 23.0, 102.0, 110.0, 60.0, 56.0, 101.0, 77.0, 96.0, 113.0, 66.0, 28.0, 149.0, 87.0, 82.0, 102.0, 39.0, 101.0, 193.0, 93.0, 50.0, 110.0, 161.0, 139.0, 70.0, 14.0, 93.0, 66.0, 101.0, 179.0, 144.0, 84.0, 33.0, 188.0, 53.0, 123.0, 23.0, 66.0, 130.0, 136.0, 48.0, 42.0, 110.0, 96.0, 103.0, 28.0, 115.0, 45.0, 86.0, 36.0, 88.0, 52.0, 155.0, 76.0, 47.0, 45.0, 23.0, 135.0, 150.0, 39.0, 23.0, 103.0, 67.0, 93.0, 122.0, 133.0, 82.0, 85.0, 119.0, 104.0, 90.0, 147.0, 87.0, 55.0, 96.0, 182.0, 31.0, 147.0, 203.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [71.0, 62.0, 89.0, 84.0, 46.0, 41.0, 45.0, 47.0, 34.0, 64.0, 68.0, 65.0, 39.0, 34.0, 60.0, 62.0, 8.0, 15.0, 40.0, 22.0, 41.0, 50.0, 42.0, 38.0, 63.0, 52.0, 13.0, 12.0, 23.0, 43.0, 47.0, 63.0, 11.0, 19.0, 74.0, 71.0, 17.0, 9.0, 54.0, 55.0, 37.0, 36.0, 47.0, 54.0, 11.0, 12.0, 53.0, 49.0, 62.0, 48.0, 20.0, 40.0, 23.0, 33.0, 55.0, 46.0, 44.0, 33.0, 42.0, 54.0, 54.0, 59.0, 35.0, 31.0, 20.0, 8.0, 72.0, 77.0, 48.0, 39.0, 34.0, 48.0, 48.0, 54.0, 19.0, 20.0, 56.0, 45.0, 89.0, 104.0, 44.0, 49.0, 17.0, 33.0, 66.0, 44.0, 83.0, 78.0, 71.0, 68.0, 48.0, 22.0, 11.0, 3.0, 47.0, 46.0, 26.0, 40.0, 41.0, 60.0, 71.0, 108.0, 63.0, 81.0, 36.0, 48.0, 25.0, 8.0, 89.0, 99.0, 28.0, 25.0, 63.0, 60.0, 20.0, 3.0, 40.0, 26.0, 64.0, 66.0, 66.0, 70.0, 30.0, 18.0, 22.0, 20.0, 64.0, 46.0, 61.0, 35.0, 43.0, 60.0, 8.0, 20.0, 53.0, 62.0, 9.0, 36.0, 57.0, 29.0, 14.0, 22.0, 45.0, 43.0, 28.0, 24.0, 78.0, 77.0, 36.0, 40.0, 17.0, 30.0, 29.0, 16.0, 17.0, 6.0, 75.0, 60.0, 71.0, 79.0, 14.0, 25.0, 12.0, 11.0, 57.0, 46.0, 36.0, 31.0, 44.0, 49.0, 61.0, 61.0, 66.0, 67.0, 37.0, 45.0, 46.0, 39.0, 48.0, 71.0, 40.0, 64.0, 37.0, 53.0, 90.0, 57.0, 50.0, 37.0, 28.0, 27.0, 44.0, 52.0, 83.0, 99.0, 14.0, 17.0, 73.0, 74.0, 95.0, 108.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6906168886604556, "mean_inference_ms": 1.2098430190540526, "mean_action_processing_ms": 0.13335690940844777, "mean_env_wait_ms": 0.8774501217181921, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 896000, 
"num_agent_steps_trained": 896000, "num_env_steps_sampled": 448000, "num_env_steps_trained": 448000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 448000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 896000, "timers": {"training_iteration_time_ms": 3694.511, "learn_time_ms": 1092.395, "learn_throughput": 11717.368, "synch_weights_time_ms": 11.57}, "counters": {"num_env_steps_sampled": 448000, "num_env_steps_trained": 448000, "num_agent_steps_sampled": 896000, "num_agent_steps_trained": 896000}, "done": false, "episodes_total": 1120, "training_iteration": 35, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-12", "timestamp": 1666580532, "time_this_iter_s": 3.6886072158813477, "time_total_s": 132.7364797592163, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": 
true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, 
"attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], 
"same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, 
"attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, 
"replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": 
-1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 132.7364797592163, "timesteps_since_restore": 0, "iterations_since_restore": 35, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.259999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 20.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 53.37, "shaped_reward_min": 9, "shaped_reward_max": 86, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.23, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 7.37, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.67, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 5.72, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.3, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.44, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.69, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.01, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.11, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 
0.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.56, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.71, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.75, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 1.5, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.18, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.22, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 9, "optimal_onion_potting_agent_0_mean": 4.44, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.69, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.44, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.69, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 5.8207661780829145e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.006255907937884331, "policy_loss": -0.005860620643943548, "vf_loss": 4.331794738769531, "vf_explained_var": 0.3150397837162018, "kl": 0.0014443796826526523, "entropy": 1.656929612159729, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 460800, "num_env_steps_trained": 460800, "num_agent_steps_sampled": 921600, "num_agent_steps_trained": 921600}, "sampler_results": {"episode_reward_max": 203.0, "episode_reward_min": 9.0, "episode_reward_mean": 93.77, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 108.0}, "policy_reward_mean": {"ppo": 46.885}, "custom_metrics": {"sparse_reward_mean": 
20.2, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 53.37, "shaped_reward_min": 9, "shaped_reward_max": 86, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.23, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 7.37, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.67, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 5.72, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.3, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.44, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.69, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.01, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.11, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.56, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.71, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.75, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 1.5, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.18, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.22, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 9, "optimal_onion_potting_agent_0_mean": 4.44, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.69, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.44, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.69, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [28.0, 149.0, 87.0, 82.0, 102.0, 39.0, 101.0, 193.0, 93.0, 50.0, 110.0, 161.0, 139.0, 70.0, 14.0, 93.0, 66.0, 101.0, 179.0, 144.0, 84.0, 33.0, 188.0, 53.0, 123.0, 23.0, 66.0, 130.0, 136.0, 48.0, 42.0, 110.0, 96.0, 103.0, 28.0, 115.0, 45.0, 86.0, 36.0, 88.0, 52.0, 155.0, 76.0, 47.0, 45.0, 
23.0, 135.0, 150.0, 39.0, 23.0, 103.0, 67.0, 93.0, 122.0, 133.0, 82.0, 85.0, 119.0, 104.0, 90.0, 147.0, 87.0, 55.0, 96.0, 182.0, 31.0, 147.0, 203.0, 152.0, 125.0, 121.0, 101.0, 88.0, 90.0, 153.0, 142.0, 79.0, 17.0, 147.0, 27.0, 127.0, 125.0, 94.0, 162.0, 121.0, 80.0, 23.0, 101.0, 50.0, 113.0, 9.0, 122.0, 46.0, 110.0, 85.0, 47.0, 150.0, 113.0, 55.0, 77.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 8.0, 72.0, 77.0, 48.0, 39.0, 34.0, 48.0, 48.0, 54.0, 19.0, 20.0, 56.0, 45.0, 89.0, 104.0, 44.0, 49.0, 17.0, 33.0, 66.0, 44.0, 83.0, 78.0, 71.0, 68.0, 48.0, 22.0, 11.0, 3.0, 47.0, 46.0, 26.0, 40.0, 41.0, 60.0, 71.0, 108.0, 63.0, 81.0, 36.0, 48.0, 25.0, 8.0, 89.0, 99.0, 28.0, 25.0, 63.0, 60.0, 20.0, 3.0, 40.0, 26.0, 64.0, 66.0, 66.0, 70.0, 30.0, 18.0, 22.0, 20.0, 64.0, 46.0, 61.0, 35.0, 43.0, 60.0, 8.0, 20.0, 53.0, 62.0, 9.0, 36.0, 57.0, 29.0, 14.0, 22.0, 45.0, 43.0, 28.0, 24.0, 78.0, 77.0, 36.0, 40.0, 17.0, 30.0, 29.0, 16.0, 17.0, 6.0, 75.0, 60.0, 71.0, 79.0, 14.0, 25.0, 12.0, 11.0, 57.0, 46.0, 36.0, 31.0, 44.0, 49.0, 61.0, 61.0, 66.0, 67.0, 37.0, 45.0, 46.0, 39.0, 48.0, 71.0, 40.0, 64.0, 37.0, 53.0, 90.0, 57.0, 50.0, 37.0, 28.0, 27.0, 44.0, 52.0, 83.0, 99.0, 14.0, 17.0, 73.0, 74.0, 95.0, 108.0, 77.0, 75.0, 68.0, 57.0, 68.0, 53.0, 55.0, 46.0, 54.0, 34.0, 42.0, 48.0, 79.0, 74.0, 61.0, 81.0, 45.0, 34.0, 8.0, 9.0, 77.0, 70.0, 13.0, 14.0, 68.0, 59.0, 56.0, 69.0, 50.0, 44.0, 77.0, 85.0, 55.0, 66.0, 40.0, 40.0, 17.0, 6.0, 44.0, 57.0, 16.0, 34.0, 48.0, 65.0, 3.0, 6.0, 60.0, 62.0, 
17.0, 29.0, 54.0, 56.0, 40.0, 45.0, 20.0, 27.0, 94.0, 56.0, 68.0, 45.0, 26.0, 29.0, 37.0, 40.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6906281044431333, "mean_inference_ms": 1.2096052422422812, "mean_action_processing_ms": 0.13334734246729163, "mean_env_wait_ms": 0.8776488430576501, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 203.0, "episode_reward_min": 9.0, "episode_reward_mean": 93.77, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 108.0}, "policy_reward_mean": {"ppo": 46.885}, "hist_stats": {"episode_reward": [28.0, 149.0, 87.0, 82.0, 102.0, 39.0, 101.0, 193.0, 93.0, 50.0, 110.0, 161.0, 139.0, 70.0, 14.0, 93.0, 66.0, 101.0, 179.0, 144.0, 84.0, 33.0, 188.0, 53.0, 123.0, 23.0, 66.0, 130.0, 136.0, 48.0, 42.0, 110.0, 96.0, 103.0, 28.0, 115.0, 45.0, 86.0, 36.0, 88.0, 52.0, 155.0, 76.0, 47.0, 45.0, 23.0, 135.0, 150.0, 39.0, 23.0, 103.0, 67.0, 93.0, 122.0, 133.0, 82.0, 85.0, 119.0, 104.0, 90.0, 147.0, 87.0, 55.0, 96.0, 182.0, 31.0, 147.0, 203.0, 152.0, 125.0, 121.0, 101.0, 88.0, 90.0, 153.0, 142.0, 79.0, 17.0, 147.0, 27.0, 127.0, 125.0, 94.0, 162.0, 121.0, 80.0, 23.0, 101.0, 50.0, 113.0, 9.0, 122.0, 46.0, 110.0, 85.0, 47.0, 150.0, 113.0, 55.0, 77.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [20.0, 8.0, 72.0, 77.0, 48.0, 39.0, 34.0, 48.0, 48.0, 54.0, 19.0, 20.0, 56.0, 45.0, 89.0, 104.0, 44.0, 49.0, 17.0, 33.0, 66.0, 44.0, 83.0, 78.0, 71.0, 68.0, 48.0, 22.0, 11.0, 
3.0, 47.0, 46.0, 26.0, 40.0, 41.0, 60.0, 71.0, 108.0, 63.0, 81.0, 36.0, 48.0, 25.0, 8.0, 89.0, 99.0, 28.0, 25.0, 63.0, 60.0, 20.0, 3.0, 40.0, 26.0, 64.0, 66.0, 66.0, 70.0, 30.0, 18.0, 22.0, 20.0, 64.0, 46.0, 61.0, 35.0, 43.0, 60.0, 8.0, 20.0, 53.0, 62.0, 9.0, 36.0, 57.0, 29.0, 14.0, 22.0, 45.0, 43.0, 28.0, 24.0, 78.0, 77.0, 36.0, 40.0, 17.0, 30.0, 29.0, 16.0, 17.0, 6.0, 75.0, 60.0, 71.0, 79.0, 14.0, 25.0, 12.0, 11.0, 57.0, 46.0, 36.0, 31.0, 44.0, 49.0, 61.0, 61.0, 66.0, 67.0, 37.0, 45.0, 46.0, 39.0, 48.0, 71.0, 40.0, 64.0, 37.0, 53.0, 90.0, 57.0, 50.0, 37.0, 28.0, 27.0, 44.0, 52.0, 83.0, 99.0, 14.0, 17.0, 73.0, 74.0, 95.0, 108.0, 77.0, 75.0, 68.0, 57.0, 68.0, 53.0, 55.0, 46.0, 54.0, 34.0, 42.0, 48.0, 79.0, 74.0, 61.0, 81.0, 45.0, 34.0, 8.0, 9.0, 77.0, 70.0, 13.0, 14.0, 68.0, 59.0, 56.0, 69.0, 50.0, 44.0, 77.0, 85.0, 55.0, 66.0, 40.0, 40.0, 17.0, 6.0, 44.0, 57.0, 16.0, 34.0, 48.0, 65.0, 3.0, 6.0, 60.0, 62.0, 17.0, 29.0, 54.0, 56.0, 40.0, 45.0, 20.0, 27.0, 94.0, 56.0, 68.0, 45.0, 26.0, 29.0, 37.0, 40.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6906281044431333, "mean_inference_ms": 1.2096052422422812, "mean_action_processing_ms": 0.13334734246729163, "mean_env_wait_ms": 0.8776488430576501, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 921600, "num_agent_steps_trained": 921600, "num_env_steps_sampled": 460800, "num_env_steps_trained": 460800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 460800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 921600, "timers": {"training_iteration_time_ms": 3690.454, "learn_time_ms": 1086.199, "learn_throughput": 11784.209, "synch_weights_time_ms": 11.013}, "counters": {"num_env_steps_sampled": 460800, "num_env_steps_trained": 460800, "num_agent_steps_sampled": 921600, "num_agent_steps_trained": 921600}, "done": false, "episodes_total": 1152, "training_iteration": 36, "trial_id": "default", 
"experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-16", "timestamp": 1666580536, "time_this_iter_s": 3.6997194290161133, "time_total_s": 136.43619918823242, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": 
"", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, 
"sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": 
null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": 
""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, 
"vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 136.43619918823242, "timesteps_since_restore": 0, "iterations_since_restore": 36, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.16, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 19.4, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 52.6, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.46, "onion_pickup_agent_0_min": 1, 
"onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 7.62, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 5.79, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 5.92, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 13, "onion_drop_agent_1_mean": 2.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.4, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 13, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.33, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.64, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 3.92, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.21, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.09, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.53, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.74, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, 
"soup_delivery_agent_0_mean": 1.56, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.34, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.2, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 9, "optimal_onion_potting_agent_0_mean": 4.33, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.64, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.33, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.64, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, 
"useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.9103830890414573e-12, "cur_lr": 0.0010000000474974513, "total_loss": -0.002857581479474902, "policy_loss": -0.0024382565170526505, "vf_loss": 4.072160720825195, "vf_explained_var": 0.3045051097869873, "kl": 0.0014278730377554893, "entropy": 1.6530787944793701, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 473600, "num_env_steps_trained": 473600, "num_agent_steps_sampled": 947200, "num_agent_steps_trained": 947200}, "sampler_results": {"episode_reward_max": 203.0, "episode_reward_min": 9.0, "episode_reward_mean": 91.4, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 108.0}, "policy_reward_mean": {"ppo": 45.7}, "custom_metrics": {"sparse_reward_mean": 19.4, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 52.6, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, 
"tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.46, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 7.62, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 5.79, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 5.92, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 11, "onion_drop_agent_0_mean": 2.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 13, "onion_drop_agent_1_mean": 2.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.4, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 13, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.33, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.64, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 3.92, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.21, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.09, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.99, "useful_dish_pickup_agent_1_min": 0, 
"useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.53, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.74, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 1.56, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.34, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 4, "soup_drop_agent_0_mean": 1.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 7, "soup_drop_agent_1_mean": 1.2, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 9, "optimal_onion_potting_agent_0_mean": 4.33, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.64, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.33, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.64, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, 
"catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [96.0, 103.0, 28.0, 115.0, 45.0, 86.0, 36.0, 88.0, 52.0, 155.0, 76.0, 47.0, 45.0, 23.0, 135.0, 150.0, 39.0, 23.0, 103.0, 67.0, 93.0, 122.0, 133.0, 82.0, 85.0, 119.0, 104.0, 90.0, 147.0, 87.0, 55.0, 96.0, 182.0, 31.0, 147.0, 203.0, 152.0, 125.0, 121.0, 101.0, 88.0, 90.0, 153.0, 142.0, 79.0, 17.0, 147.0, 27.0, 127.0, 125.0, 94.0, 162.0, 121.0, 80.0, 23.0, 101.0, 50.0, 113.0, 9.0, 122.0, 46.0, 110.0, 85.0, 47.0, 150.0, 113.0, 55.0, 77.0, 9.0, 47.0, 150.0, 34.0, 77.0, 47.0, 90.0, 90.0, 161.0, 128.0, 79.0, 70.0, 79.0, 50.0, 118.0, 109.0, 119.0, 84.0, 136.0, 70.0, 42.0, 110.0, 122.0, 77.0, 134.0, 12.0, 61.0, 74.0, 34.0, 155.0, 158.0, 74.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [61.0, 35.0, 43.0, 60.0, 8.0, 20.0, 53.0, 62.0, 9.0, 36.0, 57.0, 29.0, 14.0, 22.0, 45.0, 43.0, 28.0, 24.0, 78.0, 77.0, 36.0, 40.0, 17.0, 30.0, 29.0, 16.0, 17.0, 6.0, 75.0, 60.0, 71.0, 79.0, 14.0, 25.0, 12.0, 11.0, 57.0, 46.0, 36.0, 31.0, 44.0, 49.0, 61.0, 61.0, 66.0, 67.0, 37.0, 45.0, 46.0, 39.0, 48.0, 71.0, 40.0, 64.0, 37.0, 53.0, 90.0, 57.0, 50.0, 37.0, 28.0, 27.0, 44.0, 52.0, 83.0, 99.0, 14.0, 17.0, 73.0, 74.0, 95.0, 108.0, 77.0, 75.0, 68.0, 57.0, 68.0, 53.0, 55.0, 46.0, 54.0, 34.0, 42.0, 48.0, 79.0, 74.0, 61.0, 81.0, 45.0, 34.0, 8.0, 9.0, 77.0, 70.0, 13.0, 14.0, 68.0, 59.0, 56.0, 69.0, 50.0, 44.0, 77.0, 85.0, 55.0, 66.0, 40.0, 40.0, 17.0, 6.0, 44.0, 57.0, 16.0, 34.0, 48.0, 65.0, 3.0, 6.0, 60.0, 62.0, 17.0, 29.0, 54.0, 56.0, 40.0, 45.0, 20.0, 27.0, 94.0, 56.0, 68.0, 45.0, 26.0, 29.0, 37.0, 40.0, 3.0, 6.0, 30.0, 17.0, 73.0, 77.0, 16.0, 18.0, 43.0, 34.0, 17.0, 30.0, 53.0, 37.0, 70.0, 20.0, 79.0, 82.0, 62.0, 66.0, 41.0, 38.0, 31.0, 39.0, 42.0, 37.0, 19.0, 31.0, 59.0, 59.0, 50.0, 59.0, 65.0, 54.0, 39.0, 45.0, 71.0, 65.0, 26.0, 44.0, 19.0, 23.0, 56.0, 54.0, 56.0, 66.0, 42.0, 35.0, 65.0, 69.0, 9.0, 3.0, 34.0, 27.0, 32.0, 42.0, 17.0, 17.0, 76.0, 79.0, 86.0, 72.0, 37.0, 37.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6906373736263695, "mean_inference_ms": 1.209409405773758, "mean_action_processing_ms": 0.13332850711565247, "mean_env_wait_ms": 0.877647120825507, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 203.0, "episode_reward_min": 9.0, "episode_reward_mean": 91.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 108.0}, "policy_reward_mean": {"ppo": 45.7}, "hist_stats": {"episode_reward": 
[96.0, 103.0, 28.0, 115.0, 45.0, 86.0, 36.0, 88.0, 52.0, 155.0, 76.0, 47.0, 45.0, 23.0, 135.0, 150.0, 39.0, 23.0, 103.0, 67.0, 93.0, 122.0, 133.0, 82.0, 85.0, 119.0, 104.0, 90.0, 147.0, 87.0, 55.0, 96.0, 182.0, 31.0, 147.0, 203.0, 152.0, 125.0, 121.0, 101.0, 88.0, 90.0, 153.0, 142.0, 79.0, 17.0, 147.0, 27.0, 127.0, 125.0, 94.0, 162.0, 121.0, 80.0, 23.0, 101.0, 50.0, 113.0, 9.0, 122.0, 46.0, 110.0, 85.0, 47.0, 150.0, 113.0, 55.0, 77.0, 9.0, 47.0, 150.0, 34.0, 77.0, 47.0, 90.0, 90.0, 161.0, 128.0, 79.0, 70.0, 79.0, 50.0, 118.0, 109.0, 119.0, 84.0, 136.0, 70.0, 42.0, 110.0, 122.0, 77.0, 134.0, 12.0, 61.0, 74.0, 34.0, 155.0, 158.0, 74.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [61.0, 35.0, 43.0, 60.0, 8.0, 20.0, 53.0, 62.0, 9.0, 36.0, 57.0, 29.0, 14.0, 22.0, 45.0, 43.0, 28.0, 24.0, 78.0, 77.0, 36.0, 40.0, 17.0, 30.0, 29.0, 16.0, 17.0, 6.0, 75.0, 60.0, 71.0, 79.0, 14.0, 25.0, 12.0, 11.0, 57.0, 46.0, 36.0, 31.0, 44.0, 49.0, 61.0, 61.0, 66.0, 67.0, 37.0, 45.0, 46.0, 39.0, 48.0, 71.0, 40.0, 64.0, 37.0, 53.0, 90.0, 57.0, 50.0, 37.0, 28.0, 27.0, 44.0, 52.0, 83.0, 99.0, 14.0, 17.0, 73.0, 74.0, 95.0, 108.0, 77.0, 75.0, 68.0, 57.0, 68.0, 53.0, 55.0, 46.0, 54.0, 34.0, 42.0, 48.0, 79.0, 74.0, 61.0, 81.0, 45.0, 34.0, 8.0, 9.0, 77.0, 70.0, 13.0, 14.0, 68.0, 59.0, 56.0, 69.0, 50.0, 44.0, 77.0, 85.0, 55.0, 66.0, 40.0, 40.0, 17.0, 6.0, 44.0, 57.0, 16.0, 34.0, 48.0, 65.0, 3.0, 6.0, 60.0, 62.0, 17.0, 29.0, 54.0, 56.0, 40.0, 45.0, 20.0, 27.0, 94.0, 56.0, 68.0, 45.0, 26.0, 29.0, 37.0, 40.0, 3.0, 
6.0, 30.0, 17.0, 73.0, 77.0, 16.0, 18.0, 43.0, 34.0, 17.0, 30.0, 53.0, 37.0, 70.0, 20.0, 79.0, 82.0, 62.0, 66.0, 41.0, 38.0, 31.0, 39.0, 42.0, 37.0, 19.0, 31.0, 59.0, 59.0, 50.0, 59.0, 65.0, 54.0, 39.0, 45.0, 71.0, 65.0, 26.0, 44.0, 19.0, 23.0, 56.0, 54.0, 56.0, 66.0, 42.0, 35.0, 65.0, 69.0, 9.0, 3.0, 34.0, 27.0, 32.0, 42.0, 17.0, 17.0, 76.0, 79.0, 86.0, 72.0, 37.0, 37.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6906373736263695, "mean_inference_ms": 1.209409405773758, "mean_action_processing_ms": 0.13332850711565247, "mean_env_wait_ms": 0.877647120825507, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 947200, "num_agent_steps_trained": 947200, "num_env_steps_sampled": 473600, "num_env_steps_trained": 473600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 473600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 947200, "timers": {"training_iteration_time_ms": 3698.67, "learn_time_ms": 1096.287, "learn_throughput": 11675.779, "synch_weights_time_ms": 11.456}, "counters": {"num_env_steps_sampled": 473600, "num_env_steps_trained": 473600, "num_agent_steps_sampled": 947200, "num_agent_steps_trained": 947200}, "done": false, "episodes_total": 1184, "training_iteration": 37, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-20", "timestamp": 1666580540, "time_this_iter_s": 3.7627384662628174, "time_total_s": 140.19893765449524, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 
2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, 
"gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, 
"min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, 
"metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 140.19893765449524, "timesteps_since_restore": 0, "iterations_since_restore": 37, "warmup_time": 13.401320934295654, "perf": 
{"cpu_util_percent": 21.2, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 21.2, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 52.95, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.65, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 7.75, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 5.77, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.03, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.83, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 13, "onion_drop_agent_1_mean": 2.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, 
"useful_onion_drop_agent_0_max": 13, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.37, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.75, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 3.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.08, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.08, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.0, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.76, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.59, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.6, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 1.47, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 1.46, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.94, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.98, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.37, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.75, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 13, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.37, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.75, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.4551915445207286e-12, "cur_lr": 0.0010000000474974513, "total_loss": 
-0.005668387282639742, "policy_loss": -0.005276101641356945, "vf_loss": 4.287709712982178, "vf_explained_var": 0.34124916791915894, "kl": 0.0015137059381231666, "entropy": 1.6421082019805908, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 486400, "num_env_steps_trained": 486400, "num_agent_steps_sampled": 972800, "num_agent_steps_trained": 972800}, "sampler_results": {"episode_reward_max": 242.0, "episode_reward_min": 9.0, "episode_reward_mean": 95.35, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 132.0}, "policy_reward_mean": {"ppo": 47.675}, "custom_metrics": {"sparse_reward_mean": 21.2, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 52.95, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.65, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 
17, "onion_pickup_agent_1_mean": 7.75, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 5.77, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.03, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.83, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 13, "onion_drop_agent_1_mean": 2.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 9, "useful_onion_drop_agent_0_mean": 1.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 13, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 9, "potting_onion_agent_0_mean": 4.37, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.75, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 3.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.08, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.08, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.0, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.76, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.59, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.6, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 1.47, 
"soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 1.46, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.94, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.98, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.37, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.75, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.37, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.75, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [182.0, 31.0, 147.0, 203.0, 152.0, 125.0, 121.0, 101.0, 88.0, 90.0, 153.0, 142.0, 79.0, 17.0, 147.0, 27.0, 127.0, 125.0, 94.0, 162.0, 121.0, 80.0, 23.0, 101.0, 50.0, 113.0, 9.0, 122.0, 46.0, 110.0, 85.0, 47.0, 150.0, 113.0, 55.0, 77.0, 9.0, 47.0, 150.0, 34.0, 77.0, 47.0, 90.0, 90.0, 161.0, 128.0, 79.0, 70.0, 79.0, 50.0, 118.0, 109.0, 119.0, 84.0, 136.0, 70.0, 42.0, 110.0, 122.0, 77.0, 134.0, 12.0, 61.0, 74.0, 34.0, 155.0, 158.0, 74.0, 75.0, 115.0, 114.0, 90.0, 87.0, 163.0, 39.0, 242.0, 101.0, 144.0, 69.0, 88.0, 50.0, 96.0, 31.0, 88.0, 179.0, 167.0, 90.0, 103.0, 107.0, 90.0, 9.0, 85.0, 82.0, 53.0, 9.0, 147.0, 139.0, 99.0, 25.0, 144.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [83.0, 99.0, 14.0, 17.0, 73.0, 74.0, 95.0, 108.0, 77.0, 75.0, 68.0, 57.0, 68.0, 53.0, 55.0, 46.0, 54.0, 34.0, 42.0, 48.0, 79.0, 74.0, 61.0, 81.0, 45.0, 34.0, 8.0, 9.0, 77.0, 70.0, 13.0, 14.0, 68.0, 59.0, 56.0, 69.0, 50.0, 44.0, 77.0, 85.0, 55.0, 66.0, 40.0, 40.0, 17.0, 6.0, 44.0, 57.0, 16.0, 34.0, 48.0, 65.0, 3.0, 6.0, 60.0, 62.0, 17.0, 29.0, 54.0, 56.0, 40.0, 
45.0, 20.0, 27.0, 94.0, 56.0, 68.0, 45.0, 26.0, 29.0, 37.0, 40.0, 3.0, 6.0, 30.0, 17.0, 73.0, 77.0, 16.0, 18.0, 43.0, 34.0, 17.0, 30.0, 53.0, 37.0, 70.0, 20.0, 79.0, 82.0, 62.0, 66.0, 41.0, 38.0, 31.0, 39.0, 42.0, 37.0, 19.0, 31.0, 59.0, 59.0, 50.0, 59.0, 65.0, 54.0, 39.0, 45.0, 71.0, 65.0, 26.0, 44.0, 19.0, 23.0, 56.0, 54.0, 56.0, 66.0, 42.0, 35.0, 65.0, 69.0, 9.0, 3.0, 34.0, 27.0, 32.0, 42.0, 17.0, 17.0, 76.0, 79.0, 86.0, 72.0, 37.0, 37.0, 31.0, 44.0, 61.0, 54.0, 65.0, 49.0, 51.0, 39.0, 45.0, 42.0, 74.0, 89.0, 22.0, 17.0, 132.0, 110.0, 49.0, 52.0, 68.0, 76.0, 39.0, 30.0, 36.0, 52.0, 24.0, 26.0, 42.0, 54.0, 23.0, 8.0, 54.0, 34.0, 82.0, 97.0, 88.0, 79.0, 43.0, 47.0, 42.0, 61.0, 50.0, 57.0, 34.0, 56.0, 9.0, 0.0, 33.0, 52.0, 48.0, 34.0, 20.0, 33.0, 6.0, 3.0, 61.0, 86.0, 63.0, 76.0, 52.0, 47.0, 20.0, 5.0, 73.0, 71.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6905464855961168, "mean_inference_ms": 1.209148072408414, "mean_action_processing_ms": 0.1333054417881031, "mean_env_wait_ms": 0.8775259558875729, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 242.0, "episode_reward_min": 9.0, "episode_reward_mean": 95.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 132.0}, "policy_reward_mean": {"ppo": 47.675}, "hist_stats": {"episode_reward": [182.0, 31.0, 147.0, 203.0, 152.0, 125.0, 121.0, 101.0, 88.0, 90.0, 153.0, 142.0, 79.0, 17.0, 147.0, 27.0, 127.0, 125.0, 94.0, 162.0, 121.0, 80.0, 23.0, 101.0, 50.0, 113.0, 9.0, 122.0, 46.0, 110.0, 85.0, 47.0, 150.0, 113.0, 55.0, 77.0, 9.0, 47.0, 150.0, 34.0, 77.0, 47.0, 90.0, 90.0, 161.0, 128.0, 79.0, 70.0, 79.0, 50.0, 118.0, 109.0, 119.0, 84.0, 136.0, 70.0, 42.0, 110.0, 122.0, 77.0, 134.0, 12.0, 61.0, 74.0, 34.0, 155.0, 158.0, 74.0, 75.0, 115.0, 114.0, 90.0, 87.0, 163.0, 39.0, 242.0, 101.0, 144.0, 69.0, 88.0, 50.0, 96.0, 31.0, 88.0, 179.0, 167.0, 90.0, 103.0, 107.0, 90.0, 9.0, 85.0, 82.0, 53.0, 9.0, 147.0, 139.0, 99.0, 25.0, 
144.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [83.0, 99.0, 14.0, 17.0, 73.0, 74.0, 95.0, 108.0, 77.0, 75.0, 68.0, 57.0, 68.0, 53.0, 55.0, 46.0, 54.0, 34.0, 42.0, 48.0, 79.0, 74.0, 61.0, 81.0, 45.0, 34.0, 8.0, 9.0, 77.0, 70.0, 13.0, 14.0, 68.0, 59.0, 56.0, 69.0, 50.0, 44.0, 77.0, 85.0, 55.0, 66.0, 40.0, 40.0, 17.0, 6.0, 44.0, 57.0, 16.0, 34.0, 48.0, 65.0, 3.0, 6.0, 60.0, 62.0, 17.0, 29.0, 54.0, 56.0, 40.0, 45.0, 20.0, 27.0, 94.0, 56.0, 68.0, 45.0, 26.0, 29.0, 37.0, 40.0, 3.0, 6.0, 30.0, 17.0, 73.0, 77.0, 16.0, 18.0, 43.0, 34.0, 17.0, 30.0, 53.0, 37.0, 70.0, 20.0, 79.0, 82.0, 62.0, 66.0, 41.0, 38.0, 31.0, 39.0, 42.0, 37.0, 19.0, 31.0, 59.0, 59.0, 50.0, 59.0, 65.0, 54.0, 39.0, 45.0, 71.0, 65.0, 26.0, 44.0, 19.0, 23.0, 56.0, 54.0, 56.0, 66.0, 42.0, 35.0, 65.0, 69.0, 9.0, 3.0, 34.0, 27.0, 32.0, 42.0, 17.0, 17.0, 76.0, 79.0, 86.0, 72.0, 37.0, 37.0, 31.0, 44.0, 61.0, 54.0, 65.0, 49.0, 51.0, 39.0, 45.0, 42.0, 74.0, 89.0, 22.0, 17.0, 132.0, 110.0, 49.0, 52.0, 68.0, 76.0, 39.0, 30.0, 36.0, 52.0, 24.0, 26.0, 42.0, 54.0, 23.0, 8.0, 54.0, 34.0, 82.0, 97.0, 88.0, 79.0, 43.0, 47.0, 42.0, 61.0, 50.0, 57.0, 34.0, 56.0, 9.0, 0.0, 33.0, 52.0, 48.0, 34.0, 20.0, 33.0, 6.0, 3.0, 61.0, 86.0, 63.0, 76.0, 52.0, 47.0, 20.0, 5.0, 73.0, 71.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6905464855961168, "mean_inference_ms": 1.209148072408414, "mean_action_processing_ms": 0.1333054417881031, "mean_env_wait_ms": 0.8775259558875729, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, 
"num_healthy_workers": 16, "num_agent_steps_sampled": 972800, "num_agent_steps_trained": 972800, "num_env_steps_sampled": 486400, "num_env_steps_trained": 486400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 486400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 972800, "timers": {"training_iteration_time_ms": 3700.754, "learn_time_ms": 1092.91, "learn_throughput": 11711.85, "synch_weights_time_ms": 11.016}, "counters": {"num_env_steps_sampled": 486400, "num_env_steps_trained": 486400, "num_agent_steps_sampled": 972800, "num_agent_steps_trained": 972800}, "done": false, "episodes_total": 1216, "training_iteration": 38, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-24", "timestamp": 1666580544, "time_this_iter_s": 3.771451473236084, "time_total_s": 143.97038912773132, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], 
"counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 
1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": 
-1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, 
"min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 143.97038912773132, "timesteps_since_restore": 0, "iterations_since_restore": 38, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.166666666666668, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 21.0, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 53.0, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.52, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 7.81, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 5.72, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.13, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.83, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 13, "onion_drop_agent_1_mean": 2.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.54, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 13, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 4.32, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.91, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 4.25, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.09, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.04, 
"useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.93, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.67, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.81, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.51, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.38, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.51, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 1.45, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.85, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.8, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.32, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.91, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.32, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.91, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 7.275957722603643e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.0030713295564055443, "policy_loss": -0.0026913548354059458, "vf_loss": 4.407833099365234, "vf_explained_var": 0.35603398084640503, "kl": 0.0016552733723074198, "entropy": 1.641512393951416, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 499200, "num_env_steps_trained": 499200, "num_agent_steps_sampled": 998400, "num_agent_steps_trained": 998400}, "sampler_results": {"episode_reward_max": 242.0, "episode_reward_min": 9.0, "episode_reward_mean": 95.0, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 
0.0}, "policy_reward_max": {"ppo": 132.0}, "policy_reward_mean": {"ppo": 47.5}, "custom_metrics": {"sparse_reward_mean": 21.0, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 53.0, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.52, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 17, "onion_pickup_agent_1_mean": 7.81, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 15, "useful_onion_pickup_agent_0_mean": 5.72, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 13, "useful_onion_pickup_agent_1_mean": 6.13, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.83, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 13, "onion_drop_agent_1_mean": 2.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.54, "useful_onion_drop_agent_0_min": 
0, "useful_onion_drop_agent_0_max": 13, "useful_onion_drop_agent_1_mean": 1.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 4.32, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 4.91, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 4.25, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.09, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.93, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 10, "useful_dish_drop_agent_0_mean": 0.67, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.81, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.51, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.38, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 1.51, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 1.45, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.85, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.8, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.32, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 4.91, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 13, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.32, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 4.91, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [150.0, 113.0, 55.0, 77.0, 9.0, 47.0, 150.0, 34.0, 77.0, 47.0, 90.0, 90.0, 161.0, 128.0, 79.0, 70.0, 79.0, 50.0, 118.0, 109.0, 119.0, 84.0, 136.0, 70.0, 42.0, 110.0, 
122.0, 77.0, 134.0, 12.0, 61.0, 74.0, 34.0, 155.0, 158.0, 74.0, 75.0, 115.0, 114.0, 90.0, 87.0, 163.0, 39.0, 242.0, 101.0, 144.0, 69.0, 88.0, 50.0, 96.0, 31.0, 88.0, 179.0, 167.0, 90.0, 103.0, 107.0, 90.0, 9.0, 85.0, 82.0, 53.0, 9.0, 147.0, 139.0, 99.0, 25.0, 144.0, 17.0, 9.0, 71.0, 152.0, 98.0, 91.0, 120.0, 107.0, 109.0, 191.0, 109.0, 83.0, 104.0, 71.0, 127.0, 93.0, 98.0, 161.0, 42.0, 115.0, 141.0, 90.0, 98.0, 56.0, 128.0, 85.0, 138.0, 144.0, 9.0, 202.0, 36.0, 90.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [94.0, 56.0, 68.0, 45.0, 26.0, 29.0, 37.0, 40.0, 3.0, 6.0, 30.0, 17.0, 73.0, 77.0, 16.0, 18.0, 43.0, 34.0, 17.0, 30.0, 53.0, 37.0, 70.0, 20.0, 79.0, 82.0, 62.0, 66.0, 41.0, 38.0, 31.0, 39.0, 42.0, 37.0, 19.0, 31.0, 59.0, 59.0, 50.0, 59.0, 65.0, 54.0, 39.0, 45.0, 71.0, 65.0, 26.0, 44.0, 19.0, 23.0, 56.0, 54.0, 56.0, 66.0, 42.0, 35.0, 65.0, 69.0, 9.0, 3.0, 34.0, 27.0, 32.0, 42.0, 17.0, 17.0, 76.0, 79.0, 86.0, 72.0, 37.0, 37.0, 31.0, 44.0, 61.0, 54.0, 65.0, 49.0, 51.0, 39.0, 45.0, 42.0, 74.0, 89.0, 22.0, 17.0, 132.0, 110.0, 49.0, 52.0, 68.0, 76.0, 39.0, 30.0, 36.0, 52.0, 24.0, 26.0, 42.0, 54.0, 23.0, 8.0, 54.0, 34.0, 82.0, 97.0, 88.0, 79.0, 43.0, 47.0, 42.0, 61.0, 50.0, 57.0, 34.0, 56.0, 9.0, 0.0, 33.0, 52.0, 48.0, 34.0, 20.0, 33.0, 6.0, 3.0, 61.0, 86.0, 63.0, 76.0, 52.0, 47.0, 20.0, 5.0, 73.0, 71.0, 8.0, 9.0, 6.0, 3.0, 20.0, 51.0, 75.0, 77.0, 54.0, 44.0, 40.0, 51.0, 54.0, 66.0, 55.0, 52.0, 70.0, 39.0, 82.0, 109.0, 69.0, 40.0, 43.0, 40.0, 47.0, 57.0, 40.0, 31.0, 53.0, 
74.0, 34.0, 59.0, 50.0, 48.0, 85.0, 76.0, 17.0, 25.0, 56.0, 59.0, 79.0, 62.0, 42.0, 48.0, 47.0, 51.0, 36.0, 20.0, 48.0, 80.0, 51.0, 34.0, 63.0, 75.0, 69.0, 75.0, 3.0, 6.0, 111.0, 91.0, 13.0, 23.0, 50.0, 40.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6904068077951437, "mean_inference_ms": 1.2088387215834406, "mean_action_processing_ms": 0.13328399687148587, "mean_env_wait_ms": 0.877354026661738, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 242.0, "episode_reward_min": 9.0, "episode_reward_mean": 95.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 132.0}, "policy_reward_mean": {"ppo": 47.5}, "hist_stats": {"episode_reward": [150.0, 113.0, 55.0, 77.0, 9.0, 47.0, 150.0, 34.0, 77.0, 47.0, 90.0, 90.0, 161.0, 128.0, 79.0, 70.0, 79.0, 50.0, 118.0, 109.0, 119.0, 84.0, 136.0, 70.0, 42.0, 110.0, 122.0, 77.0, 134.0, 12.0, 61.0, 74.0, 34.0, 155.0, 158.0, 74.0, 75.0, 115.0, 114.0, 90.0, 87.0, 163.0, 39.0, 242.0, 101.0, 144.0, 69.0, 88.0, 50.0, 96.0, 31.0, 88.0, 179.0, 167.0, 90.0, 103.0, 107.0, 90.0, 9.0, 85.0, 82.0, 53.0, 9.0, 147.0, 139.0, 99.0, 25.0, 144.0, 17.0, 9.0, 71.0, 152.0, 98.0, 91.0, 120.0, 107.0, 109.0, 191.0, 109.0, 83.0, 104.0, 71.0, 127.0, 93.0, 98.0, 161.0, 42.0, 115.0, 141.0, 90.0, 98.0, 56.0, 128.0, 85.0, 138.0, 144.0, 9.0, 202.0, 36.0, 90.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [94.0, 56.0, 68.0, 45.0, 26.0, 29.0, 37.0, 40.0, 3.0, 6.0, 30.0, 17.0, 
73.0, 77.0, 16.0, 18.0, 43.0, 34.0, 17.0, 30.0, 53.0, 37.0, 70.0, 20.0, 79.0, 82.0, 62.0, 66.0, 41.0, 38.0, 31.0, 39.0, 42.0, 37.0, 19.0, 31.0, 59.0, 59.0, 50.0, 59.0, 65.0, 54.0, 39.0, 45.0, 71.0, 65.0, 26.0, 44.0, 19.0, 23.0, 56.0, 54.0, 56.0, 66.0, 42.0, 35.0, 65.0, 69.0, 9.0, 3.0, 34.0, 27.0, 32.0, 42.0, 17.0, 17.0, 76.0, 79.0, 86.0, 72.0, 37.0, 37.0, 31.0, 44.0, 61.0, 54.0, 65.0, 49.0, 51.0, 39.0, 45.0, 42.0, 74.0, 89.0, 22.0, 17.0, 132.0, 110.0, 49.0, 52.0, 68.0, 76.0, 39.0, 30.0, 36.0, 52.0, 24.0, 26.0, 42.0, 54.0, 23.0, 8.0, 54.0, 34.0, 82.0, 97.0, 88.0, 79.0, 43.0, 47.0, 42.0, 61.0, 50.0, 57.0, 34.0, 56.0, 9.0, 0.0, 33.0, 52.0, 48.0, 34.0, 20.0, 33.0, 6.0, 3.0, 61.0, 86.0, 63.0, 76.0, 52.0, 47.0, 20.0, 5.0, 73.0, 71.0, 8.0, 9.0, 6.0, 3.0, 20.0, 51.0, 75.0, 77.0, 54.0, 44.0, 40.0, 51.0, 54.0, 66.0, 55.0, 52.0, 70.0, 39.0, 82.0, 109.0, 69.0, 40.0, 43.0, 40.0, 47.0, 57.0, 40.0, 31.0, 53.0, 74.0, 34.0, 59.0, 50.0, 48.0, 85.0, 76.0, 17.0, 25.0, 56.0, 59.0, 79.0, 62.0, 42.0, 48.0, 47.0, 51.0, 36.0, 20.0, 48.0, 80.0, 51.0, 34.0, 63.0, 75.0, 69.0, 75.0, 3.0, 6.0, 111.0, 91.0, 13.0, 23.0, 50.0, 40.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6904068077951437, "mean_inference_ms": 1.2088387215834406, "mean_action_processing_ms": 0.13328399687148587, "mean_env_wait_ms": 0.877354026661738, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 998400, "num_agent_steps_trained": 998400, "num_env_steps_sampled": 499200, "num_env_steps_trained": 499200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 499200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 998400, "timers": {"training_iteration_time_ms": 3678.152, "learn_time_ms": 1103.956, "learn_throughput": 11594.67, "synch_weights_time_ms": 10.871}, "counters": {"num_env_steps_sampled": 499200, "num_env_steps_trained": 499200, "num_agent_steps_sampled": 998400, "num_agent_steps_trained": 
998400}, "done": false, "episodes_total": 1248, "training_iteration": 39, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-28", "timestamp": 1666580548, "time_this_iter_s": 3.767300605773926, "time_total_s": 147.73768973350525, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, 
"disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, 
"actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 147.73768973350525, "timesteps_since_restore": 0, "iterations_since_restore": 39, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.080000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 23.4, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 52.7, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.13, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 7.97, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.42, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 6.43, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.46, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 4.24, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.08, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 4.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.29, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.87, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.78, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.91, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.52, "soup_pickup_agent_0_min": 0, 
"soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.36, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.51, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 1.54, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.86, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.7, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.24, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.08, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.24, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.08, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 
0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.6379788613018216e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.003938781097531319, "policy_loss": -0.003565125400200486, "vf_loss": 4.442060470581055, "vf_explained_var": 0.35988348722457886, "kl": 0.0015638747718185186, "entropy": 1.6357197761535645, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 512000, "num_env_steps_trained": 512000, "num_agent_steps_sampled": 1024000, "num_agent_steps_trained": 1024000}, "sampler_results": {"episode_reward_max": 242.0, "episode_reward_min": 9.0, "episode_reward_mean": 99.5, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 132.0}, "policy_reward_mean": {"ppo": 49.75}, "custom_metrics": {"sparse_reward_mean": 23.4, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 52.7, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.13, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 7.97, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 14, "useful_onion_pickup_agent_0_mean": 5.42, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 6.43, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.46, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 10, "useful_onion_drop_agent_0_mean": 1.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 4.24, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.08, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 13, "dish_pickup_agent_0_mean": 4.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.29, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.87, "useful_dish_pickup_agent_0_min": 0, 
"useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 0.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.78, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.91, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.52, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.36, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.51, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 1.54, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.86, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.7, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.24, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.08, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 13, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.24, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.08, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 13, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [34.0, 155.0, 158.0, 74.0, 75.0, 115.0, 114.0, 90.0, 87.0, 163.0, 39.0, 242.0, 101.0, 144.0, 69.0, 88.0, 50.0, 96.0, 31.0, 88.0, 179.0, 167.0, 90.0, 103.0, 107.0, 90.0, 9.0, 85.0, 82.0, 53.0, 9.0, 147.0, 139.0, 99.0, 25.0, 144.0, 17.0, 9.0, 71.0, 152.0, 98.0, 91.0, 120.0, 107.0, 109.0, 191.0, 109.0, 83.0, 104.0, 71.0, 127.0, 93.0, 98.0, 161.0, 42.0, 115.0, 141.0, 90.0, 98.0, 56.0, 128.0, 85.0, 138.0, 144.0, 9.0, 202.0, 36.0, 90.0, 28.0, 164.0, 96.0, 195.0, 124.0, 144.0, 69.0, 122.0, 68.0, 130.0, 150.0, 122.0, 141.0, 98.0, 79.0, 55.0, 9.0, 102.0, 12.0, 93.0, 49.0, 149.0, 90.0, 144.0, 195.0, 63.0, 88.0, 90.0, 198.0, 28.0, 31.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [17.0, 17.0, 76.0, 79.0, 86.0, 72.0, 37.0, 37.0, 31.0, 44.0, 61.0, 54.0, 65.0, 49.0, 51.0, 39.0, 45.0, 42.0, 74.0, 89.0, 22.0, 17.0, 132.0, 110.0, 49.0, 52.0, 68.0, 76.0, 39.0, 30.0, 36.0, 52.0, 24.0, 26.0, 42.0, 54.0, 23.0, 8.0, 54.0, 34.0, 82.0, 97.0, 88.0, 79.0, 43.0, 47.0, 42.0, 61.0, 50.0, 57.0, 34.0, 56.0, 9.0, 0.0, 33.0, 52.0, 48.0, 34.0, 20.0, 33.0, 6.0, 3.0, 61.0, 86.0, 63.0, 76.0, 52.0, 47.0, 20.0, 5.0, 73.0, 71.0, 8.0, 9.0, 6.0, 3.0, 20.0, 51.0, 75.0, 77.0, 54.0, 44.0, 40.0, 51.0, 54.0, 66.0, 55.0, 52.0, 70.0, 39.0, 82.0, 109.0, 69.0, 40.0, 43.0, 40.0, 47.0, 57.0, 40.0, 31.0, 53.0, 74.0, 34.0, 59.0, 50.0, 48.0, 85.0, 76.0, 17.0, 25.0, 56.0, 59.0, 79.0, 62.0, 42.0, 48.0, 47.0, 51.0, 36.0, 20.0, 48.0, 80.0, 51.0, 34.0, 63.0, 75.0, 69.0, 75.0, 3.0, 6.0, 111.0, 91.0, 13.0, 23.0, 50.0, 40.0, 22.0, 6.0, 76.0, 88.0, 56.0, 40.0, 98.0, 97.0, 69.0, 55.0, 75.0, 69.0, 37.0, 32.0, 51.0, 71.0, 39.0, 29.0, 52.0, 78.0, 68.0, 82.0, 56.0, 66.0, 69.0, 72.0, 56.0, 42.0, 43.0, 36.0, 16.0, 39.0, 3.0, 6.0, 45.0, 57.0, 3.0, 9.0, 56.0, 37.0, 30.0, 19.0, 78.0, 71.0, 44.0, 46.0, 71.0, 73.0, 89.0, 106.0, 28.0, 35.0, 34.0, 54.0, 42.0, 48.0, 98.0, 100.0, 19.0, 9.0, 22.0, 9.0, 37.0, 61.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6901884261177258, "mean_inference_ms": 1.2084870526268037, "mean_action_processing_ms": 0.13327061894254838, "mean_env_wait_ms": 0.8770979785487504, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 242.0, "episode_reward_min": 9.0, "episode_reward_mean": 99.5, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": 
{"ppo": 0.0}, "policy_reward_max": {"ppo": 132.0}, "policy_reward_mean": {"ppo": 49.75}, "hist_stats": {"episode_reward": [34.0, 155.0, 158.0, 74.0, 75.0, 115.0, 114.0, 90.0, 87.0, 163.0, 39.0, 242.0, 101.0, 144.0, 69.0, 88.0, 50.0, 96.0, 31.0, 88.0, 179.0, 167.0, 90.0, 103.0, 107.0, 90.0, 9.0, 85.0, 82.0, 53.0, 9.0, 147.0, 139.0, 99.0, 25.0, 144.0, 17.0, 9.0, 71.0, 152.0, 98.0, 91.0, 120.0, 107.0, 109.0, 191.0, 109.0, 83.0, 104.0, 71.0, 127.0, 93.0, 98.0, 161.0, 42.0, 115.0, 141.0, 90.0, 98.0, 56.0, 128.0, 85.0, 138.0, 144.0, 9.0, 202.0, 36.0, 90.0, 28.0, 164.0, 96.0, 195.0, 124.0, 144.0, 69.0, 122.0, 68.0, 130.0, 150.0, 122.0, 141.0, 98.0, 79.0, 55.0, 9.0, 102.0, 12.0, 93.0, 49.0, 149.0, 90.0, 144.0, 195.0, 63.0, 88.0, 90.0, 198.0, 28.0, 31.0, 98.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [17.0, 17.0, 76.0, 79.0, 86.0, 72.0, 37.0, 37.0, 31.0, 44.0, 61.0, 54.0, 65.0, 49.0, 51.0, 39.0, 45.0, 42.0, 74.0, 89.0, 22.0, 17.0, 132.0, 110.0, 49.0, 52.0, 68.0, 76.0, 39.0, 30.0, 36.0, 52.0, 24.0, 26.0, 42.0, 54.0, 23.0, 8.0, 54.0, 34.0, 82.0, 97.0, 88.0, 79.0, 43.0, 47.0, 42.0, 61.0, 50.0, 57.0, 34.0, 56.0, 9.0, 0.0, 33.0, 52.0, 48.0, 34.0, 20.0, 33.0, 6.0, 3.0, 61.0, 86.0, 63.0, 76.0, 52.0, 47.0, 20.0, 5.0, 73.0, 71.0, 8.0, 9.0, 6.0, 3.0, 20.0, 51.0, 75.0, 77.0, 54.0, 44.0, 40.0, 51.0, 54.0, 66.0, 55.0, 52.0, 70.0, 39.0, 82.0, 109.0, 69.0, 40.0, 43.0, 40.0, 47.0, 57.0, 40.0, 31.0, 53.0, 74.0, 34.0, 59.0, 50.0, 48.0, 85.0, 76.0, 17.0, 25.0, 56.0, 59.0, 79.0, 62.0, 42.0, 48.0, 
47.0, 51.0, 36.0, 20.0, 48.0, 80.0, 51.0, 34.0, 63.0, 75.0, 69.0, 75.0, 3.0, 6.0, 111.0, 91.0, 13.0, 23.0, 50.0, 40.0, 22.0, 6.0, 76.0, 88.0, 56.0, 40.0, 98.0, 97.0, 69.0, 55.0, 75.0, 69.0, 37.0, 32.0, 51.0, 71.0, 39.0, 29.0, 52.0, 78.0, 68.0, 82.0, 56.0, 66.0, 69.0, 72.0, 56.0, 42.0, 43.0, 36.0, 16.0, 39.0, 3.0, 6.0, 45.0, 57.0, 3.0, 9.0, 56.0, 37.0, 30.0, 19.0, 78.0, 71.0, 44.0, 46.0, 71.0, 73.0, 89.0, 106.0, 28.0, 35.0, 34.0, 54.0, 42.0, 48.0, 98.0, 100.0, 19.0, 9.0, 22.0, 9.0, 37.0, 61.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6901884261177258, "mean_inference_ms": 1.2084870526268037, "mean_action_processing_ms": 0.13327061894254838, "mean_env_wait_ms": 0.8770979785487504, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1024000, "num_agent_steps_trained": 1024000, "num_env_steps_sampled": 512000, "num_env_steps_trained": 512000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 512000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1024000, "timers": {"training_iteration_time_ms": 3680.412, "learn_time_ms": 1105.379, "learn_throughput": 11579.735, "synch_weights_time_ms": 10.803}, "counters": {"num_env_steps_sampled": 512000, "num_env_steps_trained": 512000, "num_agent_steps_sampled": 1024000, "num_agent_steps_trained": 1024000}, "done": false, "episodes_total": 1280, "training_iteration": 40, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-32", "timestamp": 1666580552, "time_this_iter_s": 3.6896848678588867, "time_total_s": 151.42737460136414, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", 
"eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": 
"NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": 
false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 
151.42737460136414, "timesteps_since_restore": 0, "iterations_since_restore": 40, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.099999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 22.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 52.84, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.69, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 8.12, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 5.1, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 6.49, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.64, 
"onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.37, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 4.16, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.1, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.75, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.24, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.87, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.95, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.66, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.56, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.47, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.95, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.73, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.16, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, 
"optimal_onion_potting_agent_1_mean": 5.1, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.16, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.1, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": 
{"learner_stats": {"cur_kl_coeff": 1.8189894306509108e-13, "cur_lr": 0.0010000000474974513, "total_loss": -0.005516150966286659, "policy_loss": -0.005121724680066109, "vf_loss": 4.235774517059326, "vf_explained_var": 0.39267563819885254, "kl": 0.0015804313588887453, "entropy": 1.6360013484954834, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 524800, "num_env_steps_trained": 524800, "num_agent_steps_sampled": 1049600, "num_agent_steps_trained": 1049600}, "sampler_results": {"episode_reward_max": 202.0, "episode_reward_min": 9.0, "episode_reward_mean": 98.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 111.0}, "policy_reward_mean": {"ppo": 49.02}, "custom_metrics": {"sparse_reward_mean": 22.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 52.84, "shaped_reward_min": 9, "shaped_reward_max": 87, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, 
"potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.69, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 8.12, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 5.1, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 11, "useful_onion_pickup_agent_1_mean": 6.49, "useful_onion_pickup_agent_1_min": 0, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 2.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.37, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.37, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 4.16, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.1, "potting_onion_agent_1_min": 0, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.75, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.24, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.87, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.95, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.66, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, 
"soup_pickup_agent_1_mean": 2.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.56, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.47, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.95, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 6, "soup_drop_agent_1_mean": 0.73, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.16, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.1, "optimal_onion_potting_agent_1_min": 0, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.16, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.1, "viable_onion_potting_agent_1_min": 0, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, 
"catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [139.0, 99.0, 25.0, 144.0, 17.0, 9.0, 71.0, 152.0, 98.0, 91.0, 120.0, 107.0, 109.0, 191.0, 109.0, 83.0, 104.0, 71.0, 127.0, 93.0, 98.0, 161.0, 42.0, 115.0, 141.0, 90.0, 98.0, 56.0, 128.0, 85.0, 138.0, 144.0, 9.0, 202.0, 36.0, 90.0, 28.0, 164.0, 96.0, 195.0, 124.0, 144.0, 69.0, 122.0, 68.0, 130.0, 150.0, 122.0, 141.0, 98.0, 79.0, 55.0, 9.0, 102.0, 12.0, 93.0, 49.0, 149.0, 90.0, 144.0, 195.0, 63.0, 88.0, 90.0, 198.0, 28.0, 31.0, 98.0, 153.0, 141.0, 112.0, 42.0, 55.0, 155.0, 147.0, 90.0, 25.0, 109.0, 77.0, 126.0, 77.0, 39.0, 20.0, 130.0, 96.0, 9.0, 99.0, 95.0, 90.0, 132.0, 65.0, 118.0, 81.0, 156.0, 153.0, 99.0, 20.0, 121.0, 63.0, 93.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [63.0, 76.0, 52.0, 47.0, 20.0, 5.0, 73.0, 71.0, 8.0, 9.0, 6.0, 3.0, 20.0, 51.0, 75.0, 77.0, 54.0, 44.0, 40.0, 51.0, 54.0, 66.0, 55.0, 52.0, 70.0, 39.0, 82.0, 109.0, 69.0, 40.0, 43.0, 40.0, 47.0, 57.0, 40.0, 31.0, 53.0, 74.0, 34.0, 59.0, 
50.0, 48.0, 85.0, 76.0, 17.0, 25.0, 56.0, 59.0, 79.0, 62.0, 42.0, 48.0, 47.0, 51.0, 36.0, 20.0, 48.0, 80.0, 51.0, 34.0, 63.0, 75.0, 69.0, 75.0, 3.0, 6.0, 111.0, 91.0, 13.0, 23.0, 50.0, 40.0, 22.0, 6.0, 76.0, 88.0, 56.0, 40.0, 98.0, 97.0, 69.0, 55.0, 75.0, 69.0, 37.0, 32.0, 51.0, 71.0, 39.0, 29.0, 52.0, 78.0, 68.0, 82.0, 56.0, 66.0, 69.0, 72.0, 56.0, 42.0, 43.0, 36.0, 16.0, 39.0, 3.0, 6.0, 45.0, 57.0, 3.0, 9.0, 56.0, 37.0, 30.0, 19.0, 78.0, 71.0, 44.0, 46.0, 71.0, 73.0, 89.0, 106.0, 28.0, 35.0, 34.0, 54.0, 42.0, 48.0, 98.0, 100.0, 19.0, 9.0, 22.0, 9.0, 37.0, 61.0, 75.0, 78.0, 68.0, 73.0, 56.0, 56.0, 17.0, 25.0, 27.0, 28.0, 70.0, 85.0, 82.0, 65.0, 37.0, 53.0, 12.0, 13.0, 67.0, 42.0, 39.0, 38.0, 73.0, 53.0, 31.0, 46.0, 28.0, 11.0, 14.0, 6.0, 62.0, 68.0, 50.0, 46.0, 3.0, 6.0, 40.0, 59.0, 48.0, 47.0, 39.0, 51.0, 70.0, 62.0, 30.0, 35.0, 56.0, 62.0, 44.0, 37.0, 85.0, 71.0, 74.0, 79.0, 34.0, 65.0, 6.0, 14.0, 45.0, 76.0, 31.0, 32.0, 53.0, 40.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6898855332895328, "mean_inference_ms": 1.208174559889397, "mean_action_processing_ms": 0.13325926651821823, "mean_env_wait_ms": 0.8768887278744478, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 202.0, "episode_reward_min": 9.0, "episode_reward_mean": 98.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 111.0}, "policy_reward_mean": {"ppo": 49.02}, "hist_stats": {"episode_reward": [139.0, 99.0, 25.0, 144.0, 17.0, 9.0, 71.0, 152.0, 98.0, 91.0, 120.0, 107.0, 109.0, 191.0, 109.0, 83.0, 104.0, 71.0, 127.0, 93.0, 98.0, 161.0, 42.0, 115.0, 141.0, 90.0, 98.0, 56.0, 128.0, 85.0, 138.0, 144.0, 9.0, 202.0, 36.0, 90.0, 28.0, 164.0, 96.0, 195.0, 124.0, 144.0, 69.0, 122.0, 68.0, 130.0, 150.0, 122.0, 141.0, 98.0, 79.0, 55.0, 9.0, 102.0, 12.0, 93.0, 49.0, 149.0, 90.0, 144.0, 195.0, 63.0, 88.0, 90.0, 198.0, 28.0, 31.0, 98.0, 153.0, 141.0, 112.0, 42.0, 55.0, 155.0, 147.0, 90.0, 25.0, 109.0, 77.0, 
126.0, 77.0, 39.0, 20.0, 130.0, 96.0, 9.0, 99.0, 95.0, 90.0, 132.0, 65.0, 118.0, 81.0, 156.0, 153.0, 99.0, 20.0, 121.0, 63.0, 93.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [63.0, 76.0, 52.0, 47.0, 20.0, 5.0, 73.0, 71.0, 8.0, 9.0, 6.0, 3.0, 20.0, 51.0, 75.0, 77.0, 54.0, 44.0, 40.0, 51.0, 54.0, 66.0, 55.0, 52.0, 70.0, 39.0, 82.0, 109.0, 69.0, 40.0, 43.0, 40.0, 47.0, 57.0, 40.0, 31.0, 53.0, 74.0, 34.0, 59.0, 50.0, 48.0, 85.0, 76.0, 17.0, 25.0, 56.0, 59.0, 79.0, 62.0, 42.0, 48.0, 47.0, 51.0, 36.0, 20.0, 48.0, 80.0, 51.0, 34.0, 63.0, 75.0, 69.0, 75.0, 3.0, 6.0, 111.0, 91.0, 13.0, 23.0, 50.0, 40.0, 22.0, 6.0, 76.0, 88.0, 56.0, 40.0, 98.0, 97.0, 69.0, 55.0, 75.0, 69.0, 37.0, 32.0, 51.0, 71.0, 39.0, 29.0, 52.0, 78.0, 68.0, 82.0, 56.0, 66.0, 69.0, 72.0, 56.0, 42.0, 43.0, 36.0, 16.0, 39.0, 3.0, 6.0, 45.0, 57.0, 3.0, 9.0, 56.0, 37.0, 30.0, 19.0, 78.0, 71.0, 44.0, 46.0, 71.0, 73.0, 89.0, 106.0, 28.0, 35.0, 34.0, 54.0, 42.0, 48.0, 98.0, 100.0, 19.0, 9.0, 22.0, 9.0, 37.0, 61.0, 75.0, 78.0, 68.0, 73.0, 56.0, 56.0, 17.0, 25.0, 27.0, 28.0, 70.0, 85.0, 82.0, 65.0, 37.0, 53.0, 12.0, 13.0, 67.0, 42.0, 39.0, 38.0, 73.0, 53.0, 31.0, 46.0, 28.0, 11.0, 14.0, 6.0, 62.0, 68.0, 50.0, 46.0, 3.0, 6.0, 40.0, 59.0, 48.0, 47.0, 39.0, 51.0, 70.0, 62.0, 30.0, 35.0, 56.0, 62.0, 44.0, 37.0, 85.0, 71.0, 74.0, 79.0, 34.0, 65.0, 6.0, 14.0, 45.0, 76.0, 31.0, 32.0, 53.0, 40.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6898855332895328, "mean_inference_ms": 1.208174559889397, "mean_action_processing_ms": 
0.13325926651821823, "mean_env_wait_ms": 0.8768887278744478, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1049600, "num_agent_steps_trained": 1049600, "num_env_steps_sampled": 524800, "num_env_steps_trained": 524800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 524800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1049600, "timers": {"training_iteration_time_ms": 3686.982, "learn_time_ms": 1112.505, "learn_throughput": 11505.568, "synch_weights_time_ms": 10.473}, "counters": {"num_env_steps_sampled": 524800, "num_env_steps_trained": 524800, "num_agent_steps_sampled": 1049600, "num_agent_steps_trained": 1049600}, "done": false, "episodes_total": 1312, "training_iteration": 41, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-36", "timestamp": 1666580556, "time_this_iter_s": 3.7608163356781006, "time_total_s": 155.18819093704224, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": 
false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, 
"monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 155.18819093704224, "timesteps_since_restore": 0, "iterations_since_restore": 41, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.14, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 23.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 52.7, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.65, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.73, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 5.29, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 6.34, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 2.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 4.23, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.08, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.67, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.4, "dish_pickup_agent_1_min": 0, 
"dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.83, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.76, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.85, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.95, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.65, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.32, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.58, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.46, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.89, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.68, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.23, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.08, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.23, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.08, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 9.094947153254554e-14, "cur_lr": 0.0010000000474974513, "total_loss": 0.00025381380692124367, "policy_loss": 0.0006008308264426887, "vf_loss": 4.596561431884766, "vf_explained_var": 0.40004289150238037, "kl": 0.001455314108170569, "entropy": 1.6133447885513306, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 537600, "num_env_steps_trained": 537600, "num_agent_steps_sampled": 1075200, "num_agent_steps_trained": 1075200}, "sampler_results": {"episode_reward_max": 210.0, "episode_reward_min": 9.0, "episode_reward_mean": 99.9, "episode_len_mean": 
400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 114.0}, "policy_reward_mean": {"ppo": 49.95}, "custom_metrics": {"sparse_reward_mean": 23.6, "sparse_reward_min": 0, "sparse_reward_max": 60, "shaped_reward_mean": 52.7, "shaped_reward_min": 9, "shaped_reward_max": 90, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.65, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 13, "onion_pickup_agent_1_mean": 7.73, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 5.29, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 6.34, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 2.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.29, "onion_drop_agent_1_min": 0, 
"onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 7, "potting_onion_agent_0_mean": 4.23, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.08, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 10, "dish_pickup_agent_0_mean": 4.67, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.4, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 0.83, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 0.76, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 3, "dish_drop_agent_0_mean": 2.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 2.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.85, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.95, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.65, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.32, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 1.58, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.46, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.89, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.68, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 4.23, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 
5.08, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 10, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.23, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.08, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 10, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [9.0, 202.0, 36.0, 90.0, 28.0, 164.0, 96.0, 195.0, 124.0, 144.0, 69.0, 122.0, 
68.0, 130.0, 150.0, 122.0, 141.0, 98.0, 79.0, 55.0, 9.0, 102.0, 12.0, 93.0, 49.0, 149.0, 90.0, 144.0, 195.0, 63.0, 88.0, 90.0, 198.0, 28.0, 31.0, 98.0, 153.0, 141.0, 112.0, 42.0, 55.0, 155.0, 147.0, 90.0, 25.0, 109.0, 77.0, 126.0, 77.0, 39.0, 20.0, 130.0, 96.0, 9.0, 99.0, 95.0, 90.0, 132.0, 65.0, 118.0, 81.0, 156.0, 153.0, 99.0, 20.0, 121.0, 63.0, 93.0, 124.0, 64.0, 187.0, 101.0, 141.0, 155.0, 66.0, 198.0, 12.0, 75.0, 31.0, 82.0, 187.0, 201.0, 47.0, 179.0, 66.0, 190.0, 196.0, 52.0, 128.0, 53.0, 76.0, 87.0, 12.0, 84.0, 82.0, 52.0, 74.0, 133.0, 210.0, 96.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 6.0, 111.0, 91.0, 13.0, 23.0, 50.0, 40.0, 22.0, 6.0, 76.0, 88.0, 56.0, 40.0, 98.0, 97.0, 69.0, 55.0, 75.0, 69.0, 37.0, 32.0, 51.0, 71.0, 39.0, 29.0, 52.0, 78.0, 68.0, 82.0, 56.0, 66.0, 69.0, 72.0, 56.0, 42.0, 43.0, 36.0, 16.0, 39.0, 3.0, 6.0, 45.0, 57.0, 3.0, 9.0, 56.0, 37.0, 30.0, 19.0, 78.0, 71.0, 44.0, 46.0, 71.0, 73.0, 89.0, 106.0, 28.0, 35.0, 34.0, 54.0, 42.0, 48.0, 98.0, 100.0, 19.0, 9.0, 22.0, 9.0, 37.0, 61.0, 75.0, 78.0, 68.0, 73.0, 56.0, 56.0, 17.0, 25.0, 27.0, 28.0, 70.0, 85.0, 82.0, 65.0, 37.0, 53.0, 12.0, 13.0, 67.0, 42.0, 39.0, 38.0, 73.0, 53.0, 31.0, 46.0, 28.0, 11.0, 14.0, 6.0, 62.0, 68.0, 50.0, 46.0, 3.0, 6.0, 40.0, 59.0, 48.0, 47.0, 39.0, 51.0, 70.0, 62.0, 30.0, 35.0, 56.0, 62.0, 44.0, 37.0, 85.0, 71.0, 74.0, 79.0, 34.0, 65.0, 6.0, 14.0, 45.0, 76.0, 31.0, 32.0, 53.0, 40.0, 59.0, 65.0, 23.0, 41.0, 98.0, 89.0, 48.0, 53.0, 62.0, 79.0, 73.0, 82.0, 37.0, 29.0, 
107.0, 91.0, 6.0, 6.0, 39.0, 36.0, 17.0, 14.0, 40.0, 42.0, 82.0, 105.0, 92.0, 109.0, 22.0, 25.0, 91.0, 88.0, 37.0, 29.0, 98.0, 92.0, 89.0, 107.0, 44.0, 8.0, 57.0, 71.0, 39.0, 14.0, 37.0, 39.0, 51.0, 36.0, 3.0, 9.0, 37.0, 47.0, 39.0, 43.0, 27.0, 25.0, 41.0, 33.0, 59.0, 74.0, 114.0, 96.0, 50.0, 46.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6895577005322571, "mean_inference_ms": 1.2079098308060834, "mean_action_processing_ms": 0.13326194274361466, "mean_env_wait_ms": 0.876727358874316, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 210.0, "episode_reward_min": 9.0, "episode_reward_mean": 99.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 114.0}, "policy_reward_mean": {"ppo": 49.95}, "hist_stats": {"episode_reward": [9.0, 202.0, 36.0, 90.0, 28.0, 164.0, 96.0, 195.0, 124.0, 144.0, 69.0, 122.0, 68.0, 130.0, 150.0, 122.0, 141.0, 98.0, 79.0, 55.0, 9.0, 102.0, 12.0, 93.0, 49.0, 149.0, 90.0, 144.0, 195.0, 63.0, 88.0, 90.0, 198.0, 28.0, 31.0, 98.0, 153.0, 141.0, 112.0, 42.0, 55.0, 155.0, 147.0, 90.0, 25.0, 109.0, 77.0, 126.0, 77.0, 39.0, 20.0, 130.0, 96.0, 9.0, 99.0, 95.0, 90.0, 132.0, 65.0, 118.0, 81.0, 156.0, 153.0, 99.0, 20.0, 121.0, 63.0, 93.0, 124.0, 64.0, 187.0, 101.0, 141.0, 155.0, 66.0, 198.0, 12.0, 75.0, 31.0, 82.0, 187.0, 201.0, 47.0, 179.0, 66.0, 190.0, 196.0, 52.0, 128.0, 53.0, 76.0, 87.0, 12.0, 84.0, 82.0, 52.0, 74.0, 133.0, 210.0, 96.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], 
"policy_ppo_reward": [3.0, 6.0, 111.0, 91.0, 13.0, 23.0, 50.0, 40.0, 22.0, 6.0, 76.0, 88.0, 56.0, 40.0, 98.0, 97.0, 69.0, 55.0, 75.0, 69.0, 37.0, 32.0, 51.0, 71.0, 39.0, 29.0, 52.0, 78.0, 68.0, 82.0, 56.0, 66.0, 69.0, 72.0, 56.0, 42.0, 43.0, 36.0, 16.0, 39.0, 3.0, 6.0, 45.0, 57.0, 3.0, 9.0, 56.0, 37.0, 30.0, 19.0, 78.0, 71.0, 44.0, 46.0, 71.0, 73.0, 89.0, 106.0, 28.0, 35.0, 34.0, 54.0, 42.0, 48.0, 98.0, 100.0, 19.0, 9.0, 22.0, 9.0, 37.0, 61.0, 75.0, 78.0, 68.0, 73.0, 56.0, 56.0, 17.0, 25.0, 27.0, 28.0, 70.0, 85.0, 82.0, 65.0, 37.0, 53.0, 12.0, 13.0, 67.0, 42.0, 39.0, 38.0, 73.0, 53.0, 31.0, 46.0, 28.0, 11.0, 14.0, 6.0, 62.0, 68.0, 50.0, 46.0, 3.0, 6.0, 40.0, 59.0, 48.0, 47.0, 39.0, 51.0, 70.0, 62.0, 30.0, 35.0, 56.0, 62.0, 44.0, 37.0, 85.0, 71.0, 74.0, 79.0, 34.0, 65.0, 6.0, 14.0, 45.0, 76.0, 31.0, 32.0, 53.0, 40.0, 59.0, 65.0, 23.0, 41.0, 98.0, 89.0, 48.0, 53.0, 62.0, 79.0, 73.0, 82.0, 37.0, 29.0, 107.0, 91.0, 6.0, 6.0, 39.0, 36.0, 17.0, 14.0, 40.0, 42.0, 82.0, 105.0, 92.0, 109.0, 22.0, 25.0, 91.0, 88.0, 37.0, 29.0, 98.0, 92.0, 89.0, 107.0, 44.0, 8.0, 57.0, 71.0, 39.0, 14.0, 37.0, 39.0, 51.0, 36.0, 3.0, 9.0, 37.0, 47.0, 39.0, 43.0, 27.0, 25.0, 41.0, 33.0, 59.0, 74.0, 114.0, 96.0, 50.0, 46.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6895577005322571, "mean_inference_ms": 1.2079098308060834, "mean_action_processing_ms": 0.13326194274361466, "mean_env_wait_ms": 0.876727358874316, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1075200, "num_agent_steps_trained": 1075200, "num_env_steps_sampled": 537600, "num_env_steps_trained": 537600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 537600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1075200, "timers": {"training_iteration_time_ms": 3684.119, "learn_time_ms": 1119.015, "learn_throughput": 11438.633, "synch_weights_time_ms": 10.755}, "counters": {"num_env_steps_sampled": 
537600, "num_env_steps_trained": 537600, "num_agent_steps_sampled": 1075200, "num_agent_steps_trained": 1075200}, "done": false, "episodes_total": 1344, "training_iteration": 42, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-40", "timestamp": 1666580560, "time_this_iter_s": 3.800992965698242, "time_total_s": 158.98918390274048, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, 
"env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, 
"explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, 
"action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, 
"optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, 
"num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 158.98918390274048, "timesteps_since_restore": 0, "iterations_since_restore": 42, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.966666666666665, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 24.2, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 56.1, "shaped_reward_min": 9, "shaped_reward_max": 96, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 
0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.89, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 7.87, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 5.54, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 6.39, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 2.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 4.52, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.36, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.77, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.42, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.61, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.57, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 1.63, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.78, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.77, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.52, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.36, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.52, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.36, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.547473576627277e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.004181774333119392, "policy_loss": -0.0038678678683936596, "vf_loss": 4.915392875671387, "vf_explained_var": 0.3882026970386505, "kl": 0.001393836340866983, "entropy": 1.610889196395874, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 550400, "num_env_steps_trained": 550400, "num_agent_steps_sampled": 1100800, "num_agent_steps_trained": 1100800}, "sampler_results": {"episode_reward_max": 242.0, "episode_reward_min": 9.0, "episode_reward_mean": 104.5, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 125.0}, "policy_reward_mean": {"ppo": 52.25}, "custom_metrics": {"sparse_reward_mean": 24.2, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 56.1, "shaped_reward_min": 9, "shaped_reward_max": 96, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 6.89, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 15, "onion_pickup_agent_1_mean": 7.87, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 5.54, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 12, "useful_onion_pickup_agent_1_mean": 6.39, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 2.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 1.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 4.52, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.36, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.77, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.42, "dish_pickup_agent_1_min": 0, 
"dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 0.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 3, "useful_dish_pickup_agent_1_mean": 0.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.94, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.61, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.57, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 1.63, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 4, "soup_delivery_agent_1_mean": 1.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.78, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.77, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.52, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.36, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.52, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.36, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [198.0, 28.0, 31.0, 98.0, 153.0, 141.0, 112.0, 42.0, 55.0, 155.0, 147.0, 90.0, 25.0, 109.0, 77.0, 126.0, 77.0, 39.0, 20.0, 130.0, 96.0, 9.0, 99.0, 95.0, 90.0, 132.0, 65.0, 118.0, 81.0, 156.0, 153.0, 99.0, 20.0, 121.0, 63.0, 93.0, 124.0, 64.0, 187.0, 101.0, 141.0, 155.0, 66.0, 198.0, 12.0, 75.0, 31.0, 82.0, 187.0, 201.0, 47.0, 179.0, 66.0, 190.0, 196.0, 52.0, 128.0, 53.0, 76.0, 87.0, 12.0, 84.0, 82.0, 52.0, 74.0, 133.0, 210.0, 96.0, 161.0, 104.0, 12.0, 107.0, 146.0, 90.0, 55.0, 99.0, 25.0, 124.0, 242.0, 116.0, 193.0, 93.0, 45.0, 20.0, 164.0, 103.0, 154.0, 192.0, 74.0, 71.0, 149.0, 98.0, 23.0, 133.0, 159.0, 34.0, 213.0, 110.0, 216.0, 141.0], 
"episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [98.0, 100.0, 19.0, 9.0, 22.0, 9.0, 37.0, 61.0, 75.0, 78.0, 68.0, 73.0, 56.0, 56.0, 17.0, 25.0, 27.0, 28.0, 70.0, 85.0, 82.0, 65.0, 37.0, 53.0, 12.0, 13.0, 67.0, 42.0, 39.0, 38.0, 73.0, 53.0, 31.0, 46.0, 28.0, 11.0, 14.0, 6.0, 62.0, 68.0, 50.0, 46.0, 3.0, 6.0, 40.0, 59.0, 48.0, 47.0, 39.0, 51.0, 70.0, 62.0, 30.0, 35.0, 56.0, 62.0, 44.0, 37.0, 85.0, 71.0, 74.0, 79.0, 34.0, 65.0, 6.0, 14.0, 45.0, 76.0, 31.0, 32.0, 53.0, 40.0, 59.0, 65.0, 23.0, 41.0, 98.0, 89.0, 48.0, 53.0, 62.0, 79.0, 73.0, 82.0, 37.0, 29.0, 107.0, 91.0, 6.0, 6.0, 39.0, 36.0, 17.0, 14.0, 40.0, 42.0, 82.0, 105.0, 92.0, 109.0, 22.0, 25.0, 91.0, 88.0, 37.0, 29.0, 98.0, 92.0, 89.0, 107.0, 44.0, 8.0, 57.0, 71.0, 39.0, 14.0, 37.0, 39.0, 51.0, 36.0, 3.0, 9.0, 37.0, 47.0, 39.0, 43.0, 27.0, 25.0, 41.0, 33.0, 59.0, 74.0, 114.0, 96.0, 50.0, 46.0, 81.0, 80.0, 40.0, 64.0, 3.0, 9.0, 59.0, 48.0, 71.0, 75.0, 50.0, 40.0, 26.0, 29.0, 56.0, 43.0, 14.0, 11.0, 65.0, 59.0, 117.0, 125.0, 51.0, 65.0, 89.0, 104.0, 56.0, 37.0, 20.0, 25.0, 0.0, 20.0, 74.0, 90.0, 59.0, 44.0, 81.0, 73.0, 94.0, 98.0, 37.0, 37.0, 33.0, 38.0, 67.0, 82.0, 37.0, 61.0, 6.0, 17.0, 58.0, 75.0, 74.0, 85.0, 8.0, 26.0, 104.0, 109.0, 50.0, 60.0, 113.0, 103.0, 66.0, 75.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6892870612625809, "mean_inference_ms": 1.2077795859778049, "mean_action_processing_ms": 0.133269187249842, "mean_env_wait_ms": 0.8768115098341133, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, 
"episode_reward_max": 242.0, "episode_reward_min": 9.0, "episode_reward_mean": 104.5, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 125.0}, "policy_reward_mean": {"ppo": 52.25}, "hist_stats": {"episode_reward": [198.0, 28.0, 31.0, 98.0, 153.0, 141.0, 112.0, 42.0, 55.0, 155.0, 147.0, 90.0, 25.0, 109.0, 77.0, 126.0, 77.0, 39.0, 20.0, 130.0, 96.0, 9.0, 99.0, 95.0, 90.0, 132.0, 65.0, 118.0, 81.0, 156.0, 153.0, 99.0, 20.0, 121.0, 63.0, 93.0, 124.0, 64.0, 187.0, 101.0, 141.0, 155.0, 66.0, 198.0, 12.0, 75.0, 31.0, 82.0, 187.0, 201.0, 47.0, 179.0, 66.0, 190.0, 196.0, 52.0, 128.0, 53.0, 76.0, 87.0, 12.0, 84.0, 82.0, 52.0, 74.0, 133.0, 210.0, 96.0, 161.0, 104.0, 12.0, 107.0, 146.0, 90.0, 55.0, 99.0, 25.0, 124.0, 242.0, 116.0, 193.0, 93.0, 45.0, 20.0, 164.0, 103.0, 154.0, 192.0, 74.0, 71.0, 149.0, 98.0, 23.0, 133.0, 159.0, 34.0, 213.0, 110.0, 216.0, 141.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [98.0, 100.0, 19.0, 9.0, 22.0, 9.0, 37.0, 61.0, 75.0, 78.0, 68.0, 73.0, 56.0, 56.0, 17.0, 25.0, 27.0, 28.0, 70.0, 85.0, 82.0, 65.0, 37.0, 53.0, 12.0, 13.0, 67.0, 42.0, 39.0, 38.0, 73.0, 53.0, 31.0, 46.0, 28.0, 11.0, 14.0, 6.0, 62.0, 68.0, 50.0, 46.0, 3.0, 6.0, 40.0, 59.0, 48.0, 47.0, 39.0, 51.0, 70.0, 62.0, 30.0, 35.0, 56.0, 62.0, 44.0, 37.0, 85.0, 71.0, 74.0, 79.0, 34.0, 65.0, 6.0, 14.0, 45.0, 76.0, 31.0, 32.0, 53.0, 40.0, 59.0, 65.0, 23.0, 41.0, 98.0, 89.0, 48.0, 53.0, 62.0, 79.0, 73.0, 82.0, 37.0, 29.0, 107.0, 91.0, 
6.0, 6.0, 39.0, 36.0, 17.0, 14.0, 40.0, 42.0, 82.0, 105.0, 92.0, 109.0, 22.0, 25.0, 91.0, 88.0, 37.0, 29.0, 98.0, 92.0, 89.0, 107.0, 44.0, 8.0, 57.0, 71.0, 39.0, 14.0, 37.0, 39.0, 51.0, 36.0, 3.0, 9.0, 37.0, 47.0, 39.0, 43.0, 27.0, 25.0, 41.0, 33.0, 59.0, 74.0, 114.0, 96.0, 50.0, 46.0, 81.0, 80.0, 40.0, 64.0, 3.0, 9.0, 59.0, 48.0, 71.0, 75.0, 50.0, 40.0, 26.0, 29.0, 56.0, 43.0, 14.0, 11.0, 65.0, 59.0, 117.0, 125.0, 51.0, 65.0, 89.0, 104.0, 56.0, 37.0, 20.0, 25.0, 0.0, 20.0, 74.0, 90.0, 59.0, 44.0, 81.0, 73.0, 94.0, 98.0, 37.0, 37.0, 33.0, 38.0, 67.0, 82.0, 37.0, 61.0, 6.0, 17.0, 58.0, 75.0, 74.0, 85.0, 8.0, 26.0, 104.0, 109.0, 50.0, 60.0, 113.0, 103.0, 66.0, 75.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6892870612625809, "mean_inference_ms": 1.2077795859778049, "mean_action_processing_ms": 0.133269187249842, "mean_env_wait_ms": 0.8768115098341133, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1100800, "num_agent_steps_trained": 1100800, "num_env_steps_sampled": 550400, "num_env_steps_trained": 550400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 550400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1100800, "timers": {"training_iteration_time_ms": 3700.067, "learn_time_ms": 1126.399, "learn_throughput": 11363.647, "synch_weights_time_ms": 10.556}, "counters": {"num_env_steps_sampled": 550400, "num_env_steps_trained": 550400, "num_agent_steps_sampled": 1100800, "num_agent_steps_trained": 1100800}, "done": false, "episodes_total": 1376, "training_iteration": 43, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-44", "timestamp": 1666580564, "time_this_iter_s": 3.8595781326293945, "time_total_s": 162.84876203536987, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, 
"extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, 
"restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": 
{"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, 
"recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": 
false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": 
false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": 
"independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 162.84876203536987, "timesteps_since_restore": 0, "iterations_since_restore": 43, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.433333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 28.8, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 58.94, "shaped_reward_min": 12, "shaped_reward_max": 96, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.93, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 7.95, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.25, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 14, "useful_onion_pickup_agent_1_mean": 
6.49, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 2.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.42, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 4.96, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.51, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.46, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.46, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 0.9, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.79, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.74, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.67, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.63, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 1.7, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.75, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.8, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, 
"soup_drop_agent_1_mean": 0.71, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.96, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.51, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.96, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.51, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.2737367883136385e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.004320982843637466, "policy_loss": -0.004086933098733425, "vf_loss": 5.650534629821777, "vf_explained_var": 0.3545888662338257, "kl": 0.0015218043699860573, "entropy": 1.5982072353363037, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 563200, "num_env_steps_trained": 563200, "num_agent_steps_sampled": 1126400, "num_agent_steps_trained": 1126400}, "sampler_results": {"episode_reward_max": 242.0, "episode_reward_min": 12.0, "episode_reward_mean": 116.54, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 125.0}, "policy_reward_mean": {"ppo": 58.27}, "custom_metrics": {"sparse_reward_mean": 28.8, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 58.94, "shaped_reward_min": 12, "shaped_reward_max": 96, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 7.93, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 7.95, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.25, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 14, "useful_onion_pickup_agent_1_mean": 6.49, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 13, "onion_drop_agent_0_mean": 2.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 2.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.42, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 4.96, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 5.51, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 11, "dish_pickup_agent_0_mean": 4.46, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.46, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 0.9, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.0, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.79, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.74, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.67, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.63, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 1.7, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.75, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.8, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.71, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 4.96, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 5.51, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 11, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 4.96, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 5.51, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 11, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [20.0, 121.0, 63.0, 93.0, 124.0, 64.0, 187.0, 101.0, 141.0, 155.0, 66.0, 198.0, 12.0, 75.0, 31.0, 82.0, 187.0, 201.0, 47.0, 179.0, 66.0, 190.0, 196.0, 52.0, 128.0, 53.0, 76.0, 87.0, 12.0, 84.0, 82.0, 52.0, 74.0, 133.0, 210.0, 96.0, 161.0, 104.0, 12.0, 107.0, 146.0, 90.0, 55.0, 99.0, 25.0, 124.0, 242.0, 116.0, 193.0, 93.0, 45.0, 20.0, 164.0, 103.0, 154.0, 192.0, 74.0, 71.0, 149.0, 98.0, 23.0, 133.0, 159.0, 34.0, 213.0, 110.0, 216.0, 141.0, 150.0, 61.0, 195.0, 190.0, 87.0, 176.0, 71.0, 132.0, 147.0, 199.0, 193.0, 76.0, 102.0, 96.0, 98.0, 242.0, 70.0, 57.0, 127.0, 152.0, 77.0, 91.0, 125.0, 174.0, 152.0, 198.0, 164.0, 91.0, 179.0, 101.0, 150.0, 127.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], 
"policy_ppo_reward": [6.0, 14.0, 45.0, 76.0, 31.0, 32.0, 53.0, 40.0, 59.0, 65.0, 23.0, 41.0, 98.0, 89.0, 48.0, 53.0, 62.0, 79.0, 73.0, 82.0, 37.0, 29.0, 107.0, 91.0, 6.0, 6.0, 39.0, 36.0, 17.0, 14.0, 40.0, 42.0, 82.0, 105.0, 92.0, 109.0, 22.0, 25.0, 91.0, 88.0, 37.0, 29.0, 98.0, 92.0, 89.0, 107.0, 44.0, 8.0, 57.0, 71.0, 39.0, 14.0, 37.0, 39.0, 51.0, 36.0, 3.0, 9.0, 37.0, 47.0, 39.0, 43.0, 27.0, 25.0, 41.0, 33.0, 59.0, 74.0, 114.0, 96.0, 50.0, 46.0, 81.0, 80.0, 40.0, 64.0, 3.0, 9.0, 59.0, 48.0, 71.0, 75.0, 50.0, 40.0, 26.0, 29.0, 56.0, 43.0, 14.0, 11.0, 65.0, 59.0, 117.0, 125.0, 51.0, 65.0, 89.0, 104.0, 56.0, 37.0, 20.0, 25.0, 0.0, 20.0, 74.0, 90.0, 59.0, 44.0, 81.0, 73.0, 94.0, 98.0, 37.0, 37.0, 33.0, 38.0, 67.0, 82.0, 37.0, 61.0, 6.0, 17.0, 58.0, 75.0, 74.0, 85.0, 8.0, 26.0, 104.0, 109.0, 50.0, 60.0, 113.0, 103.0, 66.0, 75.0, 77.0, 73.0, 22.0, 39.0, 105.0, 90.0, 96.0, 94.0, 50.0, 37.0, 89.0, 87.0, 37.0, 34.0, 69.0, 63.0, 67.0, 80.0, 97.0, 102.0, 103.0, 90.0, 40.0, 36.0, 39.0, 63.0, 57.0, 39.0, 46.0, 52.0, 117.0, 125.0, 29.0, 41.0, 23.0, 34.0, 73.0, 54.0, 73.0, 79.0, 36.0, 41.0, 53.0, 38.0, 65.0, 60.0, 83.0, 91.0, 77.0, 75.0, 116.0, 82.0, 79.0, 85.0, 48.0, 43.0, 80.0, 99.0, 36.0, 65.0, 77.0, 73.0, 57.0, 70.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6891156559960221, "mean_inference_ms": 1.2092069581028801, "mean_action_processing_ms": 0.13325865541082607, "mean_env_wait_ms": 0.8773356894318693, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 242.0, "episode_reward_min": 12.0, "episode_reward_mean": 116.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 125.0}, "policy_reward_mean": {"ppo": 58.27}, "hist_stats": {"episode_reward": [20.0, 121.0, 63.0, 93.0, 124.0, 64.0, 187.0, 101.0, 141.0, 155.0, 66.0, 198.0, 12.0, 75.0, 31.0, 82.0, 187.0, 201.0, 47.0, 179.0, 66.0, 190.0, 196.0, 52.0, 128.0, 53.0, 76.0, 87.0, 12.0, 84.0, 82.0, 52.0, 74.0, 133.0, 210.0, 96.0, 
161.0, 104.0, 12.0, 107.0, 146.0, 90.0, 55.0, 99.0, 25.0, 124.0, 242.0, 116.0, 193.0, 93.0, 45.0, 20.0, 164.0, 103.0, 154.0, 192.0, 74.0, 71.0, 149.0, 98.0, 23.0, 133.0, 159.0, 34.0, 213.0, 110.0, 216.0, 141.0, 150.0, 61.0, 195.0, 190.0, 87.0, 176.0, 71.0, 132.0, 147.0, 199.0, 193.0, 76.0, 102.0, 96.0, 98.0, 242.0, 70.0, 57.0, 127.0, 152.0, 77.0, 91.0, 125.0, 174.0, 152.0, 198.0, 164.0, 91.0, 179.0, 101.0, 150.0, 127.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [6.0, 14.0, 45.0, 76.0, 31.0, 32.0, 53.0, 40.0, 59.0, 65.0, 23.0, 41.0, 98.0, 89.0, 48.0, 53.0, 62.0, 79.0, 73.0, 82.0, 37.0, 29.0, 107.0, 91.0, 6.0, 6.0, 39.0, 36.0, 17.0, 14.0, 40.0, 42.0, 82.0, 105.0, 92.0, 109.0, 22.0, 25.0, 91.0, 88.0, 37.0, 29.0, 98.0, 92.0, 89.0, 107.0, 44.0, 8.0, 57.0, 71.0, 39.0, 14.0, 37.0, 39.0, 51.0, 36.0, 3.0, 9.0, 37.0, 47.0, 39.0, 43.0, 27.0, 25.0, 41.0, 33.0, 59.0, 74.0, 114.0, 96.0, 50.0, 46.0, 81.0, 80.0, 40.0, 64.0, 3.0, 9.0, 59.0, 48.0, 71.0, 75.0, 50.0, 40.0, 26.0, 29.0, 56.0, 43.0, 14.0, 11.0, 65.0, 59.0, 117.0, 125.0, 51.0, 65.0, 89.0, 104.0, 56.0, 37.0, 20.0, 25.0, 0.0, 20.0, 74.0, 90.0, 59.0, 44.0, 81.0, 73.0, 94.0, 98.0, 37.0, 37.0, 33.0, 38.0, 67.0, 82.0, 37.0, 61.0, 6.0, 17.0, 58.0, 75.0, 74.0, 85.0, 8.0, 26.0, 104.0, 109.0, 50.0, 60.0, 113.0, 103.0, 66.0, 75.0, 77.0, 73.0, 22.0, 39.0, 105.0, 90.0, 96.0, 94.0, 50.0, 37.0, 89.0, 87.0, 37.0, 34.0, 69.0, 63.0, 67.0, 80.0, 97.0, 102.0, 103.0, 90.0, 40.0, 36.0, 39.0, 63.0, 57.0, 39.0, 46.0, 52.0, 117.0, 125.0, 29.0, 41.0, 23.0, 
34.0, 73.0, 54.0, 73.0, 79.0, 36.0, 41.0, 53.0, 38.0, 65.0, 60.0, 83.0, 91.0, 77.0, 75.0, 116.0, 82.0, 79.0, 85.0, 48.0, 43.0, 80.0, 99.0, 36.0, 65.0, 77.0, 73.0, 57.0, 70.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6891156559960221, "mean_inference_ms": 1.2092069581028801, "mean_action_processing_ms": 0.13325865541082607, "mean_env_wait_ms": 0.8773356894318693, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1126400, "num_agent_steps_trained": 1126400, "num_env_steps_sampled": 563200, "num_env_steps_trained": 563200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 563200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1126400, "timers": {"training_iteration_time_ms": 3702.636, "learn_time_ms": 1114.714, "learn_throughput": 11482.762, "synch_weights_time_ms": 10.534}, "counters": {"num_env_steps_sampled": 563200, "num_env_steps_trained": 563200, "num_agent_steps_sampled": 1126400, "num_agent_steps_trained": 1126400}, "done": false, "episodes_total": 1408, "training_iteration": 44, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-48", "timestamp": 1666580568, "time_this_iter_s": 3.862396001815796, "time_total_s": 166.71115803718567, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": 
[256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", 
"log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 166.71115803718567, "timesteps_since_restore": 0, "iterations_since_restore": 44, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 22.616666666666664, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 33.4, "sparse_reward_min": 0, "sparse_reward_max": 
80, "shaped_reward_mean": 62.7, "shaped_reward_min": 9, "shaped_reward_max": 96, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.31, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 8.37, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.48, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 14, "useful_onion_pickup_agent_1_mean": 6.92, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 1.94, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.58, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, 
"potting_onion_agent_0_mean": 5.17, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 6.02, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.54, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.01, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.75, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.63, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.58, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.85, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 1.78, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.99, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.64, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.74, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 5.17, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 6.02, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.17, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 6.02, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.1368683941568192e-14, "cur_lr": 0.0010000000474974513, "total_loss": -0.003592498367652297, "policy_loss": -0.00334426062181592, "vf_loss": 5.455427169799805, "vf_explained_var": 0.41585662961006165, "kl": 0.0015013518277555704, "entropy": 
1.587562084197998, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 576000, "num_env_steps_trained": 576000, "num_agent_steps_sampled": 1152000, "num_agent_steps_trained": 1152000}, "sampler_results": {"episode_reward_max": 244.0, "episode_reward_min": 9.0, "episode_reward_mean": 129.5, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 135.0}, "policy_reward_mean": {"ppo": 64.75}, "custom_metrics": {"sparse_reward_mean": 33.4, "sparse_reward_min": 0, "sparse_reward_max": 80, "shaped_reward_mean": 62.7, "shaped_reward_min": 9, "shaped_reward_max": 96, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.31, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 8.37, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 16, "useful_onion_pickup_agent_0_mean": 6.48, 
"useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 14, "useful_onion_pickup_agent_1_mean": 6.92, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 9, "onion_drop_agent_1_mean": 1.94, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.58, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 1.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 5.17, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 11, "potting_onion_agent_1_mean": 6.02, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.54, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.01, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.75, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.63, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.58, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.85, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 1.78, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.99, "soup_delivery_agent_1_min": 0, 
"soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.64, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.74, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 5.17, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 11, "optimal_onion_potting_agent_1_mean": 6.02, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.17, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 11, "viable_onion_potting_agent_1_mean": 6.02, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [74.0, 133.0, 210.0, 96.0, 161.0, 104.0, 12.0, 107.0, 146.0, 90.0, 55.0, 99.0, 25.0, 124.0, 242.0, 116.0, 193.0, 93.0, 45.0, 20.0, 164.0, 103.0, 154.0, 192.0, 74.0, 71.0, 149.0, 98.0, 23.0, 133.0, 159.0, 34.0, 213.0, 110.0, 216.0, 141.0, 150.0, 61.0, 195.0, 190.0, 87.0, 176.0, 71.0, 132.0, 147.0, 199.0, 193.0, 76.0, 102.0, 96.0, 98.0, 242.0, 70.0, 57.0, 127.0, 152.0, 77.0, 91.0, 125.0, 174.0, 152.0, 198.0, 164.0, 91.0, 179.0, 101.0, 150.0, 127.0, 149.0, 104.0, 136.0, 190.0, 152.0, 193.0, 118.0, 159.0, 190.0, 83.0, 123.0, 241.0, 156.0, 23.0, 139.0, 164.0, 149.0, 153.0, 124.0, 133.0, 85.0, 17.0, 110.0, 210.0, 9.0, 152.0, 157.0, 244.0, 142.0, 152.0, 222.0, 142.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [41.0, 33.0, 59.0, 74.0, 114.0, 96.0, 50.0, 46.0, 81.0, 80.0, 40.0, 64.0, 3.0, 9.0, 59.0, 48.0, 71.0, 75.0, 50.0, 40.0, 26.0, 29.0, 56.0, 43.0, 14.0, 11.0, 65.0, 59.0, 117.0, 125.0, 51.0, 65.0, 89.0, 104.0, 56.0, 37.0, 20.0, 25.0, 0.0, 20.0, 74.0, 90.0, 59.0, 44.0, 81.0, 73.0, 94.0, 98.0, 37.0, 37.0, 33.0, 38.0, 67.0, 82.0, 37.0, 61.0, 6.0, 17.0, 58.0, 75.0, 74.0, 85.0, 8.0, 26.0, 104.0, 109.0, 50.0, 60.0, 113.0, 103.0, 66.0, 75.0, 77.0, 73.0, 22.0, 39.0, 
105.0, 90.0, 96.0, 94.0, 50.0, 37.0, 89.0, 87.0, 37.0, 34.0, 69.0, 63.0, 67.0, 80.0, 97.0, 102.0, 103.0, 90.0, 40.0, 36.0, 39.0, 63.0, 57.0, 39.0, 46.0, 52.0, 117.0, 125.0, 29.0, 41.0, 23.0, 34.0, 73.0, 54.0, 73.0, 79.0, 36.0, 41.0, 53.0, 38.0, 65.0, 60.0, 83.0, 91.0, 77.0, 75.0, 116.0, 82.0, 79.0, 85.0, 48.0, 43.0, 80.0, 99.0, 36.0, 65.0, 77.0, 73.0, 57.0, 70.0, 74.0, 75.0, 56.0, 48.0, 67.0, 69.0, 83.0, 107.0, 68.0, 84.0, 89.0, 104.0, 60.0, 58.0, 79.0, 80.0, 100.0, 90.0, 42.0, 41.0, 57.0, 66.0, 106.0, 135.0, 73.0, 83.0, 11.0, 12.0, 67.0, 72.0, 67.0, 97.0, 71.0, 78.0, 66.0, 87.0, 57.0, 67.0, 66.0, 67.0, 40.0, 45.0, 9.0, 8.0, 50.0, 60.0, 99.0, 111.0, 0.0, 9.0, 73.0, 79.0, 78.0, 79.0, 112.0, 132.0, 68.0, 74.0, 79.0, 73.0, 104.0, 118.0, 74.0, 68.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6890820883579823, "mean_inference_ms": 1.2106855797012448, "mean_action_processing_ms": 0.1332609226683101, "mean_env_wait_ms": 0.8777722925237638, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 244.0, "episode_reward_min": 9.0, "episode_reward_mean": 129.5, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 135.0}, "policy_reward_mean": {"ppo": 64.75}, "hist_stats": {"episode_reward": [74.0, 133.0, 210.0, 96.0, 161.0, 104.0, 12.0, 107.0, 146.0, 90.0, 55.0, 99.0, 25.0, 124.0, 242.0, 116.0, 193.0, 93.0, 45.0, 20.0, 164.0, 103.0, 154.0, 192.0, 74.0, 71.0, 149.0, 98.0, 23.0, 133.0, 159.0, 34.0, 213.0, 110.0, 216.0, 141.0, 150.0, 61.0, 195.0, 190.0, 87.0, 176.0, 71.0, 132.0, 147.0, 199.0, 193.0, 76.0, 102.0, 96.0, 98.0, 242.0, 70.0, 57.0, 127.0, 152.0, 77.0, 91.0, 125.0, 174.0, 152.0, 198.0, 164.0, 91.0, 179.0, 101.0, 150.0, 127.0, 149.0, 104.0, 136.0, 190.0, 152.0, 193.0, 118.0, 159.0, 190.0, 83.0, 123.0, 241.0, 156.0, 23.0, 139.0, 164.0, 149.0, 153.0, 124.0, 133.0, 85.0, 17.0, 110.0, 210.0, 9.0, 152.0, 157.0, 244.0, 142.0, 152.0, 222.0, 142.0], "episode_lengths": [400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [41.0, 33.0, 59.0, 74.0, 114.0, 96.0, 50.0, 46.0, 81.0, 80.0, 40.0, 64.0, 3.0, 9.0, 59.0, 48.0, 71.0, 75.0, 50.0, 40.0, 26.0, 29.0, 56.0, 43.0, 14.0, 11.0, 65.0, 59.0, 117.0, 125.0, 51.0, 65.0, 89.0, 104.0, 56.0, 37.0, 20.0, 25.0, 0.0, 20.0, 74.0, 90.0, 59.0, 44.0, 81.0, 73.0, 94.0, 98.0, 37.0, 37.0, 33.0, 38.0, 67.0, 82.0, 37.0, 61.0, 6.0, 17.0, 58.0, 75.0, 74.0, 85.0, 8.0, 26.0, 104.0, 109.0, 50.0, 60.0, 113.0, 103.0, 66.0, 75.0, 77.0, 73.0, 22.0, 39.0, 105.0, 90.0, 96.0, 94.0, 50.0, 37.0, 89.0, 87.0, 37.0, 34.0, 69.0, 63.0, 67.0, 80.0, 97.0, 102.0, 103.0, 90.0, 40.0, 36.0, 39.0, 63.0, 57.0, 39.0, 46.0, 52.0, 117.0, 125.0, 29.0, 41.0, 23.0, 34.0, 73.0, 54.0, 73.0, 79.0, 36.0, 41.0, 53.0, 38.0, 65.0, 60.0, 83.0, 91.0, 77.0, 75.0, 116.0, 82.0, 79.0, 85.0, 48.0, 43.0, 80.0, 99.0, 36.0, 65.0, 77.0, 73.0, 57.0, 70.0, 74.0, 75.0, 56.0, 48.0, 67.0, 69.0, 83.0, 107.0, 68.0, 84.0, 89.0, 104.0, 60.0, 58.0, 79.0, 80.0, 100.0, 90.0, 42.0, 41.0, 57.0, 66.0, 106.0, 135.0, 73.0, 83.0, 11.0, 12.0, 67.0, 72.0, 67.0, 97.0, 71.0, 78.0, 66.0, 87.0, 57.0, 67.0, 66.0, 67.0, 40.0, 45.0, 9.0, 8.0, 50.0, 60.0, 99.0, 111.0, 0.0, 9.0, 73.0, 79.0, 78.0, 79.0, 112.0, 132.0, 68.0, 74.0, 79.0, 73.0, 104.0, 118.0, 74.0, 68.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6890820883579823, "mean_inference_ms": 1.2106855797012448, "mean_action_processing_ms": 0.1332609226683101, "mean_env_wait_ms": 0.8777722925237638, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 
16, "num_agent_steps_sampled": 1152000, "num_agent_steps_trained": 1152000, "num_env_steps_sampled": 576000, "num_env_steps_trained": 576000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 576000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1152000, "timers": {"training_iteration_time_ms": 3695.92, "learn_time_ms": 1115.03, "learn_throughput": 11479.513, "synch_weights_time_ms": 10.717}, "counters": {"num_env_steps_sampled": 576000, "num_env_steps_trained": 576000, "num_agent_steps_sampled": 1152000, "num_agent_steps_trained": 1152000}, "done": false, "episodes_total": 1440, "training_iteration": 45, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-52", "timestamp": 1666580572, "time_this_iter_s": 3.6278324127197266, "time_total_s": 170.3389904499054, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], 
"counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 
64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], 
"counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, 
"replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, 
"collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 170.3389904499054, "timesteps_since_restore": 0, "iterations_since_restore": 45, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.060000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 39.2, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 66.01, "shaped_reward_min": 9, "shaped_reward_max": 98, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.41, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 8.88, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 6.7, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 14, "useful_onion_pickup_agent_1_mean": 7.37, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 2.55, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.4, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.46, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 6.42, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.58, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.42, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.14, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 
5, "useful_dish_pickup_agent_1_mean": 1.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.76, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.02, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.99, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.63, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.67, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 5.46, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 6.42, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.46, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 6.42, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 5.684341970784096e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.005215235985815525, "policy_loss": -0.00502493791282177, "vf_loss": 5.936392784118652, "vf_explained_var": 0.40612637996673584, "kl": 0.0015834091464057565, "entropy": 1.5678761005401611, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 588800, "num_env_steps_trained": 588800, "num_agent_steps_sampled": 1177600, "num_agent_steps_trained": 1177600}, "sampler_results": {"episode_reward_max": 285.0, "episode_reward_min": 9.0, "episode_reward_mean": 144.41, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 146.0}, "policy_reward_mean": {"ppo": 72.205}, 
"custom_metrics": {"sparse_reward_mean": 39.2, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 66.01, "shaped_reward_min": 9, "shaped_reward_max": 98, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.41, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 8.88, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 6.7, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 14, "useful_onion_pickup_agent_1_mean": 7.37, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 2.55, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 2.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.4, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 1.2, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.46, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 6.42, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.58, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.42, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.14, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.93, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.57, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.54, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.76, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.02, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 1.99, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.63, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.67, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 5.46, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 6.42, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.46, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 6.42, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [213.0, 110.0, 216.0, 141.0, 150.0, 61.0, 195.0, 190.0, 87.0, 176.0, 71.0, 132.0, 147.0, 199.0, 193.0, 76.0, 102.0, 96.0, 98.0, 242.0, 70.0, 57.0, 127.0, 152.0, 77.0, 91.0, 125.0, 174.0, 152.0, 198.0, 164.0, 91.0, 179.0, 101.0, 150.0, 127.0, 149.0, 
104.0, 136.0, 190.0, 152.0, 193.0, 118.0, 159.0, 190.0, 83.0, 123.0, 241.0, 156.0, 23.0, 139.0, 164.0, 149.0, 153.0, 124.0, 133.0, 85.0, 17.0, 110.0, 210.0, 9.0, 152.0, 157.0, 244.0, 142.0, 152.0, 222.0, 142.0, 104.0, 118.0, 101.0, 166.0, 285.0, 50.0, 134.0, 173.0, 102.0, 114.0, 156.0, 152.0, 149.0, 153.0, 196.0, 130.0, 129.0, 238.0, 219.0, 209.0, 178.0, 79.0, 188.0, 149.0, 196.0, 147.0, 85.0, 245.0, 203.0, 139.0, 158.0, 145.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [104.0, 109.0, 50.0, 60.0, 113.0, 103.0, 66.0, 75.0, 77.0, 73.0, 22.0, 39.0, 105.0, 90.0, 96.0, 94.0, 50.0, 37.0, 89.0, 87.0, 37.0, 34.0, 69.0, 63.0, 67.0, 80.0, 97.0, 102.0, 103.0, 90.0, 40.0, 36.0, 39.0, 63.0, 57.0, 39.0, 46.0, 52.0, 117.0, 125.0, 29.0, 41.0, 23.0, 34.0, 73.0, 54.0, 73.0, 79.0, 36.0, 41.0, 53.0, 38.0, 65.0, 60.0, 83.0, 91.0, 77.0, 75.0, 116.0, 82.0, 79.0, 85.0, 48.0, 43.0, 80.0, 99.0, 36.0, 65.0, 77.0, 73.0, 57.0, 70.0, 74.0, 75.0, 56.0, 48.0, 67.0, 69.0, 83.0, 107.0, 68.0, 84.0, 89.0, 104.0, 60.0, 58.0, 79.0, 80.0, 100.0, 90.0, 42.0, 41.0, 57.0, 66.0, 106.0, 135.0, 73.0, 83.0, 11.0, 12.0, 67.0, 72.0, 67.0, 97.0, 71.0, 78.0, 66.0, 87.0, 57.0, 67.0, 66.0, 67.0, 40.0, 45.0, 9.0, 8.0, 50.0, 60.0, 99.0, 111.0, 0.0, 9.0, 73.0, 79.0, 78.0, 79.0, 112.0, 132.0, 68.0, 74.0, 79.0, 73.0, 104.0, 118.0, 74.0, 68.0, 55.0, 49.0, 50.0, 68.0, 43.0, 58.0, 89.0, 77.0, 139.0, 146.0, 19.0, 31.0, 70.0, 64.0, 90.0, 83.0, 59.0, 43.0, 57.0, 57.0, 79.0, 77.0, 72.0, 80.0, 68.0, 81.0, 59.0, 94.0, 110.0, 86.0, 68.0, 
62.0, 69.0, 60.0, 114.0, 124.0, 120.0, 99.0, 116.0, 93.0, 84.0, 94.0, 37.0, 42.0, 95.0, 93.0, 60.0, 89.0, 93.0, 103.0, 76.0, 71.0, 44.0, 41.0, 121.0, 124.0, 113.0, 90.0, 68.0, 71.0, 80.0, 78.0, 79.0, 66.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.689059692491561, "mean_inference_ms": 1.2121603621163812, "mean_action_processing_ms": 0.13326362140850226, "mean_env_wait_ms": 0.8780508722828152, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 285.0, "episode_reward_min": 9.0, "episode_reward_mean": 144.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 146.0}, "policy_reward_mean": {"ppo": 72.205}, "hist_stats": {"episode_reward": [213.0, 110.0, 216.0, 141.0, 150.0, 61.0, 195.0, 190.0, 87.0, 176.0, 71.0, 132.0, 147.0, 199.0, 193.0, 76.0, 102.0, 96.0, 98.0, 242.0, 70.0, 57.0, 127.0, 152.0, 77.0, 91.0, 125.0, 174.0, 152.0, 198.0, 164.0, 91.0, 179.0, 101.0, 150.0, 127.0, 149.0, 104.0, 136.0, 190.0, 152.0, 193.0, 118.0, 159.0, 190.0, 83.0, 123.0, 241.0, 156.0, 23.0, 139.0, 164.0, 149.0, 153.0, 124.0, 133.0, 85.0, 17.0, 110.0, 210.0, 9.0, 152.0, 157.0, 244.0, 142.0, 152.0, 222.0, 142.0, 104.0, 118.0, 101.0, 166.0, 285.0, 50.0, 134.0, 173.0, 102.0, 114.0, 156.0, 152.0, 149.0, 153.0, 196.0, 130.0, 129.0, 238.0, 219.0, 209.0, 178.0, 79.0, 188.0, 149.0, 196.0, 147.0, 85.0, 245.0, 203.0, 139.0, 158.0, 145.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [104.0, 109.0, 50.0, 60.0, 
113.0, 103.0, 66.0, 75.0, 77.0, 73.0, 22.0, 39.0, 105.0, 90.0, 96.0, 94.0, 50.0, 37.0, 89.0, 87.0, 37.0, 34.0, 69.0, 63.0, 67.0, 80.0, 97.0, 102.0, 103.0, 90.0, 40.0, 36.0, 39.0, 63.0, 57.0, 39.0, 46.0, 52.0, 117.0, 125.0, 29.0, 41.0, 23.0, 34.0, 73.0, 54.0, 73.0, 79.0, 36.0, 41.0, 53.0, 38.0, 65.0, 60.0, 83.0, 91.0, 77.0, 75.0, 116.0, 82.0, 79.0, 85.0, 48.0, 43.0, 80.0, 99.0, 36.0, 65.0, 77.0, 73.0, 57.0, 70.0, 74.0, 75.0, 56.0, 48.0, 67.0, 69.0, 83.0, 107.0, 68.0, 84.0, 89.0, 104.0, 60.0, 58.0, 79.0, 80.0, 100.0, 90.0, 42.0, 41.0, 57.0, 66.0, 106.0, 135.0, 73.0, 83.0, 11.0, 12.0, 67.0, 72.0, 67.0, 97.0, 71.0, 78.0, 66.0, 87.0, 57.0, 67.0, 66.0, 67.0, 40.0, 45.0, 9.0, 8.0, 50.0, 60.0, 99.0, 111.0, 0.0, 9.0, 73.0, 79.0, 78.0, 79.0, 112.0, 132.0, 68.0, 74.0, 79.0, 73.0, 104.0, 118.0, 74.0, 68.0, 55.0, 49.0, 50.0, 68.0, 43.0, 58.0, 89.0, 77.0, 139.0, 146.0, 19.0, 31.0, 70.0, 64.0, 90.0, 83.0, 59.0, 43.0, 57.0, 57.0, 79.0, 77.0, 72.0, 80.0, 68.0, 81.0, 59.0, 94.0, 110.0, 86.0, 68.0, 62.0, 69.0, 60.0, 114.0, 124.0, 120.0, 99.0, 116.0, 93.0, 84.0, 94.0, 37.0, 42.0, 95.0, 93.0, 60.0, 89.0, 93.0, 103.0, 76.0, 71.0, 44.0, 41.0, 121.0, 124.0, 113.0, 90.0, 68.0, 71.0, 80.0, 78.0, 79.0, 66.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.689059692491561, "mean_inference_ms": 1.2121603621163812, "mean_action_processing_ms": 0.13326362140850226, "mean_env_wait_ms": 0.8780508722828152, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1177600, "num_agent_steps_trained": 1177600, "num_env_steps_sampled": 588800, "num_env_steps_trained": 588800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 588800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1177600, "timers": {"training_iteration_time_ms": 3694.313, "learn_time_ms": 1112.334, "learn_throughput": 11507.338, "synch_weights_time_ms": 10.858}, "counters": {"num_env_steps_sampled": 588800, 
"num_env_steps_trained": 588800, "num_agent_steps_sampled": 1177600, "num_agent_steps_trained": 1177600}, "done": false, "episodes_total": 1472, "training_iteration": 46, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-55", "timestamp": 1666580575, "time_this_iter_s": 3.6903653144836426, "time_total_s": 174.02935576438904, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": 
null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, 
"exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, 
"env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, 
"explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], 
[300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, 
"vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 174.02935576438904, "timesteps_since_restore": 0, "iterations_since_restore": 46, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.340000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 43.0, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 67.15, "shaped_reward_min": 9, "shaped_reward_max": 98, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.24, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 8.83, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 6.86, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 14, "useful_onion_pickup_agent_1_mean": 7.58, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 2.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.51, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 13, "potting_onion_agent_1_mean": 6.71, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.78, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.63, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.04, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.57, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 2.65, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.94, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.04, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.53, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.66, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 5.51, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 13, "optimal_onion_potting_agent_1_mean": 6.71, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.51, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 13, "viable_onion_potting_agent_1_mean": 6.71, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.842170985392048e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.003965587355196476, "policy_loss": -0.003803286934271455, "vf_loss": 6.179717540740967, "vf_explained_var": 0.41344791650772095, "kl": 0.0016391698736697435, "entropy": 1.5605473518371582, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 601600, "num_env_steps_trained": 601600, "num_agent_steps_sampled": 1203200, "num_agent_steps_trained": 1203200}, "sampler_results": {"episode_reward_max": 285.0, "episode_reward_min": 9.0, "episode_reward_mean": 153.15, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 153.0}, "policy_reward_mean": {"ppo": 76.575}, "custom_metrics": {"sparse_reward_mean": 43.0, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 67.15, "shaped_reward_min": 9, "shaped_reward_max": 98, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, 
"useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.24, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 8.83, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 6.86, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 14, "useful_onion_pickup_agent_1_mean": 7.58, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 2.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 1.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.51, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 13, "potting_onion_agent_1_mean": 6.71, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 12, "dish_pickup_agent_0_mean": 4.78, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.63, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, 
"useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.04, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.86, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.57, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 2.65, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 2.94, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.04, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.53, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.66, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 5.51, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 13, "optimal_onion_potting_agent_1_mean": 6.71, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 12, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.51, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 13, "viable_onion_potting_agent_1_mean": 6.71, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 12, "viable_tomato_potting_agent_0_mean": 0.0, 
"viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [179.0, 101.0, 150.0, 127.0, 149.0, 104.0, 136.0, 190.0, 152.0, 193.0, 118.0, 159.0, 190.0, 83.0, 123.0, 241.0, 156.0, 23.0, 139.0, 164.0, 149.0, 153.0, 124.0, 133.0, 85.0, 17.0, 110.0, 210.0, 9.0, 152.0, 157.0, 244.0, 142.0, 152.0, 222.0, 142.0, 104.0, 118.0, 101.0, 166.0, 285.0, 50.0, 134.0, 173.0, 102.0, 114.0, 156.0, 152.0, 149.0, 153.0, 196.0, 130.0, 129.0, 238.0, 219.0, 209.0, 178.0, 79.0, 188.0, 149.0, 196.0, 147.0, 85.0, 245.0, 203.0, 139.0, 158.0, 145.0, 133.0, 147.0, 136.0, 241.0, 192.0, 190.0, 91.0, 88.0, 90.0, 117.0, 136.0, 193.0, 96.0, 115.0, 64.0, 250.0, 133.0, 238.0, 187.0, 87.0, 285.0, 161.0, 161.0, 204.0, 104.0, 204.0, 241.0, 193.0, 243.0, 136.0, 236.0, 155.0], "episode_lengths": 
[400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [80.0, 99.0, 36.0, 65.0, 77.0, 73.0, 57.0, 70.0, 74.0, 75.0, 56.0, 48.0, 67.0, 69.0, 83.0, 107.0, 68.0, 84.0, 89.0, 104.0, 60.0, 58.0, 79.0, 80.0, 100.0, 90.0, 42.0, 41.0, 57.0, 66.0, 106.0, 135.0, 73.0, 83.0, 11.0, 12.0, 67.0, 72.0, 67.0, 97.0, 71.0, 78.0, 66.0, 87.0, 57.0, 67.0, 66.0, 67.0, 40.0, 45.0, 9.0, 8.0, 50.0, 60.0, 99.0, 111.0, 0.0, 9.0, 73.0, 79.0, 78.0, 79.0, 112.0, 132.0, 68.0, 74.0, 79.0, 73.0, 104.0, 118.0, 74.0, 68.0, 55.0, 49.0, 50.0, 68.0, 43.0, 58.0, 89.0, 77.0, 139.0, 146.0, 19.0, 31.0, 70.0, 64.0, 90.0, 83.0, 59.0, 43.0, 57.0, 57.0, 79.0, 77.0, 72.0, 80.0, 68.0, 81.0, 59.0, 94.0, 110.0, 86.0, 68.0, 62.0, 69.0, 60.0, 114.0, 124.0, 120.0, 99.0, 116.0, 93.0, 84.0, 94.0, 37.0, 42.0, 95.0, 93.0, 60.0, 89.0, 93.0, 103.0, 76.0, 71.0, 44.0, 41.0, 121.0, 124.0, 113.0, 90.0, 68.0, 71.0, 80.0, 78.0, 79.0, 66.0, 61.0, 72.0, 70.0, 77.0, 79.0, 57.0, 114.0, 127.0, 84.0, 108.0, 88.0, 102.0, 42.0, 49.0, 40.0, 48.0, 45.0, 45.0, 49.0, 68.0, 77.0, 59.0, 89.0, 104.0, 44.0, 52.0, 56.0, 59.0, 25.0, 39.0, 124.0, 126.0, 63.0, 70.0, 110.0, 128.0, 94.0, 93.0, 50.0, 37.0, 132.0, 153.0, 82.0, 79.0, 79.0, 82.0, 108.0, 96.0, 45.0, 59.0, 110.0, 94.0, 122.0, 119.0, 88.0, 105.0, 118.0, 125.0, 71.0, 65.0, 116.0, 120.0, 64.0, 91.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.688973223754642, "mean_inference_ms": 1.2122168487178766, "mean_action_processing_ms": 0.13326004234755068, "mean_env_wait_ms": 0.8776148200734798, "mean_env_render_ms": 0.0}, 
"num_faulty_episodes": 0}, "episode_reward_max": 285.0, "episode_reward_min": 9.0, "episode_reward_mean": 153.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 0.0}, "policy_reward_max": {"ppo": 153.0}, "policy_reward_mean": {"ppo": 76.575}, "hist_stats": {"episode_reward": [179.0, 101.0, 150.0, 127.0, 149.0, 104.0, 136.0, 190.0, 152.0, 193.0, 118.0, 159.0, 190.0, 83.0, 123.0, 241.0, 156.0, 23.0, 139.0, 164.0, 149.0, 153.0, 124.0, 133.0, 85.0, 17.0, 110.0, 210.0, 9.0, 152.0, 157.0, 244.0, 142.0, 152.0, 222.0, 142.0, 104.0, 118.0, 101.0, 166.0, 285.0, 50.0, 134.0, 173.0, 102.0, 114.0, 156.0, 152.0, 149.0, 153.0, 196.0, 130.0, 129.0, 238.0, 219.0, 209.0, 178.0, 79.0, 188.0, 149.0, 196.0, 147.0, 85.0, 245.0, 203.0, 139.0, 158.0, 145.0, 133.0, 147.0, 136.0, 241.0, 192.0, 190.0, 91.0, 88.0, 90.0, 117.0, 136.0, 193.0, 96.0, 115.0, 64.0, 250.0, 133.0, 238.0, 187.0, 87.0, 285.0, 161.0, 161.0, 204.0, 104.0, 204.0, 241.0, 193.0, 243.0, 136.0, 236.0, 155.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [80.0, 99.0, 36.0, 65.0, 77.0, 73.0, 57.0, 70.0, 74.0, 75.0, 56.0, 48.0, 67.0, 69.0, 83.0, 107.0, 68.0, 84.0, 89.0, 104.0, 60.0, 58.0, 79.0, 80.0, 100.0, 90.0, 42.0, 41.0, 57.0, 66.0, 106.0, 135.0, 73.0, 83.0, 11.0, 12.0, 67.0, 72.0, 67.0, 97.0, 71.0, 78.0, 66.0, 87.0, 57.0, 67.0, 66.0, 67.0, 40.0, 45.0, 9.0, 8.0, 50.0, 60.0, 99.0, 111.0, 0.0, 9.0, 73.0, 79.0, 78.0, 79.0, 112.0, 132.0, 68.0, 74.0, 79.0, 73.0, 104.0, 118.0, 74.0, 68.0, 55.0, 49.0, 50.0, 
68.0, 43.0, 58.0, 89.0, 77.0, 139.0, 146.0, 19.0, 31.0, 70.0, 64.0, 90.0, 83.0, 59.0, 43.0, 57.0, 57.0, 79.0, 77.0, 72.0, 80.0, 68.0, 81.0, 59.0, 94.0, 110.0, 86.0, 68.0, 62.0, 69.0, 60.0, 114.0, 124.0, 120.0, 99.0, 116.0, 93.0, 84.0, 94.0, 37.0, 42.0, 95.0, 93.0, 60.0, 89.0, 93.0, 103.0, 76.0, 71.0, 44.0, 41.0, 121.0, 124.0, 113.0, 90.0, 68.0, 71.0, 80.0, 78.0, 79.0, 66.0, 61.0, 72.0, 70.0, 77.0, 79.0, 57.0, 114.0, 127.0, 84.0, 108.0, 88.0, 102.0, 42.0, 49.0, 40.0, 48.0, 45.0, 45.0, 49.0, 68.0, 77.0, 59.0, 89.0, 104.0, 44.0, 52.0, 56.0, 59.0, 25.0, 39.0, 124.0, 126.0, 63.0, 70.0, 110.0, 128.0, 94.0, 93.0, 50.0, 37.0, 132.0, 153.0, 82.0, 79.0, 79.0, 82.0, 108.0, 96.0, 45.0, 59.0, 110.0, 94.0, 122.0, 119.0, 88.0, 105.0, 118.0, 125.0, 71.0, 65.0, 116.0, 120.0, 64.0, 91.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.688973223754642, "mean_inference_ms": 1.2122168487178766, "mean_action_processing_ms": 0.13326004234755068, "mean_env_wait_ms": 0.8776148200734798, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1203200, "num_agent_steps_trained": 1203200, "num_env_steps_sampled": 601600, "num_env_steps_trained": 601600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 601600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1203200, "timers": {"training_iteration_time_ms": 3688.223, "learn_time_ms": 1109.916, "learn_throughput": 11532.4, "synch_weights_time_ms": 10.615}, "counters": {"num_env_steps_sampled": 601600, "num_env_steps_trained": 601600, "num_agent_steps_sampled": 1203200, "num_agent_steps_trained": 1203200}, "done": false, "episodes_total": 1504, "training_iteration": 47, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-02-59", "timestamp": 1666580579, "time_this_iter_s": 3.698033094406128, "time_total_s": 177.72738885879517, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", 
"node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, 
"ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, 
"evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": 
"independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 
0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 177.72738885879517, "timesteps_since_restore": 0, "iterations_since_restore": 47, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.03333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 46.4, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 68.81, "shaped_reward_min": 12, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.2, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 8.96, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.01, 
"useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 7.79, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 2.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.96, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 1.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.71, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 13, "potting_onion_agent_1_mean": 6.87, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 14, "dish_pickup_agent_0_mean": 5.09, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.61, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.05, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.04, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 2.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.83, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.93, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.58, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.69, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.91, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.21, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.12, "soup_delivery_agent_1_min": 0, 
"soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.44, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.64, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 5.71, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 13, "optimal_onion_potting_agent_1_mean": 6.87, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 14, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.71, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 13, "viable_onion_potting_agent_1_mean": 6.87, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 14, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.421085492696024e-15, "cur_lr": 0.0010000000474974513, "total_loss": -0.004614857956767082, "policy_loss": -0.004442564211785793, "vf_loss": 6.058649063110352, "vf_explained_var": 0.4786253869533539, "kl": 0.0018073207465931773, "entropy": 1.556322693824768, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 614400, "num_env_steps_trained": 614400, "num_agent_steps_sampled": 1228800, "num_agent_steps_trained": 1228800}, "sampler_results": {"episode_reward_max": 299.0, "episode_reward_min": 12.0, "episode_reward_mean": 161.61, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 153.0}, "policy_reward_mean": {"ppo": 80.805}, "custom_metrics": {"sparse_reward_mean": 46.4, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 68.81, "shaped_reward_min": 12, "shaped_reward_max": 101, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.2, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 8.96, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.01, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 7.79, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 2.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.96, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 1.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 5.71, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 13, "potting_onion_agent_1_mean": 6.87, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 14, "dish_pickup_agent_0_mean": 5.09, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.61, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.05, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 4, "useful_dish_pickup_agent_1_mean": 1.04, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 2.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.83, "dish_drop_agent_1_min": 0, 
"dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.93, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.58, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.69, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 2.91, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.21, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.12, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.44, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.64, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 5.71, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 13, "optimal_onion_potting_agent_1_mean": 6.87, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 14, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 5.71, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 13, "viable_onion_potting_agent_1_mean": 6.87, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 14, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, 
"catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [142.0, 152.0, 222.0, 142.0, 104.0, 118.0, 101.0, 166.0, 285.0, 50.0, 134.0, 173.0, 102.0, 114.0, 156.0, 152.0, 149.0, 153.0, 196.0, 130.0, 129.0, 238.0, 219.0, 209.0, 178.0, 79.0, 188.0, 149.0, 196.0, 147.0, 85.0, 245.0, 203.0, 139.0, 158.0, 145.0, 133.0, 147.0, 136.0, 241.0, 192.0, 190.0, 91.0, 88.0, 90.0, 117.0, 136.0, 193.0, 96.0, 115.0, 64.0, 250.0, 133.0, 238.0, 187.0, 87.0, 285.0, 161.0, 161.0, 204.0, 104.0, 204.0, 241.0, 193.0, 243.0, 136.0, 236.0, 155.0, 147.0, 95.0, 158.0, 141.0, 66.0, 132.0, 182.0, 253.0, 106.0, 185.0, 130.0, 158.0, 133.0, 299.0, 123.0, 246.0, 187.0, 74.0, 230.0, 12.0, 290.0, 155.0, 88.0, 158.0, 258.0, 160.0, 193.0, 252.0, 244.0, 204.0, 120.0, 87.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [68.0, 74.0, 79.0, 73.0, 104.0, 118.0, 74.0, 68.0, 55.0, 49.0, 50.0, 68.0, 43.0, 58.0, 89.0, 77.0, 139.0, 146.0, 19.0, 31.0, 70.0, 64.0, 90.0, 83.0, 59.0, 43.0, 57.0, 57.0, 79.0, 77.0, 72.0, 80.0, 68.0, 81.0, 59.0, 94.0, 110.0, 86.0, 68.0, 62.0, 69.0, 60.0, 114.0, 124.0, 120.0, 99.0, 116.0, 93.0, 84.0, 94.0, 37.0, 42.0, 95.0, 93.0, 60.0, 89.0, 93.0, 103.0, 76.0, 71.0, 44.0, 41.0, 121.0, 124.0, 113.0, 90.0, 68.0, 71.0, 80.0, 78.0, 79.0, 66.0, 61.0, 72.0, 70.0, 77.0, 79.0, 57.0, 114.0, 127.0, 84.0, 108.0, 88.0, 102.0, 42.0, 49.0, 40.0, 48.0, 45.0, 45.0, 49.0, 68.0, 77.0, 59.0, 89.0, 104.0, 44.0, 52.0, 56.0, 59.0, 25.0, 39.0, 124.0, 126.0, 63.0, 70.0, 110.0, 128.0, 94.0, 93.0, 50.0, 37.0, 132.0, 153.0, 82.0, 79.0, 79.0, 82.0, 108.0, 96.0, 45.0, 59.0, 110.0, 94.0, 122.0, 119.0, 88.0, 105.0, 118.0, 125.0, 71.0, 65.0, 116.0, 120.0, 64.0, 91.0, 69.0, 78.0, 45.0, 50.0, 78.0, 80.0, 62.0, 79.0, 40.0, 26.0, 55.0, 77.0, 94.0, 88.0, 124.0, 129.0, 51.0, 55.0, 91.0, 94.0, 62.0, 68.0, 67.0, 91.0, 68.0, 65.0, 148.0, 151.0, 57.0, 66.0, 129.0, 117.0, 87.0, 100.0, 39.0, 35.0, 123.0, 107.0, 3.0, 9.0, 146.0, 144.0, 66.0, 89.0, 37.0, 51.0, 61.0, 97.0, 142.0, 116.0, 68.0, 92.0, 87.0, 106.0, 105.0, 147.0, 120.0, 124.0, 91.0, 113.0, 52.0, 68.0, 51.0, 36.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6888171226626579, "mean_inference_ms": 1.2119425342109795, "mean_action_processing_ms": 0.13324135084796007, "mean_env_wait_ms": 0.8770887664610575, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 299.0, "episode_reward_min": 12.0, "episode_reward_mean": 161.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 153.0}, "policy_reward_mean": {"ppo": 80.805}, "hist_stats": {"episode_reward": [142.0, 152.0, 222.0, 142.0, 104.0, 118.0, 101.0, 166.0, 285.0, 50.0, 
134.0, 173.0, 102.0, 114.0, 156.0, 152.0, 149.0, 153.0, 196.0, 130.0, 129.0, 238.0, 219.0, 209.0, 178.0, 79.0, 188.0, 149.0, 196.0, 147.0, 85.0, 245.0, 203.0, 139.0, 158.0, 145.0, 133.0, 147.0, 136.0, 241.0, 192.0, 190.0, 91.0, 88.0, 90.0, 117.0, 136.0, 193.0, 96.0, 115.0, 64.0, 250.0, 133.0, 238.0, 187.0, 87.0, 285.0, 161.0, 161.0, 204.0, 104.0, 204.0, 241.0, 193.0, 243.0, 136.0, 236.0, 155.0, 147.0, 95.0, 158.0, 141.0, 66.0, 132.0, 182.0, 253.0, 106.0, 185.0, 130.0, 158.0, 133.0, 299.0, 123.0, 246.0, 187.0, 74.0, 230.0, 12.0, 290.0, 155.0, 88.0, 158.0, 258.0, 160.0, 193.0, 252.0, 244.0, 204.0, 120.0, 87.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [68.0, 74.0, 79.0, 73.0, 104.0, 118.0, 74.0, 68.0, 55.0, 49.0, 50.0, 68.0, 43.0, 58.0, 89.0, 77.0, 139.0, 146.0, 19.0, 31.0, 70.0, 64.0, 90.0, 83.0, 59.0, 43.0, 57.0, 57.0, 79.0, 77.0, 72.0, 80.0, 68.0, 81.0, 59.0, 94.0, 110.0, 86.0, 68.0, 62.0, 69.0, 60.0, 114.0, 124.0, 120.0, 99.0, 116.0, 93.0, 84.0, 94.0, 37.0, 42.0, 95.0, 93.0, 60.0, 89.0, 93.0, 103.0, 76.0, 71.0, 44.0, 41.0, 121.0, 124.0, 113.0, 90.0, 68.0, 71.0, 80.0, 78.0, 79.0, 66.0, 61.0, 72.0, 70.0, 77.0, 79.0, 57.0, 114.0, 127.0, 84.0, 108.0, 88.0, 102.0, 42.0, 49.0, 40.0, 48.0, 45.0, 45.0, 49.0, 68.0, 77.0, 59.0, 89.0, 104.0, 44.0, 52.0, 56.0, 59.0, 25.0, 39.0, 124.0, 126.0, 63.0, 70.0, 110.0, 128.0, 94.0, 93.0, 50.0, 37.0, 132.0, 153.0, 82.0, 79.0, 79.0, 82.0, 108.0, 96.0, 45.0, 59.0, 110.0, 94.0, 122.0, 119.0, 88.0, 105.0, 118.0, 125.0, 71.0, 65.0, 116.0, 120.0, 
64.0, 91.0, 69.0, 78.0, 45.0, 50.0, 78.0, 80.0, 62.0, 79.0, 40.0, 26.0, 55.0, 77.0, 94.0, 88.0, 124.0, 129.0, 51.0, 55.0, 91.0, 94.0, 62.0, 68.0, 67.0, 91.0, 68.0, 65.0, 148.0, 151.0, 57.0, 66.0, 129.0, 117.0, 87.0, 100.0, 39.0, 35.0, 123.0, 107.0, 3.0, 9.0, 146.0, 144.0, 66.0, 89.0, 37.0, 51.0, 61.0, 97.0, 142.0, 116.0, 68.0, 92.0, 87.0, 106.0, 105.0, 147.0, 120.0, 124.0, 91.0, 113.0, 52.0, 68.0, 51.0, 36.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6888171226626579, "mean_inference_ms": 1.2119425342109795, "mean_action_processing_ms": 0.13324135084796007, "mean_env_wait_ms": 0.8770887664610575, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1228800, "num_agent_steps_trained": 1228800, "num_env_steps_sampled": 614400, "num_env_steps_trained": 614400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 614400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1228800, "timers": {"training_iteration_time_ms": 3673.047, "learn_time_ms": 1105.463, "learn_throughput": 11578.86, "synch_weights_time_ms": 10.566}, "counters": {"num_env_steps_sampled": 614400, "num_env_steps_trained": 614400, "num_agent_steps_sampled": 1228800, "num_agent_steps_trained": 1228800}, "done": false, "episodes_total": 1536, "training_iteration": 48, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-03", "timestamp": 1666580583, "time_this_iter_s": 3.613640546798706, "time_total_s": 181.34102940559387, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, 
"compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, 
"metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 181.34102940559387, 
"timesteps_since_restore": 0, "iterations_since_restore": 48, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.92, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 50.8, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 71.75, "shaped_reward_min": 12, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.53, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 9.02, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.31, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.12, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.59, "onion_drop_agent_1_min": 0, 
"onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.87, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.01, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 13, "potting_onion_agent_1_mean": 7.16, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 14, "dish_pickup_agent_0_mean": 5.13, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 1.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 2.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.72, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.76, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.05, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.24, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.42, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.6, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 6.01, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 13, "optimal_onion_potting_agent_1_mean": 
7.16, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 14, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.01, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 13, "viable_onion_potting_agent_1_mean": 7.16, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 14, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 
7.10542746348012e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.00159242725931108, "policy_loss": -0.001501859282143414, "vf_loss": 6.712205410003662, "vf_explained_var": 0.4798794388771057, "kl": 0.0016322416486218572, "entropy": 1.5235841274261475, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 627200, "num_env_steps_trained": 627200, "num_agent_steps_sampled": 1254400, "num_agent_steps_trained": 1254400}, "sampler_results": {"episode_reward_max": 318.0, "episode_reward_min": 12.0, "episode_reward_mean": 173.35, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 173.0}, "policy_reward_mean": {"ppo": 86.675}, "custom_metrics": {"sparse_reward_mean": 50.8, "sparse_reward_min": 0, "sparse_reward_max": 100, "shaped_reward_mean": 71.75, "shaped_reward_min": 12, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 8.53, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 9.02, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.31, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.12, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 14, "onion_drop_agent_0_mean": 2.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.87, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 6.01, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 13, "potting_onion_agent_1_mean": 7.16, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 14, "dish_pickup_agent_0_mean": 5.13, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 1.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.06, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 2.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.93, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.72, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.76, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.05, 
"soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.24, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.42, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.6, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 6, "optimal_onion_potting_agent_0_mean": 6.01, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 13, "optimal_onion_potting_agent_1_mean": 7.16, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 14, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.01, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 13, "viable_onion_potting_agent_1_mean": 7.16, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 14, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [203.0, 139.0, 158.0, 145.0, 133.0, 147.0, 136.0, 241.0, 192.0, 190.0, 91.0, 88.0, 90.0, 117.0, 136.0, 193.0, 96.0, 115.0, 64.0, 250.0, 133.0, 238.0, 187.0, 87.0, 285.0, 161.0, 161.0, 204.0, 104.0, 204.0, 241.0, 193.0, 243.0, 136.0, 236.0, 155.0, 147.0, 95.0, 158.0, 141.0, 66.0, 132.0, 182.0, 253.0, 106.0, 185.0, 130.0, 158.0, 133.0, 299.0, 123.0, 246.0, 187.0, 74.0, 230.0, 12.0, 290.0, 155.0, 88.0, 158.0, 258.0, 160.0, 193.0, 252.0, 244.0, 204.0, 120.0, 87.0, 134.0, 209.0, 85.0, 90.0, 55.0, 142.0, 189.0, 278.0, 239.0, 236.0, 110.0, 192.0, 253.0, 244.0, 195.0, 147.0, 299.0, 276.0, 213.0, 170.0, 206.0, 184.0, 233.0, 318.0, 207.0, 302.0, 199.0, 141.0, 80.0, 207.0, 189.0, 155.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [113.0, 90.0, 68.0, 71.0, 80.0, 78.0, 79.0, 66.0, 61.0, 72.0, 70.0, 77.0, 79.0, 57.0, 114.0, 127.0, 84.0, 108.0, 88.0, 102.0, 42.0, 49.0, 40.0, 48.0, 45.0, 45.0, 49.0, 68.0, 77.0, 59.0, 89.0, 104.0, 44.0, 52.0, 56.0, 59.0, 25.0, 39.0, 
124.0, 126.0, 63.0, 70.0, 110.0, 128.0, 94.0, 93.0, 50.0, 37.0, 132.0, 153.0, 82.0, 79.0, 79.0, 82.0, 108.0, 96.0, 45.0, 59.0, 110.0, 94.0, 122.0, 119.0, 88.0, 105.0, 118.0, 125.0, 71.0, 65.0, 116.0, 120.0, 64.0, 91.0, 69.0, 78.0, 45.0, 50.0, 78.0, 80.0, 62.0, 79.0, 40.0, 26.0, 55.0, 77.0, 94.0, 88.0, 124.0, 129.0, 51.0, 55.0, 91.0, 94.0, 62.0, 68.0, 67.0, 91.0, 68.0, 65.0, 148.0, 151.0, 57.0, 66.0, 129.0, 117.0, 87.0, 100.0, 39.0, 35.0, 123.0, 107.0, 3.0, 9.0, 146.0, 144.0, 66.0, 89.0, 37.0, 51.0, 61.0, 97.0, 142.0, 116.0, 68.0, 92.0, 87.0, 106.0, 105.0, 147.0, 120.0, 124.0, 91.0, 113.0, 52.0, 68.0, 51.0, 36.0, 79.0, 55.0, 100.0, 109.0, 29.0, 56.0, 46.0, 44.0, 30.0, 25.0, 60.0, 82.0, 103.0, 86.0, 131.0, 147.0, 122.0, 117.0, 118.0, 118.0, 67.0, 43.0, 105.0, 87.0, 133.0, 120.0, 110.0, 134.0, 97.0, 98.0, 82.0, 65.0, 143.0, 156.0, 136.0, 140.0, 91.0, 122.0, 98.0, 72.0, 104.0, 102.0, 70.0, 114.0, 103.0, 130.0, 145.0, 173.0, 112.0, 95.0, 156.0, 146.0, 80.0, 119.0, 64.0, 77.0, 37.0, 43.0, 112.0, 95.0, 97.0, 92.0, 87.0, 68.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6887080065883876, "mean_inference_ms": 1.2116564477850689, "mean_action_processing_ms": 0.13322398997678847, "mean_env_wait_ms": 0.8764812890451134, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 318.0, "episode_reward_min": 12.0, "episode_reward_mean": 173.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 173.0}, "policy_reward_mean": {"ppo": 86.675}, "hist_stats": {"episode_reward": [203.0, 139.0, 158.0, 145.0, 133.0, 147.0, 136.0, 241.0, 192.0, 190.0, 91.0, 88.0, 90.0, 117.0, 136.0, 193.0, 96.0, 115.0, 64.0, 250.0, 133.0, 238.0, 187.0, 87.0, 285.0, 161.0, 161.0, 204.0, 104.0, 204.0, 241.0, 193.0, 243.0, 136.0, 236.0, 155.0, 147.0, 95.0, 158.0, 141.0, 66.0, 132.0, 182.0, 253.0, 106.0, 185.0, 130.0, 158.0, 133.0, 299.0, 123.0, 246.0, 187.0, 74.0, 230.0, 12.0, 290.0, 155.0, 88.0, 158.0, 258.0, 160.0, 
193.0, 252.0, 244.0, 204.0, 120.0, 87.0, 134.0, 209.0, 85.0, 90.0, 55.0, 142.0, 189.0, 278.0, 239.0, 236.0, 110.0, 192.0, 253.0, 244.0, 195.0, 147.0, 299.0, 276.0, 213.0, 170.0, 206.0, 184.0, 233.0, 318.0, 207.0, 302.0, 199.0, 141.0, 80.0, 207.0, 189.0, 155.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [113.0, 90.0, 68.0, 71.0, 80.0, 78.0, 79.0, 66.0, 61.0, 72.0, 70.0, 77.0, 79.0, 57.0, 114.0, 127.0, 84.0, 108.0, 88.0, 102.0, 42.0, 49.0, 40.0, 48.0, 45.0, 45.0, 49.0, 68.0, 77.0, 59.0, 89.0, 104.0, 44.0, 52.0, 56.0, 59.0, 25.0, 39.0, 124.0, 126.0, 63.0, 70.0, 110.0, 128.0, 94.0, 93.0, 50.0, 37.0, 132.0, 153.0, 82.0, 79.0, 79.0, 82.0, 108.0, 96.0, 45.0, 59.0, 110.0, 94.0, 122.0, 119.0, 88.0, 105.0, 118.0, 125.0, 71.0, 65.0, 116.0, 120.0, 64.0, 91.0, 69.0, 78.0, 45.0, 50.0, 78.0, 80.0, 62.0, 79.0, 40.0, 26.0, 55.0, 77.0, 94.0, 88.0, 124.0, 129.0, 51.0, 55.0, 91.0, 94.0, 62.0, 68.0, 67.0, 91.0, 68.0, 65.0, 148.0, 151.0, 57.0, 66.0, 129.0, 117.0, 87.0, 100.0, 39.0, 35.0, 123.0, 107.0, 3.0, 9.0, 146.0, 144.0, 66.0, 89.0, 37.0, 51.0, 61.0, 97.0, 142.0, 116.0, 68.0, 92.0, 87.0, 106.0, 105.0, 147.0, 120.0, 124.0, 91.0, 113.0, 52.0, 68.0, 51.0, 36.0, 79.0, 55.0, 100.0, 109.0, 29.0, 56.0, 46.0, 44.0, 30.0, 25.0, 60.0, 82.0, 103.0, 86.0, 131.0, 147.0, 122.0, 117.0, 118.0, 118.0, 67.0, 43.0, 105.0, 87.0, 133.0, 120.0, 110.0, 134.0, 97.0, 98.0, 82.0, 65.0, 143.0, 156.0, 136.0, 140.0, 91.0, 122.0, 98.0, 72.0, 104.0, 102.0, 70.0, 114.0, 103.0, 130.0, 145.0, 173.0, 112.0, 95.0, 156.0, 146.0, 
80.0, 119.0, 64.0, 77.0, 37.0, 43.0, 112.0, 95.0, 97.0, 92.0, 87.0, 68.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6887080065883876, "mean_inference_ms": 1.2116564477850689, "mean_action_processing_ms": 0.13322398997678847, "mean_env_wait_ms": 0.8764812890451134, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1254400, "num_agent_steps_trained": 1254400, "num_env_steps_sampled": 627200, "num_env_steps_trained": 627200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 627200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1254400, "timers": {"training_iteration_time_ms": 3667.292, "learn_time_ms": 1097.604, "learn_throughput": 11661.76, "synch_weights_time_ms": 10.726}, "counters": {"num_env_steps_sampled": 627200, "num_env_steps_trained": 627200, "num_agent_steps_sampled": 1254400, "num_agent_steps_trained": 1254400}, "done": false, "episodes_total": 1568, "training_iteration": 49, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-07", "timestamp": 1666580587, "time_this_iter_s": 3.711414098739624, "time_total_s": 185.0524435043335, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": 
{"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", 
"post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", 
"env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, 
"conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, 
"_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, 
"fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 185.0524435043335, "timesteps_since_restore": 0, "iterations_since_restore": 49, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.000000000000004, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 57.0, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 73.04, "shaped_reward_min": 9, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.56, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 9.37, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.32, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.37, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 15, "onion_drop_agent_0_mean": 1.9, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.95, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.88, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 6.22, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 7.38, 
"potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 5.1, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.14, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 2.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.69, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.02, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.4, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.48, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 6.22, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 7.38, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.22, 
"viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 7.38, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.55271373174006e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.004688685759902, "policy_loss": -0.004600519314408302, "vf_loss": 6.720330238342285, "vf_explained_var": 0.5159987807273865, "kl": 0.0016663586720824242, "entropy": 1.5204063653945923, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 640000, "num_env_steps_trained": 640000, 
"num_agent_steps_sampled": 1280000, "num_agent_steps_trained": 1280000}, "sampler_results": {"episode_reward_max": 338.0, "episode_reward_min": 9.0, "episode_reward_mean": 187.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 176.0}, "policy_reward_mean": {"ppo": 93.52}, "custom_metrics": {"sparse_reward_mean": 57.0, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 73.04, "shaped_reward_min": 9, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.56, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 9.37, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 7.32, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.37, "useful_onion_pickup_agent_1_min": 2, 
"useful_onion_pickup_agent_1_max": 15, "onion_drop_agent_0_mean": 1.9, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.7, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.95, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.88, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 6.22, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 12, "potting_onion_agent_1_mean": 7.38, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 5.1, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.14, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 2.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.69, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.02, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 2.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.4, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.48, "soup_drop_agent_1_min": 0, 
"soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 6.22, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 12, "optimal_onion_potting_agent_1_mean": 7.38, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.22, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 12, "viable_onion_potting_agent_1_mean": 7.38, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [243.0, 136.0, 236.0, 155.0, 147.0, 95.0, 158.0, 141.0, 66.0, 132.0, 182.0, 253.0, 106.0, 185.0, 130.0, 158.0, 133.0, 299.0, 123.0, 246.0, 187.0, 74.0, 230.0, 12.0, 290.0, 155.0, 88.0, 158.0, 258.0, 160.0, 193.0, 252.0, 244.0, 204.0, 120.0, 87.0, 134.0, 209.0, 85.0, 90.0, 55.0, 142.0, 189.0, 278.0, 239.0, 236.0, 110.0, 192.0, 253.0, 244.0, 195.0, 147.0, 299.0, 276.0, 213.0, 170.0, 206.0, 184.0, 233.0, 318.0, 207.0, 302.0, 199.0, 141.0, 80.0, 207.0, 189.0, 155.0, 213.0, 287.0, 250.0, 196.0, 193.0, 93.0, 131.0, 292.0, 281.0, 282.0, 247.0, 247.0, 132.0, 227.0, 9.0, 284.0, 292.0, 110.0, 195.0, 145.0, 165.0, 338.0, 279.0, 193.0, 244.0, 136.0, 187.0, 285.0, 12.0, 188.0, 238.0, 120.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [118.0, 125.0, 71.0, 65.0, 116.0, 120.0, 64.0, 91.0, 69.0, 78.0, 45.0, 50.0, 78.0, 80.0, 62.0, 79.0, 40.0, 26.0, 55.0, 77.0, 94.0, 88.0, 124.0, 129.0, 51.0, 55.0, 91.0, 94.0, 62.0, 68.0, 67.0, 91.0, 68.0, 65.0, 148.0, 151.0, 57.0, 66.0, 129.0, 117.0, 87.0, 100.0, 39.0, 35.0, 123.0, 107.0, 3.0, 9.0, 146.0, 144.0, 66.0, 89.0, 37.0, 51.0, 61.0, 97.0, 142.0, 116.0, 68.0, 92.0, 87.0, 106.0, 105.0, 147.0, 120.0, 124.0, 91.0, 113.0, 52.0, 68.0, 51.0, 36.0, 79.0, 55.0, 100.0, 109.0, 29.0, 56.0, 46.0, 44.0, 30.0, 25.0, 60.0, 82.0, 103.0, 86.0, 131.0, 147.0, 122.0, 117.0, 118.0, 118.0, 67.0, 43.0, 105.0, 87.0, 133.0, 120.0, 110.0, 134.0, 97.0, 98.0, 82.0, 
65.0, 143.0, 156.0, 136.0, 140.0, 91.0, 122.0, 98.0, 72.0, 104.0, 102.0, 70.0, 114.0, 103.0, 130.0, 145.0, 173.0, 112.0, 95.0, 156.0, 146.0, 80.0, 119.0, 64.0, 77.0, 37.0, 43.0, 112.0, 95.0, 97.0, 92.0, 87.0, 68.0, 106.0, 107.0, 142.0, 145.0, 122.0, 128.0, 85.0, 111.0, 89.0, 104.0, 48.0, 45.0, 62.0, 69.0, 159.0, 133.0, 139.0, 142.0, 133.0, 149.0, 123.0, 124.0, 119.0, 128.0, 64.0, 68.0, 105.0, 122.0, 3.0, 6.0, 132.0, 152.0, 131.0, 161.0, 56.0, 54.0, 104.0, 91.0, 61.0, 84.0, 83.0, 82.0, 162.0, 176.0, 139.0, 140.0, 82.0, 111.0, 109.0, 135.0, 71.0, 65.0, 105.0, 82.0, 142.0, 143.0, 3.0, 9.0, 86.0, 102.0, 111.0, 127.0, 58.0, 62.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6886453974613268, "mean_inference_ms": 1.2113947739430153, "mean_action_processing_ms": 0.13321217952264544, "mean_env_wait_ms": 0.8759285348880911, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 338.0, "episode_reward_min": 9.0, "episode_reward_mean": 187.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 176.0}, "policy_reward_mean": {"ppo": 93.52}, "hist_stats": {"episode_reward": [243.0, 136.0, 236.0, 155.0, 147.0, 95.0, 158.0, 141.0, 66.0, 132.0, 182.0, 253.0, 106.0, 185.0, 130.0, 158.0, 133.0, 299.0, 123.0, 246.0, 187.0, 74.0, 230.0, 12.0, 290.0, 155.0, 88.0, 158.0, 258.0, 160.0, 193.0, 252.0, 244.0, 204.0, 120.0, 87.0, 134.0, 209.0, 85.0, 90.0, 55.0, 142.0, 189.0, 278.0, 239.0, 236.0, 110.0, 192.0, 253.0, 244.0, 195.0, 147.0, 299.0, 276.0, 213.0, 170.0, 206.0, 184.0, 233.0, 318.0, 207.0, 302.0, 199.0, 141.0, 80.0, 207.0, 189.0, 155.0, 213.0, 287.0, 250.0, 196.0, 193.0, 93.0, 131.0, 292.0, 281.0, 282.0, 247.0, 247.0, 132.0, 227.0, 9.0, 284.0, 292.0, 110.0, 195.0, 145.0, 165.0, 338.0, 279.0, 193.0, 244.0, 136.0, 187.0, 285.0, 12.0, 188.0, 238.0, 120.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [118.0, 125.0, 71.0, 65.0, 116.0, 120.0, 64.0, 91.0, 69.0, 78.0, 45.0, 50.0, 78.0, 80.0, 62.0, 79.0, 40.0, 26.0, 55.0, 77.0, 94.0, 88.0, 124.0, 129.0, 51.0, 55.0, 91.0, 94.0, 62.0, 68.0, 67.0, 91.0, 68.0, 65.0, 148.0, 151.0, 57.0, 66.0, 129.0, 117.0, 87.0, 100.0, 39.0, 35.0, 123.0, 107.0, 3.0, 9.0, 146.0, 144.0, 66.0, 89.0, 37.0, 51.0, 61.0, 97.0, 142.0, 116.0, 68.0, 92.0, 87.0, 106.0, 105.0, 147.0, 120.0, 124.0, 91.0, 113.0, 52.0, 68.0, 51.0, 36.0, 79.0, 55.0, 100.0, 109.0, 29.0, 56.0, 46.0, 44.0, 30.0, 25.0, 60.0, 82.0, 103.0, 86.0, 131.0, 147.0, 122.0, 117.0, 118.0, 118.0, 67.0, 43.0, 105.0, 87.0, 133.0, 120.0, 110.0, 134.0, 97.0, 98.0, 82.0, 65.0, 143.0, 156.0, 136.0, 140.0, 91.0, 122.0, 98.0, 72.0, 104.0, 102.0, 70.0, 114.0, 103.0, 130.0, 145.0, 173.0, 112.0, 95.0, 156.0, 146.0, 80.0, 119.0, 64.0, 77.0, 37.0, 43.0, 112.0, 95.0, 97.0, 92.0, 87.0, 68.0, 106.0, 107.0, 142.0, 145.0, 122.0, 128.0, 85.0, 111.0, 89.0, 104.0, 48.0, 45.0, 62.0, 69.0, 159.0, 133.0, 139.0, 142.0, 133.0, 149.0, 123.0, 124.0, 119.0, 128.0, 64.0, 68.0, 105.0, 122.0, 3.0, 6.0, 132.0, 152.0, 131.0, 161.0, 56.0, 54.0, 104.0, 91.0, 61.0, 84.0, 83.0, 82.0, 162.0, 176.0, 139.0, 140.0, 82.0, 111.0, 109.0, 135.0, 71.0, 65.0, 105.0, 82.0, 142.0, 143.0, 3.0, 9.0, 86.0, 102.0, 111.0, 127.0, 58.0, 62.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6886453974613268, "mean_inference_ms": 1.2113947739430153, "mean_action_processing_ms": 0.13321217952264544, "mean_env_wait_ms": 0.8759285348880911, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 
1280000, "num_agent_steps_trained": 1280000, "num_env_steps_sampled": 640000, "num_env_steps_trained": 640000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 640000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1280000, "timers": {"training_iteration_time_ms": 3674.795, "learn_time_ms": 1102.244, "learn_throughput": 11612.672, "synch_weights_time_ms": 11.619}, "counters": {"num_env_steps_sampled": 640000, "num_env_steps_trained": 640000, "num_agent_steps_sampled": 1280000, "num_agent_steps_trained": 1280000}, "done": false, "episodes_total": 1600, "training_iteration": 50, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-11", "timestamp": 1666580591, "time_this_iter_s": 3.7579612731933594, "time_total_s": 188.81040477752686, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], 
"same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, 
"attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": 
[], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 
64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, 
"replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": 
-1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 188.81040477752686, "timesteps_since_restore": 0, "iterations_since_restore": 50, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.94, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 58.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 74.18, "shaped_reward_min": 9, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.97, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.16, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 7.71, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.13, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 15, "onion_drop_agent_0_mean": 1.81, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.83, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 6.74, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 7.12, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.79, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 
1.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.69, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.13, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.4, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.43, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 6.74, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 7.12, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.74, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 7.12, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.77635686587003e-16, "cur_lr": 0.0010000000474974513, "total_loss": -0.00452738581225276, "policy_loss": -0.004407214000821114, "vf_loss": 6.4294867515563965, "vf_explained_var": 0.5273363590240479, "kl": 0.0014962749555706978, "entropy": 1.526247262954712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 652800, "num_env_steps_trained": 652800, "num_agent_steps_sampled": 1305600, "num_agent_steps_trained": 1305600}, "sampler_results": {"episode_reward_max": 338.0, "episode_reward_min": 9.0, "episode_reward_mean": 191.78, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 176.0}, "policy_reward_mean": {"ppo": 95.89}, "custom_metrics": {"sparse_reward_mean": 
58.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 74.18, "shaped_reward_min": 9, "shaped_reward_max": 118, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.97, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.16, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 7.71, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.13, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 15, "onion_drop_agent_0_mean": 1.81, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.74, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 1.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.83, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 6.74, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 7.12, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 15, "dish_pickup_agent_0_mean": 4.79, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.26, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.74, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.69, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.13, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 5, "soup_delivery_agent_1_mean": 2.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.4, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.43, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 6.74, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 7.12, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 15, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.74, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 7.12, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 15, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [244.0, 204.0, 120.0, 87.0, 134.0, 209.0, 85.0, 90.0, 55.0, 142.0, 189.0, 278.0, 239.0, 236.0, 110.0, 192.0, 253.0, 244.0, 195.0, 147.0, 299.0, 276.0, 213.0, 170.0, 206.0, 184.0, 233.0, 318.0, 207.0, 302.0, 199.0, 141.0, 80.0, 207.0, 189.0, 155.0, 213.0, 287.0, 250.0, 196.0, 193.0, 93.0, 
131.0, 292.0, 281.0, 282.0, 247.0, 247.0, 132.0, 227.0, 9.0, 284.0, 292.0, 110.0, 195.0, 145.0, 165.0, 338.0, 279.0, 193.0, 244.0, 136.0, 187.0, 285.0, 12.0, 188.0, 238.0, 120.0, 178.0, 235.0, 204.0, 260.0, 190.0, 156.0, 80.0, 250.0, 182.0, 209.0, 136.0, 244.0, 168.0, 23.0, 238.0, 247.0, 207.0, 193.0, 187.0, 94.0, 231.0, 135.0, 120.0, 253.0, 148.0, 292.0, 77.0, 96.0, 296.0, 239.0, 210.0, 77.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [120.0, 124.0, 91.0, 113.0, 52.0, 68.0, 51.0, 36.0, 79.0, 55.0, 100.0, 109.0, 29.0, 56.0, 46.0, 44.0, 30.0, 25.0, 60.0, 82.0, 103.0, 86.0, 131.0, 147.0, 122.0, 117.0, 118.0, 118.0, 67.0, 43.0, 105.0, 87.0, 133.0, 120.0, 110.0, 134.0, 97.0, 98.0, 82.0, 65.0, 143.0, 156.0, 136.0, 140.0, 91.0, 122.0, 98.0, 72.0, 104.0, 102.0, 70.0, 114.0, 103.0, 130.0, 145.0, 173.0, 112.0, 95.0, 156.0, 146.0, 80.0, 119.0, 64.0, 77.0, 37.0, 43.0, 112.0, 95.0, 97.0, 92.0, 87.0, 68.0, 106.0, 107.0, 142.0, 145.0, 122.0, 128.0, 85.0, 111.0, 89.0, 104.0, 48.0, 45.0, 62.0, 69.0, 159.0, 133.0, 139.0, 142.0, 133.0, 149.0, 123.0, 124.0, 119.0, 128.0, 64.0, 68.0, 105.0, 122.0, 3.0, 6.0, 132.0, 152.0, 131.0, 161.0, 56.0, 54.0, 104.0, 91.0, 61.0, 84.0, 83.0, 82.0, 162.0, 176.0, 139.0, 140.0, 82.0, 111.0, 109.0, 135.0, 71.0, 65.0, 105.0, 82.0, 142.0, 143.0, 3.0, 9.0, 86.0, 102.0, 111.0, 127.0, 58.0, 62.0, 79.0, 99.0, 112.0, 123.0, 105.0, 99.0, 141.0, 119.0, 82.0, 108.0, 88.0, 68.0, 43.0, 37.0, 123.0, 127.0, 85.0, 97.0, 106.0, 103.0, 68.0, 68.0, 123.0, 121.0, 85.0, 83.0, 3.0, 
20.0, 108.0, 130.0, 137.0, 110.0, 104.0, 103.0, 107.0, 86.0, 98.0, 89.0, 52.0, 42.0, 102.0, 129.0, 69.0, 66.0, 46.0, 74.0, 112.0, 141.0, 83.0, 65.0, 152.0, 140.0, 40.0, 37.0, 50.0, 46.0, 157.0, 139.0, 134.0, 105.0, 96.0, 114.0, 39.0, 38.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6886981609168396, "mean_inference_ms": 1.2112370650019237, "mean_action_processing_ms": 0.1332088502478148, "mean_env_wait_ms": 0.8754223222096275, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 338.0, "episode_reward_min": 9.0, "episode_reward_mean": 191.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 176.0}, "policy_reward_mean": {"ppo": 95.89}, "hist_stats": {"episode_reward": [244.0, 204.0, 120.0, 87.0, 134.0, 209.0, 85.0, 90.0, 55.0, 142.0, 189.0, 278.0, 239.0, 236.0, 110.0, 192.0, 253.0, 244.0, 195.0, 147.0, 299.0, 276.0, 213.0, 170.0, 206.0, 184.0, 233.0, 318.0, 207.0, 302.0, 199.0, 141.0, 80.0, 207.0, 189.0, 155.0, 213.0, 287.0, 250.0, 196.0, 193.0, 93.0, 131.0, 292.0, 281.0, 282.0, 247.0, 247.0, 132.0, 227.0, 9.0, 284.0, 292.0, 110.0, 195.0, 145.0, 165.0, 338.0, 279.0, 193.0, 244.0, 136.0, 187.0, 285.0, 12.0, 188.0, 238.0, 120.0, 178.0, 235.0, 204.0, 260.0, 190.0, 156.0, 80.0, 250.0, 182.0, 209.0, 136.0, 244.0, 168.0, 23.0, 238.0, 247.0, 207.0, 193.0, 187.0, 94.0, 231.0, 135.0, 120.0, 253.0, 148.0, 292.0, 77.0, 96.0, 296.0, 239.0, 210.0, 77.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], 
"policy_ppo_reward": [120.0, 124.0, 91.0, 113.0, 52.0, 68.0, 51.0, 36.0, 79.0, 55.0, 100.0, 109.0, 29.0, 56.0, 46.0, 44.0, 30.0, 25.0, 60.0, 82.0, 103.0, 86.0, 131.0, 147.0, 122.0, 117.0, 118.0, 118.0, 67.0, 43.0, 105.0, 87.0, 133.0, 120.0, 110.0, 134.0, 97.0, 98.0, 82.0, 65.0, 143.0, 156.0, 136.0, 140.0, 91.0, 122.0, 98.0, 72.0, 104.0, 102.0, 70.0, 114.0, 103.0, 130.0, 145.0, 173.0, 112.0, 95.0, 156.0, 146.0, 80.0, 119.0, 64.0, 77.0, 37.0, 43.0, 112.0, 95.0, 97.0, 92.0, 87.0, 68.0, 106.0, 107.0, 142.0, 145.0, 122.0, 128.0, 85.0, 111.0, 89.0, 104.0, 48.0, 45.0, 62.0, 69.0, 159.0, 133.0, 139.0, 142.0, 133.0, 149.0, 123.0, 124.0, 119.0, 128.0, 64.0, 68.0, 105.0, 122.0, 3.0, 6.0, 132.0, 152.0, 131.0, 161.0, 56.0, 54.0, 104.0, 91.0, 61.0, 84.0, 83.0, 82.0, 162.0, 176.0, 139.0, 140.0, 82.0, 111.0, 109.0, 135.0, 71.0, 65.0, 105.0, 82.0, 142.0, 143.0, 3.0, 9.0, 86.0, 102.0, 111.0, 127.0, 58.0, 62.0, 79.0, 99.0, 112.0, 123.0, 105.0, 99.0, 141.0, 119.0, 82.0, 108.0, 88.0, 68.0, 43.0, 37.0, 123.0, 127.0, 85.0, 97.0, 106.0, 103.0, 68.0, 68.0, 123.0, 121.0, 85.0, 83.0, 3.0, 20.0, 108.0, 130.0, 137.0, 110.0, 104.0, 103.0, 107.0, 86.0, 98.0, 89.0, 52.0, 42.0, 102.0, 129.0, 69.0, 66.0, 46.0, 74.0, 112.0, 141.0, 83.0, 65.0, 152.0, 140.0, 40.0, 37.0, 50.0, 46.0, 157.0, 139.0, 134.0, 105.0, 96.0, 114.0, 39.0, 38.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6886981609168396, "mean_inference_ms": 1.2112370650019237, "mean_action_processing_ms": 0.1332088502478148, "mean_env_wait_ms": 0.8754223222096275, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1305600, "num_agent_steps_trained": 1305600, "num_env_steps_sampled": 652800, "num_env_steps_trained": 652800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 652800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1305600, "timers": {"training_iteration_time_ms": 3681.953, "learn_time_ms": 1104.988, 
"learn_throughput": 11583.841, "synch_weights_time_ms": 11.741}, "counters": {"num_env_steps_sampled": 652800, "num_env_steps_trained": 652800, "num_agent_steps_sampled": 1305600, "num_agent_steps_trained": 1305600}, "done": false, "episodes_total": 1632, "training_iteration": 51, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-15", "timestamp": 1666580595, "time_this_iter_s": 3.8342444896698, "time_total_s": 192.64464926719666, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, 
"POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, 
"custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, 
"custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 
8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": 
null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 192.64464926719666, "timesteps_since_restore": 0, "iterations_since_restore": 51, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.21666666666667, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 61.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 73.3, "shaped_reward_min": 9, "shaped_reward_max": 115, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.73, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.28, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 7.59, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.13, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.65, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.95, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 6.65, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 7.23, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 4.86, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.06, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.11, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.66, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.77, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.67, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.27, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.36, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.33, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 6.65, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 7.23, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.65, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 7.23, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 8.88178432935015e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.004982168320566416, "policy_loss": -0.0049325828440487385, "vf_loss": 6.978565216064453, "vf_explained_var": 0.522409200668335, "kl": 0.0016363689210265875, "entropy": 1.4948899745941162, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 665600, "num_env_steps_trained": 665600, "num_agent_steps_sampled": 1331200, "num_agent_steps_trained": 1331200}, "sampler_results": {"episode_reward_max": 338.0, "episode_reward_min": 9.0, "episode_reward_mean": 196.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 184.0}, "policy_reward_mean": {"ppo": 98.45}, "custom_metrics": {"sparse_reward_mean": 61.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 73.3, "shaped_reward_min": 9, "shaped_reward_max": 115, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.73, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.28, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 7.59, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.13, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.65, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.67, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.95, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 6.65, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 7.23, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 4.86, "dish_pickup_agent_0_min": 0, 
"dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.06, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.11, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.66, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 2.77, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.67, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.29, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.27, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.36, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.33, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 6.65, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 7.23, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.65, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 7.23, 
"viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [80.0, 207.0, 189.0, 155.0, 213.0, 287.0, 250.0, 196.0, 193.0, 93.0, 131.0, 292.0, 281.0, 282.0, 247.0, 247.0, 132.0, 227.0, 9.0, 284.0, 292.0, 110.0, 195.0, 145.0, 165.0, 338.0, 279.0, 193.0, 244.0, 136.0, 187.0, 285.0, 12.0, 188.0, 238.0, 120.0, 178.0, 235.0, 204.0, 260.0, 190.0, 156.0, 80.0, 250.0, 182.0, 209.0, 136.0, 244.0, 168.0, 23.0, 238.0, 247.0, 207.0, 193.0, 187.0, 94.0, 231.0, 135.0, 120.0, 253.0, 148.0, 292.0, 77.0, 96.0, 296.0, 239.0, 210.0, 77.0, 255.0, 133.0, 193.0, 298.0, 239.0, 176.0, 71.0, 69.0, 231.0, 273.0, 212.0, 127.0, 180.0, 236.0, 158.0, 276.0, 282.0, 
213.0, 264.0, 284.0, 196.0, 256.0, 163.0, 145.0, 204.0, 168.0, 179.0, 335.0, 299.0, 126.0, 176.0, 296.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [37.0, 43.0, 112.0, 95.0, 97.0, 92.0, 87.0, 68.0, 106.0, 107.0, 142.0, 145.0, 122.0, 128.0, 85.0, 111.0, 89.0, 104.0, 48.0, 45.0, 62.0, 69.0, 159.0, 133.0, 139.0, 142.0, 133.0, 149.0, 123.0, 124.0, 119.0, 128.0, 64.0, 68.0, 105.0, 122.0, 3.0, 6.0, 132.0, 152.0, 131.0, 161.0, 56.0, 54.0, 104.0, 91.0, 61.0, 84.0, 83.0, 82.0, 162.0, 176.0, 139.0, 140.0, 82.0, 111.0, 109.0, 135.0, 71.0, 65.0, 105.0, 82.0, 142.0, 143.0, 3.0, 9.0, 86.0, 102.0, 111.0, 127.0, 58.0, 62.0, 79.0, 99.0, 112.0, 123.0, 105.0, 99.0, 141.0, 119.0, 82.0, 108.0, 88.0, 68.0, 43.0, 37.0, 123.0, 127.0, 85.0, 97.0, 106.0, 103.0, 68.0, 68.0, 123.0, 121.0, 85.0, 83.0, 3.0, 20.0, 108.0, 130.0, 137.0, 110.0, 104.0, 103.0, 107.0, 86.0, 98.0, 89.0, 52.0, 42.0, 102.0, 129.0, 69.0, 66.0, 46.0, 74.0, 112.0, 141.0, 83.0, 65.0, 152.0, 140.0, 40.0, 37.0, 50.0, 46.0, 157.0, 139.0, 134.0, 105.0, 96.0, 114.0, 39.0, 38.0, 127.0, 128.0, 55.0, 78.0, 94.0, 99.0, 157.0, 141.0, 126.0, 113.0, 88.0, 88.0, 28.0, 43.0, 37.0, 32.0, 127.0, 104.0, 140.0, 133.0, 113.0, 99.0, 59.0, 68.0, 100.0, 80.0, 120.0, 116.0, 78.0, 80.0, 135.0, 141.0, 140.0, 142.0, 108.0, 105.0, 132.0, 132.0, 151.0, 133.0, 107.0, 89.0, 131.0, 125.0, 75.0, 88.0, 75.0, 70.0, 94.0, 110.0, 85.0, 83.0, 85.0, 94.0, 151.0, 184.0, 148.0, 151.0, 68.0, 58.0, 100.0, 76.0, 153.0, 143.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 
0.688734889605353, "mean_inference_ms": 1.2110129239657774, "mean_action_processing_ms": 0.13319930408174357, "mean_env_wait_ms": 0.8749155207185934, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 338.0, "episode_reward_min": 9.0, "episode_reward_mean": 196.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 184.0}, "policy_reward_mean": {"ppo": 98.45}, "hist_stats": {"episode_reward": [80.0, 207.0, 189.0, 155.0, 213.0, 287.0, 250.0, 196.0, 193.0, 93.0, 131.0, 292.0, 281.0, 282.0, 247.0, 247.0, 132.0, 227.0, 9.0, 284.0, 292.0, 110.0, 195.0, 145.0, 165.0, 338.0, 279.0, 193.0, 244.0, 136.0, 187.0, 285.0, 12.0, 188.0, 238.0, 120.0, 178.0, 235.0, 204.0, 260.0, 190.0, 156.0, 80.0, 250.0, 182.0, 209.0, 136.0, 244.0, 168.0, 23.0, 238.0, 247.0, 207.0, 193.0, 187.0, 94.0, 231.0, 135.0, 120.0, 253.0, 148.0, 292.0, 77.0, 96.0, 296.0, 239.0, 210.0, 77.0, 255.0, 133.0, 193.0, 298.0, 239.0, 176.0, 71.0, 69.0, 231.0, 273.0, 212.0, 127.0, 180.0, 236.0, 158.0, 276.0, 282.0, 213.0, 264.0, 284.0, 196.0, 256.0, 163.0, 145.0, 204.0, 168.0, 179.0, 335.0, 299.0, 126.0, 176.0, 296.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [37.0, 43.0, 112.0, 95.0, 97.0, 92.0, 87.0, 68.0, 106.0, 107.0, 142.0, 145.0, 122.0, 128.0, 85.0, 111.0, 89.0, 104.0, 48.0, 45.0, 62.0, 69.0, 159.0, 133.0, 139.0, 142.0, 133.0, 149.0, 123.0, 124.0, 119.0, 128.0, 64.0, 68.0, 105.0, 122.0, 3.0, 6.0, 132.0, 152.0, 131.0, 161.0, 56.0, 
54.0, 104.0, 91.0, 61.0, 84.0, 83.0, 82.0, 162.0, 176.0, 139.0, 140.0, 82.0, 111.0, 109.0, 135.0, 71.0, 65.0, 105.0, 82.0, 142.0, 143.0, 3.0, 9.0, 86.0, 102.0, 111.0, 127.0, 58.0, 62.0, 79.0, 99.0, 112.0, 123.0, 105.0, 99.0, 141.0, 119.0, 82.0, 108.0, 88.0, 68.0, 43.0, 37.0, 123.0, 127.0, 85.0, 97.0, 106.0, 103.0, 68.0, 68.0, 123.0, 121.0, 85.0, 83.0, 3.0, 20.0, 108.0, 130.0, 137.0, 110.0, 104.0, 103.0, 107.0, 86.0, 98.0, 89.0, 52.0, 42.0, 102.0, 129.0, 69.0, 66.0, 46.0, 74.0, 112.0, 141.0, 83.0, 65.0, 152.0, 140.0, 40.0, 37.0, 50.0, 46.0, 157.0, 139.0, 134.0, 105.0, 96.0, 114.0, 39.0, 38.0, 127.0, 128.0, 55.0, 78.0, 94.0, 99.0, 157.0, 141.0, 126.0, 113.0, 88.0, 88.0, 28.0, 43.0, 37.0, 32.0, 127.0, 104.0, 140.0, 133.0, 113.0, 99.0, 59.0, 68.0, 100.0, 80.0, 120.0, 116.0, 78.0, 80.0, 135.0, 141.0, 140.0, 142.0, 108.0, 105.0, 132.0, 132.0, 151.0, 133.0, 107.0, 89.0, 131.0, 125.0, 75.0, 88.0, 75.0, 70.0, 94.0, 110.0, 85.0, 83.0, 85.0, 94.0, 151.0, 184.0, 148.0, 151.0, 68.0, 58.0, 100.0, 76.0, 153.0, 143.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.688734889605353, "mean_inference_ms": 1.2110129239657774, "mean_action_processing_ms": 0.13319930408174357, "mean_env_wait_ms": 0.8749155207185934, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1331200, "num_agent_steps_trained": 1331200, "num_env_steps_sampled": 665600, "num_env_steps_trained": 665600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 665600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1331200, "timers": {"training_iteration_time_ms": 3673.186, "learn_time_ms": 1096.032, "learn_throughput": 11678.494, "synch_weights_time_ms": 11.104}, "counters": {"num_env_steps_sampled": 665600, "num_env_steps_trained": 665600, "num_agent_steps_sampled": 1331200, "num_agent_steps_trained": 1331200}, "done": false, "episodes_total": 1664, "training_iteration": 52, "trial_id": 
"default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-19", "timestamp": 1666580599, "time_this_iter_s": 3.693157911300659, "time_total_s": 196.33780717849731, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, 
"sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, 
"output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, 
"num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", 
"multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, 
"kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 196.33780717849731, "timesteps_since_restore": 0, "iterations_since_restore": 52, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.73333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 63.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 75.57, "shaped_reward_min": 12, "shaped_reward_max": 116, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.84, 
"onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.63, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 7.64, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.54, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 1.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 6.76, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 7.5, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 4.88, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.82, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.3, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.58, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.72, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.81, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.62, "soup_pickup_agent_1_min": 0, 
"soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.28, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.3, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.28, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 6.76, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 7.5, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.76, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 7.5, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, 
"useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.440892164675075e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.003979704808443785, "policy_loss": -0.0039646499790251255, "vf_loss": 7.258543968200684, "vf_explained_var": 0.5249006748199463, "kl": 0.0017533027566969395, "entropy": 1.481823205947876, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 678400, "num_env_steps_trained": 678400, "num_agent_steps_sampled": 1356800, "num_agent_steps_trained": 1356800}, "sampler_results": {"episode_reward_max": 356.0, "episode_reward_min": 12.0, "episode_reward_mean": 203.17, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 185.0}, "policy_reward_mean": {"ppo": 101.585}, "custom_metrics": {"sparse_reward_mean": 63.8, "sparse_reward_min": 0, "sparse_reward_max": 120, "shaped_reward_mean": 75.57, "shaped_reward_min": 12, "shaped_reward_max": 116, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, 
"tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.84, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 9.63, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 7.64, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 8.54, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.71, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 1.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 6.76, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 7.5, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 4.88, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.82, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.3, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.13, "useful_dish_pickup_agent_1_min": 0, 
"useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.95, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.58, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.72, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 2.81, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.28, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.3, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.28, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 6.76, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 7.5, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.76, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 7.5, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, 
"catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [12.0, 188.0, 238.0, 120.0, 178.0, 235.0, 204.0, 260.0, 190.0, 156.0, 80.0, 250.0, 182.0, 209.0, 136.0, 244.0, 168.0, 23.0, 238.0, 247.0, 207.0, 193.0, 187.0, 94.0, 231.0, 135.0, 120.0, 253.0, 148.0, 292.0, 77.0, 96.0, 296.0, 239.0, 210.0, 77.0, 255.0, 133.0, 193.0, 298.0, 239.0, 176.0, 71.0, 69.0, 231.0, 273.0, 212.0, 127.0, 180.0, 236.0, 158.0, 276.0, 282.0, 213.0, 264.0, 284.0, 196.0, 256.0, 163.0, 145.0, 204.0, 168.0, 179.0, 335.0, 299.0, 126.0, 176.0, 296.0, 256.0, 302.0, 356.0, 255.0, 233.0, 230.0, 299.0, 141.0, 202.0, 190.0, 301.0, 74.0, 179.0, 201.0, 236.0, 152.0, 121.0, 239.0, 144.0, 288.0, 330.0, 193.0, 228.0, 113.0, 316.0, 288.0, 305.0, 215.0, 256.0, 230.0, 176.0, 142.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 9.0, 86.0, 102.0, 111.0, 127.0, 58.0, 62.0, 79.0, 99.0, 112.0, 123.0, 105.0, 99.0, 141.0, 119.0, 82.0, 108.0, 88.0, 68.0, 43.0, 37.0, 123.0, 127.0, 85.0, 97.0, 106.0, 103.0, 68.0, 68.0, 123.0, 121.0, 85.0, 83.0, 3.0, 20.0, 108.0, 130.0, 137.0, 110.0, 104.0, 103.0, 107.0, 86.0, 98.0, 89.0, 52.0, 42.0, 102.0, 129.0, 69.0, 66.0, 46.0, 74.0, 112.0, 141.0, 83.0, 65.0, 152.0, 140.0, 40.0, 37.0, 50.0, 46.0, 157.0, 139.0, 134.0, 105.0, 96.0, 114.0, 39.0, 38.0, 127.0, 128.0, 55.0, 78.0, 94.0, 99.0, 157.0, 141.0, 126.0, 113.0, 88.0, 88.0, 28.0, 43.0, 37.0, 32.0, 127.0, 104.0, 140.0, 133.0, 113.0, 99.0, 59.0, 68.0, 100.0, 80.0, 120.0, 116.0, 78.0, 80.0, 135.0, 141.0, 140.0, 142.0, 108.0, 105.0, 132.0, 132.0, 151.0, 133.0, 107.0, 89.0, 131.0, 125.0, 75.0, 88.0, 75.0, 70.0, 94.0, 110.0, 85.0, 83.0, 85.0, 94.0, 151.0, 184.0, 148.0, 151.0, 68.0, 58.0, 100.0, 76.0, 153.0, 143.0, 126.0, 130.0, 162.0, 140.0, 171.0, 185.0, 130.0, 125.0, 125.0, 108.0, 113.0, 117.0, 129.0, 170.0, 70.0, 71.0, 99.0, 103.0, 97.0, 93.0, 161.0, 140.0, 42.0, 32.0, 92.0, 87.0, 101.0, 100.0, 121.0, 115.0, 73.0, 79.0, 51.0, 70.0, 120.0, 119.0, 81.0, 63.0, 148.0, 140.0, 162.0, 168.0, 93.0, 100.0, 113.0, 115.0, 53.0, 60.0, 169.0, 147.0, 137.0, 151.0, 157.0, 148.0, 96.0, 119.0, 124.0, 132.0, 99.0, 131.0, 69.0, 107.0, 80.0, 62.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6887014698007187, "mean_inference_ms": 1.2107337327663068, "mean_action_processing_ms": 0.13317170881738327, "mean_env_wait_ms": 0.8742339680417999, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 356.0, "episode_reward_min": 12.0, "episode_reward_mean": 203.17, "episode_len_mean": 
400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 185.0}, "policy_reward_mean": {"ppo": 101.585}, "hist_stats": {"episode_reward": [12.0, 188.0, 238.0, 120.0, 178.0, 235.0, 204.0, 260.0, 190.0, 156.0, 80.0, 250.0, 182.0, 209.0, 136.0, 244.0, 168.0, 23.0, 238.0, 247.0, 207.0, 193.0, 187.0, 94.0, 231.0, 135.0, 120.0, 253.0, 148.0, 292.0, 77.0, 96.0, 296.0, 239.0, 210.0, 77.0, 255.0, 133.0, 193.0, 298.0, 239.0, 176.0, 71.0, 69.0, 231.0, 273.0, 212.0, 127.0, 180.0, 236.0, 158.0, 276.0, 282.0, 213.0, 264.0, 284.0, 196.0, 256.0, 163.0, 145.0, 204.0, 168.0, 179.0, 335.0, 299.0, 126.0, 176.0, 296.0, 256.0, 302.0, 356.0, 255.0, 233.0, 230.0, 299.0, 141.0, 202.0, 190.0, 301.0, 74.0, 179.0, 201.0, 236.0, 152.0, 121.0, 239.0, 144.0, 288.0, 330.0, 193.0, 228.0, 113.0, 316.0, 288.0, 305.0, 215.0, 256.0, 230.0, 176.0, 142.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [3.0, 9.0, 86.0, 102.0, 111.0, 127.0, 58.0, 62.0, 79.0, 99.0, 112.0, 123.0, 105.0, 99.0, 141.0, 119.0, 82.0, 108.0, 88.0, 68.0, 43.0, 37.0, 123.0, 127.0, 85.0, 97.0, 106.0, 103.0, 68.0, 68.0, 123.0, 121.0, 85.0, 83.0, 3.0, 20.0, 108.0, 130.0, 137.0, 110.0, 104.0, 103.0, 107.0, 86.0, 98.0, 89.0, 52.0, 42.0, 102.0, 129.0, 69.0, 66.0, 46.0, 74.0, 112.0, 141.0, 83.0, 65.0, 152.0, 140.0, 40.0, 37.0, 50.0, 46.0, 157.0, 139.0, 134.0, 105.0, 96.0, 114.0, 39.0, 38.0, 127.0, 128.0, 55.0, 78.0, 94.0, 99.0, 157.0, 141.0, 126.0, 113.0, 88.0, 88.0, 28.0, 43.0, 37.0, 32.0, 127.0, 104.0, 140.0, 
133.0, 113.0, 99.0, 59.0, 68.0, 100.0, 80.0, 120.0, 116.0, 78.0, 80.0, 135.0, 141.0, 140.0, 142.0, 108.0, 105.0, 132.0, 132.0, 151.0, 133.0, 107.0, 89.0, 131.0, 125.0, 75.0, 88.0, 75.0, 70.0, 94.0, 110.0, 85.0, 83.0, 85.0, 94.0, 151.0, 184.0, 148.0, 151.0, 68.0, 58.0, 100.0, 76.0, 153.0, 143.0, 126.0, 130.0, 162.0, 140.0, 171.0, 185.0, 130.0, 125.0, 125.0, 108.0, 113.0, 117.0, 129.0, 170.0, 70.0, 71.0, 99.0, 103.0, 97.0, 93.0, 161.0, 140.0, 42.0, 32.0, 92.0, 87.0, 101.0, 100.0, 121.0, 115.0, 73.0, 79.0, 51.0, 70.0, 120.0, 119.0, 81.0, 63.0, 148.0, 140.0, 162.0, 168.0, 93.0, 100.0, 113.0, 115.0, 53.0, 60.0, 169.0, 147.0, 137.0, 151.0, 157.0, 148.0, 96.0, 119.0, 124.0, 132.0, 99.0, 131.0, 69.0, 107.0, 80.0, 62.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6887014698007187, "mean_inference_ms": 1.2107337327663068, "mean_action_processing_ms": 0.13317170881738327, "mean_env_wait_ms": 0.8742339680417999, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1356800, "num_agent_steps_trained": 1356800, "num_env_steps_sampled": 678400, "num_env_steps_trained": 678400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 678400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1356800, "timers": {"training_iteration_time_ms": 3668.9, "learn_time_ms": 1097.264, "learn_throughput": 11665.377, "synch_weights_time_ms": 10.808}, "counters": {"num_env_steps_sampled": 678400, "num_env_steps_trained": 678400, "num_agent_steps_sampled": 1356800, "num_agent_steps_trained": 1356800}, "done": false, "episodes_total": 1696, "training_iteration": 53, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-23", "timestamp": 1666580603, "time_this_iter_s": 3.820449113845825, "time_total_s": 200.15825629234314, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": 
{"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, 
"recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": 
false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": 
false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, 
"evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, 
"callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 200.15825629234314, "timesteps_since_restore": 0, "iterations_since_restore": 53, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.82, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 72.4, "sparse_reward_min": 20, "sparse_reward_max": 120, "shaped_reward_mean": 81.74, "shaped_reward_min": 29, "shaped_reward_max": 127, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.06, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 10.42, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 7.89, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, 
"useful_onion_pickup_agent_1_mean": 9.39, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.77, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.83, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 6.98, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 8.35, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.71, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.16, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.97, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.66, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.78, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.61, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.79, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.26, "soup_drop_agent_0_min": 0, 
"soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.21, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 6.98, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 8.35, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.98, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 8.35, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.2204460823375376e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.005493534728884697, "policy_loss": -0.005526891443878412, "vf_loss": 7.647051811218262, "vf_explained_var": 0.5422403216362, "kl": 0.001551097142510116, "entropy": 1.4626996517181396, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 691200, "num_env_steps_trained": 691200, "num_agent_steps_sampled": 1382400, "num_agent_steps_trained": 1382400}, "sampler_results": {"episode_reward_max": 356.0, "episode_reward_min": 69.0, "episode_reward_mean": 226.54, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 28.0}, "policy_reward_max": {"ppo": 185.0}, "policy_reward_mean": {"ppo": 113.27}, "custom_metrics": {"sparse_reward_mean": 72.4, "sparse_reward_min": 20, "sparse_reward_max": 120, "shaped_reward_mean": 81.74, "shaped_reward_min": 29, "shaped_reward_max": 127, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.06, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 10.42, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 7.89, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 9.39, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.77, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.65, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 1.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.83, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 6.98, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 14, "potting_onion_agent_1_mean": 8.35, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.71, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 15, "useful_dish_pickup_agent_0_mean": 1.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.16, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 4, "dish_drop_agent_0_mean": 1.78, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.97, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.66, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.78, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.61, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.79, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.26, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.21, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 6.98, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 14, "optimal_onion_potting_agent_1_mean": 8.35, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 6.98, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 14, "viable_onion_potting_agent_1_mean": 8.35, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [296.0, 239.0, 210.0, 77.0, 255.0, 133.0, 193.0, 298.0, 239.0, 176.0, 71.0, 69.0, 231.0, 273.0, 212.0, 127.0, 180.0, 236.0, 158.0, 276.0, 282.0, 213.0, 264.0, 284.0, 196.0, 256.0, 163.0, 145.0, 204.0, 168.0, 179.0, 335.0, 299.0, 126.0, 176.0, 296.0, 256.0, 302.0, 356.0, 255.0, 233.0, 230.0, 299.0, 141.0, 202.0, 190.0, 301.0, 74.0, 179.0, 201.0, 236.0, 152.0, 121.0, 239.0, 144.0, 288.0, 330.0, 193.0, 228.0, 113.0, 316.0, 288.0, 305.0, 215.0, 256.0, 230.0, 176.0, 142.0, 233.0, 285.0, 252.0, 198.0, 155.0, 301.0, 190.0, 258.0, 289.0, 175.0, 188.0, 233.0, 293.0, 241.0, 297.0, 292.0, 348.0, 327.0, 188.0, 305.0, 175.0, 290.0, 158.0, 259.0, 264.0, 290.0, 248.0, 209.0, 301.0, 246.0, 199.0, 241.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [157.0, 139.0, 134.0, 105.0, 96.0, 114.0, 39.0, 38.0, 127.0, 128.0, 55.0, 78.0, 94.0, 99.0, 157.0, 141.0, 126.0, 113.0, 88.0, 88.0, 28.0, 43.0, 37.0, 32.0, 127.0, 104.0, 140.0, 133.0, 113.0, 99.0, 59.0, 68.0, 100.0, 80.0, 120.0, 116.0, 78.0, 80.0, 135.0, 141.0, 140.0, 142.0, 108.0, 105.0, 132.0, 132.0, 151.0, 133.0, 107.0, 89.0, 131.0, 125.0, 75.0, 88.0, 75.0, 70.0, 94.0, 110.0, 85.0, 83.0, 85.0, 94.0, 151.0, 184.0, 148.0, 151.0, 68.0, 58.0, 100.0, 76.0, 153.0, 143.0, 126.0, 130.0, 162.0, 140.0, 171.0, 185.0, 130.0, 125.0, 125.0, 108.0, 113.0, 117.0, 129.0, 170.0, 70.0, 71.0, 99.0, 103.0, 97.0, 93.0, 161.0, 140.0, 42.0, 32.0, 92.0, 87.0, 101.0, 100.0, 121.0, 115.0, 73.0, 79.0, 51.0, 70.0, 120.0, 119.0, 81.0, 63.0, 148.0, 140.0, 162.0, 168.0, 93.0, 100.0, 113.0, 115.0, 53.0, 60.0, 169.0, 147.0, 137.0, 151.0, 157.0, 148.0, 96.0, 119.0, 124.0, 132.0, 99.0, 131.0, 69.0, 107.0, 80.0, 62.0, 125.0, 108.0, 146.0, 139.0, 117.0, 135.0, 102.0, 96.0, 84.0, 71.0, 156.0, 145.0, 96.0, 94.0, 126.0, 132.0, 133.0, 156.0, 83.0, 92.0, 100.0, 88.0, 122.0, 111.0, 143.0, 150.0, 119.0, 122.0, 150.0, 147.0, 137.0, 155.0, 176.0, 172.0, 166.0, 161.0, 85.0, 103.0, 151.0, 154.0, 92.0, 83.0, 131.0, 159.0, 81.0, 77.0, 134.0, 125.0, 132.0, 132.0, 157.0, 133.0, 121.0, 127.0, 96.0, 113.0, 140.0, 161.0, 135.0, 111.0, 101.0, 98.0, 117.0, 124.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6885774823538797, "mean_inference_ms": 1.2104421536435193, "mean_action_processing_ms": 0.1331451018686985, "mean_env_wait_ms": 0.8735049641694252, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 356.0, "episode_reward_min": 69.0, "episode_reward_mean": 226.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 28.0}, "policy_reward_max": {"ppo": 185.0}, "policy_reward_mean": {"ppo": 113.27}, "hist_stats": {"episode_reward": [296.0, 239.0, 210.0, 77.0, 255.0, 133.0, 193.0, 298.0, 239.0, 176.0, 
71.0, 69.0, 231.0, 273.0, 212.0, 127.0, 180.0, 236.0, 158.0, 276.0, 282.0, 213.0, 264.0, 284.0, 196.0, 256.0, 163.0, 145.0, 204.0, 168.0, 179.0, 335.0, 299.0, 126.0, 176.0, 296.0, 256.0, 302.0, 356.0, 255.0, 233.0, 230.0, 299.0, 141.0, 202.0, 190.0, 301.0, 74.0, 179.0, 201.0, 236.0, 152.0, 121.0, 239.0, 144.0, 288.0, 330.0, 193.0, 228.0, 113.0, 316.0, 288.0, 305.0, 215.0, 256.0, 230.0, 176.0, 142.0, 233.0, 285.0, 252.0, 198.0, 155.0, 301.0, 190.0, 258.0, 289.0, 175.0, 188.0, 233.0, 293.0, 241.0, 297.0, 292.0, 348.0, 327.0, 188.0, 305.0, 175.0, 290.0, 158.0, 259.0, 264.0, 290.0, 248.0, 209.0, 301.0, 246.0, 199.0, 241.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [157.0, 139.0, 134.0, 105.0, 96.0, 114.0, 39.0, 38.0, 127.0, 128.0, 55.0, 78.0, 94.0, 99.0, 157.0, 141.0, 126.0, 113.0, 88.0, 88.0, 28.0, 43.0, 37.0, 32.0, 127.0, 104.0, 140.0, 133.0, 113.0, 99.0, 59.0, 68.0, 100.0, 80.0, 120.0, 116.0, 78.0, 80.0, 135.0, 141.0, 140.0, 142.0, 108.0, 105.0, 132.0, 132.0, 151.0, 133.0, 107.0, 89.0, 131.0, 125.0, 75.0, 88.0, 75.0, 70.0, 94.0, 110.0, 85.0, 83.0, 85.0, 94.0, 151.0, 184.0, 148.0, 151.0, 68.0, 58.0, 100.0, 76.0, 153.0, 143.0, 126.0, 130.0, 162.0, 140.0, 171.0, 185.0, 130.0, 125.0, 125.0, 108.0, 113.0, 117.0, 129.0, 170.0, 70.0, 71.0, 99.0, 103.0, 97.0, 93.0, 161.0, 140.0, 42.0, 32.0, 92.0, 87.0, 101.0, 100.0, 121.0, 115.0, 73.0, 79.0, 51.0, 70.0, 120.0, 119.0, 81.0, 63.0, 148.0, 140.0, 162.0, 168.0, 93.0, 100.0, 113.0, 115.0, 53.0, 60.0, 169.0, 147.0, 137.0, 151.0, 157.0, 
148.0, 96.0, 119.0, 124.0, 132.0, 99.0, 131.0, 69.0, 107.0, 80.0, 62.0, 125.0, 108.0, 146.0, 139.0, 117.0, 135.0, 102.0, 96.0, 84.0, 71.0, 156.0, 145.0, 96.0, 94.0, 126.0, 132.0, 133.0, 156.0, 83.0, 92.0, 100.0, 88.0, 122.0, 111.0, 143.0, 150.0, 119.0, 122.0, 150.0, 147.0, 137.0, 155.0, 176.0, 172.0, 166.0, 161.0, 85.0, 103.0, 151.0, 154.0, 92.0, 83.0, 131.0, 159.0, 81.0, 77.0, 134.0, 125.0, 132.0, 132.0, 157.0, 133.0, 121.0, 127.0, 96.0, 113.0, 140.0, 161.0, 135.0, 111.0, 101.0, 98.0, 117.0, 124.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6885774823538797, "mean_inference_ms": 1.2104421536435193, "mean_action_processing_ms": 0.1331451018686985, "mean_env_wait_ms": 0.8735049641694252, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1382400, "num_agent_steps_trained": 1382400, "num_env_steps_sampled": 691200, "num_env_steps_trained": 691200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 691200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1382400, "timers": {"training_iteration_time_ms": 3649.094, "learn_time_ms": 1101.345, "learn_throughput": 11622.149, "synch_weights_time_ms": 11.009}, "counters": {"num_env_steps_sampled": 691200, "num_env_steps_trained": 691200, "num_agent_steps_sampled": 1382400, "num_agent_steps_trained": 1382400}, "done": false, "episodes_total": 1728, "training_iteration": 54, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-27", "timestamp": 1666580607, "time_this_iter_s": 3.6673367023468018, "time_total_s": 203.82559299468994, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", 
"eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": 
"NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": 
false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 
203.82559299468994, "timesteps_since_restore": 0, "iterations_since_restore": 54, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.0, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 76.2, "sparse_reward_min": 20, "sparse_reward_max": 140, "shaped_reward_mean": 83.9, "shaped_reward_min": 29, "shaped_reward_max": 127, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.21, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 10.41, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 8.1, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 9.47, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.58, "onion_drop_agent_1_min": 
0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.88, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 7.25, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 13, "potting_onion_agent_1_mean": 8.47, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 16, "dish_pickup_agent_0_mean": 5.16, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.0, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.68, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.73, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.76, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.54, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 7.25, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 13, "optimal_onion_potting_agent_1_mean": 
8.47, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 16, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.25, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 13, "viable_onion_potting_agent_1_mean": 8.47, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 16, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 
1.1102230411687688e-17, "cur_lr": 0.0010000000474974513, "total_loss": -0.003027592319995165, "policy_loss": -0.0030293785966932774, "vf_loss": 7.28782320022583, "vf_explained_var": 0.5821901559829712, "kl": 0.0017681021708995104, "entropy": 1.4539985656738281, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 704000, "num_env_steps_trained": 704000, "num_agent_steps_sampled": 1408000, "num_agent_steps_trained": 1408000}, "sampler_results": {"episode_reward_max": 390.0, "episode_reward_min": 69.0, "episode_reward_mean": 236.3, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 219.0}, "policy_reward_mean": {"ppo": 118.15}, "custom_metrics": {"sparse_reward_mean": 76.2, "sparse_reward_min": 20, "sparse_reward_max": 140, "shaped_reward_mean": 83.9, "shaped_reward_min": 29, "shaped_reward_max": 127, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 9.21, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 10.41, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 8.1, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 9.47, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.69, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.58, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.88, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.8, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 7.25, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 13, "potting_onion_agent_1_mean": 8.47, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 16, "dish_pickup_agent_0_mean": 5.16, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.87, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.0, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.68, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.73, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 2.76, 
"soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.76, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.54, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.17, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.17, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 7.25, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 13, "optimal_onion_potting_agent_1_mean": 8.47, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 16, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.25, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 13, "viable_onion_potting_agent_1_mean": 8.47, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 16, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [299.0, 126.0, 176.0, 296.0, 256.0, 302.0, 356.0, 255.0, 233.0, 230.0, 299.0, 141.0, 202.0, 190.0, 301.0, 74.0, 179.0, 201.0, 236.0, 152.0, 121.0, 239.0, 144.0, 288.0, 330.0, 193.0, 228.0, 113.0, 316.0, 288.0, 305.0, 215.0, 256.0, 230.0, 176.0, 142.0, 233.0, 285.0, 252.0, 198.0, 155.0, 301.0, 190.0, 258.0, 289.0, 175.0, 188.0, 233.0, 293.0, 241.0, 297.0, 292.0, 348.0, 327.0, 188.0, 305.0, 175.0, 290.0, 158.0, 259.0, 264.0, 290.0, 248.0, 209.0, 301.0, 246.0, 199.0, 241.0, 273.0, 390.0, 188.0, 296.0, 239.0, 253.0, 196.0, 356.0, 69.0, 125.0, 247.0, 182.0, 239.0, 161.0, 123.0, 230.0, 307.0, 348.0, 188.0, 190.0, 252.0, 218.0, 342.0, 290.0, 341.0, 293.0, 180.0, 273.0, 128.0, 206.0, 249.0, 242.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [148.0, 151.0, 68.0, 58.0, 100.0, 76.0, 153.0, 143.0, 126.0, 130.0, 162.0, 140.0, 171.0, 185.0, 130.0, 125.0, 125.0, 108.0, 113.0, 117.0, 129.0, 170.0, 70.0, 71.0, 99.0, 103.0, 97.0, 93.0, 161.0, 140.0, 42.0, 32.0, 92.0, 
87.0, 101.0, 100.0, 121.0, 115.0, 73.0, 79.0, 51.0, 70.0, 120.0, 119.0, 81.0, 63.0, 148.0, 140.0, 162.0, 168.0, 93.0, 100.0, 113.0, 115.0, 53.0, 60.0, 169.0, 147.0, 137.0, 151.0, 157.0, 148.0, 96.0, 119.0, 124.0, 132.0, 99.0, 131.0, 69.0, 107.0, 80.0, 62.0, 125.0, 108.0, 146.0, 139.0, 117.0, 135.0, 102.0, 96.0, 84.0, 71.0, 156.0, 145.0, 96.0, 94.0, 126.0, 132.0, 133.0, 156.0, 83.0, 92.0, 100.0, 88.0, 122.0, 111.0, 143.0, 150.0, 119.0, 122.0, 150.0, 147.0, 137.0, 155.0, 176.0, 172.0, 166.0, 161.0, 85.0, 103.0, 151.0, 154.0, 92.0, 83.0, 131.0, 159.0, 81.0, 77.0, 134.0, 125.0, 132.0, 132.0, 157.0, 133.0, 121.0, 127.0, 96.0, 113.0, 140.0, 161.0, 135.0, 111.0, 101.0, 98.0, 117.0, 124.0, 149.0, 124.0, 171.0, 219.0, 95.0, 93.0, 139.0, 157.0, 114.0, 125.0, 127.0, 126.0, 101.0, 95.0, 163.0, 193.0, 32.0, 37.0, 59.0, 66.0, 117.0, 130.0, 88.0, 94.0, 119.0, 120.0, 79.0, 82.0, 57.0, 66.0, 122.0, 108.0, 148.0, 159.0, 174.0, 174.0, 89.0, 99.0, 105.0, 85.0, 134.0, 118.0, 111.0, 107.0, 170.0, 172.0, 159.0, 131.0, 169.0, 172.0, 148.0, 145.0, 80.0, 100.0, 138.0, 135.0, 69.0, 59.0, 90.0, 116.0, 116.0, 133.0, 120.0, 122.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6884233098602586, "mean_inference_ms": 1.2102454391431337, "mean_action_processing_ms": 0.133125106706314, "mean_env_wait_ms": 0.8727177708913959, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 390.0, "episode_reward_min": 69.0, "episode_reward_mean": 236.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 219.0}, "policy_reward_mean": {"ppo": 118.15}, "hist_stats": {"episode_reward": [299.0, 126.0, 176.0, 296.0, 256.0, 302.0, 356.0, 255.0, 233.0, 230.0, 299.0, 141.0, 202.0, 190.0, 301.0, 74.0, 179.0, 201.0, 236.0, 152.0, 121.0, 239.0, 144.0, 288.0, 330.0, 193.0, 228.0, 113.0, 316.0, 288.0, 305.0, 215.0, 256.0, 230.0, 176.0, 142.0, 233.0, 285.0, 252.0, 198.0, 155.0, 301.0, 190.0, 258.0, 289.0, 175.0, 188.0, 233.0, 293.0, 
241.0, 297.0, 292.0, 348.0, 327.0, 188.0, 305.0, 175.0, 290.0, 158.0, 259.0, 264.0, 290.0, 248.0, 209.0, 301.0, 246.0, 199.0, 241.0, 273.0, 390.0, 188.0, 296.0, 239.0, 253.0, 196.0, 356.0, 69.0, 125.0, 247.0, 182.0, 239.0, 161.0, 123.0, 230.0, 307.0, 348.0, 188.0, 190.0, 252.0, 218.0, 342.0, 290.0, 341.0, 293.0, 180.0, 273.0, 128.0, 206.0, 249.0, 242.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [148.0, 151.0, 68.0, 58.0, 100.0, 76.0, 153.0, 143.0, 126.0, 130.0, 162.0, 140.0, 171.0, 185.0, 130.0, 125.0, 125.0, 108.0, 113.0, 117.0, 129.0, 170.0, 70.0, 71.0, 99.0, 103.0, 97.0, 93.0, 161.0, 140.0, 42.0, 32.0, 92.0, 87.0, 101.0, 100.0, 121.0, 115.0, 73.0, 79.0, 51.0, 70.0, 120.0, 119.0, 81.0, 63.0, 148.0, 140.0, 162.0, 168.0, 93.0, 100.0, 113.0, 115.0, 53.0, 60.0, 169.0, 147.0, 137.0, 151.0, 157.0, 148.0, 96.0, 119.0, 124.0, 132.0, 99.0, 131.0, 69.0, 107.0, 80.0, 62.0, 125.0, 108.0, 146.0, 139.0, 117.0, 135.0, 102.0, 96.0, 84.0, 71.0, 156.0, 145.0, 96.0, 94.0, 126.0, 132.0, 133.0, 156.0, 83.0, 92.0, 100.0, 88.0, 122.0, 111.0, 143.0, 150.0, 119.0, 122.0, 150.0, 147.0, 137.0, 155.0, 176.0, 172.0, 166.0, 161.0, 85.0, 103.0, 151.0, 154.0, 92.0, 83.0, 131.0, 159.0, 81.0, 77.0, 134.0, 125.0, 132.0, 132.0, 157.0, 133.0, 121.0, 127.0, 96.0, 113.0, 140.0, 161.0, 135.0, 111.0, 101.0, 98.0, 117.0, 124.0, 149.0, 124.0, 171.0, 219.0, 95.0, 93.0, 139.0, 157.0, 114.0, 125.0, 127.0, 126.0, 101.0, 95.0, 163.0, 193.0, 32.0, 37.0, 59.0, 66.0, 117.0, 130.0, 88.0, 94.0, 119.0, 120.0, 79.0, 82.0, 
57.0, 66.0, 122.0, 108.0, 148.0, 159.0, 174.0, 174.0, 89.0, 99.0, 105.0, 85.0, 134.0, 118.0, 111.0, 107.0, 170.0, 172.0, 159.0, 131.0, 169.0, 172.0, 148.0, 145.0, 80.0, 100.0, 138.0, 135.0, 69.0, 59.0, 90.0, 116.0, 116.0, 133.0, 120.0, 122.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6884233098602586, "mean_inference_ms": 1.2102454391431337, "mean_action_processing_ms": 0.133125106706314, "mean_env_wait_ms": 0.8727177708913959, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1408000, "num_agent_steps_trained": 1408000, "num_env_steps_sampled": 704000, "num_env_steps_trained": 704000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 704000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1408000, "timers": {"training_iteration_time_ms": 3656.641, "learn_time_ms": 1113.051, "learn_throughput": 11499.919, "synch_weights_time_ms": 10.828}, "counters": {"num_env_steps_sampled": 704000, "num_env_steps_trained": 704000, "num_agent_steps_sampled": 1408000, "num_agent_steps_trained": 1408000}, "done": false, "episodes_total": 1760, "training_iteration": 55, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-30", "timestamp": 1666580610, "time_this_iter_s": 3.7109124660491943, "time_total_s": 207.53650546073914, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": 
true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, 
"_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": 
{"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": 
{"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 
100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 207.53650546073914, "timesteps_since_restore": 0, "iterations_since_restore": 55, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.1, "ram_util_percent": 10.6}} +{"custom_metrics": 
{"sparse_reward_mean": 80.8, "sparse_reward_min": 20, "sparse_reward_max": 140, "shaped_reward_mean": 85.72, "shaped_reward_min": 29, "shaped_reward_max": 127, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.96, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 10.54, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 8.13, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 9.72, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.72, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.7, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 7.26, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 8.9, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 16, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.36, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.27, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.82, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.8, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.69, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.21, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.28, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 7.26, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 8.9, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 16, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.26, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 8.9, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 16, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 5.551115205843844e-18, "cur_lr": 0.0010000000474974513, "total_loss": -7.447449024766684e-05, "policy_loss": -0.00013297703117132187, "vf_loss": 7.748741149902344, 
"vf_explained_var": 0.5911275744438171, "kl": 0.002213613362982869, "entropy": 1.4327480792999268, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 716800, "num_env_steps_trained": 716800, "num_agent_steps_sampled": 1433600, "num_agent_steps_trained": 1433600}, "sampler_results": {"episode_reward_max": 390.0, "episode_reward_min": 69.0, "episode_reward_mean": 247.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 219.0}, "policy_reward_mean": {"ppo": 123.66}, "custom_metrics": {"sparse_reward_mean": 80.8, "sparse_reward_min": 20, "sparse_reward_max": 140, "shaped_reward_mean": 85.72, "shaped_reward_min": 29, "shaped_reward_max": 127, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 8.96, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 10.54, "onion_pickup_agent_1_min": 4, 
"onion_pickup_agent_1_max": 17, "useful_onion_pickup_agent_0_mean": 8.13, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 9.72, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 16, "onion_drop_agent_0_mean": 1.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.72, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 7.26, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 8.9, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 16, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.36, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.27, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 2.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.82, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.8, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 2.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, 
"soup_delivery_agent_1_mean": 2.69, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.21, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.28, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 7.26, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 8.9, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 16, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.26, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 8.9, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 16, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [256.0, 230.0, 176.0, 142.0, 233.0, 285.0, 252.0, 198.0, 155.0, 301.0, 190.0, 258.0, 289.0, 175.0, 188.0, 233.0, 293.0, 241.0, 297.0, 292.0, 348.0, 327.0, 188.0, 305.0, 175.0, 290.0, 158.0, 259.0, 264.0, 290.0, 248.0, 209.0, 301.0, 246.0, 199.0, 241.0, 273.0, 390.0, 188.0, 296.0, 239.0, 253.0, 196.0, 356.0, 69.0, 125.0, 247.0, 182.0, 239.0, 161.0, 123.0, 230.0, 307.0, 348.0, 188.0, 190.0, 252.0, 218.0, 342.0, 290.0, 341.0, 293.0, 180.0, 273.0, 128.0, 206.0, 249.0, 242.0, 195.0, 196.0, 170.0, 293.0, 295.0, 310.0, 244.0, 345.0, 252.0, 85.0, 287.0, 161.0, 342.0, 87.0, 333.0, 255.0, 190.0, 298.0, 163.0, 296.0, 285.0, 286.0, 306.0, 298.0, 284.0, 381.0, 292.0, 117.0, 336.0, 276.0, 338.0, 390.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [124.0, 132.0, 99.0, 131.0, 69.0, 107.0, 80.0, 62.0, 125.0, 108.0, 146.0, 139.0, 117.0, 135.0, 102.0, 96.0, 84.0, 71.0, 156.0, 145.0, 96.0, 94.0, 126.0, 132.0, 133.0, 156.0, 83.0, 92.0, 100.0, 88.0, 122.0, 111.0, 143.0, 150.0, 119.0, 122.0, 150.0, 147.0, 137.0, 155.0, 176.0, 172.0, 166.0, 161.0, 85.0, 103.0, 151.0, 154.0, 92.0, 83.0, 131.0, 159.0, 81.0, 77.0, 134.0, 125.0, 132.0, 132.0, 
157.0, 133.0, 121.0, 127.0, 96.0, 113.0, 140.0, 161.0, 135.0, 111.0, 101.0, 98.0, 117.0, 124.0, 149.0, 124.0, 171.0, 219.0, 95.0, 93.0, 139.0, 157.0, 114.0, 125.0, 127.0, 126.0, 101.0, 95.0, 163.0, 193.0, 32.0, 37.0, 59.0, 66.0, 117.0, 130.0, 88.0, 94.0, 119.0, 120.0, 79.0, 82.0, 57.0, 66.0, 122.0, 108.0, 148.0, 159.0, 174.0, 174.0, 89.0, 99.0, 105.0, 85.0, 134.0, 118.0, 111.0, 107.0, 170.0, 172.0, 159.0, 131.0, 169.0, 172.0, 148.0, 145.0, 80.0, 100.0, 138.0, 135.0, 69.0, 59.0, 90.0, 116.0, 116.0, 133.0, 120.0, 122.0, 96.0, 99.0, 100.0, 96.0, 73.0, 97.0, 128.0, 165.0, 144.0, 151.0, 156.0, 154.0, 127.0, 117.0, 179.0, 166.0, 108.0, 144.0, 45.0, 40.0, 131.0, 156.0, 82.0, 79.0, 162.0, 180.0, 34.0, 53.0, 169.0, 164.0, 135.0, 120.0, 99.0, 91.0, 150.0, 148.0, 65.0, 98.0, 162.0, 134.0, 140.0, 145.0, 132.0, 154.0, 144.0, 162.0, 148.0, 150.0, 148.0, 136.0, 200.0, 181.0, 117.0, 175.0, 52.0, 65.0, 156.0, 180.0, 137.0, 139.0, 164.0, 174.0, 200.0, 190.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6882648118682849, "mean_inference_ms": 1.2100162140522224, "mean_action_processing_ms": 0.13309189461136894, "mean_env_wait_ms": 0.8718994654894457, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 390.0, "episode_reward_min": 69.0, "episode_reward_mean": 247.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 219.0}, "policy_reward_mean": {"ppo": 123.66}, "hist_stats": {"episode_reward": [256.0, 230.0, 176.0, 142.0, 233.0, 285.0, 252.0, 198.0, 155.0, 301.0, 190.0, 258.0, 289.0, 175.0, 188.0, 233.0, 293.0, 241.0, 297.0, 292.0, 348.0, 327.0, 188.0, 305.0, 175.0, 290.0, 158.0, 259.0, 264.0, 290.0, 248.0, 209.0, 301.0, 246.0, 199.0, 241.0, 273.0, 390.0, 188.0, 296.0, 239.0, 253.0, 196.0, 356.0, 69.0, 125.0, 247.0, 182.0, 239.0, 161.0, 123.0, 230.0, 307.0, 348.0, 188.0, 190.0, 252.0, 218.0, 342.0, 290.0, 341.0, 293.0, 180.0, 273.0, 128.0, 206.0, 249.0, 242.0, 195.0, 196.0, 170.0, 293.0, 
295.0, 310.0, 244.0, 345.0, 252.0, 85.0, 287.0, 161.0, 342.0, 87.0, 333.0, 255.0, 190.0, 298.0, 163.0, 296.0, 285.0, 286.0, 306.0, 298.0, 284.0, 381.0, 292.0, 117.0, 336.0, 276.0, 338.0, 390.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [124.0, 132.0, 99.0, 131.0, 69.0, 107.0, 80.0, 62.0, 125.0, 108.0, 146.0, 139.0, 117.0, 135.0, 102.0, 96.0, 84.0, 71.0, 156.0, 145.0, 96.0, 94.0, 126.0, 132.0, 133.0, 156.0, 83.0, 92.0, 100.0, 88.0, 122.0, 111.0, 143.0, 150.0, 119.0, 122.0, 150.0, 147.0, 137.0, 155.0, 176.0, 172.0, 166.0, 161.0, 85.0, 103.0, 151.0, 154.0, 92.0, 83.0, 131.0, 159.0, 81.0, 77.0, 134.0, 125.0, 132.0, 132.0, 157.0, 133.0, 121.0, 127.0, 96.0, 113.0, 140.0, 161.0, 135.0, 111.0, 101.0, 98.0, 117.0, 124.0, 149.0, 124.0, 171.0, 219.0, 95.0, 93.0, 139.0, 157.0, 114.0, 125.0, 127.0, 126.0, 101.0, 95.0, 163.0, 193.0, 32.0, 37.0, 59.0, 66.0, 117.0, 130.0, 88.0, 94.0, 119.0, 120.0, 79.0, 82.0, 57.0, 66.0, 122.0, 108.0, 148.0, 159.0, 174.0, 174.0, 89.0, 99.0, 105.0, 85.0, 134.0, 118.0, 111.0, 107.0, 170.0, 172.0, 159.0, 131.0, 169.0, 172.0, 148.0, 145.0, 80.0, 100.0, 138.0, 135.0, 69.0, 59.0, 90.0, 116.0, 116.0, 133.0, 120.0, 122.0, 96.0, 99.0, 100.0, 96.0, 73.0, 97.0, 128.0, 165.0, 144.0, 151.0, 156.0, 154.0, 127.0, 117.0, 179.0, 166.0, 108.0, 144.0, 45.0, 40.0, 131.0, 156.0, 82.0, 79.0, 162.0, 180.0, 34.0, 53.0, 169.0, 164.0, 135.0, 120.0, 99.0, 91.0, 150.0, 148.0, 65.0, 98.0, 162.0, 134.0, 140.0, 145.0, 132.0, 154.0, 144.0, 162.0, 148.0, 150.0, 148.0, 136.0, 200.0, 181.0, 
117.0, 175.0, 52.0, 65.0, 156.0, 180.0, 137.0, 139.0, 164.0, 174.0, 200.0, 190.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6882648118682849, "mean_inference_ms": 1.2100162140522224, "mean_action_processing_ms": 0.13309189461136894, "mean_env_wait_ms": 0.8718994654894457, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1433600, "num_agent_steps_trained": 1433600, "num_env_steps_sampled": 716800, "num_env_steps_trained": 716800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 716800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1433600, "timers": {"training_iteration_time_ms": 3651.298, "learn_time_ms": 1117.544, "learn_throughput": 11453.684, "synch_weights_time_ms": 11.295}, "counters": {"num_env_steps_sampled": 716800, "num_env_steps_trained": 716800, "num_agent_steps_sampled": 1433600, "num_agent_steps_trained": 1433600}, "done": false, "episodes_total": 1792, "training_iteration": 56, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-34", "timestamp": 1666580614, "time_this_iter_s": 3.633439302444458, "time_total_s": 211.1699447631836, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": 
"relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", 
"env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, 
"conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, 
"_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, 
"fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 211.1699447631836, "timesteps_since_restore": 0, "iterations_since_restore": 56, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.733333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 87.4, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 88.81, "shaped_reward_min": 29, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.41, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 10.68, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 8.77, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 10.08, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 7.87, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 9.21, 
"potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 2.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.83, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.04, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.84, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.86, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.3, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 7.87, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 9.21, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.87, 
"viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 9.21, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.775557602921922e-18, "cur_lr": 0.0010000000474974513, "total_loss": -0.00404885271564126, "policy_loss": -0.004144429694861174, "vf_loss": 8.007272720336914, "vf_explained_var": 0.5925405025482178, "kl": 0.002025268506258726, "entropy": 1.4103046655654907, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 729600, "num_env_steps_trained": 729600, 
"num_agent_steps_sampled": 1459200, "num_agent_steps_trained": 1459200}, "sampler_results": {"episode_reward_max": 401.0, "episode_reward_min": 69.0, "episode_reward_mean": 263.61, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 219.0}, "policy_reward_mean": {"ppo": 131.805}, "custom_metrics": {"sparse_reward_mean": 87.4, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 88.81, "shaped_reward_min": 29, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.41, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 16, "onion_pickup_agent_1_mean": 10.68, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 8.77, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 15, "useful_onion_pickup_agent_1_mean": 10.08, "useful_onion_pickup_agent_1_min": 2, 
"useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.53, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 7.87, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 15, "potting_onion_agent_1_mean": 9.21, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 2.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 1.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.83, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.04, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.84, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.86, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.3, "soup_drop_agent_1_min": 0, 
"soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 7.87, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 15, "optimal_onion_potting_agent_1_mean": 9.21, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 7.87, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 15, "viable_onion_potting_agent_1_mean": 9.21, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [301.0, 246.0, 199.0, 241.0, 273.0, 390.0, 188.0, 296.0, 239.0, 253.0, 196.0, 356.0, 69.0, 125.0, 247.0, 182.0, 239.0, 161.0, 123.0, 230.0, 307.0, 348.0, 188.0, 190.0, 252.0, 218.0, 342.0, 290.0, 341.0, 293.0, 180.0, 273.0, 128.0, 206.0, 249.0, 242.0, 195.0, 196.0, 170.0, 293.0, 295.0, 310.0, 244.0, 345.0, 252.0, 85.0, 287.0, 161.0, 342.0, 87.0, 333.0, 255.0, 190.0, 298.0, 163.0, 296.0, 285.0, 286.0, 306.0, 298.0, 284.0, 381.0, 292.0, 117.0, 336.0, 276.0, 338.0, 390.0, 287.0, 399.0, 384.0, 347.0, 247.0, 241.0, 174.0, 341.0, 332.0, 293.0, 292.0, 341.0, 330.0, 98.0, 338.0, 401.0, 312.0, 381.0, 236.0, 230.0, 284.0, 344.0, 351.0, 355.0, 298.0, 324.0, 220.0, 147.0, 175.0, 316.0, 287.0, 269.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [140.0, 161.0, 135.0, 111.0, 101.0, 98.0, 117.0, 124.0, 149.0, 124.0, 171.0, 219.0, 95.0, 93.0, 139.0, 157.0, 114.0, 125.0, 127.0, 126.0, 101.0, 95.0, 163.0, 193.0, 32.0, 37.0, 59.0, 66.0, 117.0, 130.0, 88.0, 94.0, 119.0, 120.0, 79.0, 82.0, 57.0, 66.0, 122.0, 108.0, 148.0, 159.0, 174.0, 174.0, 89.0, 99.0, 105.0, 85.0, 134.0, 118.0, 111.0, 107.0, 170.0, 172.0, 159.0, 131.0, 169.0, 172.0, 148.0, 145.0, 80.0, 100.0, 138.0, 135.0, 69.0, 59.0, 90.0, 116.0, 116.0, 133.0, 120.0, 122.0, 96.0, 99.0, 100.0, 96.0, 73.0, 97.0, 128.0, 165.0, 144.0, 151.0, 156.0, 154.0, 127.0, 117.0, 179.0, 166.0, 108.0, 144.0, 45.0, 40.0, 131.0, 156.0, 82.0, 79.0, 
162.0, 180.0, 34.0, 53.0, 169.0, 164.0, 135.0, 120.0, 99.0, 91.0, 150.0, 148.0, 65.0, 98.0, 162.0, 134.0, 140.0, 145.0, 132.0, 154.0, 144.0, 162.0, 148.0, 150.0, 148.0, 136.0, 200.0, 181.0, 117.0, 175.0, 52.0, 65.0, 156.0, 180.0, 137.0, 139.0, 164.0, 174.0, 200.0, 190.0, 135.0, 152.0, 202.0, 197.0, 185.0, 199.0, 177.0, 170.0, 122.0, 125.0, 106.0, 135.0, 97.0, 77.0, 163.0, 178.0, 169.0, 163.0, 144.0, 149.0, 149.0, 143.0, 165.0, 176.0, 170.0, 160.0, 45.0, 53.0, 173.0, 165.0, 212.0, 189.0, 154.0, 158.0, 172.0, 209.0, 119.0, 117.0, 124.0, 106.0, 140.0, 144.0, 174.0, 170.0, 174.0, 177.0, 176.0, 179.0, 128.0, 170.0, 157.0, 167.0, 115.0, 105.0, 72.0, 75.0, 80.0, 95.0, 143.0, 173.0, 145.0, 142.0, 134.0, 135.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6881554542706896, "mean_inference_ms": 1.2097325725572832, "mean_action_processing_ms": 0.1330667926674265, "mean_env_wait_ms": 0.8711230320174765, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 401.0, "episode_reward_min": 69.0, "episode_reward_mean": 263.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 219.0}, "policy_reward_mean": {"ppo": 131.805}, "hist_stats": {"episode_reward": [301.0, 246.0, 199.0, 241.0, 273.0, 390.0, 188.0, 296.0, 239.0, 253.0, 196.0, 356.0, 69.0, 125.0, 247.0, 182.0, 239.0, 161.0, 123.0, 230.0, 307.0, 348.0, 188.0, 190.0, 252.0, 218.0, 342.0, 290.0, 341.0, 293.0, 180.0, 273.0, 128.0, 206.0, 249.0, 242.0, 195.0, 196.0, 170.0, 293.0, 295.0, 310.0, 244.0, 345.0, 252.0, 85.0, 287.0, 161.0, 342.0, 87.0, 333.0, 255.0, 190.0, 298.0, 163.0, 296.0, 285.0, 286.0, 306.0, 298.0, 284.0, 381.0, 292.0, 117.0, 336.0, 276.0, 338.0, 390.0, 287.0, 399.0, 384.0, 347.0, 247.0, 241.0, 174.0, 341.0, 332.0, 293.0, 292.0, 341.0, 330.0, 98.0, 338.0, 401.0, 312.0, 381.0, 236.0, 230.0, 284.0, 344.0, 351.0, 355.0, 298.0, 324.0, 220.0, 147.0, 175.0, 316.0, 287.0, 269.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [140.0, 161.0, 135.0, 111.0, 101.0, 98.0, 117.0, 124.0, 149.0, 124.0, 171.0, 219.0, 95.0, 93.0, 139.0, 157.0, 114.0, 125.0, 127.0, 126.0, 101.0, 95.0, 163.0, 193.0, 32.0, 37.0, 59.0, 66.0, 117.0, 130.0, 88.0, 94.0, 119.0, 120.0, 79.0, 82.0, 57.0, 66.0, 122.0, 108.0, 148.0, 159.0, 174.0, 174.0, 89.0, 99.0, 105.0, 85.0, 134.0, 118.0, 111.0, 107.0, 170.0, 172.0, 159.0, 131.0, 169.0, 172.0, 148.0, 145.0, 80.0, 100.0, 138.0, 135.0, 69.0, 59.0, 90.0, 116.0, 116.0, 133.0, 120.0, 122.0, 96.0, 99.0, 100.0, 96.0, 73.0, 97.0, 128.0, 165.0, 144.0, 151.0, 156.0, 154.0, 127.0, 117.0, 179.0, 166.0, 108.0, 144.0, 45.0, 40.0, 131.0, 156.0, 82.0, 79.0, 162.0, 180.0, 34.0, 53.0, 169.0, 164.0, 135.0, 120.0, 99.0, 91.0, 150.0, 148.0, 65.0, 98.0, 162.0, 134.0, 140.0, 145.0, 132.0, 154.0, 144.0, 162.0, 148.0, 150.0, 148.0, 136.0, 200.0, 181.0, 117.0, 175.0, 52.0, 65.0, 156.0, 180.0, 137.0, 139.0, 164.0, 174.0, 200.0, 190.0, 135.0, 152.0, 202.0, 197.0, 185.0, 199.0, 177.0, 170.0, 122.0, 125.0, 106.0, 135.0, 97.0, 77.0, 163.0, 178.0, 169.0, 163.0, 144.0, 149.0, 149.0, 143.0, 165.0, 176.0, 170.0, 160.0, 45.0, 53.0, 173.0, 165.0, 212.0, 189.0, 154.0, 158.0, 172.0, 209.0, 119.0, 117.0, 124.0, 106.0, 140.0, 144.0, 174.0, 170.0, 174.0, 177.0, 176.0, 179.0, 128.0, 170.0, 157.0, 167.0, 115.0, 105.0, 72.0, 75.0, 80.0, 95.0, 143.0, 173.0, 145.0, 142.0, 134.0, 135.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6881554542706896, "mean_inference_ms": 1.2097325725572832, "mean_action_processing_ms": 
0.1330667926674265, "mean_env_wait_ms": 0.8711230320174765, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1459200, "num_agent_steps_trained": 1459200, "num_env_steps_sampled": 729600, "num_env_steps_trained": 729600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 729600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1459200, "timers": {"training_iteration_time_ms": 3658.574, "learn_time_ms": 1121.88, "learn_throughput": 11409.424, "synch_weights_time_ms": 12.08}, "counters": {"num_env_steps_sampled": 729600, "num_env_steps_trained": 729600, "num_agent_steps_sampled": 1459200, "num_agent_steps_trained": 1459200}, "done": false, "episodes_total": 1824, "training_iteration": 57, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-38", "timestamp": 1666580618, "time_this_iter_s": 3.7686376571655273, "time_total_s": 214.93858242034912, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, 
"env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": 
false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, 
"SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, 
"lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, 
"monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 214.93858242034912, "timesteps_since_restore": 0, "iterations_since_restore": 57, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.7, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 95.0, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 93.52, "shaped_reward_min": 45, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.86, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 11.11, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 9.27, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 10.57, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.51, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.35, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.72, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 2, 
"dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.25, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.27, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 2.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.89, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.85, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.11, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.94, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.06, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.26, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 8.35, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.72, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.35, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.72, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 17, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.387778801460961e-18, "cur_lr": 0.0010000000474974513, "total_loss": -0.0027208023238927126, "policy_loss": -0.0028445827774703503, "vf_loss": 8.135013580322266, "vf_explained_var": 0.5896250009536743, "kl": 0.001949745579622686, "entropy": 1.3794457912445068, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 742400, "num_env_steps_trained": 742400, "num_agent_steps_sampled": 1484800, "num_agent_steps_trained": 1484800}, "sampler_results": {"episode_reward_max": 402.0, "episode_reward_min": 85.0, "episode_reward_mean": 283.52, "episode_len_mean": 
400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 212.0}, "policy_reward_mean": {"ppo": 141.76}, "custom_metrics": {"sparse_reward_mean": 95.0, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 93.52, "shaped_reward_min": 45, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 9.86, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 11.11, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 9.27, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 10.57, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.18, "onion_drop_agent_1_min": 0, 
"onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.52, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.51, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.35, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.72, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.25, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.27, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 2.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.89, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.85, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.11, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 2.94, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.06, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.26, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 8.35, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.72, 
"optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.35, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.72, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [128.0, 206.0, 249.0, 242.0, 195.0, 196.0, 170.0, 293.0, 295.0, 310.0, 244.0, 345.0, 
252.0, 85.0, 287.0, 161.0, 342.0, 87.0, 333.0, 255.0, 190.0, 298.0, 163.0, 296.0, 285.0, 286.0, 306.0, 298.0, 284.0, 381.0, 292.0, 117.0, 336.0, 276.0, 338.0, 390.0, 287.0, 399.0, 384.0, 347.0, 247.0, 241.0, 174.0, 341.0, 332.0, 293.0, 292.0, 341.0, 330.0, 98.0, 338.0, 401.0, 312.0, 381.0, 236.0, 230.0, 284.0, 344.0, 351.0, 355.0, 298.0, 324.0, 220.0, 147.0, 175.0, 316.0, 287.0, 269.0, 281.0, 352.0, 336.0, 287.0, 395.0, 241.0, 231.0, 354.0, 229.0, 402.0, 291.0, 273.0, 347.0, 298.0, 247.0, 336.0, 399.0, 338.0, 296.0, 313.0, 336.0, 256.0, 87.0, 344.0, 273.0, 250.0, 333.0, 276.0, 296.0, 290.0, 390.0, 390.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [69.0, 59.0, 90.0, 116.0, 116.0, 133.0, 120.0, 122.0, 96.0, 99.0, 100.0, 96.0, 73.0, 97.0, 128.0, 165.0, 144.0, 151.0, 156.0, 154.0, 127.0, 117.0, 179.0, 166.0, 108.0, 144.0, 45.0, 40.0, 131.0, 156.0, 82.0, 79.0, 162.0, 180.0, 34.0, 53.0, 169.0, 164.0, 135.0, 120.0, 99.0, 91.0, 150.0, 148.0, 65.0, 98.0, 162.0, 134.0, 140.0, 145.0, 132.0, 154.0, 144.0, 162.0, 148.0, 150.0, 148.0, 136.0, 200.0, 181.0, 117.0, 175.0, 52.0, 65.0, 156.0, 180.0, 137.0, 139.0, 164.0, 174.0, 200.0, 190.0, 135.0, 152.0, 202.0, 197.0, 185.0, 199.0, 177.0, 170.0, 122.0, 125.0, 106.0, 135.0, 97.0, 77.0, 163.0, 178.0, 169.0, 163.0, 144.0, 149.0, 149.0, 143.0, 165.0, 176.0, 170.0, 160.0, 45.0, 53.0, 173.0, 165.0, 212.0, 189.0, 154.0, 158.0, 172.0, 209.0, 119.0, 117.0, 124.0, 106.0, 140.0, 144.0, 174.0, 170.0, 174.0, 177.0, 176.0, 179.0, 128.0, 170.0, 157.0, 167.0, 
115.0, 105.0, 72.0, 75.0, 80.0, 95.0, 143.0, 173.0, 145.0, 142.0, 134.0, 135.0, 139.0, 142.0, 167.0, 185.0, 161.0, 175.0, 136.0, 151.0, 203.0, 192.0, 125.0, 116.0, 127.0, 104.0, 172.0, 182.0, 99.0, 130.0, 200.0, 202.0, 145.0, 146.0, 148.0, 125.0, 159.0, 188.0, 135.0, 163.0, 130.0, 117.0, 167.0, 169.0, 201.0, 198.0, 157.0, 181.0, 142.0, 154.0, 162.0, 151.0, 165.0, 171.0, 136.0, 120.0, 31.0, 56.0, 158.0, 186.0, 133.0, 140.0, 131.0, 119.0, 176.0, 157.0, 130.0, 146.0, 139.0, 157.0, 144.0, 146.0, 195.0, 195.0, 198.0, 192.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6880473354254739, "mean_inference_ms": 1.210219939048055, "mean_action_processing_ms": 0.1330240938839774, "mean_env_wait_ms": 0.8706293930800345, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 402.0, "episode_reward_min": 85.0, "episode_reward_mean": 283.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 212.0}, "policy_reward_mean": {"ppo": 141.76}, "hist_stats": {"episode_reward": [128.0, 206.0, 249.0, 242.0, 195.0, 196.0, 170.0, 293.0, 295.0, 310.0, 244.0, 345.0, 252.0, 85.0, 287.0, 161.0, 342.0, 87.0, 333.0, 255.0, 190.0, 298.0, 163.0, 296.0, 285.0, 286.0, 306.0, 298.0, 284.0, 381.0, 292.0, 117.0, 336.0, 276.0, 338.0, 390.0, 287.0, 399.0, 384.0, 347.0, 247.0, 241.0, 174.0, 341.0, 332.0, 293.0, 292.0, 341.0, 330.0, 98.0, 338.0, 401.0, 312.0, 381.0, 236.0, 230.0, 284.0, 344.0, 351.0, 355.0, 298.0, 324.0, 220.0, 147.0, 175.0, 316.0, 287.0, 269.0, 281.0, 352.0, 336.0, 287.0, 395.0, 241.0, 231.0, 354.0, 229.0, 402.0, 291.0, 273.0, 347.0, 298.0, 247.0, 336.0, 399.0, 338.0, 296.0, 313.0, 336.0, 256.0, 87.0, 344.0, 273.0, 250.0, 333.0, 276.0, 296.0, 290.0, 390.0, 390.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [69.0, 59.0, 90.0, 116.0, 116.0, 133.0, 120.0, 122.0, 96.0, 99.0, 100.0, 96.0, 73.0, 97.0, 128.0, 165.0, 144.0, 151.0, 156.0, 154.0, 127.0, 117.0, 179.0, 166.0, 108.0, 144.0, 45.0, 40.0, 131.0, 156.0, 82.0, 79.0, 162.0, 180.0, 34.0, 53.0, 169.0, 164.0, 135.0, 120.0, 99.0, 91.0, 150.0, 148.0, 65.0, 98.0, 162.0, 134.0, 140.0, 145.0, 132.0, 154.0, 144.0, 162.0, 148.0, 150.0, 148.0, 136.0, 200.0, 181.0, 117.0, 175.0, 52.0, 65.0, 156.0, 180.0, 137.0, 139.0, 164.0, 174.0, 200.0, 190.0, 135.0, 152.0, 202.0, 197.0, 185.0, 199.0, 177.0, 170.0, 122.0, 125.0, 106.0, 135.0, 97.0, 77.0, 163.0, 178.0, 169.0, 163.0, 144.0, 149.0, 149.0, 143.0, 165.0, 176.0, 170.0, 160.0, 45.0, 53.0, 173.0, 165.0, 212.0, 189.0, 154.0, 158.0, 172.0, 209.0, 119.0, 117.0, 124.0, 106.0, 140.0, 144.0, 174.0, 170.0, 174.0, 177.0, 176.0, 179.0, 128.0, 170.0, 157.0, 167.0, 115.0, 105.0, 72.0, 75.0, 80.0, 95.0, 143.0, 173.0, 145.0, 142.0, 134.0, 135.0, 139.0, 142.0, 167.0, 185.0, 161.0, 175.0, 136.0, 151.0, 203.0, 192.0, 125.0, 116.0, 127.0, 104.0, 172.0, 182.0, 99.0, 130.0, 200.0, 202.0, 145.0, 146.0, 148.0, 125.0, 159.0, 188.0, 135.0, 163.0, 130.0, 117.0, 167.0, 169.0, 201.0, 198.0, 157.0, 181.0, 142.0, 154.0, 162.0, 151.0, 165.0, 171.0, 136.0, 120.0, 31.0, 56.0, 158.0, 186.0, 133.0, 140.0, 131.0, 119.0, 176.0, 157.0, 130.0, 146.0, 139.0, 157.0, 144.0, 146.0, 195.0, 195.0, 198.0, 192.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6880473354254739, "mean_inference_ms": 1.210219939048055, "mean_action_processing_ms": 0.1330240938839774, "mean_env_wait_ms": 0.8706293930800345, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1484800, 
"num_agent_steps_trained": 1484800, "num_env_steps_sampled": 742400, "num_env_steps_trained": 742400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 742400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1484800, "timers": {"training_iteration_time_ms": 3684.015, "learn_time_ms": 1122.652, "learn_throughput": 11401.576, "synch_weights_time_ms": 12.325}, "counters": {"num_env_steps_sampled": 742400, "num_env_steps_trained": 742400, "num_agent_steps_sampled": 1484800, "num_agent_steps_trained": 1484800}, "done": false, "episodes_total": 1856, "training_iteration": 58, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-42", "timestamp": 1666580622, "time_this_iter_s": 3.8663227558135986, "time_total_s": 218.80490517616272, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": 
true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, 
"attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], 
"same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, 
"attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, 
"replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": 
-1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 218.80490517616272, "timesteps_since_restore": 0, "iterations_since_restore": 58, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.733333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 100.6, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 95.3, "shaped_reward_min": 42, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.13, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 11.47, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 9.6, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 10.85, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.5, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.57, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.98, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.44, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 
1.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.98, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.84, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.39, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.89, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 8.57, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.98, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.57, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.98, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 6.938894007304805e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.00216166814789176, "policy_loss": -0.002247137250378728, "vf_loss": 7.820727348327637, "vf_explained_var": 0.620682954788208, "kl": 0.0020027090795338154, "entropy": 1.3932123184204102, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 755200, "num_env_steps_trained": 755200, "num_agent_steps_sampled": 1510400, "num_agent_steps_trained": 1510400}, "sampler_results": {"episode_reward_max": 404.0, "episode_reward_min": 77.0, "episode_reward_mean": 296.5, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 218.0}, "policy_reward_mean": {"ppo": 148.25}, "custom_metrics": {"sparse_reward_mean": 
100.6, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 95.3, "shaped_reward_min": 42, "shaped_reward_max": 124, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.13, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 11.47, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 9.6, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 10.85, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.5, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.57, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.98, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 17, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.44, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.98, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.84, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.39, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.11, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.89, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 8.57, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.98, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 17, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.57, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.98, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 17, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [336.0, 276.0, 338.0, 390.0, 287.0, 399.0, 384.0, 347.0, 247.0, 241.0, 174.0, 341.0, 332.0, 293.0, 292.0, 341.0, 330.0, 98.0, 338.0, 401.0, 312.0, 381.0, 236.0, 230.0, 284.0, 344.0, 351.0, 355.0, 298.0, 324.0, 220.0, 147.0, 175.0, 316.0, 287.0, 269.0, 281.0, 352.0, 336.0, 287.0, 395.0, 241.0, 
231.0, 354.0, 229.0, 402.0, 291.0, 273.0, 347.0, 298.0, 247.0, 336.0, 399.0, 338.0, 296.0, 313.0, 336.0, 256.0, 87.0, 344.0, 273.0, 250.0, 333.0, 276.0, 296.0, 290.0, 390.0, 390.0, 336.0, 327.0, 244.0, 141.0, 287.0, 231.0, 316.0, 253.0, 288.0, 298.0, 298.0, 381.0, 171.0, 162.0, 234.0, 342.0, 384.0, 304.0, 381.0, 399.0, 298.0, 295.0, 347.0, 182.0, 301.0, 339.0, 244.0, 296.0, 350.0, 77.0, 404.0, 259.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [156.0, 180.0, 137.0, 139.0, 164.0, 174.0, 200.0, 190.0, 135.0, 152.0, 202.0, 197.0, 185.0, 199.0, 177.0, 170.0, 122.0, 125.0, 106.0, 135.0, 97.0, 77.0, 163.0, 178.0, 169.0, 163.0, 144.0, 149.0, 149.0, 143.0, 165.0, 176.0, 170.0, 160.0, 45.0, 53.0, 173.0, 165.0, 212.0, 189.0, 154.0, 158.0, 172.0, 209.0, 119.0, 117.0, 124.0, 106.0, 140.0, 144.0, 174.0, 170.0, 174.0, 177.0, 176.0, 179.0, 128.0, 170.0, 157.0, 167.0, 115.0, 105.0, 72.0, 75.0, 80.0, 95.0, 143.0, 173.0, 145.0, 142.0, 134.0, 135.0, 139.0, 142.0, 167.0, 185.0, 161.0, 175.0, 136.0, 151.0, 203.0, 192.0, 125.0, 116.0, 127.0, 104.0, 172.0, 182.0, 99.0, 130.0, 200.0, 202.0, 145.0, 146.0, 148.0, 125.0, 159.0, 188.0, 135.0, 163.0, 130.0, 117.0, 167.0, 169.0, 201.0, 198.0, 157.0, 181.0, 142.0, 154.0, 162.0, 151.0, 165.0, 171.0, 136.0, 120.0, 31.0, 56.0, 158.0, 186.0, 133.0, 140.0, 131.0, 119.0, 176.0, 157.0, 130.0, 146.0, 139.0, 157.0, 144.0, 146.0, 195.0, 195.0, 198.0, 192.0, 172.0, 164.0, 170.0, 157.0, 127.0, 117.0, 70.0, 71.0, 143.0, 144.0, 115.0, 116.0, 164.0, 152.0, 128.0, 125.0, 
148.0, 140.0, 162.0, 136.0, 136.0, 162.0, 163.0, 218.0, 82.0, 89.0, 79.0, 83.0, 116.0, 118.0, 180.0, 162.0, 203.0, 181.0, 142.0, 162.0, 192.0, 189.0, 206.0, 193.0, 152.0, 146.0, 145.0, 150.0, 178.0, 169.0, 94.0, 88.0, 136.0, 165.0, 181.0, 158.0, 134.0, 110.0, 135.0, 161.0, 165.0, 185.0, 34.0, 43.0, 204.0, 200.0, 133.0, 126.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6879893960396974, "mean_inference_ms": 1.210747046238731, "mean_action_processing_ms": 0.13300267223965467, "mean_env_wait_ms": 0.8702240867475001, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 404.0, "episode_reward_min": 77.0, "episode_reward_mean": 296.5, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 218.0}, "policy_reward_mean": {"ppo": 148.25}, "hist_stats": {"episode_reward": [336.0, 276.0, 338.0, 390.0, 287.0, 399.0, 384.0, 347.0, 247.0, 241.0, 174.0, 341.0, 332.0, 293.0, 292.0, 341.0, 330.0, 98.0, 338.0, 401.0, 312.0, 381.0, 236.0, 230.0, 284.0, 344.0, 351.0, 355.0, 298.0, 324.0, 220.0, 147.0, 175.0, 316.0, 287.0, 269.0, 281.0, 352.0, 336.0, 287.0, 395.0, 241.0, 231.0, 354.0, 229.0, 402.0, 291.0, 273.0, 347.0, 298.0, 247.0, 336.0, 399.0, 338.0, 296.0, 313.0, 336.0, 256.0, 87.0, 344.0, 273.0, 250.0, 333.0, 276.0, 296.0, 290.0, 390.0, 390.0, 336.0, 327.0, 244.0, 141.0, 287.0, 231.0, 316.0, 253.0, 288.0, 298.0, 298.0, 381.0, 171.0, 162.0, 234.0, 342.0, 384.0, 304.0, 381.0, 399.0, 298.0, 295.0, 347.0, 182.0, 301.0, 339.0, 244.0, 296.0, 350.0, 77.0, 404.0, 259.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [156.0, 180.0, 137.0, 139.0, 164.0, 174.0, 200.0, 190.0, 135.0, 152.0, 202.0, 197.0, 185.0, 199.0, 177.0, 170.0, 122.0, 125.0, 106.0, 135.0, 97.0, 77.0, 163.0, 178.0, 169.0, 163.0, 144.0, 149.0, 149.0, 143.0, 165.0, 176.0, 170.0, 160.0, 45.0, 53.0, 173.0, 165.0, 212.0, 189.0, 154.0, 158.0, 172.0, 209.0, 119.0, 117.0, 124.0, 106.0, 140.0, 144.0, 174.0, 170.0, 174.0, 177.0, 176.0, 179.0, 128.0, 170.0, 157.0, 167.0, 115.0, 105.0, 72.0, 75.0, 80.0, 95.0, 143.0, 173.0, 145.0, 142.0, 134.0, 135.0, 139.0, 142.0, 167.0, 185.0, 161.0, 175.0, 136.0, 151.0, 203.0, 192.0, 125.0, 116.0, 127.0, 104.0, 172.0, 182.0, 99.0, 130.0, 200.0, 202.0, 145.0, 146.0, 148.0, 125.0, 159.0, 188.0, 135.0, 163.0, 130.0, 117.0, 167.0, 169.0, 201.0, 198.0, 157.0, 181.0, 142.0, 154.0, 162.0, 151.0, 165.0, 171.0, 136.0, 120.0, 31.0, 56.0, 158.0, 186.0, 133.0, 140.0, 131.0, 119.0, 176.0, 157.0, 130.0, 146.0, 139.0, 157.0, 144.0, 146.0, 195.0, 195.0, 198.0, 192.0, 172.0, 164.0, 170.0, 157.0, 127.0, 117.0, 70.0, 71.0, 143.0, 144.0, 115.0, 116.0, 164.0, 152.0, 128.0, 125.0, 148.0, 140.0, 162.0, 136.0, 136.0, 162.0, 163.0, 218.0, 82.0, 89.0, 79.0, 83.0, 116.0, 118.0, 180.0, 162.0, 203.0, 181.0, 142.0, 162.0, 192.0, 189.0, 206.0, 193.0, 152.0, 146.0, 145.0, 150.0, 178.0, 169.0, 94.0, 88.0, 136.0, 165.0, 181.0, 158.0, 134.0, 110.0, 135.0, 161.0, 165.0, 185.0, 34.0, 43.0, 204.0, 200.0, 133.0, 126.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6879893960396974, "mean_inference_ms": 1.210747046238731, "mean_action_processing_ms": 0.13300267223965467, "mean_env_wait_ms": 0.8702240867475001, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1510400, "num_agent_steps_trained": 1510400, "num_env_steps_sampled": 755200, "num_env_steps_trained": 755200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, 
"timesteps_total": 755200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1510400, "timers": {"training_iteration_time_ms": 3682.605, "learn_time_ms": 1126.084, "learn_throughput": 11366.825, "synch_weights_time_ms": 12.668}, "counters": {"num_env_steps_sampled": 755200, "num_env_steps_trained": 755200, "num_agent_steps_sampled": 1510400, "num_agent_steps_trained": 1510400}, "done": false, "episodes_total": 1888, "training_iteration": 59, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-46", "timestamp": 1666580626, "time_this_iter_s": 3.6895713806152344, "time_total_s": 222.49447655677795, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, 
"eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, 
"attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], 
[Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, 
"attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, 
"min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, 
"prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 222.49447655677795, "timesteps_since_restore": 0, "iterations_since_restore": 59, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.94, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 101.2, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 95.08, "shaped_reward_min": 34, "shaped_reward_max": 128, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.35, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.07, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 9.85, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.36, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.52, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.77, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.67, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 16, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.44, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.14, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.95, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.93, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.3, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.1, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.88, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 8.77, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.67, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 16, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.77, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.67, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 16, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.4694470036524025e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.003538618329912424, "policy_loss": -0.003653008723631501, "vf_loss": 8.01357650756836, "vf_explained_var": 0.6434005498886108, "kl": 0.0017745888326317072, "entropy": 1.3739397525787354, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 768000, "num_env_steps_trained": 768000, "num_agent_steps_sampled": 1536000, "num_agent_steps_trained": 1536000}, "sampler_results": {"episode_reward_max": 408.0, "episode_reward_min": 74.0, "episode_reward_mean": 297.48, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 218.0}, "policy_reward_mean": {"ppo": 148.74}, "custom_metrics": {"sparse_reward_mean": 101.2, "sparse_reward_min": 0, "sparse_reward_max": 140, "shaped_reward_mean": 95.08, "shaped_reward_min": 34, "shaped_reward_max": 
128, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.35, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.07, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 9.85, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.36, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.52, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 8.77, "potting_onion_agent_0_min": 2, 
"potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 9.67, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 16, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.44, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 2.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.95, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.93, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.3, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.1, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.88, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.14, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 5, "optimal_onion_potting_agent_0_mean": 8.77, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 9.67, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 16, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, 
"optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.77, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 9.67, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 16, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [175.0, 316.0, 287.0, 269.0, 281.0, 352.0, 336.0, 287.0, 395.0, 241.0, 231.0, 354.0, 229.0, 402.0, 291.0, 273.0, 347.0, 298.0, 247.0, 336.0, 399.0, 338.0, 296.0, 313.0, 336.0, 256.0, 87.0, 344.0, 273.0, 250.0, 333.0, 276.0, 296.0, 290.0, 390.0, 390.0, 336.0, 327.0, 244.0, 141.0, 287.0, 231.0, 316.0, 253.0, 288.0, 298.0, 298.0, 381.0, 171.0, 162.0, 234.0, 342.0, 384.0, 304.0, 
381.0, 399.0, 298.0, 295.0, 347.0, 182.0, 301.0, 339.0, 244.0, 296.0, 350.0, 77.0, 404.0, 259.0, 290.0, 283.0, 293.0, 393.0, 408.0, 384.0, 381.0, 342.0, 290.0, 350.0, 299.0, 356.0, 230.0, 248.0, 273.0, 74.0, 330.0, 289.0, 299.0, 298.0, 188.0, 236.0, 393.0, 350.0, 336.0, 338.0, 349.0, 312.0, 120.0, 336.0, 361.0, 336.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [80.0, 95.0, 143.0, 173.0, 145.0, 142.0, 134.0, 135.0, 139.0, 142.0, 167.0, 185.0, 161.0, 175.0, 136.0, 151.0, 203.0, 192.0, 125.0, 116.0, 127.0, 104.0, 172.0, 182.0, 99.0, 130.0, 200.0, 202.0, 145.0, 146.0, 148.0, 125.0, 159.0, 188.0, 135.0, 163.0, 130.0, 117.0, 167.0, 169.0, 201.0, 198.0, 157.0, 181.0, 142.0, 154.0, 162.0, 151.0, 165.0, 171.0, 136.0, 120.0, 31.0, 56.0, 158.0, 186.0, 133.0, 140.0, 131.0, 119.0, 176.0, 157.0, 130.0, 146.0, 139.0, 157.0, 144.0, 146.0, 195.0, 195.0, 198.0, 192.0, 172.0, 164.0, 170.0, 157.0, 127.0, 117.0, 70.0, 71.0, 143.0, 144.0, 115.0, 116.0, 164.0, 152.0, 128.0, 125.0, 148.0, 140.0, 162.0, 136.0, 136.0, 162.0, 163.0, 218.0, 82.0, 89.0, 79.0, 83.0, 116.0, 118.0, 180.0, 162.0, 203.0, 181.0, 142.0, 162.0, 192.0, 189.0, 206.0, 193.0, 152.0, 146.0, 145.0, 150.0, 178.0, 169.0, 94.0, 88.0, 136.0, 165.0, 181.0, 158.0, 134.0, 110.0, 135.0, 161.0, 165.0, 185.0, 34.0, 43.0, 204.0, 200.0, 133.0, 126.0, 159.0, 131.0, 135.0, 148.0, 159.0, 134.0, 204.0, 189.0, 207.0, 201.0, 182.0, 202.0, 174.0, 207.0, 159.0, 183.0, 153.0, 137.0, 175.0, 175.0, 143.0, 156.0, 181.0, 175.0, 110.0, 120.0, 126.0, 122.0, 
138.0, 135.0, 43.0, 31.0, 148.0, 182.0, 146.0, 143.0, 154.0, 145.0, 149.0, 149.0, 83.0, 105.0, 134.0, 102.0, 186.0, 207.0, 169.0, 181.0, 174.0, 162.0, 174.0, 164.0, 173.0, 176.0, 164.0, 148.0, 57.0, 63.0, 170.0, 166.0, 171.0, 190.0, 167.0, 169.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6878877620979216, "mean_inference_ms": 1.2112749680126509, "mean_action_processing_ms": 0.13297599011569022, "mean_env_wait_ms": 0.8697325368255246, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 408.0, "episode_reward_min": 74.0, "episode_reward_mean": 297.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 218.0}, "policy_reward_mean": {"ppo": 148.74}, "hist_stats": {"episode_reward": [175.0, 316.0, 287.0, 269.0, 281.0, 352.0, 336.0, 287.0, 395.0, 241.0, 231.0, 354.0, 229.0, 402.0, 291.0, 273.0, 347.0, 298.0, 247.0, 336.0, 399.0, 338.0, 296.0, 313.0, 336.0, 256.0, 87.0, 344.0, 273.0, 250.0, 333.0, 276.0, 296.0, 290.0, 390.0, 390.0, 336.0, 327.0, 244.0, 141.0, 287.0, 231.0, 316.0, 253.0, 288.0, 298.0, 298.0, 381.0, 171.0, 162.0, 234.0, 342.0, 384.0, 304.0, 381.0, 399.0, 298.0, 295.0, 347.0, 182.0, 301.0, 339.0, 244.0, 296.0, 350.0, 77.0, 404.0, 259.0, 290.0, 283.0, 293.0, 393.0, 408.0, 384.0, 381.0, 342.0, 290.0, 350.0, 299.0, 356.0, 230.0, 248.0, 273.0, 74.0, 330.0, 289.0, 299.0, 298.0, 188.0, 236.0, 393.0, 350.0, 336.0, 338.0, 349.0, 312.0, 120.0, 336.0, 361.0, 336.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400], "policy_ppo_reward": [80.0, 95.0, 143.0, 173.0, 145.0, 142.0, 134.0, 135.0, 139.0, 142.0, 167.0, 185.0, 161.0, 175.0, 136.0, 151.0, 203.0, 192.0, 125.0, 116.0, 127.0, 104.0, 172.0, 182.0, 99.0, 130.0, 200.0, 202.0, 145.0, 146.0, 148.0, 125.0, 159.0, 188.0, 135.0, 163.0, 130.0, 117.0, 167.0, 169.0, 201.0, 198.0, 157.0, 181.0, 142.0, 154.0, 162.0, 151.0, 165.0, 171.0, 136.0, 120.0, 31.0, 56.0, 158.0, 186.0, 133.0, 140.0, 131.0, 119.0, 176.0, 157.0, 130.0, 146.0, 139.0, 157.0, 144.0, 146.0, 195.0, 195.0, 198.0, 192.0, 172.0, 164.0, 170.0, 157.0, 127.0, 117.0, 70.0, 71.0, 143.0, 144.0, 115.0, 116.0, 164.0, 152.0, 128.0, 125.0, 148.0, 140.0, 162.0, 136.0, 136.0, 162.0, 163.0, 218.0, 82.0, 89.0, 79.0, 83.0, 116.0, 118.0, 180.0, 162.0, 203.0, 181.0, 142.0, 162.0, 192.0, 189.0, 206.0, 193.0, 152.0, 146.0, 145.0, 150.0, 178.0, 169.0, 94.0, 88.0, 136.0, 165.0, 181.0, 158.0, 134.0, 110.0, 135.0, 161.0, 165.0, 185.0, 34.0, 43.0, 204.0, 200.0, 133.0, 126.0, 159.0, 131.0, 135.0, 148.0, 159.0, 134.0, 204.0, 189.0, 207.0, 201.0, 182.0, 202.0, 174.0, 207.0, 159.0, 183.0, 153.0, 137.0, 175.0, 175.0, 143.0, 156.0, 181.0, 175.0, 110.0, 120.0, 126.0, 122.0, 138.0, 135.0, 43.0, 31.0, 148.0, 182.0, 146.0, 143.0, 154.0, 145.0, 149.0, 149.0, 83.0, 105.0, 134.0, 102.0, 186.0, 207.0, 169.0, 181.0, 174.0, 162.0, 174.0, 164.0, 173.0, 176.0, 164.0, 148.0, 57.0, 63.0, 170.0, 166.0, 171.0, 190.0, 167.0, 169.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6878877620979216, "mean_inference_ms": 1.2112749680126509, "mean_action_processing_ms": 0.13297599011569022, "mean_env_wait_ms": 0.8697325368255246, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1536000, "num_agent_steps_trained": 1536000, "num_env_steps_sampled": 768000, "num_env_steps_trained": 768000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 768000, "num_steps_trained_this_iter": 12800, 
"agent_timesteps_total": 1536000, "timers": {"training_iteration_time_ms": 3674.237, "learn_time_ms": 1115.272, "learn_throughput": 11477.021, "synch_weights_time_ms": 12.3}, "counters": {"num_env_steps_sampled": 768000, "num_env_steps_trained": 768000, "num_agent_steps_sampled": 1536000, "num_agent_steps_trained": 1536000}, "done": false, "episodes_total": 1920, "training_iteration": 60, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-50", "timestamp": 1666580630, "time_this_iter_s": 3.67628812789917, "time_total_s": 226.17076468467712, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, 
"dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, 
"framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, 
"lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, 
"min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 226.17076468467712, "timesteps_since_restore": 0, "iterations_since_restore": 60, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.86666666666667, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 108.4, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 97.08, "shaped_reward_min": 34, "shaped_reward_max": 128, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.26, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.58, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 9.87, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.88, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.46, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.53, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 8.85, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.17, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.39, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.06, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 1.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.91, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.32, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.77, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 8.85, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.17, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.85, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.17, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.7347235018262012e-19, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008456122595816851, "policy_loss": -0.001012232038192451, "vf_loss": 8.314811706542969, "vf_explained_var": 0.6157503128051758, "kl": 0.0020367184188216925, "entropy": 1.3297278881072998, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 780800, "num_env_steps_trained": 780800, "num_agent_steps_sampled": 1561600, "num_agent_steps_trained": 1561600}, "sampler_results": {"episode_reward_max": 441.0, "episode_reward_min": 74.0, "episode_reward_mean": 313.88, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 156.94}, "custom_metrics": {"sparse_reward_mean": 108.4, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 97.08, "shaped_reward_min": 34, "shaped_reward_max": 128, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.26, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.58, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 9.87, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 10.88, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 7, "onion_drop_agent_1_mean": 1.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.46, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.53, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 8.85, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 
10.17, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.4, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.39, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 2.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 1.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.91, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 2.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.32, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 2.77, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.15, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 8.85, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.17, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 8.85, 
"viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.17, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [296.0, 290.0, 390.0, 390.0, 336.0, 327.0, 244.0, 141.0, 287.0, 231.0, 316.0, 253.0, 288.0, 298.0, 298.0, 381.0, 171.0, 162.0, 234.0, 342.0, 384.0, 304.0, 381.0, 399.0, 298.0, 295.0, 347.0, 182.0, 301.0, 339.0, 244.0, 296.0, 350.0, 77.0, 404.0, 259.0, 290.0, 283.0, 293.0, 393.0, 408.0, 384.0, 381.0, 342.0, 290.0, 350.0, 299.0, 356.0, 230.0, 248.0, 273.0, 74.0, 330.0, 289.0, 299.0, 298.0, 188.0, 236.0, 393.0, 350.0, 336.0, 338.0, 349.0, 312.0, 120.0, 336.0, 
361.0, 336.0, 402.0, 390.0, 174.0, 395.0, 336.0, 396.0, 345.0, 255.0, 396.0, 384.0, 190.0, 441.0, 301.0, 343.0, 344.0, 393.0, 356.0, 347.0, 351.0, 346.0, 438.0, 438.0, 387.0, 344.0, 345.0, 344.0, 396.0, 342.0, 287.0, 305.0, 338.0, 239.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [139.0, 157.0, 144.0, 146.0, 195.0, 195.0, 198.0, 192.0, 172.0, 164.0, 170.0, 157.0, 127.0, 117.0, 70.0, 71.0, 143.0, 144.0, 115.0, 116.0, 164.0, 152.0, 128.0, 125.0, 148.0, 140.0, 162.0, 136.0, 136.0, 162.0, 163.0, 218.0, 82.0, 89.0, 79.0, 83.0, 116.0, 118.0, 180.0, 162.0, 203.0, 181.0, 142.0, 162.0, 192.0, 189.0, 206.0, 193.0, 152.0, 146.0, 145.0, 150.0, 178.0, 169.0, 94.0, 88.0, 136.0, 165.0, 181.0, 158.0, 134.0, 110.0, 135.0, 161.0, 165.0, 185.0, 34.0, 43.0, 204.0, 200.0, 133.0, 126.0, 159.0, 131.0, 135.0, 148.0, 159.0, 134.0, 204.0, 189.0, 207.0, 201.0, 182.0, 202.0, 174.0, 207.0, 159.0, 183.0, 153.0, 137.0, 175.0, 175.0, 143.0, 156.0, 181.0, 175.0, 110.0, 120.0, 126.0, 122.0, 138.0, 135.0, 43.0, 31.0, 148.0, 182.0, 146.0, 143.0, 154.0, 145.0, 149.0, 149.0, 83.0, 105.0, 134.0, 102.0, 186.0, 207.0, 169.0, 181.0, 174.0, 162.0, 174.0, 164.0, 173.0, 176.0, 164.0, 148.0, 57.0, 63.0, 170.0, 166.0, 171.0, 190.0, 167.0, 169.0, 202.0, 200.0, 185.0, 205.0, 80.0, 94.0, 195.0, 200.0, 164.0, 172.0, 201.0, 195.0, 163.0, 182.0, 121.0, 134.0, 203.0, 193.0, 171.0, 213.0, 104.0, 86.0, 200.0, 241.0, 136.0, 165.0, 171.0, 172.0, 185.0, 159.0, 198.0, 195.0, 166.0, 190.0, 167.0, 180.0, 163.0, 188.0, 186.0, 160.0, 
226.0, 212.0, 224.0, 214.0, 187.0, 200.0, 174.0, 170.0, 166.0, 179.0, 181.0, 163.0, 186.0, 210.0, 186.0, 156.0, 159.0, 128.0, 155.0, 150.0, 165.0, 173.0, 128.0, 111.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6878282523618396, "mean_inference_ms": 1.211237416116329, "mean_action_processing_ms": 0.1329784454803539, "mean_env_wait_ms": 0.8689713930511266, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 441.0, "episode_reward_min": 74.0, "episode_reward_mean": 313.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 156.94}, "hist_stats": {"episode_reward": [296.0, 290.0, 390.0, 390.0, 336.0, 327.0, 244.0, 141.0, 287.0, 231.0, 316.0, 253.0, 288.0, 298.0, 298.0, 381.0, 171.0, 162.0, 234.0, 342.0, 384.0, 304.0, 381.0, 399.0, 298.0, 295.0, 347.0, 182.0, 301.0, 339.0, 244.0, 296.0, 350.0, 77.0, 404.0, 259.0, 290.0, 283.0, 293.0, 393.0, 408.0, 384.0, 381.0, 342.0, 290.0, 350.0, 299.0, 356.0, 230.0, 248.0, 273.0, 74.0, 330.0, 289.0, 299.0, 298.0, 188.0, 236.0, 393.0, 350.0, 336.0, 338.0, 349.0, 312.0, 120.0, 336.0, 361.0, 336.0, 402.0, 390.0, 174.0, 395.0, 336.0, 396.0, 345.0, 255.0, 396.0, 384.0, 190.0, 441.0, 301.0, 343.0, 344.0, 393.0, 356.0, 347.0, 351.0, 346.0, 438.0, 438.0, 387.0, 344.0, 345.0, 344.0, 396.0, 342.0, 287.0, 305.0, 338.0, 239.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [139.0, 157.0, 144.0, 146.0, 195.0, 195.0, 198.0, 
192.0, 172.0, 164.0, 170.0, 157.0, 127.0, 117.0, 70.0, 71.0, 143.0, 144.0, 115.0, 116.0, 164.0, 152.0, 128.0, 125.0, 148.0, 140.0, 162.0, 136.0, 136.0, 162.0, 163.0, 218.0, 82.0, 89.0, 79.0, 83.0, 116.0, 118.0, 180.0, 162.0, 203.0, 181.0, 142.0, 162.0, 192.0, 189.0, 206.0, 193.0, 152.0, 146.0, 145.0, 150.0, 178.0, 169.0, 94.0, 88.0, 136.0, 165.0, 181.0, 158.0, 134.0, 110.0, 135.0, 161.0, 165.0, 185.0, 34.0, 43.0, 204.0, 200.0, 133.0, 126.0, 159.0, 131.0, 135.0, 148.0, 159.0, 134.0, 204.0, 189.0, 207.0, 201.0, 182.0, 202.0, 174.0, 207.0, 159.0, 183.0, 153.0, 137.0, 175.0, 175.0, 143.0, 156.0, 181.0, 175.0, 110.0, 120.0, 126.0, 122.0, 138.0, 135.0, 43.0, 31.0, 148.0, 182.0, 146.0, 143.0, 154.0, 145.0, 149.0, 149.0, 83.0, 105.0, 134.0, 102.0, 186.0, 207.0, 169.0, 181.0, 174.0, 162.0, 174.0, 164.0, 173.0, 176.0, 164.0, 148.0, 57.0, 63.0, 170.0, 166.0, 171.0, 190.0, 167.0, 169.0, 202.0, 200.0, 185.0, 205.0, 80.0, 94.0, 195.0, 200.0, 164.0, 172.0, 201.0, 195.0, 163.0, 182.0, 121.0, 134.0, 203.0, 193.0, 171.0, 213.0, 104.0, 86.0, 200.0, 241.0, 136.0, 165.0, 171.0, 172.0, 185.0, 159.0, 198.0, 195.0, 166.0, 190.0, 167.0, 180.0, 163.0, 188.0, 186.0, 160.0, 226.0, 212.0, 224.0, 214.0, 187.0, 200.0, 174.0, 170.0, 166.0, 179.0, 181.0, 163.0, 186.0, 210.0, 186.0, 156.0, 159.0, 128.0, 155.0, 150.0, 165.0, 173.0, 128.0, 111.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6878282523618396, "mean_inference_ms": 1.211237416116329, "mean_action_processing_ms": 0.1329784454803539, "mean_env_wait_ms": 0.8689713930511266, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1561600, "num_agent_steps_trained": 1561600, "num_env_steps_sampled": 780800, "num_env_steps_trained": 780800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 780800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1561600, "timers": {"training_iteration_time_ms": 3664.703, 
"learn_time_ms": 1108.565, "learn_throughput": 11546.455, "synch_weights_time_ms": 12.646}, "counters": {"num_env_steps_sampled": 780800, "num_env_steps_trained": 780800, "num_agent_steps_sampled": 1561600, "num_agent_steps_trained": 1561600}, "done": false, "episodes_total": 1952, "training_iteration": 61, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-54", "timestamp": 1666580634, "time_this_iter_s": 3.7346341609954834, "time_total_s": 229.9053988456726, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, 
"custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, 
"custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, 
"sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": 
-1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 229.9053988456726, "timesteps_since_restore": 0, "iterations_since_restore": 61, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.92, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 113.4, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 102.17, "shaped_reward_min": 34, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.83, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.76, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 10.39, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 11.05, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.99, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 9.47, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.42, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.77, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.57, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.93, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.81, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.61, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.0, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 9.47, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.42, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.47, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.42, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 8.673617509131006e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.002962626051157713, "policy_loss": -0.003128279000520706, "vf_loss": 8.295660018920898, "vf_explained_var": 0.6091577410697937, "kl": 0.0019532288424670696, "entropy": 1.3278286457061768, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 793600, "num_env_steps_trained": 793600, "num_agent_steps_sampled": 1587200, "num_agent_steps_trained": 1587200}, "sampler_results": {"episode_reward_max": 441.0, "episode_reward_min": 74.0, "episode_reward_mean": 328.97, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 164.485}, "custom_metrics": {"sparse_reward_mean": 113.4, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 102.17, "shaped_reward_min": 34, "shaped_reward_max": 130, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.83, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 11.76, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 10.39, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 11.05, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.99, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 9.47, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 10.42, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.77, "dish_pickup_agent_0_min": 
1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 1.57, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 1.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 2.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.93, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.81, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.61, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.0, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.12, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 4, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 9.47, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 10.42, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.47, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 10.42, 
"viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [350.0, 77.0, 404.0, 259.0, 290.0, 283.0, 293.0, 393.0, 408.0, 384.0, 381.0, 342.0, 290.0, 350.0, 299.0, 356.0, 230.0, 248.0, 273.0, 74.0, 330.0, 289.0, 299.0, 298.0, 188.0, 236.0, 393.0, 350.0, 336.0, 338.0, 349.0, 312.0, 120.0, 336.0, 361.0, 336.0, 402.0, 390.0, 174.0, 395.0, 336.0, 396.0, 345.0, 255.0, 396.0, 384.0, 190.0, 441.0, 301.0, 343.0, 344.0, 393.0, 356.0, 347.0, 351.0, 346.0, 438.0, 438.0, 387.0, 344.0, 345.0, 344.0, 396.0, 342.0, 287.0, 305.0, 338.0, 239.0, 298.0, 347.0, 207.0, 344.0, 370.0, 384.0, 407.0, 407.0, 313.0, 402.0, 341.0, 339.0, 396.0, 393.0, 250.0, 
396.0, 298.0, 390.0, 327.0, 347.0, 361.0, 390.0, 390.0, 261.0, 297.0, 401.0, 307.0, 290.0, 407.0, 293.0, 344.0, 257.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [165.0, 185.0, 34.0, 43.0, 204.0, 200.0, 133.0, 126.0, 159.0, 131.0, 135.0, 148.0, 159.0, 134.0, 204.0, 189.0, 207.0, 201.0, 182.0, 202.0, 174.0, 207.0, 159.0, 183.0, 153.0, 137.0, 175.0, 175.0, 143.0, 156.0, 181.0, 175.0, 110.0, 120.0, 126.0, 122.0, 138.0, 135.0, 43.0, 31.0, 148.0, 182.0, 146.0, 143.0, 154.0, 145.0, 149.0, 149.0, 83.0, 105.0, 134.0, 102.0, 186.0, 207.0, 169.0, 181.0, 174.0, 162.0, 174.0, 164.0, 173.0, 176.0, 164.0, 148.0, 57.0, 63.0, 170.0, 166.0, 171.0, 190.0, 167.0, 169.0, 202.0, 200.0, 185.0, 205.0, 80.0, 94.0, 195.0, 200.0, 164.0, 172.0, 201.0, 195.0, 163.0, 182.0, 121.0, 134.0, 203.0, 193.0, 171.0, 213.0, 104.0, 86.0, 200.0, 241.0, 136.0, 165.0, 171.0, 172.0, 185.0, 159.0, 198.0, 195.0, 166.0, 190.0, 167.0, 180.0, 163.0, 188.0, 186.0, 160.0, 226.0, 212.0, 224.0, 214.0, 187.0, 200.0, 174.0, 170.0, 166.0, 179.0, 181.0, 163.0, 186.0, 210.0, 186.0, 156.0, 159.0, 128.0, 155.0, 150.0, 165.0, 173.0, 128.0, 111.0, 148.0, 150.0, 160.0, 187.0, 101.0, 106.0, 176.0, 168.0, 179.0, 191.0, 194.0, 190.0, 201.0, 206.0, 202.0, 205.0, 158.0, 155.0, 206.0, 196.0, 182.0, 159.0, 154.0, 185.0, 197.0, 199.0, 191.0, 202.0, 123.0, 127.0, 203.0, 193.0, 156.0, 142.0, 202.0, 188.0, 158.0, 169.0, 180.0, 167.0, 185.0, 176.0, 216.0, 174.0, 196.0, 194.0, 130.0, 131.0, 146.0, 151.0, 215.0, 186.0, 153.0, 154.0, 153.0, 137.0, 
203.0, 204.0, 137.0, 156.0, 174.0, 170.0, 122.0, 135.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6877589020473881, "mean_inference_ms": 1.210984106554491, "mean_action_processing_ms": 0.13297596780002988, "mean_env_wait_ms": 0.8682082840509878, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 441.0, "episode_reward_min": 74.0, "episode_reward_mean": 328.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 164.485}, "hist_stats": {"episode_reward": [350.0, 77.0, 404.0, 259.0, 290.0, 283.0, 293.0, 393.0, 408.0, 384.0, 381.0, 342.0, 290.0, 350.0, 299.0, 356.0, 230.0, 248.0, 273.0, 74.0, 330.0, 289.0, 299.0, 298.0, 188.0, 236.0, 393.0, 350.0, 336.0, 338.0, 349.0, 312.0, 120.0, 336.0, 361.0, 336.0, 402.0, 390.0, 174.0, 395.0, 336.0, 396.0, 345.0, 255.0, 396.0, 384.0, 190.0, 441.0, 301.0, 343.0, 344.0, 393.0, 356.0, 347.0, 351.0, 346.0, 438.0, 438.0, 387.0, 344.0, 345.0, 344.0, 396.0, 342.0, 287.0, 305.0, 338.0, 239.0, 298.0, 347.0, 207.0, 344.0, 370.0, 384.0, 407.0, 407.0, 313.0, 402.0, 341.0, 339.0, 396.0, 393.0, 250.0, 396.0, 298.0, 390.0, 327.0, 347.0, 361.0, 390.0, 390.0, 261.0, 297.0, 401.0, 307.0, 290.0, 407.0, 293.0, 344.0, 257.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [165.0, 185.0, 34.0, 43.0, 204.0, 200.0, 133.0, 126.0, 159.0, 131.0, 135.0, 148.0, 159.0, 134.0, 204.0, 189.0, 207.0, 201.0, 182.0, 202.0, 174.0, 207.0, 159.0, 
183.0, 153.0, 137.0, 175.0, 175.0, 143.0, 156.0, 181.0, 175.0, 110.0, 120.0, 126.0, 122.0, 138.0, 135.0, 43.0, 31.0, 148.0, 182.0, 146.0, 143.0, 154.0, 145.0, 149.0, 149.0, 83.0, 105.0, 134.0, 102.0, 186.0, 207.0, 169.0, 181.0, 174.0, 162.0, 174.0, 164.0, 173.0, 176.0, 164.0, 148.0, 57.0, 63.0, 170.0, 166.0, 171.0, 190.0, 167.0, 169.0, 202.0, 200.0, 185.0, 205.0, 80.0, 94.0, 195.0, 200.0, 164.0, 172.0, 201.0, 195.0, 163.0, 182.0, 121.0, 134.0, 203.0, 193.0, 171.0, 213.0, 104.0, 86.0, 200.0, 241.0, 136.0, 165.0, 171.0, 172.0, 185.0, 159.0, 198.0, 195.0, 166.0, 190.0, 167.0, 180.0, 163.0, 188.0, 186.0, 160.0, 226.0, 212.0, 224.0, 214.0, 187.0, 200.0, 174.0, 170.0, 166.0, 179.0, 181.0, 163.0, 186.0, 210.0, 186.0, 156.0, 159.0, 128.0, 155.0, 150.0, 165.0, 173.0, 128.0, 111.0, 148.0, 150.0, 160.0, 187.0, 101.0, 106.0, 176.0, 168.0, 179.0, 191.0, 194.0, 190.0, 201.0, 206.0, 202.0, 205.0, 158.0, 155.0, 206.0, 196.0, 182.0, 159.0, 154.0, 185.0, 197.0, 199.0, 191.0, 202.0, 123.0, 127.0, 203.0, 193.0, 156.0, 142.0, 202.0, 188.0, 158.0, 169.0, 180.0, 167.0, 185.0, 176.0, 216.0, 174.0, 196.0, 194.0, 130.0, 131.0, 146.0, 151.0, 215.0, 186.0, 153.0, 154.0, 153.0, 137.0, 203.0, 204.0, 137.0, 156.0, 174.0, 170.0, 122.0, 135.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6877589020473881, "mean_inference_ms": 1.210984106554491, "mean_action_processing_ms": 0.13297596780002988, "mean_env_wait_ms": 0.8682082840509878, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1587200, "num_agent_steps_trained": 1587200, "num_env_steps_sampled": 793600, "num_env_steps_trained": 793600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 793600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1587200, "timers": {"training_iteration_time_ms": 3665.294, "learn_time_ms": 1113.003, "learn_throughput": 11500.417, "synch_weights_time_ms": 12.408}, "counters": 
{"num_env_steps_sampled": 793600, "num_env_steps_trained": 793600, "num_agent_steps_sampled": 1587200, "num_agent_steps_trained": 1587200}, "done": false, "episodes_total": 1984, "training_iteration": 62, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-03-58", "timestamp": 1666580638, "time_this_iter_s": 3.70135235786438, "time_total_s": 233.606751203537, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, 
"action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": 
-1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, 
"observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, 
"lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, 
"entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, 
"sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 233.606751203537, "timesteps_since_restore": 0, "iterations_since_restore": 62, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.616666666666665}} +{"custom_metrics": {"sparse_reward_mean": 119.8, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 106.01, "shaped_reward_min": 40, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 
0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.04, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 12.38, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 10.59, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 11.88, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.52, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 9.59, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.07, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.66, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.87, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.79, 
"useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.71, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.24, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.59, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.03, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.59, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.07, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.59, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.07, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.336808754565503e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.003959214314818382, "policy_loss": -0.004130211658775806, "vf_loss": 8.356800079345703, "vf_explained_var": 0.6421783566474915, "kl": 0.0018275229958817363, "entropy": 1.3293664455413818, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 806400, "num_env_steps_trained": 806400, "num_agent_steps_sampled": 1612800, "num_agent_steps_trained": 1612800}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 120.0, "episode_reward_mean": 345.61, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 54.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 172.805}, "custom_metrics": {"sparse_reward_mean": 119.8, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 106.01, "shaped_reward_min": 40, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, 
"useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.04, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 12.38, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 10.59, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 11.88, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.52, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 9.59, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.07, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 16, "dish_pickup_agent_1_mean": 5.13, 
"dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.66, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.9, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.87, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 10, "useful_dish_drop_agent_1_mean": 0.79, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.71, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.24, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.59, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.03, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 5, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.59, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.07, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.59, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.07, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [120.0, 336.0, 361.0, 336.0, 402.0, 390.0, 174.0, 395.0, 336.0, 396.0, 345.0, 255.0, 396.0, 384.0, 190.0, 441.0, 301.0, 343.0, 344.0, 393.0, 356.0, 347.0, 351.0, 346.0, 438.0, 438.0, 387.0, 344.0, 345.0, 344.0, 396.0, 342.0, 287.0, 305.0, 338.0, 239.0, 298.0, 347.0, 207.0, 344.0, 370.0, 384.0, 407.0, 407.0, 313.0, 402.0, 341.0, 339.0, 396.0, 393.0, 250.0, 396.0, 298.0, 390.0, 327.0, 347.0, 361.0, 390.0, 390.0, 261.0, 297.0, 401.0, 307.0, 290.0, 407.0, 293.0, 344.0, 257.0, 344.0, 413.0, 301.0, 350.0, 307.0, 399.0, 353.0, 396.0, 399.0, 405.0, 384.0, 292.0, 462.0, 401.0, 295.0, 345.0, 353.0, 398.0, 416.0, 390.0, 387.0, 342.0, 341.0, 362.0, 290.0, 396.0, 
399.0, 387.0, 133.0, 344.0, 335.0, 247.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [57.0, 63.0, 170.0, 166.0, 171.0, 190.0, 167.0, 169.0, 202.0, 200.0, 185.0, 205.0, 80.0, 94.0, 195.0, 200.0, 164.0, 172.0, 201.0, 195.0, 163.0, 182.0, 121.0, 134.0, 203.0, 193.0, 171.0, 213.0, 104.0, 86.0, 200.0, 241.0, 136.0, 165.0, 171.0, 172.0, 185.0, 159.0, 198.0, 195.0, 166.0, 190.0, 167.0, 180.0, 163.0, 188.0, 186.0, 160.0, 226.0, 212.0, 224.0, 214.0, 187.0, 200.0, 174.0, 170.0, 166.0, 179.0, 181.0, 163.0, 186.0, 210.0, 186.0, 156.0, 159.0, 128.0, 155.0, 150.0, 165.0, 173.0, 128.0, 111.0, 148.0, 150.0, 160.0, 187.0, 101.0, 106.0, 176.0, 168.0, 179.0, 191.0, 194.0, 190.0, 201.0, 206.0, 202.0, 205.0, 158.0, 155.0, 206.0, 196.0, 182.0, 159.0, 154.0, 185.0, 197.0, 199.0, 191.0, 202.0, 123.0, 127.0, 203.0, 193.0, 156.0, 142.0, 202.0, 188.0, 158.0, 169.0, 180.0, 167.0, 185.0, 176.0, 216.0, 174.0, 196.0, 194.0, 130.0, 131.0, 146.0, 151.0, 215.0, 186.0, 153.0, 154.0, 153.0, 137.0, 203.0, 204.0, 137.0, 156.0, 174.0, 170.0, 122.0, 135.0, 174.0, 170.0, 196.0, 217.0, 141.0, 160.0, 169.0, 181.0, 155.0, 152.0, 202.0, 197.0, 182.0, 171.0, 194.0, 202.0, 204.0, 195.0, 194.0, 211.0, 183.0, 201.0, 160.0, 132.0, 229.0, 233.0, 211.0, 190.0, 152.0, 143.0, 176.0, 169.0, 170.0, 183.0, 196.0, 202.0, 221.0, 195.0, 179.0, 211.0, 189.0, 198.0, 156.0, 186.0, 176.0, 165.0, 180.0, 182.0, 143.0, 147.0, 212.0, 184.0, 191.0, 208.0, 190.0, 197.0, 54.0, 79.0, 167.0, 177.0, 170.0, 165.0, 115.0, 132.0]}, "sampler_perf": 
{"mean_raw_obs_processing_ms": 0.6877425590338123, "mean_inference_ms": 1.2107950864096755, "mean_action_processing_ms": 0.13298834860070177, "mean_env_wait_ms": 0.8675140813460829, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 120.0, "episode_reward_mean": 345.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 54.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 172.805}, "hist_stats": {"episode_reward": [120.0, 336.0, 361.0, 336.0, 402.0, 390.0, 174.0, 395.0, 336.0, 396.0, 345.0, 255.0, 396.0, 384.0, 190.0, 441.0, 301.0, 343.0, 344.0, 393.0, 356.0, 347.0, 351.0, 346.0, 438.0, 438.0, 387.0, 344.0, 345.0, 344.0, 396.0, 342.0, 287.0, 305.0, 338.0, 239.0, 298.0, 347.0, 207.0, 344.0, 370.0, 384.0, 407.0, 407.0, 313.0, 402.0, 341.0, 339.0, 396.0, 393.0, 250.0, 396.0, 298.0, 390.0, 327.0, 347.0, 361.0, 390.0, 390.0, 261.0, 297.0, 401.0, 307.0, 290.0, 407.0, 293.0, 344.0, 257.0, 344.0, 413.0, 301.0, 350.0, 307.0, 399.0, 353.0, 396.0, 399.0, 405.0, 384.0, 292.0, 462.0, 401.0, 295.0, 345.0, 353.0, 398.0, 416.0, 390.0, 387.0, 342.0, 341.0, 362.0, 290.0, 396.0, 399.0, 387.0, 133.0, 344.0, 335.0, 247.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [57.0, 63.0, 170.0, 166.0, 171.0, 190.0, 167.0, 169.0, 202.0, 200.0, 185.0, 205.0, 80.0, 94.0, 195.0, 200.0, 164.0, 172.0, 201.0, 195.0, 163.0, 182.0, 121.0, 134.0, 203.0, 193.0, 171.0, 213.0, 104.0, 86.0, 200.0, 241.0, 136.0, 
165.0, 171.0, 172.0, 185.0, 159.0, 198.0, 195.0, 166.0, 190.0, 167.0, 180.0, 163.0, 188.0, 186.0, 160.0, 226.0, 212.0, 224.0, 214.0, 187.0, 200.0, 174.0, 170.0, 166.0, 179.0, 181.0, 163.0, 186.0, 210.0, 186.0, 156.0, 159.0, 128.0, 155.0, 150.0, 165.0, 173.0, 128.0, 111.0, 148.0, 150.0, 160.0, 187.0, 101.0, 106.0, 176.0, 168.0, 179.0, 191.0, 194.0, 190.0, 201.0, 206.0, 202.0, 205.0, 158.0, 155.0, 206.0, 196.0, 182.0, 159.0, 154.0, 185.0, 197.0, 199.0, 191.0, 202.0, 123.0, 127.0, 203.0, 193.0, 156.0, 142.0, 202.0, 188.0, 158.0, 169.0, 180.0, 167.0, 185.0, 176.0, 216.0, 174.0, 196.0, 194.0, 130.0, 131.0, 146.0, 151.0, 215.0, 186.0, 153.0, 154.0, 153.0, 137.0, 203.0, 204.0, 137.0, 156.0, 174.0, 170.0, 122.0, 135.0, 174.0, 170.0, 196.0, 217.0, 141.0, 160.0, 169.0, 181.0, 155.0, 152.0, 202.0, 197.0, 182.0, 171.0, 194.0, 202.0, 204.0, 195.0, 194.0, 211.0, 183.0, 201.0, 160.0, 132.0, 229.0, 233.0, 211.0, 190.0, 152.0, 143.0, 176.0, 169.0, 170.0, 183.0, 196.0, 202.0, 221.0, 195.0, 179.0, 211.0, 189.0, 198.0, 156.0, 186.0, 176.0, 165.0, 180.0, 182.0, 143.0, 147.0, 212.0, 184.0, 191.0, 208.0, 190.0, 197.0, 54.0, 79.0, 167.0, 177.0, 170.0, 165.0, 115.0, 132.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6877425590338123, "mean_inference_ms": 1.2107950864096755, "mean_action_processing_ms": 0.13298834860070177, "mean_env_wait_ms": 0.8675140813460829, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1612800, "num_agent_steps_trained": 1612800, "num_env_steps_sampled": 806400, "num_env_steps_trained": 806400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 806400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1612800, "timers": {"training_iteration_time_ms": 3656.391, "learn_time_ms": 1105.489, "learn_throughput": 11578.584, "synch_weights_time_ms": 12.386}, "counters": {"num_env_steps_sampled": 806400, "num_env_steps_trained": 806400, 
"num_agent_steps_sampled": 1612800, "num_agent_steps_trained": 1612800}, "done": false, "episodes_total": 2016, "training_iteration": 63, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-01", "timestamp": 1666580641, "time_this_iter_s": 3.72748064994812, "time_total_s": 237.3342318534851, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, 
"clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": 
{"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, 
"render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, 
"exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], 
"clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, 
"entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 237.3342318534851, "timesteps_since_restore": 0, "iterations_since_restore": 63, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.0, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 122.4, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 108.28, "shaped_reward_min": 40, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.6, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 12.04, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 11.13, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 11.64, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 10.06, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 10.95, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.76, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.69, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.71, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.72, 
"soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.6, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 10.06, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 10.95, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.06, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 10.95, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.1684043772827515e-20, "cur_lr": 0.0010000000474974513, "total_loss": -0.004289156757295132, "policy_loss": -0.004483077675104141, "vf_loss": 8.452171325683594, "vf_explained_var": 0.6298333406448364, "kl": 0.001715940423309803, "entropy": 1.3025939464569092, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 819200, "num_env_steps_trained": 819200, "num_agent_steps_sampled": 1638400, "num_agent_steps_trained": 1638400}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 80.0, "episode_reward_mean": 353.08, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 238.0}, "policy_reward_mean": {"ppo": 176.54}, "custom_metrics": {"sparse_reward_mean": 122.4, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 108.28, "shaped_reward_min": 40, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.6, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 12.04, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 11.13, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 11.64, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 17, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.73, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 10.06, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 10.95, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 1.76, 
"useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.69, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.82, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.71, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.75, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.72, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 3.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 6, "soup_delivery_agent_0_mean": 3.6, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 3.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 10.06, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 10.95, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.06, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 10.95, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [287.0, 305.0, 338.0, 239.0, 298.0, 347.0, 207.0, 344.0, 370.0, 384.0, 407.0, 407.0, 313.0, 402.0, 341.0, 339.0, 396.0, 393.0, 250.0, 396.0, 298.0, 390.0, 327.0, 347.0, 361.0, 390.0, 390.0, 261.0, 297.0, 401.0, 307.0, 290.0, 407.0, 293.0, 344.0, 257.0, 344.0, 413.0, 301.0, 350.0, 307.0, 399.0, 353.0, 396.0, 399.0, 405.0, 384.0, 292.0, 462.0, 401.0, 295.0, 345.0, 353.0, 398.0, 416.0, 390.0, 387.0, 342.0, 341.0, 362.0, 290.0, 396.0, 399.0, 387.0, 133.0, 344.0, 335.0, 247.0, 395.0, 450.0, 384.0, 387.0, 387.0, 404.0, 410.0, 279.0, 441.0, 447.0, 459.0, 336.0, 80.0, 390.0, 290.0, 247.0, 358.0, 336.0, 410.0, 318.0, 392.0, 387.0, 370.0, 298.0, 438.0, 353.0, 396.0, 438.0, 381.0, 355.0, 398.0, 405.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [159.0, 128.0, 155.0, 150.0, 165.0, 173.0, 128.0, 111.0, 148.0, 150.0, 160.0, 187.0, 101.0, 106.0, 176.0, 168.0, 179.0, 191.0, 194.0, 190.0, 201.0, 206.0, 202.0, 205.0, 158.0, 155.0, 206.0, 196.0, 182.0, 159.0, 154.0, 185.0, 197.0, 199.0, 191.0, 202.0, 123.0, 127.0, 203.0, 193.0, 156.0, 142.0, 202.0, 188.0, 158.0, 169.0, 180.0, 167.0, 185.0, 176.0, 216.0, 174.0, 196.0, 194.0, 130.0, 131.0, 146.0, 151.0, 215.0, 186.0, 153.0, 154.0, 153.0, 137.0, 203.0, 204.0, 137.0, 156.0, 174.0, 170.0, 122.0, 135.0, 174.0, 170.0, 196.0, 217.0, 141.0, 160.0, 169.0, 181.0, 155.0, 152.0, 202.0, 197.0, 182.0, 171.0, 194.0, 202.0, 204.0, 195.0, 194.0, 211.0, 183.0, 201.0, 160.0, 132.0, 229.0, 233.0, 211.0, 190.0, 152.0, 143.0, 176.0, 169.0, 170.0, 183.0, 196.0, 202.0, 221.0, 195.0, 179.0, 211.0, 189.0, 198.0, 156.0, 186.0, 176.0, 165.0, 180.0, 182.0, 143.0, 147.0, 212.0, 184.0, 191.0, 208.0, 190.0, 197.0, 54.0, 79.0, 167.0, 177.0, 170.0, 165.0, 115.0, 132.0, 195.0, 200.0, 238.0, 212.0, 179.0, 205.0, 200.0, 187.0, 192.0, 195.0, 206.0, 198.0, 200.0, 210.0, 137.0, 142.0, 233.0, 208.0, 219.0, 228.0, 229.0, 230.0, 165.0, 171.0, 34.0, 46.0, 201.0, 189.0, 139.0, 151.0, 133.0, 114.0, 179.0, 179.0, 160.0, 176.0, 210.0, 200.0, 150.0, 168.0, 207.0, 185.0, 194.0, 193.0, 193.0, 177.0, 135.0, 163.0, 214.0, 224.0, 180.0, 173.0, 205.0, 191.0, 207.0, 231.0, 169.0, 212.0, 184.0, 171.0, 203.0, 195.0, 197.0, 208.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6876820593781335, "mean_inference_ms": 
1.2105886481957269, "mean_action_processing_ms": 0.13299015160909294, "mean_env_wait_ms": 0.8668253107554783, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 80.0, "episode_reward_mean": 353.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 238.0}, "policy_reward_mean": {"ppo": 176.54}, "hist_stats": {"episode_reward": [287.0, 305.0, 338.0, 239.0, 298.0, 347.0, 207.0, 344.0, 370.0, 384.0, 407.0, 407.0, 313.0, 402.0, 341.0, 339.0, 396.0, 393.0, 250.0, 396.0, 298.0, 390.0, 327.0, 347.0, 361.0, 390.0, 390.0, 261.0, 297.0, 401.0, 307.0, 290.0, 407.0, 293.0, 344.0, 257.0, 344.0, 413.0, 301.0, 350.0, 307.0, 399.0, 353.0, 396.0, 399.0, 405.0, 384.0, 292.0, 462.0, 401.0, 295.0, 345.0, 353.0, 398.0, 416.0, 390.0, 387.0, 342.0, 341.0, 362.0, 290.0, 396.0, 399.0, 387.0, 133.0, 344.0, 335.0, 247.0, 395.0, 450.0, 384.0, 387.0, 387.0, 404.0, 410.0, 279.0, 441.0, 447.0, 459.0, 336.0, 80.0, 390.0, 290.0, 247.0, 358.0, 336.0, 410.0, 318.0, 392.0, 387.0, 370.0, 298.0, 438.0, 353.0, 396.0, 438.0, 381.0, 355.0, 398.0, 405.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [159.0, 128.0, 155.0, 150.0, 165.0, 173.0, 128.0, 111.0, 148.0, 150.0, 160.0, 187.0, 101.0, 106.0, 176.0, 168.0, 179.0, 191.0, 194.0, 190.0, 201.0, 206.0, 202.0, 205.0, 158.0, 155.0, 206.0, 196.0, 182.0, 159.0, 154.0, 185.0, 197.0, 199.0, 191.0, 202.0, 123.0, 127.0, 203.0, 193.0, 156.0, 142.0, 202.0, 
188.0, 158.0, 169.0, 180.0, 167.0, 185.0, 176.0, 216.0, 174.0, 196.0, 194.0, 130.0, 131.0, 146.0, 151.0, 215.0, 186.0, 153.0, 154.0, 153.0, 137.0, 203.0, 204.0, 137.0, 156.0, 174.0, 170.0, 122.0, 135.0, 174.0, 170.0, 196.0, 217.0, 141.0, 160.0, 169.0, 181.0, 155.0, 152.0, 202.0, 197.0, 182.0, 171.0, 194.0, 202.0, 204.0, 195.0, 194.0, 211.0, 183.0, 201.0, 160.0, 132.0, 229.0, 233.0, 211.0, 190.0, 152.0, 143.0, 176.0, 169.0, 170.0, 183.0, 196.0, 202.0, 221.0, 195.0, 179.0, 211.0, 189.0, 198.0, 156.0, 186.0, 176.0, 165.0, 180.0, 182.0, 143.0, 147.0, 212.0, 184.0, 191.0, 208.0, 190.0, 197.0, 54.0, 79.0, 167.0, 177.0, 170.0, 165.0, 115.0, 132.0, 195.0, 200.0, 238.0, 212.0, 179.0, 205.0, 200.0, 187.0, 192.0, 195.0, 206.0, 198.0, 200.0, 210.0, 137.0, 142.0, 233.0, 208.0, 219.0, 228.0, 229.0, 230.0, 165.0, 171.0, 34.0, 46.0, 201.0, 189.0, 139.0, 151.0, 133.0, 114.0, 179.0, 179.0, 160.0, 176.0, 210.0, 200.0, 150.0, 168.0, 207.0, 185.0, 194.0, 193.0, 193.0, 177.0, 135.0, 163.0, 214.0, 224.0, 180.0, 173.0, 205.0, 191.0, 207.0, 231.0, 169.0, 212.0, 184.0, 171.0, 203.0, 195.0, 197.0, 208.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6876820593781335, "mean_inference_ms": 1.2105886481957269, "mean_action_processing_ms": 0.13299015160909294, "mean_env_wait_ms": 0.8668253107554783, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1638400, "num_agent_steps_trained": 1638400, "num_env_steps_sampled": 819200, "num_env_steps_trained": 819200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 819200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1638400, "timers": {"training_iteration_time_ms": 3652.492, "learn_time_ms": 1103.358, "learn_throughput": 11600.947, "synch_weights_time_ms": 13.037}, "counters": {"num_env_steps_sampled": 819200, "num_env_steps_trained": 819200, "num_agent_steps_sampled": 1638400, "num_agent_steps_trained": 1638400}, "done": 
false, "episodes_total": 2048, "training_iteration": 64, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-05", "timestamp": 1666580645, "time_this_iter_s": 3.637770414352417, "time_total_s": 240.97200226783752, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, 
"disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, 
"actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 240.97200226783752, "timesteps_since_restore": 0, "iterations_since_restore": 64, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.916666666666668, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 124.2, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 109.47, "shaped_reward_min": 40, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.15, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 12.57, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 10.67, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.1, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.49, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 9.8, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.39, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.81, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.8, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.72, 
"soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.45, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.56, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.2, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.8, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.39, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.8, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.39, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.0842021886413758e-20, "cur_lr": 0.0010000000474974513, "total_loss": 0.00183533423114568, "policy_loss": 0.001654433086514473, "vf_loss": 8.310755729675293, "vf_explained_var": 0.6230897903442383, "kl": 0.002343923319131136, "entropy": 1.3003530502319336, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 832000, "num_env_steps_trained": 832000, "num_agent_steps_sampled": 1664000, "num_agent_steps_trained": 1664000}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 80.0, "episode_reward_mean": 357.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 178.935}, "custom_metrics": {"sparse_reward_mean": 124.2, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 109.47, "shaped_reward_min": 40, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.15, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 12.57, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 18, "useful_onion_pickup_agent_0_mean": 10.67, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.1, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.49, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.42, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 9.8, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.39, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.74, 
"useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.81, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.68, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.84, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.8, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.72, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.45, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.56, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.2, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.16, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.8, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.39, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.8, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.39, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [407.0, 293.0, 344.0, 257.0, 344.0, 413.0, 301.0, 350.0, 307.0, 399.0, 353.0, 396.0, 399.0, 405.0, 384.0, 292.0, 462.0, 401.0, 295.0, 345.0, 353.0, 398.0, 416.0, 390.0, 387.0, 342.0, 341.0, 362.0, 290.0, 396.0, 399.0, 387.0, 133.0, 344.0, 335.0, 247.0, 395.0, 450.0, 384.0, 387.0, 387.0, 404.0, 410.0, 279.0, 441.0, 447.0, 459.0, 336.0, 80.0, 390.0, 290.0, 247.0, 358.0, 336.0, 410.0, 318.0, 392.0, 387.0, 370.0, 298.0, 438.0, 353.0, 396.0, 438.0, 381.0, 355.0, 398.0, 405.0, 255.0, 344.0, 347.0, 344.0, 179.0, 330.0, 390.0, 299.0, 343.0, 356.0, 350.0, 362.0, 450.0, 304.0, 281.0, 396.0, 401.0, 356.0, 393.0, 392.0, 353.0, 361.0, 353.0, 342.0, 416.0, 387.0, 361.0, 447.0, 348.0, 402.0, 349.0, 310.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [203.0, 204.0, 137.0, 156.0, 174.0, 170.0, 122.0, 135.0, 174.0, 170.0, 196.0, 217.0, 141.0, 160.0, 169.0, 181.0, 155.0, 152.0, 202.0, 197.0, 182.0, 171.0, 194.0, 202.0, 204.0, 195.0, 194.0, 211.0, 183.0, 201.0, 160.0, 132.0, 229.0, 233.0, 211.0, 190.0, 152.0, 143.0, 176.0, 169.0, 170.0, 183.0, 196.0, 202.0, 221.0, 195.0, 179.0, 211.0, 189.0, 198.0, 156.0, 186.0, 176.0, 165.0, 180.0, 182.0, 143.0, 147.0, 212.0, 184.0, 191.0, 208.0, 190.0, 197.0, 54.0, 79.0, 167.0, 177.0, 170.0, 165.0, 115.0, 132.0, 195.0, 200.0, 238.0, 212.0, 179.0, 205.0, 200.0, 187.0, 192.0, 195.0, 206.0, 198.0, 200.0, 210.0, 137.0, 142.0, 233.0, 208.0, 219.0, 228.0, 229.0, 230.0, 165.0, 171.0, 34.0, 46.0, 201.0, 189.0, 139.0, 151.0, 133.0, 114.0, 179.0, 179.0, 160.0, 176.0, 210.0, 200.0, 150.0, 168.0, 207.0, 185.0, 194.0, 193.0, 193.0, 177.0, 135.0, 163.0, 214.0, 224.0, 180.0, 173.0, 205.0, 191.0, 207.0, 231.0, 169.0, 212.0, 184.0, 171.0, 203.0, 195.0, 197.0, 208.0, 119.0, 136.0, 173.0, 171.0, 178.0, 169.0, 167.0, 177.0, 74.0, 105.0, 168.0, 162.0, 187.0, 203.0, 158.0, 141.0, 177.0, 166.0, 168.0, 188.0, 163.0, 187.0, 178.0, 184.0, 241.0, 209.0, 152.0, 152.0, 134.0, 147.0, 191.0, 205.0, 195.0, 206.0, 174.0, 182.0, 188.0, 205.0, 188.0, 204.0, 173.0, 180.0, 178.0, 183.0, 181.0, 172.0, 174.0, 168.0, 186.0, 230.0, 187.0, 200.0, 194.0, 167.0, 217.0, 230.0, 168.0, 180.0, 194.0, 208.0, 185.0, 164.0, 158.0, 152.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6875963042536799, "mean_inference_ms": 
1.2103686825657565, "mean_action_processing_ms": 0.13298642621311788, "mean_env_wait_ms": 0.8660832998378942, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 80.0, "episode_reward_mean": 357.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 178.935}, "hist_stats": {"episode_reward": [407.0, 293.0, 344.0, 257.0, 344.0, 413.0, 301.0, 350.0, 307.0, 399.0, 353.0, 396.0, 399.0, 405.0, 384.0, 292.0, 462.0, 401.0, 295.0, 345.0, 353.0, 398.0, 416.0, 390.0, 387.0, 342.0, 341.0, 362.0, 290.0, 396.0, 399.0, 387.0, 133.0, 344.0, 335.0, 247.0, 395.0, 450.0, 384.0, 387.0, 387.0, 404.0, 410.0, 279.0, 441.0, 447.0, 459.0, 336.0, 80.0, 390.0, 290.0, 247.0, 358.0, 336.0, 410.0, 318.0, 392.0, 387.0, 370.0, 298.0, 438.0, 353.0, 396.0, 438.0, 381.0, 355.0, 398.0, 405.0, 255.0, 344.0, 347.0, 344.0, 179.0, 330.0, 390.0, 299.0, 343.0, 356.0, 350.0, 362.0, 450.0, 304.0, 281.0, 396.0, 401.0, 356.0, 393.0, 392.0, 353.0, 361.0, 353.0, 342.0, 416.0, 387.0, 361.0, 447.0, 348.0, 402.0, 349.0, 310.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [203.0, 204.0, 137.0, 156.0, 174.0, 170.0, 122.0, 135.0, 174.0, 170.0, 196.0, 217.0, 141.0, 160.0, 169.0, 181.0, 155.0, 152.0, 202.0, 197.0, 182.0, 171.0, 194.0, 202.0, 204.0, 195.0, 194.0, 211.0, 183.0, 201.0, 160.0, 132.0, 229.0, 233.0, 211.0, 190.0, 152.0, 143.0, 176.0, 169.0, 170.0, 183.0, 196.0, 
202.0, 221.0, 195.0, 179.0, 211.0, 189.0, 198.0, 156.0, 186.0, 176.0, 165.0, 180.0, 182.0, 143.0, 147.0, 212.0, 184.0, 191.0, 208.0, 190.0, 197.0, 54.0, 79.0, 167.0, 177.0, 170.0, 165.0, 115.0, 132.0, 195.0, 200.0, 238.0, 212.0, 179.0, 205.0, 200.0, 187.0, 192.0, 195.0, 206.0, 198.0, 200.0, 210.0, 137.0, 142.0, 233.0, 208.0, 219.0, 228.0, 229.0, 230.0, 165.0, 171.0, 34.0, 46.0, 201.0, 189.0, 139.0, 151.0, 133.0, 114.0, 179.0, 179.0, 160.0, 176.0, 210.0, 200.0, 150.0, 168.0, 207.0, 185.0, 194.0, 193.0, 193.0, 177.0, 135.0, 163.0, 214.0, 224.0, 180.0, 173.0, 205.0, 191.0, 207.0, 231.0, 169.0, 212.0, 184.0, 171.0, 203.0, 195.0, 197.0, 208.0, 119.0, 136.0, 173.0, 171.0, 178.0, 169.0, 167.0, 177.0, 74.0, 105.0, 168.0, 162.0, 187.0, 203.0, 158.0, 141.0, 177.0, 166.0, 168.0, 188.0, 163.0, 187.0, 178.0, 184.0, 241.0, 209.0, 152.0, 152.0, 134.0, 147.0, 191.0, 205.0, 195.0, 206.0, 174.0, 182.0, 188.0, 205.0, 188.0, 204.0, 173.0, 180.0, 178.0, 183.0, 181.0, 172.0, 174.0, 168.0, 186.0, 230.0, 187.0, 200.0, 194.0, 167.0, 217.0, 230.0, 168.0, 180.0, 194.0, 208.0, 185.0, 164.0, 158.0, 152.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6875963042536799, "mean_inference_ms": 1.2103686825657565, "mean_action_processing_ms": 0.13298642621311788, "mean_env_wait_ms": 0.8660832998378942, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1664000, "num_agent_steps_trained": 1664000, "num_env_steps_sampled": 832000, "num_env_steps_trained": 832000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 832000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1664000, "timers": {"training_iteration_time_ms": 3636.033, "learn_time_ms": 1096.102, "learn_throughput": 11677.75, "synch_weights_time_ms": 13.065}, "counters": {"num_env_steps_sampled": 832000, "num_env_steps_trained": 832000, "num_agent_steps_sampled": 1664000, "num_agent_steps_trained": 1664000}, "done": 
false, "episodes_total": 2080, "training_iteration": 65, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-09", "timestamp": 1666580649, "time_this_iter_s": 3.55340576171875, "time_total_s": 244.52540802955627, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, 
"disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, 
"actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 244.52540802955627, "timesteps_since_restore": 0, "iterations_since_restore": 65, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.74, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 126.4, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 108.84, "shaped_reward_min": 40, "shaped_reward_max": 139, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.96, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 12.67, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 10.48, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 12.12, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 0.98, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.54, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 9.73, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 11.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.68, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.71, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.69, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.73, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.69, "soup_pickup_agent_0_min": 1, 
"soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.45, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.56, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.21, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.73, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 11.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.73, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 11.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 5.421010943206879e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016171643510460854, "policy_loss": -0.0018112201942130923, "vf_loss": 8.35134506225586, "vf_explained_var": 0.6184874773025513, "kl": 0.0019764369353652, "entropy": 1.2821624279022217, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 844800, "num_env_steps_trained": 844800, "num_agent_steps_sampled": 1689600, "num_agent_steps_trained": 1689600}, "sampler_results": {"episode_reward_max": 459.0, "episode_reward_min": 80.0, "episode_reward_mean": 361.64, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 180.82}, "custom_metrics": {"sparse_reward_mean": 126.4, "sparse_reward_min": 20, "sparse_reward_max": 160, "shaped_reward_mean": 108.84, "shaped_reward_min": 40, "shaped_reward_max": 139, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 10.96, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 18, "onion_pickup_agent_1_mean": 12.67, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 10.48, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 17, "useful_onion_pickup_agent_1_mean": 12.12, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 18, "onion_drop_agent_0_mean": 0.98, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.89, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.54, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.41, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 9.73, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 11.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.68, 
"useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 5, "useful_dish_pickup_agent_1_mean": 1.71, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.83, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.69, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.73, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.69, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.45, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.56, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.21, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 9.73, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 11.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.73, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 11.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [133.0, 344.0, 335.0, 247.0, 395.0, 450.0, 384.0, 387.0, 387.0, 404.0, 410.0, 279.0, 441.0, 447.0, 459.0, 336.0, 80.0, 390.0, 290.0, 247.0, 358.0, 336.0, 410.0, 318.0, 392.0, 387.0, 370.0, 298.0, 438.0, 353.0, 396.0, 438.0, 381.0, 355.0, 398.0, 405.0, 255.0, 344.0, 347.0, 344.0, 179.0, 330.0, 390.0, 299.0, 343.0, 356.0, 350.0, 362.0, 450.0, 304.0, 281.0, 396.0, 401.0, 356.0, 393.0, 392.0, 353.0, 361.0, 353.0, 342.0, 416.0, 387.0, 361.0, 447.0, 348.0, 402.0, 349.0, 310.0, 292.0, 456.0, 390.0, 381.0, 341.0, 387.0, 384.0, 341.0, 345.0, 444.0, 444.0, 411.0, 401.0, 438.0, 282.0, 441.0, 344.0, 352.0, 408.0, 384.0, 384.0, 456.0, 348.0, 313.0, 399.0, 395.0, 281.0, 225.0, 390.0, 390.0, 390.0, 348.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [54.0, 79.0, 167.0, 177.0, 170.0, 165.0, 115.0, 132.0, 195.0, 200.0, 238.0, 212.0, 179.0, 205.0, 200.0, 187.0, 192.0, 195.0, 206.0, 198.0, 200.0, 210.0, 137.0, 142.0, 233.0, 208.0, 219.0, 228.0, 229.0, 230.0, 165.0, 171.0, 34.0, 46.0, 201.0, 189.0, 139.0, 151.0, 133.0, 114.0, 179.0, 179.0, 160.0, 176.0, 210.0, 200.0, 150.0, 168.0, 207.0, 185.0, 194.0, 193.0, 193.0, 177.0, 135.0, 163.0, 214.0, 224.0, 180.0, 173.0, 205.0, 191.0, 207.0, 231.0, 169.0, 212.0, 184.0, 171.0, 203.0, 195.0, 197.0, 208.0, 119.0, 136.0, 173.0, 171.0, 178.0, 169.0, 167.0, 177.0, 74.0, 105.0, 168.0, 162.0, 187.0, 203.0, 158.0, 141.0, 177.0, 166.0, 168.0, 188.0, 163.0, 187.0, 178.0, 184.0, 241.0, 209.0, 152.0, 152.0, 134.0, 147.0, 191.0, 205.0, 195.0, 206.0, 174.0, 182.0, 188.0, 205.0, 188.0, 204.0, 173.0, 180.0, 178.0, 183.0, 181.0, 172.0, 174.0, 168.0, 186.0, 230.0, 187.0, 200.0, 194.0, 167.0, 217.0, 230.0, 168.0, 180.0, 194.0, 208.0, 185.0, 164.0, 158.0, 152.0, 135.0, 157.0, 228.0, 228.0, 205.0, 185.0, 184.0, 197.0, 167.0, 174.0, 179.0, 208.0, 205.0, 179.0, 174.0, 167.0, 174.0, 171.0, 222.0, 222.0, 216.0, 228.0, 204.0, 207.0, 203.0, 198.0, 226.0, 212.0, 139.0, 143.0, 223.0, 218.0, 183.0, 161.0, 184.0, 168.0, 206.0, 202.0, 196.0, 188.0, 177.0, 207.0, 217.0, 239.0, 155.0, 193.0, 150.0, 163.0, 200.0, 199.0, 209.0, 186.0, 137.0, 144.0, 120.0, 105.0, 185.0, 205.0, 178.0, 212.0, 190.0, 200.0, 161.0, 187.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6874455833951449, "mean_inference_ms": 
1.2101589324370519, "mean_action_processing_ms": 0.13297401832247158, "mean_env_wait_ms": 0.8652994841084243, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 459.0, "episode_reward_min": 80.0, "episode_reward_mean": 361.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 241.0}, "policy_reward_mean": {"ppo": 180.82}, "hist_stats": {"episode_reward": [133.0, 344.0, 335.0, 247.0, 395.0, 450.0, 384.0, 387.0, 387.0, 404.0, 410.0, 279.0, 441.0, 447.0, 459.0, 336.0, 80.0, 390.0, 290.0, 247.0, 358.0, 336.0, 410.0, 318.0, 392.0, 387.0, 370.0, 298.0, 438.0, 353.0, 396.0, 438.0, 381.0, 355.0, 398.0, 405.0, 255.0, 344.0, 347.0, 344.0, 179.0, 330.0, 390.0, 299.0, 343.0, 356.0, 350.0, 362.0, 450.0, 304.0, 281.0, 396.0, 401.0, 356.0, 393.0, 392.0, 353.0, 361.0, 353.0, 342.0, 416.0, 387.0, 361.0, 447.0, 348.0, 402.0, 349.0, 310.0, 292.0, 456.0, 390.0, 381.0, 341.0, 387.0, 384.0, 341.0, 345.0, 444.0, 444.0, 411.0, 401.0, 438.0, 282.0, 441.0, 344.0, 352.0, 408.0, 384.0, 384.0, 456.0, 348.0, 313.0, 399.0, 395.0, 281.0, 225.0, 390.0, 390.0, 390.0, 348.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [54.0, 79.0, 167.0, 177.0, 170.0, 165.0, 115.0, 132.0, 195.0, 200.0, 238.0, 212.0, 179.0, 205.0, 200.0, 187.0, 192.0, 195.0, 206.0, 198.0, 200.0, 210.0, 137.0, 142.0, 233.0, 208.0, 219.0, 228.0, 229.0, 230.0, 165.0, 171.0, 34.0, 46.0, 201.0, 189.0, 139.0, 151.0, 133.0, 114.0, 179.0, 179.0, 160.0, 176.0, 
210.0, 200.0, 150.0, 168.0, 207.0, 185.0, 194.0, 193.0, 193.0, 177.0, 135.0, 163.0, 214.0, 224.0, 180.0, 173.0, 205.0, 191.0, 207.0, 231.0, 169.0, 212.0, 184.0, 171.0, 203.0, 195.0, 197.0, 208.0, 119.0, 136.0, 173.0, 171.0, 178.0, 169.0, 167.0, 177.0, 74.0, 105.0, 168.0, 162.0, 187.0, 203.0, 158.0, 141.0, 177.0, 166.0, 168.0, 188.0, 163.0, 187.0, 178.0, 184.0, 241.0, 209.0, 152.0, 152.0, 134.0, 147.0, 191.0, 205.0, 195.0, 206.0, 174.0, 182.0, 188.0, 205.0, 188.0, 204.0, 173.0, 180.0, 178.0, 183.0, 181.0, 172.0, 174.0, 168.0, 186.0, 230.0, 187.0, 200.0, 194.0, 167.0, 217.0, 230.0, 168.0, 180.0, 194.0, 208.0, 185.0, 164.0, 158.0, 152.0, 135.0, 157.0, 228.0, 228.0, 205.0, 185.0, 184.0, 197.0, 167.0, 174.0, 179.0, 208.0, 205.0, 179.0, 174.0, 167.0, 174.0, 171.0, 222.0, 222.0, 216.0, 228.0, 204.0, 207.0, 203.0, 198.0, 226.0, 212.0, 139.0, 143.0, 223.0, 218.0, 183.0, 161.0, 184.0, 168.0, 206.0, 202.0, 196.0, 188.0, 177.0, 207.0, 217.0, 239.0, 155.0, 193.0, 150.0, 163.0, 200.0, 199.0, 209.0, 186.0, 137.0, 144.0, 120.0, 105.0, 185.0, 205.0, 178.0, 212.0, 190.0, 200.0, 161.0, 187.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6874455833951449, "mean_inference_ms": 1.2101589324370519, "mean_action_processing_ms": 0.13297401832247158, "mean_env_wait_ms": 0.8652994841084243, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1689600, "num_agent_steps_trained": 1689600, "num_env_steps_sampled": 844800, "num_env_steps_trained": 844800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 844800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1689600, "timers": {"training_iteration_time_ms": 3639.566, "learn_time_ms": 1098.766, "learn_throughput": 11649.427, "synch_weights_time_ms": 13.026}, "counters": {"num_env_steps_sampled": 844800, "num_env_steps_trained": 844800, "num_agent_steps_sampled": 1689600, "num_agent_steps_trained": 1689600}, "done": 
false, "episodes_total": 2112, "training_iteration": 66, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-13", "timestamp": 1666580653, "time_this_iter_s": 3.6690165996551514, "time_total_s": 248.19442462921143, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, 
"disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, 
"actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 248.19442462921143, "timesteps_since_restore": 0, "iterations_since_restore": 66, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.73333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 129.0, "sparse_reward_min": 60, "sparse_reward_max": 160, "shaped_reward_mean": 111.57, "shaped_reward_min": 59, "shaped_reward_max": 139, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.24, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 13.26, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 10.74, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.58, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 9.8, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 11.77, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.95, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 1.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.67, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.79, 
"soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.66, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.23, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 9.8, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 11.77, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.8, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 11.77, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.7105054716034394e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.00021978653967380524, "policy_loss": -0.0004263484152033925, "vf_loss": 8.452777862548828, "vf_explained_var": 0.6492389440536499, "kl": 0.002371986396610737, "entropy": 1.277435064315796, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 857600, "num_env_steps_trained": 857600, "num_agent_steps_sampled": 1715200, "num_agent_steps_trained": 1715200}, "sampler_results": {"episode_reward_max": 459.0, "episode_reward_min": 179.0, "episode_reward_mean": 369.57, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 74.0}, "policy_reward_max": {"ppo": 245.0}, "policy_reward_mean": {"ppo": 184.785}, "custom_metrics": {"sparse_reward_mean": 129.0, "sparse_reward_min": 60, "sparse_reward_max": 160, "shaped_reward_mean": 111.57, "shaped_reward_min": 59, "shaped_reward_max": 139, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.24, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 13.26, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 10.74, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.58, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 9.8, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 11.77, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.95, 
"useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 1.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.73, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 10, "dish_drop_agent_1_mean": 1.58, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.67, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.64, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.79, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.66, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.23, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 9.8, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 11.77, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 9.8, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 11.77, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [381.0, 355.0, 398.0, 405.0, 255.0, 344.0, 347.0, 344.0, 179.0, 330.0, 390.0, 299.0, 343.0, 356.0, 350.0, 362.0, 450.0, 304.0, 281.0, 396.0, 401.0, 356.0, 393.0, 392.0, 353.0, 361.0, 353.0, 342.0, 416.0, 387.0, 361.0, 447.0, 348.0, 402.0, 349.0, 310.0, 292.0, 456.0, 390.0, 381.0, 341.0, 387.0, 384.0, 341.0, 345.0, 444.0, 444.0, 411.0, 401.0, 438.0, 282.0, 441.0, 344.0, 352.0, 408.0, 384.0, 384.0, 456.0, 348.0, 313.0, 399.0, 395.0, 281.0, 225.0, 390.0, 390.0, 390.0, 348.0, 447.0, 381.0, 315.0, 444.0, 390.0, 402.0, 393.0, 459.0, 450.0, 435.0, 249.0, 298.0, 410.0, 405.0, 435.0, 404.0, 399.0, 447.0, 396.0, 378.0, 447.0, 354.0, 270.0, 341.0, 399.0, 396.0, 285.0, 302.0, 350.0, 237.0, 419.0, 395.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [169.0, 212.0, 184.0, 171.0, 203.0, 195.0, 197.0, 208.0, 119.0, 136.0, 173.0, 171.0, 178.0, 169.0, 167.0, 177.0, 74.0, 105.0, 168.0, 162.0, 187.0, 203.0, 158.0, 141.0, 177.0, 166.0, 168.0, 188.0, 163.0, 187.0, 178.0, 184.0, 241.0, 209.0, 152.0, 152.0, 134.0, 147.0, 191.0, 205.0, 195.0, 206.0, 174.0, 182.0, 188.0, 205.0, 188.0, 204.0, 173.0, 180.0, 178.0, 183.0, 181.0, 172.0, 174.0, 168.0, 186.0, 230.0, 187.0, 200.0, 194.0, 167.0, 217.0, 230.0, 168.0, 180.0, 194.0, 208.0, 185.0, 164.0, 158.0, 152.0, 135.0, 157.0, 228.0, 228.0, 205.0, 185.0, 184.0, 197.0, 167.0, 174.0, 179.0, 208.0, 205.0, 179.0, 174.0, 167.0, 174.0, 171.0, 222.0, 222.0, 216.0, 228.0, 204.0, 207.0, 203.0, 198.0, 226.0, 212.0, 139.0, 143.0, 223.0, 218.0, 183.0, 161.0, 184.0, 168.0, 206.0, 202.0, 196.0, 188.0, 177.0, 207.0, 217.0, 239.0, 155.0, 193.0, 150.0, 163.0, 200.0, 199.0, 209.0, 186.0, 137.0, 144.0, 120.0, 105.0, 185.0, 205.0, 178.0, 212.0, 190.0, 200.0, 161.0, 187.0, 236.0, 211.0, 179.0, 202.0, 154.0, 161.0, 211.0, 233.0, 198.0, 192.0, 210.0, 192.0, 197.0, 196.0, 245.0, 214.0, 235.0, 215.0, 220.0, 215.0, 116.0, 133.0, 145.0, 153.0, 203.0, 207.0, 210.0, 195.0, 202.0, 233.0, 187.0, 217.0, 191.0, 208.0, 223.0, 224.0, 186.0, 210.0, 212.0, 166.0, 227.0, 220.0, 169.0, 185.0, 124.0, 146.0, 189.0, 152.0, 190.0, 209.0, 187.0, 209.0, 135.0, 150.0, 146.0, 156.0, 190.0, 160.0, 119.0, 118.0, 208.0, 211.0, 203.0, 192.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6872752632050905, "mean_inference_ms": 
1.2099295792073617, "mean_action_processing_ms": 0.1329654963865704, "mean_env_wait_ms": 0.8645353892883192, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 459.0, "episode_reward_min": 179.0, "episode_reward_mean": 369.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 74.0}, "policy_reward_max": {"ppo": 245.0}, "policy_reward_mean": {"ppo": 184.785}, "hist_stats": {"episode_reward": [381.0, 355.0, 398.0, 405.0, 255.0, 344.0, 347.0, 344.0, 179.0, 330.0, 390.0, 299.0, 343.0, 356.0, 350.0, 362.0, 450.0, 304.0, 281.0, 396.0, 401.0, 356.0, 393.0, 392.0, 353.0, 361.0, 353.0, 342.0, 416.0, 387.0, 361.0, 447.0, 348.0, 402.0, 349.0, 310.0, 292.0, 456.0, 390.0, 381.0, 341.0, 387.0, 384.0, 341.0, 345.0, 444.0, 444.0, 411.0, 401.0, 438.0, 282.0, 441.0, 344.0, 352.0, 408.0, 384.0, 384.0, 456.0, 348.0, 313.0, 399.0, 395.0, 281.0, 225.0, 390.0, 390.0, 390.0, 348.0, 447.0, 381.0, 315.0, 444.0, 390.0, 402.0, 393.0, 459.0, 450.0, 435.0, 249.0, 298.0, 410.0, 405.0, 435.0, 404.0, 399.0, 447.0, 396.0, 378.0, 447.0, 354.0, 270.0, 341.0, 399.0, 396.0, 285.0, 302.0, 350.0, 237.0, 419.0, 395.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [169.0, 212.0, 184.0, 171.0, 203.0, 195.0, 197.0, 208.0, 119.0, 136.0, 173.0, 171.0, 178.0, 169.0, 167.0, 177.0, 74.0, 105.0, 168.0, 162.0, 187.0, 203.0, 158.0, 141.0, 177.0, 166.0, 168.0, 188.0, 163.0, 187.0, 178.0, 184.0, 241.0, 209.0, 152.0, 152.0, 134.0, 147.0, 191.0, 205.0, 195.0, 206.0, 174.0, 
182.0, 188.0, 205.0, 188.0, 204.0, 173.0, 180.0, 178.0, 183.0, 181.0, 172.0, 174.0, 168.0, 186.0, 230.0, 187.0, 200.0, 194.0, 167.0, 217.0, 230.0, 168.0, 180.0, 194.0, 208.0, 185.0, 164.0, 158.0, 152.0, 135.0, 157.0, 228.0, 228.0, 205.0, 185.0, 184.0, 197.0, 167.0, 174.0, 179.0, 208.0, 205.0, 179.0, 174.0, 167.0, 174.0, 171.0, 222.0, 222.0, 216.0, 228.0, 204.0, 207.0, 203.0, 198.0, 226.0, 212.0, 139.0, 143.0, 223.0, 218.0, 183.0, 161.0, 184.0, 168.0, 206.0, 202.0, 196.0, 188.0, 177.0, 207.0, 217.0, 239.0, 155.0, 193.0, 150.0, 163.0, 200.0, 199.0, 209.0, 186.0, 137.0, 144.0, 120.0, 105.0, 185.0, 205.0, 178.0, 212.0, 190.0, 200.0, 161.0, 187.0, 236.0, 211.0, 179.0, 202.0, 154.0, 161.0, 211.0, 233.0, 198.0, 192.0, 210.0, 192.0, 197.0, 196.0, 245.0, 214.0, 235.0, 215.0, 220.0, 215.0, 116.0, 133.0, 145.0, 153.0, 203.0, 207.0, 210.0, 195.0, 202.0, 233.0, 187.0, 217.0, 191.0, 208.0, 223.0, 224.0, 186.0, 210.0, 212.0, 166.0, 227.0, 220.0, 169.0, 185.0, 124.0, 146.0, 189.0, 152.0, 190.0, 209.0, 187.0, 209.0, 135.0, 150.0, 146.0, 156.0, 190.0, 160.0, 119.0, 118.0, 208.0, 211.0, 203.0, 192.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6872752632050905, "mean_inference_ms": 1.2099295792073617, "mean_action_processing_ms": 0.1329654963865704, "mean_env_wait_ms": 0.8645353892883192, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1715200, "num_agent_steps_trained": 1715200, "num_env_steps_sampled": 857600, "num_env_steps_trained": 857600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 857600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1715200, "timers": {"training_iteration_time_ms": 3630.775, "learn_time_ms": 1088.931, "learn_throughput": 11754.645, "synch_weights_time_ms": 12.476}, "counters": {"num_env_steps_sampled": 857600, "num_env_steps_trained": 857600, "num_agent_steps_sampled": 1715200, "num_agent_steps_trained": 1715200}, 
"done": false, "episodes_total": 2144, "training_iteration": 67, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-17", "timestamp": 1666580657, "time_this_iter_s": 3.681520700454712, "time_total_s": 251.87594532966614, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, 
"disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, 
"actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 251.87594532966614, "timesteps_since_restore": 0, "iterations_since_restore": 67, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.82, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 129.6, "sparse_reward_min": 60, "sparse_reward_max": 160, "shaped_reward_mean": 112.1, "shaped_reward_min": 65, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.87, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.09, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 11.23, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.34, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.64, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.75, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 10.21, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 11.42, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.37, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.91, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.71, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.55, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.54, "soup_pickup_agent_0_min": 1, 
"soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.68, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.44, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.21, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 11.42, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.21, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 11.42, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.3552527358017197e-21, "cur_lr": 0.0010000000474974513, "total_loss": -0.002791440347209573, "policy_loss": -0.00296858511865139, "vf_loss": 8.239323616027832, "vf_explained_var": 0.6559525728225708, "kl": 0.0022639285307377577, "entropy": 1.2935813665390015, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 870400, "num_env_steps_trained": 870400, "num_agent_steps_sampled": 1740800, "num_agent_steps_trained": 1740800}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 190.0, "episode_reward_mean": 371.3, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 245.0}, "policy_reward_mean": {"ppo": 185.65}, "custom_metrics": {"sparse_reward_mean": 129.6, "sparse_reward_min": 60, "sparse_reward_max": 160, "shaped_reward_mean": 112.1, "shaped_reward_min": 65, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 11.87, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.09, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 11.23, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.34, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.64, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.75, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 10.21, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 11.42, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.37, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 1.91, 
"useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.71, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.55, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.54, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.68, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.44, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 10.21, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 11.42, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.21, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 11.42, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [348.0, 402.0, 349.0, 310.0, 292.0, 456.0, 390.0, 381.0, 341.0, 387.0, 384.0, 341.0, 345.0, 444.0, 444.0, 411.0, 401.0, 438.0, 282.0, 441.0, 344.0, 352.0, 408.0, 384.0, 384.0, 456.0, 348.0, 313.0, 399.0, 395.0, 281.0, 225.0, 390.0, 390.0, 390.0, 348.0, 447.0, 381.0, 315.0, 444.0, 390.0, 402.0, 393.0, 459.0, 450.0, 435.0, 249.0, 298.0, 410.0, 405.0, 435.0, 404.0, 399.0, 447.0, 396.0, 378.0, 447.0, 354.0, 270.0, 341.0, 399.0, 396.0, 285.0, 302.0, 350.0, 237.0, 419.0, 395.0, 288.0, 441.0, 290.0, 287.0, 453.0, 399.0, 381.0, 347.0, 405.0, 355.0, 381.0, 402.0, 290.0, 404.0, 247.0, 305.0, 347.0, 263.0, 462.0, 393.0, 450.0, 402.0, 190.0, 290.0, 444.0, 399.0, 396.0, 358.0, 387.0, 393.0, 407.0, 348.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [168.0, 180.0, 194.0, 208.0, 185.0, 164.0, 158.0, 152.0, 135.0, 157.0, 228.0, 228.0, 205.0, 185.0, 184.0, 197.0, 167.0, 174.0, 179.0, 208.0, 205.0, 179.0, 174.0, 167.0, 174.0, 171.0, 222.0, 222.0, 216.0, 228.0, 204.0, 207.0, 203.0, 198.0, 226.0, 212.0, 139.0, 143.0, 223.0, 218.0, 183.0, 161.0, 184.0, 168.0, 206.0, 202.0, 196.0, 188.0, 177.0, 207.0, 217.0, 239.0, 155.0, 193.0, 150.0, 163.0, 200.0, 199.0, 209.0, 186.0, 137.0, 144.0, 120.0, 105.0, 185.0, 205.0, 178.0, 212.0, 190.0, 200.0, 161.0, 187.0, 236.0, 211.0, 179.0, 202.0, 154.0, 161.0, 211.0, 233.0, 198.0, 192.0, 210.0, 192.0, 197.0, 196.0, 245.0, 214.0, 235.0, 215.0, 220.0, 215.0, 116.0, 133.0, 145.0, 153.0, 203.0, 207.0, 210.0, 195.0, 202.0, 233.0, 187.0, 217.0, 191.0, 208.0, 223.0, 224.0, 186.0, 210.0, 212.0, 166.0, 227.0, 220.0, 169.0, 185.0, 124.0, 146.0, 189.0, 152.0, 190.0, 209.0, 187.0, 209.0, 135.0, 150.0, 146.0, 156.0, 190.0, 160.0, 119.0, 118.0, 208.0, 211.0, 203.0, 192.0, 140.0, 148.0, 214.0, 227.0, 138.0, 152.0, 133.0, 154.0, 234.0, 219.0, 202.0, 197.0, 178.0, 203.0, 182.0, 165.0, 206.0, 199.0, 178.0, 177.0, 182.0, 199.0, 197.0, 205.0, 136.0, 154.0, 201.0, 203.0, 107.0, 140.0, 149.0, 156.0, 161.0, 186.0, 135.0, 128.0, 221.0, 241.0, 200.0, 193.0, 205.0, 245.0, 198.0, 204.0, 94.0, 96.0, 144.0, 146.0, 222.0, 222.0, 189.0, 210.0, 188.0, 208.0, 180.0, 178.0, 196.0, 191.0, 193.0, 200.0, 200.0, 207.0, 164.0, 184.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871706321490018, "mean_inference_ms": 
1.2098465022841542, "mean_action_processing_ms": 0.13297585985904375, "mean_env_wait_ms": 0.8639868307798764, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 190.0, "episode_reward_mean": 371.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 245.0}, "policy_reward_mean": {"ppo": 185.65}, "hist_stats": {"episode_reward": [348.0, 402.0, 349.0, 310.0, 292.0, 456.0, 390.0, 381.0, 341.0, 387.0, 384.0, 341.0, 345.0, 444.0, 444.0, 411.0, 401.0, 438.0, 282.0, 441.0, 344.0, 352.0, 408.0, 384.0, 384.0, 456.0, 348.0, 313.0, 399.0, 395.0, 281.0, 225.0, 390.0, 390.0, 390.0, 348.0, 447.0, 381.0, 315.0, 444.0, 390.0, 402.0, 393.0, 459.0, 450.0, 435.0, 249.0, 298.0, 410.0, 405.0, 435.0, 404.0, 399.0, 447.0, 396.0, 378.0, 447.0, 354.0, 270.0, 341.0, 399.0, 396.0, 285.0, 302.0, 350.0, 237.0, 419.0, 395.0, 288.0, 441.0, 290.0, 287.0, 453.0, 399.0, 381.0, 347.0, 405.0, 355.0, 381.0, 402.0, 290.0, 404.0, 247.0, 305.0, 347.0, 263.0, 462.0, 393.0, 450.0, 402.0, 190.0, 290.0, 444.0, 399.0, 396.0, 358.0, 387.0, 393.0, 407.0, 348.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [168.0, 180.0, 194.0, 208.0, 185.0, 164.0, 158.0, 152.0, 135.0, 157.0, 228.0, 228.0, 205.0, 185.0, 184.0, 197.0, 167.0, 174.0, 179.0, 208.0, 205.0, 179.0, 174.0, 167.0, 174.0, 171.0, 222.0, 222.0, 216.0, 228.0, 204.0, 207.0, 203.0, 198.0, 226.0, 212.0, 139.0, 143.0, 223.0, 218.0, 183.0, 161.0, 184.0, 
168.0, 206.0, 202.0, 196.0, 188.0, 177.0, 207.0, 217.0, 239.0, 155.0, 193.0, 150.0, 163.0, 200.0, 199.0, 209.0, 186.0, 137.0, 144.0, 120.0, 105.0, 185.0, 205.0, 178.0, 212.0, 190.0, 200.0, 161.0, 187.0, 236.0, 211.0, 179.0, 202.0, 154.0, 161.0, 211.0, 233.0, 198.0, 192.0, 210.0, 192.0, 197.0, 196.0, 245.0, 214.0, 235.0, 215.0, 220.0, 215.0, 116.0, 133.0, 145.0, 153.0, 203.0, 207.0, 210.0, 195.0, 202.0, 233.0, 187.0, 217.0, 191.0, 208.0, 223.0, 224.0, 186.0, 210.0, 212.0, 166.0, 227.0, 220.0, 169.0, 185.0, 124.0, 146.0, 189.0, 152.0, 190.0, 209.0, 187.0, 209.0, 135.0, 150.0, 146.0, 156.0, 190.0, 160.0, 119.0, 118.0, 208.0, 211.0, 203.0, 192.0, 140.0, 148.0, 214.0, 227.0, 138.0, 152.0, 133.0, 154.0, 234.0, 219.0, 202.0, 197.0, 178.0, 203.0, 182.0, 165.0, 206.0, 199.0, 178.0, 177.0, 182.0, 199.0, 197.0, 205.0, 136.0, 154.0, 201.0, 203.0, 107.0, 140.0, 149.0, 156.0, 161.0, 186.0, 135.0, 128.0, 221.0, 241.0, 200.0, 193.0, 205.0, 245.0, 198.0, 204.0, 94.0, 96.0, 144.0, 146.0, 222.0, 222.0, 189.0, 210.0, 188.0, 208.0, 180.0, 178.0, 196.0, 191.0, 193.0, 200.0, 200.0, 207.0, 164.0, 184.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871706321490018, "mean_inference_ms": 1.2098465022841542, "mean_action_processing_ms": 0.13297585985904375, "mean_env_wait_ms": 0.8639868307798764, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1740800, "num_agent_steps_trained": 1740800, "num_env_steps_sampled": 870400, "num_env_steps_trained": 870400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 870400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1740800, "timers": {"training_iteration_time_ms": 3611.638, "learn_time_ms": 1084.248, "learn_throughput": 11805.418, "synch_weights_time_ms": 12.855}, "counters": {"num_env_steps_sampled": 870400, "num_env_steps_trained": 870400, "num_agent_steps_sampled": 1740800, "num_agent_steps_trained": 1740800}, 
"done": false, "episodes_total": 2176, "training_iteration": 68, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-21", "timestamp": 1666580661, "time_this_iter_s": 3.684041738510132, "time_total_s": 255.55998706817627, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, 
"disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, 
"actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 255.55998706817627, "timesteps_since_restore": 0, "iterations_since_restore": 68, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.349999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 132.2, "sparse_reward_min": 60, "sparse_reward_max": 160, "shaped_reward_mean": 114.32, "shaped_reward_min": 70, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 12.76, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 11.92, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.14, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.61, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.79, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 10.85, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.14, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.92, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.6, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.39, 
"soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.88, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.7, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.1, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 10.85, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.14, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.85, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.14, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 6.776263679008599e-22, "cur_lr": 0.0010000000474974513, "total_loss": -0.0012864982709288597, "policy_loss": -0.0014888072619214654, "vf_loss": 8.330397605895996, "vf_explained_var": 0.6499449014663696, "kl": 0.0019524524686858058, "entropy": 1.261465311050415, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 883200, "num_env_steps_trained": 883200, "num_agent_steps_sampled": 1766400, "num_agent_steps_trained": 1766400}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 190.0, "episode_reward_mean": 378.72, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 249.0}, "policy_reward_mean": {"ppo": 189.36}, "custom_metrics": {"sparse_reward_mean": 132.2, "sparse_reward_min": 60, "sparse_reward_max": 160, "shaped_reward_mean": 114.32, "shaped_reward_min": 70, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 12.76, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 11.92, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.14, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 8, "onion_drop_agent_1_mean": 1.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.61, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.79, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 10.85, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.14, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.92, 
"useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.23, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.57, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.77, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.6, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.39, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.88, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.7, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.1, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 10.85, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.14, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.85, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.14, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [390.0, 390.0, 390.0, 348.0, 447.0, 381.0, 315.0, 444.0, 390.0, 402.0, 393.0, 459.0, 450.0, 435.0, 249.0, 298.0, 410.0, 405.0, 435.0, 404.0, 399.0, 447.0, 396.0, 378.0, 447.0, 354.0, 270.0, 341.0, 399.0, 396.0, 285.0, 302.0, 350.0, 237.0, 419.0, 395.0, 288.0, 441.0, 290.0, 287.0, 453.0, 399.0, 381.0, 347.0, 405.0, 355.0, 381.0, 402.0, 290.0, 404.0, 247.0, 305.0, 347.0, 263.0, 462.0, 393.0, 450.0, 402.0, 190.0, 290.0, 444.0, 399.0, 396.0, 358.0, 387.0, 393.0, 407.0, 348.0, 396.0, 441.0, 384.0, 299.0, 444.0, 381.0, 395.0, 456.0, 247.0, 416.0, 447.0, 341.0, 390.0, 449.0, 450.0, 450.0, 358.0, 390.0, 456.0, 342.0, 293.0, 387.0, 330.0, 462.0, 410.0, 362.0, 408.0, 387.0, 459.0, 393.0, 399.0, 396.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [185.0, 205.0, 178.0, 212.0, 190.0, 200.0, 161.0, 187.0, 236.0, 211.0, 179.0, 202.0, 154.0, 161.0, 211.0, 233.0, 198.0, 192.0, 210.0, 192.0, 197.0, 196.0, 245.0, 214.0, 235.0, 215.0, 220.0, 215.0, 116.0, 133.0, 145.0, 153.0, 203.0, 207.0, 210.0, 195.0, 202.0, 233.0, 187.0, 217.0, 191.0, 208.0, 223.0, 224.0, 186.0, 210.0, 212.0, 166.0, 227.0, 220.0, 169.0, 185.0, 124.0, 146.0, 189.0, 152.0, 190.0, 209.0, 187.0, 209.0, 135.0, 150.0, 146.0, 156.0, 190.0, 160.0, 119.0, 118.0, 208.0, 211.0, 203.0, 192.0, 140.0, 148.0, 214.0, 227.0, 138.0, 152.0, 133.0, 154.0, 234.0, 219.0, 202.0, 197.0, 178.0, 203.0, 182.0, 165.0, 206.0, 199.0, 178.0, 177.0, 182.0, 199.0, 197.0, 205.0, 136.0, 154.0, 201.0, 203.0, 107.0, 140.0, 149.0, 156.0, 161.0, 186.0, 135.0, 128.0, 221.0, 241.0, 200.0, 193.0, 205.0, 245.0, 198.0, 204.0, 94.0, 96.0, 144.0, 146.0, 222.0, 222.0, 189.0, 210.0, 188.0, 208.0, 180.0, 178.0, 196.0, 191.0, 193.0, 200.0, 200.0, 207.0, 164.0, 184.0, 205.0, 191.0, 228.0, 213.0, 203.0, 181.0, 156.0, 143.0, 214.0, 230.0, 204.0, 177.0, 187.0, 208.0, 229.0, 227.0, 122.0, 125.0, 214.0, 202.0, 220.0, 227.0, 166.0, 175.0, 188.0, 202.0, 230.0, 219.0, 225.0, 225.0, 222.0, 228.0, 188.0, 170.0, 175.0, 215.0, 231.0, 225.0, 173.0, 169.0, 150.0, 143.0, 195.0, 192.0, 156.0, 174.0, 228.0, 234.0, 190.0, 220.0, 172.0, 190.0, 206.0, 202.0, 191.0, 196.0, 249.0, 210.0, 196.0, 197.0, 196.0, 203.0, 194.0, 202.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.687156924098904, "mean_inference_ms": 
1.2097514898289816, "mean_action_processing_ms": 0.13298900302876707, "mean_env_wait_ms": 0.8634657831003358, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 190.0, "episode_reward_mean": 378.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 249.0}, "policy_reward_mean": {"ppo": 189.36}, "hist_stats": {"episode_reward": [390.0, 390.0, 390.0, 348.0, 447.0, 381.0, 315.0, 444.0, 390.0, 402.0, 393.0, 459.0, 450.0, 435.0, 249.0, 298.0, 410.0, 405.0, 435.0, 404.0, 399.0, 447.0, 396.0, 378.0, 447.0, 354.0, 270.0, 341.0, 399.0, 396.0, 285.0, 302.0, 350.0, 237.0, 419.0, 395.0, 288.0, 441.0, 290.0, 287.0, 453.0, 399.0, 381.0, 347.0, 405.0, 355.0, 381.0, 402.0, 290.0, 404.0, 247.0, 305.0, 347.0, 263.0, 462.0, 393.0, 450.0, 402.0, 190.0, 290.0, 444.0, 399.0, 396.0, 358.0, 387.0, 393.0, 407.0, 348.0, 396.0, 441.0, 384.0, 299.0, 444.0, 381.0, 395.0, 456.0, 247.0, 416.0, 447.0, 341.0, 390.0, 449.0, 450.0, 450.0, 358.0, 390.0, 456.0, 342.0, 293.0, 387.0, 330.0, 462.0, 410.0, 362.0, 408.0, 387.0, 459.0, 393.0, 399.0, 396.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [185.0, 205.0, 178.0, 212.0, 190.0, 200.0, 161.0, 187.0, 236.0, 211.0, 179.0, 202.0, 154.0, 161.0, 211.0, 233.0, 198.0, 192.0, 210.0, 192.0, 197.0, 196.0, 245.0, 214.0, 235.0, 215.0, 220.0, 215.0, 116.0, 133.0, 145.0, 153.0, 203.0, 207.0, 210.0, 195.0, 202.0, 233.0, 187.0, 217.0, 191.0, 208.0, 223.0, 
224.0, 186.0, 210.0, 212.0, 166.0, 227.0, 220.0, 169.0, 185.0, 124.0, 146.0, 189.0, 152.0, 190.0, 209.0, 187.0, 209.0, 135.0, 150.0, 146.0, 156.0, 190.0, 160.0, 119.0, 118.0, 208.0, 211.0, 203.0, 192.0, 140.0, 148.0, 214.0, 227.0, 138.0, 152.0, 133.0, 154.0, 234.0, 219.0, 202.0, 197.0, 178.0, 203.0, 182.0, 165.0, 206.0, 199.0, 178.0, 177.0, 182.0, 199.0, 197.0, 205.0, 136.0, 154.0, 201.0, 203.0, 107.0, 140.0, 149.0, 156.0, 161.0, 186.0, 135.0, 128.0, 221.0, 241.0, 200.0, 193.0, 205.0, 245.0, 198.0, 204.0, 94.0, 96.0, 144.0, 146.0, 222.0, 222.0, 189.0, 210.0, 188.0, 208.0, 180.0, 178.0, 196.0, 191.0, 193.0, 200.0, 200.0, 207.0, 164.0, 184.0, 205.0, 191.0, 228.0, 213.0, 203.0, 181.0, 156.0, 143.0, 214.0, 230.0, 204.0, 177.0, 187.0, 208.0, 229.0, 227.0, 122.0, 125.0, 214.0, 202.0, 220.0, 227.0, 166.0, 175.0, 188.0, 202.0, 230.0, 219.0, 225.0, 225.0, 222.0, 228.0, 188.0, 170.0, 175.0, 215.0, 231.0, 225.0, 173.0, 169.0, 150.0, 143.0, 195.0, 192.0, 156.0, 174.0, 228.0, 234.0, 190.0, 220.0, 172.0, 190.0, 206.0, 202.0, 191.0, 196.0, 249.0, 210.0, 196.0, 197.0, 196.0, 203.0, 194.0, 202.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.687156924098904, "mean_inference_ms": 1.2097514898289816, "mean_action_processing_ms": 0.13298900302876707, "mean_env_wait_ms": 0.8634657831003358, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1766400, "num_agent_steps_trained": 1766400, "num_env_steps_sampled": 883200, "num_env_steps_trained": 883200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 883200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1766400, "timers": {"training_iteration_time_ms": 3615.423, "learn_time_ms": 1085.045, "learn_throughput": 11796.743, "synch_weights_time_ms": 12.673}, "counters": {"num_env_steps_sampled": 883200, "num_env_steps_trained": 883200, "num_agent_steps_sampled": 1766400, "num_agent_steps_trained": 1766400}, 
"done": false, "episodes_total": 2208, "training_iteration": 69, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-24", "timestamp": 1666580664, "time_this_iter_s": 3.738740921020508, "time_total_s": 259.2987279891968, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, 
"disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, 
"actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 259.2987279891968, "timesteps_since_restore": 0, "iterations_since_restore": 69, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.48, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 133.4, "sparse_reward_min": 60, "sparse_reward_max": 160, "shaped_reward_mean": 114.6, "shaped_reward_min": 70, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.96, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 12.34, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.32, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 11.79, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 11.39, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 10.75, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.07, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.81, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.77, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.75, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.68, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.4, "soup_pickup_agent_0_min": 0, 
"soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.09, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.39, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 10.75, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.39, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 10.75, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.3881318395042993e-22, "cur_lr": 0.0010000000474974513, "total_loss": -0.0034247650764882565, "policy_loss": -0.0036217491142451763, "vf_loss": 8.257144927978516, "vf_explained_var": 0.6091008186340332, "kl": 0.002094808267429471, "entropy": 1.2574673891067505, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 896000, "num_env_steps_trained": 896000, "num_agent_steps_sampled": 1792000, "num_agent_steps_trained": 1792000}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 190.0, "episode_reward_mean": 381.4, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 253.0}, "policy_reward_mean": {"ppo": 190.7}, "custom_metrics": {"sparse_reward_mean": 133.4, "sparse_reward_min": 60, "sparse_reward_max": 160, "shaped_reward_mean": 114.6, "shaped_reward_min": 70, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.96, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 12.34, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.32, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 11.79, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.62, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 11.39, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 10.75, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.07, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.81, 
"useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.77, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.75, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.68, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.4, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.09, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.39, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 10.75, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.39, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 10.75, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [350.0, 237.0, 419.0, 395.0, 288.0, 441.0, 290.0, 287.0, 453.0, 399.0, 381.0, 347.0, 405.0, 355.0, 381.0, 402.0, 290.0, 404.0, 247.0, 305.0, 347.0, 263.0, 462.0, 393.0, 450.0, 402.0, 190.0, 290.0, 444.0, 399.0, 396.0, 358.0, 387.0, 393.0, 407.0, 348.0, 396.0, 441.0, 384.0, 299.0, 444.0, 381.0, 395.0, 456.0, 247.0, 416.0, 447.0, 341.0, 390.0, 449.0, 450.0, 450.0, 358.0, 390.0, 456.0, 342.0, 293.0, 387.0, 330.0, 462.0, 410.0, 362.0, 408.0, 387.0, 459.0, 393.0, 399.0, 396.0, 250.0, 393.0, 407.0, 408.0, 390.0, 450.0, 387.0, 401.0, 390.0, 453.0, 333.0, 384.0, 401.0, 444.0, 341.0, 395.0, 318.0, 462.0, 339.0, 344.0, 450.0, 413.0, 339.0, 456.0, 387.0, 336.0, 381.0, 407.0, 413.0, 456.0, 390.0, 399.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [190.0, 160.0, 119.0, 118.0, 208.0, 211.0, 203.0, 192.0, 140.0, 148.0, 214.0, 227.0, 138.0, 152.0, 133.0, 154.0, 234.0, 219.0, 202.0, 197.0, 178.0, 203.0, 182.0, 165.0, 206.0, 199.0, 178.0, 177.0, 182.0, 199.0, 197.0, 205.0, 136.0, 154.0, 201.0, 203.0, 107.0, 140.0, 149.0, 156.0, 161.0, 186.0, 135.0, 128.0, 221.0, 241.0, 200.0, 193.0, 205.0, 245.0, 198.0, 204.0, 94.0, 96.0, 144.0, 146.0, 222.0, 222.0, 189.0, 210.0, 188.0, 208.0, 180.0, 178.0, 196.0, 191.0, 193.0, 200.0, 200.0, 207.0, 164.0, 184.0, 205.0, 191.0, 228.0, 213.0, 203.0, 181.0, 156.0, 143.0, 214.0, 230.0, 204.0, 177.0, 187.0, 208.0, 229.0, 227.0, 122.0, 125.0, 214.0, 202.0, 220.0, 227.0, 166.0, 175.0, 188.0, 202.0, 230.0, 219.0, 225.0, 225.0, 222.0, 228.0, 188.0, 170.0, 175.0, 215.0, 231.0, 225.0, 173.0, 169.0, 150.0, 143.0, 195.0, 192.0, 156.0, 174.0, 228.0, 234.0, 190.0, 220.0, 172.0, 190.0, 206.0, 202.0, 191.0, 196.0, 249.0, 210.0, 196.0, 197.0, 196.0, 203.0, 194.0, 202.0, 134.0, 116.0, 191.0, 202.0, 194.0, 213.0, 203.0, 205.0, 185.0, 205.0, 241.0, 209.0, 212.0, 175.0, 196.0, 205.0, 188.0, 202.0, 229.0, 224.0, 175.0, 158.0, 204.0, 180.0, 190.0, 211.0, 238.0, 206.0, 179.0, 162.0, 199.0, 196.0, 173.0, 145.0, 226.0, 236.0, 176.0, 163.0, 182.0, 162.0, 197.0, 253.0, 189.0, 224.0, 173.0, 166.0, 222.0, 234.0, 212.0, 175.0, 168.0, 168.0, 187.0, 194.0, 200.0, 207.0, 214.0, 199.0, 228.0, 228.0, 195.0, 195.0, 190.0, 209.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871680708954622, "mean_inference_ms": 
1.2096280272943736, "mean_action_processing_ms": 0.13298745342255253, "mean_env_wait_ms": 0.8628729261058392, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 190.0, "episode_reward_mean": 381.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 94.0}, "policy_reward_max": {"ppo": 253.0}, "policy_reward_mean": {"ppo": 190.7}, "hist_stats": {"episode_reward": [350.0, 237.0, 419.0, 395.0, 288.0, 441.0, 290.0, 287.0, 453.0, 399.0, 381.0, 347.0, 405.0, 355.0, 381.0, 402.0, 290.0, 404.0, 247.0, 305.0, 347.0, 263.0, 462.0, 393.0, 450.0, 402.0, 190.0, 290.0, 444.0, 399.0, 396.0, 358.0, 387.0, 393.0, 407.0, 348.0, 396.0, 441.0, 384.0, 299.0, 444.0, 381.0, 395.0, 456.0, 247.0, 416.0, 447.0, 341.0, 390.0, 449.0, 450.0, 450.0, 358.0, 390.0, 456.0, 342.0, 293.0, 387.0, 330.0, 462.0, 410.0, 362.0, 408.0, 387.0, 459.0, 393.0, 399.0, 396.0, 250.0, 393.0, 407.0, 408.0, 390.0, 450.0, 387.0, 401.0, 390.0, 453.0, 333.0, 384.0, 401.0, 444.0, 341.0, 395.0, 318.0, 462.0, 339.0, 344.0, 450.0, 413.0, 339.0, 456.0, 387.0, 336.0, 381.0, 407.0, 413.0, 456.0, 390.0, 399.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [190.0, 160.0, 119.0, 118.0, 208.0, 211.0, 203.0, 192.0, 140.0, 148.0, 214.0, 227.0, 138.0, 152.0, 133.0, 154.0, 234.0, 219.0, 202.0, 197.0, 178.0, 203.0, 182.0, 165.0, 206.0, 199.0, 178.0, 177.0, 182.0, 199.0, 197.0, 205.0, 136.0, 154.0, 201.0, 203.0, 107.0, 140.0, 149.0, 156.0, 161.0, 186.0, 135.0, 
128.0, 221.0, 241.0, 200.0, 193.0, 205.0, 245.0, 198.0, 204.0, 94.0, 96.0, 144.0, 146.0, 222.0, 222.0, 189.0, 210.0, 188.0, 208.0, 180.0, 178.0, 196.0, 191.0, 193.0, 200.0, 200.0, 207.0, 164.0, 184.0, 205.0, 191.0, 228.0, 213.0, 203.0, 181.0, 156.0, 143.0, 214.0, 230.0, 204.0, 177.0, 187.0, 208.0, 229.0, 227.0, 122.0, 125.0, 214.0, 202.0, 220.0, 227.0, 166.0, 175.0, 188.0, 202.0, 230.0, 219.0, 225.0, 225.0, 222.0, 228.0, 188.0, 170.0, 175.0, 215.0, 231.0, 225.0, 173.0, 169.0, 150.0, 143.0, 195.0, 192.0, 156.0, 174.0, 228.0, 234.0, 190.0, 220.0, 172.0, 190.0, 206.0, 202.0, 191.0, 196.0, 249.0, 210.0, 196.0, 197.0, 196.0, 203.0, 194.0, 202.0, 134.0, 116.0, 191.0, 202.0, 194.0, 213.0, 203.0, 205.0, 185.0, 205.0, 241.0, 209.0, 212.0, 175.0, 196.0, 205.0, 188.0, 202.0, 229.0, 224.0, 175.0, 158.0, 204.0, 180.0, 190.0, 211.0, 238.0, 206.0, 179.0, 162.0, 199.0, 196.0, 173.0, 145.0, 226.0, 236.0, 176.0, 163.0, 182.0, 162.0, 197.0, 253.0, 189.0, 224.0, 173.0, 166.0, 222.0, 234.0, 212.0, 175.0, 168.0, 168.0, 187.0, 194.0, 200.0, 207.0, 214.0, 199.0, 228.0, 228.0, 195.0, 195.0, 190.0, 209.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871680708954622, "mean_inference_ms": 1.2096280272943736, "mean_action_processing_ms": 0.13298745342255253, "mean_env_wait_ms": 0.8628729261058392, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1792000, "num_agent_steps_trained": 1792000, "num_env_steps_sampled": 896000, "num_env_steps_trained": 896000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 896000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1792000, "timers": {"training_iteration_time_ms": 3606.34, "learn_time_ms": 1082.566, "learn_throughput": 11823.76, "synch_weights_time_ms": 12.718}, "counters": {"num_env_steps_sampled": 896000, "num_env_steps_trained": 896000, "num_agent_steps_sampled": 1792000, "num_agent_steps_trained": 1792000}, "done": 
false, "episodes_total": 2240, "training_iteration": 70, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-28", "timestamp": 1666580668, "time_this_iter_s": 3.580299139022827, "time_total_s": 262.8790271282196, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, 
"disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, 
"actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 262.8790271282196, "timesteps_since_restore": 0, "iterations_since_restore": 70, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.740000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 135.6, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 115.26, "shaped_reward_min": 12, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 
0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.69, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 12.6, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 12.17, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.04, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.68, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 11.23, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.13, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.95, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 1.96, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.71, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.77, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.68, "soup_pickup_agent_0_min": 0, 
"soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.63, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.61, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 11.23, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.13, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.23, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.13, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.6940659197521496e-22, "cur_lr": 0.0010000000474974513, "total_loss": -0.004838631488382816, "policy_loss": -0.0050162081606686115, "vf_loss": 8.161375999450684, "vf_explained_var": 0.6948555707931519, "kl": 0.0016221408732235432, "entropy": 1.277127981185913, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 908800, "num_env_steps_trained": 908800, "num_agent_steps_sampled": 1817600, "num_agent_steps_trained": 1817600}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 12.0, "episode_reward_mean": 386.46, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 253.0}, "policy_reward_mean": {"ppo": 193.23}, "custom_metrics": {"sparse_reward_mean": 135.6, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 115.26, "shaped_reward_min": 12, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.69, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 12.6, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 12.17, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.04, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.57, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.68, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 11.23, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.13, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.82, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 1.95, 
"useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 1.96, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.94, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.71, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.77, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.68, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.63, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.61, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 11.23, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.13, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.23, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.13, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [387.0, 393.0, 407.0, 348.0, 396.0, 441.0, 384.0, 299.0, 444.0, 381.0, 395.0, 456.0, 247.0, 416.0, 447.0, 341.0, 390.0, 449.0, 450.0, 450.0, 358.0, 390.0, 456.0, 342.0, 293.0, 387.0, 330.0, 462.0, 410.0, 362.0, 408.0, 387.0, 459.0, 393.0, 399.0, 396.0, 250.0, 393.0, 407.0, 408.0, 390.0, 450.0, 387.0, 401.0, 390.0, 453.0, 333.0, 384.0, 401.0, 444.0, 341.0, 395.0, 318.0, 462.0, 339.0, 344.0, 450.0, 413.0, 339.0, 456.0, 387.0, 336.0, 381.0, 407.0, 413.0, 456.0, 390.0, 399.0, 295.0, 352.0, 396.0, 444.0, 444.0, 405.0, 444.0, 444.0, 402.0, 292.0, 393.0, 398.0, 401.0, 387.0, 401.0, 441.0, 393.0, 413.0, 12.0, 358.0, 453.0, 321.0, 358.0, 367.0, 402.0, 441.0, 399.0, 352.0, 450.0, 141.0, 384.0, 393.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [196.0, 191.0, 193.0, 200.0, 200.0, 207.0, 164.0, 184.0, 205.0, 191.0, 228.0, 213.0, 203.0, 181.0, 156.0, 143.0, 214.0, 230.0, 204.0, 177.0, 187.0, 208.0, 229.0, 227.0, 122.0, 125.0, 214.0, 202.0, 220.0, 227.0, 166.0, 175.0, 188.0, 202.0, 230.0, 219.0, 225.0, 225.0, 222.0, 228.0, 188.0, 170.0, 175.0, 215.0, 231.0, 225.0, 173.0, 169.0, 150.0, 143.0, 195.0, 192.0, 156.0, 174.0, 228.0, 234.0, 190.0, 220.0, 172.0, 190.0, 206.0, 202.0, 191.0, 196.0, 249.0, 210.0, 196.0, 197.0, 196.0, 203.0, 194.0, 202.0, 134.0, 116.0, 191.0, 202.0, 194.0, 213.0, 203.0, 205.0, 185.0, 205.0, 241.0, 209.0, 212.0, 175.0, 196.0, 205.0, 188.0, 202.0, 229.0, 224.0, 175.0, 158.0, 204.0, 180.0, 190.0, 211.0, 238.0, 206.0, 179.0, 162.0, 199.0, 196.0, 173.0, 145.0, 226.0, 236.0, 176.0, 163.0, 182.0, 162.0, 197.0, 253.0, 189.0, 224.0, 173.0, 166.0, 222.0, 234.0, 212.0, 175.0, 168.0, 168.0, 187.0, 194.0, 200.0, 207.0, 214.0, 199.0, 228.0, 228.0, 195.0, 195.0, 190.0, 209.0, 155.0, 140.0, 176.0, 176.0, 204.0, 192.0, 231.0, 213.0, 219.0, 225.0, 199.0, 206.0, 216.0, 228.0, 216.0, 228.0, 204.0, 198.0, 149.0, 143.0, 190.0, 203.0, 191.0, 207.0, 190.0, 211.0, 182.0, 205.0, 210.0, 191.0, 222.0, 219.0, 187.0, 206.0, 202.0, 211.0, 3.0, 9.0, 182.0, 176.0, 219.0, 234.0, 168.0, 153.0, 188.0, 170.0, 192.0, 175.0, 206.0, 196.0, 221.0, 220.0, 216.0, 183.0, 189.0, 163.0, 223.0, 227.0, 82.0, 59.0, 178.0, 206.0, 193.0, 200.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871857253872651, "mean_inference_ms": 
1.2093943881934999, "mean_action_processing_ms": 0.1329846227979475, "mean_env_wait_ms": 0.8622240571478598, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 12.0, "episode_reward_mean": 386.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 253.0}, "policy_reward_mean": {"ppo": 193.23}, "hist_stats": {"episode_reward": [387.0, 393.0, 407.0, 348.0, 396.0, 441.0, 384.0, 299.0, 444.0, 381.0, 395.0, 456.0, 247.0, 416.0, 447.0, 341.0, 390.0, 449.0, 450.0, 450.0, 358.0, 390.0, 456.0, 342.0, 293.0, 387.0, 330.0, 462.0, 410.0, 362.0, 408.0, 387.0, 459.0, 393.0, 399.0, 396.0, 250.0, 393.0, 407.0, 408.0, 390.0, 450.0, 387.0, 401.0, 390.0, 453.0, 333.0, 384.0, 401.0, 444.0, 341.0, 395.0, 318.0, 462.0, 339.0, 344.0, 450.0, 413.0, 339.0, 456.0, 387.0, 336.0, 381.0, 407.0, 413.0, 456.0, 390.0, 399.0, 295.0, 352.0, 396.0, 444.0, 444.0, 405.0, 444.0, 444.0, 402.0, 292.0, 393.0, 398.0, 401.0, 387.0, 401.0, 441.0, 393.0, 413.0, 12.0, 358.0, 453.0, 321.0, 358.0, 367.0, 402.0, 441.0, 399.0, 352.0, 450.0, 141.0, 384.0, 393.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [196.0, 191.0, 193.0, 200.0, 200.0, 207.0, 164.0, 184.0, 205.0, 191.0, 228.0, 213.0, 203.0, 181.0, 156.0, 143.0, 214.0, 230.0, 204.0, 177.0, 187.0, 208.0, 229.0, 227.0, 122.0, 125.0, 214.0, 202.0, 220.0, 227.0, 166.0, 175.0, 188.0, 202.0, 230.0, 219.0, 225.0, 225.0, 222.0, 228.0, 188.0, 170.0, 175.0, 
215.0, 231.0, 225.0, 173.0, 169.0, 150.0, 143.0, 195.0, 192.0, 156.0, 174.0, 228.0, 234.0, 190.0, 220.0, 172.0, 190.0, 206.0, 202.0, 191.0, 196.0, 249.0, 210.0, 196.0, 197.0, 196.0, 203.0, 194.0, 202.0, 134.0, 116.0, 191.0, 202.0, 194.0, 213.0, 203.0, 205.0, 185.0, 205.0, 241.0, 209.0, 212.0, 175.0, 196.0, 205.0, 188.0, 202.0, 229.0, 224.0, 175.0, 158.0, 204.0, 180.0, 190.0, 211.0, 238.0, 206.0, 179.0, 162.0, 199.0, 196.0, 173.0, 145.0, 226.0, 236.0, 176.0, 163.0, 182.0, 162.0, 197.0, 253.0, 189.0, 224.0, 173.0, 166.0, 222.0, 234.0, 212.0, 175.0, 168.0, 168.0, 187.0, 194.0, 200.0, 207.0, 214.0, 199.0, 228.0, 228.0, 195.0, 195.0, 190.0, 209.0, 155.0, 140.0, 176.0, 176.0, 204.0, 192.0, 231.0, 213.0, 219.0, 225.0, 199.0, 206.0, 216.0, 228.0, 216.0, 228.0, 204.0, 198.0, 149.0, 143.0, 190.0, 203.0, 191.0, 207.0, 190.0, 211.0, 182.0, 205.0, 210.0, 191.0, 222.0, 219.0, 187.0, 206.0, 202.0, 211.0, 3.0, 9.0, 182.0, 176.0, 219.0, 234.0, 168.0, 153.0, 188.0, 170.0, 192.0, 175.0, 206.0, 196.0, 221.0, 220.0, 216.0, 183.0, 189.0, 163.0, 223.0, 227.0, 82.0, 59.0, 178.0, 206.0, 193.0, 200.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871857253872651, "mean_inference_ms": 1.2093943881934999, "mean_action_processing_ms": 0.1329846227979475, "mean_env_wait_ms": 0.8622240571478598, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1817600, "num_agent_steps_trained": 1817600, "num_env_steps_sampled": 908800, "num_env_steps_trained": 908800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 908800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1817600, "timers": {"training_iteration_time_ms": 3594.621, "learn_time_ms": 1077.333, "learn_throughput": 11881.19, "synch_weights_time_ms": 12.178}, "counters": {"num_env_steps_sampled": 908800, "num_env_steps_trained": 908800, "num_agent_steps_sampled": 1817600, "num_agent_steps_trained": 1817600}, "done": 
false, "episodes_total": 2272, "training_iteration": 71, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-32", "timestamp": 1666580672, "time_this_iter_s": 3.6173627376556396, "time_total_s": 266.49638986587524, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, 
"disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, 
"actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 266.49638986587524, "timesteps_since_restore": 0, "iterations_since_restore": 71, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.983333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 134.4, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 115.68, "shaped_reward_min": 12, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.1, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 13.1, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 11.48, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.51, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.6, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 11.78, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.18, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 1.8, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.88, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.8, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.0, 
"soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.36, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.95, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 10.6, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 11.78, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.6, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 11.78, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 8.470329598760748e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.001648187288083136, "policy_loss": -0.0018436491955071688, "vf_loss": 8.266682624816895, "vf_explained_var": 0.6168429851531982, "kl": 0.001819998025894165, "entropy": 1.2624164819717407, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 921600, "num_env_steps_trained": 921600, "num_agent_steps_sampled": 1843200, "num_agent_steps_trained": 1843200}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 12.0, "episode_reward_mean": 384.48, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 253.0}, "policy_reward_mean": {"ppo": 192.24}, "custom_metrics": {"sparse_reward_mean": 134.4, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 115.68, "shaped_reward_min": 12, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.1, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 19, "onion_pickup_agent_1_mean": 13.1, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 11.48, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.51, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.6, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 11.78, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.18, 
"useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 1.8, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.57, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.88, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.8, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.0, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.36, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.95, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 6, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 10.6, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 11.78, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.6, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 11.78, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [459.0, 393.0, 399.0, 396.0, 250.0, 393.0, 407.0, 408.0, 390.0, 450.0, 387.0, 401.0, 390.0, 453.0, 333.0, 384.0, 401.0, 444.0, 341.0, 395.0, 318.0, 462.0, 339.0, 344.0, 450.0, 413.0, 339.0, 456.0, 387.0, 336.0, 381.0, 407.0, 413.0, 456.0, 390.0, 399.0, 295.0, 352.0, 396.0, 444.0, 444.0, 405.0, 444.0, 444.0, 402.0, 292.0, 393.0, 398.0, 401.0, 387.0, 401.0, 441.0, 393.0, 413.0, 12.0, 358.0, 453.0, 321.0, 358.0, 367.0, 402.0, 441.0, 399.0, 352.0, 450.0, 141.0, 384.0, 393.0, 339.0, 373.0, 370.0, 441.0, 408.0, 387.0, 341.0, 402.0, 400.0, 401.0, 396.0, 333.0, 441.0, 387.0, 407.0, 350.0, 401.0, 411.0, 390.0, 398.0, 407.0, 367.0, 291.0, 362.0, 392.0, 453.0, 462.0, 355.0, 347.0, 353.0, 347.0, 396.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [249.0, 210.0, 196.0, 197.0, 196.0, 203.0, 194.0, 202.0, 134.0, 116.0, 191.0, 202.0, 194.0, 213.0, 203.0, 205.0, 185.0, 205.0, 241.0, 209.0, 212.0, 175.0, 196.0, 205.0, 188.0, 202.0, 229.0, 224.0, 175.0, 158.0, 204.0, 180.0, 190.0, 211.0, 238.0, 206.0, 179.0, 162.0, 199.0, 196.0, 173.0, 145.0, 226.0, 236.0, 176.0, 163.0, 182.0, 162.0, 197.0, 253.0, 189.0, 224.0, 173.0, 166.0, 222.0, 234.0, 212.0, 175.0, 168.0, 168.0, 187.0, 194.0, 200.0, 207.0, 214.0, 199.0, 228.0, 228.0, 195.0, 195.0, 190.0, 209.0, 155.0, 140.0, 176.0, 176.0, 204.0, 192.0, 231.0, 213.0, 219.0, 225.0, 199.0, 206.0, 216.0, 228.0, 216.0, 228.0, 204.0, 198.0, 149.0, 143.0, 190.0, 203.0, 191.0, 207.0, 190.0, 211.0, 182.0, 205.0, 210.0, 191.0, 222.0, 219.0, 187.0, 206.0, 202.0, 211.0, 3.0, 9.0, 182.0, 176.0, 219.0, 234.0, 168.0, 153.0, 188.0, 170.0, 192.0, 175.0, 206.0, 196.0, 221.0, 220.0, 216.0, 183.0, 189.0, 163.0, 223.0, 227.0, 82.0, 59.0, 178.0, 206.0, 193.0, 200.0, 170.0, 169.0, 189.0, 184.0, 185.0, 185.0, 227.0, 214.0, 201.0, 207.0, 182.0, 205.0, 160.0, 181.0, 203.0, 199.0, 189.0, 211.0, 197.0, 204.0, 195.0, 201.0, 154.0, 179.0, 216.0, 225.0, 190.0, 197.0, 222.0, 185.0, 177.0, 173.0, 205.0, 196.0, 208.0, 203.0, 198.0, 192.0, 189.0, 209.0, 199.0, 208.0, 190.0, 177.0, 165.0, 126.0, 174.0, 188.0, 196.0, 196.0, 235.0, 218.0, 237.0, 225.0, 164.0, 191.0, 188.0, 159.0, 176.0, 177.0, 161.0, 186.0, 199.0, 197.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871988687200685, "mean_inference_ms": 
1.2092524040203814, "mean_action_processing_ms": 0.13299117900125684, "mean_env_wait_ms": 0.8617021906416065, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 12.0, "episode_reward_mean": 384.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 253.0}, "policy_reward_mean": {"ppo": 192.24}, "hist_stats": {"episode_reward": [459.0, 393.0, 399.0, 396.0, 250.0, 393.0, 407.0, 408.0, 390.0, 450.0, 387.0, 401.0, 390.0, 453.0, 333.0, 384.0, 401.0, 444.0, 341.0, 395.0, 318.0, 462.0, 339.0, 344.0, 450.0, 413.0, 339.0, 456.0, 387.0, 336.0, 381.0, 407.0, 413.0, 456.0, 390.0, 399.0, 295.0, 352.0, 396.0, 444.0, 444.0, 405.0, 444.0, 444.0, 402.0, 292.0, 393.0, 398.0, 401.0, 387.0, 401.0, 441.0, 393.0, 413.0, 12.0, 358.0, 453.0, 321.0, 358.0, 367.0, 402.0, 441.0, 399.0, 352.0, 450.0, 141.0, 384.0, 393.0, 339.0, 373.0, 370.0, 441.0, 408.0, 387.0, 341.0, 402.0, 400.0, 401.0, 396.0, 333.0, 441.0, 387.0, 407.0, 350.0, 401.0, 411.0, 390.0, 398.0, 407.0, 367.0, 291.0, 362.0, 392.0, 453.0, 462.0, 355.0, 347.0, 353.0, 347.0, 396.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [249.0, 210.0, 196.0, 197.0, 196.0, 203.0, 194.0, 202.0, 134.0, 116.0, 191.0, 202.0, 194.0, 213.0, 203.0, 205.0, 185.0, 205.0, 241.0, 209.0, 212.0, 175.0, 196.0, 205.0, 188.0, 202.0, 229.0, 224.0, 175.0, 158.0, 204.0, 180.0, 190.0, 211.0, 238.0, 206.0, 179.0, 162.0, 199.0, 196.0, 173.0, 145.0, 226.0, 
236.0, 176.0, 163.0, 182.0, 162.0, 197.0, 253.0, 189.0, 224.0, 173.0, 166.0, 222.0, 234.0, 212.0, 175.0, 168.0, 168.0, 187.0, 194.0, 200.0, 207.0, 214.0, 199.0, 228.0, 228.0, 195.0, 195.0, 190.0, 209.0, 155.0, 140.0, 176.0, 176.0, 204.0, 192.0, 231.0, 213.0, 219.0, 225.0, 199.0, 206.0, 216.0, 228.0, 216.0, 228.0, 204.0, 198.0, 149.0, 143.0, 190.0, 203.0, 191.0, 207.0, 190.0, 211.0, 182.0, 205.0, 210.0, 191.0, 222.0, 219.0, 187.0, 206.0, 202.0, 211.0, 3.0, 9.0, 182.0, 176.0, 219.0, 234.0, 168.0, 153.0, 188.0, 170.0, 192.0, 175.0, 206.0, 196.0, 221.0, 220.0, 216.0, 183.0, 189.0, 163.0, 223.0, 227.0, 82.0, 59.0, 178.0, 206.0, 193.0, 200.0, 170.0, 169.0, 189.0, 184.0, 185.0, 185.0, 227.0, 214.0, 201.0, 207.0, 182.0, 205.0, 160.0, 181.0, 203.0, 199.0, 189.0, 211.0, 197.0, 204.0, 195.0, 201.0, 154.0, 179.0, 216.0, 225.0, 190.0, 197.0, 222.0, 185.0, 177.0, 173.0, 205.0, 196.0, 208.0, 203.0, 198.0, 192.0, 189.0, 209.0, 199.0, 208.0, 190.0, 177.0, 165.0, 126.0, 174.0, 188.0, 196.0, 196.0, 235.0, 218.0, 237.0, 225.0, 164.0, 191.0, 188.0, 159.0, 176.0, 177.0, 161.0, 186.0, 199.0, 197.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871988687200685, "mean_inference_ms": 1.2092524040203814, "mean_action_processing_ms": 0.13299117900125684, "mean_env_wait_ms": 0.8617021906416065, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1843200, "num_agent_steps_trained": 1843200, "num_env_steps_sampled": 921600, "num_env_steps_trained": 921600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 921600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1843200, "timers": {"training_iteration_time_ms": 3591.057, "learn_time_ms": 1068.352, "learn_throughput": 11981.073, "synch_weights_time_ms": 12.227}, "counters": {"num_env_steps_sampled": 921600, "num_env_steps_trained": 921600, "num_agent_steps_sampled": 1843200, "num_agent_steps_trained": 1843200}, "done": 
false, "episodes_total": 2304, "training_iteration": 72, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-36", "timestamp": 1666580676, "time_this_iter_s": 3.6749939918518066, "time_total_s": 270.17138385772705, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, 
"disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, 
"actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 270.17138385772705, "timesteps_since_restore": 0, "iterations_since_restore": 72, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.94, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 132.4, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 116.34, "shaped_reward_min": 12, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.0, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.55, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 11.3, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.81, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.71, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.69, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.37, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 12.05, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 2.21, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 1.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.7, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.83, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 4.02, "soup_pickup_agent_0_min": 0, 
"soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.42, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.93, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.27, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 10.37, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 12.05, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.37, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 12.05, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.235164799380374e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015343400882557034, "policy_loss": -0.0017145425081253052, "vf_loss": 8.12104606628418, "vf_explained_var": 0.6849566698074341, "kl": 0.0017699010204523802, "entropy": 1.2638133764266968, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 934400, "num_env_steps_trained": 934400, "num_agent_steps_sampled": 1868800, "num_agent_steps_trained": 1868800}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 12.0, "episode_reward_mean": 381.14, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 237.0}, "policy_reward_mean": {"ppo": 190.57}, "custom_metrics": {"sparse_reward_mean": 132.4, "sparse_reward_min": 0, "sparse_reward_max": 160, "shaped_reward_mean": 116.34, "shaped_reward_min": 12, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.0, "onion_pickup_agent_0_min": 0, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.55, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 11.3, "useful_onion_pickup_agent_0_min": 0, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.81, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.71, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.69, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.37, "potting_onion_agent_0_min": 0, "potting_onion_agent_0_max": 16, "potting_onion_agent_1_mean": 12.05, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 2.21, 
"useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 1.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 5, "dish_drop_agent_0_mean": 1.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.85, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.7, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.83, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 4.02, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.42, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.93, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.27, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 10.37, "optimal_onion_potting_agent_0_min": 0, "optimal_onion_potting_agent_0_max": 16, "optimal_onion_potting_agent_1_mean": 12.05, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.37, "viable_onion_potting_agent_0_min": 0, "viable_onion_potting_agent_0_max": 16, "viable_onion_potting_agent_1_mean": 12.05, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [413.0, 456.0, 390.0, 399.0, 295.0, 352.0, 396.0, 444.0, 444.0, 405.0, 444.0, 444.0, 402.0, 292.0, 393.0, 398.0, 401.0, 387.0, 401.0, 441.0, 393.0, 413.0, 12.0, 358.0, 453.0, 321.0, 358.0, 367.0, 402.0, 441.0, 399.0, 352.0, 450.0, 141.0, 384.0, 393.0, 339.0, 373.0, 370.0, 441.0, 408.0, 387.0, 341.0, 402.0, 400.0, 401.0, 396.0, 333.0, 441.0, 387.0, 407.0, 350.0, 401.0, 411.0, 390.0, 398.0, 407.0, 367.0, 291.0, 362.0, 392.0, 453.0, 462.0, 355.0, 347.0, 353.0, 347.0, 396.0, 367.0, 410.0, 355.0, 395.0, 349.0, 405.0, 450.0, 459.0, 447.0, 410.0, 293.0, 399.0, 164.0, 450.0, 353.0, 444.0, 356.0, 387.0, 405.0, 356.0, 404.0, 356.0, 398.0, 407.0, 361.0, 387.0, 413.0, 392.0, 353.0, 364.0, 285.0, 398.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [214.0, 199.0, 228.0, 228.0, 195.0, 195.0, 190.0, 209.0, 155.0, 140.0, 176.0, 176.0, 204.0, 192.0, 231.0, 213.0, 219.0, 225.0, 199.0, 206.0, 216.0, 228.0, 216.0, 228.0, 204.0, 198.0, 149.0, 143.0, 190.0, 203.0, 191.0, 207.0, 190.0, 211.0, 182.0, 205.0, 210.0, 191.0, 222.0, 219.0, 187.0, 206.0, 202.0, 211.0, 3.0, 9.0, 182.0, 176.0, 219.0, 234.0, 168.0, 153.0, 188.0, 170.0, 192.0, 175.0, 206.0, 196.0, 221.0, 220.0, 216.0, 183.0, 189.0, 163.0, 223.0, 227.0, 82.0, 59.0, 178.0, 206.0, 193.0, 200.0, 170.0, 169.0, 189.0, 184.0, 185.0, 185.0, 227.0, 214.0, 201.0, 207.0, 182.0, 205.0, 160.0, 181.0, 203.0, 199.0, 189.0, 211.0, 197.0, 204.0, 195.0, 201.0, 154.0, 179.0, 216.0, 225.0, 190.0, 197.0, 222.0, 185.0, 177.0, 173.0, 205.0, 196.0, 208.0, 203.0, 198.0, 192.0, 189.0, 209.0, 199.0, 208.0, 190.0, 177.0, 165.0, 126.0, 174.0, 188.0, 196.0, 196.0, 235.0, 218.0, 237.0, 225.0, 164.0, 191.0, 188.0, 159.0, 176.0, 177.0, 161.0, 186.0, 199.0, 197.0, 175.0, 192.0, 194.0, 216.0, 174.0, 181.0, 190.0, 205.0, 182.0, 167.0, 211.0, 194.0, 217.0, 233.0, 236.0, 223.0, 234.0, 213.0, 211.0, 199.0, 153.0, 140.0, 199.0, 200.0, 87.0, 77.0, 222.0, 228.0, 170.0, 183.0, 223.0, 221.0, 194.0, 162.0, 190.0, 197.0, 210.0, 195.0, 185.0, 171.0, 205.0, 199.0, 185.0, 171.0, 174.0, 224.0, 184.0, 223.0, 182.0, 179.0, 183.0, 204.0, 198.0, 215.0, 202.0, 190.0, 163.0, 190.0, 164.0, 200.0, 145.0, 140.0, 201.0, 197.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871159082430706, "mean_inference_ms": 1.2095804059053032, 
"mean_action_processing_ms": 0.1329925772348698, "mean_env_wait_ms": 0.8617921215158202, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 12.0, "episode_reward_mean": 381.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 237.0}, "policy_reward_mean": {"ppo": 190.57}, "hist_stats": {"episode_reward": [413.0, 456.0, 390.0, 399.0, 295.0, 352.0, 396.0, 444.0, 444.0, 405.0, 444.0, 444.0, 402.0, 292.0, 393.0, 398.0, 401.0, 387.0, 401.0, 441.0, 393.0, 413.0, 12.0, 358.0, 453.0, 321.0, 358.0, 367.0, 402.0, 441.0, 399.0, 352.0, 450.0, 141.0, 384.0, 393.0, 339.0, 373.0, 370.0, 441.0, 408.0, 387.0, 341.0, 402.0, 400.0, 401.0, 396.0, 333.0, 441.0, 387.0, 407.0, 350.0, 401.0, 411.0, 390.0, 398.0, 407.0, 367.0, 291.0, 362.0, 392.0, 453.0, 462.0, 355.0, 347.0, 353.0, 347.0, 396.0, 367.0, 410.0, 355.0, 395.0, 349.0, 405.0, 450.0, 459.0, 447.0, 410.0, 293.0, 399.0, 164.0, 450.0, 353.0, 444.0, 356.0, 387.0, 405.0, 356.0, 404.0, 356.0, 398.0, 407.0, 361.0, 387.0, 413.0, 392.0, 353.0, 364.0, 285.0, 398.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [214.0, 199.0, 228.0, 228.0, 195.0, 195.0, 190.0, 209.0, 155.0, 140.0, 176.0, 176.0, 204.0, 192.0, 231.0, 213.0, 219.0, 225.0, 199.0, 206.0, 216.0, 228.0, 216.0, 228.0, 204.0, 198.0, 149.0, 143.0, 190.0, 203.0, 191.0, 207.0, 190.0, 211.0, 182.0, 205.0, 210.0, 191.0, 222.0, 219.0, 187.0, 206.0, 202.0, 211.0, 3.0, 9.0, 182.0, 
176.0, 219.0, 234.0, 168.0, 153.0, 188.0, 170.0, 192.0, 175.0, 206.0, 196.0, 221.0, 220.0, 216.0, 183.0, 189.0, 163.0, 223.0, 227.0, 82.0, 59.0, 178.0, 206.0, 193.0, 200.0, 170.0, 169.0, 189.0, 184.0, 185.0, 185.0, 227.0, 214.0, 201.0, 207.0, 182.0, 205.0, 160.0, 181.0, 203.0, 199.0, 189.0, 211.0, 197.0, 204.0, 195.0, 201.0, 154.0, 179.0, 216.0, 225.0, 190.0, 197.0, 222.0, 185.0, 177.0, 173.0, 205.0, 196.0, 208.0, 203.0, 198.0, 192.0, 189.0, 209.0, 199.0, 208.0, 190.0, 177.0, 165.0, 126.0, 174.0, 188.0, 196.0, 196.0, 235.0, 218.0, 237.0, 225.0, 164.0, 191.0, 188.0, 159.0, 176.0, 177.0, 161.0, 186.0, 199.0, 197.0, 175.0, 192.0, 194.0, 216.0, 174.0, 181.0, 190.0, 205.0, 182.0, 167.0, 211.0, 194.0, 217.0, 233.0, 236.0, 223.0, 234.0, 213.0, 211.0, 199.0, 153.0, 140.0, 199.0, 200.0, 87.0, 77.0, 222.0, 228.0, 170.0, 183.0, 223.0, 221.0, 194.0, 162.0, 190.0, 197.0, 210.0, 195.0, 185.0, 171.0, 205.0, 199.0, 185.0, 171.0, 174.0, 224.0, 184.0, 223.0, 182.0, 179.0, 183.0, 204.0, 198.0, 215.0, 202.0, 190.0, 163.0, 190.0, 164.0, 200.0, 145.0, 140.0, 201.0, 197.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6871159082430706, "mean_inference_ms": 1.2095804059053032, "mean_action_processing_ms": 0.1329925772348698, "mean_env_wait_ms": 0.8617921215158202, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1868800, "num_agent_steps_trained": 1868800, "num_env_steps_sampled": 934400, "num_env_steps_trained": 934400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 934400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1868800, "timers": {"training_iteration_time_ms": 3617.965, "learn_time_ms": 1066.048, "learn_throughput": 12006.96, "synch_weights_time_ms": 12.585}, "counters": {"num_env_steps_sampled": 934400, "num_env_steps_trained": 934400, "num_agent_steps_sampled": 1868800, "num_agent_steps_trained": 1868800}, "done": false, "episodes_total": 
2336, "training_iteration": 73, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-40", "timestamp": 1666580680, "time_this_iter_s": 3.999202013015747, "time_total_s": 274.1705858707428, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 
16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, 
"num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, 
"postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, 
"lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 274.1705858707428, "timesteps_since_restore": 0, "iterations_since_restore": 73, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.833333333333332, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 133.0, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 116.89, "shaped_reward_min": 61, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 12.08, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.47, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 11.36, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.77, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.69, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.47, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 12.01, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 2.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.09, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.8, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.88, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.52, 
"soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.78, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.38, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 10.47, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 12.01, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.47, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 12.01, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.117582399690187e-23, "cur_lr": 0.0010000000474974513, "total_loss": 0.0002928805770352483, "policy_loss": 0.0001178687671199441, "vf_loss": 8.05293083190918, "vf_explained_var": 0.6890039443969727, "kl": 0.001798928715288639, "entropy": 1.2605663537979126, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 947200, "num_env_steps_trained": 947200, "num_agent_steps_sampled": 1894400, "num_agent_steps_trained": 1894400}, "sampler_results": {"episode_reward_max": 462.0, "episode_reward_min": 141.0, "episode_reward_mean": 382.89, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 59.0}, "policy_reward_max": {"ppo": 252.0}, "policy_reward_mean": {"ppo": 191.445}, "custom_metrics": {"sparse_reward_mean": 133.0, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 116.89, "shaped_reward_min": 61, "shaped_reward_max": 142, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.08, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.47, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 11.36, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 18, "useful_onion_pickup_agent_1_mean": 12.77, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.69, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.7, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.47, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 12.01, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 2.16, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, 
"useful_dish_pickup_agent_1_mean": 2.09, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.68, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.8, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.88, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.78, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.38, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 10.47, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 12.01, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.47, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 12.01, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [450.0, 141.0, 384.0, 393.0, 339.0, 373.0, 370.0, 441.0, 408.0, 387.0, 341.0, 402.0, 400.0, 401.0, 396.0, 333.0, 441.0, 387.0, 407.0, 350.0, 401.0, 411.0, 390.0, 398.0, 407.0, 367.0, 291.0, 362.0, 392.0, 453.0, 462.0, 355.0, 347.0, 353.0, 347.0, 396.0, 367.0, 410.0, 355.0, 395.0, 349.0, 405.0, 450.0, 459.0, 447.0, 410.0, 293.0, 399.0, 164.0, 450.0, 353.0, 444.0, 356.0, 387.0, 405.0, 356.0, 404.0, 356.0, 398.0, 407.0, 361.0, 387.0, 413.0, 392.0, 353.0, 364.0, 285.0, 398.0, 405.0, 407.0, 399.0, 393.0, 402.0, 456.0, 396.0, 453.0, 390.0, 287.0, 444.0, 341.0, 359.0, 387.0, 396.0, 459.0, 282.0, 450.0, 402.0, 399.0, 390.0, 408.0, 413.0, 396.0, 402.0, 447.0, 304.0, 456.0, 349.0, 304.0, 252.0, 413.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [223.0, 227.0, 82.0, 59.0, 178.0, 206.0, 193.0, 200.0, 170.0, 169.0, 189.0, 184.0, 185.0, 185.0, 227.0, 214.0, 201.0, 207.0, 182.0, 205.0, 160.0, 181.0, 203.0, 199.0, 189.0, 211.0, 197.0, 204.0, 195.0, 201.0, 154.0, 179.0, 216.0, 225.0, 190.0, 197.0, 222.0, 185.0, 177.0, 173.0, 205.0, 196.0, 208.0, 203.0, 198.0, 192.0, 189.0, 209.0, 199.0, 208.0, 190.0, 177.0, 165.0, 126.0, 174.0, 188.0, 196.0, 196.0, 235.0, 218.0, 237.0, 225.0, 164.0, 191.0, 188.0, 159.0, 176.0, 177.0, 161.0, 186.0, 199.0, 197.0, 175.0, 192.0, 194.0, 216.0, 174.0, 181.0, 190.0, 205.0, 182.0, 167.0, 211.0, 194.0, 217.0, 233.0, 236.0, 223.0, 234.0, 213.0, 211.0, 199.0, 153.0, 140.0, 199.0, 200.0, 87.0, 77.0, 222.0, 228.0, 170.0, 183.0, 223.0, 221.0, 194.0, 162.0, 190.0, 197.0, 210.0, 195.0, 185.0, 171.0, 205.0, 199.0, 185.0, 171.0, 174.0, 224.0, 184.0, 223.0, 182.0, 179.0, 183.0, 204.0, 198.0, 215.0, 202.0, 190.0, 163.0, 190.0, 164.0, 200.0, 145.0, 140.0, 201.0, 197.0, 199.0, 206.0, 191.0, 216.0, 189.0, 210.0, 201.0, 192.0, 197.0, 205.0, 240.0, 216.0, 201.0, 195.0, 201.0, 252.0, 199.0, 191.0, 148.0, 139.0, 222.0, 222.0, 164.0, 177.0, 156.0, 203.0, 179.0, 208.0, 209.0, 187.0, 233.0, 226.0, 135.0, 147.0, 238.0, 212.0, 211.0, 191.0, 200.0, 199.0, 180.0, 210.0, 210.0, 198.0, 201.0, 212.0, 192.0, 204.0, 199.0, 203.0, 229.0, 218.0, 140.0, 164.0, 230.0, 226.0, 170.0, 179.0, 142.0, 162.0, 140.0, 112.0, 213.0, 200.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686933826173813, "mean_inference_ms": 1.2098385795333195, "mean_action_processing_ms": 0.13296885818111198, "mean_env_wait_ms": 
0.861716192870976, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 462.0, "episode_reward_min": 141.0, "episode_reward_mean": 382.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 59.0}, "policy_reward_max": {"ppo": 252.0}, "policy_reward_mean": {"ppo": 191.445}, "hist_stats": {"episode_reward": [450.0, 141.0, 384.0, 393.0, 339.0, 373.0, 370.0, 441.0, 408.0, 387.0, 341.0, 402.0, 400.0, 401.0, 396.0, 333.0, 441.0, 387.0, 407.0, 350.0, 401.0, 411.0, 390.0, 398.0, 407.0, 367.0, 291.0, 362.0, 392.0, 453.0, 462.0, 355.0, 347.0, 353.0, 347.0, 396.0, 367.0, 410.0, 355.0, 395.0, 349.0, 405.0, 450.0, 459.0, 447.0, 410.0, 293.0, 399.0, 164.0, 450.0, 353.0, 444.0, 356.0, 387.0, 405.0, 356.0, 404.0, 356.0, 398.0, 407.0, 361.0, 387.0, 413.0, 392.0, 353.0, 364.0, 285.0, 398.0, 405.0, 407.0, 399.0, 393.0, 402.0, 456.0, 396.0, 453.0, 390.0, 287.0, 444.0, 341.0, 359.0, 387.0, 396.0, 459.0, 282.0, 450.0, 402.0, 399.0, 390.0, 408.0, 413.0, 396.0, 402.0, 447.0, 304.0, 456.0, 349.0, 304.0, 252.0, 413.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [223.0, 227.0, 82.0, 59.0, 178.0, 206.0, 193.0, 200.0, 170.0, 169.0, 189.0, 184.0, 185.0, 185.0, 227.0, 214.0, 201.0, 207.0, 182.0, 205.0, 160.0, 181.0, 203.0, 199.0, 189.0, 211.0, 197.0, 204.0, 195.0, 201.0, 154.0, 179.0, 216.0, 225.0, 190.0, 197.0, 222.0, 185.0, 177.0, 173.0, 205.0, 196.0, 208.0, 203.0, 198.0, 192.0, 189.0, 209.0, 199.0, 208.0, 190.0, 177.0, 165.0, 126.0, 174.0, 188.0, 
196.0, 196.0, 235.0, 218.0, 237.0, 225.0, 164.0, 191.0, 188.0, 159.0, 176.0, 177.0, 161.0, 186.0, 199.0, 197.0, 175.0, 192.0, 194.0, 216.0, 174.0, 181.0, 190.0, 205.0, 182.0, 167.0, 211.0, 194.0, 217.0, 233.0, 236.0, 223.0, 234.0, 213.0, 211.0, 199.0, 153.0, 140.0, 199.0, 200.0, 87.0, 77.0, 222.0, 228.0, 170.0, 183.0, 223.0, 221.0, 194.0, 162.0, 190.0, 197.0, 210.0, 195.0, 185.0, 171.0, 205.0, 199.0, 185.0, 171.0, 174.0, 224.0, 184.0, 223.0, 182.0, 179.0, 183.0, 204.0, 198.0, 215.0, 202.0, 190.0, 163.0, 190.0, 164.0, 200.0, 145.0, 140.0, 201.0, 197.0, 199.0, 206.0, 191.0, 216.0, 189.0, 210.0, 201.0, 192.0, 197.0, 205.0, 240.0, 216.0, 201.0, 195.0, 201.0, 252.0, 199.0, 191.0, 148.0, 139.0, 222.0, 222.0, 164.0, 177.0, 156.0, 203.0, 179.0, 208.0, 209.0, 187.0, 233.0, 226.0, 135.0, 147.0, 238.0, 212.0, 211.0, 191.0, 200.0, 199.0, 180.0, 210.0, 210.0, 198.0, 201.0, 212.0, 192.0, 204.0, 199.0, 203.0, 229.0, 218.0, 140.0, 164.0, 230.0, 226.0, 170.0, 179.0, 142.0, 162.0, 140.0, 112.0, 213.0, 200.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686933826173813, "mean_inference_ms": 1.2098385795333195, "mean_action_processing_ms": 0.13296885818111198, "mean_env_wait_ms": 0.861716192870976, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1894400, "num_agent_steps_trained": 1894400, "num_env_steps_sampled": 947200, "num_env_steps_trained": 947200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 947200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1894400, "timers": {"training_iteration_time_ms": 3619.249, "learn_time_ms": 1070.336, "learn_throughput": 11958.865, "synch_weights_time_ms": 13.61}, "counters": {"num_env_steps_sampled": 947200, "num_env_steps_trained": 947200, "num_agent_steps_sampled": 1894400, "num_agent_steps_trained": 1894400}, "done": false, "episodes_total": 2368, "training_iteration": 74, "trial_id": "default", 
"experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-44", "timestamp": 1666580684, "time_this_iter_s": 3.642000436782837, "time_total_s": 277.81258630752563, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", 
"sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": 
{}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, 
"sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": 
null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": 
""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, 
"vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 277.81258630752563, "timesteps_since_restore": 0, "iterations_since_restore": 74, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.666666666666668, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 136.2, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 119.42, "shaped_reward_min": 82, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.55, "onion_pickup_agent_0_min": 
2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.41, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 11.86, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 12.77, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.69, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.9, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.91, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 2.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 1.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.78, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.66, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.72, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, 
"soup_delivery_agent_0_mean": 3.6, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 10.9, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.91, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.9, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.91, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, 
"useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.0587911998450935e-23, "cur_lr": 0.0010000000474974513, "total_loss": -0.002930144779384136, "policy_loss": -0.0031254859641194344, "vf_loss": 8.116436958312988, "vf_explained_var": 0.6190094947814941, "kl": 0.0018185349181294441, "entropy": 1.2326085567474365, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 960000, "num_env_steps_trained": 960000, "num_agent_steps_sampled": 1920000, "num_agent_steps_trained": 1920000}, "sampler_results": {"episode_reward_max": 473.0, "episode_reward_min": 164.0, "episode_reward_mean": 391.82, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 77.0}, "policy_reward_max": {"ppo": 252.0}, "policy_reward_mean": {"ppo": 195.91}, "custom_metrics": {"sparse_reward_mean": 136.2, "sparse_reward_min": 40, "sparse_reward_max": 160, "shaped_reward_mean": 119.42, "shaped_reward_min": 82, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 
0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.55, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.41, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 11.86, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 12.77, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 10, "onion_drop_agent_1_mean": 1.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 7, "useful_onion_drop_agent_1_mean": 0.69, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 10.9, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.91, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 2.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.46, "useful_dish_pickup_agent_1_min": 0, 
"useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 1.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.49, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.78, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.66, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 6, "soup_pickup_agent_0_mean": 3.72, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.6, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 10.9, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.91, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 10.9, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.91, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, 
"catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [347.0, 353.0, 347.0, 396.0, 367.0, 410.0, 355.0, 395.0, 349.0, 405.0, 450.0, 459.0, 447.0, 410.0, 293.0, 399.0, 164.0, 450.0, 353.0, 444.0, 356.0, 387.0, 405.0, 356.0, 404.0, 356.0, 398.0, 407.0, 361.0, 387.0, 413.0, 392.0, 353.0, 364.0, 285.0, 398.0, 405.0, 407.0, 399.0, 393.0, 402.0, 456.0, 396.0, 453.0, 390.0, 287.0, 444.0, 341.0, 359.0, 387.0, 396.0, 459.0, 282.0, 450.0, 402.0, 399.0, 390.0, 408.0, 413.0, 396.0, 402.0, 447.0, 304.0, 456.0, 349.0, 304.0, 252.0, 413.0, 332.0, 416.0, 347.0, 441.0, 399.0, 342.0, 413.0, 344.0, 336.0, 456.0, 473.0, 408.0, 459.0, 450.0, 456.0, 456.0, 410.0, 459.0, 384.0, 410.0, 349.0, 381.0, 462.0, 404.0, 404.0, 398.0, 390.0, 387.0, 444.0, 407.0, 450.0, 459.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [188.0, 159.0, 176.0, 177.0, 161.0, 186.0, 199.0, 197.0, 175.0, 192.0, 194.0, 216.0, 174.0, 181.0, 190.0, 205.0, 182.0, 167.0, 211.0, 194.0, 217.0, 233.0, 236.0, 223.0, 234.0, 213.0, 211.0, 199.0, 153.0, 140.0, 199.0, 200.0, 87.0, 77.0, 222.0, 228.0, 170.0, 183.0, 223.0, 221.0, 194.0, 162.0, 190.0, 197.0, 210.0, 195.0, 185.0, 171.0, 205.0, 199.0, 185.0, 171.0, 174.0, 224.0, 184.0, 223.0, 182.0, 179.0, 183.0, 204.0, 198.0, 215.0, 202.0, 190.0, 163.0, 190.0, 164.0, 200.0, 145.0, 140.0, 201.0, 197.0, 199.0, 206.0, 191.0, 216.0, 189.0, 210.0, 201.0, 192.0, 197.0, 205.0, 240.0, 216.0, 201.0, 195.0, 201.0, 252.0, 199.0, 191.0, 148.0, 139.0, 222.0, 222.0, 164.0, 177.0, 156.0, 203.0, 179.0, 208.0, 209.0, 187.0, 233.0, 226.0, 135.0, 147.0, 238.0, 212.0, 211.0, 191.0, 200.0, 199.0, 180.0, 210.0, 210.0, 198.0, 201.0, 212.0, 192.0, 204.0, 199.0, 203.0, 229.0, 218.0, 140.0, 164.0, 230.0, 226.0, 170.0, 179.0, 142.0, 162.0, 140.0, 112.0, 213.0, 200.0, 169.0, 163.0, 213.0, 203.0, 179.0, 168.0, 205.0, 236.0, 190.0, 209.0, 156.0, 186.0, 200.0, 213.0, 166.0, 178.0, 179.0, 157.0, 221.0, 235.0, 235.0, 238.0, 190.0, 218.0, 238.0, 221.0, 217.0, 233.0, 225.0, 231.0, 228.0, 228.0, 209.0, 201.0, 225.0, 234.0, 179.0, 205.0, 201.0, 209.0, 183.0, 166.0, 169.0, 212.0, 211.0, 251.0, 199.0, 205.0, 203.0, 201.0, 208.0, 190.0, 185.0, 205.0, 190.0, 197.0, 209.0, 235.0, 199.0, 208.0, 236.0, 214.0, 233.0, 226.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867719801502224, "mean_inference_ms": 1.2099976017691645, "mean_action_processing_ms": 0.1329235273872266, "mean_env_wait_ms": 0.861473241182441, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, 
"episode_reward_max": 473.0, "episode_reward_min": 164.0, "episode_reward_mean": 391.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 77.0}, "policy_reward_max": {"ppo": 252.0}, "policy_reward_mean": {"ppo": 195.91}, "hist_stats": {"episode_reward": [347.0, 353.0, 347.0, 396.0, 367.0, 410.0, 355.0, 395.0, 349.0, 405.0, 450.0, 459.0, 447.0, 410.0, 293.0, 399.0, 164.0, 450.0, 353.0, 444.0, 356.0, 387.0, 405.0, 356.0, 404.0, 356.0, 398.0, 407.0, 361.0, 387.0, 413.0, 392.0, 353.0, 364.0, 285.0, 398.0, 405.0, 407.0, 399.0, 393.0, 402.0, 456.0, 396.0, 453.0, 390.0, 287.0, 444.0, 341.0, 359.0, 387.0, 396.0, 459.0, 282.0, 450.0, 402.0, 399.0, 390.0, 408.0, 413.0, 396.0, 402.0, 447.0, 304.0, 456.0, 349.0, 304.0, 252.0, 413.0, 332.0, 416.0, 347.0, 441.0, 399.0, 342.0, 413.0, 344.0, 336.0, 456.0, 473.0, 408.0, 459.0, 450.0, 456.0, 456.0, 410.0, 459.0, 384.0, 410.0, 349.0, 381.0, 462.0, 404.0, 404.0, 398.0, 390.0, 387.0, 444.0, 407.0, 450.0, 459.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [188.0, 159.0, 176.0, 177.0, 161.0, 186.0, 199.0, 197.0, 175.0, 192.0, 194.0, 216.0, 174.0, 181.0, 190.0, 205.0, 182.0, 167.0, 211.0, 194.0, 217.0, 233.0, 236.0, 223.0, 234.0, 213.0, 211.0, 199.0, 153.0, 140.0, 199.0, 200.0, 87.0, 77.0, 222.0, 228.0, 170.0, 183.0, 223.0, 221.0, 194.0, 162.0, 190.0, 197.0, 210.0, 195.0, 185.0, 171.0, 205.0, 199.0, 185.0, 171.0, 174.0, 224.0, 184.0, 223.0, 182.0, 179.0, 183.0, 204.0, 198.0, 215.0, 202.0, 190.0, 163.0, 190.0, 164.0, 
200.0, 145.0, 140.0, 201.0, 197.0, 199.0, 206.0, 191.0, 216.0, 189.0, 210.0, 201.0, 192.0, 197.0, 205.0, 240.0, 216.0, 201.0, 195.0, 201.0, 252.0, 199.0, 191.0, 148.0, 139.0, 222.0, 222.0, 164.0, 177.0, 156.0, 203.0, 179.0, 208.0, 209.0, 187.0, 233.0, 226.0, 135.0, 147.0, 238.0, 212.0, 211.0, 191.0, 200.0, 199.0, 180.0, 210.0, 210.0, 198.0, 201.0, 212.0, 192.0, 204.0, 199.0, 203.0, 229.0, 218.0, 140.0, 164.0, 230.0, 226.0, 170.0, 179.0, 142.0, 162.0, 140.0, 112.0, 213.0, 200.0, 169.0, 163.0, 213.0, 203.0, 179.0, 168.0, 205.0, 236.0, 190.0, 209.0, 156.0, 186.0, 200.0, 213.0, 166.0, 178.0, 179.0, 157.0, 221.0, 235.0, 235.0, 238.0, 190.0, 218.0, 238.0, 221.0, 217.0, 233.0, 225.0, 231.0, 228.0, 228.0, 209.0, 201.0, 225.0, 234.0, 179.0, 205.0, 201.0, 209.0, 183.0, 166.0, 169.0, 212.0, 211.0, 251.0, 199.0, 205.0, 203.0, 201.0, 208.0, 190.0, 185.0, 205.0, 190.0, 197.0, 209.0, 235.0, 199.0, 208.0, 236.0, 214.0, 233.0, 226.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867719801502224, "mean_inference_ms": 1.2099976017691645, "mean_action_processing_ms": 0.1329235273872266, "mean_env_wait_ms": 0.861473241182441, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1920000, "num_agent_steps_trained": 1920000, "num_env_steps_sampled": 960000, "num_env_steps_trained": 960000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 960000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1920000, "timers": {"training_iteration_time_ms": 3615.628, "learn_time_ms": 1066.355, "learn_throughput": 12003.507, "synch_weights_time_ms": 13.679}, "counters": {"num_env_steps_sampled": 960000, "num_env_steps_trained": 960000, "num_agent_steps_sampled": 1920000, "num_agent_steps_trained": 1920000}, "done": false, "episodes_total": 2400, "training_iteration": 75, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-47", 
"timestamp": 1666580687, "time_this_iter_s": 3.4958388805389404, "time_total_s": 281.3084251880646, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, 
"batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 
67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, 
"rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], 
"output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": 
{"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 281.3084251880646, "timesteps_since_restore": 0, "iterations_since_restore": 75, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.6, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 139.0, "sparse_reward_min": 80, "sparse_reward_max": 160, "shaped_reward_mean": 120.43, "shaped_reward_min": 82, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.53, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.27, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 11.9, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 12.67, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.56, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.71, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.06, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.85, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.2, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 1.55, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.51, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.7, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.79, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.57, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.69, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 11.06, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.85, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.06, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.85, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 5.293955999225468e-24, "cur_lr": 0.0010000000474974513, "total_loss": -0.0020715848077088594, "policy_loss": -0.002257078420370817, "vf_loss": 8.082441329956055, "vf_explained_var": 0.6958982944488525, "kl": 0.0021980281453579664, "entropy": 1.2455064058303833, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 972800, "num_env_steps_trained": 972800, "num_agent_steps_sampled": 1945600, "num_agent_steps_trained": 1945600}, "sampler_results": {"episode_reward_max": 473.0, "episode_reward_min": 247.0, "episode_reward_mean": 398.43, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 252.0}, "policy_reward_mean": {"ppo": 199.215}, "custom_metrics": {"sparse_reward_mean": 139.0, "sparse_reward_min": 80, "sparse_reward_max": 160, "shaped_reward_mean": 120.43, "shaped_reward_min": 82, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.53, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.27, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 11.9, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 12.67, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.56, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.71, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.06, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.85, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.2, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 1.55, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 8, "dish_drop_agent_1_mean": 1.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.76, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.51, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.7, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.79, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.57, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.69, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 11.06, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.85, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.06, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.85, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [353.0, 364.0, 285.0, 398.0, 405.0, 407.0, 399.0, 393.0, 402.0, 456.0, 396.0, 453.0, 390.0, 287.0, 444.0, 341.0, 359.0, 387.0, 396.0, 459.0, 282.0, 450.0, 402.0, 399.0, 390.0, 408.0, 413.0, 396.0, 402.0, 447.0, 304.0, 456.0, 349.0, 304.0, 252.0, 413.0, 332.0, 416.0, 347.0, 441.0, 399.0, 342.0, 413.0, 344.0, 336.0, 456.0, 473.0, 408.0, 459.0, 450.0, 456.0, 456.0, 410.0, 459.0, 384.0, 410.0, 349.0, 381.0, 462.0, 404.0, 404.0, 398.0, 390.0, 387.0, 444.0, 407.0, 450.0, 459.0, 384.0, 399.0, 296.0, 450.0, 453.0, 441.0, 405.0, 305.0, 370.0, 405.0, 456.0, 427.0, 398.0, 247.0, 405.0, 456.0, 370.0, 293.0, 387.0, 396.0, 456.0, 450.0, 404.0, 393.0, 444.0, 387.0, 404.0, 453.0, 398.0, 441.0, 456.0, 447.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [163.0, 190.0, 164.0, 200.0, 145.0, 140.0, 201.0, 197.0, 199.0, 206.0, 191.0, 216.0, 189.0, 210.0, 201.0, 192.0, 197.0, 205.0, 240.0, 216.0, 201.0, 195.0, 201.0, 252.0, 199.0, 191.0, 148.0, 139.0, 222.0, 222.0, 164.0, 177.0, 156.0, 203.0, 179.0, 208.0, 209.0, 187.0, 233.0, 226.0, 135.0, 147.0, 238.0, 212.0, 211.0, 191.0, 200.0, 199.0, 180.0, 210.0, 210.0, 198.0, 201.0, 212.0, 192.0, 204.0, 199.0, 203.0, 229.0, 218.0, 140.0, 164.0, 230.0, 226.0, 170.0, 179.0, 142.0, 162.0, 140.0, 112.0, 213.0, 200.0, 169.0, 163.0, 213.0, 203.0, 179.0, 168.0, 205.0, 236.0, 190.0, 209.0, 156.0, 186.0, 200.0, 213.0, 166.0, 178.0, 179.0, 157.0, 221.0, 235.0, 235.0, 238.0, 190.0, 218.0, 238.0, 221.0, 217.0, 233.0, 225.0, 231.0, 228.0, 228.0, 209.0, 201.0, 225.0, 234.0, 179.0, 205.0, 201.0, 209.0, 183.0, 166.0, 169.0, 212.0, 211.0, 251.0, 199.0, 205.0, 203.0, 201.0, 208.0, 190.0, 185.0, 205.0, 190.0, 197.0, 209.0, 235.0, 199.0, 208.0, 236.0, 214.0, 233.0, 226.0, 202.0, 182.0, 206.0, 193.0, 153.0, 143.0, 214.0, 236.0, 217.0, 236.0, 232.0, 209.0, 192.0, 213.0, 142.0, 163.0, 186.0, 184.0, 205.0, 200.0, 228.0, 228.0, 210.0, 217.0, 190.0, 208.0, 138.0, 109.0, 208.0, 197.0, 236.0, 220.0, 190.0, 180.0, 155.0, 138.0, 187.0, 200.0, 198.0, 198.0, 220.0, 236.0, 228.0, 222.0, 180.0, 224.0, 188.0, 205.0, 225.0, 219.0, 202.0, 185.0, 204.0, 200.0, 226.0, 227.0, 210.0, 188.0, 216.0, 225.0, 226.0, 230.0, 227.0, 220.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6866173620774393, "mean_inference_ms": 1.2097806306130832, "mean_action_processing_ms": 0.13288691750121373, "mean_env_wait_ms": 0.8607359373881026, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 473.0, "episode_reward_min": 
247.0, "episode_reward_mean": 398.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 252.0}, "policy_reward_mean": {"ppo": 199.215}, "hist_stats": {"episode_reward": [353.0, 364.0, 285.0, 398.0, 405.0, 407.0, 399.0, 393.0, 402.0, 456.0, 396.0, 453.0, 390.0, 287.0, 444.0, 341.0, 359.0, 387.0, 396.0, 459.0, 282.0, 450.0, 402.0, 399.0, 390.0, 408.0, 413.0, 396.0, 402.0, 447.0, 304.0, 456.0, 349.0, 304.0, 252.0, 413.0, 332.0, 416.0, 347.0, 441.0, 399.0, 342.0, 413.0, 344.0, 336.0, 456.0, 473.0, 408.0, 459.0, 450.0, 456.0, 456.0, 410.0, 459.0, 384.0, 410.0, 349.0, 381.0, 462.0, 404.0, 404.0, 398.0, 390.0, 387.0, 444.0, 407.0, 450.0, 459.0, 384.0, 399.0, 296.0, 450.0, 453.0, 441.0, 405.0, 305.0, 370.0, 405.0, 456.0, 427.0, 398.0, 247.0, 405.0, 456.0, 370.0, 293.0, 387.0, 396.0, 456.0, 450.0, 404.0, 393.0, 444.0, 387.0, 404.0, 453.0, 398.0, 441.0, 456.0, 447.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [163.0, 190.0, 164.0, 200.0, 145.0, 140.0, 201.0, 197.0, 199.0, 206.0, 191.0, 216.0, 189.0, 210.0, 201.0, 192.0, 197.0, 205.0, 240.0, 216.0, 201.0, 195.0, 201.0, 252.0, 199.0, 191.0, 148.0, 139.0, 222.0, 222.0, 164.0, 177.0, 156.0, 203.0, 179.0, 208.0, 209.0, 187.0, 233.0, 226.0, 135.0, 147.0, 238.0, 212.0, 211.0, 191.0, 200.0, 199.0, 180.0, 210.0, 210.0, 198.0, 201.0, 212.0, 192.0, 204.0, 199.0, 203.0, 229.0, 218.0, 140.0, 164.0, 230.0, 226.0, 170.0, 179.0, 142.0, 162.0, 140.0, 112.0, 213.0, 200.0, 169.0, 
163.0, 213.0, 203.0, 179.0, 168.0, 205.0, 236.0, 190.0, 209.0, 156.0, 186.0, 200.0, 213.0, 166.0, 178.0, 179.0, 157.0, 221.0, 235.0, 235.0, 238.0, 190.0, 218.0, 238.0, 221.0, 217.0, 233.0, 225.0, 231.0, 228.0, 228.0, 209.0, 201.0, 225.0, 234.0, 179.0, 205.0, 201.0, 209.0, 183.0, 166.0, 169.0, 212.0, 211.0, 251.0, 199.0, 205.0, 203.0, 201.0, 208.0, 190.0, 185.0, 205.0, 190.0, 197.0, 209.0, 235.0, 199.0, 208.0, 236.0, 214.0, 233.0, 226.0, 202.0, 182.0, 206.0, 193.0, 153.0, 143.0, 214.0, 236.0, 217.0, 236.0, 232.0, 209.0, 192.0, 213.0, 142.0, 163.0, 186.0, 184.0, 205.0, 200.0, 228.0, 228.0, 210.0, 217.0, 190.0, 208.0, 138.0, 109.0, 208.0, 197.0, 236.0, 220.0, 190.0, 180.0, 155.0, 138.0, 187.0, 200.0, 198.0, 198.0, 220.0, 236.0, 228.0, 222.0, 180.0, 224.0, 188.0, 205.0, 225.0, 219.0, 202.0, 185.0, 204.0, 200.0, 226.0, 227.0, 210.0, 188.0, 216.0, 225.0, 226.0, 230.0, 227.0, 220.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6866173620774393, "mean_inference_ms": 1.2097806306130832, "mean_action_processing_ms": 0.13288691750121373, "mean_env_wait_ms": 0.8607359373881026, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1945600, "num_agent_steps_trained": 1945600, "num_env_steps_sampled": 972800, "num_env_steps_trained": 972800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 972800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1945600, "timers": {"training_iteration_time_ms": 3608.38, "learn_time_ms": 1057.828, "learn_throughput": 12100.26, "synch_weights_time_ms": 13.809}, "counters": {"num_env_steps_sampled": 972800, "num_env_steps_trained": 972800, "num_agent_steps_sampled": 1945600, "num_agent_steps_trained": 1945600}, "done": false, "episodes_total": 2432, "training_iteration": 76, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-51", "timestamp": 1666580691, 
"time_this_iter_s": 3.582211494445801, "time_total_s": 284.8906366825104, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": 
"truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, 
"evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 
1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 
67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 
0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, 
"policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 284.8906366825104, "timesteps_since_restore": 0, "iterations_since_restore": 76, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.580000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 143.2, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 123.3, "shaped_reward_min": 87, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.95, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.37, "onion_pickup_agent_1_min": 8, 
"onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 12.34, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 12.66, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.93, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.54, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.67, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.57, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.89, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.35, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.59, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 1.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.56, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.5, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.85, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, 
"soup_delivery_agent_1_mean": 3.73, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 11.57, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.89, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.57, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.89, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.646977999612734e-24, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009492296376265585, "policy_loss": 0.0007508254493586719, "vf_loss": 8.144251823425293, "vf_explained_var": 0.6694010496139526, "kl": 0.002012323122471571, "entropy": 1.2320451736450195, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 985600, "num_env_steps_trained": 985600, "num_agent_steps_sampled": 1971200, "num_agent_steps_trained": 1971200}, "sampler_results": {"episode_reward_max": 513.0, "episode_reward_min": 247.0, "episode_reward_mean": 409.7, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 204.85}, "custom_metrics": {"sparse_reward_mean": 143.2, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 123.3, "shaped_reward_min": 87, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.95, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.37, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 12.34, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 12.66, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.93, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.54, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.67, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.57, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 17, "potting_onion_agent_1_mean": 11.89, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.35, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 6, "useful_dish_pickup_agent_1_mean": 2.59, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 1.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, 
"dish_drop_agent_1_mean": 1.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.56, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.5, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.85, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.72, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.73, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 11.57, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 17, "optimal_onion_potting_agent_1_mean": 11.89, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.57, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 17, "viable_onion_potting_agent_1_mean": 11.89, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 
0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [349.0, 304.0, 252.0, 413.0, 332.0, 416.0, 347.0, 441.0, 399.0, 342.0, 413.0, 344.0, 336.0, 456.0, 473.0, 408.0, 459.0, 450.0, 456.0, 456.0, 410.0, 459.0, 384.0, 410.0, 349.0, 381.0, 462.0, 404.0, 404.0, 398.0, 390.0, 387.0, 444.0, 407.0, 450.0, 459.0, 384.0, 399.0, 296.0, 450.0, 453.0, 441.0, 405.0, 305.0, 370.0, 405.0, 456.0, 427.0, 398.0, 247.0, 405.0, 456.0, 370.0, 293.0, 387.0, 396.0, 456.0, 450.0, 404.0, 393.0, 444.0, 387.0, 404.0, 453.0, 398.0, 441.0, 456.0, 447.0, 444.0, 393.0, 365.0, 356.0, 450.0, 453.0, 453.0, 390.0, 513.0, 462.0, 330.0, 447.0, 416.0, 407.0, 307.0, 450.0, 456.0, 453.0, 399.0, 447.0, 396.0, 444.0, 450.0, 390.0, 447.0, 462.0, 465.0, 450.0, 405.0, 444.0, 453.0, 453.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [170.0, 179.0, 142.0, 162.0, 140.0, 112.0, 213.0, 200.0, 169.0, 163.0, 213.0, 203.0, 179.0, 168.0, 205.0, 236.0, 190.0, 209.0, 156.0, 186.0, 200.0, 213.0, 166.0, 178.0, 179.0, 157.0, 221.0, 235.0, 235.0, 238.0, 190.0, 218.0, 238.0, 221.0, 217.0, 233.0, 225.0, 231.0, 228.0, 228.0, 209.0, 201.0, 225.0, 234.0, 179.0, 205.0, 201.0, 209.0, 183.0, 166.0, 169.0, 212.0, 211.0, 251.0, 199.0, 205.0, 203.0, 201.0, 208.0, 190.0, 185.0, 205.0, 190.0, 197.0, 209.0, 235.0, 199.0, 208.0, 236.0, 214.0, 233.0, 226.0, 202.0, 182.0, 206.0, 193.0, 153.0, 143.0, 214.0, 236.0, 217.0, 236.0, 232.0, 209.0, 192.0, 213.0, 142.0, 163.0, 186.0, 184.0, 205.0, 200.0, 228.0, 228.0, 210.0, 217.0, 190.0, 208.0, 138.0, 109.0, 208.0, 197.0, 236.0, 220.0, 190.0, 180.0, 155.0, 138.0, 187.0, 200.0, 198.0, 198.0, 220.0, 236.0, 228.0, 222.0, 180.0, 224.0, 188.0, 205.0, 225.0, 219.0, 202.0, 185.0, 204.0, 200.0, 226.0, 227.0, 210.0, 188.0, 216.0, 225.0, 226.0, 230.0, 227.0, 220.0, 219.0, 225.0, 213.0, 180.0, 175.0, 190.0, 175.0, 181.0, 213.0, 237.0, 225.0, 228.0, 227.0, 226.0, 184.0, 206.0, 247.0, 266.0, 237.0, 225.0, 172.0, 158.0, 221.0, 226.0, 209.0, 207.0, 200.0, 207.0, 162.0, 145.0, 233.0, 217.0, 218.0, 238.0, 233.0, 220.0, 194.0, 205.0, 230.0, 217.0, 215.0, 181.0, 214.0, 230.0, 219.0, 231.0, 197.0, 193.0, 225.0, 222.0, 238.0, 224.0, 235.0, 230.0, 234.0, 216.0, 199.0, 206.0, 223.0, 221.0, 239.0, 214.0, 230.0, 223.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6865708178972108, "mean_inference_ms": 1.2096254806364681, "mean_action_processing_ms": 0.13287110233558327, "mean_env_wait_ms": 0.8600840821401189, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 513.0, "episode_reward_min": 247.0, "episode_reward_mean": 409.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 204.85}, "hist_stats": {"episode_reward": [349.0, 304.0, 252.0, 413.0, 332.0, 416.0, 347.0, 441.0, 399.0, 342.0, 413.0, 344.0, 336.0, 456.0, 473.0, 408.0, 459.0, 450.0, 456.0, 456.0, 410.0, 459.0, 384.0, 410.0, 349.0, 381.0, 462.0, 404.0, 404.0, 398.0, 390.0, 387.0, 444.0, 407.0, 450.0, 459.0, 384.0, 399.0, 296.0, 450.0, 453.0, 441.0, 405.0, 305.0, 370.0, 405.0, 456.0, 427.0, 398.0, 247.0, 405.0, 456.0, 370.0, 293.0, 387.0, 396.0, 456.0, 450.0, 404.0, 393.0, 444.0, 387.0, 404.0, 453.0, 398.0, 441.0, 456.0, 447.0, 444.0, 393.0, 365.0, 356.0, 450.0, 453.0, 453.0, 390.0, 513.0, 462.0, 330.0, 447.0, 416.0, 407.0, 307.0, 450.0, 456.0, 453.0, 399.0, 447.0, 396.0, 444.0, 450.0, 390.0, 447.0, 462.0, 465.0, 450.0, 405.0, 444.0, 453.0, 453.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [170.0, 179.0, 142.0, 162.0, 140.0, 112.0, 213.0, 200.0, 169.0, 163.0, 213.0, 203.0, 179.0, 168.0, 205.0, 236.0, 190.0, 209.0, 156.0, 186.0, 200.0, 213.0, 166.0, 178.0, 179.0, 157.0, 221.0, 235.0, 235.0, 238.0, 190.0, 218.0, 238.0, 221.0, 217.0, 233.0, 225.0, 231.0, 228.0, 228.0, 209.0, 201.0, 225.0, 234.0, 179.0, 205.0, 201.0, 209.0, 183.0, 166.0, 169.0, 212.0, 211.0, 251.0, 199.0, 205.0, 203.0, 201.0, 208.0, 190.0, 185.0, 205.0, 190.0, 197.0, 209.0, 235.0, 199.0, 208.0, 236.0, 214.0, 233.0, 226.0, 202.0, 182.0, 206.0, 193.0, 153.0, 143.0, 214.0, 236.0, 217.0, 236.0, 232.0, 209.0, 192.0, 213.0, 142.0, 
163.0, 186.0, 184.0, 205.0, 200.0, 228.0, 228.0, 210.0, 217.0, 190.0, 208.0, 138.0, 109.0, 208.0, 197.0, 236.0, 220.0, 190.0, 180.0, 155.0, 138.0, 187.0, 200.0, 198.0, 198.0, 220.0, 236.0, 228.0, 222.0, 180.0, 224.0, 188.0, 205.0, 225.0, 219.0, 202.0, 185.0, 204.0, 200.0, 226.0, 227.0, 210.0, 188.0, 216.0, 225.0, 226.0, 230.0, 227.0, 220.0, 219.0, 225.0, 213.0, 180.0, 175.0, 190.0, 175.0, 181.0, 213.0, 237.0, 225.0, 228.0, 227.0, 226.0, 184.0, 206.0, 247.0, 266.0, 237.0, 225.0, 172.0, 158.0, 221.0, 226.0, 209.0, 207.0, 200.0, 207.0, 162.0, 145.0, 233.0, 217.0, 218.0, 238.0, 233.0, 220.0, 194.0, 205.0, 230.0, 217.0, 215.0, 181.0, 214.0, 230.0, 219.0, 231.0, 197.0, 193.0, 225.0, 222.0, 238.0, 224.0, 235.0, 230.0, 234.0, 216.0, 199.0, 206.0, 223.0, 221.0, 239.0, 214.0, 230.0, 223.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6865708178972108, "mean_inference_ms": 1.2096254806364681, "mean_action_processing_ms": 0.13287110233558327, "mean_env_wait_ms": 0.8600840821401189, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1971200, "num_agent_steps_trained": 1971200, "num_env_steps_sampled": 985600, "num_env_steps_trained": 985600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 985600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1971200, "timers": {"training_iteration_time_ms": 3608.625, "learn_time_ms": 1058.889, "learn_throughput": 12088.147, "synch_weights_time_ms": 13.773}, "counters": {"num_env_steps_sampled": 985600, "num_env_steps_trained": 985600, "num_agent_steps_sampled": 1971200, "num_agent_steps_trained": 1971200}, "done": false, "episodes_total": 2464, "training_iteration": 77, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-55", "timestamp": 1666580695, "time_this_iter_s": 3.6801352500915527, "time_total_s": 288.57077193260193, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 288.57077193260193, "timesteps_since_restore": 0, "iterations_since_restore": 77, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.916666666666668, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 144.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 123.12, "shaped_reward_min": 60, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.73, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.44, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 12.16, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 12.79, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.83, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.46, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.02, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.12, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.37, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.52, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.46, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.93, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.69, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.61, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.46, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.02, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.46, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.02, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.323488999806367e-24, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015237071784213185, "policy_loss": 0.0013373284600675106, "vf_loss": 8.073976516723633, "vf_explained_var": 0.6980078220367432, "kl": 0.0019213203340768814, "entropy": 1.2420413494110107, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 998400, "num_env_steps_trained": 998400, "num_agent_steps_sampled": 1996800, "num_agent_steps_trained": 1996800}, "sampler_results": {"episode_reward_max": 513.0, "episode_reward_min": 180.0, "episode_reward_mean": 412.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 80.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 206.16}, "custom_metrics": {"sparse_reward_mean": 144.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 123.12, "shaped_reward_min": 60, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.73, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.44, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.16, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 12.79, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.83, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.46, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.02, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.12, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.37, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.26, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.52, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.46, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.93, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.69, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.61, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.46, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.02, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.46, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.02, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [444.0, 407.0, 450.0, 459.0, 384.0, 399.0, 296.0, 450.0, 453.0, 441.0, 405.0, 305.0, 370.0, 405.0, 456.0, 427.0, 398.0, 247.0, 405.0, 456.0, 370.0, 293.0, 387.0, 396.0, 456.0, 450.0, 404.0, 393.0, 444.0, 387.0, 404.0, 453.0, 398.0, 441.0, 456.0, 447.0, 444.0, 393.0, 365.0, 356.0, 450.0, 453.0, 453.0, 390.0, 513.0, 462.0, 330.0, 447.0, 416.0, 407.0, 307.0, 450.0, 456.0, 453.0, 399.0, 447.0, 396.0, 444.0, 450.0, 390.0, 447.0, 462.0, 465.0, 450.0, 405.0, 444.0, 453.0, 453.0, 464.0, 390.0, 424.0, 455.0, 453.0, 441.0, 444.0, 387.0, 450.0, 462.0, 447.0, 447.0, 290.0, 408.0, 453.0, 470.0, 470.0, 183.0, 339.0, 438.0, 450.0, 352.0, 344.0, 392.0, 450.0, 444.0, 180.0, 407.0, 401.0, 410.0, 351.0, 450.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [209.0, 235.0, 199.0, 208.0, 236.0, 214.0, 233.0, 226.0, 202.0, 182.0, 206.0, 193.0, 153.0, 143.0, 214.0, 236.0, 217.0, 236.0, 232.0, 209.0, 192.0, 213.0, 142.0, 163.0, 186.0, 184.0, 205.0, 200.0, 228.0, 228.0, 210.0, 217.0, 190.0, 208.0, 138.0, 109.0, 208.0, 197.0, 236.0, 220.0, 190.0, 180.0, 155.0, 138.0, 187.0, 200.0, 198.0, 198.0, 220.0, 236.0, 228.0, 222.0, 180.0, 224.0, 188.0, 205.0, 225.0, 219.0, 202.0, 185.0, 204.0, 200.0, 226.0, 227.0, 210.0, 188.0, 216.0, 225.0, 226.0, 230.0, 227.0, 220.0, 219.0, 225.0, 213.0, 180.0, 175.0, 190.0, 175.0, 181.0, 213.0, 237.0, 225.0, 228.0, 227.0, 226.0, 184.0, 206.0, 247.0, 266.0, 237.0, 225.0, 172.0, 158.0, 221.0, 226.0, 209.0, 207.0, 200.0, 207.0, 162.0, 145.0, 233.0, 217.0, 218.0, 238.0, 233.0, 220.0, 194.0, 205.0, 230.0, 217.0, 215.0, 181.0, 214.0, 230.0, 219.0, 231.0, 197.0, 193.0, 225.0, 222.0, 238.0, 224.0, 235.0, 230.0, 234.0, 216.0, 199.0, 206.0, 223.0, 221.0, 239.0, 214.0, 230.0, 223.0, 238.0, 226.0, 185.0, 205.0, 226.0, 198.0, 228.0, 227.0, 233.0, 220.0, 217.0, 224.0, 214.0, 230.0, 191.0, 196.0, 214.0, 236.0, 233.0, 229.0, 208.0, 239.0, 221.0, 226.0, 145.0, 145.0, 200.0, 208.0, 221.0, 232.0, 227.0, 243.0, 244.0, 226.0, 103.0, 80.0, 184.0, 155.0, 212.0, 226.0, 228.0, 222.0, 159.0, 193.0, 167.0, 177.0, 205.0, 187.0, 224.0, 226.0, 212.0, 232.0, 89.0, 91.0, 210.0, 197.0, 190.0, 211.0, 200.0, 210.0, 180.0, 171.0, 234.0, 216.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6866386191695207, "mean_inference_ms": 1.2096373080243905, "mean_action_processing_ms": 0.13288295376807122, "mean_env_wait_ms": 0.8596358143701005, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 513.0, "episode_reward_min": 180.0, "episode_reward_mean": 412.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 80.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 206.16}, "hist_stats": {"episode_reward": [444.0, 407.0, 450.0, 459.0, 384.0, 399.0, 296.0, 450.0, 453.0, 441.0, 405.0, 305.0, 370.0, 405.0, 456.0, 427.0, 398.0, 247.0, 405.0, 456.0, 370.0, 293.0, 387.0, 396.0, 456.0, 450.0, 404.0, 393.0, 444.0, 387.0, 404.0, 453.0, 398.0, 441.0, 456.0, 447.0, 444.0, 393.0, 365.0, 356.0, 450.0, 453.0, 453.0, 390.0, 513.0, 462.0, 330.0, 447.0, 416.0, 407.0, 307.0, 450.0, 456.0, 453.0, 399.0, 447.0, 396.0, 444.0, 450.0, 390.0, 447.0, 462.0, 465.0, 450.0, 405.0, 444.0, 453.0, 453.0, 464.0, 390.0, 424.0, 455.0, 453.0, 441.0, 444.0, 387.0, 450.0, 462.0, 447.0, 447.0, 290.0, 408.0, 453.0, 470.0, 470.0, 183.0, 339.0, 438.0, 450.0, 352.0, 344.0, 392.0, 450.0, 444.0, 180.0, 407.0, 401.0, 410.0, 351.0, 450.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [209.0, 235.0, 199.0, 208.0, 236.0, 214.0, 233.0, 226.0, 202.0, 182.0, 206.0, 193.0, 153.0, 143.0, 214.0, 236.0, 217.0, 236.0, 232.0, 209.0, 192.0, 213.0, 142.0, 163.0, 186.0, 184.0, 205.0, 200.0, 228.0, 228.0, 210.0, 217.0, 190.0, 208.0, 138.0, 109.0, 208.0, 197.0, 236.0, 220.0, 190.0, 180.0, 155.0, 138.0, 187.0, 200.0, 198.0, 198.0, 220.0, 236.0, 228.0, 222.0, 180.0, 224.0, 188.0, 205.0, 225.0, 219.0, 202.0, 185.0, 204.0, 200.0, 226.0, 227.0, 210.0, 188.0, 216.0, 225.0, 226.0, 230.0, 227.0, 220.0, 219.0, 225.0, 213.0, 180.0, 175.0, 190.0, 175.0, 181.0, 213.0, 237.0, 225.0, 228.0, 227.0, 226.0, 184.0, 
206.0, 247.0, 266.0, 237.0, 225.0, 172.0, 158.0, 221.0, 226.0, 209.0, 207.0, 200.0, 207.0, 162.0, 145.0, 233.0, 217.0, 218.0, 238.0, 233.0, 220.0, 194.0, 205.0, 230.0, 217.0, 215.0, 181.0, 214.0, 230.0, 219.0, 231.0, 197.0, 193.0, 225.0, 222.0, 238.0, 224.0, 235.0, 230.0, 234.0, 216.0, 199.0, 206.0, 223.0, 221.0, 239.0, 214.0, 230.0, 223.0, 238.0, 226.0, 185.0, 205.0, 226.0, 198.0, 228.0, 227.0, 233.0, 220.0, 217.0, 224.0, 214.0, 230.0, 191.0, 196.0, 214.0, 236.0, 233.0, 229.0, 208.0, 239.0, 221.0, 226.0, 145.0, 145.0, 200.0, 208.0, 221.0, 232.0, 227.0, 243.0, 244.0, 226.0, 103.0, 80.0, 184.0, 155.0, 212.0, 226.0, 228.0, 222.0, 159.0, 193.0, 167.0, 177.0, 205.0, 187.0, 224.0, 226.0, 212.0, 232.0, 89.0, 91.0, 210.0, 197.0, 190.0, 211.0, 200.0, 210.0, 180.0, 171.0, 234.0, 216.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6866386191695207, "mean_inference_ms": 1.2096373080243905, "mean_action_processing_ms": 0.13288295376807122, "mean_env_wait_ms": 0.8596358143701005, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 1996800, "num_agent_steps_trained": 1996800, "num_env_steps_sampled": 998400, "num_env_steps_trained": 998400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 998400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 1996800, "timers": {"training_iteration_time_ms": 3616.948, "learn_time_ms": 1068.299, "learn_throughput": 11981.665, "synch_weights_time_ms": 13.675}, "counters": {"num_env_steps_sampled": 998400, "num_env_steps_trained": 998400, "num_agent_steps_sampled": 1996800, "num_agent_steps_trained": 1996800}, "done": false, "episodes_total": 2496, "training_iteration": 78, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-04-59", "timestamp": 1666580699, "time_this_iter_s": 3.7753701210021973, "time_total_s": 292.3461420536041, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 292.3461420536041, "timesteps_since_restore": 0, "iterations_since_restore": 78, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.220000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 146.0, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 123.91, "shaped_reward_min": 60, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.87, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.49, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 12.31, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 12.84, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.91, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.96, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.48, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.16, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.54, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.46, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.97, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.72, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.86, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.63, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.48, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.16, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.48, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.16, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 6.617444999031835e-25, "cur_lr": 0.0010000000474974513, "total_loss": -0.001587323728017509, "policy_loss": -0.0017705481732264161, "vf_loss": 8.071381568908691, "vf_explained_var": 0.6893019676208496, "kl": 0.0017396470066159964, "entropy": 1.2478301525115967, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1011200, "num_env_steps_trained": 1011200, "num_agent_steps_sampled": 2022400, "num_agent_steps_trained": 2022400}, "sampler_results": {"episode_reward_max": 513.0, "episode_reward_min": 180.0, "episode_reward_mean": 415.91, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 80.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 207.955}, "custom_metrics": {"sparse_reward_mean": 146.0, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 123.91, "shaped_reward_min": 60, "shaped_reward_max": 153, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 12.87, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 13.49, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.31, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 12.84, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.91, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.96, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.48, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.16, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.54, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 2.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.33, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.46, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.52, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.97, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.72, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.86, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.48, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.16, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.48, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.16, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [398.0, 441.0, 456.0, 447.0, 444.0, 393.0, 365.0, 356.0, 450.0, 453.0, 453.0, 390.0, 513.0, 462.0, 330.0, 447.0, 416.0, 407.0, 307.0, 450.0, 456.0, 453.0, 399.0, 447.0, 396.0, 444.0, 450.0, 390.0, 447.0, 462.0, 465.0, 450.0, 405.0, 444.0, 453.0, 453.0, 464.0, 390.0, 424.0, 455.0, 453.0, 441.0, 444.0, 387.0, 450.0, 462.0, 447.0, 447.0, 290.0, 408.0, 453.0, 470.0, 470.0, 183.0, 339.0, 438.0, 450.0, 352.0, 344.0, 392.0, 450.0, 444.0, 180.0, 407.0, 401.0, 410.0, 351.0, 450.0, 456.0, 462.0, 447.0, 309.0, 462.0, 441.0, 453.0, 450.0, 293.0, 390.0, 294.0, 453.0, 361.0, 459.0, 456.0, 465.0, 390.0, 421.0, 396.0, 390.0, 504.0, 310.0, 404.0, 395.0, 444.0, 330.0, 453.0, 407.0, 444.0, 459.0, 459.0, 396.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [210.0, 188.0, 216.0, 225.0, 226.0, 230.0, 227.0, 220.0, 219.0, 225.0, 213.0, 180.0, 175.0, 190.0, 175.0, 181.0, 213.0, 237.0, 225.0, 228.0, 227.0, 226.0, 184.0, 206.0, 247.0, 266.0, 237.0, 225.0, 172.0, 158.0, 221.0, 226.0, 209.0, 207.0, 200.0, 207.0, 162.0, 145.0, 233.0, 217.0, 218.0, 238.0, 233.0, 220.0, 194.0, 205.0, 230.0, 217.0, 215.0, 181.0, 214.0, 230.0, 219.0, 231.0, 197.0, 193.0, 225.0, 222.0, 238.0, 224.0, 235.0, 230.0, 234.0, 216.0, 199.0, 206.0, 223.0, 221.0, 239.0, 214.0, 230.0, 223.0, 238.0, 226.0, 185.0, 205.0, 226.0, 198.0, 228.0, 227.0, 233.0, 220.0, 217.0, 224.0, 214.0, 230.0, 191.0, 196.0, 214.0, 236.0, 233.0, 229.0, 208.0, 239.0, 221.0, 226.0, 145.0, 145.0, 200.0, 208.0, 221.0, 232.0, 227.0, 243.0, 244.0, 226.0, 103.0, 80.0, 184.0, 155.0, 212.0, 226.0, 228.0, 222.0, 159.0, 193.0, 167.0, 177.0, 205.0, 187.0, 224.0, 226.0, 212.0, 232.0, 89.0, 91.0, 210.0, 197.0, 190.0, 211.0, 200.0, 210.0, 180.0, 171.0, 234.0, 216.0, 238.0, 218.0, 224.0, 238.0, 222.0, 225.0, 158.0, 151.0, 232.0, 230.0, 223.0, 218.0, 241.0, 212.0, 212.0, 238.0, 134.0, 159.0, 205.0, 185.0, 154.0, 140.0, 228.0, 225.0, 185.0, 176.0, 225.0, 234.0, 218.0, 238.0, 236.0, 229.0, 189.0, 201.0, 199.0, 222.0, 176.0, 220.0, 190.0, 200.0, 253.0, 251.0, 156.0, 154.0, 201.0, 203.0, 190.0, 205.0, 221.0, 223.0, 161.0, 169.0, 225.0, 228.0, 204.0, 203.0, 224.0, 220.0, 226.0, 233.0, 235.0, 224.0, 200.0, 196.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867761483431252, "mean_inference_ms": 1.2096410126566395, "mean_action_processing_ms": 0.13289617100446416, "mean_env_wait_ms": 0.8591877879141236, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 513.0, "episode_reward_min": 180.0, "episode_reward_mean": 415.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 80.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 207.955}, "hist_stats": {"episode_reward": [398.0, 441.0, 456.0, 447.0, 444.0, 393.0, 365.0, 356.0, 450.0, 453.0, 453.0, 390.0, 513.0, 462.0, 330.0, 447.0, 416.0, 407.0, 307.0, 450.0, 456.0, 453.0, 399.0, 447.0, 396.0, 444.0, 450.0, 390.0, 447.0, 462.0, 465.0, 450.0, 405.0, 444.0, 453.0, 453.0, 464.0, 390.0, 424.0, 455.0, 453.0, 441.0, 444.0, 387.0, 450.0, 462.0, 447.0, 447.0, 290.0, 408.0, 453.0, 470.0, 470.0, 183.0, 339.0, 438.0, 450.0, 352.0, 344.0, 392.0, 450.0, 444.0, 180.0, 407.0, 401.0, 410.0, 351.0, 450.0, 456.0, 462.0, 447.0, 309.0, 462.0, 441.0, 453.0, 450.0, 293.0, 390.0, 294.0, 453.0, 361.0, 459.0, 456.0, 465.0, 390.0, 421.0, 396.0, 390.0, 504.0, 310.0, 404.0, 395.0, 444.0, 330.0, 453.0, 407.0, 444.0, 459.0, 459.0, 396.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [210.0, 188.0, 216.0, 225.0, 226.0, 230.0, 227.0, 220.0, 219.0, 225.0, 213.0, 180.0, 175.0, 190.0, 175.0, 181.0, 213.0, 237.0, 225.0, 228.0, 227.0, 226.0, 184.0, 206.0, 247.0, 266.0, 237.0, 225.0, 172.0, 158.0, 221.0, 226.0, 209.0, 207.0, 200.0, 207.0, 162.0, 145.0, 233.0, 217.0, 218.0, 238.0, 233.0, 220.0, 194.0, 205.0, 230.0, 217.0, 215.0, 181.0, 214.0, 230.0, 219.0, 231.0, 197.0, 193.0, 225.0, 222.0, 238.0, 224.0, 235.0, 230.0, 234.0, 216.0, 199.0, 206.0, 223.0, 221.0, 239.0, 214.0, 230.0, 223.0, 238.0, 226.0, 185.0, 205.0, 226.0, 198.0, 228.0, 227.0, 233.0, 220.0, 217.0, 224.0, 214.0, 230.0, 191.0, 
196.0, 214.0, 236.0, 233.0, 229.0, 208.0, 239.0, 221.0, 226.0, 145.0, 145.0, 200.0, 208.0, 221.0, 232.0, 227.0, 243.0, 244.0, 226.0, 103.0, 80.0, 184.0, 155.0, 212.0, 226.0, 228.0, 222.0, 159.0, 193.0, 167.0, 177.0, 205.0, 187.0, 224.0, 226.0, 212.0, 232.0, 89.0, 91.0, 210.0, 197.0, 190.0, 211.0, 200.0, 210.0, 180.0, 171.0, 234.0, 216.0, 238.0, 218.0, 224.0, 238.0, 222.0, 225.0, 158.0, 151.0, 232.0, 230.0, 223.0, 218.0, 241.0, 212.0, 212.0, 238.0, 134.0, 159.0, 205.0, 185.0, 154.0, 140.0, 228.0, 225.0, 185.0, 176.0, 225.0, 234.0, 218.0, 238.0, 236.0, 229.0, 189.0, 201.0, 199.0, 222.0, 176.0, 220.0, 190.0, 200.0, 253.0, 251.0, 156.0, 154.0, 201.0, 203.0, 190.0, 205.0, 221.0, 223.0, 161.0, 169.0, 225.0, 228.0, 204.0, 203.0, 224.0, 220.0, 226.0, 233.0, 235.0, 224.0, 200.0, 196.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867761483431252, "mean_inference_ms": 1.2096410126566395, "mean_action_processing_ms": 0.13289617100446416, "mean_env_wait_ms": 0.8591877879141236, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2022400, "num_agent_steps_trained": 2022400, "num_env_steps_sampled": 1011200, "num_env_steps_trained": 1011200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1011200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2022400, "timers": {"training_iteration_time_ms": 3596.697, "learn_time_ms": 1053.01, "learn_throughput": 12155.626, "synch_weights_time_ms": 13.294}, "counters": {"num_env_steps_sampled": 1011200, "num_env_steps_trained": 1011200, "num_agent_steps_sampled": 2022400, "num_agent_steps_trained": 2022400}, "done": false, "episodes_total": 2528, "training_iteration": 79, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-03", "timestamp": 1666580703, "time_this_iter_s": 3.525020122528076, "time_total_s": 295.8711621761322, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 295.8711621761322, "timesteps_since_restore": 0, "iterations_since_restore": 79, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.34, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 145.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 123.27, "shaped_reward_min": 60, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.0, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 13.29, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 
12.49, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.73, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.97, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.58, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.55, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.0, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.54, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.9, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.78, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.69, "soup_delivery_agent_1_min": 0, 
"soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.09, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.55, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.0, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.55, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.0, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.3087224995159173e-25, "cur_lr": 0.0010000000474974513, "total_loss": 3.7081073969602585e-05, "policy_loss": -0.000135608424898237, "vf_loss": 7.9923295974731445, "vf_explained_var": 0.6859964728355408, "kl": 0.0021027429029345512, "entropy": 1.2530903816223145, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1024000, "num_env_steps_trained": 1024000, "num_agent_steps_sampled": 2048000, "num_agent_steps_trained": 2048000}, "sampler_results": {"episode_reward_max": 513.0, "episode_reward_min": 180.0, "episode_reward_mean": 413.67, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 80.0}, "policy_reward_max": {"ppo": 257.0}, "policy_reward_mean": {"ppo": 206.835}, "custom_metrics": {"sparse_reward_mean": 145.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 123.27, "shaped_reward_min": 60, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.0, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 13.29, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.49, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.73, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.97, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.58, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.55, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.0, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.58, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, 
"dish_drop_agent_1_mean": 1.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.54, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.9, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.78, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.69, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.09, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.55, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.0, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.55, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.0, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [405.0, 444.0, 453.0, 453.0, 464.0, 390.0, 424.0, 455.0, 453.0, 441.0, 444.0, 387.0, 450.0, 462.0, 447.0, 447.0, 290.0, 408.0, 453.0, 470.0, 470.0, 183.0, 339.0, 438.0, 450.0, 352.0, 344.0, 392.0, 450.0, 444.0, 180.0, 407.0, 401.0, 410.0, 351.0, 450.0, 456.0, 462.0, 447.0, 309.0, 462.0, 441.0, 453.0, 450.0, 293.0, 390.0, 294.0, 453.0, 361.0, 459.0, 456.0, 465.0, 390.0, 421.0, 396.0, 390.0, 504.0, 310.0, 404.0, 395.0, 444.0, 330.0, 453.0, 407.0, 444.0, 459.0, 459.0, 396.0, 438.0, 441.0, 455.0, 450.0, 433.0, 441.0, 359.0, 453.0, 410.0, 393.0, 459.0, 456.0, 301.0, 456.0, 447.0, 387.0, 513.0, 387.0, 453.0, 479.0, 407.0, 447.0, 456.0, 401.0, 465.0, 330.0, 398.0, 227.0, 444.0, 387.0, 396.0, 444.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [199.0, 206.0, 223.0, 221.0, 239.0, 214.0, 230.0, 223.0, 238.0, 226.0, 185.0, 205.0, 226.0, 198.0, 228.0, 227.0, 233.0, 220.0, 217.0, 224.0, 214.0, 230.0, 191.0, 196.0, 214.0, 236.0, 233.0, 229.0, 208.0, 239.0, 221.0, 226.0, 145.0, 145.0, 200.0, 208.0, 221.0, 232.0, 227.0, 243.0, 244.0, 226.0, 103.0, 80.0, 184.0, 155.0, 212.0, 226.0, 228.0, 222.0, 159.0, 193.0, 167.0, 177.0, 205.0, 187.0, 224.0, 226.0, 212.0, 232.0, 89.0, 91.0, 210.0, 197.0, 190.0, 211.0, 200.0, 210.0, 180.0, 171.0, 234.0, 216.0, 238.0, 218.0, 224.0, 238.0, 222.0, 225.0, 158.0, 151.0, 232.0, 230.0, 223.0, 218.0, 241.0, 212.0, 212.0, 238.0, 134.0, 159.0, 205.0, 185.0, 154.0, 140.0, 228.0, 225.0, 185.0, 176.0, 225.0, 234.0, 218.0, 238.0, 236.0, 229.0, 189.0, 201.0, 199.0, 222.0, 176.0, 220.0, 190.0, 200.0, 253.0, 251.0, 156.0, 154.0, 201.0, 203.0, 190.0, 205.0, 221.0, 223.0, 161.0, 169.0, 225.0, 228.0, 204.0, 203.0, 224.0, 220.0, 226.0, 233.0, 235.0, 224.0, 200.0, 196.0, 202.0, 236.0, 238.0, 203.0, 231.0, 224.0, 218.0, 232.0, 206.0, 227.0, 225.0, 216.0, 180.0, 179.0, 239.0, 214.0, 193.0, 217.0, 200.0, 193.0, 218.0, 241.0, 244.0, 212.0, 150.0, 151.0, 234.0, 222.0, 221.0, 226.0, 183.0, 204.0, 256.0, 257.0, 204.0, 183.0, 225.0, 228.0, 239.0, 240.0, 206.0, 201.0, 230.0, 217.0, 216.0, 240.0, 204.0, 197.0, 229.0, 236.0, 141.0, 189.0, 201.0, 197.0, 114.0, 113.0, 232.0, 212.0, 203.0, 184.0, 213.0, 183.0, 229.0, 215.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686908689458971, "mean_inference_ms": 1.2096171268596805, "mean_action_processing_ms": 0.13290034895586486, "mean_env_wait_ms": 0.8587133886503696, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 513.0, "episode_reward_min": 180.0, "episode_reward_mean": 413.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 80.0}, "policy_reward_max": {"ppo": 257.0}, "policy_reward_mean": {"ppo": 206.835}, "hist_stats": {"episode_reward": [405.0, 444.0, 453.0, 453.0, 464.0, 390.0, 424.0, 455.0, 453.0, 441.0, 444.0, 387.0, 450.0, 462.0, 447.0, 447.0, 290.0, 408.0, 453.0, 470.0, 470.0, 183.0, 339.0, 438.0, 450.0, 352.0, 344.0, 392.0, 450.0, 444.0, 180.0, 407.0, 401.0, 410.0, 351.0, 450.0, 456.0, 462.0, 447.0, 309.0, 462.0, 441.0, 453.0, 450.0, 293.0, 390.0, 294.0, 453.0, 361.0, 459.0, 456.0, 465.0, 390.0, 421.0, 396.0, 390.0, 504.0, 310.0, 404.0, 395.0, 444.0, 330.0, 453.0, 407.0, 444.0, 459.0, 459.0, 396.0, 438.0, 441.0, 455.0, 450.0, 433.0, 441.0, 359.0, 453.0, 410.0, 393.0, 459.0, 456.0, 301.0, 456.0, 447.0, 387.0, 513.0, 387.0, 453.0, 479.0, 407.0, 447.0, 456.0, 401.0, 465.0, 330.0, 398.0, 227.0, 444.0, 387.0, 396.0, 444.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [199.0, 206.0, 223.0, 221.0, 239.0, 214.0, 230.0, 223.0, 238.0, 226.0, 185.0, 205.0, 226.0, 198.0, 228.0, 227.0, 233.0, 220.0, 217.0, 224.0, 214.0, 230.0, 191.0, 196.0, 214.0, 236.0, 233.0, 229.0, 208.0, 239.0, 221.0, 226.0, 145.0, 145.0, 200.0, 208.0, 221.0, 232.0, 227.0, 243.0, 244.0, 226.0, 103.0, 80.0, 184.0, 155.0, 212.0, 226.0, 228.0, 222.0, 159.0, 193.0, 167.0, 177.0, 205.0, 187.0, 224.0, 226.0, 212.0, 232.0, 89.0, 91.0, 210.0, 197.0, 190.0, 211.0, 200.0, 210.0, 180.0, 171.0, 234.0, 216.0, 238.0, 218.0, 224.0, 238.0, 222.0, 225.0, 158.0, 151.0, 232.0, 230.0, 223.0, 218.0, 241.0, 212.0, 212.0, 
238.0, 134.0, 159.0, 205.0, 185.0, 154.0, 140.0, 228.0, 225.0, 185.0, 176.0, 225.0, 234.0, 218.0, 238.0, 236.0, 229.0, 189.0, 201.0, 199.0, 222.0, 176.0, 220.0, 190.0, 200.0, 253.0, 251.0, 156.0, 154.0, 201.0, 203.0, 190.0, 205.0, 221.0, 223.0, 161.0, 169.0, 225.0, 228.0, 204.0, 203.0, 224.0, 220.0, 226.0, 233.0, 235.0, 224.0, 200.0, 196.0, 202.0, 236.0, 238.0, 203.0, 231.0, 224.0, 218.0, 232.0, 206.0, 227.0, 225.0, 216.0, 180.0, 179.0, 239.0, 214.0, 193.0, 217.0, 200.0, 193.0, 218.0, 241.0, 244.0, 212.0, 150.0, 151.0, 234.0, 222.0, 221.0, 226.0, 183.0, 204.0, 256.0, 257.0, 204.0, 183.0, 225.0, 228.0, 239.0, 240.0, 206.0, 201.0, 230.0, 217.0, 216.0, 240.0, 204.0, 197.0, 229.0, 236.0, 141.0, 189.0, 201.0, 197.0, 114.0, 113.0, 232.0, 212.0, 203.0, 184.0, 213.0, 183.0, 229.0, 215.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686908689458971, "mean_inference_ms": 1.2096171268596805, "mean_action_processing_ms": 0.13290034895586486, "mean_env_wait_ms": 0.8587133886503696, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2048000, "num_agent_steps_trained": 2048000, "num_env_steps_sampled": 1024000, "num_env_steps_trained": 1024000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1024000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2048000, "timers": {"training_iteration_time_ms": 3611.57, "learn_time_ms": 1067.578, "learn_throughput": 11989.755, "synch_weights_time_ms": 12.678}, "counters": {"num_env_steps_sampled": 1024000, "num_env_steps_trained": 1024000, "num_agent_steps_sampled": 2048000, "num_agent_steps_trained": 2048000}, "done": false, "episodes_total": 2560, "training_iteration": 80, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-06", "timestamp": 1666580706, "time_this_iter_s": 3.7415013313293457, "time_total_s": 299.61266350746155, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 299.61266350746155, "timesteps_since_restore": 0, "iterations_since_restore": 80, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.083333333333332, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 145.2, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 124.22, "shaped_reward_min": 67, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.27, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 13.24, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 12.74, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.69, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.96, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 11.79, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.92, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.62, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.91, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.78, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.71, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.79, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.92, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.79, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.92, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.6543612497579586e-25, "cur_lr": 0.0010000000474974513, "total_loss": -0.0057379272766411304, "policy_loss": -0.005914425943046808, "vf_loss": 7.993932723999023, "vf_explained_var": 0.675482451915741, "kl": 0.001754501834511757, "entropy": 1.2457929849624634, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1036800, "num_env_steps_trained": 1036800, "num_agent_steps_sampled": 2073600, "num_agent_steps_trained": 2073600}, "sampler_results": {"episode_reward_max": 513.0, "episode_reward_min": 227.0, "episode_reward_mean": 414.62, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 113.0}, "policy_reward_max": {"ppo": 257.0}, "policy_reward_mean": {"ppo": 207.31}, "custom_metrics": {"sparse_reward_mean": 145.2, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 124.22, "shaped_reward_min": 67, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.27, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 13.24, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.74, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.69, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.96, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.54, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 11.79, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.92, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.56, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.53, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.62, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 3.91, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.78, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.71, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.79, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.92, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.79, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.92, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [401.0, 410.0, 351.0, 450.0, 456.0, 462.0, 447.0, 309.0, 462.0, 441.0, 453.0, 450.0, 293.0, 390.0, 294.0, 453.0, 361.0, 459.0, 456.0, 465.0, 390.0, 421.0, 396.0, 390.0, 504.0, 310.0, 404.0, 395.0, 444.0, 330.0, 453.0, 407.0, 444.0, 459.0, 459.0, 396.0, 438.0, 441.0, 455.0, 450.0, 433.0, 441.0, 359.0, 453.0, 410.0, 393.0, 459.0, 456.0, 301.0, 456.0, 447.0, 387.0, 513.0, 387.0, 453.0, 479.0, 407.0, 447.0, 456.0, 401.0, 465.0, 330.0, 398.0, 227.0, 444.0, 387.0, 396.0, 444.0, 416.0, 402.0, 462.0, 393.0, 456.0, 456.0, 401.0, 450.0, 441.0, 459.0, 319.0, 344.0, 447.0, 410.0, 453.0, 447.0, 447.0, 455.0, 393.0, 453.0, 453.0, 396.0, 470.0, 345.0, 307.0, 396.0, 307.0, 288.0, 444.0, 416.0, 459.0, 399.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [190.0, 211.0, 200.0, 210.0, 180.0, 171.0, 234.0, 216.0, 238.0, 218.0, 224.0, 238.0, 222.0, 225.0, 158.0, 151.0, 232.0, 230.0, 223.0, 218.0, 241.0, 212.0, 212.0, 238.0, 134.0, 159.0, 205.0, 185.0, 154.0, 140.0, 228.0, 225.0, 185.0, 176.0, 225.0, 234.0, 218.0, 238.0, 236.0, 229.0, 189.0, 201.0, 199.0, 222.0, 176.0, 220.0, 190.0, 200.0, 253.0, 251.0, 156.0, 154.0, 201.0, 203.0, 190.0, 205.0, 221.0, 223.0, 161.0, 169.0, 225.0, 228.0, 204.0, 203.0, 224.0, 220.0, 226.0, 233.0, 235.0, 224.0, 200.0, 196.0, 202.0, 236.0, 238.0, 203.0, 231.0, 224.0, 218.0, 232.0, 206.0, 227.0, 225.0, 216.0, 180.0, 179.0, 239.0, 214.0, 193.0, 217.0, 200.0, 193.0, 218.0, 241.0, 244.0, 212.0, 150.0, 151.0, 234.0, 222.0, 221.0, 226.0, 183.0, 204.0, 256.0, 257.0, 204.0, 183.0, 225.0, 228.0, 239.0, 240.0, 206.0, 201.0, 230.0, 217.0, 216.0, 240.0, 204.0, 197.0, 229.0, 236.0, 141.0, 189.0, 201.0, 197.0, 114.0, 113.0, 232.0, 212.0, 203.0, 184.0, 213.0, 183.0, 229.0, 215.0, 213.0, 203.0, 207.0, 195.0, 234.0, 228.0, 202.0, 191.0, 219.0, 237.0, 231.0, 225.0, 198.0, 203.0, 226.0, 224.0, 219.0, 222.0, 232.0, 227.0, 154.0, 165.0, 167.0, 177.0, 205.0, 242.0, 206.0, 204.0, 224.0, 229.0, 238.0, 209.0, 225.0, 222.0, 242.0, 213.0, 208.0, 185.0, 228.0, 225.0, 234.0, 219.0, 197.0, 199.0, 240.0, 230.0, 184.0, 161.0, 152.0, 155.0, 203.0, 193.0, 157.0, 150.0, 147.0, 141.0, 216.0, 228.0, 217.0, 199.0, 214.0, 245.0, 190.0, 209.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868749087582313, "mean_inference_ms": 1.2094324542263242, "mean_action_processing_ms": 0.1328880160108227, "mean_env_wait_ms": 0.8581099319466553, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 513.0, "episode_reward_min": 227.0, "episode_reward_mean": 414.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 113.0}, "policy_reward_max": {"ppo": 257.0}, "policy_reward_mean": {"ppo": 207.31}, "hist_stats": {"episode_reward": [401.0, 410.0, 351.0, 450.0, 456.0, 462.0, 447.0, 309.0, 462.0, 441.0, 453.0, 450.0, 293.0, 390.0, 294.0, 453.0, 361.0, 459.0, 456.0, 465.0, 390.0, 421.0, 396.0, 390.0, 504.0, 310.0, 404.0, 395.0, 444.0, 330.0, 453.0, 407.0, 444.0, 459.0, 459.0, 396.0, 438.0, 441.0, 455.0, 450.0, 433.0, 441.0, 359.0, 453.0, 410.0, 393.0, 459.0, 456.0, 301.0, 456.0, 447.0, 387.0, 513.0, 387.0, 453.0, 479.0, 407.0, 447.0, 456.0, 401.0, 465.0, 330.0, 398.0, 227.0, 444.0, 387.0, 396.0, 444.0, 416.0, 402.0, 462.0, 393.0, 456.0, 456.0, 401.0, 450.0, 441.0, 459.0, 319.0, 344.0, 447.0, 410.0, 453.0, 447.0, 447.0, 455.0, 393.0, 453.0, 453.0, 396.0, 470.0, 345.0, 307.0, 396.0, 307.0, 288.0, 444.0, 416.0, 459.0, 399.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [190.0, 211.0, 200.0, 210.0, 180.0, 171.0, 234.0, 216.0, 238.0, 218.0, 224.0, 238.0, 222.0, 225.0, 158.0, 151.0, 232.0, 230.0, 223.0, 218.0, 241.0, 212.0, 212.0, 238.0, 134.0, 159.0, 205.0, 185.0, 154.0, 140.0, 228.0, 225.0, 185.0, 176.0, 225.0, 234.0, 218.0, 238.0, 236.0, 229.0, 189.0, 201.0, 199.0, 222.0, 176.0, 220.0, 190.0, 200.0, 253.0, 251.0, 156.0, 154.0, 201.0, 203.0, 190.0, 205.0, 221.0, 223.0, 161.0, 169.0, 225.0, 228.0, 204.0, 203.0, 224.0, 220.0, 226.0, 233.0, 235.0, 224.0, 200.0, 196.0, 202.0, 236.0, 238.0, 203.0, 231.0, 224.0, 218.0, 232.0, 206.0, 227.0, 225.0, 216.0, 180.0, 179.0, 239.0, 
214.0, 193.0, 217.0, 200.0, 193.0, 218.0, 241.0, 244.0, 212.0, 150.0, 151.0, 234.0, 222.0, 221.0, 226.0, 183.0, 204.0, 256.0, 257.0, 204.0, 183.0, 225.0, 228.0, 239.0, 240.0, 206.0, 201.0, 230.0, 217.0, 216.0, 240.0, 204.0, 197.0, 229.0, 236.0, 141.0, 189.0, 201.0, 197.0, 114.0, 113.0, 232.0, 212.0, 203.0, 184.0, 213.0, 183.0, 229.0, 215.0, 213.0, 203.0, 207.0, 195.0, 234.0, 228.0, 202.0, 191.0, 219.0, 237.0, 231.0, 225.0, 198.0, 203.0, 226.0, 224.0, 219.0, 222.0, 232.0, 227.0, 154.0, 165.0, 167.0, 177.0, 205.0, 242.0, 206.0, 204.0, 224.0, 229.0, 238.0, 209.0, 225.0, 222.0, 242.0, 213.0, 208.0, 185.0, 228.0, 225.0, 234.0, 219.0, 197.0, 199.0, 240.0, 230.0, 184.0, 161.0, 152.0, 155.0, 203.0, 193.0, 157.0, 150.0, 147.0, 141.0, 216.0, 228.0, 217.0, 199.0, 214.0, 245.0, 190.0, 209.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868749087582313, "mean_inference_ms": 1.2094324542263242, "mean_action_processing_ms": 0.1328880160108227, "mean_env_wait_ms": 0.8581099319466553, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2073600, "num_agent_steps_trained": 2073600, "num_env_steps_sampled": 1036800, "num_env_steps_trained": 1036800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1036800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2073600, "timers": {"training_iteration_time_ms": 3615.382, "learn_time_ms": 1075.141, "learn_throughput": 11905.41, "synch_weights_time_ms": 12.896}, "counters": {"num_env_steps_sampled": 1036800, "num_env_steps_trained": 1036800, "num_agent_steps_sampled": 2073600, "num_agent_steps_trained": 2073600}, "done": false, "episodes_total": 2592, "training_iteration": 81, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-10", "timestamp": 1666580710, "time_this_iter_s": 3.6494596004486084, "time_total_s": 303.26212310791016, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 303.26212310791016, "timesteps_since_restore": 0, "iterations_since_restore": 81, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.1, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 143.4, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 124.12, "shaped_reward_min": 67, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.48, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 13.08, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 
12.84, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.46, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 0.69, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 11.99, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.59, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.47, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.73, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.62, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.97, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 3.79, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.66, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.13, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 11.99, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.59, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.99, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.59, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 8.271806248789793e-26, "cur_lr": 0.0010000000474974513, "total_loss": -0.004569070879369974, "policy_loss": -0.00474266242235899, "vf_loss": 8.008241653442383, "vf_explained_var": 0.7025030851364136, "kl": 0.0017891178140416741, "entropy": 1.2544690370559692, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1049600, "num_env_steps_trained": 1049600, "num_agent_steps_sampled": 2099200, "num_agent_steps_trained": 2099200}, "sampler_results": {"episode_reward_max": 516.0, "episode_reward_min": 164.0, "episode_reward_mean": 410.92, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 81.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 205.46}, "custom_metrics": {"sparse_reward_mean": 143.4, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 124.12, "shaped_reward_min": 67, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.48, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 13.08, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.84, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 12.46, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 0.69, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.64, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 11.99, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.59, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 2.54, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.47, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.64, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.75, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.73, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.62, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.97, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 3.79, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.1, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.13, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 11.99, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.59, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.99, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.59, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [444.0, 459.0, 459.0, 396.0, 438.0, 441.0, 455.0, 450.0, 433.0, 441.0, 359.0, 453.0, 410.0, 393.0, 459.0, 456.0, 301.0, 456.0, 447.0, 387.0, 513.0, 387.0, 453.0, 479.0, 407.0, 447.0, 456.0, 401.0, 465.0, 330.0, 398.0, 227.0, 444.0, 387.0, 396.0, 444.0, 416.0, 402.0, 462.0, 393.0, 456.0, 456.0, 401.0, 450.0, 441.0, 459.0, 319.0, 344.0, 447.0, 410.0, 453.0, 447.0, 447.0, 455.0, 393.0, 453.0, 453.0, 396.0, 470.0, 345.0, 307.0, 396.0, 307.0, 288.0, 444.0, 416.0, 459.0, 399.0, 473.0, 467.0, 458.0, 306.0, 164.0, 444.0, 459.0, 444.0, 358.0, 413.0, 402.0, 405.0, 393.0, 516.0, 399.0, 410.0, 453.0, 401.0, 410.0, 361.0, 244.0, 441.0, 468.0, 462.0, 344.0, 345.0, 285.0, 398.0, 462.0, 347.0, 424.0, 381.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [224.0, 220.0, 226.0, 233.0, 235.0, 224.0, 200.0, 196.0, 202.0, 236.0, 238.0, 203.0, 231.0, 224.0, 218.0, 232.0, 206.0, 227.0, 225.0, 216.0, 180.0, 179.0, 239.0, 214.0, 193.0, 217.0, 200.0, 193.0, 218.0, 241.0, 244.0, 212.0, 150.0, 151.0, 234.0, 222.0, 221.0, 226.0, 183.0, 204.0, 256.0, 257.0, 204.0, 183.0, 225.0, 228.0, 239.0, 240.0, 206.0, 201.0, 230.0, 217.0, 216.0, 240.0, 204.0, 197.0, 229.0, 236.0, 141.0, 189.0, 201.0, 197.0, 114.0, 113.0, 232.0, 212.0, 203.0, 184.0, 213.0, 183.0, 229.0, 215.0, 213.0, 203.0, 207.0, 195.0, 234.0, 228.0, 202.0, 191.0, 219.0, 237.0, 231.0, 225.0, 198.0, 203.0, 226.0, 224.0, 219.0, 222.0, 232.0, 227.0, 154.0, 165.0, 167.0, 177.0, 205.0, 242.0, 206.0, 204.0, 224.0, 229.0, 238.0, 209.0, 225.0, 222.0, 242.0, 213.0, 208.0, 185.0, 228.0, 225.0, 234.0, 219.0, 197.0, 199.0, 240.0, 230.0, 184.0, 161.0, 152.0, 155.0, 203.0, 193.0, 157.0, 150.0, 147.0, 141.0, 216.0, 228.0, 217.0, 199.0, 214.0, 245.0, 190.0, 209.0, 236.0, 237.0, 235.0, 232.0, 229.0, 229.0, 152.0, 154.0, 83.0, 81.0, 235.0, 209.0, 215.0, 244.0, 224.0, 220.0, 170.0, 188.0, 199.0, 214.0, 219.0, 183.0, 211.0, 194.0, 189.0, 204.0, 266.0, 250.0, 219.0, 180.0, 197.0, 213.0, 219.0, 234.0, 200.0, 201.0, 203.0, 207.0, 179.0, 182.0, 125.0, 119.0, 219.0, 222.0, 228.0, 240.0, 230.0, 232.0, 184.0, 160.0, 164.0, 181.0, 156.0, 129.0, 214.0, 184.0, 246.0, 216.0, 168.0, 179.0, 213.0, 211.0, 194.0, 187.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868536372512659, "mean_inference_ms": 1.2092900191294707, "mean_action_processing_ms": 0.13288051052549346, "mean_env_wait_ms": 0.8576067512026819, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 516.0, "episode_reward_min": 164.0, "episode_reward_mean": 410.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 81.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 205.46}, "hist_stats": {"episode_reward": [444.0, 459.0, 459.0, 396.0, 438.0, 441.0, 455.0, 450.0, 433.0, 441.0, 359.0, 453.0, 410.0, 393.0, 459.0, 456.0, 301.0, 456.0, 447.0, 387.0, 513.0, 387.0, 453.0, 479.0, 407.0, 447.0, 456.0, 401.0, 465.0, 330.0, 398.0, 227.0, 444.0, 387.0, 396.0, 444.0, 416.0, 402.0, 462.0, 393.0, 456.0, 456.0, 401.0, 450.0, 441.0, 459.0, 319.0, 344.0, 447.0, 410.0, 453.0, 447.0, 447.0, 455.0, 393.0, 453.0, 453.0, 396.0, 470.0, 345.0, 307.0, 396.0, 307.0, 288.0, 444.0, 416.0, 459.0, 399.0, 473.0, 467.0, 458.0, 306.0, 164.0, 444.0, 459.0, 444.0, 358.0, 413.0, 402.0, 405.0, 393.0, 516.0, 399.0, 410.0, 453.0, 401.0, 410.0, 361.0, 244.0, 441.0, 468.0, 462.0, 344.0, 345.0, 285.0, 398.0, 462.0, 347.0, 424.0, 381.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [224.0, 220.0, 226.0, 233.0, 235.0, 224.0, 200.0, 196.0, 202.0, 236.0, 238.0, 203.0, 231.0, 224.0, 218.0, 232.0, 206.0, 227.0, 225.0, 216.0, 180.0, 179.0, 239.0, 214.0, 193.0, 217.0, 200.0, 193.0, 218.0, 241.0, 244.0, 212.0, 150.0, 151.0, 234.0, 222.0, 221.0, 226.0, 183.0, 204.0, 256.0, 257.0, 204.0, 183.0, 225.0, 228.0, 239.0, 240.0, 206.0, 201.0, 230.0, 217.0, 216.0, 240.0, 204.0, 197.0, 229.0, 236.0, 141.0, 189.0, 201.0, 197.0, 114.0, 113.0, 232.0, 212.0, 203.0, 184.0, 213.0, 183.0, 229.0, 215.0, 213.0, 203.0, 207.0, 195.0, 234.0, 228.0, 202.0, 191.0, 219.0, 237.0, 231.0, 225.0, 198.0, 203.0, 226.0, 
224.0, 219.0, 222.0, 232.0, 227.0, 154.0, 165.0, 167.0, 177.0, 205.0, 242.0, 206.0, 204.0, 224.0, 229.0, 238.0, 209.0, 225.0, 222.0, 242.0, 213.0, 208.0, 185.0, 228.0, 225.0, 234.0, 219.0, 197.0, 199.0, 240.0, 230.0, 184.0, 161.0, 152.0, 155.0, 203.0, 193.0, 157.0, 150.0, 147.0, 141.0, 216.0, 228.0, 217.0, 199.0, 214.0, 245.0, 190.0, 209.0, 236.0, 237.0, 235.0, 232.0, 229.0, 229.0, 152.0, 154.0, 83.0, 81.0, 235.0, 209.0, 215.0, 244.0, 224.0, 220.0, 170.0, 188.0, 199.0, 214.0, 219.0, 183.0, 211.0, 194.0, 189.0, 204.0, 266.0, 250.0, 219.0, 180.0, 197.0, 213.0, 219.0, 234.0, 200.0, 201.0, 203.0, 207.0, 179.0, 182.0, 125.0, 119.0, 219.0, 222.0, 228.0, 240.0, 230.0, 232.0, 184.0, 160.0, 164.0, 181.0, 156.0, 129.0, 214.0, 184.0, 246.0, 216.0, 168.0, 179.0, 213.0, 211.0, 194.0, 187.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868536372512659, "mean_inference_ms": 1.2092900191294707, "mean_action_processing_ms": 0.13288051052549346, "mean_env_wait_ms": 0.8576067512026819, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2099200, "num_agent_steps_trained": 2099200, "num_env_steps_sampled": 1049600, "num_env_steps_trained": 1049600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1049600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2099200, "timers": {"training_iteration_time_ms": 3617.381, "learn_time_ms": 1077.647, "learn_throughput": 11877.732, "synch_weights_time_ms": 12.896}, "counters": {"num_env_steps_sampled": 1049600, "num_env_steps_trained": 1049600, "num_agent_steps_sampled": 2099200, "num_agent_steps_trained": 2099200}, "done": false, "episodes_total": 2624, "training_iteration": 82, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-14", "timestamp": 1666580714, "time_this_iter_s": 3.685708999633789, "time_total_s": 306.94783210754395, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 306.94783210754395, "timesteps_since_restore": 0, "iterations_since_restore": 82, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.249999999999996, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 144.4, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 125.24, "shaped_reward_min": 84, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.64, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.09, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 12.93, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 12.47, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 0.7, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 12.13, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 11.6, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.58, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.52, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.71, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.81, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.94, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.95, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 3.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.76, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 12.13, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 11.6, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.13, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 11.6, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.1359031243948966e-26, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003301555407233536, "policy_loss": -0.000513147097080946, "vf_loss": 8.041152954101562, "vf_explained_var": 0.6765406131744385, "kl": 0.002183079021051526, "entropy": 1.2422490119934082, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1062400, "num_env_steps_trained": 1062400, "num_agent_steps_sampled": 2124800, "num_agent_steps_trained": 2124800}, "sampler_results": {"episode_reward_max": 516.0, "episode_reward_min": 164.0, "episode_reward_mean": 414.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 81.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 207.02}, "custom_metrics": {"sparse_reward_mean": 144.4, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 125.24, "shaped_reward_min": 84, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.64, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.09, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.93, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 12.47, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 0.7, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.62, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 12.13, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 11.6, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.8, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.58, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.52, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.8, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.71, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.81, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.56, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.94, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.95, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 3.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.76, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 12.13, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 11.6, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.13, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 11.6, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [444.0, 387.0, 396.0, 444.0, 416.0, 402.0, 462.0, 393.0, 456.0, 456.0, 401.0, 450.0, 441.0, 459.0, 319.0, 344.0, 447.0, 410.0, 453.0, 447.0, 447.0, 455.0, 393.0, 453.0, 453.0, 396.0, 470.0, 345.0, 307.0, 396.0, 307.0, 288.0, 444.0, 416.0, 459.0, 399.0, 473.0, 467.0, 458.0, 306.0, 164.0, 444.0, 459.0, 444.0, 358.0, 413.0, 402.0, 405.0, 393.0, 516.0, 399.0, 410.0, 453.0, 401.0, 410.0, 361.0, 244.0, 441.0, 468.0, 462.0, 344.0, 345.0, 285.0, 398.0, 462.0, 347.0, 424.0, 381.0, 453.0, 359.0, 413.0, 450.0, 462.0, 396.0, 447.0, 444.0, 465.0, 455.0, 452.0, 462.0, 390.0, 453.0, 344.0, 456.0, 450.0, 498.0, 421.0, 456.0, 390.0, 378.0, 444.0, 453.0, 464.0, 452.0, 402.0, 424.0, 404.0, 462.0, 412.0, 401.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [232.0, 212.0, 203.0, 184.0, 213.0, 183.0, 229.0, 215.0, 213.0, 203.0, 207.0, 195.0, 234.0, 228.0, 202.0, 191.0, 219.0, 237.0, 231.0, 225.0, 198.0, 203.0, 226.0, 224.0, 219.0, 222.0, 232.0, 227.0, 154.0, 165.0, 167.0, 177.0, 205.0, 242.0, 206.0, 204.0, 224.0, 229.0, 238.0, 209.0, 225.0, 222.0, 242.0, 213.0, 208.0, 185.0, 228.0, 225.0, 234.0, 219.0, 197.0, 199.0, 240.0, 230.0, 184.0, 161.0, 152.0, 155.0, 203.0, 193.0, 157.0, 150.0, 147.0, 141.0, 216.0, 228.0, 217.0, 199.0, 214.0, 245.0, 190.0, 209.0, 236.0, 237.0, 235.0, 232.0, 229.0, 229.0, 152.0, 154.0, 83.0, 81.0, 235.0, 209.0, 215.0, 244.0, 224.0, 220.0, 170.0, 188.0, 199.0, 214.0, 219.0, 183.0, 211.0, 194.0, 189.0, 204.0, 266.0, 250.0, 219.0, 180.0, 197.0, 213.0, 219.0, 234.0, 200.0, 201.0, 203.0, 207.0, 179.0, 182.0, 125.0, 119.0, 219.0, 222.0, 228.0, 240.0, 230.0, 232.0, 184.0, 160.0, 164.0, 181.0, 156.0, 129.0, 214.0, 184.0, 246.0, 216.0, 168.0, 179.0, 213.0, 211.0, 194.0, 187.0, 237.0, 216.0, 187.0, 172.0, 191.0, 222.0, 226.0, 224.0, 236.0, 226.0, 189.0, 207.0, 225.0, 222.0, 221.0, 223.0, 224.0, 241.0, 236.0, 219.0, 239.0, 213.0, 231.0, 231.0, 192.0, 198.0, 224.0, 229.0, 160.0, 184.0, 218.0, 238.0, 218.0, 232.0, 240.0, 258.0, 204.0, 217.0, 215.0, 241.0, 198.0, 192.0, 182.0, 196.0, 233.0, 211.0, 222.0, 231.0, 226.0, 238.0, 240.0, 212.0, 195.0, 207.0, 218.0, 206.0, 203.0, 201.0, 234.0, 228.0, 203.0, 209.0, 204.0, 197.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868376859123841, "mean_inference_ms": 1.2091793351063924, "mean_action_processing_ms": 0.1328811999340644, "mean_env_wait_ms": 0.8571544936742166, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 516.0, "episode_reward_min": 164.0, "episode_reward_mean": 414.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 81.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 207.02}, "hist_stats": {"episode_reward": [444.0, 387.0, 396.0, 444.0, 416.0, 402.0, 462.0, 393.0, 456.0, 456.0, 401.0, 450.0, 441.0, 459.0, 319.0, 344.0, 447.0, 410.0, 453.0, 447.0, 447.0, 455.0, 393.0, 453.0, 453.0, 396.0, 470.0, 345.0, 307.0, 396.0, 307.0, 288.0, 444.0, 416.0, 459.0, 399.0, 473.0, 467.0, 458.0, 306.0, 164.0, 444.0, 459.0, 444.0, 358.0, 413.0, 402.0, 405.0, 393.0, 516.0, 399.0, 410.0, 453.0, 401.0, 410.0, 361.0, 244.0, 441.0, 468.0, 462.0, 344.0, 345.0, 285.0, 398.0, 462.0, 347.0, 424.0, 381.0, 453.0, 359.0, 413.0, 450.0, 462.0, 396.0, 447.0, 444.0, 465.0, 455.0, 452.0, 462.0, 390.0, 453.0, 344.0, 456.0, 450.0, 498.0, 421.0, 456.0, 390.0, 378.0, 444.0, 453.0, 464.0, 452.0, 402.0, 424.0, 404.0, 462.0, 412.0, 401.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [232.0, 212.0, 203.0, 184.0, 213.0, 183.0, 229.0, 215.0, 213.0, 203.0, 207.0, 195.0, 234.0, 228.0, 202.0, 191.0, 219.0, 237.0, 231.0, 225.0, 198.0, 203.0, 226.0, 224.0, 219.0, 222.0, 232.0, 227.0, 154.0, 165.0, 167.0, 177.0, 205.0, 242.0, 206.0, 204.0, 224.0, 229.0, 238.0, 209.0, 225.0, 222.0, 242.0, 213.0, 208.0, 185.0, 228.0, 225.0, 234.0, 219.0, 197.0, 199.0, 240.0, 230.0, 184.0, 161.0, 152.0, 155.0, 203.0, 193.0, 157.0, 150.0, 147.0, 141.0, 216.0, 228.0, 217.0, 199.0, 214.0, 245.0, 190.0, 209.0, 236.0, 237.0, 235.0, 232.0, 229.0, 229.0, 152.0, 154.0, 83.0, 81.0, 235.0, 209.0, 215.0, 244.0, 224.0, 
220.0, 170.0, 188.0, 199.0, 214.0, 219.0, 183.0, 211.0, 194.0, 189.0, 204.0, 266.0, 250.0, 219.0, 180.0, 197.0, 213.0, 219.0, 234.0, 200.0, 201.0, 203.0, 207.0, 179.0, 182.0, 125.0, 119.0, 219.0, 222.0, 228.0, 240.0, 230.0, 232.0, 184.0, 160.0, 164.0, 181.0, 156.0, 129.0, 214.0, 184.0, 246.0, 216.0, 168.0, 179.0, 213.0, 211.0, 194.0, 187.0, 237.0, 216.0, 187.0, 172.0, 191.0, 222.0, 226.0, 224.0, 236.0, 226.0, 189.0, 207.0, 225.0, 222.0, 221.0, 223.0, 224.0, 241.0, 236.0, 219.0, 239.0, 213.0, 231.0, 231.0, 192.0, 198.0, 224.0, 229.0, 160.0, 184.0, 218.0, 238.0, 218.0, 232.0, 240.0, 258.0, 204.0, 217.0, 215.0, 241.0, 198.0, 192.0, 182.0, 196.0, 233.0, 211.0, 222.0, 231.0, 226.0, 238.0, 240.0, 212.0, 195.0, 207.0, 218.0, 206.0, 203.0, 201.0, 234.0, 228.0, 203.0, 209.0, 204.0, 197.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868376859123841, "mean_inference_ms": 1.2091793351063924, "mean_action_processing_ms": 0.1328811999340644, "mean_env_wait_ms": 0.8571544936742166, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2124800, "num_agent_steps_trained": 2124800, "num_env_steps_sampled": 1062400, "num_env_steps_trained": 1062400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1062400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2124800, "timers": {"training_iteration_time_ms": 3579.484, "learn_time_ms": 1070.136, "learn_throughput": 11961.094, "synch_weights_time_ms": 12.536}, "counters": {"num_env_steps_sampled": 1062400, "num_env_steps_trained": 1062400, "num_agent_steps_sampled": 2124800, "num_agent_steps_trained": 2124800}, "done": false, "episodes_total": 2656, "training_iteration": 83, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-18", "timestamp": 1666580718, "time_this_iter_s": 3.6267499923706055, "time_total_s": 310.57458209991455, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 310.57458209991455, "timesteps_since_restore": 0, "iterations_since_restore": 83, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.639999999999997, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 145.0, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 126.3, "shaped_reward_min": 74, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.6, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 13.15, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 12.9, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.4, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 0.71, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 11.97, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.83, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.79, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.72, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.82, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.67, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.13, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 3.64, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.92, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 11.97, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.83, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.97, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.83, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.0679515621974483e-26, "cur_lr": 0.0010000000474974513, "total_loss": -0.00299975648522377, "policy_loss": -0.00317044323310256, "vf_loss": 7.9165191650390625, "vf_explained_var": 0.6919734477996826, "kl": 0.0018455530516803265, "entropy": 1.2419322729110718, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1075200, "num_env_steps_trained": 1075200, "num_agent_steps_sampled": 2150400, "num_agent_steps_trained": 2150400}, "sampler_results": {"episode_reward_max": 516.0, "episode_reward_min": 164.0, "episode_reward_mean": 416.3, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 81.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 208.15}, "custom_metrics": {"sparse_reward_mean": 145.0, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 126.3, "shaped_reward_min": 74, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.6, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 13.15, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.9, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.4, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.98, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 11, "useful_onion_drop_agent_0_mean": 0.71, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 10, "potting_onion_agent_0_mean": 11.97, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 11.83, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.79, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.72, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.82, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.67, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.13, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 3.64, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.92, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.15, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 11.97, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 11.83, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.97, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 11.83, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [444.0, 416.0, 459.0, 399.0, 473.0, 467.0, 458.0, 306.0, 164.0, 444.0, 459.0, 444.0, 358.0, 413.0, 402.0, 405.0, 393.0, 516.0, 399.0, 410.0, 453.0, 401.0, 410.0, 361.0, 244.0, 441.0, 468.0, 462.0, 344.0, 345.0, 285.0, 398.0, 462.0, 347.0, 424.0, 381.0, 453.0, 359.0, 413.0, 450.0, 462.0, 396.0, 447.0, 444.0, 465.0, 455.0, 452.0, 462.0, 390.0, 453.0, 344.0, 456.0, 450.0, 498.0, 421.0, 456.0, 390.0, 378.0, 444.0, 453.0, 464.0, 452.0, 402.0, 424.0, 404.0, 462.0, 412.0, 401.0, 444.0, 465.0, 387.0, 465.0, 453.0, 390.0, 450.0, 341.0, 411.0, 453.0, 447.0, 462.0, 367.0, 335.0, 398.0, 441.0, 413.0, 456.0, 399.0, 462.0, 444.0, 194.0, 462.0, 410.0, 450.0, 462.0, 373.0, 416.0, 447.0, 441.0, 459.0, 366.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [216.0, 228.0, 217.0, 199.0, 214.0, 245.0, 190.0, 209.0, 236.0, 237.0, 235.0, 232.0, 229.0, 229.0, 152.0, 154.0, 83.0, 81.0, 235.0, 209.0, 215.0, 244.0, 224.0, 220.0, 170.0, 188.0, 199.0, 214.0, 219.0, 183.0, 211.0, 194.0, 189.0, 204.0, 266.0, 250.0, 219.0, 180.0, 197.0, 213.0, 219.0, 234.0, 200.0, 201.0, 203.0, 207.0, 179.0, 182.0, 125.0, 119.0, 219.0, 222.0, 228.0, 240.0, 230.0, 232.0, 184.0, 160.0, 164.0, 181.0, 156.0, 129.0, 214.0, 184.0, 246.0, 216.0, 168.0, 179.0, 213.0, 211.0, 194.0, 187.0, 237.0, 216.0, 187.0, 172.0, 191.0, 222.0, 226.0, 224.0, 236.0, 226.0, 189.0, 207.0, 225.0, 222.0, 221.0, 223.0, 224.0, 241.0, 236.0, 219.0, 239.0, 213.0, 231.0, 231.0, 192.0, 198.0, 224.0, 229.0, 160.0, 184.0, 218.0, 238.0, 218.0, 232.0, 240.0, 258.0, 204.0, 217.0, 215.0, 241.0, 198.0, 192.0, 182.0, 196.0, 233.0, 211.0, 222.0, 231.0, 226.0, 238.0, 240.0, 212.0, 195.0, 207.0, 218.0, 206.0, 203.0, 201.0, 234.0, 228.0, 203.0, 209.0, 204.0, 197.0, 216.0, 228.0, 236.0, 229.0, 194.0, 193.0, 226.0, 239.0, 233.0, 220.0, 181.0, 209.0, 209.0, 241.0, 172.0, 169.0, 215.0, 196.0, 214.0, 239.0, 219.0, 228.0, 217.0, 245.0, 181.0, 186.0, 179.0, 156.0, 201.0, 197.0, 223.0, 218.0, 203.0, 210.0, 220.0, 236.0, 195.0, 204.0, 226.0, 236.0, 229.0, 215.0, 90.0, 104.0, 223.0, 239.0, 205.0, 205.0, 222.0, 228.0, 236.0, 226.0, 190.0, 183.0, 203.0, 213.0, 220.0, 227.0, 219.0, 222.0, 223.0, 236.0, 180.0, 186.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686855671363231, "mean_inference_ms": 1.2091449764918027, "mean_action_processing_ms": 0.13288788327384127, "mean_env_wait_ms": 0.8567981341073625, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 516.0, "episode_reward_min": 164.0, "episode_reward_mean": 416.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 81.0}, "policy_reward_max": {"ppo": 266.0}, "policy_reward_mean": {"ppo": 208.15}, "hist_stats": {"episode_reward": [444.0, 416.0, 459.0, 399.0, 473.0, 467.0, 458.0, 306.0, 164.0, 444.0, 459.0, 444.0, 358.0, 413.0, 402.0, 405.0, 393.0, 516.0, 399.0, 410.0, 453.0, 401.0, 410.0, 361.0, 244.0, 441.0, 468.0, 462.0, 344.0, 345.0, 285.0, 398.0, 462.0, 347.0, 424.0, 381.0, 453.0, 359.0, 413.0, 450.0, 462.0, 396.0, 447.0, 444.0, 465.0, 455.0, 452.0, 462.0, 390.0, 453.0, 344.0, 456.0, 450.0, 498.0, 421.0, 456.0, 390.0, 378.0, 444.0, 453.0, 464.0, 452.0, 402.0, 424.0, 404.0, 462.0, 412.0, 401.0, 444.0, 465.0, 387.0, 465.0, 453.0, 390.0, 450.0, 341.0, 411.0, 453.0, 447.0, 462.0, 367.0, 335.0, 398.0, 441.0, 413.0, 456.0, 399.0, 462.0, 444.0, 194.0, 462.0, 410.0, 450.0, 462.0, 373.0, 416.0, 447.0, 441.0, 459.0, 366.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [216.0, 228.0, 217.0, 199.0, 214.0, 245.0, 190.0, 209.0, 236.0, 237.0, 235.0, 232.0, 229.0, 229.0, 152.0, 154.0, 83.0, 81.0, 235.0, 209.0, 215.0, 244.0, 224.0, 220.0, 170.0, 188.0, 199.0, 214.0, 219.0, 183.0, 211.0, 194.0, 189.0, 204.0, 266.0, 250.0, 219.0, 180.0, 197.0, 213.0, 219.0, 234.0, 200.0, 201.0, 203.0, 207.0, 179.0, 182.0, 125.0, 119.0, 219.0, 222.0, 228.0, 240.0, 230.0, 232.0, 184.0, 160.0, 164.0, 181.0, 156.0, 129.0, 214.0, 184.0, 246.0, 216.0, 168.0, 179.0, 213.0, 211.0, 194.0, 187.0, 237.0, 216.0, 187.0, 172.0, 191.0, 222.0, 226.0, 224.0, 236.0, 226.0, 189.0, 207.0, 225.0, 222.0, 221.0, 
223.0, 224.0, 241.0, 236.0, 219.0, 239.0, 213.0, 231.0, 231.0, 192.0, 198.0, 224.0, 229.0, 160.0, 184.0, 218.0, 238.0, 218.0, 232.0, 240.0, 258.0, 204.0, 217.0, 215.0, 241.0, 198.0, 192.0, 182.0, 196.0, 233.0, 211.0, 222.0, 231.0, 226.0, 238.0, 240.0, 212.0, 195.0, 207.0, 218.0, 206.0, 203.0, 201.0, 234.0, 228.0, 203.0, 209.0, 204.0, 197.0, 216.0, 228.0, 236.0, 229.0, 194.0, 193.0, 226.0, 239.0, 233.0, 220.0, 181.0, 209.0, 209.0, 241.0, 172.0, 169.0, 215.0, 196.0, 214.0, 239.0, 219.0, 228.0, 217.0, 245.0, 181.0, 186.0, 179.0, 156.0, 201.0, 197.0, 223.0, 218.0, 203.0, 210.0, 220.0, 236.0, 195.0, 204.0, 226.0, 236.0, 229.0, 215.0, 90.0, 104.0, 223.0, 239.0, 205.0, 205.0, 222.0, 228.0, 236.0, 226.0, 190.0, 183.0, 203.0, 213.0, 220.0, 227.0, 219.0, 222.0, 223.0, 236.0, 180.0, 186.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686855671363231, "mean_inference_ms": 1.2091449764918027, "mean_action_processing_ms": 0.13288788327384127, "mean_env_wait_ms": 0.8567981341073625, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2150400, "num_agent_steps_trained": 2150400, "num_env_steps_sampled": 1075200, "num_env_steps_trained": 1075200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1075200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2150400, "timers": {"training_iteration_time_ms": 3576.175, "learn_time_ms": 1064.297, "learn_throughput": 12026.715, "synch_weights_time_ms": 10.67}, "counters": {"num_env_steps_sampled": 1075200, "num_env_steps_trained": 1075200, "num_agent_steps_sampled": 2150400, "num_agent_steps_trained": 2150400}, "done": false, "episodes_total": 2688, "training_iteration": 84, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-22", "timestamp": 1666580722, "time_this_iter_s": 3.6167852878570557, "time_total_s": 314.1913673877716, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 314.1913673877716, "timesteps_since_restore": 0, "iterations_since_restore": 84, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.366666666666664, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 148.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 127.01, "shaped_reward_min": 68, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.34, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 13.71, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 12.65, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.98, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.72, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.37, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.6, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.89, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.86, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.72, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.37, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.72, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.37, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.0339757810987241e-26, "cur_lr": 0.0010000000474974513, "total_loss": -0.00338260130956769, "policy_loss": -0.003559306263923645, "vf_loss": 7.971312522888184, "vf_explained_var": 0.6739051342010498, "kl": 0.0015661935321986675, "entropy": 1.2408554553985596, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1088000, "num_env_steps_trained": 1088000, "num_agent_steps_sampled": 2176000, "num_agent_steps_trained": 2176000}, "sampler_results": {"episode_reward_max": 522.0, "episode_reward_min": 194.0, "episode_reward_mean": 423.41, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 211.705}, "custom_metrics": {"sparse_reward_mean": 148.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 127.01, "shaped_reward_min": 68, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.34, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 13.71, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.65, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.98, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.68, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.56, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.72, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.37, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.6, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.69, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.72, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.59, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.89, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.96, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.86, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.72, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.37, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.72, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.37, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [462.0, 347.0, 424.0, 381.0, 453.0, 359.0, 413.0, 450.0, 462.0, 396.0, 447.0, 444.0, 465.0, 455.0, 452.0, 462.0, 390.0, 453.0, 344.0, 456.0, 450.0, 498.0, 421.0, 456.0, 390.0, 378.0, 444.0, 453.0, 464.0, 452.0, 402.0, 424.0, 404.0, 462.0, 412.0, 401.0, 444.0, 465.0, 387.0, 465.0, 453.0, 390.0, 450.0, 341.0, 411.0, 453.0, 447.0, 462.0, 367.0, 335.0, 398.0, 441.0, 413.0, 456.0, 399.0, 462.0, 444.0, 194.0, 462.0, 410.0, 450.0, 462.0, 373.0, 416.0, 447.0, 441.0, 459.0, 366.0, 447.0, 513.0, 393.0, 402.0, 462.0, 393.0, 450.0, 404.0, 462.0, 441.0, 507.0, 453.0, 390.0, 522.0, 401.0, 456.0, 459.0, 339.0, 453.0, 384.0, 401.0, 465.0, 247.0, 314.0, 513.0, 387.0, 404.0, 458.0, 228.0, 453.0, 438.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [246.0, 216.0, 168.0, 179.0, 213.0, 211.0, 194.0, 187.0, 237.0, 216.0, 187.0, 172.0, 191.0, 222.0, 226.0, 224.0, 236.0, 226.0, 189.0, 207.0, 225.0, 222.0, 221.0, 223.0, 224.0, 241.0, 236.0, 219.0, 239.0, 213.0, 231.0, 231.0, 192.0, 198.0, 224.0, 229.0, 160.0, 184.0, 218.0, 238.0, 218.0, 232.0, 240.0, 258.0, 204.0, 217.0, 215.0, 241.0, 198.0, 192.0, 182.0, 196.0, 233.0, 211.0, 222.0, 231.0, 226.0, 238.0, 240.0, 212.0, 195.0, 207.0, 218.0, 206.0, 203.0, 201.0, 234.0, 228.0, 203.0, 209.0, 204.0, 197.0, 216.0, 228.0, 236.0, 229.0, 194.0, 193.0, 226.0, 239.0, 233.0, 220.0, 181.0, 209.0, 209.0, 241.0, 172.0, 169.0, 215.0, 196.0, 214.0, 239.0, 219.0, 228.0, 217.0, 245.0, 181.0, 186.0, 179.0, 156.0, 201.0, 197.0, 223.0, 218.0, 203.0, 210.0, 220.0, 236.0, 195.0, 204.0, 226.0, 236.0, 229.0, 215.0, 90.0, 104.0, 223.0, 239.0, 205.0, 205.0, 222.0, 228.0, 236.0, 226.0, 190.0, 183.0, 203.0, 213.0, 220.0, 227.0, 219.0, 222.0, 223.0, 236.0, 180.0, 186.0, 228.0, 219.0, 255.0, 258.0, 181.0, 212.0, 205.0, 197.0, 217.0, 245.0, 192.0, 201.0, 231.0, 219.0, 212.0, 192.0, 237.0, 225.0, 226.0, 215.0, 249.0, 258.0, 234.0, 219.0, 199.0, 191.0, 251.0, 271.0, 202.0, 199.0, 229.0, 227.0, 221.0, 238.0, 163.0, 176.0, 222.0, 231.0, 195.0, 189.0, 185.0, 216.0, 222.0, 243.0, 123.0, 124.0, 164.0, 150.0, 256.0, 257.0, 200.0, 187.0, 222.0, 182.0, 221.0, 237.0, 107.0, 121.0, 239.0, 214.0, 209.0, 229.0, 267.0, 246.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868809329842898, "mean_inference_ms": 1.2091180245018194, "mean_action_processing_ms": 0.13290214276243056, "mean_env_wait_ms": 0.8564687501783257, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 522.0, "episode_reward_min": 194.0, "episode_reward_mean": 423.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 211.705}, "hist_stats": {"episode_reward": [462.0, 347.0, 424.0, 381.0, 453.0, 359.0, 413.0, 450.0, 462.0, 396.0, 447.0, 444.0, 465.0, 455.0, 452.0, 462.0, 390.0, 453.0, 344.0, 456.0, 450.0, 498.0, 421.0, 456.0, 390.0, 378.0, 444.0, 453.0, 464.0, 452.0, 402.0, 424.0, 404.0, 462.0, 412.0, 401.0, 444.0, 465.0, 387.0, 465.0, 453.0, 390.0, 450.0, 341.0, 411.0, 453.0, 447.0, 462.0, 367.0, 335.0, 398.0, 441.0, 413.0, 456.0, 399.0, 462.0, 444.0, 194.0, 462.0, 410.0, 450.0, 462.0, 373.0, 416.0, 447.0, 441.0, 459.0, 366.0, 447.0, 513.0, 393.0, 402.0, 462.0, 393.0, 450.0, 404.0, 462.0, 441.0, 507.0, 453.0, 390.0, 522.0, 401.0, 456.0, 459.0, 339.0, 453.0, 384.0, 401.0, 465.0, 247.0, 314.0, 513.0, 387.0, 404.0, 458.0, 228.0, 453.0, 438.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [246.0, 216.0, 168.0, 179.0, 213.0, 211.0, 194.0, 187.0, 237.0, 216.0, 187.0, 172.0, 191.0, 222.0, 226.0, 224.0, 236.0, 226.0, 189.0, 207.0, 225.0, 222.0, 221.0, 223.0, 224.0, 241.0, 236.0, 219.0, 239.0, 213.0, 231.0, 231.0, 192.0, 198.0, 224.0, 229.0, 160.0, 184.0, 218.0, 238.0, 218.0, 232.0, 240.0, 258.0, 204.0, 217.0, 215.0, 241.0, 198.0, 192.0, 182.0, 196.0, 233.0, 211.0, 222.0, 231.0, 226.0, 238.0, 240.0, 212.0, 195.0, 207.0, 218.0, 206.0, 203.0, 201.0, 234.0, 228.0, 203.0, 209.0, 204.0, 197.0, 216.0, 228.0, 236.0, 229.0, 194.0, 193.0, 226.0, 239.0, 233.0, 220.0, 181.0, 209.0, 209.0, 241.0, 172.0, 
169.0, 215.0, 196.0, 214.0, 239.0, 219.0, 228.0, 217.0, 245.0, 181.0, 186.0, 179.0, 156.0, 201.0, 197.0, 223.0, 218.0, 203.0, 210.0, 220.0, 236.0, 195.0, 204.0, 226.0, 236.0, 229.0, 215.0, 90.0, 104.0, 223.0, 239.0, 205.0, 205.0, 222.0, 228.0, 236.0, 226.0, 190.0, 183.0, 203.0, 213.0, 220.0, 227.0, 219.0, 222.0, 223.0, 236.0, 180.0, 186.0, 228.0, 219.0, 255.0, 258.0, 181.0, 212.0, 205.0, 197.0, 217.0, 245.0, 192.0, 201.0, 231.0, 219.0, 212.0, 192.0, 237.0, 225.0, 226.0, 215.0, 249.0, 258.0, 234.0, 219.0, 199.0, 191.0, 251.0, 271.0, 202.0, 199.0, 229.0, 227.0, 221.0, 238.0, 163.0, 176.0, 222.0, 231.0, 195.0, 189.0, 185.0, 216.0, 222.0, 243.0, 123.0, 124.0, 164.0, 150.0, 256.0, 257.0, 200.0, 187.0, 222.0, 182.0, 221.0, 237.0, 107.0, 121.0, 239.0, 214.0, 209.0, 229.0, 267.0, 246.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868809329842898, "mean_inference_ms": 1.2091180245018194, "mean_action_processing_ms": 0.13290214276243056, "mean_env_wait_ms": 0.8564687501783257, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2176000, "num_agent_steps_trained": 2176000, "num_env_steps_sampled": 1088000, "num_env_steps_trained": 1088000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1088000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2176000, "timers": {"training_iteration_time_ms": 3597.35, "learn_time_ms": 1073.382, "learn_throughput": 11924.93, "synch_weights_time_ms": 10.686}, "counters": {"num_env_steps_sampled": 1088000, "num_env_steps_trained": 1088000, "num_agent_steps_sampled": 2176000, "num_agent_steps_trained": 2176000}, "done": false, "episodes_total": 2720, "training_iteration": 85, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-26", "timestamp": 1666580726, "time_this_iter_s": 3.7110044956207275, "time_total_s": 317.90237188339233, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 317.90237188339233, "timesteps_since_restore": 0, "iterations_since_restore": 85, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.419999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 148.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 127.55, "shaped_reward_min": 68, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.49, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 13.51, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 12.75, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.79, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.95, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.19, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.69, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.66, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.75, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.59, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.5, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.9, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.92, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.83, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.95, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.19, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.95, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.19, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 5.169878905493621e-27, "cur_lr": 0.0010000000474974513, "total_loss": -0.0018517355201765895, "policy_loss": -0.002034394070506096, "vf_loss": 7.965593338012695, "vf_explained_var": 0.6685700416564941, "kl": 0.0015647329855710268, "entropy": 1.227802038192749, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1100800, "num_env_steps_trained": 1100800, "num_agent_steps_sampled": 2201600, "num_agent_steps_trained": 2201600}, "sampler_results": {"episode_reward_max": 522.0, "episode_reward_min": 194.0, "episode_reward_mean": 423.95, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 272.0}, "policy_reward_mean": {"ppo": 211.975}, "custom_metrics": {"sparse_reward_mean": 148.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 127.55, "shaped_reward_min": 68, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.49, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 13.51, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.75, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.79, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.95, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.65, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 11.95, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.19, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.69, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 2.66, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 2.75, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 9, "dish_drop_agent_1_mean": 1.53, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.59, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 5, "useful_dish_drop_agent_1_mean": 0.5, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 3.9, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.92, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.82, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 11.95, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.19, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.95, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.19, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [404.0, 462.0, 412.0, 401.0, 444.0, 465.0, 387.0, 465.0, 453.0, 390.0, 450.0, 341.0, 411.0, 453.0, 447.0, 462.0, 367.0, 335.0, 398.0, 441.0, 413.0, 456.0, 399.0, 462.0, 444.0, 194.0, 462.0, 410.0, 450.0, 462.0, 373.0, 416.0, 447.0, 441.0, 459.0, 366.0, 447.0, 513.0, 393.0, 402.0, 462.0, 393.0, 450.0, 404.0, 462.0, 441.0, 507.0, 453.0, 390.0, 522.0, 401.0, 456.0, 459.0, 339.0, 453.0, 384.0, 401.0, 465.0, 247.0, 314.0, 513.0, 387.0, 404.0, 458.0, 228.0, 453.0, 438.0, 513.0, 381.0, 513.0, 459.0, 507.0, 453.0, 404.0, 356.0, 441.0, 462.0, 402.0, 438.0, 453.0, 416.0, 398.0, 462.0, 447.0, 470.0, 456.0, 467.0, 459.0, 456.0, 456.0, 465.0, 291.0, 413.0, 435.0, 462.0, 396.0, 359.0, 453.0, 401.0, 370.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [203.0, 201.0, 234.0, 228.0, 203.0, 209.0, 204.0, 197.0, 216.0, 228.0, 236.0, 229.0, 194.0, 193.0, 226.0, 239.0, 233.0, 220.0, 181.0, 209.0, 209.0, 241.0, 172.0, 169.0, 215.0, 196.0, 214.0, 239.0, 219.0, 228.0, 217.0, 245.0, 181.0, 186.0, 179.0, 156.0, 201.0, 197.0, 223.0, 218.0, 203.0, 210.0, 220.0, 236.0, 195.0, 204.0, 226.0, 236.0, 229.0, 215.0, 90.0, 104.0, 223.0, 239.0, 205.0, 205.0, 222.0, 228.0, 236.0, 226.0, 190.0, 183.0, 203.0, 213.0, 220.0, 227.0, 219.0, 222.0, 223.0, 236.0, 180.0, 186.0, 228.0, 219.0, 255.0, 258.0, 181.0, 212.0, 205.0, 197.0, 217.0, 245.0, 192.0, 201.0, 231.0, 219.0, 212.0, 192.0, 237.0, 225.0, 226.0, 215.0, 249.0, 258.0, 234.0, 219.0, 199.0, 191.0, 251.0, 271.0, 202.0, 199.0, 229.0, 227.0, 221.0, 238.0, 163.0, 176.0, 222.0, 231.0, 195.0, 189.0, 185.0, 216.0, 222.0, 243.0, 123.0, 124.0, 164.0, 150.0, 256.0, 257.0, 200.0, 187.0, 222.0, 182.0, 221.0, 237.0, 107.0, 121.0, 239.0, 214.0, 209.0, 229.0, 267.0, 246.0, 190.0, 191.0, 241.0, 272.0, 230.0, 229.0, 271.0, 236.0, 240.0, 213.0, 196.0, 208.0, 185.0, 171.0, 216.0, 225.0, 235.0, 227.0, 202.0, 200.0, 220.0, 218.0, 231.0, 222.0, 205.0, 211.0, 191.0, 207.0, 232.0, 230.0, 211.0, 236.0, 230.0, 240.0, 239.0, 217.0, 234.0, 233.0, 220.0, 239.0, 231.0, 225.0, 235.0, 221.0, 226.0, 239.0, 138.0, 153.0, 193.0, 220.0, 231.0, 204.0, 245.0, 217.0, 207.0, 189.0, 174.0, 185.0, 230.0, 223.0, 216.0, 185.0, 185.0, 185.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686874370240036, "mean_inference_ms": 1.2090592390348984, "mean_action_processing_ms": 0.1329117043730399, "mean_env_wait_ms": 0.8561022738174671, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 522.0, "episode_reward_min": 194.0, "episode_reward_mean": 423.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 272.0}, "policy_reward_mean": {"ppo": 211.975}, "hist_stats": {"episode_reward": [404.0, 462.0, 412.0, 401.0, 444.0, 465.0, 387.0, 465.0, 453.0, 390.0, 450.0, 341.0, 411.0, 453.0, 447.0, 462.0, 367.0, 335.0, 398.0, 441.0, 413.0, 456.0, 399.0, 462.0, 444.0, 194.0, 462.0, 410.0, 450.0, 462.0, 373.0, 416.0, 447.0, 441.0, 459.0, 366.0, 447.0, 513.0, 393.0, 402.0, 462.0, 393.0, 450.0, 404.0, 462.0, 441.0, 507.0, 453.0, 390.0, 522.0, 401.0, 456.0, 459.0, 339.0, 453.0, 384.0, 401.0, 465.0, 247.0, 314.0, 513.0, 387.0, 404.0, 458.0, 228.0, 453.0, 438.0, 513.0, 381.0, 513.0, 459.0, 507.0, 453.0, 404.0, 356.0, 441.0, 462.0, 402.0, 438.0, 453.0, 416.0, 398.0, 462.0, 447.0, 470.0, 456.0, 467.0, 459.0, 456.0, 456.0, 465.0, 291.0, 413.0, 435.0, 462.0, 396.0, 359.0, 453.0, 401.0, 370.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [203.0, 201.0, 234.0, 228.0, 203.0, 209.0, 204.0, 197.0, 216.0, 228.0, 236.0, 229.0, 194.0, 193.0, 226.0, 239.0, 233.0, 220.0, 181.0, 209.0, 209.0, 241.0, 172.0, 169.0, 215.0, 196.0, 214.0, 239.0, 219.0, 228.0, 217.0, 245.0, 181.0, 186.0, 179.0, 156.0, 201.0, 197.0, 223.0, 218.0, 203.0, 210.0, 220.0, 236.0, 195.0, 204.0, 226.0, 236.0, 229.0, 215.0, 90.0, 104.0, 223.0, 239.0, 205.0, 205.0, 222.0, 228.0, 236.0, 226.0, 190.0, 183.0, 203.0, 213.0, 220.0, 227.0, 219.0, 222.0, 223.0, 236.0, 180.0, 186.0, 228.0, 219.0, 255.0, 258.0, 181.0, 212.0, 205.0, 197.0, 217.0, 245.0, 192.0, 201.0, 231.0, 219.0, 212.0, 
192.0, 237.0, 225.0, 226.0, 215.0, 249.0, 258.0, 234.0, 219.0, 199.0, 191.0, 251.0, 271.0, 202.0, 199.0, 229.0, 227.0, 221.0, 238.0, 163.0, 176.0, 222.0, 231.0, 195.0, 189.0, 185.0, 216.0, 222.0, 243.0, 123.0, 124.0, 164.0, 150.0, 256.0, 257.0, 200.0, 187.0, 222.0, 182.0, 221.0, 237.0, 107.0, 121.0, 239.0, 214.0, 209.0, 229.0, 267.0, 246.0, 190.0, 191.0, 241.0, 272.0, 230.0, 229.0, 271.0, 236.0, 240.0, 213.0, 196.0, 208.0, 185.0, 171.0, 216.0, 225.0, 235.0, 227.0, 202.0, 200.0, 220.0, 218.0, 231.0, 222.0, 205.0, 211.0, 191.0, 207.0, 232.0, 230.0, 211.0, 236.0, 230.0, 240.0, 239.0, 217.0, 234.0, 233.0, 220.0, 239.0, 231.0, 225.0, 235.0, 221.0, 226.0, 239.0, 138.0, 153.0, 193.0, 220.0, 231.0, 204.0, 245.0, 217.0, 207.0, 189.0, 174.0, 185.0, 230.0, 223.0, 216.0, 185.0, 185.0, 185.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686874370240036, "mean_inference_ms": 1.2090592390348984, "mean_action_processing_ms": 0.1329117043730399, "mean_env_wait_ms": 0.8561022738174671, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2201600, "num_agent_steps_trained": 2201600, "num_env_steps_sampled": 1100800, "num_env_steps_trained": 1100800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1100800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2201600, "timers": {"training_iteration_time_ms": 3611.444, "learn_time_ms": 1086.391, "learn_throughput": 11782.133, "synch_weights_time_ms": 10.058}, "counters": {"num_env_steps_sampled": 1100800, "num_env_steps_trained": 1100800, "num_agent_steps_sampled": 2201600, "num_agent_steps_trained": 2201600}, "done": false, "episodes_total": 2752, "training_iteration": 86, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-29", "timestamp": 1666580729, "time_this_iter_s": 3.7309343814849854, "time_total_s": 321.6333062648773, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 321.6333062648773, "timesteps_since_restore": 0, "iterations_since_restore": 86, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.200000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 150.0, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 128.63, "shaped_reward_min": 60, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.47, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.56, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 12.69, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.85, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.58, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.06, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.16, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.96, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.73, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.47, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.35, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.0, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.92, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.74, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 12.06, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.16, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.06, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.16, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.5849394527468104e-27, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015968354418873787, "policy_loss": -0.001768257119692862, "vf_loss": 7.829046726226807, "vf_explained_var": 0.6879750490188599, "kl": 0.001952783903107047, "entropy": 1.2229652404785156, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1113600, "num_env_steps_trained": 1113600, "num_agent_steps_sampled": 2227200, "num_agent_steps_trained": 2227200}, "sampler_results": {"episode_reward_max": 522.0, "episode_reward_min": 180.0, "episode_reward_mean": 428.63, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 87.0}, "policy_reward_max": {"ppo": 272.0}, "policy_reward_mean": {"ppo": 214.315}, "custom_metrics": {"sparse_reward_mean": 150.0, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 128.63, "shaped_reward_min": 60, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.47, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.56, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.69, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.85, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 1.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.55, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.58, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.06, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.16, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.96, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.73, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 1.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.31, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.47, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.35, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.0, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.92, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.74, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 12.06, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.16, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.06, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.16, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [447.0, 441.0, 459.0, 366.0, 447.0, 513.0, 393.0, 402.0, 462.0, 393.0, 450.0, 404.0, 462.0, 441.0, 507.0, 453.0, 390.0, 522.0, 401.0, 456.0, 459.0, 339.0, 453.0, 384.0, 401.0, 465.0, 247.0, 314.0, 513.0, 387.0, 404.0, 458.0, 228.0, 453.0, 438.0, 513.0, 381.0, 513.0, 459.0, 507.0, 453.0, 404.0, 356.0, 441.0, 462.0, 402.0, 438.0, 453.0, 416.0, 398.0, 462.0, 447.0, 470.0, 456.0, 467.0, 459.0, 456.0, 456.0, 465.0, 291.0, 413.0, 435.0, 462.0, 396.0, 359.0, 453.0, 401.0, 370.0, 461.0, 459.0, 402.0, 465.0, 465.0, 456.0, 453.0, 396.0, 444.0, 462.0, 456.0, 441.0, 402.0, 416.0, 410.0, 395.0, 180.0, 462.0, 411.0, 465.0, 336.0, 465.0, 464.0, 513.0, 456.0, 456.0, 441.0, 465.0, 495.0, 341.0, 459.0, 405.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [220.0, 227.0, 219.0, 222.0, 223.0, 236.0, 180.0, 186.0, 228.0, 219.0, 255.0, 258.0, 181.0, 212.0, 205.0, 197.0, 217.0, 245.0, 192.0, 201.0, 231.0, 219.0, 212.0, 192.0, 237.0, 225.0, 226.0, 215.0, 249.0, 258.0, 234.0, 219.0, 199.0, 191.0, 251.0, 271.0, 202.0, 199.0, 229.0, 227.0, 221.0, 238.0, 163.0, 176.0, 222.0, 231.0, 195.0, 189.0, 185.0, 216.0, 222.0, 243.0, 123.0, 124.0, 164.0, 150.0, 256.0, 257.0, 200.0, 187.0, 222.0, 182.0, 221.0, 237.0, 107.0, 121.0, 239.0, 214.0, 209.0, 229.0, 267.0, 246.0, 190.0, 191.0, 241.0, 272.0, 230.0, 229.0, 271.0, 236.0, 240.0, 213.0, 196.0, 208.0, 185.0, 171.0, 216.0, 225.0, 235.0, 227.0, 202.0, 200.0, 220.0, 218.0, 231.0, 222.0, 205.0, 211.0, 191.0, 207.0, 232.0, 230.0, 211.0, 236.0, 230.0, 240.0, 239.0, 217.0, 234.0, 233.0, 220.0, 239.0, 231.0, 225.0, 235.0, 221.0, 226.0, 239.0, 138.0, 153.0, 193.0, 220.0, 231.0, 204.0, 245.0, 217.0, 207.0, 189.0, 174.0, 185.0, 230.0, 223.0, 216.0, 185.0, 185.0, 185.0, 228.0, 233.0, 222.0, 237.0, 206.0, 196.0, 228.0, 237.0, 220.0, 245.0, 220.0, 236.0, 230.0, 223.0, 214.0, 182.0, 232.0, 212.0, 243.0, 219.0, 228.0, 228.0, 226.0, 215.0, 203.0, 199.0, 202.0, 214.0, 214.0, 196.0, 199.0, 196.0, 87.0, 93.0, 234.0, 228.0, 203.0, 208.0, 231.0, 234.0, 152.0, 184.0, 234.0, 231.0, 237.0, 227.0, 267.0, 246.0, 221.0, 235.0, 230.0, 226.0, 220.0, 221.0, 250.0, 215.0, 254.0, 241.0, 171.0, 170.0, 239.0, 220.0, 210.0, 195.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868516517488307, "mean_inference_ms": 1.209345762889332, "mean_action_processing_ms": 0.13291795937199066, "mean_env_wait_ms": 0.8561722429975059, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 522.0, "episode_reward_min": 180.0, "episode_reward_mean": 428.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 87.0}, "policy_reward_max": {"ppo": 272.0}, "policy_reward_mean": {"ppo": 214.315}, "hist_stats": {"episode_reward": [447.0, 441.0, 459.0, 366.0, 447.0, 513.0, 393.0, 402.0, 462.0, 393.0, 450.0, 404.0, 462.0, 441.0, 507.0, 453.0, 390.0, 522.0, 401.0, 456.0, 459.0, 339.0, 453.0, 384.0, 401.0, 465.0, 247.0, 314.0, 513.0, 387.0, 404.0, 458.0, 228.0, 453.0, 438.0, 513.0, 381.0, 513.0, 459.0, 507.0, 453.0, 404.0, 356.0, 441.0, 462.0, 402.0, 438.0, 453.0, 416.0, 398.0, 462.0, 447.0, 470.0, 456.0, 467.0, 459.0, 456.0, 456.0, 465.0, 291.0, 413.0, 435.0, 462.0, 396.0, 359.0, 453.0, 401.0, 370.0, 461.0, 459.0, 402.0, 465.0, 465.0, 456.0, 453.0, 396.0, 444.0, 462.0, 456.0, 441.0, 402.0, 416.0, 410.0, 395.0, 180.0, 462.0, 411.0, 465.0, 336.0, 465.0, 464.0, 513.0, 456.0, 456.0, 441.0, 465.0, 495.0, 341.0, 459.0, 405.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [220.0, 227.0, 219.0, 222.0, 223.0, 236.0, 180.0, 186.0, 228.0, 219.0, 255.0, 258.0, 181.0, 212.0, 205.0, 197.0, 217.0, 245.0, 192.0, 201.0, 231.0, 219.0, 212.0, 192.0, 237.0, 225.0, 226.0, 215.0, 249.0, 258.0, 234.0, 219.0, 199.0, 191.0, 251.0, 271.0, 202.0, 199.0, 229.0, 227.0, 221.0, 238.0, 163.0, 176.0, 222.0, 231.0, 195.0, 189.0, 185.0, 216.0, 222.0, 243.0, 123.0, 124.0, 164.0, 150.0, 256.0, 257.0, 200.0, 187.0, 222.0, 182.0, 221.0, 237.0, 107.0, 121.0, 239.0, 214.0, 209.0, 229.0, 267.0, 246.0, 190.0, 191.0, 241.0, 272.0, 230.0, 229.0, 271.0, 236.0, 240.0, 213.0, 196.0, 208.0, 185.0, 171.0, 216.0, 
225.0, 235.0, 227.0, 202.0, 200.0, 220.0, 218.0, 231.0, 222.0, 205.0, 211.0, 191.0, 207.0, 232.0, 230.0, 211.0, 236.0, 230.0, 240.0, 239.0, 217.0, 234.0, 233.0, 220.0, 239.0, 231.0, 225.0, 235.0, 221.0, 226.0, 239.0, 138.0, 153.0, 193.0, 220.0, 231.0, 204.0, 245.0, 217.0, 207.0, 189.0, 174.0, 185.0, 230.0, 223.0, 216.0, 185.0, 185.0, 185.0, 228.0, 233.0, 222.0, 237.0, 206.0, 196.0, 228.0, 237.0, 220.0, 245.0, 220.0, 236.0, 230.0, 223.0, 214.0, 182.0, 232.0, 212.0, 243.0, 219.0, 228.0, 228.0, 226.0, 215.0, 203.0, 199.0, 202.0, 214.0, 214.0, 196.0, 199.0, 196.0, 87.0, 93.0, 234.0, 228.0, 203.0, 208.0, 231.0, 234.0, 152.0, 184.0, 234.0, 231.0, 237.0, 227.0, 267.0, 246.0, 221.0, 235.0, 230.0, 226.0, 220.0, 221.0, 250.0, 215.0, 254.0, 241.0, 171.0, 170.0, 239.0, 220.0, 210.0, 195.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868516517488307, "mean_inference_ms": 1.209345762889332, "mean_action_processing_ms": 0.13291795937199066, "mean_env_wait_ms": 0.8561722429975059, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2227200, "num_agent_steps_trained": 2227200, "num_env_steps_sampled": 1113600, "num_env_steps_trained": 1113600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1113600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2227200, "timers": {"training_iteration_time_ms": 3629.961, "learn_time_ms": 1091.432, "learn_throughput": 11727.714, "synch_weights_time_ms": 9.59}, "counters": {"num_env_steps_sampled": 1113600, "num_env_steps_trained": 1113600, "num_agent_steps_sampled": 2227200, "num_agent_steps_trained": 2227200}, "done": false, "episodes_total": 2784, "training_iteration": 87, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-34", "timestamp": 1666580734, "time_this_iter_s": 3.8736486434936523, "time_total_s": 325.506954908371, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 325.506954908371, "timesteps_since_restore": 0, "iterations_since_restore": 87, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.98, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 150.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 129.13, "shaped_reward_min": 60, "shaped_reward_max": 164, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.45, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.8, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 
12.69, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.94, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.9, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 0.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.16, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.16, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.42, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.97, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.74, "soup_delivery_agent_1_min": 0, 
"soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.16, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.16, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.16, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.16, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.2924697263734052e-27, "cur_lr": 0.0010000000474974513, "total_loss": -0.003479755949229002, "policy_loss": -0.003657393390312791, "vf_loss": 7.948546886444092, "vf_explained_var": 0.6666540503501892, "kl": 0.0016054816078394651, "entropy": 1.2344354391098022, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1126400, "num_env_steps_trained": 1126400, "num_agent_steps_sampled": 2252800, "num_agent_steps_trained": 2252800}, "sampler_results": {"episode_reward_max": 524.0, "episode_reward_min": 180.0, "episode_reward_mean": 430.33, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 87.0}, "policy_reward_max": {"ppo": 278.0}, "policy_reward_mean": {"ppo": 215.165}, "custom_metrics": {"sparse_reward_mean": 150.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 129.13, "shaped_reward_min": 60, "shaped_reward_max": 164, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.45, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 13.8, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.69, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 12.94, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.9, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 0.5, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.72, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.16, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.16, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 2.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 1.24, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.42, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.05, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 3.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.97, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.74, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.16, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.16, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.16, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.16, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [228.0, 453.0, 438.0, 513.0, 381.0, 513.0, 459.0, 507.0, 453.0, 404.0, 356.0, 441.0, 462.0, 402.0, 438.0, 453.0, 416.0, 398.0, 462.0, 447.0, 470.0, 456.0, 467.0, 459.0, 456.0, 456.0, 465.0, 291.0, 413.0, 435.0, 462.0, 396.0, 359.0, 453.0, 401.0, 370.0, 461.0, 459.0, 402.0, 465.0, 465.0, 456.0, 453.0, 396.0, 444.0, 462.0, 456.0, 441.0, 402.0, 416.0, 410.0, 395.0, 180.0, 462.0, 411.0, 465.0, 336.0, 465.0, 464.0, 513.0, 456.0, 456.0, 441.0, 465.0, 495.0, 341.0, 459.0, 405.0, 507.0, 447.0, 441.0, 470.0, 338.0, 410.0, 453.0, 402.0, 456.0, 453.0, 453.0, 398.0, 444.0, 459.0, 444.0, 341.0, 467.0, 407.0, 444.0, 456.0, 459.0, 407.0, 404.0, 450.0, 378.0, 465.0, 524.0, 393.0, 399.0, 359.0, 416.0, 459.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [107.0, 121.0, 239.0, 214.0, 209.0, 229.0, 267.0, 246.0, 190.0, 191.0, 241.0, 272.0, 230.0, 229.0, 271.0, 236.0, 240.0, 213.0, 196.0, 208.0, 185.0, 171.0, 216.0, 225.0, 235.0, 227.0, 202.0, 200.0, 220.0, 218.0, 231.0, 222.0, 205.0, 211.0, 191.0, 207.0, 232.0, 230.0, 211.0, 236.0, 230.0, 240.0, 239.0, 217.0, 234.0, 233.0, 220.0, 239.0, 231.0, 225.0, 235.0, 221.0, 226.0, 239.0, 138.0, 153.0, 193.0, 220.0, 231.0, 204.0, 245.0, 217.0, 207.0, 189.0, 174.0, 185.0, 230.0, 223.0, 216.0, 185.0, 185.0, 185.0, 228.0, 233.0, 222.0, 237.0, 206.0, 196.0, 228.0, 237.0, 220.0, 245.0, 220.0, 236.0, 230.0, 223.0, 214.0, 182.0, 232.0, 212.0, 243.0, 219.0, 228.0, 228.0, 226.0, 215.0, 203.0, 199.0, 202.0, 214.0, 214.0, 196.0, 199.0, 196.0, 87.0, 93.0, 234.0, 228.0, 203.0, 208.0, 231.0, 234.0, 152.0, 184.0, 234.0, 231.0, 237.0, 227.0, 267.0, 246.0, 221.0, 235.0, 230.0, 226.0, 220.0, 221.0, 250.0, 215.0, 254.0, 241.0, 171.0, 170.0, 239.0, 220.0, 210.0, 195.0, 246.0, 261.0, 226.0, 221.0, 221.0, 220.0, 221.0, 249.0, 169.0, 169.0, 222.0, 188.0, 229.0, 224.0, 197.0, 205.0, 220.0, 236.0, 230.0, 223.0, 222.0, 231.0, 192.0, 206.0, 221.0, 223.0, 230.0, 229.0, 218.0, 226.0, 171.0, 170.0, 229.0, 238.0, 209.0, 198.0, 215.0, 229.0, 223.0, 233.0, 241.0, 218.0, 219.0, 188.0, 210.0, 194.0, 232.0, 218.0, 187.0, 191.0, 231.0, 234.0, 246.0, 278.0, 193.0, 200.0, 191.0, 208.0, 171.0, 188.0, 211.0, 205.0, 221.0, 238.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868056919516162, "mean_inference_ms": 1.2096204283652425, "mean_action_processing_ms": 0.1329055296257274, "mean_env_wait_ms": 0.8561982876915627, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 524.0, "episode_reward_min": 180.0, "episode_reward_mean": 430.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 87.0}, "policy_reward_max": {"ppo": 278.0}, "policy_reward_mean": {"ppo": 215.165}, "hist_stats": {"episode_reward": [228.0, 453.0, 438.0, 513.0, 381.0, 513.0, 459.0, 507.0, 453.0, 404.0, 356.0, 441.0, 462.0, 402.0, 438.0, 453.0, 416.0, 398.0, 462.0, 447.0, 470.0, 456.0, 467.0, 459.0, 456.0, 456.0, 465.0, 291.0, 413.0, 435.0, 462.0, 396.0, 359.0, 453.0, 401.0, 370.0, 461.0, 459.0, 402.0, 465.0, 465.0, 456.0, 453.0, 396.0, 444.0, 462.0, 456.0, 441.0, 402.0, 416.0, 410.0, 395.0, 180.0, 462.0, 411.0, 465.0, 336.0, 465.0, 464.0, 513.0, 456.0, 456.0, 441.0, 465.0, 495.0, 341.0, 459.0, 405.0, 507.0, 447.0, 441.0, 470.0, 338.0, 410.0, 453.0, 402.0, 456.0, 453.0, 453.0, 398.0, 444.0, 459.0, 444.0, 341.0, 467.0, 407.0, 444.0, 456.0, 459.0, 407.0, 404.0, 450.0, 378.0, 465.0, 524.0, 393.0, 399.0, 359.0, 416.0, 459.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [107.0, 121.0, 239.0, 214.0, 209.0, 229.0, 267.0, 246.0, 190.0, 191.0, 241.0, 272.0, 230.0, 229.0, 271.0, 236.0, 240.0, 213.0, 196.0, 208.0, 185.0, 171.0, 216.0, 225.0, 235.0, 227.0, 202.0, 200.0, 220.0, 218.0, 231.0, 222.0, 205.0, 211.0, 191.0, 207.0, 232.0, 230.0, 211.0, 236.0, 230.0, 240.0, 239.0, 217.0, 234.0, 233.0, 220.0, 239.0, 231.0, 225.0, 235.0, 221.0, 226.0, 239.0, 138.0, 153.0, 193.0, 220.0, 231.0, 204.0, 245.0, 217.0, 207.0, 189.0, 174.0, 185.0, 230.0, 223.0, 216.0, 185.0, 185.0, 185.0, 228.0, 233.0, 222.0, 237.0, 206.0, 196.0, 228.0, 237.0, 220.0, 245.0, 220.0, 236.0, 230.0, 223.0, 214.0, 
182.0, 232.0, 212.0, 243.0, 219.0, 228.0, 228.0, 226.0, 215.0, 203.0, 199.0, 202.0, 214.0, 214.0, 196.0, 199.0, 196.0, 87.0, 93.0, 234.0, 228.0, 203.0, 208.0, 231.0, 234.0, 152.0, 184.0, 234.0, 231.0, 237.0, 227.0, 267.0, 246.0, 221.0, 235.0, 230.0, 226.0, 220.0, 221.0, 250.0, 215.0, 254.0, 241.0, 171.0, 170.0, 239.0, 220.0, 210.0, 195.0, 246.0, 261.0, 226.0, 221.0, 221.0, 220.0, 221.0, 249.0, 169.0, 169.0, 222.0, 188.0, 229.0, 224.0, 197.0, 205.0, 220.0, 236.0, 230.0, 223.0, 222.0, 231.0, 192.0, 206.0, 221.0, 223.0, 230.0, 229.0, 218.0, 226.0, 171.0, 170.0, 229.0, 238.0, 209.0, 198.0, 215.0, 229.0, 223.0, 233.0, 241.0, 218.0, 219.0, 188.0, 210.0, 194.0, 232.0, 218.0, 187.0, 191.0, 231.0, 234.0, 246.0, 278.0, 193.0, 200.0, 191.0, 208.0, 171.0, 188.0, 211.0, 205.0, 221.0, 238.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868056919516162, "mean_inference_ms": 1.2096204283652425, "mean_action_processing_ms": 0.1329055296257274, "mean_env_wait_ms": 0.8561982876915627, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2252800, "num_agent_steps_trained": 2252800, "num_env_steps_sampled": 1126400, "num_env_steps_trained": 1126400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1126400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2252800, "timers": {"training_iteration_time_ms": 3648.703, "learn_time_ms": 1098.582, "learn_throughput": 11651.38, "synch_weights_time_ms": 9.691}, "counters": {"num_env_steps_sampled": 1126400, "num_env_steps_trained": 1126400, "num_agent_steps_sampled": 2252800, "num_agent_steps_trained": 2252800}, "done": false, "episodes_total": 2816, "training_iteration": 88, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-38", "timestamp": 1666580738, "time_this_iter_s": 3.93886137008667, "time_total_s": 329.44581627845764, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 329.44581627845764, "timesteps_since_restore": 0, "iterations_since_restore": 88, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.916666666666664, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 149.8, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 130.82, "shaped_reward_min": 60, "shaped_reward_max": 164, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.42, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.16, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 12.57, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.1, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 0.58, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.88, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.06, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.35, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.01, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.76, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.11, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.06, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.35, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.06, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.35, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 6.462348631867026e-28, "cur_lr": 0.0010000000474974513, "total_loss": -0.0031336257234215736, "policy_loss": -0.003304777666926384, "vf_loss": 7.8458757400512695, "vf_explained_var": 0.7096177339553833, "kl": 0.0018895509419962764, "entropy": 1.2268693447113037, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1139200, "num_env_steps_trained": 1139200, "num_agent_steps_sampled": 2278400, "num_agent_steps_trained": 2278400}, "sampler_results": {"episode_reward_max": 524.0, "episode_reward_min": 180.0, "episode_reward_mean": 430.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 87.0}, "policy_reward_max": {"ppo": 278.0}, "policy_reward_mean": {"ppo": 215.21}, "custom_metrics": {"sparse_reward_mean": 149.8, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 130.82, "shaped_reward_min": 60, "shaped_reward_max": 164, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.42, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.16, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.57, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.1, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.45, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 0.58, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.88, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.06, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.35, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 3.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.05, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 8, "useful_dish_drop_agent_0_mean": 0.3, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.17, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.01, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.76, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.11, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.06, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.35, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.06, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.35, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [359.0, 453.0, 401.0, 370.0, 461.0, 459.0, 402.0, 465.0, 465.0, 456.0, 453.0, 396.0, 444.0, 462.0, 456.0, 441.0, 402.0, 416.0, 410.0, 395.0, 180.0, 462.0, 411.0, 465.0, 336.0, 465.0, 464.0, 513.0, 456.0, 456.0, 441.0, 465.0, 495.0, 341.0, 459.0, 405.0, 507.0, 447.0, 441.0, 470.0, 338.0, 410.0, 453.0, 402.0, 456.0, 453.0, 453.0, 398.0, 444.0, 459.0, 444.0, 341.0, 467.0, 407.0, 444.0, 456.0, 459.0, 407.0, 404.0, 450.0, 378.0, 465.0, 524.0, 393.0, 399.0, 359.0, 416.0, 459.0, 416.0, 462.0, 510.0, 447.0, 384.0, 467.0, 465.0, 211.0, 462.0, 450.0, 461.0, 467.0, 416.0, 410.0, 410.0, 330.0, 367.0, 456.0, 452.0, 513.0, 413.0, 402.0, 387.0, 422.0, 513.0, 456.0, 476.0, 392.0, 450.0, 468.0, 459.0, 465.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [174.0, 185.0, 230.0, 223.0, 216.0, 185.0, 185.0, 185.0, 228.0, 233.0, 222.0, 237.0, 206.0, 196.0, 228.0, 237.0, 220.0, 245.0, 220.0, 236.0, 230.0, 223.0, 214.0, 182.0, 232.0, 212.0, 243.0, 219.0, 228.0, 228.0, 226.0, 215.0, 203.0, 199.0, 202.0, 214.0, 214.0, 196.0, 199.0, 196.0, 87.0, 93.0, 234.0, 228.0, 203.0, 208.0, 231.0, 234.0, 152.0, 184.0, 234.0, 231.0, 237.0, 227.0, 267.0, 246.0, 221.0, 235.0, 230.0, 226.0, 220.0, 221.0, 250.0, 215.0, 254.0, 241.0, 171.0, 170.0, 239.0, 220.0, 210.0, 195.0, 246.0, 261.0, 226.0, 221.0, 221.0, 220.0, 221.0, 249.0, 169.0, 169.0, 222.0, 188.0, 229.0, 224.0, 197.0, 205.0, 220.0, 236.0, 230.0, 223.0, 222.0, 231.0, 192.0, 206.0, 221.0, 223.0, 230.0, 229.0, 218.0, 226.0, 171.0, 170.0, 229.0, 238.0, 209.0, 198.0, 215.0, 229.0, 223.0, 233.0, 241.0, 218.0, 219.0, 188.0, 210.0, 194.0, 232.0, 218.0, 187.0, 191.0, 231.0, 234.0, 246.0, 278.0, 193.0, 200.0, 191.0, 208.0, 171.0, 188.0, 211.0, 205.0, 221.0, 238.0, 226.0, 190.0, 250.0, 212.0, 260.0, 250.0, 213.0, 234.0, 194.0, 190.0, 236.0, 231.0, 232.0, 233.0, 100.0, 111.0, 233.0, 229.0, 238.0, 212.0, 221.0, 240.0, 235.0, 232.0, 215.0, 201.0, 191.0, 219.0, 214.0, 196.0, 163.0, 167.0, 182.0, 185.0, 218.0, 238.0, 227.0, 225.0, 257.0, 256.0, 200.0, 213.0, 194.0, 208.0, 182.0, 205.0, 216.0, 206.0, 264.0, 249.0, 226.0, 230.0, 232.0, 244.0, 185.0, 207.0, 226.0, 224.0, 249.0, 219.0, 242.0, 217.0, 239.0, 226.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867888499461925, "mean_inference_ms": 1.2099295994346604, "mean_action_processing_ms": 0.13289270671609127, "mean_env_wait_ms": 0.8562557347149742, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 524.0, "episode_reward_min": 180.0, "episode_reward_mean": 430.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 87.0}, "policy_reward_max": {"ppo": 278.0}, "policy_reward_mean": {"ppo": 215.21}, "hist_stats": {"episode_reward": [359.0, 453.0, 401.0, 370.0, 461.0, 459.0, 402.0, 465.0, 465.0, 456.0, 453.0, 396.0, 444.0, 462.0, 456.0, 441.0, 402.0, 416.0, 410.0, 395.0, 180.0, 462.0, 411.0, 465.0, 336.0, 465.0, 464.0, 513.0, 456.0, 456.0, 441.0, 465.0, 495.0, 341.0, 459.0, 405.0, 507.0, 447.0, 441.0, 470.0, 338.0, 410.0, 453.0, 402.0, 456.0, 453.0, 453.0, 398.0, 444.0, 459.0, 444.0, 341.0, 467.0, 407.0, 444.0, 456.0, 459.0, 407.0, 404.0, 450.0, 378.0, 465.0, 524.0, 393.0, 399.0, 359.0, 416.0, 459.0, 416.0, 462.0, 510.0, 447.0, 384.0, 467.0, 465.0, 211.0, 462.0, 450.0, 461.0, 467.0, 416.0, 410.0, 410.0, 330.0, 367.0, 456.0, 452.0, 513.0, 413.0, 402.0, 387.0, 422.0, 513.0, 456.0, 476.0, 392.0, 450.0, 468.0, 459.0, 465.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [174.0, 185.0, 230.0, 223.0, 216.0, 185.0, 185.0, 185.0, 228.0, 233.0, 222.0, 237.0, 206.0, 196.0, 228.0, 237.0, 220.0, 245.0, 220.0, 236.0, 230.0, 223.0, 214.0, 182.0, 232.0, 212.0, 243.0, 219.0, 228.0, 228.0, 226.0, 215.0, 203.0, 199.0, 202.0, 214.0, 214.0, 196.0, 199.0, 196.0, 87.0, 93.0, 234.0, 228.0, 203.0, 208.0, 231.0, 234.0, 152.0, 184.0, 234.0, 231.0, 237.0, 227.0, 267.0, 246.0, 221.0, 235.0, 230.0, 226.0, 220.0, 221.0, 250.0, 215.0, 254.0, 241.0, 171.0, 170.0, 239.0, 220.0, 210.0, 195.0, 246.0, 261.0, 226.0, 221.0, 221.0, 220.0, 221.0, 249.0, 169.0, 169.0, 222.0, 188.0, 229.0, 224.0, 197.0, 
205.0, 220.0, 236.0, 230.0, 223.0, 222.0, 231.0, 192.0, 206.0, 221.0, 223.0, 230.0, 229.0, 218.0, 226.0, 171.0, 170.0, 229.0, 238.0, 209.0, 198.0, 215.0, 229.0, 223.0, 233.0, 241.0, 218.0, 219.0, 188.0, 210.0, 194.0, 232.0, 218.0, 187.0, 191.0, 231.0, 234.0, 246.0, 278.0, 193.0, 200.0, 191.0, 208.0, 171.0, 188.0, 211.0, 205.0, 221.0, 238.0, 226.0, 190.0, 250.0, 212.0, 260.0, 250.0, 213.0, 234.0, 194.0, 190.0, 236.0, 231.0, 232.0, 233.0, 100.0, 111.0, 233.0, 229.0, 238.0, 212.0, 221.0, 240.0, 235.0, 232.0, 215.0, 201.0, 191.0, 219.0, 214.0, 196.0, 163.0, 167.0, 182.0, 185.0, 218.0, 238.0, 227.0, 225.0, 257.0, 256.0, 200.0, 213.0, 194.0, 208.0, 182.0, 205.0, 216.0, 206.0, 264.0, 249.0, 226.0, 230.0, 232.0, 244.0, 185.0, 207.0, 226.0, 224.0, 249.0, 219.0, 242.0, 217.0, 239.0, 226.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867888499461925, "mean_inference_ms": 1.2099295994346604, "mean_action_processing_ms": 0.13289270671609127, "mean_env_wait_ms": 0.8562557347149742, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2278400, "num_agent_steps_trained": 2278400, "num_env_steps_sampled": 1139200, "num_env_steps_trained": 1139200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1139200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2278400, "timers": {"training_iteration_time_ms": 3660.43, "learn_time_ms": 1108.366, "learn_throughput": 11548.535, "synch_weights_time_ms": 9.815}, "counters": {"num_env_steps_sampled": 1139200, "num_env_steps_trained": 1139200, "num_agent_steps_sampled": 2278400, "num_agent_steps_trained": 2278400}, "done": false, "episodes_total": 2848, "training_iteration": 89, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-41", "timestamp": 1666580741, "time_this_iter_s": 3.6420814990997314, "time_total_s": 333.0878977775574, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 333.0878977775574, "timesteps_since_restore": 0, "iterations_since_restore": 89, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.133333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 151.4, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 131.9, "shaped_reward_min": 84, "shaped_reward_max": 164, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.43, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 12.85, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.42, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 0.67, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.89, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.08, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.57, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.43, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.16, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.03, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.83, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.08, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.57, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.08, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.57, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.231174315933513e-28, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013424127828329802, "policy_loss": -0.0015107805375009775, "vf_loss": 7.762334823608398, "vf_explained_var": 0.7075027227401733, "kl": 0.0016440870240330696, "entropy": 1.2157301902770996, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1152000, "num_env_steps_trained": 1152000, "num_agent_steps_sampled": 2304000, "num_agent_steps_trained": 2304000}, "sampler_results": {"episode_reward_max": 524.0, "episode_reward_min": 211.0, "episode_reward_mean": 434.7, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 100.0}, "policy_reward_max": {"ppo": 278.0}, "policy_reward_mean": {"ppo": 217.35}, "custom_metrics": {"sparse_reward_mean": 151.4, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 131.9, "shaped_reward_min": 84, "shaped_reward_max": 164, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.43, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 12.85, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.42, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.47, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 8, "useful_onion_drop_agent_0_mean": 0.67, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.89, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.08, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.57, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.16, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.43, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.16, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.03, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.08, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.57, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.08, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.57, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [495.0, 341.0, 459.0, 405.0, 507.0, 447.0, 441.0, 470.0, 338.0, 410.0, 453.0, 402.0, 456.0, 453.0, 453.0, 398.0, 444.0, 459.0, 444.0, 341.0, 467.0, 407.0, 444.0, 456.0, 459.0, 407.0, 404.0, 450.0, 378.0, 465.0, 524.0, 393.0, 399.0, 359.0, 416.0, 459.0, 416.0, 462.0, 510.0, 447.0, 384.0, 467.0, 465.0, 211.0, 462.0, 450.0, 461.0, 467.0, 416.0, 410.0, 410.0, 330.0, 367.0, 456.0, 452.0, 513.0, 413.0, 402.0, 387.0, 422.0, 513.0, 456.0, 476.0, 392.0, 450.0, 468.0, 459.0, 465.0, 456.0, 435.0, 284.0, 450.0, 462.0, 462.0, 473.0, 405.0, 407.0, 476.0, 459.0, 453.0, 453.0, 416.0, 447.0, 407.0, 459.0, 447.0, 450.0, 459.0, 465.0, 347.0, 462.0, 456.0, 459.0, 458.0, 450.0, 453.0, 462.0, 465.0, 444.0, 427.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 241.0, 171.0, 170.0, 239.0, 220.0, 210.0, 195.0, 246.0, 261.0, 226.0, 221.0, 221.0, 220.0, 221.0, 249.0, 169.0, 169.0, 222.0, 188.0, 229.0, 224.0, 197.0, 205.0, 220.0, 236.0, 230.0, 223.0, 222.0, 231.0, 192.0, 206.0, 221.0, 223.0, 230.0, 229.0, 218.0, 226.0, 171.0, 170.0, 229.0, 238.0, 209.0, 198.0, 215.0, 229.0, 223.0, 233.0, 241.0, 218.0, 219.0, 188.0, 210.0, 194.0, 232.0, 218.0, 187.0, 191.0, 231.0, 234.0, 246.0, 278.0, 193.0, 200.0, 191.0, 208.0, 171.0, 188.0, 211.0, 205.0, 221.0, 238.0, 226.0, 190.0, 250.0, 212.0, 260.0, 250.0, 213.0, 234.0, 194.0, 190.0, 236.0, 231.0, 232.0, 233.0, 100.0, 111.0, 233.0, 229.0, 238.0, 212.0, 221.0, 240.0, 235.0, 232.0, 215.0, 201.0, 191.0, 219.0, 214.0, 196.0, 163.0, 167.0, 182.0, 185.0, 218.0, 238.0, 227.0, 225.0, 257.0, 256.0, 200.0, 213.0, 194.0, 208.0, 182.0, 205.0, 216.0, 206.0, 264.0, 249.0, 226.0, 230.0, 232.0, 244.0, 185.0, 207.0, 226.0, 224.0, 249.0, 219.0, 242.0, 217.0, 239.0, 226.0, 227.0, 229.0, 222.0, 213.0, 132.0, 152.0, 231.0, 219.0, 230.0, 232.0, 232.0, 230.0, 238.0, 235.0, 200.0, 205.0, 201.0, 206.0, 247.0, 229.0, 224.0, 235.0, 217.0, 236.0, 213.0, 240.0, 212.0, 204.0, 228.0, 219.0, 213.0, 194.0, 232.0, 227.0, 230.0, 217.0, 219.0, 231.0, 236.0, 223.0, 239.0, 226.0, 174.0, 173.0, 221.0, 241.0, 217.0, 239.0, 220.0, 239.0, 224.0, 234.0, 219.0, 231.0, 224.0, 229.0, 232.0, 230.0, 239.0, 226.0, 229.0, 215.0, 200.0, 227.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868064875086877, "mean_inference_ms": 1.20987618428007, "mean_action_processing_ms": 0.13288695715863633, "mean_env_wait_ms": 0.8560040849674999, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 524.0, "episode_reward_min": 211.0, "episode_reward_mean": 434.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 100.0}, "policy_reward_max": {"ppo": 278.0}, "policy_reward_mean": {"ppo": 217.35}, "hist_stats": {"episode_reward": [495.0, 341.0, 459.0, 405.0, 507.0, 447.0, 441.0, 470.0, 338.0, 410.0, 453.0, 402.0, 456.0, 453.0, 453.0, 398.0, 444.0, 459.0, 444.0, 341.0, 467.0, 407.0, 444.0, 456.0, 459.0, 407.0, 404.0, 450.0, 378.0, 465.0, 524.0, 393.0, 399.0, 359.0, 416.0, 459.0, 416.0, 462.0, 510.0, 447.0, 384.0, 467.0, 465.0, 211.0, 462.0, 450.0, 461.0, 467.0, 416.0, 410.0, 410.0, 330.0, 367.0, 456.0, 452.0, 513.0, 413.0, 402.0, 387.0, 422.0, 513.0, 456.0, 476.0, 392.0, 450.0, 468.0, 459.0, 465.0, 456.0, 435.0, 284.0, 450.0, 462.0, 462.0, 473.0, 405.0, 407.0, 476.0, 459.0, 453.0, 453.0, 416.0, 447.0, 407.0, 459.0, 447.0, 450.0, 459.0, 465.0, 347.0, 462.0, 456.0, 459.0, 458.0, 450.0, 453.0, 462.0, 465.0, 444.0, 427.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 241.0, 171.0, 170.0, 239.0, 220.0, 210.0, 195.0, 246.0, 261.0, 226.0, 221.0, 221.0, 220.0, 221.0, 249.0, 169.0, 169.0, 222.0, 188.0, 229.0, 224.0, 197.0, 205.0, 220.0, 236.0, 230.0, 223.0, 222.0, 231.0, 192.0, 206.0, 221.0, 223.0, 230.0, 229.0, 218.0, 226.0, 171.0, 170.0, 229.0, 238.0, 209.0, 198.0, 215.0, 229.0, 223.0, 233.0, 241.0, 218.0, 219.0, 188.0, 210.0, 194.0, 232.0, 218.0, 187.0, 191.0, 231.0, 234.0, 246.0, 278.0, 193.0, 200.0, 191.0, 208.0, 171.0, 188.0, 211.0, 205.0, 221.0, 238.0, 226.0, 190.0, 250.0, 212.0, 260.0, 250.0, 213.0, 234.0, 194.0, 190.0, 236.0, 231.0, 232.0, 233.0, 100.0, 
111.0, 233.0, 229.0, 238.0, 212.0, 221.0, 240.0, 235.0, 232.0, 215.0, 201.0, 191.0, 219.0, 214.0, 196.0, 163.0, 167.0, 182.0, 185.0, 218.0, 238.0, 227.0, 225.0, 257.0, 256.0, 200.0, 213.0, 194.0, 208.0, 182.0, 205.0, 216.0, 206.0, 264.0, 249.0, 226.0, 230.0, 232.0, 244.0, 185.0, 207.0, 226.0, 224.0, 249.0, 219.0, 242.0, 217.0, 239.0, 226.0, 227.0, 229.0, 222.0, 213.0, 132.0, 152.0, 231.0, 219.0, 230.0, 232.0, 232.0, 230.0, 238.0, 235.0, 200.0, 205.0, 201.0, 206.0, 247.0, 229.0, 224.0, 235.0, 217.0, 236.0, 213.0, 240.0, 212.0, 204.0, 228.0, 219.0, 213.0, 194.0, 232.0, 227.0, 230.0, 217.0, 219.0, 231.0, 236.0, 223.0, 239.0, 226.0, 174.0, 173.0, 221.0, 241.0, 217.0, 239.0, 220.0, 239.0, 224.0, 234.0, 219.0, 231.0, 224.0, 229.0, 232.0, 230.0, 239.0, 226.0, 229.0, 215.0, 200.0, 227.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6868064875086877, "mean_inference_ms": 1.20987618428007, "mean_action_processing_ms": 0.13288695715863633, "mean_env_wait_ms": 0.8560040849674999, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2304000, "num_agent_steps_trained": 2304000, "num_env_steps_sampled": 1152000, "num_env_steps_trained": 1152000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1152000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2304000, "timers": {"training_iteration_time_ms": 3653.028, "learn_time_ms": 1101.239, "learn_throughput": 11623.267, "synch_weights_time_ms": 10.331}, "counters": {"num_env_steps_sampled": 1152000, "num_env_steps_trained": 1152000, "num_agent_steps_sampled": 2304000, "num_agent_steps_trained": 2304000}, "done": false, "episodes_total": 2880, "training_iteration": 90, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-45", "timestamp": 1666580745, "time_this_iter_s": 3.6563451290130615, "time_total_s": 336.74424290657043, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 336.74424290657043, "timesteps_since_restore": 0, "iterations_since_restore": 90, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.96, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 152.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 134.4, "shaped_reward_min": 84, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.73, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.54, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 
12.92, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.64, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.79, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.76, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.03, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.86, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.15, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.36, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.99, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.34, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.37, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.17, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.85, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.03, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.86, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.03, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.86, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.6155871579667565e-28, "cur_lr": 0.0010000000474974513, "total_loss": -0.0027565264608711004, "policy_loss": -0.0029142703860998154, "vf_loss": 7.637598514556885, "vf_explained_var": 0.6873199343681335, "kl": 0.0016575659392401576, "entropy": 1.2120314836502075, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1164800, "num_env_steps_trained": 1164800, "num_agent_steps_sampled": 2329600, "num_agent_steps_trained": 2329600}, "sampler_results": {"episode_reward_max": 516.0, "episode_reward_min": 211.0, "episode_reward_mean": 439.6, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 100.0}, "policy_reward_max": {"ppo": 265.0}, "policy_reward_mean": {"ppo": 219.8}, "custom_metrics": {"sparse_reward_mean": 152.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 134.4, "shaped_reward_min": 84, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.73, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.54, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.92, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.64, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 1.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 6, "useful_onion_drop_agent_0_mean": 0.79, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.76, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.03, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.86, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.15, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.36, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 6, "dish_drop_agent_0_mean": 1.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.99, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.34, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.37, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.17, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 3.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 3.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.08, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.03, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.86, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.03, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.86, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [399.0, 359.0, 416.0, 459.0, 416.0, 462.0, 510.0, 447.0, 384.0, 467.0, 465.0, 211.0, 462.0, 450.0, 461.0, 467.0, 416.0, 410.0, 410.0, 330.0, 367.0, 456.0, 452.0, 513.0, 413.0, 402.0, 387.0, 422.0, 513.0, 456.0, 476.0, 392.0, 450.0, 468.0, 459.0, 465.0, 456.0, 435.0, 284.0, 450.0, 462.0, 462.0, 473.0, 405.0, 407.0, 476.0, 459.0, 453.0, 453.0, 416.0, 447.0, 407.0, 459.0, 447.0, 450.0, 459.0, 465.0, 347.0, 462.0, 456.0, 459.0, 458.0, 450.0, 453.0, 462.0, 465.0, 444.0, 427.0, 407.0, 453.0, 513.0, 473.0, 401.0, 516.0, 370.0, 470.0, 467.0, 465.0, 513.0, 447.0, 390.0, 410.0, 456.0, 456.0, 513.0, 456.0, 395.0, 450.0, 456.0, 492.0, 456.0, 416.0, 419.0, 459.0, 462.0, 453.0, 450.0, 416.0, 444.0, 416.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [191.0, 208.0, 171.0, 188.0, 211.0, 205.0, 221.0, 238.0, 226.0, 190.0, 250.0, 212.0, 260.0, 250.0, 213.0, 234.0, 194.0, 190.0, 236.0, 231.0, 232.0, 233.0, 100.0, 111.0, 233.0, 229.0, 238.0, 212.0, 221.0, 240.0, 235.0, 232.0, 215.0, 201.0, 191.0, 219.0, 214.0, 196.0, 163.0, 167.0, 182.0, 185.0, 218.0, 238.0, 227.0, 225.0, 257.0, 256.0, 200.0, 213.0, 194.0, 208.0, 182.0, 205.0, 216.0, 206.0, 264.0, 249.0, 226.0, 230.0, 232.0, 244.0, 185.0, 207.0, 226.0, 224.0, 249.0, 219.0, 242.0, 217.0, 239.0, 226.0, 227.0, 229.0, 222.0, 213.0, 132.0, 152.0, 231.0, 219.0, 230.0, 232.0, 232.0, 230.0, 238.0, 235.0, 200.0, 205.0, 201.0, 206.0, 247.0, 229.0, 224.0, 235.0, 217.0, 236.0, 213.0, 240.0, 212.0, 204.0, 228.0, 219.0, 213.0, 194.0, 232.0, 227.0, 230.0, 217.0, 219.0, 231.0, 236.0, 223.0, 239.0, 226.0, 174.0, 173.0, 221.0, 241.0, 217.0, 239.0, 220.0, 239.0, 224.0, 234.0, 219.0, 231.0, 224.0, 229.0, 232.0, 230.0, 239.0, 226.0, 229.0, 215.0, 200.0, 227.0, 208.0, 199.0, 215.0, 238.0, 258.0, 255.0, 230.0, 243.0, 206.0, 195.0, 262.0, 254.0, 197.0, 173.0, 228.0, 242.0, 234.0, 233.0, 234.0, 231.0, 252.0, 261.0, 221.0, 226.0, 194.0, 196.0, 198.0, 212.0, 244.0, 212.0, 220.0, 236.0, 248.0, 265.0, 218.0, 238.0, 216.0, 179.0, 216.0, 234.0, 229.0, 227.0, 250.0, 242.0, 224.0, 232.0, 195.0, 221.0, 208.0, 211.0, 240.0, 219.0, 235.0, 227.0, 220.0, 233.0, 228.0, 222.0, 210.0, 206.0, 217.0, 227.0, 211.0, 205.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867829633180885, "mean_inference_ms": 1.2097543508448176, "mean_action_processing_ms": 0.13288373966344313, "mean_env_wait_ms": 0.8555906230183881, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 516.0, "episode_reward_min": 211.0, "episode_reward_mean": 439.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 100.0}, "policy_reward_max": {"ppo": 265.0}, "policy_reward_mean": {"ppo": 219.8}, "hist_stats": {"episode_reward": [399.0, 359.0, 416.0, 459.0, 416.0, 462.0, 510.0, 447.0, 384.0, 467.0, 465.0, 211.0, 462.0, 450.0, 461.0, 467.0, 416.0, 410.0, 410.0, 330.0, 367.0, 456.0, 452.0, 513.0, 413.0, 402.0, 387.0, 422.0, 513.0, 456.0, 476.0, 392.0, 450.0, 468.0, 459.0, 465.0, 456.0, 435.0, 284.0, 450.0, 462.0, 462.0, 473.0, 405.0, 407.0, 476.0, 459.0, 453.0, 453.0, 416.0, 447.0, 407.0, 459.0, 447.0, 450.0, 459.0, 465.0, 347.0, 462.0, 456.0, 459.0, 458.0, 450.0, 453.0, 462.0, 465.0, 444.0, 427.0, 407.0, 453.0, 513.0, 473.0, 401.0, 516.0, 370.0, 470.0, 467.0, 465.0, 513.0, 447.0, 390.0, 410.0, 456.0, 456.0, 513.0, 456.0, 395.0, 450.0, 456.0, 492.0, 456.0, 416.0, 419.0, 459.0, 462.0, 453.0, 450.0, 416.0, 444.0, 416.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [191.0, 208.0, 171.0, 188.0, 211.0, 205.0, 221.0, 238.0, 226.0, 190.0, 250.0, 212.0, 260.0, 250.0, 213.0, 234.0, 194.0, 190.0, 236.0, 231.0, 232.0, 233.0, 100.0, 111.0, 233.0, 229.0, 238.0, 212.0, 221.0, 240.0, 235.0, 232.0, 215.0, 201.0, 191.0, 219.0, 214.0, 196.0, 163.0, 167.0, 182.0, 185.0, 218.0, 238.0, 227.0, 225.0, 257.0, 256.0, 200.0, 213.0, 194.0, 208.0, 182.0, 205.0, 216.0, 206.0, 264.0, 249.0, 226.0, 230.0, 232.0, 244.0, 185.0, 207.0, 226.0, 224.0, 249.0, 219.0, 242.0, 217.0, 239.0, 226.0, 227.0, 229.0, 222.0, 213.0, 132.0, 152.0, 231.0, 219.0, 230.0, 232.0, 232.0, 230.0, 238.0, 235.0, 200.0, 
205.0, 201.0, 206.0, 247.0, 229.0, 224.0, 235.0, 217.0, 236.0, 213.0, 240.0, 212.0, 204.0, 228.0, 219.0, 213.0, 194.0, 232.0, 227.0, 230.0, 217.0, 219.0, 231.0, 236.0, 223.0, 239.0, 226.0, 174.0, 173.0, 221.0, 241.0, 217.0, 239.0, 220.0, 239.0, 224.0, 234.0, 219.0, 231.0, 224.0, 229.0, 232.0, 230.0, 239.0, 226.0, 229.0, 215.0, 200.0, 227.0, 208.0, 199.0, 215.0, 238.0, 258.0, 255.0, 230.0, 243.0, 206.0, 195.0, 262.0, 254.0, 197.0, 173.0, 228.0, 242.0, 234.0, 233.0, 234.0, 231.0, 252.0, 261.0, 221.0, 226.0, 194.0, 196.0, 198.0, 212.0, 244.0, 212.0, 220.0, 236.0, 248.0, 265.0, 218.0, 238.0, 216.0, 179.0, 216.0, 234.0, 229.0, 227.0, 250.0, 242.0, 224.0, 232.0, 195.0, 221.0, 208.0, 211.0, 240.0, 219.0, 235.0, 227.0, 220.0, 233.0, 228.0, 222.0, 210.0, 206.0, 217.0, 227.0, 211.0, 205.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867829633180885, "mean_inference_ms": 1.2097543508448176, "mean_action_processing_ms": 0.13288373966344313, "mean_env_wait_ms": 0.8555906230183881, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2329600, "num_agent_steps_trained": 2329600, "num_env_steps_sampled": 1164800, "num_env_steps_trained": 1164800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1164800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2329600, "timers": {"training_iteration_time_ms": 3657.266, "learn_time_ms": 1100.033, "learn_throughput": 11636.013, "synch_weights_time_ms": 10.542}, "counters": {"num_env_steps_sampled": 1164800, "num_env_steps_trained": 1164800, "num_agent_steps_sampled": 2329600, "num_agent_steps_trained": 2329600}, "done": false, "episodes_total": 2912, "training_iteration": 91, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-49", "timestamp": 1666580749, "time_this_iter_s": 3.698246955871582, "time_total_s": 340.442489862442, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 340.442489862442, "timesteps_since_restore": 0, "iterations_since_restore": 91, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.766666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 155.2, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 133.87, "shaped_reward_min": 68, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.84, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.23, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 13.03, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.57, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.97, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.78, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.53, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 12.17, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.85, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.54, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.44, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.46, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.19, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.84, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.18, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.76, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.17, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.85, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.17, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.85, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 8.077935789833782e-29, "cur_lr": 0.0010000000474974513, "total_loss": -0.002236593747511506, "policy_loss": -0.0023881965316832066, "vf_loss": 7.572493553161621, "vf_explained_var": 0.7193116545677185, "kl": 0.0020622664596885443, "entropy": 1.211289405822754, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1177600, "num_env_steps_trained": 1177600, "num_agent_steps_sampled": 2355200, "num_agent_steps_trained": 2355200}, "sampler_results": {"episode_reward_max": 516.0, "episode_reward_min": 228.0, "episode_reward_mean": 444.27, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 111.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 222.135}, "custom_metrics": {"sparse_reward_mean": 155.2, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 133.87, "shaped_reward_min": 68, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.84, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.23, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.03, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.57, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.97, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.78, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.53, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 12.17, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 12.85, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.54, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.99, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 1.19, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.44, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.46, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.19, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.84, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.18, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.76, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.17, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 12.85, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.17, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 12.85, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [450.0, 468.0, 459.0, 465.0, 456.0, 435.0, 284.0, 450.0, 462.0, 462.0, 473.0, 405.0, 407.0, 476.0, 459.0, 453.0, 453.0, 416.0, 447.0, 407.0, 459.0, 447.0, 450.0, 459.0, 465.0, 347.0, 462.0, 456.0, 459.0, 458.0, 450.0, 453.0, 462.0, 465.0, 444.0, 427.0, 407.0, 453.0, 513.0, 473.0, 401.0, 516.0, 370.0, 470.0, 467.0, 465.0, 513.0, 447.0, 390.0, 410.0, 456.0, 456.0, 513.0, 456.0, 395.0, 450.0, 456.0, 492.0, 456.0, 416.0, 419.0, 459.0, 462.0, 453.0, 450.0, 416.0, 444.0, 416.0, 453.0, 450.0, 513.0, 408.0, 516.0, 456.0, 476.0, 444.0, 296.0, 419.0, 413.0, 441.0, 438.0, 516.0, 462.0, 453.0, 456.0, 399.0, 447.0, 465.0, 459.0, 450.0, 465.0, 432.0, 228.0, 450.0, 393.0, 501.0, 444.0, 456.0, 459.0, 459.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [226.0, 224.0, 249.0, 219.0, 242.0, 217.0, 239.0, 226.0, 227.0, 229.0, 222.0, 213.0, 132.0, 152.0, 231.0, 219.0, 230.0, 232.0, 232.0, 230.0, 238.0, 235.0, 200.0, 205.0, 201.0, 206.0, 247.0, 229.0, 224.0, 235.0, 217.0, 236.0, 213.0, 240.0, 212.0, 204.0, 228.0, 219.0, 213.0, 194.0, 232.0, 227.0, 230.0, 217.0, 219.0, 231.0, 236.0, 223.0, 239.0, 226.0, 174.0, 173.0, 221.0, 241.0, 217.0, 239.0, 220.0, 239.0, 224.0, 234.0, 219.0, 231.0, 224.0, 229.0, 232.0, 230.0, 239.0, 226.0, 229.0, 215.0, 200.0, 227.0, 208.0, 199.0, 215.0, 238.0, 258.0, 255.0, 230.0, 243.0, 206.0, 195.0, 262.0, 254.0, 197.0, 173.0, 228.0, 242.0, 234.0, 233.0, 234.0, 231.0, 252.0, 261.0, 221.0, 226.0, 194.0, 196.0, 198.0, 212.0, 244.0, 212.0, 220.0, 236.0, 248.0, 265.0, 218.0, 238.0, 216.0, 179.0, 216.0, 234.0, 229.0, 227.0, 250.0, 242.0, 224.0, 232.0, 195.0, 221.0, 208.0, 211.0, 240.0, 219.0, 235.0, 227.0, 220.0, 233.0, 228.0, 222.0, 210.0, 206.0, 217.0, 227.0, 211.0, 205.0, 217.0, 236.0, 226.0, 224.0, 258.0, 255.0, 209.0, 199.0, 256.0, 260.0, 235.0, 221.0, 238.0, 238.0, 223.0, 221.0, 150.0, 146.0, 216.0, 203.0, 197.0, 216.0, 228.0, 213.0, 225.0, 213.0, 246.0, 270.0, 226.0, 236.0, 231.0, 222.0, 234.0, 222.0, 197.0, 202.0, 218.0, 229.0, 241.0, 224.0, 229.0, 230.0, 226.0, 224.0, 233.0, 232.0, 217.0, 215.0, 117.0, 111.0, 220.0, 230.0, 197.0, 196.0, 258.0, 243.0, 219.0, 225.0, 236.0, 220.0, 254.0, 205.0, 214.0, 245.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867800256071077, "mean_inference_ms": 1.2096947134492912, "mean_action_processing_ms": 0.13288857442312746, "mean_env_wait_ms": 0.8552171842142949, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 516.0, "episode_reward_min": 228.0, "episode_reward_mean": 444.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 111.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 222.135}, "hist_stats": {"episode_reward": [450.0, 468.0, 459.0, 465.0, 456.0, 435.0, 284.0, 450.0, 462.0, 462.0, 473.0, 405.0, 407.0, 476.0, 459.0, 453.0, 453.0, 416.0, 447.0, 407.0, 459.0, 447.0, 450.0, 459.0, 465.0, 347.0, 462.0, 456.0, 459.0, 458.0, 450.0, 453.0, 462.0, 465.0, 444.0, 427.0, 407.0, 453.0, 513.0, 473.0, 401.0, 516.0, 370.0, 470.0, 467.0, 465.0, 513.0, 447.0, 390.0, 410.0, 456.0, 456.0, 513.0, 456.0, 395.0, 450.0, 456.0, 492.0, 456.0, 416.0, 419.0, 459.0, 462.0, 453.0, 450.0, 416.0, 444.0, 416.0, 453.0, 450.0, 513.0, 408.0, 516.0, 456.0, 476.0, 444.0, 296.0, 419.0, 413.0, 441.0, 438.0, 516.0, 462.0, 453.0, 456.0, 399.0, 447.0, 465.0, 459.0, 450.0, 465.0, 432.0, 228.0, 450.0, 393.0, 501.0, 444.0, 456.0, 459.0, 459.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [226.0, 224.0, 249.0, 219.0, 242.0, 217.0, 239.0, 226.0, 227.0, 229.0, 222.0, 213.0, 132.0, 152.0, 231.0, 219.0, 230.0, 232.0, 232.0, 230.0, 238.0, 235.0, 200.0, 205.0, 201.0, 206.0, 247.0, 229.0, 224.0, 235.0, 217.0, 236.0, 213.0, 240.0, 212.0, 204.0, 228.0, 219.0, 213.0, 194.0, 232.0, 227.0, 230.0, 217.0, 219.0, 231.0, 236.0, 223.0, 239.0, 226.0, 174.0, 173.0, 221.0, 241.0, 217.0, 239.0, 220.0, 239.0, 224.0, 234.0, 219.0, 231.0, 224.0, 229.0, 232.0, 230.0, 239.0, 226.0, 229.0, 215.0, 200.0, 227.0, 208.0, 199.0, 215.0, 238.0, 258.0, 255.0, 230.0, 243.0, 206.0, 195.0, 262.0, 254.0, 197.0, 173.0, 
228.0, 242.0, 234.0, 233.0, 234.0, 231.0, 252.0, 261.0, 221.0, 226.0, 194.0, 196.0, 198.0, 212.0, 244.0, 212.0, 220.0, 236.0, 248.0, 265.0, 218.0, 238.0, 216.0, 179.0, 216.0, 234.0, 229.0, 227.0, 250.0, 242.0, 224.0, 232.0, 195.0, 221.0, 208.0, 211.0, 240.0, 219.0, 235.0, 227.0, 220.0, 233.0, 228.0, 222.0, 210.0, 206.0, 217.0, 227.0, 211.0, 205.0, 217.0, 236.0, 226.0, 224.0, 258.0, 255.0, 209.0, 199.0, 256.0, 260.0, 235.0, 221.0, 238.0, 238.0, 223.0, 221.0, 150.0, 146.0, 216.0, 203.0, 197.0, 216.0, 228.0, 213.0, 225.0, 213.0, 246.0, 270.0, 226.0, 236.0, 231.0, 222.0, 234.0, 222.0, 197.0, 202.0, 218.0, 229.0, 241.0, 224.0, 229.0, 230.0, 226.0, 224.0, 233.0, 232.0, 217.0, 215.0, 117.0, 111.0, 220.0, 230.0, 197.0, 196.0, 258.0, 243.0, 219.0, 225.0, 236.0, 220.0, 254.0, 205.0, 214.0, 245.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867800256071077, "mean_inference_ms": 1.2096947134492912, "mean_action_processing_ms": 0.13288857442312746, "mean_env_wait_ms": 0.8552171842142949, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2355200, "num_agent_steps_trained": 2355200, "num_env_steps_sampled": 1177600, "num_env_steps_trained": 1177600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1177600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2355200, "timers": {"training_iteration_time_ms": 3662.618, "learn_time_ms": 1103.205, "learn_throughput": 11602.554, "synch_weights_time_ms": 10.56}, "counters": {"num_env_steps_sampled": 1177600, "num_env_steps_trained": 1177600, "num_agent_steps_sampled": 2355200, "num_agent_steps_trained": 2355200}, "done": false, "episodes_total": 2944, "training_iteration": 92, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-53", "timestamp": 1666580753, "time_this_iter_s": 3.739091634750366, "time_total_s": 344.1815814971924, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 344.1815814971924, "timesteps_since_restore": 0, "iterations_since_restore": 92, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.840000000000003, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 156.2, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 134.23, "shaped_reward_min": 68, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.39, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.53, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 12.64, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.9, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.93, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.73, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.82, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 13.21, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 1.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.47, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.4, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.36, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.71, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.35, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.63, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 11.82, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 13.21, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.82, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 13.21, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.038967894916891e-29, "cur_lr": 0.0010000000474974513, "total_loss": -3.875233232975006e-06, "policy_loss": -0.00017003831453621387, "vf_loss": 7.67985725402832, "vf_explained_var": 0.6588976383209229, "kl": 0.0017944644205272198, "entropy": 1.2036434412002563, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1190400, "num_env_steps_trained": 1190400, "num_agent_steps_sampled": 2380800, "num_agent_steps_trained": 2380800}, "sampler_results": {"episode_reward_max": 516.0, "episode_reward_min": 228.0, "episode_reward_mean": 446.63, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 111.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 223.315}, "custom_metrics": {"sparse_reward_mean": 156.2, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 134.23, "shaped_reward_min": 68, "shaped_reward_max": 156, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.39, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.53, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.64, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.9, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.93, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.73, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 11.82, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 13.21, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.85, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 2.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, 
"dish_drop_agent_1_mean": 1.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.47, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.4, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.36, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.71, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.35, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 11.82, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 13.21, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 11.82, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 13.21, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 
0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [462.0, 465.0, 444.0, 427.0, 407.0, 453.0, 513.0, 473.0, 401.0, 516.0, 370.0, 470.0, 467.0, 465.0, 513.0, 447.0, 390.0, 410.0, 456.0, 456.0, 513.0, 456.0, 395.0, 450.0, 456.0, 492.0, 456.0, 416.0, 419.0, 459.0, 462.0, 453.0, 450.0, 416.0, 444.0, 416.0, 453.0, 450.0, 513.0, 408.0, 516.0, 456.0, 476.0, 444.0, 296.0, 419.0, 413.0, 441.0, 438.0, 516.0, 462.0, 453.0, 456.0, 399.0, 447.0, 465.0, 459.0, 450.0, 465.0, 432.0, 228.0, 450.0, 393.0, 501.0, 444.0, 456.0, 459.0, 459.0, 453.0, 456.0, 416.0, 456.0, 441.0, 459.0, 464.0, 404.0, 459.0, 408.0, 387.0, 456.0, 462.0, 444.0, 402.0, 456.0, 507.0, 456.0, 356.0, 516.0, 470.0, 462.0, 453.0, 413.0, 456.0, 510.0, 510.0, 456.0, 390.0, 462.0, 444.0, 504.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [232.0, 230.0, 239.0, 226.0, 229.0, 215.0, 200.0, 227.0, 208.0, 199.0, 215.0, 238.0, 258.0, 255.0, 230.0, 243.0, 206.0, 195.0, 262.0, 254.0, 197.0, 173.0, 228.0, 242.0, 234.0, 233.0, 234.0, 231.0, 252.0, 261.0, 221.0, 226.0, 194.0, 196.0, 198.0, 212.0, 244.0, 212.0, 220.0, 236.0, 248.0, 265.0, 218.0, 238.0, 216.0, 179.0, 216.0, 234.0, 229.0, 227.0, 250.0, 242.0, 224.0, 232.0, 195.0, 221.0, 208.0, 211.0, 240.0, 219.0, 235.0, 227.0, 220.0, 233.0, 228.0, 222.0, 210.0, 206.0, 217.0, 227.0, 211.0, 205.0, 217.0, 236.0, 226.0, 224.0, 258.0, 255.0, 209.0, 199.0, 256.0, 260.0, 235.0, 221.0, 238.0, 238.0, 223.0, 221.0, 150.0, 146.0, 216.0, 203.0, 197.0, 216.0, 228.0, 213.0, 225.0, 213.0, 246.0, 270.0, 226.0, 236.0, 231.0, 222.0, 234.0, 222.0, 197.0, 202.0, 218.0, 229.0, 241.0, 224.0, 229.0, 230.0, 226.0, 224.0, 233.0, 232.0, 217.0, 215.0, 117.0, 111.0, 220.0, 230.0, 197.0, 196.0, 258.0, 243.0, 219.0, 225.0, 236.0, 220.0, 254.0, 205.0, 214.0, 245.0, 226.0, 227.0, 229.0, 227.0, 221.0, 195.0, 222.0, 234.0, 229.0, 212.0, 229.0, 230.0, 232.0, 232.0, 190.0, 214.0, 239.0, 220.0, 205.0, 203.0, 193.0, 194.0, 223.0, 233.0, 232.0, 230.0, 201.0, 243.0, 200.0, 202.0, 228.0, 228.0, 254.0, 253.0, 240.0, 216.0, 178.0, 178.0, 258.0, 258.0, 233.0, 237.0, 234.0, 228.0, 233.0, 220.0, 204.0, 209.0, 244.0, 212.0, 253.0, 257.0, 258.0, 252.0, 233.0, 223.0, 184.0, 206.0, 226.0, 236.0, 227.0, 217.0, 242.0, 262.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867222139007848, "mean_inference_ms": 1.2095571930483882, "mean_action_processing_ms": 0.13287619607452736, "mean_env_wait_ms": 0.8547307612516322, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 516.0, "episode_reward_min": 228.0, "episode_reward_mean": 446.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 111.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 223.315}, "hist_stats": {"episode_reward": [462.0, 465.0, 444.0, 427.0, 407.0, 453.0, 513.0, 473.0, 401.0, 516.0, 370.0, 470.0, 467.0, 465.0, 513.0, 447.0, 390.0, 410.0, 456.0, 456.0, 513.0, 456.0, 395.0, 450.0, 456.0, 492.0, 456.0, 416.0, 419.0, 459.0, 462.0, 453.0, 450.0, 416.0, 444.0, 416.0, 453.0, 450.0, 513.0, 408.0, 516.0, 456.0, 476.0, 444.0, 296.0, 419.0, 413.0, 441.0, 438.0, 516.0, 462.0, 453.0, 456.0, 399.0, 447.0, 465.0, 459.0, 450.0, 465.0, 432.0, 228.0, 450.0, 393.0, 501.0, 444.0, 456.0, 459.0, 459.0, 453.0, 456.0, 416.0, 456.0, 441.0, 459.0, 464.0, 404.0, 459.0, 408.0, 387.0, 456.0, 462.0, 444.0, 402.0, 456.0, 507.0, 456.0, 356.0, 516.0, 470.0, 462.0, 453.0, 413.0, 456.0, 510.0, 510.0, 456.0, 390.0, 462.0, 444.0, 504.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [232.0, 230.0, 239.0, 226.0, 229.0, 215.0, 200.0, 227.0, 208.0, 199.0, 215.0, 238.0, 258.0, 255.0, 230.0, 243.0, 206.0, 195.0, 262.0, 254.0, 197.0, 173.0, 228.0, 242.0, 234.0, 233.0, 234.0, 231.0, 252.0, 261.0, 221.0, 226.0, 194.0, 196.0, 198.0, 212.0, 244.0, 212.0, 220.0, 236.0, 248.0, 265.0, 218.0, 238.0, 216.0, 179.0, 216.0, 234.0, 229.0, 227.0, 250.0, 242.0, 224.0, 232.0, 195.0, 221.0, 208.0, 211.0, 240.0, 219.0, 235.0, 227.0, 220.0, 233.0, 228.0, 222.0, 210.0, 206.0, 217.0, 227.0, 211.0, 205.0, 217.0, 236.0, 226.0, 224.0, 258.0, 255.0, 209.0, 199.0, 256.0, 260.0, 235.0, 221.0, 238.0, 238.0, 
223.0, 221.0, 150.0, 146.0, 216.0, 203.0, 197.0, 216.0, 228.0, 213.0, 225.0, 213.0, 246.0, 270.0, 226.0, 236.0, 231.0, 222.0, 234.0, 222.0, 197.0, 202.0, 218.0, 229.0, 241.0, 224.0, 229.0, 230.0, 226.0, 224.0, 233.0, 232.0, 217.0, 215.0, 117.0, 111.0, 220.0, 230.0, 197.0, 196.0, 258.0, 243.0, 219.0, 225.0, 236.0, 220.0, 254.0, 205.0, 214.0, 245.0, 226.0, 227.0, 229.0, 227.0, 221.0, 195.0, 222.0, 234.0, 229.0, 212.0, 229.0, 230.0, 232.0, 232.0, 190.0, 214.0, 239.0, 220.0, 205.0, 203.0, 193.0, 194.0, 223.0, 233.0, 232.0, 230.0, 201.0, 243.0, 200.0, 202.0, 228.0, 228.0, 254.0, 253.0, 240.0, 216.0, 178.0, 178.0, 258.0, 258.0, 233.0, 237.0, 234.0, 228.0, 233.0, 220.0, 204.0, 209.0, 244.0, 212.0, 253.0, 257.0, 258.0, 252.0, 233.0, 223.0, 184.0, 206.0, 226.0, 236.0, 227.0, 217.0, 242.0, 262.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6867222139007848, "mean_inference_ms": 1.2095571930483882, "mean_action_processing_ms": 0.13287619607452736, "mean_env_wait_ms": 0.8547307612516322, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2380800, "num_agent_steps_trained": 2380800, "num_env_steps_sampled": 1190400, "num_env_steps_trained": 1190400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1190400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2380800, "timers": {"training_iteration_time_ms": 3671.226, "learn_time_ms": 1120.3, "learn_throughput": 11425.515, "synch_weights_time_ms": 10.939}, "counters": {"num_env_steps_sampled": 1190400, "num_env_steps_trained": 1190400, "num_agent_steps_sampled": 2380800, "num_agent_steps_trained": 2380800}, "done": false, "episodes_total": 2976, "training_iteration": 93, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-05-57", "timestamp": 1666580757, "time_this_iter_s": 3.6920645236968994, "time_total_s": 347.8736460208893, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 347.8736460208893, "timesteps_since_restore": 0, "iterations_since_restore": 93, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.63333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 157.0, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 134.47, "shaped_reward_min": 68, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.69, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.28, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 12.85, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.57, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.79, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.61, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.11, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.95, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.95, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.4, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.28, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.72, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.11, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.95, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.11, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.95, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.0194839474584456e-29, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005498485988937318, "policy_loss": 0.00038948917062953115, "vf_loss": 7.58758544921875, "vf_explained_var": 0.6836056709289551, "kl": 0.0016990273725241423, "entropy": 1.1967947483062744, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1203200, "num_env_steps_trained": 1203200, "num_agent_steps_sampled": 2406400, "num_agent_steps_trained": 2406400}, "sampler_results": {"episode_reward_max": 519.0, "episode_reward_min": 228.0, "episode_reward_mean": 448.47, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 111.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 224.235}, "custom_metrics": {"sparse_reward_mean": 157.0, "sparse_reward_min": 80, "sparse_reward_max": 180, "shaped_reward_mean": 134.47, "shaped_reward_min": 68, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.69, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.28, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 12.85, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.57, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.79, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.61, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.11, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.95, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 1.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.95, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.4, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.32, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.28, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.76, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.27, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.11, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.95, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.11, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.95, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [450.0, 416.0, 444.0, 416.0, 453.0, 450.0, 513.0, 408.0, 516.0, 456.0, 476.0, 444.0, 296.0, 419.0, 413.0, 441.0, 438.0, 516.0, 462.0, 453.0, 456.0, 399.0, 447.0, 465.0, 459.0, 450.0, 465.0, 432.0, 228.0, 450.0, 393.0, 501.0, 444.0, 456.0, 459.0, 459.0, 453.0, 456.0, 416.0, 456.0, 441.0, 459.0, 464.0, 404.0, 459.0, 408.0, 387.0, 456.0, 462.0, 444.0, 402.0, 456.0, 507.0, 456.0, 356.0, 516.0, 470.0, 462.0, 453.0, 413.0, 456.0, 510.0, 510.0, 456.0, 390.0, 462.0, 444.0, 504.0, 473.0, 453.0, 459.0, 510.0, 516.0, 465.0, 513.0, 435.0, 470.0, 519.0, 453.0, 398.0, 456.0, 459.0, 422.0, 453.0, 396.0, 462.0, 456.0, 456.0, 453.0, 450.0, 516.0, 516.0, 302.0, 456.0, 456.0, 456.0, 453.0, 404.0, 465.0, 465.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [228.0, 222.0, 210.0, 206.0, 217.0, 227.0, 211.0, 205.0, 217.0, 236.0, 226.0, 224.0, 258.0, 255.0, 209.0, 199.0, 256.0, 260.0, 235.0, 221.0, 238.0, 238.0, 223.0, 221.0, 150.0, 146.0, 216.0, 203.0, 197.0, 216.0, 228.0, 213.0, 225.0, 213.0, 246.0, 270.0, 226.0, 236.0, 231.0, 222.0, 234.0, 222.0, 197.0, 202.0, 218.0, 229.0, 241.0, 224.0, 229.0, 230.0, 226.0, 224.0, 233.0, 232.0, 217.0, 215.0, 117.0, 111.0, 220.0, 230.0, 197.0, 196.0, 258.0, 243.0, 219.0, 225.0, 236.0, 220.0, 254.0, 205.0, 214.0, 245.0, 226.0, 227.0, 229.0, 227.0, 221.0, 195.0, 222.0, 234.0, 229.0, 212.0, 229.0, 230.0, 232.0, 232.0, 190.0, 214.0, 239.0, 220.0, 205.0, 203.0, 193.0, 194.0, 223.0, 233.0, 232.0, 230.0, 201.0, 243.0, 200.0, 202.0, 228.0, 228.0, 254.0, 253.0, 240.0, 216.0, 178.0, 178.0, 258.0, 258.0, 233.0, 237.0, 234.0, 228.0, 233.0, 220.0, 204.0, 209.0, 244.0, 212.0, 253.0, 257.0, 258.0, 252.0, 233.0, 223.0, 184.0, 206.0, 226.0, 236.0, 227.0, 217.0, 242.0, 262.0, 237.0, 236.0, 233.0, 220.0, 224.0, 235.0, 239.0, 271.0, 251.0, 265.0, 234.0, 231.0, 254.0, 259.0, 208.0, 227.0, 230.0, 240.0, 270.0, 249.0, 211.0, 242.0, 199.0, 199.0, 245.0, 211.0, 226.0, 233.0, 207.0, 215.0, 228.0, 225.0, 213.0, 183.0, 244.0, 218.0, 240.0, 216.0, 232.0, 224.0, 226.0, 227.0, 230.0, 220.0, 259.0, 257.0, 256.0, 260.0, 146.0, 156.0, 218.0, 238.0, 229.0, 227.0, 239.0, 217.0, 227.0, 226.0, 200.0, 204.0, 236.0, 229.0, 236.0, 229.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686638822903419, "mean_inference_ms": 1.2093328801666652, "mean_action_processing_ms": 0.13286058550342847, "mean_env_wait_ms": 0.854187887715765, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 519.0, "episode_reward_min": 228.0, "episode_reward_mean": 448.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 111.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 224.235}, "hist_stats": {"episode_reward": [450.0, 416.0, 444.0, 416.0, 453.0, 450.0, 513.0, 408.0, 516.0, 456.0, 476.0, 444.0, 296.0, 419.0, 413.0, 441.0, 438.0, 516.0, 462.0, 453.0, 456.0, 399.0, 447.0, 465.0, 459.0, 450.0, 465.0, 432.0, 228.0, 450.0, 393.0, 501.0, 444.0, 456.0, 459.0, 459.0, 453.0, 456.0, 416.0, 456.0, 441.0, 459.0, 464.0, 404.0, 459.0, 408.0, 387.0, 456.0, 462.0, 444.0, 402.0, 456.0, 507.0, 456.0, 356.0, 516.0, 470.0, 462.0, 453.0, 413.0, 456.0, 510.0, 510.0, 456.0, 390.0, 462.0, 444.0, 504.0, 473.0, 453.0, 459.0, 510.0, 516.0, 465.0, 513.0, 435.0, 470.0, 519.0, 453.0, 398.0, 456.0, 459.0, 422.0, 453.0, 396.0, 462.0, 456.0, 456.0, 453.0, 450.0, 516.0, 516.0, 302.0, 456.0, 456.0, 456.0, 453.0, 404.0, 465.0, 465.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [228.0, 222.0, 210.0, 206.0, 217.0, 227.0, 211.0, 205.0, 217.0, 236.0, 226.0, 224.0, 258.0, 255.0, 209.0, 199.0, 256.0, 260.0, 235.0, 221.0, 238.0, 238.0, 223.0, 221.0, 150.0, 146.0, 216.0, 203.0, 197.0, 216.0, 228.0, 213.0, 225.0, 213.0, 246.0, 270.0, 226.0, 236.0, 231.0, 222.0, 234.0, 222.0, 197.0, 202.0, 218.0, 229.0, 241.0, 224.0, 229.0, 230.0, 226.0, 224.0, 233.0, 232.0, 217.0, 215.0, 117.0, 111.0, 220.0, 230.0, 197.0, 196.0, 258.0, 243.0, 219.0, 225.0, 236.0, 220.0, 254.0, 205.0, 214.0, 245.0, 226.0, 227.0, 229.0, 227.0, 221.0, 195.0, 222.0, 234.0, 229.0, 212.0, 229.0, 230.0, 232.0, 232.0, 
190.0, 214.0, 239.0, 220.0, 205.0, 203.0, 193.0, 194.0, 223.0, 233.0, 232.0, 230.0, 201.0, 243.0, 200.0, 202.0, 228.0, 228.0, 254.0, 253.0, 240.0, 216.0, 178.0, 178.0, 258.0, 258.0, 233.0, 237.0, 234.0, 228.0, 233.0, 220.0, 204.0, 209.0, 244.0, 212.0, 253.0, 257.0, 258.0, 252.0, 233.0, 223.0, 184.0, 206.0, 226.0, 236.0, 227.0, 217.0, 242.0, 262.0, 237.0, 236.0, 233.0, 220.0, 224.0, 235.0, 239.0, 271.0, 251.0, 265.0, 234.0, 231.0, 254.0, 259.0, 208.0, 227.0, 230.0, 240.0, 270.0, 249.0, 211.0, 242.0, 199.0, 199.0, 245.0, 211.0, 226.0, 233.0, 207.0, 215.0, 228.0, 225.0, 213.0, 183.0, 244.0, 218.0, 240.0, 216.0, 232.0, 224.0, 226.0, 227.0, 230.0, 220.0, 259.0, 257.0, 256.0, 260.0, 146.0, 156.0, 218.0, 238.0, 229.0, 227.0, 239.0, 217.0, 227.0, 226.0, 200.0, 204.0, 236.0, 229.0, 236.0, 229.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686638822903419, "mean_inference_ms": 1.2093328801666652, "mean_action_processing_ms": 0.13286058550342847, "mean_env_wait_ms": 0.854187887715765, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2406400, "num_agent_steps_trained": 2406400, "num_env_steps_sampled": 1203200, "num_env_steps_trained": 1203200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1203200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2406400, "timers": {"training_iteration_time_ms": 3677.405, "learn_time_ms": 1128.573, "learn_throughput": 11341.759, "synch_weights_time_ms": 11.522}, "counters": {"num_env_steps_sampled": 1203200, "num_env_steps_trained": 1203200, "num_agent_steps_sampled": 2406400, "num_agent_steps_trained": 2406400}, "done": false, "episodes_total": 3008, "training_iteration": 94, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-01", "timestamp": 1666580761, "time_this_iter_s": 3.6629152297973633, "time_total_s": 351.53656125068665, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 351.53656125068665, "timesteps_since_restore": 0, "iterations_since_restore": 94, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.660000000000004, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 157.8, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 136.24, "shaped_reward_min": 79, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.0, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.13, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 13.2, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.49, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.96, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.77, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.43, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.84, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 3.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.22, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.91, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.31, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.29, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.25, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.87, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.83, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.43, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.84, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.43, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.84, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.0097419737292228e-29, "cur_lr": 0.0010000000474974513, "total_loss": 5.3522700909525156e-05, "policy_loss": -0.00010001769987866282, "vf_loss": 7.527695655822754, "vf_explained_var": 0.6990654468536377, "kl": 0.0018153074197471142, "entropy": 1.1984570026397705, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1216000, "num_env_steps_trained": 1216000, "num_agent_steps_sampled": 2432000, "num_agent_steps_trained": 2432000}, "sampler_results": {"episode_reward_max": 522.0, "episode_reward_min": 210.0, "episode_reward_mean": 451.84, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 96.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 225.92}, "custom_metrics": {"sparse_reward_mean": 157.8, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 136.24, "shaped_reward_min": 79, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.0, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.13, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.2, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.49, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 1.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.96, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.77, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.55, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.43, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.84, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 18, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 3.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.22, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.91, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, 
"dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.31, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.29, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.25, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.87, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.83, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 12.43, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.84, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 18, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.43, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.84, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 18, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [444.0, 456.0, 459.0, 459.0, 453.0, 456.0, 416.0, 456.0, 441.0, 459.0, 464.0, 404.0, 459.0, 408.0, 387.0, 456.0, 462.0, 444.0, 402.0, 456.0, 507.0, 456.0, 356.0, 516.0, 470.0, 462.0, 453.0, 413.0, 456.0, 510.0, 510.0, 456.0, 390.0, 462.0, 444.0, 504.0, 473.0, 453.0, 459.0, 510.0, 516.0, 465.0, 513.0, 435.0, 470.0, 519.0, 453.0, 398.0, 456.0, 459.0, 422.0, 453.0, 396.0, 462.0, 456.0, 456.0, 453.0, 450.0, 516.0, 516.0, 302.0, 456.0, 456.0, 456.0, 453.0, 404.0, 465.0, 465.0, 516.0, 210.0, 459.0, 473.0, 459.0, 513.0, 450.0, 447.0, 522.0, 513.0, 462.0, 450.0, 461.0, 456.0, 459.0, 402.0, 462.0, 459.0, 510.0, 416.0, 441.0, 510.0, 450.0, 408.0, 459.0, 453.0, 462.0, 470.0, 239.0, 450.0, 465.0, 456.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [219.0, 225.0, 236.0, 220.0, 254.0, 205.0, 214.0, 245.0, 226.0, 227.0, 229.0, 227.0, 221.0, 195.0, 222.0, 234.0, 229.0, 212.0, 229.0, 230.0, 232.0, 232.0, 190.0, 214.0, 239.0, 220.0, 205.0, 203.0, 193.0, 194.0, 223.0, 233.0, 232.0, 230.0, 201.0, 243.0, 200.0, 202.0, 228.0, 228.0, 254.0, 253.0, 240.0, 216.0, 178.0, 178.0, 258.0, 258.0, 233.0, 237.0, 234.0, 228.0, 233.0, 220.0, 204.0, 209.0, 244.0, 212.0, 253.0, 257.0, 258.0, 252.0, 233.0, 223.0, 184.0, 206.0, 226.0, 236.0, 227.0, 217.0, 242.0, 262.0, 237.0, 236.0, 233.0, 220.0, 224.0, 235.0, 239.0, 271.0, 251.0, 265.0, 234.0, 231.0, 254.0, 259.0, 208.0, 227.0, 230.0, 240.0, 270.0, 249.0, 211.0, 242.0, 199.0, 199.0, 245.0, 211.0, 226.0, 233.0, 207.0, 215.0, 228.0, 225.0, 213.0, 183.0, 244.0, 218.0, 240.0, 216.0, 232.0, 224.0, 226.0, 227.0, 230.0, 220.0, 259.0, 257.0, 256.0, 260.0, 146.0, 156.0, 218.0, 238.0, 229.0, 227.0, 239.0, 217.0, 227.0, 226.0, 200.0, 204.0, 236.0, 229.0, 236.0, 229.0, 267.0, 249.0, 114.0, 96.0, 234.0, 225.0, 241.0, 232.0, 228.0, 231.0, 254.0, 259.0, 236.0, 214.0, 226.0, 221.0, 257.0, 265.0, 249.0, 264.0, 234.0, 228.0, 216.0, 234.0, 221.0, 240.0, 231.0, 225.0, 220.0, 239.0, 195.0, 207.0, 244.0, 218.0, 248.0, 211.0, 259.0, 251.0, 200.0, 216.0, 210.0, 231.0, 267.0, 243.0, 240.0, 210.0, 210.0, 198.0, 234.0, 225.0, 223.0, 230.0, 229.0, 233.0, 232.0, 238.0, 108.0, 131.0, 220.0, 230.0, 225.0, 240.0, 234.0, 222.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686491869898288, "mean_inference_ms": 1.209051302333497, "mean_action_processing_ms": 0.1328341330608934, "mean_env_wait_ms": 0.8535721275289399, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 522.0, "episode_reward_min": 210.0, "episode_reward_mean": 451.84, 
"episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 96.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 225.92}, "hist_stats": {"episode_reward": [444.0, 456.0, 459.0, 459.0, 453.0, 456.0, 416.0, 456.0, 441.0, 459.0, 464.0, 404.0, 459.0, 408.0, 387.0, 456.0, 462.0, 444.0, 402.0, 456.0, 507.0, 456.0, 356.0, 516.0, 470.0, 462.0, 453.0, 413.0, 456.0, 510.0, 510.0, 456.0, 390.0, 462.0, 444.0, 504.0, 473.0, 453.0, 459.0, 510.0, 516.0, 465.0, 513.0, 435.0, 470.0, 519.0, 453.0, 398.0, 456.0, 459.0, 422.0, 453.0, 396.0, 462.0, 456.0, 456.0, 453.0, 450.0, 516.0, 516.0, 302.0, 456.0, 456.0, 456.0, 453.0, 404.0, 465.0, 465.0, 516.0, 210.0, 459.0, 473.0, 459.0, 513.0, 450.0, 447.0, 522.0, 513.0, 462.0, 450.0, 461.0, 456.0, 459.0, 402.0, 462.0, 459.0, 510.0, 416.0, 441.0, 510.0, 450.0, 408.0, 459.0, 453.0, 462.0, 470.0, 239.0, 450.0, 465.0, 456.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [219.0, 225.0, 236.0, 220.0, 254.0, 205.0, 214.0, 245.0, 226.0, 227.0, 229.0, 227.0, 221.0, 195.0, 222.0, 234.0, 229.0, 212.0, 229.0, 230.0, 232.0, 232.0, 190.0, 214.0, 239.0, 220.0, 205.0, 203.0, 193.0, 194.0, 223.0, 233.0, 232.0, 230.0, 201.0, 243.0, 200.0, 202.0, 228.0, 228.0, 254.0, 253.0, 240.0, 216.0, 178.0, 178.0, 258.0, 258.0, 233.0, 237.0, 234.0, 228.0, 233.0, 220.0, 204.0, 209.0, 244.0, 212.0, 253.0, 257.0, 258.0, 252.0, 233.0, 223.0, 184.0, 206.0, 226.0, 236.0, 227.0, 217.0, 242.0, 262.0, 237.0, 236.0, 233.0, 220.0, 224.0, 235.0, 239.0, 
271.0, 251.0, 265.0, 234.0, 231.0, 254.0, 259.0, 208.0, 227.0, 230.0, 240.0, 270.0, 249.0, 211.0, 242.0, 199.0, 199.0, 245.0, 211.0, 226.0, 233.0, 207.0, 215.0, 228.0, 225.0, 213.0, 183.0, 244.0, 218.0, 240.0, 216.0, 232.0, 224.0, 226.0, 227.0, 230.0, 220.0, 259.0, 257.0, 256.0, 260.0, 146.0, 156.0, 218.0, 238.0, 229.0, 227.0, 239.0, 217.0, 227.0, 226.0, 200.0, 204.0, 236.0, 229.0, 236.0, 229.0, 267.0, 249.0, 114.0, 96.0, 234.0, 225.0, 241.0, 232.0, 228.0, 231.0, 254.0, 259.0, 236.0, 214.0, 226.0, 221.0, 257.0, 265.0, 249.0, 264.0, 234.0, 228.0, 216.0, 234.0, 221.0, 240.0, 231.0, 225.0, 220.0, 239.0, 195.0, 207.0, 244.0, 218.0, 248.0, 211.0, 259.0, 251.0, 200.0, 216.0, 210.0, 231.0, 267.0, 243.0, 240.0, 210.0, 210.0, 198.0, 234.0, 225.0, 223.0, 230.0, 229.0, 233.0, 232.0, 238.0, 108.0, 131.0, 220.0, 230.0, 225.0, 240.0, 234.0, 222.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.686491869898288, "mean_inference_ms": 1.209051302333497, "mean_action_processing_ms": 0.1328341330608934, "mean_env_wait_ms": 0.8535721275289399, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2432000, "num_agent_steps_trained": 2432000, "num_env_steps_sampled": 1216000, "num_env_steps_trained": 1216000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1216000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2432000, "timers": {"training_iteration_time_ms": 3673.561, "learn_time_ms": 1132.205, "learn_throughput": 11305.37, "synch_weights_time_ms": 12.381}, "counters": {"num_env_steps_sampled": 1216000, "num_env_steps_trained": 1216000, "num_agent_steps_sampled": 2432000, "num_agent_steps_trained": 2432000}, "done": false, "episodes_total": 3040, "training_iteration": 95, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-05", "timestamp": 1666580765, "time_this_iter_s": 3.685410499572754, "time_total_s": 
355.2219717502594, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", 
"remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, 
"evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", 
"policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, 
"policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 355.2219717502594, "timesteps_since_restore": 0, "iterations_since_restore": 95, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.816666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 156.2, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 135.15, "shaped_reward_min": 43, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.17, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 13.86, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 
21, "useful_onion_pickup_agent_0_mean": 13.2, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.17, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.79, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.65, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.56, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.52, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.06, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 3.27, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.27, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.23, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.02, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.92, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.98, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 12.56, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.52, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.56, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.52, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 5.048709868646114e-30, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009595048613846302, "policy_loss": -0.0011315889423713088, "vf_loss": 7.664634704589844, "vf_explained_var": 0.693601667881012, "kl": 0.0018983024638146162, "entropy": 1.1887578964233398, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1228800, "num_env_steps_trained": 1228800, "num_agent_steps_sampled": 2457600, "num_agent_steps_trained": 2457600}, "sampler_results": {"episode_reward_max": 522.0, "episode_reward_min": 123.0, "episode_reward_mean": 447.55, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 57.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 223.775}, "custom_metrics": {"sparse_reward_mean": 156.2, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 135.15, "shaped_reward_min": 43, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.17, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 13.86, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.2, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 19, "useful_onion_pickup_agent_1_mean": 13.17, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 1.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.79, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.65, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 12.56, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.52, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.06, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 3.27, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.74, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.27, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.23, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.02, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 3.92, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.98, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 12.56, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.52, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.56, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.52, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [390.0, 462.0, 444.0, 504.0, 473.0, 453.0, 459.0, 510.0, 516.0, 465.0, 513.0, 435.0, 470.0, 519.0, 453.0, 398.0, 456.0, 459.0, 422.0, 453.0, 396.0, 462.0, 456.0, 456.0, 453.0, 450.0, 516.0, 516.0, 302.0, 456.0, 456.0, 456.0, 453.0, 404.0, 465.0, 465.0, 516.0, 210.0, 459.0, 473.0, 459.0, 513.0, 450.0, 447.0, 522.0, 513.0, 462.0, 450.0, 461.0, 456.0, 459.0, 402.0, 462.0, 459.0, 510.0, 416.0, 441.0, 510.0, 450.0, 408.0, 459.0, 453.0, 462.0, 470.0, 239.0, 450.0, 465.0, 456.0, 507.0, 453.0, 462.0, 459.0, 459.0, 464.0, 467.0, 461.0, 413.0, 456.0, 464.0, 237.0, 450.0, 507.0, 342.0, 456.0, 468.0, 444.0, 399.0, 402.0, 447.0, 510.0, 513.0, 516.0, 123.0, 462.0, 393.0, 459.0, 459.0, 399.0, 413.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [184.0, 206.0, 226.0, 236.0, 227.0, 217.0, 242.0, 262.0, 237.0, 236.0, 233.0, 220.0, 224.0, 235.0, 239.0, 271.0, 251.0, 265.0, 234.0, 231.0, 254.0, 259.0, 208.0, 227.0, 230.0, 240.0, 270.0, 249.0, 211.0, 242.0, 199.0, 199.0, 245.0, 211.0, 226.0, 233.0, 207.0, 215.0, 228.0, 225.0, 213.0, 183.0, 244.0, 218.0, 240.0, 216.0, 232.0, 224.0, 226.0, 227.0, 230.0, 220.0, 259.0, 257.0, 256.0, 260.0, 146.0, 156.0, 218.0, 238.0, 229.0, 227.0, 239.0, 217.0, 227.0, 226.0, 200.0, 204.0, 236.0, 229.0, 236.0, 229.0, 267.0, 249.0, 114.0, 96.0, 234.0, 225.0, 241.0, 232.0, 228.0, 231.0, 254.0, 259.0, 236.0, 214.0, 226.0, 221.0, 257.0, 265.0, 249.0, 264.0, 234.0, 228.0, 216.0, 234.0, 221.0, 240.0, 231.0, 225.0, 220.0, 239.0, 195.0, 207.0, 244.0, 218.0, 248.0, 211.0, 259.0, 251.0, 200.0, 216.0, 210.0, 231.0, 267.0, 243.0, 240.0, 210.0, 210.0, 198.0, 234.0, 225.0, 223.0, 230.0, 229.0, 233.0, 232.0, 238.0, 108.0, 131.0, 220.0, 230.0, 225.0, 240.0, 234.0, 222.0, 260.0, 247.0, 233.0, 220.0, 238.0, 224.0, 233.0, 226.0, 227.0, 232.0, 244.0, 220.0, 230.0, 237.0, 228.0, 233.0, 205.0, 208.0, 216.0, 240.0, 237.0, 227.0, 117.0, 120.0, 219.0, 231.0, 262.0, 245.0, 171.0, 171.0, 224.0, 232.0, 219.0, 249.0, 208.0, 236.0, 197.0, 202.0, 205.0, 197.0, 211.0, 236.0, 244.0, 266.0, 257.0, 256.0, 247.0, 269.0, 66.0, 57.0, 230.0, 232.0, 186.0, 207.0, 232.0, 227.0, 232.0, 227.0, 209.0, 190.0, 204.0, 209.0, 266.0, 247.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6863606600516278, "mean_inference_ms": 1.2088294285353811, "mean_action_processing_ms": 0.13281410100148, "mean_env_wait_ms": 0.8530127778652258, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 522.0, "episode_reward_min": 123.0, "episode_reward_mean": 447.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 57.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 223.775}, "hist_stats": {"episode_reward": [390.0, 462.0, 444.0, 504.0, 473.0, 453.0, 459.0, 510.0, 516.0, 465.0, 513.0, 435.0, 470.0, 519.0, 453.0, 398.0, 456.0, 459.0, 422.0, 453.0, 396.0, 462.0, 456.0, 456.0, 453.0, 450.0, 516.0, 516.0, 302.0, 456.0, 456.0, 456.0, 453.0, 404.0, 465.0, 465.0, 516.0, 210.0, 459.0, 473.0, 459.0, 513.0, 450.0, 447.0, 522.0, 513.0, 462.0, 450.0, 461.0, 456.0, 459.0, 402.0, 462.0, 459.0, 510.0, 416.0, 441.0, 510.0, 450.0, 408.0, 459.0, 453.0, 462.0, 470.0, 239.0, 450.0, 465.0, 456.0, 507.0, 453.0, 462.0, 459.0, 459.0, 464.0, 467.0, 461.0, 413.0, 456.0, 464.0, 237.0, 450.0, 507.0, 342.0, 456.0, 468.0, 444.0, 399.0, 402.0, 447.0, 510.0, 513.0, 516.0, 123.0, 462.0, 393.0, 459.0, 459.0, 399.0, 413.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [184.0, 206.0, 226.0, 236.0, 227.0, 217.0, 242.0, 262.0, 237.0, 236.0, 233.0, 220.0, 224.0, 235.0, 239.0, 271.0, 251.0, 265.0, 234.0, 231.0, 254.0, 259.0, 208.0, 227.0, 230.0, 240.0, 270.0, 249.0, 211.0, 242.0, 199.0, 199.0, 245.0, 211.0, 226.0, 233.0, 207.0, 215.0, 228.0, 225.0, 213.0, 183.0, 244.0, 218.0, 240.0, 216.0, 232.0, 224.0, 226.0, 227.0, 230.0, 220.0, 259.0, 257.0, 256.0, 260.0, 146.0, 156.0, 218.0, 238.0, 229.0, 227.0, 239.0, 217.0, 227.0, 226.0, 200.0, 204.0, 236.0, 229.0, 236.0, 229.0, 267.0, 249.0, 114.0, 96.0, 234.0, 225.0, 241.0, 232.0, 228.0, 231.0, 254.0, 259.0, 236.0, 214.0, 226.0, 
221.0, 257.0, 265.0, 249.0, 264.0, 234.0, 228.0, 216.0, 234.0, 221.0, 240.0, 231.0, 225.0, 220.0, 239.0, 195.0, 207.0, 244.0, 218.0, 248.0, 211.0, 259.0, 251.0, 200.0, 216.0, 210.0, 231.0, 267.0, 243.0, 240.0, 210.0, 210.0, 198.0, 234.0, 225.0, 223.0, 230.0, 229.0, 233.0, 232.0, 238.0, 108.0, 131.0, 220.0, 230.0, 225.0, 240.0, 234.0, 222.0, 260.0, 247.0, 233.0, 220.0, 238.0, 224.0, 233.0, 226.0, 227.0, 232.0, 244.0, 220.0, 230.0, 237.0, 228.0, 233.0, 205.0, 208.0, 216.0, 240.0, 237.0, 227.0, 117.0, 120.0, 219.0, 231.0, 262.0, 245.0, 171.0, 171.0, 224.0, 232.0, 219.0, 249.0, 208.0, 236.0, 197.0, 202.0, 205.0, 197.0, 211.0, 236.0, 244.0, 266.0, 257.0, 256.0, 247.0, 269.0, 66.0, 57.0, 230.0, 232.0, 186.0, 207.0, 232.0, 227.0, 232.0, 227.0, 209.0, 190.0, 204.0, 209.0, 266.0, 247.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6863606600516278, "mean_inference_ms": 1.2088294285353811, "mean_action_processing_ms": 0.13281410100148, "mean_env_wait_ms": 0.8530127778652258, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2457600, "num_agent_steps_trained": 2457600, "num_env_steps_sampled": 1228800, "num_env_steps_trained": 1228800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1228800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2457600, "timers": {"training_iteration_time_ms": 3664.002, "learn_time_ms": 1123.086, "learn_throughput": 11397.169, "synch_weights_time_ms": 13.512}, "counters": {"num_env_steps_sampled": 1228800, "num_env_steps_trained": 1228800, "num_agent_steps_sampled": 2457600, "num_agent_steps_trained": 2457600}, "done": false, "episodes_total": 3072, "training_iteration": 96, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-08", "timestamp": 1666580768, "time_this_iter_s": 3.63649320602417, "time_total_s": 358.85846495628357, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 358.85846495628357, "timesteps_since_restore": 0, "iterations_since_restore": 96, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.500000000000004, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 156.4, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 136.25, "shaped_reward_min": 43, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.71, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.33, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 12.83, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.65, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.66, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.3, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.98, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.18, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 3.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.75, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.26, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.24, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.19, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.93, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.85, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.3, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.98, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.3, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.98, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.524354934323057e-30, "cur_lr": 0.0010000000474974513, "total_loss": -0.0026760417968034744, "policy_loss": -0.002843119204044342, "vf_loss": 7.638774871826172, "vf_explained_var": 0.6889978647232056, "kl": 0.0018203468061983585, "entropy": 1.1935973167419434, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1241600, "num_env_steps_trained": 1241600, "num_agent_steps_sampled": 2483200, "num_agent_steps_trained": 2483200}, "sampler_results": {"episode_reward_max": 522.0, "episode_reward_min": 123.0, "episode_reward_mean": 449.05, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 57.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 224.525}, "custom_metrics": {"sparse_reward_mean": 156.4, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 136.25, "shaped_reward_min": 43, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.71, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.33, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 12.83, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.65, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 1.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 1.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.66, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.6, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.3, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 18, "potting_onion_agent_1_mean": 12.98, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.18, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 3.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.85, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.75, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 9, "useful_dish_drop_agent_0_mean": 0.26, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.24, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.19, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 3.93, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.85, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.3, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 18, "optimal_onion_potting_agent_1_mean": 12.98, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.3, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 18, "viable_onion_potting_agent_1_mean": 12.98, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [453.0, 404.0, 465.0, 465.0, 516.0, 210.0, 459.0, 473.0, 459.0, 513.0, 450.0, 447.0, 522.0, 513.0, 462.0, 450.0, 461.0, 456.0, 459.0, 402.0, 462.0, 459.0, 510.0, 416.0, 441.0, 510.0, 450.0, 408.0, 459.0, 453.0, 462.0, 470.0, 239.0, 450.0, 465.0, 456.0, 507.0, 453.0, 462.0, 459.0, 459.0, 464.0, 467.0, 461.0, 413.0, 456.0, 464.0, 237.0, 450.0, 507.0, 342.0, 456.0, 468.0, 444.0, 399.0, 402.0, 447.0, 510.0, 513.0, 516.0, 123.0, 462.0, 393.0, 459.0, 459.0, 399.0, 413.0, 513.0, 459.0, 516.0, 468.0, 510.0, 462.0, 459.0, 399.0, 465.0, 410.0, 401.0, 510.0, 470.0, 473.0, 470.0, 348.0, 513.0, 459.0, 510.0, 473.0, 453.0, 513.0, 459.0, 516.0, 513.0, 419.0, 453.0, 402.0, 450.0, 504.0, 413.0, 459.0, 450.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [227.0, 226.0, 200.0, 204.0, 236.0, 229.0, 236.0, 229.0, 267.0, 249.0, 114.0, 96.0, 234.0, 225.0, 241.0, 232.0, 228.0, 231.0, 254.0, 259.0, 236.0, 214.0, 226.0, 221.0, 257.0, 265.0, 249.0, 264.0, 234.0, 228.0, 216.0, 234.0, 221.0, 240.0, 231.0, 225.0, 220.0, 239.0, 195.0, 207.0, 244.0, 218.0, 248.0, 211.0, 259.0, 251.0, 200.0, 216.0, 210.0, 231.0, 267.0, 243.0, 240.0, 210.0, 210.0, 198.0, 234.0, 225.0, 223.0, 230.0, 229.0, 233.0, 232.0, 238.0, 108.0, 131.0, 220.0, 230.0, 225.0, 240.0, 234.0, 222.0, 260.0, 247.0, 233.0, 220.0, 238.0, 224.0, 233.0, 226.0, 227.0, 232.0, 244.0, 220.0, 230.0, 237.0, 228.0, 233.0, 205.0, 208.0, 216.0, 240.0, 237.0, 227.0, 117.0, 120.0, 219.0, 231.0, 262.0, 245.0, 171.0, 171.0, 224.0, 232.0, 219.0, 249.0, 208.0, 236.0, 197.0, 202.0, 205.0, 197.0, 211.0, 236.0, 244.0, 266.0, 257.0, 256.0, 247.0, 269.0, 66.0, 57.0, 230.0, 232.0, 186.0, 207.0, 232.0, 227.0, 232.0, 227.0, 209.0, 190.0, 204.0, 209.0, 266.0, 247.0, 221.0, 238.0, 255.0, 261.0, 230.0, 238.0, 249.0, 261.0, 241.0, 221.0, 225.0, 234.0, 187.0, 212.0, 237.0, 228.0, 197.0, 213.0, 207.0, 194.0, 251.0, 259.0, 245.0, 225.0, 231.0, 242.0, 231.0, 239.0, 169.0, 179.0, 270.0, 243.0, 235.0, 224.0, 259.0, 251.0, 245.0, 228.0, 227.0, 226.0, 246.0, 267.0, 219.0, 240.0, 252.0, 264.0, 265.0, 248.0, 205.0, 214.0, 215.0, 238.0, 202.0, 200.0, 231.0, 219.0, 255.0, 249.0, 206.0, 207.0, 244.0, 215.0, 208.0, 242.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6862933417965331, "mean_inference_ms": 1.2086692382951651, "mean_action_processing_ms": 0.13279365519382014, "mean_env_wait_ms": 0.8524899468324156, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 522.0, "episode_reward_min": 123.0, "episode_reward_mean": 449.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 57.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 224.525}, "hist_stats": {"episode_reward": [453.0, 404.0, 465.0, 465.0, 516.0, 210.0, 459.0, 473.0, 459.0, 513.0, 450.0, 447.0, 522.0, 513.0, 462.0, 450.0, 461.0, 456.0, 459.0, 402.0, 462.0, 459.0, 510.0, 416.0, 441.0, 510.0, 450.0, 408.0, 459.0, 453.0, 462.0, 470.0, 239.0, 450.0, 465.0, 456.0, 507.0, 453.0, 462.0, 459.0, 459.0, 464.0, 467.0, 461.0, 413.0, 456.0, 464.0, 237.0, 450.0, 507.0, 342.0, 456.0, 468.0, 444.0, 399.0, 402.0, 447.0, 510.0, 513.0, 516.0, 123.0, 462.0, 393.0, 459.0, 459.0, 399.0, 413.0, 513.0, 459.0, 516.0, 468.0, 510.0, 462.0, 459.0, 399.0, 465.0, 410.0, 401.0, 510.0, 470.0, 473.0, 470.0, 348.0, 513.0, 459.0, 510.0, 473.0, 453.0, 513.0, 459.0, 516.0, 513.0, 419.0, 453.0, 402.0, 450.0, 504.0, 413.0, 459.0, 450.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [227.0, 226.0, 200.0, 204.0, 236.0, 229.0, 236.0, 229.0, 267.0, 249.0, 114.0, 96.0, 234.0, 225.0, 241.0, 232.0, 228.0, 231.0, 254.0, 259.0, 236.0, 214.0, 226.0, 221.0, 257.0, 265.0, 249.0, 264.0, 234.0, 228.0, 216.0, 234.0, 221.0, 240.0, 231.0, 225.0, 220.0, 239.0, 195.0, 207.0, 244.0, 218.0, 248.0, 211.0, 259.0, 251.0, 200.0, 216.0, 210.0, 231.0, 267.0, 243.0, 240.0, 210.0, 210.0, 198.0, 234.0, 225.0, 223.0, 230.0, 229.0, 233.0, 232.0, 238.0, 108.0, 131.0, 220.0, 230.0, 225.0, 240.0, 234.0, 222.0, 260.0, 247.0, 233.0, 220.0, 238.0, 224.0, 233.0, 226.0, 227.0, 232.0, 244.0, 220.0, 230.0, 237.0, 228.0, 
233.0, 205.0, 208.0, 216.0, 240.0, 237.0, 227.0, 117.0, 120.0, 219.0, 231.0, 262.0, 245.0, 171.0, 171.0, 224.0, 232.0, 219.0, 249.0, 208.0, 236.0, 197.0, 202.0, 205.0, 197.0, 211.0, 236.0, 244.0, 266.0, 257.0, 256.0, 247.0, 269.0, 66.0, 57.0, 230.0, 232.0, 186.0, 207.0, 232.0, 227.0, 232.0, 227.0, 209.0, 190.0, 204.0, 209.0, 266.0, 247.0, 221.0, 238.0, 255.0, 261.0, 230.0, 238.0, 249.0, 261.0, 241.0, 221.0, 225.0, 234.0, 187.0, 212.0, 237.0, 228.0, 197.0, 213.0, 207.0, 194.0, 251.0, 259.0, 245.0, 225.0, 231.0, 242.0, 231.0, 239.0, 169.0, 179.0, 270.0, 243.0, 235.0, 224.0, 259.0, 251.0, 245.0, 228.0, 227.0, 226.0, 246.0, 267.0, 219.0, 240.0, 252.0, 264.0, 265.0, 248.0, 205.0, 214.0, 215.0, 238.0, 202.0, 200.0, 231.0, 219.0, 255.0, 249.0, 206.0, 207.0, 244.0, 215.0, 208.0, 242.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6862933417965331, "mean_inference_ms": 1.2086692382951651, "mean_action_processing_ms": 0.13279365519382014, "mean_env_wait_ms": 0.8524899468324156, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2483200, "num_agent_steps_trained": 2483200, "num_env_steps_sampled": 1241600, "num_env_steps_trained": 1241600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1241600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2483200, "timers": {"training_iteration_time_ms": 3645.286, "learn_time_ms": 1124.006, "learn_throughput": 11387.84, "synch_weights_time_ms": 13.444}, "counters": {"num_env_steps_sampled": 1241600, "num_env_steps_trained": 1241600, "num_agent_steps_sampled": 2483200, "num_agent_steps_trained": 2483200}, "done": false, "episodes_total": 3104, "training_iteration": 97, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-12", "timestamp": 1666580772, "time_this_iter_s": 3.674302577972412, "time_total_s": 362.532767534256, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 362.532767534256, "timesteps_since_restore": 0, "iterations_since_restore": 97, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.6, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 157.0, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 137.17, "shaped_reward_min": 43, "shaped_reward_max": 161, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.46, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.56, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 
12.67, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.79, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.89, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.71, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.21, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.2, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.13, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.7, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.25, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.13, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.05, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.03, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.94, "soup_delivery_agent_1_min": 0, 
"soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.21, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.2, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.21, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.2, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.2621774671615285e-30, "cur_lr": 0.0010000000474974513, "total_loss": -0.002659280551597476, "policy_loss": -0.0028225441928952932, "vf_loss": 7.56541633605957, "vf_explained_var": 0.677125871181488, "kl": 0.0019446380902081728, "entropy": 1.186551809310913, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1254400, "num_env_steps_trained": 1254400, "num_agent_steps_sampled": 2508800, "num_agent_steps_trained": 2508800}, "sampler_results": {"episode_reward_max": 516.0, "episode_reward_min": 123.0, "episode_reward_mean": 451.17, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 57.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 225.585}, "custom_metrics": {"sparse_reward_mean": 157.0, "sparse_reward_min": 40, "sparse_reward_max": 180, "shaped_reward_mean": 137.17, "shaped_reward_min": 43, "shaped_reward_max": 161, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.46, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.56, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 12.67, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.79, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.89, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.51, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.71, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.21, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.2, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.13, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.7, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.25, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.13, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.05, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.03, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 3.94, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.21, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.2, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.21, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.2, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [239.0, 450.0, 465.0, 456.0, 507.0, 453.0, 462.0, 459.0, 459.0, 464.0, 467.0, 461.0, 413.0, 456.0, 464.0, 237.0, 450.0, 507.0, 342.0, 456.0, 468.0, 444.0, 399.0, 402.0, 447.0, 510.0, 513.0, 516.0, 123.0, 462.0, 393.0, 459.0, 459.0, 399.0, 413.0, 513.0, 459.0, 516.0, 468.0, 510.0, 462.0, 459.0, 399.0, 465.0, 410.0, 401.0, 510.0, 470.0, 473.0, 470.0, 348.0, 513.0, 459.0, 510.0, 473.0, 453.0, 513.0, 459.0, 516.0, 513.0, 419.0, 453.0, 402.0, 450.0, 504.0, 413.0, 459.0, 450.0, 462.0, 513.0, 459.0, 462.0, 462.0, 416.0, 453.0, 465.0, 456.0, 419.0, 513.0, 462.0, 465.0, 513.0, 438.0, 453.0, 459.0, 507.0, 476.0, 510.0, 459.0, 407.0, 481.0, 456.0, 470.0, 405.0, 465.0, 416.0, 444.0, 462.0, 450.0, 473.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [108.0, 131.0, 220.0, 230.0, 225.0, 240.0, 234.0, 222.0, 260.0, 247.0, 233.0, 220.0, 238.0, 224.0, 233.0, 226.0, 227.0, 232.0, 244.0, 220.0, 230.0, 237.0, 228.0, 233.0, 205.0, 208.0, 216.0, 240.0, 237.0, 227.0, 117.0, 120.0, 219.0, 231.0, 262.0, 245.0, 171.0, 171.0, 224.0, 232.0, 219.0, 249.0, 208.0, 236.0, 197.0, 202.0, 205.0, 197.0, 211.0, 236.0, 244.0, 266.0, 257.0, 256.0, 247.0, 269.0, 66.0, 57.0, 230.0, 232.0, 186.0, 207.0, 232.0, 227.0, 232.0, 227.0, 209.0, 190.0, 204.0, 209.0, 266.0, 247.0, 221.0, 238.0, 255.0, 261.0, 230.0, 238.0, 249.0, 261.0, 241.0, 221.0, 225.0, 234.0, 187.0, 212.0, 237.0, 228.0, 197.0, 213.0, 207.0, 194.0, 251.0, 259.0, 245.0, 225.0, 231.0, 242.0, 231.0, 239.0, 169.0, 179.0, 270.0, 243.0, 235.0, 224.0, 259.0, 251.0, 245.0, 228.0, 227.0, 226.0, 246.0, 267.0, 219.0, 240.0, 252.0, 264.0, 265.0, 248.0, 205.0, 214.0, 215.0, 238.0, 202.0, 200.0, 231.0, 219.0, 255.0, 249.0, 206.0, 207.0, 244.0, 215.0, 208.0, 242.0, 230.0, 232.0, 254.0, 259.0, 238.0, 221.0, 229.0, 233.0, 218.0, 244.0, 202.0, 214.0, 217.0, 236.0, 226.0, 239.0, 222.0, 234.0, 208.0, 211.0, 258.0, 255.0, 226.0, 236.0, 223.0, 242.0, 251.0, 262.0, 216.0, 222.0, 224.0, 229.0, 231.0, 228.0, 239.0, 268.0, 233.0, 243.0, 253.0, 257.0, 218.0, 241.0, 202.0, 205.0, 236.0, 245.0, 214.0, 242.0, 235.0, 235.0, 218.0, 187.0, 244.0, 221.0, 221.0, 195.0, 233.0, 211.0, 221.0, 241.0, 234.0, 216.0, 241.0, 232.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6862475893075703, "mean_inference_ms": 1.2084870807976076, "mean_action_processing_ms": 0.1327752879991328, "mean_env_wait_ms": 0.8519673280789902, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 516.0, "episode_reward_min": 123.0, "episode_reward_mean": 451.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 57.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 225.585}, "hist_stats": {"episode_reward": [239.0, 450.0, 465.0, 456.0, 507.0, 453.0, 462.0, 459.0, 459.0, 464.0, 467.0, 461.0, 413.0, 456.0, 464.0, 237.0, 450.0, 507.0, 342.0, 456.0, 468.0, 444.0, 399.0, 402.0, 447.0, 510.0, 513.0, 516.0, 123.0, 462.0, 393.0, 459.0, 459.0, 399.0, 413.0, 513.0, 459.0, 516.0, 468.0, 510.0, 462.0, 459.0, 399.0, 465.0, 410.0, 401.0, 510.0, 470.0, 473.0, 470.0, 348.0, 513.0, 459.0, 510.0, 473.0, 453.0, 513.0, 459.0, 516.0, 513.0, 419.0, 453.0, 402.0, 450.0, 504.0, 413.0, 459.0, 450.0, 462.0, 513.0, 459.0, 462.0, 462.0, 416.0, 453.0, 465.0, 456.0, 419.0, 513.0, 462.0, 465.0, 513.0, 438.0, 453.0, 459.0, 507.0, 476.0, 510.0, 459.0, 407.0, 481.0, 456.0, 470.0, 405.0, 465.0, 416.0, 444.0, 462.0, 450.0, 473.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [108.0, 131.0, 220.0, 230.0, 225.0, 240.0, 234.0, 222.0, 260.0, 247.0, 233.0, 220.0, 238.0, 224.0, 233.0, 226.0, 227.0, 232.0, 244.0, 220.0, 230.0, 237.0, 228.0, 233.0, 205.0, 208.0, 216.0, 240.0, 237.0, 227.0, 117.0, 120.0, 219.0, 231.0, 262.0, 245.0, 171.0, 171.0, 224.0, 232.0, 219.0, 249.0, 208.0, 236.0, 197.0, 202.0, 205.0, 197.0, 211.0, 236.0, 244.0, 266.0, 257.0, 256.0, 247.0, 269.0, 66.0, 57.0, 230.0, 232.0, 186.0, 207.0, 232.0, 227.0, 232.0, 227.0, 209.0, 190.0, 204.0, 209.0, 266.0, 247.0, 221.0, 238.0, 255.0, 261.0, 230.0, 238.0, 249.0, 261.0, 241.0, 221.0, 225.0, 234.0, 187.0, 212.0, 237.0, 
228.0, 197.0, 213.0, 207.0, 194.0, 251.0, 259.0, 245.0, 225.0, 231.0, 242.0, 231.0, 239.0, 169.0, 179.0, 270.0, 243.0, 235.0, 224.0, 259.0, 251.0, 245.0, 228.0, 227.0, 226.0, 246.0, 267.0, 219.0, 240.0, 252.0, 264.0, 265.0, 248.0, 205.0, 214.0, 215.0, 238.0, 202.0, 200.0, 231.0, 219.0, 255.0, 249.0, 206.0, 207.0, 244.0, 215.0, 208.0, 242.0, 230.0, 232.0, 254.0, 259.0, 238.0, 221.0, 229.0, 233.0, 218.0, 244.0, 202.0, 214.0, 217.0, 236.0, 226.0, 239.0, 222.0, 234.0, 208.0, 211.0, 258.0, 255.0, 226.0, 236.0, 223.0, 242.0, 251.0, 262.0, 216.0, 222.0, 224.0, 229.0, 231.0, 228.0, 239.0, 268.0, 233.0, 243.0, 253.0, 257.0, 218.0, 241.0, 202.0, 205.0, 236.0, 245.0, 214.0, 242.0, 235.0, 235.0, 218.0, 187.0, 244.0, 221.0, 221.0, 195.0, 233.0, 211.0, 221.0, 241.0, 234.0, 216.0, 241.0, 232.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6862475893075703, "mean_inference_ms": 1.2084870807976076, "mean_action_processing_ms": 0.1327752879991328, "mean_env_wait_ms": 0.8519673280789902, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2508800, "num_agent_steps_trained": 2508800, "num_env_steps_sampled": 1254400, "num_env_steps_trained": 1254400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1254400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2508800, "timers": {"training_iteration_time_ms": 3612.914, "learn_time_ms": 1114.988, "learn_throughput": 11479.942, "synch_weights_time_ms": 12.775}, "counters": {"num_env_steps_sampled": 1254400, "num_env_steps_trained": 1254400, "num_agent_steps_sampled": 2508800, "num_agent_steps_trained": 2508800}, "done": false, "episodes_total": 3136, "training_iteration": 98, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-16", "timestamp": 1666580776, "time_this_iter_s": 3.6175270080566406, "time_total_s": 366.1502945423126, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 366.1502945423126, "timesteps_since_restore": 0, "iterations_since_restore": 98, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.849999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 161.4, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 141.62, "shaped_reward_min": 108, "shaped_reward_max": 161, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.95, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.55, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 13.29, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 13.8, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.4, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.67, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.83, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.24, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 4.98, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.5, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.72, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.23, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.2, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.17, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.25, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.08, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.13, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.83, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.24, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.83, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.24, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 6.3108873358076425e-31, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005003714468330145, "policy_loss": 0.00033585677738301456, "vf_loss": 7.55753231048584, "vf_explained_var": 0.6947751045227051, "kl": 0.0021784892305731773, "entropy": 1.1824755668640137, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1267200, "num_env_steps_trained": 1267200, "num_agent_steps_sampled": 2534400, "num_agent_steps_trained": 2534400}, "sampler_results": {"episode_reward_max": 519.0, "episode_reward_min": 348.0, "episode_reward_mean": 464.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 169.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 232.21}, "custom_metrics": {"sparse_reward_mean": 161.4, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 141.62, "shaped_reward_min": 108, "shaped_reward_max": 161, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.95, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.55, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 13.29, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 13.8, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.78, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 1.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.4, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.67, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.83, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.24, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 4.98, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.5, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.72, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, 
"dish_drop_agent_1_mean": 0.74, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.23, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.2, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.17, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.25, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.08, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.13, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.83, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.24, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.83, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.24, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 
0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [459.0, 399.0, 413.0, 513.0, 459.0, 516.0, 468.0, 510.0, 462.0, 459.0, 399.0, 465.0, 410.0, 401.0, 510.0, 470.0, 473.0, 470.0, 348.0, 513.0, 459.0, 510.0, 473.0, 453.0, 513.0, 459.0, 516.0, 513.0, 419.0, 453.0, 402.0, 450.0, 504.0, 413.0, 459.0, 450.0, 462.0, 513.0, 459.0, 462.0, 462.0, 416.0, 453.0, 465.0, 456.0, 419.0, 513.0, 462.0, 465.0, 513.0, 438.0, 453.0, 459.0, 507.0, 476.0, 510.0, 459.0, 407.0, 481.0, 456.0, 470.0, 405.0, 465.0, 416.0, 444.0, 462.0, 450.0, 473.0, 459.0, 519.0, 467.0, 465.0, 465.0, 473.0, 519.0, 421.0, 519.0, 453.0, 510.0, 462.0, 476.0, 516.0, 470.0, 450.0, 462.0, 456.0, 467.0, 492.0, 455.0, 402.0, 519.0, 459.0, 513.0, 510.0, 444.0, 459.0, 465.0, 513.0, 418.0, 450.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [232.0, 227.0, 209.0, 190.0, 204.0, 209.0, 266.0, 247.0, 221.0, 238.0, 255.0, 261.0, 230.0, 238.0, 249.0, 261.0, 241.0, 221.0, 225.0, 234.0, 187.0, 212.0, 237.0, 228.0, 197.0, 213.0, 207.0, 194.0, 251.0, 259.0, 245.0, 225.0, 231.0, 242.0, 231.0, 239.0, 169.0, 179.0, 270.0, 243.0, 235.0, 224.0, 259.0, 251.0, 245.0, 228.0, 227.0, 226.0, 246.0, 267.0, 219.0, 240.0, 252.0, 264.0, 265.0, 248.0, 205.0, 214.0, 215.0, 238.0, 202.0, 200.0, 231.0, 219.0, 255.0, 249.0, 206.0, 207.0, 244.0, 215.0, 208.0, 242.0, 230.0, 232.0, 254.0, 259.0, 238.0, 221.0, 229.0, 233.0, 218.0, 244.0, 202.0, 214.0, 217.0, 236.0, 226.0, 239.0, 222.0, 234.0, 208.0, 211.0, 258.0, 255.0, 226.0, 236.0, 223.0, 242.0, 251.0, 262.0, 216.0, 222.0, 224.0, 229.0, 231.0, 228.0, 239.0, 268.0, 233.0, 243.0, 253.0, 257.0, 218.0, 241.0, 202.0, 205.0, 236.0, 245.0, 214.0, 242.0, 235.0, 235.0, 218.0, 187.0, 244.0, 221.0, 221.0, 195.0, 233.0, 211.0, 221.0, 241.0, 234.0, 216.0, 241.0, 232.0, 239.0, 220.0, 255.0, 264.0, 231.0, 236.0, 223.0, 242.0, 234.0, 231.0, 242.0, 231.0, 258.0, 261.0, 205.0, 216.0, 258.0, 261.0, 210.0, 243.0, 254.0, 256.0, 235.0, 227.0, 235.0, 241.0, 253.0, 263.0, 233.0, 237.0, 228.0, 222.0, 234.0, 228.0, 230.0, 226.0, 234.0, 233.0, 251.0, 241.0, 215.0, 240.0, 195.0, 207.0, 258.0, 261.0, 225.0, 234.0, 247.0, 266.0, 253.0, 257.0, 234.0, 210.0, 240.0, 219.0, 234.0, 231.0, 253.0, 260.0, 206.0, 212.0, 221.0, 229.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6861435262389413, "mean_inference_ms": 1.2082559596790565, "mean_action_processing_ms": 0.13275586923142962, "mean_env_wait_ms": 0.8514212289161043, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 519.0, "episode_reward_min": 348.0, "episode_reward_mean": 464.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 169.0}, "policy_reward_max": {"ppo": 270.0}, "policy_reward_mean": {"ppo": 232.21}, "hist_stats": {"episode_reward": [459.0, 399.0, 413.0, 513.0, 459.0, 516.0, 468.0, 510.0, 462.0, 459.0, 399.0, 465.0, 410.0, 401.0, 510.0, 470.0, 473.0, 470.0, 348.0, 513.0, 459.0, 510.0, 473.0, 453.0, 513.0, 459.0, 516.0, 513.0, 419.0, 453.0, 402.0, 450.0, 504.0, 413.0, 459.0, 450.0, 462.0, 513.0, 459.0, 462.0, 462.0, 416.0, 453.0, 465.0, 456.0, 419.0, 513.0, 462.0, 465.0, 513.0, 438.0, 453.0, 459.0, 507.0, 476.0, 510.0, 459.0, 407.0, 481.0, 456.0, 470.0, 405.0, 465.0, 416.0, 444.0, 462.0, 450.0, 473.0, 459.0, 519.0, 467.0, 465.0, 465.0, 473.0, 519.0, 421.0, 519.0, 453.0, 510.0, 462.0, 476.0, 516.0, 470.0, 450.0, 462.0, 456.0, 467.0, 492.0, 455.0, 402.0, 519.0, 459.0, 513.0, 510.0, 444.0, 459.0, 465.0, 513.0, 418.0, 450.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [232.0, 227.0, 209.0, 190.0, 204.0, 209.0, 266.0, 247.0, 221.0, 238.0, 255.0, 261.0, 230.0, 238.0, 249.0, 261.0, 241.0, 221.0, 225.0, 234.0, 187.0, 212.0, 237.0, 228.0, 197.0, 213.0, 207.0, 194.0, 251.0, 259.0, 245.0, 225.0, 231.0, 242.0, 231.0, 239.0, 169.0, 179.0, 270.0, 243.0, 235.0, 224.0, 259.0, 251.0, 245.0, 228.0, 227.0, 226.0, 246.0, 267.0, 219.0, 240.0, 252.0, 264.0, 265.0, 248.0, 205.0, 214.0, 215.0, 238.0, 202.0, 200.0, 231.0, 219.0, 255.0, 249.0, 206.0, 207.0, 244.0, 215.0, 208.0, 242.0, 230.0, 232.0, 254.0, 259.0, 238.0, 221.0, 229.0, 233.0, 218.0, 244.0, 202.0, 214.0, 217.0, 236.0, 226.0, 
239.0, 222.0, 234.0, 208.0, 211.0, 258.0, 255.0, 226.0, 236.0, 223.0, 242.0, 251.0, 262.0, 216.0, 222.0, 224.0, 229.0, 231.0, 228.0, 239.0, 268.0, 233.0, 243.0, 253.0, 257.0, 218.0, 241.0, 202.0, 205.0, 236.0, 245.0, 214.0, 242.0, 235.0, 235.0, 218.0, 187.0, 244.0, 221.0, 221.0, 195.0, 233.0, 211.0, 221.0, 241.0, 234.0, 216.0, 241.0, 232.0, 239.0, 220.0, 255.0, 264.0, 231.0, 236.0, 223.0, 242.0, 234.0, 231.0, 242.0, 231.0, 258.0, 261.0, 205.0, 216.0, 258.0, 261.0, 210.0, 243.0, 254.0, 256.0, 235.0, 227.0, 235.0, 241.0, 253.0, 263.0, 233.0, 237.0, 228.0, 222.0, 234.0, 228.0, 230.0, 226.0, 234.0, 233.0, 251.0, 241.0, 215.0, 240.0, 195.0, 207.0, 258.0, 261.0, 225.0, 234.0, 247.0, 266.0, 253.0, 257.0, 234.0, 210.0, 240.0, 219.0, 234.0, 231.0, 253.0, 260.0, 206.0, 212.0, 221.0, 229.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6861435262389413, "mean_inference_ms": 1.2082559596790565, "mean_action_processing_ms": 0.13275586923142962, "mean_env_wait_ms": 0.8514212289161043, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2534400, "num_agent_steps_trained": 2534400, "num_env_steps_sampled": 1267200, "num_env_steps_trained": 1267200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1267200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2534400, "timers": {"training_iteration_time_ms": 3602.781, "learn_time_ms": 1111.695, "learn_throughput": 11513.947, "synch_weights_time_ms": 13.184}, "counters": {"num_env_steps_sampled": 1267200, "num_env_steps_trained": 1267200, "num_agent_steps_sampled": 2534400, "num_agent_steps_trained": 2534400}, "done": false, "episodes_total": 3168, "training_iteration": 99, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-20", "timestamp": 1666580780, "time_this_iter_s": 3.540189743041992, "time_total_s": 369.6904842853546, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 369.6904842853546, "timesteps_since_restore": 0, "iterations_since_restore": 99, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.580000000000002, "ram_util_percent": 10.6}} +{"evaluation": {"average_sparse_reward": 100.0, "num_healthy_workers": 0, "num_recreated_workers": 0}, "custom_metrics": {"sparse_reward_mean": 165.4, "sparse_reward_min": 140, "sparse_reward_max": 180, "shaped_reward_mean": 143.52, "shaped_reward_min": 118, "shaped_reward_max": 161, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.1, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, 
"onion_pickup_agent_1_mean": 14.48, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 13.54, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 13.84, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.77, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.75, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.97, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.06, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.7, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.22, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.25, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.24, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.35, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.18, 
"soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.21, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.97, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.97, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.1554436679038213e-31, "cur_lr": 0.0010000000474974513, "total_loss": -0.0026895212940871716, "policy_loss": -0.0028634630143642426, "vf_loss": 7.576631546020508, "vf_explained_var": 0.6813052296638489, "kl": 0.0019578980281949043, "entropy": 1.167441725730896, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1280000, "num_env_steps_trained": 1280000, "num_agent_steps_sampled": 2560000, "num_agent_steps_trained": 2560000}, "sampler_results": {"episode_reward_max": 519.0, "episode_reward_min": 399.0, "episode_reward_mean": 474.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 187.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 237.16}, "custom_metrics": {"sparse_reward_mean": 165.4, "sparse_reward_min": 140, "sparse_reward_max": 180, "shaped_reward_mean": 143.52, "shaped_reward_min": 118, "shaped_reward_max": 161, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, 
"tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.1, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.48, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 13.54, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 13.84, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.77, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.75, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.33, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 12.97, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.06, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.7, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, 
"dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.81, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.22, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.25, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.24, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.35, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.18, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.21, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 12.97, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.97, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 
0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [504.0, 413.0, 459.0, 450.0, 462.0, 513.0, 459.0, 462.0, 462.0, 416.0, 453.0, 465.0, 456.0, 419.0, 513.0, 462.0, 465.0, 513.0, 438.0, 453.0, 459.0, 507.0, 476.0, 510.0, 459.0, 407.0, 481.0, 456.0, 470.0, 405.0, 465.0, 416.0, 444.0, 462.0, 450.0, 473.0, 459.0, 519.0, 467.0, 465.0, 465.0, 473.0, 519.0, 421.0, 519.0, 453.0, 510.0, 462.0, 476.0, 516.0, 470.0, 450.0, 462.0, 456.0, 467.0, 492.0, 455.0, 402.0, 519.0, 459.0, 513.0, 510.0, 444.0, 459.0, 465.0, 513.0, 418.0, 450.0, 501.0, 444.0, 516.0, 473.0, 467.0, 513.0, 504.0, 446.0, 447.0, 501.0, 516.0, 510.0, 510.0, 504.0, 447.0, 519.0, 513.0, 513.0, 459.0, 447.0, 516.0, 519.0, 510.0, 519.0, 513.0, 470.0, 516.0, 519.0, 399.0, 516.0, 467.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [255.0, 249.0, 206.0, 207.0, 244.0, 215.0, 208.0, 242.0, 230.0, 232.0, 254.0, 259.0, 238.0, 221.0, 229.0, 233.0, 218.0, 244.0, 202.0, 214.0, 217.0, 236.0, 226.0, 239.0, 222.0, 234.0, 208.0, 211.0, 258.0, 255.0, 226.0, 236.0, 223.0, 242.0, 251.0, 262.0, 216.0, 222.0, 224.0, 229.0, 231.0, 228.0, 239.0, 268.0, 233.0, 243.0, 253.0, 257.0, 218.0, 241.0, 202.0, 205.0, 236.0, 245.0, 214.0, 242.0, 235.0, 235.0, 218.0, 187.0, 244.0, 221.0, 221.0, 195.0, 233.0, 211.0, 221.0, 241.0, 234.0, 216.0, 241.0, 232.0, 239.0, 220.0, 255.0, 264.0, 231.0, 236.0, 223.0, 242.0, 234.0, 231.0, 242.0, 231.0, 258.0, 261.0, 205.0, 216.0, 258.0, 261.0, 210.0, 243.0, 254.0, 256.0, 235.0, 227.0, 235.0, 241.0, 253.0, 263.0, 233.0, 237.0, 228.0, 222.0, 234.0, 228.0, 230.0, 226.0, 234.0, 233.0, 251.0, 241.0, 215.0, 240.0, 195.0, 207.0, 258.0, 261.0, 225.0, 234.0, 247.0, 266.0, 253.0, 257.0, 234.0, 210.0, 240.0, 219.0, 234.0, 231.0, 253.0, 260.0, 206.0, 212.0, 221.0, 229.0, 242.0, 259.0, 229.0, 215.0, 256.0, 260.0, 225.0, 248.0, 223.0, 244.0, 255.0, 258.0, 255.0, 249.0, 219.0, 227.0, 222.0, 225.0, 259.0, 242.0, 255.0, 261.0, 260.0, 250.0, 253.0, 257.0, 253.0, 251.0, 219.0, 228.0, 271.0, 248.0, 261.0, 252.0, 251.0, 262.0, 220.0, 239.0, 225.0, 222.0, 262.0, 254.0, 266.0, 253.0, 261.0, 249.0, 250.0, 269.0, 254.0, 259.0, 234.0, 236.0, 253.0, 263.0, 262.0, 257.0, 199.0, 200.0, 252.0, 264.0, 248.0, 219.0, 249.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6860127053263756, "mean_inference_ms": 1.2080154662639697, "mean_action_processing_ms": 0.13273940443506851, "mean_env_wait_ms": 0.8508791865244192, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 519.0, "episode_reward_min": 
399.0, "episode_reward_mean": 474.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 187.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 237.16}, "hist_stats": {"episode_reward": [504.0, 413.0, 459.0, 450.0, 462.0, 513.0, 459.0, 462.0, 462.0, 416.0, 453.0, 465.0, 456.0, 419.0, 513.0, 462.0, 465.0, 513.0, 438.0, 453.0, 459.0, 507.0, 476.0, 510.0, 459.0, 407.0, 481.0, 456.0, 470.0, 405.0, 465.0, 416.0, 444.0, 462.0, 450.0, 473.0, 459.0, 519.0, 467.0, 465.0, 465.0, 473.0, 519.0, 421.0, 519.0, 453.0, 510.0, 462.0, 476.0, 516.0, 470.0, 450.0, 462.0, 456.0, 467.0, 492.0, 455.0, 402.0, 519.0, 459.0, 513.0, 510.0, 444.0, 459.0, 465.0, 513.0, 418.0, 450.0, 501.0, 444.0, 516.0, 473.0, 467.0, 513.0, 504.0, 446.0, 447.0, 501.0, 516.0, 510.0, 510.0, 504.0, 447.0, 519.0, 513.0, 513.0, 459.0, 447.0, 516.0, 519.0, 510.0, 519.0, 513.0, 470.0, 516.0, 519.0, 399.0, 516.0, 467.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [255.0, 249.0, 206.0, 207.0, 244.0, 215.0, 208.0, 242.0, 230.0, 232.0, 254.0, 259.0, 238.0, 221.0, 229.0, 233.0, 218.0, 244.0, 202.0, 214.0, 217.0, 236.0, 226.0, 239.0, 222.0, 234.0, 208.0, 211.0, 258.0, 255.0, 226.0, 236.0, 223.0, 242.0, 251.0, 262.0, 216.0, 222.0, 224.0, 229.0, 231.0, 228.0, 239.0, 268.0, 233.0, 243.0, 253.0, 257.0, 218.0, 241.0, 202.0, 205.0, 236.0, 245.0, 214.0, 242.0, 235.0, 235.0, 218.0, 187.0, 244.0, 221.0, 221.0, 195.0, 233.0, 211.0, 221.0, 241.0, 234.0, 216.0, 241.0, 232.0, 239.0, 220.0, 
255.0, 264.0, 231.0, 236.0, 223.0, 242.0, 234.0, 231.0, 242.0, 231.0, 258.0, 261.0, 205.0, 216.0, 258.0, 261.0, 210.0, 243.0, 254.0, 256.0, 235.0, 227.0, 235.0, 241.0, 253.0, 263.0, 233.0, 237.0, 228.0, 222.0, 234.0, 228.0, 230.0, 226.0, 234.0, 233.0, 251.0, 241.0, 215.0, 240.0, 195.0, 207.0, 258.0, 261.0, 225.0, 234.0, 247.0, 266.0, 253.0, 257.0, 234.0, 210.0, 240.0, 219.0, 234.0, 231.0, 253.0, 260.0, 206.0, 212.0, 221.0, 229.0, 242.0, 259.0, 229.0, 215.0, 256.0, 260.0, 225.0, 248.0, 223.0, 244.0, 255.0, 258.0, 255.0, 249.0, 219.0, 227.0, 222.0, 225.0, 259.0, 242.0, 255.0, 261.0, 260.0, 250.0, 253.0, 257.0, 253.0, 251.0, 219.0, 228.0, 271.0, 248.0, 261.0, 252.0, 251.0, 262.0, 220.0, 239.0, 225.0, 222.0, 262.0, 254.0, 266.0, 253.0, 261.0, 249.0, 250.0, 269.0, 254.0, 259.0, 234.0, 236.0, 253.0, 263.0, 262.0, 257.0, 199.0, 200.0, 252.0, 264.0, 248.0, 219.0, 249.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6860127053263756, "mean_inference_ms": 1.2080154662639697, "mean_action_processing_ms": 0.13273940443506851, "mean_env_wait_ms": 0.8508791865244192, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2560000, "num_agent_steps_trained": 2560000, "num_env_steps_sampled": 1280000, "num_env_steps_trained": 1280000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1280000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2560000, "timers": {"training_iteration_time_ms": 3603.202, "learn_time_ms": 1117.688, "learn_throughput": 11452.208, "synch_weights_time_ms": 12.968}, "counters": {"num_env_steps_sampled": 1280000, "num_env_steps_trained": 1280000, "num_agent_steps_sampled": 2560000, "num_agent_steps_trained": 2560000}, "done": false, "episodes_total": 3200, "training_iteration": 100, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-27", "timestamp": 1666580787, 
"time_this_iter_s": 7.235398530960083, "time_total_s": 376.9258828163147, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": 
"truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, 
"evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 
1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 
67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 
0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, 
"policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 376.9258828163147, "timesteps_since_restore": 0, "iterations_since_restore": 100, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 14.363636363636367, "ram_util_percent": 10.599999999999998}} +{"custom_metrics": {"sparse_reward_mean": 167.0, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 144.04, "shaped_reward_min": 93, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.7, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 13.97, 
"onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.07, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 13.38, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.81, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.75, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.42, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.58, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 12.91, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.74, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.36, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.31, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.08, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.01, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.44, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.58, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 12.91, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.58, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 12.91, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.5777218339519106e-31, "cur_lr": 0.0010000000474974513, "total_loss": -0.001677508233115077, "policy_loss": -0.0018479716964066029, "vf_loss": 7.583566188812256, "vf_explained_var": 0.7014689445495605, "kl": 0.001794546958990395, "entropy": 1.1757853031158447, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1292800, "num_env_steps_trained": 1292800, "num_agent_steps_sampled": 2585600, "num_agent_steps_trained": 2585600}, "sampler_results": {"episode_reward_max": 519.0, "episode_reward_min": 333.0, "episode_reward_mean": 478.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 158.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 239.02}, "custom_metrics": {"sparse_reward_mean": 167.0, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 144.04, "shaped_reward_min": 93, "shaped_reward_max": 159, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.7, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 13.97, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.07, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 13.38, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.81, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.75, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.42, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.58, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 12.91, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.48, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.74, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.8, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.36, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.31, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.08, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.01, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.44, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.58, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 12.91, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.58, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 12.91, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [444.0, 462.0, 450.0, 473.0, 459.0, 519.0, 467.0, 465.0, 465.0, 473.0, 519.0, 421.0, 519.0, 453.0, 510.0, 462.0, 476.0, 516.0, 470.0, 450.0, 462.0, 456.0, 467.0, 492.0, 455.0, 402.0, 519.0, 459.0, 513.0, 510.0, 444.0, 459.0, 465.0, 513.0, 418.0, 450.0, 501.0, 444.0, 516.0, 473.0, 467.0, 513.0, 504.0, 446.0, 447.0, 501.0, 516.0, 510.0, 510.0, 504.0, 447.0, 519.0, 513.0, 513.0, 459.0, 447.0, 516.0, 519.0, 510.0, 519.0, 513.0, 470.0, 516.0, 519.0, 399.0, 516.0, 467.0, 513.0, 513.0, 462.0, 513.0, 459.0, 516.0, 333.0, 513.0, 410.0, 459.0, 462.0, 456.0, 461.0, 516.0, 513.0, 447.0, 465.0, 476.0, 519.0, 447.0, 401.0, 513.0, 513.0, 470.0, 410.0, 516.0, 516.0, 459.0, 513.0, 447.0, 450.0, 516.0, 456.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [233.0, 211.0, 221.0, 241.0, 234.0, 216.0, 241.0, 232.0, 239.0, 220.0, 255.0, 264.0, 231.0, 236.0, 223.0, 242.0, 234.0, 231.0, 242.0, 231.0, 258.0, 261.0, 205.0, 216.0, 258.0, 261.0, 210.0, 243.0, 254.0, 256.0, 235.0, 227.0, 235.0, 241.0, 253.0, 263.0, 233.0, 237.0, 228.0, 222.0, 234.0, 228.0, 230.0, 226.0, 234.0, 233.0, 251.0, 241.0, 215.0, 240.0, 195.0, 207.0, 258.0, 261.0, 225.0, 234.0, 247.0, 266.0, 253.0, 257.0, 234.0, 210.0, 240.0, 219.0, 234.0, 231.0, 253.0, 260.0, 206.0, 212.0, 221.0, 229.0, 242.0, 259.0, 229.0, 215.0, 256.0, 260.0, 225.0, 248.0, 223.0, 244.0, 255.0, 258.0, 255.0, 249.0, 219.0, 227.0, 222.0, 225.0, 259.0, 242.0, 255.0, 261.0, 260.0, 250.0, 253.0, 257.0, 253.0, 251.0, 219.0, 228.0, 271.0, 248.0, 261.0, 252.0, 251.0, 262.0, 220.0, 239.0, 225.0, 222.0, 262.0, 254.0, 266.0, 253.0, 261.0, 249.0, 250.0, 269.0, 254.0, 259.0, 234.0, 236.0, 253.0, 263.0, 262.0, 257.0, 199.0, 200.0, 252.0, 264.0, 248.0, 219.0, 249.0, 264.0, 263.0, 250.0, 234.0, 228.0, 261.0, 252.0, 218.0, 241.0, 254.0, 262.0, 175.0, 158.0, 253.0, 260.0, 208.0, 202.0, 228.0, 231.0, 240.0, 222.0, 215.0, 241.0, 214.0, 247.0, 260.0, 256.0, 254.0, 259.0, 231.0, 216.0, 231.0, 234.0, 250.0, 226.0, 260.0, 259.0, 230.0, 217.0, 199.0, 202.0, 263.0, 250.0, 250.0, 263.0, 241.0, 229.0, 204.0, 206.0, 254.0, 262.0, 249.0, 267.0, 221.0, 238.0, 261.0, 252.0, 231.0, 216.0, 223.0, 227.0, 250.0, 266.0, 228.0, 228.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6858588435315744, "mean_inference_ms": 1.207797081605332, "mean_action_processing_ms": 0.13272237266615255, "mean_env_wait_ms": 0.8503743147416343, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 519.0, "episode_reward_min": 333.0, 
"episode_reward_mean": 478.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 158.0}, "policy_reward_max": {"ppo": 271.0}, "policy_reward_mean": {"ppo": 239.02}, "hist_stats": {"episode_reward": [444.0, 462.0, 450.0, 473.0, 459.0, 519.0, 467.0, 465.0, 465.0, 473.0, 519.0, 421.0, 519.0, 453.0, 510.0, 462.0, 476.0, 516.0, 470.0, 450.0, 462.0, 456.0, 467.0, 492.0, 455.0, 402.0, 519.0, 459.0, 513.0, 510.0, 444.0, 459.0, 465.0, 513.0, 418.0, 450.0, 501.0, 444.0, 516.0, 473.0, 467.0, 513.0, 504.0, 446.0, 447.0, 501.0, 516.0, 510.0, 510.0, 504.0, 447.0, 519.0, 513.0, 513.0, 459.0, 447.0, 516.0, 519.0, 510.0, 519.0, 513.0, 470.0, 516.0, 519.0, 399.0, 516.0, 467.0, 513.0, 513.0, 462.0, 513.0, 459.0, 516.0, 333.0, 513.0, 410.0, 459.0, 462.0, 456.0, 461.0, 516.0, 513.0, 447.0, 465.0, 476.0, 519.0, 447.0, 401.0, 513.0, 513.0, 470.0, 410.0, 516.0, 516.0, 459.0, 513.0, 447.0, 450.0, 516.0, 456.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [233.0, 211.0, 221.0, 241.0, 234.0, 216.0, 241.0, 232.0, 239.0, 220.0, 255.0, 264.0, 231.0, 236.0, 223.0, 242.0, 234.0, 231.0, 242.0, 231.0, 258.0, 261.0, 205.0, 216.0, 258.0, 261.0, 210.0, 243.0, 254.0, 256.0, 235.0, 227.0, 235.0, 241.0, 253.0, 263.0, 233.0, 237.0, 228.0, 222.0, 234.0, 228.0, 230.0, 226.0, 234.0, 233.0, 251.0, 241.0, 215.0, 240.0, 195.0, 207.0, 258.0, 261.0, 225.0, 234.0, 247.0, 266.0, 253.0, 257.0, 234.0, 210.0, 240.0, 219.0, 234.0, 231.0, 253.0, 260.0, 206.0, 212.0, 221.0, 229.0, 242.0, 259.0, 229.0, 
215.0, 256.0, 260.0, 225.0, 248.0, 223.0, 244.0, 255.0, 258.0, 255.0, 249.0, 219.0, 227.0, 222.0, 225.0, 259.0, 242.0, 255.0, 261.0, 260.0, 250.0, 253.0, 257.0, 253.0, 251.0, 219.0, 228.0, 271.0, 248.0, 261.0, 252.0, 251.0, 262.0, 220.0, 239.0, 225.0, 222.0, 262.0, 254.0, 266.0, 253.0, 261.0, 249.0, 250.0, 269.0, 254.0, 259.0, 234.0, 236.0, 253.0, 263.0, 262.0, 257.0, 199.0, 200.0, 252.0, 264.0, 248.0, 219.0, 249.0, 264.0, 263.0, 250.0, 234.0, 228.0, 261.0, 252.0, 218.0, 241.0, 254.0, 262.0, 175.0, 158.0, 253.0, 260.0, 208.0, 202.0, 228.0, 231.0, 240.0, 222.0, 215.0, 241.0, 214.0, 247.0, 260.0, 256.0, 254.0, 259.0, 231.0, 216.0, 231.0, 234.0, 250.0, 226.0, 260.0, 259.0, 230.0, 217.0, 199.0, 202.0, 263.0, 250.0, 250.0, 263.0, 241.0, 229.0, 204.0, 206.0, 254.0, 262.0, 249.0, 267.0, 221.0, 238.0, 261.0, 252.0, 231.0, 216.0, 223.0, 227.0, 250.0, 266.0, 228.0, 228.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6858588435315744, "mean_inference_ms": 1.207797081605332, "mean_action_processing_ms": 0.13272237266615255, "mean_env_wait_ms": 0.8503743147416343, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2585600, "num_agent_steps_trained": 2585600, "num_env_steps_sampled": 1292800, "num_env_steps_trained": 1292800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1292800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2585600, "timers": {"training_iteration_time_ms": 3600.864, "learn_time_ms": 1114.9, "learn_throughput": 11480.855, "synch_weights_time_ms": 13.357}, "counters": {"num_env_steps_sampled": 1292800, "num_env_steps_trained": 1292800, "num_agent_steps_sampled": 2585600, "num_agent_steps_trained": 2585600}, "done": false, "episodes_total": 3232, "training_iteration": 101, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-31", "timestamp": 1666580791, "time_this_iter_s": 
3.6746954917907715, "time_total_s": 380.60057830810547, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", 
"remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, 
"evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": 
"complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, 
"evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 
0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, 
"policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 380.60057830810547, "timesteps_since_restore": 0, "iterations_since_restore": 101, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.740000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 167.2, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 143.43, "shaped_reward_min": 93, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.47, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 14.14, "onion_pickup_agent_1_min": 
6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.94, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 13.57, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.52, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 13.08, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.17, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.9, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.36, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.35, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.12, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.44, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.09, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, 
"soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.52, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 13.08, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.52, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 13.08, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 7.888609169759553e-32, "cur_lr": 0.0010000000474974513, "total_loss": -0.004032195080071688, "policy_loss": -0.004206694662570953, "vf_loss": 7.6045732498168945, "vf_explained_var": 0.6989070177078247, "kl": 0.0014940658584237099, "entropy": 1.1719143390655518, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1305600, "num_env_steps_trained": 1305600, "num_agent_steps_sampled": 2611200, "num_agent_steps_trained": 2611200}, "sampler_results": {"episode_reward_max": 522.0, "episode_reward_min": 333.0, "episode_reward_mean": 477.83, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 158.0}, "policy_reward_max": {"ppo": 272.0}, "policy_reward_mean": {"ppo": 238.915}, "custom_metrics": {"sparse_reward_mean": 167.2, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 143.43, "shaped_reward_min": 93, "shaped_reward_max": 162, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.47, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 14.14, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.94, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 13.57, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.6, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.52, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 13.08, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.17, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.43, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.86, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.9, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.36, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.35, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 4.12, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.44, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.09, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.52, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 13.08, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.52, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 13.08, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [465.0, 513.0, 418.0, 450.0, 501.0, 444.0, 516.0, 473.0, 467.0, 513.0, 504.0, 446.0, 447.0, 501.0, 516.0, 510.0, 510.0, 504.0, 447.0, 519.0, 513.0, 513.0, 459.0, 447.0, 516.0, 519.0, 510.0, 519.0, 513.0, 470.0, 516.0, 519.0, 399.0, 516.0, 467.0, 513.0, 513.0, 462.0, 513.0, 459.0, 516.0, 333.0, 513.0, 410.0, 459.0, 462.0, 456.0, 461.0, 516.0, 513.0, 447.0, 465.0, 476.0, 519.0, 447.0, 401.0, 513.0, 513.0, 470.0, 410.0, 516.0, 516.0, 459.0, 513.0, 447.0, 450.0, 516.0, 456.0, 507.0, 513.0, 419.0, 510.0, 519.0, 444.0, 513.0, 453.0, 441.0, 510.0, 435.0, 510.0, 450.0, 453.0, 473.0, 522.0, 462.0, 519.0, 419.0, 516.0, 462.0, 396.0, 459.0, 468.0, 387.0, 501.0, 455.0, 465.0, 516.0, 462.0, 519.0, 412.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [234.0, 231.0, 253.0, 260.0, 206.0, 212.0, 221.0, 229.0, 242.0, 259.0, 229.0, 215.0, 256.0, 260.0, 225.0, 248.0, 223.0, 244.0, 255.0, 258.0, 255.0, 249.0, 219.0, 227.0, 222.0, 225.0, 259.0, 242.0, 255.0, 261.0, 260.0, 250.0, 253.0, 257.0, 253.0, 251.0, 219.0, 228.0, 271.0, 248.0, 261.0, 252.0, 251.0, 262.0, 220.0, 239.0, 225.0, 222.0, 262.0, 254.0, 266.0, 253.0, 261.0, 249.0, 250.0, 269.0, 254.0, 259.0, 234.0, 236.0, 253.0, 263.0, 262.0, 257.0, 199.0, 200.0, 252.0, 264.0, 248.0, 219.0, 249.0, 264.0, 263.0, 250.0, 234.0, 228.0, 261.0, 252.0, 218.0, 241.0, 254.0, 262.0, 175.0, 158.0, 253.0, 260.0, 208.0, 202.0, 228.0, 231.0, 240.0, 222.0, 215.0, 241.0, 214.0, 247.0, 260.0, 256.0, 254.0, 259.0, 231.0, 216.0, 231.0, 234.0, 250.0, 226.0, 260.0, 259.0, 230.0, 217.0, 199.0, 202.0, 263.0, 250.0, 250.0, 263.0, 241.0, 229.0, 204.0, 206.0, 254.0, 262.0, 249.0, 267.0, 221.0, 238.0, 261.0, 252.0, 231.0, 216.0, 223.0, 227.0, 250.0, 266.0, 228.0, 228.0, 258.0, 249.0, 252.0, 261.0, 216.0, 203.0, 249.0, 261.0, 252.0, 267.0, 223.0, 221.0, 261.0, 252.0, 222.0, 231.0, 225.0, 216.0, 263.0, 247.0, 207.0, 228.0, 260.0, 250.0, 233.0, 217.0, 229.0, 224.0, 229.0, 244.0, 267.0, 255.0, 217.0, 245.0, 270.0, 249.0, 213.0, 206.0, 250.0, 266.0, 242.0, 220.0, 181.0, 215.0, 231.0, 228.0, 230.0, 238.0, 200.0, 187.0, 244.0, 257.0, 219.0, 236.0, 247.0, 218.0, 247.0, 269.0, 235.0, 227.0, 272.0, 247.0, 211.0, 201.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6857117863988279, "mean_inference_ms": 1.2078722559315558, "mean_action_processing_ms": 0.1327028684250975, "mean_env_wait_ms": 0.850440345842403, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 522.0, "episode_reward_min": 333.0, "episode_reward_mean": 477.83, 
"episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 158.0}, "policy_reward_max": {"ppo": 272.0}, "policy_reward_mean": {"ppo": 238.915}, "hist_stats": {"episode_reward": [465.0, 513.0, 418.0, 450.0, 501.0, 444.0, 516.0, 473.0, 467.0, 513.0, 504.0, 446.0, 447.0, 501.0, 516.0, 510.0, 510.0, 504.0, 447.0, 519.0, 513.0, 513.0, 459.0, 447.0, 516.0, 519.0, 510.0, 519.0, 513.0, 470.0, 516.0, 519.0, 399.0, 516.0, 467.0, 513.0, 513.0, 462.0, 513.0, 459.0, 516.0, 333.0, 513.0, 410.0, 459.0, 462.0, 456.0, 461.0, 516.0, 513.0, 447.0, 465.0, 476.0, 519.0, 447.0, 401.0, 513.0, 513.0, 470.0, 410.0, 516.0, 516.0, 459.0, 513.0, 447.0, 450.0, 516.0, 456.0, 507.0, 513.0, 419.0, 510.0, 519.0, 444.0, 513.0, 453.0, 441.0, 510.0, 435.0, 510.0, 450.0, 453.0, 473.0, 522.0, 462.0, 519.0, 419.0, 516.0, 462.0, 396.0, 459.0, 468.0, 387.0, 501.0, 455.0, 465.0, 516.0, 462.0, 519.0, 412.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [234.0, 231.0, 253.0, 260.0, 206.0, 212.0, 221.0, 229.0, 242.0, 259.0, 229.0, 215.0, 256.0, 260.0, 225.0, 248.0, 223.0, 244.0, 255.0, 258.0, 255.0, 249.0, 219.0, 227.0, 222.0, 225.0, 259.0, 242.0, 255.0, 261.0, 260.0, 250.0, 253.0, 257.0, 253.0, 251.0, 219.0, 228.0, 271.0, 248.0, 261.0, 252.0, 251.0, 262.0, 220.0, 239.0, 225.0, 222.0, 262.0, 254.0, 266.0, 253.0, 261.0, 249.0, 250.0, 269.0, 254.0, 259.0, 234.0, 236.0, 253.0, 263.0, 262.0, 257.0, 199.0, 200.0, 252.0, 264.0, 248.0, 219.0, 249.0, 264.0, 263.0, 250.0, 234.0, 228.0, 261.0, 252.0, 218.0, 
241.0, 254.0, 262.0, 175.0, 158.0, 253.0, 260.0, 208.0, 202.0, 228.0, 231.0, 240.0, 222.0, 215.0, 241.0, 214.0, 247.0, 260.0, 256.0, 254.0, 259.0, 231.0, 216.0, 231.0, 234.0, 250.0, 226.0, 260.0, 259.0, 230.0, 217.0, 199.0, 202.0, 263.0, 250.0, 250.0, 263.0, 241.0, 229.0, 204.0, 206.0, 254.0, 262.0, 249.0, 267.0, 221.0, 238.0, 261.0, 252.0, 231.0, 216.0, 223.0, 227.0, 250.0, 266.0, 228.0, 228.0, 258.0, 249.0, 252.0, 261.0, 216.0, 203.0, 249.0, 261.0, 252.0, 267.0, 223.0, 221.0, 261.0, 252.0, 222.0, 231.0, 225.0, 216.0, 263.0, 247.0, 207.0, 228.0, 260.0, 250.0, 233.0, 217.0, 229.0, 224.0, 229.0, 244.0, 267.0, 255.0, 217.0, 245.0, 270.0, 249.0, 213.0, 206.0, 250.0, 266.0, 242.0, 220.0, 181.0, 215.0, 231.0, 228.0, 230.0, 238.0, 200.0, 187.0, 244.0, 257.0, 219.0, 236.0, 247.0, 218.0, 247.0, 269.0, 235.0, 227.0, 272.0, 247.0, 211.0, 201.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6857117863988279, "mean_inference_ms": 1.2078722559315558, "mean_action_processing_ms": 0.1327028684250975, "mean_env_wait_ms": 0.850440345842403, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2611200, "num_agent_steps_trained": 2611200, "num_env_steps_sampled": 1305600, "num_env_steps_trained": 1305600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1305600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2611200, "timers": {"training_iteration_time_ms": 3629.146, "learn_time_ms": 1112.093, "learn_throughput": 11509.827, "synch_weights_time_ms": 13.994}, "counters": {"num_env_steps_sampled": 1305600, "num_env_steps_trained": 1305600, "num_agent_steps_sampled": 2611200, "num_agent_steps_trained": 2611200}, "done": false, "episodes_total": 3264, "training_iteration": 102, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-35", "timestamp": 1666580795, "time_this_iter_s": 4.029216766357422, "time_total_s": 
384.6297950744629, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", 
"remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, 
"evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", 
"policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, 
"policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 384.6297950744629, "timesteps_since_restore": 0, "iterations_since_restore": 102, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.53333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 164.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 142.26, "shaped_reward_min": 48, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.53, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 14.05, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 
22, "useful_onion_pickup_agent_0_mean": 13.94, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 13.46, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.53, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 12.85, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.38, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.79, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.25, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.99, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.49, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 3.95, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.41, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.53, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 12.85, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.53, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 12.85, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.9443045848797766e-32, "cur_lr": 0.0010000000474974513, "total_loss": 0.000442040734924376, "policy_loss": 0.0002623022301122546, "vf_loss": 7.644353866577148, "vf_explained_var": 0.6994333267211914, "kl": 0.002050078473985195, "entropy": 1.1693940162658691, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1318400, "num_env_steps_trained": 1318400, "num_agent_steps_sampled": 2636800, "num_agent_steps_trained": 2636800}, "sampler_results": {"episode_reward_max": 525.0, "episode_reward_min": 168.0, "episode_reward_mean": 470.66, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 80.0}, "policy_reward_max": {"ppo": 272.0}, "policy_reward_mean": {"ppo": 235.33}, "custom_metrics": {"sparse_reward_mean": 164.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 142.26, "shaped_reward_min": 48, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.53, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 14.05, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.94, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 13.46, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.67, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.4, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.53, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 12.85, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.38, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.82, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.79, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.35, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.25, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 5, "soup_pickup_agent_0_mean": 3.99, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.49, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 3.95, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.41, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.53, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 12.85, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.53, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 12.85, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [399.0, 516.0, 467.0, 513.0, 513.0, 462.0, 513.0, 459.0, 516.0, 333.0, 513.0, 410.0, 459.0, 462.0, 456.0, 461.0, 516.0, 513.0, 447.0, 465.0, 476.0, 519.0, 447.0, 401.0, 513.0, 513.0, 470.0, 410.0, 516.0, 516.0, 459.0, 513.0, 447.0, 450.0, 516.0, 456.0, 507.0, 513.0, 419.0, 510.0, 519.0, 444.0, 513.0, 453.0, 441.0, 510.0, 435.0, 510.0, 450.0, 453.0, 473.0, 522.0, 462.0, 519.0, 419.0, 516.0, 462.0, 396.0, 459.0, 468.0, 387.0, 501.0, 455.0, 465.0, 516.0, 462.0, 519.0, 412.0, 408.0, 513.0, 393.0, 507.0, 422.0, 344.0, 516.0, 462.0, 501.0, 470.0, 456.0, 408.0, 513.0, 519.0, 422.0, 525.0, 525.0, 504.0, 459.0, 168.0, 430.0, 484.0, 516.0, 504.0, 519.0, 516.0, 435.0, 507.0, 470.0, 525.0, 516.0, 504.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [199.0, 200.0, 252.0, 264.0, 248.0, 219.0, 249.0, 264.0, 263.0, 250.0, 234.0, 228.0, 261.0, 252.0, 218.0, 241.0, 254.0, 262.0, 175.0, 158.0, 253.0, 260.0, 208.0, 202.0, 228.0, 231.0, 240.0, 222.0, 215.0, 241.0, 214.0, 247.0, 260.0, 256.0, 254.0, 259.0, 231.0, 216.0, 231.0, 234.0, 250.0, 226.0, 260.0, 259.0, 230.0, 217.0, 199.0, 202.0, 263.0, 250.0, 250.0, 263.0, 241.0, 229.0, 204.0, 206.0, 254.0, 262.0, 249.0, 267.0, 221.0, 238.0, 261.0, 252.0, 231.0, 216.0, 223.0, 227.0, 250.0, 266.0, 228.0, 228.0, 258.0, 249.0, 252.0, 261.0, 216.0, 203.0, 249.0, 261.0, 252.0, 267.0, 223.0, 221.0, 261.0, 252.0, 222.0, 231.0, 225.0, 216.0, 263.0, 247.0, 207.0, 228.0, 260.0, 250.0, 233.0, 217.0, 229.0, 224.0, 229.0, 244.0, 267.0, 255.0, 217.0, 245.0, 270.0, 249.0, 213.0, 206.0, 250.0, 266.0, 242.0, 220.0, 181.0, 215.0, 231.0, 228.0, 230.0, 238.0, 200.0, 187.0, 244.0, 257.0, 219.0, 236.0, 247.0, 218.0, 247.0, 269.0, 235.0, 227.0, 272.0, 247.0, 211.0, 201.0, 208.0, 200.0, 267.0, 246.0, 197.0, 196.0, 254.0, 253.0, 214.0, 208.0, 165.0, 179.0, 253.0, 263.0, 230.0, 232.0, 266.0, 235.0, 231.0, 239.0, 222.0, 234.0, 199.0, 209.0, 269.0, 244.0, 260.0, 259.0, 209.0, 213.0, 266.0, 259.0, 260.0, 265.0, 250.0, 254.0, 217.0, 242.0, 80.0, 88.0, 217.0, 213.0, 231.0, 253.0, 256.0, 260.0, 238.0, 266.0, 258.0, 261.0, 246.0, 270.0, 219.0, 216.0, 244.0, 263.0, 221.0, 249.0, 256.0, 269.0, 265.0, 251.0, 256.0, 248.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6855786813359219, "mean_inference_ms": 1.2080034013854677, "mean_action_processing_ms": 0.13268411791367785, "mean_env_wait_ms": 0.8505193336217425, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 525.0, "episode_reward_min": 168.0, "episode_reward_mean": 470.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 80.0}, "policy_reward_max": {"ppo": 272.0}, "policy_reward_mean": {"ppo": 235.33}, "hist_stats": {"episode_reward": [399.0, 516.0, 467.0, 513.0, 513.0, 462.0, 513.0, 459.0, 516.0, 333.0, 513.0, 410.0, 459.0, 462.0, 456.0, 461.0, 516.0, 513.0, 447.0, 465.0, 476.0, 519.0, 447.0, 401.0, 513.0, 513.0, 470.0, 410.0, 516.0, 516.0, 459.0, 513.0, 447.0, 450.0, 516.0, 456.0, 507.0, 513.0, 419.0, 510.0, 519.0, 444.0, 513.0, 453.0, 441.0, 510.0, 435.0, 510.0, 450.0, 453.0, 473.0, 522.0, 462.0, 519.0, 419.0, 516.0, 462.0, 396.0, 459.0, 468.0, 387.0, 501.0, 455.0, 465.0, 516.0, 462.0, 519.0, 412.0, 408.0, 513.0, 393.0, 507.0, 422.0, 344.0, 516.0, 462.0, 501.0, 470.0, 456.0, 408.0, 513.0, 519.0, 422.0, 525.0, 525.0, 504.0, 459.0, 168.0, 430.0, 484.0, 516.0, 504.0, 519.0, 516.0, 435.0, 507.0, 470.0, 525.0, 516.0, 504.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [199.0, 200.0, 252.0, 264.0, 248.0, 219.0, 249.0, 264.0, 263.0, 250.0, 234.0, 228.0, 261.0, 252.0, 218.0, 241.0, 254.0, 262.0, 175.0, 158.0, 253.0, 260.0, 208.0, 202.0, 228.0, 231.0, 240.0, 222.0, 215.0, 241.0, 214.0, 247.0, 260.0, 256.0, 254.0, 259.0, 231.0, 216.0, 231.0, 234.0, 250.0, 226.0, 260.0, 259.0, 230.0, 217.0, 199.0, 202.0, 263.0, 250.0, 250.0, 263.0, 241.0, 229.0, 204.0, 206.0, 254.0, 262.0, 249.0, 267.0, 221.0, 238.0, 261.0, 252.0, 231.0, 216.0, 223.0, 227.0, 250.0, 266.0, 228.0, 228.0, 258.0, 249.0, 252.0, 261.0, 216.0, 203.0, 249.0, 261.0, 252.0, 267.0, 223.0, 221.0, 261.0, 252.0, 222.0, 
231.0, 225.0, 216.0, 263.0, 247.0, 207.0, 228.0, 260.0, 250.0, 233.0, 217.0, 229.0, 224.0, 229.0, 244.0, 267.0, 255.0, 217.0, 245.0, 270.0, 249.0, 213.0, 206.0, 250.0, 266.0, 242.0, 220.0, 181.0, 215.0, 231.0, 228.0, 230.0, 238.0, 200.0, 187.0, 244.0, 257.0, 219.0, 236.0, 247.0, 218.0, 247.0, 269.0, 235.0, 227.0, 272.0, 247.0, 211.0, 201.0, 208.0, 200.0, 267.0, 246.0, 197.0, 196.0, 254.0, 253.0, 214.0, 208.0, 165.0, 179.0, 253.0, 263.0, 230.0, 232.0, 266.0, 235.0, 231.0, 239.0, 222.0, 234.0, 199.0, 209.0, 269.0, 244.0, 260.0, 259.0, 209.0, 213.0, 266.0, 259.0, 260.0, 265.0, 250.0, 254.0, 217.0, 242.0, 80.0, 88.0, 217.0, 213.0, 231.0, 253.0, 256.0, 260.0, 238.0, 266.0, 258.0, 261.0, 246.0, 270.0, 219.0, 216.0, 244.0, 263.0, 221.0, 249.0, 256.0, 269.0, 265.0, 251.0, 256.0, 248.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6855786813359219, "mean_inference_ms": 1.2080034013854677, "mean_action_processing_ms": 0.13268411791367785, "mean_env_wait_ms": 0.8505193336217425, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2636800, "num_agent_steps_trained": 2636800, "num_env_steps_sampled": 1318400, "num_env_steps_trained": 1318400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1318400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2636800, "timers": {"training_iteration_time_ms": 3640.331, "learn_time_ms": 1102.938, "learn_throughput": 11605.365, "synch_weights_time_ms": 13.813}, "counters": {"num_env_steps_sampled": 1318400, "num_env_steps_trained": 1318400, "num_agent_steps_sampled": 2636800, "num_agent_steps_trained": 2636800}, "done": false, "episodes_total": 3296, "training_iteration": 103, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-39", "timestamp": 1666580799, "time_this_iter_s": 3.794299602508545, "time_total_s": 388.42409467697144, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 388.42409467697144, "timesteps_since_restore": 0, "iterations_since_restore": 103, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.8, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 162.4, "sparse_reward_min": 20, "sparse_reward_max": 180, "shaped_reward_mean": 141.22, "shaped_reward_min": 26, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.08, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.03, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 13.56, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.49, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.17, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.07, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.1, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.87, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.27, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.24, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.2, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.25, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.16, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.17, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.07, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.17, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.07, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.9721522924398883e-32, "cur_lr": 0.0010000000474974513, "total_loss": -0.0031845830380916595, "policy_loss": -0.0033730631694197655, "vf_loss": 7.6778059005737305, "vf_explained_var": 0.6982693672180176, "kl": 0.0018961275927722454, "entropy": 1.1586014032363892, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1331200, "num_env_steps_trained": 1331200, "num_agent_steps_sampled": 2662400, "num_agent_steps_trained": 2662400}, "sampler_results": {"episode_reward_max": 525.0, "episode_reward_min": 66.0, "episode_reward_mean": 466.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 277.0}, "policy_reward_mean": {"ppo": 233.01}, "custom_metrics": {"sparse_reward_mean": 162.4, "sparse_reward_min": 20, "sparse_reward_max": 180, "shaped_reward_mean": 141.22, "shaped_reward_min": 26, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.08, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.03, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.56, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 13.49, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.59, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.17, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.07, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.1, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 3.44, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.72, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.87, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.27, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.24, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.2, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.25, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.11, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.16, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.17, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.07, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.17, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.07, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [447.0, 450.0, 516.0, 456.0, 507.0, 513.0, 419.0, 510.0, 519.0, 444.0, 513.0, 453.0, 441.0, 510.0, 435.0, 510.0, 450.0, 453.0, 473.0, 522.0, 462.0, 519.0, 419.0, 516.0, 462.0, 396.0, 459.0, 468.0, 387.0, 501.0, 455.0, 465.0, 516.0, 462.0, 519.0, 412.0, 408.0, 513.0, 393.0, 507.0, 422.0, 344.0, 516.0, 462.0, 501.0, 470.0, 456.0, 408.0, 513.0, 519.0, 422.0, 525.0, 525.0, 504.0, 459.0, 168.0, 430.0, 484.0, 516.0, 504.0, 519.0, 516.0, 435.0, 507.0, 470.0, 525.0, 516.0, 504.0, 473.0, 525.0, 513.0, 416.0, 449.0, 456.0, 513.0, 453.0, 462.0, 470.0, 470.0, 424.0, 516.0, 516.0, 464.0, 513.0, 450.0, 180.0, 465.0, 467.0, 405.0, 516.0, 513.0, 408.0, 507.0, 513.0, 519.0, 498.0, 513.0, 516.0, 66.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [231.0, 216.0, 223.0, 227.0, 250.0, 266.0, 228.0, 228.0, 258.0, 249.0, 252.0, 261.0, 216.0, 203.0, 249.0, 261.0, 252.0, 267.0, 223.0, 221.0, 261.0, 252.0, 222.0, 231.0, 225.0, 216.0, 263.0, 247.0, 207.0, 228.0, 260.0, 250.0, 233.0, 217.0, 229.0, 224.0, 229.0, 244.0, 267.0, 255.0, 217.0, 245.0, 270.0, 249.0, 213.0, 206.0, 250.0, 266.0, 242.0, 220.0, 181.0, 215.0, 231.0, 228.0, 230.0, 238.0, 200.0, 187.0, 244.0, 257.0, 219.0, 236.0, 247.0, 218.0, 247.0, 269.0, 235.0, 227.0, 272.0, 247.0, 211.0, 201.0, 208.0, 200.0, 267.0, 246.0, 197.0, 196.0, 254.0, 253.0, 214.0, 208.0, 165.0, 179.0, 253.0, 263.0, 230.0, 232.0, 266.0, 235.0, 231.0, 239.0, 222.0, 234.0, 199.0, 209.0, 269.0, 244.0, 260.0, 259.0, 209.0, 213.0, 266.0, 259.0, 260.0, 265.0, 250.0, 254.0, 217.0, 242.0, 80.0, 88.0, 217.0, 213.0, 231.0, 253.0, 256.0, 260.0, 238.0, 266.0, 258.0, 261.0, 246.0, 270.0, 219.0, 216.0, 244.0, 263.0, 221.0, 249.0, 256.0, 269.0, 265.0, 251.0, 256.0, 248.0, 241.0, 232.0, 253.0, 272.0, 257.0, 256.0, 213.0, 203.0, 230.0, 219.0, 232.0, 224.0, 260.0, 253.0, 230.0, 223.0, 239.0, 223.0, 233.0, 237.0, 238.0, 232.0, 202.0, 222.0, 256.0, 260.0, 260.0, 256.0, 228.0, 236.0, 267.0, 246.0, 217.0, 233.0, 94.0, 86.0, 242.0, 223.0, 227.0, 240.0, 205.0, 200.0, 258.0, 258.0, 258.0, 255.0, 204.0, 204.0, 260.0, 247.0, 258.0, 255.0, 248.0, 271.0, 248.0, 250.0, 277.0, 236.0, 267.0, 249.0, 29.0, 37.0, 261.0, 252.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.685451167099999, "mean_inference_ms": 1.208162856694142, "mean_action_processing_ms": 0.1326708078206156, "mean_env_wait_ms": 0.8506212298485927, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 525.0, "episode_reward_min": 66.0, "episode_reward_mean": 466.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 277.0}, "policy_reward_mean": {"ppo": 233.01}, "hist_stats": {"episode_reward": [447.0, 450.0, 516.0, 456.0, 507.0, 513.0, 419.0, 510.0, 519.0, 444.0, 513.0, 453.0, 441.0, 510.0, 435.0, 510.0, 450.0, 453.0, 473.0, 522.0, 462.0, 519.0, 419.0, 516.0, 462.0, 396.0, 459.0, 468.0, 387.0, 501.0, 455.0, 465.0, 516.0, 462.0, 519.0, 412.0, 408.0, 513.0, 393.0, 507.0, 422.0, 344.0, 516.0, 462.0, 501.0, 470.0, 456.0, 408.0, 513.0, 519.0, 422.0, 525.0, 525.0, 504.0, 459.0, 168.0, 430.0, 484.0, 516.0, 504.0, 519.0, 516.0, 435.0, 507.0, 470.0, 525.0, 516.0, 504.0, 473.0, 525.0, 513.0, 416.0, 449.0, 456.0, 513.0, 453.0, 462.0, 470.0, 470.0, 424.0, 516.0, 516.0, 464.0, 513.0, 450.0, 180.0, 465.0, 467.0, 405.0, 516.0, 513.0, 408.0, 507.0, 513.0, 519.0, 498.0, 513.0, 516.0, 66.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [231.0, 216.0, 223.0, 227.0, 250.0, 266.0, 228.0, 228.0, 258.0, 249.0, 252.0, 261.0, 216.0, 203.0, 249.0, 261.0, 252.0, 267.0, 223.0, 221.0, 261.0, 252.0, 222.0, 231.0, 225.0, 216.0, 263.0, 247.0, 207.0, 228.0, 260.0, 250.0, 233.0, 217.0, 229.0, 224.0, 229.0, 244.0, 267.0, 255.0, 217.0, 245.0, 270.0, 249.0, 213.0, 206.0, 250.0, 266.0, 242.0, 220.0, 181.0, 215.0, 231.0, 228.0, 230.0, 238.0, 200.0, 187.0, 244.0, 257.0, 219.0, 236.0, 247.0, 218.0, 247.0, 269.0, 235.0, 227.0, 272.0, 247.0, 211.0, 201.0, 208.0, 200.0, 267.0, 246.0, 197.0, 196.0, 254.0, 253.0, 214.0, 208.0, 165.0, 179.0, 253.0, 263.0, 230.0, 
232.0, 266.0, 235.0, 231.0, 239.0, 222.0, 234.0, 199.0, 209.0, 269.0, 244.0, 260.0, 259.0, 209.0, 213.0, 266.0, 259.0, 260.0, 265.0, 250.0, 254.0, 217.0, 242.0, 80.0, 88.0, 217.0, 213.0, 231.0, 253.0, 256.0, 260.0, 238.0, 266.0, 258.0, 261.0, 246.0, 270.0, 219.0, 216.0, 244.0, 263.0, 221.0, 249.0, 256.0, 269.0, 265.0, 251.0, 256.0, 248.0, 241.0, 232.0, 253.0, 272.0, 257.0, 256.0, 213.0, 203.0, 230.0, 219.0, 232.0, 224.0, 260.0, 253.0, 230.0, 223.0, 239.0, 223.0, 233.0, 237.0, 238.0, 232.0, 202.0, 222.0, 256.0, 260.0, 260.0, 256.0, 228.0, 236.0, 267.0, 246.0, 217.0, 233.0, 94.0, 86.0, 242.0, 223.0, 227.0, 240.0, 205.0, 200.0, 258.0, 258.0, 258.0, 255.0, 204.0, 204.0, 260.0, 247.0, 258.0, 255.0, 248.0, 271.0, 248.0, 250.0, 277.0, 236.0, 267.0, 249.0, 29.0, 37.0, 261.0, 252.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.685451167099999, "mean_inference_ms": 1.208162856694142, "mean_action_processing_ms": 0.1326708078206156, "mean_env_wait_ms": 0.8506212298485927, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2662400, "num_agent_steps_trained": 2662400, "num_env_steps_sampled": 1331200, "num_env_steps_trained": 1331200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1331200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2662400, "timers": {"training_iteration_time_ms": 3642.307, "learn_time_ms": 1101.425, "learn_throughput": 11621.309, "synch_weights_time_ms": 13.658}, "counters": {"num_env_steps_sampled": 1331200, "num_env_steps_trained": 1331200, "num_agent_steps_sampled": 2662400, "num_agent_steps_trained": 2662400}, "done": false, "episodes_total": 3328, "training_iteration": 104, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-43", "timestamp": 1666580803, "time_this_iter_s": 3.688504695892334, "time_total_s": 392.11259937286377, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 392.11259937286377, "timesteps_since_restore": 0, "iterations_since_restore": 104, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.020000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 164.4, "sparse_reward_min": 20, "sparse_reward_max": 180, "shaped_reward_mean": 144.37, "shaped_reward_min": 26, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.79, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.65, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 13.3, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.11, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.85, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.68, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.55, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.59, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.4, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.32, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.07, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 12.85, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.68, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.85, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.68, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 9.860761462199441e-33, "cur_lr": 0.0010000000474974513, "total_loss": -0.0018112241523340344, "policy_loss": -0.0019815461710095406, "vf_loss": 7.434688568115234, "vf_explained_var": 0.6963086128234863, "kl": 0.0017134303925558925, "entropy": 1.1462903022766113, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1344000, "num_env_steps_trained": 1344000, "num_agent_steps_sampled": 2688000, "num_agent_steps_trained": 2688000}, "sampler_results": {"episode_reward_max": 525.0, "episode_reward_min": 66.0, "episode_reward_mean": 473.17, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 280.0}, "policy_reward_mean": {"ppo": 236.585}, "custom_metrics": {"sparse_reward_mean": 164.4, "sparse_reward_min": 20, "sparse_reward_max": 180, "shaped_reward_mean": 144.37, "shaped_reward_min": 26, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.79, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 14.65, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.3, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.11, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.63, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.36, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.85, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.68, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.55, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.59, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.4, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.21, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.32, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 12.85, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.68, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.85, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.68, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 462.0, 519.0, 412.0, 408.0, 513.0, 393.0, 507.0, 422.0, 344.0, 516.0, 462.0, 501.0, 470.0, 456.0, 408.0, 513.0, 519.0, 422.0, 525.0, 525.0, 504.0, 459.0, 168.0, 430.0, 484.0, 516.0, 504.0, 519.0, 516.0, 435.0, 507.0, 470.0, 525.0, 516.0, 504.0, 473.0, 525.0, 513.0, 416.0, 449.0, 456.0, 513.0, 453.0, 462.0, 470.0, 470.0, 424.0, 516.0, 516.0, 464.0, 513.0, 450.0, 180.0, 465.0, 467.0, 405.0, 516.0, 513.0, 408.0, 507.0, 513.0, 519.0, 498.0, 513.0, 516.0, 66.0, 513.0, 510.0, 513.0, 465.0, 519.0, 510.0, 516.0, 473.0, 465.0, 456.0, 510.0, 522.0, 461.0, 473.0, 501.0, 510.0, 513.0, 516.0, 458.0, 410.0, 516.0, 507.0, 519.0, 421.0, 473.0, 525.0, 504.0, 519.0, 510.0, 519.0, 464.0, 468.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [247.0, 269.0, 235.0, 227.0, 272.0, 247.0, 211.0, 201.0, 208.0, 200.0, 267.0, 246.0, 197.0, 196.0, 254.0, 253.0, 214.0, 208.0, 165.0, 179.0, 253.0, 263.0, 230.0, 232.0, 266.0, 235.0, 231.0, 239.0, 222.0, 234.0, 199.0, 209.0, 269.0, 244.0, 260.0, 259.0, 209.0, 213.0, 266.0, 259.0, 260.0, 265.0, 250.0, 254.0, 217.0, 242.0, 80.0, 88.0, 217.0, 213.0, 231.0, 253.0, 256.0, 260.0, 238.0, 266.0, 258.0, 261.0, 246.0, 270.0, 219.0, 216.0, 244.0, 263.0, 221.0, 249.0, 256.0, 269.0, 265.0, 251.0, 256.0, 248.0, 241.0, 232.0, 253.0, 272.0, 257.0, 256.0, 213.0, 203.0, 230.0, 219.0, 232.0, 224.0, 260.0, 253.0, 230.0, 223.0, 239.0, 223.0, 233.0, 237.0, 238.0, 232.0, 202.0, 222.0, 256.0, 260.0, 260.0, 256.0, 228.0, 236.0, 267.0, 246.0, 217.0, 233.0, 94.0, 86.0, 242.0, 223.0, 227.0, 240.0, 205.0, 200.0, 258.0, 258.0, 258.0, 255.0, 204.0, 204.0, 260.0, 247.0, 258.0, 255.0, 248.0, 271.0, 248.0, 250.0, 277.0, 236.0, 267.0, 249.0, 29.0, 37.0, 261.0, 252.0, 254.0, 256.0, 256.0, 257.0, 211.0, 254.0, 254.0, 265.0, 248.0, 262.0, 244.0, 272.0, 246.0, 227.0, 239.0, 226.0, 238.0, 218.0, 250.0, 260.0, 280.0, 242.0, 224.0, 237.0, 232.0, 241.0, 252.0, 249.0, 254.0, 256.0, 239.0, 274.0, 260.0, 256.0, 220.0, 238.0, 205.0, 205.0, 256.0, 260.0, 252.0, 255.0, 264.0, 255.0, 216.0, 205.0, 235.0, 238.0, 256.0, 269.0, 247.0, 257.0, 265.0, 254.0, 262.0, 248.0, 257.0, 262.0, 242.0, 222.0, 236.0, 232.0, 258.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6853260190455254, "mean_inference_ms": 1.2081047276134962, "mean_action_processing_ms": 0.1326570049745876, "mean_env_wait_ms": 0.8501976782803979, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 525.0, "episode_reward_min": 66.0, "episode_reward_mean": 473.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 280.0}, "policy_reward_mean": {"ppo": 236.585}, "hist_stats": {"episode_reward": [516.0, 462.0, 519.0, 412.0, 408.0, 513.0, 393.0, 507.0, 422.0, 344.0, 516.0, 462.0, 501.0, 470.0, 456.0, 408.0, 513.0, 519.0, 422.0, 525.0, 525.0, 504.0, 459.0, 168.0, 430.0, 484.0, 516.0, 504.0, 519.0, 516.0, 435.0, 507.0, 470.0, 525.0, 516.0, 504.0, 473.0, 525.0, 513.0, 416.0, 449.0, 456.0, 513.0, 453.0, 462.0, 470.0, 470.0, 424.0, 516.0, 516.0, 464.0, 513.0, 450.0, 180.0, 465.0, 467.0, 405.0, 516.0, 513.0, 408.0, 507.0, 513.0, 519.0, 498.0, 513.0, 516.0, 66.0, 513.0, 510.0, 513.0, 465.0, 519.0, 510.0, 516.0, 473.0, 465.0, 456.0, 510.0, 522.0, 461.0, 473.0, 501.0, 510.0, 513.0, 516.0, 458.0, 410.0, 516.0, 507.0, 519.0, 421.0, 473.0, 525.0, 504.0, 519.0, 510.0, 519.0, 464.0, 468.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [247.0, 269.0, 235.0, 227.0, 272.0, 247.0, 211.0, 201.0, 208.0, 200.0, 267.0, 246.0, 197.0, 196.0, 254.0, 253.0, 214.0, 208.0, 165.0, 179.0, 253.0, 263.0, 230.0, 232.0, 266.0, 235.0, 231.0, 239.0, 222.0, 234.0, 199.0, 209.0, 269.0, 244.0, 260.0, 259.0, 209.0, 213.0, 266.0, 259.0, 260.0, 265.0, 250.0, 254.0, 217.0, 242.0, 80.0, 88.0, 217.0, 213.0, 231.0, 253.0, 256.0, 260.0, 238.0, 266.0, 258.0, 261.0, 246.0, 270.0, 219.0, 216.0, 244.0, 263.0, 221.0, 249.0, 256.0, 269.0, 265.0, 251.0, 256.0, 248.0, 241.0, 232.0, 253.0, 272.0, 257.0, 256.0, 213.0, 203.0, 230.0, 219.0, 232.0, 224.0, 260.0, 253.0, 230.0, 
223.0, 239.0, 223.0, 233.0, 237.0, 238.0, 232.0, 202.0, 222.0, 256.0, 260.0, 260.0, 256.0, 228.0, 236.0, 267.0, 246.0, 217.0, 233.0, 94.0, 86.0, 242.0, 223.0, 227.0, 240.0, 205.0, 200.0, 258.0, 258.0, 258.0, 255.0, 204.0, 204.0, 260.0, 247.0, 258.0, 255.0, 248.0, 271.0, 248.0, 250.0, 277.0, 236.0, 267.0, 249.0, 29.0, 37.0, 261.0, 252.0, 254.0, 256.0, 256.0, 257.0, 211.0, 254.0, 254.0, 265.0, 248.0, 262.0, 244.0, 272.0, 246.0, 227.0, 239.0, 226.0, 238.0, 218.0, 250.0, 260.0, 280.0, 242.0, 224.0, 237.0, 232.0, 241.0, 252.0, 249.0, 254.0, 256.0, 239.0, 274.0, 260.0, 256.0, 220.0, 238.0, 205.0, 205.0, 256.0, 260.0, 252.0, 255.0, 264.0, 255.0, 216.0, 205.0, 235.0, 238.0, 256.0, 269.0, 247.0, 257.0, 265.0, 254.0, 262.0, 248.0, 257.0, 262.0, 242.0, 222.0, 236.0, 232.0, 258.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6853260190455254, "mean_inference_ms": 1.2081047276134962, "mean_action_processing_ms": 0.1326570049745876, "mean_env_wait_ms": 0.8501976782803979, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2688000, "num_agent_steps_trained": 2688000, "num_env_steps_sampled": 1344000, "num_env_steps_trained": 1344000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1344000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2688000, "timers": {"training_iteration_time_ms": 3643.382, "learn_time_ms": 1101.078, "learn_throughput": 11624.97, "synch_weights_time_ms": 12.672}, "counters": {"num_env_steps_sampled": 1344000, "num_env_steps_trained": 1344000, "num_agent_steps_sampled": 2688000, "num_agent_steps_trained": 2688000}, "done": false, "episodes_total": 3360, "training_iteration": 105, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-47", "timestamp": 1666580807, "time_this_iter_s": 3.699756622314453, "time_total_s": 395.8123559951782, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 395.8123559951782, "timesteps_since_restore": 0, "iterations_since_restore": 105, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.616666666666667, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 166.4, "sparse_reward_min": 20, "sparse_reward_max": 180, "shaped_reward_mean": 145.88, "shaped_reward_min": 26, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.1, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.49, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 13.67, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.0, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.14, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.62, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.27, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.9, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.63, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.32, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.23, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.34, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.14, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.14, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.62, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.14, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.62, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.930380731099721e-33, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009489480871707201, "policy_loss": -0.00112776062451303, "vf_loss": 7.568265914916992, "vf_explained_var": 0.6967692375183105, "kl": 0.0018486479530110955, "entropy": 1.1560269594192505, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1356800, "num_env_steps_trained": 1356800, "num_agent_steps_sampled": 2713600, "num_agent_steps_trained": 2713600}, "sampler_results": {"episode_reward_max": 525.0, "episode_reward_min": 66.0, "episode_reward_mean": 478.68, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 280.0}, "policy_reward_mean": {"ppo": 239.34}, "custom_metrics": {"sparse_reward_mean": 166.4, "sparse_reward_min": 20, "sparse_reward_max": 180, "shaped_reward_mean": 145.88, "shaped_reward_min": 26, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.1, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.49, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.67, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.0, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.64, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.62, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.29, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.35, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.14, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.62, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.27, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 3.9, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.63, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.32, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.19, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.23, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.34, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.14, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 5, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.14, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.62, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.14, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.62, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [470.0, 525.0, 516.0, 504.0, 473.0, 525.0, 513.0, 416.0, 449.0, 456.0, 513.0, 453.0, 462.0, 470.0, 470.0, 424.0, 516.0, 516.0, 464.0, 513.0, 450.0, 180.0, 465.0, 467.0, 405.0, 516.0, 513.0, 408.0, 507.0, 513.0, 519.0, 498.0, 513.0, 516.0, 66.0, 513.0, 510.0, 513.0, 465.0, 519.0, 510.0, 516.0, 473.0, 465.0, 456.0, 510.0, 522.0, 461.0, 473.0, 501.0, 510.0, 513.0, 516.0, 458.0, 410.0, 516.0, 507.0, 519.0, 421.0, 473.0, 525.0, 504.0, 519.0, 510.0, 519.0, 464.0, 468.0, 519.0, 513.0, 522.0, 519.0, 413.0, 516.0, 425.0, 459.0, 465.0, 467.0, 449.0, 513.0, 513.0, 452.0, 513.0, 447.0, 519.0, 513.0, 510.0, 516.0, 473.0, 465.0, 519.0, 465.0, 458.0, 465.0, 456.0, 456.0, 459.0, 516.0, 441.0, 516.0, 473.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [221.0, 249.0, 256.0, 269.0, 265.0, 251.0, 256.0, 248.0, 241.0, 232.0, 253.0, 272.0, 257.0, 256.0, 213.0, 203.0, 230.0, 219.0, 232.0, 224.0, 260.0, 253.0, 230.0, 223.0, 239.0, 223.0, 233.0, 237.0, 238.0, 232.0, 202.0, 222.0, 256.0, 260.0, 260.0, 256.0, 228.0, 236.0, 267.0, 246.0, 217.0, 233.0, 94.0, 86.0, 242.0, 223.0, 227.0, 240.0, 205.0, 200.0, 258.0, 258.0, 258.0, 255.0, 204.0, 204.0, 260.0, 247.0, 258.0, 255.0, 248.0, 271.0, 248.0, 250.0, 277.0, 236.0, 267.0, 249.0, 29.0, 37.0, 261.0, 252.0, 254.0, 256.0, 256.0, 257.0, 211.0, 254.0, 254.0, 265.0, 248.0, 262.0, 244.0, 272.0, 246.0, 227.0, 239.0, 226.0, 238.0, 218.0, 250.0, 260.0, 280.0, 242.0, 224.0, 237.0, 232.0, 241.0, 252.0, 249.0, 254.0, 256.0, 239.0, 274.0, 260.0, 256.0, 220.0, 238.0, 205.0, 205.0, 256.0, 260.0, 252.0, 255.0, 264.0, 255.0, 216.0, 205.0, 235.0, 238.0, 256.0, 269.0, 247.0, 257.0, 265.0, 254.0, 262.0, 248.0, 257.0, 262.0, 242.0, 222.0, 236.0, 232.0, 258.0, 261.0, 254.0, 259.0, 260.0, 262.0, 248.0, 271.0, 211.0, 202.0, 261.0, 255.0, 213.0, 212.0, 228.0, 231.0, 242.0, 223.0, 231.0, 236.0, 228.0, 221.0, 243.0, 270.0, 259.0, 254.0, 213.0, 239.0, 270.0, 243.0, 226.0, 221.0, 262.0, 257.0, 253.0, 260.0, 247.0, 263.0, 252.0, 264.0, 241.0, 232.0, 235.0, 230.0, 259.0, 260.0, 234.0, 231.0, 213.0, 245.0, 236.0, 229.0, 236.0, 220.0, 243.0, 213.0, 245.0, 214.0, 260.0, 256.0, 205.0, 236.0, 262.0, 254.0, 235.0, 238.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6852323252055342, "mean_inference_ms": 1.2080135818780215, "mean_action_processing_ms": 0.13264710505548277, "mean_env_wait_ms": 0.8497943535045519, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 525.0, "episode_reward_min": 66.0, "episode_reward_mean": 478.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 280.0}, "policy_reward_mean": {"ppo": 239.34}, "hist_stats": {"episode_reward": [470.0, 525.0, 516.0, 504.0, 473.0, 525.0, 513.0, 416.0, 449.0, 456.0, 513.0, 453.0, 462.0, 470.0, 470.0, 424.0, 516.0, 516.0, 464.0, 513.0, 450.0, 180.0, 465.0, 467.0, 405.0, 516.0, 513.0, 408.0, 507.0, 513.0, 519.0, 498.0, 513.0, 516.0, 66.0, 513.0, 510.0, 513.0, 465.0, 519.0, 510.0, 516.0, 473.0, 465.0, 456.0, 510.0, 522.0, 461.0, 473.0, 501.0, 510.0, 513.0, 516.0, 458.0, 410.0, 516.0, 507.0, 519.0, 421.0, 473.0, 525.0, 504.0, 519.0, 510.0, 519.0, 464.0, 468.0, 519.0, 513.0, 522.0, 519.0, 413.0, 516.0, 425.0, 459.0, 465.0, 467.0, 449.0, 513.0, 513.0, 452.0, 513.0, 447.0, 519.0, 513.0, 510.0, 516.0, 473.0, 465.0, 519.0, 465.0, 458.0, 465.0, 456.0, 456.0, 459.0, 516.0, 441.0, 516.0, 473.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [221.0, 249.0, 256.0, 269.0, 265.0, 251.0, 256.0, 248.0, 241.0, 232.0, 253.0, 272.0, 257.0, 256.0, 213.0, 203.0, 230.0, 219.0, 232.0, 224.0, 260.0, 253.0, 230.0, 223.0, 239.0, 223.0, 233.0, 237.0, 238.0, 232.0, 202.0, 222.0, 256.0, 260.0, 260.0, 256.0, 228.0, 236.0, 267.0, 246.0, 217.0, 233.0, 94.0, 86.0, 242.0, 223.0, 227.0, 240.0, 205.0, 200.0, 258.0, 258.0, 258.0, 255.0, 204.0, 204.0, 260.0, 247.0, 258.0, 255.0, 248.0, 271.0, 248.0, 250.0, 277.0, 236.0, 267.0, 249.0, 29.0, 37.0, 261.0, 252.0, 254.0, 256.0, 256.0, 257.0, 211.0, 254.0, 254.0, 265.0, 248.0, 262.0, 244.0, 272.0, 246.0, 227.0, 239.0, 
226.0, 238.0, 218.0, 250.0, 260.0, 280.0, 242.0, 224.0, 237.0, 232.0, 241.0, 252.0, 249.0, 254.0, 256.0, 239.0, 274.0, 260.0, 256.0, 220.0, 238.0, 205.0, 205.0, 256.0, 260.0, 252.0, 255.0, 264.0, 255.0, 216.0, 205.0, 235.0, 238.0, 256.0, 269.0, 247.0, 257.0, 265.0, 254.0, 262.0, 248.0, 257.0, 262.0, 242.0, 222.0, 236.0, 232.0, 258.0, 261.0, 254.0, 259.0, 260.0, 262.0, 248.0, 271.0, 211.0, 202.0, 261.0, 255.0, 213.0, 212.0, 228.0, 231.0, 242.0, 223.0, 231.0, 236.0, 228.0, 221.0, 243.0, 270.0, 259.0, 254.0, 213.0, 239.0, 270.0, 243.0, 226.0, 221.0, 262.0, 257.0, 253.0, 260.0, 247.0, 263.0, 252.0, 264.0, 241.0, 232.0, 235.0, 230.0, 259.0, 260.0, 234.0, 231.0, 213.0, 245.0, 236.0, 229.0, 236.0, 220.0, 243.0, 213.0, 245.0, 214.0, 260.0, 256.0, 205.0, 236.0, 262.0, 254.0, 235.0, 238.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6852323252055342, "mean_inference_ms": 1.2080135818780215, "mean_action_processing_ms": 0.13264710505548277, "mean_env_wait_ms": 0.8497943535045519, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2713600, "num_agent_steps_trained": 2713600, "num_env_steps_sampled": 1356800, "num_env_steps_trained": 1356800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1356800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2713600, "timers": {"training_iteration_time_ms": 3649.561, "learn_time_ms": 1105.95, "learn_throughput": 11573.764, "synch_weights_time_ms": 11.506}, "counters": {"num_env_steps_sampled": 1356800, "num_env_steps_trained": 1356800, "num_agent_steps_sampled": 2713600, "num_agent_steps_trained": 2713600}, "done": false, "episodes_total": 3392, "training_iteration": 106, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-51", "timestamp": 1666580811, "time_this_iter_s": 3.704148292541504, "time_total_s": 399.5165042877197, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 399.5165042877197, "timesteps_since_restore": 0, "iterations_since_restore": 106, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.98, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 167.8, "sparse_reward_min": 20, "sparse_reward_max": 180, "shaped_reward_mean": 147.14, "shaped_reward_min": 26, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.94, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.13, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 13.49, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.62, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.92, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.07, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.78, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.15, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.53, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.27, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.1, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.52, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.01, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 12.92, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.07, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.92, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.07, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.4651903655498604e-33, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011604693718254566, "policy_loss": -0.0013407319784164429, "vf_loss": 7.517999649047852, "vf_explained_var": 0.7039157748222351, "kl": 0.0018529343651607633, "entropy": 1.1430747509002686, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1369600, "num_env_steps_trained": 1369600, "num_agent_steps_sampled": 2739200, "num_agent_steps_trained": 2739200}, "sampler_results": {"episode_reward_max": 533.0, "episode_reward_min": 66.0, "episode_reward_mean": 482.74, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 280.0}, "policy_reward_mean": {"ppo": 241.37}, "custom_metrics": {"sparse_reward_mean": 167.8, "sparse_reward_min": 20, "sparse_reward_max": 180, "shaped_reward_mean": 147.14, "shaped_reward_min": 26, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 13.94, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.13, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.49, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.62, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.63, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 12.92, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.07, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.78, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.15, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.71, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.53, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.27, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.54, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.1, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.52, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.01, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 12.92, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.07, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 12.92, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.07, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [513.0, 516.0, 66.0, 513.0, 510.0, 513.0, 465.0, 519.0, 510.0, 516.0, 473.0, 465.0, 456.0, 510.0, 522.0, 461.0, 473.0, 501.0, 510.0, 513.0, 516.0, 458.0, 410.0, 516.0, 507.0, 519.0, 421.0, 473.0, 525.0, 504.0, 519.0, 510.0, 519.0, 464.0, 468.0, 519.0, 513.0, 522.0, 519.0, 413.0, 516.0, 425.0, 459.0, 465.0, 467.0, 449.0, 513.0, 513.0, 452.0, 513.0, 447.0, 519.0, 513.0, 510.0, 516.0, 473.0, 465.0, 519.0, 465.0, 458.0, 465.0, 456.0, 456.0, 459.0, 516.0, 441.0, 516.0, 473.0, 453.0, 519.0, 455.0, 519.0, 519.0, 510.0, 180.0, 516.0, 473.0, 516.0, 462.0, 522.0, 470.0, 507.0, 419.0, 525.0, 507.0, 470.0, 533.0, 462.0, 513.0, 519.0, 495.0, 462.0, 519.0, 513.0, 513.0, 456.0, 468.0, 465.0, 516.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [277.0, 236.0, 267.0, 249.0, 29.0, 37.0, 261.0, 252.0, 254.0, 256.0, 256.0, 257.0, 211.0, 254.0, 254.0, 265.0, 248.0, 262.0, 244.0, 272.0, 246.0, 227.0, 239.0, 226.0, 238.0, 218.0, 250.0, 260.0, 280.0, 242.0, 224.0, 237.0, 232.0, 241.0, 252.0, 249.0, 254.0, 256.0, 239.0, 274.0, 260.0, 256.0, 220.0, 238.0, 205.0, 205.0, 256.0, 260.0, 252.0, 255.0, 264.0, 255.0, 216.0, 205.0, 235.0, 238.0, 256.0, 269.0, 247.0, 257.0, 265.0, 254.0, 262.0, 248.0, 257.0, 262.0, 242.0, 222.0, 236.0, 232.0, 258.0, 261.0, 254.0, 259.0, 260.0, 262.0, 248.0, 271.0, 211.0, 202.0, 261.0, 255.0, 213.0, 212.0, 228.0, 231.0, 242.0, 223.0, 231.0, 236.0, 228.0, 221.0, 243.0, 270.0, 259.0, 254.0, 213.0, 239.0, 270.0, 243.0, 226.0, 221.0, 262.0, 257.0, 253.0, 260.0, 247.0, 263.0, 252.0, 264.0, 241.0, 232.0, 235.0, 230.0, 259.0, 260.0, 234.0, 231.0, 213.0, 245.0, 236.0, 229.0, 236.0, 220.0, 243.0, 213.0, 245.0, 214.0, 260.0, 256.0, 205.0, 236.0, 262.0, 254.0, 235.0, 238.0, 225.0, 228.0, 253.0, 266.0, 227.0, 228.0, 268.0, 251.0, 258.0, 261.0, 259.0, 251.0, 91.0, 89.0, 261.0, 255.0, 238.0, 235.0, 254.0, 262.0, 236.0, 226.0, 268.0, 254.0, 239.0, 231.0, 253.0, 254.0, 204.0, 215.0, 267.0, 258.0, 256.0, 251.0, 247.0, 223.0, 273.0, 260.0, 210.0, 252.0, 254.0, 259.0, 267.0, 252.0, 241.0, 254.0, 231.0, 231.0, 264.0, 255.0, 254.0, 259.0, 255.0, 258.0, 237.0, 219.0, 236.0, 232.0, 228.0, 237.0, 262.0, 254.0, 264.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851764219862165, "mean_inference_ms": 1.207909821947583, "mean_action_processing_ms": 0.1326402215421815, "mean_env_wait_ms": 0.8493898967596079, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 533.0, "episode_reward_min": 66.0, "episode_reward_mean": 482.74, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 29.0}, "policy_reward_max": {"ppo": 280.0}, "policy_reward_mean": {"ppo": 241.37}, "hist_stats": {"episode_reward": [513.0, 516.0, 66.0, 513.0, 510.0, 513.0, 465.0, 519.0, 510.0, 516.0, 473.0, 465.0, 456.0, 510.0, 522.0, 461.0, 473.0, 501.0, 510.0, 513.0, 516.0, 458.0, 410.0, 516.0, 507.0, 519.0, 421.0, 473.0, 525.0, 504.0, 519.0, 510.0, 519.0, 464.0, 468.0, 519.0, 513.0, 522.0, 519.0, 413.0, 516.0, 425.0, 459.0, 465.0, 467.0, 449.0, 513.0, 513.0, 452.0, 513.0, 447.0, 519.0, 513.0, 510.0, 516.0, 473.0, 465.0, 519.0, 465.0, 458.0, 465.0, 456.0, 456.0, 459.0, 516.0, 441.0, 516.0, 473.0, 453.0, 519.0, 455.0, 519.0, 519.0, 510.0, 180.0, 516.0, 473.0, 516.0, 462.0, 522.0, 470.0, 507.0, 419.0, 525.0, 507.0, 470.0, 533.0, 462.0, 513.0, 519.0, 495.0, 462.0, 519.0, 513.0, 513.0, 456.0, 468.0, 465.0, 516.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [277.0, 236.0, 267.0, 249.0, 29.0, 37.0, 261.0, 252.0, 254.0, 256.0, 256.0, 257.0, 211.0, 254.0, 254.0, 265.0, 248.0, 262.0, 244.0, 272.0, 246.0, 227.0, 239.0, 226.0, 238.0, 218.0, 250.0, 260.0, 280.0, 242.0, 224.0, 237.0, 232.0, 241.0, 252.0, 249.0, 254.0, 256.0, 239.0, 274.0, 260.0, 256.0, 220.0, 238.0, 205.0, 205.0, 256.0, 260.0, 252.0, 255.0, 264.0, 255.0, 216.0, 205.0, 235.0, 238.0, 256.0, 269.0, 247.0, 257.0, 265.0, 254.0, 262.0, 248.0, 257.0, 262.0, 242.0, 222.0, 236.0, 232.0, 258.0, 261.0, 254.0, 259.0, 260.0, 262.0, 248.0, 271.0, 211.0, 202.0, 261.0, 255.0, 213.0, 212.0, 228.0, 231.0, 242.0, 
223.0, 231.0, 236.0, 228.0, 221.0, 243.0, 270.0, 259.0, 254.0, 213.0, 239.0, 270.0, 243.0, 226.0, 221.0, 262.0, 257.0, 253.0, 260.0, 247.0, 263.0, 252.0, 264.0, 241.0, 232.0, 235.0, 230.0, 259.0, 260.0, 234.0, 231.0, 213.0, 245.0, 236.0, 229.0, 236.0, 220.0, 243.0, 213.0, 245.0, 214.0, 260.0, 256.0, 205.0, 236.0, 262.0, 254.0, 235.0, 238.0, 225.0, 228.0, 253.0, 266.0, 227.0, 228.0, 268.0, 251.0, 258.0, 261.0, 259.0, 251.0, 91.0, 89.0, 261.0, 255.0, 238.0, 235.0, 254.0, 262.0, 236.0, 226.0, 268.0, 254.0, 239.0, 231.0, 253.0, 254.0, 204.0, 215.0, 267.0, 258.0, 256.0, 251.0, 247.0, 223.0, 273.0, 260.0, 210.0, 252.0, 254.0, 259.0, 267.0, 252.0, 241.0, 254.0, 231.0, 231.0, 264.0, 255.0, 254.0, 259.0, 255.0, 258.0, 237.0, 219.0, 236.0, 232.0, 228.0, 237.0, 262.0, 254.0, 264.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851764219862165, "mean_inference_ms": 1.207909821947583, "mean_action_processing_ms": 0.1326402215421815, "mean_env_wait_ms": 0.8493898967596079, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2739200, "num_agent_steps_trained": 2739200, "num_env_steps_sampled": 1369600, "num_env_steps_trained": 1369600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1369600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2739200, "timers": {"training_iteration_time_ms": 3638.166, "learn_time_ms": 1094.745, "learn_throughput": 11692.226, "synch_weights_time_ms": 11.752}, "counters": {"num_env_steps_sampled": 1369600, "num_env_steps_trained": 1369600, "num_agent_steps_sampled": 2739200, "num_agent_steps_trained": 2739200}, "done": false, "episodes_total": 3424, "training_iteration": 107, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-54", "timestamp": 1666580814, "time_this_iter_s": 3.574979543685913, "time_total_s": 403.09148383140564, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 403.09148383140564, "timesteps_since_restore": 0, "iterations_since_restore": 107, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.75, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 168.8, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 148.8, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.56, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.87, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 14.01, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.35, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.48, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.44, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.78, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.11, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.22, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.46, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.22, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.42, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.16, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.44, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.78, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.44, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.78, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.2325951827749302e-33, "cur_lr": 0.0010000000474974513, "total_loss": -0.00016931036952883005, "policy_loss": -0.00034154567401856184, "vf_loss": 7.427540302276611, "vf_explained_var": 0.7095834016799927, "kl": 0.0018368299352005124, "entropy": 1.141036033630371, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1382400, "num_env_steps_trained": 1382400, "num_agent_steps_sampled": 2764800, "num_agent_steps_trained": 2764800}, "sampler_results": {"episode_reward_max": 533.0, "episode_reward_min": 180.0, "episode_reward_mean": 486.4, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 273.0}, "policy_reward_mean": {"ppo": 243.2}, "custom_metrics": {"sparse_reward_mean": 168.8, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 148.8, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.56, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.87, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.01, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.35, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.73, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.83, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.27, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.48, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.44, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.78, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.11, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 3.82, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.67, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.52, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.22, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 4, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.46, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.22, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.42, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.16, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.44, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.78, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.44, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.78, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 464.0, 468.0, 519.0, 513.0, 522.0, 519.0, 413.0, 516.0, 425.0, 459.0, 465.0, 467.0, 449.0, 513.0, 513.0, 452.0, 513.0, 447.0, 519.0, 513.0, 510.0, 516.0, 473.0, 465.0, 519.0, 465.0, 458.0, 465.0, 456.0, 456.0, 459.0, 516.0, 441.0, 516.0, 473.0, 453.0, 519.0, 455.0, 519.0, 519.0, 510.0, 180.0, 516.0, 473.0, 516.0, 462.0, 522.0, 470.0, 507.0, 419.0, 525.0, 507.0, 470.0, 533.0, 462.0, 513.0, 519.0, 495.0, 462.0, 519.0, 513.0, 513.0, 456.0, 468.0, 465.0, 516.0, 519.0, 519.0, 522.0, 467.0, 479.0, 516.0, 513.0, 516.0, 425.0, 467.0, 504.0, 470.0, 516.0, 516.0, 473.0, 513.0, 473.0, 462.0, 450.0, 516.0, 402.0, 459.0, 513.0, 516.0, 513.0, 516.0, 470.0, 519.0, 462.0, 522.0, 522.0, 516.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 262.0, 242.0, 222.0, 236.0, 232.0, 258.0, 261.0, 254.0, 259.0, 260.0, 262.0, 248.0, 271.0, 211.0, 202.0, 261.0, 255.0, 213.0, 212.0, 228.0, 231.0, 242.0, 223.0, 231.0, 236.0, 228.0, 221.0, 243.0, 270.0, 259.0, 254.0, 213.0, 239.0, 270.0, 243.0, 226.0, 221.0, 262.0, 257.0, 253.0, 260.0, 247.0, 263.0, 252.0, 264.0, 241.0, 232.0, 235.0, 230.0, 259.0, 260.0, 234.0, 231.0, 213.0, 245.0, 236.0, 229.0, 236.0, 220.0, 243.0, 213.0, 245.0, 214.0, 260.0, 256.0, 205.0, 236.0, 262.0, 254.0, 235.0, 238.0, 225.0, 228.0, 253.0, 266.0, 227.0, 228.0, 268.0, 251.0, 258.0, 261.0, 259.0, 251.0, 91.0, 89.0, 261.0, 255.0, 238.0, 235.0, 254.0, 262.0, 236.0, 226.0, 268.0, 254.0, 239.0, 231.0, 253.0, 254.0, 204.0, 215.0, 267.0, 258.0, 256.0, 251.0, 247.0, 223.0, 273.0, 260.0, 210.0, 252.0, 254.0, 259.0, 267.0, 252.0, 241.0, 254.0, 231.0, 231.0, 264.0, 255.0, 254.0, 259.0, 255.0, 258.0, 237.0, 219.0, 236.0, 232.0, 228.0, 237.0, 262.0, 254.0, 264.0, 255.0, 257.0, 262.0, 254.0, 268.0, 222.0, 245.0, 247.0, 232.0, 266.0, 250.0, 247.0, 266.0, 255.0, 261.0, 196.0, 229.0, 240.0, 227.0, 246.0, 258.0, 244.0, 226.0, 246.0, 270.0, 256.0, 260.0, 233.0, 240.0, 259.0, 254.0, 246.0, 227.0, 231.0, 231.0, 229.0, 221.0, 265.0, 251.0, 216.0, 186.0, 225.0, 234.0, 267.0, 246.0, 257.0, 259.0, 254.0, 259.0, 260.0, 256.0, 244.0, 226.0, 261.0, 258.0, 230.0, 232.0, 257.0, 265.0, 263.0, 259.0, 261.0, 255.0, 263.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851621848436128, "mean_inference_ms": 1.2078610476021172, "mean_action_processing_ms": 0.1326424297598641, "mean_env_wait_ms": 0.8490743544207925, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 533.0, "episode_reward_min": 180.0, "episode_reward_mean": 486.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 273.0}, "policy_reward_mean": {"ppo": 243.2}, "hist_stats": {"episode_reward": [519.0, 464.0, 468.0, 519.0, 513.0, 522.0, 519.0, 413.0, 516.0, 425.0, 459.0, 465.0, 467.0, 449.0, 513.0, 513.0, 452.0, 513.0, 447.0, 519.0, 513.0, 510.0, 516.0, 473.0, 465.0, 519.0, 465.0, 458.0, 465.0, 456.0, 456.0, 459.0, 516.0, 441.0, 516.0, 473.0, 453.0, 519.0, 455.0, 519.0, 519.0, 510.0, 180.0, 516.0, 473.0, 516.0, 462.0, 522.0, 470.0, 507.0, 419.0, 525.0, 507.0, 470.0, 533.0, 462.0, 513.0, 519.0, 495.0, 462.0, 519.0, 513.0, 513.0, 456.0, 468.0, 465.0, 516.0, 519.0, 519.0, 522.0, 467.0, 479.0, 516.0, 513.0, 516.0, 425.0, 467.0, 504.0, 470.0, 516.0, 516.0, 473.0, 513.0, 473.0, 462.0, 450.0, 516.0, 402.0, 459.0, 513.0, 516.0, 513.0, 516.0, 470.0, 519.0, 462.0, 522.0, 522.0, 516.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 262.0, 242.0, 222.0, 236.0, 232.0, 258.0, 261.0, 254.0, 259.0, 260.0, 262.0, 248.0, 271.0, 211.0, 202.0, 261.0, 255.0, 213.0, 212.0, 228.0, 231.0, 242.0, 223.0, 231.0, 236.0, 228.0, 221.0, 243.0, 270.0, 259.0, 254.0, 213.0, 239.0, 270.0, 243.0, 226.0, 221.0, 262.0, 257.0, 253.0, 260.0, 247.0, 263.0, 252.0, 264.0, 241.0, 232.0, 235.0, 230.0, 259.0, 260.0, 234.0, 231.0, 213.0, 245.0, 236.0, 229.0, 236.0, 220.0, 243.0, 213.0, 245.0, 214.0, 260.0, 256.0, 205.0, 236.0, 262.0, 254.0, 235.0, 238.0, 225.0, 228.0, 253.0, 266.0, 227.0, 228.0, 268.0, 251.0, 258.0, 261.0, 259.0, 251.0, 91.0, 89.0, 261.0, 
255.0, 238.0, 235.0, 254.0, 262.0, 236.0, 226.0, 268.0, 254.0, 239.0, 231.0, 253.0, 254.0, 204.0, 215.0, 267.0, 258.0, 256.0, 251.0, 247.0, 223.0, 273.0, 260.0, 210.0, 252.0, 254.0, 259.0, 267.0, 252.0, 241.0, 254.0, 231.0, 231.0, 264.0, 255.0, 254.0, 259.0, 255.0, 258.0, 237.0, 219.0, 236.0, 232.0, 228.0, 237.0, 262.0, 254.0, 264.0, 255.0, 257.0, 262.0, 254.0, 268.0, 222.0, 245.0, 247.0, 232.0, 266.0, 250.0, 247.0, 266.0, 255.0, 261.0, 196.0, 229.0, 240.0, 227.0, 246.0, 258.0, 244.0, 226.0, 246.0, 270.0, 256.0, 260.0, 233.0, 240.0, 259.0, 254.0, 246.0, 227.0, 231.0, 231.0, 229.0, 221.0, 265.0, 251.0, 216.0, 186.0, 225.0, 234.0, 267.0, 246.0, 257.0, 259.0, 254.0, 259.0, 260.0, 256.0, 244.0, 226.0, 261.0, 258.0, 230.0, 232.0, 257.0, 265.0, 263.0, 259.0, 261.0, 255.0, 263.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851621848436128, "mean_inference_ms": 1.2078610476021172, "mean_action_processing_ms": 0.1326424297598641, "mean_env_wait_ms": 0.8490743544207925, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2764800, "num_agent_steps_trained": 2764800, "num_env_steps_sampled": 1382400, "num_env_steps_trained": 1382400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1382400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2764800, "timers": {"training_iteration_time_ms": 3642.999, "learn_time_ms": 1092.393, "learn_throughput": 11717.398, "synch_weights_time_ms": 11.784}, "counters": {"num_env_steps_sampled": 1382400, "num_env_steps_trained": 1382400, "num_agent_steps_sampled": 2764800, "num_agent_steps_trained": 2764800}, "done": false, "episodes_total": 3456, "training_iteration": 108, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-06-58", "timestamp": 1666580818, "time_this_iter_s": 3.658635377883911, "time_total_s": 406.75011920928955, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 406.75011920928955, "timesteps_since_restore": 0, "iterations_since_restore": 108, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.779999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 169.4, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 150.21, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.21, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.21, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 13.67, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.64, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.16, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.2, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.79, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.58, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.55, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.07, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.16, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.2, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.16, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.2, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 6.162975913874651e-34, "cur_lr": 0.0010000000474974513, "total_loss": -0.0027838372625410557, "policy_loss": -0.002956016920506954, "vf_loss": 7.4473161697387695, "vf_explained_var": 0.7100299596786499, "kl": 0.0014991976786404848, "entropy": 1.1451025009155273, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1395200, "num_env_steps_trained": 1395200, "num_agent_steps_sampled": 2790400, "num_agent_steps_trained": 2790400}, "sampler_results": {"episode_reward_max": 533.0, "episode_reward_min": 180.0, "episode_reward_mean": 489.01, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 274.0}, "policy_reward_mean": {"ppo": 244.505}, "custom_metrics": {"sparse_reward_mean": 169.4, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 150.21, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.21, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.21, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.67, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.64, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.68, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.76, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.43, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.16, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.2, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.79, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.45, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.58, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.17, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.55, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.07, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.16, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.2, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.16, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.2, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 441.0, 516.0, 473.0, 453.0, 519.0, 455.0, 519.0, 519.0, 510.0, 180.0, 516.0, 473.0, 516.0, 462.0, 522.0, 470.0, 507.0, 419.0, 525.0, 507.0, 470.0, 533.0, 462.0, 513.0, 519.0, 495.0, 462.0, 519.0, 513.0, 513.0, 456.0, 468.0, 465.0, 516.0, 519.0, 519.0, 522.0, 467.0, 479.0, 516.0, 513.0, 516.0, 425.0, 467.0, 504.0, 470.0, 516.0, 516.0, 473.0, 513.0, 473.0, 462.0, 450.0, 516.0, 402.0, 459.0, 513.0, 516.0, 513.0, 516.0, 470.0, 519.0, 462.0, 522.0, 522.0, 516.0, 522.0, 458.0, 468.0, 522.0, 513.0, 525.0, 404.0, 516.0, 476.0, 527.0, 464.0, 516.0, 376.0, 513.0, 522.0, 507.0, 468.0, 467.0, 516.0, 522.0, 513.0, 519.0, 516.0, 522.0, 519.0, 516.0, 356.0, 465.0, 513.0, 470.0, 519.0, 513.0, 470.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 256.0, 205.0, 236.0, 262.0, 254.0, 235.0, 238.0, 225.0, 228.0, 253.0, 266.0, 227.0, 228.0, 268.0, 251.0, 258.0, 261.0, 259.0, 251.0, 91.0, 89.0, 261.0, 255.0, 238.0, 235.0, 254.0, 262.0, 236.0, 226.0, 268.0, 254.0, 239.0, 231.0, 253.0, 254.0, 204.0, 215.0, 267.0, 258.0, 256.0, 251.0, 247.0, 223.0, 273.0, 260.0, 210.0, 252.0, 254.0, 259.0, 267.0, 252.0, 241.0, 254.0, 231.0, 231.0, 264.0, 255.0, 254.0, 259.0, 255.0, 258.0, 237.0, 219.0, 236.0, 232.0, 228.0, 237.0, 262.0, 254.0, 264.0, 255.0, 257.0, 262.0, 254.0, 268.0, 222.0, 245.0, 247.0, 232.0, 266.0, 250.0, 247.0, 266.0, 255.0, 261.0, 196.0, 229.0, 240.0, 227.0, 246.0, 258.0, 244.0, 226.0, 246.0, 270.0, 256.0, 260.0, 233.0, 240.0, 259.0, 254.0, 246.0, 227.0, 231.0, 231.0, 229.0, 221.0, 265.0, 251.0, 216.0, 186.0, 225.0, 234.0, 267.0, 246.0, 257.0, 259.0, 254.0, 259.0, 260.0, 256.0, 244.0, 226.0, 261.0, 258.0, 230.0, 232.0, 257.0, 265.0, 263.0, 259.0, 261.0, 255.0, 263.0, 259.0, 224.0, 234.0, 237.0, 231.0, 255.0, 267.0, 254.0, 259.0, 251.0, 274.0, 216.0, 188.0, 260.0, 256.0, 239.0, 237.0, 263.0, 264.0, 232.0, 232.0, 251.0, 265.0, 186.0, 190.0, 272.0, 241.0, 261.0, 261.0, 245.0, 262.0, 226.0, 242.0, 242.0, 225.0, 258.0, 258.0, 270.0, 252.0, 259.0, 254.0, 255.0, 264.0, 259.0, 257.0, 267.0, 255.0, 267.0, 252.0, 259.0, 257.0, 184.0, 172.0, 237.0, 228.0, 248.0, 265.0, 225.0, 245.0, 256.0, 263.0, 246.0, 267.0, 231.0, 239.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851414554562826, "mean_inference_ms": 1.2077767111984112, "mean_action_processing_ms": 0.13264407790813673, "mean_env_wait_ms": 0.8487302506137289, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 533.0, "episode_reward_min": 180.0, "episode_reward_mean": 489.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 274.0}, "policy_reward_mean": {"ppo": 244.505}, "hist_stats": {"episode_reward": [516.0, 441.0, 516.0, 473.0, 453.0, 519.0, 455.0, 519.0, 519.0, 510.0, 180.0, 516.0, 473.0, 516.0, 462.0, 522.0, 470.0, 507.0, 419.0, 525.0, 507.0, 470.0, 533.0, 462.0, 513.0, 519.0, 495.0, 462.0, 519.0, 513.0, 513.0, 456.0, 468.0, 465.0, 516.0, 519.0, 519.0, 522.0, 467.0, 479.0, 516.0, 513.0, 516.0, 425.0, 467.0, 504.0, 470.0, 516.0, 516.0, 473.0, 513.0, 473.0, 462.0, 450.0, 516.0, 402.0, 459.0, 513.0, 516.0, 513.0, 516.0, 470.0, 519.0, 462.0, 522.0, 522.0, 516.0, 522.0, 458.0, 468.0, 522.0, 513.0, 525.0, 404.0, 516.0, 476.0, 527.0, 464.0, 516.0, 376.0, 513.0, 522.0, 507.0, 468.0, 467.0, 516.0, 522.0, 513.0, 519.0, 516.0, 522.0, 519.0, 516.0, 356.0, 465.0, 513.0, 470.0, 519.0, 513.0, 470.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 256.0, 205.0, 236.0, 262.0, 254.0, 235.0, 238.0, 225.0, 228.0, 253.0, 266.0, 227.0, 228.0, 268.0, 251.0, 258.0, 261.0, 259.0, 251.0, 91.0, 89.0, 261.0, 255.0, 238.0, 235.0, 254.0, 262.0, 236.0, 226.0, 268.0, 254.0, 239.0, 231.0, 253.0, 254.0, 204.0, 215.0, 267.0, 258.0, 256.0, 251.0, 247.0, 223.0, 273.0, 260.0, 210.0, 252.0, 254.0, 259.0, 267.0, 252.0, 241.0, 254.0, 231.0, 231.0, 264.0, 255.0, 254.0, 259.0, 255.0, 258.0, 237.0, 219.0, 236.0, 232.0, 228.0, 237.0, 262.0, 254.0, 264.0, 255.0, 257.0, 262.0, 254.0, 268.0, 222.0, 245.0, 247.0, 232.0, 266.0, 250.0, 247.0, 266.0, 255.0, 261.0, 196.0, 
229.0, 240.0, 227.0, 246.0, 258.0, 244.0, 226.0, 246.0, 270.0, 256.0, 260.0, 233.0, 240.0, 259.0, 254.0, 246.0, 227.0, 231.0, 231.0, 229.0, 221.0, 265.0, 251.0, 216.0, 186.0, 225.0, 234.0, 267.0, 246.0, 257.0, 259.0, 254.0, 259.0, 260.0, 256.0, 244.0, 226.0, 261.0, 258.0, 230.0, 232.0, 257.0, 265.0, 263.0, 259.0, 261.0, 255.0, 263.0, 259.0, 224.0, 234.0, 237.0, 231.0, 255.0, 267.0, 254.0, 259.0, 251.0, 274.0, 216.0, 188.0, 260.0, 256.0, 239.0, 237.0, 263.0, 264.0, 232.0, 232.0, 251.0, 265.0, 186.0, 190.0, 272.0, 241.0, 261.0, 261.0, 245.0, 262.0, 226.0, 242.0, 242.0, 225.0, 258.0, 258.0, 270.0, 252.0, 259.0, 254.0, 255.0, 264.0, 259.0, 257.0, 267.0, 255.0, 267.0, 252.0, 259.0, 257.0, 184.0, 172.0, 237.0, 228.0, 248.0, 265.0, 225.0, 245.0, 256.0, 263.0, 246.0, 267.0, 231.0, 239.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851414554562826, "mean_inference_ms": 1.2077767111984112, "mean_action_processing_ms": 0.13264407790813673, "mean_env_wait_ms": 0.8487302506137289, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2790400, "num_agent_steps_trained": 2790400, "num_env_steps_sampled": 1395200, "num_env_steps_trained": 1395200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1395200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2790400, "timers": {"training_iteration_time_ms": 3657.592, "learn_time_ms": 1102.772, "learn_throughput": 11607.116, "synch_weights_time_ms": 11.338}, "counters": {"num_env_steps_sampled": 1395200, "num_env_steps_trained": 1395200, "num_agent_steps_sampled": 2790400, "num_agent_steps_trained": 2790400}, "done": false, "episodes_total": 3488, "training_iteration": 109, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-02", "timestamp": 1666580822, "time_this_iter_s": 3.695673704147339, "time_total_s": 410.4457929134369, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 410.4457929134369, "timesteps_since_restore": 0, "iterations_since_restore": 109, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.15, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 170.6, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 151.82, "shaped_reward_min": 116, "shaped_reward_max": 167, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.99, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 14.05, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.3, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.66, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.81, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.52, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.59, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.94, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.12, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.12, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.47, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.34, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.42, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.27, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.59, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.94, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.59, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.94, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.0814879569373254e-34, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004494154709391296, "policy_loss": 0.00027276657056063414, "vf_loss": 7.438180446624756, "vf_explained_var": 0.6973411440849304, "kl": 0.0017573704244568944, "entropy": 1.134337067604065, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1408000, "num_env_steps_trained": 1408000, "num_agent_steps_sampled": 2816000, "num_agent_steps_trained": 2816000}, "sampler_results": {"episode_reward_max": 527.0, "episode_reward_min": 356.0, "episode_reward_mean": 493.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 279.0}, "policy_reward_mean": {"ppo": 246.51}, "custom_metrics": {"sparse_reward_mean": 170.6, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 151.82, "shaped_reward_min": 116, "shaped_reward_max": 167, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.99, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.05, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.3, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.66, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.81, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.3, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.52, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.59, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.94, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.12, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.12, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, 
"dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.47, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.34, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.42, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.27, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 13.59, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.94, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.59, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.94, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 
0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [468.0, 465.0, 516.0, 519.0, 519.0, 522.0, 467.0, 479.0, 516.0, 513.0, 516.0, 425.0, 467.0, 504.0, 470.0, 516.0, 516.0, 473.0, 513.0, 473.0, 462.0, 450.0, 516.0, 402.0, 459.0, 513.0, 516.0, 513.0, 516.0, 470.0, 519.0, 462.0, 522.0, 522.0, 516.0, 522.0, 458.0, 468.0, 522.0, 513.0, 525.0, 404.0, 516.0, 476.0, 527.0, 464.0, 516.0, 376.0, 513.0, 522.0, 507.0, 468.0, 467.0, 516.0, 522.0, 513.0, 519.0, 516.0, 522.0, 519.0, 516.0, 356.0, 465.0, 513.0, 470.0, 519.0, 513.0, 470.0, 473.0, 513.0, 510.0, 519.0, 465.0, 465.0, 516.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 462.0, 473.0, 459.0, 519.0, 470.0, 465.0, 519.0, 513.0, 513.0, 458.0, 479.0, 513.0, 522.0, 456.0, 524.0, 473.0, 465.0, 518.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [236.0, 232.0, 228.0, 237.0, 262.0, 254.0, 264.0, 255.0, 257.0, 262.0, 254.0, 268.0, 222.0, 245.0, 247.0, 232.0, 266.0, 250.0, 247.0, 266.0, 255.0, 261.0, 196.0, 229.0, 240.0, 227.0, 246.0, 258.0, 244.0, 226.0, 246.0, 270.0, 256.0, 260.0, 233.0, 240.0, 259.0, 254.0, 246.0, 227.0, 231.0, 231.0, 229.0, 221.0, 265.0, 251.0, 216.0, 186.0, 225.0, 234.0, 267.0, 246.0, 257.0, 259.0, 254.0, 259.0, 260.0, 256.0, 244.0, 226.0, 261.0, 258.0, 230.0, 232.0, 257.0, 265.0, 263.0, 259.0, 261.0, 255.0, 263.0, 259.0, 224.0, 234.0, 237.0, 231.0, 255.0, 267.0, 254.0, 259.0, 251.0, 274.0, 216.0, 188.0, 260.0, 256.0, 239.0, 237.0, 263.0, 264.0, 232.0, 232.0, 251.0, 265.0, 186.0, 190.0, 272.0, 241.0, 261.0, 261.0, 245.0, 262.0, 226.0, 242.0, 242.0, 225.0, 258.0, 258.0, 270.0, 252.0, 259.0, 254.0, 255.0, 264.0, 259.0, 257.0, 267.0, 255.0, 267.0, 252.0, 259.0, 257.0, 184.0, 172.0, 237.0, 228.0, 248.0, 265.0, 225.0, 245.0, 256.0, 263.0, 246.0, 267.0, 231.0, 239.0, 252.0, 221.0, 249.0, 264.0, 269.0, 241.0, 254.0, 265.0, 233.0, 232.0, 230.0, 235.0, 259.0, 257.0, 253.0, 257.0, 251.0, 262.0, 243.0, 279.0, 264.0, 249.0, 259.0, 263.0, 244.0, 272.0, 263.0, 253.0, 244.0, 218.0, 230.0, 243.0, 241.0, 218.0, 253.0, 266.0, 244.0, 226.0, 223.0, 242.0, 258.0, 261.0, 254.0, 259.0, 250.0, 263.0, 232.0, 226.0, 226.0, 253.0, 261.0, 252.0, 271.0, 251.0, 226.0, 230.0, 275.0, 249.0, 232.0, 241.0, 233.0, 232.0, 261.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851207606609288, "mean_inference_ms": 1.2077257093049802, "mean_action_processing_ms": 0.13264280811149973, "mean_env_wait_ms": 0.8484066460590849, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 527.0, "episode_reward_min": 356.0, "episode_reward_mean": 493.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 279.0}, "policy_reward_mean": {"ppo": 246.51}, "hist_stats": {"episode_reward": [468.0, 465.0, 516.0, 519.0, 519.0, 522.0, 467.0, 479.0, 516.0, 513.0, 516.0, 425.0, 467.0, 504.0, 470.0, 516.0, 516.0, 473.0, 513.0, 473.0, 462.0, 450.0, 516.0, 402.0, 459.0, 513.0, 516.0, 513.0, 516.0, 470.0, 519.0, 462.0, 522.0, 522.0, 516.0, 522.0, 458.0, 468.0, 522.0, 513.0, 525.0, 404.0, 516.0, 476.0, 527.0, 464.0, 516.0, 376.0, 513.0, 522.0, 507.0, 468.0, 467.0, 516.0, 522.0, 513.0, 519.0, 516.0, 522.0, 519.0, 516.0, 356.0, 465.0, 513.0, 470.0, 519.0, 513.0, 470.0, 473.0, 513.0, 510.0, 519.0, 465.0, 465.0, 516.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 462.0, 473.0, 459.0, 519.0, 470.0, 465.0, 519.0, 513.0, 513.0, 458.0, 479.0, 513.0, 522.0, 456.0, 524.0, 473.0, 465.0, 518.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [236.0, 232.0, 228.0, 237.0, 262.0, 254.0, 264.0, 255.0, 257.0, 262.0, 254.0, 268.0, 222.0, 245.0, 247.0, 232.0, 266.0, 250.0, 247.0, 266.0, 255.0, 261.0, 196.0, 229.0, 240.0, 227.0, 246.0, 258.0, 244.0, 226.0, 246.0, 270.0, 256.0, 260.0, 233.0, 240.0, 259.0, 254.0, 246.0, 227.0, 231.0, 231.0, 229.0, 221.0, 265.0, 251.0, 216.0, 186.0, 225.0, 234.0, 267.0, 246.0, 257.0, 259.0, 254.0, 259.0, 260.0, 256.0, 244.0, 226.0, 261.0, 258.0, 230.0, 232.0, 257.0, 265.0, 263.0, 259.0, 261.0, 255.0, 263.0, 259.0, 224.0, 234.0, 237.0, 231.0, 255.0, 267.0, 254.0, 259.0, 251.0, 274.0, 216.0, 188.0, 260.0, 256.0, 239.0, 
237.0, 263.0, 264.0, 232.0, 232.0, 251.0, 265.0, 186.0, 190.0, 272.0, 241.0, 261.0, 261.0, 245.0, 262.0, 226.0, 242.0, 242.0, 225.0, 258.0, 258.0, 270.0, 252.0, 259.0, 254.0, 255.0, 264.0, 259.0, 257.0, 267.0, 255.0, 267.0, 252.0, 259.0, 257.0, 184.0, 172.0, 237.0, 228.0, 248.0, 265.0, 225.0, 245.0, 256.0, 263.0, 246.0, 267.0, 231.0, 239.0, 252.0, 221.0, 249.0, 264.0, 269.0, 241.0, 254.0, 265.0, 233.0, 232.0, 230.0, 235.0, 259.0, 257.0, 253.0, 257.0, 251.0, 262.0, 243.0, 279.0, 264.0, 249.0, 259.0, 263.0, 244.0, 272.0, 263.0, 253.0, 244.0, 218.0, 230.0, 243.0, 241.0, 218.0, 253.0, 266.0, 244.0, 226.0, 223.0, 242.0, 258.0, 261.0, 254.0, 259.0, 250.0, 263.0, 232.0, 226.0, 226.0, 253.0, 261.0, 252.0, 271.0, 251.0, 226.0, 230.0, 275.0, 249.0, 232.0, 241.0, 233.0, 232.0, 261.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851207606609288, "mean_inference_ms": 1.2077257093049802, "mean_action_processing_ms": 0.13264280811149973, "mean_env_wait_ms": 0.8484066460590849, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2816000, "num_agent_steps_trained": 2816000, "num_env_steps_sampled": 1408000, "num_env_steps_trained": 1408000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1408000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2816000, "timers": {"training_iteration_time_ms": 3660.1, "learn_time_ms": 1095.962, "learn_throughput": 11679.235, "synch_weights_time_ms": 11.779}, "counters": {"num_env_steps_sampled": 1408000, "num_env_steps_trained": 1408000, "num_agent_steps_sampled": 2816000, "num_agent_steps_trained": 2816000}, "done": false, "episodes_total": 3520, "training_iteration": 110, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-06", "timestamp": 1666580826, "time_this_iter_s": 3.6962802410125732, "time_total_s": 414.14207315444946, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 414.14207315444946, "timesteps_since_restore": 0, "iterations_since_restore": 110, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.86, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 171.4, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 152.62, "shaped_reward_min": 116, "shaped_reward_max": 167, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.24, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.63, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 14.61, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 13.9, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.48, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.21, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.49, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.13, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.36, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.23, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.18, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.54, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.21, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.49, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.21, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.49, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.5407439784686627e-34, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007949502905830741, "policy_loss": 0.0006166819366626441, "vf_loss": 7.434291839599609, "vf_explained_var": 0.6929993629455566, "kl": 0.00210048770532012, "entropy": 1.1303188800811768, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1420800, "num_env_steps_trained": 1420800, "num_agent_steps_sampled": 2841600, "num_agent_steps_trained": 2841600}, "sampler_results": {"episode_reward_max": 527.0, "episode_reward_min": 356.0, "episode_reward_mean": 495.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 279.0}, "policy_reward_mean": {"ppo": 247.71}, "custom_metrics": {"sparse_reward_mean": 171.4, "sparse_reward_min": 120, "sparse_reward_max": 180, "shaped_reward_mean": 152.62, "shaped_reward_min": 116, "shaped_reward_max": 167, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.24, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.63, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.61, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 13.9, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.74, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.8, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.48, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.47, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.21, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.49, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.13, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.36, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.36, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.23, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.18, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.21, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.49, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.21, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.49, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 522.0, 516.0, 522.0, 458.0, 468.0, 522.0, 513.0, 525.0, 404.0, 516.0, 476.0, 527.0, 464.0, 516.0, 376.0, 513.0, 522.0, 507.0, 468.0, 467.0, 516.0, 522.0, 513.0, 519.0, 516.0, 522.0, 519.0, 516.0, 356.0, 465.0, 513.0, 470.0, 519.0, 513.0, 470.0, 473.0, 513.0, 510.0, 519.0, 465.0, 465.0, 516.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 462.0, 473.0, 459.0, 519.0, 470.0, 465.0, 519.0, 513.0, 513.0, 458.0, 479.0, 513.0, 522.0, 456.0, 524.0, 473.0, 465.0, 518.0, 513.0, 516.0, 519.0, 465.0, 516.0, 464.0, 519.0, 468.0, 522.0, 522.0, 527.0, 510.0, 513.0, 522.0, 519.0, 516.0, 465.0, 513.0, 473.0, 525.0, 465.0, 462.0, 468.0, 453.0, 462.0, 465.0, 516.0, 519.0, 513.0, 419.0, 519.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 265.0, 263.0, 259.0, 261.0, 255.0, 263.0, 259.0, 224.0, 234.0, 237.0, 231.0, 255.0, 267.0, 254.0, 259.0, 251.0, 274.0, 216.0, 188.0, 260.0, 256.0, 239.0, 237.0, 263.0, 264.0, 232.0, 232.0, 251.0, 265.0, 186.0, 190.0, 272.0, 241.0, 261.0, 261.0, 245.0, 262.0, 226.0, 242.0, 242.0, 225.0, 258.0, 258.0, 270.0, 252.0, 259.0, 254.0, 255.0, 264.0, 259.0, 257.0, 267.0, 255.0, 267.0, 252.0, 259.0, 257.0, 184.0, 172.0, 237.0, 228.0, 248.0, 265.0, 225.0, 245.0, 256.0, 263.0, 246.0, 267.0, 231.0, 239.0, 252.0, 221.0, 249.0, 264.0, 269.0, 241.0, 254.0, 265.0, 233.0, 232.0, 230.0, 235.0, 259.0, 257.0, 253.0, 257.0, 251.0, 262.0, 243.0, 279.0, 264.0, 249.0, 259.0, 263.0, 244.0, 272.0, 263.0, 253.0, 244.0, 218.0, 230.0, 243.0, 241.0, 218.0, 253.0, 266.0, 244.0, 226.0, 223.0, 242.0, 258.0, 261.0, 254.0, 259.0, 250.0, 263.0, 232.0, 226.0, 226.0, 253.0, 261.0, 252.0, 271.0, 251.0, 226.0, 230.0, 275.0, 249.0, 232.0, 241.0, 233.0, 232.0, 261.0, 257.0, 260.0, 253.0, 266.0, 250.0, 262.0, 257.0, 226.0, 239.0, 267.0, 249.0, 231.0, 233.0, 265.0, 254.0, 239.0, 229.0, 261.0, 261.0, 260.0, 262.0, 272.0, 255.0, 250.0, 260.0, 258.0, 255.0, 260.0, 262.0, 257.0, 262.0, 269.0, 247.0, 234.0, 231.0, 238.0, 275.0, 235.0, 238.0, 260.0, 265.0, 225.0, 240.0, 238.0, 224.0, 221.0, 247.0, 239.0, 214.0, 238.0, 224.0, 237.0, 228.0, 261.0, 255.0, 252.0, 267.0, 257.0, 256.0, 201.0, 218.0, 245.0, 274.0, 257.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851923964426513, "mean_inference_ms": 1.2076839793340488, "mean_action_processing_ms": 0.13263903306444721, "mean_env_wait_ms": 0.8480824894398985, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 527.0, "episode_reward_min": 356.0, "episode_reward_mean": 495.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 279.0}, "policy_reward_mean": {"ppo": 247.71}, "hist_stats": {"episode_reward": [522.0, 522.0, 516.0, 522.0, 458.0, 468.0, 522.0, 513.0, 525.0, 404.0, 516.0, 476.0, 527.0, 464.0, 516.0, 376.0, 513.0, 522.0, 507.0, 468.0, 467.0, 516.0, 522.0, 513.0, 519.0, 516.0, 522.0, 519.0, 516.0, 356.0, 465.0, 513.0, 470.0, 519.0, 513.0, 470.0, 473.0, 513.0, 510.0, 519.0, 465.0, 465.0, 516.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 462.0, 473.0, 459.0, 519.0, 470.0, 465.0, 519.0, 513.0, 513.0, 458.0, 479.0, 513.0, 522.0, 456.0, 524.0, 473.0, 465.0, 518.0, 513.0, 516.0, 519.0, 465.0, 516.0, 464.0, 519.0, 468.0, 522.0, 522.0, 527.0, 510.0, 513.0, 522.0, 519.0, 516.0, 465.0, 513.0, 473.0, 525.0, 465.0, 462.0, 468.0, 453.0, 462.0, 465.0, 516.0, 519.0, 513.0, 419.0, 519.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 265.0, 263.0, 259.0, 261.0, 255.0, 263.0, 259.0, 224.0, 234.0, 237.0, 231.0, 255.0, 267.0, 254.0, 259.0, 251.0, 274.0, 216.0, 188.0, 260.0, 256.0, 239.0, 237.0, 263.0, 264.0, 232.0, 232.0, 251.0, 265.0, 186.0, 190.0, 272.0, 241.0, 261.0, 261.0, 245.0, 262.0, 226.0, 242.0, 242.0, 225.0, 258.0, 258.0, 270.0, 252.0, 259.0, 254.0, 255.0, 264.0, 259.0, 257.0, 267.0, 255.0, 267.0, 252.0, 259.0, 257.0, 184.0, 172.0, 237.0, 228.0, 248.0, 265.0, 225.0, 245.0, 256.0, 263.0, 246.0, 267.0, 231.0, 239.0, 252.0, 221.0, 249.0, 264.0, 269.0, 241.0, 254.0, 265.0, 233.0, 232.0, 230.0, 235.0, 259.0, 257.0, 253.0, 
257.0, 251.0, 262.0, 243.0, 279.0, 264.0, 249.0, 259.0, 263.0, 244.0, 272.0, 263.0, 253.0, 244.0, 218.0, 230.0, 243.0, 241.0, 218.0, 253.0, 266.0, 244.0, 226.0, 223.0, 242.0, 258.0, 261.0, 254.0, 259.0, 250.0, 263.0, 232.0, 226.0, 226.0, 253.0, 261.0, 252.0, 271.0, 251.0, 226.0, 230.0, 275.0, 249.0, 232.0, 241.0, 233.0, 232.0, 261.0, 257.0, 260.0, 253.0, 266.0, 250.0, 262.0, 257.0, 226.0, 239.0, 267.0, 249.0, 231.0, 233.0, 265.0, 254.0, 239.0, 229.0, 261.0, 261.0, 260.0, 262.0, 272.0, 255.0, 250.0, 260.0, 258.0, 255.0, 260.0, 262.0, 257.0, 262.0, 269.0, 247.0, 234.0, 231.0, 238.0, 275.0, 235.0, 238.0, 260.0, 265.0, 225.0, 240.0, 238.0, 224.0, 221.0, 247.0, 239.0, 214.0, 238.0, 224.0, 237.0, 228.0, 261.0, 255.0, 252.0, 267.0, 257.0, 256.0, 201.0, 218.0, 245.0, 274.0, 257.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851923964426513, "mean_inference_ms": 1.2076839793340488, "mean_action_processing_ms": 0.13263903306444721, "mean_env_wait_ms": 0.8480824894398985, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2841600, "num_agent_steps_trained": 2841600, "num_env_steps_sampled": 1420800, "num_env_steps_trained": 1420800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1420800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2841600, "timers": {"training_iteration_time_ms": 3665.378, "learn_time_ms": 1099.576, "learn_throughput": 11640.853, "synch_weights_time_ms": 11.683}, "counters": {"num_env_steps_sampled": 1420800, "num_env_steps_trained": 1420800, "num_agent_steps_sampled": 2841600, "num_agent_steps_trained": 2841600}, "done": false, "episodes_total": 3552, "training_iteration": 111, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-10", "timestamp": 1666580830, "time_this_iter_s": 3.7287049293518066, "time_total_s": 417.87077808380127, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 417.87077808380127, "timesteps_since_restore": 0, "iterations_since_restore": 111, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.116666666666667, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 172.6, "sparse_reward_min": 140, "sparse_reward_max": 180, "shaped_reward_mean": 153.23, "shaped_reward_min": 133, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.16, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.68, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 14.51, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.01, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.65, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.46, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.18, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.59, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 4.8, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.1, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.18, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.12, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.18, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.59, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.18, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.59, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 7.703719892343314e-35, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005474128993228078, "policy_loss": 0.0003818509867414832, "vf_loss": 7.298244476318359, "vf_explained_var": 0.6948171854019165, "kl": 0.0017750629922375083, "entropy": 1.1285228729248047, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1433600, "num_env_steps_trained": 1433600, "num_agent_steps_sampled": 2867200, "num_agent_steps_trained": 2867200}, "sampler_results": {"episode_reward_max": 533.0, "episode_reward_min": 419.0, "episode_reward_mean": 498.43, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 279.0}, "policy_reward_mean": {"ppo": 249.215}, "custom_metrics": {"sparse_reward_mean": 172.6, "sparse_reward_min": 140, "sparse_reward_max": 180, "shaped_reward_mean": 153.23, "shaped_reward_min": 133, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.16, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.68, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.51, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.01, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.65, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.72, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.47, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.46, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.18, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.59, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 4.8, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.1, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, 
"dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.18, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.12, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.18, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.59, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.18, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.59, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 
0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [470.0, 519.0, 513.0, 470.0, 473.0, 513.0, 510.0, 519.0, 465.0, 465.0, 516.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 462.0, 473.0, 459.0, 519.0, 470.0, 465.0, 519.0, 513.0, 513.0, 458.0, 479.0, 513.0, 522.0, 456.0, 524.0, 473.0, 465.0, 518.0, 513.0, 516.0, 519.0, 465.0, 516.0, 464.0, 519.0, 468.0, 522.0, 522.0, 527.0, 510.0, 513.0, 522.0, 519.0, 516.0, 465.0, 513.0, 473.0, 525.0, 465.0, 462.0, 468.0, 453.0, 462.0, 465.0, 516.0, 519.0, 513.0, 419.0, 519.0, 527.0, 424.0, 516.0, 513.0, 516.0, 513.0, 519.0, 516.0, 462.0, 516.0, 516.0, 516.0, 524.0, 533.0, 482.0, 516.0, 516.0, 510.0, 519.0, 470.0, 462.0, 513.0, 481.0, 525.0, 510.0, 513.0, 516.0, 516.0, 462.0, 510.0, 513.0, 462.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [225.0, 245.0, 256.0, 263.0, 246.0, 267.0, 231.0, 239.0, 252.0, 221.0, 249.0, 264.0, 269.0, 241.0, 254.0, 265.0, 233.0, 232.0, 230.0, 235.0, 259.0, 257.0, 253.0, 257.0, 251.0, 262.0, 243.0, 279.0, 264.0, 249.0, 259.0, 263.0, 244.0, 272.0, 263.0, 253.0, 244.0, 218.0, 230.0, 243.0, 241.0, 218.0, 253.0, 266.0, 244.0, 226.0, 223.0, 242.0, 258.0, 261.0, 254.0, 259.0, 250.0, 263.0, 232.0, 226.0, 226.0, 253.0, 261.0, 252.0, 271.0, 251.0, 226.0, 230.0, 275.0, 249.0, 232.0, 241.0, 233.0, 232.0, 261.0, 257.0, 260.0, 253.0, 266.0, 250.0, 262.0, 257.0, 226.0, 239.0, 267.0, 249.0, 231.0, 233.0, 265.0, 254.0, 239.0, 229.0, 261.0, 261.0, 260.0, 262.0, 272.0, 255.0, 250.0, 260.0, 258.0, 255.0, 260.0, 262.0, 257.0, 262.0, 269.0, 247.0, 234.0, 231.0, 238.0, 275.0, 235.0, 238.0, 260.0, 265.0, 225.0, 240.0, 238.0, 224.0, 221.0, 247.0, 239.0, 214.0, 238.0, 224.0, 237.0, 228.0, 261.0, 255.0, 252.0, 267.0, 257.0, 256.0, 201.0, 218.0, 245.0, 274.0, 257.0, 270.0, 206.0, 218.0, 264.0, 252.0, 243.0, 270.0, 253.0, 263.0, 262.0, 251.0, 257.0, 262.0, 259.0, 257.0, 229.0, 233.0, 250.0, 266.0, 265.0, 251.0, 255.0, 261.0, 256.0, 268.0, 262.0, 271.0, 249.0, 233.0, 256.0, 260.0, 256.0, 260.0, 252.0, 258.0, 247.0, 272.0, 251.0, 219.0, 225.0, 237.0, 242.0, 271.0, 240.0, 241.0, 254.0, 271.0, 258.0, 252.0, 264.0, 249.0, 250.0, 266.0, 258.0, 258.0, 229.0, 233.0, 256.0, 254.0, 260.0, 253.0, 214.0, 248.0, 276.0, 246.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6852829337082552, "mean_inference_ms": 1.207667498626502, "mean_action_processing_ms": 0.1326380195196134, "mean_env_wait_ms": 0.8477856021099819, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 533.0, "episode_reward_min": 419.0, "episode_reward_mean": 498.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 279.0}, "policy_reward_mean": {"ppo": 249.215}, "hist_stats": {"episode_reward": [470.0, 519.0, 513.0, 470.0, 473.0, 513.0, 510.0, 519.0, 465.0, 465.0, 516.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 462.0, 473.0, 459.0, 519.0, 470.0, 465.0, 519.0, 513.0, 513.0, 458.0, 479.0, 513.0, 522.0, 456.0, 524.0, 473.0, 465.0, 518.0, 513.0, 516.0, 519.0, 465.0, 516.0, 464.0, 519.0, 468.0, 522.0, 522.0, 527.0, 510.0, 513.0, 522.0, 519.0, 516.0, 465.0, 513.0, 473.0, 525.0, 465.0, 462.0, 468.0, 453.0, 462.0, 465.0, 516.0, 519.0, 513.0, 419.0, 519.0, 527.0, 424.0, 516.0, 513.0, 516.0, 513.0, 519.0, 516.0, 462.0, 516.0, 516.0, 516.0, 524.0, 533.0, 482.0, 516.0, 516.0, 510.0, 519.0, 470.0, 462.0, 513.0, 481.0, 525.0, 510.0, 513.0, 516.0, 516.0, 462.0, 510.0, 513.0, 462.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [225.0, 245.0, 256.0, 263.0, 246.0, 267.0, 231.0, 239.0, 252.0, 221.0, 249.0, 264.0, 269.0, 241.0, 254.0, 265.0, 233.0, 232.0, 230.0, 235.0, 259.0, 257.0, 253.0, 257.0, 251.0, 262.0, 243.0, 279.0, 264.0, 249.0, 259.0, 263.0, 244.0, 272.0, 263.0, 253.0, 244.0, 218.0, 230.0, 243.0, 241.0, 218.0, 253.0, 266.0, 244.0, 226.0, 223.0, 242.0, 258.0, 261.0, 254.0, 259.0, 250.0, 263.0, 232.0, 226.0, 226.0, 253.0, 261.0, 252.0, 271.0, 251.0, 226.0, 230.0, 275.0, 249.0, 232.0, 241.0, 233.0, 232.0, 261.0, 257.0, 260.0, 253.0, 266.0, 250.0, 262.0, 257.0, 226.0, 239.0, 267.0, 249.0, 231.0, 233.0, 265.0, 254.0, 
239.0, 229.0, 261.0, 261.0, 260.0, 262.0, 272.0, 255.0, 250.0, 260.0, 258.0, 255.0, 260.0, 262.0, 257.0, 262.0, 269.0, 247.0, 234.0, 231.0, 238.0, 275.0, 235.0, 238.0, 260.0, 265.0, 225.0, 240.0, 238.0, 224.0, 221.0, 247.0, 239.0, 214.0, 238.0, 224.0, 237.0, 228.0, 261.0, 255.0, 252.0, 267.0, 257.0, 256.0, 201.0, 218.0, 245.0, 274.0, 257.0, 270.0, 206.0, 218.0, 264.0, 252.0, 243.0, 270.0, 253.0, 263.0, 262.0, 251.0, 257.0, 262.0, 259.0, 257.0, 229.0, 233.0, 250.0, 266.0, 265.0, 251.0, 255.0, 261.0, 256.0, 268.0, 262.0, 271.0, 249.0, 233.0, 256.0, 260.0, 256.0, 260.0, 252.0, 258.0, 247.0, 272.0, 251.0, 219.0, 225.0, 237.0, 242.0, 271.0, 240.0, 241.0, 254.0, 271.0, 258.0, 252.0, 264.0, 249.0, 250.0, 266.0, 258.0, 258.0, 229.0, 233.0, 256.0, 254.0, 260.0, 253.0, 214.0, 248.0, 276.0, 246.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6852829337082552, "mean_inference_ms": 1.207667498626502, "mean_action_processing_ms": 0.1326380195196134, "mean_env_wait_ms": 0.8477856021099819, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2867200, "num_agent_steps_trained": 2867200, "num_env_steps_sampled": 1433600, "num_env_steps_trained": 1433600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1433600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2867200, "timers": {"training_iteration_time_ms": 3638.258, "learn_time_ms": 1106.563, "learn_throughput": 11567.347, "synch_weights_time_ms": 11.157}, "counters": {"num_env_steps_sampled": 1433600, "num_env_steps_trained": 1433600, "num_agent_steps_sampled": 2867200, "num_agent_steps_trained": 2867200}, "done": false, "episodes_total": 3584, "training_iteration": 112, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-14", "timestamp": 1666580834, "time_this_iter_s": 3.762916326522827, "time_total_s": 421.6336944103241, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 421.6336944103241, "timesteps_since_restore": 0, "iterations_since_restore": 112, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.799999999999997, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 173.4, "sparse_reward_min": 140, "sparse_reward_max": 180, "shaped_reward_mean": 153.91, "shaped_reward_min": 127, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.12, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.61, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 14.54, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 14.05, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.28, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 13.61, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 4.9, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.15, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.43, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.26, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.17, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.61, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.28, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 13.61, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.28, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 13.61, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.851859946171657e-35, "cur_lr": 0.0010000000474974513, "total_loss": 7.296015974134207e-05, "policy_loss": -0.00010429794201627374, "vf_loss": 7.440869331359863, "vf_explained_var": 0.6927081346511841, "kl": 0.0020735471043735743, "entropy": 1.1336543560028076, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1446400, "num_env_steps_trained": 1446400, "num_agent_steps_sampled": 2892800, "num_agent_steps_trained": 2892800}, "sampler_results": {"episode_reward_max": 533.0, "episode_reward_min": 419.0, "episode_reward_mean": 500.71, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 250.355}, "custom_metrics": {"sparse_reward_mean": 173.4, "sparse_reward_min": 140, "sparse_reward_max": 180, "shaped_reward_mean": 153.91, "shaped_reward_min": 127, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.12, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.61, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.54, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 14.05, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.58, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.64, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.44, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.39, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.28, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 13.61, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 4.9, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.15, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.43, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, 
"dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.26, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.17, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.28, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 13.61, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.28, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 13.61, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [524.0, 473.0, 465.0, 518.0, 513.0, 516.0, 519.0, 465.0, 516.0, 464.0, 519.0, 468.0, 522.0, 522.0, 527.0, 510.0, 513.0, 522.0, 519.0, 516.0, 465.0, 513.0, 473.0, 525.0, 465.0, 462.0, 468.0, 453.0, 462.0, 465.0, 516.0, 519.0, 513.0, 419.0, 519.0, 527.0, 424.0, 516.0, 513.0, 516.0, 513.0, 519.0, 516.0, 462.0, 516.0, 516.0, 516.0, 524.0, 533.0, 482.0, 516.0, 516.0, 510.0, 519.0, 470.0, 462.0, 513.0, 481.0, 525.0, 510.0, 513.0, 516.0, 516.0, 462.0, 510.0, 513.0, 462.0, 522.0, 519.0, 519.0, 462.0, 459.0, 524.0, 519.0, 447.0, 513.0, 473.0, 522.0, 516.0, 516.0, 530.0, 525.0, 473.0, 519.0, 513.0, 462.0, 510.0, 522.0, 530.0, 516.0, 516.0, 519.0, 522.0, 468.0, 470.0, 513.0, 455.0, 519.0, 510.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [275.0, 249.0, 232.0, 241.0, 233.0, 232.0, 261.0, 257.0, 260.0, 253.0, 266.0, 250.0, 262.0, 257.0, 226.0, 239.0, 267.0, 249.0, 231.0, 233.0, 265.0, 254.0, 239.0, 229.0, 261.0, 261.0, 260.0, 262.0, 272.0, 255.0, 250.0, 260.0, 258.0, 255.0, 260.0, 262.0, 257.0, 262.0, 269.0, 247.0, 234.0, 231.0, 238.0, 275.0, 235.0, 238.0, 260.0, 265.0, 225.0, 240.0, 238.0, 224.0, 221.0, 247.0, 239.0, 214.0, 238.0, 224.0, 237.0, 228.0, 261.0, 255.0, 252.0, 267.0, 257.0, 256.0, 201.0, 218.0, 245.0, 274.0, 257.0, 270.0, 206.0, 218.0, 264.0, 252.0, 243.0, 270.0, 253.0, 263.0, 262.0, 251.0, 257.0, 262.0, 259.0, 257.0, 229.0, 233.0, 250.0, 266.0, 265.0, 251.0, 255.0, 261.0, 256.0, 268.0, 262.0, 271.0, 249.0, 233.0, 256.0, 260.0, 256.0, 260.0, 252.0, 258.0, 247.0, 272.0, 251.0, 219.0, 225.0, 237.0, 242.0, 271.0, 240.0, 241.0, 254.0, 271.0, 258.0, 252.0, 264.0, 249.0, 250.0, 266.0, 258.0, 258.0, 229.0, 233.0, 256.0, 254.0, 260.0, 253.0, 214.0, 248.0, 276.0, 246.0, 248.0, 271.0, 253.0, 266.0, 238.0, 224.0, 228.0, 231.0, 255.0, 269.0, 245.0, 274.0, 236.0, 211.0, 252.0, 261.0, 229.0, 244.0, 258.0, 264.0, 252.0, 264.0, 262.0, 254.0, 268.0, 262.0, 262.0, 263.0, 242.0, 231.0, 262.0, 257.0, 260.0, 253.0, 227.0, 235.0, 253.0, 257.0, 254.0, 268.0, 268.0, 262.0, 271.0, 245.0, 256.0, 260.0, 267.0, 252.0, 269.0, 253.0, 245.0, 223.0, 254.0, 216.0, 253.0, 260.0, 230.0, 225.0, 262.0, 257.0, 257.0, 253.0, 243.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6853944062468555, "mean_inference_ms": 1.2076545245921264, "mean_action_processing_ms": 0.1326425287956505, "mean_env_wait_ms": 0.8475362224447032, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 533.0, "episode_reward_min": 419.0, "episode_reward_mean": 500.71, 
"episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 250.355}, "hist_stats": {"episode_reward": [524.0, 473.0, 465.0, 518.0, 513.0, 516.0, 519.0, 465.0, 516.0, 464.0, 519.0, 468.0, 522.0, 522.0, 527.0, 510.0, 513.0, 522.0, 519.0, 516.0, 465.0, 513.0, 473.0, 525.0, 465.0, 462.0, 468.0, 453.0, 462.0, 465.0, 516.0, 519.0, 513.0, 419.0, 519.0, 527.0, 424.0, 516.0, 513.0, 516.0, 513.0, 519.0, 516.0, 462.0, 516.0, 516.0, 516.0, 524.0, 533.0, 482.0, 516.0, 516.0, 510.0, 519.0, 470.0, 462.0, 513.0, 481.0, 525.0, 510.0, 513.0, 516.0, 516.0, 462.0, 510.0, 513.0, 462.0, 522.0, 519.0, 519.0, 462.0, 459.0, 524.0, 519.0, 447.0, 513.0, 473.0, 522.0, 516.0, 516.0, 530.0, 525.0, 473.0, 519.0, 513.0, 462.0, 510.0, 522.0, 530.0, 516.0, 516.0, 519.0, 522.0, 468.0, 470.0, 513.0, 455.0, 519.0, 510.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [275.0, 249.0, 232.0, 241.0, 233.0, 232.0, 261.0, 257.0, 260.0, 253.0, 266.0, 250.0, 262.0, 257.0, 226.0, 239.0, 267.0, 249.0, 231.0, 233.0, 265.0, 254.0, 239.0, 229.0, 261.0, 261.0, 260.0, 262.0, 272.0, 255.0, 250.0, 260.0, 258.0, 255.0, 260.0, 262.0, 257.0, 262.0, 269.0, 247.0, 234.0, 231.0, 238.0, 275.0, 235.0, 238.0, 260.0, 265.0, 225.0, 240.0, 238.0, 224.0, 221.0, 247.0, 239.0, 214.0, 238.0, 224.0, 237.0, 228.0, 261.0, 255.0, 252.0, 267.0, 257.0, 256.0, 201.0, 218.0, 245.0, 274.0, 257.0, 270.0, 206.0, 218.0, 264.0, 252.0, 243.0, 270.0, 253.0, 
263.0, 262.0, 251.0, 257.0, 262.0, 259.0, 257.0, 229.0, 233.0, 250.0, 266.0, 265.0, 251.0, 255.0, 261.0, 256.0, 268.0, 262.0, 271.0, 249.0, 233.0, 256.0, 260.0, 256.0, 260.0, 252.0, 258.0, 247.0, 272.0, 251.0, 219.0, 225.0, 237.0, 242.0, 271.0, 240.0, 241.0, 254.0, 271.0, 258.0, 252.0, 264.0, 249.0, 250.0, 266.0, 258.0, 258.0, 229.0, 233.0, 256.0, 254.0, 260.0, 253.0, 214.0, 248.0, 276.0, 246.0, 248.0, 271.0, 253.0, 266.0, 238.0, 224.0, 228.0, 231.0, 255.0, 269.0, 245.0, 274.0, 236.0, 211.0, 252.0, 261.0, 229.0, 244.0, 258.0, 264.0, 252.0, 264.0, 262.0, 254.0, 268.0, 262.0, 262.0, 263.0, 242.0, 231.0, 262.0, 257.0, 260.0, 253.0, 227.0, 235.0, 253.0, 257.0, 254.0, 268.0, 268.0, 262.0, 271.0, 245.0, 256.0, 260.0, 267.0, 252.0, 269.0, 253.0, 245.0, 223.0, 254.0, 216.0, 253.0, 260.0, 230.0, 225.0, 262.0, 257.0, 257.0, 253.0, 243.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6853944062468555, "mean_inference_ms": 1.2076545245921264, "mean_action_processing_ms": 0.1326425287956505, "mean_env_wait_ms": 0.8475362224447032, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2892800, "num_agent_steps_trained": 2892800, "num_env_steps_sampled": 1446400, "num_env_steps_trained": 1446400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1446400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2892800, "timers": {"training_iteration_time_ms": 3635.054, "learn_time_ms": 1110.849, "learn_throughput": 11522.717, "synch_weights_time_ms": 12.34}, "counters": {"num_env_steps_sampled": 1446400, "num_env_steps_trained": 1446400, "num_agent_steps_sampled": 2892800, "num_agent_steps_trained": 2892800}, "done": false, "episodes_total": 3616, "training_iteration": 113, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-18", "timestamp": 1666580838, "time_this_iter_s": 3.7796876430511475, 
"time_total_s": 425.41338205337524, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", 
"remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, 
"evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", 
"policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, 
"policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 425.41338205337524, "timesteps_since_restore": 0, "iterations_since_restore": 113, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.933333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 170.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 152.01, "shaped_reward_min": 45, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.38, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.76, "onion_pickup_agent_1_min": 5, 
"onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.93, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 14.28, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.48, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.65, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 13.86, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.06, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.08, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.34, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.28, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, 
"soup_delivery_agent_1_mean": 4.44, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.65, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 13.86, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.65, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 13.86, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.9259299730858284e-35, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005465570138767362, "policy_loss": 0.0003737257793545723, "vf_loss": 7.462190628051758, "vf_explained_var": 0.7065198421478271, "kl": 0.0016828658990561962, "entropy": 1.1467739343643188, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1459200, "num_env_steps_trained": 1459200, "num_agent_steps_sampled": 2918400, "num_agent_steps_trained": 2918400}, "sampler_results": {"episode_reward_max": 533.0, "episode_reward_min": 165.0, "episode_reward_mean": 492.41, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 78.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 246.205}, "custom_metrics": {"sparse_reward_mean": 170.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 152.01, "shaped_reward_min": 45, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.38, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.76, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 13.93, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 14.28, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.48, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.6, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.38, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.65, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 13.86, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.06, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.08, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.33, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.21, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.34, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.28, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.44, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.65, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 13.86, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.65, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 13.86, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [513.0, 419.0, 519.0, 527.0, 424.0, 516.0, 513.0, 516.0, 513.0, 519.0, 516.0, 462.0, 516.0, 516.0, 516.0, 524.0, 533.0, 482.0, 516.0, 516.0, 510.0, 519.0, 470.0, 462.0, 513.0, 481.0, 525.0, 510.0, 513.0, 516.0, 516.0, 462.0, 510.0, 513.0, 462.0, 522.0, 519.0, 519.0, 462.0, 459.0, 524.0, 519.0, 447.0, 513.0, 473.0, 522.0, 516.0, 516.0, 530.0, 525.0, 473.0, 519.0, 513.0, 462.0, 510.0, 522.0, 530.0, 516.0, 516.0, 519.0, 522.0, 468.0, 470.0, 513.0, 455.0, 519.0, 510.0, 513.0, 465.0, 519.0, 473.0, 513.0, 473.0, 467.0, 370.0, 510.0, 516.0, 467.0, 513.0, 165.0, 516.0, 479.0, 516.0, 378.0, 522.0, 473.0, 522.0, 473.0, 513.0, 516.0, 425.0, 468.0, 479.0, 513.0, 455.0, 462.0, 453.0, 516.0, 467.0, 470.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 256.0, 201.0, 218.0, 245.0, 274.0, 257.0, 270.0, 206.0, 218.0, 264.0, 252.0, 243.0, 270.0, 253.0, 263.0, 262.0, 251.0, 257.0, 262.0, 259.0, 257.0, 229.0, 233.0, 250.0, 266.0, 265.0, 251.0, 255.0, 261.0, 256.0, 268.0, 262.0, 271.0, 249.0, 233.0, 256.0, 260.0, 256.0, 260.0, 252.0, 258.0, 247.0, 272.0, 251.0, 219.0, 225.0, 237.0, 242.0, 271.0, 240.0, 241.0, 254.0, 271.0, 258.0, 252.0, 264.0, 249.0, 250.0, 266.0, 258.0, 258.0, 229.0, 233.0, 256.0, 254.0, 260.0, 253.0, 214.0, 248.0, 276.0, 246.0, 248.0, 271.0, 253.0, 266.0, 238.0, 224.0, 228.0, 231.0, 255.0, 269.0, 245.0, 274.0, 236.0, 211.0, 252.0, 261.0, 229.0, 244.0, 258.0, 264.0, 252.0, 264.0, 262.0, 254.0, 268.0, 262.0, 262.0, 263.0, 242.0, 231.0, 262.0, 257.0, 260.0, 253.0, 227.0, 235.0, 253.0, 257.0, 254.0, 268.0, 268.0, 262.0, 271.0, 245.0, 256.0, 260.0, 267.0, 252.0, 269.0, 253.0, 245.0, 223.0, 254.0, 216.0, 253.0, 260.0, 230.0, 225.0, 262.0, 257.0, 257.0, 253.0, 243.0, 270.0, 223.0, 242.0, 252.0, 267.0, 230.0, 243.0, 266.0, 247.0, 241.0, 232.0, 228.0, 239.0, 170.0, 200.0, 246.0, 264.0, 257.0, 259.0, 233.0, 234.0, 254.0, 259.0, 87.0, 78.0, 253.0, 263.0, 239.0, 240.0, 253.0, 263.0, 194.0, 184.0, 255.0, 267.0, 234.0, 239.0, 257.0, 265.0, 218.0, 255.0, 262.0, 251.0, 275.0, 241.0, 219.0, 206.0, 235.0, 233.0, 242.0, 237.0, 267.0, 246.0, 228.0, 227.0, 226.0, 236.0, 227.0, 226.0, 262.0, 254.0, 228.0, 239.0, 231.0, 239.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6854311581097831, "mean_inference_ms": 1.2076565796296106, "mean_action_processing_ms": 0.13265011807136642, "mean_env_wait_ms": 0.8473229051130425, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 533.0, "episode_reward_min": 165.0, "episode_reward_mean": 492.41, 
"episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 78.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 246.205}, "hist_stats": {"episode_reward": [513.0, 419.0, 519.0, 527.0, 424.0, 516.0, 513.0, 516.0, 513.0, 519.0, 516.0, 462.0, 516.0, 516.0, 516.0, 524.0, 533.0, 482.0, 516.0, 516.0, 510.0, 519.0, 470.0, 462.0, 513.0, 481.0, 525.0, 510.0, 513.0, 516.0, 516.0, 462.0, 510.0, 513.0, 462.0, 522.0, 519.0, 519.0, 462.0, 459.0, 524.0, 519.0, 447.0, 513.0, 473.0, 522.0, 516.0, 516.0, 530.0, 525.0, 473.0, 519.0, 513.0, 462.0, 510.0, 522.0, 530.0, 516.0, 516.0, 519.0, 522.0, 468.0, 470.0, 513.0, 455.0, 519.0, 510.0, 513.0, 465.0, 519.0, 473.0, 513.0, 473.0, 467.0, 370.0, 510.0, 516.0, 467.0, 513.0, 165.0, 516.0, 479.0, 516.0, 378.0, 522.0, 473.0, 522.0, 473.0, 513.0, 516.0, 425.0, 468.0, 479.0, 513.0, 455.0, 462.0, 453.0, 516.0, 467.0, 470.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 256.0, 201.0, 218.0, 245.0, 274.0, 257.0, 270.0, 206.0, 218.0, 264.0, 252.0, 243.0, 270.0, 253.0, 263.0, 262.0, 251.0, 257.0, 262.0, 259.0, 257.0, 229.0, 233.0, 250.0, 266.0, 265.0, 251.0, 255.0, 261.0, 256.0, 268.0, 262.0, 271.0, 249.0, 233.0, 256.0, 260.0, 256.0, 260.0, 252.0, 258.0, 247.0, 272.0, 251.0, 219.0, 225.0, 237.0, 242.0, 271.0, 240.0, 241.0, 254.0, 271.0, 258.0, 252.0, 264.0, 249.0, 250.0, 266.0, 258.0, 258.0, 229.0, 233.0, 256.0, 254.0, 260.0, 253.0, 214.0, 248.0, 276.0, 246.0, 248.0, 271.0, 253.0, 266.0, 238.0, 224.0, 228.0, 
231.0, 255.0, 269.0, 245.0, 274.0, 236.0, 211.0, 252.0, 261.0, 229.0, 244.0, 258.0, 264.0, 252.0, 264.0, 262.0, 254.0, 268.0, 262.0, 262.0, 263.0, 242.0, 231.0, 262.0, 257.0, 260.0, 253.0, 227.0, 235.0, 253.0, 257.0, 254.0, 268.0, 268.0, 262.0, 271.0, 245.0, 256.0, 260.0, 267.0, 252.0, 269.0, 253.0, 245.0, 223.0, 254.0, 216.0, 253.0, 260.0, 230.0, 225.0, 262.0, 257.0, 257.0, 253.0, 243.0, 270.0, 223.0, 242.0, 252.0, 267.0, 230.0, 243.0, 266.0, 247.0, 241.0, 232.0, 228.0, 239.0, 170.0, 200.0, 246.0, 264.0, 257.0, 259.0, 233.0, 234.0, 254.0, 259.0, 87.0, 78.0, 253.0, 263.0, 239.0, 240.0, 253.0, 263.0, 194.0, 184.0, 255.0, 267.0, 234.0, 239.0, 257.0, 265.0, 218.0, 255.0, 262.0, 251.0, 275.0, 241.0, 219.0, 206.0, 235.0, 233.0, 242.0, 237.0, 267.0, 246.0, 228.0, 227.0, 226.0, 236.0, 227.0, 226.0, 262.0, 254.0, 228.0, 239.0, 231.0, 239.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6854311581097831, "mean_inference_ms": 1.2076565796296106, "mean_action_processing_ms": 0.13265011807136642, "mean_env_wait_ms": 0.8473229051130425, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2918400, "num_agent_steps_trained": 2918400, "num_env_steps_sampled": 1459200, "num_env_steps_trained": 1459200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1459200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2918400, "timers": {"training_iteration_time_ms": 3635.297, "learn_time_ms": 1114.383, "learn_throughput": 11486.175, "synch_weights_time_ms": 12.746}, "counters": {"num_env_steps_sampled": 1459200, "num_env_steps_trained": 1459200, "num_agent_steps_sampled": 2918400, "num_agent_steps_trained": 2918400}, "done": false, "episodes_total": 3648, "training_iteration": 114, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-22", "timestamp": 1666580842, "time_this_iter_s": 3.6836071014404297, 
"time_total_s": 429.0969891548157, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", 
"remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, 
"evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", 
"policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, 
"policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 429.0969891548157, "timesteps_since_restore": 0, "iterations_since_restore": 114, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.04, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 169.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 151.24, "shaped_reward_min": 45, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.64, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.44, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 14.19, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 13.98, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.92, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 13.53, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.94, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.98, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.35, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.22, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.48, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.92, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 13.53, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.92, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 13.53, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 9.629649865429142e-36, "cur_lr": 0.0010000000474974513, "total_loss": -0.0018681013025343418, "policy_loss": -0.002045394852757454, "vf_loss": 7.456493377685547, "vf_explained_var": 0.6910403966903687, "kl": 0.0016753775998950005, "entropy": 1.1367100477218628, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1472000, "num_env_steps_trained": 1472000, "num_agent_steps_sampled": 2944000, "num_agent_steps_trained": 2944000}, "sampler_results": {"episode_reward_max": 530.0, "episode_reward_min": 165.0, "episode_reward_mean": 490.44, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 78.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 245.22}, "custom_metrics": {"sparse_reward_mean": 169.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 151.24, "shaped_reward_min": 45, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.64, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.44, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.19, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 13.98, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.49, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.34, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.92, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 13.53, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.94, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.98, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.35, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.34, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.22, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.92, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 13.53, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.92, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 13.53, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 513.0, 462.0, 522.0, 519.0, 519.0, 462.0, 459.0, 524.0, 519.0, 447.0, 513.0, 473.0, 522.0, 516.0, 516.0, 530.0, 525.0, 473.0, 519.0, 513.0, 462.0, 510.0, 522.0, 530.0, 516.0, 516.0, 519.0, 522.0, 468.0, 470.0, 513.0, 455.0, 519.0, 510.0, 513.0, 465.0, 519.0, 473.0, 513.0, 473.0, 467.0, 370.0, 510.0, 516.0, 467.0, 513.0, 165.0, 516.0, 479.0, 516.0, 378.0, 522.0, 473.0, 522.0, 473.0, 513.0, 516.0, 425.0, 468.0, 479.0, 513.0, 455.0, 462.0, 453.0, 516.0, 467.0, 470.0, 516.0, 473.0, 516.0, 465.0, 522.0, 510.0, 522.0, 516.0, 522.0, 519.0, 465.0, 510.0, 519.0, 410.0, 519.0, 522.0, 513.0, 453.0, 465.0, 513.0, 513.0, 519.0, 525.0, 519.0, 482.0, 516.0, 519.0, 464.0, 467.0, 458.0, 462.0, 462.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 254.0, 260.0, 253.0, 214.0, 248.0, 276.0, 246.0, 248.0, 271.0, 253.0, 266.0, 238.0, 224.0, 228.0, 231.0, 255.0, 269.0, 245.0, 274.0, 236.0, 211.0, 252.0, 261.0, 229.0, 244.0, 258.0, 264.0, 252.0, 264.0, 262.0, 254.0, 268.0, 262.0, 262.0, 263.0, 242.0, 231.0, 262.0, 257.0, 260.0, 253.0, 227.0, 235.0, 253.0, 257.0, 254.0, 268.0, 268.0, 262.0, 271.0, 245.0, 256.0, 260.0, 267.0, 252.0, 269.0, 253.0, 245.0, 223.0, 254.0, 216.0, 253.0, 260.0, 230.0, 225.0, 262.0, 257.0, 257.0, 253.0, 243.0, 270.0, 223.0, 242.0, 252.0, 267.0, 230.0, 243.0, 266.0, 247.0, 241.0, 232.0, 228.0, 239.0, 170.0, 200.0, 246.0, 264.0, 257.0, 259.0, 233.0, 234.0, 254.0, 259.0, 87.0, 78.0, 253.0, 263.0, 239.0, 240.0, 253.0, 263.0, 194.0, 184.0, 255.0, 267.0, 234.0, 239.0, 257.0, 265.0, 218.0, 255.0, 262.0, 251.0, 275.0, 241.0, 219.0, 206.0, 235.0, 233.0, 242.0, 237.0, 267.0, 246.0, 228.0, 227.0, 226.0, 236.0, 227.0, 226.0, 262.0, 254.0, 228.0, 239.0, 231.0, 239.0, 259.0, 257.0, 242.0, 231.0, 265.0, 251.0, 231.0, 234.0, 260.0, 262.0, 250.0, 260.0, 260.0, 262.0, 267.0, 249.0, 274.0, 248.0, 260.0, 259.0, 236.0, 229.0, 250.0, 260.0, 254.0, 265.0, 203.0, 207.0, 259.0, 260.0, 265.0, 257.0, 261.0, 252.0, 220.0, 233.0, 221.0, 244.0, 246.0, 267.0, 245.0, 268.0, 267.0, 252.0, 267.0, 258.0, 252.0, 267.0, 241.0, 241.0, 246.0, 270.0, 253.0, 266.0, 228.0, 236.0, 225.0, 242.0, 218.0, 240.0, 227.0, 235.0, 231.0, 231.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6854759185382301, "mean_inference_ms": 1.2076344021717942, "mean_action_processing_ms": 0.13265548187961484, "mean_env_wait_ms": 0.8470913220135837, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 530.0, "episode_reward_min": 165.0, "episode_reward_mean": 490.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 78.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 245.22}, "hist_stats": {"episode_reward": [510.0, 513.0, 462.0, 522.0, 519.0, 519.0, 462.0, 459.0, 524.0, 519.0, 447.0, 513.0, 473.0, 522.0, 516.0, 516.0, 530.0, 525.0, 473.0, 519.0, 513.0, 462.0, 510.0, 522.0, 530.0, 516.0, 516.0, 519.0, 522.0, 468.0, 470.0, 513.0, 455.0, 519.0, 510.0, 513.0, 465.0, 519.0, 473.0, 513.0, 473.0, 467.0, 370.0, 510.0, 516.0, 467.0, 513.0, 165.0, 516.0, 479.0, 516.0, 378.0, 522.0, 473.0, 522.0, 473.0, 513.0, 516.0, 425.0, 468.0, 479.0, 513.0, 455.0, 462.0, 453.0, 516.0, 467.0, 470.0, 516.0, 473.0, 516.0, 465.0, 522.0, 510.0, 522.0, 516.0, 522.0, 519.0, 465.0, 510.0, 519.0, 410.0, 519.0, 522.0, 513.0, 453.0, 465.0, 513.0, 513.0, 519.0, 525.0, 519.0, 482.0, 516.0, 519.0, 464.0, 467.0, 458.0, 462.0, 462.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 254.0, 260.0, 253.0, 214.0, 248.0, 276.0, 246.0, 248.0, 271.0, 253.0, 266.0, 238.0, 224.0, 228.0, 231.0, 255.0, 269.0, 245.0, 274.0, 236.0, 211.0, 252.0, 261.0, 229.0, 244.0, 258.0, 264.0, 252.0, 264.0, 262.0, 254.0, 268.0, 262.0, 262.0, 263.0, 242.0, 231.0, 262.0, 257.0, 260.0, 253.0, 227.0, 235.0, 253.0, 257.0, 254.0, 268.0, 268.0, 262.0, 271.0, 245.0, 256.0, 260.0, 267.0, 252.0, 269.0, 253.0, 245.0, 223.0, 254.0, 216.0, 253.0, 260.0, 230.0, 225.0, 262.0, 257.0, 257.0, 253.0, 243.0, 270.0, 223.0, 242.0, 252.0, 267.0, 230.0, 243.0, 266.0, 247.0, 241.0, 232.0, 228.0, 239.0, 170.0, 200.0, 246.0, 
264.0, 257.0, 259.0, 233.0, 234.0, 254.0, 259.0, 87.0, 78.0, 253.0, 263.0, 239.0, 240.0, 253.0, 263.0, 194.0, 184.0, 255.0, 267.0, 234.0, 239.0, 257.0, 265.0, 218.0, 255.0, 262.0, 251.0, 275.0, 241.0, 219.0, 206.0, 235.0, 233.0, 242.0, 237.0, 267.0, 246.0, 228.0, 227.0, 226.0, 236.0, 227.0, 226.0, 262.0, 254.0, 228.0, 239.0, 231.0, 239.0, 259.0, 257.0, 242.0, 231.0, 265.0, 251.0, 231.0, 234.0, 260.0, 262.0, 250.0, 260.0, 260.0, 262.0, 267.0, 249.0, 274.0, 248.0, 260.0, 259.0, 236.0, 229.0, 250.0, 260.0, 254.0, 265.0, 203.0, 207.0, 259.0, 260.0, 265.0, 257.0, 261.0, 252.0, 220.0, 233.0, 221.0, 244.0, 246.0, 267.0, 245.0, 268.0, 267.0, 252.0, 267.0, 258.0, 252.0, 267.0, 241.0, 241.0, 246.0, 270.0, 253.0, 266.0, 228.0, 236.0, 225.0, 242.0, 218.0, 240.0, 227.0, 235.0, 231.0, 231.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6854759185382301, "mean_inference_ms": 1.2076344021717942, "mean_action_processing_ms": 0.13265548187961484, "mean_env_wait_ms": 0.8470913220135837, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2944000, "num_agent_steps_trained": 2944000, "num_env_steps_sampled": 1472000, "num_env_steps_trained": 1472000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1472000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2944000, "timers": {"training_iteration_time_ms": 3637.242, "learn_time_ms": 1116.26, "learn_throughput": 11466.867, "synch_weights_time_ms": 12.813}, "counters": {"num_env_steps_sampled": 1472000, "num_env_steps_trained": 1472000, "num_agent_steps_sampled": 2944000, "num_agent_steps_trained": 2944000}, "done": false, "episodes_total": 3680, "training_iteration": 115, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-25", "timestamp": 1666580845, "time_this_iter_s": 3.708575487136841, "time_total_s": 432.8055646419525, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 432.8055646419525, "timesteps_since_restore": 0, "iterations_since_restore": 115, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.849999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 167.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 149.23, "shaped_reward_min": 45, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.56, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.35, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 14.08, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 13.87, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.38, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.76, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.88, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.21, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.2, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.51, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.14, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.4, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.76, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.76, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 4.814824932714571e-36, "cur_lr": 0.0010000000474974513, "total_loss": -0.003027984406799078, "policy_loss": -0.0032042870298027992, "vf_loss": 7.424014091491699, "vf_explained_var": 0.6924208402633667, "kl": 0.0019326722249388695, "entropy": 1.1321947574615479, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1484800, "num_env_steps_trained": 1484800, "num_agent_steps_sampled": 2969600, "num_agent_steps_trained": 2969600}, "sampler_results": {"episode_reward_max": 525.0, "episode_reward_min": 165.0, "episode_reward_mean": 484.43, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 78.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 242.215}, "custom_metrics": {"sparse_reward_mean": 167.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 149.23, "shaped_reward_min": 45, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.56, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 14.35, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.08, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 13.87, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.52, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.61, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.38, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.76, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 13.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 3.88, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.21, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.52, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.43, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.18, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.2, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.51, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.14, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.76, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 13.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.76, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 13.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [455.0, 519.0, 510.0, 513.0, 465.0, 519.0, 473.0, 513.0, 473.0, 467.0, 370.0, 510.0, 516.0, 467.0, 513.0, 165.0, 516.0, 479.0, 516.0, 378.0, 522.0, 473.0, 522.0, 473.0, 513.0, 516.0, 425.0, 468.0, 479.0, 513.0, 455.0, 462.0, 453.0, 516.0, 467.0, 470.0, 516.0, 473.0, 516.0, 465.0, 522.0, 510.0, 522.0, 516.0, 522.0, 519.0, 465.0, 510.0, 519.0, 410.0, 519.0, 522.0, 513.0, 453.0, 465.0, 513.0, 513.0, 519.0, 525.0, 519.0, 482.0, 516.0, 519.0, 464.0, 467.0, 458.0, 462.0, 462.0, 516.0, 416.0, 462.0, 465.0, 456.0, 516.0, 519.0, 399.0, 470.0, 519.0, 411.0, 459.0, 516.0, 470.0, 444.0, 465.0, 521.0, 456.0, 456.0, 519.0, 507.0, 522.0, 516.0, 519.0, 525.0, 513.0, 519.0, 525.0, 462.0, 519.0, 468.0, 453.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [230.0, 225.0, 262.0, 257.0, 257.0, 253.0, 243.0, 270.0, 223.0, 242.0, 252.0, 267.0, 230.0, 243.0, 266.0, 247.0, 241.0, 232.0, 228.0, 239.0, 170.0, 200.0, 246.0, 264.0, 257.0, 259.0, 233.0, 234.0, 254.0, 259.0, 87.0, 78.0, 253.0, 263.0, 239.0, 240.0, 253.0, 263.0, 194.0, 184.0, 255.0, 267.0, 234.0, 239.0, 257.0, 265.0, 218.0, 255.0, 262.0, 251.0, 275.0, 241.0, 219.0, 206.0, 235.0, 233.0, 242.0, 237.0, 267.0, 246.0, 228.0, 227.0, 226.0, 236.0, 227.0, 226.0, 262.0, 254.0, 228.0, 239.0, 231.0, 239.0, 259.0, 257.0, 242.0, 231.0, 265.0, 251.0, 231.0, 234.0, 260.0, 262.0, 250.0, 260.0, 260.0, 262.0, 267.0, 249.0, 274.0, 248.0, 260.0, 259.0, 236.0, 229.0, 250.0, 260.0, 254.0, 265.0, 203.0, 207.0, 259.0, 260.0, 265.0, 257.0, 261.0, 252.0, 220.0, 233.0, 221.0, 244.0, 246.0, 267.0, 245.0, 268.0, 267.0, 252.0, 267.0, 258.0, 252.0, 267.0, 241.0, 241.0, 246.0, 270.0, 253.0, 266.0, 228.0, 236.0, 225.0, 242.0, 218.0, 240.0, 227.0, 235.0, 231.0, 231.0, 260.0, 256.0, 225.0, 191.0, 227.0, 235.0, 220.0, 245.0, 238.0, 218.0, 267.0, 249.0, 253.0, 266.0, 196.0, 203.0, 247.0, 223.0, 267.0, 252.0, 211.0, 200.0, 220.0, 239.0, 256.0, 260.0, 235.0, 235.0, 208.0, 236.0, 234.0, 231.0, 276.0, 245.0, 223.0, 233.0, 243.0, 213.0, 255.0, 264.0, 257.0, 250.0, 255.0, 267.0, 245.0, 271.0, 253.0, 266.0, 250.0, 275.0, 255.0, 258.0, 272.0, 247.0, 258.0, 267.0, 228.0, 234.0, 269.0, 250.0, 240.0, 228.0, 237.0, 216.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6854368390251488, "mean_inference_ms": 1.2076880422103875, "mean_action_processing_ms": 0.13265155972540793, "mean_env_wait_ms": 0.8467858667409799, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 525.0, "episode_reward_min": 165.0, "episode_reward_mean": 484.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 78.0}, "policy_reward_max": {"ppo": 276.0}, "policy_reward_mean": {"ppo": 242.215}, "hist_stats": {"episode_reward": [455.0, 519.0, 510.0, 513.0, 465.0, 519.0, 473.0, 513.0, 473.0, 467.0, 370.0, 510.0, 516.0, 467.0, 513.0, 165.0, 516.0, 479.0, 516.0, 378.0, 522.0, 473.0, 522.0, 473.0, 513.0, 516.0, 425.0, 468.0, 479.0, 513.0, 455.0, 462.0, 453.0, 516.0, 467.0, 470.0, 516.0, 473.0, 516.0, 465.0, 522.0, 510.0, 522.0, 516.0, 522.0, 519.0, 465.0, 510.0, 519.0, 410.0, 519.0, 522.0, 513.0, 453.0, 465.0, 513.0, 513.0, 519.0, 525.0, 519.0, 482.0, 516.0, 519.0, 464.0, 467.0, 458.0, 462.0, 462.0, 516.0, 416.0, 462.0, 465.0, 456.0, 516.0, 519.0, 399.0, 470.0, 519.0, 411.0, 459.0, 516.0, 470.0, 444.0, 465.0, 521.0, 456.0, 456.0, 519.0, 507.0, 522.0, 516.0, 519.0, 525.0, 513.0, 519.0, 525.0, 462.0, 519.0, 468.0, 453.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [230.0, 225.0, 262.0, 257.0, 257.0, 253.0, 243.0, 270.0, 223.0, 242.0, 252.0, 267.0, 230.0, 243.0, 266.0, 247.0, 241.0, 232.0, 228.0, 239.0, 170.0, 200.0, 246.0, 264.0, 257.0, 259.0, 233.0, 234.0, 254.0, 259.0, 87.0, 78.0, 253.0, 263.0, 239.0, 240.0, 253.0, 263.0, 194.0, 184.0, 255.0, 267.0, 234.0, 239.0, 257.0, 265.0, 218.0, 255.0, 262.0, 251.0, 275.0, 241.0, 219.0, 206.0, 235.0, 233.0, 242.0, 237.0, 267.0, 246.0, 228.0, 227.0, 226.0, 236.0, 227.0, 226.0, 262.0, 254.0, 228.0, 239.0, 231.0, 239.0, 259.0, 257.0, 242.0, 231.0, 265.0, 251.0, 231.0, 234.0, 260.0, 262.0, 250.0, 260.0, 260.0, 262.0, 267.0, 
249.0, 274.0, 248.0, 260.0, 259.0, 236.0, 229.0, 250.0, 260.0, 254.0, 265.0, 203.0, 207.0, 259.0, 260.0, 265.0, 257.0, 261.0, 252.0, 220.0, 233.0, 221.0, 244.0, 246.0, 267.0, 245.0, 268.0, 267.0, 252.0, 267.0, 258.0, 252.0, 267.0, 241.0, 241.0, 246.0, 270.0, 253.0, 266.0, 228.0, 236.0, 225.0, 242.0, 218.0, 240.0, 227.0, 235.0, 231.0, 231.0, 260.0, 256.0, 225.0, 191.0, 227.0, 235.0, 220.0, 245.0, 238.0, 218.0, 267.0, 249.0, 253.0, 266.0, 196.0, 203.0, 247.0, 223.0, 267.0, 252.0, 211.0, 200.0, 220.0, 239.0, 256.0, 260.0, 235.0, 235.0, 208.0, 236.0, 234.0, 231.0, 276.0, 245.0, 223.0, 233.0, 243.0, 213.0, 255.0, 264.0, 257.0, 250.0, 255.0, 267.0, 245.0, 271.0, 253.0, 266.0, 250.0, 275.0, 255.0, 258.0, 272.0, 247.0, 258.0, 267.0, 228.0, 234.0, 269.0, 250.0, 240.0, 228.0, 237.0, 216.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6854368390251488, "mean_inference_ms": 1.2076880422103875, "mean_action_processing_ms": 0.13265155972540793, "mean_env_wait_ms": 0.8467858667409799, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2969600, "num_agent_steps_trained": 2969600, "num_env_steps_sampled": 1484800, "num_env_steps_trained": 1484800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1484800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2969600, "timers": {"training_iteration_time_ms": 3656.379, "learn_time_ms": 1121.671, "learn_throughput": 11411.55, "synch_weights_time_ms": 12.795}, "counters": {"num_env_steps_sampled": 1484800, "num_env_steps_trained": 1484800, "num_agent_steps_sampled": 2969600, "num_agent_steps_trained": 2969600}, "done": false, "episodes_total": 3712, "training_iteration": 116, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-29", "timestamp": 1666580849, "time_this_iter_s": 3.8891963958740234, "time_total_s": 436.69476103782654, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 436.69476103782654, "timesteps_since_restore": 0, "iterations_since_restore": 116, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.14, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 168.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 149.2, "shaped_reward_min": 60, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.32, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.46, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 13.88, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.02, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.61, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.66, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.0, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.27, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.42, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.35, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.61, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.66, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.61, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.66, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 2.4074124663572855e-36, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016507103573530912, "policy_loss": -0.0018331120954826474, "vf_loss": 7.434719085693359, "vf_explained_var": 0.6968331933021545, "kl": 0.0018940645968541503, "entropy": 1.122139811515808, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1497600, "num_env_steps_trained": 1497600, "num_agent_steps_sampled": 2995200, "num_agent_steps_trained": 2995200}, "sampler_results": {"episode_reward_max": 525.0, "episode_reward_min": 180.0, "episode_reward_mean": 486.4, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 282.0}, "policy_reward_mean": {"ppo": 243.2}, "custom_metrics": {"sparse_reward_mean": 168.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 149.2, "shaped_reward_min": 60, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.32, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.46, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.88, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.02, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.31, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.25, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.61, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.66, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.0, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.43, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.27, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.42, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.61, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.66, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.61, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.66, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [453.0, 516.0, 467.0, 470.0, 516.0, 473.0, 516.0, 465.0, 522.0, 510.0, 522.0, 516.0, 522.0, 519.0, 465.0, 510.0, 519.0, 410.0, 519.0, 522.0, 513.0, 453.0, 465.0, 513.0, 513.0, 519.0, 525.0, 519.0, 482.0, 516.0, 519.0, 464.0, 467.0, 458.0, 462.0, 462.0, 516.0, 416.0, 462.0, 465.0, 456.0, 516.0, 519.0, 399.0, 470.0, 519.0, 411.0, 459.0, 516.0, 470.0, 444.0, 465.0, 521.0, 456.0, 456.0, 519.0, 507.0, 522.0, 516.0, 519.0, 525.0, 513.0, 519.0, 525.0, 462.0, 519.0, 468.0, 453.0, 516.0, 513.0, 519.0, 501.0, 419.0, 516.0, 462.0, 522.0, 180.0, 479.0, 504.0, 465.0, 507.0, 522.0, 468.0, 519.0, 510.0, 522.0, 465.0, 413.0, 513.0, 519.0, 427.0, 473.0, 465.0, 513.0, 513.0, 516.0, 464.0, 476.0, 513.0, 441.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [227.0, 226.0, 262.0, 254.0, 228.0, 239.0, 231.0, 239.0, 259.0, 257.0, 242.0, 231.0, 265.0, 251.0, 231.0, 234.0, 260.0, 262.0, 250.0, 260.0, 260.0, 262.0, 267.0, 249.0, 274.0, 248.0, 260.0, 259.0, 236.0, 229.0, 250.0, 260.0, 254.0, 265.0, 203.0, 207.0, 259.0, 260.0, 265.0, 257.0, 261.0, 252.0, 220.0, 233.0, 221.0, 244.0, 246.0, 267.0, 245.0, 268.0, 267.0, 252.0, 267.0, 258.0, 252.0, 267.0, 241.0, 241.0, 246.0, 270.0, 253.0, 266.0, 228.0, 236.0, 225.0, 242.0, 218.0, 240.0, 227.0, 235.0, 231.0, 231.0, 260.0, 256.0, 225.0, 191.0, 227.0, 235.0, 220.0, 245.0, 238.0, 218.0, 267.0, 249.0, 253.0, 266.0, 196.0, 203.0, 247.0, 223.0, 267.0, 252.0, 211.0, 200.0, 220.0, 239.0, 256.0, 260.0, 235.0, 235.0, 208.0, 236.0, 234.0, 231.0, 276.0, 245.0, 223.0, 233.0, 243.0, 213.0, 255.0, 264.0, 257.0, 250.0, 255.0, 267.0, 245.0, 271.0, 253.0, 266.0, 250.0, 275.0, 255.0, 258.0, 272.0, 247.0, 258.0, 267.0, 228.0, 234.0, 269.0, 250.0, 240.0, 228.0, 237.0, 216.0, 257.0, 259.0, 257.0, 256.0, 253.0, 266.0, 250.0, 251.0, 208.0, 211.0, 271.0, 245.0, 224.0, 238.0, 255.0, 267.0, 90.0, 90.0, 241.0, 238.0, 247.0, 257.0, 223.0, 242.0, 248.0, 259.0, 254.0, 268.0, 231.0, 237.0, 253.0, 266.0, 262.0, 248.0, 257.0, 265.0, 245.0, 220.0, 212.0, 201.0, 240.0, 273.0, 282.0, 237.0, 217.0, 210.0, 228.0, 245.0, 244.0, 221.0, 255.0, 258.0, 248.0, 265.0, 267.0, 249.0, 218.0, 246.0, 233.0, 243.0, 263.0, 250.0, 219.0, 222.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6853727837400302, "mean_inference_ms": 1.2079962459958373, "mean_action_processing_ms": 0.13264166399823427, "mean_env_wait_ms": 0.8467385166751192, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 525.0, "episode_reward_min": 180.0, "episode_reward_mean": 486.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 282.0}, "policy_reward_mean": {"ppo": 243.2}, "hist_stats": {"episode_reward": [453.0, 516.0, 467.0, 470.0, 516.0, 473.0, 516.0, 465.0, 522.0, 510.0, 522.0, 516.0, 522.0, 519.0, 465.0, 510.0, 519.0, 410.0, 519.0, 522.0, 513.0, 453.0, 465.0, 513.0, 513.0, 519.0, 525.0, 519.0, 482.0, 516.0, 519.0, 464.0, 467.0, 458.0, 462.0, 462.0, 516.0, 416.0, 462.0, 465.0, 456.0, 516.0, 519.0, 399.0, 470.0, 519.0, 411.0, 459.0, 516.0, 470.0, 444.0, 465.0, 521.0, 456.0, 456.0, 519.0, 507.0, 522.0, 516.0, 519.0, 525.0, 513.0, 519.0, 525.0, 462.0, 519.0, 468.0, 453.0, 516.0, 513.0, 519.0, 501.0, 419.0, 516.0, 462.0, 522.0, 180.0, 479.0, 504.0, 465.0, 507.0, 522.0, 468.0, 519.0, 510.0, 522.0, 465.0, 413.0, 513.0, 519.0, 427.0, 473.0, 465.0, 513.0, 513.0, 516.0, 464.0, 476.0, 513.0, 441.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [227.0, 226.0, 262.0, 254.0, 228.0, 239.0, 231.0, 239.0, 259.0, 257.0, 242.0, 231.0, 265.0, 251.0, 231.0, 234.0, 260.0, 262.0, 250.0, 260.0, 260.0, 262.0, 267.0, 249.0, 274.0, 248.0, 260.0, 259.0, 236.0, 229.0, 250.0, 260.0, 254.0, 265.0, 203.0, 207.0, 259.0, 260.0, 265.0, 257.0, 261.0, 252.0, 220.0, 233.0, 221.0, 244.0, 246.0, 267.0, 245.0, 268.0, 267.0, 252.0, 267.0, 258.0, 252.0, 267.0, 241.0, 241.0, 246.0, 270.0, 253.0, 266.0, 228.0, 236.0, 225.0, 242.0, 218.0, 240.0, 227.0, 235.0, 231.0, 231.0, 260.0, 256.0, 225.0, 191.0, 227.0, 235.0, 220.0, 245.0, 238.0, 218.0, 267.0, 249.0, 253.0, 266.0, 196.0, 
203.0, 247.0, 223.0, 267.0, 252.0, 211.0, 200.0, 220.0, 239.0, 256.0, 260.0, 235.0, 235.0, 208.0, 236.0, 234.0, 231.0, 276.0, 245.0, 223.0, 233.0, 243.0, 213.0, 255.0, 264.0, 257.0, 250.0, 255.0, 267.0, 245.0, 271.0, 253.0, 266.0, 250.0, 275.0, 255.0, 258.0, 272.0, 247.0, 258.0, 267.0, 228.0, 234.0, 269.0, 250.0, 240.0, 228.0, 237.0, 216.0, 257.0, 259.0, 257.0, 256.0, 253.0, 266.0, 250.0, 251.0, 208.0, 211.0, 271.0, 245.0, 224.0, 238.0, 255.0, 267.0, 90.0, 90.0, 241.0, 238.0, 247.0, 257.0, 223.0, 242.0, 248.0, 259.0, 254.0, 268.0, 231.0, 237.0, 253.0, 266.0, 262.0, 248.0, 257.0, 265.0, 245.0, 220.0, 212.0, 201.0, 240.0, 273.0, 282.0, 237.0, 217.0, 210.0, 228.0, 245.0, 244.0, 221.0, 255.0, 258.0, 248.0, 265.0, 267.0, 249.0, 218.0, 246.0, 233.0, 243.0, 263.0, 250.0, 219.0, 222.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6853727837400302, "mean_inference_ms": 1.2079962459958373, "mean_action_processing_ms": 0.13264166399823427, "mean_env_wait_ms": 0.8467385166751192, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 2995200, "num_agent_steps_trained": 2995200, "num_env_steps_sampled": 1497600, "num_env_steps_trained": 1497600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1497600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 2995200, "timers": {"training_iteration_time_ms": 3696.818, "learn_time_ms": 1132.17, "learn_throughput": 11305.723, "synch_weights_time_ms": 12.436}, "counters": {"num_env_steps_sampled": 1497600, "num_env_steps_trained": 1497600, "num_agent_steps_sampled": 2995200, "num_agent_steps_trained": 2995200}, "done": false, "episodes_total": 3744, "training_iteration": 117, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-34", "timestamp": 1666580854, "time_this_iter_s": 3.9661037921905518, "time_total_s": 440.6608648300171, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 440.6608648300171, "timesteps_since_restore": 0, "iterations_since_restore": 117, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.383333333333336, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 168.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 148.6, "shaped_reward_min": 60, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.12, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.76, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 13.63, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.28, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.45, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.32, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.95, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.77, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.09, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.23, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.34, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.18, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.32, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.95, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.32, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.95, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.2037062331786428e-36, "cur_lr": 0.0010000000474974513, "total_loss": 0.000875909929163754, "policy_loss": 0.0006957841105759144, "vf_loss": 7.3936967849731445, "vf_explained_var": 0.7045917510986328, "kl": 0.0021515809930860996, "entropy": 1.1184874773025513, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1510400, "num_env_steps_trained": 1510400, "num_agent_steps_sampled": 3020800, "num_agent_steps_trained": 3020800}, "sampler_results": {"episode_reward_max": 525.0, "episode_reward_min": 180.0, "episode_reward_mean": 485.0, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 282.0}, "policy_reward_mean": {"ppo": 242.5}, "custom_metrics": {"sparse_reward_mean": 168.2, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 148.6, "shaped_reward_min": 60, "shaped_reward_max": 165, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.12, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 14.76, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.63, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.28, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.45, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.55, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.32, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 13.95, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.77, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.09, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.62, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.39, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.23, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.34, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.18, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.32, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 13.95, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.32, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 13.95, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [467.0, 458.0, 462.0, 462.0, 516.0, 416.0, 462.0, 465.0, 456.0, 516.0, 519.0, 399.0, 470.0, 519.0, 411.0, 459.0, 516.0, 470.0, 444.0, 465.0, 521.0, 456.0, 456.0, 519.0, 507.0, 522.0, 516.0, 519.0, 525.0, 513.0, 519.0, 525.0, 462.0, 519.0, 468.0, 453.0, 516.0, 513.0, 519.0, 501.0, 419.0, 516.0, 462.0, 522.0, 180.0, 479.0, 504.0, 465.0, 507.0, 522.0, 468.0, 519.0, 510.0, 522.0, 465.0, 413.0, 513.0, 519.0, 427.0, 473.0, 465.0, 513.0, 513.0, 516.0, 464.0, 476.0, 513.0, 441.0, 522.0, 516.0, 519.0, 519.0, 516.0, 468.0, 522.0, 465.0, 470.0, 507.0, 462.0, 513.0, 470.0, 516.0, 308.0, 519.0, 519.0, 459.0, 467.0, 516.0, 513.0, 510.0, 516.0, 513.0, 522.0, 522.0, 473.0, 516.0, 522.0, 435.0, 465.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [225.0, 242.0, 218.0, 240.0, 227.0, 235.0, 231.0, 231.0, 260.0, 256.0, 225.0, 191.0, 227.0, 235.0, 220.0, 245.0, 238.0, 218.0, 267.0, 249.0, 253.0, 266.0, 196.0, 203.0, 247.0, 223.0, 267.0, 252.0, 211.0, 200.0, 220.0, 239.0, 256.0, 260.0, 235.0, 235.0, 208.0, 236.0, 234.0, 231.0, 276.0, 245.0, 223.0, 233.0, 243.0, 213.0, 255.0, 264.0, 257.0, 250.0, 255.0, 267.0, 245.0, 271.0, 253.0, 266.0, 250.0, 275.0, 255.0, 258.0, 272.0, 247.0, 258.0, 267.0, 228.0, 234.0, 269.0, 250.0, 240.0, 228.0, 237.0, 216.0, 257.0, 259.0, 257.0, 256.0, 253.0, 266.0, 250.0, 251.0, 208.0, 211.0, 271.0, 245.0, 224.0, 238.0, 255.0, 267.0, 90.0, 90.0, 241.0, 238.0, 247.0, 257.0, 223.0, 242.0, 248.0, 259.0, 254.0, 268.0, 231.0, 237.0, 253.0, 266.0, 262.0, 248.0, 257.0, 265.0, 245.0, 220.0, 212.0, 201.0, 240.0, 273.0, 282.0, 237.0, 217.0, 210.0, 228.0, 245.0, 244.0, 221.0, 255.0, 258.0, 248.0, 265.0, 267.0, 249.0, 218.0, 246.0, 233.0, 243.0, 263.0, 250.0, 219.0, 222.0, 259.0, 263.0, 251.0, 265.0, 268.0, 251.0, 260.0, 259.0, 260.0, 256.0, 239.0, 229.0, 265.0, 257.0, 234.0, 231.0, 231.0, 239.0, 258.0, 249.0, 225.0, 237.0, 264.0, 249.0, 244.0, 226.0, 261.0, 255.0, 159.0, 149.0, 272.0, 247.0, 256.0, 263.0, 222.0, 237.0, 222.0, 245.0, 250.0, 266.0, 248.0, 265.0, 262.0, 248.0, 257.0, 259.0, 259.0, 254.0, 269.0, 253.0, 260.0, 262.0, 236.0, 237.0, 261.0, 255.0, 264.0, 258.0, 212.0, 223.0, 237.0, 228.0, 245.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6852450585935208, "mean_inference_ms": 1.208216584112233, "mean_action_processing_ms": 0.13261600408321741, "mean_env_wait_ms": 0.8465971694239437, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 525.0, "episode_reward_min": 180.0, "episode_reward_mean": 485.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 282.0}, "policy_reward_mean": {"ppo": 242.5}, "hist_stats": {"episode_reward": [467.0, 458.0, 462.0, 462.0, 516.0, 416.0, 462.0, 465.0, 456.0, 516.0, 519.0, 399.0, 470.0, 519.0, 411.0, 459.0, 516.0, 470.0, 444.0, 465.0, 521.0, 456.0, 456.0, 519.0, 507.0, 522.0, 516.0, 519.0, 525.0, 513.0, 519.0, 525.0, 462.0, 519.0, 468.0, 453.0, 516.0, 513.0, 519.0, 501.0, 419.0, 516.0, 462.0, 522.0, 180.0, 479.0, 504.0, 465.0, 507.0, 522.0, 468.0, 519.0, 510.0, 522.0, 465.0, 413.0, 513.0, 519.0, 427.0, 473.0, 465.0, 513.0, 513.0, 516.0, 464.0, 476.0, 513.0, 441.0, 522.0, 516.0, 519.0, 519.0, 516.0, 468.0, 522.0, 465.0, 470.0, 507.0, 462.0, 513.0, 470.0, 516.0, 308.0, 519.0, 519.0, 459.0, 467.0, 516.0, 513.0, 510.0, 516.0, 513.0, 522.0, 522.0, 473.0, 516.0, 522.0, 435.0, 465.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [225.0, 242.0, 218.0, 240.0, 227.0, 235.0, 231.0, 231.0, 260.0, 256.0, 225.0, 191.0, 227.0, 235.0, 220.0, 245.0, 238.0, 218.0, 267.0, 249.0, 253.0, 266.0, 196.0, 203.0, 247.0, 223.0, 267.0, 252.0, 211.0, 200.0, 220.0, 239.0, 256.0, 260.0, 235.0, 235.0, 208.0, 236.0, 234.0, 231.0, 276.0, 245.0, 223.0, 233.0, 243.0, 213.0, 255.0, 264.0, 257.0, 250.0, 255.0, 267.0, 245.0, 271.0, 253.0, 266.0, 250.0, 275.0, 255.0, 258.0, 272.0, 247.0, 258.0, 267.0, 228.0, 234.0, 269.0, 250.0, 240.0, 228.0, 237.0, 216.0, 257.0, 259.0, 257.0, 256.0, 253.0, 266.0, 250.0, 251.0, 208.0, 211.0, 271.0, 245.0, 224.0, 238.0, 255.0, 
267.0, 90.0, 90.0, 241.0, 238.0, 247.0, 257.0, 223.0, 242.0, 248.0, 259.0, 254.0, 268.0, 231.0, 237.0, 253.0, 266.0, 262.0, 248.0, 257.0, 265.0, 245.0, 220.0, 212.0, 201.0, 240.0, 273.0, 282.0, 237.0, 217.0, 210.0, 228.0, 245.0, 244.0, 221.0, 255.0, 258.0, 248.0, 265.0, 267.0, 249.0, 218.0, 246.0, 233.0, 243.0, 263.0, 250.0, 219.0, 222.0, 259.0, 263.0, 251.0, 265.0, 268.0, 251.0, 260.0, 259.0, 260.0, 256.0, 239.0, 229.0, 265.0, 257.0, 234.0, 231.0, 231.0, 239.0, 258.0, 249.0, 225.0, 237.0, 264.0, 249.0, 244.0, 226.0, 261.0, 255.0, 159.0, 149.0, 272.0, 247.0, 256.0, 263.0, 222.0, 237.0, 222.0, 245.0, 250.0, 266.0, 248.0, 265.0, 262.0, 248.0, 257.0, 259.0, 259.0, 254.0, 269.0, 253.0, 260.0, 262.0, 236.0, 237.0, 261.0, 255.0, 264.0, 258.0, 212.0, 223.0, 237.0, 228.0, 245.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6852450585935208, "mean_inference_ms": 1.208216584112233, "mean_action_processing_ms": 0.13261600408321741, "mean_env_wait_ms": 0.8465971694239437, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3020800, "num_agent_steps_trained": 3020800, "num_env_steps_sampled": 1510400, "num_env_steps_trained": 1510400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1510400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3020800, "timers": {"training_iteration_time_ms": 3691.368, "learn_time_ms": 1140.138, "learn_throughput": 11226.714, "synch_weights_time_ms": 12.254}, "counters": {"num_env_steps_sampled": 1510400, "num_env_steps_trained": 1510400, "num_agent_steps_sampled": 3020800, "num_agent_steps_trained": 3020800}, "done": false, "episodes_total": 3776, "training_iteration": 118, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-37", "timestamp": 1666580857, "time_this_iter_s": 3.6140036582946777, "time_total_s": 444.27486848831177, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 444.27486848831177, "timesteps_since_restore": 0, "iterations_since_restore": 118, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.383333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 169.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 150.04, "shaped_reward_min": 60, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.03, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.02, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 13.53, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.47, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.3, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.17, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.27, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.72, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.25, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.5, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.19, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.15, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.3, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.17, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.3, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.17, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 6.018531165893214e-37, "cur_lr": 0.0010000000474974513, "total_loss": -2.7321191737428308e-05, "policy_loss": -0.0002118002448696643, "vf_loss": 7.386599540710449, "vf_explained_var": 0.6774031519889832, "kl": 0.0019258097745478153, "entropy": 1.1083590984344482, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1523200, "num_env_steps_trained": 1523200, "num_agent_steps_sampled": 3046400, "num_agent_steps_trained": 3046400}, "sampler_results": {"episode_reward_max": 530.0, "episode_reward_min": 180.0, "episode_reward_mean": 489.24, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 282.0}, "policy_reward_mean": {"ppo": 244.62}, "custom_metrics": {"sparse_reward_mean": 169.6, "sparse_reward_min": 60, "sparse_reward_max": 180, "shaped_reward_mean": 150.04, "shaped_reward_min": 60, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.03, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.02, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.53, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.47, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.54, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.34, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.3, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.17, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.27, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.72, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.25, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.61, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.31, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.5, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.19, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.42, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.3, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.17, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.3, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.17, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [462.0, 519.0, 468.0, 453.0, 516.0, 513.0, 519.0, 501.0, 419.0, 516.0, 462.0, 522.0, 180.0, 479.0, 504.0, 465.0, 507.0, 522.0, 468.0, 519.0, 510.0, 522.0, 465.0, 413.0, 513.0, 519.0, 427.0, 473.0, 465.0, 513.0, 513.0, 516.0, 464.0, 476.0, 513.0, 441.0, 522.0, 516.0, 519.0, 519.0, 516.0, 468.0, 522.0, 465.0, 470.0, 507.0, 462.0, 513.0, 470.0, 516.0, 308.0, 519.0, 519.0, 459.0, 467.0, 516.0, 513.0, 510.0, 516.0, 513.0, 522.0, 522.0, 473.0, 516.0, 522.0, 435.0, 465.0, 513.0, 519.0, 473.0, 354.0, 513.0, 510.0, 468.0, 507.0, 519.0, 419.0, 516.0, 456.0, 519.0, 513.0, 519.0, 468.0, 525.0, 516.0, 519.0, 519.0, 473.0, 513.0, 504.0, 519.0, 516.0, 519.0, 519.0, 516.0, 459.0, 416.0, 516.0, 530.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [228.0, 234.0, 269.0, 250.0, 240.0, 228.0, 237.0, 216.0, 257.0, 259.0, 257.0, 256.0, 253.0, 266.0, 250.0, 251.0, 208.0, 211.0, 271.0, 245.0, 224.0, 238.0, 255.0, 267.0, 90.0, 90.0, 241.0, 238.0, 247.0, 257.0, 223.0, 242.0, 248.0, 259.0, 254.0, 268.0, 231.0, 237.0, 253.0, 266.0, 262.0, 248.0, 257.0, 265.0, 245.0, 220.0, 212.0, 201.0, 240.0, 273.0, 282.0, 237.0, 217.0, 210.0, 228.0, 245.0, 244.0, 221.0, 255.0, 258.0, 248.0, 265.0, 267.0, 249.0, 218.0, 246.0, 233.0, 243.0, 263.0, 250.0, 219.0, 222.0, 259.0, 263.0, 251.0, 265.0, 268.0, 251.0, 260.0, 259.0, 260.0, 256.0, 239.0, 229.0, 265.0, 257.0, 234.0, 231.0, 231.0, 239.0, 258.0, 249.0, 225.0, 237.0, 264.0, 249.0, 244.0, 226.0, 261.0, 255.0, 159.0, 149.0, 272.0, 247.0, 256.0, 263.0, 222.0, 237.0, 222.0, 245.0, 250.0, 266.0, 248.0, 265.0, 262.0, 248.0, 257.0, 259.0, 259.0, 254.0, 269.0, 253.0, 260.0, 262.0, 236.0, 237.0, 261.0, 255.0, 264.0, 258.0, 212.0, 223.0, 237.0, 228.0, 245.0, 268.0, 264.0, 255.0, 237.0, 236.0, 184.0, 170.0, 256.0, 257.0, 246.0, 264.0, 236.0, 232.0, 256.0, 251.0, 248.0, 271.0, 214.0, 205.0, 260.0, 256.0, 221.0, 235.0, 255.0, 264.0, 252.0, 261.0, 275.0, 244.0, 229.0, 239.0, 250.0, 275.0, 247.0, 269.0, 270.0, 249.0, 257.0, 262.0, 234.0, 239.0, 256.0, 257.0, 257.0, 247.0, 269.0, 250.0, 270.0, 246.0, 257.0, 262.0, 263.0, 256.0, 252.0, 264.0, 224.0, 235.0, 201.0, 215.0, 257.0, 259.0, 260.0, 270.0, 272.0, 250.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.68516331162428, "mean_inference_ms": 1.208340954911563, "mean_action_processing_ms": 0.13259036340208671, "mean_env_wait_ms": 0.8464620351800528, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 530.0, "episode_reward_min": 180.0, "episode_reward_mean": 489.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 282.0}, "policy_reward_mean": {"ppo": 244.62}, "hist_stats": {"episode_reward": [462.0, 519.0, 468.0, 453.0, 516.0, 513.0, 519.0, 501.0, 419.0, 516.0, 462.0, 522.0, 180.0, 479.0, 504.0, 465.0, 507.0, 522.0, 468.0, 519.0, 510.0, 522.0, 465.0, 413.0, 513.0, 519.0, 427.0, 473.0, 465.0, 513.0, 513.0, 516.0, 464.0, 476.0, 513.0, 441.0, 522.0, 516.0, 519.0, 519.0, 516.0, 468.0, 522.0, 465.0, 470.0, 507.0, 462.0, 513.0, 470.0, 516.0, 308.0, 519.0, 519.0, 459.0, 467.0, 516.0, 513.0, 510.0, 516.0, 513.0, 522.0, 522.0, 473.0, 516.0, 522.0, 435.0, 465.0, 513.0, 519.0, 473.0, 354.0, 513.0, 510.0, 468.0, 507.0, 519.0, 419.0, 516.0, 456.0, 519.0, 513.0, 519.0, 468.0, 525.0, 516.0, 519.0, 519.0, 473.0, 513.0, 504.0, 519.0, 516.0, 519.0, 519.0, 516.0, 459.0, 416.0, 516.0, 530.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [228.0, 234.0, 269.0, 250.0, 240.0, 228.0, 237.0, 216.0, 257.0, 259.0, 257.0, 256.0, 253.0, 266.0, 250.0, 251.0, 208.0, 211.0, 271.0, 245.0, 224.0, 238.0, 255.0, 267.0, 90.0, 90.0, 241.0, 238.0, 247.0, 257.0, 223.0, 242.0, 248.0, 259.0, 254.0, 268.0, 231.0, 237.0, 253.0, 266.0, 262.0, 248.0, 257.0, 265.0, 245.0, 220.0, 212.0, 201.0, 240.0, 273.0, 282.0, 237.0, 217.0, 210.0, 228.0, 245.0, 244.0, 221.0, 255.0, 258.0, 248.0, 265.0, 267.0, 249.0, 218.0, 246.0, 233.0, 243.0, 263.0, 250.0, 219.0, 222.0, 259.0, 263.0, 251.0, 265.0, 268.0, 251.0, 260.0, 259.0, 260.0, 256.0, 239.0, 229.0, 265.0, 257.0, 234.0, 
231.0, 231.0, 239.0, 258.0, 249.0, 225.0, 237.0, 264.0, 249.0, 244.0, 226.0, 261.0, 255.0, 159.0, 149.0, 272.0, 247.0, 256.0, 263.0, 222.0, 237.0, 222.0, 245.0, 250.0, 266.0, 248.0, 265.0, 262.0, 248.0, 257.0, 259.0, 259.0, 254.0, 269.0, 253.0, 260.0, 262.0, 236.0, 237.0, 261.0, 255.0, 264.0, 258.0, 212.0, 223.0, 237.0, 228.0, 245.0, 268.0, 264.0, 255.0, 237.0, 236.0, 184.0, 170.0, 256.0, 257.0, 246.0, 264.0, 236.0, 232.0, 256.0, 251.0, 248.0, 271.0, 214.0, 205.0, 260.0, 256.0, 221.0, 235.0, 255.0, 264.0, 252.0, 261.0, 275.0, 244.0, 229.0, 239.0, 250.0, 275.0, 247.0, 269.0, 270.0, 249.0, 257.0, 262.0, 234.0, 239.0, 256.0, 257.0, 257.0, 247.0, 269.0, 250.0, 270.0, 246.0, 257.0, 262.0, 263.0, 256.0, 252.0, 264.0, 224.0, 235.0, 201.0, 215.0, 257.0, 259.0, 260.0, 270.0, 272.0, 250.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.68516331162428, "mean_inference_ms": 1.208340954911563, "mean_action_processing_ms": 0.13259036340208671, "mean_env_wait_ms": 0.8464620351800528, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3046400, "num_agent_steps_trained": 3046400, "num_env_steps_sampled": 1523200, "num_env_steps_trained": 1523200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1523200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3046400, "timers": {"training_iteration_time_ms": 3695.547, "learn_time_ms": 1140.914, "learn_throughput": 11219.077, "synch_weights_time_ms": 12.357}, "counters": {"num_env_steps_sampled": 1523200, "num_env_steps_trained": 1523200, "num_agent_steps_sampled": 3046400, "num_agent_steps_trained": 3046400}, "done": false, "episodes_total": 3808, "training_iteration": 119, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-41", "timestamp": 1666580861, "time_this_iter_s": 3.732775926589966, "time_total_s": 448.00764441490173, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 448.00764441490173, "timesteps_since_restore": 0, "iterations_since_restore": 119, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 172.8, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 153.3, "shaped_reward_min": 108, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.35, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 15.19, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 13.87, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.7, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.46, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.63, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.4, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.09, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.17, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.51, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.45, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.27, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.63, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.4, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.63, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.4, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.009265582946607e-37, "cur_lr": 0.0010000000474974513, "total_loss": -0.005632634274661541, "policy_loss": -0.0058066705241799355, "vf_loss": 7.275279998779297, "vf_explained_var": 0.7075681090354919, "kl": 0.0015871080104261637, "entropy": 1.1069822311401367, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1536000, "num_env_steps_trained": 1536000, "num_agent_steps_sampled": 3072000, "num_agent_steps_trained": 3072000}, "sampler_results": {"episode_reward_max": 567.0, "episode_reward_min": 308.0, "episode_reward_mean": 498.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 149.0}, "policy_reward_max": {"ppo": 285.0}, "policy_reward_mean": {"ppo": 249.45}, "custom_metrics": {"sparse_reward_mean": 172.8, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 153.3, "shaped_reward_min": 108, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.35, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 15.19, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 13.87, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.7, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.46, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.63, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.4, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.09, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.6, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.33, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.17, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.51, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.45, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.27, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.63, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.4, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.63, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.4, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [464.0, 476.0, 513.0, 441.0, 522.0, 516.0, 519.0, 519.0, 516.0, 468.0, 522.0, 465.0, 470.0, 507.0, 462.0, 513.0, 470.0, 516.0, 308.0, 519.0, 519.0, 459.0, 467.0, 516.0, 513.0, 510.0, 516.0, 513.0, 522.0, 522.0, 473.0, 516.0, 522.0, 435.0, 465.0, 513.0, 519.0, 473.0, 354.0, 513.0, 510.0, 468.0, 507.0, 519.0, 419.0, 516.0, 456.0, 519.0, 513.0, 519.0, 468.0, 525.0, 516.0, 519.0, 519.0, 473.0, 513.0, 504.0, 519.0, 516.0, 519.0, 519.0, 516.0, 459.0, 416.0, 516.0, 530.0, 522.0, 522.0, 522.0, 525.0, 522.0, 516.0, 519.0, 504.0, 473.0, 473.0, 522.0, 516.0, 424.0, 525.0, 510.0, 522.0, 524.0, 525.0, 567.0, 522.0, 527.0, 510.0, 519.0, 507.0, 468.0, 522.0, 519.0, 522.0, 522.0, 465.0, 525.0, 522.0, 468.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [218.0, 246.0, 233.0, 243.0, 263.0, 250.0, 219.0, 222.0, 259.0, 263.0, 251.0, 265.0, 268.0, 251.0, 260.0, 259.0, 260.0, 256.0, 239.0, 229.0, 265.0, 257.0, 234.0, 231.0, 231.0, 239.0, 258.0, 249.0, 225.0, 237.0, 264.0, 249.0, 244.0, 226.0, 261.0, 255.0, 159.0, 149.0, 272.0, 247.0, 256.0, 263.0, 222.0, 237.0, 222.0, 245.0, 250.0, 266.0, 248.0, 265.0, 262.0, 248.0, 257.0, 259.0, 259.0, 254.0, 269.0, 253.0, 260.0, 262.0, 236.0, 237.0, 261.0, 255.0, 264.0, 258.0, 212.0, 223.0, 237.0, 228.0, 245.0, 268.0, 264.0, 255.0, 237.0, 236.0, 184.0, 170.0, 256.0, 257.0, 246.0, 264.0, 236.0, 232.0, 256.0, 251.0, 248.0, 271.0, 214.0, 205.0, 260.0, 256.0, 221.0, 235.0, 255.0, 264.0, 252.0, 261.0, 275.0, 244.0, 229.0, 239.0, 250.0, 275.0, 247.0, 269.0, 270.0, 249.0, 257.0, 262.0, 234.0, 239.0, 256.0, 257.0, 257.0, 247.0, 269.0, 250.0, 270.0, 246.0, 257.0, 262.0, 263.0, 256.0, 252.0, 264.0, 224.0, 235.0, 201.0, 215.0, 257.0, 259.0, 260.0, 270.0, 272.0, 250.0, 254.0, 268.0, 254.0, 268.0, 262.0, 263.0, 266.0, 256.0, 268.0, 248.0, 246.0, 273.0, 239.0, 265.0, 229.0, 244.0, 229.0, 244.0, 267.0, 255.0, 250.0, 266.0, 211.0, 213.0, 254.0, 271.0, 257.0, 253.0, 256.0, 266.0, 275.0, 249.0, 275.0, 250.0, 282.0, 285.0, 261.0, 261.0, 271.0, 256.0, 263.0, 247.0, 241.0, 278.0, 255.0, 252.0, 236.0, 232.0, 258.0, 264.0, 253.0, 266.0, 265.0, 257.0, 262.0, 260.0, 237.0, 228.0, 258.0, 267.0, 263.0, 259.0, 239.0, 229.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.685130443905528, "mean_inference_ms": 1.2081711376748996, "mean_action_processing_ms": 0.1325676931020594, "mean_env_wait_ms": 0.8460857022725299, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 567.0, "episode_reward_min": 308.0, "episode_reward_mean": 498.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 149.0}, "policy_reward_max": {"ppo": 285.0}, "policy_reward_mean": {"ppo": 249.45}, "hist_stats": {"episode_reward": [464.0, 476.0, 513.0, 441.0, 522.0, 516.0, 519.0, 519.0, 516.0, 468.0, 522.0, 465.0, 470.0, 507.0, 462.0, 513.0, 470.0, 516.0, 308.0, 519.0, 519.0, 459.0, 467.0, 516.0, 513.0, 510.0, 516.0, 513.0, 522.0, 522.0, 473.0, 516.0, 522.0, 435.0, 465.0, 513.0, 519.0, 473.0, 354.0, 513.0, 510.0, 468.0, 507.0, 519.0, 419.0, 516.0, 456.0, 519.0, 513.0, 519.0, 468.0, 525.0, 516.0, 519.0, 519.0, 473.0, 513.0, 504.0, 519.0, 516.0, 519.0, 519.0, 516.0, 459.0, 416.0, 516.0, 530.0, 522.0, 522.0, 522.0, 525.0, 522.0, 516.0, 519.0, 504.0, 473.0, 473.0, 522.0, 516.0, 424.0, 525.0, 510.0, 522.0, 524.0, 525.0, 567.0, 522.0, 527.0, 510.0, 519.0, 507.0, 468.0, 522.0, 519.0, 522.0, 522.0, 465.0, 525.0, 522.0, 468.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [218.0, 246.0, 233.0, 243.0, 263.0, 250.0, 219.0, 222.0, 259.0, 263.0, 251.0, 265.0, 268.0, 251.0, 260.0, 259.0, 260.0, 256.0, 239.0, 229.0, 265.0, 257.0, 234.0, 231.0, 231.0, 239.0, 258.0, 249.0, 225.0, 237.0, 264.0, 249.0, 244.0, 226.0, 261.0, 255.0, 159.0, 149.0, 272.0, 247.0, 256.0, 263.0, 222.0, 237.0, 222.0, 245.0, 250.0, 266.0, 248.0, 265.0, 262.0, 248.0, 257.0, 259.0, 259.0, 254.0, 269.0, 253.0, 260.0, 262.0, 236.0, 237.0, 261.0, 255.0, 264.0, 258.0, 212.0, 223.0, 237.0, 228.0, 245.0, 268.0, 264.0, 255.0, 237.0, 236.0, 184.0, 170.0, 256.0, 257.0, 246.0, 264.0, 236.0, 232.0, 256.0, 251.0, 248.0, 
271.0, 214.0, 205.0, 260.0, 256.0, 221.0, 235.0, 255.0, 264.0, 252.0, 261.0, 275.0, 244.0, 229.0, 239.0, 250.0, 275.0, 247.0, 269.0, 270.0, 249.0, 257.0, 262.0, 234.0, 239.0, 256.0, 257.0, 257.0, 247.0, 269.0, 250.0, 270.0, 246.0, 257.0, 262.0, 263.0, 256.0, 252.0, 264.0, 224.0, 235.0, 201.0, 215.0, 257.0, 259.0, 260.0, 270.0, 272.0, 250.0, 254.0, 268.0, 254.0, 268.0, 262.0, 263.0, 266.0, 256.0, 268.0, 248.0, 246.0, 273.0, 239.0, 265.0, 229.0, 244.0, 229.0, 244.0, 267.0, 255.0, 250.0, 266.0, 211.0, 213.0, 254.0, 271.0, 257.0, 253.0, 256.0, 266.0, 275.0, 249.0, 275.0, 250.0, 282.0, 285.0, 261.0, 261.0, 271.0, 256.0, 263.0, 247.0, 241.0, 278.0, 255.0, 252.0, 236.0, 232.0, 258.0, 264.0, 253.0, 266.0, 265.0, 257.0, 262.0, 260.0, 237.0, 228.0, 258.0, 267.0, 263.0, 259.0, 239.0, 229.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.685130443905528, "mean_inference_ms": 1.2081711376748996, "mean_action_processing_ms": 0.1325676931020594, "mean_env_wait_ms": 0.8460857022725299, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3072000, "num_agent_steps_trained": 3072000, "num_env_steps_sampled": 1536000, "num_env_steps_trained": 1536000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1536000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3072000, "timers": {"training_iteration_time_ms": 3696.184, "learn_time_ms": 1147.37, "learn_throughput": 11155.952, "synch_weights_time_ms": 11.575}, "counters": {"num_env_steps_sampled": 1536000, "num_env_steps_trained": 1536000, "num_agent_steps_sampled": 3072000, "num_agent_steps_trained": 3072000}, "done": false, "episodes_total": 3840, "training_iteration": 120, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-45", "timestamp": 1666580865, "time_this_iter_s": 3.6966631412506104, "time_total_s": 451.70430755615234, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 451.70430755615234, "timesteps_since_restore": 0, "iterations_since_restore": 120, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.88333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 174.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 154.83, "shaped_reward_min": 114, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.39, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.24, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 14.04, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.78, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.8, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.47, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.26, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.45, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.44, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.39, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.39, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.8, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.47, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.8, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.47, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.5046327914733034e-37, "cur_lr": 0.0010000000474974513, "total_loss": 0.000531826401129365, "policy_loss": 0.00035990215837955475, "vf_loss": 7.2422380447387695, "vf_explained_var": 0.7075891494750977, "kl": 0.0017436475027352571, "entropy": 1.1045974493026733, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1548800, "num_env_steps_trained": 1548800, "num_agent_steps_sampled": 3097600, "num_agent_steps_trained": 3097600}, "sampler_results": {"episode_reward_max": 567.0, "episode_reward_min": 354.0, "episode_reward_mean": 503.63, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 170.0}, "policy_reward_max": {"ppo": 286.0}, "policy_reward_mean": {"ppo": 251.815}, "custom_metrics": {"sparse_reward_mean": 174.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 154.83, "shaped_reward_min": 114, "shaped_reward_max": 170, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.39, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.24, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.04, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.78, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.31, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.8, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.47, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 13, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.26, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.28, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.53, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, 
"dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.45, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.44, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.39, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.8, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.47, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.8, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.47, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 
0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 435.0, 465.0, 513.0, 519.0, 473.0, 354.0, 513.0, 510.0, 468.0, 507.0, 519.0, 419.0, 516.0, 456.0, 519.0, 513.0, 519.0, 468.0, 525.0, 516.0, 519.0, 519.0, 473.0, 513.0, 504.0, 519.0, 516.0, 519.0, 519.0, 516.0, 459.0, 416.0, 516.0, 530.0, 522.0, 522.0, 522.0, 525.0, 522.0, 516.0, 519.0, 504.0, 473.0, 473.0, 522.0, 516.0, 424.0, 525.0, 510.0, 522.0, 524.0, 525.0, 567.0, 522.0, 527.0, 510.0, 519.0, 507.0, 468.0, 522.0, 519.0, 522.0, 522.0, 465.0, 525.0, 522.0, 468.0, 519.0, 519.0, 513.0, 516.0, 516.0, 468.0, 567.0, 530.0, 482.0, 516.0, 467.0, 513.0, 507.0, 462.0, 519.0, 525.0, 516.0, 513.0, 468.0, 464.0, 504.0, 516.0, 479.0, 516.0, 522.0, 522.0, 525.0, 525.0, 519.0, 519.0, 462.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 258.0, 212.0, 223.0, 237.0, 228.0, 245.0, 268.0, 264.0, 255.0, 237.0, 236.0, 184.0, 170.0, 256.0, 257.0, 246.0, 264.0, 236.0, 232.0, 256.0, 251.0, 248.0, 271.0, 214.0, 205.0, 260.0, 256.0, 221.0, 235.0, 255.0, 264.0, 252.0, 261.0, 275.0, 244.0, 229.0, 239.0, 250.0, 275.0, 247.0, 269.0, 270.0, 249.0, 257.0, 262.0, 234.0, 239.0, 256.0, 257.0, 257.0, 247.0, 269.0, 250.0, 270.0, 246.0, 257.0, 262.0, 263.0, 256.0, 252.0, 264.0, 224.0, 235.0, 201.0, 215.0, 257.0, 259.0, 260.0, 270.0, 272.0, 250.0, 254.0, 268.0, 254.0, 268.0, 262.0, 263.0, 266.0, 256.0, 268.0, 248.0, 246.0, 273.0, 239.0, 265.0, 229.0, 244.0, 229.0, 244.0, 267.0, 255.0, 250.0, 266.0, 211.0, 213.0, 254.0, 271.0, 257.0, 253.0, 256.0, 266.0, 275.0, 249.0, 275.0, 250.0, 282.0, 285.0, 261.0, 261.0, 271.0, 256.0, 263.0, 247.0, 241.0, 278.0, 255.0, 252.0, 236.0, 232.0, 258.0, 264.0, 253.0, 266.0, 265.0, 257.0, 262.0, 260.0, 237.0, 228.0, 258.0, 267.0, 263.0, 259.0, 239.0, 229.0, 250.0, 269.0, 256.0, 263.0, 252.0, 261.0, 251.0, 265.0, 271.0, 245.0, 232.0, 236.0, 281.0, 286.0, 275.0, 255.0, 248.0, 234.0, 267.0, 249.0, 238.0, 229.0, 261.0, 252.0, 250.0, 257.0, 219.0, 243.0, 258.0, 261.0, 262.0, 263.0, 266.0, 250.0, 246.0, 267.0, 240.0, 228.0, 212.0, 252.0, 256.0, 248.0, 257.0, 259.0, 246.0, 233.0, 251.0, 265.0, 244.0, 278.0, 260.0, 262.0, 264.0, 261.0, 254.0, 271.0, 254.0, 265.0, 264.0, 255.0, 227.0, 235.0, 248.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851763737865477, "mean_inference_ms": 1.2080972065799291, "mean_action_processing_ms": 0.13256303185673912, "mean_env_wait_ms": 0.8457883628449312, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 567.0, "episode_reward_min": 354.0, "episode_reward_mean": 503.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 170.0}, "policy_reward_max": {"ppo": 286.0}, "policy_reward_mean": {"ppo": 251.815}, "hist_stats": {"episode_reward": [522.0, 435.0, 465.0, 513.0, 519.0, 473.0, 354.0, 513.0, 510.0, 468.0, 507.0, 519.0, 419.0, 516.0, 456.0, 519.0, 513.0, 519.0, 468.0, 525.0, 516.0, 519.0, 519.0, 473.0, 513.0, 504.0, 519.0, 516.0, 519.0, 519.0, 516.0, 459.0, 416.0, 516.0, 530.0, 522.0, 522.0, 522.0, 525.0, 522.0, 516.0, 519.0, 504.0, 473.0, 473.0, 522.0, 516.0, 424.0, 525.0, 510.0, 522.0, 524.0, 525.0, 567.0, 522.0, 527.0, 510.0, 519.0, 507.0, 468.0, 522.0, 519.0, 522.0, 522.0, 465.0, 525.0, 522.0, 468.0, 519.0, 519.0, 513.0, 516.0, 516.0, 468.0, 567.0, 530.0, 482.0, 516.0, 467.0, 513.0, 507.0, 462.0, 519.0, 525.0, 516.0, 513.0, 468.0, 464.0, 504.0, 516.0, 479.0, 516.0, 522.0, 522.0, 525.0, 525.0, 519.0, 519.0, 462.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 258.0, 212.0, 223.0, 237.0, 228.0, 245.0, 268.0, 264.0, 255.0, 237.0, 236.0, 184.0, 170.0, 256.0, 257.0, 246.0, 264.0, 236.0, 232.0, 256.0, 251.0, 248.0, 271.0, 214.0, 205.0, 260.0, 256.0, 221.0, 235.0, 255.0, 264.0, 252.0, 261.0, 275.0, 244.0, 229.0, 239.0, 250.0, 275.0, 247.0, 269.0, 270.0, 249.0, 257.0, 262.0, 234.0, 239.0, 256.0, 257.0, 257.0, 247.0, 269.0, 250.0, 270.0, 246.0, 257.0, 262.0, 263.0, 256.0, 252.0, 264.0, 224.0, 235.0, 201.0, 215.0, 257.0, 259.0, 260.0, 270.0, 272.0, 250.0, 254.0, 268.0, 254.0, 268.0, 262.0, 263.0, 266.0, 256.0, 268.0, 248.0, 246.0, 273.0, 239.0, 265.0, 
229.0, 244.0, 229.0, 244.0, 267.0, 255.0, 250.0, 266.0, 211.0, 213.0, 254.0, 271.0, 257.0, 253.0, 256.0, 266.0, 275.0, 249.0, 275.0, 250.0, 282.0, 285.0, 261.0, 261.0, 271.0, 256.0, 263.0, 247.0, 241.0, 278.0, 255.0, 252.0, 236.0, 232.0, 258.0, 264.0, 253.0, 266.0, 265.0, 257.0, 262.0, 260.0, 237.0, 228.0, 258.0, 267.0, 263.0, 259.0, 239.0, 229.0, 250.0, 269.0, 256.0, 263.0, 252.0, 261.0, 251.0, 265.0, 271.0, 245.0, 232.0, 236.0, 281.0, 286.0, 275.0, 255.0, 248.0, 234.0, 267.0, 249.0, 238.0, 229.0, 261.0, 252.0, 250.0, 257.0, 219.0, 243.0, 258.0, 261.0, 262.0, 263.0, 266.0, 250.0, 246.0, 267.0, 240.0, 228.0, 212.0, 252.0, 256.0, 248.0, 257.0, 259.0, 246.0, 233.0, 251.0, 265.0, 244.0, 278.0, 260.0, 262.0, 264.0, 261.0, 254.0, 271.0, 254.0, 265.0, 264.0, 255.0, 227.0, 235.0, 248.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851763737865477, "mean_inference_ms": 1.2080972065799291, "mean_action_processing_ms": 0.13256303185673912, "mean_env_wait_ms": 0.8457883628449312, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3097600, "num_agent_steps_trained": 3097600, "num_env_steps_sampled": 1548800, "num_env_steps_trained": 1548800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1548800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3097600, "timers": {"training_iteration_time_ms": 3710.556, "learn_time_ms": 1164.117, "learn_throughput": 10995.457, "synch_weights_time_ms": 11.488}, "counters": {"num_env_steps_sampled": 1548800, "num_env_steps_trained": 1548800, "num_agent_steps_sampled": 3097600, "num_agent_steps_trained": 3097600}, "done": false, "episodes_total": 3872, "training_iteration": 121, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-49", "timestamp": 1666580869, "time_this_iter_s": 3.874966859817505, "time_total_s": 455.57927441596985, "pid": 1816099, 
"hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 455.57927441596985, "timesteps_since_restore": 0, "iterations_since_restore": 121, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.6, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 175.4, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.16, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.52, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.29, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 20, 
"useful_onion_pickup_agent_0_mean": 14.15, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.89, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.49, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.91, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.51, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.95, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.38, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.45, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.91, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.51, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.91, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.51, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 7.523163957366517e-38, "cur_lr": 0.0010000000474974513, "total_loss": 5.595572292804718e-05, "policy_loss": -0.00012787984451279044, "vf_loss": 7.336641311645508, "vf_explained_var": 0.7027114629745483, "kl": 0.001904007513076067, "entropy": 1.0996546745300293, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1561600, "num_env_steps_trained": 1561600, "num_agent_steps_sampled": 3123200, "num_agent_steps_trained": 3123200}, "sampler_results": {"episode_reward_max": 573.0, "episode_reward_min": 180.0, "episode_reward_mean": 506.96, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 288.0}, "policy_reward_mean": {"ppo": 253.48}, "custom_metrics": {"sparse_reward_mean": 175.4, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.16, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.52, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.29, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 14.15, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.89, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.49, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 13.91, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.51, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.95, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.33, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.42, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.38, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.45, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.91, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.51, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.91, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.51, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [416.0, 516.0, 530.0, 522.0, 522.0, 522.0, 525.0, 522.0, 516.0, 519.0, 504.0, 473.0, 473.0, 522.0, 516.0, 424.0, 525.0, 510.0, 522.0, 524.0, 525.0, 567.0, 522.0, 527.0, 510.0, 519.0, 507.0, 468.0, 522.0, 519.0, 522.0, 522.0, 465.0, 525.0, 522.0, 468.0, 519.0, 519.0, 513.0, 516.0, 516.0, 468.0, 567.0, 530.0, 482.0, 516.0, 467.0, 513.0, 507.0, 462.0, 519.0, 525.0, 516.0, 513.0, 468.0, 464.0, 504.0, 516.0, 479.0, 516.0, 522.0, 522.0, 525.0, 525.0, 519.0, 519.0, 462.0, 516.0, 519.0, 519.0, 516.0, 519.0, 479.0, 459.0, 522.0, 516.0, 519.0, 519.0, 522.0, 516.0, 519.0, 513.0, 462.0, 522.0, 522.0, 525.0, 519.0, 519.0, 513.0, 510.0, 573.0, 513.0, 180.0, 519.0, 519.0, 516.0, 530.0, 519.0, 513.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [201.0, 215.0, 257.0, 259.0, 260.0, 270.0, 272.0, 250.0, 254.0, 268.0, 254.0, 268.0, 262.0, 263.0, 266.0, 256.0, 268.0, 248.0, 246.0, 273.0, 239.0, 265.0, 229.0, 244.0, 229.0, 244.0, 267.0, 255.0, 250.0, 266.0, 211.0, 213.0, 254.0, 271.0, 257.0, 253.0, 256.0, 266.0, 275.0, 249.0, 275.0, 250.0, 282.0, 285.0, 261.0, 261.0, 271.0, 256.0, 263.0, 247.0, 241.0, 278.0, 255.0, 252.0, 236.0, 232.0, 258.0, 264.0, 253.0, 266.0, 265.0, 257.0, 262.0, 260.0, 237.0, 228.0, 258.0, 267.0, 263.0, 259.0, 239.0, 229.0, 250.0, 269.0, 256.0, 263.0, 252.0, 261.0, 251.0, 265.0, 271.0, 245.0, 232.0, 236.0, 281.0, 286.0, 275.0, 255.0, 248.0, 234.0, 267.0, 249.0, 238.0, 229.0, 261.0, 252.0, 250.0, 257.0, 219.0, 243.0, 258.0, 261.0, 262.0, 263.0, 266.0, 250.0, 246.0, 267.0, 240.0, 228.0, 212.0, 252.0, 256.0, 248.0, 257.0, 259.0, 246.0, 233.0, 251.0, 265.0, 244.0, 278.0, 260.0, 262.0, 264.0, 261.0, 254.0, 271.0, 254.0, 265.0, 264.0, 255.0, 227.0, 235.0, 248.0, 268.0, 254.0, 265.0, 262.0, 257.0, 262.0, 254.0, 254.0, 265.0, 235.0, 244.0, 223.0, 236.0, 267.0, 255.0, 262.0, 254.0, 251.0, 268.0, 256.0, 263.0, 254.0, 268.0, 241.0, 275.0, 247.0, 272.0, 264.0, 249.0, 232.0, 230.0, 262.0, 260.0, 265.0, 257.0, 261.0, 264.0, 267.0, 252.0, 258.0, 261.0, 246.0, 267.0, 266.0, 244.0, 285.0, 288.0, 249.0, 264.0, 91.0, 89.0, 261.0, 258.0, 267.0, 252.0, 255.0, 261.0, 262.0, 268.0, 264.0, 255.0, 234.0, 279.0, 267.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851629443359439, "mean_inference_ms": 1.208022696415082, "mean_action_processing_ms": 0.1325572923429858, "mean_env_wait_ms": 0.8454999723140769, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 573.0, "episode_reward_min": 180.0, "episode_reward_mean": 506.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 288.0}, "policy_reward_mean": {"ppo": 253.48}, "hist_stats": {"episode_reward": [416.0, 516.0, 530.0, 522.0, 522.0, 522.0, 525.0, 522.0, 516.0, 519.0, 504.0, 473.0, 473.0, 522.0, 516.0, 424.0, 525.0, 510.0, 522.0, 524.0, 525.0, 567.0, 522.0, 527.0, 510.0, 519.0, 507.0, 468.0, 522.0, 519.0, 522.0, 522.0, 465.0, 525.0, 522.0, 468.0, 519.0, 519.0, 513.0, 516.0, 516.0, 468.0, 567.0, 530.0, 482.0, 516.0, 467.0, 513.0, 507.0, 462.0, 519.0, 525.0, 516.0, 513.0, 468.0, 464.0, 504.0, 516.0, 479.0, 516.0, 522.0, 522.0, 525.0, 525.0, 519.0, 519.0, 462.0, 516.0, 519.0, 519.0, 516.0, 519.0, 479.0, 459.0, 522.0, 516.0, 519.0, 519.0, 522.0, 516.0, 519.0, 513.0, 462.0, 522.0, 522.0, 525.0, 519.0, 519.0, 513.0, 510.0, 573.0, 513.0, 180.0, 519.0, 519.0, 516.0, 530.0, 519.0, 513.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [201.0, 215.0, 257.0, 259.0, 260.0, 270.0, 272.0, 250.0, 254.0, 268.0, 254.0, 268.0, 262.0, 263.0, 266.0, 256.0, 268.0, 248.0, 246.0, 273.0, 239.0, 265.0, 229.0, 244.0, 229.0, 244.0, 267.0, 255.0, 250.0, 266.0, 211.0, 213.0, 254.0, 271.0, 257.0, 253.0, 256.0, 266.0, 275.0, 249.0, 275.0, 250.0, 282.0, 285.0, 261.0, 261.0, 271.0, 256.0, 263.0, 247.0, 241.0, 278.0, 255.0, 252.0, 236.0, 232.0, 258.0, 264.0, 253.0, 266.0, 265.0, 257.0, 262.0, 260.0, 237.0, 228.0, 258.0, 267.0, 263.0, 259.0, 239.0, 229.0, 250.0, 269.0, 256.0, 263.0, 252.0, 261.0, 251.0, 265.0, 271.0, 245.0, 232.0, 236.0, 281.0, 286.0, 275.0, 
255.0, 248.0, 234.0, 267.0, 249.0, 238.0, 229.0, 261.0, 252.0, 250.0, 257.0, 219.0, 243.0, 258.0, 261.0, 262.0, 263.0, 266.0, 250.0, 246.0, 267.0, 240.0, 228.0, 212.0, 252.0, 256.0, 248.0, 257.0, 259.0, 246.0, 233.0, 251.0, 265.0, 244.0, 278.0, 260.0, 262.0, 264.0, 261.0, 254.0, 271.0, 254.0, 265.0, 264.0, 255.0, 227.0, 235.0, 248.0, 268.0, 254.0, 265.0, 262.0, 257.0, 262.0, 254.0, 254.0, 265.0, 235.0, 244.0, 223.0, 236.0, 267.0, 255.0, 262.0, 254.0, 251.0, 268.0, 256.0, 263.0, 254.0, 268.0, 241.0, 275.0, 247.0, 272.0, 264.0, 249.0, 232.0, 230.0, 262.0, 260.0, 265.0, 257.0, 261.0, 264.0, 267.0, 252.0, 258.0, 261.0, 246.0, 267.0, 266.0, 244.0, 285.0, 288.0, 249.0, 264.0, 91.0, 89.0, 261.0, 258.0, 267.0, 252.0, 255.0, 261.0, 262.0, 268.0, 264.0, 255.0, 234.0, 279.0, 267.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851629443359439, "mean_inference_ms": 1.208022696415082, "mean_action_processing_ms": 0.1325572923429858, "mean_env_wait_ms": 0.8454999723140769, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3123200, "num_agent_steps_trained": 3123200, "num_env_steps_sampled": 1561600, "num_env_steps_trained": 1561600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1561600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3123200, "timers": {"training_iteration_time_ms": 3704.116, "learn_time_ms": 1167.28, "learn_throughput": 10965.667, "synch_weights_time_ms": 11.523}, "counters": {"num_env_steps_sampled": 1561600, "num_env_steps_trained": 1561600, "num_agent_steps_sampled": 3123200, "num_agent_steps_trained": 3123200}, "done": false, "episodes_total": 3904, "training_iteration": 122, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-53", "timestamp": 1666580873, "time_this_iter_s": 3.687739849090576, "time_total_s": 459.2670142650604, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 459.2670142650604, "timesteps_since_restore": 0, "iterations_since_restore": 122, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.883333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 176.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.11, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.65, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.34, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 14.28, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 14.92, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.96, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.48, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.92, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.31, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.42, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.5, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.4, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.45, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.96, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.48, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.96, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.48, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 3.7615819786832586e-38, "cur_lr": 0.0010000000474974513, "total_loss": -0.0018413320649415255, "policy_loss": -0.0020147243048995733, "vf_loss": 7.221100807189941, "vf_explained_var": 0.7078213691711426, "kl": 0.0020635072141885757, "entropy": 1.0974304676055908, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1574400, "num_env_steps_trained": 1574400, "num_agent_steps_sampled": 3148800, "num_agent_steps_trained": 3148800}, "sampler_results": {"episode_reward_max": 573.0, "episode_reward_min": 180.0, "episode_reward_mean": 508.51, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 290.0}, "policy_reward_mean": {"ppo": 254.255}, "custom_metrics": {"sparse_reward_mean": 176.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.11, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.65, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.34, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.28, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 14.92, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.57, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.32, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.96, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.48, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.92, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.31, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.42, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, 
"dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.5, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.4, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.45, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.96, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.48, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.96, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.48, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 
0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [465.0, 525.0, 522.0, 468.0, 519.0, 519.0, 513.0, 516.0, 516.0, 468.0, 567.0, 530.0, 482.0, 516.0, 467.0, 513.0, 507.0, 462.0, 519.0, 525.0, 516.0, 513.0, 468.0, 464.0, 504.0, 516.0, 479.0, 516.0, 522.0, 522.0, 525.0, 525.0, 519.0, 519.0, 462.0, 516.0, 519.0, 519.0, 516.0, 519.0, 479.0, 459.0, 522.0, 516.0, 519.0, 519.0, 522.0, 516.0, 519.0, 513.0, 462.0, 522.0, 522.0, 525.0, 519.0, 519.0, 513.0, 510.0, 573.0, 513.0, 180.0, 519.0, 519.0, 516.0, 530.0, 519.0, 513.0, 527.0, 519.0, 527.0, 456.0, 446.0, 525.0, 507.0, 522.0, 522.0, 519.0, 522.0, 519.0, 519.0, 519.0, 513.0, 516.0, 510.0, 522.0, 522.0, 525.0, 525.0, 513.0, 513.0, 516.0, 530.0, 519.0, 516.0, 573.0, 525.0, 468.0, 519.0, 516.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [237.0, 228.0, 258.0, 267.0, 263.0, 259.0, 239.0, 229.0, 250.0, 269.0, 256.0, 263.0, 252.0, 261.0, 251.0, 265.0, 271.0, 245.0, 232.0, 236.0, 281.0, 286.0, 275.0, 255.0, 248.0, 234.0, 267.0, 249.0, 238.0, 229.0, 261.0, 252.0, 250.0, 257.0, 219.0, 243.0, 258.0, 261.0, 262.0, 263.0, 266.0, 250.0, 246.0, 267.0, 240.0, 228.0, 212.0, 252.0, 256.0, 248.0, 257.0, 259.0, 246.0, 233.0, 251.0, 265.0, 244.0, 278.0, 260.0, 262.0, 264.0, 261.0, 254.0, 271.0, 254.0, 265.0, 264.0, 255.0, 227.0, 235.0, 248.0, 268.0, 254.0, 265.0, 262.0, 257.0, 262.0, 254.0, 254.0, 265.0, 235.0, 244.0, 223.0, 236.0, 267.0, 255.0, 262.0, 254.0, 251.0, 268.0, 256.0, 263.0, 254.0, 268.0, 241.0, 275.0, 247.0, 272.0, 264.0, 249.0, 232.0, 230.0, 262.0, 260.0, 265.0, 257.0, 261.0, 264.0, 267.0, 252.0, 258.0, 261.0, 246.0, 267.0, 266.0, 244.0, 285.0, 288.0, 249.0, 264.0, 91.0, 89.0, 261.0, 258.0, 267.0, 252.0, 255.0, 261.0, 262.0, 268.0, 264.0, 255.0, 234.0, 279.0, 267.0, 260.0, 259.0, 260.0, 269.0, 258.0, 237.0, 219.0, 226.0, 220.0, 263.0, 262.0, 251.0, 256.0, 270.0, 252.0, 252.0, 270.0, 262.0, 257.0, 259.0, 263.0, 261.0, 258.0, 275.0, 244.0, 252.0, 267.0, 252.0, 261.0, 264.0, 252.0, 250.0, 260.0, 246.0, 276.0, 264.0, 258.0, 259.0, 266.0, 251.0, 274.0, 259.0, 254.0, 253.0, 260.0, 259.0, 257.0, 269.0, 261.0, 257.0, 262.0, 266.0, 250.0, 290.0, 283.0, 269.0, 256.0, 220.0, 248.0, 264.0, 255.0, 248.0, 268.0, 259.0, 266.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851096061071673, "mean_inference_ms": 1.2079490216862163, "mean_action_processing_ms": 0.13255382184265097, "mean_env_wait_ms": 0.8452099267656442, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 573.0, "episode_reward_min": 180.0, "episode_reward_mean": 508.51, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 290.0}, "policy_reward_mean": {"ppo": 254.255}, "hist_stats": {"episode_reward": [465.0, 525.0, 522.0, 468.0, 519.0, 519.0, 513.0, 516.0, 516.0, 468.0, 567.0, 530.0, 482.0, 516.0, 467.0, 513.0, 507.0, 462.0, 519.0, 525.0, 516.0, 513.0, 468.0, 464.0, 504.0, 516.0, 479.0, 516.0, 522.0, 522.0, 525.0, 525.0, 519.0, 519.0, 462.0, 516.0, 519.0, 519.0, 516.0, 519.0, 479.0, 459.0, 522.0, 516.0, 519.0, 519.0, 522.0, 516.0, 519.0, 513.0, 462.0, 522.0, 522.0, 525.0, 519.0, 519.0, 513.0, 510.0, 573.0, 513.0, 180.0, 519.0, 519.0, 516.0, 530.0, 519.0, 513.0, 527.0, 519.0, 527.0, 456.0, 446.0, 525.0, 507.0, 522.0, 522.0, 519.0, 522.0, 519.0, 519.0, 519.0, 513.0, 516.0, 510.0, 522.0, 522.0, 525.0, 525.0, 513.0, 513.0, 516.0, 530.0, 519.0, 516.0, 573.0, 525.0, 468.0, 519.0, 516.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [237.0, 228.0, 258.0, 267.0, 263.0, 259.0, 239.0, 229.0, 250.0, 269.0, 256.0, 263.0, 252.0, 261.0, 251.0, 265.0, 271.0, 245.0, 232.0, 236.0, 281.0, 286.0, 275.0, 255.0, 248.0, 234.0, 267.0, 249.0, 238.0, 229.0, 261.0, 252.0, 250.0, 257.0, 219.0, 243.0, 258.0, 261.0, 262.0, 263.0, 266.0, 250.0, 246.0, 267.0, 240.0, 228.0, 212.0, 252.0, 256.0, 248.0, 257.0, 259.0, 246.0, 233.0, 251.0, 265.0, 244.0, 278.0, 260.0, 262.0, 264.0, 261.0, 254.0, 271.0, 254.0, 265.0, 264.0, 255.0, 227.0, 235.0, 248.0, 268.0, 254.0, 265.0, 262.0, 257.0, 262.0, 254.0, 254.0, 265.0, 235.0, 244.0, 223.0, 236.0, 267.0, 255.0, 262.0, 
254.0, 251.0, 268.0, 256.0, 263.0, 254.0, 268.0, 241.0, 275.0, 247.0, 272.0, 264.0, 249.0, 232.0, 230.0, 262.0, 260.0, 265.0, 257.0, 261.0, 264.0, 267.0, 252.0, 258.0, 261.0, 246.0, 267.0, 266.0, 244.0, 285.0, 288.0, 249.0, 264.0, 91.0, 89.0, 261.0, 258.0, 267.0, 252.0, 255.0, 261.0, 262.0, 268.0, 264.0, 255.0, 234.0, 279.0, 267.0, 260.0, 259.0, 260.0, 269.0, 258.0, 237.0, 219.0, 226.0, 220.0, 263.0, 262.0, 251.0, 256.0, 270.0, 252.0, 252.0, 270.0, 262.0, 257.0, 259.0, 263.0, 261.0, 258.0, 275.0, 244.0, 252.0, 267.0, 252.0, 261.0, 264.0, 252.0, 250.0, 260.0, 246.0, 276.0, 264.0, 258.0, 259.0, 266.0, 251.0, 274.0, 259.0, 254.0, 253.0, 260.0, 259.0, 257.0, 269.0, 261.0, 257.0, 262.0, 266.0, 250.0, 290.0, 283.0, 269.0, 256.0, 220.0, 248.0, 264.0, 255.0, 248.0, 268.0, 259.0, 266.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6851096061071673, "mean_inference_ms": 1.2079490216862163, "mean_action_processing_ms": 0.13255382184265097, "mean_env_wait_ms": 0.8452099267656442, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3148800, "num_agent_steps_trained": 3148800, "num_env_steps_sampled": 1574400, "num_env_steps_trained": 1574400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1574400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3148800, "timers": {"training_iteration_time_ms": 3706.167, "learn_time_ms": 1173.127, "learn_throughput": 10911.01, "synch_weights_time_ms": 10.047}, "counters": {"num_env_steps_sampled": 1574400, "num_env_steps_trained": 1574400, "num_agent_steps_sampled": 3148800, "num_agent_steps_trained": 3148800}, "done": false, "episodes_total": 3936, "training_iteration": 123, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-07-57", "timestamp": 1666580877, "time_this_iter_s": 3.806124210357666, "time_total_s": 463.0731384754181, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 463.0731384754181, "timesteps_since_restore": 0, "iterations_since_restore": 123, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.8, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 176.6, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.68, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.72, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.41, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 
14.22, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 14.99, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.98, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.59, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.51, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.4, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.98, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.59, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.98, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.59, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 1.8807909893416293e-38, "cur_lr": 0.0010000000474974513, "total_loss": -0.003325252328068018, "policy_loss": -0.003511276328936219, "vf_loss": 7.359045028686523, "vf_explained_var": 0.703230619430542, "kl": 0.001878553070127964, "entropy": 1.0997591018676758, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1587200, "num_env_steps_trained": 1587200, "num_agent_steps_sampled": 3174400, "num_agent_steps_trained": 3174400}, "sampler_results": {"episode_reward_max": 573.0, "episode_reward_min": 180.0, "episode_reward_mean": 509.88, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 290.0}, "policy_reward_mean": {"ppo": 254.94}, "custom_metrics": {"sparse_reward_mean": 176.6, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.68, "shaped_reward_min": 60, "shaped_reward_max": 173, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 
0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.72, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.41, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.22, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 14.99, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.53, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.28, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.98, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.59, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.23, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.51, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.41, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.43, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.4, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.98, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.59, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.98, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.59, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 519.0, 462.0, 516.0, 519.0, 519.0, 516.0, 519.0, 479.0, 459.0, 522.0, 516.0, 519.0, 519.0, 522.0, 516.0, 519.0, 513.0, 462.0, 522.0, 522.0, 525.0, 519.0, 519.0, 513.0, 510.0, 573.0, 513.0, 180.0, 519.0, 519.0, 516.0, 530.0, 519.0, 513.0, 527.0, 519.0, 527.0, 456.0, 446.0, 525.0, 507.0, 522.0, 522.0, 519.0, 522.0, 519.0, 519.0, 519.0, 513.0, 516.0, 510.0, 522.0, 522.0, 525.0, 525.0, 513.0, 513.0, 516.0, 530.0, 519.0, 516.0, 573.0, 525.0, 468.0, 519.0, 516.0, 525.0, 516.0, 525.0, 467.0, 519.0, 516.0, 516.0, 522.0, 519.0, 524.0, 522.0, 470.0, 522.0, 516.0, 513.0, 522.0, 522.0, 522.0, 573.0, 525.0, 519.0, 405.0, 525.0, 522.0, 476.0, 525.0, 519.0, 525.0, 516.0, 453.0, 467.0, 519.0, 524.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 265.0, 264.0, 255.0, 227.0, 235.0, 248.0, 268.0, 254.0, 265.0, 262.0, 257.0, 262.0, 254.0, 254.0, 265.0, 235.0, 244.0, 223.0, 236.0, 267.0, 255.0, 262.0, 254.0, 251.0, 268.0, 256.0, 263.0, 254.0, 268.0, 241.0, 275.0, 247.0, 272.0, 264.0, 249.0, 232.0, 230.0, 262.0, 260.0, 265.0, 257.0, 261.0, 264.0, 267.0, 252.0, 258.0, 261.0, 246.0, 267.0, 266.0, 244.0, 285.0, 288.0, 249.0, 264.0, 91.0, 89.0, 261.0, 258.0, 267.0, 252.0, 255.0, 261.0, 262.0, 268.0, 264.0, 255.0, 234.0, 279.0, 267.0, 260.0, 259.0, 260.0, 269.0, 258.0, 237.0, 219.0, 226.0, 220.0, 263.0, 262.0, 251.0, 256.0, 270.0, 252.0, 252.0, 270.0, 262.0, 257.0, 259.0, 263.0, 261.0, 258.0, 275.0, 244.0, 252.0, 267.0, 252.0, 261.0, 264.0, 252.0, 250.0, 260.0, 246.0, 276.0, 264.0, 258.0, 259.0, 266.0, 251.0, 274.0, 259.0, 254.0, 253.0, 260.0, 259.0, 257.0, 269.0, 261.0, 257.0, 262.0, 266.0, 250.0, 290.0, 283.0, 269.0, 256.0, 220.0, 248.0, 264.0, 255.0, 248.0, 268.0, 259.0, 266.0, 249.0, 267.0, 253.0, 272.0, 229.0, 238.0, 269.0, 250.0, 265.0, 251.0, 256.0, 260.0, 240.0, 282.0, 254.0, 265.0, 262.0, 262.0, 262.0, 260.0, 233.0, 237.0, 264.0, 258.0, 255.0, 261.0, 249.0, 264.0, 258.0, 264.0, 255.0, 267.0, 268.0, 254.0, 288.0, 285.0, 265.0, 260.0, 252.0, 267.0, 210.0, 195.0, 257.0, 268.0, 244.0, 278.0, 233.0, 243.0, 256.0, 269.0, 251.0, 268.0, 260.0, 265.0, 255.0, 261.0, 245.0, 208.0, 219.0, 248.0, 254.0, 265.0, 271.0, 253.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.685017408158804, "mean_inference_ms": 1.2078514184124396, "mean_action_processing_ms": 0.13254463761073323, "mean_env_wait_ms": 0.8448968158681478, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 573.0, "episode_reward_min": 180.0, "episode_reward_mean": 509.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 290.0}, "policy_reward_mean": {"ppo": 254.94}, "hist_stats": {"episode_reward": [519.0, 519.0, 462.0, 516.0, 519.0, 519.0, 516.0, 519.0, 479.0, 459.0, 522.0, 516.0, 519.0, 519.0, 522.0, 516.0, 519.0, 513.0, 462.0, 522.0, 522.0, 525.0, 519.0, 519.0, 513.0, 510.0, 573.0, 513.0, 180.0, 519.0, 519.0, 516.0, 530.0, 519.0, 513.0, 527.0, 519.0, 527.0, 456.0, 446.0, 525.0, 507.0, 522.0, 522.0, 519.0, 522.0, 519.0, 519.0, 519.0, 513.0, 516.0, 510.0, 522.0, 522.0, 525.0, 525.0, 513.0, 513.0, 516.0, 530.0, 519.0, 516.0, 573.0, 525.0, 468.0, 519.0, 516.0, 525.0, 516.0, 525.0, 467.0, 519.0, 516.0, 516.0, 522.0, 519.0, 524.0, 522.0, 470.0, 522.0, 516.0, 513.0, 522.0, 522.0, 522.0, 573.0, 525.0, 519.0, 405.0, 525.0, 522.0, 476.0, 525.0, 519.0, 525.0, 516.0, 453.0, 467.0, 519.0, 524.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 265.0, 264.0, 255.0, 227.0, 235.0, 248.0, 268.0, 254.0, 265.0, 262.0, 257.0, 262.0, 254.0, 254.0, 265.0, 235.0, 244.0, 223.0, 236.0, 267.0, 255.0, 262.0, 254.0, 251.0, 268.0, 256.0, 263.0, 254.0, 268.0, 241.0, 275.0, 247.0, 272.0, 264.0, 249.0, 232.0, 230.0, 262.0, 260.0, 265.0, 257.0, 261.0, 264.0, 267.0, 252.0, 258.0, 261.0, 246.0, 267.0, 266.0, 244.0, 285.0, 288.0, 249.0, 264.0, 91.0, 89.0, 261.0, 258.0, 267.0, 252.0, 255.0, 261.0, 262.0, 268.0, 264.0, 255.0, 234.0, 279.0, 267.0, 260.0, 259.0, 260.0, 269.0, 258.0, 237.0, 219.0, 226.0, 220.0, 263.0, 262.0, 251.0, 256.0, 270.0, 252.0, 252.0, 
270.0, 262.0, 257.0, 259.0, 263.0, 261.0, 258.0, 275.0, 244.0, 252.0, 267.0, 252.0, 261.0, 264.0, 252.0, 250.0, 260.0, 246.0, 276.0, 264.0, 258.0, 259.0, 266.0, 251.0, 274.0, 259.0, 254.0, 253.0, 260.0, 259.0, 257.0, 269.0, 261.0, 257.0, 262.0, 266.0, 250.0, 290.0, 283.0, 269.0, 256.0, 220.0, 248.0, 264.0, 255.0, 248.0, 268.0, 259.0, 266.0, 249.0, 267.0, 253.0, 272.0, 229.0, 238.0, 269.0, 250.0, 265.0, 251.0, 256.0, 260.0, 240.0, 282.0, 254.0, 265.0, 262.0, 262.0, 262.0, 260.0, 233.0, 237.0, 264.0, 258.0, 255.0, 261.0, 249.0, 264.0, 258.0, 264.0, 255.0, 267.0, 268.0, 254.0, 288.0, 285.0, 265.0, 260.0, 252.0, 267.0, 210.0, 195.0, 257.0, 268.0, 244.0, 278.0, 233.0, 243.0, 256.0, 269.0, 251.0, 268.0, 260.0, 265.0, 255.0, 261.0, 245.0, 208.0, 219.0, 248.0, 254.0, 265.0, 271.0, 253.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.685017408158804, "mean_inference_ms": 1.2078514184124396, "mean_action_processing_ms": 0.13254463761073323, "mean_env_wait_ms": 0.8448968158681478, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3174400, "num_agent_steps_trained": 3174400, "num_env_steps_sampled": 1587200, "num_env_steps_trained": 1587200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1587200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3174400, "timers": {"training_iteration_time_ms": 3710.841, "learn_time_ms": 1171.544, "learn_throughput": 10925.753, "synch_weights_time_ms": 9.167}, "counters": {"num_env_steps_sampled": 1587200, "num_env_steps_trained": 1587200, "num_agent_steps_sampled": 3174400, "num_agent_steps_trained": 3174400}, "done": false, "episodes_total": 3968, "training_iteration": 124, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-01", "timestamp": 1666580881, "time_this_iter_s": 3.731330394744873, "time_total_s": 466.80446887016296, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 466.80446887016296, "timesteps_since_restore": 0, "iterations_since_restore": 124, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.499999999999996, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 177.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 157.79, "shaped_reward_min": 125, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.65, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.63, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 14.21, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.18, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.92, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.88, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.02, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.39, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.62, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.4, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.59, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.35, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.92, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.88, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.92, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.88, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00022825569612905383, "policy_loss": 3.9655715227127075e-05, "vf_loss": 7.364105224609375, "vf_explained_var": 0.7113704681396484, "kl": 0.002136504976078868, "entropy": 1.0956169366836548, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1600000, "num_env_steps_trained": 1600000, "num_agent_steps_sampled": 3200000, "num_agent_steps_trained": 3200000}, "sampler_results": {"episode_reward_max": 576.0, "episode_reward_min": 405.0, "episode_reward_mean": 512.19, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 256.095}, "custom_metrics": {"sparse_reward_mean": 177.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 157.79, "shaped_reward_min": 125, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.65, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.63, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.21, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.18, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 7, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 6, "potting_onion_agent_0_mean": 13.92, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.88, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.02, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.39, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.45, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.62, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.4, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.59, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.92, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.88, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.92, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.88, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 519.0, 513.0, 527.0, 519.0, 527.0, 456.0, 446.0, 525.0, 507.0, 522.0, 522.0, 519.0, 522.0, 519.0, 519.0, 519.0, 513.0, 516.0, 510.0, 522.0, 522.0, 525.0, 525.0, 513.0, 513.0, 516.0, 530.0, 519.0, 516.0, 573.0, 525.0, 468.0, 519.0, 516.0, 525.0, 516.0, 525.0, 467.0, 519.0, 516.0, 516.0, 522.0, 519.0, 524.0, 522.0, 470.0, 522.0, 516.0, 513.0, 522.0, 522.0, 522.0, 573.0, 525.0, 519.0, 405.0, 525.0, 522.0, 476.0, 525.0, 519.0, 525.0, 516.0, 453.0, 467.0, 519.0, 524.0, 513.0, 468.0, 516.0, 519.0, 507.0, 519.0, 408.0, 530.0, 519.0, 519.0, 519.0, 522.0, 573.0, 573.0, 513.0, 522.0, 462.0, 519.0, 519.0, 468.0, 573.0, 525.0, 522.0, 530.0, 436.0, 519.0, 513.0, 576.0, 405.0, 468.0, 522.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 268.0, 264.0, 255.0, 234.0, 279.0, 267.0, 260.0, 259.0, 260.0, 269.0, 258.0, 237.0, 219.0, 226.0, 220.0, 263.0, 262.0, 251.0, 256.0, 270.0, 252.0, 252.0, 270.0, 262.0, 257.0, 259.0, 263.0, 261.0, 258.0, 275.0, 244.0, 252.0, 267.0, 252.0, 261.0, 264.0, 252.0, 250.0, 260.0, 246.0, 276.0, 264.0, 258.0, 259.0, 266.0, 251.0, 274.0, 259.0, 254.0, 253.0, 260.0, 259.0, 257.0, 269.0, 261.0, 257.0, 262.0, 266.0, 250.0, 290.0, 283.0, 269.0, 256.0, 220.0, 248.0, 264.0, 255.0, 248.0, 268.0, 259.0, 266.0, 249.0, 267.0, 253.0, 272.0, 229.0, 238.0, 269.0, 250.0, 265.0, 251.0, 256.0, 260.0, 240.0, 282.0, 254.0, 265.0, 262.0, 262.0, 262.0, 260.0, 233.0, 237.0, 264.0, 258.0, 255.0, 261.0, 249.0, 264.0, 258.0, 264.0, 255.0, 267.0, 268.0, 254.0, 288.0, 285.0, 265.0, 260.0, 252.0, 267.0, 210.0, 195.0, 257.0, 268.0, 244.0, 278.0, 233.0, 243.0, 256.0, 269.0, 251.0, 268.0, 260.0, 265.0, 255.0, 261.0, 245.0, 208.0, 219.0, 248.0, 254.0, 265.0, 271.0, 253.0, 254.0, 259.0, 224.0, 244.0, 252.0, 264.0, 262.0, 257.0, 256.0, 251.0, 265.0, 254.0, 202.0, 206.0, 275.0, 255.0, 271.0, 248.0, 264.0, 255.0, 257.0, 262.0, 273.0, 249.0, 298.0, 275.0, 294.0, 279.0, 243.0, 270.0, 257.0, 265.0, 221.0, 241.0, 252.0, 267.0, 260.0, 259.0, 224.0, 244.0, 289.0, 284.0, 258.0, 267.0, 266.0, 256.0, 267.0, 263.0, 220.0, 216.0, 251.0, 268.0, 254.0, 259.0, 288.0, 288.0, 207.0, 198.0, 240.0, 228.0, 267.0, 255.0, 264.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6849697162845529, "mean_inference_ms": 1.2077556939870815, "mean_action_processing_ms": 0.1325400427025592, "mean_env_wait_ms": 0.8445976040558727, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 576.0, "episode_reward_min": 405.0, "episode_reward_mean": 512.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 256.095}, "hist_stats": {"episode_reward": [530.0, 519.0, 513.0, 527.0, 519.0, 527.0, 456.0, 446.0, 525.0, 507.0, 522.0, 522.0, 519.0, 522.0, 519.0, 519.0, 519.0, 513.0, 516.0, 510.0, 522.0, 522.0, 525.0, 525.0, 513.0, 513.0, 516.0, 530.0, 519.0, 516.0, 573.0, 525.0, 468.0, 519.0, 516.0, 525.0, 516.0, 525.0, 467.0, 519.0, 516.0, 516.0, 522.0, 519.0, 524.0, 522.0, 470.0, 522.0, 516.0, 513.0, 522.0, 522.0, 522.0, 573.0, 525.0, 519.0, 405.0, 525.0, 522.0, 476.0, 525.0, 519.0, 525.0, 516.0, 453.0, 467.0, 519.0, 524.0, 513.0, 468.0, 516.0, 519.0, 507.0, 519.0, 408.0, 530.0, 519.0, 519.0, 519.0, 522.0, 573.0, 573.0, 513.0, 522.0, 462.0, 519.0, 519.0, 468.0, 573.0, 525.0, 522.0, 530.0, 436.0, 519.0, 513.0, 576.0, 405.0, 468.0, 522.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 268.0, 264.0, 255.0, 234.0, 279.0, 267.0, 260.0, 259.0, 260.0, 269.0, 258.0, 237.0, 219.0, 226.0, 220.0, 263.0, 262.0, 251.0, 256.0, 270.0, 252.0, 252.0, 270.0, 262.0, 257.0, 259.0, 263.0, 261.0, 258.0, 275.0, 244.0, 252.0, 267.0, 252.0, 261.0, 264.0, 252.0, 250.0, 260.0, 246.0, 276.0, 264.0, 258.0, 259.0, 266.0, 251.0, 274.0, 259.0, 254.0, 253.0, 260.0, 259.0, 257.0, 269.0, 261.0, 257.0, 262.0, 266.0, 250.0, 290.0, 283.0, 269.0, 256.0, 220.0, 248.0, 264.0, 255.0, 248.0, 268.0, 259.0, 266.0, 249.0, 267.0, 253.0, 272.0, 229.0, 238.0, 269.0, 250.0, 265.0, 251.0, 256.0, 260.0, 240.0, 282.0, 
254.0, 265.0, 262.0, 262.0, 262.0, 260.0, 233.0, 237.0, 264.0, 258.0, 255.0, 261.0, 249.0, 264.0, 258.0, 264.0, 255.0, 267.0, 268.0, 254.0, 288.0, 285.0, 265.0, 260.0, 252.0, 267.0, 210.0, 195.0, 257.0, 268.0, 244.0, 278.0, 233.0, 243.0, 256.0, 269.0, 251.0, 268.0, 260.0, 265.0, 255.0, 261.0, 245.0, 208.0, 219.0, 248.0, 254.0, 265.0, 271.0, 253.0, 254.0, 259.0, 224.0, 244.0, 252.0, 264.0, 262.0, 257.0, 256.0, 251.0, 265.0, 254.0, 202.0, 206.0, 275.0, 255.0, 271.0, 248.0, 264.0, 255.0, 257.0, 262.0, 273.0, 249.0, 298.0, 275.0, 294.0, 279.0, 243.0, 270.0, 257.0, 265.0, 221.0, 241.0, 252.0, 267.0, 260.0, 259.0, 224.0, 244.0, 289.0, 284.0, 258.0, 267.0, 266.0, 256.0, 267.0, 263.0, 220.0, 216.0, 251.0, 268.0, 254.0, 259.0, 288.0, 288.0, 207.0, 198.0, 240.0, 228.0, 267.0, 255.0, 264.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6849697162845529, "mean_inference_ms": 1.2077556939870815, "mean_action_processing_ms": 0.1325400427025592, "mean_env_wait_ms": 0.8445976040558727, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3200000, "num_agent_steps_trained": 3200000, "num_env_steps_sampled": 1600000, "num_env_steps_trained": 1600000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1600000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3200000, "timers": {"training_iteration_time_ms": 3695.721, "learn_time_ms": 1159.375, "learn_throughput": 11040.435, "synch_weights_time_ms": 9.116}, "counters": {"num_env_steps_sampled": 1600000, "num_env_steps_trained": 1600000, "num_agent_steps_sampled": 3200000, "num_agent_steps_trained": 3200000}, "done": false, "episodes_total": 4000, "training_iteration": 125, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-05", "timestamp": 1666580885, "time_this_iter_s": 3.5378715991973877, "time_total_s": 470.34234046936035, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 470.34234046936035, "timesteps_since_restore": 0, "iterations_since_restore": 125, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.800000000000004, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 175.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.43, "shaped_reward_min": 60, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.41, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.62, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 13.95, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.17, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.43, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.71, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.96, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.3, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.55, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.37, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.52, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.3, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.71, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.96, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.71, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.96, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008826033445075154, "policy_loss": -0.00106078723911196, "vf_loss": 7.301456928253174, "vf_explained_var": 0.7096636295318604, "kl": 0.0017461793031543493, "entropy": 1.1039214134216309, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1612800, "num_env_steps_trained": 1612800, "num_agent_steps_sampled": 3225600, "num_agent_steps_trained": 3225600}, "sampler_results": {"episode_reward_max": 576.0, "episode_reward_min": 180.0, "episode_reward_mean": 506.43, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 253.215}, "custom_metrics": {"sparse_reward_mean": 175.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.43, "shaped_reward_min": 60, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.41, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.62, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 13.95, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.17, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.43, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.71, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.96, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.34, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.3, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.55, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.37, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.52, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.3, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.71, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.96, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.71, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.96, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [468.0, 519.0, 516.0, 525.0, 516.0, 525.0, 467.0, 519.0, 516.0, 516.0, 522.0, 519.0, 524.0, 522.0, 470.0, 522.0, 516.0, 513.0, 522.0, 522.0, 522.0, 573.0, 525.0, 519.0, 405.0, 525.0, 522.0, 476.0, 525.0, 519.0, 525.0, 516.0, 453.0, 467.0, 519.0, 524.0, 513.0, 468.0, 516.0, 519.0, 507.0, 519.0, 408.0, 530.0, 519.0, 519.0, 519.0, 522.0, 573.0, 573.0, 513.0, 522.0, 462.0, 519.0, 519.0, 468.0, 573.0, 525.0, 522.0, 530.0, 436.0, 519.0, 513.0, 576.0, 405.0, 468.0, 522.0, 519.0, 516.0, 519.0, 519.0, 522.0, 404.0, 522.0, 524.0, 519.0, 522.0, 525.0, 525.0, 522.0, 180.0, 519.0, 516.0, 522.0, 456.0, 464.0, 464.0, 525.0, 522.0, 522.0, 516.0, 525.0, 525.0, 525.0, 522.0, 504.0, 513.0, 482.0, 513.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [220.0, 248.0, 264.0, 255.0, 248.0, 268.0, 259.0, 266.0, 249.0, 267.0, 253.0, 272.0, 229.0, 238.0, 269.0, 250.0, 265.0, 251.0, 256.0, 260.0, 240.0, 282.0, 254.0, 265.0, 262.0, 262.0, 262.0, 260.0, 233.0, 237.0, 264.0, 258.0, 255.0, 261.0, 249.0, 264.0, 258.0, 264.0, 255.0, 267.0, 268.0, 254.0, 288.0, 285.0, 265.0, 260.0, 252.0, 267.0, 210.0, 195.0, 257.0, 268.0, 244.0, 278.0, 233.0, 243.0, 256.0, 269.0, 251.0, 268.0, 260.0, 265.0, 255.0, 261.0, 245.0, 208.0, 219.0, 248.0, 254.0, 265.0, 271.0, 253.0, 254.0, 259.0, 224.0, 244.0, 252.0, 264.0, 262.0, 257.0, 256.0, 251.0, 265.0, 254.0, 202.0, 206.0, 275.0, 255.0, 271.0, 248.0, 264.0, 255.0, 257.0, 262.0, 273.0, 249.0, 298.0, 275.0, 294.0, 279.0, 243.0, 270.0, 257.0, 265.0, 221.0, 241.0, 252.0, 267.0, 260.0, 259.0, 224.0, 244.0, 289.0, 284.0, 258.0, 267.0, 266.0, 256.0, 267.0, 263.0, 220.0, 216.0, 251.0, 268.0, 254.0, 259.0, 288.0, 288.0, 207.0, 198.0, 240.0, 228.0, 267.0, 255.0, 264.0, 255.0, 254.0, 262.0, 254.0, 265.0, 261.0, 258.0, 249.0, 273.0, 203.0, 201.0, 261.0, 261.0, 269.0, 255.0, 261.0, 258.0, 255.0, 267.0, 260.0, 265.0, 269.0, 256.0, 264.0, 258.0, 83.0, 97.0, 254.0, 265.0, 268.0, 248.0, 254.0, 268.0, 230.0, 226.0, 213.0, 251.0, 228.0, 236.0, 277.0, 248.0, 255.0, 267.0, 265.0, 257.0, 261.0, 255.0, 258.0, 267.0, 262.0, 263.0, 249.0, 276.0, 268.0, 254.0, 257.0, 247.0, 243.0, 270.0, 236.0, 246.0, 257.0, 256.0, 260.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6849329471543086, "mean_inference_ms": 1.2076212947173204, "mean_action_processing_ms": 0.13253184470704837, "mean_env_wait_ms": 0.8442733889150424, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 576.0, "episode_reward_min": 180.0, "episode_reward_mean": 506.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 253.215}, "hist_stats": {"episode_reward": [468.0, 519.0, 516.0, 525.0, 516.0, 525.0, 467.0, 519.0, 516.0, 516.0, 522.0, 519.0, 524.0, 522.0, 470.0, 522.0, 516.0, 513.0, 522.0, 522.0, 522.0, 573.0, 525.0, 519.0, 405.0, 525.0, 522.0, 476.0, 525.0, 519.0, 525.0, 516.0, 453.0, 467.0, 519.0, 524.0, 513.0, 468.0, 516.0, 519.0, 507.0, 519.0, 408.0, 530.0, 519.0, 519.0, 519.0, 522.0, 573.0, 573.0, 513.0, 522.0, 462.0, 519.0, 519.0, 468.0, 573.0, 525.0, 522.0, 530.0, 436.0, 519.0, 513.0, 576.0, 405.0, 468.0, 522.0, 519.0, 516.0, 519.0, 519.0, 522.0, 404.0, 522.0, 524.0, 519.0, 522.0, 525.0, 525.0, 522.0, 180.0, 519.0, 516.0, 522.0, 456.0, 464.0, 464.0, 525.0, 522.0, 522.0, 516.0, 525.0, 525.0, 525.0, 522.0, 504.0, 513.0, 482.0, 513.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [220.0, 248.0, 264.0, 255.0, 248.0, 268.0, 259.0, 266.0, 249.0, 267.0, 253.0, 272.0, 229.0, 238.0, 269.0, 250.0, 265.0, 251.0, 256.0, 260.0, 240.0, 282.0, 254.0, 265.0, 262.0, 262.0, 262.0, 260.0, 233.0, 237.0, 264.0, 258.0, 255.0, 261.0, 249.0, 264.0, 258.0, 264.0, 255.0, 267.0, 268.0, 254.0, 288.0, 285.0, 265.0, 260.0, 252.0, 267.0, 210.0, 195.0, 257.0, 268.0, 244.0, 278.0, 233.0, 243.0, 256.0, 269.0, 251.0, 268.0, 260.0, 265.0, 255.0, 261.0, 245.0, 208.0, 219.0, 248.0, 254.0, 265.0, 271.0, 253.0, 254.0, 259.0, 224.0, 244.0, 252.0, 264.0, 262.0, 257.0, 256.0, 251.0, 265.0, 254.0, 202.0, 206.0, 275.0, 
255.0, 271.0, 248.0, 264.0, 255.0, 257.0, 262.0, 273.0, 249.0, 298.0, 275.0, 294.0, 279.0, 243.0, 270.0, 257.0, 265.0, 221.0, 241.0, 252.0, 267.0, 260.0, 259.0, 224.0, 244.0, 289.0, 284.0, 258.0, 267.0, 266.0, 256.0, 267.0, 263.0, 220.0, 216.0, 251.0, 268.0, 254.0, 259.0, 288.0, 288.0, 207.0, 198.0, 240.0, 228.0, 267.0, 255.0, 264.0, 255.0, 254.0, 262.0, 254.0, 265.0, 261.0, 258.0, 249.0, 273.0, 203.0, 201.0, 261.0, 261.0, 269.0, 255.0, 261.0, 258.0, 255.0, 267.0, 260.0, 265.0, 269.0, 256.0, 264.0, 258.0, 83.0, 97.0, 254.0, 265.0, 268.0, 248.0, 254.0, 268.0, 230.0, 226.0, 213.0, 251.0, 228.0, 236.0, 277.0, 248.0, 255.0, 267.0, 265.0, 257.0, 261.0, 255.0, 258.0, 267.0, 262.0, 263.0, 249.0, 276.0, 268.0, 254.0, 257.0, 247.0, 243.0, 270.0, 236.0, 246.0, 257.0, 256.0, 260.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6849329471543086, "mean_inference_ms": 1.2076212947173204, "mean_action_processing_ms": 0.13253184470704837, "mean_env_wait_ms": 0.8442733889150424, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3225600, "num_agent_steps_trained": 3225600, "num_env_steps_sampled": 1612800, "num_env_steps_trained": 1612800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1612800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3225600, "timers": {"training_iteration_time_ms": 3670.112, "learn_time_ms": 1151.969, "learn_throughput": 11111.414, "synch_weights_time_ms": 9.104}, "counters": {"num_env_steps_sampled": 1612800, "num_env_steps_trained": 1612800, "num_agent_steps_sampled": 3225600, "num_agent_steps_trained": 3225600}, "done": false, "episodes_total": 4032, "training_iteration": 126, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-08", "timestamp": 1666580888, "time_this_iter_s": 3.6375370025634766, "time_total_s": 473.9798774719238, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 473.9798774719238, "timesteps_since_restore": 0, "iterations_since_restore": 126, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 174.6, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.37, "shaped_reward_min": 60, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.64, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 15.39, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 14.14, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 14.9, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.9, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 14.71, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.0, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.51, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.47, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.32, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.9, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 14.71, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.9, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 14.71, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0034485086798667908, "policy_loss": -0.003633841872215271, "vf_loss": 7.3317694664001465, "vf_explained_var": 0.6929588317871094, "kl": 0.001574978232383728, "entropy": 1.0956857204437256, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1625600, "num_env_steps_trained": 1625600, "num_agent_steps_sampled": 3251200, "num_agent_steps_trained": 3251200}, "sampler_results": {"episode_reward_max": 576.0, "episode_reward_min": 180.0, "episode_reward_mean": 505.57, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 252.785}, "custom_metrics": {"sparse_reward_mean": 174.6, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.37, "shaped_reward_min": 60, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.64, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 15.39, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.14, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 14.9, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.9, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 14.71, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.0, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.37, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.51, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.47, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.32, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.9, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 14.71, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.9, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 14.71, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [453.0, 467.0, 519.0, 524.0, 513.0, 468.0, 516.0, 519.0, 507.0, 519.0, 408.0, 530.0, 519.0, 519.0, 519.0, 522.0, 573.0, 573.0, 513.0, 522.0, 462.0, 519.0, 519.0, 468.0, 573.0, 525.0, 522.0, 530.0, 436.0, 519.0, 513.0, 576.0, 405.0, 468.0, 522.0, 519.0, 516.0, 519.0, 519.0, 522.0, 404.0, 522.0, 524.0, 519.0, 522.0, 525.0, 525.0, 522.0, 180.0, 519.0, 516.0, 522.0, 456.0, 464.0, 464.0, 525.0, 522.0, 522.0, 516.0, 525.0, 525.0, 525.0, 522.0, 504.0, 513.0, 482.0, 513.0, 519.0, 522.0, 525.0, 530.0, 513.0, 519.0, 570.0, 513.0, 519.0, 522.0, 470.0, 525.0, 533.0, 525.0, 516.0, 525.0, 405.0, 525.0, 516.0, 519.0, 522.0, 519.0, 525.0, 459.0, 519.0, 530.0, 405.0, 519.0, 482.0, 530.0, 522.0, 459.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [245.0, 208.0, 219.0, 248.0, 254.0, 265.0, 271.0, 253.0, 254.0, 259.0, 224.0, 244.0, 252.0, 264.0, 262.0, 257.0, 256.0, 251.0, 265.0, 254.0, 202.0, 206.0, 275.0, 255.0, 271.0, 248.0, 264.0, 255.0, 257.0, 262.0, 273.0, 249.0, 298.0, 275.0, 294.0, 279.0, 243.0, 270.0, 257.0, 265.0, 221.0, 241.0, 252.0, 267.0, 260.0, 259.0, 224.0, 244.0, 289.0, 284.0, 258.0, 267.0, 266.0, 256.0, 267.0, 263.0, 220.0, 216.0, 251.0, 268.0, 254.0, 259.0, 288.0, 288.0, 207.0, 198.0, 240.0, 228.0, 267.0, 255.0, 264.0, 255.0, 254.0, 262.0, 254.0, 265.0, 261.0, 258.0, 249.0, 273.0, 203.0, 201.0, 261.0, 261.0, 269.0, 255.0, 261.0, 258.0, 255.0, 267.0, 260.0, 265.0, 269.0, 256.0, 264.0, 258.0, 83.0, 97.0, 254.0, 265.0, 268.0, 248.0, 254.0, 268.0, 230.0, 226.0, 213.0, 251.0, 228.0, 236.0, 277.0, 248.0, 255.0, 267.0, 265.0, 257.0, 261.0, 255.0, 258.0, 267.0, 262.0, 263.0, 249.0, 276.0, 268.0, 254.0, 257.0, 247.0, 243.0, 270.0, 236.0, 246.0, 257.0, 256.0, 260.0, 259.0, 260.0, 262.0, 262.0, 263.0, 265.0, 265.0, 256.0, 257.0, 259.0, 260.0, 283.0, 287.0, 257.0, 256.0, 243.0, 276.0, 265.0, 257.0, 236.0, 234.0, 257.0, 268.0, 263.0, 270.0, 266.0, 259.0, 248.0, 268.0, 266.0, 259.0, 209.0, 196.0, 274.0, 251.0, 257.0, 259.0, 266.0, 253.0, 264.0, 258.0, 254.0, 265.0, 243.0, 282.0, 235.0, 224.0, 260.0, 259.0, 260.0, 270.0, 193.0, 212.0, 257.0, 262.0, 239.0, 243.0, 259.0, 271.0, 252.0, 270.0, 225.0, 234.0, 256.0, 266.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684867033205738, "mean_inference_ms": 1.2074572036214568, "mean_action_processing_ms": 0.1325194643634045, "mean_env_wait_ms": 0.8439127867584169, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 576.0, "episode_reward_min": 180.0, "episode_reward_mean": 505.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 298.0}, "policy_reward_mean": {"ppo": 252.785}, "hist_stats": {"episode_reward": [453.0, 467.0, 519.0, 524.0, 513.0, 468.0, 516.0, 519.0, 507.0, 519.0, 408.0, 530.0, 519.0, 519.0, 519.0, 522.0, 573.0, 573.0, 513.0, 522.0, 462.0, 519.0, 519.0, 468.0, 573.0, 525.0, 522.0, 530.0, 436.0, 519.0, 513.0, 576.0, 405.0, 468.0, 522.0, 519.0, 516.0, 519.0, 519.0, 522.0, 404.0, 522.0, 524.0, 519.0, 522.0, 525.0, 525.0, 522.0, 180.0, 519.0, 516.0, 522.0, 456.0, 464.0, 464.0, 525.0, 522.0, 522.0, 516.0, 525.0, 525.0, 525.0, 522.0, 504.0, 513.0, 482.0, 513.0, 519.0, 522.0, 525.0, 530.0, 513.0, 519.0, 570.0, 513.0, 519.0, 522.0, 470.0, 525.0, 533.0, 525.0, 516.0, 525.0, 405.0, 525.0, 516.0, 519.0, 522.0, 519.0, 525.0, 459.0, 519.0, 530.0, 405.0, 519.0, 482.0, 530.0, 522.0, 459.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [245.0, 208.0, 219.0, 248.0, 254.0, 265.0, 271.0, 253.0, 254.0, 259.0, 224.0, 244.0, 252.0, 264.0, 262.0, 257.0, 256.0, 251.0, 265.0, 254.0, 202.0, 206.0, 275.0, 255.0, 271.0, 248.0, 264.0, 255.0, 257.0, 262.0, 273.0, 249.0, 298.0, 275.0, 294.0, 279.0, 243.0, 270.0, 257.0, 265.0, 221.0, 241.0, 252.0, 267.0, 260.0, 259.0, 224.0, 244.0, 289.0, 284.0, 258.0, 267.0, 266.0, 256.0, 267.0, 263.0, 220.0, 216.0, 251.0, 268.0, 254.0, 259.0, 288.0, 288.0, 207.0, 198.0, 240.0, 228.0, 267.0, 255.0, 264.0, 255.0, 254.0, 262.0, 254.0, 265.0, 261.0, 258.0, 249.0, 273.0, 203.0, 201.0, 261.0, 261.0, 269.0, 255.0, 261.0, 
258.0, 255.0, 267.0, 260.0, 265.0, 269.0, 256.0, 264.0, 258.0, 83.0, 97.0, 254.0, 265.0, 268.0, 248.0, 254.0, 268.0, 230.0, 226.0, 213.0, 251.0, 228.0, 236.0, 277.0, 248.0, 255.0, 267.0, 265.0, 257.0, 261.0, 255.0, 258.0, 267.0, 262.0, 263.0, 249.0, 276.0, 268.0, 254.0, 257.0, 247.0, 243.0, 270.0, 236.0, 246.0, 257.0, 256.0, 260.0, 259.0, 260.0, 262.0, 262.0, 263.0, 265.0, 265.0, 256.0, 257.0, 259.0, 260.0, 283.0, 287.0, 257.0, 256.0, 243.0, 276.0, 265.0, 257.0, 236.0, 234.0, 257.0, 268.0, 263.0, 270.0, 266.0, 259.0, 248.0, 268.0, 266.0, 259.0, 209.0, 196.0, 274.0, 251.0, 257.0, 259.0, 266.0, 253.0, 264.0, 258.0, 254.0, 265.0, 243.0, 282.0, 235.0, 224.0, 260.0, 259.0, 260.0, 270.0, 193.0, 212.0, 257.0, 262.0, 239.0, 243.0, 259.0, 271.0, 252.0, 270.0, 225.0, 234.0, 256.0, 266.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684867033205738, "mean_inference_ms": 1.2074572036214568, "mean_action_processing_ms": 0.1325194643634045, "mean_env_wait_ms": 0.8439127867584169, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3251200, "num_agent_steps_trained": 3251200, "num_env_steps_sampled": 1625600, "num_env_steps_trained": 1625600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1625600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3251200, "timers": {"training_iteration_time_ms": 3636.835, "learn_time_ms": 1150.015, "learn_throughput": 11130.293, "synch_weights_time_ms": 9.702}, "counters": {"num_env_steps_sampled": 1625600, "num_env_steps_trained": 1625600, "num_agent_steps_sampled": 3251200, "num_agent_steps_trained": 3251200}, "done": false, "episodes_total": 4064, "training_iteration": 127, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-12", "timestamp": 1666580892, "time_this_iter_s": 3.6391212940216064, "time_total_s": 477.61899876594543, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 477.61899876594543, "timesteps_since_restore": 0, "iterations_since_restore": 127, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.5, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 174.8, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.31, "shaped_reward_min": 60, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.7, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 15.42, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 
14.16, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 14.97, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.42, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.41, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.91, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 14.77, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.5, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.45, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.46, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.91, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 14.77, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.91, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 14.77, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002333069918677211, "policy_loss": -0.002517772139981389, "vf_loss": 7.354443550109863, "vf_explained_var": 0.7034727334976196, "kl": 0.001937449211254716, "entropy": 1.1014816761016846, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1638400, "num_env_steps_trained": 1638400, "num_agent_steps_sampled": 3276800, "num_agent_steps_trained": 3276800}, "sampler_results": {"episode_reward_max": 576.0, "episode_reward_min": 180.0, "episode_reward_mean": 505.91, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 296.0}, "policy_reward_mean": {"ppo": 252.955}, "custom_metrics": {"sparse_reward_mean": 174.8, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 156.31, "shaped_reward_min": 60, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.7, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 15.42, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.16, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 14.97, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.42, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.41, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.23, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.91, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 14.77, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.11, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.47, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.38, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.5, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.45, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.46, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.91, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 14.77, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.91, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 14.77, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [405.0, 468.0, 522.0, 519.0, 516.0, 519.0, 519.0, 522.0, 404.0, 522.0, 524.0, 519.0, 522.0, 525.0, 525.0, 522.0, 180.0, 519.0, 516.0, 522.0, 456.0, 464.0, 464.0, 525.0, 522.0, 522.0, 516.0, 525.0, 525.0, 525.0, 522.0, 504.0, 513.0, 482.0, 513.0, 519.0, 522.0, 525.0, 530.0, 513.0, 519.0, 570.0, 513.0, 519.0, 522.0, 470.0, 525.0, 533.0, 525.0, 516.0, 525.0, 405.0, 525.0, 516.0, 519.0, 522.0, 519.0, 525.0, 459.0, 519.0, 530.0, 405.0, 519.0, 482.0, 530.0, 522.0, 459.0, 522.0, 530.0, 513.0, 519.0, 470.0, 522.0, 522.0, 467.0, 516.0, 525.0, 473.0, 522.0, 516.0, 510.0, 519.0, 516.0, 519.0, 525.0, 525.0, 476.0, 513.0, 455.0, 576.0, 513.0, 576.0, 458.0, 513.0, 516.0, 507.0, 522.0, 473.0, 570.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [207.0, 198.0, 240.0, 228.0, 267.0, 255.0, 264.0, 255.0, 254.0, 262.0, 254.0, 265.0, 261.0, 258.0, 249.0, 273.0, 203.0, 201.0, 261.0, 261.0, 269.0, 255.0, 261.0, 258.0, 255.0, 267.0, 260.0, 265.0, 269.0, 256.0, 264.0, 258.0, 83.0, 97.0, 254.0, 265.0, 268.0, 248.0, 254.0, 268.0, 230.0, 226.0, 213.0, 251.0, 228.0, 236.0, 277.0, 248.0, 255.0, 267.0, 265.0, 257.0, 261.0, 255.0, 258.0, 267.0, 262.0, 263.0, 249.0, 276.0, 268.0, 254.0, 257.0, 247.0, 243.0, 270.0, 236.0, 246.0, 257.0, 256.0, 260.0, 259.0, 260.0, 262.0, 262.0, 263.0, 265.0, 265.0, 256.0, 257.0, 259.0, 260.0, 283.0, 287.0, 257.0, 256.0, 243.0, 276.0, 265.0, 257.0, 236.0, 234.0, 257.0, 268.0, 263.0, 270.0, 266.0, 259.0, 248.0, 268.0, 266.0, 259.0, 209.0, 196.0, 274.0, 251.0, 257.0, 259.0, 266.0, 253.0, 264.0, 258.0, 254.0, 265.0, 243.0, 282.0, 235.0, 224.0, 260.0, 259.0, 260.0, 270.0, 193.0, 212.0, 257.0, 262.0, 239.0, 243.0, 259.0, 271.0, 252.0, 270.0, 225.0, 234.0, 256.0, 266.0, 252.0, 278.0, 269.0, 244.0, 259.0, 260.0, 243.0, 227.0, 262.0, 260.0, 272.0, 250.0, 240.0, 227.0, 250.0, 266.0, 259.0, 266.0, 236.0, 237.0, 262.0, 260.0, 257.0, 259.0, 245.0, 265.0, 260.0, 259.0, 261.0, 255.0, 250.0, 269.0, 265.0, 260.0, 258.0, 267.0, 239.0, 237.0, 260.0, 253.0, 232.0, 223.0, 296.0, 280.0, 252.0, 261.0, 290.0, 286.0, 222.0, 236.0, 255.0, 258.0, 267.0, 249.0, 244.0, 263.0, 245.0, 277.0, 241.0, 232.0, 285.0, 285.0, 252.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6847936163608077, "mean_inference_ms": 1.207310297745448, "mean_action_processing_ms": 0.1325129850608212, "mean_env_wait_ms": 0.8436156857138133, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 576.0, "episode_reward_min": 180.0, "episode_reward_mean": 505.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 296.0}, "policy_reward_mean": {"ppo": 252.955}, "hist_stats": {"episode_reward": [405.0, 468.0, 522.0, 519.0, 516.0, 519.0, 519.0, 522.0, 404.0, 522.0, 524.0, 519.0, 522.0, 525.0, 525.0, 522.0, 180.0, 519.0, 516.0, 522.0, 456.0, 464.0, 464.0, 525.0, 522.0, 522.0, 516.0, 525.0, 525.0, 525.0, 522.0, 504.0, 513.0, 482.0, 513.0, 519.0, 522.0, 525.0, 530.0, 513.0, 519.0, 570.0, 513.0, 519.0, 522.0, 470.0, 525.0, 533.0, 525.0, 516.0, 525.0, 405.0, 525.0, 516.0, 519.0, 522.0, 519.0, 525.0, 459.0, 519.0, 530.0, 405.0, 519.0, 482.0, 530.0, 522.0, 459.0, 522.0, 530.0, 513.0, 519.0, 470.0, 522.0, 522.0, 467.0, 516.0, 525.0, 473.0, 522.0, 516.0, 510.0, 519.0, 516.0, 519.0, 525.0, 525.0, 476.0, 513.0, 455.0, 576.0, 513.0, 576.0, 458.0, 513.0, 516.0, 507.0, 522.0, 473.0, 570.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [207.0, 198.0, 240.0, 228.0, 267.0, 255.0, 264.0, 255.0, 254.0, 262.0, 254.0, 265.0, 261.0, 258.0, 249.0, 273.0, 203.0, 201.0, 261.0, 261.0, 269.0, 255.0, 261.0, 258.0, 255.0, 267.0, 260.0, 265.0, 269.0, 256.0, 264.0, 258.0, 83.0, 97.0, 254.0, 265.0, 268.0, 248.0, 254.0, 268.0, 230.0, 226.0, 213.0, 251.0, 228.0, 236.0, 277.0, 248.0, 255.0, 267.0, 265.0, 257.0, 261.0, 255.0, 258.0, 267.0, 262.0, 263.0, 249.0, 276.0, 268.0, 254.0, 257.0, 247.0, 243.0, 270.0, 236.0, 246.0, 257.0, 256.0, 260.0, 259.0, 260.0, 262.0, 262.0, 263.0, 265.0, 265.0, 256.0, 257.0, 259.0, 260.0, 283.0, 287.0, 257.0, 256.0, 243.0, 
276.0, 265.0, 257.0, 236.0, 234.0, 257.0, 268.0, 263.0, 270.0, 266.0, 259.0, 248.0, 268.0, 266.0, 259.0, 209.0, 196.0, 274.0, 251.0, 257.0, 259.0, 266.0, 253.0, 264.0, 258.0, 254.0, 265.0, 243.0, 282.0, 235.0, 224.0, 260.0, 259.0, 260.0, 270.0, 193.0, 212.0, 257.0, 262.0, 239.0, 243.0, 259.0, 271.0, 252.0, 270.0, 225.0, 234.0, 256.0, 266.0, 252.0, 278.0, 269.0, 244.0, 259.0, 260.0, 243.0, 227.0, 262.0, 260.0, 272.0, 250.0, 240.0, 227.0, 250.0, 266.0, 259.0, 266.0, 236.0, 237.0, 262.0, 260.0, 257.0, 259.0, 245.0, 265.0, 260.0, 259.0, 261.0, 255.0, 250.0, 269.0, 265.0, 260.0, 258.0, 267.0, 239.0, 237.0, 260.0, 253.0, 232.0, 223.0, 296.0, 280.0, 252.0, 261.0, 290.0, 286.0, 222.0, 236.0, 255.0, 258.0, 267.0, 249.0, 244.0, 263.0, 245.0, 277.0, 241.0, 232.0, 285.0, 285.0, 252.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6847936163608077, "mean_inference_ms": 1.207310297745448, "mean_action_processing_ms": 0.1325129850608212, "mean_env_wait_ms": 0.8436156857138133, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3276800, "num_agent_steps_trained": 3276800, "num_env_steps_sampled": 1638400, "num_env_steps_trained": 1638400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1638400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3276800, "timers": {"training_iteration_time_ms": 3645.971, "learn_time_ms": 1154.61, "learn_throughput": 11085.992, "synch_weights_time_ms": 9.907}, "counters": {"num_env_steps_sampled": 1638400, "num_env_steps_trained": 1638400, "num_agent_steps_sampled": 3276800, "num_agent_steps_trained": 3276800}, "done": false, "episodes_total": 4096, "training_iteration": 128, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-16", "timestamp": 1666580896, "time_this_iter_s": 3.716869592666626, "time_total_s": 481.33586835861206, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 481.33586835861206, "timesteps_since_restore": 0, "iterations_since_restore": 128, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.833333333333332, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 176.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 156.82, "shaped_reward_min": 125, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.02, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 15.29, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 14.46, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 14.79, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.26, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 14.53, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.95, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.14, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.36, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.17, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.37, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.33, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.26, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 14.53, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.26, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 14.53, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011624525068327785, "policy_loss": 0.0009821219136938453, "vf_loss": 7.322172164916992, "vf_explained_var": 0.6801432371139526, "kl": 0.0017549579497426748, "entropy": 1.1037721633911133, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1651200, "num_env_steps_trained": 1651200, "num_agent_steps_sampled": 3302400, "num_agent_steps_trained": 3302400}, "sampler_results": {"episode_reward_max": 576.0, "episode_reward_min": 405.0, "episode_reward_mean": 509.22, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 296.0}, "policy_reward_mean": {"ppo": 254.61}, "custom_metrics": {"sparse_reward_mean": 176.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 156.82, "shaped_reward_min": 125, "shaped_reward_max": 176, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.02, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 15.29, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 14.46, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 14.79, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.26, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 14.53, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.95, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.14, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.36, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.5, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.4, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.17, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.37, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.33, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.26, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 14.53, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.26, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 14.53, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [513.0, 482.0, 513.0, 519.0, 522.0, 525.0, 530.0, 513.0, 519.0, 570.0, 513.0, 519.0, 522.0, 470.0, 525.0, 533.0, 525.0, 516.0, 525.0, 405.0, 525.0, 516.0, 519.0, 522.0, 519.0, 525.0, 459.0, 519.0, 530.0, 405.0, 519.0, 482.0, 530.0, 522.0, 459.0, 522.0, 530.0, 513.0, 519.0, 470.0, 522.0, 522.0, 467.0, 516.0, 525.0, 473.0, 522.0, 516.0, 510.0, 519.0, 516.0, 519.0, 525.0, 525.0, 476.0, 513.0, 455.0, 576.0, 513.0, 576.0, 458.0, 513.0, 516.0, 507.0, 522.0, 473.0, 570.0, 522.0, 522.0, 410.0, 522.0, 453.0, 516.0, 519.0, 516.0, 522.0, 507.0, 516.0, 573.0, 462.0, 465.0, 516.0, 525.0, 555.0, 519.0, 468.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 573.0, 479.0, 419.0, 516.0, 519.0, 465.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [243.0, 270.0, 236.0, 246.0, 257.0, 256.0, 260.0, 259.0, 260.0, 262.0, 262.0, 263.0, 265.0, 265.0, 256.0, 257.0, 259.0, 260.0, 283.0, 287.0, 257.0, 256.0, 243.0, 276.0, 265.0, 257.0, 236.0, 234.0, 257.0, 268.0, 263.0, 270.0, 266.0, 259.0, 248.0, 268.0, 266.0, 259.0, 209.0, 196.0, 274.0, 251.0, 257.0, 259.0, 266.0, 253.0, 264.0, 258.0, 254.0, 265.0, 243.0, 282.0, 235.0, 224.0, 260.0, 259.0, 260.0, 270.0, 193.0, 212.0, 257.0, 262.0, 239.0, 243.0, 259.0, 271.0, 252.0, 270.0, 225.0, 234.0, 256.0, 266.0, 252.0, 278.0, 269.0, 244.0, 259.0, 260.0, 243.0, 227.0, 262.0, 260.0, 272.0, 250.0, 240.0, 227.0, 250.0, 266.0, 259.0, 266.0, 236.0, 237.0, 262.0, 260.0, 257.0, 259.0, 245.0, 265.0, 260.0, 259.0, 261.0, 255.0, 250.0, 269.0, 265.0, 260.0, 258.0, 267.0, 239.0, 237.0, 260.0, 253.0, 232.0, 223.0, 296.0, 280.0, 252.0, 261.0, 290.0, 286.0, 222.0, 236.0, 255.0, 258.0, 267.0, 249.0, 244.0, 263.0, 245.0, 277.0, 241.0, 232.0, 285.0, 285.0, 252.0, 270.0, 259.0, 263.0, 204.0, 206.0, 246.0, 276.0, 226.0, 227.0, 264.0, 252.0, 265.0, 254.0, 260.0, 256.0, 248.0, 274.0, 256.0, 251.0, 272.0, 244.0, 284.0, 289.0, 231.0, 231.0, 231.0, 234.0, 261.0, 255.0, 259.0, 266.0, 269.0, 286.0, 246.0, 273.0, 221.0, 247.0, 259.0, 251.0, 256.0, 257.0, 251.0, 271.0, 243.0, 270.0, 255.0, 267.0, 252.0, 264.0, 270.0, 246.0, 277.0, 296.0, 251.0, 228.0, 223.0, 196.0, 267.0, 249.0, 255.0, 264.0, 233.0, 232.0, 261.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6846920137628598, "mean_inference_ms": 1.2071382331619824, "mean_action_processing_ms": 0.13250689057418247, "mean_env_wait_ms": 0.8433088508691953, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 576.0, "episode_reward_min": 405.0, "episode_reward_mean": 509.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 296.0}, "policy_reward_mean": {"ppo": 254.61}, "hist_stats": {"episode_reward": [513.0, 482.0, 513.0, 519.0, 522.0, 525.0, 530.0, 513.0, 519.0, 570.0, 513.0, 519.0, 522.0, 470.0, 525.0, 533.0, 525.0, 516.0, 525.0, 405.0, 525.0, 516.0, 519.0, 522.0, 519.0, 525.0, 459.0, 519.0, 530.0, 405.0, 519.0, 482.0, 530.0, 522.0, 459.0, 522.0, 530.0, 513.0, 519.0, 470.0, 522.0, 522.0, 467.0, 516.0, 525.0, 473.0, 522.0, 516.0, 510.0, 519.0, 516.0, 519.0, 525.0, 525.0, 476.0, 513.0, 455.0, 576.0, 513.0, 576.0, 458.0, 513.0, 516.0, 507.0, 522.0, 473.0, 570.0, 522.0, 522.0, 410.0, 522.0, 453.0, 516.0, 519.0, 516.0, 522.0, 507.0, 516.0, 573.0, 462.0, 465.0, 516.0, 525.0, 555.0, 519.0, 468.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 573.0, 479.0, 419.0, 516.0, 519.0, 465.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [243.0, 270.0, 236.0, 246.0, 257.0, 256.0, 260.0, 259.0, 260.0, 262.0, 262.0, 263.0, 265.0, 265.0, 256.0, 257.0, 259.0, 260.0, 283.0, 287.0, 257.0, 256.0, 243.0, 276.0, 265.0, 257.0, 236.0, 234.0, 257.0, 268.0, 263.0, 270.0, 266.0, 259.0, 248.0, 268.0, 266.0, 259.0, 209.0, 196.0, 274.0, 251.0, 257.0, 259.0, 266.0, 253.0, 264.0, 258.0, 254.0, 265.0, 243.0, 282.0, 235.0, 224.0, 260.0, 259.0, 260.0, 270.0, 193.0, 212.0, 257.0, 262.0, 239.0, 243.0, 259.0, 271.0, 252.0, 270.0, 225.0, 234.0, 256.0, 266.0, 252.0, 278.0, 269.0, 244.0, 259.0, 260.0, 243.0, 227.0, 262.0, 260.0, 272.0, 250.0, 240.0, 227.0, 250.0, 
266.0, 259.0, 266.0, 236.0, 237.0, 262.0, 260.0, 257.0, 259.0, 245.0, 265.0, 260.0, 259.0, 261.0, 255.0, 250.0, 269.0, 265.0, 260.0, 258.0, 267.0, 239.0, 237.0, 260.0, 253.0, 232.0, 223.0, 296.0, 280.0, 252.0, 261.0, 290.0, 286.0, 222.0, 236.0, 255.0, 258.0, 267.0, 249.0, 244.0, 263.0, 245.0, 277.0, 241.0, 232.0, 285.0, 285.0, 252.0, 270.0, 259.0, 263.0, 204.0, 206.0, 246.0, 276.0, 226.0, 227.0, 264.0, 252.0, 265.0, 254.0, 260.0, 256.0, 248.0, 274.0, 256.0, 251.0, 272.0, 244.0, 284.0, 289.0, 231.0, 231.0, 231.0, 234.0, 261.0, 255.0, 259.0, 266.0, 269.0, 286.0, 246.0, 273.0, 221.0, 247.0, 259.0, 251.0, 256.0, 257.0, 251.0, 271.0, 243.0, 270.0, 255.0, 267.0, 252.0, 264.0, 270.0, 246.0, 277.0, 296.0, 251.0, 228.0, 223.0, 196.0, 267.0, 249.0, 255.0, 264.0, 233.0, 232.0, 261.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6846920137628598, "mean_inference_ms": 1.2071382331619824, "mean_action_processing_ms": 0.13250689057418247, "mean_env_wait_ms": 0.8433088508691953, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3302400, "num_agent_steps_trained": 3302400, "num_env_steps_sampled": 1651200, "num_env_steps_trained": 1651200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1651200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3302400, "timers": {"training_iteration_time_ms": 3646.276, "learn_time_ms": 1156.884, "learn_throughput": 11064.203, "synch_weights_time_ms": 10.691}, "counters": {"num_env_steps_sampled": 1651200, "num_env_steps_trained": 1651200, "num_agent_steps_sampled": 3302400, "num_agent_steps_trained": 3302400}, "done": false, "episodes_total": 4128, "training_iteration": 129, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-20", "timestamp": 1666580900, "time_this_iter_s": 3.720264434814453, "time_total_s": 485.0561327934265, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 485.0561327934265, "timesteps_since_restore": 0, "iterations_since_restore": 129, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.78, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 176.2, "sparse_reward_min": 0, "sparse_reward_max": 200, "shaped_reward_mean": 155.87, "shaped_reward_min": 9, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.87, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.37, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 
14.41, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.91, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.18, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.54, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.14, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.19, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.46, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.41, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 0, 
"soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.18, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.54, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.18, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.54, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0018369148019701242, "policy_loss": 0.0016497662290930748, "vf_loss": 7.313329219818115, "vf_explained_var": 0.7119976282119751, "kl": 0.001812935690395534, "entropy": 1.0883688926696777, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1664000, "num_env_steps_trained": 1664000, "num_agent_steps_sampled": 3328000, "num_agent_steps_trained": 3328000}, "sampler_results": {"episode_reward_max": 579.0, "episode_reward_min": 9.0, "episode_reward_mean": 508.27, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 254.135}, "custom_metrics": {"sparse_reward_mean": 176.2, "sparse_reward_min": 0, "sparse_reward_max": 200, "shaped_reward_mean": 155.87, "shaped_reward_min": 9, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.87, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.37, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.41, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 14.91, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.52, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.18, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.54, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.14, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.13, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.49, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 6, "dish_drop_agent_1_mean": 0.33, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.19, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.46, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.41, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.18, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.54, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.18, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.54, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 522.0, 459.0, 522.0, 530.0, 513.0, 519.0, 470.0, 522.0, 522.0, 467.0, 516.0, 525.0, 473.0, 522.0, 516.0, 510.0, 519.0, 516.0, 519.0, 525.0, 525.0, 476.0, 513.0, 455.0, 576.0, 513.0, 576.0, 458.0, 513.0, 516.0, 507.0, 522.0, 473.0, 570.0, 522.0, 522.0, 410.0, 522.0, 453.0, 516.0, 519.0, 516.0, 522.0, 507.0, 516.0, 573.0, 462.0, 465.0, 516.0, 525.0, 555.0, 519.0, 468.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 573.0, 479.0, 419.0, 516.0, 519.0, 465.0, 522.0, 530.0, 530.0, 519.0, 570.0, 9.0, 522.0, 573.0, 495.0, 456.0, 513.0, 522.0, 498.0, 527.0, 519.0, 473.0, 519.0, 522.0, 525.0, 510.0, 522.0, 525.0, 465.0, 525.0, 513.0, 513.0, 570.0, 570.0, 525.0, 516.0, 579.0, 530.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [259.0, 271.0, 252.0, 270.0, 225.0, 234.0, 256.0, 266.0, 252.0, 278.0, 269.0, 244.0, 259.0, 260.0, 243.0, 227.0, 262.0, 260.0, 272.0, 250.0, 240.0, 227.0, 250.0, 266.0, 259.0, 266.0, 236.0, 237.0, 262.0, 260.0, 257.0, 259.0, 245.0, 265.0, 260.0, 259.0, 261.0, 255.0, 250.0, 269.0, 265.0, 260.0, 258.0, 267.0, 239.0, 237.0, 260.0, 253.0, 232.0, 223.0, 296.0, 280.0, 252.0, 261.0, 290.0, 286.0, 222.0, 236.0, 255.0, 258.0, 267.0, 249.0, 244.0, 263.0, 245.0, 277.0, 241.0, 232.0, 285.0, 285.0, 252.0, 270.0, 259.0, 263.0, 204.0, 206.0, 246.0, 276.0, 226.0, 227.0, 264.0, 252.0, 265.0, 254.0, 260.0, 256.0, 248.0, 274.0, 256.0, 251.0, 272.0, 244.0, 284.0, 289.0, 231.0, 231.0, 231.0, 234.0, 261.0, 255.0, 259.0, 266.0, 269.0, 286.0, 246.0, 273.0, 221.0, 247.0, 259.0, 251.0, 256.0, 257.0, 251.0, 271.0, 243.0, 270.0, 255.0, 267.0, 252.0, 264.0, 270.0, 246.0, 277.0, 296.0, 251.0, 228.0, 223.0, 196.0, 267.0, 249.0, 255.0, 264.0, 233.0, 232.0, 261.0, 261.0, 264.0, 266.0, 253.0, 277.0, 250.0, 269.0, 303.0, 267.0, 3.0, 6.0, 258.0, 264.0, 294.0, 279.0, 249.0, 246.0, 212.0, 244.0, 279.0, 234.0, 273.0, 249.0, 247.0, 251.0, 262.0, 265.0, 265.0, 254.0, 231.0, 242.0, 252.0, 267.0, 263.0, 259.0, 251.0, 274.0, 240.0, 270.0, 257.0, 265.0, 263.0, 262.0, 246.0, 219.0, 254.0, 271.0, 262.0, 251.0, 261.0, 252.0, 292.0, 278.0, 290.0, 280.0, 264.0, 261.0, 269.0, 247.0, 289.0, 290.0, 254.0, 276.0, 269.0, 250.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6846467193085607, "mean_inference_ms": 1.2069910164086037, "mean_action_processing_ms": 0.13250549815495508, "mean_env_wait_ms": 0.8430438612717461, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 579.0, "episode_reward_min": 9.0, "episode_reward_mean": 508.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 254.135}, "hist_stats": {"episode_reward": [530.0, 522.0, 459.0, 522.0, 530.0, 513.0, 519.0, 470.0, 522.0, 522.0, 467.0, 516.0, 525.0, 473.0, 522.0, 516.0, 510.0, 519.0, 516.0, 519.0, 525.0, 525.0, 476.0, 513.0, 455.0, 576.0, 513.0, 576.0, 458.0, 513.0, 516.0, 507.0, 522.0, 473.0, 570.0, 522.0, 522.0, 410.0, 522.0, 453.0, 516.0, 519.0, 516.0, 522.0, 507.0, 516.0, 573.0, 462.0, 465.0, 516.0, 525.0, 555.0, 519.0, 468.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 573.0, 479.0, 419.0, 516.0, 519.0, 465.0, 522.0, 530.0, 530.0, 519.0, 570.0, 9.0, 522.0, 573.0, 495.0, 456.0, 513.0, 522.0, 498.0, 527.0, 519.0, 473.0, 519.0, 522.0, 525.0, 510.0, 522.0, 525.0, 465.0, 525.0, 513.0, 513.0, 570.0, 570.0, 525.0, 516.0, 579.0, 530.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [259.0, 271.0, 252.0, 270.0, 225.0, 234.0, 256.0, 266.0, 252.0, 278.0, 269.0, 244.0, 259.0, 260.0, 243.0, 227.0, 262.0, 260.0, 272.0, 250.0, 240.0, 227.0, 250.0, 266.0, 259.0, 266.0, 236.0, 237.0, 262.0, 260.0, 257.0, 259.0, 245.0, 265.0, 260.0, 259.0, 261.0, 255.0, 250.0, 269.0, 265.0, 260.0, 258.0, 267.0, 239.0, 237.0, 260.0, 253.0, 232.0, 223.0, 296.0, 280.0, 252.0, 261.0, 290.0, 286.0, 222.0, 236.0, 255.0, 258.0, 267.0, 249.0, 244.0, 263.0, 245.0, 277.0, 241.0, 232.0, 285.0, 285.0, 252.0, 270.0, 259.0, 263.0, 204.0, 206.0, 246.0, 276.0, 226.0, 227.0, 264.0, 252.0, 265.0, 254.0, 260.0, 256.0, 248.0, 
274.0, 256.0, 251.0, 272.0, 244.0, 284.0, 289.0, 231.0, 231.0, 231.0, 234.0, 261.0, 255.0, 259.0, 266.0, 269.0, 286.0, 246.0, 273.0, 221.0, 247.0, 259.0, 251.0, 256.0, 257.0, 251.0, 271.0, 243.0, 270.0, 255.0, 267.0, 252.0, 264.0, 270.0, 246.0, 277.0, 296.0, 251.0, 228.0, 223.0, 196.0, 267.0, 249.0, 255.0, 264.0, 233.0, 232.0, 261.0, 261.0, 264.0, 266.0, 253.0, 277.0, 250.0, 269.0, 303.0, 267.0, 3.0, 6.0, 258.0, 264.0, 294.0, 279.0, 249.0, 246.0, 212.0, 244.0, 279.0, 234.0, 273.0, 249.0, 247.0, 251.0, 262.0, 265.0, 265.0, 254.0, 231.0, 242.0, 252.0, 267.0, 263.0, 259.0, 251.0, 274.0, 240.0, 270.0, 257.0, 265.0, 263.0, 262.0, 246.0, 219.0, 254.0, 271.0, 262.0, 251.0, 261.0, 252.0, 292.0, 278.0, 290.0, 280.0, 264.0, 261.0, 269.0, 247.0, 289.0, 290.0, 254.0, 276.0, 269.0, 250.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6846467193085607, "mean_inference_ms": 1.2069910164086037, "mean_action_processing_ms": 0.13250549815495508, "mean_env_wait_ms": 0.8430438612717461, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3328000, "num_agent_steps_trained": 3328000, "num_env_steps_sampled": 1664000, "num_env_steps_trained": 1664000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1664000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3328000, "timers": {"training_iteration_time_ms": 3653.288, "learn_time_ms": 1161.453, "learn_throughput": 11020.674, "synch_weights_time_ms": 11.693}, "counters": {"num_env_steps_sampled": 1664000, "num_env_steps_trained": 1664000, "num_agent_steps_sampled": 3328000, "num_agent_steps_trained": 3328000}, "done": false, "episodes_total": 4160, "training_iteration": 130, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-24", "timestamp": 1666580904, "time_this_iter_s": 3.7706971168518066, "time_total_s": 488.8268299102783, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 488.8268299102783, "timesteps_since_restore": 0, "iterations_since_restore": 130, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.73333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 175.0, "sparse_reward_min": 0, "sparse_reward_max": 200, "shaped_reward_mean": 154.88, "shaped_reward_min": 9, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.72, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.49, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 14.28, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.91, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.02, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.55, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.09, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.22, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.17, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.5, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.46, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.35, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.02, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.55, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.02, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.55, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003063816111534834, "policy_loss": -0.0004948408459313214, "vf_loss": 7.325481414794922, "vf_explained_var": 0.7079624533653259, "kl": 0.0018343559931963682, "entropy": 1.0881778001785278, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1676800, "num_env_steps_trained": 1676800, "num_agent_steps_sampled": 3353600, "num_agent_steps_trained": 3353600}, "sampler_results": {"episode_reward_max": 579.0, "episode_reward_min": 9.0, "episode_reward_mean": 504.88, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 252.44}, "custom_metrics": {"sparse_reward_mean": 175.0, "sparse_reward_min": 0, "sparse_reward_max": 200, "shaped_reward_mean": 154.88, "shaped_reward_min": 9, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.72, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.49, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.28, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 14.91, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.4, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.56, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.27, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.02, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.55, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.09, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.22, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.33, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.17, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.5, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.46, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.02, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.55, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.02, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.55, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 473.0, 570.0, 522.0, 522.0, 410.0, 522.0, 453.0, 516.0, 519.0, 516.0, 522.0, 507.0, 516.0, 573.0, 462.0, 465.0, 516.0, 525.0, 555.0, 519.0, 468.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 573.0, 479.0, 419.0, 516.0, 519.0, 465.0, 522.0, 530.0, 530.0, 519.0, 570.0, 9.0, 522.0, 573.0, 495.0, 456.0, 513.0, 522.0, 498.0, 527.0, 519.0, 473.0, 519.0, 522.0, 525.0, 510.0, 522.0, 525.0, 465.0, 525.0, 513.0, 513.0, 570.0, 570.0, 525.0, 516.0, 579.0, 530.0, 519.0, 525.0, 522.0, 519.0, 413.0, 573.0, 519.0, 522.0, 519.0, 470.0, 522.0, 573.0, 513.0, 522.0, 513.0, 519.0, 450.0, 513.0, 522.0, 180.0, 476.0, 515.0, 530.0, 519.0, 504.0, 530.0, 516.0, 519.0, 530.0, 404.0, 519.0, 516.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [245.0, 277.0, 241.0, 232.0, 285.0, 285.0, 252.0, 270.0, 259.0, 263.0, 204.0, 206.0, 246.0, 276.0, 226.0, 227.0, 264.0, 252.0, 265.0, 254.0, 260.0, 256.0, 248.0, 274.0, 256.0, 251.0, 272.0, 244.0, 284.0, 289.0, 231.0, 231.0, 231.0, 234.0, 261.0, 255.0, 259.0, 266.0, 269.0, 286.0, 246.0, 273.0, 221.0, 247.0, 259.0, 251.0, 256.0, 257.0, 251.0, 271.0, 243.0, 270.0, 255.0, 267.0, 252.0, 264.0, 270.0, 246.0, 277.0, 296.0, 251.0, 228.0, 223.0, 196.0, 267.0, 249.0, 255.0, 264.0, 233.0, 232.0, 261.0, 261.0, 264.0, 266.0, 253.0, 277.0, 250.0, 269.0, 303.0, 267.0, 3.0, 6.0, 258.0, 264.0, 294.0, 279.0, 249.0, 246.0, 212.0, 244.0, 279.0, 234.0, 273.0, 249.0, 247.0, 251.0, 262.0, 265.0, 265.0, 254.0, 231.0, 242.0, 252.0, 267.0, 263.0, 259.0, 251.0, 274.0, 240.0, 270.0, 257.0, 265.0, 263.0, 262.0, 246.0, 219.0, 254.0, 271.0, 262.0, 251.0, 261.0, 252.0, 292.0, 278.0, 290.0, 280.0, 264.0, 261.0, 269.0, 247.0, 289.0, 290.0, 254.0, 276.0, 269.0, 250.0, 262.0, 263.0, 259.0, 263.0, 259.0, 260.0, 216.0, 197.0, 295.0, 278.0, 270.0, 249.0, 259.0, 263.0, 266.0, 253.0, 239.0, 231.0, 268.0, 254.0, 282.0, 291.0, 252.0, 261.0, 260.0, 262.0, 254.0, 259.0, 256.0, 263.0, 213.0, 237.0, 253.0, 260.0, 268.0, 254.0, 88.0, 92.0, 231.0, 245.0, 248.0, 267.0, 268.0, 262.0, 264.0, 255.0, 266.0, 238.0, 264.0, 266.0, 256.0, 260.0, 267.0, 252.0, 269.0, 261.0, 204.0, 200.0, 264.0, 255.0, 251.0, 265.0, 254.0, 265.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6845801041167872, "mean_inference_ms": 1.207078100830706, "mean_action_processing_ms": 0.13249811702206304, "mean_env_wait_ms": 0.8430198604368112, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 579.0, "episode_reward_min": 9.0, "episode_reward_mean": 504.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 252.44}, "hist_stats": {"episode_reward": [522.0, 473.0, 570.0, 522.0, 522.0, 410.0, 522.0, 453.0, 516.0, 519.0, 516.0, 522.0, 507.0, 516.0, 573.0, 462.0, 465.0, 516.0, 525.0, 555.0, 519.0, 468.0, 510.0, 513.0, 522.0, 513.0, 522.0, 516.0, 516.0, 573.0, 479.0, 419.0, 516.0, 519.0, 465.0, 522.0, 530.0, 530.0, 519.0, 570.0, 9.0, 522.0, 573.0, 495.0, 456.0, 513.0, 522.0, 498.0, 527.0, 519.0, 473.0, 519.0, 522.0, 525.0, 510.0, 522.0, 525.0, 465.0, 525.0, 513.0, 513.0, 570.0, 570.0, 525.0, 516.0, 579.0, 530.0, 519.0, 525.0, 522.0, 519.0, 413.0, 573.0, 519.0, 522.0, 519.0, 470.0, 522.0, 573.0, 513.0, 522.0, 513.0, 519.0, 450.0, 513.0, 522.0, 180.0, 476.0, 515.0, 530.0, 519.0, 504.0, 530.0, 516.0, 519.0, 530.0, 404.0, 519.0, 516.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [245.0, 277.0, 241.0, 232.0, 285.0, 285.0, 252.0, 270.0, 259.0, 263.0, 204.0, 206.0, 246.0, 276.0, 226.0, 227.0, 264.0, 252.0, 265.0, 254.0, 260.0, 256.0, 248.0, 274.0, 256.0, 251.0, 272.0, 244.0, 284.0, 289.0, 231.0, 231.0, 231.0, 234.0, 261.0, 255.0, 259.0, 266.0, 269.0, 286.0, 246.0, 273.0, 221.0, 247.0, 259.0, 251.0, 256.0, 257.0, 251.0, 271.0, 243.0, 270.0, 255.0, 267.0, 252.0, 264.0, 270.0, 246.0, 277.0, 296.0, 251.0, 228.0, 223.0, 196.0, 267.0, 249.0, 255.0, 264.0, 233.0, 232.0, 261.0, 261.0, 264.0, 266.0, 253.0, 277.0, 250.0, 269.0, 303.0, 267.0, 3.0, 6.0, 258.0, 264.0, 294.0, 279.0, 249.0, 246.0, 
212.0, 244.0, 279.0, 234.0, 273.0, 249.0, 247.0, 251.0, 262.0, 265.0, 265.0, 254.0, 231.0, 242.0, 252.0, 267.0, 263.0, 259.0, 251.0, 274.0, 240.0, 270.0, 257.0, 265.0, 263.0, 262.0, 246.0, 219.0, 254.0, 271.0, 262.0, 251.0, 261.0, 252.0, 292.0, 278.0, 290.0, 280.0, 264.0, 261.0, 269.0, 247.0, 289.0, 290.0, 254.0, 276.0, 269.0, 250.0, 262.0, 263.0, 259.0, 263.0, 259.0, 260.0, 216.0, 197.0, 295.0, 278.0, 270.0, 249.0, 259.0, 263.0, 266.0, 253.0, 239.0, 231.0, 268.0, 254.0, 282.0, 291.0, 252.0, 261.0, 260.0, 262.0, 254.0, 259.0, 256.0, 263.0, 213.0, 237.0, 253.0, 260.0, 268.0, 254.0, 88.0, 92.0, 231.0, 245.0, 248.0, 267.0, 268.0, 262.0, 264.0, 255.0, 266.0, 238.0, 264.0, 266.0, 256.0, 260.0, 267.0, 252.0, 269.0, 261.0, 204.0, 200.0, 264.0, 255.0, 251.0, 265.0, 254.0, 265.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6845801041167872, "mean_inference_ms": 1.207078100830706, "mean_action_processing_ms": 0.13249811702206304, "mean_env_wait_ms": 0.8430198604368112, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3353600, "num_agent_steps_trained": 3353600, "num_env_steps_sampled": 1676800, "num_env_steps_trained": 1676800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1676800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3353600, "timers": {"training_iteration_time_ms": 3668.455, "learn_time_ms": 1137.089, "learn_throughput": 11256.819, "synch_weights_time_ms": 11.163}, "counters": {"num_env_steps_sampled": 1676800, "num_env_steps_trained": 1676800, "num_agent_steps_sampled": 3353600, "num_agent_steps_trained": 3353600}, "done": false, "episodes_total": 4192, "training_iteration": 131, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-28", "timestamp": 1666580908, "time_this_iter_s": 4.0141332149505615, "time_total_s": 492.8409631252289, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 492.8409631252289, "timesteps_since_restore": 0, "iterations_since_restore": 131, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.433333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 175.8, "sparse_reward_min": 0, "sparse_reward_max": 200, "shaped_reward_mean": 155.98, "shaped_reward_min": 9, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.79, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.65, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 14.43, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.12, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.11, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.64, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.28, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.31, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.62, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.37, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.57, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.3, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.11, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.64, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.11, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.64, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0006394966039806604, "policy_loss": -0.0008195515256375074, "vf_loss": 7.270031929016113, "vf_explained_var": 0.7044593095779419, "kl": 0.001650436781346798, "entropy": 1.093894600868225, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1689600, "num_env_steps_trained": 1689600, "num_agent_steps_sampled": 3379200, "num_agent_steps_trained": 3379200}, "sampler_results": {"episode_reward_max": 579.0, "episode_reward_min": 9.0, "episode_reward_mean": 507.58, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 253.79}, "custom_metrics": {"sparse_reward_mean": 175.8, "sparse_reward_min": 0, "sparse_reward_max": 200, "shaped_reward_mean": 155.98, "shaped_reward_min": 9, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.79, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.65, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.43, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.12, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.26, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.11, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.64, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.28, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.31, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 3.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.34, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.62, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.37, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.57, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.3, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.11, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.64, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.11, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.64, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 519.0, 465.0, 522.0, 530.0, 530.0, 519.0, 570.0, 9.0, 522.0, 573.0, 495.0, 456.0, 513.0, 522.0, 498.0, 527.0, 519.0, 473.0, 519.0, 522.0, 525.0, 510.0, 522.0, 525.0, 465.0, 525.0, 513.0, 513.0, 570.0, 570.0, 525.0, 516.0, 579.0, 530.0, 519.0, 525.0, 522.0, 519.0, 413.0, 573.0, 519.0, 522.0, 519.0, 470.0, 522.0, 573.0, 513.0, 522.0, 513.0, 519.0, 450.0, 513.0, 522.0, 180.0, 476.0, 515.0, 530.0, 519.0, 504.0, 530.0, 516.0, 519.0, 530.0, 404.0, 519.0, 516.0, 519.0, 576.0, 522.0, 513.0, 473.0, 484.0, 516.0, 516.0, 527.0, 522.0, 522.0, 522.0, 522.0, 464.0, 516.0, 530.0, 516.0, 519.0, 516.0, 519.0, 522.0, 516.0, 476.0, 522.0, 513.0, 510.0, 522.0, 519.0, 555.0, 522.0, 507.0, 522.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 249.0, 255.0, 264.0, 233.0, 232.0, 261.0, 261.0, 264.0, 266.0, 253.0, 277.0, 250.0, 269.0, 303.0, 267.0, 3.0, 6.0, 258.0, 264.0, 294.0, 279.0, 249.0, 246.0, 212.0, 244.0, 279.0, 234.0, 273.0, 249.0, 247.0, 251.0, 262.0, 265.0, 265.0, 254.0, 231.0, 242.0, 252.0, 267.0, 263.0, 259.0, 251.0, 274.0, 240.0, 270.0, 257.0, 265.0, 263.0, 262.0, 246.0, 219.0, 254.0, 271.0, 262.0, 251.0, 261.0, 252.0, 292.0, 278.0, 290.0, 280.0, 264.0, 261.0, 269.0, 247.0, 289.0, 290.0, 254.0, 276.0, 269.0, 250.0, 262.0, 263.0, 259.0, 263.0, 259.0, 260.0, 216.0, 197.0, 295.0, 278.0, 270.0, 249.0, 259.0, 263.0, 266.0, 253.0, 239.0, 231.0, 268.0, 254.0, 282.0, 291.0, 252.0, 261.0, 260.0, 262.0, 254.0, 259.0, 256.0, 263.0, 213.0, 237.0, 253.0, 260.0, 268.0, 254.0, 88.0, 92.0, 231.0, 245.0, 248.0, 267.0, 268.0, 262.0, 264.0, 255.0, 266.0, 238.0, 264.0, 266.0, 256.0, 260.0, 267.0, 252.0, 269.0, 261.0, 204.0, 200.0, 264.0, 255.0, 251.0, 265.0, 254.0, 265.0, 281.0, 295.0, 266.0, 256.0, 262.0, 251.0, 234.0, 239.0, 243.0, 241.0, 268.0, 248.0, 255.0, 261.0, 257.0, 270.0, 269.0, 253.0, 260.0, 262.0, 268.0, 254.0, 254.0, 268.0, 236.0, 228.0, 251.0, 265.0, 271.0, 259.0, 272.0, 244.0, 259.0, 260.0, 254.0, 262.0, 246.0, 273.0, 262.0, 260.0, 264.0, 252.0, 240.0, 236.0, 255.0, 267.0, 262.0, 251.0, 248.0, 262.0, 261.0, 261.0, 253.0, 266.0, 268.0, 287.0, 253.0, 269.0, 261.0, 246.0, 267.0, 255.0, 256.0, 269.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844890239893716, "mean_inference_ms": 1.2072226173218075, "mean_action_processing_ms": 0.13248499328671362, "mean_env_wait_ms": 0.8430095449132798, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 579.0, "episode_reward_min": 9.0, "episode_reward_mean": 507.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 253.79}, "hist_stats": {"episode_reward": [516.0, 519.0, 465.0, 522.0, 530.0, 530.0, 519.0, 570.0, 9.0, 522.0, 573.0, 495.0, 456.0, 513.0, 522.0, 498.0, 527.0, 519.0, 473.0, 519.0, 522.0, 525.0, 510.0, 522.0, 525.0, 465.0, 525.0, 513.0, 513.0, 570.0, 570.0, 525.0, 516.0, 579.0, 530.0, 519.0, 525.0, 522.0, 519.0, 413.0, 573.0, 519.0, 522.0, 519.0, 470.0, 522.0, 573.0, 513.0, 522.0, 513.0, 519.0, 450.0, 513.0, 522.0, 180.0, 476.0, 515.0, 530.0, 519.0, 504.0, 530.0, 516.0, 519.0, 530.0, 404.0, 519.0, 516.0, 519.0, 576.0, 522.0, 513.0, 473.0, 484.0, 516.0, 516.0, 527.0, 522.0, 522.0, 522.0, 522.0, 464.0, 516.0, 530.0, 516.0, 519.0, 516.0, 519.0, 522.0, 516.0, 476.0, 522.0, 513.0, 510.0, 522.0, 519.0, 555.0, 522.0, 507.0, 522.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 249.0, 255.0, 264.0, 233.0, 232.0, 261.0, 261.0, 264.0, 266.0, 253.0, 277.0, 250.0, 269.0, 303.0, 267.0, 3.0, 6.0, 258.0, 264.0, 294.0, 279.0, 249.0, 246.0, 212.0, 244.0, 279.0, 234.0, 273.0, 249.0, 247.0, 251.0, 262.0, 265.0, 265.0, 254.0, 231.0, 242.0, 252.0, 267.0, 263.0, 259.0, 251.0, 274.0, 240.0, 270.0, 257.0, 265.0, 263.0, 262.0, 246.0, 219.0, 254.0, 271.0, 262.0, 251.0, 261.0, 252.0, 292.0, 278.0, 290.0, 280.0, 264.0, 261.0, 269.0, 247.0, 289.0, 290.0, 254.0, 276.0, 269.0, 250.0, 262.0, 263.0, 259.0, 263.0, 259.0, 260.0, 216.0, 197.0, 295.0, 278.0, 270.0, 249.0, 259.0, 263.0, 266.0, 253.0, 
239.0, 231.0, 268.0, 254.0, 282.0, 291.0, 252.0, 261.0, 260.0, 262.0, 254.0, 259.0, 256.0, 263.0, 213.0, 237.0, 253.0, 260.0, 268.0, 254.0, 88.0, 92.0, 231.0, 245.0, 248.0, 267.0, 268.0, 262.0, 264.0, 255.0, 266.0, 238.0, 264.0, 266.0, 256.0, 260.0, 267.0, 252.0, 269.0, 261.0, 204.0, 200.0, 264.0, 255.0, 251.0, 265.0, 254.0, 265.0, 281.0, 295.0, 266.0, 256.0, 262.0, 251.0, 234.0, 239.0, 243.0, 241.0, 268.0, 248.0, 255.0, 261.0, 257.0, 270.0, 269.0, 253.0, 260.0, 262.0, 268.0, 254.0, 254.0, 268.0, 236.0, 228.0, 251.0, 265.0, 271.0, 259.0, 272.0, 244.0, 259.0, 260.0, 254.0, 262.0, 246.0, 273.0, 262.0, 260.0, 264.0, 252.0, 240.0, 236.0, 255.0, 267.0, 262.0, 251.0, 248.0, 262.0, 261.0, 261.0, 253.0, 266.0, 268.0, 287.0, 253.0, 269.0, 261.0, 246.0, 267.0, 255.0, 256.0, 269.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844890239893716, "mean_inference_ms": 1.2072226173218075, "mean_action_processing_ms": 0.13248499328671362, "mean_env_wait_ms": 0.8430095449132798, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3379200, "num_agent_steps_trained": 3379200, "num_env_steps_sampled": 1689600, "num_env_steps_trained": 1689600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1689600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3379200, "timers": {"training_iteration_time_ms": 3684.035, "learn_time_ms": 1133.131, "learn_throughput": 11296.134, "synch_weights_time_ms": 11.078}, "counters": {"num_env_steps_sampled": 1689600, "num_env_steps_trained": 1689600, "num_agent_steps_sampled": 3379200, "num_agent_steps_trained": 3379200}, "done": false, "episodes_total": 4224, "training_iteration": 132, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-32", "timestamp": 1666580912, "time_this_iter_s": 3.850553035736084, "time_total_s": 496.69151616096497, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 496.69151616096497, "timesteps_since_restore": 0, "iterations_since_restore": 132, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.616666666666667, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 177.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 157.9, "shaped_reward_min": 60, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.94, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.88, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 14.58, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.32, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.44, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.19, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.82, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.53, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.32, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.26, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.19, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.82, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.19, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.82, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0012471978552639484, "policy_loss": -0.001432041055522859, "vf_loss": 7.322253227233887, "vf_explained_var": 0.7088576555252075, "kl": 0.001987504307180643, "entropy": 1.0947635173797607, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1702400, "num_env_steps_trained": 1702400, "num_agent_steps_sampled": 3404800, "num_agent_steps_trained": 3404800}, "sampler_results": {"episode_reward_max": 579.0, "episode_reward_min": 180.0, "episode_reward_mean": 512.3, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 256.15}, "custom_metrics": {"sparse_reward_mean": 177.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 157.9, "shaped_reward_min": 60, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.94, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.88, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.58, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.32, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.44, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.59, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.29, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.19, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 14.82, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.53, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.43, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.48, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.32, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.19, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 14.82, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.19, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 14.82, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 579.0, 530.0, 519.0, 525.0, 522.0, 519.0, 413.0, 573.0, 519.0, 522.0, 519.0, 470.0, 522.0, 573.0, 513.0, 522.0, 513.0, 519.0, 450.0, 513.0, 522.0, 180.0, 476.0, 515.0, 530.0, 519.0, 504.0, 530.0, 516.0, 519.0, 530.0, 404.0, 519.0, 516.0, 519.0, 576.0, 522.0, 513.0, 473.0, 484.0, 516.0, 516.0, 527.0, 522.0, 522.0, 522.0, 522.0, 464.0, 516.0, 530.0, 516.0, 519.0, 516.0, 519.0, 522.0, 516.0, 476.0, 522.0, 513.0, 510.0, 522.0, 519.0, 555.0, 522.0, 507.0, 522.0, 525.0, 573.0, 519.0, 573.0, 470.0, 522.0, 456.0, 516.0, 525.0, 525.0, 513.0, 379.0, 464.0, 567.0, 522.0, 510.0, 476.0, 524.0, 522.0, 513.0, 516.0, 522.0, 576.0, 573.0, 522.0, 530.0, 576.0, 522.0, 576.0, 513.0, 522.0, 421.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 247.0, 289.0, 290.0, 254.0, 276.0, 269.0, 250.0, 262.0, 263.0, 259.0, 263.0, 259.0, 260.0, 216.0, 197.0, 295.0, 278.0, 270.0, 249.0, 259.0, 263.0, 266.0, 253.0, 239.0, 231.0, 268.0, 254.0, 282.0, 291.0, 252.0, 261.0, 260.0, 262.0, 254.0, 259.0, 256.0, 263.0, 213.0, 237.0, 253.0, 260.0, 268.0, 254.0, 88.0, 92.0, 231.0, 245.0, 248.0, 267.0, 268.0, 262.0, 264.0, 255.0, 266.0, 238.0, 264.0, 266.0, 256.0, 260.0, 267.0, 252.0, 269.0, 261.0, 204.0, 200.0, 264.0, 255.0, 251.0, 265.0, 254.0, 265.0, 281.0, 295.0, 266.0, 256.0, 262.0, 251.0, 234.0, 239.0, 243.0, 241.0, 268.0, 248.0, 255.0, 261.0, 257.0, 270.0, 269.0, 253.0, 260.0, 262.0, 268.0, 254.0, 254.0, 268.0, 236.0, 228.0, 251.0, 265.0, 271.0, 259.0, 272.0, 244.0, 259.0, 260.0, 254.0, 262.0, 246.0, 273.0, 262.0, 260.0, 264.0, 252.0, 240.0, 236.0, 255.0, 267.0, 262.0, 251.0, 248.0, 262.0, 261.0, 261.0, 253.0, 266.0, 268.0, 287.0, 253.0, 269.0, 261.0, 246.0, 267.0, 255.0, 256.0, 269.0, 292.0, 281.0, 260.0, 259.0, 271.0, 302.0, 237.0, 233.0, 273.0, 249.0, 218.0, 238.0, 249.0, 267.0, 257.0, 268.0, 272.0, 253.0, 248.0, 265.0, 184.0, 195.0, 233.0, 231.0, 283.0, 284.0, 276.0, 246.0, 267.0, 243.0, 236.0, 240.0, 268.0, 256.0, 266.0, 256.0, 264.0, 249.0, 269.0, 247.0, 277.0, 245.0, 289.0, 287.0, 284.0, 289.0, 265.0, 257.0, 275.0, 255.0, 283.0, 293.0, 267.0, 255.0, 296.0, 280.0, 257.0, 256.0, 267.0, 255.0, 216.0, 205.0, 270.0, 246.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684376433534071, "mean_inference_ms": 1.2074014996959537, "mean_action_processing_ms": 0.1324722504419282, "mean_env_wait_ms": 0.8430133635583676, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 579.0, "episode_reward_min": 180.0, "episode_reward_mean": 512.3, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 256.15}, "hist_stats": {"episode_reward": [516.0, 579.0, 530.0, 519.0, 525.0, 522.0, 519.0, 413.0, 573.0, 519.0, 522.0, 519.0, 470.0, 522.0, 573.0, 513.0, 522.0, 513.0, 519.0, 450.0, 513.0, 522.0, 180.0, 476.0, 515.0, 530.0, 519.0, 504.0, 530.0, 516.0, 519.0, 530.0, 404.0, 519.0, 516.0, 519.0, 576.0, 522.0, 513.0, 473.0, 484.0, 516.0, 516.0, 527.0, 522.0, 522.0, 522.0, 522.0, 464.0, 516.0, 530.0, 516.0, 519.0, 516.0, 519.0, 522.0, 516.0, 476.0, 522.0, 513.0, 510.0, 522.0, 519.0, 555.0, 522.0, 507.0, 522.0, 525.0, 573.0, 519.0, 573.0, 470.0, 522.0, 456.0, 516.0, 525.0, 525.0, 513.0, 379.0, 464.0, 567.0, 522.0, 510.0, 476.0, 524.0, 522.0, 513.0, 516.0, 522.0, 576.0, 573.0, 522.0, 530.0, 576.0, 522.0, 576.0, 513.0, 522.0, 421.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 247.0, 289.0, 290.0, 254.0, 276.0, 269.0, 250.0, 262.0, 263.0, 259.0, 263.0, 259.0, 260.0, 216.0, 197.0, 295.0, 278.0, 270.0, 249.0, 259.0, 263.0, 266.0, 253.0, 239.0, 231.0, 268.0, 254.0, 282.0, 291.0, 252.0, 261.0, 260.0, 262.0, 254.0, 259.0, 256.0, 263.0, 213.0, 237.0, 253.0, 260.0, 268.0, 254.0, 88.0, 92.0, 231.0, 245.0, 248.0, 267.0, 268.0, 262.0, 264.0, 255.0, 266.0, 238.0, 264.0, 266.0, 256.0, 260.0, 267.0, 252.0, 269.0, 261.0, 204.0, 200.0, 264.0, 255.0, 251.0, 265.0, 254.0, 265.0, 281.0, 295.0, 266.0, 256.0, 262.0, 251.0, 234.0, 239.0, 243.0, 241.0, 268.0, 248.0, 255.0, 261.0, 257.0, 
270.0, 269.0, 253.0, 260.0, 262.0, 268.0, 254.0, 254.0, 268.0, 236.0, 228.0, 251.0, 265.0, 271.0, 259.0, 272.0, 244.0, 259.0, 260.0, 254.0, 262.0, 246.0, 273.0, 262.0, 260.0, 264.0, 252.0, 240.0, 236.0, 255.0, 267.0, 262.0, 251.0, 248.0, 262.0, 261.0, 261.0, 253.0, 266.0, 268.0, 287.0, 253.0, 269.0, 261.0, 246.0, 267.0, 255.0, 256.0, 269.0, 292.0, 281.0, 260.0, 259.0, 271.0, 302.0, 237.0, 233.0, 273.0, 249.0, 218.0, 238.0, 249.0, 267.0, 257.0, 268.0, 272.0, 253.0, 248.0, 265.0, 184.0, 195.0, 233.0, 231.0, 283.0, 284.0, 276.0, 246.0, 267.0, 243.0, 236.0, 240.0, 268.0, 256.0, 266.0, 256.0, 264.0, 249.0, 269.0, 247.0, 277.0, 245.0, 289.0, 287.0, 284.0, 289.0, 265.0, 257.0, 275.0, 255.0, 283.0, 293.0, 267.0, 255.0, 296.0, 280.0, 257.0, 256.0, 267.0, 255.0, 216.0, 205.0, 270.0, 246.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684376433534071, "mean_inference_ms": 1.2074014996959537, "mean_action_processing_ms": 0.1324722504419282, "mean_env_wait_ms": 0.8430133635583676, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3404800, "num_agent_steps_trained": 3404800, "num_env_steps_sampled": 1702400, "num_env_steps_trained": 1702400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1702400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3404800, "timers": {"training_iteration_time_ms": 3677.997, "learn_time_ms": 1133.053, "learn_throughput": 11296.916, "synch_weights_time_ms": 11.447}, "counters": {"num_env_steps_sampled": 1702400, "num_env_steps_trained": 1702400, "num_agent_steps_sampled": 3404800, "num_agent_steps_trained": 3404800}, "done": false, "episodes_total": 4256, "training_iteration": 133, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-36", "timestamp": 1666580916, "time_this_iter_s": 3.7330322265625, "time_total_s": 500.42454838752747, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 500.42454838752747, "timesteps_since_restore": 0, "iterations_since_restore": 133, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.92, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 180.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 159.85, "shaped_reward_min": 124, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.09, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.03, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 14.72, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 15.44, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.25, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 15.1, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.03, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.39, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.25, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 15.1, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.25, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 15.1, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002365380059927702, "policy_loss": -0.002549594035372138, "vf_loss": 7.225664138793945, "vf_explained_var": 0.7080831527709961, "kl": 0.0018610151018947363, "entropy": 1.0767040252685547, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1715200, "num_env_steps_trained": 1715200, "num_agent_steps_sampled": 3430400, "num_agent_steps_trained": 3430400}, "sampler_results": {"episode_reward_max": 579.0, "episode_reward_min": 379.0, "episode_reward_mean": 520.65, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 184.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 260.325}, "custom_metrics": {"sparse_reward_mean": 180.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 159.85, "shaped_reward_min": 124, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.09, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.03, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.72, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 15.44, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.46, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.48, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.25, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 19, "potting_onion_agent_1_mean": 15.1, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.03, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.46, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.39, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.25, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 19, "optimal_onion_potting_agent_1_mean": 15.1, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.25, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 19, "viable_onion_potting_agent_1_mean": 15.1, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [404.0, 519.0, 516.0, 519.0, 576.0, 522.0, 513.0, 473.0, 484.0, 516.0, 516.0, 527.0, 522.0, 522.0, 522.0, 522.0, 464.0, 516.0, 530.0, 516.0, 519.0, 516.0, 519.0, 522.0, 516.0, 476.0, 522.0, 513.0, 510.0, 522.0, 519.0, 555.0, 522.0, 507.0, 522.0, 525.0, 573.0, 519.0, 573.0, 470.0, 522.0, 456.0, 516.0, 525.0, 525.0, 513.0, 379.0, 464.0, 567.0, 522.0, 510.0, 476.0, 524.0, 522.0, 513.0, 516.0, 522.0, 576.0, 573.0, 522.0, 530.0, 576.0, 522.0, 576.0, 513.0, 522.0, 421.0, 516.0, 519.0, 516.0, 519.0, 504.0, 576.0, 530.0, 519.0, 525.0, 507.0, 522.0, 573.0, 573.0, 573.0, 527.0, 522.0, 573.0, 521.0, 522.0, 504.0, 519.0, 522.0, 579.0, 522.0, 570.0, 516.0, 570.0, 530.0, 522.0, 501.0, 519.0, 513.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [204.0, 200.0, 264.0, 255.0, 251.0, 265.0, 254.0, 265.0, 281.0, 295.0, 266.0, 256.0, 262.0, 251.0, 234.0, 239.0, 243.0, 241.0, 268.0, 248.0, 255.0, 261.0, 257.0, 270.0, 269.0, 253.0, 260.0, 262.0, 268.0, 254.0, 254.0, 268.0, 236.0, 228.0, 251.0, 265.0, 271.0, 259.0, 272.0, 244.0, 259.0, 260.0, 254.0, 262.0, 246.0, 273.0, 262.0, 260.0, 264.0, 252.0, 240.0, 236.0, 255.0, 267.0, 262.0, 251.0, 248.0, 262.0, 261.0, 261.0, 253.0, 266.0, 268.0, 287.0, 253.0, 269.0, 261.0, 246.0, 267.0, 255.0, 256.0, 269.0, 292.0, 281.0, 260.0, 259.0, 271.0, 302.0, 237.0, 233.0, 273.0, 249.0, 218.0, 238.0, 249.0, 267.0, 257.0, 268.0, 272.0, 253.0, 248.0, 265.0, 184.0, 195.0, 233.0, 231.0, 283.0, 284.0, 276.0, 246.0, 267.0, 243.0, 236.0, 240.0, 268.0, 256.0, 266.0, 256.0, 264.0, 249.0, 269.0, 247.0, 277.0, 245.0, 289.0, 287.0, 284.0, 289.0, 265.0, 257.0, 275.0, 255.0, 283.0, 293.0, 267.0, 255.0, 296.0, 280.0, 257.0, 256.0, 267.0, 255.0, 216.0, 205.0, 270.0, 246.0, 263.0, 256.0, 260.0, 256.0, 263.0, 256.0, 244.0, 260.0, 280.0, 296.0, 258.0, 272.0, 271.0, 248.0, 289.0, 236.0, 243.0, 264.0, 254.0, 268.0, 286.0, 287.0, 283.0, 290.0, 280.0, 293.0, 265.0, 262.0, 270.0, 252.0, 290.0, 283.0, 250.0, 271.0, 264.0, 258.0, 240.0, 264.0, 261.0, 258.0, 253.0, 269.0, 278.0, 301.0, 265.0, 257.0, 280.0, 290.0, 258.0, 258.0, 292.0, 278.0, 264.0, 266.0, 261.0, 261.0, 253.0, 248.0, 264.0, 255.0, 252.0, 261.0, 249.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842959995242882, "mean_inference_ms": 1.2073346375744007, "mean_action_processing_ms": 0.13245642360808568, "mean_env_wait_ms": 0.8428054226282944, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 579.0, "episode_reward_min": 379.0, "episode_reward_mean": 520.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 184.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 260.325}, "hist_stats": {"episode_reward": [404.0, 519.0, 516.0, 519.0, 576.0, 522.0, 513.0, 473.0, 484.0, 516.0, 516.0, 527.0, 522.0, 522.0, 522.0, 522.0, 464.0, 516.0, 530.0, 516.0, 519.0, 516.0, 519.0, 522.0, 516.0, 476.0, 522.0, 513.0, 510.0, 522.0, 519.0, 555.0, 522.0, 507.0, 522.0, 525.0, 573.0, 519.0, 573.0, 470.0, 522.0, 456.0, 516.0, 525.0, 525.0, 513.0, 379.0, 464.0, 567.0, 522.0, 510.0, 476.0, 524.0, 522.0, 513.0, 516.0, 522.0, 576.0, 573.0, 522.0, 530.0, 576.0, 522.0, 576.0, 513.0, 522.0, 421.0, 516.0, 519.0, 516.0, 519.0, 504.0, 576.0, 530.0, 519.0, 525.0, 507.0, 522.0, 573.0, 573.0, 573.0, 527.0, 522.0, 573.0, 521.0, 522.0, 504.0, 519.0, 522.0, 579.0, 522.0, 570.0, 516.0, 570.0, 530.0, 522.0, 501.0, 519.0, 513.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [204.0, 200.0, 264.0, 255.0, 251.0, 265.0, 254.0, 265.0, 281.0, 295.0, 266.0, 256.0, 262.0, 251.0, 234.0, 239.0, 243.0, 241.0, 268.0, 248.0, 255.0, 261.0, 257.0, 270.0, 269.0, 253.0, 260.0, 262.0, 268.0, 254.0, 254.0, 268.0, 236.0, 228.0, 251.0, 265.0, 271.0, 259.0, 272.0, 244.0, 259.0, 260.0, 254.0, 262.0, 246.0, 273.0, 262.0, 260.0, 264.0, 252.0, 240.0, 236.0, 255.0, 267.0, 262.0, 251.0, 248.0, 262.0, 261.0, 261.0, 253.0, 266.0, 268.0, 287.0, 253.0, 269.0, 261.0, 246.0, 267.0, 255.0, 256.0, 269.0, 292.0, 281.0, 260.0, 259.0, 271.0, 302.0, 237.0, 233.0, 273.0, 249.0, 218.0, 238.0, 249.0, 267.0, 
257.0, 268.0, 272.0, 253.0, 248.0, 265.0, 184.0, 195.0, 233.0, 231.0, 283.0, 284.0, 276.0, 246.0, 267.0, 243.0, 236.0, 240.0, 268.0, 256.0, 266.0, 256.0, 264.0, 249.0, 269.0, 247.0, 277.0, 245.0, 289.0, 287.0, 284.0, 289.0, 265.0, 257.0, 275.0, 255.0, 283.0, 293.0, 267.0, 255.0, 296.0, 280.0, 257.0, 256.0, 267.0, 255.0, 216.0, 205.0, 270.0, 246.0, 263.0, 256.0, 260.0, 256.0, 263.0, 256.0, 244.0, 260.0, 280.0, 296.0, 258.0, 272.0, 271.0, 248.0, 289.0, 236.0, 243.0, 264.0, 254.0, 268.0, 286.0, 287.0, 283.0, 290.0, 280.0, 293.0, 265.0, 262.0, 270.0, 252.0, 290.0, 283.0, 250.0, 271.0, 264.0, 258.0, 240.0, 264.0, 261.0, 258.0, 253.0, 269.0, 278.0, 301.0, 265.0, 257.0, 280.0, 290.0, 258.0, 258.0, 292.0, 278.0, 264.0, 266.0, 261.0, 261.0, 253.0, 248.0, 264.0, 255.0, 252.0, 261.0, 249.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842959995242882, "mean_inference_ms": 1.2073346375744007, "mean_action_processing_ms": 0.13245642360808568, "mean_env_wait_ms": 0.8428054226282944, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3430400, "num_agent_steps_trained": 3430400, "num_env_steps_sampled": 1715200, "num_env_steps_trained": 1715200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1715200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3430400, "timers": {"training_iteration_time_ms": 3671.115, "learn_time_ms": 1132.114, "learn_throughput": 11306.28, "synch_weights_time_ms": 12.164}, "counters": {"num_env_steps_sampled": 1715200, "num_env_steps_trained": 1715200, "num_agent_steps_sampled": 3430400, "num_agent_steps_trained": 3430400}, "done": false, "episodes_total": 4288, "training_iteration": 134, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-40", "timestamp": 1666580920, "time_this_iter_s": 3.6687631607055664, "time_total_s": 504.09331154823303, "pid": 1816099, 
"hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 504.09331154823303, "timesteps_since_restore": 0, "iterations_since_restore": 134, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.883333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 181.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 159.96, "shaped_reward_min": 60, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.07, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.95, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.44, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.44, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.25, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.12, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.58, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.48, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.85, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.29, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.25, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.12, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.25, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.12, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008085625013336539, "policy_loss": -0.0010100638028234243, "vf_loss": 7.388925552368164, "vf_explained_var": 0.7019363641738892, "kl": 0.0018008106853812933, "entropy": 1.074782371520996, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1728000, "num_env_steps_trained": 1728000, "num_agent_steps_sampled": 3456000, "num_agent_steps_trained": 3456000}, "sampler_results": {"episode_reward_max": 579.0, "episode_reward_min": 180.0, "episode_reward_mean": 521.96, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 260.98}, "custom_metrics": {"sparse_reward_mean": 181.0, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 159.96, "shaped_reward_min": 60, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.07, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.95, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.44, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.44, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.42, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.25, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.12, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.58, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 3.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.48, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.85, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.29, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.25, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.12, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.25, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.12, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 507.0, 522.0, 525.0, 573.0, 519.0, 573.0, 470.0, 522.0, 456.0, 516.0, 525.0, 525.0, 513.0, 379.0, 464.0, 567.0, 522.0, 510.0, 476.0, 524.0, 522.0, 513.0, 516.0, 522.0, 576.0, 573.0, 522.0, 530.0, 576.0, 522.0, 576.0, 513.0, 522.0, 421.0, 516.0, 519.0, 516.0, 519.0, 504.0, 576.0, 530.0, 519.0, 525.0, 507.0, 522.0, 573.0, 573.0, 573.0, 527.0, 522.0, 573.0, 521.0, 522.0, 504.0, 519.0, 522.0, 579.0, 522.0, 570.0, 516.0, 570.0, 530.0, 522.0, 501.0, 519.0, 513.0, 519.0, 522.0, 573.0, 573.0, 567.0, 561.0, 567.0, 473.0, 484.0, 516.0, 180.0, 516.0, 519.0, 519.0, 521.0, 530.0, 525.0, 525.0, 570.0, 465.0, 573.0, 527.0, 522.0, 501.0, 527.0, 524.0, 518.0, 498.0, 516.0, 519.0, 522.0, 513.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 269.0, 261.0, 246.0, 267.0, 255.0, 256.0, 269.0, 292.0, 281.0, 260.0, 259.0, 271.0, 302.0, 237.0, 233.0, 273.0, 249.0, 218.0, 238.0, 249.0, 267.0, 257.0, 268.0, 272.0, 253.0, 248.0, 265.0, 184.0, 195.0, 233.0, 231.0, 283.0, 284.0, 276.0, 246.0, 267.0, 243.0, 236.0, 240.0, 268.0, 256.0, 266.0, 256.0, 264.0, 249.0, 269.0, 247.0, 277.0, 245.0, 289.0, 287.0, 284.0, 289.0, 265.0, 257.0, 275.0, 255.0, 283.0, 293.0, 267.0, 255.0, 296.0, 280.0, 257.0, 256.0, 267.0, 255.0, 216.0, 205.0, 270.0, 246.0, 263.0, 256.0, 260.0, 256.0, 263.0, 256.0, 244.0, 260.0, 280.0, 296.0, 258.0, 272.0, 271.0, 248.0, 289.0, 236.0, 243.0, 264.0, 254.0, 268.0, 286.0, 287.0, 283.0, 290.0, 280.0, 293.0, 265.0, 262.0, 270.0, 252.0, 290.0, 283.0, 250.0, 271.0, 264.0, 258.0, 240.0, 264.0, 261.0, 258.0, 253.0, 269.0, 278.0, 301.0, 265.0, 257.0, 280.0, 290.0, 258.0, 258.0, 292.0, 278.0, 264.0, 266.0, 261.0, 261.0, 253.0, 248.0, 264.0, 255.0, 252.0, 261.0, 249.0, 270.0, 258.0, 264.0, 295.0, 278.0, 281.0, 292.0, 284.0, 283.0, 278.0, 283.0, 275.0, 292.0, 232.0, 241.0, 263.0, 221.0, 259.0, 257.0, 91.0, 89.0, 270.0, 246.0, 261.0, 258.0, 272.0, 247.0, 271.0, 250.0, 262.0, 268.0, 263.0, 262.0, 260.0, 265.0, 293.0, 277.0, 226.0, 239.0, 283.0, 290.0, 264.0, 263.0, 251.0, 271.0, 257.0, 244.0, 277.0, 250.0, 260.0, 264.0, 276.0, 242.0, 242.0, 256.0, 255.0, 261.0, 255.0, 264.0, 251.0, 271.0, 261.0, 252.0, 299.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6843184248913212, "mean_inference_ms": 1.2072503599124262, "mean_action_processing_ms": 0.13244891511509294, "mean_env_wait_ms": 0.8425490004197971, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 579.0, "episode_reward_min": 180.0, "episode_reward_mean": 521.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 260.98}, "hist_stats": {"episode_reward": [522.0, 507.0, 522.0, 525.0, 573.0, 519.0, 573.0, 470.0, 522.0, 456.0, 516.0, 525.0, 525.0, 513.0, 379.0, 464.0, 567.0, 522.0, 510.0, 476.0, 524.0, 522.0, 513.0, 516.0, 522.0, 576.0, 573.0, 522.0, 530.0, 576.0, 522.0, 576.0, 513.0, 522.0, 421.0, 516.0, 519.0, 516.0, 519.0, 504.0, 576.0, 530.0, 519.0, 525.0, 507.0, 522.0, 573.0, 573.0, 573.0, 527.0, 522.0, 573.0, 521.0, 522.0, 504.0, 519.0, 522.0, 579.0, 522.0, 570.0, 516.0, 570.0, 530.0, 522.0, 501.0, 519.0, 513.0, 519.0, 522.0, 573.0, 573.0, 567.0, 561.0, 567.0, 473.0, 484.0, 516.0, 180.0, 516.0, 519.0, 519.0, 521.0, 530.0, 525.0, 525.0, 570.0, 465.0, 573.0, 527.0, 522.0, 501.0, 527.0, 524.0, 518.0, 498.0, 516.0, 519.0, 522.0, 513.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 269.0, 261.0, 246.0, 267.0, 255.0, 256.0, 269.0, 292.0, 281.0, 260.0, 259.0, 271.0, 302.0, 237.0, 233.0, 273.0, 249.0, 218.0, 238.0, 249.0, 267.0, 257.0, 268.0, 272.0, 253.0, 248.0, 265.0, 184.0, 195.0, 233.0, 231.0, 283.0, 284.0, 276.0, 246.0, 267.0, 243.0, 236.0, 240.0, 268.0, 256.0, 266.0, 256.0, 264.0, 249.0, 269.0, 247.0, 277.0, 245.0, 289.0, 287.0, 284.0, 289.0, 265.0, 257.0, 275.0, 255.0, 283.0, 293.0, 267.0, 255.0, 296.0, 280.0, 257.0, 256.0, 267.0, 255.0, 216.0, 205.0, 270.0, 246.0, 263.0, 256.0, 260.0, 256.0, 263.0, 256.0, 244.0, 260.0, 280.0, 296.0, 258.0, 272.0, 271.0, 248.0, 289.0, 
236.0, 243.0, 264.0, 254.0, 268.0, 286.0, 287.0, 283.0, 290.0, 280.0, 293.0, 265.0, 262.0, 270.0, 252.0, 290.0, 283.0, 250.0, 271.0, 264.0, 258.0, 240.0, 264.0, 261.0, 258.0, 253.0, 269.0, 278.0, 301.0, 265.0, 257.0, 280.0, 290.0, 258.0, 258.0, 292.0, 278.0, 264.0, 266.0, 261.0, 261.0, 253.0, 248.0, 264.0, 255.0, 252.0, 261.0, 249.0, 270.0, 258.0, 264.0, 295.0, 278.0, 281.0, 292.0, 284.0, 283.0, 278.0, 283.0, 275.0, 292.0, 232.0, 241.0, 263.0, 221.0, 259.0, 257.0, 91.0, 89.0, 270.0, 246.0, 261.0, 258.0, 272.0, 247.0, 271.0, 250.0, 262.0, 268.0, 263.0, 262.0, 260.0, 265.0, 293.0, 277.0, 226.0, 239.0, 283.0, 290.0, 264.0, 263.0, 251.0, 271.0, 257.0, 244.0, 277.0, 250.0, 260.0, 264.0, 276.0, 242.0, 242.0, 256.0, 255.0, 261.0, 255.0, 264.0, 251.0, 271.0, 261.0, 252.0, 299.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6843184248913212, "mean_inference_ms": 1.2072503599124262, "mean_action_processing_ms": 0.13244891511509294, "mean_env_wait_ms": 0.8425490004197971, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3456000, "num_agent_steps_trained": 3456000, "num_env_steps_sampled": 1728000, "num_env_steps_trained": 1728000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1728000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3456000, "timers": {"training_iteration_time_ms": 3693.588, "learn_time_ms": 1146.458, "learn_throughput": 11164.821, "synch_weights_time_ms": 12.713}, "counters": {"num_env_steps_sampled": 1728000, "num_env_steps_trained": 1728000, "num_agent_steps_sampled": 3456000, "num_agent_steps_trained": 3456000}, "done": false, "episodes_total": 4320, "training_iteration": 135, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-44", "timestamp": 1666580924, "time_this_iter_s": 3.7817649841308594, "time_total_s": 507.8750765323639, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 507.8750765323639, "timesteps_since_restore": 0, "iterations_since_restore": 135, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.72, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 183.6, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 161.99, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.75, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 14.92, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.27, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.61, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.1, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.26, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.79, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.7, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.61, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.1, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.61, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.1, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0024380050599575043, "policy_loss": -0.002622002735733986, "vf_loss": 7.156221389770508, "vf_explained_var": 0.6949341297149658, "kl": 0.001646326039917767, "entropy": 1.0632463693618774, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1740800, "num_env_steps_trained": 1740800, "num_agent_steps_sampled": 3481600, "num_agent_steps_trained": 3481600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 529.19, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 264.595}, "custom_metrics": {"sparse_reward_mean": 183.6, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 161.99, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.75, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.92, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.27, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.41, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.23, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.61, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.1, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.26, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.38, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.79, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.7, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.61, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.1, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.61, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.1, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [513.0, 522.0, 421.0, 516.0, 519.0, 516.0, 519.0, 504.0, 576.0, 530.0, 519.0, 525.0, 507.0, 522.0, 573.0, 573.0, 573.0, 527.0, 522.0, 573.0, 521.0, 522.0, 504.0, 519.0, 522.0, 579.0, 522.0, 570.0, 516.0, 570.0, 530.0, 522.0, 501.0, 519.0, 513.0, 519.0, 522.0, 573.0, 573.0, 567.0, 561.0, 567.0, 473.0, 484.0, 516.0, 180.0, 516.0, 519.0, 519.0, 521.0, 530.0, 525.0, 525.0, 570.0, 465.0, 573.0, 527.0, 522.0, 501.0, 527.0, 524.0, 518.0, 498.0, 516.0, 519.0, 522.0, 513.0, 573.0, 579.0, 516.0, 525.0, 579.0, 567.0, 522.0, 570.0, 570.0, 530.0, 522.0, 573.0, 573.0, 530.0, 522.0, 519.0, 582.0, 525.0, 530.0, 525.0, 515.0, 567.0, 522.0, 522.0, 576.0, 519.0, 573.0, 573.0, 525.0, 576.0, 510.0, 522.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 256.0, 267.0, 255.0, 216.0, 205.0, 270.0, 246.0, 263.0, 256.0, 260.0, 256.0, 263.0, 256.0, 244.0, 260.0, 280.0, 296.0, 258.0, 272.0, 271.0, 248.0, 289.0, 236.0, 243.0, 264.0, 254.0, 268.0, 286.0, 287.0, 283.0, 290.0, 280.0, 293.0, 265.0, 262.0, 270.0, 252.0, 290.0, 283.0, 250.0, 271.0, 264.0, 258.0, 240.0, 264.0, 261.0, 258.0, 253.0, 269.0, 278.0, 301.0, 265.0, 257.0, 280.0, 290.0, 258.0, 258.0, 292.0, 278.0, 264.0, 266.0, 261.0, 261.0, 253.0, 248.0, 264.0, 255.0, 252.0, 261.0, 249.0, 270.0, 258.0, 264.0, 295.0, 278.0, 281.0, 292.0, 284.0, 283.0, 278.0, 283.0, 275.0, 292.0, 232.0, 241.0, 263.0, 221.0, 259.0, 257.0, 91.0, 89.0, 270.0, 246.0, 261.0, 258.0, 272.0, 247.0, 271.0, 250.0, 262.0, 268.0, 263.0, 262.0, 260.0, 265.0, 293.0, 277.0, 226.0, 239.0, 283.0, 290.0, 264.0, 263.0, 251.0, 271.0, 257.0, 244.0, 277.0, 250.0, 260.0, 264.0, 276.0, 242.0, 242.0, 256.0, 255.0, 261.0, 255.0, 264.0, 251.0, 271.0, 261.0, 252.0, 299.0, 274.0, 288.0, 291.0, 267.0, 249.0, 262.0, 263.0, 282.0, 297.0, 275.0, 292.0, 255.0, 267.0, 287.0, 283.0, 290.0, 280.0, 263.0, 267.0, 253.0, 269.0, 286.0, 287.0, 279.0, 294.0, 263.0, 267.0, 263.0, 259.0, 259.0, 260.0, 288.0, 294.0, 261.0, 264.0, 260.0, 270.0, 260.0, 265.0, 262.0, 253.0, 279.0, 288.0, 250.0, 272.0, 267.0, 255.0, 290.0, 286.0, 271.0, 248.0, 277.0, 296.0, 293.0, 280.0, 252.0, 273.0, 276.0, 300.0, 261.0, 249.0, 252.0, 270.0, 255.0, 267.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6843760468855757, "mean_inference_ms": 1.2071599936525805, "mean_action_processing_ms": 0.1324412382116291, "mean_env_wait_ms": 0.8422914478679702, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 529.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 264.595}, "hist_stats": {"episode_reward": [513.0, 522.0, 421.0, 516.0, 519.0, 516.0, 519.0, 504.0, 576.0, 530.0, 519.0, 525.0, 507.0, 522.0, 573.0, 573.0, 573.0, 527.0, 522.0, 573.0, 521.0, 522.0, 504.0, 519.0, 522.0, 579.0, 522.0, 570.0, 516.0, 570.0, 530.0, 522.0, 501.0, 519.0, 513.0, 519.0, 522.0, 573.0, 573.0, 567.0, 561.0, 567.0, 473.0, 484.0, 516.0, 180.0, 516.0, 519.0, 519.0, 521.0, 530.0, 525.0, 525.0, 570.0, 465.0, 573.0, 527.0, 522.0, 501.0, 527.0, 524.0, 518.0, 498.0, 516.0, 519.0, 522.0, 513.0, 573.0, 579.0, 516.0, 525.0, 579.0, 567.0, 522.0, 570.0, 570.0, 530.0, 522.0, 573.0, 573.0, 530.0, 522.0, 519.0, 582.0, 525.0, 530.0, 525.0, 515.0, 567.0, 522.0, 522.0, 576.0, 519.0, 573.0, 573.0, 525.0, 576.0, 510.0, 522.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 256.0, 267.0, 255.0, 216.0, 205.0, 270.0, 246.0, 263.0, 256.0, 260.0, 256.0, 263.0, 256.0, 244.0, 260.0, 280.0, 296.0, 258.0, 272.0, 271.0, 248.0, 289.0, 236.0, 243.0, 264.0, 254.0, 268.0, 286.0, 287.0, 283.0, 290.0, 280.0, 293.0, 265.0, 262.0, 270.0, 252.0, 290.0, 283.0, 250.0, 271.0, 264.0, 258.0, 240.0, 264.0, 261.0, 258.0, 253.0, 269.0, 278.0, 301.0, 265.0, 257.0, 280.0, 290.0, 258.0, 258.0, 292.0, 278.0, 264.0, 266.0, 261.0, 261.0, 253.0, 248.0, 264.0, 255.0, 252.0, 261.0, 249.0, 270.0, 258.0, 264.0, 295.0, 278.0, 281.0, 292.0, 284.0, 283.0, 278.0, 283.0, 275.0, 292.0, 232.0, 241.0, 263.0, 
221.0, 259.0, 257.0, 91.0, 89.0, 270.0, 246.0, 261.0, 258.0, 272.0, 247.0, 271.0, 250.0, 262.0, 268.0, 263.0, 262.0, 260.0, 265.0, 293.0, 277.0, 226.0, 239.0, 283.0, 290.0, 264.0, 263.0, 251.0, 271.0, 257.0, 244.0, 277.0, 250.0, 260.0, 264.0, 276.0, 242.0, 242.0, 256.0, 255.0, 261.0, 255.0, 264.0, 251.0, 271.0, 261.0, 252.0, 299.0, 274.0, 288.0, 291.0, 267.0, 249.0, 262.0, 263.0, 282.0, 297.0, 275.0, 292.0, 255.0, 267.0, 287.0, 283.0, 290.0, 280.0, 263.0, 267.0, 253.0, 269.0, 286.0, 287.0, 279.0, 294.0, 263.0, 267.0, 263.0, 259.0, 259.0, 260.0, 288.0, 294.0, 261.0, 264.0, 260.0, 270.0, 260.0, 265.0, 262.0, 253.0, 279.0, 288.0, 250.0, 272.0, 267.0, 255.0, 290.0, 286.0, 271.0, 248.0, 277.0, 296.0, 293.0, 280.0, 252.0, 273.0, 276.0, 300.0, 261.0, 249.0, 252.0, 270.0, 255.0, 267.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6843760468855757, "mean_inference_ms": 1.2071599936525805, "mean_action_processing_ms": 0.1324412382116291, "mean_env_wait_ms": 0.8422914478679702, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3481600, "num_agent_steps_trained": 3481600, "num_env_steps_sampled": 1740800, "num_env_steps_trained": 1740800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1740800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3481600, "timers": {"training_iteration_time_ms": 3693.703, "learn_time_ms": 1147.257, "learn_throughput": 11157.045, "synch_weights_time_ms": 13.631}, "counters": {"num_env_steps_sampled": 1740800, "num_env_steps_trained": 1740800, "num_agent_steps_sampled": 3481600, "num_agent_steps_trained": 3481600}, "done": false, "episodes_total": 4352, "training_iteration": 136, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-48", "timestamp": 1666580928, "time_this_iter_s": 3.640089273452759, "time_total_s": 511.51516580581665, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 511.51516580581665, "timesteps_since_restore": 0, "iterations_since_restore": 136, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.866666666666667, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 184.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 163.23, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.84, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.53, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.17, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.17, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.71, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.63, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.17, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.17, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016262221615761518, "policy_loss": -0.0018370456527918577, "vf_loss": 7.367179870605469, "vf_explained_var": 0.6974908113479614, "kl": 0.001954792533069849, "entropy": 1.0517871379852295, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1753600, "num_env_steps_trained": 1753600, "num_agent_steps_sampled": 3507200, "num_agent_steps_trained": 3507200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 531.63, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 265.815}, "custom_metrics": {"sparse_reward_mean": 184.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 163.23, "shaped_reward_min": 60, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.84, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.53, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.17, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.17, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.41, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.35, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.71, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.63, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.17, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.17, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [501.0, 519.0, 513.0, 519.0, 522.0, 573.0, 573.0, 567.0, 561.0, 567.0, 473.0, 484.0, 516.0, 180.0, 516.0, 519.0, 519.0, 521.0, 530.0, 525.0, 525.0, 570.0, 465.0, 573.0, 527.0, 522.0, 501.0, 527.0, 524.0, 518.0, 498.0, 516.0, 519.0, 522.0, 513.0, 573.0, 579.0, 516.0, 525.0, 579.0, 567.0, 522.0, 570.0, 570.0, 530.0, 522.0, 573.0, 573.0, 530.0, 522.0, 519.0, 582.0, 525.0, 530.0, 525.0, 515.0, 567.0, 522.0, 522.0, 576.0, 519.0, 573.0, 573.0, 525.0, 576.0, 510.0, 522.0, 522.0, 576.0, 525.0, 576.0, 522.0, 573.0, 573.0, 582.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 510.0, 516.0, 519.0, 573.0, 570.0, 522.0, 516.0, 495.0, 525.0, 515.0, 582.0, 576.0, 465.0, 573.0, 576.0, 516.0, 576.0, 490.0, 393.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 248.0, 264.0, 255.0, 252.0, 261.0, 249.0, 270.0, 258.0, 264.0, 295.0, 278.0, 281.0, 292.0, 284.0, 283.0, 278.0, 283.0, 275.0, 292.0, 232.0, 241.0, 263.0, 221.0, 259.0, 257.0, 91.0, 89.0, 270.0, 246.0, 261.0, 258.0, 272.0, 247.0, 271.0, 250.0, 262.0, 268.0, 263.0, 262.0, 260.0, 265.0, 293.0, 277.0, 226.0, 239.0, 283.0, 290.0, 264.0, 263.0, 251.0, 271.0, 257.0, 244.0, 277.0, 250.0, 260.0, 264.0, 276.0, 242.0, 242.0, 256.0, 255.0, 261.0, 255.0, 264.0, 251.0, 271.0, 261.0, 252.0, 299.0, 274.0, 288.0, 291.0, 267.0, 249.0, 262.0, 263.0, 282.0, 297.0, 275.0, 292.0, 255.0, 267.0, 287.0, 283.0, 290.0, 280.0, 263.0, 267.0, 253.0, 269.0, 286.0, 287.0, 279.0, 294.0, 263.0, 267.0, 263.0, 259.0, 259.0, 260.0, 288.0, 294.0, 261.0, 264.0, 260.0, 270.0, 260.0, 265.0, 262.0, 253.0, 279.0, 288.0, 250.0, 272.0, 267.0, 255.0, 290.0, 286.0, 271.0, 248.0, 277.0, 296.0, 293.0, 280.0, 252.0, 273.0, 276.0, 300.0, 261.0, 249.0, 252.0, 270.0, 255.0, 267.0, 290.0, 286.0, 256.0, 269.0, 302.0, 274.0, 253.0, 269.0, 288.0, 285.0, 279.0, 294.0, 271.0, 311.0, 296.0, 277.0, 257.0, 270.0, 266.0, 253.0, 286.0, 293.0, 269.0, 256.0, 263.0, 270.0, 237.0, 273.0, 254.0, 262.0, 267.0, 252.0, 285.0, 288.0, 283.0, 287.0, 265.0, 257.0, 254.0, 262.0, 235.0, 260.0, 267.0, 258.0, 259.0, 256.0, 297.0, 285.0, 285.0, 291.0, 231.0, 234.0, 270.0, 303.0, 290.0, 286.0, 258.0, 258.0, 279.0, 297.0, 235.0, 255.0, 188.0, 205.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684422121241015, "mean_inference_ms": 1.2070898198034101, "mean_action_processing_ms": 0.13243730383591618, "mean_env_wait_ms": 0.8420708101762324, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 180.0, "episode_reward_mean": 531.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 265.815}, "hist_stats": {"episode_reward": [501.0, 519.0, 513.0, 519.0, 522.0, 573.0, 573.0, 567.0, 561.0, 567.0, 473.0, 484.0, 516.0, 180.0, 516.0, 519.0, 519.0, 521.0, 530.0, 525.0, 525.0, 570.0, 465.0, 573.0, 527.0, 522.0, 501.0, 527.0, 524.0, 518.0, 498.0, 516.0, 519.0, 522.0, 513.0, 573.0, 579.0, 516.0, 525.0, 579.0, 567.0, 522.0, 570.0, 570.0, 530.0, 522.0, 573.0, 573.0, 530.0, 522.0, 519.0, 582.0, 525.0, 530.0, 525.0, 515.0, 567.0, 522.0, 522.0, 576.0, 519.0, 573.0, 573.0, 525.0, 576.0, 510.0, 522.0, 522.0, 576.0, 525.0, 576.0, 522.0, 573.0, 573.0, 582.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 510.0, 516.0, 519.0, 573.0, 570.0, 522.0, 516.0, 495.0, 525.0, 515.0, 582.0, 576.0, 465.0, 573.0, 576.0, 516.0, 576.0, 490.0, 393.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 248.0, 264.0, 255.0, 252.0, 261.0, 249.0, 270.0, 258.0, 264.0, 295.0, 278.0, 281.0, 292.0, 284.0, 283.0, 278.0, 283.0, 275.0, 292.0, 232.0, 241.0, 263.0, 221.0, 259.0, 257.0, 91.0, 89.0, 270.0, 246.0, 261.0, 258.0, 272.0, 247.0, 271.0, 250.0, 262.0, 268.0, 263.0, 262.0, 260.0, 265.0, 293.0, 277.0, 226.0, 239.0, 283.0, 290.0, 264.0, 263.0, 251.0, 271.0, 257.0, 244.0, 277.0, 250.0, 260.0, 264.0, 276.0, 242.0, 242.0, 256.0, 255.0, 261.0, 255.0, 264.0, 251.0, 271.0, 261.0, 252.0, 299.0, 274.0, 288.0, 291.0, 267.0, 249.0, 262.0, 263.0, 282.0, 297.0, 275.0, 292.0, 255.0, 267.0, 287.0, 283.0, 290.0, 
280.0, 263.0, 267.0, 253.0, 269.0, 286.0, 287.0, 279.0, 294.0, 263.0, 267.0, 263.0, 259.0, 259.0, 260.0, 288.0, 294.0, 261.0, 264.0, 260.0, 270.0, 260.0, 265.0, 262.0, 253.0, 279.0, 288.0, 250.0, 272.0, 267.0, 255.0, 290.0, 286.0, 271.0, 248.0, 277.0, 296.0, 293.0, 280.0, 252.0, 273.0, 276.0, 300.0, 261.0, 249.0, 252.0, 270.0, 255.0, 267.0, 290.0, 286.0, 256.0, 269.0, 302.0, 274.0, 253.0, 269.0, 288.0, 285.0, 279.0, 294.0, 271.0, 311.0, 296.0, 277.0, 257.0, 270.0, 266.0, 253.0, 286.0, 293.0, 269.0, 256.0, 263.0, 270.0, 237.0, 273.0, 254.0, 262.0, 267.0, 252.0, 285.0, 288.0, 283.0, 287.0, 265.0, 257.0, 254.0, 262.0, 235.0, 260.0, 267.0, 258.0, 259.0, 256.0, 297.0, 285.0, 285.0, 291.0, 231.0, 234.0, 270.0, 303.0, 290.0, 286.0, 258.0, 258.0, 279.0, 297.0, 235.0, 255.0, 188.0, 205.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684422121241015, "mean_inference_ms": 1.2070898198034101, "mean_action_processing_ms": 0.13243730383591618, "mean_env_wait_ms": 0.8420708101762324, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3507200, "num_agent_steps_trained": 3507200, "num_env_steps_sampled": 1753600, "num_env_steps_trained": 1753600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1753600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3507200, "timers": {"training_iteration_time_ms": 3692.907, "learn_time_ms": 1143.935, "learn_throughput": 11189.442, "synch_weights_time_ms": 13.824}, "counters": {"num_env_steps_sampled": 1753600, "num_env_steps_trained": 1753600, "num_agent_steps_sampled": 3507200, "num_agent_steps_trained": 3507200}, "done": false, "episodes_total": 4384, "training_iteration": 137, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-51", "timestamp": 1666580931, "time_this_iter_s": 3.6196677684783936, "time_total_s": 515.134833574295, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 515.134833574295, "timesteps_since_restore": 0, "iterations_since_restore": 137, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 188.0, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 166.32, "shaped_reward_min": 132, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.72, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.96, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 15.3, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.64, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.01, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.35, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.07, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.35, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.59, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.53, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.99, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.01, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.35, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.01, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.35, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0028702523559331894, "policy_loss": -0.0030703365337103605, "vf_loss": 7.255067825317383, "vf_explained_var": 0.7090296745300293, "kl": 0.002137089380994439, "entropy": 1.05084228515625, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1766400, "num_env_steps_trained": 1766400, "num_agent_steps_sampled": 3532800, "num_agent_steps_trained": 3532800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 393.0, "episode_reward_mean": 542.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 188.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 271.16}, "custom_metrics": {"sparse_reward_mean": 188.0, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 166.32, "shaped_reward_min": 132, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.72, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.96, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.3, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.64, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.01, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.35, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.07, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.35, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.59, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.53, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.01, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.35, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.01, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.35, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 522.0, 513.0, 573.0, 579.0, 516.0, 525.0, 579.0, 567.0, 522.0, 570.0, 570.0, 530.0, 522.0, 573.0, 573.0, 530.0, 522.0, 519.0, 582.0, 525.0, 530.0, 525.0, 515.0, 567.0, 522.0, 522.0, 576.0, 519.0, 573.0, 573.0, 525.0, 576.0, 510.0, 522.0, 522.0, 576.0, 525.0, 576.0, 522.0, 573.0, 573.0, 582.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 510.0, 516.0, 519.0, 573.0, 570.0, 522.0, 516.0, 495.0, 525.0, 515.0, 582.0, 576.0, 465.0, 573.0, 576.0, 516.0, 576.0, 490.0, 393.0, 513.0, 513.0, 576.0, 576.0, 561.0, 492.0, 567.0, 527.0, 573.0, 519.0, 576.0, 522.0, 567.0, 530.0, 567.0, 522.0, 579.0, 564.0, 525.0, 519.0, 573.0, 522.0, 573.0, 573.0, 579.0, 536.0, 573.0, 527.0, 522.0, 576.0, 521.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [255.0, 264.0, 251.0, 271.0, 261.0, 252.0, 299.0, 274.0, 288.0, 291.0, 267.0, 249.0, 262.0, 263.0, 282.0, 297.0, 275.0, 292.0, 255.0, 267.0, 287.0, 283.0, 290.0, 280.0, 263.0, 267.0, 253.0, 269.0, 286.0, 287.0, 279.0, 294.0, 263.0, 267.0, 263.0, 259.0, 259.0, 260.0, 288.0, 294.0, 261.0, 264.0, 260.0, 270.0, 260.0, 265.0, 262.0, 253.0, 279.0, 288.0, 250.0, 272.0, 267.0, 255.0, 290.0, 286.0, 271.0, 248.0, 277.0, 296.0, 293.0, 280.0, 252.0, 273.0, 276.0, 300.0, 261.0, 249.0, 252.0, 270.0, 255.0, 267.0, 290.0, 286.0, 256.0, 269.0, 302.0, 274.0, 253.0, 269.0, 288.0, 285.0, 279.0, 294.0, 271.0, 311.0, 296.0, 277.0, 257.0, 270.0, 266.0, 253.0, 286.0, 293.0, 269.0, 256.0, 263.0, 270.0, 237.0, 273.0, 254.0, 262.0, 267.0, 252.0, 285.0, 288.0, 283.0, 287.0, 265.0, 257.0, 254.0, 262.0, 235.0, 260.0, 267.0, 258.0, 259.0, 256.0, 297.0, 285.0, 285.0, 291.0, 231.0, 234.0, 270.0, 303.0, 290.0, 286.0, 258.0, 258.0, 279.0, 297.0, 235.0, 255.0, 188.0, 205.0, 271.0, 242.0, 239.0, 274.0, 267.0, 309.0, 298.0, 278.0, 274.0, 287.0, 244.0, 248.0, 278.0, 289.0, 257.0, 270.0, 288.0, 285.0, 263.0, 256.0, 290.0, 286.0, 255.0, 267.0, 276.0, 291.0, 271.0, 259.0, 286.0, 281.0, 264.0, 258.0, 287.0, 292.0, 284.0, 280.0, 254.0, 271.0, 240.0, 279.0, 291.0, 282.0, 269.0, 253.0, 287.0, 286.0, 289.0, 284.0, 292.0, 287.0, 263.0, 273.0, 275.0, 298.0, 265.0, 262.0, 264.0, 258.0, 287.0, 289.0, 249.0, 272.0, 269.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844500985302645, "mean_inference_ms": 1.2070711367201046, "mean_action_processing_ms": 0.13244079028015465, "mean_env_wait_ms": 0.8419031688694641, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 393.0, "episode_reward_mean": 542.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 188.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 271.16}, "hist_stats": {"episode_reward": [519.0, 522.0, 513.0, 573.0, 579.0, 516.0, 525.0, 579.0, 567.0, 522.0, 570.0, 570.0, 530.0, 522.0, 573.0, 573.0, 530.0, 522.0, 519.0, 582.0, 525.0, 530.0, 525.0, 515.0, 567.0, 522.0, 522.0, 576.0, 519.0, 573.0, 573.0, 525.0, 576.0, 510.0, 522.0, 522.0, 576.0, 525.0, 576.0, 522.0, 573.0, 573.0, 582.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 510.0, 516.0, 519.0, 573.0, 570.0, 522.0, 516.0, 495.0, 525.0, 515.0, 582.0, 576.0, 465.0, 573.0, 576.0, 516.0, 576.0, 490.0, 393.0, 513.0, 513.0, 576.0, 576.0, 561.0, 492.0, 567.0, 527.0, 573.0, 519.0, 576.0, 522.0, 567.0, 530.0, 567.0, 522.0, 579.0, 564.0, 525.0, 519.0, 573.0, 522.0, 573.0, 573.0, 579.0, 536.0, 573.0, 527.0, 522.0, 576.0, 521.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [255.0, 264.0, 251.0, 271.0, 261.0, 252.0, 299.0, 274.0, 288.0, 291.0, 267.0, 249.0, 262.0, 263.0, 282.0, 297.0, 275.0, 292.0, 255.0, 267.0, 287.0, 283.0, 290.0, 280.0, 263.0, 267.0, 253.0, 269.0, 286.0, 287.0, 279.0, 294.0, 263.0, 267.0, 263.0, 259.0, 259.0, 260.0, 288.0, 294.0, 261.0, 264.0, 260.0, 270.0, 260.0, 265.0, 262.0, 253.0, 279.0, 288.0, 250.0, 272.0, 267.0, 255.0, 290.0, 286.0, 271.0, 248.0, 277.0, 296.0, 293.0, 280.0, 252.0, 273.0, 276.0, 300.0, 261.0, 249.0, 252.0, 270.0, 255.0, 267.0, 290.0, 286.0, 256.0, 269.0, 302.0, 274.0, 253.0, 269.0, 288.0, 285.0, 279.0, 294.0, 271.0, 311.0, 296.0, 
277.0, 257.0, 270.0, 266.0, 253.0, 286.0, 293.0, 269.0, 256.0, 263.0, 270.0, 237.0, 273.0, 254.0, 262.0, 267.0, 252.0, 285.0, 288.0, 283.0, 287.0, 265.0, 257.0, 254.0, 262.0, 235.0, 260.0, 267.0, 258.0, 259.0, 256.0, 297.0, 285.0, 285.0, 291.0, 231.0, 234.0, 270.0, 303.0, 290.0, 286.0, 258.0, 258.0, 279.0, 297.0, 235.0, 255.0, 188.0, 205.0, 271.0, 242.0, 239.0, 274.0, 267.0, 309.0, 298.0, 278.0, 274.0, 287.0, 244.0, 248.0, 278.0, 289.0, 257.0, 270.0, 288.0, 285.0, 263.0, 256.0, 290.0, 286.0, 255.0, 267.0, 276.0, 291.0, 271.0, 259.0, 286.0, 281.0, 264.0, 258.0, 287.0, 292.0, 284.0, 280.0, 254.0, 271.0, 240.0, 279.0, 291.0, 282.0, 269.0, 253.0, 287.0, 286.0, 289.0, 284.0, 292.0, 287.0, 263.0, 273.0, 275.0, 298.0, 265.0, 262.0, 264.0, 258.0, 287.0, 289.0, 249.0, 272.0, 269.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844500985302645, "mean_inference_ms": 1.2070711367201046, "mean_action_processing_ms": 0.13244079028015465, "mean_env_wait_ms": 0.8419031688694641, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3532800, "num_agent_steps_trained": 3532800, "num_env_steps_sampled": 1766400, "num_env_steps_trained": 1766400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1766400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3532800, "timers": {"training_iteration_time_ms": 3703.337, "learn_time_ms": 1139.988, "learn_throughput": 11228.189, "synch_weights_time_ms": 14.732}, "counters": {"num_env_steps_sampled": 1766400, "num_env_steps_trained": 1766400, "num_agent_steps_sampled": 3532800, "num_agent_steps_trained": 3532800}, "done": false, "episodes_total": 4416, "training_iteration": 138, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-55", "timestamp": 1666580935, "time_this_iter_s": 3.8024544715881348, "time_total_s": 518.9372880458832, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 518.9372880458832, "timesteps_since_restore": 0, "iterations_since_restore": 138, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.8, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 186.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 165.67, "shaped_reward_min": 132, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.58, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.0, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.65, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.86, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.36, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.16, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.61, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.86, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.36, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.86, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.36, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001902009709738195, "policy_loss": -0.002099734963849187, "vf_loss": 7.285922050476074, "vf_explained_var": 0.6993359327316284, "kl": 0.0017973913345485926, "entropy": 1.0617306232452393, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1779200, "num_env_steps_trained": 1779200, "num_agent_steps_sampled": 3558400, "num_agent_steps_trained": 3558400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 379.0, "episode_reward_mean": 538.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 269.435}, "custom_metrics": {"sparse_reward_mean": 186.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 165.67, "shaped_reward_min": 132, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.58, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.0, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.65, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.21, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.86, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.36, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.16, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.61, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.86, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.36, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.86, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.36, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 510.0, 522.0, 522.0, 576.0, 525.0, 576.0, 522.0, 573.0, 573.0, 582.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 510.0, 516.0, 519.0, 573.0, 570.0, 522.0, 516.0, 495.0, 525.0, 515.0, 582.0, 576.0, 465.0, 573.0, 576.0, 516.0, 576.0, 490.0, 393.0, 513.0, 513.0, 576.0, 576.0, 561.0, 492.0, 567.0, 527.0, 573.0, 519.0, 576.0, 522.0, 567.0, 530.0, 567.0, 522.0, 579.0, 564.0, 525.0, 519.0, 573.0, 522.0, 573.0, 573.0, 579.0, 536.0, 573.0, 527.0, 522.0, 576.0, 521.0, 570.0, 573.0, 479.0, 579.0, 518.0, 573.0, 573.0, 567.0, 522.0, 501.0, 576.0, 630.0, 525.0, 516.0, 519.0, 573.0, 527.0, 573.0, 510.0, 522.0, 473.0, 525.0, 487.0, 516.0, 570.0, 579.0, 525.0, 530.0, 379.0, 525.0, 522.0, 473.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 261.0, 249.0, 252.0, 270.0, 255.0, 267.0, 290.0, 286.0, 256.0, 269.0, 302.0, 274.0, 253.0, 269.0, 288.0, 285.0, 279.0, 294.0, 271.0, 311.0, 296.0, 277.0, 257.0, 270.0, 266.0, 253.0, 286.0, 293.0, 269.0, 256.0, 263.0, 270.0, 237.0, 273.0, 254.0, 262.0, 267.0, 252.0, 285.0, 288.0, 283.0, 287.0, 265.0, 257.0, 254.0, 262.0, 235.0, 260.0, 267.0, 258.0, 259.0, 256.0, 297.0, 285.0, 285.0, 291.0, 231.0, 234.0, 270.0, 303.0, 290.0, 286.0, 258.0, 258.0, 279.0, 297.0, 235.0, 255.0, 188.0, 205.0, 271.0, 242.0, 239.0, 274.0, 267.0, 309.0, 298.0, 278.0, 274.0, 287.0, 244.0, 248.0, 278.0, 289.0, 257.0, 270.0, 288.0, 285.0, 263.0, 256.0, 290.0, 286.0, 255.0, 267.0, 276.0, 291.0, 271.0, 259.0, 286.0, 281.0, 264.0, 258.0, 287.0, 292.0, 284.0, 280.0, 254.0, 271.0, 240.0, 279.0, 291.0, 282.0, 269.0, 253.0, 287.0, 286.0, 289.0, 284.0, 292.0, 287.0, 263.0, 273.0, 275.0, 298.0, 265.0, 262.0, 264.0, 258.0, 287.0, 289.0, 249.0, 272.0, 269.0, 301.0, 294.0, 279.0, 240.0, 239.0, 295.0, 284.0, 262.0, 256.0, 281.0, 292.0, 269.0, 304.0, 281.0, 286.0, 273.0, 249.0, 249.0, 252.0, 287.0, 289.0, 316.0, 314.0, 268.0, 257.0, 260.0, 256.0, 257.0, 262.0, 272.0, 301.0, 273.0, 254.0, 285.0, 288.0, 254.0, 256.0, 258.0, 264.0, 231.0, 242.0, 271.0, 254.0, 238.0, 249.0, 268.0, 248.0, 295.0, 275.0, 293.0, 286.0, 261.0, 264.0, 265.0, 265.0, 180.0, 199.0, 267.0, 258.0, 257.0, 265.0, 247.0, 226.0, 301.0, 272.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844477582222729, "mean_inference_ms": 1.2070333180594002, "mean_action_processing_ms": 0.1324468637600638, "mean_env_wait_ms": 0.8417296114074044, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 379.0, "episode_reward_mean": 538.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 180.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 269.435}, "hist_stats": {"episode_reward": [576.0, 510.0, 522.0, 522.0, 576.0, 525.0, 576.0, 522.0, 573.0, 573.0, 582.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 510.0, 516.0, 519.0, 573.0, 570.0, 522.0, 516.0, 495.0, 525.0, 515.0, 582.0, 576.0, 465.0, 573.0, 576.0, 516.0, 576.0, 490.0, 393.0, 513.0, 513.0, 576.0, 576.0, 561.0, 492.0, 567.0, 527.0, 573.0, 519.0, 576.0, 522.0, 567.0, 530.0, 567.0, 522.0, 579.0, 564.0, 525.0, 519.0, 573.0, 522.0, 573.0, 573.0, 579.0, 536.0, 573.0, 527.0, 522.0, 576.0, 521.0, 570.0, 573.0, 479.0, 579.0, 518.0, 573.0, 573.0, 567.0, 522.0, 501.0, 576.0, 630.0, 525.0, 516.0, 519.0, 573.0, 527.0, 573.0, 510.0, 522.0, 473.0, 525.0, 487.0, 516.0, 570.0, 579.0, 525.0, 530.0, 379.0, 525.0, 522.0, 473.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 261.0, 249.0, 252.0, 270.0, 255.0, 267.0, 290.0, 286.0, 256.0, 269.0, 302.0, 274.0, 253.0, 269.0, 288.0, 285.0, 279.0, 294.0, 271.0, 311.0, 296.0, 277.0, 257.0, 270.0, 266.0, 253.0, 286.0, 293.0, 269.0, 256.0, 263.0, 270.0, 237.0, 273.0, 254.0, 262.0, 267.0, 252.0, 285.0, 288.0, 283.0, 287.0, 265.0, 257.0, 254.0, 262.0, 235.0, 260.0, 267.0, 258.0, 259.0, 256.0, 297.0, 285.0, 285.0, 291.0, 231.0, 234.0, 270.0, 303.0, 290.0, 286.0, 258.0, 258.0, 279.0, 297.0, 235.0, 255.0, 188.0, 205.0, 271.0, 242.0, 239.0, 274.0, 267.0, 309.0, 298.0, 278.0, 274.0, 287.0, 244.0, 248.0, 278.0, 289.0, 
257.0, 270.0, 288.0, 285.0, 263.0, 256.0, 290.0, 286.0, 255.0, 267.0, 276.0, 291.0, 271.0, 259.0, 286.0, 281.0, 264.0, 258.0, 287.0, 292.0, 284.0, 280.0, 254.0, 271.0, 240.0, 279.0, 291.0, 282.0, 269.0, 253.0, 287.0, 286.0, 289.0, 284.0, 292.0, 287.0, 263.0, 273.0, 275.0, 298.0, 265.0, 262.0, 264.0, 258.0, 287.0, 289.0, 249.0, 272.0, 269.0, 301.0, 294.0, 279.0, 240.0, 239.0, 295.0, 284.0, 262.0, 256.0, 281.0, 292.0, 269.0, 304.0, 281.0, 286.0, 273.0, 249.0, 249.0, 252.0, 287.0, 289.0, 316.0, 314.0, 268.0, 257.0, 260.0, 256.0, 257.0, 262.0, 272.0, 301.0, 273.0, 254.0, 285.0, 288.0, 254.0, 256.0, 258.0, 264.0, 231.0, 242.0, 271.0, 254.0, 238.0, 249.0, 268.0, 248.0, 295.0, 275.0, 293.0, 286.0, 261.0, 264.0, 265.0, 265.0, 180.0, 199.0, 267.0, 258.0, 257.0, 265.0, 247.0, 226.0, 301.0, 272.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844477582222729, "mean_inference_ms": 1.2070333180594002, "mean_action_processing_ms": 0.1324468637600638, "mean_env_wait_ms": 0.8417296114074044, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3558400, "num_agent_steps_trained": 3558400, "num_env_steps_sampled": 1779200, "num_env_steps_trained": 1779200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1779200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3558400, "timers": {"training_iteration_time_ms": 3695.789, "learn_time_ms": 1128.481, "learn_throughput": 11342.685, "synch_weights_time_ms": 13.949}, "counters": {"num_env_steps_sampled": 1779200, "num_env_steps_trained": 1779200, "num_agent_steps_sampled": 3558400, "num_agent_steps_trained": 3558400}, "done": false, "episodes_total": 4448, "training_iteration": 139, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-08-59", "timestamp": 1666580939, "time_this_iter_s": 3.6463100910186768, "time_total_s": 522.5835981369019, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 522.5835981369019, "timesteps_since_restore": 0, "iterations_since_restore": 139, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.700000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 185.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 165.19, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.11, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.24, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 14.84, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.88, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.43, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.66, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.65, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.39, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.85, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.62, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.43, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.66, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.43, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.66, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001023371354676783, "policy_loss": -0.0012154708383604884, "vf_loss": 7.2035813331604, "vf_explained_var": 0.7100783586502075, "kl": 0.0020363512448966503, "entropy": 1.0565154552459717, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1792000, "num_env_steps_trained": 1792000, "num_agent_steps_sampled": 3584000, "num_agent_steps_trained": 3584000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 536.39, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 268.195}, "custom_metrics": {"sparse_reward_mean": 185.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 165.19, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.11, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.24, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.84, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.88, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.43, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.66, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.65, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.39, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.44, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.85, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.43, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.66, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.43, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.66, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 576.0, 490.0, 393.0, 513.0, 513.0, 576.0, 576.0, 561.0, 492.0, 567.0, 527.0, 573.0, 519.0, 576.0, 522.0, 567.0, 530.0, 567.0, 522.0, 579.0, 564.0, 525.0, 519.0, 573.0, 522.0, 573.0, 573.0, 579.0, 536.0, 573.0, 527.0, 522.0, 576.0, 521.0, 570.0, 573.0, 479.0, 579.0, 518.0, 573.0, 573.0, 567.0, 522.0, 501.0, 576.0, 630.0, 525.0, 516.0, 519.0, 573.0, 527.0, 573.0, 510.0, 522.0, 473.0, 525.0, 487.0, 516.0, 570.0, 579.0, 525.0, 530.0, 379.0, 525.0, 522.0, 473.0, 573.0, 567.0, 533.0, 552.0, 570.0, 530.0, 351.0, 525.0, 516.0, 530.0, 519.0, 522.0, 573.0, 573.0, 516.0, 533.0, 525.0, 527.0, 576.0, 473.0, 579.0, 501.0, 573.0, 525.0, 522.0, 530.0, 573.0, 519.0, 522.0, 573.0, 576.0, 521.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 258.0, 279.0, 297.0, 235.0, 255.0, 188.0, 205.0, 271.0, 242.0, 239.0, 274.0, 267.0, 309.0, 298.0, 278.0, 274.0, 287.0, 244.0, 248.0, 278.0, 289.0, 257.0, 270.0, 288.0, 285.0, 263.0, 256.0, 290.0, 286.0, 255.0, 267.0, 276.0, 291.0, 271.0, 259.0, 286.0, 281.0, 264.0, 258.0, 287.0, 292.0, 284.0, 280.0, 254.0, 271.0, 240.0, 279.0, 291.0, 282.0, 269.0, 253.0, 287.0, 286.0, 289.0, 284.0, 292.0, 287.0, 263.0, 273.0, 275.0, 298.0, 265.0, 262.0, 264.0, 258.0, 287.0, 289.0, 249.0, 272.0, 269.0, 301.0, 294.0, 279.0, 240.0, 239.0, 295.0, 284.0, 262.0, 256.0, 281.0, 292.0, 269.0, 304.0, 281.0, 286.0, 273.0, 249.0, 249.0, 252.0, 287.0, 289.0, 316.0, 314.0, 268.0, 257.0, 260.0, 256.0, 257.0, 262.0, 272.0, 301.0, 273.0, 254.0, 285.0, 288.0, 254.0, 256.0, 258.0, 264.0, 231.0, 242.0, 271.0, 254.0, 238.0, 249.0, 268.0, 248.0, 295.0, 275.0, 293.0, 286.0, 261.0, 264.0, 265.0, 265.0, 180.0, 199.0, 267.0, 258.0, 257.0, 265.0, 247.0, 226.0, 301.0, 272.0, 281.0, 286.0, 267.0, 266.0, 272.0, 280.0, 292.0, 278.0, 254.0, 276.0, 179.0, 172.0, 258.0, 267.0, 260.0, 256.0, 269.0, 261.0, 277.0, 242.0, 250.0, 272.0, 290.0, 283.0, 287.0, 286.0, 260.0, 256.0, 272.0, 261.0, 270.0, 255.0, 251.0, 276.0, 275.0, 301.0, 237.0, 236.0, 293.0, 286.0, 260.0, 241.0, 288.0, 285.0, 267.0, 258.0, 254.0, 268.0, 266.0, 264.0, 285.0, 288.0, 264.0, 255.0, 251.0, 271.0, 286.0, 287.0, 288.0, 288.0, 254.0, 267.0, 298.0, 275.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844424072145202, "mean_inference_ms": 1.206964806118154, "mean_action_processing_ms": 0.13245111758107558, "mean_env_wait_ms": 0.8415324870317537, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 536.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 268.195}, "hist_stats": {"episode_reward": [516.0, 576.0, 490.0, 393.0, 513.0, 513.0, 576.0, 576.0, 561.0, 492.0, 567.0, 527.0, 573.0, 519.0, 576.0, 522.0, 567.0, 530.0, 567.0, 522.0, 579.0, 564.0, 525.0, 519.0, 573.0, 522.0, 573.0, 573.0, 579.0, 536.0, 573.0, 527.0, 522.0, 576.0, 521.0, 570.0, 573.0, 479.0, 579.0, 518.0, 573.0, 573.0, 567.0, 522.0, 501.0, 576.0, 630.0, 525.0, 516.0, 519.0, 573.0, 527.0, 573.0, 510.0, 522.0, 473.0, 525.0, 487.0, 516.0, 570.0, 579.0, 525.0, 530.0, 379.0, 525.0, 522.0, 473.0, 573.0, 567.0, 533.0, 552.0, 570.0, 530.0, 351.0, 525.0, 516.0, 530.0, 519.0, 522.0, 573.0, 573.0, 516.0, 533.0, 525.0, 527.0, 576.0, 473.0, 579.0, 501.0, 573.0, 525.0, 522.0, 530.0, 573.0, 519.0, 522.0, 573.0, 576.0, 521.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 258.0, 279.0, 297.0, 235.0, 255.0, 188.0, 205.0, 271.0, 242.0, 239.0, 274.0, 267.0, 309.0, 298.0, 278.0, 274.0, 287.0, 244.0, 248.0, 278.0, 289.0, 257.0, 270.0, 288.0, 285.0, 263.0, 256.0, 290.0, 286.0, 255.0, 267.0, 276.0, 291.0, 271.0, 259.0, 286.0, 281.0, 264.0, 258.0, 287.0, 292.0, 284.0, 280.0, 254.0, 271.0, 240.0, 279.0, 291.0, 282.0, 269.0, 253.0, 287.0, 286.0, 289.0, 284.0, 292.0, 287.0, 263.0, 273.0, 275.0, 298.0, 265.0, 262.0, 264.0, 258.0, 287.0, 289.0, 249.0, 272.0, 269.0, 301.0, 294.0, 279.0, 240.0, 239.0, 295.0, 284.0, 262.0, 256.0, 281.0, 292.0, 269.0, 304.0, 281.0, 286.0, 
273.0, 249.0, 249.0, 252.0, 287.0, 289.0, 316.0, 314.0, 268.0, 257.0, 260.0, 256.0, 257.0, 262.0, 272.0, 301.0, 273.0, 254.0, 285.0, 288.0, 254.0, 256.0, 258.0, 264.0, 231.0, 242.0, 271.0, 254.0, 238.0, 249.0, 268.0, 248.0, 295.0, 275.0, 293.0, 286.0, 261.0, 264.0, 265.0, 265.0, 180.0, 199.0, 267.0, 258.0, 257.0, 265.0, 247.0, 226.0, 301.0, 272.0, 281.0, 286.0, 267.0, 266.0, 272.0, 280.0, 292.0, 278.0, 254.0, 276.0, 179.0, 172.0, 258.0, 267.0, 260.0, 256.0, 269.0, 261.0, 277.0, 242.0, 250.0, 272.0, 290.0, 283.0, 287.0, 286.0, 260.0, 256.0, 272.0, 261.0, 270.0, 255.0, 251.0, 276.0, 275.0, 301.0, 237.0, 236.0, 293.0, 286.0, 260.0, 241.0, 288.0, 285.0, 267.0, 258.0, 254.0, 268.0, 266.0, 264.0, 285.0, 288.0, 264.0, 255.0, 251.0, 271.0, 286.0, 287.0, 288.0, 288.0, 254.0, 267.0, 298.0, 275.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844424072145202, "mean_inference_ms": 1.206964806118154, "mean_action_processing_ms": 0.13245111758107558, "mean_env_wait_ms": 0.8415324870317537, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3584000, "num_agent_steps_trained": 3584000, "num_env_steps_sampled": 1792000, "num_env_steps_trained": 1792000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1792000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3584000, "timers": {"training_iteration_time_ms": 3686.752, "learn_time_ms": 1117.713, "learn_throughput": 11451.954, "synch_weights_time_ms": 13.08}, "counters": {"num_env_steps_sampled": 1792000, "num_env_steps_trained": 1792000, "num_agent_steps_sampled": 3584000, "num_agent_steps_trained": 3584000}, "done": false, "episodes_total": 4480, "training_iteration": 140, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-03", "timestamp": 1666580943, "time_this_iter_s": 3.671760320663452, "time_total_s": 526.2553584575653, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 526.2553584575653, "timesteps_since_restore": 0, "iterations_since_restore": 140, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.9, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 185.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 165.05, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.03, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, 
"useful_onion_pickup_agent_0_mean": 14.72, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.99, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.45, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.64, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.7, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.46, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.45, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.64, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.45, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.64, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001100448309443891, "policy_loss": -0.0013004107167944312, "vf_loss": 7.32533597946167, "vf_explained_var": 0.6947555541992188, "kl": 0.0018763296538963914, "entropy": 1.065138578414917, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1804800, "num_env_steps_trained": 1804800, "num_agent_steps_sampled": 3609600, "num_agent_steps_trained": 3609600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 536.65, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 268.325}, "custom_metrics": {"sparse_reward_mean": 185.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 165.05, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.03, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.72, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.99, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.45, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.64, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.7, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.31, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.46, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.45, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.64, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.45, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.64, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 576.0, 521.0, 570.0, 573.0, 479.0, 579.0, 518.0, 573.0, 573.0, 567.0, 522.0, 501.0, 576.0, 630.0, 525.0, 516.0, 519.0, 573.0, 527.0, 573.0, 510.0, 522.0, 473.0, 525.0, 487.0, 516.0, 570.0, 579.0, 525.0, 530.0, 379.0, 525.0, 522.0, 473.0, 573.0, 567.0, 533.0, 552.0, 570.0, 530.0, 351.0, 525.0, 516.0, 530.0, 519.0, 522.0, 573.0, 573.0, 516.0, 533.0, 525.0, 527.0, 576.0, 473.0, 579.0, 501.0, 573.0, 525.0, 522.0, 530.0, 573.0, 519.0, 522.0, 573.0, 576.0, 521.0, 573.0, 570.0, 504.0, 522.0, 522.0, 564.0, 525.0, 539.0, 582.0, 525.0, 570.0, 579.0, 519.0, 530.0, 513.0, 573.0, 530.0, 576.0, 573.0, 530.0, 507.0, 533.0, 507.0, 516.0, 510.0, 516.0, 515.0, 530.0, 582.0, 576.0, 570.0, 567.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 258.0, 287.0, 289.0, 249.0, 272.0, 269.0, 301.0, 294.0, 279.0, 240.0, 239.0, 295.0, 284.0, 262.0, 256.0, 281.0, 292.0, 269.0, 304.0, 281.0, 286.0, 273.0, 249.0, 249.0, 252.0, 287.0, 289.0, 316.0, 314.0, 268.0, 257.0, 260.0, 256.0, 257.0, 262.0, 272.0, 301.0, 273.0, 254.0, 285.0, 288.0, 254.0, 256.0, 258.0, 264.0, 231.0, 242.0, 271.0, 254.0, 238.0, 249.0, 268.0, 248.0, 295.0, 275.0, 293.0, 286.0, 261.0, 264.0, 265.0, 265.0, 180.0, 199.0, 267.0, 258.0, 257.0, 265.0, 247.0, 226.0, 301.0, 272.0, 281.0, 286.0, 267.0, 266.0, 272.0, 280.0, 292.0, 278.0, 254.0, 276.0, 179.0, 172.0, 258.0, 267.0, 260.0, 256.0, 269.0, 261.0, 277.0, 242.0, 250.0, 272.0, 290.0, 283.0, 287.0, 286.0, 260.0, 256.0, 272.0, 261.0, 270.0, 255.0, 251.0, 276.0, 275.0, 301.0, 237.0, 236.0, 293.0, 286.0, 260.0, 241.0, 288.0, 285.0, 267.0, 258.0, 254.0, 268.0, 266.0, 264.0, 285.0, 288.0, 264.0, 255.0, 251.0, 271.0, 286.0, 287.0, 288.0, 288.0, 254.0, 267.0, 298.0, 275.0, 285.0, 285.0, 242.0, 262.0, 259.0, 263.0, 260.0, 262.0, 279.0, 285.0, 262.0, 263.0, 278.0, 261.0, 290.0, 292.0, 259.0, 266.0, 295.0, 275.0, 278.0, 301.0, 255.0, 264.0, 265.0, 265.0, 250.0, 263.0, 299.0, 274.0, 277.0, 253.0, 274.0, 302.0, 284.0, 289.0, 257.0, 273.0, 247.0, 260.0, 270.0, 263.0, 258.0, 249.0, 263.0, 253.0, 235.0, 275.0, 256.0, 260.0, 251.0, 264.0, 270.0, 260.0, 287.0, 295.0, 293.0, 283.0, 287.0, 283.0, 279.0, 288.0, 284.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684355643486077, "mean_inference_ms": 1.2068429403105816, "mean_action_processing_ms": 0.13244201724234905, "mean_env_wait_ms": 0.8412518122991941, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 536.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 268.325}, "hist_stats": {"episode_reward": [522.0, 576.0, 521.0, 570.0, 573.0, 479.0, 579.0, 518.0, 573.0, 573.0, 567.0, 522.0, 501.0, 576.0, 630.0, 525.0, 516.0, 519.0, 573.0, 527.0, 573.0, 510.0, 522.0, 473.0, 525.0, 487.0, 516.0, 570.0, 579.0, 525.0, 530.0, 379.0, 525.0, 522.0, 473.0, 573.0, 567.0, 533.0, 552.0, 570.0, 530.0, 351.0, 525.0, 516.0, 530.0, 519.0, 522.0, 573.0, 573.0, 516.0, 533.0, 525.0, 527.0, 576.0, 473.0, 579.0, 501.0, 573.0, 525.0, 522.0, 530.0, 573.0, 519.0, 522.0, 573.0, 576.0, 521.0, 573.0, 570.0, 504.0, 522.0, 522.0, 564.0, 525.0, 539.0, 582.0, 525.0, 570.0, 579.0, 519.0, 530.0, 513.0, 573.0, 530.0, 576.0, 573.0, 530.0, 507.0, 533.0, 507.0, 516.0, 510.0, 516.0, 515.0, 530.0, 582.0, 576.0, 570.0, 567.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 258.0, 287.0, 289.0, 249.0, 272.0, 269.0, 301.0, 294.0, 279.0, 240.0, 239.0, 295.0, 284.0, 262.0, 256.0, 281.0, 292.0, 269.0, 304.0, 281.0, 286.0, 273.0, 249.0, 249.0, 252.0, 287.0, 289.0, 316.0, 314.0, 268.0, 257.0, 260.0, 256.0, 257.0, 262.0, 272.0, 301.0, 273.0, 254.0, 285.0, 288.0, 254.0, 256.0, 258.0, 264.0, 231.0, 242.0, 271.0, 254.0, 238.0, 249.0, 268.0, 248.0, 295.0, 275.0, 293.0, 286.0, 261.0, 264.0, 265.0, 265.0, 180.0, 199.0, 267.0, 258.0, 257.0, 265.0, 247.0, 226.0, 301.0, 272.0, 281.0, 286.0, 267.0, 266.0, 272.0, 280.0, 292.0, 278.0, 254.0, 276.0, 179.0, 172.0, 258.0, 267.0, 
260.0, 256.0, 269.0, 261.0, 277.0, 242.0, 250.0, 272.0, 290.0, 283.0, 287.0, 286.0, 260.0, 256.0, 272.0, 261.0, 270.0, 255.0, 251.0, 276.0, 275.0, 301.0, 237.0, 236.0, 293.0, 286.0, 260.0, 241.0, 288.0, 285.0, 267.0, 258.0, 254.0, 268.0, 266.0, 264.0, 285.0, 288.0, 264.0, 255.0, 251.0, 271.0, 286.0, 287.0, 288.0, 288.0, 254.0, 267.0, 298.0, 275.0, 285.0, 285.0, 242.0, 262.0, 259.0, 263.0, 260.0, 262.0, 279.0, 285.0, 262.0, 263.0, 278.0, 261.0, 290.0, 292.0, 259.0, 266.0, 295.0, 275.0, 278.0, 301.0, 255.0, 264.0, 265.0, 265.0, 250.0, 263.0, 299.0, 274.0, 277.0, 253.0, 274.0, 302.0, 284.0, 289.0, 257.0, 273.0, 247.0, 260.0, 270.0, 263.0, 258.0, 249.0, 263.0, 253.0, 235.0, 275.0, 256.0, 260.0, 251.0, 264.0, 270.0, 260.0, 287.0, 295.0, 293.0, 283.0, 287.0, 283.0, 279.0, 288.0, 284.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684355643486077, "mean_inference_ms": 1.2068429403105816, "mean_action_processing_ms": 0.13244201724234905, "mean_env_wait_ms": 0.8412518122991941, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3609600, "num_agent_steps_trained": 3609600, "num_env_steps_sampled": 1804800, "num_env_steps_trained": 1804800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1804800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3609600, "timers": {"training_iteration_time_ms": 3659.837, "learn_time_ms": 1131.102, "learn_throughput": 11316.395, "synch_weights_time_ms": 12.953}, "counters": {"num_env_steps_sampled": 1804800, "num_env_steps_trained": 1804800, "num_agent_steps_sampled": 3609600, "num_agent_steps_trained": 3609600}, "done": false, "episodes_total": 4512, "training_iteration": 141, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-07", "timestamp": 1666580947, "time_this_iter_s": 3.75894832611084, "time_total_s": 530.0143067836761, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 530.0143067836761, "timesteps_since_restore": 0, "iterations_since_restore": 141, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.1, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 184.8, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 164.18, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.23, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.07, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 
14.9, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.81, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.61, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.37, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.61, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.61, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 4.625937435775995e-05, "policy_loss": -0.00017143398872576654, "vf_loss": 7.427959442138672, "vf_explained_var": 0.6894431114196777, "kl": 0.0025915263686329126, "entropy": 1.0502040386199951, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1817600, "num_env_steps_trained": 1817600, "num_agent_steps_sampled": 3635200, "num_agent_steps_trained": 3635200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 533.78, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 266.89}, "custom_metrics": {"sparse_reward_mean": 184.8, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 164.18, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.23, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.07, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.9, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.81, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.18, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.61, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.51, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.37, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 
0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.61, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.61, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, 
"catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 522.0, 473.0, 573.0, 567.0, 533.0, 552.0, 570.0, 530.0, 351.0, 525.0, 516.0, 530.0, 519.0, 522.0, 573.0, 573.0, 516.0, 533.0, 525.0, 527.0, 576.0, 473.0, 579.0, 501.0, 573.0, 525.0, 522.0, 530.0, 573.0, 519.0, 522.0, 573.0, 576.0, 521.0, 573.0, 570.0, 504.0, 522.0, 522.0, 564.0, 525.0, 539.0, 582.0, 525.0, 570.0, 579.0, 519.0, 530.0, 513.0, 573.0, 530.0, 576.0, 573.0, 530.0, 507.0, 533.0, 507.0, 516.0, 510.0, 516.0, 515.0, 530.0, 582.0, 576.0, 570.0, 567.0, 570.0, 513.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 525.0, 530.0, 533.0, 524.0, 516.0, 519.0, 573.0, 519.0, 522.0, 570.0, 530.0, 525.0, 527.0, 237.0, 507.0, 576.0, 570.0, 576.0, 570.0, 522.0, 530.0, 522.0, 473.0, 573.0, 504.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 258.0, 257.0, 265.0, 247.0, 226.0, 301.0, 272.0, 281.0, 286.0, 267.0, 266.0, 272.0, 280.0, 292.0, 278.0, 254.0, 276.0, 179.0, 172.0, 258.0, 267.0, 260.0, 256.0, 269.0, 261.0, 277.0, 242.0, 250.0, 272.0, 290.0, 283.0, 287.0, 286.0, 260.0, 256.0, 272.0, 261.0, 270.0, 255.0, 251.0, 276.0, 275.0, 301.0, 237.0, 236.0, 293.0, 286.0, 260.0, 241.0, 288.0, 285.0, 267.0, 258.0, 254.0, 268.0, 266.0, 264.0, 285.0, 288.0, 264.0, 255.0, 251.0, 271.0, 286.0, 287.0, 288.0, 288.0, 254.0, 267.0, 298.0, 275.0, 285.0, 285.0, 242.0, 262.0, 259.0, 263.0, 260.0, 262.0, 279.0, 285.0, 262.0, 263.0, 278.0, 261.0, 290.0, 292.0, 259.0, 266.0, 295.0, 275.0, 278.0, 301.0, 255.0, 264.0, 265.0, 265.0, 250.0, 263.0, 299.0, 274.0, 277.0, 253.0, 274.0, 302.0, 284.0, 289.0, 257.0, 273.0, 247.0, 260.0, 270.0, 263.0, 258.0, 249.0, 263.0, 253.0, 235.0, 275.0, 256.0, 260.0, 251.0, 264.0, 270.0, 260.0, 287.0, 295.0, 293.0, 283.0, 287.0, 283.0, 279.0, 288.0, 284.0, 286.0, 250.0, 263.0, 286.0, 287.0, 267.0, 260.0, 257.0, 262.0, 292.0, 287.0, 260.0, 265.0, 257.0, 276.0, 265.0, 260.0, 260.0, 270.0, 268.0, 265.0, 271.0, 253.0, 266.0, 250.0, 252.0, 267.0, 282.0, 291.0, 260.0, 259.0, 261.0, 261.0, 290.0, 280.0, 268.0, 262.0, 256.0, 269.0, 257.0, 270.0, 123.0, 114.0, 258.0, 249.0, 301.0, 275.0, 293.0, 277.0, 287.0, 289.0, 275.0, 295.0, 254.0, 268.0, 249.0, 281.0, 260.0, 262.0, 239.0, 234.0, 292.0, 281.0, 240.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842755530238879, "mean_inference_ms": 1.2067346536692405, "mean_action_processing_ms": 0.13243207984798036, "mean_env_wait_ms": 0.8409767392353698, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 533.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, 
"policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 266.89}, "hist_stats": {"episode_reward": [525.0, 522.0, 473.0, 573.0, 567.0, 533.0, 552.0, 570.0, 530.0, 351.0, 525.0, 516.0, 530.0, 519.0, 522.0, 573.0, 573.0, 516.0, 533.0, 525.0, 527.0, 576.0, 473.0, 579.0, 501.0, 573.0, 525.0, 522.0, 530.0, 573.0, 519.0, 522.0, 573.0, 576.0, 521.0, 573.0, 570.0, 504.0, 522.0, 522.0, 564.0, 525.0, 539.0, 582.0, 525.0, 570.0, 579.0, 519.0, 530.0, 513.0, 573.0, 530.0, 576.0, 573.0, 530.0, 507.0, 533.0, 507.0, 516.0, 510.0, 516.0, 515.0, 530.0, 582.0, 576.0, 570.0, 567.0, 570.0, 513.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 525.0, 530.0, 533.0, 524.0, 516.0, 519.0, 573.0, 519.0, 522.0, 570.0, 530.0, 525.0, 527.0, 237.0, 507.0, 576.0, 570.0, 576.0, 570.0, 522.0, 530.0, 522.0, 473.0, 573.0, 504.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 258.0, 257.0, 265.0, 247.0, 226.0, 301.0, 272.0, 281.0, 286.0, 267.0, 266.0, 272.0, 280.0, 292.0, 278.0, 254.0, 276.0, 179.0, 172.0, 258.0, 267.0, 260.0, 256.0, 269.0, 261.0, 277.0, 242.0, 250.0, 272.0, 290.0, 283.0, 287.0, 286.0, 260.0, 256.0, 272.0, 261.0, 270.0, 255.0, 251.0, 276.0, 275.0, 301.0, 237.0, 236.0, 293.0, 286.0, 260.0, 241.0, 288.0, 285.0, 267.0, 258.0, 254.0, 268.0, 266.0, 264.0, 285.0, 288.0, 264.0, 255.0, 251.0, 271.0, 286.0, 287.0, 288.0, 288.0, 254.0, 267.0, 298.0, 275.0, 285.0, 285.0, 242.0, 262.0, 259.0, 263.0, 260.0, 262.0, 279.0, 285.0, 262.0, 263.0, 278.0, 261.0, 290.0, 292.0, 259.0, 266.0, 295.0, 275.0, 
278.0, 301.0, 255.0, 264.0, 265.0, 265.0, 250.0, 263.0, 299.0, 274.0, 277.0, 253.0, 274.0, 302.0, 284.0, 289.0, 257.0, 273.0, 247.0, 260.0, 270.0, 263.0, 258.0, 249.0, 263.0, 253.0, 235.0, 275.0, 256.0, 260.0, 251.0, 264.0, 270.0, 260.0, 287.0, 295.0, 293.0, 283.0, 287.0, 283.0, 279.0, 288.0, 284.0, 286.0, 250.0, 263.0, 286.0, 287.0, 267.0, 260.0, 257.0, 262.0, 292.0, 287.0, 260.0, 265.0, 257.0, 276.0, 265.0, 260.0, 260.0, 270.0, 268.0, 265.0, 271.0, 253.0, 266.0, 250.0, 252.0, 267.0, 282.0, 291.0, 260.0, 259.0, 261.0, 261.0, 290.0, 280.0, 268.0, 262.0, 256.0, 269.0, 257.0, 270.0, 123.0, 114.0, 258.0, 249.0, 301.0, 275.0, 293.0, 277.0, 287.0, 289.0, 275.0, 295.0, 254.0, 268.0, 249.0, 281.0, 260.0, 262.0, 239.0, 234.0, 292.0, 281.0, 240.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842755530238879, "mean_inference_ms": 1.2067346536692405, "mean_action_processing_ms": 0.13243207984798036, "mean_env_wait_ms": 0.8409767392353698, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3635200, "num_agent_steps_trained": 3635200, "num_env_steps_sampled": 1817600, "num_env_steps_trained": 1817600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1817600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3635200, "timers": {"training_iteration_time_ms": 3648.912, "learn_time_ms": 1135.149, "learn_throughput": 11276.048, "synch_weights_time_ms": 13.551}, "counters": {"num_env_steps_sampled": 1817600, "num_env_steps_trained": 1817600, "num_agent_steps_sampled": 3635200, "num_agent_steps_trained": 3635200}, "done": false, "episodes_total": 4544, "training_iteration": 142, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-11", "timestamp": 1666580951, "time_this_iter_s": 3.7470552921295166, "time_total_s": 533.7613620758057, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": 
"128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, 
"ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, 
"evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": 
"independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 
0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 533.7613620758057, "timesteps_since_restore": 0, "iterations_since_restore": 142, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.849999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 185.0, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 164.2, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.97, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.97, 
"useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.73, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.28, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.28, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.47, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.75, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.66, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.73, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.28, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.73, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.28, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0044081672094762325, "policy_loss": -0.004621940199285746, "vf_loss": 7.469595432281494, "vf_explained_var": 0.6961889266967773, "kl": 0.001743752509355545, "entropy": 1.0663713216781616, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1830400, "num_env_steps_trained": 1830400, "num_agent_steps_sampled": 3660800, "num_agent_steps_trained": 3660800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 534.2, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 267.1}, "custom_metrics": {"sparse_reward_mean": 185.0, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 164.2, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.97, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.73, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.28, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.28, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.42, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.47, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.31, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.75, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.66, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.73, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.28, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.73, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.28, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 521.0, 573.0, 570.0, 504.0, 522.0, 522.0, 564.0, 525.0, 539.0, 582.0, 525.0, 570.0, 579.0, 519.0, 530.0, 513.0, 573.0, 530.0, 576.0, 573.0, 530.0, 507.0, 533.0, 507.0, 516.0, 510.0, 516.0, 515.0, 530.0, 582.0, 576.0, 570.0, 567.0, 570.0, 513.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 525.0, 530.0, 533.0, 524.0, 516.0, 519.0, 573.0, 519.0, 522.0, 570.0, 530.0, 525.0, 527.0, 237.0, 507.0, 576.0, 570.0, 576.0, 570.0, 522.0, 530.0, 522.0, 473.0, 573.0, 504.0, 530.0, 576.0, 527.0, 513.0, 476.0, 573.0, 576.0, 522.0, 519.0, 530.0, 422.0, 525.0, 573.0, 527.0, 522.0, 522.0, 516.0, 576.0, 501.0, 530.0, 561.0, 522.0, 527.0, 573.0, 519.0, 525.0, 519.0, 570.0, 519.0, 516.0, 570.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 288.0, 288.0, 254.0, 267.0, 298.0, 275.0, 285.0, 285.0, 242.0, 262.0, 259.0, 263.0, 260.0, 262.0, 279.0, 285.0, 262.0, 263.0, 278.0, 261.0, 290.0, 292.0, 259.0, 266.0, 295.0, 275.0, 278.0, 301.0, 255.0, 264.0, 265.0, 265.0, 250.0, 263.0, 299.0, 274.0, 277.0, 253.0, 274.0, 302.0, 284.0, 289.0, 257.0, 273.0, 247.0, 260.0, 270.0, 263.0, 258.0, 249.0, 263.0, 253.0, 235.0, 275.0, 256.0, 260.0, 251.0, 264.0, 270.0, 260.0, 287.0, 295.0, 293.0, 283.0, 287.0, 283.0, 279.0, 288.0, 284.0, 286.0, 250.0, 263.0, 286.0, 287.0, 267.0, 260.0, 257.0, 262.0, 292.0, 287.0, 260.0, 265.0, 257.0, 276.0, 265.0, 260.0, 260.0, 270.0, 268.0, 265.0, 271.0, 253.0, 266.0, 250.0, 252.0, 267.0, 282.0, 291.0, 260.0, 259.0, 261.0, 261.0, 290.0, 280.0, 268.0, 262.0, 256.0, 269.0, 257.0, 270.0, 123.0, 114.0, 258.0, 249.0, 301.0, 275.0, 293.0, 277.0, 287.0, 289.0, 275.0, 295.0, 254.0, 268.0, 249.0, 281.0, 260.0, 262.0, 239.0, 234.0, 292.0, 281.0, 240.0, 264.0, 265.0, 265.0, 297.0, 279.0, 261.0, 266.0, 244.0, 269.0, 241.0, 235.0, 273.0, 300.0, 285.0, 291.0, 263.0, 259.0, 261.0, 258.0, 257.0, 273.0, 206.0, 216.0, 261.0, 264.0, 298.0, 275.0, 264.0, 263.0, 266.0, 256.0, 267.0, 255.0, 257.0, 259.0, 290.0, 286.0, 250.0, 251.0, 271.0, 259.0, 292.0, 269.0, 267.0, 255.0, 254.0, 273.0, 283.0, 290.0, 267.0, 252.0, 250.0, 275.0, 251.0, 268.0, 286.0, 284.0, 247.0, 272.0, 250.0, 266.0, 295.0, 275.0, 251.0, 262.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684214198815861, "mean_inference_ms": 1.2066361573929743, "mean_action_processing_ms": 0.13242315818689165, "mean_env_wait_ms": 0.8407223869871694, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 534.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 267.1}, "hist_stats": {"episode_reward": [573.0, 576.0, 521.0, 573.0, 570.0, 504.0, 522.0, 522.0, 564.0, 525.0, 539.0, 582.0, 525.0, 570.0, 579.0, 519.0, 530.0, 513.0, 573.0, 530.0, 576.0, 573.0, 530.0, 507.0, 533.0, 507.0, 516.0, 510.0, 516.0, 515.0, 530.0, 582.0, 576.0, 570.0, 567.0, 570.0, 513.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 525.0, 530.0, 533.0, 524.0, 516.0, 519.0, 573.0, 519.0, 522.0, 570.0, 530.0, 525.0, 527.0, 237.0, 507.0, 576.0, 570.0, 576.0, 570.0, 522.0, 530.0, 522.0, 473.0, 573.0, 504.0, 530.0, 576.0, 527.0, 513.0, 476.0, 573.0, 576.0, 522.0, 519.0, 530.0, 422.0, 525.0, 573.0, 527.0, 522.0, 522.0, 516.0, 576.0, 501.0, 530.0, 561.0, 522.0, 527.0, 573.0, 519.0, 525.0, 519.0, 570.0, 519.0, 516.0, 570.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 288.0, 288.0, 254.0, 267.0, 298.0, 275.0, 285.0, 285.0, 242.0, 262.0, 259.0, 263.0, 260.0, 262.0, 279.0, 285.0, 262.0, 263.0, 278.0, 261.0, 290.0, 292.0, 259.0, 266.0, 295.0, 275.0, 278.0, 301.0, 255.0, 264.0, 265.0, 265.0, 250.0, 263.0, 299.0, 274.0, 277.0, 253.0, 274.0, 302.0, 284.0, 289.0, 257.0, 273.0, 247.0, 260.0, 270.0, 263.0, 258.0, 249.0, 263.0, 253.0, 235.0, 275.0, 256.0, 260.0, 251.0, 264.0, 270.0, 260.0, 287.0, 295.0, 293.0, 283.0, 287.0, 283.0, 279.0, 288.0, 284.0, 286.0, 250.0, 263.0, 286.0, 287.0, 267.0, 260.0, 257.0, 262.0, 292.0, 287.0, 260.0, 265.0, 257.0, 276.0, 265.0, 
260.0, 260.0, 270.0, 268.0, 265.0, 271.0, 253.0, 266.0, 250.0, 252.0, 267.0, 282.0, 291.0, 260.0, 259.0, 261.0, 261.0, 290.0, 280.0, 268.0, 262.0, 256.0, 269.0, 257.0, 270.0, 123.0, 114.0, 258.0, 249.0, 301.0, 275.0, 293.0, 277.0, 287.0, 289.0, 275.0, 295.0, 254.0, 268.0, 249.0, 281.0, 260.0, 262.0, 239.0, 234.0, 292.0, 281.0, 240.0, 264.0, 265.0, 265.0, 297.0, 279.0, 261.0, 266.0, 244.0, 269.0, 241.0, 235.0, 273.0, 300.0, 285.0, 291.0, 263.0, 259.0, 261.0, 258.0, 257.0, 273.0, 206.0, 216.0, 261.0, 264.0, 298.0, 275.0, 264.0, 263.0, 266.0, 256.0, 267.0, 255.0, 257.0, 259.0, 290.0, 286.0, 250.0, 251.0, 271.0, 259.0, 292.0, 269.0, 267.0, 255.0, 254.0, 273.0, 283.0, 290.0, 267.0, 252.0, 250.0, 275.0, 251.0, 268.0, 286.0, 284.0, 247.0, 272.0, 250.0, 266.0, 295.0, 275.0, 251.0, 262.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684214198815861, "mean_inference_ms": 1.2066361573929743, "mean_action_processing_ms": 0.13242315818689165, "mean_env_wait_ms": 0.8407223869871694, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3660800, "num_agent_steps_trained": 3660800, "num_env_steps_sampled": 1830400, "num_env_steps_trained": 1830400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1830400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3660800, "timers": {"training_iteration_time_ms": 3652.047, "learn_time_ms": 1134.207, "learn_throughput": 11285.42, "synch_weights_time_ms": 13.158}, "counters": {"num_env_steps_sampled": 1830400, "num_env_steps_trained": 1830400, "num_agent_steps_sampled": 3660800, "num_agent_steps_trained": 3660800}, "done": false, "episodes_total": 4576, "training_iteration": 143, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-15", "timestamp": 1666580955, "time_this_iter_s": 3.773031711578369, "time_total_s": 537.534393787384, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 537.534393787384, "timesteps_since_restore": 0, "iterations_since_restore": 143, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.9, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 184.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 163.5, "shaped_reward_min": 77, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.84, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 
15.01, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.56, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.75, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.13, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 2, 
"soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.75, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.13, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.75, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.13, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001283028395846486, "policy_loss": -0.001493385061621666, "vf_loss": 7.433184623718262, "vf_explained_var": 0.6879395246505737, "kl": 0.0020021807868033648, "entropy": 1.0659205913543701, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1843200, "num_env_steps_trained": 1843200, "num_agent_steps_sampled": 3686400, "num_agent_steps_trained": 3686400}, "sampler_results": {"episode_reward_max": 579.0, "episode_reward_min": 237.0, "episode_reward_mean": 532.7, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 266.35}, "custom_metrics": {"sparse_reward_mean": 184.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 163.5, "shaped_reward_min": 77, "shaped_reward_max": 179, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.84, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.01, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.56, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.35, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.75, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.13, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.49, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.4, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.24, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.75, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.13, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.75, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.13, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 570.0, 567.0, 570.0, 513.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 525.0, 530.0, 533.0, 524.0, 516.0, 519.0, 573.0, 519.0, 522.0, 570.0, 530.0, 525.0, 527.0, 237.0, 507.0, 576.0, 570.0, 576.0, 570.0, 522.0, 530.0, 522.0, 473.0, 573.0, 504.0, 530.0, 576.0, 527.0, 513.0, 476.0, 573.0, 576.0, 522.0, 519.0, 530.0, 422.0, 525.0, 573.0, 527.0, 522.0, 522.0, 516.0, 576.0, 501.0, 530.0, 561.0, 522.0, 527.0, 573.0, 519.0, 525.0, 519.0, 570.0, 519.0, 516.0, 570.0, 513.0, 503.0, 522.0, 530.0, 570.0, 527.0, 573.0, 567.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 519.0, 573.0, 516.0, 573.0, 510.0, 476.0, 525.0, 530.0, 522.0, 525.0, 516.0, 456.0, 522.0, 507.0, 507.0, 570.0, 579.0, 579.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 283.0, 287.0, 283.0, 279.0, 288.0, 284.0, 286.0, 250.0, 263.0, 286.0, 287.0, 267.0, 260.0, 257.0, 262.0, 292.0, 287.0, 260.0, 265.0, 257.0, 276.0, 265.0, 260.0, 260.0, 270.0, 268.0, 265.0, 271.0, 253.0, 266.0, 250.0, 252.0, 267.0, 282.0, 291.0, 260.0, 259.0, 261.0, 261.0, 290.0, 280.0, 268.0, 262.0, 256.0, 269.0, 257.0, 270.0, 123.0, 114.0, 258.0, 249.0, 301.0, 275.0, 293.0, 277.0, 287.0, 289.0, 275.0, 295.0, 254.0, 268.0, 249.0, 281.0, 260.0, 262.0, 239.0, 234.0, 292.0, 281.0, 240.0, 264.0, 265.0, 265.0, 297.0, 279.0, 261.0, 266.0, 244.0, 269.0, 241.0, 235.0, 273.0, 300.0, 285.0, 291.0, 263.0, 259.0, 261.0, 258.0, 257.0, 273.0, 206.0, 216.0, 261.0, 264.0, 298.0, 275.0, 264.0, 263.0, 266.0, 256.0, 267.0, 255.0, 257.0, 259.0, 290.0, 286.0, 250.0, 251.0, 271.0, 259.0, 292.0, 269.0, 267.0, 255.0, 254.0, 273.0, 283.0, 290.0, 267.0, 252.0, 250.0, 275.0, 251.0, 268.0, 286.0, 284.0, 247.0, 272.0, 250.0, 266.0, 295.0, 275.0, 251.0, 262.0, 248.0, 255.0, 255.0, 267.0, 270.0, 260.0, 290.0, 280.0, 273.0, 254.0, 287.0, 286.0, 294.0, 273.0, 287.0, 289.0, 294.0, 279.0, 249.0, 270.0, 265.0, 257.0, 301.0, 272.0, 295.0, 284.0, 270.0, 249.0, 286.0, 287.0, 242.0, 274.0, 292.0, 281.0, 256.0, 254.0, 253.0, 223.0, 266.0, 259.0, 264.0, 266.0, 270.0, 252.0, 259.0, 266.0, 256.0, 260.0, 239.0, 217.0, 271.0, 251.0, 254.0, 253.0, 254.0, 253.0, 269.0, 301.0, 291.0, 288.0, 283.0, 296.0, 240.0, 276.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842438570605083, "mean_inference_ms": 1.2065519067365493, "mean_action_processing_ms": 0.13242026295874262, "mean_env_wait_ms": 0.8405006849522966, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 579.0, "episode_reward_min": 237.0, "episode_reward_mean": 532.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 266.35}, "hist_stats": {"episode_reward": [576.0, 570.0, 567.0, 570.0, 513.0, 573.0, 527.0, 519.0, 579.0, 525.0, 533.0, 525.0, 530.0, 533.0, 524.0, 516.0, 519.0, 573.0, 519.0, 522.0, 570.0, 530.0, 525.0, 527.0, 237.0, 507.0, 576.0, 570.0, 576.0, 570.0, 522.0, 530.0, 522.0, 473.0, 573.0, 504.0, 530.0, 576.0, 527.0, 513.0, 476.0, 573.0, 576.0, 522.0, 519.0, 530.0, 422.0, 525.0, 573.0, 527.0, 522.0, 522.0, 516.0, 576.0, 501.0, 530.0, 561.0, 522.0, 527.0, 573.0, 519.0, 525.0, 519.0, 570.0, 519.0, 516.0, 570.0, 513.0, 503.0, 522.0, 530.0, 570.0, 527.0, 573.0, 567.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 519.0, 573.0, 516.0, 573.0, 510.0, 476.0, 525.0, 530.0, 522.0, 525.0, 516.0, 456.0, 522.0, 507.0, 507.0, 570.0, 579.0, 579.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 283.0, 287.0, 283.0, 279.0, 288.0, 284.0, 286.0, 250.0, 263.0, 286.0, 287.0, 267.0, 260.0, 257.0, 262.0, 292.0, 287.0, 260.0, 265.0, 257.0, 276.0, 265.0, 260.0, 260.0, 270.0, 268.0, 265.0, 271.0, 253.0, 266.0, 250.0, 252.0, 267.0, 282.0, 291.0, 260.0, 259.0, 261.0, 261.0, 290.0, 280.0, 268.0, 262.0, 256.0, 269.0, 257.0, 270.0, 123.0, 114.0, 258.0, 249.0, 301.0, 275.0, 293.0, 277.0, 287.0, 289.0, 275.0, 295.0, 254.0, 268.0, 249.0, 281.0, 260.0, 262.0, 239.0, 234.0, 292.0, 281.0, 240.0, 264.0, 265.0, 265.0, 297.0, 279.0, 261.0, 266.0, 244.0, 269.0, 241.0, 235.0, 273.0, 300.0, 285.0, 291.0, 263.0, 
259.0, 261.0, 258.0, 257.0, 273.0, 206.0, 216.0, 261.0, 264.0, 298.0, 275.0, 264.0, 263.0, 266.0, 256.0, 267.0, 255.0, 257.0, 259.0, 290.0, 286.0, 250.0, 251.0, 271.0, 259.0, 292.0, 269.0, 267.0, 255.0, 254.0, 273.0, 283.0, 290.0, 267.0, 252.0, 250.0, 275.0, 251.0, 268.0, 286.0, 284.0, 247.0, 272.0, 250.0, 266.0, 295.0, 275.0, 251.0, 262.0, 248.0, 255.0, 255.0, 267.0, 270.0, 260.0, 290.0, 280.0, 273.0, 254.0, 287.0, 286.0, 294.0, 273.0, 287.0, 289.0, 294.0, 279.0, 249.0, 270.0, 265.0, 257.0, 301.0, 272.0, 295.0, 284.0, 270.0, 249.0, 286.0, 287.0, 242.0, 274.0, 292.0, 281.0, 256.0, 254.0, 253.0, 223.0, 266.0, 259.0, 264.0, 266.0, 270.0, 252.0, 259.0, 266.0, 256.0, 260.0, 239.0, 217.0, 271.0, 251.0, 254.0, 253.0, 254.0, 253.0, 269.0, 301.0, 291.0, 288.0, 283.0, 296.0, 240.0, 276.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842438570605083, "mean_inference_ms": 1.2065519067365493, "mean_action_processing_ms": 0.13242026295874262, "mean_env_wait_ms": 0.8405006849522966, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3686400, "num_agent_steps_trained": 3686400, "num_env_steps_sampled": 1843200, "num_env_steps_trained": 1843200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1843200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3686400, "timers": {"training_iteration_time_ms": 3658.38, "learn_time_ms": 1137.758, "learn_throughput": 11250.194, "synch_weights_time_ms": 12.419}, "counters": {"num_env_steps_sampled": 1843200, "num_env_steps_trained": 1843200, "num_agent_steps_sampled": 3686400, "num_agent_steps_trained": 3686400}, "done": false, "episodes_total": 4608, "training_iteration": 144, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-19", "timestamp": 1666580959, "time_this_iter_s": 3.7352027893066406, "time_total_s": 541.2695965766907, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 541.2695965766907, "timesteps_since_restore": 0, "iterations_since_restore": 144, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.033333333333335, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 185.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 163.88, "shaped_reward_min": 136, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.96, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.31, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.96, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.36, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.54, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.41, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.84, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.76, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.36, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.54, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.36, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.54, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00016856600996106863, "policy_loss": -0.000373037182725966, "vf_loss": 7.367165565490723, "vf_explained_var": 0.690015435218811, "kl": 0.001808413420803845, "entropy": 1.0644876956939697, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1856000, "num_env_steps_trained": 1856000, "num_agent_steps_sampled": 3712000, "num_agent_steps_trained": 3712000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 422.0, "episode_reward_mean": 533.88, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 206.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 266.94}, "custom_metrics": {"sparse_reward_mean": 185.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 163.88, "shaped_reward_min": 136, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.96, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.31, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.96, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.36, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.54, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.41, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.44, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.84, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.76, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.36, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.54, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.36, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.54, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 473.0, 573.0, 504.0, 530.0, 576.0, 527.0, 513.0, 476.0, 573.0, 576.0, 522.0, 519.0, 530.0, 422.0, 525.0, 573.0, 527.0, 522.0, 522.0, 516.0, 576.0, 501.0, 530.0, 561.0, 522.0, 527.0, 573.0, 519.0, 525.0, 519.0, 570.0, 519.0, 516.0, 570.0, 513.0, 503.0, 522.0, 530.0, 570.0, 527.0, 573.0, 567.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 519.0, 573.0, 516.0, 573.0, 510.0, 476.0, 525.0, 530.0, 522.0, 525.0, 516.0, 456.0, 522.0, 507.0, 507.0, 570.0, 579.0, 579.0, 516.0, 573.0, 513.0, 513.0, 516.0, 519.0, 519.0, 518.0, 530.0, 522.0, 524.0, 530.0, 573.0, 524.0, 522.0, 522.0, 522.0, 582.0, 510.0, 576.0, 522.0, 576.0, 519.0, 525.0, 573.0, 573.0, 525.0, 525.0, 581.0, 524.0, 519.0, 579.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 262.0, 239.0, 234.0, 292.0, 281.0, 240.0, 264.0, 265.0, 265.0, 297.0, 279.0, 261.0, 266.0, 244.0, 269.0, 241.0, 235.0, 273.0, 300.0, 285.0, 291.0, 263.0, 259.0, 261.0, 258.0, 257.0, 273.0, 206.0, 216.0, 261.0, 264.0, 298.0, 275.0, 264.0, 263.0, 266.0, 256.0, 267.0, 255.0, 257.0, 259.0, 290.0, 286.0, 250.0, 251.0, 271.0, 259.0, 292.0, 269.0, 267.0, 255.0, 254.0, 273.0, 283.0, 290.0, 267.0, 252.0, 250.0, 275.0, 251.0, 268.0, 286.0, 284.0, 247.0, 272.0, 250.0, 266.0, 295.0, 275.0, 251.0, 262.0, 248.0, 255.0, 255.0, 267.0, 270.0, 260.0, 290.0, 280.0, 273.0, 254.0, 287.0, 286.0, 294.0, 273.0, 287.0, 289.0, 294.0, 279.0, 249.0, 270.0, 265.0, 257.0, 301.0, 272.0, 295.0, 284.0, 270.0, 249.0, 286.0, 287.0, 242.0, 274.0, 292.0, 281.0, 256.0, 254.0, 253.0, 223.0, 266.0, 259.0, 264.0, 266.0, 270.0, 252.0, 259.0, 266.0, 256.0, 260.0, 239.0, 217.0, 271.0, 251.0, 254.0, 253.0, 254.0, 253.0, 269.0, 301.0, 291.0, 288.0, 283.0, 296.0, 240.0, 276.0, 277.0, 296.0, 261.0, 252.0, 258.0, 255.0, 251.0, 265.0, 240.0, 279.0, 241.0, 278.0, 258.0, 260.0, 258.0, 272.0, 259.0, 263.0, 249.0, 275.0, 259.0, 271.0, 275.0, 298.0, 251.0, 273.0, 257.0, 265.0, 259.0, 263.0, 266.0, 256.0, 290.0, 292.0, 253.0, 257.0, 306.0, 270.0, 264.0, 258.0, 279.0, 297.0, 251.0, 268.0, 277.0, 248.0, 285.0, 288.0, 288.0, 285.0, 260.0, 265.0, 262.0, 263.0, 292.0, 289.0, 258.0, 266.0, 246.0, 273.0, 294.0, 285.0, 245.0, 277.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684303727724854, "mean_inference_ms": 1.2064871898020082, "mean_action_processing_ms": 0.13241503366929952, "mean_env_wait_ms": 0.8403057315186702, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 422.0, "episode_reward_mean": 533.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 206.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 266.94}, "hist_stats": {"episode_reward": [522.0, 473.0, 573.0, 504.0, 530.0, 576.0, 527.0, 513.0, 476.0, 573.0, 576.0, 522.0, 519.0, 530.0, 422.0, 525.0, 573.0, 527.0, 522.0, 522.0, 516.0, 576.0, 501.0, 530.0, 561.0, 522.0, 527.0, 573.0, 519.0, 525.0, 519.0, 570.0, 519.0, 516.0, 570.0, 513.0, 503.0, 522.0, 530.0, 570.0, 527.0, 573.0, 567.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 519.0, 573.0, 516.0, 573.0, 510.0, 476.0, 525.0, 530.0, 522.0, 525.0, 516.0, 456.0, 522.0, 507.0, 507.0, 570.0, 579.0, 579.0, 516.0, 573.0, 513.0, 513.0, 516.0, 519.0, 519.0, 518.0, 530.0, 522.0, 524.0, 530.0, 573.0, 524.0, 522.0, 522.0, 522.0, 582.0, 510.0, 576.0, 522.0, 576.0, 519.0, 525.0, 573.0, 573.0, 525.0, 525.0, 581.0, 524.0, 519.0, 579.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 262.0, 239.0, 234.0, 292.0, 281.0, 240.0, 264.0, 265.0, 265.0, 297.0, 279.0, 261.0, 266.0, 244.0, 269.0, 241.0, 235.0, 273.0, 300.0, 285.0, 291.0, 263.0, 259.0, 261.0, 258.0, 257.0, 273.0, 206.0, 216.0, 261.0, 264.0, 298.0, 275.0, 264.0, 263.0, 266.0, 256.0, 267.0, 255.0, 257.0, 259.0, 290.0, 286.0, 250.0, 251.0, 271.0, 259.0, 292.0, 269.0, 267.0, 255.0, 254.0, 273.0, 283.0, 290.0, 267.0, 252.0, 250.0, 275.0, 251.0, 268.0, 286.0, 284.0, 247.0, 272.0, 250.0, 266.0, 295.0, 275.0, 251.0, 262.0, 248.0, 255.0, 255.0, 267.0, 270.0, 260.0, 290.0, 280.0, 273.0, 254.0, 287.0, 286.0, 294.0, 273.0, 287.0, 
289.0, 294.0, 279.0, 249.0, 270.0, 265.0, 257.0, 301.0, 272.0, 295.0, 284.0, 270.0, 249.0, 286.0, 287.0, 242.0, 274.0, 292.0, 281.0, 256.0, 254.0, 253.0, 223.0, 266.0, 259.0, 264.0, 266.0, 270.0, 252.0, 259.0, 266.0, 256.0, 260.0, 239.0, 217.0, 271.0, 251.0, 254.0, 253.0, 254.0, 253.0, 269.0, 301.0, 291.0, 288.0, 283.0, 296.0, 240.0, 276.0, 277.0, 296.0, 261.0, 252.0, 258.0, 255.0, 251.0, 265.0, 240.0, 279.0, 241.0, 278.0, 258.0, 260.0, 258.0, 272.0, 259.0, 263.0, 249.0, 275.0, 259.0, 271.0, 275.0, 298.0, 251.0, 273.0, 257.0, 265.0, 259.0, 263.0, 266.0, 256.0, 290.0, 292.0, 253.0, 257.0, 306.0, 270.0, 264.0, 258.0, 279.0, 297.0, 251.0, 268.0, 277.0, 248.0, 285.0, 288.0, 288.0, 285.0, 260.0, 265.0, 262.0, 263.0, 292.0, 289.0, 258.0, 266.0, 246.0, 273.0, 294.0, 285.0, 245.0, 277.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684303727724854, "mean_inference_ms": 1.2064871898020082, "mean_action_processing_ms": 0.13241503366929952, "mean_env_wait_ms": 0.8403057315186702, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3712000, "num_agent_steps_trained": 3712000, "num_env_steps_sampled": 1856000, "num_env_steps_trained": 1856000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1856000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3712000, "timers": {"training_iteration_time_ms": 3680.475, "learn_time_ms": 1144.678, "learn_throughput": 11182.18, "synch_weights_time_ms": 12.299}, "counters": {"num_env_steps_sampled": 1856000, "num_env_steps_trained": 1856000, "num_agent_steps_sampled": 3712000, "num_agent_steps_trained": 3712000}, "done": false, "episodes_total": 4640, "training_iteration": 145, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-23", "timestamp": 1666580963, "time_this_iter_s": 4.009440183639526, "time_total_s": 545.2790367603302, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 545.2790367603302, "timesteps_since_restore": 0, "iterations_since_restore": 145, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.849999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 186.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 164.41, "shaped_reward_min": 111, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.31, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.0, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 15.01, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.22, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.26, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.52, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.74, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.66, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.78, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.22, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.22, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00215164665132761, "policy_loss": 0.001941345864906907, "vf_loss": 7.410245895385742, "vf_explained_var": 0.6896635890007019, "kl": 0.0020127412863075733, "entropy": 1.061444640159607, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1868800, "num_env_steps_trained": 1868800, "num_agent_steps_sampled": 3737600, "num_agent_steps_trained": 3737600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 351.0, "episode_reward_mean": 537.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 268.605}, "custom_metrics": {"sparse_reward_mean": 186.4, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 164.41, "shaped_reward_min": 111, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.31, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.0, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.01, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.39, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.22, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.26, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.5, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.52, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 
0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.74, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.66, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.78, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.22, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.22, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, 
"catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 516.0, 570.0, 513.0, 503.0, 522.0, 530.0, 570.0, 527.0, 573.0, 567.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 519.0, 573.0, 516.0, 573.0, 510.0, 476.0, 525.0, 530.0, 522.0, 525.0, 516.0, 456.0, 522.0, 507.0, 507.0, 570.0, 579.0, 579.0, 516.0, 573.0, 513.0, 513.0, 516.0, 519.0, 519.0, 518.0, 530.0, 522.0, 524.0, 530.0, 573.0, 524.0, 522.0, 522.0, 522.0, 582.0, 510.0, 576.0, 522.0, 576.0, 519.0, 525.0, 573.0, 573.0, 525.0, 525.0, 581.0, 524.0, 519.0, 579.0, 522.0, 570.0, 522.0, 570.0, 525.0, 522.0, 516.0, 567.0, 525.0, 570.0, 579.0, 567.0, 570.0, 573.0, 570.0, 573.0, 525.0, 527.0, 522.0, 522.0, 579.0, 525.0, 527.0, 453.0, 504.0, 573.0, 573.0, 351.0, 527.0, 573.0, 579.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [247.0, 272.0, 250.0, 266.0, 295.0, 275.0, 251.0, 262.0, 248.0, 255.0, 255.0, 267.0, 270.0, 260.0, 290.0, 280.0, 273.0, 254.0, 287.0, 286.0, 294.0, 273.0, 287.0, 289.0, 294.0, 279.0, 249.0, 270.0, 265.0, 257.0, 301.0, 272.0, 295.0, 284.0, 270.0, 249.0, 286.0, 287.0, 242.0, 274.0, 292.0, 281.0, 256.0, 254.0, 253.0, 223.0, 266.0, 259.0, 264.0, 266.0, 270.0, 252.0, 259.0, 266.0, 256.0, 260.0, 239.0, 217.0, 271.0, 251.0, 254.0, 253.0, 254.0, 253.0, 269.0, 301.0, 291.0, 288.0, 283.0, 296.0, 240.0, 276.0, 277.0, 296.0, 261.0, 252.0, 258.0, 255.0, 251.0, 265.0, 240.0, 279.0, 241.0, 278.0, 258.0, 260.0, 258.0, 272.0, 259.0, 263.0, 249.0, 275.0, 259.0, 271.0, 275.0, 298.0, 251.0, 273.0, 257.0, 265.0, 259.0, 263.0, 266.0, 256.0, 290.0, 292.0, 253.0, 257.0, 306.0, 270.0, 264.0, 258.0, 279.0, 297.0, 251.0, 268.0, 277.0, 248.0, 285.0, 288.0, 288.0, 285.0, 260.0, 265.0, 262.0, 263.0, 292.0, 289.0, 258.0, 266.0, 246.0, 273.0, 294.0, 285.0, 245.0, 277.0, 277.0, 293.0, 257.0, 265.0, 286.0, 284.0, 270.0, 255.0, 264.0, 258.0, 250.0, 266.0, 289.0, 278.0, 265.0, 260.0, 305.0, 265.0, 292.0, 287.0, 276.0, 291.0, 280.0, 290.0, 291.0, 282.0, 282.0, 288.0, 288.0, 285.0, 262.0, 263.0, 265.0, 262.0, 257.0, 265.0, 267.0, 255.0, 278.0, 301.0, 267.0, 258.0, 262.0, 265.0, 213.0, 240.0, 258.0, 246.0, 297.0, 276.0, 290.0, 283.0, 173.0, 178.0, 266.0, 261.0, 283.0, 290.0, 290.0, 289.0, 260.0, 265.0, 299.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684348570132233, "mean_inference_ms": 1.2065833340662058, "mean_action_processing_ms": 0.13241048060485988, "mean_env_wait_ms": 0.8404282247161632, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 351.0, "episode_reward_mean": 537.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, 
"policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 268.605}, "hist_stats": {"episode_reward": [519.0, 516.0, 570.0, 513.0, 503.0, 522.0, 530.0, 570.0, 527.0, 573.0, 567.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 519.0, 573.0, 516.0, 573.0, 510.0, 476.0, 525.0, 530.0, 522.0, 525.0, 516.0, 456.0, 522.0, 507.0, 507.0, 570.0, 579.0, 579.0, 516.0, 573.0, 513.0, 513.0, 516.0, 519.0, 519.0, 518.0, 530.0, 522.0, 524.0, 530.0, 573.0, 524.0, 522.0, 522.0, 522.0, 582.0, 510.0, 576.0, 522.0, 576.0, 519.0, 525.0, 573.0, 573.0, 525.0, 525.0, 581.0, 524.0, 519.0, 579.0, 522.0, 570.0, 522.0, 570.0, 525.0, 522.0, 516.0, 567.0, 525.0, 570.0, 579.0, 567.0, 570.0, 573.0, 570.0, 573.0, 525.0, 527.0, 522.0, 522.0, 579.0, 525.0, 527.0, 453.0, 504.0, 573.0, 573.0, 351.0, 527.0, 573.0, 579.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [247.0, 272.0, 250.0, 266.0, 295.0, 275.0, 251.0, 262.0, 248.0, 255.0, 255.0, 267.0, 270.0, 260.0, 290.0, 280.0, 273.0, 254.0, 287.0, 286.0, 294.0, 273.0, 287.0, 289.0, 294.0, 279.0, 249.0, 270.0, 265.0, 257.0, 301.0, 272.0, 295.0, 284.0, 270.0, 249.0, 286.0, 287.0, 242.0, 274.0, 292.0, 281.0, 256.0, 254.0, 253.0, 223.0, 266.0, 259.0, 264.0, 266.0, 270.0, 252.0, 259.0, 266.0, 256.0, 260.0, 239.0, 217.0, 271.0, 251.0, 254.0, 253.0, 254.0, 253.0, 269.0, 301.0, 291.0, 288.0, 283.0, 296.0, 240.0, 276.0, 277.0, 296.0, 261.0, 252.0, 258.0, 255.0, 251.0, 265.0, 240.0, 279.0, 241.0, 278.0, 258.0, 260.0, 258.0, 272.0, 259.0, 263.0, 249.0, 275.0, 
259.0, 271.0, 275.0, 298.0, 251.0, 273.0, 257.0, 265.0, 259.0, 263.0, 266.0, 256.0, 290.0, 292.0, 253.0, 257.0, 306.0, 270.0, 264.0, 258.0, 279.0, 297.0, 251.0, 268.0, 277.0, 248.0, 285.0, 288.0, 288.0, 285.0, 260.0, 265.0, 262.0, 263.0, 292.0, 289.0, 258.0, 266.0, 246.0, 273.0, 294.0, 285.0, 245.0, 277.0, 277.0, 293.0, 257.0, 265.0, 286.0, 284.0, 270.0, 255.0, 264.0, 258.0, 250.0, 266.0, 289.0, 278.0, 265.0, 260.0, 305.0, 265.0, 292.0, 287.0, 276.0, 291.0, 280.0, 290.0, 291.0, 282.0, 282.0, 288.0, 288.0, 285.0, 262.0, 263.0, 265.0, 262.0, 257.0, 265.0, 267.0, 255.0, 278.0, 301.0, 267.0, 258.0, 262.0, 265.0, 213.0, 240.0, 258.0, 246.0, 297.0, 276.0, 290.0, 283.0, 173.0, 178.0, 266.0, 261.0, 283.0, 290.0, 290.0, 289.0, 260.0, 265.0, 299.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684348570132233, "mean_inference_ms": 1.2065833340662058, "mean_action_processing_ms": 0.13241048060485988, "mean_env_wait_ms": 0.8404282247161632, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3737600, "num_agent_steps_trained": 3737600, "num_env_steps_sampled": 1868800, "num_env_steps_trained": 1868800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1868800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3737600, "timers": {"training_iteration_time_ms": 3723.772, "learn_time_ms": 1145.054, "learn_throughput": 11178.514, "synch_weights_time_ms": 12.116}, "counters": {"num_env_steps_sampled": 1868800, "num_env_steps_trained": 1868800, "num_agent_steps_sampled": 3737600, "num_agent_steps_trained": 3737600}, "done": false, "episodes_total": 4672, "training_iteration": 146, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-27", "timestamp": 1666580967, "time_this_iter_s": 4.079414129257202, "time_total_s": 549.3584508895874, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": 
"128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, 
"ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, 
"evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": 
"independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 
0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 549.3584508895874, "timesteps_since_restore": 0, "iterations_since_restore": 146, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.383333333333336, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 185.4, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 164.51, "shaped_reward_min": 102, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.58, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.55, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.29, 
"useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.25, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.02, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 14.94, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.09, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.58, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.91, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.48, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.02, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 14.94, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.02, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 14.94, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007207918679341674, "policy_loss": -0.0009259622893296182, "vf_loss": 7.4250664710998535, "vf_explained_var": 0.6794564723968506, "kl": 0.0020139352418482304, "entropy": 1.0746699571609497, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1881600, "num_env_steps_trained": 1881600, "num_agent_steps_sampled": 3763200, "num_agent_steps_trained": 3763200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 302.0, "episode_reward_mean": 535.31, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 267.655}, "custom_metrics": {"sparse_reward_mean": 185.4, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 164.51, "shaped_reward_min": 102, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.58, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.55, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.25, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.02, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 14.94, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.09, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.52, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.42, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.58, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.91, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.48, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.02, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 14.94, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.02, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 14.94, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 579.0, 579.0, 516.0, 573.0, 513.0, 513.0, 516.0, 519.0, 519.0, 518.0, 530.0, 522.0, 524.0, 530.0, 573.0, 524.0, 522.0, 522.0, 522.0, 582.0, 510.0, 576.0, 522.0, 576.0, 519.0, 525.0, 573.0, 573.0, 525.0, 525.0, 581.0, 524.0, 519.0, 579.0, 522.0, 570.0, 522.0, 570.0, 525.0, 522.0, 516.0, 567.0, 525.0, 570.0, 579.0, 567.0, 570.0, 573.0, 570.0, 573.0, 525.0, 527.0, 522.0, 522.0, 579.0, 525.0, 527.0, 453.0, 504.0, 573.0, 573.0, 351.0, 527.0, 573.0, 579.0, 525.0, 573.0, 530.0, 516.0, 408.0, 567.0, 522.0, 576.0, 524.0, 513.0, 530.0, 510.0, 576.0, 579.0, 524.0, 513.0, 573.0, 504.0, 576.0, 567.0, 581.0, 527.0, 527.0, 530.0, 522.0, 485.0, 516.0, 525.0, 573.0, 522.0, 302.0, 503.0, 539.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 301.0, 291.0, 288.0, 283.0, 296.0, 240.0, 276.0, 277.0, 296.0, 261.0, 252.0, 258.0, 255.0, 251.0, 265.0, 240.0, 279.0, 241.0, 278.0, 258.0, 260.0, 258.0, 272.0, 259.0, 263.0, 249.0, 275.0, 259.0, 271.0, 275.0, 298.0, 251.0, 273.0, 257.0, 265.0, 259.0, 263.0, 266.0, 256.0, 290.0, 292.0, 253.0, 257.0, 306.0, 270.0, 264.0, 258.0, 279.0, 297.0, 251.0, 268.0, 277.0, 248.0, 285.0, 288.0, 288.0, 285.0, 260.0, 265.0, 262.0, 263.0, 292.0, 289.0, 258.0, 266.0, 246.0, 273.0, 294.0, 285.0, 245.0, 277.0, 277.0, 293.0, 257.0, 265.0, 286.0, 284.0, 270.0, 255.0, 264.0, 258.0, 250.0, 266.0, 289.0, 278.0, 265.0, 260.0, 305.0, 265.0, 292.0, 287.0, 276.0, 291.0, 280.0, 290.0, 291.0, 282.0, 282.0, 288.0, 288.0, 285.0, 262.0, 263.0, 265.0, 262.0, 257.0, 265.0, 267.0, 255.0, 278.0, 301.0, 267.0, 258.0, 262.0, 265.0, 213.0, 240.0, 258.0, 246.0, 297.0, 276.0, 290.0, 283.0, 173.0, 178.0, 266.0, 261.0, 283.0, 290.0, 290.0, 289.0, 260.0, 265.0, 299.0, 274.0, 257.0, 273.0, 250.0, 266.0, 210.0, 198.0, 274.0, 293.0, 260.0, 262.0, 294.0, 282.0, 262.0, 262.0, 253.0, 260.0, 259.0, 271.0, 255.0, 255.0, 289.0, 287.0, 292.0, 287.0, 266.0, 258.0, 258.0, 255.0, 286.0, 287.0, 242.0, 262.0, 296.0, 280.0, 288.0, 279.0, 293.0, 288.0, 247.0, 280.0, 278.0, 249.0, 276.0, 254.0, 259.0, 263.0, 235.0, 250.0, 264.0, 252.0, 266.0, 259.0, 289.0, 284.0, 252.0, 270.0, 151.0, 151.0, 257.0, 246.0, 268.0, 271.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684336111759047, "mean_inference_ms": 1.2067049974759831, "mean_action_processing_ms": 0.1324082233532618, "mean_env_wait_ms": 0.8405535827561619, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 302.0, "episode_reward_mean": 535.31, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 267.655}, "hist_stats": {"episode_reward": [570.0, 579.0, 579.0, 516.0, 573.0, 513.0, 513.0, 516.0, 519.0, 519.0, 518.0, 530.0, 522.0, 524.0, 530.0, 573.0, 524.0, 522.0, 522.0, 522.0, 582.0, 510.0, 576.0, 522.0, 576.0, 519.0, 525.0, 573.0, 573.0, 525.0, 525.0, 581.0, 524.0, 519.0, 579.0, 522.0, 570.0, 522.0, 570.0, 525.0, 522.0, 516.0, 567.0, 525.0, 570.0, 579.0, 567.0, 570.0, 573.0, 570.0, 573.0, 525.0, 527.0, 522.0, 522.0, 579.0, 525.0, 527.0, 453.0, 504.0, 573.0, 573.0, 351.0, 527.0, 573.0, 579.0, 525.0, 573.0, 530.0, 516.0, 408.0, 567.0, 522.0, 576.0, 524.0, 513.0, 530.0, 510.0, 576.0, 579.0, 524.0, 513.0, 573.0, 504.0, 576.0, 567.0, 581.0, 527.0, 527.0, 530.0, 522.0, 485.0, 516.0, 525.0, 573.0, 522.0, 302.0, 503.0, 539.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 301.0, 291.0, 288.0, 283.0, 296.0, 240.0, 276.0, 277.0, 296.0, 261.0, 252.0, 258.0, 255.0, 251.0, 265.0, 240.0, 279.0, 241.0, 278.0, 258.0, 260.0, 258.0, 272.0, 259.0, 263.0, 249.0, 275.0, 259.0, 271.0, 275.0, 298.0, 251.0, 273.0, 257.0, 265.0, 259.0, 263.0, 266.0, 256.0, 290.0, 292.0, 253.0, 257.0, 306.0, 270.0, 264.0, 258.0, 279.0, 297.0, 251.0, 268.0, 277.0, 248.0, 285.0, 288.0, 288.0, 285.0, 260.0, 265.0, 262.0, 263.0, 292.0, 289.0, 258.0, 266.0, 246.0, 273.0, 294.0, 285.0, 245.0, 277.0, 277.0, 293.0, 257.0, 265.0, 286.0, 284.0, 270.0, 255.0, 264.0, 258.0, 250.0, 266.0, 289.0, 278.0, 
265.0, 260.0, 305.0, 265.0, 292.0, 287.0, 276.0, 291.0, 280.0, 290.0, 291.0, 282.0, 282.0, 288.0, 288.0, 285.0, 262.0, 263.0, 265.0, 262.0, 257.0, 265.0, 267.0, 255.0, 278.0, 301.0, 267.0, 258.0, 262.0, 265.0, 213.0, 240.0, 258.0, 246.0, 297.0, 276.0, 290.0, 283.0, 173.0, 178.0, 266.0, 261.0, 283.0, 290.0, 290.0, 289.0, 260.0, 265.0, 299.0, 274.0, 257.0, 273.0, 250.0, 266.0, 210.0, 198.0, 274.0, 293.0, 260.0, 262.0, 294.0, 282.0, 262.0, 262.0, 253.0, 260.0, 259.0, 271.0, 255.0, 255.0, 289.0, 287.0, 292.0, 287.0, 266.0, 258.0, 258.0, 255.0, 286.0, 287.0, 242.0, 262.0, 296.0, 280.0, 288.0, 279.0, 293.0, 288.0, 247.0, 280.0, 278.0, 249.0, 276.0, 254.0, 259.0, 263.0, 235.0, 250.0, 264.0, 252.0, 266.0, 259.0, 289.0, 284.0, 252.0, 270.0, 151.0, 151.0, 257.0, 246.0, 268.0, 271.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.684336111759047, "mean_inference_ms": 1.2067049974759831, "mean_action_processing_ms": 0.1324082233532618, "mean_env_wait_ms": 0.8405535827561619, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3763200, "num_agent_steps_trained": 3763200, "num_env_steps_sampled": 1881600, "num_env_steps_trained": 1881600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1881600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3763200, "timers": {"training_iteration_time_ms": 3726.949, "learn_time_ms": 1146.605, "learn_throughput": 11163.396, "synch_weights_time_ms": 11.601}, "counters": {"num_env_steps_sampled": 1881600, "num_env_steps_trained": 1881600, "num_agent_steps_sampled": 3763200, "num_agent_steps_trained": 3763200}, "done": false, "episodes_total": 4704, "training_iteration": 147, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-31", "timestamp": 1666580971, "time_this_iter_s": 3.6851279735565186, "time_total_s": 553.0435788631439, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 553.0435788631439, "timesteps_since_restore": 0, "iterations_since_restore": 147, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.1, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 184.4, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 164.0, "shaped_reward_min": 102, "shaped_reward_max": 181, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.85, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.2, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 
15.54, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 14.95, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.19, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 14.72, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.53, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.45, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.19, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 14.72, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.19, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 14.72, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0018294923938810825, "policy_loss": -0.0020315488800406456, "vf_loss": 7.403303146362305, "vf_explained_var": 0.683193564414978, "kl": 0.0019366566557437181, "entropy": 1.0765454769134521, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1894400, "num_env_steps_trained": 1894400, "num_agent_steps_sampled": 3788800, "num_agent_steps_trained": 3788800}, "sampler_results": {"episode_reward_max": 581.0, "episode_reward_min": 302.0, "episode_reward_mean": 532.8, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 266.4}, "custom_metrics": {"sparse_reward_mean": 184.4, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 164.0, "shaped_reward_min": 102, "shaped_reward_max": 181, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.85, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.2, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.54, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 14.95, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.2, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.19, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 14.72, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.29, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.47, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.53, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.45, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.19, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 14.72, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.19, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 14.72, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [524.0, 519.0, 579.0, 522.0, 570.0, 522.0, 570.0, 525.0, 522.0, 516.0, 567.0, 525.0, 570.0, 579.0, 567.0, 570.0, 573.0, 570.0, 573.0, 525.0, 527.0, 522.0, 522.0, 579.0, 525.0, 527.0, 453.0, 504.0, 573.0, 573.0, 351.0, 527.0, 573.0, 579.0, 525.0, 573.0, 530.0, 516.0, 408.0, 567.0, 522.0, 576.0, 524.0, 513.0, 530.0, 510.0, 576.0, 579.0, 524.0, 513.0, 573.0, 504.0, 576.0, 567.0, 581.0, 527.0, 527.0, 530.0, 522.0, 485.0, 516.0, 525.0, 573.0, 522.0, 302.0, 503.0, 539.0, 579.0, 525.0, 570.0, 567.0, 525.0, 527.0, 522.0, 519.0, 573.0, 493.0, 516.0, 479.0, 525.0, 519.0, 573.0, 513.0, 522.0, 513.0, 507.0, 519.0, 579.0, 573.0, 524.0, 567.0, 479.0, 576.0, 516.0, 573.0, 525.0, 530.0, 522.0, 522.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 266.0, 246.0, 273.0, 294.0, 285.0, 245.0, 277.0, 277.0, 293.0, 257.0, 265.0, 286.0, 284.0, 270.0, 255.0, 264.0, 258.0, 250.0, 266.0, 289.0, 278.0, 265.0, 260.0, 305.0, 265.0, 292.0, 287.0, 276.0, 291.0, 280.0, 290.0, 291.0, 282.0, 282.0, 288.0, 288.0, 285.0, 262.0, 263.0, 265.0, 262.0, 257.0, 265.0, 267.0, 255.0, 278.0, 301.0, 267.0, 258.0, 262.0, 265.0, 213.0, 240.0, 258.0, 246.0, 297.0, 276.0, 290.0, 283.0, 173.0, 178.0, 266.0, 261.0, 283.0, 290.0, 290.0, 289.0, 260.0, 265.0, 299.0, 274.0, 257.0, 273.0, 250.0, 266.0, 210.0, 198.0, 274.0, 293.0, 260.0, 262.0, 294.0, 282.0, 262.0, 262.0, 253.0, 260.0, 259.0, 271.0, 255.0, 255.0, 289.0, 287.0, 292.0, 287.0, 266.0, 258.0, 258.0, 255.0, 286.0, 287.0, 242.0, 262.0, 296.0, 280.0, 288.0, 279.0, 293.0, 288.0, 247.0, 280.0, 278.0, 249.0, 276.0, 254.0, 259.0, 263.0, 235.0, 250.0, 264.0, 252.0, 266.0, 259.0, 289.0, 284.0, 252.0, 270.0, 151.0, 151.0, 257.0, 246.0, 268.0, 271.0, 286.0, 293.0, 260.0, 265.0, 283.0, 287.0, 279.0, 288.0, 258.0, 267.0, 265.0, 262.0, 271.0, 251.0, 266.0, 253.0, 292.0, 281.0, 235.0, 258.0, 245.0, 271.0, 232.0, 247.0, 262.0, 263.0, 243.0, 276.0, 287.0, 286.0, 252.0, 261.0, 249.0, 273.0, 242.0, 271.0, 243.0, 264.0, 271.0, 248.0, 285.0, 294.0, 294.0, 279.0, 262.0, 262.0, 278.0, 289.0, 241.0, 238.0, 285.0, 291.0, 256.0, 260.0, 285.0, 288.0, 269.0, 256.0, 276.0, 254.0, 255.0, 267.0, 258.0, 264.0, 256.0, 271.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6843041815019703, "mean_inference_ms": 1.206781597770555, "mean_action_processing_ms": 0.1324074581959862, "mean_env_wait_ms": 0.8406561526217006, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 581.0, "episode_reward_min": 302.0, "episode_reward_mean": 532.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 151.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 266.4}, "hist_stats": {"episode_reward": [524.0, 519.0, 579.0, 522.0, 570.0, 522.0, 570.0, 525.0, 522.0, 516.0, 567.0, 525.0, 570.0, 579.0, 567.0, 570.0, 573.0, 570.0, 573.0, 525.0, 527.0, 522.0, 522.0, 579.0, 525.0, 527.0, 453.0, 504.0, 573.0, 573.0, 351.0, 527.0, 573.0, 579.0, 525.0, 573.0, 530.0, 516.0, 408.0, 567.0, 522.0, 576.0, 524.0, 513.0, 530.0, 510.0, 576.0, 579.0, 524.0, 513.0, 573.0, 504.0, 576.0, 567.0, 581.0, 527.0, 527.0, 530.0, 522.0, 485.0, 516.0, 525.0, 573.0, 522.0, 302.0, 503.0, 539.0, 579.0, 525.0, 570.0, 567.0, 525.0, 527.0, 522.0, 519.0, 573.0, 493.0, 516.0, 479.0, 525.0, 519.0, 573.0, 513.0, 522.0, 513.0, 507.0, 519.0, 579.0, 573.0, 524.0, 567.0, 479.0, 576.0, 516.0, 573.0, 525.0, 530.0, 522.0, 522.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 266.0, 246.0, 273.0, 294.0, 285.0, 245.0, 277.0, 277.0, 293.0, 257.0, 265.0, 286.0, 284.0, 270.0, 255.0, 264.0, 258.0, 250.0, 266.0, 289.0, 278.0, 265.0, 260.0, 305.0, 265.0, 292.0, 287.0, 276.0, 291.0, 280.0, 290.0, 291.0, 282.0, 282.0, 288.0, 288.0, 285.0, 262.0, 263.0, 265.0, 262.0, 257.0, 265.0, 267.0, 255.0, 278.0, 301.0, 267.0, 258.0, 262.0, 265.0, 213.0, 240.0, 258.0, 246.0, 297.0, 276.0, 290.0, 283.0, 173.0, 178.0, 266.0, 261.0, 283.0, 290.0, 290.0, 289.0, 260.0, 265.0, 299.0, 274.0, 257.0, 273.0, 250.0, 266.0, 210.0, 198.0, 274.0, 293.0, 260.0, 262.0, 294.0, 282.0, 262.0, 262.0, 253.0, 
260.0, 259.0, 271.0, 255.0, 255.0, 289.0, 287.0, 292.0, 287.0, 266.0, 258.0, 258.0, 255.0, 286.0, 287.0, 242.0, 262.0, 296.0, 280.0, 288.0, 279.0, 293.0, 288.0, 247.0, 280.0, 278.0, 249.0, 276.0, 254.0, 259.0, 263.0, 235.0, 250.0, 264.0, 252.0, 266.0, 259.0, 289.0, 284.0, 252.0, 270.0, 151.0, 151.0, 257.0, 246.0, 268.0, 271.0, 286.0, 293.0, 260.0, 265.0, 283.0, 287.0, 279.0, 288.0, 258.0, 267.0, 265.0, 262.0, 271.0, 251.0, 266.0, 253.0, 292.0, 281.0, 235.0, 258.0, 245.0, 271.0, 232.0, 247.0, 262.0, 263.0, 243.0, 276.0, 287.0, 286.0, 252.0, 261.0, 249.0, 273.0, 242.0, 271.0, 243.0, 264.0, 271.0, 248.0, 285.0, 294.0, 294.0, 279.0, 262.0, 262.0, 278.0, 289.0, 241.0, 238.0, 285.0, 291.0, 256.0, 260.0, 285.0, 288.0, 269.0, 256.0, 276.0, 254.0, 255.0, 267.0, 258.0, 264.0, 256.0, 271.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6843041815019703, "mean_inference_ms": 1.206781597770555, "mean_action_processing_ms": 0.1324074581959862, "mean_env_wait_ms": 0.8406561526217006, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3788800, "num_agent_steps_trained": 3788800, "num_env_steps_sampled": 1894400, "num_env_steps_trained": 1894400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1894400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3788800, "timers": {"training_iteration_time_ms": 3713.706, "learn_time_ms": 1146.201, "learn_throughput": 11167.331, "synch_weights_time_ms": 10.546}, "counters": {"num_env_steps_sampled": 1894400, "num_env_steps_trained": 1894400, "num_agent_steps_sampled": 3788800, "num_agent_steps_trained": 3788800}, "done": false, "episodes_total": 4736, "training_iteration": 148, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-35", "timestamp": 1666580975, "time_this_iter_s": 3.6709916591644287, "time_total_s": 556.7145705223083, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 556.7145705223083, "timesteps_since_restore": 0, "iterations_since_restore": 148, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.966666666666665, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 183.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 163.28, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.11, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.77, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 15.82, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.62, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.45, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.38, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.94, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.11, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.65, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.38, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.32, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.45, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.38, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.45, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.38, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.003550482913851738, "policy_loss": -0.0037556730676442385, "vf_loss": 7.407064437866211, "vf_explained_var": 0.6945219039916992, "kl": 0.0018070796504616737, "entropy": 1.0710303783416748, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1907200, "num_env_steps_trained": 1907200, "num_agent_steps_sampled": 3814400, "num_agent_steps_trained": 3814400}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 530.48, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 265.24}, "custom_metrics": {"sparse_reward_mean": 183.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 163.28, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.11, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.77, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.82, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.62, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.45, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.38, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.94, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.11, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.65, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.15, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.38, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.32, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.45, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.38, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.45, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.38, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 525.0, 573.0, 530.0, 516.0, 408.0, 567.0, 522.0, 576.0, 524.0, 513.0, 530.0, 510.0, 576.0, 579.0, 524.0, 513.0, 573.0, 504.0, 576.0, 567.0, 581.0, 527.0, 527.0, 530.0, 522.0, 485.0, 516.0, 525.0, 573.0, 522.0, 302.0, 503.0, 539.0, 579.0, 525.0, 570.0, 567.0, 525.0, 527.0, 522.0, 519.0, 573.0, 493.0, 516.0, 479.0, 525.0, 519.0, 573.0, 513.0, 522.0, 513.0, 507.0, 519.0, 579.0, 573.0, 524.0, 567.0, 479.0, 576.0, 516.0, 573.0, 525.0, 530.0, 522.0, 522.0, 527.0, 570.0, 525.0, 522.0, 564.0, 519.0, 519.0, 525.0, 579.0, 567.0, 570.0, 570.0, 498.0, 525.0, 573.0, 504.0, 237.0, 527.0, 530.0, 525.0, 522.0, 576.0, 525.0, 519.0, 441.0, 527.0, 579.0, 527.0, 582.0, 525.0, 524.0, 576.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 290.0, 290.0, 289.0, 260.0, 265.0, 299.0, 274.0, 257.0, 273.0, 250.0, 266.0, 210.0, 198.0, 274.0, 293.0, 260.0, 262.0, 294.0, 282.0, 262.0, 262.0, 253.0, 260.0, 259.0, 271.0, 255.0, 255.0, 289.0, 287.0, 292.0, 287.0, 266.0, 258.0, 258.0, 255.0, 286.0, 287.0, 242.0, 262.0, 296.0, 280.0, 288.0, 279.0, 293.0, 288.0, 247.0, 280.0, 278.0, 249.0, 276.0, 254.0, 259.0, 263.0, 235.0, 250.0, 264.0, 252.0, 266.0, 259.0, 289.0, 284.0, 252.0, 270.0, 151.0, 151.0, 257.0, 246.0, 268.0, 271.0, 286.0, 293.0, 260.0, 265.0, 283.0, 287.0, 279.0, 288.0, 258.0, 267.0, 265.0, 262.0, 271.0, 251.0, 266.0, 253.0, 292.0, 281.0, 235.0, 258.0, 245.0, 271.0, 232.0, 247.0, 262.0, 263.0, 243.0, 276.0, 287.0, 286.0, 252.0, 261.0, 249.0, 273.0, 242.0, 271.0, 243.0, 264.0, 271.0, 248.0, 285.0, 294.0, 294.0, 279.0, 262.0, 262.0, 278.0, 289.0, 241.0, 238.0, 285.0, 291.0, 256.0, 260.0, 285.0, 288.0, 269.0, 256.0, 276.0, 254.0, 255.0, 267.0, 258.0, 264.0, 256.0, 271.0, 284.0, 286.0, 256.0, 269.0, 267.0, 255.0, 270.0, 294.0, 255.0, 264.0, 258.0, 261.0, 265.0, 260.0, 291.0, 288.0, 277.0, 290.0, 294.0, 276.0, 279.0, 291.0, 244.0, 254.0, 269.0, 256.0, 283.0, 290.0, 257.0, 247.0, 123.0, 114.0, 272.0, 255.0, 261.0, 269.0, 270.0, 255.0, 248.0, 274.0, 280.0, 296.0, 268.0, 257.0, 256.0, 263.0, 222.0, 219.0, 270.0, 257.0, 292.0, 287.0, 262.0, 265.0, 276.0, 306.0, 259.0, 266.0, 265.0, 259.0, 279.0, 297.0, 287.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842792572036572, "mean_inference_ms": 1.2066941044549526, "mean_action_processing_ms": 0.13240474735810767, "mean_env_wait_ms": 0.840493100645701, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 530.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 265.24}, "hist_stats": {"episode_reward": [573.0, 579.0, 525.0, 573.0, 530.0, 516.0, 408.0, 567.0, 522.0, 576.0, 524.0, 513.0, 530.0, 510.0, 576.0, 579.0, 524.0, 513.0, 573.0, 504.0, 576.0, 567.0, 581.0, 527.0, 527.0, 530.0, 522.0, 485.0, 516.0, 525.0, 573.0, 522.0, 302.0, 503.0, 539.0, 579.0, 525.0, 570.0, 567.0, 525.0, 527.0, 522.0, 519.0, 573.0, 493.0, 516.0, 479.0, 525.0, 519.0, 573.0, 513.0, 522.0, 513.0, 507.0, 519.0, 579.0, 573.0, 524.0, 567.0, 479.0, 576.0, 516.0, 573.0, 525.0, 530.0, 522.0, 522.0, 527.0, 570.0, 525.0, 522.0, 564.0, 519.0, 519.0, 525.0, 579.0, 567.0, 570.0, 570.0, 498.0, 525.0, 573.0, 504.0, 237.0, 527.0, 530.0, 525.0, 522.0, 576.0, 525.0, 519.0, 441.0, 527.0, 579.0, 527.0, 582.0, 525.0, 524.0, 576.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 290.0, 290.0, 289.0, 260.0, 265.0, 299.0, 274.0, 257.0, 273.0, 250.0, 266.0, 210.0, 198.0, 274.0, 293.0, 260.0, 262.0, 294.0, 282.0, 262.0, 262.0, 253.0, 260.0, 259.0, 271.0, 255.0, 255.0, 289.0, 287.0, 292.0, 287.0, 266.0, 258.0, 258.0, 255.0, 286.0, 287.0, 242.0, 262.0, 296.0, 280.0, 288.0, 279.0, 293.0, 288.0, 247.0, 280.0, 278.0, 249.0, 276.0, 254.0, 259.0, 263.0, 235.0, 250.0, 264.0, 252.0, 266.0, 259.0, 289.0, 284.0, 252.0, 270.0, 151.0, 151.0, 257.0, 246.0, 268.0, 271.0, 286.0, 293.0, 260.0, 265.0, 283.0, 287.0, 279.0, 288.0, 258.0, 267.0, 265.0, 262.0, 271.0, 251.0, 266.0, 253.0, 292.0, 
281.0, 235.0, 258.0, 245.0, 271.0, 232.0, 247.0, 262.0, 263.0, 243.0, 276.0, 287.0, 286.0, 252.0, 261.0, 249.0, 273.0, 242.0, 271.0, 243.0, 264.0, 271.0, 248.0, 285.0, 294.0, 294.0, 279.0, 262.0, 262.0, 278.0, 289.0, 241.0, 238.0, 285.0, 291.0, 256.0, 260.0, 285.0, 288.0, 269.0, 256.0, 276.0, 254.0, 255.0, 267.0, 258.0, 264.0, 256.0, 271.0, 284.0, 286.0, 256.0, 269.0, 267.0, 255.0, 270.0, 294.0, 255.0, 264.0, 258.0, 261.0, 265.0, 260.0, 291.0, 288.0, 277.0, 290.0, 294.0, 276.0, 279.0, 291.0, 244.0, 254.0, 269.0, 256.0, 283.0, 290.0, 257.0, 247.0, 123.0, 114.0, 272.0, 255.0, 261.0, 269.0, 270.0, 255.0, 248.0, 274.0, 280.0, 296.0, 268.0, 257.0, 256.0, 263.0, 222.0, 219.0, 270.0, 257.0, 292.0, 287.0, 262.0, 265.0, 276.0, 306.0, 259.0, 266.0, 265.0, 259.0, 279.0, 297.0, 287.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842792572036572, "mean_inference_ms": 1.2066941044549526, "mean_action_processing_ms": 0.13240474735810767, "mean_env_wait_ms": 0.840493100645701, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3814400, "num_agent_steps_trained": 3814400, "num_env_steps_sampled": 1907200, "num_env_steps_trained": 1907200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1907200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3814400, "timers": {"training_iteration_time_ms": 3731.597, "learn_time_ms": 1168.313, "learn_throughput": 10955.968, "synch_weights_time_ms": 10.569}, "counters": {"num_env_steps_sampled": 1907200, "num_env_steps_trained": 1907200, "num_agent_steps_sampled": 3814400, "num_agent_steps_trained": 3814400}, "done": false, "episodes_total": 4768, "training_iteration": 149, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-39", "timestamp": 1666580979, "time_this_iter_s": 3.847028970718384, "time_total_s": 560.5615994930267, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 560.5615994930267, "timesteps_since_restore": 0, "iterations_since_restore": 149, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.4, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 184.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 163.27, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.18, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.92, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 
15.72, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.7, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.47, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 4.93, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.14, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.69, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.18, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.36, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.33, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.43, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.47, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.47, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.005244456231594086, "policy_loss": -0.005450435448437929, "vf_loss": 7.37734317779541, "vf_explained_var": 0.6951683759689331, "kl": 0.0019620051607489586, "entropy": 1.0635082721710205, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1920000, "num_env_steps_trained": 1920000, "num_agent_steps_sampled": 3840000, "num_agent_steps_trained": 3840000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 532.47, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 266.235}, "custom_metrics": {"sparse_reward_mean": 184.6, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 163.27, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.18, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 14.92, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.72, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 14.7, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.43, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 14.47, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 4.93, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.14, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.69, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.18, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.36, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.33, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.43, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 14.47, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 14.47, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [302.0, 503.0, 539.0, 579.0, 525.0, 570.0, 567.0, 525.0, 527.0, 522.0, 519.0, 573.0, 493.0, 516.0, 479.0, 525.0, 519.0, 573.0, 513.0, 522.0, 513.0, 507.0, 519.0, 579.0, 573.0, 524.0, 567.0, 479.0, 576.0, 516.0, 573.0, 525.0, 530.0, 522.0, 522.0, 527.0, 570.0, 525.0, 522.0, 564.0, 519.0, 519.0, 525.0, 579.0, 567.0, 570.0, 570.0, 498.0, 525.0, 573.0, 504.0, 237.0, 527.0, 530.0, 525.0, 522.0, 576.0, 525.0, 519.0, 441.0, 527.0, 579.0, 527.0, 582.0, 525.0, 524.0, 576.0, 567.0, 582.0, 570.0, 573.0, 525.0, 522.0, 504.0, 570.0, 519.0, 576.0, 522.0, 567.0, 533.0, 333.0, 573.0, 522.0, 570.0, 570.0, 525.0, 513.0, 573.0, 522.0, 522.0, 576.0, 576.0, 570.0, 573.0, 573.0, 567.0, 573.0, 525.0, 521.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [151.0, 151.0, 257.0, 246.0, 268.0, 271.0, 286.0, 293.0, 260.0, 265.0, 283.0, 287.0, 279.0, 288.0, 258.0, 267.0, 265.0, 262.0, 271.0, 251.0, 266.0, 253.0, 292.0, 281.0, 235.0, 258.0, 245.0, 271.0, 232.0, 247.0, 262.0, 263.0, 243.0, 276.0, 287.0, 286.0, 252.0, 261.0, 249.0, 273.0, 242.0, 271.0, 243.0, 264.0, 271.0, 248.0, 285.0, 294.0, 294.0, 279.0, 262.0, 262.0, 278.0, 289.0, 241.0, 238.0, 285.0, 291.0, 256.0, 260.0, 285.0, 288.0, 269.0, 256.0, 276.0, 254.0, 255.0, 267.0, 258.0, 264.0, 256.0, 271.0, 284.0, 286.0, 256.0, 269.0, 267.0, 255.0, 270.0, 294.0, 255.0, 264.0, 258.0, 261.0, 265.0, 260.0, 291.0, 288.0, 277.0, 290.0, 294.0, 276.0, 279.0, 291.0, 244.0, 254.0, 269.0, 256.0, 283.0, 290.0, 257.0, 247.0, 123.0, 114.0, 272.0, 255.0, 261.0, 269.0, 270.0, 255.0, 248.0, 274.0, 280.0, 296.0, 268.0, 257.0, 256.0, 263.0, 222.0, 219.0, 270.0, 257.0, 292.0, 287.0, 262.0, 265.0, 276.0, 306.0, 259.0, 266.0, 265.0, 259.0, 279.0, 297.0, 287.0, 280.0, 299.0, 283.0, 296.0, 274.0, 287.0, 286.0, 253.0, 272.0, 256.0, 266.0, 248.0, 256.0, 284.0, 286.0, 261.0, 258.0, 283.0, 293.0, 258.0, 264.0, 286.0, 281.0, 265.0, 268.0, 169.0, 164.0, 286.0, 287.0, 259.0, 263.0, 282.0, 288.0, 274.0, 296.0, 263.0, 262.0, 272.0, 241.0, 277.0, 296.0, 263.0, 259.0, 271.0, 251.0, 299.0, 277.0, 296.0, 280.0, 293.0, 277.0, 283.0, 290.0, 283.0, 290.0, 272.0, 295.0, 287.0, 286.0, 260.0, 265.0, 259.0, 262.0, 251.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842531086706194, "mean_inference_ms": 1.2065739664225887, "mean_action_processing_ms": 0.13239801410378102, "mean_env_wait_ms": 0.8402566274765949, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 532.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 266.235}, "hist_stats": {"episode_reward": [302.0, 503.0, 539.0, 579.0, 525.0, 570.0, 567.0, 525.0, 527.0, 522.0, 519.0, 573.0, 493.0, 516.0, 479.0, 525.0, 519.0, 573.0, 513.0, 522.0, 513.0, 507.0, 519.0, 579.0, 573.0, 524.0, 567.0, 479.0, 576.0, 516.0, 573.0, 525.0, 530.0, 522.0, 522.0, 527.0, 570.0, 525.0, 522.0, 564.0, 519.0, 519.0, 525.0, 579.0, 567.0, 570.0, 570.0, 498.0, 525.0, 573.0, 504.0, 237.0, 527.0, 530.0, 525.0, 522.0, 576.0, 525.0, 519.0, 441.0, 527.0, 579.0, 527.0, 582.0, 525.0, 524.0, 576.0, 567.0, 582.0, 570.0, 573.0, 525.0, 522.0, 504.0, 570.0, 519.0, 576.0, 522.0, 567.0, 533.0, 333.0, 573.0, 522.0, 570.0, 570.0, 525.0, 513.0, 573.0, 522.0, 522.0, 576.0, 576.0, 570.0, 573.0, 573.0, 567.0, 573.0, 525.0, 521.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [151.0, 151.0, 257.0, 246.0, 268.0, 271.0, 286.0, 293.0, 260.0, 265.0, 283.0, 287.0, 279.0, 288.0, 258.0, 267.0, 265.0, 262.0, 271.0, 251.0, 266.0, 253.0, 292.0, 281.0, 235.0, 258.0, 245.0, 271.0, 232.0, 247.0, 262.0, 263.0, 243.0, 276.0, 287.0, 286.0, 252.0, 261.0, 249.0, 273.0, 242.0, 271.0, 243.0, 264.0, 271.0, 248.0, 285.0, 294.0, 294.0, 279.0, 262.0, 262.0, 278.0, 289.0, 241.0, 238.0, 285.0, 291.0, 256.0, 260.0, 285.0, 288.0, 269.0, 256.0, 276.0, 254.0, 255.0, 267.0, 258.0, 264.0, 256.0, 271.0, 284.0, 286.0, 256.0, 269.0, 267.0, 255.0, 270.0, 294.0, 255.0, 264.0, 258.0, 261.0, 265.0, 260.0, 
291.0, 288.0, 277.0, 290.0, 294.0, 276.0, 279.0, 291.0, 244.0, 254.0, 269.0, 256.0, 283.0, 290.0, 257.0, 247.0, 123.0, 114.0, 272.0, 255.0, 261.0, 269.0, 270.0, 255.0, 248.0, 274.0, 280.0, 296.0, 268.0, 257.0, 256.0, 263.0, 222.0, 219.0, 270.0, 257.0, 292.0, 287.0, 262.0, 265.0, 276.0, 306.0, 259.0, 266.0, 265.0, 259.0, 279.0, 297.0, 287.0, 280.0, 299.0, 283.0, 296.0, 274.0, 287.0, 286.0, 253.0, 272.0, 256.0, 266.0, 248.0, 256.0, 284.0, 286.0, 261.0, 258.0, 283.0, 293.0, 258.0, 264.0, 286.0, 281.0, 265.0, 268.0, 169.0, 164.0, 286.0, 287.0, 259.0, 263.0, 282.0, 288.0, 274.0, 296.0, 263.0, 262.0, 272.0, 241.0, 277.0, 296.0, 263.0, 259.0, 271.0, 251.0, 299.0, 277.0, 296.0, 280.0, 293.0, 277.0, 283.0, 290.0, 283.0, 290.0, 272.0, 295.0, 287.0, 286.0, 260.0, 265.0, 259.0, 262.0, 251.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842531086706194, "mean_inference_ms": 1.2065739664225887, "mean_action_processing_ms": 0.13239801410378102, "mean_env_wait_ms": 0.8402566274765949, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3840000, "num_agent_steps_trained": 3840000, "num_env_steps_sampled": 1920000, "num_env_steps_trained": 1920000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1920000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3840000, "timers": {"training_iteration_time_ms": 3727.268, "learn_time_ms": 1171.553, "learn_throughput": 10925.667, "synch_weights_time_ms": 11.124}, "counters": {"num_env_steps_sampled": 1920000, "num_env_steps_trained": 1920000, "num_agent_steps_sampled": 3840000, "num_agent_steps_trained": 3840000}, "done": false, "episodes_total": 4800, "training_iteration": 150, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-43", "timestamp": 1666580983, "time_this_iter_s": 3.611518144607544, "time_total_s": 564.1731176376343, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 564.1731176376343, "timesteps_since_restore": 0, "iterations_since_restore": 150, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 22.58333333333333, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 188.2, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 165.17, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.3, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, 
"useful_onion_pickup_agent_0_mean": 15.89, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.08, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.62, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.68, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 4.93, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.23, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.67, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.47, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.43, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.0, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.62, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.68, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.62, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.68, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00285069621168077, "policy_loss": -0.0030640815384685993, "vf_loss": 7.426201820373535, "vf_explained_var": 0.6838560104370117, "kl": 0.002210653852671385, "entropy": 1.0584666728973389, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1932800, "num_env_steps_trained": 1932800, "num_agent_steps_sampled": 3865600, "num_agent_steps_trained": 3865600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 541.57, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 308.0}, "policy_reward_mean": {"ppo": 270.785}, "custom_metrics": {"sparse_reward_mean": 188.2, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 165.17, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.3, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.89, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.08, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.62, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.68, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 4.93, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.23, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.67, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.47, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.43, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.62, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.68, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.62, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.68, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 522.0, 522.0, 527.0, 570.0, 525.0, 522.0, 564.0, 519.0, 519.0, 525.0, 579.0, 567.0, 570.0, 570.0, 498.0, 525.0, 573.0, 504.0, 237.0, 527.0, 530.0, 525.0, 522.0, 576.0, 525.0, 519.0, 441.0, 527.0, 579.0, 527.0, 582.0, 525.0, 524.0, 576.0, 567.0, 582.0, 570.0, 573.0, 525.0, 522.0, 504.0, 570.0, 519.0, 576.0, 522.0, 567.0, 533.0, 333.0, 573.0, 522.0, 570.0, 570.0, 525.0, 513.0, 573.0, 522.0, 522.0, 576.0, 576.0, 570.0, 573.0, 573.0, 567.0, 573.0, 525.0, 521.0, 525.0, 576.0, 570.0, 573.0, 519.0, 573.0, 570.0, 549.0, 516.0, 522.0, 522.0, 573.0, 573.0, 576.0, 519.0, 530.0, 573.0, 516.0, 576.0, 570.0, 579.0, 576.0, 530.0, 579.0, 579.0, 570.0, 573.0, 456.0, 579.0, 510.0, 576.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 254.0, 255.0, 267.0, 258.0, 264.0, 256.0, 271.0, 284.0, 286.0, 256.0, 269.0, 267.0, 255.0, 270.0, 294.0, 255.0, 264.0, 258.0, 261.0, 265.0, 260.0, 291.0, 288.0, 277.0, 290.0, 294.0, 276.0, 279.0, 291.0, 244.0, 254.0, 269.0, 256.0, 283.0, 290.0, 257.0, 247.0, 123.0, 114.0, 272.0, 255.0, 261.0, 269.0, 270.0, 255.0, 248.0, 274.0, 280.0, 296.0, 268.0, 257.0, 256.0, 263.0, 222.0, 219.0, 270.0, 257.0, 292.0, 287.0, 262.0, 265.0, 276.0, 306.0, 259.0, 266.0, 265.0, 259.0, 279.0, 297.0, 287.0, 280.0, 299.0, 283.0, 296.0, 274.0, 287.0, 286.0, 253.0, 272.0, 256.0, 266.0, 248.0, 256.0, 284.0, 286.0, 261.0, 258.0, 283.0, 293.0, 258.0, 264.0, 286.0, 281.0, 265.0, 268.0, 169.0, 164.0, 286.0, 287.0, 259.0, 263.0, 282.0, 288.0, 274.0, 296.0, 263.0, 262.0, 272.0, 241.0, 277.0, 296.0, 263.0, 259.0, 271.0, 251.0, 299.0, 277.0, 296.0, 280.0, 293.0, 277.0, 283.0, 290.0, 283.0, 290.0, 272.0, 295.0, 287.0, 286.0, 260.0, 265.0, 259.0, 262.0, 251.0, 274.0, 293.0, 283.0, 284.0, 286.0, 297.0, 276.0, 252.0, 267.0, 274.0, 299.0, 282.0, 288.0, 274.0, 275.0, 263.0, 253.0, 254.0, 268.0, 263.0, 259.0, 288.0, 285.0, 282.0, 291.0, 280.0, 296.0, 262.0, 257.0, 260.0, 270.0, 272.0, 301.0, 262.0, 254.0, 292.0, 284.0, 281.0, 289.0, 291.0, 288.0, 288.0, 288.0, 269.0, 261.0, 308.0, 271.0, 282.0, 297.0, 281.0, 289.0, 282.0, 291.0, 223.0, 233.0, 293.0, 286.0, 261.0, 249.0, 288.0, 288.0, 292.0, 284.0, 278.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842379170569299, "mean_inference_ms": 1.2064662067783543, "mean_action_processing_ms": 0.13238804610453048, "mean_env_wait_ms": 0.8400068049251856, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 541.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 308.0}, "policy_reward_mean": {"ppo": 270.785}, "hist_stats": {"episode_reward": [530.0, 522.0, 522.0, 527.0, 570.0, 525.0, 522.0, 564.0, 519.0, 519.0, 525.0, 579.0, 567.0, 570.0, 570.0, 498.0, 525.0, 573.0, 504.0, 237.0, 527.0, 530.0, 525.0, 522.0, 576.0, 525.0, 519.0, 441.0, 527.0, 579.0, 527.0, 582.0, 525.0, 524.0, 576.0, 567.0, 582.0, 570.0, 573.0, 525.0, 522.0, 504.0, 570.0, 519.0, 576.0, 522.0, 567.0, 533.0, 333.0, 573.0, 522.0, 570.0, 570.0, 525.0, 513.0, 573.0, 522.0, 522.0, 576.0, 576.0, 570.0, 573.0, 573.0, 567.0, 573.0, 525.0, 521.0, 525.0, 576.0, 570.0, 573.0, 519.0, 573.0, 570.0, 549.0, 516.0, 522.0, 522.0, 573.0, 573.0, 576.0, 519.0, 530.0, 573.0, 516.0, 576.0, 570.0, 579.0, 576.0, 530.0, 579.0, 579.0, 570.0, 573.0, 456.0, 579.0, 510.0, 576.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 254.0, 255.0, 267.0, 258.0, 264.0, 256.0, 271.0, 284.0, 286.0, 256.0, 269.0, 267.0, 255.0, 270.0, 294.0, 255.0, 264.0, 258.0, 261.0, 265.0, 260.0, 291.0, 288.0, 277.0, 290.0, 294.0, 276.0, 279.0, 291.0, 244.0, 254.0, 269.0, 256.0, 283.0, 290.0, 257.0, 247.0, 123.0, 114.0, 272.0, 255.0, 261.0, 269.0, 270.0, 255.0, 248.0, 274.0, 280.0, 296.0, 268.0, 257.0, 256.0, 263.0, 222.0, 219.0, 270.0, 257.0, 292.0, 287.0, 262.0, 265.0, 276.0, 306.0, 259.0, 266.0, 265.0, 259.0, 279.0, 297.0, 287.0, 280.0, 299.0, 283.0, 296.0, 274.0, 287.0, 286.0, 253.0, 272.0, 256.0, 266.0, 248.0, 256.0, 284.0, 286.0, 
261.0, 258.0, 283.0, 293.0, 258.0, 264.0, 286.0, 281.0, 265.0, 268.0, 169.0, 164.0, 286.0, 287.0, 259.0, 263.0, 282.0, 288.0, 274.0, 296.0, 263.0, 262.0, 272.0, 241.0, 277.0, 296.0, 263.0, 259.0, 271.0, 251.0, 299.0, 277.0, 296.0, 280.0, 293.0, 277.0, 283.0, 290.0, 283.0, 290.0, 272.0, 295.0, 287.0, 286.0, 260.0, 265.0, 259.0, 262.0, 251.0, 274.0, 293.0, 283.0, 284.0, 286.0, 297.0, 276.0, 252.0, 267.0, 274.0, 299.0, 282.0, 288.0, 274.0, 275.0, 263.0, 253.0, 254.0, 268.0, 263.0, 259.0, 288.0, 285.0, 282.0, 291.0, 280.0, 296.0, 262.0, 257.0, 260.0, 270.0, 272.0, 301.0, 262.0, 254.0, 292.0, 284.0, 281.0, 289.0, 291.0, 288.0, 288.0, 288.0, 269.0, 261.0, 308.0, 271.0, 282.0, 297.0, 281.0, 289.0, 282.0, 291.0, 223.0, 233.0, 293.0, 286.0, 261.0, 249.0, 288.0, 288.0, 292.0, 284.0, 278.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6842379170569299, "mean_inference_ms": 1.2064662067783543, "mean_action_processing_ms": 0.13238804610453048, "mean_env_wait_ms": 0.8400068049251856, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3865600, "num_agent_steps_trained": 3865600, "num_env_steps_sampled": 1932800, "num_env_steps_trained": 1932800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1932800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3865600, "timers": {"training_iteration_time_ms": 3704.776, "learn_time_ms": 1153.444, "learn_throughput": 11097.201, "synch_weights_time_ms": 11.24}, "counters": {"num_env_steps_sampled": 1932800, "num_env_steps_trained": 1932800, "num_agent_steps_sampled": 3865600, "num_agent_steps_trained": 3865600}, "done": false, "episodes_total": 4832, "training_iteration": 151, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-46", "timestamp": 1666580986, "time_this_iter_s": 3.5203089714050293, "time_total_s": 567.6934266090393, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 567.6934266090393, "timesteps_since_restore": 0, "iterations_since_restore": 151, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.68, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 189.8, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 167.1, "shaped_reward_min": 93, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.1, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.01, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 
15.52, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.69, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.47, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.25, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.33, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.6, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 4.61, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.61, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.85, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.25, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.33, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.25, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.33, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008631563978269696, "policy_loss": 0.0006459215073846281, "vf_loss": 7.484288692474365, "vf_explained_var": 0.6832795143127441, "kl": 0.0022935159504413605, "entropy": 1.0623841285705566, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1945600, "num_env_steps_trained": 1945600, "num_agent_steps_sampled": 3891200, "num_agent_steps_trained": 3891200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 333.0, "episode_reward_mean": 546.7, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 164.0}, "policy_reward_max": {"ppo": 308.0}, "policy_reward_mean": {"ppo": 273.35}, "custom_metrics": {"sparse_reward_mean": 189.8, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 167.1, "shaped_reward_min": 93, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.1, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.01, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.52, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.69, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.47, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.32, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.25, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.33, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.6, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 4.61, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.61, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.16, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.78, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.85, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.25, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.33, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.25, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.33, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 524.0, 576.0, 567.0, 582.0, 570.0, 573.0, 525.0, 522.0, 504.0, 570.0, 519.0, 576.0, 522.0, 567.0, 533.0, 333.0, 573.0, 522.0, 570.0, 570.0, 525.0, 513.0, 573.0, 522.0, 522.0, 576.0, 576.0, 570.0, 573.0, 573.0, 567.0, 573.0, 525.0, 521.0, 525.0, 576.0, 570.0, 573.0, 519.0, 573.0, 570.0, 549.0, 516.0, 522.0, 522.0, 573.0, 573.0, 576.0, 519.0, 530.0, 573.0, 516.0, 576.0, 570.0, 579.0, 576.0, 530.0, 579.0, 579.0, 570.0, 573.0, 456.0, 579.0, 510.0, 576.0, 576.0, 573.0, 522.0, 527.0, 525.0, 573.0, 561.0, 530.0, 525.0, 573.0, 516.0, 530.0, 573.0, 576.0, 579.0, 579.0, 573.0, 501.0, 527.0, 533.0, 579.0, 530.0, 579.0, 519.0, 522.0, 570.0, 525.0, 510.0, 573.0, 582.0, 530.0, 576.0, 473.0, 470.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [259.0, 266.0, 265.0, 259.0, 279.0, 297.0, 287.0, 280.0, 299.0, 283.0, 296.0, 274.0, 287.0, 286.0, 253.0, 272.0, 256.0, 266.0, 248.0, 256.0, 284.0, 286.0, 261.0, 258.0, 283.0, 293.0, 258.0, 264.0, 286.0, 281.0, 265.0, 268.0, 169.0, 164.0, 286.0, 287.0, 259.0, 263.0, 282.0, 288.0, 274.0, 296.0, 263.0, 262.0, 272.0, 241.0, 277.0, 296.0, 263.0, 259.0, 271.0, 251.0, 299.0, 277.0, 296.0, 280.0, 293.0, 277.0, 283.0, 290.0, 283.0, 290.0, 272.0, 295.0, 287.0, 286.0, 260.0, 265.0, 259.0, 262.0, 251.0, 274.0, 293.0, 283.0, 284.0, 286.0, 297.0, 276.0, 252.0, 267.0, 274.0, 299.0, 282.0, 288.0, 274.0, 275.0, 263.0, 253.0, 254.0, 268.0, 263.0, 259.0, 288.0, 285.0, 282.0, 291.0, 280.0, 296.0, 262.0, 257.0, 260.0, 270.0, 272.0, 301.0, 262.0, 254.0, 292.0, 284.0, 281.0, 289.0, 291.0, 288.0, 288.0, 288.0, 269.0, 261.0, 308.0, 271.0, 282.0, 297.0, 281.0, 289.0, 282.0, 291.0, 223.0, 233.0, 293.0, 286.0, 261.0, 249.0, 288.0, 288.0, 292.0, 284.0, 278.0, 295.0, 255.0, 267.0, 274.0, 253.0, 268.0, 257.0, 268.0, 305.0, 301.0, 260.0, 273.0, 257.0, 258.0, 267.0, 295.0, 278.0, 245.0, 271.0, 272.0, 258.0, 292.0, 281.0, 290.0, 286.0, 290.0, 289.0, 290.0, 289.0, 281.0, 292.0, 253.0, 248.0, 261.0, 266.0, 265.0, 268.0, 281.0, 298.0, 270.0, 260.0, 295.0, 284.0, 270.0, 249.0, 255.0, 267.0, 295.0, 275.0, 265.0, 260.0, 255.0, 255.0, 267.0, 306.0, 302.0, 280.0, 251.0, 279.0, 293.0, 283.0, 243.0, 230.0, 239.0, 231.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6841903962359359, "mean_inference_ms": 1.2063546072455014, "mean_action_processing_ms": 0.13237385340172605, "mean_env_wait_ms": 0.8397353790639168, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 333.0, "episode_reward_mean": 546.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 164.0}, "policy_reward_max": {"ppo": 308.0}, "policy_reward_mean": {"ppo": 273.35}, "hist_stats": {"episode_reward": [525.0, 524.0, 576.0, 567.0, 582.0, 570.0, 573.0, 525.0, 522.0, 504.0, 570.0, 519.0, 576.0, 522.0, 567.0, 533.0, 333.0, 573.0, 522.0, 570.0, 570.0, 525.0, 513.0, 573.0, 522.0, 522.0, 576.0, 576.0, 570.0, 573.0, 573.0, 567.0, 573.0, 525.0, 521.0, 525.0, 576.0, 570.0, 573.0, 519.0, 573.0, 570.0, 549.0, 516.0, 522.0, 522.0, 573.0, 573.0, 576.0, 519.0, 530.0, 573.0, 516.0, 576.0, 570.0, 579.0, 576.0, 530.0, 579.0, 579.0, 570.0, 573.0, 456.0, 579.0, 510.0, 576.0, 576.0, 573.0, 522.0, 527.0, 525.0, 573.0, 561.0, 530.0, 525.0, 573.0, 516.0, 530.0, 573.0, 576.0, 579.0, 579.0, 573.0, 501.0, 527.0, 533.0, 579.0, 530.0, 579.0, 519.0, 522.0, 570.0, 525.0, 510.0, 573.0, 582.0, 530.0, 576.0, 473.0, 470.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [259.0, 266.0, 265.0, 259.0, 279.0, 297.0, 287.0, 280.0, 299.0, 283.0, 296.0, 274.0, 287.0, 286.0, 253.0, 272.0, 256.0, 266.0, 248.0, 256.0, 284.0, 286.0, 261.0, 258.0, 283.0, 293.0, 258.0, 264.0, 286.0, 281.0, 265.0, 268.0, 169.0, 164.0, 286.0, 287.0, 259.0, 263.0, 282.0, 288.0, 274.0, 296.0, 263.0, 262.0, 272.0, 241.0, 277.0, 296.0, 263.0, 259.0, 271.0, 251.0, 299.0, 277.0, 296.0, 280.0, 293.0, 277.0, 283.0, 290.0, 283.0, 290.0, 272.0, 295.0, 287.0, 286.0, 260.0, 265.0, 259.0, 262.0, 251.0, 274.0, 293.0, 283.0, 284.0, 286.0, 297.0, 276.0, 252.0, 267.0, 274.0, 299.0, 282.0, 288.0, 274.0, 275.0, 263.0, 
253.0, 254.0, 268.0, 263.0, 259.0, 288.0, 285.0, 282.0, 291.0, 280.0, 296.0, 262.0, 257.0, 260.0, 270.0, 272.0, 301.0, 262.0, 254.0, 292.0, 284.0, 281.0, 289.0, 291.0, 288.0, 288.0, 288.0, 269.0, 261.0, 308.0, 271.0, 282.0, 297.0, 281.0, 289.0, 282.0, 291.0, 223.0, 233.0, 293.0, 286.0, 261.0, 249.0, 288.0, 288.0, 292.0, 284.0, 278.0, 295.0, 255.0, 267.0, 274.0, 253.0, 268.0, 257.0, 268.0, 305.0, 301.0, 260.0, 273.0, 257.0, 258.0, 267.0, 295.0, 278.0, 245.0, 271.0, 272.0, 258.0, 292.0, 281.0, 290.0, 286.0, 290.0, 289.0, 290.0, 289.0, 281.0, 292.0, 253.0, 248.0, 261.0, 266.0, 265.0, 268.0, 281.0, 298.0, 270.0, 260.0, 295.0, 284.0, 270.0, 249.0, 255.0, 267.0, 295.0, 275.0, 265.0, 260.0, 255.0, 255.0, 267.0, 306.0, 302.0, 280.0, 251.0, 279.0, 293.0, 283.0, 243.0, 230.0, 239.0, 231.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6841903962359359, "mean_inference_ms": 1.2063546072455014, "mean_action_processing_ms": 0.13237385340172605, "mean_env_wait_ms": 0.8397353790639168, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3891200, "num_agent_steps_trained": 3891200, "num_env_steps_sampled": 1945600, "num_env_steps_trained": 1945600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1945600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3891200, "timers": {"training_iteration_time_ms": 3703.336, "learn_time_ms": 1148.195, "learn_throughput": 11147.932, "synch_weights_time_ms": 10.616}, "counters": {"num_env_steps_sampled": 1945600, "num_env_steps_trained": 1945600, "num_agent_steps_sampled": 3891200, "num_agent_steps_trained": 3891200}, "done": false, "episodes_total": 4864, "training_iteration": 152, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-50", "timestamp": 1666580990, "time_this_iter_s": 3.713094472885132, "time_total_s": 571.4065210819244, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 571.4065210819244, "timesteps_since_restore": 0, "iterations_since_restore": 152, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 189.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.69, "shaped_reward_min": 136, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.7, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.39, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, 
"useful_onion_pickup_agent_0_mean": 15.22, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.02, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.7, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 4.76, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.52, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.68, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.7, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.7, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002908505266532302, "policy_loss": -0.003109340090304613, "vf_loss": 7.339377403259277, "vf_explained_var": 0.6975228190422058, "kl": 0.0020682807080447674, "entropy": 1.0662044286727905, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1958400, "num_env_steps_trained": 1958400, "num_agent_steps_sampled": 3916800, "num_agent_steps_trained": 3916800}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 456.0, "episode_reward_mean": 546.89, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 273.445}, "custom_metrics": {"sparse_reward_mean": 189.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.69, "shaped_reward_min": 136, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.7, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.39, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.22, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.02, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.26, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.7, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 4.76, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.52, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.7, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.7, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 525.0, 521.0, 525.0, 576.0, 570.0, 573.0, 519.0, 573.0, 570.0, 549.0, 516.0, 522.0, 522.0, 573.0, 573.0, 576.0, 519.0, 530.0, 573.0, 516.0, 576.0, 570.0, 579.0, 576.0, 530.0, 579.0, 579.0, 570.0, 573.0, 456.0, 579.0, 510.0, 576.0, 576.0, 573.0, 522.0, 527.0, 525.0, 573.0, 561.0, 530.0, 525.0, 573.0, 516.0, 530.0, 573.0, 576.0, 579.0, 579.0, 573.0, 501.0, 527.0, 533.0, 579.0, 530.0, 579.0, 519.0, 522.0, 570.0, 525.0, 510.0, 573.0, 582.0, 530.0, 576.0, 473.0, 470.0, 582.0, 525.0, 570.0, 579.0, 519.0, 507.0, 462.0, 587.0, 519.0, 558.0, 570.0, 576.0, 525.0, 510.0, 573.0, 525.0, 525.0, 519.0, 525.0, 573.0, 519.0, 530.0, 525.0, 522.0, 573.0, 576.0, 582.0, 527.0, 573.0, 576.0, 530.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 260.0, 265.0, 259.0, 262.0, 251.0, 274.0, 293.0, 283.0, 284.0, 286.0, 297.0, 276.0, 252.0, 267.0, 274.0, 299.0, 282.0, 288.0, 274.0, 275.0, 263.0, 253.0, 254.0, 268.0, 263.0, 259.0, 288.0, 285.0, 282.0, 291.0, 280.0, 296.0, 262.0, 257.0, 260.0, 270.0, 272.0, 301.0, 262.0, 254.0, 292.0, 284.0, 281.0, 289.0, 291.0, 288.0, 288.0, 288.0, 269.0, 261.0, 308.0, 271.0, 282.0, 297.0, 281.0, 289.0, 282.0, 291.0, 223.0, 233.0, 293.0, 286.0, 261.0, 249.0, 288.0, 288.0, 292.0, 284.0, 278.0, 295.0, 255.0, 267.0, 274.0, 253.0, 268.0, 257.0, 268.0, 305.0, 301.0, 260.0, 273.0, 257.0, 258.0, 267.0, 295.0, 278.0, 245.0, 271.0, 272.0, 258.0, 292.0, 281.0, 290.0, 286.0, 290.0, 289.0, 290.0, 289.0, 281.0, 292.0, 253.0, 248.0, 261.0, 266.0, 265.0, 268.0, 281.0, 298.0, 270.0, 260.0, 295.0, 284.0, 270.0, 249.0, 255.0, 267.0, 295.0, 275.0, 265.0, 260.0, 255.0, 255.0, 267.0, 306.0, 302.0, 280.0, 251.0, 279.0, 293.0, 283.0, 243.0, 230.0, 239.0, 231.0, 286.0, 296.0, 267.0, 258.0, 280.0, 290.0, 293.0, 286.0, 256.0, 263.0, 250.0, 257.0, 239.0, 223.0, 278.0, 309.0, 261.0, 258.0, 284.0, 274.0, 287.0, 283.0, 279.0, 297.0, 264.0, 261.0, 249.0, 261.0, 287.0, 286.0, 269.0, 256.0, 257.0, 268.0, 248.0, 271.0, 267.0, 258.0, 285.0, 288.0, 272.0, 247.0, 269.0, 261.0, 271.0, 254.0, 254.0, 268.0, 281.0, 292.0, 287.0, 289.0, 281.0, 301.0, 269.0, 258.0, 281.0, 292.0, 298.0, 278.0, 263.0, 267.0, 280.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6841687379475, "mean_inference_ms": 1.2062809665348004, "mean_action_processing_ms": 0.13236151110511024, "mean_env_wait_ms": 0.8394849551462821, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 456.0, "episode_reward_mean": 546.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 273.445}, "hist_stats": {"episode_reward": [573.0, 525.0, 521.0, 525.0, 576.0, 570.0, 573.0, 519.0, 573.0, 570.0, 549.0, 516.0, 522.0, 522.0, 573.0, 573.0, 576.0, 519.0, 530.0, 573.0, 516.0, 576.0, 570.0, 579.0, 576.0, 530.0, 579.0, 579.0, 570.0, 573.0, 456.0, 579.0, 510.0, 576.0, 576.0, 573.0, 522.0, 527.0, 525.0, 573.0, 561.0, 530.0, 525.0, 573.0, 516.0, 530.0, 573.0, 576.0, 579.0, 579.0, 573.0, 501.0, 527.0, 533.0, 579.0, 530.0, 579.0, 519.0, 522.0, 570.0, 525.0, 510.0, 573.0, 582.0, 530.0, 576.0, 473.0, 470.0, 582.0, 525.0, 570.0, 579.0, 519.0, 507.0, 462.0, 587.0, 519.0, 558.0, 570.0, 576.0, 525.0, 510.0, 573.0, 525.0, 525.0, 519.0, 525.0, 573.0, 519.0, 530.0, 525.0, 522.0, 573.0, 576.0, 582.0, 527.0, 573.0, 576.0, 530.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 260.0, 265.0, 259.0, 262.0, 251.0, 274.0, 293.0, 283.0, 284.0, 286.0, 297.0, 276.0, 252.0, 267.0, 274.0, 299.0, 282.0, 288.0, 274.0, 275.0, 263.0, 253.0, 254.0, 268.0, 263.0, 259.0, 288.0, 285.0, 282.0, 291.0, 280.0, 296.0, 262.0, 257.0, 260.0, 270.0, 272.0, 301.0, 262.0, 254.0, 292.0, 284.0, 281.0, 289.0, 291.0, 288.0, 288.0, 288.0, 269.0, 261.0, 308.0, 271.0, 282.0, 297.0, 281.0, 289.0, 282.0, 291.0, 223.0, 233.0, 293.0, 286.0, 261.0, 249.0, 288.0, 288.0, 292.0, 284.0, 278.0, 295.0, 255.0, 267.0, 274.0, 253.0, 268.0, 257.0, 268.0, 305.0, 301.0, 260.0, 273.0, 257.0, 258.0, 267.0, 
295.0, 278.0, 245.0, 271.0, 272.0, 258.0, 292.0, 281.0, 290.0, 286.0, 290.0, 289.0, 290.0, 289.0, 281.0, 292.0, 253.0, 248.0, 261.0, 266.0, 265.0, 268.0, 281.0, 298.0, 270.0, 260.0, 295.0, 284.0, 270.0, 249.0, 255.0, 267.0, 295.0, 275.0, 265.0, 260.0, 255.0, 255.0, 267.0, 306.0, 302.0, 280.0, 251.0, 279.0, 293.0, 283.0, 243.0, 230.0, 239.0, 231.0, 286.0, 296.0, 267.0, 258.0, 280.0, 290.0, 293.0, 286.0, 256.0, 263.0, 250.0, 257.0, 239.0, 223.0, 278.0, 309.0, 261.0, 258.0, 284.0, 274.0, 287.0, 283.0, 279.0, 297.0, 264.0, 261.0, 249.0, 261.0, 287.0, 286.0, 269.0, 256.0, 257.0, 268.0, 248.0, 271.0, 267.0, 258.0, 285.0, 288.0, 272.0, 247.0, 269.0, 261.0, 271.0, 254.0, 254.0, 268.0, 281.0, 292.0, 287.0, 289.0, 281.0, 301.0, 269.0, 258.0, 281.0, 292.0, 298.0, 278.0, 263.0, 267.0, 280.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6841687379475, "mean_inference_ms": 1.2062809665348004, "mean_action_processing_ms": 0.13236151110511024, "mean_env_wait_ms": 0.8394849551462821, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3916800, "num_agent_steps_trained": 3916800, "num_env_steps_sampled": 1958400, "num_env_steps_trained": 1958400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1958400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3916800, "timers": {"training_iteration_time_ms": 3698.273, "learn_time_ms": 1140.347, "learn_throughput": 11224.649, "synch_weights_time_ms": 11.356}, "counters": {"num_env_steps_sampled": 1958400, "num_env_steps_trained": 1958400, "num_agent_steps_sampled": 3916800, "num_agent_steps_trained": 3916800}, "done": false, "episodes_total": 4896, "training_iteration": 153, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-54", "timestamp": 1666580994, "time_this_iter_s": 3.7132623195648193, "time_total_s": 575.1197834014893, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 575.1197834014893, "timesteps_since_restore": 0, "iterations_since_restore": 153, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.82, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 189.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.21, "shaped_reward_min": 138, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.44, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, 
"useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.06, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.71, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.84, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 4.8, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.32, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.45, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.59, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.71, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.84, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.71, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.84, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0023376313038170338, "policy_loss": -0.0025501232594251633, "vf_loss": 7.455283164978027, "vf_explained_var": 0.6846986413002014, "kl": 0.0021816184744238853, "entropy": 1.0660704374313354, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1971200, "num_env_steps_trained": 1971200, "num_agent_steps_sampled": 3942400, "num_agent_steps_trained": 3942400}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 462.0, "episode_reward_mean": 546.01, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 273.005}, "custom_metrics": {"sparse_reward_mean": 189.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.21, "shaped_reward_min": 138, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.44, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.06, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.19, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.71, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.84, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 14, "useful_dish_pickup_agent_0_mean": 4.8, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.32, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.48, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.45, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 7, "useful_dish_drop_agent_0_mean": 0.16, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.71, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.84, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.71, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.84, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 576.0, 576.0, 573.0, 522.0, 527.0, 525.0, 573.0, 561.0, 530.0, 525.0, 573.0, 516.0, 530.0, 573.0, 576.0, 579.0, 579.0, 573.0, 501.0, 527.0, 533.0, 579.0, 530.0, 579.0, 519.0, 522.0, 570.0, 525.0, 510.0, 573.0, 582.0, 530.0, 576.0, 473.0, 470.0, 582.0, 525.0, 570.0, 579.0, 519.0, 507.0, 462.0, 587.0, 519.0, 558.0, 570.0, 576.0, 525.0, 510.0, 573.0, 525.0, 525.0, 519.0, 525.0, 573.0, 519.0, 530.0, 525.0, 522.0, 573.0, 576.0, 582.0, 527.0, 573.0, 576.0, 530.0, 570.0, 530.0, 555.0, 558.0, 498.0, 573.0, 567.0, 530.0, 507.0, 570.0, 519.0, 573.0, 570.0, 516.0, 570.0, 525.0, 579.0, 576.0, 533.0, 522.0, 519.0, 576.0, 519.0, 573.0, 513.0, 579.0, 570.0, 533.0, 576.0, 579.0, 525.0, 573.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 249.0, 288.0, 288.0, 292.0, 284.0, 278.0, 295.0, 255.0, 267.0, 274.0, 253.0, 268.0, 257.0, 268.0, 305.0, 301.0, 260.0, 273.0, 257.0, 258.0, 267.0, 295.0, 278.0, 245.0, 271.0, 272.0, 258.0, 292.0, 281.0, 290.0, 286.0, 290.0, 289.0, 290.0, 289.0, 281.0, 292.0, 253.0, 248.0, 261.0, 266.0, 265.0, 268.0, 281.0, 298.0, 270.0, 260.0, 295.0, 284.0, 270.0, 249.0, 255.0, 267.0, 295.0, 275.0, 265.0, 260.0, 255.0, 255.0, 267.0, 306.0, 302.0, 280.0, 251.0, 279.0, 293.0, 283.0, 243.0, 230.0, 239.0, 231.0, 286.0, 296.0, 267.0, 258.0, 280.0, 290.0, 293.0, 286.0, 256.0, 263.0, 250.0, 257.0, 239.0, 223.0, 278.0, 309.0, 261.0, 258.0, 284.0, 274.0, 287.0, 283.0, 279.0, 297.0, 264.0, 261.0, 249.0, 261.0, 287.0, 286.0, 269.0, 256.0, 257.0, 268.0, 248.0, 271.0, 267.0, 258.0, 285.0, 288.0, 272.0, 247.0, 269.0, 261.0, 271.0, 254.0, 254.0, 268.0, 281.0, 292.0, 287.0, 289.0, 281.0, 301.0, 269.0, 258.0, 281.0, 292.0, 298.0, 278.0, 263.0, 267.0, 280.0, 290.0, 256.0, 274.0, 277.0, 278.0, 286.0, 272.0, 264.0, 234.0, 299.0, 274.0, 280.0, 287.0, 263.0, 267.0, 239.0, 268.0, 287.0, 283.0, 258.0, 261.0, 289.0, 284.0, 273.0, 297.0, 255.0, 261.0, 285.0, 285.0, 262.0, 263.0, 292.0, 287.0, 286.0, 290.0, 269.0, 264.0, 259.0, 263.0, 272.0, 247.0, 276.0, 300.0, 268.0, 251.0, 277.0, 296.0, 251.0, 262.0, 294.0, 285.0, 286.0, 284.0, 267.0, 266.0, 284.0, 292.0, 297.0, 282.0, 260.0, 265.0, 288.0, 285.0, 266.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6840882825634185, "mean_inference_ms": 1.2061931707471094, "mean_action_processing_ms": 0.13235037392991747, "mean_env_wait_ms": 0.839236522341682, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 462.0, "episode_reward_mean": 546.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 273.005}, "hist_stats": {"episode_reward": [510.0, 576.0, 576.0, 573.0, 522.0, 527.0, 525.0, 573.0, 561.0, 530.0, 525.0, 573.0, 516.0, 530.0, 573.0, 576.0, 579.0, 579.0, 573.0, 501.0, 527.0, 533.0, 579.0, 530.0, 579.0, 519.0, 522.0, 570.0, 525.0, 510.0, 573.0, 582.0, 530.0, 576.0, 473.0, 470.0, 582.0, 525.0, 570.0, 579.0, 519.0, 507.0, 462.0, 587.0, 519.0, 558.0, 570.0, 576.0, 525.0, 510.0, 573.0, 525.0, 525.0, 519.0, 525.0, 573.0, 519.0, 530.0, 525.0, 522.0, 573.0, 576.0, 582.0, 527.0, 573.0, 576.0, 530.0, 570.0, 530.0, 555.0, 558.0, 498.0, 573.0, 567.0, 530.0, 507.0, 570.0, 519.0, 573.0, 570.0, 516.0, 570.0, 525.0, 579.0, 576.0, 533.0, 522.0, 519.0, 576.0, 519.0, 573.0, 513.0, 579.0, 570.0, 533.0, 576.0, 579.0, 525.0, 573.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 249.0, 288.0, 288.0, 292.0, 284.0, 278.0, 295.0, 255.0, 267.0, 274.0, 253.0, 268.0, 257.0, 268.0, 305.0, 301.0, 260.0, 273.0, 257.0, 258.0, 267.0, 295.0, 278.0, 245.0, 271.0, 272.0, 258.0, 292.0, 281.0, 290.0, 286.0, 290.0, 289.0, 290.0, 289.0, 281.0, 292.0, 253.0, 248.0, 261.0, 266.0, 265.0, 268.0, 281.0, 298.0, 270.0, 260.0, 295.0, 284.0, 270.0, 249.0, 255.0, 267.0, 295.0, 275.0, 265.0, 260.0, 255.0, 255.0, 267.0, 306.0, 302.0, 280.0, 251.0, 279.0, 293.0, 283.0, 243.0, 230.0, 239.0, 231.0, 286.0, 296.0, 267.0, 258.0, 280.0, 290.0, 293.0, 286.0, 256.0, 263.0, 250.0, 257.0, 239.0, 223.0, 
278.0, 309.0, 261.0, 258.0, 284.0, 274.0, 287.0, 283.0, 279.0, 297.0, 264.0, 261.0, 249.0, 261.0, 287.0, 286.0, 269.0, 256.0, 257.0, 268.0, 248.0, 271.0, 267.0, 258.0, 285.0, 288.0, 272.0, 247.0, 269.0, 261.0, 271.0, 254.0, 254.0, 268.0, 281.0, 292.0, 287.0, 289.0, 281.0, 301.0, 269.0, 258.0, 281.0, 292.0, 298.0, 278.0, 263.0, 267.0, 280.0, 290.0, 256.0, 274.0, 277.0, 278.0, 286.0, 272.0, 264.0, 234.0, 299.0, 274.0, 280.0, 287.0, 263.0, 267.0, 239.0, 268.0, 287.0, 283.0, 258.0, 261.0, 289.0, 284.0, 273.0, 297.0, 255.0, 261.0, 285.0, 285.0, 262.0, 263.0, 292.0, 287.0, 286.0, 290.0, 269.0, 264.0, 259.0, 263.0, 272.0, 247.0, 276.0, 300.0, 268.0, 251.0, 277.0, 296.0, 251.0, 262.0, 294.0, 285.0, 286.0, 284.0, 267.0, 266.0, 284.0, 292.0, 297.0, 282.0, 260.0, 265.0, 288.0, 285.0, 266.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6840882825634185, "mean_inference_ms": 1.2061931707471094, "mean_action_processing_ms": 0.13235037392991747, "mean_env_wait_ms": 0.839236522341682, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3942400, "num_agent_steps_trained": 3942400, "num_env_steps_sampled": 1971200, "num_env_steps_trained": 1971200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1971200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3942400, "timers": {"training_iteration_time_ms": 3693.082, "learn_time_ms": 1137.156, "learn_throughput": 11256.149, "synch_weights_time_ms": 11.882}, "counters": {"num_env_steps_sampled": 1971200, "num_env_steps_trained": 1971200, "num_agent_steps_sampled": 3942400, "num_agent_steps_trained": 3942400}, "done": false, "episodes_total": 4928, "training_iteration": 154, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-09-58", "timestamp": 1666580998, "time_this_iter_s": 3.6708226203918457, "time_total_s": 578.7906060218811, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 578.7906060218811, "timesteps_since_restore": 0, "iterations_since_restore": 154, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.583333333333332, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 189.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.31, "shaped_reward_min": 138, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.3, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.44, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.05, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.85, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.77, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.43, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.63, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.85, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.85, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011531509226188064, "policy_loss": -0.0013669736217707396, "vf_loss": 7.4545488357543945, "vf_explained_var": 0.6869913339614868, "kl": 0.0018564509227871895, "entropy": 1.063261866569519, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1984000, "num_env_steps_trained": 1984000, "num_agent_steps_sampled": 3968000, "num_agent_steps_trained": 3968000}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 462.0, "episode_reward_mean": 546.51, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 273.255}, "custom_metrics": {"sparse_reward_mean": 189.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.31, "shaped_reward_min": 138, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.3, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.44, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.05, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.85, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.77, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.43, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.45, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.85, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.85, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 576.0, 473.0, 470.0, 582.0, 525.0, 570.0, 579.0, 519.0, 507.0, 462.0, 587.0, 519.0, 558.0, 570.0, 576.0, 525.0, 510.0, 573.0, 525.0, 525.0, 519.0, 525.0, 573.0, 519.0, 530.0, 525.0, 522.0, 573.0, 576.0, 582.0, 527.0, 573.0, 576.0, 530.0, 570.0, 530.0, 555.0, 558.0, 498.0, 573.0, 567.0, 530.0, 507.0, 570.0, 519.0, 573.0, 570.0, 516.0, 570.0, 525.0, 579.0, 576.0, 533.0, 522.0, 519.0, 576.0, 519.0, 573.0, 513.0, 579.0, 570.0, 533.0, 576.0, 579.0, 525.0, 573.0, 567.0, 573.0, 519.0, 519.0, 576.0, 552.0, 576.0, 576.0, 519.0, 464.0, 530.0, 570.0, 576.0, 576.0, 519.0, 570.0, 582.0, 522.0, 579.0, 525.0, 573.0, 576.0, 573.0, 525.0, 576.0, 576.0, 530.0, 525.0, 579.0, 570.0, 530.0, 525.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [251.0, 279.0, 293.0, 283.0, 243.0, 230.0, 239.0, 231.0, 286.0, 296.0, 267.0, 258.0, 280.0, 290.0, 293.0, 286.0, 256.0, 263.0, 250.0, 257.0, 239.0, 223.0, 278.0, 309.0, 261.0, 258.0, 284.0, 274.0, 287.0, 283.0, 279.0, 297.0, 264.0, 261.0, 249.0, 261.0, 287.0, 286.0, 269.0, 256.0, 257.0, 268.0, 248.0, 271.0, 267.0, 258.0, 285.0, 288.0, 272.0, 247.0, 269.0, 261.0, 271.0, 254.0, 254.0, 268.0, 281.0, 292.0, 287.0, 289.0, 281.0, 301.0, 269.0, 258.0, 281.0, 292.0, 298.0, 278.0, 263.0, 267.0, 280.0, 290.0, 256.0, 274.0, 277.0, 278.0, 286.0, 272.0, 264.0, 234.0, 299.0, 274.0, 280.0, 287.0, 263.0, 267.0, 239.0, 268.0, 287.0, 283.0, 258.0, 261.0, 289.0, 284.0, 273.0, 297.0, 255.0, 261.0, 285.0, 285.0, 262.0, 263.0, 292.0, 287.0, 286.0, 290.0, 269.0, 264.0, 259.0, 263.0, 272.0, 247.0, 276.0, 300.0, 268.0, 251.0, 277.0, 296.0, 251.0, 262.0, 294.0, 285.0, 286.0, 284.0, 267.0, 266.0, 284.0, 292.0, 297.0, 282.0, 260.0, 265.0, 288.0, 285.0, 266.0, 301.0, 289.0, 284.0, 265.0, 254.0, 256.0, 263.0, 275.0, 301.0, 272.0, 280.0, 292.0, 284.0, 293.0, 283.0, 252.0, 267.0, 231.0, 233.0, 262.0, 268.0, 288.0, 282.0, 286.0, 290.0, 274.0, 302.0, 263.0, 256.0, 280.0, 290.0, 290.0, 292.0, 263.0, 259.0, 289.0, 290.0, 260.0, 265.0, 289.0, 284.0, 302.0, 274.0, 277.0, 296.0, 254.0, 271.0, 289.0, 287.0, 281.0, 295.0, 274.0, 256.0, 268.0, 257.0, 285.0, 294.0, 283.0, 287.0, 264.0, 266.0, 266.0, 259.0, 267.0, 249.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.68402604365027, "mean_inference_ms": 1.2061263336972392, "mean_action_processing_ms": 0.1323433901173326, "mean_env_wait_ms": 0.839023040007508, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 462.0, "episode_reward_mean": 546.51, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 273.255}, "hist_stats": {"episode_reward": [530.0, 576.0, 473.0, 470.0, 582.0, 525.0, 570.0, 579.0, 519.0, 507.0, 462.0, 587.0, 519.0, 558.0, 570.0, 576.0, 525.0, 510.0, 573.0, 525.0, 525.0, 519.0, 525.0, 573.0, 519.0, 530.0, 525.0, 522.0, 573.0, 576.0, 582.0, 527.0, 573.0, 576.0, 530.0, 570.0, 530.0, 555.0, 558.0, 498.0, 573.0, 567.0, 530.0, 507.0, 570.0, 519.0, 573.0, 570.0, 516.0, 570.0, 525.0, 579.0, 576.0, 533.0, 522.0, 519.0, 576.0, 519.0, 573.0, 513.0, 579.0, 570.0, 533.0, 576.0, 579.0, 525.0, 573.0, 567.0, 573.0, 519.0, 519.0, 576.0, 552.0, 576.0, 576.0, 519.0, 464.0, 530.0, 570.0, 576.0, 576.0, 519.0, 570.0, 582.0, 522.0, 579.0, 525.0, 573.0, 576.0, 573.0, 525.0, 576.0, 576.0, 530.0, 525.0, 579.0, 570.0, 530.0, 525.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [251.0, 279.0, 293.0, 283.0, 243.0, 230.0, 239.0, 231.0, 286.0, 296.0, 267.0, 258.0, 280.0, 290.0, 293.0, 286.0, 256.0, 263.0, 250.0, 257.0, 239.0, 223.0, 278.0, 309.0, 261.0, 258.0, 284.0, 274.0, 287.0, 283.0, 279.0, 297.0, 264.0, 261.0, 249.0, 261.0, 287.0, 286.0, 269.0, 256.0, 257.0, 268.0, 248.0, 271.0, 267.0, 258.0, 285.0, 288.0, 272.0, 247.0, 269.0, 261.0, 271.0, 254.0, 254.0, 268.0, 281.0, 292.0, 287.0, 289.0, 281.0, 301.0, 269.0, 258.0, 281.0, 292.0, 298.0, 278.0, 263.0, 267.0, 280.0, 290.0, 256.0, 274.0, 277.0, 278.0, 286.0, 272.0, 264.0, 234.0, 299.0, 274.0, 280.0, 287.0, 263.0, 267.0, 
239.0, 268.0, 287.0, 283.0, 258.0, 261.0, 289.0, 284.0, 273.0, 297.0, 255.0, 261.0, 285.0, 285.0, 262.0, 263.0, 292.0, 287.0, 286.0, 290.0, 269.0, 264.0, 259.0, 263.0, 272.0, 247.0, 276.0, 300.0, 268.0, 251.0, 277.0, 296.0, 251.0, 262.0, 294.0, 285.0, 286.0, 284.0, 267.0, 266.0, 284.0, 292.0, 297.0, 282.0, 260.0, 265.0, 288.0, 285.0, 266.0, 301.0, 289.0, 284.0, 265.0, 254.0, 256.0, 263.0, 275.0, 301.0, 272.0, 280.0, 292.0, 284.0, 293.0, 283.0, 252.0, 267.0, 231.0, 233.0, 262.0, 268.0, 288.0, 282.0, 286.0, 290.0, 274.0, 302.0, 263.0, 256.0, 280.0, 290.0, 290.0, 292.0, 263.0, 259.0, 289.0, 290.0, 260.0, 265.0, 289.0, 284.0, 302.0, 274.0, 277.0, 296.0, 254.0, 271.0, 289.0, 287.0, 281.0, 295.0, 274.0, 256.0, 268.0, 257.0, 285.0, 294.0, 283.0, 287.0, 264.0, 266.0, 266.0, 259.0, 267.0, 249.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.68402604365027, "mean_inference_ms": 1.2061263336972392, "mean_action_processing_ms": 0.1323433901173326, "mean_env_wait_ms": 0.839023040007508, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3968000, "num_agent_steps_trained": 3968000, "num_env_steps_sampled": 1984000, "num_env_steps_trained": 1984000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1984000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3968000, "timers": {"training_iteration_time_ms": 3661.855, "learn_time_ms": 1121.113, "learn_throughput": 11417.226, "synch_weights_time_ms": 12.244}, "counters": {"num_env_steps_sampled": 1984000, "num_env_steps_trained": 1984000, "num_agent_steps_sampled": 3968000, "num_agent_steps_trained": 3968000}, "done": false, "episodes_total": 4960, "training_iteration": 155, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-02", "timestamp": 1666581002, "time_this_iter_s": 3.6832804679870605, "time_total_s": 582.4738864898682, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 582.4738864898682, "timesteps_since_restore": 0, "iterations_since_restore": 155, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.14, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 168.36, "shaped_reward_min": 138, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.21, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.29, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.64, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.08, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.43, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.64, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.08, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.64, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.08, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00024176511215046048, "policy_loss": 3.316625952720642e-05, "vf_loss": 7.437552452087402, "vf_explained_var": 0.6896594166755676, "kl": 0.002203156938776374, "entropy": 1.0703125, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 1996800, "num_env_steps_trained": 1996800, "num_agent_steps_sampled": 3993600, "num_agent_steps_trained": 3993600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 436.0, "episode_reward_mean": 548.36, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 274.18}, "custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 168.36, "shaped_reward_min": 138, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.21, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.29, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.64, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.08, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.46, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.43, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.14, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.64, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.08, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.64, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.08, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 530.0, 570.0, 530.0, 555.0, 558.0, 498.0, 573.0, 567.0, 530.0, 507.0, 570.0, 519.0, 573.0, 570.0, 516.0, 570.0, 525.0, 579.0, 576.0, 533.0, 522.0, 519.0, 576.0, 519.0, 573.0, 513.0, 579.0, 570.0, 533.0, 576.0, 579.0, 525.0, 573.0, 567.0, 573.0, 519.0, 519.0, 576.0, 552.0, 576.0, 576.0, 519.0, 464.0, 530.0, 570.0, 576.0, 576.0, 519.0, 570.0, 582.0, 522.0, 579.0, 525.0, 573.0, 576.0, 573.0, 525.0, 576.0, 576.0, 530.0, 525.0, 579.0, 570.0, 530.0, 525.0, 516.0, 582.0, 525.0, 579.0, 576.0, 573.0, 530.0, 579.0, 513.0, 533.0, 525.0, 576.0, 533.0, 576.0, 576.0, 579.0, 484.0, 516.0, 573.0, 461.0, 576.0, 476.0, 579.0, 530.0, 518.0, 518.0, 570.0, 576.0, 576.0, 436.0, 576.0, 527.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 292.0, 298.0, 278.0, 263.0, 267.0, 280.0, 290.0, 256.0, 274.0, 277.0, 278.0, 286.0, 272.0, 264.0, 234.0, 299.0, 274.0, 280.0, 287.0, 263.0, 267.0, 239.0, 268.0, 287.0, 283.0, 258.0, 261.0, 289.0, 284.0, 273.0, 297.0, 255.0, 261.0, 285.0, 285.0, 262.0, 263.0, 292.0, 287.0, 286.0, 290.0, 269.0, 264.0, 259.0, 263.0, 272.0, 247.0, 276.0, 300.0, 268.0, 251.0, 277.0, 296.0, 251.0, 262.0, 294.0, 285.0, 286.0, 284.0, 267.0, 266.0, 284.0, 292.0, 297.0, 282.0, 260.0, 265.0, 288.0, 285.0, 266.0, 301.0, 289.0, 284.0, 265.0, 254.0, 256.0, 263.0, 275.0, 301.0, 272.0, 280.0, 292.0, 284.0, 293.0, 283.0, 252.0, 267.0, 231.0, 233.0, 262.0, 268.0, 288.0, 282.0, 286.0, 290.0, 274.0, 302.0, 263.0, 256.0, 280.0, 290.0, 290.0, 292.0, 263.0, 259.0, 289.0, 290.0, 260.0, 265.0, 289.0, 284.0, 302.0, 274.0, 277.0, 296.0, 254.0, 271.0, 289.0, 287.0, 281.0, 295.0, 274.0, 256.0, 268.0, 257.0, 285.0, 294.0, 283.0, 287.0, 264.0, 266.0, 266.0, 259.0, 267.0, 249.0, 301.0, 281.0, 265.0, 260.0, 282.0, 297.0, 294.0, 282.0, 286.0, 287.0, 262.0, 268.0, 290.0, 289.0, 254.0, 259.0, 271.0, 262.0, 251.0, 274.0, 287.0, 289.0, 267.0, 266.0, 295.0, 281.0, 298.0, 278.0, 277.0, 302.0, 241.0, 243.0, 246.0, 270.0, 277.0, 296.0, 234.0, 227.0, 292.0, 284.0, 237.0, 239.0, 287.0, 292.0, 263.0, 267.0, 271.0, 247.0, 267.0, 251.0, 287.0, 283.0, 293.0, 283.0, 275.0, 301.0, 221.0, 215.0, 292.0, 284.0, 261.0, 266.0, 302.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6839270699036237, "mean_inference_ms": 1.206053543227454, "mean_action_processing_ms": 0.13233675331157269, "mean_env_wait_ms": 0.8388131632821678, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 436.0, "episode_reward_mean": 548.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 215.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 274.18}, "hist_stats": {"episode_reward": [573.0, 576.0, 530.0, 570.0, 530.0, 555.0, 558.0, 498.0, 573.0, 567.0, 530.0, 507.0, 570.0, 519.0, 573.0, 570.0, 516.0, 570.0, 525.0, 579.0, 576.0, 533.0, 522.0, 519.0, 576.0, 519.0, 573.0, 513.0, 579.0, 570.0, 533.0, 576.0, 579.0, 525.0, 573.0, 567.0, 573.0, 519.0, 519.0, 576.0, 552.0, 576.0, 576.0, 519.0, 464.0, 530.0, 570.0, 576.0, 576.0, 519.0, 570.0, 582.0, 522.0, 579.0, 525.0, 573.0, 576.0, 573.0, 525.0, 576.0, 576.0, 530.0, 525.0, 579.0, 570.0, 530.0, 525.0, 516.0, 582.0, 525.0, 579.0, 576.0, 573.0, 530.0, 579.0, 513.0, 533.0, 525.0, 576.0, 533.0, 576.0, 576.0, 579.0, 484.0, 516.0, 573.0, 461.0, 576.0, 476.0, 579.0, 530.0, 518.0, 518.0, 570.0, 576.0, 576.0, 436.0, 576.0, 527.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 292.0, 298.0, 278.0, 263.0, 267.0, 280.0, 290.0, 256.0, 274.0, 277.0, 278.0, 286.0, 272.0, 264.0, 234.0, 299.0, 274.0, 280.0, 287.0, 263.0, 267.0, 239.0, 268.0, 287.0, 283.0, 258.0, 261.0, 289.0, 284.0, 273.0, 297.0, 255.0, 261.0, 285.0, 285.0, 262.0, 263.0, 292.0, 287.0, 286.0, 290.0, 269.0, 264.0, 259.0, 263.0, 272.0, 247.0, 276.0, 300.0, 268.0, 251.0, 277.0, 296.0, 251.0, 262.0, 294.0, 285.0, 286.0, 284.0, 267.0, 266.0, 284.0, 292.0, 297.0, 282.0, 260.0, 265.0, 288.0, 285.0, 266.0, 301.0, 289.0, 284.0, 265.0, 254.0, 256.0, 263.0, 275.0, 301.0, 272.0, 280.0, 292.0, 284.0, 293.0, 283.0, 252.0, 
267.0, 231.0, 233.0, 262.0, 268.0, 288.0, 282.0, 286.0, 290.0, 274.0, 302.0, 263.0, 256.0, 280.0, 290.0, 290.0, 292.0, 263.0, 259.0, 289.0, 290.0, 260.0, 265.0, 289.0, 284.0, 302.0, 274.0, 277.0, 296.0, 254.0, 271.0, 289.0, 287.0, 281.0, 295.0, 274.0, 256.0, 268.0, 257.0, 285.0, 294.0, 283.0, 287.0, 264.0, 266.0, 266.0, 259.0, 267.0, 249.0, 301.0, 281.0, 265.0, 260.0, 282.0, 297.0, 294.0, 282.0, 286.0, 287.0, 262.0, 268.0, 290.0, 289.0, 254.0, 259.0, 271.0, 262.0, 251.0, 274.0, 287.0, 289.0, 267.0, 266.0, 295.0, 281.0, 298.0, 278.0, 277.0, 302.0, 241.0, 243.0, 246.0, 270.0, 277.0, 296.0, 234.0, 227.0, 292.0, 284.0, 237.0, 239.0, 287.0, 292.0, 263.0, 267.0, 271.0, 247.0, 267.0, 251.0, 287.0, 283.0, 293.0, 283.0, 275.0, 301.0, 221.0, 215.0, 292.0, 284.0, 261.0, 266.0, 302.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6839270699036237, "mean_inference_ms": 1.206053543227454, "mean_action_processing_ms": 0.13233675331157269, "mean_env_wait_ms": 0.8388131632821678, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 3993600, "num_agent_steps_trained": 3993600, "num_env_steps_sampled": 1996800, "num_env_steps_trained": 1996800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 1996800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 3993600, "timers": {"training_iteration_time_ms": 3616.468, "learn_time_ms": 1118.033, "learn_throughput": 11448.682, "synch_weights_time_ms": 11.585}, "counters": {"num_env_steps_sampled": 1996800, "num_env_steps_trained": 1996800, "num_agent_steps_sampled": 3993600, "num_agent_steps_trained": 3993600}, "done": false, "episodes_total": 4992, "training_iteration": 156, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-06", "timestamp": 1666581006, "time_this_iter_s": 3.615835189819336, "time_total_s": 586.0897216796875, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 586.0897216796875, "timesteps_since_restore": 0, "iterations_since_restore": 156, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.916666666666668, "ram_util_percent": 10.616666666666665}} +{"custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 169.32, "shaped_reward_min": 137, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.43, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.11, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.97, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.87, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.55, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.9, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.63, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.97, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.87, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.97, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.87, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0025629051961004734, "policy_loss": -0.0027682920917868614, "vf_loss": 7.394125938415527, "vf_explained_var": 0.6789853572845459, "kl": 0.0019060579361394048, "entropy": 1.0680499076843262, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2009600, "num_env_steps_trained": 2009600, "num_agent_steps_sampled": 4019200, "num_agent_steps_trained": 4019200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 417.0, "episode_reward_mean": 549.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 274.66}, "custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 169.32, "shaped_reward_min": 137, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.43, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.11, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.97, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.87, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.55, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.9, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.41, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.97, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.87, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.97, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.87, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 525.0, 573.0, 567.0, 573.0, 519.0, 519.0, 576.0, 552.0, 576.0, 576.0, 519.0, 464.0, 530.0, 570.0, 576.0, 576.0, 519.0, 570.0, 582.0, 522.0, 579.0, 525.0, 573.0, 576.0, 573.0, 525.0, 576.0, 576.0, 530.0, 525.0, 579.0, 570.0, 530.0, 525.0, 516.0, 582.0, 525.0, 579.0, 576.0, 573.0, 530.0, 579.0, 513.0, 533.0, 525.0, 576.0, 533.0, 576.0, 576.0, 579.0, 484.0, 516.0, 573.0, 461.0, 576.0, 476.0, 579.0, 530.0, 518.0, 518.0, 570.0, 576.0, 576.0, 436.0, 576.0, 527.0, 570.0, 573.0, 573.0, 530.0, 582.0, 525.0, 573.0, 570.0, 516.0, 576.0, 570.0, 579.0, 579.0, 417.0, 576.0, 579.0, 525.0, 482.0, 522.0, 579.0, 573.0, 579.0, 576.0, 576.0, 567.0, 570.0, 519.0, 522.0, 519.0, 576.0, 522.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 260.0, 265.0, 288.0, 285.0, 266.0, 301.0, 289.0, 284.0, 265.0, 254.0, 256.0, 263.0, 275.0, 301.0, 272.0, 280.0, 292.0, 284.0, 293.0, 283.0, 252.0, 267.0, 231.0, 233.0, 262.0, 268.0, 288.0, 282.0, 286.0, 290.0, 274.0, 302.0, 263.0, 256.0, 280.0, 290.0, 290.0, 292.0, 263.0, 259.0, 289.0, 290.0, 260.0, 265.0, 289.0, 284.0, 302.0, 274.0, 277.0, 296.0, 254.0, 271.0, 289.0, 287.0, 281.0, 295.0, 274.0, 256.0, 268.0, 257.0, 285.0, 294.0, 283.0, 287.0, 264.0, 266.0, 266.0, 259.0, 267.0, 249.0, 301.0, 281.0, 265.0, 260.0, 282.0, 297.0, 294.0, 282.0, 286.0, 287.0, 262.0, 268.0, 290.0, 289.0, 254.0, 259.0, 271.0, 262.0, 251.0, 274.0, 287.0, 289.0, 267.0, 266.0, 295.0, 281.0, 298.0, 278.0, 277.0, 302.0, 241.0, 243.0, 246.0, 270.0, 277.0, 296.0, 234.0, 227.0, 292.0, 284.0, 237.0, 239.0, 287.0, 292.0, 263.0, 267.0, 271.0, 247.0, 267.0, 251.0, 287.0, 283.0, 293.0, 283.0, 275.0, 301.0, 221.0, 215.0, 292.0, 284.0, 261.0, 266.0, 302.0, 268.0, 285.0, 288.0, 287.0, 286.0, 261.0, 269.0, 289.0, 293.0, 275.0, 250.0, 288.0, 285.0, 302.0, 268.0, 256.0, 260.0, 277.0, 299.0, 286.0, 284.0, 291.0, 288.0, 294.0, 285.0, 224.0, 193.0, 286.0, 290.0, 283.0, 296.0, 246.0, 279.0, 236.0, 246.0, 267.0, 255.0, 285.0, 294.0, 290.0, 283.0, 303.0, 276.0, 283.0, 293.0, 283.0, 293.0, 290.0, 277.0, 275.0, 295.0, 259.0, 260.0, 258.0, 264.0, 257.0, 262.0, 296.0, 280.0, 275.0, 247.0, 278.0, 298.0, 295.0, 278.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6838652052858929, "mean_inference_ms": 1.205993726372932, "mean_action_processing_ms": 0.13233281493670865, "mean_env_wait_ms": 0.8386165066666643, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 417.0, "episode_reward_mean": 549.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 274.66}, "hist_stats": {"episode_reward": [579.0, 525.0, 573.0, 567.0, 573.0, 519.0, 519.0, 576.0, 552.0, 576.0, 576.0, 519.0, 464.0, 530.0, 570.0, 576.0, 576.0, 519.0, 570.0, 582.0, 522.0, 579.0, 525.0, 573.0, 576.0, 573.0, 525.0, 576.0, 576.0, 530.0, 525.0, 579.0, 570.0, 530.0, 525.0, 516.0, 582.0, 525.0, 579.0, 576.0, 573.0, 530.0, 579.0, 513.0, 533.0, 525.0, 576.0, 533.0, 576.0, 576.0, 579.0, 484.0, 516.0, 573.0, 461.0, 576.0, 476.0, 579.0, 530.0, 518.0, 518.0, 570.0, 576.0, 576.0, 436.0, 576.0, 527.0, 570.0, 573.0, 573.0, 530.0, 582.0, 525.0, 573.0, 570.0, 516.0, 576.0, 570.0, 579.0, 579.0, 417.0, 576.0, 579.0, 525.0, 482.0, 522.0, 579.0, 573.0, 579.0, 576.0, 576.0, 567.0, 570.0, 519.0, 522.0, 519.0, 576.0, 522.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 260.0, 265.0, 288.0, 285.0, 266.0, 301.0, 289.0, 284.0, 265.0, 254.0, 256.0, 263.0, 275.0, 301.0, 272.0, 280.0, 292.0, 284.0, 293.0, 283.0, 252.0, 267.0, 231.0, 233.0, 262.0, 268.0, 288.0, 282.0, 286.0, 290.0, 274.0, 302.0, 263.0, 256.0, 280.0, 290.0, 290.0, 292.0, 263.0, 259.0, 289.0, 290.0, 260.0, 265.0, 289.0, 284.0, 302.0, 274.0, 277.0, 296.0, 254.0, 271.0, 289.0, 287.0, 281.0, 295.0, 274.0, 256.0, 268.0, 257.0, 285.0, 294.0, 283.0, 287.0, 264.0, 266.0, 266.0, 259.0, 267.0, 249.0, 301.0, 281.0, 265.0, 260.0, 282.0, 297.0, 294.0, 282.0, 286.0, 287.0, 262.0, 268.0, 290.0, 289.0, 254.0, 
259.0, 271.0, 262.0, 251.0, 274.0, 287.0, 289.0, 267.0, 266.0, 295.0, 281.0, 298.0, 278.0, 277.0, 302.0, 241.0, 243.0, 246.0, 270.0, 277.0, 296.0, 234.0, 227.0, 292.0, 284.0, 237.0, 239.0, 287.0, 292.0, 263.0, 267.0, 271.0, 247.0, 267.0, 251.0, 287.0, 283.0, 293.0, 283.0, 275.0, 301.0, 221.0, 215.0, 292.0, 284.0, 261.0, 266.0, 302.0, 268.0, 285.0, 288.0, 287.0, 286.0, 261.0, 269.0, 289.0, 293.0, 275.0, 250.0, 288.0, 285.0, 302.0, 268.0, 256.0, 260.0, 277.0, 299.0, 286.0, 284.0, 291.0, 288.0, 294.0, 285.0, 224.0, 193.0, 286.0, 290.0, 283.0, 296.0, 246.0, 279.0, 236.0, 246.0, 267.0, 255.0, 285.0, 294.0, 290.0, 283.0, 303.0, 276.0, 283.0, 293.0, 283.0, 293.0, 290.0, 277.0, 275.0, 295.0, 259.0, 260.0, 258.0, 264.0, 257.0, 262.0, 296.0, 280.0, 275.0, 247.0, 278.0, 298.0, 295.0, 278.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6838652052858929, "mean_inference_ms": 1.205993726372932, "mean_action_processing_ms": 0.13233281493670865, "mean_env_wait_ms": 0.8386165066666643, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4019200, "num_agent_steps_trained": 4019200, "num_env_steps_sampled": 2009600, "num_env_steps_trained": 2009600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2009600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4019200, "timers": {"training_iteration_time_ms": 3617.124, "learn_time_ms": 1120.099, "learn_throughput": 11427.556, "synch_weights_time_ms": 11.442}, "counters": {"num_env_steps_sampled": 2009600, "num_env_steps_trained": 2009600, "num_agent_steps_sampled": 4019200, "num_agent_steps_trained": 4019200}, "done": false, "episodes_total": 5024, "training_iteration": 157, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-09", "timestamp": 1666581009, "time_this_iter_s": 3.6556949615478516, "time_total_s": 589.7454166412354, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 589.7454166412354, "timesteps_since_restore": 0, "iterations_since_restore": 157, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.160000000000004, "ram_util_percent": 10.620000000000001}} +{"custom_metrics": {"sparse_reward_mean": 190.4, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 169.76, "shaped_reward_min": 137, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.69, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 14.98, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.33, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.8, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.11, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.12, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.43, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.8, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.11, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.8, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.11, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012510658707469702, "policy_loss": 0.001050335355103016, "vf_loss": 7.332821846008301, "vf_explained_var": 0.6917320489883423, "kl": 0.0018009908962994814, "entropy": 1.0651018619537354, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2022400, "num_env_steps_trained": 2022400, "num_agent_steps_sampled": 4044800, "num_agent_steps_trained": 4044800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 417.0, "episode_reward_mean": 550.56, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 275.28}, "custom_metrics": {"sparse_reward_mean": 190.4, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 169.76, "shaped_reward_min": 137, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.69, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 14.98, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.33, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.8, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.11, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.12, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.43, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.4, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.8, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.11, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.8, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.11, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 530.0, 525.0, 516.0, 582.0, 525.0, 579.0, 576.0, 573.0, 530.0, 579.0, 513.0, 533.0, 525.0, 576.0, 533.0, 576.0, 576.0, 579.0, 484.0, 516.0, 573.0, 461.0, 576.0, 476.0, 579.0, 530.0, 518.0, 518.0, 570.0, 576.0, 576.0, 436.0, 576.0, 527.0, 570.0, 573.0, 573.0, 530.0, 582.0, 525.0, 573.0, 570.0, 516.0, 576.0, 570.0, 579.0, 579.0, 417.0, 576.0, 579.0, 525.0, 482.0, 522.0, 579.0, 573.0, 579.0, 576.0, 576.0, 567.0, 570.0, 519.0, 522.0, 519.0, 576.0, 522.0, 576.0, 573.0, 519.0, 579.0, 576.0, 525.0, 576.0, 530.0, 570.0, 573.0, 530.0, 579.0, 573.0, 573.0, 530.0, 522.0, 570.0, 519.0, 522.0, 570.0, 570.0, 579.0, 573.0, 576.0, 512.0, 582.0, 516.0, 570.0, 530.0, 579.0, 573.0, 582.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 287.0, 264.0, 266.0, 266.0, 259.0, 267.0, 249.0, 301.0, 281.0, 265.0, 260.0, 282.0, 297.0, 294.0, 282.0, 286.0, 287.0, 262.0, 268.0, 290.0, 289.0, 254.0, 259.0, 271.0, 262.0, 251.0, 274.0, 287.0, 289.0, 267.0, 266.0, 295.0, 281.0, 298.0, 278.0, 277.0, 302.0, 241.0, 243.0, 246.0, 270.0, 277.0, 296.0, 234.0, 227.0, 292.0, 284.0, 237.0, 239.0, 287.0, 292.0, 263.0, 267.0, 271.0, 247.0, 267.0, 251.0, 287.0, 283.0, 293.0, 283.0, 275.0, 301.0, 221.0, 215.0, 292.0, 284.0, 261.0, 266.0, 302.0, 268.0, 285.0, 288.0, 287.0, 286.0, 261.0, 269.0, 289.0, 293.0, 275.0, 250.0, 288.0, 285.0, 302.0, 268.0, 256.0, 260.0, 277.0, 299.0, 286.0, 284.0, 291.0, 288.0, 294.0, 285.0, 224.0, 193.0, 286.0, 290.0, 283.0, 296.0, 246.0, 279.0, 236.0, 246.0, 267.0, 255.0, 285.0, 294.0, 290.0, 283.0, 303.0, 276.0, 283.0, 293.0, 283.0, 293.0, 290.0, 277.0, 275.0, 295.0, 259.0, 260.0, 258.0, 264.0, 257.0, 262.0, 296.0, 280.0, 275.0, 247.0, 278.0, 298.0, 295.0, 278.0, 266.0, 253.0, 292.0, 287.0, 299.0, 277.0, 272.0, 253.0, 287.0, 289.0, 259.0, 271.0, 287.0, 283.0, 281.0, 292.0, 268.0, 262.0, 296.0, 283.0, 293.0, 280.0, 282.0, 291.0, 253.0, 277.0, 253.0, 269.0, 280.0, 290.0, 245.0, 274.0, 264.0, 258.0, 282.0, 288.0, 285.0, 285.0, 279.0, 300.0, 282.0, 291.0, 286.0, 290.0, 257.0, 255.0, 301.0, 281.0, 262.0, 254.0, 279.0, 291.0, 257.0, 273.0, 278.0, 301.0, 295.0, 278.0, 286.0, 296.0, 290.0, 286.0, 282.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6837868313458785, "mean_inference_ms": 1.2059317974395252, "mean_action_processing_ms": 0.1323313020756567, "mean_env_wait_ms": 0.838427317675574, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 417.0, "episode_reward_mean": 550.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 275.28}, "hist_stats": {"episode_reward": [570.0, 530.0, 525.0, 516.0, 582.0, 525.0, 579.0, 576.0, 573.0, 530.0, 579.0, 513.0, 533.0, 525.0, 576.0, 533.0, 576.0, 576.0, 579.0, 484.0, 516.0, 573.0, 461.0, 576.0, 476.0, 579.0, 530.0, 518.0, 518.0, 570.0, 576.0, 576.0, 436.0, 576.0, 527.0, 570.0, 573.0, 573.0, 530.0, 582.0, 525.0, 573.0, 570.0, 516.0, 576.0, 570.0, 579.0, 579.0, 417.0, 576.0, 579.0, 525.0, 482.0, 522.0, 579.0, 573.0, 579.0, 576.0, 576.0, 567.0, 570.0, 519.0, 522.0, 519.0, 576.0, 522.0, 576.0, 573.0, 519.0, 579.0, 576.0, 525.0, 576.0, 530.0, 570.0, 573.0, 530.0, 579.0, 573.0, 573.0, 530.0, 522.0, 570.0, 519.0, 522.0, 570.0, 570.0, 579.0, 573.0, 576.0, 512.0, 582.0, 516.0, 570.0, 530.0, 579.0, 573.0, 582.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 287.0, 264.0, 266.0, 266.0, 259.0, 267.0, 249.0, 301.0, 281.0, 265.0, 260.0, 282.0, 297.0, 294.0, 282.0, 286.0, 287.0, 262.0, 268.0, 290.0, 289.0, 254.0, 259.0, 271.0, 262.0, 251.0, 274.0, 287.0, 289.0, 267.0, 266.0, 295.0, 281.0, 298.0, 278.0, 277.0, 302.0, 241.0, 243.0, 246.0, 270.0, 277.0, 296.0, 234.0, 227.0, 292.0, 284.0, 237.0, 239.0, 287.0, 292.0, 263.0, 267.0, 271.0, 247.0, 267.0, 251.0, 287.0, 283.0, 293.0, 283.0, 275.0, 301.0, 221.0, 215.0, 292.0, 284.0, 261.0, 266.0, 302.0, 268.0, 285.0, 288.0, 287.0, 286.0, 261.0, 269.0, 289.0, 293.0, 275.0, 250.0, 288.0, 285.0, 302.0, 268.0, 256.0, 
260.0, 277.0, 299.0, 286.0, 284.0, 291.0, 288.0, 294.0, 285.0, 224.0, 193.0, 286.0, 290.0, 283.0, 296.0, 246.0, 279.0, 236.0, 246.0, 267.0, 255.0, 285.0, 294.0, 290.0, 283.0, 303.0, 276.0, 283.0, 293.0, 283.0, 293.0, 290.0, 277.0, 275.0, 295.0, 259.0, 260.0, 258.0, 264.0, 257.0, 262.0, 296.0, 280.0, 275.0, 247.0, 278.0, 298.0, 295.0, 278.0, 266.0, 253.0, 292.0, 287.0, 299.0, 277.0, 272.0, 253.0, 287.0, 289.0, 259.0, 271.0, 287.0, 283.0, 281.0, 292.0, 268.0, 262.0, 296.0, 283.0, 293.0, 280.0, 282.0, 291.0, 253.0, 277.0, 253.0, 269.0, 280.0, 290.0, 245.0, 274.0, 264.0, 258.0, 282.0, 288.0, 285.0, 285.0, 279.0, 300.0, 282.0, 291.0, 286.0, 290.0, 257.0, 255.0, 301.0, 281.0, 262.0, 254.0, 279.0, 291.0, 257.0, 273.0, 278.0, 301.0, 295.0, 278.0, 286.0, 296.0, 290.0, 286.0, 282.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6837868313458785, "mean_inference_ms": 1.2059317974395252, "mean_action_processing_ms": 0.1323313020756567, "mean_env_wait_ms": 0.838427317675574, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4044800, "num_agent_steps_trained": 4044800, "num_env_steps_sampled": 2022400, "num_env_steps_trained": 2022400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2022400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4044800, "timers": {"training_iteration_time_ms": 3613.095, "learn_time_ms": 1111.871, "learn_throughput": 11512.124, "synch_weights_time_ms": 12.441}, "counters": {"num_env_steps_sampled": 2022400, "num_env_steps_trained": 2022400, "num_agent_steps_sampled": 4044800, "num_agent_steps_trained": 4044800}, "done": false, "episodes_total": 5056, "training_iteration": 158, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-13", "timestamp": 1666581013, "time_this_iter_s": 3.6319639682769775, "time_total_s": 593.3773806095123, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 593.3773806095123, "timesteps_since_restore": 0, "iterations_since_restore": 158, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.720000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 191.4, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 169.91, "shaped_reward_min": 133, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.78, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 14.94, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.88, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.17, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.97, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.88, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.17, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.88, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.17, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0031918580643832684, "policy_loss": -0.0034064347855746746, "vf_loss": 7.42955207824707, "vf_explained_var": 0.682861864566803, "kl": 0.0020245909690856934, "entropy": 1.0567570924758911, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2035200, "num_env_steps_trained": 2035200, "num_agent_steps_sampled": 4070400, "num_agent_steps_trained": 4070400}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 417.0, "episode_reward_mean": 552.71, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 276.355}, "custom_metrics": {"sparse_reward_mean": 191.4, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 169.91, "shaped_reward_min": 133, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.78, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.94, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.88, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.17, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.56, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.97, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.3, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.88, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.17, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.88, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.17, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [436.0, 576.0, 527.0, 570.0, 573.0, 573.0, 530.0, 582.0, 525.0, 573.0, 570.0, 516.0, 576.0, 570.0, 579.0, 579.0, 417.0, 576.0, 579.0, 525.0, 482.0, 522.0, 579.0, 573.0, 579.0, 576.0, 576.0, 567.0, 570.0, 519.0, 522.0, 519.0, 576.0, 522.0, 576.0, 573.0, 519.0, 579.0, 576.0, 525.0, 576.0, 530.0, 570.0, 573.0, 530.0, 579.0, 573.0, 573.0, 530.0, 522.0, 570.0, 519.0, 522.0, 570.0, 570.0, 579.0, 573.0, 576.0, 512.0, 582.0, 516.0, 570.0, 530.0, 579.0, 573.0, 582.0, 576.0, 570.0, 522.0, 519.0, 579.0, 522.0, 525.0, 573.0, 579.0, 573.0, 573.0, 579.0, 576.0, 573.0, 573.0, 501.0, 561.0, 530.0, 522.0, 579.0, 522.0, 530.0, 573.0, 579.0, 582.0, 522.0, 530.0, 582.0, 453.0, 573.0, 579.0, 522.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [221.0, 215.0, 292.0, 284.0, 261.0, 266.0, 302.0, 268.0, 285.0, 288.0, 287.0, 286.0, 261.0, 269.0, 289.0, 293.0, 275.0, 250.0, 288.0, 285.0, 302.0, 268.0, 256.0, 260.0, 277.0, 299.0, 286.0, 284.0, 291.0, 288.0, 294.0, 285.0, 224.0, 193.0, 286.0, 290.0, 283.0, 296.0, 246.0, 279.0, 236.0, 246.0, 267.0, 255.0, 285.0, 294.0, 290.0, 283.0, 303.0, 276.0, 283.0, 293.0, 283.0, 293.0, 290.0, 277.0, 275.0, 295.0, 259.0, 260.0, 258.0, 264.0, 257.0, 262.0, 296.0, 280.0, 275.0, 247.0, 278.0, 298.0, 295.0, 278.0, 266.0, 253.0, 292.0, 287.0, 299.0, 277.0, 272.0, 253.0, 287.0, 289.0, 259.0, 271.0, 287.0, 283.0, 281.0, 292.0, 268.0, 262.0, 296.0, 283.0, 293.0, 280.0, 282.0, 291.0, 253.0, 277.0, 253.0, 269.0, 280.0, 290.0, 245.0, 274.0, 264.0, 258.0, 282.0, 288.0, 285.0, 285.0, 279.0, 300.0, 282.0, 291.0, 286.0, 290.0, 257.0, 255.0, 301.0, 281.0, 262.0, 254.0, 279.0, 291.0, 257.0, 273.0, 278.0, 301.0, 295.0, 278.0, 286.0, 296.0, 290.0, 286.0, 282.0, 288.0, 249.0, 273.0, 263.0, 256.0, 295.0, 284.0, 264.0, 258.0, 263.0, 262.0, 292.0, 281.0, 294.0, 285.0, 283.0, 290.0, 286.0, 287.0, 286.0, 293.0, 285.0, 291.0, 287.0, 286.0, 266.0, 307.0, 246.0, 255.0, 280.0, 281.0, 259.0, 271.0, 268.0, 254.0, 288.0, 291.0, 261.0, 261.0, 273.0, 257.0, 293.0, 280.0, 290.0, 289.0, 284.0, 298.0, 260.0, 262.0, 258.0, 272.0, 294.0, 288.0, 204.0, 249.0, 290.0, 283.0, 297.0, 282.0, 264.0, 258.0, 283.0, 296.0, 306.0, 273.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6837424136699048, "mean_inference_ms": 1.205881311016033, "mean_action_processing_ms": 0.13233429471967506, "mean_env_wait_ms": 0.8382726198110214, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 417.0, "episode_reward_mean": 552.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 276.355}, "hist_stats": {"episode_reward": [436.0, 576.0, 527.0, 570.0, 573.0, 573.0, 530.0, 582.0, 525.0, 573.0, 570.0, 516.0, 576.0, 570.0, 579.0, 579.0, 417.0, 576.0, 579.0, 525.0, 482.0, 522.0, 579.0, 573.0, 579.0, 576.0, 576.0, 567.0, 570.0, 519.0, 522.0, 519.0, 576.0, 522.0, 576.0, 573.0, 519.0, 579.0, 576.0, 525.0, 576.0, 530.0, 570.0, 573.0, 530.0, 579.0, 573.0, 573.0, 530.0, 522.0, 570.0, 519.0, 522.0, 570.0, 570.0, 579.0, 573.0, 576.0, 512.0, 582.0, 516.0, 570.0, 530.0, 579.0, 573.0, 582.0, 576.0, 570.0, 522.0, 519.0, 579.0, 522.0, 525.0, 573.0, 579.0, 573.0, 573.0, 579.0, 576.0, 573.0, 573.0, 501.0, 561.0, 530.0, 522.0, 579.0, 522.0, 530.0, 573.0, 579.0, 582.0, 522.0, 530.0, 582.0, 453.0, 573.0, 579.0, 522.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [221.0, 215.0, 292.0, 284.0, 261.0, 266.0, 302.0, 268.0, 285.0, 288.0, 287.0, 286.0, 261.0, 269.0, 289.0, 293.0, 275.0, 250.0, 288.0, 285.0, 302.0, 268.0, 256.0, 260.0, 277.0, 299.0, 286.0, 284.0, 291.0, 288.0, 294.0, 285.0, 224.0, 193.0, 286.0, 290.0, 283.0, 296.0, 246.0, 279.0, 236.0, 246.0, 267.0, 255.0, 285.0, 294.0, 290.0, 283.0, 303.0, 276.0, 283.0, 293.0, 283.0, 293.0, 290.0, 277.0, 275.0, 295.0, 259.0, 260.0, 258.0, 264.0, 257.0, 262.0, 296.0, 280.0, 275.0, 247.0, 278.0, 298.0, 295.0, 278.0, 266.0, 253.0, 292.0, 287.0, 299.0, 277.0, 272.0, 253.0, 287.0, 289.0, 259.0, 271.0, 287.0, 283.0, 
281.0, 292.0, 268.0, 262.0, 296.0, 283.0, 293.0, 280.0, 282.0, 291.0, 253.0, 277.0, 253.0, 269.0, 280.0, 290.0, 245.0, 274.0, 264.0, 258.0, 282.0, 288.0, 285.0, 285.0, 279.0, 300.0, 282.0, 291.0, 286.0, 290.0, 257.0, 255.0, 301.0, 281.0, 262.0, 254.0, 279.0, 291.0, 257.0, 273.0, 278.0, 301.0, 295.0, 278.0, 286.0, 296.0, 290.0, 286.0, 282.0, 288.0, 249.0, 273.0, 263.0, 256.0, 295.0, 284.0, 264.0, 258.0, 263.0, 262.0, 292.0, 281.0, 294.0, 285.0, 283.0, 290.0, 286.0, 287.0, 286.0, 293.0, 285.0, 291.0, 287.0, 286.0, 266.0, 307.0, 246.0, 255.0, 280.0, 281.0, 259.0, 271.0, 268.0, 254.0, 288.0, 291.0, 261.0, 261.0, 273.0, 257.0, 293.0, 280.0, 290.0, 289.0, 284.0, 298.0, 260.0, 262.0, 258.0, 272.0, 294.0, 288.0, 204.0, 249.0, 290.0, 283.0, 297.0, 282.0, 264.0, 258.0, 283.0, 296.0, 306.0, 273.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6837424136699048, "mean_inference_ms": 1.205881311016033, "mean_action_processing_ms": 0.13233429471967506, "mean_env_wait_ms": 0.8382726198110214, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4070400, "num_agent_steps_trained": 4070400, "num_env_steps_sampled": 2035200, "num_env_steps_trained": 2035200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2035200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4070400, "timers": {"training_iteration_time_ms": 3599.198, "learn_time_ms": 1093.268, "learn_throughput": 11708.018, "synch_weights_time_ms": 12.338}, "counters": {"num_env_steps_sampled": 2035200, "num_env_steps_trained": 2035200, "num_agent_steps_sampled": 4070400, "num_agent_steps_trained": 4070400}, "done": false, "episodes_total": 5088, "training_iteration": 159, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-17", "timestamp": 1666581017, "time_this_iter_s": 3.688915967941284, "time_total_s": 597.0662965774536, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 597.0662965774536, "timesteps_since_restore": 0, "iterations_since_restore": 159, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.349999999999998, "ram_util_percent": 10.633333333333333}} +{"custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 169.5, "shaped_reward_min": 82, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.83, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.37, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.45, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.97, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.67, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009515164420008659, "policy_loss": -0.0011683932971209288, "vf_loss": 7.463791847229004, "vf_explained_var": 0.6746877431869507, "kl": 0.0020292492117732763, "entropy": 1.0590016841888428, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2048000, "num_env_steps_trained": 2048000, "num_agent_steps_sampled": 4096000, "num_agent_steps_trained": 4096000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 282.0, "episode_reward_mean": 551.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 137.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 275.95}, "custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 169.5, "shaped_reward_min": 82, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.83, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.37, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.45, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.29, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.97, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 522.0, 576.0, 573.0, 519.0, 579.0, 576.0, 525.0, 576.0, 530.0, 570.0, 573.0, 530.0, 579.0, 573.0, 573.0, 530.0, 522.0, 570.0, 519.0, 522.0, 570.0, 570.0, 579.0, 573.0, 576.0, 512.0, 582.0, 516.0, 570.0, 530.0, 579.0, 573.0, 582.0, 576.0, 570.0, 522.0, 519.0, 579.0, 522.0, 525.0, 573.0, 579.0, 573.0, 573.0, 579.0, 576.0, 573.0, 573.0, 501.0, 561.0, 530.0, 522.0, 579.0, 522.0, 530.0, 573.0, 579.0, 582.0, 522.0, 530.0, 582.0, 453.0, 573.0, 579.0, 522.0, 579.0, 579.0, 579.0, 522.0, 579.0, 521.0, 573.0, 579.0, 579.0, 573.0, 573.0, 579.0, 533.0, 519.0, 525.0, 522.0, 573.0, 530.0, 476.0, 582.0, 576.0, 282.0, 579.0, 573.0, 573.0, 555.0, 530.0, 579.0, 576.0, 522.0, 579.0, 567.0, 525.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 280.0, 275.0, 247.0, 278.0, 298.0, 295.0, 278.0, 266.0, 253.0, 292.0, 287.0, 299.0, 277.0, 272.0, 253.0, 287.0, 289.0, 259.0, 271.0, 287.0, 283.0, 281.0, 292.0, 268.0, 262.0, 296.0, 283.0, 293.0, 280.0, 282.0, 291.0, 253.0, 277.0, 253.0, 269.0, 280.0, 290.0, 245.0, 274.0, 264.0, 258.0, 282.0, 288.0, 285.0, 285.0, 279.0, 300.0, 282.0, 291.0, 286.0, 290.0, 257.0, 255.0, 301.0, 281.0, 262.0, 254.0, 279.0, 291.0, 257.0, 273.0, 278.0, 301.0, 295.0, 278.0, 286.0, 296.0, 290.0, 286.0, 282.0, 288.0, 249.0, 273.0, 263.0, 256.0, 295.0, 284.0, 264.0, 258.0, 263.0, 262.0, 292.0, 281.0, 294.0, 285.0, 283.0, 290.0, 286.0, 287.0, 286.0, 293.0, 285.0, 291.0, 287.0, 286.0, 266.0, 307.0, 246.0, 255.0, 280.0, 281.0, 259.0, 271.0, 268.0, 254.0, 288.0, 291.0, 261.0, 261.0, 273.0, 257.0, 293.0, 280.0, 290.0, 289.0, 284.0, 298.0, 260.0, 262.0, 258.0, 272.0, 294.0, 288.0, 204.0, 249.0, 290.0, 283.0, 297.0, 282.0, 264.0, 258.0, 283.0, 296.0, 306.0, 273.0, 287.0, 292.0, 261.0, 261.0, 293.0, 286.0, 271.0, 250.0, 283.0, 290.0, 283.0, 296.0, 290.0, 289.0, 282.0, 291.0, 293.0, 280.0, 290.0, 289.0, 265.0, 268.0, 250.0, 269.0, 252.0, 273.0, 248.0, 274.0, 278.0, 295.0, 264.0, 266.0, 240.0, 236.0, 303.0, 279.0, 284.0, 292.0, 145.0, 137.0, 285.0, 294.0, 293.0, 280.0, 284.0, 289.0, 264.0, 291.0, 259.0, 271.0, 289.0, 290.0, 300.0, 276.0, 250.0, 272.0, 285.0, 294.0, 281.0, 286.0, 273.0, 252.0, 254.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836964337068232, "mean_inference_ms": 1.2058291748355823, "mean_action_processing_ms": 0.13234047861272333, "mean_env_wait_ms": 0.8381292335140906, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 282.0, "episode_reward_mean": 551.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 137.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 275.95}, "hist_stats": {"episode_reward": [576.0, 522.0, 576.0, 573.0, 519.0, 579.0, 576.0, 525.0, 576.0, 530.0, 570.0, 573.0, 530.0, 579.0, 573.0, 573.0, 530.0, 522.0, 570.0, 519.0, 522.0, 570.0, 570.0, 579.0, 573.0, 576.0, 512.0, 582.0, 516.0, 570.0, 530.0, 579.0, 573.0, 582.0, 576.0, 570.0, 522.0, 519.0, 579.0, 522.0, 525.0, 573.0, 579.0, 573.0, 573.0, 579.0, 576.0, 573.0, 573.0, 501.0, 561.0, 530.0, 522.0, 579.0, 522.0, 530.0, 573.0, 579.0, 582.0, 522.0, 530.0, 582.0, 453.0, 573.0, 579.0, 522.0, 579.0, 579.0, 579.0, 522.0, 579.0, 521.0, 573.0, 579.0, 579.0, 573.0, 573.0, 579.0, 533.0, 519.0, 525.0, 522.0, 573.0, 530.0, 476.0, 582.0, 576.0, 282.0, 579.0, 573.0, 573.0, 555.0, 530.0, 579.0, 576.0, 522.0, 579.0, 567.0, 525.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 280.0, 275.0, 247.0, 278.0, 298.0, 295.0, 278.0, 266.0, 253.0, 292.0, 287.0, 299.0, 277.0, 272.0, 253.0, 287.0, 289.0, 259.0, 271.0, 287.0, 283.0, 281.0, 292.0, 268.0, 262.0, 296.0, 283.0, 293.0, 280.0, 282.0, 291.0, 253.0, 277.0, 253.0, 269.0, 280.0, 290.0, 245.0, 274.0, 264.0, 258.0, 282.0, 288.0, 285.0, 285.0, 279.0, 300.0, 282.0, 291.0, 286.0, 290.0, 257.0, 255.0, 301.0, 281.0, 262.0, 254.0, 279.0, 291.0, 257.0, 273.0, 278.0, 301.0, 295.0, 278.0, 286.0, 296.0, 290.0, 286.0, 282.0, 288.0, 249.0, 273.0, 263.0, 256.0, 295.0, 284.0, 264.0, 258.0, 263.0, 262.0, 292.0, 281.0, 294.0, 285.0, 283.0, 
290.0, 286.0, 287.0, 286.0, 293.0, 285.0, 291.0, 287.0, 286.0, 266.0, 307.0, 246.0, 255.0, 280.0, 281.0, 259.0, 271.0, 268.0, 254.0, 288.0, 291.0, 261.0, 261.0, 273.0, 257.0, 293.0, 280.0, 290.0, 289.0, 284.0, 298.0, 260.0, 262.0, 258.0, 272.0, 294.0, 288.0, 204.0, 249.0, 290.0, 283.0, 297.0, 282.0, 264.0, 258.0, 283.0, 296.0, 306.0, 273.0, 287.0, 292.0, 261.0, 261.0, 293.0, 286.0, 271.0, 250.0, 283.0, 290.0, 283.0, 296.0, 290.0, 289.0, 282.0, 291.0, 293.0, 280.0, 290.0, 289.0, 265.0, 268.0, 250.0, 269.0, 252.0, 273.0, 248.0, 274.0, 278.0, 295.0, 264.0, 266.0, 240.0, 236.0, 303.0, 279.0, 284.0, 292.0, 145.0, 137.0, 285.0, 294.0, 293.0, 280.0, 284.0, 289.0, 264.0, 291.0, 259.0, 271.0, 289.0, 290.0, 300.0, 276.0, 250.0, 272.0, 285.0, 294.0, 281.0, 286.0, 273.0, 252.0, 254.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836964337068232, "mean_inference_ms": 1.2058291748355823, "mean_action_processing_ms": 0.13234047861272333, "mean_env_wait_ms": 0.8381292335140906, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4096000, "num_agent_steps_trained": 4096000, "num_env_steps_sampled": 2048000, "num_env_steps_trained": 2048000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2048000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4096000, "timers": {"training_iteration_time_ms": 3595.401, "learn_time_ms": 1086.959, "learn_throughput": 11775.969, "synch_weights_time_ms": 11.722}, "counters": {"num_env_steps_sampled": 2048000, "num_env_steps_trained": 2048000, "num_agent_steps_sampled": 4096000, "num_agent_steps_trained": 4096000}, "done": false, "episodes_total": 5120, "training_iteration": 160, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-21", "timestamp": 1666581021, "time_this_iter_s": 3.5925498008728027, "time_total_s": 600.6588463783264, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 600.6588463783264, "timesteps_since_restore": 0, "iterations_since_restore": 160, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.16, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 169.95, "shaped_reward_min": 82, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.68, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.54, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.2, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.95, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.78, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.8, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.81, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.95, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.95, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019562705419957638, "policy_loss": 0.0017559619154781103, "vf_loss": 7.291747093200684, "vf_explained_var": 0.6828951835632324, "kl": 0.0024195481091737747, "entropy": 1.0577306747436523, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2060800, "num_env_steps_trained": 2060800, "num_agent_steps_sampled": 4121600, "num_agent_steps_trained": 4121600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 282.0, "episode_reward_mean": 552.35, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 137.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 276.175}, "custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 169.95, "shaped_reward_min": 82, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.68, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.54, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.2, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.95, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.78, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.3, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.8, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.81, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.95, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.95, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 576.0, 570.0, 522.0, 519.0, 579.0, 522.0, 525.0, 573.0, 579.0, 573.0, 573.0, 579.0, 576.0, 573.0, 573.0, 501.0, 561.0, 530.0, 522.0, 579.0, 522.0, 530.0, 573.0, 579.0, 582.0, 522.0, 530.0, 582.0, 453.0, 573.0, 579.0, 522.0, 579.0, 579.0, 579.0, 522.0, 579.0, 521.0, 573.0, 579.0, 579.0, 573.0, 573.0, 579.0, 533.0, 519.0, 525.0, 522.0, 573.0, 530.0, 476.0, 582.0, 576.0, 282.0, 579.0, 573.0, 573.0, 555.0, 530.0, 579.0, 576.0, 522.0, 579.0, 567.0, 525.0, 522.0, 570.0, 525.0, 522.0, 570.0, 579.0, 536.0, 576.0, 522.0, 576.0, 519.0, 533.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 573.0, 519.0, 533.0, 579.0, 530.0, 579.0, 579.0, 579.0, 576.0, 573.0, 525.0, 579.0, 564.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 278.0, 286.0, 296.0, 290.0, 286.0, 282.0, 288.0, 249.0, 273.0, 263.0, 256.0, 295.0, 284.0, 264.0, 258.0, 263.0, 262.0, 292.0, 281.0, 294.0, 285.0, 283.0, 290.0, 286.0, 287.0, 286.0, 293.0, 285.0, 291.0, 287.0, 286.0, 266.0, 307.0, 246.0, 255.0, 280.0, 281.0, 259.0, 271.0, 268.0, 254.0, 288.0, 291.0, 261.0, 261.0, 273.0, 257.0, 293.0, 280.0, 290.0, 289.0, 284.0, 298.0, 260.0, 262.0, 258.0, 272.0, 294.0, 288.0, 204.0, 249.0, 290.0, 283.0, 297.0, 282.0, 264.0, 258.0, 283.0, 296.0, 306.0, 273.0, 287.0, 292.0, 261.0, 261.0, 293.0, 286.0, 271.0, 250.0, 283.0, 290.0, 283.0, 296.0, 290.0, 289.0, 282.0, 291.0, 293.0, 280.0, 290.0, 289.0, 265.0, 268.0, 250.0, 269.0, 252.0, 273.0, 248.0, 274.0, 278.0, 295.0, 264.0, 266.0, 240.0, 236.0, 303.0, 279.0, 284.0, 292.0, 145.0, 137.0, 285.0, 294.0, 293.0, 280.0, 284.0, 289.0, 264.0, 291.0, 259.0, 271.0, 289.0, 290.0, 300.0, 276.0, 250.0, 272.0, 285.0, 294.0, 281.0, 286.0, 273.0, 252.0, 254.0, 268.0, 276.0, 294.0, 267.0, 258.0, 268.0, 254.0, 289.0, 281.0, 296.0, 283.0, 270.0, 266.0, 288.0, 288.0, 258.0, 264.0, 289.0, 287.0, 249.0, 270.0, 270.0, 263.0, 293.0, 280.0, 273.0, 257.0, 291.0, 285.0, 281.0, 298.0, 283.0, 287.0, 293.0, 286.0, 252.0, 270.0, 278.0, 295.0, 267.0, 252.0, 263.0, 270.0, 282.0, 297.0, 268.0, 262.0, 287.0, 292.0, 292.0, 287.0, 284.0, 295.0, 276.0, 300.0, 280.0, 293.0, 261.0, 264.0, 285.0, 294.0, 276.0, 288.0, 283.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836692176439121, "mean_inference_ms": 1.2057925696042, "mean_action_processing_ms": 0.13234922853721529, "mean_env_wait_ms": 0.8380159133935706, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 282.0, "episode_reward_mean": 552.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 137.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 276.175}, "hist_stats": {"episode_reward": [573.0, 582.0, 576.0, 570.0, 522.0, 519.0, 579.0, 522.0, 525.0, 573.0, 579.0, 573.0, 573.0, 579.0, 576.0, 573.0, 573.0, 501.0, 561.0, 530.0, 522.0, 579.0, 522.0, 530.0, 573.0, 579.0, 582.0, 522.0, 530.0, 582.0, 453.0, 573.0, 579.0, 522.0, 579.0, 579.0, 579.0, 522.0, 579.0, 521.0, 573.0, 579.0, 579.0, 573.0, 573.0, 579.0, 533.0, 519.0, 525.0, 522.0, 573.0, 530.0, 476.0, 582.0, 576.0, 282.0, 579.0, 573.0, 573.0, 555.0, 530.0, 579.0, 576.0, 522.0, 579.0, 567.0, 525.0, 522.0, 570.0, 525.0, 522.0, 570.0, 579.0, 536.0, 576.0, 522.0, 576.0, 519.0, 533.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 573.0, 519.0, 533.0, 579.0, 530.0, 579.0, 579.0, 579.0, 576.0, 573.0, 525.0, 579.0, 564.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 278.0, 286.0, 296.0, 290.0, 286.0, 282.0, 288.0, 249.0, 273.0, 263.0, 256.0, 295.0, 284.0, 264.0, 258.0, 263.0, 262.0, 292.0, 281.0, 294.0, 285.0, 283.0, 290.0, 286.0, 287.0, 286.0, 293.0, 285.0, 291.0, 287.0, 286.0, 266.0, 307.0, 246.0, 255.0, 280.0, 281.0, 259.0, 271.0, 268.0, 254.0, 288.0, 291.0, 261.0, 261.0, 273.0, 257.0, 293.0, 280.0, 290.0, 289.0, 284.0, 298.0, 260.0, 262.0, 258.0, 272.0, 294.0, 288.0, 204.0, 249.0, 290.0, 283.0, 297.0, 282.0, 264.0, 258.0, 283.0, 296.0, 306.0, 273.0, 287.0, 292.0, 261.0, 261.0, 293.0, 286.0, 271.0, 250.0, 283.0, 290.0, 283.0, 296.0, 290.0, 289.0, 
282.0, 291.0, 293.0, 280.0, 290.0, 289.0, 265.0, 268.0, 250.0, 269.0, 252.0, 273.0, 248.0, 274.0, 278.0, 295.0, 264.0, 266.0, 240.0, 236.0, 303.0, 279.0, 284.0, 292.0, 145.0, 137.0, 285.0, 294.0, 293.0, 280.0, 284.0, 289.0, 264.0, 291.0, 259.0, 271.0, 289.0, 290.0, 300.0, 276.0, 250.0, 272.0, 285.0, 294.0, 281.0, 286.0, 273.0, 252.0, 254.0, 268.0, 276.0, 294.0, 267.0, 258.0, 268.0, 254.0, 289.0, 281.0, 296.0, 283.0, 270.0, 266.0, 288.0, 288.0, 258.0, 264.0, 289.0, 287.0, 249.0, 270.0, 270.0, 263.0, 293.0, 280.0, 273.0, 257.0, 291.0, 285.0, 281.0, 298.0, 283.0, 287.0, 293.0, 286.0, 252.0, 270.0, 278.0, 295.0, 267.0, 252.0, 263.0, 270.0, 282.0, 297.0, 268.0, 262.0, 287.0, 292.0, 292.0, 287.0, 284.0, 295.0, 276.0, 300.0, 280.0, 293.0, 261.0, 264.0, 285.0, 294.0, 276.0, 288.0, 283.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836692176439121, "mean_inference_ms": 1.2057925696042, "mean_action_processing_ms": 0.13234922853721529, "mean_env_wait_ms": 0.8380159133935706, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4121600, "num_agent_steps_trained": 4121600, "num_env_steps_sampled": 2060800, "num_env_steps_trained": 2060800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2060800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4121600, "timers": {"training_iteration_time_ms": 3609.109, "learn_time_ms": 1096.295, "learn_throughput": 11675.688, "synch_weights_time_ms": 12.439}, "counters": {"num_env_steps_sampled": 2060800, "num_env_steps_trained": 2060800, "num_agent_steps_sampled": 4121600, "num_agent_steps_trained": 4121600}, "done": false, "episodes_total": 5152, "training_iteration": 161, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-25", "timestamp": 1666581025, "time_this_iter_s": 3.668421745300293, "time_total_s": 604.3272681236267, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 604.3272681236267, "timesteps_since_restore": 0, "iterations_since_restore": 161, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.283333333333335, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 170.53, "shaped_reward_min": 82, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.86, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.31, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 15.45, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.97, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.32, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.78, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.22, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.8, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.32, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.78, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.32, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.78, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015147824306041002, "policy_loss": -0.001708789961412549, "vf_loss": 7.2465362548828125, "vf_explained_var": 0.684109091758728, "kl": 0.002135517541319132, "entropy": 1.0612905025482178, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2073600, "num_env_steps_trained": 2073600, "num_agent_steps_sampled": 4147200, "num_agent_steps_trained": 4147200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 282.0, "episode_reward_mean": 554.13, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 137.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 277.065}, "custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 170.53, "shaped_reward_min": 82, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.86, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.31, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.45, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.97, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.32, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.78, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.22, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.33, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.8, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.32, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.78, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.32, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.78, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 522.0, 579.0, 579.0, 579.0, 522.0, 579.0, 521.0, 573.0, 579.0, 579.0, 573.0, 573.0, 579.0, 533.0, 519.0, 525.0, 522.0, 573.0, 530.0, 476.0, 582.0, 576.0, 282.0, 579.0, 573.0, 573.0, 555.0, 530.0, 579.0, 576.0, 522.0, 579.0, 567.0, 525.0, 522.0, 570.0, 525.0, 522.0, 570.0, 579.0, 536.0, 576.0, 522.0, 576.0, 519.0, 533.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 573.0, 519.0, 533.0, 579.0, 530.0, 579.0, 579.0, 579.0, 576.0, 573.0, 525.0, 579.0, 564.0, 570.0, 519.0, 567.0, 564.0, 573.0, 576.0, 576.0, 579.0, 533.0, 576.0, 530.0, 525.0, 573.0, 579.0, 570.0, 573.0, 573.0, 570.0, 570.0, 579.0, 516.0, 576.0, 530.0, 482.0, 576.0, 576.0, 527.0, 573.0, 582.0, 579.0, 522.0, 579.0, 561.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 264.0, 258.0, 283.0, 296.0, 306.0, 273.0, 287.0, 292.0, 261.0, 261.0, 293.0, 286.0, 271.0, 250.0, 283.0, 290.0, 283.0, 296.0, 290.0, 289.0, 282.0, 291.0, 293.0, 280.0, 290.0, 289.0, 265.0, 268.0, 250.0, 269.0, 252.0, 273.0, 248.0, 274.0, 278.0, 295.0, 264.0, 266.0, 240.0, 236.0, 303.0, 279.0, 284.0, 292.0, 145.0, 137.0, 285.0, 294.0, 293.0, 280.0, 284.0, 289.0, 264.0, 291.0, 259.0, 271.0, 289.0, 290.0, 300.0, 276.0, 250.0, 272.0, 285.0, 294.0, 281.0, 286.0, 273.0, 252.0, 254.0, 268.0, 276.0, 294.0, 267.0, 258.0, 268.0, 254.0, 289.0, 281.0, 296.0, 283.0, 270.0, 266.0, 288.0, 288.0, 258.0, 264.0, 289.0, 287.0, 249.0, 270.0, 270.0, 263.0, 293.0, 280.0, 273.0, 257.0, 291.0, 285.0, 281.0, 298.0, 283.0, 287.0, 293.0, 286.0, 252.0, 270.0, 278.0, 295.0, 267.0, 252.0, 263.0, 270.0, 282.0, 297.0, 268.0, 262.0, 287.0, 292.0, 292.0, 287.0, 284.0, 295.0, 276.0, 300.0, 280.0, 293.0, 261.0, 264.0, 285.0, 294.0, 276.0, 288.0, 283.0, 287.0, 264.0, 255.0, 301.0, 266.0, 279.0, 285.0, 278.0, 295.0, 292.0, 284.0, 300.0, 276.0, 289.0, 290.0, 271.0, 262.0, 288.0, 288.0, 265.0, 265.0, 262.0, 263.0, 286.0, 287.0, 290.0, 289.0, 279.0, 291.0, 288.0, 285.0, 291.0, 282.0, 280.0, 290.0, 293.0, 277.0, 292.0, 287.0, 242.0, 274.0, 278.0, 298.0, 270.0, 260.0, 236.0, 246.0, 295.0, 281.0, 292.0, 284.0, 260.0, 267.0, 283.0, 290.0, 286.0, 296.0, 291.0, 288.0, 258.0, 264.0, 278.0, 301.0, 274.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836431633165089, "mean_inference_ms": 1.2057460710873107, "mean_action_processing_ms": 0.13235222793419196, "mean_env_wait_ms": 0.8378768884664711, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 282.0, "episode_reward_mean": 554.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 137.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 277.065}, "hist_stats": {"episode_reward": [579.0, 522.0, 579.0, 579.0, 579.0, 522.0, 579.0, 521.0, 573.0, 579.0, 579.0, 573.0, 573.0, 579.0, 533.0, 519.0, 525.0, 522.0, 573.0, 530.0, 476.0, 582.0, 576.0, 282.0, 579.0, 573.0, 573.0, 555.0, 530.0, 579.0, 576.0, 522.0, 579.0, 567.0, 525.0, 522.0, 570.0, 525.0, 522.0, 570.0, 579.0, 536.0, 576.0, 522.0, 576.0, 519.0, 533.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 573.0, 519.0, 533.0, 579.0, 530.0, 579.0, 579.0, 579.0, 576.0, 573.0, 525.0, 579.0, 564.0, 570.0, 519.0, 567.0, 564.0, 573.0, 576.0, 576.0, 579.0, 533.0, 576.0, 530.0, 525.0, 573.0, 579.0, 570.0, 573.0, 573.0, 570.0, 570.0, 579.0, 516.0, 576.0, 530.0, 482.0, 576.0, 576.0, 527.0, 573.0, 582.0, 579.0, 522.0, 579.0, 561.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 264.0, 258.0, 283.0, 296.0, 306.0, 273.0, 287.0, 292.0, 261.0, 261.0, 293.0, 286.0, 271.0, 250.0, 283.0, 290.0, 283.0, 296.0, 290.0, 289.0, 282.0, 291.0, 293.0, 280.0, 290.0, 289.0, 265.0, 268.0, 250.0, 269.0, 252.0, 273.0, 248.0, 274.0, 278.0, 295.0, 264.0, 266.0, 240.0, 236.0, 303.0, 279.0, 284.0, 292.0, 145.0, 137.0, 285.0, 294.0, 293.0, 280.0, 284.0, 289.0, 264.0, 291.0, 259.0, 271.0, 289.0, 290.0, 300.0, 276.0, 250.0, 272.0, 285.0, 294.0, 281.0, 286.0, 273.0, 252.0, 254.0, 268.0, 276.0, 294.0, 267.0, 258.0, 268.0, 254.0, 289.0, 281.0, 296.0, 283.0, 270.0, 266.0, 288.0, 288.0, 
258.0, 264.0, 289.0, 287.0, 249.0, 270.0, 270.0, 263.0, 293.0, 280.0, 273.0, 257.0, 291.0, 285.0, 281.0, 298.0, 283.0, 287.0, 293.0, 286.0, 252.0, 270.0, 278.0, 295.0, 267.0, 252.0, 263.0, 270.0, 282.0, 297.0, 268.0, 262.0, 287.0, 292.0, 292.0, 287.0, 284.0, 295.0, 276.0, 300.0, 280.0, 293.0, 261.0, 264.0, 285.0, 294.0, 276.0, 288.0, 283.0, 287.0, 264.0, 255.0, 301.0, 266.0, 279.0, 285.0, 278.0, 295.0, 292.0, 284.0, 300.0, 276.0, 289.0, 290.0, 271.0, 262.0, 288.0, 288.0, 265.0, 265.0, 262.0, 263.0, 286.0, 287.0, 290.0, 289.0, 279.0, 291.0, 288.0, 285.0, 291.0, 282.0, 280.0, 290.0, 293.0, 277.0, 292.0, 287.0, 242.0, 274.0, 278.0, 298.0, 270.0, 260.0, 236.0, 246.0, 295.0, 281.0, 292.0, 284.0, 260.0, 267.0, 283.0, 290.0, 286.0, 296.0, 291.0, 288.0, 258.0, 264.0, 278.0, 301.0, 274.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836431633165089, "mean_inference_ms": 1.2057460710873107, "mean_action_processing_ms": 0.13235222793419196, "mean_env_wait_ms": 0.8378768884664711, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4147200, "num_agent_steps_trained": 4147200, "num_env_steps_sampled": 2073600, "num_env_steps_trained": 2073600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2073600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4147200, "timers": {"training_iteration_time_ms": 3604.206, "learn_time_ms": 1093.522, "learn_throughput": 11705.3, "synch_weights_time_ms": 12.527}, "counters": {"num_env_steps_sampled": 2073600, "num_env_steps_trained": 2073600, "num_agent_steps_sampled": 4147200, "num_agent_steps_trained": 4147200}, "done": false, "episodes_total": 5184, "training_iteration": 162, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-28", "timestamp": 1666581028, "time_this_iter_s": 3.661637544631958, "time_total_s": 607.9889056682587, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 607.9889056682587, "timesteps_since_restore": 0, "iterations_since_restore": 162, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.88, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 168.45, "shaped_reward_min": 26, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.64, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.05, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 26, 
"useful_onion_pickup_agent_0_mean": 15.19, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.72, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.12, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.57, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.79, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.12, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.57, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.12, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.57, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005986830219626427, "policy_loss": 0.00038289371877908707, "vf_loss": 7.4827728271484375, "vf_explained_var": 0.6809073090553284, "kl": 0.001983621623367071, "entropy": 1.0649769306182861, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2086400, "num_env_steps_trained": 2086400, "num_agent_steps_sampled": 4172800, "num_agent_steps_trained": 4172800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 66.0, "episode_reward_mean": 546.85, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 273.425}, "custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 168.45, "shaped_reward_min": 26, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.64, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.05, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.19, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.72, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.12, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.57, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.79, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 
0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.12, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.57, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.12, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.57, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, 
"catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 567.0, 525.0, 522.0, 570.0, 525.0, 522.0, 570.0, 579.0, 536.0, 576.0, 522.0, 576.0, 519.0, 533.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 573.0, 519.0, 533.0, 579.0, 530.0, 579.0, 579.0, 579.0, 576.0, 573.0, 525.0, 579.0, 564.0, 570.0, 519.0, 567.0, 564.0, 573.0, 576.0, 576.0, 579.0, 533.0, 576.0, 530.0, 525.0, 573.0, 579.0, 570.0, 573.0, 573.0, 570.0, 570.0, 579.0, 516.0, 576.0, 530.0, 482.0, 576.0, 576.0, 527.0, 573.0, 582.0, 579.0, 522.0, 579.0, 561.0, 567.0, 513.0, 522.0, 66.0, 530.0, 522.0, 533.0, 473.0, 519.0, 516.0, 530.0, 582.0, 527.0, 522.0, 567.0, 527.0, 533.0, 573.0, 576.0, 519.0, 573.0, 516.0, 522.0, 582.0, 522.0, 573.0, 519.0, 576.0, 579.0, 573.0, 525.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 294.0, 281.0, 286.0, 273.0, 252.0, 254.0, 268.0, 276.0, 294.0, 267.0, 258.0, 268.0, 254.0, 289.0, 281.0, 296.0, 283.0, 270.0, 266.0, 288.0, 288.0, 258.0, 264.0, 289.0, 287.0, 249.0, 270.0, 270.0, 263.0, 293.0, 280.0, 273.0, 257.0, 291.0, 285.0, 281.0, 298.0, 283.0, 287.0, 293.0, 286.0, 252.0, 270.0, 278.0, 295.0, 267.0, 252.0, 263.0, 270.0, 282.0, 297.0, 268.0, 262.0, 287.0, 292.0, 292.0, 287.0, 284.0, 295.0, 276.0, 300.0, 280.0, 293.0, 261.0, 264.0, 285.0, 294.0, 276.0, 288.0, 283.0, 287.0, 264.0, 255.0, 301.0, 266.0, 279.0, 285.0, 278.0, 295.0, 292.0, 284.0, 300.0, 276.0, 289.0, 290.0, 271.0, 262.0, 288.0, 288.0, 265.0, 265.0, 262.0, 263.0, 286.0, 287.0, 290.0, 289.0, 279.0, 291.0, 288.0, 285.0, 291.0, 282.0, 280.0, 290.0, 293.0, 277.0, 292.0, 287.0, 242.0, 274.0, 278.0, 298.0, 270.0, 260.0, 236.0, 246.0, 295.0, 281.0, 292.0, 284.0, 260.0, 267.0, 283.0, 290.0, 286.0, 296.0, 291.0, 288.0, 258.0, 264.0, 278.0, 301.0, 274.0, 287.0, 269.0, 298.0, 267.0, 246.0, 265.0, 257.0, 34.0, 32.0, 278.0, 252.0, 248.0, 274.0, 277.0, 256.0, 248.0, 225.0, 266.0, 253.0, 263.0, 253.0, 278.0, 252.0, 307.0, 275.0, 247.0, 280.0, 264.0, 258.0, 285.0, 282.0, 269.0, 258.0, 260.0, 273.0, 285.0, 288.0, 287.0, 289.0, 273.0, 246.0, 285.0, 288.0, 249.0, 267.0, 263.0, 259.0, 286.0, 296.0, 259.0, 263.0, 294.0, 279.0, 270.0, 249.0, 277.0, 299.0, 291.0, 288.0, 282.0, 291.0, 260.0, 265.0, 270.0, 246.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836350018322129, "mean_inference_ms": 1.2057417308134253, "mean_action_processing_ms": 0.13235457058603195, "mean_env_wait_ms": 0.837765417455068, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 66.0, "episode_reward_mean": 546.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": 
{"ppo": 307.0}, "policy_reward_mean": {"ppo": 273.425}, "hist_stats": {"episode_reward": [579.0, 567.0, 525.0, 522.0, 570.0, 525.0, 522.0, 570.0, 579.0, 536.0, 576.0, 522.0, 576.0, 519.0, 533.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 573.0, 519.0, 533.0, 579.0, 530.0, 579.0, 579.0, 579.0, 576.0, 573.0, 525.0, 579.0, 564.0, 570.0, 519.0, 567.0, 564.0, 573.0, 576.0, 576.0, 579.0, 533.0, 576.0, 530.0, 525.0, 573.0, 579.0, 570.0, 573.0, 573.0, 570.0, 570.0, 579.0, 516.0, 576.0, 530.0, 482.0, 576.0, 576.0, 527.0, 573.0, 582.0, 579.0, 522.0, 579.0, 561.0, 567.0, 513.0, 522.0, 66.0, 530.0, 522.0, 533.0, 473.0, 519.0, 516.0, 530.0, 582.0, 527.0, 522.0, 567.0, 527.0, 533.0, 573.0, 576.0, 519.0, 573.0, 516.0, 522.0, 582.0, 522.0, 573.0, 519.0, 576.0, 579.0, 573.0, 525.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 294.0, 281.0, 286.0, 273.0, 252.0, 254.0, 268.0, 276.0, 294.0, 267.0, 258.0, 268.0, 254.0, 289.0, 281.0, 296.0, 283.0, 270.0, 266.0, 288.0, 288.0, 258.0, 264.0, 289.0, 287.0, 249.0, 270.0, 270.0, 263.0, 293.0, 280.0, 273.0, 257.0, 291.0, 285.0, 281.0, 298.0, 283.0, 287.0, 293.0, 286.0, 252.0, 270.0, 278.0, 295.0, 267.0, 252.0, 263.0, 270.0, 282.0, 297.0, 268.0, 262.0, 287.0, 292.0, 292.0, 287.0, 284.0, 295.0, 276.0, 300.0, 280.0, 293.0, 261.0, 264.0, 285.0, 294.0, 276.0, 288.0, 283.0, 287.0, 264.0, 255.0, 301.0, 266.0, 279.0, 285.0, 278.0, 295.0, 292.0, 284.0, 300.0, 276.0, 289.0, 290.0, 271.0, 262.0, 288.0, 288.0, 265.0, 265.0, 262.0, 263.0, 286.0, 
287.0, 290.0, 289.0, 279.0, 291.0, 288.0, 285.0, 291.0, 282.0, 280.0, 290.0, 293.0, 277.0, 292.0, 287.0, 242.0, 274.0, 278.0, 298.0, 270.0, 260.0, 236.0, 246.0, 295.0, 281.0, 292.0, 284.0, 260.0, 267.0, 283.0, 290.0, 286.0, 296.0, 291.0, 288.0, 258.0, 264.0, 278.0, 301.0, 274.0, 287.0, 269.0, 298.0, 267.0, 246.0, 265.0, 257.0, 34.0, 32.0, 278.0, 252.0, 248.0, 274.0, 277.0, 256.0, 248.0, 225.0, 266.0, 253.0, 263.0, 253.0, 278.0, 252.0, 307.0, 275.0, 247.0, 280.0, 264.0, 258.0, 285.0, 282.0, 269.0, 258.0, 260.0, 273.0, 285.0, 288.0, 287.0, 289.0, 273.0, 246.0, 285.0, 288.0, 249.0, 267.0, 263.0, 259.0, 286.0, 296.0, 259.0, 263.0, 294.0, 279.0, 270.0, 249.0, 277.0, 299.0, 291.0, 288.0, 282.0, 291.0, 260.0, 265.0, 270.0, 246.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836350018322129, "mean_inference_ms": 1.2057417308134253, "mean_action_processing_ms": 0.13235457058603195, "mean_env_wait_ms": 0.837765417455068, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4172800, "num_agent_steps_trained": 4172800, "num_env_steps_sampled": 2086400, "num_env_steps_trained": 2086400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2086400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4172800, "timers": {"training_iteration_time_ms": 3598.076, "learn_time_ms": 1088.97, "learn_throughput": 11754.222, "synch_weights_time_ms": 12.913}, "counters": {"num_env_steps_sampled": 2086400, "num_env_steps_trained": 2086400, "num_agent_steps_sampled": 4172800, "num_agent_steps_trained": 4172800}, "done": false, "episodes_total": 5216, "training_iteration": 163, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-32", "timestamp": 1666581032, "time_this_iter_s": 3.6558845043182373, "time_total_s": 611.6447901725769, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": 
{"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, 
"recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": 
false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": 
false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, 
"evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, 
"callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 611.6447901725769, "timesteps_since_restore": 0, "iterations_since_restore": 163, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.0, "ram_util_percent": 10.616666666666665}} +{"custom_metrics": {"sparse_reward_mean": 188.6, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 166.81, "shaped_reward_min": 26, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.62, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.76, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.17, "useful_onion_pickup_agent_0_min": 2, 
"useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.34, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.32, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.3, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.65, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.56, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.91, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 
0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.32, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.32, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0022505151573568583, "policy_loss": -0.00245782732963562, "vf_loss": 7.431617736816406, "vf_explained_var": 0.670035719871521, "kl": 0.0017980989068746567, "entropy": 1.07169771194458, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2099200, "num_env_steps_trained": 2099200, "num_agent_steps_sampled": 4198400, "num_agent_steps_trained": 4198400}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 66.0, "episode_reward_mean": 544.01, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 272.005}, "custom_metrics": {"sparse_reward_mean": 188.6, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 166.81, "shaped_reward_min": 26, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, 
"useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.62, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.76, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.17, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.34, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.32, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.3, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.65, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.56, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.39, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, 
"useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.91, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.32, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.32, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, 
"catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 579.0, 564.0, 570.0, 519.0, 567.0, 564.0, 573.0, 576.0, 576.0, 579.0, 533.0, 576.0, 530.0, 525.0, 573.0, 579.0, 570.0, 573.0, 573.0, 570.0, 570.0, 579.0, 516.0, 576.0, 530.0, 482.0, 576.0, 576.0, 527.0, 573.0, 582.0, 579.0, 522.0, 579.0, 561.0, 567.0, 513.0, 522.0, 66.0, 530.0, 522.0, 533.0, 473.0, 519.0, 516.0, 530.0, 582.0, 527.0, 522.0, 567.0, 527.0, 533.0, 573.0, 576.0, 519.0, 573.0, 516.0, 522.0, 582.0, 522.0, 573.0, 519.0, 576.0, 579.0, 573.0, 525.0, 516.0, 522.0, 579.0, 576.0, 530.0, 582.0, 522.0, 567.0, 513.0, 582.0, 519.0, 576.0, 525.0, 552.0, 582.0, 525.0, 567.0, 444.0, 522.0, 579.0, 525.0, 525.0, 579.0, 570.0, 573.0, 570.0, 522.0, 573.0, 513.0, 519.0, 564.0, 570.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 264.0, 285.0, 294.0, 276.0, 288.0, 283.0, 287.0, 264.0, 255.0, 301.0, 266.0, 279.0, 285.0, 278.0, 295.0, 292.0, 284.0, 300.0, 276.0, 289.0, 290.0, 271.0, 262.0, 288.0, 288.0, 265.0, 265.0, 262.0, 263.0, 286.0, 287.0, 290.0, 289.0, 279.0, 291.0, 288.0, 285.0, 291.0, 282.0, 280.0, 290.0, 293.0, 277.0, 292.0, 287.0, 242.0, 274.0, 278.0, 298.0, 270.0, 260.0, 236.0, 246.0, 295.0, 281.0, 292.0, 284.0, 260.0, 267.0, 283.0, 290.0, 286.0, 296.0, 291.0, 288.0, 258.0, 264.0, 278.0, 301.0, 274.0, 287.0, 269.0, 298.0, 267.0, 246.0, 265.0, 257.0, 34.0, 32.0, 278.0, 252.0, 248.0, 274.0, 277.0, 256.0, 248.0, 225.0, 266.0, 253.0, 263.0, 253.0, 278.0, 252.0, 307.0, 275.0, 247.0, 280.0, 264.0, 258.0, 285.0, 282.0, 269.0, 258.0, 260.0, 273.0, 285.0, 288.0, 287.0, 289.0, 273.0, 246.0, 285.0, 288.0, 249.0, 267.0, 263.0, 259.0, 286.0, 296.0, 259.0, 263.0, 294.0, 279.0, 270.0, 249.0, 277.0, 299.0, 291.0, 288.0, 282.0, 291.0, 260.0, 265.0, 270.0, 246.0, 261.0, 261.0, 303.0, 276.0, 289.0, 287.0, 262.0, 268.0, 293.0, 289.0, 254.0, 268.0, 277.0, 290.0, 266.0, 247.0, 294.0, 288.0, 265.0, 254.0, 276.0, 300.0, 261.0, 264.0, 288.0, 264.0, 290.0, 292.0, 270.0, 255.0, 281.0, 286.0, 230.0, 214.0, 249.0, 273.0, 287.0, 292.0, 263.0, 262.0, 270.0, 255.0, 273.0, 306.0, 282.0, 288.0, 282.0, 291.0, 291.0, 279.0, 265.0, 257.0, 294.0, 279.0, 264.0, 249.0, 243.0, 276.0, 274.0, 290.0, 280.0, 290.0, 263.0, 256.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836047998540244, "mean_inference_ms": 1.205887706872132, "mean_action_processing_ms": 0.13234677021859106, "mean_env_wait_ms": 0.8378906222560302, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 66.0, "episode_reward_mean": 544.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 307.0}, "policy_reward_mean": {"ppo": 
272.005}, "hist_stats": {"episode_reward": [525.0, 579.0, 564.0, 570.0, 519.0, 567.0, 564.0, 573.0, 576.0, 576.0, 579.0, 533.0, 576.0, 530.0, 525.0, 573.0, 579.0, 570.0, 573.0, 573.0, 570.0, 570.0, 579.0, 516.0, 576.0, 530.0, 482.0, 576.0, 576.0, 527.0, 573.0, 582.0, 579.0, 522.0, 579.0, 561.0, 567.0, 513.0, 522.0, 66.0, 530.0, 522.0, 533.0, 473.0, 519.0, 516.0, 530.0, 582.0, 527.0, 522.0, 567.0, 527.0, 533.0, 573.0, 576.0, 519.0, 573.0, 516.0, 522.0, 582.0, 522.0, 573.0, 519.0, 576.0, 579.0, 573.0, 525.0, 516.0, 522.0, 579.0, 576.0, 530.0, 582.0, 522.0, 567.0, 513.0, 582.0, 519.0, 576.0, 525.0, 552.0, 582.0, 525.0, 567.0, 444.0, 522.0, 579.0, 525.0, 525.0, 579.0, 570.0, 573.0, 570.0, 522.0, 573.0, 513.0, 519.0, 564.0, 570.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 264.0, 285.0, 294.0, 276.0, 288.0, 283.0, 287.0, 264.0, 255.0, 301.0, 266.0, 279.0, 285.0, 278.0, 295.0, 292.0, 284.0, 300.0, 276.0, 289.0, 290.0, 271.0, 262.0, 288.0, 288.0, 265.0, 265.0, 262.0, 263.0, 286.0, 287.0, 290.0, 289.0, 279.0, 291.0, 288.0, 285.0, 291.0, 282.0, 280.0, 290.0, 293.0, 277.0, 292.0, 287.0, 242.0, 274.0, 278.0, 298.0, 270.0, 260.0, 236.0, 246.0, 295.0, 281.0, 292.0, 284.0, 260.0, 267.0, 283.0, 290.0, 286.0, 296.0, 291.0, 288.0, 258.0, 264.0, 278.0, 301.0, 274.0, 287.0, 269.0, 298.0, 267.0, 246.0, 265.0, 257.0, 34.0, 32.0, 278.0, 252.0, 248.0, 274.0, 277.0, 256.0, 248.0, 225.0, 266.0, 253.0, 263.0, 253.0, 278.0, 252.0, 307.0, 275.0, 247.0, 280.0, 264.0, 258.0, 285.0, 282.0, 
269.0, 258.0, 260.0, 273.0, 285.0, 288.0, 287.0, 289.0, 273.0, 246.0, 285.0, 288.0, 249.0, 267.0, 263.0, 259.0, 286.0, 296.0, 259.0, 263.0, 294.0, 279.0, 270.0, 249.0, 277.0, 299.0, 291.0, 288.0, 282.0, 291.0, 260.0, 265.0, 270.0, 246.0, 261.0, 261.0, 303.0, 276.0, 289.0, 287.0, 262.0, 268.0, 293.0, 289.0, 254.0, 268.0, 277.0, 290.0, 266.0, 247.0, 294.0, 288.0, 265.0, 254.0, 276.0, 300.0, 261.0, 264.0, 288.0, 264.0, 290.0, 292.0, 270.0, 255.0, 281.0, 286.0, 230.0, 214.0, 249.0, 273.0, 287.0, 292.0, 263.0, 262.0, 270.0, 255.0, 273.0, 306.0, 282.0, 288.0, 282.0, 291.0, 291.0, 279.0, 265.0, 257.0, 294.0, 279.0, 264.0, 249.0, 243.0, 276.0, 274.0, 290.0, 280.0, 290.0, 263.0, 256.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6836047998540244, "mean_inference_ms": 1.205887706872132, "mean_action_processing_ms": 0.13234677021859106, "mean_env_wait_ms": 0.8378906222560302, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4198400, "num_agent_steps_trained": 4198400, "num_env_steps_sampled": 2099200, "num_env_steps_trained": 2099200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2099200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4198400, "timers": {"training_iteration_time_ms": 3611.249, "learn_time_ms": 1083.015, "learn_throughput": 11818.859, "synch_weights_time_ms": 13.057}, "counters": {"num_env_steps_sampled": 2099200, "num_env_steps_trained": 2099200, "num_agent_steps_sampled": 4198400, "num_agent_steps_trained": 4198400}, "done": false, "episodes_total": 5248, "training_iteration": 164, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-36", "timestamp": 1666581036, "time_this_iter_s": 3.8051187992095947, "time_total_s": 615.4499089717865, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, 
"extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, 
"restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": 
{"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, 
"recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": 
false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": 
false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": 
"independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 615.4499089717865, "timesteps_since_restore": 0, "iterations_since_restore": 164, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.8, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 188.6, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 166.78, "shaped_reward_min": 26, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.0, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.07, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.53, 
"useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.44, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.73, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.87, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, 
"soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.44, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.44, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, 
"useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00199544383212924, "policy_loss": 0.0018049610080197453, "vf_loss": 7.202265739440918, "vf_explained_var": 0.6753696799278259, "kl": 0.001835355069488287, "entropy": 1.0594866275787354, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2112000, "num_env_steps_trained": 2112000, "num_agent_steps_sampled": 4224000, "num_agent_steps_trained": 4224000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 66.0, "episode_reward_mean": 543.98, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 271.99}, "custom_metrics": {"sparse_reward_mean": 188.6, "sparse_reward_min": 20, "sparse_reward_max": 200, "shaped_reward_mean": 166.78, "shaped_reward_min": 26, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, 
"useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.0, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.07, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.53, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.44, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.73, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, 
"useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.87, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.44, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.44, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, 
"catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 522.0, 579.0, 561.0, 567.0, 513.0, 522.0, 66.0, 530.0, 522.0, 533.0, 473.0, 519.0, 516.0, 530.0, 582.0, 527.0, 522.0, 567.0, 527.0, 533.0, 573.0, 576.0, 519.0, 573.0, 516.0, 522.0, 582.0, 522.0, 573.0, 519.0, 576.0, 579.0, 573.0, 525.0, 516.0, 522.0, 579.0, 576.0, 530.0, 582.0, 522.0, 567.0, 513.0, 582.0, 519.0, 576.0, 525.0, 552.0, 582.0, 525.0, 567.0, 444.0, 522.0, 579.0, 525.0, 525.0, 579.0, 570.0, 573.0, 570.0, 522.0, 573.0, 513.0, 519.0, 564.0, 570.0, 519.0, 576.0, 579.0, 573.0, 527.0, 521.0, 522.0, 530.0, 573.0, 525.0, 527.0, 570.0, 570.0, 576.0, 576.0, 570.0, 579.0, 576.0, 530.0, 570.0, 576.0, 576.0, 573.0, 525.0, 522.0, 576.0, 570.0, 570.0, 570.0, 522.0, 579.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 258.0, 264.0, 278.0, 301.0, 274.0, 287.0, 269.0, 298.0, 267.0, 246.0, 265.0, 257.0, 34.0, 32.0, 278.0, 252.0, 248.0, 274.0, 277.0, 256.0, 248.0, 225.0, 266.0, 253.0, 263.0, 253.0, 278.0, 252.0, 307.0, 275.0, 247.0, 280.0, 264.0, 258.0, 285.0, 282.0, 269.0, 258.0, 260.0, 273.0, 285.0, 288.0, 287.0, 289.0, 273.0, 246.0, 285.0, 288.0, 249.0, 267.0, 263.0, 259.0, 286.0, 296.0, 259.0, 263.0, 294.0, 279.0, 270.0, 249.0, 277.0, 299.0, 291.0, 288.0, 282.0, 291.0, 260.0, 265.0, 270.0, 246.0, 261.0, 261.0, 303.0, 276.0, 289.0, 287.0, 262.0, 268.0, 293.0, 289.0, 254.0, 268.0, 277.0, 290.0, 266.0, 247.0, 294.0, 288.0, 265.0, 254.0, 276.0, 300.0, 261.0, 264.0, 288.0, 264.0, 290.0, 292.0, 270.0, 255.0, 281.0, 286.0, 230.0, 214.0, 249.0, 273.0, 287.0, 292.0, 263.0, 262.0, 270.0, 255.0, 273.0, 306.0, 282.0, 288.0, 282.0, 291.0, 291.0, 279.0, 265.0, 257.0, 294.0, 279.0, 264.0, 249.0, 243.0, 276.0, 274.0, 290.0, 280.0, 290.0, 263.0, 256.0, 289.0, 287.0, 290.0, 289.0, 283.0, 290.0, 252.0, 275.0, 264.0, 257.0, 266.0, 256.0, 270.0, 260.0, 290.0, 283.0, 259.0, 266.0, 248.0, 279.0, 283.0, 287.0, 287.0, 283.0, 296.0, 280.0, 300.0, 276.0, 284.0, 286.0, 285.0, 294.0, 283.0, 293.0, 268.0, 262.0, 291.0, 279.0, 290.0, 286.0, 295.0, 281.0, 285.0, 288.0, 274.0, 251.0, 270.0, 252.0, 284.0, 292.0, 285.0, 285.0, 280.0, 290.0, 290.0, 280.0, 254.0, 268.0, 289.0, 290.0, 285.0, 285.0, 270.0, 309.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6835710921630246, "mean_inference_ms": 1.2060351092122445, "mean_action_processing_ms": 0.13234189490578338, "mean_env_wait_ms": 0.8380345595280514, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 66.0, "episode_reward_mean": 543.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 271.99}, "hist_stats": {"episode_reward": 
[579.0, 522.0, 579.0, 561.0, 567.0, 513.0, 522.0, 66.0, 530.0, 522.0, 533.0, 473.0, 519.0, 516.0, 530.0, 582.0, 527.0, 522.0, 567.0, 527.0, 533.0, 573.0, 576.0, 519.0, 573.0, 516.0, 522.0, 582.0, 522.0, 573.0, 519.0, 576.0, 579.0, 573.0, 525.0, 516.0, 522.0, 579.0, 576.0, 530.0, 582.0, 522.0, 567.0, 513.0, 582.0, 519.0, 576.0, 525.0, 552.0, 582.0, 525.0, 567.0, 444.0, 522.0, 579.0, 525.0, 525.0, 579.0, 570.0, 573.0, 570.0, 522.0, 573.0, 513.0, 519.0, 564.0, 570.0, 519.0, 576.0, 579.0, 573.0, 527.0, 521.0, 522.0, 530.0, 573.0, 525.0, 527.0, 570.0, 570.0, 576.0, 576.0, 570.0, 579.0, 576.0, 530.0, 570.0, 576.0, 576.0, 573.0, 525.0, 522.0, 576.0, 570.0, 570.0, 570.0, 522.0, 579.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 258.0, 264.0, 278.0, 301.0, 274.0, 287.0, 269.0, 298.0, 267.0, 246.0, 265.0, 257.0, 34.0, 32.0, 278.0, 252.0, 248.0, 274.0, 277.0, 256.0, 248.0, 225.0, 266.0, 253.0, 263.0, 253.0, 278.0, 252.0, 307.0, 275.0, 247.0, 280.0, 264.0, 258.0, 285.0, 282.0, 269.0, 258.0, 260.0, 273.0, 285.0, 288.0, 287.0, 289.0, 273.0, 246.0, 285.0, 288.0, 249.0, 267.0, 263.0, 259.0, 286.0, 296.0, 259.0, 263.0, 294.0, 279.0, 270.0, 249.0, 277.0, 299.0, 291.0, 288.0, 282.0, 291.0, 260.0, 265.0, 270.0, 246.0, 261.0, 261.0, 303.0, 276.0, 289.0, 287.0, 262.0, 268.0, 293.0, 289.0, 254.0, 268.0, 277.0, 290.0, 266.0, 247.0, 294.0, 288.0, 265.0, 254.0, 276.0, 300.0, 261.0, 264.0, 288.0, 264.0, 290.0, 292.0, 270.0, 255.0, 281.0, 286.0, 230.0, 214.0, 249.0, 273.0, 
287.0, 292.0, 263.0, 262.0, 270.0, 255.0, 273.0, 306.0, 282.0, 288.0, 282.0, 291.0, 291.0, 279.0, 265.0, 257.0, 294.0, 279.0, 264.0, 249.0, 243.0, 276.0, 274.0, 290.0, 280.0, 290.0, 263.0, 256.0, 289.0, 287.0, 290.0, 289.0, 283.0, 290.0, 252.0, 275.0, 264.0, 257.0, 266.0, 256.0, 270.0, 260.0, 290.0, 283.0, 259.0, 266.0, 248.0, 279.0, 283.0, 287.0, 287.0, 283.0, 296.0, 280.0, 300.0, 276.0, 284.0, 286.0, 285.0, 294.0, 283.0, 293.0, 268.0, 262.0, 291.0, 279.0, 290.0, 286.0, 295.0, 281.0, 285.0, 288.0, 274.0, 251.0, 270.0, 252.0, 284.0, 292.0, 285.0, 285.0, 280.0, 290.0, 290.0, 280.0, 254.0, 268.0, 289.0, 290.0, 285.0, 285.0, 270.0, 309.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6835710921630246, "mean_inference_ms": 1.2060351092122445, "mean_action_processing_ms": 0.13234189490578338, "mean_env_wait_ms": 0.8380345595280514, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4224000, "num_agent_steps_trained": 4224000, "num_env_steps_sampled": 2112000, "num_env_steps_trained": 2112000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2112000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4224000, "timers": {"training_iteration_time_ms": 3600.771, "learn_time_ms": 1081.655, "learn_throughput": 11833.72, "synch_weights_time_ms": 12.764}, "counters": {"num_env_steps_sampled": 2112000, "num_env_steps_trained": 2112000, "num_agent_steps_sampled": 4224000, "num_agent_steps_trained": 4224000}, "done": false, "episodes_total": 5280, "training_iteration": 165, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-40", "timestamp": 1666581040, "time_this_iter_s": 3.583083391189575, "time_total_s": 619.0329923629761, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, 
"num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, 
"num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, 
"extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, 
"restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, 
"off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": 
"._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": 
"env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 619.0329923629761, "timesteps_since_restore": 0, "iterations_since_restore": 165, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.116666666666667, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.17, "shaped_reward_min": 124, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.74, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.22, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.72, 
"useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.13, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.62, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.84, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 
0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.13, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.62, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.13, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.62, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, 
"useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0029030703008174896, "policy_loss": -0.0030936463735997677, "vf_loss": 7.157515048980713, "vf_explained_var": 0.6804929971694946, "kl": 0.0017253122059628367, "entropy": 1.050349473953247, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2124800, "num_env_steps_trained": 2124800, "num_agent_steps_sampled": 4249600, "num_agent_steps_trained": 4249600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 444.0, "episode_reward_mean": 553.57, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 214.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 276.785}, "custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.17, "shaped_reward_min": 124, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, 
"useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.74, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.22, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.72, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.13, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.62, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.84, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.65, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, 
"useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.13, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.62, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.13, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.62, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, 
"catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 573.0, 525.0, 516.0, 522.0, 579.0, 576.0, 530.0, 582.0, 522.0, 567.0, 513.0, 582.0, 519.0, 576.0, 525.0, 552.0, 582.0, 525.0, 567.0, 444.0, 522.0, 579.0, 525.0, 525.0, 579.0, 570.0, 573.0, 570.0, 522.0, 573.0, 513.0, 519.0, 564.0, 570.0, 519.0, 576.0, 579.0, 573.0, 527.0, 521.0, 522.0, 530.0, 573.0, 525.0, 527.0, 570.0, 570.0, 576.0, 576.0, 570.0, 579.0, 576.0, 530.0, 570.0, 576.0, 576.0, 573.0, 525.0, 522.0, 576.0, 570.0, 570.0, 570.0, 522.0, 579.0, 570.0, 579.0, 522.0, 573.0, 522.0, 570.0, 530.0, 570.0, 573.0, 573.0, 576.0, 573.0, 530.0, 522.0, 573.0, 570.0, 576.0, 519.0, 525.0, 579.0, 570.0, 525.0, 522.0, 576.0, 570.0, 576.0, 525.0, 570.0, 564.0, 525.0, 576.0, 567.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], 
"policy_ppo_reward": [291.0, 288.0, 282.0, 291.0, 260.0, 265.0, 270.0, 246.0, 261.0, 261.0, 303.0, 276.0, 289.0, 287.0, 262.0, 268.0, 293.0, 289.0, 254.0, 268.0, 277.0, 290.0, 266.0, 247.0, 294.0, 288.0, 265.0, 254.0, 276.0, 300.0, 261.0, 264.0, 288.0, 264.0, 290.0, 292.0, 270.0, 255.0, 281.0, 286.0, 230.0, 214.0, 249.0, 273.0, 287.0, 292.0, 263.0, 262.0, 270.0, 255.0, 273.0, 306.0, 282.0, 288.0, 282.0, 291.0, 291.0, 279.0, 265.0, 257.0, 294.0, 279.0, 264.0, 249.0, 243.0, 276.0, 274.0, 290.0, 280.0, 290.0, 263.0, 256.0, 289.0, 287.0, 290.0, 289.0, 283.0, 290.0, 252.0, 275.0, 264.0, 257.0, 266.0, 256.0, 270.0, 260.0, 290.0, 283.0, 259.0, 266.0, 248.0, 279.0, 283.0, 287.0, 287.0, 283.0, 296.0, 280.0, 300.0, 276.0, 284.0, 286.0, 285.0, 294.0, 283.0, 293.0, 268.0, 262.0, 291.0, 279.0, 290.0, 286.0, 295.0, 281.0, 285.0, 288.0, 274.0, 251.0, 270.0, 252.0, 284.0, 292.0, 285.0, 285.0, 280.0, 290.0, 290.0, 280.0, 254.0, 268.0, 289.0, 290.0, 285.0, 285.0, 270.0, 309.0, 275.0, 247.0, 280.0, 293.0, 266.0, 256.0, 303.0, 267.0, 259.0, 271.0, 299.0, 271.0, 290.0, 283.0, 301.0, 272.0, 280.0, 296.0, 279.0, 294.0, 281.0, 249.0, 262.0, 260.0, 283.0, 290.0, 287.0, 283.0, 289.0, 287.0, 257.0, 262.0, 258.0, 267.0, 287.0, 292.0, 278.0, 292.0, 254.0, 271.0, 257.0, 265.0, 300.0, 276.0, 295.0, 275.0, 293.0, 283.0, 265.0, 260.0, 279.0, 291.0, 276.0, 288.0, 256.0, 269.0, 276.0, 300.0, 290.0, 277.0, 292.0, 290.0, 282.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6835082199229373, "mean_inference_ms": 1.2061519460108794, "mean_action_processing_ms": 0.13233370628692803, "mean_env_wait_ms": 0.8381257388110719, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 444.0, "episode_reward_mean": 553.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 214.0}, "policy_reward_max": {"ppo": 309.0}, "policy_reward_mean": {"ppo": 276.785}, "hist_stats": {"episode_reward": [579.0, 573.0, 525.0, 516.0, 
522.0, 579.0, 576.0, 530.0, 582.0, 522.0, 567.0, 513.0, 582.0, 519.0, 576.0, 525.0, 552.0, 582.0, 525.0, 567.0, 444.0, 522.0, 579.0, 525.0, 525.0, 579.0, 570.0, 573.0, 570.0, 522.0, 573.0, 513.0, 519.0, 564.0, 570.0, 519.0, 576.0, 579.0, 573.0, 527.0, 521.0, 522.0, 530.0, 573.0, 525.0, 527.0, 570.0, 570.0, 576.0, 576.0, 570.0, 579.0, 576.0, 530.0, 570.0, 576.0, 576.0, 573.0, 525.0, 522.0, 576.0, 570.0, 570.0, 570.0, 522.0, 579.0, 570.0, 579.0, 522.0, 573.0, 522.0, 570.0, 530.0, 570.0, 573.0, 573.0, 576.0, 573.0, 530.0, 522.0, 573.0, 570.0, 576.0, 519.0, 525.0, 579.0, 570.0, 525.0, 522.0, 576.0, 570.0, 576.0, 525.0, 570.0, 564.0, 525.0, 576.0, 567.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 282.0, 291.0, 260.0, 265.0, 270.0, 246.0, 261.0, 261.0, 303.0, 276.0, 289.0, 287.0, 262.0, 268.0, 293.0, 289.0, 254.0, 268.0, 277.0, 290.0, 266.0, 247.0, 294.0, 288.0, 265.0, 254.0, 276.0, 300.0, 261.0, 264.0, 288.0, 264.0, 290.0, 292.0, 270.0, 255.0, 281.0, 286.0, 230.0, 214.0, 249.0, 273.0, 287.0, 292.0, 263.0, 262.0, 270.0, 255.0, 273.0, 306.0, 282.0, 288.0, 282.0, 291.0, 291.0, 279.0, 265.0, 257.0, 294.0, 279.0, 264.0, 249.0, 243.0, 276.0, 274.0, 290.0, 280.0, 290.0, 263.0, 256.0, 289.0, 287.0, 290.0, 289.0, 283.0, 290.0, 252.0, 275.0, 264.0, 257.0, 266.0, 256.0, 270.0, 260.0, 290.0, 283.0, 259.0, 266.0, 248.0, 279.0, 283.0, 287.0, 287.0, 283.0, 296.0, 280.0, 300.0, 276.0, 284.0, 286.0, 285.0, 294.0, 283.0, 293.0, 268.0, 262.0, 291.0, 279.0, 290.0, 286.0, 
295.0, 281.0, 285.0, 288.0, 274.0, 251.0, 270.0, 252.0, 284.0, 292.0, 285.0, 285.0, 280.0, 290.0, 290.0, 280.0, 254.0, 268.0, 289.0, 290.0, 285.0, 285.0, 270.0, 309.0, 275.0, 247.0, 280.0, 293.0, 266.0, 256.0, 303.0, 267.0, 259.0, 271.0, 299.0, 271.0, 290.0, 283.0, 301.0, 272.0, 280.0, 296.0, 279.0, 294.0, 281.0, 249.0, 262.0, 260.0, 283.0, 290.0, 287.0, 283.0, 289.0, 287.0, 257.0, 262.0, 258.0, 267.0, 287.0, 292.0, 278.0, 292.0, 254.0, 271.0, 257.0, 265.0, 300.0, 276.0, 295.0, 275.0, 293.0, 283.0, 265.0, 260.0, 279.0, 291.0, 276.0, 288.0, 256.0, 269.0, 276.0, 300.0, 290.0, 277.0, 292.0, 290.0, 282.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6835082199229373, "mean_inference_ms": 1.2061519460108794, "mean_action_processing_ms": 0.13233370628692803, "mean_env_wait_ms": 0.8381257388110719, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4249600, "num_agent_steps_trained": 4249600, "num_env_steps_sampled": 2124800, "num_env_steps_trained": 2124800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2124800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4249600, "timers": {"training_iteration_time_ms": 3607.208, "learn_time_ms": 1092.222, "learn_throughput": 11719.229, "synch_weights_time_ms": 12.734}, "counters": {"num_env_steps_sampled": 2124800, "num_env_steps_trained": 2124800, "num_agent_steps_sampled": 4249600, "num_agent_steps_trained": 4249600}, "done": false, "episodes_total": 5312, "training_iteration": 166, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-44", "timestamp": 1666581044, "time_this_iter_s": 3.674652099609375, "time_total_s": 622.7076444625854, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 
1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": 
null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, 
"num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, 
"num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", 
"create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 622.7076444625854, "timesteps_since_restore": 0, "iterations_since_restore": 166, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.52, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 191.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 168.43, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.63, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.12, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.24, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.71, "useful_onion_pickup_agent_1_min": 8, 
"useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.56, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.22, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.79, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.67, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.91, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, 
"soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.56, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.56, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013110751751810312, "policy_loss": -0.0015277772909030318, "vf_loss": 7.448184013366699, "vf_explained_var": 0.6839404106140137, "kl": 0.002003659959882498, "entropy": 1.0562318563461304, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2137600, "num_env_steps_trained": 2137600, "num_agent_steps_sampled": 4275200, "num_agent_steps_trained": 4275200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 551.23, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 84.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 275.615}, "custom_metrics": {"sparse_reward_mean": 191.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 168.43, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.63, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.12, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.24, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.71, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.27, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.56, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.22, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.79, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.67, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.91, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.56, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.56, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 564.0, 570.0, 519.0, 576.0, 579.0, 573.0, 527.0, 521.0, 522.0, 530.0, 573.0, 525.0, 527.0, 570.0, 570.0, 576.0, 576.0, 570.0, 579.0, 576.0, 530.0, 570.0, 576.0, 576.0, 573.0, 525.0, 522.0, 576.0, 570.0, 570.0, 570.0, 522.0, 579.0, 570.0, 579.0, 522.0, 573.0, 522.0, 570.0, 530.0, 570.0, 573.0, 573.0, 576.0, 573.0, 530.0, 522.0, 573.0, 570.0, 576.0, 519.0, 525.0, 579.0, 570.0, 525.0, 522.0, 576.0, 570.0, 576.0, 525.0, 570.0, 564.0, 525.0, 576.0, 567.0, 582.0, 576.0, 522.0, 522.0, 573.0, 630.0, 461.0, 519.0, 519.0, 579.0, 573.0, 576.0, 570.0, 570.0, 570.0, 530.0, 519.0, 519.0, 573.0, 530.0, 570.0, 576.0, 495.0, 570.0, 527.0, 180.0, 576.0, 576.0, 573.0, 522.0, 525.0, 582.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [243.0, 276.0, 274.0, 290.0, 280.0, 
290.0, 263.0, 256.0, 289.0, 287.0, 290.0, 289.0, 283.0, 290.0, 252.0, 275.0, 264.0, 257.0, 266.0, 256.0, 270.0, 260.0, 290.0, 283.0, 259.0, 266.0, 248.0, 279.0, 283.0, 287.0, 287.0, 283.0, 296.0, 280.0, 300.0, 276.0, 284.0, 286.0, 285.0, 294.0, 283.0, 293.0, 268.0, 262.0, 291.0, 279.0, 290.0, 286.0, 295.0, 281.0, 285.0, 288.0, 274.0, 251.0, 270.0, 252.0, 284.0, 292.0, 285.0, 285.0, 280.0, 290.0, 290.0, 280.0, 254.0, 268.0, 289.0, 290.0, 285.0, 285.0, 270.0, 309.0, 275.0, 247.0, 280.0, 293.0, 266.0, 256.0, 303.0, 267.0, 259.0, 271.0, 299.0, 271.0, 290.0, 283.0, 301.0, 272.0, 280.0, 296.0, 279.0, 294.0, 281.0, 249.0, 262.0, 260.0, 283.0, 290.0, 287.0, 283.0, 289.0, 287.0, 257.0, 262.0, 258.0, 267.0, 287.0, 292.0, 278.0, 292.0, 254.0, 271.0, 257.0, 265.0, 300.0, 276.0, 295.0, 275.0, 293.0, 283.0, 265.0, 260.0, 279.0, 291.0, 276.0, 288.0, 256.0, 269.0, 276.0, 300.0, 290.0, 277.0, 292.0, 290.0, 282.0, 294.0, 252.0, 270.0, 265.0, 257.0, 277.0, 296.0, 306.0, 324.0, 230.0, 231.0, 250.0, 269.0, 256.0, 263.0, 287.0, 292.0, 287.0, 286.0, 278.0, 298.0, 288.0, 282.0, 286.0, 284.0, 280.0, 290.0, 262.0, 268.0, 248.0, 271.0, 275.0, 244.0, 289.0, 284.0, 265.0, 265.0, 285.0, 285.0, 280.0, 296.0, 240.0, 255.0, 286.0, 284.0, 266.0, 261.0, 96.0, 84.0, 285.0, 291.0, 287.0, 289.0, 282.0, 291.0, 273.0, 249.0, 267.0, 258.0, 293.0, 289.0, 288.0, 288.0, 292.0, 278.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6834355732851334, "mean_inference_ms": 1.2060678323861582, "mean_action_processing_ms": 0.13232880100409083, "mean_env_wait_ms": 0.8379897883903749, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 551.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 84.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 275.615}, "hist_stats": {"episode_reward": [519.0, 564.0, 570.0, 519.0, 576.0, 579.0, 573.0, 527.0, 521.0, 522.0, 530.0, 573.0, 525.0, 
527.0, 570.0, 570.0, 576.0, 576.0, 570.0, 579.0, 576.0, 530.0, 570.0, 576.0, 576.0, 573.0, 525.0, 522.0, 576.0, 570.0, 570.0, 570.0, 522.0, 579.0, 570.0, 579.0, 522.0, 573.0, 522.0, 570.0, 530.0, 570.0, 573.0, 573.0, 576.0, 573.0, 530.0, 522.0, 573.0, 570.0, 576.0, 519.0, 525.0, 579.0, 570.0, 525.0, 522.0, 576.0, 570.0, 576.0, 525.0, 570.0, 564.0, 525.0, 576.0, 567.0, 582.0, 576.0, 522.0, 522.0, 573.0, 630.0, 461.0, 519.0, 519.0, 579.0, 573.0, 576.0, 570.0, 570.0, 570.0, 530.0, 519.0, 519.0, 573.0, 530.0, 570.0, 576.0, 495.0, 570.0, 527.0, 180.0, 576.0, 576.0, 573.0, 522.0, 525.0, 582.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [243.0, 276.0, 274.0, 290.0, 280.0, 290.0, 263.0, 256.0, 289.0, 287.0, 290.0, 289.0, 283.0, 290.0, 252.0, 275.0, 264.0, 257.0, 266.0, 256.0, 270.0, 260.0, 290.0, 283.0, 259.0, 266.0, 248.0, 279.0, 283.0, 287.0, 287.0, 283.0, 296.0, 280.0, 300.0, 276.0, 284.0, 286.0, 285.0, 294.0, 283.0, 293.0, 268.0, 262.0, 291.0, 279.0, 290.0, 286.0, 295.0, 281.0, 285.0, 288.0, 274.0, 251.0, 270.0, 252.0, 284.0, 292.0, 285.0, 285.0, 280.0, 290.0, 290.0, 280.0, 254.0, 268.0, 289.0, 290.0, 285.0, 285.0, 270.0, 309.0, 275.0, 247.0, 280.0, 293.0, 266.0, 256.0, 303.0, 267.0, 259.0, 271.0, 299.0, 271.0, 290.0, 283.0, 301.0, 272.0, 280.0, 296.0, 279.0, 294.0, 281.0, 249.0, 262.0, 260.0, 283.0, 290.0, 287.0, 283.0, 289.0, 287.0, 257.0, 262.0, 258.0, 267.0, 287.0, 292.0, 278.0, 292.0, 254.0, 271.0, 257.0, 265.0, 300.0, 276.0, 295.0, 275.0, 293.0, 283.0, 265.0, 
260.0, 279.0, 291.0, 276.0, 288.0, 256.0, 269.0, 276.0, 300.0, 290.0, 277.0, 292.0, 290.0, 282.0, 294.0, 252.0, 270.0, 265.0, 257.0, 277.0, 296.0, 306.0, 324.0, 230.0, 231.0, 250.0, 269.0, 256.0, 263.0, 287.0, 292.0, 287.0, 286.0, 278.0, 298.0, 288.0, 282.0, 286.0, 284.0, 280.0, 290.0, 262.0, 268.0, 248.0, 271.0, 275.0, 244.0, 289.0, 284.0, 265.0, 265.0, 285.0, 285.0, 280.0, 296.0, 240.0, 255.0, 286.0, 284.0, 266.0, 261.0, 96.0, 84.0, 285.0, 291.0, 287.0, 289.0, 282.0, 291.0, 273.0, 249.0, 267.0, 258.0, 293.0, 289.0, 288.0, 288.0, 292.0, 278.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6834355732851334, "mean_inference_ms": 1.2060678323861582, "mean_action_processing_ms": 0.13232880100409083, "mean_env_wait_ms": 0.8379897883903749, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4275200, "num_agent_steps_trained": 4275200, "num_env_steps_sampled": 2137600, "num_env_steps_trained": 2137600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2137600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4275200, "timers": {"training_iteration_time_ms": 3605.504, "learn_time_ms": 1092.752, "learn_throughput": 11713.543, "synch_weights_time_ms": 12.734}, "counters": {"num_env_steps_sampled": 2137600, "num_env_steps_trained": 2137600, "num_agent_steps_sampled": 4275200, "num_agent_steps_trained": 4275200}, "done": false, "episodes_total": 5344, "training_iteration": 167, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-48", "timestamp": 1666581048, "time_this_iter_s": 3.6411495208740234, "time_total_s": 626.3487939834595, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, 
"no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 626.3487939834595, "timesteps_since_restore": 0, "iterations_since_restore": 167, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.3, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 169.53, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.98, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.93, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.65, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.58, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.38, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.43, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.61, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.69, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.67, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.38, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.43, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.38, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.43, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00048789544962346554, "policy_loss": -0.0007036026800051332, "vf_loss": 7.372027397155762, "vf_explained_var": 0.699303388595581, "kl": 0.0018403129652142525, "entropy": 1.0429902076721191, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2150400, "num_env_steps_trained": 2150400, "num_agent_steps_sampled": 4300800, "num_agent_steps_trained": 4300800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 553.53, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 84.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 276.765}, "custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 169.53, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.98, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.93, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.65, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.58, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.38, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.43, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.61, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.69, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.67, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.95, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.38, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.43, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.38, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.43, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 579.0, 570.0, 579.0, 522.0, 573.0, 522.0, 570.0, 530.0, 570.0, 573.0, 573.0, 576.0, 573.0, 530.0, 522.0, 573.0, 570.0, 576.0, 519.0, 525.0, 579.0, 570.0, 525.0, 522.0, 576.0, 570.0, 576.0, 525.0, 570.0, 564.0, 525.0, 576.0, 567.0, 582.0, 576.0, 522.0, 522.0, 573.0, 630.0, 461.0, 519.0, 519.0, 579.0, 573.0, 576.0, 570.0, 570.0, 570.0, 530.0, 519.0, 519.0, 573.0, 530.0, 570.0, 576.0, 495.0, 570.0, 527.0, 180.0, 576.0, 576.0, 573.0, 522.0, 525.0, 582.0, 576.0, 570.0, 533.0, 573.0, 582.0, 579.0, 579.0, 579.0, 522.0, 579.0, 522.0, 579.0, 567.0, 570.0, 522.0, 576.0, 522.0, 576.0, 530.0, 576.0, 579.0, 525.0, 573.0, 582.0, 564.0, 579.0, 579.0, 522.0, 630.0, 576.0, 576.0, 573.0, 576.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 268.0, 289.0, 290.0, 285.0, 285.0, 270.0, 309.0, 275.0, 247.0, 280.0, 
293.0, 266.0, 256.0, 303.0, 267.0, 259.0, 271.0, 299.0, 271.0, 290.0, 283.0, 301.0, 272.0, 280.0, 296.0, 279.0, 294.0, 281.0, 249.0, 262.0, 260.0, 283.0, 290.0, 287.0, 283.0, 289.0, 287.0, 257.0, 262.0, 258.0, 267.0, 287.0, 292.0, 278.0, 292.0, 254.0, 271.0, 257.0, 265.0, 300.0, 276.0, 295.0, 275.0, 293.0, 283.0, 265.0, 260.0, 279.0, 291.0, 276.0, 288.0, 256.0, 269.0, 276.0, 300.0, 290.0, 277.0, 292.0, 290.0, 282.0, 294.0, 252.0, 270.0, 265.0, 257.0, 277.0, 296.0, 306.0, 324.0, 230.0, 231.0, 250.0, 269.0, 256.0, 263.0, 287.0, 292.0, 287.0, 286.0, 278.0, 298.0, 288.0, 282.0, 286.0, 284.0, 280.0, 290.0, 262.0, 268.0, 248.0, 271.0, 275.0, 244.0, 289.0, 284.0, 265.0, 265.0, 285.0, 285.0, 280.0, 296.0, 240.0, 255.0, 286.0, 284.0, 266.0, 261.0, 96.0, 84.0, 285.0, 291.0, 287.0, 289.0, 282.0, 291.0, 273.0, 249.0, 267.0, 258.0, 293.0, 289.0, 288.0, 288.0, 292.0, 278.0, 268.0, 265.0, 278.0, 295.0, 285.0, 297.0, 283.0, 296.0, 282.0, 297.0, 278.0, 301.0, 269.0, 253.0, 282.0, 297.0, 265.0, 257.0, 288.0, 291.0, 287.0, 280.0, 294.0, 276.0, 260.0, 262.0, 285.0, 291.0, 264.0, 258.0, 292.0, 284.0, 259.0, 271.0, 285.0, 291.0, 277.0, 302.0, 251.0, 274.0, 283.0, 290.0, 300.0, 282.0, 279.0, 285.0, 286.0, 293.0, 277.0, 302.0, 251.0, 271.0, 310.0, 320.0, 291.0, 285.0, 283.0, 293.0, 275.0, 298.0, 291.0, 285.0, 256.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6833894892310337, "mean_inference_ms": 1.2059795926228936, "mean_action_processing_ms": 0.13232587728396183, "mean_env_wait_ms": 0.8377997008246847, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 553.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 84.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 276.765}, "hist_stats": {"episode_reward": [522.0, 579.0, 570.0, 579.0, 522.0, 573.0, 522.0, 570.0, 530.0, 570.0, 573.0, 573.0, 576.0, 573.0, 530.0, 522.0, 573.0, 570.0, 576.0, 
519.0, 525.0, 579.0, 570.0, 525.0, 522.0, 576.0, 570.0, 576.0, 525.0, 570.0, 564.0, 525.0, 576.0, 567.0, 582.0, 576.0, 522.0, 522.0, 573.0, 630.0, 461.0, 519.0, 519.0, 579.0, 573.0, 576.0, 570.0, 570.0, 570.0, 530.0, 519.0, 519.0, 573.0, 530.0, 570.0, 576.0, 495.0, 570.0, 527.0, 180.0, 576.0, 576.0, 573.0, 522.0, 525.0, 582.0, 576.0, 570.0, 533.0, 573.0, 582.0, 579.0, 579.0, 579.0, 522.0, 579.0, 522.0, 579.0, 567.0, 570.0, 522.0, 576.0, 522.0, 576.0, 530.0, 576.0, 579.0, 525.0, 573.0, 582.0, 564.0, 579.0, 579.0, 522.0, 630.0, 576.0, 576.0, 573.0, 576.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 268.0, 289.0, 290.0, 285.0, 285.0, 270.0, 309.0, 275.0, 247.0, 280.0, 293.0, 266.0, 256.0, 303.0, 267.0, 259.0, 271.0, 299.0, 271.0, 290.0, 283.0, 301.0, 272.0, 280.0, 296.0, 279.0, 294.0, 281.0, 249.0, 262.0, 260.0, 283.0, 290.0, 287.0, 283.0, 289.0, 287.0, 257.0, 262.0, 258.0, 267.0, 287.0, 292.0, 278.0, 292.0, 254.0, 271.0, 257.0, 265.0, 300.0, 276.0, 295.0, 275.0, 293.0, 283.0, 265.0, 260.0, 279.0, 291.0, 276.0, 288.0, 256.0, 269.0, 276.0, 300.0, 290.0, 277.0, 292.0, 290.0, 282.0, 294.0, 252.0, 270.0, 265.0, 257.0, 277.0, 296.0, 306.0, 324.0, 230.0, 231.0, 250.0, 269.0, 256.0, 263.0, 287.0, 292.0, 287.0, 286.0, 278.0, 298.0, 288.0, 282.0, 286.0, 284.0, 280.0, 290.0, 262.0, 268.0, 248.0, 271.0, 275.0, 244.0, 289.0, 284.0, 265.0, 265.0, 285.0, 285.0, 280.0, 296.0, 240.0, 255.0, 286.0, 284.0, 266.0, 261.0, 96.0, 84.0, 285.0, 291.0, 287.0, 289.0, 282.0, 291.0, 273.0, 
249.0, 267.0, 258.0, 293.0, 289.0, 288.0, 288.0, 292.0, 278.0, 268.0, 265.0, 278.0, 295.0, 285.0, 297.0, 283.0, 296.0, 282.0, 297.0, 278.0, 301.0, 269.0, 253.0, 282.0, 297.0, 265.0, 257.0, 288.0, 291.0, 287.0, 280.0, 294.0, 276.0, 260.0, 262.0, 285.0, 291.0, 264.0, 258.0, 292.0, 284.0, 259.0, 271.0, 285.0, 291.0, 277.0, 302.0, 251.0, 274.0, 283.0, 290.0, 300.0, 282.0, 279.0, 285.0, 286.0, 293.0, 277.0, 302.0, 251.0, 271.0, 310.0, 320.0, 291.0, 285.0, 283.0, 293.0, 275.0, 298.0, 291.0, 285.0, 256.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6833894892310337, "mean_inference_ms": 1.2059795926228936, "mean_action_processing_ms": 0.13232587728396183, "mean_env_wait_ms": 0.8377997008246847, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4300800, "num_agent_steps_trained": 4300800, "num_env_steps_sampled": 2150400, "num_env_steps_trained": 2150400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2150400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4300800, "timers": {"training_iteration_time_ms": 3618.811, "learn_time_ms": 1103.342, "learn_throughput": 11601.121, "synch_weights_time_ms": 12.497}, "counters": {"num_env_steps_sampled": 2150400, "num_env_steps_trained": 2150400, "num_agent_steps_sampled": 4300800, "num_agent_steps_trained": 4300800}, "done": false, "episodes_total": 5376, "training_iteration": 168, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-52", "timestamp": 1666581052, "time_this_iter_s": 3.766408681869507, "time_total_s": 630.115202665329, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 630.115202665329, "timesteps_since_restore": 0, "iterations_since_restore": 168, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.916666666666664, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 170.25, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.22, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.85, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.86, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.62, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.24, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.63, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.4, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.12, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.62, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.63, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.4, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.63, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.4, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001223443541675806, "policy_loss": -0.0014694086275994778, "vf_loss": 7.61738920211792, "vf_explained_var": 0.6677297949790955, "kl": 0.001981202280148864, "entropy": 1.031548261642456, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2163200, "num_env_steps_trained": 2163200, "num_agent_steps_sampled": 4326400, "num_agent_steps_trained": 4326400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 553.85, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 84.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 276.925}, "custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 170.25, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.22, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.85, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.86, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.62, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.63, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.4, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.12, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.57, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.55, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.62, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.63, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.4, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.63, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.4, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 567.0, 582.0, 576.0, 522.0, 522.0, 573.0, 630.0, 461.0, 519.0, 519.0, 579.0, 573.0, 576.0, 570.0, 570.0, 570.0, 530.0, 519.0, 519.0, 573.0, 530.0, 570.0, 576.0, 495.0, 570.0, 527.0, 180.0, 576.0, 576.0, 573.0, 522.0, 525.0, 582.0, 576.0, 570.0, 533.0, 573.0, 582.0, 579.0, 579.0, 579.0, 522.0, 579.0, 522.0, 579.0, 567.0, 570.0, 522.0, 576.0, 522.0, 576.0, 530.0, 576.0, 579.0, 525.0, 573.0, 582.0, 564.0, 579.0, 579.0, 522.0, 630.0, 576.0, 576.0, 573.0, 576.0, 530.0, 527.0, 576.0, 630.0, 527.0, 582.0, 579.0, 573.0, 579.0, 527.0, 567.0, 522.0, 579.0, 579.0, 536.0, 582.0, 484.0, 582.0, 576.0, 503.0, 570.0, 573.0, 530.0, 525.0, 536.0, 525.0, 573.0, 582.0, 579.0, 576.0, 573.0, 582.0, 447.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 290.0, 277.0, 292.0, 290.0, 282.0, 294.0, 252.0, 270.0, 265.0, 
257.0, 277.0, 296.0, 306.0, 324.0, 230.0, 231.0, 250.0, 269.0, 256.0, 263.0, 287.0, 292.0, 287.0, 286.0, 278.0, 298.0, 288.0, 282.0, 286.0, 284.0, 280.0, 290.0, 262.0, 268.0, 248.0, 271.0, 275.0, 244.0, 289.0, 284.0, 265.0, 265.0, 285.0, 285.0, 280.0, 296.0, 240.0, 255.0, 286.0, 284.0, 266.0, 261.0, 96.0, 84.0, 285.0, 291.0, 287.0, 289.0, 282.0, 291.0, 273.0, 249.0, 267.0, 258.0, 293.0, 289.0, 288.0, 288.0, 292.0, 278.0, 268.0, 265.0, 278.0, 295.0, 285.0, 297.0, 283.0, 296.0, 282.0, 297.0, 278.0, 301.0, 269.0, 253.0, 282.0, 297.0, 265.0, 257.0, 288.0, 291.0, 287.0, 280.0, 294.0, 276.0, 260.0, 262.0, 285.0, 291.0, 264.0, 258.0, 292.0, 284.0, 259.0, 271.0, 285.0, 291.0, 277.0, 302.0, 251.0, 274.0, 283.0, 290.0, 300.0, 282.0, 279.0, 285.0, 286.0, 293.0, 277.0, 302.0, 251.0, 271.0, 310.0, 320.0, 291.0, 285.0, 283.0, 293.0, 275.0, 298.0, 291.0, 285.0, 256.0, 274.0, 257.0, 270.0, 292.0, 284.0, 318.0, 312.0, 277.0, 250.0, 282.0, 300.0, 290.0, 289.0, 290.0, 283.0, 285.0, 294.0, 267.0, 260.0, 286.0, 281.0, 281.0, 241.0, 292.0, 287.0, 281.0, 298.0, 268.0, 268.0, 300.0, 282.0, 236.0, 248.0, 295.0, 287.0, 291.0, 285.0, 251.0, 252.0, 283.0, 287.0, 286.0, 287.0, 258.0, 272.0, 266.0, 259.0, 268.0, 268.0, 270.0, 255.0, 278.0, 295.0, 283.0, 299.0, 281.0, 298.0, 297.0, 279.0, 283.0, 290.0, 286.0, 296.0, 226.0, 221.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6834106688383577, "mean_inference_ms": 1.2059047272839276, "mean_action_processing_ms": 0.13233080968882707, "mean_env_wait_ms": 0.8376732814698284, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 180.0, "episode_reward_mean": 553.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 84.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 276.925}, "hist_stats": {"episode_reward": [576.0, 567.0, 582.0, 576.0, 522.0, 522.0, 573.0, 630.0, 461.0, 519.0, 519.0, 579.0, 573.0, 576.0, 570.0, 570.0, 570.0, 530.0, 519.0, 
519.0, 573.0, 530.0, 570.0, 576.0, 495.0, 570.0, 527.0, 180.0, 576.0, 576.0, 573.0, 522.0, 525.0, 582.0, 576.0, 570.0, 533.0, 573.0, 582.0, 579.0, 579.0, 579.0, 522.0, 579.0, 522.0, 579.0, 567.0, 570.0, 522.0, 576.0, 522.0, 576.0, 530.0, 576.0, 579.0, 525.0, 573.0, 582.0, 564.0, 579.0, 579.0, 522.0, 630.0, 576.0, 576.0, 573.0, 576.0, 530.0, 527.0, 576.0, 630.0, 527.0, 582.0, 579.0, 573.0, 579.0, 527.0, 567.0, 522.0, 579.0, 579.0, 536.0, 582.0, 484.0, 582.0, 576.0, 503.0, 570.0, 573.0, 530.0, 525.0, 536.0, 525.0, 573.0, 582.0, 579.0, 576.0, 573.0, 582.0, 447.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 300.0, 290.0, 277.0, 292.0, 290.0, 282.0, 294.0, 252.0, 270.0, 265.0, 257.0, 277.0, 296.0, 306.0, 324.0, 230.0, 231.0, 250.0, 269.0, 256.0, 263.0, 287.0, 292.0, 287.0, 286.0, 278.0, 298.0, 288.0, 282.0, 286.0, 284.0, 280.0, 290.0, 262.0, 268.0, 248.0, 271.0, 275.0, 244.0, 289.0, 284.0, 265.0, 265.0, 285.0, 285.0, 280.0, 296.0, 240.0, 255.0, 286.0, 284.0, 266.0, 261.0, 96.0, 84.0, 285.0, 291.0, 287.0, 289.0, 282.0, 291.0, 273.0, 249.0, 267.0, 258.0, 293.0, 289.0, 288.0, 288.0, 292.0, 278.0, 268.0, 265.0, 278.0, 295.0, 285.0, 297.0, 283.0, 296.0, 282.0, 297.0, 278.0, 301.0, 269.0, 253.0, 282.0, 297.0, 265.0, 257.0, 288.0, 291.0, 287.0, 280.0, 294.0, 276.0, 260.0, 262.0, 285.0, 291.0, 264.0, 258.0, 292.0, 284.0, 259.0, 271.0, 285.0, 291.0, 277.0, 302.0, 251.0, 274.0, 283.0, 290.0, 300.0, 282.0, 279.0, 285.0, 286.0, 293.0, 277.0, 302.0, 251.0, 271.0, 310.0, 320.0, 291.0, 
285.0, 283.0, 293.0, 275.0, 298.0, 291.0, 285.0, 256.0, 274.0, 257.0, 270.0, 292.0, 284.0, 318.0, 312.0, 277.0, 250.0, 282.0, 300.0, 290.0, 289.0, 290.0, 283.0, 285.0, 294.0, 267.0, 260.0, 286.0, 281.0, 281.0, 241.0, 292.0, 287.0, 281.0, 298.0, 268.0, 268.0, 300.0, 282.0, 236.0, 248.0, 295.0, 287.0, 291.0, 285.0, 251.0, 252.0, 283.0, 287.0, 286.0, 287.0, 258.0, 272.0, 266.0, 259.0, 268.0, 268.0, 270.0, 255.0, 278.0, 295.0, 283.0, 299.0, 281.0, 298.0, 297.0, 279.0, 283.0, 290.0, 286.0, 296.0, 226.0, 221.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6834106688383577, "mean_inference_ms": 1.2059047272839276, "mean_action_processing_ms": 0.13233080968882707, "mean_env_wait_ms": 0.8376732814698284, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4326400, "num_agent_steps_trained": 4326400, "num_env_steps_sampled": 2163200, "num_env_steps_trained": 2163200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2163200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4326400, "timers": {"training_iteration_time_ms": 3620.158, "learn_time_ms": 1104.52, "learn_throughput": 11588.742, "synch_weights_time_ms": 12.513}, "counters": {"num_env_steps_sampled": 2163200, "num_env_steps_trained": 2163200, "num_agent_steps_sampled": 4326400, "num_agent_steps_trained": 4326400}, "done": false, "episodes_total": 5408, "training_iteration": 169, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-56", "timestamp": 1666581056, "time_this_iter_s": 3.7082767486572266, "time_total_s": 633.8234794139862, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 633.8234794139862, "timesteps_since_restore": 0, "iterations_since_restore": 169, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.883333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.21, "shaped_reward_min": 127, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.04, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.13, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.77, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.92, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.6, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.22, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.58, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.72, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.68, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.92, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.6, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.92, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.6, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00020206667250022292, "policy_loss": -2.2467749658972025e-05, "vf_loss": 7.46675968170166, "vf_explained_var": 0.680292010307312, "kl": 0.0021671182475984097, "entropy": 1.0442825555801392, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2176000, "num_env_steps_trained": 2176000, "num_agent_steps_sampled": 4352000, "num_agent_steps_trained": 4352000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 447.0, "episode_reward_mean": 560.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 221.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 280.105}, "custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.21, "shaped_reward_min": 127, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.04, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.13, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.77, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.92, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.6, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.22, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.68, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.58, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.36, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.72, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.68, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.92, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.6, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.92, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.6, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 582.0, 576.0, 570.0, 533.0, 573.0, 582.0, 579.0, 579.0, 579.0, 522.0, 579.0, 522.0, 579.0, 567.0, 570.0, 522.0, 576.0, 522.0, 576.0, 530.0, 576.0, 579.0, 525.0, 573.0, 582.0, 564.0, 579.0, 579.0, 522.0, 630.0, 576.0, 576.0, 573.0, 576.0, 530.0, 527.0, 576.0, 630.0, 527.0, 582.0, 579.0, 573.0, 579.0, 527.0, 567.0, 522.0, 579.0, 579.0, 536.0, 582.0, 484.0, 582.0, 576.0, 503.0, 570.0, 573.0, 530.0, 525.0, 536.0, 525.0, 573.0, 582.0, 579.0, 576.0, 573.0, 582.0, 447.0, 522.0, 582.0, 513.0, 567.0, 576.0, 573.0, 570.0, 519.0, 579.0, 579.0, 579.0, 576.0, 570.0, 630.0, 527.0, 573.0, 530.0, 509.0, 579.0, 576.0, 519.0, 579.0, 579.0, 576.0, 579.0, 515.0, 573.0, 576.0, 573.0, 582.0, 525.0, 552.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 258.0, 293.0, 289.0, 288.0, 288.0, 292.0, 278.0, 268.0, 265.0, 278.0, 
295.0, 285.0, 297.0, 283.0, 296.0, 282.0, 297.0, 278.0, 301.0, 269.0, 253.0, 282.0, 297.0, 265.0, 257.0, 288.0, 291.0, 287.0, 280.0, 294.0, 276.0, 260.0, 262.0, 285.0, 291.0, 264.0, 258.0, 292.0, 284.0, 259.0, 271.0, 285.0, 291.0, 277.0, 302.0, 251.0, 274.0, 283.0, 290.0, 300.0, 282.0, 279.0, 285.0, 286.0, 293.0, 277.0, 302.0, 251.0, 271.0, 310.0, 320.0, 291.0, 285.0, 283.0, 293.0, 275.0, 298.0, 291.0, 285.0, 256.0, 274.0, 257.0, 270.0, 292.0, 284.0, 318.0, 312.0, 277.0, 250.0, 282.0, 300.0, 290.0, 289.0, 290.0, 283.0, 285.0, 294.0, 267.0, 260.0, 286.0, 281.0, 281.0, 241.0, 292.0, 287.0, 281.0, 298.0, 268.0, 268.0, 300.0, 282.0, 236.0, 248.0, 295.0, 287.0, 291.0, 285.0, 251.0, 252.0, 283.0, 287.0, 286.0, 287.0, 258.0, 272.0, 266.0, 259.0, 268.0, 268.0, 270.0, 255.0, 278.0, 295.0, 283.0, 299.0, 281.0, 298.0, 297.0, 279.0, 283.0, 290.0, 286.0, 296.0, 226.0, 221.0, 260.0, 262.0, 292.0, 290.0, 262.0, 251.0, 289.0, 278.0, 281.0, 295.0, 268.0, 305.0, 292.0, 278.0, 256.0, 263.0, 279.0, 300.0, 296.0, 283.0, 285.0, 294.0, 295.0, 281.0, 291.0, 279.0, 307.0, 323.0, 254.0, 273.0, 283.0, 290.0, 250.0, 280.0, 256.0, 253.0, 290.0, 289.0, 274.0, 302.0, 266.0, 253.0, 298.0, 281.0, 283.0, 296.0, 281.0, 295.0, 289.0, 290.0, 258.0, 257.0, 302.0, 271.0, 305.0, 271.0, 277.0, 296.0, 289.0, 293.0, 256.0, 269.0, 273.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6834095161080666, "mean_inference_ms": 1.2058352661434038, "mean_action_processing_ms": 0.13233467371881377, "mean_env_wait_ms": 0.8375451378942773, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 447.0, "episode_reward_mean": 560.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 221.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 280.105}, "hist_stats": {"episode_reward": [525.0, 582.0, 576.0, 570.0, 533.0, 573.0, 582.0, 579.0, 579.0, 579.0, 522.0, 579.0, 522.0, 579.0, 567.0, 570.0, 522.0, 576.0, 
522.0, 576.0, 530.0, 576.0, 579.0, 525.0, 573.0, 582.0, 564.0, 579.0, 579.0, 522.0, 630.0, 576.0, 576.0, 573.0, 576.0, 530.0, 527.0, 576.0, 630.0, 527.0, 582.0, 579.0, 573.0, 579.0, 527.0, 567.0, 522.0, 579.0, 579.0, 536.0, 582.0, 484.0, 582.0, 576.0, 503.0, 570.0, 573.0, 530.0, 525.0, 536.0, 525.0, 573.0, 582.0, 579.0, 576.0, 573.0, 582.0, 447.0, 522.0, 582.0, 513.0, 567.0, 576.0, 573.0, 570.0, 519.0, 579.0, 579.0, 579.0, 576.0, 570.0, 630.0, 527.0, 573.0, 530.0, 509.0, 579.0, 576.0, 519.0, 579.0, 579.0, 576.0, 579.0, 515.0, 573.0, 576.0, 573.0, 582.0, 525.0, 552.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 258.0, 293.0, 289.0, 288.0, 288.0, 292.0, 278.0, 268.0, 265.0, 278.0, 295.0, 285.0, 297.0, 283.0, 296.0, 282.0, 297.0, 278.0, 301.0, 269.0, 253.0, 282.0, 297.0, 265.0, 257.0, 288.0, 291.0, 287.0, 280.0, 294.0, 276.0, 260.0, 262.0, 285.0, 291.0, 264.0, 258.0, 292.0, 284.0, 259.0, 271.0, 285.0, 291.0, 277.0, 302.0, 251.0, 274.0, 283.0, 290.0, 300.0, 282.0, 279.0, 285.0, 286.0, 293.0, 277.0, 302.0, 251.0, 271.0, 310.0, 320.0, 291.0, 285.0, 283.0, 293.0, 275.0, 298.0, 291.0, 285.0, 256.0, 274.0, 257.0, 270.0, 292.0, 284.0, 318.0, 312.0, 277.0, 250.0, 282.0, 300.0, 290.0, 289.0, 290.0, 283.0, 285.0, 294.0, 267.0, 260.0, 286.0, 281.0, 281.0, 241.0, 292.0, 287.0, 281.0, 298.0, 268.0, 268.0, 300.0, 282.0, 236.0, 248.0, 295.0, 287.0, 291.0, 285.0, 251.0, 252.0, 283.0, 287.0, 286.0, 287.0, 258.0, 272.0, 266.0, 259.0, 268.0, 268.0, 270.0, 255.0, 278.0, 295.0, 283.0, 299.0, 
281.0, 298.0, 297.0, 279.0, 283.0, 290.0, 286.0, 296.0, 226.0, 221.0, 260.0, 262.0, 292.0, 290.0, 262.0, 251.0, 289.0, 278.0, 281.0, 295.0, 268.0, 305.0, 292.0, 278.0, 256.0, 263.0, 279.0, 300.0, 296.0, 283.0, 285.0, 294.0, 295.0, 281.0, 291.0, 279.0, 307.0, 323.0, 254.0, 273.0, 283.0, 290.0, 250.0, 280.0, 256.0, 253.0, 290.0, 289.0, 274.0, 302.0, 266.0, 253.0, 298.0, 281.0, 283.0, 296.0, 281.0, 295.0, 289.0, 290.0, 258.0, 257.0, 302.0, 271.0, 305.0, 271.0, 277.0, 296.0, 289.0, 293.0, 256.0, 269.0, 273.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6834095161080666, "mean_inference_ms": 1.2058352661434038, "mean_action_processing_ms": 0.13233467371881377, "mean_env_wait_ms": 0.8375451378942773, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4352000, "num_agent_steps_trained": 4352000, "num_env_steps_sampled": 2176000, "num_env_steps_trained": 2176000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2176000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4352000, "timers": {"training_iteration_time_ms": 3629.259, "learn_time_ms": 1114.057, "learn_throughput": 11489.537, "synch_weights_time_ms": 12.651}, "counters": {"num_env_steps_sampled": 2176000, "num_env_steps_trained": 2176000, "num_agent_steps_sampled": 4352000, "num_agent_steps_trained": 4352000}, "done": false, "episodes_total": 5440, "training_iteration": 170, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-10-59", "timestamp": 1666581059, "time_this_iter_s": 3.667999029159546, "time_total_s": 637.4914784431458, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 637.4914784431458, "timesteps_since_restore": 0, "iterations_since_restore": 170, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.56, "ram_util_percent": 10.620000000000001}} +{"custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 171.38, "shaped_reward_min": 54, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.49, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.05, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.96, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.71, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.85, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.53, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.64, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.37, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.81, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.85, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.53, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.85, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.53, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007277140975929797, "policy_loss": 0.0004762199823744595, "vf_loss": 7.702224254608154, "vf_explained_var": 0.6904242634773254, "kl": 0.002471720799803734, "entropy": 1.0374559164047241, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2188800, "num_env_steps_trained": 2188800, "num_agent_steps_sampled": 4377600, "num_agent_steps_trained": 4377600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 174.0, "episode_reward_mean": 556.98, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 82.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 278.49}, "custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 171.38, "shaped_reward_min": 54, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.49, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.05, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.96, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.71, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.85, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.53, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.64, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.37, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.81, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.85, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.53, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.85, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.53, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 573.0, 576.0, 530.0, 527.0, 576.0, 630.0, 527.0, 582.0, 579.0, 573.0, 579.0, 527.0, 567.0, 522.0, 579.0, 579.0, 536.0, 582.0, 484.0, 582.0, 576.0, 503.0, 570.0, 573.0, 530.0, 525.0, 536.0, 525.0, 573.0, 582.0, 579.0, 576.0, 573.0, 582.0, 447.0, 522.0, 582.0, 513.0, 567.0, 576.0, 573.0, 570.0, 519.0, 579.0, 579.0, 579.0, 576.0, 570.0, 630.0, 527.0, 573.0, 530.0, 509.0, 579.0, 576.0, 519.0, 579.0, 579.0, 576.0, 579.0, 515.0, 573.0, 576.0, 573.0, 582.0, 525.0, 552.0, 522.0, 582.0, 573.0, 525.0, 582.0, 582.0, 579.0, 627.0, 579.0, 518.0, 573.0, 576.0, 579.0, 573.0, 582.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 174.0, 573.0, 465.0, 570.0, 530.0, 530.0, 522.0, 579.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 275.0, 298.0, 291.0, 285.0, 256.0, 274.0, 257.0, 270.0, 292.0, 
284.0, 318.0, 312.0, 277.0, 250.0, 282.0, 300.0, 290.0, 289.0, 290.0, 283.0, 285.0, 294.0, 267.0, 260.0, 286.0, 281.0, 281.0, 241.0, 292.0, 287.0, 281.0, 298.0, 268.0, 268.0, 300.0, 282.0, 236.0, 248.0, 295.0, 287.0, 291.0, 285.0, 251.0, 252.0, 283.0, 287.0, 286.0, 287.0, 258.0, 272.0, 266.0, 259.0, 268.0, 268.0, 270.0, 255.0, 278.0, 295.0, 283.0, 299.0, 281.0, 298.0, 297.0, 279.0, 283.0, 290.0, 286.0, 296.0, 226.0, 221.0, 260.0, 262.0, 292.0, 290.0, 262.0, 251.0, 289.0, 278.0, 281.0, 295.0, 268.0, 305.0, 292.0, 278.0, 256.0, 263.0, 279.0, 300.0, 296.0, 283.0, 285.0, 294.0, 295.0, 281.0, 291.0, 279.0, 307.0, 323.0, 254.0, 273.0, 283.0, 290.0, 250.0, 280.0, 256.0, 253.0, 290.0, 289.0, 274.0, 302.0, 266.0, 253.0, 298.0, 281.0, 283.0, 296.0, 281.0, 295.0, 289.0, 290.0, 258.0, 257.0, 302.0, 271.0, 305.0, 271.0, 277.0, 296.0, 289.0, 293.0, 256.0, 269.0, 273.0, 279.0, 277.0, 245.0, 291.0, 291.0, 287.0, 286.0, 262.0, 263.0, 296.0, 286.0, 280.0, 302.0, 283.0, 296.0, 301.0, 326.0, 281.0, 298.0, 261.0, 257.0, 274.0, 299.0, 289.0, 287.0, 297.0, 282.0, 296.0, 277.0, 290.0, 292.0, 303.0, 276.0, 291.0, 285.0, 284.0, 298.0, 294.0, 288.0, 295.0, 284.0, 283.0, 290.0, 304.0, 283.0, 92.0, 82.0, 279.0, 294.0, 227.0, 238.0, 288.0, 282.0, 268.0, 262.0, 263.0, 267.0, 261.0, 261.0, 288.0, 291.0, 285.0, 285.0, 296.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6833479634776382, "mean_inference_ms": 1.2057306823526823, "mean_action_processing_ms": 0.13233109475853205, "mean_env_wait_ms": 0.8373725097047232, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 174.0, "episode_reward_mean": 556.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 82.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 278.49}, "hist_stats": {"episode_reward": [576.0, 573.0, 576.0, 530.0, 527.0, 576.0, 630.0, 527.0, 582.0, 579.0, 573.0, 579.0, 527.0, 567.0, 522.0, 579.0, 579.0, 536.0, 582.0, 
484.0, 582.0, 576.0, 503.0, 570.0, 573.0, 530.0, 525.0, 536.0, 525.0, 573.0, 582.0, 579.0, 576.0, 573.0, 582.0, 447.0, 522.0, 582.0, 513.0, 567.0, 576.0, 573.0, 570.0, 519.0, 579.0, 579.0, 579.0, 576.0, 570.0, 630.0, 527.0, 573.0, 530.0, 509.0, 579.0, 576.0, 519.0, 579.0, 579.0, 576.0, 579.0, 515.0, 573.0, 576.0, 573.0, 582.0, 525.0, 552.0, 522.0, 582.0, 573.0, 525.0, 582.0, 582.0, 579.0, 627.0, 579.0, 518.0, 573.0, 576.0, 579.0, 573.0, 582.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 174.0, 573.0, 465.0, 570.0, 530.0, 530.0, 522.0, 579.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 275.0, 298.0, 291.0, 285.0, 256.0, 274.0, 257.0, 270.0, 292.0, 284.0, 318.0, 312.0, 277.0, 250.0, 282.0, 300.0, 290.0, 289.0, 290.0, 283.0, 285.0, 294.0, 267.0, 260.0, 286.0, 281.0, 281.0, 241.0, 292.0, 287.0, 281.0, 298.0, 268.0, 268.0, 300.0, 282.0, 236.0, 248.0, 295.0, 287.0, 291.0, 285.0, 251.0, 252.0, 283.0, 287.0, 286.0, 287.0, 258.0, 272.0, 266.0, 259.0, 268.0, 268.0, 270.0, 255.0, 278.0, 295.0, 283.0, 299.0, 281.0, 298.0, 297.0, 279.0, 283.0, 290.0, 286.0, 296.0, 226.0, 221.0, 260.0, 262.0, 292.0, 290.0, 262.0, 251.0, 289.0, 278.0, 281.0, 295.0, 268.0, 305.0, 292.0, 278.0, 256.0, 263.0, 279.0, 300.0, 296.0, 283.0, 285.0, 294.0, 295.0, 281.0, 291.0, 279.0, 307.0, 323.0, 254.0, 273.0, 283.0, 290.0, 250.0, 280.0, 256.0, 253.0, 290.0, 289.0, 274.0, 302.0, 266.0, 253.0, 298.0, 281.0, 283.0, 296.0, 281.0, 295.0, 289.0, 290.0, 258.0, 257.0, 302.0, 271.0, 305.0, 
271.0, 277.0, 296.0, 289.0, 293.0, 256.0, 269.0, 273.0, 279.0, 277.0, 245.0, 291.0, 291.0, 287.0, 286.0, 262.0, 263.0, 296.0, 286.0, 280.0, 302.0, 283.0, 296.0, 301.0, 326.0, 281.0, 298.0, 261.0, 257.0, 274.0, 299.0, 289.0, 287.0, 297.0, 282.0, 296.0, 277.0, 290.0, 292.0, 303.0, 276.0, 291.0, 285.0, 284.0, 298.0, 294.0, 288.0, 295.0, 284.0, 283.0, 290.0, 304.0, 283.0, 92.0, 82.0, 279.0, 294.0, 227.0, 238.0, 288.0, 282.0, 268.0, 262.0, 263.0, 267.0, 261.0, 261.0, 288.0, 291.0, 285.0, 285.0, 296.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6833479634776382, "mean_inference_ms": 1.2057306823526823, "mean_action_processing_ms": 0.13233109475853205, "mean_env_wait_ms": 0.8373725097047232, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4377600, "num_agent_steps_trained": 4377600, "num_env_steps_sampled": 2188800, "num_env_steps_trained": 2188800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2188800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4377600, "timers": {"training_iteration_time_ms": 3640.814, "learn_time_ms": 1127.714, "learn_throughput": 11350.399, "synch_weights_time_ms": 11.954}, "counters": {"num_env_steps_sampled": 2188800, "num_env_steps_trained": 2188800, "num_agent_steps_sampled": 4377600, "num_agent_steps_trained": 4377600}, "done": false, "episodes_total": 5472, "training_iteration": 171, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-03", "timestamp": 1666581063, "time_this_iter_s": 3.769057512283325, "time_total_s": 641.2605359554291, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 641.2605359554291, "timesteps_since_restore": 0, "iterations_since_restore": 171, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.46666666666667, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 171.99, "shaped_reward_min": 54, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.34, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.29, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.9, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 15.89, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.73, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 15.77, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.89, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.85, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.73, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 15.77, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.73, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 15.77, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007936491165310144, "policy_loss": 0.0005551030626520514, "vf_loss": 7.581031322479248, "vf_explained_var": 0.6723864674568176, "kl": 0.0018949867226183414, "entropy": 1.039113998413086, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2201600, "num_env_steps_trained": 2201600, "num_agent_steps_sampled": 4403200, "num_agent_steps_trained": 4403200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 174.0, "episode_reward_mean": 560.39, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 82.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 280.195}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 171.99, "shaped_reward_min": 54, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.34, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.29, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.9, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 15.89, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.73, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 15.77, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.74, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.89, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.85, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.73, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 15.77, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.73, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 15.77, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 573.0, 582.0, 447.0, 522.0, 582.0, 513.0, 567.0, 576.0, 573.0, 570.0, 519.0, 579.0, 579.0, 579.0, 576.0, 570.0, 630.0, 527.0, 573.0, 530.0, 509.0, 579.0, 576.0, 519.0, 579.0, 579.0, 576.0, 579.0, 515.0, 573.0, 576.0, 573.0, 582.0, 525.0, 552.0, 522.0, 582.0, 573.0, 525.0, 582.0, 582.0, 579.0, 627.0, 579.0, 518.0, 573.0, 576.0, 579.0, 573.0, 582.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 174.0, 573.0, 465.0, 570.0, 530.0, 530.0, 522.0, 579.0, 570.0, 582.0, 573.0, 530.0, 579.0, 573.0, 587.0, 573.0, 533.0, 582.0, 570.0, 576.0, 579.0, 555.0, 530.0, 582.0, 525.0, 582.0, 525.0, 558.0, 525.0, 576.0, 573.0, 582.0, 627.0, 573.0, 590.0, 576.0, 579.0, 579.0, 630.0, 525.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 279.0, 283.0, 290.0, 286.0, 296.0, 226.0, 221.0, 260.0, 262.0, 292.0, 
290.0, 262.0, 251.0, 289.0, 278.0, 281.0, 295.0, 268.0, 305.0, 292.0, 278.0, 256.0, 263.0, 279.0, 300.0, 296.0, 283.0, 285.0, 294.0, 295.0, 281.0, 291.0, 279.0, 307.0, 323.0, 254.0, 273.0, 283.0, 290.0, 250.0, 280.0, 256.0, 253.0, 290.0, 289.0, 274.0, 302.0, 266.0, 253.0, 298.0, 281.0, 283.0, 296.0, 281.0, 295.0, 289.0, 290.0, 258.0, 257.0, 302.0, 271.0, 305.0, 271.0, 277.0, 296.0, 289.0, 293.0, 256.0, 269.0, 273.0, 279.0, 277.0, 245.0, 291.0, 291.0, 287.0, 286.0, 262.0, 263.0, 296.0, 286.0, 280.0, 302.0, 283.0, 296.0, 301.0, 326.0, 281.0, 298.0, 261.0, 257.0, 274.0, 299.0, 289.0, 287.0, 297.0, 282.0, 296.0, 277.0, 290.0, 292.0, 303.0, 276.0, 291.0, 285.0, 284.0, 298.0, 294.0, 288.0, 295.0, 284.0, 283.0, 290.0, 304.0, 283.0, 92.0, 82.0, 279.0, 294.0, 227.0, 238.0, 288.0, 282.0, 268.0, 262.0, 263.0, 267.0, 261.0, 261.0, 288.0, 291.0, 285.0, 285.0, 296.0, 286.0, 273.0, 300.0, 267.0, 263.0, 289.0, 290.0, 289.0, 284.0, 278.0, 309.0, 281.0, 292.0, 253.0, 280.0, 294.0, 288.0, 282.0, 288.0, 295.0, 281.0, 297.0, 282.0, 278.0, 277.0, 256.0, 274.0, 280.0, 302.0, 261.0, 264.0, 288.0, 294.0, 269.0, 256.0, 282.0, 276.0, 269.0, 256.0, 296.0, 280.0, 293.0, 280.0, 302.0, 280.0, 316.0, 311.0, 291.0, 282.0, 296.0, 294.0, 290.0, 286.0, 287.0, 292.0, 297.0, 282.0, 308.0, 322.0, 263.0, 262.0, 287.0, 289.0, 299.0, 277.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6832310630134222, "mean_inference_ms": 1.2056266619739329, "mean_action_processing_ms": 0.13232402816482625, "mean_env_wait_ms": 0.8371789914650529, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 174.0, "episode_reward_mean": 560.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 82.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 280.195}, "hist_stats": {"episode_reward": [576.0, 573.0, 582.0, 447.0, 522.0, 582.0, 513.0, 567.0, 576.0, 573.0, 570.0, 519.0, 579.0, 579.0, 579.0, 576.0, 570.0, 630.0, 527.0, 
573.0, 530.0, 509.0, 579.0, 576.0, 519.0, 579.0, 579.0, 576.0, 579.0, 515.0, 573.0, 576.0, 573.0, 582.0, 525.0, 552.0, 522.0, 582.0, 573.0, 525.0, 582.0, 582.0, 579.0, 627.0, 579.0, 518.0, 573.0, 576.0, 579.0, 573.0, 582.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 174.0, 573.0, 465.0, 570.0, 530.0, 530.0, 522.0, 579.0, 570.0, 582.0, 573.0, 530.0, 579.0, 573.0, 587.0, 573.0, 533.0, 582.0, 570.0, 576.0, 579.0, 555.0, 530.0, 582.0, 525.0, 582.0, 525.0, 558.0, 525.0, 576.0, 573.0, 582.0, 627.0, 573.0, 590.0, 576.0, 579.0, 579.0, 630.0, 525.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 279.0, 283.0, 290.0, 286.0, 296.0, 226.0, 221.0, 260.0, 262.0, 292.0, 290.0, 262.0, 251.0, 289.0, 278.0, 281.0, 295.0, 268.0, 305.0, 292.0, 278.0, 256.0, 263.0, 279.0, 300.0, 296.0, 283.0, 285.0, 294.0, 295.0, 281.0, 291.0, 279.0, 307.0, 323.0, 254.0, 273.0, 283.0, 290.0, 250.0, 280.0, 256.0, 253.0, 290.0, 289.0, 274.0, 302.0, 266.0, 253.0, 298.0, 281.0, 283.0, 296.0, 281.0, 295.0, 289.0, 290.0, 258.0, 257.0, 302.0, 271.0, 305.0, 271.0, 277.0, 296.0, 289.0, 293.0, 256.0, 269.0, 273.0, 279.0, 277.0, 245.0, 291.0, 291.0, 287.0, 286.0, 262.0, 263.0, 296.0, 286.0, 280.0, 302.0, 283.0, 296.0, 301.0, 326.0, 281.0, 298.0, 261.0, 257.0, 274.0, 299.0, 289.0, 287.0, 297.0, 282.0, 296.0, 277.0, 290.0, 292.0, 303.0, 276.0, 291.0, 285.0, 284.0, 298.0, 294.0, 288.0, 295.0, 284.0, 283.0, 290.0, 304.0, 283.0, 92.0, 82.0, 279.0, 294.0, 227.0, 238.0, 288.0, 282.0, 268.0, 262.0, 263.0, 
267.0, 261.0, 261.0, 288.0, 291.0, 285.0, 285.0, 296.0, 286.0, 273.0, 300.0, 267.0, 263.0, 289.0, 290.0, 289.0, 284.0, 278.0, 309.0, 281.0, 292.0, 253.0, 280.0, 294.0, 288.0, 282.0, 288.0, 295.0, 281.0, 297.0, 282.0, 278.0, 277.0, 256.0, 274.0, 280.0, 302.0, 261.0, 264.0, 288.0, 294.0, 269.0, 256.0, 282.0, 276.0, 269.0, 256.0, 296.0, 280.0, 293.0, 280.0, 302.0, 280.0, 316.0, 311.0, 291.0, 282.0, 296.0, 294.0, 290.0, 286.0, 287.0, 292.0, 297.0, 282.0, 308.0, 322.0, 263.0, 262.0, 287.0, 289.0, 299.0, 277.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6832310630134222, "mean_inference_ms": 1.2056266619739329, "mean_action_processing_ms": 0.13232402816482625, "mean_env_wait_ms": 0.8371789914650529, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4403200, "num_agent_steps_trained": 4403200, "num_env_steps_sampled": 2201600, "num_env_steps_trained": 2201600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2201600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4403200, "timers": {"training_iteration_time_ms": 3629.816, "learn_time_ms": 1123.526, "learn_throughput": 11392.702, "synch_weights_time_ms": 12.34}, "counters": {"num_env_steps_sampled": 2201600, "num_env_steps_trained": 2201600, "num_agent_steps_sampled": 4403200, "num_agent_steps_trained": 4403200}, "done": false, "episodes_total": 5504, "training_iteration": 172, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-07", "timestamp": 1666581067, "time_this_iter_s": 3.5678622722625732, "time_total_s": 644.8283982276917, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 644.8283982276917, "timesteps_since_restore": 0, "iterations_since_restore": 172, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.580000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.24, "shaped_reward_min": 54, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.66, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.19, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.71, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.02, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.61, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.83, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.48, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.14, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.02, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.61, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.02, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.61, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011171973310410976, "policy_loss": -0.0013455945299938321, "vf_loss": 7.470318794250488, "vf_explained_var": 0.6936505436897278, "kl": 0.002078745514154434, "entropy": 1.0372672080993652, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2214400, "num_env_steps_trained": 2214400, "num_agent_steps_sampled": 4428800, "num_agent_steps_trained": 4428800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 174.0, "episode_reward_mean": 562.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 82.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 281.02}, "custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.24, "shaped_reward_min": 54, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.66, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.19, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.71, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 4, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.02, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.61, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.83, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.48, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.42, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.14, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.02, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.61, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.02, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.61, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 525.0, 552.0, 522.0, 582.0, 573.0, 525.0, 582.0, 582.0, 579.0, 627.0, 579.0, 518.0, 573.0, 576.0, 579.0, 573.0, 582.0, 579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 174.0, 573.0, 465.0, 570.0, 530.0, 530.0, 522.0, 579.0, 570.0, 582.0, 573.0, 530.0, 579.0, 573.0, 587.0, 573.0, 533.0, 582.0, 570.0, 576.0, 579.0, 555.0, 530.0, 582.0, 525.0, 582.0, 525.0, 558.0, 525.0, 576.0, 573.0, 582.0, 627.0, 573.0, 590.0, 576.0, 579.0, 579.0, 630.0, 525.0, 576.0, 576.0, 576.0, 530.0, 579.0, 579.0, 576.0, 582.0, 570.0, 570.0, 576.0, 561.0, 582.0, 579.0, 576.0, 567.0, 579.0, 579.0, 579.0, 539.0, 576.0, 587.0, 579.0, 576.0, 573.0, 573.0, 587.0, 582.0, 576.0, 582.0, 579.0, 564.0, 273.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [277.0, 296.0, 289.0, 293.0, 256.0, 269.0, 273.0, 279.0, 277.0, 245.0, 291.0, 
291.0, 287.0, 286.0, 262.0, 263.0, 296.0, 286.0, 280.0, 302.0, 283.0, 296.0, 301.0, 326.0, 281.0, 298.0, 261.0, 257.0, 274.0, 299.0, 289.0, 287.0, 297.0, 282.0, 296.0, 277.0, 290.0, 292.0, 303.0, 276.0, 291.0, 285.0, 284.0, 298.0, 294.0, 288.0, 295.0, 284.0, 283.0, 290.0, 304.0, 283.0, 92.0, 82.0, 279.0, 294.0, 227.0, 238.0, 288.0, 282.0, 268.0, 262.0, 263.0, 267.0, 261.0, 261.0, 288.0, 291.0, 285.0, 285.0, 296.0, 286.0, 273.0, 300.0, 267.0, 263.0, 289.0, 290.0, 289.0, 284.0, 278.0, 309.0, 281.0, 292.0, 253.0, 280.0, 294.0, 288.0, 282.0, 288.0, 295.0, 281.0, 297.0, 282.0, 278.0, 277.0, 256.0, 274.0, 280.0, 302.0, 261.0, 264.0, 288.0, 294.0, 269.0, 256.0, 282.0, 276.0, 269.0, 256.0, 296.0, 280.0, 293.0, 280.0, 302.0, 280.0, 316.0, 311.0, 291.0, 282.0, 296.0, 294.0, 290.0, 286.0, 287.0, 292.0, 297.0, 282.0, 308.0, 322.0, 263.0, 262.0, 287.0, 289.0, 299.0, 277.0, 300.0, 276.0, 272.0, 258.0, 297.0, 282.0, 291.0, 288.0, 286.0, 290.0, 293.0, 289.0, 276.0, 294.0, 283.0, 287.0, 288.0, 288.0, 286.0, 275.0, 271.0, 311.0, 285.0, 294.0, 291.0, 285.0, 287.0, 280.0, 280.0, 299.0, 306.0, 273.0, 285.0, 294.0, 269.0, 270.0, 296.0, 280.0, 288.0, 299.0, 288.0, 291.0, 289.0, 287.0, 290.0, 283.0, 283.0, 290.0, 298.0, 289.0, 295.0, 287.0, 279.0, 297.0, 275.0, 307.0, 286.0, 293.0, 281.0, 283.0, 138.0, 135.0, 298.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6830980537233386, "mean_inference_ms": 1.205477553894199, "mean_action_processing_ms": 0.13231296218106148, "mean_env_wait_ms": 0.8369617254633316, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 174.0, "episode_reward_mean": 562.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 82.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 281.02}, "hist_stats": {"episode_reward": [573.0, 582.0, 525.0, 552.0, 522.0, 582.0, 573.0, 525.0, 582.0, 582.0, 579.0, 627.0, 579.0, 518.0, 573.0, 576.0, 579.0, 573.0, 582.0, 
579.0, 576.0, 582.0, 582.0, 579.0, 573.0, 587.0, 174.0, 573.0, 465.0, 570.0, 530.0, 530.0, 522.0, 579.0, 570.0, 582.0, 573.0, 530.0, 579.0, 573.0, 587.0, 573.0, 533.0, 582.0, 570.0, 576.0, 579.0, 555.0, 530.0, 582.0, 525.0, 582.0, 525.0, 558.0, 525.0, 576.0, 573.0, 582.0, 627.0, 573.0, 590.0, 576.0, 579.0, 579.0, 630.0, 525.0, 576.0, 576.0, 576.0, 530.0, 579.0, 579.0, 576.0, 582.0, 570.0, 570.0, 576.0, 561.0, 582.0, 579.0, 576.0, 567.0, 579.0, 579.0, 579.0, 539.0, 576.0, 587.0, 579.0, 576.0, 573.0, 573.0, 587.0, 582.0, 576.0, 582.0, 579.0, 564.0, 273.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [277.0, 296.0, 289.0, 293.0, 256.0, 269.0, 273.0, 279.0, 277.0, 245.0, 291.0, 291.0, 287.0, 286.0, 262.0, 263.0, 296.0, 286.0, 280.0, 302.0, 283.0, 296.0, 301.0, 326.0, 281.0, 298.0, 261.0, 257.0, 274.0, 299.0, 289.0, 287.0, 297.0, 282.0, 296.0, 277.0, 290.0, 292.0, 303.0, 276.0, 291.0, 285.0, 284.0, 298.0, 294.0, 288.0, 295.0, 284.0, 283.0, 290.0, 304.0, 283.0, 92.0, 82.0, 279.0, 294.0, 227.0, 238.0, 288.0, 282.0, 268.0, 262.0, 263.0, 267.0, 261.0, 261.0, 288.0, 291.0, 285.0, 285.0, 296.0, 286.0, 273.0, 300.0, 267.0, 263.0, 289.0, 290.0, 289.0, 284.0, 278.0, 309.0, 281.0, 292.0, 253.0, 280.0, 294.0, 288.0, 282.0, 288.0, 295.0, 281.0, 297.0, 282.0, 278.0, 277.0, 256.0, 274.0, 280.0, 302.0, 261.0, 264.0, 288.0, 294.0, 269.0, 256.0, 282.0, 276.0, 269.0, 256.0, 296.0, 280.0, 293.0, 280.0, 302.0, 280.0, 316.0, 311.0, 291.0, 282.0, 296.0, 294.0, 290.0, 286.0, 287.0, 292.0, 297.0, 
282.0, 308.0, 322.0, 263.0, 262.0, 287.0, 289.0, 299.0, 277.0, 300.0, 276.0, 272.0, 258.0, 297.0, 282.0, 291.0, 288.0, 286.0, 290.0, 293.0, 289.0, 276.0, 294.0, 283.0, 287.0, 288.0, 288.0, 286.0, 275.0, 271.0, 311.0, 285.0, 294.0, 291.0, 285.0, 287.0, 280.0, 280.0, 299.0, 306.0, 273.0, 285.0, 294.0, 269.0, 270.0, 296.0, 280.0, 288.0, 299.0, 288.0, 291.0, 289.0, 287.0, 290.0, 283.0, 283.0, 290.0, 298.0, 289.0, 295.0, 287.0, 279.0, 297.0, 275.0, 307.0, 286.0, 293.0, 281.0, 283.0, 138.0, 135.0, 298.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6830980537233386, "mean_inference_ms": 1.205477553894199, "mean_action_processing_ms": 0.13231296218106148, "mean_env_wait_ms": 0.8369617254633316, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4428800, "num_agent_steps_trained": 4428800, "num_env_steps_sampled": 2214400, "num_env_steps_trained": 2214400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2214400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4428800, "timers": {"training_iteration_time_ms": 3637.654, "learn_time_ms": 1139.364, "learn_throughput": 11234.342, "synch_weights_time_ms": 11.368}, "counters": {"num_env_steps_sampled": 2214400, "num_env_steps_trained": 2214400, "num_agent_steps_sampled": 4428800, "num_agent_steps_trained": 4428800}, "done": false, "episodes_total": 5536, "training_iteration": 173, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-11", "timestamp": 1666581071, "time_this_iter_s": 3.733807325363159, "time_total_s": 648.5622055530548, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 648.5622055530548, "timesteps_since_restore": 0, "iterations_since_restore": 173, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.300000000000004, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.57, "shaped_reward_min": 113, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.1, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.91, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.75, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.53, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.37, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.48, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.37, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.37, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003722138935700059, "policy_loss": -0.0006101266480982304, "vf_loss": 7.5663604736328125, "vf_explained_var": 0.672671914100647, "kl": 0.0019681788980960846, "entropy": 1.0374469757080078, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2227200, "num_env_steps_trained": 2227200, "num_agent_steps_sampled": 4454400, "num_agent_steps_trained": 4454400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 273.0, "episode_reward_mean": 566.17, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 135.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.085}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.57, "shaped_reward_min": 113, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.1, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.91, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.75, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.53, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.37, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.41, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.48, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.37, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.37, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 579.0, 570.0, 582.0, 573.0, 530.0, 579.0, 573.0, 587.0, 573.0, 533.0, 582.0, 570.0, 576.0, 579.0, 555.0, 530.0, 582.0, 525.0, 582.0, 525.0, 558.0, 525.0, 576.0, 573.0, 582.0, 627.0, 573.0, 590.0, 576.0, 579.0, 579.0, 630.0, 525.0, 576.0, 576.0, 576.0, 530.0, 579.0, 579.0, 576.0, 582.0, 570.0, 570.0, 576.0, 561.0, 582.0, 579.0, 576.0, 567.0, 579.0, 579.0, 579.0, 539.0, 576.0, 587.0, 579.0, 576.0, 573.0, 573.0, 587.0, 582.0, 576.0, 582.0, 579.0, 564.0, 273.0, 582.0, 573.0, 519.0, 579.0, 582.0, 579.0, 522.0, 567.0, 567.0, 579.0, 576.0, 576.0, 573.0, 522.0, 564.0, 579.0, 576.0, 527.0, 579.0, 584.0, 576.0, 584.0, 582.0, 587.0, 576.0, 576.0, 522.0, 582.0, 582.0, 533.0, 582.0, 582.0, 510.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 261.0, 288.0, 291.0, 285.0, 285.0, 296.0, 286.0, 273.0, 300.0, 267.0, 
263.0, 289.0, 290.0, 289.0, 284.0, 278.0, 309.0, 281.0, 292.0, 253.0, 280.0, 294.0, 288.0, 282.0, 288.0, 295.0, 281.0, 297.0, 282.0, 278.0, 277.0, 256.0, 274.0, 280.0, 302.0, 261.0, 264.0, 288.0, 294.0, 269.0, 256.0, 282.0, 276.0, 269.0, 256.0, 296.0, 280.0, 293.0, 280.0, 302.0, 280.0, 316.0, 311.0, 291.0, 282.0, 296.0, 294.0, 290.0, 286.0, 287.0, 292.0, 297.0, 282.0, 308.0, 322.0, 263.0, 262.0, 287.0, 289.0, 299.0, 277.0, 300.0, 276.0, 272.0, 258.0, 297.0, 282.0, 291.0, 288.0, 286.0, 290.0, 293.0, 289.0, 276.0, 294.0, 283.0, 287.0, 288.0, 288.0, 286.0, 275.0, 271.0, 311.0, 285.0, 294.0, 291.0, 285.0, 287.0, 280.0, 280.0, 299.0, 306.0, 273.0, 285.0, 294.0, 269.0, 270.0, 296.0, 280.0, 288.0, 299.0, 288.0, 291.0, 289.0, 287.0, 290.0, 283.0, 283.0, 290.0, 298.0, 289.0, 295.0, 287.0, 279.0, 297.0, 275.0, 307.0, 286.0, 293.0, 281.0, 283.0, 138.0, 135.0, 298.0, 284.0, 279.0, 294.0, 256.0, 263.0, 288.0, 291.0, 295.0, 287.0, 295.0, 284.0, 255.0, 267.0, 266.0, 301.0, 295.0, 272.0, 292.0, 287.0, 295.0, 281.0, 285.0, 291.0, 269.0, 304.0, 255.0, 267.0, 279.0, 285.0, 289.0, 290.0, 296.0, 280.0, 279.0, 248.0, 300.0, 279.0, 285.0, 299.0, 298.0, 278.0, 290.0, 294.0, 281.0, 301.0, 304.0, 283.0, 279.0, 297.0, 286.0, 290.0, 257.0, 265.0, 289.0, 293.0, 281.0, 301.0, 280.0, 253.0, 286.0, 296.0, 283.0, 299.0, 263.0, 247.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.682994178956515, "mean_inference_ms": 1.205319486334547, "mean_action_processing_ms": 0.13230146982573307, "mean_env_wait_ms": 0.8367514360219342, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 273.0, "episode_reward_mean": 566.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 135.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.085}, "hist_stats": {"episode_reward": [522.0, 579.0, 570.0, 582.0, 573.0, 530.0, 579.0, 573.0, 587.0, 573.0, 533.0, 582.0, 570.0, 576.0, 579.0, 555.0, 530.0, 582.0, 525.0, 
582.0, 525.0, 558.0, 525.0, 576.0, 573.0, 582.0, 627.0, 573.0, 590.0, 576.0, 579.0, 579.0, 630.0, 525.0, 576.0, 576.0, 576.0, 530.0, 579.0, 579.0, 576.0, 582.0, 570.0, 570.0, 576.0, 561.0, 582.0, 579.0, 576.0, 567.0, 579.0, 579.0, 579.0, 539.0, 576.0, 587.0, 579.0, 576.0, 573.0, 573.0, 587.0, 582.0, 576.0, 582.0, 579.0, 564.0, 273.0, 582.0, 573.0, 519.0, 579.0, 582.0, 579.0, 522.0, 567.0, 567.0, 579.0, 576.0, 576.0, 573.0, 522.0, 564.0, 579.0, 576.0, 527.0, 579.0, 584.0, 576.0, 584.0, 582.0, 587.0, 576.0, 576.0, 522.0, 582.0, 582.0, 533.0, 582.0, 582.0, 510.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 261.0, 288.0, 291.0, 285.0, 285.0, 296.0, 286.0, 273.0, 300.0, 267.0, 263.0, 289.0, 290.0, 289.0, 284.0, 278.0, 309.0, 281.0, 292.0, 253.0, 280.0, 294.0, 288.0, 282.0, 288.0, 295.0, 281.0, 297.0, 282.0, 278.0, 277.0, 256.0, 274.0, 280.0, 302.0, 261.0, 264.0, 288.0, 294.0, 269.0, 256.0, 282.0, 276.0, 269.0, 256.0, 296.0, 280.0, 293.0, 280.0, 302.0, 280.0, 316.0, 311.0, 291.0, 282.0, 296.0, 294.0, 290.0, 286.0, 287.0, 292.0, 297.0, 282.0, 308.0, 322.0, 263.0, 262.0, 287.0, 289.0, 299.0, 277.0, 300.0, 276.0, 272.0, 258.0, 297.0, 282.0, 291.0, 288.0, 286.0, 290.0, 293.0, 289.0, 276.0, 294.0, 283.0, 287.0, 288.0, 288.0, 286.0, 275.0, 271.0, 311.0, 285.0, 294.0, 291.0, 285.0, 287.0, 280.0, 280.0, 299.0, 306.0, 273.0, 285.0, 294.0, 269.0, 270.0, 296.0, 280.0, 288.0, 299.0, 288.0, 291.0, 289.0, 287.0, 290.0, 283.0, 283.0, 290.0, 298.0, 289.0, 295.0, 287.0, 279.0, 297.0, 275.0, 
307.0, 286.0, 293.0, 281.0, 283.0, 138.0, 135.0, 298.0, 284.0, 279.0, 294.0, 256.0, 263.0, 288.0, 291.0, 295.0, 287.0, 295.0, 284.0, 255.0, 267.0, 266.0, 301.0, 295.0, 272.0, 292.0, 287.0, 295.0, 281.0, 285.0, 291.0, 269.0, 304.0, 255.0, 267.0, 279.0, 285.0, 289.0, 290.0, 296.0, 280.0, 279.0, 248.0, 300.0, 279.0, 285.0, 299.0, 298.0, 278.0, 290.0, 294.0, 281.0, 301.0, 304.0, 283.0, 279.0, 297.0, 286.0, 290.0, 257.0, 265.0, 289.0, 293.0, 281.0, 301.0, 280.0, 253.0, 286.0, 296.0, 283.0, 299.0, 263.0, 247.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.682994178956515, "mean_inference_ms": 1.205319486334547, "mean_action_processing_ms": 0.13230146982573307, "mean_env_wait_ms": 0.8367514360219342, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4454400, "num_agent_steps_trained": 4454400, "num_env_steps_sampled": 2227200, "num_env_steps_trained": 2227200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2227200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4454400, "timers": {"training_iteration_time_ms": 3618.034, "learn_time_ms": 1141.84, "learn_throughput": 11209.981, "synch_weights_time_ms": 10.787}, "counters": {"num_env_steps_sampled": 2227200, "num_env_steps_trained": 2227200, "num_agent_steps_sampled": 4454400, "num_agent_steps_trained": 4454400}, "done": false, "episodes_total": 5568, "training_iteration": 174, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-15", "timestamp": 1666581075, "time_this_iter_s": 3.605084180831909, "time_total_s": 652.1672897338867, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 652.1672897338867, "timesteps_since_restore": 0, "iterations_since_restore": 174, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.633333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.07, "shaped_reward_min": 113, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.19, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.32, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.26, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.99, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.48, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.26, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.26, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019827396608889103, "policy_loss": -0.0022173135075718164, "vf_loss": 7.520552635192871, "vf_explained_var": 0.6868171095848083, "kl": 0.00197706394828856, "entropy": 1.0349608659744263, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2240000, "num_env_steps_trained": 2240000, "num_agent_steps_sampled": 4480000, "num_agent_steps_trained": 4480000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 273.0, "episode_reward_mean": 564.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 135.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 282.435}, "custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.07, "shaped_reward_min": 113, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.19, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.32, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.26, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.99, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.48, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.26, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.26, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 525.0, 576.0, 576.0, 576.0, 530.0, 579.0, 579.0, 576.0, 582.0, 570.0, 570.0, 576.0, 561.0, 582.0, 579.0, 576.0, 567.0, 579.0, 579.0, 579.0, 539.0, 576.0, 587.0, 579.0, 576.0, 573.0, 573.0, 587.0, 582.0, 576.0, 582.0, 579.0, 564.0, 273.0, 582.0, 573.0, 519.0, 579.0, 582.0, 579.0, 522.0, 567.0, 567.0, 579.0, 576.0, 576.0, 573.0, 522.0, 564.0, 579.0, 576.0, 527.0, 579.0, 584.0, 576.0, 584.0, 582.0, 587.0, 576.0, 576.0, 522.0, 582.0, 582.0, 533.0, 582.0, 582.0, 510.0, 573.0, 579.0, 522.0, 527.0, 579.0, 530.0, 573.0, 525.0, 570.0, 579.0, 573.0, 579.0, 570.0, 576.0, 573.0, 579.0, 525.0, 579.0, 582.0, 576.0, 582.0, 573.0, 579.0, 522.0, 522.0, 522.0, 576.0, 630.0, 579.0, 512.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 322.0, 263.0, 262.0, 287.0, 289.0, 299.0, 277.0, 300.0, 276.0, 272.0, 
258.0, 297.0, 282.0, 291.0, 288.0, 286.0, 290.0, 293.0, 289.0, 276.0, 294.0, 283.0, 287.0, 288.0, 288.0, 286.0, 275.0, 271.0, 311.0, 285.0, 294.0, 291.0, 285.0, 287.0, 280.0, 280.0, 299.0, 306.0, 273.0, 285.0, 294.0, 269.0, 270.0, 296.0, 280.0, 288.0, 299.0, 288.0, 291.0, 289.0, 287.0, 290.0, 283.0, 283.0, 290.0, 298.0, 289.0, 295.0, 287.0, 279.0, 297.0, 275.0, 307.0, 286.0, 293.0, 281.0, 283.0, 138.0, 135.0, 298.0, 284.0, 279.0, 294.0, 256.0, 263.0, 288.0, 291.0, 295.0, 287.0, 295.0, 284.0, 255.0, 267.0, 266.0, 301.0, 295.0, 272.0, 292.0, 287.0, 295.0, 281.0, 285.0, 291.0, 269.0, 304.0, 255.0, 267.0, 279.0, 285.0, 289.0, 290.0, 296.0, 280.0, 279.0, 248.0, 300.0, 279.0, 285.0, 299.0, 298.0, 278.0, 290.0, 294.0, 281.0, 301.0, 304.0, 283.0, 279.0, 297.0, 286.0, 290.0, 257.0, 265.0, 289.0, 293.0, 281.0, 301.0, 280.0, 253.0, 286.0, 296.0, 283.0, 299.0, 263.0, 247.0, 284.0, 289.0, 283.0, 296.0, 267.0, 255.0, 276.0, 251.0, 288.0, 291.0, 267.0, 263.0, 295.0, 278.0, 256.0, 269.0, 293.0, 277.0, 296.0, 283.0, 275.0, 298.0, 288.0, 291.0, 296.0, 274.0, 282.0, 294.0, 283.0, 290.0, 281.0, 298.0, 267.0, 258.0, 284.0, 295.0, 283.0, 299.0, 290.0, 286.0, 301.0, 281.0, 282.0, 291.0, 293.0, 286.0, 260.0, 262.0, 267.0, 255.0, 247.0, 275.0, 290.0, 286.0, 318.0, 312.0, 306.0, 273.0, 256.0, 256.0, 274.0, 299.0, 286.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6829100253627054, "mean_inference_ms": 1.2051592701617222, "mean_action_processing_ms": 0.13229100091564924, "mean_env_wait_ms": 0.8365500433383677, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 273.0, "episode_reward_mean": 564.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 135.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 282.435}, "hist_stats": {"episode_reward": [630.0, 525.0, 576.0, 576.0, 576.0, 530.0, 579.0, 579.0, 576.0, 582.0, 570.0, 570.0, 576.0, 561.0, 582.0, 579.0, 576.0, 567.0, 
579.0, 579.0, 579.0, 539.0, 576.0, 587.0, 579.0, 576.0, 573.0, 573.0, 587.0, 582.0, 576.0, 582.0, 579.0, 564.0, 273.0, 582.0, 573.0, 519.0, 579.0, 582.0, 579.0, 522.0, 567.0, 567.0, 579.0, 576.0, 576.0, 573.0, 522.0, 564.0, 579.0, 576.0, 527.0, 579.0, 584.0, 576.0, 584.0, 582.0, 587.0, 576.0, 576.0, 522.0, 582.0, 582.0, 533.0, 582.0, 582.0, 510.0, 573.0, 579.0, 522.0, 527.0, 579.0, 530.0, 573.0, 525.0, 570.0, 579.0, 573.0, 579.0, 570.0, 576.0, 573.0, 579.0, 525.0, 579.0, 582.0, 576.0, 582.0, 573.0, 579.0, 522.0, 522.0, 522.0, 576.0, 630.0, 579.0, 512.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [308.0, 322.0, 263.0, 262.0, 287.0, 289.0, 299.0, 277.0, 300.0, 276.0, 272.0, 258.0, 297.0, 282.0, 291.0, 288.0, 286.0, 290.0, 293.0, 289.0, 276.0, 294.0, 283.0, 287.0, 288.0, 288.0, 286.0, 275.0, 271.0, 311.0, 285.0, 294.0, 291.0, 285.0, 287.0, 280.0, 280.0, 299.0, 306.0, 273.0, 285.0, 294.0, 269.0, 270.0, 296.0, 280.0, 288.0, 299.0, 288.0, 291.0, 289.0, 287.0, 290.0, 283.0, 283.0, 290.0, 298.0, 289.0, 295.0, 287.0, 279.0, 297.0, 275.0, 307.0, 286.0, 293.0, 281.0, 283.0, 138.0, 135.0, 298.0, 284.0, 279.0, 294.0, 256.0, 263.0, 288.0, 291.0, 295.0, 287.0, 295.0, 284.0, 255.0, 267.0, 266.0, 301.0, 295.0, 272.0, 292.0, 287.0, 295.0, 281.0, 285.0, 291.0, 269.0, 304.0, 255.0, 267.0, 279.0, 285.0, 289.0, 290.0, 296.0, 280.0, 279.0, 248.0, 300.0, 279.0, 285.0, 299.0, 298.0, 278.0, 290.0, 294.0, 281.0, 301.0, 304.0, 283.0, 279.0, 297.0, 286.0, 290.0, 257.0, 265.0, 289.0, 293.0, 
281.0, 301.0, 280.0, 253.0, 286.0, 296.0, 283.0, 299.0, 263.0, 247.0, 284.0, 289.0, 283.0, 296.0, 267.0, 255.0, 276.0, 251.0, 288.0, 291.0, 267.0, 263.0, 295.0, 278.0, 256.0, 269.0, 293.0, 277.0, 296.0, 283.0, 275.0, 298.0, 288.0, 291.0, 296.0, 274.0, 282.0, 294.0, 283.0, 290.0, 281.0, 298.0, 267.0, 258.0, 284.0, 295.0, 283.0, 299.0, 290.0, 286.0, 301.0, 281.0, 282.0, 291.0, 293.0, 286.0, 260.0, 262.0, 267.0, 255.0, 247.0, 275.0, 290.0, 286.0, 318.0, 312.0, 306.0, 273.0, 256.0, 256.0, 274.0, 299.0, 286.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6829100253627054, "mean_inference_ms": 1.2051592701617222, "mean_action_processing_ms": 0.13229100091564924, "mean_env_wait_ms": 0.8365500433383677, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4480000, "num_agent_steps_trained": 4480000, "num_env_steps_sampled": 2240000, "num_env_steps_trained": 2240000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2240000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4480000, "timers": {"training_iteration_time_ms": 3633.635, "learn_time_ms": 1148.824, "learn_throughput": 11141.83, "synch_weights_time_ms": 10.544}, "counters": {"num_env_steps_sampled": 2240000, "num_env_steps_trained": 2240000, "num_agent_steps_sampled": 4480000, "num_agent_steps_trained": 4480000}, "done": false, "episodes_total": 5600, "training_iteration": 175, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-19", "timestamp": 1666581079, "time_this_iter_s": 3.750723123550415, "time_total_s": 655.9180128574371, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 655.9180128574371, "timesteps_since_restore": 0, "iterations_since_restore": 175, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.8, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 173.55, "shaped_reward_min": 113, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.48, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.92, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.71, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.89, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.71, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.89, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.71, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.89, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0017110540065914392, "policy_loss": 0.0014627662021666765, "vf_loss": 7.637947082519531, "vf_explained_var": 0.6661041975021362, "kl": 0.0020245490595698357, "entropy": 1.031015157699585, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2252800, "num_env_steps_trained": 2252800, "num_agent_steps_sampled": 4505600, "num_agent_steps_trained": 4505600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 273.0, "episode_reward_mean": 562.35, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 135.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 281.175}, "custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 173.55, "shaped_reward_min": 113, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.48, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.92, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.71, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.89, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.71, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.89, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.71, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.89, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 564.0, 273.0, 582.0, 573.0, 519.0, 579.0, 582.0, 579.0, 522.0, 567.0, 567.0, 579.0, 576.0, 576.0, 573.0, 522.0, 564.0, 579.0, 576.0, 527.0, 579.0, 584.0, 576.0, 584.0, 582.0, 587.0, 576.0, 576.0, 522.0, 582.0, 582.0, 533.0, 582.0, 582.0, 510.0, 573.0, 579.0, 522.0, 527.0, 579.0, 530.0, 573.0, 525.0, 570.0, 579.0, 573.0, 579.0, 570.0, 576.0, 573.0, 579.0, 525.0, 579.0, 582.0, 576.0, 582.0, 573.0, 579.0, 522.0, 522.0, 522.0, 576.0, 630.0, 579.0, 512.0, 573.0, 576.0, 567.0, 570.0, 569.0, 579.0, 576.0, 582.0, 530.0, 579.0, 582.0, 582.0, 567.0, 587.0, 576.0, 522.0, 576.0, 582.0, 582.0, 522.0, 587.0, 579.0, 582.0, 576.0, 582.0, 570.0, 441.0, 522.0, 576.0, 576.0, 576.0, 576.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 281.0, 283.0, 138.0, 135.0, 298.0, 284.0, 279.0, 294.0, 256.0, 
263.0, 288.0, 291.0, 295.0, 287.0, 295.0, 284.0, 255.0, 267.0, 266.0, 301.0, 295.0, 272.0, 292.0, 287.0, 295.0, 281.0, 285.0, 291.0, 269.0, 304.0, 255.0, 267.0, 279.0, 285.0, 289.0, 290.0, 296.0, 280.0, 279.0, 248.0, 300.0, 279.0, 285.0, 299.0, 298.0, 278.0, 290.0, 294.0, 281.0, 301.0, 304.0, 283.0, 279.0, 297.0, 286.0, 290.0, 257.0, 265.0, 289.0, 293.0, 281.0, 301.0, 280.0, 253.0, 286.0, 296.0, 283.0, 299.0, 263.0, 247.0, 284.0, 289.0, 283.0, 296.0, 267.0, 255.0, 276.0, 251.0, 288.0, 291.0, 267.0, 263.0, 295.0, 278.0, 256.0, 269.0, 293.0, 277.0, 296.0, 283.0, 275.0, 298.0, 288.0, 291.0, 296.0, 274.0, 282.0, 294.0, 283.0, 290.0, 281.0, 298.0, 267.0, 258.0, 284.0, 295.0, 283.0, 299.0, 290.0, 286.0, 301.0, 281.0, 282.0, 291.0, 293.0, 286.0, 260.0, 262.0, 267.0, 255.0, 247.0, 275.0, 290.0, 286.0, 318.0, 312.0, 306.0, 273.0, 256.0, 256.0, 274.0, 299.0, 286.0, 290.0, 290.0, 277.0, 287.0, 283.0, 286.0, 283.0, 296.0, 283.0, 293.0, 283.0, 276.0, 306.0, 273.0, 257.0, 289.0, 290.0, 293.0, 289.0, 301.0, 281.0, 285.0, 282.0, 296.0, 291.0, 292.0, 284.0, 267.0, 255.0, 288.0, 288.0, 296.0, 286.0, 289.0, 293.0, 262.0, 260.0, 304.0, 283.0, 295.0, 284.0, 304.0, 278.0, 297.0, 279.0, 293.0, 289.0, 285.0, 285.0, 221.0, 220.0, 250.0, 272.0, 302.0, 274.0, 293.0, 283.0, 277.0, 299.0, 298.0, 278.0, 291.0, 285.0, 296.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6828569282642821, "mean_inference_ms": 1.2050293820721487, "mean_action_processing_ms": 0.13228236830438264, "mean_env_wait_ms": 0.8363800663320577, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 273.0, "episode_reward_mean": 562.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 135.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 281.175}, "hist_stats": {"episode_reward": [579.0, 564.0, 273.0, 582.0, 573.0, 519.0, 579.0, 582.0, 579.0, 522.0, 567.0, 567.0, 579.0, 576.0, 576.0, 573.0, 522.0, 564.0, 
579.0, 576.0, 527.0, 579.0, 584.0, 576.0, 584.0, 582.0, 587.0, 576.0, 576.0, 522.0, 582.0, 582.0, 533.0, 582.0, 582.0, 510.0, 573.0, 579.0, 522.0, 527.0, 579.0, 530.0, 573.0, 525.0, 570.0, 579.0, 573.0, 579.0, 570.0, 576.0, 573.0, 579.0, 525.0, 579.0, 582.0, 576.0, 582.0, 573.0, 579.0, 522.0, 522.0, 522.0, 576.0, 630.0, 579.0, 512.0, 573.0, 576.0, 567.0, 570.0, 569.0, 579.0, 576.0, 582.0, 530.0, 579.0, 582.0, 582.0, 567.0, 587.0, 576.0, 522.0, 576.0, 582.0, 582.0, 522.0, 587.0, 579.0, 582.0, 576.0, 582.0, 570.0, 441.0, 522.0, 576.0, 576.0, 576.0, 576.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 281.0, 283.0, 138.0, 135.0, 298.0, 284.0, 279.0, 294.0, 256.0, 263.0, 288.0, 291.0, 295.0, 287.0, 295.0, 284.0, 255.0, 267.0, 266.0, 301.0, 295.0, 272.0, 292.0, 287.0, 295.0, 281.0, 285.0, 291.0, 269.0, 304.0, 255.0, 267.0, 279.0, 285.0, 289.0, 290.0, 296.0, 280.0, 279.0, 248.0, 300.0, 279.0, 285.0, 299.0, 298.0, 278.0, 290.0, 294.0, 281.0, 301.0, 304.0, 283.0, 279.0, 297.0, 286.0, 290.0, 257.0, 265.0, 289.0, 293.0, 281.0, 301.0, 280.0, 253.0, 286.0, 296.0, 283.0, 299.0, 263.0, 247.0, 284.0, 289.0, 283.0, 296.0, 267.0, 255.0, 276.0, 251.0, 288.0, 291.0, 267.0, 263.0, 295.0, 278.0, 256.0, 269.0, 293.0, 277.0, 296.0, 283.0, 275.0, 298.0, 288.0, 291.0, 296.0, 274.0, 282.0, 294.0, 283.0, 290.0, 281.0, 298.0, 267.0, 258.0, 284.0, 295.0, 283.0, 299.0, 290.0, 286.0, 301.0, 281.0, 282.0, 291.0, 293.0, 286.0, 260.0, 262.0, 267.0, 255.0, 247.0, 275.0, 290.0, 286.0, 
318.0, 312.0, 306.0, 273.0, 256.0, 256.0, 274.0, 299.0, 286.0, 290.0, 290.0, 277.0, 287.0, 283.0, 286.0, 283.0, 296.0, 283.0, 293.0, 283.0, 276.0, 306.0, 273.0, 257.0, 289.0, 290.0, 293.0, 289.0, 301.0, 281.0, 285.0, 282.0, 296.0, 291.0, 292.0, 284.0, 267.0, 255.0, 288.0, 288.0, 296.0, 286.0, 289.0, 293.0, 262.0, 260.0, 304.0, 283.0, 295.0, 284.0, 304.0, 278.0, 297.0, 279.0, 293.0, 289.0, 285.0, 285.0, 221.0, 220.0, 250.0, 272.0, 302.0, 274.0, 293.0, 283.0, 277.0, 299.0, 298.0, 278.0, 291.0, 285.0, 296.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6828569282642821, "mean_inference_ms": 1.2050293820721487, "mean_action_processing_ms": 0.13228236830438264, "mean_env_wait_ms": 0.8363800663320577, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4505600, "num_agent_steps_trained": 4505600, "num_env_steps_sampled": 2252800, "num_env_steps_trained": 2252800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2252800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4505600, "timers": {"training_iteration_time_ms": 3632.017, "learn_time_ms": 1144.529, "learn_throughput": 11183.643, "synch_weights_time_ms": 11.141}, "counters": {"num_env_steps_sampled": 2252800, "num_env_steps_trained": 2252800, "num_agent_steps_sampled": 4505600, "num_agent_steps_trained": 4505600}, "done": false, "episodes_total": 5632, "training_iteration": 176, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-22", "timestamp": 1666581082, "time_this_iter_s": 3.664487361907959, "time_total_s": 659.5825002193451, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 659.5825002193451, "timesteps_since_restore": 0, "iterations_since_restore": 176, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.333333333333332, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 174.91, "shaped_reward_min": 150, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.3, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.76, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.85, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.26, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.72, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.12, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.12, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.72, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.12, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.72, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.12, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003441051230765879, "policy_loss": -0.0005928004975430667, "vf_loss": 7.614487171173096, "vf_explained_var": 0.663824200630188, "kl": 0.002044765744358301, "entropy": 1.0255047082901, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2265600, "num_env_steps_trained": 2265600, "num_agent_steps_sampled": 4531200, "num_agent_steps_trained": 4531200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 441.0, "episode_reward_mean": 567.71, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.855}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 174.91, "shaped_reward_min": 150, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.3, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.76, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.85, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.26, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.72, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.12, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.12, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.72, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.12, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.72, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.12, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [533.0, 582.0, 582.0, 510.0, 573.0, 579.0, 522.0, 527.0, 579.0, 530.0, 573.0, 525.0, 570.0, 579.0, 573.0, 579.0, 570.0, 576.0, 573.0, 579.0, 525.0, 579.0, 582.0, 576.0, 582.0, 573.0, 579.0, 522.0, 522.0, 522.0, 576.0, 630.0, 579.0, 512.0, 573.0, 576.0, 567.0, 570.0, 569.0, 579.0, 576.0, 582.0, 530.0, 579.0, 582.0, 582.0, 567.0, 587.0, 576.0, 522.0, 576.0, 582.0, 582.0, 522.0, 587.0, 579.0, 582.0, 576.0, 582.0, 570.0, 441.0, 522.0, 576.0, 576.0, 576.0, 576.0, 576.0, 576.0, 579.0, 522.0, 570.0, 579.0, 579.0, 519.0, 579.0, 576.0, 573.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 582.0, 576.0, 522.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 573.0, 584.0, 630.0, 579.0, 582.0, 576.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 253.0, 286.0, 296.0, 283.0, 299.0, 263.0, 247.0, 284.0, 289.0, 283.0, 
296.0, 267.0, 255.0, 276.0, 251.0, 288.0, 291.0, 267.0, 263.0, 295.0, 278.0, 256.0, 269.0, 293.0, 277.0, 296.0, 283.0, 275.0, 298.0, 288.0, 291.0, 296.0, 274.0, 282.0, 294.0, 283.0, 290.0, 281.0, 298.0, 267.0, 258.0, 284.0, 295.0, 283.0, 299.0, 290.0, 286.0, 301.0, 281.0, 282.0, 291.0, 293.0, 286.0, 260.0, 262.0, 267.0, 255.0, 247.0, 275.0, 290.0, 286.0, 318.0, 312.0, 306.0, 273.0, 256.0, 256.0, 274.0, 299.0, 286.0, 290.0, 290.0, 277.0, 287.0, 283.0, 286.0, 283.0, 296.0, 283.0, 293.0, 283.0, 276.0, 306.0, 273.0, 257.0, 289.0, 290.0, 293.0, 289.0, 301.0, 281.0, 285.0, 282.0, 296.0, 291.0, 292.0, 284.0, 267.0, 255.0, 288.0, 288.0, 296.0, 286.0, 289.0, 293.0, 262.0, 260.0, 304.0, 283.0, 295.0, 284.0, 304.0, 278.0, 297.0, 279.0, 293.0, 289.0, 285.0, 285.0, 221.0, 220.0, 250.0, 272.0, 302.0, 274.0, 293.0, 283.0, 277.0, 299.0, 298.0, 278.0, 291.0, 285.0, 296.0, 280.0, 296.0, 283.0, 260.0, 262.0, 281.0, 289.0, 298.0, 281.0, 304.0, 275.0, 272.0, 247.0, 286.0, 293.0, 288.0, 288.0, 281.0, 292.0, 311.0, 319.0, 290.0, 286.0, 287.0, 289.0, 300.0, 276.0, 308.0, 279.0, 297.0, 282.0, 293.0, 289.0, 293.0, 283.0, 277.0, 245.0, 291.0, 279.0, 295.0, 284.0, 295.0, 287.0, 287.0, 286.0, 290.0, 289.0, 287.0, 292.0, 284.0, 289.0, 285.0, 299.0, 321.0, 309.0, 291.0, 288.0, 286.0, 296.0, 275.0, 301.0, 279.0, 300.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6828439834282318, "mean_inference_ms": 1.2049497279210897, "mean_action_processing_ms": 0.13228139387451182, "mean_env_wait_ms": 0.8362641339684356, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 441.0, "episode_reward_mean": 567.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.855}, "hist_stats": {"episode_reward": [533.0, 582.0, 582.0, 510.0, 573.0, 579.0, 522.0, 527.0, 579.0, 530.0, 573.0, 525.0, 570.0, 579.0, 573.0, 579.0, 570.0, 576.0, 
573.0, 579.0, 525.0, 579.0, 582.0, 576.0, 582.0, 573.0, 579.0, 522.0, 522.0, 522.0, 576.0, 630.0, 579.0, 512.0, 573.0, 576.0, 567.0, 570.0, 569.0, 579.0, 576.0, 582.0, 530.0, 579.0, 582.0, 582.0, 567.0, 587.0, 576.0, 522.0, 576.0, 582.0, 582.0, 522.0, 587.0, 579.0, 582.0, 576.0, 582.0, 570.0, 441.0, 522.0, 576.0, 576.0, 576.0, 576.0, 576.0, 576.0, 579.0, 522.0, 570.0, 579.0, 579.0, 519.0, 579.0, 576.0, 573.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 582.0, 576.0, 522.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 573.0, 584.0, 630.0, 579.0, 582.0, 576.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 253.0, 286.0, 296.0, 283.0, 299.0, 263.0, 247.0, 284.0, 289.0, 283.0, 296.0, 267.0, 255.0, 276.0, 251.0, 288.0, 291.0, 267.0, 263.0, 295.0, 278.0, 256.0, 269.0, 293.0, 277.0, 296.0, 283.0, 275.0, 298.0, 288.0, 291.0, 296.0, 274.0, 282.0, 294.0, 283.0, 290.0, 281.0, 298.0, 267.0, 258.0, 284.0, 295.0, 283.0, 299.0, 290.0, 286.0, 301.0, 281.0, 282.0, 291.0, 293.0, 286.0, 260.0, 262.0, 267.0, 255.0, 247.0, 275.0, 290.0, 286.0, 318.0, 312.0, 306.0, 273.0, 256.0, 256.0, 274.0, 299.0, 286.0, 290.0, 290.0, 277.0, 287.0, 283.0, 286.0, 283.0, 296.0, 283.0, 293.0, 283.0, 276.0, 306.0, 273.0, 257.0, 289.0, 290.0, 293.0, 289.0, 301.0, 281.0, 285.0, 282.0, 296.0, 291.0, 292.0, 284.0, 267.0, 255.0, 288.0, 288.0, 296.0, 286.0, 289.0, 293.0, 262.0, 260.0, 304.0, 283.0, 295.0, 284.0, 304.0, 278.0, 297.0, 279.0, 293.0, 289.0, 285.0, 285.0, 221.0, 220.0, 250.0, 272.0, 302.0, 274.0, 
293.0, 283.0, 277.0, 299.0, 298.0, 278.0, 291.0, 285.0, 296.0, 280.0, 296.0, 283.0, 260.0, 262.0, 281.0, 289.0, 298.0, 281.0, 304.0, 275.0, 272.0, 247.0, 286.0, 293.0, 288.0, 288.0, 281.0, 292.0, 311.0, 319.0, 290.0, 286.0, 287.0, 289.0, 300.0, 276.0, 308.0, 279.0, 297.0, 282.0, 293.0, 289.0, 293.0, 283.0, 277.0, 245.0, 291.0, 279.0, 295.0, 284.0, 295.0, 287.0, 287.0, 286.0, 290.0, 289.0, 287.0, 292.0, 284.0, 289.0, 285.0, 299.0, 321.0, 309.0, 291.0, 288.0, 286.0, 296.0, 275.0, 301.0, 279.0, 300.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6828439834282318, "mean_inference_ms": 1.2049497279210897, "mean_action_processing_ms": 0.13228139387451182, "mean_env_wait_ms": 0.8362641339684356, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4531200, "num_agent_steps_trained": 4531200, "num_env_steps_sampled": 2265600, "num_env_steps_trained": 2265600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2265600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4531200, "timers": {"training_iteration_time_ms": 3632.949, "learn_time_ms": 1142.508, "learn_throughput": 11203.42, "synch_weights_time_ms": 11.479}, "counters": {"num_env_steps_sampled": 2265600, "num_env_steps_trained": 2265600, "num_agent_steps_sampled": 4531200, "num_agent_steps_trained": 4531200}, "done": false, "episodes_total": 5664, "training_iteration": 177, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-26", "timestamp": 1666581086, "time_this_iter_s": 3.659736156463623, "time_total_s": 663.2422363758087, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 663.2422363758087, "timesteps_since_restore": 0, "iterations_since_restore": 177, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.020000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.13, "shaped_reward_min": 152, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.2, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.91, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.78, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.31, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.36, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.74, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.31, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.31, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015791004989296198, "policy_loss": 0.0013302306178957224, "vf_loss": 7.5834221839904785, "vf_explained_var": 0.6672846078872681, "kl": 0.0019746399484574795, "entropy": 1.0189414024353027, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2278400, "num_env_steps_trained": 2278400, "num_agent_steps_sampled": 4556800, "num_agent_steps_trained": 4556800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 441.0, "episode_reward_mean": 572.13, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.065}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.13, "shaped_reward_min": 152, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.2, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.91, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.78, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.31, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.36, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.74, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.31, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.31, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 512.0, 573.0, 576.0, 567.0, 570.0, 569.0, 579.0, 576.0, 582.0, 530.0, 579.0, 582.0, 582.0, 567.0, 587.0, 576.0, 522.0, 576.0, 582.0, 582.0, 522.0, 587.0, 579.0, 582.0, 576.0, 582.0, 570.0, 441.0, 522.0, 576.0, 576.0, 576.0, 576.0, 576.0, 576.0, 579.0, 522.0, 570.0, 579.0, 579.0, 519.0, 579.0, 576.0, 573.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 582.0, 576.0, 522.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 573.0, 584.0, 630.0, 579.0, 582.0, 576.0, 579.0, 579.0, 579.0, 573.0, 570.0, 579.0, 576.0, 579.0, 630.0, 576.0, 576.0, 579.0, 579.0, 536.0, 573.0, 627.0, 587.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 573.0, 576.0, 582.0, 579.0, 570.0, 573.0, 582.0, 576.0, 519.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 273.0, 256.0, 256.0, 274.0, 299.0, 286.0, 290.0, 290.0, 277.0, 287.0, 
283.0, 286.0, 283.0, 296.0, 283.0, 293.0, 283.0, 276.0, 306.0, 273.0, 257.0, 289.0, 290.0, 293.0, 289.0, 301.0, 281.0, 285.0, 282.0, 296.0, 291.0, 292.0, 284.0, 267.0, 255.0, 288.0, 288.0, 296.0, 286.0, 289.0, 293.0, 262.0, 260.0, 304.0, 283.0, 295.0, 284.0, 304.0, 278.0, 297.0, 279.0, 293.0, 289.0, 285.0, 285.0, 221.0, 220.0, 250.0, 272.0, 302.0, 274.0, 293.0, 283.0, 277.0, 299.0, 298.0, 278.0, 291.0, 285.0, 296.0, 280.0, 296.0, 283.0, 260.0, 262.0, 281.0, 289.0, 298.0, 281.0, 304.0, 275.0, 272.0, 247.0, 286.0, 293.0, 288.0, 288.0, 281.0, 292.0, 311.0, 319.0, 290.0, 286.0, 287.0, 289.0, 300.0, 276.0, 308.0, 279.0, 297.0, 282.0, 293.0, 289.0, 293.0, 283.0, 277.0, 245.0, 291.0, 279.0, 295.0, 284.0, 295.0, 287.0, 287.0, 286.0, 290.0, 289.0, 287.0, 292.0, 284.0, 289.0, 285.0, 299.0, 321.0, 309.0, 291.0, 288.0, 286.0, 296.0, 275.0, 301.0, 279.0, 300.0, 289.0, 290.0, 293.0, 286.0, 298.0, 275.0, 277.0, 293.0, 293.0, 286.0, 288.0, 288.0, 282.0, 297.0, 321.0, 309.0, 292.0, 284.0, 283.0, 293.0, 288.0, 291.0, 299.0, 280.0, 278.0, 258.0, 286.0, 287.0, 322.0, 305.0, 288.0, 299.0, 274.0, 248.0, 294.0, 285.0, 280.0, 302.0, 293.0, 286.0, 296.0, 286.0, 293.0, 283.0, 296.0, 277.0, 305.0, 271.0, 276.0, 306.0, 279.0, 300.0, 282.0, 288.0, 283.0, 290.0, 293.0, 289.0, 301.0, 275.0, 269.0, 250.0, 279.0, 297.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6828191897271219, "mean_inference_ms": 1.204880157997455, "mean_action_processing_ms": 0.13227938063631006, "mean_env_wait_ms": 0.8361496603307731, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 441.0, "episode_reward_mean": 572.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.065}, "hist_stats": {"episode_reward": [579.0, 512.0, 573.0, 576.0, 567.0, 570.0, 569.0, 579.0, 576.0, 582.0, 530.0, 579.0, 582.0, 582.0, 567.0, 587.0, 576.0, 522.0, 576.0, 
582.0, 582.0, 522.0, 587.0, 579.0, 582.0, 576.0, 582.0, 570.0, 441.0, 522.0, 576.0, 576.0, 576.0, 576.0, 576.0, 576.0, 579.0, 522.0, 570.0, 579.0, 579.0, 519.0, 579.0, 576.0, 573.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 582.0, 576.0, 522.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 573.0, 584.0, 630.0, 579.0, 582.0, 576.0, 579.0, 579.0, 579.0, 573.0, 570.0, 579.0, 576.0, 579.0, 630.0, 576.0, 576.0, 579.0, 579.0, 536.0, 573.0, 627.0, 587.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 573.0, 576.0, 582.0, 579.0, 570.0, 573.0, 582.0, 576.0, 519.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 273.0, 256.0, 256.0, 274.0, 299.0, 286.0, 290.0, 290.0, 277.0, 287.0, 283.0, 286.0, 283.0, 296.0, 283.0, 293.0, 283.0, 276.0, 306.0, 273.0, 257.0, 289.0, 290.0, 293.0, 289.0, 301.0, 281.0, 285.0, 282.0, 296.0, 291.0, 292.0, 284.0, 267.0, 255.0, 288.0, 288.0, 296.0, 286.0, 289.0, 293.0, 262.0, 260.0, 304.0, 283.0, 295.0, 284.0, 304.0, 278.0, 297.0, 279.0, 293.0, 289.0, 285.0, 285.0, 221.0, 220.0, 250.0, 272.0, 302.0, 274.0, 293.0, 283.0, 277.0, 299.0, 298.0, 278.0, 291.0, 285.0, 296.0, 280.0, 296.0, 283.0, 260.0, 262.0, 281.0, 289.0, 298.0, 281.0, 304.0, 275.0, 272.0, 247.0, 286.0, 293.0, 288.0, 288.0, 281.0, 292.0, 311.0, 319.0, 290.0, 286.0, 287.0, 289.0, 300.0, 276.0, 308.0, 279.0, 297.0, 282.0, 293.0, 289.0, 293.0, 283.0, 277.0, 245.0, 291.0, 279.0, 295.0, 284.0, 295.0, 287.0, 287.0, 286.0, 290.0, 289.0, 287.0, 292.0, 284.0, 289.0, 285.0, 299.0, 321.0, 309.0, 291.0, 
288.0, 286.0, 296.0, 275.0, 301.0, 279.0, 300.0, 289.0, 290.0, 293.0, 286.0, 298.0, 275.0, 277.0, 293.0, 293.0, 286.0, 288.0, 288.0, 282.0, 297.0, 321.0, 309.0, 292.0, 284.0, 283.0, 293.0, 288.0, 291.0, 299.0, 280.0, 278.0, 258.0, 286.0, 287.0, 322.0, 305.0, 288.0, 299.0, 274.0, 248.0, 294.0, 285.0, 280.0, 302.0, 293.0, 286.0, 296.0, 286.0, 293.0, 283.0, 296.0, 277.0, 305.0, 271.0, 276.0, 306.0, 279.0, 300.0, 282.0, 288.0, 283.0, 290.0, 293.0, 289.0, 301.0, 275.0, 269.0, 250.0, 279.0, 297.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6828191897271219, "mean_inference_ms": 1.204880157997455, "mean_action_processing_ms": 0.13227938063631006, "mean_env_wait_ms": 0.8361496603307731, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4556800, "num_agent_steps_trained": 4556800, "num_env_steps_sampled": 2278400, "num_env_steps_trained": 2278400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2278400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4556800, "timers": {"training_iteration_time_ms": 3626.15, "learn_time_ms": 1137.311, "learn_throughput": 11254.617, "synch_weights_time_ms": 10.795}, "counters": {"num_env_steps_sampled": 2278400, "num_env_steps_trained": 2278400, "num_agent_steps_sampled": 4556800, "num_agent_steps_trained": 4556800}, "done": false, "episodes_total": 5696, "training_iteration": 178, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-30", "timestamp": 1666581090, "time_this_iter_s": 3.7108657360076904, "time_total_s": 666.9531021118164, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 666.9531021118164, "timesteps_since_restore": 0, "iterations_since_restore": 178, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.7, "ram_util_percent": 10.633333333333333}} +{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.01, "shaped_reward_min": 158, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.94, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.75, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.5, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.72, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.29, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 15.72, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.29, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.72, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.29, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0018393670907244086, "policy_loss": -0.0020911097526550293, "vf_loss": 7.623180389404297, "vf_explained_var": 0.6745402812957764, "kl": 0.0019281021086499095, "entropy": 1.021147608757019, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2291200, "num_env_steps_trained": 2291200, "num_agent_steps_sampled": 4582400, "num_agent_steps_trained": 4582400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 519.0, "episode_reward_mean": 572.01, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.005}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.01, "shaped_reward_min": 158, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.94, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.75, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.5, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.72, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.29, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 15.72, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.29, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.72, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.29, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 576.0, 576.0, 579.0, 522.0, 570.0, 579.0, 579.0, 519.0, 579.0, 576.0, 573.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 582.0, 576.0, 522.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 573.0, 584.0, 630.0, 579.0, 582.0, 576.0, 579.0, 579.0, 579.0, 573.0, 570.0, 579.0, 576.0, 579.0, 630.0, 576.0, 576.0, 579.0, 579.0, 536.0, 573.0, 627.0, 587.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 573.0, 576.0, 582.0, 579.0, 570.0, 573.0, 582.0, 576.0, 519.0, 576.0, 579.0, 576.0, 570.0, 582.0, 573.0, 567.0, 579.0, 527.0, 582.0, 576.0, 525.0, 576.0, 582.0, 579.0, 533.0, 573.0, 573.0, 579.0, 573.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 522.0, 530.0, 576.0, 525.0, 558.0, 579.0, 519.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [277.0, 299.0, 298.0, 278.0, 291.0, 285.0, 296.0, 280.0, 296.0, 283.0, 260.0, 
262.0, 281.0, 289.0, 298.0, 281.0, 304.0, 275.0, 272.0, 247.0, 286.0, 293.0, 288.0, 288.0, 281.0, 292.0, 311.0, 319.0, 290.0, 286.0, 287.0, 289.0, 300.0, 276.0, 308.0, 279.0, 297.0, 282.0, 293.0, 289.0, 293.0, 283.0, 277.0, 245.0, 291.0, 279.0, 295.0, 284.0, 295.0, 287.0, 287.0, 286.0, 290.0, 289.0, 287.0, 292.0, 284.0, 289.0, 285.0, 299.0, 321.0, 309.0, 291.0, 288.0, 286.0, 296.0, 275.0, 301.0, 279.0, 300.0, 289.0, 290.0, 293.0, 286.0, 298.0, 275.0, 277.0, 293.0, 293.0, 286.0, 288.0, 288.0, 282.0, 297.0, 321.0, 309.0, 292.0, 284.0, 283.0, 293.0, 288.0, 291.0, 299.0, 280.0, 278.0, 258.0, 286.0, 287.0, 322.0, 305.0, 288.0, 299.0, 274.0, 248.0, 294.0, 285.0, 280.0, 302.0, 293.0, 286.0, 296.0, 286.0, 293.0, 283.0, 296.0, 277.0, 305.0, 271.0, 276.0, 306.0, 279.0, 300.0, 282.0, 288.0, 283.0, 290.0, 293.0, 289.0, 301.0, 275.0, 269.0, 250.0, 279.0, 297.0, 290.0, 289.0, 295.0, 281.0, 285.0, 285.0, 289.0, 293.0, 279.0, 294.0, 282.0, 285.0, 286.0, 293.0, 246.0, 281.0, 296.0, 286.0, 293.0, 283.0, 253.0, 272.0, 287.0, 289.0, 291.0, 291.0, 285.0, 294.0, 268.0, 265.0, 281.0, 292.0, 282.0, 291.0, 282.0, 297.0, 299.0, 274.0, 289.0, 292.0, 286.0, 296.0, 289.0, 290.0, 295.0, 287.0, 272.0, 258.0, 290.0, 289.0, 260.0, 262.0, 261.0, 269.0, 276.0, 300.0, 259.0, 266.0, 290.0, 268.0, 278.0, 301.0, 257.0, 262.0, 284.0, 298.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6828123814988936, "mean_inference_ms": 1.2051653630779808, "mean_action_processing_ms": 0.13228301841831588, "mean_env_wait_ms": 0.8362093901992204, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 519.0, "episode_reward_mean": 572.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.005}, "hist_stats": {"episode_reward": [576.0, 576.0, 576.0, 576.0, 579.0, 522.0, 570.0, 579.0, 579.0, 519.0, 579.0, 576.0, 573.0, 630.0, 576.0, 576.0, 576.0, 587.0, 
579.0, 582.0, 576.0, 522.0, 570.0, 579.0, 582.0, 573.0, 579.0, 579.0, 573.0, 584.0, 630.0, 579.0, 582.0, 576.0, 579.0, 579.0, 579.0, 573.0, 570.0, 579.0, 576.0, 579.0, 630.0, 576.0, 576.0, 579.0, 579.0, 536.0, 573.0, 627.0, 587.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 573.0, 576.0, 582.0, 579.0, 570.0, 573.0, 582.0, 576.0, 519.0, 576.0, 579.0, 576.0, 570.0, 582.0, 573.0, 567.0, 579.0, 527.0, 582.0, 576.0, 525.0, 576.0, 582.0, 579.0, 533.0, 573.0, 573.0, 579.0, 573.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 522.0, 530.0, 576.0, 525.0, 558.0, 579.0, 519.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [277.0, 299.0, 298.0, 278.0, 291.0, 285.0, 296.0, 280.0, 296.0, 283.0, 260.0, 262.0, 281.0, 289.0, 298.0, 281.0, 304.0, 275.0, 272.0, 247.0, 286.0, 293.0, 288.0, 288.0, 281.0, 292.0, 311.0, 319.0, 290.0, 286.0, 287.0, 289.0, 300.0, 276.0, 308.0, 279.0, 297.0, 282.0, 293.0, 289.0, 293.0, 283.0, 277.0, 245.0, 291.0, 279.0, 295.0, 284.0, 295.0, 287.0, 287.0, 286.0, 290.0, 289.0, 287.0, 292.0, 284.0, 289.0, 285.0, 299.0, 321.0, 309.0, 291.0, 288.0, 286.0, 296.0, 275.0, 301.0, 279.0, 300.0, 289.0, 290.0, 293.0, 286.0, 298.0, 275.0, 277.0, 293.0, 293.0, 286.0, 288.0, 288.0, 282.0, 297.0, 321.0, 309.0, 292.0, 284.0, 283.0, 293.0, 288.0, 291.0, 299.0, 280.0, 278.0, 258.0, 286.0, 287.0, 322.0, 305.0, 288.0, 299.0, 274.0, 248.0, 294.0, 285.0, 280.0, 302.0, 293.0, 286.0, 296.0, 286.0, 293.0, 283.0, 296.0, 277.0, 305.0, 271.0, 276.0, 306.0, 279.0, 300.0, 282.0, 288.0, 283.0, 290.0, 
293.0, 289.0, 301.0, 275.0, 269.0, 250.0, 279.0, 297.0, 290.0, 289.0, 295.0, 281.0, 285.0, 285.0, 289.0, 293.0, 279.0, 294.0, 282.0, 285.0, 286.0, 293.0, 246.0, 281.0, 296.0, 286.0, 293.0, 283.0, 253.0, 272.0, 287.0, 289.0, 291.0, 291.0, 285.0, 294.0, 268.0, 265.0, 281.0, 292.0, 282.0, 291.0, 282.0, 297.0, 299.0, 274.0, 289.0, 292.0, 286.0, 296.0, 289.0, 290.0, 295.0, 287.0, 272.0, 258.0, 290.0, 289.0, 260.0, 262.0, 261.0, 269.0, 276.0, 300.0, 259.0, 266.0, 290.0, 268.0, 278.0, 301.0, 257.0, 262.0, 284.0, 298.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6828123814988936, "mean_inference_ms": 1.2051653630779808, "mean_action_processing_ms": 0.13228301841831588, "mean_env_wait_ms": 0.8362093901992204, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4582400, "num_agent_steps_trained": 4582400, "num_env_steps_sampled": 2291200, "num_env_steps_trained": 2291200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2291200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4582400, "timers": {"training_iteration_time_ms": 3645.952, "learn_time_ms": 1141.641, "learn_throughput": 11211.928, "synch_weights_time_ms": 11.207}, "counters": {"num_env_steps_sampled": 2291200, "num_env_steps_trained": 2291200, "num_agent_steps_sampled": 4582400, "num_agent_steps_trained": 4582400}, "done": false, "episodes_total": 5728, "training_iteration": 179, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-34", "timestamp": 1666581094, "time_this_iter_s": 3.916750431060791, "time_total_s": 670.8698525428772, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 670.8698525428772, "timesteps_since_restore": 0, "iterations_since_restore": 179, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 22.25, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 175.98, "shaped_reward_min": 158, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.88, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.18, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.49, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.77, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.52, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.52, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.71, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 15.52, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.52, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.52, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.52, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010324614122509956, "policy_loss": -0.0012710680020973086, "vf_loss": 7.529854774475098, "vf_explained_var": 0.6815111041069031, "kl": 0.0017792152939364314, "entropy": 1.0287561416625977, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2304000, "num_env_steps_trained": 2304000, "num_agent_steps_sampled": 4608000, "num_agent_steps_trained": 4608000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 519.0, "episode_reward_mean": 571.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 246.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 285.59}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 175.98, "shaped_reward_min": 158, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.88, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.18, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.49, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.77, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.52, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.52, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.71, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 15.52, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.52, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.52, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.52, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 579.0, 579.0, 579.0, 573.0, 570.0, 579.0, 576.0, 579.0, 630.0, 576.0, 576.0, 579.0, 579.0, 536.0, 573.0, 627.0, 587.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 573.0, 576.0, 582.0, 579.0, 570.0, 573.0, 582.0, 576.0, 519.0, 576.0, 579.0, 576.0, 570.0, 582.0, 573.0, 567.0, 579.0, 527.0, 582.0, 576.0, 525.0, 576.0, 582.0, 579.0, 533.0, 573.0, 573.0, 579.0, 573.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 522.0, 530.0, 576.0, 525.0, 558.0, 579.0, 519.0, 582.0, 579.0, 587.0, 582.0, 573.0, 576.0, 519.0, 576.0, 582.0, 587.0, 576.0, 570.0, 579.0, 576.0, 533.0, 579.0, 561.0, 576.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 579.0, 582.0, 579.0, 564.0, 576.0, 533.0, 582.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 296.0, 275.0, 301.0, 279.0, 300.0, 289.0, 290.0, 293.0, 286.0, 298.0, 
275.0, 277.0, 293.0, 293.0, 286.0, 288.0, 288.0, 282.0, 297.0, 321.0, 309.0, 292.0, 284.0, 283.0, 293.0, 288.0, 291.0, 299.0, 280.0, 278.0, 258.0, 286.0, 287.0, 322.0, 305.0, 288.0, 299.0, 274.0, 248.0, 294.0, 285.0, 280.0, 302.0, 293.0, 286.0, 296.0, 286.0, 293.0, 283.0, 296.0, 277.0, 305.0, 271.0, 276.0, 306.0, 279.0, 300.0, 282.0, 288.0, 283.0, 290.0, 293.0, 289.0, 301.0, 275.0, 269.0, 250.0, 279.0, 297.0, 290.0, 289.0, 295.0, 281.0, 285.0, 285.0, 289.0, 293.0, 279.0, 294.0, 282.0, 285.0, 286.0, 293.0, 246.0, 281.0, 296.0, 286.0, 293.0, 283.0, 253.0, 272.0, 287.0, 289.0, 291.0, 291.0, 285.0, 294.0, 268.0, 265.0, 281.0, 292.0, 282.0, 291.0, 282.0, 297.0, 299.0, 274.0, 289.0, 292.0, 286.0, 296.0, 289.0, 290.0, 295.0, 287.0, 272.0, 258.0, 290.0, 289.0, 260.0, 262.0, 261.0, 269.0, 276.0, 300.0, 259.0, 266.0, 290.0, 268.0, 278.0, 301.0, 257.0, 262.0, 284.0, 298.0, 291.0, 288.0, 301.0, 286.0, 292.0, 290.0, 285.0, 288.0, 293.0, 283.0, 270.0, 249.0, 297.0, 279.0, 281.0, 301.0, 293.0, 294.0, 297.0, 279.0, 294.0, 276.0, 287.0, 292.0, 278.0, 298.0, 285.0, 248.0, 301.0, 278.0, 287.0, 274.0, 294.0, 282.0, 288.0, 291.0, 270.0, 309.0, 296.0, 286.0, 288.0, 285.0, 289.0, 290.0, 290.0, 283.0, 286.0, 293.0, 278.0, 304.0, 290.0, 289.0, 285.0, 279.0, 303.0, 273.0, 266.0, 267.0, 291.0, 291.0, 291.0, 288.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827948903707423, "mean_inference_ms": 1.2054504095198442, "mean_action_processing_ms": 0.13228362079130832, "mean_env_wait_ms": 0.8362570836763952, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 519.0, "episode_reward_mean": 571.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 246.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 285.59}, "hist_stats": {"episode_reward": [582.0, 576.0, 579.0, 579.0, 579.0, 573.0, 570.0, 579.0, 576.0, 579.0, 630.0, 576.0, 576.0, 579.0, 579.0, 536.0, 573.0, 627.0, 587.0, 
522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 573.0, 576.0, 582.0, 579.0, 570.0, 573.0, 582.0, 576.0, 519.0, 576.0, 579.0, 576.0, 570.0, 582.0, 573.0, 567.0, 579.0, 527.0, 582.0, 576.0, 525.0, 576.0, 582.0, 579.0, 533.0, 573.0, 573.0, 579.0, 573.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 522.0, 530.0, 576.0, 525.0, 558.0, 579.0, 519.0, 582.0, 579.0, 587.0, 582.0, 573.0, 576.0, 519.0, 576.0, 582.0, 587.0, 576.0, 570.0, 579.0, 576.0, 533.0, 579.0, 561.0, 576.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 579.0, 582.0, 579.0, 564.0, 576.0, 533.0, 582.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 296.0, 275.0, 301.0, 279.0, 300.0, 289.0, 290.0, 293.0, 286.0, 298.0, 275.0, 277.0, 293.0, 293.0, 286.0, 288.0, 288.0, 282.0, 297.0, 321.0, 309.0, 292.0, 284.0, 283.0, 293.0, 288.0, 291.0, 299.0, 280.0, 278.0, 258.0, 286.0, 287.0, 322.0, 305.0, 288.0, 299.0, 274.0, 248.0, 294.0, 285.0, 280.0, 302.0, 293.0, 286.0, 296.0, 286.0, 293.0, 283.0, 296.0, 277.0, 305.0, 271.0, 276.0, 306.0, 279.0, 300.0, 282.0, 288.0, 283.0, 290.0, 293.0, 289.0, 301.0, 275.0, 269.0, 250.0, 279.0, 297.0, 290.0, 289.0, 295.0, 281.0, 285.0, 285.0, 289.0, 293.0, 279.0, 294.0, 282.0, 285.0, 286.0, 293.0, 246.0, 281.0, 296.0, 286.0, 293.0, 283.0, 253.0, 272.0, 287.0, 289.0, 291.0, 291.0, 285.0, 294.0, 268.0, 265.0, 281.0, 292.0, 282.0, 291.0, 282.0, 297.0, 299.0, 274.0, 289.0, 292.0, 286.0, 296.0, 289.0, 290.0, 295.0, 287.0, 272.0, 258.0, 290.0, 289.0, 260.0, 262.0, 261.0, 269.0, 276.0, 300.0, 259.0, 
266.0, 290.0, 268.0, 278.0, 301.0, 257.0, 262.0, 284.0, 298.0, 291.0, 288.0, 301.0, 286.0, 292.0, 290.0, 285.0, 288.0, 293.0, 283.0, 270.0, 249.0, 297.0, 279.0, 281.0, 301.0, 293.0, 294.0, 297.0, 279.0, 294.0, 276.0, 287.0, 292.0, 278.0, 298.0, 285.0, 248.0, 301.0, 278.0, 287.0, 274.0, 294.0, 282.0, 288.0, 291.0, 270.0, 309.0, 296.0, 286.0, 288.0, 285.0, 289.0, 290.0, 290.0, 283.0, 286.0, 293.0, 278.0, 304.0, 290.0, 289.0, 285.0, 279.0, 303.0, 273.0, 266.0, 267.0, 291.0, 291.0, 291.0, 288.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827948903707423, "mean_inference_ms": 1.2054504095198442, "mean_action_processing_ms": 0.13228362079130832, "mean_env_wait_ms": 0.8362570836763952, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4608000, "num_agent_steps_trained": 4608000, "num_env_steps_sampled": 2304000, "num_env_steps_trained": 2304000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2304000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4608000, "timers": {"training_iteration_time_ms": 3650.521, "learn_time_ms": 1140.233, "learn_throughput": 11225.772, "synch_weights_time_ms": 11.194}, "counters": {"num_env_steps_sampled": 2304000, "num_env_steps_trained": 2304000, "num_agent_steps_sampled": 4608000, "num_agent_steps_trained": 4608000}, "done": false, "episodes_total": 5760, "training_iteration": 180, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-38", "timestamp": 1666581098, "time_this_iter_s": 3.7083230018615723, "time_total_s": 674.5781755447388, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 674.5781755447388, "timesteps_since_restore": 0, "iterations_since_restore": 180, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.28, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.2, "shaped_reward_min": 153, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.24, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.34, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.84, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.32, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.1, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 15.32, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.32, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019686259329319, "policy_loss": 0.0017239430453628302, "vf_loss": 7.565124034881592, "vf_explained_var": 0.6716065406799316, "kl": 0.00241199042648077, "entropy": 1.023657202720642, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2316800, "num_env_steps_trained": 2316800, "num_agent_steps_sampled": 4633600, "num_agent_steps_trained": 4633600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 493.0, "episode_reward_mean": 565.8, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 282.9}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.2, "shaped_reward_min": 153, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.24, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.34, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.84, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.32, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.1, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 15.32, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.32, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 519.0, 576.0, 579.0, 576.0, 570.0, 582.0, 573.0, 567.0, 579.0, 527.0, 582.0, 576.0, 525.0, 576.0, 582.0, 579.0, 533.0, 573.0, 573.0, 579.0, 573.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 522.0, 530.0, 576.0, 525.0, 558.0, 579.0, 519.0, 582.0, 579.0, 587.0, 582.0, 573.0, 576.0, 519.0, 576.0, 582.0, 587.0, 576.0, 570.0, 579.0, 576.0, 533.0, 579.0, 561.0, 576.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 579.0, 582.0, 579.0, 564.0, 576.0, 533.0, 582.0, 579.0, 579.0, 522.0, 570.0, 493.0, 513.0, 570.0, 579.0, 530.0, 573.0, 570.0, 522.0, 576.0, 576.0, 573.0, 525.0, 579.0, 579.0, 522.0, 576.0, 576.0, 573.0, 573.0, 576.0, 567.0, 573.0, 582.0, 630.0, 579.0, 525.0, 519.0, 579.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 275.0, 269.0, 250.0, 279.0, 297.0, 290.0, 289.0, 295.0, 281.0, 285.0, 
285.0, 289.0, 293.0, 279.0, 294.0, 282.0, 285.0, 286.0, 293.0, 246.0, 281.0, 296.0, 286.0, 293.0, 283.0, 253.0, 272.0, 287.0, 289.0, 291.0, 291.0, 285.0, 294.0, 268.0, 265.0, 281.0, 292.0, 282.0, 291.0, 282.0, 297.0, 299.0, 274.0, 289.0, 292.0, 286.0, 296.0, 289.0, 290.0, 295.0, 287.0, 272.0, 258.0, 290.0, 289.0, 260.0, 262.0, 261.0, 269.0, 276.0, 300.0, 259.0, 266.0, 290.0, 268.0, 278.0, 301.0, 257.0, 262.0, 284.0, 298.0, 291.0, 288.0, 301.0, 286.0, 292.0, 290.0, 285.0, 288.0, 293.0, 283.0, 270.0, 249.0, 297.0, 279.0, 281.0, 301.0, 293.0, 294.0, 297.0, 279.0, 294.0, 276.0, 287.0, 292.0, 278.0, 298.0, 285.0, 248.0, 301.0, 278.0, 287.0, 274.0, 294.0, 282.0, 288.0, 291.0, 270.0, 309.0, 296.0, 286.0, 288.0, 285.0, 289.0, 290.0, 290.0, 283.0, 286.0, 293.0, 278.0, 304.0, 290.0, 289.0, 285.0, 279.0, 303.0, 273.0, 266.0, 267.0, 291.0, 291.0, 291.0, 288.0, 291.0, 288.0, 257.0, 265.0, 294.0, 276.0, 243.0, 250.0, 261.0, 252.0, 289.0, 281.0, 281.0, 298.0, 279.0, 251.0, 295.0, 278.0, 279.0, 291.0, 257.0, 265.0, 293.0, 283.0, 280.0, 296.0, 281.0, 292.0, 254.0, 271.0, 298.0, 281.0, 288.0, 291.0, 251.0, 271.0, 287.0, 289.0, 286.0, 290.0, 285.0, 288.0, 293.0, 280.0, 278.0, 298.0, 291.0, 276.0, 290.0, 283.0, 291.0, 291.0, 325.0, 305.0, 294.0, 285.0, 272.0, 253.0, 246.0, 273.0, 284.0, 295.0, 293.0, 286.0, 289.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827979529664868, "mean_inference_ms": 1.2057173897529836, "mean_action_processing_ms": 0.13228127451561433, "mean_env_wait_ms": 0.8362816793672555, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 493.0, "episode_reward_mean": 565.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 243.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 282.9}, "hist_stats": {"episode_reward": [576.0, 519.0, 576.0, 579.0, 576.0, 570.0, 582.0, 573.0, 567.0, 579.0, 527.0, 582.0, 576.0, 525.0, 576.0, 582.0, 579.0, 533.0, 573.0, 
573.0, 579.0, 573.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 522.0, 530.0, 576.0, 525.0, 558.0, 579.0, 519.0, 582.0, 579.0, 587.0, 582.0, 573.0, 576.0, 519.0, 576.0, 582.0, 587.0, 576.0, 570.0, 579.0, 576.0, 533.0, 579.0, 561.0, 576.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 579.0, 582.0, 579.0, 564.0, 576.0, 533.0, 582.0, 579.0, 579.0, 522.0, 570.0, 493.0, 513.0, 570.0, 579.0, 530.0, 573.0, 570.0, 522.0, 576.0, 576.0, 573.0, 525.0, 579.0, 579.0, 522.0, 576.0, 576.0, 573.0, 573.0, 576.0, 567.0, 573.0, 582.0, 630.0, 579.0, 525.0, 519.0, 579.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 275.0, 269.0, 250.0, 279.0, 297.0, 290.0, 289.0, 295.0, 281.0, 285.0, 285.0, 289.0, 293.0, 279.0, 294.0, 282.0, 285.0, 286.0, 293.0, 246.0, 281.0, 296.0, 286.0, 293.0, 283.0, 253.0, 272.0, 287.0, 289.0, 291.0, 291.0, 285.0, 294.0, 268.0, 265.0, 281.0, 292.0, 282.0, 291.0, 282.0, 297.0, 299.0, 274.0, 289.0, 292.0, 286.0, 296.0, 289.0, 290.0, 295.0, 287.0, 272.0, 258.0, 290.0, 289.0, 260.0, 262.0, 261.0, 269.0, 276.0, 300.0, 259.0, 266.0, 290.0, 268.0, 278.0, 301.0, 257.0, 262.0, 284.0, 298.0, 291.0, 288.0, 301.0, 286.0, 292.0, 290.0, 285.0, 288.0, 293.0, 283.0, 270.0, 249.0, 297.0, 279.0, 281.0, 301.0, 293.0, 294.0, 297.0, 279.0, 294.0, 276.0, 287.0, 292.0, 278.0, 298.0, 285.0, 248.0, 301.0, 278.0, 287.0, 274.0, 294.0, 282.0, 288.0, 291.0, 270.0, 309.0, 296.0, 286.0, 288.0, 285.0, 289.0, 290.0, 290.0, 283.0, 286.0, 293.0, 278.0, 304.0, 290.0, 289.0, 285.0, 279.0, 303.0, 
273.0, 266.0, 267.0, 291.0, 291.0, 291.0, 288.0, 291.0, 288.0, 257.0, 265.0, 294.0, 276.0, 243.0, 250.0, 261.0, 252.0, 289.0, 281.0, 281.0, 298.0, 279.0, 251.0, 295.0, 278.0, 279.0, 291.0, 257.0, 265.0, 293.0, 283.0, 280.0, 296.0, 281.0, 292.0, 254.0, 271.0, 298.0, 281.0, 288.0, 291.0, 251.0, 271.0, 287.0, 289.0, 286.0, 290.0, 285.0, 288.0, 293.0, 280.0, 278.0, 298.0, 291.0, 276.0, 290.0, 283.0, 291.0, 291.0, 325.0, 305.0, 294.0, 285.0, 272.0, 253.0, 246.0, 273.0, 284.0, 295.0, 293.0, 286.0, 289.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827979529664868, "mean_inference_ms": 1.2057173897529836, "mean_action_processing_ms": 0.13228127451561433, "mean_env_wait_ms": 0.8362816793672555, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4633600, "num_agent_steps_trained": 4633600, "num_env_steps_sampled": 2316800, "num_env_steps_trained": 2316800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2316800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4633600, "timers": {"training_iteration_time_ms": 3652.906, "learn_time_ms": 1139.757, "learn_throughput": 11230.46, "synch_weights_time_ms": 11.425}, "counters": {"num_env_steps_sampled": 2316800, "num_env_steps_trained": 2316800, "num_agent_steps_sampled": 4633600, "num_agent_steps_trained": 4633600}, "done": false, "episodes_total": 5792, "training_iteration": 181, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-42", "timestamp": 1666581102, "time_this_iter_s": 3.7982163429260254, "time_total_s": 678.3763918876648, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 678.3763918876648, "timesteps_since_restore": 0, "iterations_since_restore": 181, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 173.06, "shaped_reward_min": 127, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.53, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.11, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.74, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.44, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.78, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.24, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.05, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.44, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.44, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005980606656521559, "policy_loss": -0.0008600912988185883, "vf_loss": 7.714970588684082, "vf_explained_var": 0.6483292579650879, "kl": 0.0020733263809233904, "entropy": 1.0189299583435059, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2329600, "num_env_steps_trained": 2329600, "num_agent_steps_sampled": 4659200, "num_agent_steps_trained": 4659200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 327.0, "episode_reward_mean": 561.46, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 280.73}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 173.06, "shaped_reward_min": 127, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.53, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.11, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.74, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.44, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.78, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.24, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.05, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.44, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.44, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [558.0, 579.0, 519.0, 582.0, 579.0, 587.0, 582.0, 573.0, 576.0, 519.0, 576.0, 582.0, 587.0, 576.0, 570.0, 579.0, 576.0, 533.0, 579.0, 561.0, 576.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 579.0, 582.0, 579.0, 564.0, 576.0, 533.0, 582.0, 579.0, 579.0, 522.0, 570.0, 493.0, 513.0, 570.0, 579.0, 530.0, 573.0, 570.0, 522.0, 576.0, 576.0, 573.0, 525.0, 579.0, 579.0, 522.0, 576.0, 576.0, 573.0, 573.0, 576.0, 567.0, 573.0, 582.0, 630.0, 579.0, 525.0, 519.0, 579.0, 579.0, 573.0, 582.0, 570.0, 521.0, 527.0, 576.0, 425.0, 576.0, 582.0, 327.0, 579.0, 573.0, 576.0, 530.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 522.0, 570.0, 579.0, 519.0, 576.0, 573.0, 573.0, 579.0, 576.0, 530.0, 430.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 268.0, 278.0, 301.0, 257.0, 262.0, 284.0, 298.0, 291.0, 288.0, 301.0, 
286.0, 292.0, 290.0, 285.0, 288.0, 293.0, 283.0, 270.0, 249.0, 297.0, 279.0, 281.0, 301.0, 293.0, 294.0, 297.0, 279.0, 294.0, 276.0, 287.0, 292.0, 278.0, 298.0, 285.0, 248.0, 301.0, 278.0, 287.0, 274.0, 294.0, 282.0, 288.0, 291.0, 270.0, 309.0, 296.0, 286.0, 288.0, 285.0, 289.0, 290.0, 290.0, 283.0, 286.0, 293.0, 278.0, 304.0, 290.0, 289.0, 285.0, 279.0, 303.0, 273.0, 266.0, 267.0, 291.0, 291.0, 291.0, 288.0, 291.0, 288.0, 257.0, 265.0, 294.0, 276.0, 243.0, 250.0, 261.0, 252.0, 289.0, 281.0, 281.0, 298.0, 279.0, 251.0, 295.0, 278.0, 279.0, 291.0, 257.0, 265.0, 293.0, 283.0, 280.0, 296.0, 281.0, 292.0, 254.0, 271.0, 298.0, 281.0, 288.0, 291.0, 251.0, 271.0, 287.0, 289.0, 286.0, 290.0, 285.0, 288.0, 293.0, 280.0, 278.0, 298.0, 291.0, 276.0, 290.0, 283.0, 291.0, 291.0, 325.0, 305.0, 294.0, 285.0, 272.0, 253.0, 246.0, 273.0, 284.0, 295.0, 293.0, 286.0, 289.0, 284.0, 293.0, 289.0, 282.0, 288.0, 259.0, 262.0, 262.0, 265.0, 290.0, 286.0, 205.0, 220.0, 276.0, 300.0, 283.0, 299.0, 170.0, 157.0, 290.0, 289.0, 288.0, 285.0, 283.0, 293.0, 265.0, 265.0, 289.0, 284.0, 296.0, 280.0, 290.0, 283.0, 285.0, 288.0, 304.0, 275.0, 299.0, 280.0, 283.0, 293.0, 252.0, 270.0, 293.0, 277.0, 289.0, 290.0, 256.0, 263.0, 296.0, 280.0, 293.0, 280.0, 293.0, 280.0, 283.0, 296.0, 288.0, 288.0, 269.0, 261.0, 218.0, 212.0, 312.0, 315.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827873842497127, "mean_inference_ms": 1.20570575492883, "mean_action_processing_ms": 0.13227875312442067, "mean_env_wait_ms": 0.8361696516164074, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 327.0, "episode_reward_mean": 561.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 280.73}, "hist_stats": {"episode_reward": [558.0, 579.0, 519.0, 582.0, 579.0, 587.0, 582.0, 573.0, 576.0, 519.0, 576.0, 582.0, 587.0, 576.0, 570.0, 579.0, 576.0, 533.0, 579.0, 
561.0, 576.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 579.0, 582.0, 579.0, 564.0, 576.0, 533.0, 582.0, 579.0, 579.0, 522.0, 570.0, 493.0, 513.0, 570.0, 579.0, 530.0, 573.0, 570.0, 522.0, 576.0, 576.0, 573.0, 525.0, 579.0, 579.0, 522.0, 576.0, 576.0, 573.0, 573.0, 576.0, 567.0, 573.0, 582.0, 630.0, 579.0, 525.0, 519.0, 579.0, 579.0, 573.0, 582.0, 570.0, 521.0, 527.0, 576.0, 425.0, 576.0, 582.0, 327.0, 579.0, 573.0, 576.0, 530.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 522.0, 570.0, 579.0, 519.0, 576.0, 573.0, 573.0, 579.0, 576.0, 530.0, 430.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 268.0, 278.0, 301.0, 257.0, 262.0, 284.0, 298.0, 291.0, 288.0, 301.0, 286.0, 292.0, 290.0, 285.0, 288.0, 293.0, 283.0, 270.0, 249.0, 297.0, 279.0, 281.0, 301.0, 293.0, 294.0, 297.0, 279.0, 294.0, 276.0, 287.0, 292.0, 278.0, 298.0, 285.0, 248.0, 301.0, 278.0, 287.0, 274.0, 294.0, 282.0, 288.0, 291.0, 270.0, 309.0, 296.0, 286.0, 288.0, 285.0, 289.0, 290.0, 290.0, 283.0, 286.0, 293.0, 278.0, 304.0, 290.0, 289.0, 285.0, 279.0, 303.0, 273.0, 266.0, 267.0, 291.0, 291.0, 291.0, 288.0, 291.0, 288.0, 257.0, 265.0, 294.0, 276.0, 243.0, 250.0, 261.0, 252.0, 289.0, 281.0, 281.0, 298.0, 279.0, 251.0, 295.0, 278.0, 279.0, 291.0, 257.0, 265.0, 293.0, 283.0, 280.0, 296.0, 281.0, 292.0, 254.0, 271.0, 298.0, 281.0, 288.0, 291.0, 251.0, 271.0, 287.0, 289.0, 286.0, 290.0, 285.0, 288.0, 293.0, 280.0, 278.0, 298.0, 291.0, 276.0, 290.0, 283.0, 291.0, 291.0, 325.0, 305.0, 294.0, 285.0, 272.0, 
253.0, 246.0, 273.0, 284.0, 295.0, 293.0, 286.0, 289.0, 284.0, 293.0, 289.0, 282.0, 288.0, 259.0, 262.0, 262.0, 265.0, 290.0, 286.0, 205.0, 220.0, 276.0, 300.0, 283.0, 299.0, 170.0, 157.0, 290.0, 289.0, 288.0, 285.0, 283.0, 293.0, 265.0, 265.0, 289.0, 284.0, 296.0, 280.0, 290.0, 283.0, 285.0, 288.0, 304.0, 275.0, 299.0, 280.0, 283.0, 293.0, 252.0, 270.0, 293.0, 277.0, 289.0, 290.0, 256.0, 263.0, 296.0, 280.0, 293.0, 280.0, 293.0, 280.0, 283.0, 296.0, 288.0, 288.0, 269.0, 261.0, 218.0, 212.0, 312.0, 315.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827873842497127, "mean_inference_ms": 1.20570575492883, "mean_action_processing_ms": 0.13227875312442067, "mean_env_wait_ms": 0.8361696516164074, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4659200, "num_agent_steps_trained": 4659200, "num_env_steps_sampled": 2329600, "num_env_steps_trained": 2329600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2329600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4659200, "timers": {"training_iteration_time_ms": 3671.351, "learn_time_ms": 1151.599, "learn_throughput": 11114.977, "synch_weights_time_ms": 10.944}, "counters": {"num_env_steps_sampled": 2329600, "num_env_steps_trained": 2329600, "num_agent_steps_sampled": 4659200, "num_agent_steps_trained": 4659200}, "done": false, "episodes_total": 5824, "training_iteration": 182, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-46", "timestamp": 1666581106, "time_this_iter_s": 3.7461531162261963, "time_total_s": 682.122545003891, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 682.122545003891, "timesteps_since_restore": 0, "iterations_since_restore": 182, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 172.92, "shaped_reward_min": 127, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.55, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.06, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.91, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.91, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.91, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00046833715168759227, "policy_loss": 0.0002174130640923977, "vf_loss": 7.621037006378174, "vf_explained_var": 0.6563353538513184, "kl": 0.001816212316043675, "entropy": 1.0223599672317505, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2342400, "num_env_steps_trained": 2342400, "num_agent_steps_sampled": 4684800, "num_agent_steps_trained": 4684800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 327.0, "episode_reward_mean": 561.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 280.66}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 172.92, "shaped_reward_min": 127, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.55, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.06, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.7, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.91, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.91, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.91, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [533.0, 582.0, 579.0, 579.0, 522.0, 570.0, 493.0, 513.0, 570.0, 579.0, 530.0, 573.0, 570.0, 522.0, 576.0, 576.0, 573.0, 525.0, 579.0, 579.0, 522.0, 576.0, 576.0, 573.0, 573.0, 576.0, 567.0, 573.0, 582.0, 630.0, 579.0, 525.0, 519.0, 579.0, 579.0, 573.0, 582.0, 570.0, 521.0, 527.0, 576.0, 425.0, 576.0, 582.0, 327.0, 579.0, 573.0, 576.0, 530.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 522.0, 570.0, 579.0, 519.0, 576.0, 573.0, 573.0, 579.0, 576.0, 530.0, 430.0, 627.0, 582.0, 582.0, 576.0, 576.0, 570.0, 576.0, 573.0, 579.0, 576.0, 573.0, 573.0, 573.0, 573.0, 564.0, 579.0, 576.0, 576.0, 576.0, 579.0, 582.0, 579.0, 576.0, 576.0, 573.0, 570.0, 530.0, 579.0, 582.0, 530.0, 522.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [266.0, 267.0, 291.0, 291.0, 291.0, 288.0, 291.0, 288.0, 257.0, 265.0, 294.0, 
276.0, 243.0, 250.0, 261.0, 252.0, 289.0, 281.0, 281.0, 298.0, 279.0, 251.0, 295.0, 278.0, 279.0, 291.0, 257.0, 265.0, 293.0, 283.0, 280.0, 296.0, 281.0, 292.0, 254.0, 271.0, 298.0, 281.0, 288.0, 291.0, 251.0, 271.0, 287.0, 289.0, 286.0, 290.0, 285.0, 288.0, 293.0, 280.0, 278.0, 298.0, 291.0, 276.0, 290.0, 283.0, 291.0, 291.0, 325.0, 305.0, 294.0, 285.0, 272.0, 253.0, 246.0, 273.0, 284.0, 295.0, 293.0, 286.0, 289.0, 284.0, 293.0, 289.0, 282.0, 288.0, 259.0, 262.0, 262.0, 265.0, 290.0, 286.0, 205.0, 220.0, 276.0, 300.0, 283.0, 299.0, 170.0, 157.0, 290.0, 289.0, 288.0, 285.0, 283.0, 293.0, 265.0, 265.0, 289.0, 284.0, 296.0, 280.0, 290.0, 283.0, 285.0, 288.0, 304.0, 275.0, 299.0, 280.0, 283.0, 293.0, 252.0, 270.0, 293.0, 277.0, 289.0, 290.0, 256.0, 263.0, 296.0, 280.0, 293.0, 280.0, 293.0, 280.0, 283.0, 296.0, 288.0, 288.0, 269.0, 261.0, 218.0, 212.0, 312.0, 315.0, 298.0, 284.0, 289.0, 293.0, 290.0, 286.0, 277.0, 299.0, 288.0, 282.0, 285.0, 291.0, 286.0, 287.0, 303.0, 276.0, 296.0, 280.0, 277.0, 296.0, 288.0, 285.0, 287.0, 286.0, 303.0, 270.0, 297.0, 267.0, 280.0, 299.0, 285.0, 291.0, 299.0, 277.0, 284.0, 292.0, 290.0, 289.0, 290.0, 292.0, 283.0, 296.0, 283.0, 293.0, 287.0, 289.0, 288.0, 285.0, 298.0, 272.0, 265.0, 265.0, 289.0, 290.0, 278.0, 304.0, 270.0, 260.0, 243.0, 279.0, 279.0, 297.0, 292.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827640519501001, "mean_inference_ms": 1.2056064329122549, "mean_action_processing_ms": 0.132274341413401, "mean_env_wait_ms": 0.8360329786459079, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 327.0, "episode_reward_mean": 561.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 280.66}, "hist_stats": {"episode_reward": [533.0, 582.0, 579.0, 579.0, 522.0, 570.0, 493.0, 513.0, 570.0, 579.0, 530.0, 573.0, 570.0, 522.0, 576.0, 576.0, 573.0, 525.0, 579.0, 
579.0, 522.0, 576.0, 576.0, 573.0, 573.0, 576.0, 567.0, 573.0, 582.0, 630.0, 579.0, 525.0, 519.0, 579.0, 579.0, 573.0, 582.0, 570.0, 521.0, 527.0, 576.0, 425.0, 576.0, 582.0, 327.0, 579.0, 573.0, 576.0, 530.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 522.0, 570.0, 579.0, 519.0, 576.0, 573.0, 573.0, 579.0, 576.0, 530.0, 430.0, 627.0, 582.0, 582.0, 576.0, 576.0, 570.0, 576.0, 573.0, 579.0, 576.0, 573.0, 573.0, 573.0, 573.0, 564.0, 579.0, 576.0, 576.0, 576.0, 579.0, 582.0, 579.0, 576.0, 576.0, 573.0, 570.0, 530.0, 579.0, 582.0, 530.0, 522.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [266.0, 267.0, 291.0, 291.0, 291.0, 288.0, 291.0, 288.0, 257.0, 265.0, 294.0, 276.0, 243.0, 250.0, 261.0, 252.0, 289.0, 281.0, 281.0, 298.0, 279.0, 251.0, 295.0, 278.0, 279.0, 291.0, 257.0, 265.0, 293.0, 283.0, 280.0, 296.0, 281.0, 292.0, 254.0, 271.0, 298.0, 281.0, 288.0, 291.0, 251.0, 271.0, 287.0, 289.0, 286.0, 290.0, 285.0, 288.0, 293.0, 280.0, 278.0, 298.0, 291.0, 276.0, 290.0, 283.0, 291.0, 291.0, 325.0, 305.0, 294.0, 285.0, 272.0, 253.0, 246.0, 273.0, 284.0, 295.0, 293.0, 286.0, 289.0, 284.0, 293.0, 289.0, 282.0, 288.0, 259.0, 262.0, 262.0, 265.0, 290.0, 286.0, 205.0, 220.0, 276.0, 300.0, 283.0, 299.0, 170.0, 157.0, 290.0, 289.0, 288.0, 285.0, 283.0, 293.0, 265.0, 265.0, 289.0, 284.0, 296.0, 280.0, 290.0, 283.0, 285.0, 288.0, 304.0, 275.0, 299.0, 280.0, 283.0, 293.0, 252.0, 270.0, 293.0, 277.0, 289.0, 290.0, 256.0, 263.0, 296.0, 280.0, 293.0, 280.0, 293.0, 280.0, 283.0, 
296.0, 288.0, 288.0, 269.0, 261.0, 218.0, 212.0, 312.0, 315.0, 298.0, 284.0, 289.0, 293.0, 290.0, 286.0, 277.0, 299.0, 288.0, 282.0, 285.0, 291.0, 286.0, 287.0, 303.0, 276.0, 296.0, 280.0, 277.0, 296.0, 288.0, 285.0, 287.0, 286.0, 303.0, 270.0, 297.0, 267.0, 280.0, 299.0, 285.0, 291.0, 299.0, 277.0, 284.0, 292.0, 290.0, 289.0, 290.0, 292.0, 283.0, 296.0, 283.0, 293.0, 287.0, 289.0, 288.0, 285.0, 298.0, 272.0, 265.0, 265.0, 289.0, 290.0, 278.0, 304.0, 270.0, 260.0, 243.0, 279.0, 279.0, 297.0, 292.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827640519501001, "mean_inference_ms": 1.2056064329122549, "mean_action_processing_ms": 0.132274341413401, "mean_env_wait_ms": 0.8360329786459079, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4684800, "num_agent_steps_trained": 4684800, "num_env_steps_sampled": 2342400, "num_env_steps_trained": 2342400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2342400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4684800, "timers": {"training_iteration_time_ms": 3661.407, "learn_time_ms": 1135.083, "learn_throughput": 11276.706, "synch_weights_time_ms": 11.507}, "counters": {"num_env_steps_sampled": 2342400, "num_env_steps_trained": 2342400, "num_agent_steps_sampled": 4684800, "num_agent_steps_trained": 4684800}, "done": false, "episodes_total": 5856, "training_iteration": 183, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-50", "timestamp": 1666581110, "time_this_iter_s": 3.645003080368042, "time_total_s": 685.767548084259, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 685.767548084259, "timesteps_since_restore": 0, "iterations_since_restore": 183, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.466666666666665, "ram_util_percent": 10.616666666666665}} +{"custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 171.94, "shaped_reward_min": 127, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.96, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.3, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.48, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.3, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.38, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.15, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 
14.82, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.38, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.38, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024932546075433493, "policy_loss": 0.0022324277088046074, "vf_loss": 7.732563495635986, "vf_explained_var": 0.6382303833961487, "kl": 0.0018180841580033302, "entropy": 1.0248595476150513, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2355200, "num_env_steps_trained": 2355200, "num_agent_steps_sampled": 4710400, "num_agent_steps_trained": 4710400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 327.0, "episode_reward_mean": 557.14, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 278.57}, "custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 171.94, "shaped_reward_min": 127, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.96, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.3, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.48, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.38, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.15, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.38, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.38, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 579.0, 579.0, 573.0, 582.0, 570.0, 521.0, 527.0, 576.0, 425.0, 576.0, 582.0, 327.0, 579.0, 573.0, 576.0, 530.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 576.0, 522.0, 570.0, 579.0, 519.0, 576.0, 573.0, 573.0, 579.0, 576.0, 530.0, 430.0, 627.0, 582.0, 582.0, 576.0, 576.0, 570.0, 576.0, 573.0, 579.0, 576.0, 573.0, 573.0, 573.0, 573.0, 564.0, 579.0, 576.0, 576.0, 576.0, 579.0, 582.0, 579.0, 576.0, 576.0, 573.0, 570.0, 530.0, 579.0, 582.0, 530.0, 522.0, 576.0, 573.0, 522.0, 533.0, 530.0, 570.0, 522.0, 530.0, 516.0, 582.0, 498.0, 579.0, 525.0, 522.0, 630.0, 573.0, 582.0, 441.0, 573.0, 579.0, 573.0, 584.0, 576.0, 573.0, 527.0, 579.0, 510.0, 576.0, 481.0, 525.0, 527.0, 582.0, 579.0, 558.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [246.0, 273.0, 284.0, 295.0, 293.0, 286.0, 289.0, 284.0, 293.0, 289.0, 282.0, 
288.0, 259.0, 262.0, 262.0, 265.0, 290.0, 286.0, 205.0, 220.0, 276.0, 300.0, 283.0, 299.0, 170.0, 157.0, 290.0, 289.0, 288.0, 285.0, 283.0, 293.0, 265.0, 265.0, 289.0, 284.0, 296.0, 280.0, 290.0, 283.0, 285.0, 288.0, 304.0, 275.0, 299.0, 280.0, 283.0, 293.0, 252.0, 270.0, 293.0, 277.0, 289.0, 290.0, 256.0, 263.0, 296.0, 280.0, 293.0, 280.0, 293.0, 280.0, 283.0, 296.0, 288.0, 288.0, 269.0, 261.0, 218.0, 212.0, 312.0, 315.0, 298.0, 284.0, 289.0, 293.0, 290.0, 286.0, 277.0, 299.0, 288.0, 282.0, 285.0, 291.0, 286.0, 287.0, 303.0, 276.0, 296.0, 280.0, 277.0, 296.0, 288.0, 285.0, 287.0, 286.0, 303.0, 270.0, 297.0, 267.0, 280.0, 299.0, 285.0, 291.0, 299.0, 277.0, 284.0, 292.0, 290.0, 289.0, 290.0, 292.0, 283.0, 296.0, 283.0, 293.0, 287.0, 289.0, 288.0, 285.0, 298.0, 272.0, 265.0, 265.0, 289.0, 290.0, 278.0, 304.0, 270.0, 260.0, 243.0, 279.0, 279.0, 297.0, 292.0, 281.0, 255.0, 267.0, 266.0, 267.0, 281.0, 249.0, 282.0, 288.0, 262.0, 260.0, 259.0, 271.0, 257.0, 259.0, 292.0, 290.0, 247.0, 251.0, 286.0, 293.0, 262.0, 263.0, 258.0, 264.0, 316.0, 314.0, 289.0, 284.0, 288.0, 294.0, 215.0, 226.0, 299.0, 274.0, 285.0, 294.0, 288.0, 285.0, 294.0, 290.0, 288.0, 288.0, 282.0, 291.0, 274.0, 253.0, 283.0, 296.0, 241.0, 269.0, 298.0, 278.0, 240.0, 241.0, 263.0, 262.0, 270.0, 257.0, 281.0, 301.0, 293.0, 286.0, 284.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.682712178829132, "mean_inference_ms": 1.2054880729211481, "mean_action_processing_ms": 0.13226720987812923, "mean_env_wait_ms": 0.8358785654687424, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 327.0, "episode_reward_mean": 557.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 278.57}, "hist_stats": {"episode_reward": [519.0, 579.0, 579.0, 573.0, 582.0, 570.0, 521.0, 527.0, 576.0, 425.0, 576.0, 582.0, 327.0, 579.0, 573.0, 576.0, 530.0, 573.0, 576.0, 
573.0, 573.0, 579.0, 579.0, 576.0, 522.0, 570.0, 579.0, 519.0, 576.0, 573.0, 573.0, 579.0, 576.0, 530.0, 430.0, 627.0, 582.0, 582.0, 576.0, 576.0, 570.0, 576.0, 573.0, 579.0, 576.0, 573.0, 573.0, 573.0, 573.0, 564.0, 579.0, 576.0, 576.0, 576.0, 579.0, 582.0, 579.0, 576.0, 576.0, 573.0, 570.0, 530.0, 579.0, 582.0, 530.0, 522.0, 576.0, 573.0, 522.0, 533.0, 530.0, 570.0, 522.0, 530.0, 516.0, 582.0, 498.0, 579.0, 525.0, 522.0, 630.0, 573.0, 582.0, 441.0, 573.0, 579.0, 573.0, 584.0, 576.0, 573.0, 527.0, 579.0, 510.0, 576.0, 481.0, 525.0, 527.0, 582.0, 579.0, 558.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [246.0, 273.0, 284.0, 295.0, 293.0, 286.0, 289.0, 284.0, 293.0, 289.0, 282.0, 288.0, 259.0, 262.0, 262.0, 265.0, 290.0, 286.0, 205.0, 220.0, 276.0, 300.0, 283.0, 299.0, 170.0, 157.0, 290.0, 289.0, 288.0, 285.0, 283.0, 293.0, 265.0, 265.0, 289.0, 284.0, 296.0, 280.0, 290.0, 283.0, 285.0, 288.0, 304.0, 275.0, 299.0, 280.0, 283.0, 293.0, 252.0, 270.0, 293.0, 277.0, 289.0, 290.0, 256.0, 263.0, 296.0, 280.0, 293.0, 280.0, 293.0, 280.0, 283.0, 296.0, 288.0, 288.0, 269.0, 261.0, 218.0, 212.0, 312.0, 315.0, 298.0, 284.0, 289.0, 293.0, 290.0, 286.0, 277.0, 299.0, 288.0, 282.0, 285.0, 291.0, 286.0, 287.0, 303.0, 276.0, 296.0, 280.0, 277.0, 296.0, 288.0, 285.0, 287.0, 286.0, 303.0, 270.0, 297.0, 267.0, 280.0, 299.0, 285.0, 291.0, 299.0, 277.0, 284.0, 292.0, 290.0, 289.0, 290.0, 292.0, 283.0, 296.0, 283.0, 293.0, 287.0, 289.0, 288.0, 285.0, 298.0, 272.0, 265.0, 265.0, 289.0, 290.0, 278.0, 
304.0, 270.0, 260.0, 243.0, 279.0, 279.0, 297.0, 292.0, 281.0, 255.0, 267.0, 266.0, 267.0, 281.0, 249.0, 282.0, 288.0, 262.0, 260.0, 259.0, 271.0, 257.0, 259.0, 292.0, 290.0, 247.0, 251.0, 286.0, 293.0, 262.0, 263.0, 258.0, 264.0, 316.0, 314.0, 289.0, 284.0, 288.0, 294.0, 215.0, 226.0, 299.0, 274.0, 285.0, 294.0, 288.0, 285.0, 294.0, 290.0, 288.0, 288.0, 282.0, 291.0, 274.0, 253.0, 283.0, 296.0, 241.0, 269.0, 298.0, 278.0, 240.0, 241.0, 263.0, 262.0, 270.0, 257.0, 281.0, 301.0, 293.0, 286.0, 284.0, 274.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.682712178829132, "mean_inference_ms": 1.2054880729211481, "mean_action_processing_ms": 0.13226720987812923, "mean_env_wait_ms": 0.8358785654687424, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4710400, "num_agent_steps_trained": 4710400, "num_env_steps_sampled": 2355200, "num_env_steps_trained": 2355200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2355200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4710400, "timers": {"training_iteration_time_ms": 3667.443, "learn_time_ms": 1140.231, "learn_throughput": 11225.797, "synch_weights_time_ms": 11.702}, "counters": {"num_env_steps_sampled": 2355200, "num_env_steps_trained": 2355200, "num_agent_steps_sampled": 4710400, "num_agent_steps_trained": 4710400}, "done": false, "episodes_total": 5888, "training_iteration": 184, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-54", "timestamp": 1666581114, "time_this_iter_s": 3.6834990978240967, "time_total_s": 689.4510471820831, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 689.4510471820831, "timesteps_since_restore": 0, "iterations_since_restore": 184, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.52, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 171.48, "shaped_reward_min": 138, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.05, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.08, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.64, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.68, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.43, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.22, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.5, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.68, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.43, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.68, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.43, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012522500474005938, "policy_loss": 0.0009863225277513266, "vf_loss": 7.730276584625244, "vf_explained_var": 0.6408652663230896, "kl": 0.002116520656272769, "entropy": 1.0141997337341309, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2368000, "num_env_steps_trained": 2368000, "num_agent_steps_sampled": 4736000, "num_agent_steps_trained": 4736000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 430.0, "episode_reward_mean": 557.08, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 212.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 278.54}, "custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 171.48, "shaped_reward_min": 138, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.05, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.08, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.64, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.68, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.43, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.22, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.5, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.68, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.43, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.68, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.43, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 530.0, 430.0, 627.0, 582.0, 582.0, 576.0, 576.0, 570.0, 576.0, 573.0, 579.0, 576.0, 573.0, 573.0, 573.0, 573.0, 564.0, 579.0, 576.0, 576.0, 576.0, 579.0, 582.0, 579.0, 576.0, 576.0, 573.0, 570.0, 530.0, 579.0, 582.0, 530.0, 522.0, 576.0, 573.0, 522.0, 533.0, 530.0, 570.0, 522.0, 530.0, 516.0, 582.0, 498.0, 579.0, 525.0, 522.0, 630.0, 573.0, 582.0, 441.0, 573.0, 579.0, 573.0, 584.0, 576.0, 573.0, 527.0, 579.0, 510.0, 576.0, 481.0, 525.0, 527.0, 582.0, 579.0, 558.0, 522.0, 579.0, 579.0, 579.0, 570.0, 516.0, 570.0, 576.0, 573.0, 525.0, 581.0, 519.0, 584.0, 465.0, 524.0, 530.0, 573.0, 576.0, 513.0, 579.0, 516.0, 573.0, 522.0, 465.0, 573.0, 573.0, 576.0, 582.0, 582.0, 570.0, 570.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 269.0, 261.0, 218.0, 212.0, 312.0, 315.0, 298.0, 284.0, 289.0, 
293.0, 290.0, 286.0, 277.0, 299.0, 288.0, 282.0, 285.0, 291.0, 286.0, 287.0, 303.0, 276.0, 296.0, 280.0, 277.0, 296.0, 288.0, 285.0, 287.0, 286.0, 303.0, 270.0, 297.0, 267.0, 280.0, 299.0, 285.0, 291.0, 299.0, 277.0, 284.0, 292.0, 290.0, 289.0, 290.0, 292.0, 283.0, 296.0, 283.0, 293.0, 287.0, 289.0, 288.0, 285.0, 298.0, 272.0, 265.0, 265.0, 289.0, 290.0, 278.0, 304.0, 270.0, 260.0, 243.0, 279.0, 279.0, 297.0, 292.0, 281.0, 255.0, 267.0, 266.0, 267.0, 281.0, 249.0, 282.0, 288.0, 262.0, 260.0, 259.0, 271.0, 257.0, 259.0, 292.0, 290.0, 247.0, 251.0, 286.0, 293.0, 262.0, 263.0, 258.0, 264.0, 316.0, 314.0, 289.0, 284.0, 288.0, 294.0, 215.0, 226.0, 299.0, 274.0, 285.0, 294.0, 288.0, 285.0, 294.0, 290.0, 288.0, 288.0, 282.0, 291.0, 274.0, 253.0, 283.0, 296.0, 241.0, 269.0, 298.0, 278.0, 240.0, 241.0, 263.0, 262.0, 270.0, 257.0, 281.0, 301.0, 293.0, 286.0, 284.0, 274.0, 265.0, 257.0, 287.0, 292.0, 299.0, 280.0, 280.0, 299.0, 285.0, 285.0, 249.0, 267.0, 290.0, 280.0, 287.0, 289.0, 290.0, 283.0, 273.0, 252.0, 298.0, 283.0, 265.0, 254.0, 298.0, 286.0, 224.0, 241.0, 257.0, 267.0, 264.0, 266.0, 289.0, 284.0, 278.0, 298.0, 260.0, 253.0, 293.0, 286.0, 259.0, 257.0, 290.0, 283.0, 270.0, 252.0, 216.0, 249.0, 295.0, 278.0, 291.0, 282.0, 282.0, 294.0, 288.0, 294.0, 287.0, 295.0, 280.0, 290.0, 288.0, 282.0, 293.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827000572252169, "mean_inference_ms": 1.2053664161777287, "mean_action_processing_ms": 0.1322612138205102, "mean_env_wait_ms": 0.835721045762788, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 430.0, "episode_reward_mean": 557.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 212.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 278.54}, "hist_stats": {"episode_reward": [576.0, 530.0, 430.0, 627.0, 582.0, 582.0, 576.0, 576.0, 570.0, 576.0, 573.0, 579.0, 576.0, 573.0, 573.0, 573.0, 573.0, 564.0, 579.0, 
576.0, 576.0, 576.0, 579.0, 582.0, 579.0, 576.0, 576.0, 573.0, 570.0, 530.0, 579.0, 582.0, 530.0, 522.0, 576.0, 573.0, 522.0, 533.0, 530.0, 570.0, 522.0, 530.0, 516.0, 582.0, 498.0, 579.0, 525.0, 522.0, 630.0, 573.0, 582.0, 441.0, 573.0, 579.0, 573.0, 584.0, 576.0, 573.0, 527.0, 579.0, 510.0, 576.0, 481.0, 525.0, 527.0, 582.0, 579.0, 558.0, 522.0, 579.0, 579.0, 579.0, 570.0, 516.0, 570.0, 576.0, 573.0, 525.0, 581.0, 519.0, 584.0, 465.0, 524.0, 530.0, 573.0, 576.0, 513.0, 579.0, 516.0, 573.0, 522.0, 465.0, 573.0, 573.0, 576.0, 582.0, 582.0, 570.0, 570.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 269.0, 261.0, 218.0, 212.0, 312.0, 315.0, 298.0, 284.0, 289.0, 293.0, 290.0, 286.0, 277.0, 299.0, 288.0, 282.0, 285.0, 291.0, 286.0, 287.0, 303.0, 276.0, 296.0, 280.0, 277.0, 296.0, 288.0, 285.0, 287.0, 286.0, 303.0, 270.0, 297.0, 267.0, 280.0, 299.0, 285.0, 291.0, 299.0, 277.0, 284.0, 292.0, 290.0, 289.0, 290.0, 292.0, 283.0, 296.0, 283.0, 293.0, 287.0, 289.0, 288.0, 285.0, 298.0, 272.0, 265.0, 265.0, 289.0, 290.0, 278.0, 304.0, 270.0, 260.0, 243.0, 279.0, 279.0, 297.0, 292.0, 281.0, 255.0, 267.0, 266.0, 267.0, 281.0, 249.0, 282.0, 288.0, 262.0, 260.0, 259.0, 271.0, 257.0, 259.0, 292.0, 290.0, 247.0, 251.0, 286.0, 293.0, 262.0, 263.0, 258.0, 264.0, 316.0, 314.0, 289.0, 284.0, 288.0, 294.0, 215.0, 226.0, 299.0, 274.0, 285.0, 294.0, 288.0, 285.0, 294.0, 290.0, 288.0, 288.0, 282.0, 291.0, 274.0, 253.0, 283.0, 296.0, 241.0, 269.0, 298.0, 278.0, 240.0, 241.0, 263.0, 
262.0, 270.0, 257.0, 281.0, 301.0, 293.0, 286.0, 284.0, 274.0, 265.0, 257.0, 287.0, 292.0, 299.0, 280.0, 280.0, 299.0, 285.0, 285.0, 249.0, 267.0, 290.0, 280.0, 287.0, 289.0, 290.0, 283.0, 273.0, 252.0, 298.0, 283.0, 265.0, 254.0, 298.0, 286.0, 224.0, 241.0, 257.0, 267.0, 264.0, 266.0, 289.0, 284.0, 278.0, 298.0, 260.0, 253.0, 293.0, 286.0, 259.0, 257.0, 290.0, 283.0, 270.0, 252.0, 216.0, 249.0, 295.0, 278.0, 291.0, 282.0, 282.0, 294.0, 288.0, 294.0, 287.0, 295.0, 280.0, 290.0, 288.0, 282.0, 293.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6827000572252169, "mean_inference_ms": 1.2053664161777287, "mean_action_processing_ms": 0.1322612138205102, "mean_env_wait_ms": 0.835721045762788, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4736000, "num_agent_steps_trained": 4736000, "num_env_steps_sampled": 2368000, "num_env_steps_trained": 2368000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2368000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4736000, "timers": {"training_iteration_time_ms": 3656.599, "learn_time_ms": 1129.782, "learn_throughput": 11329.616, "synch_weights_time_ms": 12.079}, "counters": {"num_env_steps_sampled": 2368000, "num_env_steps_trained": 2368000, "num_agent_steps_sampled": 4736000, "num_agent_steps_trained": 4736000}, "done": false, "episodes_total": 5920, "training_iteration": 185, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-11-57", "timestamp": 1666581117, "time_this_iter_s": 3.6200082302093506, "time_total_s": 693.0710554122925, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 693.0710554122925, "timesteps_since_restore": 0, "iterations_since_restore": 185, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.933333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 190.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 170.0, "shaped_reward_min": 136, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.78, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.08, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.12, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.78, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.12, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.12, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013259206898510456, "policy_loss": 0.001057651941664517, "vf_loss": 7.706806659698486, "vf_explained_var": 0.6470644474029541, "kl": 0.003712405450642109, "entropy": 1.0048234462738037, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2380800, "num_env_steps_trained": 2380800, "num_agent_steps_sampled": 4761600, "num_agent_steps_trained": 4761600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 416.0, "episode_reward_mean": 551.6, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 275.8}, "custom_metrics": {"sparse_reward_mean": 190.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 170.0, "shaped_reward_min": 136, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.78, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.08, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.12, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 
1, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.78, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.12, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.12, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 522.0, 576.0, 573.0, 522.0, 533.0, 530.0, 570.0, 522.0, 530.0, 516.0, 582.0, 498.0, 579.0, 525.0, 522.0, 630.0, 573.0, 582.0, 441.0, 573.0, 579.0, 573.0, 584.0, 576.0, 573.0, 527.0, 579.0, 510.0, 576.0, 481.0, 525.0, 527.0, 582.0, 579.0, 558.0, 522.0, 579.0, 579.0, 579.0, 570.0, 516.0, 570.0, 576.0, 573.0, 525.0, 581.0, 519.0, 584.0, 465.0, 524.0, 530.0, 573.0, 576.0, 513.0, 579.0, 516.0, 573.0, 522.0, 465.0, 573.0, 573.0, 576.0, 582.0, 582.0, 570.0, 570.0, 573.0, 582.0, 539.0, 519.0, 579.0, 573.0, 567.0, 573.0, 579.0, 576.0, 582.0, 564.0, 579.0, 573.0, 524.0, 530.0, 573.0, 582.0, 525.0, 579.0, 576.0, 498.0, 525.0, 519.0, 573.0, 576.0, 579.0, 579.0, 533.0, 570.0, 470.0, 582.0, 416.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [270.0, 260.0, 243.0, 279.0, 279.0, 297.0, 292.0, 281.0, 255.0, 267.0, 266.0, 
267.0, 281.0, 249.0, 282.0, 288.0, 262.0, 260.0, 259.0, 271.0, 257.0, 259.0, 292.0, 290.0, 247.0, 251.0, 286.0, 293.0, 262.0, 263.0, 258.0, 264.0, 316.0, 314.0, 289.0, 284.0, 288.0, 294.0, 215.0, 226.0, 299.0, 274.0, 285.0, 294.0, 288.0, 285.0, 294.0, 290.0, 288.0, 288.0, 282.0, 291.0, 274.0, 253.0, 283.0, 296.0, 241.0, 269.0, 298.0, 278.0, 240.0, 241.0, 263.0, 262.0, 270.0, 257.0, 281.0, 301.0, 293.0, 286.0, 284.0, 274.0, 265.0, 257.0, 287.0, 292.0, 299.0, 280.0, 280.0, 299.0, 285.0, 285.0, 249.0, 267.0, 290.0, 280.0, 287.0, 289.0, 290.0, 283.0, 273.0, 252.0, 298.0, 283.0, 265.0, 254.0, 298.0, 286.0, 224.0, 241.0, 257.0, 267.0, 264.0, 266.0, 289.0, 284.0, 278.0, 298.0, 260.0, 253.0, 293.0, 286.0, 259.0, 257.0, 290.0, 283.0, 270.0, 252.0, 216.0, 249.0, 295.0, 278.0, 291.0, 282.0, 282.0, 294.0, 288.0, 294.0, 287.0, 295.0, 280.0, 290.0, 288.0, 282.0, 293.0, 280.0, 277.0, 305.0, 272.0, 267.0, 250.0, 269.0, 304.0, 275.0, 292.0, 281.0, 270.0, 297.0, 290.0, 283.0, 299.0, 280.0, 277.0, 299.0, 287.0, 295.0, 289.0, 275.0, 286.0, 293.0, 286.0, 287.0, 268.0, 256.0, 267.0, 263.0, 293.0, 280.0, 296.0, 286.0, 252.0, 273.0, 290.0, 289.0, 278.0, 298.0, 255.0, 243.0, 269.0, 256.0, 265.0, 254.0, 299.0, 274.0, 282.0, 294.0, 288.0, 291.0, 298.0, 281.0, 261.0, 272.0, 274.0, 296.0, 222.0, 248.0, 293.0, 289.0, 215.0, 201.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6826704429125031, "mean_inference_ms": 1.2052526937150345, "mean_action_processing_ms": 0.13225667069598454, "mean_env_wait_ms": 0.8355802940746705, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 416.0, "episode_reward_mean": 551.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 275.8}, "hist_stats": {"episode_reward": [530.0, 522.0, 576.0, 573.0, 522.0, 533.0, 530.0, 570.0, 522.0, 530.0, 516.0, 582.0, 498.0, 579.0, 525.0, 522.0, 630.0, 573.0, 582.0, 
441.0, 573.0, 579.0, 573.0, 584.0, 576.0, 573.0, 527.0, 579.0, 510.0, 576.0, 481.0, 525.0, 527.0, 582.0, 579.0, 558.0, 522.0, 579.0, 579.0, 579.0, 570.0, 516.0, 570.0, 576.0, 573.0, 525.0, 581.0, 519.0, 584.0, 465.0, 524.0, 530.0, 573.0, 576.0, 513.0, 579.0, 516.0, 573.0, 522.0, 465.0, 573.0, 573.0, 576.0, 582.0, 582.0, 570.0, 570.0, 573.0, 582.0, 539.0, 519.0, 579.0, 573.0, 567.0, 573.0, 579.0, 576.0, 582.0, 564.0, 579.0, 573.0, 524.0, 530.0, 573.0, 582.0, 525.0, 579.0, 576.0, 498.0, 525.0, 519.0, 573.0, 576.0, 579.0, 579.0, 533.0, 570.0, 470.0, 582.0, 416.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [270.0, 260.0, 243.0, 279.0, 279.0, 297.0, 292.0, 281.0, 255.0, 267.0, 266.0, 267.0, 281.0, 249.0, 282.0, 288.0, 262.0, 260.0, 259.0, 271.0, 257.0, 259.0, 292.0, 290.0, 247.0, 251.0, 286.0, 293.0, 262.0, 263.0, 258.0, 264.0, 316.0, 314.0, 289.0, 284.0, 288.0, 294.0, 215.0, 226.0, 299.0, 274.0, 285.0, 294.0, 288.0, 285.0, 294.0, 290.0, 288.0, 288.0, 282.0, 291.0, 274.0, 253.0, 283.0, 296.0, 241.0, 269.0, 298.0, 278.0, 240.0, 241.0, 263.0, 262.0, 270.0, 257.0, 281.0, 301.0, 293.0, 286.0, 284.0, 274.0, 265.0, 257.0, 287.0, 292.0, 299.0, 280.0, 280.0, 299.0, 285.0, 285.0, 249.0, 267.0, 290.0, 280.0, 287.0, 289.0, 290.0, 283.0, 273.0, 252.0, 298.0, 283.0, 265.0, 254.0, 298.0, 286.0, 224.0, 241.0, 257.0, 267.0, 264.0, 266.0, 289.0, 284.0, 278.0, 298.0, 260.0, 253.0, 293.0, 286.0, 259.0, 257.0, 290.0, 283.0, 270.0, 252.0, 216.0, 249.0, 295.0, 278.0, 291.0, 282.0, 282.0, 294.0, 288.0, 
294.0, 287.0, 295.0, 280.0, 290.0, 288.0, 282.0, 293.0, 280.0, 277.0, 305.0, 272.0, 267.0, 250.0, 269.0, 304.0, 275.0, 292.0, 281.0, 270.0, 297.0, 290.0, 283.0, 299.0, 280.0, 277.0, 299.0, 287.0, 295.0, 289.0, 275.0, 286.0, 293.0, 286.0, 287.0, 268.0, 256.0, 267.0, 263.0, 293.0, 280.0, 296.0, 286.0, 252.0, 273.0, 290.0, 289.0, 278.0, 298.0, 255.0, 243.0, 269.0, 256.0, 265.0, 254.0, 299.0, 274.0, 282.0, 294.0, 288.0, 291.0, 298.0, 281.0, 261.0, 272.0, 274.0, 296.0, 222.0, 248.0, 293.0, 289.0, 215.0, 201.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6826704429125031, "mean_inference_ms": 1.2052526937150345, "mean_action_processing_ms": 0.13225667069598454, "mean_env_wait_ms": 0.8355802940746705, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4761600, "num_agent_steps_trained": 4761600, "num_env_steps_sampled": 2380800, "num_env_steps_trained": 2380800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2380800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4761600, "timers": {"training_iteration_time_ms": 3653.653, "learn_time_ms": 1127.739, "learn_throughput": 11350.146, "synch_weights_time_ms": 11.485}, "counters": {"num_env_steps_sampled": 2380800, "num_env_steps_trained": 2380800, "num_agent_steps_sampled": 4761600, "num_agent_steps_trained": 4761600}, "done": false, "episodes_total": 5952, "training_iteration": 186, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-01", "timestamp": 1666581121, "time_this_iter_s": 3.6360175609588623, "time_total_s": 696.7070729732513, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 696.7070729732513, "timesteps_since_restore": 0, "iterations_since_restore": 186, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.339999999999996, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.61, "shaped_reward_min": 136, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.29, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.0, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.02, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.0, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.02, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.0, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.02, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003195032477378845, "policy_loss": -0.0005675801075994968, "vf_loss": 7.603795051574707, "vf_explained_var": 0.6576536893844604, "kl": 0.0021047075279057026, "entropy": 1.0246057510375977, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2393600, "num_env_steps_trained": 2393600, "num_agent_steps_sampled": 4787200, "num_agent_steps_trained": 4787200}, "sampler_results": {"episode_reward_max": 584.0, "episode_reward_min": 416.0, "episode_reward_mean": 555.81, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 277.905}, "custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.61, "shaped_reward_min": 136, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.57, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.29, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.0, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.02, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.08, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.0, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.02, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.0, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.02, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 582.0, 579.0, 558.0, 522.0, 579.0, 579.0, 579.0, 570.0, 516.0, 570.0, 576.0, 573.0, 525.0, 581.0, 519.0, 584.0, 465.0, 524.0, 530.0, 573.0, 576.0, 513.0, 579.0, 516.0, 573.0, 522.0, 465.0, 573.0, 573.0, 576.0, 582.0, 582.0, 570.0, 570.0, 573.0, 582.0, 539.0, 519.0, 579.0, 573.0, 567.0, 573.0, 579.0, 576.0, 582.0, 564.0, 579.0, 573.0, 524.0, 530.0, 573.0, 582.0, 525.0, 579.0, 576.0, 498.0, 525.0, 519.0, 573.0, 576.0, 579.0, 579.0, 533.0, 570.0, 470.0, 582.0, 416.0, 576.0, 570.0, 576.0, 579.0, 582.0, 516.0, 576.0, 573.0, 564.0, 576.0, 582.0, 513.0, 519.0, 578.0, 573.0, 516.0, 582.0, 570.0, 579.0, 570.0, 573.0, 576.0, 516.0, 576.0, 579.0, 573.0, 530.0, 584.0, 525.0, 522.0, 530.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [270.0, 257.0, 281.0, 301.0, 293.0, 286.0, 284.0, 274.0, 265.0, 257.0, 287.0, 
292.0, 299.0, 280.0, 280.0, 299.0, 285.0, 285.0, 249.0, 267.0, 290.0, 280.0, 287.0, 289.0, 290.0, 283.0, 273.0, 252.0, 298.0, 283.0, 265.0, 254.0, 298.0, 286.0, 224.0, 241.0, 257.0, 267.0, 264.0, 266.0, 289.0, 284.0, 278.0, 298.0, 260.0, 253.0, 293.0, 286.0, 259.0, 257.0, 290.0, 283.0, 270.0, 252.0, 216.0, 249.0, 295.0, 278.0, 291.0, 282.0, 282.0, 294.0, 288.0, 294.0, 287.0, 295.0, 280.0, 290.0, 288.0, 282.0, 293.0, 280.0, 277.0, 305.0, 272.0, 267.0, 250.0, 269.0, 304.0, 275.0, 292.0, 281.0, 270.0, 297.0, 290.0, 283.0, 299.0, 280.0, 277.0, 299.0, 287.0, 295.0, 289.0, 275.0, 286.0, 293.0, 286.0, 287.0, 268.0, 256.0, 267.0, 263.0, 293.0, 280.0, 296.0, 286.0, 252.0, 273.0, 290.0, 289.0, 278.0, 298.0, 255.0, 243.0, 269.0, 256.0, 265.0, 254.0, 299.0, 274.0, 282.0, 294.0, 288.0, 291.0, 298.0, 281.0, 261.0, 272.0, 274.0, 296.0, 222.0, 248.0, 293.0, 289.0, 215.0, 201.0, 294.0, 282.0, 281.0, 289.0, 281.0, 295.0, 300.0, 279.0, 291.0, 291.0, 258.0, 258.0, 283.0, 293.0, 283.0, 290.0, 282.0, 282.0, 303.0, 273.0, 306.0, 276.0, 255.0, 258.0, 273.0, 246.0, 272.0, 306.0, 294.0, 279.0, 253.0, 263.0, 296.0, 286.0, 294.0, 276.0, 291.0, 288.0, 278.0, 292.0, 286.0, 287.0, 291.0, 285.0, 256.0, 260.0, 286.0, 290.0, 286.0, 293.0, 287.0, 286.0, 263.0, 267.0, 296.0, 288.0, 262.0, 263.0, 256.0, 266.0, 270.0, 260.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.68267583764297, "mean_inference_ms": 1.2051792659431373, "mean_action_processing_ms": 0.13225872711682485, "mean_env_wait_ms": 0.8354850608241722, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 584.0, "episode_reward_min": 416.0, "episode_reward_mean": 555.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 277.905}, "hist_stats": {"episode_reward": [527.0, 582.0, 579.0, 558.0, 522.0, 579.0, 579.0, 579.0, 570.0, 516.0, 570.0, 576.0, 573.0, 525.0, 581.0, 519.0, 584.0, 465.0, 524.0, 
530.0, 573.0, 576.0, 513.0, 579.0, 516.0, 573.0, 522.0, 465.0, 573.0, 573.0, 576.0, 582.0, 582.0, 570.0, 570.0, 573.0, 582.0, 539.0, 519.0, 579.0, 573.0, 567.0, 573.0, 579.0, 576.0, 582.0, 564.0, 579.0, 573.0, 524.0, 530.0, 573.0, 582.0, 525.0, 579.0, 576.0, 498.0, 525.0, 519.0, 573.0, 576.0, 579.0, 579.0, 533.0, 570.0, 470.0, 582.0, 416.0, 576.0, 570.0, 576.0, 579.0, 582.0, 516.0, 576.0, 573.0, 564.0, 576.0, 582.0, 513.0, 519.0, 578.0, 573.0, 516.0, 582.0, 570.0, 579.0, 570.0, 573.0, 576.0, 516.0, 576.0, 579.0, 573.0, 530.0, 584.0, 525.0, 522.0, 530.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [270.0, 257.0, 281.0, 301.0, 293.0, 286.0, 284.0, 274.0, 265.0, 257.0, 287.0, 292.0, 299.0, 280.0, 280.0, 299.0, 285.0, 285.0, 249.0, 267.0, 290.0, 280.0, 287.0, 289.0, 290.0, 283.0, 273.0, 252.0, 298.0, 283.0, 265.0, 254.0, 298.0, 286.0, 224.0, 241.0, 257.0, 267.0, 264.0, 266.0, 289.0, 284.0, 278.0, 298.0, 260.0, 253.0, 293.0, 286.0, 259.0, 257.0, 290.0, 283.0, 270.0, 252.0, 216.0, 249.0, 295.0, 278.0, 291.0, 282.0, 282.0, 294.0, 288.0, 294.0, 287.0, 295.0, 280.0, 290.0, 288.0, 282.0, 293.0, 280.0, 277.0, 305.0, 272.0, 267.0, 250.0, 269.0, 304.0, 275.0, 292.0, 281.0, 270.0, 297.0, 290.0, 283.0, 299.0, 280.0, 277.0, 299.0, 287.0, 295.0, 289.0, 275.0, 286.0, 293.0, 286.0, 287.0, 268.0, 256.0, 267.0, 263.0, 293.0, 280.0, 296.0, 286.0, 252.0, 273.0, 290.0, 289.0, 278.0, 298.0, 255.0, 243.0, 269.0, 256.0, 265.0, 254.0, 299.0, 274.0, 282.0, 294.0, 288.0, 291.0, 298.0, 281.0, 261.0, 
272.0, 274.0, 296.0, 222.0, 248.0, 293.0, 289.0, 215.0, 201.0, 294.0, 282.0, 281.0, 289.0, 281.0, 295.0, 300.0, 279.0, 291.0, 291.0, 258.0, 258.0, 283.0, 293.0, 283.0, 290.0, 282.0, 282.0, 303.0, 273.0, 306.0, 276.0, 255.0, 258.0, 273.0, 246.0, 272.0, 306.0, 294.0, 279.0, 253.0, 263.0, 296.0, 286.0, 294.0, 276.0, 291.0, 288.0, 278.0, 292.0, 286.0, 287.0, 291.0, 285.0, 256.0, 260.0, 286.0, 290.0, 286.0, 293.0, 287.0, 286.0, 263.0, 267.0, 296.0, 288.0, 262.0, 263.0, 256.0, 266.0, 270.0, 260.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.68267583764297, "mean_inference_ms": 1.2051792659431373, "mean_action_processing_ms": 0.13225872711682485, "mean_env_wait_ms": 0.8354850608241722, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4787200, "num_agent_steps_trained": 4787200, "num_env_steps_sampled": 2393600, "num_env_steps_trained": 2393600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2393600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4787200, "timers": {"training_iteration_time_ms": 3661.62, "learn_time_ms": 1133.198, "learn_throughput": 11295.462, "synch_weights_time_ms": 11.112}, "counters": {"num_env_steps_sampled": 2393600, "num_env_steps_trained": 2393600, "num_agent_steps_sampled": 4787200, "num_agent_steps_trained": 4787200}, "done": false, "episodes_total": 5984, "training_iteration": 187, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-05", "timestamp": 1666581125, "time_this_iter_s": 3.723355770111084, "time_total_s": 700.4304287433624, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 700.4304287433624, "timesteps_since_restore": 0, "iterations_since_restore": 187, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.0, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 171.29, "shaped_reward_min": 136, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.76, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.45, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.18, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.13, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.1, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.98, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0006394333904609084, "policy_loss": -0.0008913751225918531, "vf_loss": 7.587137222290039, "vf_explained_var": 0.6698799133300781, "kl": 0.0019602831453084946, "entropy": 1.013542652130127, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2406400, "num_env_steps_trained": 2406400, "num_agent_steps_sampled": 4812800, "num_agent_steps_trained": 4812800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 416.0, "episode_reward_mean": 556.49, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 315.0}, "policy_reward_mean": {"ppo": 278.245}, "custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 171.29, "shaped_reward_min": 136, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.76, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.18, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.45, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.18, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.13, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.1, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 570.0, 573.0, 582.0, 539.0, 519.0, 579.0, 573.0, 567.0, 573.0, 579.0, 576.0, 582.0, 564.0, 579.0, 573.0, 524.0, 530.0, 573.0, 582.0, 525.0, 579.0, 576.0, 498.0, 525.0, 519.0, 573.0, 576.0, 579.0, 579.0, 533.0, 570.0, 470.0, 582.0, 416.0, 576.0, 570.0, 576.0, 579.0, 582.0, 516.0, 576.0, 573.0, 564.0, 576.0, 582.0, 513.0, 519.0, 578.0, 573.0, 516.0, 582.0, 570.0, 579.0, 570.0, 573.0, 576.0, 516.0, 576.0, 579.0, 573.0, 530.0, 584.0, 525.0, 522.0, 530.0, 579.0, 525.0, 573.0, 530.0, 630.0, 576.0, 525.0, 570.0, 522.0, 570.0, 579.0, 573.0, 576.0, 522.0, 579.0, 582.0, 530.0, 539.0, 522.0, 573.0, 579.0, 522.0, 487.0, 558.0, 465.0, 525.0, 573.0, 579.0, 530.0, 582.0, 579.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 295.0, 280.0, 290.0, 288.0, 282.0, 293.0, 280.0, 277.0, 305.0, 272.0, 
267.0, 250.0, 269.0, 304.0, 275.0, 292.0, 281.0, 270.0, 297.0, 290.0, 283.0, 299.0, 280.0, 277.0, 299.0, 287.0, 295.0, 289.0, 275.0, 286.0, 293.0, 286.0, 287.0, 268.0, 256.0, 267.0, 263.0, 293.0, 280.0, 296.0, 286.0, 252.0, 273.0, 290.0, 289.0, 278.0, 298.0, 255.0, 243.0, 269.0, 256.0, 265.0, 254.0, 299.0, 274.0, 282.0, 294.0, 288.0, 291.0, 298.0, 281.0, 261.0, 272.0, 274.0, 296.0, 222.0, 248.0, 293.0, 289.0, 215.0, 201.0, 294.0, 282.0, 281.0, 289.0, 281.0, 295.0, 300.0, 279.0, 291.0, 291.0, 258.0, 258.0, 283.0, 293.0, 283.0, 290.0, 282.0, 282.0, 303.0, 273.0, 306.0, 276.0, 255.0, 258.0, 273.0, 246.0, 272.0, 306.0, 294.0, 279.0, 253.0, 263.0, 296.0, 286.0, 294.0, 276.0, 291.0, 288.0, 278.0, 292.0, 286.0, 287.0, 291.0, 285.0, 256.0, 260.0, 286.0, 290.0, 286.0, 293.0, 287.0, 286.0, 263.0, 267.0, 296.0, 288.0, 262.0, 263.0, 256.0, 266.0, 270.0, 260.0, 291.0, 288.0, 257.0, 268.0, 289.0, 284.0, 265.0, 265.0, 315.0, 315.0, 298.0, 278.0, 261.0, 264.0, 272.0, 298.0, 254.0, 268.0, 309.0, 261.0, 285.0, 294.0, 276.0, 297.0, 295.0, 281.0, 255.0, 267.0, 290.0, 289.0, 305.0, 277.0, 267.0, 263.0, 267.0, 272.0, 266.0, 256.0, 285.0, 288.0, 278.0, 301.0, 250.0, 272.0, 235.0, 252.0, 284.0, 274.0, 222.0, 243.0, 279.0, 246.0, 291.0, 282.0, 287.0, 292.0, 268.0, 262.0, 300.0, 282.0, 278.0, 301.0, 304.0, 272.0, 294.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.682641226773663, "mean_inference_ms": 1.2050772392466162, "mean_action_processing_ms": 0.13225750950538415, "mean_env_wait_ms": 0.8353663191948928, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 416.0, "episode_reward_mean": 556.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 315.0}, "policy_reward_mean": {"ppo": 278.245}, "hist_stats": {"episode_reward": [582.0, 570.0, 570.0, 573.0, 582.0, 539.0, 519.0, 579.0, 573.0, 567.0, 573.0, 579.0, 576.0, 582.0, 564.0, 579.0, 573.0, 524.0, 530.0, 
573.0, 582.0, 525.0, 579.0, 576.0, 498.0, 525.0, 519.0, 573.0, 576.0, 579.0, 579.0, 533.0, 570.0, 470.0, 582.0, 416.0, 576.0, 570.0, 576.0, 579.0, 582.0, 516.0, 576.0, 573.0, 564.0, 576.0, 582.0, 513.0, 519.0, 578.0, 573.0, 516.0, 582.0, 570.0, 579.0, 570.0, 573.0, 576.0, 516.0, 576.0, 579.0, 573.0, 530.0, 584.0, 525.0, 522.0, 530.0, 579.0, 525.0, 573.0, 530.0, 630.0, 576.0, 525.0, 570.0, 522.0, 570.0, 579.0, 573.0, 576.0, 522.0, 579.0, 582.0, 530.0, 539.0, 522.0, 573.0, 579.0, 522.0, 487.0, 558.0, 465.0, 525.0, 573.0, 579.0, 530.0, 582.0, 579.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 295.0, 280.0, 290.0, 288.0, 282.0, 293.0, 280.0, 277.0, 305.0, 272.0, 267.0, 250.0, 269.0, 304.0, 275.0, 292.0, 281.0, 270.0, 297.0, 290.0, 283.0, 299.0, 280.0, 277.0, 299.0, 287.0, 295.0, 289.0, 275.0, 286.0, 293.0, 286.0, 287.0, 268.0, 256.0, 267.0, 263.0, 293.0, 280.0, 296.0, 286.0, 252.0, 273.0, 290.0, 289.0, 278.0, 298.0, 255.0, 243.0, 269.0, 256.0, 265.0, 254.0, 299.0, 274.0, 282.0, 294.0, 288.0, 291.0, 298.0, 281.0, 261.0, 272.0, 274.0, 296.0, 222.0, 248.0, 293.0, 289.0, 215.0, 201.0, 294.0, 282.0, 281.0, 289.0, 281.0, 295.0, 300.0, 279.0, 291.0, 291.0, 258.0, 258.0, 283.0, 293.0, 283.0, 290.0, 282.0, 282.0, 303.0, 273.0, 306.0, 276.0, 255.0, 258.0, 273.0, 246.0, 272.0, 306.0, 294.0, 279.0, 253.0, 263.0, 296.0, 286.0, 294.0, 276.0, 291.0, 288.0, 278.0, 292.0, 286.0, 287.0, 291.0, 285.0, 256.0, 260.0, 286.0, 290.0, 286.0, 293.0, 287.0, 286.0, 263.0, 267.0, 296.0, 
288.0, 262.0, 263.0, 256.0, 266.0, 270.0, 260.0, 291.0, 288.0, 257.0, 268.0, 289.0, 284.0, 265.0, 265.0, 315.0, 315.0, 298.0, 278.0, 261.0, 264.0, 272.0, 298.0, 254.0, 268.0, 309.0, 261.0, 285.0, 294.0, 276.0, 297.0, 295.0, 281.0, 255.0, 267.0, 290.0, 289.0, 305.0, 277.0, 267.0, 263.0, 267.0, 272.0, 266.0, 256.0, 285.0, 288.0, 278.0, 301.0, 250.0, 272.0, 235.0, 252.0, 284.0, 274.0, 222.0, 243.0, 279.0, 246.0, 291.0, 282.0, 287.0, 292.0, 268.0, 262.0, 300.0, 282.0, 278.0, 301.0, 304.0, 272.0, 294.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.682641226773663, "mean_inference_ms": 1.2050772392466162, "mean_action_processing_ms": 0.13225750950538415, "mean_env_wait_ms": 0.8353663191948928, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4812800, "num_agent_steps_trained": 4812800, "num_env_steps_sampled": 2406400, "num_env_steps_trained": 2406400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2406400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4812800, "timers": {"training_iteration_time_ms": 3668.657, "learn_time_ms": 1140.068, "learn_throughput": 11227.397, "synch_weights_time_ms": 11.499}, "counters": {"num_env_steps_sampled": 2406400, "num_env_steps_trained": 2406400, "num_agent_steps_sampled": 4812800, "num_agent_steps_trained": 4812800}, "done": false, "episodes_total": 6016, "training_iteration": 188, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-09", "timestamp": 1666581129, "time_this_iter_s": 3.769735813140869, "time_total_s": 704.2001645565033, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 704.2001645565033, "timesteps_since_restore": 0, "iterations_since_restore": 188, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.479999999999997, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 170.97, "shaped_reward_min": 136, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.15, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.84, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.76, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.18, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.57, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.58, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.24, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.57, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.58, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.57, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.58, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001346064847894013, "policy_loss": 0.0010941242799162865, "vf_loss": 7.551443099975586, "vf_explained_var": 0.6755005121231079, "kl": 0.0019845583010464907, "entropy": 1.00640869140625, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2419200, "num_env_steps_trained": 2419200, "num_agent_steps_sampled": 4838400, "num_agent_steps_trained": 4838400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 416.0, "episode_reward_mean": 555.37, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 315.0}, "policy_reward_mean": {"ppo": 277.685}, "custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 170.97, "shaped_reward_min": 136, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.15, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.84, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.76, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.57, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.58, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.24, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.57, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.58, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.57, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.58, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 470.0, 582.0, 416.0, 576.0, 570.0, 576.0, 579.0, 582.0, 516.0, 576.0, 573.0, 564.0, 576.0, 582.0, 513.0, 519.0, 578.0, 573.0, 516.0, 582.0, 570.0, 579.0, 570.0, 573.0, 576.0, 516.0, 576.0, 579.0, 573.0, 530.0, 584.0, 525.0, 522.0, 530.0, 579.0, 525.0, 573.0, 530.0, 630.0, 576.0, 525.0, 570.0, 522.0, 570.0, 579.0, 573.0, 576.0, 522.0, 579.0, 582.0, 530.0, 539.0, 522.0, 573.0, 579.0, 522.0, 487.0, 558.0, 465.0, 525.0, 573.0, 579.0, 530.0, 582.0, 579.0, 576.0, 576.0, 582.0, 579.0, 573.0, 579.0, 579.0, 576.0, 527.0, 579.0, 530.0, 573.0, 521.0, 516.0, 576.0, 573.0, 579.0, 573.0, 573.0, 576.0, 519.0, 579.0, 582.0, 579.0, 419.0, 519.0, 570.0, 519.0, 587.0, 576.0, 522.0, 576.0, 558.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [274.0, 296.0, 222.0, 248.0, 293.0, 289.0, 215.0, 201.0, 294.0, 282.0, 281.0, 
289.0, 281.0, 295.0, 300.0, 279.0, 291.0, 291.0, 258.0, 258.0, 283.0, 293.0, 283.0, 290.0, 282.0, 282.0, 303.0, 273.0, 306.0, 276.0, 255.0, 258.0, 273.0, 246.0, 272.0, 306.0, 294.0, 279.0, 253.0, 263.0, 296.0, 286.0, 294.0, 276.0, 291.0, 288.0, 278.0, 292.0, 286.0, 287.0, 291.0, 285.0, 256.0, 260.0, 286.0, 290.0, 286.0, 293.0, 287.0, 286.0, 263.0, 267.0, 296.0, 288.0, 262.0, 263.0, 256.0, 266.0, 270.0, 260.0, 291.0, 288.0, 257.0, 268.0, 289.0, 284.0, 265.0, 265.0, 315.0, 315.0, 298.0, 278.0, 261.0, 264.0, 272.0, 298.0, 254.0, 268.0, 309.0, 261.0, 285.0, 294.0, 276.0, 297.0, 295.0, 281.0, 255.0, 267.0, 290.0, 289.0, 305.0, 277.0, 267.0, 263.0, 267.0, 272.0, 266.0, 256.0, 285.0, 288.0, 278.0, 301.0, 250.0, 272.0, 235.0, 252.0, 284.0, 274.0, 222.0, 243.0, 279.0, 246.0, 291.0, 282.0, 287.0, 292.0, 268.0, 262.0, 300.0, 282.0, 278.0, 301.0, 304.0, 272.0, 294.0, 282.0, 301.0, 281.0, 296.0, 283.0, 272.0, 301.0, 283.0, 296.0, 288.0, 291.0, 301.0, 275.0, 246.0, 281.0, 291.0, 288.0, 266.0, 264.0, 282.0, 291.0, 271.0, 250.0, 249.0, 267.0, 280.0, 296.0, 297.0, 276.0, 285.0, 294.0, 293.0, 280.0, 286.0, 287.0, 291.0, 285.0, 261.0, 258.0, 308.0, 271.0, 288.0, 294.0, 283.0, 296.0, 193.0, 226.0, 258.0, 261.0, 287.0, 283.0, 263.0, 256.0, 301.0, 286.0, 290.0, 286.0, 254.0, 268.0, 292.0, 284.0, 275.0, 283.0, 275.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6826179017430108, "mean_inference_ms": 1.2049754873065845, "mean_action_processing_ms": 0.1322529508434523, "mean_env_wait_ms": 0.8352187465180257, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 416.0, "episode_reward_mean": 555.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 315.0}, "policy_reward_mean": {"ppo": 277.685}, "hist_stats": {"episode_reward": [570.0, 470.0, 582.0, 416.0, 576.0, 570.0, 576.0, 579.0, 582.0, 516.0, 576.0, 573.0, 564.0, 576.0, 582.0, 513.0, 519.0, 578.0, 573.0, 
516.0, 582.0, 570.0, 579.0, 570.0, 573.0, 576.0, 516.0, 576.0, 579.0, 573.0, 530.0, 584.0, 525.0, 522.0, 530.0, 579.0, 525.0, 573.0, 530.0, 630.0, 576.0, 525.0, 570.0, 522.0, 570.0, 579.0, 573.0, 576.0, 522.0, 579.0, 582.0, 530.0, 539.0, 522.0, 573.0, 579.0, 522.0, 487.0, 558.0, 465.0, 525.0, 573.0, 579.0, 530.0, 582.0, 579.0, 576.0, 576.0, 582.0, 579.0, 573.0, 579.0, 579.0, 576.0, 527.0, 579.0, 530.0, 573.0, 521.0, 516.0, 576.0, 573.0, 579.0, 573.0, 573.0, 576.0, 519.0, 579.0, 582.0, 579.0, 419.0, 519.0, 570.0, 519.0, 587.0, 576.0, 522.0, 576.0, 558.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [274.0, 296.0, 222.0, 248.0, 293.0, 289.0, 215.0, 201.0, 294.0, 282.0, 281.0, 289.0, 281.0, 295.0, 300.0, 279.0, 291.0, 291.0, 258.0, 258.0, 283.0, 293.0, 283.0, 290.0, 282.0, 282.0, 303.0, 273.0, 306.0, 276.0, 255.0, 258.0, 273.0, 246.0, 272.0, 306.0, 294.0, 279.0, 253.0, 263.0, 296.0, 286.0, 294.0, 276.0, 291.0, 288.0, 278.0, 292.0, 286.0, 287.0, 291.0, 285.0, 256.0, 260.0, 286.0, 290.0, 286.0, 293.0, 287.0, 286.0, 263.0, 267.0, 296.0, 288.0, 262.0, 263.0, 256.0, 266.0, 270.0, 260.0, 291.0, 288.0, 257.0, 268.0, 289.0, 284.0, 265.0, 265.0, 315.0, 315.0, 298.0, 278.0, 261.0, 264.0, 272.0, 298.0, 254.0, 268.0, 309.0, 261.0, 285.0, 294.0, 276.0, 297.0, 295.0, 281.0, 255.0, 267.0, 290.0, 289.0, 305.0, 277.0, 267.0, 263.0, 267.0, 272.0, 266.0, 256.0, 285.0, 288.0, 278.0, 301.0, 250.0, 272.0, 235.0, 252.0, 284.0, 274.0, 222.0, 243.0, 279.0, 246.0, 291.0, 282.0, 287.0, 292.0, 268.0, 
262.0, 300.0, 282.0, 278.0, 301.0, 304.0, 272.0, 294.0, 282.0, 301.0, 281.0, 296.0, 283.0, 272.0, 301.0, 283.0, 296.0, 288.0, 291.0, 301.0, 275.0, 246.0, 281.0, 291.0, 288.0, 266.0, 264.0, 282.0, 291.0, 271.0, 250.0, 249.0, 267.0, 280.0, 296.0, 297.0, 276.0, 285.0, 294.0, 293.0, 280.0, 286.0, 287.0, 291.0, 285.0, 261.0, 258.0, 308.0, 271.0, 288.0, 294.0, 283.0, 296.0, 193.0, 226.0, 258.0, 261.0, 287.0, 283.0, 263.0, 256.0, 301.0, 286.0, 290.0, 286.0, 254.0, 268.0, 292.0, 284.0, 275.0, 283.0, 275.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6826179017430108, "mean_inference_ms": 1.2049754873065845, "mean_action_processing_ms": 0.1322529508434523, "mean_env_wait_ms": 0.8352187465180257, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4838400, "num_agent_steps_trained": 4838400, "num_env_steps_sampled": 2419200, "num_env_steps_trained": 2419200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2419200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4838400, "timers": {"training_iteration_time_ms": 3634.315, "learn_time_ms": 1128.996, "learn_throughput": 11337.511, "synch_weights_time_ms": 11.43}, "counters": {"num_env_steps_sampled": 2419200, "num_env_steps_trained": 2419200, "num_agent_steps_sampled": 4838400, "num_agent_steps_trained": 4838400}, "done": false, "episodes_total": 6048, "training_iteration": 189, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-13", "timestamp": 1666581133, "time_this_iter_s": 3.596635580062866, "time_total_s": 707.7968001365662, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 707.7968001365662, "timesteps_since_restore": 0, "iterations_since_restore": 189, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.979999999999997, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 169.87, "shaped_reward_min": 132, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.97, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.22, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.63, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.8, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.63, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.72, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.21, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.34, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.38, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.63, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.63, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009595003793947399, "policy_loss": -0.0012168899411335588, "vf_loss": 7.624897003173828, "vf_explained_var": 0.659218430519104, "kl": 0.0018678703345358372, "entropy": 1.0101993083953857, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2432000, "num_env_steps_trained": 2432000, "num_agent_steps_sampled": 4864000, "num_agent_steps_trained": 4864000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 419.0, "episode_reward_mean": 553.07, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 315.0}, "policy_reward_mean": {"ppo": 276.535}, "custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 169.87, "shaped_reward_min": 132, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.97, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.22, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.63, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.8, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.63, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.72, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.21, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.14, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 5.34, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.38, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.63, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.63, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 522.0, 530.0, 579.0, 525.0, 573.0, 530.0, 630.0, 576.0, 525.0, 570.0, 522.0, 570.0, 579.0, 573.0, 576.0, 522.0, 579.0, 582.0, 530.0, 539.0, 522.0, 573.0, 579.0, 522.0, 487.0, 558.0, 465.0, 525.0, 573.0, 579.0, 530.0, 582.0, 579.0, 576.0, 576.0, 582.0, 579.0, 573.0, 579.0, 579.0, 576.0, 527.0, 579.0, 530.0, 573.0, 521.0, 516.0, 576.0, 573.0, 579.0, 573.0, 573.0, 576.0, 519.0, 579.0, 582.0, 579.0, 419.0, 519.0, 570.0, 519.0, 587.0, 576.0, 522.0, 576.0, 558.0, 570.0, 570.0, 522.0, 573.0, 519.0, 576.0, 573.0, 570.0, 527.0, 522.0, 587.0, 579.0, 573.0, 564.0, 510.0, 452.0, 519.0, 504.0, 573.0, 525.0, 582.0, 573.0, 519.0, 576.0, 510.0, 573.0, 570.0, 573.0, 533.0, 570.0, 522.0, 570.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 263.0, 256.0, 266.0, 270.0, 260.0, 291.0, 288.0, 257.0, 268.0, 289.0, 
284.0, 265.0, 265.0, 315.0, 315.0, 298.0, 278.0, 261.0, 264.0, 272.0, 298.0, 254.0, 268.0, 309.0, 261.0, 285.0, 294.0, 276.0, 297.0, 295.0, 281.0, 255.0, 267.0, 290.0, 289.0, 305.0, 277.0, 267.0, 263.0, 267.0, 272.0, 266.0, 256.0, 285.0, 288.0, 278.0, 301.0, 250.0, 272.0, 235.0, 252.0, 284.0, 274.0, 222.0, 243.0, 279.0, 246.0, 291.0, 282.0, 287.0, 292.0, 268.0, 262.0, 300.0, 282.0, 278.0, 301.0, 304.0, 272.0, 294.0, 282.0, 301.0, 281.0, 296.0, 283.0, 272.0, 301.0, 283.0, 296.0, 288.0, 291.0, 301.0, 275.0, 246.0, 281.0, 291.0, 288.0, 266.0, 264.0, 282.0, 291.0, 271.0, 250.0, 249.0, 267.0, 280.0, 296.0, 297.0, 276.0, 285.0, 294.0, 293.0, 280.0, 286.0, 287.0, 291.0, 285.0, 261.0, 258.0, 308.0, 271.0, 288.0, 294.0, 283.0, 296.0, 193.0, 226.0, 258.0, 261.0, 287.0, 283.0, 263.0, 256.0, 301.0, 286.0, 290.0, 286.0, 254.0, 268.0, 292.0, 284.0, 275.0, 283.0, 275.0, 295.0, 291.0, 279.0, 261.0, 261.0, 282.0, 291.0, 266.0, 253.0, 282.0, 294.0, 274.0, 299.0, 273.0, 297.0, 285.0, 242.0, 260.0, 262.0, 299.0, 288.0, 294.0, 285.0, 298.0, 275.0, 275.0, 289.0, 260.0, 250.0, 223.0, 229.0, 253.0, 266.0, 243.0, 261.0, 278.0, 295.0, 277.0, 248.0, 298.0, 284.0, 286.0, 287.0, 247.0, 272.0, 280.0, 296.0, 263.0, 247.0, 294.0, 279.0, 278.0, 292.0, 287.0, 286.0, 288.0, 245.0, 286.0, 284.0, 272.0, 250.0, 290.0, 280.0, 286.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844611466940688, "mean_inference_ms": 1.2103155404175103, "mean_action_processing_ms": 0.13240161079494261, "mean_env_wait_ms": 0.8363401360482277, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 419.0, "episode_reward_mean": 553.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 315.0}, "policy_reward_mean": {"ppo": 276.535}, "hist_stats": {"episode_reward": [525.0, 522.0, 530.0, 579.0, 525.0, 573.0, 530.0, 630.0, 576.0, 525.0, 570.0, 522.0, 570.0, 579.0, 573.0, 576.0, 522.0, 579.0, 
582.0, 530.0, 539.0, 522.0, 573.0, 579.0, 522.0, 487.0, 558.0, 465.0, 525.0, 573.0, 579.0, 530.0, 582.0, 579.0, 576.0, 576.0, 582.0, 579.0, 573.0, 579.0, 579.0, 576.0, 527.0, 579.0, 530.0, 573.0, 521.0, 516.0, 576.0, 573.0, 579.0, 573.0, 573.0, 576.0, 519.0, 579.0, 582.0, 579.0, 419.0, 519.0, 570.0, 519.0, 587.0, 576.0, 522.0, 576.0, 558.0, 570.0, 570.0, 522.0, 573.0, 519.0, 576.0, 573.0, 570.0, 527.0, 522.0, 587.0, 579.0, 573.0, 564.0, 510.0, 452.0, 519.0, 504.0, 573.0, 525.0, 582.0, 573.0, 519.0, 576.0, 510.0, 573.0, 570.0, 573.0, 533.0, 570.0, 522.0, 570.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 263.0, 256.0, 266.0, 270.0, 260.0, 291.0, 288.0, 257.0, 268.0, 289.0, 284.0, 265.0, 265.0, 315.0, 315.0, 298.0, 278.0, 261.0, 264.0, 272.0, 298.0, 254.0, 268.0, 309.0, 261.0, 285.0, 294.0, 276.0, 297.0, 295.0, 281.0, 255.0, 267.0, 290.0, 289.0, 305.0, 277.0, 267.0, 263.0, 267.0, 272.0, 266.0, 256.0, 285.0, 288.0, 278.0, 301.0, 250.0, 272.0, 235.0, 252.0, 284.0, 274.0, 222.0, 243.0, 279.0, 246.0, 291.0, 282.0, 287.0, 292.0, 268.0, 262.0, 300.0, 282.0, 278.0, 301.0, 304.0, 272.0, 294.0, 282.0, 301.0, 281.0, 296.0, 283.0, 272.0, 301.0, 283.0, 296.0, 288.0, 291.0, 301.0, 275.0, 246.0, 281.0, 291.0, 288.0, 266.0, 264.0, 282.0, 291.0, 271.0, 250.0, 249.0, 267.0, 280.0, 296.0, 297.0, 276.0, 285.0, 294.0, 293.0, 280.0, 286.0, 287.0, 291.0, 285.0, 261.0, 258.0, 308.0, 271.0, 288.0, 294.0, 283.0, 296.0, 193.0, 226.0, 258.0, 261.0, 287.0, 283.0, 263.0, 256.0, 301.0, 286.0, 
290.0, 286.0, 254.0, 268.0, 292.0, 284.0, 275.0, 283.0, 275.0, 295.0, 291.0, 279.0, 261.0, 261.0, 282.0, 291.0, 266.0, 253.0, 282.0, 294.0, 274.0, 299.0, 273.0, 297.0, 285.0, 242.0, 260.0, 262.0, 299.0, 288.0, 294.0, 285.0, 298.0, 275.0, 275.0, 289.0, 260.0, 250.0, 223.0, 229.0, 253.0, 266.0, 243.0, 261.0, 278.0, 295.0, 277.0, 248.0, 298.0, 284.0, 286.0, 287.0, 247.0, 272.0, 280.0, 296.0, 263.0, 247.0, 294.0, 279.0, 278.0, 292.0, 287.0, 286.0, 288.0, 245.0, 286.0, 284.0, 272.0, 250.0, 290.0, 280.0, 286.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6844611466940688, "mean_inference_ms": 1.2103155404175103, "mean_action_processing_ms": 0.13240161079494261, "mean_env_wait_ms": 0.8363401360482277, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4864000, "num_agent_steps_trained": 4864000, "num_env_steps_sampled": 2432000, "num_env_steps_trained": 2432000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2432000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4864000, "timers": {"training_iteration_time_ms": 4318.739, "learn_time_ms": 1136.065, "learn_throughput": 11266.96, "synch_weights_time_ms": 11.862}, "counters": {"num_env_steps_sampled": 2432000, "num_env_steps_trained": 2432000, "num_agent_steps_sampled": 4864000, "num_agent_steps_trained": 4864000}, "done": false, "episodes_total": 6080, "training_iteration": 190, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-23", "timestamp": 1666581143, "time_this_iter_s": 10.565764904022217, "time_total_s": 718.3625650405884, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 718.3625650405884, "timesteps_since_restore": 0, "iterations_since_restore": 190, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 83.35, "ram_util_percent": 17.51875}} +{"custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.62, "shaped_reward_min": 132, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.87, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.33, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.22, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.78, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.33, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.33, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.000840085675008595, "policy_loss": -0.0011022280668839812, "vf_loss": 7.6430559158325195, "vf_explained_var": 0.6706925630569458, "kl": 0.0019743097946047783, "entropy": 1.0043245553970337, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2444800, "num_env_steps_trained": 2444800, "num_agent_steps_sampled": 4889600, "num_agent_steps_trained": 4889600}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 419.0, "episode_reward_mean": 556.22, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 310.0}, "policy_reward_mean": {"ppo": 278.11}, "custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.62, "shaped_reward_min": 132, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.87, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.46, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.33, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.12, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 
3, "soup_pickup_agent_0_mean": 5.22, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.78, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.33, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.33, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 576.0, 576.0, 582.0, 579.0, 573.0, 579.0, 579.0, 576.0, 527.0, 579.0, 530.0, 573.0, 521.0, 516.0, 576.0, 573.0, 579.0, 573.0, 573.0, 576.0, 519.0, 579.0, 582.0, 579.0, 419.0, 519.0, 570.0, 519.0, 587.0, 576.0, 522.0, 576.0, 558.0, 570.0, 570.0, 522.0, 573.0, 519.0, 576.0, 573.0, 570.0, 527.0, 522.0, 587.0, 579.0, 573.0, 564.0, 510.0, 452.0, 519.0, 504.0, 573.0, 525.0, 582.0, 573.0, 519.0, 576.0, 510.0, 573.0, 570.0, 573.0, 533.0, 570.0, 522.0, 570.0, 576.0, 579.0, 567.0, 576.0, 576.0, 522.0, 579.0, 576.0, 584.0, 507.0, 579.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 573.0, 582.0, 522.0, 579.0, 579.0, 468.0, 522.0, 570.0, 579.0, 573.0, 582.0, 539.0, 525.0, 576.0, 490.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 282.0, 278.0, 301.0, 304.0, 272.0, 294.0, 282.0, 301.0, 281.0, 296.0, 
283.0, 272.0, 301.0, 283.0, 296.0, 288.0, 291.0, 301.0, 275.0, 246.0, 281.0, 291.0, 288.0, 266.0, 264.0, 282.0, 291.0, 271.0, 250.0, 249.0, 267.0, 280.0, 296.0, 297.0, 276.0, 285.0, 294.0, 293.0, 280.0, 286.0, 287.0, 291.0, 285.0, 261.0, 258.0, 308.0, 271.0, 288.0, 294.0, 283.0, 296.0, 193.0, 226.0, 258.0, 261.0, 287.0, 283.0, 263.0, 256.0, 301.0, 286.0, 290.0, 286.0, 254.0, 268.0, 292.0, 284.0, 275.0, 283.0, 275.0, 295.0, 291.0, 279.0, 261.0, 261.0, 282.0, 291.0, 266.0, 253.0, 282.0, 294.0, 274.0, 299.0, 273.0, 297.0, 285.0, 242.0, 260.0, 262.0, 299.0, 288.0, 294.0, 285.0, 298.0, 275.0, 275.0, 289.0, 260.0, 250.0, 223.0, 229.0, 253.0, 266.0, 243.0, 261.0, 278.0, 295.0, 277.0, 248.0, 298.0, 284.0, 286.0, 287.0, 247.0, 272.0, 280.0, 296.0, 263.0, 247.0, 294.0, 279.0, 278.0, 292.0, 287.0, 286.0, 288.0, 245.0, 286.0, 284.0, 272.0, 250.0, 290.0, 280.0, 286.0, 290.0, 290.0, 289.0, 286.0, 281.0, 266.0, 310.0, 280.0, 296.0, 265.0, 257.0, 292.0, 287.0, 297.0, 279.0, 304.0, 280.0, 247.0, 260.0, 293.0, 286.0, 274.0, 299.0, 304.0, 275.0, 292.0, 290.0, 280.0, 296.0, 300.0, 276.0, 287.0, 289.0, 290.0, 283.0, 287.0, 295.0, 256.0, 266.0, 285.0, 294.0, 284.0, 295.0, 230.0, 238.0, 258.0, 264.0, 283.0, 287.0, 303.0, 276.0, 274.0, 299.0, 299.0, 283.0, 270.0, 269.0, 262.0, 263.0, 290.0, 286.0, 248.0, 242.0, 261.0, 258.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6866233248561839, "mean_inference_ms": 1.2159121953751708, "mean_action_processing_ms": 0.13259290392719372, "mean_env_wait_ms": 0.8377164271221949, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 419.0, "episode_reward_mean": 556.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 193.0}, "policy_reward_max": {"ppo": 310.0}, "policy_reward_mean": {"ppo": 278.11}, "hist_stats": {"episode_reward": [582.0, 579.0, 576.0, 576.0, 582.0, 579.0, 573.0, 579.0, 579.0, 576.0, 527.0, 579.0, 530.0, 573.0, 521.0, 516.0, 576.0, 573.0, 579.0, 
573.0, 573.0, 576.0, 519.0, 579.0, 582.0, 579.0, 419.0, 519.0, 570.0, 519.0, 587.0, 576.0, 522.0, 576.0, 558.0, 570.0, 570.0, 522.0, 573.0, 519.0, 576.0, 573.0, 570.0, 527.0, 522.0, 587.0, 579.0, 573.0, 564.0, 510.0, 452.0, 519.0, 504.0, 573.0, 525.0, 582.0, 573.0, 519.0, 576.0, 510.0, 573.0, 570.0, 573.0, 533.0, 570.0, 522.0, 570.0, 576.0, 579.0, 567.0, 576.0, 576.0, 522.0, 579.0, 576.0, 584.0, 507.0, 579.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 573.0, 582.0, 522.0, 579.0, 579.0, 468.0, 522.0, 570.0, 579.0, 573.0, 582.0, 539.0, 525.0, 576.0, 490.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 282.0, 278.0, 301.0, 304.0, 272.0, 294.0, 282.0, 301.0, 281.0, 296.0, 283.0, 272.0, 301.0, 283.0, 296.0, 288.0, 291.0, 301.0, 275.0, 246.0, 281.0, 291.0, 288.0, 266.0, 264.0, 282.0, 291.0, 271.0, 250.0, 249.0, 267.0, 280.0, 296.0, 297.0, 276.0, 285.0, 294.0, 293.0, 280.0, 286.0, 287.0, 291.0, 285.0, 261.0, 258.0, 308.0, 271.0, 288.0, 294.0, 283.0, 296.0, 193.0, 226.0, 258.0, 261.0, 287.0, 283.0, 263.0, 256.0, 301.0, 286.0, 290.0, 286.0, 254.0, 268.0, 292.0, 284.0, 275.0, 283.0, 275.0, 295.0, 291.0, 279.0, 261.0, 261.0, 282.0, 291.0, 266.0, 253.0, 282.0, 294.0, 274.0, 299.0, 273.0, 297.0, 285.0, 242.0, 260.0, 262.0, 299.0, 288.0, 294.0, 285.0, 298.0, 275.0, 275.0, 289.0, 260.0, 250.0, 223.0, 229.0, 253.0, 266.0, 243.0, 261.0, 278.0, 295.0, 277.0, 248.0, 298.0, 284.0, 286.0, 287.0, 247.0, 272.0, 280.0, 296.0, 263.0, 247.0, 294.0, 279.0, 278.0, 292.0, 287.0, 286.0, 288.0, 
245.0, 286.0, 284.0, 272.0, 250.0, 290.0, 280.0, 286.0, 290.0, 290.0, 289.0, 286.0, 281.0, 266.0, 310.0, 280.0, 296.0, 265.0, 257.0, 292.0, 287.0, 297.0, 279.0, 304.0, 280.0, 247.0, 260.0, 293.0, 286.0, 274.0, 299.0, 304.0, 275.0, 292.0, 290.0, 280.0, 296.0, 300.0, 276.0, 287.0, 289.0, 290.0, 283.0, 287.0, 295.0, 256.0, 266.0, 285.0, 294.0, 284.0, 295.0, 230.0, 238.0, 258.0, 264.0, 283.0, 287.0, 303.0, 276.0, 274.0, 299.0, 299.0, 283.0, 270.0, 269.0, 262.0, 263.0, 290.0, 286.0, 248.0, 242.0, 261.0, 258.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6866233248561839, "mean_inference_ms": 1.2159121953751708, "mean_action_processing_ms": 0.13259290392719372, "mean_env_wait_ms": 0.8377164271221949, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4889600, "num_agent_steps_trained": 4889600, "num_env_steps_sampled": 2444800, "num_env_steps_trained": 2444800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2444800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4889600, "timers": {"training_iteration_time_ms": 4368.115, "learn_time_ms": 1141.284, "learn_throughput": 11215.439, "synch_weights_time_ms": 12.131}, "counters": {"num_env_steps_sampled": 2444800, "num_env_steps_trained": 2444800, "num_agent_steps_sampled": 4889600, "num_agent_steps_trained": 4889600}, "done": false, "episodes_total": 6112, "training_iteration": 191, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-28", "timestamp": 1666581148, "time_this_iter_s": 4.299537658691406, "time_total_s": 722.6621026992798, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 722.6621026992798, "timesteps_since_restore": 0, "iterations_since_restore": 191, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 45.98333333333334, "ram_util_percent": 20.7}} +{"custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.02, "shaped_reward_min": 132, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.58, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.7, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.37, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.15, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.99, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.15, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.15, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 3.6748824641108513e-06, "policy_loss": -0.0002568246563896537, "vf_loss": 7.626540660858154, "vf_explained_var": 0.6705838441848755, "kl": 0.0022698636166751385, "entropy": 1.0043076276779175, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2457600, "num_env_steps_trained": 2457600, "num_agent_steps_sampled": 4915200, "num_agent_steps_trained": 4915200}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 452.0, "episode_reward_mean": 557.82, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 310.0}, "policy_reward_mean": {"ppo": 278.91}, "custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.02, "shaped_reward_min": 132, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.58, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.7, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.37, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.15, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.02, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.15, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.15, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 576.0, 558.0, 570.0, 570.0, 522.0, 573.0, 519.0, 576.0, 573.0, 570.0, 527.0, 522.0, 587.0, 579.0, 573.0, 564.0, 510.0, 452.0, 519.0, 504.0, 573.0, 525.0, 582.0, 573.0, 519.0, 576.0, 510.0, 573.0, 570.0, 573.0, 533.0, 570.0, 522.0, 570.0, 576.0, 579.0, 567.0, 576.0, 576.0, 522.0, 579.0, 576.0, 584.0, 507.0, 579.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 573.0, 582.0, 522.0, 579.0, 579.0, 468.0, 522.0, 570.0, 579.0, 573.0, 582.0, 539.0, 525.0, 576.0, 490.0, 519.0, 576.0, 579.0, 582.0, 530.0, 576.0, 525.0, 519.0, 570.0, 575.0, 530.0, 576.0, 579.0, 576.0, 573.0, 579.0, 582.0, 579.0, 570.0, 453.0, 525.0, 579.0, 576.0, 573.0, 573.0, 573.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 268.0, 292.0, 284.0, 275.0, 283.0, 275.0, 295.0, 291.0, 279.0, 261.0, 
261.0, 282.0, 291.0, 266.0, 253.0, 282.0, 294.0, 274.0, 299.0, 273.0, 297.0, 285.0, 242.0, 260.0, 262.0, 299.0, 288.0, 294.0, 285.0, 298.0, 275.0, 275.0, 289.0, 260.0, 250.0, 223.0, 229.0, 253.0, 266.0, 243.0, 261.0, 278.0, 295.0, 277.0, 248.0, 298.0, 284.0, 286.0, 287.0, 247.0, 272.0, 280.0, 296.0, 263.0, 247.0, 294.0, 279.0, 278.0, 292.0, 287.0, 286.0, 288.0, 245.0, 286.0, 284.0, 272.0, 250.0, 290.0, 280.0, 286.0, 290.0, 290.0, 289.0, 286.0, 281.0, 266.0, 310.0, 280.0, 296.0, 265.0, 257.0, 292.0, 287.0, 297.0, 279.0, 304.0, 280.0, 247.0, 260.0, 293.0, 286.0, 274.0, 299.0, 304.0, 275.0, 292.0, 290.0, 280.0, 296.0, 300.0, 276.0, 287.0, 289.0, 290.0, 283.0, 287.0, 295.0, 256.0, 266.0, 285.0, 294.0, 284.0, 295.0, 230.0, 238.0, 258.0, 264.0, 283.0, 287.0, 303.0, 276.0, 274.0, 299.0, 299.0, 283.0, 270.0, 269.0, 262.0, 263.0, 290.0, 286.0, 248.0, 242.0, 261.0, 258.0, 277.0, 299.0, 293.0, 286.0, 285.0, 297.0, 265.0, 265.0, 295.0, 281.0, 266.0, 259.0, 262.0, 257.0, 285.0, 285.0, 286.0, 289.0, 260.0, 270.0, 291.0, 285.0, 297.0, 282.0, 290.0, 286.0, 283.0, 290.0, 292.0, 287.0, 284.0, 298.0, 304.0, 275.0, 294.0, 276.0, 233.0, 220.0, 250.0, 275.0, 283.0, 296.0, 290.0, 286.0, 276.0, 297.0, 290.0, 283.0, 287.0, 286.0, 292.0, 284.0, 284.0, 295.0, 291.0, 296.0, 278.0, 304.0, 304.0, 278.0, 289.0, 290.0, 290.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6890653996220631, "mean_inference_ms": 1.2217288104133466, "mean_action_processing_ms": 0.13282370029567642, "mean_env_wait_ms": 0.839345002381688, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 452.0, "episode_reward_mean": 557.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 310.0}, "policy_reward_mean": {"ppo": 278.91}, "hist_stats": {"episode_reward": [522.0, 576.0, 558.0, 570.0, 570.0, 522.0, 573.0, 519.0, 576.0, 573.0, 570.0, 527.0, 522.0, 587.0, 579.0, 573.0, 564.0, 510.0, 452.0, 
519.0, 504.0, 573.0, 525.0, 582.0, 573.0, 519.0, 576.0, 510.0, 573.0, 570.0, 573.0, 533.0, 570.0, 522.0, 570.0, 576.0, 579.0, 567.0, 576.0, 576.0, 522.0, 579.0, 576.0, 584.0, 507.0, 579.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 573.0, 582.0, 522.0, 579.0, 579.0, 468.0, 522.0, 570.0, 579.0, 573.0, 582.0, 539.0, 525.0, 576.0, 490.0, 519.0, 576.0, 579.0, 582.0, 530.0, 576.0, 525.0, 519.0, 570.0, 575.0, 530.0, 576.0, 579.0, 576.0, 573.0, 579.0, 582.0, 579.0, 570.0, 453.0, 525.0, 579.0, 576.0, 573.0, 573.0, 573.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 268.0, 292.0, 284.0, 275.0, 283.0, 275.0, 295.0, 291.0, 279.0, 261.0, 261.0, 282.0, 291.0, 266.0, 253.0, 282.0, 294.0, 274.0, 299.0, 273.0, 297.0, 285.0, 242.0, 260.0, 262.0, 299.0, 288.0, 294.0, 285.0, 298.0, 275.0, 275.0, 289.0, 260.0, 250.0, 223.0, 229.0, 253.0, 266.0, 243.0, 261.0, 278.0, 295.0, 277.0, 248.0, 298.0, 284.0, 286.0, 287.0, 247.0, 272.0, 280.0, 296.0, 263.0, 247.0, 294.0, 279.0, 278.0, 292.0, 287.0, 286.0, 288.0, 245.0, 286.0, 284.0, 272.0, 250.0, 290.0, 280.0, 286.0, 290.0, 290.0, 289.0, 286.0, 281.0, 266.0, 310.0, 280.0, 296.0, 265.0, 257.0, 292.0, 287.0, 297.0, 279.0, 304.0, 280.0, 247.0, 260.0, 293.0, 286.0, 274.0, 299.0, 304.0, 275.0, 292.0, 290.0, 280.0, 296.0, 300.0, 276.0, 287.0, 289.0, 290.0, 283.0, 287.0, 295.0, 256.0, 266.0, 285.0, 294.0, 284.0, 295.0, 230.0, 238.0, 258.0, 264.0, 283.0, 287.0, 303.0, 276.0, 274.0, 299.0, 299.0, 283.0, 270.0, 
269.0, 262.0, 263.0, 290.0, 286.0, 248.0, 242.0, 261.0, 258.0, 277.0, 299.0, 293.0, 286.0, 285.0, 297.0, 265.0, 265.0, 295.0, 281.0, 266.0, 259.0, 262.0, 257.0, 285.0, 285.0, 286.0, 289.0, 260.0, 270.0, 291.0, 285.0, 297.0, 282.0, 290.0, 286.0, 283.0, 290.0, 292.0, 287.0, 284.0, 298.0, 304.0, 275.0, 294.0, 276.0, 233.0, 220.0, 250.0, 275.0, 283.0, 296.0, 290.0, 286.0, 276.0, 297.0, 290.0, 283.0, 287.0, 286.0, 292.0, 284.0, 284.0, 295.0, 291.0, 296.0, 278.0, 304.0, 304.0, 278.0, 289.0, 290.0, 290.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6890653996220631, "mean_inference_ms": 1.2217288104133466, "mean_action_processing_ms": 0.13282370029567642, "mean_env_wait_ms": 0.839345002381688, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4915200, "num_agent_steps_trained": 4915200, "num_env_steps_sampled": 2457600, "num_env_steps_trained": 2457600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2457600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4915200, "timers": {"training_iteration_time_ms": 4412.084, "learn_time_ms": 1145.822, "learn_throughput": 11171.018, "synch_weights_time_ms": 12.484}, "counters": {"num_env_steps_sampled": 2457600, "num_env_steps_trained": 2457600, "num_agent_steps_sampled": 4915200, "num_agent_steps_trained": 4915200}, "done": false, "episodes_total": 6144, "training_iteration": 192, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-32", "timestamp": 1666581152, "time_this_iter_s": 4.201019287109375, "time_total_s": 726.8631219863892, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 726.8631219863892, "timesteps_since_restore": 0, "iterations_since_restore": 192, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 43.0, "ram_util_percent": 21.433333333333334}} +{"custom_metrics": {"sparse_reward_mean": 193.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 172.61, "shaped_reward_min": 133, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.07, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.52, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.73, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.27, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.36, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.37, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.0, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 
15.37, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.0, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.37, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.0, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00027986563509330153, "policy_loss": -0.0005315254675224423, "vf_loss": 7.590443134307861, "vf_explained_var": 0.667829155921936, "kl": 0.001756403362378478, "entropy": 1.0147672891616821, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2470400, "num_env_steps_trained": 2470400, "num_agent_steps_sampled": 4940800, "num_agent_steps_trained": 4940800}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 453.0, "episode_reward_mean": 560.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 310.0}, "policy_reward_mean": {"ppo": 280.105}, "custom_metrics": {"sparse_reward_mean": 193.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 172.61, "shaped_reward_min": 133, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.07, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.52, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.73, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.27, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.37, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.0, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.37, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.0, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.37, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.0, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 522.0, 570.0, 576.0, 579.0, 567.0, 576.0, 576.0, 522.0, 579.0, 576.0, 584.0, 507.0, 579.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 573.0, 582.0, 522.0, 579.0, 579.0, 468.0, 522.0, 570.0, 579.0, 573.0, 582.0, 539.0, 525.0, 576.0, 490.0, 519.0, 576.0, 579.0, 582.0, 530.0, 576.0, 525.0, 519.0, 570.0, 575.0, 530.0, 576.0, 579.0, 576.0, 573.0, 579.0, 582.0, 579.0, 570.0, 453.0, 525.0, 579.0, 576.0, 573.0, 573.0, 573.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 582.0, 576.0, 525.0, 525.0, 573.0, 576.0, 525.0, 525.0, 576.0, 573.0, 573.0, 573.0, 522.0, 573.0, 519.0, 573.0, 525.0, 576.0, 579.0, 576.0, 522.0, 522.0, 570.0, 579.0, 530.0, 582.0, 530.0, 525.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 284.0, 272.0, 250.0, 290.0, 280.0, 286.0, 290.0, 290.0, 289.0, 286.0, 
281.0, 266.0, 310.0, 280.0, 296.0, 265.0, 257.0, 292.0, 287.0, 297.0, 279.0, 304.0, 280.0, 247.0, 260.0, 293.0, 286.0, 274.0, 299.0, 304.0, 275.0, 292.0, 290.0, 280.0, 296.0, 300.0, 276.0, 287.0, 289.0, 290.0, 283.0, 287.0, 295.0, 256.0, 266.0, 285.0, 294.0, 284.0, 295.0, 230.0, 238.0, 258.0, 264.0, 283.0, 287.0, 303.0, 276.0, 274.0, 299.0, 299.0, 283.0, 270.0, 269.0, 262.0, 263.0, 290.0, 286.0, 248.0, 242.0, 261.0, 258.0, 277.0, 299.0, 293.0, 286.0, 285.0, 297.0, 265.0, 265.0, 295.0, 281.0, 266.0, 259.0, 262.0, 257.0, 285.0, 285.0, 286.0, 289.0, 260.0, 270.0, 291.0, 285.0, 297.0, 282.0, 290.0, 286.0, 283.0, 290.0, 292.0, 287.0, 284.0, 298.0, 304.0, 275.0, 294.0, 276.0, 233.0, 220.0, 250.0, 275.0, 283.0, 296.0, 290.0, 286.0, 276.0, 297.0, 290.0, 283.0, 287.0, 286.0, 292.0, 284.0, 284.0, 295.0, 291.0, 296.0, 278.0, 304.0, 304.0, 278.0, 289.0, 290.0, 290.0, 283.0, 303.0, 273.0, 292.0, 281.0, 289.0, 293.0, 295.0, 281.0, 269.0, 256.0, 255.0, 270.0, 292.0, 281.0, 277.0, 299.0, 246.0, 279.0, 265.0, 260.0, 300.0, 276.0, 289.0, 284.0, 306.0, 267.0, 283.0, 290.0, 256.0, 266.0, 284.0, 289.0, 264.0, 255.0, 284.0, 289.0, 260.0, 265.0, 292.0, 284.0, 288.0, 291.0, 279.0, 297.0, 269.0, 253.0, 263.0, 259.0, 278.0, 292.0, 290.0, 289.0, 258.0, 272.0, 296.0, 286.0, 278.0, 252.0, 259.0, 266.0, 279.0, 303.0, 286.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6900892375397361, "mean_inference_ms": 1.2230523595362317, "mean_action_processing_ms": 0.1329422939345136, "mean_env_wait_ms": 0.8401157934099642, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 453.0, "episode_reward_mean": 560.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 310.0}, "policy_reward_mean": {"ppo": 280.105}, "hist_stats": {"episode_reward": [570.0, 522.0, 570.0, 576.0, 579.0, 567.0, 576.0, 576.0, 522.0, 579.0, 576.0, 584.0, 507.0, 579.0, 573.0, 579.0, 582.0, 576.0, 576.0, 
576.0, 573.0, 582.0, 522.0, 579.0, 579.0, 468.0, 522.0, 570.0, 579.0, 573.0, 582.0, 539.0, 525.0, 576.0, 490.0, 519.0, 576.0, 579.0, 582.0, 530.0, 576.0, 525.0, 519.0, 570.0, 575.0, 530.0, 576.0, 579.0, 576.0, 573.0, 579.0, 582.0, 579.0, 570.0, 453.0, 525.0, 579.0, 576.0, 573.0, 573.0, 573.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 582.0, 576.0, 525.0, 525.0, 573.0, 576.0, 525.0, 525.0, 576.0, 573.0, 573.0, 573.0, 522.0, 573.0, 519.0, 573.0, 525.0, 576.0, 579.0, 576.0, 522.0, 522.0, 570.0, 579.0, 530.0, 582.0, 530.0, 525.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 284.0, 272.0, 250.0, 290.0, 280.0, 286.0, 290.0, 290.0, 289.0, 286.0, 281.0, 266.0, 310.0, 280.0, 296.0, 265.0, 257.0, 292.0, 287.0, 297.0, 279.0, 304.0, 280.0, 247.0, 260.0, 293.0, 286.0, 274.0, 299.0, 304.0, 275.0, 292.0, 290.0, 280.0, 296.0, 300.0, 276.0, 287.0, 289.0, 290.0, 283.0, 287.0, 295.0, 256.0, 266.0, 285.0, 294.0, 284.0, 295.0, 230.0, 238.0, 258.0, 264.0, 283.0, 287.0, 303.0, 276.0, 274.0, 299.0, 299.0, 283.0, 270.0, 269.0, 262.0, 263.0, 290.0, 286.0, 248.0, 242.0, 261.0, 258.0, 277.0, 299.0, 293.0, 286.0, 285.0, 297.0, 265.0, 265.0, 295.0, 281.0, 266.0, 259.0, 262.0, 257.0, 285.0, 285.0, 286.0, 289.0, 260.0, 270.0, 291.0, 285.0, 297.0, 282.0, 290.0, 286.0, 283.0, 290.0, 292.0, 287.0, 284.0, 298.0, 304.0, 275.0, 294.0, 276.0, 233.0, 220.0, 250.0, 275.0, 283.0, 296.0, 290.0, 286.0, 276.0, 297.0, 290.0, 283.0, 287.0, 286.0, 292.0, 284.0, 284.0, 295.0, 291.0, 
296.0, 278.0, 304.0, 304.0, 278.0, 289.0, 290.0, 290.0, 283.0, 303.0, 273.0, 292.0, 281.0, 289.0, 293.0, 295.0, 281.0, 269.0, 256.0, 255.0, 270.0, 292.0, 281.0, 277.0, 299.0, 246.0, 279.0, 265.0, 260.0, 300.0, 276.0, 289.0, 284.0, 306.0, 267.0, 283.0, 290.0, 256.0, 266.0, 284.0, 289.0, 264.0, 255.0, 284.0, 289.0, 260.0, 265.0, 292.0, 284.0, 288.0, 291.0, 279.0, 297.0, 269.0, 253.0, 263.0, 259.0, 278.0, 292.0, 290.0, 289.0, 258.0, 272.0, 296.0, 286.0, 278.0, 252.0, 259.0, 266.0, 279.0, 303.0, 286.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6900892375397361, "mean_inference_ms": 1.2230523595362317, "mean_action_processing_ms": 0.1329422939345136, "mean_env_wait_ms": 0.8401157934099642, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4940800, "num_agent_steps_trained": 4940800, "num_env_steps_sampled": 2470400, "num_env_steps_trained": 2470400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2470400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4940800, "timers": {"training_iteration_time_ms": 4481.05, "learn_time_ms": 1158.916, "learn_throughput": 11044.805, "synch_weights_time_ms": 11.827}, "counters": {"num_env_steps_sampled": 2470400, "num_env_steps_trained": 2470400, "num_agent_steps_sampled": 4940800, "num_agent_steps_trained": 4940800}, "done": false, "episodes_total": 6176, "training_iteration": 193, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-37", "timestamp": 1666581157, "time_this_iter_s": 4.319738388061523, "time_total_s": 731.1828603744507, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 731.1828603744507, "timesteps_since_restore": 0, "iterations_since_restore": 193, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 33.17142857142857, "ram_util_percent": 21.985714285714288}} +{"custom_metrics": {"sparse_reward_mean": 192.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.63, "shaped_reward_min": 133, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.9, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.49, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.54, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.17, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.34, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.22, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.94, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.24, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.22, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.94, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.22, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.94, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005898141534999013, "policy_loss": 0.00032977614318951964, "vf_loss": 7.652249813079834, "vf_explained_var": 0.6789402365684509, "kl": 0.0019943215884268284, "entropy": 1.0103744268417358, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2483200, "num_env_steps_trained": 2483200, "num_agent_steps_sampled": 4966400, "num_agent_steps_trained": 4966400}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 453.0, "episode_reward_mean": 556.43, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 308.0}, "policy_reward_mean": {"ppo": 278.215}, "custom_metrics": {"sparse_reward_mean": 192.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.63, "shaped_reward_min": 133, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.9, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.49, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.54, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.17, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.22, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.94, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.24, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 
2, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.22, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.94, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.22, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.94, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 576.0, 490.0, 519.0, 576.0, 579.0, 582.0, 530.0, 576.0, 525.0, 519.0, 570.0, 575.0, 530.0, 576.0, 579.0, 576.0, 573.0, 579.0, 582.0, 579.0, 570.0, 453.0, 525.0, 579.0, 576.0, 573.0, 573.0, 573.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 582.0, 576.0, 525.0, 525.0, 573.0, 576.0, 525.0, 525.0, 576.0, 573.0, 573.0, 573.0, 522.0, 573.0, 519.0, 573.0, 525.0, 576.0, 579.0, 576.0, 522.0, 522.0, 570.0, 579.0, 530.0, 582.0, 530.0, 525.0, 582.0, 576.0, 576.0, 522.0, 522.0, 516.0, 576.0, 573.0, 573.0, 570.0, 465.0, 519.0, 525.0, 525.0, 573.0, 525.0, 576.0, 582.0, 576.0, 573.0, 570.0, 573.0, 522.0, 525.0, 536.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 530.0, 525.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 263.0, 290.0, 286.0, 248.0, 242.0, 261.0, 258.0, 277.0, 299.0, 293.0, 
286.0, 285.0, 297.0, 265.0, 265.0, 295.0, 281.0, 266.0, 259.0, 262.0, 257.0, 285.0, 285.0, 286.0, 289.0, 260.0, 270.0, 291.0, 285.0, 297.0, 282.0, 290.0, 286.0, 283.0, 290.0, 292.0, 287.0, 284.0, 298.0, 304.0, 275.0, 294.0, 276.0, 233.0, 220.0, 250.0, 275.0, 283.0, 296.0, 290.0, 286.0, 276.0, 297.0, 290.0, 283.0, 287.0, 286.0, 292.0, 284.0, 284.0, 295.0, 291.0, 296.0, 278.0, 304.0, 304.0, 278.0, 289.0, 290.0, 290.0, 283.0, 303.0, 273.0, 292.0, 281.0, 289.0, 293.0, 295.0, 281.0, 269.0, 256.0, 255.0, 270.0, 292.0, 281.0, 277.0, 299.0, 246.0, 279.0, 265.0, 260.0, 300.0, 276.0, 289.0, 284.0, 306.0, 267.0, 283.0, 290.0, 256.0, 266.0, 284.0, 289.0, 264.0, 255.0, 284.0, 289.0, 260.0, 265.0, 292.0, 284.0, 288.0, 291.0, 279.0, 297.0, 269.0, 253.0, 263.0, 259.0, 278.0, 292.0, 290.0, 289.0, 258.0, 272.0, 296.0, 286.0, 278.0, 252.0, 259.0, 266.0, 279.0, 303.0, 286.0, 290.0, 291.0, 285.0, 262.0, 260.0, 255.0, 267.0, 270.0, 246.0, 282.0, 294.0, 290.0, 283.0, 308.0, 265.0, 286.0, 284.0, 231.0, 234.0, 253.0, 266.0, 271.0, 254.0, 261.0, 264.0, 283.0, 290.0, 257.0, 268.0, 297.0, 279.0, 282.0, 300.0, 287.0, 289.0, 280.0, 293.0, 280.0, 290.0, 283.0, 290.0, 263.0, 259.0, 270.0, 255.0, 266.0, 270.0, 298.0, 281.0, 255.0, 264.0, 302.0, 271.0, 282.0, 297.0, 288.0, 291.0, 280.0, 299.0, 256.0, 274.0, 265.0, 260.0, 285.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6908932370553954, "mean_inference_ms": 1.2239034901785653, "mean_action_processing_ms": 0.13303384998747472, "mean_env_wait_ms": 0.8406845568626884, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 453.0, "episode_reward_mean": 556.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 220.0}, "policy_reward_max": {"ppo": 308.0}, "policy_reward_mean": {"ppo": 278.215}, "hist_stats": {"episode_reward": [525.0, 576.0, 490.0, 519.0, 576.0, 579.0, 582.0, 530.0, 576.0, 525.0, 519.0, 570.0, 575.0, 530.0, 576.0, 579.0, 576.0, 573.0, 
579.0, 582.0, 579.0, 570.0, 453.0, 525.0, 579.0, 576.0, 573.0, 573.0, 573.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 582.0, 576.0, 525.0, 525.0, 573.0, 576.0, 525.0, 525.0, 576.0, 573.0, 573.0, 573.0, 522.0, 573.0, 519.0, 573.0, 525.0, 576.0, 579.0, 576.0, 522.0, 522.0, 570.0, 579.0, 530.0, 582.0, 530.0, 525.0, 582.0, 576.0, 576.0, 522.0, 522.0, 516.0, 576.0, 573.0, 573.0, 570.0, 465.0, 519.0, 525.0, 525.0, 573.0, 525.0, 576.0, 582.0, 576.0, 573.0, 570.0, 573.0, 522.0, 525.0, 536.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 530.0, 525.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 263.0, 290.0, 286.0, 248.0, 242.0, 261.0, 258.0, 277.0, 299.0, 293.0, 286.0, 285.0, 297.0, 265.0, 265.0, 295.0, 281.0, 266.0, 259.0, 262.0, 257.0, 285.0, 285.0, 286.0, 289.0, 260.0, 270.0, 291.0, 285.0, 297.0, 282.0, 290.0, 286.0, 283.0, 290.0, 292.0, 287.0, 284.0, 298.0, 304.0, 275.0, 294.0, 276.0, 233.0, 220.0, 250.0, 275.0, 283.0, 296.0, 290.0, 286.0, 276.0, 297.0, 290.0, 283.0, 287.0, 286.0, 292.0, 284.0, 284.0, 295.0, 291.0, 296.0, 278.0, 304.0, 304.0, 278.0, 289.0, 290.0, 290.0, 283.0, 303.0, 273.0, 292.0, 281.0, 289.0, 293.0, 295.0, 281.0, 269.0, 256.0, 255.0, 270.0, 292.0, 281.0, 277.0, 299.0, 246.0, 279.0, 265.0, 260.0, 300.0, 276.0, 289.0, 284.0, 306.0, 267.0, 283.0, 290.0, 256.0, 266.0, 284.0, 289.0, 264.0, 255.0, 284.0, 289.0, 260.0, 265.0, 292.0, 284.0, 288.0, 291.0, 279.0, 297.0, 269.0, 253.0, 263.0, 259.0, 278.0, 292.0, 290.0, 289.0, 258.0, 272.0, 
296.0, 286.0, 278.0, 252.0, 259.0, 266.0, 279.0, 303.0, 286.0, 290.0, 291.0, 285.0, 262.0, 260.0, 255.0, 267.0, 270.0, 246.0, 282.0, 294.0, 290.0, 283.0, 308.0, 265.0, 286.0, 284.0, 231.0, 234.0, 253.0, 266.0, 271.0, 254.0, 261.0, 264.0, 283.0, 290.0, 257.0, 268.0, 297.0, 279.0, 282.0, 300.0, 287.0, 289.0, 280.0, 293.0, 280.0, 290.0, 283.0, 290.0, 263.0, 259.0, 270.0, 255.0, 266.0, 270.0, 298.0, 281.0, 255.0, 264.0, 302.0, 271.0, 282.0, 297.0, 288.0, 291.0, 280.0, 299.0, 256.0, 274.0, 265.0, 260.0, 285.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6908932370553954, "mean_inference_ms": 1.2239034901785653, "mean_action_processing_ms": 0.13303384998747472, "mean_env_wait_ms": 0.8406845568626884, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4966400, "num_agent_steps_trained": 4966400, "num_env_steps_sampled": 2483200, "num_env_steps_trained": 2483200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2483200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4966400, "timers": {"training_iteration_time_ms": 4557.46, "learn_time_ms": 1181.905, "learn_throughput": 10829.973, "synch_weights_time_ms": 12.829}, "counters": {"num_env_steps_sampled": 2483200, "num_env_steps_trained": 2483200, "num_agent_steps_sampled": 4966400, "num_agent_steps_trained": 4966400}, "done": false, "episodes_total": 6208, "training_iteration": 194, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-42", "timestamp": 1666581162, "time_this_iter_s": 4.991002321243286, "time_total_s": 736.173862695694, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 736.173862695694, "timesteps_since_restore": 0, "iterations_since_restore": 194, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 66.88571428571429, "ram_util_percent": 15.585714285714284}} +{"custom_metrics": {"sparse_reward_mean": 192.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.24, "shaped_reward_min": 145, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.71, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.36, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.34, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.39, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.15, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.18, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.1, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 
14.99, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.15, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.15, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001025318168103695, "policy_loss": -0.001293760840781033, "vf_loss": 7.650734901428223, "vf_explained_var": 0.6610172390937805, "kl": 0.0021573223639279604, "entropy": 0.993259072303772, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2496000, "num_env_steps_trained": 2496000, "num_agent_steps_sampled": 4992000, "num_agent_steps_trained": 4992000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 465.0, "episode_reward_mean": 556.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 308.0}, "policy_reward_mean": {"ppo": 278.02}, "custom_metrics": {"sparse_reward_mean": 192.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.24, "shaped_reward_min": 145, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.71, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.36, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.34, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.39, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.15, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.62, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.35, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 5.18, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.1, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.15, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.15, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 582.0, 576.0, 525.0, 525.0, 573.0, 576.0, 525.0, 525.0, 576.0, 573.0, 573.0, 573.0, 522.0, 573.0, 519.0, 573.0, 525.0, 576.0, 579.0, 576.0, 522.0, 522.0, 570.0, 579.0, 530.0, 582.0, 530.0, 525.0, 582.0, 576.0, 576.0, 522.0, 522.0, 516.0, 576.0, 573.0, 573.0, 570.0, 465.0, 519.0, 525.0, 525.0, 573.0, 525.0, 576.0, 582.0, 576.0, 573.0, 570.0, 573.0, 522.0, 525.0, 536.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 530.0, 525.0, 579.0, 530.0, 567.0, 522.0, 579.0, 527.0, 570.0, 582.0, 573.0, 522.0, 576.0, 579.0, 576.0, 522.0, 573.0, 582.0, 576.0, 576.0, 570.0, 573.0, 579.0, 522.0, 567.0, 522.0, 582.0, 525.0, 507.0, 582.0, 573.0, 530.0, 576.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 304.0, 304.0, 278.0, 289.0, 290.0, 290.0, 283.0, 303.0, 273.0, 292.0, 
281.0, 289.0, 293.0, 295.0, 281.0, 269.0, 256.0, 255.0, 270.0, 292.0, 281.0, 277.0, 299.0, 246.0, 279.0, 265.0, 260.0, 300.0, 276.0, 289.0, 284.0, 306.0, 267.0, 283.0, 290.0, 256.0, 266.0, 284.0, 289.0, 264.0, 255.0, 284.0, 289.0, 260.0, 265.0, 292.0, 284.0, 288.0, 291.0, 279.0, 297.0, 269.0, 253.0, 263.0, 259.0, 278.0, 292.0, 290.0, 289.0, 258.0, 272.0, 296.0, 286.0, 278.0, 252.0, 259.0, 266.0, 279.0, 303.0, 286.0, 290.0, 291.0, 285.0, 262.0, 260.0, 255.0, 267.0, 270.0, 246.0, 282.0, 294.0, 290.0, 283.0, 308.0, 265.0, 286.0, 284.0, 231.0, 234.0, 253.0, 266.0, 271.0, 254.0, 261.0, 264.0, 283.0, 290.0, 257.0, 268.0, 297.0, 279.0, 282.0, 300.0, 287.0, 289.0, 280.0, 293.0, 280.0, 290.0, 283.0, 290.0, 263.0, 259.0, 270.0, 255.0, 266.0, 270.0, 298.0, 281.0, 255.0, 264.0, 302.0, 271.0, 282.0, 297.0, 288.0, 291.0, 280.0, 299.0, 256.0, 274.0, 265.0, 260.0, 285.0, 294.0, 274.0, 256.0, 297.0, 270.0, 254.0, 268.0, 275.0, 304.0, 275.0, 252.0, 279.0, 291.0, 298.0, 284.0, 293.0, 280.0, 263.0, 259.0, 298.0, 278.0, 290.0, 289.0, 296.0, 280.0, 256.0, 266.0, 288.0, 285.0, 295.0, 287.0, 276.0, 300.0, 291.0, 285.0, 281.0, 289.0, 290.0, 283.0, 278.0, 301.0, 265.0, 257.0, 279.0, 288.0, 265.0, 257.0, 294.0, 288.0, 275.0, 250.0, 248.0, 259.0, 294.0, 288.0, 281.0, 292.0, 265.0, 265.0, 296.0, 280.0, 262.0, 260.0, 283.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6928564263382961, "mean_inference_ms": 1.2294020123590537, "mean_action_processing_ms": 0.13325836540923008, "mean_env_wait_ms": 0.8421654977463985, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 465.0, "episode_reward_mean": 556.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 308.0}, "policy_reward_mean": {"ppo": 278.02}, "hist_stats": {"episode_reward": [582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 582.0, 576.0, 525.0, 525.0, 573.0, 576.0, 525.0, 525.0, 576.0, 573.0, 573.0, 573.0, 522.0, 
573.0, 519.0, 573.0, 525.0, 576.0, 579.0, 576.0, 522.0, 522.0, 570.0, 579.0, 530.0, 582.0, 530.0, 525.0, 582.0, 576.0, 576.0, 522.0, 522.0, 516.0, 576.0, 573.0, 573.0, 570.0, 465.0, 519.0, 525.0, 525.0, 573.0, 525.0, 576.0, 582.0, 576.0, 573.0, 570.0, 573.0, 522.0, 525.0, 536.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 530.0, 525.0, 579.0, 530.0, 567.0, 522.0, 579.0, 527.0, 570.0, 582.0, 573.0, 522.0, 576.0, 579.0, 576.0, 522.0, 573.0, 582.0, 576.0, 576.0, 570.0, 573.0, 579.0, 522.0, 567.0, 522.0, 582.0, 525.0, 507.0, 582.0, 573.0, 530.0, 576.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 304.0, 304.0, 278.0, 289.0, 290.0, 290.0, 283.0, 303.0, 273.0, 292.0, 281.0, 289.0, 293.0, 295.0, 281.0, 269.0, 256.0, 255.0, 270.0, 292.0, 281.0, 277.0, 299.0, 246.0, 279.0, 265.0, 260.0, 300.0, 276.0, 289.0, 284.0, 306.0, 267.0, 283.0, 290.0, 256.0, 266.0, 284.0, 289.0, 264.0, 255.0, 284.0, 289.0, 260.0, 265.0, 292.0, 284.0, 288.0, 291.0, 279.0, 297.0, 269.0, 253.0, 263.0, 259.0, 278.0, 292.0, 290.0, 289.0, 258.0, 272.0, 296.0, 286.0, 278.0, 252.0, 259.0, 266.0, 279.0, 303.0, 286.0, 290.0, 291.0, 285.0, 262.0, 260.0, 255.0, 267.0, 270.0, 246.0, 282.0, 294.0, 290.0, 283.0, 308.0, 265.0, 286.0, 284.0, 231.0, 234.0, 253.0, 266.0, 271.0, 254.0, 261.0, 264.0, 283.0, 290.0, 257.0, 268.0, 297.0, 279.0, 282.0, 300.0, 287.0, 289.0, 280.0, 293.0, 280.0, 290.0, 283.0, 290.0, 263.0, 259.0, 270.0, 255.0, 266.0, 270.0, 298.0, 281.0, 255.0, 264.0, 302.0, 271.0, 282.0, 297.0, 288.0, 
291.0, 280.0, 299.0, 256.0, 274.0, 265.0, 260.0, 285.0, 294.0, 274.0, 256.0, 297.0, 270.0, 254.0, 268.0, 275.0, 304.0, 275.0, 252.0, 279.0, 291.0, 298.0, 284.0, 293.0, 280.0, 263.0, 259.0, 298.0, 278.0, 290.0, 289.0, 296.0, 280.0, 256.0, 266.0, 288.0, 285.0, 295.0, 287.0, 276.0, 300.0, 291.0, 285.0, 281.0, 289.0, 290.0, 283.0, 278.0, 301.0, 265.0, 257.0, 279.0, 288.0, 265.0, 257.0, 294.0, 288.0, 275.0, 250.0, 248.0, 259.0, 294.0, 288.0, 281.0, 292.0, 265.0, 265.0, 296.0, 280.0, 262.0, 260.0, 283.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6928564263382961, "mean_inference_ms": 1.2294020123590537, "mean_action_processing_ms": 0.13325836540923008, "mean_env_wait_ms": 0.8421654977463985, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 4992000, "num_agent_steps_trained": 4992000, "num_env_steps_sampled": 2496000, "num_env_steps_trained": 2496000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2496000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 4992000, "timers": {"training_iteration_time_ms": 5042.959, "learn_time_ms": 1197.253, "learn_throughput": 10691.143, "synch_weights_time_ms": 12.679}, "counters": {"num_env_steps_sampled": 2496000, "num_env_steps_trained": 2496000, "num_agent_steps_sampled": 4992000, "num_agent_steps_trained": 4992000}, "done": false, "episodes_total": 6240, "training_iteration": 195, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-51", "timestamp": 1666581171, "time_this_iter_s": 8.491342067718506, "time_total_s": 744.6652047634125, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 744.6652047634125, "timesteps_since_restore": 0, "iterations_since_restore": 195, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 83.90833333333333, "ram_util_percent": 18.958333333333336}} +{"custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 170.88, "shaped_reward_min": 145, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.82, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.4, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.19, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.37, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.95, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.52, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 15.08, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.95, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.95, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011484096758067608, "policy_loss": 0.0008887922158464789, "vf_loss": 7.625007629394531, "vf_explained_var": 0.6411457061767578, "kl": 0.0023449528962373734, "entropy": 1.0057647228240967, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2508800, "num_env_steps_trained": 2508800, "num_agent_steps_sampled": 5017600, "num_agent_steps_trained": 5017600}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 465.0, "episode_reward_mean": 554.48, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 277.24}, "custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 170.88, "shaped_reward_min": 145, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.82, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.4, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.19, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.37, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.2, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.95, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.52, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.12, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 15.08, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.95, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.95, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 525.0, 582.0, 576.0, 576.0, 522.0, 522.0, 516.0, 576.0, 573.0, 573.0, 570.0, 465.0, 519.0, 525.0, 525.0, 573.0, 525.0, 576.0, 582.0, 576.0, 573.0, 570.0, 573.0, 522.0, 525.0, 536.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 530.0, 525.0, 579.0, 530.0, 567.0, 522.0, 579.0, 527.0, 570.0, 582.0, 573.0, 522.0, 576.0, 579.0, 576.0, 522.0, 573.0, 582.0, 576.0, 576.0, 570.0, 573.0, 579.0, 522.0, 567.0, 522.0, 582.0, 525.0, 507.0, 582.0, 573.0, 530.0, 576.0, 522.0, 579.0, 522.0, 582.0, 576.0, 525.0, 476.0, 576.0, 479.0, 582.0, 522.0, 627.0, 525.0, 558.0, 525.0, 576.0, 582.0, 576.0, 579.0, 576.0, 573.0, 579.0, 573.0, 530.0, 519.0, 573.0, 525.0, 530.0, 519.0, 576.0, 570.0, 573.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 252.0, 259.0, 266.0, 279.0, 303.0, 286.0, 290.0, 291.0, 285.0, 262.0, 
260.0, 255.0, 267.0, 270.0, 246.0, 282.0, 294.0, 290.0, 283.0, 308.0, 265.0, 286.0, 284.0, 231.0, 234.0, 253.0, 266.0, 271.0, 254.0, 261.0, 264.0, 283.0, 290.0, 257.0, 268.0, 297.0, 279.0, 282.0, 300.0, 287.0, 289.0, 280.0, 293.0, 280.0, 290.0, 283.0, 290.0, 263.0, 259.0, 270.0, 255.0, 266.0, 270.0, 298.0, 281.0, 255.0, 264.0, 302.0, 271.0, 282.0, 297.0, 288.0, 291.0, 280.0, 299.0, 256.0, 274.0, 265.0, 260.0, 285.0, 294.0, 274.0, 256.0, 297.0, 270.0, 254.0, 268.0, 275.0, 304.0, 275.0, 252.0, 279.0, 291.0, 298.0, 284.0, 293.0, 280.0, 263.0, 259.0, 298.0, 278.0, 290.0, 289.0, 296.0, 280.0, 256.0, 266.0, 288.0, 285.0, 295.0, 287.0, 276.0, 300.0, 291.0, 285.0, 281.0, 289.0, 290.0, 283.0, 278.0, 301.0, 265.0, 257.0, 279.0, 288.0, 265.0, 257.0, 294.0, 288.0, 275.0, 250.0, 248.0, 259.0, 294.0, 288.0, 281.0, 292.0, 265.0, 265.0, 296.0, 280.0, 262.0, 260.0, 283.0, 296.0, 262.0, 260.0, 298.0, 284.0, 285.0, 291.0, 261.0, 264.0, 237.0, 239.0, 289.0, 287.0, 237.0, 242.0, 294.0, 288.0, 265.0, 257.0, 321.0, 306.0, 268.0, 257.0, 270.0, 288.0, 261.0, 264.0, 285.0, 291.0, 290.0, 292.0, 292.0, 284.0, 284.0, 295.0, 295.0, 281.0, 287.0, 286.0, 291.0, 288.0, 290.0, 283.0, 268.0, 262.0, 261.0, 258.0, 285.0, 288.0, 267.0, 258.0, 260.0, 270.0, 254.0, 265.0, 292.0, 284.0, 283.0, 287.0, 282.0, 291.0, 287.0, 295.0, 282.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6950939124240252, "mean_inference_ms": 1.2349222522925547, "mean_action_processing_ms": 0.13351355067475765, "mean_env_wait_ms": 0.8436288772371432, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 465.0, "episode_reward_mean": 554.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 277.24}, "hist_stats": {"episode_reward": [530.0, 525.0, 582.0, 576.0, 576.0, 522.0, 522.0, 516.0, 576.0, 573.0, 573.0, 570.0, 465.0, 519.0, 525.0, 525.0, 573.0, 525.0, 576.0, 
582.0, 576.0, 573.0, 570.0, 573.0, 522.0, 525.0, 536.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 530.0, 525.0, 579.0, 530.0, 567.0, 522.0, 579.0, 527.0, 570.0, 582.0, 573.0, 522.0, 576.0, 579.0, 576.0, 522.0, 573.0, 582.0, 576.0, 576.0, 570.0, 573.0, 579.0, 522.0, 567.0, 522.0, 582.0, 525.0, 507.0, 582.0, 573.0, 530.0, 576.0, 522.0, 579.0, 522.0, 582.0, 576.0, 525.0, 476.0, 576.0, 479.0, 582.0, 522.0, 627.0, 525.0, 558.0, 525.0, 576.0, 582.0, 576.0, 579.0, 576.0, 573.0, 579.0, 573.0, 530.0, 519.0, 573.0, 525.0, 530.0, 519.0, 576.0, 570.0, 573.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 252.0, 259.0, 266.0, 279.0, 303.0, 286.0, 290.0, 291.0, 285.0, 262.0, 260.0, 255.0, 267.0, 270.0, 246.0, 282.0, 294.0, 290.0, 283.0, 308.0, 265.0, 286.0, 284.0, 231.0, 234.0, 253.0, 266.0, 271.0, 254.0, 261.0, 264.0, 283.0, 290.0, 257.0, 268.0, 297.0, 279.0, 282.0, 300.0, 287.0, 289.0, 280.0, 293.0, 280.0, 290.0, 283.0, 290.0, 263.0, 259.0, 270.0, 255.0, 266.0, 270.0, 298.0, 281.0, 255.0, 264.0, 302.0, 271.0, 282.0, 297.0, 288.0, 291.0, 280.0, 299.0, 256.0, 274.0, 265.0, 260.0, 285.0, 294.0, 274.0, 256.0, 297.0, 270.0, 254.0, 268.0, 275.0, 304.0, 275.0, 252.0, 279.0, 291.0, 298.0, 284.0, 293.0, 280.0, 263.0, 259.0, 298.0, 278.0, 290.0, 289.0, 296.0, 280.0, 256.0, 266.0, 288.0, 285.0, 295.0, 287.0, 276.0, 300.0, 291.0, 285.0, 281.0, 289.0, 290.0, 283.0, 278.0, 301.0, 265.0, 257.0, 279.0, 288.0, 265.0, 257.0, 294.0, 288.0, 275.0, 250.0, 248.0, 259.0, 294.0, 288.0, 281.0, 
292.0, 265.0, 265.0, 296.0, 280.0, 262.0, 260.0, 283.0, 296.0, 262.0, 260.0, 298.0, 284.0, 285.0, 291.0, 261.0, 264.0, 237.0, 239.0, 289.0, 287.0, 237.0, 242.0, 294.0, 288.0, 265.0, 257.0, 321.0, 306.0, 268.0, 257.0, 270.0, 288.0, 261.0, 264.0, 285.0, 291.0, 290.0, 292.0, 292.0, 284.0, 284.0, 295.0, 295.0, 281.0, 287.0, 286.0, 291.0, 288.0, 290.0, 283.0, 268.0, 262.0, 261.0, 258.0, 285.0, 288.0, 267.0, 258.0, 260.0, 270.0, 254.0, 265.0, 292.0, 284.0, 283.0, 287.0, 282.0, 291.0, 287.0, 295.0, 282.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6950939124240252, "mean_inference_ms": 1.2349222522925547, "mean_action_processing_ms": 0.13351355067475765, "mean_env_wait_ms": 0.8436288772371432, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5017600, "num_agent_steps_trained": 5017600, "num_env_steps_sampled": 2508800, "num_env_steps_trained": 2508800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2508800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5017600, "timers": {"training_iteration_time_ms": 5149.375, "learn_time_ms": 1227.13, "learn_throughput": 10430.84, "synch_weights_time_ms": 14.269}, "counters": {"num_env_steps_sampled": 2508800, "num_env_steps_trained": 2508800, "num_agent_steps_sampled": 5017600, "num_agent_steps_trained": 5017600}, "done": false, "episodes_total": 6272, "training_iteration": 196, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-12-56", "timestamp": 1666581176, "time_this_iter_s": 5.200287818908691, "time_total_s": 749.8654925823212, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 749.8654925823212, "timesteps_since_restore": 0, "iterations_since_restore": 196, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 71.8125, "ram_util_percent": 19.0}} +{"custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 171.01, "shaped_reward_min": 122, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.47, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.0, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.04, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.6, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.37, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.69, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.34, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.72, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.54, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.41, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 14.69, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.34, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.69, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.34, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005259969620965421, "policy_loss": 0.0002679735189303756, "vf_loss": 7.586305618286133, "vf_explained_var": 0.6561870574951172, "kl": 0.001961564412340522, "entropy": 1.0012136697769165, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2521600, "num_env_steps_trained": 2521600, "num_agent_steps_sampled": 5043200, "num_agent_steps_trained": 5043200}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 362.0, "episode_reward_mean": 554.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 177.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 277.105}, "custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 171.01, "shaped_reward_min": 122, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.47, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.0, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.04, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.6, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.37, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.33, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.22, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.69, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.34, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.72, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.54, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 11, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.41, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.14, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 14.69, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.34, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.69, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.34, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 530.0, 525.0, 579.0, 530.0, 567.0, 522.0, 579.0, 527.0, 570.0, 582.0, 573.0, 522.0, 576.0, 579.0, 576.0, 522.0, 573.0, 582.0, 576.0, 576.0, 570.0, 573.0, 579.0, 522.0, 567.0, 522.0, 582.0, 525.0, 507.0, 582.0, 573.0, 530.0, 576.0, 522.0, 579.0, 522.0, 582.0, 576.0, 525.0, 476.0, 576.0, 479.0, 582.0, 522.0, 627.0, 525.0, 558.0, 525.0, 576.0, 582.0, 576.0, 579.0, 576.0, 573.0, 579.0, 573.0, 530.0, 519.0, 573.0, 525.0, 530.0, 519.0, 576.0, 570.0, 573.0, 582.0, 573.0, 519.0, 576.0, 573.0, 576.0, 525.0, 584.0, 573.0, 362.0, 582.0, 582.0, 522.0, 579.0, 573.0, 573.0, 573.0, 573.0, 436.0, 527.0, 579.0, 513.0, 573.0, 576.0, 522.0, 579.0, 525.0, 579.0, 576.0, 582.0, 579.0, 579.0, 579.0, 459.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 299.0, 256.0, 274.0, 265.0, 260.0, 285.0, 294.0, 274.0, 256.0, 297.0, 
270.0, 254.0, 268.0, 275.0, 304.0, 275.0, 252.0, 279.0, 291.0, 298.0, 284.0, 293.0, 280.0, 263.0, 259.0, 298.0, 278.0, 290.0, 289.0, 296.0, 280.0, 256.0, 266.0, 288.0, 285.0, 295.0, 287.0, 276.0, 300.0, 291.0, 285.0, 281.0, 289.0, 290.0, 283.0, 278.0, 301.0, 265.0, 257.0, 279.0, 288.0, 265.0, 257.0, 294.0, 288.0, 275.0, 250.0, 248.0, 259.0, 294.0, 288.0, 281.0, 292.0, 265.0, 265.0, 296.0, 280.0, 262.0, 260.0, 283.0, 296.0, 262.0, 260.0, 298.0, 284.0, 285.0, 291.0, 261.0, 264.0, 237.0, 239.0, 289.0, 287.0, 237.0, 242.0, 294.0, 288.0, 265.0, 257.0, 321.0, 306.0, 268.0, 257.0, 270.0, 288.0, 261.0, 264.0, 285.0, 291.0, 290.0, 292.0, 292.0, 284.0, 284.0, 295.0, 295.0, 281.0, 287.0, 286.0, 291.0, 288.0, 290.0, 283.0, 268.0, 262.0, 261.0, 258.0, 285.0, 288.0, 267.0, 258.0, 260.0, 270.0, 254.0, 265.0, 292.0, 284.0, 283.0, 287.0, 282.0, 291.0, 287.0, 295.0, 282.0, 291.0, 277.0, 242.0, 296.0, 280.0, 285.0, 288.0, 294.0, 282.0, 252.0, 273.0, 302.0, 282.0, 290.0, 283.0, 177.0, 185.0, 290.0, 292.0, 299.0, 283.0, 249.0, 273.0, 296.0, 283.0, 287.0, 286.0, 289.0, 284.0, 290.0, 283.0, 280.0, 293.0, 215.0, 221.0, 261.0, 266.0, 283.0, 296.0, 253.0, 260.0, 301.0, 272.0, 292.0, 284.0, 251.0, 271.0, 298.0, 281.0, 253.0, 272.0, 291.0, 288.0, 290.0, 286.0, 291.0, 291.0, 297.0, 282.0, 290.0, 289.0, 297.0, 282.0, 238.0, 221.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6988818221866362, "mean_inference_ms": 1.2486215365158422, "mean_action_processing_ms": 0.13399369704943986, "mean_env_wait_ms": 0.8473021284275585, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 362.0, "episode_reward_mean": 554.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 177.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 277.105}, "hist_stats": {"episode_reward": [579.0, 530.0, 525.0, 579.0, 530.0, 567.0, 522.0, 579.0, 527.0, 570.0, 582.0, 573.0, 522.0, 576.0, 579.0, 576.0, 522.0, 573.0, 
582.0, 576.0, 576.0, 570.0, 573.0, 579.0, 522.0, 567.0, 522.0, 582.0, 525.0, 507.0, 582.0, 573.0, 530.0, 576.0, 522.0, 579.0, 522.0, 582.0, 576.0, 525.0, 476.0, 576.0, 479.0, 582.0, 522.0, 627.0, 525.0, 558.0, 525.0, 576.0, 582.0, 576.0, 579.0, 576.0, 573.0, 579.0, 573.0, 530.0, 519.0, 573.0, 525.0, 530.0, 519.0, 576.0, 570.0, 573.0, 582.0, 573.0, 519.0, 576.0, 573.0, 576.0, 525.0, 584.0, 573.0, 362.0, 582.0, 582.0, 522.0, 579.0, 573.0, 573.0, 573.0, 573.0, 436.0, 527.0, 579.0, 513.0, 573.0, 576.0, 522.0, 579.0, 525.0, 579.0, 576.0, 582.0, 579.0, 579.0, 579.0, 459.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 299.0, 256.0, 274.0, 265.0, 260.0, 285.0, 294.0, 274.0, 256.0, 297.0, 270.0, 254.0, 268.0, 275.0, 304.0, 275.0, 252.0, 279.0, 291.0, 298.0, 284.0, 293.0, 280.0, 263.0, 259.0, 298.0, 278.0, 290.0, 289.0, 296.0, 280.0, 256.0, 266.0, 288.0, 285.0, 295.0, 287.0, 276.0, 300.0, 291.0, 285.0, 281.0, 289.0, 290.0, 283.0, 278.0, 301.0, 265.0, 257.0, 279.0, 288.0, 265.0, 257.0, 294.0, 288.0, 275.0, 250.0, 248.0, 259.0, 294.0, 288.0, 281.0, 292.0, 265.0, 265.0, 296.0, 280.0, 262.0, 260.0, 283.0, 296.0, 262.0, 260.0, 298.0, 284.0, 285.0, 291.0, 261.0, 264.0, 237.0, 239.0, 289.0, 287.0, 237.0, 242.0, 294.0, 288.0, 265.0, 257.0, 321.0, 306.0, 268.0, 257.0, 270.0, 288.0, 261.0, 264.0, 285.0, 291.0, 290.0, 292.0, 292.0, 284.0, 284.0, 295.0, 295.0, 281.0, 287.0, 286.0, 291.0, 288.0, 290.0, 283.0, 268.0, 262.0, 261.0, 258.0, 285.0, 288.0, 267.0, 258.0, 260.0, 270.0, 254.0, 265.0, 
292.0, 284.0, 283.0, 287.0, 282.0, 291.0, 287.0, 295.0, 282.0, 291.0, 277.0, 242.0, 296.0, 280.0, 285.0, 288.0, 294.0, 282.0, 252.0, 273.0, 302.0, 282.0, 290.0, 283.0, 177.0, 185.0, 290.0, 292.0, 299.0, 283.0, 249.0, 273.0, 296.0, 283.0, 287.0, 286.0, 289.0, 284.0, 290.0, 283.0, 280.0, 293.0, 215.0, 221.0, 261.0, 266.0, 283.0, 296.0, 253.0, 260.0, 301.0, 272.0, 292.0, 284.0, 251.0, 271.0, 298.0, 281.0, 253.0, 272.0, 291.0, 288.0, 290.0, 286.0, 291.0, 291.0, 297.0, 282.0, 290.0, 289.0, 297.0, 282.0, 238.0, 221.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6988818221866362, "mean_inference_ms": 1.2486215365158422, "mean_action_processing_ms": 0.13399369704943986, "mean_env_wait_ms": 0.8473021284275585, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5043200, "num_agent_steps_trained": 5043200, "num_env_steps_sampled": 2521600, "num_env_steps_trained": 2521600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2521600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5043200, "timers": {"training_iteration_time_ms": 5864.751, "learn_time_ms": 1229.533, "learn_throughput": 10410.46, "synch_weights_time_ms": 15.262}, "counters": {"num_env_steps_sampled": 2521600, "num_env_steps_trained": 2521600, "num_agent_steps_sampled": 5043200, "num_agent_steps_trained": 5043200}, "done": false, "episodes_total": 6304, "training_iteration": 197, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-07", "timestamp": 1666581187, "time_this_iter_s": 10.88671064376831, "time_total_s": 760.7522032260895, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 760.7522032260895, "timesteps_since_restore": 0, "iterations_since_restore": 197, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 83.80000000000001, "ram_util_percent": 21.525}} +{"custom_metrics": {"sparse_reward_mean": 190.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 170.23, "shaped_reward_min": 111, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.66, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.66, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.29, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.33, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.41, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.11, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 14.93, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -1.2172211427241564e-05, "policy_loss": -0.0002627079957164824, "vf_loss": 7.566778182983398, "vf_explained_var": 0.6604992151260376, "kl": 0.0018601968185976148, "entropy": 1.0122833251953125, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2534400, "num_env_steps_trained": 2534400, "num_agent_steps_sampled": 5068800, "num_agent_steps_trained": 5068800}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 351.0, "episode_reward_mean": 551.03, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 275.515}, "custom_metrics": {"sparse_reward_mean": 190.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 170.23, "shaped_reward_min": 111, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.66, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.66, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.29, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.33, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 11, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.41, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.11, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 7, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 576.0, 522.0, 579.0, 522.0, 582.0, 576.0, 525.0, 476.0, 576.0, 479.0, 582.0, 522.0, 627.0, 525.0, 558.0, 525.0, 576.0, 582.0, 576.0, 579.0, 576.0, 573.0, 579.0, 573.0, 530.0, 519.0, 573.0, 525.0, 530.0, 519.0, 576.0, 570.0, 573.0, 582.0, 573.0, 519.0, 576.0, 573.0, 576.0, 525.0, 584.0, 573.0, 362.0, 582.0, 582.0, 522.0, 579.0, 573.0, 573.0, 573.0, 573.0, 436.0, 527.0, 579.0, 513.0, 573.0, 576.0, 522.0, 579.0, 525.0, 579.0, 576.0, 582.0, 579.0, 579.0, 579.0, 459.0, 573.0, 576.0, 518.0, 522.0, 573.0, 525.0, 576.0, 579.0, 525.0, 527.0, 579.0, 525.0, 570.0, 579.0, 570.0, 570.0, 530.0, 576.0, 351.0, 573.0, 579.0, 530.0, 579.0, 522.0, 570.0, 573.0, 533.0, 576.0, 525.0, 573.0, 530.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 265.0, 296.0, 280.0, 262.0, 260.0, 283.0, 296.0, 262.0, 260.0, 298.0, 
284.0, 285.0, 291.0, 261.0, 264.0, 237.0, 239.0, 289.0, 287.0, 237.0, 242.0, 294.0, 288.0, 265.0, 257.0, 321.0, 306.0, 268.0, 257.0, 270.0, 288.0, 261.0, 264.0, 285.0, 291.0, 290.0, 292.0, 292.0, 284.0, 284.0, 295.0, 295.0, 281.0, 287.0, 286.0, 291.0, 288.0, 290.0, 283.0, 268.0, 262.0, 261.0, 258.0, 285.0, 288.0, 267.0, 258.0, 260.0, 270.0, 254.0, 265.0, 292.0, 284.0, 283.0, 287.0, 282.0, 291.0, 287.0, 295.0, 282.0, 291.0, 277.0, 242.0, 296.0, 280.0, 285.0, 288.0, 294.0, 282.0, 252.0, 273.0, 302.0, 282.0, 290.0, 283.0, 177.0, 185.0, 290.0, 292.0, 299.0, 283.0, 249.0, 273.0, 296.0, 283.0, 287.0, 286.0, 289.0, 284.0, 290.0, 283.0, 280.0, 293.0, 215.0, 221.0, 261.0, 266.0, 283.0, 296.0, 253.0, 260.0, 301.0, 272.0, 292.0, 284.0, 251.0, 271.0, 298.0, 281.0, 253.0, 272.0, 291.0, 288.0, 290.0, 286.0, 291.0, 291.0, 297.0, 282.0, 290.0, 289.0, 297.0, 282.0, 238.0, 221.0, 290.0, 283.0, 291.0, 285.0, 261.0, 257.0, 259.0, 263.0, 283.0, 290.0, 267.0, 258.0, 303.0, 273.0, 292.0, 287.0, 275.0, 250.0, 265.0, 262.0, 289.0, 290.0, 261.0, 264.0, 279.0, 291.0, 286.0, 293.0, 285.0, 285.0, 279.0, 291.0, 266.0, 264.0, 296.0, 280.0, 176.0, 175.0, 288.0, 285.0, 277.0, 302.0, 271.0, 259.0, 291.0, 288.0, 273.0, 249.0, 299.0, 271.0, 275.0, 298.0, 263.0, 270.0, 291.0, 285.0, 265.0, 260.0, 298.0, 275.0, 259.0, 271.0, 280.0, 242.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7016750382633622, "mean_inference_ms": 1.2584238626418844, "mean_action_processing_ms": 0.1343697593979121, "mean_env_wait_ms": 0.8502420561186278, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 351.0, "episode_reward_mean": 551.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 275.515}, "hist_stats": {"episode_reward": [530.0, 576.0, 522.0, 579.0, 522.0, 582.0, 576.0, 525.0, 476.0, 576.0, 479.0, 582.0, 522.0, 627.0, 525.0, 558.0, 525.0, 576.0, 582.0, 
576.0, 579.0, 576.0, 573.0, 579.0, 573.0, 530.0, 519.0, 573.0, 525.0, 530.0, 519.0, 576.0, 570.0, 573.0, 582.0, 573.0, 519.0, 576.0, 573.0, 576.0, 525.0, 584.0, 573.0, 362.0, 582.0, 582.0, 522.0, 579.0, 573.0, 573.0, 573.0, 573.0, 436.0, 527.0, 579.0, 513.0, 573.0, 576.0, 522.0, 579.0, 525.0, 579.0, 576.0, 582.0, 579.0, 579.0, 579.0, 459.0, 573.0, 576.0, 518.0, 522.0, 573.0, 525.0, 576.0, 579.0, 525.0, 527.0, 579.0, 525.0, 570.0, 579.0, 570.0, 570.0, 530.0, 576.0, 351.0, 573.0, 579.0, 530.0, 579.0, 522.0, 570.0, 573.0, 533.0, 576.0, 525.0, 573.0, 530.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 265.0, 296.0, 280.0, 262.0, 260.0, 283.0, 296.0, 262.0, 260.0, 298.0, 284.0, 285.0, 291.0, 261.0, 264.0, 237.0, 239.0, 289.0, 287.0, 237.0, 242.0, 294.0, 288.0, 265.0, 257.0, 321.0, 306.0, 268.0, 257.0, 270.0, 288.0, 261.0, 264.0, 285.0, 291.0, 290.0, 292.0, 292.0, 284.0, 284.0, 295.0, 295.0, 281.0, 287.0, 286.0, 291.0, 288.0, 290.0, 283.0, 268.0, 262.0, 261.0, 258.0, 285.0, 288.0, 267.0, 258.0, 260.0, 270.0, 254.0, 265.0, 292.0, 284.0, 283.0, 287.0, 282.0, 291.0, 287.0, 295.0, 282.0, 291.0, 277.0, 242.0, 296.0, 280.0, 285.0, 288.0, 294.0, 282.0, 252.0, 273.0, 302.0, 282.0, 290.0, 283.0, 177.0, 185.0, 290.0, 292.0, 299.0, 283.0, 249.0, 273.0, 296.0, 283.0, 287.0, 286.0, 289.0, 284.0, 290.0, 283.0, 280.0, 293.0, 215.0, 221.0, 261.0, 266.0, 283.0, 296.0, 253.0, 260.0, 301.0, 272.0, 292.0, 284.0, 251.0, 271.0, 298.0, 281.0, 253.0, 272.0, 291.0, 288.0, 290.0, 286.0, 291.0, 
291.0, 297.0, 282.0, 290.0, 289.0, 297.0, 282.0, 238.0, 221.0, 290.0, 283.0, 291.0, 285.0, 261.0, 257.0, 259.0, 263.0, 283.0, 290.0, 267.0, 258.0, 303.0, 273.0, 292.0, 287.0, 275.0, 250.0, 265.0, 262.0, 289.0, 290.0, 261.0, 264.0, 279.0, 291.0, 286.0, 293.0, 285.0, 285.0, 279.0, 291.0, 266.0, 264.0, 296.0, 280.0, 176.0, 175.0, 288.0, 285.0, 277.0, 302.0, 271.0, 259.0, 291.0, 288.0, 273.0, 249.0, 299.0, 271.0, 275.0, 298.0, 263.0, 270.0, 291.0, 285.0, 265.0, 260.0, 298.0, 275.0, 259.0, 271.0, 280.0, 242.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7016750382633622, "mean_inference_ms": 1.2584238626418844, "mean_action_processing_ms": 0.1343697593979121, "mean_env_wait_ms": 0.8502420561186278, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5068800, "num_agent_steps_trained": 5068800, "num_env_steps_sampled": 2534400, "num_env_steps_trained": 2534400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2534400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5068800, "timers": {"training_iteration_time_ms": 5913.439, "learn_time_ms": 1228.193, "learn_throughput": 10421.814, "synch_weights_time_ms": 15.093}, "counters": {"num_env_steps_sampled": 2534400, "num_env_steps_trained": 2534400, "num_agent_steps_sampled": 5068800, "num_agent_steps_trained": 5068800}, "done": false, "episodes_total": 6336, "training_iteration": 198, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-12", "timestamp": 1666581192, "time_this_iter_s": 4.2692272663116455, "time_total_s": 765.0214304924011, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 765.0214304924011, "timesteps_since_restore": 0, "iterations_since_restore": 198, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 46.93333333333334, "ram_util_percent": 21.333333333333332}} +{"custom_metrics": {"sparse_reward_mean": 189.4, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 168.38, "shaped_reward_min": 94, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.56, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.95, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.17, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.25, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.69, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.91, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.52, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.47, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.69, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.91, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.69, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.91, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008545387536287308, "policy_loss": -0.0011101895943284035, "vf_loss": 7.604818820953369, "vf_explained_var": 0.6474588513374329, "kl": 0.002114715985953808, "entropy": 1.0096606016159058, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2547200, "num_env_steps_trained": 2547200, "num_agent_steps_sampled": 5094400, "num_agent_steps_trained": 5094400}, "sampler_results": {"episode_reward_max": 584.0, "episode_reward_min": 294.0, "episode_reward_mean": 547.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 273.59}, "custom_metrics": {"sparse_reward_mean": 189.4, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 168.38, "shaped_reward_min": 94, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.56, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.95, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.17, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.69, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.91, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.52, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.47, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.69, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.91, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.69, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.91, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 573.0, 582.0, 573.0, 519.0, 576.0, 573.0, 576.0, 525.0, 584.0, 573.0, 362.0, 582.0, 582.0, 522.0, 579.0, 573.0, 573.0, 573.0, 573.0, 436.0, 527.0, 579.0, 513.0, 573.0, 576.0, 522.0, 579.0, 525.0, 579.0, 576.0, 582.0, 579.0, 579.0, 579.0, 459.0, 573.0, 576.0, 518.0, 522.0, 573.0, 525.0, 576.0, 579.0, 525.0, 527.0, 579.0, 525.0, 570.0, 579.0, 570.0, 570.0, 530.0, 576.0, 351.0, 573.0, 579.0, 530.0, 579.0, 522.0, 570.0, 573.0, 533.0, 576.0, 525.0, 573.0, 530.0, 522.0, 522.0, 294.0, 519.0, 525.0, 570.0, 576.0, 576.0, 579.0, 370.0, 522.0, 519.0, 573.0, 530.0, 519.0, 561.0, 573.0, 525.0, 576.0, 525.0, 579.0, 573.0, 576.0, 573.0, 492.0, 573.0, 522.0, 576.0, 573.0, 573.0, 576.0, 570.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 287.0, 282.0, 291.0, 287.0, 295.0, 282.0, 291.0, 277.0, 242.0, 296.0, 
280.0, 285.0, 288.0, 294.0, 282.0, 252.0, 273.0, 302.0, 282.0, 290.0, 283.0, 177.0, 185.0, 290.0, 292.0, 299.0, 283.0, 249.0, 273.0, 296.0, 283.0, 287.0, 286.0, 289.0, 284.0, 290.0, 283.0, 280.0, 293.0, 215.0, 221.0, 261.0, 266.0, 283.0, 296.0, 253.0, 260.0, 301.0, 272.0, 292.0, 284.0, 251.0, 271.0, 298.0, 281.0, 253.0, 272.0, 291.0, 288.0, 290.0, 286.0, 291.0, 291.0, 297.0, 282.0, 290.0, 289.0, 297.0, 282.0, 238.0, 221.0, 290.0, 283.0, 291.0, 285.0, 261.0, 257.0, 259.0, 263.0, 283.0, 290.0, 267.0, 258.0, 303.0, 273.0, 292.0, 287.0, 275.0, 250.0, 265.0, 262.0, 289.0, 290.0, 261.0, 264.0, 279.0, 291.0, 286.0, 293.0, 285.0, 285.0, 279.0, 291.0, 266.0, 264.0, 296.0, 280.0, 176.0, 175.0, 288.0, 285.0, 277.0, 302.0, 271.0, 259.0, 291.0, 288.0, 273.0, 249.0, 299.0, 271.0, 275.0, 298.0, 263.0, 270.0, 291.0, 285.0, 265.0, 260.0, 298.0, 275.0, 259.0, 271.0, 280.0, 242.0, 253.0, 269.0, 153.0, 141.0, 250.0, 269.0, 263.0, 262.0, 288.0, 282.0, 280.0, 296.0, 290.0, 286.0, 273.0, 306.0, 176.0, 194.0, 262.0, 260.0, 252.0, 267.0, 298.0, 275.0, 267.0, 263.0, 265.0, 254.0, 281.0, 280.0, 284.0, 289.0, 260.0, 265.0, 295.0, 281.0, 247.0, 278.0, 286.0, 293.0, 282.0, 291.0, 292.0, 284.0, 283.0, 290.0, 241.0, 251.0, 290.0, 283.0, 271.0, 251.0, 291.0, 285.0, 285.0, 288.0, 271.0, 302.0, 288.0, 288.0, 276.0, 294.0, 285.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7040670537319909, "mean_inference_ms": 1.2672877433683731, "mean_action_processing_ms": 0.13470833472821803, "mean_env_wait_ms": 0.8530001805779409, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 584.0, "episode_reward_min": 294.0, "episode_reward_mean": 547.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 273.59}, "hist_stats": {"episode_reward": [570.0, 573.0, 582.0, 573.0, 519.0, 576.0, 573.0, 576.0, 525.0, 584.0, 573.0, 362.0, 582.0, 582.0, 522.0, 579.0, 573.0, 573.0, 573.0, 
573.0, 436.0, 527.0, 579.0, 513.0, 573.0, 576.0, 522.0, 579.0, 525.0, 579.0, 576.0, 582.0, 579.0, 579.0, 579.0, 459.0, 573.0, 576.0, 518.0, 522.0, 573.0, 525.0, 576.0, 579.0, 525.0, 527.0, 579.0, 525.0, 570.0, 579.0, 570.0, 570.0, 530.0, 576.0, 351.0, 573.0, 579.0, 530.0, 579.0, 522.0, 570.0, 573.0, 533.0, 576.0, 525.0, 573.0, 530.0, 522.0, 522.0, 294.0, 519.0, 525.0, 570.0, 576.0, 576.0, 579.0, 370.0, 522.0, 519.0, 573.0, 530.0, 519.0, 561.0, 573.0, 525.0, 576.0, 525.0, 579.0, 573.0, 576.0, 573.0, 492.0, 573.0, 522.0, 576.0, 573.0, 573.0, 576.0, 570.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 287.0, 282.0, 291.0, 287.0, 295.0, 282.0, 291.0, 277.0, 242.0, 296.0, 280.0, 285.0, 288.0, 294.0, 282.0, 252.0, 273.0, 302.0, 282.0, 290.0, 283.0, 177.0, 185.0, 290.0, 292.0, 299.0, 283.0, 249.0, 273.0, 296.0, 283.0, 287.0, 286.0, 289.0, 284.0, 290.0, 283.0, 280.0, 293.0, 215.0, 221.0, 261.0, 266.0, 283.0, 296.0, 253.0, 260.0, 301.0, 272.0, 292.0, 284.0, 251.0, 271.0, 298.0, 281.0, 253.0, 272.0, 291.0, 288.0, 290.0, 286.0, 291.0, 291.0, 297.0, 282.0, 290.0, 289.0, 297.0, 282.0, 238.0, 221.0, 290.0, 283.0, 291.0, 285.0, 261.0, 257.0, 259.0, 263.0, 283.0, 290.0, 267.0, 258.0, 303.0, 273.0, 292.0, 287.0, 275.0, 250.0, 265.0, 262.0, 289.0, 290.0, 261.0, 264.0, 279.0, 291.0, 286.0, 293.0, 285.0, 285.0, 279.0, 291.0, 266.0, 264.0, 296.0, 280.0, 176.0, 175.0, 288.0, 285.0, 277.0, 302.0, 271.0, 259.0, 291.0, 288.0, 273.0, 249.0, 299.0, 271.0, 275.0, 298.0, 263.0, 270.0, 291.0, 
285.0, 265.0, 260.0, 298.0, 275.0, 259.0, 271.0, 280.0, 242.0, 253.0, 269.0, 153.0, 141.0, 250.0, 269.0, 263.0, 262.0, 288.0, 282.0, 280.0, 296.0, 290.0, 286.0, 273.0, 306.0, 176.0, 194.0, 262.0, 260.0, 252.0, 267.0, 298.0, 275.0, 267.0, 263.0, 265.0, 254.0, 281.0, 280.0, 284.0, 289.0, 260.0, 265.0, 295.0, 281.0, 247.0, 278.0, 286.0, 293.0, 282.0, 291.0, 292.0, 284.0, 283.0, 290.0, 241.0, 251.0, 290.0, 283.0, 271.0, 251.0, 291.0, 285.0, 285.0, 288.0, 271.0, 302.0, 288.0, 288.0, 276.0, 294.0, 285.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7040670537319909, "mean_inference_ms": 1.2672877433683731, "mean_action_processing_ms": 0.13470833472821803, "mean_env_wait_ms": 0.8530001805779409, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5094400, "num_agent_steps_trained": 5094400, "num_env_steps_sampled": 2547200, "num_env_steps_trained": 2547200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2547200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5094400, "timers": {"training_iteration_time_ms": 5977.71, "learn_time_ms": 1238.377, "learn_throughput": 10336.109, "synch_weights_time_ms": 14.996}, "counters": {"num_env_steps_sampled": 2547200, "num_env_steps_trained": 2547200, "num_agent_steps_sampled": 5094400, "num_agent_steps_trained": 5094400}, "done": false, "episodes_total": 6368, "training_iteration": 199, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-16", "timestamp": 1666581196, "time_this_iter_s": 4.223273992538452, "time_total_s": 769.2447044849396, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 769.2447044849396, "timesteps_since_restore": 0, "iterations_since_restore": 199, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 49.0, "ram_util_percent": 19.357142857142858}} +{"evaluation": {"average_sparse_reward": 180.0, "num_healthy_workers": 0, "num_recreated_workers": 0}, "custom_metrics": {"sparse_reward_mean": 189.0, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 167.89, "shaped_reward_min": 94, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.35, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.39, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.02, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.99, "useful_onion_pickup_agent_1_min": 
8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.77, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, 
"soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.81, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.77, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.77, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019026012159883976, "policy_loss": -0.002160794800147414, "vf_loss": 7.632315635681152, "vf_explained_var": 0.6613626480102539, "kl": 0.0017705156933516264, "entropy": 1.0100735425949097, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2560000, "num_env_steps_trained": 2560000, "num_agent_steps_sampled": 5120000, "num_agent_steps_trained": 5120000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 294.0, "episode_reward_mean": 545.89, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 272.945}, "custom_metrics": {"sparse_reward_mean": 189.0, "sparse_reward_min": 100, "sparse_reward_max": 200, "shaped_reward_mean": 167.89, "shaped_reward_min": 94, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 
0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.35, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.39, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.02, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.99, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.29, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.77, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.81, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.77, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.77, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 459.0, 573.0, 576.0, 518.0, 522.0, 573.0, 525.0, 576.0, 579.0, 525.0, 527.0, 579.0, 525.0, 570.0, 579.0, 570.0, 570.0, 530.0, 576.0, 351.0, 573.0, 579.0, 530.0, 579.0, 522.0, 570.0, 573.0, 533.0, 576.0, 525.0, 573.0, 530.0, 522.0, 522.0, 294.0, 519.0, 525.0, 570.0, 576.0, 576.0, 579.0, 370.0, 522.0, 519.0, 573.0, 530.0, 519.0, 561.0, 573.0, 525.0, 576.0, 525.0, 579.0, 573.0, 576.0, 573.0, 492.0, 573.0, 522.0, 576.0, 573.0, 573.0, 576.0, 570.0, 573.0, 582.0, 573.0, 525.0, 341.0, 573.0, 573.0, 576.0, 513.0, 533.0, 525.0, 525.0, 582.0, 530.0, 576.0, 579.0, 573.0, 582.0, 573.0, 576.0, 576.0, 576.0, 473.0, 525.0, 522.0, 530.0, 576.0, 576.0, 573.0, 525.0, 579.0, 570.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 290.0, 289.0, 297.0, 
282.0, 238.0, 221.0, 290.0, 283.0, 291.0, 285.0, 261.0, 257.0, 259.0, 263.0, 283.0, 290.0, 267.0, 258.0, 303.0, 273.0, 292.0, 287.0, 275.0, 250.0, 265.0, 262.0, 289.0, 290.0, 261.0, 264.0, 279.0, 291.0, 286.0, 293.0, 285.0, 285.0, 279.0, 291.0, 266.0, 264.0, 296.0, 280.0, 176.0, 175.0, 288.0, 285.0, 277.0, 302.0, 271.0, 259.0, 291.0, 288.0, 273.0, 249.0, 299.0, 271.0, 275.0, 298.0, 263.0, 270.0, 291.0, 285.0, 265.0, 260.0, 298.0, 275.0, 259.0, 271.0, 280.0, 242.0, 253.0, 269.0, 153.0, 141.0, 250.0, 269.0, 263.0, 262.0, 288.0, 282.0, 280.0, 296.0, 290.0, 286.0, 273.0, 306.0, 176.0, 194.0, 262.0, 260.0, 252.0, 267.0, 298.0, 275.0, 267.0, 263.0, 265.0, 254.0, 281.0, 280.0, 284.0, 289.0, 260.0, 265.0, 295.0, 281.0, 247.0, 278.0, 286.0, 293.0, 282.0, 291.0, 292.0, 284.0, 283.0, 290.0, 241.0, 251.0, 290.0, 283.0, 271.0, 251.0, 291.0, 285.0, 285.0, 288.0, 271.0, 302.0, 288.0, 288.0, 276.0, 294.0, 285.0, 288.0, 290.0, 292.0, 286.0, 287.0, 260.0, 265.0, 168.0, 173.0, 291.0, 282.0, 287.0, 286.0, 278.0, 298.0, 257.0, 256.0, 269.0, 264.0, 255.0, 270.0, 266.0, 259.0, 292.0, 290.0, 264.0, 266.0, 291.0, 285.0, 292.0, 287.0, 288.0, 285.0, 283.0, 299.0, 270.0, 303.0, 288.0, 288.0, 287.0, 289.0, 288.0, 288.0, 245.0, 228.0, 249.0, 276.0, 266.0, 256.0, 257.0, 273.0, 305.0, 271.0, 295.0, 281.0, 306.0, 267.0, 262.0, 263.0, 285.0, 294.0, 272.0, 298.0, 300.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7068202919176193, "mean_inference_ms": 1.273063021322113, "mean_action_processing_ms": 0.13496670264422456, "mean_env_wait_ms": 0.8546731088104613, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 294.0, "episode_reward_mean": 545.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 272.945}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 459.0, 573.0, 576.0, 518.0, 522.0, 573.0, 525.0, 576.0, 579.0, 525.0, 
527.0, 579.0, 525.0, 570.0, 579.0, 570.0, 570.0, 530.0, 576.0, 351.0, 573.0, 579.0, 530.0, 579.0, 522.0, 570.0, 573.0, 533.0, 576.0, 525.0, 573.0, 530.0, 522.0, 522.0, 294.0, 519.0, 525.0, 570.0, 576.0, 576.0, 579.0, 370.0, 522.0, 519.0, 573.0, 530.0, 519.0, 561.0, 573.0, 525.0, 576.0, 525.0, 579.0, 573.0, 576.0, 573.0, 492.0, 573.0, 522.0, 576.0, 573.0, 573.0, 576.0, 570.0, 573.0, 582.0, 573.0, 525.0, 341.0, 573.0, 573.0, 576.0, 513.0, 533.0, 525.0, 525.0, 582.0, 530.0, 576.0, 579.0, 573.0, 582.0, 573.0, 576.0, 576.0, 576.0, 473.0, 525.0, 522.0, 530.0, 576.0, 576.0, 573.0, 525.0, 579.0, 570.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 290.0, 289.0, 297.0, 282.0, 238.0, 221.0, 290.0, 283.0, 291.0, 285.0, 261.0, 257.0, 259.0, 263.0, 283.0, 290.0, 267.0, 258.0, 303.0, 273.0, 292.0, 287.0, 275.0, 250.0, 265.0, 262.0, 289.0, 290.0, 261.0, 264.0, 279.0, 291.0, 286.0, 293.0, 285.0, 285.0, 279.0, 291.0, 266.0, 264.0, 296.0, 280.0, 176.0, 175.0, 288.0, 285.0, 277.0, 302.0, 271.0, 259.0, 291.0, 288.0, 273.0, 249.0, 299.0, 271.0, 275.0, 298.0, 263.0, 270.0, 291.0, 285.0, 265.0, 260.0, 298.0, 275.0, 259.0, 271.0, 280.0, 242.0, 253.0, 269.0, 153.0, 141.0, 250.0, 269.0, 263.0, 262.0, 288.0, 282.0, 280.0, 296.0, 290.0, 286.0, 273.0, 306.0, 176.0, 194.0, 262.0, 260.0, 252.0, 267.0, 298.0, 275.0, 267.0, 263.0, 265.0, 254.0, 281.0, 280.0, 284.0, 289.0, 260.0, 265.0, 295.0, 281.0, 247.0, 278.0, 286.0, 293.0, 282.0, 291.0, 292.0, 284.0, 283.0, 290.0, 241.0, 251.0, 290.0, 
283.0, 271.0, 251.0, 291.0, 285.0, 285.0, 288.0, 271.0, 302.0, 288.0, 288.0, 276.0, 294.0, 285.0, 288.0, 290.0, 292.0, 286.0, 287.0, 260.0, 265.0, 168.0, 173.0, 291.0, 282.0, 287.0, 286.0, 278.0, 298.0, 257.0, 256.0, 269.0, 264.0, 255.0, 270.0, 266.0, 259.0, 292.0, 290.0, 264.0, 266.0, 291.0, 285.0, 292.0, 287.0, 288.0, 285.0, 283.0, 299.0, 270.0, 303.0, 288.0, 288.0, 287.0, 289.0, 288.0, 288.0, 245.0, 228.0, 249.0, 276.0, 266.0, 256.0, 257.0, 273.0, 305.0, 271.0, 295.0, 281.0, 306.0, 267.0, 262.0, 263.0, 285.0, 294.0, 272.0, 298.0, 300.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7068202919176193, "mean_inference_ms": 1.273063021322113, "mean_action_processing_ms": 0.13496670264422456, "mean_env_wait_ms": 0.8546731088104613, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5120000, "num_agent_steps_trained": 5120000, "num_env_steps_sampled": 2560000, "num_env_steps_trained": 2560000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2560000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5120000, "timers": {"training_iteration_time_ms": 5798.303, "learn_time_ms": 1238.885, "learn_throughput": 10331.871, "synch_weights_time_ms": 16.153}, "counters": {"num_env_steps_sampled": 2560000, "num_env_steps_trained": 2560000, "num_agent_steps_sampled": 5120000, "num_agent_steps_trained": 5120000}, "done": false, "episodes_total": 6400, "training_iteration": 200, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-28", "timestamp": 1666581208, "time_this_iter_s": 12.11938214302063, "time_total_s": 781.3640866279602, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, 
"no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 781.3640866279602, "timesteps_since_restore": 0, "iterations_since_restore": 200, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 82.9470588235294, "ram_util_percent": 17.2764705882353}} +{"custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 168.23, "shaped_reward_min": 91, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.96, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.64, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.68, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.51, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.07, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.0, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.0, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.51, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.07, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.51, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.07, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002211037091910839, "policy_loss": 0.0019596759229898453, "vf_loss": 7.57273530960083, "vf_explained_var": 0.645828902721405, "kl": 0.005030768923461437, "entropy": 1.0118223428726196, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2572800, "num_env_steps_trained": 2572800, "num_agent_steps_sampled": 5145600, "num_agent_steps_trained": 5145600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 251.0, "episode_reward_mean": 546.63, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 273.315}, "custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 168.23, "shaped_reward_min": 91, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.96, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.64, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.68, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.51, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.07, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.0, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.0, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, 
"soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.51, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.07, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.51, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.07, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 573.0, 530.0, 522.0, 522.0, 294.0, 519.0, 525.0, 570.0, 576.0, 576.0, 579.0, 370.0, 522.0, 519.0, 573.0, 530.0, 519.0, 561.0, 573.0, 525.0, 576.0, 525.0, 579.0, 573.0, 576.0, 573.0, 492.0, 573.0, 522.0, 576.0, 573.0, 573.0, 576.0, 570.0, 573.0, 582.0, 573.0, 525.0, 341.0, 573.0, 573.0, 576.0, 513.0, 533.0, 525.0, 525.0, 582.0, 530.0, 576.0, 579.0, 573.0, 582.0, 573.0, 576.0, 576.0, 576.0, 473.0, 525.0, 522.0, 530.0, 576.0, 576.0, 573.0, 525.0, 579.0, 570.0, 570.0, 579.0, 530.0, 582.0, 579.0, 579.0, 251.0, 579.0, 576.0, 519.0, 573.0, 573.0, 576.0, 519.0, 579.0, 579.0, 579.0, 582.0, 516.0, 522.0, 530.0, 522.0, 579.0, 576.0, 576.0, 525.0, 576.0, 576.0, 576.0, 516.0, 573.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 260.0, 298.0, 275.0, 259.0, 271.0, 280.0, 242.0, 253.0, 269.0, 153.0, 
141.0, 250.0, 269.0, 263.0, 262.0, 288.0, 282.0, 280.0, 296.0, 290.0, 286.0, 273.0, 306.0, 176.0, 194.0, 262.0, 260.0, 252.0, 267.0, 298.0, 275.0, 267.0, 263.0, 265.0, 254.0, 281.0, 280.0, 284.0, 289.0, 260.0, 265.0, 295.0, 281.0, 247.0, 278.0, 286.0, 293.0, 282.0, 291.0, 292.0, 284.0, 283.0, 290.0, 241.0, 251.0, 290.0, 283.0, 271.0, 251.0, 291.0, 285.0, 285.0, 288.0, 271.0, 302.0, 288.0, 288.0, 276.0, 294.0, 285.0, 288.0, 290.0, 292.0, 286.0, 287.0, 260.0, 265.0, 168.0, 173.0, 291.0, 282.0, 287.0, 286.0, 278.0, 298.0, 257.0, 256.0, 269.0, 264.0, 255.0, 270.0, 266.0, 259.0, 292.0, 290.0, 264.0, 266.0, 291.0, 285.0, 292.0, 287.0, 288.0, 285.0, 283.0, 299.0, 270.0, 303.0, 288.0, 288.0, 287.0, 289.0, 288.0, 288.0, 245.0, 228.0, 249.0, 276.0, 266.0, 256.0, 257.0, 273.0, 305.0, 271.0, 295.0, 281.0, 306.0, 267.0, 262.0, 263.0, 285.0, 294.0, 272.0, 298.0, 300.0, 270.0, 301.0, 278.0, 256.0, 274.0, 288.0, 294.0, 287.0, 292.0, 296.0, 283.0, 133.0, 118.0, 297.0, 282.0, 277.0, 299.0, 248.0, 271.0, 290.0, 283.0, 288.0, 285.0, 301.0, 275.0, 253.0, 266.0, 284.0, 295.0, 283.0, 296.0, 285.0, 294.0, 291.0, 291.0, 251.0, 265.0, 266.0, 256.0, 259.0, 271.0, 256.0, 266.0, 285.0, 294.0, 294.0, 282.0, 290.0, 286.0, 270.0, 255.0, 294.0, 282.0, 285.0, 291.0, 285.0, 291.0, 257.0, 259.0, 276.0, 297.0, 277.0, 299.0, 288.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7094424296946303, "mean_inference_ms": 1.277790367588252, "mean_action_processing_ms": 0.1351939188241857, "mean_env_wait_ms": 0.8561024471428149, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 251.0, "episode_reward_mean": 546.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 273.315}, "hist_stats": {"episode_reward": [525.0, 573.0, 530.0, 522.0, 522.0, 294.0, 519.0, 525.0, 570.0, 576.0, 576.0, 579.0, 370.0, 522.0, 519.0, 573.0, 530.0, 519.0, 561.0, 
573.0, 525.0, 576.0, 525.0, 579.0, 573.0, 576.0, 573.0, 492.0, 573.0, 522.0, 576.0, 573.0, 573.0, 576.0, 570.0, 573.0, 582.0, 573.0, 525.0, 341.0, 573.0, 573.0, 576.0, 513.0, 533.0, 525.0, 525.0, 582.0, 530.0, 576.0, 579.0, 573.0, 582.0, 573.0, 576.0, 576.0, 576.0, 473.0, 525.0, 522.0, 530.0, 576.0, 576.0, 573.0, 525.0, 579.0, 570.0, 570.0, 579.0, 530.0, 582.0, 579.0, 579.0, 251.0, 579.0, 576.0, 519.0, 573.0, 573.0, 576.0, 519.0, 579.0, 579.0, 579.0, 582.0, 516.0, 522.0, 530.0, 522.0, 579.0, 576.0, 576.0, 525.0, 576.0, 576.0, 576.0, 516.0, 573.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 260.0, 298.0, 275.0, 259.0, 271.0, 280.0, 242.0, 253.0, 269.0, 153.0, 141.0, 250.0, 269.0, 263.0, 262.0, 288.0, 282.0, 280.0, 296.0, 290.0, 286.0, 273.0, 306.0, 176.0, 194.0, 262.0, 260.0, 252.0, 267.0, 298.0, 275.0, 267.0, 263.0, 265.0, 254.0, 281.0, 280.0, 284.0, 289.0, 260.0, 265.0, 295.0, 281.0, 247.0, 278.0, 286.0, 293.0, 282.0, 291.0, 292.0, 284.0, 283.0, 290.0, 241.0, 251.0, 290.0, 283.0, 271.0, 251.0, 291.0, 285.0, 285.0, 288.0, 271.0, 302.0, 288.0, 288.0, 276.0, 294.0, 285.0, 288.0, 290.0, 292.0, 286.0, 287.0, 260.0, 265.0, 168.0, 173.0, 291.0, 282.0, 287.0, 286.0, 278.0, 298.0, 257.0, 256.0, 269.0, 264.0, 255.0, 270.0, 266.0, 259.0, 292.0, 290.0, 264.0, 266.0, 291.0, 285.0, 292.0, 287.0, 288.0, 285.0, 283.0, 299.0, 270.0, 303.0, 288.0, 288.0, 287.0, 289.0, 288.0, 288.0, 245.0, 228.0, 249.0, 276.0, 266.0, 256.0, 257.0, 273.0, 305.0, 271.0, 295.0, 281.0, 306.0, 
267.0, 262.0, 263.0, 285.0, 294.0, 272.0, 298.0, 300.0, 270.0, 301.0, 278.0, 256.0, 274.0, 288.0, 294.0, 287.0, 292.0, 296.0, 283.0, 133.0, 118.0, 297.0, 282.0, 277.0, 299.0, 248.0, 271.0, 290.0, 283.0, 288.0, 285.0, 301.0, 275.0, 253.0, 266.0, 284.0, 295.0, 283.0, 296.0, 285.0, 294.0, 291.0, 291.0, 251.0, 265.0, 266.0, 256.0, 259.0, 271.0, 256.0, 266.0, 285.0, 294.0, 294.0, 282.0, 290.0, 286.0, 270.0, 255.0, 294.0, 282.0, 285.0, 291.0, 285.0, 291.0, 257.0, 259.0, 276.0, 297.0, 277.0, 299.0, 288.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7094424296946303, "mean_inference_ms": 1.277790367588252, "mean_action_processing_ms": 0.1351939188241857, "mean_env_wait_ms": 0.8561024471428149, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5145600, "num_agent_steps_trained": 5145600, "num_env_steps_sampled": 2572800, "num_env_steps_trained": 2572800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2572800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5145600, "timers": {"training_iteration_time_ms": 5789.907, "learn_time_ms": 1229.164, "learn_throughput": 10413.58, "synch_weights_time_ms": 16.659}, "counters": {"num_env_steps_sampled": 2572800, "num_env_steps_trained": 2572800, "num_agent_steps_sampled": 5145600, "num_agent_steps_trained": 5145600}, "done": false, "episodes_total": 6432, "training_iteration": 201, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-33", "timestamp": 1666581213, "time_this_iter_s": 4.224009275436401, "time_total_s": 785.5880959033966, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 785.5880959033966, "timesteps_since_restore": 0, "iterations_since_restore": 201, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 47.76666666666667, "ram_util_percent": 20.93333333333333}} +{"custom_metrics": {"sparse_reward_mean": 191.0, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 170.1, "shaped_reward_min": 91, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.6, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.99, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.17, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.04, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.83, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.04, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.04, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0002456922084093094, "policy_loss": -0.0005025984719395638, "vf_loss": 7.635164737701416, "vf_explained_var": 0.6201376914978027, "kl": 0.0017764116637408733, "entropy": 1.0132180452346802, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2585600, "num_env_steps_trained": 2585600, "num_agent_steps_sampled": 5171200, "num_agent_steps_trained": 5171200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 251.0, "episode_reward_mean": 552.1, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 276.05}, "custom_metrics": {"sparse_reward_mean": 191.0, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 170.1, "shaped_reward_min": 91, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.6, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.99, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.23, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.04, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 4, 
"soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.04, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.04, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 570.0, 573.0, 582.0, 573.0, 525.0, 341.0, 573.0, 573.0, 576.0, 513.0, 533.0, 525.0, 525.0, 582.0, 530.0, 576.0, 579.0, 573.0, 582.0, 573.0, 576.0, 576.0, 576.0, 473.0, 525.0, 522.0, 530.0, 576.0, 576.0, 573.0, 525.0, 579.0, 570.0, 570.0, 579.0, 530.0, 582.0, 579.0, 579.0, 251.0, 579.0, 576.0, 519.0, 573.0, 573.0, 576.0, 519.0, 579.0, 579.0, 579.0, 582.0, 516.0, 522.0, 530.0, 522.0, 579.0, 576.0, 576.0, 525.0, 576.0, 576.0, 576.0, 516.0, 573.0, 576.0, 576.0, 522.0, 462.0, 530.0, 570.0, 576.0, 522.0, 579.0, 510.0, 579.0, 576.0, 576.0, 530.0, 576.0, 576.0, 576.0, 525.0, 576.0, 576.0, 579.0, 579.0, 530.0, 573.0, 522.0, 522.0, 579.0, 459.0, 573.0, 579.0, 576.0, 582.0, 522.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [271.0, 302.0, 288.0, 288.0, 276.0, 294.0, 285.0, 288.0, 290.0, 292.0, 286.0, 
287.0, 260.0, 265.0, 168.0, 173.0, 291.0, 282.0, 287.0, 286.0, 278.0, 298.0, 257.0, 256.0, 269.0, 264.0, 255.0, 270.0, 266.0, 259.0, 292.0, 290.0, 264.0, 266.0, 291.0, 285.0, 292.0, 287.0, 288.0, 285.0, 283.0, 299.0, 270.0, 303.0, 288.0, 288.0, 287.0, 289.0, 288.0, 288.0, 245.0, 228.0, 249.0, 276.0, 266.0, 256.0, 257.0, 273.0, 305.0, 271.0, 295.0, 281.0, 306.0, 267.0, 262.0, 263.0, 285.0, 294.0, 272.0, 298.0, 300.0, 270.0, 301.0, 278.0, 256.0, 274.0, 288.0, 294.0, 287.0, 292.0, 296.0, 283.0, 133.0, 118.0, 297.0, 282.0, 277.0, 299.0, 248.0, 271.0, 290.0, 283.0, 288.0, 285.0, 301.0, 275.0, 253.0, 266.0, 284.0, 295.0, 283.0, 296.0, 285.0, 294.0, 291.0, 291.0, 251.0, 265.0, 266.0, 256.0, 259.0, 271.0, 256.0, 266.0, 285.0, 294.0, 294.0, 282.0, 290.0, 286.0, 270.0, 255.0, 294.0, 282.0, 285.0, 291.0, 285.0, 291.0, 257.0, 259.0, 276.0, 297.0, 277.0, 299.0, 288.0, 288.0, 249.0, 273.0, 233.0, 229.0, 257.0, 273.0, 299.0, 271.0, 292.0, 284.0, 257.0, 265.0, 293.0, 286.0, 249.0, 261.0, 300.0, 279.0, 277.0, 299.0, 291.0, 285.0, 265.0, 265.0, 288.0, 288.0, 290.0, 286.0, 285.0, 291.0, 275.0, 250.0, 294.0, 282.0, 285.0, 291.0, 287.0, 292.0, 284.0, 295.0, 264.0, 266.0, 277.0, 296.0, 268.0, 254.0, 269.0, 253.0, 291.0, 288.0, 229.0, 230.0, 293.0, 280.0, 290.0, 289.0, 286.0, 290.0, 291.0, 291.0, 257.0, 265.0, 296.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7120554492810086, "mean_inference_ms": 1.2828197056843382, "mean_action_processing_ms": 0.13543027987221926, "mean_env_wait_ms": 0.857572884175066, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 251.0, "episode_reward_mean": 552.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 306.0}, "policy_reward_mean": {"ppo": 276.05}, "hist_stats": {"episode_reward": [573.0, 576.0, 570.0, 573.0, 582.0, 573.0, 525.0, 341.0, 573.0, 573.0, 576.0, 513.0, 533.0, 525.0, 525.0, 582.0, 530.0, 576.0, 579.0, 
573.0, 582.0, 573.0, 576.0, 576.0, 576.0, 473.0, 525.0, 522.0, 530.0, 576.0, 576.0, 573.0, 525.0, 579.0, 570.0, 570.0, 579.0, 530.0, 582.0, 579.0, 579.0, 251.0, 579.0, 576.0, 519.0, 573.0, 573.0, 576.0, 519.0, 579.0, 579.0, 579.0, 582.0, 516.0, 522.0, 530.0, 522.0, 579.0, 576.0, 576.0, 525.0, 576.0, 576.0, 576.0, 516.0, 573.0, 576.0, 576.0, 522.0, 462.0, 530.0, 570.0, 576.0, 522.0, 579.0, 510.0, 579.0, 576.0, 576.0, 530.0, 576.0, 576.0, 576.0, 525.0, 576.0, 576.0, 579.0, 579.0, 530.0, 573.0, 522.0, 522.0, 579.0, 459.0, 573.0, 579.0, 576.0, 582.0, 522.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [271.0, 302.0, 288.0, 288.0, 276.0, 294.0, 285.0, 288.0, 290.0, 292.0, 286.0, 287.0, 260.0, 265.0, 168.0, 173.0, 291.0, 282.0, 287.0, 286.0, 278.0, 298.0, 257.0, 256.0, 269.0, 264.0, 255.0, 270.0, 266.0, 259.0, 292.0, 290.0, 264.0, 266.0, 291.0, 285.0, 292.0, 287.0, 288.0, 285.0, 283.0, 299.0, 270.0, 303.0, 288.0, 288.0, 287.0, 289.0, 288.0, 288.0, 245.0, 228.0, 249.0, 276.0, 266.0, 256.0, 257.0, 273.0, 305.0, 271.0, 295.0, 281.0, 306.0, 267.0, 262.0, 263.0, 285.0, 294.0, 272.0, 298.0, 300.0, 270.0, 301.0, 278.0, 256.0, 274.0, 288.0, 294.0, 287.0, 292.0, 296.0, 283.0, 133.0, 118.0, 297.0, 282.0, 277.0, 299.0, 248.0, 271.0, 290.0, 283.0, 288.0, 285.0, 301.0, 275.0, 253.0, 266.0, 284.0, 295.0, 283.0, 296.0, 285.0, 294.0, 291.0, 291.0, 251.0, 265.0, 266.0, 256.0, 259.0, 271.0, 256.0, 266.0, 285.0, 294.0, 294.0, 282.0, 290.0, 286.0, 270.0, 255.0, 294.0, 282.0, 285.0, 291.0, 285.0, 
291.0, 257.0, 259.0, 276.0, 297.0, 277.0, 299.0, 288.0, 288.0, 249.0, 273.0, 233.0, 229.0, 257.0, 273.0, 299.0, 271.0, 292.0, 284.0, 257.0, 265.0, 293.0, 286.0, 249.0, 261.0, 300.0, 279.0, 277.0, 299.0, 291.0, 285.0, 265.0, 265.0, 288.0, 288.0, 290.0, 286.0, 285.0, 291.0, 275.0, 250.0, 294.0, 282.0, 285.0, 291.0, 287.0, 292.0, 284.0, 295.0, 264.0, 266.0, 277.0, 296.0, 268.0, 254.0, 269.0, 253.0, 291.0, 288.0, 229.0, 230.0, 293.0, 280.0, 290.0, 289.0, 286.0, 290.0, 291.0, 291.0, 257.0, 265.0, 296.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7120554492810086, "mean_inference_ms": 1.2828197056843382, "mean_action_processing_ms": 0.13543027987221926, "mean_env_wait_ms": 0.857572884175066, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5171200, "num_agent_steps_trained": 5171200, "num_env_steps_sampled": 2585600, "num_env_steps_trained": 2585600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2585600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5171200, "timers": {"training_iteration_time_ms": 5822.715, "learn_time_ms": 1227.852, "learn_throughput": 10424.712, "synch_weights_time_ms": 16.557}, "counters": {"num_env_steps_sampled": 2585600, "num_env_steps_trained": 2585600, "num_agent_steps_sampled": 5171200, "num_agent_steps_trained": 5171200}, "done": false, "episodes_total": 6464, "training_iteration": 202, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-37", "timestamp": 1666581217, "time_this_iter_s": 4.5087339878082275, "time_total_s": 790.0968298912048, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 790.0968298912048, "timesteps_since_restore": 0, "iterations_since_restore": 202, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 46.98571428571428, "ram_util_percent": 21.7}} +{"custom_metrics": {"sparse_reward_mean": 191.4, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 169.69, "shaped_reward_min": 91, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.49, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.1, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.65, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.85, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013950903667137027, "policy_loss": -0.0016577579081058502, "vf_loss": 7.6537275314331055, "vf_explained_var": 0.6278898119926453, "kl": 0.0020676562562584877, "entropy": 1.005408525466919, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2598400, "num_env_steps_trained": 2598400, "num_agent_steps_sampled": 5196800, "num_agent_steps_trained": 5196800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 251.0, "episode_reward_mean": 552.49, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 276.245}, "custom_metrics": {"sparse_reward_mean": 191.4, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 169.69, "shaped_reward_min": 91, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.49, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.1, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.65, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.85, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 579.0, 570.0, 570.0, 579.0, 530.0, 582.0, 579.0, 579.0, 251.0, 579.0, 576.0, 519.0, 573.0, 573.0, 576.0, 519.0, 579.0, 579.0, 579.0, 582.0, 516.0, 522.0, 530.0, 522.0, 579.0, 576.0, 576.0, 525.0, 576.0, 576.0, 576.0, 516.0, 573.0, 576.0, 576.0, 522.0, 462.0, 530.0, 570.0, 576.0, 522.0, 579.0, 510.0, 579.0, 576.0, 576.0, 530.0, 576.0, 576.0, 576.0, 525.0, 576.0, 576.0, 579.0, 579.0, 530.0, 573.0, 522.0, 522.0, 579.0, 459.0, 573.0, 579.0, 576.0, 582.0, 522.0, 576.0, 576.0, 570.0, 522.0, 576.0, 573.0, 522.0, 516.0, 576.0, 570.0, 570.0, 501.0, 573.0, 576.0, 473.0, 573.0, 525.0, 570.0, 576.0, 519.0, 522.0, 522.0, 570.0, 573.0, 522.0, 576.0, 576.0, 519.0, 530.0, 579.0, 570.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 263.0, 285.0, 294.0, 272.0, 298.0, 300.0, 270.0, 301.0, 278.0, 256.0, 
274.0, 288.0, 294.0, 287.0, 292.0, 296.0, 283.0, 133.0, 118.0, 297.0, 282.0, 277.0, 299.0, 248.0, 271.0, 290.0, 283.0, 288.0, 285.0, 301.0, 275.0, 253.0, 266.0, 284.0, 295.0, 283.0, 296.0, 285.0, 294.0, 291.0, 291.0, 251.0, 265.0, 266.0, 256.0, 259.0, 271.0, 256.0, 266.0, 285.0, 294.0, 294.0, 282.0, 290.0, 286.0, 270.0, 255.0, 294.0, 282.0, 285.0, 291.0, 285.0, 291.0, 257.0, 259.0, 276.0, 297.0, 277.0, 299.0, 288.0, 288.0, 249.0, 273.0, 233.0, 229.0, 257.0, 273.0, 299.0, 271.0, 292.0, 284.0, 257.0, 265.0, 293.0, 286.0, 249.0, 261.0, 300.0, 279.0, 277.0, 299.0, 291.0, 285.0, 265.0, 265.0, 288.0, 288.0, 290.0, 286.0, 285.0, 291.0, 275.0, 250.0, 294.0, 282.0, 285.0, 291.0, 287.0, 292.0, 284.0, 295.0, 264.0, 266.0, 277.0, 296.0, 268.0, 254.0, 269.0, 253.0, 291.0, 288.0, 229.0, 230.0, 293.0, 280.0, 290.0, 289.0, 286.0, 290.0, 291.0, 291.0, 257.0, 265.0, 296.0, 280.0, 293.0, 283.0, 296.0, 274.0, 263.0, 259.0, 282.0, 294.0, 290.0, 283.0, 270.0, 252.0, 256.0, 260.0, 288.0, 288.0, 286.0, 284.0, 290.0, 280.0, 253.0, 248.0, 285.0, 288.0, 294.0, 282.0, 226.0, 247.0, 293.0, 280.0, 270.0, 255.0, 286.0, 284.0, 292.0, 284.0, 255.0, 264.0, 267.0, 255.0, 270.0, 252.0, 273.0, 297.0, 278.0, 295.0, 267.0, 255.0, 292.0, 284.0, 292.0, 284.0, 259.0, 260.0, 260.0, 270.0, 281.0, 298.0, 281.0, 289.0, 280.0, 293.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.712889251168771, "mean_inference_ms": 1.28390343160238, "mean_action_processing_ms": 0.13554123488560507, "mean_env_wait_ms": 0.8581576529366974, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 251.0, "episode_reward_mean": 552.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 301.0}, "policy_reward_mean": {"ppo": 276.245}, "hist_stats": {"episode_reward": [525.0, 579.0, 570.0, 570.0, 579.0, 530.0, 582.0, 579.0, 579.0, 251.0, 579.0, 576.0, 519.0, 573.0, 573.0, 576.0, 519.0, 579.0, 579.0, 
579.0, 582.0, 516.0, 522.0, 530.0, 522.0, 579.0, 576.0, 576.0, 525.0, 576.0, 576.0, 576.0, 516.0, 573.0, 576.0, 576.0, 522.0, 462.0, 530.0, 570.0, 576.0, 522.0, 579.0, 510.0, 579.0, 576.0, 576.0, 530.0, 576.0, 576.0, 576.0, 525.0, 576.0, 576.0, 579.0, 579.0, 530.0, 573.0, 522.0, 522.0, 579.0, 459.0, 573.0, 579.0, 576.0, 582.0, 522.0, 576.0, 576.0, 570.0, 522.0, 576.0, 573.0, 522.0, 516.0, 576.0, 570.0, 570.0, 501.0, 573.0, 576.0, 473.0, 573.0, 525.0, 570.0, 576.0, 519.0, 522.0, 522.0, 570.0, 573.0, 522.0, 576.0, 576.0, 519.0, 530.0, 579.0, 570.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 263.0, 285.0, 294.0, 272.0, 298.0, 300.0, 270.0, 301.0, 278.0, 256.0, 274.0, 288.0, 294.0, 287.0, 292.0, 296.0, 283.0, 133.0, 118.0, 297.0, 282.0, 277.0, 299.0, 248.0, 271.0, 290.0, 283.0, 288.0, 285.0, 301.0, 275.0, 253.0, 266.0, 284.0, 295.0, 283.0, 296.0, 285.0, 294.0, 291.0, 291.0, 251.0, 265.0, 266.0, 256.0, 259.0, 271.0, 256.0, 266.0, 285.0, 294.0, 294.0, 282.0, 290.0, 286.0, 270.0, 255.0, 294.0, 282.0, 285.0, 291.0, 285.0, 291.0, 257.0, 259.0, 276.0, 297.0, 277.0, 299.0, 288.0, 288.0, 249.0, 273.0, 233.0, 229.0, 257.0, 273.0, 299.0, 271.0, 292.0, 284.0, 257.0, 265.0, 293.0, 286.0, 249.0, 261.0, 300.0, 279.0, 277.0, 299.0, 291.0, 285.0, 265.0, 265.0, 288.0, 288.0, 290.0, 286.0, 285.0, 291.0, 275.0, 250.0, 294.0, 282.0, 285.0, 291.0, 287.0, 292.0, 284.0, 295.0, 264.0, 266.0, 277.0, 296.0, 268.0, 254.0, 269.0, 253.0, 291.0, 288.0, 229.0, 230.0, 293.0, 280.0, 290.0, 
289.0, 286.0, 290.0, 291.0, 291.0, 257.0, 265.0, 296.0, 280.0, 293.0, 283.0, 296.0, 274.0, 263.0, 259.0, 282.0, 294.0, 290.0, 283.0, 270.0, 252.0, 256.0, 260.0, 288.0, 288.0, 286.0, 284.0, 290.0, 280.0, 253.0, 248.0, 285.0, 288.0, 294.0, 282.0, 226.0, 247.0, 293.0, 280.0, 270.0, 255.0, 286.0, 284.0, 292.0, 284.0, 255.0, 264.0, 267.0, 255.0, 270.0, 252.0, 273.0, 297.0, 278.0, 295.0, 267.0, 255.0, 292.0, 284.0, 292.0, 284.0, 259.0, 260.0, 260.0, 270.0, 281.0, 298.0, 281.0, 289.0, 280.0, 293.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.712889251168771, "mean_inference_ms": 1.28390343160238, "mean_action_processing_ms": 0.13554123488560507, "mean_env_wait_ms": 0.8581576529366974, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5196800, "num_agent_steps_trained": 5196800, "num_env_steps_sampled": 2598400, "num_env_steps_trained": 2598400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2598400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5196800, "timers": {"training_iteration_time_ms": 5768.202, "learn_time_ms": 1219.407, "learn_throughput": 10496.902, "synch_weights_time_ms": 16.577}, "counters": {"num_env_steps_sampled": 2598400, "num_env_steps_trained": 2598400, "num_agent_steps_sampled": 5196800, "num_agent_steps_trained": 5196800}, "done": false, "episodes_total": 6496, "training_iteration": 203, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-41", "timestamp": 1666581221, "time_this_iter_s": 3.7892675399780273, "time_total_s": 793.8860974311829, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 793.8860974311829, "timesteps_since_restore": 0, "iterations_since_restore": 203, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.3, "ram_util_percent": 22.016666666666666}} +{"custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.32, "shaped_reward_min": 139, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.93, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.98, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.55, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.71, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.29, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.46, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.84, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.83, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.29, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.46, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.29, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.46, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00187534443102777, "policy_loss": -0.0021297007333487272, "vf_loss": 7.590843677520752, "vf_explained_var": 0.6290745139122009, "kl": 0.002140925731509924, "entropy": 1.0094540119171143, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2611200, "num_env_steps_trained": 2611200, "num_agent_steps_sampled": 5222400, "num_agent_steps_trained": 5222400}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 459.0, "episode_reward_mean": 552.52, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 300.0}, "policy_reward_mean": {"ppo": 276.26}, "custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.32, "shaped_reward_min": 139, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.93, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.98, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.55, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.71, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.28, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.29, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.46, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.84, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.33, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.11, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.83, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.29, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.46, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.29, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.46, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 573.0, 576.0, 576.0, 522.0, 462.0, 530.0, 570.0, 576.0, 522.0, 579.0, 510.0, 579.0, 576.0, 576.0, 530.0, 576.0, 576.0, 576.0, 525.0, 576.0, 576.0, 579.0, 579.0, 530.0, 573.0, 522.0, 522.0, 579.0, 459.0, 573.0, 579.0, 576.0, 582.0, 522.0, 576.0, 576.0, 570.0, 522.0, 576.0, 573.0, 522.0, 516.0, 576.0, 570.0, 570.0, 501.0, 573.0, 576.0, 473.0, 573.0, 525.0, 570.0, 576.0, 519.0, 522.0, 522.0, 570.0, 573.0, 522.0, 576.0, 576.0, 519.0, 530.0, 579.0, 570.0, 573.0, 579.0, 570.0, 519.0, 579.0, 579.0, 525.0, 573.0, 522.0, 579.0, 582.0, 573.0, 522.0, 522.0, 522.0, 573.0, 576.0, 573.0, 567.0, 570.0, 525.0, 519.0, 525.0, 573.0, 582.0, 573.0, 573.0, 459.0, 522.0, 576.0, 522.0, 579.0, 525.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 259.0, 276.0, 297.0, 277.0, 299.0, 288.0, 288.0, 249.0, 273.0, 233.0, 
229.0, 257.0, 273.0, 299.0, 271.0, 292.0, 284.0, 257.0, 265.0, 293.0, 286.0, 249.0, 261.0, 300.0, 279.0, 277.0, 299.0, 291.0, 285.0, 265.0, 265.0, 288.0, 288.0, 290.0, 286.0, 285.0, 291.0, 275.0, 250.0, 294.0, 282.0, 285.0, 291.0, 287.0, 292.0, 284.0, 295.0, 264.0, 266.0, 277.0, 296.0, 268.0, 254.0, 269.0, 253.0, 291.0, 288.0, 229.0, 230.0, 293.0, 280.0, 290.0, 289.0, 286.0, 290.0, 291.0, 291.0, 257.0, 265.0, 296.0, 280.0, 293.0, 283.0, 296.0, 274.0, 263.0, 259.0, 282.0, 294.0, 290.0, 283.0, 270.0, 252.0, 256.0, 260.0, 288.0, 288.0, 286.0, 284.0, 290.0, 280.0, 253.0, 248.0, 285.0, 288.0, 294.0, 282.0, 226.0, 247.0, 293.0, 280.0, 270.0, 255.0, 286.0, 284.0, 292.0, 284.0, 255.0, 264.0, 267.0, 255.0, 270.0, 252.0, 273.0, 297.0, 278.0, 295.0, 267.0, 255.0, 292.0, 284.0, 292.0, 284.0, 259.0, 260.0, 260.0, 270.0, 281.0, 298.0, 281.0, 289.0, 280.0, 293.0, 286.0, 293.0, 282.0, 288.0, 261.0, 258.0, 294.0, 285.0, 290.0, 289.0, 253.0, 272.0, 286.0, 287.0, 258.0, 264.0, 296.0, 283.0, 292.0, 290.0, 285.0, 288.0, 261.0, 261.0, 265.0, 257.0, 265.0, 257.0, 286.0, 287.0, 298.0, 278.0, 281.0, 292.0, 295.0, 272.0, 283.0, 287.0, 261.0, 264.0, 269.0, 250.0, 265.0, 260.0, 290.0, 283.0, 285.0, 297.0, 276.0, 297.0, 283.0, 290.0, 227.0, 232.0, 258.0, 264.0, 280.0, 296.0, 263.0, 259.0, 281.0, 298.0, 266.0, 259.0, 281.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7131311767953434, "mean_inference_ms": 1.2843261845942395, "mean_action_processing_ms": 0.1356088421792502, "mean_env_wait_ms": 0.8584182074596766, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 459.0, "episode_reward_mean": 552.52, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 300.0}, "policy_reward_mean": {"ppo": 276.26}, "hist_stats": {"episode_reward": [516.0, 573.0, 576.0, 576.0, 522.0, 462.0, 530.0, 570.0, 576.0, 522.0, 579.0, 510.0, 579.0, 576.0, 576.0, 530.0, 576.0, 576.0, 576.0, 
525.0, 576.0, 576.0, 579.0, 579.0, 530.0, 573.0, 522.0, 522.0, 579.0, 459.0, 573.0, 579.0, 576.0, 582.0, 522.0, 576.0, 576.0, 570.0, 522.0, 576.0, 573.0, 522.0, 516.0, 576.0, 570.0, 570.0, 501.0, 573.0, 576.0, 473.0, 573.0, 525.0, 570.0, 576.0, 519.0, 522.0, 522.0, 570.0, 573.0, 522.0, 576.0, 576.0, 519.0, 530.0, 579.0, 570.0, 573.0, 579.0, 570.0, 519.0, 579.0, 579.0, 525.0, 573.0, 522.0, 579.0, 582.0, 573.0, 522.0, 522.0, 522.0, 573.0, 576.0, 573.0, 567.0, 570.0, 525.0, 519.0, 525.0, 573.0, 582.0, 573.0, 573.0, 459.0, 522.0, 576.0, 522.0, 579.0, 525.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [257.0, 259.0, 276.0, 297.0, 277.0, 299.0, 288.0, 288.0, 249.0, 273.0, 233.0, 229.0, 257.0, 273.0, 299.0, 271.0, 292.0, 284.0, 257.0, 265.0, 293.0, 286.0, 249.0, 261.0, 300.0, 279.0, 277.0, 299.0, 291.0, 285.0, 265.0, 265.0, 288.0, 288.0, 290.0, 286.0, 285.0, 291.0, 275.0, 250.0, 294.0, 282.0, 285.0, 291.0, 287.0, 292.0, 284.0, 295.0, 264.0, 266.0, 277.0, 296.0, 268.0, 254.0, 269.0, 253.0, 291.0, 288.0, 229.0, 230.0, 293.0, 280.0, 290.0, 289.0, 286.0, 290.0, 291.0, 291.0, 257.0, 265.0, 296.0, 280.0, 293.0, 283.0, 296.0, 274.0, 263.0, 259.0, 282.0, 294.0, 290.0, 283.0, 270.0, 252.0, 256.0, 260.0, 288.0, 288.0, 286.0, 284.0, 290.0, 280.0, 253.0, 248.0, 285.0, 288.0, 294.0, 282.0, 226.0, 247.0, 293.0, 280.0, 270.0, 255.0, 286.0, 284.0, 292.0, 284.0, 255.0, 264.0, 267.0, 255.0, 270.0, 252.0, 273.0, 297.0, 278.0, 295.0, 267.0, 255.0, 292.0, 284.0, 292.0, 284.0, 259.0, 260.0, 260.0, 
270.0, 281.0, 298.0, 281.0, 289.0, 280.0, 293.0, 286.0, 293.0, 282.0, 288.0, 261.0, 258.0, 294.0, 285.0, 290.0, 289.0, 253.0, 272.0, 286.0, 287.0, 258.0, 264.0, 296.0, 283.0, 292.0, 290.0, 285.0, 288.0, 261.0, 261.0, 265.0, 257.0, 265.0, 257.0, 286.0, 287.0, 298.0, 278.0, 281.0, 292.0, 295.0, 272.0, 283.0, 287.0, 261.0, 264.0, 269.0, 250.0, 265.0, 260.0, 290.0, 283.0, 285.0, 297.0, 276.0, 297.0, 283.0, 290.0, 227.0, 232.0, 258.0, 264.0, 280.0, 296.0, 263.0, 259.0, 281.0, 298.0, 266.0, 259.0, 281.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7131311767953434, "mean_inference_ms": 1.2843261845942395, "mean_action_processing_ms": 0.1356088421792502, "mean_env_wait_ms": 0.8584182074596766, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5222400, "num_agent_steps_trained": 5222400, "num_env_steps_sampled": 2611200, "num_env_steps_trained": 2611200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2611200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5222400, "timers": {"training_iteration_time_ms": 5710.771, "learn_time_ms": 1195.228, "learn_throughput": 10709.252, "synch_weights_time_ms": 15.847}, "counters": {"num_env_steps_sampled": 2611200, "num_env_steps_trained": 2611200, "num_agent_steps_sampled": 5222400, "num_agent_steps_trained": 5222400}, "done": false, "episodes_total": 6528, "training_iteration": 204, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-45", "timestamp": 1666581225, "time_this_iter_s": 3.8779067993164062, "time_total_s": 797.7640042304993, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 797.7640042304993, "timesteps_since_restore": 0, "iterations_since_restore": 204, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 30.820000000000004, "ram_util_percent": 12.9}} +{"custom_metrics": {"sparse_reward_mean": 190.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.0, "shaped_reward_min": 139, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.81, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.09, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.04, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.55, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.04, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.55, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.04, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.55, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 3.6696321330964565e-05, "policy_loss": -0.00021456298418343067, "vf_loss": 7.583790302276611, "vf_explained_var": 0.6205594539642334, "kl": 0.00221795542165637, "entropy": 1.0142393112182617, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2624000, "num_env_steps_trained": 2624000, "num_agent_steps_sampled": 5248000, "num_agent_steps_trained": 5248000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 459.0, "episode_reward_mean": 550.6, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 275.3}, "custom_metrics": {"sparse_reward_mean": 190.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.0, "shaped_reward_min": 139, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.81, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.09, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.04, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.55, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.04, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.55, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.04, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.55, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 582.0, 522.0, 576.0, 576.0, 570.0, 522.0, 576.0, 573.0, 522.0, 516.0, 576.0, 570.0, 570.0, 501.0, 573.0, 576.0, 473.0, 573.0, 525.0, 570.0, 576.0, 519.0, 522.0, 522.0, 570.0, 573.0, 522.0, 576.0, 576.0, 519.0, 530.0, 579.0, 570.0, 573.0, 579.0, 570.0, 519.0, 579.0, 579.0, 525.0, 573.0, 522.0, 579.0, 582.0, 573.0, 522.0, 522.0, 522.0, 573.0, 576.0, 573.0, 567.0, 570.0, 525.0, 519.0, 525.0, 573.0, 582.0, 573.0, 573.0, 459.0, 522.0, 576.0, 522.0, 579.0, 525.0, 576.0, 519.0, 576.0, 525.0, 579.0, 533.0, 525.0, 573.0, 579.0, 570.0, 530.0, 522.0, 527.0, 525.0, 576.0, 530.0, 530.0, 525.0, 519.0, 573.0, 522.0, 518.0, 522.0, 525.0, 573.0, 570.0, 573.0, 525.0, 519.0, 576.0, 576.0, 573.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 291.0, 291.0, 257.0, 265.0, 296.0, 280.0, 293.0, 283.0, 296.0, 
274.0, 263.0, 259.0, 282.0, 294.0, 290.0, 283.0, 270.0, 252.0, 256.0, 260.0, 288.0, 288.0, 286.0, 284.0, 290.0, 280.0, 253.0, 248.0, 285.0, 288.0, 294.0, 282.0, 226.0, 247.0, 293.0, 280.0, 270.0, 255.0, 286.0, 284.0, 292.0, 284.0, 255.0, 264.0, 267.0, 255.0, 270.0, 252.0, 273.0, 297.0, 278.0, 295.0, 267.0, 255.0, 292.0, 284.0, 292.0, 284.0, 259.0, 260.0, 260.0, 270.0, 281.0, 298.0, 281.0, 289.0, 280.0, 293.0, 286.0, 293.0, 282.0, 288.0, 261.0, 258.0, 294.0, 285.0, 290.0, 289.0, 253.0, 272.0, 286.0, 287.0, 258.0, 264.0, 296.0, 283.0, 292.0, 290.0, 285.0, 288.0, 261.0, 261.0, 265.0, 257.0, 265.0, 257.0, 286.0, 287.0, 298.0, 278.0, 281.0, 292.0, 295.0, 272.0, 283.0, 287.0, 261.0, 264.0, 269.0, 250.0, 265.0, 260.0, 290.0, 283.0, 285.0, 297.0, 276.0, 297.0, 283.0, 290.0, 227.0, 232.0, 258.0, 264.0, 280.0, 296.0, 263.0, 259.0, 281.0, 298.0, 266.0, 259.0, 281.0, 295.0, 245.0, 274.0, 287.0, 289.0, 262.0, 263.0, 291.0, 288.0, 266.0, 267.0, 268.0, 257.0, 301.0, 272.0, 291.0, 288.0, 275.0, 295.0, 278.0, 252.0, 272.0, 250.0, 280.0, 247.0, 255.0, 270.0, 283.0, 293.0, 264.0, 266.0, 249.0, 281.0, 259.0, 266.0, 257.0, 262.0, 284.0, 289.0, 246.0, 276.0, 245.0, 273.0, 251.0, 271.0, 262.0, 263.0, 287.0, 286.0, 291.0, 279.0, 271.0, 302.0, 266.0, 259.0, 262.0, 257.0, 282.0, 294.0, 290.0, 286.0, 295.0, 278.0, 289.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7131083339732492, "mean_inference_ms": 1.2842183875528914, "mean_action_processing_ms": 0.13563136158971595, "mean_env_wait_ms": 0.858410746337438, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 459.0, "episode_reward_mean": 550.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 275.3}, "hist_stats": {"episode_reward": [576.0, 582.0, 522.0, 576.0, 576.0, 570.0, 522.0, 576.0, 573.0, 522.0, 516.0, 576.0, 570.0, 570.0, 501.0, 573.0, 576.0, 473.0, 573.0, 
525.0, 570.0, 576.0, 519.0, 522.0, 522.0, 570.0, 573.0, 522.0, 576.0, 576.0, 519.0, 530.0, 579.0, 570.0, 573.0, 579.0, 570.0, 519.0, 579.0, 579.0, 525.0, 573.0, 522.0, 579.0, 582.0, 573.0, 522.0, 522.0, 522.0, 573.0, 576.0, 573.0, 567.0, 570.0, 525.0, 519.0, 525.0, 573.0, 582.0, 573.0, 573.0, 459.0, 522.0, 576.0, 522.0, 579.0, 525.0, 576.0, 519.0, 576.0, 525.0, 579.0, 533.0, 525.0, 573.0, 579.0, 570.0, 530.0, 522.0, 527.0, 525.0, 576.0, 530.0, 530.0, 525.0, 519.0, 573.0, 522.0, 518.0, 522.0, 525.0, 573.0, 570.0, 573.0, 525.0, 519.0, 576.0, 576.0, 573.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 291.0, 291.0, 257.0, 265.0, 296.0, 280.0, 293.0, 283.0, 296.0, 274.0, 263.0, 259.0, 282.0, 294.0, 290.0, 283.0, 270.0, 252.0, 256.0, 260.0, 288.0, 288.0, 286.0, 284.0, 290.0, 280.0, 253.0, 248.0, 285.0, 288.0, 294.0, 282.0, 226.0, 247.0, 293.0, 280.0, 270.0, 255.0, 286.0, 284.0, 292.0, 284.0, 255.0, 264.0, 267.0, 255.0, 270.0, 252.0, 273.0, 297.0, 278.0, 295.0, 267.0, 255.0, 292.0, 284.0, 292.0, 284.0, 259.0, 260.0, 260.0, 270.0, 281.0, 298.0, 281.0, 289.0, 280.0, 293.0, 286.0, 293.0, 282.0, 288.0, 261.0, 258.0, 294.0, 285.0, 290.0, 289.0, 253.0, 272.0, 286.0, 287.0, 258.0, 264.0, 296.0, 283.0, 292.0, 290.0, 285.0, 288.0, 261.0, 261.0, 265.0, 257.0, 265.0, 257.0, 286.0, 287.0, 298.0, 278.0, 281.0, 292.0, 295.0, 272.0, 283.0, 287.0, 261.0, 264.0, 269.0, 250.0, 265.0, 260.0, 290.0, 283.0, 285.0, 297.0, 276.0, 297.0, 283.0, 290.0, 227.0, 232.0, 258.0, 264.0, 280.0, 
296.0, 263.0, 259.0, 281.0, 298.0, 266.0, 259.0, 281.0, 295.0, 245.0, 274.0, 287.0, 289.0, 262.0, 263.0, 291.0, 288.0, 266.0, 267.0, 268.0, 257.0, 301.0, 272.0, 291.0, 288.0, 275.0, 295.0, 278.0, 252.0, 272.0, 250.0, 280.0, 247.0, 255.0, 270.0, 283.0, 293.0, 264.0, 266.0, 249.0, 281.0, 259.0, 266.0, 257.0, 262.0, 284.0, 289.0, 246.0, 276.0, 245.0, 273.0, 251.0, 271.0, 262.0, 263.0, 287.0, 286.0, 291.0, 279.0, 271.0, 302.0, 266.0, 259.0, 262.0, 257.0, 282.0, 294.0, 290.0, 286.0, 295.0, 278.0, 289.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7131083339732492, "mean_inference_ms": 1.2842183875528914, "mean_action_processing_ms": 0.13563136158971595, "mean_env_wait_ms": 0.858410746337438, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5248000, "num_agent_steps_trained": 5248000, "num_env_steps_sampled": 2624000, "num_env_steps_trained": 2624000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2624000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5248000, "timers": {"training_iteration_time_ms": 5242.623, "learn_time_ms": 1190.196, "learn_throughput": 10754.527, "synch_weights_time_ms": 15.664}, "counters": {"num_env_steps_sampled": 2624000, "num_env_steps_trained": 2624000, "num_agent_steps_sampled": 5248000, "num_agent_steps_trained": 5248000}, "done": false, "episodes_total": 6560, "training_iteration": 205, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-49", "timestamp": 1666581229, "time_this_iter_s": 3.7952888011932373, "time_total_s": 801.5592930316925, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 801.5592930316925, "timesteps_since_restore": 0, "iterations_since_restore": 205, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.25, "ram_util_percent": 10.633333333333333}} +{"custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.83, "shaped_reward_min": 139, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.0, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.96, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.56, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.37, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.29, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.77, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.81, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.29, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.29, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009396851528435946, "policy_loss": -0.0011760840425267816, "vf_loss": 7.405368804931641, "vf_explained_var": 0.6177560091018677, "kl": 0.002221380826085806, "entropy": 1.0082740783691406, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2636800, "num_env_steps_trained": 2636800, "num_agent_steps_sampled": 5273600, "num_agent_steps_trained": 5273600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 459.0, "episode_reward_mean": 552.23, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 276.115}, "custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.83, "shaped_reward_min": 139, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.0, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.96, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.56, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.37, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.29, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.86, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.28, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.77, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.81, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.29, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.29, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 570.0, 573.0, 579.0, 570.0, 519.0, 579.0, 579.0, 525.0, 573.0, 522.0, 579.0, 582.0, 573.0, 522.0, 522.0, 522.0, 573.0, 576.0, 573.0, 567.0, 570.0, 525.0, 519.0, 525.0, 573.0, 582.0, 573.0, 573.0, 459.0, 522.0, 576.0, 522.0, 579.0, 525.0, 576.0, 519.0, 576.0, 525.0, 579.0, 533.0, 525.0, 573.0, 579.0, 570.0, 530.0, 522.0, 527.0, 525.0, 576.0, 530.0, 530.0, 525.0, 519.0, 573.0, 522.0, 518.0, 522.0, 525.0, 573.0, 570.0, 573.0, 525.0, 519.0, 576.0, 576.0, 573.0, 573.0, 573.0, 573.0, 573.0, 516.0, 530.0, 522.0, 579.0, 579.0, 570.0, 582.0, 576.0, 522.0, 573.0, 576.0, 525.0, 519.0, 576.0, 581.0, 579.0, 519.0, 570.0, 573.0, 573.0, 530.0, 525.0, 576.0, 519.0, 530.0, 522.0, 573.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 298.0, 281.0, 289.0, 280.0, 293.0, 286.0, 293.0, 282.0, 288.0, 261.0, 
258.0, 294.0, 285.0, 290.0, 289.0, 253.0, 272.0, 286.0, 287.0, 258.0, 264.0, 296.0, 283.0, 292.0, 290.0, 285.0, 288.0, 261.0, 261.0, 265.0, 257.0, 265.0, 257.0, 286.0, 287.0, 298.0, 278.0, 281.0, 292.0, 295.0, 272.0, 283.0, 287.0, 261.0, 264.0, 269.0, 250.0, 265.0, 260.0, 290.0, 283.0, 285.0, 297.0, 276.0, 297.0, 283.0, 290.0, 227.0, 232.0, 258.0, 264.0, 280.0, 296.0, 263.0, 259.0, 281.0, 298.0, 266.0, 259.0, 281.0, 295.0, 245.0, 274.0, 287.0, 289.0, 262.0, 263.0, 291.0, 288.0, 266.0, 267.0, 268.0, 257.0, 301.0, 272.0, 291.0, 288.0, 275.0, 295.0, 278.0, 252.0, 272.0, 250.0, 280.0, 247.0, 255.0, 270.0, 283.0, 293.0, 264.0, 266.0, 249.0, 281.0, 259.0, 266.0, 257.0, 262.0, 284.0, 289.0, 246.0, 276.0, 245.0, 273.0, 251.0, 271.0, 262.0, 263.0, 287.0, 286.0, 291.0, 279.0, 271.0, 302.0, 266.0, 259.0, 262.0, 257.0, 282.0, 294.0, 290.0, 286.0, 295.0, 278.0, 289.0, 284.0, 287.0, 286.0, 292.0, 281.0, 283.0, 290.0, 253.0, 263.0, 268.0, 262.0, 265.0, 257.0, 285.0, 294.0, 297.0, 282.0, 297.0, 273.0, 292.0, 290.0, 290.0, 286.0, 263.0, 259.0, 280.0, 293.0, 282.0, 294.0, 253.0, 272.0, 265.0, 254.0, 278.0, 298.0, 297.0, 284.0, 295.0, 284.0, 268.0, 251.0, 290.0, 280.0, 299.0, 274.0, 288.0, 285.0, 267.0, 263.0, 267.0, 258.0, 285.0, 291.0, 251.0, 268.0, 269.0, 261.0, 261.0, 261.0, 287.0, 286.0, 289.0, 287.0, 284.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.713028566024557, "mean_inference_ms": 1.2839819542750601, "mean_action_processing_ms": 0.13563662092487597, "mean_env_wait_ms": 0.8583015391835335, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 459.0, "episode_reward_mean": 552.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 276.115}, "hist_stats": {"episode_reward": [579.0, 570.0, 573.0, 579.0, 570.0, 519.0, 579.0, 579.0, 525.0, 573.0, 522.0, 579.0, 582.0, 573.0, 522.0, 522.0, 522.0, 573.0, 576.0, 
573.0, 567.0, 570.0, 525.0, 519.0, 525.0, 573.0, 582.0, 573.0, 573.0, 459.0, 522.0, 576.0, 522.0, 579.0, 525.0, 576.0, 519.0, 576.0, 525.0, 579.0, 533.0, 525.0, 573.0, 579.0, 570.0, 530.0, 522.0, 527.0, 525.0, 576.0, 530.0, 530.0, 525.0, 519.0, 573.0, 522.0, 518.0, 522.0, 525.0, 573.0, 570.0, 573.0, 525.0, 519.0, 576.0, 576.0, 573.0, 573.0, 573.0, 573.0, 573.0, 516.0, 530.0, 522.0, 579.0, 579.0, 570.0, 582.0, 576.0, 522.0, 573.0, 576.0, 525.0, 519.0, 576.0, 581.0, 579.0, 519.0, 570.0, 573.0, 573.0, 530.0, 525.0, 576.0, 519.0, 530.0, 522.0, 573.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 298.0, 281.0, 289.0, 280.0, 293.0, 286.0, 293.0, 282.0, 288.0, 261.0, 258.0, 294.0, 285.0, 290.0, 289.0, 253.0, 272.0, 286.0, 287.0, 258.0, 264.0, 296.0, 283.0, 292.0, 290.0, 285.0, 288.0, 261.0, 261.0, 265.0, 257.0, 265.0, 257.0, 286.0, 287.0, 298.0, 278.0, 281.0, 292.0, 295.0, 272.0, 283.0, 287.0, 261.0, 264.0, 269.0, 250.0, 265.0, 260.0, 290.0, 283.0, 285.0, 297.0, 276.0, 297.0, 283.0, 290.0, 227.0, 232.0, 258.0, 264.0, 280.0, 296.0, 263.0, 259.0, 281.0, 298.0, 266.0, 259.0, 281.0, 295.0, 245.0, 274.0, 287.0, 289.0, 262.0, 263.0, 291.0, 288.0, 266.0, 267.0, 268.0, 257.0, 301.0, 272.0, 291.0, 288.0, 275.0, 295.0, 278.0, 252.0, 272.0, 250.0, 280.0, 247.0, 255.0, 270.0, 283.0, 293.0, 264.0, 266.0, 249.0, 281.0, 259.0, 266.0, 257.0, 262.0, 284.0, 289.0, 246.0, 276.0, 245.0, 273.0, 251.0, 271.0, 262.0, 263.0, 287.0, 286.0, 291.0, 279.0, 271.0, 302.0, 266.0, 259.0, 262.0, 
257.0, 282.0, 294.0, 290.0, 286.0, 295.0, 278.0, 289.0, 284.0, 287.0, 286.0, 292.0, 281.0, 283.0, 290.0, 253.0, 263.0, 268.0, 262.0, 265.0, 257.0, 285.0, 294.0, 297.0, 282.0, 297.0, 273.0, 292.0, 290.0, 290.0, 286.0, 263.0, 259.0, 280.0, 293.0, 282.0, 294.0, 253.0, 272.0, 265.0, 254.0, 278.0, 298.0, 297.0, 284.0, 295.0, 284.0, 268.0, 251.0, 290.0, 280.0, 299.0, 274.0, 288.0, 285.0, 267.0, 263.0, 267.0, 258.0, 285.0, 291.0, 251.0, 268.0, 269.0, 261.0, 261.0, 261.0, 287.0, 286.0, 289.0, 287.0, 284.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.713028566024557, "mean_inference_ms": 1.2839819542750601, "mean_action_processing_ms": 0.13563662092487597, "mean_env_wait_ms": 0.8583015391835335, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5273600, "num_agent_steps_trained": 5273600, "num_env_steps_sampled": 2636800, "num_env_steps_trained": 2636800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2636800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5273600, "timers": {"training_iteration_time_ms": 5145.504, "learn_time_ms": 1162.921, "learn_throughput": 11006.77, "synch_weights_time_ms": 14.404}, "counters": {"num_env_steps_sampled": 2636800, "num_env_steps_trained": 2636800, "num_agent_steps_sampled": 5273600, "num_agent_steps_trained": 5273600}, "done": false, "episodes_total": 6592, "training_iteration": 206, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-53", "timestamp": 1666581233, "time_this_iter_s": 3.7356982231140137, "time_total_s": 805.2949912548065, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 805.2949912548065, "timesteps_since_restore": 0, "iterations_since_restore": 206, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.183333333333334, "ram_util_percent": 10.616666666666665}} +{"custom_metrics": {"sparse_reward_mean": 189.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.4, "shaped_reward_min": 148, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.85, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.01, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.45, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.71, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.38, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.15, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.48, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.9, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.15, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.48, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.15, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.48, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016882693162187934, "policy_loss": -0.0019414026755839586, "vf_loss": 7.55963134765625, "vf_explained_var": 0.6160889863967896, "kl": 0.0016779176658019423, "entropy": 1.0056562423706055, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2649600, "num_env_steps_trained": 2649600, "num_agent_steps_sampled": 5299200, "num_agent_steps_trained": 5299200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 468.0, "episode_reward_mean": 549.0, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 274.5}, "custom_metrics": {"sparse_reward_mean": 189.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.4, "shaped_reward_min": 148, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.85, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.01, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.45, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.71, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.15, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.48, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.9, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.34, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.15, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.48, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.15, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.48, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 579.0, 525.0, 576.0, 519.0, 576.0, 525.0, 579.0, 533.0, 525.0, 573.0, 579.0, 570.0, 530.0, 522.0, 527.0, 525.0, 576.0, 530.0, 530.0, 525.0, 519.0, 573.0, 522.0, 518.0, 522.0, 525.0, 573.0, 570.0, 573.0, 525.0, 519.0, 576.0, 576.0, 573.0, 573.0, 573.0, 573.0, 573.0, 516.0, 530.0, 522.0, 579.0, 579.0, 570.0, 582.0, 576.0, 522.0, 573.0, 576.0, 525.0, 519.0, 576.0, 581.0, 579.0, 519.0, 570.0, 573.0, 573.0, 530.0, 525.0, 576.0, 519.0, 530.0, 522.0, 573.0, 576.0, 576.0, 579.0, 525.0, 522.0, 522.0, 513.0, 479.0, 468.0, 576.0, 579.0, 573.0, 519.0, 522.0, 573.0, 530.0, 530.0, 522.0, 576.0, 573.0, 576.0, 522.0, 579.0, 579.0, 576.0, 576.0, 576.0, 522.0, 513.0, 527.0, 530.0, 525.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [263.0, 259.0, 281.0, 298.0, 266.0, 259.0, 281.0, 295.0, 245.0, 274.0, 287.0, 
289.0, 262.0, 263.0, 291.0, 288.0, 266.0, 267.0, 268.0, 257.0, 301.0, 272.0, 291.0, 288.0, 275.0, 295.0, 278.0, 252.0, 272.0, 250.0, 280.0, 247.0, 255.0, 270.0, 283.0, 293.0, 264.0, 266.0, 249.0, 281.0, 259.0, 266.0, 257.0, 262.0, 284.0, 289.0, 246.0, 276.0, 245.0, 273.0, 251.0, 271.0, 262.0, 263.0, 287.0, 286.0, 291.0, 279.0, 271.0, 302.0, 266.0, 259.0, 262.0, 257.0, 282.0, 294.0, 290.0, 286.0, 295.0, 278.0, 289.0, 284.0, 287.0, 286.0, 292.0, 281.0, 283.0, 290.0, 253.0, 263.0, 268.0, 262.0, 265.0, 257.0, 285.0, 294.0, 297.0, 282.0, 297.0, 273.0, 292.0, 290.0, 290.0, 286.0, 263.0, 259.0, 280.0, 293.0, 282.0, 294.0, 253.0, 272.0, 265.0, 254.0, 278.0, 298.0, 297.0, 284.0, 295.0, 284.0, 268.0, 251.0, 290.0, 280.0, 299.0, 274.0, 288.0, 285.0, 267.0, 263.0, 267.0, 258.0, 285.0, 291.0, 251.0, 268.0, 269.0, 261.0, 261.0, 261.0, 287.0, 286.0, 289.0, 287.0, 284.0, 292.0, 274.0, 305.0, 261.0, 264.0, 261.0, 261.0, 262.0, 260.0, 272.0, 241.0, 229.0, 250.0, 240.0, 228.0, 299.0, 277.0, 283.0, 296.0, 280.0, 293.0, 280.0, 239.0, 252.0, 270.0, 279.0, 294.0, 263.0, 267.0, 269.0, 261.0, 266.0, 256.0, 299.0, 277.0, 290.0, 283.0, 285.0, 291.0, 261.0, 261.0, 293.0, 286.0, 299.0, 280.0, 286.0, 290.0, 286.0, 290.0, 281.0, 295.0, 261.0, 261.0, 236.0, 277.0, 256.0, 271.0, 273.0, 257.0, 269.0, 256.0, 286.0, 287.0, 277.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7129065882467726, "mean_inference_ms": 1.2836145170529432, "mean_action_processing_ms": 0.13563081346771022, "mean_env_wait_ms": 0.8581502987526111, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 468.0, "episode_reward_mean": 549.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 274.5}, "hist_stats": {"episode_reward": [522.0, 579.0, 525.0, 576.0, 519.0, 576.0, 525.0, 579.0, 533.0, 525.0, 573.0, 579.0, 570.0, 530.0, 522.0, 527.0, 525.0, 576.0, 530.0, 
530.0, 525.0, 519.0, 573.0, 522.0, 518.0, 522.0, 525.0, 573.0, 570.0, 573.0, 525.0, 519.0, 576.0, 576.0, 573.0, 573.0, 573.0, 573.0, 573.0, 516.0, 530.0, 522.0, 579.0, 579.0, 570.0, 582.0, 576.0, 522.0, 573.0, 576.0, 525.0, 519.0, 576.0, 581.0, 579.0, 519.0, 570.0, 573.0, 573.0, 530.0, 525.0, 576.0, 519.0, 530.0, 522.0, 573.0, 576.0, 576.0, 579.0, 525.0, 522.0, 522.0, 513.0, 479.0, 468.0, 576.0, 579.0, 573.0, 519.0, 522.0, 573.0, 530.0, 530.0, 522.0, 576.0, 573.0, 576.0, 522.0, 579.0, 579.0, 576.0, 576.0, 576.0, 522.0, 513.0, 527.0, 530.0, 525.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [263.0, 259.0, 281.0, 298.0, 266.0, 259.0, 281.0, 295.0, 245.0, 274.0, 287.0, 289.0, 262.0, 263.0, 291.0, 288.0, 266.0, 267.0, 268.0, 257.0, 301.0, 272.0, 291.0, 288.0, 275.0, 295.0, 278.0, 252.0, 272.0, 250.0, 280.0, 247.0, 255.0, 270.0, 283.0, 293.0, 264.0, 266.0, 249.0, 281.0, 259.0, 266.0, 257.0, 262.0, 284.0, 289.0, 246.0, 276.0, 245.0, 273.0, 251.0, 271.0, 262.0, 263.0, 287.0, 286.0, 291.0, 279.0, 271.0, 302.0, 266.0, 259.0, 262.0, 257.0, 282.0, 294.0, 290.0, 286.0, 295.0, 278.0, 289.0, 284.0, 287.0, 286.0, 292.0, 281.0, 283.0, 290.0, 253.0, 263.0, 268.0, 262.0, 265.0, 257.0, 285.0, 294.0, 297.0, 282.0, 297.0, 273.0, 292.0, 290.0, 290.0, 286.0, 263.0, 259.0, 280.0, 293.0, 282.0, 294.0, 253.0, 272.0, 265.0, 254.0, 278.0, 298.0, 297.0, 284.0, 295.0, 284.0, 268.0, 251.0, 290.0, 280.0, 299.0, 274.0, 288.0, 285.0, 267.0, 263.0, 267.0, 258.0, 285.0, 291.0, 251.0, 268.0, 269.0, 
261.0, 261.0, 261.0, 287.0, 286.0, 289.0, 287.0, 284.0, 292.0, 274.0, 305.0, 261.0, 264.0, 261.0, 261.0, 262.0, 260.0, 272.0, 241.0, 229.0, 250.0, 240.0, 228.0, 299.0, 277.0, 283.0, 296.0, 280.0, 293.0, 280.0, 239.0, 252.0, 270.0, 279.0, 294.0, 263.0, 267.0, 269.0, 261.0, 266.0, 256.0, 299.0, 277.0, 290.0, 283.0, 285.0, 291.0, 261.0, 261.0, 293.0, 286.0, 299.0, 280.0, 286.0, 290.0, 286.0, 290.0, 281.0, 295.0, 261.0, 261.0, 236.0, 277.0, 256.0, 271.0, 273.0, 257.0, 269.0, 256.0, 286.0, 287.0, 277.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7129065882467726, "mean_inference_ms": 1.2836145170529432, "mean_action_processing_ms": 0.13563081346771022, "mean_env_wait_ms": 0.8581502987526111, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5299200, "num_agent_steps_trained": 5299200, "num_env_steps_sampled": 2649600, "num_env_steps_trained": 2649600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2649600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5299200, "timers": {"training_iteration_time_ms": 4431.021, "learn_time_ms": 1160.982, "learn_throughput": 11025.15, "synch_weights_time_ms": 14.094}, "counters": {"num_env_steps_sampled": 2649600, "num_env_steps_trained": 2649600, "num_agent_steps_sampled": 5299200, "num_agent_steps_trained": 5299200}, "done": false, "episodes_total": 6624, "training_iteration": 207, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-13-57", "timestamp": 1666581237, "time_this_iter_s": 3.7433922290802, "time_total_s": 809.0383834838867, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 809.0383834838867, "timesteps_since_restore": 0, "iterations_since_restore": 207, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.380000000000003, "ram_util_percent": 10.64}} +{"custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.24, "shaped_reward_min": 148, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.71, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.15, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.32, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.75, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.52, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.02, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.08, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.52, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.52, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.004782018251717091, "policy_loss": -0.005048870109021664, "vf_loss": 7.675145149230957, "vf_explained_var": 0.6215179562568665, "kl": 0.0018105552298948169, "entropy": 1.001321792602539, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2662400, "num_env_steps_trained": 2662400, "num_agent_steps_sampled": 5324800, "num_agent_steps_trained": 5324800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 468.0, "episode_reward_mean": 549.24, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 274.62}, "custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.24, "shaped_reward_min": 148, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.71, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.15, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.32, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.75, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.34, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.52, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.02, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.34, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.08, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.52, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.52, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 573.0, 573.0, 573.0, 573.0, 573.0, 516.0, 530.0, 522.0, 579.0, 579.0, 570.0, 582.0, 576.0, 522.0, 573.0, 576.0, 525.0, 519.0, 576.0, 581.0, 579.0, 519.0, 570.0, 573.0, 573.0, 530.0, 525.0, 576.0, 519.0, 530.0, 522.0, 573.0, 576.0, 576.0, 579.0, 525.0, 522.0, 522.0, 513.0, 479.0, 468.0, 576.0, 579.0, 573.0, 519.0, 522.0, 573.0, 530.0, 530.0, 522.0, 576.0, 573.0, 576.0, 522.0, 579.0, 579.0, 576.0, 576.0, 576.0, 522.0, 513.0, 527.0, 530.0, 525.0, 573.0, 576.0, 513.0, 570.0, 516.0, 573.0, 524.0, 522.0, 519.0, 573.0, 579.0, 527.0, 516.0, 579.0, 573.0, 522.0, 582.0, 576.0, 530.0, 522.0, 510.0, 573.0, 525.0, 582.0, 570.0, 530.0, 522.0, 576.0, 519.0, 522.0, 576.0, 533.0, 525.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 294.0, 290.0, 286.0, 295.0, 278.0, 289.0, 284.0, 287.0, 286.0, 292.0, 
281.0, 283.0, 290.0, 253.0, 263.0, 268.0, 262.0, 265.0, 257.0, 285.0, 294.0, 297.0, 282.0, 297.0, 273.0, 292.0, 290.0, 290.0, 286.0, 263.0, 259.0, 280.0, 293.0, 282.0, 294.0, 253.0, 272.0, 265.0, 254.0, 278.0, 298.0, 297.0, 284.0, 295.0, 284.0, 268.0, 251.0, 290.0, 280.0, 299.0, 274.0, 288.0, 285.0, 267.0, 263.0, 267.0, 258.0, 285.0, 291.0, 251.0, 268.0, 269.0, 261.0, 261.0, 261.0, 287.0, 286.0, 289.0, 287.0, 284.0, 292.0, 274.0, 305.0, 261.0, 264.0, 261.0, 261.0, 262.0, 260.0, 272.0, 241.0, 229.0, 250.0, 240.0, 228.0, 299.0, 277.0, 283.0, 296.0, 280.0, 293.0, 280.0, 239.0, 252.0, 270.0, 279.0, 294.0, 263.0, 267.0, 269.0, 261.0, 266.0, 256.0, 299.0, 277.0, 290.0, 283.0, 285.0, 291.0, 261.0, 261.0, 293.0, 286.0, 299.0, 280.0, 286.0, 290.0, 286.0, 290.0, 281.0, 295.0, 261.0, 261.0, 236.0, 277.0, 256.0, 271.0, 273.0, 257.0, 269.0, 256.0, 286.0, 287.0, 277.0, 299.0, 249.0, 264.0, 297.0, 273.0, 258.0, 258.0, 291.0, 282.0, 281.0, 243.0, 271.0, 251.0, 264.0, 255.0, 283.0, 290.0, 290.0, 289.0, 259.0, 268.0, 268.0, 248.0, 287.0, 292.0, 289.0, 284.0, 263.0, 259.0, 282.0, 300.0, 276.0, 300.0, 267.0, 263.0, 256.0, 266.0, 245.0, 265.0, 285.0, 288.0, 260.0, 265.0, 281.0, 301.0, 290.0, 280.0, 271.0, 259.0, 264.0, 258.0, 302.0, 274.0, 263.0, 256.0, 252.0, 270.0, 293.0, 283.0, 264.0, 269.0, 267.0, 258.0, 251.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7127601978504233, "mean_inference_ms": 1.28335528460283, "mean_action_processing_ms": 0.135616836400308, "mean_env_wait_ms": 0.8582178172002506, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 468.0, "episode_reward_mean": 549.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 228.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 274.62}, "hist_stats": {"episode_reward": [576.0, 576.0, 573.0, 573.0, 573.0, 573.0, 573.0, 516.0, 530.0, 522.0, 579.0, 579.0, 570.0, 582.0, 576.0, 522.0, 573.0, 576.0, 525.0, 
519.0, 576.0, 581.0, 579.0, 519.0, 570.0, 573.0, 573.0, 530.0, 525.0, 576.0, 519.0, 530.0, 522.0, 573.0, 576.0, 576.0, 579.0, 525.0, 522.0, 522.0, 513.0, 479.0, 468.0, 576.0, 579.0, 573.0, 519.0, 522.0, 573.0, 530.0, 530.0, 522.0, 576.0, 573.0, 576.0, 522.0, 579.0, 579.0, 576.0, 576.0, 576.0, 522.0, 513.0, 527.0, 530.0, 525.0, 573.0, 576.0, 513.0, 570.0, 516.0, 573.0, 524.0, 522.0, 519.0, 573.0, 579.0, 527.0, 516.0, 579.0, 573.0, 522.0, 582.0, 576.0, 530.0, 522.0, 510.0, 573.0, 525.0, 582.0, 570.0, 530.0, 522.0, 576.0, 519.0, 522.0, 576.0, 533.0, 525.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 294.0, 290.0, 286.0, 295.0, 278.0, 289.0, 284.0, 287.0, 286.0, 292.0, 281.0, 283.0, 290.0, 253.0, 263.0, 268.0, 262.0, 265.0, 257.0, 285.0, 294.0, 297.0, 282.0, 297.0, 273.0, 292.0, 290.0, 290.0, 286.0, 263.0, 259.0, 280.0, 293.0, 282.0, 294.0, 253.0, 272.0, 265.0, 254.0, 278.0, 298.0, 297.0, 284.0, 295.0, 284.0, 268.0, 251.0, 290.0, 280.0, 299.0, 274.0, 288.0, 285.0, 267.0, 263.0, 267.0, 258.0, 285.0, 291.0, 251.0, 268.0, 269.0, 261.0, 261.0, 261.0, 287.0, 286.0, 289.0, 287.0, 284.0, 292.0, 274.0, 305.0, 261.0, 264.0, 261.0, 261.0, 262.0, 260.0, 272.0, 241.0, 229.0, 250.0, 240.0, 228.0, 299.0, 277.0, 283.0, 296.0, 280.0, 293.0, 280.0, 239.0, 252.0, 270.0, 279.0, 294.0, 263.0, 267.0, 269.0, 261.0, 266.0, 256.0, 299.0, 277.0, 290.0, 283.0, 285.0, 291.0, 261.0, 261.0, 293.0, 286.0, 299.0, 280.0, 286.0, 290.0, 286.0, 290.0, 281.0, 295.0, 261.0, 261.0, 236.0, 277.0, 256.0, 
271.0, 273.0, 257.0, 269.0, 256.0, 286.0, 287.0, 277.0, 299.0, 249.0, 264.0, 297.0, 273.0, 258.0, 258.0, 291.0, 282.0, 281.0, 243.0, 271.0, 251.0, 264.0, 255.0, 283.0, 290.0, 290.0, 289.0, 259.0, 268.0, 268.0, 248.0, 287.0, 292.0, 289.0, 284.0, 263.0, 259.0, 282.0, 300.0, 276.0, 300.0, 267.0, 263.0, 256.0, 266.0, 245.0, 265.0, 285.0, 288.0, 260.0, 265.0, 281.0, 301.0, 290.0, 280.0, 271.0, 259.0, 264.0, 258.0, 302.0, 274.0, 263.0, 256.0, 252.0, 270.0, 293.0, 283.0, 264.0, 269.0, 267.0, 258.0, 251.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7127601978504233, "mean_inference_ms": 1.28335528460283, "mean_action_processing_ms": 0.135616836400308, "mean_env_wait_ms": 0.8582178172002506, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5324800, "num_agent_steps_trained": 5324800, "num_env_steps_sampled": 2662400, "num_env_steps_trained": 2662400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2662400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5324800, "timers": {"training_iteration_time_ms": 4392.957, "learn_time_ms": 1154.591, "learn_throughput": 11086.175, "synch_weights_time_ms": 14.769}, "counters": {"num_env_steps_sampled": 2662400, "num_env_steps_trained": 2662400, "num_agent_steps_sampled": 5324800, "num_agent_steps_trained": 5324800}, "done": false, "episodes_total": 6656, "training_iteration": 208, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-01", "timestamp": 1666581241, "time_this_iter_s": 3.871088743209839, "time_total_s": 812.9094722270966, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 812.9094722270966, "timesteps_since_restore": 0, "iterations_since_restore": 208, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 22.099999999999998, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 188.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 168.36, "shaped_reward_min": 145, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.37, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.37, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.02, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.91, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.29, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.77, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.71, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.77, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.71, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.77, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.71, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0028461883775889874, "policy_loss": -0.003107053227722645, "vf_loss": 7.6106414794921875, "vf_explained_var": 0.6246351003646851, "kl": 0.0016450014663860202, "entropy": 1.0003979206085205, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2675200, "num_env_steps_trained": 2675200, "num_agent_steps_sampled": 5350400, "num_agent_steps_trained": 5350400}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 465.0, "episode_reward_mean": 545.96, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 272.98}, "custom_metrics": {"sparse_reward_mean": 188.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 168.36, "shaped_reward_min": 145, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.37, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.37, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.02, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.91, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.38, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.77, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.71, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.37, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.38, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.09, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.77, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.71, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.77, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.71, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 573.0, 576.0, 576.0, 579.0, 525.0, 522.0, 522.0, 513.0, 479.0, 468.0, 576.0, 579.0, 573.0, 519.0, 522.0, 573.0, 530.0, 530.0, 522.0, 576.0, 573.0, 576.0, 522.0, 579.0, 579.0, 576.0, 576.0, 576.0, 522.0, 513.0, 527.0, 530.0, 525.0, 573.0, 576.0, 513.0, 570.0, 516.0, 573.0, 524.0, 522.0, 519.0, 573.0, 579.0, 527.0, 516.0, 579.0, 573.0, 522.0, 582.0, 576.0, 530.0, 522.0, 510.0, 573.0, 525.0, 582.0, 570.0, 530.0, 522.0, 576.0, 519.0, 522.0, 576.0, 533.0, 525.0, 530.0, 579.0, 533.0, 519.0, 579.0, 525.0, 530.0, 525.0, 522.0, 465.0, 573.0, 573.0, 525.0, 579.0, 570.0, 570.0, 519.0, 525.0, 576.0, 525.0, 570.0, 525.0, 570.0, 573.0, 525.0, 573.0, 525.0, 522.0, 573.0, 567.0, 525.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 261.0, 287.0, 286.0, 289.0, 287.0, 284.0, 292.0, 274.0, 305.0, 261.0, 
264.0, 261.0, 261.0, 262.0, 260.0, 272.0, 241.0, 229.0, 250.0, 240.0, 228.0, 299.0, 277.0, 283.0, 296.0, 280.0, 293.0, 280.0, 239.0, 252.0, 270.0, 279.0, 294.0, 263.0, 267.0, 269.0, 261.0, 266.0, 256.0, 299.0, 277.0, 290.0, 283.0, 285.0, 291.0, 261.0, 261.0, 293.0, 286.0, 299.0, 280.0, 286.0, 290.0, 286.0, 290.0, 281.0, 295.0, 261.0, 261.0, 236.0, 277.0, 256.0, 271.0, 273.0, 257.0, 269.0, 256.0, 286.0, 287.0, 277.0, 299.0, 249.0, 264.0, 297.0, 273.0, 258.0, 258.0, 291.0, 282.0, 281.0, 243.0, 271.0, 251.0, 264.0, 255.0, 283.0, 290.0, 290.0, 289.0, 259.0, 268.0, 268.0, 248.0, 287.0, 292.0, 289.0, 284.0, 263.0, 259.0, 282.0, 300.0, 276.0, 300.0, 267.0, 263.0, 256.0, 266.0, 245.0, 265.0, 285.0, 288.0, 260.0, 265.0, 281.0, 301.0, 290.0, 280.0, 271.0, 259.0, 264.0, 258.0, 302.0, 274.0, 263.0, 256.0, 252.0, 270.0, 293.0, 283.0, 264.0, 269.0, 267.0, 258.0, 251.0, 279.0, 286.0, 293.0, 267.0, 266.0, 259.0, 260.0, 285.0, 294.0, 256.0, 269.0, 274.0, 256.0, 271.0, 254.0, 258.0, 264.0, 238.0, 227.0, 290.0, 283.0, 287.0, 286.0, 265.0, 260.0, 283.0, 296.0, 283.0, 287.0, 288.0, 282.0, 271.0, 248.0, 265.0, 260.0, 292.0, 284.0, 279.0, 246.0, 289.0, 281.0, 266.0, 259.0, 292.0, 278.0, 293.0, 280.0, 264.0, 261.0, 292.0, 281.0, 268.0, 257.0, 267.0, 255.0, 286.0, 287.0, 278.0, 289.0, 267.0, 258.0, 287.0, 286.0, 300.0, 276.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7125994316173464, "mean_inference_ms": 1.2830898160989364, "mean_action_processing_ms": 0.1356002703740297, "mean_env_wait_ms": 0.8582668591214645, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 465.0, "episode_reward_mean": 545.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 272.98}, "hist_stats": {"episode_reward": [522.0, 573.0, 576.0, 576.0, 579.0, 525.0, 522.0, 522.0, 513.0, 479.0, 468.0, 576.0, 579.0, 573.0, 519.0, 522.0, 573.0, 530.0, 530.0, 
522.0, 576.0, 573.0, 576.0, 522.0, 579.0, 579.0, 576.0, 576.0, 576.0, 522.0, 513.0, 527.0, 530.0, 525.0, 573.0, 576.0, 513.0, 570.0, 516.0, 573.0, 524.0, 522.0, 519.0, 573.0, 579.0, 527.0, 516.0, 579.0, 573.0, 522.0, 582.0, 576.0, 530.0, 522.0, 510.0, 573.0, 525.0, 582.0, 570.0, 530.0, 522.0, 576.0, 519.0, 522.0, 576.0, 533.0, 525.0, 530.0, 579.0, 533.0, 519.0, 579.0, 525.0, 530.0, 525.0, 522.0, 465.0, 573.0, 573.0, 525.0, 579.0, 570.0, 570.0, 519.0, 525.0, 576.0, 525.0, 570.0, 525.0, 570.0, 573.0, 525.0, 573.0, 525.0, 522.0, 573.0, 567.0, 525.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 261.0, 287.0, 286.0, 289.0, 287.0, 284.0, 292.0, 274.0, 305.0, 261.0, 264.0, 261.0, 261.0, 262.0, 260.0, 272.0, 241.0, 229.0, 250.0, 240.0, 228.0, 299.0, 277.0, 283.0, 296.0, 280.0, 293.0, 280.0, 239.0, 252.0, 270.0, 279.0, 294.0, 263.0, 267.0, 269.0, 261.0, 266.0, 256.0, 299.0, 277.0, 290.0, 283.0, 285.0, 291.0, 261.0, 261.0, 293.0, 286.0, 299.0, 280.0, 286.0, 290.0, 286.0, 290.0, 281.0, 295.0, 261.0, 261.0, 236.0, 277.0, 256.0, 271.0, 273.0, 257.0, 269.0, 256.0, 286.0, 287.0, 277.0, 299.0, 249.0, 264.0, 297.0, 273.0, 258.0, 258.0, 291.0, 282.0, 281.0, 243.0, 271.0, 251.0, 264.0, 255.0, 283.0, 290.0, 290.0, 289.0, 259.0, 268.0, 268.0, 248.0, 287.0, 292.0, 289.0, 284.0, 263.0, 259.0, 282.0, 300.0, 276.0, 300.0, 267.0, 263.0, 256.0, 266.0, 245.0, 265.0, 285.0, 288.0, 260.0, 265.0, 281.0, 301.0, 290.0, 280.0, 271.0, 259.0, 264.0, 258.0, 302.0, 274.0, 263.0, 256.0, 252.0, 
270.0, 293.0, 283.0, 264.0, 269.0, 267.0, 258.0, 251.0, 279.0, 286.0, 293.0, 267.0, 266.0, 259.0, 260.0, 285.0, 294.0, 256.0, 269.0, 274.0, 256.0, 271.0, 254.0, 258.0, 264.0, 238.0, 227.0, 290.0, 283.0, 287.0, 286.0, 265.0, 260.0, 283.0, 296.0, 283.0, 287.0, 288.0, 282.0, 271.0, 248.0, 265.0, 260.0, 292.0, 284.0, 279.0, 246.0, 289.0, 281.0, 266.0, 259.0, 292.0, 278.0, 293.0, 280.0, 264.0, 261.0, 292.0, 281.0, 268.0, 257.0, 267.0, 255.0, 286.0, 287.0, 278.0, 289.0, 267.0, 258.0, 287.0, 286.0, 300.0, 276.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7125994316173464, "mean_inference_ms": 1.2830898160989364, "mean_action_processing_ms": 0.1356002703740297, "mean_env_wait_ms": 0.8582668591214645, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5350400, "num_agent_steps_trained": 5350400, "num_env_steps_sampled": 2675200, "num_env_steps_trained": 2675200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2675200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5350400, "timers": {"training_iteration_time_ms": 4348.02, "learn_time_ms": 1155.85, "learn_throughput": 11074.098, "synch_weights_time_ms": 15.261}, "counters": {"num_env_steps_sampled": 2675200, "num_env_steps_trained": 2675200, "num_agent_steps_sampled": 5350400, "num_agent_steps_trained": 5350400}, "done": false, "episodes_total": 6688, "training_iteration": 209, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-05", "timestamp": 1666581245, "time_this_iter_s": 3.7650973796844482, "time_total_s": 816.674569606781, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 816.674569606781, "timesteps_since_restore": 0, "iterations_since_restore": 209, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.34, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 167.94, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.03, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.63, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.12, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.91, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.39, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.32, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.38, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.91, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.91, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0021201916970312595, "policy_loss": -0.002378989476710558, "vf_loss": 7.568546772003174, "vf_explained_var": 0.6088274717330933, "kl": 0.0017179761780425906, "entropy": 0.9961112141609192, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2688000, "num_env_steps_trained": 2688000, "num_agent_steps_sampled": 5376000, "num_agent_steps_trained": 5376000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 546.34, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 273.17}, "custom_metrics": {"sparse_reward_mean": 189.2, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 167.94, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.03, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.63, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.12, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.38, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.37, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.25, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.18, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.91, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.36, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.1, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.39, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.32, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.38, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.91, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.91, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 525.0, 573.0, 576.0, 513.0, 570.0, 516.0, 573.0, 524.0, 522.0, 519.0, 573.0, 579.0, 527.0, 516.0, 579.0, 573.0, 522.0, 582.0, 576.0, 530.0, 522.0, 510.0, 573.0, 525.0, 582.0, 570.0, 530.0, 522.0, 576.0, 519.0, 522.0, 576.0, 533.0, 525.0, 530.0, 579.0, 533.0, 519.0, 579.0, 525.0, 530.0, 525.0, 522.0, 465.0, 573.0, 573.0, 525.0, 579.0, 570.0, 570.0, 519.0, 525.0, 576.0, 525.0, 570.0, 525.0, 570.0, 573.0, 525.0, 573.0, 525.0, 522.0, 573.0, 567.0, 525.0, 573.0, 576.0, 519.0, 567.0, 525.0, 573.0, 576.0, 570.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 579.0, 519.0, 573.0, 570.0, 576.0, 525.0, 530.0, 573.0, 530.0, 576.0, 519.0, 573.0, 237.0, 573.0, 519.0, 525.0, 570.0, 576.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [273.0, 257.0, 269.0, 256.0, 286.0, 287.0, 277.0, 299.0, 249.0, 264.0, 297.0, 
273.0, 258.0, 258.0, 291.0, 282.0, 281.0, 243.0, 271.0, 251.0, 264.0, 255.0, 283.0, 290.0, 290.0, 289.0, 259.0, 268.0, 268.0, 248.0, 287.0, 292.0, 289.0, 284.0, 263.0, 259.0, 282.0, 300.0, 276.0, 300.0, 267.0, 263.0, 256.0, 266.0, 245.0, 265.0, 285.0, 288.0, 260.0, 265.0, 281.0, 301.0, 290.0, 280.0, 271.0, 259.0, 264.0, 258.0, 302.0, 274.0, 263.0, 256.0, 252.0, 270.0, 293.0, 283.0, 264.0, 269.0, 267.0, 258.0, 251.0, 279.0, 286.0, 293.0, 267.0, 266.0, 259.0, 260.0, 285.0, 294.0, 256.0, 269.0, 274.0, 256.0, 271.0, 254.0, 258.0, 264.0, 238.0, 227.0, 290.0, 283.0, 287.0, 286.0, 265.0, 260.0, 283.0, 296.0, 283.0, 287.0, 288.0, 282.0, 271.0, 248.0, 265.0, 260.0, 292.0, 284.0, 279.0, 246.0, 289.0, 281.0, 266.0, 259.0, 292.0, 278.0, 293.0, 280.0, 264.0, 261.0, 292.0, 281.0, 268.0, 257.0, 267.0, 255.0, 286.0, 287.0, 278.0, 289.0, 267.0, 258.0, 287.0, 286.0, 300.0, 276.0, 263.0, 256.0, 284.0, 283.0, 257.0, 268.0, 284.0, 289.0, 293.0, 283.0, 304.0, 266.0, 284.0, 289.0, 283.0, 290.0, 290.0, 286.0, 284.0, 289.0, 288.0, 285.0, 297.0, 276.0, 280.0, 293.0, 288.0, 291.0, 261.0, 258.0, 287.0, 286.0, 277.0, 293.0, 289.0, 287.0, 259.0, 266.0, 271.0, 259.0, 286.0, 287.0, 259.0, 271.0, 291.0, 285.0, 253.0, 266.0, 288.0, 285.0, 117.0, 120.0, 290.0, 283.0, 273.0, 246.0, 256.0, 269.0, 282.0, 288.0, 288.0, 288.0, 265.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7124529008996413, "mean_inference_ms": 1.2828236653647938, "mean_action_processing_ms": 0.135582557767892, "mean_env_wait_ms": 0.8583057373366506, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 546.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 273.17}, "hist_stats": {"episode_reward": [530.0, 525.0, 573.0, 576.0, 513.0, 570.0, 516.0, 573.0, 524.0, 522.0, 519.0, 573.0, 579.0, 527.0, 516.0, 579.0, 573.0, 522.0, 582.0, 
576.0, 530.0, 522.0, 510.0, 573.0, 525.0, 582.0, 570.0, 530.0, 522.0, 576.0, 519.0, 522.0, 576.0, 533.0, 525.0, 530.0, 579.0, 533.0, 519.0, 579.0, 525.0, 530.0, 525.0, 522.0, 465.0, 573.0, 573.0, 525.0, 579.0, 570.0, 570.0, 519.0, 525.0, 576.0, 525.0, 570.0, 525.0, 570.0, 573.0, 525.0, 573.0, 525.0, 522.0, 573.0, 567.0, 525.0, 573.0, 576.0, 519.0, 567.0, 525.0, 573.0, 576.0, 570.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 579.0, 519.0, 573.0, 570.0, 576.0, 525.0, 530.0, 573.0, 530.0, 576.0, 519.0, 573.0, 237.0, 573.0, 519.0, 525.0, 570.0, 576.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [273.0, 257.0, 269.0, 256.0, 286.0, 287.0, 277.0, 299.0, 249.0, 264.0, 297.0, 273.0, 258.0, 258.0, 291.0, 282.0, 281.0, 243.0, 271.0, 251.0, 264.0, 255.0, 283.0, 290.0, 290.0, 289.0, 259.0, 268.0, 268.0, 248.0, 287.0, 292.0, 289.0, 284.0, 263.0, 259.0, 282.0, 300.0, 276.0, 300.0, 267.0, 263.0, 256.0, 266.0, 245.0, 265.0, 285.0, 288.0, 260.0, 265.0, 281.0, 301.0, 290.0, 280.0, 271.0, 259.0, 264.0, 258.0, 302.0, 274.0, 263.0, 256.0, 252.0, 270.0, 293.0, 283.0, 264.0, 269.0, 267.0, 258.0, 251.0, 279.0, 286.0, 293.0, 267.0, 266.0, 259.0, 260.0, 285.0, 294.0, 256.0, 269.0, 274.0, 256.0, 271.0, 254.0, 258.0, 264.0, 238.0, 227.0, 290.0, 283.0, 287.0, 286.0, 265.0, 260.0, 283.0, 296.0, 283.0, 287.0, 288.0, 282.0, 271.0, 248.0, 265.0, 260.0, 292.0, 284.0, 279.0, 246.0, 289.0, 281.0, 266.0, 259.0, 292.0, 278.0, 293.0, 280.0, 264.0, 261.0, 292.0, 281.0, 268.0, 257.0, 267.0, 255.0, 286.0, 
287.0, 278.0, 289.0, 267.0, 258.0, 287.0, 286.0, 300.0, 276.0, 263.0, 256.0, 284.0, 283.0, 257.0, 268.0, 284.0, 289.0, 293.0, 283.0, 304.0, 266.0, 284.0, 289.0, 283.0, 290.0, 290.0, 286.0, 284.0, 289.0, 288.0, 285.0, 297.0, 276.0, 280.0, 293.0, 288.0, 291.0, 261.0, 258.0, 287.0, 286.0, 277.0, 293.0, 289.0, 287.0, 259.0, 266.0, 271.0, 259.0, 286.0, 287.0, 259.0, 271.0, 291.0, 285.0, 253.0, 266.0, 288.0, 285.0, 117.0, 120.0, 290.0, 283.0, 273.0, 246.0, 256.0, 269.0, 282.0, 288.0, 288.0, 288.0, 265.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7124529008996413, "mean_inference_ms": 1.2828236653647938, "mean_action_processing_ms": 0.135582557767892, "mean_env_wait_ms": 0.8583057373366506, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5376000, "num_agent_steps_trained": 5376000, "num_env_steps_sampled": 2688000, "num_env_steps_trained": 2688000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2688000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5376000, "timers": {"training_iteration_time_ms": 3843.384, "learn_time_ms": 1149.996, "learn_throughput": 11130.47, "synch_weights_time_ms": 14.22}, "counters": {"num_env_steps_sampled": 2688000, "num_env_steps_trained": 2688000, "num_agent_steps_sampled": 5376000, "num_agent_steps_trained": 5376000}, "done": false, "episodes_total": 6720, "training_iteration": 210, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-09", "timestamp": 1666581249, "time_this_iter_s": 3.7147159576416016, "time_total_s": 820.3892855644226, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 820.3892855644226, "timesteps_since_restore": 0, "iterations_since_restore": 210, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.633333333333333, "ram_util_percent": 10.633333333333333}} +{"custom_metrics": {"sparse_reward_mean": 186.4, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 166.85, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.1, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.31, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.72, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.87, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.35, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.4, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.67, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.31, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.4, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.67, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.4, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.67, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.003364190459251404, "policy_loss": -0.003640303621068597, "vf_loss": 7.733088493347168, "vf_explained_var": 0.6351046562194824, "kl": 0.0018460192950442433, "entropy": 0.9943915605545044, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2700800, "num_env_steps_trained": 2700800, "num_agent_steps_sampled": 5401600, "num_agent_steps_trained": 5401600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 539.65, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 312.0}, "policy_reward_mean": {"ppo": 269.825}, "custom_metrics": {"sparse_reward_mean": 186.4, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 166.85, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.1, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.31, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.72, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.87, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.35, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.32, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.22, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.4, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.67, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.48, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.39, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 3, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.31, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.4, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.67, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.4, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.67, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 533.0, 525.0, 530.0, 579.0, 533.0, 519.0, 579.0, 525.0, 530.0, 525.0, 522.0, 465.0, 573.0, 573.0, 525.0, 579.0, 570.0, 570.0, 519.0, 525.0, 576.0, 525.0, 570.0, 525.0, 570.0, 573.0, 525.0, 573.0, 525.0, 522.0, 573.0, 567.0, 525.0, 573.0, 576.0, 519.0, 567.0, 525.0, 573.0, 576.0, 570.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 579.0, 519.0, 573.0, 570.0, 576.0, 525.0, 530.0, 573.0, 530.0, 576.0, 519.0, 573.0, 237.0, 573.0, 519.0, 525.0, 570.0, 576.0, 525.0, 525.0, 525.0, 516.0, 530.0, 573.0, 533.0, 436.0, 516.0, 530.0, 576.0, 522.0, 487.0, 516.0, 576.0, 522.0, 576.0, 533.0, 527.0, 522.0, 527.0, 482.0, 579.0, 516.0, 519.0, 570.0, 573.0, 351.0, 525.0, 525.0, 522.0, 468.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 283.0, 264.0, 269.0, 267.0, 258.0, 251.0, 279.0, 286.0, 293.0, 267.0, 
266.0, 259.0, 260.0, 285.0, 294.0, 256.0, 269.0, 274.0, 256.0, 271.0, 254.0, 258.0, 264.0, 238.0, 227.0, 290.0, 283.0, 287.0, 286.0, 265.0, 260.0, 283.0, 296.0, 283.0, 287.0, 288.0, 282.0, 271.0, 248.0, 265.0, 260.0, 292.0, 284.0, 279.0, 246.0, 289.0, 281.0, 266.0, 259.0, 292.0, 278.0, 293.0, 280.0, 264.0, 261.0, 292.0, 281.0, 268.0, 257.0, 267.0, 255.0, 286.0, 287.0, 278.0, 289.0, 267.0, 258.0, 287.0, 286.0, 300.0, 276.0, 263.0, 256.0, 284.0, 283.0, 257.0, 268.0, 284.0, 289.0, 293.0, 283.0, 304.0, 266.0, 284.0, 289.0, 283.0, 290.0, 290.0, 286.0, 284.0, 289.0, 288.0, 285.0, 297.0, 276.0, 280.0, 293.0, 288.0, 291.0, 261.0, 258.0, 287.0, 286.0, 277.0, 293.0, 289.0, 287.0, 259.0, 266.0, 271.0, 259.0, 286.0, 287.0, 259.0, 271.0, 291.0, 285.0, 253.0, 266.0, 288.0, 285.0, 117.0, 120.0, 290.0, 283.0, 273.0, 246.0, 256.0, 269.0, 282.0, 288.0, 288.0, 288.0, 265.0, 260.0, 264.0, 261.0, 257.0, 268.0, 257.0, 259.0, 255.0, 275.0, 286.0, 287.0, 264.0, 269.0, 237.0, 199.0, 252.0, 264.0, 260.0, 270.0, 298.0, 278.0, 262.0, 260.0, 235.0, 252.0, 276.0, 240.0, 290.0, 286.0, 272.0, 250.0, 287.0, 289.0, 272.0, 261.0, 261.0, 266.0, 262.0, 260.0, 268.0, 259.0, 239.0, 243.0, 299.0, 280.0, 270.0, 246.0, 262.0, 257.0, 277.0, 293.0, 268.0, 305.0, 168.0, 183.0, 270.0, 255.0, 269.0, 256.0, 257.0, 265.0, 234.0, 234.0, 270.0, 312.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7123105813465136, "mean_inference_ms": 1.2824304752478446, "mean_action_processing_ms": 0.1355679315392704, "mean_env_wait_ms": 0.8581520951086739, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 539.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 312.0}, "policy_reward_mean": {"ppo": 269.825}, "hist_stats": {"episode_reward": [576.0, 533.0, 525.0, 530.0, 579.0, 533.0, 519.0, 579.0, 525.0, 530.0, 525.0, 522.0, 465.0, 573.0, 573.0, 525.0, 579.0, 570.0, 570.0, 
519.0, 525.0, 576.0, 525.0, 570.0, 525.0, 570.0, 573.0, 525.0, 573.0, 525.0, 522.0, 573.0, 567.0, 525.0, 573.0, 576.0, 519.0, 567.0, 525.0, 573.0, 576.0, 570.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 579.0, 519.0, 573.0, 570.0, 576.0, 525.0, 530.0, 573.0, 530.0, 576.0, 519.0, 573.0, 237.0, 573.0, 519.0, 525.0, 570.0, 576.0, 525.0, 525.0, 525.0, 516.0, 530.0, 573.0, 533.0, 436.0, 516.0, 530.0, 576.0, 522.0, 487.0, 516.0, 576.0, 522.0, 576.0, 533.0, 527.0, 522.0, 527.0, 482.0, 579.0, 516.0, 519.0, 570.0, 573.0, 351.0, 525.0, 525.0, 522.0, 468.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 283.0, 264.0, 269.0, 267.0, 258.0, 251.0, 279.0, 286.0, 293.0, 267.0, 266.0, 259.0, 260.0, 285.0, 294.0, 256.0, 269.0, 274.0, 256.0, 271.0, 254.0, 258.0, 264.0, 238.0, 227.0, 290.0, 283.0, 287.0, 286.0, 265.0, 260.0, 283.0, 296.0, 283.0, 287.0, 288.0, 282.0, 271.0, 248.0, 265.0, 260.0, 292.0, 284.0, 279.0, 246.0, 289.0, 281.0, 266.0, 259.0, 292.0, 278.0, 293.0, 280.0, 264.0, 261.0, 292.0, 281.0, 268.0, 257.0, 267.0, 255.0, 286.0, 287.0, 278.0, 289.0, 267.0, 258.0, 287.0, 286.0, 300.0, 276.0, 263.0, 256.0, 284.0, 283.0, 257.0, 268.0, 284.0, 289.0, 293.0, 283.0, 304.0, 266.0, 284.0, 289.0, 283.0, 290.0, 290.0, 286.0, 284.0, 289.0, 288.0, 285.0, 297.0, 276.0, 280.0, 293.0, 288.0, 291.0, 261.0, 258.0, 287.0, 286.0, 277.0, 293.0, 289.0, 287.0, 259.0, 266.0, 271.0, 259.0, 286.0, 287.0, 259.0, 271.0, 291.0, 285.0, 253.0, 266.0, 288.0, 285.0, 117.0, 120.0, 290.0, 283.0, 273.0, 
246.0, 256.0, 269.0, 282.0, 288.0, 288.0, 288.0, 265.0, 260.0, 264.0, 261.0, 257.0, 268.0, 257.0, 259.0, 255.0, 275.0, 286.0, 287.0, 264.0, 269.0, 237.0, 199.0, 252.0, 264.0, 260.0, 270.0, 298.0, 278.0, 262.0, 260.0, 235.0, 252.0, 276.0, 240.0, 290.0, 286.0, 272.0, 250.0, 287.0, 289.0, 272.0, 261.0, 261.0, 266.0, 262.0, 260.0, 268.0, 259.0, 239.0, 243.0, 299.0, 280.0, 270.0, 246.0, 262.0, 257.0, 277.0, 293.0, 268.0, 305.0, 168.0, 183.0, 270.0, 255.0, 269.0, 256.0, 257.0, 265.0, 234.0, 234.0, 270.0, 312.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7123105813465136, "mean_inference_ms": 1.2824304752478446, "mean_action_processing_ms": 0.1355679315392704, "mean_env_wait_ms": 0.8581520951086739, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5401600, "num_agent_steps_trained": 5401600, "num_env_steps_sampled": 2700800, "num_env_steps_trained": 2700800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2700800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5401600, "timers": {"training_iteration_time_ms": 3799.432, "learn_time_ms": 1153.936, "learn_throughput": 11092.47, "synch_weights_time_ms": 14.112}, "counters": {"num_env_steps_sampled": 2700800, "num_env_steps_trained": 2700800, "num_agent_steps_sampled": 5401600, "num_agent_steps_trained": 5401600}, "done": false, "episodes_total": 6752, "training_iteration": 211, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-13", "timestamp": 1666581253, "time_this_iter_s": 3.7709083557128906, "time_total_s": 824.1601939201355, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 824.1601939201355, "timesteps_since_restore": 0, "iterations_since_restore": 211, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.15, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 186.0, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 166.08, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.09, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.57, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.53, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.47, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.43, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.12, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.96, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.47, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.43, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.47, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.43, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00038073910400271416, "policy_loss": -0.0006448630592785776, "vf_loss": 7.668740272521973, "vf_explained_var": 0.6172520518302917, "kl": 0.00185579142998904, "entropy": 1.0054981708526611, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2713600, "num_env_steps_trained": 2713600, "num_agent_steps_sampled": 5427200, "num_agent_steps_trained": 5427200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 538.08, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 312.0}, "policy_reward_mean": {"ppo": 269.04}, "custom_metrics": {"sparse_reward_mean": 186.0, "sparse_reward_min": 80, "sparse_reward_max": 200, "shaped_reward_mean": 166.08, "shaped_reward_min": 77, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.09, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.57, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.53, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.34, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.24, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.47, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.43, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.12, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.05, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.4, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.96, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.47, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.43, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.47, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.43, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 525.0, 573.0, 576.0, 519.0, 567.0, 525.0, 573.0, 576.0, 570.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 579.0, 519.0, 573.0, 570.0, 576.0, 525.0, 530.0, 573.0, 530.0, 576.0, 519.0, 573.0, 237.0, 573.0, 519.0, 525.0, 570.0, 576.0, 525.0, 525.0, 525.0, 516.0, 530.0, 573.0, 533.0, 436.0, 516.0, 530.0, 576.0, 522.0, 487.0, 516.0, 576.0, 522.0, 576.0, 533.0, 527.0, 522.0, 527.0, 482.0, 579.0, 516.0, 519.0, 570.0, 573.0, 351.0, 525.0, 525.0, 522.0, 468.0, 582.0, 522.0, 573.0, 570.0, 519.0, 573.0, 522.0, 530.0, 522.0, 573.0, 576.0, 525.0, 459.0, 573.0, 522.0, 530.0, 570.0, 522.0, 573.0, 522.0, 527.0, 573.0, 519.0, 530.0, 519.0, 573.0, 525.0, 519.0, 519.0, 522.0, 530.0, 570.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 289.0, 267.0, 258.0, 287.0, 286.0, 300.0, 276.0, 263.0, 256.0, 284.0, 
283.0, 257.0, 268.0, 284.0, 289.0, 293.0, 283.0, 304.0, 266.0, 284.0, 289.0, 283.0, 290.0, 290.0, 286.0, 284.0, 289.0, 288.0, 285.0, 297.0, 276.0, 280.0, 293.0, 288.0, 291.0, 261.0, 258.0, 287.0, 286.0, 277.0, 293.0, 289.0, 287.0, 259.0, 266.0, 271.0, 259.0, 286.0, 287.0, 259.0, 271.0, 291.0, 285.0, 253.0, 266.0, 288.0, 285.0, 117.0, 120.0, 290.0, 283.0, 273.0, 246.0, 256.0, 269.0, 282.0, 288.0, 288.0, 288.0, 265.0, 260.0, 264.0, 261.0, 257.0, 268.0, 257.0, 259.0, 255.0, 275.0, 286.0, 287.0, 264.0, 269.0, 237.0, 199.0, 252.0, 264.0, 260.0, 270.0, 298.0, 278.0, 262.0, 260.0, 235.0, 252.0, 276.0, 240.0, 290.0, 286.0, 272.0, 250.0, 287.0, 289.0, 272.0, 261.0, 261.0, 266.0, 262.0, 260.0, 268.0, 259.0, 239.0, 243.0, 299.0, 280.0, 270.0, 246.0, 262.0, 257.0, 277.0, 293.0, 268.0, 305.0, 168.0, 183.0, 270.0, 255.0, 269.0, 256.0, 257.0, 265.0, 234.0, 234.0, 270.0, 312.0, 274.0, 248.0, 291.0, 282.0, 276.0, 294.0, 253.0, 266.0, 293.0, 280.0, 256.0, 266.0, 278.0, 252.0, 259.0, 263.0, 277.0, 296.0, 282.0, 294.0, 256.0, 269.0, 232.0, 227.0, 285.0, 288.0, 261.0, 261.0, 268.0, 262.0, 279.0, 291.0, 266.0, 256.0, 290.0, 283.0, 259.0, 263.0, 276.0, 251.0, 280.0, 293.0, 268.0, 251.0, 266.0, 264.0, 242.0, 277.0, 298.0, 275.0, 258.0, 267.0, 268.0, 251.0, 257.0, 262.0, 250.0, 272.0, 258.0, 272.0, 288.0, 282.0, 281.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.712189427203816, "mean_inference_ms": 1.282044037184765, "mean_action_processing_ms": 0.13555489025797146, "mean_env_wait_ms": 0.8579692422947, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 237.0, "episode_reward_mean": 538.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 312.0}, "policy_reward_mean": {"ppo": 269.04}, "hist_stats": {"episode_reward": [567.0, 525.0, 573.0, 576.0, 519.0, 567.0, 525.0, 573.0, 576.0, 570.0, 573.0, 573.0, 576.0, 573.0, 573.0, 573.0, 573.0, 579.0, 519.0, 
573.0, 570.0, 576.0, 525.0, 530.0, 573.0, 530.0, 576.0, 519.0, 573.0, 237.0, 573.0, 519.0, 525.0, 570.0, 576.0, 525.0, 525.0, 525.0, 516.0, 530.0, 573.0, 533.0, 436.0, 516.0, 530.0, 576.0, 522.0, 487.0, 516.0, 576.0, 522.0, 576.0, 533.0, 527.0, 522.0, 527.0, 482.0, 579.0, 516.0, 519.0, 570.0, 573.0, 351.0, 525.0, 525.0, 522.0, 468.0, 582.0, 522.0, 573.0, 570.0, 519.0, 573.0, 522.0, 530.0, 522.0, 573.0, 576.0, 525.0, 459.0, 573.0, 522.0, 530.0, 570.0, 522.0, 573.0, 522.0, 527.0, 573.0, 519.0, 530.0, 519.0, 573.0, 525.0, 519.0, 519.0, 522.0, 530.0, 570.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [278.0, 289.0, 267.0, 258.0, 287.0, 286.0, 300.0, 276.0, 263.0, 256.0, 284.0, 283.0, 257.0, 268.0, 284.0, 289.0, 293.0, 283.0, 304.0, 266.0, 284.0, 289.0, 283.0, 290.0, 290.0, 286.0, 284.0, 289.0, 288.0, 285.0, 297.0, 276.0, 280.0, 293.0, 288.0, 291.0, 261.0, 258.0, 287.0, 286.0, 277.0, 293.0, 289.0, 287.0, 259.0, 266.0, 271.0, 259.0, 286.0, 287.0, 259.0, 271.0, 291.0, 285.0, 253.0, 266.0, 288.0, 285.0, 117.0, 120.0, 290.0, 283.0, 273.0, 246.0, 256.0, 269.0, 282.0, 288.0, 288.0, 288.0, 265.0, 260.0, 264.0, 261.0, 257.0, 268.0, 257.0, 259.0, 255.0, 275.0, 286.0, 287.0, 264.0, 269.0, 237.0, 199.0, 252.0, 264.0, 260.0, 270.0, 298.0, 278.0, 262.0, 260.0, 235.0, 252.0, 276.0, 240.0, 290.0, 286.0, 272.0, 250.0, 287.0, 289.0, 272.0, 261.0, 261.0, 266.0, 262.0, 260.0, 268.0, 259.0, 239.0, 243.0, 299.0, 280.0, 270.0, 246.0, 262.0, 257.0, 277.0, 293.0, 268.0, 305.0, 168.0, 183.0, 270.0, 
255.0, 269.0, 256.0, 257.0, 265.0, 234.0, 234.0, 270.0, 312.0, 274.0, 248.0, 291.0, 282.0, 276.0, 294.0, 253.0, 266.0, 293.0, 280.0, 256.0, 266.0, 278.0, 252.0, 259.0, 263.0, 277.0, 296.0, 282.0, 294.0, 256.0, 269.0, 232.0, 227.0, 285.0, 288.0, 261.0, 261.0, 268.0, 262.0, 279.0, 291.0, 266.0, 256.0, 290.0, 283.0, 259.0, 263.0, 276.0, 251.0, 280.0, 293.0, 268.0, 251.0, 266.0, 264.0, 242.0, 277.0, 298.0, 275.0, 258.0, 267.0, 268.0, 251.0, 257.0, 262.0, 250.0, 272.0, 258.0, 272.0, 288.0, 282.0, 281.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.712189427203816, "mean_inference_ms": 1.282044037184765, "mean_action_processing_ms": 0.13555489025797146, "mean_env_wait_ms": 0.8579692422947, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5427200, "num_agent_steps_trained": 5427200, "num_env_steps_sampled": 2713600, "num_env_steps_trained": 2713600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2713600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5427200, "timers": {"training_iteration_time_ms": 3723.329, "learn_time_ms": 1143.583, "learn_throughput": 11192.888, "synch_weights_time_ms": 13.75}, "counters": {"num_env_steps_sampled": 2713600, "num_env_steps_trained": 2713600, "num_agent_steps_sampled": 5427200, "num_agent_steps_trained": 5427200}, "done": false, "episodes_total": 6784, "training_iteration": 212, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-17", "timestamp": 1666581257, "time_this_iter_s": 3.75581431388855, "time_total_s": 827.916008234024, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 827.916008234024, "timesteps_since_restore": 0, "iterations_since_restore": 212, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.74, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 185.8, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 166.06, "shaped_reward_min": 111, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.14, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.3, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.74, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.82, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.48, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.29, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.68, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.53, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.51, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.29, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.68, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.29, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.68, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011561871506273746, "policy_loss": 0.0008946903399191797, "vf_loss": 7.627026557922363, "vf_explained_var": 0.601060152053833, "kl": 0.0017626096960157156, "entropy": 1.0024091005325317, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2726400, "num_env_steps_trained": 2726400, "num_agent_steps_sampled": 5452800, "num_agent_steps_trained": 5452800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 351.0, "episode_reward_mean": 537.66, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 312.0}, "policy_reward_mean": {"ppo": 268.83}, "custom_metrics": {"sparse_reward_mean": 185.8, "sparse_reward_min": 120, "sparse_reward_max": 200, "shaped_reward_mean": 166.06, "shaped_reward_min": 111, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.14, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.3, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.74, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.82, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.48, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.3, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.29, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.68, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.53, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.51, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.44, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.41, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.29, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.68, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.29, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.68, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 570.0, 576.0, 525.0, 525.0, 525.0, 516.0, 530.0, 573.0, 533.0, 436.0, 516.0, 530.0, 576.0, 522.0, 487.0, 516.0, 576.0, 522.0, 576.0, 533.0, 527.0, 522.0, 527.0, 482.0, 579.0, 516.0, 519.0, 570.0, 573.0, 351.0, 525.0, 525.0, 522.0, 468.0, 582.0, 522.0, 573.0, 570.0, 519.0, 573.0, 522.0, 530.0, 522.0, 573.0, 576.0, 525.0, 459.0, 573.0, 522.0, 530.0, 570.0, 522.0, 573.0, 522.0, 527.0, 573.0, 519.0, 530.0, 519.0, 573.0, 525.0, 519.0, 519.0, 522.0, 530.0, 570.0, 573.0, 570.0, 570.0, 513.0, 579.0, 510.0, 570.0, 519.0, 530.0, 516.0, 573.0, 510.0, 576.0, 530.0, 573.0, 570.0, 570.0, 573.0, 510.0, 576.0, 522.0, 522.0, 530.0, 579.0, 519.0, 573.0, 527.0, 525.0, 479.0, 570.0, 579.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 269.0, 282.0, 288.0, 288.0, 288.0, 265.0, 260.0, 264.0, 261.0, 257.0, 
268.0, 257.0, 259.0, 255.0, 275.0, 286.0, 287.0, 264.0, 269.0, 237.0, 199.0, 252.0, 264.0, 260.0, 270.0, 298.0, 278.0, 262.0, 260.0, 235.0, 252.0, 276.0, 240.0, 290.0, 286.0, 272.0, 250.0, 287.0, 289.0, 272.0, 261.0, 261.0, 266.0, 262.0, 260.0, 268.0, 259.0, 239.0, 243.0, 299.0, 280.0, 270.0, 246.0, 262.0, 257.0, 277.0, 293.0, 268.0, 305.0, 168.0, 183.0, 270.0, 255.0, 269.0, 256.0, 257.0, 265.0, 234.0, 234.0, 270.0, 312.0, 274.0, 248.0, 291.0, 282.0, 276.0, 294.0, 253.0, 266.0, 293.0, 280.0, 256.0, 266.0, 278.0, 252.0, 259.0, 263.0, 277.0, 296.0, 282.0, 294.0, 256.0, 269.0, 232.0, 227.0, 285.0, 288.0, 261.0, 261.0, 268.0, 262.0, 279.0, 291.0, 266.0, 256.0, 290.0, 283.0, 259.0, 263.0, 276.0, 251.0, 280.0, 293.0, 268.0, 251.0, 266.0, 264.0, 242.0, 277.0, 298.0, 275.0, 258.0, 267.0, 268.0, 251.0, 257.0, 262.0, 250.0, 272.0, 258.0, 272.0, 288.0, 282.0, 281.0, 292.0, 279.0, 291.0, 288.0, 282.0, 257.0, 256.0, 285.0, 294.0, 253.0, 257.0, 289.0, 281.0, 252.0, 267.0, 262.0, 268.0, 238.0, 278.0, 288.0, 285.0, 259.0, 251.0, 282.0, 294.0, 273.0, 257.0, 299.0, 274.0, 291.0, 279.0, 287.0, 283.0, 291.0, 282.0, 250.0, 260.0, 282.0, 294.0, 260.0, 262.0, 265.0, 257.0, 268.0, 262.0, 298.0, 281.0, 264.0, 255.0, 287.0, 286.0, 259.0, 268.0, 260.0, 265.0, 252.0, 227.0, 291.0, 279.0, 286.0, 293.0, 282.0, 288.0, 303.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.712087007666387, "mean_inference_ms": 1.2816612307739192, "mean_action_processing_ms": 0.1355417041234792, "mean_env_wait_ms": 0.8577920995302917, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 351.0, "episode_reward_mean": 537.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 168.0}, "policy_reward_max": {"ppo": 312.0}, "policy_reward_mean": {"ppo": 268.83}, "hist_stats": {"episode_reward": [525.0, 570.0, 576.0, 525.0, 525.0, 525.0, 516.0, 530.0, 573.0, 533.0, 436.0, 516.0, 530.0, 576.0, 522.0, 487.0, 516.0, 576.0, 522.0, 
576.0, 533.0, 527.0, 522.0, 527.0, 482.0, 579.0, 516.0, 519.0, 570.0, 573.0, 351.0, 525.0, 525.0, 522.0, 468.0, 582.0, 522.0, 573.0, 570.0, 519.0, 573.0, 522.0, 530.0, 522.0, 573.0, 576.0, 525.0, 459.0, 573.0, 522.0, 530.0, 570.0, 522.0, 573.0, 522.0, 527.0, 573.0, 519.0, 530.0, 519.0, 573.0, 525.0, 519.0, 519.0, 522.0, 530.0, 570.0, 573.0, 570.0, 570.0, 513.0, 579.0, 510.0, 570.0, 519.0, 530.0, 516.0, 573.0, 510.0, 576.0, 530.0, 573.0, 570.0, 570.0, 573.0, 510.0, 576.0, 522.0, 522.0, 530.0, 579.0, 519.0, 573.0, 527.0, 525.0, 479.0, 570.0, 579.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 269.0, 282.0, 288.0, 288.0, 288.0, 265.0, 260.0, 264.0, 261.0, 257.0, 268.0, 257.0, 259.0, 255.0, 275.0, 286.0, 287.0, 264.0, 269.0, 237.0, 199.0, 252.0, 264.0, 260.0, 270.0, 298.0, 278.0, 262.0, 260.0, 235.0, 252.0, 276.0, 240.0, 290.0, 286.0, 272.0, 250.0, 287.0, 289.0, 272.0, 261.0, 261.0, 266.0, 262.0, 260.0, 268.0, 259.0, 239.0, 243.0, 299.0, 280.0, 270.0, 246.0, 262.0, 257.0, 277.0, 293.0, 268.0, 305.0, 168.0, 183.0, 270.0, 255.0, 269.0, 256.0, 257.0, 265.0, 234.0, 234.0, 270.0, 312.0, 274.0, 248.0, 291.0, 282.0, 276.0, 294.0, 253.0, 266.0, 293.0, 280.0, 256.0, 266.0, 278.0, 252.0, 259.0, 263.0, 277.0, 296.0, 282.0, 294.0, 256.0, 269.0, 232.0, 227.0, 285.0, 288.0, 261.0, 261.0, 268.0, 262.0, 279.0, 291.0, 266.0, 256.0, 290.0, 283.0, 259.0, 263.0, 276.0, 251.0, 280.0, 293.0, 268.0, 251.0, 266.0, 264.0, 242.0, 277.0, 298.0, 275.0, 258.0, 267.0, 268.0, 251.0, 257.0, 
262.0, 250.0, 272.0, 258.0, 272.0, 288.0, 282.0, 281.0, 292.0, 279.0, 291.0, 288.0, 282.0, 257.0, 256.0, 285.0, 294.0, 253.0, 257.0, 289.0, 281.0, 252.0, 267.0, 262.0, 268.0, 238.0, 278.0, 288.0, 285.0, 259.0, 251.0, 282.0, 294.0, 273.0, 257.0, 299.0, 274.0, 291.0, 279.0, 287.0, 283.0, 291.0, 282.0, 250.0, 260.0, 282.0, 294.0, 260.0, 262.0, 265.0, 257.0, 268.0, 262.0, 298.0, 281.0, 264.0, 255.0, 287.0, 286.0, 259.0, 268.0, 260.0, 265.0, 252.0, 227.0, 291.0, 279.0, 286.0, 293.0, 282.0, 288.0, 303.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.712087007666387, "mean_inference_ms": 1.2816612307739192, "mean_action_processing_ms": 0.1355417041234792, "mean_env_wait_ms": 0.8577920995302917, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5452800, "num_agent_steps_trained": 5452800, "num_env_steps_sampled": 2726400, "num_env_steps_trained": 2726400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2726400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5452800, "timers": {"training_iteration_time_ms": 3701.888, "learn_time_ms": 1133.36, "learn_throughput": 11293.849, "synch_weights_time_ms": 14.403}, "counters": {"num_env_steps_sampled": 2726400, "num_env_steps_trained": 2726400, "num_agent_steps_sampled": 5452800, "num_agent_steps_trained": 5452800}, "done": false, "episodes_total": 6816, "training_iteration": 213, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-21", "timestamp": 1666581261, "time_this_iter_s": 3.575244665145874, "time_total_s": 831.4912528991699, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 831.4912528991699, "timesteps_since_restore": 0, "iterations_since_restore": 213, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.82, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 188.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 166.79, "shaped_reward_min": 139, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.13, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.46, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.73, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.5, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.29, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.84, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.07, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.48, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.29, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.84, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.29, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.84, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014452373143285513, "policy_loss": -0.0017055664211511612, "vf_loss": 7.584623336791992, "vf_explained_var": 0.6165767908096313, "kl": 0.002139848656952381, "entropy": 0.9962633848190308, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2739200, "num_env_steps_trained": 2739200, "num_agent_steps_sampled": 5478400, "num_agent_steps_trained": 5478400}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 459.0, "episode_reward_mean": 543.19, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 312.0}, "policy_reward_mean": {"ppo": 271.595}, "custom_metrics": {"sparse_reward_mean": 188.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 166.79, "shaped_reward_min": 139, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.13, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.46, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 14.73, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.03, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.5, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.31, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.17, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.16, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.29, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.84, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.07, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.48, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.32, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 5, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.29, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.84, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.29, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.84, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 522.0, 468.0, 582.0, 522.0, 573.0, 570.0, 519.0, 573.0, 522.0, 530.0, 522.0, 573.0, 576.0, 525.0, 459.0, 573.0, 522.0, 530.0, 570.0, 522.0, 573.0, 522.0, 527.0, 573.0, 519.0, 530.0, 519.0, 573.0, 525.0, 519.0, 519.0, 522.0, 530.0, 570.0, 573.0, 570.0, 570.0, 513.0, 579.0, 510.0, 570.0, 519.0, 530.0, 516.0, 573.0, 510.0, 576.0, 530.0, 573.0, 570.0, 570.0, 573.0, 510.0, 576.0, 522.0, 522.0, 530.0, 579.0, 519.0, 573.0, 527.0, 525.0, 479.0, 570.0, 579.0, 570.0, 582.0, 513.0, 573.0, 579.0, 522.0, 519.0, 576.0, 519.0, 522.0, 522.0, 525.0, 582.0, 573.0, 533.0, 470.0, 582.0, 530.0, 570.0, 570.0, 513.0, 579.0, 519.0, 510.0, 573.0, 573.0, 522.0, 522.0, 539.0, 573.0, 570.0, 576.0, 570.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 256.0, 257.0, 265.0, 234.0, 234.0, 270.0, 312.0, 274.0, 248.0, 291.0, 
282.0, 276.0, 294.0, 253.0, 266.0, 293.0, 280.0, 256.0, 266.0, 278.0, 252.0, 259.0, 263.0, 277.0, 296.0, 282.0, 294.0, 256.0, 269.0, 232.0, 227.0, 285.0, 288.0, 261.0, 261.0, 268.0, 262.0, 279.0, 291.0, 266.0, 256.0, 290.0, 283.0, 259.0, 263.0, 276.0, 251.0, 280.0, 293.0, 268.0, 251.0, 266.0, 264.0, 242.0, 277.0, 298.0, 275.0, 258.0, 267.0, 268.0, 251.0, 257.0, 262.0, 250.0, 272.0, 258.0, 272.0, 288.0, 282.0, 281.0, 292.0, 279.0, 291.0, 288.0, 282.0, 257.0, 256.0, 285.0, 294.0, 253.0, 257.0, 289.0, 281.0, 252.0, 267.0, 262.0, 268.0, 238.0, 278.0, 288.0, 285.0, 259.0, 251.0, 282.0, 294.0, 273.0, 257.0, 299.0, 274.0, 291.0, 279.0, 287.0, 283.0, 291.0, 282.0, 250.0, 260.0, 282.0, 294.0, 260.0, 262.0, 265.0, 257.0, 268.0, 262.0, 298.0, 281.0, 264.0, 255.0, 287.0, 286.0, 259.0, 268.0, 260.0, 265.0, 252.0, 227.0, 291.0, 279.0, 286.0, 293.0, 282.0, 288.0, 303.0, 279.0, 246.0, 267.0, 286.0, 287.0, 285.0, 294.0, 262.0, 260.0, 264.0, 255.0, 293.0, 283.0, 252.0, 267.0, 263.0, 259.0, 266.0, 256.0, 270.0, 255.0, 293.0, 289.0, 288.0, 285.0, 272.0, 261.0, 239.0, 231.0, 289.0, 293.0, 260.0, 270.0, 276.0, 294.0, 280.0, 290.0, 258.0, 255.0, 287.0, 292.0, 265.0, 254.0, 262.0, 248.0, 284.0, 289.0, 286.0, 287.0, 263.0, 259.0, 250.0, 272.0, 273.0, 266.0, 295.0, 278.0, 283.0, 287.0, 281.0, 295.0, 291.0, 279.0, 256.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.711992025997882, "mean_inference_ms": 1.2812842286076684, "mean_action_processing_ms": 0.13553223868846914, "mean_env_wait_ms": 0.8576304489114933, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 459.0, "episode_reward_mean": 543.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 227.0}, "policy_reward_max": {"ppo": 312.0}, "policy_reward_mean": {"ppo": 271.595}, "hist_stats": {"episode_reward": [525.0, 522.0, 468.0, 582.0, 522.0, 573.0, 570.0, 519.0, 573.0, 522.0, 530.0, 522.0, 573.0, 576.0, 525.0, 459.0, 573.0, 522.0, 530.0, 
570.0, 522.0, 573.0, 522.0, 527.0, 573.0, 519.0, 530.0, 519.0, 573.0, 525.0, 519.0, 519.0, 522.0, 530.0, 570.0, 573.0, 570.0, 570.0, 513.0, 579.0, 510.0, 570.0, 519.0, 530.0, 516.0, 573.0, 510.0, 576.0, 530.0, 573.0, 570.0, 570.0, 573.0, 510.0, 576.0, 522.0, 522.0, 530.0, 579.0, 519.0, 573.0, 527.0, 525.0, 479.0, 570.0, 579.0, 570.0, 582.0, 513.0, 573.0, 579.0, 522.0, 519.0, 576.0, 519.0, 522.0, 522.0, 525.0, 582.0, 573.0, 533.0, 470.0, 582.0, 530.0, 570.0, 570.0, 513.0, 579.0, 519.0, 510.0, 573.0, 573.0, 522.0, 522.0, 539.0, 573.0, 570.0, 576.0, 570.0, 513.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 256.0, 257.0, 265.0, 234.0, 234.0, 270.0, 312.0, 274.0, 248.0, 291.0, 282.0, 276.0, 294.0, 253.0, 266.0, 293.0, 280.0, 256.0, 266.0, 278.0, 252.0, 259.0, 263.0, 277.0, 296.0, 282.0, 294.0, 256.0, 269.0, 232.0, 227.0, 285.0, 288.0, 261.0, 261.0, 268.0, 262.0, 279.0, 291.0, 266.0, 256.0, 290.0, 283.0, 259.0, 263.0, 276.0, 251.0, 280.0, 293.0, 268.0, 251.0, 266.0, 264.0, 242.0, 277.0, 298.0, 275.0, 258.0, 267.0, 268.0, 251.0, 257.0, 262.0, 250.0, 272.0, 258.0, 272.0, 288.0, 282.0, 281.0, 292.0, 279.0, 291.0, 288.0, 282.0, 257.0, 256.0, 285.0, 294.0, 253.0, 257.0, 289.0, 281.0, 252.0, 267.0, 262.0, 268.0, 238.0, 278.0, 288.0, 285.0, 259.0, 251.0, 282.0, 294.0, 273.0, 257.0, 299.0, 274.0, 291.0, 279.0, 287.0, 283.0, 291.0, 282.0, 250.0, 260.0, 282.0, 294.0, 260.0, 262.0, 265.0, 257.0, 268.0, 262.0, 298.0, 281.0, 264.0, 255.0, 287.0, 286.0, 259.0, 268.0, 260.0, 265.0, 252.0, 
227.0, 291.0, 279.0, 286.0, 293.0, 282.0, 288.0, 303.0, 279.0, 246.0, 267.0, 286.0, 287.0, 285.0, 294.0, 262.0, 260.0, 264.0, 255.0, 293.0, 283.0, 252.0, 267.0, 263.0, 259.0, 266.0, 256.0, 270.0, 255.0, 293.0, 289.0, 288.0, 285.0, 272.0, 261.0, 239.0, 231.0, 289.0, 293.0, 260.0, 270.0, 276.0, 294.0, 280.0, 290.0, 258.0, 255.0, 287.0, 292.0, 265.0, 254.0, 262.0, 248.0, 284.0, 289.0, 286.0, 287.0, 263.0, 259.0, 250.0, 272.0, 273.0, 266.0, 295.0, 278.0, 283.0, 287.0, 281.0, 295.0, 291.0, 279.0, 256.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.711992025997882, "mean_inference_ms": 1.2812842286076684, "mean_action_processing_ms": 0.13553223868846914, "mean_env_wait_ms": 0.8576304489114933, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5478400, "num_agent_steps_trained": 5478400, "num_env_steps_sampled": 2739200, "num_env_steps_trained": 2739200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2739200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5478400, "timers": {"training_iteration_time_ms": 3678.396, "learn_time_ms": 1128.202, "learn_throughput": 11345.488, "synch_weights_time_ms": 14.287}, "counters": {"num_env_steps_sampled": 2739200, "num_env_steps_trained": 2739200, "num_agent_steps_sampled": 5478400, "num_agent_steps_trained": 5478400}, "done": false, "episodes_total": 6848, "training_iteration": 214, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-24", "timestamp": 1666581264, "time_this_iter_s": 3.628016233444214, "time_total_s": 835.1192691326141, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 835.1192691326141, "timesteps_since_restore": 0, "iterations_since_restore": 214, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.083333333333332, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 188.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.95, "shaped_reward_min": 145, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.08, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.49, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.81, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.13, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.36, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.41, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.47, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.51, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.44, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.41, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.41, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008325978415086865, "policy_loss": 0.0005480643594637513, "vf_loss": 7.812173843383789, "vf_explained_var": 0.6112537384033203, "kl": 0.001983209513127804, "entropy": 0.9933664798736572, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2752000, "num_env_steps_trained": 2752000, "num_agent_steps_sampled": 5504000, "num_agent_steps_trained": 5504000}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 465.0, "episode_reward_mean": 545.55, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 272.775}, "custom_metrics": {"sparse_reward_mean": 188.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 167.95, "shaped_reward_min": 145, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.08, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.49, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.81, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.13, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.36, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.15, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.41, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.47, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.51, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.44, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 3, "optimal_onion_potting_agent_0_mean": 14.41, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.41, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 530.0, 570.0, 573.0, 570.0, 570.0, 513.0, 579.0, 510.0, 570.0, 519.0, 530.0, 516.0, 573.0, 510.0, 576.0, 530.0, 573.0, 570.0, 570.0, 573.0, 510.0, 576.0, 522.0, 522.0, 530.0, 579.0, 519.0, 573.0, 527.0, 525.0, 479.0, 570.0, 579.0, 570.0, 582.0, 513.0, 573.0, 579.0, 522.0, 519.0, 576.0, 519.0, 522.0, 522.0, 525.0, 582.0, 573.0, 533.0, 470.0, 582.0, 530.0, 570.0, 570.0, 513.0, 579.0, 519.0, 510.0, 573.0, 573.0, 522.0, 522.0, 539.0, 573.0, 570.0, 576.0, 570.0, 513.0, 513.0, 579.0, 530.0, 576.0, 576.0, 522.0, 576.0, 527.0, 576.0, 570.0, 533.0, 522.0, 576.0, 533.0, 527.0, 536.0, 579.0, 525.0, 573.0, 570.0, 525.0, 479.0, 519.0, 579.0, 533.0, 573.0, 525.0, 522.0, 573.0, 579.0, 465.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [250.0, 272.0, 258.0, 272.0, 288.0, 282.0, 281.0, 292.0, 279.0, 291.0, 288.0, 
282.0, 257.0, 256.0, 285.0, 294.0, 253.0, 257.0, 289.0, 281.0, 252.0, 267.0, 262.0, 268.0, 238.0, 278.0, 288.0, 285.0, 259.0, 251.0, 282.0, 294.0, 273.0, 257.0, 299.0, 274.0, 291.0, 279.0, 287.0, 283.0, 291.0, 282.0, 250.0, 260.0, 282.0, 294.0, 260.0, 262.0, 265.0, 257.0, 268.0, 262.0, 298.0, 281.0, 264.0, 255.0, 287.0, 286.0, 259.0, 268.0, 260.0, 265.0, 252.0, 227.0, 291.0, 279.0, 286.0, 293.0, 282.0, 288.0, 303.0, 279.0, 246.0, 267.0, 286.0, 287.0, 285.0, 294.0, 262.0, 260.0, 264.0, 255.0, 293.0, 283.0, 252.0, 267.0, 263.0, 259.0, 266.0, 256.0, 270.0, 255.0, 293.0, 289.0, 288.0, 285.0, 272.0, 261.0, 239.0, 231.0, 289.0, 293.0, 260.0, 270.0, 276.0, 294.0, 280.0, 290.0, 258.0, 255.0, 287.0, 292.0, 265.0, 254.0, 262.0, 248.0, 284.0, 289.0, 286.0, 287.0, 263.0, 259.0, 250.0, 272.0, 273.0, 266.0, 295.0, 278.0, 283.0, 287.0, 281.0, 295.0, 291.0, 279.0, 256.0, 257.0, 246.0, 267.0, 291.0, 288.0, 260.0, 270.0, 285.0, 291.0, 295.0, 281.0, 254.0, 268.0, 281.0, 295.0, 262.0, 265.0, 289.0, 287.0, 280.0, 290.0, 269.0, 264.0, 263.0, 259.0, 279.0, 297.0, 262.0, 271.0, 276.0, 251.0, 277.0, 259.0, 287.0, 292.0, 270.0, 255.0, 290.0, 283.0, 273.0, 297.0, 260.0, 265.0, 245.0, 234.0, 270.0, 249.0, 287.0, 292.0, 264.0, 269.0, 296.0, 277.0, 274.0, 251.0, 264.0, 258.0, 289.0, 284.0, 299.0, 280.0, 240.0, 225.0, 254.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7118862679389156, "mean_inference_ms": 1.2809083346908747, "mean_action_processing_ms": 0.13552358445176294, "mean_env_wait_ms": 0.8574677314031771, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 465.0, "episode_reward_mean": 545.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 272.775}, "hist_stats": {"episode_reward": [522.0, 530.0, 570.0, 573.0, 570.0, 570.0, 513.0, 579.0, 510.0, 570.0, 519.0, 530.0, 516.0, 573.0, 510.0, 576.0, 530.0, 573.0, 
570.0, 570.0, 573.0, 510.0, 576.0, 522.0, 522.0, 530.0, 579.0, 519.0, 573.0, 527.0, 525.0, 479.0, 570.0, 579.0, 570.0, 582.0, 513.0, 573.0, 579.0, 522.0, 519.0, 576.0, 519.0, 522.0, 522.0, 525.0, 582.0, 573.0, 533.0, 470.0, 582.0, 530.0, 570.0, 570.0, 513.0, 579.0, 519.0, 510.0, 573.0, 573.0, 522.0, 522.0, 539.0, 573.0, 570.0, 576.0, 570.0, 513.0, 513.0, 579.0, 530.0, 576.0, 576.0, 522.0, 576.0, 527.0, 576.0, 570.0, 533.0, 522.0, 576.0, 533.0, 527.0, 536.0, 579.0, 525.0, 573.0, 570.0, 525.0, 479.0, 519.0, 579.0, 533.0, 573.0, 525.0, 522.0, 573.0, 579.0, 465.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [250.0, 272.0, 258.0, 272.0, 288.0, 282.0, 281.0, 292.0, 279.0, 291.0, 288.0, 282.0, 257.0, 256.0, 285.0, 294.0, 253.0, 257.0, 289.0, 281.0, 252.0, 267.0, 262.0, 268.0, 238.0, 278.0, 288.0, 285.0, 259.0, 251.0, 282.0, 294.0, 273.0, 257.0, 299.0, 274.0, 291.0, 279.0, 287.0, 283.0, 291.0, 282.0, 250.0, 260.0, 282.0, 294.0, 260.0, 262.0, 265.0, 257.0, 268.0, 262.0, 298.0, 281.0, 264.0, 255.0, 287.0, 286.0, 259.0, 268.0, 260.0, 265.0, 252.0, 227.0, 291.0, 279.0, 286.0, 293.0, 282.0, 288.0, 303.0, 279.0, 246.0, 267.0, 286.0, 287.0, 285.0, 294.0, 262.0, 260.0, 264.0, 255.0, 293.0, 283.0, 252.0, 267.0, 263.0, 259.0, 266.0, 256.0, 270.0, 255.0, 293.0, 289.0, 288.0, 285.0, 272.0, 261.0, 239.0, 231.0, 289.0, 293.0, 260.0, 270.0, 276.0, 294.0, 280.0, 290.0, 258.0, 255.0, 287.0, 292.0, 265.0, 254.0, 262.0, 248.0, 284.0, 289.0, 286.0, 287.0, 263.0, 259.0, 250.0, 272.0, 273.0, 266.0, 
295.0, 278.0, 283.0, 287.0, 281.0, 295.0, 291.0, 279.0, 256.0, 257.0, 246.0, 267.0, 291.0, 288.0, 260.0, 270.0, 285.0, 291.0, 295.0, 281.0, 254.0, 268.0, 281.0, 295.0, 262.0, 265.0, 289.0, 287.0, 280.0, 290.0, 269.0, 264.0, 263.0, 259.0, 279.0, 297.0, 262.0, 271.0, 276.0, 251.0, 277.0, 259.0, 287.0, 292.0, 270.0, 255.0, 290.0, 283.0, 273.0, 297.0, 260.0, 265.0, 245.0, 234.0, 270.0, 249.0, 287.0, 292.0, 264.0, 269.0, 296.0, 277.0, 274.0, 251.0, 264.0, 258.0, 289.0, 284.0, 299.0, 280.0, 240.0, 225.0, 254.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7118862679389156, "mean_inference_ms": 1.2809083346908747, "mean_action_processing_ms": 0.13552358445176294, "mean_env_wait_ms": 0.8574677314031771, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5504000, "num_agent_steps_trained": 5504000, "num_env_steps_sampled": 2752000, "num_env_steps_trained": 2752000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2752000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5504000, "timers": {"training_iteration_time_ms": 3667.458, "learn_time_ms": 1125.153, "learn_throughput": 11376.233, "synch_weights_time_ms": 14.192}, "counters": {"num_env_steps_sampled": 2752000, "num_env_steps_trained": 2752000, "num_agent_steps_sampled": 5504000, "num_agent_steps_trained": 5504000}, "done": false, "episodes_total": 6880, "training_iteration": 215, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-28", "timestamp": 1666581268, "time_this_iter_s": 3.688774347305298, "time_total_s": 838.8080434799194, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 838.8080434799194, "timesteps_since_restore": 0, "iterations_since_restore": 215, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.96, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 188.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 168.26, "shaped_reward_min": 145, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.11, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.04, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.76, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.76, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.65, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.51, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.76, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.65, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.76, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.65, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010710940696299076, "policy_loss": -0.0013515017926692963, "vf_loss": 7.728179931640625, "vf_explained_var": 0.6077972650527954, "kl": 0.002291465178132057, "entropy": 0.9848192930221558, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2764800, "num_env_steps_trained": 2764800, "num_agent_steps_sampled": 5529600, "num_agent_steps_trained": 5529600}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 465.0, "episode_reward_mean": 545.46, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 272.73}, "custom_metrics": {"sparse_reward_mean": 188.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 168.26, "shaped_reward_min": 145, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.11, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.04, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.76, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.29, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 5, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 5, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.76, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.65, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.51, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.76, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.65, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.76, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.65, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 579.0, 570.0, 582.0, 513.0, 573.0, 579.0, 522.0, 519.0, 576.0, 519.0, 522.0, 522.0, 525.0, 582.0, 573.0, 533.0, 470.0, 582.0, 530.0, 570.0, 570.0, 513.0, 579.0, 519.0, 510.0, 573.0, 573.0, 522.0, 522.0, 539.0, 573.0, 570.0, 576.0, 570.0, 513.0, 513.0, 579.0, 530.0, 576.0, 576.0, 522.0, 576.0, 527.0, 576.0, 570.0, 533.0, 522.0, 576.0, 533.0, 527.0, 536.0, 579.0, 525.0, 573.0, 570.0, 525.0, 479.0, 519.0, 579.0, 533.0, 573.0, 525.0, 522.0, 573.0, 579.0, 465.0, 522.0, 576.0, 522.0, 470.0, 522.0, 527.0, 573.0, 573.0, 576.0, 579.0, 573.0, 570.0, 519.0, 527.0, 536.0, 570.0, 573.0, 522.0, 519.0, 527.0, 522.0, 525.0, 579.0, 522.0, 513.0, 516.0, 576.0, 576.0, 519.0, 573.0, 533.0, 570.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 279.0, 286.0, 293.0, 282.0, 288.0, 303.0, 279.0, 246.0, 267.0, 286.0, 
287.0, 285.0, 294.0, 262.0, 260.0, 264.0, 255.0, 293.0, 283.0, 252.0, 267.0, 263.0, 259.0, 266.0, 256.0, 270.0, 255.0, 293.0, 289.0, 288.0, 285.0, 272.0, 261.0, 239.0, 231.0, 289.0, 293.0, 260.0, 270.0, 276.0, 294.0, 280.0, 290.0, 258.0, 255.0, 287.0, 292.0, 265.0, 254.0, 262.0, 248.0, 284.0, 289.0, 286.0, 287.0, 263.0, 259.0, 250.0, 272.0, 273.0, 266.0, 295.0, 278.0, 283.0, 287.0, 281.0, 295.0, 291.0, 279.0, 256.0, 257.0, 246.0, 267.0, 291.0, 288.0, 260.0, 270.0, 285.0, 291.0, 295.0, 281.0, 254.0, 268.0, 281.0, 295.0, 262.0, 265.0, 289.0, 287.0, 280.0, 290.0, 269.0, 264.0, 263.0, 259.0, 279.0, 297.0, 262.0, 271.0, 276.0, 251.0, 277.0, 259.0, 287.0, 292.0, 270.0, 255.0, 290.0, 283.0, 273.0, 297.0, 260.0, 265.0, 245.0, 234.0, 270.0, 249.0, 287.0, 292.0, 264.0, 269.0, 296.0, 277.0, 274.0, 251.0, 264.0, 258.0, 289.0, 284.0, 299.0, 280.0, 240.0, 225.0, 254.0, 268.0, 286.0, 290.0, 262.0, 260.0, 233.0, 237.0, 281.0, 241.0, 264.0, 263.0, 290.0, 283.0, 288.0, 285.0, 284.0, 292.0, 301.0, 278.0, 288.0, 285.0, 298.0, 272.0, 267.0, 252.0, 253.0, 274.0, 273.0, 263.0, 290.0, 280.0, 293.0, 280.0, 267.0, 255.0, 253.0, 266.0, 255.0, 272.0, 254.0, 268.0, 270.0, 255.0, 282.0, 297.0, 261.0, 261.0, 272.0, 241.0, 262.0, 254.0, 289.0, 287.0, 302.0, 274.0, 247.0, 272.0, 298.0, 275.0, 261.0, 272.0, 286.0, 284.0, 265.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7117419372962004, "mean_inference_ms": 1.2805191624502714, "mean_action_processing_ms": 0.13551508961273653, "mean_env_wait_ms": 0.8572922734685937, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 465.0, "episode_reward_mean": 545.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 272.73}, "hist_stats": {"episode_reward": [570.0, 579.0, 570.0, 582.0, 513.0, 573.0, 579.0, 522.0, 519.0, 576.0, 519.0, 522.0, 522.0, 525.0, 582.0, 573.0, 533.0, 470.0, 582.0, 
530.0, 570.0, 570.0, 513.0, 579.0, 519.0, 510.0, 573.0, 573.0, 522.0, 522.0, 539.0, 573.0, 570.0, 576.0, 570.0, 513.0, 513.0, 579.0, 530.0, 576.0, 576.0, 522.0, 576.0, 527.0, 576.0, 570.0, 533.0, 522.0, 576.0, 533.0, 527.0, 536.0, 579.0, 525.0, 573.0, 570.0, 525.0, 479.0, 519.0, 579.0, 533.0, 573.0, 525.0, 522.0, 573.0, 579.0, 465.0, 522.0, 576.0, 522.0, 470.0, 522.0, 527.0, 573.0, 573.0, 576.0, 579.0, 573.0, 570.0, 519.0, 527.0, 536.0, 570.0, 573.0, 522.0, 519.0, 527.0, 522.0, 525.0, 579.0, 522.0, 513.0, 516.0, 576.0, 576.0, 519.0, 573.0, 533.0, 570.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 279.0, 286.0, 293.0, 282.0, 288.0, 303.0, 279.0, 246.0, 267.0, 286.0, 287.0, 285.0, 294.0, 262.0, 260.0, 264.0, 255.0, 293.0, 283.0, 252.0, 267.0, 263.0, 259.0, 266.0, 256.0, 270.0, 255.0, 293.0, 289.0, 288.0, 285.0, 272.0, 261.0, 239.0, 231.0, 289.0, 293.0, 260.0, 270.0, 276.0, 294.0, 280.0, 290.0, 258.0, 255.0, 287.0, 292.0, 265.0, 254.0, 262.0, 248.0, 284.0, 289.0, 286.0, 287.0, 263.0, 259.0, 250.0, 272.0, 273.0, 266.0, 295.0, 278.0, 283.0, 287.0, 281.0, 295.0, 291.0, 279.0, 256.0, 257.0, 246.0, 267.0, 291.0, 288.0, 260.0, 270.0, 285.0, 291.0, 295.0, 281.0, 254.0, 268.0, 281.0, 295.0, 262.0, 265.0, 289.0, 287.0, 280.0, 290.0, 269.0, 264.0, 263.0, 259.0, 279.0, 297.0, 262.0, 271.0, 276.0, 251.0, 277.0, 259.0, 287.0, 292.0, 270.0, 255.0, 290.0, 283.0, 273.0, 297.0, 260.0, 265.0, 245.0, 234.0, 270.0, 249.0, 287.0, 292.0, 264.0, 269.0, 296.0, 277.0, 274.0, 251.0, 264.0, 
258.0, 289.0, 284.0, 299.0, 280.0, 240.0, 225.0, 254.0, 268.0, 286.0, 290.0, 262.0, 260.0, 233.0, 237.0, 281.0, 241.0, 264.0, 263.0, 290.0, 283.0, 288.0, 285.0, 284.0, 292.0, 301.0, 278.0, 288.0, 285.0, 298.0, 272.0, 267.0, 252.0, 253.0, 274.0, 273.0, 263.0, 290.0, 280.0, 293.0, 280.0, 267.0, 255.0, 253.0, 266.0, 255.0, 272.0, 254.0, 268.0, 270.0, 255.0, 282.0, 297.0, 261.0, 261.0, 272.0, 241.0, 262.0, 254.0, 289.0, 287.0, 302.0, 274.0, 247.0, 272.0, 298.0, 275.0, 261.0, 272.0, 286.0, 284.0, 265.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7117419372962004, "mean_inference_ms": 1.2805191624502714, "mean_action_processing_ms": 0.13551508961273653, "mean_env_wait_ms": 0.8572922734685937, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5529600, "num_agent_steps_trained": 5529600, "num_env_steps_sampled": 2764800, "num_env_steps_trained": 2764800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2764800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5529600, "timers": {"training_iteration_time_ms": 3675.84, "learn_time_ms": 1133.075, "learn_throughput": 11296.692, "synch_weights_time_ms": 14.355}, "counters": {"num_env_steps_sampled": 2764800, "num_env_steps_trained": 2764800, "num_agent_steps_sampled": 5529600, "num_agent_steps_trained": 5529600}, "done": false, "episodes_total": 6912, "training_iteration": 216, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-32", "timestamp": 1666581272, "time_this_iter_s": 3.801997661590576, "time_total_s": 842.61004114151, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 842.61004114151, "timesteps_since_restore": 0, "iterations_since_restore": 216, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.566666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 188.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 167.89, "shaped_reward_min": 125, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.94, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.58, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.45, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.92, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.45, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.45, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015308646252378821, "policy_loss": -0.0018033639062196016, "vf_loss": 7.642875671386719, "vf_explained_var": 0.6139187216758728, "kl": 0.0021336167119443417, "entropy": 0.9835748672485352, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2777600, "num_env_steps_trained": 2777600, "num_agent_steps_sampled": 5555200, "num_agent_steps_trained": 5555200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 405.0, "episode_reward_mean": 543.89, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 271.945}, "custom_metrics": {"sparse_reward_mean": 188.0, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 167.89, "shaped_reward_min": 125, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.94, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.58, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.26, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.45, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.45, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.45, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 576.0, 570.0, 513.0, 513.0, 579.0, 530.0, 576.0, 576.0, 522.0, 576.0, 527.0, 576.0, 570.0, 533.0, 522.0, 576.0, 533.0, 527.0, 536.0, 579.0, 525.0, 573.0, 570.0, 525.0, 479.0, 519.0, 579.0, 533.0, 573.0, 525.0, 522.0, 573.0, 579.0, 465.0, 522.0, 576.0, 522.0, 470.0, 522.0, 527.0, 573.0, 573.0, 576.0, 579.0, 573.0, 570.0, 519.0, 527.0, 536.0, 570.0, 573.0, 522.0, 519.0, 527.0, 522.0, 525.0, 579.0, 522.0, 513.0, 516.0, 576.0, 576.0, 519.0, 573.0, 533.0, 570.0, 522.0, 576.0, 510.0, 564.0, 530.0, 582.0, 582.0, 567.0, 570.0, 522.0, 573.0, 522.0, 579.0, 579.0, 522.0, 519.0, 519.0, 525.0, 525.0, 579.0, 576.0, 530.0, 470.0, 525.0, 405.0, 533.0, 573.0, 522.0, 579.0, 519.0, 570.0, 527.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 287.0, 281.0, 295.0, 291.0, 279.0, 256.0, 257.0, 246.0, 267.0, 291.0, 
288.0, 260.0, 270.0, 285.0, 291.0, 295.0, 281.0, 254.0, 268.0, 281.0, 295.0, 262.0, 265.0, 289.0, 287.0, 280.0, 290.0, 269.0, 264.0, 263.0, 259.0, 279.0, 297.0, 262.0, 271.0, 276.0, 251.0, 277.0, 259.0, 287.0, 292.0, 270.0, 255.0, 290.0, 283.0, 273.0, 297.0, 260.0, 265.0, 245.0, 234.0, 270.0, 249.0, 287.0, 292.0, 264.0, 269.0, 296.0, 277.0, 274.0, 251.0, 264.0, 258.0, 289.0, 284.0, 299.0, 280.0, 240.0, 225.0, 254.0, 268.0, 286.0, 290.0, 262.0, 260.0, 233.0, 237.0, 281.0, 241.0, 264.0, 263.0, 290.0, 283.0, 288.0, 285.0, 284.0, 292.0, 301.0, 278.0, 288.0, 285.0, 298.0, 272.0, 267.0, 252.0, 253.0, 274.0, 273.0, 263.0, 290.0, 280.0, 293.0, 280.0, 267.0, 255.0, 253.0, 266.0, 255.0, 272.0, 254.0, 268.0, 270.0, 255.0, 282.0, 297.0, 261.0, 261.0, 272.0, 241.0, 262.0, 254.0, 289.0, 287.0, 302.0, 274.0, 247.0, 272.0, 298.0, 275.0, 261.0, 272.0, 286.0, 284.0, 265.0, 257.0, 290.0, 286.0, 250.0, 260.0, 285.0, 279.0, 268.0, 262.0, 289.0, 293.0, 293.0, 289.0, 273.0, 294.0, 285.0, 285.0, 268.0, 254.0, 286.0, 287.0, 264.0, 258.0, 297.0, 282.0, 282.0, 297.0, 264.0, 258.0, 249.0, 270.0, 263.0, 256.0, 259.0, 266.0, 264.0, 261.0, 288.0, 291.0, 265.0, 311.0, 261.0, 269.0, 243.0, 227.0, 260.0, 265.0, 200.0, 205.0, 252.0, 281.0, 277.0, 296.0, 272.0, 250.0, 281.0, 298.0, 264.0, 255.0, 291.0, 279.0, 272.0, 255.0, 283.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7115811096238751, "mean_inference_ms": 1.2801026959798782, "mean_action_processing_ms": 0.13550184600075288, "mean_env_wait_ms": 0.857080086310055, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 405.0, "episode_reward_mean": 543.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 271.945}, "hist_stats": {"episode_reward": [570.0, 576.0, 570.0, 513.0, 513.0, 579.0, 530.0, 576.0, 576.0, 522.0, 576.0, 527.0, 576.0, 570.0, 533.0, 522.0, 576.0, 533.0, 527.0, 
536.0, 579.0, 525.0, 573.0, 570.0, 525.0, 479.0, 519.0, 579.0, 533.0, 573.0, 525.0, 522.0, 573.0, 579.0, 465.0, 522.0, 576.0, 522.0, 470.0, 522.0, 527.0, 573.0, 573.0, 576.0, 579.0, 573.0, 570.0, 519.0, 527.0, 536.0, 570.0, 573.0, 522.0, 519.0, 527.0, 522.0, 525.0, 579.0, 522.0, 513.0, 516.0, 576.0, 576.0, 519.0, 573.0, 533.0, 570.0, 522.0, 576.0, 510.0, 564.0, 530.0, 582.0, 582.0, 567.0, 570.0, 522.0, 573.0, 522.0, 579.0, 579.0, 522.0, 519.0, 519.0, 525.0, 525.0, 579.0, 576.0, 530.0, 470.0, 525.0, 405.0, 533.0, 573.0, 522.0, 579.0, 519.0, 570.0, 527.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 287.0, 281.0, 295.0, 291.0, 279.0, 256.0, 257.0, 246.0, 267.0, 291.0, 288.0, 260.0, 270.0, 285.0, 291.0, 295.0, 281.0, 254.0, 268.0, 281.0, 295.0, 262.0, 265.0, 289.0, 287.0, 280.0, 290.0, 269.0, 264.0, 263.0, 259.0, 279.0, 297.0, 262.0, 271.0, 276.0, 251.0, 277.0, 259.0, 287.0, 292.0, 270.0, 255.0, 290.0, 283.0, 273.0, 297.0, 260.0, 265.0, 245.0, 234.0, 270.0, 249.0, 287.0, 292.0, 264.0, 269.0, 296.0, 277.0, 274.0, 251.0, 264.0, 258.0, 289.0, 284.0, 299.0, 280.0, 240.0, 225.0, 254.0, 268.0, 286.0, 290.0, 262.0, 260.0, 233.0, 237.0, 281.0, 241.0, 264.0, 263.0, 290.0, 283.0, 288.0, 285.0, 284.0, 292.0, 301.0, 278.0, 288.0, 285.0, 298.0, 272.0, 267.0, 252.0, 253.0, 274.0, 273.0, 263.0, 290.0, 280.0, 293.0, 280.0, 267.0, 255.0, 253.0, 266.0, 255.0, 272.0, 254.0, 268.0, 270.0, 255.0, 282.0, 297.0, 261.0, 261.0, 272.0, 241.0, 262.0, 254.0, 289.0, 287.0, 302.0, 274.0, 247.0, 
272.0, 298.0, 275.0, 261.0, 272.0, 286.0, 284.0, 265.0, 257.0, 290.0, 286.0, 250.0, 260.0, 285.0, 279.0, 268.0, 262.0, 289.0, 293.0, 293.0, 289.0, 273.0, 294.0, 285.0, 285.0, 268.0, 254.0, 286.0, 287.0, 264.0, 258.0, 297.0, 282.0, 282.0, 297.0, 264.0, 258.0, 249.0, 270.0, 263.0, 256.0, 259.0, 266.0, 264.0, 261.0, 288.0, 291.0, 265.0, 311.0, 261.0, 269.0, 243.0, 227.0, 260.0, 265.0, 200.0, 205.0, 252.0, 281.0, 277.0, 296.0, 272.0, 250.0, 281.0, 298.0, 264.0, 255.0, 291.0, 279.0, 272.0, 255.0, 283.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7115811096238751, "mean_inference_ms": 1.2801026959798782, "mean_action_processing_ms": 0.13550184600075288, "mean_env_wait_ms": 0.857080086310055, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5555200, "num_agent_steps_trained": 5555200, "num_env_steps_sampled": 2777600, "num_env_steps_trained": 2777600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2777600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5555200, "timers": {"training_iteration_time_ms": 3663.772, "learn_time_ms": 1128.207, "learn_throughput": 11345.439, "synch_weights_time_ms": 13.645}, "counters": {"num_env_steps_sampled": 2777600, "num_env_steps_trained": 2777600, "num_agent_steps_sampled": 5555200, "num_agent_steps_trained": 5555200}, "done": false, "episodes_total": 6944, "training_iteration": 217, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-36", "timestamp": 1666581276, "time_this_iter_s": 3.6296896934509277, "time_total_s": 846.2397308349609, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 846.2397308349609, "timesteps_since_restore": 0, "iterations_since_restore": 217, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.700000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 187.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 167.64, "shaped_reward_min": 125, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.51, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.97, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.91, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.47, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.19, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.92, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.92, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.91, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.47, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.91, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.47, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001097099855542183, "policy_loss": -0.0013780472800135612, "vf_loss": 7.6653265953063965, "vf_explained_var": 0.6124542951583862, "kl": 0.002241886919364333, "entropy": 0.9711683988571167, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2790400, "num_env_steps_trained": 2790400, "num_agent_steps_sampled": 5580800, "num_agent_steps_trained": 5580800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 405.0, "episode_reward_mean": 543.24, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 271.62}, "custom_metrics": {"sparse_reward_mean": 187.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 167.64, "shaped_reward_min": 125, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.51, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.97, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.3, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.91, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.47, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.19, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.92, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.92, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.91, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.47, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.91, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.47, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 465.0, 522.0, 576.0, 522.0, 470.0, 522.0, 527.0, 573.0, 573.0, 576.0, 579.0, 573.0, 570.0, 519.0, 527.0, 536.0, 570.0, 573.0, 522.0, 519.0, 527.0, 522.0, 525.0, 579.0, 522.0, 513.0, 516.0, 576.0, 576.0, 519.0, 573.0, 533.0, 570.0, 522.0, 576.0, 510.0, 564.0, 530.0, 582.0, 582.0, 567.0, 570.0, 522.0, 573.0, 522.0, 579.0, 579.0, 522.0, 519.0, 519.0, 525.0, 525.0, 579.0, 576.0, 530.0, 470.0, 525.0, 405.0, 533.0, 573.0, 522.0, 579.0, 519.0, 570.0, 527.0, 573.0, 525.0, 579.0, 525.0, 522.0, 576.0, 522.0, 530.0, 576.0, 576.0, 516.0, 513.0, 579.0, 576.0, 530.0, 519.0, 525.0, 530.0, 525.0, 525.0, 576.0, 527.0, 570.0, 579.0, 522.0, 522.0, 573.0, 519.0, 576.0, 525.0, 525.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 284.0, 299.0, 280.0, 240.0, 225.0, 254.0, 268.0, 286.0, 290.0, 262.0, 
260.0, 233.0, 237.0, 281.0, 241.0, 264.0, 263.0, 290.0, 283.0, 288.0, 285.0, 284.0, 292.0, 301.0, 278.0, 288.0, 285.0, 298.0, 272.0, 267.0, 252.0, 253.0, 274.0, 273.0, 263.0, 290.0, 280.0, 293.0, 280.0, 267.0, 255.0, 253.0, 266.0, 255.0, 272.0, 254.0, 268.0, 270.0, 255.0, 282.0, 297.0, 261.0, 261.0, 272.0, 241.0, 262.0, 254.0, 289.0, 287.0, 302.0, 274.0, 247.0, 272.0, 298.0, 275.0, 261.0, 272.0, 286.0, 284.0, 265.0, 257.0, 290.0, 286.0, 250.0, 260.0, 285.0, 279.0, 268.0, 262.0, 289.0, 293.0, 293.0, 289.0, 273.0, 294.0, 285.0, 285.0, 268.0, 254.0, 286.0, 287.0, 264.0, 258.0, 297.0, 282.0, 282.0, 297.0, 264.0, 258.0, 249.0, 270.0, 263.0, 256.0, 259.0, 266.0, 264.0, 261.0, 288.0, 291.0, 265.0, 311.0, 261.0, 269.0, 243.0, 227.0, 260.0, 265.0, 200.0, 205.0, 252.0, 281.0, 277.0, 296.0, 272.0, 250.0, 281.0, 298.0, 264.0, 255.0, 291.0, 279.0, 272.0, 255.0, 283.0, 290.0, 257.0, 268.0, 291.0, 288.0, 263.0, 262.0, 257.0, 265.0, 294.0, 282.0, 270.0, 252.0, 278.0, 252.0, 289.0, 287.0, 284.0, 292.0, 264.0, 252.0, 249.0, 264.0, 288.0, 291.0, 293.0, 283.0, 277.0, 253.0, 251.0, 268.0, 265.0, 260.0, 276.0, 254.0, 255.0, 270.0, 257.0, 268.0, 282.0, 294.0, 254.0, 273.0, 295.0, 275.0, 280.0, 299.0, 264.0, 258.0, 259.0, 263.0, 281.0, 292.0, 248.0, 271.0, 283.0, 293.0, 256.0, 269.0, 260.0, 265.0, 282.0, 294.0, 287.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.711399980779434, "mean_inference_ms": 1.2796816992981612, "mean_action_processing_ms": 0.13548881730847223, "mean_env_wait_ms": 0.8568613192615742, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 405.0, "episode_reward_mean": 543.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 271.62}, "hist_stats": {"episode_reward": [573.0, 579.0, 465.0, 522.0, 576.0, 522.0, 470.0, 522.0, 527.0, 573.0, 573.0, 576.0, 579.0, 573.0, 570.0, 519.0, 527.0, 536.0, 570.0, 
573.0, 522.0, 519.0, 527.0, 522.0, 525.0, 579.0, 522.0, 513.0, 516.0, 576.0, 576.0, 519.0, 573.0, 533.0, 570.0, 522.0, 576.0, 510.0, 564.0, 530.0, 582.0, 582.0, 567.0, 570.0, 522.0, 573.0, 522.0, 579.0, 579.0, 522.0, 519.0, 519.0, 525.0, 525.0, 579.0, 576.0, 530.0, 470.0, 525.0, 405.0, 533.0, 573.0, 522.0, 579.0, 519.0, 570.0, 527.0, 573.0, 525.0, 579.0, 525.0, 522.0, 576.0, 522.0, 530.0, 576.0, 576.0, 516.0, 513.0, 579.0, 576.0, 530.0, 519.0, 525.0, 530.0, 525.0, 525.0, 576.0, 527.0, 570.0, 579.0, 522.0, 522.0, 573.0, 519.0, 576.0, 525.0, 525.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 284.0, 299.0, 280.0, 240.0, 225.0, 254.0, 268.0, 286.0, 290.0, 262.0, 260.0, 233.0, 237.0, 281.0, 241.0, 264.0, 263.0, 290.0, 283.0, 288.0, 285.0, 284.0, 292.0, 301.0, 278.0, 288.0, 285.0, 298.0, 272.0, 267.0, 252.0, 253.0, 274.0, 273.0, 263.0, 290.0, 280.0, 293.0, 280.0, 267.0, 255.0, 253.0, 266.0, 255.0, 272.0, 254.0, 268.0, 270.0, 255.0, 282.0, 297.0, 261.0, 261.0, 272.0, 241.0, 262.0, 254.0, 289.0, 287.0, 302.0, 274.0, 247.0, 272.0, 298.0, 275.0, 261.0, 272.0, 286.0, 284.0, 265.0, 257.0, 290.0, 286.0, 250.0, 260.0, 285.0, 279.0, 268.0, 262.0, 289.0, 293.0, 293.0, 289.0, 273.0, 294.0, 285.0, 285.0, 268.0, 254.0, 286.0, 287.0, 264.0, 258.0, 297.0, 282.0, 282.0, 297.0, 264.0, 258.0, 249.0, 270.0, 263.0, 256.0, 259.0, 266.0, 264.0, 261.0, 288.0, 291.0, 265.0, 311.0, 261.0, 269.0, 243.0, 227.0, 260.0, 265.0, 200.0, 205.0, 252.0, 281.0, 277.0, 296.0, 272.0, 250.0, 281.0, 
298.0, 264.0, 255.0, 291.0, 279.0, 272.0, 255.0, 283.0, 290.0, 257.0, 268.0, 291.0, 288.0, 263.0, 262.0, 257.0, 265.0, 294.0, 282.0, 270.0, 252.0, 278.0, 252.0, 289.0, 287.0, 284.0, 292.0, 264.0, 252.0, 249.0, 264.0, 288.0, 291.0, 293.0, 283.0, 277.0, 253.0, 251.0, 268.0, 265.0, 260.0, 276.0, 254.0, 255.0, 270.0, 257.0, 268.0, 282.0, 294.0, 254.0, 273.0, 295.0, 275.0, 280.0, 299.0, 264.0, 258.0, 259.0, 263.0, 281.0, 292.0, 248.0, 271.0, 283.0, 293.0, 256.0, 269.0, 260.0, 265.0, 282.0, 294.0, 287.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.711399980779434, "mean_inference_ms": 1.2796816992981612, "mean_action_processing_ms": 0.13548881730847223, "mean_env_wait_ms": 0.8568613192615742, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5580800, "num_agent_steps_trained": 5580800, "num_env_steps_sampled": 2790400, "num_env_steps_trained": 2790400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2790400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5580800, "timers": {"training_iteration_time_ms": 3646.287, "learn_time_ms": 1130.626, "learn_throughput": 11321.157, "synch_weights_time_ms": 12.817}, "counters": {"num_env_steps_sampled": 2790400, "num_env_steps_trained": 2790400, "num_agent_steps_sampled": 5580800, "num_agent_steps_trained": 5580800}, "done": false, "episodes_total": 6976, "training_iteration": 218, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-40", "timestamp": 1666581280, "time_this_iter_s": 3.7043538093566895, "time_total_s": 849.9440846443176, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 849.9440846443176, "timesteps_since_restore": 0, "iterations_since_restore": 218, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.966666666666665, "ram_util_percent": 10.633333333333333}} +{"custom_metrics": {"sparse_reward_mean": 189.6, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 169.37, "shaped_reward_min": 125, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.53, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.12, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.25, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.24, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.64, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.18, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.98, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.64, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.64, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008129839552566409, "policy_loss": -0.0010989793809130788, "vf_loss": 7.66006326675415, "vf_explained_var": 0.6172770857810974, "kl": 0.0020343316718935966, "entropy": 0.9600198268890381, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2803200, "num_env_steps_trained": 2803200, "num_agent_steps_sampled": 5606400, "num_agent_steps_trained": 5606400}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 405.0, "episode_reward_mean": 548.57, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 274.285}, "custom_metrics": {"sparse_reward_mean": 189.6, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 169.37, "shaped_reward_min": 125, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.53, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.12, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.25, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.64, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.18, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.64, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.64, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 533.0, 570.0, 522.0, 576.0, 510.0, 564.0, 530.0, 582.0, 582.0, 567.0, 570.0, 522.0, 573.0, 522.0, 579.0, 579.0, 522.0, 519.0, 519.0, 525.0, 525.0, 579.0, 576.0, 530.0, 470.0, 525.0, 405.0, 533.0, 573.0, 522.0, 579.0, 519.0, 570.0, 527.0, 573.0, 525.0, 579.0, 525.0, 522.0, 576.0, 522.0, 530.0, 576.0, 576.0, 516.0, 513.0, 579.0, 576.0, 530.0, 519.0, 525.0, 530.0, 525.0, 525.0, 576.0, 527.0, 570.0, 579.0, 522.0, 522.0, 573.0, 519.0, 576.0, 525.0, 525.0, 576.0, 579.0, 570.0, 570.0, 522.0, 576.0, 576.0, 530.0, 576.0, 576.0, 570.0, 573.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 530.0, 573.0, 570.0, 579.0, 576.0, 573.0, 582.0, 519.0, 525.0, 530.0, 570.0, 579.0, 525.0, 536.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 275.0, 261.0, 272.0, 286.0, 284.0, 265.0, 257.0, 290.0, 286.0, 250.0, 
260.0, 285.0, 279.0, 268.0, 262.0, 289.0, 293.0, 293.0, 289.0, 273.0, 294.0, 285.0, 285.0, 268.0, 254.0, 286.0, 287.0, 264.0, 258.0, 297.0, 282.0, 282.0, 297.0, 264.0, 258.0, 249.0, 270.0, 263.0, 256.0, 259.0, 266.0, 264.0, 261.0, 288.0, 291.0, 265.0, 311.0, 261.0, 269.0, 243.0, 227.0, 260.0, 265.0, 200.0, 205.0, 252.0, 281.0, 277.0, 296.0, 272.0, 250.0, 281.0, 298.0, 264.0, 255.0, 291.0, 279.0, 272.0, 255.0, 283.0, 290.0, 257.0, 268.0, 291.0, 288.0, 263.0, 262.0, 257.0, 265.0, 294.0, 282.0, 270.0, 252.0, 278.0, 252.0, 289.0, 287.0, 284.0, 292.0, 264.0, 252.0, 249.0, 264.0, 288.0, 291.0, 293.0, 283.0, 277.0, 253.0, 251.0, 268.0, 265.0, 260.0, 276.0, 254.0, 255.0, 270.0, 257.0, 268.0, 282.0, 294.0, 254.0, 273.0, 295.0, 275.0, 280.0, 299.0, 264.0, 258.0, 259.0, 263.0, 281.0, 292.0, 248.0, 271.0, 283.0, 293.0, 256.0, 269.0, 260.0, 265.0, 282.0, 294.0, 287.0, 292.0, 277.0, 293.0, 274.0, 296.0, 269.0, 253.0, 279.0, 297.0, 295.0, 281.0, 269.0, 261.0, 289.0, 287.0, 289.0, 287.0, 278.0, 292.0, 269.0, 304.0, 293.0, 294.0, 286.0, 290.0, 269.0, 261.0, 262.0, 260.0, 301.0, 275.0, 282.0, 297.0, 265.0, 265.0, 283.0, 290.0, 278.0, 292.0, 296.0, 283.0, 298.0, 278.0, 286.0, 287.0, 283.0, 299.0, 257.0, 262.0, 266.0, 259.0, 267.0, 263.0, 278.0, 292.0, 289.0, 290.0, 251.0, 274.0, 275.0, 261.0, 259.0, 266.0, 292.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7112164400179618, "mean_inference_ms": 1.2792621725564641, "mean_action_processing_ms": 0.13547534800810293, "mean_env_wait_ms": 0.8566341596312739, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 405.0, "episode_reward_mean": 548.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 311.0}, "policy_reward_mean": {"ppo": 274.285}, "hist_stats": {"episode_reward": [573.0, 533.0, 570.0, 522.0, 576.0, 510.0, 564.0, 530.0, 582.0, 582.0, 567.0, 570.0, 522.0, 573.0, 522.0, 579.0, 579.0, 522.0, 
519.0, 519.0, 525.0, 525.0, 579.0, 576.0, 530.0, 470.0, 525.0, 405.0, 533.0, 573.0, 522.0, 579.0, 519.0, 570.0, 527.0, 573.0, 525.0, 579.0, 525.0, 522.0, 576.0, 522.0, 530.0, 576.0, 576.0, 516.0, 513.0, 579.0, 576.0, 530.0, 519.0, 525.0, 530.0, 525.0, 525.0, 576.0, 527.0, 570.0, 579.0, 522.0, 522.0, 573.0, 519.0, 576.0, 525.0, 525.0, 576.0, 579.0, 570.0, 570.0, 522.0, 576.0, 576.0, 530.0, 576.0, 576.0, 570.0, 573.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 530.0, 573.0, 570.0, 579.0, 576.0, 573.0, 582.0, 519.0, 525.0, 530.0, 570.0, 579.0, 525.0, 536.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [298.0, 275.0, 261.0, 272.0, 286.0, 284.0, 265.0, 257.0, 290.0, 286.0, 250.0, 260.0, 285.0, 279.0, 268.0, 262.0, 289.0, 293.0, 293.0, 289.0, 273.0, 294.0, 285.0, 285.0, 268.0, 254.0, 286.0, 287.0, 264.0, 258.0, 297.0, 282.0, 282.0, 297.0, 264.0, 258.0, 249.0, 270.0, 263.0, 256.0, 259.0, 266.0, 264.0, 261.0, 288.0, 291.0, 265.0, 311.0, 261.0, 269.0, 243.0, 227.0, 260.0, 265.0, 200.0, 205.0, 252.0, 281.0, 277.0, 296.0, 272.0, 250.0, 281.0, 298.0, 264.0, 255.0, 291.0, 279.0, 272.0, 255.0, 283.0, 290.0, 257.0, 268.0, 291.0, 288.0, 263.0, 262.0, 257.0, 265.0, 294.0, 282.0, 270.0, 252.0, 278.0, 252.0, 289.0, 287.0, 284.0, 292.0, 264.0, 252.0, 249.0, 264.0, 288.0, 291.0, 293.0, 283.0, 277.0, 253.0, 251.0, 268.0, 265.0, 260.0, 276.0, 254.0, 255.0, 270.0, 257.0, 268.0, 282.0, 294.0, 254.0, 273.0, 295.0, 275.0, 280.0, 299.0, 264.0, 258.0, 259.0, 263.0, 281.0, 292.0, 248.0, 271.0, 
283.0, 293.0, 256.0, 269.0, 260.0, 265.0, 282.0, 294.0, 287.0, 292.0, 277.0, 293.0, 274.0, 296.0, 269.0, 253.0, 279.0, 297.0, 295.0, 281.0, 269.0, 261.0, 289.0, 287.0, 289.0, 287.0, 278.0, 292.0, 269.0, 304.0, 293.0, 294.0, 286.0, 290.0, 269.0, 261.0, 262.0, 260.0, 301.0, 275.0, 282.0, 297.0, 265.0, 265.0, 283.0, 290.0, 278.0, 292.0, 296.0, 283.0, 298.0, 278.0, 286.0, 287.0, 283.0, 299.0, 257.0, 262.0, 266.0, 259.0, 267.0, 263.0, 278.0, 292.0, 289.0, 290.0, 251.0, 274.0, 275.0, 261.0, 259.0, 266.0, 292.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7112164400179618, "mean_inference_ms": 1.2792621725564641, "mean_action_processing_ms": 0.13547534800810293, "mean_env_wait_ms": 0.8566341596312739, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5606400, "num_agent_steps_trained": 5606400, "num_env_steps_sampled": 2803200, "num_env_steps_trained": 2803200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2803200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5606400, "timers": {"training_iteration_time_ms": 3625.496, "learn_time_ms": 1119.684, "learn_throughput": 11431.802, "synch_weights_time_ms": 12.637}, "counters": {"num_env_steps_sampled": 2803200, "num_env_steps_trained": 2803200, "num_agent_steps_sampled": 5606400, "num_agent_steps_trained": 5606400}, "done": false, "episodes_total": 7008, "training_iteration": 219, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-44", "timestamp": 1666581284, "time_this_iter_s": 3.5609569549560547, "time_total_s": 853.5050415992737, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 853.5050415992737, "timesteps_since_restore": 0, "iterations_since_restore": 219, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.5, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 191.0, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 170.14, "shaped_reward_min": 148, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.42, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.1, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.79, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.0, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.15, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.79, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.79, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00014584301970899105, "policy_loss": -0.00043676793575286865, "vf_loss": 7.6577043533325195, "vf_explained_var": 0.6050065755844116, "kl": 0.0021726060658693314, "entropy": 0.9496897459030151, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2816000, "num_env_steps_trained": 2816000, "num_agent_steps_sampled": 5632000, "num_agent_steps_trained": 5632000}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 468.0, "episode_reward_mean": 552.14, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 233.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 276.07}, "custom_metrics": {"sparse_reward_mean": 191.0, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 170.14, "shaped_reward_min": 148, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.42, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.1, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.79, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.0, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.15, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.79, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.79, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 570.0, 527.0, 573.0, 525.0, 579.0, 525.0, 522.0, 576.0, 522.0, 530.0, 576.0, 576.0, 516.0, 513.0, 579.0, 576.0, 530.0, 519.0, 525.0, 530.0, 525.0, 525.0, 576.0, 527.0, 570.0, 579.0, 522.0, 522.0, 573.0, 519.0, 576.0, 525.0, 525.0, 576.0, 579.0, 570.0, 570.0, 522.0, 576.0, 576.0, 530.0, 576.0, 576.0, 570.0, 573.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 530.0, 573.0, 570.0, 579.0, 576.0, 573.0, 582.0, 519.0, 525.0, 530.0, 570.0, 579.0, 525.0, 536.0, 525.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 468.0, 524.0, 513.0, 530.0, 570.0, 579.0, 525.0, 579.0, 530.0, 573.0, 570.0, 573.0, 573.0, 573.0, 582.0, 573.0, 579.0, 573.0, 525.0, 573.0, 570.0, 527.0, 573.0, 510.0, 576.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 255.0, 291.0, 279.0, 272.0, 255.0, 283.0, 290.0, 257.0, 268.0, 291.0, 
288.0, 263.0, 262.0, 257.0, 265.0, 294.0, 282.0, 270.0, 252.0, 278.0, 252.0, 289.0, 287.0, 284.0, 292.0, 264.0, 252.0, 249.0, 264.0, 288.0, 291.0, 293.0, 283.0, 277.0, 253.0, 251.0, 268.0, 265.0, 260.0, 276.0, 254.0, 255.0, 270.0, 257.0, 268.0, 282.0, 294.0, 254.0, 273.0, 295.0, 275.0, 280.0, 299.0, 264.0, 258.0, 259.0, 263.0, 281.0, 292.0, 248.0, 271.0, 283.0, 293.0, 256.0, 269.0, 260.0, 265.0, 282.0, 294.0, 287.0, 292.0, 277.0, 293.0, 274.0, 296.0, 269.0, 253.0, 279.0, 297.0, 295.0, 281.0, 269.0, 261.0, 289.0, 287.0, 289.0, 287.0, 278.0, 292.0, 269.0, 304.0, 293.0, 294.0, 286.0, 290.0, 269.0, 261.0, 262.0, 260.0, 301.0, 275.0, 282.0, 297.0, 265.0, 265.0, 283.0, 290.0, 278.0, 292.0, 296.0, 283.0, 298.0, 278.0, 286.0, 287.0, 283.0, 299.0, 257.0, 262.0, 266.0, 259.0, 267.0, 263.0, 278.0, 292.0, 289.0, 290.0, 251.0, 274.0, 275.0, 261.0, 259.0, 266.0, 292.0, 281.0, 269.0, 261.0, 298.0, 278.0, 289.0, 290.0, 288.0, 282.0, 300.0, 279.0, 267.0, 255.0, 235.0, 233.0, 274.0, 250.0, 273.0, 240.0, 258.0, 272.0, 290.0, 280.0, 281.0, 298.0, 252.0, 273.0, 299.0, 280.0, 264.0, 266.0, 293.0, 280.0, 293.0, 277.0, 273.0, 300.0, 288.0, 285.0, 281.0, 292.0, 289.0, 293.0, 284.0, 289.0, 293.0, 286.0, 289.0, 284.0, 273.0, 252.0, 282.0, 291.0, 280.0, 290.0, 249.0, 278.0, 293.0, 280.0, 257.0, 253.0, 304.0, 272.0, 266.0, 250.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.711028381075877, "mean_inference_ms": 1.2788274392677903, "mean_action_processing_ms": 0.13545884864442959, "mean_env_wait_ms": 0.8563895258714708, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 468.0, "episode_reward_mean": 552.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 233.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 276.07}, "hist_stats": {"episode_reward": [519.0, 570.0, 527.0, 573.0, 525.0, 579.0, 525.0, 522.0, 576.0, 522.0, 530.0, 576.0, 576.0, 516.0, 513.0, 579.0, 576.0, 530.0, 519.0, 
525.0, 530.0, 525.0, 525.0, 576.0, 527.0, 570.0, 579.0, 522.0, 522.0, 573.0, 519.0, 576.0, 525.0, 525.0, 576.0, 579.0, 570.0, 570.0, 522.0, 576.0, 576.0, 530.0, 576.0, 576.0, 570.0, 573.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 530.0, 573.0, 570.0, 579.0, 576.0, 573.0, 582.0, 519.0, 525.0, 530.0, 570.0, 579.0, 525.0, 536.0, 525.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 468.0, 524.0, 513.0, 530.0, 570.0, 579.0, 525.0, 579.0, 530.0, 573.0, 570.0, 573.0, 573.0, 573.0, 582.0, 573.0, 579.0, 573.0, 525.0, 573.0, 570.0, 527.0, 573.0, 510.0, 576.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 255.0, 291.0, 279.0, 272.0, 255.0, 283.0, 290.0, 257.0, 268.0, 291.0, 288.0, 263.0, 262.0, 257.0, 265.0, 294.0, 282.0, 270.0, 252.0, 278.0, 252.0, 289.0, 287.0, 284.0, 292.0, 264.0, 252.0, 249.0, 264.0, 288.0, 291.0, 293.0, 283.0, 277.0, 253.0, 251.0, 268.0, 265.0, 260.0, 276.0, 254.0, 255.0, 270.0, 257.0, 268.0, 282.0, 294.0, 254.0, 273.0, 295.0, 275.0, 280.0, 299.0, 264.0, 258.0, 259.0, 263.0, 281.0, 292.0, 248.0, 271.0, 283.0, 293.0, 256.0, 269.0, 260.0, 265.0, 282.0, 294.0, 287.0, 292.0, 277.0, 293.0, 274.0, 296.0, 269.0, 253.0, 279.0, 297.0, 295.0, 281.0, 269.0, 261.0, 289.0, 287.0, 289.0, 287.0, 278.0, 292.0, 269.0, 304.0, 293.0, 294.0, 286.0, 290.0, 269.0, 261.0, 262.0, 260.0, 301.0, 275.0, 282.0, 297.0, 265.0, 265.0, 283.0, 290.0, 278.0, 292.0, 296.0, 283.0, 298.0, 278.0, 286.0, 287.0, 283.0, 299.0, 257.0, 262.0, 266.0, 259.0, 267.0, 263.0, 278.0, 292.0, 289.0, 
290.0, 251.0, 274.0, 275.0, 261.0, 259.0, 266.0, 292.0, 281.0, 269.0, 261.0, 298.0, 278.0, 289.0, 290.0, 288.0, 282.0, 300.0, 279.0, 267.0, 255.0, 235.0, 233.0, 274.0, 250.0, 273.0, 240.0, 258.0, 272.0, 290.0, 280.0, 281.0, 298.0, 252.0, 273.0, 299.0, 280.0, 264.0, 266.0, 293.0, 280.0, 293.0, 277.0, 273.0, 300.0, 288.0, 285.0, 281.0, 292.0, 289.0, 293.0, 284.0, 289.0, 293.0, 286.0, 289.0, 284.0, 273.0, 252.0, 282.0, 291.0, 280.0, 290.0, 249.0, 278.0, 293.0, 280.0, 257.0, 253.0, 304.0, 272.0, 266.0, 250.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.711028381075877, "mean_inference_ms": 1.2788274392677903, "mean_action_processing_ms": 0.13545884864442959, "mean_env_wait_ms": 0.8563895258714708, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5632000, "num_agent_steps_trained": 5632000, "num_env_steps_sampled": 2816000, "num_env_steps_trained": 2816000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2816000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5632000, "timers": {"training_iteration_time_ms": 3621.065, "learn_time_ms": 1118.826, "learn_throughput": 11440.564, "synch_weights_time_ms": 11.987}, "counters": {"num_env_steps_sampled": 2816000, "num_env_steps_trained": 2816000, "num_agent_steps_sampled": 5632000, "num_agent_steps_trained": 5632000}, "done": false, "episodes_total": 7040, "training_iteration": 220, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-47", "timestamp": 1666581287, "time_this_iter_s": 3.691047430038452, "time_total_s": 857.1960890293121, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 857.1960890293121, "timesteps_since_restore": 0, "iterations_since_restore": 220, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.566666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.54, "shaped_reward_min": 148, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.09, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.86, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.82, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.53, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.62, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.34, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.89, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.58, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.62, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.34, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.62, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.34, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0024008420296013355, "policy_loss": 0.0021112796384841204, "vf_loss": 7.663437366485596, "vf_explained_var": 0.6173580288887024, "kl": 0.0023920456878840923, "entropy": 0.9535607099533081, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2828800, "num_env_steps_trained": 2828800, "num_agent_steps_sampled": 5657600, "num_agent_steps_trained": 5657600}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 468.0, "episode_reward_mean": 555.94, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 233.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 277.97}, "custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.54, "shaped_reward_min": 148, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.09, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.86, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.82, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.53, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.62, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.34, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.89, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.58, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.62, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.34, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.62, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.34, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 525.0, 576.0, 579.0, 570.0, 570.0, 522.0, 576.0, 576.0, 530.0, 576.0, 576.0, 570.0, 573.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 530.0, 573.0, 570.0, 579.0, 576.0, 573.0, 582.0, 519.0, 525.0, 530.0, 570.0, 579.0, 525.0, 536.0, 525.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 468.0, 524.0, 513.0, 530.0, 570.0, 579.0, 525.0, 579.0, 530.0, 573.0, 570.0, 573.0, 573.0, 573.0, 582.0, 573.0, 579.0, 573.0, 525.0, 573.0, 570.0, 527.0, 573.0, 510.0, 576.0, 516.0, 579.0, 582.0, 576.0, 576.0, 527.0, 533.0, 573.0, 530.0, 516.0, 573.0, 582.0, 530.0, 576.0, 582.0, 522.0, 579.0, 579.0, 576.0, 525.0, 582.0, 573.0, 570.0, 579.0, 530.0, 525.0, 582.0, 576.0, 579.0, 530.0, 522.0, 522.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 269.0, 260.0, 265.0, 282.0, 294.0, 287.0, 292.0, 277.0, 293.0, 274.0, 
296.0, 269.0, 253.0, 279.0, 297.0, 295.0, 281.0, 269.0, 261.0, 289.0, 287.0, 289.0, 287.0, 278.0, 292.0, 269.0, 304.0, 293.0, 294.0, 286.0, 290.0, 269.0, 261.0, 262.0, 260.0, 301.0, 275.0, 282.0, 297.0, 265.0, 265.0, 283.0, 290.0, 278.0, 292.0, 296.0, 283.0, 298.0, 278.0, 286.0, 287.0, 283.0, 299.0, 257.0, 262.0, 266.0, 259.0, 267.0, 263.0, 278.0, 292.0, 289.0, 290.0, 251.0, 274.0, 275.0, 261.0, 259.0, 266.0, 292.0, 281.0, 269.0, 261.0, 298.0, 278.0, 289.0, 290.0, 288.0, 282.0, 300.0, 279.0, 267.0, 255.0, 235.0, 233.0, 274.0, 250.0, 273.0, 240.0, 258.0, 272.0, 290.0, 280.0, 281.0, 298.0, 252.0, 273.0, 299.0, 280.0, 264.0, 266.0, 293.0, 280.0, 293.0, 277.0, 273.0, 300.0, 288.0, 285.0, 281.0, 292.0, 289.0, 293.0, 284.0, 289.0, 293.0, 286.0, 289.0, 284.0, 273.0, 252.0, 282.0, 291.0, 280.0, 290.0, 249.0, 278.0, 293.0, 280.0, 257.0, 253.0, 304.0, 272.0, 266.0, 250.0, 282.0, 297.0, 302.0, 280.0, 289.0, 287.0, 287.0, 289.0, 268.0, 259.0, 275.0, 258.0, 294.0, 279.0, 278.0, 252.0, 250.0, 266.0, 295.0, 278.0, 288.0, 294.0, 269.0, 261.0, 282.0, 294.0, 283.0, 299.0, 258.0, 264.0, 286.0, 293.0, 291.0, 288.0, 271.0, 305.0, 256.0, 269.0, 299.0, 283.0, 288.0, 285.0, 299.0, 271.0, 288.0, 291.0, 265.0, 265.0, 264.0, 261.0, 304.0, 278.0, 281.0, 295.0, 290.0, 289.0, 269.0, 261.0, 256.0, 266.0, 250.0, 272.0, 258.0, 258.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7108389704831163, "mean_inference_ms": 1.278375907898961, "mean_action_processing_ms": 0.13543735468459694, "mean_env_wait_ms": 0.8561217690529012, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 468.0, "episode_reward_mean": 555.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 233.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 277.97}, "hist_stats": {"episode_reward": [525.0, 525.0, 576.0, 579.0, 570.0, 570.0, 522.0, 576.0, 576.0, 530.0, 576.0, 576.0, 570.0, 573.0, 587.0, 576.0, 530.0, 522.0, 576.0, 
579.0, 530.0, 573.0, 570.0, 579.0, 576.0, 573.0, 582.0, 519.0, 525.0, 530.0, 570.0, 579.0, 525.0, 536.0, 525.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 468.0, 524.0, 513.0, 530.0, 570.0, 579.0, 525.0, 579.0, 530.0, 573.0, 570.0, 573.0, 573.0, 573.0, 582.0, 573.0, 579.0, 573.0, 525.0, 573.0, 570.0, 527.0, 573.0, 510.0, 576.0, 516.0, 579.0, 582.0, 576.0, 576.0, 527.0, 533.0, 573.0, 530.0, 516.0, 573.0, 582.0, 530.0, 576.0, 582.0, 522.0, 579.0, 579.0, 576.0, 525.0, 582.0, 573.0, 570.0, 579.0, 530.0, 525.0, 582.0, 576.0, 579.0, 530.0, 522.0, 522.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 269.0, 260.0, 265.0, 282.0, 294.0, 287.0, 292.0, 277.0, 293.0, 274.0, 296.0, 269.0, 253.0, 279.0, 297.0, 295.0, 281.0, 269.0, 261.0, 289.0, 287.0, 289.0, 287.0, 278.0, 292.0, 269.0, 304.0, 293.0, 294.0, 286.0, 290.0, 269.0, 261.0, 262.0, 260.0, 301.0, 275.0, 282.0, 297.0, 265.0, 265.0, 283.0, 290.0, 278.0, 292.0, 296.0, 283.0, 298.0, 278.0, 286.0, 287.0, 283.0, 299.0, 257.0, 262.0, 266.0, 259.0, 267.0, 263.0, 278.0, 292.0, 289.0, 290.0, 251.0, 274.0, 275.0, 261.0, 259.0, 266.0, 292.0, 281.0, 269.0, 261.0, 298.0, 278.0, 289.0, 290.0, 288.0, 282.0, 300.0, 279.0, 267.0, 255.0, 235.0, 233.0, 274.0, 250.0, 273.0, 240.0, 258.0, 272.0, 290.0, 280.0, 281.0, 298.0, 252.0, 273.0, 299.0, 280.0, 264.0, 266.0, 293.0, 280.0, 293.0, 277.0, 273.0, 300.0, 288.0, 285.0, 281.0, 292.0, 289.0, 293.0, 284.0, 289.0, 293.0, 286.0, 289.0, 284.0, 273.0, 252.0, 282.0, 291.0, 280.0, 290.0, 249.0, 
278.0, 293.0, 280.0, 257.0, 253.0, 304.0, 272.0, 266.0, 250.0, 282.0, 297.0, 302.0, 280.0, 289.0, 287.0, 287.0, 289.0, 268.0, 259.0, 275.0, 258.0, 294.0, 279.0, 278.0, 252.0, 250.0, 266.0, 295.0, 278.0, 288.0, 294.0, 269.0, 261.0, 282.0, 294.0, 283.0, 299.0, 258.0, 264.0, 286.0, 293.0, 291.0, 288.0, 271.0, 305.0, 256.0, 269.0, 299.0, 283.0, 288.0, 285.0, 299.0, 271.0, 288.0, 291.0, 265.0, 265.0, 264.0, 261.0, 304.0, 278.0, 281.0, 295.0, 290.0, 289.0, 269.0, 261.0, 256.0, 266.0, 250.0, 272.0, 258.0, 258.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7108389704831163, "mean_inference_ms": 1.278375907898961, "mean_action_processing_ms": 0.13543735468459694, "mean_env_wait_ms": 0.8561217690529012, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5657600, "num_agent_steps_trained": 5657600, "num_env_steps_sampled": 2828800, "num_env_steps_trained": 2828800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2828800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5657600, "timers": {"training_iteration_time_ms": 3611.477, "learn_time_ms": 1112.851, "learn_throughput": 11501.993, "synch_weights_time_ms": 11.026}, "counters": {"num_env_steps_sampled": 2828800, "num_env_steps_trained": 2828800, "num_agent_steps_sampled": 5657600, "num_agent_steps_trained": 5657600}, "done": false, "episodes_total": 7072, "training_iteration": 221, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-51", "timestamp": 1666581291, "time_this_iter_s": 3.676072835922241, "time_total_s": 860.8721618652344, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 860.8721618652344, "timesteps_since_restore": 0, "iterations_since_restore": 221, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.700000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.0, "shaped_reward_min": 148, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.69, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.26, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.39, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.17, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.72, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.68, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.34, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.17, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.72, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.17, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.72, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011982894502580166, "policy_loss": -0.0014831130392849445, "vf_loss": 7.5979084968566895, "vf_explained_var": 0.6064113974571228, "kl": 0.0021043620072305202, "entropy": 0.9499344229698181, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2841600, "num_env_steps_trained": 2841600, "num_agent_steps_sampled": 5683200, "num_agent_steps_trained": 5683200}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 468.0, "episode_reward_mean": 555.0, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 233.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 277.5}, "custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.0, "shaped_reward_min": 148, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.69, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.26, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.39, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.22, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.17, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.72, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.68, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.34, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.37, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.17, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.72, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.17, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.72, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 536.0, 525.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 468.0, 524.0, 513.0, 530.0, 570.0, 579.0, 525.0, 579.0, 530.0, 573.0, 570.0, 573.0, 573.0, 573.0, 582.0, 573.0, 579.0, 573.0, 525.0, 573.0, 570.0, 527.0, 573.0, 510.0, 576.0, 516.0, 579.0, 582.0, 576.0, 576.0, 527.0, 533.0, 573.0, 530.0, 516.0, 573.0, 582.0, 530.0, 576.0, 582.0, 522.0, 579.0, 579.0, 576.0, 525.0, 582.0, 573.0, 570.0, 579.0, 530.0, 525.0, 582.0, 576.0, 579.0, 530.0, 522.0, 522.0, 516.0, 525.0, 570.0, 573.0, 582.0, 576.0, 570.0, 521.0, 525.0, 576.0, 582.0, 516.0, 519.0, 522.0, 573.0, 573.0, 525.0, 579.0, 525.0, 533.0, 581.0, 570.0, 573.0, 579.0, 570.0, 576.0, 579.0, 573.0, 573.0, 519.0, 522.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [251.0, 274.0, 275.0, 261.0, 259.0, 266.0, 292.0, 281.0, 269.0, 261.0, 298.0, 
278.0, 289.0, 290.0, 288.0, 282.0, 300.0, 279.0, 267.0, 255.0, 235.0, 233.0, 274.0, 250.0, 273.0, 240.0, 258.0, 272.0, 290.0, 280.0, 281.0, 298.0, 252.0, 273.0, 299.0, 280.0, 264.0, 266.0, 293.0, 280.0, 293.0, 277.0, 273.0, 300.0, 288.0, 285.0, 281.0, 292.0, 289.0, 293.0, 284.0, 289.0, 293.0, 286.0, 289.0, 284.0, 273.0, 252.0, 282.0, 291.0, 280.0, 290.0, 249.0, 278.0, 293.0, 280.0, 257.0, 253.0, 304.0, 272.0, 266.0, 250.0, 282.0, 297.0, 302.0, 280.0, 289.0, 287.0, 287.0, 289.0, 268.0, 259.0, 275.0, 258.0, 294.0, 279.0, 278.0, 252.0, 250.0, 266.0, 295.0, 278.0, 288.0, 294.0, 269.0, 261.0, 282.0, 294.0, 283.0, 299.0, 258.0, 264.0, 286.0, 293.0, 291.0, 288.0, 271.0, 305.0, 256.0, 269.0, 299.0, 283.0, 288.0, 285.0, 299.0, 271.0, 288.0, 291.0, 265.0, 265.0, 264.0, 261.0, 304.0, 278.0, 281.0, 295.0, 290.0, 289.0, 269.0, 261.0, 256.0, 266.0, 250.0, 272.0, 258.0, 258.0, 267.0, 258.0, 280.0, 290.0, 294.0, 279.0, 284.0, 298.0, 300.0, 276.0, 269.0, 301.0, 262.0, 259.0, 245.0, 280.0, 284.0, 292.0, 303.0, 279.0, 257.0, 259.0, 261.0, 258.0, 268.0, 254.0, 281.0, 292.0, 285.0, 288.0, 248.0, 277.0, 299.0, 280.0, 254.0, 271.0, 261.0, 272.0, 282.0, 299.0, 293.0, 277.0, 286.0, 287.0, 286.0, 293.0, 287.0, 283.0, 280.0, 296.0, 291.0, 288.0, 300.0, 273.0, 290.0, 283.0, 256.0, 263.0, 268.0, 254.0, 280.0, 296.0, 298.0, 272.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7106626119466375, "mean_inference_ms": 1.278093081648415, "mean_action_processing_ms": 0.13541617822810648, "mean_env_wait_ms": 0.8560243668252465, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 468.0, "episode_reward_mean": 555.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 233.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 277.5}, "hist_stats": {"episode_reward": [525.0, 536.0, 525.0, 573.0, 530.0, 576.0, 579.0, 570.0, 579.0, 522.0, 468.0, 524.0, 513.0, 530.0, 570.0, 579.0, 525.0, 579.0, 530.0, 
573.0, 570.0, 573.0, 573.0, 573.0, 582.0, 573.0, 579.0, 573.0, 525.0, 573.0, 570.0, 527.0, 573.0, 510.0, 576.0, 516.0, 579.0, 582.0, 576.0, 576.0, 527.0, 533.0, 573.0, 530.0, 516.0, 573.0, 582.0, 530.0, 576.0, 582.0, 522.0, 579.0, 579.0, 576.0, 525.0, 582.0, 573.0, 570.0, 579.0, 530.0, 525.0, 582.0, 576.0, 579.0, 530.0, 522.0, 522.0, 516.0, 525.0, 570.0, 573.0, 582.0, 576.0, 570.0, 521.0, 525.0, 576.0, 582.0, 516.0, 519.0, 522.0, 573.0, 573.0, 525.0, 579.0, 525.0, 533.0, 581.0, 570.0, 573.0, 579.0, 570.0, 576.0, 579.0, 573.0, 573.0, 519.0, 522.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [251.0, 274.0, 275.0, 261.0, 259.0, 266.0, 292.0, 281.0, 269.0, 261.0, 298.0, 278.0, 289.0, 290.0, 288.0, 282.0, 300.0, 279.0, 267.0, 255.0, 235.0, 233.0, 274.0, 250.0, 273.0, 240.0, 258.0, 272.0, 290.0, 280.0, 281.0, 298.0, 252.0, 273.0, 299.0, 280.0, 264.0, 266.0, 293.0, 280.0, 293.0, 277.0, 273.0, 300.0, 288.0, 285.0, 281.0, 292.0, 289.0, 293.0, 284.0, 289.0, 293.0, 286.0, 289.0, 284.0, 273.0, 252.0, 282.0, 291.0, 280.0, 290.0, 249.0, 278.0, 293.0, 280.0, 257.0, 253.0, 304.0, 272.0, 266.0, 250.0, 282.0, 297.0, 302.0, 280.0, 289.0, 287.0, 287.0, 289.0, 268.0, 259.0, 275.0, 258.0, 294.0, 279.0, 278.0, 252.0, 250.0, 266.0, 295.0, 278.0, 288.0, 294.0, 269.0, 261.0, 282.0, 294.0, 283.0, 299.0, 258.0, 264.0, 286.0, 293.0, 291.0, 288.0, 271.0, 305.0, 256.0, 269.0, 299.0, 283.0, 288.0, 285.0, 299.0, 271.0, 288.0, 291.0, 265.0, 265.0, 264.0, 261.0, 304.0, 278.0, 281.0, 295.0, 290.0, 
289.0, 269.0, 261.0, 256.0, 266.0, 250.0, 272.0, 258.0, 258.0, 267.0, 258.0, 280.0, 290.0, 294.0, 279.0, 284.0, 298.0, 300.0, 276.0, 269.0, 301.0, 262.0, 259.0, 245.0, 280.0, 284.0, 292.0, 303.0, 279.0, 257.0, 259.0, 261.0, 258.0, 268.0, 254.0, 281.0, 292.0, 285.0, 288.0, 248.0, 277.0, 299.0, 280.0, 254.0, 271.0, 261.0, 272.0, 282.0, 299.0, 293.0, 277.0, 286.0, 287.0, 286.0, 293.0, 287.0, 283.0, 280.0, 296.0, 291.0, 288.0, 300.0, 273.0, 290.0, 283.0, 256.0, 263.0, 268.0, 254.0, 280.0, 296.0, 298.0, 272.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7106626119466375, "mean_inference_ms": 1.278093081648415, "mean_action_processing_ms": 0.13541617822810648, "mean_env_wait_ms": 0.8560243668252465, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5683200, "num_agent_steps_trained": 5683200, "num_env_steps_sampled": 2841600, "num_env_steps_trained": 2841600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2841600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5683200, "timers": {"training_iteration_time_ms": 3639.618, "learn_time_ms": 1127.503, "learn_throughput": 11352.518, "synch_weights_time_ms": 11.232}, "counters": {"num_env_steps_sampled": 2841600, "num_env_steps_trained": 2841600, "num_agent_steps_sampled": 5683200, "num_agent_steps_trained": 5683200}, "done": false, "episodes_total": 7104, "training_iteration": 222, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-55", "timestamp": 1666581295, "time_this_iter_s": 4.044084072113037, "time_total_s": 864.9162459373474, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 864.9162459373474, "timesteps_since_restore": 0, "iterations_since_restore": 222, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.799999999999997, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 172.23, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.93, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.18, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.58, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.88, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.46, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.77, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.55, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.4, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.46, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.46, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00024267204571515322, "policy_loss": -0.0005397425265982747, "vf_loss": 7.663079261779785, "vf_explained_var": 0.596228837966919, "kl": 0.00204327329993248, "entropy": 0.9384721517562866, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2854400, "num_env_steps_trained": 2854400, "num_agent_steps_sampled": 5708800, "num_agent_steps_trained": 5708800}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 510.0, "episode_reward_mean": 559.83, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 279.915}, "custom_metrics": {"sparse_reward_mean": 193.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 172.23, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.93, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.18, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.58, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.88, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.46, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.77, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.55, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.4, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.46, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.46, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 510.0, 576.0, 516.0, 579.0, 582.0, 576.0, 576.0, 527.0, 533.0, 573.0, 530.0, 516.0, 573.0, 582.0, 530.0, 576.0, 582.0, 522.0, 579.0, 579.0, 576.0, 525.0, 582.0, 573.0, 570.0, 579.0, 530.0, 525.0, 582.0, 576.0, 579.0, 530.0, 522.0, 522.0, 516.0, 525.0, 570.0, 573.0, 582.0, 576.0, 570.0, 521.0, 525.0, 576.0, 582.0, 516.0, 519.0, 522.0, 573.0, 573.0, 525.0, 579.0, 525.0, 533.0, 581.0, 570.0, 573.0, 579.0, 570.0, 576.0, 579.0, 573.0, 573.0, 519.0, 522.0, 576.0, 570.0, 579.0, 582.0, 579.0, 573.0, 522.0, 573.0, 561.0, 525.0, 573.0, 525.0, 579.0, 579.0, 576.0, 573.0, 573.0, 627.0, 570.0, 581.0, 513.0, 527.0, 582.0, 533.0, 573.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 280.0, 257.0, 253.0, 304.0, 272.0, 266.0, 250.0, 282.0, 297.0, 302.0, 
280.0, 289.0, 287.0, 287.0, 289.0, 268.0, 259.0, 275.0, 258.0, 294.0, 279.0, 278.0, 252.0, 250.0, 266.0, 295.0, 278.0, 288.0, 294.0, 269.0, 261.0, 282.0, 294.0, 283.0, 299.0, 258.0, 264.0, 286.0, 293.0, 291.0, 288.0, 271.0, 305.0, 256.0, 269.0, 299.0, 283.0, 288.0, 285.0, 299.0, 271.0, 288.0, 291.0, 265.0, 265.0, 264.0, 261.0, 304.0, 278.0, 281.0, 295.0, 290.0, 289.0, 269.0, 261.0, 256.0, 266.0, 250.0, 272.0, 258.0, 258.0, 267.0, 258.0, 280.0, 290.0, 294.0, 279.0, 284.0, 298.0, 300.0, 276.0, 269.0, 301.0, 262.0, 259.0, 245.0, 280.0, 284.0, 292.0, 303.0, 279.0, 257.0, 259.0, 261.0, 258.0, 268.0, 254.0, 281.0, 292.0, 285.0, 288.0, 248.0, 277.0, 299.0, 280.0, 254.0, 271.0, 261.0, 272.0, 282.0, 299.0, 293.0, 277.0, 286.0, 287.0, 286.0, 293.0, 287.0, 283.0, 280.0, 296.0, 291.0, 288.0, 300.0, 273.0, 290.0, 283.0, 256.0, 263.0, 268.0, 254.0, 280.0, 296.0, 298.0, 272.0, 294.0, 285.0, 301.0, 281.0, 298.0, 281.0, 284.0, 289.0, 268.0, 254.0, 297.0, 276.0, 266.0, 295.0, 256.0, 269.0, 278.0, 295.0, 277.0, 248.0, 289.0, 290.0, 296.0, 283.0, 289.0, 287.0, 286.0, 287.0, 280.0, 293.0, 318.0, 309.0, 291.0, 279.0, 272.0, 309.0, 276.0, 237.0, 256.0, 271.0, 286.0, 296.0, 254.0, 279.0, 273.0, 300.0, 289.0, 284.0, 287.0, 286.0, 292.0, 290.0, 283.0, 293.0, 292.0, 287.0, 285.0, 294.0, 300.0, 282.0, 289.0, 287.0, 288.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7105164723411486, "mean_inference_ms": 1.2778515473445784, "mean_action_processing_ms": 0.13539721529474044, "mean_env_wait_ms": 0.8559981042684861, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 510.0, "episode_reward_mean": 559.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 279.915}, "hist_stats": {"episode_reward": [573.0, 510.0, 576.0, 516.0, 579.0, 582.0, 576.0, 576.0, 527.0, 533.0, 573.0, 530.0, 516.0, 573.0, 582.0, 530.0, 576.0, 582.0, 
522.0, 579.0, 579.0, 576.0, 525.0, 582.0, 573.0, 570.0, 579.0, 530.0, 525.0, 582.0, 576.0, 579.0, 530.0, 522.0, 522.0, 516.0, 525.0, 570.0, 573.0, 582.0, 576.0, 570.0, 521.0, 525.0, 576.0, 582.0, 516.0, 519.0, 522.0, 573.0, 573.0, 525.0, 579.0, 525.0, 533.0, 581.0, 570.0, 573.0, 579.0, 570.0, 576.0, 579.0, 573.0, 573.0, 519.0, 522.0, 576.0, 570.0, 579.0, 582.0, 579.0, 573.0, 522.0, 573.0, 561.0, 525.0, 573.0, 525.0, 579.0, 579.0, 576.0, 573.0, 573.0, 627.0, 570.0, 581.0, 513.0, 527.0, 582.0, 533.0, 573.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 280.0, 257.0, 253.0, 304.0, 272.0, 266.0, 250.0, 282.0, 297.0, 302.0, 280.0, 289.0, 287.0, 287.0, 289.0, 268.0, 259.0, 275.0, 258.0, 294.0, 279.0, 278.0, 252.0, 250.0, 266.0, 295.0, 278.0, 288.0, 294.0, 269.0, 261.0, 282.0, 294.0, 283.0, 299.0, 258.0, 264.0, 286.0, 293.0, 291.0, 288.0, 271.0, 305.0, 256.0, 269.0, 299.0, 283.0, 288.0, 285.0, 299.0, 271.0, 288.0, 291.0, 265.0, 265.0, 264.0, 261.0, 304.0, 278.0, 281.0, 295.0, 290.0, 289.0, 269.0, 261.0, 256.0, 266.0, 250.0, 272.0, 258.0, 258.0, 267.0, 258.0, 280.0, 290.0, 294.0, 279.0, 284.0, 298.0, 300.0, 276.0, 269.0, 301.0, 262.0, 259.0, 245.0, 280.0, 284.0, 292.0, 303.0, 279.0, 257.0, 259.0, 261.0, 258.0, 268.0, 254.0, 281.0, 292.0, 285.0, 288.0, 248.0, 277.0, 299.0, 280.0, 254.0, 271.0, 261.0, 272.0, 282.0, 299.0, 293.0, 277.0, 286.0, 287.0, 286.0, 293.0, 287.0, 283.0, 280.0, 296.0, 291.0, 288.0, 300.0, 273.0, 
290.0, 283.0, 256.0, 263.0, 268.0, 254.0, 280.0, 296.0, 298.0, 272.0, 294.0, 285.0, 301.0, 281.0, 298.0, 281.0, 284.0, 289.0, 268.0, 254.0, 297.0, 276.0, 266.0, 295.0, 256.0, 269.0, 278.0, 295.0, 277.0, 248.0, 289.0, 290.0, 296.0, 283.0, 289.0, 287.0, 286.0, 287.0, 280.0, 293.0, 318.0, 309.0, 291.0, 279.0, 272.0, 309.0, 276.0, 237.0, 256.0, 271.0, 286.0, 296.0, 254.0, 279.0, 273.0, 300.0, 289.0, 284.0, 287.0, 286.0, 292.0, 290.0, 283.0, 293.0, 292.0, 287.0, 285.0, 294.0, 300.0, 282.0, 289.0, 287.0, 288.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7105164723411486, "mean_inference_ms": 1.2778515473445784, "mean_action_processing_ms": 0.13539721529474044, "mean_env_wait_ms": 0.8559981042684861, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5708800, "num_agent_steps_trained": 5708800, "num_env_steps_sampled": 2854400, "num_env_steps_trained": 2854400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2854400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5708800, "timers": {"training_iteration_time_ms": 3663.188, "learn_time_ms": 1138.019, "learn_throughput": 11247.616, "synch_weights_time_ms": 11.241}, "counters": {"num_env_steps_sampled": 2854400, "num_env_steps_trained": 2854400, "num_agent_steps_sampled": 5708800, "num_agent_steps_trained": 5708800}, "done": false, "episodes_total": 7136, "training_iteration": 223, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-14-59", "timestamp": 1666581299, "time_this_iter_s": 3.808103084564209, "time_total_s": 868.7243490219116, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 868.7243490219116, "timesteps_since_restore": 0, "iterations_since_restore": 223, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.75, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 194.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 172.71, "shaped_reward_min": 153, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.97, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.19, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.66, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.94, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.19, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.5, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.84, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.48, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.5, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.5, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00046854279935359955, "policy_loss": 0.00016595772467553616, "vf_loss": 7.699159622192383, "vf_explained_var": 0.617074728012085, "kl": 0.0019009055104106665, "entropy": 0.9346585273742676, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2867200, "num_env_steps_trained": 2867200, "num_agent_steps_sampled": 5734400, "num_agent_steps_trained": 5734400}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 513.0, "episode_reward_mean": 561.91, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 280.955}, "custom_metrics": {"sparse_reward_mean": 194.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 172.71, "shaped_reward_min": 153, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.97, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.19, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.66, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.94, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.5, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.84, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.48, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.34, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.5, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.5, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 522.0, 522.0, 516.0, 525.0, 570.0, 573.0, 582.0, 576.0, 570.0, 521.0, 525.0, 576.0, 582.0, 516.0, 519.0, 522.0, 573.0, 573.0, 525.0, 579.0, 525.0, 533.0, 581.0, 570.0, 573.0, 579.0, 570.0, 576.0, 579.0, 573.0, 573.0, 519.0, 522.0, 576.0, 570.0, 579.0, 582.0, 579.0, 573.0, 522.0, 573.0, 561.0, 525.0, 573.0, 525.0, 579.0, 579.0, 576.0, 573.0, 573.0, 627.0, 570.0, 581.0, 513.0, 527.0, 582.0, 533.0, 573.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 530.0, 579.0, 527.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 576.0, 573.0, 584.0, 576.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 570.0, 573.0, 573.0, 519.0, 576.0, 582.0, 582.0, 573.0, 522.0, 539.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 261.0, 256.0, 266.0, 250.0, 272.0, 258.0, 258.0, 267.0, 258.0, 280.0, 
290.0, 294.0, 279.0, 284.0, 298.0, 300.0, 276.0, 269.0, 301.0, 262.0, 259.0, 245.0, 280.0, 284.0, 292.0, 303.0, 279.0, 257.0, 259.0, 261.0, 258.0, 268.0, 254.0, 281.0, 292.0, 285.0, 288.0, 248.0, 277.0, 299.0, 280.0, 254.0, 271.0, 261.0, 272.0, 282.0, 299.0, 293.0, 277.0, 286.0, 287.0, 286.0, 293.0, 287.0, 283.0, 280.0, 296.0, 291.0, 288.0, 300.0, 273.0, 290.0, 283.0, 256.0, 263.0, 268.0, 254.0, 280.0, 296.0, 298.0, 272.0, 294.0, 285.0, 301.0, 281.0, 298.0, 281.0, 284.0, 289.0, 268.0, 254.0, 297.0, 276.0, 266.0, 295.0, 256.0, 269.0, 278.0, 295.0, 277.0, 248.0, 289.0, 290.0, 296.0, 283.0, 289.0, 287.0, 286.0, 287.0, 280.0, 293.0, 318.0, 309.0, 291.0, 279.0, 272.0, 309.0, 276.0, 237.0, 256.0, 271.0, 286.0, 296.0, 254.0, 279.0, 273.0, 300.0, 289.0, 284.0, 287.0, 286.0, 292.0, 290.0, 283.0, 293.0, 292.0, 287.0, 285.0, 294.0, 300.0, 282.0, 289.0, 287.0, 288.0, 294.0, 284.0, 295.0, 262.0, 268.0, 294.0, 285.0, 269.0, 258.0, 301.0, 278.0, 288.0, 291.0, 301.0, 272.0, 280.0, 290.0, 304.0, 278.0, 286.0, 296.0, 282.0, 294.0, 290.0, 283.0, 288.0, 296.0, 298.0, 278.0, 289.0, 287.0, 282.0, 291.0, 258.0, 261.0, 260.0, 262.0, 290.0, 283.0, 287.0, 292.0, 281.0, 289.0, 281.0, 292.0, 286.0, 287.0, 272.0, 247.0, 288.0, 288.0, 298.0, 284.0, 283.0, 299.0, 295.0, 278.0, 272.0, 250.0, 262.0, 277.0, 284.0, 289.0, 300.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7103830562160084, "mean_inference_ms": 1.2776410987550098, "mean_action_processing_ms": 0.13537899989677257, "mean_env_wait_ms": 0.8559871633031326, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 513.0, "episode_reward_mean": 561.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 280.955}, "hist_stats": {"episode_reward": [530.0, 522.0, 522.0, 516.0, 525.0, 570.0, 573.0, 582.0, 576.0, 570.0, 521.0, 525.0, 576.0, 582.0, 516.0, 519.0, 522.0, 573.0, 
573.0, 525.0, 579.0, 525.0, 533.0, 581.0, 570.0, 573.0, 579.0, 570.0, 576.0, 579.0, 573.0, 573.0, 519.0, 522.0, 576.0, 570.0, 579.0, 582.0, 579.0, 573.0, 522.0, 573.0, 561.0, 525.0, 573.0, 525.0, 579.0, 579.0, 576.0, 573.0, 573.0, 627.0, 570.0, 581.0, 513.0, 527.0, 582.0, 533.0, 573.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 530.0, 579.0, 527.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 576.0, 573.0, 584.0, 576.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 570.0, 573.0, 573.0, 519.0, 576.0, 582.0, 582.0, 573.0, 522.0, 539.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 261.0, 256.0, 266.0, 250.0, 272.0, 258.0, 258.0, 267.0, 258.0, 280.0, 290.0, 294.0, 279.0, 284.0, 298.0, 300.0, 276.0, 269.0, 301.0, 262.0, 259.0, 245.0, 280.0, 284.0, 292.0, 303.0, 279.0, 257.0, 259.0, 261.0, 258.0, 268.0, 254.0, 281.0, 292.0, 285.0, 288.0, 248.0, 277.0, 299.0, 280.0, 254.0, 271.0, 261.0, 272.0, 282.0, 299.0, 293.0, 277.0, 286.0, 287.0, 286.0, 293.0, 287.0, 283.0, 280.0, 296.0, 291.0, 288.0, 300.0, 273.0, 290.0, 283.0, 256.0, 263.0, 268.0, 254.0, 280.0, 296.0, 298.0, 272.0, 294.0, 285.0, 301.0, 281.0, 298.0, 281.0, 284.0, 289.0, 268.0, 254.0, 297.0, 276.0, 266.0, 295.0, 256.0, 269.0, 278.0, 295.0, 277.0, 248.0, 289.0, 290.0, 296.0, 283.0, 289.0, 287.0, 286.0, 287.0, 280.0, 293.0, 318.0, 309.0, 291.0, 279.0, 272.0, 309.0, 276.0, 237.0, 256.0, 271.0, 286.0, 296.0, 254.0, 279.0, 273.0, 300.0, 289.0, 284.0, 287.0, 286.0, 292.0, 290.0, 283.0, 293.0, 
292.0, 287.0, 285.0, 294.0, 300.0, 282.0, 289.0, 287.0, 288.0, 294.0, 284.0, 295.0, 262.0, 268.0, 294.0, 285.0, 269.0, 258.0, 301.0, 278.0, 288.0, 291.0, 301.0, 272.0, 280.0, 290.0, 304.0, 278.0, 286.0, 296.0, 282.0, 294.0, 290.0, 283.0, 288.0, 296.0, 298.0, 278.0, 289.0, 287.0, 282.0, 291.0, 258.0, 261.0, 260.0, 262.0, 290.0, 283.0, 287.0, 292.0, 281.0, 289.0, 281.0, 292.0, 286.0, 287.0, 272.0, 247.0, 288.0, 288.0, 298.0, 284.0, 283.0, 299.0, 295.0, 278.0, 272.0, 250.0, 262.0, 277.0, 284.0, 289.0, 300.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7103830562160084, "mean_inference_ms": 1.2776410987550098, "mean_action_processing_ms": 0.13537899989677257, "mean_env_wait_ms": 0.8559871633031326, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5734400, "num_agent_steps_trained": 5734400, "num_env_steps_sampled": 2867200, "num_env_steps_trained": 2867200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2867200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5734400, "timers": {"training_iteration_time_ms": 3681.002, "learn_time_ms": 1151.865, "learn_throughput": 11112.412, "synch_weights_time_ms": 11.456}, "counters": {"num_env_steps_sampled": 2867200, "num_env_steps_trained": 2867200, "num_agent_steps_sampled": 5734400, "num_agent_steps_trained": 5734400}, "done": false, "episodes_total": 7168, "training_iteration": 224, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-03", "timestamp": 1666581303, "time_this_iter_s": 3.812587261199951, "time_total_s": 872.5369362831116, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 872.5369362831116, "timesteps_since_restore": 0, "iterations_since_restore": 224, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.4, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 175.0, "shaped_reward_min": 153, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.42, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.98, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.14, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.77, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.0, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.55, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.72, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.0, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.55, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.0, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.55, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0004360268940217793, "policy_loss": -0.0007370202802121639, "vf_loss": 7.678671360015869, "vf_explained_var": 0.6376967430114746, "kl": 0.00243803090415895, "entropy": 0.9337455630302429, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2880000, "num_env_steps_trained": 2880000, "num_agent_steps_sampled": 5760000, "num_agent_steps_trained": 5760000}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 513.0, "episode_reward_mean": 568.2, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 284.1}, "custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 175.0, "shaped_reward_min": 153, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.42, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.98, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.14, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.77, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.0, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.55, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.72, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.0, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.55, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.0, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.55, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 522.0, 576.0, 570.0, 579.0, 582.0, 579.0, 573.0, 522.0, 573.0, 561.0, 525.0, 573.0, 525.0, 579.0, 579.0, 576.0, 573.0, 573.0, 627.0, 570.0, 581.0, 513.0, 527.0, 582.0, 533.0, 573.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 530.0, 579.0, 527.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 576.0, 573.0, 584.0, 576.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 570.0, 573.0, 573.0, 519.0, 576.0, 582.0, 582.0, 573.0, 522.0, 539.0, 573.0, 582.0, 576.0, 576.0, 579.0, 530.0, 579.0, 582.0, 570.0, 573.0, 576.0, 576.0, 576.0, 582.0, 525.0, 582.0, 584.0, 570.0, 579.0, 627.0, 573.0, 579.0, 582.0, 587.0, 530.0, 525.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 263.0, 268.0, 254.0, 280.0, 296.0, 298.0, 272.0, 294.0, 285.0, 301.0, 
281.0, 298.0, 281.0, 284.0, 289.0, 268.0, 254.0, 297.0, 276.0, 266.0, 295.0, 256.0, 269.0, 278.0, 295.0, 277.0, 248.0, 289.0, 290.0, 296.0, 283.0, 289.0, 287.0, 286.0, 287.0, 280.0, 293.0, 318.0, 309.0, 291.0, 279.0, 272.0, 309.0, 276.0, 237.0, 256.0, 271.0, 286.0, 296.0, 254.0, 279.0, 273.0, 300.0, 289.0, 284.0, 287.0, 286.0, 292.0, 290.0, 283.0, 293.0, 292.0, 287.0, 285.0, 294.0, 300.0, 282.0, 289.0, 287.0, 288.0, 294.0, 284.0, 295.0, 262.0, 268.0, 294.0, 285.0, 269.0, 258.0, 301.0, 278.0, 288.0, 291.0, 301.0, 272.0, 280.0, 290.0, 304.0, 278.0, 286.0, 296.0, 282.0, 294.0, 290.0, 283.0, 288.0, 296.0, 298.0, 278.0, 289.0, 287.0, 282.0, 291.0, 258.0, 261.0, 260.0, 262.0, 290.0, 283.0, 287.0, 292.0, 281.0, 289.0, 281.0, 292.0, 286.0, 287.0, 272.0, 247.0, 288.0, 288.0, 298.0, 284.0, 283.0, 299.0, 295.0, 278.0, 272.0, 250.0, 262.0, 277.0, 284.0, 289.0, 300.0, 282.0, 286.0, 290.0, 288.0, 288.0, 290.0, 289.0, 252.0, 278.0, 296.0, 283.0, 279.0, 303.0, 283.0, 287.0, 283.0, 290.0, 278.0, 298.0, 285.0, 291.0, 278.0, 298.0, 294.0, 288.0, 262.0, 263.0, 299.0, 283.0, 293.0, 291.0, 276.0, 294.0, 290.0, 289.0, 316.0, 311.0, 278.0, 295.0, 279.0, 300.0, 282.0, 300.0, 296.0, 291.0, 280.0, 250.0, 255.0, 270.0, 282.0, 302.0, 280.0, 299.0, 288.0, 294.0, 295.0, 284.0, 286.0, 290.0, 299.0, 283.0, 286.0, 290.0, 299.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7102671171729327, "mean_inference_ms": 1.2772917092306577, "mean_action_processing_ms": 0.13536466547217546, "mean_env_wait_ms": 0.8558784022983323, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 513.0, "episode_reward_mean": 568.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 284.1}, "hist_stats": {"episode_reward": [519.0, 522.0, 576.0, 570.0, 579.0, 582.0, 579.0, 573.0, 522.0, 573.0, 561.0, 525.0, 573.0, 525.0, 579.0, 579.0, 576.0, 573.0, 573.0, 
627.0, 570.0, 581.0, 513.0, 527.0, 582.0, 533.0, 573.0, 573.0, 573.0, 582.0, 576.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 530.0, 579.0, 527.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 576.0, 573.0, 584.0, 576.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 570.0, 573.0, 573.0, 519.0, 576.0, 582.0, 582.0, 573.0, 522.0, 539.0, 573.0, 582.0, 576.0, 576.0, 579.0, 530.0, 579.0, 582.0, 570.0, 573.0, 576.0, 576.0, 576.0, 582.0, 525.0, 582.0, 584.0, 570.0, 579.0, 627.0, 573.0, 579.0, 582.0, 587.0, 530.0, 525.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [256.0, 263.0, 268.0, 254.0, 280.0, 296.0, 298.0, 272.0, 294.0, 285.0, 301.0, 281.0, 298.0, 281.0, 284.0, 289.0, 268.0, 254.0, 297.0, 276.0, 266.0, 295.0, 256.0, 269.0, 278.0, 295.0, 277.0, 248.0, 289.0, 290.0, 296.0, 283.0, 289.0, 287.0, 286.0, 287.0, 280.0, 293.0, 318.0, 309.0, 291.0, 279.0, 272.0, 309.0, 276.0, 237.0, 256.0, 271.0, 286.0, 296.0, 254.0, 279.0, 273.0, 300.0, 289.0, 284.0, 287.0, 286.0, 292.0, 290.0, 283.0, 293.0, 292.0, 287.0, 285.0, 294.0, 300.0, 282.0, 289.0, 287.0, 288.0, 294.0, 284.0, 295.0, 262.0, 268.0, 294.0, 285.0, 269.0, 258.0, 301.0, 278.0, 288.0, 291.0, 301.0, 272.0, 280.0, 290.0, 304.0, 278.0, 286.0, 296.0, 282.0, 294.0, 290.0, 283.0, 288.0, 296.0, 298.0, 278.0, 289.0, 287.0, 282.0, 291.0, 258.0, 261.0, 260.0, 262.0, 290.0, 283.0, 287.0, 292.0, 281.0, 289.0, 281.0, 292.0, 286.0, 287.0, 272.0, 247.0, 288.0, 288.0, 298.0, 284.0, 283.0, 299.0, 295.0, 
278.0, 272.0, 250.0, 262.0, 277.0, 284.0, 289.0, 300.0, 282.0, 286.0, 290.0, 288.0, 288.0, 290.0, 289.0, 252.0, 278.0, 296.0, 283.0, 279.0, 303.0, 283.0, 287.0, 283.0, 290.0, 278.0, 298.0, 285.0, 291.0, 278.0, 298.0, 294.0, 288.0, 262.0, 263.0, 299.0, 283.0, 293.0, 291.0, 276.0, 294.0, 290.0, 289.0, 316.0, 311.0, 278.0, 295.0, 279.0, 300.0, 282.0, 300.0, 296.0, 291.0, 280.0, 250.0, 255.0, 270.0, 282.0, 302.0, 280.0, 299.0, 288.0, 294.0, 295.0, 284.0, 286.0, 290.0, 299.0, 283.0, 286.0, 290.0, 299.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7102671171729327, "mean_inference_ms": 1.2772917092306577, "mean_action_processing_ms": 0.13536466547217546, "mean_env_wait_ms": 0.8558784022983323, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5760000, "num_agent_steps_trained": 5760000, "num_env_steps_sampled": 2880000, "num_env_steps_trained": 2880000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2880000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5760000, "timers": {"training_iteration_time_ms": 3679.165, "learn_time_ms": 1150.927, "learn_throughput": 11121.47, "synch_weights_time_ms": 11.853}, "counters": {"num_env_steps_sampled": 2880000, "num_env_steps_trained": 2880000, "num_agent_steps_sampled": 5760000, "num_agent_steps_trained": 5760000}, "done": false, "episodes_total": 7200, "training_iteration": 225, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-07", "timestamp": 1666581307, "time_this_iter_s": 3.679936408996582, "time_total_s": 876.2168726921082, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 876.2168726921082, "timesteps_since_restore": 0, "iterations_since_restore": 225, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.200000000000003, "ram_util_percent": 10.616666666666665}} +{"custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 174.63, "shaped_reward_min": 60, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.81, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.66, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.17, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.6, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 
15.05, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002445029327645898, "policy_loss": 0.002136027906090021, "vf_loss": 7.732485771179199, "vf_explained_var": 0.622205913066864, "kl": 0.002588339149951935, "entropy": 0.9284940958023071, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2892800, "num_env_steps_trained": 2892800, "num_agent_steps_sampled": 5785600, "num_agent_steps_trained": 5785600}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 180.0, "episode_reward_mean": 565.43, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 282.715}, "custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 174.63, "shaped_reward_min": 60, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.81, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.66, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.46, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.6, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.05, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.46, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.46, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 576.0, 582.0, 579.0, 530.0, 579.0, 527.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 576.0, 573.0, 584.0, 576.0, 576.0, 573.0, 519.0, 522.0, 573.0, 579.0, 570.0, 573.0, 573.0, 519.0, 576.0, 582.0, 582.0, 573.0, 522.0, 539.0, 573.0, 582.0, 576.0, 576.0, 579.0, 530.0, 579.0, 582.0, 570.0, 573.0, 576.0, 576.0, 576.0, 582.0, 525.0, 582.0, 584.0, 570.0, 579.0, 627.0, 573.0, 579.0, 582.0, 587.0, 530.0, 525.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 570.0, 573.0, 482.0, 573.0, 576.0, 582.0, 576.0, 527.0, 570.0, 573.0, 180.0, 524.0, 582.0, 567.0, 522.0, 573.0, 579.0, 576.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 294.0, 300.0, 282.0, 289.0, 287.0, 288.0, 294.0, 284.0, 295.0, 262.0, 
268.0, 294.0, 285.0, 269.0, 258.0, 301.0, 278.0, 288.0, 291.0, 301.0, 272.0, 280.0, 290.0, 304.0, 278.0, 286.0, 296.0, 282.0, 294.0, 290.0, 283.0, 288.0, 296.0, 298.0, 278.0, 289.0, 287.0, 282.0, 291.0, 258.0, 261.0, 260.0, 262.0, 290.0, 283.0, 287.0, 292.0, 281.0, 289.0, 281.0, 292.0, 286.0, 287.0, 272.0, 247.0, 288.0, 288.0, 298.0, 284.0, 283.0, 299.0, 295.0, 278.0, 272.0, 250.0, 262.0, 277.0, 284.0, 289.0, 300.0, 282.0, 286.0, 290.0, 288.0, 288.0, 290.0, 289.0, 252.0, 278.0, 296.0, 283.0, 279.0, 303.0, 283.0, 287.0, 283.0, 290.0, 278.0, 298.0, 285.0, 291.0, 278.0, 298.0, 294.0, 288.0, 262.0, 263.0, 299.0, 283.0, 293.0, 291.0, 276.0, 294.0, 290.0, 289.0, 316.0, 311.0, 278.0, 295.0, 279.0, 300.0, 282.0, 300.0, 296.0, 291.0, 280.0, 250.0, 255.0, 270.0, 282.0, 302.0, 280.0, 299.0, 288.0, 294.0, 295.0, 284.0, 286.0, 290.0, 299.0, 283.0, 286.0, 290.0, 299.0, 283.0, 278.0, 304.0, 284.0, 298.0, 262.0, 263.0, 282.0, 297.0, 284.0, 292.0, 287.0, 289.0, 289.0, 293.0, 296.0, 288.0, 279.0, 300.0, 292.0, 290.0, 294.0, 285.0, 284.0, 286.0, 288.0, 285.0, 247.0, 235.0, 294.0, 279.0, 289.0, 287.0, 297.0, 285.0, 279.0, 297.0, 262.0, 265.0, 274.0, 296.0, 283.0, 290.0, 89.0, 91.0, 266.0, 258.0, 278.0, 304.0, 288.0, 279.0, 255.0, 267.0, 285.0, 288.0, 289.0, 290.0, 299.0, 277.0, 291.0, 288.0, 298.0, 284.0, 285.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7101525919658326, "mean_inference_ms": 1.2769321856060183, "mean_action_processing_ms": 0.1353521633293626, "mean_env_wait_ms": 0.8556845453719546, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 180.0, "episode_reward_mean": 565.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 282.715}, "hist_stats": {"episode_reward": [579.0, 582.0, 576.0, 582.0, 579.0, 530.0, 579.0, 527.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 576.0, 573.0, 584.0, 576.0, 576.0, 
573.0, 519.0, 522.0, 573.0, 579.0, 570.0, 573.0, 573.0, 519.0, 576.0, 582.0, 582.0, 573.0, 522.0, 539.0, 573.0, 582.0, 576.0, 576.0, 579.0, 530.0, 579.0, 582.0, 570.0, 573.0, 576.0, 576.0, 576.0, 582.0, 525.0, 582.0, 584.0, 570.0, 579.0, 627.0, 573.0, 579.0, 582.0, 587.0, 530.0, 525.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 570.0, 573.0, 482.0, 573.0, 576.0, 582.0, 576.0, 527.0, 570.0, 573.0, 180.0, 524.0, 582.0, 567.0, 522.0, 573.0, 579.0, 576.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 294.0, 300.0, 282.0, 289.0, 287.0, 288.0, 294.0, 284.0, 295.0, 262.0, 268.0, 294.0, 285.0, 269.0, 258.0, 301.0, 278.0, 288.0, 291.0, 301.0, 272.0, 280.0, 290.0, 304.0, 278.0, 286.0, 296.0, 282.0, 294.0, 290.0, 283.0, 288.0, 296.0, 298.0, 278.0, 289.0, 287.0, 282.0, 291.0, 258.0, 261.0, 260.0, 262.0, 290.0, 283.0, 287.0, 292.0, 281.0, 289.0, 281.0, 292.0, 286.0, 287.0, 272.0, 247.0, 288.0, 288.0, 298.0, 284.0, 283.0, 299.0, 295.0, 278.0, 272.0, 250.0, 262.0, 277.0, 284.0, 289.0, 300.0, 282.0, 286.0, 290.0, 288.0, 288.0, 290.0, 289.0, 252.0, 278.0, 296.0, 283.0, 279.0, 303.0, 283.0, 287.0, 283.0, 290.0, 278.0, 298.0, 285.0, 291.0, 278.0, 298.0, 294.0, 288.0, 262.0, 263.0, 299.0, 283.0, 293.0, 291.0, 276.0, 294.0, 290.0, 289.0, 316.0, 311.0, 278.0, 295.0, 279.0, 300.0, 282.0, 300.0, 296.0, 291.0, 280.0, 250.0, 255.0, 270.0, 282.0, 302.0, 280.0, 299.0, 288.0, 294.0, 295.0, 
284.0, 286.0, 290.0, 299.0, 283.0, 286.0, 290.0, 299.0, 283.0, 278.0, 304.0, 284.0, 298.0, 262.0, 263.0, 282.0, 297.0, 284.0, 292.0, 287.0, 289.0, 289.0, 293.0, 296.0, 288.0, 279.0, 300.0, 292.0, 290.0, 294.0, 285.0, 284.0, 286.0, 288.0, 285.0, 247.0, 235.0, 294.0, 279.0, 289.0, 287.0, 297.0, 285.0, 279.0, 297.0, 262.0, 265.0, 274.0, 296.0, 283.0, 290.0, 89.0, 91.0, 266.0, 258.0, 278.0, 304.0, 288.0, 279.0, 255.0, 267.0, 285.0, 288.0, 289.0, 290.0, 299.0, 277.0, 291.0, 288.0, 298.0, 284.0, 285.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7101525919658326, "mean_inference_ms": 1.2769321856060183, "mean_action_processing_ms": 0.1353521633293626, "mean_env_wait_ms": 0.8556845453719546, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5785600, "num_agent_steps_trained": 5785600, "num_env_steps_sampled": 2892800, "num_env_steps_trained": 2892800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2892800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5785600, "timers": {"training_iteration_time_ms": 3665.77, "learn_time_ms": 1139.941, "learn_throughput": 11228.652, "synch_weights_time_ms": 11.812}, "counters": {"num_env_steps_sampled": 2892800, "num_env_steps_trained": 2892800, "num_agent_steps_sampled": 5785600, "num_agent_steps_trained": 5785600}, "done": false, "episodes_total": 7232, "training_iteration": 226, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-11", "timestamp": 1666581311, "time_this_iter_s": 3.691885471343994, "time_total_s": 879.9087581634521, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 879.9087581634521, "timesteps_since_restore": 0, "iterations_since_restore": 226, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.580000000000002, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.61, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.8, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.56, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.04, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.33, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.04, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.33, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.04, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.33, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002218155190348625, "policy_loss": -0.0025398321449756622, "vf_loss": 7.840729713439941, "vf_explained_var": 0.6095919609069824, "kl": 0.002450748812407255, "entropy": 0.9247908592224121, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2905600, "num_env_steps_trained": 2905600, "num_agent_steps_sampled": 5811200, "num_agent_steps_trained": 5811200}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 180.0, "episode_reward_mean": 561.61, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 280.805}, "custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.61, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.8, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.56, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 15.04, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.33, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 4, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.04, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.33, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.04, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.33, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 539.0, 573.0, 582.0, 576.0, 576.0, 579.0, 530.0, 579.0, 582.0, 570.0, 573.0, 576.0, 576.0, 576.0, 582.0, 525.0, 582.0, 584.0, 570.0, 579.0, 627.0, 573.0, 579.0, 582.0, 587.0, 530.0, 525.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 570.0, 573.0, 482.0, 573.0, 576.0, 582.0, 576.0, 527.0, 570.0, 573.0, 180.0, 524.0, 582.0, 567.0, 522.0, 573.0, 579.0, 576.0, 579.0, 582.0, 579.0, 582.0, 579.0, 522.0, 582.0, 573.0, 561.0, 522.0, 525.0, 573.0, 525.0, 519.0, 573.0, 573.0, 584.0, 573.0, 576.0, 582.0, 530.0, 465.0, 570.0, 579.0, 579.0, 590.0, 582.0, 573.0, 530.0, 582.0, 519.0, 513.0, 576.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [272.0, 250.0, 262.0, 277.0, 284.0, 289.0, 300.0, 282.0, 286.0, 290.0, 288.0, 
288.0, 290.0, 289.0, 252.0, 278.0, 296.0, 283.0, 279.0, 303.0, 283.0, 287.0, 283.0, 290.0, 278.0, 298.0, 285.0, 291.0, 278.0, 298.0, 294.0, 288.0, 262.0, 263.0, 299.0, 283.0, 293.0, 291.0, 276.0, 294.0, 290.0, 289.0, 316.0, 311.0, 278.0, 295.0, 279.0, 300.0, 282.0, 300.0, 296.0, 291.0, 280.0, 250.0, 255.0, 270.0, 282.0, 302.0, 280.0, 299.0, 288.0, 294.0, 295.0, 284.0, 286.0, 290.0, 299.0, 283.0, 286.0, 290.0, 299.0, 283.0, 278.0, 304.0, 284.0, 298.0, 262.0, 263.0, 282.0, 297.0, 284.0, 292.0, 287.0, 289.0, 289.0, 293.0, 296.0, 288.0, 279.0, 300.0, 292.0, 290.0, 294.0, 285.0, 284.0, 286.0, 288.0, 285.0, 247.0, 235.0, 294.0, 279.0, 289.0, 287.0, 297.0, 285.0, 279.0, 297.0, 262.0, 265.0, 274.0, 296.0, 283.0, 290.0, 89.0, 91.0, 266.0, 258.0, 278.0, 304.0, 288.0, 279.0, 255.0, 267.0, 285.0, 288.0, 289.0, 290.0, 299.0, 277.0, 291.0, 288.0, 298.0, 284.0, 285.0, 294.0, 281.0, 301.0, 288.0, 291.0, 269.0, 253.0, 295.0, 287.0, 285.0, 288.0, 283.0, 278.0, 249.0, 273.0, 262.0, 263.0, 292.0, 281.0, 264.0, 261.0, 262.0, 257.0, 295.0, 278.0, 294.0, 279.0, 287.0, 297.0, 270.0, 303.0, 277.0, 299.0, 292.0, 290.0, 278.0, 252.0, 218.0, 247.0, 280.0, 290.0, 280.0, 299.0, 289.0, 290.0, 299.0, 291.0, 287.0, 295.0, 279.0, 294.0, 248.0, 282.0, 289.0, 293.0, 260.0, 259.0, 262.0, 251.0, 302.0, 274.0, 290.0, 289.0, 250.0, 275.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7100616240854177, "mean_inference_ms": 1.27656070500398, "mean_action_processing_ms": 0.13533752460087545, "mean_env_wait_ms": 0.8554818419261924, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 180.0, "episode_reward_mean": 561.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 280.805}, "hist_stats": {"episode_reward": [522.0, 539.0, 573.0, 582.0, 576.0, 576.0, 579.0, 530.0, 579.0, 582.0, 570.0, 573.0, 576.0, 576.0, 576.0, 582.0, 525.0, 582.0, 584.0, 
570.0, 579.0, 627.0, 573.0, 579.0, 582.0, 587.0, 530.0, 525.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 570.0, 573.0, 482.0, 573.0, 576.0, 582.0, 576.0, 527.0, 570.0, 573.0, 180.0, 524.0, 582.0, 567.0, 522.0, 573.0, 579.0, 576.0, 579.0, 582.0, 579.0, 582.0, 579.0, 522.0, 582.0, 573.0, 561.0, 522.0, 525.0, 573.0, 525.0, 519.0, 573.0, 573.0, 584.0, 573.0, 576.0, 582.0, 530.0, 465.0, 570.0, 579.0, 579.0, 590.0, 582.0, 573.0, 530.0, 582.0, 519.0, 513.0, 576.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [272.0, 250.0, 262.0, 277.0, 284.0, 289.0, 300.0, 282.0, 286.0, 290.0, 288.0, 288.0, 290.0, 289.0, 252.0, 278.0, 296.0, 283.0, 279.0, 303.0, 283.0, 287.0, 283.0, 290.0, 278.0, 298.0, 285.0, 291.0, 278.0, 298.0, 294.0, 288.0, 262.0, 263.0, 299.0, 283.0, 293.0, 291.0, 276.0, 294.0, 290.0, 289.0, 316.0, 311.0, 278.0, 295.0, 279.0, 300.0, 282.0, 300.0, 296.0, 291.0, 280.0, 250.0, 255.0, 270.0, 282.0, 302.0, 280.0, 299.0, 288.0, 294.0, 295.0, 284.0, 286.0, 290.0, 299.0, 283.0, 286.0, 290.0, 299.0, 283.0, 278.0, 304.0, 284.0, 298.0, 262.0, 263.0, 282.0, 297.0, 284.0, 292.0, 287.0, 289.0, 289.0, 293.0, 296.0, 288.0, 279.0, 300.0, 292.0, 290.0, 294.0, 285.0, 284.0, 286.0, 288.0, 285.0, 247.0, 235.0, 294.0, 279.0, 289.0, 287.0, 297.0, 285.0, 279.0, 297.0, 262.0, 265.0, 274.0, 296.0, 283.0, 290.0, 89.0, 91.0, 266.0, 258.0, 278.0, 304.0, 288.0, 279.0, 255.0, 267.0, 285.0, 288.0, 289.0, 
290.0, 299.0, 277.0, 291.0, 288.0, 298.0, 284.0, 285.0, 294.0, 281.0, 301.0, 288.0, 291.0, 269.0, 253.0, 295.0, 287.0, 285.0, 288.0, 283.0, 278.0, 249.0, 273.0, 262.0, 263.0, 292.0, 281.0, 264.0, 261.0, 262.0, 257.0, 295.0, 278.0, 294.0, 279.0, 287.0, 297.0, 270.0, 303.0, 277.0, 299.0, 292.0, 290.0, 278.0, 252.0, 218.0, 247.0, 280.0, 290.0, 280.0, 299.0, 289.0, 290.0, 299.0, 291.0, 287.0, 295.0, 279.0, 294.0, 248.0, 282.0, 289.0, 293.0, 260.0, 259.0, 262.0, 251.0, 302.0, 274.0, 290.0, 289.0, 250.0, 275.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7100616240854177, "mean_inference_ms": 1.27656070500398, "mean_action_processing_ms": 0.13533752460087545, "mean_env_wait_ms": 0.8554818419261924, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5811200, "num_agent_steps_trained": 5811200, "num_env_steps_sampled": 2905600, "num_env_steps_trained": 2905600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2905600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5811200, "timers": {"training_iteration_time_ms": 3670.46, "learn_time_ms": 1136.875, "learn_throughput": 11258.933, "synch_weights_time_ms": 11.752}, "counters": {"num_env_steps_sampled": 2905600, "num_env_steps_trained": 2905600, "num_agent_steps_sampled": 5811200, "num_agent_steps_trained": 5811200}, "done": false, "episodes_total": 7264, "training_iteration": 227, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-15", "timestamp": 1666581315, "time_this_iter_s": 3.6734375953674316, "time_total_s": 883.5821957588196, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 883.5821957588196, "timesteps_since_restore": 0, "iterations_since_restore": 227, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.250000000000004, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 172.14, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.75, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.2, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.43, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.19, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 14.98, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.19, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.19, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002215281594544649, "policy_loss": -0.0025261661503463984, "vf_loss": 7.811648845672607, "vf_explained_var": 0.6087123155593872, "kl": 0.0020283572375774384, "entropy": 0.9405587911605835, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2918400, "num_env_steps_trained": 2918400, "num_agent_steps_sampled": 5836800, "num_agent_steps_trained": 5836800}, "sampler_results": {"episode_reward_max": 590.0, "episode_reward_min": 180.0, "episode_reward_mean": 558.54, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 279.27}, "custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 60, "sparse_reward_max": 200, "shaped_reward_mean": 172.14, "shaped_reward_min": 60, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.75, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.2, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.43, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.19, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.19, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.19, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 582.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 570.0, 573.0, 482.0, 573.0, 576.0, 582.0, 576.0, 527.0, 570.0, 573.0, 180.0, 524.0, 582.0, 567.0, 522.0, 573.0, 579.0, 576.0, 579.0, 582.0, 579.0, 582.0, 579.0, 522.0, 582.0, 573.0, 561.0, 522.0, 525.0, 573.0, 525.0, 519.0, 573.0, 573.0, 584.0, 573.0, 576.0, 582.0, 530.0, 465.0, 570.0, 579.0, 579.0, 590.0, 582.0, 573.0, 530.0, 582.0, 519.0, 513.0, 576.0, 579.0, 525.0, 573.0, 525.0, 573.0, 516.0, 579.0, 576.0, 576.0, 579.0, 576.0, 573.0, 570.0, 576.0, 573.0, 579.0, 570.0, 525.0, 584.0, 579.0, 519.0, 522.0, 530.0, 581.0, 519.0, 576.0, 582.0, 570.0, 573.0, 579.0, 527.0, 576.0, 570.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 299.0, 283.0, 286.0, 290.0, 299.0, 283.0, 278.0, 304.0, 284.0, 
298.0, 262.0, 263.0, 282.0, 297.0, 284.0, 292.0, 287.0, 289.0, 289.0, 293.0, 296.0, 288.0, 279.0, 300.0, 292.0, 290.0, 294.0, 285.0, 284.0, 286.0, 288.0, 285.0, 247.0, 235.0, 294.0, 279.0, 289.0, 287.0, 297.0, 285.0, 279.0, 297.0, 262.0, 265.0, 274.0, 296.0, 283.0, 290.0, 89.0, 91.0, 266.0, 258.0, 278.0, 304.0, 288.0, 279.0, 255.0, 267.0, 285.0, 288.0, 289.0, 290.0, 299.0, 277.0, 291.0, 288.0, 298.0, 284.0, 285.0, 294.0, 281.0, 301.0, 288.0, 291.0, 269.0, 253.0, 295.0, 287.0, 285.0, 288.0, 283.0, 278.0, 249.0, 273.0, 262.0, 263.0, 292.0, 281.0, 264.0, 261.0, 262.0, 257.0, 295.0, 278.0, 294.0, 279.0, 287.0, 297.0, 270.0, 303.0, 277.0, 299.0, 292.0, 290.0, 278.0, 252.0, 218.0, 247.0, 280.0, 290.0, 280.0, 299.0, 289.0, 290.0, 299.0, 291.0, 287.0, 295.0, 279.0, 294.0, 248.0, 282.0, 289.0, 293.0, 260.0, 259.0, 262.0, 251.0, 302.0, 274.0, 290.0, 289.0, 250.0, 275.0, 303.0, 270.0, 263.0, 262.0, 292.0, 281.0, 255.0, 261.0, 294.0, 285.0, 289.0, 287.0, 294.0, 282.0, 293.0, 286.0, 304.0, 272.0, 294.0, 279.0, 276.0, 294.0, 280.0, 296.0, 278.0, 295.0, 281.0, 298.0, 286.0, 284.0, 259.0, 266.0, 299.0, 285.0, 281.0, 298.0, 264.0, 255.0, 259.0, 263.0, 261.0, 269.0, 285.0, 296.0, 261.0, 258.0, 281.0, 295.0, 294.0, 288.0, 282.0, 288.0, 292.0, 281.0, 299.0, 280.0, 251.0, 276.0, 284.0, 292.0, 298.0, 272.0, 272.0, 253.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7099584001922209, "mean_inference_ms": 1.276192459329255, "mean_action_processing_ms": 0.13532474653956864, "mean_env_wait_ms": 0.8552878576602316, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 590.0, "episode_reward_min": 180.0, "episode_reward_mean": 558.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 279.27}, "hist_stats": {"episode_reward": [576.0, 582.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 570.0, 573.0, 482.0, 573.0, 
576.0, 582.0, 576.0, 527.0, 570.0, 573.0, 180.0, 524.0, 582.0, 567.0, 522.0, 573.0, 579.0, 576.0, 579.0, 582.0, 579.0, 582.0, 579.0, 522.0, 582.0, 573.0, 561.0, 522.0, 525.0, 573.0, 525.0, 519.0, 573.0, 573.0, 584.0, 573.0, 576.0, 582.0, 530.0, 465.0, 570.0, 579.0, 579.0, 590.0, 582.0, 573.0, 530.0, 582.0, 519.0, 513.0, 576.0, 579.0, 525.0, 573.0, 525.0, 573.0, 516.0, 579.0, 576.0, 576.0, 579.0, 576.0, 573.0, 570.0, 576.0, 573.0, 579.0, 570.0, 525.0, 584.0, 579.0, 519.0, 522.0, 530.0, 581.0, 519.0, 576.0, 582.0, 570.0, 573.0, 579.0, 527.0, 576.0, 570.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 299.0, 283.0, 286.0, 290.0, 299.0, 283.0, 278.0, 304.0, 284.0, 298.0, 262.0, 263.0, 282.0, 297.0, 284.0, 292.0, 287.0, 289.0, 289.0, 293.0, 296.0, 288.0, 279.0, 300.0, 292.0, 290.0, 294.0, 285.0, 284.0, 286.0, 288.0, 285.0, 247.0, 235.0, 294.0, 279.0, 289.0, 287.0, 297.0, 285.0, 279.0, 297.0, 262.0, 265.0, 274.0, 296.0, 283.0, 290.0, 89.0, 91.0, 266.0, 258.0, 278.0, 304.0, 288.0, 279.0, 255.0, 267.0, 285.0, 288.0, 289.0, 290.0, 299.0, 277.0, 291.0, 288.0, 298.0, 284.0, 285.0, 294.0, 281.0, 301.0, 288.0, 291.0, 269.0, 253.0, 295.0, 287.0, 285.0, 288.0, 283.0, 278.0, 249.0, 273.0, 262.0, 263.0, 292.0, 281.0, 264.0, 261.0, 262.0, 257.0, 295.0, 278.0, 294.0, 279.0, 287.0, 297.0, 270.0, 303.0, 277.0, 299.0, 292.0, 290.0, 278.0, 252.0, 218.0, 247.0, 280.0, 290.0, 280.0, 299.0, 289.0, 290.0, 299.0, 291.0, 287.0, 295.0, 279.0, 294.0, 248.0, 282.0, 289.0, 293.0, 260.0, 
259.0, 262.0, 251.0, 302.0, 274.0, 290.0, 289.0, 250.0, 275.0, 303.0, 270.0, 263.0, 262.0, 292.0, 281.0, 255.0, 261.0, 294.0, 285.0, 289.0, 287.0, 294.0, 282.0, 293.0, 286.0, 304.0, 272.0, 294.0, 279.0, 276.0, 294.0, 280.0, 296.0, 278.0, 295.0, 281.0, 298.0, 286.0, 284.0, 259.0, 266.0, 299.0, 285.0, 281.0, 298.0, 264.0, 255.0, 259.0, 263.0, 261.0, 269.0, 285.0, 296.0, 261.0, 258.0, 281.0, 295.0, 294.0, 288.0, 282.0, 288.0, 292.0, 281.0, 299.0, 280.0, 251.0, 276.0, 284.0, 292.0, 298.0, 272.0, 272.0, 253.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7099584001922209, "mean_inference_ms": 1.276192459329255, "mean_action_processing_ms": 0.13532474653956864, "mean_env_wait_ms": 0.8552878576602316, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5836800, "num_agent_steps_trained": 5836800, "num_env_steps_sampled": 2918400, "num_env_steps_trained": 2918400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2918400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5836800, "timers": {"training_iteration_time_ms": 3671.44, "learn_time_ms": 1137.158, "learn_throughput": 11256.132, "synch_weights_time_ms": 12.303}, "counters": {"num_env_steps_sampled": 2918400, "num_env_steps_trained": 2918400, "num_agent_steps_sampled": 5836800, "num_agent_steps_trained": 5836800}, "done": false, "episodes_total": 7296, "training_iteration": 228, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-19", "timestamp": 1666581319, "time_this_iter_s": 3.7271084785461426, "time_total_s": 887.3093042373657, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 887.3093042373657, "timesteps_since_restore": 0, "iterations_since_restore": 228, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.580000000000002, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 172.48, "shaped_reward_min": 145, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.82, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.49, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.13, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.22, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.1, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 14.92, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.13, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.13, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000783019233494997, "policy_loss": 0.00048736238386482, "vf_loss": 7.665143013000488, "vf_explained_var": 0.6067088842391968, "kl": 0.0021572881378233433, "entropy": 0.9417125582695007, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2931200, "num_env_steps_trained": 2931200, "num_agent_steps_sampled": 5862400, "num_agent_steps_trained": 5862400}, "sampler_results": {"episode_reward_max": 590.0, "episode_reward_min": 465.0, "episode_reward_mean": 559.68, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 218.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 279.84}, "custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 172.48, "shaped_reward_min": 145, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.82, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.49, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.36, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 5, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.19, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 5, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.13, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.22, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.1, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.13, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.13, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 582.0, 579.0, 582.0, 579.0, 522.0, 582.0, 573.0, 561.0, 522.0, 525.0, 573.0, 525.0, 519.0, 573.0, 573.0, 584.0, 573.0, 576.0, 582.0, 530.0, 465.0, 570.0, 579.0, 579.0, 590.0, 582.0, 573.0, 530.0, 582.0, 519.0, 513.0, 576.0, 579.0, 525.0, 573.0, 525.0, 573.0, 516.0, 579.0, 576.0, 576.0, 579.0, 576.0, 573.0, 570.0, 576.0, 573.0, 579.0, 570.0, 525.0, 584.0, 579.0, 519.0, 522.0, 530.0, 581.0, 519.0, 576.0, 582.0, 570.0, 573.0, 579.0, 527.0, 576.0, 570.0, 525.0, 573.0, 530.0, 525.0, 525.0, 570.0, 530.0, 576.0, 573.0, 587.0, 570.0, 570.0, 582.0, 576.0, 527.0, 570.0, 570.0, 582.0, 582.0, 576.0, 573.0, 576.0, 570.0, 570.0, 522.0, 573.0, 530.0, 521.0, 530.0, 487.0, 576.0, 579.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 277.0, 291.0, 288.0, 298.0, 284.0, 285.0, 294.0, 281.0, 301.0, 288.0, 
291.0, 269.0, 253.0, 295.0, 287.0, 285.0, 288.0, 283.0, 278.0, 249.0, 273.0, 262.0, 263.0, 292.0, 281.0, 264.0, 261.0, 262.0, 257.0, 295.0, 278.0, 294.0, 279.0, 287.0, 297.0, 270.0, 303.0, 277.0, 299.0, 292.0, 290.0, 278.0, 252.0, 218.0, 247.0, 280.0, 290.0, 280.0, 299.0, 289.0, 290.0, 299.0, 291.0, 287.0, 295.0, 279.0, 294.0, 248.0, 282.0, 289.0, 293.0, 260.0, 259.0, 262.0, 251.0, 302.0, 274.0, 290.0, 289.0, 250.0, 275.0, 303.0, 270.0, 263.0, 262.0, 292.0, 281.0, 255.0, 261.0, 294.0, 285.0, 289.0, 287.0, 294.0, 282.0, 293.0, 286.0, 304.0, 272.0, 294.0, 279.0, 276.0, 294.0, 280.0, 296.0, 278.0, 295.0, 281.0, 298.0, 286.0, 284.0, 259.0, 266.0, 299.0, 285.0, 281.0, 298.0, 264.0, 255.0, 259.0, 263.0, 261.0, 269.0, 285.0, 296.0, 261.0, 258.0, 281.0, 295.0, 294.0, 288.0, 282.0, 288.0, 292.0, 281.0, 299.0, 280.0, 251.0, 276.0, 284.0, 292.0, 298.0, 272.0, 272.0, 253.0, 289.0, 284.0, 267.0, 263.0, 265.0, 260.0, 267.0, 258.0, 284.0, 286.0, 265.0, 265.0, 298.0, 278.0, 290.0, 283.0, 287.0, 300.0, 297.0, 273.0, 277.0, 293.0, 300.0, 282.0, 278.0, 298.0, 264.0, 263.0, 295.0, 275.0, 288.0, 282.0, 300.0, 282.0, 293.0, 289.0, 282.0, 294.0, 280.0, 293.0, 282.0, 294.0, 279.0, 291.0, 281.0, 289.0, 254.0, 268.0, 286.0, 287.0, 265.0, 265.0, 262.0, 259.0, 269.0, 261.0, 244.0, 243.0, 286.0, 290.0, 290.0, 289.0, 300.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7098545863064848, "mean_inference_ms": 1.2758538422651737, "mean_action_processing_ms": 0.13531428434171733, "mean_env_wait_ms": 0.8551108870485021, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 590.0, "episode_reward_min": 465.0, "episode_reward_mean": 559.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 218.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 279.84}, "hist_stats": {"episode_reward": [576.0, 579.0, 582.0, 579.0, 582.0, 579.0, 522.0, 582.0, 573.0, 561.0, 522.0, 525.0, 573.0, 525.0, 519.0, 573.0, 573.0, 584.0, 573.0, 
576.0, 582.0, 530.0, 465.0, 570.0, 579.0, 579.0, 590.0, 582.0, 573.0, 530.0, 582.0, 519.0, 513.0, 576.0, 579.0, 525.0, 573.0, 525.0, 573.0, 516.0, 579.0, 576.0, 576.0, 579.0, 576.0, 573.0, 570.0, 576.0, 573.0, 579.0, 570.0, 525.0, 584.0, 579.0, 519.0, 522.0, 530.0, 581.0, 519.0, 576.0, 582.0, 570.0, 573.0, 579.0, 527.0, 576.0, 570.0, 525.0, 573.0, 530.0, 525.0, 525.0, 570.0, 530.0, 576.0, 573.0, 587.0, 570.0, 570.0, 582.0, 576.0, 527.0, 570.0, 570.0, 582.0, 582.0, 576.0, 573.0, 576.0, 570.0, 570.0, 522.0, 573.0, 530.0, 521.0, 530.0, 487.0, 576.0, 579.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [299.0, 277.0, 291.0, 288.0, 298.0, 284.0, 285.0, 294.0, 281.0, 301.0, 288.0, 291.0, 269.0, 253.0, 295.0, 287.0, 285.0, 288.0, 283.0, 278.0, 249.0, 273.0, 262.0, 263.0, 292.0, 281.0, 264.0, 261.0, 262.0, 257.0, 295.0, 278.0, 294.0, 279.0, 287.0, 297.0, 270.0, 303.0, 277.0, 299.0, 292.0, 290.0, 278.0, 252.0, 218.0, 247.0, 280.0, 290.0, 280.0, 299.0, 289.0, 290.0, 299.0, 291.0, 287.0, 295.0, 279.0, 294.0, 248.0, 282.0, 289.0, 293.0, 260.0, 259.0, 262.0, 251.0, 302.0, 274.0, 290.0, 289.0, 250.0, 275.0, 303.0, 270.0, 263.0, 262.0, 292.0, 281.0, 255.0, 261.0, 294.0, 285.0, 289.0, 287.0, 294.0, 282.0, 293.0, 286.0, 304.0, 272.0, 294.0, 279.0, 276.0, 294.0, 280.0, 296.0, 278.0, 295.0, 281.0, 298.0, 286.0, 284.0, 259.0, 266.0, 299.0, 285.0, 281.0, 298.0, 264.0, 255.0, 259.0, 263.0, 261.0, 269.0, 285.0, 296.0, 261.0, 258.0, 281.0, 295.0, 294.0, 288.0, 282.0, 288.0, 292.0, 281.0, 299.0, 
280.0, 251.0, 276.0, 284.0, 292.0, 298.0, 272.0, 272.0, 253.0, 289.0, 284.0, 267.0, 263.0, 265.0, 260.0, 267.0, 258.0, 284.0, 286.0, 265.0, 265.0, 298.0, 278.0, 290.0, 283.0, 287.0, 300.0, 297.0, 273.0, 277.0, 293.0, 300.0, 282.0, 278.0, 298.0, 264.0, 263.0, 295.0, 275.0, 288.0, 282.0, 300.0, 282.0, 293.0, 289.0, 282.0, 294.0, 280.0, 293.0, 282.0, 294.0, 279.0, 291.0, 281.0, 289.0, 254.0, 268.0, 286.0, 287.0, 265.0, 265.0, 262.0, 259.0, 269.0, 261.0, 244.0, 243.0, 286.0, 290.0, 290.0, 289.0, 300.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7098545863064848, "mean_inference_ms": 1.2758538422651737, "mean_action_processing_ms": 0.13531428434171733, "mean_env_wait_ms": 0.8551108870485021, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5862400, "num_agent_steps_trained": 5862400, "num_env_steps_sampled": 2931200, "num_env_steps_trained": 2931200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2931200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5862400, "timers": {"training_iteration_time_ms": 3680.392, "learn_time_ms": 1138.268, "learn_throughput": 11245.159, "synch_weights_time_ms": 11.92}, "counters": {"num_env_steps_sampled": 2931200, "num_env_steps_trained": 2931200, "num_agent_steps_sampled": 5862400, "num_agent_steps_trained": 5862400}, "done": false, "episodes_total": 7328, "training_iteration": 229, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-23", "timestamp": 1666581323, "time_this_iter_s": 3.6412642002105713, "time_total_s": 890.9505684375763, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 890.9505684375763, "timesteps_since_restore": 0, "iterations_since_restore": 229, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.46666666666667, "ram_util_percent": 10.633333333333333}} +{"custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.92, "shaped_reward_min": 153, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.31, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.51, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.05, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.27, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.09, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.99, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 14.83, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.99, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.99, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00044891354627907276, "policy_loss": 0.000144109595566988, "vf_loss": 7.779896259307861, "vf_explained_var": 0.5919172167778015, "kl": 0.002152523258700967, "entropy": 0.9463684558868408, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2944000, "num_env_steps_trained": 2944000, "num_agent_steps_sampled": 5888000, "num_agent_steps_trained": 5888000}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 478.0, "episode_reward_mean": 558.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 238.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 279.16}, "custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.92, "shaped_reward_min": 153, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.31, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.51, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.05, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.27, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.26, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.99, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.61, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 4, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.99, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.99, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [513.0, 576.0, 579.0, 525.0, 573.0, 525.0, 573.0, 516.0, 579.0, 576.0, 576.0, 579.0, 576.0, 573.0, 570.0, 576.0, 573.0, 579.0, 570.0, 525.0, 584.0, 579.0, 519.0, 522.0, 530.0, 581.0, 519.0, 576.0, 582.0, 570.0, 573.0, 579.0, 527.0, 576.0, 570.0, 525.0, 573.0, 530.0, 525.0, 525.0, 570.0, 530.0, 576.0, 573.0, 587.0, 570.0, 570.0, 582.0, 576.0, 527.0, 570.0, 570.0, 582.0, 582.0, 576.0, 573.0, 576.0, 570.0, 570.0, 522.0, 573.0, 530.0, 521.0, 530.0, 487.0, 576.0, 579.0, 584.0, 584.0, 573.0, 573.0, 530.0, 570.0, 582.0, 576.0, 530.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 579.0, 573.0, 522.0, 525.0, 576.0, 573.0, 570.0, 530.0, 525.0, 570.0, 573.0, 478.0, 573.0, 533.0, 573.0, 570.0, 525.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 251.0, 302.0, 274.0, 290.0, 289.0, 250.0, 275.0, 303.0, 270.0, 263.0, 
262.0, 292.0, 281.0, 255.0, 261.0, 294.0, 285.0, 289.0, 287.0, 294.0, 282.0, 293.0, 286.0, 304.0, 272.0, 294.0, 279.0, 276.0, 294.0, 280.0, 296.0, 278.0, 295.0, 281.0, 298.0, 286.0, 284.0, 259.0, 266.0, 299.0, 285.0, 281.0, 298.0, 264.0, 255.0, 259.0, 263.0, 261.0, 269.0, 285.0, 296.0, 261.0, 258.0, 281.0, 295.0, 294.0, 288.0, 282.0, 288.0, 292.0, 281.0, 299.0, 280.0, 251.0, 276.0, 284.0, 292.0, 298.0, 272.0, 272.0, 253.0, 289.0, 284.0, 267.0, 263.0, 265.0, 260.0, 267.0, 258.0, 284.0, 286.0, 265.0, 265.0, 298.0, 278.0, 290.0, 283.0, 287.0, 300.0, 297.0, 273.0, 277.0, 293.0, 300.0, 282.0, 278.0, 298.0, 264.0, 263.0, 295.0, 275.0, 288.0, 282.0, 300.0, 282.0, 293.0, 289.0, 282.0, 294.0, 280.0, 293.0, 282.0, 294.0, 279.0, 291.0, 281.0, 289.0, 254.0, 268.0, 286.0, 287.0, 265.0, 265.0, 262.0, 259.0, 269.0, 261.0, 244.0, 243.0, 286.0, 290.0, 290.0, 289.0, 300.0, 284.0, 289.0, 295.0, 294.0, 279.0, 287.0, 286.0, 272.0, 258.0, 274.0, 296.0, 285.0, 297.0, 290.0, 286.0, 274.0, 256.0, 287.0, 286.0, 259.0, 271.0, 279.0, 297.0, 295.0, 278.0, 283.0, 287.0, 285.0, 288.0, 287.0, 292.0, 294.0, 279.0, 264.0, 258.0, 275.0, 250.0, 284.0, 292.0, 288.0, 285.0, 276.0, 294.0, 256.0, 274.0, 263.0, 262.0, 280.0, 290.0, 282.0, 291.0, 238.0, 240.0, 303.0, 270.0, 263.0, 270.0, 287.0, 286.0, 295.0, 275.0, 273.0, 252.0, 252.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7097215694937031, "mean_inference_ms": 1.2755180095867495, "mean_action_processing_ms": 0.13530297641326697, "mean_env_wait_ms": 0.8549260743234761, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 478.0, "episode_reward_mean": 558.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 238.0}, "policy_reward_max": {"ppo": 304.0}, "policy_reward_mean": {"ppo": 279.16}, "hist_stats": {"episode_reward": [513.0, 576.0, 579.0, 525.0, 573.0, 525.0, 573.0, 516.0, 579.0, 576.0, 576.0, 579.0, 576.0, 573.0, 570.0, 576.0, 573.0, 579.0, 570.0, 
525.0, 584.0, 579.0, 519.0, 522.0, 530.0, 581.0, 519.0, 576.0, 582.0, 570.0, 573.0, 579.0, 527.0, 576.0, 570.0, 525.0, 573.0, 530.0, 525.0, 525.0, 570.0, 530.0, 576.0, 573.0, 587.0, 570.0, 570.0, 582.0, 576.0, 527.0, 570.0, 570.0, 582.0, 582.0, 576.0, 573.0, 576.0, 570.0, 570.0, 522.0, 573.0, 530.0, 521.0, 530.0, 487.0, 576.0, 579.0, 584.0, 584.0, 573.0, 573.0, 530.0, 570.0, 582.0, 576.0, 530.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 579.0, 573.0, 522.0, 525.0, 576.0, 573.0, 570.0, 530.0, 525.0, 570.0, 573.0, 478.0, 573.0, 533.0, 573.0, 570.0, 525.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 251.0, 302.0, 274.0, 290.0, 289.0, 250.0, 275.0, 303.0, 270.0, 263.0, 262.0, 292.0, 281.0, 255.0, 261.0, 294.0, 285.0, 289.0, 287.0, 294.0, 282.0, 293.0, 286.0, 304.0, 272.0, 294.0, 279.0, 276.0, 294.0, 280.0, 296.0, 278.0, 295.0, 281.0, 298.0, 286.0, 284.0, 259.0, 266.0, 299.0, 285.0, 281.0, 298.0, 264.0, 255.0, 259.0, 263.0, 261.0, 269.0, 285.0, 296.0, 261.0, 258.0, 281.0, 295.0, 294.0, 288.0, 282.0, 288.0, 292.0, 281.0, 299.0, 280.0, 251.0, 276.0, 284.0, 292.0, 298.0, 272.0, 272.0, 253.0, 289.0, 284.0, 267.0, 263.0, 265.0, 260.0, 267.0, 258.0, 284.0, 286.0, 265.0, 265.0, 298.0, 278.0, 290.0, 283.0, 287.0, 300.0, 297.0, 273.0, 277.0, 293.0, 300.0, 282.0, 278.0, 298.0, 264.0, 263.0, 295.0, 275.0, 288.0, 282.0, 300.0, 282.0, 293.0, 289.0, 282.0, 294.0, 280.0, 293.0, 282.0, 294.0, 279.0, 291.0, 281.0, 289.0, 254.0, 268.0, 286.0, 287.0, 265.0, 265.0, 262.0, 259.0, 269.0, 
261.0, 244.0, 243.0, 286.0, 290.0, 290.0, 289.0, 300.0, 284.0, 289.0, 295.0, 294.0, 279.0, 287.0, 286.0, 272.0, 258.0, 274.0, 296.0, 285.0, 297.0, 290.0, 286.0, 274.0, 256.0, 287.0, 286.0, 259.0, 271.0, 279.0, 297.0, 295.0, 278.0, 283.0, 287.0, 285.0, 288.0, 287.0, 292.0, 294.0, 279.0, 264.0, 258.0, 275.0, 250.0, 284.0, 292.0, 288.0, 285.0, 276.0, 294.0, 256.0, 274.0, 263.0, 262.0, 280.0, 290.0, 282.0, 291.0, 238.0, 240.0, 303.0, 270.0, 263.0, 270.0, 287.0, 286.0, 295.0, 275.0, 273.0, 252.0, 252.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7097215694937031, "mean_inference_ms": 1.2755180095867495, "mean_action_processing_ms": 0.13530297641326697, "mean_env_wait_ms": 0.8549260743234761, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5888000, "num_agent_steps_trained": 5888000, "num_env_steps_sampled": 2944000, "num_env_steps_trained": 2944000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2944000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5888000, "timers": {"training_iteration_time_ms": 3675.836, "learn_time_ms": 1136.519, "learn_throughput": 11262.463, "synch_weights_time_ms": 11.787}, "counters": {"num_env_steps_sampled": 2944000, "num_env_steps_trained": 2944000, "num_agent_steps_sampled": 5888000, "num_agent_steps_trained": 5888000}, "done": false, "episodes_total": 7360, "training_iteration": 230, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-26", "timestamp": 1666581326, "time_this_iter_s": 3.639521360397339, "time_total_s": 894.5900897979736, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 894.5900897979736, "timesteps_since_restore": 0, "iterations_since_restore": 230, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.68, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.69, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.61, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.96, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.41, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.76, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.17, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.53, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.17, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.53, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.17, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.53, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0030337772332131863, "policy_loss": -0.0033358775544911623, "vf_loss": 7.728958606719971, "vf_explained_var": 0.5867734551429749, "kl": 0.002050921320915222, "entropy": 0.9415899515151978, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2956800, "num_env_steps_trained": 2956800, "num_agent_steps_sampled": 5913600, "num_agent_steps_trained": 5913600}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 478.0, "episode_reward_mean": 557.29, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 236.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 278.645}, "custom_metrics": {"sparse_reward_mean": 192.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 171.69, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.61, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.96, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.41, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.76, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.17, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.53, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 12, "useful_dish_pickup_agent_0_mean": 5.04, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.17, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.53, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.17, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.53, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 576.0, 570.0, 525.0, 573.0, 530.0, 525.0, 525.0, 570.0, 530.0, 576.0, 573.0, 587.0, 570.0, 570.0, 582.0, 576.0, 527.0, 570.0, 570.0, 582.0, 582.0, 576.0, 573.0, 576.0, 570.0, 570.0, 522.0, 573.0, 530.0, 521.0, 530.0, 487.0, 576.0, 579.0, 584.0, 584.0, 573.0, 573.0, 530.0, 570.0, 582.0, 576.0, 530.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 579.0, 573.0, 522.0, 525.0, 576.0, 573.0, 570.0, 530.0, 525.0, 570.0, 573.0, 478.0, 573.0, 533.0, 573.0, 570.0, 525.0, 522.0, 576.0, 579.0, 573.0, 581.0, 525.0, 573.0, 570.0, 570.0, 527.0, 484.0, 576.0, 582.0, 573.0, 530.0, 567.0, 519.0, 573.0, 579.0, 582.0, 570.0, 570.0, 570.0, 527.0, 522.0, 582.0, 510.0, 530.0, 522.0, 582.0, 576.0, 573.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [251.0, 276.0, 284.0, 292.0, 298.0, 272.0, 272.0, 253.0, 289.0, 284.0, 267.0, 
263.0, 265.0, 260.0, 267.0, 258.0, 284.0, 286.0, 265.0, 265.0, 298.0, 278.0, 290.0, 283.0, 287.0, 300.0, 297.0, 273.0, 277.0, 293.0, 300.0, 282.0, 278.0, 298.0, 264.0, 263.0, 295.0, 275.0, 288.0, 282.0, 300.0, 282.0, 293.0, 289.0, 282.0, 294.0, 280.0, 293.0, 282.0, 294.0, 279.0, 291.0, 281.0, 289.0, 254.0, 268.0, 286.0, 287.0, 265.0, 265.0, 262.0, 259.0, 269.0, 261.0, 244.0, 243.0, 286.0, 290.0, 290.0, 289.0, 300.0, 284.0, 289.0, 295.0, 294.0, 279.0, 287.0, 286.0, 272.0, 258.0, 274.0, 296.0, 285.0, 297.0, 290.0, 286.0, 274.0, 256.0, 287.0, 286.0, 259.0, 271.0, 279.0, 297.0, 295.0, 278.0, 283.0, 287.0, 285.0, 288.0, 287.0, 292.0, 294.0, 279.0, 264.0, 258.0, 275.0, 250.0, 284.0, 292.0, 288.0, 285.0, 276.0, 294.0, 256.0, 274.0, 263.0, 262.0, 280.0, 290.0, 282.0, 291.0, 238.0, 240.0, 303.0, 270.0, 263.0, 270.0, 287.0, 286.0, 295.0, 275.0, 273.0, 252.0, 252.0, 270.0, 282.0, 294.0, 297.0, 282.0, 286.0, 287.0, 298.0, 283.0, 260.0, 265.0, 293.0, 280.0, 281.0, 289.0, 268.0, 302.0, 270.0, 257.0, 236.0, 248.0, 285.0, 291.0, 287.0, 295.0, 286.0, 287.0, 256.0, 274.0, 286.0, 281.0, 253.0, 266.0, 276.0, 297.0, 292.0, 287.0, 291.0, 291.0, 279.0, 291.0, 288.0, 282.0, 285.0, 285.0, 257.0, 270.0, 266.0, 256.0, 294.0, 288.0, 263.0, 247.0, 263.0, 267.0, 252.0, 270.0, 285.0, 297.0, 291.0, 285.0, 284.0, 289.0, 291.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7095770796673102, "mean_inference_ms": 1.2751883468459717, "mean_action_processing_ms": 0.135291247180748, "mean_env_wait_ms": 0.8547402841339049, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 478.0, "episode_reward_mean": 557.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 236.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 278.645}, "hist_stats": {"episode_reward": [527.0, 576.0, 570.0, 525.0, 573.0, 530.0, 525.0, 525.0, 570.0, 530.0, 576.0, 573.0, 587.0, 570.0, 570.0, 582.0, 576.0, 527.0, 570.0, 
570.0, 582.0, 582.0, 576.0, 573.0, 576.0, 570.0, 570.0, 522.0, 573.0, 530.0, 521.0, 530.0, 487.0, 576.0, 579.0, 584.0, 584.0, 573.0, 573.0, 530.0, 570.0, 582.0, 576.0, 530.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 579.0, 573.0, 522.0, 525.0, 576.0, 573.0, 570.0, 530.0, 525.0, 570.0, 573.0, 478.0, 573.0, 533.0, 573.0, 570.0, 525.0, 522.0, 576.0, 579.0, 573.0, 581.0, 525.0, 573.0, 570.0, 570.0, 527.0, 484.0, 576.0, 582.0, 573.0, 530.0, 567.0, 519.0, 573.0, 579.0, 582.0, 570.0, 570.0, 570.0, 527.0, 522.0, 582.0, 510.0, 530.0, 522.0, 582.0, 576.0, 573.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [251.0, 276.0, 284.0, 292.0, 298.0, 272.0, 272.0, 253.0, 289.0, 284.0, 267.0, 263.0, 265.0, 260.0, 267.0, 258.0, 284.0, 286.0, 265.0, 265.0, 298.0, 278.0, 290.0, 283.0, 287.0, 300.0, 297.0, 273.0, 277.0, 293.0, 300.0, 282.0, 278.0, 298.0, 264.0, 263.0, 295.0, 275.0, 288.0, 282.0, 300.0, 282.0, 293.0, 289.0, 282.0, 294.0, 280.0, 293.0, 282.0, 294.0, 279.0, 291.0, 281.0, 289.0, 254.0, 268.0, 286.0, 287.0, 265.0, 265.0, 262.0, 259.0, 269.0, 261.0, 244.0, 243.0, 286.0, 290.0, 290.0, 289.0, 300.0, 284.0, 289.0, 295.0, 294.0, 279.0, 287.0, 286.0, 272.0, 258.0, 274.0, 296.0, 285.0, 297.0, 290.0, 286.0, 274.0, 256.0, 287.0, 286.0, 259.0, 271.0, 279.0, 297.0, 295.0, 278.0, 283.0, 287.0, 285.0, 288.0, 287.0, 292.0, 294.0, 279.0, 264.0, 258.0, 275.0, 250.0, 284.0, 292.0, 288.0, 285.0, 276.0, 294.0, 256.0, 274.0, 263.0, 262.0, 280.0, 290.0, 282.0, 291.0, 238.0, 240.0, 303.0, 270.0, 263.0, 
270.0, 287.0, 286.0, 295.0, 275.0, 273.0, 252.0, 252.0, 270.0, 282.0, 294.0, 297.0, 282.0, 286.0, 287.0, 298.0, 283.0, 260.0, 265.0, 293.0, 280.0, 281.0, 289.0, 268.0, 302.0, 270.0, 257.0, 236.0, 248.0, 285.0, 291.0, 287.0, 295.0, 286.0, 287.0, 256.0, 274.0, 286.0, 281.0, 253.0, 266.0, 276.0, 297.0, 292.0, 287.0, 291.0, 291.0, 279.0, 291.0, 288.0, 282.0, 285.0, 285.0, 257.0, 270.0, 266.0, 256.0, 294.0, 288.0, 263.0, 247.0, 263.0, 267.0, 252.0, 270.0, 285.0, 297.0, 291.0, 285.0, 284.0, 289.0, 291.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7095770796673102, "mean_inference_ms": 1.2751883468459717, "mean_action_processing_ms": 0.135291247180748, "mean_env_wait_ms": 0.8547402841339049, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5913600, "num_agent_steps_trained": 5913600, "num_env_steps_sampled": 2956800, "num_env_steps_trained": 2956800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2956800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5913600, "timers": {"training_iteration_time_ms": 3666.75, "learn_time_ms": 1122.362, "learn_throughput": 11404.518, "synch_weights_time_ms": 11.831}, "counters": {"num_env_steps_sampled": 2956800, "num_env_steps_trained": 2956800, "num_agent_steps_sampled": 5913600, "num_agent_steps_trained": 5913600}, "done": false, "episodes_total": 7392, "training_iteration": 231, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-30", "timestamp": 1666581330, "time_this_iter_s": 3.5793042182922363, "time_total_s": 898.1693940162659, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 898.1693940162659, "timesteps_since_restore": 0, "iterations_since_restore": 231, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.96, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 171.14, "shaped_reward_min": 122, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.58, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.0, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.39, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.66, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.16, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.55, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.06, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.16, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.55, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.16, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.55, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002754440763965249, "policy_loss": -0.0030683977529406548, "vf_loss": 7.847390651702881, "vf_explained_var": 0.5783874988555908, "kl": 0.0022289445623755455, "entropy": 0.9415615797042847, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2969600, "num_env_steps_trained": 2969600, "num_agent_steps_sampled": 5939200, "num_agent_steps_trained": 5939200}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 402.0, "episode_reward_mean": 555.54, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 277.77}, "custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 171.14, "shaped_reward_min": 122, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.58, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.0, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.39, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.66, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 15.16, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.55, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.06, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.26, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.16, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.55, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.16, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.55, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [487.0, 576.0, 579.0, 584.0, 584.0, 573.0, 573.0, 530.0, 570.0, 582.0, 576.0, 530.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 579.0, 573.0, 522.0, 525.0, 576.0, 573.0, 570.0, 530.0, 525.0, 570.0, 573.0, 478.0, 573.0, 533.0, 573.0, 570.0, 525.0, 522.0, 576.0, 579.0, 573.0, 581.0, 525.0, 573.0, 570.0, 570.0, 527.0, 484.0, 576.0, 582.0, 573.0, 530.0, 567.0, 519.0, 573.0, 579.0, 582.0, 570.0, 570.0, 570.0, 527.0, 522.0, 582.0, 510.0, 530.0, 522.0, 582.0, 576.0, 573.0, 570.0, 576.0, 516.0, 530.0, 579.0, 573.0, 570.0, 576.0, 587.0, 561.0, 487.0, 573.0, 402.0, 516.0, 573.0, 579.0, 576.0, 527.0, 579.0, 576.0, 516.0, 579.0, 530.0, 573.0, 567.0, 530.0, 582.0, 570.0, 576.0, 573.0, 576.0, 527.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [244.0, 243.0, 286.0, 290.0, 290.0, 289.0, 300.0, 284.0, 289.0, 295.0, 294.0, 
279.0, 287.0, 286.0, 272.0, 258.0, 274.0, 296.0, 285.0, 297.0, 290.0, 286.0, 274.0, 256.0, 287.0, 286.0, 259.0, 271.0, 279.0, 297.0, 295.0, 278.0, 283.0, 287.0, 285.0, 288.0, 287.0, 292.0, 294.0, 279.0, 264.0, 258.0, 275.0, 250.0, 284.0, 292.0, 288.0, 285.0, 276.0, 294.0, 256.0, 274.0, 263.0, 262.0, 280.0, 290.0, 282.0, 291.0, 238.0, 240.0, 303.0, 270.0, 263.0, 270.0, 287.0, 286.0, 295.0, 275.0, 273.0, 252.0, 252.0, 270.0, 282.0, 294.0, 297.0, 282.0, 286.0, 287.0, 298.0, 283.0, 260.0, 265.0, 293.0, 280.0, 281.0, 289.0, 268.0, 302.0, 270.0, 257.0, 236.0, 248.0, 285.0, 291.0, 287.0, 295.0, 286.0, 287.0, 256.0, 274.0, 286.0, 281.0, 253.0, 266.0, 276.0, 297.0, 292.0, 287.0, 291.0, 291.0, 279.0, 291.0, 288.0, 282.0, 285.0, 285.0, 257.0, 270.0, 266.0, 256.0, 294.0, 288.0, 263.0, 247.0, 263.0, 267.0, 252.0, 270.0, 285.0, 297.0, 291.0, 285.0, 284.0, 289.0, 291.0, 279.0, 283.0, 293.0, 257.0, 259.0, 265.0, 265.0, 287.0, 292.0, 286.0, 287.0, 286.0, 284.0, 295.0, 281.0, 289.0, 298.0, 286.0, 275.0, 259.0, 228.0, 276.0, 297.0, 199.0, 203.0, 264.0, 252.0, 290.0, 283.0, 294.0, 285.0, 293.0, 283.0, 276.0, 251.0, 293.0, 286.0, 286.0, 290.0, 254.0, 262.0, 305.0, 274.0, 268.0, 262.0, 277.0, 296.0, 286.0, 281.0, 272.0, 258.0, 293.0, 289.0, 293.0, 277.0, 293.0, 283.0, 294.0, 279.0, 287.0, 289.0, 266.0, 261.0, 270.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7094305216316347, "mean_inference_ms": 1.2748349341795286, "mean_action_processing_ms": 0.13527921787792216, "mean_env_wait_ms": 0.8545440624324186, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 402.0, "episode_reward_mean": 555.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 277.77}, "hist_stats": {"episode_reward": [487.0, 576.0, 579.0, 584.0, 584.0, 573.0, 573.0, 530.0, 570.0, 582.0, 576.0, 530.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 579.0, 
573.0, 522.0, 525.0, 576.0, 573.0, 570.0, 530.0, 525.0, 570.0, 573.0, 478.0, 573.0, 533.0, 573.0, 570.0, 525.0, 522.0, 576.0, 579.0, 573.0, 581.0, 525.0, 573.0, 570.0, 570.0, 527.0, 484.0, 576.0, 582.0, 573.0, 530.0, 567.0, 519.0, 573.0, 579.0, 582.0, 570.0, 570.0, 570.0, 527.0, 522.0, 582.0, 510.0, 530.0, 522.0, 582.0, 576.0, 573.0, 570.0, 576.0, 516.0, 530.0, 579.0, 573.0, 570.0, 576.0, 587.0, 561.0, 487.0, 573.0, 402.0, 516.0, 573.0, 579.0, 576.0, 527.0, 579.0, 576.0, 516.0, 579.0, 530.0, 573.0, 567.0, 530.0, 582.0, 570.0, 576.0, 573.0, 576.0, 527.0, 527.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [244.0, 243.0, 286.0, 290.0, 290.0, 289.0, 300.0, 284.0, 289.0, 295.0, 294.0, 279.0, 287.0, 286.0, 272.0, 258.0, 274.0, 296.0, 285.0, 297.0, 290.0, 286.0, 274.0, 256.0, 287.0, 286.0, 259.0, 271.0, 279.0, 297.0, 295.0, 278.0, 283.0, 287.0, 285.0, 288.0, 287.0, 292.0, 294.0, 279.0, 264.0, 258.0, 275.0, 250.0, 284.0, 292.0, 288.0, 285.0, 276.0, 294.0, 256.0, 274.0, 263.0, 262.0, 280.0, 290.0, 282.0, 291.0, 238.0, 240.0, 303.0, 270.0, 263.0, 270.0, 287.0, 286.0, 295.0, 275.0, 273.0, 252.0, 252.0, 270.0, 282.0, 294.0, 297.0, 282.0, 286.0, 287.0, 298.0, 283.0, 260.0, 265.0, 293.0, 280.0, 281.0, 289.0, 268.0, 302.0, 270.0, 257.0, 236.0, 248.0, 285.0, 291.0, 287.0, 295.0, 286.0, 287.0, 256.0, 274.0, 286.0, 281.0, 253.0, 266.0, 276.0, 297.0, 292.0, 287.0, 291.0, 291.0, 279.0, 291.0, 288.0, 282.0, 285.0, 285.0, 257.0, 270.0, 266.0, 256.0, 294.0, 288.0, 263.0, 247.0, 263.0, 267.0, 252.0, 
270.0, 285.0, 297.0, 291.0, 285.0, 284.0, 289.0, 291.0, 279.0, 283.0, 293.0, 257.0, 259.0, 265.0, 265.0, 287.0, 292.0, 286.0, 287.0, 286.0, 284.0, 295.0, 281.0, 289.0, 298.0, 286.0, 275.0, 259.0, 228.0, 276.0, 297.0, 199.0, 203.0, 264.0, 252.0, 290.0, 283.0, 294.0, 285.0, 293.0, 283.0, 276.0, 251.0, 293.0, 286.0, 286.0, 290.0, 254.0, 262.0, 305.0, 274.0, 268.0, 262.0, 277.0, 296.0, 286.0, 281.0, 272.0, 258.0, 293.0, 289.0, 293.0, 277.0, 293.0, 283.0, 294.0, 279.0, 287.0, 289.0, 266.0, 261.0, 270.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7094305216316347, "mean_inference_ms": 1.2748349341795286, "mean_action_processing_ms": 0.13527921787792216, "mean_env_wait_ms": 0.8545440624324186, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5939200, "num_agent_steps_trained": 5939200, "num_env_steps_sampled": 2969600, "num_env_steps_trained": 2969600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2969600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5939200, "timers": {"training_iteration_time_ms": 3630.374, "learn_time_ms": 1102.93, "learn_throughput": 11605.452, "synch_weights_time_ms": 11.753}, "counters": {"num_env_steps_sampled": 2969600, "num_env_steps_trained": 2969600, "num_agent_steps_sampled": 5939200, "num_agent_steps_trained": 5939200}, "done": false, "episodes_total": 7424, "training_iteration": 232, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-34", "timestamp": 1666581334, "time_this_iter_s": 3.6799514293670654, "time_total_s": 901.8493454456329, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 901.8493454456329, "timesteps_since_restore": 0, "iterations_since_restore": 232, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.416666666666668, "ram_util_percent": 10.633333333333333}} +{"custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.3, "shaped_reward_min": 122, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.1, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.43, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.06, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.2, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.97, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.67, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.97, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.97, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -1.860898919403553e-05, "policy_loss": -0.0003416483523324132, "vf_loss": 7.904554843902588, "vf_explained_var": 0.5716251134872437, "kl": 0.0022404068149626255, "entropy": 0.9348317384719849, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2982400, "num_env_steps_trained": 2982400, "num_agent_steps_sampled": 5964800, "num_agent_steps_trained": 5964800}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 402.0, "episode_reward_mean": 553.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 276.95}, "custom_metrics": {"sparse_reward_mean": 191.8, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.3, "shaped_reward_min": 122, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.1, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.43, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.06, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.97, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.5, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.97, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.97, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 570.0, 525.0, 522.0, 576.0, 579.0, 573.0, 581.0, 525.0, 573.0, 570.0, 570.0, 527.0, 484.0, 576.0, 582.0, 573.0, 530.0, 567.0, 519.0, 573.0, 579.0, 582.0, 570.0, 570.0, 570.0, 527.0, 522.0, 582.0, 510.0, 530.0, 522.0, 582.0, 576.0, 573.0, 570.0, 576.0, 516.0, 530.0, 579.0, 573.0, 570.0, 576.0, 587.0, 561.0, 487.0, 573.0, 402.0, 516.0, 573.0, 579.0, 576.0, 527.0, 579.0, 576.0, 516.0, 579.0, 530.0, 573.0, 567.0, 530.0, 582.0, 570.0, 576.0, 573.0, 576.0, 527.0, 527.0, 579.0, 573.0, 584.0, 525.0, 527.0, 579.0, 573.0, 570.0, 525.0, 576.0, 573.0, 504.0, 519.0, 527.0, 582.0, 530.0, 581.0, 516.0, 576.0, 570.0, 459.0, 522.0, 510.0, 576.0, 582.0, 530.0, 530.0, 576.0, 570.0, 579.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 295.0, 275.0, 273.0, 252.0, 252.0, 270.0, 282.0, 294.0, 297.0, 
282.0, 286.0, 287.0, 298.0, 283.0, 260.0, 265.0, 293.0, 280.0, 281.0, 289.0, 268.0, 302.0, 270.0, 257.0, 236.0, 248.0, 285.0, 291.0, 287.0, 295.0, 286.0, 287.0, 256.0, 274.0, 286.0, 281.0, 253.0, 266.0, 276.0, 297.0, 292.0, 287.0, 291.0, 291.0, 279.0, 291.0, 288.0, 282.0, 285.0, 285.0, 257.0, 270.0, 266.0, 256.0, 294.0, 288.0, 263.0, 247.0, 263.0, 267.0, 252.0, 270.0, 285.0, 297.0, 291.0, 285.0, 284.0, 289.0, 291.0, 279.0, 283.0, 293.0, 257.0, 259.0, 265.0, 265.0, 287.0, 292.0, 286.0, 287.0, 286.0, 284.0, 295.0, 281.0, 289.0, 298.0, 286.0, 275.0, 259.0, 228.0, 276.0, 297.0, 199.0, 203.0, 264.0, 252.0, 290.0, 283.0, 294.0, 285.0, 293.0, 283.0, 276.0, 251.0, 293.0, 286.0, 286.0, 290.0, 254.0, 262.0, 305.0, 274.0, 268.0, 262.0, 277.0, 296.0, 286.0, 281.0, 272.0, 258.0, 293.0, 289.0, 293.0, 277.0, 293.0, 283.0, 294.0, 279.0, 287.0, 289.0, 266.0, 261.0, 270.0, 257.0, 293.0, 286.0, 287.0, 286.0, 294.0, 290.0, 268.0, 257.0, 255.0, 272.0, 295.0, 284.0, 279.0, 294.0, 292.0, 278.0, 269.0, 256.0, 293.0, 283.0, 271.0, 302.0, 252.0, 252.0, 256.0, 263.0, 262.0, 265.0, 285.0, 297.0, 264.0, 266.0, 287.0, 294.0, 272.0, 244.0, 294.0, 282.0, 291.0, 279.0, 229.0, 230.0, 259.0, 263.0, 255.0, 255.0, 292.0, 284.0, 290.0, 292.0, 267.0, 263.0, 249.0, 281.0, 301.0, 275.0, 294.0, 276.0, 286.0, 293.0, 303.0, 273.0, 290.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7093009440633637, "mean_inference_ms": 1.2745047914178103, "mean_action_processing_ms": 0.135271318290213, "mean_env_wait_ms": 0.8543809304373801, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 402.0, "episode_reward_mean": 553.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 276.95}, "hist_stats": {"episode_reward": [573.0, 570.0, 525.0, 522.0, 576.0, 579.0, 573.0, 581.0, 525.0, 573.0, 570.0, 570.0, 527.0, 484.0, 576.0, 582.0, 573.0, 530.0, 567.0, 
519.0, 573.0, 579.0, 582.0, 570.0, 570.0, 570.0, 527.0, 522.0, 582.0, 510.0, 530.0, 522.0, 582.0, 576.0, 573.0, 570.0, 576.0, 516.0, 530.0, 579.0, 573.0, 570.0, 576.0, 587.0, 561.0, 487.0, 573.0, 402.0, 516.0, 573.0, 579.0, 576.0, 527.0, 579.0, 576.0, 516.0, 579.0, 530.0, 573.0, 567.0, 530.0, 582.0, 570.0, 576.0, 573.0, 576.0, 527.0, 527.0, 579.0, 573.0, 584.0, 525.0, 527.0, 579.0, 573.0, 570.0, 525.0, 576.0, 573.0, 504.0, 519.0, 527.0, 582.0, 530.0, 581.0, 516.0, 576.0, 570.0, 459.0, 522.0, 510.0, 576.0, 582.0, 530.0, 530.0, 576.0, 570.0, 579.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 295.0, 275.0, 273.0, 252.0, 252.0, 270.0, 282.0, 294.0, 297.0, 282.0, 286.0, 287.0, 298.0, 283.0, 260.0, 265.0, 293.0, 280.0, 281.0, 289.0, 268.0, 302.0, 270.0, 257.0, 236.0, 248.0, 285.0, 291.0, 287.0, 295.0, 286.0, 287.0, 256.0, 274.0, 286.0, 281.0, 253.0, 266.0, 276.0, 297.0, 292.0, 287.0, 291.0, 291.0, 279.0, 291.0, 288.0, 282.0, 285.0, 285.0, 257.0, 270.0, 266.0, 256.0, 294.0, 288.0, 263.0, 247.0, 263.0, 267.0, 252.0, 270.0, 285.0, 297.0, 291.0, 285.0, 284.0, 289.0, 291.0, 279.0, 283.0, 293.0, 257.0, 259.0, 265.0, 265.0, 287.0, 292.0, 286.0, 287.0, 286.0, 284.0, 295.0, 281.0, 289.0, 298.0, 286.0, 275.0, 259.0, 228.0, 276.0, 297.0, 199.0, 203.0, 264.0, 252.0, 290.0, 283.0, 294.0, 285.0, 293.0, 283.0, 276.0, 251.0, 293.0, 286.0, 286.0, 290.0, 254.0, 262.0, 305.0, 274.0, 268.0, 262.0, 277.0, 296.0, 286.0, 281.0, 272.0, 258.0, 293.0, 289.0, 293.0, 277.0, 293.0, 
283.0, 294.0, 279.0, 287.0, 289.0, 266.0, 261.0, 270.0, 257.0, 293.0, 286.0, 287.0, 286.0, 294.0, 290.0, 268.0, 257.0, 255.0, 272.0, 295.0, 284.0, 279.0, 294.0, 292.0, 278.0, 269.0, 256.0, 293.0, 283.0, 271.0, 302.0, 252.0, 252.0, 256.0, 263.0, 262.0, 265.0, 285.0, 297.0, 264.0, 266.0, 287.0, 294.0, 272.0, 244.0, 294.0, 282.0, 291.0, 279.0, 229.0, 230.0, 259.0, 263.0, 255.0, 255.0, 292.0, 284.0, 290.0, 292.0, 267.0, 263.0, 249.0, 281.0, 301.0, 275.0, 294.0, 276.0, 286.0, 293.0, 303.0, 273.0, 290.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7093009440633637, "mean_inference_ms": 1.2745047914178103, "mean_action_processing_ms": 0.135271318290213, "mean_env_wait_ms": 0.8543809304373801, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5964800, "num_agent_steps_trained": 5964800, "num_env_steps_sampled": 2982400, "num_env_steps_trained": 2982400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2982400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5964800, "timers": {"training_iteration_time_ms": 3627.366, "learn_time_ms": 1109.897, "learn_throughput": 11532.6, "synch_weights_time_ms": 11.141}, "counters": {"num_env_steps_sampled": 2982400, "num_env_steps_trained": 2982400, "num_agent_steps_sampled": 5964800, "num_agent_steps_trained": 5964800}, "done": false, "episodes_total": 7456, "training_iteration": 233, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-38", "timestamp": 1666581338, "time_this_iter_s": 3.7769134044647217, "time_total_s": 905.6262588500977, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 905.6262588500977, "timesteps_since_restore": 0, "iterations_since_restore": 233, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.16, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.29, "shaped_reward_min": 122, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.0, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.49, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 13.88, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.11, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.64, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.28, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.46, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.8, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.77, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.08, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.94, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.64, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.64, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011096132220700383, "policy_loss": -0.0014260424068197608, "vf_loss": 7.822549343109131, "vf_explained_var": 0.5857524871826172, "kl": 0.0024624252691864967, "entropy": 0.9316513538360596, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 2995200, "num_env_steps_trained": 2995200, "num_agent_steps_sampled": 5990400, "num_agent_steps_trained": 5990400}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 402.0, "episode_reward_mean": 552.69, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 276.345}, "custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.29, "shaped_reward_min": 122, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.0, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.49, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 13.88, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.11, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.64, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.28, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.46, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.8, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.29, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.77, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.08, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 3.94, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.64, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.64, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 573.0, 570.0, 576.0, 516.0, 530.0, 579.0, 573.0, 570.0, 576.0, 587.0, 561.0, 487.0, 573.0, 402.0, 516.0, 573.0, 579.0, 576.0, 527.0, 579.0, 576.0, 516.0, 579.0, 530.0, 573.0, 567.0, 530.0, 582.0, 570.0, 576.0, 573.0, 576.0, 527.0, 527.0, 579.0, 573.0, 584.0, 525.0, 527.0, 579.0, 573.0, 570.0, 525.0, 576.0, 573.0, 504.0, 519.0, 527.0, 582.0, 530.0, 581.0, 516.0, 576.0, 570.0, 459.0, 522.0, 510.0, 576.0, 582.0, 530.0, 530.0, 576.0, 570.0, 579.0, 576.0, 576.0, 578.0, 530.0, 530.0, 576.0, 573.0, 525.0, 516.0, 530.0, 524.0, 573.0, 582.0, 570.0, 584.0, 527.0, 570.0, 576.0, 519.0, 519.0, 525.0, 527.0, 527.0, 570.0, 525.0, 570.0, 522.0, 522.0, 570.0, 576.0, 582.0, 587.0, 527.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 297.0, 291.0, 285.0, 284.0, 289.0, 291.0, 279.0, 283.0, 293.0, 257.0, 
259.0, 265.0, 265.0, 287.0, 292.0, 286.0, 287.0, 286.0, 284.0, 295.0, 281.0, 289.0, 298.0, 286.0, 275.0, 259.0, 228.0, 276.0, 297.0, 199.0, 203.0, 264.0, 252.0, 290.0, 283.0, 294.0, 285.0, 293.0, 283.0, 276.0, 251.0, 293.0, 286.0, 286.0, 290.0, 254.0, 262.0, 305.0, 274.0, 268.0, 262.0, 277.0, 296.0, 286.0, 281.0, 272.0, 258.0, 293.0, 289.0, 293.0, 277.0, 293.0, 283.0, 294.0, 279.0, 287.0, 289.0, 266.0, 261.0, 270.0, 257.0, 293.0, 286.0, 287.0, 286.0, 294.0, 290.0, 268.0, 257.0, 255.0, 272.0, 295.0, 284.0, 279.0, 294.0, 292.0, 278.0, 269.0, 256.0, 293.0, 283.0, 271.0, 302.0, 252.0, 252.0, 256.0, 263.0, 262.0, 265.0, 285.0, 297.0, 264.0, 266.0, 287.0, 294.0, 272.0, 244.0, 294.0, 282.0, 291.0, 279.0, 229.0, 230.0, 259.0, 263.0, 255.0, 255.0, 292.0, 284.0, 290.0, 292.0, 267.0, 263.0, 249.0, 281.0, 301.0, 275.0, 294.0, 276.0, 286.0, 293.0, 303.0, 273.0, 290.0, 286.0, 299.0, 279.0, 264.0, 266.0, 270.0, 260.0, 277.0, 299.0, 288.0, 285.0, 262.0, 263.0, 241.0, 275.0, 270.0, 260.0, 266.0, 258.0, 292.0, 281.0, 301.0, 281.0, 289.0, 281.0, 288.0, 296.0, 269.0, 258.0, 289.0, 281.0, 279.0, 297.0, 263.0, 256.0, 270.0, 249.0, 270.0, 255.0, 275.0, 252.0, 271.0, 256.0, 290.0, 280.0, 269.0, 256.0, 292.0, 278.0, 268.0, 254.0, 275.0, 247.0, 284.0, 286.0, 278.0, 298.0, 290.0, 292.0, 291.0, 296.0, 264.0, 263.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7091450891616614, "mean_inference_ms": 1.274153082248647, "mean_action_processing_ms": 0.13526185222798692, "mean_env_wait_ms": 0.8541923439015997, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 402.0, "episode_reward_mean": 552.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 305.0}, "policy_reward_mean": {"ppo": 276.345}, "hist_stats": {"episode_reward": [582.0, 576.0, 573.0, 570.0, 576.0, 516.0, 530.0, 579.0, 573.0, 570.0, 576.0, 587.0, 561.0, 487.0, 573.0, 402.0, 516.0, 573.0, 579.0, 
576.0, 527.0, 579.0, 576.0, 516.0, 579.0, 530.0, 573.0, 567.0, 530.0, 582.0, 570.0, 576.0, 573.0, 576.0, 527.0, 527.0, 579.0, 573.0, 584.0, 525.0, 527.0, 579.0, 573.0, 570.0, 525.0, 576.0, 573.0, 504.0, 519.0, 527.0, 582.0, 530.0, 581.0, 516.0, 576.0, 570.0, 459.0, 522.0, 510.0, 576.0, 582.0, 530.0, 530.0, 576.0, 570.0, 579.0, 576.0, 576.0, 578.0, 530.0, 530.0, 576.0, 573.0, 525.0, 516.0, 530.0, 524.0, 573.0, 582.0, 570.0, 584.0, 527.0, 570.0, 576.0, 519.0, 519.0, 525.0, 527.0, 527.0, 570.0, 525.0, 570.0, 522.0, 522.0, 570.0, 576.0, 582.0, 587.0, 527.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 297.0, 291.0, 285.0, 284.0, 289.0, 291.0, 279.0, 283.0, 293.0, 257.0, 259.0, 265.0, 265.0, 287.0, 292.0, 286.0, 287.0, 286.0, 284.0, 295.0, 281.0, 289.0, 298.0, 286.0, 275.0, 259.0, 228.0, 276.0, 297.0, 199.0, 203.0, 264.0, 252.0, 290.0, 283.0, 294.0, 285.0, 293.0, 283.0, 276.0, 251.0, 293.0, 286.0, 286.0, 290.0, 254.0, 262.0, 305.0, 274.0, 268.0, 262.0, 277.0, 296.0, 286.0, 281.0, 272.0, 258.0, 293.0, 289.0, 293.0, 277.0, 293.0, 283.0, 294.0, 279.0, 287.0, 289.0, 266.0, 261.0, 270.0, 257.0, 293.0, 286.0, 287.0, 286.0, 294.0, 290.0, 268.0, 257.0, 255.0, 272.0, 295.0, 284.0, 279.0, 294.0, 292.0, 278.0, 269.0, 256.0, 293.0, 283.0, 271.0, 302.0, 252.0, 252.0, 256.0, 263.0, 262.0, 265.0, 285.0, 297.0, 264.0, 266.0, 287.0, 294.0, 272.0, 244.0, 294.0, 282.0, 291.0, 279.0, 229.0, 230.0, 259.0, 263.0, 255.0, 255.0, 292.0, 284.0, 290.0, 292.0, 267.0, 263.0, 249.0, 281.0, 301.0, 
275.0, 294.0, 276.0, 286.0, 293.0, 303.0, 273.0, 290.0, 286.0, 299.0, 279.0, 264.0, 266.0, 270.0, 260.0, 277.0, 299.0, 288.0, 285.0, 262.0, 263.0, 241.0, 275.0, 270.0, 260.0, 266.0, 258.0, 292.0, 281.0, 301.0, 281.0, 289.0, 281.0, 288.0, 296.0, 269.0, 258.0, 289.0, 281.0, 279.0, 297.0, 263.0, 256.0, 270.0, 249.0, 270.0, 255.0, 275.0, 252.0, 271.0, 256.0, 290.0, 280.0, 269.0, 256.0, 292.0, 278.0, 268.0, 254.0, 275.0, 247.0, 284.0, 286.0, 278.0, 298.0, 290.0, 292.0, 291.0, 296.0, 264.0, 263.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7091450891616614, "mean_inference_ms": 1.274153082248647, "mean_action_processing_ms": 0.13526185222798692, "mean_env_wait_ms": 0.8541923439015997, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 5990400, "num_agent_steps_trained": 5990400, "num_env_steps_sampled": 2995200, "num_env_steps_trained": 2995200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 2995200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 5990400, "timers": {"training_iteration_time_ms": 3619.728, "learn_time_ms": 1102.386, "learn_throughput": 11611.176, "synch_weights_time_ms": 10.479}, "counters": {"num_env_steps_sampled": 2995200, "num_env_steps_trained": 2995200, "num_agent_steps_sampled": 5990400, "num_agent_steps_trained": 5990400}, "done": false, "episodes_total": 7488, "training_iteration": 234, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-42", "timestamp": 1666581342, "time_this_iter_s": 3.723451614379883, "time_total_s": 909.3497104644775, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 909.3497104644775, "timesteps_since_restore": 0, "iterations_since_restore": 234, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.5, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.08, "shaped_reward_min": 128, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.12, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.27, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 13.98, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.0, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.74, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.79, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.63, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.21, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.08, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.74, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.79, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.74, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.79, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -1.2165401130914688e-06, "policy_loss": -0.00031783897429704666, "vf_loss": 7.84594202041626, "vf_explained_var": 0.5760541558265686, "kl": 0.0020695971325039864, "entropy": 0.9359432458877563, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3008000, "num_env_steps_trained": 3008000, "num_agent_steps_sampled": 6016000, "num_agent_steps_trained": 6016000}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 408.0, "episode_reward_mean": 552.48, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 276.24}, "custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.08, "shaped_reward_min": 128, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.12, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.27, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 13.98, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.0, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 4, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 4, "potting_onion_agent_0_mean": 13.74, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.79, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.2, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.63, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.19, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.38, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.21, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.08, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 13.74, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.79, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.74, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.79, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 527.0, 527.0, 579.0, 573.0, 584.0, 525.0, 527.0, 579.0, 573.0, 570.0, 525.0, 576.0, 573.0, 504.0, 519.0, 527.0, 582.0, 530.0, 581.0, 516.0, 576.0, 570.0, 459.0, 522.0, 510.0, 576.0, 582.0, 530.0, 530.0, 576.0, 570.0, 579.0, 576.0, 576.0, 578.0, 530.0, 530.0, 576.0, 573.0, 525.0, 516.0, 530.0, 524.0, 573.0, 582.0, 570.0, 584.0, 527.0, 570.0, 576.0, 519.0, 519.0, 525.0, 527.0, 527.0, 570.0, 525.0, 570.0, 522.0, 522.0, 570.0, 576.0, 582.0, 587.0, 527.0, 579.0, 570.0, 570.0, 525.0, 465.0, 570.0, 576.0, 525.0, 573.0, 570.0, 525.0, 516.0, 587.0, 579.0, 576.0, 530.0, 579.0, 573.0, 570.0, 573.0, 573.0, 570.0, 567.0, 573.0, 576.0, 579.0, 573.0, 579.0, 573.0, 408.0, 527.0, 533.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 279.0, 287.0, 289.0, 266.0, 261.0, 270.0, 257.0, 293.0, 286.0, 287.0, 
286.0, 294.0, 290.0, 268.0, 257.0, 255.0, 272.0, 295.0, 284.0, 279.0, 294.0, 292.0, 278.0, 269.0, 256.0, 293.0, 283.0, 271.0, 302.0, 252.0, 252.0, 256.0, 263.0, 262.0, 265.0, 285.0, 297.0, 264.0, 266.0, 287.0, 294.0, 272.0, 244.0, 294.0, 282.0, 291.0, 279.0, 229.0, 230.0, 259.0, 263.0, 255.0, 255.0, 292.0, 284.0, 290.0, 292.0, 267.0, 263.0, 249.0, 281.0, 301.0, 275.0, 294.0, 276.0, 286.0, 293.0, 303.0, 273.0, 290.0, 286.0, 299.0, 279.0, 264.0, 266.0, 270.0, 260.0, 277.0, 299.0, 288.0, 285.0, 262.0, 263.0, 241.0, 275.0, 270.0, 260.0, 266.0, 258.0, 292.0, 281.0, 301.0, 281.0, 289.0, 281.0, 288.0, 296.0, 269.0, 258.0, 289.0, 281.0, 279.0, 297.0, 263.0, 256.0, 270.0, 249.0, 270.0, 255.0, 275.0, 252.0, 271.0, 256.0, 290.0, 280.0, 269.0, 256.0, 292.0, 278.0, 268.0, 254.0, 275.0, 247.0, 284.0, 286.0, 278.0, 298.0, 290.0, 292.0, 291.0, 296.0, 264.0, 263.0, 291.0, 288.0, 290.0, 280.0, 287.0, 283.0, 262.0, 263.0, 232.0, 233.0, 277.0, 293.0, 291.0, 285.0, 260.0, 265.0, 280.0, 293.0, 281.0, 289.0, 256.0, 269.0, 260.0, 256.0, 290.0, 297.0, 289.0, 290.0, 283.0, 293.0, 255.0, 275.0, 292.0, 287.0, 288.0, 285.0, 289.0, 281.0, 291.0, 282.0, 289.0, 284.0, 281.0, 289.0, 289.0, 278.0, 285.0, 288.0, 297.0, 279.0, 293.0, 286.0, 279.0, 294.0, 288.0, 291.0, 282.0, 291.0, 195.0, 213.0, 268.0, 259.0, 268.0, 265.0, 291.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7089912828807431, "mean_inference_ms": 1.2738039305353777, "mean_action_processing_ms": 0.135251658168302, "mean_env_wait_ms": 0.8539967288879649, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 408.0, "episode_reward_mean": 552.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 276.24}, "hist_stats": {"episode_reward": [573.0, 576.0, 527.0, 527.0, 579.0, 573.0, 584.0, 525.0, 527.0, 579.0, 573.0, 570.0, 525.0, 576.0, 573.0, 504.0, 519.0, 527.0, 582.0, 
530.0, 581.0, 516.0, 576.0, 570.0, 459.0, 522.0, 510.0, 576.0, 582.0, 530.0, 530.0, 576.0, 570.0, 579.0, 576.0, 576.0, 578.0, 530.0, 530.0, 576.0, 573.0, 525.0, 516.0, 530.0, 524.0, 573.0, 582.0, 570.0, 584.0, 527.0, 570.0, 576.0, 519.0, 519.0, 525.0, 527.0, 527.0, 570.0, 525.0, 570.0, 522.0, 522.0, 570.0, 576.0, 582.0, 587.0, 527.0, 579.0, 570.0, 570.0, 525.0, 465.0, 570.0, 576.0, 525.0, 573.0, 570.0, 525.0, 516.0, 587.0, 579.0, 576.0, 530.0, 579.0, 573.0, 570.0, 573.0, 573.0, 570.0, 567.0, 573.0, 576.0, 579.0, 573.0, 579.0, 573.0, 408.0, 527.0, 533.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 279.0, 287.0, 289.0, 266.0, 261.0, 270.0, 257.0, 293.0, 286.0, 287.0, 286.0, 294.0, 290.0, 268.0, 257.0, 255.0, 272.0, 295.0, 284.0, 279.0, 294.0, 292.0, 278.0, 269.0, 256.0, 293.0, 283.0, 271.0, 302.0, 252.0, 252.0, 256.0, 263.0, 262.0, 265.0, 285.0, 297.0, 264.0, 266.0, 287.0, 294.0, 272.0, 244.0, 294.0, 282.0, 291.0, 279.0, 229.0, 230.0, 259.0, 263.0, 255.0, 255.0, 292.0, 284.0, 290.0, 292.0, 267.0, 263.0, 249.0, 281.0, 301.0, 275.0, 294.0, 276.0, 286.0, 293.0, 303.0, 273.0, 290.0, 286.0, 299.0, 279.0, 264.0, 266.0, 270.0, 260.0, 277.0, 299.0, 288.0, 285.0, 262.0, 263.0, 241.0, 275.0, 270.0, 260.0, 266.0, 258.0, 292.0, 281.0, 301.0, 281.0, 289.0, 281.0, 288.0, 296.0, 269.0, 258.0, 289.0, 281.0, 279.0, 297.0, 263.0, 256.0, 270.0, 249.0, 270.0, 255.0, 275.0, 252.0, 271.0, 256.0, 290.0, 280.0, 269.0, 256.0, 292.0, 278.0, 268.0, 254.0, 275.0, 247.0, 284.0, 286.0, 278.0, 
298.0, 290.0, 292.0, 291.0, 296.0, 264.0, 263.0, 291.0, 288.0, 290.0, 280.0, 287.0, 283.0, 262.0, 263.0, 232.0, 233.0, 277.0, 293.0, 291.0, 285.0, 260.0, 265.0, 280.0, 293.0, 281.0, 289.0, 256.0, 269.0, 260.0, 256.0, 290.0, 297.0, 289.0, 290.0, 283.0, 293.0, 255.0, 275.0, 292.0, 287.0, 288.0, 285.0, 289.0, 281.0, 291.0, 282.0, 289.0, 284.0, 281.0, 289.0, 289.0, 278.0, 285.0, 288.0, 297.0, 279.0, 293.0, 286.0, 279.0, 294.0, 288.0, 291.0, 282.0, 291.0, 195.0, 213.0, 268.0, 259.0, 268.0, 265.0, 291.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7089912828807431, "mean_inference_ms": 1.2738039305353777, "mean_action_processing_ms": 0.135251658168302, "mean_env_wait_ms": 0.8539967288879649, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6016000, "num_agent_steps_trained": 6016000, "num_env_steps_sampled": 3008000, "num_env_steps_trained": 3008000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3008000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6016000, "timers": {"training_iteration_time_ms": 3623.981, "learn_time_ms": 1107.366, "learn_throughput": 11558.965, "synch_weights_time_ms": 9.935}, "counters": {"num_env_steps_sampled": 3008000, "num_env_steps_trained": 3008000, "num_agent_steps_sampled": 6016000, "num_agent_steps_trained": 6016000}, "done": false, "episodes_total": 7520, "training_iteration": 235, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-46", "timestamp": 1666581346, "time_this_iter_s": 3.7096731662750244, "time_total_s": 913.0593836307526, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 913.0593836307526, "timesteps_since_restore": 0, "iterations_since_restore": 235, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.883333333333333, "ram_util_percent": 10.616666666666665}} +{"custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.5, "shaped_reward_min": 128, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.0, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.32, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.74, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.09, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.14, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.49, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.33, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.33, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.14, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.49, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.14, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.49, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002625423716381192, "policy_loss": -0.0029444461688399315, "vf_loss": 7.811328887939453, "vf_explained_var": 0.6009083986282349, "kl": 0.0021797027438879013, "entropy": 0.9242209196090698, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3020800, "num_env_steps_trained": 3020800, "num_agent_steps_sampled": 6041600, "num_agent_steps_trained": 6041600}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 408.0, "episode_reward_mean": 555.7, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 277.85}, "custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 140, "sparse_reward_max": 200, "shaped_reward_mean": 170.5, "shaped_reward_min": 128, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.0, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.32, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.74, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.14, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.49, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.33, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.33, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.33, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.26, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.14, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.49, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.14, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.49, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 579.0, 576.0, 576.0, 578.0, 530.0, 530.0, 576.0, 573.0, 525.0, 516.0, 530.0, 524.0, 573.0, 582.0, 570.0, 584.0, 527.0, 570.0, 576.0, 519.0, 519.0, 525.0, 527.0, 527.0, 570.0, 525.0, 570.0, 522.0, 522.0, 570.0, 576.0, 582.0, 587.0, 527.0, 579.0, 570.0, 570.0, 525.0, 465.0, 570.0, 576.0, 525.0, 573.0, 570.0, 525.0, 516.0, 587.0, 579.0, 576.0, 530.0, 579.0, 573.0, 570.0, 573.0, 573.0, 570.0, 567.0, 573.0, 576.0, 579.0, 573.0, 579.0, 573.0, 408.0, 527.0, 533.0, 576.0, 521.0, 513.0, 579.0, 570.0, 576.0, 579.0, 567.0, 516.0, 573.0, 519.0, 576.0, 576.0, 570.0, 522.0, 579.0, 570.0, 573.0, 570.0, 579.0, 530.0, 573.0, 579.0, 579.0, 582.0, 573.0, 573.0, 576.0, 465.0, 573.0, 576.0, 522.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 276.0, 286.0, 293.0, 303.0, 273.0, 290.0, 286.0, 299.0, 279.0, 264.0, 
266.0, 270.0, 260.0, 277.0, 299.0, 288.0, 285.0, 262.0, 263.0, 241.0, 275.0, 270.0, 260.0, 266.0, 258.0, 292.0, 281.0, 301.0, 281.0, 289.0, 281.0, 288.0, 296.0, 269.0, 258.0, 289.0, 281.0, 279.0, 297.0, 263.0, 256.0, 270.0, 249.0, 270.0, 255.0, 275.0, 252.0, 271.0, 256.0, 290.0, 280.0, 269.0, 256.0, 292.0, 278.0, 268.0, 254.0, 275.0, 247.0, 284.0, 286.0, 278.0, 298.0, 290.0, 292.0, 291.0, 296.0, 264.0, 263.0, 291.0, 288.0, 290.0, 280.0, 287.0, 283.0, 262.0, 263.0, 232.0, 233.0, 277.0, 293.0, 291.0, 285.0, 260.0, 265.0, 280.0, 293.0, 281.0, 289.0, 256.0, 269.0, 260.0, 256.0, 290.0, 297.0, 289.0, 290.0, 283.0, 293.0, 255.0, 275.0, 292.0, 287.0, 288.0, 285.0, 289.0, 281.0, 291.0, 282.0, 289.0, 284.0, 281.0, 289.0, 289.0, 278.0, 285.0, 288.0, 297.0, 279.0, 293.0, 286.0, 279.0, 294.0, 288.0, 291.0, 282.0, 291.0, 195.0, 213.0, 268.0, 259.0, 268.0, 265.0, 291.0, 285.0, 251.0, 270.0, 262.0, 251.0, 294.0, 285.0, 288.0, 282.0, 281.0, 295.0, 295.0, 284.0, 296.0, 271.0, 268.0, 248.0, 278.0, 295.0, 254.0, 265.0, 293.0, 283.0, 292.0, 284.0, 290.0, 280.0, 262.0, 260.0, 289.0, 290.0, 284.0, 286.0, 289.0, 284.0, 276.0, 294.0, 286.0, 293.0, 275.0, 255.0, 292.0, 281.0, 277.0, 302.0, 290.0, 289.0, 285.0, 297.0, 285.0, 288.0, 293.0, 280.0, 286.0, 290.0, 232.0, 233.0, 295.0, 278.0, 296.0, 280.0, 261.0, 261.0, 288.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7088371959002824, "mean_inference_ms": 1.2734729518259271, "mean_action_processing_ms": 0.13524111288542504, "mean_env_wait_ms": 0.8538048193968771, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 408.0, "episode_reward_mean": 555.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 277.85}, "hist_stats": {"episode_reward": [570.0, 579.0, 576.0, 576.0, 578.0, 530.0, 530.0, 576.0, 573.0, 525.0, 516.0, 530.0, 524.0, 573.0, 582.0, 570.0, 584.0, 527.0, 570.0, 
576.0, 519.0, 519.0, 525.0, 527.0, 527.0, 570.0, 525.0, 570.0, 522.0, 522.0, 570.0, 576.0, 582.0, 587.0, 527.0, 579.0, 570.0, 570.0, 525.0, 465.0, 570.0, 576.0, 525.0, 573.0, 570.0, 525.0, 516.0, 587.0, 579.0, 576.0, 530.0, 579.0, 573.0, 570.0, 573.0, 573.0, 570.0, 567.0, 573.0, 576.0, 579.0, 573.0, 579.0, 573.0, 408.0, 527.0, 533.0, 576.0, 521.0, 513.0, 579.0, 570.0, 576.0, 579.0, 567.0, 516.0, 573.0, 519.0, 576.0, 576.0, 570.0, 522.0, 579.0, 570.0, 573.0, 570.0, 579.0, 530.0, 573.0, 579.0, 579.0, 582.0, 573.0, 573.0, 576.0, 465.0, 573.0, 576.0, 522.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 276.0, 286.0, 293.0, 303.0, 273.0, 290.0, 286.0, 299.0, 279.0, 264.0, 266.0, 270.0, 260.0, 277.0, 299.0, 288.0, 285.0, 262.0, 263.0, 241.0, 275.0, 270.0, 260.0, 266.0, 258.0, 292.0, 281.0, 301.0, 281.0, 289.0, 281.0, 288.0, 296.0, 269.0, 258.0, 289.0, 281.0, 279.0, 297.0, 263.0, 256.0, 270.0, 249.0, 270.0, 255.0, 275.0, 252.0, 271.0, 256.0, 290.0, 280.0, 269.0, 256.0, 292.0, 278.0, 268.0, 254.0, 275.0, 247.0, 284.0, 286.0, 278.0, 298.0, 290.0, 292.0, 291.0, 296.0, 264.0, 263.0, 291.0, 288.0, 290.0, 280.0, 287.0, 283.0, 262.0, 263.0, 232.0, 233.0, 277.0, 293.0, 291.0, 285.0, 260.0, 265.0, 280.0, 293.0, 281.0, 289.0, 256.0, 269.0, 260.0, 256.0, 290.0, 297.0, 289.0, 290.0, 283.0, 293.0, 255.0, 275.0, 292.0, 287.0, 288.0, 285.0, 289.0, 281.0, 291.0, 282.0, 289.0, 284.0, 281.0, 289.0, 289.0, 278.0, 285.0, 288.0, 297.0, 279.0, 293.0, 286.0, 279.0, 294.0, 288.0, 291.0, 282.0, 
291.0, 195.0, 213.0, 268.0, 259.0, 268.0, 265.0, 291.0, 285.0, 251.0, 270.0, 262.0, 251.0, 294.0, 285.0, 288.0, 282.0, 281.0, 295.0, 295.0, 284.0, 296.0, 271.0, 268.0, 248.0, 278.0, 295.0, 254.0, 265.0, 293.0, 283.0, 292.0, 284.0, 290.0, 280.0, 262.0, 260.0, 289.0, 290.0, 284.0, 286.0, 289.0, 284.0, 276.0, 294.0, 286.0, 293.0, 275.0, 255.0, 292.0, 281.0, 277.0, 302.0, 290.0, 289.0, 285.0, 297.0, 285.0, 288.0, 293.0, 280.0, 286.0, 290.0, 232.0, 233.0, 295.0, 278.0, 296.0, 280.0, 261.0, 261.0, 288.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7088371959002824, "mean_inference_ms": 1.2734729518259271, "mean_action_processing_ms": 0.13524111288542504, "mean_env_wait_ms": 0.8538048193968771, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6041600, "num_agent_steps_trained": 6041600, "num_env_steps_sampled": 3020800, "num_env_steps_trained": 3020800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3020800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6041600, "timers": {"training_iteration_time_ms": 3643.608, "learn_time_ms": 1120.612, "learn_throughput": 11422.327, "synch_weights_time_ms": 10.109}, "counters": {"num_env_steps_sampled": 3020800, "num_env_steps_trained": 3020800, "num_agent_steps_sampled": 6041600, "num_agent_steps_trained": 6041600}, "done": false, "episodes_total": 7552, "training_iteration": 236, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-50", "timestamp": 1666581350, "time_this_iter_s": 3.8835103511810303, "time_total_s": 916.9428939819336, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 916.9428939819336, "timesteps_since_restore": 0, "iterations_since_restore": 236, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.08, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 171.24, "shaped_reward_min": 128, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.14, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.6, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.38, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.66, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.23, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.66, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.23, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.66, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.23, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 1.031567808240652e-05, "policy_loss": -0.0003100165631622076, "vf_loss": 7.778059482574463, "vf_explained_var": 0.6068955659866333, "kl": 0.0022331150248646736, "entropy": 0.9149467945098877, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3033600, "num_env_steps_trained": 3033600, "num_agent_steps_sampled": 6067200, "num_agent_steps_trained": 6067200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 408.0, "episode_reward_mean": 559.64, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 279.82}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 171.24, "shaped_reward_min": 128, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.14, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.6, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.38, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.66, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.23, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.35, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.66, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.23, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.66, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.23, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 527.0, 579.0, 570.0, 570.0, 525.0, 465.0, 570.0, 576.0, 525.0, 573.0, 570.0, 525.0, 516.0, 587.0, 579.0, 576.0, 530.0, 579.0, 573.0, 570.0, 573.0, 573.0, 570.0, 567.0, 573.0, 576.0, 579.0, 573.0, 579.0, 573.0, 408.0, 527.0, 533.0, 576.0, 521.0, 513.0, 579.0, 570.0, 576.0, 579.0, 567.0, 516.0, 573.0, 519.0, 576.0, 576.0, 570.0, 522.0, 579.0, 570.0, 573.0, 570.0, 579.0, 530.0, 573.0, 579.0, 579.0, 582.0, 573.0, 573.0, 576.0, 465.0, 573.0, 576.0, 522.0, 570.0, 579.0, 579.0, 522.0, 579.0, 570.0, 573.0, 525.0, 576.0, 519.0, 582.0, 530.0, 570.0, 573.0, 576.0, 525.0, 573.0, 573.0, 576.0, 570.0, 579.0, 573.0, 573.0, 455.0, 582.0, 579.0, 573.0, 576.0, 576.0, 630.0, 573.0, 576.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 291.0, 296.0, 264.0, 263.0, 291.0, 288.0, 290.0, 280.0, 287.0, 
283.0, 262.0, 263.0, 232.0, 233.0, 277.0, 293.0, 291.0, 285.0, 260.0, 265.0, 280.0, 293.0, 281.0, 289.0, 256.0, 269.0, 260.0, 256.0, 290.0, 297.0, 289.0, 290.0, 283.0, 293.0, 255.0, 275.0, 292.0, 287.0, 288.0, 285.0, 289.0, 281.0, 291.0, 282.0, 289.0, 284.0, 281.0, 289.0, 289.0, 278.0, 285.0, 288.0, 297.0, 279.0, 293.0, 286.0, 279.0, 294.0, 288.0, 291.0, 282.0, 291.0, 195.0, 213.0, 268.0, 259.0, 268.0, 265.0, 291.0, 285.0, 251.0, 270.0, 262.0, 251.0, 294.0, 285.0, 288.0, 282.0, 281.0, 295.0, 295.0, 284.0, 296.0, 271.0, 268.0, 248.0, 278.0, 295.0, 254.0, 265.0, 293.0, 283.0, 292.0, 284.0, 290.0, 280.0, 262.0, 260.0, 289.0, 290.0, 284.0, 286.0, 289.0, 284.0, 276.0, 294.0, 286.0, 293.0, 275.0, 255.0, 292.0, 281.0, 277.0, 302.0, 290.0, 289.0, 285.0, 297.0, 285.0, 288.0, 293.0, 280.0, 286.0, 290.0, 232.0, 233.0, 295.0, 278.0, 296.0, 280.0, 261.0, 261.0, 288.0, 282.0, 297.0, 282.0, 291.0, 288.0, 256.0, 266.0, 288.0, 291.0, 279.0, 291.0, 285.0, 288.0, 264.0, 261.0, 287.0, 289.0, 246.0, 273.0, 299.0, 283.0, 259.0, 271.0, 291.0, 279.0, 284.0, 289.0, 290.0, 286.0, 268.0, 257.0, 283.0, 290.0, 281.0, 292.0, 287.0, 289.0, 287.0, 283.0, 289.0, 290.0, 298.0, 275.0, 291.0, 282.0, 233.0, 222.0, 293.0, 289.0, 292.0, 287.0, 290.0, 283.0, 288.0, 288.0, 293.0, 283.0, 309.0, 321.0, 293.0, 280.0, 289.0, 287.0, 269.0, 247.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7087295012561051, "mean_inference_ms": 1.2733068706265849, "mean_action_processing_ms": 0.13523200386488354, "mean_env_wait_ms": 0.8538258765354435, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 408.0, "episode_reward_mean": 559.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 279.82}, "hist_stats": {"episode_reward": [582.0, 587.0, 527.0, 579.0, 570.0, 570.0, 525.0, 465.0, 570.0, 576.0, 525.0, 573.0, 570.0, 525.0, 516.0, 587.0, 579.0, 576.0, 530.0, 
579.0, 573.0, 570.0, 573.0, 573.0, 570.0, 567.0, 573.0, 576.0, 579.0, 573.0, 579.0, 573.0, 408.0, 527.0, 533.0, 576.0, 521.0, 513.0, 579.0, 570.0, 576.0, 579.0, 567.0, 516.0, 573.0, 519.0, 576.0, 576.0, 570.0, 522.0, 579.0, 570.0, 573.0, 570.0, 579.0, 530.0, 573.0, 579.0, 579.0, 582.0, 573.0, 573.0, 576.0, 465.0, 573.0, 576.0, 522.0, 570.0, 579.0, 579.0, 522.0, 579.0, 570.0, 573.0, 525.0, 576.0, 519.0, 582.0, 530.0, 570.0, 573.0, 576.0, 525.0, 573.0, 573.0, 576.0, 570.0, 579.0, 573.0, 573.0, 455.0, 582.0, 579.0, 573.0, 576.0, 576.0, 630.0, 573.0, 576.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 291.0, 296.0, 264.0, 263.0, 291.0, 288.0, 290.0, 280.0, 287.0, 283.0, 262.0, 263.0, 232.0, 233.0, 277.0, 293.0, 291.0, 285.0, 260.0, 265.0, 280.0, 293.0, 281.0, 289.0, 256.0, 269.0, 260.0, 256.0, 290.0, 297.0, 289.0, 290.0, 283.0, 293.0, 255.0, 275.0, 292.0, 287.0, 288.0, 285.0, 289.0, 281.0, 291.0, 282.0, 289.0, 284.0, 281.0, 289.0, 289.0, 278.0, 285.0, 288.0, 297.0, 279.0, 293.0, 286.0, 279.0, 294.0, 288.0, 291.0, 282.0, 291.0, 195.0, 213.0, 268.0, 259.0, 268.0, 265.0, 291.0, 285.0, 251.0, 270.0, 262.0, 251.0, 294.0, 285.0, 288.0, 282.0, 281.0, 295.0, 295.0, 284.0, 296.0, 271.0, 268.0, 248.0, 278.0, 295.0, 254.0, 265.0, 293.0, 283.0, 292.0, 284.0, 290.0, 280.0, 262.0, 260.0, 289.0, 290.0, 284.0, 286.0, 289.0, 284.0, 276.0, 294.0, 286.0, 293.0, 275.0, 255.0, 292.0, 281.0, 277.0, 302.0, 290.0, 289.0, 285.0, 297.0, 285.0, 288.0, 293.0, 280.0, 286.0, 290.0, 232.0, 
233.0, 295.0, 278.0, 296.0, 280.0, 261.0, 261.0, 288.0, 282.0, 297.0, 282.0, 291.0, 288.0, 256.0, 266.0, 288.0, 291.0, 279.0, 291.0, 285.0, 288.0, 264.0, 261.0, 287.0, 289.0, 246.0, 273.0, 299.0, 283.0, 259.0, 271.0, 291.0, 279.0, 284.0, 289.0, 290.0, 286.0, 268.0, 257.0, 283.0, 290.0, 281.0, 292.0, 287.0, 289.0, 287.0, 283.0, 289.0, 290.0, 298.0, 275.0, 291.0, 282.0, 233.0, 222.0, 293.0, 289.0, 292.0, 287.0, 290.0, 283.0, 288.0, 288.0, 293.0, 283.0, 309.0, 321.0, 293.0, 280.0, 289.0, 287.0, 269.0, 247.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7087295012561051, "mean_inference_ms": 1.2733068706265849, "mean_action_processing_ms": 0.13523200386488354, "mean_env_wait_ms": 0.8538258765354435, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6067200, "num_agent_steps_trained": 6067200, "num_env_steps_sampled": 3033600, "num_env_steps_trained": 3033600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3033600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6067200, "timers": {"training_iteration_time_ms": 3671.299, "learn_time_ms": 1117.42, "learn_throughput": 11454.961, "synch_weights_time_ms": 10.815}, "counters": {"num_env_steps_sampled": 3033600, "num_env_steps_trained": 3033600, "num_agent_steps_sampled": 6067200, "num_agent_steps_trained": 6067200}, "done": false, "episodes_total": 7584, "training_iteration": 237, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-54", "timestamp": 1666581354, "time_this_iter_s": 3.9520177841186523, "time_total_s": 920.8949117660522, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 920.8949117660522, "timesteps_since_restore": 0, "iterations_since_restore": 237, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 22.033333333333335, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 171.1, "shaped_reward_min": 128, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.19, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.94, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.13, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.68, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.24, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 
14.68, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.24, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.68, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.24, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007050332496874034, "policy_loss": -0.001018117880448699, "vf_loss": 7.772661209106445, "vf_explained_var": 0.5922338962554932, "kl": 0.0018585395300760865, "entropy": 0.9283610582351685, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3046400, "num_env_steps_trained": 3046400, "num_agent_steps_sampled": 6092800, "num_agent_steps_trained": 6092800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 408.0, "episode_reward_mean": 559.5, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 279.75}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 171.1, "shaped_reward_min": 128, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.19, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.94, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.68, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.24, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 14, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 11, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 12, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.68, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.24, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.68, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.24, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [408.0, 527.0, 533.0, 576.0, 521.0, 513.0, 579.0, 570.0, 576.0, 579.0, 567.0, 516.0, 573.0, 519.0, 576.0, 576.0, 570.0, 522.0, 579.0, 570.0, 573.0, 570.0, 579.0, 530.0, 573.0, 579.0, 579.0, 582.0, 573.0, 573.0, 576.0, 465.0, 573.0, 576.0, 522.0, 570.0, 579.0, 579.0, 522.0, 579.0, 570.0, 573.0, 525.0, 576.0, 519.0, 582.0, 530.0, 570.0, 573.0, 576.0, 525.0, 573.0, 573.0, 576.0, 570.0, 579.0, 573.0, 573.0, 455.0, 582.0, 579.0, 573.0, 576.0, 576.0, 630.0, 573.0, 576.0, 516.0, 582.0, 579.0, 576.0, 573.0, 570.0, 567.0, 525.0, 533.0, 579.0, 579.0, 561.0, 576.0, 576.0, 579.0, 573.0, 527.0, 527.0, 573.0, 573.0, 573.0, 570.0, 582.0, 525.0, 522.0, 573.0, 573.0, 570.0, 573.0, 573.0, 522.0, 576.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [195.0, 213.0, 268.0, 259.0, 268.0, 265.0, 291.0, 285.0, 251.0, 270.0, 262.0, 
251.0, 294.0, 285.0, 288.0, 282.0, 281.0, 295.0, 295.0, 284.0, 296.0, 271.0, 268.0, 248.0, 278.0, 295.0, 254.0, 265.0, 293.0, 283.0, 292.0, 284.0, 290.0, 280.0, 262.0, 260.0, 289.0, 290.0, 284.0, 286.0, 289.0, 284.0, 276.0, 294.0, 286.0, 293.0, 275.0, 255.0, 292.0, 281.0, 277.0, 302.0, 290.0, 289.0, 285.0, 297.0, 285.0, 288.0, 293.0, 280.0, 286.0, 290.0, 232.0, 233.0, 295.0, 278.0, 296.0, 280.0, 261.0, 261.0, 288.0, 282.0, 297.0, 282.0, 291.0, 288.0, 256.0, 266.0, 288.0, 291.0, 279.0, 291.0, 285.0, 288.0, 264.0, 261.0, 287.0, 289.0, 246.0, 273.0, 299.0, 283.0, 259.0, 271.0, 291.0, 279.0, 284.0, 289.0, 290.0, 286.0, 268.0, 257.0, 283.0, 290.0, 281.0, 292.0, 287.0, 289.0, 287.0, 283.0, 289.0, 290.0, 298.0, 275.0, 291.0, 282.0, 233.0, 222.0, 293.0, 289.0, 292.0, 287.0, 290.0, 283.0, 288.0, 288.0, 293.0, 283.0, 309.0, 321.0, 293.0, 280.0, 289.0, 287.0, 269.0, 247.0, 294.0, 288.0, 291.0, 288.0, 286.0, 290.0, 288.0, 285.0, 286.0, 284.0, 280.0, 287.0, 257.0, 268.0, 269.0, 264.0, 292.0, 287.0, 287.0, 292.0, 280.0, 281.0, 286.0, 290.0, 297.0, 279.0, 288.0, 291.0, 287.0, 286.0, 255.0, 272.0, 262.0, 265.0, 287.0, 286.0, 282.0, 291.0, 285.0, 288.0, 278.0, 292.0, 297.0, 285.0, 264.0, 261.0, 261.0, 261.0, 280.0, 293.0, 286.0, 287.0, 280.0, 290.0, 287.0, 286.0, 270.0, 303.0, 269.0, 253.0, 273.0, 303.0, 261.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7086594012552062, "mean_inference_ms": 1.2731733769862408, "mean_action_processing_ms": 0.13522490952251545, "mean_env_wait_ms": 0.8538714821231937, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 408.0, "episode_reward_mean": 559.5, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 279.75}, "hist_stats": {"episode_reward": [408.0, 527.0, 533.0, 576.0, 521.0, 513.0, 579.0, 570.0, 576.0, 579.0, 567.0, 516.0, 573.0, 519.0, 576.0, 576.0, 570.0, 522.0, 579.0, 
570.0, 573.0, 570.0, 579.0, 530.0, 573.0, 579.0, 579.0, 582.0, 573.0, 573.0, 576.0, 465.0, 573.0, 576.0, 522.0, 570.0, 579.0, 579.0, 522.0, 579.0, 570.0, 573.0, 525.0, 576.0, 519.0, 582.0, 530.0, 570.0, 573.0, 576.0, 525.0, 573.0, 573.0, 576.0, 570.0, 579.0, 573.0, 573.0, 455.0, 582.0, 579.0, 573.0, 576.0, 576.0, 630.0, 573.0, 576.0, 516.0, 582.0, 579.0, 576.0, 573.0, 570.0, 567.0, 525.0, 533.0, 579.0, 579.0, 561.0, 576.0, 576.0, 579.0, 573.0, 527.0, 527.0, 573.0, 573.0, 573.0, 570.0, 582.0, 525.0, 522.0, 573.0, 573.0, 570.0, 573.0, 573.0, 522.0, 576.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [195.0, 213.0, 268.0, 259.0, 268.0, 265.0, 291.0, 285.0, 251.0, 270.0, 262.0, 251.0, 294.0, 285.0, 288.0, 282.0, 281.0, 295.0, 295.0, 284.0, 296.0, 271.0, 268.0, 248.0, 278.0, 295.0, 254.0, 265.0, 293.0, 283.0, 292.0, 284.0, 290.0, 280.0, 262.0, 260.0, 289.0, 290.0, 284.0, 286.0, 289.0, 284.0, 276.0, 294.0, 286.0, 293.0, 275.0, 255.0, 292.0, 281.0, 277.0, 302.0, 290.0, 289.0, 285.0, 297.0, 285.0, 288.0, 293.0, 280.0, 286.0, 290.0, 232.0, 233.0, 295.0, 278.0, 296.0, 280.0, 261.0, 261.0, 288.0, 282.0, 297.0, 282.0, 291.0, 288.0, 256.0, 266.0, 288.0, 291.0, 279.0, 291.0, 285.0, 288.0, 264.0, 261.0, 287.0, 289.0, 246.0, 273.0, 299.0, 283.0, 259.0, 271.0, 291.0, 279.0, 284.0, 289.0, 290.0, 286.0, 268.0, 257.0, 283.0, 290.0, 281.0, 292.0, 287.0, 289.0, 287.0, 283.0, 289.0, 290.0, 298.0, 275.0, 291.0, 282.0, 233.0, 222.0, 293.0, 289.0, 292.0, 287.0, 290.0, 283.0, 288.0, 288.0, 293.0, 
283.0, 309.0, 321.0, 293.0, 280.0, 289.0, 287.0, 269.0, 247.0, 294.0, 288.0, 291.0, 288.0, 286.0, 290.0, 288.0, 285.0, 286.0, 284.0, 280.0, 287.0, 257.0, 268.0, 269.0, 264.0, 292.0, 287.0, 287.0, 292.0, 280.0, 281.0, 286.0, 290.0, 297.0, 279.0, 288.0, 291.0, 287.0, 286.0, 255.0, 272.0, 262.0, 265.0, 287.0, 286.0, 282.0, 291.0, 285.0, 288.0, 278.0, 292.0, 297.0, 285.0, 264.0, 261.0, 261.0, 261.0, 280.0, 293.0, 286.0, 287.0, 280.0, 290.0, 287.0, 286.0, 270.0, 303.0, 269.0, 253.0, 273.0, 303.0, 261.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7086594012552062, "mean_inference_ms": 1.2731733769862408, "mean_action_processing_ms": 0.13522490952251545, "mean_env_wait_ms": 0.8538714821231937, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6092800, "num_agent_steps_trained": 6092800, "num_env_steps_sampled": 3046400, "num_env_steps_trained": 3046400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3046400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6092800, "timers": {"training_iteration_time_ms": 3664.058, "learn_time_ms": 1110.218, "learn_throughput": 11529.27, "synch_weights_time_ms": 10.171}, "counters": {"num_env_steps_sampled": 3046400, "num_env_steps_trained": 3046400, "num_agent_steps_sampled": 6092800, "num_agent_steps_trained": 6092800}, "done": false, "episodes_total": 7616, "training_iteration": 238, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-15-58", "timestamp": 1666581358, "time_this_iter_s": 3.6473422050476074, "time_total_s": 924.5422539710999, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 924.5422539710999, "timesteps_since_restore": 0, "iterations_since_restore": 238, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.916666666666668, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 171.79, "shaped_reward_min": 135, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.78, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.03, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.56, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.85, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.14, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.32, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.73, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.84, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.32, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.73, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.32, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.73, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005791244911961257, "policy_loss": -0.0008936794474720955, "vf_loss": 7.769590854644775, "vf_explained_var": 0.596868634223938, "kl": 0.0018960753222927451, "entropy": 0.924808144569397, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3059200, "num_env_steps_trained": 3059200, "num_agent_steps_sampled": 6118400, "num_agent_steps_trained": 6118400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 455.0, "episode_reward_mean": 562.19, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.095}, "custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 171.79, "shaped_reward_min": 135, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.78, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.03, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.56, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.85, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.32, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.73, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.86, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.84, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.35, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.32, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.73, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.32, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.73, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 522.0, 570.0, 579.0, 579.0, 522.0, 579.0, 570.0, 573.0, 525.0, 576.0, 519.0, 582.0, 530.0, 570.0, 573.0, 576.0, 525.0, 573.0, 573.0, 576.0, 570.0, 579.0, 573.0, 573.0, 455.0, 582.0, 579.0, 573.0, 576.0, 576.0, 630.0, 573.0, 576.0, 516.0, 582.0, 579.0, 576.0, 573.0, 570.0, 567.0, 525.0, 533.0, 579.0, 579.0, 561.0, 576.0, 576.0, 579.0, 573.0, 527.0, 527.0, 573.0, 573.0, 573.0, 570.0, 582.0, 525.0, 522.0, 573.0, 573.0, 570.0, 573.0, 573.0, 522.0, 576.0, 516.0, 516.0, 579.0, 573.0, 573.0, 579.0, 582.0, 570.0, 525.0, 576.0, 522.0, 576.0, 576.0, 576.0, 579.0, 573.0, 582.0, 530.0, 570.0, 573.0, 576.0, 579.0, 573.0, 524.0, 576.0, 519.0, 510.0, 570.0, 579.0, 573.0, 573.0, 573.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 278.0, 296.0, 280.0, 261.0, 261.0, 288.0, 282.0, 297.0, 282.0, 291.0, 
288.0, 256.0, 266.0, 288.0, 291.0, 279.0, 291.0, 285.0, 288.0, 264.0, 261.0, 287.0, 289.0, 246.0, 273.0, 299.0, 283.0, 259.0, 271.0, 291.0, 279.0, 284.0, 289.0, 290.0, 286.0, 268.0, 257.0, 283.0, 290.0, 281.0, 292.0, 287.0, 289.0, 287.0, 283.0, 289.0, 290.0, 298.0, 275.0, 291.0, 282.0, 233.0, 222.0, 293.0, 289.0, 292.0, 287.0, 290.0, 283.0, 288.0, 288.0, 293.0, 283.0, 309.0, 321.0, 293.0, 280.0, 289.0, 287.0, 269.0, 247.0, 294.0, 288.0, 291.0, 288.0, 286.0, 290.0, 288.0, 285.0, 286.0, 284.0, 280.0, 287.0, 257.0, 268.0, 269.0, 264.0, 292.0, 287.0, 287.0, 292.0, 280.0, 281.0, 286.0, 290.0, 297.0, 279.0, 288.0, 291.0, 287.0, 286.0, 255.0, 272.0, 262.0, 265.0, 287.0, 286.0, 282.0, 291.0, 285.0, 288.0, 278.0, 292.0, 297.0, 285.0, 264.0, 261.0, 261.0, 261.0, 280.0, 293.0, 286.0, 287.0, 280.0, 290.0, 287.0, 286.0, 270.0, 303.0, 269.0, 253.0, 273.0, 303.0, 261.0, 255.0, 263.0, 253.0, 293.0, 286.0, 283.0, 290.0, 287.0, 286.0, 289.0, 290.0, 285.0, 297.0, 276.0, 294.0, 269.0, 256.0, 291.0, 285.0, 260.0, 262.0, 287.0, 289.0, 297.0, 279.0, 291.0, 285.0, 293.0, 286.0, 283.0, 290.0, 287.0, 295.0, 263.0, 267.0, 289.0, 281.0, 292.0, 281.0, 278.0, 298.0, 294.0, 285.0, 294.0, 279.0, 276.0, 248.0, 303.0, 273.0, 272.0, 247.0, 257.0, 253.0, 282.0, 288.0, 289.0, 290.0, 291.0, 282.0, 294.0, 279.0, 290.0, 283.0, 258.0, 258.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7085613748997756, "mean_inference_ms": 1.2729861809868541, "mean_action_processing_ms": 0.13521385639155092, "mean_env_wait_ms": 0.85387454684184, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 455.0, "episode_reward_mean": 562.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.095}, "hist_stats": {"episode_reward": [573.0, 576.0, 522.0, 570.0, 579.0, 579.0, 522.0, 579.0, 570.0, 573.0, 525.0, 576.0, 519.0, 582.0, 530.0, 570.0, 573.0, 576.0, 525.0, 
573.0, 573.0, 576.0, 570.0, 579.0, 573.0, 573.0, 455.0, 582.0, 579.0, 573.0, 576.0, 576.0, 630.0, 573.0, 576.0, 516.0, 582.0, 579.0, 576.0, 573.0, 570.0, 567.0, 525.0, 533.0, 579.0, 579.0, 561.0, 576.0, 576.0, 579.0, 573.0, 527.0, 527.0, 573.0, 573.0, 573.0, 570.0, 582.0, 525.0, 522.0, 573.0, 573.0, 570.0, 573.0, 573.0, 522.0, 576.0, 516.0, 516.0, 579.0, 573.0, 573.0, 579.0, 582.0, 570.0, 525.0, 576.0, 522.0, 576.0, 576.0, 576.0, 579.0, 573.0, 582.0, 530.0, 570.0, 573.0, 576.0, 579.0, 573.0, 524.0, 576.0, 519.0, 510.0, 570.0, 579.0, 573.0, 573.0, 573.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 278.0, 296.0, 280.0, 261.0, 261.0, 288.0, 282.0, 297.0, 282.0, 291.0, 288.0, 256.0, 266.0, 288.0, 291.0, 279.0, 291.0, 285.0, 288.0, 264.0, 261.0, 287.0, 289.0, 246.0, 273.0, 299.0, 283.0, 259.0, 271.0, 291.0, 279.0, 284.0, 289.0, 290.0, 286.0, 268.0, 257.0, 283.0, 290.0, 281.0, 292.0, 287.0, 289.0, 287.0, 283.0, 289.0, 290.0, 298.0, 275.0, 291.0, 282.0, 233.0, 222.0, 293.0, 289.0, 292.0, 287.0, 290.0, 283.0, 288.0, 288.0, 293.0, 283.0, 309.0, 321.0, 293.0, 280.0, 289.0, 287.0, 269.0, 247.0, 294.0, 288.0, 291.0, 288.0, 286.0, 290.0, 288.0, 285.0, 286.0, 284.0, 280.0, 287.0, 257.0, 268.0, 269.0, 264.0, 292.0, 287.0, 287.0, 292.0, 280.0, 281.0, 286.0, 290.0, 297.0, 279.0, 288.0, 291.0, 287.0, 286.0, 255.0, 272.0, 262.0, 265.0, 287.0, 286.0, 282.0, 291.0, 285.0, 288.0, 278.0, 292.0, 297.0, 285.0, 264.0, 261.0, 261.0, 261.0, 280.0, 293.0, 286.0, 287.0, 280.0, 290.0, 287.0, 
286.0, 270.0, 303.0, 269.0, 253.0, 273.0, 303.0, 261.0, 255.0, 263.0, 253.0, 293.0, 286.0, 283.0, 290.0, 287.0, 286.0, 289.0, 290.0, 285.0, 297.0, 276.0, 294.0, 269.0, 256.0, 291.0, 285.0, 260.0, 262.0, 287.0, 289.0, 297.0, 279.0, 291.0, 285.0, 293.0, 286.0, 283.0, 290.0, 287.0, 295.0, 263.0, 267.0, 289.0, 281.0, 292.0, 281.0, 278.0, 298.0, 294.0, 285.0, 294.0, 279.0, 276.0, 248.0, 303.0, 273.0, 272.0, 247.0, 257.0, 253.0, 282.0, 288.0, 289.0, 290.0, 291.0, 282.0, 294.0, 279.0, 290.0, 283.0, 258.0, 258.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7085613748997756, "mean_inference_ms": 1.2729861809868541, "mean_action_processing_ms": 0.13521385639155092, "mean_env_wait_ms": 0.85387454684184, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6118400, "num_agent_steps_trained": 6118400, "num_env_steps_sampled": 3059200, "num_env_steps_trained": 3059200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3059200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6118400, "timers": {"training_iteration_time_ms": 3665.489, "learn_time_ms": 1111.722, "learn_throughput": 11513.671, "synch_weights_time_ms": 10.06}, "counters": {"num_env_steps_sampled": 3059200, "num_env_steps_trained": 3059200, "num_agent_steps_sampled": 6118400, "num_agent_steps_trained": 6118400}, "done": false, "episodes_total": 7648, "training_iteration": 239, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-02", "timestamp": 1666581362, "time_this_iter_s": 3.659172296524048, "time_total_s": 928.2014262676239, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 928.2014262676239, "timesteps_since_restore": 0, "iterations_since_restore": 239, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.54, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 171.99, "shaped_reward_min": 150, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.29, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.3, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.05, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.12, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.96, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.9, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.74, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.33, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.27, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.12, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.96, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.12, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.96, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016768049681559205, "policy_loss": -0.00199872930534184, "vf_loss": 7.833566665649414, "vf_explained_var": 0.6034523248672485, "kl": 0.001984196715056896, "entropy": 0.9228615164756775, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3072000, "num_env_steps_trained": 3072000, "num_agent_steps_sampled": 6144000, "num_agent_steps_trained": 6144000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 510.0, "episode_reward_mean": 562.79, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 247.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.395}, "custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 171.99, "shaped_reward_min": 150, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.29, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.3, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.05, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.12, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.96, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.9, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.74, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.33, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.27, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.12, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.96, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.12, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.96, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 573.0, 576.0, 516.0, 582.0, 579.0, 576.0, 573.0, 570.0, 567.0, 525.0, 533.0, 579.0, 579.0, 561.0, 576.0, 576.0, 579.0, 573.0, 527.0, 527.0, 573.0, 573.0, 573.0, 570.0, 582.0, 525.0, 522.0, 573.0, 573.0, 570.0, 573.0, 573.0, 522.0, 576.0, 516.0, 516.0, 579.0, 573.0, 573.0, 579.0, 582.0, 570.0, 525.0, 576.0, 522.0, 576.0, 576.0, 576.0, 579.0, 573.0, 582.0, 530.0, 570.0, 573.0, 576.0, 579.0, 573.0, 524.0, 576.0, 519.0, 510.0, 570.0, 579.0, 573.0, 573.0, 573.0, 516.0, 525.0, 576.0, 516.0, 522.0, 630.0, 582.0, 516.0, 570.0, 582.0, 576.0, 573.0, 579.0, 570.0, 579.0, 579.0, 573.0, 573.0, 576.0, 573.0, 576.0, 576.0, 576.0, 573.0, 567.0, 573.0, 573.0, 530.0, 578.0, 525.0, 576.0, 522.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 321.0, 293.0, 280.0, 289.0, 287.0, 269.0, 247.0, 294.0, 288.0, 291.0, 
288.0, 286.0, 290.0, 288.0, 285.0, 286.0, 284.0, 280.0, 287.0, 257.0, 268.0, 269.0, 264.0, 292.0, 287.0, 287.0, 292.0, 280.0, 281.0, 286.0, 290.0, 297.0, 279.0, 288.0, 291.0, 287.0, 286.0, 255.0, 272.0, 262.0, 265.0, 287.0, 286.0, 282.0, 291.0, 285.0, 288.0, 278.0, 292.0, 297.0, 285.0, 264.0, 261.0, 261.0, 261.0, 280.0, 293.0, 286.0, 287.0, 280.0, 290.0, 287.0, 286.0, 270.0, 303.0, 269.0, 253.0, 273.0, 303.0, 261.0, 255.0, 263.0, 253.0, 293.0, 286.0, 283.0, 290.0, 287.0, 286.0, 289.0, 290.0, 285.0, 297.0, 276.0, 294.0, 269.0, 256.0, 291.0, 285.0, 260.0, 262.0, 287.0, 289.0, 297.0, 279.0, 291.0, 285.0, 293.0, 286.0, 283.0, 290.0, 287.0, 295.0, 263.0, 267.0, 289.0, 281.0, 292.0, 281.0, 278.0, 298.0, 294.0, 285.0, 294.0, 279.0, 276.0, 248.0, 303.0, 273.0, 272.0, 247.0, 257.0, 253.0, 282.0, 288.0, 289.0, 290.0, 291.0, 282.0, 294.0, 279.0, 290.0, 283.0, 258.0, 258.0, 260.0, 265.0, 289.0, 287.0, 255.0, 261.0, 271.0, 251.0, 318.0, 312.0, 297.0, 285.0, 256.0, 260.0, 273.0, 297.0, 287.0, 295.0, 279.0, 297.0, 294.0, 279.0, 281.0, 298.0, 289.0, 281.0, 289.0, 290.0, 301.0, 278.0, 276.0, 297.0, 295.0, 278.0, 279.0, 297.0, 280.0, 293.0, 281.0, 295.0, 293.0, 283.0, 287.0, 289.0, 289.0, 284.0, 289.0, 278.0, 296.0, 277.0, 292.0, 281.0, 268.0, 262.0, 294.0, 284.0, 266.0, 259.0, 290.0, 286.0, 265.0, 257.0, 262.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7084314977660204, "mean_inference_ms": 1.2726471129035033, "mean_action_processing_ms": 0.135200399735534, "mean_env_wait_ms": 0.8537186524830526, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 510.0, "episode_reward_mean": 562.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 247.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.395}, "hist_stats": {"episode_reward": [630.0, 573.0, 576.0, 516.0, 582.0, 579.0, 576.0, 573.0, 570.0, 567.0, 525.0, 533.0, 579.0, 579.0, 561.0, 576.0, 576.0, 579.0, 573.0, 
527.0, 527.0, 573.0, 573.0, 573.0, 570.0, 582.0, 525.0, 522.0, 573.0, 573.0, 570.0, 573.0, 573.0, 522.0, 576.0, 516.0, 516.0, 579.0, 573.0, 573.0, 579.0, 582.0, 570.0, 525.0, 576.0, 522.0, 576.0, 576.0, 576.0, 579.0, 573.0, 582.0, 530.0, 570.0, 573.0, 576.0, 579.0, 573.0, 524.0, 576.0, 519.0, 510.0, 570.0, 579.0, 573.0, 573.0, 573.0, 516.0, 525.0, 576.0, 516.0, 522.0, 630.0, 582.0, 516.0, 570.0, 582.0, 576.0, 573.0, 579.0, 570.0, 579.0, 579.0, 573.0, 573.0, 576.0, 573.0, 576.0, 576.0, 576.0, 573.0, 567.0, 573.0, 573.0, 530.0, 578.0, 525.0, 576.0, 522.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [309.0, 321.0, 293.0, 280.0, 289.0, 287.0, 269.0, 247.0, 294.0, 288.0, 291.0, 288.0, 286.0, 290.0, 288.0, 285.0, 286.0, 284.0, 280.0, 287.0, 257.0, 268.0, 269.0, 264.0, 292.0, 287.0, 287.0, 292.0, 280.0, 281.0, 286.0, 290.0, 297.0, 279.0, 288.0, 291.0, 287.0, 286.0, 255.0, 272.0, 262.0, 265.0, 287.0, 286.0, 282.0, 291.0, 285.0, 288.0, 278.0, 292.0, 297.0, 285.0, 264.0, 261.0, 261.0, 261.0, 280.0, 293.0, 286.0, 287.0, 280.0, 290.0, 287.0, 286.0, 270.0, 303.0, 269.0, 253.0, 273.0, 303.0, 261.0, 255.0, 263.0, 253.0, 293.0, 286.0, 283.0, 290.0, 287.0, 286.0, 289.0, 290.0, 285.0, 297.0, 276.0, 294.0, 269.0, 256.0, 291.0, 285.0, 260.0, 262.0, 287.0, 289.0, 297.0, 279.0, 291.0, 285.0, 293.0, 286.0, 283.0, 290.0, 287.0, 295.0, 263.0, 267.0, 289.0, 281.0, 292.0, 281.0, 278.0, 298.0, 294.0, 285.0, 294.0, 279.0, 276.0, 248.0, 303.0, 273.0, 272.0, 247.0, 257.0, 253.0, 282.0, 288.0, 289.0, 
290.0, 291.0, 282.0, 294.0, 279.0, 290.0, 283.0, 258.0, 258.0, 260.0, 265.0, 289.0, 287.0, 255.0, 261.0, 271.0, 251.0, 318.0, 312.0, 297.0, 285.0, 256.0, 260.0, 273.0, 297.0, 287.0, 295.0, 279.0, 297.0, 294.0, 279.0, 281.0, 298.0, 289.0, 281.0, 289.0, 290.0, 301.0, 278.0, 276.0, 297.0, 295.0, 278.0, 279.0, 297.0, 280.0, 293.0, 281.0, 295.0, 293.0, 283.0, 287.0, 289.0, 289.0, 284.0, 289.0, 278.0, 296.0, 277.0, 292.0, 281.0, 268.0, 262.0, 294.0, 284.0, 266.0, 259.0, 290.0, 286.0, 265.0, 257.0, 262.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7084314977660204, "mean_inference_ms": 1.2726471129035033, "mean_action_processing_ms": 0.135200399735534, "mean_env_wait_ms": 0.8537186524830526, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6144000, "num_agent_steps_trained": 6144000, "num_env_steps_sampled": 3072000, "num_env_steps_trained": 3072000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3072000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6144000, "timers": {"training_iteration_time_ms": 3666.943, "learn_time_ms": 1113.043, "learn_throughput": 11500.005, "synch_weights_time_ms": 11.202}, "counters": {"num_env_steps_sampled": 3072000, "num_env_steps_trained": 3072000, "num_agent_steps_sampled": 6144000, "num_agent_steps_trained": 6144000}, "done": false, "episodes_total": 7680, "training_iteration": 240, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-05", "timestamp": 1666581365, "time_this_iter_s": 3.6721017360687256, "time_total_s": 931.8735280036926, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 931.8735280036926, "timesteps_since_restore": 0, "iterations_since_restore": 240, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.640000000000004, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 171.26, "shaped_reward_min": 150, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.64, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.06, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.45, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.83, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.3, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.41, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.36, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.31, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.3, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.3, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015853033401072025, "policy_loss": -0.0019000378670170903, "vf_loss": 7.7883172035217285, "vf_explained_var": 0.595491886138916, "kl": 0.0021413369104266167, "entropy": 0.9281942844390869, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3084800, "num_env_steps_trained": 3084800, "num_agent_steps_sampled": 6169600, "num_agent_steps_trained": 6169600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 510.0, "episode_reward_mean": 558.46, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 247.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 279.23}, "custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 171.26, "shaped_reward_min": 150, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.64, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.06, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.45, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.83, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.3, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.41, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.07, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.36, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.31, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.3, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.3, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 522.0, 576.0, 516.0, 516.0, 579.0, 573.0, 573.0, 579.0, 582.0, 570.0, 525.0, 576.0, 522.0, 576.0, 576.0, 576.0, 579.0, 573.0, 582.0, 530.0, 570.0, 573.0, 576.0, 579.0, 573.0, 524.0, 576.0, 519.0, 510.0, 570.0, 579.0, 573.0, 573.0, 573.0, 516.0, 525.0, 576.0, 516.0, 522.0, 630.0, 582.0, 516.0, 570.0, 582.0, 576.0, 573.0, 579.0, 570.0, 579.0, 579.0, 573.0, 573.0, 576.0, 573.0, 576.0, 576.0, 576.0, 573.0, 567.0, 573.0, 573.0, 530.0, 578.0, 525.0, 576.0, 522.0, 522.0, 516.0, 579.0, 527.0, 573.0, 516.0, 525.0, 525.0, 576.0, 525.0, 525.0, 573.0, 567.0, 519.0, 573.0, 573.0, 525.0, 530.0, 579.0, 522.0, 581.0, 525.0, 525.0, 582.0, 530.0, 579.0, 573.0, 525.0, 576.0, 576.0, 579.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [270.0, 303.0, 269.0, 253.0, 273.0, 303.0, 261.0, 255.0, 263.0, 253.0, 293.0, 
286.0, 283.0, 290.0, 287.0, 286.0, 289.0, 290.0, 285.0, 297.0, 276.0, 294.0, 269.0, 256.0, 291.0, 285.0, 260.0, 262.0, 287.0, 289.0, 297.0, 279.0, 291.0, 285.0, 293.0, 286.0, 283.0, 290.0, 287.0, 295.0, 263.0, 267.0, 289.0, 281.0, 292.0, 281.0, 278.0, 298.0, 294.0, 285.0, 294.0, 279.0, 276.0, 248.0, 303.0, 273.0, 272.0, 247.0, 257.0, 253.0, 282.0, 288.0, 289.0, 290.0, 291.0, 282.0, 294.0, 279.0, 290.0, 283.0, 258.0, 258.0, 260.0, 265.0, 289.0, 287.0, 255.0, 261.0, 271.0, 251.0, 318.0, 312.0, 297.0, 285.0, 256.0, 260.0, 273.0, 297.0, 287.0, 295.0, 279.0, 297.0, 294.0, 279.0, 281.0, 298.0, 289.0, 281.0, 289.0, 290.0, 301.0, 278.0, 276.0, 297.0, 295.0, 278.0, 279.0, 297.0, 280.0, 293.0, 281.0, 295.0, 293.0, 283.0, 287.0, 289.0, 289.0, 284.0, 289.0, 278.0, 296.0, 277.0, 292.0, 281.0, 268.0, 262.0, 294.0, 284.0, 266.0, 259.0, 290.0, 286.0, 265.0, 257.0, 262.0, 260.0, 250.0, 266.0, 295.0, 284.0, 255.0, 272.0, 284.0, 289.0, 254.0, 262.0, 255.0, 270.0, 259.0, 266.0, 289.0, 287.0, 266.0, 259.0, 257.0, 268.0, 295.0, 278.0, 294.0, 273.0, 263.0, 256.0, 287.0, 286.0, 299.0, 274.0, 259.0, 266.0, 266.0, 264.0, 286.0, 293.0, 261.0, 261.0, 293.0, 288.0, 261.0, 264.0, 256.0, 269.0, 292.0, 290.0, 257.0, 273.0, 282.0, 297.0, 283.0, 290.0, 265.0, 260.0, 292.0, 284.0, 289.0, 287.0, 301.0, 278.0, 281.0, 292.0, 283.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7082637673553855, "mean_inference_ms": 1.2722587608664173, "mean_action_processing_ms": 0.13518180542648156, "mean_env_wait_ms": 0.8534831822426292, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 510.0, "episode_reward_mean": 558.46, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 247.0}, "policy_reward_max": {"ppo": 318.0}, "policy_reward_mean": {"ppo": 279.23}, "hist_stats": {"episode_reward": [573.0, 522.0, 576.0, 516.0, 516.0, 579.0, 573.0, 573.0, 579.0, 582.0, 570.0, 525.0, 576.0, 522.0, 576.0, 576.0, 576.0, 579.0, 573.0, 
582.0, 530.0, 570.0, 573.0, 576.0, 579.0, 573.0, 524.0, 576.0, 519.0, 510.0, 570.0, 579.0, 573.0, 573.0, 573.0, 516.0, 525.0, 576.0, 516.0, 522.0, 630.0, 582.0, 516.0, 570.0, 582.0, 576.0, 573.0, 579.0, 570.0, 579.0, 579.0, 573.0, 573.0, 576.0, 573.0, 576.0, 576.0, 576.0, 573.0, 567.0, 573.0, 573.0, 530.0, 578.0, 525.0, 576.0, 522.0, 522.0, 516.0, 579.0, 527.0, 573.0, 516.0, 525.0, 525.0, 576.0, 525.0, 525.0, 573.0, 567.0, 519.0, 573.0, 573.0, 525.0, 530.0, 579.0, 522.0, 581.0, 525.0, 525.0, 582.0, 530.0, 579.0, 573.0, 525.0, 576.0, 576.0, 579.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [270.0, 303.0, 269.0, 253.0, 273.0, 303.0, 261.0, 255.0, 263.0, 253.0, 293.0, 286.0, 283.0, 290.0, 287.0, 286.0, 289.0, 290.0, 285.0, 297.0, 276.0, 294.0, 269.0, 256.0, 291.0, 285.0, 260.0, 262.0, 287.0, 289.0, 297.0, 279.0, 291.0, 285.0, 293.0, 286.0, 283.0, 290.0, 287.0, 295.0, 263.0, 267.0, 289.0, 281.0, 292.0, 281.0, 278.0, 298.0, 294.0, 285.0, 294.0, 279.0, 276.0, 248.0, 303.0, 273.0, 272.0, 247.0, 257.0, 253.0, 282.0, 288.0, 289.0, 290.0, 291.0, 282.0, 294.0, 279.0, 290.0, 283.0, 258.0, 258.0, 260.0, 265.0, 289.0, 287.0, 255.0, 261.0, 271.0, 251.0, 318.0, 312.0, 297.0, 285.0, 256.0, 260.0, 273.0, 297.0, 287.0, 295.0, 279.0, 297.0, 294.0, 279.0, 281.0, 298.0, 289.0, 281.0, 289.0, 290.0, 301.0, 278.0, 276.0, 297.0, 295.0, 278.0, 279.0, 297.0, 280.0, 293.0, 281.0, 295.0, 293.0, 283.0, 287.0, 289.0, 289.0, 284.0, 289.0, 278.0, 296.0, 277.0, 292.0, 281.0, 268.0, 262.0, 294.0, 
284.0, 266.0, 259.0, 290.0, 286.0, 265.0, 257.0, 262.0, 260.0, 250.0, 266.0, 295.0, 284.0, 255.0, 272.0, 284.0, 289.0, 254.0, 262.0, 255.0, 270.0, 259.0, 266.0, 289.0, 287.0, 266.0, 259.0, 257.0, 268.0, 295.0, 278.0, 294.0, 273.0, 263.0, 256.0, 287.0, 286.0, 299.0, 274.0, 259.0, 266.0, 266.0, 264.0, 286.0, 293.0, 261.0, 261.0, 293.0, 288.0, 261.0, 264.0, 256.0, 269.0, 292.0, 290.0, 257.0, 273.0, 282.0, 297.0, 283.0, 290.0, 265.0, 260.0, 292.0, 284.0, 289.0, 287.0, 301.0, 278.0, 281.0, 292.0, 283.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7082637673553855, "mean_inference_ms": 1.2722587608664173, "mean_action_processing_ms": 0.13518180542648156, "mean_env_wait_ms": 0.8534831822426292, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6169600, "num_agent_steps_trained": 6169600, "num_env_steps_sampled": 3084800, "num_env_steps_trained": 3084800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3084800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6169600, "timers": {"training_iteration_time_ms": 3671.08, "learn_time_ms": 1124.114, "learn_throughput": 11386.75, "synch_weights_time_ms": 11.662}, "counters": {"num_env_steps_sampled": 3084800, "num_env_steps_trained": 3084800, "num_agent_steps_sampled": 6169600, "num_agent_steps_trained": 6169600}, "done": false, "episodes_total": 7712, "training_iteration": 241, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-09", "timestamp": 1666581369, "time_this_iter_s": 3.6422295570373535, "time_total_s": 935.51575756073, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 935.51575756073, "timesteps_since_restore": 0, "iterations_since_restore": 241, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.516666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 171.68, "shaped_reward_min": 156, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.6, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.15, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.43, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.91, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.26, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.73, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.84, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.47, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.4, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.32, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.26, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.73, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.26, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.73, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013410382671281695, "policy_loss": 0.0010229569161310792, "vf_loss": 7.782090187072754, "vf_explained_var": 0.5975505113601685, "kl": 0.0020362879149615765, "entropy": 0.920256495475769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3097600, "num_env_steps_trained": 3097600, "num_agent_steps_sampled": 6195200, "num_agent_steps_trained": 6195200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 516.0, "episode_reward_mean": 558.88, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 279.44}, "custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 171.68, "shaped_reward_min": 156, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.6, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.15, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.43, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.91, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.26, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.73, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.76, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.84, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.47, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.4, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.32, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.26, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.73, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.26, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.73, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 573.0, 573.0, 516.0, 525.0, 576.0, 516.0, 522.0, 630.0, 582.0, 516.0, 570.0, 582.0, 576.0, 573.0, 579.0, 570.0, 579.0, 579.0, 573.0, 573.0, 576.0, 573.0, 576.0, 576.0, 576.0, 573.0, 567.0, 573.0, 573.0, 530.0, 578.0, 525.0, 576.0, 522.0, 522.0, 516.0, 579.0, 527.0, 573.0, 516.0, 525.0, 525.0, 576.0, 525.0, 525.0, 573.0, 567.0, 519.0, 573.0, 573.0, 525.0, 530.0, 579.0, 522.0, 581.0, 525.0, 525.0, 582.0, 530.0, 579.0, 573.0, 525.0, 576.0, 576.0, 579.0, 573.0, 579.0, 573.0, 576.0, 590.0, 527.0, 579.0, 516.0, 522.0, 573.0, 576.0, 576.0, 579.0, 576.0, 581.0, 579.0, 627.0, 573.0, 522.0, 530.0, 576.0, 576.0, 576.0, 570.0, 576.0, 576.0, 530.0, 579.0, 570.0, 570.0, 516.0, 525.0, 525.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 282.0, 294.0, 279.0, 290.0, 283.0, 258.0, 258.0, 260.0, 265.0, 289.0, 
287.0, 255.0, 261.0, 271.0, 251.0, 318.0, 312.0, 297.0, 285.0, 256.0, 260.0, 273.0, 297.0, 287.0, 295.0, 279.0, 297.0, 294.0, 279.0, 281.0, 298.0, 289.0, 281.0, 289.0, 290.0, 301.0, 278.0, 276.0, 297.0, 295.0, 278.0, 279.0, 297.0, 280.0, 293.0, 281.0, 295.0, 293.0, 283.0, 287.0, 289.0, 289.0, 284.0, 289.0, 278.0, 296.0, 277.0, 292.0, 281.0, 268.0, 262.0, 294.0, 284.0, 266.0, 259.0, 290.0, 286.0, 265.0, 257.0, 262.0, 260.0, 250.0, 266.0, 295.0, 284.0, 255.0, 272.0, 284.0, 289.0, 254.0, 262.0, 255.0, 270.0, 259.0, 266.0, 289.0, 287.0, 266.0, 259.0, 257.0, 268.0, 295.0, 278.0, 294.0, 273.0, 263.0, 256.0, 287.0, 286.0, 299.0, 274.0, 259.0, 266.0, 266.0, 264.0, 286.0, 293.0, 261.0, 261.0, 293.0, 288.0, 261.0, 264.0, 256.0, 269.0, 292.0, 290.0, 257.0, 273.0, 282.0, 297.0, 283.0, 290.0, 265.0, 260.0, 292.0, 284.0, 289.0, 287.0, 301.0, 278.0, 281.0, 292.0, 283.0, 296.0, 294.0, 279.0, 283.0, 293.0, 292.0, 298.0, 271.0, 256.0, 286.0, 293.0, 260.0, 256.0, 260.0, 262.0, 297.0, 276.0, 283.0, 293.0, 289.0, 287.0, 301.0, 278.0, 291.0, 285.0, 299.0, 282.0, 290.0, 289.0, 304.0, 323.0, 276.0, 297.0, 264.0, 258.0, 266.0, 264.0, 280.0, 296.0, 297.0, 279.0, 281.0, 295.0, 286.0, 284.0, 289.0, 287.0, 282.0, 294.0, 275.0, 255.0, 297.0, 282.0, 283.0, 287.0, 286.0, 284.0, 253.0, 263.0, 269.0, 256.0, 248.0, 277.0, 263.0, 262.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7081123236132713, "mean_inference_ms": 1.2718984380888532, "mean_action_processing_ms": 0.1351636109557676, "mean_env_wait_ms": 0.8532669400902704, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 516.0, "episode_reward_mean": 558.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 279.44}, "hist_stats": {"episode_reward": [573.0, 573.0, 573.0, 516.0, 525.0, 576.0, 516.0, 522.0, 630.0, 582.0, 516.0, 570.0, 582.0, 576.0, 573.0, 579.0, 570.0, 579.0, 579.0, 
573.0, 573.0, 576.0, 573.0, 576.0, 576.0, 576.0, 573.0, 567.0, 573.0, 573.0, 530.0, 578.0, 525.0, 576.0, 522.0, 522.0, 516.0, 579.0, 527.0, 573.0, 516.0, 525.0, 525.0, 576.0, 525.0, 525.0, 573.0, 567.0, 519.0, 573.0, 573.0, 525.0, 530.0, 579.0, 522.0, 581.0, 525.0, 525.0, 582.0, 530.0, 579.0, 573.0, 525.0, 576.0, 576.0, 579.0, 573.0, 579.0, 573.0, 576.0, 590.0, 527.0, 579.0, 516.0, 522.0, 573.0, 576.0, 576.0, 579.0, 576.0, 581.0, 579.0, 627.0, 573.0, 522.0, 530.0, 576.0, 576.0, 576.0, 570.0, 576.0, 576.0, 530.0, 579.0, 570.0, 570.0, 516.0, 525.0, 525.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 282.0, 294.0, 279.0, 290.0, 283.0, 258.0, 258.0, 260.0, 265.0, 289.0, 287.0, 255.0, 261.0, 271.0, 251.0, 318.0, 312.0, 297.0, 285.0, 256.0, 260.0, 273.0, 297.0, 287.0, 295.0, 279.0, 297.0, 294.0, 279.0, 281.0, 298.0, 289.0, 281.0, 289.0, 290.0, 301.0, 278.0, 276.0, 297.0, 295.0, 278.0, 279.0, 297.0, 280.0, 293.0, 281.0, 295.0, 293.0, 283.0, 287.0, 289.0, 289.0, 284.0, 289.0, 278.0, 296.0, 277.0, 292.0, 281.0, 268.0, 262.0, 294.0, 284.0, 266.0, 259.0, 290.0, 286.0, 265.0, 257.0, 262.0, 260.0, 250.0, 266.0, 295.0, 284.0, 255.0, 272.0, 284.0, 289.0, 254.0, 262.0, 255.0, 270.0, 259.0, 266.0, 289.0, 287.0, 266.0, 259.0, 257.0, 268.0, 295.0, 278.0, 294.0, 273.0, 263.0, 256.0, 287.0, 286.0, 299.0, 274.0, 259.0, 266.0, 266.0, 264.0, 286.0, 293.0, 261.0, 261.0, 293.0, 288.0, 261.0, 264.0, 256.0, 269.0, 292.0, 290.0, 257.0, 273.0, 282.0, 297.0, 283.0, 290.0, 265.0, 260.0, 292.0, 
284.0, 289.0, 287.0, 301.0, 278.0, 281.0, 292.0, 283.0, 296.0, 294.0, 279.0, 283.0, 293.0, 292.0, 298.0, 271.0, 256.0, 286.0, 293.0, 260.0, 256.0, 260.0, 262.0, 297.0, 276.0, 283.0, 293.0, 289.0, 287.0, 301.0, 278.0, 291.0, 285.0, 299.0, 282.0, 290.0, 289.0, 304.0, 323.0, 276.0, 297.0, 264.0, 258.0, 266.0, 264.0, 280.0, 296.0, 297.0, 279.0, 281.0, 295.0, 286.0, 284.0, 289.0, 287.0, 282.0, 294.0, 275.0, 255.0, 297.0, 282.0, 283.0, 287.0, 286.0, 284.0, 253.0, 263.0, 269.0, 256.0, 248.0, 277.0, 263.0, 262.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7081123236132713, "mean_inference_ms": 1.2718984380888532, "mean_action_processing_ms": 0.1351636109557676, "mean_env_wait_ms": 0.8532669400902704, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6195200, "num_agent_steps_trained": 6195200, "num_env_steps_sampled": 3097600, "num_env_steps_trained": 3097600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3097600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6195200, "timers": {"training_iteration_time_ms": 3658.23, "learn_time_ms": 1121.317, "learn_throughput": 11415.149, "synch_weights_time_ms": 11.586}, "counters": {"num_env_steps_sampled": 3097600, "num_env_steps_trained": 3097600, "num_agent_steps_sampled": 6195200, "num_agent_steps_trained": 6195200}, "done": false, "episodes_total": 7744, "training_iteration": 242, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-13", "timestamp": 1666581373, "time_this_iter_s": 3.5342307090759277, "time_total_s": 939.0499882698059, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 939.0499882698059, "timesteps_since_restore": 0, "iterations_since_restore": 242, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.94, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 171.68, "shaped_reward_min": 156, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.48, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.23, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.31, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.07, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.14, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.83, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.83, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.14, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.83, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.14, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.83, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011442599352449179, "policy_loss": -0.0014641538728028536, "vf_loss": 7.753890037536621, "vf_explained_var": 0.5846375823020935, "kl": 0.002333172596991062, "entropy": 0.9109914302825928, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3110400, "num_env_steps_trained": 3110400, "num_agent_steps_sampled": 6220800, "num_agent_steps_trained": 6220800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 516.0, "episode_reward_mean": 558.08, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 279.04}, "custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 171.68, "shaped_reward_min": 156, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.48, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.23, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.31, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.07, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.14, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.83, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.83, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.46, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.35, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.14, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.83, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.14, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.83, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 576.0, 522.0, 522.0, 516.0, 579.0, 527.0, 573.0, 516.0, 525.0, 525.0, 576.0, 525.0, 525.0, 573.0, 567.0, 519.0, 573.0, 573.0, 525.0, 530.0, 579.0, 522.0, 581.0, 525.0, 525.0, 582.0, 530.0, 579.0, 573.0, 525.0, 576.0, 576.0, 579.0, 573.0, 579.0, 573.0, 576.0, 590.0, 527.0, 579.0, 516.0, 522.0, 573.0, 576.0, 576.0, 579.0, 576.0, 581.0, 579.0, 627.0, 573.0, 522.0, 530.0, 576.0, 576.0, 576.0, 570.0, 576.0, 576.0, 530.0, 579.0, 570.0, 570.0, 516.0, 525.0, 525.0, 525.0, 570.0, 525.0, 630.0, 525.0, 576.0, 570.0, 573.0, 522.0, 582.0, 533.0, 570.0, 573.0, 576.0, 576.0, 576.0, 525.0, 576.0, 522.0, 579.0, 576.0, 573.0, 525.0, 576.0, 573.0, 573.0, 576.0, 579.0, 573.0, 525.0, 573.0, 573.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [266.0, 259.0, 290.0, 286.0, 265.0, 257.0, 262.0, 260.0, 250.0, 266.0, 295.0, 
284.0, 255.0, 272.0, 284.0, 289.0, 254.0, 262.0, 255.0, 270.0, 259.0, 266.0, 289.0, 287.0, 266.0, 259.0, 257.0, 268.0, 295.0, 278.0, 294.0, 273.0, 263.0, 256.0, 287.0, 286.0, 299.0, 274.0, 259.0, 266.0, 266.0, 264.0, 286.0, 293.0, 261.0, 261.0, 293.0, 288.0, 261.0, 264.0, 256.0, 269.0, 292.0, 290.0, 257.0, 273.0, 282.0, 297.0, 283.0, 290.0, 265.0, 260.0, 292.0, 284.0, 289.0, 287.0, 301.0, 278.0, 281.0, 292.0, 283.0, 296.0, 294.0, 279.0, 283.0, 293.0, 292.0, 298.0, 271.0, 256.0, 286.0, 293.0, 260.0, 256.0, 260.0, 262.0, 297.0, 276.0, 283.0, 293.0, 289.0, 287.0, 301.0, 278.0, 291.0, 285.0, 299.0, 282.0, 290.0, 289.0, 304.0, 323.0, 276.0, 297.0, 264.0, 258.0, 266.0, 264.0, 280.0, 296.0, 297.0, 279.0, 281.0, 295.0, 286.0, 284.0, 289.0, 287.0, 282.0, 294.0, 275.0, 255.0, 297.0, 282.0, 283.0, 287.0, 286.0, 284.0, 253.0, 263.0, 269.0, 256.0, 248.0, 277.0, 263.0, 262.0, 287.0, 283.0, 259.0, 266.0, 323.0, 307.0, 271.0, 254.0, 277.0, 299.0, 282.0, 288.0, 289.0, 284.0, 257.0, 265.0, 293.0, 289.0, 262.0, 271.0, 286.0, 284.0, 288.0, 285.0, 293.0, 283.0, 285.0, 291.0, 289.0, 287.0, 271.0, 254.0, 285.0, 291.0, 259.0, 263.0, 291.0, 288.0, 287.0, 289.0, 291.0, 282.0, 264.0, 261.0, 282.0, 294.0, 282.0, 291.0, 293.0, 280.0, 295.0, 281.0, 286.0, 293.0, 281.0, 292.0, 269.0, 256.0, 288.0, 285.0, 288.0, 285.0, 296.0, 277.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7079764704995039, "mean_inference_ms": 1.2715597737459958, "mean_action_processing_ms": 0.13514716224342163, "mean_env_wait_ms": 0.8530627219429394, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 516.0, "episode_reward_mean": 558.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 279.04}, "hist_stats": {"episode_reward": [525.0, 576.0, 522.0, 522.0, 516.0, 579.0, 527.0, 573.0, 516.0, 525.0, 525.0, 576.0, 525.0, 525.0, 573.0, 567.0, 519.0, 573.0, 573.0, 
525.0, 530.0, 579.0, 522.0, 581.0, 525.0, 525.0, 582.0, 530.0, 579.0, 573.0, 525.0, 576.0, 576.0, 579.0, 573.0, 579.0, 573.0, 576.0, 590.0, 527.0, 579.0, 516.0, 522.0, 573.0, 576.0, 576.0, 579.0, 576.0, 581.0, 579.0, 627.0, 573.0, 522.0, 530.0, 576.0, 576.0, 576.0, 570.0, 576.0, 576.0, 530.0, 579.0, 570.0, 570.0, 516.0, 525.0, 525.0, 525.0, 570.0, 525.0, 630.0, 525.0, 576.0, 570.0, 573.0, 522.0, 582.0, 533.0, 570.0, 573.0, 576.0, 576.0, 576.0, 525.0, 576.0, 522.0, 579.0, 576.0, 573.0, 525.0, 576.0, 573.0, 573.0, 576.0, 579.0, 573.0, 525.0, 573.0, 573.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [266.0, 259.0, 290.0, 286.0, 265.0, 257.0, 262.0, 260.0, 250.0, 266.0, 295.0, 284.0, 255.0, 272.0, 284.0, 289.0, 254.0, 262.0, 255.0, 270.0, 259.0, 266.0, 289.0, 287.0, 266.0, 259.0, 257.0, 268.0, 295.0, 278.0, 294.0, 273.0, 263.0, 256.0, 287.0, 286.0, 299.0, 274.0, 259.0, 266.0, 266.0, 264.0, 286.0, 293.0, 261.0, 261.0, 293.0, 288.0, 261.0, 264.0, 256.0, 269.0, 292.0, 290.0, 257.0, 273.0, 282.0, 297.0, 283.0, 290.0, 265.0, 260.0, 292.0, 284.0, 289.0, 287.0, 301.0, 278.0, 281.0, 292.0, 283.0, 296.0, 294.0, 279.0, 283.0, 293.0, 292.0, 298.0, 271.0, 256.0, 286.0, 293.0, 260.0, 256.0, 260.0, 262.0, 297.0, 276.0, 283.0, 293.0, 289.0, 287.0, 301.0, 278.0, 291.0, 285.0, 299.0, 282.0, 290.0, 289.0, 304.0, 323.0, 276.0, 297.0, 264.0, 258.0, 266.0, 264.0, 280.0, 296.0, 297.0, 279.0, 281.0, 295.0, 286.0, 284.0, 289.0, 287.0, 282.0, 294.0, 275.0, 255.0, 297.0, 282.0, 283.0, 287.0, 286.0, 
284.0, 253.0, 263.0, 269.0, 256.0, 248.0, 277.0, 263.0, 262.0, 287.0, 283.0, 259.0, 266.0, 323.0, 307.0, 271.0, 254.0, 277.0, 299.0, 282.0, 288.0, 289.0, 284.0, 257.0, 265.0, 293.0, 289.0, 262.0, 271.0, 286.0, 284.0, 288.0, 285.0, 293.0, 283.0, 285.0, 291.0, 289.0, 287.0, 271.0, 254.0, 285.0, 291.0, 259.0, 263.0, 291.0, 288.0, 287.0, 289.0, 291.0, 282.0, 264.0, 261.0, 282.0, 294.0, 282.0, 291.0, 293.0, 280.0, 295.0, 281.0, 286.0, 293.0, 281.0, 292.0, 269.0, 256.0, 288.0, 285.0, 288.0, 285.0, 296.0, 277.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7079764704995039, "mean_inference_ms": 1.2715597737459958, "mean_action_processing_ms": 0.13514716224342163, "mean_env_wait_ms": 0.8530627219429394, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6220800, "num_agent_steps_trained": 6220800, "num_env_steps_sampled": 3110400, "num_env_steps_trained": 3110400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3110400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6220800, "timers": {"training_iteration_time_ms": 3653.687, "learn_time_ms": 1113.56, "learn_throughput": 11494.667, "synch_weights_time_ms": 12.808}, "counters": {"num_env_steps_sampled": 3110400, "num_env_steps_trained": 3110400, "num_agent_steps_sampled": 6220800, "num_agent_steps_trained": 6220800}, "done": false, "episodes_total": 7776, "training_iteration": 243, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-17", "timestamp": 1666581377, "time_this_iter_s": 3.7209229469299316, "time_total_s": 942.7709112167358, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 942.7709112167358, "timesteps_since_restore": 0, "iterations_since_restore": 243, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.566666666666666, "ram_util_percent": 10.633333333333333}} +{"custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.82, "shaped_reward_min": 139, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.3, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.47, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.14, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.08, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.01, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.09, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.7, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.36, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.24, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.2, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.01, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.09, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.01, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.09, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00035139062674716115, "policy_loss": -0.0006855684332549572, "vf_loss": 7.859228610992432, "vf_explained_var": 0.5928146243095398, "kl": 0.002471503335982561, "entropy": 0.903489351272583, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3123200, "num_env_steps_trained": 3123200, "num_agent_steps_sampled": 6246400, "num_agent_steps_trained": 6246400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 459.0, "episode_reward_mean": 564.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 282.01}, "custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.82, "shaped_reward_min": 139, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.3, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.47, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.14, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.01, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.09, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.7, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.36, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.09, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.24, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.2, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.01, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.09, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.01, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.09, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 573.0, 579.0, 573.0, 576.0, 590.0, 527.0, 579.0, 516.0, 522.0, 573.0, 576.0, 576.0, 579.0, 576.0, 581.0, 579.0, 627.0, 573.0, 522.0, 530.0, 576.0, 576.0, 576.0, 570.0, 576.0, 576.0, 530.0, 579.0, 570.0, 570.0, 516.0, 525.0, 525.0, 525.0, 570.0, 525.0, 630.0, 525.0, 576.0, 570.0, 573.0, 522.0, 582.0, 533.0, 570.0, 573.0, 576.0, 576.0, 576.0, 525.0, 576.0, 522.0, 579.0, 576.0, 573.0, 525.0, 576.0, 573.0, 573.0, 576.0, 579.0, 573.0, 525.0, 573.0, 573.0, 573.0, 582.0, 570.0, 579.0, 582.0, 576.0, 576.0, 573.0, 576.0, 579.0, 567.0, 576.0, 522.0, 573.0, 522.0, 570.0, 579.0, 567.0, 579.0, 567.0, 573.0, 587.0, 576.0, 525.0, 573.0, 573.0, 579.0, 525.0, 459.0, 570.0, 576.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 287.0, 301.0, 278.0, 281.0, 292.0, 283.0, 296.0, 294.0, 279.0, 283.0, 
293.0, 292.0, 298.0, 271.0, 256.0, 286.0, 293.0, 260.0, 256.0, 260.0, 262.0, 297.0, 276.0, 283.0, 293.0, 289.0, 287.0, 301.0, 278.0, 291.0, 285.0, 299.0, 282.0, 290.0, 289.0, 304.0, 323.0, 276.0, 297.0, 264.0, 258.0, 266.0, 264.0, 280.0, 296.0, 297.0, 279.0, 281.0, 295.0, 286.0, 284.0, 289.0, 287.0, 282.0, 294.0, 275.0, 255.0, 297.0, 282.0, 283.0, 287.0, 286.0, 284.0, 253.0, 263.0, 269.0, 256.0, 248.0, 277.0, 263.0, 262.0, 287.0, 283.0, 259.0, 266.0, 323.0, 307.0, 271.0, 254.0, 277.0, 299.0, 282.0, 288.0, 289.0, 284.0, 257.0, 265.0, 293.0, 289.0, 262.0, 271.0, 286.0, 284.0, 288.0, 285.0, 293.0, 283.0, 285.0, 291.0, 289.0, 287.0, 271.0, 254.0, 285.0, 291.0, 259.0, 263.0, 291.0, 288.0, 287.0, 289.0, 291.0, 282.0, 264.0, 261.0, 282.0, 294.0, 282.0, 291.0, 293.0, 280.0, 295.0, 281.0, 286.0, 293.0, 281.0, 292.0, 269.0, 256.0, 288.0, 285.0, 288.0, 285.0, 296.0, 277.0, 294.0, 288.0, 287.0, 283.0, 292.0, 287.0, 285.0, 297.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 297.0, 279.0, 293.0, 286.0, 294.0, 273.0, 291.0, 285.0, 271.0, 251.0, 272.0, 301.0, 257.0, 265.0, 286.0, 284.0, 296.0, 283.0, 294.0, 273.0, 295.0, 284.0, 284.0, 283.0, 282.0, 291.0, 295.0, 292.0, 293.0, 283.0, 261.0, 264.0, 300.0, 273.0, 278.0, 295.0, 287.0, 292.0, 260.0, 265.0, 240.0, 219.0, 292.0, 278.0, 295.0, 281.0, 278.0, 298.0, 293.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7078702523741072, "mean_inference_ms": 1.2712685166157995, "mean_action_processing_ms": 0.13513782419630282, "mean_env_wait_ms": 0.8529134216315836, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 459.0, "episode_reward_mean": 564.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 282.01}, "hist_stats": {"episode_reward": [576.0, 579.0, 573.0, 579.0, 573.0, 576.0, 590.0, 527.0, 579.0, 516.0, 522.0, 573.0, 576.0, 576.0, 579.0, 576.0, 581.0, 579.0, 627.0, 
573.0, 522.0, 530.0, 576.0, 576.0, 576.0, 570.0, 576.0, 576.0, 530.0, 579.0, 570.0, 570.0, 516.0, 525.0, 525.0, 525.0, 570.0, 525.0, 630.0, 525.0, 576.0, 570.0, 573.0, 522.0, 582.0, 533.0, 570.0, 573.0, 576.0, 576.0, 576.0, 525.0, 576.0, 522.0, 579.0, 576.0, 573.0, 525.0, 576.0, 573.0, 573.0, 576.0, 579.0, 573.0, 525.0, 573.0, 573.0, 573.0, 582.0, 570.0, 579.0, 582.0, 576.0, 576.0, 573.0, 576.0, 579.0, 567.0, 576.0, 522.0, 573.0, 522.0, 570.0, 579.0, 567.0, 579.0, 567.0, 573.0, 587.0, 576.0, 525.0, 573.0, 573.0, 579.0, 525.0, 459.0, 570.0, 576.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 287.0, 301.0, 278.0, 281.0, 292.0, 283.0, 296.0, 294.0, 279.0, 283.0, 293.0, 292.0, 298.0, 271.0, 256.0, 286.0, 293.0, 260.0, 256.0, 260.0, 262.0, 297.0, 276.0, 283.0, 293.0, 289.0, 287.0, 301.0, 278.0, 291.0, 285.0, 299.0, 282.0, 290.0, 289.0, 304.0, 323.0, 276.0, 297.0, 264.0, 258.0, 266.0, 264.0, 280.0, 296.0, 297.0, 279.0, 281.0, 295.0, 286.0, 284.0, 289.0, 287.0, 282.0, 294.0, 275.0, 255.0, 297.0, 282.0, 283.0, 287.0, 286.0, 284.0, 253.0, 263.0, 269.0, 256.0, 248.0, 277.0, 263.0, 262.0, 287.0, 283.0, 259.0, 266.0, 323.0, 307.0, 271.0, 254.0, 277.0, 299.0, 282.0, 288.0, 289.0, 284.0, 257.0, 265.0, 293.0, 289.0, 262.0, 271.0, 286.0, 284.0, 288.0, 285.0, 293.0, 283.0, 285.0, 291.0, 289.0, 287.0, 271.0, 254.0, 285.0, 291.0, 259.0, 263.0, 291.0, 288.0, 287.0, 289.0, 291.0, 282.0, 264.0, 261.0, 282.0, 294.0, 282.0, 291.0, 293.0, 280.0, 295.0, 281.0, 286.0, 293.0, 281.0, 
292.0, 269.0, 256.0, 288.0, 285.0, 288.0, 285.0, 296.0, 277.0, 294.0, 288.0, 287.0, 283.0, 292.0, 287.0, 285.0, 297.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 297.0, 279.0, 293.0, 286.0, 294.0, 273.0, 291.0, 285.0, 271.0, 251.0, 272.0, 301.0, 257.0, 265.0, 286.0, 284.0, 296.0, 283.0, 294.0, 273.0, 295.0, 284.0, 284.0, 283.0, 282.0, 291.0, 295.0, 292.0, 293.0, 283.0, 261.0, 264.0, 300.0, 273.0, 278.0, 295.0, 287.0, 292.0, 260.0, 265.0, 240.0, 219.0, 292.0, 278.0, 295.0, 281.0, 278.0, 298.0, 293.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7078702523741072, "mean_inference_ms": 1.2712685166157995, "mean_action_processing_ms": 0.13513782419630282, "mean_env_wait_ms": 0.8529134216315836, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6246400, "num_agent_steps_trained": 6246400, "num_env_steps_sampled": 3123200, "num_env_steps_trained": 3123200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3123200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6246400, "timers": {"training_iteration_time_ms": 3653.097, "learn_time_ms": 1104.408, "learn_throughput": 11589.92, "synch_weights_time_ms": 14.292}, "counters": {"num_env_steps_sampled": 3123200, "num_env_steps_trained": 3123200, "num_agent_steps_sampled": 6246400, "num_agent_steps_trained": 6246400}, "done": false, "episodes_total": 7808, "training_iteration": 244, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-21", "timestamp": 1666581381, "time_this_iter_s": 3.7214155197143555, "time_total_s": 946.4923267364502, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 946.4923267364502, "timesteps_since_restore": 0, "iterations_since_restore": 244, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.88, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.64, "shaped_reward_min": 139, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.64, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.47, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.28, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.79, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.9, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.4, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.33, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.28, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.79, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.28, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.79, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0025330903008580208, "policy_loss": -0.0028688705060631037, "vf_loss": 7.884381294250488, "vf_explained_var": 0.582945704460144, "kl": 0.0022667073644697666, "entropy": 0.9053138494491577, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3136000, "num_env_steps_trained": 3136000, "num_agent_steps_sampled": 6272000, "num_agent_steps_trained": 6272000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 459.0, "episode_reward_mean": 561.44, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 280.72}, "custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.64, "shaped_reward_min": 139, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.64, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.47, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.28, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.79, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.9, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.54, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.11, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.4, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.33, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.28, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.79, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.28, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.79, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [516.0, 525.0, 525.0, 525.0, 570.0, 525.0, 630.0, 525.0, 576.0, 570.0, 573.0, 522.0, 582.0, 533.0, 570.0, 573.0, 576.0, 576.0, 576.0, 525.0, 576.0, 522.0, 579.0, 576.0, 573.0, 525.0, 576.0, 573.0, 573.0, 576.0, 579.0, 573.0, 525.0, 573.0, 573.0, 573.0, 582.0, 570.0, 579.0, 582.0, 576.0, 576.0, 573.0, 576.0, 579.0, 567.0, 576.0, 522.0, 573.0, 522.0, 570.0, 579.0, 567.0, 579.0, 567.0, 573.0, 587.0, 576.0, 525.0, 573.0, 573.0, 579.0, 525.0, 459.0, 570.0, 576.0, 576.0, 576.0, 533.0, 573.0, 522.0, 576.0, 576.0, 570.0, 576.0, 579.0, 487.0, 525.0, 525.0, 576.0, 530.0, 519.0, 530.0, 579.0, 579.0, 522.0, 573.0, 530.0, 627.0, 576.0, 582.0, 576.0, 570.0, 573.0, 582.0, 627.0, 525.0, 582.0, 544.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 263.0, 269.0, 256.0, 248.0, 277.0, 263.0, 262.0, 287.0, 283.0, 259.0, 
266.0, 323.0, 307.0, 271.0, 254.0, 277.0, 299.0, 282.0, 288.0, 289.0, 284.0, 257.0, 265.0, 293.0, 289.0, 262.0, 271.0, 286.0, 284.0, 288.0, 285.0, 293.0, 283.0, 285.0, 291.0, 289.0, 287.0, 271.0, 254.0, 285.0, 291.0, 259.0, 263.0, 291.0, 288.0, 287.0, 289.0, 291.0, 282.0, 264.0, 261.0, 282.0, 294.0, 282.0, 291.0, 293.0, 280.0, 295.0, 281.0, 286.0, 293.0, 281.0, 292.0, 269.0, 256.0, 288.0, 285.0, 288.0, 285.0, 296.0, 277.0, 294.0, 288.0, 287.0, 283.0, 292.0, 287.0, 285.0, 297.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 297.0, 279.0, 293.0, 286.0, 294.0, 273.0, 291.0, 285.0, 271.0, 251.0, 272.0, 301.0, 257.0, 265.0, 286.0, 284.0, 296.0, 283.0, 294.0, 273.0, 295.0, 284.0, 284.0, 283.0, 282.0, 291.0, 295.0, 292.0, 293.0, 283.0, 261.0, 264.0, 300.0, 273.0, 278.0, 295.0, 287.0, 292.0, 260.0, 265.0, 240.0, 219.0, 292.0, 278.0, 295.0, 281.0, 278.0, 298.0, 293.0, 283.0, 263.0, 270.0, 280.0, 293.0, 258.0, 264.0, 290.0, 286.0, 281.0, 295.0, 284.0, 286.0, 284.0, 292.0, 285.0, 294.0, 242.0, 245.0, 266.0, 259.0, 265.0, 260.0, 287.0, 289.0, 272.0, 258.0, 260.0, 259.0, 269.0, 261.0, 295.0, 284.0, 296.0, 283.0, 266.0, 256.0, 287.0, 286.0, 265.0, 265.0, 314.0, 313.0, 291.0, 285.0, 293.0, 289.0, 281.0, 295.0, 277.0, 293.0, 286.0, 287.0, 291.0, 291.0, 300.0, 327.0, 264.0, 261.0, 289.0, 293.0, 271.0, 273.0, 292.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7077555771781661, "mean_inference_ms": 1.2709830719233666, "mean_action_processing_ms": 0.13513002534520807, "mean_env_wait_ms": 0.8527685578396544, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 459.0, "episode_reward_mean": 561.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 280.72}, "hist_stats": {"episode_reward": [516.0, 525.0, 525.0, 525.0, 570.0, 525.0, 630.0, 525.0, 576.0, 570.0, 573.0, 522.0, 582.0, 533.0, 570.0, 573.0, 576.0, 576.0, 576.0, 
525.0, 576.0, 522.0, 579.0, 576.0, 573.0, 525.0, 576.0, 573.0, 573.0, 576.0, 579.0, 573.0, 525.0, 573.0, 573.0, 573.0, 582.0, 570.0, 579.0, 582.0, 576.0, 576.0, 573.0, 576.0, 579.0, 567.0, 576.0, 522.0, 573.0, 522.0, 570.0, 579.0, 567.0, 579.0, 567.0, 573.0, 587.0, 576.0, 525.0, 573.0, 573.0, 579.0, 525.0, 459.0, 570.0, 576.0, 576.0, 576.0, 533.0, 573.0, 522.0, 576.0, 576.0, 570.0, 576.0, 579.0, 487.0, 525.0, 525.0, 576.0, 530.0, 519.0, 530.0, 579.0, 579.0, 522.0, 573.0, 530.0, 627.0, 576.0, 582.0, 576.0, 570.0, 573.0, 582.0, 627.0, 525.0, 582.0, 544.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 263.0, 269.0, 256.0, 248.0, 277.0, 263.0, 262.0, 287.0, 283.0, 259.0, 266.0, 323.0, 307.0, 271.0, 254.0, 277.0, 299.0, 282.0, 288.0, 289.0, 284.0, 257.0, 265.0, 293.0, 289.0, 262.0, 271.0, 286.0, 284.0, 288.0, 285.0, 293.0, 283.0, 285.0, 291.0, 289.0, 287.0, 271.0, 254.0, 285.0, 291.0, 259.0, 263.0, 291.0, 288.0, 287.0, 289.0, 291.0, 282.0, 264.0, 261.0, 282.0, 294.0, 282.0, 291.0, 293.0, 280.0, 295.0, 281.0, 286.0, 293.0, 281.0, 292.0, 269.0, 256.0, 288.0, 285.0, 288.0, 285.0, 296.0, 277.0, 294.0, 288.0, 287.0, 283.0, 292.0, 287.0, 285.0, 297.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 297.0, 279.0, 293.0, 286.0, 294.0, 273.0, 291.0, 285.0, 271.0, 251.0, 272.0, 301.0, 257.0, 265.0, 286.0, 284.0, 296.0, 283.0, 294.0, 273.0, 295.0, 284.0, 284.0, 283.0, 282.0, 291.0, 295.0, 292.0, 293.0, 283.0, 261.0, 264.0, 300.0, 273.0, 278.0, 295.0, 287.0, 292.0, 260.0, 265.0, 240.0, 
219.0, 292.0, 278.0, 295.0, 281.0, 278.0, 298.0, 293.0, 283.0, 263.0, 270.0, 280.0, 293.0, 258.0, 264.0, 290.0, 286.0, 281.0, 295.0, 284.0, 286.0, 284.0, 292.0, 285.0, 294.0, 242.0, 245.0, 266.0, 259.0, 265.0, 260.0, 287.0, 289.0, 272.0, 258.0, 260.0, 259.0, 269.0, 261.0, 295.0, 284.0, 296.0, 283.0, 266.0, 256.0, 287.0, 286.0, 265.0, 265.0, 314.0, 313.0, 291.0, 285.0, 293.0, 289.0, 281.0, 295.0, 277.0, 293.0, 286.0, 287.0, 291.0, 291.0, 300.0, 327.0, 264.0, 261.0, 289.0, 293.0, 271.0, 273.0, 292.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7077555771781661, "mean_inference_ms": 1.2709830719233666, "mean_action_processing_ms": 0.13513002534520807, "mean_env_wait_ms": 0.8527685578396544, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6272000, "num_agent_steps_trained": 6272000, "num_env_steps_sampled": 3136000, "num_env_steps_trained": 3136000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3136000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6272000, "timers": {"training_iteration_time_ms": 3645.573, "learn_time_ms": 1095.891, "learn_throughput": 11679.998, "synch_weights_time_ms": 14.283}, "counters": {"num_env_steps_sampled": 3136000, "num_env_steps_trained": 3136000, "num_agent_steps_sampled": 6272000, "num_agent_steps_trained": 6272000}, "done": false, "episodes_total": 7840, "training_iteration": 245, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-24", "timestamp": 1666581384, "time_this_iter_s": 3.633239984512329, "time_total_s": 950.1255667209625, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 950.1255667209625, "timesteps_since_restore": 0, "iterations_since_restore": 245, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.849999999999998, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 171.95, "shaped_reward_min": 139, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.22, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.53, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.03, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.3, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.11, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.15, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 
14.82, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.15, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.15, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00012638827320188284, "policy_loss": -0.00020441529341042042, "vf_loss": 7.864180564880371, "vf_explained_var": 0.5898736715316772, "kl": 0.0021721776574850082, "entropy": 0.9112254977226257, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3148800, "num_env_steps_trained": 3148800, "num_agent_steps_sampled": 6297600, "num_agent_steps_trained": 6297600}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 459.0, "episode_reward_mean": 559.15, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 279.575}, "custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 171.95, "shaped_reward_min": 139, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.22, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.53, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.03, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.3, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.15, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.26, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.15, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.15, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 573.0, 573.0, 573.0, 582.0, 570.0, 579.0, 582.0, 576.0, 576.0, 573.0, 576.0, 579.0, 567.0, 576.0, 522.0, 573.0, 522.0, 570.0, 579.0, 567.0, 579.0, 567.0, 573.0, 587.0, 576.0, 525.0, 573.0, 573.0, 579.0, 525.0, 459.0, 570.0, 576.0, 576.0, 576.0, 533.0, 573.0, 522.0, 576.0, 576.0, 570.0, 576.0, 579.0, 487.0, 525.0, 525.0, 576.0, 530.0, 519.0, 530.0, 579.0, 579.0, 522.0, 573.0, 530.0, 627.0, 576.0, 582.0, 576.0, 570.0, 573.0, 582.0, 627.0, 525.0, 582.0, 544.0, 579.0, 579.0, 525.0, 570.0, 519.0, 522.0, 519.0, 573.0, 519.0, 576.0, 522.0, 570.0, 570.0, 582.0, 582.0, 522.0, 573.0, 530.0, 530.0, 525.0, 519.0, 570.0, 522.0, 522.0, 522.0, 573.0, 570.0, 579.0, 579.0, 570.0, 582.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 256.0, 288.0, 285.0, 288.0, 285.0, 296.0, 277.0, 294.0, 288.0, 287.0, 
283.0, 292.0, 287.0, 285.0, 297.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 297.0, 279.0, 293.0, 286.0, 294.0, 273.0, 291.0, 285.0, 271.0, 251.0, 272.0, 301.0, 257.0, 265.0, 286.0, 284.0, 296.0, 283.0, 294.0, 273.0, 295.0, 284.0, 284.0, 283.0, 282.0, 291.0, 295.0, 292.0, 293.0, 283.0, 261.0, 264.0, 300.0, 273.0, 278.0, 295.0, 287.0, 292.0, 260.0, 265.0, 240.0, 219.0, 292.0, 278.0, 295.0, 281.0, 278.0, 298.0, 293.0, 283.0, 263.0, 270.0, 280.0, 293.0, 258.0, 264.0, 290.0, 286.0, 281.0, 295.0, 284.0, 286.0, 284.0, 292.0, 285.0, 294.0, 242.0, 245.0, 266.0, 259.0, 265.0, 260.0, 287.0, 289.0, 272.0, 258.0, 260.0, 259.0, 269.0, 261.0, 295.0, 284.0, 296.0, 283.0, 266.0, 256.0, 287.0, 286.0, 265.0, 265.0, 314.0, 313.0, 291.0, 285.0, 293.0, 289.0, 281.0, 295.0, 277.0, 293.0, 286.0, 287.0, 291.0, 291.0, 300.0, 327.0, 264.0, 261.0, 289.0, 293.0, 271.0, 273.0, 292.0, 287.0, 293.0, 286.0, 261.0, 264.0, 277.0, 293.0, 254.0, 265.0, 256.0, 266.0, 264.0, 255.0, 293.0, 280.0, 257.0, 262.0, 285.0, 291.0, 272.0, 250.0, 282.0, 288.0, 289.0, 281.0, 300.0, 282.0, 287.0, 295.0, 264.0, 258.0, 297.0, 276.0, 272.0, 258.0, 263.0, 267.0, 259.0, 266.0, 263.0, 256.0, 289.0, 281.0, 253.0, 269.0, 261.0, 261.0, 265.0, 257.0, 284.0, 289.0, 288.0, 282.0, 291.0, 288.0, 280.0, 299.0, 282.0, 288.0, 300.0, 282.0, 286.0, 290.0, 297.0, 276.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7076406330396047, "mean_inference_ms": 1.2706812436953308, "mean_action_processing_ms": 0.13512051871923203, "mean_env_wait_ms": 0.8526030070406487, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 459.0, "episode_reward_mean": 559.15, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 279.575}, "hist_stats": {"episode_reward": [525.0, 573.0, 573.0, 573.0, 582.0, 570.0, 579.0, 582.0, 576.0, 576.0, 573.0, 576.0, 579.0, 567.0, 576.0, 522.0, 573.0, 522.0, 
570.0, 579.0, 567.0, 579.0, 567.0, 573.0, 587.0, 576.0, 525.0, 573.0, 573.0, 579.0, 525.0, 459.0, 570.0, 576.0, 576.0, 576.0, 533.0, 573.0, 522.0, 576.0, 576.0, 570.0, 576.0, 579.0, 487.0, 525.0, 525.0, 576.0, 530.0, 519.0, 530.0, 579.0, 579.0, 522.0, 573.0, 530.0, 627.0, 576.0, 582.0, 576.0, 570.0, 573.0, 582.0, 627.0, 525.0, 582.0, 544.0, 579.0, 579.0, 525.0, 570.0, 519.0, 522.0, 519.0, 573.0, 519.0, 576.0, 522.0, 570.0, 570.0, 582.0, 582.0, 522.0, 573.0, 530.0, 530.0, 525.0, 519.0, 570.0, 522.0, 522.0, 522.0, 573.0, 570.0, 579.0, 579.0, 570.0, 582.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 256.0, 288.0, 285.0, 288.0, 285.0, 296.0, 277.0, 294.0, 288.0, 287.0, 283.0, 292.0, 287.0, 285.0, 297.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 297.0, 279.0, 293.0, 286.0, 294.0, 273.0, 291.0, 285.0, 271.0, 251.0, 272.0, 301.0, 257.0, 265.0, 286.0, 284.0, 296.0, 283.0, 294.0, 273.0, 295.0, 284.0, 284.0, 283.0, 282.0, 291.0, 295.0, 292.0, 293.0, 283.0, 261.0, 264.0, 300.0, 273.0, 278.0, 295.0, 287.0, 292.0, 260.0, 265.0, 240.0, 219.0, 292.0, 278.0, 295.0, 281.0, 278.0, 298.0, 293.0, 283.0, 263.0, 270.0, 280.0, 293.0, 258.0, 264.0, 290.0, 286.0, 281.0, 295.0, 284.0, 286.0, 284.0, 292.0, 285.0, 294.0, 242.0, 245.0, 266.0, 259.0, 265.0, 260.0, 287.0, 289.0, 272.0, 258.0, 260.0, 259.0, 269.0, 261.0, 295.0, 284.0, 296.0, 283.0, 266.0, 256.0, 287.0, 286.0, 265.0, 265.0, 314.0, 313.0, 291.0, 285.0, 293.0, 289.0, 281.0, 295.0, 277.0, 293.0, 286.0, 287.0, 291.0, 291.0, 
300.0, 327.0, 264.0, 261.0, 289.0, 293.0, 271.0, 273.0, 292.0, 287.0, 293.0, 286.0, 261.0, 264.0, 277.0, 293.0, 254.0, 265.0, 256.0, 266.0, 264.0, 255.0, 293.0, 280.0, 257.0, 262.0, 285.0, 291.0, 272.0, 250.0, 282.0, 288.0, 289.0, 281.0, 300.0, 282.0, 287.0, 295.0, 264.0, 258.0, 297.0, 276.0, 272.0, 258.0, 263.0, 267.0, 259.0, 266.0, 263.0, 256.0, 289.0, 281.0, 253.0, 269.0, 261.0, 261.0, 265.0, 257.0, 284.0, 289.0, 288.0, 282.0, 291.0, 288.0, 280.0, 299.0, 282.0, 288.0, 300.0, 282.0, 286.0, 290.0, 297.0, 276.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7076406330396047, "mean_inference_ms": 1.2706812436953308, "mean_action_processing_ms": 0.13512051871923203, "mean_env_wait_ms": 0.8526030070406487, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6297600, "num_agent_steps_trained": 6297600, "num_env_steps_sampled": 3148800, "num_env_steps_trained": 3148800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3148800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6297600, "timers": {"training_iteration_time_ms": 3623.908, "learn_time_ms": 1080.918, "learn_throughput": 11841.787, "synch_weights_time_ms": 14.216}, "counters": {"num_env_steps_sampled": 3148800, "num_env_steps_trained": 3148800, "num_agent_steps_sampled": 6297600, "num_agent_steps_trained": 6297600}, "done": false, "episodes_total": 7872, "training_iteration": 246, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-28", "timestamp": 1666581388, "time_this_iter_s": 3.6680450439453125, "time_total_s": 953.7936117649078, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 953.7936117649078, "timesteps_since_restore": 0, "iterations_since_restore": 246, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.62, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 191.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 170.89, "shaped_reward_min": 156, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.81, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 15.81, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.63, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 15.61, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.39, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.3, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.0, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.39, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.39, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 8.097290992736816e-05, "policy_loss": -0.00024945399491116405, "vf_loss": 7.770112037658691, "vf_explained_var": 0.5763081908226013, "kl": 0.0022793293464928865, "entropy": 0.8931667804718018, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3161600, "num_env_steps_trained": 3161600, "num_agent_steps_sampled": 6323200, "num_agent_steps_trained": 6323200}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 476.0, "episode_reward_mean": 553.69, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 276.845}, "custom_metrics": {"sparse_reward_mean": 191.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 170.89, "shaped_reward_min": 156, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.81, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 15.81, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.63, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 15.61, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.39, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.3, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.0, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.06, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.39, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.39, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 576.0, 576.0, 576.0, 533.0, 573.0, 522.0, 576.0, 576.0, 570.0, 576.0, 579.0, 487.0, 525.0, 525.0, 576.0, 530.0, 519.0, 530.0, 579.0, 579.0, 522.0, 573.0, 530.0, 627.0, 576.0, 582.0, 576.0, 570.0, 573.0, 582.0, 627.0, 525.0, 582.0, 544.0, 579.0, 579.0, 525.0, 570.0, 519.0, 522.0, 519.0, 573.0, 519.0, 576.0, 522.0, 570.0, 570.0, 582.0, 582.0, 522.0, 573.0, 530.0, 530.0, 525.0, 519.0, 570.0, 522.0, 522.0, 522.0, 573.0, 570.0, 579.0, 579.0, 570.0, 582.0, 576.0, 573.0, 479.0, 573.0, 579.0, 576.0, 579.0, 573.0, 576.0, 570.0, 573.0, 516.0, 570.0, 476.0, 522.0, 573.0, 516.0, 570.0, 522.0, 582.0, 525.0, 522.0, 582.0, 522.0, 479.0, 533.0, 522.0, 525.0, 525.0, 579.0, 570.0, 570.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 278.0, 295.0, 281.0, 278.0, 298.0, 293.0, 283.0, 263.0, 270.0, 280.0, 
293.0, 258.0, 264.0, 290.0, 286.0, 281.0, 295.0, 284.0, 286.0, 284.0, 292.0, 285.0, 294.0, 242.0, 245.0, 266.0, 259.0, 265.0, 260.0, 287.0, 289.0, 272.0, 258.0, 260.0, 259.0, 269.0, 261.0, 295.0, 284.0, 296.0, 283.0, 266.0, 256.0, 287.0, 286.0, 265.0, 265.0, 314.0, 313.0, 291.0, 285.0, 293.0, 289.0, 281.0, 295.0, 277.0, 293.0, 286.0, 287.0, 291.0, 291.0, 300.0, 327.0, 264.0, 261.0, 289.0, 293.0, 271.0, 273.0, 292.0, 287.0, 293.0, 286.0, 261.0, 264.0, 277.0, 293.0, 254.0, 265.0, 256.0, 266.0, 264.0, 255.0, 293.0, 280.0, 257.0, 262.0, 285.0, 291.0, 272.0, 250.0, 282.0, 288.0, 289.0, 281.0, 300.0, 282.0, 287.0, 295.0, 264.0, 258.0, 297.0, 276.0, 272.0, 258.0, 263.0, 267.0, 259.0, 266.0, 263.0, 256.0, 289.0, 281.0, 253.0, 269.0, 261.0, 261.0, 265.0, 257.0, 284.0, 289.0, 288.0, 282.0, 291.0, 288.0, 280.0, 299.0, 282.0, 288.0, 300.0, 282.0, 286.0, 290.0, 297.0, 276.0, 243.0, 236.0, 288.0, 285.0, 291.0, 288.0, 293.0, 283.0, 296.0, 283.0, 276.0, 297.0, 291.0, 285.0, 285.0, 285.0, 282.0, 291.0, 270.0, 246.0, 280.0, 290.0, 246.0, 230.0, 265.0, 257.0, 284.0, 289.0, 261.0, 255.0, 292.0, 278.0, 269.0, 253.0, 279.0, 303.0, 262.0, 263.0, 265.0, 257.0, 297.0, 285.0, 252.0, 270.0, 238.0, 241.0, 262.0, 271.0, 261.0, 261.0, 263.0, 262.0, 270.0, 255.0, 286.0, 293.0, 285.0, 285.0, 281.0, 289.0, 290.0, 289.0, 253.0, 272.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.707511445459952, "mean_inference_ms": 1.2703453898963024, "mean_action_processing_ms": 0.13510609992774492, "mean_env_wait_ms": 0.8523951092536021, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 476.0, "episode_reward_mean": 553.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 276.845}, "hist_stats": {"episode_reward": [570.0, 576.0, 576.0, 576.0, 533.0, 573.0, 522.0, 576.0, 576.0, 570.0, 576.0, 579.0, 487.0, 525.0, 525.0, 576.0, 530.0, 519.0, 530.0, 
579.0, 579.0, 522.0, 573.0, 530.0, 627.0, 576.0, 582.0, 576.0, 570.0, 573.0, 582.0, 627.0, 525.0, 582.0, 544.0, 579.0, 579.0, 525.0, 570.0, 519.0, 522.0, 519.0, 573.0, 519.0, 576.0, 522.0, 570.0, 570.0, 582.0, 582.0, 522.0, 573.0, 530.0, 530.0, 525.0, 519.0, 570.0, 522.0, 522.0, 522.0, 573.0, 570.0, 579.0, 579.0, 570.0, 582.0, 576.0, 573.0, 479.0, 573.0, 579.0, 576.0, 579.0, 573.0, 576.0, 570.0, 573.0, 516.0, 570.0, 476.0, 522.0, 573.0, 516.0, 570.0, 522.0, 582.0, 525.0, 522.0, 582.0, 522.0, 479.0, 533.0, 522.0, 525.0, 525.0, 579.0, 570.0, 570.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 278.0, 295.0, 281.0, 278.0, 298.0, 293.0, 283.0, 263.0, 270.0, 280.0, 293.0, 258.0, 264.0, 290.0, 286.0, 281.0, 295.0, 284.0, 286.0, 284.0, 292.0, 285.0, 294.0, 242.0, 245.0, 266.0, 259.0, 265.0, 260.0, 287.0, 289.0, 272.0, 258.0, 260.0, 259.0, 269.0, 261.0, 295.0, 284.0, 296.0, 283.0, 266.0, 256.0, 287.0, 286.0, 265.0, 265.0, 314.0, 313.0, 291.0, 285.0, 293.0, 289.0, 281.0, 295.0, 277.0, 293.0, 286.0, 287.0, 291.0, 291.0, 300.0, 327.0, 264.0, 261.0, 289.0, 293.0, 271.0, 273.0, 292.0, 287.0, 293.0, 286.0, 261.0, 264.0, 277.0, 293.0, 254.0, 265.0, 256.0, 266.0, 264.0, 255.0, 293.0, 280.0, 257.0, 262.0, 285.0, 291.0, 272.0, 250.0, 282.0, 288.0, 289.0, 281.0, 300.0, 282.0, 287.0, 295.0, 264.0, 258.0, 297.0, 276.0, 272.0, 258.0, 263.0, 267.0, 259.0, 266.0, 263.0, 256.0, 289.0, 281.0, 253.0, 269.0, 261.0, 261.0, 265.0, 257.0, 284.0, 289.0, 288.0, 282.0, 291.0, 288.0, 280.0, 
299.0, 282.0, 288.0, 300.0, 282.0, 286.0, 290.0, 297.0, 276.0, 243.0, 236.0, 288.0, 285.0, 291.0, 288.0, 293.0, 283.0, 296.0, 283.0, 276.0, 297.0, 291.0, 285.0, 285.0, 285.0, 282.0, 291.0, 270.0, 246.0, 280.0, 290.0, 246.0, 230.0, 265.0, 257.0, 284.0, 289.0, 261.0, 255.0, 292.0, 278.0, 269.0, 253.0, 279.0, 303.0, 262.0, 263.0, 265.0, 257.0, 297.0, 285.0, 252.0, 270.0, 238.0, 241.0, 262.0, 271.0, 261.0, 261.0, 263.0, 262.0, 270.0, 255.0, 286.0, 293.0, 285.0, 285.0, 281.0, 289.0, 290.0, 289.0, 253.0, 272.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.707511445459952, "mean_inference_ms": 1.2703453898963024, "mean_action_processing_ms": 0.13510609992774492, "mean_env_wait_ms": 0.8523951092536021, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6323200, "num_agent_steps_trained": 6323200, "num_env_steps_sampled": 3161600, "num_env_steps_trained": 3161600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3161600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6323200, "timers": {"training_iteration_time_ms": 3597.532, "learn_time_ms": 1084.034, "learn_throughput": 11807.752, "synch_weights_time_ms": 14.162}, "counters": {"num_env_steps_sampled": 3161600, "num_env_steps_trained": 3161600, "num_agent_steps_sampled": 6323200, "num_agent_steps_trained": 6323200}, "done": false, "episodes_total": 7904, "training_iteration": 247, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-32", "timestamp": 1666581392, "time_this_iter_s": 3.6923258304595947, "time_total_s": 957.4859375953674, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 957.4859375953674, "timesteps_since_restore": 0, "iterations_since_restore": 247, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.48333333333333, "ram_util_percent": 10.616666666666665}} +{"custom_metrics": {"sparse_reward_mean": 190.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.62, "shaped_reward_min": 153, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.83, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.51, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.67, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 15.38, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.1, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.41, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.2, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.15, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.88, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.82, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.82, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.41, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.2, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.41, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.2, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028802365995943546, "policy_loss": 0.0025495009031146765, "vf_loss": 7.888293266296387, "vf_explained_var": 0.5572105050086975, "kl": 0.0023757275193929672, "entropy": 0.9161854386329651, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3174400, "num_env_steps_trained": 3174400, "num_agent_steps_sampled": 6348800, "num_agent_steps_trained": 6348800}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 476.0, "episode_reward_mean": 550.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 275.21}, "custom_metrics": {"sparse_reward_mean": 190.4, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.62, "shaped_reward_min": 153, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.83, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.51, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.67, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 15.38, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.41, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.2, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.15, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 4.88, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.82, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.82, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.05, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.41, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.2, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.41, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.2, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 582.0, 544.0, 579.0, 579.0, 525.0, 570.0, 519.0, 522.0, 519.0, 573.0, 519.0, 576.0, 522.0, 570.0, 570.0, 582.0, 582.0, 522.0, 573.0, 530.0, 530.0, 525.0, 519.0, 570.0, 522.0, 522.0, 522.0, 573.0, 570.0, 579.0, 579.0, 570.0, 582.0, 576.0, 573.0, 479.0, 573.0, 579.0, 576.0, 579.0, 573.0, 576.0, 570.0, 573.0, 516.0, 570.0, 476.0, 522.0, 573.0, 516.0, 570.0, 522.0, 582.0, 525.0, 522.0, 582.0, 522.0, 479.0, 533.0, 522.0, 525.0, 525.0, 579.0, 570.0, 570.0, 579.0, 525.0, 570.0, 522.0, 522.0, 573.0, 573.0, 516.0, 576.0, 530.0, 527.0, 525.0, 579.0, 570.0, 527.0, 582.0, 519.0, 582.0, 579.0, 576.0, 573.0, 522.0, 522.0, 573.0, 525.0, 573.0, 567.0, 579.0, 513.0, 570.0, 573.0, 525.0, 576.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 261.0, 289.0, 293.0, 271.0, 273.0, 292.0, 287.0, 293.0, 286.0, 261.0, 
264.0, 277.0, 293.0, 254.0, 265.0, 256.0, 266.0, 264.0, 255.0, 293.0, 280.0, 257.0, 262.0, 285.0, 291.0, 272.0, 250.0, 282.0, 288.0, 289.0, 281.0, 300.0, 282.0, 287.0, 295.0, 264.0, 258.0, 297.0, 276.0, 272.0, 258.0, 263.0, 267.0, 259.0, 266.0, 263.0, 256.0, 289.0, 281.0, 253.0, 269.0, 261.0, 261.0, 265.0, 257.0, 284.0, 289.0, 288.0, 282.0, 291.0, 288.0, 280.0, 299.0, 282.0, 288.0, 300.0, 282.0, 286.0, 290.0, 297.0, 276.0, 243.0, 236.0, 288.0, 285.0, 291.0, 288.0, 293.0, 283.0, 296.0, 283.0, 276.0, 297.0, 291.0, 285.0, 285.0, 285.0, 282.0, 291.0, 270.0, 246.0, 280.0, 290.0, 246.0, 230.0, 265.0, 257.0, 284.0, 289.0, 261.0, 255.0, 292.0, 278.0, 269.0, 253.0, 279.0, 303.0, 262.0, 263.0, 265.0, 257.0, 297.0, 285.0, 252.0, 270.0, 238.0, 241.0, 262.0, 271.0, 261.0, 261.0, 263.0, 262.0, 270.0, 255.0, 286.0, 293.0, 285.0, 285.0, 281.0, 289.0, 290.0, 289.0, 253.0, 272.0, 279.0, 291.0, 248.0, 274.0, 263.0, 259.0, 291.0, 282.0, 283.0, 290.0, 260.0, 256.0, 284.0, 292.0, 262.0, 268.0, 270.0, 257.0, 255.0, 270.0, 289.0, 290.0, 276.0, 294.0, 267.0, 260.0, 287.0, 295.0, 256.0, 263.0, 289.0, 293.0, 302.0, 277.0, 281.0, 295.0, 279.0, 294.0, 259.0, 263.0, 252.0, 270.0, 293.0, 280.0, 264.0, 261.0, 280.0, 293.0, 301.0, 266.0, 287.0, 292.0, 255.0, 258.0, 273.0, 297.0, 296.0, 277.0, 258.0, 267.0, 284.0, 292.0, 258.0, 267.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.70739328529775, "mean_inference_ms": 1.2700437704412777, "mean_action_processing_ms": 0.1350949326437043, "mean_env_wait_ms": 0.8522100793537379, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 476.0, "episode_reward_mean": 550.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 275.21}, "hist_stats": {"episode_reward": [525.0, 582.0, 544.0, 579.0, 579.0, 525.0, 570.0, 519.0, 522.0, 519.0, 573.0, 519.0, 576.0, 522.0, 570.0, 570.0, 582.0, 582.0, 522.0, 
573.0, 530.0, 530.0, 525.0, 519.0, 570.0, 522.0, 522.0, 522.0, 573.0, 570.0, 579.0, 579.0, 570.0, 582.0, 576.0, 573.0, 479.0, 573.0, 579.0, 576.0, 579.0, 573.0, 576.0, 570.0, 573.0, 516.0, 570.0, 476.0, 522.0, 573.0, 516.0, 570.0, 522.0, 582.0, 525.0, 522.0, 582.0, 522.0, 479.0, 533.0, 522.0, 525.0, 525.0, 579.0, 570.0, 570.0, 579.0, 525.0, 570.0, 522.0, 522.0, 573.0, 573.0, 516.0, 576.0, 530.0, 527.0, 525.0, 579.0, 570.0, 527.0, 582.0, 519.0, 582.0, 579.0, 576.0, 573.0, 522.0, 522.0, 573.0, 525.0, 573.0, 567.0, 579.0, 513.0, 570.0, 573.0, 525.0, 576.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 261.0, 289.0, 293.0, 271.0, 273.0, 292.0, 287.0, 293.0, 286.0, 261.0, 264.0, 277.0, 293.0, 254.0, 265.0, 256.0, 266.0, 264.0, 255.0, 293.0, 280.0, 257.0, 262.0, 285.0, 291.0, 272.0, 250.0, 282.0, 288.0, 289.0, 281.0, 300.0, 282.0, 287.0, 295.0, 264.0, 258.0, 297.0, 276.0, 272.0, 258.0, 263.0, 267.0, 259.0, 266.0, 263.0, 256.0, 289.0, 281.0, 253.0, 269.0, 261.0, 261.0, 265.0, 257.0, 284.0, 289.0, 288.0, 282.0, 291.0, 288.0, 280.0, 299.0, 282.0, 288.0, 300.0, 282.0, 286.0, 290.0, 297.0, 276.0, 243.0, 236.0, 288.0, 285.0, 291.0, 288.0, 293.0, 283.0, 296.0, 283.0, 276.0, 297.0, 291.0, 285.0, 285.0, 285.0, 282.0, 291.0, 270.0, 246.0, 280.0, 290.0, 246.0, 230.0, 265.0, 257.0, 284.0, 289.0, 261.0, 255.0, 292.0, 278.0, 269.0, 253.0, 279.0, 303.0, 262.0, 263.0, 265.0, 257.0, 297.0, 285.0, 252.0, 270.0, 238.0, 241.0, 262.0, 271.0, 261.0, 261.0, 263.0, 262.0, 270.0, 255.0, 286.0, 
293.0, 285.0, 285.0, 281.0, 289.0, 290.0, 289.0, 253.0, 272.0, 279.0, 291.0, 248.0, 274.0, 263.0, 259.0, 291.0, 282.0, 283.0, 290.0, 260.0, 256.0, 284.0, 292.0, 262.0, 268.0, 270.0, 257.0, 255.0, 270.0, 289.0, 290.0, 276.0, 294.0, 267.0, 260.0, 287.0, 295.0, 256.0, 263.0, 289.0, 293.0, 302.0, 277.0, 281.0, 295.0, 279.0, 294.0, 259.0, 263.0, 252.0, 270.0, 293.0, 280.0, 264.0, 261.0, 280.0, 293.0, 301.0, 266.0, 287.0, 292.0, 255.0, 258.0, 273.0, 297.0, 296.0, 277.0, 258.0, 267.0, 284.0, 292.0, 258.0, 267.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.70739328529775, "mean_inference_ms": 1.2700437704412777, "mean_action_processing_ms": 0.1350949326437043, "mean_env_wait_ms": 0.8522100793537379, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6348800, "num_agent_steps_trained": 6348800, "num_env_steps_sampled": 3174400, "num_env_steps_trained": 3174400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3174400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6348800, "timers": {"training_iteration_time_ms": 3606.368, "learn_time_ms": 1089.396, "learn_throughput": 11749.629, "synch_weights_time_ms": 14.267}, "counters": {"num_env_steps_sampled": 3174400, "num_env_steps_trained": 3174400, "num_agent_steps_sampled": 6348800, "num_agent_steps_trained": 6348800}, "done": false, "episodes_total": 7936, "training_iteration": 248, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-36", "timestamp": 1666581396, "time_this_iter_s": 3.726221799850464, "time_total_s": 961.2121593952179, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 961.2121593952179, "timesteps_since_restore": 0, "iterations_since_restore": 248, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.919999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 190.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.96, "shaped_reward_min": 153, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.35, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.99, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.19, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 15.89, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.97, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.7, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.97, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.7, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.97, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.7, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0006418681005015969, "policy_loss": -0.0009683937532827258, "vf_loss": 7.752774238586426, "vf_explained_var": 0.5776857137680054, "kl": 0.0023451021406799555, "entropy": 0.8975032567977905, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3187200, "num_env_steps_trained": 3187200, "num_agent_steps_sampled": 6374400, "num_agent_steps_trained": 6374400}, "sampler_results": {"episode_reward_max": 582.0, "episode_reward_min": 476.0, "episode_reward_mean": 551.56, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 275.78}, "custom_metrics": {"sparse_reward_mean": 190.8, "sparse_reward_min": 160, "sparse_reward_max": 200, "shaped_reward_mean": 169.96, "shaped_reward_min": 153, "shaped_reward_max": 182, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.35, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 15.99, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.19, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 15.89, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.97, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 15.7, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.97, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 15.7, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.97, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 15.7, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 582.0, 576.0, 573.0, 479.0, 573.0, 579.0, 576.0, 579.0, 573.0, 576.0, 570.0, 573.0, 516.0, 570.0, 476.0, 522.0, 573.0, 516.0, 570.0, 522.0, 582.0, 525.0, 522.0, 582.0, 522.0, 479.0, 533.0, 522.0, 525.0, 525.0, 579.0, 570.0, 570.0, 579.0, 525.0, 570.0, 522.0, 522.0, 573.0, 573.0, 516.0, 576.0, 530.0, 527.0, 525.0, 579.0, 570.0, 527.0, 582.0, 519.0, 582.0, 579.0, 576.0, 573.0, 522.0, 522.0, 573.0, 525.0, 573.0, 567.0, 579.0, 513.0, 570.0, 573.0, 525.0, 576.0, 525.0, 525.0, 530.0, 579.0, 576.0, 573.0, 525.0, 573.0, 576.0, 522.0, 576.0, 582.0, 525.0, 582.0, 573.0, 525.0, 519.0, 573.0, 582.0, 573.0, 579.0, 573.0, 570.0, 525.0, 522.0, 576.0, 582.0, 516.0, 525.0, 573.0, 524.0, 530.0, 524.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 288.0, 300.0, 282.0, 286.0, 290.0, 297.0, 276.0, 243.0, 236.0, 288.0, 
285.0, 291.0, 288.0, 293.0, 283.0, 296.0, 283.0, 276.0, 297.0, 291.0, 285.0, 285.0, 285.0, 282.0, 291.0, 270.0, 246.0, 280.0, 290.0, 246.0, 230.0, 265.0, 257.0, 284.0, 289.0, 261.0, 255.0, 292.0, 278.0, 269.0, 253.0, 279.0, 303.0, 262.0, 263.0, 265.0, 257.0, 297.0, 285.0, 252.0, 270.0, 238.0, 241.0, 262.0, 271.0, 261.0, 261.0, 263.0, 262.0, 270.0, 255.0, 286.0, 293.0, 285.0, 285.0, 281.0, 289.0, 290.0, 289.0, 253.0, 272.0, 279.0, 291.0, 248.0, 274.0, 263.0, 259.0, 291.0, 282.0, 283.0, 290.0, 260.0, 256.0, 284.0, 292.0, 262.0, 268.0, 270.0, 257.0, 255.0, 270.0, 289.0, 290.0, 276.0, 294.0, 267.0, 260.0, 287.0, 295.0, 256.0, 263.0, 289.0, 293.0, 302.0, 277.0, 281.0, 295.0, 279.0, 294.0, 259.0, 263.0, 252.0, 270.0, 293.0, 280.0, 264.0, 261.0, 280.0, 293.0, 301.0, 266.0, 287.0, 292.0, 255.0, 258.0, 273.0, 297.0, 296.0, 277.0, 258.0, 267.0, 284.0, 292.0, 258.0, 267.0, 265.0, 260.0, 255.0, 275.0, 296.0, 283.0, 284.0, 292.0, 281.0, 292.0, 267.0, 258.0, 284.0, 289.0, 289.0, 287.0, 258.0, 264.0, 285.0, 291.0, 293.0, 289.0, 267.0, 258.0, 296.0, 286.0, 294.0, 279.0, 268.0, 257.0, 260.0, 259.0, 290.0, 283.0, 300.0, 282.0, 286.0, 287.0, 286.0, 293.0, 288.0, 285.0, 283.0, 287.0, 261.0, 264.0, 264.0, 258.0, 291.0, 285.0, 297.0, 285.0, 259.0, 257.0, 255.0, 270.0, 291.0, 282.0, 266.0, 258.0, 265.0, 265.0, 268.0, 256.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.707276742555301, "mean_inference_ms": 1.2697386632886414, "mean_action_processing_ms": 0.13508406524906952, "mean_env_wait_ms": 0.8520395956203936, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 582.0, "episode_reward_min": 476.0, "episode_reward_mean": 551.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 303.0}, "policy_reward_mean": {"ppo": 275.78}, "hist_stats": {"episode_reward": [570.0, 582.0, 576.0, 573.0, 479.0, 573.0, 579.0, 576.0, 579.0, 573.0, 576.0, 570.0, 573.0, 516.0, 570.0, 476.0, 522.0, 573.0, 516.0, 
570.0, 522.0, 582.0, 525.0, 522.0, 582.0, 522.0, 479.0, 533.0, 522.0, 525.0, 525.0, 579.0, 570.0, 570.0, 579.0, 525.0, 570.0, 522.0, 522.0, 573.0, 573.0, 516.0, 576.0, 530.0, 527.0, 525.0, 579.0, 570.0, 527.0, 582.0, 519.0, 582.0, 579.0, 576.0, 573.0, 522.0, 522.0, 573.0, 525.0, 573.0, 567.0, 579.0, 513.0, 570.0, 573.0, 525.0, 576.0, 525.0, 525.0, 530.0, 579.0, 576.0, 573.0, 525.0, 573.0, 576.0, 522.0, 576.0, 582.0, 525.0, 582.0, 573.0, 525.0, 519.0, 573.0, 582.0, 573.0, 579.0, 573.0, 570.0, 525.0, 522.0, 576.0, 582.0, 516.0, 525.0, 573.0, 524.0, 530.0, 524.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 288.0, 300.0, 282.0, 286.0, 290.0, 297.0, 276.0, 243.0, 236.0, 288.0, 285.0, 291.0, 288.0, 293.0, 283.0, 296.0, 283.0, 276.0, 297.0, 291.0, 285.0, 285.0, 285.0, 282.0, 291.0, 270.0, 246.0, 280.0, 290.0, 246.0, 230.0, 265.0, 257.0, 284.0, 289.0, 261.0, 255.0, 292.0, 278.0, 269.0, 253.0, 279.0, 303.0, 262.0, 263.0, 265.0, 257.0, 297.0, 285.0, 252.0, 270.0, 238.0, 241.0, 262.0, 271.0, 261.0, 261.0, 263.0, 262.0, 270.0, 255.0, 286.0, 293.0, 285.0, 285.0, 281.0, 289.0, 290.0, 289.0, 253.0, 272.0, 279.0, 291.0, 248.0, 274.0, 263.0, 259.0, 291.0, 282.0, 283.0, 290.0, 260.0, 256.0, 284.0, 292.0, 262.0, 268.0, 270.0, 257.0, 255.0, 270.0, 289.0, 290.0, 276.0, 294.0, 267.0, 260.0, 287.0, 295.0, 256.0, 263.0, 289.0, 293.0, 302.0, 277.0, 281.0, 295.0, 279.0, 294.0, 259.0, 263.0, 252.0, 270.0, 293.0, 280.0, 264.0, 261.0, 280.0, 293.0, 301.0, 266.0, 287.0, 292.0, 255.0, 258.0, 273.0, 
297.0, 296.0, 277.0, 258.0, 267.0, 284.0, 292.0, 258.0, 267.0, 265.0, 260.0, 255.0, 275.0, 296.0, 283.0, 284.0, 292.0, 281.0, 292.0, 267.0, 258.0, 284.0, 289.0, 289.0, 287.0, 258.0, 264.0, 285.0, 291.0, 293.0, 289.0, 267.0, 258.0, 296.0, 286.0, 294.0, 279.0, 268.0, 257.0, 260.0, 259.0, 290.0, 283.0, 300.0, 282.0, 286.0, 287.0, 286.0, 293.0, 288.0, 285.0, 283.0, 287.0, 261.0, 264.0, 264.0, 258.0, 291.0, 285.0, 297.0, 285.0, 259.0, 257.0, 255.0, 270.0, 291.0, 282.0, 266.0, 258.0, 265.0, 265.0, 268.0, 256.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.707276742555301, "mean_inference_ms": 1.2697386632886414, "mean_action_processing_ms": 0.13508406524906952, "mean_env_wait_ms": 0.8520395956203936, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6374400, "num_agent_steps_trained": 6374400, "num_env_steps_sampled": 3187200, "num_env_steps_trained": 3187200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3187200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6374400, "timers": {"training_iteration_time_ms": 3601.128, "learn_time_ms": 1084.687, "learn_throughput": 11800.639, "synch_weights_time_ms": 14.231}, "counters": {"num_env_steps_sampled": 3187200, "num_env_steps_trained": 3187200, "num_agent_steps_sampled": 6374400, "num_agent_steps_trained": 6374400}, "done": false, "episodes_total": 7968, "training_iteration": 249, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-40", "timestamp": 1666581400, "time_this_iter_s": 3.60356068611145, "time_total_s": 964.8157200813293, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 964.8157200813293, "timesteps_since_restore": 0, "iterations_since_restore": 249, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.033333333333328, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 180, "sparse_reward_max": 200, "shaped_reward_mean": 170.78, "shaped_reward_min": 153, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.78, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.66, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.62, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.52, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.48, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.37, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.5, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.14, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.48, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.37, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.48, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.37, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007210352923721075, "policy_loss": 0.0003855510149151087, "vf_loss": 7.820246696472168, "vf_explained_var": 0.577731728553772, "kl": 0.001891107764095068, "entropy": 0.8930797576904297, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3200000, "num_env_steps_trained": 3200000, "num_agent_steps_sampled": 6400000, "num_agent_steps_trained": 6400000}, "sampler_results": {"episode_reward_max": 584.0, "episode_reward_min": 513.0, "episode_reward_mean": 553.98, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 276.99}, "custom_metrics": {"sparse_reward_mean": 191.6, "sparse_reward_min": 180, "sparse_reward_max": 200, "shaped_reward_mean": 170.78, "shaped_reward_min": 153, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.78, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.66, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.62, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.52, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.48, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.37, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.5, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.14, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.09, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.48, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.37, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.48, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.37, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 570.0, 579.0, 525.0, 570.0, 522.0, 522.0, 573.0, 573.0, 516.0, 576.0, 530.0, 527.0, 525.0, 579.0, 570.0, 527.0, 582.0, 519.0, 582.0, 579.0, 576.0, 573.0, 522.0, 522.0, 573.0, 525.0, 573.0, 567.0, 579.0, 513.0, 570.0, 573.0, 525.0, 576.0, 525.0, 525.0, 530.0, 579.0, 576.0, 573.0, 525.0, 573.0, 576.0, 522.0, 576.0, 582.0, 525.0, 582.0, 573.0, 525.0, 519.0, 573.0, 582.0, 573.0, 579.0, 573.0, 570.0, 525.0, 522.0, 576.0, 582.0, 516.0, 525.0, 573.0, 524.0, 530.0, 524.0, 579.0, 573.0, 582.0, 525.0, 582.0, 573.0, 525.0, 522.0, 525.0, 573.0, 525.0, 530.0, 525.0, 519.0, 582.0, 582.0, 573.0, 582.0, 522.0, 525.0, 570.0, 573.0, 519.0, 584.0, 579.0, 519.0, 579.0, 579.0, 573.0, 579.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 285.0, 281.0, 289.0, 290.0, 289.0, 253.0, 272.0, 279.0, 291.0, 248.0, 
274.0, 263.0, 259.0, 291.0, 282.0, 283.0, 290.0, 260.0, 256.0, 284.0, 292.0, 262.0, 268.0, 270.0, 257.0, 255.0, 270.0, 289.0, 290.0, 276.0, 294.0, 267.0, 260.0, 287.0, 295.0, 256.0, 263.0, 289.0, 293.0, 302.0, 277.0, 281.0, 295.0, 279.0, 294.0, 259.0, 263.0, 252.0, 270.0, 293.0, 280.0, 264.0, 261.0, 280.0, 293.0, 301.0, 266.0, 287.0, 292.0, 255.0, 258.0, 273.0, 297.0, 296.0, 277.0, 258.0, 267.0, 284.0, 292.0, 258.0, 267.0, 265.0, 260.0, 255.0, 275.0, 296.0, 283.0, 284.0, 292.0, 281.0, 292.0, 267.0, 258.0, 284.0, 289.0, 289.0, 287.0, 258.0, 264.0, 285.0, 291.0, 293.0, 289.0, 267.0, 258.0, 296.0, 286.0, 294.0, 279.0, 268.0, 257.0, 260.0, 259.0, 290.0, 283.0, 300.0, 282.0, 286.0, 287.0, 286.0, 293.0, 288.0, 285.0, 283.0, 287.0, 261.0, 264.0, 264.0, 258.0, 291.0, 285.0, 297.0, 285.0, 259.0, 257.0, 255.0, 270.0, 291.0, 282.0, 266.0, 258.0, 265.0, 265.0, 268.0, 256.0, 287.0, 292.0, 287.0, 286.0, 287.0, 295.0, 272.0, 253.0, 292.0, 290.0, 285.0, 288.0, 265.0, 260.0, 257.0, 265.0, 271.0, 254.0, 285.0, 288.0, 264.0, 261.0, 265.0, 265.0, 259.0, 266.0, 269.0, 250.0, 290.0, 292.0, 296.0, 286.0, 294.0, 279.0, 294.0, 288.0, 268.0, 254.0, 264.0, 261.0, 286.0, 284.0, 286.0, 287.0, 262.0, 257.0, 292.0, 292.0, 285.0, 294.0, 268.0, 251.0, 286.0, 293.0, 283.0, 296.0, 287.0, 286.0, 286.0, 293.0, 260.0, 262.0, 285.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7071785885693055, "mean_inference_ms": 1.269468341528467, "mean_action_processing_ms": 0.13507871917250727, "mean_env_wait_ms": 0.8519080078969777, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 584.0, "episode_reward_min": 513.0, "episode_reward_mean": 553.98, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 276.99}, "hist_stats": {"episode_reward": [570.0, 570.0, 579.0, 525.0, 570.0, 522.0, 522.0, 573.0, 573.0, 516.0, 576.0, 530.0, 527.0, 525.0, 579.0, 570.0, 527.0, 582.0, 519.0, 
582.0, 579.0, 576.0, 573.0, 522.0, 522.0, 573.0, 525.0, 573.0, 567.0, 579.0, 513.0, 570.0, 573.0, 525.0, 576.0, 525.0, 525.0, 530.0, 579.0, 576.0, 573.0, 525.0, 573.0, 576.0, 522.0, 576.0, 582.0, 525.0, 582.0, 573.0, 525.0, 519.0, 573.0, 582.0, 573.0, 579.0, 573.0, 570.0, 525.0, 522.0, 576.0, 582.0, 516.0, 525.0, 573.0, 524.0, 530.0, 524.0, 579.0, 573.0, 582.0, 525.0, 582.0, 573.0, 525.0, 522.0, 525.0, 573.0, 525.0, 530.0, 525.0, 519.0, 582.0, 582.0, 573.0, 582.0, 522.0, 525.0, 570.0, 573.0, 519.0, 584.0, 579.0, 519.0, 579.0, 579.0, 573.0, 579.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 285.0, 281.0, 289.0, 290.0, 289.0, 253.0, 272.0, 279.0, 291.0, 248.0, 274.0, 263.0, 259.0, 291.0, 282.0, 283.0, 290.0, 260.0, 256.0, 284.0, 292.0, 262.0, 268.0, 270.0, 257.0, 255.0, 270.0, 289.0, 290.0, 276.0, 294.0, 267.0, 260.0, 287.0, 295.0, 256.0, 263.0, 289.0, 293.0, 302.0, 277.0, 281.0, 295.0, 279.0, 294.0, 259.0, 263.0, 252.0, 270.0, 293.0, 280.0, 264.0, 261.0, 280.0, 293.0, 301.0, 266.0, 287.0, 292.0, 255.0, 258.0, 273.0, 297.0, 296.0, 277.0, 258.0, 267.0, 284.0, 292.0, 258.0, 267.0, 265.0, 260.0, 255.0, 275.0, 296.0, 283.0, 284.0, 292.0, 281.0, 292.0, 267.0, 258.0, 284.0, 289.0, 289.0, 287.0, 258.0, 264.0, 285.0, 291.0, 293.0, 289.0, 267.0, 258.0, 296.0, 286.0, 294.0, 279.0, 268.0, 257.0, 260.0, 259.0, 290.0, 283.0, 300.0, 282.0, 286.0, 287.0, 286.0, 293.0, 288.0, 285.0, 283.0, 287.0, 261.0, 264.0, 264.0, 258.0, 291.0, 285.0, 297.0, 285.0, 259.0, 257.0, 255.0, 
270.0, 291.0, 282.0, 266.0, 258.0, 265.0, 265.0, 268.0, 256.0, 287.0, 292.0, 287.0, 286.0, 287.0, 295.0, 272.0, 253.0, 292.0, 290.0, 285.0, 288.0, 265.0, 260.0, 257.0, 265.0, 271.0, 254.0, 285.0, 288.0, 264.0, 261.0, 265.0, 265.0, 259.0, 266.0, 269.0, 250.0, 290.0, 292.0, 296.0, 286.0, 294.0, 279.0, 294.0, 288.0, 268.0, 254.0, 264.0, 261.0, 286.0, 284.0, 286.0, 287.0, 262.0, 257.0, 292.0, 292.0, 285.0, 294.0, 268.0, 251.0, 286.0, 293.0, 283.0, 296.0, 287.0, 286.0, 286.0, 293.0, 260.0, 262.0, 285.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7071785885693055, "mean_inference_ms": 1.269468341528467, "mean_action_processing_ms": 0.13507871917250727, "mean_env_wait_ms": 0.8519080078969777, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6400000, "num_agent_steps_trained": 6400000, "num_env_steps_sampled": 3200000, "num_env_steps_trained": 3200000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3200000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6400000, "timers": {"training_iteration_time_ms": 3602.763, "learn_time_ms": 1076.709, "learn_throughput": 11888.083, "synch_weights_time_ms": 13.686}, "counters": {"num_env_steps_sampled": 3200000, "num_env_steps_trained": 3200000, "num_agent_steps_sampled": 6400000, "num_agent_steps_trained": 6400000}, "done": false, "episodes_total": 8000, "training_iteration": 250, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-44", "timestamp": 1666581404, "time_this_iter_s": 3.6658709049224854, "time_total_s": 968.4815909862518, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 968.4815909862518, "timesteps_since_restore": 0, "iterations_since_restore": 250, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.740000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 180, "sparse_reward_max": 200, "shaped_reward_mean": 171.37, "shaped_reward_min": 153, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.34, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.21, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.18, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.11, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 14.06, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.97, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.88, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.64, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.26, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.2, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.06, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.97, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.06, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.97, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0018597999587655067, "policy_loss": -0.0021993389818817377, "vf_loss": 7.826320171356201, "vf_explained_var": 0.5782947540283203, "kl": 0.0020893034525215626, "entropy": 0.8861854076385498, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3212800, "num_env_steps_trained": 3212800, "num_agent_steps_sampled": 6425600, "num_agent_steps_trained": 6425600}, "sampler_results": {"episode_reward_max": 584.0, "episode_reward_min": 513.0, "episode_reward_mean": 555.37, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 250.0}, "policy_reward_max": {"ppo": 300.0}, "policy_reward_mean": {"ppo": 277.685}, "custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 180, "sparse_reward_max": 200, "shaped_reward_mean": 171.37, "shaped_reward_min": 153, "shaped_reward_max": 184, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.34, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.21, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.18, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.11, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 14.06, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.97, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.88, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.64, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.38, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.26, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.2, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.06, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.97, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.06, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.97, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 525.0, 576.0, 525.0, 525.0, 530.0, 579.0, 576.0, 573.0, 525.0, 573.0, 576.0, 522.0, 576.0, 582.0, 525.0, 582.0, 573.0, 525.0, 519.0, 573.0, 582.0, 573.0, 579.0, 573.0, 570.0, 525.0, 522.0, 576.0, 582.0, 516.0, 525.0, 573.0, 524.0, 530.0, 524.0, 579.0, 573.0, 582.0, 525.0, 582.0, 573.0, 525.0, 522.0, 525.0, 573.0, 525.0, 530.0, 525.0, 519.0, 582.0, 582.0, 573.0, 582.0, 522.0, 525.0, 570.0, 573.0, 519.0, 584.0, 579.0, 519.0, 579.0, 579.0, 573.0, 579.0, 522.0, 582.0, 570.0, 522.0, 522.0, 527.0, 582.0, 513.0, 516.0, 582.0, 579.0, 582.0, 573.0, 522.0, 579.0, 522.0, 576.0, 573.0, 579.0, 570.0, 525.0, 573.0, 530.0, 576.0, 525.0, 582.0, 570.0, 573.0, 570.0, 582.0, 579.0, 573.0, 519.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 277.0, 258.0, 267.0, 284.0, 292.0, 258.0, 267.0, 265.0, 260.0, 255.0, 
275.0, 296.0, 283.0, 284.0, 292.0, 281.0, 292.0, 267.0, 258.0, 284.0, 289.0, 289.0, 287.0, 258.0, 264.0, 285.0, 291.0, 293.0, 289.0, 267.0, 258.0, 296.0, 286.0, 294.0, 279.0, 268.0, 257.0, 260.0, 259.0, 290.0, 283.0, 300.0, 282.0, 286.0, 287.0, 286.0, 293.0, 288.0, 285.0, 283.0, 287.0, 261.0, 264.0, 264.0, 258.0, 291.0, 285.0, 297.0, 285.0, 259.0, 257.0, 255.0, 270.0, 291.0, 282.0, 266.0, 258.0, 265.0, 265.0, 268.0, 256.0, 287.0, 292.0, 287.0, 286.0, 287.0, 295.0, 272.0, 253.0, 292.0, 290.0, 285.0, 288.0, 265.0, 260.0, 257.0, 265.0, 271.0, 254.0, 285.0, 288.0, 264.0, 261.0, 265.0, 265.0, 259.0, 266.0, 269.0, 250.0, 290.0, 292.0, 296.0, 286.0, 294.0, 279.0, 294.0, 288.0, 268.0, 254.0, 264.0, 261.0, 286.0, 284.0, 286.0, 287.0, 262.0, 257.0, 292.0, 292.0, 285.0, 294.0, 268.0, 251.0, 286.0, 293.0, 283.0, 296.0, 287.0, 286.0, 286.0, 293.0, 260.0, 262.0, 285.0, 297.0, 286.0, 284.0, 258.0, 264.0, 261.0, 261.0, 260.0, 267.0, 300.0, 282.0, 253.0, 260.0, 251.0, 265.0, 291.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 296.0, 264.0, 258.0, 295.0, 284.0, 265.0, 257.0, 289.0, 287.0, 284.0, 289.0, 285.0, 294.0, 284.0, 286.0, 264.0, 261.0, 283.0, 290.0, 269.0, 261.0, 293.0, 283.0, 258.0, 267.0, 289.0, 293.0, 294.0, 276.0, 282.0, 291.0, 285.0, 285.0, 288.0, 294.0, 288.0, 291.0, 276.0, 297.0, 258.0, 261.0, 296.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7071430805255033, "mean_inference_ms": 1.2692005567694524, "mean_action_processing_ms": 0.13507150565449955, "mean_env_wait_ms": 0.8517631405985867, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 584.0, "episode_reward_min": 513.0, "episode_reward_mean": 555.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 250.0}, "policy_reward_max": {"ppo": 300.0}, "policy_reward_mean": {"ppo": 277.685}, "hist_stats": {"episode_reward": [573.0, 525.0, 576.0, 525.0, 525.0, 530.0, 579.0, 576.0, 573.0, 525.0, 573.0, 576.0, 522.0, 576.0, 582.0, 525.0, 582.0, 573.0, 
525.0, 519.0, 573.0, 582.0, 573.0, 579.0, 573.0, 570.0, 525.0, 522.0, 576.0, 582.0, 516.0, 525.0, 573.0, 524.0, 530.0, 524.0, 579.0, 573.0, 582.0, 525.0, 582.0, 573.0, 525.0, 522.0, 525.0, 573.0, 525.0, 530.0, 525.0, 519.0, 582.0, 582.0, 573.0, 582.0, 522.0, 525.0, 570.0, 573.0, 519.0, 584.0, 579.0, 519.0, 579.0, 579.0, 573.0, 579.0, 522.0, 582.0, 570.0, 522.0, 522.0, 527.0, 582.0, 513.0, 516.0, 582.0, 579.0, 582.0, 573.0, 522.0, 579.0, 522.0, 576.0, 573.0, 579.0, 570.0, 525.0, 573.0, 530.0, 576.0, 525.0, 582.0, 570.0, 573.0, 570.0, 582.0, 579.0, 573.0, 519.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 277.0, 258.0, 267.0, 284.0, 292.0, 258.0, 267.0, 265.0, 260.0, 255.0, 275.0, 296.0, 283.0, 284.0, 292.0, 281.0, 292.0, 267.0, 258.0, 284.0, 289.0, 289.0, 287.0, 258.0, 264.0, 285.0, 291.0, 293.0, 289.0, 267.0, 258.0, 296.0, 286.0, 294.0, 279.0, 268.0, 257.0, 260.0, 259.0, 290.0, 283.0, 300.0, 282.0, 286.0, 287.0, 286.0, 293.0, 288.0, 285.0, 283.0, 287.0, 261.0, 264.0, 264.0, 258.0, 291.0, 285.0, 297.0, 285.0, 259.0, 257.0, 255.0, 270.0, 291.0, 282.0, 266.0, 258.0, 265.0, 265.0, 268.0, 256.0, 287.0, 292.0, 287.0, 286.0, 287.0, 295.0, 272.0, 253.0, 292.0, 290.0, 285.0, 288.0, 265.0, 260.0, 257.0, 265.0, 271.0, 254.0, 285.0, 288.0, 264.0, 261.0, 265.0, 265.0, 259.0, 266.0, 269.0, 250.0, 290.0, 292.0, 296.0, 286.0, 294.0, 279.0, 294.0, 288.0, 268.0, 254.0, 264.0, 261.0, 286.0, 284.0, 286.0, 287.0, 262.0, 257.0, 292.0, 292.0, 285.0, 294.0, 268.0, 251.0, 286.0, 293.0, 
283.0, 296.0, 287.0, 286.0, 286.0, 293.0, 260.0, 262.0, 285.0, 297.0, 286.0, 284.0, 258.0, 264.0, 261.0, 261.0, 260.0, 267.0, 300.0, 282.0, 253.0, 260.0, 251.0, 265.0, 291.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 296.0, 264.0, 258.0, 295.0, 284.0, 265.0, 257.0, 289.0, 287.0, 284.0, 289.0, 285.0, 294.0, 284.0, 286.0, 264.0, 261.0, 283.0, 290.0, 269.0, 261.0, 293.0, 283.0, 258.0, 267.0, 289.0, 293.0, 294.0, 276.0, 282.0, 291.0, 285.0, 285.0, 288.0, 294.0, 288.0, 291.0, 276.0, 297.0, 258.0, 261.0, 296.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7071430805255033, "mean_inference_ms": 1.2692005567694524, "mean_action_processing_ms": 0.13507150565449955, "mean_env_wait_ms": 0.8517631405985867, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6425600, "num_agent_steps_trained": 6425600, "num_env_steps_sampled": 3212800, "num_env_steps_trained": 3212800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3212800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6425600, "timers": {"training_iteration_time_ms": 3631.708, "learn_time_ms": 1079.384, "learn_throughput": 11858.62, "synch_weights_time_ms": 13.293}, "counters": {"num_env_steps_sampled": 3212800, "num_env_steps_trained": 3212800, "num_agent_steps_sampled": 6425600, "num_agent_steps_trained": 6425600}, "done": false, "episodes_total": 8032, "training_iteration": 251, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-48", "timestamp": 1666581408, "time_this_iter_s": 3.9061965942382812, "time_total_s": 972.3877875804901, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 972.3877875804901, "timesteps_since_restore": 0, "iterations_since_restore": 251, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.066666666666666, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.32, "shaped_reward_min": 71, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.7, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.54, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.64, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 13, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 13, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.81, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.36, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.55, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.47, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.38, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.006767911836504936, "policy_loss": 0.006420888472348452, "vf_loss": 7.912266731262207, "vf_explained_var": 0.6025801301002502, "kl": 0.002451230539008975, "entropy": 0.8884056210517883, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3225600, "num_env_steps_trained": 3225600, "num_agent_steps_sampled": 6451200, "num_agent_steps_trained": 6451200}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 151.0, "episode_reward_mean": 550.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 65.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 275.16}, "custom_metrics": {"sparse_reward_mean": 190.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.32, "shaped_reward_min": 71, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.7, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.54, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.64, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 13, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.15, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 13, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.81, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.36, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.55, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.47, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.37, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 3, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.38, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 524.0, 530.0, 524.0, 579.0, 573.0, 582.0, 525.0, 582.0, 573.0, 525.0, 522.0, 525.0, 573.0, 525.0, 530.0, 525.0, 519.0, 582.0, 582.0, 573.0, 582.0, 522.0, 525.0, 570.0, 573.0, 519.0, 584.0, 579.0, 519.0, 579.0, 579.0, 573.0, 579.0, 522.0, 582.0, 570.0, 522.0, 522.0, 527.0, 582.0, 513.0, 516.0, 582.0, 579.0, 582.0, 573.0, 522.0, 579.0, 522.0, 576.0, 573.0, 579.0, 570.0, 525.0, 573.0, 530.0, 576.0, 525.0, 582.0, 570.0, 573.0, 570.0, 582.0, 579.0, 573.0, 519.0, 582.0, 522.0, 576.0, 582.0, 573.0, 151.0, 527.0, 525.0, 573.0, 525.0, 525.0, 522.0, 516.0, 525.0, 582.0, 530.0, 579.0, 530.0, 519.0, 582.0, 522.0, 582.0, 576.0, 525.0, 579.0, 576.0, 525.0, 522.0, 576.0, 579.0, 627.0, 576.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 282.0, 266.0, 258.0, 265.0, 265.0, 268.0, 256.0, 287.0, 292.0, 287.0, 
286.0, 287.0, 295.0, 272.0, 253.0, 292.0, 290.0, 285.0, 288.0, 265.0, 260.0, 257.0, 265.0, 271.0, 254.0, 285.0, 288.0, 264.0, 261.0, 265.0, 265.0, 259.0, 266.0, 269.0, 250.0, 290.0, 292.0, 296.0, 286.0, 294.0, 279.0, 294.0, 288.0, 268.0, 254.0, 264.0, 261.0, 286.0, 284.0, 286.0, 287.0, 262.0, 257.0, 292.0, 292.0, 285.0, 294.0, 268.0, 251.0, 286.0, 293.0, 283.0, 296.0, 287.0, 286.0, 286.0, 293.0, 260.0, 262.0, 285.0, 297.0, 286.0, 284.0, 258.0, 264.0, 261.0, 261.0, 260.0, 267.0, 300.0, 282.0, 253.0, 260.0, 251.0, 265.0, 291.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 296.0, 264.0, 258.0, 295.0, 284.0, 265.0, 257.0, 289.0, 287.0, 284.0, 289.0, 285.0, 294.0, 284.0, 286.0, 264.0, 261.0, 283.0, 290.0, 269.0, 261.0, 293.0, 283.0, 258.0, 267.0, 289.0, 293.0, 294.0, 276.0, 282.0, 291.0, 285.0, 285.0, 288.0, 294.0, 288.0, 291.0, 276.0, 297.0, 258.0, 261.0, 296.0, 286.0, 259.0, 263.0, 284.0, 292.0, 287.0, 295.0, 289.0, 284.0, 86.0, 65.0, 256.0, 271.0, 256.0, 269.0, 289.0, 284.0, 268.0, 257.0, 263.0, 262.0, 269.0, 253.0, 271.0, 245.0, 261.0, 264.0, 287.0, 295.0, 259.0, 271.0, 294.0, 285.0, 266.0, 264.0, 257.0, 262.0, 285.0, 297.0, 261.0, 261.0, 287.0, 295.0, 278.0, 298.0, 263.0, 262.0, 293.0, 286.0, 288.0, 288.0, 256.0, 269.0, 252.0, 270.0, 293.0, 283.0, 295.0, 284.0, 316.0, 311.0, 287.0, 289.0, 253.0, 269.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7070981576947253, "mean_inference_ms": 1.2691200903714683, "mean_action_processing_ms": 0.13505852038860622, "mean_env_wait_ms": 0.851625419616148, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 151.0, "episode_reward_mean": 550.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 65.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 275.16}, "hist_stats": {"episode_reward": [573.0, 524.0, 530.0, 524.0, 579.0, 573.0, 582.0, 525.0, 582.0, 573.0, 525.0, 522.0, 525.0, 573.0, 525.0, 530.0, 525.0, 519.0, 582.0, 
582.0, 573.0, 582.0, 522.0, 525.0, 570.0, 573.0, 519.0, 584.0, 579.0, 519.0, 579.0, 579.0, 573.0, 579.0, 522.0, 582.0, 570.0, 522.0, 522.0, 527.0, 582.0, 513.0, 516.0, 582.0, 579.0, 582.0, 573.0, 522.0, 579.0, 522.0, 576.0, 573.0, 579.0, 570.0, 525.0, 573.0, 530.0, 576.0, 525.0, 582.0, 570.0, 573.0, 570.0, 582.0, 579.0, 573.0, 519.0, 582.0, 522.0, 576.0, 582.0, 573.0, 151.0, 527.0, 525.0, 573.0, 525.0, 525.0, 522.0, 516.0, 525.0, 582.0, 530.0, 579.0, 530.0, 519.0, 582.0, 522.0, 582.0, 576.0, 525.0, 579.0, 576.0, 525.0, 522.0, 576.0, 579.0, 627.0, 576.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 282.0, 266.0, 258.0, 265.0, 265.0, 268.0, 256.0, 287.0, 292.0, 287.0, 286.0, 287.0, 295.0, 272.0, 253.0, 292.0, 290.0, 285.0, 288.0, 265.0, 260.0, 257.0, 265.0, 271.0, 254.0, 285.0, 288.0, 264.0, 261.0, 265.0, 265.0, 259.0, 266.0, 269.0, 250.0, 290.0, 292.0, 296.0, 286.0, 294.0, 279.0, 294.0, 288.0, 268.0, 254.0, 264.0, 261.0, 286.0, 284.0, 286.0, 287.0, 262.0, 257.0, 292.0, 292.0, 285.0, 294.0, 268.0, 251.0, 286.0, 293.0, 283.0, 296.0, 287.0, 286.0, 286.0, 293.0, 260.0, 262.0, 285.0, 297.0, 286.0, 284.0, 258.0, 264.0, 261.0, 261.0, 260.0, 267.0, 300.0, 282.0, 253.0, 260.0, 251.0, 265.0, 291.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 296.0, 264.0, 258.0, 295.0, 284.0, 265.0, 257.0, 289.0, 287.0, 284.0, 289.0, 285.0, 294.0, 284.0, 286.0, 264.0, 261.0, 283.0, 290.0, 269.0, 261.0, 293.0, 283.0, 258.0, 267.0, 289.0, 293.0, 294.0, 276.0, 282.0, 291.0, 285.0, 285.0, 288.0, 
294.0, 288.0, 291.0, 276.0, 297.0, 258.0, 261.0, 296.0, 286.0, 259.0, 263.0, 284.0, 292.0, 287.0, 295.0, 289.0, 284.0, 86.0, 65.0, 256.0, 271.0, 256.0, 269.0, 289.0, 284.0, 268.0, 257.0, 263.0, 262.0, 269.0, 253.0, 271.0, 245.0, 261.0, 264.0, 287.0, 295.0, 259.0, 271.0, 294.0, 285.0, 266.0, 264.0, 257.0, 262.0, 285.0, 297.0, 261.0, 261.0, 287.0, 295.0, 278.0, 298.0, 263.0, 262.0, 293.0, 286.0, 288.0, 288.0, 256.0, 269.0, 252.0, 270.0, 293.0, 283.0, 295.0, 284.0, 316.0, 311.0, 287.0, 289.0, 253.0, 269.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7070981576947253, "mean_inference_ms": 1.2691200903714683, "mean_action_processing_ms": 0.13505852038860622, "mean_env_wait_ms": 0.851625419616148, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6451200, "num_agent_steps_trained": 6451200, "num_env_steps_sampled": 3225600, "num_env_steps_trained": 3225600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3225600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6451200, "timers": {"training_iteration_time_ms": 3663.845, "learn_time_ms": 1085.247, "learn_throughput": 11794.546, "synch_weights_time_ms": 14.734}, "counters": {"num_env_steps_sampled": 3225600, "num_env_steps_trained": 3225600, "num_agent_steps_sampled": 6451200, "num_agent_steps_trained": 6451200}, "done": false, "episodes_total": 8064, "training_iteration": 252, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-52", "timestamp": 1666581412, "time_this_iter_s": 3.8708066940307617, "time_total_s": 976.2585942745209, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 976.2585942745209, "timesteps_since_restore": 0, "iterations_since_restore": 252, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.3, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 190.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.02, "shaped_reward_min": 71, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.72, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.99, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.55, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.68, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 13, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 13, "potting_onion_agent_0_mean": 14.29, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.47, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.18, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.29, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.47, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.29, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.47, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0021860229317098856, "policy_loss": -0.0025187181308865547, "vf_loss": 7.718568325042725, "vf_explained_var": 0.5755909085273743, "kl": 0.0020678252913057804, "entropy": 0.8783210515975952, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3238400, "num_env_steps_trained": 3238400, "num_agent_steps_sampled": 6476800, "num_agent_steps_trained": 6476800}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 151.0, "episode_reward_mean": 550.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 65.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 275.21}, "custom_metrics": {"sparse_reward_mean": 190.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.02, "shaped_reward_min": 71, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.72, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.99, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.55, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.68, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 13, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 13, "potting_onion_agent_0_mean": 14.29, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.47, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.18, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.06, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.07, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.29, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.47, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.29, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.47, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 522.0, 582.0, 570.0, 522.0, 522.0, 527.0, 582.0, 513.0, 516.0, 582.0, 579.0, 582.0, 573.0, 522.0, 579.0, 522.0, 576.0, 573.0, 579.0, 570.0, 525.0, 573.0, 530.0, 576.0, 525.0, 582.0, 570.0, 573.0, 570.0, 582.0, 579.0, 573.0, 519.0, 582.0, 522.0, 576.0, 582.0, 573.0, 151.0, 527.0, 525.0, 573.0, 525.0, 525.0, 522.0, 516.0, 525.0, 582.0, 530.0, 579.0, 530.0, 519.0, 582.0, 522.0, 582.0, 576.0, 525.0, 579.0, 576.0, 525.0, 522.0, 576.0, 579.0, 627.0, 576.0, 522.0, 573.0, 516.0, 573.0, 522.0, 570.0, 576.0, 576.0, 576.0, 579.0, 570.0, 525.0, 576.0, 525.0, 579.0, 579.0, 522.0, 573.0, 530.0, 576.0, 579.0, 533.0, 576.0, 522.0, 527.0, 525.0, 573.0, 570.0, 576.0, 510.0, 533.0, 522.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 286.0, 293.0, 260.0, 262.0, 285.0, 297.0, 286.0, 284.0, 258.0, 
264.0, 261.0, 261.0, 260.0, 267.0, 300.0, 282.0, 253.0, 260.0, 251.0, 265.0, 291.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 296.0, 264.0, 258.0, 295.0, 284.0, 265.0, 257.0, 289.0, 287.0, 284.0, 289.0, 285.0, 294.0, 284.0, 286.0, 264.0, 261.0, 283.0, 290.0, 269.0, 261.0, 293.0, 283.0, 258.0, 267.0, 289.0, 293.0, 294.0, 276.0, 282.0, 291.0, 285.0, 285.0, 288.0, 294.0, 288.0, 291.0, 276.0, 297.0, 258.0, 261.0, 296.0, 286.0, 259.0, 263.0, 284.0, 292.0, 287.0, 295.0, 289.0, 284.0, 86.0, 65.0, 256.0, 271.0, 256.0, 269.0, 289.0, 284.0, 268.0, 257.0, 263.0, 262.0, 269.0, 253.0, 271.0, 245.0, 261.0, 264.0, 287.0, 295.0, 259.0, 271.0, 294.0, 285.0, 266.0, 264.0, 257.0, 262.0, 285.0, 297.0, 261.0, 261.0, 287.0, 295.0, 278.0, 298.0, 263.0, 262.0, 293.0, 286.0, 288.0, 288.0, 256.0, 269.0, 252.0, 270.0, 293.0, 283.0, 295.0, 284.0, 316.0, 311.0, 287.0, 289.0, 253.0, 269.0, 296.0, 277.0, 279.0, 237.0, 282.0, 291.0, 246.0, 276.0, 287.0, 283.0, 289.0, 287.0, 292.0, 284.0, 289.0, 287.0, 291.0, 288.0, 292.0, 278.0, 266.0, 259.0, 291.0, 285.0, 262.0, 263.0, 281.0, 298.0, 279.0, 300.0, 263.0, 259.0, 288.0, 285.0, 259.0, 271.0, 286.0, 290.0, 286.0, 293.0, 265.0, 268.0, 281.0, 295.0, 250.0, 272.0, 266.0, 261.0, 259.0, 266.0, 284.0, 289.0, 274.0, 296.0, 283.0, 293.0, 251.0, 259.0, 269.0, 264.0, 259.0, 263.0, 264.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7070492393693716, "mean_inference_ms": 1.2690477704332812, "mean_action_processing_ms": 0.135043148832932, "mean_env_wait_ms": 0.8514795009637173, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 151.0, "episode_reward_mean": 550.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 65.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 275.21}, "hist_stats": {"episode_reward": [573.0, 579.0, 522.0, 582.0, 570.0, 522.0, 522.0, 527.0, 582.0, 513.0, 516.0, 582.0, 579.0, 582.0, 573.0, 522.0, 579.0, 522.0, 576.0, 
573.0, 579.0, 570.0, 525.0, 573.0, 530.0, 576.0, 525.0, 582.0, 570.0, 573.0, 570.0, 582.0, 579.0, 573.0, 519.0, 582.0, 522.0, 576.0, 582.0, 573.0, 151.0, 527.0, 525.0, 573.0, 525.0, 525.0, 522.0, 516.0, 525.0, 582.0, 530.0, 579.0, 530.0, 519.0, 582.0, 522.0, 582.0, 576.0, 525.0, 579.0, 576.0, 525.0, 522.0, 576.0, 579.0, 627.0, 576.0, 522.0, 573.0, 516.0, 573.0, 522.0, 570.0, 576.0, 576.0, 576.0, 579.0, 570.0, 525.0, 576.0, 525.0, 579.0, 579.0, 522.0, 573.0, 530.0, 576.0, 579.0, 533.0, 576.0, 522.0, 527.0, 525.0, 573.0, 570.0, 576.0, 510.0, 533.0, 522.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 286.0, 293.0, 260.0, 262.0, 285.0, 297.0, 286.0, 284.0, 258.0, 264.0, 261.0, 261.0, 260.0, 267.0, 300.0, 282.0, 253.0, 260.0, 251.0, 265.0, 291.0, 291.0, 297.0, 282.0, 289.0, 293.0, 277.0, 296.0, 264.0, 258.0, 295.0, 284.0, 265.0, 257.0, 289.0, 287.0, 284.0, 289.0, 285.0, 294.0, 284.0, 286.0, 264.0, 261.0, 283.0, 290.0, 269.0, 261.0, 293.0, 283.0, 258.0, 267.0, 289.0, 293.0, 294.0, 276.0, 282.0, 291.0, 285.0, 285.0, 288.0, 294.0, 288.0, 291.0, 276.0, 297.0, 258.0, 261.0, 296.0, 286.0, 259.0, 263.0, 284.0, 292.0, 287.0, 295.0, 289.0, 284.0, 86.0, 65.0, 256.0, 271.0, 256.0, 269.0, 289.0, 284.0, 268.0, 257.0, 263.0, 262.0, 269.0, 253.0, 271.0, 245.0, 261.0, 264.0, 287.0, 295.0, 259.0, 271.0, 294.0, 285.0, 266.0, 264.0, 257.0, 262.0, 285.0, 297.0, 261.0, 261.0, 287.0, 295.0, 278.0, 298.0, 263.0, 262.0, 293.0, 286.0, 288.0, 288.0, 256.0, 269.0, 252.0, 270.0, 293.0, 
283.0, 295.0, 284.0, 316.0, 311.0, 287.0, 289.0, 253.0, 269.0, 296.0, 277.0, 279.0, 237.0, 282.0, 291.0, 246.0, 276.0, 287.0, 283.0, 289.0, 287.0, 292.0, 284.0, 289.0, 287.0, 291.0, 288.0, 292.0, 278.0, 266.0, 259.0, 291.0, 285.0, 262.0, 263.0, 281.0, 298.0, 279.0, 300.0, 263.0, 259.0, 288.0, 285.0, 259.0, 271.0, 286.0, 290.0, 286.0, 293.0, 265.0, 268.0, 281.0, 295.0, 250.0, 272.0, 266.0, 261.0, 259.0, 266.0, 284.0, 289.0, 274.0, 296.0, 283.0, 293.0, 251.0, 259.0, 269.0, 264.0, 259.0, 263.0, 264.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7070492393693716, "mean_inference_ms": 1.2690477704332812, "mean_action_processing_ms": 0.135043148832932, "mean_env_wait_ms": 0.8514795009637173, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6476800, "num_agent_steps_trained": 6476800, "num_env_steps_sampled": 3238400, "num_env_steps_trained": 3238400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3238400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6476800, "timers": {"training_iteration_time_ms": 3652.753, "learn_time_ms": 1082.801, "learn_throughput": 11821.191, "synch_weights_time_ms": 14.031}, "counters": {"num_env_steps_sampled": 3238400, "num_env_steps_trained": 3238400, "num_agent_steps_sampled": 6476800, "num_agent_steps_trained": 6476800}, "done": false, "episodes_total": 8096, "training_iteration": 253, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-55", "timestamp": 1666581415, "time_this_iter_s": 3.622879981994629, "time_total_s": 979.8814742565155, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 979.8814742565155, "timesteps_since_restore": 0, "iterations_since_restore": 253, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.76, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.87, "shaped_reward_min": 71, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.35, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.57, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.17, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.25, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 13, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 13, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.01, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.83, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.01, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.01, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007734141545370221, "policy_loss": 0.0004366911016404629, "vf_loss": 7.683775424957275, "vf_explained_var": 0.6006571054458618, "kl": 0.002255759434774518, "entropy": 0.8633049130439758, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3251200, "num_env_steps_trained": 3251200, "num_agent_steps_sampled": 6502400, "num_agent_steps_trained": 6502400}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 151.0, "episode_reward_mean": 553.27, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 65.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 276.635}, "custom_metrics": {"sparse_reward_mean": 191.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 170.87, "shaped_reward_min": 71, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.35, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.57, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.17, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.25, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.24, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.27, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 13, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.17, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 13, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.01, "potting_onion_agent_1_min": 3, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.07, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.01, "optimal_onion_potting_agent_1_min": 3, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.01, "viable_onion_potting_agent_1_min": 3, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 573.0, 519.0, 582.0, 522.0, 576.0, 582.0, 573.0, 151.0, 527.0, 525.0, 573.0, 525.0, 525.0, 522.0, 516.0, 525.0, 582.0, 530.0, 579.0, 530.0, 519.0, 582.0, 522.0, 582.0, 576.0, 525.0, 579.0, 576.0, 525.0, 522.0, 576.0, 579.0, 627.0, 576.0, 522.0, 573.0, 516.0, 573.0, 522.0, 570.0, 576.0, 576.0, 576.0, 579.0, 570.0, 525.0, 576.0, 525.0, 579.0, 579.0, 522.0, 573.0, 530.0, 576.0, 579.0, 533.0, 576.0, 522.0, 527.0, 525.0, 573.0, 570.0, 576.0, 510.0, 533.0, 522.0, 525.0, 582.0, 579.0, 579.0, 573.0, 527.0, 576.0, 573.0, 582.0, 579.0, 525.0, 582.0, 579.0, 576.0, 570.0, 576.0, 567.0, 582.0, 576.0, 579.0, 519.0, 525.0, 576.0, 573.0, 527.0, 525.0, 576.0, 579.0, 573.0, 573.0, 582.0, 570.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 276.0, 297.0, 258.0, 261.0, 296.0, 286.0, 259.0, 263.0, 284.0, 
292.0, 287.0, 295.0, 289.0, 284.0, 86.0, 65.0, 256.0, 271.0, 256.0, 269.0, 289.0, 284.0, 268.0, 257.0, 263.0, 262.0, 269.0, 253.0, 271.0, 245.0, 261.0, 264.0, 287.0, 295.0, 259.0, 271.0, 294.0, 285.0, 266.0, 264.0, 257.0, 262.0, 285.0, 297.0, 261.0, 261.0, 287.0, 295.0, 278.0, 298.0, 263.0, 262.0, 293.0, 286.0, 288.0, 288.0, 256.0, 269.0, 252.0, 270.0, 293.0, 283.0, 295.0, 284.0, 316.0, 311.0, 287.0, 289.0, 253.0, 269.0, 296.0, 277.0, 279.0, 237.0, 282.0, 291.0, 246.0, 276.0, 287.0, 283.0, 289.0, 287.0, 292.0, 284.0, 289.0, 287.0, 291.0, 288.0, 292.0, 278.0, 266.0, 259.0, 291.0, 285.0, 262.0, 263.0, 281.0, 298.0, 279.0, 300.0, 263.0, 259.0, 288.0, 285.0, 259.0, 271.0, 286.0, 290.0, 286.0, 293.0, 265.0, 268.0, 281.0, 295.0, 250.0, 272.0, 266.0, 261.0, 259.0, 266.0, 284.0, 289.0, 274.0, 296.0, 283.0, 293.0, 251.0, 259.0, 269.0, 264.0, 259.0, 263.0, 264.0, 261.0, 288.0, 294.0, 299.0, 280.0, 281.0, 298.0, 297.0, 276.0, 260.0, 267.0, 297.0, 279.0, 291.0, 282.0, 288.0, 294.0, 284.0, 295.0, 264.0, 261.0, 291.0, 291.0, 289.0, 290.0, 288.0, 288.0, 279.0, 291.0, 288.0, 288.0, 274.0, 293.0, 289.0, 293.0, 284.0, 292.0, 286.0, 293.0, 258.0, 261.0, 257.0, 268.0, 287.0, 289.0, 279.0, 294.0, 266.0, 261.0, 265.0, 260.0, 295.0, 281.0, 287.0, 292.0, 279.0, 294.0, 288.0, 285.0, 285.0, 297.0, 290.0, 280.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.706976358210824, "mean_inference_ms": 1.2689465769288457, "mean_action_processing_ms": 0.13502696676780063, "mean_env_wait_ms": 0.8513285528676952, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 151.0, "episode_reward_mean": 553.27, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 65.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 276.635}, "hist_stats": {"episode_reward": [579.0, 573.0, 519.0, 582.0, 522.0, 576.0, 582.0, 573.0, 151.0, 527.0, 525.0, 573.0, 525.0, 525.0, 522.0, 516.0, 525.0, 582.0, 530.0, 
579.0, 530.0, 519.0, 582.0, 522.0, 582.0, 576.0, 525.0, 579.0, 576.0, 525.0, 522.0, 576.0, 579.0, 627.0, 576.0, 522.0, 573.0, 516.0, 573.0, 522.0, 570.0, 576.0, 576.0, 576.0, 579.0, 570.0, 525.0, 576.0, 525.0, 579.0, 579.0, 522.0, 573.0, 530.0, 576.0, 579.0, 533.0, 576.0, 522.0, 527.0, 525.0, 573.0, 570.0, 576.0, 510.0, 533.0, 522.0, 525.0, 582.0, 579.0, 579.0, 573.0, 527.0, 576.0, 573.0, 582.0, 579.0, 525.0, 582.0, 579.0, 576.0, 570.0, 576.0, 567.0, 582.0, 576.0, 579.0, 519.0, 525.0, 576.0, 573.0, 527.0, 525.0, 576.0, 579.0, 573.0, 573.0, 582.0, 570.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 276.0, 297.0, 258.0, 261.0, 296.0, 286.0, 259.0, 263.0, 284.0, 292.0, 287.0, 295.0, 289.0, 284.0, 86.0, 65.0, 256.0, 271.0, 256.0, 269.0, 289.0, 284.0, 268.0, 257.0, 263.0, 262.0, 269.0, 253.0, 271.0, 245.0, 261.0, 264.0, 287.0, 295.0, 259.0, 271.0, 294.0, 285.0, 266.0, 264.0, 257.0, 262.0, 285.0, 297.0, 261.0, 261.0, 287.0, 295.0, 278.0, 298.0, 263.0, 262.0, 293.0, 286.0, 288.0, 288.0, 256.0, 269.0, 252.0, 270.0, 293.0, 283.0, 295.0, 284.0, 316.0, 311.0, 287.0, 289.0, 253.0, 269.0, 296.0, 277.0, 279.0, 237.0, 282.0, 291.0, 246.0, 276.0, 287.0, 283.0, 289.0, 287.0, 292.0, 284.0, 289.0, 287.0, 291.0, 288.0, 292.0, 278.0, 266.0, 259.0, 291.0, 285.0, 262.0, 263.0, 281.0, 298.0, 279.0, 300.0, 263.0, 259.0, 288.0, 285.0, 259.0, 271.0, 286.0, 290.0, 286.0, 293.0, 265.0, 268.0, 281.0, 295.0, 250.0, 272.0, 266.0, 261.0, 259.0, 266.0, 284.0, 289.0, 274.0, 296.0, 283.0, 
293.0, 251.0, 259.0, 269.0, 264.0, 259.0, 263.0, 264.0, 261.0, 288.0, 294.0, 299.0, 280.0, 281.0, 298.0, 297.0, 276.0, 260.0, 267.0, 297.0, 279.0, 291.0, 282.0, 288.0, 294.0, 284.0, 295.0, 264.0, 261.0, 291.0, 291.0, 289.0, 290.0, 288.0, 288.0, 279.0, 291.0, 288.0, 288.0, 274.0, 293.0, 289.0, 293.0, 284.0, 292.0, 286.0, 293.0, 258.0, 261.0, 257.0, 268.0, 287.0, 289.0, 279.0, 294.0, 266.0, 261.0, 265.0, 260.0, 295.0, 281.0, 287.0, 292.0, 279.0, 294.0, 288.0, 285.0, 285.0, 297.0, 290.0, 280.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.706976358210824, "mean_inference_ms": 1.2689465769288457, "mean_action_processing_ms": 0.13502696676780063, "mean_env_wait_ms": 0.8513285528676952, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6502400, "num_agent_steps_trained": 6502400, "num_env_steps_sampled": 3251200, "num_env_steps_trained": 3251200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3251200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6502400, "timers": {"training_iteration_time_ms": 3637.223, "learn_time_ms": 1078.626, "learn_throughput": 11866.953, "synch_weights_time_ms": 12.602}, "counters": {"num_env_steps_sampled": 3251200, "num_env_steps_trained": 3251200, "num_agent_steps_sampled": 6502400, "num_agent_steps_trained": 6502400}, "done": false, "episodes_total": 8128, "training_iteration": 254, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-16-59", "timestamp": 1666581419, "time_this_iter_s": 3.5743308067321777, "time_total_s": 983.4558050632477, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 983.4558050632477, "timesteps_since_restore": 0, "iterations_since_restore": 254, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.060000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 172.78, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.81, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.2, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.39, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.47, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.82, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.39, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.39, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007592239417135715, "policy_loss": -0.001087638782337308, "vf_loss": 7.597389221191406, "vf_explained_var": 0.5985276699066162, "kl": 0.0018774853087961674, "entropy": 0.8626449108123779, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3264000, "num_env_steps_trained": 3264000, "num_agent_steps_sampled": 6528000, "num_agent_steps_trained": 6528000}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 510.0, "episode_reward_mean": 561.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 280.59}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 172.78, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.36, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.81, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.2, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.39, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.47, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.04, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.39, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.39, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 627.0, 576.0, 522.0, 573.0, 516.0, 573.0, 522.0, 570.0, 576.0, 576.0, 576.0, 579.0, 570.0, 525.0, 576.0, 525.0, 579.0, 579.0, 522.0, 573.0, 530.0, 576.0, 579.0, 533.0, 576.0, 522.0, 527.0, 525.0, 573.0, 570.0, 576.0, 510.0, 533.0, 522.0, 525.0, 582.0, 579.0, 579.0, 573.0, 527.0, 576.0, 573.0, 582.0, 579.0, 525.0, 582.0, 579.0, 576.0, 570.0, 576.0, 567.0, 582.0, 576.0, 579.0, 519.0, 525.0, 576.0, 573.0, 527.0, 525.0, 576.0, 579.0, 573.0, 573.0, 582.0, 570.0, 576.0, 573.0, 519.0, 579.0, 579.0, 570.0, 579.0, 525.0, 576.0, 519.0, 573.0, 576.0, 584.0, 582.0, 570.0, 576.0, 525.0, 579.0, 576.0, 579.0, 587.0, 527.0, 576.0, 579.0, 579.0, 576.0, 525.0, 579.0, 522.0, 579.0, 522.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 284.0, 316.0, 311.0, 287.0, 289.0, 253.0, 269.0, 296.0, 277.0, 279.0, 
237.0, 282.0, 291.0, 246.0, 276.0, 287.0, 283.0, 289.0, 287.0, 292.0, 284.0, 289.0, 287.0, 291.0, 288.0, 292.0, 278.0, 266.0, 259.0, 291.0, 285.0, 262.0, 263.0, 281.0, 298.0, 279.0, 300.0, 263.0, 259.0, 288.0, 285.0, 259.0, 271.0, 286.0, 290.0, 286.0, 293.0, 265.0, 268.0, 281.0, 295.0, 250.0, 272.0, 266.0, 261.0, 259.0, 266.0, 284.0, 289.0, 274.0, 296.0, 283.0, 293.0, 251.0, 259.0, 269.0, 264.0, 259.0, 263.0, 264.0, 261.0, 288.0, 294.0, 299.0, 280.0, 281.0, 298.0, 297.0, 276.0, 260.0, 267.0, 297.0, 279.0, 291.0, 282.0, 288.0, 294.0, 284.0, 295.0, 264.0, 261.0, 291.0, 291.0, 289.0, 290.0, 288.0, 288.0, 279.0, 291.0, 288.0, 288.0, 274.0, 293.0, 289.0, 293.0, 284.0, 292.0, 286.0, 293.0, 258.0, 261.0, 257.0, 268.0, 287.0, 289.0, 279.0, 294.0, 266.0, 261.0, 265.0, 260.0, 295.0, 281.0, 287.0, 292.0, 279.0, 294.0, 288.0, 285.0, 285.0, 297.0, 290.0, 280.0, 283.0, 293.0, 290.0, 283.0, 260.0, 259.0, 282.0, 297.0, 290.0, 289.0, 282.0, 288.0, 285.0, 294.0, 262.0, 263.0, 285.0, 291.0, 249.0, 270.0, 280.0, 293.0, 287.0, 289.0, 302.0, 282.0, 296.0, 286.0, 283.0, 287.0, 277.0, 299.0, 263.0, 262.0, 284.0, 295.0, 288.0, 288.0, 288.0, 291.0, 293.0, 294.0, 266.0, 261.0, 287.0, 289.0, 292.0, 287.0, 290.0, 289.0, 291.0, 285.0, 263.0, 262.0, 301.0, 278.0, 257.0, 265.0, 283.0, 296.0, 263.0, 259.0, 254.0, 268.0, 295.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7068609015084573, "mean_inference_ms": 1.26862164727497, "mean_action_processing_ms": 0.13501117923771774, "mean_env_wait_ms": 0.8511349115150145, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 510.0, "episode_reward_mean": 561.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 280.59}, "hist_stats": {"episode_reward": [579.0, 627.0, 576.0, 522.0, 573.0, 516.0, 573.0, 522.0, 570.0, 576.0, 576.0, 576.0, 579.0, 570.0, 525.0, 576.0, 525.0, 579.0, 579.0, 
522.0, 573.0, 530.0, 576.0, 579.0, 533.0, 576.0, 522.0, 527.0, 525.0, 573.0, 570.0, 576.0, 510.0, 533.0, 522.0, 525.0, 582.0, 579.0, 579.0, 573.0, 527.0, 576.0, 573.0, 582.0, 579.0, 525.0, 582.0, 579.0, 576.0, 570.0, 576.0, 567.0, 582.0, 576.0, 579.0, 519.0, 525.0, 576.0, 573.0, 527.0, 525.0, 576.0, 579.0, 573.0, 573.0, 582.0, 570.0, 576.0, 573.0, 519.0, 579.0, 579.0, 570.0, 579.0, 525.0, 576.0, 519.0, 573.0, 576.0, 584.0, 582.0, 570.0, 576.0, 525.0, 579.0, 576.0, 579.0, 587.0, 527.0, 576.0, 579.0, 579.0, 576.0, 525.0, 579.0, 522.0, 579.0, 522.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 284.0, 316.0, 311.0, 287.0, 289.0, 253.0, 269.0, 296.0, 277.0, 279.0, 237.0, 282.0, 291.0, 246.0, 276.0, 287.0, 283.0, 289.0, 287.0, 292.0, 284.0, 289.0, 287.0, 291.0, 288.0, 292.0, 278.0, 266.0, 259.0, 291.0, 285.0, 262.0, 263.0, 281.0, 298.0, 279.0, 300.0, 263.0, 259.0, 288.0, 285.0, 259.0, 271.0, 286.0, 290.0, 286.0, 293.0, 265.0, 268.0, 281.0, 295.0, 250.0, 272.0, 266.0, 261.0, 259.0, 266.0, 284.0, 289.0, 274.0, 296.0, 283.0, 293.0, 251.0, 259.0, 269.0, 264.0, 259.0, 263.0, 264.0, 261.0, 288.0, 294.0, 299.0, 280.0, 281.0, 298.0, 297.0, 276.0, 260.0, 267.0, 297.0, 279.0, 291.0, 282.0, 288.0, 294.0, 284.0, 295.0, 264.0, 261.0, 291.0, 291.0, 289.0, 290.0, 288.0, 288.0, 279.0, 291.0, 288.0, 288.0, 274.0, 293.0, 289.0, 293.0, 284.0, 292.0, 286.0, 293.0, 258.0, 261.0, 257.0, 268.0, 287.0, 289.0, 279.0, 294.0, 266.0, 261.0, 265.0, 260.0, 295.0, 281.0, 287.0, 292.0, 279.0, 
294.0, 288.0, 285.0, 285.0, 297.0, 290.0, 280.0, 283.0, 293.0, 290.0, 283.0, 260.0, 259.0, 282.0, 297.0, 290.0, 289.0, 282.0, 288.0, 285.0, 294.0, 262.0, 263.0, 285.0, 291.0, 249.0, 270.0, 280.0, 293.0, 287.0, 289.0, 302.0, 282.0, 296.0, 286.0, 283.0, 287.0, 277.0, 299.0, 263.0, 262.0, 284.0, 295.0, 288.0, 288.0, 288.0, 291.0, 293.0, 294.0, 266.0, 261.0, 287.0, 289.0, 292.0, 287.0, 290.0, 289.0, 291.0, 285.0, 263.0, 262.0, 301.0, 278.0, 257.0, 265.0, 283.0, 296.0, 263.0, 259.0, 254.0, 268.0, 295.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7068609015084573, "mean_inference_ms": 1.26862164727497, "mean_action_processing_ms": 0.13501117923771774, "mean_env_wait_ms": 0.8511349115150145, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6528000, "num_agent_steps_trained": 6528000, "num_env_steps_sampled": 3264000, "num_env_steps_trained": 3264000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3264000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6528000, "timers": {"training_iteration_time_ms": 3637.109, "learn_time_ms": 1085.381, "learn_throughput": 11793.093, "synch_weights_time_ms": 12.557}, "counters": {"num_env_steps_sampled": 3264000, "num_env_steps_trained": 3264000, "num_agent_steps_sampled": 6528000, "num_agent_steps_trained": 6528000}, "done": false, "episodes_total": 8160, "training_iteration": 255, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-03", "timestamp": 1666581423, "time_this_iter_s": 3.6406946182250977, "time_total_s": 987.0964996814728, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 987.0964996814728, "timesteps_since_restore": 0, "iterations_since_restore": 255, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.316666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 180, "sparse_reward_max": 200, "shaped_reward_mean": 173.56, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.63, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.39, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.41, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.06, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.27, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.06, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.27, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.06, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.27, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0021585412323474884, "policy_loss": -0.0025006746873259544, "vf_loss": 7.743043899536133, "vf_explained_var": 0.5868951082229614, "kl": 0.0020559565164148808, "entropy": 0.8643389344215393, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3276800, "num_env_steps_trained": 3276800, "num_agent_steps_sampled": 6553600, "num_agent_steps_trained": 6553600}, "sampler_results": {"episode_reward_max": 587.0, "episode_reward_min": 510.0, "episode_reward_mean": 563.96, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 281.98}, "custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 180, "sparse_reward_max": 200, "shaped_reward_mean": 173.56, "shaped_reward_min": 150, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.63, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.39, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.41, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.19, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.06, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.27, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.06, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.27, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.06, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.27, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 533.0, 522.0, 525.0, 582.0, 579.0, 579.0, 573.0, 527.0, 576.0, 573.0, 582.0, 579.0, 525.0, 582.0, 579.0, 576.0, 570.0, 576.0, 567.0, 582.0, 576.0, 579.0, 519.0, 525.0, 576.0, 573.0, 527.0, 525.0, 576.0, 579.0, 573.0, 573.0, 582.0, 570.0, 576.0, 573.0, 519.0, 579.0, 579.0, 570.0, 579.0, 525.0, 576.0, 519.0, 573.0, 576.0, 584.0, 582.0, 570.0, 576.0, 525.0, 579.0, 576.0, 579.0, 587.0, 527.0, 576.0, 579.0, 579.0, 576.0, 525.0, 579.0, 522.0, 579.0, 522.0, 522.0, 579.0, 525.0, 582.0, 579.0, 573.0, 524.0, 570.0, 576.0, 576.0, 576.0, 579.0, 576.0, 573.0, 579.0, 576.0, 579.0, 582.0, 573.0, 522.0, 584.0, 576.0, 570.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 576.0, 576.0, 530.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [251.0, 259.0, 269.0, 264.0, 259.0, 263.0, 264.0, 261.0, 288.0, 294.0, 299.0, 
280.0, 281.0, 298.0, 297.0, 276.0, 260.0, 267.0, 297.0, 279.0, 291.0, 282.0, 288.0, 294.0, 284.0, 295.0, 264.0, 261.0, 291.0, 291.0, 289.0, 290.0, 288.0, 288.0, 279.0, 291.0, 288.0, 288.0, 274.0, 293.0, 289.0, 293.0, 284.0, 292.0, 286.0, 293.0, 258.0, 261.0, 257.0, 268.0, 287.0, 289.0, 279.0, 294.0, 266.0, 261.0, 265.0, 260.0, 295.0, 281.0, 287.0, 292.0, 279.0, 294.0, 288.0, 285.0, 285.0, 297.0, 290.0, 280.0, 283.0, 293.0, 290.0, 283.0, 260.0, 259.0, 282.0, 297.0, 290.0, 289.0, 282.0, 288.0, 285.0, 294.0, 262.0, 263.0, 285.0, 291.0, 249.0, 270.0, 280.0, 293.0, 287.0, 289.0, 302.0, 282.0, 296.0, 286.0, 283.0, 287.0, 277.0, 299.0, 263.0, 262.0, 284.0, 295.0, 288.0, 288.0, 288.0, 291.0, 293.0, 294.0, 266.0, 261.0, 287.0, 289.0, 292.0, 287.0, 290.0, 289.0, 291.0, 285.0, 263.0, 262.0, 301.0, 278.0, 257.0, 265.0, 283.0, 296.0, 263.0, 259.0, 254.0, 268.0, 295.0, 284.0, 264.0, 261.0, 296.0, 286.0, 291.0, 288.0, 282.0, 291.0, 256.0, 268.0, 286.0, 284.0, 283.0, 293.0, 294.0, 282.0, 292.0, 284.0, 283.0, 296.0, 283.0, 293.0, 290.0, 283.0, 293.0, 286.0, 285.0, 291.0, 297.0, 282.0, 285.0, 297.0, 280.0, 293.0, 264.0, 258.0, 297.0, 287.0, 292.0, 284.0, 280.0, 290.0, 290.0, 283.0, 262.0, 268.0, 274.0, 302.0, 298.0, 275.0, 278.0, 292.0, 289.0, 284.0, 283.0, 293.0, 287.0, 289.0, 257.0, 273.0, 287.0, 292.0, 295.0, 278.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7067256217430281, "mean_inference_ms": 1.2682765239266274, "mean_action_processing_ms": 0.13499352668107936, "mean_env_wait_ms": 0.8509188370516406, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 587.0, "episode_reward_min": 510.0, "episode_reward_mean": 563.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 302.0}, "policy_reward_mean": {"ppo": 281.98}, "hist_stats": {"episode_reward": [510.0, 533.0, 522.0, 525.0, 582.0, 579.0, 579.0, 573.0, 527.0, 576.0, 573.0, 582.0, 579.0, 525.0, 582.0, 579.0, 576.0, 570.0, 576.0, 
567.0, 582.0, 576.0, 579.0, 519.0, 525.0, 576.0, 573.0, 527.0, 525.0, 576.0, 579.0, 573.0, 573.0, 582.0, 570.0, 576.0, 573.0, 519.0, 579.0, 579.0, 570.0, 579.0, 525.0, 576.0, 519.0, 573.0, 576.0, 584.0, 582.0, 570.0, 576.0, 525.0, 579.0, 576.0, 579.0, 587.0, 527.0, 576.0, 579.0, 579.0, 576.0, 525.0, 579.0, 522.0, 579.0, 522.0, 522.0, 579.0, 525.0, 582.0, 579.0, 573.0, 524.0, 570.0, 576.0, 576.0, 576.0, 579.0, 576.0, 573.0, 579.0, 576.0, 579.0, 582.0, 573.0, 522.0, 584.0, 576.0, 570.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 576.0, 576.0, 530.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [251.0, 259.0, 269.0, 264.0, 259.0, 263.0, 264.0, 261.0, 288.0, 294.0, 299.0, 280.0, 281.0, 298.0, 297.0, 276.0, 260.0, 267.0, 297.0, 279.0, 291.0, 282.0, 288.0, 294.0, 284.0, 295.0, 264.0, 261.0, 291.0, 291.0, 289.0, 290.0, 288.0, 288.0, 279.0, 291.0, 288.0, 288.0, 274.0, 293.0, 289.0, 293.0, 284.0, 292.0, 286.0, 293.0, 258.0, 261.0, 257.0, 268.0, 287.0, 289.0, 279.0, 294.0, 266.0, 261.0, 265.0, 260.0, 295.0, 281.0, 287.0, 292.0, 279.0, 294.0, 288.0, 285.0, 285.0, 297.0, 290.0, 280.0, 283.0, 293.0, 290.0, 283.0, 260.0, 259.0, 282.0, 297.0, 290.0, 289.0, 282.0, 288.0, 285.0, 294.0, 262.0, 263.0, 285.0, 291.0, 249.0, 270.0, 280.0, 293.0, 287.0, 289.0, 302.0, 282.0, 296.0, 286.0, 283.0, 287.0, 277.0, 299.0, 263.0, 262.0, 284.0, 295.0, 288.0, 288.0, 288.0, 291.0, 293.0, 294.0, 266.0, 261.0, 287.0, 289.0, 292.0, 287.0, 290.0, 289.0, 291.0, 285.0, 263.0, 262.0, 301.0, 278.0, 257.0, 
265.0, 283.0, 296.0, 263.0, 259.0, 254.0, 268.0, 295.0, 284.0, 264.0, 261.0, 296.0, 286.0, 291.0, 288.0, 282.0, 291.0, 256.0, 268.0, 286.0, 284.0, 283.0, 293.0, 294.0, 282.0, 292.0, 284.0, 283.0, 296.0, 283.0, 293.0, 290.0, 283.0, 293.0, 286.0, 285.0, 291.0, 297.0, 282.0, 285.0, 297.0, 280.0, 293.0, 264.0, 258.0, 297.0, 287.0, 292.0, 284.0, 280.0, 290.0, 290.0, 283.0, 262.0, 268.0, 274.0, 302.0, 298.0, 275.0, 278.0, 292.0, 289.0, 284.0, 283.0, 293.0, 287.0, 289.0, 257.0, 273.0, 287.0, 292.0, 295.0, 278.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7067256217430281, "mean_inference_ms": 1.2682765239266274, "mean_action_processing_ms": 0.13499352668107936, "mean_env_wait_ms": 0.8509188370516406, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6553600, "num_agent_steps_trained": 6553600, "num_env_steps_sampled": 3276800, "num_env_steps_trained": 3276800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3276800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6553600, "timers": {"training_iteration_time_ms": 3632.587, "learn_time_ms": 1086.132, "learn_throughput": 11784.934, "synch_weights_time_ms": 11.912}, "counters": {"num_env_steps_sampled": 3276800, "num_env_steps_trained": 3276800, "num_agent_steps_sampled": 6553600, "num_agent_steps_trained": 6553600}, "done": false, "episodes_total": 8192, "training_iteration": 256, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-07", "timestamp": 1666581427, "time_this_iter_s": 3.608936071395874, "time_total_s": 990.7054357528687, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 990.7054357528687, "timesteps_since_restore": 0, "iterations_since_restore": 256, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.419999999999998, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 174.8, "shaped_reward_min": 156, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.4, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.92, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.25, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.95, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.59, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.58, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.95, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.59, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.95, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.59, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0006072861724533141, "policy_loss": -0.0009418814443051815, "vf_loss": 7.661257743835449, "vf_explained_var": 0.5704012513160706, "kl": 0.001968295779079199, "entropy": 0.8630570769309998, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3289600, "num_env_steps_trained": 3289600, "num_agent_steps_sampled": 6579200, "num_agent_steps_trained": 6579200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 516.0, "episode_reward_mean": 567.6, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 283.8}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 174.8, "shaped_reward_min": 156, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.4, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.92, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.25, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.95, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.59, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.58, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.68, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.95, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.59, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.95, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.59, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 570.0, 576.0, 573.0, 519.0, 579.0, 579.0, 570.0, 579.0, 525.0, 576.0, 519.0, 573.0, 576.0, 584.0, 582.0, 570.0, 576.0, 525.0, 579.0, 576.0, 579.0, 587.0, 527.0, 576.0, 579.0, 579.0, 576.0, 525.0, 579.0, 522.0, 579.0, 522.0, 522.0, 579.0, 525.0, 582.0, 579.0, 573.0, 524.0, 570.0, 576.0, 576.0, 576.0, 579.0, 576.0, 573.0, 579.0, 576.0, 579.0, 582.0, 573.0, 522.0, 584.0, 576.0, 570.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 576.0, 576.0, 530.0, 579.0, 573.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 525.0, 579.0, 573.0, 573.0, 582.0, 576.0, 579.0, 576.0, 522.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 525.0, 576.0, 579.0, 541.0, 582.0, 573.0, 633.0, 570.0, 576.0, 582.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 285.0, 297.0, 290.0, 280.0, 283.0, 293.0, 290.0, 283.0, 260.0, 
259.0, 282.0, 297.0, 290.0, 289.0, 282.0, 288.0, 285.0, 294.0, 262.0, 263.0, 285.0, 291.0, 249.0, 270.0, 280.0, 293.0, 287.0, 289.0, 302.0, 282.0, 296.0, 286.0, 283.0, 287.0, 277.0, 299.0, 263.0, 262.0, 284.0, 295.0, 288.0, 288.0, 288.0, 291.0, 293.0, 294.0, 266.0, 261.0, 287.0, 289.0, 292.0, 287.0, 290.0, 289.0, 291.0, 285.0, 263.0, 262.0, 301.0, 278.0, 257.0, 265.0, 283.0, 296.0, 263.0, 259.0, 254.0, 268.0, 295.0, 284.0, 264.0, 261.0, 296.0, 286.0, 291.0, 288.0, 282.0, 291.0, 256.0, 268.0, 286.0, 284.0, 283.0, 293.0, 294.0, 282.0, 292.0, 284.0, 283.0, 296.0, 283.0, 293.0, 290.0, 283.0, 293.0, 286.0, 285.0, 291.0, 297.0, 282.0, 285.0, 297.0, 280.0, 293.0, 264.0, 258.0, 297.0, 287.0, 292.0, 284.0, 280.0, 290.0, 290.0, 283.0, 262.0, 268.0, 274.0, 302.0, 298.0, 275.0, 278.0, 292.0, 289.0, 284.0, 283.0, 293.0, 287.0, 289.0, 257.0, 273.0, 287.0, 292.0, 295.0, 278.0, 286.0, 293.0, 292.0, 290.0, 280.0, 293.0, 289.0, 293.0, 285.0, 297.0, 288.0, 294.0, 259.0, 266.0, 285.0, 294.0, 282.0, 291.0, 288.0, 285.0, 290.0, 292.0, 289.0, 287.0, 289.0, 290.0, 294.0, 282.0, 268.0, 254.0, 286.0, 296.0, 288.0, 294.0, 293.0, 289.0, 290.0, 283.0, 294.0, 288.0, 288.0, 282.0, 261.0, 264.0, 289.0, 287.0, 285.0, 294.0, 282.0, 259.0, 291.0, 291.0, 286.0, 287.0, 309.0, 324.0, 280.0, 290.0, 291.0, 285.0, 293.0, 289.0, 261.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7066194733142604, "mean_inference_ms": 1.2679197114805454, "mean_action_processing_ms": 0.1349735798694533, "mean_env_wait_ms": 0.8506980210996916, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 516.0, "episode_reward_mean": 567.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 283.8}, "hist_stats": {"episode_reward": [573.0, 582.0, 570.0, 576.0, 573.0, 519.0, 579.0, 579.0, 570.0, 579.0, 525.0, 576.0, 519.0, 573.0, 576.0, 584.0, 582.0, 570.0, 576.0, 
525.0, 579.0, 576.0, 579.0, 587.0, 527.0, 576.0, 579.0, 579.0, 576.0, 525.0, 579.0, 522.0, 579.0, 522.0, 522.0, 579.0, 525.0, 582.0, 579.0, 573.0, 524.0, 570.0, 576.0, 576.0, 576.0, 579.0, 576.0, 573.0, 579.0, 576.0, 579.0, 582.0, 573.0, 522.0, 584.0, 576.0, 570.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 576.0, 576.0, 530.0, 579.0, 573.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 525.0, 579.0, 573.0, 573.0, 582.0, 576.0, 579.0, 576.0, 522.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 525.0, 576.0, 579.0, 541.0, 582.0, 573.0, 633.0, 570.0, 576.0, 582.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 285.0, 297.0, 290.0, 280.0, 283.0, 293.0, 290.0, 283.0, 260.0, 259.0, 282.0, 297.0, 290.0, 289.0, 282.0, 288.0, 285.0, 294.0, 262.0, 263.0, 285.0, 291.0, 249.0, 270.0, 280.0, 293.0, 287.0, 289.0, 302.0, 282.0, 296.0, 286.0, 283.0, 287.0, 277.0, 299.0, 263.0, 262.0, 284.0, 295.0, 288.0, 288.0, 288.0, 291.0, 293.0, 294.0, 266.0, 261.0, 287.0, 289.0, 292.0, 287.0, 290.0, 289.0, 291.0, 285.0, 263.0, 262.0, 301.0, 278.0, 257.0, 265.0, 283.0, 296.0, 263.0, 259.0, 254.0, 268.0, 295.0, 284.0, 264.0, 261.0, 296.0, 286.0, 291.0, 288.0, 282.0, 291.0, 256.0, 268.0, 286.0, 284.0, 283.0, 293.0, 294.0, 282.0, 292.0, 284.0, 283.0, 296.0, 283.0, 293.0, 290.0, 283.0, 293.0, 286.0, 285.0, 291.0, 297.0, 282.0, 285.0, 297.0, 280.0, 293.0, 264.0, 258.0, 297.0, 287.0, 292.0, 284.0, 280.0, 290.0, 290.0, 283.0, 262.0, 268.0, 274.0, 302.0, 298.0, 275.0, 278.0, 292.0, 289.0, 284.0, 283.0, 
293.0, 287.0, 289.0, 257.0, 273.0, 287.0, 292.0, 295.0, 278.0, 286.0, 293.0, 292.0, 290.0, 280.0, 293.0, 289.0, 293.0, 285.0, 297.0, 288.0, 294.0, 259.0, 266.0, 285.0, 294.0, 282.0, 291.0, 288.0, 285.0, 290.0, 292.0, 289.0, 287.0, 289.0, 290.0, 294.0, 282.0, 268.0, 254.0, 286.0, 296.0, 288.0, 294.0, 293.0, 289.0, 290.0, 283.0, 294.0, 288.0, 288.0, 282.0, 261.0, 264.0, 289.0, 287.0, 285.0, 294.0, 282.0, 259.0, 291.0, 291.0, 286.0, 287.0, 309.0, 324.0, 280.0, 290.0, 291.0, 285.0, 293.0, 289.0, 261.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7066194733142604, "mean_inference_ms": 1.2679197114805454, "mean_action_processing_ms": 0.1349735798694533, "mean_env_wait_ms": 0.8506980210996916, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6579200, "num_agent_steps_trained": 6579200, "num_env_steps_sampled": 3289600, "num_env_steps_trained": 3289600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3289600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6579200, "timers": {"training_iteration_time_ms": 3627.519, "learn_time_ms": 1083.827, "learn_throughput": 11810.001, "synch_weights_time_ms": 12.02}, "counters": {"num_env_steps_sampled": 3289600, "num_env_steps_trained": 3289600, "num_agent_steps_sampled": 6579200, "num_agent_steps_trained": 6579200}, "done": false, "episodes_total": 8224, "training_iteration": 257, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-11", "timestamp": 1666581431, "time_this_iter_s": 3.631376266479492, "time_total_s": 994.3368120193481, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 994.3368120193481, "timesteps_since_restore": 0, "iterations_since_restore": 257, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 173.66, "shaped_reward_min": 96, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.45, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.77, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.32, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.51, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.96, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.38, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.56, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.24, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.1, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.96, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.38, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.96, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.38, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00214711157605052, "policy_loss": -0.002498897723853588, "vf_loss": 7.818296432495117, "vf_explained_var": 0.5837655663490295, "kl": 0.002699965611100197, "entropy": 0.86008620262146, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3302400, "num_env_steps_trained": 3302400, "num_agent_steps_sampled": 6604800, "num_agent_steps_trained": 6604800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 296.0, "episode_reward_mean": 563.66, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 142.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 281.83}, "custom_metrics": {"sparse_reward_mean": 195.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 173.66, "shaped_reward_min": 96, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.45, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.77, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.32, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.51, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.96, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.38, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.56, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.24, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.1, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.96, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.38, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.96, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.38, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 522.0, 522.0, 579.0, 525.0, 582.0, 579.0, 573.0, 524.0, 570.0, 576.0, 576.0, 576.0, 579.0, 576.0, 573.0, 579.0, 576.0, 579.0, 582.0, 573.0, 522.0, 584.0, 576.0, 570.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 576.0, 576.0, 530.0, 579.0, 573.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 525.0, 579.0, 573.0, 573.0, 582.0, 576.0, 579.0, 576.0, 522.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 525.0, 576.0, 579.0, 541.0, 582.0, 573.0, 633.0, 570.0, 576.0, 582.0, 516.0, 587.0, 536.0, 567.0, 573.0, 296.0, 582.0, 576.0, 582.0, 525.0, 576.0, 576.0, 579.0, 465.0, 582.0, 522.0, 564.0, 576.0, 630.0, 525.0, 570.0, 522.0, 579.0, 533.0, 576.0, 522.0, 570.0, 579.0, 576.0, 579.0, 522.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 263.0, 259.0, 254.0, 268.0, 295.0, 284.0, 264.0, 261.0, 296.0, 
286.0, 291.0, 288.0, 282.0, 291.0, 256.0, 268.0, 286.0, 284.0, 283.0, 293.0, 294.0, 282.0, 292.0, 284.0, 283.0, 296.0, 283.0, 293.0, 290.0, 283.0, 293.0, 286.0, 285.0, 291.0, 297.0, 282.0, 285.0, 297.0, 280.0, 293.0, 264.0, 258.0, 297.0, 287.0, 292.0, 284.0, 280.0, 290.0, 290.0, 283.0, 262.0, 268.0, 274.0, 302.0, 298.0, 275.0, 278.0, 292.0, 289.0, 284.0, 283.0, 293.0, 287.0, 289.0, 257.0, 273.0, 287.0, 292.0, 295.0, 278.0, 286.0, 293.0, 292.0, 290.0, 280.0, 293.0, 289.0, 293.0, 285.0, 297.0, 288.0, 294.0, 259.0, 266.0, 285.0, 294.0, 282.0, 291.0, 288.0, 285.0, 290.0, 292.0, 289.0, 287.0, 289.0, 290.0, 294.0, 282.0, 268.0, 254.0, 286.0, 296.0, 288.0, 294.0, 293.0, 289.0, 290.0, 283.0, 294.0, 288.0, 288.0, 282.0, 261.0, 264.0, 289.0, 287.0, 285.0, 294.0, 282.0, 259.0, 291.0, 291.0, 286.0, 287.0, 309.0, 324.0, 280.0, 290.0, 291.0, 285.0, 293.0, 289.0, 261.0, 255.0, 291.0, 296.0, 260.0, 276.0, 280.0, 287.0, 295.0, 278.0, 142.0, 154.0, 295.0, 287.0, 283.0, 293.0, 298.0, 284.0, 254.0, 271.0, 278.0, 298.0, 294.0, 282.0, 289.0, 290.0, 231.0, 234.0, 296.0, 286.0, 262.0, 260.0, 294.0, 270.0, 291.0, 285.0, 313.0, 317.0, 262.0, 263.0, 279.0, 291.0, 252.0, 270.0, 282.0, 297.0, 270.0, 263.0, 293.0, 283.0, 253.0, 269.0, 279.0, 291.0, 280.0, 299.0, 286.0, 290.0, 288.0, 291.0, 260.0, 262.0, 285.0, 288.0, 293.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7065245715933673, "mean_inference_ms": 1.2675972055067737, "mean_action_processing_ms": 0.1349561136398719, "mean_env_wait_ms": 0.8505030940724262, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 296.0, "episode_reward_mean": 563.66, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 142.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 281.83}, "hist_stats": {"episode_reward": [579.0, 522.0, 522.0, 579.0, 525.0, 582.0, 579.0, 573.0, 524.0, 570.0, 576.0, 576.0, 576.0, 579.0, 576.0, 573.0, 579.0, 576.0, 579.0, 
582.0, 573.0, 522.0, 584.0, 576.0, 570.0, 573.0, 530.0, 576.0, 573.0, 570.0, 573.0, 576.0, 576.0, 530.0, 579.0, 573.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 525.0, 579.0, 573.0, 573.0, 582.0, 576.0, 579.0, 576.0, 522.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 525.0, 576.0, 579.0, 541.0, 582.0, 573.0, 633.0, 570.0, 576.0, 582.0, 516.0, 587.0, 536.0, 567.0, 573.0, 296.0, 582.0, 576.0, 582.0, 525.0, 576.0, 576.0, 579.0, 465.0, 582.0, 522.0, 564.0, 576.0, 630.0, 525.0, 570.0, 522.0, 579.0, 533.0, 576.0, 522.0, 570.0, 579.0, 576.0, 579.0, 522.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 263.0, 259.0, 254.0, 268.0, 295.0, 284.0, 264.0, 261.0, 296.0, 286.0, 291.0, 288.0, 282.0, 291.0, 256.0, 268.0, 286.0, 284.0, 283.0, 293.0, 294.0, 282.0, 292.0, 284.0, 283.0, 296.0, 283.0, 293.0, 290.0, 283.0, 293.0, 286.0, 285.0, 291.0, 297.0, 282.0, 285.0, 297.0, 280.0, 293.0, 264.0, 258.0, 297.0, 287.0, 292.0, 284.0, 280.0, 290.0, 290.0, 283.0, 262.0, 268.0, 274.0, 302.0, 298.0, 275.0, 278.0, 292.0, 289.0, 284.0, 283.0, 293.0, 287.0, 289.0, 257.0, 273.0, 287.0, 292.0, 295.0, 278.0, 286.0, 293.0, 292.0, 290.0, 280.0, 293.0, 289.0, 293.0, 285.0, 297.0, 288.0, 294.0, 259.0, 266.0, 285.0, 294.0, 282.0, 291.0, 288.0, 285.0, 290.0, 292.0, 289.0, 287.0, 289.0, 290.0, 294.0, 282.0, 268.0, 254.0, 286.0, 296.0, 288.0, 294.0, 293.0, 289.0, 290.0, 283.0, 294.0, 288.0, 288.0, 282.0, 261.0, 264.0, 289.0, 287.0, 285.0, 294.0, 282.0, 259.0, 291.0, 291.0, 286.0, 287.0, 309.0, 
324.0, 280.0, 290.0, 291.0, 285.0, 293.0, 289.0, 261.0, 255.0, 291.0, 296.0, 260.0, 276.0, 280.0, 287.0, 295.0, 278.0, 142.0, 154.0, 295.0, 287.0, 283.0, 293.0, 298.0, 284.0, 254.0, 271.0, 278.0, 298.0, 294.0, 282.0, 289.0, 290.0, 231.0, 234.0, 296.0, 286.0, 262.0, 260.0, 294.0, 270.0, 291.0, 285.0, 313.0, 317.0, 262.0, 263.0, 279.0, 291.0, 252.0, 270.0, 282.0, 297.0, 270.0, 263.0, 293.0, 283.0, 253.0, 269.0, 279.0, 291.0, 280.0, 299.0, 286.0, 290.0, 288.0, 291.0, 260.0, 262.0, 285.0, 288.0, 293.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7065245715933673, "mean_inference_ms": 1.2675972055067737, "mean_action_processing_ms": 0.1349561136398719, "mean_env_wait_ms": 0.8505030940724262, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6604800, "num_agent_steps_trained": 6604800, "num_env_steps_sampled": 3302400, "num_env_steps_trained": 3302400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3302400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6604800, "timers": {"training_iteration_time_ms": 3611.336, "learn_time_ms": 1074.018, "learn_throughput": 11917.868, "synch_weights_time_ms": 12.001}, "counters": {"num_env_steps_sampled": 3302400, "num_env_steps_trained": 3302400, "num_agent_steps_sampled": 6604800, "num_agent_steps_trained": 6604800}, "done": false, "episodes_total": 8256, "training_iteration": 258, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-14", "timestamp": 1666581434, "time_this_iter_s": 3.57814884185791, "time_total_s": 997.914960861206, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 997.914960861206, "timesteps_since_restore": 0, "iterations_since_restore": 258, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.716666666666665, "ram_util_percent": 10.633333333333333}} +{"custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 172.75, "shaped_reward_min": 74, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.99, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.21, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.99, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.53, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.8, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.5, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.53, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.8, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.53, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.8, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009613498696126044, "policy_loss": 0.0006067568901926279, "vf_loss": 7.799860000610352, "vf_explained_var": 0.5667149424552917, "kl": 0.0024039496202021837, "entropy": 0.8507847785949707, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3315200, "num_env_steps_trained": 3315200, "num_agent_steps_sampled": 6630400, "num_agent_steps_trained": 6630400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 234.0, "episode_reward_mean": 559.95, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 279.975}, "custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 172.75, "shaped_reward_min": 74, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.99, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.21, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.86, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.99, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.53, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.8, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.23, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.5, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.53, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.8, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.53, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.8, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 530.0, 579.0, 573.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 525.0, 579.0, 573.0, 573.0, 582.0, 576.0, 579.0, 576.0, 522.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 525.0, 576.0, 579.0, 541.0, 582.0, 573.0, 633.0, 570.0, 576.0, 582.0, 516.0, 587.0, 536.0, 567.0, 573.0, 296.0, 582.0, 576.0, 582.0, 525.0, 576.0, 576.0, 579.0, 465.0, 582.0, 522.0, 564.0, 576.0, 630.0, 525.0, 570.0, 522.0, 579.0, 533.0, 576.0, 522.0, 570.0, 579.0, 576.0, 579.0, 522.0, 573.0, 576.0, 579.0, 573.0, 582.0, 576.0, 576.0, 522.0, 576.0, 536.0, 579.0, 633.0, 576.0, 530.0, 576.0, 579.0, 576.0, 576.0, 576.0, 522.0, 522.0, 576.0, 579.0, 234.0, 573.0, 522.0, 525.0, 579.0, 570.0, 573.0, 579.0, 525.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 289.0, 257.0, 273.0, 287.0, 292.0, 295.0, 278.0, 286.0, 293.0, 292.0, 
290.0, 280.0, 293.0, 289.0, 293.0, 285.0, 297.0, 288.0, 294.0, 259.0, 266.0, 285.0, 294.0, 282.0, 291.0, 288.0, 285.0, 290.0, 292.0, 289.0, 287.0, 289.0, 290.0, 294.0, 282.0, 268.0, 254.0, 286.0, 296.0, 288.0, 294.0, 293.0, 289.0, 290.0, 283.0, 294.0, 288.0, 288.0, 282.0, 261.0, 264.0, 289.0, 287.0, 285.0, 294.0, 282.0, 259.0, 291.0, 291.0, 286.0, 287.0, 309.0, 324.0, 280.0, 290.0, 291.0, 285.0, 293.0, 289.0, 261.0, 255.0, 291.0, 296.0, 260.0, 276.0, 280.0, 287.0, 295.0, 278.0, 142.0, 154.0, 295.0, 287.0, 283.0, 293.0, 298.0, 284.0, 254.0, 271.0, 278.0, 298.0, 294.0, 282.0, 289.0, 290.0, 231.0, 234.0, 296.0, 286.0, 262.0, 260.0, 294.0, 270.0, 291.0, 285.0, 313.0, 317.0, 262.0, 263.0, 279.0, 291.0, 252.0, 270.0, 282.0, 297.0, 270.0, 263.0, 293.0, 283.0, 253.0, 269.0, 279.0, 291.0, 280.0, 299.0, 286.0, 290.0, 288.0, 291.0, 260.0, 262.0, 285.0, 288.0, 293.0, 283.0, 288.0, 291.0, 289.0, 284.0, 294.0, 288.0, 295.0, 281.0, 282.0, 294.0, 267.0, 255.0, 279.0, 297.0, 277.0, 259.0, 288.0, 291.0, 319.0, 314.0, 294.0, 282.0, 271.0, 259.0, 286.0, 290.0, 289.0, 290.0, 288.0, 288.0, 290.0, 286.0, 290.0, 286.0, 258.0, 264.0, 264.0, 258.0, 286.0, 290.0, 290.0, 289.0, 125.0, 109.0, 289.0, 284.0, 259.0, 263.0, 259.0, 266.0, 282.0, 297.0, 287.0, 283.0, 287.0, 286.0, 286.0, 293.0, 254.0, 271.0, 284.0, 289.0, 280.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7064238285392225, "mean_inference_ms": 1.2672823442085852, "mean_action_processing_ms": 0.1349392502739434, "mean_env_wait_ms": 0.850318293245503, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 234.0, "episode_reward_mean": 559.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 279.975}, "hist_stats": {"episode_reward": [576.0, 530.0, 579.0, 573.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 525.0, 579.0, 573.0, 573.0, 582.0, 576.0, 579.0, 576.0, 522.0, 
582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 525.0, 576.0, 579.0, 541.0, 582.0, 573.0, 633.0, 570.0, 576.0, 582.0, 516.0, 587.0, 536.0, 567.0, 573.0, 296.0, 582.0, 576.0, 582.0, 525.0, 576.0, 576.0, 579.0, 465.0, 582.0, 522.0, 564.0, 576.0, 630.0, 525.0, 570.0, 522.0, 579.0, 533.0, 576.0, 522.0, 570.0, 579.0, 576.0, 579.0, 522.0, 573.0, 576.0, 579.0, 573.0, 582.0, 576.0, 576.0, 522.0, 576.0, 536.0, 579.0, 633.0, 576.0, 530.0, 576.0, 579.0, 576.0, 576.0, 576.0, 522.0, 522.0, 576.0, 579.0, 234.0, 573.0, 522.0, 525.0, 579.0, 570.0, 573.0, 579.0, 525.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 289.0, 257.0, 273.0, 287.0, 292.0, 295.0, 278.0, 286.0, 293.0, 292.0, 290.0, 280.0, 293.0, 289.0, 293.0, 285.0, 297.0, 288.0, 294.0, 259.0, 266.0, 285.0, 294.0, 282.0, 291.0, 288.0, 285.0, 290.0, 292.0, 289.0, 287.0, 289.0, 290.0, 294.0, 282.0, 268.0, 254.0, 286.0, 296.0, 288.0, 294.0, 293.0, 289.0, 290.0, 283.0, 294.0, 288.0, 288.0, 282.0, 261.0, 264.0, 289.0, 287.0, 285.0, 294.0, 282.0, 259.0, 291.0, 291.0, 286.0, 287.0, 309.0, 324.0, 280.0, 290.0, 291.0, 285.0, 293.0, 289.0, 261.0, 255.0, 291.0, 296.0, 260.0, 276.0, 280.0, 287.0, 295.0, 278.0, 142.0, 154.0, 295.0, 287.0, 283.0, 293.0, 298.0, 284.0, 254.0, 271.0, 278.0, 298.0, 294.0, 282.0, 289.0, 290.0, 231.0, 234.0, 296.0, 286.0, 262.0, 260.0, 294.0, 270.0, 291.0, 285.0, 313.0, 317.0, 262.0, 263.0, 279.0, 291.0, 252.0, 270.0, 282.0, 297.0, 270.0, 263.0, 293.0, 283.0, 253.0, 269.0, 279.0, 291.0, 280.0, 299.0, 286.0, 
290.0, 288.0, 291.0, 260.0, 262.0, 285.0, 288.0, 293.0, 283.0, 288.0, 291.0, 289.0, 284.0, 294.0, 288.0, 295.0, 281.0, 282.0, 294.0, 267.0, 255.0, 279.0, 297.0, 277.0, 259.0, 288.0, 291.0, 319.0, 314.0, 294.0, 282.0, 271.0, 259.0, 286.0, 290.0, 289.0, 290.0, 288.0, 288.0, 290.0, 286.0, 290.0, 286.0, 258.0, 264.0, 264.0, 258.0, 286.0, 290.0, 290.0, 289.0, 125.0, 109.0, 289.0, 284.0, 259.0, 263.0, 259.0, 266.0, 282.0, 297.0, 287.0, 283.0, 287.0, 286.0, 286.0, 293.0, 254.0, 271.0, 284.0, 289.0, 280.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7064238285392225, "mean_inference_ms": 1.2672823442085852, "mean_action_processing_ms": 0.1349392502739434, "mean_env_wait_ms": 0.850318293245503, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6630400, "num_agent_steps_trained": 6630400, "num_env_steps_sampled": 3315200, "num_env_steps_trained": 3315200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3315200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6630400, "timers": {"training_iteration_time_ms": 3610.145, "learn_time_ms": 1077.179, "learn_throughput": 11882.887, "synch_weights_time_ms": 11.925}, "counters": {"num_env_steps_sampled": 3315200, "num_env_steps_trained": 3315200, "num_agent_steps_sampled": 6630400, "num_agent_steps_trained": 6630400}, "done": false, "episodes_total": 8288, "training_iteration": 259, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-18", "timestamp": 1666581438, "time_this_iter_s": 3.6026687622070312, "time_total_s": 1001.5176296234131, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1001.5176296234131, "timesteps_since_restore": 0, "iterations_since_restore": 259, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.559999999999995, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 172.24, "shaped_reward_min": 74, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.14, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.11, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.95, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.91, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.67, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.67, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.67, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.67, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006135091534815729, "policy_loss": 0.00027569825761020184, "vf_loss": 7.666841506958008, "vf_explained_var": 0.5820589661598206, "kl": 0.0019985504914075136, "entropy": 0.8577451109886169, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3328000, "num_env_steps_trained": 3328000, "num_agent_steps_sampled": 6656000, "num_agent_steps_trained": 6656000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 234.0, "episode_reward_mean": 559.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.52}, "custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 172.24, "shaped_reward_min": 74, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.14, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.11, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.95, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.91, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.67, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.67, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.67, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 576.0, 582.0, 516.0, 587.0, 536.0, 567.0, 573.0, 296.0, 582.0, 576.0, 582.0, 525.0, 576.0, 576.0, 579.0, 465.0, 582.0, 522.0, 564.0, 576.0, 630.0, 525.0, 570.0, 522.0, 579.0, 533.0, 576.0, 522.0, 570.0, 579.0, 576.0, 579.0, 522.0, 573.0, 576.0, 579.0, 573.0, 582.0, 576.0, 576.0, 522.0, 576.0, 536.0, 579.0, 633.0, 576.0, 530.0, 576.0, 579.0, 576.0, 576.0, 576.0, 522.0, 522.0, 576.0, 579.0, 234.0, 573.0, 522.0, 525.0, 579.0, 570.0, 573.0, 579.0, 525.0, 573.0, 579.0, 579.0, 587.0, 579.0, 573.0, 579.0, 576.0, 522.0, 522.0, 582.0, 573.0, 579.0, 579.0, 573.0, 522.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 570.0, 582.0, 576.0, 579.0, 522.0, 576.0, 576.0, 579.0, 576.0, 522.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 290.0, 291.0, 285.0, 293.0, 289.0, 261.0, 255.0, 291.0, 296.0, 260.0, 
276.0, 280.0, 287.0, 295.0, 278.0, 142.0, 154.0, 295.0, 287.0, 283.0, 293.0, 298.0, 284.0, 254.0, 271.0, 278.0, 298.0, 294.0, 282.0, 289.0, 290.0, 231.0, 234.0, 296.0, 286.0, 262.0, 260.0, 294.0, 270.0, 291.0, 285.0, 313.0, 317.0, 262.0, 263.0, 279.0, 291.0, 252.0, 270.0, 282.0, 297.0, 270.0, 263.0, 293.0, 283.0, 253.0, 269.0, 279.0, 291.0, 280.0, 299.0, 286.0, 290.0, 288.0, 291.0, 260.0, 262.0, 285.0, 288.0, 293.0, 283.0, 288.0, 291.0, 289.0, 284.0, 294.0, 288.0, 295.0, 281.0, 282.0, 294.0, 267.0, 255.0, 279.0, 297.0, 277.0, 259.0, 288.0, 291.0, 319.0, 314.0, 294.0, 282.0, 271.0, 259.0, 286.0, 290.0, 289.0, 290.0, 288.0, 288.0, 290.0, 286.0, 290.0, 286.0, 258.0, 264.0, 264.0, 258.0, 286.0, 290.0, 290.0, 289.0, 125.0, 109.0, 289.0, 284.0, 259.0, 263.0, 259.0, 266.0, 282.0, 297.0, 287.0, 283.0, 287.0, 286.0, 286.0, 293.0, 254.0, 271.0, 284.0, 289.0, 280.0, 299.0, 289.0, 290.0, 292.0, 295.0, 287.0, 292.0, 288.0, 285.0, 293.0, 286.0, 291.0, 285.0, 262.0, 260.0, 261.0, 261.0, 297.0, 285.0, 278.0, 295.0, 288.0, 291.0, 289.0, 290.0, 280.0, 293.0, 254.0, 268.0, 290.0, 292.0, 295.0, 287.0, 280.0, 290.0, 289.0, 293.0, 291.0, 285.0, 285.0, 294.0, 284.0, 286.0, 288.0, 294.0, 282.0, 294.0, 284.0, 295.0, 262.0, 260.0, 284.0, 292.0, 283.0, 293.0, 285.0, 294.0, 287.0, 289.0, 263.0, 259.0, 289.0, 293.0, 296.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.706283089866591, "mean_inference_ms": 1.2669765834633018, "mean_action_processing_ms": 0.13492354244984092, "mean_env_wait_ms": 0.8501340572572202, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 234.0, "episode_reward_mean": 559.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.52}, "hist_stats": {"episode_reward": [570.0, 576.0, 582.0, 516.0, 587.0, 536.0, 567.0, 573.0, 296.0, 582.0, 576.0, 582.0, 525.0, 576.0, 576.0, 579.0, 465.0, 582.0, 522.0, 
564.0, 576.0, 630.0, 525.0, 570.0, 522.0, 579.0, 533.0, 576.0, 522.0, 570.0, 579.0, 576.0, 579.0, 522.0, 573.0, 576.0, 579.0, 573.0, 582.0, 576.0, 576.0, 522.0, 576.0, 536.0, 579.0, 633.0, 576.0, 530.0, 576.0, 579.0, 576.0, 576.0, 576.0, 522.0, 522.0, 576.0, 579.0, 234.0, 573.0, 522.0, 525.0, 579.0, 570.0, 573.0, 579.0, 525.0, 573.0, 579.0, 579.0, 587.0, 579.0, 573.0, 579.0, 576.0, 522.0, 522.0, 582.0, 573.0, 579.0, 579.0, 573.0, 522.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 570.0, 582.0, 576.0, 579.0, 522.0, 576.0, 576.0, 579.0, 576.0, 522.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 290.0, 291.0, 285.0, 293.0, 289.0, 261.0, 255.0, 291.0, 296.0, 260.0, 276.0, 280.0, 287.0, 295.0, 278.0, 142.0, 154.0, 295.0, 287.0, 283.0, 293.0, 298.0, 284.0, 254.0, 271.0, 278.0, 298.0, 294.0, 282.0, 289.0, 290.0, 231.0, 234.0, 296.0, 286.0, 262.0, 260.0, 294.0, 270.0, 291.0, 285.0, 313.0, 317.0, 262.0, 263.0, 279.0, 291.0, 252.0, 270.0, 282.0, 297.0, 270.0, 263.0, 293.0, 283.0, 253.0, 269.0, 279.0, 291.0, 280.0, 299.0, 286.0, 290.0, 288.0, 291.0, 260.0, 262.0, 285.0, 288.0, 293.0, 283.0, 288.0, 291.0, 289.0, 284.0, 294.0, 288.0, 295.0, 281.0, 282.0, 294.0, 267.0, 255.0, 279.0, 297.0, 277.0, 259.0, 288.0, 291.0, 319.0, 314.0, 294.0, 282.0, 271.0, 259.0, 286.0, 290.0, 289.0, 290.0, 288.0, 288.0, 290.0, 286.0, 290.0, 286.0, 258.0, 264.0, 264.0, 258.0, 286.0, 290.0, 290.0, 289.0, 125.0, 109.0, 289.0, 284.0, 259.0, 263.0, 259.0, 266.0, 282.0, 297.0, 287.0, 283.0, 287.0, 
286.0, 286.0, 293.0, 254.0, 271.0, 284.0, 289.0, 280.0, 299.0, 289.0, 290.0, 292.0, 295.0, 287.0, 292.0, 288.0, 285.0, 293.0, 286.0, 291.0, 285.0, 262.0, 260.0, 261.0, 261.0, 297.0, 285.0, 278.0, 295.0, 288.0, 291.0, 289.0, 290.0, 280.0, 293.0, 254.0, 268.0, 290.0, 292.0, 295.0, 287.0, 280.0, 290.0, 289.0, 293.0, 291.0, 285.0, 285.0, 294.0, 284.0, 286.0, 288.0, 294.0, 282.0, 294.0, 284.0, 295.0, 262.0, 260.0, 284.0, 292.0, 283.0, 293.0, 285.0, 294.0, 287.0, 289.0, 263.0, 259.0, 289.0, 293.0, 296.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.706283089866591, "mean_inference_ms": 1.2669765834633018, "mean_action_processing_ms": 0.13492354244984092, "mean_env_wait_ms": 0.8501340572572202, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6656000, "num_agent_steps_trained": 6656000, "num_env_steps_sampled": 3328000, "num_env_steps_trained": 3328000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3328000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6656000, "timers": {"training_iteration_time_ms": 3613.965, "learn_time_ms": 1085.571, "learn_throughput": 11791.033, "synch_weights_time_ms": 12.394}, "counters": {"num_env_steps_sampled": 3328000, "num_env_steps_trained": 3328000, "num_agent_steps_sampled": 6656000, "num_agent_steps_trained": 6656000}, "done": false, "episodes_total": 8320, "training_iteration": 260, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-22", "timestamp": 1666581442, "time_this_iter_s": 3.7049059867858887, "time_total_s": 1005.222535610199, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1005.222535610199, "timesteps_since_restore": 0, "iterations_since_restore": 260, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.816666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 173.44, "shaped_reward_min": 74, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.7, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.53, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.55, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 14.36, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.18, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.83, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.49, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.36, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.36, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.18, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.36, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.18, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0020203336607664824, "policy_loss": -0.0023637900594621897, "vf_loss": 7.752152442932129, "vf_explained_var": 0.5669132471084595, "kl": 0.00226578488945961, "entropy": 0.8635151386260986, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3340800, "num_env_steps_trained": 3340800, "num_agent_steps_sampled": 6681600, "num_agent_steps_trained": 6681600}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 234.0, "episode_reward_mean": 562.64, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 281.32}, "custom_metrics": {"sparse_reward_mean": 194.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 173.44, "shaped_reward_min": 74, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.7, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.53, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.55, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 14.36, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.18, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.83, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.49, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.36, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.36, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.18, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.36, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.18, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 522.0, 573.0, 576.0, 579.0, 573.0, 582.0, 576.0, 576.0, 522.0, 576.0, 536.0, 579.0, 633.0, 576.0, 530.0, 576.0, 579.0, 576.0, 576.0, 576.0, 522.0, 522.0, 576.0, 579.0, 234.0, 573.0, 522.0, 525.0, 579.0, 570.0, 573.0, 579.0, 525.0, 573.0, 579.0, 579.0, 587.0, 579.0, 573.0, 579.0, 576.0, 522.0, 522.0, 582.0, 573.0, 579.0, 579.0, 573.0, 522.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 570.0, 582.0, 576.0, 579.0, 522.0, 576.0, 576.0, 579.0, 576.0, 522.0, 582.0, 576.0, 297.0, 582.0, 576.0, 573.0, 579.0, 576.0, 582.0, 573.0, 570.0, 579.0, 582.0, 570.0, 587.0, 579.0, 576.0, 522.0, 587.0, 573.0, 576.0, 582.0, 570.0, 582.0, 573.0, 584.0, 582.0, 579.0, 579.0, 522.0, 573.0, 579.0, 527.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 260.0, 262.0, 285.0, 288.0, 293.0, 283.0, 288.0, 291.0, 289.0, 
284.0, 294.0, 288.0, 295.0, 281.0, 282.0, 294.0, 267.0, 255.0, 279.0, 297.0, 277.0, 259.0, 288.0, 291.0, 319.0, 314.0, 294.0, 282.0, 271.0, 259.0, 286.0, 290.0, 289.0, 290.0, 288.0, 288.0, 290.0, 286.0, 290.0, 286.0, 258.0, 264.0, 264.0, 258.0, 286.0, 290.0, 290.0, 289.0, 125.0, 109.0, 289.0, 284.0, 259.0, 263.0, 259.0, 266.0, 282.0, 297.0, 287.0, 283.0, 287.0, 286.0, 286.0, 293.0, 254.0, 271.0, 284.0, 289.0, 280.0, 299.0, 289.0, 290.0, 292.0, 295.0, 287.0, 292.0, 288.0, 285.0, 293.0, 286.0, 291.0, 285.0, 262.0, 260.0, 261.0, 261.0, 297.0, 285.0, 278.0, 295.0, 288.0, 291.0, 289.0, 290.0, 280.0, 293.0, 254.0, 268.0, 290.0, 292.0, 295.0, 287.0, 280.0, 290.0, 289.0, 293.0, 291.0, 285.0, 285.0, 294.0, 284.0, 286.0, 288.0, 294.0, 282.0, 294.0, 284.0, 295.0, 262.0, 260.0, 284.0, 292.0, 283.0, 293.0, 285.0, 294.0, 287.0, 289.0, 263.0, 259.0, 289.0, 293.0, 296.0, 280.0, 149.0, 148.0, 286.0, 296.0, 292.0, 284.0, 280.0, 293.0, 291.0, 288.0, 294.0, 282.0, 289.0, 293.0, 288.0, 285.0, 283.0, 287.0, 288.0, 291.0, 291.0, 291.0, 282.0, 288.0, 300.0, 287.0, 288.0, 291.0, 296.0, 280.0, 255.0, 267.0, 294.0, 293.0, 290.0, 283.0, 288.0, 288.0, 289.0, 293.0, 283.0, 287.0, 291.0, 291.0, 277.0, 296.0, 292.0, 292.0, 297.0, 285.0, 292.0, 287.0, 290.0, 289.0, 266.0, 256.0, 288.0, 285.0, 291.0, 288.0, 273.0, 254.0, 292.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7061518572828223, "mean_inference_ms": 1.2667114392745973, "mean_action_processing_ms": 0.13491271579073816, "mean_env_wait_ms": 0.8499858420504343, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 234.0, "episode_reward_mean": 562.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 109.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 281.32}, "hist_stats": {"episode_reward": [579.0, 522.0, 573.0, 576.0, 579.0, 573.0, 582.0, 576.0, 576.0, 522.0, 576.0, 536.0, 579.0, 633.0, 576.0, 530.0, 576.0, 579.0, 576.0, 
576.0, 576.0, 522.0, 522.0, 576.0, 579.0, 234.0, 573.0, 522.0, 525.0, 579.0, 570.0, 573.0, 579.0, 525.0, 573.0, 579.0, 579.0, 587.0, 579.0, 573.0, 579.0, 576.0, 522.0, 522.0, 582.0, 573.0, 579.0, 579.0, 573.0, 522.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 570.0, 582.0, 576.0, 579.0, 522.0, 576.0, 576.0, 579.0, 576.0, 522.0, 582.0, 576.0, 297.0, 582.0, 576.0, 573.0, 579.0, 576.0, 582.0, 573.0, 570.0, 579.0, 582.0, 570.0, 587.0, 579.0, 576.0, 522.0, 587.0, 573.0, 576.0, 582.0, 570.0, 582.0, 573.0, 584.0, 582.0, 579.0, 579.0, 522.0, 573.0, 579.0, 527.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 260.0, 262.0, 285.0, 288.0, 293.0, 283.0, 288.0, 291.0, 289.0, 284.0, 294.0, 288.0, 295.0, 281.0, 282.0, 294.0, 267.0, 255.0, 279.0, 297.0, 277.0, 259.0, 288.0, 291.0, 319.0, 314.0, 294.0, 282.0, 271.0, 259.0, 286.0, 290.0, 289.0, 290.0, 288.0, 288.0, 290.0, 286.0, 290.0, 286.0, 258.0, 264.0, 264.0, 258.0, 286.0, 290.0, 290.0, 289.0, 125.0, 109.0, 289.0, 284.0, 259.0, 263.0, 259.0, 266.0, 282.0, 297.0, 287.0, 283.0, 287.0, 286.0, 286.0, 293.0, 254.0, 271.0, 284.0, 289.0, 280.0, 299.0, 289.0, 290.0, 292.0, 295.0, 287.0, 292.0, 288.0, 285.0, 293.0, 286.0, 291.0, 285.0, 262.0, 260.0, 261.0, 261.0, 297.0, 285.0, 278.0, 295.0, 288.0, 291.0, 289.0, 290.0, 280.0, 293.0, 254.0, 268.0, 290.0, 292.0, 295.0, 287.0, 280.0, 290.0, 289.0, 293.0, 291.0, 285.0, 285.0, 294.0, 284.0, 286.0, 288.0, 294.0, 282.0, 294.0, 284.0, 295.0, 262.0, 260.0, 284.0, 292.0, 283.0, 293.0, 285.0, 
294.0, 287.0, 289.0, 263.0, 259.0, 289.0, 293.0, 296.0, 280.0, 149.0, 148.0, 286.0, 296.0, 292.0, 284.0, 280.0, 293.0, 291.0, 288.0, 294.0, 282.0, 289.0, 293.0, 288.0, 285.0, 283.0, 287.0, 288.0, 291.0, 291.0, 291.0, 282.0, 288.0, 300.0, 287.0, 288.0, 291.0, 296.0, 280.0, 255.0, 267.0, 294.0, 293.0, 290.0, 283.0, 288.0, 288.0, 289.0, 293.0, 283.0, 287.0, 291.0, 291.0, 277.0, 296.0, 292.0, 292.0, 297.0, 285.0, 292.0, 287.0, 290.0, 289.0, 266.0, 256.0, 288.0, 285.0, 291.0, 288.0, 273.0, 254.0, 292.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7061518572828223, "mean_inference_ms": 1.2667114392745973, "mean_action_processing_ms": 0.13491271579073816, "mean_env_wait_ms": 0.8499858420504343, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6681600, "num_agent_steps_trained": 6681600, "num_env_steps_sampled": 3340800, "num_env_steps_trained": 3340800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3340800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6681600, "timers": {"training_iteration_time_ms": 3592.788, "learn_time_ms": 1077.759, "learn_throughput": 11876.493, "synch_weights_time_ms": 12.268}, "counters": {"num_env_steps_sampled": 3340800, "num_env_steps_trained": 3340800, "num_agent_steps_sampled": 6681600, "num_agent_steps_trained": 6681600}, "done": false, "episodes_total": 8352, "training_iteration": 261, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-26", "timestamp": 1666581446, "time_this_iter_s": 3.7127888202667236, "time_total_s": 1008.9353244304657, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1008.9353244304657, "timesteps_since_restore": 0, "iterations_since_restore": 261, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.72, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 176.04, "shaped_reward_min": 97, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.12, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.45, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.92, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.34, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.74, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.17, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.31, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.74, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.17, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.74, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.17, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016281807329505682, "policy_loss": -0.0019714063964784145, "vf_loss": 7.717082977294922, "vf_explained_var": 0.5875511169433594, "kl": 0.00202760798856616, "entropy": 0.8569626808166504, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3353600, "num_env_steps_trained": 3353600, "num_agent_steps_sampled": 6707200, "num_agent_steps_trained": 6707200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 297.0, "episode_reward_mean": 571.24, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 148.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.62}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 176.04, "shaped_reward_min": 97, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.12, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.45, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.92, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.34, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.74, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.17, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.31, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.74, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.17, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.74, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.17, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 525.0, 573.0, 579.0, 579.0, 587.0, 579.0, 573.0, 579.0, 576.0, 522.0, 522.0, 582.0, 573.0, 579.0, 579.0, 573.0, 522.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 570.0, 582.0, 576.0, 579.0, 522.0, 576.0, 576.0, 579.0, 576.0, 522.0, 582.0, 576.0, 297.0, 582.0, 576.0, 573.0, 579.0, 576.0, 582.0, 573.0, 570.0, 579.0, 582.0, 570.0, 587.0, 579.0, 576.0, 522.0, 587.0, 573.0, 576.0, 582.0, 570.0, 582.0, 573.0, 584.0, 582.0, 579.0, 579.0, 522.0, 573.0, 579.0, 527.0, 579.0, 576.0, 587.0, 582.0, 579.0, 579.0, 567.0, 582.0, 579.0, 584.0, 576.0, 582.0, 576.0, 579.0, 576.0, 579.0, 639.0, 582.0, 573.0, 576.0, 552.0, 576.0, 579.0, 570.0, 582.0, 579.0, 636.0, 576.0, 584.0, 579.0, 582.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 254.0, 271.0, 284.0, 289.0, 280.0, 299.0, 289.0, 290.0, 292.0, 
295.0, 287.0, 292.0, 288.0, 285.0, 293.0, 286.0, 291.0, 285.0, 262.0, 260.0, 261.0, 261.0, 297.0, 285.0, 278.0, 295.0, 288.0, 291.0, 289.0, 290.0, 280.0, 293.0, 254.0, 268.0, 290.0, 292.0, 295.0, 287.0, 280.0, 290.0, 289.0, 293.0, 291.0, 285.0, 285.0, 294.0, 284.0, 286.0, 288.0, 294.0, 282.0, 294.0, 284.0, 295.0, 262.0, 260.0, 284.0, 292.0, 283.0, 293.0, 285.0, 294.0, 287.0, 289.0, 263.0, 259.0, 289.0, 293.0, 296.0, 280.0, 149.0, 148.0, 286.0, 296.0, 292.0, 284.0, 280.0, 293.0, 291.0, 288.0, 294.0, 282.0, 289.0, 293.0, 288.0, 285.0, 283.0, 287.0, 288.0, 291.0, 291.0, 291.0, 282.0, 288.0, 300.0, 287.0, 288.0, 291.0, 296.0, 280.0, 255.0, 267.0, 294.0, 293.0, 290.0, 283.0, 288.0, 288.0, 289.0, 293.0, 283.0, 287.0, 291.0, 291.0, 277.0, 296.0, 292.0, 292.0, 297.0, 285.0, 292.0, 287.0, 290.0, 289.0, 266.0, 256.0, 288.0, 285.0, 291.0, 288.0, 273.0, 254.0, 292.0, 287.0, 276.0, 300.0, 288.0, 299.0, 296.0, 286.0, 285.0, 294.0, 282.0, 297.0, 291.0, 276.0, 286.0, 296.0, 292.0, 287.0, 284.0, 300.0, 278.0, 298.0, 288.0, 294.0, 295.0, 281.0, 285.0, 294.0, 295.0, 281.0, 283.0, 296.0, 324.0, 315.0, 283.0, 299.0, 288.0, 285.0, 278.0, 298.0, 278.0, 274.0, 289.0, 287.0, 293.0, 286.0, 277.0, 293.0, 292.0, 290.0, 288.0, 291.0, 314.0, 322.0, 286.0, 290.0, 290.0, 294.0, 291.0, 288.0, 295.0, 287.0, 291.0, 288.0, 282.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7060269888990688, "mean_inference_ms": 1.2664474945522755, "mean_action_processing_ms": 0.13490230202482045, "mean_env_wait_ms": 0.8498471862817376, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 297.0, "episode_reward_mean": 571.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 148.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.62}, "hist_stats": {"episode_reward": [579.0, 525.0, 573.0, 579.0, 579.0, 587.0, 579.0, 573.0, 579.0, 576.0, 522.0, 522.0, 582.0, 573.0, 579.0, 579.0, 573.0, 522.0, 582.0, 
582.0, 570.0, 582.0, 576.0, 579.0, 570.0, 582.0, 576.0, 579.0, 522.0, 576.0, 576.0, 579.0, 576.0, 522.0, 582.0, 576.0, 297.0, 582.0, 576.0, 573.0, 579.0, 576.0, 582.0, 573.0, 570.0, 579.0, 582.0, 570.0, 587.0, 579.0, 576.0, 522.0, 587.0, 573.0, 576.0, 582.0, 570.0, 582.0, 573.0, 584.0, 582.0, 579.0, 579.0, 522.0, 573.0, 579.0, 527.0, 579.0, 576.0, 587.0, 582.0, 579.0, 579.0, 567.0, 582.0, 579.0, 584.0, 576.0, 582.0, 576.0, 579.0, 576.0, 579.0, 639.0, 582.0, 573.0, 576.0, 552.0, 576.0, 579.0, 570.0, 582.0, 579.0, 636.0, 576.0, 584.0, 579.0, 582.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 254.0, 271.0, 284.0, 289.0, 280.0, 299.0, 289.0, 290.0, 292.0, 295.0, 287.0, 292.0, 288.0, 285.0, 293.0, 286.0, 291.0, 285.0, 262.0, 260.0, 261.0, 261.0, 297.0, 285.0, 278.0, 295.0, 288.0, 291.0, 289.0, 290.0, 280.0, 293.0, 254.0, 268.0, 290.0, 292.0, 295.0, 287.0, 280.0, 290.0, 289.0, 293.0, 291.0, 285.0, 285.0, 294.0, 284.0, 286.0, 288.0, 294.0, 282.0, 294.0, 284.0, 295.0, 262.0, 260.0, 284.0, 292.0, 283.0, 293.0, 285.0, 294.0, 287.0, 289.0, 263.0, 259.0, 289.0, 293.0, 296.0, 280.0, 149.0, 148.0, 286.0, 296.0, 292.0, 284.0, 280.0, 293.0, 291.0, 288.0, 294.0, 282.0, 289.0, 293.0, 288.0, 285.0, 283.0, 287.0, 288.0, 291.0, 291.0, 291.0, 282.0, 288.0, 300.0, 287.0, 288.0, 291.0, 296.0, 280.0, 255.0, 267.0, 294.0, 293.0, 290.0, 283.0, 288.0, 288.0, 289.0, 293.0, 283.0, 287.0, 291.0, 291.0, 277.0, 296.0, 292.0, 292.0, 297.0, 285.0, 292.0, 287.0, 290.0, 289.0, 266.0, 
256.0, 288.0, 285.0, 291.0, 288.0, 273.0, 254.0, 292.0, 287.0, 276.0, 300.0, 288.0, 299.0, 296.0, 286.0, 285.0, 294.0, 282.0, 297.0, 291.0, 276.0, 286.0, 296.0, 292.0, 287.0, 284.0, 300.0, 278.0, 298.0, 288.0, 294.0, 295.0, 281.0, 285.0, 294.0, 295.0, 281.0, 283.0, 296.0, 324.0, 315.0, 283.0, 299.0, 288.0, 285.0, 278.0, 298.0, 278.0, 274.0, 289.0, 287.0, 293.0, 286.0, 277.0, 293.0, 292.0, 290.0, 288.0, 291.0, 314.0, 322.0, 286.0, 290.0, 290.0, 294.0, 291.0, 288.0, 295.0, 287.0, 291.0, 288.0, 282.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7060269888990688, "mean_inference_ms": 1.2664474945522755, "mean_action_processing_ms": 0.13490230202482045, "mean_env_wait_ms": 0.8498471862817376, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6707200, "num_agent_steps_trained": 6707200, "num_env_steps_sampled": 3353600, "num_env_steps_trained": 3353600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3353600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6707200, "timers": {"training_iteration_time_ms": 3580.2, "learn_time_ms": 1085.817, "learn_throughput": 11788.361, "synch_weights_time_ms": 10.814}, "counters": {"num_env_steps_sampled": 3353600, "num_env_steps_trained": 3353600, "num_agent_steps_sampled": 6707200, "num_agent_steps_trained": 6707200}, "done": false, "episodes_total": 8384, "training_iteration": 262, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-30", "timestamp": 1666581450, "time_this_iter_s": 3.735849142074585, "time_total_s": 1012.6711735725403, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1012.6711735725403, "timesteps_since_restore": 0, "iterations_since_restore": 262, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.650000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 175.09, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.72, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.53, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.61, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.27, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.93, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.38, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.27, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.27, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011134549276903272, "policy_loss": 0.0007515454199165106, "vf_loss": 7.840696334838867, "vf_explained_var": 0.5784326791763306, "kl": 0.016453076153993607, "entropy": 0.8443182706832886, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3366400, "num_env_steps_trained": 3366400, "num_agent_steps_sampled": 6732800, "num_agent_steps_trained": 6732800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 567.49, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 283.745}, "custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 175.09, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.72, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.53, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.61, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.41, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.27, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.93, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.44, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.38, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.27, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.27, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 522.0, 582.0, 576.0, 297.0, 582.0, 576.0, 573.0, 579.0, 576.0, 582.0, 573.0, 570.0, 579.0, 582.0, 570.0, 587.0, 579.0, 576.0, 522.0, 587.0, 573.0, 576.0, 582.0, 570.0, 582.0, 573.0, 584.0, 582.0, 579.0, 579.0, 522.0, 573.0, 579.0, 527.0, 579.0, 576.0, 587.0, 582.0, 579.0, 579.0, 567.0, 582.0, 579.0, 584.0, 576.0, 582.0, 576.0, 579.0, 576.0, 579.0, 639.0, 582.0, 573.0, 576.0, 552.0, 576.0, 579.0, 570.0, 582.0, 579.0, 636.0, 576.0, 584.0, 579.0, 582.0, 579.0, 579.0, 582.0, 579.0, 579.0, 579.0, 581.0, 587.0, 570.0, 522.0, 579.0, 525.0, 576.0, 570.0, 533.0, 630.0, 579.0, 582.0, 630.0, 522.0, 582.0, 579.0, 579.0, 627.0, 570.0, 482.0, 579.0, 576.0, 573.0, 69.0, 573.0, 579.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 289.0, 263.0, 259.0, 289.0, 293.0, 296.0, 280.0, 149.0, 148.0, 286.0, 
296.0, 292.0, 284.0, 280.0, 293.0, 291.0, 288.0, 294.0, 282.0, 289.0, 293.0, 288.0, 285.0, 283.0, 287.0, 288.0, 291.0, 291.0, 291.0, 282.0, 288.0, 300.0, 287.0, 288.0, 291.0, 296.0, 280.0, 255.0, 267.0, 294.0, 293.0, 290.0, 283.0, 288.0, 288.0, 289.0, 293.0, 283.0, 287.0, 291.0, 291.0, 277.0, 296.0, 292.0, 292.0, 297.0, 285.0, 292.0, 287.0, 290.0, 289.0, 266.0, 256.0, 288.0, 285.0, 291.0, 288.0, 273.0, 254.0, 292.0, 287.0, 276.0, 300.0, 288.0, 299.0, 296.0, 286.0, 285.0, 294.0, 282.0, 297.0, 291.0, 276.0, 286.0, 296.0, 292.0, 287.0, 284.0, 300.0, 278.0, 298.0, 288.0, 294.0, 295.0, 281.0, 285.0, 294.0, 295.0, 281.0, 283.0, 296.0, 324.0, 315.0, 283.0, 299.0, 288.0, 285.0, 278.0, 298.0, 278.0, 274.0, 289.0, 287.0, 293.0, 286.0, 277.0, 293.0, 292.0, 290.0, 288.0, 291.0, 314.0, 322.0, 286.0, 290.0, 290.0, 294.0, 291.0, 288.0, 295.0, 287.0, 291.0, 288.0, 282.0, 297.0, 289.0, 293.0, 295.0, 284.0, 291.0, 288.0, 286.0, 293.0, 287.0, 294.0, 296.0, 291.0, 281.0, 289.0, 255.0, 267.0, 301.0, 278.0, 261.0, 264.0, 289.0, 287.0, 288.0, 282.0, 261.0, 272.0, 326.0, 304.0, 288.0, 291.0, 297.0, 285.0, 319.0, 311.0, 263.0, 259.0, 287.0, 295.0, 290.0, 289.0, 286.0, 293.0, 314.0, 313.0, 285.0, 285.0, 245.0, 237.0, 289.0, 290.0, 282.0, 294.0, 285.0, 288.0, 34.0, 35.0, 289.0, 284.0, 286.0, 293.0, 291.0, 291.0, 294.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7059000375335294, "mean_inference_ms": 1.2661800925391582, "mean_action_processing_ms": 0.13489277283830975, "mean_env_wait_ms": 0.8497105315759467, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 567.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 283.745}, "hist_stats": {"episode_reward": [576.0, 522.0, 582.0, 576.0, 297.0, 582.0, 576.0, 573.0, 579.0, 576.0, 582.0, 573.0, 570.0, 579.0, 582.0, 570.0, 587.0, 579.0, 576.0, 
522.0, 587.0, 573.0, 576.0, 582.0, 570.0, 582.0, 573.0, 584.0, 582.0, 579.0, 579.0, 522.0, 573.0, 579.0, 527.0, 579.0, 576.0, 587.0, 582.0, 579.0, 579.0, 567.0, 582.0, 579.0, 584.0, 576.0, 582.0, 576.0, 579.0, 576.0, 579.0, 639.0, 582.0, 573.0, 576.0, 552.0, 576.0, 579.0, 570.0, 582.0, 579.0, 636.0, 576.0, 584.0, 579.0, 582.0, 579.0, 579.0, 582.0, 579.0, 579.0, 579.0, 581.0, 587.0, 570.0, 522.0, 579.0, 525.0, 576.0, 570.0, 533.0, 630.0, 579.0, 582.0, 630.0, 522.0, 582.0, 579.0, 579.0, 627.0, 570.0, 482.0, 579.0, 576.0, 573.0, 69.0, 573.0, 579.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 289.0, 263.0, 259.0, 289.0, 293.0, 296.0, 280.0, 149.0, 148.0, 286.0, 296.0, 292.0, 284.0, 280.0, 293.0, 291.0, 288.0, 294.0, 282.0, 289.0, 293.0, 288.0, 285.0, 283.0, 287.0, 288.0, 291.0, 291.0, 291.0, 282.0, 288.0, 300.0, 287.0, 288.0, 291.0, 296.0, 280.0, 255.0, 267.0, 294.0, 293.0, 290.0, 283.0, 288.0, 288.0, 289.0, 293.0, 283.0, 287.0, 291.0, 291.0, 277.0, 296.0, 292.0, 292.0, 297.0, 285.0, 292.0, 287.0, 290.0, 289.0, 266.0, 256.0, 288.0, 285.0, 291.0, 288.0, 273.0, 254.0, 292.0, 287.0, 276.0, 300.0, 288.0, 299.0, 296.0, 286.0, 285.0, 294.0, 282.0, 297.0, 291.0, 276.0, 286.0, 296.0, 292.0, 287.0, 284.0, 300.0, 278.0, 298.0, 288.0, 294.0, 295.0, 281.0, 285.0, 294.0, 295.0, 281.0, 283.0, 296.0, 324.0, 315.0, 283.0, 299.0, 288.0, 285.0, 278.0, 298.0, 278.0, 274.0, 289.0, 287.0, 293.0, 286.0, 277.0, 293.0, 292.0, 290.0, 288.0, 291.0, 314.0, 322.0, 286.0, 290.0, 290.0, 
294.0, 291.0, 288.0, 295.0, 287.0, 291.0, 288.0, 282.0, 297.0, 289.0, 293.0, 295.0, 284.0, 291.0, 288.0, 286.0, 293.0, 287.0, 294.0, 296.0, 291.0, 281.0, 289.0, 255.0, 267.0, 301.0, 278.0, 261.0, 264.0, 289.0, 287.0, 288.0, 282.0, 261.0, 272.0, 326.0, 304.0, 288.0, 291.0, 297.0, 285.0, 319.0, 311.0, 263.0, 259.0, 287.0, 295.0, 290.0, 289.0, 286.0, 293.0, 314.0, 313.0, 285.0, 285.0, 245.0, 237.0, 289.0, 290.0, 282.0, 294.0, 285.0, 288.0, 34.0, 35.0, 289.0, 284.0, 286.0, 293.0, 291.0, 291.0, 294.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7059000375335294, "mean_inference_ms": 1.2661800925391582, "mean_action_processing_ms": 0.13489277283830975, "mean_env_wait_ms": 0.8497105315759467, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6732800, "num_agent_steps_trained": 6732800, "num_env_steps_sampled": 3366400, "num_env_steps_trained": 3366400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3366400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6732800, "timers": {"training_iteration_time_ms": 3589.346, "learn_time_ms": 1089.994, "learn_throughput": 11743.188, "synch_weights_time_ms": 10.681}, "counters": {"num_env_steps_sampled": 3366400, "num_env_steps_trained": 3366400, "num_agent_steps_sampled": 6732800, "num_agent_steps_trained": 6732800}, "done": false, "episodes_total": 8416, "training_iteration": 263, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-34", "timestamp": 1666581454, "time_this_iter_s": 3.7120368480682373, "time_total_s": 1016.3832104206085, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1016.3832104206085, "timesteps_since_restore": 0, "iterations_since_restore": 263, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.779999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 175.94, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.58, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.78, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.42, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.22, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.49, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.56, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.3, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.22, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.49, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.22, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.49, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0022870218381285667, "policy_loss": 0.0019353741081431508, "vf_loss": 7.78421688079834, "vf_explained_var": 0.5842685699462891, "kl": 0.002388710156083107, "entropy": 0.8535453081130981, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3379200, "num_env_steps_trained": 3379200, "num_agent_steps_sampled": 6758400, "num_agent_steps_trained": 6758400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 568.34, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 284.17}, "custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 175.94, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.58, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.78, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.42, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.62, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.22, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.49, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.56, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.41, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.3, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.22, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.49, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.22, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.49, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 527.0, 579.0, 576.0, 587.0, 582.0, 579.0, 579.0, 567.0, 582.0, 579.0, 584.0, 576.0, 582.0, 576.0, 579.0, 576.0, 579.0, 639.0, 582.0, 573.0, 576.0, 552.0, 576.0, 579.0, 570.0, 582.0, 579.0, 636.0, 576.0, 584.0, 579.0, 582.0, 579.0, 579.0, 582.0, 579.0, 579.0, 579.0, 581.0, 587.0, 570.0, 522.0, 579.0, 525.0, 576.0, 570.0, 533.0, 630.0, 579.0, 582.0, 630.0, 522.0, 582.0, 579.0, 579.0, 627.0, 570.0, 482.0, 579.0, 576.0, 573.0, 69.0, 573.0, 579.0, 582.0, 582.0, 576.0, 533.0, 530.0, 581.0, 573.0, 576.0, 576.0, 576.0, 573.0, 582.0, 576.0, 570.0, 519.0, 579.0, 573.0, 573.0, 582.0, 579.0, 590.0, 579.0, 582.0, 584.0, 582.0, 533.0, 479.0, 579.0, 576.0, 582.0, 584.0, 590.0, 479.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 291.0, 288.0, 273.0, 254.0, 292.0, 287.0, 276.0, 300.0, 288.0, 
299.0, 296.0, 286.0, 285.0, 294.0, 282.0, 297.0, 291.0, 276.0, 286.0, 296.0, 292.0, 287.0, 284.0, 300.0, 278.0, 298.0, 288.0, 294.0, 295.0, 281.0, 285.0, 294.0, 295.0, 281.0, 283.0, 296.0, 324.0, 315.0, 283.0, 299.0, 288.0, 285.0, 278.0, 298.0, 278.0, 274.0, 289.0, 287.0, 293.0, 286.0, 277.0, 293.0, 292.0, 290.0, 288.0, 291.0, 314.0, 322.0, 286.0, 290.0, 290.0, 294.0, 291.0, 288.0, 295.0, 287.0, 291.0, 288.0, 282.0, 297.0, 289.0, 293.0, 295.0, 284.0, 291.0, 288.0, 286.0, 293.0, 287.0, 294.0, 296.0, 291.0, 281.0, 289.0, 255.0, 267.0, 301.0, 278.0, 261.0, 264.0, 289.0, 287.0, 288.0, 282.0, 261.0, 272.0, 326.0, 304.0, 288.0, 291.0, 297.0, 285.0, 319.0, 311.0, 263.0, 259.0, 287.0, 295.0, 290.0, 289.0, 286.0, 293.0, 314.0, 313.0, 285.0, 285.0, 245.0, 237.0, 289.0, 290.0, 282.0, 294.0, 285.0, 288.0, 34.0, 35.0, 289.0, 284.0, 286.0, 293.0, 291.0, 291.0, 294.0, 288.0, 281.0, 295.0, 263.0, 270.0, 269.0, 261.0, 282.0, 299.0, 295.0, 278.0, 289.0, 287.0, 296.0, 280.0, 299.0, 277.0, 289.0, 284.0, 298.0, 284.0, 282.0, 294.0, 287.0, 283.0, 262.0, 257.0, 291.0, 288.0, 291.0, 282.0, 285.0, 288.0, 292.0, 290.0, 289.0, 290.0, 293.0, 297.0, 297.0, 282.0, 298.0, 284.0, 290.0, 294.0, 292.0, 290.0, 276.0, 257.0, 229.0, 250.0, 293.0, 286.0, 278.0, 298.0, 293.0, 289.0, 273.0, 311.0, 303.0, 287.0, 241.0, 238.0, 296.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7057810537052588, "mean_inference_ms": 1.2658757102807892, "mean_action_processing_ms": 0.13487831523960633, "mean_env_wait_ms": 0.849539354254739, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 568.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 284.17}, "hist_stats": {"episode_reward": [573.0, 579.0, 527.0, 579.0, 576.0, 587.0, 582.0, 579.0, 579.0, 567.0, 582.0, 579.0, 584.0, 576.0, 582.0, 576.0, 579.0, 576.0, 579.0, 
639.0, 582.0, 573.0, 576.0, 552.0, 576.0, 579.0, 570.0, 582.0, 579.0, 636.0, 576.0, 584.0, 579.0, 582.0, 579.0, 579.0, 582.0, 579.0, 579.0, 579.0, 581.0, 587.0, 570.0, 522.0, 579.0, 525.0, 576.0, 570.0, 533.0, 630.0, 579.0, 582.0, 630.0, 522.0, 582.0, 579.0, 579.0, 627.0, 570.0, 482.0, 579.0, 576.0, 573.0, 69.0, 573.0, 579.0, 582.0, 582.0, 576.0, 533.0, 530.0, 581.0, 573.0, 576.0, 576.0, 576.0, 573.0, 582.0, 576.0, 570.0, 519.0, 579.0, 573.0, 573.0, 582.0, 579.0, 590.0, 579.0, 582.0, 584.0, 582.0, 533.0, 479.0, 579.0, 576.0, 582.0, 584.0, 590.0, 479.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 285.0, 291.0, 288.0, 273.0, 254.0, 292.0, 287.0, 276.0, 300.0, 288.0, 299.0, 296.0, 286.0, 285.0, 294.0, 282.0, 297.0, 291.0, 276.0, 286.0, 296.0, 292.0, 287.0, 284.0, 300.0, 278.0, 298.0, 288.0, 294.0, 295.0, 281.0, 285.0, 294.0, 295.0, 281.0, 283.0, 296.0, 324.0, 315.0, 283.0, 299.0, 288.0, 285.0, 278.0, 298.0, 278.0, 274.0, 289.0, 287.0, 293.0, 286.0, 277.0, 293.0, 292.0, 290.0, 288.0, 291.0, 314.0, 322.0, 286.0, 290.0, 290.0, 294.0, 291.0, 288.0, 295.0, 287.0, 291.0, 288.0, 282.0, 297.0, 289.0, 293.0, 295.0, 284.0, 291.0, 288.0, 286.0, 293.0, 287.0, 294.0, 296.0, 291.0, 281.0, 289.0, 255.0, 267.0, 301.0, 278.0, 261.0, 264.0, 289.0, 287.0, 288.0, 282.0, 261.0, 272.0, 326.0, 304.0, 288.0, 291.0, 297.0, 285.0, 319.0, 311.0, 263.0, 259.0, 287.0, 295.0, 290.0, 289.0, 286.0, 293.0, 314.0, 313.0, 285.0, 285.0, 245.0, 237.0, 289.0, 290.0, 282.0, 294.0, 285.0, 288.0, 34.0, 
35.0, 289.0, 284.0, 286.0, 293.0, 291.0, 291.0, 294.0, 288.0, 281.0, 295.0, 263.0, 270.0, 269.0, 261.0, 282.0, 299.0, 295.0, 278.0, 289.0, 287.0, 296.0, 280.0, 299.0, 277.0, 289.0, 284.0, 298.0, 284.0, 282.0, 294.0, 287.0, 283.0, 262.0, 257.0, 291.0, 288.0, 291.0, 282.0, 285.0, 288.0, 292.0, 290.0, 289.0, 290.0, 293.0, 297.0, 297.0, 282.0, 298.0, 284.0, 290.0, 294.0, 292.0, 290.0, 276.0, 257.0, 229.0, 250.0, 293.0, 286.0, 278.0, 298.0, 293.0, 289.0, 273.0, 311.0, 303.0, 287.0, 241.0, 238.0, 296.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7057810537052588, "mean_inference_ms": 1.2658757102807892, "mean_action_processing_ms": 0.13487831523960633, "mean_env_wait_ms": 0.849539354254739, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6758400, "num_agent_steps_trained": 6758400, "num_env_steps_sampled": 3379200, "num_env_steps_trained": 3379200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3379200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6758400, "timers": {"training_iteration_time_ms": 3595.683, "learn_time_ms": 1098.205, "learn_throughput": 11655.379, "synch_weights_time_ms": 10.673}, "counters": {"num_env_steps_sampled": 3379200, "num_env_steps_trained": 3379200, "num_agent_steps_sampled": 6758400, "num_agent_steps_trained": 6758400}, "done": false, "episodes_total": 8448, "training_iteration": 264, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-37", "timestamp": 1666581457, "time_this_iter_s": 3.6259381771087646, "time_total_s": 1020.0091485977173, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1020.0091485977173, "timesteps_since_restore": 0, "iterations_since_restore": 264, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.48, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 175.54, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.55, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.83, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.45, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.22, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.41, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.76, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.85, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.32, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.23, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.22, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.41, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.22, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.41, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003440435044467449, "policy_loss": 0.003094225190579891, "vf_loss": 7.7306413650512695, "vf_explained_var": 0.5569076538085938, "kl": 0.0026197489351034164, "entropy": 0.8537062406539917, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3392000, "num_env_steps_trained": 3392000, "num_agent_steps_sampled": 6784000, "num_agent_steps_trained": 6784000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 567.54, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 283.77}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 175.54, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.55, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.83, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.45, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.65, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.22, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.41, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.76, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.85, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.4, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.71, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.32, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.23, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.05, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.22, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.41, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.22, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.41, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 579.0, 582.0, 579.0, 579.0, 579.0, 581.0, 587.0, 570.0, 522.0, 579.0, 525.0, 576.0, 570.0, 533.0, 630.0, 579.0, 582.0, 630.0, 522.0, 582.0, 579.0, 579.0, 627.0, 570.0, 482.0, 579.0, 576.0, 573.0, 69.0, 573.0, 579.0, 582.0, 582.0, 576.0, 533.0, 530.0, 581.0, 573.0, 576.0, 576.0, 576.0, 573.0, 582.0, 576.0, 570.0, 519.0, 579.0, 573.0, 573.0, 582.0, 579.0, 590.0, 579.0, 582.0, 584.0, 582.0, 533.0, 479.0, 579.0, 576.0, 582.0, 584.0, 590.0, 479.0, 587.0, 624.0, 587.0, 579.0, 579.0, 630.0, 570.0, 639.0, 582.0, 579.0, 576.0, 579.0, 576.0, 579.0, 519.0, 579.0, 573.0, 573.0, 573.0, 627.0, 582.0, 522.0, 581.0, 573.0, 576.0, 579.0, 576.0, 524.0, 567.0, 525.0, 582.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 295.0, 287.0, 291.0, 288.0, 282.0, 297.0, 289.0, 293.0, 295.0, 
284.0, 291.0, 288.0, 286.0, 293.0, 287.0, 294.0, 296.0, 291.0, 281.0, 289.0, 255.0, 267.0, 301.0, 278.0, 261.0, 264.0, 289.0, 287.0, 288.0, 282.0, 261.0, 272.0, 326.0, 304.0, 288.0, 291.0, 297.0, 285.0, 319.0, 311.0, 263.0, 259.0, 287.0, 295.0, 290.0, 289.0, 286.0, 293.0, 314.0, 313.0, 285.0, 285.0, 245.0, 237.0, 289.0, 290.0, 282.0, 294.0, 285.0, 288.0, 34.0, 35.0, 289.0, 284.0, 286.0, 293.0, 291.0, 291.0, 294.0, 288.0, 281.0, 295.0, 263.0, 270.0, 269.0, 261.0, 282.0, 299.0, 295.0, 278.0, 289.0, 287.0, 296.0, 280.0, 299.0, 277.0, 289.0, 284.0, 298.0, 284.0, 282.0, 294.0, 287.0, 283.0, 262.0, 257.0, 291.0, 288.0, 291.0, 282.0, 285.0, 288.0, 292.0, 290.0, 289.0, 290.0, 293.0, 297.0, 297.0, 282.0, 298.0, 284.0, 290.0, 294.0, 292.0, 290.0, 276.0, 257.0, 229.0, 250.0, 293.0, 286.0, 278.0, 298.0, 293.0, 289.0, 273.0, 311.0, 303.0, 287.0, 241.0, 238.0, 296.0, 291.0, 316.0, 308.0, 296.0, 291.0, 292.0, 287.0, 293.0, 286.0, 323.0, 307.0, 284.0, 286.0, 321.0, 318.0, 292.0, 290.0, 287.0, 292.0, 293.0, 283.0, 288.0, 291.0, 289.0, 287.0, 295.0, 284.0, 262.0, 257.0, 300.0, 279.0, 285.0, 288.0, 294.0, 279.0, 286.0, 287.0, 323.0, 304.0, 290.0, 292.0, 257.0, 265.0, 288.0, 293.0, 288.0, 285.0, 285.0, 291.0, 289.0, 290.0, 284.0, 292.0, 261.0, 263.0, 286.0, 281.0, 260.0, 265.0, 297.0, 285.0, 282.0, 291.0, 299.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7056736532332322, "mean_inference_ms": 1.265572151465473, "mean_action_processing_ms": 0.13486356298137628, "mean_env_wait_ms": 0.849365499448659, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 567.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 283.77}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 579.0, 582.0, 579.0, 579.0, 579.0, 581.0, 587.0, 570.0, 522.0, 579.0, 525.0, 576.0, 570.0, 533.0, 630.0, 579.0, 
582.0, 630.0, 522.0, 582.0, 579.0, 579.0, 627.0, 570.0, 482.0, 579.0, 576.0, 573.0, 69.0, 573.0, 579.0, 582.0, 582.0, 576.0, 533.0, 530.0, 581.0, 573.0, 576.0, 576.0, 576.0, 573.0, 582.0, 576.0, 570.0, 519.0, 579.0, 573.0, 573.0, 582.0, 579.0, 590.0, 579.0, 582.0, 584.0, 582.0, 533.0, 479.0, 579.0, 576.0, 582.0, 584.0, 590.0, 479.0, 587.0, 624.0, 587.0, 579.0, 579.0, 630.0, 570.0, 639.0, 582.0, 579.0, 576.0, 579.0, 576.0, 579.0, 519.0, 579.0, 573.0, 573.0, 573.0, 627.0, 582.0, 522.0, 581.0, 573.0, 576.0, 579.0, 576.0, 524.0, 567.0, 525.0, 582.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 295.0, 287.0, 291.0, 288.0, 282.0, 297.0, 289.0, 293.0, 295.0, 284.0, 291.0, 288.0, 286.0, 293.0, 287.0, 294.0, 296.0, 291.0, 281.0, 289.0, 255.0, 267.0, 301.0, 278.0, 261.0, 264.0, 289.0, 287.0, 288.0, 282.0, 261.0, 272.0, 326.0, 304.0, 288.0, 291.0, 297.0, 285.0, 319.0, 311.0, 263.0, 259.0, 287.0, 295.0, 290.0, 289.0, 286.0, 293.0, 314.0, 313.0, 285.0, 285.0, 245.0, 237.0, 289.0, 290.0, 282.0, 294.0, 285.0, 288.0, 34.0, 35.0, 289.0, 284.0, 286.0, 293.0, 291.0, 291.0, 294.0, 288.0, 281.0, 295.0, 263.0, 270.0, 269.0, 261.0, 282.0, 299.0, 295.0, 278.0, 289.0, 287.0, 296.0, 280.0, 299.0, 277.0, 289.0, 284.0, 298.0, 284.0, 282.0, 294.0, 287.0, 283.0, 262.0, 257.0, 291.0, 288.0, 291.0, 282.0, 285.0, 288.0, 292.0, 290.0, 289.0, 290.0, 293.0, 297.0, 297.0, 282.0, 298.0, 284.0, 290.0, 294.0, 292.0, 290.0, 276.0, 257.0, 229.0, 250.0, 293.0, 286.0, 278.0, 298.0, 293.0, 
289.0, 273.0, 311.0, 303.0, 287.0, 241.0, 238.0, 296.0, 291.0, 316.0, 308.0, 296.0, 291.0, 292.0, 287.0, 293.0, 286.0, 323.0, 307.0, 284.0, 286.0, 321.0, 318.0, 292.0, 290.0, 287.0, 292.0, 293.0, 283.0, 288.0, 291.0, 289.0, 287.0, 295.0, 284.0, 262.0, 257.0, 300.0, 279.0, 285.0, 288.0, 294.0, 279.0, 286.0, 287.0, 323.0, 304.0, 290.0, 292.0, 257.0, 265.0, 288.0, 293.0, 288.0, 285.0, 285.0, 291.0, 289.0, 290.0, 284.0, 292.0, 261.0, 263.0, 286.0, 281.0, 260.0, 265.0, 297.0, 285.0, 282.0, 291.0, 299.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7056736532332322, "mean_inference_ms": 1.265572151465473, "mean_action_processing_ms": 0.13486356298137628, "mean_env_wait_ms": 0.849365499448659, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6784000, "num_agent_steps_trained": 6784000, "num_env_steps_sampled": 3392000, "num_env_steps_trained": 3392000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3392000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6784000, "timers": {"training_iteration_time_ms": 3588.842, "learn_time_ms": 1087.972, "learn_throughput": 11765.014, "synch_weights_time_ms": 11.318}, "counters": {"num_env_steps_sampled": 3392000, "num_env_steps_trained": 3392000, "num_agent_steps_sampled": 6784000, "num_agent_steps_trained": 6784000}, "done": false, "episodes_total": 8480, "training_iteration": 265, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-41", "timestamp": 1666581461, "time_this_iter_s": 3.5863428115844727, "time_total_s": 1023.5954914093018, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1023.5954914093018, "timesteps_since_restore": 0, "iterations_since_restore": 265, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.649999999999995, "ram_util_percent": 10.616666666666665}} +{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.71, "shaped_reward_min": 132, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.77, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.71, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.62, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.09, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.41, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.8, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.39, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.3, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.41, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.41, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019726380705833435, "policy_loss": 0.0016227063024416566, "vf_loss": 7.775620937347412, "vf_explained_var": 0.5693689584732056, "kl": 0.0020303893834352493, "entropy": 0.8552579879760742, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3404800, "num_env_steps_trained": 3404800, "num_agent_steps_sampled": 6809600, "num_agent_steps_trained": 6809600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 452.0, "episode_reward_mean": 568.51, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 224.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.255}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.71, "shaped_reward_min": 132, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.77, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.71, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.62, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.46, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.18, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.41, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.24, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.08, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.8, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.3, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.39, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.3, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.41, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.24, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.41, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.24, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 582.0, 582.0, 576.0, 533.0, 530.0, 581.0, 573.0, 576.0, 576.0, 576.0, 573.0, 582.0, 576.0, 570.0, 519.0, 579.0, 573.0, 573.0, 582.0, 579.0, 590.0, 579.0, 582.0, 584.0, 582.0, 533.0, 479.0, 579.0, 576.0, 582.0, 584.0, 590.0, 479.0, 587.0, 624.0, 587.0, 579.0, 579.0, 630.0, 570.0, 639.0, 582.0, 579.0, 576.0, 579.0, 576.0, 579.0, 519.0, 579.0, 573.0, 573.0, 573.0, 627.0, 582.0, 522.0, 581.0, 573.0, 576.0, 579.0, 576.0, 524.0, 567.0, 525.0, 582.0, 573.0, 582.0, 519.0, 576.0, 579.0, 452.0, 582.0, 570.0, 570.0, 525.0, 582.0, 573.0, 576.0, 527.0, 567.0, 579.0, 582.0, 582.0, 533.0, 576.0, 522.0, 576.0, 576.0, 516.0, 584.0, 582.0, 582.0, 576.0, 587.0, 579.0, 522.0, 584.0, 582.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 284.0, 286.0, 293.0, 291.0, 291.0, 294.0, 288.0, 281.0, 295.0, 263.0, 
270.0, 269.0, 261.0, 282.0, 299.0, 295.0, 278.0, 289.0, 287.0, 296.0, 280.0, 299.0, 277.0, 289.0, 284.0, 298.0, 284.0, 282.0, 294.0, 287.0, 283.0, 262.0, 257.0, 291.0, 288.0, 291.0, 282.0, 285.0, 288.0, 292.0, 290.0, 289.0, 290.0, 293.0, 297.0, 297.0, 282.0, 298.0, 284.0, 290.0, 294.0, 292.0, 290.0, 276.0, 257.0, 229.0, 250.0, 293.0, 286.0, 278.0, 298.0, 293.0, 289.0, 273.0, 311.0, 303.0, 287.0, 241.0, 238.0, 296.0, 291.0, 316.0, 308.0, 296.0, 291.0, 292.0, 287.0, 293.0, 286.0, 323.0, 307.0, 284.0, 286.0, 321.0, 318.0, 292.0, 290.0, 287.0, 292.0, 293.0, 283.0, 288.0, 291.0, 289.0, 287.0, 295.0, 284.0, 262.0, 257.0, 300.0, 279.0, 285.0, 288.0, 294.0, 279.0, 286.0, 287.0, 323.0, 304.0, 290.0, 292.0, 257.0, 265.0, 288.0, 293.0, 288.0, 285.0, 285.0, 291.0, 289.0, 290.0, 284.0, 292.0, 261.0, 263.0, 286.0, 281.0, 260.0, 265.0, 297.0, 285.0, 282.0, 291.0, 299.0, 283.0, 258.0, 261.0, 290.0, 286.0, 298.0, 281.0, 224.0, 228.0, 296.0, 286.0, 290.0, 280.0, 289.0, 281.0, 267.0, 258.0, 293.0, 289.0, 292.0, 281.0, 293.0, 283.0, 269.0, 258.0, 282.0, 285.0, 296.0, 283.0, 292.0, 290.0, 298.0, 284.0, 260.0, 273.0, 290.0, 286.0, 261.0, 261.0, 291.0, 285.0, 288.0, 288.0, 264.0, 252.0, 291.0, 293.0, 292.0, 290.0, 285.0, 297.0, 275.0, 301.0, 297.0, 290.0, 288.0, 291.0, 258.0, 264.0, 287.0, 297.0, 283.0, 299.0, 259.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7055510816247841, "mean_inference_ms": 1.2652897116048947, "mean_action_processing_ms": 0.13484551900006134, "mean_env_wait_ms": 0.8493587874707947, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 452.0, "episode_reward_mean": 568.51, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 224.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.255}, "hist_stats": {"episode_reward": [573.0, 579.0, 582.0, 582.0, 576.0, 533.0, 530.0, 581.0, 573.0, 576.0, 576.0, 576.0, 573.0, 582.0, 576.0, 570.0, 519.0, 579.0, 
573.0, 573.0, 582.0, 579.0, 590.0, 579.0, 582.0, 584.0, 582.0, 533.0, 479.0, 579.0, 576.0, 582.0, 584.0, 590.0, 479.0, 587.0, 624.0, 587.0, 579.0, 579.0, 630.0, 570.0, 639.0, 582.0, 579.0, 576.0, 579.0, 576.0, 579.0, 519.0, 579.0, 573.0, 573.0, 573.0, 627.0, 582.0, 522.0, 581.0, 573.0, 576.0, 579.0, 576.0, 524.0, 567.0, 525.0, 582.0, 573.0, 582.0, 519.0, 576.0, 579.0, 452.0, 582.0, 570.0, 570.0, 525.0, 582.0, 573.0, 576.0, 527.0, 567.0, 579.0, 582.0, 582.0, 533.0, 576.0, 522.0, 576.0, 576.0, 516.0, 584.0, 582.0, 582.0, 576.0, 587.0, 579.0, 522.0, 584.0, 582.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 284.0, 286.0, 293.0, 291.0, 291.0, 294.0, 288.0, 281.0, 295.0, 263.0, 270.0, 269.0, 261.0, 282.0, 299.0, 295.0, 278.0, 289.0, 287.0, 296.0, 280.0, 299.0, 277.0, 289.0, 284.0, 298.0, 284.0, 282.0, 294.0, 287.0, 283.0, 262.0, 257.0, 291.0, 288.0, 291.0, 282.0, 285.0, 288.0, 292.0, 290.0, 289.0, 290.0, 293.0, 297.0, 297.0, 282.0, 298.0, 284.0, 290.0, 294.0, 292.0, 290.0, 276.0, 257.0, 229.0, 250.0, 293.0, 286.0, 278.0, 298.0, 293.0, 289.0, 273.0, 311.0, 303.0, 287.0, 241.0, 238.0, 296.0, 291.0, 316.0, 308.0, 296.0, 291.0, 292.0, 287.0, 293.0, 286.0, 323.0, 307.0, 284.0, 286.0, 321.0, 318.0, 292.0, 290.0, 287.0, 292.0, 293.0, 283.0, 288.0, 291.0, 289.0, 287.0, 295.0, 284.0, 262.0, 257.0, 300.0, 279.0, 285.0, 288.0, 294.0, 279.0, 286.0, 287.0, 323.0, 304.0, 290.0, 292.0, 257.0, 265.0, 288.0, 293.0, 288.0, 285.0, 285.0, 291.0, 289.0, 290.0, 284.0, 292.0, 261.0, 263.0, 
286.0, 281.0, 260.0, 265.0, 297.0, 285.0, 282.0, 291.0, 299.0, 283.0, 258.0, 261.0, 290.0, 286.0, 298.0, 281.0, 224.0, 228.0, 296.0, 286.0, 290.0, 280.0, 289.0, 281.0, 267.0, 258.0, 293.0, 289.0, 292.0, 281.0, 293.0, 283.0, 269.0, 258.0, 282.0, 285.0, 296.0, 283.0, 292.0, 290.0, 298.0, 284.0, 260.0, 273.0, 290.0, 286.0, 261.0, 261.0, 291.0, 285.0, 288.0, 288.0, 264.0, 252.0, 291.0, 293.0, 292.0, 290.0, 285.0, 297.0, 275.0, 301.0, 297.0, 290.0, 288.0, 291.0, 258.0, 264.0, 287.0, 297.0, 283.0, 299.0, 259.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7055510816247841, "mean_inference_ms": 1.2652897116048947, "mean_action_processing_ms": 0.13484551900006134, "mean_env_wait_ms": 0.8493587874707947, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6809600, "num_agent_steps_trained": 6809600, "num_env_steps_sampled": 3404800, "num_env_steps_trained": 3404800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3404800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6809600, "timers": {"training_iteration_time_ms": 3623.845, "learn_time_ms": 1094.074, "learn_throughput": 11699.389, "synch_weights_time_ms": 12.251}, "counters": {"num_env_steps_sampled": 3404800, "num_env_steps_trained": 3404800, "num_agent_steps_sampled": 6809600, "num_agent_steps_trained": 6809600}, "done": false, "episodes_total": 8512, "training_iteration": 266, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-45", "timestamp": 1666581465, "time_this_iter_s": 3.9619455337524414, "time_total_s": 1027.5574369430542, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1027.5574369430542, "timesteps_since_restore": 0, "iterations_since_restore": 266, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.383333333333336, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.01, "shaped_reward_min": 132, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.04, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.57, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.33, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.65, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.11, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.96, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.67, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.49, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.51, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.44, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 
14.65, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.11, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.65, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.11, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0002950136549770832, "policy_loss": -0.0006376116070896387, "vf_loss": 7.6767144203186035, "vf_explained_var": 0.6027138829231262, "kl": 0.0018985953647643328, "entropy": 0.850143551826477, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3417600, "num_env_steps_trained": 3417600, "num_agent_steps_sampled": 6835200, "num_agent_steps_trained": 6835200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 452.0, "episode_reward_mean": 571.61, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 224.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 285.805}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.01, "shaped_reward_min": 132, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.04, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.57, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.33, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.65, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.11, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.96, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.67, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.49, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.29, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.51, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.44, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.65, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.11, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.65, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.11, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 590.0, 479.0, 587.0, 624.0, 587.0, 579.0, 579.0, 630.0, 570.0, 639.0, 582.0, 579.0, 576.0, 579.0, 576.0, 579.0, 519.0, 579.0, 573.0, 573.0, 573.0, 627.0, 582.0, 522.0, 581.0, 573.0, 576.0, 579.0, 576.0, 524.0, 567.0, 525.0, 582.0, 573.0, 582.0, 519.0, 576.0, 579.0, 452.0, 582.0, 570.0, 570.0, 525.0, 582.0, 573.0, 576.0, 527.0, 567.0, 579.0, 582.0, 582.0, 533.0, 576.0, 522.0, 576.0, 576.0, 516.0, 584.0, 582.0, 582.0, 576.0, 587.0, 579.0, 522.0, 584.0, 582.0, 519.0, 582.0, 584.0, 576.0, 582.0, 581.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 633.0, 573.0, 576.0, 570.0, 576.0, 576.0, 519.0, 624.0, 576.0, 581.0, 576.0, 579.0, 570.0, 576.0, 582.0, 576.0, 576.0, 573.0, 576.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [273.0, 311.0, 303.0, 287.0, 241.0, 238.0, 296.0, 291.0, 316.0, 308.0, 296.0, 
291.0, 292.0, 287.0, 293.0, 286.0, 323.0, 307.0, 284.0, 286.0, 321.0, 318.0, 292.0, 290.0, 287.0, 292.0, 293.0, 283.0, 288.0, 291.0, 289.0, 287.0, 295.0, 284.0, 262.0, 257.0, 300.0, 279.0, 285.0, 288.0, 294.0, 279.0, 286.0, 287.0, 323.0, 304.0, 290.0, 292.0, 257.0, 265.0, 288.0, 293.0, 288.0, 285.0, 285.0, 291.0, 289.0, 290.0, 284.0, 292.0, 261.0, 263.0, 286.0, 281.0, 260.0, 265.0, 297.0, 285.0, 282.0, 291.0, 299.0, 283.0, 258.0, 261.0, 290.0, 286.0, 298.0, 281.0, 224.0, 228.0, 296.0, 286.0, 290.0, 280.0, 289.0, 281.0, 267.0, 258.0, 293.0, 289.0, 292.0, 281.0, 293.0, 283.0, 269.0, 258.0, 282.0, 285.0, 296.0, 283.0, 292.0, 290.0, 298.0, 284.0, 260.0, 273.0, 290.0, 286.0, 261.0, 261.0, 291.0, 285.0, 288.0, 288.0, 264.0, 252.0, 291.0, 293.0, 292.0, 290.0, 285.0, 297.0, 275.0, 301.0, 297.0, 290.0, 288.0, 291.0, 258.0, 264.0, 287.0, 297.0, 283.0, 299.0, 259.0, 260.0, 296.0, 286.0, 298.0, 286.0, 293.0, 283.0, 296.0, 286.0, 298.0, 283.0, 283.0, 296.0, 288.0, 294.0, 291.0, 285.0, 291.0, 288.0, 292.0, 284.0, 297.0, 282.0, 319.0, 314.0, 290.0, 283.0, 279.0, 297.0, 297.0, 273.0, 291.0, 285.0, 285.0, 291.0, 250.0, 269.0, 303.0, 321.0, 282.0, 294.0, 290.0, 291.0, 295.0, 281.0, 288.0, 291.0, 284.0, 286.0, 281.0, 295.0, 287.0, 295.0, 286.0, 290.0, 287.0, 289.0, 284.0, 289.0, 291.0, 285.0, 286.0, 296.0, 284.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7054255631638321, "mean_inference_ms": 1.2650641076249054, "mean_action_processing_ms": 0.1348278615851263, "mean_env_wait_ms": 0.849377041592129, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 452.0, "episode_reward_mean": 571.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 224.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 285.805}, "hist_stats": {"episode_reward": [584.0, 590.0, 479.0, 587.0, 624.0, 587.0, 579.0, 579.0, 630.0, 570.0, 639.0, 582.0, 579.0, 576.0, 579.0, 576.0, 579.0, 519.0, 579.0, 
573.0, 573.0, 573.0, 627.0, 582.0, 522.0, 581.0, 573.0, 576.0, 579.0, 576.0, 524.0, 567.0, 525.0, 582.0, 573.0, 582.0, 519.0, 576.0, 579.0, 452.0, 582.0, 570.0, 570.0, 525.0, 582.0, 573.0, 576.0, 527.0, 567.0, 579.0, 582.0, 582.0, 533.0, 576.0, 522.0, 576.0, 576.0, 516.0, 584.0, 582.0, 582.0, 576.0, 587.0, 579.0, 522.0, 584.0, 582.0, 519.0, 582.0, 584.0, 576.0, 582.0, 581.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 633.0, 573.0, 576.0, 570.0, 576.0, 576.0, 519.0, 624.0, 576.0, 581.0, 576.0, 579.0, 570.0, 576.0, 582.0, 576.0, 576.0, 573.0, 576.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [273.0, 311.0, 303.0, 287.0, 241.0, 238.0, 296.0, 291.0, 316.0, 308.0, 296.0, 291.0, 292.0, 287.0, 293.0, 286.0, 323.0, 307.0, 284.0, 286.0, 321.0, 318.0, 292.0, 290.0, 287.0, 292.0, 293.0, 283.0, 288.0, 291.0, 289.0, 287.0, 295.0, 284.0, 262.0, 257.0, 300.0, 279.0, 285.0, 288.0, 294.0, 279.0, 286.0, 287.0, 323.0, 304.0, 290.0, 292.0, 257.0, 265.0, 288.0, 293.0, 288.0, 285.0, 285.0, 291.0, 289.0, 290.0, 284.0, 292.0, 261.0, 263.0, 286.0, 281.0, 260.0, 265.0, 297.0, 285.0, 282.0, 291.0, 299.0, 283.0, 258.0, 261.0, 290.0, 286.0, 298.0, 281.0, 224.0, 228.0, 296.0, 286.0, 290.0, 280.0, 289.0, 281.0, 267.0, 258.0, 293.0, 289.0, 292.0, 281.0, 293.0, 283.0, 269.0, 258.0, 282.0, 285.0, 296.0, 283.0, 292.0, 290.0, 298.0, 284.0, 260.0, 273.0, 290.0, 286.0, 261.0, 261.0, 291.0, 285.0, 288.0, 288.0, 264.0, 252.0, 291.0, 293.0, 292.0, 290.0, 285.0, 297.0, 275.0, 301.0, 297.0, 290.0, 288.0, 
291.0, 258.0, 264.0, 287.0, 297.0, 283.0, 299.0, 259.0, 260.0, 296.0, 286.0, 298.0, 286.0, 293.0, 283.0, 296.0, 286.0, 298.0, 283.0, 283.0, 296.0, 288.0, 294.0, 291.0, 285.0, 291.0, 288.0, 292.0, 284.0, 297.0, 282.0, 319.0, 314.0, 290.0, 283.0, 279.0, 297.0, 297.0, 273.0, 291.0, 285.0, 285.0, 291.0, 250.0, 269.0, 303.0, 321.0, 282.0, 294.0, 290.0, 291.0, 295.0, 281.0, 288.0, 291.0, 284.0, 286.0, 281.0, 295.0, 287.0, 295.0, 286.0, 290.0, 287.0, 289.0, 284.0, 289.0, 291.0, 285.0, 286.0, 296.0, 284.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7054255631638321, "mean_inference_ms": 1.2650641076249054, "mean_action_processing_ms": 0.1348278615851263, "mean_env_wait_ms": 0.849377041592129, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6835200, "num_agent_steps_trained": 6835200, "num_env_steps_sampled": 3417600, "num_env_steps_trained": 3417600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3417600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6835200, "timers": {"training_iteration_time_ms": 3637.345, "learn_time_ms": 1097.041, "learn_throughput": 11667.754, "synch_weights_time_ms": 11.519}, "counters": {"num_env_steps_sampled": 3417600, "num_env_steps_trained": 3417600, "num_agent_steps_sampled": 6835200, "num_agent_steps_trained": 6835200}, "done": false, "episodes_total": 8544, "training_iteration": 267, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-49", "timestamp": 1666581469, "time_this_iter_s": 3.773404359817505, "time_total_s": 1031.3308413028717, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1031.3308413028717, "timesteps_since_restore": 0, "iterations_since_restore": 267, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.02, "shaped_reward_min": 119, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.93, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.44, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.79, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.59, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.03, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.89, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.49, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.59, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.03, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.59, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.03, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004363615065813065, "policy_loss": 0.00400199368596077, "vf_loss": 7.881130218505859, "vf_explained_var": 0.5937398672103882, "kl": 0.002955435309559107, "entropy": 0.8529794812202454, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3430400, "num_env_steps_trained": 3430400, "num_agent_steps_sampled": 6860800, "num_agent_steps_trained": 6860800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 359.0, "episode_reward_mean": 568.62, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.31}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.02, "shaped_reward_min": 119, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.93, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.44, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.79, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.59, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.03, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.89, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.53, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.49, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.42, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.59, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.03, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.59, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.03, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 582.0, 573.0, 582.0, 519.0, 576.0, 579.0, 452.0, 582.0, 570.0, 570.0, 525.0, 582.0, 573.0, 576.0, 527.0, 567.0, 579.0, 582.0, 582.0, 533.0, 576.0, 522.0, 576.0, 576.0, 516.0, 584.0, 582.0, 582.0, 576.0, 587.0, 579.0, 522.0, 584.0, 582.0, 519.0, 582.0, 584.0, 576.0, 582.0, 581.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 633.0, 573.0, 576.0, 570.0, 576.0, 576.0, 519.0, 624.0, 576.0, 581.0, 576.0, 579.0, 570.0, 576.0, 582.0, 576.0, 576.0, 573.0, 576.0, 582.0, 573.0, 573.0, 576.0, 576.0, 579.0, 584.0, 582.0, 576.0, 573.0, 570.0, 359.0, 573.0, 579.0, 582.0, 525.0, 576.0, 570.0, 582.0, 630.0, 579.0, 579.0, 576.0, 576.0, 630.0, 579.0, 576.0, 573.0, 584.0, 579.0, 579.0, 582.0, 579.0, 408.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 265.0, 297.0, 285.0, 282.0, 291.0, 299.0, 283.0, 258.0, 261.0, 290.0, 
286.0, 298.0, 281.0, 224.0, 228.0, 296.0, 286.0, 290.0, 280.0, 289.0, 281.0, 267.0, 258.0, 293.0, 289.0, 292.0, 281.0, 293.0, 283.0, 269.0, 258.0, 282.0, 285.0, 296.0, 283.0, 292.0, 290.0, 298.0, 284.0, 260.0, 273.0, 290.0, 286.0, 261.0, 261.0, 291.0, 285.0, 288.0, 288.0, 264.0, 252.0, 291.0, 293.0, 292.0, 290.0, 285.0, 297.0, 275.0, 301.0, 297.0, 290.0, 288.0, 291.0, 258.0, 264.0, 287.0, 297.0, 283.0, 299.0, 259.0, 260.0, 296.0, 286.0, 298.0, 286.0, 293.0, 283.0, 296.0, 286.0, 298.0, 283.0, 283.0, 296.0, 288.0, 294.0, 291.0, 285.0, 291.0, 288.0, 292.0, 284.0, 297.0, 282.0, 319.0, 314.0, 290.0, 283.0, 279.0, 297.0, 297.0, 273.0, 291.0, 285.0, 285.0, 291.0, 250.0, 269.0, 303.0, 321.0, 282.0, 294.0, 290.0, 291.0, 295.0, 281.0, 288.0, 291.0, 284.0, 286.0, 281.0, 295.0, 287.0, 295.0, 286.0, 290.0, 287.0, 289.0, 284.0, 289.0, 291.0, 285.0, 286.0, 296.0, 284.0, 289.0, 288.0, 285.0, 289.0, 287.0, 296.0, 280.0, 291.0, 288.0, 299.0, 285.0, 298.0, 284.0, 289.0, 287.0, 286.0, 287.0, 291.0, 279.0, 175.0, 184.0, 290.0, 283.0, 285.0, 294.0, 275.0, 307.0, 268.0, 257.0, 290.0, 286.0, 284.0, 286.0, 288.0, 294.0, 306.0, 324.0, 290.0, 289.0, 290.0, 289.0, 287.0, 289.0, 282.0, 294.0, 317.0, 313.0, 291.0, 288.0, 290.0, 286.0, 281.0, 292.0, 296.0, 288.0, 290.0, 289.0, 294.0, 285.0, 291.0, 291.0, 287.0, 292.0, 199.0, 209.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7053109366828282, "mean_inference_ms": 1.2648639446405867, "mean_action_processing_ms": 0.13481287585190244, "mean_env_wait_ms": 0.8494139376583145, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 359.0, "episode_reward_mean": 568.62, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.31}, "hist_stats": {"episode_reward": [525.0, 582.0, 573.0, 582.0, 519.0, 576.0, 579.0, 452.0, 582.0, 570.0, 570.0, 525.0, 582.0, 573.0, 576.0, 527.0, 567.0, 579.0, 582.0, 
582.0, 533.0, 576.0, 522.0, 576.0, 576.0, 516.0, 584.0, 582.0, 582.0, 576.0, 587.0, 579.0, 522.0, 584.0, 582.0, 519.0, 582.0, 584.0, 576.0, 582.0, 581.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 633.0, 573.0, 576.0, 570.0, 576.0, 576.0, 519.0, 624.0, 576.0, 581.0, 576.0, 579.0, 570.0, 576.0, 582.0, 576.0, 576.0, 573.0, 576.0, 582.0, 573.0, 573.0, 576.0, 576.0, 579.0, 584.0, 582.0, 576.0, 573.0, 570.0, 359.0, 573.0, 579.0, 582.0, 525.0, 576.0, 570.0, 582.0, 630.0, 579.0, 579.0, 576.0, 576.0, 630.0, 579.0, 576.0, 573.0, 584.0, 579.0, 579.0, 582.0, 579.0, 408.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 265.0, 297.0, 285.0, 282.0, 291.0, 299.0, 283.0, 258.0, 261.0, 290.0, 286.0, 298.0, 281.0, 224.0, 228.0, 296.0, 286.0, 290.0, 280.0, 289.0, 281.0, 267.0, 258.0, 293.0, 289.0, 292.0, 281.0, 293.0, 283.0, 269.0, 258.0, 282.0, 285.0, 296.0, 283.0, 292.0, 290.0, 298.0, 284.0, 260.0, 273.0, 290.0, 286.0, 261.0, 261.0, 291.0, 285.0, 288.0, 288.0, 264.0, 252.0, 291.0, 293.0, 292.0, 290.0, 285.0, 297.0, 275.0, 301.0, 297.0, 290.0, 288.0, 291.0, 258.0, 264.0, 287.0, 297.0, 283.0, 299.0, 259.0, 260.0, 296.0, 286.0, 298.0, 286.0, 293.0, 283.0, 296.0, 286.0, 298.0, 283.0, 283.0, 296.0, 288.0, 294.0, 291.0, 285.0, 291.0, 288.0, 292.0, 284.0, 297.0, 282.0, 319.0, 314.0, 290.0, 283.0, 279.0, 297.0, 297.0, 273.0, 291.0, 285.0, 285.0, 291.0, 250.0, 269.0, 303.0, 321.0, 282.0, 294.0, 290.0, 291.0, 295.0, 281.0, 288.0, 291.0, 284.0, 286.0, 281.0, 295.0, 287.0, 295.0, 286.0, 290.0, 287.0, 
289.0, 284.0, 289.0, 291.0, 285.0, 286.0, 296.0, 284.0, 289.0, 288.0, 285.0, 289.0, 287.0, 296.0, 280.0, 291.0, 288.0, 299.0, 285.0, 298.0, 284.0, 289.0, 287.0, 286.0, 287.0, 291.0, 279.0, 175.0, 184.0, 290.0, 283.0, 285.0, 294.0, 275.0, 307.0, 268.0, 257.0, 290.0, 286.0, 284.0, 286.0, 288.0, 294.0, 306.0, 324.0, 290.0, 289.0, 290.0, 289.0, 287.0, 289.0, 282.0, 294.0, 317.0, 313.0, 291.0, 288.0, 290.0, 286.0, 281.0, 292.0, 296.0, 288.0, 290.0, 289.0, 294.0, 285.0, 291.0, 291.0, 287.0, 292.0, 199.0, 209.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7053109366828282, "mean_inference_ms": 1.2648639446405867, "mean_action_processing_ms": 0.13481287585190244, "mean_env_wait_ms": 0.8494139376583145, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6860800, "num_agent_steps_trained": 6860800, "num_env_steps_sampled": 3430400, "num_env_steps_trained": 3430400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3430400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6860800, "timers": {"training_iteration_time_ms": 3657.302, "learn_time_ms": 1113.485, "learn_throughput": 11495.444, "synch_weights_time_ms": 12.352}, "counters": {"num_env_steps_sampled": 3430400, "num_env_steps_trained": 3430400, "num_agent_steps_sampled": 6860800, "num_agent_steps_trained": 6860800}, "done": false, "episodes_total": 8576, "training_iteration": 268, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-53", "timestamp": 1666581473, "time_this_iter_s": 3.783074140548706, "time_total_s": 1035.1139154434204, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1035.1139154434204, "timesteps_since_restore": 0, "iterations_since_restore": 268, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.033333333333335, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.46, "shaped_reward_min": 119, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.3, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.14, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.13, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.76, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.93, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.76, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.76, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009098669979721308, "policy_loss": 0.000542132300324738, "vf_loss": 7.941966533660889, "vf_explained_var": 0.5730843544006348, "kl": 0.0019231629557907581, "entropy": 0.8529212474822998, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3443200, "num_env_steps_trained": 3443200, "num_agent_steps_sampled": 6886400, "num_agent_steps_trained": 6886400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 359.0, "episode_reward_mean": 571.86, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.93}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.46, "shaped_reward_min": 119, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.3, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.14, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.13, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.93, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.76, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.76, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.76, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 584.0, 582.0, 519.0, 582.0, 584.0, 576.0, 582.0, 581.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 633.0, 573.0, 576.0, 570.0, 576.0, 576.0, 519.0, 624.0, 576.0, 581.0, 576.0, 579.0, 570.0, 576.0, 582.0, 576.0, 576.0, 573.0, 576.0, 582.0, 573.0, 573.0, 576.0, 576.0, 579.0, 584.0, 582.0, 576.0, 573.0, 570.0, 359.0, 573.0, 579.0, 582.0, 525.0, 576.0, 570.0, 582.0, 630.0, 579.0, 579.0, 576.0, 576.0, 630.0, 579.0, 576.0, 573.0, 584.0, 579.0, 579.0, 582.0, 579.0, 408.0, 576.0, 576.0, 630.0, 573.0, 468.0, 587.0, 573.0, 573.0, 570.0, 570.0, 584.0, 582.0, 573.0, 579.0, 630.0, 573.0, 633.0, 570.0, 570.0, 582.0, 576.0, 579.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 522.0, 510.0, 570.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 264.0, 287.0, 297.0, 283.0, 299.0, 259.0, 260.0, 296.0, 286.0, 298.0, 
286.0, 293.0, 283.0, 296.0, 286.0, 298.0, 283.0, 283.0, 296.0, 288.0, 294.0, 291.0, 285.0, 291.0, 288.0, 292.0, 284.0, 297.0, 282.0, 319.0, 314.0, 290.0, 283.0, 279.0, 297.0, 297.0, 273.0, 291.0, 285.0, 285.0, 291.0, 250.0, 269.0, 303.0, 321.0, 282.0, 294.0, 290.0, 291.0, 295.0, 281.0, 288.0, 291.0, 284.0, 286.0, 281.0, 295.0, 287.0, 295.0, 286.0, 290.0, 287.0, 289.0, 284.0, 289.0, 291.0, 285.0, 286.0, 296.0, 284.0, 289.0, 288.0, 285.0, 289.0, 287.0, 296.0, 280.0, 291.0, 288.0, 299.0, 285.0, 298.0, 284.0, 289.0, 287.0, 286.0, 287.0, 291.0, 279.0, 175.0, 184.0, 290.0, 283.0, 285.0, 294.0, 275.0, 307.0, 268.0, 257.0, 290.0, 286.0, 284.0, 286.0, 288.0, 294.0, 306.0, 324.0, 290.0, 289.0, 290.0, 289.0, 287.0, 289.0, 282.0, 294.0, 317.0, 313.0, 291.0, 288.0, 290.0, 286.0, 281.0, 292.0, 296.0, 288.0, 290.0, 289.0, 294.0, 285.0, 291.0, 291.0, 287.0, 292.0, 199.0, 209.0, 284.0, 292.0, 288.0, 288.0, 311.0, 319.0, 284.0, 289.0, 219.0, 249.0, 291.0, 296.0, 283.0, 290.0, 289.0, 284.0, 279.0, 291.0, 289.0, 281.0, 291.0, 293.0, 294.0, 288.0, 287.0, 286.0, 288.0, 291.0, 316.0, 314.0, 292.0, 281.0, 310.0, 323.0, 278.0, 292.0, 278.0, 292.0, 295.0, 287.0, 287.0, 289.0, 285.0, 294.0, 289.0, 287.0, 283.0, 296.0, 281.0, 295.0, 293.0, 283.0, 284.0, 295.0, 294.0, 288.0, 260.0, 262.0, 249.0, 261.0, 287.0, 283.0, 252.0, 267.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7051935011860442, "mean_inference_ms": 1.264642375620096, "mean_action_processing_ms": 0.13480085895955796, "mean_env_wait_ms": 0.8493182842486506, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 359.0, "episode_reward_mean": 571.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.93}, "hist_stats": {"episode_reward": [522.0, 584.0, 582.0, 519.0, 582.0, 584.0, 576.0, 582.0, 581.0, 579.0, 582.0, 576.0, 579.0, 576.0, 579.0, 633.0, 573.0, 576.0, 570.0, 
576.0, 576.0, 519.0, 624.0, 576.0, 581.0, 576.0, 579.0, 570.0, 576.0, 582.0, 576.0, 576.0, 573.0, 576.0, 582.0, 573.0, 573.0, 576.0, 576.0, 579.0, 584.0, 582.0, 576.0, 573.0, 570.0, 359.0, 573.0, 579.0, 582.0, 525.0, 576.0, 570.0, 582.0, 630.0, 579.0, 579.0, 576.0, 576.0, 630.0, 579.0, 576.0, 573.0, 584.0, 579.0, 579.0, 582.0, 579.0, 408.0, 576.0, 576.0, 630.0, 573.0, 468.0, 587.0, 573.0, 573.0, 570.0, 570.0, 584.0, 582.0, 573.0, 579.0, 630.0, 573.0, 633.0, 570.0, 570.0, 582.0, 576.0, 579.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 522.0, 510.0, 570.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 264.0, 287.0, 297.0, 283.0, 299.0, 259.0, 260.0, 296.0, 286.0, 298.0, 286.0, 293.0, 283.0, 296.0, 286.0, 298.0, 283.0, 283.0, 296.0, 288.0, 294.0, 291.0, 285.0, 291.0, 288.0, 292.0, 284.0, 297.0, 282.0, 319.0, 314.0, 290.0, 283.0, 279.0, 297.0, 297.0, 273.0, 291.0, 285.0, 285.0, 291.0, 250.0, 269.0, 303.0, 321.0, 282.0, 294.0, 290.0, 291.0, 295.0, 281.0, 288.0, 291.0, 284.0, 286.0, 281.0, 295.0, 287.0, 295.0, 286.0, 290.0, 287.0, 289.0, 284.0, 289.0, 291.0, 285.0, 286.0, 296.0, 284.0, 289.0, 288.0, 285.0, 289.0, 287.0, 296.0, 280.0, 291.0, 288.0, 299.0, 285.0, 298.0, 284.0, 289.0, 287.0, 286.0, 287.0, 291.0, 279.0, 175.0, 184.0, 290.0, 283.0, 285.0, 294.0, 275.0, 307.0, 268.0, 257.0, 290.0, 286.0, 284.0, 286.0, 288.0, 294.0, 306.0, 324.0, 290.0, 289.0, 290.0, 289.0, 287.0, 289.0, 282.0, 294.0, 317.0, 313.0, 291.0, 288.0, 290.0, 286.0, 281.0, 292.0, 296.0, 288.0, 290.0, 
289.0, 294.0, 285.0, 291.0, 291.0, 287.0, 292.0, 199.0, 209.0, 284.0, 292.0, 288.0, 288.0, 311.0, 319.0, 284.0, 289.0, 219.0, 249.0, 291.0, 296.0, 283.0, 290.0, 289.0, 284.0, 279.0, 291.0, 289.0, 281.0, 291.0, 293.0, 294.0, 288.0, 287.0, 286.0, 288.0, 291.0, 316.0, 314.0, 292.0, 281.0, 310.0, 323.0, 278.0, 292.0, 278.0, 292.0, 295.0, 287.0, 287.0, 289.0, 285.0, 294.0, 289.0, 287.0, 283.0, 296.0, 281.0, 295.0, 293.0, 283.0, 284.0, 295.0, 294.0, 288.0, 260.0, 262.0, 249.0, 261.0, 287.0, 283.0, 252.0, 267.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7051935011860442, "mean_inference_ms": 1.264642375620096, "mean_action_processing_ms": 0.13480085895955796, "mean_env_wait_ms": 0.8493182842486506, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6886400, "num_agent_steps_trained": 6886400, "num_env_steps_sampled": 3443200, "num_env_steps_trained": 3443200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3443200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6886400, "timers": {"training_iteration_time_ms": 3656.099, "learn_time_ms": 1112.816, "learn_throughput": 11502.35, "synch_weights_time_ms": 13.273}, "counters": {"num_env_steps_sampled": 3443200, "num_env_steps_trained": 3443200, "num_agent_steps_sampled": 6886400, "num_agent_steps_trained": 6886400}, "done": false, "episodes_total": 8608, "training_iteration": 269, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-17-57", "timestamp": 1666581477, "time_this_iter_s": 3.5823559761047363, "time_total_s": 1038.6962714195251, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1038.6962714195251, "timesteps_since_restore": 0, "iterations_since_restore": 269, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.560000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.21, "shaped_reward_min": 119, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.11, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.12, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.69, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.85, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.99, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.69, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.69, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001868363469839096, "policy_loss": 0.001498084980994463, "vf_loss": 7.933145523071289, "vf_explained_var": 0.59946209192276, "kl": 0.0020921118557453156, "entropy": 0.846068263053894, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3456000, "num_env_steps_trained": 3456000, "num_agent_steps_sampled": 6912000, "num_agent_steps_trained": 6912000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 359.0, "episode_reward_mean": 571.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.605}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.21, "shaped_reward_min": 119, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.11, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.12, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.69, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.85, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.69, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.69, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 582.0, 573.0, 573.0, 576.0, 576.0, 579.0, 584.0, 582.0, 576.0, 573.0, 570.0, 359.0, 573.0, 579.0, 582.0, 525.0, 576.0, 570.0, 582.0, 630.0, 579.0, 579.0, 576.0, 576.0, 630.0, 579.0, 576.0, 573.0, 584.0, 579.0, 579.0, 582.0, 579.0, 408.0, 576.0, 576.0, 630.0, 573.0, 468.0, 587.0, 573.0, 573.0, 570.0, 570.0, 584.0, 582.0, 573.0, 579.0, 630.0, 573.0, 633.0, 570.0, 570.0, 582.0, 576.0, 579.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 522.0, 510.0, 570.0, 519.0, 579.0, 573.0, 579.0, 587.0, 525.0, 627.0, 582.0, 579.0, 573.0, 564.0, 584.0, 573.0, 576.0, 584.0, 627.0, 573.0, 582.0, 576.0, 495.0, 576.0, 525.0, 630.0, 579.0, 582.0, 527.0, 579.0, 581.0, 579.0, 576.0, 524.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 289.0, 291.0, 285.0, 286.0, 296.0, 284.0, 289.0, 288.0, 285.0, 289.0, 
287.0, 296.0, 280.0, 291.0, 288.0, 299.0, 285.0, 298.0, 284.0, 289.0, 287.0, 286.0, 287.0, 291.0, 279.0, 175.0, 184.0, 290.0, 283.0, 285.0, 294.0, 275.0, 307.0, 268.0, 257.0, 290.0, 286.0, 284.0, 286.0, 288.0, 294.0, 306.0, 324.0, 290.0, 289.0, 290.0, 289.0, 287.0, 289.0, 282.0, 294.0, 317.0, 313.0, 291.0, 288.0, 290.0, 286.0, 281.0, 292.0, 296.0, 288.0, 290.0, 289.0, 294.0, 285.0, 291.0, 291.0, 287.0, 292.0, 199.0, 209.0, 284.0, 292.0, 288.0, 288.0, 311.0, 319.0, 284.0, 289.0, 219.0, 249.0, 291.0, 296.0, 283.0, 290.0, 289.0, 284.0, 279.0, 291.0, 289.0, 281.0, 291.0, 293.0, 294.0, 288.0, 287.0, 286.0, 288.0, 291.0, 316.0, 314.0, 292.0, 281.0, 310.0, 323.0, 278.0, 292.0, 278.0, 292.0, 295.0, 287.0, 287.0, 289.0, 285.0, 294.0, 289.0, 287.0, 283.0, 296.0, 281.0, 295.0, 293.0, 283.0, 284.0, 295.0, 294.0, 288.0, 260.0, 262.0, 249.0, 261.0, 287.0, 283.0, 252.0, 267.0, 286.0, 293.0, 281.0, 292.0, 281.0, 298.0, 302.0, 285.0, 262.0, 263.0, 311.0, 316.0, 288.0, 294.0, 290.0, 289.0, 291.0, 282.0, 273.0, 291.0, 287.0, 297.0, 292.0, 281.0, 285.0, 291.0, 289.0, 295.0, 317.0, 310.0, 282.0, 291.0, 285.0, 297.0, 288.0, 288.0, 249.0, 246.0, 287.0, 289.0, 267.0, 258.0, 314.0, 316.0, 284.0, 295.0, 285.0, 297.0, 271.0, 256.0, 287.0, 292.0, 287.0, 294.0, 287.0, 292.0, 287.0, 289.0, 251.0, 273.0, 293.0, 289.0, 293.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7050858585894852, "mean_inference_ms": 1.2643870852329173, "mean_action_processing_ms": 0.1347919372266095, "mean_env_wait_ms": 0.849190680492891, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 359.0, "episode_reward_mean": 571.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.605}, "hist_stats": {"episode_reward": [573.0, 576.0, 582.0, 573.0, 573.0, 576.0, 576.0, 579.0, 584.0, 582.0, 576.0, 573.0, 570.0, 359.0, 573.0, 579.0, 582.0, 525.0, 576.0, 
570.0, 582.0, 630.0, 579.0, 579.0, 576.0, 576.0, 630.0, 579.0, 576.0, 573.0, 584.0, 579.0, 579.0, 582.0, 579.0, 408.0, 576.0, 576.0, 630.0, 573.0, 468.0, 587.0, 573.0, 573.0, 570.0, 570.0, 584.0, 582.0, 573.0, 579.0, 630.0, 573.0, 633.0, 570.0, 570.0, 582.0, 576.0, 579.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 522.0, 510.0, 570.0, 519.0, 579.0, 573.0, 579.0, 587.0, 525.0, 627.0, 582.0, 579.0, 573.0, 564.0, 584.0, 573.0, 576.0, 584.0, 627.0, 573.0, 582.0, 576.0, 495.0, 576.0, 525.0, 630.0, 579.0, 582.0, 527.0, 579.0, 581.0, 579.0, 576.0, 524.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 289.0, 291.0, 285.0, 286.0, 296.0, 284.0, 289.0, 288.0, 285.0, 289.0, 287.0, 296.0, 280.0, 291.0, 288.0, 299.0, 285.0, 298.0, 284.0, 289.0, 287.0, 286.0, 287.0, 291.0, 279.0, 175.0, 184.0, 290.0, 283.0, 285.0, 294.0, 275.0, 307.0, 268.0, 257.0, 290.0, 286.0, 284.0, 286.0, 288.0, 294.0, 306.0, 324.0, 290.0, 289.0, 290.0, 289.0, 287.0, 289.0, 282.0, 294.0, 317.0, 313.0, 291.0, 288.0, 290.0, 286.0, 281.0, 292.0, 296.0, 288.0, 290.0, 289.0, 294.0, 285.0, 291.0, 291.0, 287.0, 292.0, 199.0, 209.0, 284.0, 292.0, 288.0, 288.0, 311.0, 319.0, 284.0, 289.0, 219.0, 249.0, 291.0, 296.0, 283.0, 290.0, 289.0, 284.0, 279.0, 291.0, 289.0, 281.0, 291.0, 293.0, 294.0, 288.0, 287.0, 286.0, 288.0, 291.0, 316.0, 314.0, 292.0, 281.0, 310.0, 323.0, 278.0, 292.0, 278.0, 292.0, 295.0, 287.0, 287.0, 289.0, 285.0, 294.0, 289.0, 287.0, 283.0, 296.0, 281.0, 295.0, 293.0, 283.0, 284.0, 295.0, 294.0, 
288.0, 260.0, 262.0, 249.0, 261.0, 287.0, 283.0, 252.0, 267.0, 286.0, 293.0, 281.0, 292.0, 281.0, 298.0, 302.0, 285.0, 262.0, 263.0, 311.0, 316.0, 288.0, 294.0, 290.0, 289.0, 291.0, 282.0, 273.0, 291.0, 287.0, 297.0, 292.0, 281.0, 285.0, 291.0, 289.0, 295.0, 317.0, 310.0, 282.0, 291.0, 285.0, 297.0, 288.0, 288.0, 249.0, 246.0, 287.0, 289.0, 267.0, 258.0, 314.0, 316.0, 284.0, 295.0, 285.0, 297.0, 271.0, 256.0, 287.0, 292.0, 287.0, 294.0, 287.0, 292.0, 287.0, 289.0, 251.0, 273.0, 293.0, 289.0, 293.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7050858585894852, "mean_inference_ms": 1.2643870852329173, "mean_action_processing_ms": 0.1347919372266095, "mean_env_wait_ms": 0.849190680492891, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6912000, "num_agent_steps_trained": 6912000, "num_env_steps_sampled": 3456000, "num_env_steps_trained": 3456000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3456000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6912000, "timers": {"training_iteration_time_ms": 3646.365, "learn_time_ms": 1107.179, "learn_throughput": 11560.911, "synch_weights_time_ms": 12.349}, "counters": {"num_env_steps_sampled": 3456000, "num_env_steps_trained": 3456000, "num_agent_steps_sampled": 6912000, "num_agent_steps_trained": 6912000}, "done": false, "episodes_total": 8640, "training_iteration": 270, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-01", "timestamp": 1666581481, "time_this_iter_s": 3.6196086406707764, "time_total_s": 1042.315880060196, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1042.315880060196, "timesteps_since_restore": 0, "iterations_since_restore": 270, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.76666666666667, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.44, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.72, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.07, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.35, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.73, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.23, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.64, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.91, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.23, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.64, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.23, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.64, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006792682688683271, "policy_loss": 0.00031318137189373374, "vf_loss": 7.904168128967285, "vf_explained_var": 0.5880827307701111, "kl": 0.002125304192304611, "entropy": 0.8486584424972534, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3468800, "num_env_steps_trained": 3468800, "num_agent_steps_sampled": 6937600, "num_agent_steps_trained": 6937600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 574.44, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.22}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.44, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.72, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.07, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.35, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.73, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.13, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.23, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.64, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.2, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.91, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.23, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.64, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.23, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.64, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 408.0, 576.0, 576.0, 630.0, 573.0, 468.0, 587.0, 573.0, 573.0, 570.0, 570.0, 584.0, 582.0, 573.0, 579.0, 630.0, 573.0, 633.0, 570.0, 570.0, 582.0, 576.0, 579.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 522.0, 510.0, 570.0, 519.0, 579.0, 573.0, 579.0, 587.0, 525.0, 627.0, 582.0, 579.0, 573.0, 564.0, 584.0, 573.0, 576.0, 584.0, 627.0, 573.0, 582.0, 576.0, 495.0, 576.0, 525.0, 630.0, 579.0, 582.0, 527.0, 579.0, 581.0, 579.0, 576.0, 524.0, 582.0, 579.0, 633.0, 579.0, 573.0, 579.0, 584.0, 581.0, 624.0, 579.0, 567.0, 579.0, 579.0, 582.0, 630.0, 584.0, 639.0, 636.0, 584.0, 582.0, 573.0, 573.0, 573.0, 587.0, 582.0, 579.0, 464.0, 573.0, 576.0, 582.0, 579.0, 536.0, 525.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 285.0, 291.0, 291.0, 287.0, 292.0, 199.0, 209.0, 284.0, 292.0, 288.0, 
288.0, 311.0, 319.0, 284.0, 289.0, 219.0, 249.0, 291.0, 296.0, 283.0, 290.0, 289.0, 284.0, 279.0, 291.0, 289.0, 281.0, 291.0, 293.0, 294.0, 288.0, 287.0, 286.0, 288.0, 291.0, 316.0, 314.0, 292.0, 281.0, 310.0, 323.0, 278.0, 292.0, 278.0, 292.0, 295.0, 287.0, 287.0, 289.0, 285.0, 294.0, 289.0, 287.0, 283.0, 296.0, 281.0, 295.0, 293.0, 283.0, 284.0, 295.0, 294.0, 288.0, 260.0, 262.0, 249.0, 261.0, 287.0, 283.0, 252.0, 267.0, 286.0, 293.0, 281.0, 292.0, 281.0, 298.0, 302.0, 285.0, 262.0, 263.0, 311.0, 316.0, 288.0, 294.0, 290.0, 289.0, 291.0, 282.0, 273.0, 291.0, 287.0, 297.0, 292.0, 281.0, 285.0, 291.0, 289.0, 295.0, 317.0, 310.0, 282.0, 291.0, 285.0, 297.0, 288.0, 288.0, 249.0, 246.0, 287.0, 289.0, 267.0, 258.0, 314.0, 316.0, 284.0, 295.0, 285.0, 297.0, 271.0, 256.0, 287.0, 292.0, 287.0, 294.0, 287.0, 292.0, 287.0, 289.0, 251.0, 273.0, 293.0, 289.0, 293.0, 286.0, 315.0, 318.0, 291.0, 288.0, 290.0, 283.0, 296.0, 283.0, 292.0, 292.0, 281.0, 300.0, 316.0, 308.0, 287.0, 292.0, 282.0, 285.0, 283.0, 296.0, 300.0, 279.0, 295.0, 287.0, 309.0, 321.0, 299.0, 285.0, 322.0, 317.0, 316.0, 320.0, 286.0, 298.0, 295.0, 287.0, 287.0, 286.0, 280.0, 293.0, 283.0, 290.0, 288.0, 299.0, 288.0, 294.0, 304.0, 275.0, 235.0, 229.0, 285.0, 288.0, 288.0, 288.0, 291.0, 291.0, 297.0, 282.0, 269.0, 267.0, 265.0, 260.0, 313.0, 314.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7049859829023956, "mean_inference_ms": 1.2641320160489828, "mean_action_processing_ms": 0.13478291473748163, "mean_env_wait_ms": 0.8490647234068114, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 574.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 199.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.22}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 408.0, 576.0, 576.0, 630.0, 573.0, 468.0, 587.0, 573.0, 573.0, 570.0, 570.0, 584.0, 582.0, 573.0, 579.0, 630.0, 
573.0, 633.0, 570.0, 570.0, 582.0, 576.0, 579.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 522.0, 510.0, 570.0, 519.0, 579.0, 573.0, 579.0, 587.0, 525.0, 627.0, 582.0, 579.0, 573.0, 564.0, 584.0, 573.0, 576.0, 584.0, 627.0, 573.0, 582.0, 576.0, 495.0, 576.0, 525.0, 630.0, 579.0, 582.0, 527.0, 579.0, 581.0, 579.0, 576.0, 524.0, 582.0, 579.0, 633.0, 579.0, 573.0, 579.0, 584.0, 581.0, 624.0, 579.0, 567.0, 579.0, 579.0, 582.0, 630.0, 584.0, 639.0, 636.0, 584.0, 582.0, 573.0, 573.0, 573.0, 587.0, 582.0, 579.0, 464.0, 573.0, 576.0, 582.0, 579.0, 536.0, 525.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 285.0, 291.0, 291.0, 287.0, 292.0, 199.0, 209.0, 284.0, 292.0, 288.0, 288.0, 311.0, 319.0, 284.0, 289.0, 219.0, 249.0, 291.0, 296.0, 283.0, 290.0, 289.0, 284.0, 279.0, 291.0, 289.0, 281.0, 291.0, 293.0, 294.0, 288.0, 287.0, 286.0, 288.0, 291.0, 316.0, 314.0, 292.0, 281.0, 310.0, 323.0, 278.0, 292.0, 278.0, 292.0, 295.0, 287.0, 287.0, 289.0, 285.0, 294.0, 289.0, 287.0, 283.0, 296.0, 281.0, 295.0, 293.0, 283.0, 284.0, 295.0, 294.0, 288.0, 260.0, 262.0, 249.0, 261.0, 287.0, 283.0, 252.0, 267.0, 286.0, 293.0, 281.0, 292.0, 281.0, 298.0, 302.0, 285.0, 262.0, 263.0, 311.0, 316.0, 288.0, 294.0, 290.0, 289.0, 291.0, 282.0, 273.0, 291.0, 287.0, 297.0, 292.0, 281.0, 285.0, 291.0, 289.0, 295.0, 317.0, 310.0, 282.0, 291.0, 285.0, 297.0, 288.0, 288.0, 249.0, 246.0, 287.0, 289.0, 267.0, 258.0, 314.0, 316.0, 284.0, 295.0, 285.0, 297.0, 271.0, 256.0, 287.0, 292.0, 287.0, 294.0, 287.0, 
292.0, 287.0, 289.0, 251.0, 273.0, 293.0, 289.0, 293.0, 286.0, 315.0, 318.0, 291.0, 288.0, 290.0, 283.0, 296.0, 283.0, 292.0, 292.0, 281.0, 300.0, 316.0, 308.0, 287.0, 292.0, 282.0, 285.0, 283.0, 296.0, 300.0, 279.0, 295.0, 287.0, 309.0, 321.0, 299.0, 285.0, 322.0, 317.0, 316.0, 320.0, 286.0, 298.0, 295.0, 287.0, 287.0, 286.0, 280.0, 293.0, 283.0, 290.0, 288.0, 299.0, 288.0, 294.0, 304.0, 275.0, 235.0, 229.0, 285.0, 288.0, 288.0, 288.0, 291.0, 291.0, 297.0, 282.0, 269.0, 267.0, 265.0, 260.0, 313.0, 314.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7049859829023956, "mean_inference_ms": 1.2641320160489828, "mean_action_processing_ms": 0.13478291473748163, "mean_env_wait_ms": 0.8490647234068114, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6937600, "num_agent_steps_trained": 6937600, "num_env_steps_sampled": 3468800, "num_env_steps_trained": 3468800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3468800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6937600, "timers": {"training_iteration_time_ms": 3638.877, "learn_time_ms": 1110.437, "learn_throughput": 11526.989, "synch_weights_time_ms": 12.903}, "counters": {"num_env_steps_sampled": 3468800, "num_env_steps_trained": 3468800, "num_agent_steps_sampled": 6937600, "num_agent_steps_trained": 6937600}, "done": false, "episodes_total": 8672, "training_iteration": 271, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-04", "timestamp": 1666581484, "time_this_iter_s": 3.6376683712005615, "time_total_s": 1045.9535484313965, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1045.9535484313965, "timesteps_since_restore": 0, "iterations_since_restore": 271, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.8, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 178.05, "shaped_reward_min": 135, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.69, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.34, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.33, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.98, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.19, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.81, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.19, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.19, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001299827708862722, "policy_loss": 0.0009380197152495384, "vf_loss": 7.8011393547058105, "vf_explained_var": 0.5883727669715881, "kl": 0.002125523053109646, "entropy": 0.8366072177886963, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3481600, "num_env_steps_trained": 3481600, "num_agent_steps_sampled": 6963200, "num_agent_steps_trained": 6963200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 464.0, "episode_reward_mean": 577.65, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 288.825}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 178.05, "shaped_reward_min": 135, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.69, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.34, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.33, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.98, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.23, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.19, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.81, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.23, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.19, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.19, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 510.0, 570.0, 519.0, 579.0, 573.0, 579.0, 587.0, 525.0, 627.0, 582.0, 579.0, 573.0, 564.0, 584.0, 573.0, 576.0, 584.0, 627.0, 573.0, 582.0, 576.0, 495.0, 576.0, 525.0, 630.0, 579.0, 582.0, 527.0, 579.0, 581.0, 579.0, 576.0, 524.0, 582.0, 579.0, 633.0, 579.0, 573.0, 579.0, 584.0, 581.0, 624.0, 579.0, 567.0, 579.0, 579.0, 582.0, 630.0, 584.0, 639.0, 636.0, 584.0, 582.0, 573.0, 573.0, 573.0, 587.0, 582.0, 579.0, 464.0, 573.0, 576.0, 582.0, 579.0, 536.0, 525.0, 627.0, 584.0, 576.0, 590.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 627.0, 530.0, 630.0, 579.0, 587.0, 582.0, 587.0, 587.0, 576.0, 581.0, 533.0, 582.0, 582.0, 639.0, 516.0, 582.0, 582.0, 630.0, 576.0, 630.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 262.0, 249.0, 261.0, 287.0, 283.0, 252.0, 267.0, 286.0, 293.0, 281.0, 
292.0, 281.0, 298.0, 302.0, 285.0, 262.0, 263.0, 311.0, 316.0, 288.0, 294.0, 290.0, 289.0, 291.0, 282.0, 273.0, 291.0, 287.0, 297.0, 292.0, 281.0, 285.0, 291.0, 289.0, 295.0, 317.0, 310.0, 282.0, 291.0, 285.0, 297.0, 288.0, 288.0, 249.0, 246.0, 287.0, 289.0, 267.0, 258.0, 314.0, 316.0, 284.0, 295.0, 285.0, 297.0, 271.0, 256.0, 287.0, 292.0, 287.0, 294.0, 287.0, 292.0, 287.0, 289.0, 251.0, 273.0, 293.0, 289.0, 293.0, 286.0, 315.0, 318.0, 291.0, 288.0, 290.0, 283.0, 296.0, 283.0, 292.0, 292.0, 281.0, 300.0, 316.0, 308.0, 287.0, 292.0, 282.0, 285.0, 283.0, 296.0, 300.0, 279.0, 295.0, 287.0, 309.0, 321.0, 299.0, 285.0, 322.0, 317.0, 316.0, 320.0, 286.0, 298.0, 295.0, 287.0, 287.0, 286.0, 280.0, 293.0, 283.0, 290.0, 288.0, 299.0, 288.0, 294.0, 304.0, 275.0, 235.0, 229.0, 285.0, 288.0, 288.0, 288.0, 291.0, 291.0, 297.0, 282.0, 269.0, 267.0, 265.0, 260.0, 313.0, 314.0, 284.0, 300.0, 282.0, 294.0, 295.0, 295.0, 278.0, 301.0, 289.0, 290.0, 288.0, 291.0, 286.0, 293.0, 284.0, 295.0, 281.0, 295.0, 287.0, 289.0, 319.0, 308.0, 268.0, 262.0, 314.0, 316.0, 286.0, 293.0, 289.0, 298.0, 293.0, 289.0, 291.0, 296.0, 293.0, 294.0, 283.0, 293.0, 292.0, 289.0, 264.0, 269.0, 291.0, 291.0, 290.0, 292.0, 320.0, 319.0, 259.0, 257.0, 298.0, 284.0, 292.0, 290.0, 321.0, 309.0, 293.0, 283.0, 311.0, 319.0, 289.0, 281.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7048976136960431, "mean_inference_ms": 1.2638769872483644, "mean_action_processing_ms": 0.13477343774655234, "mean_env_wait_ms": 0.8489374954857171, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 464.0, "episode_reward_mean": 577.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 288.825}, "hist_stats": {"episode_reward": [522.0, 510.0, 570.0, 519.0, 579.0, 573.0, 579.0, 587.0, 525.0, 627.0, 582.0, 579.0, 573.0, 564.0, 584.0, 573.0, 576.0, 584.0, 
627.0, 573.0, 582.0, 576.0, 495.0, 576.0, 525.0, 630.0, 579.0, 582.0, 527.0, 579.0, 581.0, 579.0, 576.0, 524.0, 582.0, 579.0, 633.0, 579.0, 573.0, 579.0, 584.0, 581.0, 624.0, 579.0, 567.0, 579.0, 579.0, 582.0, 630.0, 584.0, 639.0, 636.0, 584.0, 582.0, 573.0, 573.0, 573.0, 587.0, 582.0, 579.0, 464.0, 573.0, 576.0, 582.0, 579.0, 536.0, 525.0, 627.0, 584.0, 576.0, 590.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 627.0, 530.0, 630.0, 579.0, 587.0, 582.0, 587.0, 587.0, 576.0, 581.0, 533.0, 582.0, 582.0, 639.0, 516.0, 582.0, 582.0, 630.0, 576.0, 630.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 262.0, 249.0, 261.0, 287.0, 283.0, 252.0, 267.0, 286.0, 293.0, 281.0, 292.0, 281.0, 298.0, 302.0, 285.0, 262.0, 263.0, 311.0, 316.0, 288.0, 294.0, 290.0, 289.0, 291.0, 282.0, 273.0, 291.0, 287.0, 297.0, 292.0, 281.0, 285.0, 291.0, 289.0, 295.0, 317.0, 310.0, 282.0, 291.0, 285.0, 297.0, 288.0, 288.0, 249.0, 246.0, 287.0, 289.0, 267.0, 258.0, 314.0, 316.0, 284.0, 295.0, 285.0, 297.0, 271.0, 256.0, 287.0, 292.0, 287.0, 294.0, 287.0, 292.0, 287.0, 289.0, 251.0, 273.0, 293.0, 289.0, 293.0, 286.0, 315.0, 318.0, 291.0, 288.0, 290.0, 283.0, 296.0, 283.0, 292.0, 292.0, 281.0, 300.0, 316.0, 308.0, 287.0, 292.0, 282.0, 285.0, 283.0, 296.0, 300.0, 279.0, 295.0, 287.0, 309.0, 321.0, 299.0, 285.0, 322.0, 317.0, 316.0, 320.0, 286.0, 298.0, 295.0, 287.0, 287.0, 286.0, 280.0, 293.0, 283.0, 290.0, 288.0, 299.0, 288.0, 294.0, 304.0, 275.0, 235.0, 229.0, 285.0, 288.0, 288.0, 288.0, 
291.0, 291.0, 297.0, 282.0, 269.0, 267.0, 265.0, 260.0, 313.0, 314.0, 284.0, 300.0, 282.0, 294.0, 295.0, 295.0, 278.0, 301.0, 289.0, 290.0, 288.0, 291.0, 286.0, 293.0, 284.0, 295.0, 281.0, 295.0, 287.0, 289.0, 319.0, 308.0, 268.0, 262.0, 314.0, 316.0, 286.0, 293.0, 289.0, 298.0, 293.0, 289.0, 291.0, 296.0, 293.0, 294.0, 283.0, 293.0, 292.0, 289.0, 264.0, 269.0, 291.0, 291.0, 290.0, 292.0, 320.0, 319.0, 259.0, 257.0, 298.0, 284.0, 292.0, 290.0, 321.0, 309.0, 293.0, 283.0, 311.0, 319.0, 289.0, 281.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7048976136960431, "mean_inference_ms": 1.2638769872483644, "mean_action_processing_ms": 0.13477343774655234, "mean_env_wait_ms": 0.8489374954857171, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6963200, "num_agent_steps_trained": 6963200, "num_env_steps_sampled": 3481600, "num_env_steps_trained": 3481600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3481600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6963200, "timers": {"training_iteration_time_ms": 3625.048, "learn_time_ms": 1102.444, "learn_throughput": 11610.568, "synch_weights_time_ms": 12.999}, "counters": {"num_env_steps_sampled": 3481600, "num_env_steps_trained": 3481600, "num_agent_steps_sampled": 6963200, "num_agent_steps_trained": 6963200}, "done": false, "episodes_total": 8704, "training_iteration": 272, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-08", "timestamp": 1666581488, "time_this_iter_s": 3.6092796325683594, "time_total_s": 1049.5628280639648, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1049.5628280639648, "timesteps_since_restore": 0, "iterations_since_restore": 272, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.979999999999997, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 179.3, "shaped_reward_min": 105, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.65, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.57, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.33, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.18, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.08, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.92, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.18, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.08, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.18, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.08, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.003036621492356062, "policy_loss": -0.0034028268419206142, "vf_loss": 7.827790260314941, "vf_explained_var": 0.5869630575180054, "kl": 0.001971776131540537, "entropy": 0.8331469297409058, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3494400, "num_env_steps_trained": 3494400, "num_agent_steps_sampled": 6988800, "num_agent_steps_trained": 6988800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 345.0, "episode_reward_mean": 578.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 170.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 289.45}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 179.3, "shaped_reward_min": 105, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.65, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.57, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.33, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.18, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.08, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.92, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.41, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.18, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.08, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.18, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.08, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 524.0, 582.0, 579.0, 633.0, 579.0, 573.0, 579.0, 584.0, 581.0, 624.0, 579.0, 567.0, 579.0, 579.0, 582.0, 630.0, 584.0, 639.0, 636.0, 584.0, 582.0, 573.0, 573.0, 573.0, 587.0, 582.0, 579.0, 464.0, 573.0, 576.0, 582.0, 579.0, 536.0, 525.0, 627.0, 584.0, 576.0, 590.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 627.0, 530.0, 630.0, 579.0, 587.0, 582.0, 587.0, 587.0, 576.0, 581.0, 533.0, 582.0, 582.0, 639.0, 516.0, 582.0, 582.0, 630.0, 576.0, 630.0, 570.0, 579.0, 582.0, 582.0, 579.0, 582.0, 576.0, 627.0, 576.0, 582.0, 579.0, 345.0, 582.0, 627.0, 522.0, 579.0, 587.0, 576.0, 587.0, 582.0, 582.0, 576.0, 581.0, 576.0, 579.0, 530.0, 587.0, 584.0, 576.0, 587.0, 590.0, 579.0, 579.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 289.0, 251.0, 273.0, 293.0, 289.0, 293.0, 286.0, 315.0, 318.0, 291.0, 
288.0, 290.0, 283.0, 296.0, 283.0, 292.0, 292.0, 281.0, 300.0, 316.0, 308.0, 287.0, 292.0, 282.0, 285.0, 283.0, 296.0, 300.0, 279.0, 295.0, 287.0, 309.0, 321.0, 299.0, 285.0, 322.0, 317.0, 316.0, 320.0, 286.0, 298.0, 295.0, 287.0, 287.0, 286.0, 280.0, 293.0, 283.0, 290.0, 288.0, 299.0, 288.0, 294.0, 304.0, 275.0, 235.0, 229.0, 285.0, 288.0, 288.0, 288.0, 291.0, 291.0, 297.0, 282.0, 269.0, 267.0, 265.0, 260.0, 313.0, 314.0, 284.0, 300.0, 282.0, 294.0, 295.0, 295.0, 278.0, 301.0, 289.0, 290.0, 288.0, 291.0, 286.0, 293.0, 284.0, 295.0, 281.0, 295.0, 287.0, 289.0, 319.0, 308.0, 268.0, 262.0, 314.0, 316.0, 286.0, 293.0, 289.0, 298.0, 293.0, 289.0, 291.0, 296.0, 293.0, 294.0, 283.0, 293.0, 292.0, 289.0, 264.0, 269.0, 291.0, 291.0, 290.0, 292.0, 320.0, 319.0, 259.0, 257.0, 298.0, 284.0, 292.0, 290.0, 321.0, 309.0, 293.0, 283.0, 311.0, 319.0, 289.0, 281.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 286.0, 293.0, 295.0, 287.0, 283.0, 293.0, 321.0, 306.0, 284.0, 292.0, 295.0, 287.0, 286.0, 293.0, 175.0, 170.0, 288.0, 294.0, 313.0, 314.0, 260.0, 262.0, 285.0, 294.0, 297.0, 290.0, 274.0, 302.0, 291.0, 296.0, 290.0, 292.0, 291.0, 291.0, 288.0, 288.0, 288.0, 293.0, 293.0, 283.0, 288.0, 291.0, 270.0, 260.0, 301.0, 286.0, 294.0, 290.0, 295.0, 281.0, 291.0, 296.0, 300.0, 290.0, 286.0, 293.0, 291.0, 288.0, 294.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7048025547681772, "mean_inference_ms": 1.2636074586322337, "mean_action_processing_ms": 0.1347604788542491, "mean_env_wait_ms": 0.8487813382272864, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 345.0, "episode_reward_mean": 578.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 170.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 289.45}, "hist_stats": {"episode_reward": [576.0, 524.0, 582.0, 579.0, 633.0, 579.0, 573.0, 579.0, 584.0, 581.0, 624.0, 579.0, 567.0, 579.0, 579.0, 582.0, 630.0, 584.0, 639.0, 
636.0, 584.0, 582.0, 573.0, 573.0, 573.0, 587.0, 582.0, 579.0, 464.0, 573.0, 576.0, 582.0, 579.0, 536.0, 525.0, 627.0, 584.0, 576.0, 590.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 627.0, 530.0, 630.0, 579.0, 587.0, 582.0, 587.0, 587.0, 576.0, 581.0, 533.0, 582.0, 582.0, 639.0, 516.0, 582.0, 582.0, 630.0, 576.0, 630.0, 570.0, 579.0, 582.0, 582.0, 579.0, 582.0, 576.0, 627.0, 576.0, 582.0, 579.0, 345.0, 582.0, 627.0, 522.0, 579.0, 587.0, 576.0, 587.0, 582.0, 582.0, 576.0, 581.0, 576.0, 579.0, 530.0, 587.0, 584.0, 576.0, 587.0, 590.0, 579.0, 579.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 289.0, 251.0, 273.0, 293.0, 289.0, 293.0, 286.0, 315.0, 318.0, 291.0, 288.0, 290.0, 283.0, 296.0, 283.0, 292.0, 292.0, 281.0, 300.0, 316.0, 308.0, 287.0, 292.0, 282.0, 285.0, 283.0, 296.0, 300.0, 279.0, 295.0, 287.0, 309.0, 321.0, 299.0, 285.0, 322.0, 317.0, 316.0, 320.0, 286.0, 298.0, 295.0, 287.0, 287.0, 286.0, 280.0, 293.0, 283.0, 290.0, 288.0, 299.0, 288.0, 294.0, 304.0, 275.0, 235.0, 229.0, 285.0, 288.0, 288.0, 288.0, 291.0, 291.0, 297.0, 282.0, 269.0, 267.0, 265.0, 260.0, 313.0, 314.0, 284.0, 300.0, 282.0, 294.0, 295.0, 295.0, 278.0, 301.0, 289.0, 290.0, 288.0, 291.0, 286.0, 293.0, 284.0, 295.0, 281.0, 295.0, 287.0, 289.0, 319.0, 308.0, 268.0, 262.0, 314.0, 316.0, 286.0, 293.0, 289.0, 298.0, 293.0, 289.0, 291.0, 296.0, 293.0, 294.0, 283.0, 293.0, 292.0, 289.0, 264.0, 269.0, 291.0, 291.0, 290.0, 292.0, 320.0, 319.0, 259.0, 257.0, 298.0, 284.0, 292.0, 290.0, 321.0, 
309.0, 293.0, 283.0, 311.0, 319.0, 289.0, 281.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 286.0, 293.0, 295.0, 287.0, 283.0, 293.0, 321.0, 306.0, 284.0, 292.0, 295.0, 287.0, 286.0, 293.0, 175.0, 170.0, 288.0, 294.0, 313.0, 314.0, 260.0, 262.0, 285.0, 294.0, 297.0, 290.0, 274.0, 302.0, 291.0, 296.0, 290.0, 292.0, 291.0, 291.0, 288.0, 288.0, 288.0, 293.0, 293.0, 283.0, 288.0, 291.0, 270.0, 260.0, 301.0, 286.0, 294.0, 290.0, 295.0, 281.0, 291.0, 296.0, 300.0, 290.0, 286.0, 293.0, 291.0, 288.0, 294.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7048025547681772, "mean_inference_ms": 1.2636074586322337, "mean_action_processing_ms": 0.1347604788542491, "mean_env_wait_ms": 0.8487813382272864, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 6988800, "num_agent_steps_trained": 6988800, "num_env_steps_sampled": 3494400, "num_env_steps_trained": 3494400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3494400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 6988800, "timers": {"training_iteration_time_ms": 3607.661, "learn_time_ms": 1093.892, "learn_throughput": 11701.342, "synch_weights_time_ms": 12.561}, "counters": {"num_env_steps_sampled": 3494400, "num_env_steps_trained": 3494400, "num_agent_steps_sampled": 6988800, "num_agent_steps_trained": 6988800}, "done": false, "episodes_total": 8736, "training_iteration": 273, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-12", "timestamp": 1666581492, "time_this_iter_s": 3.537813425064087, "time_total_s": 1053.100641489029, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1053.100641489029, "timesteps_since_restore": 0, "iterations_since_restore": 273, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.400000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 200.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.9, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.0, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.76, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.81, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.57, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.69, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.57, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.69, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.57, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.69, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 1.652922946959734e-05, "policy_loss": -0.0003470139345154166, "vf_loss": 7.779203414916992, "vf_explained_var": 0.5858237147331238, "kl": 0.002200545510277152, "entropy": 0.828750729560852, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3507200, "num_env_steps_trained": 3507200, "num_agent_steps_sampled": 7014400, "num_agent_steps_trained": 7014400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 578.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 289.45}, "custom_metrics": {"sparse_reward_mean": 200.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.9, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.0, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.76, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.81, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.57, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.69, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.35, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.57, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.69, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.57, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.69, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 536.0, 525.0, 627.0, 584.0, 576.0, 590.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 627.0, 530.0, 630.0, 579.0, 587.0, 582.0, 587.0, 587.0, 576.0, 581.0, 533.0, 582.0, 582.0, 639.0, 516.0, 582.0, 582.0, 630.0, 576.0, 630.0, 570.0, 579.0, 582.0, 582.0, 579.0, 582.0, 576.0, 627.0, 576.0, 582.0, 579.0, 345.0, 582.0, 627.0, 522.0, 579.0, 587.0, 576.0, 587.0, 582.0, 582.0, 576.0, 581.0, 576.0, 579.0, 530.0, 587.0, 584.0, 576.0, 587.0, 590.0, 579.0, 579.0, 584.0, 579.0, 584.0, 579.0, 576.0, 582.0, 630.0, 576.0, 579.0, 579.0, 576.0, 630.0, 570.0, 579.0, 576.0, 627.0, 576.0, 633.0, 630.0, 630.0, 576.0, 579.0, 579.0, 579.0, 582.0, 630.0, 630.0, 576.0, 579.0, 510.0, 582.0, 294.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 269.0, 267.0, 265.0, 260.0, 313.0, 314.0, 284.0, 300.0, 282.0, 
294.0, 295.0, 295.0, 278.0, 301.0, 289.0, 290.0, 288.0, 291.0, 286.0, 293.0, 284.0, 295.0, 281.0, 295.0, 287.0, 289.0, 319.0, 308.0, 268.0, 262.0, 314.0, 316.0, 286.0, 293.0, 289.0, 298.0, 293.0, 289.0, 291.0, 296.0, 293.0, 294.0, 283.0, 293.0, 292.0, 289.0, 264.0, 269.0, 291.0, 291.0, 290.0, 292.0, 320.0, 319.0, 259.0, 257.0, 298.0, 284.0, 292.0, 290.0, 321.0, 309.0, 293.0, 283.0, 311.0, 319.0, 289.0, 281.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 286.0, 293.0, 295.0, 287.0, 283.0, 293.0, 321.0, 306.0, 284.0, 292.0, 295.0, 287.0, 286.0, 293.0, 175.0, 170.0, 288.0, 294.0, 313.0, 314.0, 260.0, 262.0, 285.0, 294.0, 297.0, 290.0, 274.0, 302.0, 291.0, 296.0, 290.0, 292.0, 291.0, 291.0, 288.0, 288.0, 288.0, 293.0, 293.0, 283.0, 288.0, 291.0, 270.0, 260.0, 301.0, 286.0, 294.0, 290.0, 295.0, 281.0, 291.0, 296.0, 300.0, 290.0, 286.0, 293.0, 291.0, 288.0, 294.0, 290.0, 290.0, 289.0, 285.0, 299.0, 288.0, 291.0, 286.0, 290.0, 285.0, 297.0, 314.0, 316.0, 288.0, 288.0, 295.0, 284.0, 292.0, 287.0, 288.0, 288.0, 306.0, 324.0, 282.0, 288.0, 293.0, 286.0, 295.0, 281.0, 320.0, 307.0, 292.0, 284.0, 316.0, 317.0, 315.0, 315.0, 314.0, 316.0, 284.0, 292.0, 293.0, 286.0, 289.0, 290.0, 291.0, 288.0, 283.0, 299.0, 303.0, 327.0, 318.0, 312.0, 286.0, 290.0, 291.0, 288.0, 249.0, 261.0, 292.0, 290.0, 143.0, 151.0, 314.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7046816164464604, "mean_inference_ms": 1.2633067797403978, "mean_action_processing_ms": 0.13474408191316176, "mean_env_wait_ms": 0.8485969737751016, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 578.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 289.45}, "hist_stats": {"episode_reward": [579.0, 536.0, 525.0, 627.0, 584.0, 576.0, 590.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 627.0, 530.0, 630.0, 579.0, 587.0, 
582.0, 587.0, 587.0, 576.0, 581.0, 533.0, 582.0, 582.0, 639.0, 516.0, 582.0, 582.0, 630.0, 576.0, 630.0, 570.0, 579.0, 582.0, 582.0, 579.0, 582.0, 576.0, 627.0, 576.0, 582.0, 579.0, 345.0, 582.0, 627.0, 522.0, 579.0, 587.0, 576.0, 587.0, 582.0, 582.0, 576.0, 581.0, 576.0, 579.0, 530.0, 587.0, 584.0, 576.0, 587.0, 590.0, 579.0, 579.0, 584.0, 579.0, 584.0, 579.0, 576.0, 582.0, 630.0, 576.0, 579.0, 579.0, 576.0, 630.0, 570.0, 579.0, 576.0, 627.0, 576.0, 633.0, 630.0, 630.0, 576.0, 579.0, 579.0, 579.0, 582.0, 630.0, 630.0, 576.0, 579.0, 510.0, 582.0, 294.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 282.0, 269.0, 267.0, 265.0, 260.0, 313.0, 314.0, 284.0, 300.0, 282.0, 294.0, 295.0, 295.0, 278.0, 301.0, 289.0, 290.0, 288.0, 291.0, 286.0, 293.0, 284.0, 295.0, 281.0, 295.0, 287.0, 289.0, 319.0, 308.0, 268.0, 262.0, 314.0, 316.0, 286.0, 293.0, 289.0, 298.0, 293.0, 289.0, 291.0, 296.0, 293.0, 294.0, 283.0, 293.0, 292.0, 289.0, 264.0, 269.0, 291.0, 291.0, 290.0, 292.0, 320.0, 319.0, 259.0, 257.0, 298.0, 284.0, 292.0, 290.0, 321.0, 309.0, 293.0, 283.0, 311.0, 319.0, 289.0, 281.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 286.0, 293.0, 295.0, 287.0, 283.0, 293.0, 321.0, 306.0, 284.0, 292.0, 295.0, 287.0, 286.0, 293.0, 175.0, 170.0, 288.0, 294.0, 313.0, 314.0, 260.0, 262.0, 285.0, 294.0, 297.0, 290.0, 274.0, 302.0, 291.0, 296.0, 290.0, 292.0, 291.0, 291.0, 288.0, 288.0, 288.0, 293.0, 293.0, 283.0, 288.0, 291.0, 270.0, 260.0, 301.0, 286.0, 294.0, 290.0, 295.0, 281.0, 291.0, 
296.0, 300.0, 290.0, 286.0, 293.0, 291.0, 288.0, 294.0, 290.0, 290.0, 289.0, 285.0, 299.0, 288.0, 291.0, 286.0, 290.0, 285.0, 297.0, 314.0, 316.0, 288.0, 288.0, 295.0, 284.0, 292.0, 287.0, 288.0, 288.0, 306.0, 324.0, 282.0, 288.0, 293.0, 286.0, 295.0, 281.0, 320.0, 307.0, 292.0, 284.0, 316.0, 317.0, 315.0, 315.0, 314.0, 316.0, 284.0, 292.0, 293.0, 286.0, 289.0, 290.0, 291.0, 288.0, 283.0, 299.0, 303.0, 327.0, 318.0, 312.0, 286.0, 290.0, 291.0, 288.0, 249.0, 261.0, 292.0, 290.0, 143.0, 151.0, 314.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7046816164464604, "mean_inference_ms": 1.2633067797403978, "mean_action_processing_ms": 0.13474408191316176, "mean_env_wait_ms": 0.8485969737751016, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7014400, "num_agent_steps_trained": 7014400, "num_env_steps_sampled": 3507200, "num_env_steps_trained": 3507200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3507200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7014400, "timers": {"training_iteration_time_ms": 3601.405, "learn_time_ms": 1092.369, "learn_throughput": 11717.652, "synch_weights_time_ms": 13.092}, "counters": {"num_env_steps_sampled": 3507200, "num_env_steps_trained": 3507200, "num_agent_steps_sampled": 7014400, "num_agent_steps_trained": 7014400}, "done": false, "episodes_total": 8768, "training_iteration": 274, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-16", "timestamp": 1666581496, "time_this_iter_s": 3.5803771018981934, "time_total_s": 1056.6810185909271, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1056.6810185909271, "timesteps_since_restore": 0, "iterations_since_restore": 274, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.180000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 200.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.73, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.87, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.2, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.66, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.89, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.51, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.77, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.24, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.51, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.77, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.51, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.77, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0033253917936235666, "policy_loss": -0.003682898124679923, "vf_loss": 7.720019340515137, "vf_explained_var": 0.5786682963371277, "kl": 0.0020081857219338417, "entropy": 0.8289890289306641, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3520000, "num_env_steps_trained": 3520000, "num_agent_steps_sampled": 7040000, "num_agent_steps_trained": 7040000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 578.73, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 289.365}, "custom_metrics": {"sparse_reward_mean": 200.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.73, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.87, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.2, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.66, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.89, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.51, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.77, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.24, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 2, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.51, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.77, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.51, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.77, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 630.0, 570.0, 579.0, 582.0, 582.0, 579.0, 582.0, 576.0, 627.0, 576.0, 582.0, 579.0, 345.0, 582.0, 627.0, 522.0, 579.0, 587.0, 576.0, 587.0, 582.0, 582.0, 576.0, 581.0, 576.0, 579.0, 530.0, 587.0, 584.0, 576.0, 587.0, 590.0, 579.0, 579.0, 584.0, 579.0, 584.0, 579.0, 576.0, 582.0, 630.0, 576.0, 579.0, 579.0, 576.0, 630.0, 570.0, 579.0, 576.0, 627.0, 576.0, 633.0, 630.0, 630.0, 576.0, 579.0, 579.0, 579.0, 582.0, 630.0, 630.0, 576.0, 579.0, 510.0, 582.0, 294.0, 630.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 582.0, 570.0, 576.0, 530.0, 639.0, 582.0, 582.0, 579.0, 582.0, 587.0, 582.0, 584.0, 539.0, 573.0, 627.0, 582.0, 582.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 283.0, 311.0, 319.0, 289.0, 281.0, 291.0, 288.0, 293.0, 289.0, 285.0, 
297.0, 286.0, 293.0, 295.0, 287.0, 283.0, 293.0, 321.0, 306.0, 284.0, 292.0, 295.0, 287.0, 286.0, 293.0, 175.0, 170.0, 288.0, 294.0, 313.0, 314.0, 260.0, 262.0, 285.0, 294.0, 297.0, 290.0, 274.0, 302.0, 291.0, 296.0, 290.0, 292.0, 291.0, 291.0, 288.0, 288.0, 288.0, 293.0, 293.0, 283.0, 288.0, 291.0, 270.0, 260.0, 301.0, 286.0, 294.0, 290.0, 295.0, 281.0, 291.0, 296.0, 300.0, 290.0, 286.0, 293.0, 291.0, 288.0, 294.0, 290.0, 290.0, 289.0, 285.0, 299.0, 288.0, 291.0, 286.0, 290.0, 285.0, 297.0, 314.0, 316.0, 288.0, 288.0, 295.0, 284.0, 292.0, 287.0, 288.0, 288.0, 306.0, 324.0, 282.0, 288.0, 293.0, 286.0, 295.0, 281.0, 320.0, 307.0, 292.0, 284.0, 316.0, 317.0, 315.0, 315.0, 314.0, 316.0, 284.0, 292.0, 293.0, 286.0, 289.0, 290.0, 291.0, 288.0, 283.0, 299.0, 303.0, 327.0, 318.0, 312.0, 286.0, 290.0, 291.0, 288.0, 249.0, 261.0, 292.0, 290.0, 143.0, 151.0, 314.0, 316.0, 289.0, 284.0, 285.0, 294.0, 286.0, 290.0, 287.0, 292.0, 295.0, 281.0, 292.0, 290.0, 282.0, 294.0, 287.0, 295.0, 286.0, 284.0, 288.0, 288.0, 275.0, 255.0, 320.0, 319.0, 294.0, 288.0, 292.0, 290.0, 292.0, 287.0, 282.0, 300.0, 296.0, 291.0, 293.0, 289.0, 296.0, 288.0, 267.0, 272.0, 292.0, 281.0, 320.0, 307.0, 295.0, 287.0, 290.0, 292.0, 285.0, 291.0, 298.0, 284.0, 277.0, 307.0, 284.0, 295.0, 296.0, 286.0, 291.0, 288.0, 290.0, 286.0, 285.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7045692651464103, "mean_inference_ms": 1.263009476276083, "mean_action_processing_ms": 0.13472837140180471, "mean_env_wait_ms": 0.8484162488219968, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 578.73, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 289.365}, "hist_stats": {"episode_reward": [576.0, 630.0, 570.0, 579.0, 582.0, 582.0, 579.0, 582.0, 576.0, 627.0, 576.0, 582.0, 579.0, 345.0, 582.0, 627.0, 522.0, 579.0, 587.0, 
576.0, 587.0, 582.0, 582.0, 576.0, 581.0, 576.0, 579.0, 530.0, 587.0, 584.0, 576.0, 587.0, 590.0, 579.0, 579.0, 584.0, 579.0, 584.0, 579.0, 576.0, 582.0, 630.0, 576.0, 579.0, 579.0, 576.0, 630.0, 570.0, 579.0, 576.0, 627.0, 576.0, 633.0, 630.0, 630.0, 576.0, 579.0, 579.0, 579.0, 582.0, 630.0, 630.0, 576.0, 579.0, 510.0, 582.0, 294.0, 630.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 582.0, 570.0, 576.0, 530.0, 639.0, 582.0, 582.0, 579.0, 582.0, 587.0, 582.0, 584.0, 539.0, 573.0, 627.0, 582.0, 582.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 283.0, 311.0, 319.0, 289.0, 281.0, 291.0, 288.0, 293.0, 289.0, 285.0, 297.0, 286.0, 293.0, 295.0, 287.0, 283.0, 293.0, 321.0, 306.0, 284.0, 292.0, 295.0, 287.0, 286.0, 293.0, 175.0, 170.0, 288.0, 294.0, 313.0, 314.0, 260.0, 262.0, 285.0, 294.0, 297.0, 290.0, 274.0, 302.0, 291.0, 296.0, 290.0, 292.0, 291.0, 291.0, 288.0, 288.0, 288.0, 293.0, 293.0, 283.0, 288.0, 291.0, 270.0, 260.0, 301.0, 286.0, 294.0, 290.0, 295.0, 281.0, 291.0, 296.0, 300.0, 290.0, 286.0, 293.0, 291.0, 288.0, 294.0, 290.0, 290.0, 289.0, 285.0, 299.0, 288.0, 291.0, 286.0, 290.0, 285.0, 297.0, 314.0, 316.0, 288.0, 288.0, 295.0, 284.0, 292.0, 287.0, 288.0, 288.0, 306.0, 324.0, 282.0, 288.0, 293.0, 286.0, 295.0, 281.0, 320.0, 307.0, 292.0, 284.0, 316.0, 317.0, 315.0, 315.0, 314.0, 316.0, 284.0, 292.0, 293.0, 286.0, 289.0, 290.0, 291.0, 288.0, 283.0, 299.0, 303.0, 327.0, 318.0, 312.0, 286.0, 290.0, 291.0, 
288.0, 249.0, 261.0, 292.0, 290.0, 143.0, 151.0, 314.0, 316.0, 289.0, 284.0, 285.0, 294.0, 286.0, 290.0, 287.0, 292.0, 295.0, 281.0, 292.0, 290.0, 282.0, 294.0, 287.0, 295.0, 286.0, 284.0, 288.0, 288.0, 275.0, 255.0, 320.0, 319.0, 294.0, 288.0, 292.0, 290.0, 292.0, 287.0, 282.0, 300.0, 296.0, 291.0, 293.0, 289.0, 296.0, 288.0, 267.0, 272.0, 292.0, 281.0, 320.0, 307.0, 295.0, 287.0, 290.0, 292.0, 285.0, 291.0, 298.0, 284.0, 277.0, 307.0, 284.0, 295.0, 296.0, 286.0, 291.0, 288.0, 290.0, 286.0, 285.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7045692651464103, "mean_inference_ms": 1.263009476276083, "mean_action_processing_ms": 0.13472837140180471, "mean_env_wait_ms": 0.8484162488219968, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7040000, "num_agent_steps_trained": 7040000, "num_env_steps_sampled": 3520000, "num_env_steps_trained": 3520000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3520000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7040000, "timers": {"training_iteration_time_ms": 3611.327, "learn_time_ms": 1100.351, "learn_throughput": 11632.654, "synch_weights_time_ms": 12.268}, "counters": {"num_env_steps_sampled": 3520000, "num_env_steps_trained": 3520000, "num_agent_steps_sampled": 7040000, "num_agent_steps_trained": 7040000}, "done": false, "episodes_total": 8800, "training_iteration": 275, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-20", "timestamp": 1666581500, "time_this_iter_s": 3.6779873371124268, "time_total_s": 1060.3590059280396, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1060.3590059280396, "timesteps_since_restore": 0, "iterations_since_restore": 275, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.0, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 201.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 179.48, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.76, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.36, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.5, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.98, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.83, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.43, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.98, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.98, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011727798264473677, "policy_loss": 0.0008132933871820569, "vf_loss": 7.713962554931641, "vf_explained_var": 0.5945428013801575, "kl": 0.0018068891949951649, "entropy": 0.8238167762756348, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3532800, "num_env_steps_trained": 3532800, "num_agent_steps_sampled": 7065600, "num_agent_steps_trained": 7065600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 581.88, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 290.94}, "custom_metrics": {"sparse_reward_mean": 201.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 179.48, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.76, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.36, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.5, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.98, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.83, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.43, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.98, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.98, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [590.0, 579.0, 579.0, 584.0, 579.0, 584.0, 579.0, 576.0, 582.0, 630.0, 576.0, 579.0, 579.0, 576.0, 630.0, 570.0, 579.0, 576.0, 627.0, 576.0, 633.0, 630.0, 630.0, 576.0, 579.0, 579.0, 579.0, 582.0, 630.0, 630.0, 576.0, 579.0, 510.0, 582.0, 294.0, 630.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 582.0, 570.0, 576.0, 530.0, 639.0, 582.0, 582.0, 579.0, 582.0, 587.0, 582.0, 584.0, 539.0, 573.0, 627.0, 582.0, 582.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 582.0, 573.0, 570.0, 573.0, 582.0, 582.0, 582.0, 579.0, 636.0, 582.0, 587.0, 584.0, 587.0, 579.0, 573.0, 525.0, 624.0, 587.0, 579.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 579.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 290.0, 286.0, 293.0, 291.0, 288.0, 294.0, 290.0, 290.0, 289.0, 285.0, 
299.0, 288.0, 291.0, 286.0, 290.0, 285.0, 297.0, 314.0, 316.0, 288.0, 288.0, 295.0, 284.0, 292.0, 287.0, 288.0, 288.0, 306.0, 324.0, 282.0, 288.0, 293.0, 286.0, 295.0, 281.0, 320.0, 307.0, 292.0, 284.0, 316.0, 317.0, 315.0, 315.0, 314.0, 316.0, 284.0, 292.0, 293.0, 286.0, 289.0, 290.0, 291.0, 288.0, 283.0, 299.0, 303.0, 327.0, 318.0, 312.0, 286.0, 290.0, 291.0, 288.0, 249.0, 261.0, 292.0, 290.0, 143.0, 151.0, 314.0, 316.0, 289.0, 284.0, 285.0, 294.0, 286.0, 290.0, 287.0, 292.0, 295.0, 281.0, 292.0, 290.0, 282.0, 294.0, 287.0, 295.0, 286.0, 284.0, 288.0, 288.0, 275.0, 255.0, 320.0, 319.0, 294.0, 288.0, 292.0, 290.0, 292.0, 287.0, 282.0, 300.0, 296.0, 291.0, 293.0, 289.0, 296.0, 288.0, 267.0, 272.0, 292.0, 281.0, 320.0, 307.0, 295.0, 287.0, 290.0, 292.0, 285.0, 291.0, 298.0, 284.0, 277.0, 307.0, 284.0, 295.0, 296.0, 286.0, 291.0, 288.0, 290.0, 286.0, 285.0, 297.0, 295.0, 287.0, 288.0, 285.0, 295.0, 275.0, 284.0, 289.0, 292.0, 290.0, 298.0, 284.0, 296.0, 286.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 293.0, 294.0, 292.0, 292.0, 294.0, 293.0, 291.0, 288.0, 293.0, 280.0, 263.0, 262.0, 320.0, 304.0, 290.0, 297.0, 287.0, 292.0, 310.0, 320.0, 301.0, 281.0, 293.0, 277.0, 292.0, 284.0, 295.0, 287.0, 301.0, 281.0, 280.0, 293.0, 279.0, 294.0, 295.0, 287.0, 288.0, 291.0, 292.0, 287.0, 289.0, 290.0, 304.0, 323.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7044540486410702, "mean_inference_ms": 1.2627304137168671, "mean_action_processing_ms": 0.13471491738433086, "mean_env_wait_ms": 0.8482519969369372, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 581.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 290.94}, "hist_stats": {"episode_reward": [590.0, 579.0, 579.0, 584.0, 579.0, 584.0, 579.0, 576.0, 582.0, 630.0, 576.0, 579.0, 579.0, 576.0, 630.0, 570.0, 579.0, 576.0, 627.0, 
576.0, 633.0, 630.0, 630.0, 576.0, 579.0, 579.0, 579.0, 582.0, 630.0, 630.0, 576.0, 579.0, 510.0, 582.0, 294.0, 630.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 582.0, 570.0, 576.0, 530.0, 639.0, 582.0, 582.0, 579.0, 582.0, 587.0, 582.0, 584.0, 539.0, 573.0, 627.0, 582.0, 582.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 582.0, 573.0, 570.0, 573.0, 582.0, 582.0, 582.0, 579.0, 636.0, 582.0, 587.0, 584.0, 587.0, 579.0, 573.0, 525.0, 624.0, 587.0, 579.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 579.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [300.0, 290.0, 286.0, 293.0, 291.0, 288.0, 294.0, 290.0, 290.0, 289.0, 285.0, 299.0, 288.0, 291.0, 286.0, 290.0, 285.0, 297.0, 314.0, 316.0, 288.0, 288.0, 295.0, 284.0, 292.0, 287.0, 288.0, 288.0, 306.0, 324.0, 282.0, 288.0, 293.0, 286.0, 295.0, 281.0, 320.0, 307.0, 292.0, 284.0, 316.0, 317.0, 315.0, 315.0, 314.0, 316.0, 284.0, 292.0, 293.0, 286.0, 289.0, 290.0, 291.0, 288.0, 283.0, 299.0, 303.0, 327.0, 318.0, 312.0, 286.0, 290.0, 291.0, 288.0, 249.0, 261.0, 292.0, 290.0, 143.0, 151.0, 314.0, 316.0, 289.0, 284.0, 285.0, 294.0, 286.0, 290.0, 287.0, 292.0, 295.0, 281.0, 292.0, 290.0, 282.0, 294.0, 287.0, 295.0, 286.0, 284.0, 288.0, 288.0, 275.0, 255.0, 320.0, 319.0, 294.0, 288.0, 292.0, 290.0, 292.0, 287.0, 282.0, 300.0, 296.0, 291.0, 293.0, 289.0, 296.0, 288.0, 267.0, 272.0, 292.0, 281.0, 320.0, 307.0, 295.0, 287.0, 290.0, 292.0, 285.0, 291.0, 298.0, 284.0, 277.0, 307.0, 284.0, 
295.0, 296.0, 286.0, 291.0, 288.0, 290.0, 286.0, 285.0, 297.0, 295.0, 287.0, 288.0, 285.0, 295.0, 275.0, 284.0, 289.0, 292.0, 290.0, 298.0, 284.0, 296.0, 286.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 293.0, 294.0, 292.0, 292.0, 294.0, 293.0, 291.0, 288.0, 293.0, 280.0, 263.0, 262.0, 320.0, 304.0, 290.0, 297.0, 287.0, 292.0, 310.0, 320.0, 301.0, 281.0, 293.0, 277.0, 292.0, 284.0, 295.0, 287.0, 301.0, 281.0, 280.0, 293.0, 279.0, 294.0, 295.0, 287.0, 288.0, 291.0, 292.0, 287.0, 289.0, 290.0, 304.0, 323.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7044540486410702, "mean_inference_ms": 1.2627304137168671, "mean_action_processing_ms": 0.13471491738433086, "mean_env_wait_ms": 0.8482519969369372, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7065600, "num_agent_steps_trained": 7065600, "num_env_steps_sampled": 3532800, "num_env_steps_trained": 3532800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3532800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7065600, "timers": {"training_iteration_time_ms": 3585.259, "learn_time_ms": 1099.51, "learn_throughput": 11641.547, "synch_weights_time_ms": 11.475}, "counters": {"num_env_steps_sampled": 3532800, "num_env_steps_trained": 3532800, "num_agent_steps_sampled": 7065600, "num_agent_steps_trained": 7065600}, "done": false, "episodes_total": 8832, "training_iteration": 276, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-23", "timestamp": 1666581503, "time_this_iter_s": 3.70689058303833, "time_total_s": 1064.0658965110779, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1064.0658965110779, "timesteps_since_restore": 0, "iterations_since_restore": 276, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.566666666666666, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.89, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.24, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.86, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.01, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.06, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.95, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.78, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 
14.95, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.95, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003189620329067111, "policy_loss": -0.000677441421430558, "vf_loss": 7.689172744750977, "vf_explained_var": 0.5767568349838257, "kl": 0.002302885055541992, "entropy": 0.8208730220794678, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3545600, "num_env_steps_trained": 3545600, "num_agent_steps_sampled": 7091200, "num_agent_steps_trained": 7091200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 578.49, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 289.245}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.89, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.24, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.86, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.01, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.63, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.95, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.78, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.95, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.95, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 582.0, 294.0, 630.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 582.0, 570.0, 576.0, 530.0, 639.0, 582.0, 582.0, 579.0, 582.0, 587.0, 582.0, 584.0, 539.0, 573.0, 627.0, 582.0, 582.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 582.0, 573.0, 570.0, 573.0, 582.0, 582.0, 582.0, 579.0, 636.0, 582.0, 587.0, 584.0, 587.0, 579.0, 573.0, 525.0, 624.0, 587.0, 579.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 579.0, 627.0, 579.0, 573.0, 582.0, 579.0, 573.0, 587.0, 516.0, 579.0, 630.0, 582.0, 576.0, 633.0, 582.0, 579.0, 633.0, 582.0, 582.0, 573.0, 582.0, 582.0, 584.0, 576.0, 579.0, 582.0, 576.0, 582.0, 590.0, 525.0, 579.0, 579.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [249.0, 261.0, 292.0, 290.0, 143.0, 151.0, 314.0, 316.0, 289.0, 284.0, 285.0, 
294.0, 286.0, 290.0, 287.0, 292.0, 295.0, 281.0, 292.0, 290.0, 282.0, 294.0, 287.0, 295.0, 286.0, 284.0, 288.0, 288.0, 275.0, 255.0, 320.0, 319.0, 294.0, 288.0, 292.0, 290.0, 292.0, 287.0, 282.0, 300.0, 296.0, 291.0, 293.0, 289.0, 296.0, 288.0, 267.0, 272.0, 292.0, 281.0, 320.0, 307.0, 295.0, 287.0, 290.0, 292.0, 285.0, 291.0, 298.0, 284.0, 277.0, 307.0, 284.0, 295.0, 296.0, 286.0, 291.0, 288.0, 290.0, 286.0, 285.0, 297.0, 295.0, 287.0, 288.0, 285.0, 295.0, 275.0, 284.0, 289.0, 292.0, 290.0, 298.0, 284.0, 296.0, 286.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 293.0, 294.0, 292.0, 292.0, 294.0, 293.0, 291.0, 288.0, 293.0, 280.0, 263.0, 262.0, 320.0, 304.0, 290.0, 297.0, 287.0, 292.0, 310.0, 320.0, 301.0, 281.0, 293.0, 277.0, 292.0, 284.0, 295.0, 287.0, 301.0, 281.0, 280.0, 293.0, 279.0, 294.0, 295.0, 287.0, 288.0, 291.0, 292.0, 287.0, 289.0, 290.0, 304.0, 323.0, 287.0, 292.0, 278.0, 295.0, 290.0, 292.0, 285.0, 294.0, 286.0, 287.0, 291.0, 296.0, 257.0, 259.0, 299.0, 280.0, 313.0, 317.0, 295.0, 287.0, 289.0, 287.0, 313.0, 320.0, 292.0, 290.0, 284.0, 295.0, 317.0, 316.0, 295.0, 287.0, 294.0, 288.0, 289.0, 284.0, 286.0, 296.0, 291.0, 291.0, 280.0, 304.0, 287.0, 289.0, 294.0, 285.0, 287.0, 295.0, 284.0, 292.0, 293.0, 289.0, 299.0, 291.0, 267.0, 258.0, 293.0, 286.0, 289.0, 290.0, 301.0, 278.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.704359598352958, "mean_inference_ms": 1.2624859412248715, "mean_action_processing_ms": 0.1347036538331141, "mean_env_wait_ms": 0.8481206399033451, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 578.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 289.245}, "hist_stats": {"episode_reward": [510.0, 582.0, 294.0, 630.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 582.0, 570.0, 576.0, 530.0, 639.0, 582.0, 582.0, 579.0, 
582.0, 587.0, 582.0, 584.0, 539.0, 573.0, 627.0, 582.0, 582.0, 576.0, 582.0, 584.0, 579.0, 582.0, 579.0, 576.0, 582.0, 582.0, 573.0, 570.0, 573.0, 582.0, 582.0, 582.0, 579.0, 636.0, 582.0, 587.0, 584.0, 587.0, 579.0, 573.0, 525.0, 624.0, 587.0, 579.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 579.0, 627.0, 579.0, 573.0, 582.0, 579.0, 573.0, 587.0, 516.0, 579.0, 630.0, 582.0, 576.0, 633.0, 582.0, 579.0, 633.0, 582.0, 582.0, 573.0, 582.0, 582.0, 584.0, 576.0, 579.0, 582.0, 576.0, 582.0, 590.0, 525.0, 579.0, 579.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [249.0, 261.0, 292.0, 290.0, 143.0, 151.0, 314.0, 316.0, 289.0, 284.0, 285.0, 294.0, 286.0, 290.0, 287.0, 292.0, 295.0, 281.0, 292.0, 290.0, 282.0, 294.0, 287.0, 295.0, 286.0, 284.0, 288.0, 288.0, 275.0, 255.0, 320.0, 319.0, 294.0, 288.0, 292.0, 290.0, 292.0, 287.0, 282.0, 300.0, 296.0, 291.0, 293.0, 289.0, 296.0, 288.0, 267.0, 272.0, 292.0, 281.0, 320.0, 307.0, 295.0, 287.0, 290.0, 292.0, 285.0, 291.0, 298.0, 284.0, 277.0, 307.0, 284.0, 295.0, 296.0, 286.0, 291.0, 288.0, 290.0, 286.0, 285.0, 297.0, 295.0, 287.0, 288.0, 285.0, 295.0, 275.0, 284.0, 289.0, 292.0, 290.0, 298.0, 284.0, 296.0, 286.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 293.0, 294.0, 292.0, 292.0, 294.0, 293.0, 291.0, 288.0, 293.0, 280.0, 263.0, 262.0, 320.0, 304.0, 290.0, 297.0, 287.0, 292.0, 310.0, 320.0, 301.0, 281.0, 293.0, 277.0, 292.0, 284.0, 295.0, 287.0, 301.0, 281.0, 280.0, 293.0, 279.0, 294.0, 295.0, 
287.0, 288.0, 291.0, 292.0, 287.0, 289.0, 290.0, 304.0, 323.0, 287.0, 292.0, 278.0, 295.0, 290.0, 292.0, 285.0, 294.0, 286.0, 287.0, 291.0, 296.0, 257.0, 259.0, 299.0, 280.0, 313.0, 317.0, 295.0, 287.0, 289.0, 287.0, 313.0, 320.0, 292.0, 290.0, 284.0, 295.0, 317.0, 316.0, 295.0, 287.0, 294.0, 288.0, 289.0, 284.0, 286.0, 296.0, 291.0, 291.0, 280.0, 304.0, 287.0, 289.0, 294.0, 285.0, 287.0, 295.0, 284.0, 292.0, 293.0, 289.0, 299.0, 291.0, 267.0, 258.0, 293.0, 286.0, 289.0, 290.0, 301.0, 278.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.704359598352958, "mean_inference_ms": 1.2624859412248715, "mean_action_processing_ms": 0.1347036538331141, "mean_env_wait_ms": 0.8481206399033451, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7091200, "num_agent_steps_trained": 7091200, "num_env_steps_sampled": 3545600, "num_env_steps_trained": 3545600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3545600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7091200, "timers": {"training_iteration_time_ms": 3576.088, "learn_time_ms": 1106.017, "learn_throughput": 11573.055, "synch_weights_time_ms": 11.964}, "counters": {"num_env_steps_sampled": 3545600, "num_env_steps_trained": 3545600, "num_agent_steps_sampled": 7091200, "num_agent_steps_trained": 7091200}, "done": false, "episodes_total": 8864, "training_iteration": 277, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-27", "timestamp": 1666581507, "time_this_iter_s": 3.6707231998443604, "time_total_s": 1067.7366197109222, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1067.7366197109222, "timesteps_since_restore": 0, "iterations_since_restore": 277, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.84, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.14, "shaped_reward_min": 80, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.62, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.12, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.06, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.12, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.06, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.12, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.06, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0021596523001790047, "policy_loss": 0.0017828154377639294, "vf_loss": 7.876211166381836, "vf_explained_var": 0.5684356689453125, "kl": 0.0026554595679044724, "entropy": 0.8215670585632324, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3558400, "num_env_steps_trained": 3558400, "num_agent_steps_sampled": 7116800, "num_agent_steps_trained": 7116800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 240.0, "episode_reward_mean": 575.74, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.87}, "custom_metrics": {"sparse_reward_mean": 198.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.14, "shaped_reward_min": 80, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.62, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.12, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.06, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.08, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.45, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.12, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.06, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.12, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.06, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 576.0, 582.0, 582.0, 573.0, 570.0, 573.0, 582.0, 582.0, 582.0, 579.0, 636.0, 582.0, 587.0, 584.0, 587.0, 579.0, 573.0, 525.0, 624.0, 587.0, 579.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 579.0, 627.0, 579.0, 573.0, 582.0, 579.0, 573.0, 587.0, 516.0, 579.0, 630.0, 582.0, 576.0, 633.0, 582.0, 579.0, 633.0, 582.0, 582.0, 573.0, 582.0, 582.0, 584.0, 576.0, 579.0, 582.0, 576.0, 582.0, 590.0, 525.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 516.0, 579.0, 582.0, 582.0, 240.0, 297.0, 579.0, 579.0, 582.0, 587.0, 587.0, 582.0, 633.0, 576.0, 582.0, 579.0, 582.0, 633.0, 579.0, 582.0, 573.0, 579.0, 579.0, 582.0, 576.0, 576.0, 587.0, 584.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 291.0, 288.0, 290.0, 286.0, 285.0, 297.0, 295.0, 287.0, 288.0, 
285.0, 295.0, 275.0, 284.0, 289.0, 292.0, 290.0, 298.0, 284.0, 296.0, 286.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 293.0, 294.0, 292.0, 292.0, 294.0, 293.0, 291.0, 288.0, 293.0, 280.0, 263.0, 262.0, 320.0, 304.0, 290.0, 297.0, 287.0, 292.0, 310.0, 320.0, 301.0, 281.0, 293.0, 277.0, 292.0, 284.0, 295.0, 287.0, 301.0, 281.0, 280.0, 293.0, 279.0, 294.0, 295.0, 287.0, 288.0, 291.0, 292.0, 287.0, 289.0, 290.0, 304.0, 323.0, 287.0, 292.0, 278.0, 295.0, 290.0, 292.0, 285.0, 294.0, 286.0, 287.0, 291.0, 296.0, 257.0, 259.0, 299.0, 280.0, 313.0, 317.0, 295.0, 287.0, 289.0, 287.0, 313.0, 320.0, 292.0, 290.0, 284.0, 295.0, 317.0, 316.0, 295.0, 287.0, 294.0, 288.0, 289.0, 284.0, 286.0, 296.0, 291.0, 291.0, 280.0, 304.0, 287.0, 289.0, 294.0, 285.0, 287.0, 295.0, 284.0, 292.0, 293.0, 289.0, 299.0, 291.0, 267.0, 258.0, 293.0, 286.0, 289.0, 290.0, 301.0, 278.0, 296.0, 283.0, 294.0, 282.0, 293.0, 283.0, 261.0, 255.0, 282.0, 297.0, 288.0, 294.0, 291.0, 291.0, 117.0, 123.0, 148.0, 149.0, 290.0, 289.0, 290.0, 289.0, 291.0, 291.0, 293.0, 294.0, 280.0, 307.0, 298.0, 284.0, 313.0, 320.0, 301.0, 275.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 322.0, 311.0, 291.0, 288.0, 287.0, 295.0, 283.0, 290.0, 292.0, 287.0, 297.0, 282.0, 296.0, 286.0, 285.0, 291.0, 292.0, 284.0, 294.0, 293.0, 292.0, 292.0, 291.0, 285.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7042493166823506, "mean_inference_ms": 1.262217330634093, "mean_action_processing_ms": 0.13468999463424317, "mean_env_wait_ms": 0.8479689242467895, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 240.0, "episode_reward_mean": 575.74, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.87}, "hist_stats": {"episode_reward": [582.0, 579.0, 576.0, 582.0, 582.0, 573.0, 570.0, 573.0, 582.0, 582.0, 582.0, 579.0, 636.0, 582.0, 587.0, 584.0, 587.0, 579.0, 573.0, 
525.0, 624.0, 587.0, 579.0, 630.0, 582.0, 570.0, 576.0, 582.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 579.0, 627.0, 579.0, 573.0, 582.0, 579.0, 573.0, 587.0, 516.0, 579.0, 630.0, 582.0, 576.0, 633.0, 582.0, 579.0, 633.0, 582.0, 582.0, 573.0, 582.0, 582.0, 584.0, 576.0, 579.0, 582.0, 576.0, 582.0, 590.0, 525.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 516.0, 579.0, 582.0, 582.0, 240.0, 297.0, 579.0, 579.0, 582.0, 587.0, 587.0, 582.0, 633.0, 576.0, 582.0, 579.0, 582.0, 633.0, 579.0, 582.0, 573.0, 579.0, 579.0, 582.0, 576.0, 576.0, 587.0, 584.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 291.0, 288.0, 290.0, 286.0, 285.0, 297.0, 295.0, 287.0, 288.0, 285.0, 295.0, 275.0, 284.0, 289.0, 292.0, 290.0, 298.0, 284.0, 296.0, 286.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 293.0, 294.0, 292.0, 292.0, 294.0, 293.0, 291.0, 288.0, 293.0, 280.0, 263.0, 262.0, 320.0, 304.0, 290.0, 297.0, 287.0, 292.0, 310.0, 320.0, 301.0, 281.0, 293.0, 277.0, 292.0, 284.0, 295.0, 287.0, 301.0, 281.0, 280.0, 293.0, 279.0, 294.0, 295.0, 287.0, 288.0, 291.0, 292.0, 287.0, 289.0, 290.0, 304.0, 323.0, 287.0, 292.0, 278.0, 295.0, 290.0, 292.0, 285.0, 294.0, 286.0, 287.0, 291.0, 296.0, 257.0, 259.0, 299.0, 280.0, 313.0, 317.0, 295.0, 287.0, 289.0, 287.0, 313.0, 320.0, 292.0, 290.0, 284.0, 295.0, 317.0, 316.0, 295.0, 287.0, 294.0, 288.0, 289.0, 284.0, 286.0, 296.0, 291.0, 291.0, 280.0, 304.0, 287.0, 289.0, 294.0, 285.0, 287.0, 295.0, 284.0, 292.0, 293.0, 289.0, 299.0, 291.0, 267.0, 
258.0, 293.0, 286.0, 289.0, 290.0, 301.0, 278.0, 296.0, 283.0, 294.0, 282.0, 293.0, 283.0, 261.0, 255.0, 282.0, 297.0, 288.0, 294.0, 291.0, 291.0, 117.0, 123.0, 148.0, 149.0, 290.0, 289.0, 290.0, 289.0, 291.0, 291.0, 293.0, 294.0, 280.0, 307.0, 298.0, 284.0, 313.0, 320.0, 301.0, 275.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 322.0, 311.0, 291.0, 288.0, 287.0, 295.0, 283.0, 290.0, 292.0, 287.0, 297.0, 282.0, 296.0, 286.0, 285.0, 291.0, 292.0, 284.0, 294.0, 293.0, 292.0, 292.0, 291.0, 285.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7042493166823506, "mean_inference_ms": 1.262217330634093, "mean_action_processing_ms": 0.13468999463424317, "mean_env_wait_ms": 0.8479689242467895, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7116800, "num_agent_steps_trained": 7116800, "num_env_steps_sampled": 3558400, "num_env_steps_trained": 3558400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3558400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7116800, "timers": {"training_iteration_time_ms": 3572.014, "learn_time_ms": 1102.164, "learn_throughput": 11613.517, "synch_weights_time_ms": 11.764}, "counters": {"num_env_steps_sampled": 3558400, "num_env_steps_trained": 3558400, "num_agent_steps_sampled": 7116800, "num_agent_steps_trained": 7116800}, "done": false, "episodes_total": 8896, "training_iteration": 278, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-31", "timestamp": 1666581511, "time_this_iter_s": 3.733790397644043, "time_total_s": 1071.4704101085663, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1071.4704101085663, "timesteps_since_restore": 0, "iterations_since_restore": 278, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.716666666666665, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 176.43, "shaped_reward_min": 12, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.08, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.7, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.78, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.76, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.11, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.76, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.11, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.76, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.11, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020696595311164856, "policy_loss": 0.001691511832177639, "vf_loss": 7.902426242828369, "vf_explained_var": 0.5661084651947021, "kl": 0.002486064564436674, "entropy": 0.8241865634918213, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3571200, "num_env_steps_trained": 3571200, "num_agent_steps_sampled": 7142400, "num_agent_steps_trained": 7142400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 12.0, "episode_reward_mean": 569.63, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.815}, "custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 176.43, "shaped_reward_min": 12, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.08, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.7, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.78, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.19, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.76, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.11, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.27, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.76, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.11, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.76, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.11, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 627.0, 579.0, 573.0, 582.0, 579.0, 573.0, 587.0, 516.0, 579.0, 630.0, 582.0, 576.0, 633.0, 582.0, 579.0, 633.0, 582.0, 582.0, 573.0, 582.0, 582.0, 584.0, 576.0, 579.0, 582.0, 576.0, 582.0, 590.0, 525.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 516.0, 579.0, 582.0, 582.0, 240.0, 297.0, 579.0, 579.0, 582.0, 587.0, 587.0, 582.0, 633.0, 576.0, 582.0, 579.0, 582.0, 633.0, 579.0, 582.0, 573.0, 579.0, 579.0, 582.0, 576.0, 576.0, 587.0, 584.0, 576.0, 579.0, 584.0, 579.0, 579.0, 633.0, 582.0, 627.0, 639.0, 576.0, 582.0, 570.0, 587.0, 579.0, 579.0, 579.0, 579.0, 573.0, 576.0, 582.0, 582.0, 530.0, 582.0, 573.0, 12.0, 522.0, 579.0, 576.0, 536.0, 627.0, 570.0, 576.0, 587.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 292.0, 287.0, 289.0, 290.0, 304.0, 323.0, 287.0, 292.0, 278.0, 
295.0, 290.0, 292.0, 285.0, 294.0, 286.0, 287.0, 291.0, 296.0, 257.0, 259.0, 299.0, 280.0, 313.0, 317.0, 295.0, 287.0, 289.0, 287.0, 313.0, 320.0, 292.0, 290.0, 284.0, 295.0, 317.0, 316.0, 295.0, 287.0, 294.0, 288.0, 289.0, 284.0, 286.0, 296.0, 291.0, 291.0, 280.0, 304.0, 287.0, 289.0, 294.0, 285.0, 287.0, 295.0, 284.0, 292.0, 293.0, 289.0, 299.0, 291.0, 267.0, 258.0, 293.0, 286.0, 289.0, 290.0, 301.0, 278.0, 296.0, 283.0, 294.0, 282.0, 293.0, 283.0, 261.0, 255.0, 282.0, 297.0, 288.0, 294.0, 291.0, 291.0, 117.0, 123.0, 148.0, 149.0, 290.0, 289.0, 290.0, 289.0, 291.0, 291.0, 293.0, 294.0, 280.0, 307.0, 298.0, 284.0, 313.0, 320.0, 301.0, 275.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 322.0, 311.0, 291.0, 288.0, 287.0, 295.0, 283.0, 290.0, 292.0, 287.0, 297.0, 282.0, 296.0, 286.0, 285.0, 291.0, 292.0, 284.0, 294.0, 293.0, 292.0, 292.0, 291.0, 285.0, 288.0, 291.0, 299.0, 285.0, 288.0, 291.0, 289.0, 290.0, 318.0, 315.0, 296.0, 286.0, 315.0, 312.0, 317.0, 322.0, 283.0, 293.0, 294.0, 288.0, 286.0, 284.0, 298.0, 289.0, 282.0, 297.0, 283.0, 296.0, 291.0, 288.0, 286.0, 293.0, 287.0, 286.0, 283.0, 293.0, 295.0, 287.0, 282.0, 300.0, 266.0, 264.0, 288.0, 294.0, 289.0, 284.0, 6.0, 6.0, 257.0, 265.0, 283.0, 296.0, 284.0, 292.0, 270.0, 266.0, 306.0, 321.0, 292.0, 278.0, 285.0, 291.0, 294.0, 293.0, 286.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7041604228757821, "mean_inference_ms": 1.2619712995732546, "mean_action_processing_ms": 0.13468033891449124, "mean_env_wait_ms": 0.8478502017312357, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 12.0, "episode_reward_mean": 569.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.815}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 627.0, 579.0, 573.0, 582.0, 579.0, 573.0, 587.0, 516.0, 579.0, 630.0, 582.0, 576.0, 633.0, 582.0, 579.0, 633.0, 
582.0, 582.0, 573.0, 582.0, 582.0, 584.0, 576.0, 579.0, 582.0, 576.0, 582.0, 590.0, 525.0, 579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 516.0, 579.0, 582.0, 582.0, 240.0, 297.0, 579.0, 579.0, 582.0, 587.0, 587.0, 582.0, 633.0, 576.0, 582.0, 579.0, 582.0, 633.0, 579.0, 582.0, 573.0, 579.0, 579.0, 582.0, 576.0, 576.0, 587.0, 584.0, 576.0, 579.0, 584.0, 579.0, 579.0, 633.0, 582.0, 627.0, 639.0, 576.0, 582.0, 570.0, 587.0, 579.0, 579.0, 579.0, 579.0, 573.0, 576.0, 582.0, 582.0, 530.0, 582.0, 573.0, 12.0, 522.0, 579.0, 576.0, 536.0, 627.0, 570.0, 576.0, 587.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 292.0, 287.0, 289.0, 290.0, 304.0, 323.0, 287.0, 292.0, 278.0, 295.0, 290.0, 292.0, 285.0, 294.0, 286.0, 287.0, 291.0, 296.0, 257.0, 259.0, 299.0, 280.0, 313.0, 317.0, 295.0, 287.0, 289.0, 287.0, 313.0, 320.0, 292.0, 290.0, 284.0, 295.0, 317.0, 316.0, 295.0, 287.0, 294.0, 288.0, 289.0, 284.0, 286.0, 296.0, 291.0, 291.0, 280.0, 304.0, 287.0, 289.0, 294.0, 285.0, 287.0, 295.0, 284.0, 292.0, 293.0, 289.0, 299.0, 291.0, 267.0, 258.0, 293.0, 286.0, 289.0, 290.0, 301.0, 278.0, 296.0, 283.0, 294.0, 282.0, 293.0, 283.0, 261.0, 255.0, 282.0, 297.0, 288.0, 294.0, 291.0, 291.0, 117.0, 123.0, 148.0, 149.0, 290.0, 289.0, 290.0, 289.0, 291.0, 291.0, 293.0, 294.0, 280.0, 307.0, 298.0, 284.0, 313.0, 320.0, 301.0, 275.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 322.0, 311.0, 291.0, 288.0, 287.0, 295.0, 283.0, 290.0, 292.0, 287.0, 297.0, 282.0, 296.0, 286.0, 285.0, 291.0, 292.0, 
284.0, 294.0, 293.0, 292.0, 292.0, 291.0, 285.0, 288.0, 291.0, 299.0, 285.0, 288.0, 291.0, 289.0, 290.0, 318.0, 315.0, 296.0, 286.0, 315.0, 312.0, 317.0, 322.0, 283.0, 293.0, 294.0, 288.0, 286.0, 284.0, 298.0, 289.0, 282.0, 297.0, 283.0, 296.0, 291.0, 288.0, 286.0, 293.0, 287.0, 286.0, 283.0, 293.0, 295.0, 287.0, 282.0, 300.0, 266.0, 264.0, 288.0, 294.0, 289.0, 284.0, 6.0, 6.0, 257.0, 265.0, 283.0, 296.0, 284.0, 292.0, 270.0, 266.0, 306.0, 321.0, 292.0, 278.0, 285.0, 291.0, 294.0, 293.0, 286.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7041604228757821, "mean_inference_ms": 1.2619712995732546, "mean_action_processing_ms": 0.13468033891449124, "mean_env_wait_ms": 0.8478502017312357, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7142400, "num_agent_steps_trained": 7142400, "num_env_steps_sampled": 3571200, "num_env_steps_trained": 3571200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3571200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7142400, "timers": {"training_iteration_time_ms": 3587.641, "learn_time_ms": 1111.633, "learn_throughput": 11514.594, "synch_weights_time_ms": 10.828}, "counters": {"num_env_steps_sampled": 3571200, "num_env_steps_trained": 3571200, "num_agent_steps_sampled": 7142400, "num_agent_steps_trained": 7142400}, "done": false, "episodes_total": 8928, "training_iteration": 279, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-35", "timestamp": 1666581515, "time_this_iter_s": 3.741086959838867, "time_total_s": 1075.2114970684052, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1075.2114970684052, "timesteps_since_restore": 0, "iterations_since_restore": 279, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.033333333333335, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 175.76, "shaped_reward_min": 12, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.24, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.51, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.03, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.84, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.89, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.92, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.84, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.89, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.84, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.89, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001337396795861423, "policy_loss": 0.0009652754524722695, "vf_loss": 7.775339126586914, "vf_explained_var": 0.5859867334365845, "kl": 0.0022643147967755795, "entropy": 0.8108214139938354, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3584000, "num_env_steps_trained": 3584000, "num_agent_steps_sampled": 7168000, "num_agent_steps_trained": 7168000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 12.0, "episode_reward_mean": 567.76, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 283.88}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 175.76, "shaped_reward_min": 12, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.24, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.51, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.03, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.84, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.89, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.92, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.54, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.7, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.31, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.12, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.3, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.84, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.89, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.84, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.89, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 516.0, 579.0, 582.0, 582.0, 240.0, 297.0, 579.0, 579.0, 582.0, 587.0, 587.0, 582.0, 633.0, 576.0, 582.0, 579.0, 582.0, 633.0, 579.0, 582.0, 573.0, 579.0, 579.0, 582.0, 576.0, 576.0, 587.0, 584.0, 576.0, 579.0, 584.0, 579.0, 579.0, 633.0, 582.0, 627.0, 639.0, 576.0, 582.0, 570.0, 587.0, 579.0, 579.0, 579.0, 579.0, 573.0, 576.0, 582.0, 582.0, 530.0, 582.0, 573.0, 12.0, 522.0, 579.0, 576.0, 536.0, 627.0, 570.0, 576.0, 587.0, 587.0, 576.0, 633.0, 579.0, 584.0, 582.0, 582.0, 582.0, 455.0, 582.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 519.0, 579.0, 587.0, 405.0, 579.0, 630.0, 630.0, 633.0, 567.0, 576.0, 587.0, 576.0, 584.0, 587.0, 579.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 289.0, 290.0, 301.0, 278.0, 296.0, 283.0, 294.0, 282.0, 293.0, 
283.0, 261.0, 255.0, 282.0, 297.0, 288.0, 294.0, 291.0, 291.0, 117.0, 123.0, 148.0, 149.0, 290.0, 289.0, 290.0, 289.0, 291.0, 291.0, 293.0, 294.0, 280.0, 307.0, 298.0, 284.0, 313.0, 320.0, 301.0, 275.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 322.0, 311.0, 291.0, 288.0, 287.0, 295.0, 283.0, 290.0, 292.0, 287.0, 297.0, 282.0, 296.0, 286.0, 285.0, 291.0, 292.0, 284.0, 294.0, 293.0, 292.0, 292.0, 291.0, 285.0, 288.0, 291.0, 299.0, 285.0, 288.0, 291.0, 289.0, 290.0, 318.0, 315.0, 296.0, 286.0, 315.0, 312.0, 317.0, 322.0, 283.0, 293.0, 294.0, 288.0, 286.0, 284.0, 298.0, 289.0, 282.0, 297.0, 283.0, 296.0, 291.0, 288.0, 286.0, 293.0, 287.0, 286.0, 283.0, 293.0, 295.0, 287.0, 282.0, 300.0, 266.0, 264.0, 288.0, 294.0, 289.0, 284.0, 6.0, 6.0, 257.0, 265.0, 283.0, 296.0, 284.0, 292.0, 270.0, 266.0, 306.0, 321.0, 292.0, 278.0, 285.0, 291.0, 294.0, 293.0, 286.0, 301.0, 286.0, 290.0, 325.0, 308.0, 287.0, 292.0, 299.0, 285.0, 290.0, 292.0, 289.0, 293.0, 291.0, 291.0, 225.0, 230.0, 282.0, 300.0, 283.0, 293.0, 297.0, 285.0, 286.0, 296.0, 295.0, 292.0, 316.0, 311.0, 290.0, 289.0, 259.0, 260.0, 286.0, 293.0, 286.0, 301.0, 203.0, 202.0, 286.0, 293.0, 310.0, 320.0, 314.0, 316.0, 325.0, 308.0, 297.0, 270.0, 283.0, 293.0, 288.0, 299.0, 285.0, 291.0, 298.0, 286.0, 296.0, 291.0, 290.0, 289.0, 277.0, 296.0, 285.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7040566160339953, "mean_inference_ms": 1.2617214516765958, "mean_action_processing_ms": 0.13467032915806704, "mean_env_wait_ms": 0.8477503990101285, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 12.0, "episode_reward_mean": 567.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 283.88}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 579.0, 576.0, 576.0, 516.0, 579.0, 582.0, 582.0, 240.0, 297.0, 579.0, 579.0, 582.0, 587.0, 587.0, 582.0, 633.0, 576.0, 
582.0, 579.0, 582.0, 633.0, 579.0, 582.0, 573.0, 579.0, 579.0, 582.0, 576.0, 576.0, 587.0, 584.0, 576.0, 579.0, 584.0, 579.0, 579.0, 633.0, 582.0, 627.0, 639.0, 576.0, 582.0, 570.0, 587.0, 579.0, 579.0, 579.0, 579.0, 573.0, 576.0, 582.0, 582.0, 530.0, 582.0, 573.0, 12.0, 522.0, 579.0, 576.0, 536.0, 627.0, 570.0, 576.0, 587.0, 587.0, 576.0, 633.0, 579.0, 584.0, 582.0, 582.0, 582.0, 455.0, 582.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 519.0, 579.0, 587.0, 405.0, 579.0, 630.0, 630.0, 633.0, 567.0, 576.0, 587.0, 576.0, 584.0, 587.0, 579.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 289.0, 290.0, 301.0, 278.0, 296.0, 283.0, 294.0, 282.0, 293.0, 283.0, 261.0, 255.0, 282.0, 297.0, 288.0, 294.0, 291.0, 291.0, 117.0, 123.0, 148.0, 149.0, 290.0, 289.0, 290.0, 289.0, 291.0, 291.0, 293.0, 294.0, 280.0, 307.0, 298.0, 284.0, 313.0, 320.0, 301.0, 275.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 322.0, 311.0, 291.0, 288.0, 287.0, 295.0, 283.0, 290.0, 292.0, 287.0, 297.0, 282.0, 296.0, 286.0, 285.0, 291.0, 292.0, 284.0, 294.0, 293.0, 292.0, 292.0, 291.0, 285.0, 288.0, 291.0, 299.0, 285.0, 288.0, 291.0, 289.0, 290.0, 318.0, 315.0, 296.0, 286.0, 315.0, 312.0, 317.0, 322.0, 283.0, 293.0, 294.0, 288.0, 286.0, 284.0, 298.0, 289.0, 282.0, 297.0, 283.0, 296.0, 291.0, 288.0, 286.0, 293.0, 287.0, 286.0, 283.0, 293.0, 295.0, 287.0, 282.0, 300.0, 266.0, 264.0, 288.0, 294.0, 289.0, 284.0, 6.0, 6.0, 257.0, 265.0, 283.0, 296.0, 284.0, 292.0, 270.0, 266.0, 306.0, 321.0, 
292.0, 278.0, 285.0, 291.0, 294.0, 293.0, 286.0, 301.0, 286.0, 290.0, 325.0, 308.0, 287.0, 292.0, 299.0, 285.0, 290.0, 292.0, 289.0, 293.0, 291.0, 291.0, 225.0, 230.0, 282.0, 300.0, 283.0, 293.0, 297.0, 285.0, 286.0, 296.0, 295.0, 292.0, 316.0, 311.0, 290.0, 289.0, 259.0, 260.0, 286.0, 293.0, 286.0, 301.0, 203.0, 202.0, 286.0, 293.0, 310.0, 320.0, 314.0, 316.0, 325.0, 308.0, 297.0, 270.0, 283.0, 293.0, 288.0, 299.0, 285.0, 291.0, 298.0, 286.0, 296.0, 291.0, 290.0, 289.0, 277.0, 296.0, 285.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7040566160339953, "mean_inference_ms": 1.2617214516765958, "mean_action_processing_ms": 0.13467032915806704, "mean_env_wait_ms": 0.8477503990101285, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7168000, "num_agent_steps_trained": 7168000, "num_env_steps_sampled": 3584000, "num_env_steps_trained": 3584000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3584000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7168000, "timers": {"training_iteration_time_ms": 3600.953, "learn_time_ms": 1114.253, "learn_throughput": 11487.52, "synch_weights_time_ms": 11.515}, "counters": {"num_env_steps_sampled": 3584000, "num_env_steps_trained": 3584000, "num_agent_steps_sampled": 7168000, "num_agent_steps_trained": 7168000}, "done": false, "episodes_total": 8960, "training_iteration": 280, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-39", "timestamp": 1666581519, "time_this_iter_s": 3.7522544860839844, "time_total_s": 1078.9637515544891, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1078.9637515544891, "timesteps_since_restore": 0, "iterations_since_restore": 280, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.520000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 177.53, "shaped_reward_min": 12, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.06, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.91, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.78, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.62, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.62, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.62, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0034903634805232286, "policy_loss": -0.0038492782041430473, "vf_loss": 7.702734470367432, "vf_explained_var": 0.5874840021133423, "kl": 0.0021524939220398664, "entropy": 0.8227143287658691, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3596800, "num_env_steps_trained": 3596800, "num_agent_steps_sampled": 7193600, "num_agent_steps_trained": 7193600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 12.0, "episode_reward_mean": 574.33, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 287.165}, "custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 177.53, "shaped_reward_min": 12, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.06, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.91, "onion_pickup_agent_1_min": 3, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.78, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.48, "useful_onion_pickup_agent_1_min": 3, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.24, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.62, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.65, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.32, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.15, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.62, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.62, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 584.0, 576.0, 579.0, 584.0, 579.0, 579.0, 633.0, 582.0, 627.0, 639.0, 576.0, 582.0, 570.0, 587.0, 579.0, 579.0, 579.0, 579.0, 573.0, 576.0, 582.0, 582.0, 530.0, 582.0, 573.0, 12.0, 522.0, 579.0, 576.0, 536.0, 627.0, 570.0, 576.0, 587.0, 587.0, 576.0, 633.0, 579.0, 584.0, 582.0, 582.0, 582.0, 455.0, 582.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 519.0, 579.0, 587.0, 405.0, 579.0, 630.0, 630.0, 633.0, 567.0, 576.0, 587.0, 576.0, 584.0, 587.0, 579.0, 573.0, 576.0, 587.0, 582.0, 584.0, 579.0, 536.0, 570.0, 579.0, 584.0, 582.0, 582.0, 576.0, 582.0, 630.0, 579.0, 582.0, 576.0, 582.0, 576.0, 570.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 630.0, 573.0, 573.0, 576.0, 573.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 293.0, 292.0, 292.0, 291.0, 285.0, 288.0, 291.0, 299.0, 285.0, 288.0, 
291.0, 289.0, 290.0, 318.0, 315.0, 296.0, 286.0, 315.0, 312.0, 317.0, 322.0, 283.0, 293.0, 294.0, 288.0, 286.0, 284.0, 298.0, 289.0, 282.0, 297.0, 283.0, 296.0, 291.0, 288.0, 286.0, 293.0, 287.0, 286.0, 283.0, 293.0, 295.0, 287.0, 282.0, 300.0, 266.0, 264.0, 288.0, 294.0, 289.0, 284.0, 6.0, 6.0, 257.0, 265.0, 283.0, 296.0, 284.0, 292.0, 270.0, 266.0, 306.0, 321.0, 292.0, 278.0, 285.0, 291.0, 294.0, 293.0, 286.0, 301.0, 286.0, 290.0, 325.0, 308.0, 287.0, 292.0, 299.0, 285.0, 290.0, 292.0, 289.0, 293.0, 291.0, 291.0, 225.0, 230.0, 282.0, 300.0, 283.0, 293.0, 297.0, 285.0, 286.0, 296.0, 295.0, 292.0, 316.0, 311.0, 290.0, 289.0, 259.0, 260.0, 286.0, 293.0, 286.0, 301.0, 203.0, 202.0, 286.0, 293.0, 310.0, 320.0, 314.0, 316.0, 325.0, 308.0, 297.0, 270.0, 283.0, 293.0, 288.0, 299.0, 285.0, 291.0, 298.0, 286.0, 296.0, 291.0, 290.0, 289.0, 277.0, 296.0, 285.0, 291.0, 290.0, 297.0, 295.0, 287.0, 290.0, 294.0, 290.0, 289.0, 274.0, 262.0, 290.0, 280.0, 285.0, 294.0, 288.0, 296.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 284.0, 298.0, 321.0, 309.0, 286.0, 293.0, 289.0, 293.0, 285.0, 291.0, 290.0, 292.0, 278.0, 298.0, 280.0, 290.0, 291.0, 288.0, 292.0, 284.0, 309.0, 321.0, 289.0, 290.0, 286.0, 296.0, 314.0, 316.0, 294.0, 285.0, 319.0, 311.0, 286.0, 287.0, 284.0, 289.0, 286.0, 290.0, 295.0, 278.0, 273.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7039546134033401, "mean_inference_ms": 1.2616056310396504, "mean_action_processing_ms": 0.1346598567562352, "mean_env_wait_ms": 0.8477289895301865, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 12.0, "episode_reward_mean": 574.33, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 6.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 287.165}, "hist_stats": {"episode_reward": [587.0, 584.0, 576.0, 579.0, 584.0, 579.0, 579.0, 633.0, 582.0, 627.0, 639.0, 576.0, 582.0, 570.0, 587.0, 579.0, 579.0, 579.0, 579.0, 573.0, 
576.0, 582.0, 582.0, 530.0, 582.0, 573.0, 12.0, 522.0, 579.0, 576.0, 536.0, 627.0, 570.0, 576.0, 587.0, 587.0, 576.0, 633.0, 579.0, 584.0, 582.0, 582.0, 582.0, 455.0, 582.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 519.0, 579.0, 587.0, 405.0, 579.0, 630.0, 630.0, 633.0, 567.0, 576.0, 587.0, 576.0, 584.0, 587.0, 579.0, 573.0, 576.0, 587.0, 582.0, 584.0, 579.0, 536.0, 570.0, 579.0, 584.0, 582.0, 582.0, 576.0, 582.0, 630.0, 579.0, 582.0, 576.0, 582.0, 576.0, 570.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 630.0, 573.0, 573.0, 576.0, 573.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 293.0, 292.0, 292.0, 291.0, 285.0, 288.0, 291.0, 299.0, 285.0, 288.0, 291.0, 289.0, 290.0, 318.0, 315.0, 296.0, 286.0, 315.0, 312.0, 317.0, 322.0, 283.0, 293.0, 294.0, 288.0, 286.0, 284.0, 298.0, 289.0, 282.0, 297.0, 283.0, 296.0, 291.0, 288.0, 286.0, 293.0, 287.0, 286.0, 283.0, 293.0, 295.0, 287.0, 282.0, 300.0, 266.0, 264.0, 288.0, 294.0, 289.0, 284.0, 6.0, 6.0, 257.0, 265.0, 283.0, 296.0, 284.0, 292.0, 270.0, 266.0, 306.0, 321.0, 292.0, 278.0, 285.0, 291.0, 294.0, 293.0, 286.0, 301.0, 286.0, 290.0, 325.0, 308.0, 287.0, 292.0, 299.0, 285.0, 290.0, 292.0, 289.0, 293.0, 291.0, 291.0, 225.0, 230.0, 282.0, 300.0, 283.0, 293.0, 297.0, 285.0, 286.0, 296.0, 295.0, 292.0, 316.0, 311.0, 290.0, 289.0, 259.0, 260.0, 286.0, 293.0, 286.0, 301.0, 203.0, 202.0, 286.0, 293.0, 310.0, 320.0, 314.0, 316.0, 325.0, 308.0, 297.0, 270.0, 283.0, 293.0, 288.0, 299.0, 285.0, 291.0, 298.0, 286.0, 
296.0, 291.0, 290.0, 289.0, 277.0, 296.0, 285.0, 291.0, 290.0, 297.0, 295.0, 287.0, 290.0, 294.0, 290.0, 289.0, 274.0, 262.0, 290.0, 280.0, 285.0, 294.0, 288.0, 296.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 284.0, 298.0, 321.0, 309.0, 286.0, 293.0, 289.0, 293.0, 285.0, 291.0, 290.0, 292.0, 278.0, 298.0, 280.0, 290.0, 291.0, 288.0, 292.0, 284.0, 309.0, 321.0, 289.0, 290.0, 286.0, 296.0, 314.0, 316.0, 294.0, 285.0, 319.0, 311.0, 286.0, 287.0, 284.0, 289.0, 286.0, 290.0, 295.0, 278.0, 273.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7039546134033401, "mean_inference_ms": 1.2616056310396504, "mean_action_processing_ms": 0.1346598567562352, "mean_env_wait_ms": 0.8477289895301865, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7193600, "num_agent_steps_trained": 7193600, "num_env_steps_sampled": 3596800, "num_env_steps_trained": 3596800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3596800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7193600, "timers": {"training_iteration_time_ms": 3631.142, "learn_time_ms": 1113.579, "learn_throughput": 11494.468, "synch_weights_time_ms": 11.537}, "counters": {"num_env_steps_sampled": 3596800, "num_env_steps_trained": 3596800, "num_agent_steps_sampled": 7193600, "num_agent_steps_trained": 7193600}, "done": false, "episodes_total": 8992, "training_iteration": 281, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-43", "timestamp": 1666581523, "time_this_iter_s": 3.939497232437134, "time_total_s": 1082.9032487869263, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1082.9032487869263, "timesteps_since_restore": 0, "iterations_since_restore": 281, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.5, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 200.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 179.08, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.78, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.17, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.01, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.27, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.95, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.24, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.01, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.27, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.01, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.27, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00012389960465952754, "policy_loss": -0.0002203599433414638, "vf_loss": 7.57411527633667, "vf_explained_var": 0.614063560962677, "kl": 0.0017929250607267022, "entropy": 0.8263012170791626, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3609600, "num_env_steps_trained": 3609600, "num_agent_steps_sampled": 7219200, "num_agent_steps_trained": 7219200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 579.48, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 202.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 289.74}, "custom_metrics": {"sparse_reward_mean": 200.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 179.08, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.78, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.17, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.01, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.27, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.95, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.64, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.24, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.13, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.01, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.27, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.01, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.27, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 576.0, 587.0, 587.0, 576.0, 633.0, 579.0, 584.0, 582.0, 582.0, 582.0, 455.0, 582.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 519.0, 579.0, 587.0, 405.0, 579.0, 630.0, 630.0, 633.0, 567.0, 576.0, 587.0, 576.0, 584.0, 587.0, 579.0, 573.0, 576.0, 587.0, 582.0, 584.0, 579.0, 536.0, 570.0, 579.0, 584.0, 582.0, 582.0, 576.0, 582.0, 630.0, 579.0, 582.0, 576.0, 582.0, 576.0, 570.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 630.0, 573.0, 573.0, 576.0, 573.0, 530.0, 579.0, 579.0, 582.0, 582.0, 584.0, 576.0, 582.0, 576.0, 576.0, 584.0, 579.0, 579.0, 582.0, 582.0, 576.0, 633.0, 579.0, 582.0, 579.0, 473.0, 579.0, 582.0, 573.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 573.0, 639.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 278.0, 285.0, 291.0, 294.0, 293.0, 286.0, 301.0, 286.0, 290.0, 325.0, 
308.0, 287.0, 292.0, 299.0, 285.0, 290.0, 292.0, 289.0, 293.0, 291.0, 291.0, 225.0, 230.0, 282.0, 300.0, 283.0, 293.0, 297.0, 285.0, 286.0, 296.0, 295.0, 292.0, 316.0, 311.0, 290.0, 289.0, 259.0, 260.0, 286.0, 293.0, 286.0, 301.0, 203.0, 202.0, 286.0, 293.0, 310.0, 320.0, 314.0, 316.0, 325.0, 308.0, 297.0, 270.0, 283.0, 293.0, 288.0, 299.0, 285.0, 291.0, 298.0, 286.0, 296.0, 291.0, 290.0, 289.0, 277.0, 296.0, 285.0, 291.0, 290.0, 297.0, 295.0, 287.0, 290.0, 294.0, 290.0, 289.0, 274.0, 262.0, 290.0, 280.0, 285.0, 294.0, 288.0, 296.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 284.0, 298.0, 321.0, 309.0, 286.0, 293.0, 289.0, 293.0, 285.0, 291.0, 290.0, 292.0, 278.0, 298.0, 280.0, 290.0, 291.0, 288.0, 292.0, 284.0, 309.0, 321.0, 289.0, 290.0, 286.0, 296.0, 314.0, 316.0, 294.0, 285.0, 319.0, 311.0, 286.0, 287.0, 284.0, 289.0, 286.0, 290.0, 295.0, 278.0, 273.0, 257.0, 289.0, 290.0, 284.0, 295.0, 294.0, 288.0, 300.0, 282.0, 298.0, 286.0, 288.0, 288.0, 288.0, 294.0, 286.0, 290.0, 292.0, 284.0, 294.0, 290.0, 288.0, 291.0, 291.0, 288.0, 292.0, 290.0, 289.0, 293.0, 292.0, 284.0, 317.0, 316.0, 289.0, 290.0, 290.0, 292.0, 287.0, 292.0, 227.0, 246.0, 295.0, 284.0, 297.0, 285.0, 283.0, 290.0, 289.0, 293.0, 288.0, 291.0, 291.0, 288.0, 287.0, 289.0, 290.0, 297.0, 286.0, 287.0, 293.0, 280.0, 324.0, 315.0, 297.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7038370220595624, "mean_inference_ms": 1.261504645612948, "mean_action_processing_ms": 0.1346470754198018, "mean_env_wait_ms": 0.8476888602932366, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 579.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 202.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 289.74}, "hist_stats": {"episode_reward": [570.0, 576.0, 587.0, 587.0, 576.0, 633.0, 579.0, 584.0, 582.0, 582.0, 582.0, 455.0, 582.0, 576.0, 582.0, 582.0, 587.0, 627.0, 579.0, 
519.0, 579.0, 587.0, 405.0, 579.0, 630.0, 630.0, 633.0, 567.0, 576.0, 587.0, 576.0, 584.0, 587.0, 579.0, 573.0, 576.0, 587.0, 582.0, 584.0, 579.0, 536.0, 570.0, 579.0, 584.0, 582.0, 582.0, 576.0, 582.0, 630.0, 579.0, 582.0, 576.0, 582.0, 576.0, 570.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 630.0, 573.0, 573.0, 576.0, 573.0, 530.0, 579.0, 579.0, 582.0, 582.0, 584.0, 576.0, 582.0, 576.0, 576.0, 584.0, 579.0, 579.0, 582.0, 582.0, 576.0, 633.0, 579.0, 582.0, 579.0, 473.0, 579.0, 582.0, 573.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 573.0, 639.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 278.0, 285.0, 291.0, 294.0, 293.0, 286.0, 301.0, 286.0, 290.0, 325.0, 308.0, 287.0, 292.0, 299.0, 285.0, 290.0, 292.0, 289.0, 293.0, 291.0, 291.0, 225.0, 230.0, 282.0, 300.0, 283.0, 293.0, 297.0, 285.0, 286.0, 296.0, 295.0, 292.0, 316.0, 311.0, 290.0, 289.0, 259.0, 260.0, 286.0, 293.0, 286.0, 301.0, 203.0, 202.0, 286.0, 293.0, 310.0, 320.0, 314.0, 316.0, 325.0, 308.0, 297.0, 270.0, 283.0, 293.0, 288.0, 299.0, 285.0, 291.0, 298.0, 286.0, 296.0, 291.0, 290.0, 289.0, 277.0, 296.0, 285.0, 291.0, 290.0, 297.0, 295.0, 287.0, 290.0, 294.0, 290.0, 289.0, 274.0, 262.0, 290.0, 280.0, 285.0, 294.0, 288.0, 296.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 284.0, 298.0, 321.0, 309.0, 286.0, 293.0, 289.0, 293.0, 285.0, 291.0, 290.0, 292.0, 278.0, 298.0, 280.0, 290.0, 291.0, 288.0, 292.0, 284.0, 309.0, 321.0, 289.0, 290.0, 286.0, 296.0, 314.0, 316.0, 294.0, 285.0, 319.0, 311.0, 286.0, 
287.0, 284.0, 289.0, 286.0, 290.0, 295.0, 278.0, 273.0, 257.0, 289.0, 290.0, 284.0, 295.0, 294.0, 288.0, 300.0, 282.0, 298.0, 286.0, 288.0, 288.0, 288.0, 294.0, 286.0, 290.0, 292.0, 284.0, 294.0, 290.0, 288.0, 291.0, 291.0, 288.0, 292.0, 290.0, 289.0, 293.0, 292.0, 284.0, 317.0, 316.0, 289.0, 290.0, 290.0, 292.0, 287.0, 292.0, 227.0, 246.0, 295.0, 284.0, 297.0, 285.0, 283.0, 290.0, 289.0, 293.0, 288.0, 291.0, 291.0, 288.0, 287.0, 289.0, 290.0, 297.0, 286.0, 287.0, 293.0, 280.0, 324.0, 315.0, 297.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7038370220595624, "mean_inference_ms": 1.261504645612948, "mean_action_processing_ms": 0.1346470754198018, "mean_env_wait_ms": 0.8476888602932366, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7219200, "num_agent_steps_trained": 7219200, "num_env_steps_sampled": 3609600, "num_env_steps_trained": 3609600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3609600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7219200, "timers": {"training_iteration_time_ms": 3647.237, "learn_time_ms": 1120.464, "learn_throughput": 11423.84, "synch_weights_time_ms": 12.239}, "counters": {"num_env_steps_sampled": 3609600, "num_env_steps_trained": 3609600, "num_agent_steps_sampled": 7219200, "num_agent_steps_trained": 7219200}, "done": false, "episodes_total": 9024, "training_iteration": 282, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-47", "timestamp": 1666581527, "time_this_iter_s": 3.7760872840881348, "time_total_s": 1086.6793360710144, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1086.6793360710144, "timesteps_since_restore": 0, "iterations_since_restore": 282, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.900000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.76, "shaped_reward_min": 80, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.86, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.9, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.43, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.85, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.43, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.43, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -8.680624887347221e-05, "policy_loss": -0.00043897959403693676, "vf_loss": 7.619510173797607, "vf_explained_var": 0.6051114797592163, "kl": 0.0019998771604150534, "entropy": 0.8195531368255615, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3622400, "num_env_steps_trained": 3622400, "num_agent_steps_sampled": 7244800, "num_agent_steps_trained": 7244800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 240.0, "episode_reward_mean": 578.36, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 119.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.18}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.76, "shaped_reward_min": 80, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.86, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.9, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.42, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.43, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.94, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.17, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.85, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.43, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.43, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 579.0, 573.0, 576.0, 587.0, 582.0, 584.0, 579.0, 536.0, 570.0, 579.0, 584.0, 582.0, 582.0, 576.0, 582.0, 630.0, 579.0, 582.0, 576.0, 582.0, 576.0, 570.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 630.0, 573.0, 573.0, 576.0, 573.0, 530.0, 579.0, 579.0, 582.0, 582.0, 584.0, 576.0, 582.0, 576.0, 576.0, 584.0, 579.0, 579.0, 582.0, 582.0, 576.0, 633.0, 579.0, 582.0, 579.0, 473.0, 579.0, 582.0, 573.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 573.0, 639.0, 579.0, 578.0, 579.0, 636.0, 582.0, 576.0, 576.0, 582.0, 627.0, 579.0, 579.0, 240.0, 576.0, 582.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 579.0, 579.0, 579.0, 581.0, 581.0, 576.0, 582.0, 525.0, 636.0, 576.0, 579.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 290.0, 289.0, 277.0, 296.0, 285.0, 291.0, 290.0, 297.0, 295.0, 
287.0, 290.0, 294.0, 290.0, 289.0, 274.0, 262.0, 290.0, 280.0, 285.0, 294.0, 288.0, 296.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 284.0, 298.0, 321.0, 309.0, 286.0, 293.0, 289.0, 293.0, 285.0, 291.0, 290.0, 292.0, 278.0, 298.0, 280.0, 290.0, 291.0, 288.0, 292.0, 284.0, 309.0, 321.0, 289.0, 290.0, 286.0, 296.0, 314.0, 316.0, 294.0, 285.0, 319.0, 311.0, 286.0, 287.0, 284.0, 289.0, 286.0, 290.0, 295.0, 278.0, 273.0, 257.0, 289.0, 290.0, 284.0, 295.0, 294.0, 288.0, 300.0, 282.0, 298.0, 286.0, 288.0, 288.0, 288.0, 294.0, 286.0, 290.0, 292.0, 284.0, 294.0, 290.0, 288.0, 291.0, 291.0, 288.0, 292.0, 290.0, 289.0, 293.0, 292.0, 284.0, 317.0, 316.0, 289.0, 290.0, 290.0, 292.0, 287.0, 292.0, 227.0, 246.0, 295.0, 284.0, 297.0, 285.0, 283.0, 290.0, 289.0, 293.0, 288.0, 291.0, 291.0, 288.0, 287.0, 289.0, 290.0, 297.0, 286.0, 287.0, 293.0, 280.0, 324.0, 315.0, 297.0, 282.0, 293.0, 285.0, 290.0, 289.0, 320.0, 316.0, 288.0, 294.0, 288.0, 288.0, 285.0, 291.0, 290.0, 292.0, 308.0, 319.0, 291.0, 288.0, 289.0, 290.0, 119.0, 121.0, 288.0, 288.0, 290.0, 292.0, 291.0, 285.0, 283.0, 299.0, 286.0, 287.0, 289.0, 293.0, 293.0, 289.0, 283.0, 296.0, 295.0, 284.0, 289.0, 290.0, 287.0, 292.0, 285.0, 296.0, 284.0, 297.0, 286.0, 290.0, 289.0, 293.0, 261.0, 264.0, 313.0, 323.0, 286.0, 290.0, 292.0, 287.0, 314.0, 316.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7037063875887167, "mean_inference_ms": 1.2613887993604969, "mean_action_processing_ms": 0.13463334098781574, "mean_env_wait_ms": 0.8476441305310047, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 240.0, "episode_reward_mean": 578.36, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 119.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.18}, "hist_stats": {"episode_reward": [587.0, 579.0, 573.0, 576.0, 587.0, 582.0, 584.0, 579.0, 536.0, 570.0, 579.0, 584.0, 582.0, 582.0, 576.0, 582.0, 630.0, 579.0, 582.0, 
576.0, 582.0, 576.0, 570.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 630.0, 573.0, 573.0, 576.0, 573.0, 530.0, 579.0, 579.0, 582.0, 582.0, 584.0, 576.0, 582.0, 576.0, 576.0, 584.0, 579.0, 579.0, 582.0, 582.0, 576.0, 633.0, 579.0, 582.0, 579.0, 473.0, 579.0, 582.0, 573.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 573.0, 639.0, 579.0, 578.0, 579.0, 636.0, 582.0, 576.0, 576.0, 582.0, 627.0, 579.0, 579.0, 240.0, 576.0, 582.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 579.0, 579.0, 579.0, 581.0, 581.0, 576.0, 582.0, 525.0, 636.0, 576.0, 579.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 290.0, 289.0, 277.0, 296.0, 285.0, 291.0, 290.0, 297.0, 295.0, 287.0, 290.0, 294.0, 290.0, 289.0, 274.0, 262.0, 290.0, 280.0, 285.0, 294.0, 288.0, 296.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 284.0, 298.0, 321.0, 309.0, 286.0, 293.0, 289.0, 293.0, 285.0, 291.0, 290.0, 292.0, 278.0, 298.0, 280.0, 290.0, 291.0, 288.0, 292.0, 284.0, 309.0, 321.0, 289.0, 290.0, 286.0, 296.0, 314.0, 316.0, 294.0, 285.0, 319.0, 311.0, 286.0, 287.0, 284.0, 289.0, 286.0, 290.0, 295.0, 278.0, 273.0, 257.0, 289.0, 290.0, 284.0, 295.0, 294.0, 288.0, 300.0, 282.0, 298.0, 286.0, 288.0, 288.0, 288.0, 294.0, 286.0, 290.0, 292.0, 284.0, 294.0, 290.0, 288.0, 291.0, 291.0, 288.0, 292.0, 290.0, 289.0, 293.0, 292.0, 284.0, 317.0, 316.0, 289.0, 290.0, 290.0, 292.0, 287.0, 292.0, 227.0, 246.0, 295.0, 284.0, 297.0, 285.0, 283.0, 290.0, 289.0, 293.0, 288.0, 291.0, 291.0, 288.0, 287.0, 289.0, 290.0, 
297.0, 286.0, 287.0, 293.0, 280.0, 324.0, 315.0, 297.0, 282.0, 293.0, 285.0, 290.0, 289.0, 320.0, 316.0, 288.0, 294.0, 288.0, 288.0, 285.0, 291.0, 290.0, 292.0, 308.0, 319.0, 291.0, 288.0, 289.0, 290.0, 119.0, 121.0, 288.0, 288.0, 290.0, 292.0, 291.0, 285.0, 283.0, 299.0, 286.0, 287.0, 289.0, 293.0, 293.0, 289.0, 283.0, 296.0, 295.0, 284.0, 289.0, 290.0, 287.0, 292.0, 285.0, 296.0, 284.0, 297.0, 286.0, 290.0, 289.0, 293.0, 261.0, 264.0, 313.0, 323.0, 286.0, 290.0, 292.0, 287.0, 314.0, 316.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7037063875887167, "mean_inference_ms": 1.2613887993604969, "mean_action_processing_ms": 0.13463334098781574, "mean_env_wait_ms": 0.8476441305310047, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7244800, "num_agent_steps_trained": 7244800, "num_env_steps_sampled": 3622400, "num_env_steps_trained": 3622400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3622400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7244800, "timers": {"training_iteration_time_ms": 3675.302, "learn_time_ms": 1140.271, "learn_throughput": 11225.403, "synch_weights_time_ms": 12.245}, "counters": {"num_env_steps_sampled": 3622400, "num_env_steps_trained": 3622400, "num_agent_steps_sampled": 7244800, "num_agent_steps_trained": 7244800}, "done": false, "episodes_total": 9056, "training_iteration": 283, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-51", "timestamp": 1666581531, "time_this_iter_s": 3.807605266571045, "time_total_s": 1090.4869413375854, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1090.4869413375854, "timesteps_since_restore": 0, "iterations_since_restore": 283, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.18, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 199.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.46, "shaped_reward_min": 80, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.82, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.4, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.42, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.95, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.85, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.42, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.42, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0001002021599560976, "policy_loss": -0.00044721108861267567, "vf_loss": 7.613537788391113, "vf_explained_var": 0.6043774485588074, "kl": 0.002004144247621298, "entropy": 0.8286857604980469, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3635200, "num_env_steps_trained": 3635200, "num_agent_steps_sampled": 7270400, "num_agent_steps_trained": 7270400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 240.0, "episode_reward_mean": 577.26, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 119.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 288.63}, "custom_metrics": {"sparse_reward_mean": 199.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.46, "shaped_reward_min": 80, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.82, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.4, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.42, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.95, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.49, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.85, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.42, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.42, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 573.0, 530.0, 579.0, 579.0, 582.0, 582.0, 584.0, 576.0, 582.0, 576.0, 576.0, 584.0, 579.0, 579.0, 582.0, 582.0, 576.0, 633.0, 579.0, 582.0, 579.0, 473.0, 579.0, 582.0, 573.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 573.0, 639.0, 579.0, 578.0, 579.0, 636.0, 582.0, 576.0, 576.0, 582.0, 627.0, 579.0, 579.0, 240.0, 576.0, 582.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 579.0, 579.0, 579.0, 581.0, 581.0, 576.0, 582.0, 525.0, 636.0, 576.0, 579.0, 630.0, 579.0, 573.0, 525.0, 579.0, 587.0, 579.0, 570.0, 630.0, 579.0, 579.0, 582.0, 582.0, 519.0, 576.0, 582.0, 582.0, 576.0, 579.0, 579.0, 630.0, 579.0, 582.0, 579.0, 636.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 289.0, 286.0, 290.0, 295.0, 278.0, 273.0, 257.0, 289.0, 290.0, 284.0, 
295.0, 294.0, 288.0, 300.0, 282.0, 298.0, 286.0, 288.0, 288.0, 288.0, 294.0, 286.0, 290.0, 292.0, 284.0, 294.0, 290.0, 288.0, 291.0, 291.0, 288.0, 292.0, 290.0, 289.0, 293.0, 292.0, 284.0, 317.0, 316.0, 289.0, 290.0, 290.0, 292.0, 287.0, 292.0, 227.0, 246.0, 295.0, 284.0, 297.0, 285.0, 283.0, 290.0, 289.0, 293.0, 288.0, 291.0, 291.0, 288.0, 287.0, 289.0, 290.0, 297.0, 286.0, 287.0, 293.0, 280.0, 324.0, 315.0, 297.0, 282.0, 293.0, 285.0, 290.0, 289.0, 320.0, 316.0, 288.0, 294.0, 288.0, 288.0, 285.0, 291.0, 290.0, 292.0, 308.0, 319.0, 291.0, 288.0, 289.0, 290.0, 119.0, 121.0, 288.0, 288.0, 290.0, 292.0, 291.0, 285.0, 283.0, 299.0, 286.0, 287.0, 289.0, 293.0, 293.0, 289.0, 283.0, 296.0, 295.0, 284.0, 289.0, 290.0, 287.0, 292.0, 285.0, 296.0, 284.0, 297.0, 286.0, 290.0, 289.0, 293.0, 261.0, 264.0, 313.0, 323.0, 286.0, 290.0, 292.0, 287.0, 314.0, 316.0, 291.0, 288.0, 287.0, 286.0, 263.0, 262.0, 296.0, 283.0, 298.0, 289.0, 293.0, 286.0, 285.0, 285.0, 305.0, 325.0, 291.0, 288.0, 288.0, 291.0, 289.0, 293.0, 295.0, 287.0, 258.0, 261.0, 295.0, 281.0, 291.0, 291.0, 296.0, 286.0, 279.0, 297.0, 287.0, 292.0, 290.0, 289.0, 310.0, 320.0, 292.0, 287.0, 287.0, 295.0, 287.0, 292.0, 312.0, 324.0, 290.0, 292.0, 291.0, 291.0, 290.0, 283.0, 289.0, 293.0, 287.0, 289.0, 293.0, 289.0, 291.0, 288.0, 286.0, 296.0, 293.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7036136534117213, "mean_inference_ms": 1.2611472236830996, "mean_action_processing_ms": 0.13462100380644718, "mean_env_wait_ms": 0.8474923370648417, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 240.0, "episode_reward_mean": 577.26, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 119.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 288.63}, "hist_stats": {"episode_reward": [573.0, 576.0, 573.0, 530.0, 579.0, 579.0, 582.0, 582.0, 584.0, 576.0, 582.0, 576.0, 576.0, 584.0, 579.0, 579.0, 582.0, 582.0, 576.0, 
633.0, 579.0, 582.0, 579.0, 473.0, 579.0, 582.0, 573.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 573.0, 639.0, 579.0, 578.0, 579.0, 636.0, 582.0, 576.0, 576.0, 582.0, 627.0, 579.0, 579.0, 240.0, 576.0, 582.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 579.0, 579.0, 579.0, 581.0, 581.0, 576.0, 582.0, 525.0, 636.0, 576.0, 579.0, 630.0, 579.0, 573.0, 525.0, 579.0, 587.0, 579.0, 570.0, 630.0, 579.0, 579.0, 582.0, 582.0, 519.0, 576.0, 582.0, 582.0, 576.0, 579.0, 579.0, 630.0, 579.0, 582.0, 579.0, 636.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 289.0, 286.0, 290.0, 295.0, 278.0, 273.0, 257.0, 289.0, 290.0, 284.0, 295.0, 294.0, 288.0, 300.0, 282.0, 298.0, 286.0, 288.0, 288.0, 288.0, 294.0, 286.0, 290.0, 292.0, 284.0, 294.0, 290.0, 288.0, 291.0, 291.0, 288.0, 292.0, 290.0, 289.0, 293.0, 292.0, 284.0, 317.0, 316.0, 289.0, 290.0, 290.0, 292.0, 287.0, 292.0, 227.0, 246.0, 295.0, 284.0, 297.0, 285.0, 283.0, 290.0, 289.0, 293.0, 288.0, 291.0, 291.0, 288.0, 287.0, 289.0, 290.0, 297.0, 286.0, 287.0, 293.0, 280.0, 324.0, 315.0, 297.0, 282.0, 293.0, 285.0, 290.0, 289.0, 320.0, 316.0, 288.0, 294.0, 288.0, 288.0, 285.0, 291.0, 290.0, 292.0, 308.0, 319.0, 291.0, 288.0, 289.0, 290.0, 119.0, 121.0, 288.0, 288.0, 290.0, 292.0, 291.0, 285.0, 283.0, 299.0, 286.0, 287.0, 289.0, 293.0, 293.0, 289.0, 283.0, 296.0, 295.0, 284.0, 289.0, 290.0, 287.0, 292.0, 285.0, 296.0, 284.0, 297.0, 286.0, 290.0, 289.0, 293.0, 261.0, 264.0, 313.0, 
323.0, 286.0, 290.0, 292.0, 287.0, 314.0, 316.0, 291.0, 288.0, 287.0, 286.0, 263.0, 262.0, 296.0, 283.0, 298.0, 289.0, 293.0, 286.0, 285.0, 285.0, 305.0, 325.0, 291.0, 288.0, 288.0, 291.0, 289.0, 293.0, 295.0, 287.0, 258.0, 261.0, 295.0, 281.0, 291.0, 291.0, 296.0, 286.0, 279.0, 297.0, 287.0, 292.0, 290.0, 289.0, 310.0, 320.0, 292.0, 287.0, 287.0, 295.0, 287.0, 292.0, 312.0, 324.0, 290.0, 292.0, 291.0, 291.0, 290.0, 283.0, 289.0, 293.0, 287.0, 289.0, 293.0, 289.0, 291.0, 288.0, 286.0, 296.0, 293.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7036136534117213, "mean_inference_ms": 1.2611472236830996, "mean_action_processing_ms": 0.13462100380644718, "mean_env_wait_ms": 0.8474923370648417, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7270400, "num_agent_steps_trained": 7270400, "num_env_steps_sampled": 3635200, "num_env_steps_trained": 3635200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3635200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7270400, "timers": {"training_iteration_time_ms": 3685.546, "learn_time_ms": 1141.17, "learn_throughput": 11216.56, "synch_weights_time_ms": 11.751}, "counters": {"num_env_steps_sampled": 3635200, "num_env_steps_trained": 3635200, "num_agent_steps_sampled": 7270400, "num_agent_steps_trained": 7270400}, "done": false, "episodes_total": 9088, "training_iteration": 284, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-55", "timestamp": 1666581535, "time_this_iter_s": 3.668269395828247, "time_total_s": 1094.1552107334137, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1094.1552107334137, "timesteps_since_restore": 0, "iterations_since_restore": 284, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.05, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 179.14, "shaped_reward_min": 80, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.69, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.46, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.38, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.29, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.08, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.78, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.31, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.29, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.08, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.29, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.08, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0017654591938480735, "policy_loss": -0.0021084710024297237, "vf_loss": 7.559107780456543, "vf_explained_var": 0.6072764992713928, "kl": 0.0019133866298943758, "entropy": 0.8257938623428345, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3648000, "num_env_steps_trained": 3648000, "num_agent_steps_sampled": 7296000, "num_agent_steps_trained": 7296000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 240.0, "episode_reward_mean": 580.34, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 119.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 290.17}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 179.14, "shaped_reward_min": 80, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.69, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.46, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.38, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.29, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.08, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.78, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.31, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.29, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.08, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.29, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.08, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 573.0, 639.0, 579.0, 578.0, 579.0, 636.0, 582.0, 576.0, 576.0, 582.0, 627.0, 579.0, 579.0, 240.0, 576.0, 582.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 579.0, 579.0, 579.0, 581.0, 581.0, 576.0, 582.0, 525.0, 636.0, 576.0, 579.0, 630.0, 579.0, 573.0, 525.0, 579.0, 587.0, 579.0, 570.0, 630.0, 579.0, 579.0, 582.0, 582.0, 519.0, 576.0, 582.0, 582.0, 576.0, 579.0, 579.0, 630.0, 579.0, 582.0, 579.0, 636.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 636.0, 582.0, 630.0, 582.0, 522.0, 579.0, 579.0, 627.0, 582.0, 633.0, 573.0, 582.0, 582.0, 636.0, 579.0, 581.0, 582.0, 570.0, 582.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 627.0, 576.0, 582.0, 521.0, 582.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 293.0, 280.0, 324.0, 315.0, 297.0, 282.0, 293.0, 285.0, 290.0, 
289.0, 320.0, 316.0, 288.0, 294.0, 288.0, 288.0, 285.0, 291.0, 290.0, 292.0, 308.0, 319.0, 291.0, 288.0, 289.0, 290.0, 119.0, 121.0, 288.0, 288.0, 290.0, 292.0, 291.0, 285.0, 283.0, 299.0, 286.0, 287.0, 289.0, 293.0, 293.0, 289.0, 283.0, 296.0, 295.0, 284.0, 289.0, 290.0, 287.0, 292.0, 285.0, 296.0, 284.0, 297.0, 286.0, 290.0, 289.0, 293.0, 261.0, 264.0, 313.0, 323.0, 286.0, 290.0, 292.0, 287.0, 314.0, 316.0, 291.0, 288.0, 287.0, 286.0, 263.0, 262.0, 296.0, 283.0, 298.0, 289.0, 293.0, 286.0, 285.0, 285.0, 305.0, 325.0, 291.0, 288.0, 288.0, 291.0, 289.0, 293.0, 295.0, 287.0, 258.0, 261.0, 295.0, 281.0, 291.0, 291.0, 296.0, 286.0, 279.0, 297.0, 287.0, 292.0, 290.0, 289.0, 310.0, 320.0, 292.0, 287.0, 287.0, 295.0, 287.0, 292.0, 312.0, 324.0, 290.0, 292.0, 291.0, 291.0, 290.0, 283.0, 289.0, 293.0, 287.0, 289.0, 293.0, 289.0, 291.0, 288.0, 286.0, 296.0, 293.0, 286.0, 314.0, 322.0, 283.0, 299.0, 314.0, 316.0, 283.0, 299.0, 260.0, 262.0, 289.0, 290.0, 289.0, 290.0, 307.0, 320.0, 292.0, 290.0, 316.0, 317.0, 280.0, 293.0, 291.0, 291.0, 295.0, 287.0, 321.0, 315.0, 285.0, 294.0, 289.0, 292.0, 290.0, 292.0, 283.0, 287.0, 291.0, 291.0, 294.0, 285.0, 294.0, 288.0, 298.0, 289.0, 291.0, 288.0, 290.0, 289.0, 292.0, 290.0, 314.0, 313.0, 284.0, 292.0, 294.0, 288.0, 260.0, 261.0, 293.0, 289.0, 286.0, 290.0, 289.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7035381333603594, "mean_inference_ms": 1.2608786606592253, "mean_action_processing_ms": 0.13460708506122054, "mean_env_wait_ms": 0.8473337645098168, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 240.0, "episode_reward_mean": 580.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 119.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 290.17}, "hist_stats": {"episode_reward": [573.0, 573.0, 639.0, 579.0, 578.0, 579.0, 636.0, 582.0, 576.0, 576.0, 582.0, 627.0, 579.0, 579.0, 240.0, 576.0, 582.0, 576.0, 582.0, 
573.0, 582.0, 582.0, 579.0, 579.0, 579.0, 579.0, 581.0, 581.0, 576.0, 582.0, 525.0, 636.0, 576.0, 579.0, 630.0, 579.0, 573.0, 525.0, 579.0, 587.0, 579.0, 570.0, 630.0, 579.0, 579.0, 582.0, 582.0, 519.0, 576.0, 582.0, 582.0, 576.0, 579.0, 579.0, 630.0, 579.0, 582.0, 579.0, 636.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 636.0, 582.0, 630.0, 582.0, 522.0, 579.0, 579.0, 627.0, 582.0, 633.0, 573.0, 582.0, 582.0, 636.0, 579.0, 581.0, 582.0, 570.0, 582.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 627.0, 576.0, 582.0, 521.0, 582.0, 576.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 293.0, 280.0, 324.0, 315.0, 297.0, 282.0, 293.0, 285.0, 290.0, 289.0, 320.0, 316.0, 288.0, 294.0, 288.0, 288.0, 285.0, 291.0, 290.0, 292.0, 308.0, 319.0, 291.0, 288.0, 289.0, 290.0, 119.0, 121.0, 288.0, 288.0, 290.0, 292.0, 291.0, 285.0, 283.0, 299.0, 286.0, 287.0, 289.0, 293.0, 293.0, 289.0, 283.0, 296.0, 295.0, 284.0, 289.0, 290.0, 287.0, 292.0, 285.0, 296.0, 284.0, 297.0, 286.0, 290.0, 289.0, 293.0, 261.0, 264.0, 313.0, 323.0, 286.0, 290.0, 292.0, 287.0, 314.0, 316.0, 291.0, 288.0, 287.0, 286.0, 263.0, 262.0, 296.0, 283.0, 298.0, 289.0, 293.0, 286.0, 285.0, 285.0, 305.0, 325.0, 291.0, 288.0, 288.0, 291.0, 289.0, 293.0, 295.0, 287.0, 258.0, 261.0, 295.0, 281.0, 291.0, 291.0, 296.0, 286.0, 279.0, 297.0, 287.0, 292.0, 290.0, 289.0, 310.0, 320.0, 292.0, 287.0, 287.0, 295.0, 287.0, 292.0, 312.0, 324.0, 290.0, 292.0, 291.0, 291.0, 290.0, 283.0, 289.0, 293.0, 287.0, 
289.0, 293.0, 289.0, 291.0, 288.0, 286.0, 296.0, 293.0, 286.0, 314.0, 322.0, 283.0, 299.0, 314.0, 316.0, 283.0, 299.0, 260.0, 262.0, 289.0, 290.0, 289.0, 290.0, 307.0, 320.0, 292.0, 290.0, 316.0, 317.0, 280.0, 293.0, 291.0, 291.0, 295.0, 287.0, 321.0, 315.0, 285.0, 294.0, 289.0, 292.0, 290.0, 292.0, 283.0, 287.0, 291.0, 291.0, 294.0, 285.0, 294.0, 288.0, 298.0, 289.0, 291.0, 288.0, 290.0, 289.0, 292.0, 290.0, 314.0, 313.0, 284.0, 292.0, 294.0, 288.0, 260.0, 261.0, 293.0, 289.0, 286.0, 290.0, 289.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7035381333603594, "mean_inference_ms": 1.2608786606592253, "mean_action_processing_ms": 0.13460708506122054, "mean_env_wait_ms": 0.8473337645098168, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7296000, "num_agent_steps_trained": 7296000, "num_env_steps_sampled": 3648000, "num_env_steps_trained": 3648000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3648000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7296000, "timers": {"training_iteration_time_ms": 3689.238, "learn_time_ms": 1146.734, "learn_throughput": 11162.136, "synch_weights_time_ms": 11.943}, "counters": {"num_env_steps_sampled": 3648000, "num_env_steps_trained": 3648000, "num_agent_steps_sampled": 7296000, "num_agent_steps_trained": 7296000}, "done": false, "episodes_total": 9120, "training_iteration": 285, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-18-59", "timestamp": 1666581539, "time_this_iter_s": 3.6995468139648438, "time_total_s": 1097.8547575473785, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1097.8547575473785, "timesteps_since_restore": 0, "iterations_since_restore": 285, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.86, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.7, "shaped_reward_min": 148, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.07, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.13, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.79, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.68, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.79, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.33, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.68, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.79, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.68, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.79, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0029266555793583393, "policy_loss": -0.0032794035505503416, "vf_loss": 7.646122932434082, "vf_explained_var": 0.5895668268203735, "kl": 0.001966602634638548, "entropy": 0.8237244486808777, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3660800, "num_env_steps_trained": 3660800, "num_agent_steps_sampled": 7321600, "num_agent_steps_trained": 7321600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 468.0, "episode_reward_mean": 581.7, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 290.85}, "custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 179.7, "shaped_reward_min": 148, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.07, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.13, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.79, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.68, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.79, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.33, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.2, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.85, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.68, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.79, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.68, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.79, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 630.0, 579.0, 573.0, 525.0, 579.0, 587.0, 579.0, 570.0, 630.0, 579.0, 579.0, 582.0, 582.0, 519.0, 576.0, 582.0, 582.0, 576.0, 579.0, 579.0, 630.0, 579.0, 582.0, 579.0, 636.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 636.0, 582.0, 630.0, 582.0, 522.0, 579.0, 579.0, 627.0, 582.0, 633.0, 573.0, 582.0, 582.0, 636.0, 579.0, 581.0, 582.0, 570.0, 582.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 627.0, 576.0, 582.0, 521.0, 582.0, 576.0, 570.0, 630.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 576.0, 582.0, 582.0, 576.0, 570.0, 576.0, 587.0, 579.0, 579.0, 582.0, 582.0, 587.0, 579.0, 579.0, 525.0, 576.0, 627.0, 582.0, 582.0, 579.0, 576.0, 582.0, 630.0, 468.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 292.0, 287.0, 314.0, 316.0, 291.0, 288.0, 287.0, 286.0, 263.0, 
262.0, 296.0, 283.0, 298.0, 289.0, 293.0, 286.0, 285.0, 285.0, 305.0, 325.0, 291.0, 288.0, 288.0, 291.0, 289.0, 293.0, 295.0, 287.0, 258.0, 261.0, 295.0, 281.0, 291.0, 291.0, 296.0, 286.0, 279.0, 297.0, 287.0, 292.0, 290.0, 289.0, 310.0, 320.0, 292.0, 287.0, 287.0, 295.0, 287.0, 292.0, 312.0, 324.0, 290.0, 292.0, 291.0, 291.0, 290.0, 283.0, 289.0, 293.0, 287.0, 289.0, 293.0, 289.0, 291.0, 288.0, 286.0, 296.0, 293.0, 286.0, 314.0, 322.0, 283.0, 299.0, 314.0, 316.0, 283.0, 299.0, 260.0, 262.0, 289.0, 290.0, 289.0, 290.0, 307.0, 320.0, 292.0, 290.0, 316.0, 317.0, 280.0, 293.0, 291.0, 291.0, 295.0, 287.0, 321.0, 315.0, 285.0, 294.0, 289.0, 292.0, 290.0, 292.0, 283.0, 287.0, 291.0, 291.0, 294.0, 285.0, 294.0, 288.0, 298.0, 289.0, 291.0, 288.0, 290.0, 289.0, 292.0, 290.0, 314.0, 313.0, 284.0, 292.0, 294.0, 288.0, 260.0, 261.0, 293.0, 289.0, 286.0, 290.0, 289.0, 281.0, 315.0, 315.0, 288.0, 291.0, 291.0, 291.0, 286.0, 296.0, 295.0, 287.0, 293.0, 289.0, 296.0, 286.0, 292.0, 284.0, 294.0, 288.0, 284.0, 298.0, 294.0, 282.0, 291.0, 279.0, 285.0, 291.0, 288.0, 299.0, 288.0, 291.0, 285.0, 294.0, 289.0, 293.0, 293.0, 289.0, 297.0, 290.0, 287.0, 292.0, 289.0, 290.0, 266.0, 259.0, 281.0, 295.0, 313.0, 314.0, 287.0, 295.0, 289.0, 293.0, 296.0, 283.0, 281.0, 295.0, 289.0, 293.0, 317.0, 313.0, 239.0, 229.0, 259.0, 263.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7034854686787455, "mean_inference_ms": 1.2606134572888865, "mean_action_processing_ms": 0.13459397004636353, "mean_env_wait_ms": 0.8471829775070431, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 468.0, "episode_reward_mean": 581.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 290.85}, "hist_stats": {"episode_reward": [576.0, 579.0, 630.0, 579.0, 573.0, 525.0, 579.0, 587.0, 579.0, 570.0, 630.0, 579.0, 579.0, 582.0, 582.0, 519.0, 576.0, 582.0, 582.0, 
576.0, 579.0, 579.0, 630.0, 579.0, 582.0, 579.0, 636.0, 582.0, 582.0, 573.0, 582.0, 576.0, 582.0, 579.0, 582.0, 579.0, 636.0, 582.0, 630.0, 582.0, 522.0, 579.0, 579.0, 627.0, 582.0, 633.0, 573.0, 582.0, 582.0, 636.0, 579.0, 581.0, 582.0, 570.0, 582.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 627.0, 576.0, 582.0, 521.0, 582.0, 576.0, 570.0, 630.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 576.0, 582.0, 582.0, 576.0, 570.0, 576.0, 587.0, 579.0, 579.0, 582.0, 582.0, 587.0, 579.0, 579.0, 525.0, 576.0, 627.0, 582.0, 582.0, 579.0, 576.0, 582.0, 630.0, 468.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 292.0, 287.0, 314.0, 316.0, 291.0, 288.0, 287.0, 286.0, 263.0, 262.0, 296.0, 283.0, 298.0, 289.0, 293.0, 286.0, 285.0, 285.0, 305.0, 325.0, 291.0, 288.0, 288.0, 291.0, 289.0, 293.0, 295.0, 287.0, 258.0, 261.0, 295.0, 281.0, 291.0, 291.0, 296.0, 286.0, 279.0, 297.0, 287.0, 292.0, 290.0, 289.0, 310.0, 320.0, 292.0, 287.0, 287.0, 295.0, 287.0, 292.0, 312.0, 324.0, 290.0, 292.0, 291.0, 291.0, 290.0, 283.0, 289.0, 293.0, 287.0, 289.0, 293.0, 289.0, 291.0, 288.0, 286.0, 296.0, 293.0, 286.0, 314.0, 322.0, 283.0, 299.0, 314.0, 316.0, 283.0, 299.0, 260.0, 262.0, 289.0, 290.0, 289.0, 290.0, 307.0, 320.0, 292.0, 290.0, 316.0, 317.0, 280.0, 293.0, 291.0, 291.0, 295.0, 287.0, 321.0, 315.0, 285.0, 294.0, 289.0, 292.0, 290.0, 292.0, 283.0, 287.0, 291.0, 291.0, 294.0, 285.0, 294.0, 288.0, 298.0, 289.0, 291.0, 288.0, 290.0, 289.0, 292.0, 290.0, 314.0, 313.0, 284.0, 292.0, 294.0, 
288.0, 260.0, 261.0, 293.0, 289.0, 286.0, 290.0, 289.0, 281.0, 315.0, 315.0, 288.0, 291.0, 291.0, 291.0, 286.0, 296.0, 295.0, 287.0, 293.0, 289.0, 296.0, 286.0, 292.0, 284.0, 294.0, 288.0, 284.0, 298.0, 294.0, 282.0, 291.0, 279.0, 285.0, 291.0, 288.0, 299.0, 288.0, 291.0, 285.0, 294.0, 289.0, 293.0, 293.0, 289.0, 297.0, 290.0, 287.0, 292.0, 289.0, 290.0, 266.0, 259.0, 281.0, 295.0, 313.0, 314.0, 287.0, 295.0, 289.0, 293.0, 296.0, 283.0, 281.0, 295.0, 289.0, 293.0, 317.0, 313.0, 239.0, 229.0, 259.0, 263.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7034854686787455, "mean_inference_ms": 1.2606134572888865, "mean_action_processing_ms": 0.13459397004636353, "mean_env_wait_ms": 0.8471829775070431, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7321600, "num_agent_steps_trained": 7321600, "num_env_steps_sampled": 3660800, "num_env_steps_trained": 3660800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3660800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7321600, "timers": {"training_iteration_time_ms": 3689.191, "learn_time_ms": 1150.78, "learn_throughput": 11122.895, "synch_weights_time_ms": 12.204}, "counters": {"num_env_steps_sampled": 3660800, "num_env_steps_trained": 3660800, "num_agent_steps_sampled": 7321600, "num_agent_steps_trained": 7321600}, "done": false, "episodes_total": 9152, "training_iteration": 286, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-03", "timestamp": 1666581543, "time_this_iter_s": 3.6968560218811035, "time_total_s": 1101.5516135692596, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1101.5516135692596, "timesteps_since_restore": 0, "iterations_since_restore": 286, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.516666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 178.74, "shaped_reward_min": 111, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.52, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.08, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.34, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.98, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.77, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.71, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.34, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.98, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.34, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.98, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010225408477708697, "policy_loss": -0.0013909590197727084, "vf_loss": 7.777632236480713, "vf_explained_var": 0.571610689163208, "kl": 0.002125002443790436, "entropy": 0.8186874389648438, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3673600, "num_env_steps_trained": 3673600, "num_agent_steps_sampled": 7347200, "num_agent_steps_trained": 7347200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 351.0, "episode_reward_mean": 578.34, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 289.17}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 178.74, "shaped_reward_min": 111, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.75, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.52, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.08, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.34, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.98, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.77, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.71, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.34, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.98, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.34, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.98, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 579.0, 636.0, 582.0, 630.0, 582.0, 522.0, 579.0, 579.0, 627.0, 582.0, 633.0, 573.0, 582.0, 582.0, 636.0, 579.0, 581.0, 582.0, 570.0, 582.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 627.0, 576.0, 582.0, 521.0, 582.0, 576.0, 570.0, 630.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 576.0, 582.0, 582.0, 576.0, 570.0, 576.0, 587.0, 579.0, 579.0, 582.0, 582.0, 587.0, 579.0, 579.0, 525.0, 576.0, 627.0, 582.0, 582.0, 579.0, 576.0, 582.0, 630.0, 468.0, 522.0, 522.0, 582.0, 576.0, 582.0, 579.0, 576.0, 351.0, 576.0, 576.0, 633.0, 579.0, 582.0, 582.0, 530.0, 582.0, 573.0, 525.0, 633.0, 579.0, 582.0, 582.0, 633.0, 570.0, 576.0, 579.0, 579.0, 582.0, 582.0, 525.0, 576.0, 627.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 291.0, 288.0, 286.0, 296.0, 293.0, 286.0, 314.0, 322.0, 283.0, 
299.0, 314.0, 316.0, 283.0, 299.0, 260.0, 262.0, 289.0, 290.0, 289.0, 290.0, 307.0, 320.0, 292.0, 290.0, 316.0, 317.0, 280.0, 293.0, 291.0, 291.0, 295.0, 287.0, 321.0, 315.0, 285.0, 294.0, 289.0, 292.0, 290.0, 292.0, 283.0, 287.0, 291.0, 291.0, 294.0, 285.0, 294.0, 288.0, 298.0, 289.0, 291.0, 288.0, 290.0, 289.0, 292.0, 290.0, 314.0, 313.0, 284.0, 292.0, 294.0, 288.0, 260.0, 261.0, 293.0, 289.0, 286.0, 290.0, 289.0, 281.0, 315.0, 315.0, 288.0, 291.0, 291.0, 291.0, 286.0, 296.0, 295.0, 287.0, 293.0, 289.0, 296.0, 286.0, 292.0, 284.0, 294.0, 288.0, 284.0, 298.0, 294.0, 282.0, 291.0, 279.0, 285.0, 291.0, 288.0, 299.0, 288.0, 291.0, 285.0, 294.0, 289.0, 293.0, 293.0, 289.0, 297.0, 290.0, 287.0, 292.0, 289.0, 290.0, 266.0, 259.0, 281.0, 295.0, 313.0, 314.0, 287.0, 295.0, 289.0, 293.0, 296.0, 283.0, 281.0, 295.0, 289.0, 293.0, 317.0, 313.0, 239.0, 229.0, 259.0, 263.0, 259.0, 263.0, 291.0, 291.0, 287.0, 289.0, 286.0, 296.0, 283.0, 296.0, 286.0, 290.0, 179.0, 172.0, 294.0, 282.0, 294.0, 282.0, 321.0, 312.0, 287.0, 292.0, 292.0, 290.0, 290.0, 292.0, 268.0, 262.0, 291.0, 291.0, 289.0, 284.0, 256.0, 269.0, 326.0, 307.0, 286.0, 293.0, 287.0, 295.0, 295.0, 287.0, 317.0, 316.0, 279.0, 291.0, 291.0, 285.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 284.0, 298.0, 267.0, 258.0, 293.0, 283.0, 309.0, 318.0, 295.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7034303932588013, "mean_inference_ms": 1.2603550541368185, "mean_action_processing_ms": 0.13458294799057233, "mean_env_wait_ms": 0.8470510383832814, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 351.0, "episode_reward_mean": 578.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 289.17}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 579.0, 636.0, 582.0, 630.0, 582.0, 522.0, 579.0, 579.0, 627.0, 582.0, 633.0, 573.0, 582.0, 582.0, 636.0, 579.0, 
581.0, 582.0, 570.0, 582.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 627.0, 576.0, 582.0, 521.0, 582.0, 576.0, 570.0, 630.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 576.0, 582.0, 582.0, 576.0, 570.0, 576.0, 587.0, 579.0, 579.0, 582.0, 582.0, 587.0, 579.0, 579.0, 525.0, 576.0, 627.0, 582.0, 582.0, 579.0, 576.0, 582.0, 630.0, 468.0, 522.0, 522.0, 582.0, 576.0, 582.0, 579.0, 576.0, 351.0, 576.0, 576.0, 633.0, 579.0, 582.0, 582.0, 530.0, 582.0, 573.0, 525.0, 633.0, 579.0, 582.0, 582.0, 633.0, 570.0, 576.0, 579.0, 579.0, 582.0, 582.0, 525.0, 576.0, 627.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 291.0, 288.0, 286.0, 296.0, 293.0, 286.0, 314.0, 322.0, 283.0, 299.0, 314.0, 316.0, 283.0, 299.0, 260.0, 262.0, 289.0, 290.0, 289.0, 290.0, 307.0, 320.0, 292.0, 290.0, 316.0, 317.0, 280.0, 293.0, 291.0, 291.0, 295.0, 287.0, 321.0, 315.0, 285.0, 294.0, 289.0, 292.0, 290.0, 292.0, 283.0, 287.0, 291.0, 291.0, 294.0, 285.0, 294.0, 288.0, 298.0, 289.0, 291.0, 288.0, 290.0, 289.0, 292.0, 290.0, 314.0, 313.0, 284.0, 292.0, 294.0, 288.0, 260.0, 261.0, 293.0, 289.0, 286.0, 290.0, 289.0, 281.0, 315.0, 315.0, 288.0, 291.0, 291.0, 291.0, 286.0, 296.0, 295.0, 287.0, 293.0, 289.0, 296.0, 286.0, 292.0, 284.0, 294.0, 288.0, 284.0, 298.0, 294.0, 282.0, 291.0, 279.0, 285.0, 291.0, 288.0, 299.0, 288.0, 291.0, 285.0, 294.0, 289.0, 293.0, 293.0, 289.0, 297.0, 290.0, 287.0, 292.0, 289.0, 290.0, 266.0, 259.0, 281.0, 295.0, 313.0, 314.0, 287.0, 295.0, 289.0, 293.0, 296.0, 283.0, 281.0, 
295.0, 289.0, 293.0, 317.0, 313.0, 239.0, 229.0, 259.0, 263.0, 259.0, 263.0, 291.0, 291.0, 287.0, 289.0, 286.0, 296.0, 283.0, 296.0, 286.0, 290.0, 179.0, 172.0, 294.0, 282.0, 294.0, 282.0, 321.0, 312.0, 287.0, 292.0, 292.0, 290.0, 290.0, 292.0, 268.0, 262.0, 291.0, 291.0, 289.0, 284.0, 256.0, 269.0, 326.0, 307.0, 286.0, 293.0, 287.0, 295.0, 295.0, 287.0, 317.0, 316.0, 279.0, 291.0, 291.0, 285.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 284.0, 298.0, 267.0, 258.0, 293.0, 283.0, 309.0, 318.0, 295.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7034303932588013, "mean_inference_ms": 1.2603550541368185, "mean_action_processing_ms": 0.13458294799057233, "mean_env_wait_ms": 0.8470510383832814, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7347200, "num_agent_steps_trained": 7347200, "num_env_steps_sampled": 3673600, "num_env_steps_trained": 3673600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3673600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7347200, "timers": {"training_iteration_time_ms": 3701.219, "learn_time_ms": 1154.78, "learn_throughput": 11084.364, "synch_weights_time_ms": 11.718}, "counters": {"num_env_steps_sampled": 3673600, "num_env_steps_trained": 3673600, "num_agent_steps_sampled": 7347200, "num_agent_steps_trained": 7347200}, "done": false, "episodes_total": 9184, "training_iteration": 287, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-06", "timestamp": 1666581546, "time_this_iter_s": 3.8007616996765137, "time_total_s": 1105.3523752689362, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1105.3523752689362, "timesteps_since_restore": 0, "iterations_since_restore": 287, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.800000000000004, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 178.06, "shaped_reward_min": 111, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 17.37, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.34, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.12, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.05, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.87, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.08, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.05, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.05, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019629087764769793, "policy_loss": -0.0023256689310073853, "vf_loss": 7.742514133453369, "vf_explained_var": 0.5878534913063049, "kl": 0.002066924935206771, "entropy": 0.8229769468307495, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3686400, "num_env_steps_trained": 3686400, "num_agent_steps_sampled": 7372800, "num_agent_steps_trained": 7372800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 351.0, "episode_reward_mean": 574.86, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 287.43}, "custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 178.06, "shaped_reward_min": 111, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 17.37, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.34, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.12, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.08, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.05, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.87, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.08, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.05, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.08, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.05, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [521.0, 582.0, 576.0, 570.0, 630.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 576.0, 582.0, 582.0, 576.0, 570.0, 576.0, 587.0, 579.0, 579.0, 582.0, 582.0, 587.0, 579.0, 579.0, 525.0, 576.0, 627.0, 582.0, 582.0, 579.0, 576.0, 582.0, 630.0, 468.0, 522.0, 522.0, 582.0, 576.0, 582.0, 579.0, 576.0, 351.0, 576.0, 576.0, 633.0, 579.0, 582.0, 582.0, 530.0, 582.0, 573.0, 525.0, 633.0, 579.0, 582.0, 582.0, 633.0, 570.0, 576.0, 579.0, 579.0, 582.0, 582.0, 525.0, 576.0, 627.0, 576.0, 584.0, 582.0, 582.0, 576.0, 576.0, 582.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 581.0, 573.0, 570.0, 573.0, 579.0, 536.0, 539.0, 582.0, 636.0, 576.0, 582.0, 630.0, 530.0, 582.0, 587.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 261.0, 293.0, 289.0, 286.0, 290.0, 289.0, 281.0, 315.0, 315.0, 288.0, 
291.0, 291.0, 291.0, 286.0, 296.0, 295.0, 287.0, 293.0, 289.0, 296.0, 286.0, 292.0, 284.0, 294.0, 288.0, 284.0, 298.0, 294.0, 282.0, 291.0, 279.0, 285.0, 291.0, 288.0, 299.0, 288.0, 291.0, 285.0, 294.0, 289.0, 293.0, 293.0, 289.0, 297.0, 290.0, 287.0, 292.0, 289.0, 290.0, 266.0, 259.0, 281.0, 295.0, 313.0, 314.0, 287.0, 295.0, 289.0, 293.0, 296.0, 283.0, 281.0, 295.0, 289.0, 293.0, 317.0, 313.0, 239.0, 229.0, 259.0, 263.0, 259.0, 263.0, 291.0, 291.0, 287.0, 289.0, 286.0, 296.0, 283.0, 296.0, 286.0, 290.0, 179.0, 172.0, 294.0, 282.0, 294.0, 282.0, 321.0, 312.0, 287.0, 292.0, 292.0, 290.0, 290.0, 292.0, 268.0, 262.0, 291.0, 291.0, 289.0, 284.0, 256.0, 269.0, 326.0, 307.0, 286.0, 293.0, 287.0, 295.0, 295.0, 287.0, 317.0, 316.0, 279.0, 291.0, 291.0, 285.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 284.0, 298.0, 267.0, 258.0, 293.0, 283.0, 309.0, 318.0, 295.0, 281.0, 303.0, 281.0, 294.0, 288.0, 288.0, 294.0, 287.0, 289.0, 284.0, 292.0, 293.0, 289.0, 297.0, 284.0, 294.0, 288.0, 289.0, 290.0, 286.0, 296.0, 260.0, 270.0, 285.0, 294.0, 295.0, 287.0, 291.0, 288.0, 297.0, 282.0, 290.0, 291.0, 284.0, 289.0, 287.0, 283.0, 284.0, 289.0, 292.0, 287.0, 263.0, 273.0, 271.0, 268.0, 288.0, 294.0, 322.0, 314.0, 286.0, 290.0, 299.0, 283.0, 313.0, 317.0, 266.0, 264.0, 297.0, 285.0, 292.0, 295.0, 287.0, 295.0, 293.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7033596138763246, "mean_inference_ms": 1.2601166688201468, "mean_action_processing_ms": 0.13457362199660333, "mean_env_wait_ms": 0.8469378616858445, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 351.0, "episode_reward_mean": 574.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 287.43}, "hist_stats": {"episode_reward": [521.0, 582.0, 576.0, 570.0, 630.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 576.0, 582.0, 582.0, 576.0, 570.0, 576.0, 587.0, 579.0, 
579.0, 582.0, 582.0, 587.0, 579.0, 579.0, 525.0, 576.0, 627.0, 582.0, 582.0, 579.0, 576.0, 582.0, 630.0, 468.0, 522.0, 522.0, 582.0, 576.0, 582.0, 579.0, 576.0, 351.0, 576.0, 576.0, 633.0, 579.0, 582.0, 582.0, 530.0, 582.0, 573.0, 525.0, 633.0, 579.0, 582.0, 582.0, 633.0, 570.0, 576.0, 579.0, 579.0, 582.0, 582.0, 525.0, 576.0, 627.0, 576.0, 584.0, 582.0, 582.0, 576.0, 576.0, 582.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 581.0, 573.0, 570.0, 573.0, 579.0, 536.0, 539.0, 582.0, 636.0, 576.0, 582.0, 630.0, 530.0, 582.0, 587.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 261.0, 293.0, 289.0, 286.0, 290.0, 289.0, 281.0, 315.0, 315.0, 288.0, 291.0, 291.0, 291.0, 286.0, 296.0, 295.0, 287.0, 293.0, 289.0, 296.0, 286.0, 292.0, 284.0, 294.0, 288.0, 284.0, 298.0, 294.0, 282.0, 291.0, 279.0, 285.0, 291.0, 288.0, 299.0, 288.0, 291.0, 285.0, 294.0, 289.0, 293.0, 293.0, 289.0, 297.0, 290.0, 287.0, 292.0, 289.0, 290.0, 266.0, 259.0, 281.0, 295.0, 313.0, 314.0, 287.0, 295.0, 289.0, 293.0, 296.0, 283.0, 281.0, 295.0, 289.0, 293.0, 317.0, 313.0, 239.0, 229.0, 259.0, 263.0, 259.0, 263.0, 291.0, 291.0, 287.0, 289.0, 286.0, 296.0, 283.0, 296.0, 286.0, 290.0, 179.0, 172.0, 294.0, 282.0, 294.0, 282.0, 321.0, 312.0, 287.0, 292.0, 292.0, 290.0, 290.0, 292.0, 268.0, 262.0, 291.0, 291.0, 289.0, 284.0, 256.0, 269.0, 326.0, 307.0, 286.0, 293.0, 287.0, 295.0, 295.0, 287.0, 317.0, 316.0, 279.0, 291.0, 291.0, 285.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 284.0, 
298.0, 267.0, 258.0, 293.0, 283.0, 309.0, 318.0, 295.0, 281.0, 303.0, 281.0, 294.0, 288.0, 288.0, 294.0, 287.0, 289.0, 284.0, 292.0, 293.0, 289.0, 297.0, 284.0, 294.0, 288.0, 289.0, 290.0, 286.0, 296.0, 260.0, 270.0, 285.0, 294.0, 295.0, 287.0, 291.0, 288.0, 297.0, 282.0, 290.0, 291.0, 284.0, 289.0, 287.0, 283.0, 284.0, 289.0, 292.0, 287.0, 263.0, 273.0, 271.0, 268.0, 288.0, 294.0, 322.0, 314.0, 286.0, 290.0, 299.0, 283.0, 313.0, 317.0, 266.0, 264.0, 297.0, 285.0, 292.0, 295.0, 287.0, 295.0, 293.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7033596138763246, "mean_inference_ms": 1.2601166688201468, "mean_action_processing_ms": 0.13457362199660333, "mean_env_wait_ms": 0.8469378616858445, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7372800, "num_agent_steps_trained": 7372800, "num_env_steps_sampled": 3686400, "num_env_steps_trained": 3686400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3686400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7372800, "timers": {"training_iteration_time_ms": 3706.021, "learn_time_ms": 1160.239, "learn_throughput": 11032.208, "synch_weights_time_ms": 11.723}, "counters": {"num_env_steps_sampled": 3686400, "num_env_steps_trained": 3686400, "num_agent_steps_sampled": 7372800, "num_agent_steps_trained": 7372800}, "done": false, "episodes_total": 9216, "training_iteration": 288, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-10", "timestamp": 1666581550, "time_this_iter_s": 3.7877204418182373, "time_total_s": 1109.1400957107544, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1109.1400957107544, "timesteps_since_restore": 0, "iterations_since_restore": 288, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.750000000000004, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 177.21, "shaped_reward_min": 97, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.84, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 17.85, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.73, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 14.46, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.52, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.82, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.79, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.36, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.46, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.52, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.46, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.52, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014422027161344886, "policy_loss": 0.0010773971443995833, "vf_loss": 7.724160194396973, "vf_explained_var": 0.5966507196426392, "kl": 0.0017015428747981787, "entropy": 0.8152189254760742, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3699200, "num_env_steps_trained": 3699200, "num_agent_steps_sampled": 7398400, "num_agent_steps_trained": 7398400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 297.0, "episode_reward_mean": 572.41, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 146.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.205}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 177.21, "shaped_reward_min": 97, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.84, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 17.85, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.73, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 14.46, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.52, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.82, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.79, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.38, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.36, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.46, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.52, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.46, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.52, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 468.0, 522.0, 522.0, 582.0, 576.0, 582.0, 579.0, 576.0, 351.0, 576.0, 576.0, 633.0, 579.0, 582.0, 582.0, 530.0, 582.0, 573.0, 525.0, 633.0, 579.0, 582.0, 582.0, 633.0, 570.0, 576.0, 579.0, 579.0, 582.0, 582.0, 525.0, 576.0, 627.0, 576.0, 584.0, 582.0, 582.0, 576.0, 576.0, 582.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 581.0, 573.0, 570.0, 573.0, 579.0, 536.0, 539.0, 582.0, 636.0, 576.0, 582.0, 630.0, 530.0, 582.0, 587.0, 582.0, 573.0, 527.0, 579.0, 630.0, 582.0, 584.0, 573.0, 627.0, 579.0, 582.0, 582.0, 570.0, 573.0, 630.0, 576.0, 573.0, 579.0, 579.0, 524.0, 587.0, 582.0, 584.0, 576.0, 576.0, 582.0, 579.0, 297.0, 579.0, 582.0, 582.0, 582.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 317.0, 313.0, 239.0, 229.0, 259.0, 263.0, 259.0, 263.0, 291.0, 
291.0, 287.0, 289.0, 286.0, 296.0, 283.0, 296.0, 286.0, 290.0, 179.0, 172.0, 294.0, 282.0, 294.0, 282.0, 321.0, 312.0, 287.0, 292.0, 292.0, 290.0, 290.0, 292.0, 268.0, 262.0, 291.0, 291.0, 289.0, 284.0, 256.0, 269.0, 326.0, 307.0, 286.0, 293.0, 287.0, 295.0, 295.0, 287.0, 317.0, 316.0, 279.0, 291.0, 291.0, 285.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 284.0, 298.0, 267.0, 258.0, 293.0, 283.0, 309.0, 318.0, 295.0, 281.0, 303.0, 281.0, 294.0, 288.0, 288.0, 294.0, 287.0, 289.0, 284.0, 292.0, 293.0, 289.0, 297.0, 284.0, 294.0, 288.0, 289.0, 290.0, 286.0, 296.0, 260.0, 270.0, 285.0, 294.0, 295.0, 287.0, 291.0, 288.0, 297.0, 282.0, 290.0, 291.0, 284.0, 289.0, 287.0, 283.0, 284.0, 289.0, 292.0, 287.0, 263.0, 273.0, 271.0, 268.0, 288.0, 294.0, 322.0, 314.0, 286.0, 290.0, 299.0, 283.0, 313.0, 317.0, 266.0, 264.0, 297.0, 285.0, 292.0, 295.0, 287.0, 295.0, 293.0, 280.0, 264.0, 263.0, 292.0, 287.0, 321.0, 309.0, 295.0, 287.0, 289.0, 295.0, 294.0, 279.0, 309.0, 318.0, 285.0, 294.0, 294.0, 288.0, 289.0, 293.0, 283.0, 287.0, 288.0, 285.0, 311.0, 319.0, 285.0, 291.0, 284.0, 289.0, 288.0, 291.0, 285.0, 294.0, 256.0, 268.0, 294.0, 293.0, 290.0, 292.0, 297.0, 287.0, 286.0, 290.0, 288.0, 288.0, 281.0, 301.0, 290.0, 289.0, 151.0, 146.0, 287.0, 292.0, 299.0, 283.0, 296.0, 286.0, 294.0, 288.0, 287.0, 289.0, 291.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7032903656120397, "mean_inference_ms": 1.2598923935864008, "mean_action_processing_ms": 0.13456767469158165, "mean_env_wait_ms": 0.8468338102519962, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 297.0, "episode_reward_mean": 572.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 146.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.205}, "hist_stats": {"episode_reward": [582.0, 630.0, 468.0, 522.0, 522.0, 582.0, 576.0, 582.0, 579.0, 576.0, 351.0, 576.0, 576.0, 633.0, 579.0, 582.0, 582.0, 530.0, 
582.0, 573.0, 525.0, 633.0, 579.0, 582.0, 582.0, 633.0, 570.0, 576.0, 579.0, 579.0, 582.0, 582.0, 525.0, 576.0, 627.0, 576.0, 584.0, 582.0, 582.0, 576.0, 576.0, 582.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 581.0, 573.0, 570.0, 573.0, 579.0, 536.0, 539.0, 582.0, 636.0, 576.0, 582.0, 630.0, 530.0, 582.0, 587.0, 582.0, 573.0, 527.0, 579.0, 630.0, 582.0, 584.0, 573.0, 627.0, 579.0, 582.0, 582.0, 570.0, 573.0, 630.0, 576.0, 573.0, 579.0, 579.0, 524.0, 587.0, 582.0, 584.0, 576.0, 576.0, 582.0, 579.0, 297.0, 579.0, 582.0, 582.0, 582.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 317.0, 313.0, 239.0, 229.0, 259.0, 263.0, 259.0, 263.0, 291.0, 291.0, 287.0, 289.0, 286.0, 296.0, 283.0, 296.0, 286.0, 290.0, 179.0, 172.0, 294.0, 282.0, 294.0, 282.0, 321.0, 312.0, 287.0, 292.0, 292.0, 290.0, 290.0, 292.0, 268.0, 262.0, 291.0, 291.0, 289.0, 284.0, 256.0, 269.0, 326.0, 307.0, 286.0, 293.0, 287.0, 295.0, 295.0, 287.0, 317.0, 316.0, 279.0, 291.0, 291.0, 285.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 284.0, 298.0, 267.0, 258.0, 293.0, 283.0, 309.0, 318.0, 295.0, 281.0, 303.0, 281.0, 294.0, 288.0, 288.0, 294.0, 287.0, 289.0, 284.0, 292.0, 293.0, 289.0, 297.0, 284.0, 294.0, 288.0, 289.0, 290.0, 286.0, 296.0, 260.0, 270.0, 285.0, 294.0, 295.0, 287.0, 291.0, 288.0, 297.0, 282.0, 290.0, 291.0, 284.0, 289.0, 287.0, 283.0, 284.0, 289.0, 292.0, 287.0, 263.0, 273.0, 271.0, 268.0, 288.0, 294.0, 322.0, 314.0, 286.0, 290.0, 299.0, 283.0, 313.0, 317.0, 
266.0, 264.0, 297.0, 285.0, 292.0, 295.0, 287.0, 295.0, 293.0, 280.0, 264.0, 263.0, 292.0, 287.0, 321.0, 309.0, 295.0, 287.0, 289.0, 295.0, 294.0, 279.0, 309.0, 318.0, 285.0, 294.0, 294.0, 288.0, 289.0, 293.0, 283.0, 287.0, 288.0, 285.0, 311.0, 319.0, 285.0, 291.0, 284.0, 289.0, 288.0, 291.0, 285.0, 294.0, 256.0, 268.0, 294.0, 293.0, 290.0, 292.0, 297.0, 287.0, 286.0, 290.0, 288.0, 288.0, 281.0, 301.0, 290.0, 289.0, 151.0, 146.0, 287.0, 292.0, 299.0, 283.0, 296.0, 286.0, 294.0, 288.0, 287.0, 289.0, 291.0, 282.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7032903656120397, "mean_inference_ms": 1.2598923935864008, "mean_action_processing_ms": 0.13456767469158165, "mean_env_wait_ms": 0.8468338102519962, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7398400, "num_agent_steps_trained": 7398400, "num_env_steps_sampled": 3699200, "num_env_steps_trained": 3699200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3699200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7398400, "timers": {"training_iteration_time_ms": 3696.327, "learn_time_ms": 1152.384, "learn_throughput": 11107.41, "synch_weights_time_ms": 12.455}, "counters": {"num_env_steps_sampled": 3699200, "num_env_steps_trained": 3699200, "num_agent_steps_sampled": 7398400, "num_agent_steps_trained": 7398400}, "done": false, "episodes_total": 9248, "training_iteration": 289, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-14", "timestamp": 1666581554, "time_this_iter_s": 3.6477789878845215, "time_total_s": 1112.787874698639, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1112.787874698639, "timesteps_since_restore": 0, "iterations_since_restore": 289, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.74, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.02, "shaped_reward_min": 97, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.62, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 18.15, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.48, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.27, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.77, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.85, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.78, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.3, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.27, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.27, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031906738877296448, "policy_loss": 0.002833613660186529, "vf_loss": 7.63752555847168, "vf_explained_var": 0.5924453735351562, "kl": 0.002333316020667553, "entropy": 0.8133819103240967, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3712000, "num_env_steps_trained": 3712000, "num_agent_steps_sampled": 7424000, "num_agent_steps_trained": 7424000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 297.0, "episode_reward_mean": 576.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 146.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 288.01}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.02, "shaped_reward_min": 97, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.62, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 18.15, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.48, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.89, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.27, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.85, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.77, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.85, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 5.78, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.3, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.27, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.85, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.27, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.85, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 576.0, 627.0, 576.0, 584.0, 582.0, 582.0, 576.0, 576.0, 582.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 581.0, 573.0, 570.0, 573.0, 579.0, 536.0, 539.0, 582.0, 636.0, 576.0, 582.0, 630.0, 530.0, 582.0, 587.0, 582.0, 573.0, 527.0, 579.0, 630.0, 582.0, 584.0, 573.0, 627.0, 579.0, 582.0, 582.0, 570.0, 573.0, 630.0, 576.0, 573.0, 579.0, 579.0, 524.0, 587.0, 582.0, 584.0, 576.0, 576.0, 582.0, 579.0, 297.0, 579.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 576.0, 579.0, 627.0, 576.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 579.0, 579.0, 576.0, 579.0, 525.0, 582.0, 576.0, 579.0, 579.0, 576.0, 573.0, 630.0, 576.0, 579.0, 525.0, 582.0, 576.0, 576.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 258.0, 293.0, 283.0, 309.0, 318.0, 295.0, 281.0, 303.0, 281.0, 294.0, 
288.0, 288.0, 294.0, 287.0, 289.0, 284.0, 292.0, 293.0, 289.0, 297.0, 284.0, 294.0, 288.0, 289.0, 290.0, 286.0, 296.0, 260.0, 270.0, 285.0, 294.0, 295.0, 287.0, 291.0, 288.0, 297.0, 282.0, 290.0, 291.0, 284.0, 289.0, 287.0, 283.0, 284.0, 289.0, 292.0, 287.0, 263.0, 273.0, 271.0, 268.0, 288.0, 294.0, 322.0, 314.0, 286.0, 290.0, 299.0, 283.0, 313.0, 317.0, 266.0, 264.0, 297.0, 285.0, 292.0, 295.0, 287.0, 295.0, 293.0, 280.0, 264.0, 263.0, 292.0, 287.0, 321.0, 309.0, 295.0, 287.0, 289.0, 295.0, 294.0, 279.0, 309.0, 318.0, 285.0, 294.0, 294.0, 288.0, 289.0, 293.0, 283.0, 287.0, 288.0, 285.0, 311.0, 319.0, 285.0, 291.0, 284.0, 289.0, 288.0, 291.0, 285.0, 294.0, 256.0, 268.0, 294.0, 293.0, 290.0, 292.0, 297.0, 287.0, 286.0, 290.0, 288.0, 288.0, 281.0, 301.0, 290.0, 289.0, 151.0, 146.0, 287.0, 292.0, 299.0, 283.0, 296.0, 286.0, 294.0, 288.0, 287.0, 289.0, 291.0, 282.0, 285.0, 297.0, 290.0, 286.0, 297.0, 282.0, 313.0, 314.0, 291.0, 285.0, 285.0, 291.0, 282.0, 300.0, 292.0, 290.0, 284.0, 298.0, 272.0, 253.0, 295.0, 284.0, 290.0, 280.0, 292.0, 287.0, 285.0, 294.0, 291.0, 285.0, 294.0, 285.0, 259.0, 266.0, 291.0, 291.0, 287.0, 289.0, 290.0, 289.0, 290.0, 289.0, 285.0, 291.0, 283.0, 290.0, 321.0, 309.0, 276.0, 300.0, 284.0, 295.0, 261.0, 264.0, 292.0, 290.0, 288.0, 288.0, 289.0, 287.0, 312.0, 318.0, 316.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7032018944909996, "mean_inference_ms": 1.2596606000828354, "mean_action_processing_ms": 0.13456065293030725, "mean_env_wait_ms": 0.8467261832828717, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 297.0, "episode_reward_mean": 576.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 146.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 288.01}, "hist_stats": {"episode_reward": [525.0, 576.0, 627.0, 576.0, 584.0, 582.0, 582.0, 576.0, 576.0, 582.0, 581.0, 582.0, 579.0, 582.0, 530.0, 579.0, 582.0, 579.0, 579.0, 
581.0, 573.0, 570.0, 573.0, 579.0, 536.0, 539.0, 582.0, 636.0, 576.0, 582.0, 630.0, 530.0, 582.0, 587.0, 582.0, 573.0, 527.0, 579.0, 630.0, 582.0, 584.0, 573.0, 627.0, 579.0, 582.0, 582.0, 570.0, 573.0, 630.0, 576.0, 573.0, 579.0, 579.0, 524.0, 587.0, 582.0, 584.0, 576.0, 576.0, 582.0, 579.0, 297.0, 579.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 576.0, 579.0, 627.0, 576.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 579.0, 579.0, 576.0, 579.0, 525.0, 582.0, 576.0, 579.0, 579.0, 576.0, 573.0, 630.0, 576.0, 579.0, 525.0, 582.0, 576.0, 576.0, 630.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 258.0, 293.0, 283.0, 309.0, 318.0, 295.0, 281.0, 303.0, 281.0, 294.0, 288.0, 288.0, 294.0, 287.0, 289.0, 284.0, 292.0, 293.0, 289.0, 297.0, 284.0, 294.0, 288.0, 289.0, 290.0, 286.0, 296.0, 260.0, 270.0, 285.0, 294.0, 295.0, 287.0, 291.0, 288.0, 297.0, 282.0, 290.0, 291.0, 284.0, 289.0, 287.0, 283.0, 284.0, 289.0, 292.0, 287.0, 263.0, 273.0, 271.0, 268.0, 288.0, 294.0, 322.0, 314.0, 286.0, 290.0, 299.0, 283.0, 313.0, 317.0, 266.0, 264.0, 297.0, 285.0, 292.0, 295.0, 287.0, 295.0, 293.0, 280.0, 264.0, 263.0, 292.0, 287.0, 321.0, 309.0, 295.0, 287.0, 289.0, 295.0, 294.0, 279.0, 309.0, 318.0, 285.0, 294.0, 294.0, 288.0, 289.0, 293.0, 283.0, 287.0, 288.0, 285.0, 311.0, 319.0, 285.0, 291.0, 284.0, 289.0, 288.0, 291.0, 285.0, 294.0, 256.0, 268.0, 294.0, 293.0, 290.0, 292.0, 297.0, 287.0, 286.0, 290.0, 288.0, 288.0, 281.0, 301.0, 290.0, 289.0, 151.0, 146.0, 287.0, 292.0, 299.0, 
283.0, 296.0, 286.0, 294.0, 288.0, 287.0, 289.0, 291.0, 282.0, 285.0, 297.0, 290.0, 286.0, 297.0, 282.0, 313.0, 314.0, 291.0, 285.0, 285.0, 291.0, 282.0, 300.0, 292.0, 290.0, 284.0, 298.0, 272.0, 253.0, 295.0, 284.0, 290.0, 280.0, 292.0, 287.0, 285.0, 294.0, 291.0, 285.0, 294.0, 285.0, 259.0, 266.0, 291.0, 291.0, 287.0, 289.0, 290.0, 289.0, 290.0, 289.0, 285.0, 291.0, 283.0, 290.0, 321.0, 309.0, 276.0, 300.0, 284.0, 295.0, 261.0, 264.0, 292.0, 290.0, 288.0, 288.0, 289.0, 287.0, 312.0, 318.0, 316.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7032018944909996, "mean_inference_ms": 1.2596606000828354, "mean_action_processing_ms": 0.13456065293030725, "mean_env_wait_ms": 0.8467261832828717, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7424000, "num_agent_steps_trained": 7424000, "num_env_steps_sampled": 3712000, "num_env_steps_trained": 3712000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3712000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7424000, "timers": {"training_iteration_time_ms": 3681.817, "learn_time_ms": 1145.831, "learn_throughput": 11170.935, "synch_weights_time_ms": 11.814}, "counters": {"num_env_steps_sampled": 3712000, "num_env_steps_trained": 3712000, "num_agent_steps_sampled": 7424000, "num_agent_steps_trained": 7424000}, "done": false, "episodes_total": 9280, "training_iteration": 290, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-18", "timestamp": 1666581558, "time_this_iter_s": 3.60448956489563, "time_total_s": 1116.3923642635345, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1116.3923642635345, "timesteps_since_restore": 0, "iterations_since_restore": 290, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.9, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.9, "shaped_reward_min": 97, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.5, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.31, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.13, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.18, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 18.09, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.8, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.91, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.83, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.3, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.81, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.18, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 18.09, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.18, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 18.09, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010251335334032774, "policy_loss": -0.0013847454683855176, "vf_loss": 7.6011223793029785, "vf_explained_var": 0.5847921371459961, "kl": 0.0018953521503135562, "entropy": 0.8009949922561646, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3724800, "num_env_steps_trained": 3724800, "num_agent_steps_sampled": 7449600, "num_agent_steps_trained": 7449600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 297.0, "episode_reward_mean": 580.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 146.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 290.45}, "custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.9, "shaped_reward_min": 97, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.5, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.36, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.31, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.13, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.18, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 18.09, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.25, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.8, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.91, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.57, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 5.83, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.3, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.81, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.28, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.18, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 18.09, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.18, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 18.09, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 582.0, 573.0, 527.0, 579.0, 630.0, 582.0, 584.0, 573.0, 627.0, 579.0, 582.0, 582.0, 570.0, 573.0, 630.0, 576.0, 573.0, 579.0, 579.0, 524.0, 587.0, 582.0, 584.0, 576.0, 576.0, 582.0, 579.0, 297.0, 579.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 576.0, 579.0, 627.0, 576.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 579.0, 579.0, 576.0, 579.0, 525.0, 582.0, 576.0, 579.0, 579.0, 576.0, 573.0, 630.0, 576.0, 579.0, 525.0, 582.0, 576.0, 576.0, 630.0, 633.0, 587.0, 582.0, 576.0, 579.0, 582.0, 633.0, 576.0, 624.0, 582.0, 576.0, 627.0, 639.0, 627.0, 627.0, 573.0, 573.0, 579.0, 579.0, 582.0, 633.0, 581.0, 579.0, 579.0, 582.0, 576.0, 579.0, 573.0, 576.0, 582.0, 579.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 285.0, 292.0, 295.0, 287.0, 295.0, 293.0, 280.0, 264.0, 263.0, 292.0, 
287.0, 321.0, 309.0, 295.0, 287.0, 289.0, 295.0, 294.0, 279.0, 309.0, 318.0, 285.0, 294.0, 294.0, 288.0, 289.0, 293.0, 283.0, 287.0, 288.0, 285.0, 311.0, 319.0, 285.0, 291.0, 284.0, 289.0, 288.0, 291.0, 285.0, 294.0, 256.0, 268.0, 294.0, 293.0, 290.0, 292.0, 297.0, 287.0, 286.0, 290.0, 288.0, 288.0, 281.0, 301.0, 290.0, 289.0, 151.0, 146.0, 287.0, 292.0, 299.0, 283.0, 296.0, 286.0, 294.0, 288.0, 287.0, 289.0, 291.0, 282.0, 285.0, 297.0, 290.0, 286.0, 297.0, 282.0, 313.0, 314.0, 291.0, 285.0, 285.0, 291.0, 282.0, 300.0, 292.0, 290.0, 284.0, 298.0, 272.0, 253.0, 295.0, 284.0, 290.0, 280.0, 292.0, 287.0, 285.0, 294.0, 291.0, 285.0, 294.0, 285.0, 259.0, 266.0, 291.0, 291.0, 287.0, 289.0, 290.0, 289.0, 290.0, 289.0, 285.0, 291.0, 283.0, 290.0, 321.0, 309.0, 276.0, 300.0, 284.0, 295.0, 261.0, 264.0, 292.0, 290.0, 288.0, 288.0, 289.0, 287.0, 312.0, 318.0, 316.0, 317.0, 297.0, 290.0, 293.0, 289.0, 290.0, 286.0, 289.0, 290.0, 287.0, 295.0, 313.0, 320.0, 291.0, 285.0, 318.0, 306.0, 289.0, 293.0, 288.0, 288.0, 318.0, 309.0, 317.0, 322.0, 309.0, 318.0, 314.0, 313.0, 280.0, 293.0, 289.0, 284.0, 288.0, 291.0, 290.0, 289.0, 292.0, 290.0, 317.0, 316.0, 295.0, 286.0, 287.0, 292.0, 288.0, 291.0, 294.0, 288.0, 287.0, 289.0, 289.0, 290.0, 291.0, 282.0, 288.0, 288.0, 293.0, 289.0, 285.0, 294.0, 308.0, 322.0, 294.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7031017145418876, "mean_inference_ms": 1.2594336419828618, "mean_action_processing_ms": 0.13455223025516794, "mean_env_wait_ms": 0.8466123493119135, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 297.0, "episode_reward_mean": 580.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 146.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 290.45}, "hist_stats": {"episode_reward": [582.0, 587.0, 582.0, 573.0, 527.0, 579.0, 630.0, 582.0, 584.0, 573.0, 627.0, 579.0, 582.0, 582.0, 570.0, 573.0, 630.0, 576.0, 573.0, 
579.0, 579.0, 524.0, 587.0, 582.0, 584.0, 576.0, 576.0, 582.0, 579.0, 297.0, 579.0, 582.0, 582.0, 582.0, 576.0, 573.0, 582.0, 576.0, 579.0, 627.0, 576.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 579.0, 579.0, 576.0, 579.0, 525.0, 582.0, 576.0, 579.0, 579.0, 576.0, 573.0, 630.0, 576.0, 579.0, 525.0, 582.0, 576.0, 576.0, 630.0, 633.0, 587.0, 582.0, 576.0, 579.0, 582.0, 633.0, 576.0, 624.0, 582.0, 576.0, 627.0, 639.0, 627.0, 627.0, 573.0, 573.0, 579.0, 579.0, 582.0, 633.0, 581.0, 579.0, 579.0, 582.0, 576.0, 579.0, 573.0, 576.0, 582.0, 579.0, 630.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 285.0, 292.0, 295.0, 287.0, 295.0, 293.0, 280.0, 264.0, 263.0, 292.0, 287.0, 321.0, 309.0, 295.0, 287.0, 289.0, 295.0, 294.0, 279.0, 309.0, 318.0, 285.0, 294.0, 294.0, 288.0, 289.0, 293.0, 283.0, 287.0, 288.0, 285.0, 311.0, 319.0, 285.0, 291.0, 284.0, 289.0, 288.0, 291.0, 285.0, 294.0, 256.0, 268.0, 294.0, 293.0, 290.0, 292.0, 297.0, 287.0, 286.0, 290.0, 288.0, 288.0, 281.0, 301.0, 290.0, 289.0, 151.0, 146.0, 287.0, 292.0, 299.0, 283.0, 296.0, 286.0, 294.0, 288.0, 287.0, 289.0, 291.0, 282.0, 285.0, 297.0, 290.0, 286.0, 297.0, 282.0, 313.0, 314.0, 291.0, 285.0, 285.0, 291.0, 282.0, 300.0, 292.0, 290.0, 284.0, 298.0, 272.0, 253.0, 295.0, 284.0, 290.0, 280.0, 292.0, 287.0, 285.0, 294.0, 291.0, 285.0, 294.0, 285.0, 259.0, 266.0, 291.0, 291.0, 287.0, 289.0, 290.0, 289.0, 290.0, 289.0, 285.0, 291.0, 283.0, 290.0, 321.0, 309.0, 276.0, 300.0, 284.0, 295.0, 261.0, 264.0, 292.0, 
290.0, 288.0, 288.0, 289.0, 287.0, 312.0, 318.0, 316.0, 317.0, 297.0, 290.0, 293.0, 289.0, 290.0, 286.0, 289.0, 290.0, 287.0, 295.0, 313.0, 320.0, 291.0, 285.0, 318.0, 306.0, 289.0, 293.0, 288.0, 288.0, 318.0, 309.0, 317.0, 322.0, 309.0, 318.0, 314.0, 313.0, 280.0, 293.0, 289.0, 284.0, 288.0, 291.0, 290.0, 289.0, 292.0, 290.0, 317.0, 316.0, 295.0, 286.0, 287.0, 292.0, 288.0, 291.0, 294.0, 288.0, 287.0, 289.0, 289.0, 290.0, 291.0, 282.0, 288.0, 288.0, 293.0, 289.0, 285.0, 294.0, 308.0, 322.0, 294.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7031017145418876, "mean_inference_ms": 1.2594336419828618, "mean_action_processing_ms": 0.13455223025516794, "mean_env_wait_ms": 0.8466123493119135, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7449600, "num_agent_steps_trained": 7449600, "num_env_steps_sampled": 3724800, "num_env_steps_trained": 3724800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3724800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7449600, "timers": {"training_iteration_time_ms": 3651.182, "learn_time_ms": 1146.906, "learn_throughput": 11160.463, "synch_weights_time_ms": 11.194}, "counters": {"num_env_steps_sampled": 3724800, "num_env_steps_trained": 3724800, "num_agent_steps_sampled": 7449600, "num_agent_steps_trained": 7449600}, "done": false, "episodes_total": 9312, "training_iteration": 291, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-22", "timestamp": 1666581562, "time_this_iter_s": 3.6406378746032715, "time_total_s": 1120.0330021381378, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1120.0330021381378, "timesteps_since_restore": 0, "iterations_since_restore": 291, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.68, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.5, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.79, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.58, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.39, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.86, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.67, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.76, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.4, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.39, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.39, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013125705299898982, "policy_loss": 0.0009523761691525578, "vf_loss": 7.632638454437256, "vf_explained_var": 0.6011805534362793, "kl": 0.002176450565457344, "entropy": 0.8061335682868958, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3737600, "num_env_steps_trained": 3737600, "num_agent_steps_sampled": 7475200, "num_agent_steps_trained": 7475200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 583.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 291.95}, "custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.5, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.79, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.58, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.39, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.17, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.86, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.67, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 5.76, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.4, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.4, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.39, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.39, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 576.0, 573.0, 582.0, 576.0, 579.0, 627.0, 576.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 579.0, 579.0, 576.0, 579.0, 525.0, 582.0, 576.0, 579.0, 579.0, 576.0, 573.0, 630.0, 576.0, 579.0, 525.0, 582.0, 576.0, 576.0, 630.0, 633.0, 587.0, 582.0, 576.0, 579.0, 582.0, 633.0, 576.0, 624.0, 582.0, 576.0, 627.0, 639.0, 627.0, 627.0, 573.0, 573.0, 579.0, 579.0, 582.0, 633.0, 581.0, 579.0, 579.0, 582.0, 576.0, 579.0, 573.0, 576.0, 582.0, 579.0, 630.0, 582.0, 576.0, 576.0, 530.0, 582.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 579.0, 516.0, 587.0, 627.0, 627.0, 582.0, 579.0, 570.0, 624.0, 522.0, 581.0, 519.0, 630.0, 630.0, 579.0, 579.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 294.0, 288.0, 287.0, 289.0, 291.0, 282.0, 285.0, 297.0, 290.0, 
286.0, 297.0, 282.0, 313.0, 314.0, 291.0, 285.0, 285.0, 291.0, 282.0, 300.0, 292.0, 290.0, 284.0, 298.0, 272.0, 253.0, 295.0, 284.0, 290.0, 280.0, 292.0, 287.0, 285.0, 294.0, 291.0, 285.0, 294.0, 285.0, 259.0, 266.0, 291.0, 291.0, 287.0, 289.0, 290.0, 289.0, 290.0, 289.0, 285.0, 291.0, 283.0, 290.0, 321.0, 309.0, 276.0, 300.0, 284.0, 295.0, 261.0, 264.0, 292.0, 290.0, 288.0, 288.0, 289.0, 287.0, 312.0, 318.0, 316.0, 317.0, 297.0, 290.0, 293.0, 289.0, 290.0, 286.0, 289.0, 290.0, 287.0, 295.0, 313.0, 320.0, 291.0, 285.0, 318.0, 306.0, 289.0, 293.0, 288.0, 288.0, 318.0, 309.0, 317.0, 322.0, 309.0, 318.0, 314.0, 313.0, 280.0, 293.0, 289.0, 284.0, 288.0, 291.0, 290.0, 289.0, 292.0, 290.0, 317.0, 316.0, 295.0, 286.0, 287.0, 292.0, 288.0, 291.0, 294.0, 288.0, 287.0, 289.0, 289.0, 290.0, 291.0, 282.0, 288.0, 288.0, 293.0, 289.0, 285.0, 294.0, 308.0, 322.0, 294.0, 288.0, 284.0, 292.0, 289.0, 287.0, 267.0, 263.0, 296.0, 286.0, 289.0, 290.0, 294.0, 288.0, 291.0, 288.0, 293.0, 289.0, 318.0, 315.0, 290.0, 292.0, 285.0, 294.0, 257.0, 259.0, 295.0, 292.0, 311.0, 316.0, 321.0, 306.0, 294.0, 288.0, 288.0, 291.0, 277.0, 293.0, 316.0, 308.0, 262.0, 260.0, 290.0, 291.0, 254.0, 265.0, 315.0, 315.0, 308.0, 322.0, 289.0, 290.0, 289.0, 290.0, 289.0, 293.0, 290.0, 292.0, 288.0, 291.0, 283.0, 290.0, 291.0, 285.0, 274.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7029979193287846, "mean_inference_ms": 1.2592075008193908, "mean_action_processing_ms": 0.1345427518722338, "mean_env_wait_ms": 0.846499986824968, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 583.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 291.95}, "hist_stats": {"episode_reward": [582.0, 582.0, 576.0, 573.0, 582.0, 576.0, 579.0, 627.0, 576.0, 576.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 579.0, 579.0, 576.0, 
579.0, 525.0, 582.0, 576.0, 579.0, 579.0, 576.0, 573.0, 630.0, 576.0, 579.0, 525.0, 582.0, 576.0, 576.0, 630.0, 633.0, 587.0, 582.0, 576.0, 579.0, 582.0, 633.0, 576.0, 624.0, 582.0, 576.0, 627.0, 639.0, 627.0, 627.0, 573.0, 573.0, 579.0, 579.0, 582.0, 633.0, 581.0, 579.0, 579.0, 582.0, 576.0, 579.0, 573.0, 576.0, 582.0, 579.0, 630.0, 582.0, 576.0, 576.0, 530.0, 582.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 579.0, 516.0, 587.0, 627.0, 627.0, 582.0, 579.0, 570.0, 624.0, 522.0, 581.0, 519.0, 630.0, 630.0, 579.0, 579.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 294.0, 288.0, 287.0, 289.0, 291.0, 282.0, 285.0, 297.0, 290.0, 286.0, 297.0, 282.0, 313.0, 314.0, 291.0, 285.0, 285.0, 291.0, 282.0, 300.0, 292.0, 290.0, 284.0, 298.0, 272.0, 253.0, 295.0, 284.0, 290.0, 280.0, 292.0, 287.0, 285.0, 294.0, 291.0, 285.0, 294.0, 285.0, 259.0, 266.0, 291.0, 291.0, 287.0, 289.0, 290.0, 289.0, 290.0, 289.0, 285.0, 291.0, 283.0, 290.0, 321.0, 309.0, 276.0, 300.0, 284.0, 295.0, 261.0, 264.0, 292.0, 290.0, 288.0, 288.0, 289.0, 287.0, 312.0, 318.0, 316.0, 317.0, 297.0, 290.0, 293.0, 289.0, 290.0, 286.0, 289.0, 290.0, 287.0, 295.0, 313.0, 320.0, 291.0, 285.0, 318.0, 306.0, 289.0, 293.0, 288.0, 288.0, 318.0, 309.0, 317.0, 322.0, 309.0, 318.0, 314.0, 313.0, 280.0, 293.0, 289.0, 284.0, 288.0, 291.0, 290.0, 289.0, 292.0, 290.0, 317.0, 316.0, 295.0, 286.0, 287.0, 292.0, 288.0, 291.0, 294.0, 288.0, 287.0, 289.0, 289.0, 290.0, 291.0, 282.0, 288.0, 
288.0, 293.0, 289.0, 285.0, 294.0, 308.0, 322.0, 294.0, 288.0, 284.0, 292.0, 289.0, 287.0, 267.0, 263.0, 296.0, 286.0, 289.0, 290.0, 294.0, 288.0, 291.0, 288.0, 293.0, 289.0, 318.0, 315.0, 290.0, 292.0, 285.0, 294.0, 257.0, 259.0, 295.0, 292.0, 311.0, 316.0, 321.0, 306.0, 294.0, 288.0, 288.0, 291.0, 277.0, 293.0, 316.0, 308.0, 262.0, 260.0, 290.0, 291.0, 254.0, 265.0, 315.0, 315.0, 308.0, 322.0, 289.0, 290.0, 289.0, 290.0, 289.0, 293.0, 290.0, 292.0, 288.0, 291.0, 283.0, 290.0, 291.0, 285.0, 274.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7029979193287846, "mean_inference_ms": 1.2592075008193908, "mean_action_processing_ms": 0.1345427518722338, "mean_env_wait_ms": 0.846499986824968, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7475200, "num_agent_steps_trained": 7475200, "num_env_steps_sampled": 3737600, "num_env_steps_trained": 3737600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3737600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7475200, "timers": {"training_iteration_time_ms": 3645.821, "learn_time_ms": 1146.461, "learn_throughput": 11164.789, "synch_weights_time_ms": 11.016}, "counters": {"num_env_steps_sampled": 3737600, "num_env_steps_trained": 3737600, "num_agent_steps_sampled": 7475200, "num_agent_steps_trained": 7475200}, "done": false, "episodes_total": 9344, "training_iteration": 292, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-26", "timestamp": 1666581566, "time_this_iter_s": 3.722670793533325, "time_total_s": 1123.7556729316711, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1123.7556729316711, "timesteps_since_restore": 0, "iterations_since_restore": 292, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 203.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.32, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.92, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.13, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.72, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.01, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.92, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003355878870934248, "policy_loss": -9.82009805738926e-06, "vf_loss": 7.542318344116211, "vf_explained_var": 0.6062654256820679, "kl": 0.002053479664027691, "entropy": 0.8176416158676147, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3750400, "num_env_steps_trained": 3750400, "num_agent_steps_sampled": 7500800, "num_agent_steps_trained": 7500800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 586.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 293.16}, "custom_metrics": {"sparse_reward_mean": 203.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.32, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.92, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.13, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.72, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.56, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.01, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.71, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.62, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.59, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.56, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.56, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 630.0, 633.0, 587.0, 582.0, 576.0, 579.0, 582.0, 633.0, 576.0, 624.0, 582.0, 576.0, 627.0, 639.0, 627.0, 627.0, 573.0, 573.0, 579.0, 579.0, 582.0, 633.0, 581.0, 579.0, 579.0, 582.0, 576.0, 579.0, 573.0, 576.0, 582.0, 579.0, 630.0, 582.0, 576.0, 576.0, 530.0, 582.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 579.0, 516.0, 587.0, 627.0, 627.0, 582.0, 579.0, 570.0, 624.0, 522.0, 581.0, 519.0, 630.0, 630.0, 579.0, 579.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 570.0, 573.0, 582.0, 573.0, 579.0, 633.0, 582.0, 579.0, 624.0, 573.0, 579.0, 582.0, 587.0, 522.0, 576.0, 630.0, 570.0, 587.0, 579.0, 582.0, 576.0, 582.0, 576.0, 582.0, 579.0, 582.0, 584.0, 633.0, 582.0, 587.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 289.0, 287.0, 312.0, 318.0, 316.0, 317.0, 297.0, 290.0, 293.0, 
289.0, 290.0, 286.0, 289.0, 290.0, 287.0, 295.0, 313.0, 320.0, 291.0, 285.0, 318.0, 306.0, 289.0, 293.0, 288.0, 288.0, 318.0, 309.0, 317.0, 322.0, 309.0, 318.0, 314.0, 313.0, 280.0, 293.0, 289.0, 284.0, 288.0, 291.0, 290.0, 289.0, 292.0, 290.0, 317.0, 316.0, 295.0, 286.0, 287.0, 292.0, 288.0, 291.0, 294.0, 288.0, 287.0, 289.0, 289.0, 290.0, 291.0, 282.0, 288.0, 288.0, 293.0, 289.0, 285.0, 294.0, 308.0, 322.0, 294.0, 288.0, 284.0, 292.0, 289.0, 287.0, 267.0, 263.0, 296.0, 286.0, 289.0, 290.0, 294.0, 288.0, 291.0, 288.0, 293.0, 289.0, 318.0, 315.0, 290.0, 292.0, 285.0, 294.0, 257.0, 259.0, 295.0, 292.0, 311.0, 316.0, 321.0, 306.0, 294.0, 288.0, 288.0, 291.0, 277.0, 293.0, 316.0, 308.0, 262.0, 260.0, 290.0, 291.0, 254.0, 265.0, 315.0, 315.0, 308.0, 322.0, 289.0, 290.0, 289.0, 290.0, 289.0, 293.0, 290.0, 292.0, 288.0, 291.0, 283.0, 290.0, 291.0, 285.0, 274.0, 299.0, 288.0, 282.0, 288.0, 285.0, 288.0, 294.0, 288.0, 285.0, 290.0, 289.0, 317.0, 316.0, 290.0, 292.0, 291.0, 288.0, 310.0, 314.0, 289.0, 284.0, 293.0, 286.0, 295.0, 287.0, 288.0, 299.0, 259.0, 263.0, 286.0, 290.0, 310.0, 320.0, 288.0, 282.0, 295.0, 292.0, 295.0, 284.0, 292.0, 290.0, 288.0, 288.0, 291.0, 291.0, 285.0, 291.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 290.0, 294.0, 314.0, 319.0, 289.0, 293.0, 292.0, 295.0, 293.0, 286.0, 294.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7029100265072316, "mean_inference_ms": 1.2589701047132034, "mean_action_processing_ms": 0.1345321957448095, "mean_env_wait_ms": 0.8463812083404905, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 586.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 293.16}, "hist_stats": {"episode_reward": [576.0, 576.0, 630.0, 633.0, 587.0, 582.0, 576.0, 579.0, 582.0, 633.0, 576.0, 624.0, 582.0, 576.0, 627.0, 639.0, 627.0, 627.0, 573.0, 
573.0, 579.0, 579.0, 582.0, 633.0, 581.0, 579.0, 579.0, 582.0, 576.0, 579.0, 573.0, 576.0, 582.0, 579.0, 630.0, 582.0, 576.0, 576.0, 530.0, 582.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 579.0, 516.0, 587.0, 627.0, 627.0, 582.0, 579.0, 570.0, 624.0, 522.0, 581.0, 519.0, 630.0, 630.0, 579.0, 579.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 570.0, 573.0, 582.0, 573.0, 579.0, 633.0, 582.0, 579.0, 624.0, 573.0, 579.0, 582.0, 587.0, 522.0, 576.0, 630.0, 570.0, 587.0, 579.0, 582.0, 576.0, 582.0, 576.0, 582.0, 579.0, 582.0, 584.0, 633.0, 582.0, 587.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 289.0, 287.0, 312.0, 318.0, 316.0, 317.0, 297.0, 290.0, 293.0, 289.0, 290.0, 286.0, 289.0, 290.0, 287.0, 295.0, 313.0, 320.0, 291.0, 285.0, 318.0, 306.0, 289.0, 293.0, 288.0, 288.0, 318.0, 309.0, 317.0, 322.0, 309.0, 318.0, 314.0, 313.0, 280.0, 293.0, 289.0, 284.0, 288.0, 291.0, 290.0, 289.0, 292.0, 290.0, 317.0, 316.0, 295.0, 286.0, 287.0, 292.0, 288.0, 291.0, 294.0, 288.0, 287.0, 289.0, 289.0, 290.0, 291.0, 282.0, 288.0, 288.0, 293.0, 289.0, 285.0, 294.0, 308.0, 322.0, 294.0, 288.0, 284.0, 292.0, 289.0, 287.0, 267.0, 263.0, 296.0, 286.0, 289.0, 290.0, 294.0, 288.0, 291.0, 288.0, 293.0, 289.0, 318.0, 315.0, 290.0, 292.0, 285.0, 294.0, 257.0, 259.0, 295.0, 292.0, 311.0, 316.0, 321.0, 306.0, 294.0, 288.0, 288.0, 291.0, 277.0, 293.0, 316.0, 308.0, 262.0, 260.0, 290.0, 291.0, 254.0, 265.0, 315.0, 315.0, 308.0, 322.0, 289.0, 290.0, 289.0, 290.0, 289.0, 293.0, 290.0, 
292.0, 288.0, 291.0, 283.0, 290.0, 291.0, 285.0, 274.0, 299.0, 288.0, 282.0, 288.0, 285.0, 288.0, 294.0, 288.0, 285.0, 290.0, 289.0, 317.0, 316.0, 290.0, 292.0, 291.0, 288.0, 310.0, 314.0, 289.0, 284.0, 293.0, 286.0, 295.0, 287.0, 288.0, 299.0, 259.0, 263.0, 286.0, 290.0, 310.0, 320.0, 288.0, 282.0, 295.0, 292.0, 295.0, 284.0, 292.0, 290.0, 288.0, 288.0, 291.0, 291.0, 285.0, 291.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 290.0, 294.0, 314.0, 319.0, 289.0, 293.0, 292.0, 295.0, 293.0, 286.0, 294.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7029100265072316, "mean_inference_ms": 1.2589701047132034, "mean_action_processing_ms": 0.1345321957448095, "mean_env_wait_ms": 0.8463812083404905, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7500800, "num_agent_steps_trained": 7500800, "num_env_steps_sampled": 3750400, "num_env_steps_trained": 3750400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3750400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7500800, "timers": {"training_iteration_time_ms": 3628.866, "learn_time_ms": 1127.478, "learn_throughput": 11352.771, "synch_weights_time_ms": 11.607}, "counters": {"num_env_steps_sampled": 3750400, "num_env_steps_trained": 3750400, "num_agent_steps_sampled": 7500800, "num_agent_steps_trained": 7500800}, "done": false, "episodes_total": 9376, "training_iteration": 293, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-30", "timestamp": 1666581570, "time_this_iter_s": 3.6530778408050537, "time_total_s": 1127.4087507724762, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1127.4087507724762, "timesteps_since_restore": 0, "iterations_since_restore": 293, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.82, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.77, "shaped_reward_min": 156, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.24, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.04, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.07, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.82, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.02, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.78, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0026967034209519625, "policy_loss": -0.0030466155149042606, "vf_loss": 7.593386650085449, "vf_explained_var": 0.6006097793579102, "kl": 0.0022225300781428814, "entropy": 0.81884765625, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3763200, "num_env_steps_trained": 3763200, "num_agent_steps_sampled": 7526400, "num_agent_steps_trained": 7526400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 516.0, "episode_reward_mean": 582.97, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 291.485}, "custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.77, "shaped_reward_min": 156, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.24, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.04, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.07, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.82, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.78, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.67, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.02, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.03, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.56, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.78, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.67, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.78, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.67, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 630.0, 582.0, 576.0, 576.0, 530.0, 582.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 579.0, 516.0, 587.0, 627.0, 627.0, 582.0, 579.0, 570.0, 624.0, 522.0, 581.0, 519.0, 630.0, 630.0, 579.0, 579.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 570.0, 573.0, 582.0, 573.0, 579.0, 633.0, 582.0, 579.0, 624.0, 573.0, 579.0, 582.0, 587.0, 522.0, 576.0, 630.0, 570.0, 587.0, 579.0, 582.0, 576.0, 582.0, 576.0, 582.0, 579.0, 582.0, 584.0, 633.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 579.0, 573.0, 584.0, 573.0, 576.0, 633.0, 573.0, 579.0, 590.0, 573.0, 582.0, 582.0, 576.0, 582.0, 579.0, 573.0, 582.0, 522.0, 573.0, 630.0, 584.0, 582.0, 579.0, 582.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 285.0, 294.0, 308.0, 322.0, 294.0, 288.0, 284.0, 292.0, 289.0, 
287.0, 267.0, 263.0, 296.0, 286.0, 289.0, 290.0, 294.0, 288.0, 291.0, 288.0, 293.0, 289.0, 318.0, 315.0, 290.0, 292.0, 285.0, 294.0, 257.0, 259.0, 295.0, 292.0, 311.0, 316.0, 321.0, 306.0, 294.0, 288.0, 288.0, 291.0, 277.0, 293.0, 316.0, 308.0, 262.0, 260.0, 290.0, 291.0, 254.0, 265.0, 315.0, 315.0, 308.0, 322.0, 289.0, 290.0, 289.0, 290.0, 289.0, 293.0, 290.0, 292.0, 288.0, 291.0, 283.0, 290.0, 291.0, 285.0, 274.0, 299.0, 288.0, 282.0, 288.0, 285.0, 288.0, 294.0, 288.0, 285.0, 290.0, 289.0, 317.0, 316.0, 290.0, 292.0, 291.0, 288.0, 310.0, 314.0, 289.0, 284.0, 293.0, 286.0, 295.0, 287.0, 288.0, 299.0, 259.0, 263.0, 286.0, 290.0, 310.0, 320.0, 288.0, 282.0, 295.0, 292.0, 295.0, 284.0, 292.0, 290.0, 288.0, 288.0, 291.0, 291.0, 285.0, 291.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 290.0, 294.0, 314.0, 319.0, 289.0, 293.0, 292.0, 295.0, 293.0, 286.0, 294.0, 288.0, 284.0, 298.0, 291.0, 291.0, 291.0, 288.0, 291.0, 282.0, 284.0, 300.0, 284.0, 289.0, 285.0, 291.0, 318.0, 315.0, 282.0, 291.0, 294.0, 285.0, 294.0, 296.0, 291.0, 282.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 296.0, 286.0, 291.0, 288.0, 280.0, 293.0, 287.0, 295.0, 261.0, 261.0, 284.0, 289.0, 311.0, 319.0, 295.0, 289.0, 293.0, 289.0, 294.0, 285.0, 291.0, 291.0, 292.0, 287.0, 292.0, 295.0, 290.0, 289.0, 291.0, 291.0, 291.0, 288.0, 309.0, 321.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028321055068703, "mean_inference_ms": 1.258741796548919, "mean_action_processing_ms": 0.13452419454572812, "mean_env_wait_ms": 0.8462833252091105, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 516.0, "episode_reward_mean": 582.97, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 254.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 291.485}, "hist_stats": {"episode_reward": [582.0, 579.0, 630.0, 582.0, 576.0, 576.0, 530.0, 582.0, 579.0, 582.0, 579.0, 582.0, 633.0, 582.0, 579.0, 516.0, 587.0, 627.0, 627.0, 
582.0, 579.0, 570.0, 624.0, 522.0, 581.0, 519.0, 630.0, 630.0, 579.0, 579.0, 582.0, 582.0, 579.0, 573.0, 576.0, 573.0, 570.0, 573.0, 582.0, 573.0, 579.0, 633.0, 582.0, 579.0, 624.0, 573.0, 579.0, 582.0, 587.0, 522.0, 576.0, 630.0, 570.0, 587.0, 579.0, 582.0, 576.0, 582.0, 576.0, 582.0, 579.0, 582.0, 584.0, 633.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 579.0, 573.0, 584.0, 573.0, 576.0, 633.0, 573.0, 579.0, 590.0, 573.0, 582.0, 582.0, 576.0, 582.0, 579.0, 573.0, 582.0, 522.0, 573.0, 630.0, 584.0, 582.0, 579.0, 582.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 285.0, 294.0, 308.0, 322.0, 294.0, 288.0, 284.0, 292.0, 289.0, 287.0, 267.0, 263.0, 296.0, 286.0, 289.0, 290.0, 294.0, 288.0, 291.0, 288.0, 293.0, 289.0, 318.0, 315.0, 290.0, 292.0, 285.0, 294.0, 257.0, 259.0, 295.0, 292.0, 311.0, 316.0, 321.0, 306.0, 294.0, 288.0, 288.0, 291.0, 277.0, 293.0, 316.0, 308.0, 262.0, 260.0, 290.0, 291.0, 254.0, 265.0, 315.0, 315.0, 308.0, 322.0, 289.0, 290.0, 289.0, 290.0, 289.0, 293.0, 290.0, 292.0, 288.0, 291.0, 283.0, 290.0, 291.0, 285.0, 274.0, 299.0, 288.0, 282.0, 288.0, 285.0, 288.0, 294.0, 288.0, 285.0, 290.0, 289.0, 317.0, 316.0, 290.0, 292.0, 291.0, 288.0, 310.0, 314.0, 289.0, 284.0, 293.0, 286.0, 295.0, 287.0, 288.0, 299.0, 259.0, 263.0, 286.0, 290.0, 310.0, 320.0, 288.0, 282.0, 295.0, 292.0, 295.0, 284.0, 292.0, 290.0, 288.0, 288.0, 291.0, 291.0, 285.0, 291.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 290.0, 294.0, 314.0, 
319.0, 289.0, 293.0, 292.0, 295.0, 293.0, 286.0, 294.0, 288.0, 284.0, 298.0, 291.0, 291.0, 291.0, 288.0, 291.0, 282.0, 284.0, 300.0, 284.0, 289.0, 285.0, 291.0, 318.0, 315.0, 282.0, 291.0, 294.0, 285.0, 294.0, 296.0, 291.0, 282.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 296.0, 286.0, 291.0, 288.0, 280.0, 293.0, 287.0, 295.0, 261.0, 261.0, 284.0, 289.0, 311.0, 319.0, 295.0, 289.0, 293.0, 289.0, 294.0, 285.0, 291.0, 291.0, 292.0, 287.0, 292.0, 295.0, 290.0, 289.0, 291.0, 291.0, 291.0, 288.0, 309.0, 321.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028321055068703, "mean_inference_ms": 1.258741796548919, "mean_action_processing_ms": 0.13452419454572812, "mean_env_wait_ms": 0.8462833252091105, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7526400, "num_agent_steps_trained": 7526400, "num_env_steps_sampled": 3763200, "num_env_steps_trained": 3763200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3763200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7526400, "timers": {"training_iteration_time_ms": 3627.155, "learn_time_ms": 1132.182, "learn_throughput": 11305.603, "synch_weights_time_ms": 11.691}, "counters": {"num_env_steps_sampled": 3763200, "num_env_steps_trained": 3763200, "num_agent_steps_sampled": 7526400, "num_agent_steps_trained": 7526400}, "done": false, "episodes_total": 9408, "training_iteration": 294, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-33", "timestamp": 1666581573, "time_this_iter_s": 3.654649496078491, "time_total_s": 1131.0634002685547, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1131.0634002685547, "timesteps_since_restore": 0, "iterations_since_restore": 294, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.25, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.01, "shaped_reward_min": 162, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.5, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.79, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.3, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.06, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.4, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.69, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.06, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.4, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.06, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.4, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013797101564705372, "policy_loss": -0.0017297662561759353, "vf_loss": 7.56453800201416, "vf_explained_var": 0.5991897583007812, "kl": 0.002123283687978983, "entropy": 0.8127896785736084, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3776000, "num_env_steps_trained": 3776000, "num_agent_steps_sampled": 7552000, "num_agent_steps_trained": 7552000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 522.0, "episode_reward_mean": 583.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 291.605}, "custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.01, "shaped_reward_min": 162, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.5, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.79, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.3, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.6, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.06, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.4, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.69, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.16, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.55, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.51, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.06, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.4, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.06, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.4, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 573.0, 576.0, 573.0, 570.0, 573.0, 582.0, 573.0, 579.0, 633.0, 582.0, 579.0, 624.0, 573.0, 579.0, 582.0, 587.0, 522.0, 576.0, 630.0, 570.0, 587.0, 579.0, 582.0, 576.0, 582.0, 576.0, 582.0, 579.0, 582.0, 584.0, 633.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 579.0, 573.0, 584.0, 573.0, 576.0, 633.0, 573.0, 579.0, 590.0, 573.0, 582.0, 582.0, 576.0, 582.0, 579.0, 573.0, 582.0, 522.0, 573.0, 630.0, 584.0, 582.0, 579.0, 582.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 582.0, 576.0, 570.0, 573.0, 587.0, 587.0, 581.0, 582.0, 582.0, 522.0, 630.0, 582.0, 576.0, 579.0, 576.0, 579.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 633.0, 530.0, 579.0, 627.0, 573.0, 587.0, 576.0, 630.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 283.0, 290.0, 291.0, 285.0, 274.0, 299.0, 288.0, 282.0, 288.0, 
285.0, 288.0, 294.0, 288.0, 285.0, 290.0, 289.0, 317.0, 316.0, 290.0, 292.0, 291.0, 288.0, 310.0, 314.0, 289.0, 284.0, 293.0, 286.0, 295.0, 287.0, 288.0, 299.0, 259.0, 263.0, 286.0, 290.0, 310.0, 320.0, 288.0, 282.0, 295.0, 292.0, 295.0, 284.0, 292.0, 290.0, 288.0, 288.0, 291.0, 291.0, 285.0, 291.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 290.0, 294.0, 314.0, 319.0, 289.0, 293.0, 292.0, 295.0, 293.0, 286.0, 294.0, 288.0, 284.0, 298.0, 291.0, 291.0, 291.0, 288.0, 291.0, 282.0, 284.0, 300.0, 284.0, 289.0, 285.0, 291.0, 318.0, 315.0, 282.0, 291.0, 294.0, 285.0, 294.0, 296.0, 291.0, 282.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 296.0, 286.0, 291.0, 288.0, 280.0, 293.0, 287.0, 295.0, 261.0, 261.0, 284.0, 289.0, 311.0, 319.0, 295.0, 289.0, 293.0, 289.0, 294.0, 285.0, 291.0, 291.0, 292.0, 287.0, 292.0, 295.0, 290.0, 289.0, 291.0, 291.0, 291.0, 288.0, 309.0, 321.0, 296.0, 286.0, 285.0, 291.0, 283.0, 287.0, 288.0, 285.0, 302.0, 285.0, 295.0, 292.0, 293.0, 288.0, 291.0, 291.0, 292.0, 290.0, 265.0, 257.0, 315.0, 315.0, 287.0, 295.0, 291.0, 285.0, 290.0, 289.0, 295.0, 281.0, 289.0, 290.0, 279.0, 300.0, 292.0, 295.0, 290.0, 289.0, 289.0, 293.0, 295.0, 284.0, 315.0, 315.0, 313.0, 320.0, 268.0, 262.0, 288.0, 291.0, 322.0, 305.0, 281.0, 292.0, 290.0, 297.0, 284.0, 292.0, 319.0, 311.0, 293.0, 283.0, 298.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.702779161330629, "mean_inference_ms": 1.2585260649777843, "mean_action_processing_ms": 0.13451059035678656, "mean_env_wait_ms": 0.846147640378893, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 522.0, "episode_reward_mean": 583.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 291.605}, "hist_stats": {"episode_reward": [579.0, 573.0, 576.0, 573.0, 570.0, 573.0, 582.0, 573.0, 579.0, 633.0, 582.0, 579.0, 624.0, 573.0, 579.0, 582.0, 587.0, 522.0, 576.0, 
630.0, 570.0, 587.0, 579.0, 582.0, 576.0, 582.0, 576.0, 582.0, 579.0, 582.0, 584.0, 633.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 579.0, 573.0, 584.0, 573.0, 576.0, 633.0, 573.0, 579.0, 590.0, 573.0, 582.0, 582.0, 576.0, 582.0, 579.0, 573.0, 582.0, 522.0, 573.0, 630.0, 584.0, 582.0, 579.0, 582.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 582.0, 576.0, 570.0, 573.0, 587.0, 587.0, 581.0, 582.0, 582.0, 522.0, 630.0, 582.0, 576.0, 579.0, 576.0, 579.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 633.0, 530.0, 579.0, 627.0, 573.0, 587.0, 576.0, 630.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 283.0, 290.0, 291.0, 285.0, 274.0, 299.0, 288.0, 282.0, 288.0, 285.0, 288.0, 294.0, 288.0, 285.0, 290.0, 289.0, 317.0, 316.0, 290.0, 292.0, 291.0, 288.0, 310.0, 314.0, 289.0, 284.0, 293.0, 286.0, 295.0, 287.0, 288.0, 299.0, 259.0, 263.0, 286.0, 290.0, 310.0, 320.0, 288.0, 282.0, 295.0, 292.0, 295.0, 284.0, 292.0, 290.0, 288.0, 288.0, 291.0, 291.0, 285.0, 291.0, 292.0, 290.0, 290.0, 289.0, 291.0, 291.0, 290.0, 294.0, 314.0, 319.0, 289.0, 293.0, 292.0, 295.0, 293.0, 286.0, 294.0, 288.0, 284.0, 298.0, 291.0, 291.0, 291.0, 288.0, 291.0, 282.0, 284.0, 300.0, 284.0, 289.0, 285.0, 291.0, 318.0, 315.0, 282.0, 291.0, 294.0, 285.0, 294.0, 296.0, 291.0, 282.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 296.0, 286.0, 291.0, 288.0, 280.0, 293.0, 287.0, 295.0, 261.0, 261.0, 284.0, 289.0, 311.0, 319.0, 295.0, 289.0, 293.0, 289.0, 294.0, 285.0, 291.0, 291.0, 292.0, 287.0, 292.0, 
295.0, 290.0, 289.0, 291.0, 291.0, 291.0, 288.0, 309.0, 321.0, 296.0, 286.0, 285.0, 291.0, 283.0, 287.0, 288.0, 285.0, 302.0, 285.0, 295.0, 292.0, 293.0, 288.0, 291.0, 291.0, 292.0, 290.0, 265.0, 257.0, 315.0, 315.0, 287.0, 295.0, 291.0, 285.0, 290.0, 289.0, 295.0, 281.0, 289.0, 290.0, 279.0, 300.0, 292.0, 295.0, 290.0, 289.0, 289.0, 293.0, 295.0, 284.0, 315.0, 315.0, 313.0, 320.0, 268.0, 262.0, 288.0, 291.0, 322.0, 305.0, 281.0, 292.0, 290.0, 297.0, 284.0, 292.0, 319.0, 311.0, 293.0, 283.0, 298.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.702779161330629, "mean_inference_ms": 1.2585260649777843, "mean_action_processing_ms": 0.13451059035678656, "mean_env_wait_ms": 0.846147640378893, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7552000, "num_agent_steps_trained": 7552000, "num_env_steps_sampled": 3776000, "num_env_steps_trained": 3776000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3776000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7552000, "timers": {"training_iteration_time_ms": 3633.479, "learn_time_ms": 1129.111, "learn_throughput": 11336.351, "synch_weights_time_ms": 12.533}, "counters": {"num_env_steps_sampled": 3776000, "num_env_steps_trained": 3776000, "num_agent_steps_sampled": 7552000, "num_agent_steps_trained": 7552000}, "done": false, "episodes_total": 9440, "training_iteration": 295, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-37", "timestamp": 1666581577, "time_this_iter_s": 3.774996042251587, "time_total_s": 1134.8383963108063, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1134.8383963108063, "timesteps_since_restore": 0, "iterations_since_restore": 295, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.24, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.0, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.0, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.6, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.13, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.81, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.83, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.6, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.6, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031795313116163015, "policy_loss": 0.0028263104613870382, "vf_loss": 7.591133117675781, "vf_explained_var": 0.6210389137268066, "kl": 0.002431360073387623, "entropy": 0.8117805123329163, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3788800, "num_env_steps_trained": 3788800, "num_agent_steps_sampled": 7577600, "num_agent_steps_trained": 7577600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 581.2, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 290.6}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.0, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.0, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.12, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.6, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 6.13, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.81, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.64, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.6, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.6, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 579.0, 573.0, 584.0, 573.0, 576.0, 633.0, 573.0, 579.0, 590.0, 573.0, 582.0, 582.0, 576.0, 582.0, 579.0, 573.0, 582.0, 522.0, 573.0, 630.0, 584.0, 582.0, 579.0, 582.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 582.0, 576.0, 570.0, 573.0, 587.0, 587.0, 581.0, 582.0, 582.0, 522.0, 630.0, 582.0, 576.0, 579.0, 576.0, 579.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 633.0, 530.0, 579.0, 627.0, 573.0, 587.0, 576.0, 630.0, 576.0, 582.0, 576.0, 579.0, 590.0, 630.0, 354.0, 584.0, 576.0, 587.0, 582.0, 582.0, 582.0, 584.0, 579.0, 627.0, 584.0, 576.0, 630.0, 633.0, 522.0, 582.0, 627.0, 582.0, 530.0, 579.0, 636.0, 519.0, 536.0, 582.0, 584.0, 584.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 292.0, 295.0, 293.0, 286.0, 294.0, 288.0, 284.0, 298.0, 291.0, 
291.0, 291.0, 288.0, 291.0, 282.0, 284.0, 300.0, 284.0, 289.0, 285.0, 291.0, 318.0, 315.0, 282.0, 291.0, 294.0, 285.0, 294.0, 296.0, 291.0, 282.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 296.0, 286.0, 291.0, 288.0, 280.0, 293.0, 287.0, 295.0, 261.0, 261.0, 284.0, 289.0, 311.0, 319.0, 295.0, 289.0, 293.0, 289.0, 294.0, 285.0, 291.0, 291.0, 292.0, 287.0, 292.0, 295.0, 290.0, 289.0, 291.0, 291.0, 291.0, 288.0, 309.0, 321.0, 296.0, 286.0, 285.0, 291.0, 283.0, 287.0, 288.0, 285.0, 302.0, 285.0, 295.0, 292.0, 293.0, 288.0, 291.0, 291.0, 292.0, 290.0, 265.0, 257.0, 315.0, 315.0, 287.0, 295.0, 291.0, 285.0, 290.0, 289.0, 295.0, 281.0, 289.0, 290.0, 279.0, 300.0, 292.0, 295.0, 290.0, 289.0, 289.0, 293.0, 295.0, 284.0, 315.0, 315.0, 313.0, 320.0, 268.0, 262.0, 288.0, 291.0, 322.0, 305.0, 281.0, 292.0, 290.0, 297.0, 284.0, 292.0, 319.0, 311.0, 293.0, 283.0, 298.0, 284.0, 286.0, 290.0, 290.0, 289.0, 294.0, 296.0, 313.0, 317.0, 179.0, 175.0, 282.0, 302.0, 287.0, 289.0, 289.0, 298.0, 282.0, 300.0, 292.0, 290.0, 295.0, 287.0, 292.0, 292.0, 283.0, 296.0, 313.0, 314.0, 296.0, 288.0, 288.0, 288.0, 315.0, 315.0, 312.0, 321.0, 263.0, 259.0, 289.0, 293.0, 316.0, 311.0, 290.0, 292.0, 269.0, 261.0, 290.0, 289.0, 323.0, 313.0, 256.0, 263.0, 271.0, 265.0, 286.0, 296.0, 297.0, 287.0, 305.0, 279.0, 289.0, 290.0, 287.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028229567314425, "mean_inference_ms": 1.2584377676811593, "mean_action_processing_ms": 0.13449847542232196, "mean_env_wait_ms": 0.8460929726919433, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 581.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 290.6}, "hist_stats": {"episode_reward": [582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 579.0, 573.0, 584.0, 573.0, 576.0, 633.0, 573.0, 579.0, 590.0, 573.0, 582.0, 582.0, 576.0, 
582.0, 579.0, 573.0, 582.0, 522.0, 573.0, 630.0, 584.0, 582.0, 579.0, 582.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 582.0, 576.0, 570.0, 573.0, 587.0, 587.0, 581.0, 582.0, 582.0, 522.0, 630.0, 582.0, 576.0, 579.0, 576.0, 579.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 633.0, 530.0, 579.0, 627.0, 573.0, 587.0, 576.0, 630.0, 576.0, 582.0, 576.0, 579.0, 590.0, 630.0, 354.0, 584.0, 576.0, 587.0, 582.0, 582.0, 582.0, 584.0, 579.0, 627.0, 584.0, 576.0, 630.0, 633.0, 522.0, 582.0, 627.0, 582.0, 530.0, 579.0, 636.0, 519.0, 536.0, 582.0, 584.0, 584.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 292.0, 295.0, 293.0, 286.0, 294.0, 288.0, 284.0, 298.0, 291.0, 291.0, 291.0, 288.0, 291.0, 282.0, 284.0, 300.0, 284.0, 289.0, 285.0, 291.0, 318.0, 315.0, 282.0, 291.0, 294.0, 285.0, 294.0, 296.0, 291.0, 282.0, 291.0, 291.0, 291.0, 291.0, 283.0, 293.0, 296.0, 286.0, 291.0, 288.0, 280.0, 293.0, 287.0, 295.0, 261.0, 261.0, 284.0, 289.0, 311.0, 319.0, 295.0, 289.0, 293.0, 289.0, 294.0, 285.0, 291.0, 291.0, 292.0, 287.0, 292.0, 295.0, 290.0, 289.0, 291.0, 291.0, 291.0, 288.0, 309.0, 321.0, 296.0, 286.0, 285.0, 291.0, 283.0, 287.0, 288.0, 285.0, 302.0, 285.0, 295.0, 292.0, 293.0, 288.0, 291.0, 291.0, 292.0, 290.0, 265.0, 257.0, 315.0, 315.0, 287.0, 295.0, 291.0, 285.0, 290.0, 289.0, 295.0, 281.0, 289.0, 290.0, 279.0, 300.0, 292.0, 295.0, 290.0, 289.0, 289.0, 293.0, 295.0, 284.0, 315.0, 315.0, 313.0, 320.0, 268.0, 262.0, 288.0, 291.0, 322.0, 305.0, 281.0, 292.0, 290.0, 
297.0, 284.0, 292.0, 319.0, 311.0, 293.0, 283.0, 298.0, 284.0, 286.0, 290.0, 290.0, 289.0, 294.0, 296.0, 313.0, 317.0, 179.0, 175.0, 282.0, 302.0, 287.0, 289.0, 289.0, 298.0, 282.0, 300.0, 292.0, 290.0, 295.0, 287.0, 292.0, 292.0, 283.0, 296.0, 313.0, 314.0, 296.0, 288.0, 288.0, 288.0, 315.0, 315.0, 312.0, 321.0, 263.0, 259.0, 289.0, 293.0, 316.0, 311.0, 290.0, 292.0, 269.0, 261.0, 290.0, 289.0, 323.0, 313.0, 256.0, 263.0, 271.0, 265.0, 286.0, 296.0, 297.0, 287.0, 305.0, 279.0, 289.0, 290.0, 287.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028229567314425, "mean_inference_ms": 1.2584377676811593, "mean_action_processing_ms": 0.13449847542232196, "mean_env_wait_ms": 0.8460929726919433, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7577600, "num_agent_steps_trained": 7577600, "num_env_steps_sampled": 3788800, "num_env_steps_trained": 3788800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3788800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7577600, "timers": {"training_iteration_time_ms": 3687.668, "learn_time_ms": 1131.973, "learn_throughput": 11307.692, "synch_weights_time_ms": 12.297}, "counters": {"num_env_steps_sampled": 3788800, "num_env_steps_trained": 3788800, "num_agent_steps_sampled": 7577600, "num_agent_steps_trained": 7577600}, "done": false, "episodes_total": 9472, "training_iteration": 296, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-42", "timestamp": 1666581582, "time_this_iter_s": 4.240463972091675, "time_total_s": 1139.078860282898, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1139.078860282898, "timesteps_since_restore": 0, "iterations_since_restore": 296, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.483333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 202.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.9, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.86, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.64, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.81, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.86, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.64, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.86, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.64, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009865972679108381, "policy_loss": -0.001332993502728641, "vf_loss": 7.5051116943359375, "vf_explained_var": 0.6193879842758179, "kl": 0.0017398454947397113, "entropy": 0.8082244396209717, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3801600, "num_env_steps_trained": 3801600, "num_agent_steps_sampled": 7603200, "num_agent_steps_trained": 7603200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 586.1, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 293.05}, "custom_metrics": {"sparse_reward_mean": 202.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.9, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.86, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.64, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.81, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.86, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.64, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.86, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.64, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 630.0, 582.0, 576.0, 570.0, 573.0, 587.0, 587.0, 581.0, 582.0, 582.0, 522.0, 630.0, 582.0, 576.0, 579.0, 576.0, 579.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 633.0, 530.0, 579.0, 627.0, 573.0, 587.0, 576.0, 630.0, 576.0, 582.0, 576.0, 579.0, 590.0, 630.0, 354.0, 584.0, 576.0, 587.0, 582.0, 582.0, 582.0, 584.0, 579.0, 627.0, 584.0, 576.0, 630.0, 633.0, 522.0, 582.0, 627.0, 582.0, 530.0, 579.0, 636.0, 519.0, 536.0, 582.0, 584.0, 584.0, 579.0, 579.0, 627.0, 570.0, 582.0, 630.0, 630.0, 630.0, 582.0, 627.0, 576.0, 627.0, 582.0, 582.0, 582.0, 579.0, 630.0, 590.0, 576.0, 570.0, 587.0, 579.0, 627.0, 636.0, 630.0, 630.0, 570.0, 582.0, 582.0, 576.0, 584.0, 576.0, 576.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 289.0, 291.0, 291.0, 291.0, 288.0, 309.0, 321.0, 296.0, 286.0, 285.0, 
291.0, 283.0, 287.0, 288.0, 285.0, 302.0, 285.0, 295.0, 292.0, 293.0, 288.0, 291.0, 291.0, 292.0, 290.0, 265.0, 257.0, 315.0, 315.0, 287.0, 295.0, 291.0, 285.0, 290.0, 289.0, 295.0, 281.0, 289.0, 290.0, 279.0, 300.0, 292.0, 295.0, 290.0, 289.0, 289.0, 293.0, 295.0, 284.0, 315.0, 315.0, 313.0, 320.0, 268.0, 262.0, 288.0, 291.0, 322.0, 305.0, 281.0, 292.0, 290.0, 297.0, 284.0, 292.0, 319.0, 311.0, 293.0, 283.0, 298.0, 284.0, 286.0, 290.0, 290.0, 289.0, 294.0, 296.0, 313.0, 317.0, 179.0, 175.0, 282.0, 302.0, 287.0, 289.0, 289.0, 298.0, 282.0, 300.0, 292.0, 290.0, 295.0, 287.0, 292.0, 292.0, 283.0, 296.0, 313.0, 314.0, 296.0, 288.0, 288.0, 288.0, 315.0, 315.0, 312.0, 321.0, 263.0, 259.0, 289.0, 293.0, 316.0, 311.0, 290.0, 292.0, 269.0, 261.0, 290.0, 289.0, 323.0, 313.0, 256.0, 263.0, 271.0, 265.0, 286.0, 296.0, 297.0, 287.0, 305.0, 279.0, 289.0, 290.0, 287.0, 292.0, 313.0, 314.0, 280.0, 290.0, 290.0, 292.0, 317.0, 313.0, 318.0, 312.0, 316.0, 314.0, 296.0, 286.0, 315.0, 312.0, 283.0, 293.0, 310.0, 317.0, 288.0, 294.0, 291.0, 291.0, 287.0, 295.0, 289.0, 290.0, 313.0, 317.0, 296.0, 294.0, 286.0, 290.0, 279.0, 291.0, 289.0, 298.0, 290.0, 289.0, 318.0, 309.0, 322.0, 314.0, 310.0, 320.0, 316.0, 314.0, 288.0, 282.0, 287.0, 295.0, 292.0, 290.0, 292.0, 284.0, 286.0, 298.0, 290.0, 286.0, 289.0, 287.0, 292.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028788544209611, "mean_inference_ms": 1.258342790567867, "mean_action_processing_ms": 0.134484969591017, "mean_env_wait_ms": 0.8460233377454522, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 586.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 293.05}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 630.0, 582.0, 576.0, 570.0, 573.0, 587.0, 587.0, 581.0, 582.0, 582.0, 522.0, 630.0, 582.0, 576.0, 579.0, 576.0, 
579.0, 579.0, 587.0, 579.0, 582.0, 579.0, 630.0, 633.0, 530.0, 579.0, 627.0, 573.0, 587.0, 576.0, 630.0, 576.0, 582.0, 576.0, 579.0, 590.0, 630.0, 354.0, 584.0, 576.0, 587.0, 582.0, 582.0, 582.0, 584.0, 579.0, 627.0, 584.0, 576.0, 630.0, 633.0, 522.0, 582.0, 627.0, 582.0, 530.0, 579.0, 636.0, 519.0, 536.0, 582.0, 584.0, 584.0, 579.0, 579.0, 627.0, 570.0, 582.0, 630.0, 630.0, 630.0, 582.0, 627.0, 576.0, 627.0, 582.0, 582.0, 582.0, 579.0, 630.0, 590.0, 576.0, 570.0, 587.0, 579.0, 627.0, 636.0, 630.0, 630.0, 570.0, 582.0, 582.0, 576.0, 584.0, 576.0, 576.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 289.0, 291.0, 291.0, 291.0, 288.0, 309.0, 321.0, 296.0, 286.0, 285.0, 291.0, 283.0, 287.0, 288.0, 285.0, 302.0, 285.0, 295.0, 292.0, 293.0, 288.0, 291.0, 291.0, 292.0, 290.0, 265.0, 257.0, 315.0, 315.0, 287.0, 295.0, 291.0, 285.0, 290.0, 289.0, 295.0, 281.0, 289.0, 290.0, 279.0, 300.0, 292.0, 295.0, 290.0, 289.0, 289.0, 293.0, 295.0, 284.0, 315.0, 315.0, 313.0, 320.0, 268.0, 262.0, 288.0, 291.0, 322.0, 305.0, 281.0, 292.0, 290.0, 297.0, 284.0, 292.0, 319.0, 311.0, 293.0, 283.0, 298.0, 284.0, 286.0, 290.0, 290.0, 289.0, 294.0, 296.0, 313.0, 317.0, 179.0, 175.0, 282.0, 302.0, 287.0, 289.0, 289.0, 298.0, 282.0, 300.0, 292.0, 290.0, 295.0, 287.0, 292.0, 292.0, 283.0, 296.0, 313.0, 314.0, 296.0, 288.0, 288.0, 288.0, 315.0, 315.0, 312.0, 321.0, 263.0, 259.0, 289.0, 293.0, 316.0, 311.0, 290.0, 292.0, 269.0, 261.0, 290.0, 289.0, 323.0, 313.0, 256.0, 263.0, 271.0, 265.0, 286.0, 
296.0, 297.0, 287.0, 305.0, 279.0, 289.0, 290.0, 287.0, 292.0, 313.0, 314.0, 280.0, 290.0, 290.0, 292.0, 317.0, 313.0, 318.0, 312.0, 316.0, 314.0, 296.0, 286.0, 315.0, 312.0, 283.0, 293.0, 310.0, 317.0, 288.0, 294.0, 291.0, 291.0, 287.0, 295.0, 289.0, 290.0, 313.0, 317.0, 296.0, 294.0, 286.0, 290.0, 279.0, 291.0, 289.0, 298.0, 290.0, 289.0, 318.0, 309.0, 322.0, 314.0, 310.0, 320.0, 316.0, 314.0, 288.0, 282.0, 287.0, 295.0, 292.0, 290.0, 292.0, 284.0, 286.0, 298.0, 290.0, 286.0, 289.0, 287.0, 292.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028788544209611, "mean_inference_ms": 1.258342790567867, "mean_action_processing_ms": 0.134484969591017, "mean_env_wait_ms": 0.8460233377454522, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7603200, "num_agent_steps_trained": 7603200, "num_env_steps_sampled": 3801600, "num_env_steps_trained": 3801600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3801600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7603200, "timers": {"training_iteration_time_ms": 3683.02, "learn_time_ms": 1123.786, "learn_throughput": 11390.065, "synch_weights_time_ms": 12.273}, "counters": {"num_env_steps_sampled": 3801600, "num_env_steps_trained": 3801600, "num_agent_steps_sampled": 7603200, "num_agent_steps_trained": 7603200}, "done": false, "episodes_total": 9504, "training_iteration": 297, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-46", "timestamp": 1666581586, "time_this_iter_s": 3.7531981468200684, "time_total_s": 1142.832058429718, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1142.832058429718, "timesteps_since_restore": 0, "iterations_since_restore": 297, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.78333333333333, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 181.07, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.15, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.28, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.15, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.74, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.85, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.74, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.74, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00189976638648659, "policy_loss": 0.001558080199174583, "vf_loss": 7.5299530029296875, "vf_explained_var": 0.6205426454544067, "kl": 0.0019256204832345247, "entropy": 0.8226147890090942, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3814400, "num_env_steps_trained": 3814400, "num_agent_steps_sampled": 7628800, "num_agent_steps_trained": 7628800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 586.67, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 293.335}, "custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 181.07, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.15, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.28, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.74, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.85, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.63, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.74, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.74, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 630.0, 576.0, 582.0, 576.0, 579.0, 590.0, 630.0, 354.0, 584.0, 576.0, 587.0, 582.0, 582.0, 582.0, 584.0, 579.0, 627.0, 584.0, 576.0, 630.0, 633.0, 522.0, 582.0, 627.0, 582.0, 530.0, 579.0, 636.0, 519.0, 536.0, 582.0, 584.0, 584.0, 579.0, 579.0, 627.0, 570.0, 582.0, 630.0, 630.0, 630.0, 582.0, 627.0, 576.0, 627.0, 582.0, 582.0, 582.0, 579.0, 630.0, 590.0, 576.0, 570.0, 587.0, 579.0, 627.0, 636.0, 630.0, 630.0, 570.0, 582.0, 582.0, 576.0, 584.0, 576.0, 576.0, 584.0, 581.0, 576.0, 579.0, 633.0, 530.0, 579.0, 573.0, 579.0, 584.0, 525.0, 582.0, 582.0, 582.0, 576.0, 579.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 582.0, 573.0, 579.0, 579.0, 636.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 292.0, 319.0, 311.0, 293.0, 283.0, 298.0, 284.0, 286.0, 290.0, 290.0, 
289.0, 294.0, 296.0, 313.0, 317.0, 179.0, 175.0, 282.0, 302.0, 287.0, 289.0, 289.0, 298.0, 282.0, 300.0, 292.0, 290.0, 295.0, 287.0, 292.0, 292.0, 283.0, 296.0, 313.0, 314.0, 296.0, 288.0, 288.0, 288.0, 315.0, 315.0, 312.0, 321.0, 263.0, 259.0, 289.0, 293.0, 316.0, 311.0, 290.0, 292.0, 269.0, 261.0, 290.0, 289.0, 323.0, 313.0, 256.0, 263.0, 271.0, 265.0, 286.0, 296.0, 297.0, 287.0, 305.0, 279.0, 289.0, 290.0, 287.0, 292.0, 313.0, 314.0, 280.0, 290.0, 290.0, 292.0, 317.0, 313.0, 318.0, 312.0, 316.0, 314.0, 296.0, 286.0, 315.0, 312.0, 283.0, 293.0, 310.0, 317.0, 288.0, 294.0, 291.0, 291.0, 287.0, 295.0, 289.0, 290.0, 313.0, 317.0, 296.0, 294.0, 286.0, 290.0, 279.0, 291.0, 289.0, 298.0, 290.0, 289.0, 318.0, 309.0, 322.0, 314.0, 310.0, 320.0, 316.0, 314.0, 288.0, 282.0, 287.0, 295.0, 292.0, 290.0, 292.0, 284.0, 286.0, 298.0, 290.0, 286.0, 289.0, 287.0, 292.0, 292.0, 292.0, 289.0, 288.0, 288.0, 287.0, 292.0, 317.0, 316.0, 264.0, 266.0, 291.0, 288.0, 288.0, 285.0, 292.0, 287.0, 295.0, 289.0, 260.0, 265.0, 286.0, 296.0, 291.0, 291.0, 285.0, 297.0, 295.0, 281.0, 290.0, 289.0, 314.0, 316.0, 317.0, 316.0, 288.0, 291.0, 318.0, 318.0, 293.0, 289.0, 287.0, 292.0, 289.0, 293.0, 318.0, 312.0, 291.0, 291.0, 285.0, 294.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 287.0, 292.0, 290.0, 289.0, 318.0, 318.0, 284.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7029101461745526, "mean_inference_ms": 1.2582519609207465, "mean_action_processing_ms": 0.13447394317000266, "mean_env_wait_ms": 0.8459687051230822, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 586.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 293.335}, "hist_stats": {"episode_reward": [576.0, 630.0, 576.0, 582.0, 576.0, 579.0, 590.0, 630.0, 354.0, 584.0, 576.0, 587.0, 582.0, 582.0, 582.0, 584.0, 579.0, 627.0, 
584.0, 576.0, 630.0, 633.0, 522.0, 582.0, 627.0, 582.0, 530.0, 579.0, 636.0, 519.0, 536.0, 582.0, 584.0, 584.0, 579.0, 579.0, 627.0, 570.0, 582.0, 630.0, 630.0, 630.0, 582.0, 627.0, 576.0, 627.0, 582.0, 582.0, 582.0, 579.0, 630.0, 590.0, 576.0, 570.0, 587.0, 579.0, 627.0, 636.0, 630.0, 630.0, 570.0, 582.0, 582.0, 576.0, 584.0, 576.0, 576.0, 584.0, 581.0, 576.0, 579.0, 633.0, 530.0, 579.0, 573.0, 579.0, 584.0, 525.0, 582.0, 582.0, 582.0, 576.0, 579.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 582.0, 573.0, 579.0, 579.0, 636.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 292.0, 319.0, 311.0, 293.0, 283.0, 298.0, 284.0, 286.0, 290.0, 290.0, 289.0, 294.0, 296.0, 313.0, 317.0, 179.0, 175.0, 282.0, 302.0, 287.0, 289.0, 289.0, 298.0, 282.0, 300.0, 292.0, 290.0, 295.0, 287.0, 292.0, 292.0, 283.0, 296.0, 313.0, 314.0, 296.0, 288.0, 288.0, 288.0, 315.0, 315.0, 312.0, 321.0, 263.0, 259.0, 289.0, 293.0, 316.0, 311.0, 290.0, 292.0, 269.0, 261.0, 290.0, 289.0, 323.0, 313.0, 256.0, 263.0, 271.0, 265.0, 286.0, 296.0, 297.0, 287.0, 305.0, 279.0, 289.0, 290.0, 287.0, 292.0, 313.0, 314.0, 280.0, 290.0, 290.0, 292.0, 317.0, 313.0, 318.0, 312.0, 316.0, 314.0, 296.0, 286.0, 315.0, 312.0, 283.0, 293.0, 310.0, 317.0, 288.0, 294.0, 291.0, 291.0, 287.0, 295.0, 289.0, 290.0, 313.0, 317.0, 296.0, 294.0, 286.0, 290.0, 279.0, 291.0, 289.0, 298.0, 290.0, 289.0, 318.0, 309.0, 322.0, 314.0, 310.0, 320.0, 316.0, 314.0, 288.0, 282.0, 287.0, 295.0, 292.0, 290.0, 
292.0, 284.0, 286.0, 298.0, 290.0, 286.0, 289.0, 287.0, 292.0, 292.0, 292.0, 289.0, 288.0, 288.0, 287.0, 292.0, 317.0, 316.0, 264.0, 266.0, 291.0, 288.0, 288.0, 285.0, 292.0, 287.0, 295.0, 289.0, 260.0, 265.0, 286.0, 296.0, 291.0, 291.0, 285.0, 297.0, 295.0, 281.0, 290.0, 289.0, 314.0, 316.0, 317.0, 316.0, 288.0, 291.0, 318.0, 318.0, 293.0, 289.0, 287.0, 292.0, 289.0, 293.0, 318.0, 312.0, 291.0, 291.0, 285.0, 294.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 287.0, 292.0, 290.0, 289.0, 318.0, 318.0, 284.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7029101461745526, "mean_inference_ms": 1.2582519609207465, "mean_action_processing_ms": 0.13447394317000266, "mean_env_wait_ms": 0.8459687051230822, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7628800, "num_agent_steps_trained": 7628800, "num_env_steps_sampled": 3814400, "num_env_steps_trained": 3814400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3814400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7628800, "timers": {"training_iteration_time_ms": 3661.363, "learn_time_ms": 1106.611, "learn_throughput": 11566.848, "synch_weights_time_ms": 11.618}, "counters": {"num_env_steps_sampled": 3814400, "num_env_steps_trained": 3814400, "num_agent_steps_sampled": 7628800, "num_agent_steps_trained": 7628800}, "done": false, "episodes_total": 9536, "training_iteration": 298, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-49", "timestamp": 1666581589, "time_this_iter_s": 3.576004981994629, "time_total_s": 1146.4080634117126, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1146.4080634117126, "timesteps_since_restore": 0, "iterations_since_restore": 298, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.68, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.22, "shaped_reward_min": 165, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.29, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.52, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.08, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 18.26, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.79, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.95, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.79, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.79, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.79, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001324523356743157, "policy_loss": 0.0009796429658308625, "vf_loss": 7.541810512542725, "vf_explained_var": 0.6045008897781372, "kl": 0.002173337619751692, "entropy": 0.8185964226722717, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3827200, "num_env_steps_trained": 3827200, "num_agent_steps_sampled": 7654400, "num_agent_steps_trained": 7654400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 525.0, "episode_reward_mean": 591.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 295.71}, "custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.22, "shaped_reward_min": 165, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.29, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.52, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.08, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 18.26, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.79, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 18.0, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.15, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.95, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.79, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.74, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.54, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.79, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 18.0, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.79, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 18.0, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 584.0, 579.0, 579.0, 627.0, 570.0, 582.0, 630.0, 630.0, 630.0, 582.0, 627.0, 576.0, 627.0, 582.0, 582.0, 582.0, 579.0, 630.0, 590.0, 576.0, 570.0, 587.0, 579.0, 627.0, 636.0, 630.0, 630.0, 570.0, 582.0, 582.0, 576.0, 584.0, 576.0, 576.0, 584.0, 581.0, 576.0, 579.0, 633.0, 530.0, 579.0, 573.0, 579.0, 584.0, 525.0, 582.0, 582.0, 582.0, 576.0, 579.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 582.0, 573.0, 579.0, 579.0, 636.0, 579.0, 587.0, 582.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 582.0, 633.0, 630.0, 576.0, 576.0, 582.0, 579.0, 630.0, 576.0, 579.0, 582.0, 584.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 636.0, 627.0, 633.0, 630.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 287.0, 305.0, 279.0, 289.0, 290.0, 287.0, 292.0, 313.0, 314.0, 280.0, 
290.0, 290.0, 292.0, 317.0, 313.0, 318.0, 312.0, 316.0, 314.0, 296.0, 286.0, 315.0, 312.0, 283.0, 293.0, 310.0, 317.0, 288.0, 294.0, 291.0, 291.0, 287.0, 295.0, 289.0, 290.0, 313.0, 317.0, 296.0, 294.0, 286.0, 290.0, 279.0, 291.0, 289.0, 298.0, 290.0, 289.0, 318.0, 309.0, 322.0, 314.0, 310.0, 320.0, 316.0, 314.0, 288.0, 282.0, 287.0, 295.0, 292.0, 290.0, 292.0, 284.0, 286.0, 298.0, 290.0, 286.0, 289.0, 287.0, 292.0, 292.0, 292.0, 289.0, 288.0, 288.0, 287.0, 292.0, 317.0, 316.0, 264.0, 266.0, 291.0, 288.0, 288.0, 285.0, 292.0, 287.0, 295.0, 289.0, 260.0, 265.0, 286.0, 296.0, 291.0, 291.0, 285.0, 297.0, 295.0, 281.0, 290.0, 289.0, 314.0, 316.0, 317.0, 316.0, 288.0, 291.0, 318.0, 318.0, 293.0, 289.0, 287.0, 292.0, 289.0, 293.0, 318.0, 312.0, 291.0, 291.0, 285.0, 294.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 287.0, 292.0, 290.0, 289.0, 318.0, 318.0, 284.0, 295.0, 293.0, 294.0, 291.0, 291.0, 314.0, 316.0, 291.0, 288.0, 286.0, 293.0, 291.0, 285.0, 290.0, 289.0, 292.0, 290.0, 294.0, 288.0, 314.0, 319.0, 314.0, 316.0, 286.0, 290.0, 286.0, 290.0, 290.0, 292.0, 297.0, 282.0, 317.0, 313.0, 288.0, 288.0, 293.0, 286.0, 291.0, 291.0, 297.0, 287.0, 297.0, 285.0, 289.0, 290.0, 290.0, 292.0, 279.0, 297.0, 297.0, 282.0, 289.0, 284.0, 315.0, 321.0, 319.0, 308.0, 317.0, 316.0, 312.0, 318.0, 293.0, 289.0, 293.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028457259266864, "mean_inference_ms": 1.258041513717552, "mean_action_processing_ms": 0.1344655857753008, "mean_env_wait_ms": 0.8458612410170017, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 525.0, "episode_reward_mean": 591.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 295.71}, "hist_stats": {"episode_reward": [584.0, 584.0, 579.0, 579.0, 627.0, 570.0, 582.0, 630.0, 630.0, 630.0, 582.0, 627.0, 576.0, 627.0, 582.0, 582.0, 582.0, 579.0, 630.0, 
590.0, 576.0, 570.0, 587.0, 579.0, 627.0, 636.0, 630.0, 630.0, 570.0, 582.0, 582.0, 576.0, 584.0, 576.0, 576.0, 584.0, 581.0, 576.0, 579.0, 633.0, 530.0, 579.0, 573.0, 579.0, 584.0, 525.0, 582.0, 582.0, 582.0, 576.0, 579.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 582.0, 573.0, 579.0, 579.0, 636.0, 579.0, 587.0, 582.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 582.0, 633.0, 630.0, 576.0, 576.0, 582.0, 579.0, 630.0, 576.0, 579.0, 582.0, 584.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 636.0, 627.0, 633.0, 630.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 287.0, 305.0, 279.0, 289.0, 290.0, 287.0, 292.0, 313.0, 314.0, 280.0, 290.0, 290.0, 292.0, 317.0, 313.0, 318.0, 312.0, 316.0, 314.0, 296.0, 286.0, 315.0, 312.0, 283.0, 293.0, 310.0, 317.0, 288.0, 294.0, 291.0, 291.0, 287.0, 295.0, 289.0, 290.0, 313.0, 317.0, 296.0, 294.0, 286.0, 290.0, 279.0, 291.0, 289.0, 298.0, 290.0, 289.0, 318.0, 309.0, 322.0, 314.0, 310.0, 320.0, 316.0, 314.0, 288.0, 282.0, 287.0, 295.0, 292.0, 290.0, 292.0, 284.0, 286.0, 298.0, 290.0, 286.0, 289.0, 287.0, 292.0, 292.0, 292.0, 289.0, 288.0, 288.0, 287.0, 292.0, 317.0, 316.0, 264.0, 266.0, 291.0, 288.0, 288.0, 285.0, 292.0, 287.0, 295.0, 289.0, 260.0, 265.0, 286.0, 296.0, 291.0, 291.0, 285.0, 297.0, 295.0, 281.0, 290.0, 289.0, 314.0, 316.0, 317.0, 316.0, 288.0, 291.0, 318.0, 318.0, 293.0, 289.0, 287.0, 292.0, 289.0, 293.0, 318.0, 312.0, 291.0, 291.0, 285.0, 294.0, 287.0, 289.0, 289.0, 293.0, 290.0, 
283.0, 287.0, 292.0, 290.0, 289.0, 318.0, 318.0, 284.0, 295.0, 293.0, 294.0, 291.0, 291.0, 314.0, 316.0, 291.0, 288.0, 286.0, 293.0, 291.0, 285.0, 290.0, 289.0, 292.0, 290.0, 294.0, 288.0, 314.0, 319.0, 314.0, 316.0, 286.0, 290.0, 286.0, 290.0, 290.0, 292.0, 297.0, 282.0, 317.0, 313.0, 288.0, 288.0, 293.0, 286.0, 291.0, 291.0, 297.0, 287.0, 297.0, 285.0, 289.0, 290.0, 290.0, 292.0, 279.0, 297.0, 297.0, 282.0, 289.0, 284.0, 315.0, 321.0, 319.0, 308.0, 317.0, 316.0, 312.0, 318.0, 293.0, 289.0, 293.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7028457259266864, "mean_inference_ms": 1.258041513717552, "mean_action_processing_ms": 0.1344655857753008, "mean_env_wait_ms": 0.8458612410170017, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7654400, "num_agent_steps_trained": 7654400, "num_env_steps_sampled": 3827200, "num_env_steps_trained": 3827200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3827200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7654400, "timers": {"training_iteration_time_ms": 3658.483, "learn_time_ms": 1105.958, "learn_throughput": 11573.68, "synch_weights_time_ms": 11.662}, "counters": {"num_env_steps_sampled": 3827200, "num_env_steps_trained": 3827200, "num_agent_steps_sampled": 7654400, "num_agent_steps_trained": 7654400}, "done": false, "episodes_total": 9568, "training_iteration": 299, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-19-53", "timestamp": 1666581593, "time_this_iter_s": 3.615095615386963, "time_total_s": 1150.0231590270996, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1150.0231590270996, "timesteps_since_restore": 0, "iterations_since_restore": 299, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.3, "ram_util_percent": 10.62}} +{"evaluation": {"average_sparse_reward": 200.0, "num_healthy_workers": 0, "num_recreated_workers": 0}, "custom_metrics": {"sparse_reward_mean": 204.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.8, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.63, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.46, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 18.08, "useful_onion_pickup_agent_1_min": 9, 
"useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.87, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, 
"soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00201015523634851, "policy_loss": -0.0023469526786357164, "vf_loss": 7.471502304077148, "vf_explained_var": 0.6078072786331177, "kl": 0.002010123338550329, "entropy": 0.8207009434700012, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3840000, "num_env_steps_trained": 3840000, "num_agent_steps_sampled": 7680000, "num_agent_steps_trained": 7680000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 591.6, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 295.8}, "custom_metrics": {"sparse_reward_mean": 204.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.8, "shaped_reward_min": 164, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, 
"potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.63, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.46, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 18.08, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.83, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.06, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.87, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.74, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.61, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.83, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.83, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 576.0, 576.0, 584.0, 581.0, 576.0, 579.0, 633.0, 530.0, 579.0, 573.0, 579.0, 584.0, 525.0, 582.0, 582.0, 582.0, 576.0, 579.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 582.0, 573.0, 579.0, 579.0, 636.0, 579.0, 587.0, 582.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 582.0, 633.0, 630.0, 576.0, 576.0, 582.0, 579.0, 630.0, 576.0, 579.0, 582.0, 584.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 636.0, 627.0, 633.0, 630.0, 582.0, 587.0, 627.0, 584.0, 633.0, 539.0, 579.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 636.0, 636.0, 633.0, 590.0, 579.0, 584.0, 639.0, 579.0, 590.0, 582.0, 582.0, 582.0, 587.0, 630.0, 582.0, 564.0, 579.0, 582.0, 576.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 298.0, 290.0, 286.0, 289.0, 
287.0, 292.0, 292.0, 292.0, 289.0, 288.0, 288.0, 287.0, 292.0, 317.0, 316.0, 264.0, 266.0, 291.0, 288.0, 288.0, 285.0, 292.0, 287.0, 295.0, 289.0, 260.0, 265.0, 286.0, 296.0, 291.0, 291.0, 285.0, 297.0, 295.0, 281.0, 290.0, 289.0, 314.0, 316.0, 317.0, 316.0, 288.0, 291.0, 318.0, 318.0, 293.0, 289.0, 287.0, 292.0, 289.0, 293.0, 318.0, 312.0, 291.0, 291.0, 285.0, 294.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 287.0, 292.0, 290.0, 289.0, 318.0, 318.0, 284.0, 295.0, 293.0, 294.0, 291.0, 291.0, 314.0, 316.0, 291.0, 288.0, 286.0, 293.0, 291.0, 285.0, 290.0, 289.0, 292.0, 290.0, 294.0, 288.0, 314.0, 319.0, 314.0, 316.0, 286.0, 290.0, 286.0, 290.0, 290.0, 292.0, 297.0, 282.0, 317.0, 313.0, 288.0, 288.0, 293.0, 286.0, 291.0, 291.0, 297.0, 287.0, 297.0, 285.0, 289.0, 290.0, 290.0, 292.0, 279.0, 297.0, 297.0, 282.0, 289.0, 284.0, 315.0, 321.0, 319.0, 308.0, 317.0, 316.0, 312.0, 318.0, 293.0, 289.0, 293.0, 294.0, 317.0, 310.0, 297.0, 287.0, 316.0, 317.0, 277.0, 262.0, 288.0, 291.0, 312.0, 315.0, 288.0, 288.0, 322.0, 308.0, 322.0, 308.0, 298.0, 284.0, 313.0, 317.0, 320.0, 316.0, 318.0, 318.0, 312.0, 321.0, 293.0, 297.0, 299.0, 280.0, 294.0, 290.0, 320.0, 319.0, 284.0, 295.0, 293.0, 297.0, 290.0, 292.0, 291.0, 291.0, 290.0, 292.0, 291.0, 296.0, 321.0, 309.0, 295.0, 287.0, 275.0, 289.0, 295.0, 284.0, 291.0, 291.0, 276.0, 300.0, 290.0, 292.0, 297.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7027726542537445, "mean_inference_ms": 1.257838613560412, "mean_action_processing_ms": 0.1344571221669019, "mean_env_wait_ms": 0.845761859715749, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 591.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 260.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 295.8}, "hist_stats": {"episode_reward": [584.0, 576.0, 576.0, 584.0, 581.0, 576.0, 579.0, 633.0, 530.0, 579.0, 573.0, 579.0, 584.0, 
525.0, 582.0, 582.0, 582.0, 576.0, 579.0, 630.0, 633.0, 579.0, 636.0, 582.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 582.0, 573.0, 579.0, 579.0, 636.0, 579.0, 587.0, 582.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 582.0, 633.0, 630.0, 576.0, 576.0, 582.0, 579.0, 630.0, 576.0, 579.0, 582.0, 584.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 636.0, 627.0, 633.0, 630.0, 582.0, 587.0, 627.0, 584.0, 633.0, 539.0, 579.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 636.0, 636.0, 633.0, 590.0, 579.0, 584.0, 639.0, 579.0, 590.0, 582.0, 582.0, 582.0, 587.0, 630.0, 582.0, 564.0, 579.0, 582.0, 576.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 298.0, 290.0, 286.0, 289.0, 287.0, 292.0, 292.0, 292.0, 289.0, 288.0, 288.0, 287.0, 292.0, 317.0, 316.0, 264.0, 266.0, 291.0, 288.0, 288.0, 285.0, 292.0, 287.0, 295.0, 289.0, 260.0, 265.0, 286.0, 296.0, 291.0, 291.0, 285.0, 297.0, 295.0, 281.0, 290.0, 289.0, 314.0, 316.0, 317.0, 316.0, 288.0, 291.0, 318.0, 318.0, 293.0, 289.0, 287.0, 292.0, 289.0, 293.0, 318.0, 312.0, 291.0, 291.0, 285.0, 294.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 287.0, 292.0, 290.0, 289.0, 318.0, 318.0, 284.0, 295.0, 293.0, 294.0, 291.0, 291.0, 314.0, 316.0, 291.0, 288.0, 286.0, 293.0, 291.0, 285.0, 290.0, 289.0, 292.0, 290.0, 294.0, 288.0, 314.0, 319.0, 314.0, 316.0, 286.0, 290.0, 286.0, 290.0, 290.0, 292.0, 297.0, 282.0, 317.0, 313.0, 288.0, 288.0, 293.0, 286.0, 291.0, 291.0, 297.0, 287.0, 297.0, 285.0, 289.0, 290.0, 290.0, 292.0, 279.0, 297.0, 297.0, 
282.0, 289.0, 284.0, 315.0, 321.0, 319.0, 308.0, 317.0, 316.0, 312.0, 318.0, 293.0, 289.0, 293.0, 294.0, 317.0, 310.0, 297.0, 287.0, 316.0, 317.0, 277.0, 262.0, 288.0, 291.0, 312.0, 315.0, 288.0, 288.0, 322.0, 308.0, 322.0, 308.0, 298.0, 284.0, 313.0, 317.0, 320.0, 316.0, 318.0, 318.0, 312.0, 321.0, 293.0, 297.0, 299.0, 280.0, 294.0, 290.0, 320.0, 319.0, 284.0, 295.0, 293.0, 297.0, 290.0, 292.0, 291.0, 291.0, 290.0, 292.0, 291.0, 296.0, 321.0, 309.0, 295.0, 287.0, 275.0, 289.0, 295.0, 284.0, 291.0, 291.0, 276.0, 300.0, 290.0, 292.0, 297.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7027726542537445, "mean_inference_ms": 1.257838613560412, "mean_action_processing_ms": 0.1344571221669019, "mean_env_wait_ms": 0.845761859715749, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7680000, "num_agent_steps_trained": 7680000, "num_env_steps_sampled": 3840000, "num_env_steps_trained": 3840000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3840000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7680000, "timers": {"training_iteration_time_ms": 3670.074, "learn_time_ms": 1112.802, "learn_throughput": 11502.495, "synch_weights_time_ms": 11.61}, "counters": {"num_env_steps_sampled": 3840000, "num_env_steps_trained": 3840000, "num_agent_steps_sampled": 7680000, "num_agent_steps_trained": 7680000}, "done": false, "episodes_total": 9600, "training_iteration": 300, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-00", "timestamp": 1666581600, "time_this_iter_s": 7.122778654098511, "time_total_s": 1157.1459376811981, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, 
"_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, 
"no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1157.1459376811981, "timesteps_since_restore": 0, "iterations_since_restore": 300, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 14.236363636363636, "ram_util_percent": 10.599999999999998}} +{"custom_metrics": {"sparse_reward_mean": 204.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.02, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.77, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.09, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.65, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.82, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.1, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.3, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.62, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.79, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.3, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.62, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.3, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.62, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001016387133859098, "policy_loss": 0.0006756494985893369, "vf_loss": 7.564582824707031, "vf_explained_var": 0.6276317834854126, "kl": 0.0027429345063865185, "entropy": 0.8314375877380371, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3852800, "num_env_steps_trained": 3852800, "num_agent_steps_sampled": 7705600, "num_agent_steps_trained": 7705600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 590.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 295.01}, "custom_metrics": {"sparse_reward_mean": 204.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.02, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.77, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.09, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.65, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.82, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.3, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.62, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.79, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.19, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.7, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.66, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.3, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.62, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.3, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.62, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 636.0, 579.0, 587.0, 582.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 582.0, 633.0, 630.0, 576.0, 576.0, 582.0, 579.0, 630.0, 576.0, 579.0, 582.0, 584.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 636.0, 627.0, 633.0, 630.0, 582.0, 587.0, 627.0, 584.0, 633.0, 539.0, 579.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 636.0, 636.0, 633.0, 590.0, 579.0, 584.0, 639.0, 579.0, 590.0, 582.0, 582.0, 582.0, 587.0, 630.0, 582.0, 564.0, 579.0, 582.0, 576.0, 582.0, 584.0, 639.0, 576.0, 573.0, 573.0, 633.0, 582.0, 636.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 633.0, 576.0, 579.0, 561.0, 582.0, 582.0, 633.0, 582.0, 627.0, 573.0, 180.0, 582.0, 579.0, 624.0, 576.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 290.0, 289.0, 318.0, 318.0, 284.0, 295.0, 293.0, 294.0, 291.0, 
291.0, 314.0, 316.0, 291.0, 288.0, 286.0, 293.0, 291.0, 285.0, 290.0, 289.0, 292.0, 290.0, 294.0, 288.0, 314.0, 319.0, 314.0, 316.0, 286.0, 290.0, 286.0, 290.0, 290.0, 292.0, 297.0, 282.0, 317.0, 313.0, 288.0, 288.0, 293.0, 286.0, 291.0, 291.0, 297.0, 287.0, 297.0, 285.0, 289.0, 290.0, 290.0, 292.0, 279.0, 297.0, 297.0, 282.0, 289.0, 284.0, 315.0, 321.0, 319.0, 308.0, 317.0, 316.0, 312.0, 318.0, 293.0, 289.0, 293.0, 294.0, 317.0, 310.0, 297.0, 287.0, 316.0, 317.0, 277.0, 262.0, 288.0, 291.0, 312.0, 315.0, 288.0, 288.0, 322.0, 308.0, 322.0, 308.0, 298.0, 284.0, 313.0, 317.0, 320.0, 316.0, 318.0, 318.0, 312.0, 321.0, 293.0, 297.0, 299.0, 280.0, 294.0, 290.0, 320.0, 319.0, 284.0, 295.0, 293.0, 297.0, 290.0, 292.0, 291.0, 291.0, 290.0, 292.0, 291.0, 296.0, 321.0, 309.0, 295.0, 287.0, 275.0, 289.0, 295.0, 284.0, 291.0, 291.0, 276.0, 300.0, 290.0, 292.0, 297.0, 287.0, 320.0, 319.0, 293.0, 283.0, 291.0, 282.0, 287.0, 286.0, 313.0, 320.0, 288.0, 294.0, 318.0, 318.0, 295.0, 287.0, 289.0, 293.0, 296.0, 291.0, 295.0, 284.0, 295.0, 287.0, 298.0, 284.0, 295.0, 284.0, 314.0, 313.0, 321.0, 312.0, 283.0, 293.0, 291.0, 288.0, 273.0, 288.0, 292.0, 290.0, 284.0, 298.0, 318.0, 315.0, 290.0, 292.0, 319.0, 308.0, 290.0, 283.0, 90.0, 90.0, 288.0, 294.0, 289.0, 290.0, 313.0, 311.0, 294.0, 282.0, 294.0, 288.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7027120703901027, "mean_inference_ms": 1.2576525999597852, "mean_action_processing_ms": 0.13445152413405506, "mean_env_wait_ms": 0.8456862565452087, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 590.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 295.01}, "hist_stats": {"episode_reward": [579.0, 579.0, 636.0, 579.0, 587.0, 582.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 582.0, 633.0, 630.0, 576.0, 576.0, 582.0, 579.0, 
630.0, 576.0, 579.0, 582.0, 584.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 636.0, 627.0, 633.0, 630.0, 582.0, 587.0, 627.0, 584.0, 633.0, 539.0, 579.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 636.0, 636.0, 633.0, 590.0, 579.0, 584.0, 639.0, 579.0, 590.0, 582.0, 582.0, 582.0, 587.0, 630.0, 582.0, 564.0, 579.0, 582.0, 576.0, 582.0, 584.0, 639.0, 576.0, 573.0, 573.0, 633.0, 582.0, 636.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 633.0, 576.0, 579.0, 561.0, 582.0, 582.0, 633.0, 582.0, 627.0, 573.0, 180.0, 582.0, 579.0, 624.0, 576.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 290.0, 289.0, 318.0, 318.0, 284.0, 295.0, 293.0, 294.0, 291.0, 291.0, 314.0, 316.0, 291.0, 288.0, 286.0, 293.0, 291.0, 285.0, 290.0, 289.0, 292.0, 290.0, 294.0, 288.0, 314.0, 319.0, 314.0, 316.0, 286.0, 290.0, 286.0, 290.0, 290.0, 292.0, 297.0, 282.0, 317.0, 313.0, 288.0, 288.0, 293.0, 286.0, 291.0, 291.0, 297.0, 287.0, 297.0, 285.0, 289.0, 290.0, 290.0, 292.0, 279.0, 297.0, 297.0, 282.0, 289.0, 284.0, 315.0, 321.0, 319.0, 308.0, 317.0, 316.0, 312.0, 318.0, 293.0, 289.0, 293.0, 294.0, 317.0, 310.0, 297.0, 287.0, 316.0, 317.0, 277.0, 262.0, 288.0, 291.0, 312.0, 315.0, 288.0, 288.0, 322.0, 308.0, 322.0, 308.0, 298.0, 284.0, 313.0, 317.0, 320.0, 316.0, 318.0, 318.0, 312.0, 321.0, 293.0, 297.0, 299.0, 280.0, 294.0, 290.0, 320.0, 319.0, 284.0, 295.0, 293.0, 297.0, 290.0, 292.0, 291.0, 291.0, 290.0, 292.0, 291.0, 296.0, 321.0, 309.0, 295.0, 287.0, 275.0, 289.0, 295.0, 
284.0, 291.0, 291.0, 276.0, 300.0, 290.0, 292.0, 297.0, 287.0, 320.0, 319.0, 293.0, 283.0, 291.0, 282.0, 287.0, 286.0, 313.0, 320.0, 288.0, 294.0, 318.0, 318.0, 295.0, 287.0, 289.0, 293.0, 296.0, 291.0, 295.0, 284.0, 295.0, 287.0, 298.0, 284.0, 295.0, 284.0, 314.0, 313.0, 321.0, 312.0, 283.0, 293.0, 291.0, 288.0, 273.0, 288.0, 292.0, 290.0, 284.0, 298.0, 318.0, 315.0, 290.0, 292.0, 319.0, 308.0, 290.0, 283.0, 90.0, 90.0, 288.0, 294.0, 289.0, 290.0, 313.0, 311.0, 294.0, 282.0, 294.0, 288.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7027120703901027, "mean_inference_ms": 1.2576525999597852, "mean_action_processing_ms": 0.13445152413405506, "mean_env_wait_ms": 0.8456862565452087, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7705600, "num_agent_steps_trained": 7705600, "num_env_steps_sampled": 3852800, "num_env_steps_trained": 3852800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3852800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7705600, "timers": {"training_iteration_time_ms": 3666.048, "learn_time_ms": 1107.077, "learn_throughput": 11561.979, "synch_weights_time_ms": 11.621}, "counters": {"num_env_steps_sampled": 3852800, "num_env_steps_trained": 3852800, "num_agent_steps_sampled": 7705600, "num_agent_steps_trained": 7705600}, "done": false, "episodes_total": 9632, "training_iteration": 301, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-04", "timestamp": 1666581604, "time_this_iter_s": 3.587852716445923, "time_total_s": 1160.733790397644, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1160.733790397644, "timesteps_since_restore": 0, "iterations_since_restore": 301, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.46, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 204.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.04, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.51, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.15, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.39, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.97, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.1, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.84, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.76, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.72, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.1, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.1, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015810655895620584, "policy_loss": 0.0012423595180734992, "vf_loss": 7.515679359436035, "vf_explained_var": 0.569876492023468, "kl": 0.0021238639019429684, "entropy": 0.8257216215133667, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3865600, "num_env_steps_trained": 3865600, "num_agent_steps_sampled": 7731200, "num_agent_steps_trained": 7731200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 590.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 295.02}, "custom_metrics": {"sparse_reward_mean": 204.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.04, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.51, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.15, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.39, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.97, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.1, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.81, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.16, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.84, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.76, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.72, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.1, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.81, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.1, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.81, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 582.0, 587.0, 627.0, 584.0, 633.0, 539.0, 579.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 636.0, 636.0, 633.0, 590.0, 579.0, 584.0, 639.0, 579.0, 590.0, 582.0, 582.0, 582.0, 587.0, 630.0, 582.0, 564.0, 579.0, 582.0, 576.0, 582.0, 584.0, 639.0, 576.0, 573.0, 573.0, 633.0, 582.0, 636.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 633.0, 576.0, 579.0, 561.0, 582.0, 582.0, 633.0, 582.0, 627.0, 573.0, 180.0, 582.0, 579.0, 624.0, 576.0, 582.0, 582.0, 570.0, 579.0, 587.0, 582.0, 630.0, 627.0, 573.0, 636.0, 636.0, 582.0, 630.0, 465.0, 582.0, 579.0, 639.0, 579.0, 579.0, 530.0, 579.0, 633.0, 582.0, 630.0, 582.0, 582.0, 582.0, 627.0, 584.0, 570.0, 579.0, 633.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 316.0, 312.0, 318.0, 293.0, 289.0, 293.0, 294.0, 317.0, 310.0, 297.0, 
287.0, 316.0, 317.0, 277.0, 262.0, 288.0, 291.0, 312.0, 315.0, 288.0, 288.0, 322.0, 308.0, 322.0, 308.0, 298.0, 284.0, 313.0, 317.0, 320.0, 316.0, 318.0, 318.0, 312.0, 321.0, 293.0, 297.0, 299.0, 280.0, 294.0, 290.0, 320.0, 319.0, 284.0, 295.0, 293.0, 297.0, 290.0, 292.0, 291.0, 291.0, 290.0, 292.0, 291.0, 296.0, 321.0, 309.0, 295.0, 287.0, 275.0, 289.0, 295.0, 284.0, 291.0, 291.0, 276.0, 300.0, 290.0, 292.0, 297.0, 287.0, 320.0, 319.0, 293.0, 283.0, 291.0, 282.0, 287.0, 286.0, 313.0, 320.0, 288.0, 294.0, 318.0, 318.0, 295.0, 287.0, 289.0, 293.0, 296.0, 291.0, 295.0, 284.0, 295.0, 287.0, 298.0, 284.0, 295.0, 284.0, 314.0, 313.0, 321.0, 312.0, 283.0, 293.0, 291.0, 288.0, 273.0, 288.0, 292.0, 290.0, 284.0, 298.0, 318.0, 315.0, 290.0, 292.0, 319.0, 308.0, 290.0, 283.0, 90.0, 90.0, 288.0, 294.0, 289.0, 290.0, 313.0, 311.0, 294.0, 282.0, 294.0, 288.0, 292.0, 290.0, 274.0, 296.0, 288.0, 291.0, 290.0, 297.0, 288.0, 294.0, 310.0, 320.0, 304.0, 323.0, 289.0, 284.0, 319.0, 317.0, 317.0, 319.0, 293.0, 289.0, 319.0, 311.0, 236.0, 229.0, 292.0, 290.0, 289.0, 290.0, 318.0, 321.0, 294.0, 285.0, 290.0, 289.0, 264.0, 266.0, 292.0, 287.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 297.0, 285.0, 292.0, 290.0, 292.0, 290.0, 323.0, 304.0, 291.0, 293.0, 290.0, 280.0, 293.0, 286.0, 318.0, 315.0, 291.0, 291.0, 289.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7026520157753002, "mean_inference_ms": 1.2574482022329547, "mean_action_processing_ms": 0.1344425360928672, "mean_env_wait_ms": 0.8455882415302208, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 590.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 295.02}, "hist_stats": {"episode_reward": [633.0, 630.0, 582.0, 587.0, 627.0, 584.0, 633.0, 539.0, 579.0, 627.0, 576.0, 630.0, 630.0, 582.0, 630.0, 636.0, 636.0, 633.0, 590.0, 
579.0, 584.0, 639.0, 579.0, 590.0, 582.0, 582.0, 582.0, 587.0, 630.0, 582.0, 564.0, 579.0, 582.0, 576.0, 582.0, 584.0, 639.0, 576.0, 573.0, 573.0, 633.0, 582.0, 636.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 633.0, 576.0, 579.0, 561.0, 582.0, 582.0, 633.0, 582.0, 627.0, 573.0, 180.0, 582.0, 579.0, 624.0, 576.0, 582.0, 582.0, 570.0, 579.0, 587.0, 582.0, 630.0, 627.0, 573.0, 636.0, 636.0, 582.0, 630.0, 465.0, 582.0, 579.0, 639.0, 579.0, 579.0, 530.0, 579.0, 633.0, 582.0, 630.0, 582.0, 582.0, 582.0, 627.0, 584.0, 570.0, 579.0, 633.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 316.0, 312.0, 318.0, 293.0, 289.0, 293.0, 294.0, 317.0, 310.0, 297.0, 287.0, 316.0, 317.0, 277.0, 262.0, 288.0, 291.0, 312.0, 315.0, 288.0, 288.0, 322.0, 308.0, 322.0, 308.0, 298.0, 284.0, 313.0, 317.0, 320.0, 316.0, 318.0, 318.0, 312.0, 321.0, 293.0, 297.0, 299.0, 280.0, 294.0, 290.0, 320.0, 319.0, 284.0, 295.0, 293.0, 297.0, 290.0, 292.0, 291.0, 291.0, 290.0, 292.0, 291.0, 296.0, 321.0, 309.0, 295.0, 287.0, 275.0, 289.0, 295.0, 284.0, 291.0, 291.0, 276.0, 300.0, 290.0, 292.0, 297.0, 287.0, 320.0, 319.0, 293.0, 283.0, 291.0, 282.0, 287.0, 286.0, 313.0, 320.0, 288.0, 294.0, 318.0, 318.0, 295.0, 287.0, 289.0, 293.0, 296.0, 291.0, 295.0, 284.0, 295.0, 287.0, 298.0, 284.0, 295.0, 284.0, 314.0, 313.0, 321.0, 312.0, 283.0, 293.0, 291.0, 288.0, 273.0, 288.0, 292.0, 290.0, 284.0, 298.0, 318.0, 315.0, 290.0, 292.0, 319.0, 308.0, 290.0, 283.0, 90.0, 90.0, 288.0, 294.0, 289.0, 
290.0, 313.0, 311.0, 294.0, 282.0, 294.0, 288.0, 292.0, 290.0, 274.0, 296.0, 288.0, 291.0, 290.0, 297.0, 288.0, 294.0, 310.0, 320.0, 304.0, 323.0, 289.0, 284.0, 319.0, 317.0, 317.0, 319.0, 293.0, 289.0, 319.0, 311.0, 236.0, 229.0, 292.0, 290.0, 289.0, 290.0, 318.0, 321.0, 294.0, 285.0, 290.0, 289.0, 264.0, 266.0, 292.0, 287.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 297.0, 285.0, 292.0, 290.0, 292.0, 290.0, 323.0, 304.0, 291.0, 293.0, 290.0, 280.0, 293.0, 286.0, 318.0, 315.0, 291.0, 291.0, 289.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7026520157753002, "mean_inference_ms": 1.2574482022329547, "mean_action_processing_ms": 0.1344425360928672, "mean_env_wait_ms": 0.8455882415302208, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7731200, "num_agent_steps_trained": 7731200, "num_env_steps_sampled": 3865600, "num_env_steps_trained": 3865600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3865600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7731200, "timers": {"training_iteration_time_ms": 3653.339, "learn_time_ms": 1097.641, "learn_throughput": 11661.368, "synch_weights_time_ms": 11.078}, "counters": {"num_env_steps_sampled": 3865600, "num_env_steps_trained": 3865600, "num_agent_steps_sampled": 7731200, "num_agent_steps_trained": 7731200}, "done": false, "episodes_total": 9664, "training_iteration": 302, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-08", "timestamp": 1666581608, "time_this_iter_s": 3.577674388885498, "time_total_s": 1164.3114647865295, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1164.3114647865295, "timesteps_since_restore": 0, "iterations_since_restore": 302, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.316666666666666, "ram_util_percent": 10.633333333333333}} +{"custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 180.42, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.23, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.95, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.1, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.79, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.8, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.81, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.79, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.79, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0004884917289018631, "policy_loss": -0.0008183724712580442, "vf_loss": 7.473912239074707, "vf_explained_var": 0.607109785079956, "kl": 0.0021743732504546642, "entropy": 0.8350157737731934, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3878400, "num_env_steps_trained": 3878400, "num_agent_steps_sampled": 7756800, "num_agent_steps_trained": 7756800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 584.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 292.01}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 180.42, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.23, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.14, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.06, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.95, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.79, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.14, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.8, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.21, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.24, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.68, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.81, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.79, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.79, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 582.0, 584.0, 639.0, 576.0, 573.0, 573.0, 633.0, 582.0, 636.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 633.0, 576.0, 579.0, 561.0, 582.0, 582.0, 633.0, 582.0, 627.0, 573.0, 180.0, 582.0, 579.0, 624.0, 576.0, 582.0, 582.0, 570.0, 579.0, 587.0, 582.0, 630.0, 627.0, 573.0, 636.0, 636.0, 582.0, 630.0, 465.0, 582.0, 579.0, 639.0, 579.0, 579.0, 530.0, 579.0, 633.0, 582.0, 630.0, 582.0, 582.0, 582.0, 627.0, 584.0, 570.0, 579.0, 633.0, 582.0, 582.0, 584.0, 593.0, 582.0, 630.0, 579.0, 579.0, 579.0, 582.0, 627.0, 582.0, 294.0, 579.0, 579.0, 579.0, 582.0, 630.0, 627.0, 579.0, 639.0, 587.0, 579.0, 590.0, 582.0, 576.0, 584.0, 630.0, 582.0, 587.0, 582.0, 573.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 276.0, 300.0, 290.0, 292.0, 297.0, 287.0, 320.0, 319.0, 293.0, 
283.0, 291.0, 282.0, 287.0, 286.0, 313.0, 320.0, 288.0, 294.0, 318.0, 318.0, 295.0, 287.0, 289.0, 293.0, 296.0, 291.0, 295.0, 284.0, 295.0, 287.0, 298.0, 284.0, 295.0, 284.0, 314.0, 313.0, 321.0, 312.0, 283.0, 293.0, 291.0, 288.0, 273.0, 288.0, 292.0, 290.0, 284.0, 298.0, 318.0, 315.0, 290.0, 292.0, 319.0, 308.0, 290.0, 283.0, 90.0, 90.0, 288.0, 294.0, 289.0, 290.0, 313.0, 311.0, 294.0, 282.0, 294.0, 288.0, 292.0, 290.0, 274.0, 296.0, 288.0, 291.0, 290.0, 297.0, 288.0, 294.0, 310.0, 320.0, 304.0, 323.0, 289.0, 284.0, 319.0, 317.0, 317.0, 319.0, 293.0, 289.0, 319.0, 311.0, 236.0, 229.0, 292.0, 290.0, 289.0, 290.0, 318.0, 321.0, 294.0, 285.0, 290.0, 289.0, 264.0, 266.0, 292.0, 287.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 297.0, 285.0, 292.0, 290.0, 292.0, 290.0, 323.0, 304.0, 291.0, 293.0, 290.0, 280.0, 293.0, 286.0, 318.0, 315.0, 291.0, 291.0, 289.0, 293.0, 298.0, 286.0, 293.0, 300.0, 296.0, 286.0, 315.0, 315.0, 288.0, 291.0, 290.0, 289.0, 285.0, 294.0, 294.0, 288.0, 310.0, 317.0, 287.0, 295.0, 149.0, 145.0, 291.0, 288.0, 290.0, 289.0, 288.0, 291.0, 287.0, 295.0, 313.0, 317.0, 311.0, 316.0, 292.0, 287.0, 318.0, 321.0, 296.0, 291.0, 285.0, 294.0, 294.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 295.0, 315.0, 315.0, 291.0, 291.0, 295.0, 292.0, 291.0, 291.0, 291.0, 282.0, 284.0, 298.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7025833578758661, "mean_inference_ms": 1.2572199040666918, "mean_action_processing_ms": 0.13443044709578586, "mean_env_wait_ms": 0.8454600698531514, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 584.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 90.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 292.01}, "hist_stats": {"episode_reward": [582.0, 576.0, 582.0, 584.0, 639.0, 576.0, 573.0, 573.0, 633.0, 582.0, 636.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 
633.0, 576.0, 579.0, 561.0, 582.0, 582.0, 633.0, 582.0, 627.0, 573.0, 180.0, 582.0, 579.0, 624.0, 576.0, 582.0, 582.0, 570.0, 579.0, 587.0, 582.0, 630.0, 627.0, 573.0, 636.0, 636.0, 582.0, 630.0, 465.0, 582.0, 579.0, 639.0, 579.0, 579.0, 530.0, 579.0, 633.0, 582.0, 630.0, 582.0, 582.0, 582.0, 627.0, 584.0, 570.0, 579.0, 633.0, 582.0, 582.0, 584.0, 593.0, 582.0, 630.0, 579.0, 579.0, 579.0, 582.0, 627.0, 582.0, 294.0, 579.0, 579.0, 579.0, 582.0, 630.0, 627.0, 579.0, 639.0, 587.0, 579.0, 590.0, 582.0, 576.0, 584.0, 630.0, 582.0, 587.0, 582.0, 573.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 276.0, 300.0, 290.0, 292.0, 297.0, 287.0, 320.0, 319.0, 293.0, 283.0, 291.0, 282.0, 287.0, 286.0, 313.0, 320.0, 288.0, 294.0, 318.0, 318.0, 295.0, 287.0, 289.0, 293.0, 296.0, 291.0, 295.0, 284.0, 295.0, 287.0, 298.0, 284.0, 295.0, 284.0, 314.0, 313.0, 321.0, 312.0, 283.0, 293.0, 291.0, 288.0, 273.0, 288.0, 292.0, 290.0, 284.0, 298.0, 318.0, 315.0, 290.0, 292.0, 319.0, 308.0, 290.0, 283.0, 90.0, 90.0, 288.0, 294.0, 289.0, 290.0, 313.0, 311.0, 294.0, 282.0, 294.0, 288.0, 292.0, 290.0, 274.0, 296.0, 288.0, 291.0, 290.0, 297.0, 288.0, 294.0, 310.0, 320.0, 304.0, 323.0, 289.0, 284.0, 319.0, 317.0, 317.0, 319.0, 293.0, 289.0, 319.0, 311.0, 236.0, 229.0, 292.0, 290.0, 289.0, 290.0, 318.0, 321.0, 294.0, 285.0, 290.0, 289.0, 264.0, 266.0, 292.0, 287.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 297.0, 285.0, 292.0, 290.0, 292.0, 290.0, 323.0, 304.0, 291.0, 293.0, 290.0, 
280.0, 293.0, 286.0, 318.0, 315.0, 291.0, 291.0, 289.0, 293.0, 298.0, 286.0, 293.0, 300.0, 296.0, 286.0, 315.0, 315.0, 288.0, 291.0, 290.0, 289.0, 285.0, 294.0, 294.0, 288.0, 310.0, 317.0, 287.0, 295.0, 149.0, 145.0, 291.0, 288.0, 290.0, 289.0, 288.0, 291.0, 287.0, 295.0, 313.0, 317.0, 311.0, 316.0, 292.0, 287.0, 318.0, 321.0, 296.0, 291.0, 285.0, 294.0, 294.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 295.0, 315.0, 315.0, 291.0, 291.0, 295.0, 292.0, 291.0, 291.0, 291.0, 282.0, 284.0, 298.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7025833578758661, "mean_inference_ms": 1.2572199040666918, "mean_action_processing_ms": 0.13443044709578586, "mean_env_wait_ms": 0.8454600698531514, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7756800, "num_agent_steps_trained": 7756800, "num_env_steps_sampled": 3878400, "num_env_steps_trained": 3878400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3878400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7756800, "timers": {"training_iteration_time_ms": 3641.816, "learn_time_ms": 1098.084, "learn_throughput": 11656.669, "synch_weights_time_ms": 11.273}, "counters": {"num_env_steps_sampled": 3878400, "num_env_steps_trained": 3878400, "num_agent_steps_sampled": 7756800, "num_agent_steps_trained": 7756800}, "done": false, "episodes_total": 9696, "training_iteration": 303, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-12", "timestamp": 1666581612, "time_this_iter_s": 3.5357253551483154, "time_total_s": 1167.8471901416779, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1167.8471901416779, "timesteps_since_restore": 0, "iterations_since_restore": 303, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 22.98, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 203.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.61, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.35, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.16, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.88, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.87, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.77, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.88, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.87, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.88, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.87, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -6.053224205970764e-05, "policy_loss": -0.00038139696698635817, "vf_loss": 7.4262614250183105, "vf_explained_var": 0.581686794757843, "kl": 0.002229546196758747, "entropy": 0.843519926071167, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3891200, "num_env_steps_trained": 3891200, "num_agent_steps_sampled": 7782400, "num_agent_steps_trained": 7782400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 588.41, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 294.205}, "custom_metrics": {"sparse_reward_mean": 203.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.61, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.35, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.16, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.88, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.87, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.77, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.23, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.88, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.87, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.88, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.87, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [624.0, 576.0, 582.0, 582.0, 570.0, 579.0, 587.0, 582.0, 630.0, 627.0, 573.0, 636.0, 636.0, 582.0, 630.0, 465.0, 582.0, 579.0, 639.0, 579.0, 579.0, 530.0, 579.0, 633.0, 582.0, 630.0, 582.0, 582.0, 582.0, 627.0, 584.0, 570.0, 579.0, 633.0, 582.0, 582.0, 584.0, 593.0, 582.0, 630.0, 579.0, 579.0, 579.0, 582.0, 627.0, 582.0, 294.0, 579.0, 579.0, 579.0, 582.0, 630.0, 627.0, 579.0, 639.0, 587.0, 579.0, 590.0, 582.0, 576.0, 584.0, 630.0, 582.0, 587.0, 582.0, 573.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 584.0, 576.0, 630.0, 581.0, 579.0, 630.0, 582.0, 633.0, 587.0, 584.0, 576.0, 536.0, 582.0, 576.0, 582.0, 627.0, 624.0, 630.0, 576.0, 587.0, 569.0, 579.0, 582.0, 576.0, 630.0, 579.0, 573.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 311.0, 294.0, 282.0, 294.0, 288.0, 292.0, 290.0, 274.0, 296.0, 288.0, 
291.0, 290.0, 297.0, 288.0, 294.0, 310.0, 320.0, 304.0, 323.0, 289.0, 284.0, 319.0, 317.0, 317.0, 319.0, 293.0, 289.0, 319.0, 311.0, 236.0, 229.0, 292.0, 290.0, 289.0, 290.0, 318.0, 321.0, 294.0, 285.0, 290.0, 289.0, 264.0, 266.0, 292.0, 287.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 297.0, 285.0, 292.0, 290.0, 292.0, 290.0, 323.0, 304.0, 291.0, 293.0, 290.0, 280.0, 293.0, 286.0, 318.0, 315.0, 291.0, 291.0, 289.0, 293.0, 298.0, 286.0, 293.0, 300.0, 296.0, 286.0, 315.0, 315.0, 288.0, 291.0, 290.0, 289.0, 285.0, 294.0, 294.0, 288.0, 310.0, 317.0, 287.0, 295.0, 149.0, 145.0, 291.0, 288.0, 290.0, 289.0, 288.0, 291.0, 287.0, 295.0, 313.0, 317.0, 311.0, 316.0, 292.0, 287.0, 318.0, 321.0, 296.0, 291.0, 285.0, 294.0, 294.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 295.0, 315.0, 315.0, 291.0, 291.0, 295.0, 292.0, 291.0, 291.0, 291.0, 282.0, 284.0, 298.0, 291.0, 291.0, 313.0, 314.0, 289.0, 293.0, 290.0, 289.0, 287.0, 292.0, 291.0, 293.0, 289.0, 287.0, 315.0, 315.0, 285.0, 296.0, 292.0, 287.0, 314.0, 316.0, 293.0, 289.0, 320.0, 313.0, 287.0, 300.0, 288.0, 296.0, 283.0, 293.0, 267.0, 269.0, 287.0, 295.0, 288.0, 288.0, 290.0, 292.0, 317.0, 310.0, 304.0, 320.0, 318.0, 312.0, 290.0, 286.0, 295.0, 292.0, 275.0, 294.0, 285.0, 294.0, 292.0, 290.0, 289.0, 287.0, 306.0, 324.0, 292.0, 287.0, 289.0, 284.0, 316.0, 311.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7025108781361386, "mean_inference_ms": 1.2569913037664557, "mean_action_processing_ms": 0.1344181305836977, "mean_env_wait_ms": 0.8453296828003616, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 588.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 294.205}, "hist_stats": {"episode_reward": [624.0, 576.0, 582.0, 582.0, 570.0, 579.0, 587.0, 582.0, 630.0, 627.0, 573.0, 636.0, 636.0, 582.0, 630.0, 465.0, 582.0, 579.0, 639.0, 
579.0, 579.0, 530.0, 579.0, 633.0, 582.0, 630.0, 582.0, 582.0, 582.0, 627.0, 584.0, 570.0, 579.0, 633.0, 582.0, 582.0, 584.0, 593.0, 582.0, 630.0, 579.0, 579.0, 579.0, 582.0, 627.0, 582.0, 294.0, 579.0, 579.0, 579.0, 582.0, 630.0, 627.0, 579.0, 639.0, 587.0, 579.0, 590.0, 582.0, 576.0, 584.0, 630.0, 582.0, 587.0, 582.0, 573.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 584.0, 576.0, 630.0, 581.0, 579.0, 630.0, 582.0, 633.0, 587.0, 584.0, 576.0, 536.0, 582.0, 576.0, 582.0, 627.0, 624.0, 630.0, 576.0, 587.0, 569.0, 579.0, 582.0, 576.0, 630.0, 579.0, 573.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 311.0, 294.0, 282.0, 294.0, 288.0, 292.0, 290.0, 274.0, 296.0, 288.0, 291.0, 290.0, 297.0, 288.0, 294.0, 310.0, 320.0, 304.0, 323.0, 289.0, 284.0, 319.0, 317.0, 317.0, 319.0, 293.0, 289.0, 319.0, 311.0, 236.0, 229.0, 292.0, 290.0, 289.0, 290.0, 318.0, 321.0, 294.0, 285.0, 290.0, 289.0, 264.0, 266.0, 292.0, 287.0, 317.0, 316.0, 287.0, 295.0, 308.0, 322.0, 297.0, 285.0, 292.0, 290.0, 292.0, 290.0, 323.0, 304.0, 291.0, 293.0, 290.0, 280.0, 293.0, 286.0, 318.0, 315.0, 291.0, 291.0, 289.0, 293.0, 298.0, 286.0, 293.0, 300.0, 296.0, 286.0, 315.0, 315.0, 288.0, 291.0, 290.0, 289.0, 285.0, 294.0, 294.0, 288.0, 310.0, 317.0, 287.0, 295.0, 149.0, 145.0, 291.0, 288.0, 290.0, 289.0, 288.0, 291.0, 287.0, 295.0, 313.0, 317.0, 311.0, 316.0, 292.0, 287.0, 318.0, 321.0, 296.0, 291.0, 285.0, 294.0, 294.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 295.0, 315.0, 315.0, 291.0, 291.0, 295.0, 
292.0, 291.0, 291.0, 291.0, 282.0, 284.0, 298.0, 291.0, 291.0, 313.0, 314.0, 289.0, 293.0, 290.0, 289.0, 287.0, 292.0, 291.0, 293.0, 289.0, 287.0, 315.0, 315.0, 285.0, 296.0, 292.0, 287.0, 314.0, 316.0, 293.0, 289.0, 320.0, 313.0, 287.0, 300.0, 288.0, 296.0, 283.0, 293.0, 267.0, 269.0, 287.0, 295.0, 288.0, 288.0, 290.0, 292.0, 317.0, 310.0, 304.0, 320.0, 318.0, 312.0, 290.0, 286.0, 295.0, 292.0, 275.0, 294.0, 285.0, 294.0, 292.0, 290.0, 289.0, 287.0, 306.0, 324.0, 292.0, 287.0, 289.0, 284.0, 316.0, 311.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7025108781361386, "mean_inference_ms": 1.2569913037664557, "mean_action_processing_ms": 0.1344181305836977, "mean_env_wait_ms": 0.8453296828003616, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7782400, "num_agent_steps_trained": 7782400, "num_env_steps_sampled": 3891200, "num_env_steps_trained": 3891200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3891200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7782400, "timers": {"training_iteration_time_ms": 3636.039, "learn_time_ms": 1086.337, "learn_throughput": 11782.72, "synch_weights_time_ms": 11.233}, "counters": {"num_env_steps_sampled": 3891200, "num_env_steps_trained": 3891200, "num_agent_steps_sampled": 7782400, "num_agent_steps_trained": 7782400}, "done": false, "episodes_total": 9728, "training_iteration": 304, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-15", "timestamp": 1666581615, "time_this_iter_s": 3.5915231704711914, "time_total_s": 1171.438713312149, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1171.438713312149, "timesteps_since_restore": 0, "iterations_since_restore": 304, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.979999999999997, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 203.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.91, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.69, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.51, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.85, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.24, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.6, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.24, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.6, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.24, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.6, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005292992573231459, "policy_loss": 0.00019212259212508798, "vf_loss": 7.56189489364624, "vf_explained_var": 0.5613787174224854, "kl": 0.0021602341439574957, "entropy": 0.8380213379859924, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3904000, "num_env_steps_trained": 3904000, "num_agent_steps_sampled": 7808000, "num_agent_steps_trained": 7808000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 588.71, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 294.355}, "custom_metrics": {"sparse_reward_mean": 203.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.91, "shaped_reward_min": 94, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.69, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.03, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.51, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.85, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.24, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.6, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.28, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.22, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.48, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.24, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.6, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.24, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.6, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 582.0, 582.0, 584.0, 593.0, 582.0, 630.0, 579.0, 579.0, 579.0, 582.0, 627.0, 582.0, 294.0, 579.0, 579.0, 579.0, 582.0, 630.0, 627.0, 579.0, 639.0, 587.0, 579.0, 590.0, 582.0, 576.0, 584.0, 630.0, 582.0, 587.0, 582.0, 573.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 584.0, 576.0, 630.0, 581.0, 579.0, 630.0, 582.0, 633.0, 587.0, 584.0, 576.0, 536.0, 582.0, 576.0, 582.0, 627.0, 624.0, 630.0, 576.0, 587.0, 569.0, 579.0, 582.0, 576.0, 630.0, 579.0, 573.0, 627.0, 576.0, 522.0, 582.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 636.0, 570.0, 636.0, 633.0, 582.0, 576.0, 582.0, 582.0, 636.0, 587.0, 582.0, 582.0, 630.0, 579.0, 573.0, 636.0, 587.0, 630.0, 582.0, 582.0, 587.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 318.0, 315.0, 291.0, 291.0, 289.0, 293.0, 298.0, 286.0, 293.0, 
300.0, 296.0, 286.0, 315.0, 315.0, 288.0, 291.0, 290.0, 289.0, 285.0, 294.0, 294.0, 288.0, 310.0, 317.0, 287.0, 295.0, 149.0, 145.0, 291.0, 288.0, 290.0, 289.0, 288.0, 291.0, 287.0, 295.0, 313.0, 317.0, 311.0, 316.0, 292.0, 287.0, 318.0, 321.0, 296.0, 291.0, 285.0, 294.0, 294.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 295.0, 315.0, 315.0, 291.0, 291.0, 295.0, 292.0, 291.0, 291.0, 291.0, 282.0, 284.0, 298.0, 291.0, 291.0, 313.0, 314.0, 289.0, 293.0, 290.0, 289.0, 287.0, 292.0, 291.0, 293.0, 289.0, 287.0, 315.0, 315.0, 285.0, 296.0, 292.0, 287.0, 314.0, 316.0, 293.0, 289.0, 320.0, 313.0, 287.0, 300.0, 288.0, 296.0, 283.0, 293.0, 267.0, 269.0, 287.0, 295.0, 288.0, 288.0, 290.0, 292.0, 317.0, 310.0, 304.0, 320.0, 318.0, 312.0, 290.0, 286.0, 295.0, 292.0, 275.0, 294.0, 285.0, 294.0, 292.0, 290.0, 289.0, 287.0, 306.0, 324.0, 292.0, 287.0, 289.0, 284.0, 316.0, 311.0, 287.0, 289.0, 256.0, 266.0, 288.0, 294.0, 293.0, 283.0, 287.0, 289.0, 310.0, 320.0, 291.0, 288.0, 291.0, 288.0, 292.0, 287.0, 316.0, 320.0, 281.0, 289.0, 317.0, 319.0, 316.0, 317.0, 294.0, 288.0, 282.0, 294.0, 287.0, 295.0, 288.0, 294.0, 320.0, 316.0, 296.0, 291.0, 286.0, 296.0, 292.0, 290.0, 319.0, 311.0, 293.0, 286.0, 283.0, 290.0, 319.0, 317.0, 289.0, 298.0, 314.0, 316.0, 288.0, 294.0, 296.0, 286.0, 295.0, 292.0, 288.0, 294.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7024161841210586, "mean_inference_ms": 1.2567649124208453, "mean_action_processing_ms": 0.1344072531232298, "mean_env_wait_ms": 0.845211433040442, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 294.0, "episode_reward_mean": 588.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 294.355}, "hist_stats": {"episode_reward": [579.0, 633.0, 582.0, 582.0, 584.0, 593.0, 582.0, 630.0, 579.0, 579.0, 579.0, 582.0, 627.0, 582.0, 294.0, 579.0, 579.0, 579.0, 582.0, 
630.0, 627.0, 579.0, 639.0, 587.0, 579.0, 590.0, 582.0, 576.0, 584.0, 630.0, 582.0, 587.0, 582.0, 573.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 584.0, 576.0, 630.0, 581.0, 579.0, 630.0, 582.0, 633.0, 587.0, 584.0, 576.0, 536.0, 582.0, 576.0, 582.0, 627.0, 624.0, 630.0, 576.0, 587.0, 569.0, 579.0, 582.0, 576.0, 630.0, 579.0, 573.0, 627.0, 576.0, 522.0, 582.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 636.0, 570.0, 636.0, 633.0, 582.0, 576.0, 582.0, 582.0, 636.0, 587.0, 582.0, 582.0, 630.0, 579.0, 573.0, 636.0, 587.0, 630.0, 582.0, 582.0, 587.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 318.0, 315.0, 291.0, 291.0, 289.0, 293.0, 298.0, 286.0, 293.0, 300.0, 296.0, 286.0, 315.0, 315.0, 288.0, 291.0, 290.0, 289.0, 285.0, 294.0, 294.0, 288.0, 310.0, 317.0, 287.0, 295.0, 149.0, 145.0, 291.0, 288.0, 290.0, 289.0, 288.0, 291.0, 287.0, 295.0, 313.0, 317.0, 311.0, 316.0, 292.0, 287.0, 318.0, 321.0, 296.0, 291.0, 285.0, 294.0, 294.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 295.0, 315.0, 315.0, 291.0, 291.0, 295.0, 292.0, 291.0, 291.0, 291.0, 282.0, 284.0, 298.0, 291.0, 291.0, 313.0, 314.0, 289.0, 293.0, 290.0, 289.0, 287.0, 292.0, 291.0, 293.0, 289.0, 287.0, 315.0, 315.0, 285.0, 296.0, 292.0, 287.0, 314.0, 316.0, 293.0, 289.0, 320.0, 313.0, 287.0, 300.0, 288.0, 296.0, 283.0, 293.0, 267.0, 269.0, 287.0, 295.0, 288.0, 288.0, 290.0, 292.0, 317.0, 310.0, 304.0, 320.0, 318.0, 312.0, 290.0, 286.0, 295.0, 292.0, 275.0, 294.0, 285.0, 294.0, 292.0, 290.0, 289.0, 
287.0, 306.0, 324.0, 292.0, 287.0, 289.0, 284.0, 316.0, 311.0, 287.0, 289.0, 256.0, 266.0, 288.0, 294.0, 293.0, 283.0, 287.0, 289.0, 310.0, 320.0, 291.0, 288.0, 291.0, 288.0, 292.0, 287.0, 316.0, 320.0, 281.0, 289.0, 317.0, 319.0, 316.0, 317.0, 294.0, 288.0, 282.0, 294.0, 287.0, 295.0, 288.0, 294.0, 320.0, 316.0, 296.0, 291.0, 286.0, 296.0, 292.0, 290.0, 319.0, 311.0, 293.0, 286.0, 283.0, 290.0, 319.0, 317.0, 289.0, 298.0, 314.0, 316.0, 288.0, 294.0, 296.0, 286.0, 295.0, 292.0, 288.0, 294.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7024161841210586, "mean_inference_ms": 1.2567649124208453, "mean_action_processing_ms": 0.1344072531232298, "mean_env_wait_ms": 0.845211433040442, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7808000, "num_agent_steps_trained": 7808000, "num_env_steps_sampled": 3904000, "num_env_steps_trained": 3904000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3904000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7808000, "timers": {"training_iteration_time_ms": 3619.018, "learn_time_ms": 1074.683, "learn_throughput": 11910.493, "synch_weights_time_ms": 10.387}, "counters": {"num_env_steps_sampled": 3904000, "num_env_steps_trained": 3904000, "num_agent_steps_sampled": 7808000, "num_agent_steps_trained": 7808000}, "done": false, "episodes_total": 9760, "training_iteration": 305, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-19", "timestamp": 1666581619, "time_this_iter_s": 3.606945037841797, "time_total_s": 1175.0456583499908, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1175.0456583499908, "timesteps_since_restore": 0, "iterations_since_restore": 305, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.616666666666664, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.85, "shaped_reward_min": 156, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.42, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.14, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.9, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.93, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.9, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.9, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010284465970471501, "policy_loss": 0.0006863062153570354, "vf_loss": 7.621923446655273, "vf_explained_var": 0.5728992223739624, "kl": 0.0019947909750044346, "entropy": 0.8400977849960327, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3916800, "num_env_steps_trained": 3916800, "num_agent_steps_sampled": 7833600, "num_agent_steps_trained": 7833600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 516.0, "episode_reward_mean": 591.05, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 295.525}, "custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.85, "shaped_reward_min": 156, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.42, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.14, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.21, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.9, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.11, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.6, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.58, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.9, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.9, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 573.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 584.0, 576.0, 630.0, 581.0, 579.0, 630.0, 582.0, 633.0, 587.0, 584.0, 576.0, 536.0, 582.0, 576.0, 582.0, 627.0, 624.0, 630.0, 576.0, 587.0, 569.0, 579.0, 582.0, 576.0, 630.0, 579.0, 573.0, 627.0, 576.0, 522.0, 582.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 636.0, 570.0, 636.0, 633.0, 582.0, 576.0, 582.0, 582.0, 636.0, 587.0, 582.0, 582.0, 630.0, 579.0, 573.0, 636.0, 587.0, 630.0, 582.0, 582.0, 587.0, 582.0, 579.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 627.0, 633.0, 636.0, 636.0, 579.0, 573.0, 584.0, 627.0, 582.0, 516.0, 576.0, 579.0, 627.0, 633.0, 570.0, 630.0, 627.0, 570.0, 579.0, 579.0, 576.0, 582.0, 584.0, 579.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 291.0, 282.0, 284.0, 298.0, 291.0, 291.0, 313.0, 314.0, 289.0, 
293.0, 290.0, 289.0, 287.0, 292.0, 291.0, 293.0, 289.0, 287.0, 315.0, 315.0, 285.0, 296.0, 292.0, 287.0, 314.0, 316.0, 293.0, 289.0, 320.0, 313.0, 287.0, 300.0, 288.0, 296.0, 283.0, 293.0, 267.0, 269.0, 287.0, 295.0, 288.0, 288.0, 290.0, 292.0, 317.0, 310.0, 304.0, 320.0, 318.0, 312.0, 290.0, 286.0, 295.0, 292.0, 275.0, 294.0, 285.0, 294.0, 292.0, 290.0, 289.0, 287.0, 306.0, 324.0, 292.0, 287.0, 289.0, 284.0, 316.0, 311.0, 287.0, 289.0, 256.0, 266.0, 288.0, 294.0, 293.0, 283.0, 287.0, 289.0, 310.0, 320.0, 291.0, 288.0, 291.0, 288.0, 292.0, 287.0, 316.0, 320.0, 281.0, 289.0, 317.0, 319.0, 316.0, 317.0, 294.0, 288.0, 282.0, 294.0, 287.0, 295.0, 288.0, 294.0, 320.0, 316.0, 296.0, 291.0, 286.0, 296.0, 292.0, 290.0, 319.0, 311.0, 293.0, 286.0, 283.0, 290.0, 319.0, 317.0, 289.0, 298.0, 314.0, 316.0, 288.0, 294.0, 296.0, 286.0, 295.0, 292.0, 288.0, 294.0, 288.0, 291.0, 293.0, 294.0, 289.0, 293.0, 289.0, 293.0, 285.0, 288.0, 288.0, 291.0, 282.0, 288.0, 320.0, 307.0, 310.0, 323.0, 316.0, 320.0, 317.0, 319.0, 286.0, 293.0, 287.0, 286.0, 286.0, 298.0, 312.0, 315.0, 291.0, 291.0, 258.0, 258.0, 284.0, 292.0, 286.0, 293.0, 317.0, 310.0, 312.0, 321.0, 284.0, 286.0, 310.0, 320.0, 310.0, 317.0, 287.0, 283.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 287.0, 295.0, 287.0, 297.0, 289.0, 290.0, 293.0, 286.0, 280.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7023278356550204, "mean_inference_ms": 1.2565710368914762, "mean_action_processing_ms": 0.13440078245615697, "mean_env_wait_ms": 0.8451295423921351, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 516.0, "episode_reward_mean": 591.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 295.525}, "hist_stats": {"episode_reward": [582.0, 573.0, 582.0, 582.0, 627.0, 582.0, 579.0, 579.0, 584.0, 576.0, 630.0, 581.0, 579.0, 630.0, 582.0, 633.0, 587.0, 584.0, 
576.0, 536.0, 582.0, 576.0, 582.0, 627.0, 624.0, 630.0, 576.0, 587.0, 569.0, 579.0, 582.0, 576.0, 630.0, 579.0, 573.0, 627.0, 576.0, 522.0, 582.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 636.0, 570.0, 636.0, 633.0, 582.0, 576.0, 582.0, 582.0, 636.0, 587.0, 582.0, 582.0, 630.0, 579.0, 573.0, 636.0, 587.0, 630.0, 582.0, 582.0, 587.0, 582.0, 579.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 627.0, 633.0, 636.0, 636.0, 579.0, 573.0, 584.0, 627.0, 582.0, 516.0, 576.0, 579.0, 627.0, 633.0, 570.0, 630.0, 627.0, 570.0, 579.0, 579.0, 576.0, 582.0, 584.0, 579.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 291.0, 282.0, 284.0, 298.0, 291.0, 291.0, 313.0, 314.0, 289.0, 293.0, 290.0, 289.0, 287.0, 292.0, 291.0, 293.0, 289.0, 287.0, 315.0, 315.0, 285.0, 296.0, 292.0, 287.0, 314.0, 316.0, 293.0, 289.0, 320.0, 313.0, 287.0, 300.0, 288.0, 296.0, 283.0, 293.0, 267.0, 269.0, 287.0, 295.0, 288.0, 288.0, 290.0, 292.0, 317.0, 310.0, 304.0, 320.0, 318.0, 312.0, 290.0, 286.0, 295.0, 292.0, 275.0, 294.0, 285.0, 294.0, 292.0, 290.0, 289.0, 287.0, 306.0, 324.0, 292.0, 287.0, 289.0, 284.0, 316.0, 311.0, 287.0, 289.0, 256.0, 266.0, 288.0, 294.0, 293.0, 283.0, 287.0, 289.0, 310.0, 320.0, 291.0, 288.0, 291.0, 288.0, 292.0, 287.0, 316.0, 320.0, 281.0, 289.0, 317.0, 319.0, 316.0, 317.0, 294.0, 288.0, 282.0, 294.0, 287.0, 295.0, 288.0, 294.0, 320.0, 316.0, 296.0, 291.0, 286.0, 296.0, 292.0, 290.0, 319.0, 311.0, 293.0, 286.0, 283.0, 290.0, 319.0, 317.0, 289.0, 298.0, 314.0, 316.0, 
288.0, 294.0, 296.0, 286.0, 295.0, 292.0, 288.0, 294.0, 288.0, 291.0, 293.0, 294.0, 289.0, 293.0, 289.0, 293.0, 285.0, 288.0, 288.0, 291.0, 282.0, 288.0, 320.0, 307.0, 310.0, 323.0, 316.0, 320.0, 317.0, 319.0, 286.0, 293.0, 287.0, 286.0, 286.0, 298.0, 312.0, 315.0, 291.0, 291.0, 258.0, 258.0, 284.0, 292.0, 286.0, 293.0, 317.0, 310.0, 312.0, 321.0, 284.0, 286.0, 310.0, 320.0, 310.0, 317.0, 287.0, 283.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 287.0, 295.0, 287.0, 297.0, 289.0, 290.0, 293.0, 286.0, 280.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7023278356550204, "mean_inference_ms": 1.2565710368914762, "mean_action_processing_ms": 0.13440078245615697, "mean_env_wait_ms": 0.8451295423921351, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7833600, "num_agent_steps_trained": 7833600, "num_env_steps_sampled": 3916800, "num_env_steps_trained": 3916800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3916800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7833600, "timers": {"training_iteration_time_ms": 3558.221, "learn_time_ms": 1059.646, "learn_throughput": 12079.502, "synch_weights_time_ms": 10.299}, "counters": {"num_env_steps_sampled": 3916800, "num_env_steps_trained": 3916800, "num_agent_steps_sampled": 7833600, "num_agent_steps_trained": 7833600}, "done": false, "episodes_total": 9792, "training_iteration": 306, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-23", "timestamp": 1666581623, "time_this_iter_s": 3.6415607929229736, "time_total_s": 1178.6872191429138, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1178.6872191429138, "timesteps_since_restore": 0, "iterations_since_restore": 306, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.16, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 203.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 181.61, "shaped_reward_min": 71, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.13, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.24, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.89, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003629318671301007, "policy_loss": 2.1011597709730268e-05, "vf_loss": 7.619492530822754, "vf_explained_var": 0.5936301946640015, "kl": 0.002933789975941181, "entropy": 0.8400548696517944, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3929600, "num_env_steps_trained": 3929600, "num_agent_steps_sampled": 7859200, "num_agent_steps_trained": 7859200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 191.0, "episode_reward_mean": 589.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 91.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 294.605}, "custom_metrics": {"sparse_reward_mean": 203.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 181.61, "shaped_reward_min": 71, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.39, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.58, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.13, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.24, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.25, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.28, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.16, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.14, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.92, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.75, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.3, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.55, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.67, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.92, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.92, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 579.0, 573.0, 627.0, 576.0, 522.0, 582.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 636.0, 570.0, 636.0, 633.0, 582.0, 576.0, 582.0, 582.0, 636.0, 587.0, 582.0, 582.0, 630.0, 579.0, 573.0, 636.0, 587.0, 630.0, 582.0, 582.0, 587.0, 582.0, 579.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 627.0, 633.0, 636.0, 636.0, 579.0, 573.0, 584.0, 627.0, 582.0, 516.0, 576.0, 579.0, 627.0, 633.0, 570.0, 630.0, 627.0, 570.0, 579.0, 579.0, 576.0, 582.0, 584.0, 579.0, 579.0, 576.0, 633.0, 582.0, 639.0, 639.0, 579.0, 582.0, 633.0, 624.0, 579.0, 584.0, 582.0, 582.0, 582.0, 462.0, 633.0, 582.0, 582.0, 636.0, 576.0, 584.0, 633.0, 593.0, 627.0, 191.0, 579.0, 639.0, 576.0, 579.0, 582.0, 579.0, 633.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 324.0, 292.0, 287.0, 289.0, 284.0, 316.0, 311.0, 287.0, 289.0, 256.0, 
266.0, 288.0, 294.0, 293.0, 283.0, 287.0, 289.0, 310.0, 320.0, 291.0, 288.0, 291.0, 288.0, 292.0, 287.0, 316.0, 320.0, 281.0, 289.0, 317.0, 319.0, 316.0, 317.0, 294.0, 288.0, 282.0, 294.0, 287.0, 295.0, 288.0, 294.0, 320.0, 316.0, 296.0, 291.0, 286.0, 296.0, 292.0, 290.0, 319.0, 311.0, 293.0, 286.0, 283.0, 290.0, 319.0, 317.0, 289.0, 298.0, 314.0, 316.0, 288.0, 294.0, 296.0, 286.0, 295.0, 292.0, 288.0, 294.0, 288.0, 291.0, 293.0, 294.0, 289.0, 293.0, 289.0, 293.0, 285.0, 288.0, 288.0, 291.0, 282.0, 288.0, 320.0, 307.0, 310.0, 323.0, 316.0, 320.0, 317.0, 319.0, 286.0, 293.0, 287.0, 286.0, 286.0, 298.0, 312.0, 315.0, 291.0, 291.0, 258.0, 258.0, 284.0, 292.0, 286.0, 293.0, 317.0, 310.0, 312.0, 321.0, 284.0, 286.0, 310.0, 320.0, 310.0, 317.0, 287.0, 283.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 287.0, 295.0, 287.0, 297.0, 289.0, 290.0, 293.0, 286.0, 280.0, 296.0, 311.0, 322.0, 293.0, 289.0, 318.0, 321.0, 313.0, 326.0, 287.0, 292.0, 287.0, 295.0, 318.0, 315.0, 316.0, 308.0, 292.0, 287.0, 303.0, 281.0, 294.0, 288.0, 291.0, 291.0, 291.0, 291.0, 230.0, 232.0, 318.0, 315.0, 290.0, 292.0, 292.0, 290.0, 316.0, 320.0, 284.0, 292.0, 292.0, 292.0, 309.0, 324.0, 299.0, 294.0, 317.0, 310.0, 91.0, 100.0, 288.0, 291.0, 319.0, 320.0, 288.0, 288.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 315.0, 318.0, 287.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7022557630562534, "mean_inference_ms": 1.2563812177822142, "mean_action_processing_ms": 0.13439551115108503, "mean_env_wait_ms": 0.8450603332508823, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 191.0, "episode_reward_mean": 589.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 91.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 294.605}, "hist_stats": {"episode_reward": [630.0, 579.0, 573.0, 627.0, 576.0, 522.0, 582.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 636.0, 570.0, 636.0, 633.0, 582.0, 576.0, 
582.0, 582.0, 636.0, 587.0, 582.0, 582.0, 630.0, 579.0, 573.0, 636.0, 587.0, 630.0, 582.0, 582.0, 587.0, 582.0, 579.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 627.0, 633.0, 636.0, 636.0, 579.0, 573.0, 584.0, 627.0, 582.0, 516.0, 576.0, 579.0, 627.0, 633.0, 570.0, 630.0, 627.0, 570.0, 579.0, 579.0, 576.0, 582.0, 584.0, 579.0, 579.0, 576.0, 633.0, 582.0, 639.0, 639.0, 579.0, 582.0, 633.0, 624.0, 579.0, 584.0, 582.0, 582.0, 582.0, 462.0, 633.0, 582.0, 582.0, 636.0, 576.0, 584.0, 633.0, 593.0, 627.0, 191.0, 579.0, 639.0, 576.0, 579.0, 582.0, 579.0, 633.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 324.0, 292.0, 287.0, 289.0, 284.0, 316.0, 311.0, 287.0, 289.0, 256.0, 266.0, 288.0, 294.0, 293.0, 283.0, 287.0, 289.0, 310.0, 320.0, 291.0, 288.0, 291.0, 288.0, 292.0, 287.0, 316.0, 320.0, 281.0, 289.0, 317.0, 319.0, 316.0, 317.0, 294.0, 288.0, 282.0, 294.0, 287.0, 295.0, 288.0, 294.0, 320.0, 316.0, 296.0, 291.0, 286.0, 296.0, 292.0, 290.0, 319.0, 311.0, 293.0, 286.0, 283.0, 290.0, 319.0, 317.0, 289.0, 298.0, 314.0, 316.0, 288.0, 294.0, 296.0, 286.0, 295.0, 292.0, 288.0, 294.0, 288.0, 291.0, 293.0, 294.0, 289.0, 293.0, 289.0, 293.0, 285.0, 288.0, 288.0, 291.0, 282.0, 288.0, 320.0, 307.0, 310.0, 323.0, 316.0, 320.0, 317.0, 319.0, 286.0, 293.0, 287.0, 286.0, 286.0, 298.0, 312.0, 315.0, 291.0, 291.0, 258.0, 258.0, 284.0, 292.0, 286.0, 293.0, 317.0, 310.0, 312.0, 321.0, 284.0, 286.0, 310.0, 320.0, 310.0, 317.0, 287.0, 283.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 287.0, 
295.0, 287.0, 297.0, 289.0, 290.0, 293.0, 286.0, 280.0, 296.0, 311.0, 322.0, 293.0, 289.0, 318.0, 321.0, 313.0, 326.0, 287.0, 292.0, 287.0, 295.0, 318.0, 315.0, 316.0, 308.0, 292.0, 287.0, 303.0, 281.0, 294.0, 288.0, 291.0, 291.0, 291.0, 291.0, 230.0, 232.0, 318.0, 315.0, 290.0, 292.0, 292.0, 290.0, 316.0, 320.0, 284.0, 292.0, 292.0, 292.0, 309.0, 324.0, 299.0, 294.0, 317.0, 310.0, 91.0, 100.0, 288.0, 291.0, 319.0, 320.0, 288.0, 288.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 315.0, 318.0, 287.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7022557630562534, "mean_inference_ms": 1.2563812177822142, "mean_action_processing_ms": 0.13439551115108503, "mean_env_wait_ms": 0.8450603332508823, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7859200, "num_agent_steps_trained": 7859200, "num_env_steps_sampled": 3929600, "num_env_steps_trained": 3929600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3929600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7859200, "timers": {"training_iteration_time_ms": 3549.51, "learn_time_ms": 1058.208, "learn_throughput": 12095.923, "synch_weights_time_ms": 10.407}, "counters": {"num_env_steps_sampled": 3929600, "num_env_steps_trained": 3929600, "num_agent_steps_sampled": 7859200, "num_agent_steps_trained": 7859200}, "done": false, "episodes_total": 9824, "training_iteration": 307, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-27", "timestamp": 1666581627, "time_this_iter_s": 3.666581630706787, "time_total_s": 1182.3538007736206, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1182.3538007736206, "timesteps_since_restore": 0, "iterations_since_restore": 307, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.04, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 204.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.55, "shaped_reward_min": 71, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.7, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.19, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.19, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.19, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.004296698607504368, "policy_loss": -0.00463396031409502, "vf_loss": 7.600736618041992, "vf_explained_var": 0.5694471597671509, "kl": 0.0021462240256369114, "entropy": 0.8456211090087891, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3942400, "num_env_steps_trained": 3942400, "num_agent_steps_sampled": 7884800, "num_agent_steps_trained": 7884800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 191.0, "episode_reward_mean": 590.95, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 91.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 295.475}, "custom_metrics": {"sparse_reward_mean": 204.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.55, "shaped_reward_min": 71, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.7, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.31, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.43, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.0, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.21, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.25, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.14, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.13, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.19, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.75, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.12, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.93, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.25, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.25, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.52, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.19, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.75, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.19, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.75, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 582.0, 579.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 627.0, 633.0, 636.0, 636.0, 579.0, 573.0, 584.0, 627.0, 582.0, 516.0, 576.0, 579.0, 627.0, 633.0, 570.0, 630.0, 627.0, 570.0, 579.0, 579.0, 576.0, 582.0, 584.0, 579.0, 579.0, 576.0, 633.0, 582.0, 639.0, 639.0, 579.0, 582.0, 633.0, 624.0, 579.0, 584.0, 582.0, 582.0, 582.0, 462.0, 633.0, 582.0, 582.0, 636.0, 576.0, 584.0, 633.0, 593.0, 627.0, 191.0, 579.0, 639.0, 576.0, 579.0, 582.0, 579.0, 633.0, 584.0, 590.0, 579.0, 636.0, 576.0, 582.0, 582.0, 582.0, 582.0, 636.0, 587.0, 630.0, 579.0, 587.0, 590.0, 633.0, 633.0, 627.0, 582.0, 633.0, 630.0, 576.0, 573.0, 579.0, 639.0, 630.0, 582.0, 639.0, 587.0, 582.0, 582.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 295.0, 292.0, 288.0, 294.0, 288.0, 291.0, 293.0, 294.0, 289.0, 
293.0, 289.0, 293.0, 285.0, 288.0, 288.0, 291.0, 282.0, 288.0, 320.0, 307.0, 310.0, 323.0, 316.0, 320.0, 317.0, 319.0, 286.0, 293.0, 287.0, 286.0, 286.0, 298.0, 312.0, 315.0, 291.0, 291.0, 258.0, 258.0, 284.0, 292.0, 286.0, 293.0, 317.0, 310.0, 312.0, 321.0, 284.0, 286.0, 310.0, 320.0, 310.0, 317.0, 287.0, 283.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 287.0, 295.0, 287.0, 297.0, 289.0, 290.0, 293.0, 286.0, 280.0, 296.0, 311.0, 322.0, 293.0, 289.0, 318.0, 321.0, 313.0, 326.0, 287.0, 292.0, 287.0, 295.0, 318.0, 315.0, 316.0, 308.0, 292.0, 287.0, 303.0, 281.0, 294.0, 288.0, 291.0, 291.0, 291.0, 291.0, 230.0, 232.0, 318.0, 315.0, 290.0, 292.0, 292.0, 290.0, 316.0, 320.0, 284.0, 292.0, 292.0, 292.0, 309.0, 324.0, 299.0, 294.0, 317.0, 310.0, 91.0, 100.0, 288.0, 291.0, 319.0, 320.0, 288.0, 288.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 315.0, 318.0, 287.0, 297.0, 296.0, 294.0, 290.0, 289.0, 320.0, 316.0, 286.0, 290.0, 290.0, 292.0, 295.0, 287.0, 292.0, 290.0, 293.0, 289.0, 317.0, 319.0, 294.0, 293.0, 312.0, 318.0, 291.0, 288.0, 284.0, 303.0, 297.0, 293.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 293.0, 289.0, 317.0, 316.0, 314.0, 316.0, 288.0, 288.0, 283.0, 290.0, 290.0, 289.0, 320.0, 319.0, 319.0, 311.0, 294.0, 288.0, 319.0, 320.0, 291.0, 296.0, 286.0, 296.0, 291.0, 291.0, 288.0, 291.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7021820115704458, "mean_inference_ms": 1.25616924276832, "mean_action_processing_ms": 0.13438751298201573, "mean_env_wait_ms": 0.8449627802904494, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 191.0, "episode_reward_mean": 590.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 91.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 295.475}, "hist_stats": {"episode_reward": [582.0, 587.0, 582.0, 579.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 627.0, 633.0, 636.0, 636.0, 579.0, 573.0, 584.0, 627.0, 582.0, 
516.0, 576.0, 579.0, 627.0, 633.0, 570.0, 630.0, 627.0, 570.0, 579.0, 579.0, 576.0, 582.0, 584.0, 579.0, 579.0, 576.0, 633.0, 582.0, 639.0, 639.0, 579.0, 582.0, 633.0, 624.0, 579.0, 584.0, 582.0, 582.0, 582.0, 462.0, 633.0, 582.0, 582.0, 636.0, 576.0, 584.0, 633.0, 593.0, 627.0, 191.0, 579.0, 639.0, 576.0, 579.0, 582.0, 579.0, 633.0, 584.0, 590.0, 579.0, 636.0, 576.0, 582.0, 582.0, 582.0, 582.0, 636.0, 587.0, 630.0, 579.0, 587.0, 590.0, 633.0, 633.0, 627.0, 582.0, 633.0, 630.0, 576.0, 573.0, 579.0, 639.0, 630.0, 582.0, 639.0, 587.0, 582.0, 582.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 286.0, 295.0, 292.0, 288.0, 294.0, 288.0, 291.0, 293.0, 294.0, 289.0, 293.0, 289.0, 293.0, 285.0, 288.0, 288.0, 291.0, 282.0, 288.0, 320.0, 307.0, 310.0, 323.0, 316.0, 320.0, 317.0, 319.0, 286.0, 293.0, 287.0, 286.0, 286.0, 298.0, 312.0, 315.0, 291.0, 291.0, 258.0, 258.0, 284.0, 292.0, 286.0, 293.0, 317.0, 310.0, 312.0, 321.0, 284.0, 286.0, 310.0, 320.0, 310.0, 317.0, 287.0, 283.0, 289.0, 290.0, 290.0, 289.0, 294.0, 282.0, 287.0, 295.0, 287.0, 297.0, 289.0, 290.0, 293.0, 286.0, 280.0, 296.0, 311.0, 322.0, 293.0, 289.0, 318.0, 321.0, 313.0, 326.0, 287.0, 292.0, 287.0, 295.0, 318.0, 315.0, 316.0, 308.0, 292.0, 287.0, 303.0, 281.0, 294.0, 288.0, 291.0, 291.0, 291.0, 291.0, 230.0, 232.0, 318.0, 315.0, 290.0, 292.0, 292.0, 290.0, 316.0, 320.0, 284.0, 292.0, 292.0, 292.0, 309.0, 324.0, 299.0, 294.0, 317.0, 310.0, 91.0, 100.0, 288.0, 291.0, 319.0, 320.0, 288.0, 288.0, 288.0, 
291.0, 290.0, 292.0, 287.0, 292.0, 315.0, 318.0, 287.0, 297.0, 296.0, 294.0, 290.0, 289.0, 320.0, 316.0, 286.0, 290.0, 290.0, 292.0, 295.0, 287.0, 292.0, 290.0, 293.0, 289.0, 317.0, 319.0, 294.0, 293.0, 312.0, 318.0, 291.0, 288.0, 284.0, 303.0, 297.0, 293.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 293.0, 289.0, 317.0, 316.0, 314.0, 316.0, 288.0, 288.0, 283.0, 290.0, 290.0, 289.0, 320.0, 319.0, 319.0, 311.0, 294.0, 288.0, 319.0, 320.0, 291.0, 296.0, 286.0, 296.0, 291.0, 291.0, 288.0, 291.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7021820115704458, "mean_inference_ms": 1.25616924276832, "mean_action_processing_ms": 0.13438751298201573, "mean_env_wait_ms": 0.8449627802904494, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7884800, "num_agent_steps_trained": 7884800, "num_env_steps_sampled": 3942400, "num_env_steps_trained": 3942400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3942400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7884800, "timers": {"training_iteration_time_ms": 3550.198, "learn_time_ms": 1061.666, "learn_throughput": 12056.517, "synch_weights_time_ms": 10.477}, "counters": {"num_env_steps_sampled": 3942400, "num_env_steps_trained": 3942400, "num_agent_steps_sampled": 7884800, "num_agent_steps_trained": 7884800}, "done": false, "episodes_total": 9856, "training_iteration": 308, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-30", "timestamp": 1666581630, "time_this_iter_s": 3.5699198246002197, "time_total_s": 1185.9237205982208, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1185.9237205982208, "timesteps_since_restore": 0, "iterations_since_restore": 308, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.51666666666667, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 205.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 183.47, "shaped_reward_min": 71, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.94, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.82, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.18, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.6, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.55, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.6, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.55, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.6, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.55, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002151058055460453, "policy_loss": 0.0018065437907353044, "vf_loss": 7.667585372924805, "vf_explained_var": 0.560869574546814, "kl": 0.0021857996471226215, "entropy": 0.8444837331771851, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3955200, "num_env_steps_trained": 3955200, "num_agent_steps_sampled": 7910400, "num_agent_steps_trained": 7910400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 191.0, "episode_reward_mean": 593.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 91.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 296.935}, "custom_metrics": {"sparse_reward_mean": 205.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 183.47, "shaped_reward_min": 71, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.08, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.94, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.82, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.22, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.6, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.55, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.22, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.31, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.53, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.89, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.49, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.6, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.55, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.6, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.55, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 579.0, 579.0, 576.0, 633.0, 582.0, 639.0, 639.0, 579.0, 582.0, 633.0, 624.0, 579.0, 584.0, 582.0, 582.0, 582.0, 462.0, 633.0, 582.0, 582.0, 636.0, 576.0, 584.0, 633.0, 593.0, 627.0, 191.0, 579.0, 639.0, 576.0, 579.0, 582.0, 579.0, 633.0, 584.0, 590.0, 579.0, 636.0, 576.0, 582.0, 582.0, 582.0, 582.0, 636.0, 587.0, 630.0, 579.0, 587.0, 590.0, 633.0, 633.0, 627.0, 582.0, 633.0, 630.0, 576.0, 573.0, 579.0, 639.0, 630.0, 582.0, 639.0, 587.0, 582.0, 582.0, 579.0, 579.0, 582.0, 630.0, 630.0, 636.0, 579.0, 579.0, 587.0, 630.0, 630.0, 636.0, 579.0, 582.0, 582.0, 600.0, 630.0, 584.0, 630.0, 579.0, 579.0, 579.0, 593.0, 630.0, 576.0, 636.0, 630.0, 576.0, 584.0, 582.0, 584.0, 579.0, 570.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 297.0, 289.0, 290.0, 293.0, 286.0, 280.0, 296.0, 311.0, 322.0, 293.0, 
289.0, 318.0, 321.0, 313.0, 326.0, 287.0, 292.0, 287.0, 295.0, 318.0, 315.0, 316.0, 308.0, 292.0, 287.0, 303.0, 281.0, 294.0, 288.0, 291.0, 291.0, 291.0, 291.0, 230.0, 232.0, 318.0, 315.0, 290.0, 292.0, 292.0, 290.0, 316.0, 320.0, 284.0, 292.0, 292.0, 292.0, 309.0, 324.0, 299.0, 294.0, 317.0, 310.0, 91.0, 100.0, 288.0, 291.0, 319.0, 320.0, 288.0, 288.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 315.0, 318.0, 287.0, 297.0, 296.0, 294.0, 290.0, 289.0, 320.0, 316.0, 286.0, 290.0, 290.0, 292.0, 295.0, 287.0, 292.0, 290.0, 293.0, 289.0, 317.0, 319.0, 294.0, 293.0, 312.0, 318.0, 291.0, 288.0, 284.0, 303.0, 297.0, 293.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 293.0, 289.0, 317.0, 316.0, 314.0, 316.0, 288.0, 288.0, 283.0, 290.0, 290.0, 289.0, 320.0, 319.0, 319.0, 311.0, 294.0, 288.0, 319.0, 320.0, 291.0, 296.0, 286.0, 296.0, 291.0, 291.0, 288.0, 291.0, 290.0, 289.0, 289.0, 293.0, 316.0, 314.0, 314.0, 316.0, 315.0, 321.0, 287.0, 292.0, 290.0, 289.0, 288.0, 299.0, 319.0, 311.0, 315.0, 315.0, 319.0, 317.0, 292.0, 287.0, 291.0, 291.0, 294.0, 288.0, 319.0, 281.0, 319.0, 311.0, 285.0, 299.0, 319.0, 311.0, 288.0, 291.0, 287.0, 292.0, 290.0, 289.0, 298.0, 295.0, 309.0, 321.0, 291.0, 285.0, 315.0, 321.0, 318.0, 312.0, 294.0, 282.0, 284.0, 300.0, 289.0, 293.0, 290.0, 294.0, 284.0, 295.0, 286.0, 284.0, 314.0, 319.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.702118067809688, "mean_inference_ms": 1.2559595497726916, "mean_action_processing_ms": 0.13437893028732162, "mean_env_wait_ms": 0.8448680672814765, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 191.0, "episode_reward_mean": 593.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 91.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 296.935}, "hist_stats": {"episode_reward": [584.0, 579.0, 579.0, 576.0, 633.0, 582.0, 639.0, 639.0, 579.0, 582.0, 633.0, 624.0, 579.0, 584.0, 582.0, 582.0, 582.0, 462.0, 633.0, 
582.0, 582.0, 636.0, 576.0, 584.0, 633.0, 593.0, 627.0, 191.0, 579.0, 639.0, 576.0, 579.0, 582.0, 579.0, 633.0, 584.0, 590.0, 579.0, 636.0, 576.0, 582.0, 582.0, 582.0, 582.0, 636.0, 587.0, 630.0, 579.0, 587.0, 590.0, 633.0, 633.0, 627.0, 582.0, 633.0, 630.0, 576.0, 573.0, 579.0, 639.0, 630.0, 582.0, 639.0, 587.0, 582.0, 582.0, 579.0, 579.0, 582.0, 630.0, 630.0, 636.0, 579.0, 579.0, 587.0, 630.0, 630.0, 636.0, 579.0, 582.0, 582.0, 600.0, 630.0, 584.0, 630.0, 579.0, 579.0, 579.0, 593.0, 630.0, 576.0, 636.0, 630.0, 576.0, 584.0, 582.0, 584.0, 579.0, 570.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 297.0, 289.0, 290.0, 293.0, 286.0, 280.0, 296.0, 311.0, 322.0, 293.0, 289.0, 318.0, 321.0, 313.0, 326.0, 287.0, 292.0, 287.0, 295.0, 318.0, 315.0, 316.0, 308.0, 292.0, 287.0, 303.0, 281.0, 294.0, 288.0, 291.0, 291.0, 291.0, 291.0, 230.0, 232.0, 318.0, 315.0, 290.0, 292.0, 292.0, 290.0, 316.0, 320.0, 284.0, 292.0, 292.0, 292.0, 309.0, 324.0, 299.0, 294.0, 317.0, 310.0, 91.0, 100.0, 288.0, 291.0, 319.0, 320.0, 288.0, 288.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 315.0, 318.0, 287.0, 297.0, 296.0, 294.0, 290.0, 289.0, 320.0, 316.0, 286.0, 290.0, 290.0, 292.0, 295.0, 287.0, 292.0, 290.0, 293.0, 289.0, 317.0, 319.0, 294.0, 293.0, 312.0, 318.0, 291.0, 288.0, 284.0, 303.0, 297.0, 293.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 293.0, 289.0, 317.0, 316.0, 314.0, 316.0, 288.0, 288.0, 283.0, 290.0, 290.0, 289.0, 320.0, 319.0, 319.0, 311.0, 294.0, 288.0, 319.0, 320.0, 291.0, 
296.0, 286.0, 296.0, 291.0, 291.0, 288.0, 291.0, 290.0, 289.0, 289.0, 293.0, 316.0, 314.0, 314.0, 316.0, 315.0, 321.0, 287.0, 292.0, 290.0, 289.0, 288.0, 299.0, 319.0, 311.0, 315.0, 315.0, 319.0, 317.0, 292.0, 287.0, 291.0, 291.0, 294.0, 288.0, 319.0, 281.0, 319.0, 311.0, 285.0, 299.0, 319.0, 311.0, 288.0, 291.0, 287.0, 292.0, 290.0, 289.0, 298.0, 295.0, 309.0, 321.0, 291.0, 285.0, 315.0, 321.0, 318.0, 312.0, 294.0, 282.0, 284.0, 300.0, 289.0, 293.0, 290.0, 294.0, 284.0, 295.0, 286.0, 284.0, 314.0, 319.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.702118067809688, "mean_inference_ms": 1.2559595497726916, "mean_action_processing_ms": 0.13437893028732162, "mean_env_wait_ms": 0.8448680672814765, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7910400, "num_agent_steps_trained": 7910400, "num_env_steps_sampled": 3955200, "num_env_steps_trained": 3955200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3955200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7910400, "timers": {"training_iteration_time_ms": 3550.934, "learn_time_ms": 1063.959, "learn_throughput": 12030.537, "synch_weights_time_ms": 9.979}, "counters": {"num_env_steps_sampled": 3955200, "num_env_steps_trained": 3955200, "num_agent_steps_sampled": 7910400, "num_agent_steps_trained": 7910400}, "done": false, "episodes_total": 9888, "training_iteration": 309, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-34", "timestamp": 1666581634, "time_this_iter_s": 3.6177916526794434, "time_total_s": 1189.5415122509003, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1189.5415122509003, "timesteps_since_restore": 0, "iterations_since_restore": 309, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.979999999999997, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.13, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.11, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.33, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.9, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.64, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.78, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.64, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.64, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015459628775715828, "policy_loss": -0.001896196510642767, "vf_loss": 7.714743614196777, "vf_explained_var": 0.5556751489639282, "kl": 0.002252227161079645, "entropy": 0.8424784541130066, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3968000, "num_env_steps_trained": 3968000, "num_agent_steps_sampled": 7936000, "num_agent_steps_trained": 7936000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 600.73, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 267.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 300.365}, "custom_metrics": {"sparse_reward_mean": 207.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 185.13, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.11, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.33, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.9, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.06, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.64, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 17.8, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.0, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 13, "useful_dish_pickup_agent_0_mean": 5.78, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.27, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 6, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.65, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.64, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 17.8, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.64, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 17.8, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 633.0, 584.0, 590.0, 579.0, 636.0, 576.0, 582.0, 582.0, 582.0, 582.0, 636.0, 587.0, 630.0, 579.0, 587.0, 590.0, 633.0, 633.0, 627.0, 582.0, 633.0, 630.0, 576.0, 573.0, 579.0, 639.0, 630.0, 582.0, 639.0, 587.0, 582.0, 582.0, 579.0, 579.0, 582.0, 630.0, 630.0, 636.0, 579.0, 579.0, 587.0, 630.0, 630.0, 636.0, 579.0, 582.0, 582.0, 600.0, 630.0, 584.0, 630.0, 579.0, 579.0, 579.0, 593.0, 630.0, 576.0, 636.0, 630.0, 576.0, 584.0, 582.0, 584.0, 579.0, 570.0, 633.0, 582.0, 579.0, 630.0, 582.0, 630.0, 536.0, 590.0, 582.0, 579.0, 630.0, 636.0, 633.0, 582.0, 579.0, 570.0, 636.0, 627.0, 582.0, 579.0, 582.0, 576.0, 584.0, 633.0, 630.0, 627.0, 627.0, 633.0, 636.0, 579.0, 633.0, 633.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 287.0, 292.0, 315.0, 318.0, 287.0, 297.0, 296.0, 294.0, 290.0, 
289.0, 320.0, 316.0, 286.0, 290.0, 290.0, 292.0, 295.0, 287.0, 292.0, 290.0, 293.0, 289.0, 317.0, 319.0, 294.0, 293.0, 312.0, 318.0, 291.0, 288.0, 284.0, 303.0, 297.0, 293.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 293.0, 289.0, 317.0, 316.0, 314.0, 316.0, 288.0, 288.0, 283.0, 290.0, 290.0, 289.0, 320.0, 319.0, 319.0, 311.0, 294.0, 288.0, 319.0, 320.0, 291.0, 296.0, 286.0, 296.0, 291.0, 291.0, 288.0, 291.0, 290.0, 289.0, 289.0, 293.0, 316.0, 314.0, 314.0, 316.0, 315.0, 321.0, 287.0, 292.0, 290.0, 289.0, 288.0, 299.0, 319.0, 311.0, 315.0, 315.0, 319.0, 317.0, 292.0, 287.0, 291.0, 291.0, 294.0, 288.0, 319.0, 281.0, 319.0, 311.0, 285.0, 299.0, 319.0, 311.0, 288.0, 291.0, 287.0, 292.0, 290.0, 289.0, 298.0, 295.0, 309.0, 321.0, 291.0, 285.0, 315.0, 321.0, 318.0, 312.0, 294.0, 282.0, 284.0, 300.0, 289.0, 293.0, 290.0, 294.0, 284.0, 295.0, 286.0, 284.0, 314.0, 319.0, 294.0, 288.0, 286.0, 293.0, 319.0, 311.0, 288.0, 294.0, 314.0, 316.0, 267.0, 269.0, 289.0, 301.0, 293.0, 289.0, 288.0, 291.0, 322.0, 308.0, 319.0, 317.0, 315.0, 318.0, 298.0, 284.0, 286.0, 293.0, 288.0, 282.0, 321.0, 315.0, 312.0, 315.0, 294.0, 288.0, 289.0, 290.0, 289.0, 293.0, 293.0, 283.0, 294.0, 290.0, 315.0, 318.0, 314.0, 316.0, 317.0, 310.0, 318.0, 309.0, 313.0, 320.0, 317.0, 319.0, 284.0, 295.0, 313.0, 320.0, 320.0, 313.0, 293.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7020417621444395, "mean_inference_ms": 1.2558035576888165, "mean_action_processing_ms": 0.1343661017492229, "mean_env_wait_ms": 0.8449207548688427, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 536.0, "episode_reward_mean": 600.73, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 267.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 300.365}, "hist_stats": {"episode_reward": [582.0, 579.0, 633.0, 584.0, 590.0, 579.0, 636.0, 576.0, 582.0, 582.0, 582.0, 582.0, 636.0, 587.0, 630.0, 579.0, 587.0, 590.0, 633.0, 
633.0, 627.0, 582.0, 633.0, 630.0, 576.0, 573.0, 579.0, 639.0, 630.0, 582.0, 639.0, 587.0, 582.0, 582.0, 579.0, 579.0, 582.0, 630.0, 630.0, 636.0, 579.0, 579.0, 587.0, 630.0, 630.0, 636.0, 579.0, 582.0, 582.0, 600.0, 630.0, 584.0, 630.0, 579.0, 579.0, 579.0, 593.0, 630.0, 576.0, 636.0, 630.0, 576.0, 584.0, 582.0, 584.0, 579.0, 570.0, 633.0, 582.0, 579.0, 630.0, 582.0, 630.0, 536.0, 590.0, 582.0, 579.0, 630.0, 636.0, 633.0, 582.0, 579.0, 570.0, 636.0, 627.0, 582.0, 579.0, 582.0, 576.0, 584.0, 633.0, 630.0, 627.0, 627.0, 633.0, 636.0, 579.0, 633.0, 633.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 287.0, 292.0, 315.0, 318.0, 287.0, 297.0, 296.0, 294.0, 290.0, 289.0, 320.0, 316.0, 286.0, 290.0, 290.0, 292.0, 295.0, 287.0, 292.0, 290.0, 293.0, 289.0, 317.0, 319.0, 294.0, 293.0, 312.0, 318.0, 291.0, 288.0, 284.0, 303.0, 297.0, 293.0, 319.0, 314.0, 319.0, 314.0, 314.0, 313.0, 293.0, 289.0, 317.0, 316.0, 314.0, 316.0, 288.0, 288.0, 283.0, 290.0, 290.0, 289.0, 320.0, 319.0, 319.0, 311.0, 294.0, 288.0, 319.0, 320.0, 291.0, 296.0, 286.0, 296.0, 291.0, 291.0, 288.0, 291.0, 290.0, 289.0, 289.0, 293.0, 316.0, 314.0, 314.0, 316.0, 315.0, 321.0, 287.0, 292.0, 290.0, 289.0, 288.0, 299.0, 319.0, 311.0, 315.0, 315.0, 319.0, 317.0, 292.0, 287.0, 291.0, 291.0, 294.0, 288.0, 319.0, 281.0, 319.0, 311.0, 285.0, 299.0, 319.0, 311.0, 288.0, 291.0, 287.0, 292.0, 290.0, 289.0, 298.0, 295.0, 309.0, 321.0, 291.0, 285.0, 315.0, 321.0, 318.0, 312.0, 294.0, 282.0, 284.0, 300.0, 289.0, 
293.0, 290.0, 294.0, 284.0, 295.0, 286.0, 284.0, 314.0, 319.0, 294.0, 288.0, 286.0, 293.0, 319.0, 311.0, 288.0, 294.0, 314.0, 316.0, 267.0, 269.0, 289.0, 301.0, 293.0, 289.0, 288.0, 291.0, 322.0, 308.0, 319.0, 317.0, 315.0, 318.0, 298.0, 284.0, 286.0, 293.0, 288.0, 282.0, 321.0, 315.0, 312.0, 315.0, 294.0, 288.0, 289.0, 290.0, 289.0, 293.0, 293.0, 283.0, 294.0, 290.0, 315.0, 318.0, 314.0, 316.0, 317.0, 310.0, 318.0, 309.0, 313.0, 320.0, 317.0, 319.0, 284.0, 295.0, 313.0, 320.0, 320.0, 313.0, 293.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7020417621444395, "mean_inference_ms": 1.2558035576888165, "mean_action_processing_ms": 0.1343661017492229, "mean_env_wait_ms": 0.8449207548688427, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7936000, "num_agent_steps_trained": 7936000, "num_env_steps_sampled": 3968000, "num_env_steps_trained": 3968000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3968000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7936000, "timers": {"training_iteration_time_ms": 3558.356, "learn_time_ms": 1063.744, "learn_throughput": 12032.967, "synch_weights_time_ms": 10.031}, "counters": {"num_env_steps_sampled": 3968000, "num_env_steps_trained": 3968000, "num_agent_steps_sampled": 7936000, "num_agent_steps_trained": 7936000}, "done": false, "episodes_total": 9920, "training_iteration": 310, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-38", "timestamp": 1666581638, "time_this_iter_s": 3.787797212600708, "time_total_s": 1193.329309463501, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1193.329309463501, "timesteps_since_restore": 0, "iterations_since_restore": 310, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.733333333333334, "ram_util_percent": 10.616666666666665}} +{"custom_metrics": {"sparse_reward_mean": 206.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.72, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.55, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.7, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.38, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.38, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.11, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.11, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 18.15, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.82, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.81, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.77, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.11, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 18.15, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.11, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 18.15, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000862886430695653, "policy_loss": 0.0005224489723332226, "vf_loss": 7.662137031555176, "vf_explained_var": 0.55738765001297, "kl": 0.002454055706039071, "entropy": 0.8515514731407166, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3980800, "num_env_steps_trained": 3980800, "num_agent_steps_sampled": 7961600, "num_agent_steps_trained": 7961600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 597.32, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 298.66}, "custom_metrics": {"sparse_reward_mean": 206.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.72, "shaped_reward_min": 160, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.55, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.7, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.38, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.38, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.11, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 26, "potting_onion_agent_1_mean": 18.15, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.1, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.82, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.21, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.81, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.77, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.11, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 26, "optimal_onion_potting_agent_1_mean": 18.15, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.11, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 26, "viable_onion_potting_agent_1_mean": 18.15, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 579.0, 579.0, 582.0, 630.0, 630.0, 636.0, 579.0, 579.0, 587.0, 630.0, 630.0, 636.0, 579.0, 582.0, 582.0, 600.0, 630.0, 584.0, 630.0, 579.0, 579.0, 579.0, 593.0, 630.0, 576.0, 636.0, 630.0, 576.0, 584.0, 582.0, 584.0, 579.0, 570.0, 633.0, 582.0, 579.0, 630.0, 582.0, 630.0, 536.0, 590.0, 582.0, 579.0, 630.0, 636.0, 633.0, 582.0, 579.0, 570.0, 636.0, 627.0, 582.0, 579.0, 582.0, 576.0, 584.0, 633.0, 630.0, 627.0, 627.0, 633.0, 636.0, 579.0, 633.0, 633.0, 579.0, 579.0, 582.0, 587.0, 636.0, 627.0, 579.0, 570.0, 582.0, 587.0, 630.0, 582.0, 633.0, 567.0, 630.0, 576.0, 579.0, 639.0, 576.0, 587.0, 579.0, 582.0, 567.0, 630.0, 579.0, 630.0, 579.0, 538.0, 582.0, 522.0, 582.0, 627.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 296.0, 291.0, 291.0, 288.0, 291.0, 290.0, 289.0, 289.0, 293.0, 316.0, 
314.0, 314.0, 316.0, 315.0, 321.0, 287.0, 292.0, 290.0, 289.0, 288.0, 299.0, 319.0, 311.0, 315.0, 315.0, 319.0, 317.0, 292.0, 287.0, 291.0, 291.0, 294.0, 288.0, 319.0, 281.0, 319.0, 311.0, 285.0, 299.0, 319.0, 311.0, 288.0, 291.0, 287.0, 292.0, 290.0, 289.0, 298.0, 295.0, 309.0, 321.0, 291.0, 285.0, 315.0, 321.0, 318.0, 312.0, 294.0, 282.0, 284.0, 300.0, 289.0, 293.0, 290.0, 294.0, 284.0, 295.0, 286.0, 284.0, 314.0, 319.0, 294.0, 288.0, 286.0, 293.0, 319.0, 311.0, 288.0, 294.0, 314.0, 316.0, 267.0, 269.0, 289.0, 301.0, 293.0, 289.0, 288.0, 291.0, 322.0, 308.0, 319.0, 317.0, 315.0, 318.0, 298.0, 284.0, 286.0, 293.0, 288.0, 282.0, 321.0, 315.0, 312.0, 315.0, 294.0, 288.0, 289.0, 290.0, 289.0, 293.0, 293.0, 283.0, 294.0, 290.0, 315.0, 318.0, 314.0, 316.0, 317.0, 310.0, 318.0, 309.0, 313.0, 320.0, 317.0, 319.0, 284.0, 295.0, 313.0, 320.0, 320.0, 313.0, 293.0, 286.0, 286.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 312.0, 315.0, 292.0, 287.0, 280.0, 290.0, 290.0, 292.0, 300.0, 287.0, 314.0, 316.0, 295.0, 287.0, 315.0, 318.0, 285.0, 282.0, 317.0, 313.0, 292.0, 284.0, 291.0, 288.0, 317.0, 322.0, 284.0, 292.0, 294.0, 293.0, 289.0, 290.0, 290.0, 292.0, 282.0, 285.0, 317.0, 313.0, 282.0, 297.0, 319.0, 311.0, 290.0, 289.0, 261.0, 277.0, 288.0, 294.0, 264.0, 258.0, 295.0, 287.0, 311.0, 316.0, 281.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7019608180315113, "mean_inference_ms": 1.255644418855455, "mean_action_processing_ms": 0.13435158068712152, "mean_env_wait_ms": 0.8449615353261167, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 597.32, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 298.66}, "hist_stats": {"episode_reward": [582.0, 582.0, 579.0, 579.0, 582.0, 630.0, 630.0, 636.0, 579.0, 579.0, 587.0, 630.0, 630.0, 636.0, 579.0, 582.0, 582.0, 600.0, 630.0, 
584.0, 630.0, 579.0, 579.0, 579.0, 593.0, 630.0, 576.0, 636.0, 630.0, 576.0, 584.0, 582.0, 584.0, 579.0, 570.0, 633.0, 582.0, 579.0, 630.0, 582.0, 630.0, 536.0, 590.0, 582.0, 579.0, 630.0, 636.0, 633.0, 582.0, 579.0, 570.0, 636.0, 627.0, 582.0, 579.0, 582.0, 576.0, 584.0, 633.0, 630.0, 627.0, 627.0, 633.0, 636.0, 579.0, 633.0, 633.0, 579.0, 579.0, 582.0, 587.0, 636.0, 627.0, 579.0, 570.0, 582.0, 587.0, 630.0, 582.0, 633.0, 567.0, 630.0, 576.0, 579.0, 639.0, 576.0, 587.0, 579.0, 582.0, 567.0, 630.0, 579.0, 630.0, 579.0, 538.0, 582.0, 522.0, 582.0, 627.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 296.0, 291.0, 291.0, 288.0, 291.0, 290.0, 289.0, 289.0, 293.0, 316.0, 314.0, 314.0, 316.0, 315.0, 321.0, 287.0, 292.0, 290.0, 289.0, 288.0, 299.0, 319.0, 311.0, 315.0, 315.0, 319.0, 317.0, 292.0, 287.0, 291.0, 291.0, 294.0, 288.0, 319.0, 281.0, 319.0, 311.0, 285.0, 299.0, 319.0, 311.0, 288.0, 291.0, 287.0, 292.0, 290.0, 289.0, 298.0, 295.0, 309.0, 321.0, 291.0, 285.0, 315.0, 321.0, 318.0, 312.0, 294.0, 282.0, 284.0, 300.0, 289.0, 293.0, 290.0, 294.0, 284.0, 295.0, 286.0, 284.0, 314.0, 319.0, 294.0, 288.0, 286.0, 293.0, 319.0, 311.0, 288.0, 294.0, 314.0, 316.0, 267.0, 269.0, 289.0, 301.0, 293.0, 289.0, 288.0, 291.0, 322.0, 308.0, 319.0, 317.0, 315.0, 318.0, 298.0, 284.0, 286.0, 293.0, 288.0, 282.0, 321.0, 315.0, 312.0, 315.0, 294.0, 288.0, 289.0, 290.0, 289.0, 293.0, 293.0, 283.0, 294.0, 290.0, 315.0, 318.0, 314.0, 316.0, 317.0, 310.0, 318.0, 309.0, 313.0, 320.0, 317.0, 
319.0, 284.0, 295.0, 313.0, 320.0, 320.0, 313.0, 293.0, 286.0, 286.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 312.0, 315.0, 292.0, 287.0, 280.0, 290.0, 290.0, 292.0, 300.0, 287.0, 314.0, 316.0, 295.0, 287.0, 315.0, 318.0, 285.0, 282.0, 317.0, 313.0, 292.0, 284.0, 291.0, 288.0, 317.0, 322.0, 284.0, 292.0, 294.0, 293.0, 289.0, 290.0, 290.0, 292.0, 282.0, 285.0, 317.0, 313.0, 282.0, 297.0, 319.0, 311.0, 290.0, 289.0, 261.0, 277.0, 288.0, 294.0, 264.0, 258.0, 295.0, 287.0, 311.0, 316.0, 281.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7019608180315113, "mean_inference_ms": 1.255644418855455, "mean_action_processing_ms": 0.13435158068712152, "mean_env_wait_ms": 0.8449615353261167, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7961600, "num_agent_steps_trained": 7961600, "num_env_steps_sampled": 3980800, "num_env_steps_trained": 3980800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3980800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7961600, "timers": {"training_iteration_time_ms": 3577.153, "learn_time_ms": 1073.814, "learn_throughput": 11920.123, "synch_weights_time_ms": 10.129}, "counters": {"num_env_steps_sampled": 3980800, "num_env_steps_trained": 3980800, "num_agent_steps_sampled": 7961600, "num_agent_steps_trained": 7961600}, "done": false, "episodes_total": 9952, "training_iteration": 311, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-42", "timestamp": 1666581642, "time_this_iter_s": 3.761920690536499, "time_total_s": 1197.0912301540375, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1197.0912301540375, "timesteps_since_restore": 0, "iterations_since_restore": 311, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.34, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.87, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.11, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.94, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.93, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.66, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.79, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 18.46, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.93, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.89, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.83, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.79, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 18.46, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.79, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 18.46, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001228528330102563, "policy_loss": -0.001570116844959557, "vf_loss": 7.6725358963012695, "vf_explained_var": 0.5661988854408264, "kl": 0.002121095545589924, "entropy": 0.8513274788856506, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 3993600, "num_env_steps_trained": 3993600, "num_agent_steps_sampled": 7987200, "num_agent_steps_trained": 7987200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 596.67, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 298.335}, "custom_metrics": {"sparse_reward_mean": 206.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.87, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.11, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.94, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.93, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.66, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.79, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 18.46, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.93, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.89, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.83, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.79, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 18.46, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.79, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 18.46, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 579.0, 570.0, 633.0, 582.0, 579.0, 630.0, 582.0, 630.0, 536.0, 590.0, 582.0, 579.0, 630.0, 636.0, 633.0, 582.0, 579.0, 570.0, 636.0, 627.0, 582.0, 579.0, 582.0, 576.0, 584.0, 633.0, 630.0, 627.0, 627.0, 633.0, 636.0, 579.0, 633.0, 633.0, 579.0, 579.0, 582.0, 587.0, 636.0, 627.0, 579.0, 570.0, 582.0, 587.0, 630.0, 582.0, 633.0, 567.0, 630.0, 576.0, 579.0, 639.0, 576.0, 587.0, 579.0, 582.0, 567.0, 630.0, 579.0, 630.0, 579.0, 538.0, 582.0, 522.0, 582.0, 627.0, 573.0, 579.0, 579.0, 587.0, 584.0, 530.0, 582.0, 630.0, 582.0, 587.0, 630.0, 579.0, 582.0, 633.0, 630.0, 582.0, 636.0, 582.0, 630.0, 627.0, 579.0, 582.0, 576.0, 579.0, 579.0, 639.0, 630.0, 627.0, 582.0, 584.0, 587.0, 579.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 294.0, 284.0, 295.0, 286.0, 284.0, 314.0, 319.0, 294.0, 288.0, 286.0, 
293.0, 319.0, 311.0, 288.0, 294.0, 314.0, 316.0, 267.0, 269.0, 289.0, 301.0, 293.0, 289.0, 288.0, 291.0, 322.0, 308.0, 319.0, 317.0, 315.0, 318.0, 298.0, 284.0, 286.0, 293.0, 288.0, 282.0, 321.0, 315.0, 312.0, 315.0, 294.0, 288.0, 289.0, 290.0, 289.0, 293.0, 293.0, 283.0, 294.0, 290.0, 315.0, 318.0, 314.0, 316.0, 317.0, 310.0, 318.0, 309.0, 313.0, 320.0, 317.0, 319.0, 284.0, 295.0, 313.0, 320.0, 320.0, 313.0, 293.0, 286.0, 286.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 312.0, 315.0, 292.0, 287.0, 280.0, 290.0, 290.0, 292.0, 300.0, 287.0, 314.0, 316.0, 295.0, 287.0, 315.0, 318.0, 285.0, 282.0, 317.0, 313.0, 292.0, 284.0, 291.0, 288.0, 317.0, 322.0, 284.0, 292.0, 294.0, 293.0, 289.0, 290.0, 290.0, 292.0, 282.0, 285.0, 317.0, 313.0, 282.0, 297.0, 319.0, 311.0, 290.0, 289.0, 261.0, 277.0, 288.0, 294.0, 264.0, 258.0, 295.0, 287.0, 311.0, 316.0, 281.0, 292.0, 287.0, 292.0, 285.0, 294.0, 298.0, 289.0, 291.0, 293.0, 267.0, 263.0, 288.0, 294.0, 317.0, 313.0, 288.0, 294.0, 292.0, 295.0, 316.0, 314.0, 290.0, 289.0, 294.0, 288.0, 317.0, 316.0, 311.0, 319.0, 290.0, 292.0, 321.0, 315.0, 293.0, 289.0, 306.0, 324.0, 312.0, 315.0, 294.0, 285.0, 290.0, 292.0, 290.0, 286.0, 289.0, 290.0, 296.0, 283.0, 318.0, 321.0, 312.0, 318.0, 309.0, 318.0, 286.0, 296.0, 290.0, 294.0, 291.0, 296.0, 287.0, 292.0, 311.0, 322.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7018695828111025, "mean_inference_ms": 1.255488717550174, "mean_action_processing_ms": 0.13433650614705042, "mean_env_wait_ms": 0.8449992857233162, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 596.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 298.335}, "hist_stats": {"episode_reward": [584.0, 579.0, 570.0, 633.0, 582.0, 579.0, 630.0, 582.0, 630.0, 536.0, 590.0, 582.0, 579.0, 630.0, 636.0, 633.0, 582.0, 579.0, 570.0, 
636.0, 627.0, 582.0, 579.0, 582.0, 576.0, 584.0, 633.0, 630.0, 627.0, 627.0, 633.0, 636.0, 579.0, 633.0, 633.0, 579.0, 579.0, 582.0, 587.0, 636.0, 627.0, 579.0, 570.0, 582.0, 587.0, 630.0, 582.0, 633.0, 567.0, 630.0, 576.0, 579.0, 639.0, 576.0, 587.0, 579.0, 582.0, 567.0, 630.0, 579.0, 630.0, 579.0, 538.0, 582.0, 522.0, 582.0, 627.0, 573.0, 579.0, 579.0, 587.0, 584.0, 530.0, 582.0, 630.0, 582.0, 587.0, 630.0, 579.0, 582.0, 633.0, 630.0, 582.0, 636.0, 582.0, 630.0, 627.0, 579.0, 582.0, 576.0, 579.0, 579.0, 639.0, 630.0, 627.0, 582.0, 584.0, 587.0, 579.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 294.0, 284.0, 295.0, 286.0, 284.0, 314.0, 319.0, 294.0, 288.0, 286.0, 293.0, 319.0, 311.0, 288.0, 294.0, 314.0, 316.0, 267.0, 269.0, 289.0, 301.0, 293.0, 289.0, 288.0, 291.0, 322.0, 308.0, 319.0, 317.0, 315.0, 318.0, 298.0, 284.0, 286.0, 293.0, 288.0, 282.0, 321.0, 315.0, 312.0, 315.0, 294.0, 288.0, 289.0, 290.0, 289.0, 293.0, 293.0, 283.0, 294.0, 290.0, 315.0, 318.0, 314.0, 316.0, 317.0, 310.0, 318.0, 309.0, 313.0, 320.0, 317.0, 319.0, 284.0, 295.0, 313.0, 320.0, 320.0, 313.0, 293.0, 286.0, 286.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 312.0, 315.0, 292.0, 287.0, 280.0, 290.0, 290.0, 292.0, 300.0, 287.0, 314.0, 316.0, 295.0, 287.0, 315.0, 318.0, 285.0, 282.0, 317.0, 313.0, 292.0, 284.0, 291.0, 288.0, 317.0, 322.0, 284.0, 292.0, 294.0, 293.0, 289.0, 290.0, 290.0, 292.0, 282.0, 285.0, 317.0, 313.0, 282.0, 297.0, 319.0, 311.0, 290.0, 289.0, 261.0, 277.0, 288.0, 
294.0, 264.0, 258.0, 295.0, 287.0, 311.0, 316.0, 281.0, 292.0, 287.0, 292.0, 285.0, 294.0, 298.0, 289.0, 291.0, 293.0, 267.0, 263.0, 288.0, 294.0, 317.0, 313.0, 288.0, 294.0, 292.0, 295.0, 316.0, 314.0, 290.0, 289.0, 294.0, 288.0, 317.0, 316.0, 311.0, 319.0, 290.0, 292.0, 321.0, 315.0, 293.0, 289.0, 306.0, 324.0, 312.0, 315.0, 294.0, 285.0, 290.0, 292.0, 290.0, 286.0, 289.0, 290.0, 296.0, 283.0, 318.0, 321.0, 312.0, 318.0, 309.0, 318.0, 286.0, 296.0, 290.0, 294.0, 291.0, 296.0, 287.0, 292.0, 311.0, 322.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7018695828111025, "mean_inference_ms": 1.255488717550174, "mean_action_processing_ms": 0.13433650614705042, "mean_env_wait_ms": 0.8449992857233162, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 7987200, "num_agent_steps_trained": 7987200, "num_env_steps_sampled": 3993600, "num_env_steps_trained": 3993600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 3993600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 7987200, "timers": {"training_iteration_time_ms": 3578.867, "learn_time_ms": 1077.185, "learn_throughput": 11882.823, "synch_weights_time_ms": 10.361}, "counters": {"num_env_steps_sampled": 3993600, "num_env_steps_trained": 3993600, "num_agent_steps_sampled": 7987200, "num_agent_steps_trained": 7987200}, "done": false, "episodes_total": 9984, "training_iteration": 312, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-46", "timestamp": 1666581646, "time_this_iter_s": 3.587341547012329, "time_total_s": 1200.6785717010498, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1200.6785717010498, "timesteps_since_restore": 0, "iterations_since_restore": 312, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.583333333333332, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 206.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.9, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.87, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 19.13, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.71, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.9, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.54, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 18.67, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.35, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.04, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.97, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.5, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.86, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.54, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 18.67, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.54, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 18.67, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0012437774566933513, "policy_loss": 0.0009095997083932161, "vf_loss": 7.573324203491211, "vf_explained_var": 0.5715011358261108, "kl": 0.002083378843963146, "entropy": 0.8463079929351807, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4006400, "num_env_steps_trained": 4006400, "num_agent_steps_sampled": 8012800, "num_agent_steps_trained": 8012800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 595.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 297.95}, "custom_metrics": {"sparse_reward_mean": 206.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.9, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.87, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 19.13, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.71, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 18.9, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.54, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 18.67, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.35, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.04, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.6, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.18, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.97, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.5, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.86, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.54, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 18.67, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.54, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 18.67, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 633.0, 579.0, 579.0, 582.0, 587.0, 636.0, 627.0, 579.0, 570.0, 582.0, 587.0, 630.0, 582.0, 633.0, 567.0, 630.0, 576.0, 579.0, 639.0, 576.0, 587.0, 579.0, 582.0, 567.0, 630.0, 579.0, 630.0, 579.0, 538.0, 582.0, 522.0, 582.0, 627.0, 573.0, 579.0, 579.0, 587.0, 584.0, 530.0, 582.0, 630.0, 582.0, 587.0, 630.0, 579.0, 582.0, 633.0, 630.0, 582.0, 636.0, 582.0, 630.0, 627.0, 579.0, 582.0, 576.0, 579.0, 579.0, 639.0, 630.0, 627.0, 582.0, 584.0, 587.0, 579.0, 633.0, 630.0, 630.0, 630.0, 582.0, 587.0, 630.0, 627.0, 579.0, 587.0, 639.0, 579.0, 584.0, 584.0, 539.0, 579.0, 630.0, 582.0, 587.0, 582.0, 579.0, 582.0, 582.0, 633.0, 576.0, 630.0, 579.0, 582.0, 576.0, 630.0, 630.0, 579.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 295.0, 313.0, 320.0, 320.0, 313.0, 293.0, 286.0, 286.0, 293.0, 288.0, 
294.0, 296.0, 291.0, 322.0, 314.0, 312.0, 315.0, 292.0, 287.0, 280.0, 290.0, 290.0, 292.0, 300.0, 287.0, 314.0, 316.0, 295.0, 287.0, 315.0, 318.0, 285.0, 282.0, 317.0, 313.0, 292.0, 284.0, 291.0, 288.0, 317.0, 322.0, 284.0, 292.0, 294.0, 293.0, 289.0, 290.0, 290.0, 292.0, 282.0, 285.0, 317.0, 313.0, 282.0, 297.0, 319.0, 311.0, 290.0, 289.0, 261.0, 277.0, 288.0, 294.0, 264.0, 258.0, 295.0, 287.0, 311.0, 316.0, 281.0, 292.0, 287.0, 292.0, 285.0, 294.0, 298.0, 289.0, 291.0, 293.0, 267.0, 263.0, 288.0, 294.0, 317.0, 313.0, 288.0, 294.0, 292.0, 295.0, 316.0, 314.0, 290.0, 289.0, 294.0, 288.0, 317.0, 316.0, 311.0, 319.0, 290.0, 292.0, 321.0, 315.0, 293.0, 289.0, 306.0, 324.0, 312.0, 315.0, 294.0, 285.0, 290.0, 292.0, 290.0, 286.0, 289.0, 290.0, 296.0, 283.0, 318.0, 321.0, 312.0, 318.0, 309.0, 318.0, 286.0, 296.0, 290.0, 294.0, 291.0, 296.0, 287.0, 292.0, 311.0, 322.0, 313.0, 317.0, 318.0, 312.0, 311.0, 319.0, 287.0, 295.0, 291.0, 296.0, 315.0, 315.0, 314.0, 313.0, 295.0, 284.0, 294.0, 293.0, 324.0, 315.0, 287.0, 292.0, 296.0, 288.0, 294.0, 290.0, 265.0, 274.0, 288.0, 291.0, 315.0, 315.0, 291.0, 291.0, 296.0, 291.0, 292.0, 290.0, 289.0, 290.0, 289.0, 293.0, 291.0, 291.0, 315.0, 318.0, 290.0, 286.0, 315.0, 315.0, 290.0, 289.0, 286.0, 296.0, 287.0, 289.0, 320.0, 310.0, 318.0, 312.0, 289.0, 290.0, 319.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7017718876395496, "mean_inference_ms": 1.2552825267149847, "mean_action_processing_ms": 0.13432257141987783, "mean_env_wait_ms": 0.8448774369248566, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 595.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 297.95}, "hist_stats": {"episode_reward": [579.0, 633.0, 633.0, 579.0, 579.0, 582.0, 587.0, 636.0, 627.0, 579.0, 570.0, 582.0, 587.0, 630.0, 582.0, 633.0, 567.0, 630.0, 576.0, 
579.0, 639.0, 576.0, 587.0, 579.0, 582.0, 567.0, 630.0, 579.0, 630.0, 579.0, 538.0, 582.0, 522.0, 582.0, 627.0, 573.0, 579.0, 579.0, 587.0, 584.0, 530.0, 582.0, 630.0, 582.0, 587.0, 630.0, 579.0, 582.0, 633.0, 630.0, 582.0, 636.0, 582.0, 630.0, 627.0, 579.0, 582.0, 576.0, 579.0, 579.0, 639.0, 630.0, 627.0, 582.0, 584.0, 587.0, 579.0, 633.0, 630.0, 630.0, 630.0, 582.0, 587.0, 630.0, 627.0, 579.0, 587.0, 639.0, 579.0, 584.0, 584.0, 539.0, 579.0, 630.0, 582.0, 587.0, 582.0, 579.0, 582.0, 582.0, 633.0, 576.0, 630.0, 579.0, 582.0, 576.0, 630.0, 630.0, 579.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 295.0, 313.0, 320.0, 320.0, 313.0, 293.0, 286.0, 286.0, 293.0, 288.0, 294.0, 296.0, 291.0, 322.0, 314.0, 312.0, 315.0, 292.0, 287.0, 280.0, 290.0, 290.0, 292.0, 300.0, 287.0, 314.0, 316.0, 295.0, 287.0, 315.0, 318.0, 285.0, 282.0, 317.0, 313.0, 292.0, 284.0, 291.0, 288.0, 317.0, 322.0, 284.0, 292.0, 294.0, 293.0, 289.0, 290.0, 290.0, 292.0, 282.0, 285.0, 317.0, 313.0, 282.0, 297.0, 319.0, 311.0, 290.0, 289.0, 261.0, 277.0, 288.0, 294.0, 264.0, 258.0, 295.0, 287.0, 311.0, 316.0, 281.0, 292.0, 287.0, 292.0, 285.0, 294.0, 298.0, 289.0, 291.0, 293.0, 267.0, 263.0, 288.0, 294.0, 317.0, 313.0, 288.0, 294.0, 292.0, 295.0, 316.0, 314.0, 290.0, 289.0, 294.0, 288.0, 317.0, 316.0, 311.0, 319.0, 290.0, 292.0, 321.0, 315.0, 293.0, 289.0, 306.0, 324.0, 312.0, 315.0, 294.0, 285.0, 290.0, 292.0, 290.0, 286.0, 289.0, 290.0, 296.0, 283.0, 318.0, 321.0, 312.0, 318.0, 309.0, 318.0, 286.0, 
296.0, 290.0, 294.0, 291.0, 296.0, 287.0, 292.0, 311.0, 322.0, 313.0, 317.0, 318.0, 312.0, 311.0, 319.0, 287.0, 295.0, 291.0, 296.0, 315.0, 315.0, 314.0, 313.0, 295.0, 284.0, 294.0, 293.0, 324.0, 315.0, 287.0, 292.0, 296.0, 288.0, 294.0, 290.0, 265.0, 274.0, 288.0, 291.0, 315.0, 315.0, 291.0, 291.0, 296.0, 291.0, 292.0, 290.0, 289.0, 290.0, 289.0, 293.0, 291.0, 291.0, 315.0, 318.0, 290.0, 286.0, 315.0, 315.0, 290.0, 289.0, 286.0, 296.0, 287.0, 289.0, 320.0, 310.0, 318.0, 312.0, 289.0, 290.0, 319.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7017718876395496, "mean_inference_ms": 1.2552825267149847, "mean_action_processing_ms": 0.13432257141987783, "mean_env_wait_ms": 0.8448774369248566, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8012800, "num_agent_steps_trained": 8012800, "num_env_steps_sampled": 4006400, "num_env_steps_trained": 4006400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4006400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8012800, "timers": {"training_iteration_time_ms": 3587.248, "learn_time_ms": 1076.252, "learn_throughput": 11893.126, "synch_weights_time_ms": 9.82}, "counters": {"num_env_steps_sampled": 4006400, "num_env_steps_trained": 4006400, "num_agent_steps_sampled": 8012800, "num_agent_steps_trained": 8012800}, "done": false, "episodes_total": 10016, "training_iteration": 313, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-50", "timestamp": 1666581650, "time_this_iter_s": 3.6089444160461426, "time_total_s": 1204.287516117096, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1204.287516117096, "timesteps_since_restore": 0, "iterations_since_restore": 313, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.72, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 206.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.17, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.07, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.96, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.74, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.49, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.02, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.86, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.78, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.67, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.49, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.49, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00017371168360114098, "policy_loss": -0.00016079226043075323, "vf_loss": 7.614766597747803, "vf_explained_var": 0.5785398483276367, "kl": 0.0018128352239727974, "entropy": 0.8539440631866455, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4019200, "num_env_steps_trained": 4019200, "num_agent_steps_sampled": 8038400, "num_agent_steps_trained": 8038400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 596.57, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 298.285}, "custom_metrics": {"sparse_reward_mean": 206.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.17, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.07, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.96, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.74, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.67, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.49, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.22, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.02, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.86, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.78, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.67, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.49, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.67, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.49, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 582.0, 627.0, 573.0, 579.0, 579.0, 587.0, 584.0, 530.0, 582.0, 630.0, 582.0, 587.0, 630.0, 579.0, 582.0, 633.0, 630.0, 582.0, 636.0, 582.0, 630.0, 627.0, 579.0, 582.0, 576.0, 579.0, 579.0, 639.0, 630.0, 627.0, 582.0, 584.0, 587.0, 579.0, 633.0, 630.0, 630.0, 630.0, 582.0, 587.0, 630.0, 627.0, 579.0, 587.0, 639.0, 579.0, 584.0, 584.0, 539.0, 579.0, 630.0, 582.0, 587.0, 582.0, 579.0, 582.0, 582.0, 633.0, 576.0, 630.0, 579.0, 582.0, 576.0, 630.0, 630.0, 579.0, 636.0, 633.0, 576.0, 627.0, 584.0, 636.0, 582.0, 573.0, 582.0, 630.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 636.0, 633.0, 636.0, 576.0, 582.0, 579.0, 590.0, 579.0, 579.0, 579.0, 636.0, 581.0, 582.0, 582.0, 627.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 258.0, 295.0, 287.0, 311.0, 316.0, 281.0, 292.0, 287.0, 292.0, 285.0, 
294.0, 298.0, 289.0, 291.0, 293.0, 267.0, 263.0, 288.0, 294.0, 317.0, 313.0, 288.0, 294.0, 292.0, 295.0, 316.0, 314.0, 290.0, 289.0, 294.0, 288.0, 317.0, 316.0, 311.0, 319.0, 290.0, 292.0, 321.0, 315.0, 293.0, 289.0, 306.0, 324.0, 312.0, 315.0, 294.0, 285.0, 290.0, 292.0, 290.0, 286.0, 289.0, 290.0, 296.0, 283.0, 318.0, 321.0, 312.0, 318.0, 309.0, 318.0, 286.0, 296.0, 290.0, 294.0, 291.0, 296.0, 287.0, 292.0, 311.0, 322.0, 313.0, 317.0, 318.0, 312.0, 311.0, 319.0, 287.0, 295.0, 291.0, 296.0, 315.0, 315.0, 314.0, 313.0, 295.0, 284.0, 294.0, 293.0, 324.0, 315.0, 287.0, 292.0, 296.0, 288.0, 294.0, 290.0, 265.0, 274.0, 288.0, 291.0, 315.0, 315.0, 291.0, 291.0, 296.0, 291.0, 292.0, 290.0, 289.0, 290.0, 289.0, 293.0, 291.0, 291.0, 315.0, 318.0, 290.0, 286.0, 315.0, 315.0, 290.0, 289.0, 286.0, 296.0, 287.0, 289.0, 320.0, 310.0, 318.0, 312.0, 289.0, 290.0, 319.0, 317.0, 317.0, 316.0, 288.0, 288.0, 312.0, 315.0, 290.0, 294.0, 319.0, 317.0, 292.0, 290.0, 283.0, 290.0, 286.0, 296.0, 317.0, 313.0, 286.0, 296.0, 286.0, 293.0, 286.0, 293.0, 283.0, 293.0, 293.0, 294.0, 282.0, 291.0, 318.0, 318.0, 318.0, 315.0, 316.0, 320.0, 284.0, 292.0, 286.0, 296.0, 290.0, 289.0, 293.0, 297.0, 289.0, 290.0, 294.0, 285.0, 289.0, 290.0, 319.0, 317.0, 287.0, 294.0, 284.0, 298.0, 297.0, 285.0, 314.0, 313.0, 286.0, 293.0, 312.0, 318.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7016816063473018, "mean_inference_ms": 1.255069617024935, "mean_action_processing_ms": 0.1343095240072887, "mean_env_wait_ms": 0.8447524656146783, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 596.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 258.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 298.285}, "hist_stats": {"episode_reward": [522.0, 582.0, 627.0, 573.0, 579.0, 579.0, 587.0, 584.0, 530.0, 582.0, 630.0, 582.0, 587.0, 630.0, 579.0, 582.0, 633.0, 630.0, 582.0, 
636.0, 582.0, 630.0, 627.0, 579.0, 582.0, 576.0, 579.0, 579.0, 639.0, 630.0, 627.0, 582.0, 584.0, 587.0, 579.0, 633.0, 630.0, 630.0, 630.0, 582.0, 587.0, 630.0, 627.0, 579.0, 587.0, 639.0, 579.0, 584.0, 584.0, 539.0, 579.0, 630.0, 582.0, 587.0, 582.0, 579.0, 582.0, 582.0, 633.0, 576.0, 630.0, 579.0, 582.0, 576.0, 630.0, 630.0, 579.0, 636.0, 633.0, 576.0, 627.0, 584.0, 636.0, 582.0, 573.0, 582.0, 630.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 636.0, 633.0, 636.0, 576.0, 582.0, 579.0, 590.0, 579.0, 579.0, 579.0, 636.0, 581.0, 582.0, 582.0, 627.0, 579.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 258.0, 295.0, 287.0, 311.0, 316.0, 281.0, 292.0, 287.0, 292.0, 285.0, 294.0, 298.0, 289.0, 291.0, 293.0, 267.0, 263.0, 288.0, 294.0, 317.0, 313.0, 288.0, 294.0, 292.0, 295.0, 316.0, 314.0, 290.0, 289.0, 294.0, 288.0, 317.0, 316.0, 311.0, 319.0, 290.0, 292.0, 321.0, 315.0, 293.0, 289.0, 306.0, 324.0, 312.0, 315.0, 294.0, 285.0, 290.0, 292.0, 290.0, 286.0, 289.0, 290.0, 296.0, 283.0, 318.0, 321.0, 312.0, 318.0, 309.0, 318.0, 286.0, 296.0, 290.0, 294.0, 291.0, 296.0, 287.0, 292.0, 311.0, 322.0, 313.0, 317.0, 318.0, 312.0, 311.0, 319.0, 287.0, 295.0, 291.0, 296.0, 315.0, 315.0, 314.0, 313.0, 295.0, 284.0, 294.0, 293.0, 324.0, 315.0, 287.0, 292.0, 296.0, 288.0, 294.0, 290.0, 265.0, 274.0, 288.0, 291.0, 315.0, 315.0, 291.0, 291.0, 296.0, 291.0, 292.0, 290.0, 289.0, 290.0, 289.0, 293.0, 291.0, 291.0, 315.0, 318.0, 290.0, 286.0, 315.0, 315.0, 290.0, 289.0, 286.0, 296.0, 287.0, 
289.0, 320.0, 310.0, 318.0, 312.0, 289.0, 290.0, 319.0, 317.0, 317.0, 316.0, 288.0, 288.0, 312.0, 315.0, 290.0, 294.0, 319.0, 317.0, 292.0, 290.0, 283.0, 290.0, 286.0, 296.0, 317.0, 313.0, 286.0, 296.0, 286.0, 293.0, 286.0, 293.0, 283.0, 293.0, 293.0, 294.0, 282.0, 291.0, 318.0, 318.0, 318.0, 315.0, 316.0, 320.0, 284.0, 292.0, 286.0, 296.0, 290.0, 289.0, 293.0, 297.0, 289.0, 290.0, 294.0, 285.0, 289.0, 290.0, 319.0, 317.0, 287.0, 294.0, 284.0, 298.0, 297.0, 285.0, 314.0, 313.0, 286.0, 293.0, 312.0, 318.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7016816063473018, "mean_inference_ms": 1.255069617024935, "mean_action_processing_ms": 0.1343095240072887, "mean_env_wait_ms": 0.8447524656146783, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8038400, "num_agent_steps_trained": 8038400, "num_env_steps_sampled": 4019200, "num_env_steps_trained": 4019200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4019200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8038400, "timers": {"training_iteration_time_ms": 3601.25, "learn_time_ms": 1093.157, "learn_throughput": 11709.21, "synch_weights_time_ms": 10.23}, "counters": {"num_env_steps_sampled": 4019200, "num_env_steps_trained": 4019200, "num_agent_steps_sampled": 8038400, "num_agent_steps_trained": 8038400}, "done": false, "episodes_total": 10048, "training_iteration": 314, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-54", "timestamp": 1666581654, "time_this_iter_s": 3.732663869857788, "time_total_s": 1208.0201799869537, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1208.0201799869537, "timesteps_since_restore": 0, "iterations_since_restore": 314, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.700000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.37, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.74, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.34, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 19.11, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.26, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.85, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.4, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.81, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.15, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.98, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.9, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.26, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.85, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.26, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.85, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0030033960938453674, "policy_loss": -0.0033531123772263527, "vf_loss": 7.692840576171875, "vf_explained_var": 0.5669675469398499, "kl": 0.003200692357495427, "entropy": 0.8391327261924744, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4032000, "num_env_steps_trained": 4032000, "num_agent_steps_sampled": 8064000, "num_agent_steps_trained": 8064000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 598.37, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 299.185}, "custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.37, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.74, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.34, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.57, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 19.11, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.26, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.85, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.4, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.81, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.15, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.68, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.98, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.9, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.26, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.85, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.26, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.85, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 587.0, 579.0, 633.0, 630.0, 630.0, 630.0, 582.0, 587.0, 630.0, 627.0, 579.0, 587.0, 639.0, 579.0, 584.0, 584.0, 539.0, 579.0, 630.0, 582.0, 587.0, 582.0, 579.0, 582.0, 582.0, 633.0, 576.0, 630.0, 579.0, 582.0, 576.0, 630.0, 630.0, 579.0, 636.0, 633.0, 576.0, 627.0, 584.0, 636.0, 582.0, 573.0, 582.0, 630.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 636.0, 633.0, 636.0, 576.0, 582.0, 579.0, 590.0, 579.0, 579.0, 579.0, 636.0, 581.0, 582.0, 582.0, 627.0, 579.0, 630.0, 582.0, 627.0, 516.0, 582.0, 584.0, 587.0, 582.0, 582.0, 576.0, 582.0, 627.0, 579.0, 627.0, 630.0, 630.0, 579.0, 627.0, 630.0, 582.0, 573.0, 636.0, 582.0, 576.0, 630.0, 627.0, 582.0, 630.0, 630.0, 587.0, 627.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 294.0, 291.0, 296.0, 287.0, 292.0, 311.0, 322.0, 313.0, 317.0, 318.0, 
312.0, 311.0, 319.0, 287.0, 295.0, 291.0, 296.0, 315.0, 315.0, 314.0, 313.0, 295.0, 284.0, 294.0, 293.0, 324.0, 315.0, 287.0, 292.0, 296.0, 288.0, 294.0, 290.0, 265.0, 274.0, 288.0, 291.0, 315.0, 315.0, 291.0, 291.0, 296.0, 291.0, 292.0, 290.0, 289.0, 290.0, 289.0, 293.0, 291.0, 291.0, 315.0, 318.0, 290.0, 286.0, 315.0, 315.0, 290.0, 289.0, 286.0, 296.0, 287.0, 289.0, 320.0, 310.0, 318.0, 312.0, 289.0, 290.0, 319.0, 317.0, 317.0, 316.0, 288.0, 288.0, 312.0, 315.0, 290.0, 294.0, 319.0, 317.0, 292.0, 290.0, 283.0, 290.0, 286.0, 296.0, 317.0, 313.0, 286.0, 296.0, 286.0, 293.0, 286.0, 293.0, 283.0, 293.0, 293.0, 294.0, 282.0, 291.0, 318.0, 318.0, 318.0, 315.0, 316.0, 320.0, 284.0, 292.0, 286.0, 296.0, 290.0, 289.0, 293.0, 297.0, 289.0, 290.0, 294.0, 285.0, 289.0, 290.0, 319.0, 317.0, 287.0, 294.0, 284.0, 298.0, 297.0, 285.0, 314.0, 313.0, 286.0, 293.0, 312.0, 318.0, 292.0, 290.0, 310.0, 317.0, 251.0, 265.0, 283.0, 299.0, 293.0, 291.0, 289.0, 298.0, 294.0, 288.0, 290.0, 292.0, 289.0, 287.0, 293.0, 289.0, 318.0, 309.0, 287.0, 292.0, 310.0, 317.0, 311.0, 319.0, 316.0, 314.0, 295.0, 284.0, 307.0, 320.0, 313.0, 317.0, 285.0, 297.0, 284.0, 289.0, 315.0, 321.0, 285.0, 297.0, 288.0, 288.0, 318.0, 312.0, 314.0, 313.0, 290.0, 292.0, 310.0, 320.0, 318.0, 312.0, 294.0, 293.0, 310.0, 317.0, 293.0, 294.0, 309.0, 321.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7016024368895014, "mean_inference_ms": 1.2548500981360906, "mean_action_processing_ms": 0.13429571912098523, "mean_env_wait_ms": 0.8446177174958618, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 598.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 299.185}, "hist_stats": {"episode_reward": [584.0, 587.0, 579.0, 633.0, 630.0, 630.0, 630.0, 582.0, 587.0, 630.0, 627.0, 579.0, 587.0, 639.0, 579.0, 584.0, 584.0, 539.0, 
579.0, 630.0, 582.0, 587.0, 582.0, 579.0, 582.0, 582.0, 633.0, 576.0, 630.0, 579.0, 582.0, 576.0, 630.0, 630.0, 579.0, 636.0, 633.0, 576.0, 627.0, 584.0, 636.0, 582.0, 573.0, 582.0, 630.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 636.0, 633.0, 636.0, 576.0, 582.0, 579.0, 590.0, 579.0, 579.0, 579.0, 636.0, 581.0, 582.0, 582.0, 627.0, 579.0, 630.0, 582.0, 627.0, 516.0, 582.0, 584.0, 587.0, 582.0, 582.0, 576.0, 582.0, 627.0, 579.0, 627.0, 630.0, 630.0, 579.0, 627.0, 630.0, 582.0, 573.0, 636.0, 582.0, 576.0, 630.0, 627.0, 582.0, 630.0, 630.0, 587.0, 627.0, 587.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 294.0, 291.0, 296.0, 287.0, 292.0, 311.0, 322.0, 313.0, 317.0, 318.0, 312.0, 311.0, 319.0, 287.0, 295.0, 291.0, 296.0, 315.0, 315.0, 314.0, 313.0, 295.0, 284.0, 294.0, 293.0, 324.0, 315.0, 287.0, 292.0, 296.0, 288.0, 294.0, 290.0, 265.0, 274.0, 288.0, 291.0, 315.0, 315.0, 291.0, 291.0, 296.0, 291.0, 292.0, 290.0, 289.0, 290.0, 289.0, 293.0, 291.0, 291.0, 315.0, 318.0, 290.0, 286.0, 315.0, 315.0, 290.0, 289.0, 286.0, 296.0, 287.0, 289.0, 320.0, 310.0, 318.0, 312.0, 289.0, 290.0, 319.0, 317.0, 317.0, 316.0, 288.0, 288.0, 312.0, 315.0, 290.0, 294.0, 319.0, 317.0, 292.0, 290.0, 283.0, 290.0, 286.0, 296.0, 317.0, 313.0, 286.0, 296.0, 286.0, 293.0, 286.0, 293.0, 283.0, 293.0, 293.0, 294.0, 282.0, 291.0, 318.0, 318.0, 318.0, 315.0, 316.0, 320.0, 284.0, 292.0, 286.0, 296.0, 290.0, 289.0, 293.0, 297.0, 289.0, 290.0, 294.0, 285.0, 289.0, 290.0, 319.0, 317.0, 287.0, 294.0, 
284.0, 298.0, 297.0, 285.0, 314.0, 313.0, 286.0, 293.0, 312.0, 318.0, 292.0, 290.0, 310.0, 317.0, 251.0, 265.0, 283.0, 299.0, 293.0, 291.0, 289.0, 298.0, 294.0, 288.0, 290.0, 292.0, 289.0, 287.0, 293.0, 289.0, 318.0, 309.0, 287.0, 292.0, 310.0, 317.0, 311.0, 319.0, 316.0, 314.0, 295.0, 284.0, 307.0, 320.0, 313.0, 317.0, 285.0, 297.0, 284.0, 289.0, 315.0, 321.0, 285.0, 297.0, 288.0, 288.0, 318.0, 312.0, 314.0, 313.0, 290.0, 292.0, 310.0, 320.0, 318.0, 312.0, 294.0, 293.0, 310.0, 317.0, 293.0, 294.0, 309.0, 321.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7016024368895014, "mean_inference_ms": 1.2548500981360906, "mean_action_processing_ms": 0.13429571912098523, "mean_env_wait_ms": 0.8446177174958618, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8064000, "num_agent_steps_trained": 8064000, "num_env_steps_sampled": 4032000, "num_env_steps_trained": 4032000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4032000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8064000, "timers": {"training_iteration_time_ms": 3598.645, "learn_time_ms": 1096.864, "learn_throughput": 11669.635, "synch_weights_time_ms": 10.239}, "counters": {"num_env_steps_sampled": 4032000, "num_env_steps_trained": 4032000, "num_agent_steps_sampled": 8064000, "num_agent_steps_trained": 8064000}, "done": false, "episodes_total": 10080, "training_iteration": 315, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-20-57", "timestamp": 1666581657, "time_this_iter_s": 3.569254159927368, "time_total_s": 1211.589434146881, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1211.589434146881, "timesteps_since_restore": 0, "iterations_since_restore": 315, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.419999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.12, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.8, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.18, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.65, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.94, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.75, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.33, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.84, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.12, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.9, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.38, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.75, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.75, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010780815500766039, "policy_loss": -0.0014326130039989948, "vf_loss": 7.738142013549805, "vf_explained_var": 0.573646068572998, "kl": 0.0020240151789039373, "entropy": 0.8385658264160156, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4044800, "num_env_steps_trained": 4044800, "num_agent_steps_sampled": 8089600, "num_agent_steps_trained": 8089600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 598.12, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 299.06}, "custom_metrics": {"sparse_reward_mean": 207.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 184.12, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.8, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 19.18, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.65, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.94, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.38, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.75, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 6.33, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.84, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.12, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.53, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.9, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.38, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.75, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.38, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.75, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 630.0, 579.0, 636.0, 633.0, 576.0, 627.0, 584.0, 636.0, 582.0, 573.0, 582.0, 630.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 636.0, 633.0, 636.0, 576.0, 582.0, 579.0, 590.0, 579.0, 579.0, 579.0, 636.0, 581.0, 582.0, 582.0, 627.0, 579.0, 630.0, 582.0, 627.0, 516.0, 582.0, 584.0, 587.0, 582.0, 582.0, 576.0, 582.0, 627.0, 579.0, 627.0, 630.0, 630.0, 579.0, 627.0, 630.0, 582.0, 573.0, 636.0, 582.0, 576.0, 630.0, 627.0, 582.0, 630.0, 630.0, 587.0, 627.0, 587.0, 630.0, 579.0, 582.0, 630.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 627.0, 633.0, 587.0, 582.0, 582.0, 582.0, 522.0, 636.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 573.0, 633.0, 570.0, 639.0, 587.0, 579.0, 633.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [320.0, 310.0, 318.0, 312.0, 289.0, 290.0, 319.0, 317.0, 317.0, 316.0, 288.0, 
288.0, 312.0, 315.0, 290.0, 294.0, 319.0, 317.0, 292.0, 290.0, 283.0, 290.0, 286.0, 296.0, 317.0, 313.0, 286.0, 296.0, 286.0, 293.0, 286.0, 293.0, 283.0, 293.0, 293.0, 294.0, 282.0, 291.0, 318.0, 318.0, 318.0, 315.0, 316.0, 320.0, 284.0, 292.0, 286.0, 296.0, 290.0, 289.0, 293.0, 297.0, 289.0, 290.0, 294.0, 285.0, 289.0, 290.0, 319.0, 317.0, 287.0, 294.0, 284.0, 298.0, 297.0, 285.0, 314.0, 313.0, 286.0, 293.0, 312.0, 318.0, 292.0, 290.0, 310.0, 317.0, 251.0, 265.0, 283.0, 299.0, 293.0, 291.0, 289.0, 298.0, 294.0, 288.0, 290.0, 292.0, 289.0, 287.0, 293.0, 289.0, 318.0, 309.0, 287.0, 292.0, 310.0, 317.0, 311.0, 319.0, 316.0, 314.0, 295.0, 284.0, 307.0, 320.0, 313.0, 317.0, 285.0, 297.0, 284.0, 289.0, 315.0, 321.0, 285.0, 297.0, 288.0, 288.0, 318.0, 312.0, 314.0, 313.0, 290.0, 292.0, 310.0, 320.0, 318.0, 312.0, 294.0, 293.0, 310.0, 317.0, 293.0, 294.0, 309.0, 321.0, 293.0, 286.0, 290.0, 292.0, 308.0, 322.0, 289.0, 293.0, 290.0, 292.0, 315.0, 321.0, 319.0, 314.0, 290.0, 292.0, 294.0, 288.0, 317.0, 310.0, 316.0, 317.0, 286.0, 301.0, 295.0, 287.0, 290.0, 292.0, 291.0, 291.0, 261.0, 261.0, 325.0, 311.0, 319.0, 311.0, 291.0, 288.0, 289.0, 290.0, 285.0, 291.0, 290.0, 289.0, 294.0, 288.0, 288.0, 291.0, 285.0, 288.0, 313.0, 320.0, 285.0, 285.0, 317.0, 322.0, 284.0, 303.0, 294.0, 285.0, 320.0, 313.0, 296.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7015318240181805, "mean_inference_ms": 1.2546321530437043, "mean_action_processing_ms": 0.13428390360660708, "mean_env_wait_ms": 0.8444934858469165, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 598.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 299.06}, "hist_stats": {"episode_reward": [630.0, 630.0, 579.0, 636.0, 633.0, 576.0, 627.0, 584.0, 636.0, 582.0, 573.0, 582.0, 630.0, 582.0, 579.0, 579.0, 576.0, 587.0, 573.0, 
636.0, 633.0, 636.0, 576.0, 582.0, 579.0, 590.0, 579.0, 579.0, 579.0, 636.0, 581.0, 582.0, 582.0, 627.0, 579.0, 630.0, 582.0, 627.0, 516.0, 582.0, 584.0, 587.0, 582.0, 582.0, 576.0, 582.0, 627.0, 579.0, 627.0, 630.0, 630.0, 579.0, 627.0, 630.0, 582.0, 573.0, 636.0, 582.0, 576.0, 630.0, 627.0, 582.0, 630.0, 630.0, 587.0, 627.0, 587.0, 630.0, 579.0, 582.0, 630.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 627.0, 633.0, 587.0, 582.0, 582.0, 582.0, 522.0, 636.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 573.0, 633.0, 570.0, 639.0, 587.0, 579.0, 633.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [320.0, 310.0, 318.0, 312.0, 289.0, 290.0, 319.0, 317.0, 317.0, 316.0, 288.0, 288.0, 312.0, 315.0, 290.0, 294.0, 319.0, 317.0, 292.0, 290.0, 283.0, 290.0, 286.0, 296.0, 317.0, 313.0, 286.0, 296.0, 286.0, 293.0, 286.0, 293.0, 283.0, 293.0, 293.0, 294.0, 282.0, 291.0, 318.0, 318.0, 318.0, 315.0, 316.0, 320.0, 284.0, 292.0, 286.0, 296.0, 290.0, 289.0, 293.0, 297.0, 289.0, 290.0, 294.0, 285.0, 289.0, 290.0, 319.0, 317.0, 287.0, 294.0, 284.0, 298.0, 297.0, 285.0, 314.0, 313.0, 286.0, 293.0, 312.0, 318.0, 292.0, 290.0, 310.0, 317.0, 251.0, 265.0, 283.0, 299.0, 293.0, 291.0, 289.0, 298.0, 294.0, 288.0, 290.0, 292.0, 289.0, 287.0, 293.0, 289.0, 318.0, 309.0, 287.0, 292.0, 310.0, 317.0, 311.0, 319.0, 316.0, 314.0, 295.0, 284.0, 307.0, 320.0, 313.0, 317.0, 285.0, 297.0, 284.0, 289.0, 315.0, 321.0, 285.0, 297.0, 288.0, 288.0, 318.0, 312.0, 314.0, 313.0, 290.0, 292.0, 310.0, 320.0, 318.0, 
312.0, 294.0, 293.0, 310.0, 317.0, 293.0, 294.0, 309.0, 321.0, 293.0, 286.0, 290.0, 292.0, 308.0, 322.0, 289.0, 293.0, 290.0, 292.0, 315.0, 321.0, 319.0, 314.0, 290.0, 292.0, 294.0, 288.0, 317.0, 310.0, 316.0, 317.0, 286.0, 301.0, 295.0, 287.0, 290.0, 292.0, 291.0, 291.0, 261.0, 261.0, 325.0, 311.0, 319.0, 311.0, 291.0, 288.0, 289.0, 290.0, 285.0, 291.0, 290.0, 289.0, 294.0, 288.0, 288.0, 291.0, 285.0, 288.0, 313.0, 320.0, 285.0, 285.0, 317.0, 322.0, 284.0, 303.0, 294.0, 285.0, 320.0, 313.0, 296.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7015318240181805, "mean_inference_ms": 1.2546321530437043, "mean_action_processing_ms": 0.13428390360660708, "mean_env_wait_ms": 0.8444934858469165, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8089600, "num_agent_steps_trained": 8089600, "num_env_steps_sampled": 4044800, "num_env_steps_trained": 4044800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4044800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8089600, "timers": {"training_iteration_time_ms": 3602.352, "learn_time_ms": 1097.997, "learn_throughput": 11657.591, "synch_weights_time_ms": 10.858}, "counters": {"num_env_steps_sampled": 4044800, "num_env_steps_trained": 4044800, "num_agent_steps_sampled": 8089600, "num_agent_steps_trained": 8089600}, "done": false, "episodes_total": 10112, "training_iteration": 316, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-01", "timestamp": 1666581661, "time_this_iter_s": 3.6783833503723145, "time_total_s": 1215.2678174972534, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1215.2678174972534, "timesteps_since_restore": 0, "iterations_since_restore": 316, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.380000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 206.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.18, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.37, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 28, "onion_pickup_agent_1_mean": 19.5, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.23, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 28, "useful_onion_pickup_agent_1_mean": 19.27, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.93, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 19.09, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.62, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.24, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.45, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 6.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 6.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.27, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.93, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 19.09, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.93, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 19.09, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019808358047157526, "policy_loss": 0.0016194693744182587, "vf_loss": 7.821843147277832, "vf_explained_var": 0.5842312574386597, "kl": 0.0032284390181303024, "entropy": 0.8416324853897095, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4057600, "num_env_steps_trained": 4057600, "num_agent_steps_sampled": 8115200, "num_agent_steps_trained": 8115200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 595.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 297.59}, "custom_metrics": {"sparse_reward_mean": 206.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.18, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.37, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 28, "onion_pickup_agent_1_mean": 19.5, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.23, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 28, "useful_onion_pickup_agent_1_mean": 19.27, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 13.93, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 19.09, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.62, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.24, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.45, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 6.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.31, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 6.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.27, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 13.93, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 19.09, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 13.93, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 19.09, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 627.0, 579.0, 630.0, 582.0, 627.0, 516.0, 582.0, 584.0, 587.0, 582.0, 582.0, 576.0, 582.0, 627.0, 579.0, 627.0, 630.0, 630.0, 579.0, 627.0, 630.0, 582.0, 573.0, 636.0, 582.0, 576.0, 630.0, 627.0, 582.0, 630.0, 630.0, 587.0, 627.0, 587.0, 630.0, 579.0, 582.0, 630.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 627.0, 633.0, 587.0, 582.0, 582.0, 582.0, 522.0, 636.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 573.0, 633.0, 570.0, 639.0, 587.0, 579.0, 633.0, 587.0, 576.0, 576.0, 579.0, 627.0, 579.0, 579.0, 630.0, 587.0, 573.0, 576.0, 582.0, 579.0, 573.0, 579.0, 630.0, 587.0, 570.0, 636.0, 579.0, 587.0, 627.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 584.0, 627.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 285.0, 314.0, 313.0, 286.0, 293.0, 312.0, 318.0, 292.0, 290.0, 310.0, 
317.0, 251.0, 265.0, 283.0, 299.0, 293.0, 291.0, 289.0, 298.0, 294.0, 288.0, 290.0, 292.0, 289.0, 287.0, 293.0, 289.0, 318.0, 309.0, 287.0, 292.0, 310.0, 317.0, 311.0, 319.0, 316.0, 314.0, 295.0, 284.0, 307.0, 320.0, 313.0, 317.0, 285.0, 297.0, 284.0, 289.0, 315.0, 321.0, 285.0, 297.0, 288.0, 288.0, 318.0, 312.0, 314.0, 313.0, 290.0, 292.0, 310.0, 320.0, 318.0, 312.0, 294.0, 293.0, 310.0, 317.0, 293.0, 294.0, 309.0, 321.0, 293.0, 286.0, 290.0, 292.0, 308.0, 322.0, 289.0, 293.0, 290.0, 292.0, 315.0, 321.0, 319.0, 314.0, 290.0, 292.0, 294.0, 288.0, 317.0, 310.0, 316.0, 317.0, 286.0, 301.0, 295.0, 287.0, 290.0, 292.0, 291.0, 291.0, 261.0, 261.0, 325.0, 311.0, 319.0, 311.0, 291.0, 288.0, 289.0, 290.0, 285.0, 291.0, 290.0, 289.0, 294.0, 288.0, 288.0, 291.0, 285.0, 288.0, 313.0, 320.0, 285.0, 285.0, 317.0, 322.0, 284.0, 303.0, 294.0, 285.0, 320.0, 313.0, 296.0, 291.0, 289.0, 287.0, 284.0, 292.0, 285.0, 294.0, 315.0, 312.0, 287.0, 292.0, 298.0, 281.0, 317.0, 313.0, 296.0, 291.0, 286.0, 287.0, 290.0, 286.0, 293.0, 289.0, 284.0, 295.0, 287.0, 286.0, 286.0, 293.0, 313.0, 317.0, 296.0, 291.0, 285.0, 285.0, 314.0, 322.0, 287.0, 292.0, 297.0, 290.0, 312.0, 315.0, 296.0, 286.0, 286.0, 296.0, 291.0, 291.0, 291.0, 291.0, 281.0, 295.0, 295.0, 284.0, 292.0, 292.0, 314.0, 313.0, 291.0, 291.0, 291.0, 291.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7014770298391392, "mean_inference_ms": 1.2544313326895178, "mean_action_processing_ms": 0.13427583305183213, "mean_env_wait_ms": 0.844396471243271, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 595.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 297.59}, "hist_stats": {"episode_reward": [582.0, 627.0, 579.0, 630.0, 582.0, 627.0, 516.0, 582.0, 584.0, 587.0, 582.0, 582.0, 576.0, 582.0, 627.0, 579.0, 627.0, 630.0, 630.0, 
579.0, 627.0, 630.0, 582.0, 573.0, 636.0, 582.0, 576.0, 630.0, 627.0, 582.0, 630.0, 630.0, 587.0, 627.0, 587.0, 630.0, 579.0, 582.0, 630.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 627.0, 633.0, 587.0, 582.0, 582.0, 582.0, 522.0, 636.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 573.0, 633.0, 570.0, 639.0, 587.0, 579.0, 633.0, 587.0, 576.0, 576.0, 579.0, 627.0, 579.0, 579.0, 630.0, 587.0, 573.0, 576.0, 582.0, 579.0, 573.0, 579.0, 630.0, 587.0, 570.0, 636.0, 579.0, 587.0, 627.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 584.0, 627.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 285.0, 314.0, 313.0, 286.0, 293.0, 312.0, 318.0, 292.0, 290.0, 310.0, 317.0, 251.0, 265.0, 283.0, 299.0, 293.0, 291.0, 289.0, 298.0, 294.0, 288.0, 290.0, 292.0, 289.0, 287.0, 293.0, 289.0, 318.0, 309.0, 287.0, 292.0, 310.0, 317.0, 311.0, 319.0, 316.0, 314.0, 295.0, 284.0, 307.0, 320.0, 313.0, 317.0, 285.0, 297.0, 284.0, 289.0, 315.0, 321.0, 285.0, 297.0, 288.0, 288.0, 318.0, 312.0, 314.0, 313.0, 290.0, 292.0, 310.0, 320.0, 318.0, 312.0, 294.0, 293.0, 310.0, 317.0, 293.0, 294.0, 309.0, 321.0, 293.0, 286.0, 290.0, 292.0, 308.0, 322.0, 289.0, 293.0, 290.0, 292.0, 315.0, 321.0, 319.0, 314.0, 290.0, 292.0, 294.0, 288.0, 317.0, 310.0, 316.0, 317.0, 286.0, 301.0, 295.0, 287.0, 290.0, 292.0, 291.0, 291.0, 261.0, 261.0, 325.0, 311.0, 319.0, 311.0, 291.0, 288.0, 289.0, 290.0, 285.0, 291.0, 290.0, 289.0, 294.0, 288.0, 288.0, 291.0, 285.0, 288.0, 313.0, 320.0, 285.0, 285.0, 317.0, 
322.0, 284.0, 303.0, 294.0, 285.0, 320.0, 313.0, 296.0, 291.0, 289.0, 287.0, 284.0, 292.0, 285.0, 294.0, 315.0, 312.0, 287.0, 292.0, 298.0, 281.0, 317.0, 313.0, 296.0, 291.0, 286.0, 287.0, 290.0, 286.0, 293.0, 289.0, 284.0, 295.0, 287.0, 286.0, 286.0, 293.0, 313.0, 317.0, 296.0, 291.0, 285.0, 285.0, 314.0, 322.0, 287.0, 292.0, 297.0, 290.0, 312.0, 315.0, 296.0, 286.0, 286.0, 296.0, 291.0, 291.0, 291.0, 291.0, 281.0, 295.0, 295.0, 284.0, 292.0, 292.0, 314.0, 313.0, 291.0, 291.0, 291.0, 291.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7014770298391392, "mean_inference_ms": 1.2544313326895178, "mean_action_processing_ms": 0.13427583305183213, "mean_env_wait_ms": 0.844396471243271, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8115200, "num_agent_steps_trained": 8115200, "num_env_steps_sampled": 4057600, "num_env_steps_trained": 4057600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4057600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8115200, "timers": {"training_iteration_time_ms": 3616.638, "learn_time_ms": 1109.991, "learn_throughput": 11531.625, "synch_weights_time_ms": 10.901}, "counters": {"num_env_steps_sampled": 4057600, "num_env_steps_trained": 4057600, "num_agent_steps_sampled": 8115200, "num_agent_steps_trained": 8115200}, "done": false, "episodes_total": 10144, "training_iteration": 317, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-05", "timestamp": 1666581665, "time_this_iter_s": 3.7974302768707275, "time_total_s": 1219.0652477741241, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1219.0652477741241, "timesteps_since_restore": 0, "iterations_since_restore": 317, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.01666666666667, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 205.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.32, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.97, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 28, "onion_pickup_agent_1_mean": 19.02, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.82, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 28, "useful_onion_pickup_agent_1_mean": 18.81, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.53, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 18.62, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.8, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.0, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.64, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.82, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.53, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 18.62, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.53, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 18.62, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010754279792308807, "policy_loss": 0.0007261198479682207, "vf_loss": 7.674044132232666, "vf_explained_var": 0.5930687785148621, "kl": 0.0021228990517556667, "entropy": 0.8361895084381104, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4070400, "num_env_steps_trained": 4070400, "num_agent_steps_sampled": 8140800, "num_agent_steps_trained": 8140800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 594.12, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 261.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 297.06}, "custom_metrics": {"sparse_reward_mean": 205.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.32, "shaped_reward_min": 162, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.97, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 28, "onion_pickup_agent_1_mean": 19.02, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.82, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 28, "useful_onion_pickup_agent_1_mean": 18.81, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.53, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 18.62, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.21, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.8, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 6.0, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.64, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.52, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.82, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.53, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 18.62, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.53, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 18.62, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 627.0, 587.0, 630.0, 579.0, 582.0, 630.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 627.0, 633.0, 587.0, 582.0, 582.0, 582.0, 522.0, 636.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 573.0, 633.0, 570.0, 639.0, 587.0, 579.0, 633.0, 587.0, 576.0, 576.0, 579.0, 627.0, 579.0, 579.0, 630.0, 587.0, 573.0, 576.0, 582.0, 579.0, 573.0, 579.0, 630.0, 587.0, 570.0, 636.0, 579.0, 587.0, 627.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 584.0, 627.0, 582.0, 582.0, 579.0, 630.0, 630.0, 573.0, 579.0, 576.0, 587.0, 633.0, 636.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 633.0, 587.0, 636.0, 579.0, 582.0, 582.0, 627.0, 627.0, 576.0, 579.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 293.0, 310.0, 317.0, 293.0, 294.0, 309.0, 321.0, 293.0, 286.0, 290.0, 
292.0, 308.0, 322.0, 289.0, 293.0, 290.0, 292.0, 315.0, 321.0, 319.0, 314.0, 290.0, 292.0, 294.0, 288.0, 317.0, 310.0, 316.0, 317.0, 286.0, 301.0, 295.0, 287.0, 290.0, 292.0, 291.0, 291.0, 261.0, 261.0, 325.0, 311.0, 319.0, 311.0, 291.0, 288.0, 289.0, 290.0, 285.0, 291.0, 290.0, 289.0, 294.0, 288.0, 288.0, 291.0, 285.0, 288.0, 313.0, 320.0, 285.0, 285.0, 317.0, 322.0, 284.0, 303.0, 294.0, 285.0, 320.0, 313.0, 296.0, 291.0, 289.0, 287.0, 284.0, 292.0, 285.0, 294.0, 315.0, 312.0, 287.0, 292.0, 298.0, 281.0, 317.0, 313.0, 296.0, 291.0, 286.0, 287.0, 290.0, 286.0, 293.0, 289.0, 284.0, 295.0, 287.0, 286.0, 286.0, 293.0, 313.0, 317.0, 296.0, 291.0, 285.0, 285.0, 314.0, 322.0, 287.0, 292.0, 297.0, 290.0, 312.0, 315.0, 296.0, 286.0, 286.0, 296.0, 291.0, 291.0, 291.0, 291.0, 281.0, 295.0, 295.0, 284.0, 292.0, 292.0, 314.0, 313.0, 291.0, 291.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 278.0, 295.0, 288.0, 291.0, 286.0, 290.0, 290.0, 297.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 294.0, 285.0, 313.0, 317.0, 291.0, 288.0, 288.0, 294.0, 288.0, 291.0, 313.0, 320.0, 296.0, 291.0, 317.0, 319.0, 295.0, 284.0, 289.0, 293.0, 294.0, 288.0, 311.0, 316.0, 311.0, 316.0, 285.0, 291.0, 286.0, 293.0, 286.0, 296.0, 293.0, 289.0, 289.0, 298.0, 288.0, 291.0, 289.0, 293.0, 292.0, 290.0, 289.0, 293.0, 282.0, 300.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7014010441921795, "mean_inference_ms": 1.2542360507656027, "mean_action_processing_ms": 0.13427080409938527, "mean_env_wait_ms": 0.8443139300228972, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 522.0, "episode_reward_mean": 594.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 261.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 297.06}, "hist_stats": {"episode_reward": [587.0, 627.0, 587.0, 630.0, 579.0, 582.0, 630.0, 582.0, 582.0, 636.0, 633.0, 582.0, 582.0, 627.0, 633.0, 587.0, 582.0, 582.0, 582.0, 
522.0, 636.0, 630.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 573.0, 633.0, 570.0, 639.0, 587.0, 579.0, 633.0, 587.0, 576.0, 576.0, 579.0, 627.0, 579.0, 579.0, 630.0, 587.0, 573.0, 576.0, 582.0, 579.0, 573.0, 579.0, 630.0, 587.0, 570.0, 636.0, 579.0, 587.0, 627.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 584.0, 627.0, 582.0, 582.0, 579.0, 630.0, 630.0, 573.0, 579.0, 576.0, 587.0, 633.0, 636.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 633.0, 587.0, 636.0, 579.0, 582.0, 582.0, 627.0, 627.0, 576.0, 579.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 293.0, 310.0, 317.0, 293.0, 294.0, 309.0, 321.0, 293.0, 286.0, 290.0, 292.0, 308.0, 322.0, 289.0, 293.0, 290.0, 292.0, 315.0, 321.0, 319.0, 314.0, 290.0, 292.0, 294.0, 288.0, 317.0, 310.0, 316.0, 317.0, 286.0, 301.0, 295.0, 287.0, 290.0, 292.0, 291.0, 291.0, 261.0, 261.0, 325.0, 311.0, 319.0, 311.0, 291.0, 288.0, 289.0, 290.0, 285.0, 291.0, 290.0, 289.0, 294.0, 288.0, 288.0, 291.0, 285.0, 288.0, 313.0, 320.0, 285.0, 285.0, 317.0, 322.0, 284.0, 303.0, 294.0, 285.0, 320.0, 313.0, 296.0, 291.0, 289.0, 287.0, 284.0, 292.0, 285.0, 294.0, 315.0, 312.0, 287.0, 292.0, 298.0, 281.0, 317.0, 313.0, 296.0, 291.0, 286.0, 287.0, 290.0, 286.0, 293.0, 289.0, 284.0, 295.0, 287.0, 286.0, 286.0, 293.0, 313.0, 317.0, 296.0, 291.0, 285.0, 285.0, 314.0, 322.0, 287.0, 292.0, 297.0, 290.0, 312.0, 315.0, 296.0, 286.0, 286.0, 296.0, 291.0, 291.0, 291.0, 291.0, 281.0, 295.0, 295.0, 284.0, 292.0, 
292.0, 314.0, 313.0, 291.0, 291.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 278.0, 295.0, 288.0, 291.0, 286.0, 290.0, 290.0, 297.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 294.0, 285.0, 313.0, 317.0, 291.0, 288.0, 288.0, 294.0, 288.0, 291.0, 313.0, 320.0, 296.0, 291.0, 317.0, 319.0, 295.0, 284.0, 289.0, 293.0, 294.0, 288.0, 311.0, 316.0, 311.0, 316.0, 285.0, 291.0, 286.0, 293.0, 286.0, 296.0, 293.0, 289.0, 289.0, 298.0, 288.0, 291.0, 289.0, 293.0, 292.0, 290.0, 289.0, 293.0, 282.0, 300.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7014010441921795, "mean_inference_ms": 1.2542360507656027, "mean_action_processing_ms": 0.13427080409938527, "mean_env_wait_ms": 0.8443139300228972, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8140800, "num_agent_steps_trained": 8140800, "num_env_steps_sampled": 4070400, "num_env_steps_trained": 4070400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4070400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8140800, "timers": {"training_iteration_time_ms": 3624.456, "learn_time_ms": 1115.287, "learn_throughput": 11476.862, "synch_weights_time_ms": 11.228}, "counters": {"num_env_steps_sampled": 4070400, "num_env_steps_trained": 4070400, "num_agent_steps_sampled": 8140800, "num_agent_steps_trained": 8140800}, "done": false, "episodes_total": 10176, "training_iteration": 318, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-09", "timestamp": 1666581669, "time_this_iter_s": 3.6486549377441406, "time_total_s": 1222.7139027118683, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1222.7139027118683, "timesteps_since_restore": 0, "iterations_since_restore": 318, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.639999999999997, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.56, "shaped_reward_min": 165, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 28, "onion_pickup_agent_1_mean": 18.63, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 28, "useful_onion_pickup_agent_1_mean": 18.41, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.87, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 18.19, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.01, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.83, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.87, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 18.19, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.87, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 18.19, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013955773320049047, "policy_loss": 0.0010526780970394611, "vf_loss": 7.592601299285889, "vf_explained_var": 0.5996973514556885, "kl": 0.001881860545836389, "entropy": 0.8327209949493408, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4083200, "num_env_steps_trained": 4083200, "num_agent_steps_sampled": 8166400, "num_agent_steps_trained": 8166400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 525.0, "episode_reward_mean": 591.76, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 295.88}, "custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.56, "shaped_reward_min": 165, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 28, "onion_pickup_agent_1_mean": 18.63, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 28, "useful_onion_pickup_agent_1_mean": 18.41, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.87, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 18.19, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 6.01, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.83, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.72, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.67, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.87, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 18.19, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.87, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 18.19, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 579.0, 633.0, 587.0, 576.0, 576.0, 579.0, 627.0, 579.0, 579.0, 630.0, 587.0, 573.0, 576.0, 582.0, 579.0, 573.0, 579.0, 630.0, 587.0, 570.0, 636.0, 579.0, 587.0, 627.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 584.0, 627.0, 582.0, 582.0, 579.0, 630.0, 630.0, 573.0, 579.0, 576.0, 587.0, 633.0, 636.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 633.0, 587.0, 636.0, 579.0, 582.0, 582.0, 627.0, 627.0, 576.0, 579.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 579.0, 630.0, 525.0, 587.0, 579.0, 579.0, 627.0, 579.0, 630.0, 579.0, 579.0, 630.0, 579.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 579.0, 636.0, 636.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 303.0, 294.0, 285.0, 320.0, 313.0, 296.0, 291.0, 289.0, 287.0, 284.0, 
292.0, 285.0, 294.0, 315.0, 312.0, 287.0, 292.0, 298.0, 281.0, 317.0, 313.0, 296.0, 291.0, 286.0, 287.0, 290.0, 286.0, 293.0, 289.0, 284.0, 295.0, 287.0, 286.0, 286.0, 293.0, 313.0, 317.0, 296.0, 291.0, 285.0, 285.0, 314.0, 322.0, 287.0, 292.0, 297.0, 290.0, 312.0, 315.0, 296.0, 286.0, 286.0, 296.0, 291.0, 291.0, 291.0, 291.0, 281.0, 295.0, 295.0, 284.0, 292.0, 292.0, 314.0, 313.0, 291.0, 291.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 278.0, 295.0, 288.0, 291.0, 286.0, 290.0, 290.0, 297.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 294.0, 285.0, 313.0, 317.0, 291.0, 288.0, 288.0, 294.0, 288.0, 291.0, 313.0, 320.0, 296.0, 291.0, 317.0, 319.0, 295.0, 284.0, 289.0, 293.0, 294.0, 288.0, 311.0, 316.0, 311.0, 316.0, 285.0, 291.0, 286.0, 293.0, 286.0, 296.0, 293.0, 289.0, 289.0, 298.0, 288.0, 291.0, 289.0, 293.0, 292.0, 290.0, 289.0, 293.0, 282.0, 300.0, 288.0, 291.0, 286.0, 290.0, 293.0, 286.0, 287.0, 292.0, 317.0, 313.0, 259.0, 266.0, 293.0, 294.0, 288.0, 291.0, 291.0, 288.0, 321.0, 306.0, 285.0, 294.0, 314.0, 316.0, 288.0, 291.0, 298.0, 281.0, 316.0, 314.0, 294.0, 285.0, 293.0, 286.0, 292.0, 290.0, 294.0, 293.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 287.0, 292.0, 286.0, 290.0, 292.0, 287.0, 312.0, 318.0, 278.0, 298.0, 288.0, 291.0, 311.0, 325.0, 315.0, 321.0, 294.0, 285.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7013202350924288, "mean_inference_ms": 1.2540252641135183, "mean_action_processing_ms": 0.1342621775402373, "mean_env_wait_ms": 0.8442089434128561, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 525.0, "episode_reward_mean": 591.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 295.88}, "hist_stats": {"episode_reward": [587.0, 579.0, 633.0, 587.0, 576.0, 576.0, 579.0, 627.0, 579.0, 579.0, 630.0, 587.0, 573.0, 576.0, 582.0, 579.0, 573.0, 579.0, 630.0, 
587.0, 570.0, 636.0, 579.0, 587.0, 627.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 584.0, 627.0, 582.0, 582.0, 579.0, 630.0, 630.0, 573.0, 579.0, 576.0, 587.0, 633.0, 636.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 633.0, 587.0, 636.0, 579.0, 582.0, 582.0, 627.0, 627.0, 576.0, 579.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 579.0, 630.0, 525.0, 587.0, 579.0, 579.0, 627.0, 579.0, 630.0, 579.0, 579.0, 630.0, 579.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 579.0, 636.0, 636.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 303.0, 294.0, 285.0, 320.0, 313.0, 296.0, 291.0, 289.0, 287.0, 284.0, 292.0, 285.0, 294.0, 315.0, 312.0, 287.0, 292.0, 298.0, 281.0, 317.0, 313.0, 296.0, 291.0, 286.0, 287.0, 290.0, 286.0, 293.0, 289.0, 284.0, 295.0, 287.0, 286.0, 286.0, 293.0, 313.0, 317.0, 296.0, 291.0, 285.0, 285.0, 314.0, 322.0, 287.0, 292.0, 297.0, 290.0, 312.0, 315.0, 296.0, 286.0, 286.0, 296.0, 291.0, 291.0, 291.0, 291.0, 281.0, 295.0, 295.0, 284.0, 292.0, 292.0, 314.0, 313.0, 291.0, 291.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 278.0, 295.0, 288.0, 291.0, 286.0, 290.0, 290.0, 297.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 294.0, 285.0, 313.0, 317.0, 291.0, 288.0, 288.0, 294.0, 288.0, 291.0, 313.0, 320.0, 296.0, 291.0, 317.0, 319.0, 295.0, 284.0, 289.0, 293.0, 294.0, 288.0, 311.0, 316.0, 311.0, 316.0, 285.0, 291.0, 286.0, 293.0, 286.0, 296.0, 293.0, 289.0, 289.0, 298.0, 288.0, 
291.0, 289.0, 293.0, 292.0, 290.0, 289.0, 293.0, 282.0, 300.0, 288.0, 291.0, 286.0, 290.0, 293.0, 286.0, 287.0, 292.0, 317.0, 313.0, 259.0, 266.0, 293.0, 294.0, 288.0, 291.0, 291.0, 288.0, 321.0, 306.0, 285.0, 294.0, 314.0, 316.0, 288.0, 291.0, 298.0, 281.0, 316.0, 314.0, 294.0, 285.0, 293.0, 286.0, 292.0, 290.0, 294.0, 293.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 287.0, 292.0, 286.0, 290.0, 292.0, 287.0, 312.0, 318.0, 278.0, 298.0, 288.0, 291.0, 311.0, 325.0, 315.0, 321.0, 294.0, 285.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7013202350924288, "mean_inference_ms": 1.2540252641135183, "mean_action_processing_ms": 0.1342621775402373, "mean_env_wait_ms": 0.8442089434128561, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8166400, "num_agent_steps_trained": 8166400, "num_env_steps_sampled": 4083200, "num_env_steps_trained": 4083200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4083200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8166400, "timers": {"training_iteration_time_ms": 3614.178, "learn_time_ms": 1109.138, "learn_throughput": 11540.489, "synch_weights_time_ms": 11.693}, "counters": {"num_env_steps_sampled": 4083200, "num_env_steps_trained": 4083200, "num_agent_steps_sampled": 8166400, "num_agent_steps_trained": 8166400}, "done": false, "episodes_total": 10208, "training_iteration": 319, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-13", "timestamp": 1666581673, "time_this_iter_s": 3.5185139179229736, "time_total_s": 1226.2324166297913, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1226.2324166297913, "timesteps_since_restore": 0, "iterations_since_restore": 319, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.533333333333335, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.82, "shaped_reward_min": 165, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 17.89, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.93, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.66, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 17.48, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.66, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 17.48, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.66, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 17.48, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008092686766758561, "policy_loss": 0.0004618025850504637, "vf_loss": 7.6494879722595215, "vf_explained_var": 0.5780466794967651, "kl": 0.002273120451718569, "entropy": 0.8349617719650269, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4096000, "num_env_steps_trained": 4096000, "num_agent_steps_sampled": 8192000, "num_agent_steps_trained": 8192000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 525.0, "episode_reward_mean": 592.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 296.01}, "custom_metrics": {"sparse_reward_mean": 204.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.82, "shaped_reward_min": 165, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 17.89, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.93, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.71, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.66, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 17.48, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.66, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 17.48, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.66, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 17.48, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 582.0, 579.0, 630.0, 630.0, 573.0, 579.0, 576.0, 587.0, 633.0, 636.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 633.0, 587.0, 636.0, 579.0, 582.0, 582.0, 627.0, 627.0, 576.0, 579.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 579.0, 630.0, 525.0, 587.0, 579.0, 579.0, 627.0, 579.0, 630.0, 579.0, 579.0, 630.0, 579.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 579.0, 636.0, 636.0, 579.0, 579.0, 582.0, 587.0, 576.0, 579.0, 636.0, 573.0, 582.0, 587.0, 582.0, 636.0, 587.0, 576.0, 636.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 630.0, 627.0, 633.0, 579.0, 579.0, 576.0, 579.0, 587.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 313.0, 291.0, 291.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 309.0, 
321.0, 278.0, 295.0, 288.0, 291.0, 286.0, 290.0, 290.0, 297.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 294.0, 285.0, 313.0, 317.0, 291.0, 288.0, 288.0, 294.0, 288.0, 291.0, 313.0, 320.0, 296.0, 291.0, 317.0, 319.0, 295.0, 284.0, 289.0, 293.0, 294.0, 288.0, 311.0, 316.0, 311.0, 316.0, 285.0, 291.0, 286.0, 293.0, 286.0, 296.0, 293.0, 289.0, 289.0, 298.0, 288.0, 291.0, 289.0, 293.0, 292.0, 290.0, 289.0, 293.0, 282.0, 300.0, 288.0, 291.0, 286.0, 290.0, 293.0, 286.0, 287.0, 292.0, 317.0, 313.0, 259.0, 266.0, 293.0, 294.0, 288.0, 291.0, 291.0, 288.0, 321.0, 306.0, 285.0, 294.0, 314.0, 316.0, 288.0, 291.0, 298.0, 281.0, 316.0, 314.0, 294.0, 285.0, 293.0, 286.0, 292.0, 290.0, 294.0, 293.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 287.0, 292.0, 286.0, 290.0, 292.0, 287.0, 312.0, 318.0, 278.0, 298.0, 288.0, 291.0, 311.0, 325.0, 315.0, 321.0, 294.0, 285.0, 296.0, 283.0, 287.0, 295.0, 288.0, 299.0, 291.0, 285.0, 291.0, 288.0, 315.0, 321.0, 278.0, 295.0, 286.0, 296.0, 292.0, 295.0, 289.0, 293.0, 313.0, 323.0, 291.0, 296.0, 288.0, 288.0, 316.0, 320.0, 293.0, 286.0, 289.0, 287.0, 292.0, 290.0, 291.0, 291.0, 290.0, 292.0, 292.0, 290.0, 288.0, 291.0, 284.0, 298.0, 288.0, 294.0, 280.0, 293.0, 311.0, 319.0, 312.0, 315.0, 317.0, 316.0, 287.0, 292.0, 281.0, 298.0, 291.0, 285.0, 286.0, 293.0, 295.0, 292.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7012302860360472, "mean_inference_ms": 1.2538165708631441, "mean_action_processing_ms": 0.13425232218221622, "mean_env_wait_ms": 0.8440993449119911, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 525.0, "episode_reward_mean": 592.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 296.01}, "hist_stats": {"episode_reward": [627.0, 582.0, 582.0, 579.0, 630.0, 630.0, 573.0, 579.0, 576.0, 587.0, 633.0, 636.0, 630.0, 579.0, 630.0, 579.0, 582.0, 579.0, 633.0, 
587.0, 636.0, 579.0, 582.0, 582.0, 627.0, 627.0, 576.0, 579.0, 582.0, 582.0, 587.0, 579.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 579.0, 630.0, 525.0, 587.0, 579.0, 579.0, 627.0, 579.0, 630.0, 579.0, 579.0, 630.0, 579.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 579.0, 636.0, 636.0, 579.0, 579.0, 582.0, 587.0, 576.0, 579.0, 636.0, 573.0, 582.0, 587.0, 582.0, 636.0, 587.0, 576.0, 636.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 630.0, 627.0, 633.0, 579.0, 579.0, 576.0, 579.0, 587.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 313.0, 291.0, 291.0, 291.0, 291.0, 290.0, 289.0, 311.0, 319.0, 309.0, 321.0, 278.0, 295.0, 288.0, 291.0, 286.0, 290.0, 290.0, 297.0, 314.0, 319.0, 319.0, 317.0, 316.0, 314.0, 294.0, 285.0, 313.0, 317.0, 291.0, 288.0, 288.0, 294.0, 288.0, 291.0, 313.0, 320.0, 296.0, 291.0, 317.0, 319.0, 295.0, 284.0, 289.0, 293.0, 294.0, 288.0, 311.0, 316.0, 311.0, 316.0, 285.0, 291.0, 286.0, 293.0, 286.0, 296.0, 293.0, 289.0, 289.0, 298.0, 288.0, 291.0, 289.0, 293.0, 292.0, 290.0, 289.0, 293.0, 282.0, 300.0, 288.0, 291.0, 286.0, 290.0, 293.0, 286.0, 287.0, 292.0, 317.0, 313.0, 259.0, 266.0, 293.0, 294.0, 288.0, 291.0, 291.0, 288.0, 321.0, 306.0, 285.0, 294.0, 314.0, 316.0, 288.0, 291.0, 298.0, 281.0, 316.0, 314.0, 294.0, 285.0, 293.0, 286.0, 292.0, 290.0, 294.0, 293.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 287.0, 292.0, 286.0, 290.0, 292.0, 287.0, 312.0, 318.0, 278.0, 298.0, 288.0, 
291.0, 311.0, 325.0, 315.0, 321.0, 294.0, 285.0, 296.0, 283.0, 287.0, 295.0, 288.0, 299.0, 291.0, 285.0, 291.0, 288.0, 315.0, 321.0, 278.0, 295.0, 286.0, 296.0, 292.0, 295.0, 289.0, 293.0, 313.0, 323.0, 291.0, 296.0, 288.0, 288.0, 316.0, 320.0, 293.0, 286.0, 289.0, 287.0, 292.0, 290.0, 291.0, 291.0, 290.0, 292.0, 292.0, 290.0, 288.0, 291.0, 284.0, 298.0, 288.0, 294.0, 280.0, 293.0, 311.0, 319.0, 312.0, 315.0, 317.0, 316.0, 287.0, 292.0, 281.0, 298.0, 291.0, 285.0, 286.0, 293.0, 295.0, 292.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7012302860360472, "mean_inference_ms": 1.2538165708631441, "mean_action_processing_ms": 0.13425232218221622, "mean_env_wait_ms": 0.8440993449119911, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8192000, "num_agent_steps_trained": 8192000, "num_env_steps_sampled": 4096000, "num_env_steps_trained": 4096000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4096000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8192000, "timers": {"training_iteration_time_ms": 3604.653, "learn_time_ms": 1110.183, "learn_throughput": 11529.626, "synch_weights_time_ms": 11.85}, "counters": {"num_env_steps_sampled": 4096000, "num_env_steps_trained": 4096000, "num_agent_steps_sampled": 8192000, "num_agent_steps_trained": 8192000}, "done": false, "episodes_total": 10240, "training_iteration": 320, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-16", "timestamp": 1666581676, "time_this_iter_s": 3.695418119430542, "time_total_s": 1229.9278347492218, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1229.9278347492218, "timesteps_since_restore": 0, "iterations_since_restore": 320, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.5, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 204.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.67, "shaped_reward_min": 165, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.17, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 17.76, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 16.02, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 17.39, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.74, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 17.39, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 17.39, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00031242857221513987, "policy_loss": -0.0006594202714040875, "vf_loss": 7.624888896942139, "vf_explained_var": 0.5988132953643799, "kl": 0.0022885880898684263, "entropy": 0.83099365234375, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4108800, "num_env_steps_trained": 4108800, "num_agent_steps_sampled": 8217600, "num_agent_steps_trained": 8217600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 525.0, "episode_reward_mean": 590.67, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 295.335}, "custom_metrics": {"sparse_reward_mean": 204.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.67, "shaped_reward_min": 165, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.17, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 17.76, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 16.02, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 17.39, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.94, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.74, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 17.39, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 17.39, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 579.0, 630.0, 525.0, 587.0, 579.0, 579.0, 627.0, 579.0, 630.0, 579.0, 579.0, 630.0, 579.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 579.0, 636.0, 636.0, 579.0, 579.0, 582.0, 587.0, 576.0, 579.0, 636.0, 573.0, 582.0, 587.0, 582.0, 636.0, 587.0, 576.0, 636.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 630.0, 627.0, 633.0, 579.0, 579.0, 576.0, 579.0, 587.0, 582.0, 576.0, 582.0, 582.0, 579.0, 633.0, 582.0, 587.0, 630.0, 570.0, 633.0, 582.0, 582.0, 579.0, 582.0, 596.0, 576.0, 627.0, 579.0, 582.0, 582.0, 587.0, 630.0, 582.0, 582.0, 633.0, 636.0, 582.0, 582.0, 633.0, 579.0, 579.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 292.0, 290.0, 289.0, 293.0, 282.0, 300.0, 288.0, 291.0, 286.0, 
290.0, 293.0, 286.0, 287.0, 292.0, 317.0, 313.0, 259.0, 266.0, 293.0, 294.0, 288.0, 291.0, 291.0, 288.0, 321.0, 306.0, 285.0, 294.0, 314.0, 316.0, 288.0, 291.0, 298.0, 281.0, 316.0, 314.0, 294.0, 285.0, 293.0, 286.0, 292.0, 290.0, 294.0, 293.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 287.0, 292.0, 286.0, 290.0, 292.0, 287.0, 312.0, 318.0, 278.0, 298.0, 288.0, 291.0, 311.0, 325.0, 315.0, 321.0, 294.0, 285.0, 296.0, 283.0, 287.0, 295.0, 288.0, 299.0, 291.0, 285.0, 291.0, 288.0, 315.0, 321.0, 278.0, 295.0, 286.0, 296.0, 292.0, 295.0, 289.0, 293.0, 313.0, 323.0, 291.0, 296.0, 288.0, 288.0, 316.0, 320.0, 293.0, 286.0, 289.0, 287.0, 292.0, 290.0, 291.0, 291.0, 290.0, 292.0, 292.0, 290.0, 288.0, 291.0, 284.0, 298.0, 288.0, 294.0, 280.0, 293.0, 311.0, 319.0, 312.0, 315.0, 317.0, 316.0, 287.0, 292.0, 281.0, 298.0, 291.0, 285.0, 286.0, 293.0, 295.0, 292.0, 291.0, 291.0, 283.0, 293.0, 286.0, 296.0, 292.0, 290.0, 296.0, 283.0, 316.0, 317.0, 292.0, 290.0, 297.0, 290.0, 314.0, 316.0, 289.0, 281.0, 319.0, 314.0, 294.0, 288.0, 292.0, 290.0, 287.0, 292.0, 286.0, 296.0, 299.0, 297.0, 288.0, 288.0, 312.0, 315.0, 284.0, 295.0, 291.0, 291.0, 286.0, 296.0, 291.0, 296.0, 320.0, 310.0, 289.0, 293.0, 289.0, 293.0, 314.0, 319.0, 318.0, 318.0, 292.0, 290.0, 294.0, 288.0, 317.0, 316.0, 283.0, 296.0, 292.0, 287.0, 289.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7011718559313173, "mean_inference_ms": 1.2536228958748068, "mean_action_processing_ms": 0.13424182848786143, "mean_env_wait_ms": 0.8439945384813131, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 525.0, "episode_reward_mean": 590.67, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 295.335}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 579.0, 630.0, 525.0, 587.0, 579.0, 579.0, 627.0, 579.0, 630.0, 579.0, 579.0, 
630.0, 579.0, 579.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 579.0, 636.0, 636.0, 579.0, 579.0, 582.0, 587.0, 576.0, 579.0, 636.0, 573.0, 582.0, 587.0, 582.0, 636.0, 587.0, 576.0, 636.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 630.0, 627.0, 633.0, 579.0, 579.0, 576.0, 579.0, 587.0, 582.0, 576.0, 582.0, 582.0, 579.0, 633.0, 582.0, 587.0, 630.0, 570.0, 633.0, 582.0, 582.0, 579.0, 582.0, 596.0, 576.0, 627.0, 579.0, 582.0, 582.0, 587.0, 630.0, 582.0, 582.0, 633.0, 636.0, 582.0, 582.0, 633.0, 579.0, 579.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 292.0, 290.0, 289.0, 293.0, 282.0, 300.0, 288.0, 291.0, 286.0, 290.0, 293.0, 286.0, 287.0, 292.0, 317.0, 313.0, 259.0, 266.0, 293.0, 294.0, 288.0, 291.0, 291.0, 288.0, 321.0, 306.0, 285.0, 294.0, 314.0, 316.0, 288.0, 291.0, 298.0, 281.0, 316.0, 314.0, 294.0, 285.0, 293.0, 286.0, 292.0, 290.0, 294.0, 293.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 287.0, 292.0, 286.0, 290.0, 292.0, 287.0, 312.0, 318.0, 278.0, 298.0, 288.0, 291.0, 311.0, 325.0, 315.0, 321.0, 294.0, 285.0, 296.0, 283.0, 287.0, 295.0, 288.0, 299.0, 291.0, 285.0, 291.0, 288.0, 315.0, 321.0, 278.0, 295.0, 286.0, 296.0, 292.0, 295.0, 289.0, 293.0, 313.0, 323.0, 291.0, 296.0, 288.0, 288.0, 316.0, 320.0, 293.0, 286.0, 289.0, 287.0, 292.0, 290.0, 291.0, 291.0, 290.0, 292.0, 292.0, 290.0, 288.0, 291.0, 284.0, 298.0, 288.0, 294.0, 280.0, 293.0, 311.0, 319.0, 312.0, 315.0, 317.0, 316.0, 287.0, 292.0, 
281.0, 298.0, 291.0, 285.0, 286.0, 293.0, 295.0, 292.0, 291.0, 291.0, 283.0, 293.0, 286.0, 296.0, 292.0, 290.0, 296.0, 283.0, 316.0, 317.0, 292.0, 290.0, 297.0, 290.0, 314.0, 316.0, 289.0, 281.0, 319.0, 314.0, 294.0, 288.0, 292.0, 290.0, 287.0, 292.0, 286.0, 296.0, 299.0, 297.0, 288.0, 288.0, 312.0, 315.0, 284.0, 295.0, 291.0, 291.0, 286.0, 296.0, 291.0, 296.0, 320.0, 310.0, 289.0, 293.0, 289.0, 293.0, 314.0, 319.0, 318.0, 318.0, 292.0, 290.0, 294.0, 288.0, 317.0, 316.0, 283.0, 296.0, 292.0, 287.0, 289.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7011718559313173, "mean_inference_ms": 1.2536228958748068, "mean_action_processing_ms": 0.13424182848786143, "mean_env_wait_ms": 0.8439945384813131, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8217600, "num_agent_steps_trained": 8217600, "num_env_steps_sampled": 4108800, "num_env_steps_trained": 4108800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4108800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8217600, "timers": {"training_iteration_time_ms": 3593.772, "learn_time_ms": 1104.0, "learn_throughput": 11594.207, "synch_weights_time_ms": 12.066}, "counters": {"num_env_steps_sampled": 4108800, "num_env_steps_trained": 4108800, "num_agent_steps_sampled": 8217600, "num_agent_steps_trained": 8217600}, "done": false, "episodes_total": 10272, "training_iteration": 321, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-20", "timestamp": 1666581680, "time_this_iter_s": 3.6758861541748047, "time_total_s": 1233.6037209033966, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1233.6037209033966, "timesteps_since_restore": 0, "iterations_since_restore": 321, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.14, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 182.36, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.73, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 18.0, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.64, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.33, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.33, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.33, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002883757231757045, "policy_loss": -0.0032353729475289583, "vf_loss": 7.676279067993164, "vf_explained_var": 0.5991020202636719, "kl": 0.0022375802509486675, "entropy": 0.8320217728614807, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4121600, "num_env_steps_trained": 4121600, "num_agent_steps_sampled": 8243200, "num_agent_steps_trained": 8243200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 588.76, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 294.38}, "custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 182.36, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.73, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 27, "onion_pickup_agent_1_mean": 18.0, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.64, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 27, "useful_onion_pickup_agent_1_mean": 17.87, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.33, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 27, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.04, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.87, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.33, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 27, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.33, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 27, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 636.0, 579.0, 579.0, 582.0, 587.0, 576.0, 579.0, 636.0, 573.0, 582.0, 587.0, 582.0, 636.0, 587.0, 576.0, 636.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 630.0, 627.0, 633.0, 579.0, 579.0, 576.0, 579.0, 587.0, 582.0, 576.0, 582.0, 582.0, 579.0, 633.0, 582.0, 587.0, 630.0, 570.0, 633.0, 582.0, 582.0, 579.0, 582.0, 596.0, 576.0, 627.0, 579.0, 582.0, 582.0, 587.0, 630.0, 582.0, 582.0, 633.0, 636.0, 582.0, 582.0, 633.0, 579.0, 579.0, 570.0, 582.0, 576.0, 587.0, 639.0, 582.0, 582.0, 584.0, 576.0, 570.0, 579.0, 627.0, 630.0, 579.0, 573.0, 587.0, 579.0, 242.0, 570.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 582.0, 582.0, 627.0, 590.0, 582.0, 630.0, 590.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 325.0, 315.0, 321.0, 294.0, 285.0, 296.0, 283.0, 287.0, 295.0, 288.0, 
299.0, 291.0, 285.0, 291.0, 288.0, 315.0, 321.0, 278.0, 295.0, 286.0, 296.0, 292.0, 295.0, 289.0, 293.0, 313.0, 323.0, 291.0, 296.0, 288.0, 288.0, 316.0, 320.0, 293.0, 286.0, 289.0, 287.0, 292.0, 290.0, 291.0, 291.0, 290.0, 292.0, 292.0, 290.0, 288.0, 291.0, 284.0, 298.0, 288.0, 294.0, 280.0, 293.0, 311.0, 319.0, 312.0, 315.0, 317.0, 316.0, 287.0, 292.0, 281.0, 298.0, 291.0, 285.0, 286.0, 293.0, 295.0, 292.0, 291.0, 291.0, 283.0, 293.0, 286.0, 296.0, 292.0, 290.0, 296.0, 283.0, 316.0, 317.0, 292.0, 290.0, 297.0, 290.0, 314.0, 316.0, 289.0, 281.0, 319.0, 314.0, 294.0, 288.0, 292.0, 290.0, 287.0, 292.0, 286.0, 296.0, 299.0, 297.0, 288.0, 288.0, 312.0, 315.0, 284.0, 295.0, 291.0, 291.0, 286.0, 296.0, 291.0, 296.0, 320.0, 310.0, 289.0, 293.0, 289.0, 293.0, 314.0, 319.0, 318.0, 318.0, 292.0, 290.0, 294.0, 288.0, 317.0, 316.0, 283.0, 296.0, 292.0, 287.0, 289.0, 281.0, 293.0, 289.0, 287.0, 289.0, 294.0, 293.0, 319.0, 320.0, 280.0, 302.0, 294.0, 288.0, 290.0, 294.0, 299.0, 277.0, 285.0, 285.0, 285.0, 294.0, 310.0, 317.0, 315.0, 315.0, 288.0, 291.0, 278.0, 295.0, 286.0, 301.0, 291.0, 288.0, 117.0, 125.0, 282.0, 288.0, 315.0, 321.0, 290.0, 292.0, 280.0, 302.0, 288.0, 294.0, 291.0, 291.0, 293.0, 294.0, 290.0, 292.0, 294.0, 288.0, 292.0, 290.0, 307.0, 320.0, 295.0, 295.0, 289.0, 293.0, 307.0, 323.0, 291.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7011350387026024, "mean_inference_ms": 1.253450808715653, "mean_action_processing_ms": 0.13423384633768787, "mean_env_wait_ms": 0.8439112070475474, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 588.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 294.38}, "hist_stats": {"episode_reward": [636.0, 636.0, 579.0, 579.0, 582.0, 587.0, 576.0, 579.0, 636.0, 573.0, 582.0, 587.0, 582.0, 636.0, 587.0, 576.0, 636.0, 579.0, 576.0, 
582.0, 582.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 630.0, 627.0, 633.0, 579.0, 579.0, 576.0, 579.0, 587.0, 582.0, 576.0, 582.0, 582.0, 579.0, 633.0, 582.0, 587.0, 630.0, 570.0, 633.0, 582.0, 582.0, 579.0, 582.0, 596.0, 576.0, 627.0, 579.0, 582.0, 582.0, 587.0, 630.0, 582.0, 582.0, 633.0, 636.0, 582.0, 582.0, 633.0, 579.0, 579.0, 570.0, 582.0, 576.0, 587.0, 639.0, 582.0, 582.0, 584.0, 576.0, 570.0, 579.0, 627.0, 630.0, 579.0, 573.0, 587.0, 579.0, 242.0, 570.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 582.0, 582.0, 627.0, 590.0, 582.0, 630.0, 590.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 325.0, 315.0, 321.0, 294.0, 285.0, 296.0, 283.0, 287.0, 295.0, 288.0, 299.0, 291.0, 285.0, 291.0, 288.0, 315.0, 321.0, 278.0, 295.0, 286.0, 296.0, 292.0, 295.0, 289.0, 293.0, 313.0, 323.0, 291.0, 296.0, 288.0, 288.0, 316.0, 320.0, 293.0, 286.0, 289.0, 287.0, 292.0, 290.0, 291.0, 291.0, 290.0, 292.0, 292.0, 290.0, 288.0, 291.0, 284.0, 298.0, 288.0, 294.0, 280.0, 293.0, 311.0, 319.0, 312.0, 315.0, 317.0, 316.0, 287.0, 292.0, 281.0, 298.0, 291.0, 285.0, 286.0, 293.0, 295.0, 292.0, 291.0, 291.0, 283.0, 293.0, 286.0, 296.0, 292.0, 290.0, 296.0, 283.0, 316.0, 317.0, 292.0, 290.0, 297.0, 290.0, 314.0, 316.0, 289.0, 281.0, 319.0, 314.0, 294.0, 288.0, 292.0, 290.0, 287.0, 292.0, 286.0, 296.0, 299.0, 297.0, 288.0, 288.0, 312.0, 315.0, 284.0, 295.0, 291.0, 291.0, 286.0, 296.0, 291.0, 296.0, 320.0, 310.0, 289.0, 293.0, 289.0, 293.0, 314.0, 319.0, 318.0, 318.0, 292.0, 290.0, 294.0, 
288.0, 317.0, 316.0, 283.0, 296.0, 292.0, 287.0, 289.0, 281.0, 293.0, 289.0, 287.0, 289.0, 294.0, 293.0, 319.0, 320.0, 280.0, 302.0, 294.0, 288.0, 290.0, 294.0, 299.0, 277.0, 285.0, 285.0, 285.0, 294.0, 310.0, 317.0, 315.0, 315.0, 288.0, 291.0, 278.0, 295.0, 286.0, 301.0, 291.0, 288.0, 117.0, 125.0, 282.0, 288.0, 315.0, 321.0, 290.0, 292.0, 280.0, 302.0, 288.0, 294.0, 291.0, 291.0, 293.0, 294.0, 290.0, 292.0, 294.0, 288.0, 292.0, 290.0, 307.0, 320.0, 295.0, 295.0, 289.0, 293.0, 307.0, 323.0, 291.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7011350387026024, "mean_inference_ms": 1.253450808715653, "mean_action_processing_ms": 0.13423384633768787, "mean_env_wait_ms": 0.8439112070475474, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8243200, "num_agent_steps_trained": 8243200, "num_env_steps_sampled": 4121600, "num_env_steps_trained": 4121600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4121600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8243200, "timers": {"training_iteration_time_ms": 3605.198, "learn_time_ms": 1104.684, "learn_throughput": 11587.023, "synch_weights_time_ms": 12.156}, "counters": {"num_env_steps_sampled": 4121600, "num_env_steps_trained": 4121600, "num_agent_steps_sampled": 8243200, "num_agent_steps_trained": 8243200}, "done": false, "episodes_total": 10304, "training_iteration": 322, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-24", "timestamp": 1666581684, "time_this_iter_s": 3.7152676582336426, "time_total_s": 1237.3189885616302, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1237.3189885616302, "timesteps_since_restore": 0, "iterations_since_restore": 322, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.816666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 202.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 181.67, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.55, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.99, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.46, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.17, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.17, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.17, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00010902888607233763, "policy_loss": -0.0002475515939295292, "vf_loss": 7.680278301239014, "vf_explained_var": 0.5841261744499207, "kl": 0.0024923328310251236, "entropy": 0.8228923082351685, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4134400, "num_env_steps_trained": 4134400, "num_agent_steps_sampled": 8268800, "num_agent_steps_trained": 8268800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 586.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.435}, "custom_metrics": {"sparse_reward_mean": 202.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 181.67, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.55, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.99, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.46, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.81, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.17, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.68, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.13, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.99, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.17, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.68, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.17, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.68, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 587.0, 582.0, 576.0, 582.0, 582.0, 579.0, 633.0, 582.0, 587.0, 630.0, 570.0, 633.0, 582.0, 582.0, 579.0, 582.0, 596.0, 576.0, 627.0, 579.0, 582.0, 582.0, 587.0, 630.0, 582.0, 582.0, 633.0, 636.0, 582.0, 582.0, 633.0, 579.0, 579.0, 570.0, 582.0, 576.0, 587.0, 639.0, 582.0, 582.0, 584.0, 576.0, 570.0, 579.0, 627.0, 630.0, 579.0, 573.0, 587.0, 579.0, 242.0, 570.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 582.0, 582.0, 627.0, 590.0, 582.0, 630.0, 590.0, 576.0, 579.0, 636.0, 587.0, 576.0, 579.0, 582.0, 573.0, 576.0, 627.0, 582.0, 582.0, 630.0, 579.0, 582.0, 579.0, 636.0, 579.0, 579.0, 630.0, 584.0, 579.0, 630.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 579.0, 584.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 285.0, 286.0, 293.0, 295.0, 292.0, 291.0, 291.0, 283.0, 293.0, 286.0, 
296.0, 292.0, 290.0, 296.0, 283.0, 316.0, 317.0, 292.0, 290.0, 297.0, 290.0, 314.0, 316.0, 289.0, 281.0, 319.0, 314.0, 294.0, 288.0, 292.0, 290.0, 287.0, 292.0, 286.0, 296.0, 299.0, 297.0, 288.0, 288.0, 312.0, 315.0, 284.0, 295.0, 291.0, 291.0, 286.0, 296.0, 291.0, 296.0, 320.0, 310.0, 289.0, 293.0, 289.0, 293.0, 314.0, 319.0, 318.0, 318.0, 292.0, 290.0, 294.0, 288.0, 317.0, 316.0, 283.0, 296.0, 292.0, 287.0, 289.0, 281.0, 293.0, 289.0, 287.0, 289.0, 294.0, 293.0, 319.0, 320.0, 280.0, 302.0, 294.0, 288.0, 290.0, 294.0, 299.0, 277.0, 285.0, 285.0, 285.0, 294.0, 310.0, 317.0, 315.0, 315.0, 288.0, 291.0, 278.0, 295.0, 286.0, 301.0, 291.0, 288.0, 117.0, 125.0, 282.0, 288.0, 315.0, 321.0, 290.0, 292.0, 280.0, 302.0, 288.0, 294.0, 291.0, 291.0, 293.0, 294.0, 290.0, 292.0, 294.0, 288.0, 292.0, 290.0, 307.0, 320.0, 295.0, 295.0, 289.0, 293.0, 307.0, 323.0, 291.0, 299.0, 286.0, 290.0, 288.0, 291.0, 320.0, 316.0, 283.0, 304.0, 290.0, 286.0, 285.0, 294.0, 295.0, 287.0, 289.0, 284.0, 288.0, 288.0, 312.0, 315.0, 293.0, 289.0, 287.0, 295.0, 308.0, 322.0, 286.0, 293.0, 284.0, 298.0, 291.0, 288.0, 311.0, 325.0, 287.0, 292.0, 290.0, 289.0, 320.0, 310.0, 292.0, 292.0, 287.0, 292.0, 317.0, 313.0, 288.0, 294.0, 289.0, 293.0, 289.0, 293.0, 289.0, 293.0, 285.0, 291.0, 291.0, 288.0, 292.0, 287.0, 294.0, 290.0, 256.0, 263.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7010995745847822, "mean_inference_ms": 1.2532775692677478, "mean_action_processing_ms": 0.1342266094642704, "mean_env_wait_ms": 0.8438295517099426, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 586.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.435}, "hist_stats": {"episode_reward": [576.0, 579.0, 587.0, 582.0, 576.0, 582.0, 582.0, 579.0, 633.0, 582.0, 587.0, 630.0, 570.0, 633.0, 582.0, 582.0, 579.0, 582.0, 596.0, 
576.0, 627.0, 579.0, 582.0, 582.0, 587.0, 630.0, 582.0, 582.0, 633.0, 636.0, 582.0, 582.0, 633.0, 579.0, 579.0, 570.0, 582.0, 576.0, 587.0, 639.0, 582.0, 582.0, 584.0, 576.0, 570.0, 579.0, 627.0, 630.0, 579.0, 573.0, 587.0, 579.0, 242.0, 570.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 582.0, 582.0, 627.0, 590.0, 582.0, 630.0, 590.0, 576.0, 579.0, 636.0, 587.0, 576.0, 579.0, 582.0, 573.0, 576.0, 627.0, 582.0, 582.0, 630.0, 579.0, 582.0, 579.0, 636.0, 579.0, 579.0, 630.0, 584.0, 579.0, 630.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 579.0, 584.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 285.0, 286.0, 293.0, 295.0, 292.0, 291.0, 291.0, 283.0, 293.0, 286.0, 296.0, 292.0, 290.0, 296.0, 283.0, 316.0, 317.0, 292.0, 290.0, 297.0, 290.0, 314.0, 316.0, 289.0, 281.0, 319.0, 314.0, 294.0, 288.0, 292.0, 290.0, 287.0, 292.0, 286.0, 296.0, 299.0, 297.0, 288.0, 288.0, 312.0, 315.0, 284.0, 295.0, 291.0, 291.0, 286.0, 296.0, 291.0, 296.0, 320.0, 310.0, 289.0, 293.0, 289.0, 293.0, 314.0, 319.0, 318.0, 318.0, 292.0, 290.0, 294.0, 288.0, 317.0, 316.0, 283.0, 296.0, 292.0, 287.0, 289.0, 281.0, 293.0, 289.0, 287.0, 289.0, 294.0, 293.0, 319.0, 320.0, 280.0, 302.0, 294.0, 288.0, 290.0, 294.0, 299.0, 277.0, 285.0, 285.0, 285.0, 294.0, 310.0, 317.0, 315.0, 315.0, 288.0, 291.0, 278.0, 295.0, 286.0, 301.0, 291.0, 288.0, 117.0, 125.0, 282.0, 288.0, 315.0, 321.0, 290.0, 292.0, 280.0, 302.0, 288.0, 294.0, 291.0, 291.0, 293.0, 294.0, 290.0, 292.0, 294.0, 288.0, 292.0, 290.0, 307.0, 
320.0, 295.0, 295.0, 289.0, 293.0, 307.0, 323.0, 291.0, 299.0, 286.0, 290.0, 288.0, 291.0, 320.0, 316.0, 283.0, 304.0, 290.0, 286.0, 285.0, 294.0, 295.0, 287.0, 289.0, 284.0, 288.0, 288.0, 312.0, 315.0, 293.0, 289.0, 287.0, 295.0, 308.0, 322.0, 286.0, 293.0, 284.0, 298.0, 291.0, 288.0, 311.0, 325.0, 287.0, 292.0, 290.0, 289.0, 320.0, 310.0, 292.0, 292.0, 287.0, 292.0, 317.0, 313.0, 288.0, 294.0, 289.0, 293.0, 289.0, 293.0, 289.0, 293.0, 285.0, 291.0, 291.0, 288.0, 292.0, 287.0, 294.0, 290.0, 256.0, 263.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7010995745847822, "mean_inference_ms": 1.2532775692677478, "mean_action_processing_ms": 0.1342266094642704, "mean_env_wait_ms": 0.8438295517099426, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8268800, "num_agent_steps_trained": 8268800, "num_env_steps_sampled": 4134400, "num_env_steps_trained": 4134400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4134400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8268800, "timers": {"training_iteration_time_ms": 3616.709, "learn_time_ms": 1114.568, "learn_throughput": 11484.274, "synch_weights_time_ms": 11.969}, "counters": {"num_env_steps_sampled": 4134400, "num_env_steps_trained": 4134400, "num_agent_steps_sampled": 8268800, "num_agent_steps_trained": 8268800}, "done": false, "episodes_total": 10336, "training_iteration": 323, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-28", "timestamp": 1666581688, "time_this_iter_s": 3.724001884460449, "time_total_s": 1241.0429904460907, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1241.0429904460907, "timesteps_since_restore": 0, "iterations_since_restore": 323, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.966666666666665, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 180.56, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.1, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.91, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.89, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010833744890987873, "policy_loss": -0.001441783970221877, "vf_loss": 7.687665939331055, "vf_explained_var": 0.5905911922454834, "kl": 0.002230637241154909, "entropy": 0.8207129240036011, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4147200, "num_env_steps_trained": 4147200, "num_agent_steps_sampled": 8294400, "num_agent_steps_trained": 8294400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 582.56, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 291.28}, "custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 180.56, "shaped_reward_min": 82, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.1, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.91, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.74, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.79, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.5, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.74, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.74, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 579.0, 579.0, 570.0, 582.0, 576.0, 587.0, 639.0, 582.0, 582.0, 584.0, 576.0, 570.0, 579.0, 627.0, 630.0, 579.0, 573.0, 587.0, 579.0, 242.0, 570.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 582.0, 582.0, 627.0, 590.0, 582.0, 630.0, 590.0, 576.0, 579.0, 636.0, 587.0, 576.0, 579.0, 582.0, 573.0, 576.0, 627.0, 582.0, 582.0, 630.0, 579.0, 582.0, 579.0, 636.0, 579.0, 579.0, 630.0, 584.0, 579.0, 630.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 579.0, 584.0, 519.0, 533.0, 579.0, 584.0, 584.0, 579.0, 579.0, 582.0, 570.0, 582.0, 582.0, 593.0, 536.0, 582.0, 579.0, 582.0, 579.0, 570.0, 579.0, 630.0, 627.0, 530.0, 587.0, 579.0, 576.0, 522.0, 582.0, 582.0, 587.0, 579.0, 582.0, 630.0, 581.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 316.0, 283.0, 296.0, 292.0, 287.0, 289.0, 281.0, 293.0, 289.0, 287.0, 
289.0, 294.0, 293.0, 319.0, 320.0, 280.0, 302.0, 294.0, 288.0, 290.0, 294.0, 299.0, 277.0, 285.0, 285.0, 285.0, 294.0, 310.0, 317.0, 315.0, 315.0, 288.0, 291.0, 278.0, 295.0, 286.0, 301.0, 291.0, 288.0, 117.0, 125.0, 282.0, 288.0, 315.0, 321.0, 290.0, 292.0, 280.0, 302.0, 288.0, 294.0, 291.0, 291.0, 293.0, 294.0, 290.0, 292.0, 294.0, 288.0, 292.0, 290.0, 307.0, 320.0, 295.0, 295.0, 289.0, 293.0, 307.0, 323.0, 291.0, 299.0, 286.0, 290.0, 288.0, 291.0, 320.0, 316.0, 283.0, 304.0, 290.0, 286.0, 285.0, 294.0, 295.0, 287.0, 289.0, 284.0, 288.0, 288.0, 312.0, 315.0, 293.0, 289.0, 287.0, 295.0, 308.0, 322.0, 286.0, 293.0, 284.0, 298.0, 291.0, 288.0, 311.0, 325.0, 287.0, 292.0, 290.0, 289.0, 320.0, 310.0, 292.0, 292.0, 287.0, 292.0, 317.0, 313.0, 288.0, 294.0, 289.0, 293.0, 289.0, 293.0, 289.0, 293.0, 285.0, 291.0, 291.0, 288.0, 292.0, 287.0, 294.0, 290.0, 256.0, 263.0, 268.0, 265.0, 287.0, 292.0, 293.0, 291.0, 290.0, 294.0, 284.0, 295.0, 287.0, 292.0, 293.0, 289.0, 292.0, 278.0, 290.0, 292.0, 290.0, 292.0, 297.0, 296.0, 265.0, 271.0, 290.0, 292.0, 288.0, 291.0, 289.0, 293.0, 288.0, 291.0, 286.0, 284.0, 286.0, 293.0, 313.0, 317.0, 309.0, 318.0, 266.0, 264.0, 300.0, 287.0, 282.0, 297.0, 288.0, 288.0, 256.0, 266.0, 292.0, 290.0, 290.0, 292.0, 296.0, 291.0, 286.0, 293.0, 290.0, 292.0, 315.0, 315.0, 290.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7010663356915461, "mean_inference_ms": 1.2531089378273157, "mean_action_processing_ms": 0.13422209832165993, "mean_env_wait_ms": 0.8437644807329641, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 242.0, "episode_reward_mean": 582.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 291.28}, "hist_stats": {"episode_reward": [633.0, 579.0, 579.0, 570.0, 582.0, 576.0, 587.0, 639.0, 582.0, 582.0, 584.0, 576.0, 570.0, 579.0, 627.0, 630.0, 579.0, 573.0, 587.0, 
579.0, 242.0, 570.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 582.0, 582.0, 627.0, 590.0, 582.0, 630.0, 590.0, 576.0, 579.0, 636.0, 587.0, 576.0, 579.0, 582.0, 573.0, 576.0, 627.0, 582.0, 582.0, 630.0, 579.0, 582.0, 579.0, 636.0, 579.0, 579.0, 630.0, 584.0, 579.0, 630.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 579.0, 584.0, 519.0, 533.0, 579.0, 584.0, 584.0, 579.0, 579.0, 582.0, 570.0, 582.0, 582.0, 593.0, 536.0, 582.0, 579.0, 582.0, 579.0, 570.0, 579.0, 630.0, 627.0, 530.0, 587.0, 579.0, 576.0, 522.0, 582.0, 582.0, 587.0, 579.0, 582.0, 630.0, 581.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 316.0, 283.0, 296.0, 292.0, 287.0, 289.0, 281.0, 293.0, 289.0, 287.0, 289.0, 294.0, 293.0, 319.0, 320.0, 280.0, 302.0, 294.0, 288.0, 290.0, 294.0, 299.0, 277.0, 285.0, 285.0, 285.0, 294.0, 310.0, 317.0, 315.0, 315.0, 288.0, 291.0, 278.0, 295.0, 286.0, 301.0, 291.0, 288.0, 117.0, 125.0, 282.0, 288.0, 315.0, 321.0, 290.0, 292.0, 280.0, 302.0, 288.0, 294.0, 291.0, 291.0, 293.0, 294.0, 290.0, 292.0, 294.0, 288.0, 292.0, 290.0, 307.0, 320.0, 295.0, 295.0, 289.0, 293.0, 307.0, 323.0, 291.0, 299.0, 286.0, 290.0, 288.0, 291.0, 320.0, 316.0, 283.0, 304.0, 290.0, 286.0, 285.0, 294.0, 295.0, 287.0, 289.0, 284.0, 288.0, 288.0, 312.0, 315.0, 293.0, 289.0, 287.0, 295.0, 308.0, 322.0, 286.0, 293.0, 284.0, 298.0, 291.0, 288.0, 311.0, 325.0, 287.0, 292.0, 290.0, 289.0, 320.0, 310.0, 292.0, 292.0, 287.0, 292.0, 317.0, 313.0, 288.0, 294.0, 289.0, 293.0, 289.0, 293.0, 289.0, 293.0, 285.0, 
291.0, 291.0, 288.0, 292.0, 287.0, 294.0, 290.0, 256.0, 263.0, 268.0, 265.0, 287.0, 292.0, 293.0, 291.0, 290.0, 294.0, 284.0, 295.0, 287.0, 292.0, 293.0, 289.0, 292.0, 278.0, 290.0, 292.0, 290.0, 292.0, 297.0, 296.0, 265.0, 271.0, 290.0, 292.0, 288.0, 291.0, 289.0, 293.0, 288.0, 291.0, 286.0, 284.0, 286.0, 293.0, 313.0, 317.0, 309.0, 318.0, 266.0, 264.0, 300.0, 287.0, 282.0, 297.0, 288.0, 288.0, 256.0, 266.0, 292.0, 290.0, 290.0, 292.0, 296.0, 291.0, 286.0, 293.0, 290.0, 292.0, 315.0, 315.0, 290.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7010663356915461, "mean_inference_ms": 1.2531089378273157, "mean_action_processing_ms": 0.13422209832165993, "mean_env_wait_ms": 0.8437644807329641, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8294400, "num_agent_steps_trained": 8294400, "num_env_steps_sampled": 4147200, "num_env_steps_trained": 4147200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4147200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8294400, "timers": {"training_iteration_time_ms": 3613.576, "learn_time_ms": 1108.298, "learn_throughput": 11549.24, "synch_weights_time_ms": 11.53}, "counters": {"num_env_steps_sampled": 4147200, "num_env_steps_trained": 4147200, "num_agent_steps_sampled": 8294400, "num_agent_steps_trained": 8294400}, "done": false, "episodes_total": 10368, "training_iteration": 324, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-32", "timestamp": 1666581692, "time_this_iter_s": 3.701406240463257, "time_total_s": 1244.744396686554, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1244.744396686554, "timesteps_since_restore": 0, "iterations_since_restore": 324, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.7, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.05, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.53, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.07, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.37, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.88, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.14, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 7.551535964012146e-05, "policy_loss": -0.0002647839719429612, "vf_loss": 7.565235137939453, "vf_explained_var": 0.5844516754150391, "kl": 0.0024913917295634747, "entropy": 0.8324460983276367, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4160000, "num_env_steps_trained": 4160000, "num_agent_steps_sampled": 8320000, "num_agent_steps_trained": 8320000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 587.65, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.825}, "custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 182.05, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.53, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 18.07, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.37, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.88, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.73, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.08, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.66, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.73, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.73, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [590.0, 582.0, 630.0, 590.0, 576.0, 579.0, 636.0, 587.0, 576.0, 579.0, 582.0, 573.0, 576.0, 627.0, 582.0, 582.0, 630.0, 579.0, 582.0, 579.0, 636.0, 579.0, 579.0, 630.0, 584.0, 579.0, 630.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 579.0, 584.0, 519.0, 533.0, 579.0, 584.0, 584.0, 579.0, 579.0, 582.0, 570.0, 582.0, 582.0, 593.0, 536.0, 582.0, 579.0, 582.0, 579.0, 570.0, 579.0, 630.0, 627.0, 530.0, 587.0, 579.0, 576.0, 522.0, 582.0, 582.0, 587.0, 579.0, 582.0, 630.0, 581.0, 582.0, 633.0, 582.0, 582.0, 576.0, 584.0, 582.0, 630.0, 584.0, 582.0, 570.0, 582.0, 633.0, 576.0, 576.0, 579.0, 579.0, 633.0, 579.0, 582.0, 636.0, 579.0, 587.0, 630.0, 630.0, 627.0, 582.0, 579.0, 582.0, 579.0, 582.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 295.0, 289.0, 293.0, 307.0, 323.0, 291.0, 299.0, 286.0, 290.0, 288.0, 
291.0, 320.0, 316.0, 283.0, 304.0, 290.0, 286.0, 285.0, 294.0, 295.0, 287.0, 289.0, 284.0, 288.0, 288.0, 312.0, 315.0, 293.0, 289.0, 287.0, 295.0, 308.0, 322.0, 286.0, 293.0, 284.0, 298.0, 291.0, 288.0, 311.0, 325.0, 287.0, 292.0, 290.0, 289.0, 320.0, 310.0, 292.0, 292.0, 287.0, 292.0, 317.0, 313.0, 288.0, 294.0, 289.0, 293.0, 289.0, 293.0, 289.0, 293.0, 285.0, 291.0, 291.0, 288.0, 292.0, 287.0, 294.0, 290.0, 256.0, 263.0, 268.0, 265.0, 287.0, 292.0, 293.0, 291.0, 290.0, 294.0, 284.0, 295.0, 287.0, 292.0, 293.0, 289.0, 292.0, 278.0, 290.0, 292.0, 290.0, 292.0, 297.0, 296.0, 265.0, 271.0, 290.0, 292.0, 288.0, 291.0, 289.0, 293.0, 288.0, 291.0, 286.0, 284.0, 286.0, 293.0, 313.0, 317.0, 309.0, 318.0, 266.0, 264.0, 300.0, 287.0, 282.0, 297.0, 288.0, 288.0, 256.0, 266.0, 292.0, 290.0, 290.0, 292.0, 296.0, 291.0, 286.0, 293.0, 290.0, 292.0, 315.0, 315.0, 290.0, 291.0, 293.0, 289.0, 319.0, 314.0, 293.0, 289.0, 288.0, 294.0, 287.0, 289.0, 286.0, 298.0, 291.0, 291.0, 312.0, 318.0, 300.0, 284.0, 291.0, 291.0, 286.0, 284.0, 285.0, 297.0, 317.0, 316.0, 286.0, 290.0, 291.0, 285.0, 289.0, 290.0, 290.0, 289.0, 318.0, 315.0, 285.0, 294.0, 286.0, 296.0, 319.0, 317.0, 283.0, 296.0, 290.0, 297.0, 307.0, 323.0, 314.0, 316.0, 309.0, 318.0, 298.0, 284.0, 295.0, 284.0, 292.0, 290.0, 283.0, 296.0, 293.0, 289.0, 319.0, 320.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7010020568901624, "mean_inference_ms": 1.2530853564939755, "mean_action_processing_ms": 0.1342152056410183, "mean_env_wait_ms": 0.8437793845031866, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 587.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.825}, "hist_stats": {"episode_reward": [590.0, 582.0, 630.0, 590.0, 576.0, 579.0, 636.0, 587.0, 576.0, 579.0, 582.0, 573.0, 576.0, 627.0, 582.0, 582.0, 630.0, 579.0, 582.0, 
579.0, 636.0, 579.0, 579.0, 630.0, 584.0, 579.0, 630.0, 582.0, 582.0, 582.0, 582.0, 576.0, 579.0, 579.0, 584.0, 519.0, 533.0, 579.0, 584.0, 584.0, 579.0, 579.0, 582.0, 570.0, 582.0, 582.0, 593.0, 536.0, 582.0, 579.0, 582.0, 579.0, 570.0, 579.0, 630.0, 627.0, 530.0, 587.0, 579.0, 576.0, 522.0, 582.0, 582.0, 587.0, 579.0, 582.0, 630.0, 581.0, 582.0, 633.0, 582.0, 582.0, 576.0, 584.0, 582.0, 630.0, 584.0, 582.0, 570.0, 582.0, 633.0, 576.0, 576.0, 579.0, 579.0, 633.0, 579.0, 582.0, 636.0, 579.0, 587.0, 630.0, 630.0, 627.0, 582.0, 579.0, 582.0, 579.0, 582.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 295.0, 289.0, 293.0, 307.0, 323.0, 291.0, 299.0, 286.0, 290.0, 288.0, 291.0, 320.0, 316.0, 283.0, 304.0, 290.0, 286.0, 285.0, 294.0, 295.0, 287.0, 289.0, 284.0, 288.0, 288.0, 312.0, 315.0, 293.0, 289.0, 287.0, 295.0, 308.0, 322.0, 286.0, 293.0, 284.0, 298.0, 291.0, 288.0, 311.0, 325.0, 287.0, 292.0, 290.0, 289.0, 320.0, 310.0, 292.0, 292.0, 287.0, 292.0, 317.0, 313.0, 288.0, 294.0, 289.0, 293.0, 289.0, 293.0, 289.0, 293.0, 285.0, 291.0, 291.0, 288.0, 292.0, 287.0, 294.0, 290.0, 256.0, 263.0, 268.0, 265.0, 287.0, 292.0, 293.0, 291.0, 290.0, 294.0, 284.0, 295.0, 287.0, 292.0, 293.0, 289.0, 292.0, 278.0, 290.0, 292.0, 290.0, 292.0, 297.0, 296.0, 265.0, 271.0, 290.0, 292.0, 288.0, 291.0, 289.0, 293.0, 288.0, 291.0, 286.0, 284.0, 286.0, 293.0, 313.0, 317.0, 309.0, 318.0, 266.0, 264.0, 300.0, 287.0, 282.0, 297.0, 288.0, 288.0, 256.0, 266.0, 292.0, 290.0, 290.0, 292.0, 296.0, 
291.0, 286.0, 293.0, 290.0, 292.0, 315.0, 315.0, 290.0, 291.0, 293.0, 289.0, 319.0, 314.0, 293.0, 289.0, 288.0, 294.0, 287.0, 289.0, 286.0, 298.0, 291.0, 291.0, 312.0, 318.0, 300.0, 284.0, 291.0, 291.0, 286.0, 284.0, 285.0, 297.0, 317.0, 316.0, 286.0, 290.0, 291.0, 285.0, 289.0, 290.0, 290.0, 289.0, 318.0, 315.0, 285.0, 294.0, 286.0, 296.0, 319.0, 317.0, 283.0, 296.0, 290.0, 297.0, 307.0, 323.0, 314.0, 316.0, 309.0, 318.0, 298.0, 284.0, 295.0, 284.0, 292.0, 290.0, 283.0, 296.0, 293.0, 289.0, 319.0, 320.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7010020568901624, "mean_inference_ms": 1.2530853564939755, "mean_action_processing_ms": 0.1342152056410183, "mean_env_wait_ms": 0.8437793845031866, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8320000, "num_agent_steps_trained": 8320000, "num_env_steps_sampled": 4160000, "num_env_steps_trained": 4160000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4160000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8320000, "timers": {"training_iteration_time_ms": 3657.697, "learn_time_ms": 1121.055, "learn_throughput": 11417.814, "synch_weights_time_ms": 11.904}, "counters": {"num_env_steps_sampled": 4160000, "num_env_steps_trained": 4160000, "num_agent_steps_sampled": 8320000, "num_agent_steps_trained": 8320000}, "done": false, "episodes_total": 10400, "training_iteration": 325, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-36", "timestamp": 1666581696, "time_this_iter_s": 4.0308966636657715, "time_total_s": 1248.7752933502197, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1248.7752933502197, "timesteps_since_restore": 0, "iterations_since_restore": 325, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.500000000000004, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.45, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.32, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.07, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 18.11, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.84, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.89, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.84, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.84, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0042573739774525166, "policy_loss": 0.0038920738734304905, "vf_loss": 7.782495975494385, "vf_explained_var": 0.5672118663787842, "kl": 0.0021648143883794546, "entropy": 0.8258967399597168, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4172800, "num_env_steps_trained": 4172800, "num_agent_steps_sampled": 8345600, "num_agent_steps_trained": 8345600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 585.85, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 244.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 292.925}, "custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.45, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.32, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 15.07, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 18.11, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.84, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.98, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.89, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.73, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.59, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 14.84, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.98, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.84, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.98, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 584.0, 519.0, 533.0, 579.0, 584.0, 584.0, 579.0, 579.0, 582.0, 570.0, 582.0, 582.0, 593.0, 536.0, 582.0, 579.0, 582.0, 579.0, 570.0, 579.0, 630.0, 627.0, 530.0, 587.0, 579.0, 576.0, 522.0, 582.0, 582.0, 587.0, 579.0, 582.0, 630.0, 581.0, 582.0, 633.0, 582.0, 582.0, 576.0, 584.0, 582.0, 630.0, 584.0, 582.0, 570.0, 582.0, 633.0, 576.0, 576.0, 579.0, 579.0, 633.0, 579.0, 582.0, 636.0, 579.0, 587.0, 630.0, 630.0, 627.0, 582.0, 579.0, 582.0, 579.0, 582.0, 639.0, 579.0, 579.0, 582.0, 579.0, 582.0, 587.0, 633.0, 525.0, 579.0, 582.0, 576.0, 582.0, 579.0, 513.0, 582.0, 579.0, 582.0, 576.0, 639.0, 582.0, 579.0, 630.0, 630.0, 579.0, 636.0, 582.0, 582.0, 579.0, 582.0, 630.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 292.0, 287.0, 294.0, 290.0, 256.0, 263.0, 268.0, 265.0, 287.0, 
292.0, 293.0, 291.0, 290.0, 294.0, 284.0, 295.0, 287.0, 292.0, 293.0, 289.0, 292.0, 278.0, 290.0, 292.0, 290.0, 292.0, 297.0, 296.0, 265.0, 271.0, 290.0, 292.0, 288.0, 291.0, 289.0, 293.0, 288.0, 291.0, 286.0, 284.0, 286.0, 293.0, 313.0, 317.0, 309.0, 318.0, 266.0, 264.0, 300.0, 287.0, 282.0, 297.0, 288.0, 288.0, 256.0, 266.0, 292.0, 290.0, 290.0, 292.0, 296.0, 291.0, 286.0, 293.0, 290.0, 292.0, 315.0, 315.0, 290.0, 291.0, 293.0, 289.0, 319.0, 314.0, 293.0, 289.0, 288.0, 294.0, 287.0, 289.0, 286.0, 298.0, 291.0, 291.0, 312.0, 318.0, 300.0, 284.0, 291.0, 291.0, 286.0, 284.0, 285.0, 297.0, 317.0, 316.0, 286.0, 290.0, 291.0, 285.0, 289.0, 290.0, 290.0, 289.0, 318.0, 315.0, 285.0, 294.0, 286.0, 296.0, 319.0, 317.0, 283.0, 296.0, 290.0, 297.0, 307.0, 323.0, 314.0, 316.0, 309.0, 318.0, 298.0, 284.0, 295.0, 284.0, 292.0, 290.0, 283.0, 296.0, 293.0, 289.0, 319.0, 320.0, 288.0, 291.0, 287.0, 292.0, 288.0, 294.0, 289.0, 290.0, 291.0, 291.0, 292.0, 295.0, 316.0, 317.0, 259.0, 266.0, 278.0, 301.0, 294.0, 288.0, 291.0, 285.0, 290.0, 292.0, 289.0, 290.0, 244.0, 269.0, 283.0, 299.0, 286.0, 293.0, 290.0, 292.0, 289.0, 287.0, 321.0, 318.0, 290.0, 292.0, 292.0, 287.0, 309.0, 321.0, 317.0, 313.0, 289.0, 290.0, 311.0, 325.0, 287.0, 295.0, 295.0, 287.0, 287.0, 292.0, 291.0, 291.0, 317.0, 313.0, 283.0, 290.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7009214126610166, "mean_inference_ms": 1.253050791965419, "mean_action_processing_ms": 0.13420651041820075, "mean_env_wait_ms": 0.8437815261311539, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 585.85, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 244.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 292.925}, "hist_stats": {"episode_reward": [579.0, 579.0, 584.0, 519.0, 533.0, 579.0, 584.0, 584.0, 579.0, 579.0, 582.0, 570.0, 582.0, 582.0, 593.0, 536.0, 582.0, 579.0, 582.0, 
579.0, 570.0, 579.0, 630.0, 627.0, 530.0, 587.0, 579.0, 576.0, 522.0, 582.0, 582.0, 587.0, 579.0, 582.0, 630.0, 581.0, 582.0, 633.0, 582.0, 582.0, 576.0, 584.0, 582.0, 630.0, 584.0, 582.0, 570.0, 582.0, 633.0, 576.0, 576.0, 579.0, 579.0, 633.0, 579.0, 582.0, 636.0, 579.0, 587.0, 630.0, 630.0, 627.0, 582.0, 579.0, 582.0, 579.0, 582.0, 639.0, 579.0, 579.0, 582.0, 579.0, 582.0, 587.0, 633.0, 525.0, 579.0, 582.0, 576.0, 582.0, 579.0, 513.0, 582.0, 579.0, 582.0, 576.0, 639.0, 582.0, 579.0, 630.0, 630.0, 579.0, 636.0, 582.0, 582.0, 579.0, 582.0, 630.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 292.0, 287.0, 294.0, 290.0, 256.0, 263.0, 268.0, 265.0, 287.0, 292.0, 293.0, 291.0, 290.0, 294.0, 284.0, 295.0, 287.0, 292.0, 293.0, 289.0, 292.0, 278.0, 290.0, 292.0, 290.0, 292.0, 297.0, 296.0, 265.0, 271.0, 290.0, 292.0, 288.0, 291.0, 289.0, 293.0, 288.0, 291.0, 286.0, 284.0, 286.0, 293.0, 313.0, 317.0, 309.0, 318.0, 266.0, 264.0, 300.0, 287.0, 282.0, 297.0, 288.0, 288.0, 256.0, 266.0, 292.0, 290.0, 290.0, 292.0, 296.0, 291.0, 286.0, 293.0, 290.0, 292.0, 315.0, 315.0, 290.0, 291.0, 293.0, 289.0, 319.0, 314.0, 293.0, 289.0, 288.0, 294.0, 287.0, 289.0, 286.0, 298.0, 291.0, 291.0, 312.0, 318.0, 300.0, 284.0, 291.0, 291.0, 286.0, 284.0, 285.0, 297.0, 317.0, 316.0, 286.0, 290.0, 291.0, 285.0, 289.0, 290.0, 290.0, 289.0, 318.0, 315.0, 285.0, 294.0, 286.0, 296.0, 319.0, 317.0, 283.0, 296.0, 290.0, 297.0, 307.0, 323.0, 314.0, 316.0, 309.0, 318.0, 298.0, 284.0, 295.0, 
284.0, 292.0, 290.0, 283.0, 296.0, 293.0, 289.0, 319.0, 320.0, 288.0, 291.0, 287.0, 292.0, 288.0, 294.0, 289.0, 290.0, 291.0, 291.0, 292.0, 295.0, 316.0, 317.0, 259.0, 266.0, 278.0, 301.0, 294.0, 288.0, 291.0, 285.0, 290.0, 292.0, 289.0, 290.0, 244.0, 269.0, 283.0, 299.0, 286.0, 293.0, 290.0, 292.0, 289.0, 287.0, 321.0, 318.0, 290.0, 292.0, 292.0, 287.0, 309.0, 321.0, 317.0, 313.0, 289.0, 290.0, 311.0, 325.0, 287.0, 295.0, 295.0, 287.0, 287.0, 292.0, 291.0, 291.0, 317.0, 313.0, 283.0, 290.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7009214126610166, "mean_inference_ms": 1.253050791965419, "mean_action_processing_ms": 0.13420651041820075, "mean_env_wait_ms": 0.8437815261311539, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8345600, "num_agent_steps_trained": 8345600, "num_env_steps_sampled": 4172800, "num_env_steps_trained": 4172800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4172800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8345600, "timers": {"training_iteration_time_ms": 3657.009, "learn_time_ms": 1120.872, "learn_throughput": 11419.68, "synch_weights_time_ms": 11.883}, "counters": {"num_env_steps_sampled": 4172800, "num_env_steps_trained": 4172800, "num_agent_steps_sampled": 8345600, "num_agent_steps_trained": 8345600}, "done": false, "episodes_total": 10432, "training_iteration": 326, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-40", "timestamp": 1666581700, "time_this_iter_s": 3.6777994632720947, "time_total_s": 1252.4530928134918, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1252.4530928134918, "timesteps_since_restore": 0, "iterations_since_restore": 326, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.74, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 203.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.82, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.1, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 18.3, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.7, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.16, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.87, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.7, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.16, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.7, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.16, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001463708933442831, "policy_loss": 0.0011175316758453846, "vf_loss": 7.6503777503967285, "vf_explained_var": 0.5795704126358032, "kl": 0.002307520480826497, "entropy": 0.8377187848091125, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4185600, "num_env_steps_trained": 4185600, "num_agent_steps_sampled": 8371200, "num_agent_steps_trained": 8371200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 587.82, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 244.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.91}, "custom_metrics": {"sparse_reward_mean": 203.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.82, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.1, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 18.5, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.88, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 18.3, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.7, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.16, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 6.04, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.87, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.67, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.7, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.16, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.7, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.16, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 630.0, 581.0, 582.0, 633.0, 582.0, 582.0, 576.0, 584.0, 582.0, 630.0, 584.0, 582.0, 570.0, 582.0, 633.0, 576.0, 576.0, 579.0, 579.0, 633.0, 579.0, 582.0, 636.0, 579.0, 587.0, 630.0, 630.0, 627.0, 582.0, 579.0, 582.0, 579.0, 582.0, 639.0, 579.0, 579.0, 582.0, 579.0, 582.0, 587.0, 633.0, 525.0, 579.0, 582.0, 576.0, 582.0, 579.0, 513.0, 582.0, 579.0, 582.0, 576.0, 639.0, 582.0, 579.0, 630.0, 630.0, 579.0, 636.0, 582.0, 582.0, 579.0, 582.0, 630.0, 573.0, 579.0, 582.0, 579.0, 584.0, 579.0, 582.0, 582.0, 525.0, 579.0, 579.0, 584.0, 587.0, 582.0, 630.0, 582.0, 579.0, 579.0, 582.0, 630.0, 573.0, 576.0, 576.0, 576.0, 582.0, 582.0, 579.0, 530.0, 582.0, 582.0, 576.0, 582.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 290.0, 292.0, 315.0, 315.0, 290.0, 291.0, 293.0, 289.0, 319.0, 
314.0, 293.0, 289.0, 288.0, 294.0, 287.0, 289.0, 286.0, 298.0, 291.0, 291.0, 312.0, 318.0, 300.0, 284.0, 291.0, 291.0, 286.0, 284.0, 285.0, 297.0, 317.0, 316.0, 286.0, 290.0, 291.0, 285.0, 289.0, 290.0, 290.0, 289.0, 318.0, 315.0, 285.0, 294.0, 286.0, 296.0, 319.0, 317.0, 283.0, 296.0, 290.0, 297.0, 307.0, 323.0, 314.0, 316.0, 309.0, 318.0, 298.0, 284.0, 295.0, 284.0, 292.0, 290.0, 283.0, 296.0, 293.0, 289.0, 319.0, 320.0, 288.0, 291.0, 287.0, 292.0, 288.0, 294.0, 289.0, 290.0, 291.0, 291.0, 292.0, 295.0, 316.0, 317.0, 259.0, 266.0, 278.0, 301.0, 294.0, 288.0, 291.0, 285.0, 290.0, 292.0, 289.0, 290.0, 244.0, 269.0, 283.0, 299.0, 286.0, 293.0, 290.0, 292.0, 289.0, 287.0, 321.0, 318.0, 290.0, 292.0, 292.0, 287.0, 309.0, 321.0, 317.0, 313.0, 289.0, 290.0, 311.0, 325.0, 287.0, 295.0, 295.0, 287.0, 287.0, 292.0, 291.0, 291.0, 317.0, 313.0, 283.0, 290.0, 289.0, 290.0, 293.0, 289.0, 290.0, 289.0, 294.0, 290.0, 293.0, 286.0, 285.0, 297.0, 292.0, 290.0, 266.0, 259.0, 294.0, 285.0, 290.0, 289.0, 292.0, 292.0, 296.0, 291.0, 286.0, 296.0, 317.0, 313.0, 285.0, 297.0, 288.0, 291.0, 290.0, 289.0, 293.0, 289.0, 307.0, 323.0, 290.0, 283.0, 286.0, 290.0, 292.0, 284.0, 290.0, 286.0, 295.0, 287.0, 292.0, 290.0, 290.0, 289.0, 260.0, 270.0, 291.0, 291.0, 286.0, 296.0, 283.0, 293.0, 294.0, 288.0, 289.0, 293.0, 319.0, 311.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7008398055665644, "mean_inference_ms": 1.2530141301165323, "mean_action_processing_ms": 0.13419647676777532, "mean_env_wait_ms": 0.8437727731291801, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 587.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 244.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.91}, "hist_stats": {"episode_reward": [579.0, 582.0, 630.0, 581.0, 582.0, 633.0, 582.0, 582.0, 576.0, 584.0, 582.0, 630.0, 584.0, 582.0, 570.0, 582.0, 633.0, 576.0, 576.0, 
579.0, 579.0, 633.0, 579.0, 582.0, 636.0, 579.0, 587.0, 630.0, 630.0, 627.0, 582.0, 579.0, 582.0, 579.0, 582.0, 639.0, 579.0, 579.0, 582.0, 579.0, 582.0, 587.0, 633.0, 525.0, 579.0, 582.0, 576.0, 582.0, 579.0, 513.0, 582.0, 579.0, 582.0, 576.0, 639.0, 582.0, 579.0, 630.0, 630.0, 579.0, 636.0, 582.0, 582.0, 579.0, 582.0, 630.0, 573.0, 579.0, 582.0, 579.0, 584.0, 579.0, 582.0, 582.0, 525.0, 579.0, 579.0, 584.0, 587.0, 582.0, 630.0, 582.0, 579.0, 579.0, 582.0, 630.0, 573.0, 576.0, 576.0, 576.0, 582.0, 582.0, 579.0, 530.0, 582.0, 582.0, 576.0, 582.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 290.0, 292.0, 315.0, 315.0, 290.0, 291.0, 293.0, 289.0, 319.0, 314.0, 293.0, 289.0, 288.0, 294.0, 287.0, 289.0, 286.0, 298.0, 291.0, 291.0, 312.0, 318.0, 300.0, 284.0, 291.0, 291.0, 286.0, 284.0, 285.0, 297.0, 317.0, 316.0, 286.0, 290.0, 291.0, 285.0, 289.0, 290.0, 290.0, 289.0, 318.0, 315.0, 285.0, 294.0, 286.0, 296.0, 319.0, 317.0, 283.0, 296.0, 290.0, 297.0, 307.0, 323.0, 314.0, 316.0, 309.0, 318.0, 298.0, 284.0, 295.0, 284.0, 292.0, 290.0, 283.0, 296.0, 293.0, 289.0, 319.0, 320.0, 288.0, 291.0, 287.0, 292.0, 288.0, 294.0, 289.0, 290.0, 291.0, 291.0, 292.0, 295.0, 316.0, 317.0, 259.0, 266.0, 278.0, 301.0, 294.0, 288.0, 291.0, 285.0, 290.0, 292.0, 289.0, 290.0, 244.0, 269.0, 283.0, 299.0, 286.0, 293.0, 290.0, 292.0, 289.0, 287.0, 321.0, 318.0, 290.0, 292.0, 292.0, 287.0, 309.0, 321.0, 317.0, 313.0, 289.0, 290.0, 311.0, 325.0, 287.0, 295.0, 295.0, 287.0, 287.0, 
292.0, 291.0, 291.0, 317.0, 313.0, 283.0, 290.0, 289.0, 290.0, 293.0, 289.0, 290.0, 289.0, 294.0, 290.0, 293.0, 286.0, 285.0, 297.0, 292.0, 290.0, 266.0, 259.0, 294.0, 285.0, 290.0, 289.0, 292.0, 292.0, 296.0, 291.0, 286.0, 296.0, 317.0, 313.0, 285.0, 297.0, 288.0, 291.0, 290.0, 289.0, 293.0, 289.0, 307.0, 323.0, 290.0, 283.0, 286.0, 290.0, 292.0, 284.0, 290.0, 286.0, 295.0, 287.0, 292.0, 290.0, 290.0, 289.0, 260.0, 270.0, 291.0, 291.0, 286.0, 296.0, 283.0, 293.0, 294.0, 288.0, 289.0, 293.0, 319.0, 311.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7008398055665644, "mean_inference_ms": 1.2530141301165323, "mean_action_processing_ms": 0.13419647676777532, "mean_env_wait_ms": 0.8437727731291801, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8371200, "num_agent_steps_trained": 8371200, "num_env_steps_sampled": 4185600, "num_env_steps_trained": 4185600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4185600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8371200, "timers": {"training_iteration_time_ms": 3643.802, "learn_time_ms": 1107.309, "learn_throughput": 11559.553, "synch_weights_time_ms": 12.586}, "counters": {"num_env_steps_sampled": 4185600, "num_env_steps_trained": 4185600, "num_agent_steps_sampled": 8371200, "num_agent_steps_trained": 8371200}, "done": false, "episodes_total": 10464, "training_iteration": 327, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-44", "timestamp": 1666581704, "time_this_iter_s": 3.6818816661834717, "time_total_s": 1256.1349744796753, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1256.1349744796753, "timesteps_since_restore": 0, "iterations_since_restore": 327, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.816666666666666, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.77, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 31, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 30, "useful_onion_pickup_agent_0_mean": 15.04, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 31, "useful_onion_pickup_agent_1_mean": 18.03, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 30, "onion_drop_agent_0_mean": 0.08, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 30, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 30, "dish_pickup_agent_0_mean": 5.92, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.72, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.89, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 30, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 30, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 30, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 30, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004695032839663327, "policy_loss": 0.00012688391143456101, "vf_loss": 7.660691261291504, "vf_explained_var": 0.5659324526786804, "kl": 0.0022306411992758512, "entropy": 0.846896231174469, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4198400, "num_env_steps_trained": 4198400, "num_agent_steps_sampled": 8396800, "num_agent_steps_trained": 8396800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 585.17, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 244.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 292.585}, "custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.77, "shaped_reward_min": 153, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.27, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 31, "onion_pickup_agent_1_mean": 18.26, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 30, "useful_onion_pickup_agent_0_mean": 15.04, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 31, "useful_onion_pickup_agent_1_mean": 18.03, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 30, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 30, "potting_onion_agent_1_mean": 17.84, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 30, "dish_pickup_agent_0_mean": 5.92, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.72, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.56, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.53, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 30, "optimal_onion_potting_agent_1_mean": 17.84, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 30, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 30, "viable_onion_potting_agent_1_mean": 17.84, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 30, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 639.0, 579.0, 579.0, 582.0, 579.0, 582.0, 587.0, 633.0, 525.0, 579.0, 582.0, 576.0, 582.0, 579.0, 513.0, 582.0, 579.0, 582.0, 576.0, 639.0, 582.0, 579.0, 630.0, 630.0, 579.0, 636.0, 582.0, 582.0, 579.0, 582.0, 630.0, 573.0, 579.0, 582.0, 579.0, 584.0, 579.0, 582.0, 582.0, 525.0, 579.0, 579.0, 584.0, 587.0, 582.0, 630.0, 582.0, 579.0, 579.0, 582.0, 630.0, 573.0, 576.0, 576.0, 576.0, 582.0, 582.0, 579.0, 530.0, 582.0, 582.0, 576.0, 582.0, 582.0, 630.0, 633.0, 627.0, 624.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 587.0, 573.0, 582.0, 570.0, 573.0, 582.0, 573.0, 587.0, 579.0, 579.0, 573.0, 636.0, 582.0, 579.0, 579.0, 636.0, 570.0, 579.0, 579.0, 582.0, 576.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 283.0, 296.0, 293.0, 289.0, 319.0, 320.0, 288.0, 291.0, 287.0, 
292.0, 288.0, 294.0, 289.0, 290.0, 291.0, 291.0, 292.0, 295.0, 316.0, 317.0, 259.0, 266.0, 278.0, 301.0, 294.0, 288.0, 291.0, 285.0, 290.0, 292.0, 289.0, 290.0, 244.0, 269.0, 283.0, 299.0, 286.0, 293.0, 290.0, 292.0, 289.0, 287.0, 321.0, 318.0, 290.0, 292.0, 292.0, 287.0, 309.0, 321.0, 317.0, 313.0, 289.0, 290.0, 311.0, 325.0, 287.0, 295.0, 295.0, 287.0, 287.0, 292.0, 291.0, 291.0, 317.0, 313.0, 283.0, 290.0, 289.0, 290.0, 293.0, 289.0, 290.0, 289.0, 294.0, 290.0, 293.0, 286.0, 285.0, 297.0, 292.0, 290.0, 266.0, 259.0, 294.0, 285.0, 290.0, 289.0, 292.0, 292.0, 296.0, 291.0, 286.0, 296.0, 317.0, 313.0, 285.0, 297.0, 288.0, 291.0, 290.0, 289.0, 293.0, 289.0, 307.0, 323.0, 290.0, 283.0, 286.0, 290.0, 292.0, 284.0, 290.0, 286.0, 295.0, 287.0, 292.0, 290.0, 290.0, 289.0, 260.0, 270.0, 291.0, 291.0, 286.0, 296.0, 283.0, 293.0, 294.0, 288.0, 289.0, 293.0, 319.0, 311.0, 320.0, 313.0, 313.0, 314.0, 311.0, 313.0, 292.0, 290.0, 283.0, 290.0, 292.0, 290.0, 293.0, 286.0, 292.0, 287.0, 282.0, 297.0, 290.0, 297.0, 285.0, 288.0, 294.0, 288.0, 286.0, 284.0, 287.0, 286.0, 292.0, 290.0, 284.0, 289.0, 298.0, 289.0, 293.0, 286.0, 294.0, 285.0, 284.0, 289.0, 318.0, 318.0, 289.0, 293.0, 291.0, 288.0, 291.0, 288.0, 316.0, 320.0, 279.0, 291.0, 289.0, 290.0, 291.0, 288.0, 288.0, 294.0, 288.0, 288.0, 283.0, 290.0, 278.0, 298.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7007466682641271, "mean_inference_ms": 1.2528424832108607, "mean_action_processing_ms": 0.13418538861990456, "mean_env_wait_ms": 0.8436620774338445, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 513.0, "episode_reward_mean": 585.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 244.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 292.585}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 639.0, 579.0, 579.0, 582.0, 579.0, 582.0, 587.0, 633.0, 525.0, 579.0, 582.0, 576.0, 582.0, 579.0, 513.0, 
582.0, 579.0, 582.0, 576.0, 639.0, 582.0, 579.0, 630.0, 630.0, 579.0, 636.0, 582.0, 582.0, 579.0, 582.0, 630.0, 573.0, 579.0, 582.0, 579.0, 584.0, 579.0, 582.0, 582.0, 525.0, 579.0, 579.0, 584.0, 587.0, 582.0, 630.0, 582.0, 579.0, 579.0, 582.0, 630.0, 573.0, 576.0, 576.0, 576.0, 582.0, 582.0, 579.0, 530.0, 582.0, 582.0, 576.0, 582.0, 582.0, 630.0, 633.0, 627.0, 624.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 587.0, 573.0, 582.0, 570.0, 573.0, 582.0, 573.0, 587.0, 579.0, 579.0, 573.0, 636.0, 582.0, 579.0, 579.0, 636.0, 570.0, 579.0, 579.0, 582.0, 576.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 283.0, 296.0, 293.0, 289.0, 319.0, 320.0, 288.0, 291.0, 287.0, 292.0, 288.0, 294.0, 289.0, 290.0, 291.0, 291.0, 292.0, 295.0, 316.0, 317.0, 259.0, 266.0, 278.0, 301.0, 294.0, 288.0, 291.0, 285.0, 290.0, 292.0, 289.0, 290.0, 244.0, 269.0, 283.0, 299.0, 286.0, 293.0, 290.0, 292.0, 289.0, 287.0, 321.0, 318.0, 290.0, 292.0, 292.0, 287.0, 309.0, 321.0, 317.0, 313.0, 289.0, 290.0, 311.0, 325.0, 287.0, 295.0, 295.0, 287.0, 287.0, 292.0, 291.0, 291.0, 317.0, 313.0, 283.0, 290.0, 289.0, 290.0, 293.0, 289.0, 290.0, 289.0, 294.0, 290.0, 293.0, 286.0, 285.0, 297.0, 292.0, 290.0, 266.0, 259.0, 294.0, 285.0, 290.0, 289.0, 292.0, 292.0, 296.0, 291.0, 286.0, 296.0, 317.0, 313.0, 285.0, 297.0, 288.0, 291.0, 290.0, 289.0, 293.0, 289.0, 307.0, 323.0, 290.0, 283.0, 286.0, 290.0, 292.0, 284.0, 290.0, 286.0, 295.0, 287.0, 292.0, 290.0, 290.0, 289.0, 260.0, 270.0, 291.0, 291.0, 
286.0, 296.0, 283.0, 293.0, 294.0, 288.0, 289.0, 293.0, 319.0, 311.0, 320.0, 313.0, 313.0, 314.0, 311.0, 313.0, 292.0, 290.0, 283.0, 290.0, 292.0, 290.0, 293.0, 286.0, 292.0, 287.0, 282.0, 297.0, 290.0, 297.0, 285.0, 288.0, 294.0, 288.0, 286.0, 284.0, 287.0, 286.0, 292.0, 290.0, 284.0, 289.0, 298.0, 289.0, 293.0, 286.0, 294.0, 285.0, 284.0, 289.0, 318.0, 318.0, 289.0, 293.0, 291.0, 288.0, 291.0, 288.0, 316.0, 320.0, 279.0, 291.0, 289.0, 290.0, 291.0, 288.0, 288.0, 294.0, 288.0, 288.0, 283.0, 290.0, 278.0, 298.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7007466682641271, "mean_inference_ms": 1.2528424832108607, "mean_action_processing_ms": 0.13418538861990456, "mean_env_wait_ms": 0.8436620774338445, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8396800, "num_agent_steps_trained": 8396800, "num_env_steps_sampled": 4198400, "num_env_steps_trained": 4198400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4198400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8396800, "timers": {"training_iteration_time_ms": 3632.043, "learn_time_ms": 1099.786, "learn_throughput": 11638.625, "synch_weights_time_ms": 12.684}, "counters": {"num_env_steps_sampled": 4198400, "num_env_steps_trained": 4198400, "num_agent_steps_sampled": 8396800, "num_agent_steps_trained": 8396800}, "done": false, "episodes_total": 10496, "training_iteration": 328, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-47", "timestamp": 1666581707, "time_this_iter_s": 3.521747350692749, "time_total_s": 1259.656721830368, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1259.656721830368, "timesteps_since_restore": 0, "iterations_since_restore": 328, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.4, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.7, "shaped_reward_min": 144, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 31, "onion_pickup_agent_1_mean": 17.7, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 30, "useful_onion_pickup_agent_0_mean": 15.41, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 31, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 30, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.23, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 30, "potting_onion_agent_1_mean": 17.29, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 30, "dish_pickup_agent_0_mean": 5.83, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.23, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 30, "optimal_onion_potting_agent_1_mean": 17.29, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 30, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.23, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 30, "viable_onion_potting_agent_1_mean": 17.29, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 30, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0028664914425462484, "policy_loss": 0.002509418409317732, "vf_loss": 7.775605201721191, "vf_explained_var": 0.5675947666168213, "kl": 0.0030672703869640827, "entropy": 0.8409721255302429, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4211200, "num_env_steps_trained": 4211200, "num_agent_steps_sampled": 8422400, "num_agent_steps_trained": 8422400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 504.0, "episode_reward_mean": 582.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 291.45}, "custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.7, "shaped_reward_min": 144, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 31, "onion_pickup_agent_1_mean": 17.7, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 30, "useful_onion_pickup_agent_0_mean": 15.41, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 31, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 30, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.23, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 30, "potting_onion_agent_1_mean": 17.29, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 30, "dish_pickup_agent_0_mean": 5.83, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.75, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.23, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 30, "optimal_onion_potting_agent_1_mean": 17.29, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 30, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.23, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 30, "viable_onion_potting_agent_1_mean": 17.29, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 30, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 573.0, 579.0, 582.0, 579.0, 584.0, 579.0, 582.0, 582.0, 525.0, 579.0, 579.0, 584.0, 587.0, 582.0, 630.0, 582.0, 579.0, 579.0, 582.0, 630.0, 573.0, 576.0, 576.0, 576.0, 582.0, 582.0, 579.0, 530.0, 582.0, 582.0, 576.0, 582.0, 582.0, 630.0, 633.0, 627.0, 624.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 587.0, 573.0, 582.0, 570.0, 573.0, 582.0, 573.0, 587.0, 579.0, 579.0, 573.0, 636.0, 582.0, 579.0, 579.0, 636.0, 570.0, 579.0, 579.0, 582.0, 576.0, 573.0, 576.0, 567.0, 579.0, 627.0, 633.0, 587.0, 582.0, 579.0, 579.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 504.0, 582.0, 576.0, 579.0, 579.0, 573.0, 570.0, 579.0, 527.0, 579.0, 630.0, 579.0, 567.0, 582.0, 587.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 317.0, 313.0, 283.0, 290.0, 289.0, 290.0, 293.0, 289.0, 290.0, 
289.0, 294.0, 290.0, 293.0, 286.0, 285.0, 297.0, 292.0, 290.0, 266.0, 259.0, 294.0, 285.0, 290.0, 289.0, 292.0, 292.0, 296.0, 291.0, 286.0, 296.0, 317.0, 313.0, 285.0, 297.0, 288.0, 291.0, 290.0, 289.0, 293.0, 289.0, 307.0, 323.0, 290.0, 283.0, 286.0, 290.0, 292.0, 284.0, 290.0, 286.0, 295.0, 287.0, 292.0, 290.0, 290.0, 289.0, 260.0, 270.0, 291.0, 291.0, 286.0, 296.0, 283.0, 293.0, 294.0, 288.0, 289.0, 293.0, 319.0, 311.0, 320.0, 313.0, 313.0, 314.0, 311.0, 313.0, 292.0, 290.0, 283.0, 290.0, 292.0, 290.0, 293.0, 286.0, 292.0, 287.0, 282.0, 297.0, 290.0, 297.0, 285.0, 288.0, 294.0, 288.0, 286.0, 284.0, 287.0, 286.0, 292.0, 290.0, 284.0, 289.0, 298.0, 289.0, 293.0, 286.0, 294.0, 285.0, 284.0, 289.0, 318.0, 318.0, 289.0, 293.0, 291.0, 288.0, 291.0, 288.0, 316.0, 320.0, 279.0, 291.0, 289.0, 290.0, 291.0, 288.0, 288.0, 294.0, 288.0, 288.0, 283.0, 290.0, 278.0, 298.0, 287.0, 280.0, 290.0, 289.0, 307.0, 320.0, 319.0, 314.0, 300.0, 287.0, 291.0, 291.0, 287.0, 292.0, 285.0, 294.0, 281.0, 298.0, 284.0, 292.0, 291.0, 291.0, 288.0, 294.0, 292.0, 290.0, 291.0, 291.0, 295.0, 287.0, 255.0, 249.0, 286.0, 296.0, 290.0, 286.0, 286.0, 293.0, 292.0, 287.0, 284.0, 289.0, 276.0, 294.0, 290.0, 289.0, 263.0, 264.0, 290.0, 289.0, 317.0, 313.0, 293.0, 286.0, 289.0, 278.0, 287.0, 295.0, 289.0, 298.0, 288.0, 291.0, 287.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7006761995941965, "mean_inference_ms": 1.2526628427401818, "mean_action_processing_ms": 0.13417690551647285, "mean_env_wait_ms": 0.8435719265710255, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 504.0, "episode_reward_mean": 582.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 291.45}, "hist_stats": {"episode_reward": [582.0, 630.0, 573.0, 579.0, 582.0, 579.0, 584.0, 579.0, 582.0, 582.0, 525.0, 579.0, 579.0, 584.0, 587.0, 582.0, 630.0, 582.0, 579.0, 
579.0, 582.0, 630.0, 573.0, 576.0, 576.0, 576.0, 582.0, 582.0, 579.0, 530.0, 582.0, 582.0, 576.0, 582.0, 582.0, 630.0, 633.0, 627.0, 624.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 587.0, 573.0, 582.0, 570.0, 573.0, 582.0, 573.0, 587.0, 579.0, 579.0, 573.0, 636.0, 582.0, 579.0, 579.0, 636.0, 570.0, 579.0, 579.0, 582.0, 576.0, 573.0, 576.0, 567.0, 579.0, 627.0, 633.0, 587.0, 582.0, 579.0, 579.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 504.0, 582.0, 576.0, 579.0, 579.0, 573.0, 570.0, 579.0, 527.0, 579.0, 630.0, 579.0, 567.0, 582.0, 587.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 317.0, 313.0, 283.0, 290.0, 289.0, 290.0, 293.0, 289.0, 290.0, 289.0, 294.0, 290.0, 293.0, 286.0, 285.0, 297.0, 292.0, 290.0, 266.0, 259.0, 294.0, 285.0, 290.0, 289.0, 292.0, 292.0, 296.0, 291.0, 286.0, 296.0, 317.0, 313.0, 285.0, 297.0, 288.0, 291.0, 290.0, 289.0, 293.0, 289.0, 307.0, 323.0, 290.0, 283.0, 286.0, 290.0, 292.0, 284.0, 290.0, 286.0, 295.0, 287.0, 292.0, 290.0, 290.0, 289.0, 260.0, 270.0, 291.0, 291.0, 286.0, 296.0, 283.0, 293.0, 294.0, 288.0, 289.0, 293.0, 319.0, 311.0, 320.0, 313.0, 313.0, 314.0, 311.0, 313.0, 292.0, 290.0, 283.0, 290.0, 292.0, 290.0, 293.0, 286.0, 292.0, 287.0, 282.0, 297.0, 290.0, 297.0, 285.0, 288.0, 294.0, 288.0, 286.0, 284.0, 287.0, 286.0, 292.0, 290.0, 284.0, 289.0, 298.0, 289.0, 293.0, 286.0, 294.0, 285.0, 284.0, 289.0, 318.0, 318.0, 289.0, 293.0, 291.0, 288.0, 291.0, 288.0, 316.0, 320.0, 279.0, 291.0, 289.0, 290.0, 291.0, 
288.0, 288.0, 294.0, 288.0, 288.0, 283.0, 290.0, 278.0, 298.0, 287.0, 280.0, 290.0, 289.0, 307.0, 320.0, 319.0, 314.0, 300.0, 287.0, 291.0, 291.0, 287.0, 292.0, 285.0, 294.0, 281.0, 298.0, 284.0, 292.0, 291.0, 291.0, 288.0, 294.0, 292.0, 290.0, 291.0, 291.0, 295.0, 287.0, 255.0, 249.0, 286.0, 296.0, 290.0, 286.0, 286.0, 293.0, 292.0, 287.0, 284.0, 289.0, 276.0, 294.0, 290.0, 289.0, 263.0, 264.0, 290.0, 289.0, 317.0, 313.0, 293.0, 286.0, 289.0, 278.0, 287.0, 295.0, 289.0, 298.0, 288.0, 291.0, 287.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7006761995941965, "mean_inference_ms": 1.2526628427401818, "mean_action_processing_ms": 0.13417690551647285, "mean_env_wait_ms": 0.8435719265710255, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8422400, "num_agent_steps_trained": 8422400, "num_env_steps_sampled": 4211200, "num_env_steps_trained": 4211200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4211200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8422400, "timers": {"training_iteration_time_ms": 3632.868, "learn_time_ms": 1097.298, "learn_throughput": 11665.015, "synch_weights_time_ms": 12.013}, "counters": {"num_env_steps_sampled": 4211200, "num_env_steps_trained": 4211200, "num_agent_steps_sampled": 8422400, "num_agent_steps_trained": 8422400}, "done": false, "episodes_total": 10528, "training_iteration": 329, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-51", "timestamp": 1666581711, "time_this_iter_s": 3.5217649936676025, "time_total_s": 1263.1784868240356, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1263.1784868240356, "timesteps_since_restore": 0, "iterations_since_restore": 329, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 202.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.32, "shaped_reward_min": 144, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 31, "onion_pickup_agent_1_mean": 17.82, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 30, "useful_onion_pickup_agent_0_mean": 15.35, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 31, "useful_onion_pickup_agent_1_mean": 17.69, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 30, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.1, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 30, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 30, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 11, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.1, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 30, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 30, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.1, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 30, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 30, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.004220202565193176, "policy_loss": 0.003862160723656416, "vf_loss": 7.747307777404785, "vf_explained_var": 0.5820736885070801, "kl": 0.002677002688869834, "entropy": 0.8333747386932373, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4224000, "num_env_steps_trained": 4224000, "num_agent_steps_sampled": 8448000, "num_agent_steps_trained": 8448000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 504.0, "episode_reward_mean": 585.12, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 292.56}, "custom_metrics": {"sparse_reward_mean": 202.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.32, "shaped_reward_min": 144, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 31, "onion_pickup_agent_1_mean": 17.82, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 30, "useful_onion_pickup_agent_0_mean": 15.35, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 31, "useful_onion_pickup_agent_1_mean": 17.69, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 30, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.1, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 30, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 30, "dish_pickup_agent_0_mean": 5.8, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.51, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 11, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 10, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.18, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.47, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 10, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 10, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.1, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 30, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 30, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.1, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 30, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 30, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 582.0, 582.0, 630.0, 633.0, 627.0, 624.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 587.0, 573.0, 582.0, 570.0, 573.0, 582.0, 573.0, 587.0, 579.0, 579.0, 573.0, 636.0, 582.0, 579.0, 579.0, 636.0, 570.0, 579.0, 579.0, 582.0, 576.0, 573.0, 576.0, 567.0, 579.0, 627.0, 633.0, 587.0, 582.0, 579.0, 579.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 504.0, 582.0, 576.0, 579.0, 579.0, 573.0, 570.0, 579.0, 527.0, 579.0, 630.0, 579.0, 567.0, 582.0, 587.0, 579.0, 579.0, 633.0, 581.0, 579.0, 579.0, 627.0, 582.0, 582.0, 573.0, 584.0, 582.0, 579.0, 627.0, 582.0, 627.0, 584.0, 584.0, 581.0, 582.0, 630.0, 576.0, 579.0, 587.0, 582.0, 527.0, 584.0, 587.0, 579.0, 582.0, 576.0, 627.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 294.0, 288.0, 289.0, 293.0, 319.0, 311.0, 320.0, 313.0, 313.0, 
314.0, 311.0, 313.0, 292.0, 290.0, 283.0, 290.0, 292.0, 290.0, 293.0, 286.0, 292.0, 287.0, 282.0, 297.0, 290.0, 297.0, 285.0, 288.0, 294.0, 288.0, 286.0, 284.0, 287.0, 286.0, 292.0, 290.0, 284.0, 289.0, 298.0, 289.0, 293.0, 286.0, 294.0, 285.0, 284.0, 289.0, 318.0, 318.0, 289.0, 293.0, 291.0, 288.0, 291.0, 288.0, 316.0, 320.0, 279.0, 291.0, 289.0, 290.0, 291.0, 288.0, 288.0, 294.0, 288.0, 288.0, 283.0, 290.0, 278.0, 298.0, 287.0, 280.0, 290.0, 289.0, 307.0, 320.0, 319.0, 314.0, 300.0, 287.0, 291.0, 291.0, 287.0, 292.0, 285.0, 294.0, 281.0, 298.0, 284.0, 292.0, 291.0, 291.0, 288.0, 294.0, 292.0, 290.0, 291.0, 291.0, 295.0, 287.0, 255.0, 249.0, 286.0, 296.0, 290.0, 286.0, 286.0, 293.0, 292.0, 287.0, 284.0, 289.0, 276.0, 294.0, 290.0, 289.0, 263.0, 264.0, 290.0, 289.0, 317.0, 313.0, 293.0, 286.0, 289.0, 278.0, 287.0, 295.0, 289.0, 298.0, 288.0, 291.0, 287.0, 292.0, 315.0, 318.0, 294.0, 287.0, 283.0, 296.0, 282.0, 297.0, 315.0, 312.0, 294.0, 288.0, 290.0, 292.0, 278.0, 295.0, 283.0, 301.0, 293.0, 289.0, 289.0, 290.0, 309.0, 318.0, 294.0, 288.0, 308.0, 319.0, 285.0, 299.0, 295.0, 289.0, 283.0, 298.0, 298.0, 284.0, 310.0, 320.0, 284.0, 292.0, 292.0, 287.0, 290.0, 297.0, 293.0, 289.0, 258.0, 269.0, 285.0, 299.0, 285.0, 302.0, 290.0, 289.0, 291.0, 291.0, 285.0, 291.0, 316.0, 311.0, 298.0, 289.0, 292.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.70060102924662, "mean_inference_ms": 1.252481111686981, "mean_action_processing_ms": 0.13416844921696647, "mean_env_wait_ms": 0.8434800010347309, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 504.0, "episode_reward_mean": 585.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 292.56}, "hist_stats": {"episode_reward": [576.0, 582.0, 582.0, 630.0, 633.0, 627.0, 624.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 587.0, 573.0, 582.0, 570.0, 573.0, 582.0, 
573.0, 587.0, 579.0, 579.0, 573.0, 636.0, 582.0, 579.0, 579.0, 636.0, 570.0, 579.0, 579.0, 582.0, 576.0, 573.0, 576.0, 567.0, 579.0, 627.0, 633.0, 587.0, 582.0, 579.0, 579.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 504.0, 582.0, 576.0, 579.0, 579.0, 573.0, 570.0, 579.0, 527.0, 579.0, 630.0, 579.0, 567.0, 582.0, 587.0, 579.0, 579.0, 633.0, 581.0, 579.0, 579.0, 627.0, 582.0, 582.0, 573.0, 584.0, 582.0, 579.0, 627.0, 582.0, 627.0, 584.0, 584.0, 581.0, 582.0, 630.0, 576.0, 579.0, 587.0, 582.0, 527.0, 584.0, 587.0, 579.0, 582.0, 576.0, 627.0, 587.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 294.0, 288.0, 289.0, 293.0, 319.0, 311.0, 320.0, 313.0, 313.0, 314.0, 311.0, 313.0, 292.0, 290.0, 283.0, 290.0, 292.0, 290.0, 293.0, 286.0, 292.0, 287.0, 282.0, 297.0, 290.0, 297.0, 285.0, 288.0, 294.0, 288.0, 286.0, 284.0, 287.0, 286.0, 292.0, 290.0, 284.0, 289.0, 298.0, 289.0, 293.0, 286.0, 294.0, 285.0, 284.0, 289.0, 318.0, 318.0, 289.0, 293.0, 291.0, 288.0, 291.0, 288.0, 316.0, 320.0, 279.0, 291.0, 289.0, 290.0, 291.0, 288.0, 288.0, 294.0, 288.0, 288.0, 283.0, 290.0, 278.0, 298.0, 287.0, 280.0, 290.0, 289.0, 307.0, 320.0, 319.0, 314.0, 300.0, 287.0, 291.0, 291.0, 287.0, 292.0, 285.0, 294.0, 281.0, 298.0, 284.0, 292.0, 291.0, 291.0, 288.0, 294.0, 292.0, 290.0, 291.0, 291.0, 295.0, 287.0, 255.0, 249.0, 286.0, 296.0, 290.0, 286.0, 286.0, 293.0, 292.0, 287.0, 284.0, 289.0, 276.0, 294.0, 290.0, 289.0, 263.0, 264.0, 290.0, 289.0, 317.0, 313.0, 293.0, 286.0, 289.0, 
278.0, 287.0, 295.0, 289.0, 298.0, 288.0, 291.0, 287.0, 292.0, 315.0, 318.0, 294.0, 287.0, 283.0, 296.0, 282.0, 297.0, 315.0, 312.0, 294.0, 288.0, 290.0, 292.0, 278.0, 295.0, 283.0, 301.0, 293.0, 289.0, 289.0, 290.0, 309.0, 318.0, 294.0, 288.0, 308.0, 319.0, 285.0, 299.0, 295.0, 289.0, 283.0, 298.0, 298.0, 284.0, 310.0, 320.0, 284.0, 292.0, 292.0, 287.0, 290.0, 297.0, 293.0, 289.0, 258.0, 269.0, 285.0, 299.0, 285.0, 302.0, 290.0, 289.0, 291.0, 291.0, 285.0, 291.0, 316.0, 311.0, 298.0, 289.0, 292.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.70060102924662, "mean_inference_ms": 1.252481111686981, "mean_action_processing_ms": 0.13416844921696647, "mean_env_wait_ms": 0.8434800010347309, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8448000, "num_agent_steps_trained": 8448000, "num_env_steps_sampled": 4224000, "num_env_steps_trained": 4224000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4224000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8448000, "timers": {"training_iteration_time_ms": 3635.768, "learn_time_ms": 1102.793, "learn_throughput": 11606.888, "synch_weights_time_ms": 12.53}, "counters": {"num_env_steps_sampled": 4224000, "num_env_steps_trained": 4224000, "num_agent_steps_sampled": 8448000, "num_agent_steps_trained": 8448000}, "done": false, "episodes_total": 10560, "training_iteration": 330, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-55", "timestamp": 1666581715, "time_this_iter_s": 3.7308542728424072, "time_total_s": 1266.909341096878, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1266.909341096878, "timesteps_since_restore": 0, "iterations_since_restore": 330, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.05, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.78, "shaped_reward_min": 144, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.69, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 30, "useful_onion_pickup_agent_0_mean": 14.58, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.3, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 30, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.35, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.11, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 30, "dish_pickup_agent_0_mean": 6.03, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 11, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.35, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.11, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 30, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.35, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.11, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 30, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.003572438843548298, "policy_loss": -0.003936300054192543, "vf_loss": 7.806034088134766, "vf_explained_var": 0.5505003929138184, "kl": 0.0024001419078558683, "entropy": 0.8334838151931763, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4236800, "num_env_steps_trained": 4236800, "num_agent_steps_sampled": 8473600, "num_agent_steps_trained": 8473600}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 504.0, "episode_reward_mean": 583.38, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 291.69}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.78, "shaped_reward_min": 144, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.69, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.46, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 30, "useful_onion_pickup_agent_0_mean": 14.58, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 18.3, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 30, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.35, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 18.11, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 30, "dish_pickup_agent_0_mean": 6.03, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 11, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.17, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.65, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.62, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.35, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 18.11, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 30, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.35, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 18.11, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 30, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 573.0, 576.0, 567.0, 579.0, 627.0, 633.0, 587.0, 582.0, 579.0, 579.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 504.0, 582.0, 576.0, 579.0, 579.0, 573.0, 570.0, 579.0, 527.0, 579.0, 630.0, 579.0, 567.0, 582.0, 587.0, 579.0, 579.0, 633.0, 581.0, 579.0, 579.0, 627.0, 582.0, 582.0, 573.0, 584.0, 582.0, 579.0, 627.0, 582.0, 627.0, 584.0, 584.0, 581.0, 582.0, 630.0, 576.0, 579.0, 587.0, 582.0, 527.0, 584.0, 587.0, 579.0, 582.0, 576.0, 627.0, 587.0, 579.0, 579.0, 525.0, 576.0, 576.0, 539.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 582.0, 584.0, 630.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 630.0, 576.0, 630.0, 579.0, 579.0, 582.0, 567.0, 582.0, 576.0, 576.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 288.0, 288.0, 283.0, 290.0, 278.0, 298.0, 287.0, 280.0, 290.0, 
289.0, 307.0, 320.0, 319.0, 314.0, 300.0, 287.0, 291.0, 291.0, 287.0, 292.0, 285.0, 294.0, 281.0, 298.0, 284.0, 292.0, 291.0, 291.0, 288.0, 294.0, 292.0, 290.0, 291.0, 291.0, 295.0, 287.0, 255.0, 249.0, 286.0, 296.0, 290.0, 286.0, 286.0, 293.0, 292.0, 287.0, 284.0, 289.0, 276.0, 294.0, 290.0, 289.0, 263.0, 264.0, 290.0, 289.0, 317.0, 313.0, 293.0, 286.0, 289.0, 278.0, 287.0, 295.0, 289.0, 298.0, 288.0, 291.0, 287.0, 292.0, 315.0, 318.0, 294.0, 287.0, 283.0, 296.0, 282.0, 297.0, 315.0, 312.0, 294.0, 288.0, 290.0, 292.0, 278.0, 295.0, 283.0, 301.0, 293.0, 289.0, 289.0, 290.0, 309.0, 318.0, 294.0, 288.0, 308.0, 319.0, 285.0, 299.0, 295.0, 289.0, 283.0, 298.0, 298.0, 284.0, 310.0, 320.0, 284.0, 292.0, 292.0, 287.0, 290.0, 297.0, 293.0, 289.0, 258.0, 269.0, 285.0, 299.0, 285.0, 302.0, 290.0, 289.0, 291.0, 291.0, 285.0, 291.0, 316.0, 311.0, 298.0, 289.0, 292.0, 287.0, 284.0, 295.0, 260.0, 265.0, 290.0, 286.0, 285.0, 291.0, 269.0, 270.0, 283.0, 293.0, 283.0, 290.0, 289.0, 290.0, 293.0, 286.0, 284.0, 286.0, 314.0, 319.0, 290.0, 292.0, 294.0, 290.0, 317.0, 313.0, 293.0, 289.0, 283.0, 290.0, 274.0, 299.0, 289.0, 293.0, 292.0, 287.0, 286.0, 293.0, 312.0, 318.0, 287.0, 289.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 291.0, 291.0, 290.0, 277.0, 291.0, 291.0, 290.0, 286.0, 283.0, 293.0, 324.0, 306.0, 288.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7005318625687796, "mean_inference_ms": 1.2523039495697514, "mean_action_processing_ms": 0.1341592330617022, "mean_env_wait_ms": 0.8433814070155357, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 504.0, "episode_reward_mean": 583.38, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 249.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 291.69}, "hist_stats": {"episode_reward": [582.0, 576.0, 573.0, 576.0, 567.0, 579.0, 627.0, 633.0, 587.0, 582.0, 579.0, 579.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 
504.0, 582.0, 576.0, 579.0, 579.0, 573.0, 570.0, 579.0, 527.0, 579.0, 630.0, 579.0, 567.0, 582.0, 587.0, 579.0, 579.0, 633.0, 581.0, 579.0, 579.0, 627.0, 582.0, 582.0, 573.0, 584.0, 582.0, 579.0, 627.0, 582.0, 627.0, 584.0, 584.0, 581.0, 582.0, 630.0, 576.0, 579.0, 587.0, 582.0, 527.0, 584.0, 587.0, 579.0, 582.0, 576.0, 627.0, 587.0, 579.0, 579.0, 525.0, 576.0, 576.0, 539.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 582.0, 584.0, 630.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 630.0, 576.0, 630.0, 579.0, 579.0, 582.0, 567.0, 582.0, 576.0, 576.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 288.0, 288.0, 283.0, 290.0, 278.0, 298.0, 287.0, 280.0, 290.0, 289.0, 307.0, 320.0, 319.0, 314.0, 300.0, 287.0, 291.0, 291.0, 287.0, 292.0, 285.0, 294.0, 281.0, 298.0, 284.0, 292.0, 291.0, 291.0, 288.0, 294.0, 292.0, 290.0, 291.0, 291.0, 295.0, 287.0, 255.0, 249.0, 286.0, 296.0, 290.0, 286.0, 286.0, 293.0, 292.0, 287.0, 284.0, 289.0, 276.0, 294.0, 290.0, 289.0, 263.0, 264.0, 290.0, 289.0, 317.0, 313.0, 293.0, 286.0, 289.0, 278.0, 287.0, 295.0, 289.0, 298.0, 288.0, 291.0, 287.0, 292.0, 315.0, 318.0, 294.0, 287.0, 283.0, 296.0, 282.0, 297.0, 315.0, 312.0, 294.0, 288.0, 290.0, 292.0, 278.0, 295.0, 283.0, 301.0, 293.0, 289.0, 289.0, 290.0, 309.0, 318.0, 294.0, 288.0, 308.0, 319.0, 285.0, 299.0, 295.0, 289.0, 283.0, 298.0, 298.0, 284.0, 310.0, 320.0, 284.0, 292.0, 292.0, 287.0, 290.0, 297.0, 293.0, 289.0, 258.0, 269.0, 285.0, 299.0, 285.0, 302.0, 290.0, 289.0, 291.0, 
291.0, 285.0, 291.0, 316.0, 311.0, 298.0, 289.0, 292.0, 287.0, 284.0, 295.0, 260.0, 265.0, 290.0, 286.0, 285.0, 291.0, 269.0, 270.0, 283.0, 293.0, 283.0, 290.0, 289.0, 290.0, 293.0, 286.0, 284.0, 286.0, 314.0, 319.0, 290.0, 292.0, 294.0, 290.0, 317.0, 313.0, 293.0, 289.0, 283.0, 290.0, 274.0, 299.0, 289.0, 293.0, 292.0, 287.0, 286.0, 293.0, 312.0, 318.0, 287.0, 289.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 291.0, 291.0, 290.0, 277.0, 291.0, 291.0, 290.0, 286.0, 283.0, 293.0, 324.0, 306.0, 288.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7005318625687796, "mean_inference_ms": 1.2523039495697514, "mean_action_processing_ms": 0.1341592330617022, "mean_env_wait_ms": 0.8433814070155357, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8473600, "num_agent_steps_trained": 8473600, "num_env_steps_sampled": 4236800, "num_env_steps_trained": 4236800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4236800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8473600, "timers": {"training_iteration_time_ms": 3635.238, "learn_time_ms": 1108.179, "learn_throughput": 11550.484, "synch_weights_time_ms": 13.173}, "counters": {"num_env_steps_sampled": 4236800, "num_env_steps_trained": 4236800, "num_agent_steps_sampled": 8473600, "num_agent_steps_trained": 8473600}, "done": false, "episodes_total": 10592, "training_iteration": 331, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-21-59", "timestamp": 1666581719, "time_this_iter_s": 3.6568710803985596, "time_total_s": 1270.5662121772766, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1270.5662121772766, "timesteps_since_restore": 0, "iterations_since_restore": 331, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.54, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.58, "shaped_reward_min": 159, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.78, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.29, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.63, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.1, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.46, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.94, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 11, "useful_dish_pickup_agent_1_mean": 4.72, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.46, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.94, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.46, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.94, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 3.2392097637057304e-05, "policy_loss": -0.00033404817804694176, "vf_loss": 7.88044548034668, "vf_explained_var": 0.5614603757858276, "kl": 0.002426392398774624, "entropy": 0.8432062864303589, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4249600, "num_env_steps_trained": 4249600, "num_agent_steps_sampled": 8499200, "num_agent_steps_trained": 8499200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 519.0, "episode_reward_mean": 581.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 290.59}, "custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.58, "shaped_reward_min": 159, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.78, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 18.29, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.63, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 18.1, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.46, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.94, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 11, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.74, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 11, "useful_dish_pickup_agent_1_mean": 4.72, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.63, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.57, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.6, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.46, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.94, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.46, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.94, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 587.0, 579.0, 579.0, 633.0, 581.0, 579.0, 579.0, 627.0, 582.0, 582.0, 573.0, 584.0, 582.0, 579.0, 627.0, 582.0, 627.0, 584.0, 584.0, 581.0, 582.0, 630.0, 576.0, 579.0, 587.0, 582.0, 527.0, 584.0, 587.0, 579.0, 582.0, 576.0, 627.0, 587.0, 579.0, 579.0, 525.0, 576.0, 576.0, 539.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 582.0, 584.0, 630.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 630.0, 576.0, 630.0, 579.0, 579.0, 582.0, 567.0, 582.0, 576.0, 576.0, 630.0, 576.0, 582.0, 579.0, 579.0, 576.0, 582.0, 582.0, 630.0, 530.0, 579.0, 576.0, 630.0, 579.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 582.0, 570.0, 519.0, 525.0, 522.0, 582.0, 582.0, 579.0, 581.0, 525.0, 587.0, 576.0, 582.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 295.0, 289.0, 298.0, 288.0, 291.0, 287.0, 292.0, 315.0, 318.0, 294.0, 
287.0, 283.0, 296.0, 282.0, 297.0, 315.0, 312.0, 294.0, 288.0, 290.0, 292.0, 278.0, 295.0, 283.0, 301.0, 293.0, 289.0, 289.0, 290.0, 309.0, 318.0, 294.0, 288.0, 308.0, 319.0, 285.0, 299.0, 295.0, 289.0, 283.0, 298.0, 298.0, 284.0, 310.0, 320.0, 284.0, 292.0, 292.0, 287.0, 290.0, 297.0, 293.0, 289.0, 258.0, 269.0, 285.0, 299.0, 285.0, 302.0, 290.0, 289.0, 291.0, 291.0, 285.0, 291.0, 316.0, 311.0, 298.0, 289.0, 292.0, 287.0, 284.0, 295.0, 260.0, 265.0, 290.0, 286.0, 285.0, 291.0, 269.0, 270.0, 283.0, 293.0, 283.0, 290.0, 289.0, 290.0, 293.0, 286.0, 284.0, 286.0, 314.0, 319.0, 290.0, 292.0, 294.0, 290.0, 317.0, 313.0, 293.0, 289.0, 283.0, 290.0, 274.0, 299.0, 289.0, 293.0, 292.0, 287.0, 286.0, 293.0, 312.0, 318.0, 287.0, 289.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 291.0, 291.0, 290.0, 277.0, 291.0, 291.0, 290.0, 286.0, 283.0, 293.0, 324.0, 306.0, 288.0, 288.0, 291.0, 291.0, 296.0, 283.0, 288.0, 291.0, 288.0, 288.0, 287.0, 295.0, 287.0, 295.0, 317.0, 313.0, 267.0, 263.0, 287.0, 292.0, 288.0, 288.0, 316.0, 314.0, 290.0, 289.0, 292.0, 290.0, 288.0, 291.0, 288.0, 294.0, 291.0, 285.0, 293.0, 286.0, 284.0, 289.0, 289.0, 293.0, 283.0, 287.0, 262.0, 257.0, 268.0, 257.0, 263.0, 259.0, 293.0, 289.0, 291.0, 291.0, 284.0, 295.0, 292.0, 289.0, 262.0, 263.0, 296.0, 291.0, 283.0, 293.0, 292.0, 290.0, 263.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7004575792416293, "mean_inference_ms": 1.2521147919128353, "mean_action_processing_ms": 0.1341505685088555, "mean_env_wait_ms": 0.8432758339878859, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 519.0, "episode_reward_mean": 581.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 257.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 290.59}, "hist_stats": {"episode_reward": [582.0, 587.0, 579.0, 579.0, 633.0, 581.0, 579.0, 579.0, 627.0, 582.0, 582.0, 573.0, 584.0, 582.0, 579.0, 627.0, 582.0, 627.0, 584.0, 
584.0, 581.0, 582.0, 630.0, 576.0, 579.0, 587.0, 582.0, 527.0, 584.0, 587.0, 579.0, 582.0, 576.0, 627.0, 587.0, 579.0, 579.0, 525.0, 576.0, 576.0, 539.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 582.0, 584.0, 630.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 630.0, 576.0, 630.0, 579.0, 579.0, 582.0, 567.0, 582.0, 576.0, 576.0, 630.0, 576.0, 582.0, 579.0, 579.0, 576.0, 582.0, 582.0, 630.0, 530.0, 579.0, 576.0, 630.0, 579.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 582.0, 570.0, 519.0, 525.0, 522.0, 582.0, 582.0, 579.0, 581.0, 525.0, 587.0, 576.0, 582.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 295.0, 289.0, 298.0, 288.0, 291.0, 287.0, 292.0, 315.0, 318.0, 294.0, 287.0, 283.0, 296.0, 282.0, 297.0, 315.0, 312.0, 294.0, 288.0, 290.0, 292.0, 278.0, 295.0, 283.0, 301.0, 293.0, 289.0, 289.0, 290.0, 309.0, 318.0, 294.0, 288.0, 308.0, 319.0, 285.0, 299.0, 295.0, 289.0, 283.0, 298.0, 298.0, 284.0, 310.0, 320.0, 284.0, 292.0, 292.0, 287.0, 290.0, 297.0, 293.0, 289.0, 258.0, 269.0, 285.0, 299.0, 285.0, 302.0, 290.0, 289.0, 291.0, 291.0, 285.0, 291.0, 316.0, 311.0, 298.0, 289.0, 292.0, 287.0, 284.0, 295.0, 260.0, 265.0, 290.0, 286.0, 285.0, 291.0, 269.0, 270.0, 283.0, 293.0, 283.0, 290.0, 289.0, 290.0, 293.0, 286.0, 284.0, 286.0, 314.0, 319.0, 290.0, 292.0, 294.0, 290.0, 317.0, 313.0, 293.0, 289.0, 283.0, 290.0, 274.0, 299.0, 289.0, 293.0, 292.0, 287.0, 286.0, 293.0, 312.0, 318.0, 287.0, 289.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 291.0, 291.0, 290.0, 277.0, 291.0, 
291.0, 290.0, 286.0, 283.0, 293.0, 324.0, 306.0, 288.0, 288.0, 291.0, 291.0, 296.0, 283.0, 288.0, 291.0, 288.0, 288.0, 287.0, 295.0, 287.0, 295.0, 317.0, 313.0, 267.0, 263.0, 287.0, 292.0, 288.0, 288.0, 316.0, 314.0, 290.0, 289.0, 292.0, 290.0, 288.0, 291.0, 288.0, 294.0, 291.0, 285.0, 293.0, 286.0, 284.0, 289.0, 289.0, 293.0, 283.0, 287.0, 262.0, 257.0, 268.0, 257.0, 263.0, 259.0, 293.0, 289.0, 291.0, 291.0, 284.0, 295.0, 292.0, 289.0, 262.0, 263.0, 296.0, 291.0, 283.0, 293.0, 292.0, 290.0, 263.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7004575792416293, "mean_inference_ms": 1.2521147919128353, "mean_action_processing_ms": 0.1341505685088555, "mean_env_wait_ms": 0.8432758339878859, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8499200, "num_agent_steps_trained": 8499200, "num_env_steps_sampled": 4249600, "num_env_steps_trained": 4249600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4249600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8499200, "timers": {"training_iteration_time_ms": 3630.316, "learn_time_ms": 1112.124, "learn_throughput": 11509.508, "synch_weights_time_ms": 12.733}, "counters": {"num_env_steps_sampled": 4249600, "num_env_steps_trained": 4249600, "num_agent_steps_sampled": 8499200, "num_agent_steps_trained": 8499200}, "done": false, "episodes_total": 10624, "training_iteration": 332, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-03", "timestamp": 1666581723, "time_this_iter_s": 3.664041042327881, "time_total_s": 1274.2302532196045, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1274.2302532196045, "timesteps_since_restore": 0, "iterations_since_restore": 332, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.71666666666667, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 200.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 178.92, "shaped_reward_min": 150, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.96, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.0, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.8, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.74, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.92, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.79, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.72, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.65, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.74, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.74, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0002707752864807844, "policy_loss": -0.0006344539579004049, "vf_loss": 7.797179222106934, "vf_explained_var": 0.5710536241531372, "kl": 0.003583373501896858, "entropy": 0.8320763111114502, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4262400, "num_env_steps_trained": 4262400, "num_agent_steps_sampled": 8524800, "num_agent_steps_trained": 8524800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 390.0, "episode_reward_mean": 579.72, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 192.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.86}, "custom_metrics": {"sparse_reward_mean": 200.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 178.92, "shaped_reward_min": 150, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.96, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 18.0, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.8, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.74, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.63, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.92, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.79, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.72, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.65, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.58, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.74, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.63, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.74, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.63, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 627.0, 587.0, 579.0, 579.0, 525.0, 576.0, 576.0, 539.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 582.0, 584.0, 630.0, 582.0, 573.0, 573.0, 582.0, 579.0, 579.0, 630.0, 576.0, 630.0, 579.0, 579.0, 582.0, 567.0, 582.0, 576.0, 576.0, 630.0, 576.0, 582.0, 579.0, 579.0, 576.0, 582.0, 582.0, 630.0, 530.0, 579.0, 576.0, 630.0, 579.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 582.0, 570.0, 519.0, 525.0, 522.0, 582.0, 582.0, 579.0, 581.0, 525.0, 587.0, 576.0, 582.0, 522.0, 584.0, 576.0, 579.0, 516.0, 390.0, 630.0, 633.0, 579.0, 582.0, 582.0, 567.0, 579.0, 579.0, 630.0, 630.0, 579.0, 584.0, 636.0, 627.0, 579.0, 582.0, 533.0, 579.0, 582.0, 579.0, 579.0, 579.0, 570.0, 579.0, 630.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 316.0, 311.0, 298.0, 289.0, 292.0, 287.0, 284.0, 295.0, 260.0, 
265.0, 290.0, 286.0, 285.0, 291.0, 269.0, 270.0, 283.0, 293.0, 283.0, 290.0, 289.0, 290.0, 293.0, 286.0, 284.0, 286.0, 314.0, 319.0, 290.0, 292.0, 294.0, 290.0, 317.0, 313.0, 293.0, 289.0, 283.0, 290.0, 274.0, 299.0, 289.0, 293.0, 292.0, 287.0, 286.0, 293.0, 312.0, 318.0, 287.0, 289.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 291.0, 291.0, 290.0, 277.0, 291.0, 291.0, 290.0, 286.0, 283.0, 293.0, 324.0, 306.0, 288.0, 288.0, 291.0, 291.0, 296.0, 283.0, 288.0, 291.0, 288.0, 288.0, 287.0, 295.0, 287.0, 295.0, 317.0, 313.0, 267.0, 263.0, 287.0, 292.0, 288.0, 288.0, 316.0, 314.0, 290.0, 289.0, 292.0, 290.0, 288.0, 291.0, 288.0, 294.0, 291.0, 285.0, 293.0, 286.0, 284.0, 289.0, 289.0, 293.0, 283.0, 287.0, 262.0, 257.0, 268.0, 257.0, 263.0, 259.0, 293.0, 289.0, 291.0, 291.0, 284.0, 295.0, 292.0, 289.0, 262.0, 263.0, 296.0, 291.0, 283.0, 293.0, 292.0, 290.0, 263.0, 259.0, 293.0, 291.0, 288.0, 288.0, 286.0, 293.0, 259.0, 257.0, 198.0, 192.0, 322.0, 308.0, 315.0, 318.0, 289.0, 290.0, 285.0, 297.0, 293.0, 289.0, 288.0, 279.0, 297.0, 282.0, 292.0, 287.0, 315.0, 315.0, 316.0, 314.0, 284.0, 295.0, 290.0, 294.0, 316.0, 320.0, 313.0, 314.0, 291.0, 288.0, 291.0, 291.0, 268.0, 265.0, 292.0, 287.0, 292.0, 290.0, 284.0, 295.0, 299.0, 280.0, 287.0, 292.0, 285.0, 285.0, 289.0, 290.0, 311.0, 319.0, 314.0, 316.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.700378879793017, "mean_inference_ms": 1.2519036473483978, "mean_action_processing_ms": 0.13413895914133342, "mean_env_wait_ms": 0.8431548787503442, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 390.0, "episode_reward_mean": 579.72, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 192.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.86}, "hist_stats": {"episode_reward": [576.0, 627.0, 587.0, 579.0, 579.0, 525.0, 576.0, 576.0, 539.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 582.0, 584.0, 630.0, 582.0, 
573.0, 573.0, 582.0, 579.0, 579.0, 630.0, 576.0, 630.0, 579.0, 579.0, 582.0, 567.0, 582.0, 576.0, 576.0, 630.0, 576.0, 582.0, 579.0, 579.0, 576.0, 582.0, 582.0, 630.0, 530.0, 579.0, 576.0, 630.0, 579.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 582.0, 570.0, 519.0, 525.0, 522.0, 582.0, 582.0, 579.0, 581.0, 525.0, 587.0, 576.0, 582.0, 522.0, 584.0, 576.0, 579.0, 516.0, 390.0, 630.0, 633.0, 579.0, 582.0, 582.0, 567.0, 579.0, 579.0, 630.0, 630.0, 579.0, 584.0, 636.0, 627.0, 579.0, 582.0, 533.0, 579.0, 582.0, 579.0, 579.0, 579.0, 570.0, 579.0, 630.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 316.0, 311.0, 298.0, 289.0, 292.0, 287.0, 284.0, 295.0, 260.0, 265.0, 290.0, 286.0, 285.0, 291.0, 269.0, 270.0, 283.0, 293.0, 283.0, 290.0, 289.0, 290.0, 293.0, 286.0, 284.0, 286.0, 314.0, 319.0, 290.0, 292.0, 294.0, 290.0, 317.0, 313.0, 293.0, 289.0, 283.0, 290.0, 274.0, 299.0, 289.0, 293.0, 292.0, 287.0, 286.0, 293.0, 312.0, 318.0, 287.0, 289.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 291.0, 291.0, 290.0, 277.0, 291.0, 291.0, 290.0, 286.0, 283.0, 293.0, 324.0, 306.0, 288.0, 288.0, 291.0, 291.0, 296.0, 283.0, 288.0, 291.0, 288.0, 288.0, 287.0, 295.0, 287.0, 295.0, 317.0, 313.0, 267.0, 263.0, 287.0, 292.0, 288.0, 288.0, 316.0, 314.0, 290.0, 289.0, 292.0, 290.0, 288.0, 291.0, 288.0, 294.0, 291.0, 285.0, 293.0, 286.0, 284.0, 289.0, 289.0, 293.0, 283.0, 287.0, 262.0, 257.0, 268.0, 257.0, 263.0, 259.0, 293.0, 289.0, 291.0, 291.0, 284.0, 295.0, 292.0, 289.0, 262.0, 
263.0, 296.0, 291.0, 283.0, 293.0, 292.0, 290.0, 263.0, 259.0, 293.0, 291.0, 288.0, 288.0, 286.0, 293.0, 259.0, 257.0, 198.0, 192.0, 322.0, 308.0, 315.0, 318.0, 289.0, 290.0, 285.0, 297.0, 293.0, 289.0, 288.0, 279.0, 297.0, 282.0, 292.0, 287.0, 315.0, 315.0, 316.0, 314.0, 284.0, 295.0, 290.0, 294.0, 316.0, 320.0, 313.0, 314.0, 291.0, 288.0, 291.0, 291.0, 268.0, 265.0, 292.0, 287.0, 292.0, 290.0, 284.0, 295.0, 299.0, 280.0, 287.0, 292.0, 285.0, 285.0, 289.0, 290.0, 311.0, 319.0, 314.0, 316.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.700378879793017, "mean_inference_ms": 1.2519036473483978, "mean_action_processing_ms": 0.13413895914133342, "mean_env_wait_ms": 0.8431548787503442, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8524800, "num_agent_steps_trained": 8524800, "num_env_steps_sampled": 4262400, "num_env_steps_trained": 4262400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4262400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8524800, "timers": {"training_iteration_time_ms": 3635.178, "learn_time_ms": 1121.744, "learn_throughput": 11410.806, "synch_weights_time_ms": 13.851}, "counters": {"num_env_steps_sampled": 4262400, "num_env_steps_trained": 4262400, "num_agent_steps_sampled": 8524800, "num_agent_steps_trained": 8524800}, "done": false, "episodes_total": 10656, "training_iteration": 333, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-07", "timestamp": 1666581727, "time_this_iter_s": 3.7733590602874756, "time_total_s": 1278.003612279892, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1278.003612279892, "timesteps_since_restore": 0, "iterations_since_restore": 333, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.650000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 200.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 178.8, "shaped_reward_min": 150, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.62, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.44, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.35, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.01, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.35, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.01, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.35, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.01, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00029662175802513957, "policy_loss": -5.7847180869430304e-05, "vf_loss": 7.726676940917969, "vf_explained_var": 0.5506268739700317, "kl": 0.0021445024758577347, "entropy": 0.8363960981369019, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4275200, "num_env_steps_trained": 4275200, "num_agent_steps_sampled": 8550400, "num_agent_steps_trained": 8550400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 390.0, "episode_reward_mean": 578.8, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 192.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.4}, "custom_metrics": {"sparse_reward_mean": 200.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 178.8, "shaped_reward_min": 150, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.62, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.44, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.35, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.01, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.35, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.01, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.35, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.01, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 630.0, 576.0, 582.0, 579.0, 579.0, 576.0, 582.0, 582.0, 630.0, 530.0, 579.0, 576.0, 630.0, 579.0, 582.0, 579.0, 582.0, 576.0, 579.0, 573.0, 582.0, 570.0, 519.0, 525.0, 522.0, 582.0, 582.0, 579.0, 581.0, 525.0, 587.0, 576.0, 582.0, 522.0, 584.0, 576.0, 579.0, 516.0, 390.0, 630.0, 633.0, 579.0, 582.0, 582.0, 567.0, 579.0, 579.0, 630.0, 630.0, 579.0, 584.0, 636.0, 627.0, 579.0, 582.0, 533.0, 579.0, 582.0, 579.0, 579.0, 579.0, 570.0, 579.0, 630.0, 630.0, 579.0, 527.0, 579.0, 582.0, 587.0, 582.0, 576.0, 564.0, 627.0, 573.0, 576.0, 582.0, 582.0, 576.0, 579.0, 584.0, 576.0, 627.0, 584.0, 576.0, 579.0, 590.0, 582.0, 573.0, 579.0, 576.0, 579.0, 573.0, 579.0, 582.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 286.0, 283.0, 293.0, 324.0, 306.0, 288.0, 288.0, 291.0, 291.0, 296.0, 
283.0, 288.0, 291.0, 288.0, 288.0, 287.0, 295.0, 287.0, 295.0, 317.0, 313.0, 267.0, 263.0, 287.0, 292.0, 288.0, 288.0, 316.0, 314.0, 290.0, 289.0, 292.0, 290.0, 288.0, 291.0, 288.0, 294.0, 291.0, 285.0, 293.0, 286.0, 284.0, 289.0, 289.0, 293.0, 283.0, 287.0, 262.0, 257.0, 268.0, 257.0, 263.0, 259.0, 293.0, 289.0, 291.0, 291.0, 284.0, 295.0, 292.0, 289.0, 262.0, 263.0, 296.0, 291.0, 283.0, 293.0, 292.0, 290.0, 263.0, 259.0, 293.0, 291.0, 288.0, 288.0, 286.0, 293.0, 259.0, 257.0, 198.0, 192.0, 322.0, 308.0, 315.0, 318.0, 289.0, 290.0, 285.0, 297.0, 293.0, 289.0, 288.0, 279.0, 297.0, 282.0, 292.0, 287.0, 315.0, 315.0, 316.0, 314.0, 284.0, 295.0, 290.0, 294.0, 316.0, 320.0, 313.0, 314.0, 291.0, 288.0, 291.0, 291.0, 268.0, 265.0, 292.0, 287.0, 292.0, 290.0, 284.0, 295.0, 299.0, 280.0, 287.0, 292.0, 285.0, 285.0, 289.0, 290.0, 311.0, 319.0, 314.0, 316.0, 288.0, 291.0, 256.0, 271.0, 292.0, 287.0, 286.0, 296.0, 297.0, 290.0, 293.0, 289.0, 289.0, 287.0, 279.0, 285.0, 309.0, 318.0, 288.0, 285.0, 289.0, 287.0, 292.0, 290.0, 279.0, 303.0, 283.0, 293.0, 289.0, 290.0, 296.0, 288.0, 287.0, 289.0, 317.0, 310.0, 288.0, 296.0, 282.0, 294.0, 288.0, 291.0, 288.0, 302.0, 291.0, 291.0, 289.0, 284.0, 288.0, 291.0, 291.0, 285.0, 288.0, 291.0, 290.0, 283.0, 277.0, 302.0, 286.0, 296.0, 293.0, 289.0, 289.0, 287.0, 286.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7003150056262335, "mean_inference_ms": 1.251689765304647, "mean_action_processing_ms": 0.13412905696629593, "mean_env_wait_ms": 0.8430429668352946, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 390.0, "episode_reward_mean": 578.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 192.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.4}, "hist_stats": {"episode_reward": [576.0, 576.0, 630.0, 576.0, 582.0, 579.0, 579.0, 576.0, 582.0, 582.0, 630.0, 530.0, 579.0, 576.0, 630.0, 579.0, 582.0, 579.0, 582.0, 
576.0, 579.0, 573.0, 582.0, 570.0, 519.0, 525.0, 522.0, 582.0, 582.0, 579.0, 581.0, 525.0, 587.0, 576.0, 582.0, 522.0, 584.0, 576.0, 579.0, 516.0, 390.0, 630.0, 633.0, 579.0, 582.0, 582.0, 567.0, 579.0, 579.0, 630.0, 630.0, 579.0, 584.0, 636.0, 627.0, 579.0, 582.0, 533.0, 579.0, 582.0, 579.0, 579.0, 579.0, 570.0, 579.0, 630.0, 630.0, 579.0, 527.0, 579.0, 582.0, 587.0, 582.0, 576.0, 564.0, 627.0, 573.0, 576.0, 582.0, 582.0, 576.0, 579.0, 584.0, 576.0, 627.0, 584.0, 576.0, 579.0, 590.0, 582.0, 573.0, 579.0, 576.0, 579.0, 573.0, 579.0, 582.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 286.0, 283.0, 293.0, 324.0, 306.0, 288.0, 288.0, 291.0, 291.0, 296.0, 283.0, 288.0, 291.0, 288.0, 288.0, 287.0, 295.0, 287.0, 295.0, 317.0, 313.0, 267.0, 263.0, 287.0, 292.0, 288.0, 288.0, 316.0, 314.0, 290.0, 289.0, 292.0, 290.0, 288.0, 291.0, 288.0, 294.0, 291.0, 285.0, 293.0, 286.0, 284.0, 289.0, 289.0, 293.0, 283.0, 287.0, 262.0, 257.0, 268.0, 257.0, 263.0, 259.0, 293.0, 289.0, 291.0, 291.0, 284.0, 295.0, 292.0, 289.0, 262.0, 263.0, 296.0, 291.0, 283.0, 293.0, 292.0, 290.0, 263.0, 259.0, 293.0, 291.0, 288.0, 288.0, 286.0, 293.0, 259.0, 257.0, 198.0, 192.0, 322.0, 308.0, 315.0, 318.0, 289.0, 290.0, 285.0, 297.0, 293.0, 289.0, 288.0, 279.0, 297.0, 282.0, 292.0, 287.0, 315.0, 315.0, 316.0, 314.0, 284.0, 295.0, 290.0, 294.0, 316.0, 320.0, 313.0, 314.0, 291.0, 288.0, 291.0, 291.0, 268.0, 265.0, 292.0, 287.0, 292.0, 290.0, 284.0, 295.0, 299.0, 280.0, 287.0, 292.0, 285.0, 
285.0, 289.0, 290.0, 311.0, 319.0, 314.0, 316.0, 288.0, 291.0, 256.0, 271.0, 292.0, 287.0, 286.0, 296.0, 297.0, 290.0, 293.0, 289.0, 289.0, 287.0, 279.0, 285.0, 309.0, 318.0, 288.0, 285.0, 289.0, 287.0, 292.0, 290.0, 279.0, 303.0, 283.0, 293.0, 289.0, 290.0, 296.0, 288.0, 287.0, 289.0, 317.0, 310.0, 288.0, 296.0, 282.0, 294.0, 288.0, 291.0, 288.0, 302.0, 291.0, 291.0, 289.0, 284.0, 288.0, 291.0, 291.0, 285.0, 288.0, 291.0, 290.0, 283.0, 277.0, 302.0, 286.0, 296.0, 293.0, 289.0, 289.0, 287.0, 286.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7003150056262335, "mean_inference_ms": 1.251689765304647, "mean_action_processing_ms": 0.13412905696629593, "mean_env_wait_ms": 0.8430429668352946, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8550400, "num_agent_steps_trained": 8550400, "num_env_steps_sampled": 4275200, "num_env_steps_trained": 4275200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4275200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8550400, "timers": {"training_iteration_time_ms": 3627.686, "learn_time_ms": 1120.74, "learn_throughput": 11421.03, "synch_weights_time_ms": 13.773}, "counters": {"num_env_steps_sampled": 4275200, "num_env_steps_trained": 4275200, "num_agent_steps_sampled": 8550400, "num_agent_steps_trained": 8550400}, "done": false, "episodes_total": 10688, "training_iteration": 334, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-10", "timestamp": 1666581730, "time_this_iter_s": 3.639566421508789, "time_total_s": 1281.6431787014008, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1281.6431787014008, "timesteps_since_restore": 0, "iterations_since_restore": 334, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.48, "ram_util_percent": 10.64}} +{"custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 179.45, "shaped_reward_min": 150, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.7, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.4, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.54, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.22, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.38, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.11, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.38, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.11, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.38, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.11, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00028831925010308623, "policy_loss": -0.0006428910419344902, "vf_loss": 7.709566116333008, "vf_explained_var": 0.5375313758850098, "kl": 0.002994687994942069, "entropy": 0.8327674865722656, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4288000, "num_env_steps_trained": 4288000, "num_agent_steps_sampled": 8576000, "num_agent_steps_trained": 8576000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 581.45, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 192.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 290.725}, "custom_metrics": {"sparse_reward_mean": 201.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 179.45, "shaped_reward_min": 150, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.7, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.4, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.54, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.22, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.38, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.11, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.38, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.11, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.38, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.11, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 576.0, 582.0, 522.0, 584.0, 576.0, 579.0, 516.0, 390.0, 630.0, 633.0, 579.0, 582.0, 582.0, 567.0, 579.0, 579.0, 630.0, 630.0, 579.0, 584.0, 636.0, 627.0, 579.0, 582.0, 533.0, 579.0, 582.0, 579.0, 579.0, 579.0, 570.0, 579.0, 630.0, 630.0, 579.0, 527.0, 579.0, 582.0, 587.0, 582.0, 576.0, 564.0, 627.0, 573.0, 576.0, 582.0, 582.0, 576.0, 579.0, 584.0, 576.0, 627.0, 584.0, 576.0, 579.0, 590.0, 582.0, 573.0, 579.0, 576.0, 579.0, 573.0, 579.0, 582.0, 582.0, 576.0, 582.0, 576.0, 576.0, 579.0, 630.0, 582.0, 579.0, 587.0, 630.0, 579.0, 582.0, 573.0, 573.0, 576.0, 573.0, 573.0, 576.0, 525.0, 582.0, 525.0, 624.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 525.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 283.0, 293.0, 292.0, 290.0, 263.0, 259.0, 293.0, 291.0, 288.0, 
288.0, 286.0, 293.0, 259.0, 257.0, 198.0, 192.0, 322.0, 308.0, 315.0, 318.0, 289.0, 290.0, 285.0, 297.0, 293.0, 289.0, 288.0, 279.0, 297.0, 282.0, 292.0, 287.0, 315.0, 315.0, 316.0, 314.0, 284.0, 295.0, 290.0, 294.0, 316.0, 320.0, 313.0, 314.0, 291.0, 288.0, 291.0, 291.0, 268.0, 265.0, 292.0, 287.0, 292.0, 290.0, 284.0, 295.0, 299.0, 280.0, 287.0, 292.0, 285.0, 285.0, 289.0, 290.0, 311.0, 319.0, 314.0, 316.0, 288.0, 291.0, 256.0, 271.0, 292.0, 287.0, 286.0, 296.0, 297.0, 290.0, 293.0, 289.0, 289.0, 287.0, 279.0, 285.0, 309.0, 318.0, 288.0, 285.0, 289.0, 287.0, 292.0, 290.0, 279.0, 303.0, 283.0, 293.0, 289.0, 290.0, 296.0, 288.0, 287.0, 289.0, 317.0, 310.0, 288.0, 296.0, 282.0, 294.0, 288.0, 291.0, 288.0, 302.0, 291.0, 291.0, 289.0, 284.0, 288.0, 291.0, 291.0, 285.0, 288.0, 291.0, 290.0, 283.0, 277.0, 302.0, 286.0, 296.0, 293.0, 289.0, 289.0, 287.0, 286.0, 296.0, 285.0, 291.0, 292.0, 284.0, 288.0, 291.0, 319.0, 311.0, 286.0, 296.0, 295.0, 284.0, 290.0, 297.0, 321.0, 309.0, 288.0, 291.0, 287.0, 295.0, 288.0, 285.0, 279.0, 294.0, 285.0, 291.0, 286.0, 287.0, 290.0, 283.0, 287.0, 289.0, 264.0, 261.0, 288.0, 294.0, 261.0, 264.0, 314.0, 310.0, 312.0, 318.0, 313.0, 317.0, 296.0, 286.0, 292.0, 290.0, 280.0, 296.0, 293.0, 289.0, 290.0, 289.0, 289.0, 290.0, 292.0, 290.0, 289.0, 290.0, 263.0, 262.0, 323.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.700256465783454, "mean_inference_ms": 1.2515049690584337, "mean_action_processing_ms": 0.13412077160613753, "mean_env_wait_ms": 0.8429494957222775, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 390.0, "episode_reward_mean": 581.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 192.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 290.725}, "hist_stats": {"episode_reward": [587.0, 576.0, 582.0, 522.0, 584.0, 576.0, 579.0, 516.0, 390.0, 630.0, 633.0, 579.0, 582.0, 582.0, 567.0, 579.0, 579.0, 630.0, 630.0, 
579.0, 584.0, 636.0, 627.0, 579.0, 582.0, 533.0, 579.0, 582.0, 579.0, 579.0, 579.0, 570.0, 579.0, 630.0, 630.0, 579.0, 527.0, 579.0, 582.0, 587.0, 582.0, 576.0, 564.0, 627.0, 573.0, 576.0, 582.0, 582.0, 576.0, 579.0, 584.0, 576.0, 627.0, 584.0, 576.0, 579.0, 590.0, 582.0, 573.0, 579.0, 576.0, 579.0, 573.0, 579.0, 582.0, 582.0, 576.0, 582.0, 576.0, 576.0, 579.0, 630.0, 582.0, 579.0, 587.0, 630.0, 579.0, 582.0, 573.0, 573.0, 576.0, 573.0, 573.0, 576.0, 525.0, 582.0, 525.0, 624.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 525.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 291.0, 283.0, 293.0, 292.0, 290.0, 263.0, 259.0, 293.0, 291.0, 288.0, 288.0, 286.0, 293.0, 259.0, 257.0, 198.0, 192.0, 322.0, 308.0, 315.0, 318.0, 289.0, 290.0, 285.0, 297.0, 293.0, 289.0, 288.0, 279.0, 297.0, 282.0, 292.0, 287.0, 315.0, 315.0, 316.0, 314.0, 284.0, 295.0, 290.0, 294.0, 316.0, 320.0, 313.0, 314.0, 291.0, 288.0, 291.0, 291.0, 268.0, 265.0, 292.0, 287.0, 292.0, 290.0, 284.0, 295.0, 299.0, 280.0, 287.0, 292.0, 285.0, 285.0, 289.0, 290.0, 311.0, 319.0, 314.0, 316.0, 288.0, 291.0, 256.0, 271.0, 292.0, 287.0, 286.0, 296.0, 297.0, 290.0, 293.0, 289.0, 289.0, 287.0, 279.0, 285.0, 309.0, 318.0, 288.0, 285.0, 289.0, 287.0, 292.0, 290.0, 279.0, 303.0, 283.0, 293.0, 289.0, 290.0, 296.0, 288.0, 287.0, 289.0, 317.0, 310.0, 288.0, 296.0, 282.0, 294.0, 288.0, 291.0, 288.0, 302.0, 291.0, 291.0, 289.0, 284.0, 288.0, 291.0, 291.0, 285.0, 288.0, 291.0, 290.0, 283.0, 277.0, 
302.0, 286.0, 296.0, 293.0, 289.0, 289.0, 287.0, 286.0, 296.0, 285.0, 291.0, 292.0, 284.0, 288.0, 291.0, 319.0, 311.0, 286.0, 296.0, 295.0, 284.0, 290.0, 297.0, 321.0, 309.0, 288.0, 291.0, 287.0, 295.0, 288.0, 285.0, 279.0, 294.0, 285.0, 291.0, 286.0, 287.0, 290.0, 283.0, 287.0, 289.0, 264.0, 261.0, 288.0, 294.0, 261.0, 264.0, 314.0, 310.0, 312.0, 318.0, 313.0, 317.0, 296.0, 286.0, 292.0, 290.0, 280.0, 296.0, 293.0, 289.0, 290.0, 289.0, 289.0, 290.0, 292.0, 290.0, 289.0, 290.0, 263.0, 262.0, 323.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.700256465783454, "mean_inference_ms": 1.2515049690584337, "mean_action_processing_ms": 0.13412077160613753, "mean_env_wait_ms": 0.8429494957222775, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8576000, "num_agent_steps_trained": 8576000, "num_env_steps_sampled": 4288000, "num_env_steps_trained": 4288000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4288000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8576000, "timers": {"training_iteration_time_ms": 3597.338, "learn_time_ms": 1113.646, "learn_throughput": 11493.783, "synch_weights_time_ms": 14.092}, "counters": {"num_env_steps_sampled": 4288000, "num_env_steps_trained": 4288000, "num_agent_steps_sampled": 8576000, "num_agent_steps_trained": 8576000}, "done": false, "episodes_total": 10720, "training_iteration": 335, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-14", "timestamp": 1666581734, "time_this_iter_s": 3.7237629890441895, "time_total_s": 1285.366941690445, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1285.366941690445, "timesteps_since_restore": 0, "iterations_since_restore": 335, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.919999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.77, "shaped_reward_min": 96, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.82, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.18, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.66, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.02, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.55, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.43, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0048871831968426704, "policy_loss": 0.004516778513789177, "vf_loss": 7.847447395324707, "vf_explained_var": 0.5444375872612, "kl": 0.0027662317734211683, "entropy": 0.8286799192428589, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4300800, "num_env_steps_trained": 4300800, "num_agent_steps_sampled": 8601600, "num_agent_steps_trained": 8601600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 296.0, "episode_reward_mean": 578.37, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 144.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 289.185}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.77, "shaped_reward_min": 96, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.82, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.18, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 15.66, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.02, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.43, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.93, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 28, "dish_pickup_agent_0_mean": 5.55, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.43, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.93, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 28, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.43, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.93, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 28, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 630.0, 579.0, 527.0, 579.0, 582.0, 587.0, 582.0, 576.0, 564.0, 627.0, 573.0, 576.0, 582.0, 582.0, 576.0, 579.0, 584.0, 576.0, 627.0, 584.0, 576.0, 579.0, 590.0, 582.0, 573.0, 579.0, 576.0, 579.0, 573.0, 579.0, 582.0, 582.0, 576.0, 582.0, 576.0, 576.0, 579.0, 630.0, 582.0, 579.0, 587.0, 630.0, 579.0, 582.0, 573.0, 573.0, 576.0, 573.0, 573.0, 576.0, 525.0, 582.0, 525.0, 624.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 525.0, 639.0, 587.0, 579.0, 573.0, 582.0, 579.0, 579.0, 579.0, 570.0, 630.0, 539.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 536.0, 579.0, 579.0, 582.0, 296.0, 579.0, 582.0, 582.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 530.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 311.0, 319.0, 314.0, 316.0, 288.0, 291.0, 256.0, 271.0, 292.0, 
287.0, 286.0, 296.0, 297.0, 290.0, 293.0, 289.0, 289.0, 287.0, 279.0, 285.0, 309.0, 318.0, 288.0, 285.0, 289.0, 287.0, 292.0, 290.0, 279.0, 303.0, 283.0, 293.0, 289.0, 290.0, 296.0, 288.0, 287.0, 289.0, 317.0, 310.0, 288.0, 296.0, 282.0, 294.0, 288.0, 291.0, 288.0, 302.0, 291.0, 291.0, 289.0, 284.0, 288.0, 291.0, 291.0, 285.0, 288.0, 291.0, 290.0, 283.0, 277.0, 302.0, 286.0, 296.0, 293.0, 289.0, 289.0, 287.0, 286.0, 296.0, 285.0, 291.0, 292.0, 284.0, 288.0, 291.0, 319.0, 311.0, 286.0, 296.0, 295.0, 284.0, 290.0, 297.0, 321.0, 309.0, 288.0, 291.0, 287.0, 295.0, 288.0, 285.0, 279.0, 294.0, 285.0, 291.0, 286.0, 287.0, 290.0, 283.0, 287.0, 289.0, 264.0, 261.0, 288.0, 294.0, 261.0, 264.0, 314.0, 310.0, 312.0, 318.0, 313.0, 317.0, 296.0, 286.0, 292.0, 290.0, 280.0, 296.0, 293.0, 289.0, 290.0, 289.0, 289.0, 290.0, 292.0, 290.0, 289.0, 290.0, 263.0, 262.0, 323.0, 316.0, 296.0, 291.0, 288.0, 291.0, 282.0, 291.0, 288.0, 294.0, 291.0, 288.0, 284.0, 295.0, 294.0, 285.0, 284.0, 286.0, 310.0, 320.0, 269.0, 270.0, 291.0, 291.0, 297.0, 285.0, 289.0, 290.0, 292.0, 290.0, 297.0, 285.0, 286.0, 287.0, 271.0, 265.0, 295.0, 284.0, 290.0, 289.0, 293.0, 289.0, 144.0, 152.0, 291.0, 288.0, 286.0, 296.0, 289.0, 293.0, 290.0, 289.0, 284.0, 292.0, 290.0, 289.0, 293.0, 289.0, 290.0, 286.0, 287.0, 295.0, 257.0, 273.0, 285.0, 302.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7002034651518352, "mean_inference_ms": 1.251341339209156, "mean_action_processing_ms": 0.13411410299742965, "mean_env_wait_ms": 0.8428665021811821, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 296.0, "episode_reward_mean": 578.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 144.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 289.185}, "hist_stats": {"episode_reward": [579.0, 630.0, 630.0, 579.0, 527.0, 579.0, 582.0, 587.0, 582.0, 576.0, 564.0, 627.0, 573.0, 576.0, 582.0, 582.0, 576.0, 579.0, 584.0, 
576.0, 627.0, 584.0, 576.0, 579.0, 590.0, 582.0, 573.0, 579.0, 576.0, 579.0, 573.0, 579.0, 582.0, 582.0, 576.0, 582.0, 576.0, 576.0, 579.0, 630.0, 582.0, 579.0, 587.0, 630.0, 579.0, 582.0, 573.0, 573.0, 576.0, 573.0, 573.0, 576.0, 525.0, 582.0, 525.0, 624.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 525.0, 639.0, 587.0, 579.0, 573.0, 582.0, 579.0, 579.0, 579.0, 570.0, 630.0, 539.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 536.0, 579.0, 579.0, 582.0, 296.0, 579.0, 582.0, 582.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 530.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 311.0, 319.0, 314.0, 316.0, 288.0, 291.0, 256.0, 271.0, 292.0, 287.0, 286.0, 296.0, 297.0, 290.0, 293.0, 289.0, 289.0, 287.0, 279.0, 285.0, 309.0, 318.0, 288.0, 285.0, 289.0, 287.0, 292.0, 290.0, 279.0, 303.0, 283.0, 293.0, 289.0, 290.0, 296.0, 288.0, 287.0, 289.0, 317.0, 310.0, 288.0, 296.0, 282.0, 294.0, 288.0, 291.0, 288.0, 302.0, 291.0, 291.0, 289.0, 284.0, 288.0, 291.0, 291.0, 285.0, 288.0, 291.0, 290.0, 283.0, 277.0, 302.0, 286.0, 296.0, 293.0, 289.0, 289.0, 287.0, 286.0, 296.0, 285.0, 291.0, 292.0, 284.0, 288.0, 291.0, 319.0, 311.0, 286.0, 296.0, 295.0, 284.0, 290.0, 297.0, 321.0, 309.0, 288.0, 291.0, 287.0, 295.0, 288.0, 285.0, 279.0, 294.0, 285.0, 291.0, 286.0, 287.0, 290.0, 283.0, 287.0, 289.0, 264.0, 261.0, 288.0, 294.0, 261.0, 264.0, 314.0, 310.0, 312.0, 318.0, 313.0, 317.0, 296.0, 286.0, 292.0, 290.0, 280.0, 296.0, 293.0, 289.0, 290.0, 289.0, 289.0, 
290.0, 292.0, 290.0, 289.0, 290.0, 263.0, 262.0, 323.0, 316.0, 296.0, 291.0, 288.0, 291.0, 282.0, 291.0, 288.0, 294.0, 291.0, 288.0, 284.0, 295.0, 294.0, 285.0, 284.0, 286.0, 310.0, 320.0, 269.0, 270.0, 291.0, 291.0, 297.0, 285.0, 289.0, 290.0, 292.0, 290.0, 297.0, 285.0, 286.0, 287.0, 271.0, 265.0, 295.0, 284.0, 290.0, 289.0, 293.0, 289.0, 144.0, 152.0, 291.0, 288.0, 286.0, 296.0, 289.0, 293.0, 290.0, 289.0, 284.0, 292.0, 290.0, 289.0, 293.0, 289.0, 290.0, 286.0, 287.0, 295.0, 257.0, 273.0, 285.0, 302.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7002034651518352, "mean_inference_ms": 1.251341339209156, "mean_action_processing_ms": 0.13411410299742965, "mean_env_wait_ms": 0.8428665021811821, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8601600, "num_agent_steps_trained": 8601600, "num_env_steps_sampled": 4300800, "num_env_steps_trained": 4300800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4300800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8601600, "timers": {"training_iteration_time_ms": 3600.528, "learn_time_ms": 1117.896, "learn_throughput": 11450.085, "synch_weights_time_ms": 13.917}, "counters": {"num_env_steps_sampled": 4300800, "num_env_steps_trained": 4300800, "num_agent_steps_sampled": 8601600, "num_agent_steps_trained": 8601600}, "done": false, "episodes_total": 10752, "training_iteration": 336, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-18", "timestamp": 1666581738, "time_this_iter_s": 3.7050204277038574, "time_total_s": 1289.0719621181488, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1289.0719621181488, "timesteps_since_restore": 0, "iterations_since_restore": 336, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.96666666666667, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.23, "shaped_reward_min": 93, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.25, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.63, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.1, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.34, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.85, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.34, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.34, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002555068116635084, "policy_loss": -0.002903904300183058, "vf_loss": 7.657268524169922, "vf_explained_var": 0.5829470157623291, "kl": 0.002286064438521862, "entropy": 0.8337796926498413, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4313600, "num_env_steps_trained": 4313600, "num_agent_steps_sampled": 8627200, "num_agent_steps_trained": 8627200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 253.0, "episode_reward_mean": 574.63, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 124.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.315}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.23, "shaped_reward_min": 93, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.25, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.63, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.1, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.36, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.85, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.34, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.37, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.85, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.34, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.85, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.34, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 576.0, 582.0, 576.0, 576.0, 579.0, 630.0, 582.0, 579.0, 587.0, 630.0, 579.0, 582.0, 573.0, 573.0, 576.0, 573.0, 573.0, 576.0, 525.0, 582.0, 525.0, 624.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 525.0, 639.0, 587.0, 579.0, 573.0, 582.0, 579.0, 579.0, 579.0, 570.0, 630.0, 539.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 536.0, 579.0, 579.0, 582.0, 296.0, 579.0, 582.0, 582.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 530.0, 587.0, 584.0, 630.0, 584.0, 582.0, 584.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 582.0, 253.0, 576.0, 584.0, 579.0, 579.0, 579.0, 582.0, 581.0, 576.0, 576.0, 579.0, 576.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 296.0, 293.0, 289.0, 289.0, 287.0, 286.0, 296.0, 285.0, 291.0, 292.0, 
284.0, 288.0, 291.0, 319.0, 311.0, 286.0, 296.0, 295.0, 284.0, 290.0, 297.0, 321.0, 309.0, 288.0, 291.0, 287.0, 295.0, 288.0, 285.0, 279.0, 294.0, 285.0, 291.0, 286.0, 287.0, 290.0, 283.0, 287.0, 289.0, 264.0, 261.0, 288.0, 294.0, 261.0, 264.0, 314.0, 310.0, 312.0, 318.0, 313.0, 317.0, 296.0, 286.0, 292.0, 290.0, 280.0, 296.0, 293.0, 289.0, 290.0, 289.0, 289.0, 290.0, 292.0, 290.0, 289.0, 290.0, 263.0, 262.0, 323.0, 316.0, 296.0, 291.0, 288.0, 291.0, 282.0, 291.0, 288.0, 294.0, 291.0, 288.0, 284.0, 295.0, 294.0, 285.0, 284.0, 286.0, 310.0, 320.0, 269.0, 270.0, 291.0, 291.0, 297.0, 285.0, 289.0, 290.0, 292.0, 290.0, 297.0, 285.0, 286.0, 287.0, 271.0, 265.0, 295.0, 284.0, 290.0, 289.0, 293.0, 289.0, 144.0, 152.0, 291.0, 288.0, 286.0, 296.0, 289.0, 293.0, 290.0, 289.0, 284.0, 292.0, 290.0, 289.0, 293.0, 289.0, 290.0, 286.0, 287.0, 295.0, 257.0, 273.0, 285.0, 302.0, 290.0, 294.0, 311.0, 319.0, 289.0, 295.0, 290.0, 292.0, 287.0, 297.0, 294.0, 288.0, 291.0, 291.0, 289.0, 290.0, 291.0, 291.0, 284.0, 298.0, 290.0, 292.0, 290.0, 292.0, 129.0, 124.0, 282.0, 294.0, 285.0, 299.0, 282.0, 297.0, 287.0, 292.0, 286.0, 293.0, 287.0, 295.0, 296.0, 285.0, 291.0, 285.0, 291.0, 285.0, 291.0, 288.0, 280.0, 296.0, 290.0, 292.0, 289.0, 293.0, 284.0, 295.0, 285.0, 294.0, 292.0, 287.0, 290.0, 292.0, 292.0, 287.0, 280.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7001310320699491, "mean_inference_ms": 1.2511716681710092, "mean_action_processing_ms": 0.13410539359749385, "mean_env_wait_ms": 0.8427664592330052, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 253.0, "episode_reward_mean": 574.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 124.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.315}, "hist_stats": {"episode_reward": [582.0, 582.0, 576.0, 582.0, 576.0, 576.0, 579.0, 630.0, 582.0, 579.0, 587.0, 630.0, 579.0, 582.0, 573.0, 573.0, 576.0, 573.0, 
573.0, 576.0, 525.0, 582.0, 525.0, 624.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 525.0, 639.0, 587.0, 579.0, 573.0, 582.0, 579.0, 579.0, 579.0, 570.0, 630.0, 539.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 536.0, 579.0, 579.0, 582.0, 296.0, 579.0, 582.0, 582.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 530.0, 587.0, 584.0, 630.0, 584.0, 582.0, 584.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 582.0, 253.0, 576.0, 584.0, 579.0, 579.0, 579.0, 582.0, 581.0, 576.0, 576.0, 579.0, 576.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 296.0, 293.0, 289.0, 289.0, 287.0, 286.0, 296.0, 285.0, 291.0, 292.0, 284.0, 288.0, 291.0, 319.0, 311.0, 286.0, 296.0, 295.0, 284.0, 290.0, 297.0, 321.0, 309.0, 288.0, 291.0, 287.0, 295.0, 288.0, 285.0, 279.0, 294.0, 285.0, 291.0, 286.0, 287.0, 290.0, 283.0, 287.0, 289.0, 264.0, 261.0, 288.0, 294.0, 261.0, 264.0, 314.0, 310.0, 312.0, 318.0, 313.0, 317.0, 296.0, 286.0, 292.0, 290.0, 280.0, 296.0, 293.0, 289.0, 290.0, 289.0, 289.0, 290.0, 292.0, 290.0, 289.0, 290.0, 263.0, 262.0, 323.0, 316.0, 296.0, 291.0, 288.0, 291.0, 282.0, 291.0, 288.0, 294.0, 291.0, 288.0, 284.0, 295.0, 294.0, 285.0, 284.0, 286.0, 310.0, 320.0, 269.0, 270.0, 291.0, 291.0, 297.0, 285.0, 289.0, 290.0, 292.0, 290.0, 297.0, 285.0, 286.0, 287.0, 271.0, 265.0, 295.0, 284.0, 290.0, 289.0, 293.0, 289.0, 144.0, 152.0, 291.0, 288.0, 286.0, 296.0, 289.0, 293.0, 290.0, 289.0, 284.0, 292.0, 290.0, 289.0, 
293.0, 289.0, 290.0, 286.0, 287.0, 295.0, 257.0, 273.0, 285.0, 302.0, 290.0, 294.0, 311.0, 319.0, 289.0, 295.0, 290.0, 292.0, 287.0, 297.0, 294.0, 288.0, 291.0, 291.0, 289.0, 290.0, 291.0, 291.0, 284.0, 298.0, 290.0, 292.0, 290.0, 292.0, 129.0, 124.0, 282.0, 294.0, 285.0, 299.0, 282.0, 297.0, 287.0, 292.0, 286.0, 293.0, 287.0, 295.0, 296.0, 285.0, 291.0, 285.0, 291.0, 285.0, 291.0, 288.0, 280.0, 296.0, 290.0, 292.0, 289.0, 293.0, 284.0, 295.0, 285.0, 294.0, 292.0, 287.0, 290.0, 292.0, 292.0, 287.0, 280.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7001310320699491, "mean_inference_ms": 1.2511716681710092, "mean_action_processing_ms": 0.13410539359749385, "mean_env_wait_ms": 0.8427664592330052, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8627200, "num_agent_steps_trained": 8627200, "num_env_steps_sampled": 4313600, "num_env_steps_trained": 4313600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4313600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8627200, "timers": {"training_iteration_time_ms": 3599.436, "learn_time_ms": 1126.026, "learn_throughput": 11367.408, "synch_weights_time_ms": 13.268}, "counters": {"num_env_steps_sampled": 4313600, "num_env_steps_trained": 4313600, "num_agent_steps_sampled": 8627200, "num_agent_steps_trained": 8627200}, "done": false, "episodes_total": 10784, "training_iteration": 337, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-22", "timestamp": 1666581742, "time_this_iter_s": 3.647874355316162, "time_total_s": 1292.719836473465, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1292.719836473465, "timesteps_since_restore": 0, "iterations_since_restore": 337, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.46, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.53, "shaped_reward_min": 93, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.41, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.39, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.09, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.16, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.07, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.22, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.16, 
"optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.07, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.16, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.07, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0018456100951880217, "policy_loss": -0.002207729499787092, "vf_loss": 7.778796672821045, "vf_explained_var": 0.5378071069717407, "kl": 0.0025057008024305105, "entropy": 0.8315178155899048, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4326400, "num_env_steps_trained": 4326400, "num_agent_steps_sampled": 8652800, "num_agent_steps_trained": 8652800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 253.0, "episode_reward_mean": 574.53, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 124.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.265}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 178.53, "shaped_reward_min": 93, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.41, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.39, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.09, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.16, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.07, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.22, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.76, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.16, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.07, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.16, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.07, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 525.0, 639.0, 587.0, 579.0, 573.0, 582.0, 579.0, 579.0, 579.0, 570.0, 630.0, 539.0, 582.0, 582.0, 579.0, 582.0, 582.0, 573.0, 536.0, 579.0, 579.0, 582.0, 296.0, 579.0, 582.0, 582.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 530.0, 587.0, 584.0, 630.0, 584.0, 582.0, 584.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 582.0, 253.0, 576.0, 584.0, 579.0, 579.0, 579.0, 582.0, 581.0, 576.0, 576.0, 579.0, 576.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 576.0, 627.0, 582.0, 587.0, 636.0, 579.0, 587.0, 587.0, 630.0, 516.0, 579.0, 582.0, 582.0, 573.0, 579.0, 522.0, 582.0, 582.0, 579.0, 633.0, 581.0, 579.0, 633.0, 536.0, 639.0, 570.0, 579.0, 579.0, 525.0, 576.0, 576.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 289.0, 290.0, 263.0, 262.0, 323.0, 316.0, 296.0, 291.0, 288.0, 
291.0, 282.0, 291.0, 288.0, 294.0, 291.0, 288.0, 284.0, 295.0, 294.0, 285.0, 284.0, 286.0, 310.0, 320.0, 269.0, 270.0, 291.0, 291.0, 297.0, 285.0, 289.0, 290.0, 292.0, 290.0, 297.0, 285.0, 286.0, 287.0, 271.0, 265.0, 295.0, 284.0, 290.0, 289.0, 293.0, 289.0, 144.0, 152.0, 291.0, 288.0, 286.0, 296.0, 289.0, 293.0, 290.0, 289.0, 284.0, 292.0, 290.0, 289.0, 293.0, 289.0, 290.0, 286.0, 287.0, 295.0, 257.0, 273.0, 285.0, 302.0, 290.0, 294.0, 311.0, 319.0, 289.0, 295.0, 290.0, 292.0, 287.0, 297.0, 294.0, 288.0, 291.0, 291.0, 289.0, 290.0, 291.0, 291.0, 284.0, 298.0, 290.0, 292.0, 290.0, 292.0, 129.0, 124.0, 282.0, 294.0, 285.0, 299.0, 282.0, 297.0, 287.0, 292.0, 286.0, 293.0, 287.0, 295.0, 296.0, 285.0, 291.0, 285.0, 291.0, 285.0, 291.0, 288.0, 280.0, 296.0, 290.0, 292.0, 289.0, 293.0, 284.0, 295.0, 285.0, 294.0, 292.0, 287.0, 290.0, 292.0, 292.0, 287.0, 280.0, 296.0, 309.0, 318.0, 290.0, 292.0, 290.0, 297.0, 321.0, 315.0, 286.0, 293.0, 285.0, 302.0, 290.0, 297.0, 314.0, 316.0, 258.0, 258.0, 294.0, 285.0, 287.0, 295.0, 291.0, 291.0, 283.0, 290.0, 286.0, 293.0, 254.0, 268.0, 291.0, 291.0, 289.0, 293.0, 291.0, 288.0, 314.0, 319.0, 300.0, 281.0, 286.0, 293.0, 319.0, 314.0, 267.0, 269.0, 319.0, 320.0, 289.0, 281.0, 293.0, 286.0, 279.0, 300.0, 259.0, 266.0, 285.0, 291.0, 287.0, 289.0, 285.0, 291.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7000574987189117, "mean_inference_ms": 1.2509881997334336, "mean_action_processing_ms": 0.13409452514616124, "mean_env_wait_ms": 0.842656773384128, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 253.0, "episode_reward_mean": 574.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 124.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.265}, "hist_stats": {"episode_reward": [582.0, 579.0, 525.0, 639.0, 587.0, 579.0, 573.0, 582.0, 579.0, 579.0, 579.0, 570.0, 630.0, 539.0, 582.0, 582.0, 579.0, 582.0, 582.0, 
573.0, 536.0, 579.0, 579.0, 582.0, 296.0, 579.0, 582.0, 582.0, 579.0, 576.0, 579.0, 582.0, 576.0, 582.0, 530.0, 587.0, 584.0, 630.0, 584.0, 582.0, 584.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 582.0, 253.0, 576.0, 584.0, 579.0, 579.0, 579.0, 582.0, 581.0, 576.0, 576.0, 579.0, 576.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 576.0, 627.0, 582.0, 587.0, 636.0, 579.0, 587.0, 587.0, 630.0, 516.0, 579.0, 582.0, 582.0, 573.0, 579.0, 522.0, 582.0, 582.0, 579.0, 633.0, 581.0, 579.0, 633.0, 536.0, 639.0, 570.0, 579.0, 579.0, 525.0, 576.0, 576.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 289.0, 290.0, 263.0, 262.0, 323.0, 316.0, 296.0, 291.0, 288.0, 291.0, 282.0, 291.0, 288.0, 294.0, 291.0, 288.0, 284.0, 295.0, 294.0, 285.0, 284.0, 286.0, 310.0, 320.0, 269.0, 270.0, 291.0, 291.0, 297.0, 285.0, 289.0, 290.0, 292.0, 290.0, 297.0, 285.0, 286.0, 287.0, 271.0, 265.0, 295.0, 284.0, 290.0, 289.0, 293.0, 289.0, 144.0, 152.0, 291.0, 288.0, 286.0, 296.0, 289.0, 293.0, 290.0, 289.0, 284.0, 292.0, 290.0, 289.0, 293.0, 289.0, 290.0, 286.0, 287.0, 295.0, 257.0, 273.0, 285.0, 302.0, 290.0, 294.0, 311.0, 319.0, 289.0, 295.0, 290.0, 292.0, 287.0, 297.0, 294.0, 288.0, 291.0, 291.0, 289.0, 290.0, 291.0, 291.0, 284.0, 298.0, 290.0, 292.0, 290.0, 292.0, 129.0, 124.0, 282.0, 294.0, 285.0, 299.0, 282.0, 297.0, 287.0, 292.0, 286.0, 293.0, 287.0, 295.0, 296.0, 285.0, 291.0, 285.0, 291.0, 285.0, 291.0, 288.0, 280.0, 296.0, 290.0, 292.0, 289.0, 293.0, 284.0, 295.0, 285.0, 
294.0, 292.0, 287.0, 290.0, 292.0, 292.0, 287.0, 280.0, 296.0, 309.0, 318.0, 290.0, 292.0, 290.0, 297.0, 321.0, 315.0, 286.0, 293.0, 285.0, 302.0, 290.0, 297.0, 314.0, 316.0, 258.0, 258.0, 294.0, 285.0, 287.0, 295.0, 291.0, 291.0, 283.0, 290.0, 286.0, 293.0, 254.0, 268.0, 291.0, 291.0, 289.0, 293.0, 291.0, 288.0, 314.0, 319.0, 300.0, 281.0, 286.0, 293.0, 319.0, 314.0, 267.0, 269.0, 319.0, 320.0, 289.0, 281.0, 293.0, 286.0, 279.0, 300.0, 259.0, 266.0, 285.0, 291.0, 287.0, 289.0, 285.0, 291.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.7000574987189117, "mean_inference_ms": 1.2509881997334336, "mean_action_processing_ms": 0.13409452514616124, "mean_env_wait_ms": 0.842656773384128, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8652800, "num_agent_steps_trained": 8652800, "num_env_steps_sampled": 4326400, "num_env_steps_trained": 4326400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4326400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8652800, "timers": {"training_iteration_time_ms": 3606.7, "learn_time_ms": 1130.174, "learn_throughput": 11325.694, "synch_weights_time_ms": 12.833}, "counters": {"num_env_steps_sampled": 4326400, "num_env_steps_trained": 4326400, "num_agent_steps_sampled": 8652800, "num_agent_steps_trained": 8652800}, "done": false, "episodes_total": 10816, "training_iteration": 338, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-26", "timestamp": 1666581746, "time_this_iter_s": 3.591172695159912, "time_total_s": 1296.3110091686249, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1296.3110091686249, "timesteps_since_restore": 0, "iterations_since_restore": 338, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.73333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 176.04, "shaped_reward_min": 17, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.16, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.38, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.0, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.01, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.95, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.58, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.82, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.95, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.95, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00020561664132401347, "policy_loss": -0.00017668434884399176, "vf_loss": 7.972438812255859, "vf_explained_var": 0.5549718141555786, "kl": 0.004623632878065109, "entropy": 0.8298848867416382, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4339200, "num_env_steps_trained": 4339200, "num_agent_steps_sampled": 8678400, "num_agent_steps_trained": 8678400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 17.0, "episode_reward_mean": 566.44, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.22}, "custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 176.04, "shaped_reward_min": 17, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.16, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.38, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.0, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.01, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.95, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.58, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.0, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.95, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.95, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 582.0, 530.0, 587.0, 584.0, 630.0, 584.0, 582.0, 584.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 582.0, 253.0, 576.0, 584.0, 579.0, 579.0, 579.0, 582.0, 581.0, 576.0, 576.0, 579.0, 576.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 576.0, 627.0, 582.0, 587.0, 636.0, 579.0, 587.0, 587.0, 630.0, 516.0, 579.0, 582.0, 582.0, 573.0, 579.0, 522.0, 582.0, 582.0, 579.0, 633.0, 581.0, 579.0, 633.0, 536.0, 639.0, 570.0, 579.0, 579.0, 525.0, 576.0, 576.0, 576.0, 579.0, 17.0, 579.0, 573.0, 579.0, 587.0, 582.0, 522.0, 582.0, 573.0, 576.0, 516.0, 582.0, 573.0, 579.0, 536.0, 525.0, 527.0, 582.0, 539.0, 579.0, 579.0, 525.0, 519.0, 579.0, 582.0, 579.0, 587.0, 516.0, 579.0, 579.0, 570.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 286.0, 287.0, 295.0, 257.0, 273.0, 285.0, 302.0, 290.0, 294.0, 311.0, 
319.0, 289.0, 295.0, 290.0, 292.0, 287.0, 297.0, 294.0, 288.0, 291.0, 291.0, 289.0, 290.0, 291.0, 291.0, 284.0, 298.0, 290.0, 292.0, 290.0, 292.0, 129.0, 124.0, 282.0, 294.0, 285.0, 299.0, 282.0, 297.0, 287.0, 292.0, 286.0, 293.0, 287.0, 295.0, 296.0, 285.0, 291.0, 285.0, 291.0, 285.0, 291.0, 288.0, 280.0, 296.0, 290.0, 292.0, 289.0, 293.0, 284.0, 295.0, 285.0, 294.0, 292.0, 287.0, 290.0, 292.0, 292.0, 287.0, 280.0, 296.0, 309.0, 318.0, 290.0, 292.0, 290.0, 297.0, 321.0, 315.0, 286.0, 293.0, 285.0, 302.0, 290.0, 297.0, 314.0, 316.0, 258.0, 258.0, 294.0, 285.0, 287.0, 295.0, 291.0, 291.0, 283.0, 290.0, 286.0, 293.0, 254.0, 268.0, 291.0, 291.0, 289.0, 293.0, 291.0, 288.0, 314.0, 319.0, 300.0, 281.0, 286.0, 293.0, 319.0, 314.0, 267.0, 269.0, 319.0, 320.0, 289.0, 281.0, 293.0, 286.0, 279.0, 300.0, 259.0, 266.0, 285.0, 291.0, 287.0, 289.0, 285.0, 291.0, 286.0, 293.0, 3.0, 14.0, 284.0, 295.0, 285.0, 288.0, 292.0, 287.0, 300.0, 287.0, 296.0, 286.0, 256.0, 266.0, 288.0, 294.0, 293.0, 280.0, 288.0, 288.0, 254.0, 262.0, 292.0, 290.0, 284.0, 289.0, 287.0, 292.0, 258.0, 278.0, 258.0, 267.0, 265.0, 262.0, 286.0, 296.0, 266.0, 273.0, 288.0, 291.0, 292.0, 287.0, 257.0, 268.0, 259.0, 260.0, 285.0, 294.0, 289.0, 293.0, 292.0, 287.0, 293.0, 294.0, 250.0, 266.0, 288.0, 291.0, 287.0, 292.0, 287.0, 283.0, 256.0, 266.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6999861085772966, "mean_inference_ms": 1.250801244318193, "mean_action_processing_ms": 0.1340835429209035, "mean_env_wait_ms": 0.8425414523041331, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 17.0, "episode_reward_mean": 566.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.22}, "hist_stats": {"episode_reward": [576.0, 582.0, 530.0, 587.0, 584.0, 630.0, 584.0, 582.0, 584.0, 582.0, 582.0, 579.0, 582.0, 582.0, 582.0, 582.0, 253.0, 576.0, 584.0, 579.0, 
579.0, 579.0, 582.0, 581.0, 576.0, 576.0, 579.0, 576.0, 582.0, 582.0, 579.0, 579.0, 579.0, 582.0, 579.0, 576.0, 627.0, 582.0, 587.0, 636.0, 579.0, 587.0, 587.0, 630.0, 516.0, 579.0, 582.0, 582.0, 573.0, 579.0, 522.0, 582.0, 582.0, 579.0, 633.0, 581.0, 579.0, 633.0, 536.0, 639.0, 570.0, 579.0, 579.0, 525.0, 576.0, 576.0, 576.0, 579.0, 17.0, 579.0, 573.0, 579.0, 587.0, 582.0, 522.0, 582.0, 573.0, 576.0, 516.0, 582.0, 573.0, 579.0, 536.0, 525.0, 527.0, 582.0, 539.0, 579.0, 579.0, 525.0, 519.0, 579.0, 582.0, 579.0, 587.0, 516.0, 579.0, 579.0, 570.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 286.0, 287.0, 295.0, 257.0, 273.0, 285.0, 302.0, 290.0, 294.0, 311.0, 319.0, 289.0, 295.0, 290.0, 292.0, 287.0, 297.0, 294.0, 288.0, 291.0, 291.0, 289.0, 290.0, 291.0, 291.0, 284.0, 298.0, 290.0, 292.0, 290.0, 292.0, 129.0, 124.0, 282.0, 294.0, 285.0, 299.0, 282.0, 297.0, 287.0, 292.0, 286.0, 293.0, 287.0, 295.0, 296.0, 285.0, 291.0, 285.0, 291.0, 285.0, 291.0, 288.0, 280.0, 296.0, 290.0, 292.0, 289.0, 293.0, 284.0, 295.0, 285.0, 294.0, 292.0, 287.0, 290.0, 292.0, 292.0, 287.0, 280.0, 296.0, 309.0, 318.0, 290.0, 292.0, 290.0, 297.0, 321.0, 315.0, 286.0, 293.0, 285.0, 302.0, 290.0, 297.0, 314.0, 316.0, 258.0, 258.0, 294.0, 285.0, 287.0, 295.0, 291.0, 291.0, 283.0, 290.0, 286.0, 293.0, 254.0, 268.0, 291.0, 291.0, 289.0, 293.0, 291.0, 288.0, 314.0, 319.0, 300.0, 281.0, 286.0, 293.0, 319.0, 314.0, 267.0, 269.0, 319.0, 320.0, 289.0, 281.0, 293.0, 286.0, 279.0, 300.0, 259.0, 266.0, 
285.0, 291.0, 287.0, 289.0, 285.0, 291.0, 286.0, 293.0, 3.0, 14.0, 284.0, 295.0, 285.0, 288.0, 292.0, 287.0, 300.0, 287.0, 296.0, 286.0, 256.0, 266.0, 288.0, 294.0, 293.0, 280.0, 288.0, 288.0, 254.0, 262.0, 292.0, 290.0, 284.0, 289.0, 287.0, 292.0, 258.0, 278.0, 258.0, 267.0, 265.0, 262.0, 286.0, 296.0, 266.0, 273.0, 288.0, 291.0, 292.0, 287.0, 257.0, 268.0, 259.0, 260.0, 285.0, 294.0, 289.0, 293.0, 292.0, 287.0, 293.0, 294.0, 250.0, 266.0, 288.0, 291.0, 287.0, 292.0, 287.0, 283.0, 256.0, 266.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6999861085772966, "mean_inference_ms": 1.250801244318193, "mean_action_processing_ms": 0.1340835429209035, "mean_env_wait_ms": 0.8425414523041331, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8678400, "num_agent_steps_trained": 8678400, "num_env_steps_sampled": 4339200, "num_env_steps_trained": 4339200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4339200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8678400, "timers": {"training_iteration_time_ms": 3626.87, "learn_time_ms": 1147.917, "learn_throughput": 11150.631, "synch_weights_time_ms": 12.909}, "counters": {"num_env_steps_sampled": 4339200, "num_env_steps_trained": 4339200, "num_agent_steps_sampled": 8678400, "num_agent_steps_trained": 8678400}, "done": false, "episodes_total": 10848, "training_iteration": 339, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-30", "timestamp": 1666581750, "time_this_iter_s": 3.7197470664978027, "time_total_s": 1300.0307562351227, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", 
"eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": 
"NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": 
false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 
1300.0307562351227, "timesteps_since_restore": 0, "iterations_since_restore": 339, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 175.24, "shaped_reward_min": 17, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.89, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.6, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.76, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.33, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, 
"onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.62, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.62, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 25, 
"optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.62, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": 
{"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00031251792097464204, "policy_loss": -0.0006752209737896919, "vf_loss": 7.836665153503418, "vf_explained_var": 0.5419378280639648, "kl": 0.0021702891681343317, "entropy": 0.8419244289398193, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4352000, "num_env_steps_trained": 4352000, "num_agent_steps_sampled": 8704000, "num_agent_steps_trained": 8704000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 17.0, "episode_reward_mean": 566.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.02}, "custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 175.24, "shaped_reward_min": 17, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, 
"potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.89, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.6, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.76, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.33, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.62, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 10, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.82, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.2, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, 
"soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.63, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.62, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.62, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, 
"catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 576.0, 627.0, 582.0, 587.0, 636.0, 579.0, 587.0, 587.0, 630.0, 516.0, 579.0, 582.0, 582.0, 573.0, 579.0, 522.0, 582.0, 582.0, 579.0, 633.0, 581.0, 579.0, 633.0, 536.0, 639.0, 570.0, 579.0, 579.0, 525.0, 576.0, 576.0, 576.0, 579.0, 17.0, 579.0, 573.0, 579.0, 587.0, 582.0, 522.0, 582.0, 573.0, 576.0, 516.0, 582.0, 573.0, 579.0, 536.0, 525.0, 527.0, 582.0, 539.0, 579.0, 579.0, 525.0, 519.0, 579.0, 582.0, 579.0, 587.0, 516.0, 579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 522.0, 579.0, 573.0, 579.0, 516.0, 570.0, 576.0, 522.0, 582.0, 582.0, 573.0, 570.0, 582.0, 576.0, 530.0, 579.0, 582.0, 522.0, 630.0, 573.0, 579.0, 582.0, 579.0, 579.0, 576.0, 579.0, 573.0, 522.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 287.0, 290.0, 292.0, 292.0, 287.0, 280.0, 296.0, 309.0, 318.0, 290.0, 292.0, 290.0, 297.0, 321.0, 315.0, 286.0, 293.0, 285.0, 302.0, 290.0, 297.0, 314.0, 316.0, 258.0, 
258.0, 294.0, 285.0, 287.0, 295.0, 291.0, 291.0, 283.0, 290.0, 286.0, 293.0, 254.0, 268.0, 291.0, 291.0, 289.0, 293.0, 291.0, 288.0, 314.0, 319.0, 300.0, 281.0, 286.0, 293.0, 319.0, 314.0, 267.0, 269.0, 319.0, 320.0, 289.0, 281.0, 293.0, 286.0, 279.0, 300.0, 259.0, 266.0, 285.0, 291.0, 287.0, 289.0, 285.0, 291.0, 286.0, 293.0, 3.0, 14.0, 284.0, 295.0, 285.0, 288.0, 292.0, 287.0, 300.0, 287.0, 296.0, 286.0, 256.0, 266.0, 288.0, 294.0, 293.0, 280.0, 288.0, 288.0, 254.0, 262.0, 292.0, 290.0, 284.0, 289.0, 287.0, 292.0, 258.0, 278.0, 258.0, 267.0, 265.0, 262.0, 286.0, 296.0, 266.0, 273.0, 288.0, 291.0, 292.0, 287.0, 257.0, 268.0, 259.0, 260.0, 285.0, 294.0, 289.0, 293.0, 292.0, 287.0, 293.0, 294.0, 250.0, 266.0, 288.0, 291.0, 287.0, 292.0, 287.0, 283.0, 256.0, 266.0, 293.0, 289.0, 288.0, 294.0, 262.0, 260.0, 288.0, 291.0, 284.0, 289.0, 291.0, 288.0, 254.0, 262.0, 279.0, 291.0, 288.0, 288.0, 261.0, 261.0, 290.0, 292.0, 289.0, 293.0, 288.0, 285.0, 282.0, 288.0, 286.0, 296.0, 290.0, 286.0, 262.0, 268.0, 283.0, 296.0, 288.0, 294.0, 261.0, 261.0, 311.0, 319.0, 288.0, 285.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 292.0, 287.0, 286.0, 290.0, 292.0, 287.0, 282.0, 291.0, 262.0, 260.0, 291.0, 288.0, 290.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6999324824907194, "mean_inference_ms": 1.250869291011297, "mean_action_processing_ms": 0.13407466927511189, "mean_env_wait_ms": 0.8424427085405526, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 17.0, "episode_reward_mean": 566.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.02}, "hist_stats": {"episode_reward": [579.0, 582.0, 579.0, 576.0, 627.0, 582.0, 587.0, 636.0, 579.0, 587.0, 587.0, 630.0, 516.0, 579.0, 582.0, 582.0, 573.0, 579.0, 522.0, 582.0, 582.0, 579.0, 633.0, 581.0, 579.0, 633.0, 536.0, 639.0, 570.0, 579.0, 579.0, 525.0, 576.0, 576.0, 
576.0, 579.0, 17.0, 579.0, 573.0, 579.0, 587.0, 582.0, 522.0, 582.0, 573.0, 576.0, 516.0, 582.0, 573.0, 579.0, 536.0, 525.0, 527.0, 582.0, 539.0, 579.0, 579.0, 525.0, 519.0, 579.0, 582.0, 579.0, 587.0, 516.0, 579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 522.0, 579.0, 573.0, 579.0, 516.0, 570.0, 576.0, 522.0, 582.0, 582.0, 573.0, 570.0, 582.0, 576.0, 530.0, 579.0, 582.0, 522.0, 630.0, 573.0, 579.0, 582.0, 579.0, 579.0, 576.0, 579.0, 573.0, 522.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 287.0, 290.0, 292.0, 292.0, 287.0, 280.0, 296.0, 309.0, 318.0, 290.0, 292.0, 290.0, 297.0, 321.0, 315.0, 286.0, 293.0, 285.0, 302.0, 290.0, 297.0, 314.0, 316.0, 258.0, 258.0, 294.0, 285.0, 287.0, 295.0, 291.0, 291.0, 283.0, 290.0, 286.0, 293.0, 254.0, 268.0, 291.0, 291.0, 289.0, 293.0, 291.0, 288.0, 314.0, 319.0, 300.0, 281.0, 286.0, 293.0, 319.0, 314.0, 267.0, 269.0, 319.0, 320.0, 289.0, 281.0, 293.0, 286.0, 279.0, 300.0, 259.0, 266.0, 285.0, 291.0, 287.0, 289.0, 285.0, 291.0, 286.0, 293.0, 3.0, 14.0, 284.0, 295.0, 285.0, 288.0, 292.0, 287.0, 300.0, 287.0, 296.0, 286.0, 256.0, 266.0, 288.0, 294.0, 293.0, 280.0, 288.0, 288.0, 254.0, 262.0, 292.0, 290.0, 284.0, 289.0, 287.0, 292.0, 258.0, 278.0, 258.0, 267.0, 265.0, 262.0, 286.0, 296.0, 266.0, 273.0, 288.0, 291.0, 292.0, 287.0, 257.0, 268.0, 259.0, 260.0, 285.0, 294.0, 289.0, 293.0, 292.0, 287.0, 293.0, 294.0, 250.0, 266.0, 288.0, 291.0, 287.0, 292.0, 287.0, 283.0, 256.0, 266.0, 293.0, 289.0, 288.0, 294.0, 262.0, 260.0, 
288.0, 291.0, 284.0, 289.0, 291.0, 288.0, 254.0, 262.0, 279.0, 291.0, 288.0, 288.0, 261.0, 261.0, 290.0, 292.0, 289.0, 293.0, 288.0, 285.0, 282.0, 288.0, 286.0, 296.0, 290.0, 286.0, 262.0, 268.0, 283.0, 296.0, 288.0, 294.0, 261.0, 261.0, 311.0, 319.0, 288.0, 285.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 292.0, 287.0, 286.0, 290.0, 292.0, 287.0, 282.0, 291.0, 262.0, 260.0, 291.0, 288.0, 290.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6999324824907194, "mean_inference_ms": 1.250869291011297, "mean_action_processing_ms": 0.13407466927511189, "mean_env_wait_ms": 0.8424427085405526, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8704000, "num_agent_steps_trained": 8704000, "num_env_steps_sampled": 4352000, "num_env_steps_trained": 4352000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4352000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8704000, "timers": {"training_iteration_time_ms": 3648.191, "learn_time_ms": 1147.534, "learn_throughput": 11154.352, "synch_weights_time_ms": 12.122}, "counters": {"num_env_steps_sampled": 4352000, "num_env_steps_trained": 4352000, "num_agent_steps_sampled": 8704000, "num_agent_steps_trained": 8704000}, "done": false, "episodes_total": 10880, "training_iteration": 340, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-34", "timestamp": 1666581754, "time_this_iter_s": 3.9341721534729004, "time_total_s": 1303.9649283885956, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, 
"compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, 
"metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1303.9649283885956, 
"timesteps_since_restore": 0, "iterations_since_restore": 340, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.899999999999995, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 173.38, "shaped_reward_min": 17, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.67, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.45, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.56, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 
0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.36, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.99, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.84, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.36, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 
16.99, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.36, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.99, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": 
{"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00047894648741930723, "policy_loss": 0.00012302363757044077, "vf_loss": 7.830498695373535, "vf_explained_var": 0.5219379663467407, "kl": 0.0018186834640800953, "entropy": 0.8542532324790955, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4364800, "num_env_steps_trained": 4364800, "num_agent_steps_sampled": 8729600, "num_agent_steps_trained": 8729600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 17.0, "episode_reward_mean": 559.78, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.89}, "custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 173.38, "shaped_reward_min": 17, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 14.67, "onion_pickup_agent_0_min": 1, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.45, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.56, "useful_onion_pickup_agent_0_min": 1, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.36, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.99, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.73, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.84, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.19, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.26, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.54, 
"soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.51, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.36, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.99, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.36, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.99, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 576.0, 579.0, 17.0, 579.0, 573.0, 579.0, 587.0, 582.0, 522.0, 582.0, 573.0, 576.0, 516.0, 582.0, 573.0, 579.0, 536.0, 525.0, 527.0, 582.0, 539.0, 579.0, 579.0, 525.0, 519.0, 579.0, 582.0, 579.0, 587.0, 516.0, 579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 522.0, 579.0, 573.0, 579.0, 516.0, 570.0, 576.0, 522.0, 582.0, 582.0, 573.0, 570.0, 582.0, 576.0, 530.0, 579.0, 582.0, 522.0, 630.0, 573.0, 579.0, 582.0, 579.0, 579.0, 576.0, 579.0, 573.0, 522.0, 579.0, 582.0, 525.0, 579.0, 579.0, 582.0, 587.0, 579.0, 587.0, 579.0, 530.0, 522.0, 581.0, 579.0, 576.0, 579.0, 573.0, 522.0, 582.0, 513.0, 576.0, 576.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 582.0, 530.0, 530.0, 570.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 287.0, 289.0, 285.0, 291.0, 286.0, 293.0, 3.0, 14.0, 284.0, 295.0, 285.0, 288.0, 292.0, 287.0, 300.0, 287.0, 296.0, 286.0, 256.0, 266.0, 288.0, 294.0, 293.0, 280.0, 288.0, 288.0, 254.0, 262.0, 292.0, 290.0, 
284.0, 289.0, 287.0, 292.0, 258.0, 278.0, 258.0, 267.0, 265.0, 262.0, 286.0, 296.0, 266.0, 273.0, 288.0, 291.0, 292.0, 287.0, 257.0, 268.0, 259.0, 260.0, 285.0, 294.0, 289.0, 293.0, 292.0, 287.0, 293.0, 294.0, 250.0, 266.0, 288.0, 291.0, 287.0, 292.0, 287.0, 283.0, 256.0, 266.0, 293.0, 289.0, 288.0, 294.0, 262.0, 260.0, 288.0, 291.0, 284.0, 289.0, 291.0, 288.0, 254.0, 262.0, 279.0, 291.0, 288.0, 288.0, 261.0, 261.0, 290.0, 292.0, 289.0, 293.0, 288.0, 285.0, 282.0, 288.0, 286.0, 296.0, 290.0, 286.0, 262.0, 268.0, 283.0, 296.0, 288.0, 294.0, 261.0, 261.0, 311.0, 319.0, 288.0, 285.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 292.0, 287.0, 286.0, 290.0, 292.0, 287.0, 282.0, 291.0, 262.0, 260.0, 291.0, 288.0, 290.0, 292.0, 264.0, 261.0, 289.0, 290.0, 292.0, 287.0, 289.0, 293.0, 287.0, 300.0, 293.0, 286.0, 294.0, 293.0, 290.0, 289.0, 274.0, 256.0, 258.0, 264.0, 297.0, 284.0, 287.0, 292.0, 286.0, 290.0, 293.0, 286.0, 284.0, 289.0, 267.0, 255.0, 287.0, 295.0, 251.0, 262.0, 287.0, 289.0, 290.0, 286.0, 257.0, 268.0, 293.0, 289.0, 280.0, 293.0, 318.0, 309.0, 291.0, 288.0, 263.0, 264.0, 295.0, 287.0, 273.0, 257.0, 263.0, 267.0, 281.0, 289.0, 290.0, 289.0, 260.0, 265.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6998735604012057, "mean_inference_ms": 1.2509366550400884, "mean_action_processing_ms": 0.13406633406435667, "mean_env_wait_ms": 0.8423450797843902, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 17.0, "episode_reward_mean": 559.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.89}, "hist_stats": {"episode_reward": [576.0, 576.0, 576.0, 579.0, 17.0, 579.0, 573.0, 579.0, 587.0, 582.0, 522.0, 582.0, 573.0, 576.0, 516.0, 582.0, 573.0, 579.0, 536.0, 525.0, 527.0, 582.0, 539.0, 579.0, 579.0, 525.0, 519.0, 579.0, 582.0, 579.0, 587.0, 516.0, 579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 522.0, 579.0, 
573.0, 579.0, 516.0, 570.0, 576.0, 522.0, 582.0, 582.0, 573.0, 570.0, 582.0, 576.0, 530.0, 579.0, 582.0, 522.0, 630.0, 573.0, 579.0, 582.0, 579.0, 579.0, 576.0, 579.0, 573.0, 522.0, 579.0, 582.0, 525.0, 579.0, 579.0, 582.0, 587.0, 579.0, 587.0, 579.0, 530.0, 522.0, 581.0, 579.0, 576.0, 579.0, 573.0, 522.0, 582.0, 513.0, 576.0, 576.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 582.0, 530.0, 530.0, 570.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 287.0, 289.0, 285.0, 291.0, 286.0, 293.0, 3.0, 14.0, 284.0, 295.0, 285.0, 288.0, 292.0, 287.0, 300.0, 287.0, 296.0, 286.0, 256.0, 266.0, 288.0, 294.0, 293.0, 280.0, 288.0, 288.0, 254.0, 262.0, 292.0, 290.0, 284.0, 289.0, 287.0, 292.0, 258.0, 278.0, 258.0, 267.0, 265.0, 262.0, 286.0, 296.0, 266.0, 273.0, 288.0, 291.0, 292.0, 287.0, 257.0, 268.0, 259.0, 260.0, 285.0, 294.0, 289.0, 293.0, 292.0, 287.0, 293.0, 294.0, 250.0, 266.0, 288.0, 291.0, 287.0, 292.0, 287.0, 283.0, 256.0, 266.0, 293.0, 289.0, 288.0, 294.0, 262.0, 260.0, 288.0, 291.0, 284.0, 289.0, 291.0, 288.0, 254.0, 262.0, 279.0, 291.0, 288.0, 288.0, 261.0, 261.0, 290.0, 292.0, 289.0, 293.0, 288.0, 285.0, 282.0, 288.0, 286.0, 296.0, 290.0, 286.0, 262.0, 268.0, 283.0, 296.0, 288.0, 294.0, 261.0, 261.0, 311.0, 319.0, 288.0, 285.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 292.0, 287.0, 286.0, 290.0, 292.0, 287.0, 282.0, 291.0, 262.0, 260.0, 291.0, 288.0, 290.0, 292.0, 264.0, 261.0, 289.0, 290.0, 292.0, 287.0, 289.0, 293.0, 287.0, 300.0, 293.0, 286.0, 
294.0, 293.0, 290.0, 289.0, 274.0, 256.0, 258.0, 264.0, 297.0, 284.0, 287.0, 292.0, 286.0, 290.0, 293.0, 286.0, 284.0, 289.0, 267.0, 255.0, 287.0, 295.0, 251.0, 262.0, 287.0, 289.0, 290.0, 286.0, 257.0, 268.0, 293.0, 289.0, 280.0, 293.0, 318.0, 309.0, 291.0, 288.0, 263.0, 264.0, 295.0, 287.0, 273.0, 257.0, 263.0, 267.0, 281.0, 289.0, 290.0, 289.0, 260.0, 265.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6998735604012057, "mean_inference_ms": 1.2509366550400884, "mean_action_processing_ms": 0.13406633406435667, "mean_env_wait_ms": 0.8423450797843902, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8729600, "num_agent_steps_trained": 8729600, "num_env_steps_sampled": 4364800, "num_env_steps_trained": 4364800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4364800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8729600, "timers": {"training_iteration_time_ms": 3651.695, "learn_time_ms": 1144.334, "learn_throughput": 11185.543, "synch_weights_time_ms": 11.629}, "counters": {"num_env_steps_sampled": 4364800, "num_env_steps_trained": 4364800, "num_agent_steps_sampled": 8729600, "num_agent_steps_trained": 8729600}, "done": false, "episodes_total": 10912, "training_iteration": 341, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-38", "timestamp": 1666581758, "time_this_iter_s": 3.704293727874756, "time_total_s": 1307.6692221164703, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1307.6692221164703, "timesteps_since_restore": 0, "iterations_since_restore": 341, 
"warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.75, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.0, "shaped_reward_min": 136, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.6, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.72, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.49, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.26, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.85, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.82, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.48, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.42, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.26, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 9, 
"optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.26, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, 
"total_loss": -8.920964319258928e-06, "policy_loss": -0.000365772342775017, "vf_loss": 7.832732677459717, "vf_explained_var": 0.5286672115325928, "kl": 0.0022454443387687206, "entropy": 0.8528425693511963, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4377600, "num_env_steps_trained": 4377600, "num_agent_steps_sampled": 8755200, "num_agent_steps_trained": 8755200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 456.0, "episode_reward_mean": 565.4, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 282.7}, "custom_metrics": {"sparse_reward_mean": 195.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.0, "shaped_reward_min": 136, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.6, "onion_pickup_agent_0_min": 4, 
"onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 17.72, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.49, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 17.52, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.26, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.3, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.85, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.82, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.63, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.15, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 4, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.48, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 9, 
"soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.42, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 9, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.26, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.3, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.26, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.3, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, 
"useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 522.0, 579.0, 573.0, 579.0, 516.0, 570.0, 576.0, 522.0, 582.0, 582.0, 573.0, 570.0, 582.0, 576.0, 530.0, 579.0, 582.0, 522.0, 630.0, 573.0, 579.0, 582.0, 579.0, 579.0, 576.0, 579.0, 573.0, 522.0, 579.0, 582.0, 525.0, 579.0, 579.0, 582.0, 587.0, 579.0, 587.0, 579.0, 530.0, 522.0, 581.0, 579.0, 576.0, 579.0, 573.0, 522.0, 582.0, 513.0, 576.0, 576.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 582.0, 530.0, 530.0, 570.0, 579.0, 525.0, 584.0, 582.0, 582.0, 579.0, 579.0, 584.0, 527.0, 522.0, 579.0, 582.0, 582.0, 579.0, 527.0, 576.0, 456.0, 582.0, 576.0, 579.0, 522.0, 582.0, 573.0, 584.0, 527.0, 522.0, 584.0, 579.0, 573.0, 530.0, 573.0, 633.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 287.0, 292.0, 287.0, 283.0, 256.0, 266.0, 293.0, 289.0, 288.0, 294.0, 262.0, 260.0, 288.0, 291.0, 284.0, 289.0, 291.0, 288.0, 254.0, 262.0, 279.0, 291.0, 288.0, 288.0, 261.0, 261.0, 290.0, 292.0, 289.0, 293.0, 288.0, 285.0, 282.0, 288.0, 286.0, 296.0, 
290.0, 286.0, 262.0, 268.0, 283.0, 296.0, 288.0, 294.0, 261.0, 261.0, 311.0, 319.0, 288.0, 285.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 292.0, 287.0, 286.0, 290.0, 292.0, 287.0, 282.0, 291.0, 262.0, 260.0, 291.0, 288.0, 290.0, 292.0, 264.0, 261.0, 289.0, 290.0, 292.0, 287.0, 289.0, 293.0, 287.0, 300.0, 293.0, 286.0, 294.0, 293.0, 290.0, 289.0, 274.0, 256.0, 258.0, 264.0, 297.0, 284.0, 287.0, 292.0, 286.0, 290.0, 293.0, 286.0, 284.0, 289.0, 267.0, 255.0, 287.0, 295.0, 251.0, 262.0, 287.0, 289.0, 290.0, 286.0, 257.0, 268.0, 293.0, 289.0, 280.0, 293.0, 318.0, 309.0, 291.0, 288.0, 263.0, 264.0, 295.0, 287.0, 273.0, 257.0, 263.0, 267.0, 281.0, 289.0, 290.0, 289.0, 260.0, 265.0, 287.0, 297.0, 287.0, 295.0, 284.0, 298.0, 287.0, 292.0, 291.0, 288.0, 286.0, 298.0, 268.0, 259.0, 265.0, 257.0, 289.0, 290.0, 293.0, 289.0, 293.0, 289.0, 284.0, 295.0, 259.0, 268.0, 286.0, 290.0, 222.0, 234.0, 287.0, 295.0, 284.0, 292.0, 292.0, 287.0, 261.0, 261.0, 294.0, 288.0, 289.0, 284.0, 291.0, 293.0, 269.0, 258.0, 261.0, 261.0, 292.0, 292.0, 288.0, 291.0, 283.0, 290.0, 260.0, 270.0, 289.0, 284.0, 322.0, 311.0, 293.0, 286.0, 261.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6998021023353869, "mean_inference_ms": 1.250992359542731, "mean_action_processing_ms": 0.13405790576990334, "mean_env_wait_ms": 0.8422437053910774, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 456.0, "episode_reward_mean": 565.4, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 222.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 282.7}, "hist_stats": {"episode_reward": [579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 522.0, 579.0, 573.0, 579.0, 516.0, 570.0, 576.0, 522.0, 582.0, 582.0, 573.0, 570.0, 582.0, 576.0, 530.0, 579.0, 582.0, 522.0, 630.0, 573.0, 579.0, 582.0, 579.0, 579.0, 576.0, 579.0, 573.0, 522.0, 579.0, 582.0, 525.0, 579.0, 579.0, 582.0, 587.0, 579.0, 587.0, 579.0, 530.0, 522.0, 
581.0, 579.0, 576.0, 579.0, 573.0, 522.0, 582.0, 513.0, 576.0, 576.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 582.0, 530.0, 530.0, 570.0, 579.0, 525.0, 584.0, 582.0, 582.0, 579.0, 579.0, 584.0, 527.0, 522.0, 579.0, 582.0, 582.0, 579.0, 527.0, 576.0, 456.0, 582.0, 576.0, 579.0, 522.0, 582.0, 573.0, 584.0, 527.0, 522.0, 584.0, 579.0, 573.0, 530.0, 573.0, 633.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 287.0, 292.0, 287.0, 283.0, 256.0, 266.0, 293.0, 289.0, 288.0, 294.0, 262.0, 260.0, 288.0, 291.0, 284.0, 289.0, 291.0, 288.0, 254.0, 262.0, 279.0, 291.0, 288.0, 288.0, 261.0, 261.0, 290.0, 292.0, 289.0, 293.0, 288.0, 285.0, 282.0, 288.0, 286.0, 296.0, 290.0, 286.0, 262.0, 268.0, 283.0, 296.0, 288.0, 294.0, 261.0, 261.0, 311.0, 319.0, 288.0, 285.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 292.0, 287.0, 286.0, 290.0, 292.0, 287.0, 282.0, 291.0, 262.0, 260.0, 291.0, 288.0, 290.0, 292.0, 264.0, 261.0, 289.0, 290.0, 292.0, 287.0, 289.0, 293.0, 287.0, 300.0, 293.0, 286.0, 294.0, 293.0, 290.0, 289.0, 274.0, 256.0, 258.0, 264.0, 297.0, 284.0, 287.0, 292.0, 286.0, 290.0, 293.0, 286.0, 284.0, 289.0, 267.0, 255.0, 287.0, 295.0, 251.0, 262.0, 287.0, 289.0, 290.0, 286.0, 257.0, 268.0, 293.0, 289.0, 280.0, 293.0, 318.0, 309.0, 291.0, 288.0, 263.0, 264.0, 295.0, 287.0, 273.0, 257.0, 263.0, 267.0, 281.0, 289.0, 290.0, 289.0, 260.0, 265.0, 287.0, 297.0, 287.0, 295.0, 284.0, 298.0, 287.0, 292.0, 291.0, 288.0, 286.0, 298.0, 268.0, 259.0, 265.0, 257.0, 289.0, 290.0, 
293.0, 289.0, 293.0, 289.0, 284.0, 295.0, 259.0, 268.0, 286.0, 290.0, 222.0, 234.0, 287.0, 295.0, 284.0, 292.0, 292.0, 287.0, 261.0, 261.0, 294.0, 288.0, 289.0, 284.0, 291.0, 293.0, 269.0, 258.0, 261.0, 261.0, 292.0, 292.0, 288.0, 291.0, 283.0, 290.0, 260.0, 270.0, 289.0, 284.0, 322.0, 311.0, 293.0, 286.0, 261.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6998021023353869, "mean_inference_ms": 1.250992359542731, "mean_action_processing_ms": 0.13405790576990334, "mean_env_wait_ms": 0.8422437053910774, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8755200, "num_agent_steps_trained": 8755200, "num_env_steps_sampled": 4377600, "num_env_steps_trained": 4377600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4377600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8755200, "timers": {"training_iteration_time_ms": 3653.038, "learn_time_ms": 1147.561, "learn_throughput": 11154.089, "synch_weights_time_ms": 11.78}, "counters": {"num_env_steps_sampled": 4377600, "num_env_steps_trained": 4377600, "num_agent_steps_sampled": 8755200, "num_agent_steps_trained": 8755200}, "done": false, "episodes_total": 10944, "training_iteration": 342, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-41", "timestamp": 1666581761, "time_this_iter_s": 3.6865930557250977, "time_total_s": 1311.3558151721954, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": 
true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, 
"train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, 
"min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 
60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1311.3558151721954, "timesteps_since_restore": 0, "iterations_since_restore": 342, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.44, 
"ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 174.81, "shaped_reward_min": 122, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.79, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.44, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.03, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.38, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.31, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.44, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.03, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.44, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.03, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000201219052542001, "policy_loss": -0.00016514863818883896, "vf_loss": 
7.8804121017456055, "vf_explained_var": 0.5171889066696167, "kl": 0.0026697758585214615, "entropy": 0.8433467149734497, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4390400, "num_env_steps_trained": 4390400, "num_agent_steps_sampled": 8780800, "num_agent_steps_trained": 8780800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 322.0, "episode_reward_mean": 563.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 153.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 281.605}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 174.81, "shaped_reward_min": 122, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.79, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.35, 
"onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.67, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.44, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.03, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.59, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.38, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.55, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.31, "soup_delivery_agent_0_min": 0, 
"soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.44, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.03, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.44, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.03, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 522.0, 579.0, 582.0, 525.0, 579.0, 579.0, 582.0, 587.0, 579.0, 587.0, 579.0, 530.0, 522.0, 581.0, 579.0, 576.0, 579.0, 573.0, 522.0, 582.0, 513.0, 576.0, 576.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 582.0, 530.0, 530.0, 570.0, 579.0, 525.0, 584.0, 582.0, 582.0, 579.0, 579.0, 584.0, 527.0, 522.0, 579.0, 582.0, 582.0, 579.0, 527.0, 576.0, 456.0, 582.0, 576.0, 579.0, 522.0, 582.0, 573.0, 584.0, 527.0, 522.0, 584.0, 579.0, 573.0, 530.0, 573.0, 633.0, 579.0, 525.0, 576.0, 525.0, 584.0, 530.0, 582.0, 525.0, 536.0, 582.0, 570.0, 582.0, 630.0, 579.0, 587.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 462.0, 582.0, 567.0, 567.0, 525.0, 573.0, 582.0, 322.0, 573.0, 582.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 291.0, 262.0, 260.0, 291.0, 288.0, 290.0, 292.0, 264.0, 261.0, 289.0, 290.0, 292.0, 287.0, 289.0, 293.0, 287.0, 300.0, 293.0, 286.0, 294.0, 293.0, 290.0, 289.0, 274.0, 256.0, 258.0, 264.0, 297.0, 284.0, 287.0, 292.0, 286.0, 290.0, 293.0, 286.0, 284.0, 289.0, 267.0, 255.0, 287.0, 295.0, 251.0, 262.0, 287.0, 289.0, 290.0, 286.0, 257.0, 
268.0, 293.0, 289.0, 280.0, 293.0, 318.0, 309.0, 291.0, 288.0, 263.0, 264.0, 295.0, 287.0, 273.0, 257.0, 263.0, 267.0, 281.0, 289.0, 290.0, 289.0, 260.0, 265.0, 287.0, 297.0, 287.0, 295.0, 284.0, 298.0, 287.0, 292.0, 291.0, 288.0, 286.0, 298.0, 268.0, 259.0, 265.0, 257.0, 289.0, 290.0, 293.0, 289.0, 293.0, 289.0, 284.0, 295.0, 259.0, 268.0, 286.0, 290.0, 222.0, 234.0, 287.0, 295.0, 284.0, 292.0, 292.0, 287.0, 261.0, 261.0, 294.0, 288.0, 289.0, 284.0, 291.0, 293.0, 269.0, 258.0, 261.0, 261.0, 292.0, 292.0, 288.0, 291.0, 283.0, 290.0, 260.0, 270.0, 289.0, 284.0, 322.0, 311.0, 293.0, 286.0, 261.0, 264.0, 285.0, 291.0, 261.0, 264.0, 296.0, 288.0, 264.0, 266.0, 296.0, 286.0, 269.0, 256.0, 269.0, 267.0, 290.0, 292.0, 285.0, 285.0, 289.0, 293.0, 311.0, 319.0, 287.0, 292.0, 289.0, 298.0, 292.0, 295.0, 294.0, 288.0, 288.0, 294.0, 311.0, 319.0, 285.0, 291.0, 294.0, 288.0, 289.0, 284.0, 228.0, 234.0, 290.0, 292.0, 276.0, 291.0, 276.0, 291.0, 267.0, 258.0, 284.0, 289.0, 289.0, 293.0, 153.0, 169.0, 286.0, 287.0, 293.0, 289.0, 290.0, 280.0, 298.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.699720819683431, "mean_inference_ms": 1.2508368529183134, "mean_action_processing_ms": 0.13404914974769147, "mean_env_wait_ms": 0.8421437948391369, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 322.0, "episode_reward_mean": 563.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 153.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 281.605}, "hist_stats": {"episode_reward": [573.0, 522.0, 579.0, 582.0, 525.0, 579.0, 579.0, 582.0, 587.0, 579.0, 587.0, 579.0, 530.0, 522.0, 581.0, 579.0, 576.0, 579.0, 573.0, 522.0, 582.0, 513.0, 576.0, 576.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 582.0, 530.0, 530.0, 570.0, 579.0, 525.0, 584.0, 582.0, 582.0, 579.0, 579.0, 584.0, 527.0, 522.0, 579.0, 582.0, 582.0, 579.0, 527.0, 576.0, 456.0, 582.0, 576.0, 579.0, 522.0, 582.0, 573.0, 
584.0, 527.0, 522.0, 584.0, 579.0, 573.0, 530.0, 573.0, 633.0, 579.0, 525.0, 576.0, 525.0, 584.0, 530.0, 582.0, 525.0, 536.0, 582.0, 570.0, 582.0, 630.0, 579.0, 587.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 462.0, 582.0, 567.0, 567.0, 525.0, 573.0, 582.0, 322.0, 573.0, 582.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 291.0, 262.0, 260.0, 291.0, 288.0, 290.0, 292.0, 264.0, 261.0, 289.0, 290.0, 292.0, 287.0, 289.0, 293.0, 287.0, 300.0, 293.0, 286.0, 294.0, 293.0, 290.0, 289.0, 274.0, 256.0, 258.0, 264.0, 297.0, 284.0, 287.0, 292.0, 286.0, 290.0, 293.0, 286.0, 284.0, 289.0, 267.0, 255.0, 287.0, 295.0, 251.0, 262.0, 287.0, 289.0, 290.0, 286.0, 257.0, 268.0, 293.0, 289.0, 280.0, 293.0, 318.0, 309.0, 291.0, 288.0, 263.0, 264.0, 295.0, 287.0, 273.0, 257.0, 263.0, 267.0, 281.0, 289.0, 290.0, 289.0, 260.0, 265.0, 287.0, 297.0, 287.0, 295.0, 284.0, 298.0, 287.0, 292.0, 291.0, 288.0, 286.0, 298.0, 268.0, 259.0, 265.0, 257.0, 289.0, 290.0, 293.0, 289.0, 293.0, 289.0, 284.0, 295.0, 259.0, 268.0, 286.0, 290.0, 222.0, 234.0, 287.0, 295.0, 284.0, 292.0, 292.0, 287.0, 261.0, 261.0, 294.0, 288.0, 289.0, 284.0, 291.0, 293.0, 269.0, 258.0, 261.0, 261.0, 292.0, 292.0, 288.0, 291.0, 283.0, 290.0, 260.0, 270.0, 289.0, 284.0, 322.0, 311.0, 293.0, 286.0, 261.0, 264.0, 285.0, 291.0, 261.0, 264.0, 296.0, 288.0, 264.0, 266.0, 296.0, 286.0, 269.0, 256.0, 269.0, 267.0, 290.0, 292.0, 285.0, 285.0, 289.0, 293.0, 311.0, 319.0, 287.0, 292.0, 289.0, 298.0, 292.0, 295.0, 294.0, 
288.0, 288.0, 294.0, 311.0, 319.0, 285.0, 291.0, 294.0, 288.0, 289.0, 284.0, 228.0, 234.0, 290.0, 292.0, 276.0, 291.0, 276.0, 291.0, 267.0, 258.0, 284.0, 289.0, 289.0, 293.0, 153.0, 169.0, 286.0, 287.0, 293.0, 289.0, 290.0, 280.0, 298.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.699720819683431, "mean_inference_ms": 1.2508368529183134, "mean_action_processing_ms": 0.13404914974769147, "mean_env_wait_ms": 0.8421437948391369, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8780800, "num_agent_steps_trained": 8780800, "num_env_steps_sampled": 4390400, "num_env_steps_trained": 4390400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4390400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8780800, "timers": {"training_iteration_time_ms": 3647.502, "learn_time_ms": 1144.384, "learn_throughput": 11185.06, "synch_weights_time_ms": 10.64}, "counters": {"num_env_steps_sampled": 4390400, "num_env_steps_trained": 4390400, "num_agent_steps_sampled": 8780800, "num_agent_steps_trained": 8780800}, "done": false, "episodes_total": 10976, "training_iteration": 343, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-45", "timestamp": 1666581765, "time_this_iter_s": 3.7214314937591553, "time_total_s": 1315.0772466659546, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": 
false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": 
{"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": 
{"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 
100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1315.0772466659546, "timesteps_since_restore": 0, "iterations_since_restore": 343, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.55, "ram_util_percent": 10.6}} +{"custom_metrics": 
{"sparse_reward_mean": 194.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 175.12, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.05, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.99, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.64, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.24, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 10, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.64, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.64, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00018610269762575626, "policy_loss": -0.00017776305321604013, "vf_loss": 7.827777862548828, "vf_explained_var": 
0.5340909957885742, "kl": 0.0031581996008753777, "entropy": 0.8378207683563232, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4403200, "num_env_steps_trained": 4403200, "num_agent_steps_sampled": 8806400, "num_agent_steps_trained": 8806400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 322.0, "episode_reward_mean": 563.92, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 153.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 281.96}, "custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 175.12, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.05, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 7, 
"onion_pickup_agent_1_max": 29, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.99, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 29, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.64, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 29, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.52, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 4.81, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.3, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 10, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.24, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 10, 
"soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.64, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 29, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.64, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 29, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 570.0, 579.0, 525.0, 584.0, 582.0, 582.0, 579.0, 579.0, 584.0, 527.0, 522.0, 579.0, 582.0, 582.0, 579.0, 527.0, 576.0, 456.0, 582.0, 576.0, 579.0, 522.0, 582.0, 573.0, 584.0, 527.0, 522.0, 584.0, 579.0, 573.0, 530.0, 573.0, 633.0, 579.0, 525.0, 576.0, 525.0, 584.0, 530.0, 582.0, 525.0, 536.0, 582.0, 570.0, 582.0, 630.0, 579.0, 587.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 462.0, 582.0, 567.0, 567.0, 525.0, 573.0, 582.0, 322.0, 573.0, 582.0, 570.0, 582.0, 582.0, 579.0, 587.0, 582.0, 587.0, 573.0, 522.0, 579.0, 582.0, 411.0, 582.0, 582.0, 579.0, 582.0, 570.0, 579.0, 579.0, 482.0, 579.0, 524.0, 576.0, 579.0, 582.0, 582.0, 573.0, 570.0, 582.0, 639.0, 582.0, 573.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [263.0, 267.0, 281.0, 289.0, 290.0, 289.0, 260.0, 265.0, 287.0, 297.0, 287.0, 295.0, 284.0, 298.0, 287.0, 292.0, 291.0, 288.0, 286.0, 298.0, 268.0, 259.0, 265.0, 257.0, 289.0, 290.0, 293.0, 289.0, 293.0, 289.0, 284.0, 295.0, 259.0, 268.0, 286.0, 290.0, 222.0, 234.0, 287.0, 295.0, 284.0, 292.0, 292.0, 287.0, 261.0, 261.0, 294.0, 288.0, 289.0, 284.0, 291.0, 293.0, 269.0, 258.0, 261.0, 
261.0, 292.0, 292.0, 288.0, 291.0, 283.0, 290.0, 260.0, 270.0, 289.0, 284.0, 322.0, 311.0, 293.0, 286.0, 261.0, 264.0, 285.0, 291.0, 261.0, 264.0, 296.0, 288.0, 264.0, 266.0, 296.0, 286.0, 269.0, 256.0, 269.0, 267.0, 290.0, 292.0, 285.0, 285.0, 289.0, 293.0, 311.0, 319.0, 287.0, 292.0, 289.0, 298.0, 292.0, 295.0, 294.0, 288.0, 288.0, 294.0, 311.0, 319.0, 285.0, 291.0, 294.0, 288.0, 289.0, 284.0, 228.0, 234.0, 290.0, 292.0, 276.0, 291.0, 276.0, 291.0, 267.0, 258.0, 284.0, 289.0, 289.0, 293.0, 153.0, 169.0, 286.0, 287.0, 293.0, 289.0, 290.0, 280.0, 298.0, 284.0, 293.0, 289.0, 295.0, 284.0, 289.0, 298.0, 292.0, 290.0, 294.0, 293.0, 288.0, 285.0, 262.0, 260.0, 287.0, 292.0, 291.0, 291.0, 206.0, 205.0, 291.0, 291.0, 292.0, 290.0, 283.0, 296.0, 299.0, 283.0, 281.0, 289.0, 289.0, 290.0, 286.0, 293.0, 234.0, 248.0, 286.0, 293.0, 269.0, 255.0, 291.0, 285.0, 287.0, 292.0, 295.0, 287.0, 297.0, 285.0, 287.0, 286.0, 277.0, 293.0, 288.0, 294.0, 323.0, 316.0, 293.0, 289.0, 287.0, 286.0, 258.0, 267.0, 283.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6996287283780928, "mean_inference_ms": 1.250660796238729, "mean_action_processing_ms": 0.1340424521978151, "mean_env_wait_ms": 0.8420568664063134, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 322.0, "episode_reward_mean": 563.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 153.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 281.96}, "hist_stats": {"episode_reward": [530.0, 570.0, 579.0, 525.0, 584.0, 582.0, 582.0, 579.0, 579.0, 584.0, 527.0, 522.0, 579.0, 582.0, 582.0, 579.0, 527.0, 576.0, 456.0, 582.0, 576.0, 579.0, 522.0, 582.0, 573.0, 584.0, 527.0, 522.0, 584.0, 579.0, 573.0, 530.0, 573.0, 633.0, 579.0, 525.0, 576.0, 525.0, 584.0, 530.0, 582.0, 525.0, 536.0, 582.0, 570.0, 582.0, 630.0, 579.0, 587.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 462.0, 582.0, 567.0, 567.0, 525.0, 573.0, 582.0, 
322.0, 573.0, 582.0, 570.0, 582.0, 582.0, 579.0, 587.0, 582.0, 587.0, 573.0, 522.0, 579.0, 582.0, 411.0, 582.0, 582.0, 579.0, 582.0, 570.0, 579.0, 579.0, 482.0, 579.0, 524.0, 576.0, 579.0, 582.0, 582.0, 573.0, 570.0, 582.0, 639.0, 582.0, 573.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [263.0, 267.0, 281.0, 289.0, 290.0, 289.0, 260.0, 265.0, 287.0, 297.0, 287.0, 295.0, 284.0, 298.0, 287.0, 292.0, 291.0, 288.0, 286.0, 298.0, 268.0, 259.0, 265.0, 257.0, 289.0, 290.0, 293.0, 289.0, 293.0, 289.0, 284.0, 295.0, 259.0, 268.0, 286.0, 290.0, 222.0, 234.0, 287.0, 295.0, 284.0, 292.0, 292.0, 287.0, 261.0, 261.0, 294.0, 288.0, 289.0, 284.0, 291.0, 293.0, 269.0, 258.0, 261.0, 261.0, 292.0, 292.0, 288.0, 291.0, 283.0, 290.0, 260.0, 270.0, 289.0, 284.0, 322.0, 311.0, 293.0, 286.0, 261.0, 264.0, 285.0, 291.0, 261.0, 264.0, 296.0, 288.0, 264.0, 266.0, 296.0, 286.0, 269.0, 256.0, 269.0, 267.0, 290.0, 292.0, 285.0, 285.0, 289.0, 293.0, 311.0, 319.0, 287.0, 292.0, 289.0, 298.0, 292.0, 295.0, 294.0, 288.0, 288.0, 294.0, 311.0, 319.0, 285.0, 291.0, 294.0, 288.0, 289.0, 284.0, 228.0, 234.0, 290.0, 292.0, 276.0, 291.0, 276.0, 291.0, 267.0, 258.0, 284.0, 289.0, 289.0, 293.0, 153.0, 169.0, 286.0, 287.0, 293.0, 289.0, 290.0, 280.0, 298.0, 284.0, 293.0, 289.0, 295.0, 284.0, 289.0, 298.0, 292.0, 290.0, 294.0, 293.0, 288.0, 285.0, 262.0, 260.0, 287.0, 292.0, 291.0, 291.0, 206.0, 205.0, 291.0, 291.0, 292.0, 290.0, 283.0, 296.0, 299.0, 283.0, 281.0, 289.0, 289.0, 290.0, 286.0, 293.0, 234.0, 
248.0, 286.0, 293.0, 269.0, 255.0, 291.0, 285.0, 287.0, 292.0, 295.0, 287.0, 297.0, 285.0, 287.0, 286.0, 277.0, 293.0, 288.0, 294.0, 323.0, 316.0, 293.0, 289.0, 287.0, 286.0, 258.0, 267.0, 283.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6996287283780928, "mean_inference_ms": 1.250660796238729, "mean_action_processing_ms": 0.1340424521978151, "mean_env_wait_ms": 0.8420568664063134, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8806400, "num_agent_steps_trained": 8806400, "num_env_steps_sampled": 4403200, "num_env_steps_trained": 4403200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4403200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8806400, "timers": {"training_iteration_time_ms": 3646.251, "learn_time_ms": 1143.163, "learn_throughput": 11197.002, "synch_weights_time_ms": 10.685}, "counters": {"num_env_steps_sampled": 4403200, "num_env_steps_trained": 4403200, "num_agent_steps_sampled": 8806400, "num_agent_steps_trained": 8806400}, "done": false, "episodes_total": 11008, "training_iteration": 344, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-49", "timestamp": 1666581769, "time_this_iter_s": 3.617185592651367, "time_total_s": 1318.694432258606, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": 
false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": 
"WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1318.694432258606, "timesteps_since_restore": 0, "iterations_since_restore": 344, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.76, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 100, "sparse_reward_max": 220, 
"shaped_reward_mean": 176.07, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.63, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.76, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.57, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.62, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, 
"potting_onion_agent_0_mean": 15.3, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.5, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.52, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.3, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.5, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.3, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.5, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0020328951068222523, "policy_loss": 0.0016740041319280863, "vf_loss": 7.817473411560059, "vf_explained_var": 0.5217878818511963, "kl": 0.002134096808731556, "entropy": 0.8457106351852417, "entropy_coeff": 
0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4416000, "num_env_steps_trained": 4416000, "num_agent_steps_sampled": 8832000, "num_agent_steps_trained": 8832000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 322.0, "episode_reward_mean": 568.47, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 153.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.235}, "custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 176.07, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.63, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.76, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.57, "useful_onion_pickup_agent_0_min": 7, 
"useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.62, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.3, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.5, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.52, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 10, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.14, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.79, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, 
"soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.3, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.5, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.3, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.5, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 633.0, 579.0, 525.0, 576.0, 525.0, 584.0, 530.0, 582.0, 525.0, 536.0, 582.0, 570.0, 582.0, 630.0, 579.0, 587.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 462.0, 582.0, 567.0, 567.0, 525.0, 573.0, 582.0, 322.0, 573.0, 582.0, 570.0, 582.0, 582.0, 579.0, 587.0, 582.0, 587.0, 573.0, 522.0, 579.0, 582.0, 411.0, 582.0, 582.0, 579.0, 582.0, 570.0, 579.0, 579.0, 482.0, 579.0, 524.0, 576.0, 579.0, 582.0, 582.0, 573.0, 570.0, 582.0, 639.0, 582.0, 573.0, 525.0, 573.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 573.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 513.0, 573.0, 573.0, 582.0, 570.0, 522.0, 581.0, 582.0, 630.0, 567.0, 582.0, 582.0, 579.0, 525.0, 579.0, 522.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 284.0, 322.0, 311.0, 293.0, 286.0, 261.0, 264.0, 285.0, 291.0, 261.0, 264.0, 296.0, 288.0, 264.0, 266.0, 296.0, 286.0, 269.0, 256.0, 269.0, 267.0, 290.0, 292.0, 285.0, 285.0, 289.0, 293.0, 311.0, 319.0, 287.0, 292.0, 289.0, 298.0, 292.0, 295.0, 294.0, 288.0, 288.0, 294.0, 311.0, 319.0, 285.0, 291.0, 294.0, 288.0, 289.0, 284.0, 228.0, 234.0, 290.0, 292.0, 276.0, 291.0, 276.0, 291.0, 267.0, 258.0, 284.0, 289.0, 289.0, 293.0, 153.0, 169.0, 286.0, 287.0, 293.0, 289.0, 290.0, 280.0, 298.0, 284.0, 293.0, 
289.0, 295.0, 284.0, 289.0, 298.0, 292.0, 290.0, 294.0, 293.0, 288.0, 285.0, 262.0, 260.0, 287.0, 292.0, 291.0, 291.0, 206.0, 205.0, 291.0, 291.0, 292.0, 290.0, 283.0, 296.0, 299.0, 283.0, 281.0, 289.0, 289.0, 290.0, 286.0, 293.0, 234.0, 248.0, 286.0, 293.0, 269.0, 255.0, 291.0, 285.0, 287.0, 292.0, 295.0, 287.0, 297.0, 285.0, 287.0, 286.0, 277.0, 293.0, 288.0, 294.0, 323.0, 316.0, 293.0, 289.0, 287.0, 286.0, 258.0, 267.0, 283.0, 290.0, 286.0, 293.0, 290.0, 289.0, 286.0, 293.0, 288.0, 294.0, 287.0, 295.0, 280.0, 302.0, 285.0, 288.0, 291.0, 285.0, 286.0, 296.0, 292.0, 290.0, 297.0, 285.0, 285.0, 297.0, 294.0, 288.0, 289.0, 293.0, 282.0, 297.0, 250.0, 263.0, 294.0, 279.0, 287.0, 286.0, 290.0, 292.0, 279.0, 291.0, 258.0, 264.0, 284.0, 297.0, 291.0, 291.0, 314.0, 316.0, 283.0, 284.0, 290.0, 292.0, 291.0, 291.0, 291.0, 288.0, 260.0, 265.0, 288.0, 291.0, 258.0, 264.0, 304.0, 323.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6995297820196078, "mean_inference_ms": 1.2504793126314804, "mean_action_processing_ms": 0.13403454747663798, "mean_env_wait_ms": 0.8419635373672074, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 322.0, "episode_reward_mean": 568.47, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 153.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.235}, "hist_stats": {"episode_reward": [573.0, 633.0, 579.0, 525.0, 576.0, 525.0, 584.0, 530.0, 582.0, 525.0, 536.0, 582.0, 570.0, 582.0, 630.0, 579.0, 587.0, 587.0, 582.0, 582.0, 630.0, 576.0, 582.0, 573.0, 462.0, 582.0, 567.0, 567.0, 525.0, 573.0, 582.0, 322.0, 573.0, 582.0, 570.0, 582.0, 582.0, 579.0, 587.0, 582.0, 587.0, 573.0, 522.0, 579.0, 582.0, 411.0, 582.0, 582.0, 579.0, 582.0, 570.0, 579.0, 579.0, 482.0, 579.0, 524.0, 576.0, 579.0, 582.0, 582.0, 573.0, 570.0, 582.0, 639.0, 582.0, 573.0, 525.0, 573.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 573.0, 576.0, 582.0, 582.0, 582.0, 582.0, 
582.0, 582.0, 579.0, 513.0, 573.0, 573.0, 582.0, 570.0, 522.0, 581.0, 582.0, 630.0, 567.0, 582.0, 582.0, 579.0, 525.0, 579.0, 522.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 284.0, 322.0, 311.0, 293.0, 286.0, 261.0, 264.0, 285.0, 291.0, 261.0, 264.0, 296.0, 288.0, 264.0, 266.0, 296.0, 286.0, 269.0, 256.0, 269.0, 267.0, 290.0, 292.0, 285.0, 285.0, 289.0, 293.0, 311.0, 319.0, 287.0, 292.0, 289.0, 298.0, 292.0, 295.0, 294.0, 288.0, 288.0, 294.0, 311.0, 319.0, 285.0, 291.0, 294.0, 288.0, 289.0, 284.0, 228.0, 234.0, 290.0, 292.0, 276.0, 291.0, 276.0, 291.0, 267.0, 258.0, 284.0, 289.0, 289.0, 293.0, 153.0, 169.0, 286.0, 287.0, 293.0, 289.0, 290.0, 280.0, 298.0, 284.0, 293.0, 289.0, 295.0, 284.0, 289.0, 298.0, 292.0, 290.0, 294.0, 293.0, 288.0, 285.0, 262.0, 260.0, 287.0, 292.0, 291.0, 291.0, 206.0, 205.0, 291.0, 291.0, 292.0, 290.0, 283.0, 296.0, 299.0, 283.0, 281.0, 289.0, 289.0, 290.0, 286.0, 293.0, 234.0, 248.0, 286.0, 293.0, 269.0, 255.0, 291.0, 285.0, 287.0, 292.0, 295.0, 287.0, 297.0, 285.0, 287.0, 286.0, 277.0, 293.0, 288.0, 294.0, 323.0, 316.0, 293.0, 289.0, 287.0, 286.0, 258.0, 267.0, 283.0, 290.0, 286.0, 293.0, 290.0, 289.0, 286.0, 293.0, 288.0, 294.0, 287.0, 295.0, 280.0, 302.0, 285.0, 288.0, 291.0, 285.0, 286.0, 296.0, 292.0, 290.0, 297.0, 285.0, 285.0, 297.0, 294.0, 288.0, 289.0, 293.0, 282.0, 297.0, 250.0, 263.0, 294.0, 279.0, 287.0, 286.0, 290.0, 292.0, 279.0, 291.0, 258.0, 264.0, 284.0, 297.0, 291.0, 291.0, 314.0, 316.0, 283.0, 284.0, 290.0, 292.0, 
291.0, 291.0, 291.0, 288.0, 260.0, 265.0, 288.0, 291.0, 258.0, 264.0, 304.0, 323.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6995297820196078, "mean_inference_ms": 1.2504793126314804, "mean_action_processing_ms": 0.13403454747663798, "mean_env_wait_ms": 0.8419635373672074, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8832000, "num_agent_steps_trained": 8832000, "num_env_steps_sampled": 4416000, "num_env_steps_trained": 4416000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4416000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8832000, "timers": {"training_iteration_time_ms": 3645.109, "learn_time_ms": 1147.935, "learn_throughput": 11150.453, "synch_weights_time_ms": 10.013}, "counters": {"num_env_steps_sampled": 4416000, "num_env_steps_trained": 4416000, "num_agent_steps_sampled": 8832000, "num_agent_steps_trained": 8832000}, "done": false, "episodes_total": 11040, "training_iteration": 345, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-53", "timestamp": 1666581773, "time_this_iter_s": 3.703167676925659, "time_total_s": 1322.3975999355316, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": 
{"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": 
"relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", 
"env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, 
"conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, 
"_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, 
"fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1322.3975999355316, "timesteps_since_restore": 0, "iterations_since_restore": 345, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.98, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.8, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.67, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.7, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.5, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.41, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 
16.39, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.41, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.39, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.41, 
"viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.39, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014021097449585795, "policy_loss": -0.0017652350943535566, "vf_loss": 7.849297523498535, "vf_explained_var": 0.5106943845748901, "kl": 0.0021750519517809153, "entropy": 0.8436084985733032, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4428800, "num_env_steps_trained": 4428800, 
"num_agent_steps_sampled": 8857600, "num_agent_steps_trained": 8857600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 411.0, "episode_reward_mean": 569.58, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 205.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.79}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.98, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.8, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.67, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.7, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.5, "useful_onion_pickup_agent_1_min": 
7, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.41, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.39, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, 
"soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.41, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.39, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.41, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.39, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 570.0, 582.0, 582.0, 579.0, 587.0, 582.0, 587.0, 573.0, 522.0, 579.0, 582.0, 411.0, 582.0, 582.0, 579.0, 582.0, 570.0, 579.0, 579.0, 482.0, 579.0, 524.0, 576.0, 579.0, 582.0, 582.0, 573.0, 570.0, 582.0, 639.0, 582.0, 573.0, 525.0, 573.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 573.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 513.0, 573.0, 573.0, 582.0, 570.0, 522.0, 581.0, 582.0, 630.0, 567.0, 582.0, 582.0, 579.0, 525.0, 579.0, 522.0, 627.0, 479.0, 573.0, 567.0, 516.0, 516.0, 587.0, 579.0, 582.0, 576.0, 587.0, 584.0, 525.0, 573.0, 576.0, 579.0, 573.0, 579.0, 468.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 579.0, 582.0, 567.0, 633.0, 582.0, 582.0, 525.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 293.0, 289.0, 290.0, 280.0, 298.0, 284.0, 293.0, 289.0, 295.0, 284.0, 289.0, 298.0, 292.0, 290.0, 294.0, 293.0, 288.0, 285.0, 262.0, 260.0, 287.0, 292.0, 291.0, 291.0, 206.0, 205.0, 291.0, 291.0, 292.0, 290.0, 283.0, 296.0, 299.0, 283.0, 281.0, 289.0, 289.0, 290.0, 286.0, 293.0, 234.0, 248.0, 286.0, 293.0, 269.0, 255.0, 291.0, 285.0, 287.0, 292.0, 295.0, 287.0, 297.0, 285.0, 287.0, 286.0, 277.0, 293.0, 288.0, 294.0, 323.0, 316.0, 293.0, 289.0, 287.0, 286.0, 258.0, 267.0, 283.0, 290.0, 286.0, 293.0, 290.0, 289.0, 286.0, 293.0, 288.0, 294.0, 287.0, 295.0, 280.0, 302.0, 285.0, 288.0, 291.0, 285.0, 286.0, 296.0, 292.0, 
290.0, 297.0, 285.0, 285.0, 297.0, 294.0, 288.0, 289.0, 293.0, 282.0, 297.0, 250.0, 263.0, 294.0, 279.0, 287.0, 286.0, 290.0, 292.0, 279.0, 291.0, 258.0, 264.0, 284.0, 297.0, 291.0, 291.0, 314.0, 316.0, 283.0, 284.0, 290.0, 292.0, 291.0, 291.0, 291.0, 288.0, 260.0, 265.0, 288.0, 291.0, 258.0, 264.0, 304.0, 323.0, 233.0, 246.0, 282.0, 291.0, 278.0, 289.0, 263.0, 253.0, 261.0, 255.0, 294.0, 293.0, 289.0, 290.0, 293.0, 289.0, 291.0, 285.0, 291.0, 296.0, 287.0, 297.0, 258.0, 267.0, 280.0, 293.0, 284.0, 292.0, 282.0, 297.0, 289.0, 284.0, 288.0, 291.0, 235.0, 233.0, 287.0, 286.0, 290.0, 289.0, 291.0, 291.0, 296.0, 280.0, 292.0, 284.0, 286.0, 290.0, 287.0, 292.0, 288.0, 294.0, 279.0, 288.0, 315.0, 318.0, 282.0, 300.0, 286.0, 296.0, 259.0, 266.0, 277.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6994271831971862, "mean_inference_ms": 1.250277241170522, "mean_action_processing_ms": 0.13402499118015013, "mean_env_wait_ms": 0.8418477165944581, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 411.0, "episode_reward_mean": 569.58, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 205.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.79}, "hist_stats": {"episode_reward": [573.0, 582.0, 570.0, 582.0, 582.0, 579.0, 587.0, 582.0, 587.0, 573.0, 522.0, 579.0, 582.0, 411.0, 582.0, 582.0, 579.0, 582.0, 570.0, 579.0, 579.0, 482.0, 579.0, 524.0, 576.0, 579.0, 582.0, 582.0, 573.0, 570.0, 582.0, 639.0, 582.0, 573.0, 525.0, 573.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 573.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 513.0, 573.0, 573.0, 582.0, 570.0, 522.0, 581.0, 582.0, 630.0, 567.0, 582.0, 582.0, 579.0, 525.0, 579.0, 522.0, 627.0, 479.0, 573.0, 567.0, 516.0, 516.0, 587.0, 579.0, 582.0, 576.0, 587.0, 584.0, 525.0, 573.0, 576.0, 579.0, 573.0, 579.0, 468.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 579.0, 582.0, 567.0, 633.0, 582.0, 582.0, 525.0, 
570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 293.0, 289.0, 290.0, 280.0, 298.0, 284.0, 293.0, 289.0, 295.0, 284.0, 289.0, 298.0, 292.0, 290.0, 294.0, 293.0, 288.0, 285.0, 262.0, 260.0, 287.0, 292.0, 291.0, 291.0, 206.0, 205.0, 291.0, 291.0, 292.0, 290.0, 283.0, 296.0, 299.0, 283.0, 281.0, 289.0, 289.0, 290.0, 286.0, 293.0, 234.0, 248.0, 286.0, 293.0, 269.0, 255.0, 291.0, 285.0, 287.0, 292.0, 295.0, 287.0, 297.0, 285.0, 287.0, 286.0, 277.0, 293.0, 288.0, 294.0, 323.0, 316.0, 293.0, 289.0, 287.0, 286.0, 258.0, 267.0, 283.0, 290.0, 286.0, 293.0, 290.0, 289.0, 286.0, 293.0, 288.0, 294.0, 287.0, 295.0, 280.0, 302.0, 285.0, 288.0, 291.0, 285.0, 286.0, 296.0, 292.0, 290.0, 297.0, 285.0, 285.0, 297.0, 294.0, 288.0, 289.0, 293.0, 282.0, 297.0, 250.0, 263.0, 294.0, 279.0, 287.0, 286.0, 290.0, 292.0, 279.0, 291.0, 258.0, 264.0, 284.0, 297.0, 291.0, 291.0, 314.0, 316.0, 283.0, 284.0, 290.0, 292.0, 291.0, 291.0, 291.0, 288.0, 260.0, 265.0, 288.0, 291.0, 258.0, 264.0, 304.0, 323.0, 233.0, 246.0, 282.0, 291.0, 278.0, 289.0, 263.0, 253.0, 261.0, 255.0, 294.0, 293.0, 289.0, 290.0, 293.0, 289.0, 291.0, 285.0, 291.0, 296.0, 287.0, 297.0, 258.0, 267.0, 280.0, 293.0, 284.0, 292.0, 282.0, 297.0, 289.0, 284.0, 288.0, 291.0, 235.0, 233.0, 287.0, 286.0, 290.0, 289.0, 291.0, 291.0, 296.0, 280.0, 292.0, 284.0, 286.0, 290.0, 287.0, 292.0, 288.0, 294.0, 279.0, 288.0, 315.0, 318.0, 282.0, 300.0, 286.0, 296.0, 259.0, 266.0, 277.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 
0.6994271831971862, "mean_inference_ms": 1.250277241170522, "mean_action_processing_ms": 0.13402499118015013, "mean_env_wait_ms": 0.8418477165944581, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8857600, "num_agent_steps_trained": 8857600, "num_env_steps_sampled": 4428800, "num_env_steps_trained": 4428800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4428800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8857600, "timers": {"training_iteration_time_ms": 3631.459, "learn_time_ms": 1144.888, "learn_throughput": 11180.137, "synch_weights_time_ms": 9.559}, "counters": {"num_env_steps_sampled": 4428800, "num_env_steps_trained": 4428800, "num_agent_steps_sampled": 8857600, "num_agent_steps_trained": 8857600}, "done": false, "episodes_total": 11072, "training_iteration": 346, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-22-57", "timestamp": 1666581777, "time_this_iter_s": 3.5644054412841797, "time_total_s": 1325.9620053768158, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, 
"simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": 
false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1325.9620053768158, "timesteps_since_restore": 0, "iterations_since_restore": 346, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.18, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.31, "shaped_reward_min": 80, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.82, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.64, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.91, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.55, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.48, "dish_pickup_agent_0_min": 
2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.91, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.55, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.91, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.55, 
"viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000530306831933558, "policy_loss": 0.00014949997421354055, "vf_loss": 8.010214805603027, "vf_explained_var": 0.5265098810195923, "kl": 0.0022951120045036077, "entropy": 0.8404301404953003, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4441600, "num_env_steps_trained": 4441600, "num_agent_steps_sampled": 8883200, "num_agent_steps_trained": 8883200}, "sampler_results": {"episode_reward_max": 633.0, 
"episode_reward_min": 240.0, "episode_reward_mean": 565.91, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 282.955}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.31, "shaped_reward_min": 80, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.26, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.82, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.64, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.91, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.55, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.48, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.91, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.55, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.91, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.55, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 573.0, 525.0, 573.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 573.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 513.0, 573.0, 573.0, 582.0, 570.0, 522.0, 581.0, 582.0, 630.0, 567.0, 582.0, 582.0, 579.0, 525.0, 579.0, 522.0, 627.0, 479.0, 573.0, 567.0, 516.0, 516.0, 587.0, 579.0, 582.0, 576.0, 587.0, 584.0, 525.0, 573.0, 576.0, 579.0, 573.0, 579.0, 468.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 579.0, 582.0, 567.0, 633.0, 582.0, 582.0, 525.0, 570.0, 582.0, 582.0, 570.0, 582.0, 576.0, 576.0, 581.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 240.0, 573.0, 513.0, 408.0, 579.0, 573.0, 573.0, 576.0, 573.0, 525.0, 573.0, 524.0, 570.0, 570.0, 630.0, 576.0, 582.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 287.0, 286.0, 258.0, 267.0, 283.0, 290.0, 286.0, 293.0, 290.0, 289.0, 286.0, 293.0, 288.0, 294.0, 287.0, 295.0, 280.0, 302.0, 285.0, 288.0, 291.0, 285.0, 286.0, 296.0, 292.0, 290.0, 297.0, 285.0, 285.0, 297.0, 294.0, 288.0, 289.0, 293.0, 282.0, 297.0, 250.0, 263.0, 294.0, 279.0, 287.0, 286.0, 290.0, 292.0, 279.0, 291.0, 258.0, 264.0, 284.0, 297.0, 291.0, 291.0, 314.0, 316.0, 283.0, 284.0, 290.0, 292.0, 291.0, 291.0, 291.0, 288.0, 260.0, 265.0, 288.0, 291.0, 258.0, 264.0, 304.0, 323.0, 233.0, 246.0, 282.0, 291.0, 278.0, 289.0, 263.0, 253.0, 261.0, 255.0, 294.0, 293.0, 289.0, 290.0, 293.0, 289.0, 291.0, 285.0, 291.0, 296.0, 287.0, 297.0, 258.0, 267.0, 280.0, 
293.0, 284.0, 292.0, 282.0, 297.0, 289.0, 284.0, 288.0, 291.0, 235.0, 233.0, 287.0, 286.0, 290.0, 289.0, 291.0, 291.0, 296.0, 280.0, 292.0, 284.0, 286.0, 290.0, 287.0, 292.0, 288.0, 294.0, 279.0, 288.0, 315.0, 318.0, 282.0, 300.0, 286.0, 296.0, 259.0, 266.0, 277.0, 293.0, 290.0, 292.0, 292.0, 290.0, 281.0, 289.0, 290.0, 292.0, 285.0, 291.0, 289.0, 287.0, 289.0, 292.0, 287.0, 292.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 299.0, 283.0, 295.0, 287.0, 297.0, 282.0, 117.0, 123.0, 283.0, 290.0, 259.0, 254.0, 198.0, 210.0, 283.0, 296.0, 295.0, 278.0, 292.0, 281.0, 291.0, 285.0, 276.0, 297.0, 262.0, 263.0, 294.0, 279.0, 255.0, 269.0, 293.0, 277.0, 287.0, 283.0, 318.0, 312.0, 289.0, 287.0, 288.0, 294.0, 281.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6993324647836038, "mean_inference_ms": 1.2500780366039015, "mean_action_processing_ms": 0.1340147881497094, "mean_env_wait_ms": 0.8417315597688259, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 240.0, "episode_reward_mean": 565.91, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 117.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 282.955}, "hist_stats": {"episode_reward": [582.0, 573.0, 525.0, 573.0, 579.0, 579.0, 579.0, 582.0, 582.0, 582.0, 573.0, 576.0, 582.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 513.0, 573.0, 573.0, 582.0, 570.0, 522.0, 581.0, 582.0, 630.0, 567.0, 582.0, 582.0, 579.0, 525.0, 579.0, 522.0, 627.0, 479.0, 573.0, 567.0, 516.0, 516.0, 587.0, 579.0, 582.0, 576.0, 587.0, 584.0, 525.0, 573.0, 576.0, 579.0, 573.0, 579.0, 468.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 579.0, 582.0, 567.0, 633.0, 582.0, 582.0, 525.0, 570.0, 582.0, 582.0, 570.0, 582.0, 576.0, 576.0, 581.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 240.0, 573.0, 513.0, 408.0, 579.0, 573.0, 573.0, 576.0, 573.0, 525.0, 573.0, 524.0, 570.0, 570.0, 630.0, 576.0, 582.0, 570.0], "episode_lengths": [400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 287.0, 286.0, 258.0, 267.0, 283.0, 290.0, 286.0, 293.0, 290.0, 289.0, 286.0, 293.0, 288.0, 294.0, 287.0, 295.0, 280.0, 302.0, 285.0, 288.0, 291.0, 285.0, 286.0, 296.0, 292.0, 290.0, 297.0, 285.0, 285.0, 297.0, 294.0, 288.0, 289.0, 293.0, 282.0, 297.0, 250.0, 263.0, 294.0, 279.0, 287.0, 286.0, 290.0, 292.0, 279.0, 291.0, 258.0, 264.0, 284.0, 297.0, 291.0, 291.0, 314.0, 316.0, 283.0, 284.0, 290.0, 292.0, 291.0, 291.0, 291.0, 288.0, 260.0, 265.0, 288.0, 291.0, 258.0, 264.0, 304.0, 323.0, 233.0, 246.0, 282.0, 291.0, 278.0, 289.0, 263.0, 253.0, 261.0, 255.0, 294.0, 293.0, 289.0, 290.0, 293.0, 289.0, 291.0, 285.0, 291.0, 296.0, 287.0, 297.0, 258.0, 267.0, 280.0, 293.0, 284.0, 292.0, 282.0, 297.0, 289.0, 284.0, 288.0, 291.0, 235.0, 233.0, 287.0, 286.0, 290.0, 289.0, 291.0, 291.0, 296.0, 280.0, 292.0, 284.0, 286.0, 290.0, 287.0, 292.0, 288.0, 294.0, 279.0, 288.0, 315.0, 318.0, 282.0, 300.0, 286.0, 296.0, 259.0, 266.0, 277.0, 293.0, 290.0, 292.0, 292.0, 290.0, 281.0, 289.0, 290.0, 292.0, 285.0, 291.0, 289.0, 287.0, 289.0, 292.0, 287.0, 292.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 299.0, 283.0, 295.0, 287.0, 297.0, 282.0, 117.0, 123.0, 283.0, 290.0, 259.0, 254.0, 198.0, 210.0, 283.0, 296.0, 295.0, 278.0, 292.0, 281.0, 291.0, 285.0, 276.0, 297.0, 262.0, 263.0, 294.0, 279.0, 255.0, 269.0, 293.0, 277.0, 287.0, 283.0, 318.0, 312.0, 289.0, 287.0, 288.0, 294.0, 281.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6993324647836038, 
"mean_inference_ms": 1.2500780366039015, "mean_action_processing_ms": 0.1340147881497094, "mean_env_wait_ms": 0.8417315597688259, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8883200, "num_agent_steps_trained": 8883200, "num_env_steps_sampled": 4441600, "num_env_steps_trained": 4441600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4441600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8883200, "timers": {"training_iteration_time_ms": 3628.325, "learn_time_ms": 1138.994, "learn_throughput": 11237.988, "synch_weights_time_ms": 9.363}, "counters": {"num_env_steps_sampled": 4441600, "num_env_steps_trained": 4441600, "num_agent_steps_sampled": 8883200, "num_agent_steps_trained": 8883200}, "done": false, "episodes_total": 11104, "training_iteration": 347, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-01", "timestamp": 1666581781, "time_this_iter_s": 3.624319553375244, "time_total_s": 1329.586324930191, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, 
"use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": 
-1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": 
false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1329.586324930191, "timesteps_since_restore": 0, "iterations_since_restore": 347, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.660000000000004, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 172.49, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 
0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.04, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.72, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.54, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.68, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.44, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 
9, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.68, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 16.44, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.68, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.44, "viable_onion_potting_agent_1_min": 6, 
"viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008702042396180332, "policy_loss": 0.0005100345006212592, "vf_loss": 7.837711811065674, "vf_explained_var": 0.5423364639282227, "kl": 0.00229667779058218, "entropy": 0.8472009897232056, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4454400, "num_env_steps_trained": 4454400, "num_agent_steps_sampled": 8908800, "num_agent_steps_trained": 8908800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 140.0, "episode_reward_mean": 
558.89, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 66.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 279.445}, "custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 172.49, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.04, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 26, "onion_pickup_agent_1_mean": 16.72, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.91, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 26, "useful_onion_pickup_agent_1_mean": 16.54, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 4, "onion_drop_agent_1_mean": 0.05, 
"onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.68, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 16.44, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.68, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 25, 
"optimal_onion_potting_agent_1_mean": 16.44, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.68, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 16.44, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 579.0, 522.0, 627.0, 479.0, 
573.0, 567.0, 516.0, 516.0, 587.0, 579.0, 582.0, 576.0, 587.0, 584.0, 525.0, 573.0, 576.0, 579.0, 573.0, 579.0, 468.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 579.0, 582.0, 567.0, 633.0, 582.0, 582.0, 525.0, 570.0, 582.0, 582.0, 570.0, 582.0, 576.0, 576.0, 581.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 240.0, 573.0, 513.0, 408.0, 579.0, 573.0, 573.0, 576.0, 573.0, 525.0, 573.0, 524.0, 570.0, 570.0, 630.0, 576.0, 582.0, 570.0, 582.0, 582.0, 576.0, 579.0, 473.0, 573.0, 582.0, 576.0, 576.0, 582.0, 576.0, 582.0, 579.0, 630.0, 579.0, 573.0, 516.0, 630.0, 140.0, 576.0, 522.0, 530.0, 525.0, 582.0, 587.0, 582.0, 576.0, 630.0, 579.0, 582.0, 411.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 265.0, 288.0, 291.0, 258.0, 264.0, 304.0, 323.0, 233.0, 246.0, 282.0, 291.0, 278.0, 289.0, 263.0, 253.0, 261.0, 255.0, 294.0, 293.0, 289.0, 290.0, 293.0, 289.0, 291.0, 285.0, 291.0, 296.0, 287.0, 297.0, 258.0, 267.0, 280.0, 293.0, 284.0, 292.0, 282.0, 297.0, 289.0, 284.0, 288.0, 291.0, 235.0, 233.0, 287.0, 286.0, 290.0, 289.0, 291.0, 291.0, 296.0, 280.0, 292.0, 284.0, 286.0, 290.0, 287.0, 292.0, 288.0, 294.0, 279.0, 288.0, 315.0, 318.0, 282.0, 300.0, 286.0, 296.0, 259.0, 266.0, 277.0, 293.0, 290.0, 292.0, 292.0, 290.0, 281.0, 289.0, 290.0, 292.0, 285.0, 291.0, 289.0, 287.0, 289.0, 292.0, 287.0, 292.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 299.0, 283.0, 295.0, 287.0, 297.0, 282.0, 117.0, 123.0, 283.0, 290.0, 259.0, 254.0, 198.0, 210.0, 283.0, 296.0, 295.0, 278.0, 292.0, 
281.0, 291.0, 285.0, 276.0, 297.0, 262.0, 263.0, 294.0, 279.0, 255.0, 269.0, 293.0, 277.0, 287.0, 283.0, 318.0, 312.0, 289.0, 287.0, 288.0, 294.0, 281.0, 289.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 295.0, 284.0, 229.0, 244.0, 290.0, 283.0, 290.0, 292.0, 293.0, 283.0, 288.0, 288.0, 287.0, 295.0, 284.0, 292.0, 289.0, 293.0, 295.0, 284.0, 318.0, 312.0, 291.0, 288.0, 282.0, 291.0, 259.0, 257.0, 319.0, 311.0, 74.0, 66.0, 291.0, 285.0, 259.0, 263.0, 261.0, 269.0, 260.0, 265.0, 293.0, 289.0, 289.0, 298.0, 291.0, 291.0, 292.0, 284.0, 315.0, 315.0, 294.0, 285.0, 282.0, 300.0, 195.0, 216.0, 261.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6992606177352377, "mean_inference_ms": 1.2499053635842985, "mean_action_processing_ms": 0.13400700343117392, "mean_env_wait_ms": 0.8416441949257816, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 140.0, "episode_reward_mean": 558.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 66.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 279.445}, "hist_stats": {"episode_reward": [525.0, 579.0, 522.0, 627.0, 479.0, 573.0, 567.0, 516.0, 516.0, 587.0, 579.0, 582.0, 576.0, 587.0, 584.0, 525.0, 573.0, 576.0, 579.0, 573.0, 579.0, 468.0, 573.0, 579.0, 582.0, 576.0, 576.0, 576.0, 579.0, 582.0, 567.0, 633.0, 582.0, 582.0, 525.0, 570.0, 582.0, 582.0, 570.0, 582.0, 576.0, 576.0, 581.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 240.0, 573.0, 513.0, 408.0, 579.0, 573.0, 573.0, 576.0, 573.0, 525.0, 573.0, 524.0, 570.0, 570.0, 630.0, 576.0, 582.0, 570.0, 582.0, 582.0, 576.0, 579.0, 473.0, 573.0, 582.0, 576.0, 576.0, 582.0, 576.0, 582.0, 579.0, 630.0, 579.0, 573.0, 516.0, 630.0, 140.0, 576.0, 522.0, 530.0, 525.0, 582.0, 587.0, 582.0, 576.0, 630.0, 579.0, 582.0, 411.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 265.0, 288.0, 291.0, 258.0, 264.0, 304.0, 323.0, 233.0, 246.0, 282.0, 291.0, 278.0, 289.0, 263.0, 253.0, 261.0, 255.0, 294.0, 293.0, 289.0, 290.0, 293.0, 289.0, 291.0, 285.0, 291.0, 296.0, 287.0, 297.0, 258.0, 267.0, 280.0, 293.0, 284.0, 292.0, 282.0, 297.0, 289.0, 284.0, 288.0, 291.0, 235.0, 233.0, 287.0, 286.0, 290.0, 289.0, 291.0, 291.0, 296.0, 280.0, 292.0, 284.0, 286.0, 290.0, 287.0, 292.0, 288.0, 294.0, 279.0, 288.0, 315.0, 318.0, 282.0, 300.0, 286.0, 296.0, 259.0, 266.0, 277.0, 293.0, 290.0, 292.0, 292.0, 290.0, 281.0, 289.0, 290.0, 292.0, 285.0, 291.0, 289.0, 287.0, 289.0, 292.0, 287.0, 292.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 299.0, 283.0, 295.0, 287.0, 297.0, 282.0, 117.0, 123.0, 283.0, 290.0, 259.0, 254.0, 198.0, 210.0, 283.0, 296.0, 295.0, 278.0, 292.0, 281.0, 291.0, 285.0, 276.0, 297.0, 262.0, 263.0, 294.0, 279.0, 255.0, 269.0, 293.0, 277.0, 287.0, 283.0, 318.0, 312.0, 289.0, 287.0, 288.0, 294.0, 281.0, 289.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 295.0, 284.0, 229.0, 244.0, 290.0, 283.0, 290.0, 292.0, 293.0, 283.0, 288.0, 288.0, 287.0, 295.0, 284.0, 292.0, 289.0, 293.0, 295.0, 284.0, 318.0, 312.0, 291.0, 288.0, 282.0, 291.0, 259.0, 257.0, 319.0, 311.0, 74.0, 66.0, 291.0, 285.0, 259.0, 263.0, 261.0, 269.0, 260.0, 265.0, 293.0, 289.0, 289.0, 298.0, 291.0, 291.0, 292.0, 284.0, 315.0, 315.0, 294.0, 285.0, 282.0, 300.0, 195.0, 216.0, 261.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6992606177352377, "mean_inference_ms": 1.2499053635842985, "mean_action_processing_ms": 0.13400700343117392, "mean_env_wait_ms": 0.8416441949257816, 
"mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8908800, "num_agent_steps_trained": 8908800, "num_env_steps_sampled": 4454400, "num_env_steps_trained": 4454400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4454400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8908800, "timers": {"training_iteration_time_ms": 3645.318, "learn_time_ms": 1148.275, "learn_throughput": 11147.157, "synch_weights_time_ms": 9.796}, "counters": {"num_env_steps_sampled": 4454400, "num_env_steps_trained": 4454400, "num_agent_steps_sampled": 8908800, "num_agent_steps_trained": 8908800}, "done": false, "episodes_total": 11136, "training_iteration": 348, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-04", "timestamp": 1666581784, "time_this_iter_s": 3.7834579944610596, "time_total_s": 1333.369782924652, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": 
{"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, 
"_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, 
"mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": 
false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, 
"collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, 
"metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1333.369782924652, "timesteps_since_restore": 0, "iterations_since_restore": 348, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.950000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 172.42, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.84, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.73, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.76, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.53, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.61, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 2, 
"dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.53, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.61, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.53, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.61, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 25, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001777943572960794, "policy_loss": -0.0021599321626126766, "vf_loss": 7.920291900634766, "vf_explained_var": 0.5149234533309937, "kl": 0.0022678468376398087, "entropy": 0.8200807571411133, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4467200, "num_env_steps_trained": 4467200, "num_agent_steps_sampled": 8934400, "num_agent_steps_trained": 8934400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 140.0, "episode_reward_mean": 558.82, "episode_len_mean": 400.0, 
"episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 66.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.41}, "custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 172.42, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.84, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.73, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.76, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, 
"onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.53, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.61, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.62, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.53, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 
16.61, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.53, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.61, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 525.0, 570.0, 582.0, 582.0, 570.0, 582.0, 576.0, 576.0, 581.0, 
579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 240.0, 573.0, 513.0, 408.0, 579.0, 573.0, 573.0, 576.0, 573.0, 525.0, 573.0, 524.0, 570.0, 570.0, 630.0, 576.0, 582.0, 570.0, 582.0, 582.0, 576.0, 579.0, 473.0, 573.0, 582.0, 576.0, 576.0, 582.0, 576.0, 582.0, 579.0, 630.0, 579.0, 573.0, 516.0, 630.0, 140.0, 576.0, 522.0, 530.0, 525.0, 582.0, 587.0, 582.0, 576.0, 630.0, 579.0, 582.0, 411.0, 522.0, 584.0, 582.0, 570.0, 579.0, 579.0, 579.0, 576.0, 627.0, 570.0, 573.0, 522.0, 576.0, 405.0, 530.0, 579.0, 587.0, 587.0, 582.0, 530.0, 630.0, 468.0, 576.0, 492.0, 576.0, 582.0, 582.0, 522.0, 582.0, 576.0, 573.0, 633.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 300.0, 286.0, 296.0, 259.0, 266.0, 277.0, 293.0, 290.0, 292.0, 292.0, 290.0, 281.0, 289.0, 290.0, 292.0, 285.0, 291.0, 289.0, 287.0, 289.0, 292.0, 287.0, 292.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 299.0, 283.0, 295.0, 287.0, 297.0, 282.0, 117.0, 123.0, 283.0, 290.0, 259.0, 254.0, 198.0, 210.0, 283.0, 296.0, 295.0, 278.0, 292.0, 281.0, 291.0, 285.0, 276.0, 297.0, 262.0, 263.0, 294.0, 279.0, 255.0, 269.0, 293.0, 277.0, 287.0, 283.0, 318.0, 312.0, 289.0, 287.0, 288.0, 294.0, 281.0, 289.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 295.0, 284.0, 229.0, 244.0, 290.0, 283.0, 290.0, 292.0, 293.0, 283.0, 288.0, 288.0, 287.0, 295.0, 284.0, 292.0, 289.0, 293.0, 295.0, 284.0, 318.0, 312.0, 291.0, 288.0, 282.0, 291.0, 259.0, 257.0, 319.0, 311.0, 74.0, 66.0, 291.0, 285.0, 259.0, 263.0, 261.0, 269.0, 260.0, 265.0, 293.0, 
289.0, 289.0, 298.0, 291.0, 291.0, 292.0, 284.0, 315.0, 315.0, 294.0, 285.0, 282.0, 300.0, 195.0, 216.0, 261.0, 261.0, 295.0, 289.0, 291.0, 291.0, 280.0, 290.0, 288.0, 291.0, 285.0, 294.0, 286.0, 293.0, 288.0, 288.0, 319.0, 308.0, 285.0, 285.0, 293.0, 280.0, 255.0, 267.0, 297.0, 279.0, 196.0, 209.0, 270.0, 260.0, 281.0, 298.0, 291.0, 296.0, 287.0, 300.0, 287.0, 295.0, 267.0, 263.0, 314.0, 316.0, 233.0, 235.0, 288.0, 288.0, 239.0, 253.0, 288.0, 288.0, 288.0, 294.0, 288.0, 294.0, 256.0, 266.0, 291.0, 291.0, 284.0, 292.0, 289.0, 284.0, 319.0, 314.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6992173718947344, "mean_inference_ms": 1.2497383539343545, "mean_action_processing_ms": 0.13400108868613897, "mean_env_wait_ms": 0.8415750013057658, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 140.0, "episode_reward_mean": 558.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 66.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.41}, "hist_stats": {"episode_reward": [582.0, 582.0, 525.0, 570.0, 582.0, 582.0, 570.0, 582.0, 576.0, 576.0, 581.0, 579.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 240.0, 573.0, 513.0, 408.0, 579.0, 573.0, 573.0, 576.0, 573.0, 525.0, 573.0, 524.0, 570.0, 570.0, 630.0, 576.0, 582.0, 570.0, 582.0, 582.0, 576.0, 579.0, 473.0, 573.0, 582.0, 576.0, 576.0, 582.0, 576.0, 582.0, 579.0, 630.0, 579.0, 573.0, 516.0, 630.0, 140.0, 576.0, 522.0, 530.0, 525.0, 582.0, 587.0, 582.0, 576.0, 630.0, 579.0, 582.0, 411.0, 522.0, 584.0, 582.0, 570.0, 579.0, 579.0, 579.0, 576.0, 627.0, 570.0, 573.0, 522.0, 576.0, 405.0, 530.0, 579.0, 587.0, 587.0, 582.0, 530.0, 630.0, 468.0, 576.0, 492.0, 576.0, 582.0, 582.0, 522.0, 582.0, 576.0, 573.0, 633.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 300.0, 286.0, 296.0, 259.0, 266.0, 277.0, 293.0, 290.0, 292.0, 292.0, 290.0, 281.0, 289.0, 290.0, 292.0, 285.0, 291.0, 289.0, 287.0, 289.0, 292.0, 287.0, 292.0, 291.0, 291.0, 288.0, 294.0, 288.0, 294.0, 299.0, 283.0, 295.0, 287.0, 297.0, 282.0, 117.0, 123.0, 283.0, 290.0, 259.0, 254.0, 198.0, 210.0, 283.0, 296.0, 295.0, 278.0, 292.0, 281.0, 291.0, 285.0, 276.0, 297.0, 262.0, 263.0, 294.0, 279.0, 255.0, 269.0, 293.0, 277.0, 287.0, 283.0, 318.0, 312.0, 289.0, 287.0, 288.0, 294.0, 281.0, 289.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 295.0, 284.0, 229.0, 244.0, 290.0, 283.0, 290.0, 292.0, 293.0, 283.0, 288.0, 288.0, 287.0, 295.0, 284.0, 292.0, 289.0, 293.0, 295.0, 284.0, 318.0, 312.0, 291.0, 288.0, 282.0, 291.0, 259.0, 257.0, 319.0, 311.0, 74.0, 66.0, 291.0, 285.0, 259.0, 263.0, 261.0, 269.0, 260.0, 265.0, 293.0, 289.0, 289.0, 298.0, 291.0, 291.0, 292.0, 284.0, 315.0, 315.0, 294.0, 285.0, 282.0, 300.0, 195.0, 216.0, 261.0, 261.0, 295.0, 289.0, 291.0, 291.0, 280.0, 290.0, 288.0, 291.0, 285.0, 294.0, 286.0, 293.0, 288.0, 288.0, 319.0, 308.0, 285.0, 285.0, 293.0, 280.0, 255.0, 267.0, 297.0, 279.0, 196.0, 209.0, 270.0, 260.0, 281.0, 298.0, 291.0, 296.0, 287.0, 300.0, 287.0, 295.0, 267.0, 263.0, 314.0, 316.0, 233.0, 235.0, 288.0, 288.0, 239.0, 253.0, 288.0, 288.0, 288.0, 294.0, 288.0, 294.0, 256.0, 266.0, 291.0, 291.0, 284.0, 292.0, 289.0, 284.0, 319.0, 314.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6992173718947344, "mean_inference_ms": 1.2497383539343545, "mean_action_processing_ms": 0.13400108868613897, "mean_env_wait_ms": 0.8415750013057658, "mean_env_render_ms": 0.0}, 
"num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8934400, "num_agent_steps_trained": 8934400, "num_env_steps_sampled": 4467200, "num_env_steps_trained": 4467200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4467200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8934400, "timers": {"training_iteration_time_ms": 3649.028, "learn_time_ms": 1153.769, "learn_throughput": 11094.076, "synch_weights_time_ms": 10.192}, "counters": {"num_env_steps_sampled": 4467200, "num_env_steps_trained": 4467200, "num_agent_steps_sampled": 8934400, "num_agent_steps_trained": 8934400}, "done": false, "episodes_total": 11168, "training_iteration": 349, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-08", "timestamp": 1666581788, "time_this_iter_s": 3.756636619567871, "time_total_s": 1337.12641954422, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, 
"wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": 
false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": 
false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, 
"use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": 
-1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1337.12641954422, "timesteps_since_restore": 0, "iterations_since_restore": 349, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.639999999999997, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 174.24, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, 
"useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.62, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.37, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.47, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.34, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, 
"useful_dish_pickup_agent_0_mean": 5.15, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.34, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.34, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, 
"viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00298161618411541, "policy_loss": 0.0026111463084816933, "vf_loss": 7.9099225997924805, "vf_explained_var": 0.5091748833656311, "kl": 0.002554523292928934, "entropy": 0.8410444259643555, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4480000, "num_env_steps_trained": 4480000, "num_agent_steps_sampled": 8960000, "num_agent_steps_trained": 8960000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 140.0, "episode_reward_mean": 563.84, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 66.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 281.92}, "custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 174.24, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.49, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.62, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.37, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.47, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, 
"useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.14, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.34, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.15, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 5, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.14, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.34, 
"optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.14, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.34, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 576.0, 582.0, 570.0, 582.0, 582.0, 576.0, 579.0, 473.0, 573.0, 582.0, 576.0, 
576.0, 582.0, 576.0, 582.0, 579.0, 630.0, 579.0, 573.0, 516.0, 630.0, 140.0, 576.0, 522.0, 530.0, 525.0, 582.0, 587.0, 582.0, 576.0, 630.0, 579.0, 582.0, 411.0, 522.0, 584.0, 582.0, 570.0, 579.0, 579.0, 579.0, 576.0, 627.0, 570.0, 573.0, 522.0, 576.0, 405.0, 530.0, 579.0, 587.0, 587.0, 582.0, 530.0, 630.0, 468.0, 576.0, 492.0, 576.0, 582.0, 582.0, 522.0, 582.0, 576.0, 573.0, 633.0, 579.0, 627.0, 525.0, 579.0, 584.0, 579.0, 573.0, 530.0, 530.0, 576.0, 576.0, 587.0, 579.0, 630.0, 582.0, 587.0, 576.0, 573.0, 525.0, 570.0, 579.0, 579.0, 570.0, 582.0, 522.0, 530.0, 579.0, 582.0, 579.0, 573.0, 579.0, 527.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 312.0, 289.0, 287.0, 288.0, 294.0, 281.0, 289.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 295.0, 284.0, 229.0, 244.0, 290.0, 283.0, 290.0, 292.0, 293.0, 283.0, 288.0, 288.0, 287.0, 295.0, 284.0, 292.0, 289.0, 293.0, 295.0, 284.0, 318.0, 312.0, 291.0, 288.0, 282.0, 291.0, 259.0, 257.0, 319.0, 311.0, 74.0, 66.0, 291.0, 285.0, 259.0, 263.0, 261.0, 269.0, 260.0, 265.0, 293.0, 289.0, 289.0, 298.0, 291.0, 291.0, 292.0, 284.0, 315.0, 315.0, 294.0, 285.0, 282.0, 300.0, 195.0, 216.0, 261.0, 261.0, 295.0, 289.0, 291.0, 291.0, 280.0, 290.0, 288.0, 291.0, 285.0, 294.0, 286.0, 293.0, 288.0, 288.0, 319.0, 308.0, 285.0, 285.0, 293.0, 280.0, 255.0, 267.0, 297.0, 279.0, 196.0, 209.0, 270.0, 260.0, 281.0, 298.0, 291.0, 296.0, 287.0, 300.0, 287.0, 295.0, 267.0, 263.0, 314.0, 316.0, 233.0, 235.0, 288.0, 288.0, 239.0, 253.0, 288.0, 288.0, 
288.0, 294.0, 288.0, 294.0, 256.0, 266.0, 291.0, 291.0, 284.0, 292.0, 289.0, 284.0, 319.0, 314.0, 289.0, 290.0, 311.0, 316.0, 262.0, 263.0, 292.0, 287.0, 289.0, 295.0, 286.0, 293.0, 284.0, 289.0, 257.0, 273.0, 268.0, 262.0, 286.0, 290.0, 290.0, 286.0, 290.0, 297.0, 293.0, 286.0, 322.0, 308.0, 283.0, 299.0, 285.0, 302.0, 284.0, 292.0, 286.0, 287.0, 267.0, 258.0, 290.0, 280.0, 289.0, 290.0, 290.0, 289.0, 281.0, 289.0, 293.0, 289.0, 255.0, 267.0, 259.0, 271.0, 289.0, 290.0, 289.0, 293.0, 291.0, 288.0, 285.0, 288.0, 287.0, 292.0, 255.0, 272.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.699179377440831, "mean_inference_ms": 1.2495672904889525, "mean_action_processing_ms": 0.13399441691854486, "mean_env_wait_ms": 0.8414986864344643, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 140.0, "episode_reward_mean": 563.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 66.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 281.92}, "hist_stats": {"episode_reward": [630.0, 576.0, 582.0, 570.0, 582.0, 582.0, 576.0, 579.0, 473.0, 573.0, 582.0, 576.0, 576.0, 582.0, 576.0, 582.0, 579.0, 630.0, 579.0, 573.0, 516.0, 630.0, 140.0, 576.0, 522.0, 530.0, 525.0, 582.0, 587.0, 582.0, 576.0, 630.0, 579.0, 582.0, 411.0, 522.0, 584.0, 582.0, 570.0, 579.0, 579.0, 579.0, 576.0, 627.0, 570.0, 573.0, 522.0, 576.0, 405.0, 530.0, 579.0, 587.0, 587.0, 582.0, 530.0, 630.0, 468.0, 576.0, 492.0, 576.0, 582.0, 582.0, 522.0, 582.0, 576.0, 573.0, 633.0, 579.0, 627.0, 525.0, 579.0, 584.0, 579.0, 573.0, 530.0, 530.0, 576.0, 576.0, 587.0, 579.0, 630.0, 582.0, 587.0, 576.0, 573.0, 525.0, 570.0, 579.0, 579.0, 570.0, 582.0, 522.0, 530.0, 579.0, 582.0, 579.0, 573.0, 579.0, 527.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 312.0, 289.0, 287.0, 288.0, 294.0, 281.0, 289.0, 292.0, 290.0, 289.0, 293.0, 290.0, 286.0, 295.0, 284.0, 229.0, 244.0, 290.0, 283.0, 290.0, 292.0, 293.0, 283.0, 288.0, 288.0, 287.0, 295.0, 284.0, 292.0, 289.0, 293.0, 295.0, 284.0, 318.0, 312.0, 291.0, 288.0, 282.0, 291.0, 259.0, 257.0, 319.0, 311.0, 74.0, 66.0, 291.0, 285.0, 259.0, 263.0, 261.0, 269.0, 260.0, 265.0, 293.0, 289.0, 289.0, 298.0, 291.0, 291.0, 292.0, 284.0, 315.0, 315.0, 294.0, 285.0, 282.0, 300.0, 195.0, 216.0, 261.0, 261.0, 295.0, 289.0, 291.0, 291.0, 280.0, 290.0, 288.0, 291.0, 285.0, 294.0, 286.0, 293.0, 288.0, 288.0, 319.0, 308.0, 285.0, 285.0, 293.0, 280.0, 255.0, 267.0, 297.0, 279.0, 196.0, 209.0, 270.0, 260.0, 281.0, 298.0, 291.0, 296.0, 287.0, 300.0, 287.0, 295.0, 267.0, 263.0, 314.0, 316.0, 233.0, 235.0, 288.0, 288.0, 239.0, 253.0, 288.0, 288.0, 288.0, 294.0, 288.0, 294.0, 256.0, 266.0, 291.0, 291.0, 284.0, 292.0, 289.0, 284.0, 319.0, 314.0, 289.0, 290.0, 311.0, 316.0, 262.0, 263.0, 292.0, 287.0, 289.0, 295.0, 286.0, 293.0, 284.0, 289.0, 257.0, 273.0, 268.0, 262.0, 286.0, 290.0, 290.0, 286.0, 290.0, 297.0, 293.0, 286.0, 322.0, 308.0, 283.0, 299.0, 285.0, 302.0, 284.0, 292.0, 286.0, 287.0, 267.0, 258.0, 290.0, 280.0, 289.0, 290.0, 290.0, 289.0, 281.0, 289.0, 293.0, 289.0, 255.0, 267.0, 259.0, 271.0, 289.0, 290.0, 289.0, 293.0, 291.0, 288.0, 285.0, 288.0, 287.0, 292.0, 255.0, 272.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.699179377440831, "mean_inference_ms": 1.2495672904889525, "mean_action_processing_ms": 0.13399441691854486, "mean_env_wait_ms": 0.8414986864344643, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, 
"num_healthy_workers": 16, "num_agent_steps_sampled": 8960000, "num_agent_steps_trained": 8960000, "num_env_steps_sampled": 4480000, "num_env_steps_trained": 4480000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4480000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8960000, "timers": {"training_iteration_time_ms": 3626.79, "learn_time_ms": 1150.532, "learn_throughput": 11125.291, "synch_weights_time_ms": 10.217}, "counters": {"num_env_steps_sampled": 4480000, "num_env_steps_trained": 4480000, "num_agent_steps_sampled": 8960000, "num_agent_steps_trained": 8960000}, "done": false, "episodes_total": 11200, "training_iteration": 350, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-12", "timestamp": 1666581792, "time_this_iter_s": 3.706865072250366, "time_total_s": 1340.8332846164703, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": 
[], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, 
"wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": 
false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, 
"prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1340.8332846164703, "timesteps_since_restore": 0, "iterations_since_restore": 350, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 174.8, "shaped_reward_min": 125, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, 
"useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.32, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.17, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.75, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.64, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, 
"useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.64, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.64, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, 
"viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0001787699293345213, "policy_loss": -0.0005462196422740817, "vf_loss": 7.881190776824951, "vf_explained_var": 0.5432215929031372, "kl": 0.002550200093537569, "entropy": 0.8413398265838623, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4492800, "num_env_steps_trained": 4492800, "num_agent_steps_sampled": 8985600, "num_agent_steps_trained": 8985600}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 405.0, "episode_reward_mean": 566.0, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.0}, "custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 174.8, "shaped_reward_min": 125, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.32, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.17, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.75, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, 
"useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.64, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.64, 
"optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.64, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 411.0, 522.0, 584.0, 582.0, 570.0, 579.0, 579.0, 579.0, 576.0, 627.0, 
570.0, 573.0, 522.0, 576.0, 405.0, 530.0, 579.0, 587.0, 587.0, 582.0, 530.0, 630.0, 468.0, 576.0, 492.0, 576.0, 582.0, 582.0, 522.0, 582.0, 576.0, 573.0, 633.0, 579.0, 627.0, 525.0, 579.0, 584.0, 579.0, 573.0, 530.0, 530.0, 576.0, 576.0, 587.0, 579.0, 630.0, 582.0, 587.0, 576.0, 573.0, 525.0, 570.0, 579.0, 579.0, 570.0, 582.0, 522.0, 530.0, 579.0, 582.0, 579.0, 573.0, 579.0, 527.0, 579.0, 573.0, 627.0, 587.0, 582.0, 582.0, 579.0, 525.0, 530.0, 570.0, 582.0, 525.0, 570.0, 527.0, 582.0, 579.0, 582.0, 587.0, 513.0, 582.0, 582.0, 576.0, 582.0, 582.0, 570.0, 522.0, 582.0, 522.0, 630.0, 576.0, 522.0, 573.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 285.0, 282.0, 300.0, 195.0, 216.0, 261.0, 261.0, 295.0, 289.0, 291.0, 291.0, 280.0, 290.0, 288.0, 291.0, 285.0, 294.0, 286.0, 293.0, 288.0, 288.0, 319.0, 308.0, 285.0, 285.0, 293.0, 280.0, 255.0, 267.0, 297.0, 279.0, 196.0, 209.0, 270.0, 260.0, 281.0, 298.0, 291.0, 296.0, 287.0, 300.0, 287.0, 295.0, 267.0, 263.0, 314.0, 316.0, 233.0, 235.0, 288.0, 288.0, 239.0, 253.0, 288.0, 288.0, 288.0, 294.0, 288.0, 294.0, 256.0, 266.0, 291.0, 291.0, 284.0, 292.0, 289.0, 284.0, 319.0, 314.0, 289.0, 290.0, 311.0, 316.0, 262.0, 263.0, 292.0, 287.0, 289.0, 295.0, 286.0, 293.0, 284.0, 289.0, 257.0, 273.0, 268.0, 262.0, 286.0, 290.0, 290.0, 286.0, 290.0, 297.0, 293.0, 286.0, 322.0, 308.0, 283.0, 299.0, 285.0, 302.0, 284.0, 292.0, 286.0, 287.0, 267.0, 258.0, 290.0, 280.0, 289.0, 290.0, 290.0, 289.0, 281.0, 289.0, 293.0, 289.0, 255.0, 267.0, 
259.0, 271.0, 289.0, 290.0, 289.0, 293.0, 291.0, 288.0, 285.0, 288.0, 287.0, 292.0, 255.0, 272.0, 289.0, 290.0, 281.0, 292.0, 311.0, 316.0, 287.0, 300.0, 297.0, 285.0, 289.0, 293.0, 289.0, 290.0, 270.0, 255.0, 261.0, 269.0, 281.0, 289.0, 289.0, 293.0, 258.0, 267.0, 271.0, 299.0, 260.0, 267.0, 288.0, 294.0, 284.0, 295.0, 290.0, 292.0, 293.0, 294.0, 253.0, 260.0, 289.0, 293.0, 289.0, 293.0, 288.0, 288.0, 290.0, 292.0, 288.0, 294.0, 286.0, 284.0, 262.0, 260.0, 292.0, 290.0, 262.0, 260.0, 315.0, 315.0, 288.0, 288.0, 263.0, 259.0, 287.0, 286.0, 286.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6991359079758581, "mean_inference_ms": 1.2493866116169676, "mean_action_processing_ms": 0.13398713337582482, "mean_env_wait_ms": 0.8414112426488535, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 405.0, "episode_reward_mean": 566.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 195.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.0}, "hist_stats": {"episode_reward": [579.0, 582.0, 411.0, 522.0, 584.0, 582.0, 570.0, 579.0, 579.0, 579.0, 576.0, 627.0, 570.0, 573.0, 522.0, 576.0, 405.0, 530.0, 579.0, 587.0, 587.0, 582.0, 530.0, 630.0, 468.0, 576.0, 492.0, 576.0, 582.0, 582.0, 522.0, 582.0, 576.0, 573.0, 633.0, 579.0, 627.0, 525.0, 579.0, 584.0, 579.0, 573.0, 530.0, 530.0, 576.0, 576.0, 587.0, 579.0, 630.0, 582.0, 587.0, 576.0, 573.0, 525.0, 570.0, 579.0, 579.0, 570.0, 582.0, 522.0, 530.0, 579.0, 582.0, 579.0, 573.0, 579.0, 527.0, 579.0, 573.0, 627.0, 587.0, 582.0, 582.0, 579.0, 525.0, 530.0, 570.0, 582.0, 525.0, 570.0, 527.0, 582.0, 579.0, 582.0, 587.0, 513.0, 582.0, 582.0, 576.0, 582.0, 582.0, 570.0, 522.0, 582.0, 522.0, 630.0, 576.0, 522.0, 573.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 285.0, 282.0, 300.0, 195.0, 216.0, 261.0, 261.0, 295.0, 289.0, 291.0, 291.0, 280.0, 290.0, 288.0, 291.0, 285.0, 294.0, 286.0, 293.0, 288.0, 288.0, 319.0, 308.0, 285.0, 285.0, 293.0, 280.0, 255.0, 267.0, 297.0, 279.0, 196.0, 209.0, 270.0, 260.0, 281.0, 298.0, 291.0, 296.0, 287.0, 300.0, 287.0, 295.0, 267.0, 263.0, 314.0, 316.0, 233.0, 235.0, 288.0, 288.0, 239.0, 253.0, 288.0, 288.0, 288.0, 294.0, 288.0, 294.0, 256.0, 266.0, 291.0, 291.0, 284.0, 292.0, 289.0, 284.0, 319.0, 314.0, 289.0, 290.0, 311.0, 316.0, 262.0, 263.0, 292.0, 287.0, 289.0, 295.0, 286.0, 293.0, 284.0, 289.0, 257.0, 273.0, 268.0, 262.0, 286.0, 290.0, 290.0, 286.0, 290.0, 297.0, 293.0, 286.0, 322.0, 308.0, 283.0, 299.0, 285.0, 302.0, 284.0, 292.0, 286.0, 287.0, 267.0, 258.0, 290.0, 280.0, 289.0, 290.0, 290.0, 289.0, 281.0, 289.0, 293.0, 289.0, 255.0, 267.0, 259.0, 271.0, 289.0, 290.0, 289.0, 293.0, 291.0, 288.0, 285.0, 288.0, 287.0, 292.0, 255.0, 272.0, 289.0, 290.0, 281.0, 292.0, 311.0, 316.0, 287.0, 300.0, 297.0, 285.0, 289.0, 293.0, 289.0, 290.0, 270.0, 255.0, 261.0, 269.0, 281.0, 289.0, 289.0, 293.0, 258.0, 267.0, 271.0, 299.0, 260.0, 267.0, 288.0, 294.0, 284.0, 295.0, 290.0, 292.0, 293.0, 294.0, 253.0, 260.0, 289.0, 293.0, 289.0, 293.0, 288.0, 288.0, 290.0, 292.0, 288.0, 294.0, 286.0, 284.0, 262.0, 260.0, 292.0, 290.0, 262.0, 260.0, 315.0, 315.0, 288.0, 288.0, 263.0, 259.0, 287.0, 286.0, 286.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6991359079758581, "mean_inference_ms": 1.2493866116169676, "mean_action_processing_ms": 0.13398713337582482, "mean_env_wait_ms": 0.8414112426488535, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 
0, "num_healthy_workers": 16, "num_agent_steps_sampled": 8985600, "num_agent_steps_trained": 8985600, "num_env_steps_sampled": 4492800, "num_env_steps_trained": 4492800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4492800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 8985600, "timers": {"training_iteration_time_ms": 3629.976, "learn_time_ms": 1157.394, "learn_throughput": 11059.331, "synch_weights_time_ms": 9.886}, "counters": {"num_env_steps_sampled": 4492800, "num_env_steps_trained": 4492800, "num_agent_steps_sampled": 8985600, "num_agent_steps_trained": 8985600}, "done": false, "episodes_total": 11232, "training_iteration": 351, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-16", "timestamp": 1666581796, "time_this_iter_s": 3.7251431941986084, "time_total_s": 1344.558427810669, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, 
"wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": 
false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, 
"prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1344.558427810669, "timesteps_since_restore": 0, "iterations_since_restore": 351, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.6, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.56, "shaped_reward_min": 145, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 
0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.2, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.02, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.94, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.75, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.53, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, 
"useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.22, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.75, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.75, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0004346300265751779, "policy_loss": -0.000794908672105521, "vf_loss": 7.800143241882324, "vf_explained_var": 0.5466172695159912, "kl": 0.002108823275193572, "entropy": 0.8394697904586792, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4505600, "num_env_steps_trained": 4505600, "num_agent_steps_sampled": 9011200, "num_agent_steps_trained": 9011200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 567.96, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, 
"policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.98}, "custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.56, "shaped_reward_min": 145, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.2, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.16, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.02, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.94, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.75, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.53, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.22, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.77, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 9, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.75, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 8, 
"optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.75, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 573.0, 633.0, 579.0, 627.0, 525.0, 579.0, 584.0, 579.0, 573.0, 530.0, 530.0, 576.0, 576.0, 587.0, 579.0, 630.0, 
582.0, 587.0, 576.0, 573.0, 525.0, 570.0, 579.0, 579.0, 570.0, 582.0, 522.0, 530.0, 579.0, 582.0, 579.0, 573.0, 579.0, 527.0, 579.0, 573.0, 627.0, 587.0, 582.0, 582.0, 579.0, 525.0, 530.0, 570.0, 582.0, 525.0, 570.0, 527.0, 582.0, 579.0, 582.0, 587.0, 513.0, 582.0, 582.0, 576.0, 582.0, 582.0, 570.0, 522.0, 582.0, 522.0, 630.0, 576.0, 522.0, 573.0, 567.0, 582.0, 579.0, 573.0, 522.0, 579.0, 530.0, 570.0, 582.0, 581.0, 630.0, 584.0, 530.0, 582.0, 582.0, 579.0, 582.0, 576.0, 570.0, 513.0, 573.0, 576.0, 579.0, 579.0, 530.0, 573.0, 530.0, 530.0, 582.0, 465.0, 525.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 292.0, 289.0, 284.0, 319.0, 314.0, 289.0, 290.0, 311.0, 316.0, 262.0, 263.0, 292.0, 287.0, 289.0, 295.0, 286.0, 293.0, 284.0, 289.0, 257.0, 273.0, 268.0, 262.0, 286.0, 290.0, 290.0, 286.0, 290.0, 297.0, 293.0, 286.0, 322.0, 308.0, 283.0, 299.0, 285.0, 302.0, 284.0, 292.0, 286.0, 287.0, 267.0, 258.0, 290.0, 280.0, 289.0, 290.0, 290.0, 289.0, 281.0, 289.0, 293.0, 289.0, 255.0, 267.0, 259.0, 271.0, 289.0, 290.0, 289.0, 293.0, 291.0, 288.0, 285.0, 288.0, 287.0, 292.0, 255.0, 272.0, 289.0, 290.0, 281.0, 292.0, 311.0, 316.0, 287.0, 300.0, 297.0, 285.0, 289.0, 293.0, 289.0, 290.0, 270.0, 255.0, 261.0, 269.0, 281.0, 289.0, 289.0, 293.0, 258.0, 267.0, 271.0, 299.0, 260.0, 267.0, 288.0, 294.0, 284.0, 295.0, 290.0, 292.0, 293.0, 294.0, 253.0, 260.0, 289.0, 293.0, 289.0, 293.0, 288.0, 288.0, 290.0, 292.0, 288.0, 294.0, 286.0, 284.0, 262.0, 260.0, 292.0, 290.0, 262.0, 
260.0, 315.0, 315.0, 288.0, 288.0, 263.0, 259.0, 287.0, 286.0, 286.0, 281.0, 287.0, 295.0, 288.0, 291.0, 282.0, 291.0, 262.0, 260.0, 289.0, 290.0, 256.0, 274.0, 279.0, 291.0, 282.0, 300.0, 293.0, 288.0, 315.0, 315.0, 289.0, 295.0, 256.0, 274.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 284.0, 298.0, 290.0, 286.0, 280.0, 290.0, 258.0, 255.0, 281.0, 292.0, 289.0, 287.0, 287.0, 292.0, 290.0, 289.0, 272.0, 258.0, 283.0, 290.0, 272.0, 258.0, 261.0, 269.0, 293.0, 289.0, 234.0, 231.0, 262.0, 263.0, 285.0, 288.0, 287.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6990793068100484, "mean_inference_ms": 1.249211086551166, "mean_action_processing_ms": 0.13397983546694048, "mean_env_wait_ms": 0.8413220532129634, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 567.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.98}, "hist_stats": {"episode_reward": [576.0, 573.0, 633.0, 579.0, 627.0, 525.0, 579.0, 584.0, 579.0, 573.0, 530.0, 530.0, 576.0, 576.0, 587.0, 579.0, 630.0, 582.0, 587.0, 576.0, 573.0, 525.0, 570.0, 579.0, 579.0, 570.0, 582.0, 522.0, 530.0, 579.0, 582.0, 579.0, 573.0, 579.0, 527.0, 579.0, 573.0, 627.0, 587.0, 582.0, 582.0, 579.0, 525.0, 530.0, 570.0, 582.0, 525.0, 570.0, 527.0, 582.0, 579.0, 582.0, 587.0, 513.0, 582.0, 582.0, 576.0, 582.0, 582.0, 570.0, 522.0, 582.0, 522.0, 630.0, 576.0, 522.0, 573.0, 567.0, 582.0, 579.0, 573.0, 522.0, 579.0, 530.0, 570.0, 582.0, 581.0, 630.0, 584.0, 530.0, 582.0, 582.0, 579.0, 582.0, 576.0, 570.0, 513.0, 573.0, 576.0, 579.0, 579.0, 530.0, 573.0, 530.0, 530.0, 582.0, 465.0, 525.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 292.0, 289.0, 284.0, 319.0, 314.0, 289.0, 290.0, 311.0, 316.0, 262.0, 263.0, 292.0, 287.0, 289.0, 295.0, 286.0, 293.0, 284.0, 289.0, 257.0, 273.0, 268.0, 262.0, 286.0, 290.0, 290.0, 286.0, 290.0, 297.0, 293.0, 286.0, 322.0, 308.0, 283.0, 299.0, 285.0, 302.0, 284.0, 292.0, 286.0, 287.0, 267.0, 258.0, 290.0, 280.0, 289.0, 290.0, 290.0, 289.0, 281.0, 289.0, 293.0, 289.0, 255.0, 267.0, 259.0, 271.0, 289.0, 290.0, 289.0, 293.0, 291.0, 288.0, 285.0, 288.0, 287.0, 292.0, 255.0, 272.0, 289.0, 290.0, 281.0, 292.0, 311.0, 316.0, 287.0, 300.0, 297.0, 285.0, 289.0, 293.0, 289.0, 290.0, 270.0, 255.0, 261.0, 269.0, 281.0, 289.0, 289.0, 293.0, 258.0, 267.0, 271.0, 299.0, 260.0, 267.0, 288.0, 294.0, 284.0, 295.0, 290.0, 292.0, 293.0, 294.0, 253.0, 260.0, 289.0, 293.0, 289.0, 293.0, 288.0, 288.0, 290.0, 292.0, 288.0, 294.0, 286.0, 284.0, 262.0, 260.0, 292.0, 290.0, 262.0, 260.0, 315.0, 315.0, 288.0, 288.0, 263.0, 259.0, 287.0, 286.0, 286.0, 281.0, 287.0, 295.0, 288.0, 291.0, 282.0, 291.0, 262.0, 260.0, 289.0, 290.0, 256.0, 274.0, 279.0, 291.0, 282.0, 300.0, 293.0, 288.0, 315.0, 315.0, 289.0, 295.0, 256.0, 274.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 284.0, 298.0, 290.0, 286.0, 280.0, 290.0, 258.0, 255.0, 281.0, 292.0, 289.0, 287.0, 287.0, 292.0, 290.0, 289.0, 272.0, 258.0, 283.0, 290.0, 272.0, 258.0, 261.0, 269.0, 293.0, 289.0, 234.0, 231.0, 262.0, 263.0, 285.0, 288.0, 287.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6990793068100484, "mean_inference_ms": 1.249211086551166, "mean_action_processing_ms": 0.13397983546694048, "mean_env_wait_ms": 0.8413220532129634, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, 
"num_agent_steps_sampled": 9011200, "num_agent_steps_trained": 9011200, "num_env_steps_sampled": 4505600, "num_env_steps_trained": 4505600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4505600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9011200, "timers": {"training_iteration_time_ms": 3630.348, "learn_time_ms": 1158.032, "learn_throughput": 11053.234, "synch_weights_time_ms": 9.73}, "counters": {"num_env_steps_sampled": 4505600, "num_env_steps_trained": 4505600, "num_agent_steps_sampled": 9011200, "num_agent_steps_trained": 9011200}, "done": false, "episodes_total": 11264, "training_iteration": 352, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-20", "timestamp": 1666581800, "time_this_iter_s": 3.6939845085144043, "time_total_s": 1348.2524123191833, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], 
"counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 
64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], 
"counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, 
"replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, 
"collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1348.2524123191833, "timesteps_since_restore": 0, "iterations_since_restore": 352, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.933333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.89, "shaped_reward_min": 145, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 
0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.18, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.08, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.9, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.72, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.78, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 1, 
"useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.72, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.78, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.72, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.78, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005870356690138578, "policy_loss": 0.0002167444326914847, "vf_loss": 7.8559651374816895, "vf_explained_var": 0.5242763161659241, "kl": 0.002454460132867098, "entropy": 0.8306089639663696, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4518400, "num_env_steps_trained": 4518400, "num_agent_steps_sampled": 9036800, "num_agent_steps_trained": 9036800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 465.0, "episode_reward_mean": 566.09, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 321.0}, 
"policy_reward_mean": {"ppo": 283.045}, "custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.89, "shaped_reward_min": 145, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.18, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.08, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.97, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.9, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, 
"useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.72, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.78, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.72, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.78, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 25, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.72, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.78, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 527.0, 579.0, 573.0, 627.0, 587.0, 582.0, 582.0, 579.0, 525.0, 530.0, 570.0, 582.0, 525.0, 570.0, 527.0, 582.0, 579.0, 582.0, 587.0, 513.0, 582.0, 
582.0, 576.0, 582.0, 582.0, 570.0, 522.0, 582.0, 522.0, 630.0, 576.0, 522.0, 573.0, 567.0, 582.0, 579.0, 573.0, 522.0, 579.0, 530.0, 570.0, 582.0, 581.0, 630.0, 584.0, 530.0, 582.0, 582.0, 579.0, 582.0, 576.0, 570.0, 513.0, 573.0, 576.0, 579.0, 579.0, 530.0, 573.0, 530.0, 530.0, 582.0, 465.0, 525.0, 573.0, 576.0, 582.0, 579.0, 522.0, 576.0, 582.0, 564.0, 579.0, 519.0, 579.0, 530.0, 570.0, 579.0, 582.0, 522.0, 587.0, 579.0, 530.0, 567.0, 627.0, 522.0, 533.0, 576.0, 579.0, 570.0, 627.0, 579.0, 582.0, 579.0, 630.0, 582.0, 582.0, 468.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 287.0, 292.0, 255.0, 272.0, 289.0, 290.0, 281.0, 292.0, 311.0, 316.0, 287.0, 300.0, 297.0, 285.0, 289.0, 293.0, 289.0, 290.0, 270.0, 255.0, 261.0, 269.0, 281.0, 289.0, 289.0, 293.0, 258.0, 267.0, 271.0, 299.0, 260.0, 267.0, 288.0, 294.0, 284.0, 295.0, 290.0, 292.0, 293.0, 294.0, 253.0, 260.0, 289.0, 293.0, 289.0, 293.0, 288.0, 288.0, 290.0, 292.0, 288.0, 294.0, 286.0, 284.0, 262.0, 260.0, 292.0, 290.0, 262.0, 260.0, 315.0, 315.0, 288.0, 288.0, 263.0, 259.0, 287.0, 286.0, 286.0, 281.0, 287.0, 295.0, 288.0, 291.0, 282.0, 291.0, 262.0, 260.0, 289.0, 290.0, 256.0, 274.0, 279.0, 291.0, 282.0, 300.0, 293.0, 288.0, 315.0, 315.0, 289.0, 295.0, 256.0, 274.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 284.0, 298.0, 290.0, 286.0, 280.0, 290.0, 258.0, 255.0, 281.0, 292.0, 289.0, 287.0, 287.0, 292.0, 290.0, 289.0, 272.0, 258.0, 283.0, 290.0, 272.0, 258.0, 261.0, 269.0, 293.0, 289.0, 234.0, 231.0, 262.0, 
263.0, 285.0, 288.0, 287.0, 289.0, 290.0, 292.0, 285.0, 294.0, 261.0, 261.0, 284.0, 292.0, 288.0, 294.0, 287.0, 277.0, 287.0, 292.0, 253.0, 266.0, 292.0, 287.0, 259.0, 271.0, 281.0, 289.0, 286.0, 293.0, 289.0, 293.0, 258.0, 264.0, 288.0, 299.0, 279.0, 300.0, 260.0, 270.0, 291.0, 276.0, 312.0, 315.0, 264.0, 258.0, 269.0, 264.0, 279.0, 297.0, 287.0, 292.0, 280.0, 290.0, 321.0, 306.0, 288.0, 291.0, 291.0, 291.0, 276.0, 303.0, 318.0, 312.0, 288.0, 294.0, 285.0, 297.0, 234.0, 234.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6990295269652824, "mean_inference_ms": 1.2490525243340393, "mean_action_processing_ms": 0.13397436423398662, "mean_env_wait_ms": 0.8412535322114826, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 465.0, "episode_reward_mean": 566.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 231.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.045}, "hist_stats": {"episode_reward": [573.0, 579.0, 527.0, 579.0, 573.0, 627.0, 587.0, 582.0, 582.0, 579.0, 525.0, 530.0, 570.0, 582.0, 525.0, 570.0, 527.0, 582.0, 579.0, 582.0, 587.0, 513.0, 582.0, 582.0, 576.0, 582.0, 582.0, 570.0, 522.0, 582.0, 522.0, 630.0, 576.0, 522.0, 573.0, 567.0, 582.0, 579.0, 573.0, 522.0, 579.0, 530.0, 570.0, 582.0, 581.0, 630.0, 584.0, 530.0, 582.0, 582.0, 579.0, 582.0, 576.0, 570.0, 513.0, 573.0, 576.0, 579.0, 579.0, 530.0, 573.0, 530.0, 530.0, 582.0, 465.0, 525.0, 573.0, 576.0, 582.0, 579.0, 522.0, 576.0, 582.0, 564.0, 579.0, 519.0, 579.0, 530.0, 570.0, 579.0, 582.0, 522.0, 587.0, 579.0, 530.0, 567.0, 627.0, 522.0, 533.0, 576.0, 579.0, 570.0, 627.0, 579.0, 582.0, 579.0, 630.0, 582.0, 582.0, 468.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 287.0, 292.0, 255.0, 272.0, 289.0, 290.0, 281.0, 292.0, 311.0, 316.0, 287.0, 300.0, 297.0, 285.0, 289.0, 293.0, 289.0, 290.0, 270.0, 255.0, 261.0, 269.0, 281.0, 289.0, 289.0, 293.0, 258.0, 267.0, 271.0, 299.0, 260.0, 267.0, 288.0, 294.0, 284.0, 295.0, 290.0, 292.0, 293.0, 294.0, 253.0, 260.0, 289.0, 293.0, 289.0, 293.0, 288.0, 288.0, 290.0, 292.0, 288.0, 294.0, 286.0, 284.0, 262.0, 260.0, 292.0, 290.0, 262.0, 260.0, 315.0, 315.0, 288.0, 288.0, 263.0, 259.0, 287.0, 286.0, 286.0, 281.0, 287.0, 295.0, 288.0, 291.0, 282.0, 291.0, 262.0, 260.0, 289.0, 290.0, 256.0, 274.0, 279.0, 291.0, 282.0, 300.0, 293.0, 288.0, 315.0, 315.0, 289.0, 295.0, 256.0, 274.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 284.0, 298.0, 290.0, 286.0, 280.0, 290.0, 258.0, 255.0, 281.0, 292.0, 289.0, 287.0, 287.0, 292.0, 290.0, 289.0, 272.0, 258.0, 283.0, 290.0, 272.0, 258.0, 261.0, 269.0, 293.0, 289.0, 234.0, 231.0, 262.0, 263.0, 285.0, 288.0, 287.0, 289.0, 290.0, 292.0, 285.0, 294.0, 261.0, 261.0, 284.0, 292.0, 288.0, 294.0, 287.0, 277.0, 287.0, 292.0, 253.0, 266.0, 292.0, 287.0, 259.0, 271.0, 281.0, 289.0, 286.0, 293.0, 289.0, 293.0, 258.0, 264.0, 288.0, 299.0, 279.0, 300.0, 260.0, 270.0, 291.0, 276.0, 312.0, 315.0, 264.0, 258.0, 269.0, 264.0, 279.0, 297.0, 287.0, 292.0, 280.0, 290.0, 321.0, 306.0, 288.0, 291.0, 291.0, 291.0, 276.0, 303.0, 318.0, 312.0, 288.0, 294.0, 285.0, 297.0, 234.0, 234.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6990295269652824, "mean_inference_ms": 1.2490525243340393, "mean_action_processing_ms": 0.13397436423398662, "mean_env_wait_ms": 0.8412535322114826, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9036800, 
"num_agent_steps_trained": 9036800, "num_env_steps_sampled": 4518400, "num_env_steps_trained": 4518400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4518400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9036800, "timers": {"training_iteration_time_ms": 3627.527, "learn_time_ms": 1151.632, "learn_throughput": 11114.666, "synch_weights_time_ms": 9.744}, "counters": {"num_env_steps_sampled": 4518400, "num_env_steps_trained": 4518400, "num_agent_steps_sampled": 9036800, "num_agent_steps_trained": 9036800}, "done": false, "episodes_total": 11296, "training_iteration": 353, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-24", "timestamp": 1666581804, "time_this_iter_s": 3.699242353439331, "time_total_s": 1351.9516546726227, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], 
"same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, 
"attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": 
[], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 
64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, 
"replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": 
-1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1351.9516546726227, "timesteps_since_restore": 0, "iterations_since_restore": 353, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.000000000000004, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 174.01, "shaped_reward_min": 43, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, 
"tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.28, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.98, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.07, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.75, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.62, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, 
"useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.62, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.62, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, 
"viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013683558208867908, "policy_loss": -0.0017354553565382957, "vf_loss": 7.806475639343262, "vf_explained_var": 0.561039924621582, "kl": 0.004653509706258774, "entropy": 0.8270936608314514, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4531200, "num_env_steps_trained": 4531200, "num_agent_steps_sampled": 9062400, "num_agent_steps_trained": 9062400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 123.0, "episode_reward_mean": 563.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.605}, 
"custom_metrics": {"sparse_reward_mean": 194.6, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 174.01, "shaped_reward_min": 43, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.28, "onion_pickup_agent_0_min": 4, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.98, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.07, "useful_onion_pickup_agent_0_min": 4, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.75, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, 
"useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.62, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.62, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.62, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 522.0, 573.0, 567.0, 582.0, 579.0, 573.0, 522.0, 579.0, 530.0, 570.0, 582.0, 581.0, 630.0, 584.0, 530.0, 582.0, 582.0, 579.0, 582.0, 576.0, 570.0, 513.0, 573.0, 576.0, 579.0, 579.0, 530.0, 573.0, 
530.0, 530.0, 582.0, 465.0, 525.0, 573.0, 576.0, 582.0, 579.0, 522.0, 576.0, 582.0, 564.0, 579.0, 519.0, 579.0, 530.0, 570.0, 579.0, 582.0, 522.0, 587.0, 579.0, 530.0, 567.0, 627.0, 522.0, 533.0, 576.0, 579.0, 570.0, 627.0, 579.0, 582.0, 579.0, 630.0, 582.0, 582.0, 468.0, 582.0, 582.0, 576.0, 579.0, 582.0, 579.0, 576.0, 579.0, 582.0, 582.0, 522.0, 582.0, 519.0, 630.0, 582.0, 123.0, 459.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 584.0, 630.0, 579.0, 630.0, 579.0, 519.0, 582.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 263.0, 259.0, 287.0, 286.0, 286.0, 281.0, 287.0, 295.0, 288.0, 291.0, 282.0, 291.0, 262.0, 260.0, 289.0, 290.0, 256.0, 274.0, 279.0, 291.0, 282.0, 300.0, 293.0, 288.0, 315.0, 315.0, 289.0, 295.0, 256.0, 274.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 284.0, 298.0, 290.0, 286.0, 280.0, 290.0, 258.0, 255.0, 281.0, 292.0, 289.0, 287.0, 287.0, 292.0, 290.0, 289.0, 272.0, 258.0, 283.0, 290.0, 272.0, 258.0, 261.0, 269.0, 293.0, 289.0, 234.0, 231.0, 262.0, 263.0, 285.0, 288.0, 287.0, 289.0, 290.0, 292.0, 285.0, 294.0, 261.0, 261.0, 284.0, 292.0, 288.0, 294.0, 287.0, 277.0, 287.0, 292.0, 253.0, 266.0, 292.0, 287.0, 259.0, 271.0, 281.0, 289.0, 286.0, 293.0, 289.0, 293.0, 258.0, 264.0, 288.0, 299.0, 279.0, 300.0, 260.0, 270.0, 291.0, 276.0, 312.0, 315.0, 264.0, 258.0, 269.0, 264.0, 279.0, 297.0, 287.0, 292.0, 280.0, 290.0, 321.0, 306.0, 288.0, 291.0, 291.0, 291.0, 276.0, 303.0, 318.0, 312.0, 288.0, 294.0, 285.0, 297.0, 234.0, 234.0, 291.0, 
291.0, 290.0, 292.0, 297.0, 279.0, 280.0, 299.0, 293.0, 289.0, 286.0, 293.0, 287.0, 289.0, 296.0, 283.0, 288.0, 294.0, 290.0, 292.0, 260.0, 262.0, 289.0, 293.0, 257.0, 262.0, 316.0, 314.0, 292.0, 290.0, 60.0, 63.0, 225.0, 234.0, 291.0, 288.0, 295.0, 284.0, 261.0, 261.0, 291.0, 288.0, 288.0, 294.0, 285.0, 291.0, 295.0, 289.0, 310.0, 320.0, 285.0, 294.0, 313.0, 317.0, 292.0, 287.0, 250.0, 269.0, 290.0, 292.0, 285.0, 297.0, 286.0, 298.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6989651337371378, "mean_inference_ms": 1.2489964337375246, "mean_action_processing_ms": 0.13396585439248668, "mean_env_wait_ms": 0.8412787324349185, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 123.0, "episode_reward_mean": 563.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.605}, "hist_stats": {"episode_reward": [576.0, 522.0, 573.0, 567.0, 582.0, 579.0, 573.0, 522.0, 579.0, 530.0, 570.0, 582.0, 581.0, 630.0, 584.0, 530.0, 582.0, 582.0, 579.0, 582.0, 576.0, 570.0, 513.0, 573.0, 576.0, 579.0, 579.0, 530.0, 573.0, 530.0, 530.0, 582.0, 465.0, 525.0, 573.0, 576.0, 582.0, 579.0, 522.0, 576.0, 582.0, 564.0, 579.0, 519.0, 579.0, 530.0, 570.0, 579.0, 582.0, 522.0, 587.0, 579.0, 530.0, 567.0, 627.0, 522.0, 533.0, 576.0, 579.0, 570.0, 627.0, 579.0, 582.0, 579.0, 630.0, 582.0, 582.0, 468.0, 582.0, 582.0, 576.0, 579.0, 582.0, 579.0, 576.0, 579.0, 582.0, 582.0, 522.0, 582.0, 519.0, 630.0, 582.0, 123.0, 459.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 584.0, 630.0, 579.0, 630.0, 579.0, 519.0, 582.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 263.0, 259.0, 287.0, 286.0, 286.0, 281.0, 287.0, 295.0, 288.0, 291.0, 282.0, 291.0, 262.0, 260.0, 289.0, 290.0, 256.0, 274.0, 279.0, 291.0, 282.0, 300.0, 293.0, 288.0, 315.0, 315.0, 289.0, 295.0, 256.0, 274.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 284.0, 298.0, 290.0, 286.0, 280.0, 290.0, 258.0, 255.0, 281.0, 292.0, 289.0, 287.0, 287.0, 292.0, 290.0, 289.0, 272.0, 258.0, 283.0, 290.0, 272.0, 258.0, 261.0, 269.0, 293.0, 289.0, 234.0, 231.0, 262.0, 263.0, 285.0, 288.0, 287.0, 289.0, 290.0, 292.0, 285.0, 294.0, 261.0, 261.0, 284.0, 292.0, 288.0, 294.0, 287.0, 277.0, 287.0, 292.0, 253.0, 266.0, 292.0, 287.0, 259.0, 271.0, 281.0, 289.0, 286.0, 293.0, 289.0, 293.0, 258.0, 264.0, 288.0, 299.0, 279.0, 300.0, 260.0, 270.0, 291.0, 276.0, 312.0, 315.0, 264.0, 258.0, 269.0, 264.0, 279.0, 297.0, 287.0, 292.0, 280.0, 290.0, 321.0, 306.0, 288.0, 291.0, 291.0, 291.0, 276.0, 303.0, 318.0, 312.0, 288.0, 294.0, 285.0, 297.0, 234.0, 234.0, 291.0, 291.0, 290.0, 292.0, 297.0, 279.0, 280.0, 299.0, 293.0, 289.0, 286.0, 293.0, 287.0, 289.0, 296.0, 283.0, 288.0, 294.0, 290.0, 292.0, 260.0, 262.0, 289.0, 293.0, 257.0, 262.0, 316.0, 314.0, 292.0, 290.0, 60.0, 63.0, 225.0, 234.0, 291.0, 288.0, 295.0, 284.0, 261.0, 261.0, 291.0, 288.0, 288.0, 294.0, 285.0, 291.0, 295.0, 289.0, 310.0, 320.0, 285.0, 294.0, 313.0, 317.0, 292.0, 287.0, 250.0, 269.0, 290.0, 292.0, 285.0, 297.0, 286.0, 298.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6989651337371378, "mean_inference_ms": 1.2489964337375246, "mean_action_processing_ms": 0.13396585439248668, "mean_env_wait_ms": 0.8412787324349185, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9062400, "num_agent_steps_trained": 9062400, 
"num_env_steps_sampled": 4531200, "num_env_steps_trained": 4531200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4531200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9062400, "timers": {"training_iteration_time_ms": 3660.774, "learn_time_ms": 1163.312, "learn_throughput": 11003.067, "synch_weights_time_ms": 9.759}, "counters": {"num_env_steps_sampled": 4531200, "num_env_steps_trained": 4531200, "num_agent_steps_sampled": 9062400, "num_agent_steps_trained": 9062400}, "done": false, "episodes_total": 11328, "training_iteration": 354, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-28", "timestamp": 1666581808, "time_this_iter_s": 3.9559929370880127, "time_total_s": 1355.9076476097107, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": 
{"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, 
"attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": 
true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, 
"attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, 
"prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, 
"learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1355.9076476097107, "timesteps_since_restore": 0, "iterations_since_restore": 354, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.759999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 175.01, "shaped_reward_min": 43, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, 
"tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.94, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.5, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.76, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.29, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.55, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.53, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, 
"useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.55, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.55, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0020636606495827436, "policy_loss": -0.0024204510264098644, "vf_loss": 7.716197967529297, "vf_explained_var": 0.5518883466720581, "kl": 0.0022711479105055332, "entropy": 0.8296573162078857, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4544000, "num_env_steps_trained": 4544000, "num_agent_steps_sampled": 9088000, "num_agent_steps_trained": 9088000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 123.0, "episode_reward_mean": 566.61, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.305}, "custom_metrics": {"sparse_reward_mean": 195.8, 
"sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 175.01, "shaped_reward_min": 43, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.94, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.5, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.76, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.29, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.55, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.53, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.01, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.39, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.67, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.55, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.55, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [465.0, 525.0, 573.0, 576.0, 582.0, 579.0, 522.0, 576.0, 582.0, 564.0, 579.0, 519.0, 579.0, 530.0, 570.0, 579.0, 582.0, 522.0, 587.0, 579.0, 530.0, 567.0, 627.0, 522.0, 533.0, 576.0, 579.0, 570.0, 627.0, 579.0, 582.0, 579.0, 630.0, 582.0, 582.0, 468.0, 582.0, 582.0, 576.0, 579.0, 582.0, 
579.0, 576.0, 579.0, 582.0, 582.0, 522.0, 582.0, 519.0, 630.0, 582.0, 123.0, 459.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 584.0, 630.0, 579.0, 630.0, 579.0, 519.0, 582.0, 582.0, 584.0, 579.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 530.0, 576.0, 573.0, 573.0, 525.0, 582.0, 587.0, 579.0, 582.0, 579.0, 579.0, 587.0, 576.0, 579.0, 627.0, 630.0, 630.0, 582.0, 573.0, 579.0, 579.0, 579.0, 582.0, 570.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [234.0, 231.0, 262.0, 263.0, 285.0, 288.0, 287.0, 289.0, 290.0, 292.0, 285.0, 294.0, 261.0, 261.0, 284.0, 292.0, 288.0, 294.0, 287.0, 277.0, 287.0, 292.0, 253.0, 266.0, 292.0, 287.0, 259.0, 271.0, 281.0, 289.0, 286.0, 293.0, 289.0, 293.0, 258.0, 264.0, 288.0, 299.0, 279.0, 300.0, 260.0, 270.0, 291.0, 276.0, 312.0, 315.0, 264.0, 258.0, 269.0, 264.0, 279.0, 297.0, 287.0, 292.0, 280.0, 290.0, 321.0, 306.0, 288.0, 291.0, 291.0, 291.0, 276.0, 303.0, 318.0, 312.0, 288.0, 294.0, 285.0, 297.0, 234.0, 234.0, 291.0, 291.0, 290.0, 292.0, 297.0, 279.0, 280.0, 299.0, 293.0, 289.0, 286.0, 293.0, 287.0, 289.0, 296.0, 283.0, 288.0, 294.0, 290.0, 292.0, 260.0, 262.0, 289.0, 293.0, 257.0, 262.0, 316.0, 314.0, 292.0, 290.0, 60.0, 63.0, 225.0, 234.0, 291.0, 288.0, 295.0, 284.0, 261.0, 261.0, 291.0, 288.0, 288.0, 294.0, 285.0, 291.0, 295.0, 289.0, 310.0, 320.0, 285.0, 294.0, 313.0, 317.0, 292.0, 287.0, 250.0, 269.0, 290.0, 292.0, 285.0, 297.0, 286.0, 298.0, 291.0, 288.0, 290.0, 283.0, 288.0, 294.0, 293.0, 286.0, 285.0, 291.0, 255.0, 267.0, 292.0, 
290.0, 261.0, 269.0, 289.0, 287.0, 283.0, 290.0, 284.0, 289.0, 259.0, 266.0, 280.0, 302.0, 288.0, 299.0, 284.0, 295.0, 295.0, 287.0, 290.0, 289.0, 290.0, 289.0, 286.0, 301.0, 287.0, 289.0, 289.0, 290.0, 308.0, 319.0, 312.0, 318.0, 314.0, 316.0, 288.0, 294.0, 287.0, 286.0, 284.0, 295.0, 287.0, 292.0, 296.0, 283.0, 289.0, 293.0, 285.0, 285.0, 266.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6988876950800459, "mean_inference_ms": 1.248936325069807, "mean_action_processing_ms": 0.13395738623505635, "mean_env_wait_ms": 0.8413004695451081, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 123.0, "episode_reward_mean": 566.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.305}, "hist_stats": {"episode_reward": [465.0, 525.0, 573.0, 576.0, 582.0, 579.0, 522.0, 576.0, 582.0, 564.0, 579.0, 519.0, 579.0, 530.0, 570.0, 579.0, 582.0, 522.0, 587.0, 579.0, 530.0, 567.0, 627.0, 522.0, 533.0, 576.0, 579.0, 570.0, 627.0, 579.0, 582.0, 579.0, 630.0, 582.0, 582.0, 468.0, 582.0, 582.0, 576.0, 579.0, 582.0, 579.0, 576.0, 579.0, 582.0, 582.0, 522.0, 582.0, 519.0, 630.0, 582.0, 123.0, 459.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 584.0, 630.0, 579.0, 630.0, 579.0, 519.0, 582.0, 582.0, 584.0, 579.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 530.0, 576.0, 573.0, 573.0, 525.0, 582.0, 587.0, 579.0, 582.0, 579.0, 579.0, 587.0, 576.0, 579.0, 627.0, 630.0, 630.0, 582.0, 573.0, 579.0, 579.0, 579.0, 582.0, 570.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [234.0, 231.0, 262.0, 263.0, 285.0, 288.0, 287.0, 289.0, 290.0, 292.0, 285.0, 294.0, 261.0, 261.0, 284.0, 292.0, 288.0, 294.0, 287.0, 277.0, 287.0, 292.0, 253.0, 266.0, 292.0, 287.0, 259.0, 271.0, 281.0, 289.0, 286.0, 293.0, 289.0, 293.0, 258.0, 264.0, 288.0, 299.0, 279.0, 300.0, 260.0, 270.0, 291.0, 276.0, 312.0, 315.0, 264.0, 258.0, 269.0, 264.0, 279.0, 297.0, 287.0, 292.0, 280.0, 290.0, 321.0, 306.0, 288.0, 291.0, 291.0, 291.0, 276.0, 303.0, 318.0, 312.0, 288.0, 294.0, 285.0, 297.0, 234.0, 234.0, 291.0, 291.0, 290.0, 292.0, 297.0, 279.0, 280.0, 299.0, 293.0, 289.0, 286.0, 293.0, 287.0, 289.0, 296.0, 283.0, 288.0, 294.0, 290.0, 292.0, 260.0, 262.0, 289.0, 293.0, 257.0, 262.0, 316.0, 314.0, 292.0, 290.0, 60.0, 63.0, 225.0, 234.0, 291.0, 288.0, 295.0, 284.0, 261.0, 261.0, 291.0, 288.0, 288.0, 294.0, 285.0, 291.0, 295.0, 289.0, 310.0, 320.0, 285.0, 294.0, 313.0, 317.0, 292.0, 287.0, 250.0, 269.0, 290.0, 292.0, 285.0, 297.0, 286.0, 298.0, 291.0, 288.0, 290.0, 283.0, 288.0, 294.0, 293.0, 286.0, 285.0, 291.0, 255.0, 267.0, 292.0, 290.0, 261.0, 269.0, 289.0, 287.0, 283.0, 290.0, 284.0, 289.0, 259.0, 266.0, 280.0, 302.0, 288.0, 299.0, 284.0, 295.0, 295.0, 287.0, 290.0, 289.0, 290.0, 289.0, 286.0, 301.0, 287.0, 289.0, 289.0, 290.0, 308.0, 319.0, 312.0, 318.0, 314.0, 316.0, 288.0, 294.0, 287.0, 286.0, 284.0, 295.0, 287.0, 292.0, 296.0, 283.0, 289.0, 293.0, 285.0, 285.0, 266.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6988876950800459, "mean_inference_ms": 1.248936325069807, "mean_action_processing_ms": 0.13395738623505635, "mean_env_wait_ms": 0.8413004695451081, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9088000, "num_agent_steps_trained": 9088000, "num_env_steps_sampled": 4544000, "num_env_steps_trained": 4544000, 
"num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4544000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9088000, "timers": {"training_iteration_time_ms": 3655.033, "learn_time_ms": 1153.612, "learn_throughput": 11095.586, "synch_weights_time_ms": 9.777}, "counters": {"num_env_steps_sampled": 4544000, "num_env_steps_trained": 4544000, "num_agent_steps_sampled": 9088000, "num_agent_steps_trained": 9088000}, "done": false, "episodes_total": 11360, "training_iteration": 355, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-32", "timestamp": 1666581812, "time_this_iter_s": 3.6556270122528076, "time_total_s": 1359.5632746219635, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, 
"use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, 
"attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, 
"reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, 
"attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": 
-1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, 
"replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1359.5632746219635, "timesteps_since_restore": 0, "iterations_since_restore": 355, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.53333333333333, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 176.15, "shaped_reward_min": 43, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, 
"tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.84, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.81, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.71, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.51, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 1, 
"useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.51, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.51, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, 
"catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013188866432756186, "policy_loss": 0.0009702978422865272, "vf_loss": 7.692901134490967, "vf_explained_var": 0.5492017269134521, "kl": 0.0021686244290322065, "entropy": 0.841401219367981, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4556800, "num_env_steps_trained": 4556800, "num_agent_steps_sampled": 9113600, "num_agent_steps_trained": 9113600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 123.0, "episode_reward_mean": 570.95, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 285.475}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 40, "sparse_reward_max": 
220, "shaped_reward_mean": 176.15, "shaped_reward_min": 43, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.84, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.81, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.71, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.58, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, 
"potting_onion_agent_0_mean": 14.51, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.33, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.55, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.35, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.33, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.51, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.33, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.51, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.33, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 582.0, 468.0, 582.0, 582.0, 576.0, 579.0, 582.0, 579.0, 576.0, 579.0, 582.0, 582.0, 522.0, 582.0, 519.0, 630.0, 582.0, 123.0, 459.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 584.0, 630.0, 579.0, 630.0, 579.0, 519.0, 582.0, 582.0, 584.0, 579.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 530.0, 576.0, 573.0, 573.0, 
525.0, 582.0, 587.0, 579.0, 582.0, 579.0, 579.0, 587.0, 576.0, 579.0, 627.0, 630.0, 630.0, 582.0, 573.0, 579.0, 579.0, 579.0, 582.0, 570.0, 525.0, 584.0, 570.0, 576.0, 573.0, 576.0, 579.0, 576.0, 584.0, 582.0, 627.0, 582.0, 576.0, 630.0, 582.0, 579.0, 570.0, 522.0, 576.0, 582.0, 582.0, 525.0, 579.0, 576.0, 570.0, 579.0, 582.0, 573.0, 573.0, 525.0, 627.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 312.0, 288.0, 294.0, 285.0, 297.0, 234.0, 234.0, 291.0, 291.0, 290.0, 292.0, 297.0, 279.0, 280.0, 299.0, 293.0, 289.0, 286.0, 293.0, 287.0, 289.0, 296.0, 283.0, 288.0, 294.0, 290.0, 292.0, 260.0, 262.0, 289.0, 293.0, 257.0, 262.0, 316.0, 314.0, 292.0, 290.0, 60.0, 63.0, 225.0, 234.0, 291.0, 288.0, 295.0, 284.0, 261.0, 261.0, 291.0, 288.0, 288.0, 294.0, 285.0, 291.0, 295.0, 289.0, 310.0, 320.0, 285.0, 294.0, 313.0, 317.0, 292.0, 287.0, 250.0, 269.0, 290.0, 292.0, 285.0, 297.0, 286.0, 298.0, 291.0, 288.0, 290.0, 283.0, 288.0, 294.0, 293.0, 286.0, 285.0, 291.0, 255.0, 267.0, 292.0, 290.0, 261.0, 269.0, 289.0, 287.0, 283.0, 290.0, 284.0, 289.0, 259.0, 266.0, 280.0, 302.0, 288.0, 299.0, 284.0, 295.0, 295.0, 287.0, 290.0, 289.0, 290.0, 289.0, 286.0, 301.0, 287.0, 289.0, 289.0, 290.0, 308.0, 319.0, 312.0, 318.0, 314.0, 316.0, 288.0, 294.0, 287.0, 286.0, 284.0, 295.0, 287.0, 292.0, 296.0, 283.0, 289.0, 293.0, 285.0, 285.0, 266.0, 259.0, 290.0, 294.0, 293.0, 277.0, 286.0, 290.0, 281.0, 292.0, 284.0, 292.0, 296.0, 283.0, 291.0, 285.0, 283.0, 301.0, 284.0, 298.0, 319.0, 
308.0, 291.0, 291.0, 284.0, 292.0, 313.0, 317.0, 293.0, 289.0, 285.0, 294.0, 283.0, 287.0, 266.0, 256.0, 288.0, 288.0, 295.0, 287.0, 290.0, 292.0, 266.0, 259.0, 288.0, 291.0, 286.0, 290.0, 287.0, 283.0, 291.0, 288.0, 290.0, 292.0, 283.0, 290.0, 281.0, 292.0, 264.0, 261.0, 318.0, 309.0, 285.0, 297.0, 285.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.698832879688069, "mean_inference_ms": 1.248881800163415, "mean_action_processing_ms": 0.133950154641813, "mean_env_wait_ms": 0.8413277649657975, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 123.0, "episode_reward_mean": 570.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 285.475}, "hist_stats": {"episode_reward": [630.0, 582.0, 582.0, 468.0, 582.0, 582.0, 576.0, 579.0, 582.0, 579.0, 576.0, 579.0, 582.0, 582.0, 522.0, 582.0, 519.0, 630.0, 582.0, 123.0, 459.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 584.0, 630.0, 579.0, 630.0, 579.0, 519.0, 582.0, 582.0, 584.0, 579.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 530.0, 576.0, 573.0, 573.0, 525.0, 582.0, 587.0, 579.0, 582.0, 579.0, 579.0, 587.0, 576.0, 579.0, 627.0, 630.0, 630.0, 582.0, 573.0, 579.0, 579.0, 579.0, 582.0, 570.0, 525.0, 584.0, 570.0, 576.0, 573.0, 576.0, 579.0, 576.0, 584.0, 582.0, 627.0, 582.0, 576.0, 630.0, 582.0, 579.0, 570.0, 522.0, 576.0, 582.0, 582.0, 525.0, 579.0, 576.0, 570.0, 579.0, 582.0, 573.0, 573.0, 525.0, 627.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [318.0, 312.0, 288.0, 294.0, 285.0, 297.0, 234.0, 234.0, 291.0, 291.0, 290.0, 292.0, 297.0, 279.0, 280.0, 299.0, 293.0, 289.0, 286.0, 293.0, 287.0, 289.0, 296.0, 283.0, 288.0, 294.0, 290.0, 292.0, 260.0, 262.0, 289.0, 293.0, 257.0, 262.0, 316.0, 314.0, 292.0, 290.0, 60.0, 63.0, 225.0, 234.0, 291.0, 288.0, 295.0, 284.0, 261.0, 261.0, 291.0, 288.0, 288.0, 294.0, 285.0, 291.0, 295.0, 289.0, 310.0, 320.0, 285.0, 294.0, 313.0, 317.0, 292.0, 287.0, 250.0, 269.0, 290.0, 292.0, 285.0, 297.0, 286.0, 298.0, 291.0, 288.0, 290.0, 283.0, 288.0, 294.0, 293.0, 286.0, 285.0, 291.0, 255.0, 267.0, 292.0, 290.0, 261.0, 269.0, 289.0, 287.0, 283.0, 290.0, 284.0, 289.0, 259.0, 266.0, 280.0, 302.0, 288.0, 299.0, 284.0, 295.0, 295.0, 287.0, 290.0, 289.0, 290.0, 289.0, 286.0, 301.0, 287.0, 289.0, 289.0, 290.0, 308.0, 319.0, 312.0, 318.0, 314.0, 316.0, 288.0, 294.0, 287.0, 286.0, 284.0, 295.0, 287.0, 292.0, 296.0, 283.0, 289.0, 293.0, 285.0, 285.0, 266.0, 259.0, 290.0, 294.0, 293.0, 277.0, 286.0, 290.0, 281.0, 292.0, 284.0, 292.0, 296.0, 283.0, 291.0, 285.0, 283.0, 301.0, 284.0, 298.0, 319.0, 308.0, 291.0, 291.0, 284.0, 292.0, 313.0, 317.0, 293.0, 289.0, 285.0, 294.0, 283.0, 287.0, 266.0, 256.0, 288.0, 288.0, 295.0, 287.0, 290.0, 292.0, 266.0, 259.0, 288.0, 291.0, 286.0, 290.0, 287.0, 283.0, 291.0, 288.0, 290.0, 292.0, 283.0, 290.0, 281.0, 292.0, 264.0, 261.0, 318.0, 309.0, 285.0, 297.0, 285.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.698832879688069, "mean_inference_ms": 1.248881800163415, "mean_action_processing_ms": 0.133950154641813, "mean_env_wait_ms": 0.8413277649657975, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9113600, "num_agent_steps_trained": 9113600, "num_env_steps_sampled": 4556800, "num_env_steps_trained": 4556800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 
12800, "timesteps_total": 4556800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9113600, "timers": {"training_iteration_time_ms": 3671.445, "learn_time_ms": 1161.805, "learn_throughput": 11017.344, "synch_weights_time_ms": 9.891}, "counters": {"num_env_steps_sampled": 4556800, "num_env_steps_trained": 4556800, "num_agent_steps_sampled": 9113600, "num_agent_steps_trained": 9113600}, "done": false, "episodes_total": 11392, "training_iteration": 356, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-36", "timestamp": 1666581816, "time_this_iter_s": 3.731449842453003, "time_total_s": 1363.2947244644165, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, 
"eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, 
"attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], 
[Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, 
"attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, 
"min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, 
"prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1363.2947244644165, "timesteps_since_restore": 0, "iterations_since_restore": 356, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.259999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.99, "shaped_reward_min": 159, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.75, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.97, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.65, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.43, "potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.48, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.43, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.48, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.43, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.48, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006175374146550894, "policy_loss": 0.0002533263177610934, "vf_loss": 7.782168388366699, "vf_explained_var": 0.52201247215271, "kl": 0.0021736156195402145, "entropy": 0.8280101418495178, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4569600, "num_env_steps_trained": 4569600, "num_agent_steps_sampled": 9139200, "num_agent_steps_trained": 9139200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 482.0, "episode_reward_mean": 574.19, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 235.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 287.095}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.99, 
"shaped_reward_min": 159, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.75, "onion_pickup_agent_0_min": 3, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.97, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 28, "useful_onion_pickup_agent_0_mean": 14.65, "useful_onion_pickup_agent_0_min": 3, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.78, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 28, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 14.43, 
"potting_onion_agent_0_min": 3, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.48, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 27, "dish_pickup_agent_0_mean": 5.7, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.6, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.39, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.43, "optimal_onion_potting_agent_0_min": 3, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.48, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 27, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.43, "viable_onion_potting_agent_0_min": 3, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.48, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 27, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 582.0, 582.0, 584.0, 579.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 530.0, 576.0, 573.0, 573.0, 525.0, 582.0, 587.0, 579.0, 582.0, 579.0, 579.0, 587.0, 576.0, 579.0, 627.0, 630.0, 630.0, 582.0, 573.0, 579.0, 579.0, 579.0, 582.0, 570.0, 525.0, 584.0, 570.0, 576.0, 573.0, 576.0, 579.0, 576.0, 584.0, 582.0, 627.0, 582.0, 
576.0, 630.0, 582.0, 579.0, 570.0, 522.0, 576.0, 582.0, 582.0, 525.0, 579.0, 576.0, 570.0, 579.0, 582.0, 573.0, 573.0, 525.0, 627.0, 582.0, 576.0, 576.0, 579.0, 582.0, 524.0, 582.0, 582.0, 573.0, 579.0, 522.0, 573.0, 579.0, 630.0, 630.0, 530.0, 627.0, 579.0, 530.0, 525.0, 582.0, 573.0, 576.0, 573.0, 579.0, 582.0, 570.0, 630.0, 482.0, 519.0, 582.0, 567.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [250.0, 269.0, 290.0, 292.0, 285.0, 297.0, 286.0, 298.0, 291.0, 288.0, 290.0, 283.0, 288.0, 294.0, 293.0, 286.0, 285.0, 291.0, 255.0, 267.0, 292.0, 290.0, 261.0, 269.0, 289.0, 287.0, 283.0, 290.0, 284.0, 289.0, 259.0, 266.0, 280.0, 302.0, 288.0, 299.0, 284.0, 295.0, 295.0, 287.0, 290.0, 289.0, 290.0, 289.0, 286.0, 301.0, 287.0, 289.0, 289.0, 290.0, 308.0, 319.0, 312.0, 318.0, 314.0, 316.0, 288.0, 294.0, 287.0, 286.0, 284.0, 295.0, 287.0, 292.0, 296.0, 283.0, 289.0, 293.0, 285.0, 285.0, 266.0, 259.0, 290.0, 294.0, 293.0, 277.0, 286.0, 290.0, 281.0, 292.0, 284.0, 292.0, 296.0, 283.0, 291.0, 285.0, 283.0, 301.0, 284.0, 298.0, 319.0, 308.0, 291.0, 291.0, 284.0, 292.0, 313.0, 317.0, 293.0, 289.0, 285.0, 294.0, 283.0, 287.0, 266.0, 256.0, 288.0, 288.0, 295.0, 287.0, 290.0, 292.0, 266.0, 259.0, 288.0, 291.0, 286.0, 290.0, 287.0, 283.0, 291.0, 288.0, 290.0, 292.0, 283.0, 290.0, 281.0, 292.0, 264.0, 261.0, 318.0, 309.0, 285.0, 297.0, 285.0, 291.0, 289.0, 287.0, 291.0, 288.0, 291.0, 291.0, 250.0, 274.0, 291.0, 291.0, 296.0, 286.0, 284.0, 289.0, 289.0, 290.0, 264.0, 258.0, 284.0, 
289.0, 284.0, 295.0, 313.0, 317.0, 317.0, 313.0, 265.0, 265.0, 320.0, 307.0, 287.0, 292.0, 269.0, 261.0, 262.0, 263.0, 288.0, 294.0, 280.0, 293.0, 292.0, 284.0, 286.0, 287.0, 285.0, 294.0, 287.0, 295.0, 278.0, 292.0, 314.0, 316.0, 235.0, 247.0, 259.0, 260.0, 289.0, 293.0, 280.0, 287.0, 293.0, 286.0, 267.0, 258.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.698796357014443, "mean_inference_ms": 1.2487321483893488, "mean_action_processing_ms": 0.13394084113340607, "mean_env_wait_ms": 0.8412337399649568, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 482.0, "episode_reward_mean": 574.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 235.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 287.095}, "hist_stats": {"episode_reward": [519.0, 582.0, 582.0, 584.0, 579.0, 573.0, 582.0, 579.0, 576.0, 522.0, 582.0, 530.0, 576.0, 573.0, 573.0, 525.0, 582.0, 587.0, 579.0, 582.0, 579.0, 579.0, 587.0, 576.0, 579.0, 627.0, 630.0, 630.0, 582.0, 573.0, 579.0, 579.0, 579.0, 582.0, 570.0, 525.0, 584.0, 570.0, 576.0, 573.0, 576.0, 579.0, 576.0, 584.0, 582.0, 627.0, 582.0, 576.0, 630.0, 582.0, 579.0, 570.0, 522.0, 576.0, 582.0, 582.0, 525.0, 579.0, 576.0, 570.0, 579.0, 582.0, 573.0, 573.0, 525.0, 627.0, 582.0, 576.0, 576.0, 579.0, 582.0, 524.0, 582.0, 582.0, 573.0, 579.0, 522.0, 573.0, 579.0, 630.0, 630.0, 530.0, 627.0, 579.0, 530.0, 525.0, 582.0, 573.0, 576.0, 573.0, 579.0, 582.0, 570.0, 630.0, 482.0, 519.0, 582.0, 567.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [250.0, 269.0, 290.0, 292.0, 285.0, 297.0, 286.0, 298.0, 291.0, 288.0, 290.0, 283.0, 288.0, 294.0, 293.0, 286.0, 285.0, 291.0, 255.0, 267.0, 292.0, 290.0, 261.0, 269.0, 289.0, 287.0, 283.0, 290.0, 284.0, 289.0, 259.0, 266.0, 280.0, 302.0, 288.0, 299.0, 284.0, 295.0, 295.0, 287.0, 290.0, 289.0, 290.0, 289.0, 286.0, 301.0, 287.0, 289.0, 289.0, 290.0, 308.0, 319.0, 312.0, 318.0, 314.0, 316.0, 288.0, 294.0, 287.0, 286.0, 284.0, 295.0, 287.0, 292.0, 296.0, 283.0, 289.0, 293.0, 285.0, 285.0, 266.0, 259.0, 290.0, 294.0, 293.0, 277.0, 286.0, 290.0, 281.0, 292.0, 284.0, 292.0, 296.0, 283.0, 291.0, 285.0, 283.0, 301.0, 284.0, 298.0, 319.0, 308.0, 291.0, 291.0, 284.0, 292.0, 313.0, 317.0, 293.0, 289.0, 285.0, 294.0, 283.0, 287.0, 266.0, 256.0, 288.0, 288.0, 295.0, 287.0, 290.0, 292.0, 266.0, 259.0, 288.0, 291.0, 286.0, 290.0, 287.0, 283.0, 291.0, 288.0, 290.0, 292.0, 283.0, 290.0, 281.0, 292.0, 264.0, 261.0, 318.0, 309.0, 285.0, 297.0, 285.0, 291.0, 289.0, 287.0, 291.0, 288.0, 291.0, 291.0, 250.0, 274.0, 291.0, 291.0, 296.0, 286.0, 284.0, 289.0, 289.0, 290.0, 264.0, 258.0, 284.0, 289.0, 284.0, 295.0, 313.0, 317.0, 317.0, 313.0, 265.0, 265.0, 320.0, 307.0, 287.0, 292.0, 269.0, 261.0, 262.0, 263.0, 288.0, 294.0, 280.0, 293.0, 292.0, 284.0, 286.0, 287.0, 285.0, 294.0, 287.0, 295.0, 278.0, 292.0, 314.0, 316.0, 235.0, 247.0, 259.0, 260.0, 289.0, 293.0, 280.0, 287.0, 293.0, 286.0, 267.0, 258.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.698796357014443, "mean_inference_ms": 1.2487321483893488, "mean_action_processing_ms": 0.13394084113340607, "mean_env_wait_ms": 0.8412337399649568, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9139200, "num_agent_steps_trained": 9139200, "num_env_steps_sampled": 4569600, "num_env_steps_trained": 4569600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 
12800, "timesteps_total": 4569600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9139200, "timers": {"training_iteration_time_ms": 3679.359, "learn_time_ms": 1168.252, "learn_throughput": 10956.54, "synch_weights_time_ms": 9.969}, "counters": {"num_env_steps_sampled": 4569600, "num_env_steps_trained": 4569600, "num_agent_steps_sampled": 9139200, "num_agent_steps_trained": 9139200}, "done": false, "episodes_total": 11424, "training_iteration": 357, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-40", "timestamp": 1666581820, "time_this_iter_s": 3.717745304107666, "time_total_s": 1367.0124697685242, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, 
"eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, 
"attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], 
[Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, 
"attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, 
"min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, 
"prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1367.0124697685242, "timesteps_since_restore": 0, "iterations_since_restore": 357, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.73333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.49, "shaped_reward_min": 145, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, 
"tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.4, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.12, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.82, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, 
"dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.82, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.82, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00033347378484904766, "policy_loss": -0.0006874024402350187, "vf_loss": 7.688575267791748, "vf_explained_var": 0.544007420539856, "kl": 0.0025333897210657597, "entropy": 0.8298560380935669, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4582400, "num_env_steps_trained": 4582400, "num_agent_steps_sampled": 9164800, "num_agent_steps_trained": 9164800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 465.0, "episode_reward_mean": 572.09, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 286.045}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.49, 
"shaped_reward_min": 145, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.4, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.12, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.03, 
"potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.82, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.02, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.91, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.25, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.82, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 
0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.82, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 570.0, 525.0, 584.0, 570.0, 576.0, 573.0, 576.0, 579.0, 576.0, 584.0, 582.0, 627.0, 582.0, 576.0, 630.0, 582.0, 579.0, 570.0, 522.0, 576.0, 582.0, 582.0, 525.0, 579.0, 576.0, 570.0, 579.0, 582.0, 573.0, 573.0, 525.0, 627.0, 582.0, 576.0, 576.0, 579.0, 582.0, 524.0, 582.0, 582.0, 573.0, 579.0, 522.0, 573.0, 579.0, 630.0, 630.0, 530.0, 627.0, 579.0, 530.0, 
525.0, 582.0, 573.0, 576.0, 573.0, 579.0, 582.0, 570.0, 630.0, 482.0, 519.0, 582.0, 567.0, 579.0, 525.0, 582.0, 579.0, 465.0, 582.0, 582.0, 573.0, 579.0, 533.0, 570.0, 579.0, 573.0, 579.0, 579.0, 573.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 579.0, 576.0, 582.0, 576.0, 579.0, 582.0, 587.0, 579.0, 579.0, 530.0, 530.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 283.0, 289.0, 293.0, 285.0, 285.0, 266.0, 259.0, 290.0, 294.0, 293.0, 277.0, 286.0, 290.0, 281.0, 292.0, 284.0, 292.0, 296.0, 283.0, 291.0, 285.0, 283.0, 301.0, 284.0, 298.0, 319.0, 308.0, 291.0, 291.0, 284.0, 292.0, 313.0, 317.0, 293.0, 289.0, 285.0, 294.0, 283.0, 287.0, 266.0, 256.0, 288.0, 288.0, 295.0, 287.0, 290.0, 292.0, 266.0, 259.0, 288.0, 291.0, 286.0, 290.0, 287.0, 283.0, 291.0, 288.0, 290.0, 292.0, 283.0, 290.0, 281.0, 292.0, 264.0, 261.0, 318.0, 309.0, 285.0, 297.0, 285.0, 291.0, 289.0, 287.0, 291.0, 288.0, 291.0, 291.0, 250.0, 274.0, 291.0, 291.0, 296.0, 286.0, 284.0, 289.0, 289.0, 290.0, 264.0, 258.0, 284.0, 289.0, 284.0, 295.0, 313.0, 317.0, 317.0, 313.0, 265.0, 265.0, 320.0, 307.0, 287.0, 292.0, 269.0, 261.0, 262.0, 263.0, 288.0, 294.0, 280.0, 293.0, 292.0, 284.0, 286.0, 287.0, 285.0, 294.0, 287.0, 295.0, 278.0, 292.0, 314.0, 316.0, 235.0, 247.0, 259.0, 260.0, 289.0, 293.0, 280.0, 287.0, 293.0, 286.0, 267.0, 258.0, 290.0, 292.0, 286.0, 293.0, 242.0, 223.0, 295.0, 287.0, 292.0, 290.0, 289.0, 284.0, 288.0, 291.0, 260.0, 273.0, 288.0, 282.0, 285.0, 294.0, 283.0, 290.0, 290.0, 289.0, 288.0, 
291.0, 291.0, 282.0, 288.0, 294.0, 284.0, 298.0, 292.0, 290.0, 283.0, 290.0, 293.0, 289.0, 281.0, 289.0, 289.0, 290.0, 291.0, 285.0, 293.0, 289.0, 290.0, 286.0, 287.0, 292.0, 288.0, 294.0, 285.0, 302.0, 286.0, 293.0, 295.0, 284.0, 264.0, 266.0, 264.0, 266.0, 281.0, 298.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6987775753081857, "mean_inference_ms": 1.2485649907058904, "mean_action_processing_ms": 0.1339327316237972, "mean_env_wait_ms": 0.8411460706100078, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 465.0, "episode_reward_mean": 572.09, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 223.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 286.045}, "hist_stats": {"episode_reward": [579.0, 582.0, 570.0, 525.0, 584.0, 570.0, 576.0, 573.0, 576.0, 579.0, 576.0, 584.0, 582.0, 627.0, 582.0, 576.0, 630.0, 582.0, 579.0, 570.0, 522.0, 576.0, 582.0, 582.0, 525.0, 579.0, 576.0, 570.0, 579.0, 582.0, 573.0, 573.0, 525.0, 627.0, 582.0, 576.0, 576.0, 579.0, 582.0, 524.0, 582.0, 582.0, 573.0, 579.0, 522.0, 573.0, 579.0, 630.0, 630.0, 530.0, 627.0, 579.0, 530.0, 525.0, 582.0, 573.0, 576.0, 573.0, 579.0, 582.0, 570.0, 630.0, 482.0, 519.0, 582.0, 567.0, 579.0, 525.0, 582.0, 579.0, 465.0, 582.0, 582.0, 573.0, 579.0, 533.0, 570.0, 579.0, 573.0, 579.0, 579.0, 573.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 579.0, 576.0, 582.0, 576.0, 579.0, 582.0, 587.0, 579.0, 579.0, 530.0, 530.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 283.0, 289.0, 293.0, 285.0, 285.0, 266.0, 259.0, 290.0, 294.0, 293.0, 277.0, 286.0, 290.0, 281.0, 292.0, 284.0, 292.0, 296.0, 283.0, 291.0, 285.0, 283.0, 301.0, 284.0, 298.0, 319.0, 308.0, 291.0, 291.0, 284.0, 292.0, 313.0, 317.0, 293.0, 289.0, 285.0, 294.0, 283.0, 287.0, 266.0, 256.0, 288.0, 288.0, 295.0, 287.0, 290.0, 292.0, 266.0, 259.0, 288.0, 291.0, 286.0, 290.0, 287.0, 283.0, 291.0, 288.0, 290.0, 292.0, 283.0, 290.0, 281.0, 292.0, 264.0, 261.0, 318.0, 309.0, 285.0, 297.0, 285.0, 291.0, 289.0, 287.0, 291.0, 288.0, 291.0, 291.0, 250.0, 274.0, 291.0, 291.0, 296.0, 286.0, 284.0, 289.0, 289.0, 290.0, 264.0, 258.0, 284.0, 289.0, 284.0, 295.0, 313.0, 317.0, 317.0, 313.0, 265.0, 265.0, 320.0, 307.0, 287.0, 292.0, 269.0, 261.0, 262.0, 263.0, 288.0, 294.0, 280.0, 293.0, 292.0, 284.0, 286.0, 287.0, 285.0, 294.0, 287.0, 295.0, 278.0, 292.0, 314.0, 316.0, 235.0, 247.0, 259.0, 260.0, 289.0, 293.0, 280.0, 287.0, 293.0, 286.0, 267.0, 258.0, 290.0, 292.0, 286.0, 293.0, 242.0, 223.0, 295.0, 287.0, 292.0, 290.0, 289.0, 284.0, 288.0, 291.0, 260.0, 273.0, 288.0, 282.0, 285.0, 294.0, 283.0, 290.0, 290.0, 289.0, 288.0, 291.0, 291.0, 282.0, 288.0, 294.0, 284.0, 298.0, 292.0, 290.0, 283.0, 290.0, 293.0, 289.0, 281.0, 289.0, 289.0, 290.0, 291.0, 285.0, 293.0, 289.0, 290.0, 286.0, 287.0, 292.0, 288.0, 294.0, 285.0, 302.0, 286.0, 293.0, 295.0, 284.0, 264.0, 266.0, 264.0, 266.0, 281.0, 298.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6987775753081857, "mean_inference_ms": 1.2485649907058904, "mean_action_processing_ms": 0.1339327316237972, "mean_env_wait_ms": 0.8411460706100078, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9164800, "num_agent_steps_trained": 9164800, "num_env_steps_sampled": 4582400, "num_env_steps_trained": 4582400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4582400, 
"num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9164800, "timers": {"training_iteration_time_ms": 3675.323, "learn_time_ms": 1165.406, "learn_throughput": 10983.294, "synch_weights_time_ms": 9.801}, "counters": {"num_env_steps_sampled": 4582400, "num_env_steps_trained": 4582400, "num_agent_steps_sampled": 9164800, "num_agent_steps_trained": 9164800}, "done": false, "episodes_total": 11456, "training_iteration": 358, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-44", "timestamp": 1666581824, "time_this_iter_s": 3.722245454788208, "time_total_s": 1370.7347152233124, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": 
"cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, 
"attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, 
"eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, 
"attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, 
"min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, 
"prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1370.7347152233124, "timesteps_since_restore": 0, "iterations_since_restore": 358, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.96, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.59, "shaped_reward_min": 128, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.96, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.79, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.79, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.61, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.52, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.38, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, 
"dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.52, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.38, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.52, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.38, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 
0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.003429124830290675, "policy_loss": -0.0037802057340741158, "vf_loss": 7.628129482269287, "vf_explained_var": 0.5590401887893677, "kl": 0.002472866792231798, "entropy": 0.823464572429657, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4595200, "num_env_steps_trained": 4595200, "num_agent_steps_sampled": 9190400, "num_agent_steps_trained": 9190400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 408.0, "episode_reward_mean": 570.19, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 203.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 285.095}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.59, "shaped_reward_min": 128, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, 
"tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.96, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.79, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.79, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.61, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 3, "potting_onion_agent_0_mean": 15.52, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 
16.38, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.89, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.52, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.38, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.52, 
"viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.38, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 627.0, 582.0, 576.0, 576.0, 579.0, 582.0, 524.0, 582.0, 582.0, 573.0, 579.0, 522.0, 573.0, 579.0, 630.0, 630.0, 530.0, 627.0, 579.0, 530.0, 525.0, 582.0, 573.0, 576.0, 573.0, 579.0, 582.0, 570.0, 630.0, 482.0, 519.0, 582.0, 567.0, 579.0, 525.0, 582.0, 579.0, 465.0, 582.0, 582.0, 573.0, 579.0, 533.0, 570.0, 579.0, 573.0, 579.0, 579.0, 573.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 579.0, 576.0, 582.0, 576.0, 579.0, 582.0, 587.0, 579.0, 579.0, 
530.0, 530.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 570.0, 579.0, 582.0, 582.0, 630.0, 579.0, 579.0, 582.0, 519.0, 579.0, 587.0, 587.0, 573.0, 576.0, 587.0, 408.0, 522.0, 579.0, 579.0, 579.0, 582.0, 581.0, 582.0, 579.0, 487.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 261.0, 318.0, 309.0, 285.0, 297.0, 285.0, 291.0, 289.0, 287.0, 291.0, 288.0, 291.0, 291.0, 250.0, 274.0, 291.0, 291.0, 296.0, 286.0, 284.0, 289.0, 289.0, 290.0, 264.0, 258.0, 284.0, 289.0, 284.0, 295.0, 313.0, 317.0, 317.0, 313.0, 265.0, 265.0, 320.0, 307.0, 287.0, 292.0, 269.0, 261.0, 262.0, 263.0, 288.0, 294.0, 280.0, 293.0, 292.0, 284.0, 286.0, 287.0, 285.0, 294.0, 287.0, 295.0, 278.0, 292.0, 314.0, 316.0, 235.0, 247.0, 259.0, 260.0, 289.0, 293.0, 280.0, 287.0, 293.0, 286.0, 267.0, 258.0, 290.0, 292.0, 286.0, 293.0, 242.0, 223.0, 295.0, 287.0, 292.0, 290.0, 289.0, 284.0, 288.0, 291.0, 260.0, 273.0, 288.0, 282.0, 285.0, 294.0, 283.0, 290.0, 290.0, 289.0, 288.0, 291.0, 291.0, 282.0, 288.0, 294.0, 284.0, 298.0, 292.0, 290.0, 283.0, 290.0, 293.0, 289.0, 281.0, 289.0, 289.0, 290.0, 291.0, 285.0, 293.0, 289.0, 290.0, 286.0, 287.0, 292.0, 288.0, 294.0, 285.0, 302.0, 286.0, 293.0, 295.0, 284.0, 264.0, 266.0, 264.0, 266.0, 281.0, 298.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 291.0, 285.0, 287.0, 295.0, 283.0, 287.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 314.0, 316.0, 290.0, 289.0, 292.0, 287.0, 290.0, 292.0, 260.0, 259.0, 288.0, 291.0, 295.0, 292.0, 296.0, 291.0, 290.0, 283.0, 281.0, 
295.0, 290.0, 297.0, 203.0, 205.0, 255.0, 267.0, 290.0, 289.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 281.0, 300.0, 291.0, 291.0, 284.0, 295.0, 245.0, 242.0, 290.0, 292.0, 285.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6987291256930882, "mean_inference_ms": 1.248387604510305, "mean_action_processing_ms": 0.13392288763838628, "mean_env_wait_ms": 0.8410501699452928, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 408.0, "episode_reward_mean": 570.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 203.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 285.095}, "hist_stats": {"episode_reward": [525.0, 627.0, 582.0, 576.0, 576.0, 579.0, 582.0, 524.0, 582.0, 582.0, 573.0, 579.0, 522.0, 573.0, 579.0, 630.0, 630.0, 530.0, 627.0, 579.0, 530.0, 525.0, 582.0, 573.0, 576.0, 573.0, 579.0, 582.0, 570.0, 630.0, 482.0, 519.0, 582.0, 567.0, 579.0, 525.0, 582.0, 579.0, 465.0, 582.0, 582.0, 573.0, 579.0, 533.0, 570.0, 579.0, 573.0, 579.0, 579.0, 573.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 579.0, 576.0, 582.0, 576.0, 579.0, 582.0, 587.0, 579.0, 579.0, 530.0, 530.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 570.0, 579.0, 582.0, 582.0, 630.0, 579.0, 579.0, 582.0, 519.0, 579.0, 587.0, 587.0, 573.0, 576.0, 587.0, 408.0, 522.0, 579.0, 579.0, 579.0, 582.0, 581.0, 582.0, 579.0, 487.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [264.0, 261.0, 318.0, 
309.0, 285.0, 297.0, 285.0, 291.0, 289.0, 287.0, 291.0, 288.0, 291.0, 291.0, 250.0, 274.0, 291.0, 291.0, 296.0, 286.0, 284.0, 289.0, 289.0, 290.0, 264.0, 258.0, 284.0, 289.0, 284.0, 295.0, 313.0, 317.0, 317.0, 313.0, 265.0, 265.0, 320.0, 307.0, 287.0, 292.0, 269.0, 261.0, 262.0, 263.0, 288.0, 294.0, 280.0, 293.0, 292.0, 284.0, 286.0, 287.0, 285.0, 294.0, 287.0, 295.0, 278.0, 292.0, 314.0, 316.0, 235.0, 247.0, 259.0, 260.0, 289.0, 293.0, 280.0, 287.0, 293.0, 286.0, 267.0, 258.0, 290.0, 292.0, 286.0, 293.0, 242.0, 223.0, 295.0, 287.0, 292.0, 290.0, 289.0, 284.0, 288.0, 291.0, 260.0, 273.0, 288.0, 282.0, 285.0, 294.0, 283.0, 290.0, 290.0, 289.0, 288.0, 291.0, 291.0, 282.0, 288.0, 294.0, 284.0, 298.0, 292.0, 290.0, 283.0, 290.0, 293.0, 289.0, 281.0, 289.0, 289.0, 290.0, 291.0, 285.0, 293.0, 289.0, 290.0, 286.0, 287.0, 292.0, 288.0, 294.0, 285.0, 302.0, 286.0, 293.0, 295.0, 284.0, 264.0, 266.0, 264.0, 266.0, 281.0, 298.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 291.0, 285.0, 287.0, 295.0, 283.0, 287.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 314.0, 316.0, 290.0, 289.0, 292.0, 287.0, 290.0, 292.0, 260.0, 259.0, 288.0, 291.0, 295.0, 292.0, 296.0, 291.0, 290.0, 283.0, 281.0, 295.0, 290.0, 297.0, 203.0, 205.0, 255.0, 267.0, 290.0, 289.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 281.0, 300.0, 291.0, 291.0, 284.0, 295.0, 245.0, 242.0, 290.0, 292.0, 285.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6987291256930882, "mean_inference_ms": 1.248387604510305, "mean_action_processing_ms": 0.13392288763838628, "mean_env_wait_ms": 0.8410501699452928, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9190400, "num_agent_steps_trained": 9190400, "num_env_steps_sampled": 4595200, "num_env_steps_trained": 4595200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4595200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9190400, "timers": 
{"training_iteration_time_ms": 3669.82, "learn_time_ms": 1154.499, "learn_throughput": 11087.057, "synch_weights_time_ms": 9.735}, "counters": {"num_env_steps_sampled": 4595200, "num_env_steps_trained": 4595200, "num_agent_steps_sampled": 9190400, "num_agent_steps_trained": 9190400}, "done": false, "episodes_total": 11488, "training_iteration": 359, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-47", "timestamp": 1666581827, "time_this_iter_s": 3.7149648666381836, "time_total_s": 1374.4496800899506, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, 
"zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 
84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": 
true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, 
"min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1374.4496800899506, "timesteps_since_restore": 0, "iterations_since_restore": 359, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.46666666666667, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.58, "shaped_reward_min": 128, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, 
"useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.01, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.97, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.8, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.79, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.54, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.58, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, 
"useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.54, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.58, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.54, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.58, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, 
"catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007686268072575331, "policy_loss": -0.001121054170653224, "vf_loss": 7.592923164367676, "vf_explained_var": 0.575827956199646, "kl": 0.002229448640719056, "entropy": 0.8137279748916626, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4608000, "num_env_steps_trained": 4608000, "num_agent_steps_sampled": 9216000, "num_agent_steps_trained": 9216000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 408.0, "episode_reward_mean": 573.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 203.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.59}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.58, "shaped_reward_min": 128, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, 
"tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.01, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.97, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.8, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.79, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.54, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.58, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 
25, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.54, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.58, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.54, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 
23, "viable_onion_potting_agent_1_mean": 16.58, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 567.0, 579.0, 525.0, 582.0, 579.0, 465.0, 582.0, 582.0, 573.0, 579.0, 533.0, 570.0, 579.0, 573.0, 579.0, 579.0, 573.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 579.0, 576.0, 582.0, 576.0, 579.0, 582.0, 587.0, 579.0, 579.0, 530.0, 530.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 570.0, 579.0, 582.0, 582.0, 630.0, 579.0, 579.0, 582.0, 519.0, 579.0, 587.0, 587.0, 573.0, 576.0, 587.0, 408.0, 522.0, 579.0, 579.0, 579.0, 582.0, 581.0, 582.0, 579.0, 487.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 636.0, 582.0, 579.0, 576.0, 
576.0, 582.0, 584.0, 579.0, 633.0, 579.0, 530.0, 579.0, 570.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 576.0, 582.0, 579.0, 587.0, 581.0, 576.0, 576.0, 573.0, 536.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 280.0, 287.0, 293.0, 286.0, 267.0, 258.0, 290.0, 292.0, 286.0, 293.0, 242.0, 223.0, 295.0, 287.0, 292.0, 290.0, 289.0, 284.0, 288.0, 291.0, 260.0, 273.0, 288.0, 282.0, 285.0, 294.0, 283.0, 290.0, 290.0, 289.0, 288.0, 291.0, 291.0, 282.0, 288.0, 294.0, 284.0, 298.0, 292.0, 290.0, 283.0, 290.0, 293.0, 289.0, 281.0, 289.0, 289.0, 290.0, 291.0, 285.0, 293.0, 289.0, 290.0, 286.0, 287.0, 292.0, 288.0, 294.0, 285.0, 302.0, 286.0, 293.0, 295.0, 284.0, 264.0, 266.0, 264.0, 266.0, 281.0, 298.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 291.0, 285.0, 287.0, 295.0, 283.0, 287.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 314.0, 316.0, 290.0, 289.0, 292.0, 287.0, 290.0, 292.0, 260.0, 259.0, 288.0, 291.0, 295.0, 292.0, 296.0, 291.0, 290.0, 283.0, 281.0, 295.0, 290.0, 297.0, 203.0, 205.0, 255.0, 267.0, 290.0, 289.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 281.0, 300.0, 291.0, 291.0, 284.0, 295.0, 245.0, 242.0, 290.0, 292.0, 285.0, 297.0, 288.0, 282.0, 291.0, 291.0, 288.0, 288.0, 294.0, 285.0, 318.0, 318.0, 286.0, 296.0, 286.0, 293.0, 288.0, 288.0, 295.0, 281.0, 292.0, 290.0, 292.0, 292.0, 287.0, 292.0, 312.0, 321.0, 292.0, 287.0, 268.0, 262.0, 289.0, 290.0, 288.0, 282.0, 281.0, 295.0, 281.0, 295.0, 316.0, 314.0, 291.0, 288.0, 291.0, 288.0, 287.0, 292.0, 290.0, 286.0, 
289.0, 293.0, 285.0, 294.0, 294.0, 293.0, 294.0, 287.0, 282.0, 294.0, 285.0, 291.0, 286.0, 287.0, 266.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6986628664827095, "mean_inference_ms": 1.248223427178441, "mean_action_processing_ms": 0.13391442963440256, "mean_env_wait_ms": 0.8409629142801245, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 408.0, "episode_reward_mean": 573.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 203.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.59}, "hist_stats": {"episode_reward": [582.0, 567.0, 579.0, 525.0, 582.0, 579.0, 465.0, 582.0, 582.0, 573.0, 579.0, 533.0, 570.0, 579.0, 573.0, 579.0, 579.0, 573.0, 582.0, 582.0, 582.0, 573.0, 582.0, 570.0, 579.0, 576.0, 582.0, 576.0, 579.0, 582.0, 587.0, 579.0, 579.0, 530.0, 530.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 570.0, 579.0, 582.0, 582.0, 630.0, 579.0, 579.0, 582.0, 519.0, 579.0, 587.0, 587.0, 573.0, 576.0, 587.0, 408.0, 522.0, 579.0, 579.0, 579.0, 582.0, 581.0, 582.0, 579.0, 487.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 636.0, 582.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 633.0, 579.0, 530.0, 579.0, 570.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 576.0, 582.0, 579.0, 587.0, 581.0, 576.0, 576.0, 573.0, 536.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 280.0, 287.0, 293.0, 286.0, 267.0, 258.0, 290.0, 292.0, 286.0, 293.0, 242.0, 223.0, 
295.0, 287.0, 292.0, 290.0, 289.0, 284.0, 288.0, 291.0, 260.0, 273.0, 288.0, 282.0, 285.0, 294.0, 283.0, 290.0, 290.0, 289.0, 288.0, 291.0, 291.0, 282.0, 288.0, 294.0, 284.0, 298.0, 292.0, 290.0, 283.0, 290.0, 293.0, 289.0, 281.0, 289.0, 289.0, 290.0, 291.0, 285.0, 293.0, 289.0, 290.0, 286.0, 287.0, 292.0, 288.0, 294.0, 285.0, 302.0, 286.0, 293.0, 295.0, 284.0, 264.0, 266.0, 264.0, 266.0, 281.0, 298.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 291.0, 285.0, 287.0, 295.0, 283.0, 287.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 314.0, 316.0, 290.0, 289.0, 292.0, 287.0, 290.0, 292.0, 260.0, 259.0, 288.0, 291.0, 295.0, 292.0, 296.0, 291.0, 290.0, 283.0, 281.0, 295.0, 290.0, 297.0, 203.0, 205.0, 255.0, 267.0, 290.0, 289.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 281.0, 300.0, 291.0, 291.0, 284.0, 295.0, 245.0, 242.0, 290.0, 292.0, 285.0, 297.0, 288.0, 282.0, 291.0, 291.0, 288.0, 288.0, 294.0, 285.0, 318.0, 318.0, 286.0, 296.0, 286.0, 293.0, 288.0, 288.0, 295.0, 281.0, 292.0, 290.0, 292.0, 292.0, 287.0, 292.0, 312.0, 321.0, 292.0, 287.0, 268.0, 262.0, 289.0, 290.0, 288.0, 282.0, 281.0, 295.0, 281.0, 295.0, 316.0, 314.0, 291.0, 288.0, 291.0, 288.0, 287.0, 292.0, 290.0, 286.0, 289.0, 293.0, 285.0, 294.0, 294.0, 293.0, 294.0, 287.0, 282.0, 294.0, 285.0, 291.0, 286.0, 287.0, 266.0, 270.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6986628664827095, "mean_inference_ms": 1.248223427178441, "mean_action_processing_ms": 0.13391442963440256, "mean_env_wait_ms": 0.8409629142801245, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9216000, "num_agent_steps_trained": 9216000, "num_env_steps_sampled": 4608000, "num_env_steps_trained": 4608000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4608000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9216000, "timers": {"training_iteration_time_ms": 3663.145, "learn_time_ms": 1152.436, 
"learn_throughput": 11106.91, "synch_weights_time_ms": 9.775}, "counters": {"num_env_steps_sampled": 4608000, "num_env_steps_trained": 4608000, "num_agent_steps_sampled": 9216000, "num_agent_steps_trained": 9216000}, "done": false, "episodes_total": 11520, "training_iteration": 360, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-51", "timestamp": 1666581831, "time_this_iter_s": 3.6528384685516357, "time_total_s": 1378.1025185585022, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, 
"POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, 
"custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, 
"custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 
8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": 
null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1378.1025185585022, "timesteps_since_restore": 0, "iterations_since_restore": 360, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.48, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 178.1, "shaped_reward_min": 128, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 
0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.6, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.44, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.0, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.23, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, 
"useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.0, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.23, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.0, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.23, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, 
"catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016958118649199605, "policy_loss": -0.0020420937798917294, "vf_loss": 7.6369242668151855, "vf_explained_var": 0.5737049579620361, "kl": 0.0027597371954470873, "entropy": 0.8348207473754883, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4620800, "num_env_steps_trained": 4620800, "num_agent_steps_sampled": 9241600, "num_agent_steps_trained": 9241600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 408.0, "episode_reward_mean": 574.1, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 203.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 287.05}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 178.1, "shaped_reward_min": 128, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 
0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.6, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.44, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.0, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.23, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.95, 
"dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.47, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.74, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.0, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.23, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.0, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.23, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 530.0, 530.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 570.0, 579.0, 582.0, 582.0, 630.0, 579.0, 579.0, 582.0, 519.0, 579.0, 587.0, 587.0, 573.0, 576.0, 587.0, 408.0, 522.0, 579.0, 579.0, 579.0, 582.0, 581.0, 582.0, 579.0, 487.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 636.0, 582.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 633.0, 579.0, 530.0, 579.0, 570.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 576.0, 582.0, 579.0, 587.0, 581.0, 576.0, 576.0, 573.0, 536.0, 573.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 579.0, 522.0, 579.0, 582.0, 582.0, 582.0, 570.0, 582.0, 627.0, 530.0, 582.0, 579.0, 582.0, 579.0, 473.0, 576.0, 579.0, 587.0, 582.0, 
582.0, 579.0, 627.0, 582.0, 525.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 284.0, 264.0, 266.0, 264.0, 266.0, 281.0, 298.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 291.0, 285.0, 287.0, 295.0, 283.0, 287.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 314.0, 316.0, 290.0, 289.0, 292.0, 287.0, 290.0, 292.0, 260.0, 259.0, 288.0, 291.0, 295.0, 292.0, 296.0, 291.0, 290.0, 283.0, 281.0, 295.0, 290.0, 297.0, 203.0, 205.0, 255.0, 267.0, 290.0, 289.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 281.0, 300.0, 291.0, 291.0, 284.0, 295.0, 245.0, 242.0, 290.0, 292.0, 285.0, 297.0, 288.0, 282.0, 291.0, 291.0, 288.0, 288.0, 294.0, 285.0, 318.0, 318.0, 286.0, 296.0, 286.0, 293.0, 288.0, 288.0, 295.0, 281.0, 292.0, 290.0, 292.0, 292.0, 287.0, 292.0, 312.0, 321.0, 292.0, 287.0, 268.0, 262.0, 289.0, 290.0, 288.0, 282.0, 281.0, 295.0, 281.0, 295.0, 316.0, 314.0, 291.0, 288.0, 291.0, 288.0, 287.0, 292.0, 290.0, 286.0, 289.0, 293.0, 285.0, 294.0, 294.0, 293.0, 294.0, 287.0, 282.0, 294.0, 285.0, 291.0, 286.0, 287.0, 266.0, 270.0, 283.0, 290.0, 288.0, 291.0, 291.0, 288.0, 291.0, 291.0, 293.0, 286.0, 288.0, 294.0, 286.0, 293.0, 294.0, 285.0, 256.0, 266.0, 284.0, 295.0, 289.0, 293.0, 290.0, 292.0, 290.0, 292.0, 291.0, 279.0, 289.0, 293.0, 311.0, 316.0, 258.0, 272.0, 293.0, 289.0, 290.0, 289.0, 288.0, 294.0, 285.0, 294.0, 234.0, 239.0, 287.0, 289.0, 283.0, 296.0, 298.0, 289.0, 280.0, 302.0, 286.0, 296.0, 291.0, 288.0, 320.0, 307.0, 285.0, 297.0, 262.0, 263.0, 292.0, 290.0]}, 
"sampler_perf": {"mean_raw_obs_processing_ms": 0.6985839313465746, "mean_inference_ms": 1.2480389289272427, "mean_action_processing_ms": 0.13390556369993323, "mean_env_wait_ms": 0.8408671873955664, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 408.0, "episode_reward_mean": 574.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 203.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 287.05}, "hist_stats": {"episode_reward": [579.0, 530.0, 530.0, 579.0, 579.0, 579.0, 582.0, 576.0, 582.0, 570.0, 579.0, 582.0, 582.0, 630.0, 579.0, 579.0, 582.0, 519.0, 579.0, 587.0, 587.0, 573.0, 576.0, 587.0, 408.0, 522.0, 579.0, 579.0, 579.0, 582.0, 581.0, 582.0, 579.0, 487.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 636.0, 582.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 633.0, 579.0, 530.0, 579.0, 570.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 576.0, 582.0, 579.0, 587.0, 581.0, 576.0, 576.0, 573.0, 536.0, 573.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 579.0, 522.0, 579.0, 582.0, 582.0, 582.0, 570.0, 582.0, 627.0, 530.0, 582.0, 579.0, 582.0, 579.0, 473.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 627.0, 582.0, 525.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 284.0, 264.0, 266.0, 264.0, 266.0, 281.0, 298.0, 293.0, 286.0, 290.0, 289.0, 286.0, 296.0, 291.0, 285.0, 287.0, 295.0, 283.0, 287.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 314.0, 316.0, 290.0, 289.0, 
292.0, 287.0, 290.0, 292.0, 260.0, 259.0, 288.0, 291.0, 295.0, 292.0, 296.0, 291.0, 290.0, 283.0, 281.0, 295.0, 290.0, 297.0, 203.0, 205.0, 255.0, 267.0, 290.0, 289.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 281.0, 300.0, 291.0, 291.0, 284.0, 295.0, 245.0, 242.0, 290.0, 292.0, 285.0, 297.0, 288.0, 282.0, 291.0, 291.0, 288.0, 288.0, 294.0, 285.0, 318.0, 318.0, 286.0, 296.0, 286.0, 293.0, 288.0, 288.0, 295.0, 281.0, 292.0, 290.0, 292.0, 292.0, 287.0, 292.0, 312.0, 321.0, 292.0, 287.0, 268.0, 262.0, 289.0, 290.0, 288.0, 282.0, 281.0, 295.0, 281.0, 295.0, 316.0, 314.0, 291.0, 288.0, 291.0, 288.0, 287.0, 292.0, 290.0, 286.0, 289.0, 293.0, 285.0, 294.0, 294.0, 293.0, 294.0, 287.0, 282.0, 294.0, 285.0, 291.0, 286.0, 287.0, 266.0, 270.0, 283.0, 290.0, 288.0, 291.0, 291.0, 288.0, 291.0, 291.0, 293.0, 286.0, 288.0, 294.0, 286.0, 293.0, 294.0, 285.0, 256.0, 266.0, 284.0, 295.0, 289.0, 293.0, 290.0, 292.0, 290.0, 292.0, 291.0, 279.0, 289.0, 293.0, 311.0, 316.0, 258.0, 272.0, 293.0, 289.0, 290.0, 289.0, 288.0, 294.0, 285.0, 294.0, 234.0, 239.0, 287.0, 289.0, 283.0, 296.0, 298.0, 289.0, 280.0, 302.0, 286.0, 296.0, 291.0, 288.0, 320.0, 307.0, 285.0, 297.0, 262.0, 263.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6985839313465746, "mean_inference_ms": 1.2480389289272427, "mean_action_processing_ms": 0.13390556369993323, "mean_env_wait_ms": 0.8408671873955664, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9241600, "num_agent_steps_trained": 9241600, "num_env_steps_sampled": 4620800, "num_env_steps_trained": 4620800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4620800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9241600, "timers": {"training_iteration_time_ms": 3654.587, "learn_time_ms": 1141.059, "learn_throughput": 11217.65, "synch_weights_time_ms": 10.314}, "counters": {"num_env_steps_sampled": 4620800, 
"num_env_steps_trained": 4620800, "num_agent_steps_sampled": 9241600, "num_agent_steps_trained": 9241600}, "done": false, "episodes_total": 11552, "training_iteration": 361, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-55", "timestamp": 1666581835, "time_this_iter_s": 3.6370046138763428, "time_total_s": 1381.7395231723785, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": 
null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, 
"exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, 
"env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, 
"explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], 
[300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, 
"vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1381.7395231723785, "timesteps_since_restore": 0, "iterations_since_restore": 361, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.31, "shaped_reward_min": 153, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.14, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.73, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.95, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.75, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.78, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.75, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.75, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002034829929471016, "policy_loss": 0.0016710225027054548, "vf_loss": 7.742871284484863, "vf_explained_var": 0.5606523752212524, "kl": 0.0033344109542667866, "entropy": 0.8209579586982727, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4633600, "num_env_steps_trained": 4633600, "num_agent_steps_sampled": 9267200, "num_agent_steps_trained": 9267200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 473.0, "episode_reward_mean": 573.71, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.855}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.31, "shaped_reward_min": 153, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, 
"useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.14, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.73, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.95, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.75, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.78, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, 
"useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.58, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.75, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.75, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, 
"viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 487.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 636.0, 582.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 633.0, 579.0, 530.0, 579.0, 570.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 576.0, 582.0, 579.0, 587.0, 581.0, 576.0, 576.0, 573.0, 536.0, 573.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 579.0, 522.0, 579.0, 582.0, 582.0, 582.0, 570.0, 582.0, 627.0, 530.0, 582.0, 579.0, 582.0, 579.0, 473.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 627.0, 582.0, 525.0, 582.0, 522.0, 570.0, 582.0, 576.0, 525.0, 570.0, 570.0, 579.0, 525.0, 579.0, 579.0, 576.0, 525.0, 579.0, 579.0, 582.0, 576.0, 579.0, 624.0, 579.0, 570.0, 576.0, 513.0, 579.0, 570.0, 582.0, 582.0, 579.0, 576.0, 576.0, 522.0, 579.0], 
"episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 295.0, 245.0, 242.0, 290.0, 292.0, 285.0, 297.0, 288.0, 282.0, 291.0, 291.0, 288.0, 288.0, 294.0, 285.0, 318.0, 318.0, 286.0, 296.0, 286.0, 293.0, 288.0, 288.0, 295.0, 281.0, 292.0, 290.0, 292.0, 292.0, 287.0, 292.0, 312.0, 321.0, 292.0, 287.0, 268.0, 262.0, 289.0, 290.0, 288.0, 282.0, 281.0, 295.0, 281.0, 295.0, 316.0, 314.0, 291.0, 288.0, 291.0, 288.0, 287.0, 292.0, 290.0, 286.0, 289.0, 293.0, 285.0, 294.0, 294.0, 293.0, 294.0, 287.0, 282.0, 294.0, 285.0, 291.0, 286.0, 287.0, 266.0, 270.0, 283.0, 290.0, 288.0, 291.0, 291.0, 288.0, 291.0, 291.0, 293.0, 286.0, 288.0, 294.0, 286.0, 293.0, 294.0, 285.0, 256.0, 266.0, 284.0, 295.0, 289.0, 293.0, 290.0, 292.0, 290.0, 292.0, 291.0, 279.0, 289.0, 293.0, 311.0, 316.0, 258.0, 272.0, 293.0, 289.0, 290.0, 289.0, 288.0, 294.0, 285.0, 294.0, 234.0, 239.0, 287.0, 289.0, 283.0, 296.0, 298.0, 289.0, 280.0, 302.0, 286.0, 296.0, 291.0, 288.0, 320.0, 307.0, 285.0, 297.0, 262.0, 263.0, 292.0, 290.0, 263.0, 259.0, 276.0, 294.0, 293.0, 289.0, 277.0, 299.0, 266.0, 259.0, 286.0, 284.0, 292.0, 278.0, 283.0, 296.0, 260.0, 265.0, 297.0, 282.0, 293.0, 286.0, 294.0, 282.0, 262.0, 263.0, 287.0, 292.0, 290.0, 289.0, 293.0, 289.0, 293.0, 283.0, 294.0, 285.0, 313.0, 311.0, 291.0, 288.0, 295.0, 275.0, 293.0, 283.0, 249.0, 264.0, 294.0, 285.0, 278.0, 292.0, 286.0, 296.0, 287.0, 295.0, 296.0, 283.0, 286.0, 290.0, 288.0, 288.0, 258.0, 264.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 
0.6985101619700507, "mean_inference_ms": 1.2478661336702375, "mean_action_processing_ms": 0.1338990622465829, "mean_env_wait_ms": 0.8407821499189544, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 473.0, "episode_reward_mean": 573.71, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.855}, "hist_stats": {"episode_reward": [579.0, 487.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 636.0, 582.0, 579.0, 576.0, 576.0, 582.0, 584.0, 579.0, 633.0, 579.0, 530.0, 579.0, 570.0, 576.0, 576.0, 630.0, 579.0, 579.0, 579.0, 576.0, 582.0, 579.0, 587.0, 581.0, 576.0, 576.0, 573.0, 536.0, 573.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 579.0, 522.0, 579.0, 582.0, 582.0, 582.0, 570.0, 582.0, 627.0, 530.0, 582.0, 579.0, 582.0, 579.0, 473.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 627.0, 582.0, 525.0, 582.0, 522.0, 570.0, 582.0, 576.0, 525.0, 570.0, 570.0, 579.0, 525.0, 579.0, 579.0, 576.0, 525.0, 579.0, 579.0, 582.0, 576.0, 579.0, 624.0, 579.0, 570.0, 576.0, 513.0, 579.0, 570.0, 582.0, 582.0, 579.0, 576.0, 576.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 295.0, 245.0, 242.0, 290.0, 292.0, 285.0, 297.0, 288.0, 282.0, 291.0, 291.0, 288.0, 288.0, 294.0, 285.0, 318.0, 318.0, 286.0, 296.0, 286.0, 293.0, 288.0, 288.0, 295.0, 281.0, 292.0, 290.0, 292.0, 292.0, 287.0, 292.0, 312.0, 321.0, 292.0, 287.0, 268.0, 
262.0, 289.0, 290.0, 288.0, 282.0, 281.0, 295.0, 281.0, 295.0, 316.0, 314.0, 291.0, 288.0, 291.0, 288.0, 287.0, 292.0, 290.0, 286.0, 289.0, 293.0, 285.0, 294.0, 294.0, 293.0, 294.0, 287.0, 282.0, 294.0, 285.0, 291.0, 286.0, 287.0, 266.0, 270.0, 283.0, 290.0, 288.0, 291.0, 291.0, 288.0, 291.0, 291.0, 293.0, 286.0, 288.0, 294.0, 286.0, 293.0, 294.0, 285.0, 256.0, 266.0, 284.0, 295.0, 289.0, 293.0, 290.0, 292.0, 290.0, 292.0, 291.0, 279.0, 289.0, 293.0, 311.0, 316.0, 258.0, 272.0, 293.0, 289.0, 290.0, 289.0, 288.0, 294.0, 285.0, 294.0, 234.0, 239.0, 287.0, 289.0, 283.0, 296.0, 298.0, 289.0, 280.0, 302.0, 286.0, 296.0, 291.0, 288.0, 320.0, 307.0, 285.0, 297.0, 262.0, 263.0, 292.0, 290.0, 263.0, 259.0, 276.0, 294.0, 293.0, 289.0, 277.0, 299.0, 266.0, 259.0, 286.0, 284.0, 292.0, 278.0, 283.0, 296.0, 260.0, 265.0, 297.0, 282.0, 293.0, 286.0, 294.0, 282.0, 262.0, 263.0, 287.0, 292.0, 290.0, 289.0, 293.0, 289.0, 293.0, 283.0, 294.0, 285.0, 313.0, 311.0, 291.0, 288.0, 295.0, 275.0, 293.0, 283.0, 249.0, 264.0, 294.0, 285.0, 278.0, 292.0, 286.0, 296.0, 287.0, 295.0, 296.0, 283.0, 286.0, 290.0, 288.0, 288.0, 258.0, 264.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6985101619700507, "mean_inference_ms": 1.2478661336702375, "mean_action_processing_ms": 0.1338990622465829, "mean_env_wait_ms": 0.8407821499189544, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9267200, "num_agent_steps_trained": 9267200, "num_env_steps_sampled": 4633600, "num_env_steps_trained": 4633600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4633600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9267200, "timers": {"training_iteration_time_ms": 3658.002, "learn_time_ms": 1132.247, "learn_throughput": 11304.955, "synch_weights_time_ms": 10.373}, "counters": {"num_env_steps_sampled": 4633600, "num_env_steps_trained": 4633600, "num_agent_steps_sampled": 
9267200, "num_agent_steps_trained": 9267200}, "done": false, "episodes_total": 11584, "training_iteration": 362, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-23-59", "timestamp": 1666581839, "time_this_iter_s": 3.719611883163452, "time_total_s": 1385.459135055542, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": 
false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": 
{"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1385.459135055542, "timesteps_since_restore": 0, "iterations_since_restore": 362, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.84, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.65, "shaped_reward_min": 153, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.86, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.94, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.7, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.86, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.48, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.73, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.67, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 2, 
"soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.49, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.48, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.48, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0033650160767138004, "policy_loss": 0.0030029851477593184, "vf_loss": 7.754143714904785, "vf_explained_var": 0.5768305659294128, "kl": 0.002860500942915678, "entropy": 0.8267655372619629, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4646400, "num_env_steps_trained": 4646400, "num_agent_steps_sampled": 9292800, "num_agent_steps_trained": 9292800}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 473.0, "episode_reward_mean": 572.25, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 286.125}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.65, "shaped_reward_min": 153, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.86, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.94, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.7, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.86, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.48, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 25, "potting_onion_agent_1_mean": 17.57, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.73, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.58, 
"useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.67, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.46, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.49, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.44, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.47, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.48, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 25, "optimal_onion_potting_agent_1_mean": 17.57, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.48, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 25, "viable_onion_potting_agent_1_mean": 17.57, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 573.0, 536.0, 573.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 579.0, 522.0, 579.0, 582.0, 582.0, 582.0, 570.0, 582.0, 627.0, 530.0, 582.0, 579.0, 582.0, 579.0, 473.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 627.0, 582.0, 525.0, 582.0, 522.0, 570.0, 582.0, 576.0, 525.0, 570.0, 570.0, 579.0, 525.0, 579.0, 579.0, 576.0, 525.0, 579.0, 579.0, 582.0, 576.0, 579.0, 624.0, 579.0, 570.0, 576.0, 513.0, 579.0, 570.0, 582.0, 582.0, 579.0, 576.0, 576.0, 522.0, 579.0, 576.0, 579.0, 573.0, 576.0, 579.0, 525.0, 573.0, 522.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 522.0, 576.0, 579.0, 573.0, 587.0, 579.0, 573.0, 579.0, 587.0, 576.0, 582.0, 579.0, 627.0, 579.0, 579.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 294.0, 285.0, 291.0, 286.0, 287.0, 266.0, 270.0, 283.0, 290.0, 288.0, 291.0, 291.0, 288.0, 291.0, 291.0, 293.0, 286.0, 288.0, 294.0, 286.0, 293.0, 294.0, 285.0, 256.0, 266.0, 284.0, 295.0, 289.0, 293.0, 290.0, 292.0, 290.0, 292.0, 291.0, 279.0, 289.0, 293.0, 311.0, 316.0, 258.0, 272.0, 293.0, 289.0, 290.0, 289.0, 288.0, 294.0, 285.0, 294.0, 234.0, 239.0, 287.0, 289.0, 283.0, 296.0, 298.0, 289.0, 280.0, 302.0, 286.0, 296.0, 291.0, 288.0, 320.0, 307.0, 285.0, 297.0, 262.0, 263.0, 292.0, 290.0, 263.0, 259.0, 276.0, 294.0, 293.0, 289.0, 277.0, 299.0, 266.0, 259.0, 286.0, 284.0, 292.0, 278.0, 283.0, 296.0, 260.0, 265.0, 297.0, 282.0, 293.0, 286.0, 294.0, 282.0, 262.0, 263.0, 287.0, 292.0, 290.0, 289.0, 293.0, 289.0, 293.0, 283.0, 294.0, 285.0, 313.0, 311.0, 291.0, 288.0, 295.0, 275.0, 293.0, 283.0, 249.0, 264.0, 294.0, 285.0, 278.0, 292.0, 286.0, 296.0, 287.0, 295.0, 296.0, 283.0, 286.0, 290.0, 288.0, 288.0, 258.0, 264.0, 296.0, 283.0, 292.0, 284.0, 281.0, 298.0, 291.0, 282.0, 294.0, 282.0, 293.0, 286.0, 260.0, 265.0, 285.0, 288.0, 258.0, 264.0, 287.0, 292.0, 293.0, 286.0, 287.0, 292.0, 290.0, 292.0, 288.0, 291.0, 290.0, 292.0, 287.0, 295.0, 265.0, 257.0, 278.0, 298.0, 290.0, 289.0, 272.0, 301.0, 288.0, 299.0, 288.0, 291.0, 287.0, 286.0, 291.0, 288.0, 293.0, 294.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 311.0, 316.0, 286.0, 293.0, 291.0, 288.0, 283.0, 296.0, 292.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6984367152375701, "mean_inference_ms": 
1.2476924170403942, "mean_action_processing_ms": 0.13389245451619106, "mean_env_wait_ms": 0.8406926516686911, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 473.0, "episode_reward_mean": 572.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 286.125}, "hist_stats": {"episode_reward": [576.0, 576.0, 573.0, 536.0, 573.0, 579.0, 579.0, 582.0, 579.0, 582.0, 579.0, 579.0, 522.0, 579.0, 582.0, 582.0, 582.0, 570.0, 582.0, 627.0, 530.0, 582.0, 579.0, 582.0, 579.0, 473.0, 576.0, 579.0, 587.0, 582.0, 582.0, 579.0, 627.0, 582.0, 525.0, 582.0, 522.0, 570.0, 582.0, 576.0, 525.0, 570.0, 570.0, 579.0, 525.0, 579.0, 579.0, 576.0, 525.0, 579.0, 579.0, 582.0, 576.0, 579.0, 624.0, 579.0, 570.0, 576.0, 513.0, 579.0, 570.0, 582.0, 582.0, 579.0, 576.0, 576.0, 522.0, 579.0, 576.0, 579.0, 573.0, 576.0, 579.0, 525.0, 573.0, 522.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 522.0, 576.0, 579.0, 573.0, 587.0, 579.0, 573.0, 579.0, 587.0, 576.0, 582.0, 579.0, 627.0, 579.0, 579.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 294.0, 285.0, 291.0, 286.0, 287.0, 266.0, 270.0, 283.0, 290.0, 288.0, 291.0, 291.0, 288.0, 291.0, 291.0, 293.0, 286.0, 288.0, 294.0, 286.0, 293.0, 294.0, 285.0, 256.0, 266.0, 284.0, 295.0, 289.0, 293.0, 290.0, 292.0, 290.0, 292.0, 291.0, 279.0, 289.0, 293.0, 311.0, 316.0, 258.0, 272.0, 293.0, 
289.0, 290.0, 289.0, 288.0, 294.0, 285.0, 294.0, 234.0, 239.0, 287.0, 289.0, 283.0, 296.0, 298.0, 289.0, 280.0, 302.0, 286.0, 296.0, 291.0, 288.0, 320.0, 307.0, 285.0, 297.0, 262.0, 263.0, 292.0, 290.0, 263.0, 259.0, 276.0, 294.0, 293.0, 289.0, 277.0, 299.0, 266.0, 259.0, 286.0, 284.0, 292.0, 278.0, 283.0, 296.0, 260.0, 265.0, 297.0, 282.0, 293.0, 286.0, 294.0, 282.0, 262.0, 263.0, 287.0, 292.0, 290.0, 289.0, 293.0, 289.0, 293.0, 283.0, 294.0, 285.0, 313.0, 311.0, 291.0, 288.0, 295.0, 275.0, 293.0, 283.0, 249.0, 264.0, 294.0, 285.0, 278.0, 292.0, 286.0, 296.0, 287.0, 295.0, 296.0, 283.0, 286.0, 290.0, 288.0, 288.0, 258.0, 264.0, 296.0, 283.0, 292.0, 284.0, 281.0, 298.0, 291.0, 282.0, 294.0, 282.0, 293.0, 286.0, 260.0, 265.0, 285.0, 288.0, 258.0, 264.0, 287.0, 292.0, 293.0, 286.0, 287.0, 292.0, 290.0, 292.0, 288.0, 291.0, 290.0, 292.0, 287.0, 295.0, 265.0, 257.0, 278.0, 298.0, 290.0, 289.0, 272.0, 301.0, 288.0, 299.0, 288.0, 291.0, 287.0, 286.0, 291.0, 288.0, 293.0, 294.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 311.0, 316.0, 286.0, 293.0, 291.0, 288.0, 283.0, 296.0, 292.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6984367152375701, "mean_inference_ms": 1.2476924170403942, "mean_action_processing_ms": 0.13389245451619106, "mean_env_wait_ms": 0.8406926516686911, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9292800, "num_agent_steps_trained": 9292800, "num_env_steps_sampled": 4646400, "num_env_steps_trained": 4646400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4646400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9292800, "timers": {"training_iteration_time_ms": 3657.561, "learn_time_ms": 1131.754, "learn_throughput": 11309.877, "synch_weights_time_ms": 10.997}, "counters": {"num_env_steps_sampled": 4646400, "num_env_steps_trained": 4646400, "num_agent_steps_sampled": 9292800, "num_agent_steps_trained": 
9292800}, "done": false, "episodes_total": 11616, "training_iteration": 363, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-03", "timestamp": 1666581843, "time_this_iter_s": 3.6965436935424805, "time_total_s": 1389.1556787490845, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": 
false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, 
"actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1389.1556787490845, "timesteps_since_restore": 0, "iterations_since_restore": 363, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.516666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 173.97, "shaped_reward_min": 74, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.13, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.29, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.72, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.9, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.33, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.23, 
"soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.55, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.72, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.9, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.72, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.9, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001889953389763832, "policy_loss": 0.0015104906633496284, "vf_loss": 7.811895847320557, "vf_explained_var": 0.5815200805664062, "kl": 0.004921192303299904, "entropy": 0.803455650806427, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4659200, "num_env_steps_trained": 4659200, "num_agent_steps_sampled": 9318400, "num_agent_steps_trained": 9318400}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 234.0, "episode_reward_mean": 563.17, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 281.585}, "custom_metrics": {"sparse_reward_mean": 194.6, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 173.97, "shaped_reward_min": 74, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.13, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.29, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 27, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.2, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 27, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.72, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.9, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.85, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.33, 
"useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.55, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.21, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.52, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.72, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.9, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.72, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.9, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 525.0, 582.0, 522.0, 570.0, 582.0, 576.0, 525.0, 570.0, 570.0, 579.0, 525.0, 579.0, 579.0, 576.0, 525.0, 579.0, 579.0, 582.0, 576.0, 579.0, 624.0, 579.0, 570.0, 576.0, 513.0, 579.0, 570.0, 582.0, 582.0, 579.0, 576.0, 576.0, 522.0, 579.0, 576.0, 579.0, 573.0, 576.0, 579.0, 525.0, 573.0, 522.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 522.0, 576.0, 579.0, 573.0, 587.0, 579.0, 573.0, 579.0, 587.0, 576.0, 582.0, 579.0, 627.0, 579.0, 579.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 525.0, 576.0, 519.0, 570.0, 576.0, 579.0, 582.0, 533.0, 579.0, 582.0, 525.0, 573.0, 582.0, 262.0, 582.0, 234.0, 582.0, 525.0, 510.0, 579.0, 579.0, 522.0, 582.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [320.0, 307.0, 285.0, 297.0, 262.0, 263.0, 292.0, 290.0, 263.0, 259.0, 276.0, 294.0, 293.0, 289.0, 277.0, 299.0, 266.0, 259.0, 286.0, 284.0, 292.0, 278.0, 283.0, 296.0, 260.0, 265.0, 297.0, 282.0, 293.0, 286.0, 294.0, 282.0, 262.0, 263.0, 287.0, 292.0, 290.0, 289.0, 293.0, 289.0, 293.0, 283.0, 294.0, 285.0, 313.0, 311.0, 291.0, 288.0, 295.0, 275.0, 293.0, 283.0, 249.0, 264.0, 294.0, 285.0, 278.0, 292.0, 286.0, 296.0, 287.0, 295.0, 296.0, 283.0, 286.0, 290.0, 288.0, 288.0, 258.0, 264.0, 296.0, 283.0, 292.0, 284.0, 281.0, 298.0, 291.0, 282.0, 294.0, 282.0, 293.0, 286.0, 260.0, 265.0, 285.0, 288.0, 258.0, 264.0, 287.0, 292.0, 293.0, 286.0, 287.0, 292.0, 290.0, 292.0, 288.0, 291.0, 290.0, 292.0, 287.0, 295.0, 265.0, 257.0, 278.0, 298.0, 290.0, 289.0, 272.0, 301.0, 288.0, 299.0, 288.0, 291.0, 287.0, 286.0, 291.0, 288.0, 293.0, 294.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 311.0, 316.0, 286.0, 293.0, 291.0, 288.0, 283.0, 296.0, 292.0, 287.0, 261.0, 261.0, 287.0, 292.0, 286.0, 296.0, 286.0, 290.0, 293.0, 289.0, 294.0, 285.0, 294.0, 288.0, 256.0, 269.0, 285.0, 291.0, 263.0, 256.0, 292.0, 278.0, 279.0, 297.0, 289.0, 290.0, 287.0, 295.0, 270.0, 263.0, 290.0, 289.0, 292.0, 290.0, 256.0, 269.0, 285.0, 288.0, 290.0, 292.0, 128.0, 134.0, 290.0, 292.0, 114.0, 120.0, 294.0, 288.0, 254.0, 271.0, 254.0, 256.0, 289.0, 290.0, 288.0, 291.0, 253.0, 269.0, 289.0, 293.0, 288.0, 291.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6983602775014129, "mean_inference_ms": 
1.2474985858875547, "mean_action_processing_ms": 0.13388312760671892, "mean_env_wait_ms": 0.8405829253975301, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 234.0, "episode_reward_mean": 563.17, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 281.585}, "hist_stats": {"episode_reward": [627.0, 582.0, 525.0, 582.0, 522.0, 570.0, 582.0, 576.0, 525.0, 570.0, 570.0, 579.0, 525.0, 579.0, 579.0, 576.0, 525.0, 579.0, 579.0, 582.0, 576.0, 579.0, 624.0, 579.0, 570.0, 576.0, 513.0, 579.0, 570.0, 582.0, 582.0, 579.0, 576.0, 576.0, 522.0, 579.0, 576.0, 579.0, 573.0, 576.0, 579.0, 525.0, 573.0, 522.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 522.0, 576.0, 579.0, 573.0, 587.0, 579.0, 573.0, 579.0, 587.0, 576.0, 582.0, 579.0, 627.0, 579.0, 579.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 525.0, 576.0, 519.0, 570.0, 576.0, 579.0, 582.0, 533.0, 579.0, 582.0, 525.0, 573.0, 582.0, 262.0, 582.0, 234.0, 582.0, 525.0, 510.0, 579.0, 579.0, 522.0, 582.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [320.0, 307.0, 285.0, 297.0, 262.0, 263.0, 292.0, 290.0, 263.0, 259.0, 276.0, 294.0, 293.0, 289.0, 277.0, 299.0, 266.0, 259.0, 286.0, 284.0, 292.0, 278.0, 283.0, 296.0, 260.0, 265.0, 297.0, 282.0, 293.0, 286.0, 294.0, 282.0, 262.0, 263.0, 287.0, 292.0, 290.0, 289.0, 293.0, 289.0, 293.0, 283.0, 294.0, 
285.0, 313.0, 311.0, 291.0, 288.0, 295.0, 275.0, 293.0, 283.0, 249.0, 264.0, 294.0, 285.0, 278.0, 292.0, 286.0, 296.0, 287.0, 295.0, 296.0, 283.0, 286.0, 290.0, 288.0, 288.0, 258.0, 264.0, 296.0, 283.0, 292.0, 284.0, 281.0, 298.0, 291.0, 282.0, 294.0, 282.0, 293.0, 286.0, 260.0, 265.0, 285.0, 288.0, 258.0, 264.0, 287.0, 292.0, 293.0, 286.0, 287.0, 292.0, 290.0, 292.0, 288.0, 291.0, 290.0, 292.0, 287.0, 295.0, 265.0, 257.0, 278.0, 298.0, 290.0, 289.0, 272.0, 301.0, 288.0, 299.0, 288.0, 291.0, 287.0, 286.0, 291.0, 288.0, 293.0, 294.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 311.0, 316.0, 286.0, 293.0, 291.0, 288.0, 283.0, 296.0, 292.0, 287.0, 261.0, 261.0, 287.0, 292.0, 286.0, 296.0, 286.0, 290.0, 293.0, 289.0, 294.0, 285.0, 294.0, 288.0, 256.0, 269.0, 285.0, 291.0, 263.0, 256.0, 292.0, 278.0, 279.0, 297.0, 289.0, 290.0, 287.0, 295.0, 270.0, 263.0, 290.0, 289.0, 292.0, 290.0, 256.0, 269.0, 285.0, 288.0, 290.0, 292.0, 128.0, 134.0, 290.0, 292.0, 114.0, 120.0, 294.0, 288.0, 254.0, 271.0, 254.0, 256.0, 289.0, 290.0, 288.0, 291.0, 253.0, 269.0, 289.0, 293.0, 288.0, 291.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6983602775014129, "mean_inference_ms": 1.2474985858875547, "mean_action_processing_ms": 0.13388312760671892, "mean_env_wait_ms": 0.8405829253975301, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9318400, "num_agent_steps_trained": 9318400, "num_env_steps_sampled": 4659200, "num_env_steps_trained": 4659200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4659200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9318400, "timers": {"training_iteration_time_ms": 3621.735, "learn_time_ms": 1123.322, "learn_throughput": 11394.777, "synch_weights_time_ms": 11.703}, "counters": {"num_env_steps_sampled": 4659200, "num_env_steps_trained": 4659200, "num_agent_steps_sampled": 9318400, "num_agent_steps_trained": 
9318400}, "done": false, "episodes_total": 11648, "training_iteration": 364, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-07", "timestamp": 1666581847, "time_this_iter_s": 3.602010726928711, "time_total_s": 1392.7576894760132, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, 
"disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, 
"actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1392.7576894760132, "timesteps_since_restore": 0, "iterations_since_restore": 364, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.020000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.76, "shaped_reward_min": 74, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.19, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.08, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.81, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.32, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.23, 
"soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.81, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.81, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001957516185939312, "policy_loss": 0.001595453592017293, "vf_loss": 7.755298614501953, "vf_explained_var": 0.5773959159851074, "kl": 0.0023900310043245554, "entropy": 0.826931357383728, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4672000, "num_env_steps_trained": 4672000, "num_agent_steps_sampled": 9344000, "num_agent_steps_trained": 9344000}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 234.0, "episode_reward_mean": 563.56, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 281.78}, "custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.76, "shaped_reward_min": 74, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.19, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.08, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.99, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.81, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.32, 
"useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.99, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.81, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.99, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.81, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 522.0, 579.0, 576.0, 579.0, 573.0, 576.0, 579.0, 525.0, 573.0, 522.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 522.0, 576.0, 579.0, 573.0, 587.0, 579.0, 573.0, 579.0, 587.0, 576.0, 582.0, 579.0, 627.0, 579.0, 579.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 525.0, 576.0, 519.0, 570.0, 576.0, 579.0, 582.0, 533.0, 579.0, 582.0, 525.0, 573.0, 582.0, 262.0, 582.0, 234.0, 582.0, 525.0, 510.0, 579.0, 579.0, 522.0, 582.0, 579.0, 582.0, 587.0, 576.0, 582.0, 525.0, 530.0, 582.0, 582.0, 579.0, 579.0, 522.0, 587.0, 579.0, 627.0, 573.0, 573.0, 522.0, 582.0, 579.0, 579.0, 582.0, 582.0, 579.0, 536.0, 582.0, 587.0, 576.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 288.0, 288.0, 258.0, 264.0, 296.0, 283.0, 292.0, 284.0, 281.0, 298.0, 291.0, 282.0, 294.0, 282.0, 293.0, 286.0, 260.0, 265.0, 285.0, 288.0, 258.0, 264.0, 287.0, 292.0, 293.0, 286.0, 287.0, 292.0, 290.0, 292.0, 288.0, 291.0, 290.0, 292.0, 287.0, 295.0, 265.0, 257.0, 278.0, 298.0, 290.0, 289.0, 272.0, 301.0, 288.0, 299.0, 288.0, 291.0, 287.0, 286.0, 291.0, 288.0, 293.0, 294.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 311.0, 316.0, 286.0, 293.0, 291.0, 288.0, 283.0, 296.0, 292.0, 287.0, 261.0, 261.0, 287.0, 292.0, 286.0, 296.0, 286.0, 290.0, 293.0, 289.0, 294.0, 285.0, 294.0, 288.0, 256.0, 269.0, 285.0, 291.0, 263.0, 256.0, 292.0, 278.0, 279.0, 297.0, 289.0, 290.0, 287.0, 295.0, 270.0, 263.0, 290.0, 289.0, 292.0, 290.0, 256.0, 269.0, 285.0, 288.0, 290.0, 292.0, 128.0, 134.0, 290.0, 292.0, 114.0, 120.0, 294.0, 288.0, 254.0, 271.0, 254.0, 256.0, 289.0, 290.0, 288.0, 291.0, 253.0, 269.0, 289.0, 293.0, 288.0, 291.0, 292.0, 290.0, 297.0, 290.0, 294.0, 282.0, 293.0, 289.0, 261.0, 264.0, 272.0, 258.0, 290.0, 292.0, 290.0, 292.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 296.0, 291.0, 276.0, 303.0, 323.0, 304.0, 284.0, 289.0, 294.0, 279.0, 270.0, 252.0, 291.0, 291.0, 293.0, 286.0, 296.0, 283.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 272.0, 264.0, 288.0, 294.0, 294.0, 293.0, 293.0, 283.0, 270.0, 260.0, 288.0, 285.0, 292.0, 287.0, 287.0, 289.0, 288.0, 291.0, 291.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6982781829858854, "mean_inference_ms": 
1.2473055426051993, "mean_action_processing_ms": 0.13387086509086388, "mean_env_wait_ms": 0.8404633684404762, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 234.0, "episode_reward_mean": 563.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 281.78}, "hist_stats": {"episode_reward": [576.0, 576.0, 522.0, 579.0, 576.0, 579.0, 573.0, 576.0, 579.0, 525.0, 573.0, 522.0, 579.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 522.0, 576.0, 579.0, 573.0, 587.0, 579.0, 573.0, 579.0, 587.0, 576.0, 582.0, 579.0, 627.0, 579.0, 579.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 525.0, 576.0, 519.0, 570.0, 576.0, 579.0, 582.0, 533.0, 579.0, 582.0, 525.0, 573.0, 582.0, 262.0, 582.0, 234.0, 582.0, 525.0, 510.0, 579.0, 579.0, 522.0, 582.0, 579.0, 582.0, 587.0, 576.0, 582.0, 525.0, 530.0, 582.0, 582.0, 579.0, 579.0, 522.0, 587.0, 579.0, 627.0, 573.0, 573.0, 522.0, 582.0, 579.0, 579.0, 582.0, 582.0, 579.0, 536.0, 582.0, 587.0, 576.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 290.0, 288.0, 288.0, 258.0, 264.0, 296.0, 283.0, 292.0, 284.0, 281.0, 298.0, 291.0, 282.0, 294.0, 282.0, 293.0, 286.0, 260.0, 265.0, 285.0, 288.0, 258.0, 264.0, 287.0, 292.0, 293.0, 286.0, 287.0, 292.0, 290.0, 292.0, 288.0, 291.0, 290.0, 292.0, 287.0, 295.0, 265.0, 257.0, 278.0, 298.0, 290.0, 
289.0, 272.0, 301.0, 288.0, 299.0, 288.0, 291.0, 287.0, 286.0, 291.0, 288.0, 293.0, 294.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 311.0, 316.0, 286.0, 293.0, 291.0, 288.0, 283.0, 296.0, 292.0, 287.0, 261.0, 261.0, 287.0, 292.0, 286.0, 296.0, 286.0, 290.0, 293.0, 289.0, 294.0, 285.0, 294.0, 288.0, 256.0, 269.0, 285.0, 291.0, 263.0, 256.0, 292.0, 278.0, 279.0, 297.0, 289.0, 290.0, 287.0, 295.0, 270.0, 263.0, 290.0, 289.0, 292.0, 290.0, 256.0, 269.0, 285.0, 288.0, 290.0, 292.0, 128.0, 134.0, 290.0, 292.0, 114.0, 120.0, 294.0, 288.0, 254.0, 271.0, 254.0, 256.0, 289.0, 290.0, 288.0, 291.0, 253.0, 269.0, 289.0, 293.0, 288.0, 291.0, 292.0, 290.0, 297.0, 290.0, 294.0, 282.0, 293.0, 289.0, 261.0, 264.0, 272.0, 258.0, 290.0, 292.0, 290.0, 292.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 296.0, 291.0, 276.0, 303.0, 323.0, 304.0, 284.0, 289.0, 294.0, 279.0, 270.0, 252.0, 291.0, 291.0, 293.0, 286.0, 296.0, 283.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 272.0, 264.0, 288.0, 294.0, 294.0, 293.0, 293.0, 283.0, 270.0, 260.0, 288.0, 285.0, 292.0, 287.0, 287.0, 289.0, 288.0, 291.0, 291.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6982781829858854, "mean_inference_ms": 1.2473055426051993, "mean_action_processing_ms": 0.13387086509086388, "mean_env_wait_ms": 0.8404633684404762, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9344000, "num_agent_steps_trained": 9344000, "num_env_steps_sampled": 4672000, "num_env_steps_trained": 4672000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4672000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9344000, "timers": {"training_iteration_time_ms": 3618.683, "learn_time_ms": 1129.566, "learn_throughput": 11331.783, "synch_weights_time_ms": 12.572}, "counters": {"num_env_steps_sampled": 4672000, "num_env_steps_trained": 4672000, "num_agent_steps_sampled": 9344000, "num_agent_steps_trained": 
9344000}, "done": false, "episodes_total": 11680, "training_iteration": 365, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-10", "timestamp": 1666581850, "time_this_iter_s": 3.625788450241089, "time_total_s": 1396.3834779262543, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, 
"disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, 
"actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1396.3834779262543, "timesteps_since_restore": 0, "iterations_since_restore": 365, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.933333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.63, "shaped_reward_min": 74, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, 
"potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.63, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.92, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.49, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.77, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.27, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.51, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.13, 
"soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.27, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.51, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.27, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.51, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009442369337193668, "policy_loss": -0.0013090715510770679, "vf_loss": 7.695399284362793, "vf_explained_var": 0.572861909866333, "kl": 0.002738791285082698, "entropy": 0.8094083666801453, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4684800, "num_env_steps_trained": 4684800, "num_agent_steps_sampled": 9369600, "num_agent_steps_trained": 9369600}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 234.0, "episode_reward_mean": 562.23, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 281.115}, "custom_metrics": {"sparse_reward_mean": 193.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 174.63, "shaped_reward_min": 74, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.63, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 16.92, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.49, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 16.77, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.27, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.51, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.25, 
"useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.69, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.27, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.51, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.27, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.51, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 525.0, 576.0, 519.0, 570.0, 576.0, 579.0, 582.0, 533.0, 579.0, 582.0, 525.0, 573.0, 582.0, 262.0, 582.0, 234.0, 582.0, 525.0, 510.0, 579.0, 579.0, 522.0, 582.0, 579.0, 582.0, 587.0, 576.0, 582.0, 525.0, 530.0, 582.0, 582.0, 579.0, 579.0, 522.0, 587.0, 579.0, 627.0, 573.0, 573.0, 522.0, 582.0, 579.0, 579.0, 582.0, 582.0, 579.0, 536.0, 582.0, 587.0, 576.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 587.0, 579.0, 476.0, 573.0, 522.0, 579.0, 522.0, 582.0, 579.0, 519.0, 579.0, 579.0, 579.0, 582.0, 576.0, 525.0, 582.0, 576.0, 584.0, 579.0, 579.0, 579.0, 570.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 291.0, 288.0, 283.0, 296.0, 292.0, 287.0, 261.0, 261.0, 287.0, 292.0, 286.0, 296.0, 286.0, 290.0, 293.0, 289.0, 294.0, 285.0, 294.0, 288.0, 256.0, 269.0, 285.0, 291.0, 263.0, 256.0, 292.0, 278.0, 279.0, 297.0, 289.0, 290.0, 287.0, 295.0, 270.0, 263.0, 290.0, 289.0, 292.0, 290.0, 256.0, 269.0, 285.0, 288.0, 290.0, 292.0, 128.0, 134.0, 290.0, 292.0, 114.0, 120.0, 294.0, 288.0, 254.0, 271.0, 254.0, 256.0, 289.0, 290.0, 288.0, 291.0, 253.0, 269.0, 289.0, 293.0, 288.0, 291.0, 292.0, 290.0, 297.0, 290.0, 294.0, 282.0, 293.0, 289.0, 261.0, 264.0, 272.0, 258.0, 290.0, 292.0, 290.0, 292.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 296.0, 291.0, 276.0, 303.0, 323.0, 304.0, 284.0, 289.0, 294.0, 279.0, 270.0, 252.0, 291.0, 291.0, 293.0, 286.0, 296.0, 283.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 272.0, 264.0, 288.0, 294.0, 294.0, 293.0, 293.0, 283.0, 270.0, 260.0, 288.0, 285.0, 292.0, 287.0, 287.0, 289.0, 288.0, 291.0, 291.0, 285.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 285.0, 294.0, 236.0, 240.0, 285.0, 288.0, 262.0, 260.0, 294.0, 285.0, 257.0, 265.0, 289.0, 293.0, 290.0, 289.0, 257.0, 262.0, 294.0, 285.0, 293.0, 286.0, 294.0, 285.0, 281.0, 301.0, 287.0, 289.0, 269.0, 256.0, 288.0, 294.0, 281.0, 295.0, 295.0, 289.0, 290.0, 289.0, 289.0, 290.0, 280.0, 299.0, 292.0, 278.0, 286.0, 284.0, 289.0, 293.0, 298.0, 284.0, 297.0, 285.0, 291.0, 291.0, 286.0, 296.0, 287.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6982017257399036, "mean_inference_ms": 
1.2471213356037625, "mean_action_processing_ms": 0.13385877685009473, "mean_env_wait_ms": 0.8403536456725641, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 234.0, "episode_reward_mean": 562.23, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 114.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 281.115}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 579.0, 522.0, 579.0, 582.0, 576.0, 582.0, 579.0, 582.0, 525.0, 576.0, 519.0, 570.0, 576.0, 579.0, 582.0, 533.0, 579.0, 582.0, 525.0, 573.0, 582.0, 262.0, 582.0, 234.0, 582.0, 525.0, 510.0, 579.0, 579.0, 522.0, 582.0, 579.0, 582.0, 587.0, 576.0, 582.0, 525.0, 530.0, 582.0, 582.0, 579.0, 579.0, 522.0, 587.0, 579.0, 627.0, 573.0, 573.0, 522.0, 582.0, 579.0, 579.0, 582.0, 582.0, 579.0, 536.0, 582.0, 587.0, 576.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 587.0, 579.0, 476.0, 573.0, 522.0, 579.0, 522.0, 582.0, 579.0, 519.0, 579.0, 579.0, 579.0, 582.0, 576.0, 525.0, 582.0, 576.0, 584.0, 579.0, 579.0, 579.0, 570.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 291.0, 288.0, 283.0, 296.0, 292.0, 287.0, 261.0, 261.0, 287.0, 292.0, 286.0, 296.0, 286.0, 290.0, 293.0, 289.0, 294.0, 285.0, 294.0, 288.0, 256.0, 269.0, 285.0, 291.0, 263.0, 256.0, 292.0, 278.0, 279.0, 297.0, 289.0, 290.0, 287.0, 295.0, 270.0, 263.0, 290.0, 289.0, 292.0, 290.0, 256.0, 
269.0, 285.0, 288.0, 290.0, 292.0, 128.0, 134.0, 290.0, 292.0, 114.0, 120.0, 294.0, 288.0, 254.0, 271.0, 254.0, 256.0, 289.0, 290.0, 288.0, 291.0, 253.0, 269.0, 289.0, 293.0, 288.0, 291.0, 292.0, 290.0, 297.0, 290.0, 294.0, 282.0, 293.0, 289.0, 261.0, 264.0, 272.0, 258.0, 290.0, 292.0, 290.0, 292.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 296.0, 291.0, 276.0, 303.0, 323.0, 304.0, 284.0, 289.0, 294.0, 279.0, 270.0, 252.0, 291.0, 291.0, 293.0, 286.0, 296.0, 283.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 272.0, 264.0, 288.0, 294.0, 294.0, 293.0, 293.0, 283.0, 270.0, 260.0, 288.0, 285.0, 292.0, 287.0, 287.0, 289.0, 288.0, 291.0, 291.0, 285.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 285.0, 294.0, 236.0, 240.0, 285.0, 288.0, 262.0, 260.0, 294.0, 285.0, 257.0, 265.0, 289.0, 293.0, 290.0, 289.0, 257.0, 262.0, 294.0, 285.0, 293.0, 286.0, 294.0, 285.0, 281.0, 301.0, 287.0, 289.0, 269.0, 256.0, 288.0, 294.0, 281.0, 295.0, 295.0, 289.0, 290.0, 289.0, 289.0, 290.0, 280.0, 299.0, 292.0, 278.0, 286.0, 284.0, 289.0, 293.0, 298.0, 284.0, 297.0, 285.0, 291.0, 291.0, 286.0, 296.0, 287.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6982017257399036, "mean_inference_ms": 1.2471213356037625, "mean_action_processing_ms": 0.13385877685009473, "mean_env_wait_ms": 0.8403536456725641, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9369600, "num_agent_steps_trained": 9369600, "num_env_steps_sampled": 4684800, "num_env_steps_trained": 4684800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4684800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9369600, "timers": {"training_iteration_time_ms": 3608.26, "learn_time_ms": 1122.231, "learn_throughput": 11405.851, "synch_weights_time_ms": 12.679}, "counters": {"num_env_steps_sampled": 4684800, "num_env_steps_trained": 4684800, "num_agent_steps_sampled": 9369600, "num_agent_steps_trained": 9369600}, 
"done": false, "episodes_total": 11712, "training_iteration": 366, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-14", "timestamp": 1666581854, "time_this_iter_s": 3.619035005569458, "time_total_s": 1400.0025129318237, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, 
"disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, 
"actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, 
"normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": 
"StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, 
"entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1400.0025129318237, "timesteps_since_restore": 0, "iterations_since_restore": 366, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.5, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 176.29, "shaped_reward_min": 111, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, 
"potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.33, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.14, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.1, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.9, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 2, 
"soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.1, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.9, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.1, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.9, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 
0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011664158664643764, "policy_loss": -0.001532219466753304, "vf_loss": 7.772946357727051, "vf_explained_var": 0.5732956528663635, "kl": 0.002272401936352253, "entropy": 0.8229776620864868, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4697600, "num_env_steps_trained": 4697600, "num_agent_steps_sampled": 9395200, "num_agent_steps_trained": 9395200}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 351.0, "episode_reward_mean": 568.69, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 169.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.345}, "custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 176.29, "shaped_reward_min": 111, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, 
"useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 25, "onion_pickup_agent_1_mean": 17.33, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 25, "useful_onion_pickup_agent_1_mean": 17.14, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.1, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.9, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.87, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 2, 
"useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.33, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.1, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.9, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.1, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.9, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, 
"viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 582.0, 579.0, 582.0, 587.0, 576.0, 582.0, 525.0, 530.0, 582.0, 582.0, 579.0, 579.0, 522.0, 587.0, 579.0, 627.0, 573.0, 573.0, 522.0, 582.0, 579.0, 579.0, 582.0, 582.0, 579.0, 536.0, 582.0, 587.0, 576.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 587.0, 579.0, 476.0, 573.0, 522.0, 579.0, 522.0, 582.0, 579.0, 519.0, 579.0, 579.0, 579.0, 582.0, 576.0, 525.0, 582.0, 576.0, 584.0, 579.0, 579.0, 579.0, 570.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 525.0, 581.0, 582.0, 584.0, 582.0, 522.0, 570.0, 579.0, 579.0, 582.0, 351.0, 579.0, 522.0, 582.0, 573.0, 576.0, 579.0, 576.0, 582.0, 582.0, 573.0, 627.0, 525.0, 582.0, 582.0, 573.0, 573.0, 582.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 269.0, 289.0, 293.0, 288.0, 291.0, 292.0, 290.0, 297.0, 290.0, 294.0, 282.0, 293.0, 289.0, 261.0, 264.0, 272.0, 258.0, 290.0, 292.0, 290.0, 292.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 296.0, 291.0, 276.0, 303.0, 323.0, 304.0, 284.0, 289.0, 294.0, 279.0, 270.0, 252.0, 291.0, 291.0, 293.0, 286.0, 296.0, 283.0, 293.0, 289.0, 289.0, 293.0, 291.0, 288.0, 272.0, 264.0, 288.0, 294.0, 294.0, 293.0, 293.0, 283.0, 270.0, 260.0, 288.0, 285.0, 292.0, 287.0, 287.0, 289.0, 288.0, 291.0, 291.0, 285.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 285.0, 294.0, 236.0, 240.0, 285.0, 288.0, 262.0, 260.0, 294.0, 285.0, 257.0, 265.0, 289.0, 293.0, 290.0, 289.0, 257.0, 262.0, 294.0, 285.0, 293.0, 286.0, 294.0, 285.0, 281.0, 301.0, 287.0, 289.0, 269.0, 256.0, 288.0, 294.0, 281.0, 295.0, 295.0, 289.0, 290.0, 289.0, 289.0, 290.0, 280.0, 299.0, 292.0, 278.0, 286.0, 284.0, 289.0, 293.0, 298.0, 284.0, 297.0, 285.0, 291.0, 291.0, 286.0, 296.0, 287.0, 292.0, 284.0, 292.0, 290.0, 289.0, 263.0, 262.0, 291.0, 290.0, 290.0, 292.0, 298.0, 286.0, 290.0, 292.0, 269.0, 253.0, 290.0, 280.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 169.0, 182.0, 288.0, 291.0, 262.0, 260.0, 295.0, 287.0, 281.0, 292.0, 286.0, 290.0, 291.0, 288.0, 287.0, 289.0, 290.0, 292.0, 291.0, 291.0, 287.0, 286.0, 315.0, 312.0, 257.0, 268.0, 290.0, 292.0, 289.0, 293.0, 286.0, 287.0, 287.0, 286.0, 289.0, 293.0, 282.0, 300.0, 285.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6981296342327503, "mean_inference_ms": 1.246966280399733, "mean_action_processing_ms": 
0.1338488189025943, "mean_env_wait_ms": 0.8402657892625757, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 351.0, "episode_reward_mean": 568.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 169.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.345}, "hist_stats": {"episode_reward": [522.0, 582.0, 579.0, 582.0, 587.0, 576.0, 582.0, 525.0, 530.0, 582.0, 582.0, 579.0, 579.0, 522.0, 587.0, 579.0, 627.0, 573.0, 573.0, 522.0, 582.0, 579.0, 579.0, 582.0, 582.0, 579.0, 536.0, 582.0, 587.0, 576.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 587.0, 579.0, 476.0, 573.0, 522.0, 579.0, 522.0, 582.0, 579.0, 519.0, 579.0, 579.0, 579.0, 582.0, 576.0, 525.0, 582.0, 576.0, 584.0, 579.0, 579.0, 579.0, 570.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 525.0, 581.0, 582.0, 584.0, 582.0, 522.0, 570.0, 579.0, 579.0, 582.0, 351.0, 579.0, 522.0, 582.0, 573.0, 576.0, 579.0, 576.0, 582.0, 582.0, 573.0, 627.0, 525.0, 582.0, 582.0, 573.0, 573.0, 582.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 269.0, 289.0, 293.0, 288.0, 291.0, 292.0, 290.0, 297.0, 290.0, 294.0, 282.0, 293.0, 289.0, 261.0, 264.0, 272.0, 258.0, 290.0, 292.0, 290.0, 292.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 296.0, 291.0, 276.0, 303.0, 323.0, 304.0, 284.0, 289.0, 294.0, 279.0, 270.0, 252.0, 291.0, 291.0, 293.0, 286.0, 296.0, 283.0, 293.0, 289.0, 289.0, 293.0, 
291.0, 288.0, 272.0, 264.0, 288.0, 294.0, 294.0, 293.0, 293.0, 283.0, 270.0, 260.0, 288.0, 285.0, 292.0, 287.0, 287.0, 289.0, 288.0, 291.0, 291.0, 285.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 285.0, 294.0, 236.0, 240.0, 285.0, 288.0, 262.0, 260.0, 294.0, 285.0, 257.0, 265.0, 289.0, 293.0, 290.0, 289.0, 257.0, 262.0, 294.0, 285.0, 293.0, 286.0, 294.0, 285.0, 281.0, 301.0, 287.0, 289.0, 269.0, 256.0, 288.0, 294.0, 281.0, 295.0, 295.0, 289.0, 290.0, 289.0, 289.0, 290.0, 280.0, 299.0, 292.0, 278.0, 286.0, 284.0, 289.0, 293.0, 298.0, 284.0, 297.0, 285.0, 291.0, 291.0, 286.0, 296.0, 287.0, 292.0, 284.0, 292.0, 290.0, 289.0, 263.0, 262.0, 291.0, 290.0, 290.0, 292.0, 298.0, 286.0, 290.0, 292.0, 269.0, 253.0, 290.0, 280.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 169.0, 182.0, 288.0, 291.0, 262.0, 260.0, 295.0, 287.0, 281.0, 292.0, 286.0, 290.0, 291.0, 288.0, 287.0, 289.0, 290.0, 292.0, 291.0, 291.0, 287.0, 286.0, 315.0, 312.0, 257.0, 268.0, 290.0, 292.0, 289.0, 293.0, 286.0, 287.0, 287.0, 286.0, 289.0, 293.0, 282.0, 300.0, 285.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6981296342327503, "mean_inference_ms": 1.246966280399733, "mean_action_processing_ms": 0.1338488189025943, "mean_env_wait_ms": 0.8402657892625757, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9395200, "num_agent_steps_trained": 9395200, "num_env_steps_sampled": 4697600, "num_env_steps_trained": 4697600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4697600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9395200, "timers": {"training_iteration_time_ms": 3606.873, "learn_time_ms": 1118.554, "learn_throughput": 11443.345, "synch_weights_time_ms": 13.061}, "counters": {"num_env_steps_sampled": 4697600, "num_env_steps_trained": 4697600, "num_agent_steps_sampled": 9395200, "num_agent_steps_trained": 9395200}, "done": false, "episodes_total": 11744, 
"training_iteration": 367, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-18", "timestamp": 1666581858, "time_this_iter_s": 3.683014392852783, "time_total_s": 1403.6855273246765, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, 
"num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, 
"shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, 
"num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, 
"postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, 
"lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, 
"vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1403.6855273246765, "timesteps_since_restore": 0, "iterations_since_restore": 367, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.86, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.32, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.34, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.14, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.94, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.68, 
"soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.94, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.94, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005987448384985328, "policy_loss": -0.0009642803925089538, "vf_loss": 7.757267475128174, "vf_explained_var": 0.5559936165809631, "kl": 0.0024835034273564816, "entropy": 0.8203800916671753, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4710400, "num_env_steps_trained": 4710400, "num_agent_steps_sampled": 9420800, "num_agent_steps_trained": 9420800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 566.92, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 169.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 283.46}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.32, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 
0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.33, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.34, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.14, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.15, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.94, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.85, 
"useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.03, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.94, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.94, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, 
"viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 587.0, 579.0, 476.0, 573.0, 522.0, 579.0, 522.0, 582.0, 579.0, 519.0, 579.0, 579.0, 579.0, 582.0, 576.0, 525.0, 582.0, 576.0, 584.0, 579.0, 579.0, 579.0, 570.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 525.0, 581.0, 582.0, 584.0, 582.0, 522.0, 570.0, 579.0, 579.0, 582.0, 351.0, 579.0, 522.0, 582.0, 573.0, 576.0, 579.0, 576.0, 582.0, 582.0, 573.0, 627.0, 525.0, 582.0, 582.0, 573.0, 573.0, 582.0, 582.0, 576.0, 579.0, 630.0, 539.0, 570.0, 579.0, 582.0, 579.0, 579.0, 570.0, 525.0, 579.0, 530.0, 579.0, 573.0, 579.0, 579.0, 579.0, 573.0, 519.0, 579.0, 522.0, 579.0, 579.0, 582.0, 465.0, 582.0, 522.0, 573.0, 579.0, 524.0, 582.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 287.0, 287.0, 289.0, 288.0, 291.0, 291.0, 285.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 285.0, 294.0, 236.0, 240.0, 285.0, 288.0, 262.0, 260.0, 294.0, 285.0, 257.0, 265.0, 289.0, 293.0, 290.0, 289.0, 257.0, 262.0, 294.0, 285.0, 293.0, 286.0, 294.0, 285.0, 281.0, 301.0, 287.0, 289.0, 269.0, 256.0, 288.0, 294.0, 281.0, 295.0, 295.0, 289.0, 290.0, 289.0, 289.0, 290.0, 280.0, 299.0, 292.0, 278.0, 286.0, 284.0, 289.0, 293.0, 298.0, 284.0, 297.0, 285.0, 291.0, 291.0, 286.0, 296.0, 287.0, 292.0, 284.0, 292.0, 290.0, 289.0, 263.0, 262.0, 291.0, 290.0, 290.0, 292.0, 298.0, 286.0, 290.0, 292.0, 269.0, 253.0, 290.0, 280.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 169.0, 182.0, 288.0, 291.0, 262.0, 260.0, 295.0, 287.0, 281.0, 292.0, 286.0, 290.0, 291.0, 288.0, 287.0, 289.0, 290.0, 292.0, 291.0, 291.0, 287.0, 286.0, 315.0, 312.0, 257.0, 268.0, 290.0, 292.0, 289.0, 293.0, 286.0, 287.0, 287.0, 286.0, 289.0, 293.0, 282.0, 300.0, 285.0, 291.0, 295.0, 284.0, 320.0, 310.0, 267.0, 272.0, 288.0, 282.0, 280.0, 299.0, 288.0, 294.0, 288.0, 291.0, 285.0, 294.0, 282.0, 288.0, 258.0, 267.0, 281.0, 298.0, 257.0, 273.0, 293.0, 286.0, 283.0, 290.0, 288.0, 291.0, 285.0, 294.0, 287.0, 292.0, 286.0, 287.0, 256.0, 263.0, 284.0, 295.0, 261.0, 261.0, 290.0, 289.0, 282.0, 297.0, 291.0, 291.0, 237.0, 228.0, 288.0, 294.0, 261.0, 261.0, 281.0, 292.0, 289.0, 290.0, 261.0, 263.0, 286.0, 296.0, 285.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6980672334992584, "mean_inference_ms": 1.2468138773384507, "mean_action_processing_ms": 0.13384109072673375, "mean_env_wait_ms": 0.8401887887646349, "mean_env_render_ms": 
0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 566.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 169.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 283.46}, "hist_stats": {"episode_reward": [579.0, 576.0, 579.0, 576.0, 582.0, 576.0, 587.0, 579.0, 476.0, 573.0, 522.0, 579.0, 522.0, 582.0, 579.0, 519.0, 579.0, 579.0, 579.0, 582.0, 576.0, 525.0, 582.0, 576.0, 584.0, 579.0, 579.0, 579.0, 570.0, 570.0, 582.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 525.0, 581.0, 582.0, 584.0, 582.0, 522.0, 570.0, 579.0, 579.0, 582.0, 351.0, 579.0, 522.0, 582.0, 573.0, 576.0, 579.0, 576.0, 582.0, 582.0, 573.0, 627.0, 525.0, 582.0, 582.0, 573.0, 573.0, 582.0, 582.0, 576.0, 579.0, 630.0, 539.0, 570.0, 579.0, 582.0, 579.0, 579.0, 570.0, 525.0, 579.0, 530.0, 579.0, 573.0, 579.0, 579.0, 579.0, 573.0, 519.0, 579.0, 522.0, 579.0, 579.0, 582.0, 465.0, 582.0, 522.0, 573.0, 579.0, 524.0, 582.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 287.0, 287.0, 289.0, 288.0, 291.0, 291.0, 285.0, 291.0, 291.0, 283.0, 293.0, 287.0, 300.0, 285.0, 294.0, 236.0, 240.0, 285.0, 288.0, 262.0, 260.0, 294.0, 285.0, 257.0, 265.0, 289.0, 293.0, 290.0, 289.0, 257.0, 262.0, 294.0, 285.0, 293.0, 286.0, 294.0, 285.0, 281.0, 301.0, 287.0, 289.0, 269.0, 256.0, 288.0, 294.0, 281.0, 295.0, 295.0, 289.0, 290.0, 289.0, 289.0, 290.0, 280.0, 299.0, 292.0, 278.0, 286.0, 284.0, 289.0, 293.0, 
298.0, 284.0, 297.0, 285.0, 291.0, 291.0, 286.0, 296.0, 287.0, 292.0, 284.0, 292.0, 290.0, 289.0, 263.0, 262.0, 291.0, 290.0, 290.0, 292.0, 298.0, 286.0, 290.0, 292.0, 269.0, 253.0, 290.0, 280.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 169.0, 182.0, 288.0, 291.0, 262.0, 260.0, 295.0, 287.0, 281.0, 292.0, 286.0, 290.0, 291.0, 288.0, 287.0, 289.0, 290.0, 292.0, 291.0, 291.0, 287.0, 286.0, 315.0, 312.0, 257.0, 268.0, 290.0, 292.0, 289.0, 293.0, 286.0, 287.0, 287.0, 286.0, 289.0, 293.0, 282.0, 300.0, 285.0, 291.0, 295.0, 284.0, 320.0, 310.0, 267.0, 272.0, 288.0, 282.0, 280.0, 299.0, 288.0, 294.0, 288.0, 291.0, 285.0, 294.0, 282.0, 288.0, 258.0, 267.0, 281.0, 298.0, 257.0, 273.0, 293.0, 286.0, 283.0, 290.0, 288.0, 291.0, 285.0, 294.0, 287.0, 292.0, 286.0, 287.0, 256.0, 263.0, 284.0, 295.0, 261.0, 261.0, 290.0, 289.0, 282.0, 297.0, 291.0, 291.0, 237.0, 228.0, 288.0, 294.0, 261.0, 261.0, 281.0, 292.0, 289.0, 290.0, 261.0, 263.0, 286.0, 296.0, 285.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6980672334992584, "mean_inference_ms": 1.2468138773384507, "mean_action_processing_ms": 0.13384109072673375, "mean_env_wait_ms": 0.8401887887646349, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9420800, "num_agent_steps_trained": 9420800, "num_env_steps_sampled": 4710400, "num_env_steps_trained": 4710400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4710400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9420800, "timers": {"training_iteration_time_ms": 3605.349, "learn_time_ms": 1118.749, "learn_throughput": 11441.347, "synch_weights_time_ms": 12.614}, "counters": {"num_env_steps_sampled": 4710400, "num_env_steps_trained": 4710400, "num_agent_steps_sampled": 9420800, "num_agent_steps_trained": 9420800}, "done": false, "episodes_total": 11776, "training_iteration": 368, "trial_id": "default", "experiment_id": 
"cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-22", "timestamp": 1666581862, "time_this_iter_s": 3.71199893951416, "time_total_s": 1407.3975262641907, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": 
false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, 
"output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": 
"", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, 
"output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, 
"policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": 
true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1407.3975262641907, "timesteps_since_restore": 0, "iterations_since_restore": 368, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.7, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.42, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 
24, "onion_pickup_agent_1_mean": 17.08, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.22, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.92, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.73, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.08, 
"soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.73, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.73, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0002802152303047478, "policy_loss": -0.0006437780102714896, "vf_loss": 7.782103538513184, "vf_explained_var": 0.5572320818901062, "kl": 0.002535460516810417, "entropy": 0.8292930126190186, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4723200, "num_env_steps_trained": 4723200, "num_agent_steps_sampled": 9446400, "num_agent_steps_trained": 9446400}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 564.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 169.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 282.21}, "custom_metrics": {"sparse_reward_mean": 195.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.42, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, 
"tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.08, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.22, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.92, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.73, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.3, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, 
"dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.72, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.7, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.73, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.73, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, 
"catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 525.0, 581.0, 582.0, 584.0, 582.0, 522.0, 570.0, 579.0, 579.0, 582.0, 351.0, 579.0, 522.0, 582.0, 573.0, 576.0, 579.0, 576.0, 582.0, 582.0, 573.0, 627.0, 525.0, 582.0, 582.0, 573.0, 573.0, 582.0, 582.0, 576.0, 579.0, 630.0, 539.0, 570.0, 579.0, 582.0, 579.0, 579.0, 570.0, 525.0, 579.0, 530.0, 579.0, 573.0, 579.0, 579.0, 579.0, 573.0, 519.0, 579.0, 522.0, 579.0, 579.0, 582.0, 465.0, 582.0, 522.0, 573.0, 579.0, 524.0, 582.0, 570.0, 573.0, 525.0, 579.0, 579.0, 576.0, 582.0, 579.0, 513.0, 576.0, 579.0, 576.0, 576.0, 522.0, 579.0, 573.0, 579.0, 476.0, 573.0, 525.0, 522.0, 573.0, 579.0, 573.0, 525.0, 582.0, 582.0, 579.0, 522.0, 525.0, 579.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 285.0, 291.0, 291.0, 286.0, 296.0, 287.0, 292.0, 284.0, 292.0, 290.0, 289.0, 263.0, 262.0, 291.0, 290.0, 290.0, 292.0, 298.0, 286.0, 290.0, 292.0, 269.0, 253.0, 290.0, 280.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 169.0, 182.0, 288.0, 291.0, 262.0, 260.0, 295.0, 287.0, 281.0, 292.0, 286.0, 290.0, 291.0, 288.0, 287.0, 289.0, 290.0, 292.0, 291.0, 291.0, 287.0, 286.0, 315.0, 312.0, 257.0, 268.0, 290.0, 292.0, 289.0, 293.0, 286.0, 287.0, 287.0, 286.0, 289.0, 293.0, 282.0, 300.0, 285.0, 291.0, 295.0, 284.0, 320.0, 310.0, 267.0, 272.0, 288.0, 282.0, 280.0, 299.0, 288.0, 294.0, 288.0, 291.0, 285.0, 294.0, 282.0, 288.0, 258.0, 267.0, 281.0, 298.0, 257.0, 273.0, 293.0, 286.0, 283.0, 290.0, 288.0, 291.0, 285.0, 294.0, 287.0, 292.0, 286.0, 287.0, 256.0, 263.0, 284.0, 295.0, 261.0, 261.0, 290.0, 289.0, 282.0, 297.0, 291.0, 291.0, 237.0, 228.0, 288.0, 294.0, 261.0, 261.0, 281.0, 292.0, 289.0, 290.0, 261.0, 263.0, 286.0, 296.0, 285.0, 285.0, 287.0, 286.0, 260.0, 265.0, 292.0, 287.0, 288.0, 291.0, 288.0, 288.0, 289.0, 293.0, 283.0, 296.0, 255.0, 258.0, 289.0, 287.0, 291.0, 288.0, 286.0, 290.0, 289.0, 287.0, 253.0, 269.0, 290.0, 289.0, 287.0, 286.0, 288.0, 291.0, 239.0, 237.0, 282.0, 291.0, 262.0, 263.0, 262.0, 260.0, 285.0, 288.0, 288.0, 291.0, 285.0, 288.0, 263.0, 262.0, 288.0, 294.0, 291.0, 291.0, 281.0, 298.0, 265.0, 257.0, 260.0, 265.0, 292.0, 287.0, 291.0, 291.0, 292.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6979899684883932, "mean_inference_ms": 1.246757177203936, "mean_action_processing_ms": 0.1338305857604492, "mean_env_wait_ms": 0.8401893480231749, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 351.0, 
"episode_reward_mean": 564.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 169.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 282.21}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 579.0, 576.0, 579.0, 525.0, 581.0, 582.0, 584.0, 582.0, 522.0, 570.0, 579.0, 579.0, 582.0, 351.0, 579.0, 522.0, 582.0, 573.0, 576.0, 579.0, 576.0, 582.0, 582.0, 573.0, 627.0, 525.0, 582.0, 582.0, 573.0, 573.0, 582.0, 582.0, 576.0, 579.0, 630.0, 539.0, 570.0, 579.0, 582.0, 579.0, 579.0, 570.0, 525.0, 579.0, 530.0, 579.0, 573.0, 579.0, 579.0, 579.0, 573.0, 519.0, 579.0, 522.0, 579.0, 579.0, 582.0, 465.0, 582.0, 522.0, 573.0, 579.0, 524.0, 582.0, 570.0, 573.0, 525.0, 579.0, 579.0, 576.0, 582.0, 579.0, 513.0, 576.0, 579.0, 576.0, 576.0, 522.0, 579.0, 573.0, 579.0, 476.0, 573.0, 525.0, 522.0, 573.0, 579.0, 573.0, 525.0, 582.0, 582.0, 579.0, 522.0, 525.0, 579.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 285.0, 291.0, 291.0, 286.0, 296.0, 287.0, 292.0, 284.0, 292.0, 290.0, 289.0, 263.0, 262.0, 291.0, 290.0, 290.0, 292.0, 298.0, 286.0, 290.0, 292.0, 269.0, 253.0, 290.0, 280.0, 288.0, 291.0, 287.0, 292.0, 294.0, 288.0, 169.0, 182.0, 288.0, 291.0, 262.0, 260.0, 295.0, 287.0, 281.0, 292.0, 286.0, 290.0, 291.0, 288.0, 287.0, 289.0, 290.0, 292.0, 291.0, 291.0, 287.0, 286.0, 315.0, 312.0, 257.0, 268.0, 290.0, 292.0, 289.0, 293.0, 286.0, 287.0, 287.0, 286.0, 289.0, 293.0, 282.0, 300.0, 285.0, 291.0, 295.0, 284.0, 320.0, 
310.0, 267.0, 272.0, 288.0, 282.0, 280.0, 299.0, 288.0, 294.0, 288.0, 291.0, 285.0, 294.0, 282.0, 288.0, 258.0, 267.0, 281.0, 298.0, 257.0, 273.0, 293.0, 286.0, 283.0, 290.0, 288.0, 291.0, 285.0, 294.0, 287.0, 292.0, 286.0, 287.0, 256.0, 263.0, 284.0, 295.0, 261.0, 261.0, 290.0, 289.0, 282.0, 297.0, 291.0, 291.0, 237.0, 228.0, 288.0, 294.0, 261.0, 261.0, 281.0, 292.0, 289.0, 290.0, 261.0, 263.0, 286.0, 296.0, 285.0, 285.0, 287.0, 286.0, 260.0, 265.0, 292.0, 287.0, 288.0, 291.0, 288.0, 288.0, 289.0, 293.0, 283.0, 296.0, 255.0, 258.0, 289.0, 287.0, 291.0, 288.0, 286.0, 290.0, 289.0, 287.0, 253.0, 269.0, 290.0, 289.0, 287.0, 286.0, 288.0, 291.0, 239.0, 237.0, 282.0, 291.0, 262.0, 263.0, 262.0, 260.0, 285.0, 288.0, 288.0, 291.0, 285.0, 288.0, 263.0, 262.0, 288.0, 294.0, 291.0, 291.0, 281.0, 298.0, 265.0, 257.0, 260.0, 265.0, 292.0, 287.0, 291.0, 291.0, 292.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6979899684883932, "mean_inference_ms": 1.246757177203936, "mean_action_processing_ms": 0.1338305857604492, "mean_env_wait_ms": 0.8401893480231749, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9446400, "num_agent_steps_trained": 9446400, "num_env_steps_sampled": 4723200, "num_env_steps_trained": 4723200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4723200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9446400, "timers": {"training_iteration_time_ms": 3628.601, "learn_time_ms": 1118.822, "learn_throughput": 11440.607, "synch_weights_time_ms": 13.16}, "counters": {"num_env_steps_sampled": 4723200, "num_env_steps_trained": 4723200, "num_agent_steps_sampled": 9446400, "num_agent_steps_trained": 9446400}, "done": false, "episodes_total": 11808, "training_iteration": 369, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-26", "timestamp": 1666581866, "time_this_iter_s": 
3.945258617401123, "time_total_s": 1411.3427848815918, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", 
"remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, 
"evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": 
"complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, 
"evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 
0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, 
"policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1411.3427848815918, "timesteps_since_restore": 0, "iterations_since_restore": 369, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.26666666666667, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 173.98, "shaped_reward_min": 94, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.52, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.93, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.42, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.83, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.56, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.77, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.05, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.56, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.56, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000858169689308852, "policy_loss": 0.00048363849055022, "vf_loss": 7.798517227172852, "vf_explained_var": 0.5516868829727173, "kl": 0.003202717285603285, "entropy": 0.8106404542922974, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4736000, "num_env_steps_trained": 4736000, "num_agent_steps_sampled": 9472000, "num_agent_steps_trained": 9472000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 294.0, "episode_reward_mean": 563.58, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 281.79}, "custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 173.98, "shaped_reward_min": 94, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.52, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.93, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.42, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.83, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.56, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 26, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.04, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.89, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.81, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.77, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.05, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.56, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 26, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.56, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 26, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 582.0, 576.0, 579.0, 630.0, 539.0, 570.0, 579.0, 582.0, 579.0, 579.0, 570.0, 525.0, 579.0, 530.0, 579.0, 573.0, 579.0, 579.0, 579.0, 573.0, 519.0, 579.0, 522.0, 579.0, 579.0, 582.0, 465.0, 582.0, 522.0, 573.0, 579.0, 524.0, 582.0, 570.0, 573.0, 525.0, 579.0, 579.0, 576.0, 582.0, 579.0, 513.0, 576.0, 579.0, 576.0, 576.0, 522.0, 579.0, 573.0, 579.0, 476.0, 573.0, 525.0, 522.0, 573.0, 579.0, 573.0, 525.0, 582.0, 582.0, 579.0, 522.0, 525.0, 579.0, 582.0, 576.0, 579.0, 579.0, 573.0, 573.0, 579.0, 579.0, 587.0, 294.0, 579.0, 579.0, 573.0, 579.0, 582.0, 576.0, 530.0, 581.0, 579.0, 587.0, 576.0, 573.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 579.0, 576.0, 573.0, 573.0, 576.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 289.0, 293.0, 282.0, 300.0, 285.0, 291.0, 295.0, 284.0, 320.0, 310.0, 267.0, 272.0, 288.0, 282.0, 280.0, 299.0, 288.0, 294.0, 288.0, 291.0, 285.0, 294.0, 282.0, 288.0, 258.0, 267.0, 281.0, 298.0, 257.0, 273.0, 293.0, 286.0, 283.0, 290.0, 288.0, 291.0, 285.0, 294.0, 287.0, 292.0, 286.0, 287.0, 256.0, 263.0, 284.0, 295.0, 261.0, 261.0, 290.0, 289.0, 282.0, 297.0, 291.0, 291.0, 237.0, 228.0, 288.0, 294.0, 261.0, 261.0, 281.0, 292.0, 289.0, 290.0, 261.0, 263.0, 286.0, 296.0, 285.0, 285.0, 287.0, 286.0, 260.0, 265.0, 292.0, 287.0, 288.0, 291.0, 288.0, 288.0, 289.0, 293.0, 283.0, 296.0, 255.0, 258.0, 289.0, 287.0, 291.0, 288.0, 286.0, 290.0, 289.0, 287.0, 253.0, 269.0, 290.0, 289.0, 287.0, 286.0, 288.0, 291.0, 239.0, 237.0, 282.0, 291.0, 262.0, 263.0, 262.0, 260.0, 285.0, 288.0, 288.0, 291.0, 285.0, 288.0, 263.0, 262.0, 288.0, 294.0, 291.0, 291.0, 281.0, 298.0, 265.0, 257.0, 260.0, 265.0, 292.0, 287.0, 291.0, 291.0, 292.0, 284.0, 286.0, 293.0, 290.0, 289.0, 286.0, 287.0, 282.0, 291.0, 284.0, 295.0, 289.0, 290.0, 292.0, 295.0, 151.0, 143.0, 293.0, 286.0, 286.0, 293.0, 287.0, 286.0, 283.0, 296.0, 291.0, 291.0, 287.0, 289.0, 264.0, 266.0, 292.0, 289.0, 294.0, 285.0, 291.0, 296.0, 293.0, 283.0, 291.0, 282.0, 258.0, 272.0, 286.0, 287.0, 290.0, 289.0, 288.0, 288.0, 289.0, 290.0, 285.0, 291.0, 292.0, 287.0, 283.0, 293.0, 283.0, 290.0, 291.0, 282.0, 289.0, 287.0, 255.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6979214666795684, "mean_inference_ms": 1.2466894042982537, "mean_action_processing_ms": 0.13382035056435135, "mean_env_wait_ms": 0.8401823475867125, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 294.0, "episode_reward_mean": 563.58, 
"episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 281.79}, "hist_stats": {"episode_reward": [573.0, 582.0, 582.0, 576.0, 579.0, 630.0, 539.0, 570.0, 579.0, 582.0, 579.0, 579.0, 570.0, 525.0, 579.0, 530.0, 579.0, 573.0, 579.0, 579.0, 579.0, 573.0, 519.0, 579.0, 522.0, 579.0, 579.0, 582.0, 465.0, 582.0, 522.0, 573.0, 579.0, 524.0, 582.0, 570.0, 573.0, 525.0, 579.0, 579.0, 576.0, 582.0, 579.0, 513.0, 576.0, 579.0, 576.0, 576.0, 522.0, 579.0, 573.0, 579.0, 476.0, 573.0, 525.0, 522.0, 573.0, 579.0, 573.0, 525.0, 582.0, 582.0, 579.0, 522.0, 525.0, 579.0, 582.0, 576.0, 579.0, 579.0, 573.0, 573.0, 579.0, 579.0, 587.0, 294.0, 579.0, 579.0, 573.0, 579.0, 582.0, 576.0, 530.0, 581.0, 579.0, 587.0, 576.0, 573.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 579.0, 576.0, 573.0, 573.0, 576.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 289.0, 293.0, 282.0, 300.0, 285.0, 291.0, 295.0, 284.0, 320.0, 310.0, 267.0, 272.0, 288.0, 282.0, 280.0, 299.0, 288.0, 294.0, 288.0, 291.0, 285.0, 294.0, 282.0, 288.0, 258.0, 267.0, 281.0, 298.0, 257.0, 273.0, 293.0, 286.0, 283.0, 290.0, 288.0, 291.0, 285.0, 294.0, 287.0, 292.0, 286.0, 287.0, 256.0, 263.0, 284.0, 295.0, 261.0, 261.0, 290.0, 289.0, 282.0, 297.0, 291.0, 291.0, 237.0, 228.0, 288.0, 294.0, 261.0, 261.0, 281.0, 292.0, 289.0, 290.0, 261.0, 263.0, 286.0, 296.0, 285.0, 285.0, 287.0, 286.0, 260.0, 265.0, 292.0, 287.0, 288.0, 
291.0, 288.0, 288.0, 289.0, 293.0, 283.0, 296.0, 255.0, 258.0, 289.0, 287.0, 291.0, 288.0, 286.0, 290.0, 289.0, 287.0, 253.0, 269.0, 290.0, 289.0, 287.0, 286.0, 288.0, 291.0, 239.0, 237.0, 282.0, 291.0, 262.0, 263.0, 262.0, 260.0, 285.0, 288.0, 288.0, 291.0, 285.0, 288.0, 263.0, 262.0, 288.0, 294.0, 291.0, 291.0, 281.0, 298.0, 265.0, 257.0, 260.0, 265.0, 292.0, 287.0, 291.0, 291.0, 292.0, 284.0, 286.0, 293.0, 290.0, 289.0, 286.0, 287.0, 282.0, 291.0, 284.0, 295.0, 289.0, 290.0, 292.0, 295.0, 151.0, 143.0, 293.0, 286.0, 286.0, 293.0, 287.0, 286.0, 283.0, 296.0, 291.0, 291.0, 287.0, 289.0, 264.0, 266.0, 292.0, 289.0, 294.0, 285.0, 291.0, 296.0, 293.0, 283.0, 291.0, 282.0, 258.0, 272.0, 286.0, 287.0, 290.0, 289.0, 288.0, 288.0, 289.0, 290.0, 285.0, 291.0, 292.0, 287.0, 283.0, 293.0, 283.0, 290.0, 291.0, 282.0, 289.0, 287.0, 255.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6979214666795684, "mean_inference_ms": 1.2466894042982537, "mean_action_processing_ms": 0.13382035056435135, "mean_env_wait_ms": 0.8401823475867125, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9472000, "num_agent_steps_trained": 9472000, "num_env_steps_sampled": 4736000, "num_env_steps_trained": 4736000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4736000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9472000, "timers": {"training_iteration_time_ms": 3638.398, "learn_time_ms": 1117.244, "learn_throughput": 11456.764, "synch_weights_time_ms": 13.584}, "counters": {"num_env_steps_sampled": 4736000, "num_env_steps_trained": 4736000, "num_agent_steps_sampled": 9472000, "num_agent_steps_trained": 9472000}, "done": false, "episodes_total": 11840, "training_iteration": 370, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-30", "timestamp": 1666581870, "time_this_iter_s": 3.7514426708221436, 
"time_total_s": 1415.094227552414, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", 
"remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, 
"evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", 
"policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, 
"policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1415.094227552414, "timesteps_since_restore": 0, "iterations_since_restore": 370, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.939999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 174.44, "shaped_reward_min": 94, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.12, "onion_pickup_agent_1_min": 9, 
"onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.27, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.03, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.96, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.18, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, 
"soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.96, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.96, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002380042104050517, "policy_loss": 0.002013041637837887, "vf_loss": 7.760648727416992, "vf_explained_var": 0.546655535697937, "kl": 0.00237697409465909, "entropy": 0.8181264400482178, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4748800, "num_env_steps_trained": 4748800, "num_agent_steps_sampled": 9497600, "num_agent_steps_trained": 9497600}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 294.0, "episode_reward_mean": 565.64, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 282.82}, "custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 174.44, "shaped_reward_min": 94, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.12, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.27, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.03, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.96, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.68, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.8, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, 
"dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.18, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.96, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.68, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.96, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.68, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 
0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 524.0, 582.0, 570.0, 573.0, 525.0, 579.0, 579.0, 576.0, 582.0, 579.0, 513.0, 576.0, 579.0, 576.0, 576.0, 522.0, 579.0, 573.0, 579.0, 476.0, 573.0, 525.0, 522.0, 573.0, 579.0, 573.0, 525.0, 582.0, 582.0, 579.0, 522.0, 525.0, 579.0, 582.0, 576.0, 579.0, 579.0, 573.0, 573.0, 579.0, 579.0, 587.0, 294.0, 579.0, 579.0, 573.0, 579.0, 582.0, 576.0, 530.0, 581.0, 579.0, 587.0, 576.0, 573.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 579.0, 576.0, 573.0, 573.0, 576.0, 519.0, 522.0, 576.0, 570.0, 525.0, 582.0, 582.0, 573.0, 582.0, 579.0, 579.0, 582.0, 570.0, 579.0, 579.0, 582.0, 579.0, 579.0, 522.0, 627.0, 579.0, 579.0, 582.0, 522.0, 576.0, 579.0, 582.0, 576.0, 573.0, 582.0, 576.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 261.0, 263.0, 286.0, 296.0, 285.0, 285.0, 287.0, 286.0, 260.0, 265.0, 292.0, 287.0, 288.0, 291.0, 288.0, 288.0, 289.0, 293.0, 283.0, 296.0, 255.0, 258.0, 289.0, 287.0, 291.0, 288.0, 286.0, 290.0, 289.0, 287.0, 253.0, 269.0, 290.0, 289.0, 287.0, 286.0, 288.0, 291.0, 239.0, 237.0, 282.0, 291.0, 262.0, 263.0, 262.0, 260.0, 285.0, 288.0, 288.0, 291.0, 285.0, 288.0, 263.0, 262.0, 288.0, 294.0, 291.0, 291.0, 281.0, 298.0, 265.0, 257.0, 260.0, 265.0, 292.0, 287.0, 291.0, 291.0, 292.0, 284.0, 286.0, 293.0, 290.0, 289.0, 286.0, 287.0, 282.0, 291.0, 284.0, 295.0, 289.0, 290.0, 292.0, 295.0, 151.0, 143.0, 293.0, 286.0, 286.0, 293.0, 287.0, 286.0, 283.0, 296.0, 291.0, 291.0, 287.0, 289.0, 264.0, 266.0, 292.0, 289.0, 294.0, 285.0, 291.0, 296.0, 293.0, 283.0, 291.0, 282.0, 258.0, 272.0, 286.0, 287.0, 290.0, 289.0, 288.0, 288.0, 289.0, 290.0, 285.0, 291.0, 292.0, 287.0, 283.0, 293.0, 283.0, 290.0, 291.0, 282.0, 289.0, 287.0, 255.0, 264.0, 264.0, 258.0, 289.0, 287.0, 281.0, 289.0, 259.0, 266.0, 294.0, 288.0, 292.0, 290.0, 287.0, 286.0, 296.0, 286.0, 291.0, 288.0, 291.0, 288.0, 285.0, 297.0, 287.0, 283.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 299.0, 280.0, 292.0, 287.0, 263.0, 259.0, 316.0, 311.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 260.0, 262.0, 288.0, 288.0, 287.0, 292.0, 291.0, 291.0, 291.0, 285.0, 285.0, 288.0, 290.0, 292.0, 281.0, 295.0, 284.0, 289.0, 290.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6978597284218533, "mean_inference_ms": 1.2466446787051728, "mean_action_processing_ms": 0.13381208853991922, "mean_env_wait_ms": 0.840190768123749, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 294.0, "episode_reward_mean": 565.64, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 282.82}, "hist_stats": {"episode_reward": [579.0, 524.0, 582.0, 570.0, 573.0, 525.0, 579.0, 579.0, 576.0, 582.0, 579.0, 513.0, 576.0, 579.0, 576.0, 576.0, 522.0, 579.0, 573.0, 579.0, 476.0, 573.0, 525.0, 522.0, 573.0, 579.0, 573.0, 525.0, 582.0, 582.0, 579.0, 522.0, 525.0, 579.0, 582.0, 576.0, 579.0, 579.0, 573.0, 573.0, 579.0, 579.0, 587.0, 294.0, 579.0, 579.0, 573.0, 579.0, 582.0, 576.0, 530.0, 581.0, 579.0, 587.0, 576.0, 573.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 579.0, 576.0, 573.0, 573.0, 576.0, 519.0, 522.0, 576.0, 570.0, 525.0, 582.0, 582.0, 573.0, 582.0, 579.0, 579.0, 582.0, 570.0, 579.0, 579.0, 582.0, 579.0, 579.0, 522.0, 627.0, 579.0, 579.0, 582.0, 522.0, 576.0, 579.0, 582.0, 576.0, 573.0, 582.0, 576.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 261.0, 263.0, 286.0, 296.0, 285.0, 285.0, 287.0, 286.0, 260.0, 265.0, 292.0, 287.0, 288.0, 291.0, 288.0, 288.0, 289.0, 293.0, 283.0, 296.0, 255.0, 258.0, 289.0, 287.0, 291.0, 288.0, 286.0, 290.0, 289.0, 287.0, 253.0, 269.0, 290.0, 289.0, 287.0, 286.0, 288.0, 291.0, 239.0, 237.0, 282.0, 291.0, 262.0, 263.0, 262.0, 260.0, 285.0, 288.0, 288.0, 291.0, 285.0, 288.0, 263.0, 262.0, 288.0, 294.0, 291.0, 291.0, 281.0, 298.0, 265.0, 257.0, 260.0, 265.0, 292.0, 287.0, 291.0, 291.0, 292.0, 284.0, 286.0, 293.0, 290.0, 289.0, 286.0, 287.0, 282.0, 291.0, 284.0, 295.0, 289.0, 290.0, 292.0, 295.0, 151.0, 
143.0, 293.0, 286.0, 286.0, 293.0, 287.0, 286.0, 283.0, 296.0, 291.0, 291.0, 287.0, 289.0, 264.0, 266.0, 292.0, 289.0, 294.0, 285.0, 291.0, 296.0, 293.0, 283.0, 291.0, 282.0, 258.0, 272.0, 286.0, 287.0, 290.0, 289.0, 288.0, 288.0, 289.0, 290.0, 285.0, 291.0, 292.0, 287.0, 283.0, 293.0, 283.0, 290.0, 291.0, 282.0, 289.0, 287.0, 255.0, 264.0, 264.0, 258.0, 289.0, 287.0, 281.0, 289.0, 259.0, 266.0, 294.0, 288.0, 292.0, 290.0, 287.0, 286.0, 296.0, 286.0, 291.0, 288.0, 291.0, 288.0, 285.0, 297.0, 287.0, 283.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 299.0, 280.0, 292.0, 287.0, 263.0, 259.0, 316.0, 311.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 260.0, 262.0, 288.0, 288.0, 287.0, 292.0, 291.0, 291.0, 291.0, 285.0, 285.0, 288.0, 290.0, 292.0, 281.0, 295.0, 284.0, 289.0, 290.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6978597284218533, "mean_inference_ms": 1.2466446787051728, "mean_action_processing_ms": 0.13381208853991922, "mean_env_wait_ms": 0.840190768123749, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9497600, "num_agent_steps_trained": 9497600, "num_env_steps_sampled": 4748800, "num_env_steps_trained": 4748800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4748800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9497600, "timers": {"training_iteration_time_ms": 3647.884, "learn_time_ms": 1119.647, "learn_throughput": 11432.173, "synch_weights_time_ms": 13.259}, "counters": {"num_env_steps_sampled": 4748800, "num_env_steps_trained": 4748800, "num_agent_steps_sampled": 9497600, "num_agent_steps_trained": 9497600}, "done": false, "episodes_total": 11872, "training_iteration": 371, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-34", "timestamp": 1666581874, "time_this_iter_s": 3.723357677459717, "time_total_s": 1418.8175852298737, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1418.8175852298737, "timesteps_since_restore": 0, "iterations_since_restore": 371, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.95, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 175.64, "shaped_reward_min": 94, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.2, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 15.42, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.11, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.11, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.74, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.71, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.11, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.74, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.11, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.74, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016957891639322042, "policy_loss": 0.0013228158932179213, "vf_loss": 7.805115699768066, "vf_explained_var": 0.5548580288887024, "kl": 0.0029728966765105724, "entropy": 0.8150744438171387, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4761600, "num_env_steps_trained": 4761600, "num_agent_steps_sampled": 9523200, "num_agent_steps_trained": 9523200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 294.0, "episode_reward_mean": 570.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 285.02}, "custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 175.64, "shaped_reward_min": 94, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.2, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.42, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.11, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.11, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.74, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.87, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.73, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.71, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.11, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.74, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.11, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.74, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 579.0, 582.0, 576.0, 579.0, 579.0, 573.0, 573.0, 579.0, 579.0, 587.0, 294.0, 579.0, 579.0, 573.0, 579.0, 582.0, 576.0, 530.0, 581.0, 579.0, 587.0, 576.0, 573.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 579.0, 576.0, 573.0, 573.0, 576.0, 519.0, 522.0, 576.0, 570.0, 525.0, 582.0, 582.0, 573.0, 582.0, 579.0, 579.0, 582.0, 570.0, 579.0, 579.0, 582.0, 579.0, 579.0, 522.0, 627.0, 579.0, 579.0, 582.0, 522.0, 576.0, 579.0, 582.0, 576.0, 573.0, 582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 570.0, 579.0, 570.0, 582.0, 579.0, 579.0, 582.0, 579.0, 522.0, 579.0, 579.0, 570.0, 579.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 522.0, 573.0, 582.0, 630.0, 579.0, 579.0, 582.0, 576.0, 579.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 265.0, 292.0, 287.0, 291.0, 291.0, 292.0, 284.0, 286.0, 293.0, 290.0, 289.0, 286.0, 287.0, 282.0, 291.0, 284.0, 295.0, 289.0, 290.0, 292.0, 295.0, 151.0, 143.0, 293.0, 286.0, 286.0, 293.0, 287.0, 286.0, 283.0, 296.0, 291.0, 291.0, 287.0, 289.0, 264.0, 266.0, 292.0, 289.0, 294.0, 285.0, 291.0, 296.0, 293.0, 283.0, 291.0, 282.0, 258.0, 272.0, 286.0, 287.0, 290.0, 289.0, 288.0, 288.0, 289.0, 290.0, 285.0, 291.0, 292.0, 287.0, 283.0, 293.0, 283.0, 290.0, 291.0, 282.0, 289.0, 287.0, 255.0, 264.0, 264.0, 258.0, 289.0, 287.0, 281.0, 289.0, 259.0, 266.0, 294.0, 288.0, 292.0, 290.0, 287.0, 286.0, 296.0, 286.0, 291.0, 288.0, 291.0, 288.0, 285.0, 297.0, 287.0, 283.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 299.0, 280.0, 292.0, 287.0, 263.0, 259.0, 316.0, 311.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 260.0, 262.0, 288.0, 288.0, 287.0, 292.0, 291.0, 291.0, 291.0, 285.0, 285.0, 288.0, 290.0, 292.0, 281.0, 295.0, 284.0, 289.0, 290.0, 286.0, 290.0, 289.0, 289.0, 293.0, 291.0, 279.0, 281.0, 298.0, 281.0, 289.0, 284.0, 298.0, 289.0, 290.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 256.0, 266.0, 288.0, 291.0, 287.0, 292.0, 271.0, 299.0, 285.0, 294.0, 291.0, 288.0, 256.0, 263.0, 288.0, 285.0, 293.0, 286.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 280.0, 293.0, 290.0, 292.0, 320.0, 310.0, 285.0, 294.0, 295.0, 284.0, 288.0, 294.0, 288.0, 288.0, 288.0, 291.0, 295.0, 275.0, 289.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6978080925470879, "mean_inference_ms": 1.2465000228512448, "mean_action_processing_ms": 0.13380573271542603, "mean_env_wait_ms": 0.840132564158288, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 294.0, "episode_reward_mean": 570.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 143.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 285.02}, "hist_stats": {"episode_reward": [525.0, 579.0, 582.0, 576.0, 579.0, 579.0, 573.0, 573.0, 579.0, 579.0, 587.0, 294.0, 579.0, 579.0, 573.0, 579.0, 582.0, 576.0, 530.0, 581.0, 579.0, 587.0, 576.0, 573.0, 530.0, 573.0, 579.0, 576.0, 579.0, 576.0, 579.0, 576.0, 573.0, 573.0, 576.0, 519.0, 522.0, 576.0, 570.0, 525.0, 582.0, 582.0, 573.0, 582.0, 579.0, 579.0, 582.0, 570.0, 579.0, 579.0, 582.0, 579.0, 579.0, 522.0, 627.0, 579.0, 579.0, 582.0, 522.0, 576.0, 579.0, 582.0, 576.0, 573.0, 582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 570.0, 579.0, 570.0, 582.0, 579.0, 579.0, 582.0, 579.0, 522.0, 579.0, 579.0, 570.0, 579.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 522.0, 573.0, 582.0, 630.0, 579.0, 579.0, 582.0, 576.0, 579.0, 570.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [260.0, 265.0, 292.0, 287.0, 291.0, 291.0, 292.0, 284.0, 286.0, 293.0, 290.0, 289.0, 286.0, 287.0, 282.0, 291.0, 284.0, 295.0, 289.0, 290.0, 292.0, 295.0, 151.0, 143.0, 293.0, 286.0, 286.0, 293.0, 287.0, 286.0, 283.0, 296.0, 291.0, 291.0, 287.0, 289.0, 264.0, 266.0, 292.0, 289.0, 294.0, 285.0, 291.0, 296.0, 293.0, 283.0, 291.0, 282.0, 258.0, 272.0, 286.0, 287.0, 290.0, 289.0, 288.0, 288.0, 289.0, 290.0, 285.0, 291.0, 292.0, 287.0, 283.0, 293.0, 283.0, 290.0, 291.0, 282.0, 289.0, 287.0, 255.0, 264.0, 264.0, 258.0, 289.0, 287.0, 281.0, 289.0, 259.0, 266.0, 294.0, 288.0, 292.0, 290.0, 287.0, 286.0, 296.0, 
286.0, 291.0, 288.0, 291.0, 288.0, 285.0, 297.0, 287.0, 283.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 299.0, 280.0, 292.0, 287.0, 263.0, 259.0, 316.0, 311.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 260.0, 262.0, 288.0, 288.0, 287.0, 292.0, 291.0, 291.0, 291.0, 285.0, 285.0, 288.0, 290.0, 292.0, 281.0, 295.0, 284.0, 289.0, 290.0, 286.0, 290.0, 289.0, 289.0, 293.0, 291.0, 279.0, 281.0, 298.0, 281.0, 289.0, 284.0, 298.0, 289.0, 290.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 256.0, 266.0, 288.0, 291.0, 287.0, 292.0, 271.0, 299.0, 285.0, 294.0, 291.0, 288.0, 256.0, 263.0, 288.0, 285.0, 293.0, 286.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 280.0, 293.0, 290.0, 292.0, 320.0, 310.0, 285.0, 294.0, 295.0, 284.0, 288.0, 294.0, 288.0, 288.0, 288.0, 291.0, 295.0, 275.0, 289.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6978080925470879, "mean_inference_ms": 1.2465000228512448, "mean_action_processing_ms": 0.13380573271542603, "mean_env_wait_ms": 0.840132564158288, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9523200, "num_agent_steps_trained": 9523200, "num_env_steps_sampled": 4761600, "num_env_steps_trained": 4761600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4761600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9523200, "timers": {"training_iteration_time_ms": 3642.819, "learn_time_ms": 1120.988, "learn_throughput": 11418.499, "synch_weights_time_ms": 13.247}, "counters": {"num_env_steps_sampled": 4761600, "num_env_steps_trained": 4761600, "num_agent_steps_sampled": 9523200, "num_agent_steps_trained": 9523200}, "done": false, "episodes_total": 11904, "training_iteration": 372, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-38", "timestamp": 1666581878, "time_this_iter_s": 3.649524450302124, "time_total_s": 1422.4671096801758, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1422.4671096801758, "timesteps_since_restore": 0, "iterations_since_restore": 372, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.339999999999996, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.48, "shaped_reward_min": 159, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.4, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.41, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, 
"useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.99, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.34, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.6, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.99, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.99, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003319069743156433, "policy_loss": 0.002959703328087926, "vf_loss": 7.702720642089844, "vf_explained_var": 0.5470426678657532, "kl": 0.002669147914275527, "entropy": 0.8218092322349548, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4774400, "num_env_steps_trained": 4774400, "num_agent_steps_sampled": 9548800, "num_agent_steps_trained": 9548800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 519.0, "episode_reward_mean": 572.08, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.04}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.48, "shaped_reward_min": 159, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.4, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.41, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.31, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.99, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.86, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.46, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.78, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.34, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.32, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.99, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.99, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 573.0, 576.0, 519.0, 522.0, 576.0, 570.0, 525.0, 582.0, 582.0, 573.0, 582.0, 579.0, 579.0, 582.0, 570.0, 579.0, 579.0, 582.0, 579.0, 579.0, 522.0, 627.0, 579.0, 579.0, 582.0, 522.0, 576.0, 579.0, 582.0, 576.0, 573.0, 582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 570.0, 579.0, 570.0, 582.0, 579.0, 579.0, 582.0, 579.0, 522.0, 579.0, 579.0, 570.0, 579.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 522.0, 573.0, 582.0, 630.0, 579.0, 579.0, 582.0, 576.0, 579.0, 570.0, 582.0, 579.0, 630.0, 573.0, 576.0, 536.0, 579.0, 579.0, 576.0, 525.0, 530.0, 576.0, 579.0, 584.0, 522.0, 579.0, 536.0, 522.0, 579.0, 573.0, 582.0, 582.0, 579.0, 576.0, 530.0, 579.0, 630.0, 582.0, 582.0, 522.0, 582.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 290.0, 291.0, 282.0, 289.0, 287.0, 255.0, 264.0, 264.0, 258.0, 289.0, 287.0, 281.0, 289.0, 259.0, 266.0, 294.0, 288.0, 292.0, 290.0, 287.0, 286.0, 296.0, 286.0, 291.0, 288.0, 291.0, 288.0, 285.0, 297.0, 287.0, 283.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 299.0, 280.0, 292.0, 287.0, 263.0, 259.0, 316.0, 311.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 260.0, 262.0, 288.0, 288.0, 287.0, 292.0, 291.0, 291.0, 291.0, 285.0, 285.0, 288.0, 290.0, 292.0, 281.0, 295.0, 284.0, 289.0, 290.0, 286.0, 290.0, 289.0, 289.0, 293.0, 291.0, 279.0, 281.0, 298.0, 281.0, 289.0, 284.0, 298.0, 289.0, 290.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 256.0, 266.0, 288.0, 291.0, 287.0, 292.0, 271.0, 299.0, 285.0, 294.0, 291.0, 288.0, 256.0, 263.0, 288.0, 285.0, 293.0, 286.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 280.0, 293.0, 290.0, 292.0, 320.0, 310.0, 285.0, 294.0, 295.0, 284.0, 288.0, 294.0, 288.0, 288.0, 288.0, 291.0, 295.0, 275.0, 289.0, 293.0, 288.0, 291.0, 322.0, 308.0, 291.0, 282.0, 286.0, 290.0, 266.0, 270.0, 289.0, 290.0, 290.0, 289.0, 286.0, 290.0, 263.0, 262.0, 268.0, 262.0, 287.0, 289.0, 294.0, 285.0, 295.0, 289.0, 269.0, 253.0, 292.0, 287.0, 272.0, 264.0, 267.0, 255.0, 289.0, 290.0, 284.0, 289.0, 288.0, 294.0, 296.0, 286.0, 287.0, 292.0, 286.0, 290.0, 268.0, 262.0, 291.0, 288.0, 311.0, 319.0, 294.0, 288.0, 291.0, 291.0, 267.0, 255.0, 295.0, 287.0, 289.0, 293.0, 313.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6977569384581034, "mean_inference_ms": 1.2463679723179317, "mean_action_processing_ms": 0.13379979339919534, "mean_env_wait_ms": 0.8400612763604244, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 519.0, "episode_reward_mean": 572.08, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.04}, "hist_stats": {"episode_reward": [573.0, 573.0, 576.0, 519.0, 522.0, 576.0, 570.0, 525.0, 582.0, 582.0, 573.0, 582.0, 579.0, 579.0, 582.0, 570.0, 579.0, 579.0, 582.0, 579.0, 579.0, 522.0, 627.0, 579.0, 579.0, 582.0, 522.0, 576.0, 579.0, 582.0, 576.0, 573.0, 582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 570.0, 579.0, 570.0, 582.0, 579.0, 579.0, 582.0, 579.0, 522.0, 579.0, 579.0, 570.0, 579.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 522.0, 573.0, 582.0, 630.0, 579.0, 579.0, 582.0, 576.0, 579.0, 570.0, 582.0, 579.0, 630.0, 573.0, 576.0, 536.0, 579.0, 579.0, 576.0, 525.0, 530.0, 576.0, 579.0, 584.0, 522.0, 579.0, 536.0, 522.0, 579.0, 573.0, 582.0, 582.0, 579.0, 576.0, 530.0, 579.0, 630.0, 582.0, 582.0, 522.0, 582.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 290.0, 291.0, 282.0, 289.0, 287.0, 255.0, 264.0, 264.0, 258.0, 289.0, 287.0, 281.0, 289.0, 259.0, 266.0, 294.0, 288.0, 292.0, 290.0, 287.0, 286.0, 296.0, 286.0, 291.0, 288.0, 291.0, 288.0, 285.0, 297.0, 287.0, 283.0, 292.0, 287.0, 291.0, 288.0, 284.0, 298.0, 299.0, 280.0, 292.0, 287.0, 263.0, 259.0, 316.0, 311.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 260.0, 262.0, 288.0, 288.0, 287.0, 292.0, 291.0, 291.0, 291.0, 285.0, 285.0, 288.0, 290.0, 292.0, 281.0, 295.0, 284.0, 289.0, 290.0, 286.0, 290.0, 289.0, 289.0, 293.0, 291.0, 279.0, 281.0, 298.0, 281.0, 289.0, 284.0, 298.0, 289.0, 290.0, 286.0, 
293.0, 290.0, 292.0, 292.0, 287.0, 256.0, 266.0, 288.0, 291.0, 287.0, 292.0, 271.0, 299.0, 285.0, 294.0, 291.0, 288.0, 256.0, 263.0, 288.0, 285.0, 293.0, 286.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 280.0, 293.0, 290.0, 292.0, 320.0, 310.0, 285.0, 294.0, 295.0, 284.0, 288.0, 294.0, 288.0, 288.0, 288.0, 291.0, 295.0, 275.0, 289.0, 293.0, 288.0, 291.0, 322.0, 308.0, 291.0, 282.0, 286.0, 290.0, 266.0, 270.0, 289.0, 290.0, 290.0, 289.0, 286.0, 290.0, 263.0, 262.0, 268.0, 262.0, 287.0, 289.0, 294.0, 285.0, 295.0, 289.0, 269.0, 253.0, 292.0, 287.0, 272.0, 264.0, 267.0, 255.0, 289.0, 290.0, 284.0, 289.0, 288.0, 294.0, 296.0, 286.0, 287.0, 292.0, 286.0, 290.0, 268.0, 262.0, 291.0, 288.0, 311.0, 319.0, 294.0, 288.0, 291.0, 291.0, 267.0, 255.0, 295.0, 287.0, 289.0, 293.0, 313.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6977569384581034, "mean_inference_ms": 1.2463679723179317, "mean_action_processing_ms": 0.13379979339919534, "mean_env_wait_ms": 0.8400612763604244, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9548800, "num_agent_steps_trained": 9548800, "num_env_steps_sampled": 4774400, "num_env_steps_trained": 4774400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4774400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9548800, "timers": {"training_iteration_time_ms": 3647.408, "learn_time_ms": 1122.247, "learn_throughput": 11405.689, "synch_weights_time_ms": 13.075}, "counters": {"num_env_steps_sampled": 4774400, "num_env_steps_trained": 4774400, "num_agent_steps_sampled": 9548800, "num_agent_steps_trained": 9548800}, "done": false, "episodes_total": 11936, "training_iteration": 373, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-41", "timestamp": 1666581881, "time_this_iter_s": 3.7315287590026855, "time_total_s": 1426.1986384391785, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1426.1986384391785, "timesteps_since_restore": 0, "iterations_since_restore": 373, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.833333333333332, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.81, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.07, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.86, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, 
"useful_onion_pickup_agent_0_mean": 15.0, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.69, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.74, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.49, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.49, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.69, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.69, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013253169599920511, "policy_loss": -0.0016902193892747164, "vf_loss": 7.725342750549316, "vf_explained_var": 0.5744255185127258, "kl": 0.0023879026994109154, "entropy": 0.8152618408203125, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4787200, "num_env_steps_trained": 4787200, "num_agent_steps_sampled": 9574400, "num_agent_steps_trained": 9574400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 572.01, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.005}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.81, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.07, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.86, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.0, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.75, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.69, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.46, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.74, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.66, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.04, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.45, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.49, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.43, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.49, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.69, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.46, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.69, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.46, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 570.0, 579.0, 570.0, 582.0, 579.0, 579.0, 582.0, 579.0, 522.0, 579.0, 579.0, 570.0, 579.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 522.0, 573.0, 582.0, 630.0, 579.0, 579.0, 582.0, 576.0, 579.0, 570.0, 582.0, 579.0, 630.0, 573.0, 576.0, 536.0, 579.0, 579.0, 576.0, 525.0, 530.0, 576.0, 579.0, 584.0, 522.0, 579.0, 536.0, 522.0, 579.0, 573.0, 582.0, 582.0, 579.0, 576.0, 530.0, 579.0, 630.0, 582.0, 582.0, 522.0, 582.0, 582.0, 630.0, 573.0, 579.0, 579.0, 522.0, 525.0, 570.0, 582.0, 579.0, 636.0, 582.0, 522.0, 579.0, 522.0, 579.0, 576.0, 579.0, 579.0, 525.0, 576.0, 576.0, 579.0, 582.0, 576.0, 582.0, 582.0, 590.0, 582.0, 579.0, 522.0, 579.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 281.0, 295.0, 284.0, 289.0, 290.0, 286.0, 290.0, 289.0, 289.0, 293.0, 291.0, 279.0, 281.0, 298.0, 281.0, 289.0, 284.0, 298.0, 289.0, 290.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 256.0, 266.0, 288.0, 291.0, 287.0, 292.0, 271.0, 299.0, 285.0, 294.0, 291.0, 288.0, 256.0, 263.0, 288.0, 285.0, 293.0, 286.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 280.0, 293.0, 290.0, 292.0, 320.0, 310.0, 285.0, 294.0, 295.0, 284.0, 288.0, 294.0, 288.0, 288.0, 288.0, 291.0, 295.0, 275.0, 289.0, 293.0, 288.0, 291.0, 322.0, 308.0, 291.0, 282.0, 286.0, 290.0, 266.0, 270.0, 289.0, 290.0, 290.0, 289.0, 286.0, 290.0, 263.0, 262.0, 268.0, 262.0, 287.0, 289.0, 294.0, 285.0, 295.0, 289.0, 269.0, 253.0, 292.0, 287.0, 272.0, 264.0, 267.0, 255.0, 289.0, 290.0, 284.0, 289.0, 288.0, 294.0, 296.0, 286.0, 287.0, 292.0, 286.0, 290.0, 268.0, 262.0, 291.0, 288.0, 311.0, 319.0, 294.0, 288.0, 291.0, 291.0, 267.0, 255.0, 295.0, 287.0, 289.0, 293.0, 313.0, 317.0, 283.0, 290.0, 291.0, 288.0, 290.0, 289.0, 262.0, 260.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 285.0, 294.0, 315.0, 321.0, 289.0, 293.0, 263.0, 259.0, 290.0, 289.0, 270.0, 252.0, 289.0, 290.0, 288.0, 288.0, 291.0, 288.0, 289.0, 290.0, 258.0, 267.0, 288.0, 288.0, 283.0, 293.0, 294.0, 285.0, 293.0, 289.0, 285.0, 291.0, 289.0, 293.0, 292.0, 290.0, 293.0, 297.0, 290.0, 292.0, 290.0, 289.0, 248.0, 274.0, 288.0, 291.0, 289.0, 287.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6977054426253134, "mean_inference_ms": 1.2462279208812725, "mean_action_processing_ms": 0.1337937646791519, "mean_env_wait_ms": 0.8399847131397314, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 572.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.005}, "hist_stats": {"episode_reward": [582.0, 576.0, 573.0, 576.0, 579.0, 582.0, 570.0, 579.0, 570.0, 582.0, 579.0, 579.0, 582.0, 579.0, 522.0, 579.0, 579.0, 570.0, 579.0, 579.0, 519.0, 573.0, 579.0, 579.0, 579.0, 522.0, 573.0, 582.0, 630.0, 579.0, 579.0, 582.0, 576.0, 579.0, 570.0, 582.0, 579.0, 630.0, 573.0, 576.0, 536.0, 579.0, 579.0, 576.0, 525.0, 530.0, 576.0, 579.0, 584.0, 522.0, 579.0, 536.0, 522.0, 579.0, 573.0, 582.0, 582.0, 579.0, 576.0, 530.0, 579.0, 630.0, 582.0, 582.0, 522.0, 582.0, 582.0, 630.0, 573.0, 579.0, 579.0, 522.0, 525.0, 570.0, 582.0, 579.0, 636.0, 582.0, 522.0, 579.0, 522.0, 579.0, 576.0, 579.0, 579.0, 525.0, 576.0, 576.0, 579.0, 582.0, 576.0, 582.0, 582.0, 590.0, 582.0, 579.0, 522.0, 579.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 281.0, 295.0, 284.0, 289.0, 290.0, 286.0, 290.0, 289.0, 289.0, 293.0, 291.0, 279.0, 281.0, 298.0, 281.0, 289.0, 284.0, 298.0, 289.0, 290.0, 286.0, 293.0, 290.0, 292.0, 292.0, 287.0, 256.0, 266.0, 288.0, 291.0, 287.0, 292.0, 271.0, 299.0, 285.0, 294.0, 291.0, 288.0, 256.0, 263.0, 288.0, 285.0, 293.0, 286.0, 291.0, 288.0, 284.0, 295.0, 262.0, 260.0, 280.0, 293.0, 290.0, 292.0, 320.0, 310.0, 285.0, 294.0, 295.0, 284.0, 288.0, 294.0, 288.0, 288.0, 288.0, 291.0, 295.0, 275.0, 289.0, 293.0, 288.0, 291.0, 322.0, 308.0, 291.0, 282.0, 286.0, 290.0, 266.0, 270.0, 289.0, 290.0, 290.0, 289.0, 
286.0, 290.0, 263.0, 262.0, 268.0, 262.0, 287.0, 289.0, 294.0, 285.0, 295.0, 289.0, 269.0, 253.0, 292.0, 287.0, 272.0, 264.0, 267.0, 255.0, 289.0, 290.0, 284.0, 289.0, 288.0, 294.0, 296.0, 286.0, 287.0, 292.0, 286.0, 290.0, 268.0, 262.0, 291.0, 288.0, 311.0, 319.0, 294.0, 288.0, 291.0, 291.0, 267.0, 255.0, 295.0, 287.0, 289.0, 293.0, 313.0, 317.0, 283.0, 290.0, 291.0, 288.0, 290.0, 289.0, 262.0, 260.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 285.0, 294.0, 315.0, 321.0, 289.0, 293.0, 263.0, 259.0, 290.0, 289.0, 270.0, 252.0, 289.0, 290.0, 288.0, 288.0, 291.0, 288.0, 289.0, 290.0, 258.0, 267.0, 288.0, 288.0, 283.0, 293.0, 294.0, 285.0, 293.0, 289.0, 285.0, 291.0, 289.0, 293.0, 292.0, 290.0, 293.0, 297.0, 290.0, 292.0, 290.0, 289.0, 248.0, 274.0, 288.0, 291.0, 289.0, 287.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6977054426253134, "mean_inference_ms": 1.2462279208812725, "mean_action_processing_ms": 0.1337937646791519, "mean_env_wait_ms": 0.8399847131397314, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9574400, "num_agent_steps_trained": 9574400, "num_env_steps_sampled": 4787200, "num_env_steps_trained": 4787200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4787200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9574400, "timers": {"training_iteration_time_ms": 3656.761, "learn_time_ms": 1123.177, "learn_throughput": 11396.24, "synch_weights_time_ms": 12.437}, "counters": {"num_env_steps_sampled": 4787200, "num_env_steps_trained": 4787200, "num_agent_steps_sampled": 9574400, "num_agent_steps_trained": 9574400}, "done": false, "episodes_total": 11968, "training_iteration": 374, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-45", "timestamp": 1666581885, "time_this_iter_s": 3.694704532623291, "time_total_s": 1429.8933429718018, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1429.8933429718018, "timesteps_since_restore": 0, "iterations_since_restore": 374, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.859999999999996, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.98, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.09, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.85, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, 
"useful_onion_pickup_agent_0_mean": 15.04, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.74, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.77, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.7, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.47, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.45, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.77, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.77, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00046897190622985363, "policy_loss": 0.000101038021966815, "vf_loss": 7.793358325958252, "vf_explained_var": 0.5690768361091614, "kl": 0.0023539250250905752, "entropy": 0.8228006362915039, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4800000, "num_env_steps_trained": 4800000, "num_agent_steps_sampled": 9600000, "num_agent_steps_trained": 9600000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 571.78, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 285.89}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.98, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.09, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.85, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.04, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.74, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.77, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 17.37, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.7, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.61, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.61, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.03, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.51, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.47, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.47, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.45, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.77, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 17.37, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.77, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 17.37, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 570.0, 582.0, 579.0, 630.0, 573.0, 576.0, 536.0, 579.0, 579.0, 576.0, 525.0, 530.0, 576.0, 579.0, 584.0, 522.0, 579.0, 536.0, 522.0, 579.0, 573.0, 582.0, 582.0, 579.0, 576.0, 530.0, 579.0, 630.0, 582.0, 582.0, 522.0, 582.0, 582.0, 630.0, 573.0, 579.0, 579.0, 522.0, 525.0, 570.0, 582.0, 579.0, 636.0, 582.0, 522.0, 579.0, 522.0, 579.0, 576.0, 579.0, 579.0, 525.0, 576.0, 576.0, 579.0, 582.0, 576.0, 582.0, 582.0, 590.0, 582.0, 579.0, 522.0, 579.0, 576.0, 582.0, 576.0, 570.0, 570.0, 579.0, 530.0, 522.0, 570.0, 519.0, 579.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 582.0, 630.0, 579.0, 533.0, 579.0, 627.0, 582.0, 579.0, 582.0, 587.0, 533.0, 527.0, 633.0, 630.0, 576.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 288.0, 291.0, 295.0, 275.0, 289.0, 293.0, 288.0, 291.0, 322.0, 308.0, 291.0, 282.0, 286.0, 290.0, 266.0, 270.0, 289.0, 290.0, 290.0, 289.0, 286.0, 290.0, 263.0, 262.0, 268.0, 262.0, 287.0, 289.0, 294.0, 285.0, 295.0, 289.0, 269.0, 253.0, 292.0, 287.0, 272.0, 264.0, 267.0, 255.0, 289.0, 290.0, 284.0, 289.0, 288.0, 294.0, 296.0, 286.0, 287.0, 292.0, 286.0, 290.0, 268.0, 262.0, 291.0, 288.0, 311.0, 319.0, 294.0, 288.0, 291.0, 291.0, 267.0, 255.0, 295.0, 287.0, 289.0, 293.0, 313.0, 317.0, 283.0, 290.0, 291.0, 288.0, 290.0, 289.0, 262.0, 260.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 285.0, 294.0, 315.0, 321.0, 289.0, 293.0, 263.0, 259.0, 290.0, 289.0, 270.0, 252.0, 289.0, 290.0, 288.0, 288.0, 291.0, 288.0, 289.0, 290.0, 258.0, 267.0, 288.0, 288.0, 283.0, 293.0, 294.0, 285.0, 293.0, 289.0, 285.0, 291.0, 289.0, 293.0, 292.0, 290.0, 293.0, 297.0, 290.0, 292.0, 290.0, 289.0, 248.0, 274.0, 288.0, 291.0, 289.0, 287.0, 291.0, 291.0, 288.0, 288.0, 287.0, 283.0, 281.0, 289.0, 289.0, 290.0, 264.0, 266.0, 259.0, 263.0, 291.0, 279.0, 264.0, 255.0, 291.0, 288.0, 292.0, 284.0, 286.0, 296.0, 281.0, 298.0, 290.0, 289.0, 290.0, 292.0, 288.0, 291.0, 292.0, 284.0, 290.0, 292.0, 323.0, 307.0, 290.0, 289.0, 263.0, 270.0, 291.0, 288.0, 319.0, 308.0, 285.0, 297.0, 290.0, 289.0, 289.0, 293.0, 294.0, 293.0, 265.0, 268.0, 268.0, 259.0, 313.0, 320.0, 318.0, 312.0, 285.0, 291.0, 261.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6976622511350119, "mean_inference_ms": 1.246101107480903, "mean_action_processing_ms": 0.1337887336031301, "mean_env_wait_ms": 0.8399220614415523, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 571.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 285.89}, "hist_stats": {"episode_reward": [576.0, 579.0, 570.0, 582.0, 579.0, 630.0, 573.0, 576.0, 536.0, 579.0, 579.0, 576.0, 525.0, 530.0, 576.0, 579.0, 584.0, 522.0, 579.0, 536.0, 522.0, 579.0, 573.0, 582.0, 582.0, 579.0, 576.0, 530.0, 579.0, 630.0, 582.0, 582.0, 522.0, 582.0, 582.0, 630.0, 573.0, 579.0, 579.0, 522.0, 525.0, 570.0, 582.0, 579.0, 636.0, 582.0, 522.0, 579.0, 522.0, 579.0, 576.0, 579.0, 579.0, 525.0, 576.0, 576.0, 579.0, 582.0, 576.0, 582.0, 582.0, 590.0, 582.0, 579.0, 522.0, 579.0, 576.0, 582.0, 576.0, 570.0, 570.0, 579.0, 530.0, 522.0, 570.0, 519.0, 579.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 582.0, 630.0, 579.0, 533.0, 579.0, 627.0, 582.0, 579.0, 582.0, 587.0, 533.0, 527.0, 633.0, 630.0, 576.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 288.0, 291.0, 295.0, 275.0, 289.0, 293.0, 288.0, 291.0, 322.0, 308.0, 291.0, 282.0, 286.0, 290.0, 266.0, 270.0, 289.0, 290.0, 290.0, 289.0, 286.0, 290.0, 263.0, 262.0, 268.0, 262.0, 287.0, 289.0, 294.0, 285.0, 295.0, 289.0, 269.0, 253.0, 292.0, 287.0, 272.0, 264.0, 267.0, 255.0, 289.0, 290.0, 284.0, 289.0, 288.0, 294.0, 296.0, 286.0, 287.0, 292.0, 286.0, 290.0, 268.0, 262.0, 291.0, 288.0, 311.0, 319.0, 294.0, 288.0, 291.0, 291.0, 267.0, 255.0, 295.0, 287.0, 289.0, 293.0, 313.0, 317.0, 283.0, 290.0, 291.0, 288.0, 290.0, 289.0, 262.0, 260.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 285.0, 
294.0, 315.0, 321.0, 289.0, 293.0, 263.0, 259.0, 290.0, 289.0, 270.0, 252.0, 289.0, 290.0, 288.0, 288.0, 291.0, 288.0, 289.0, 290.0, 258.0, 267.0, 288.0, 288.0, 283.0, 293.0, 294.0, 285.0, 293.0, 289.0, 285.0, 291.0, 289.0, 293.0, 292.0, 290.0, 293.0, 297.0, 290.0, 292.0, 290.0, 289.0, 248.0, 274.0, 288.0, 291.0, 289.0, 287.0, 291.0, 291.0, 288.0, 288.0, 287.0, 283.0, 281.0, 289.0, 289.0, 290.0, 264.0, 266.0, 259.0, 263.0, 291.0, 279.0, 264.0, 255.0, 291.0, 288.0, 292.0, 284.0, 286.0, 296.0, 281.0, 298.0, 290.0, 289.0, 290.0, 292.0, 288.0, 291.0, 292.0, 284.0, 290.0, 292.0, 323.0, 307.0, 290.0, 289.0, 263.0, 270.0, 291.0, 288.0, 319.0, 308.0, 285.0, 297.0, 290.0, 289.0, 289.0, 293.0, 294.0, 293.0, 265.0, 268.0, 268.0, 259.0, 313.0, 320.0, 318.0, 312.0, 285.0, 291.0, 261.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6976622511350119, "mean_inference_ms": 1.246101107480903, "mean_action_processing_ms": 0.1337887336031301, "mean_env_wait_ms": 0.8399220614415523, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9600000, "num_agent_steps_trained": 9600000, "num_env_steps_sampled": 4800000, "num_env_steps_trained": 4800000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4800000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9600000, "timers": {"training_iteration_time_ms": 3644.642, "learn_time_ms": 1108.163, "learn_throughput": 11550.646, "synch_weights_time_ms": 11.684}, "counters": {"num_env_steps_sampled": 4800000, "num_env_steps_trained": 4800000, "num_agent_steps_sampled": 9600000, "num_agent_steps_trained": 9600000}, "done": false, "episodes_total": 12000, "training_iteration": 375, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-49", "timestamp": 1666581889, "time_this_iter_s": 3.495922803878784, "time_total_s": 1433.3892657756805, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1433.3892657756805, "timesteps_since_restore": 0, "iterations_since_restore": 375, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.14, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.75, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, 
"useful_onion_pickup_agent_0_mean": 14.64, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.08, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.42, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.68, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.56, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.38, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.42, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.42, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003129470278508961, "policy_loss": -0.000683111313264817, "vf_loss": 7.794277667999268, "vf_explained_var": 0.5665597319602966, "kl": 0.00211581913754344, "entropy": 0.8185231685638428, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4812800, "num_env_steps_trained": 4812800, "num_agent_steps_sampled": 9625600, "num_agent_steps_trained": 9625600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 573.14, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 286.57}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.14, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 14.75, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 18.21, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.64, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 18.08, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.42, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 17.72, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.82, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.68, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.68, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.56, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.57, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.43, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.54, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.38, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.42, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 17.72, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.42, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 17.72, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 582.0, 582.0, 630.0, 573.0, 579.0, 579.0, 522.0, 525.0, 570.0, 582.0, 579.0, 636.0, 582.0, 522.0, 579.0, 522.0, 579.0, 576.0, 579.0, 579.0, 525.0, 576.0, 576.0, 579.0, 582.0, 576.0, 582.0, 582.0, 590.0, 582.0, 579.0, 522.0, 579.0, 576.0, 582.0, 576.0, 570.0, 570.0, 579.0, 530.0, 522.0, 570.0, 519.0, 579.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 582.0, 630.0, 579.0, 533.0, 579.0, 627.0, 582.0, 579.0, 582.0, 587.0, 533.0, 527.0, 633.0, 630.0, 576.0, 522.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 579.0, 582.0, 584.0, 579.0, 576.0, 519.0, 570.0, 530.0, 570.0, 573.0, 525.0, 579.0, 582.0, 522.0, 579.0, 582.0, 570.0, 630.0, 573.0, 582.0, 587.0, 582.0, 530.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 255.0, 295.0, 287.0, 289.0, 293.0, 313.0, 317.0, 283.0, 290.0, 291.0, 288.0, 290.0, 289.0, 262.0, 260.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 285.0, 294.0, 315.0, 321.0, 289.0, 293.0, 263.0, 259.0, 290.0, 289.0, 270.0, 252.0, 289.0, 290.0, 288.0, 288.0, 291.0, 288.0, 289.0, 290.0, 258.0, 267.0, 288.0, 288.0, 283.0, 293.0, 294.0, 285.0, 293.0, 289.0, 285.0, 291.0, 289.0, 293.0, 292.0, 290.0, 293.0, 297.0, 290.0, 292.0, 290.0, 289.0, 248.0, 274.0, 288.0, 291.0, 289.0, 287.0, 291.0, 291.0, 288.0, 288.0, 287.0, 283.0, 281.0, 289.0, 289.0, 290.0, 264.0, 266.0, 259.0, 263.0, 291.0, 279.0, 264.0, 255.0, 291.0, 288.0, 292.0, 284.0, 286.0, 296.0, 281.0, 298.0, 290.0, 289.0, 290.0, 292.0, 288.0, 291.0, 292.0, 284.0, 290.0, 292.0, 323.0, 307.0, 290.0, 289.0, 263.0, 270.0, 291.0, 288.0, 319.0, 308.0, 285.0, 297.0, 290.0, 289.0, 289.0, 293.0, 294.0, 293.0, 265.0, 268.0, 268.0, 259.0, 313.0, 320.0, 318.0, 312.0, 285.0, 291.0, 261.0, 261.0, 289.0, 290.0, 284.0, 292.0, 314.0, 316.0, 293.0, 286.0, 298.0, 284.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 285.0, 297.0, 286.0, 298.0, 282.0, 297.0, 291.0, 285.0, 264.0, 255.0, 290.0, 280.0, 266.0, 264.0, 282.0, 288.0, 280.0, 293.0, 257.0, 268.0, 292.0, 287.0, 290.0, 292.0, 254.0, 268.0, 287.0, 292.0, 292.0, 290.0, 289.0, 281.0, 313.0, 317.0, 285.0, 288.0, 293.0, 289.0, 293.0, 294.0, 293.0, 289.0, 272.0, 258.0, 296.0, 283.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6976294516149263, "mean_inference_ms": 1.2459799356312125, "mean_action_processing_ms": 0.1337841497120114, "mean_env_wait_ms": 0.8398647128407508, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 573.14, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 286.57}, "hist_stats": {"episode_reward": [522.0, 582.0, 582.0, 630.0, 573.0, 579.0, 579.0, 522.0, 525.0, 570.0, 582.0, 579.0, 636.0, 582.0, 522.0, 579.0, 522.0, 579.0, 576.0, 579.0, 579.0, 525.0, 576.0, 576.0, 579.0, 582.0, 576.0, 582.0, 582.0, 590.0, 582.0, 579.0, 522.0, 579.0, 576.0, 582.0, 576.0, 570.0, 570.0, 579.0, 530.0, 522.0, 570.0, 519.0, 579.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 582.0, 630.0, 579.0, 533.0, 579.0, 627.0, 582.0, 579.0, 582.0, 587.0, 533.0, 527.0, 633.0, 630.0, 576.0, 522.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 579.0, 582.0, 584.0, 579.0, 576.0, 519.0, 570.0, 530.0, 570.0, 573.0, 525.0, 579.0, 582.0, 522.0, 579.0, 582.0, 570.0, 630.0, 573.0, 582.0, 587.0, 582.0, 530.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 255.0, 295.0, 287.0, 289.0, 293.0, 313.0, 317.0, 283.0, 290.0, 291.0, 288.0, 290.0, 289.0, 262.0, 260.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 285.0, 294.0, 315.0, 321.0, 289.0, 293.0, 263.0, 259.0, 290.0, 289.0, 270.0, 252.0, 289.0, 290.0, 288.0, 288.0, 291.0, 288.0, 289.0, 290.0, 258.0, 267.0, 288.0, 288.0, 283.0, 293.0, 294.0, 285.0, 293.0, 289.0, 285.0, 291.0, 289.0, 293.0, 292.0, 290.0, 293.0, 297.0, 290.0, 292.0, 290.0, 289.0, 248.0, 274.0, 288.0, 291.0, 289.0, 287.0, 291.0, 291.0, 288.0, 288.0, 287.0, 283.0, 281.0, 289.0, 289.0, 290.0, 264.0, 266.0, 259.0, 263.0, 291.0, 279.0, 264.0, 
255.0, 291.0, 288.0, 292.0, 284.0, 286.0, 296.0, 281.0, 298.0, 290.0, 289.0, 290.0, 292.0, 288.0, 291.0, 292.0, 284.0, 290.0, 292.0, 323.0, 307.0, 290.0, 289.0, 263.0, 270.0, 291.0, 288.0, 319.0, 308.0, 285.0, 297.0, 290.0, 289.0, 289.0, 293.0, 294.0, 293.0, 265.0, 268.0, 268.0, 259.0, 313.0, 320.0, 318.0, 312.0, 285.0, 291.0, 261.0, 261.0, 289.0, 290.0, 284.0, 292.0, 314.0, 316.0, 293.0, 286.0, 298.0, 284.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 285.0, 297.0, 286.0, 298.0, 282.0, 297.0, 291.0, 285.0, 264.0, 255.0, 290.0, 280.0, 266.0, 264.0, 282.0, 288.0, 280.0, 293.0, 257.0, 268.0, 292.0, 287.0, 290.0, 292.0, 254.0, 268.0, 287.0, 292.0, 292.0, 290.0, 289.0, 281.0, 313.0, 317.0, 285.0, 288.0, 293.0, 289.0, 293.0, 294.0, 293.0, 289.0, 272.0, 258.0, 296.0, 283.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6976294516149263, "mean_inference_ms": 1.2459799356312125, "mean_action_processing_ms": 0.1337841497120114, "mean_env_wait_ms": 0.8398647128407508, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9625600, "num_agent_steps_trained": 9625600, "num_env_steps_sampled": 4812800, "num_env_steps_trained": 4812800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4812800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9625600, "timers": {"training_iteration_time_ms": 3645.941, "learn_time_ms": 1099.023, "learn_throughput": 11646.708, "synch_weights_time_ms": 12.001}, "counters": {"num_env_steps_sampled": 4812800, "num_env_steps_trained": 4812800, "num_agent_steps_sampled": 9625600, "num_agent_steps_trained": 9625600}, "done": false, "episodes_total": 12032, "training_iteration": 376, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-53", "timestamp": 1666581893, "time_this_iter_s": 3.6507773399353027, "time_total_s": 1437.0400431156158, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1437.0400431156158, "timesteps_since_restore": 0, "iterations_since_restore": 376, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.966666666666665, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 176.08, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.32, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.5, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, 
"useful_onion_pickup_agent_0_mean": 15.19, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.99, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.58, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.59, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.99, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.99, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007580869714729488, "policy_loss": -0.0011221127351745963, "vf_loss": 7.772212982177734, "vf_explained_var": 0.5765685439109802, "kl": 0.0033011985942721367, "entropy": 0.826388955116272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4825600, "num_env_steps_trained": 4825600, "num_agent_steps_sampled": 9651200, "num_agent_steps_trained": 9651200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 387.0, "episode_reward_mean": 568.88, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.44}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 176.08, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.32, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.5, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.19, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.99, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.58, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.59, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.99, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.99, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 579.0, 576.0, 582.0, 576.0, 570.0, 570.0, 579.0, 530.0, 522.0, 570.0, 519.0, 579.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 582.0, 630.0, 579.0, 533.0, 579.0, 627.0, 582.0, 579.0, 582.0, 587.0, 533.0, 527.0, 633.0, 630.0, 576.0, 522.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 579.0, 582.0, 584.0, 579.0, 576.0, 519.0, 570.0, 530.0, 570.0, 573.0, 525.0, 579.0, 582.0, 522.0, 579.0, 582.0, 570.0, 630.0, 573.0, 582.0, 587.0, 582.0, 530.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 573.0, 387.0, 633.0, 582.0, 522.0, 582.0, 579.0, 533.0, 579.0, 525.0, 576.0, 582.0, 576.0, 522.0, 579.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 564.0, 570.0, 576.0, 527.0, 579.0, 522.0, 467.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [248.0, 274.0, 288.0, 291.0, 289.0, 287.0, 291.0, 291.0, 288.0, 288.0, 287.0, 283.0, 281.0, 289.0, 289.0, 290.0, 264.0, 266.0, 259.0, 263.0, 291.0, 279.0, 264.0, 255.0, 291.0, 288.0, 292.0, 284.0, 286.0, 296.0, 281.0, 298.0, 290.0, 289.0, 290.0, 292.0, 288.0, 291.0, 292.0, 284.0, 290.0, 292.0, 323.0, 307.0, 290.0, 289.0, 263.0, 270.0, 291.0, 288.0, 319.0, 308.0, 285.0, 297.0, 290.0, 289.0, 289.0, 293.0, 294.0, 293.0, 265.0, 268.0, 268.0, 259.0, 313.0, 320.0, 318.0, 312.0, 285.0, 291.0, 261.0, 261.0, 289.0, 290.0, 284.0, 292.0, 314.0, 316.0, 293.0, 286.0, 298.0, 284.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 285.0, 297.0, 286.0, 298.0, 282.0, 297.0, 291.0, 285.0, 264.0, 255.0, 290.0, 280.0, 266.0, 264.0, 282.0, 288.0, 280.0, 293.0, 257.0, 268.0, 292.0, 287.0, 290.0, 292.0, 254.0, 268.0, 287.0, 292.0, 292.0, 290.0, 289.0, 281.0, 313.0, 317.0, 285.0, 288.0, 293.0, 289.0, 293.0, 294.0, 293.0, 289.0, 272.0, 258.0, 296.0, 283.0, 286.0, 293.0, 294.0, 288.0, 291.0, 288.0, 289.0, 290.0, 291.0, 291.0, 291.0, 282.0, 196.0, 191.0, 314.0, 319.0, 296.0, 286.0, 262.0, 260.0, 290.0, 292.0, 289.0, 290.0, 270.0, 263.0, 290.0, 289.0, 259.0, 266.0, 286.0, 290.0, 289.0, 293.0, 286.0, 290.0, 263.0, 259.0, 284.0, 295.0, 290.0, 292.0, 288.0, 291.0, 264.0, 261.0, 286.0, 287.0, 289.0, 290.0, 296.0, 291.0, 273.0, 291.0, 280.0, 290.0, 290.0, 286.0, 259.0, 268.0, 288.0, 291.0, 259.0, 263.0, 229.0, 238.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6975785576399118, "mean_inference_ms": 1.2458591730176685, "mean_action_processing_ms": 0.13377926353914563, "mean_env_wait_ms": 0.8398065533045238, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 387.0, "episode_reward_mean": 568.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 191.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 284.44}, "hist_stats": {"episode_reward": [522.0, 579.0, 576.0, 582.0, 576.0, 570.0, 570.0, 579.0, 530.0, 522.0, 570.0, 519.0, 579.0, 576.0, 582.0, 579.0, 579.0, 582.0, 579.0, 576.0, 582.0, 630.0, 579.0, 533.0, 579.0, 627.0, 582.0, 579.0, 582.0, 587.0, 533.0, 527.0, 633.0, 630.0, 576.0, 522.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 579.0, 582.0, 584.0, 579.0, 576.0, 519.0, 570.0, 530.0, 570.0, 573.0, 525.0, 579.0, 582.0, 522.0, 579.0, 582.0, 570.0, 630.0, 573.0, 582.0, 587.0, 582.0, 530.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 573.0, 387.0, 633.0, 582.0, 522.0, 582.0, 579.0, 533.0, 579.0, 525.0, 576.0, 582.0, 576.0, 522.0, 579.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 564.0, 570.0, 576.0, 527.0, 579.0, 522.0, 467.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [248.0, 274.0, 288.0, 291.0, 289.0, 287.0, 291.0, 291.0, 288.0, 288.0, 287.0, 283.0, 281.0, 289.0, 289.0, 290.0, 264.0, 266.0, 259.0, 263.0, 291.0, 279.0, 264.0, 255.0, 291.0, 288.0, 292.0, 284.0, 286.0, 296.0, 281.0, 298.0, 290.0, 289.0, 290.0, 292.0, 288.0, 291.0, 292.0, 284.0, 290.0, 292.0, 323.0, 307.0, 290.0, 289.0, 263.0, 270.0, 291.0, 288.0, 319.0, 308.0, 285.0, 297.0, 290.0, 289.0, 289.0, 293.0, 294.0, 293.0, 265.0, 268.0, 268.0, 259.0, 313.0, 320.0, 318.0, 312.0, 285.0, 291.0, 261.0, 261.0, 289.0, 290.0, 284.0, 292.0, 314.0, 316.0, 293.0, 286.0, 298.0, 284.0, 309.0, 321.0, 283.0, 296.0, 288.0, 
291.0, 285.0, 297.0, 286.0, 298.0, 282.0, 297.0, 291.0, 285.0, 264.0, 255.0, 290.0, 280.0, 266.0, 264.0, 282.0, 288.0, 280.0, 293.0, 257.0, 268.0, 292.0, 287.0, 290.0, 292.0, 254.0, 268.0, 287.0, 292.0, 292.0, 290.0, 289.0, 281.0, 313.0, 317.0, 285.0, 288.0, 293.0, 289.0, 293.0, 294.0, 293.0, 289.0, 272.0, 258.0, 296.0, 283.0, 286.0, 293.0, 294.0, 288.0, 291.0, 288.0, 289.0, 290.0, 291.0, 291.0, 291.0, 282.0, 196.0, 191.0, 314.0, 319.0, 296.0, 286.0, 262.0, 260.0, 290.0, 292.0, 289.0, 290.0, 270.0, 263.0, 290.0, 289.0, 259.0, 266.0, 286.0, 290.0, 289.0, 293.0, 286.0, 290.0, 263.0, 259.0, 284.0, 295.0, 290.0, 292.0, 288.0, 291.0, 264.0, 261.0, 286.0, 287.0, 289.0, 290.0, 296.0, 291.0, 273.0, 291.0, 280.0, 290.0, 290.0, 286.0, 259.0, 268.0, 288.0, 291.0, 259.0, 263.0, 229.0, 238.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6975785576399118, "mean_inference_ms": 1.2458591730176685, "mean_action_processing_ms": 0.13377926353914563, "mean_env_wait_ms": 0.8398065533045238, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9651200, "num_agent_steps_trained": 9651200, "num_env_steps_sampled": 4825600, "num_env_steps_trained": 4825600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4825600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9651200, "timers": {"training_iteration_time_ms": 3642.325, "learn_time_ms": 1094.616, "learn_throughput": 11693.594, "synch_weights_time_ms": 11.968}, "counters": {"num_env_steps_sampled": 4825600, "num_env_steps_trained": 4825600, "num_agent_steps_sampled": 9651200, "num_agent_steps_trained": 9651200}, "done": false, "episodes_total": 12064, "training_iteration": 377, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-24-57", "timestamp": 1666581897, "time_this_iter_s": 3.6727993488311768, "time_total_s": 1440.712842464447, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1440.712842464447, "timesteps_since_restore": 0, "iterations_since_restore": 377, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.52, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 175.07, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.28, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.34, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, 
"useful_onion_pickup_agent_0_mean": 15.11, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.18, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.91, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.58, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.91, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.91, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00013266084715723991, "policy_loss": -0.0005023379344493151, "vf_loss": 7.769681930541992, "vf_explained_var": 0.5716358423233032, "kl": 0.003659307025372982, "entropy": 0.8145787715911865, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4838400, "num_env_steps_trained": 4838400, "num_agent_steps_sampled": 9676800, "num_agent_steps_trained": 9676800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 302.0, "episode_reward_mean": 565.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 282.935}, "custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 175.07, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.28, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.34, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.11, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 17.18, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.82, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.91, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.51, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.91, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.77, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.64, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.25, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.82, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.91, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.82, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.91, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 630.0, 576.0, 522.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 579.0, 582.0, 584.0, 579.0, 576.0, 519.0, 570.0, 530.0, 570.0, 573.0, 525.0, 579.0, 582.0, 522.0, 579.0, 582.0, 570.0, 630.0, 573.0, 582.0, 587.0, 582.0, 530.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 573.0, 387.0, 633.0, 582.0, 522.0, 582.0, 579.0, 533.0, 579.0, 525.0, 576.0, 582.0, 576.0, 522.0, 579.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 564.0, 570.0, 576.0, 527.0, 579.0, 522.0, 467.0, 570.0, 525.0, 576.0, 573.0, 576.0, 582.0, 579.0, 573.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 576.0, 573.0, 525.0, 579.0, 579.0, 579.0, 525.0, 570.0, 582.0, 576.0, 573.0, 579.0, 587.0, 302.0, 582.0, 525.0, 525.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 320.0, 318.0, 312.0, 285.0, 291.0, 261.0, 261.0, 289.0, 290.0, 284.0, 292.0, 314.0, 316.0, 293.0, 286.0, 298.0, 284.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 285.0, 297.0, 286.0, 298.0, 282.0, 297.0, 291.0, 285.0, 264.0, 255.0, 290.0, 280.0, 266.0, 264.0, 282.0, 288.0, 280.0, 293.0, 257.0, 268.0, 292.0, 287.0, 290.0, 292.0, 254.0, 268.0, 287.0, 292.0, 292.0, 290.0, 289.0, 281.0, 313.0, 317.0, 285.0, 288.0, 293.0, 289.0, 293.0, 294.0, 293.0, 289.0, 272.0, 258.0, 296.0, 283.0, 286.0, 293.0, 294.0, 288.0, 291.0, 288.0, 289.0, 290.0, 291.0, 291.0, 291.0, 282.0, 196.0, 191.0, 314.0, 319.0, 296.0, 286.0, 262.0, 260.0, 290.0, 292.0, 289.0, 290.0, 270.0, 263.0, 290.0, 289.0, 259.0, 266.0, 286.0, 290.0, 289.0, 293.0, 286.0, 290.0, 263.0, 259.0, 284.0, 295.0, 290.0, 292.0, 288.0, 291.0, 264.0, 261.0, 286.0, 287.0, 289.0, 290.0, 296.0, 291.0, 273.0, 291.0, 280.0, 290.0, 290.0, 286.0, 259.0, 268.0, 288.0, 291.0, 259.0, 263.0, 229.0, 238.0, 284.0, 286.0, 260.0, 265.0, 299.0, 277.0, 282.0, 291.0, 284.0, 292.0, 290.0, 292.0, 298.0, 281.0, 285.0, 288.0, 294.0, 293.0, 287.0, 295.0, 293.0, 289.0, 290.0, 283.0, 288.0, 291.0, 282.0, 288.0, 287.0, 289.0, 283.0, 290.0, 255.0, 270.0, 289.0, 290.0, 293.0, 286.0, 294.0, 285.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 287.0, 289.0, 291.0, 282.0, 287.0, 292.0, 296.0, 291.0, 145.0, 157.0, 290.0, 292.0, 258.0, 267.0, 259.0, 266.0, 287.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6975360336417056, "mean_inference_ms": 1.2457301821099558, "mean_action_processing_ms": 0.1337741085947604, "mean_env_wait_ms": 0.8397488598807737, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 302.0, "episode_reward_mean": 565.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 282.935}, "hist_stats": {"episode_reward": [633.0, 630.0, 576.0, 522.0, 579.0, 576.0, 630.0, 579.0, 582.0, 630.0, 579.0, 579.0, 582.0, 584.0, 579.0, 576.0, 519.0, 570.0, 530.0, 570.0, 573.0, 525.0, 579.0, 582.0, 522.0, 579.0, 582.0, 570.0, 630.0, 573.0, 582.0, 587.0, 582.0, 530.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 573.0, 387.0, 633.0, 582.0, 522.0, 582.0, 579.0, 533.0, 579.0, 525.0, 576.0, 582.0, 576.0, 522.0, 579.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 564.0, 570.0, 576.0, 527.0, 579.0, 522.0, 467.0, 570.0, 525.0, 576.0, 573.0, 576.0, 582.0, 579.0, 573.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 576.0, 573.0, 525.0, 579.0, 579.0, 579.0, 525.0, 570.0, 582.0, 576.0, 573.0, 579.0, 587.0, 302.0, 582.0, 525.0, 525.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 320.0, 318.0, 312.0, 285.0, 291.0, 261.0, 261.0, 289.0, 290.0, 284.0, 292.0, 314.0, 316.0, 293.0, 286.0, 298.0, 284.0, 309.0, 321.0, 283.0, 296.0, 288.0, 291.0, 285.0, 297.0, 286.0, 298.0, 282.0, 297.0, 291.0, 285.0, 264.0, 255.0, 290.0, 280.0, 266.0, 264.0, 282.0, 288.0, 280.0, 293.0, 257.0, 268.0, 292.0, 287.0, 290.0, 292.0, 254.0, 268.0, 287.0, 292.0, 292.0, 290.0, 289.0, 281.0, 313.0, 317.0, 285.0, 288.0, 293.0, 289.0, 293.0, 294.0, 293.0, 289.0, 272.0, 258.0, 296.0, 283.0, 286.0, 293.0, 294.0, 288.0, 291.0, 288.0, 289.0, 290.0, 291.0, 291.0, 291.0, 282.0, 196.0, 191.0, 314.0, 319.0, 
296.0, 286.0, 262.0, 260.0, 290.0, 292.0, 289.0, 290.0, 270.0, 263.0, 290.0, 289.0, 259.0, 266.0, 286.0, 290.0, 289.0, 293.0, 286.0, 290.0, 263.0, 259.0, 284.0, 295.0, 290.0, 292.0, 288.0, 291.0, 264.0, 261.0, 286.0, 287.0, 289.0, 290.0, 296.0, 291.0, 273.0, 291.0, 280.0, 290.0, 290.0, 286.0, 259.0, 268.0, 288.0, 291.0, 259.0, 263.0, 229.0, 238.0, 284.0, 286.0, 260.0, 265.0, 299.0, 277.0, 282.0, 291.0, 284.0, 292.0, 290.0, 292.0, 298.0, 281.0, 285.0, 288.0, 294.0, 293.0, 287.0, 295.0, 293.0, 289.0, 290.0, 283.0, 288.0, 291.0, 282.0, 288.0, 287.0, 289.0, 283.0, 290.0, 255.0, 270.0, 289.0, 290.0, 293.0, 286.0, 294.0, 285.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 287.0, 289.0, 291.0, 282.0, 287.0, 292.0, 296.0, 291.0, 145.0, 157.0, 290.0, 292.0, 258.0, 267.0, 259.0, 266.0, 287.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6975360336417056, "mean_inference_ms": 1.2457301821099558, "mean_action_processing_ms": 0.1337741085947604, "mean_env_wait_ms": 0.8397488598807737, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9676800, "num_agent_steps_trained": 9676800, "num_env_steps_sampled": 4838400, "num_env_steps_trained": 4838400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4838400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9676800, "timers": {"training_iteration_time_ms": 3635.925, "learn_time_ms": 1088.364, "learn_throughput": 11760.771, "synch_weights_time_ms": 12.047}, "counters": {"num_env_steps_sampled": 4838400, "num_env_steps_trained": 4838400, "num_agent_steps_sampled": 9676800, "num_agent_steps_trained": 9676800}, "done": false, "episodes_total": 12096, "training_iteration": 378, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-00", "timestamp": 1666581900, "time_this_iter_s": 3.6549689769744873, "time_total_s": 1444.3678114414215, "pid": 1816099, 
"hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1444.3678114414215, "timesteps_since_restore": 0, "iterations_since_restore": 378, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.816666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 173.55, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.99, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, 
"useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.82, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.61, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.14, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.6, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.61, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.61, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016985749825835228, "policy_loss": -0.0020646799821406603, "vf_loss": 7.783526420593262, "vf_explained_var": 0.5932220220565796, "kl": 0.002582661574706435, "entropy": 0.8244947195053101, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4851200, "num_env_steps_trained": 4851200, "num_agent_steps_sampled": 9702400, "num_agent_steps_trained": 9702400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 302.0, "episode_reward_mean": 559.95, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.975}, "custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 173.55, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 16.99, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.21, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.82, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.61, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.79, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.03, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.14, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.61, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.61, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 530.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 573.0, 387.0, 633.0, 582.0, 522.0, 582.0, 579.0, 533.0, 579.0, 525.0, 576.0, 582.0, 576.0, 522.0, 579.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 564.0, 570.0, 576.0, 527.0, 579.0, 522.0, 467.0, 570.0, 525.0, 576.0, 573.0, 576.0, 582.0, 579.0, 573.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 576.0, 573.0, 525.0, 579.0, 579.0, 579.0, 525.0, 570.0, 582.0, 576.0, 573.0, 579.0, 587.0, 302.0, 582.0, 525.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 576.0, 579.0, 579.0, 576.0, 527.0, 582.0, 525.0, 579.0, 579.0, 576.0, 582.0, 582.0, 579.0, 351.0, 579.0, 579.0, 522.0, 627.0, 525.0, 522.0, 579.0, 584.0, 573.0, 522.0, 579.0, 522.0, 582.0, 524.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 272.0, 258.0, 296.0, 283.0, 286.0, 293.0, 294.0, 288.0, 291.0, 288.0, 289.0, 290.0, 291.0, 291.0, 291.0, 282.0, 196.0, 191.0, 314.0, 319.0, 296.0, 286.0, 262.0, 260.0, 290.0, 292.0, 289.0, 290.0, 270.0, 263.0, 290.0, 289.0, 259.0, 266.0, 286.0, 290.0, 289.0, 293.0, 286.0, 290.0, 263.0, 259.0, 284.0, 295.0, 290.0, 292.0, 288.0, 291.0, 264.0, 261.0, 286.0, 287.0, 289.0, 290.0, 296.0, 291.0, 273.0, 291.0, 280.0, 290.0, 290.0, 286.0, 259.0, 268.0, 288.0, 291.0, 259.0, 263.0, 229.0, 238.0, 284.0, 286.0, 260.0, 265.0, 299.0, 277.0, 282.0, 291.0, 284.0, 292.0, 290.0, 292.0, 298.0, 281.0, 285.0, 288.0, 294.0, 293.0, 287.0, 295.0, 293.0, 289.0, 290.0, 283.0, 288.0, 291.0, 282.0, 288.0, 287.0, 289.0, 283.0, 290.0, 255.0, 270.0, 289.0, 290.0, 293.0, 286.0, 294.0, 285.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 287.0, 289.0, 291.0, 282.0, 287.0, 292.0, 296.0, 291.0, 145.0, 157.0, 290.0, 292.0, 258.0, 267.0, 259.0, 266.0, 287.0, 295.0, 282.0, 291.0, 315.0, 312.0, 279.0, 300.0, 260.0, 267.0, 288.0, 288.0, 290.0, 289.0, 287.0, 292.0, 288.0, 288.0, 260.0, 267.0, 285.0, 297.0, 264.0, 261.0, 293.0, 286.0, 283.0, 296.0, 291.0, 285.0, 286.0, 296.0, 287.0, 295.0, 290.0, 289.0, 177.0, 174.0, 293.0, 286.0, 286.0, 293.0, 265.0, 257.0, 315.0, 312.0, 262.0, 263.0, 267.0, 255.0, 289.0, 290.0, 289.0, 295.0, 287.0, 286.0, 270.0, 252.0, 289.0, 290.0, 254.0, 268.0, 285.0, 297.0, 257.0, 267.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6974907736658347, "mean_inference_ms": 1.2455914003726156, "mean_action_processing_ms": 0.13376785719477183, "mean_env_wait_ms": 0.8396836843394329, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 302.0, "episode_reward_mean": 559.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.975}, "hist_stats": {"episode_reward": [582.0, 530.0, 579.0, 579.0, 582.0, 579.0, 579.0, 582.0, 573.0, 387.0, 633.0, 582.0, 522.0, 582.0, 579.0, 533.0, 579.0, 525.0, 576.0, 582.0, 576.0, 522.0, 579.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 564.0, 570.0, 576.0, 527.0, 579.0, 522.0, 467.0, 570.0, 525.0, 576.0, 573.0, 576.0, 582.0, 579.0, 573.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 576.0, 573.0, 525.0, 579.0, 579.0, 579.0, 525.0, 570.0, 582.0, 576.0, 573.0, 579.0, 587.0, 302.0, 582.0, 525.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 576.0, 579.0, 579.0, 576.0, 527.0, 582.0, 525.0, 579.0, 579.0, 576.0, 582.0, 582.0, 579.0, 351.0, 579.0, 579.0, 522.0, 627.0, 525.0, 522.0, 579.0, 584.0, 573.0, 522.0, 579.0, 522.0, 582.0, 524.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 272.0, 258.0, 296.0, 283.0, 286.0, 293.0, 294.0, 288.0, 291.0, 288.0, 289.0, 290.0, 291.0, 291.0, 291.0, 282.0, 196.0, 191.0, 314.0, 319.0, 296.0, 286.0, 262.0, 260.0, 290.0, 292.0, 289.0, 290.0, 270.0, 263.0, 290.0, 289.0, 259.0, 266.0, 286.0, 290.0, 289.0, 293.0, 286.0, 290.0, 263.0, 259.0, 284.0, 295.0, 290.0, 292.0, 288.0, 291.0, 264.0, 261.0, 286.0, 287.0, 289.0, 290.0, 296.0, 291.0, 273.0, 291.0, 280.0, 290.0, 290.0, 286.0, 259.0, 268.0, 288.0, 291.0, 259.0, 263.0, 229.0, 238.0, 284.0, 286.0, 260.0, 265.0, 299.0, 277.0, 282.0, 291.0, 284.0, 292.0, 290.0, 292.0, 298.0, 281.0, 
285.0, 288.0, 294.0, 293.0, 287.0, 295.0, 293.0, 289.0, 290.0, 283.0, 288.0, 291.0, 282.0, 288.0, 287.0, 289.0, 283.0, 290.0, 255.0, 270.0, 289.0, 290.0, 293.0, 286.0, 294.0, 285.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 287.0, 289.0, 291.0, 282.0, 287.0, 292.0, 296.0, 291.0, 145.0, 157.0, 290.0, 292.0, 258.0, 267.0, 259.0, 266.0, 287.0, 295.0, 282.0, 291.0, 315.0, 312.0, 279.0, 300.0, 260.0, 267.0, 288.0, 288.0, 290.0, 289.0, 287.0, 292.0, 288.0, 288.0, 260.0, 267.0, 285.0, 297.0, 264.0, 261.0, 293.0, 286.0, 283.0, 296.0, 291.0, 285.0, 286.0, 296.0, 287.0, 295.0, 290.0, 289.0, 177.0, 174.0, 293.0, 286.0, 286.0, 293.0, 265.0, 257.0, 315.0, 312.0, 262.0, 263.0, 267.0, 255.0, 289.0, 290.0, 289.0, 295.0, 287.0, 286.0, 270.0, 252.0, 289.0, 290.0, 254.0, 268.0, 285.0, 297.0, 257.0, 267.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6974907736658347, "mean_inference_ms": 1.2455914003726156, "mean_action_processing_ms": 0.13376785719477183, "mean_env_wait_ms": 0.8396836843394329, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9702400, "num_agent_steps_trained": 9702400, "num_env_steps_sampled": 4851200, "num_env_steps_trained": 4851200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4851200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9702400, "timers": {"training_iteration_time_ms": 3610.232, "learn_time_ms": 1086.244, "learn_throughput": 11783.729, "synch_weights_time_ms": 11.059}, "counters": {"num_env_steps_sampled": 4851200, "num_env_steps_trained": 4851200, "num_agent_steps_sampled": 9702400, "num_agent_steps_trained": 9702400}, "done": false, "episodes_total": 12128, "training_iteration": 379, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-04", "timestamp": 1666581904, "time_this_iter_s": 3.679121971130371, "time_total_s": 1448.0469334125519, "pid": 1816099, 
"hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1448.0469334125519, "timesteps_since_restore": 0, "iterations_since_restore": 379, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.380000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 172.65, "shaped_reward_min": 102, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.21, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.06, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, 
"useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.95, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.66, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.71, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.55, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.66, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.71, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.66, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.71, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0017400092910975218, "policy_loss": 0.001374026178382337, "vf_loss": 7.744047164916992, "vf_explained_var": 0.5891155004501343, "kl": 0.00385090708732605, "entropy": 0.8168433308601379, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4864000, "num_env_steps_trained": 4864000, "num_agent_steps_sampled": 9728000, "num_agent_steps_trained": 9728000}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 302.0, "episode_reward_mean": 559.05, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 315.0}, "policy_reward_mean": {"ppo": 279.525}, "custom_metrics": {"sparse_reward_mean": 193.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 172.65, "shaped_reward_min": 102, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.21, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 17.06, "onion_pickup_agent_1_min": 7, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 16.95, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.66, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 24, "potting_onion_agent_1_mean": 16.71, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.69, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.05, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.19, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.6, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.17, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.66, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 24, "optimal_onion_potting_agent_1_mean": 16.71, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.66, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 24, "viable_onion_potting_agent_1_mean": 16.71, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 579.0, 522.0, 467.0, 570.0, 525.0, 576.0, 573.0, 576.0, 582.0, 579.0, 573.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 576.0, 573.0, 525.0, 579.0, 579.0, 579.0, 525.0, 570.0, 582.0, 576.0, 573.0, 579.0, 587.0, 302.0, 582.0, 525.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 576.0, 579.0, 579.0, 576.0, 527.0, 582.0, 525.0, 579.0, 579.0, 576.0, 582.0, 582.0, 579.0, 351.0, 579.0, 579.0, 522.0, 627.0, 525.0, 522.0, 579.0, 584.0, 573.0, 522.0, 579.0, 522.0, 582.0, 524.0, 579.0, 561.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 576.0, 579.0, 519.0, 579.0, 582.0, 576.0, 525.0, 573.0, 582.0, 536.0, 462.0, 510.0, 567.0, 579.0, 579.0, 570.0, 579.0, 530.0, 576.0, 530.0, 567.0, 579.0, 576.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [259.0, 268.0, 288.0, 291.0, 259.0, 263.0, 229.0, 238.0, 284.0, 286.0, 260.0, 265.0, 299.0, 277.0, 282.0, 291.0, 284.0, 292.0, 290.0, 292.0, 298.0, 281.0, 285.0, 288.0, 294.0, 293.0, 287.0, 295.0, 293.0, 289.0, 290.0, 283.0, 288.0, 291.0, 282.0, 288.0, 287.0, 289.0, 283.0, 290.0, 255.0, 270.0, 289.0, 290.0, 293.0, 286.0, 294.0, 285.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 287.0, 289.0, 291.0, 282.0, 287.0, 292.0, 296.0, 291.0, 145.0, 157.0, 290.0, 292.0, 258.0, 267.0, 259.0, 266.0, 287.0, 295.0, 282.0, 291.0, 315.0, 312.0, 279.0, 300.0, 260.0, 267.0, 288.0, 288.0, 290.0, 289.0, 287.0, 292.0, 288.0, 288.0, 260.0, 267.0, 285.0, 297.0, 264.0, 261.0, 293.0, 286.0, 283.0, 296.0, 291.0, 285.0, 286.0, 296.0, 287.0, 295.0, 290.0, 289.0, 177.0, 174.0, 293.0, 286.0, 286.0, 293.0, 265.0, 257.0, 315.0, 312.0, 262.0, 263.0, 267.0, 255.0, 289.0, 290.0, 289.0, 295.0, 287.0, 286.0, 270.0, 252.0, 289.0, 290.0, 254.0, 268.0, 285.0, 297.0, 257.0, 267.0, 287.0, 292.0, 281.0, 280.0, 291.0, 288.0, 296.0, 283.0, 293.0, 289.0, 287.0, 289.0, 292.0, 290.0, 290.0, 289.0, 287.0, 289.0, 285.0, 294.0, 259.0, 260.0, 286.0, 293.0, 291.0, 291.0, 287.0, 289.0, 260.0, 265.0, 289.0, 284.0, 287.0, 295.0, 261.0, 275.0, 232.0, 230.0, 255.0, 255.0, 288.0, 279.0, 288.0, 291.0, 288.0, 291.0, 282.0, 288.0, 288.0, 291.0, 265.0, 265.0, 285.0, 291.0, 267.0, 263.0, 290.0, 277.0, 286.0, 293.0, 288.0, 288.0, 262.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6974470945713677, "mean_inference_ms": 1.2454543364921187, "mean_action_processing_ms": 0.13376143031894988, "mean_env_wait_ms": 0.8396144520767223, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 302.0, "episode_reward_mean": 559.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 145.0}, "policy_reward_max": {"ppo": 315.0}, "policy_reward_mean": {"ppo": 279.525}, "hist_stats": {"episode_reward": [527.0, 579.0, 522.0, 467.0, 570.0, 525.0, 576.0, 573.0, 576.0, 582.0, 579.0, 573.0, 587.0, 582.0, 582.0, 573.0, 579.0, 570.0, 576.0, 573.0, 525.0, 579.0, 579.0, 579.0, 525.0, 570.0, 582.0, 576.0, 573.0, 579.0, 587.0, 302.0, 582.0, 525.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 576.0, 579.0, 579.0, 576.0, 527.0, 582.0, 525.0, 579.0, 579.0, 576.0, 582.0, 582.0, 579.0, 351.0, 579.0, 579.0, 522.0, 627.0, 525.0, 522.0, 579.0, 584.0, 573.0, 522.0, 579.0, 522.0, 582.0, 524.0, 579.0, 561.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 576.0, 579.0, 519.0, 579.0, 582.0, 576.0, 525.0, 573.0, 582.0, 536.0, 462.0, 510.0, 567.0, 579.0, 579.0, 570.0, 579.0, 530.0, 576.0, 530.0, 567.0, 579.0, 576.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [259.0, 268.0, 288.0, 291.0, 259.0, 263.0, 229.0, 238.0, 284.0, 286.0, 260.0, 265.0, 299.0, 277.0, 282.0, 291.0, 284.0, 292.0, 290.0, 292.0, 298.0, 281.0, 285.0, 288.0, 294.0, 293.0, 287.0, 295.0, 293.0, 289.0, 290.0, 283.0, 288.0, 291.0, 282.0, 288.0, 287.0, 289.0, 283.0, 290.0, 255.0, 270.0, 289.0, 290.0, 293.0, 286.0, 294.0, 285.0, 260.0, 265.0, 281.0, 289.0, 291.0, 291.0, 287.0, 289.0, 291.0, 282.0, 287.0, 292.0, 296.0, 291.0, 145.0, 157.0, 290.0, 292.0, 258.0, 267.0, 259.0, 266.0, 287.0, 295.0, 282.0, 291.0, 315.0, 312.0, 279.0, 300.0, 260.0, 267.0, 288.0, 288.0, 290.0, 289.0, 287.0, 292.0, 
288.0, 288.0, 260.0, 267.0, 285.0, 297.0, 264.0, 261.0, 293.0, 286.0, 283.0, 296.0, 291.0, 285.0, 286.0, 296.0, 287.0, 295.0, 290.0, 289.0, 177.0, 174.0, 293.0, 286.0, 286.0, 293.0, 265.0, 257.0, 315.0, 312.0, 262.0, 263.0, 267.0, 255.0, 289.0, 290.0, 289.0, 295.0, 287.0, 286.0, 270.0, 252.0, 289.0, 290.0, 254.0, 268.0, 285.0, 297.0, 257.0, 267.0, 287.0, 292.0, 281.0, 280.0, 291.0, 288.0, 296.0, 283.0, 293.0, 289.0, 287.0, 289.0, 292.0, 290.0, 290.0, 289.0, 287.0, 289.0, 285.0, 294.0, 259.0, 260.0, 286.0, 293.0, 291.0, 291.0, 287.0, 289.0, 260.0, 265.0, 289.0, 284.0, 287.0, 295.0, 261.0, 275.0, 232.0, 230.0, 255.0, 255.0, 288.0, 279.0, 288.0, 291.0, 288.0, 291.0, 282.0, 288.0, 288.0, 291.0, 265.0, 265.0, 285.0, 291.0, 267.0, 263.0, 290.0, 277.0, 286.0, 293.0, 288.0, 288.0, 262.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6974470945713677, "mean_inference_ms": 1.2454543364921187, "mean_action_processing_ms": 0.13376143031894988, "mean_env_wait_ms": 0.8396144520767223, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9728000, "num_agent_steps_trained": 9728000, "num_env_steps_sampled": 4864000, "num_env_steps_trained": 4864000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4864000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9728000, "timers": {"training_iteration_time_ms": 3601.475, "learn_time_ms": 1085.233, "learn_throughput": 11794.707, "synch_weights_time_ms": 10.567}, "counters": {"num_env_steps_sampled": 4864000, "num_env_steps_trained": 4864000, "num_agent_steps_sampled": 9728000, "num_agent_steps_trained": 9728000}, "done": false, "episodes_total": 12160, "training_iteration": 380, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-08", "timestamp": 1666581908, "time_this_iter_s": 3.670078992843628, "time_total_s": 1451.7170124053955, "pid": 1816099, 
"hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1451.7170124053955, "timesteps_since_restore": 0, "iterations_since_restore": 380, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.116666666666664, "ram_util_percent": 10.616666666666665}} +{"custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 173.62, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.29, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.03, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, 
"useful_onion_pickup_agent_0_mean": 15.08, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.88, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.67, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.81, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.67, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.67, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0003161475760862231, "policy_loss": -4.565366543829441e-05, "vf_loss": 7.759988307952881, "vf_explained_var": 0.5932192206382751, "kl": 0.002340888138860464, "entropy": 0.8283956050872803, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4876800, "num_env_steps_trained": 4876800, "num_agent_steps_sampled": 9753600, "num_agent_steps_trained": 9753600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 562.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 281.01}, "custom_metrics": {"sparse_reward_mean": 194.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 173.62, "shaped_reward_min": 111, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.29, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.03, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 26, "useful_onion_pickup_agent_0_mean": 15.08, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.88, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 26, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.81, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.67, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.25, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.76, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.06, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.21, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.18, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.58, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.81, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.67, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.81, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.67, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 525.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 576.0, 579.0, 579.0, 576.0, 527.0, 582.0, 525.0, 579.0, 579.0, 576.0, 582.0, 582.0, 579.0, 351.0, 579.0, 579.0, 522.0, 627.0, 525.0, 522.0, 579.0, 584.0, 573.0, 522.0, 579.0, 522.0, 582.0, 524.0, 579.0, 561.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 576.0, 579.0, 519.0, 579.0, 582.0, 576.0, 525.0, 573.0, 582.0, 536.0, 462.0, 510.0, 567.0, 579.0, 579.0, 570.0, 579.0, 530.0, 576.0, 530.0, 567.0, 579.0, 576.0, 519.0, 570.0, 579.0, 579.0, 522.0, 630.0, 573.0, 522.0, 573.0, 587.0, 525.0, 573.0, 582.0, 525.0, 525.0, 579.0, 527.0, 573.0, 627.0, 576.0, 525.0, 527.0, 630.0, 587.0, 584.0, 582.0, 579.0, 579.0, 582.0, 576.0, 522.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 258.0, 267.0, 259.0, 266.0, 287.0, 295.0, 282.0, 291.0, 315.0, 312.0, 279.0, 300.0, 260.0, 267.0, 288.0, 288.0, 290.0, 289.0, 287.0, 292.0, 288.0, 288.0, 260.0, 267.0, 285.0, 297.0, 264.0, 261.0, 293.0, 286.0, 283.0, 296.0, 291.0, 285.0, 286.0, 296.0, 287.0, 295.0, 290.0, 289.0, 177.0, 174.0, 293.0, 286.0, 286.0, 293.0, 265.0, 257.0, 315.0, 312.0, 262.0, 263.0, 267.0, 255.0, 289.0, 290.0, 289.0, 295.0, 287.0, 286.0, 270.0, 252.0, 289.0, 290.0, 254.0, 268.0, 285.0, 297.0, 257.0, 267.0, 287.0, 292.0, 281.0, 280.0, 291.0, 288.0, 296.0, 283.0, 293.0, 289.0, 287.0, 289.0, 292.0, 290.0, 290.0, 289.0, 287.0, 289.0, 285.0, 294.0, 259.0, 260.0, 286.0, 293.0, 291.0, 291.0, 287.0, 289.0, 260.0, 265.0, 289.0, 284.0, 287.0, 295.0, 261.0, 275.0, 232.0, 230.0, 255.0, 255.0, 288.0, 279.0, 288.0, 291.0, 288.0, 291.0, 282.0, 288.0, 288.0, 291.0, 265.0, 265.0, 285.0, 291.0, 267.0, 263.0, 290.0, 277.0, 286.0, 293.0, 288.0, 288.0, 262.0, 257.0, 294.0, 276.0, 286.0, 293.0, 288.0, 291.0, 261.0, 261.0, 317.0, 313.0, 286.0, 287.0, 263.0, 259.0, 289.0, 284.0, 295.0, 292.0, 264.0, 261.0, 283.0, 290.0, 289.0, 293.0, 265.0, 260.0, 258.0, 267.0, 293.0, 286.0, 260.0, 267.0, 280.0, 293.0, 319.0, 308.0, 285.0, 291.0, 267.0, 258.0, 265.0, 262.0, 324.0, 306.0, 296.0, 291.0, 288.0, 296.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 293.0, 289.0, 288.0, 288.0, 253.0, 269.0, 258.0, 264.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6973901172830818, "mean_inference_ms": 1.2453154096742738, "mean_action_processing_ms": 0.1337538870921113, "mean_env_wait_ms": 0.8395324954962717, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 351.0, "episode_reward_mean": 562.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 174.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 281.01}, "hist_stats": {"episode_reward": [582.0, 525.0, 525.0, 582.0, 573.0, 627.0, 579.0, 527.0, 576.0, 579.0, 579.0, 576.0, 527.0, 582.0, 525.0, 579.0, 579.0, 576.0, 582.0, 582.0, 579.0, 351.0, 579.0, 579.0, 522.0, 627.0, 525.0, 522.0, 579.0, 584.0, 573.0, 522.0, 579.0, 522.0, 582.0, 524.0, 579.0, 561.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 576.0, 579.0, 519.0, 579.0, 582.0, 576.0, 525.0, 573.0, 582.0, 536.0, 462.0, 510.0, 567.0, 579.0, 579.0, 570.0, 579.0, 530.0, 576.0, 530.0, 567.0, 579.0, 576.0, 519.0, 570.0, 579.0, 579.0, 522.0, 630.0, 573.0, 522.0, 573.0, 587.0, 525.0, 573.0, 582.0, 525.0, 525.0, 579.0, 527.0, 573.0, 627.0, 576.0, 525.0, 527.0, 630.0, 587.0, 584.0, 582.0, 579.0, 579.0, 582.0, 576.0, 522.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 258.0, 267.0, 259.0, 266.0, 287.0, 295.0, 282.0, 291.0, 315.0, 312.0, 279.0, 300.0, 260.0, 267.0, 288.0, 288.0, 290.0, 289.0, 287.0, 292.0, 288.0, 288.0, 260.0, 267.0, 285.0, 297.0, 264.0, 261.0, 293.0, 286.0, 283.0, 296.0, 291.0, 285.0, 286.0, 296.0, 287.0, 295.0, 290.0, 289.0, 177.0, 174.0, 293.0, 286.0, 286.0, 293.0, 265.0, 257.0, 315.0, 312.0, 262.0, 263.0, 267.0, 255.0, 289.0, 290.0, 289.0, 295.0, 287.0, 286.0, 270.0, 252.0, 289.0, 290.0, 254.0, 268.0, 285.0, 297.0, 257.0, 267.0, 287.0, 292.0, 281.0, 280.0, 291.0, 288.0, 296.0, 283.0, 293.0, 289.0, 287.0, 289.0, 292.0, 290.0, 290.0, 
289.0, 287.0, 289.0, 285.0, 294.0, 259.0, 260.0, 286.0, 293.0, 291.0, 291.0, 287.0, 289.0, 260.0, 265.0, 289.0, 284.0, 287.0, 295.0, 261.0, 275.0, 232.0, 230.0, 255.0, 255.0, 288.0, 279.0, 288.0, 291.0, 288.0, 291.0, 282.0, 288.0, 288.0, 291.0, 265.0, 265.0, 285.0, 291.0, 267.0, 263.0, 290.0, 277.0, 286.0, 293.0, 288.0, 288.0, 262.0, 257.0, 294.0, 276.0, 286.0, 293.0, 288.0, 291.0, 261.0, 261.0, 317.0, 313.0, 286.0, 287.0, 263.0, 259.0, 289.0, 284.0, 295.0, 292.0, 264.0, 261.0, 283.0, 290.0, 289.0, 293.0, 265.0, 260.0, 258.0, 267.0, 293.0, 286.0, 260.0, 267.0, 280.0, 293.0, 319.0, 308.0, 285.0, 291.0, 267.0, 258.0, 265.0, 262.0, 324.0, 306.0, 296.0, 291.0, 288.0, 296.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 293.0, 289.0, 288.0, 288.0, 253.0, 269.0, 258.0, 264.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6973901172830818, "mean_inference_ms": 1.2453154096742738, "mean_action_processing_ms": 0.1337538870921113, "mean_env_wait_ms": 0.8395324954962717, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9753600, "num_agent_steps_trained": 9753600, "num_env_steps_sampled": 4876800, "num_env_steps_trained": 4876800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4876800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9753600, "timers": {"training_iteration_time_ms": 3587.267, "learn_time_ms": 1080.725, "learn_throughput": 11843.907, "synch_weights_time_ms": 10.048}, "counters": {"num_env_steps_sampled": 4876800, "num_env_steps_trained": 4876800, "num_agent_steps_sampled": 9753600, "num_agent_steps_trained": 9753600}, "done": false, "episodes_total": 12192, "training_iteration": 381, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-12", "timestamp": 1666581912, "time_this_iter_s": 3.5818943977355957, "time_total_s": 1455.298906803131, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1455.298906803131, "timesteps_since_restore": 0, "iterations_since_restore": 381, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.560000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.64, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.71, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.65, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 15.57, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.54, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.28, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.29, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.28, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.29, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.28, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.29, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.004279726184904575, "policy_loss": -0.004651351366192102, "vf_loss": 7.765802383422852, "vf_explained_var": 0.5755102634429932, "kl": 0.002863064408302307, "entropy": 0.8099081516265869, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4889600, "num_env_steps_trained": 4889600, "num_agent_steps_sampled": 9779200, "num_agent_steps_trained": 9779200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 566.24, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 283.12}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.64, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.71, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.65, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.57, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.54, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 6, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 6, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.28, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.29, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.83, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.28, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.29, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.28, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.29, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 522.0, 582.0, 524.0, 579.0, 561.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 576.0, 579.0, 519.0, 579.0, 582.0, 576.0, 525.0, 573.0, 582.0, 536.0, 462.0, 510.0, 567.0, 579.0, 579.0, 570.0, 579.0, 530.0, 576.0, 530.0, 567.0, 579.0, 576.0, 519.0, 570.0, 579.0, 579.0, 522.0, 630.0, 573.0, 522.0, 573.0, 587.0, 525.0, 573.0, 582.0, 525.0, 525.0, 579.0, 527.0, 573.0, 627.0, 576.0, 525.0, 527.0, 630.0, 587.0, 584.0, 582.0, 579.0, 579.0, 582.0, 576.0, 522.0, 522.0, 582.0, 633.0, 627.0, 570.0, 582.0, 582.0, 579.0, 525.0, 633.0, 570.0, 576.0, 636.0, 584.0, 573.0, 582.0, 584.0, 579.0, 570.0, 579.0, 570.0, 576.0, 570.0, 582.0, 530.0, 582.0, 354.0, 573.0, 587.0, 582.0, 579.0, 576.0, 579.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 254.0, 268.0, 285.0, 297.0, 257.0, 267.0, 287.0, 292.0, 281.0, 280.0, 291.0, 288.0, 296.0, 283.0, 293.0, 289.0, 287.0, 289.0, 292.0, 290.0, 290.0, 289.0, 287.0, 289.0, 285.0, 294.0, 259.0, 260.0, 286.0, 293.0, 291.0, 291.0, 287.0, 289.0, 260.0, 265.0, 289.0, 284.0, 287.0, 295.0, 261.0, 275.0, 232.0, 230.0, 255.0, 255.0, 288.0, 279.0, 288.0, 291.0, 288.0, 291.0, 282.0, 288.0, 288.0, 291.0, 265.0, 265.0, 285.0, 291.0, 267.0, 263.0, 290.0, 277.0, 286.0, 293.0, 288.0, 288.0, 262.0, 257.0, 294.0, 276.0, 286.0, 293.0, 288.0, 291.0, 261.0, 261.0, 317.0, 313.0, 286.0, 287.0, 263.0, 259.0, 289.0, 284.0, 295.0, 292.0, 264.0, 261.0, 283.0, 290.0, 289.0, 293.0, 265.0, 260.0, 258.0, 267.0, 293.0, 286.0, 260.0, 267.0, 280.0, 293.0, 319.0, 308.0, 285.0, 291.0, 267.0, 258.0, 265.0, 262.0, 324.0, 306.0, 296.0, 291.0, 288.0, 296.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 293.0, 289.0, 288.0, 288.0, 253.0, 269.0, 258.0, 264.0, 292.0, 290.0, 314.0, 319.0, 314.0, 313.0, 286.0, 284.0, 294.0, 288.0, 292.0, 290.0, 285.0, 294.0, 257.0, 268.0, 320.0, 313.0, 293.0, 277.0, 294.0, 282.0, 317.0, 319.0, 291.0, 293.0, 283.0, 290.0, 289.0, 293.0, 290.0, 294.0, 294.0, 285.0, 275.0, 295.0, 286.0, 293.0, 285.0, 285.0, 285.0, 291.0, 284.0, 286.0, 290.0, 292.0, 261.0, 269.0, 289.0, 293.0, 173.0, 181.0, 287.0, 286.0, 299.0, 288.0, 289.0, 293.0, 287.0, 292.0, 291.0, 285.0, 290.0, 289.0, 260.0, 262.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6973273806291641, "mean_inference_ms": 1.2451831322393523, "mean_action_processing_ms": 0.13374559805500635, "mean_env_wait_ms": 0.8394495822798175, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 566.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 283.12}, "hist_stats": {"episode_reward": [579.0, 522.0, 582.0, 524.0, 579.0, 561.0, 579.0, 579.0, 582.0, 576.0, 582.0, 579.0, 576.0, 579.0, 519.0, 579.0, 582.0, 576.0, 525.0, 573.0, 582.0, 536.0, 462.0, 510.0, 567.0, 579.0, 579.0, 570.0, 579.0, 530.0, 576.0, 530.0, 567.0, 579.0, 576.0, 519.0, 570.0, 579.0, 579.0, 522.0, 630.0, 573.0, 522.0, 573.0, 587.0, 525.0, 573.0, 582.0, 525.0, 525.0, 579.0, 527.0, 573.0, 627.0, 576.0, 525.0, 527.0, 630.0, 587.0, 584.0, 582.0, 579.0, 579.0, 582.0, 576.0, 522.0, 522.0, 582.0, 633.0, 627.0, 570.0, 582.0, 582.0, 579.0, 525.0, 633.0, 570.0, 576.0, 636.0, 584.0, 573.0, 582.0, 584.0, 579.0, 570.0, 579.0, 570.0, 576.0, 570.0, 582.0, 530.0, 582.0, 354.0, 573.0, 587.0, 582.0, 579.0, 576.0, 579.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 254.0, 268.0, 285.0, 297.0, 257.0, 267.0, 287.0, 292.0, 281.0, 280.0, 291.0, 288.0, 296.0, 283.0, 293.0, 289.0, 287.0, 289.0, 292.0, 290.0, 290.0, 289.0, 287.0, 289.0, 285.0, 294.0, 259.0, 260.0, 286.0, 293.0, 291.0, 291.0, 287.0, 289.0, 260.0, 265.0, 289.0, 284.0, 287.0, 295.0, 261.0, 275.0, 232.0, 230.0, 255.0, 255.0, 288.0, 279.0, 288.0, 291.0, 288.0, 291.0, 282.0, 288.0, 288.0, 291.0, 265.0, 265.0, 285.0, 291.0, 267.0, 263.0, 290.0, 277.0, 286.0, 293.0, 288.0, 288.0, 262.0, 257.0, 294.0, 276.0, 286.0, 293.0, 288.0, 291.0, 261.0, 261.0, 317.0, 313.0, 286.0, 287.0, 263.0, 259.0, 289.0, 
284.0, 295.0, 292.0, 264.0, 261.0, 283.0, 290.0, 289.0, 293.0, 265.0, 260.0, 258.0, 267.0, 293.0, 286.0, 260.0, 267.0, 280.0, 293.0, 319.0, 308.0, 285.0, 291.0, 267.0, 258.0, 265.0, 262.0, 324.0, 306.0, 296.0, 291.0, 288.0, 296.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 293.0, 289.0, 288.0, 288.0, 253.0, 269.0, 258.0, 264.0, 292.0, 290.0, 314.0, 319.0, 314.0, 313.0, 286.0, 284.0, 294.0, 288.0, 292.0, 290.0, 285.0, 294.0, 257.0, 268.0, 320.0, 313.0, 293.0, 277.0, 294.0, 282.0, 317.0, 319.0, 291.0, 293.0, 283.0, 290.0, 289.0, 293.0, 290.0, 294.0, 294.0, 285.0, 275.0, 295.0, 286.0, 293.0, 285.0, 285.0, 285.0, 291.0, 284.0, 286.0, 290.0, 292.0, 261.0, 269.0, 289.0, 293.0, 173.0, 181.0, 287.0, 286.0, 299.0, 288.0, 289.0, 293.0, 287.0, 292.0, 291.0, 285.0, 290.0, 289.0, 260.0, 262.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6973273806291641, "mean_inference_ms": 1.2451831322393523, "mean_action_processing_ms": 0.13374559805500635, "mean_env_wait_ms": 0.8394495822798175, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9779200, "num_agent_steps_trained": 9779200, "num_env_steps_sampled": 4889600, "num_env_steps_trained": 4889600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4889600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9779200, "timers": {"training_iteration_time_ms": 3587.953, "learn_time_ms": 1082.429, "learn_throughput": 11825.26, "synch_weights_time_ms": 11.017}, "counters": {"num_env_steps_sampled": 4889600, "num_env_steps_trained": 4889600, "num_agent_steps_sampled": 9779200, "num_agent_steps_trained": 9779200}, "done": false, "episodes_total": 12224, "training_iteration": 382, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-16", "timestamp": 1666581916, "time_this_iter_s": 3.6740386486053467, "time_total_s": 1458.9729454517365, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1458.9729454517365, "timesteps_since_restore": 0, "iterations_since_restore": 382, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.966666666666665, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 176.34, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.95, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.62, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 15.87, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.49, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.59, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.25, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.87, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.59, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.25, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.59, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.25, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005173349054530263, "policy_loss": 0.0001583112170919776, "vf_loss": 7.708772659301758, "vf_explained_var": 0.6065121293067932, "kl": 0.002974329050630331, "entropy": 0.8237053751945496, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4902400, "num_env_steps_trained": 4902400, "num_agent_steps_sampled": 9804800, "num_agent_steps_trained": 9804800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 571.54, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.77}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 176.34, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.95, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.62, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.87, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.49, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.59, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.25, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.09, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.59, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.25, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.59, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.25, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 579.0, 576.0, 519.0, 570.0, 579.0, 579.0, 522.0, 630.0, 573.0, 522.0, 573.0, 587.0, 525.0, 573.0, 582.0, 525.0, 525.0, 579.0, 527.0, 573.0, 627.0, 576.0, 525.0, 527.0, 630.0, 587.0, 584.0, 582.0, 579.0, 579.0, 582.0, 576.0, 522.0, 522.0, 582.0, 633.0, 627.0, 570.0, 582.0, 582.0, 579.0, 525.0, 633.0, 570.0, 576.0, 636.0, 584.0, 573.0, 582.0, 584.0, 579.0, 570.0, 579.0, 570.0, 576.0, 570.0, 582.0, 530.0, 582.0, 354.0, 573.0, 587.0, 582.0, 579.0, 576.0, 579.0, 522.0, 576.0, 573.0, 579.0, 582.0, 579.0, 582.0, 630.0, 582.0, 573.0, 579.0, 576.0, 582.0, 573.0, 579.0, 582.0, 573.0, 579.0, 582.0, 522.0, 582.0, 573.0, 573.0, 576.0, 530.0, 581.0, 582.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 277.0, 286.0, 293.0, 288.0, 288.0, 262.0, 257.0, 294.0, 276.0, 286.0, 293.0, 288.0, 291.0, 261.0, 261.0, 317.0, 313.0, 286.0, 287.0, 263.0, 259.0, 289.0, 284.0, 295.0, 292.0, 264.0, 261.0, 283.0, 290.0, 289.0, 293.0, 265.0, 260.0, 258.0, 267.0, 293.0, 286.0, 260.0, 267.0, 280.0, 293.0, 319.0, 308.0, 285.0, 291.0, 267.0, 258.0, 265.0, 262.0, 324.0, 306.0, 296.0, 291.0, 288.0, 296.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 293.0, 289.0, 288.0, 288.0, 253.0, 269.0, 258.0, 264.0, 292.0, 290.0, 314.0, 319.0, 314.0, 313.0, 286.0, 284.0, 294.0, 288.0, 292.0, 290.0, 285.0, 294.0, 257.0, 268.0, 320.0, 313.0, 293.0, 277.0, 294.0, 282.0, 317.0, 319.0, 291.0, 293.0, 283.0, 290.0, 289.0, 293.0, 290.0, 294.0, 294.0, 285.0, 275.0, 295.0, 286.0, 293.0, 285.0, 285.0, 285.0, 291.0, 284.0, 286.0, 290.0, 292.0, 261.0, 269.0, 289.0, 293.0, 173.0, 181.0, 287.0, 286.0, 299.0, 288.0, 289.0, 293.0, 287.0, 292.0, 291.0, 285.0, 290.0, 289.0, 260.0, 262.0, 292.0, 284.0, 287.0, 286.0, 283.0, 296.0, 288.0, 294.0, 293.0, 286.0, 292.0, 290.0, 309.0, 321.0, 289.0, 293.0, 279.0, 294.0, 276.0, 303.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 290.0, 289.0, 287.0, 295.0, 284.0, 289.0, 291.0, 288.0, 294.0, 288.0, 260.0, 262.0, 287.0, 295.0, 288.0, 285.0, 289.0, 284.0, 295.0, 281.0, 270.0, 260.0, 295.0, 286.0, 294.0, 288.0, 287.0, 292.0, 288.0, 291.0, 293.0, 289.0, 296.0, 283.0, 285.0, 297.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6972649237496986, "mean_inference_ms": 1.245114700116508, "mean_action_processing_ms": 0.133735206033721, "mean_env_wait_ms": 0.8394135928267644, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 571.54, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.77}, "hist_stats": {"episode_reward": [567.0, 579.0, 576.0, 519.0, 570.0, 579.0, 579.0, 522.0, 630.0, 573.0, 522.0, 573.0, 587.0, 525.0, 573.0, 582.0, 525.0, 525.0, 579.0, 527.0, 573.0, 627.0, 576.0, 525.0, 527.0, 630.0, 587.0, 584.0, 582.0, 579.0, 579.0, 582.0, 576.0, 522.0, 522.0, 582.0, 633.0, 627.0, 570.0, 582.0, 582.0, 579.0, 525.0, 633.0, 570.0, 576.0, 636.0, 584.0, 573.0, 582.0, 584.0, 579.0, 570.0, 579.0, 570.0, 576.0, 570.0, 582.0, 530.0, 582.0, 354.0, 573.0, 587.0, 582.0, 579.0, 576.0, 579.0, 522.0, 576.0, 573.0, 579.0, 582.0, 579.0, 582.0, 630.0, 582.0, 573.0, 579.0, 576.0, 582.0, 573.0, 579.0, 582.0, 573.0, 579.0, 582.0, 522.0, 582.0, 573.0, 573.0, 576.0, 530.0, 581.0, 582.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 277.0, 286.0, 293.0, 288.0, 288.0, 262.0, 257.0, 294.0, 276.0, 286.0, 293.0, 288.0, 291.0, 261.0, 261.0, 317.0, 313.0, 286.0, 287.0, 263.0, 259.0, 289.0, 284.0, 295.0, 292.0, 264.0, 261.0, 283.0, 290.0, 289.0, 293.0, 265.0, 260.0, 258.0, 267.0, 293.0, 286.0, 260.0, 267.0, 280.0, 293.0, 319.0, 308.0, 285.0, 291.0, 267.0, 258.0, 265.0, 262.0, 324.0, 306.0, 296.0, 291.0, 288.0, 296.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 293.0, 289.0, 288.0, 288.0, 253.0, 269.0, 258.0, 264.0, 292.0, 290.0, 314.0, 319.0, 314.0, 313.0, 286.0, 284.0, 294.0, 288.0, 292.0, 290.0, 285.0, 294.0, 257.0, 268.0, 320.0, 
313.0, 293.0, 277.0, 294.0, 282.0, 317.0, 319.0, 291.0, 293.0, 283.0, 290.0, 289.0, 293.0, 290.0, 294.0, 294.0, 285.0, 275.0, 295.0, 286.0, 293.0, 285.0, 285.0, 285.0, 291.0, 284.0, 286.0, 290.0, 292.0, 261.0, 269.0, 289.0, 293.0, 173.0, 181.0, 287.0, 286.0, 299.0, 288.0, 289.0, 293.0, 287.0, 292.0, 291.0, 285.0, 290.0, 289.0, 260.0, 262.0, 292.0, 284.0, 287.0, 286.0, 283.0, 296.0, 288.0, 294.0, 293.0, 286.0, 292.0, 290.0, 309.0, 321.0, 289.0, 293.0, 279.0, 294.0, 276.0, 303.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 290.0, 289.0, 287.0, 295.0, 284.0, 289.0, 291.0, 288.0, 294.0, 288.0, 260.0, 262.0, 287.0, 295.0, 288.0, 285.0, 289.0, 284.0, 295.0, 281.0, 270.0, 260.0, 295.0, 286.0, 294.0, 288.0, 287.0, 292.0, 288.0, 291.0, 293.0, 289.0, 296.0, 283.0, 285.0, 297.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6972649237496986, "mean_inference_ms": 1.245114700116508, "mean_action_processing_ms": 0.133735206033721, "mean_env_wait_ms": 0.8394135928267644, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9804800, "num_agent_steps_trained": 9804800, "num_env_steps_sampled": 4902400, "num_env_steps_trained": 4902400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4902400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9804800, "timers": {"training_iteration_time_ms": 3604.75, "learn_time_ms": 1080.138, "learn_throughput": 11850.335, "synch_weights_time_ms": 10.938}, "counters": {"num_env_steps_sampled": 4902400, "num_env_steps_trained": 4902400, "num_agent_steps_sampled": 9804800, "num_agent_steps_trained": 9804800}, "done": false, "episodes_total": 12256, "training_iteration": 383, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-20", "timestamp": 1666581920, "time_this_iter_s": 3.900717258453369, "time_total_s": 1462.8736627101898, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1462.8736627101898, "timesteps_since_restore": 0, "iterations_since_restore": 383, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 25.160000000000004, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 176.55, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.69, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.93, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 15.59, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.82, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.34, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.54, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.48, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.17, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.11, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.78, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.34, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.54, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.34, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.54, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002254174090921879, "policy_loss": -0.002626133384183049, "vf_loss": 7.838954448699951, "vf_explained_var": 0.5783690214157104, "kl": 0.0026023017708212137, "entropy": 0.8238720893859863, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4915200, "num_env_steps_trained": 4915200, "num_agent_steps_sampled": 9830400, "num_agent_steps_trained": 9830400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 571.35, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 285.675}, "custom_metrics": {"sparse_reward_mean": 197.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 176.55, "shaped_reward_min": 114, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.69, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.93, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.59, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.82, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.34, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.54, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.48, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.31, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.06, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.17, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.82, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.11, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.34, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.54, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.34, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.54, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 522.0, 522.0, 582.0, 633.0, 627.0, 570.0, 582.0, 582.0, 579.0, 525.0, 633.0, 570.0, 576.0, 636.0, 584.0, 573.0, 582.0, 584.0, 579.0, 570.0, 579.0, 570.0, 576.0, 570.0, 582.0, 530.0, 582.0, 354.0, 573.0, 587.0, 582.0, 579.0, 576.0, 579.0, 522.0, 576.0, 573.0, 579.0, 582.0, 579.0, 582.0, 630.0, 582.0, 573.0, 579.0, 576.0, 582.0, 573.0, 579.0, 582.0, 573.0, 579.0, 582.0, 522.0, 582.0, 573.0, 573.0, 576.0, 530.0, 581.0, 582.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 530.0, 576.0, 573.0, 582.0, 519.0, 579.0, 530.0, 576.0, 587.0, 627.0, 579.0, 630.0, 581.0, 579.0, 525.0, 582.0, 576.0, 525.0, 630.0, 525.0, 587.0, 524.0, 582.0, 522.0, 579.0, 627.0, 522.0, 525.0, 582.0, 582.0, 579.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 253.0, 269.0, 258.0, 264.0, 292.0, 290.0, 314.0, 319.0, 314.0, 313.0, 286.0, 284.0, 294.0, 288.0, 292.0, 290.0, 285.0, 294.0, 257.0, 268.0, 320.0, 313.0, 293.0, 277.0, 294.0, 282.0, 317.0, 319.0, 291.0, 293.0, 283.0, 290.0, 289.0, 293.0, 290.0, 294.0, 294.0, 285.0, 275.0, 295.0, 286.0, 293.0, 285.0, 285.0, 285.0, 291.0, 284.0, 286.0, 290.0, 292.0, 261.0, 269.0, 289.0, 293.0, 173.0, 181.0, 287.0, 286.0, 299.0, 288.0, 289.0, 293.0, 287.0, 292.0, 291.0, 285.0, 290.0, 289.0, 260.0, 262.0, 292.0, 284.0, 287.0, 286.0, 283.0, 296.0, 288.0, 294.0, 293.0, 286.0, 292.0, 290.0, 309.0, 321.0, 289.0, 293.0, 279.0, 294.0, 276.0, 303.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 290.0, 289.0, 287.0, 295.0, 284.0, 289.0, 291.0, 288.0, 294.0, 288.0, 260.0, 262.0, 287.0, 295.0, 288.0, 285.0, 289.0, 284.0, 295.0, 281.0, 270.0, 260.0, 295.0, 286.0, 294.0, 288.0, 287.0, 292.0, 288.0, 291.0, 293.0, 289.0, 296.0, 283.0, 285.0, 297.0, 291.0, 291.0, 262.0, 268.0, 292.0, 284.0, 283.0, 290.0, 291.0, 291.0, 256.0, 263.0, 289.0, 290.0, 264.0, 266.0, 286.0, 290.0, 298.0, 289.0, 319.0, 308.0, 286.0, 293.0, 314.0, 316.0, 298.0, 283.0, 291.0, 288.0, 261.0, 264.0, 282.0, 300.0, 288.0, 288.0, 258.0, 267.0, 318.0, 312.0, 266.0, 259.0, 288.0, 299.0, 260.0, 264.0, 287.0, 295.0, 255.0, 267.0, 293.0, 286.0, 310.0, 317.0, 263.0, 259.0, 262.0, 263.0, 301.0, 281.0, 290.0, 292.0, 285.0, 294.0, 271.0, 251.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6972223456770706, "mean_inference_ms": 1.2451278394099532, "mean_action_processing_ms": 0.1337272957493324, "mean_env_wait_ms": 0.8393944616489282, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 354.0, "episode_reward_mean": 571.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 173.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 285.675}, "hist_stats": {"episode_reward": [576.0, 522.0, 522.0, 582.0, 633.0, 627.0, 570.0, 582.0, 582.0, 579.0, 525.0, 633.0, 570.0, 576.0, 636.0, 584.0, 573.0, 582.0, 584.0, 579.0, 570.0, 579.0, 570.0, 576.0, 570.0, 582.0, 530.0, 582.0, 354.0, 573.0, 587.0, 582.0, 579.0, 576.0, 579.0, 522.0, 576.0, 573.0, 579.0, 582.0, 579.0, 582.0, 630.0, 582.0, 573.0, 579.0, 576.0, 582.0, 573.0, 579.0, 582.0, 573.0, 579.0, 582.0, 522.0, 582.0, 573.0, 573.0, 576.0, 530.0, 581.0, 582.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 530.0, 576.0, 573.0, 582.0, 519.0, 579.0, 530.0, 576.0, 587.0, 627.0, 579.0, 630.0, 581.0, 579.0, 525.0, 582.0, 576.0, 525.0, 630.0, 525.0, 587.0, 524.0, 582.0, 522.0, 579.0, 627.0, 522.0, 525.0, 582.0, 582.0, 579.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 253.0, 269.0, 258.0, 264.0, 292.0, 290.0, 314.0, 319.0, 314.0, 313.0, 286.0, 284.0, 294.0, 288.0, 292.0, 290.0, 285.0, 294.0, 257.0, 268.0, 320.0, 313.0, 293.0, 277.0, 294.0, 282.0, 317.0, 319.0, 291.0, 293.0, 283.0, 290.0, 289.0, 293.0, 290.0, 294.0, 294.0, 285.0, 275.0, 295.0, 286.0, 293.0, 285.0, 285.0, 285.0, 291.0, 284.0, 286.0, 290.0, 292.0, 261.0, 269.0, 289.0, 293.0, 173.0, 181.0, 287.0, 286.0, 299.0, 288.0, 289.0, 293.0, 287.0, 292.0, 291.0, 285.0, 290.0, 289.0, 260.0, 262.0, 292.0, 284.0, 287.0, 286.0, 283.0, 296.0, 288.0, 294.0, 293.0, 286.0, 292.0, 290.0, 309.0, 321.0, 
289.0, 293.0, 279.0, 294.0, 276.0, 303.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 290.0, 289.0, 287.0, 295.0, 284.0, 289.0, 291.0, 288.0, 294.0, 288.0, 260.0, 262.0, 287.0, 295.0, 288.0, 285.0, 289.0, 284.0, 295.0, 281.0, 270.0, 260.0, 295.0, 286.0, 294.0, 288.0, 287.0, 292.0, 288.0, 291.0, 293.0, 289.0, 296.0, 283.0, 285.0, 297.0, 291.0, 291.0, 262.0, 268.0, 292.0, 284.0, 283.0, 290.0, 291.0, 291.0, 256.0, 263.0, 289.0, 290.0, 264.0, 266.0, 286.0, 290.0, 298.0, 289.0, 319.0, 308.0, 286.0, 293.0, 314.0, 316.0, 298.0, 283.0, 291.0, 288.0, 261.0, 264.0, 282.0, 300.0, 288.0, 288.0, 258.0, 267.0, 318.0, 312.0, 266.0, 259.0, 288.0, 299.0, 260.0, 264.0, 287.0, 295.0, 255.0, 267.0, 293.0, 286.0, 310.0, 317.0, 263.0, 259.0, 262.0, 263.0, 301.0, 281.0, 290.0, 292.0, 285.0, 294.0, 271.0, 251.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6972223456770706, "mean_inference_ms": 1.2451278394099532, "mean_action_processing_ms": 0.1337272957493324, "mean_env_wait_ms": 0.8393944616489282, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9830400, "num_agent_steps_trained": 9830400, "num_env_steps_sampled": 4915200, "num_env_steps_trained": 4915200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4915200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9830400, "timers": {"training_iteration_time_ms": 3623.122, "learn_time_ms": 1078.354, "learn_throughput": 11869.94, "synch_weights_time_ms": 10.934}, "counters": {"num_env_steps_sampled": 4915200, "num_env_steps_trained": 4915200, "num_agent_steps_sampled": 9830400, "num_agent_steps_trained": 9830400}, "done": false, "episodes_total": 12288, "training_iteration": 384, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-24", "timestamp": 1666581924, "time_this_iter_s": 3.874924659729004, "time_total_s": 1466.7485873699188, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1466.7485873699188, "timesteps_since_restore": 0, "iterations_since_restore": 384, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.75, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.85, "shaped_reward_min": 159, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.28, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.5, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 15.14, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.87, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.06, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.82, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.53, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.87, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.06, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.87, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.06, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014922493137419224, "policy_loss": 0.0011385057587176561, "vf_loss": 7.6861572265625, "vf_explained_var": 0.5910747647285461, "kl": 0.0026840257924050093, "entropy": 0.8297452926635742, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4928000, "num_env_steps_trained": 4928000, "num_agent_steps_sampled": 9856000, "num_agent_steps_trained": 9856000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 519.0, "episode_reward_mean": 572.05, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.025}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.85, "shaped_reward_min": 159, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.28, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.5, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.14, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.37, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.87, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.06, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.74, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.82, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.57, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.05, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.43, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.59, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.53, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.87, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.06, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.87, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.06, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 579.0, 522.0, 576.0, 573.0, 579.0, 582.0, 579.0, 582.0, 630.0, 582.0, 573.0, 579.0, 576.0, 582.0, 573.0, 579.0, 582.0, 573.0, 579.0, 582.0, 522.0, 582.0, 573.0, 573.0, 576.0, 530.0, 581.0, 582.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 530.0, 576.0, 573.0, 582.0, 519.0, 579.0, 530.0, 576.0, 587.0, 627.0, 579.0, 630.0, 581.0, 579.0, 525.0, 582.0, 576.0, 525.0, 630.0, 525.0, 587.0, 524.0, 582.0, 522.0, 579.0, 627.0, 522.0, 525.0, 582.0, 582.0, 579.0, 522.0, 581.0, 576.0, 576.0, 582.0, 587.0, 530.0, 582.0, 570.0, 579.0, 587.0, 573.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 522.0, 576.0, 579.0, 576.0, 579.0, 576.0, 582.0, 581.0, 579.0, 582.0, 530.0, 573.0, 519.0, 582.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 291.0, 285.0, 290.0, 289.0, 260.0, 262.0, 292.0, 284.0, 287.0, 286.0, 283.0, 296.0, 288.0, 294.0, 293.0, 286.0, 292.0, 290.0, 309.0, 321.0, 289.0, 293.0, 279.0, 294.0, 276.0, 303.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 290.0, 289.0, 287.0, 295.0, 284.0, 289.0, 291.0, 288.0, 294.0, 288.0, 260.0, 262.0, 287.0, 295.0, 288.0, 285.0, 289.0, 284.0, 295.0, 281.0, 270.0, 260.0, 295.0, 286.0, 294.0, 288.0, 287.0, 292.0, 288.0, 291.0, 293.0, 289.0, 296.0, 283.0, 285.0, 297.0, 291.0, 291.0, 262.0, 268.0, 292.0, 284.0, 283.0, 290.0, 291.0, 291.0, 256.0, 263.0, 289.0, 290.0, 264.0, 266.0, 286.0, 290.0, 298.0, 289.0, 319.0, 308.0, 286.0, 293.0, 314.0, 316.0, 298.0, 283.0, 291.0, 288.0, 261.0, 264.0, 282.0, 300.0, 288.0, 288.0, 258.0, 267.0, 318.0, 312.0, 266.0, 259.0, 288.0, 299.0, 260.0, 264.0, 287.0, 295.0, 255.0, 267.0, 293.0, 286.0, 310.0, 317.0, 263.0, 259.0, 262.0, 263.0, 301.0, 281.0, 290.0, 292.0, 285.0, 294.0, 271.0, 251.0, 288.0, 293.0, 288.0, 288.0, 288.0, 288.0, 293.0, 289.0, 290.0, 297.0, 262.0, 268.0, 295.0, 287.0, 287.0, 283.0, 290.0, 289.0, 290.0, 297.0, 289.0, 284.0, 288.0, 291.0, 283.0, 290.0, 278.0, 295.0, 296.0, 283.0, 287.0, 286.0, 289.0, 290.0, 262.0, 260.0, 290.0, 286.0, 295.0, 284.0, 289.0, 287.0, 286.0, 293.0, 291.0, 285.0, 293.0, 289.0, 297.0, 284.0, 287.0, 292.0, 293.0, 289.0, 268.0, 262.0, 290.0, 283.0, 262.0, 257.0, 295.0, 287.0, 315.0, 312.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6971963878902551, "mean_inference_ms": 1.245146808552077, "mean_action_processing_ms": 0.13371996697535277, "mean_env_wait_ms": 0.8393813351805128, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 519.0, "episode_reward_mean": 572.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.025}, "hist_stats": {"episode_reward": [579.0, 576.0, 579.0, 522.0, 576.0, 573.0, 579.0, 582.0, 579.0, 582.0, 630.0, 582.0, 573.0, 579.0, 576.0, 582.0, 573.0, 579.0, 582.0, 573.0, 579.0, 582.0, 522.0, 582.0, 573.0, 573.0, 576.0, 530.0, 581.0, 582.0, 579.0, 579.0, 582.0, 579.0, 582.0, 582.0, 530.0, 576.0, 573.0, 582.0, 519.0, 579.0, 530.0, 576.0, 587.0, 627.0, 579.0, 630.0, 581.0, 579.0, 525.0, 582.0, 576.0, 525.0, 630.0, 525.0, 587.0, 524.0, 582.0, 522.0, 579.0, 627.0, 522.0, 525.0, 582.0, 582.0, 579.0, 522.0, 581.0, 576.0, 576.0, 582.0, 587.0, 530.0, 582.0, 570.0, 579.0, 587.0, 573.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 522.0, 576.0, 579.0, 576.0, 579.0, 576.0, 582.0, 581.0, 579.0, 582.0, 530.0, 573.0, 519.0, 582.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 291.0, 285.0, 290.0, 289.0, 260.0, 262.0, 292.0, 284.0, 287.0, 286.0, 283.0, 296.0, 288.0, 294.0, 293.0, 286.0, 292.0, 290.0, 309.0, 321.0, 289.0, 293.0, 279.0, 294.0, 276.0, 303.0, 287.0, 289.0, 289.0, 293.0, 290.0, 283.0, 290.0, 289.0, 287.0, 295.0, 284.0, 289.0, 291.0, 288.0, 294.0, 288.0, 260.0, 262.0, 287.0, 295.0, 288.0, 285.0, 289.0, 284.0, 295.0, 281.0, 270.0, 260.0, 295.0, 286.0, 294.0, 288.0, 287.0, 292.0, 288.0, 291.0, 293.0, 289.0, 296.0, 283.0, 285.0, 297.0, 291.0, 291.0, 262.0, 268.0, 292.0, 284.0, 283.0, 290.0, 291.0, 291.0, 256.0, 263.0, 289.0, 290.0, 264.0, 266.0, 
286.0, 290.0, 298.0, 289.0, 319.0, 308.0, 286.0, 293.0, 314.0, 316.0, 298.0, 283.0, 291.0, 288.0, 261.0, 264.0, 282.0, 300.0, 288.0, 288.0, 258.0, 267.0, 318.0, 312.0, 266.0, 259.0, 288.0, 299.0, 260.0, 264.0, 287.0, 295.0, 255.0, 267.0, 293.0, 286.0, 310.0, 317.0, 263.0, 259.0, 262.0, 263.0, 301.0, 281.0, 290.0, 292.0, 285.0, 294.0, 271.0, 251.0, 288.0, 293.0, 288.0, 288.0, 288.0, 288.0, 293.0, 289.0, 290.0, 297.0, 262.0, 268.0, 295.0, 287.0, 287.0, 283.0, 290.0, 289.0, 290.0, 297.0, 289.0, 284.0, 288.0, 291.0, 283.0, 290.0, 278.0, 295.0, 296.0, 283.0, 287.0, 286.0, 289.0, 290.0, 262.0, 260.0, 290.0, 286.0, 295.0, 284.0, 289.0, 287.0, 286.0, 293.0, 291.0, 285.0, 293.0, 289.0, 297.0, 284.0, 287.0, 292.0, 293.0, 289.0, 268.0, 262.0, 290.0, 283.0, 262.0, 257.0, 295.0, 287.0, 315.0, 312.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6971963878902551, "mean_inference_ms": 1.245146808552077, "mean_action_processing_ms": 0.13371996697535277, "mean_env_wait_ms": 0.8393813351805128, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9856000, "num_agent_steps_trained": 9856000, "num_env_steps_sampled": 4928000, "num_env_steps_trained": 4928000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4928000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9856000, "timers": {"training_iteration_time_ms": 3648.492, "learn_time_ms": 1095.707, "learn_throughput": 11681.96, "synch_weights_time_ms": 11.378}, "counters": {"num_env_steps_sampled": 4928000, "num_env_steps_trained": 4928000, "num_agent_steps_sampled": 9856000, "num_agent_steps_trained": 9856000}, "done": false, "episodes_total": 12320, "training_iteration": 385, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-28", "timestamp": 1666581928, "time_this_iter_s": 3.7417666912078857, "time_total_s": 1470.4903540611267, "pid": 1816099, 
"hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1470.4903540611267, "timesteps_since_restore": 0, "iterations_since_restore": 385, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.966666666666665, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 175.42, "shaped_reward_min": 155, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.23, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.37, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 15.1, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.89, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.77, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.64, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.46, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.89, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.89, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0015976729337126017, "policy_loss": -0.0019460530020296574, "vf_loss": 7.6813273429870605, "vf_explained_var": 0.5826541185379028, "kl": 0.002368117216974497, "entropy": 0.8395055532455444, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4940800, "num_env_steps_trained": 4940800, "num_agent_steps_sampled": 9881600, "num_agent_steps_trained": 9881600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 515.0, "episode_reward_mean": 567.82, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 283.91}, "custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 175.42, "shaped_reward_min": 155, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.23, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.37, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.1, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.27, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.83, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.89, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.71, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.77, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.48, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.64, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.44, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.46, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.83, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.89, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.83, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.89, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 582.0, 530.0, 576.0, 573.0, 582.0, 519.0, 579.0, 530.0, 576.0, 587.0, 627.0, 579.0, 630.0, 581.0, 579.0, 525.0, 582.0, 576.0, 525.0, 630.0, 525.0, 587.0, 524.0, 582.0, 522.0, 579.0, 627.0, 522.0, 525.0, 582.0, 582.0, 579.0, 522.0, 581.0, 576.0, 576.0, 582.0, 587.0, 530.0, 582.0, 570.0, 579.0, 587.0, 573.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 522.0, 576.0, 579.0, 576.0, 579.0, 576.0, 582.0, 581.0, 579.0, 582.0, 530.0, 573.0, 519.0, 582.0, 627.0, 519.0, 587.0, 573.0, 576.0, 576.0, 530.0, 576.0, 530.0, 576.0, 522.0, 573.0, 579.0, 576.0, 525.0, 579.0, 539.0, 579.0, 579.0, 579.0, 573.0, 515.0, 522.0, 576.0, 579.0, 576.0, 573.0, 576.0, 525.0, 567.0, 576.0, 570.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 296.0, 283.0, 285.0, 297.0, 291.0, 291.0, 262.0, 268.0, 292.0, 284.0, 283.0, 290.0, 291.0, 291.0, 256.0, 263.0, 289.0, 290.0, 264.0, 266.0, 286.0, 290.0, 298.0, 289.0, 319.0, 308.0, 286.0, 293.0, 314.0, 316.0, 298.0, 283.0, 291.0, 288.0, 261.0, 264.0, 282.0, 300.0, 288.0, 288.0, 258.0, 267.0, 318.0, 312.0, 266.0, 259.0, 288.0, 299.0, 260.0, 264.0, 287.0, 295.0, 255.0, 267.0, 293.0, 286.0, 310.0, 317.0, 263.0, 259.0, 262.0, 263.0, 301.0, 281.0, 290.0, 292.0, 285.0, 294.0, 271.0, 251.0, 288.0, 293.0, 288.0, 288.0, 288.0, 288.0, 293.0, 289.0, 290.0, 297.0, 262.0, 268.0, 295.0, 287.0, 287.0, 283.0, 290.0, 289.0, 290.0, 297.0, 289.0, 284.0, 288.0, 291.0, 283.0, 290.0, 278.0, 295.0, 296.0, 283.0, 287.0, 286.0, 289.0, 290.0, 262.0, 260.0, 290.0, 286.0, 295.0, 284.0, 289.0, 287.0, 286.0, 293.0, 291.0, 285.0, 293.0, 289.0, 297.0, 284.0, 287.0, 292.0, 293.0, 289.0, 268.0, 262.0, 290.0, 283.0, 262.0, 257.0, 295.0, 287.0, 315.0, 312.0, 259.0, 260.0, 299.0, 288.0, 284.0, 289.0, 289.0, 287.0, 286.0, 290.0, 266.0, 264.0, 288.0, 288.0, 270.0, 260.0, 280.0, 296.0, 260.0, 262.0, 285.0, 288.0, 289.0, 290.0, 290.0, 286.0, 262.0, 263.0, 287.0, 292.0, 270.0, 269.0, 295.0, 284.0, 290.0, 289.0, 288.0, 291.0, 282.0, 291.0, 261.0, 254.0, 262.0, 260.0, 289.0, 287.0, 299.0, 280.0, 293.0, 283.0, 291.0, 282.0, 291.0, 285.0, 264.0, 261.0, 284.0, 283.0, 295.0, 281.0, 285.0, 285.0, 274.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.697175436184877, "mean_inference_ms": 1.2450850463363872, "mean_action_processing_ms": 0.1337122276607287, "mean_env_wait_ms": 0.839306888527343, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 515.0, "episode_reward_mean": 567.82, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 283.91}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 582.0, 530.0, 576.0, 573.0, 582.0, 519.0, 579.0, 530.0, 576.0, 587.0, 627.0, 579.0, 630.0, 581.0, 579.0, 525.0, 582.0, 576.0, 525.0, 630.0, 525.0, 587.0, 524.0, 582.0, 522.0, 579.0, 627.0, 522.0, 525.0, 582.0, 582.0, 579.0, 522.0, 581.0, 576.0, 576.0, 582.0, 587.0, 530.0, 582.0, 570.0, 579.0, 587.0, 573.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 522.0, 576.0, 579.0, 576.0, 579.0, 576.0, 582.0, 581.0, 579.0, 582.0, 530.0, 573.0, 519.0, 582.0, 627.0, 519.0, 587.0, 573.0, 576.0, 576.0, 530.0, 576.0, 530.0, 576.0, 522.0, 573.0, 579.0, 576.0, 525.0, 579.0, 539.0, 579.0, 579.0, 579.0, 573.0, 515.0, 522.0, 576.0, 579.0, 576.0, 573.0, 576.0, 525.0, 567.0, 576.0, 570.0, 570.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 296.0, 283.0, 285.0, 297.0, 291.0, 291.0, 262.0, 268.0, 292.0, 284.0, 283.0, 290.0, 291.0, 291.0, 256.0, 263.0, 289.0, 290.0, 264.0, 266.0, 286.0, 290.0, 298.0, 289.0, 319.0, 308.0, 286.0, 293.0, 314.0, 316.0, 298.0, 283.0, 291.0, 288.0, 261.0, 264.0, 282.0, 300.0, 288.0, 288.0, 258.0, 267.0, 318.0, 312.0, 266.0, 259.0, 288.0, 299.0, 260.0, 264.0, 287.0, 295.0, 255.0, 267.0, 293.0, 286.0, 310.0, 317.0, 263.0, 259.0, 262.0, 263.0, 301.0, 281.0, 290.0, 292.0, 285.0, 294.0, 271.0, 251.0, 288.0, 293.0, 288.0, 288.0, 288.0, 288.0, 293.0, 289.0, 290.0, 297.0, 262.0, 268.0, 295.0, 287.0, 287.0, 
283.0, 290.0, 289.0, 290.0, 297.0, 289.0, 284.0, 288.0, 291.0, 283.0, 290.0, 278.0, 295.0, 296.0, 283.0, 287.0, 286.0, 289.0, 290.0, 262.0, 260.0, 290.0, 286.0, 295.0, 284.0, 289.0, 287.0, 286.0, 293.0, 291.0, 285.0, 293.0, 289.0, 297.0, 284.0, 287.0, 292.0, 293.0, 289.0, 268.0, 262.0, 290.0, 283.0, 262.0, 257.0, 295.0, 287.0, 315.0, 312.0, 259.0, 260.0, 299.0, 288.0, 284.0, 289.0, 289.0, 287.0, 286.0, 290.0, 266.0, 264.0, 288.0, 288.0, 270.0, 260.0, 280.0, 296.0, 260.0, 262.0, 285.0, 288.0, 289.0, 290.0, 290.0, 286.0, 262.0, 263.0, 287.0, 292.0, 270.0, 269.0, 295.0, 284.0, 290.0, 289.0, 288.0, 291.0, 282.0, 291.0, 261.0, 254.0, 262.0, 260.0, 289.0, 287.0, 299.0, 280.0, 293.0, 283.0, 291.0, 282.0, 291.0, 285.0, 264.0, 261.0, 284.0, 283.0, 295.0, 281.0, 285.0, 285.0, 274.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.697175436184877, "mean_inference_ms": 1.2450850463363872, "mean_action_processing_ms": 0.1337122276607287, "mean_env_wait_ms": 0.839306888527343, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9881600, "num_agent_steps_trained": 9881600, "num_env_steps_sampled": 4940800, "num_env_steps_trained": 4940800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4940800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9881600, "timers": {"training_iteration_time_ms": 3649.585, "learn_time_ms": 1106.505, "learn_throughput": 11567.954, "synch_weights_time_ms": 11.707}, "counters": {"num_env_steps_sampled": 4940800, "num_env_steps_trained": 4940800, "num_agent_steps_sampled": 9881600, "num_agent_steps_trained": 9881600}, "done": false, "episodes_total": 12352, "training_iteration": 386, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-32", "timestamp": 1666581932, "time_this_iter_s": 3.649009943008423, "time_total_s": 1474.1393640041351, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1474.1393640041351, "timesteps_since_restore": 0, "iterations_since_restore": 386, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.16, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.96, "shaped_reward_min": 108, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.3, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.34, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.79, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.81, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.64, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.4, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.48, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.79, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.79, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00029556709341704845, "policy_loss": -0.0006432689260691404, "vf_loss": 7.661875247955322, "vf_explained_var": 0.6070252656936646, "kl": 0.0027911756187677383, "entropy": 0.836971640586853, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4953600, "num_env_steps_trained": 4953600, "num_agent_steps_sampled": 9907200, "num_agent_steps_trained": 9907200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 348.0, "episode_reward_mean": 567.76, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 283.88}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.96, "shaped_reward_min": 108, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.3, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 17.34, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.15, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 17.25, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.79, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.87, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.68, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.81, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.64, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.1, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.06, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.4, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.56, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.36, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.48, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.79, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.87, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.79, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.87, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 579.0, 522.0, 581.0, 576.0, 576.0, 582.0, 587.0, 530.0, 582.0, 570.0, 579.0, 587.0, 573.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 522.0, 576.0, 579.0, 576.0, 579.0, 576.0, 582.0, 581.0, 579.0, 582.0, 530.0, 573.0, 519.0, 582.0, 627.0, 519.0, 587.0, 573.0, 576.0, 576.0, 530.0, 576.0, 530.0, 576.0, 522.0, 573.0, 579.0, 576.0, 525.0, 579.0, 539.0, 579.0, 579.0, 579.0, 573.0, 515.0, 522.0, 576.0, 579.0, 576.0, 573.0, 576.0, 525.0, 567.0, 576.0, 570.0, 570.0, 576.0, 582.0, 570.0, 579.0, 579.0, 579.0, 530.0, 576.0, 579.0, 584.0, 570.0, 582.0, 525.0, 624.0, 630.0, 579.0, 576.0, 579.0, 579.0, 530.0, 522.0, 576.0, 348.0, 573.0, 573.0, 579.0, 582.0, 630.0, 573.0, 582.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 281.0, 290.0, 292.0, 285.0, 294.0, 271.0, 251.0, 288.0, 293.0, 288.0, 288.0, 288.0, 288.0, 293.0, 289.0, 290.0, 297.0, 262.0, 268.0, 295.0, 287.0, 287.0, 283.0, 290.0, 289.0, 290.0, 297.0, 289.0, 284.0, 288.0, 291.0, 283.0, 290.0, 278.0, 295.0, 296.0, 283.0, 287.0, 286.0, 289.0, 290.0, 262.0, 260.0, 290.0, 286.0, 295.0, 284.0, 289.0, 287.0, 286.0, 293.0, 291.0, 285.0, 293.0, 289.0, 297.0, 284.0, 287.0, 292.0, 293.0, 289.0, 268.0, 262.0, 290.0, 283.0, 262.0, 257.0, 295.0, 287.0, 315.0, 312.0, 259.0, 260.0, 299.0, 288.0, 284.0, 289.0, 289.0, 287.0, 286.0, 290.0, 266.0, 264.0, 288.0, 288.0, 270.0, 260.0, 280.0, 296.0, 260.0, 262.0, 285.0, 288.0, 289.0, 290.0, 290.0, 286.0, 262.0, 263.0, 287.0, 292.0, 270.0, 269.0, 295.0, 284.0, 290.0, 289.0, 288.0, 291.0, 282.0, 291.0, 261.0, 254.0, 262.0, 260.0, 289.0, 287.0, 299.0, 280.0, 293.0, 283.0, 291.0, 282.0, 291.0, 285.0, 264.0, 261.0, 284.0, 283.0, 295.0, 281.0, 285.0, 285.0, 274.0, 296.0, 285.0, 291.0, 291.0, 291.0, 288.0, 282.0, 282.0, 297.0, 289.0, 290.0, 287.0, 292.0, 255.0, 275.0, 289.0, 287.0, 284.0, 295.0, 285.0, 299.0, 290.0, 280.0, 293.0, 289.0, 261.0, 264.0, 308.0, 316.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 288.0, 291.0, 285.0, 294.0, 262.0, 268.0, 258.0, 264.0, 284.0, 292.0, 172.0, 176.0, 288.0, 285.0, 285.0, 288.0, 293.0, 286.0, 286.0, 296.0, 316.0, 314.0, 290.0, 283.0, 294.0, 288.0, 290.0, 289.0, 285.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6971456596710317, "mean_inference_ms": 1.244961424900404, "mean_action_processing_ms": 0.13370369220716977, "mean_env_wait_ms": 0.8392242722293407, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 348.0, "episode_reward_mean": 567.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 283.88}, "hist_stats": {"episode_reward": [582.0, 582.0, 579.0, 522.0, 581.0, 576.0, 576.0, 582.0, 587.0, 530.0, 582.0, 570.0, 579.0, 587.0, 573.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 522.0, 576.0, 579.0, 576.0, 579.0, 576.0, 582.0, 581.0, 579.0, 582.0, 530.0, 573.0, 519.0, 582.0, 627.0, 519.0, 587.0, 573.0, 576.0, 576.0, 530.0, 576.0, 530.0, 576.0, 522.0, 573.0, 579.0, 576.0, 525.0, 579.0, 539.0, 579.0, 579.0, 579.0, 573.0, 515.0, 522.0, 576.0, 579.0, 576.0, 573.0, 576.0, 525.0, 567.0, 576.0, 570.0, 570.0, 576.0, 582.0, 570.0, 579.0, 579.0, 579.0, 530.0, 576.0, 579.0, 584.0, 570.0, 582.0, 525.0, 624.0, 630.0, 579.0, 576.0, 579.0, 579.0, 530.0, 522.0, 576.0, 348.0, 573.0, 573.0, 579.0, 582.0, 630.0, 573.0, 582.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [301.0, 281.0, 290.0, 292.0, 285.0, 294.0, 271.0, 251.0, 288.0, 293.0, 288.0, 288.0, 288.0, 288.0, 293.0, 289.0, 290.0, 297.0, 262.0, 268.0, 295.0, 287.0, 287.0, 283.0, 290.0, 289.0, 290.0, 297.0, 289.0, 284.0, 288.0, 291.0, 283.0, 290.0, 278.0, 295.0, 296.0, 283.0, 287.0, 286.0, 289.0, 290.0, 262.0, 260.0, 290.0, 286.0, 295.0, 284.0, 289.0, 287.0, 286.0, 293.0, 291.0, 285.0, 293.0, 289.0, 297.0, 284.0, 287.0, 292.0, 293.0, 289.0, 268.0, 262.0, 290.0, 283.0, 262.0, 257.0, 295.0, 287.0, 315.0, 312.0, 259.0, 260.0, 299.0, 288.0, 284.0, 289.0, 289.0, 287.0, 286.0, 290.0, 266.0, 264.0, 288.0, 288.0, 270.0, 
260.0, 280.0, 296.0, 260.0, 262.0, 285.0, 288.0, 289.0, 290.0, 290.0, 286.0, 262.0, 263.0, 287.0, 292.0, 270.0, 269.0, 295.0, 284.0, 290.0, 289.0, 288.0, 291.0, 282.0, 291.0, 261.0, 254.0, 262.0, 260.0, 289.0, 287.0, 299.0, 280.0, 293.0, 283.0, 291.0, 282.0, 291.0, 285.0, 264.0, 261.0, 284.0, 283.0, 295.0, 281.0, 285.0, 285.0, 274.0, 296.0, 285.0, 291.0, 291.0, 291.0, 288.0, 282.0, 282.0, 297.0, 289.0, 290.0, 287.0, 292.0, 255.0, 275.0, 289.0, 287.0, 284.0, 295.0, 285.0, 299.0, 290.0, 280.0, 293.0, 289.0, 261.0, 264.0, 308.0, 316.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 288.0, 291.0, 285.0, 294.0, 262.0, 268.0, 258.0, 264.0, 284.0, 292.0, 172.0, 176.0, 288.0, 285.0, 285.0, 288.0, 293.0, 286.0, 286.0, 296.0, 316.0, 314.0, 290.0, 283.0, 294.0, 288.0, 290.0, 289.0, 285.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6971456596710317, "mean_inference_ms": 1.244961424900404, "mean_action_processing_ms": 0.13370369220716977, "mean_env_wait_ms": 0.8392242722293407, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9907200, "num_agent_steps_trained": 9907200, "num_env_steps_sampled": 4953600, "num_env_steps_trained": 4953600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4953600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9907200, "timers": {"training_iteration_time_ms": 3647.842, "learn_time_ms": 1106.231, "learn_throughput": 11570.818, "synch_weights_time_ms": 11.207}, "counters": {"num_env_steps_sampled": 4953600, "num_env_steps_trained": 4953600, "num_agent_steps_sampled": 9907200, "num_agent_steps_trained": 9907200}, "done": false, "episodes_total": 12384, "training_iteration": 387, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-35", "timestamp": 1666581935, "time_this_iter_s": 3.654477119445801, "time_total_s": 1477.793841123581, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1477.793841123581, "timesteps_since_restore": 0, "iterations_since_restore": 387, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.88333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.58, "shaped_reward_min": 108, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.43, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.28, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.19, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.76, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.58, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.67, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.55, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.76, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.76, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0019375028787180781, "policy_loss": 0.0015798690728843212, "vf_loss": 7.705456733703613, "vf_explained_var": 0.6002016067504883, "kl": 0.0024976124987006187, "entropy": 0.8258252739906311, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4966400, "num_env_steps_trained": 4966400, "num_agent_steps_sampled": 9932800, "num_agent_steps_trained": 9932800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 348.0, "episode_reward_mean": 567.38, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 283.69}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.58, "shaped_reward_min": 108, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.43, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.28, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.29, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.19, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.76, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.58, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.9, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.67, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.11, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.29, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.55, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.76, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.76, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 519.0, 582.0, 627.0, 519.0, 587.0, 573.0, 576.0, 576.0, 530.0, 576.0, 530.0, 576.0, 522.0, 573.0, 579.0, 576.0, 525.0, 579.0, 539.0, 579.0, 579.0, 579.0, 573.0, 515.0, 522.0, 576.0, 579.0, 576.0, 573.0, 576.0, 525.0, 567.0, 576.0, 570.0, 570.0, 576.0, 582.0, 570.0, 579.0, 579.0, 579.0, 530.0, 576.0, 579.0, 584.0, 570.0, 582.0, 525.0, 624.0, 630.0, 579.0, 576.0, 579.0, 579.0, 530.0, 522.0, 576.0, 348.0, 573.0, 573.0, 579.0, 582.0, 630.0, 573.0, 582.0, 579.0, 573.0, 579.0, 522.0, 579.0, 576.0, 579.0, 573.0, 522.0, 530.0, 579.0, 582.0, 627.0, 576.0, 573.0, 579.0, 576.0, 579.0, 581.0, 584.0, 576.0, 630.0, 573.0, 579.0, 582.0, 530.0, 576.0, 579.0, 579.0, 447.0, 582.0, 530.0, 633.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 283.0, 262.0, 257.0, 295.0, 287.0, 315.0, 312.0, 259.0, 260.0, 299.0, 288.0, 284.0, 289.0, 289.0, 287.0, 286.0, 290.0, 266.0, 264.0, 288.0, 288.0, 270.0, 260.0, 280.0, 296.0, 260.0, 262.0, 285.0, 288.0, 289.0, 290.0, 290.0, 286.0, 262.0, 263.0, 287.0, 292.0, 270.0, 269.0, 295.0, 284.0, 290.0, 289.0, 288.0, 291.0, 282.0, 291.0, 261.0, 254.0, 262.0, 260.0, 289.0, 287.0, 299.0, 280.0, 293.0, 283.0, 291.0, 282.0, 291.0, 285.0, 264.0, 261.0, 284.0, 283.0, 295.0, 281.0, 285.0, 285.0, 274.0, 296.0, 285.0, 291.0, 291.0, 291.0, 288.0, 282.0, 282.0, 297.0, 289.0, 290.0, 287.0, 292.0, 255.0, 275.0, 289.0, 287.0, 284.0, 295.0, 285.0, 299.0, 290.0, 280.0, 293.0, 289.0, 261.0, 264.0, 308.0, 316.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 288.0, 291.0, 285.0, 294.0, 262.0, 268.0, 258.0, 264.0, 284.0, 292.0, 172.0, 176.0, 288.0, 285.0, 285.0, 288.0, 293.0, 286.0, 286.0, 296.0, 316.0, 314.0, 290.0, 283.0, 294.0, 288.0, 290.0, 289.0, 285.0, 288.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 296.0, 280.0, 293.0, 286.0, 277.0, 296.0, 259.0, 263.0, 269.0, 261.0, 288.0, 291.0, 296.0, 286.0, 317.0, 310.0, 285.0, 291.0, 286.0, 287.0, 291.0, 288.0, 290.0, 286.0, 288.0, 291.0, 282.0, 299.0, 297.0, 287.0, 290.0, 286.0, 311.0, 319.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 262.0, 268.0, 284.0, 292.0, 291.0, 288.0, 290.0, 289.0, 229.0, 218.0, 291.0, 291.0, 260.0, 270.0, 314.0, 319.0, 290.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6971033523311602, "mean_inference_ms": 1.2448045816757196, "mean_action_processing_ms": 0.13369576714133724, "mean_env_wait_ms": 0.839134076244018, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 348.0, "episode_reward_mean": 567.38, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 283.69}, "hist_stats": {"episode_reward": [573.0, 519.0, 582.0, 627.0, 519.0, 587.0, 573.0, 576.0, 576.0, 530.0, 576.0, 530.0, 576.0, 522.0, 573.0, 579.0, 576.0, 525.0, 579.0, 539.0, 579.0, 579.0, 579.0, 573.0, 515.0, 522.0, 576.0, 579.0, 576.0, 573.0, 576.0, 525.0, 567.0, 576.0, 570.0, 570.0, 576.0, 582.0, 570.0, 579.0, 579.0, 579.0, 530.0, 576.0, 579.0, 584.0, 570.0, 582.0, 525.0, 624.0, 630.0, 579.0, 576.0, 579.0, 579.0, 530.0, 522.0, 576.0, 348.0, 573.0, 573.0, 579.0, 582.0, 630.0, 573.0, 582.0, 579.0, 573.0, 579.0, 522.0, 579.0, 576.0, 579.0, 573.0, 522.0, 530.0, 579.0, 582.0, 627.0, 576.0, 573.0, 579.0, 576.0, 579.0, 581.0, 584.0, 576.0, 630.0, 573.0, 579.0, 582.0, 530.0, 576.0, 579.0, 579.0, 447.0, 582.0, 530.0, 633.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 283.0, 262.0, 257.0, 295.0, 287.0, 315.0, 312.0, 259.0, 260.0, 299.0, 288.0, 284.0, 289.0, 289.0, 287.0, 286.0, 290.0, 266.0, 264.0, 288.0, 288.0, 270.0, 260.0, 280.0, 296.0, 260.0, 262.0, 285.0, 288.0, 289.0, 290.0, 290.0, 286.0, 262.0, 263.0, 287.0, 292.0, 270.0, 269.0, 295.0, 284.0, 290.0, 289.0, 288.0, 291.0, 282.0, 291.0, 261.0, 254.0, 262.0, 260.0, 289.0, 287.0, 299.0, 280.0, 293.0, 283.0, 291.0, 282.0, 291.0, 285.0, 264.0, 261.0, 284.0, 283.0, 295.0, 281.0, 285.0, 285.0, 274.0, 296.0, 285.0, 291.0, 291.0, 291.0, 288.0, 282.0, 282.0, 297.0, 289.0, 290.0, 287.0, 292.0, 255.0, 275.0, 289.0, 
287.0, 284.0, 295.0, 285.0, 299.0, 290.0, 280.0, 293.0, 289.0, 261.0, 264.0, 308.0, 316.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 288.0, 291.0, 285.0, 294.0, 262.0, 268.0, 258.0, 264.0, 284.0, 292.0, 172.0, 176.0, 288.0, 285.0, 285.0, 288.0, 293.0, 286.0, 286.0, 296.0, 316.0, 314.0, 290.0, 283.0, 294.0, 288.0, 290.0, 289.0, 285.0, 288.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 296.0, 280.0, 293.0, 286.0, 277.0, 296.0, 259.0, 263.0, 269.0, 261.0, 288.0, 291.0, 296.0, 286.0, 317.0, 310.0, 285.0, 291.0, 286.0, 287.0, 291.0, 288.0, 290.0, 286.0, 288.0, 291.0, 282.0, 299.0, 297.0, 287.0, 290.0, 286.0, 311.0, 319.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 262.0, 268.0, 284.0, 292.0, 291.0, 288.0, 290.0, 289.0, 229.0, 218.0, 291.0, 291.0, 260.0, 270.0, 314.0, 319.0, 290.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6971033523311602, "mean_inference_ms": 1.2448045816757196, "mean_action_processing_ms": 0.13369576714133724, "mean_env_wait_ms": 0.839134076244018, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9932800, "num_agent_steps_trained": 9932800, "num_env_steps_sampled": 4966400, "num_env_steps_trained": 4966400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4966400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9932800, "timers": {"training_iteration_time_ms": 3650.674, "learn_time_ms": 1110.814, "learn_throughput": 11523.082, "synch_weights_time_ms": 11.715}, "counters": {"num_env_steps_sampled": 4966400, "num_env_steps_trained": 4966400, "num_agent_steps_sampled": 9932800, "num_agent_steps_trained": 9932800}, "done": false, "episodes_total": 12416, "training_iteration": 388, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-39", "timestamp": 1666581939, "time_this_iter_s": 3.681104898452759, "time_total_s": 1481.4749460220337, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1481.4749460220337, "timesteps_since_restore": 0, "iterations_since_restore": 388, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.9, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.74, "shaped_reward_min": 108, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.1, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.61, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 14.98, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.49, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.62, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.3, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.62, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.62, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.000489800819195807, "policy_loss": 0.00010905158706009388, "vf_loss": 7.963174819946289, "vf_explained_var": 0.5872219800949097, "kl": 0.002789913909509778, "entropy": 0.83113694190979, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4979200, "num_env_steps_trained": 4979200, "num_agent_steps_sampled": 9958400, "num_agent_steps_trained": 9958400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 348.0, "episode_reward_mean": 568.74, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 284.37}, "custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.74, "shaped_reward_min": 108, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.1, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 17.61, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 14.98, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 17.49, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.62, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 17.14, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.66, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 12, "dish_pickup_agent_1_mean": 4.88, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.63, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.16, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 7, "dish_drop_agent_1_mean": 0.1, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.1, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 6, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.63, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.3, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.57, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.62, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 17.14, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.62, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 17.14, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [567.0, 576.0, 570.0, 570.0, 576.0, 582.0, 570.0, 579.0, 579.0, 579.0, 530.0, 576.0, 579.0, 584.0, 570.0, 582.0, 525.0, 624.0, 630.0, 579.0, 576.0, 579.0, 579.0, 530.0, 522.0, 576.0, 348.0, 573.0, 573.0, 579.0, 582.0, 630.0, 573.0, 582.0, 579.0, 573.0, 579.0, 522.0, 579.0, 576.0, 579.0, 573.0, 522.0, 530.0, 579.0, 582.0, 627.0, 576.0, 573.0, 579.0, 576.0, 579.0, 581.0, 584.0, 576.0, 630.0, 573.0, 579.0, 582.0, 530.0, 576.0, 579.0, 579.0, 447.0, 582.0, 530.0, 633.0, 576.0, 576.0, 579.0, 579.0, 522.0, 582.0, 582.0, 573.0, 633.0, 570.0, 579.0, 519.0, 584.0, 522.0, 579.0, 579.0, 579.0, 573.0, 582.0, 525.0, 579.0, 579.0, 630.0, 582.0, 579.0, 501.0, 579.0, 552.0, 522.0, 579.0, 522.0, 525.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 283.0, 295.0, 281.0, 285.0, 285.0, 274.0, 296.0, 285.0, 291.0, 291.0, 291.0, 288.0, 282.0, 282.0, 297.0, 289.0, 290.0, 287.0, 292.0, 255.0, 275.0, 289.0, 287.0, 284.0, 295.0, 285.0, 299.0, 290.0, 280.0, 293.0, 289.0, 261.0, 264.0, 308.0, 316.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 288.0, 291.0, 285.0, 294.0, 262.0, 268.0, 258.0, 264.0, 284.0, 292.0, 172.0, 176.0, 288.0, 285.0, 285.0, 288.0, 293.0, 286.0, 286.0, 296.0, 316.0, 314.0, 290.0, 283.0, 294.0, 288.0, 290.0, 289.0, 285.0, 288.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 296.0, 280.0, 293.0, 286.0, 277.0, 296.0, 259.0, 263.0, 269.0, 261.0, 288.0, 291.0, 296.0, 286.0, 317.0, 310.0, 285.0, 291.0, 286.0, 287.0, 291.0, 288.0, 290.0, 286.0, 288.0, 291.0, 282.0, 299.0, 297.0, 287.0, 290.0, 286.0, 311.0, 319.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 262.0, 268.0, 284.0, 292.0, 291.0, 288.0, 290.0, 289.0, 229.0, 218.0, 291.0, 291.0, 260.0, 270.0, 314.0, 319.0, 290.0, 286.0, 288.0, 288.0, 284.0, 295.0, 293.0, 286.0, 254.0, 268.0, 286.0, 296.0, 291.0, 291.0, 281.0, 292.0, 308.0, 325.0, 299.0, 271.0, 288.0, 291.0, 261.0, 258.0, 293.0, 291.0, 267.0, 255.0, 284.0, 295.0, 291.0, 288.0, 289.0, 290.0, 292.0, 281.0, 293.0, 289.0, 261.0, 264.0, 283.0, 296.0, 290.0, 289.0, 311.0, 319.0, 286.0, 296.0, 287.0, 292.0, 247.0, 254.0, 288.0, 291.0, 270.0, 282.0, 256.0, 266.0, 289.0, 290.0, 260.0, 262.0, 245.0, 280.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6970445040200594, "mean_inference_ms": 1.2446568491000776, "mean_action_processing_ms": 0.13368825935226153, "mean_env_wait_ms": 0.8390474799522084, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 348.0, "episode_reward_mean": 568.74, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 172.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 284.37}, "hist_stats": {"episode_reward": [567.0, 576.0, 570.0, 570.0, 576.0, 582.0, 570.0, 579.0, 579.0, 579.0, 530.0, 576.0, 579.0, 584.0, 570.0, 582.0, 525.0, 624.0, 630.0, 579.0, 576.0, 579.0, 579.0, 530.0, 522.0, 576.0, 348.0, 573.0, 573.0, 579.0, 582.0, 630.0, 573.0, 582.0, 579.0, 573.0, 579.0, 522.0, 579.0, 576.0, 579.0, 573.0, 522.0, 530.0, 579.0, 582.0, 627.0, 576.0, 573.0, 579.0, 576.0, 579.0, 581.0, 584.0, 576.0, 630.0, 573.0, 579.0, 582.0, 530.0, 576.0, 579.0, 579.0, 447.0, 582.0, 530.0, 633.0, 576.0, 576.0, 579.0, 579.0, 522.0, 582.0, 582.0, 573.0, 633.0, 570.0, 579.0, 519.0, 584.0, 522.0, 579.0, 579.0, 579.0, 573.0, 582.0, 525.0, 579.0, 579.0, 630.0, 582.0, 579.0, 501.0, 579.0, 552.0, 522.0, 579.0, 522.0, 525.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 283.0, 295.0, 281.0, 285.0, 285.0, 274.0, 296.0, 285.0, 291.0, 291.0, 291.0, 288.0, 282.0, 282.0, 297.0, 289.0, 290.0, 287.0, 292.0, 255.0, 275.0, 289.0, 287.0, 284.0, 295.0, 285.0, 299.0, 290.0, 280.0, 293.0, 289.0, 261.0, 264.0, 308.0, 316.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 288.0, 291.0, 285.0, 294.0, 262.0, 268.0, 258.0, 264.0, 284.0, 292.0, 172.0, 176.0, 288.0, 285.0, 285.0, 288.0, 293.0, 286.0, 286.0, 296.0, 316.0, 314.0, 290.0, 283.0, 294.0, 288.0, 290.0, 289.0, 285.0, 288.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 296.0, 280.0, 293.0, 286.0, 277.0, 296.0, 259.0, 263.0, 269.0, 
261.0, 288.0, 291.0, 296.0, 286.0, 317.0, 310.0, 285.0, 291.0, 286.0, 287.0, 291.0, 288.0, 290.0, 286.0, 288.0, 291.0, 282.0, 299.0, 297.0, 287.0, 290.0, 286.0, 311.0, 319.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 262.0, 268.0, 284.0, 292.0, 291.0, 288.0, 290.0, 289.0, 229.0, 218.0, 291.0, 291.0, 260.0, 270.0, 314.0, 319.0, 290.0, 286.0, 288.0, 288.0, 284.0, 295.0, 293.0, 286.0, 254.0, 268.0, 286.0, 296.0, 291.0, 291.0, 281.0, 292.0, 308.0, 325.0, 299.0, 271.0, 288.0, 291.0, 261.0, 258.0, 293.0, 291.0, 267.0, 255.0, 284.0, 295.0, 291.0, 288.0, 289.0, 290.0, 292.0, 281.0, 293.0, 289.0, 261.0, 264.0, 283.0, 296.0, 290.0, 289.0, 311.0, 319.0, 286.0, 296.0, 287.0, 292.0, 247.0, 254.0, 288.0, 291.0, 270.0, 282.0, 256.0, 266.0, 289.0, 290.0, 260.0, 262.0, 245.0, 280.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6970445040200594, "mean_inference_ms": 1.2446568491000776, "mean_action_processing_ms": 0.13368825935226153, "mean_env_wait_ms": 0.8390474799522084, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9958400, "num_agent_steps_trained": 9958400, "num_env_steps_sampled": 4979200, "num_env_steps_trained": 4979200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4979200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9958400, "timers": {"training_iteration_time_ms": 3643.09, "learn_time_ms": 1109.725, "learn_throughput": 11534.391, "synch_weights_time_ms": 11.803}, "counters": {"num_env_steps_sampled": 4979200, "num_env_steps_trained": 4979200, "num_agent_steps_sampled": 9958400, "num_agent_steps_trained": 9958400}, "done": false, "episodes_total": 12448, "training_iteration": 389, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-43", "timestamp": 1666581943, "time_this_iter_s": 3.614347457885742, "time_total_s": 1485.0892934799194, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1485.0892934799194, "timesteps_since_restore": 0, "iterations_since_restore": 389, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.499999999999996, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.1, "shaped_reward_min": 127, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.48, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 15.24, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.38, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.07, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.72, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.3, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.07, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.07, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -8.018617518246174e-05, "policy_loss": -0.0004352282849140465, "vf_loss": 7.723010063171387, "vf_explained_var": 0.5921529531478882, "kl": 0.002627419773489237, "entropy": 0.8345175981521606, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 4992000, "num_env_steps_trained": 4992000, "num_agent_steps_sampled": 9984000, "num_agent_steps_trained": 9984000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 447.0, "episode_reward_mean": 572.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 218.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.45}, "custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.1, "shaped_reward_min": 127, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.38, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.48, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.24, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.38, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.92, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.07, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.64, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.94, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.72, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.07, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.31, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.3, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.92, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.07, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.92, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.07, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 579.0, 573.0, 579.0, 522.0, 579.0, 576.0, 579.0, 573.0, 522.0, 530.0, 579.0, 582.0, 627.0, 576.0, 573.0, 579.0, 576.0, 579.0, 581.0, 584.0, 576.0, 630.0, 573.0, 579.0, 582.0, 530.0, 576.0, 579.0, 579.0, 447.0, 582.0, 530.0, 633.0, 576.0, 576.0, 579.0, 579.0, 522.0, 582.0, 582.0, 573.0, 633.0, 570.0, 579.0, 519.0, 584.0, 522.0, 579.0, 579.0, 579.0, 573.0, 582.0, 525.0, 579.0, 579.0, 630.0, 582.0, 579.0, 501.0, 579.0, 552.0, 522.0, 579.0, 522.0, 525.0, 579.0, 630.0, 522.0, 579.0, 630.0, 582.0, 579.0, 582.0, 630.0, 587.0, 579.0, 570.0, 570.0, 633.0, 570.0, 579.0, 579.0, 576.0, 525.0, 576.0, 582.0, 576.0, 522.0, 582.0, 570.0, 627.0, 579.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 283.0, 294.0, 288.0, 290.0, 289.0, 285.0, 288.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 296.0, 280.0, 293.0, 286.0, 277.0, 296.0, 259.0, 263.0, 269.0, 261.0, 288.0, 291.0, 296.0, 286.0, 317.0, 310.0, 285.0, 291.0, 286.0, 287.0, 291.0, 288.0, 290.0, 286.0, 288.0, 291.0, 282.0, 299.0, 297.0, 287.0, 290.0, 286.0, 311.0, 319.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 262.0, 268.0, 284.0, 292.0, 291.0, 288.0, 290.0, 289.0, 229.0, 218.0, 291.0, 291.0, 260.0, 270.0, 314.0, 319.0, 290.0, 286.0, 288.0, 288.0, 284.0, 295.0, 293.0, 286.0, 254.0, 268.0, 286.0, 296.0, 291.0, 291.0, 281.0, 292.0, 308.0, 325.0, 299.0, 271.0, 288.0, 291.0, 261.0, 258.0, 293.0, 291.0, 267.0, 255.0, 284.0, 295.0, 291.0, 288.0, 289.0, 290.0, 292.0, 281.0, 293.0, 289.0, 261.0, 264.0, 283.0, 296.0, 290.0, 289.0, 311.0, 319.0, 286.0, 296.0, 287.0, 292.0, 247.0, 254.0, 288.0, 291.0, 270.0, 282.0, 256.0, 266.0, 289.0, 290.0, 260.0, 262.0, 245.0, 280.0, 290.0, 289.0, 314.0, 316.0, 253.0, 269.0, 285.0, 294.0, 311.0, 319.0, 288.0, 294.0, 290.0, 289.0, 297.0, 285.0, 317.0, 313.0, 293.0, 294.0, 290.0, 289.0, 282.0, 288.0, 288.0, 282.0, 318.0, 315.0, 279.0, 291.0, 290.0, 289.0, 287.0, 292.0, 296.0, 280.0, 267.0, 258.0, 286.0, 290.0, 291.0, 291.0, 292.0, 284.0, 257.0, 265.0, 292.0, 290.0, 277.0, 293.0, 315.0, 312.0, 291.0, 288.0, 291.0, 291.0, 297.0, 279.0, 285.0, 291.0, 285.0, 294.0, 291.0, 291.0, 284.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6969998730166297, "mean_inference_ms": 1.2445213151785859, "mean_action_processing_ms": 0.13368180538004462, "mean_env_wait_ms": 0.838974390971066, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 447.0, "episode_reward_mean": 572.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 218.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.45}, "hist_stats": {"episode_reward": [573.0, 582.0, 579.0, 573.0, 579.0, 522.0, 579.0, 576.0, 579.0, 573.0, 522.0, 530.0, 579.0, 582.0, 627.0, 576.0, 573.0, 579.0, 576.0, 579.0, 581.0, 584.0, 576.0, 630.0, 573.0, 579.0, 582.0, 530.0, 576.0, 579.0, 579.0, 447.0, 582.0, 530.0, 633.0, 576.0, 576.0, 579.0, 579.0, 522.0, 582.0, 582.0, 573.0, 633.0, 570.0, 579.0, 519.0, 584.0, 522.0, 579.0, 579.0, 579.0, 573.0, 582.0, 525.0, 579.0, 579.0, 630.0, 582.0, 579.0, 501.0, 579.0, 552.0, 522.0, 579.0, 522.0, 525.0, 579.0, 630.0, 522.0, 579.0, 630.0, 582.0, 579.0, 582.0, 630.0, 587.0, 579.0, 570.0, 570.0, 633.0, 570.0, 579.0, 579.0, 576.0, 525.0, 576.0, 582.0, 576.0, 522.0, 582.0, 570.0, 627.0, 579.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 283.0, 294.0, 288.0, 290.0, 289.0, 285.0, 288.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 296.0, 280.0, 293.0, 286.0, 277.0, 296.0, 259.0, 263.0, 269.0, 261.0, 288.0, 291.0, 296.0, 286.0, 317.0, 310.0, 285.0, 291.0, 286.0, 287.0, 291.0, 288.0, 290.0, 286.0, 288.0, 291.0, 282.0, 299.0, 297.0, 287.0, 290.0, 286.0, 311.0, 319.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 262.0, 268.0, 284.0, 292.0, 291.0, 288.0, 290.0, 289.0, 229.0, 218.0, 291.0, 291.0, 260.0, 270.0, 314.0, 319.0, 290.0, 286.0, 288.0, 288.0, 284.0, 295.0, 293.0, 286.0, 254.0, 268.0, 286.0, 296.0, 291.0, 291.0, 281.0, 292.0, 308.0, 
325.0, 299.0, 271.0, 288.0, 291.0, 261.0, 258.0, 293.0, 291.0, 267.0, 255.0, 284.0, 295.0, 291.0, 288.0, 289.0, 290.0, 292.0, 281.0, 293.0, 289.0, 261.0, 264.0, 283.0, 296.0, 290.0, 289.0, 311.0, 319.0, 286.0, 296.0, 287.0, 292.0, 247.0, 254.0, 288.0, 291.0, 270.0, 282.0, 256.0, 266.0, 289.0, 290.0, 260.0, 262.0, 245.0, 280.0, 290.0, 289.0, 314.0, 316.0, 253.0, 269.0, 285.0, 294.0, 311.0, 319.0, 288.0, 294.0, 290.0, 289.0, 297.0, 285.0, 317.0, 313.0, 293.0, 294.0, 290.0, 289.0, 282.0, 288.0, 288.0, 282.0, 318.0, 315.0, 279.0, 291.0, 290.0, 289.0, 287.0, 292.0, 296.0, 280.0, 267.0, 258.0, 286.0, 290.0, 291.0, 291.0, 292.0, 284.0, 257.0, 265.0, 292.0, 290.0, 277.0, 293.0, 315.0, 312.0, 291.0, 288.0, 291.0, 291.0, 297.0, 279.0, 285.0, 291.0, 285.0, 294.0, 291.0, 291.0, 284.0, 295.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6969998730166297, "mean_inference_ms": 1.2445213151785859, "mean_action_processing_ms": 0.13368180538004462, "mean_env_wait_ms": 0.838974390971066, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 9984000, "num_agent_steps_trained": 9984000, "num_env_steps_sampled": 4992000, "num_env_steps_trained": 4992000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 4992000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 9984000, "timers": {"training_iteration_time_ms": 3655.183, "learn_time_ms": 1119.61, "learn_throughput": 11432.549, "synch_weights_time_ms": 11.891}, "counters": {"num_env_steps_sampled": 4992000, "num_env_steps_trained": 4992000, "num_agent_steps_sampled": 9984000, "num_agent_steps_trained": 9984000}, "done": false, "episodes_total": 12480, "training_iteration": 390, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-47", "timestamp": 1666581947, "time_this_iter_s": 3.7840723991394043, "time_total_s": 1488.8733658790588, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1488.8733658790588, "timesteps_since_restore": 0, "iterations_since_restore": 390, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.833333333333332, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.72, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, 
"useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.21, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.95, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.69, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.95, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.95, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0028976090252399445, "policy_loss": -0.003267057705670595, "vf_loss": 7.814639091491699, "vf_explained_var": 0.5801650881767273, "kl": 0.0025380898732692003, "entropy": 0.8240329027175903, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5004800, "num_env_steps_trained": 5004800, "num_agent_steps_sampled": 10009600, "num_agent_steps_trained": 10009600}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 572.92, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.46}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.72, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 17.35, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.28, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 17.21, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.98, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.95, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.59, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.08, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.71, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.26, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.69, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.98, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.95, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.98, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.95, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 530.0, 633.0, 576.0, 576.0, 579.0, 579.0, 522.0, 582.0, 582.0, 573.0, 633.0, 570.0, 579.0, 519.0, 584.0, 522.0, 579.0, 579.0, 579.0, 573.0, 582.0, 525.0, 579.0, 579.0, 630.0, 582.0, 579.0, 501.0, 579.0, 552.0, 522.0, 579.0, 522.0, 525.0, 579.0, 630.0, 522.0, 579.0, 630.0, 582.0, 579.0, 582.0, 630.0, 587.0, 579.0, 570.0, 570.0, 633.0, 570.0, 579.0, 579.0, 576.0, 525.0, 576.0, 582.0, 576.0, 522.0, 582.0, 570.0, 627.0, 579.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 579.0, 570.0, 582.0, 573.0, 582.0, 570.0, 582.0, 581.0, 567.0, 579.0, 519.0, 579.0, 581.0, 579.0, 570.0, 465.0, 576.0, 522.0, 627.0, 630.0, 582.0, 576.0, 579.0, 573.0, 579.0, 633.0, 576.0, 570.0, 498.0, 525.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 260.0, 270.0, 314.0, 319.0, 290.0, 286.0, 288.0, 288.0, 284.0, 295.0, 293.0, 286.0, 254.0, 268.0, 286.0, 296.0, 291.0, 291.0, 281.0, 292.0, 308.0, 325.0, 299.0, 271.0, 288.0, 291.0, 261.0, 258.0, 293.0, 291.0, 267.0, 255.0, 284.0, 295.0, 291.0, 288.0, 289.0, 290.0, 292.0, 281.0, 293.0, 289.0, 261.0, 264.0, 283.0, 296.0, 290.0, 289.0, 311.0, 319.0, 286.0, 296.0, 287.0, 292.0, 247.0, 254.0, 288.0, 291.0, 270.0, 282.0, 256.0, 266.0, 289.0, 290.0, 260.0, 262.0, 245.0, 280.0, 290.0, 289.0, 314.0, 316.0, 253.0, 269.0, 285.0, 294.0, 311.0, 319.0, 288.0, 294.0, 290.0, 289.0, 297.0, 285.0, 317.0, 313.0, 293.0, 294.0, 290.0, 289.0, 282.0, 288.0, 288.0, 282.0, 318.0, 315.0, 279.0, 291.0, 290.0, 289.0, 287.0, 292.0, 296.0, 280.0, 267.0, 258.0, 286.0, 290.0, 291.0, 291.0, 292.0, 284.0, 257.0, 265.0, 292.0, 290.0, 277.0, 293.0, 315.0, 312.0, 291.0, 288.0, 291.0, 291.0, 297.0, 279.0, 285.0, 291.0, 285.0, 294.0, 291.0, 291.0, 284.0, 295.0, 290.0, 289.0, 286.0, 284.0, 296.0, 286.0, 283.0, 290.0, 291.0, 291.0, 279.0, 291.0, 296.0, 286.0, 285.0, 296.0, 283.0, 284.0, 290.0, 289.0, 257.0, 262.0, 288.0, 291.0, 291.0, 290.0, 288.0, 291.0, 284.0, 286.0, 239.0, 226.0, 287.0, 289.0, 251.0, 271.0, 314.0, 313.0, 306.0, 324.0, 293.0, 289.0, 290.0, 286.0, 291.0, 288.0, 279.0, 294.0, 291.0, 288.0, 324.0, 309.0, 296.0, 280.0, 282.0, 288.0, 250.0, 248.0, 275.0, 250.0, 288.0, 291.0, 285.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6969574750027725, "mean_inference_ms": 1.244395143457617, "mean_action_processing_ms": 0.13367687287664567, "mean_env_wait_ms": 0.8389141369511748, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 572.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.46}, "hist_stats": {"episode_reward": [582.0, 530.0, 633.0, 576.0, 576.0, 579.0, 579.0, 522.0, 582.0, 582.0, 573.0, 633.0, 570.0, 579.0, 519.0, 584.0, 522.0, 579.0, 579.0, 579.0, 573.0, 582.0, 525.0, 579.0, 579.0, 630.0, 582.0, 579.0, 501.0, 579.0, 552.0, 522.0, 579.0, 522.0, 525.0, 579.0, 630.0, 522.0, 579.0, 630.0, 582.0, 579.0, 582.0, 630.0, 587.0, 579.0, 570.0, 570.0, 633.0, 570.0, 579.0, 579.0, 576.0, 525.0, 576.0, 582.0, 576.0, 522.0, 582.0, 570.0, 627.0, 579.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 579.0, 570.0, 582.0, 573.0, 582.0, 570.0, 582.0, 581.0, 567.0, 579.0, 519.0, 579.0, 581.0, 579.0, 570.0, 465.0, 576.0, 522.0, 627.0, 630.0, 582.0, 576.0, 579.0, 573.0, 579.0, 633.0, 576.0, 570.0, 498.0, 525.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 260.0, 270.0, 314.0, 319.0, 290.0, 286.0, 288.0, 288.0, 284.0, 295.0, 293.0, 286.0, 254.0, 268.0, 286.0, 296.0, 291.0, 291.0, 281.0, 292.0, 308.0, 325.0, 299.0, 271.0, 288.0, 291.0, 261.0, 258.0, 293.0, 291.0, 267.0, 255.0, 284.0, 295.0, 291.0, 288.0, 289.0, 290.0, 292.0, 281.0, 293.0, 289.0, 261.0, 264.0, 283.0, 296.0, 290.0, 289.0, 311.0, 319.0, 286.0, 296.0, 287.0, 292.0, 247.0, 254.0, 288.0, 291.0, 270.0, 282.0, 256.0, 266.0, 289.0, 290.0, 260.0, 262.0, 245.0, 280.0, 290.0, 289.0, 314.0, 316.0, 253.0, 269.0, 285.0, 294.0, 311.0, 319.0, 288.0, 294.0, 290.0, 289.0, 297.0, 285.0, 317.0, 
313.0, 293.0, 294.0, 290.0, 289.0, 282.0, 288.0, 288.0, 282.0, 318.0, 315.0, 279.0, 291.0, 290.0, 289.0, 287.0, 292.0, 296.0, 280.0, 267.0, 258.0, 286.0, 290.0, 291.0, 291.0, 292.0, 284.0, 257.0, 265.0, 292.0, 290.0, 277.0, 293.0, 315.0, 312.0, 291.0, 288.0, 291.0, 291.0, 297.0, 279.0, 285.0, 291.0, 285.0, 294.0, 291.0, 291.0, 284.0, 295.0, 290.0, 289.0, 286.0, 284.0, 296.0, 286.0, 283.0, 290.0, 291.0, 291.0, 279.0, 291.0, 296.0, 286.0, 285.0, 296.0, 283.0, 284.0, 290.0, 289.0, 257.0, 262.0, 288.0, 291.0, 291.0, 290.0, 288.0, 291.0, 284.0, 286.0, 239.0, 226.0, 287.0, 289.0, 251.0, 271.0, 314.0, 313.0, 306.0, 324.0, 293.0, 289.0, 290.0, 286.0, 291.0, 288.0, 279.0, 294.0, 291.0, 288.0, 324.0, 309.0, 296.0, 280.0, 282.0, 288.0, 250.0, 248.0, 275.0, 250.0, 288.0, 291.0, 285.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6969574750027725, "mean_inference_ms": 1.244395143457617, "mean_action_processing_ms": 0.13367687287664567, "mean_env_wait_ms": 0.8389141369511748, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10009600, "num_agent_steps_trained": 10009600, "num_env_steps_sampled": 5004800, "num_env_steps_trained": 5004800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5004800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10009600, "timers": {"training_iteration_time_ms": 3662.928, "learn_time_ms": 1125.14, "learn_throughput": 11376.366, "synch_weights_time_ms": 13.044}, "counters": {"num_env_steps_sampled": 5004800, "num_env_steps_trained": 5004800, "num_agent_steps_sampled": 10009600, "num_agent_steps_trained": 10009600}, "done": false, "episodes_total": 12512, "training_iteration": 391, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-51", "timestamp": 1666581951, "time_this_iter_s": 3.6799001693725586, "time_total_s": 1492.5532660484314, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1492.5532660484314, "timesteps_since_restore": 0, "iterations_since_restore": 391, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.9, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.59, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.47, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 15.26, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.6, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 
0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0016610324382781982, "policy_loss": -0.002023993991315365, "vf_loss": 7.828275680541992, "vf_explained_var": 0.5818252563476562, "kl": 0.003011357504874468, "entropy": 0.839729905128479, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5017600, "num_env_steps_trained": 5017600, "num_agent_steps_sampled": 10035200, "num_agent_steps_trained": 10035200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 574.99, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.495}, "custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.59, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.41, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.47, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.26, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.32, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.89, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 17.1, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.67, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.93, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.43, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.71, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 2, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.62, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.6, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.89, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 17.1, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.89, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 17.1, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 522.0, 525.0, 579.0, 630.0, 522.0, 579.0, 630.0, 582.0, 579.0, 582.0, 630.0, 587.0, 579.0, 570.0, 570.0, 633.0, 570.0, 579.0, 579.0, 576.0, 525.0, 576.0, 582.0, 576.0, 522.0, 582.0, 570.0, 627.0, 579.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 579.0, 570.0, 582.0, 573.0, 582.0, 570.0, 582.0, 581.0, 567.0, 579.0, 519.0, 579.0, 581.0, 579.0, 570.0, 465.0, 576.0, 522.0, 627.0, 630.0, 582.0, 576.0, 579.0, 573.0, 579.0, 633.0, 576.0, 570.0, 498.0, 525.0, 579.0, 573.0, 579.0, 576.0, 581.0, 582.0, 579.0, 579.0, 582.0, 573.0, 579.0, 587.0, 582.0, 582.0, 579.0, 582.0, 579.0, 555.0, 584.0, 582.0, 579.0, 576.0, 582.0, 582.0, 581.0, 530.0, 576.0, 570.0, 570.0, 573.0, 576.0, 576.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 260.0, 262.0, 245.0, 280.0, 290.0, 289.0, 314.0, 316.0, 253.0, 269.0, 285.0, 294.0, 311.0, 319.0, 288.0, 294.0, 290.0, 289.0, 297.0, 285.0, 317.0, 313.0, 293.0, 294.0, 290.0, 289.0, 282.0, 288.0, 288.0, 282.0, 318.0, 315.0, 279.0, 291.0, 290.0, 289.0, 287.0, 292.0, 296.0, 280.0, 267.0, 258.0, 286.0, 290.0, 291.0, 291.0, 292.0, 284.0, 257.0, 265.0, 292.0, 290.0, 277.0, 293.0, 315.0, 312.0, 291.0, 288.0, 291.0, 291.0, 297.0, 279.0, 285.0, 291.0, 285.0, 294.0, 291.0, 291.0, 284.0, 295.0, 290.0, 289.0, 286.0, 284.0, 296.0, 286.0, 283.0, 290.0, 291.0, 291.0, 279.0, 291.0, 296.0, 286.0, 285.0, 296.0, 283.0, 284.0, 290.0, 289.0, 257.0, 262.0, 288.0, 291.0, 291.0, 290.0, 288.0, 291.0, 284.0, 286.0, 239.0, 226.0, 287.0, 289.0, 251.0, 271.0, 314.0, 313.0, 306.0, 324.0, 293.0, 289.0, 290.0, 286.0, 291.0, 288.0, 279.0, 294.0, 291.0, 288.0, 324.0, 309.0, 296.0, 280.0, 282.0, 288.0, 250.0, 248.0, 275.0, 250.0, 288.0, 291.0, 285.0, 288.0, 287.0, 292.0, 287.0, 289.0, 293.0, 288.0, 290.0, 292.0, 284.0, 295.0, 286.0, 293.0, 288.0, 294.0, 290.0, 283.0, 298.0, 281.0, 287.0, 300.0, 301.0, 281.0, 293.0, 289.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 283.0, 272.0, 288.0, 296.0, 293.0, 289.0, 288.0, 291.0, 288.0, 288.0, 293.0, 289.0, 290.0, 292.0, 284.0, 297.0, 275.0, 255.0, 286.0, 290.0, 275.0, 295.0, 293.0, 277.0, 287.0, 286.0, 292.0, 284.0, 288.0, 288.0, 290.0, 292.0, 293.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6969268436790937, "mean_inference_ms": 1.2442765016526312, "mean_action_processing_ms": 0.13367506415155378, "mean_env_wait_ms": 0.8388712862759672, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 574.99, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.495}, "hist_stats": {"episode_reward": [579.0, 522.0, 525.0, 579.0, 630.0, 522.0, 579.0, 630.0, 582.0, 579.0, 582.0, 630.0, 587.0, 579.0, 570.0, 570.0, 633.0, 570.0, 579.0, 579.0, 576.0, 525.0, 576.0, 582.0, 576.0, 522.0, 582.0, 570.0, 627.0, 579.0, 582.0, 576.0, 576.0, 579.0, 582.0, 579.0, 579.0, 570.0, 582.0, 573.0, 582.0, 570.0, 582.0, 581.0, 567.0, 579.0, 519.0, 579.0, 581.0, 579.0, 570.0, 465.0, 576.0, 522.0, 627.0, 630.0, 582.0, 576.0, 579.0, 573.0, 579.0, 633.0, 576.0, 570.0, 498.0, 525.0, 579.0, 573.0, 579.0, 576.0, 581.0, 582.0, 579.0, 579.0, 582.0, 573.0, 579.0, 587.0, 582.0, 582.0, 579.0, 582.0, 579.0, 555.0, 584.0, 582.0, 579.0, 576.0, 582.0, 582.0, 581.0, 530.0, 576.0, 570.0, 570.0, 573.0, 576.0, 576.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 260.0, 262.0, 245.0, 280.0, 290.0, 289.0, 314.0, 316.0, 253.0, 269.0, 285.0, 294.0, 311.0, 319.0, 288.0, 294.0, 290.0, 289.0, 297.0, 285.0, 317.0, 313.0, 293.0, 294.0, 290.0, 289.0, 282.0, 288.0, 288.0, 282.0, 318.0, 315.0, 279.0, 291.0, 290.0, 289.0, 287.0, 292.0, 296.0, 280.0, 267.0, 258.0, 286.0, 290.0, 291.0, 291.0, 292.0, 284.0, 257.0, 265.0, 292.0, 290.0, 277.0, 293.0, 315.0, 312.0, 291.0, 288.0, 291.0, 291.0, 297.0, 279.0, 285.0, 291.0, 285.0, 294.0, 291.0, 291.0, 284.0, 295.0, 290.0, 289.0, 286.0, 284.0, 296.0, 286.0, 283.0, 290.0, 291.0, 291.0, 279.0, 291.0, 296.0, 286.0, 
285.0, 296.0, 283.0, 284.0, 290.0, 289.0, 257.0, 262.0, 288.0, 291.0, 291.0, 290.0, 288.0, 291.0, 284.0, 286.0, 239.0, 226.0, 287.0, 289.0, 251.0, 271.0, 314.0, 313.0, 306.0, 324.0, 293.0, 289.0, 290.0, 286.0, 291.0, 288.0, 279.0, 294.0, 291.0, 288.0, 324.0, 309.0, 296.0, 280.0, 282.0, 288.0, 250.0, 248.0, 275.0, 250.0, 288.0, 291.0, 285.0, 288.0, 287.0, 292.0, 287.0, 289.0, 293.0, 288.0, 290.0, 292.0, 284.0, 295.0, 286.0, 293.0, 288.0, 294.0, 290.0, 283.0, 298.0, 281.0, 287.0, 300.0, 301.0, 281.0, 293.0, 289.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 283.0, 272.0, 288.0, 296.0, 293.0, 289.0, 288.0, 291.0, 288.0, 288.0, 293.0, 289.0, 290.0, 292.0, 284.0, 297.0, 275.0, 255.0, 286.0, 290.0, 275.0, 295.0, 293.0, 277.0, 287.0, 286.0, 292.0, 284.0, 288.0, 288.0, 290.0, 292.0, 293.0, 280.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6969268436790937, "mean_inference_ms": 1.2442765016526312, "mean_action_processing_ms": 0.13367506415155378, "mean_env_wait_ms": 0.8388712862759672, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10035200, "num_agent_steps_trained": 10035200, "num_env_steps_sampled": 5017600, "num_env_steps_trained": 5017600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5017600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10035200, "timers": {"training_iteration_time_ms": 3669.587, "learn_time_ms": 1130.295, "learn_throughput": 11324.482, "synch_weights_time_ms": 12.194}, "counters": {"num_env_steps_sampled": 5017600, "num_env_steps_trained": 5017600, "num_agent_steps_sampled": 10035200, "num_agent_steps_trained": 10035200}, "done": false, "episodes_total": 12544, "training_iteration": 392, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-55", "timestamp": 1666581955, "time_this_iter_s": 3.7447876930236816, "time_total_s": 1496.298053741455, "pid": 1816099, 
"hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1496.298053741455, "timesteps_since_restore": 0, "iterations_since_restore": 392, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.783333333333335, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.84, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 15.33, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.14, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.0, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.98, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.62, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.0, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.98, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.0, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.98, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007807082729414105, "policy_loss": 0.0004165899008512497, "vf_loss": 7.79111385345459, "vf_explained_var": 0.5709396600723267, "kl": 0.0023730946704745293, "entropy": 0.8299859762191772, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5030400, "num_env_steps_trained": 5030400, "num_agent_steps_sampled": 10060800, "num_agent_steps_trained": 10060800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 575.24, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.62}, "custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.84, "shaped_reward_min": 138, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.33, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.14, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.0, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.98, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.65, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.95, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.45, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.41, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.65, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.38, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.62, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.0, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.98, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.0, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.98, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 582.0, 579.0, 579.0, 570.0, 582.0, 573.0, 582.0, 570.0, 582.0, 581.0, 567.0, 579.0, 519.0, 579.0, 581.0, 579.0, 570.0, 465.0, 576.0, 522.0, 627.0, 630.0, 582.0, 576.0, 579.0, 573.0, 579.0, 633.0, 576.0, 570.0, 498.0, 525.0, 579.0, 573.0, 579.0, 576.0, 581.0, 582.0, 579.0, 579.0, 582.0, 573.0, 579.0, 587.0, 582.0, 582.0, 579.0, 582.0, 579.0, 555.0, 584.0, 582.0, 579.0, 576.0, 582.0, 582.0, 581.0, 530.0, 576.0, 570.0, 570.0, 573.0, 576.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 570.0, 576.0, 582.0, 587.0, 587.0, 582.0, 582.0, 579.0, 530.0, 627.0, 576.0, 579.0, 573.0, 582.0, 525.0, 579.0, 627.0, 576.0, 579.0, 579.0, 576.0, 627.0, 576.0, 579.0, 573.0, 573.0, 582.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 285.0, 294.0, 291.0, 291.0, 284.0, 295.0, 290.0, 289.0, 286.0, 284.0, 296.0, 286.0, 283.0, 290.0, 291.0, 291.0, 279.0, 291.0, 296.0, 286.0, 285.0, 296.0, 283.0, 284.0, 290.0, 289.0, 257.0, 262.0, 288.0, 291.0, 291.0, 290.0, 288.0, 291.0, 284.0, 286.0, 239.0, 226.0, 287.0, 289.0, 251.0, 271.0, 314.0, 313.0, 306.0, 324.0, 293.0, 289.0, 290.0, 286.0, 291.0, 288.0, 279.0, 294.0, 291.0, 288.0, 324.0, 309.0, 296.0, 280.0, 282.0, 288.0, 250.0, 248.0, 275.0, 250.0, 288.0, 291.0, 285.0, 288.0, 287.0, 292.0, 287.0, 289.0, 293.0, 288.0, 290.0, 292.0, 284.0, 295.0, 286.0, 293.0, 288.0, 294.0, 290.0, 283.0, 298.0, 281.0, 287.0, 300.0, 301.0, 281.0, 293.0, 289.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 283.0, 272.0, 288.0, 296.0, 293.0, 289.0, 288.0, 291.0, 288.0, 288.0, 293.0, 289.0, 290.0, 292.0, 284.0, 297.0, 275.0, 255.0, 286.0, 290.0, 275.0, 295.0, 293.0, 277.0, 287.0, 286.0, 292.0, 284.0, 288.0, 288.0, 290.0, 292.0, 293.0, 280.0, 294.0, 288.0, 288.0, 294.0, 285.0, 294.0, 295.0, 275.0, 291.0, 285.0, 290.0, 292.0, 290.0, 297.0, 295.0, 292.0, 296.0, 286.0, 293.0, 289.0, 291.0, 288.0, 257.0, 273.0, 311.0, 316.0, 288.0, 288.0, 278.0, 301.0, 283.0, 290.0, 289.0, 293.0, 259.0, 266.0, 296.0, 283.0, 313.0, 314.0, 290.0, 286.0, 285.0, 294.0, 282.0, 297.0, 293.0, 283.0, 311.0, 316.0, 296.0, 280.0, 291.0, 288.0, 293.0, 280.0, 293.0, 280.0, 288.0, 294.0, 263.0, 262.0, 285.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6968894112824401, "mean_inference_ms": 1.2441523884701358, "mean_action_processing_ms": 0.1336716106768715, "mean_env_wait_ms": 0.8388130845267634, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 465.0, "episode_reward_mean": 575.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.62}, "hist_stats": {"episode_reward": [576.0, 579.0, 582.0, 579.0, 579.0, 570.0, 582.0, 573.0, 582.0, 570.0, 582.0, 581.0, 567.0, 579.0, 519.0, 579.0, 581.0, 579.0, 570.0, 465.0, 576.0, 522.0, 627.0, 630.0, 582.0, 576.0, 579.0, 573.0, 579.0, 633.0, 576.0, 570.0, 498.0, 525.0, 579.0, 573.0, 579.0, 576.0, 581.0, 582.0, 579.0, 579.0, 582.0, 573.0, 579.0, 587.0, 582.0, 582.0, 579.0, 582.0, 579.0, 555.0, 584.0, 582.0, 579.0, 576.0, 582.0, 582.0, 581.0, 530.0, 576.0, 570.0, 570.0, 573.0, 576.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 570.0, 576.0, 582.0, 587.0, 587.0, 582.0, 582.0, 579.0, 530.0, 627.0, 576.0, 579.0, 573.0, 582.0, 525.0, 579.0, 627.0, 576.0, 579.0, 579.0, 576.0, 627.0, 576.0, 579.0, 573.0, 573.0, 582.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 285.0, 294.0, 291.0, 291.0, 284.0, 295.0, 290.0, 289.0, 286.0, 284.0, 296.0, 286.0, 283.0, 290.0, 291.0, 291.0, 279.0, 291.0, 296.0, 286.0, 285.0, 296.0, 283.0, 284.0, 290.0, 289.0, 257.0, 262.0, 288.0, 291.0, 291.0, 290.0, 288.0, 291.0, 284.0, 286.0, 239.0, 226.0, 287.0, 289.0, 251.0, 271.0, 314.0, 313.0, 306.0, 324.0, 293.0, 289.0, 290.0, 286.0, 291.0, 288.0, 279.0, 294.0, 291.0, 288.0, 324.0, 309.0, 296.0, 280.0, 282.0, 288.0, 250.0, 248.0, 275.0, 250.0, 288.0, 291.0, 285.0, 288.0, 287.0, 292.0, 287.0, 289.0, 293.0, 288.0, 290.0, 292.0, 284.0, 295.0, 286.0, 293.0, 288.0, 294.0, 290.0, 
283.0, 298.0, 281.0, 287.0, 300.0, 301.0, 281.0, 293.0, 289.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 283.0, 272.0, 288.0, 296.0, 293.0, 289.0, 288.0, 291.0, 288.0, 288.0, 293.0, 289.0, 290.0, 292.0, 284.0, 297.0, 275.0, 255.0, 286.0, 290.0, 275.0, 295.0, 293.0, 277.0, 287.0, 286.0, 292.0, 284.0, 288.0, 288.0, 290.0, 292.0, 293.0, 280.0, 294.0, 288.0, 288.0, 294.0, 285.0, 294.0, 295.0, 275.0, 291.0, 285.0, 290.0, 292.0, 290.0, 297.0, 295.0, 292.0, 296.0, 286.0, 293.0, 289.0, 291.0, 288.0, 257.0, 273.0, 311.0, 316.0, 288.0, 288.0, 278.0, 301.0, 283.0, 290.0, 289.0, 293.0, 259.0, 266.0, 296.0, 283.0, 313.0, 314.0, 290.0, 286.0, 285.0, 294.0, 282.0, 297.0, 293.0, 283.0, 311.0, 316.0, 296.0, 280.0, 291.0, 288.0, 293.0, 280.0, 293.0, 280.0, 288.0, 294.0, 263.0, 262.0, 285.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6968894112824401, "mean_inference_ms": 1.2441523884701358, "mean_action_processing_ms": 0.1336716106768715, "mean_env_wait_ms": 0.8388130845267634, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10060800, "num_agent_steps_trained": 10060800, "num_env_steps_sampled": 5030400, "num_env_steps_trained": 5030400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5030400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10060800, "timers": {"training_iteration_time_ms": 3647.035, "learn_time_ms": 1133.033, "learn_throughput": 11297.107, "synch_weights_time_ms": 11.676}, "counters": {"num_env_steps_sampled": 5030400, "num_env_steps_trained": 5030400, "num_agent_steps_sampled": 10060800, "num_agent_steps_trained": 10060800}, "done": false, "episodes_total": 12576, "training_iteration": 393, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-25-59", "timestamp": 1666581959, "time_this_iter_s": 3.6797895431518555, "time_total_s": 1499.977843284607, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1499.977843284607, "timesteps_since_restore": 0, "iterations_since_restore": 393, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.03333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.01, "shaped_reward_min": 138, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.2, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 15.4, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.05, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.82, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.28, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.64, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.82, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.82, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002479029353708029, "policy_loss": -0.0028482386842370033, "vf_loss": 7.850776672363281, "vf_explained_var": 0.5610437393188477, "kl": 0.0021069832146167755, "entropy": 0.8317380547523499, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5043200, "num_env_steps_trained": 5043200, "num_agent_steps_sampled": 10086400, "num_agent_steps_trained": 10086400}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 419.0, "episode_reward_mean": 571.61, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 285.805}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.01, "shaped_reward_min": 138, "shaped_reward_max": 187, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.48, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.2, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.4, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.05, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.82, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.57, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.75, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.32, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.28, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.82, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.82, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [498.0, 525.0, 579.0, 573.0, 579.0, 576.0, 581.0, 582.0, 579.0, 579.0, 582.0, 573.0, 579.0, 587.0, 582.0, 582.0, 579.0, 582.0, 579.0, 555.0, 584.0, 582.0, 579.0, 576.0, 582.0, 582.0, 581.0, 530.0, 576.0, 570.0, 570.0, 573.0, 576.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 570.0, 576.0, 582.0, 587.0, 587.0, 582.0, 582.0, 579.0, 530.0, 627.0, 576.0, 579.0, 573.0, 582.0, 525.0, 579.0, 627.0, 576.0, 579.0, 579.0, 576.0, 627.0, 576.0, 579.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 579.0, 419.0, 579.0, 582.0, 579.0, 579.0, 587.0, 582.0, 573.0, 576.0, 573.0, 582.0, 582.0, 570.0, 582.0, 570.0, 573.0, 587.0, 570.0, 573.0, 576.0, 570.0, 579.0, 530.0, 567.0, 570.0, 465.0, 527.0, 582.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [250.0, 248.0, 275.0, 250.0, 288.0, 291.0, 285.0, 288.0, 287.0, 292.0, 287.0, 289.0, 293.0, 288.0, 290.0, 292.0, 284.0, 295.0, 286.0, 293.0, 288.0, 294.0, 290.0, 283.0, 298.0, 281.0, 287.0, 300.0, 301.0, 281.0, 293.0, 289.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 283.0, 272.0, 288.0, 296.0, 293.0, 289.0, 288.0, 291.0, 288.0, 288.0, 293.0, 289.0, 290.0, 292.0, 284.0, 297.0, 275.0, 255.0, 286.0, 290.0, 275.0, 295.0, 293.0, 277.0, 287.0, 286.0, 292.0, 284.0, 288.0, 288.0, 290.0, 292.0, 293.0, 280.0, 294.0, 288.0, 288.0, 294.0, 285.0, 294.0, 295.0, 275.0, 291.0, 285.0, 290.0, 292.0, 290.0, 297.0, 295.0, 292.0, 296.0, 286.0, 293.0, 289.0, 291.0, 288.0, 257.0, 273.0, 311.0, 316.0, 288.0, 288.0, 278.0, 301.0, 283.0, 290.0, 289.0, 293.0, 259.0, 266.0, 296.0, 283.0, 313.0, 314.0, 290.0, 286.0, 285.0, 294.0, 282.0, 297.0, 293.0, 283.0, 311.0, 316.0, 296.0, 280.0, 291.0, 288.0, 293.0, 280.0, 293.0, 280.0, 288.0, 294.0, 263.0, 262.0, 285.0, 288.0, 284.0, 286.0, 284.0, 295.0, 218.0, 201.0, 295.0, 284.0, 288.0, 294.0, 288.0, 291.0, 293.0, 286.0, 290.0, 297.0, 294.0, 288.0, 294.0, 279.0, 285.0, 291.0, 288.0, 285.0, 290.0, 292.0, 285.0, 297.0, 279.0, 291.0, 292.0, 290.0, 285.0, 285.0, 278.0, 295.0, 291.0, 296.0, 279.0, 291.0, 284.0, 289.0, 280.0, 296.0, 279.0, 291.0, 290.0, 289.0, 264.0, 266.0, 283.0, 284.0, 279.0, 291.0, 235.0, 230.0, 265.0, 262.0, 286.0, 296.0, 262.0, 260.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6968450632443728, "mean_inference_ms": 1.2440176732192514, "mean_action_processing_ms": 0.1336656129888326, "mean_env_wait_ms": 0.838737991964549, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 419.0, "episode_reward_mean": 571.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 285.805}, "hist_stats": {"episode_reward": [498.0, 525.0, 579.0, 573.0, 579.0, 576.0, 581.0, 582.0, 579.0, 579.0, 582.0, 573.0, 579.0, 587.0, 582.0, 582.0, 579.0, 582.0, 579.0, 555.0, 584.0, 582.0, 579.0, 576.0, 582.0, 582.0, 581.0, 530.0, 576.0, 570.0, 570.0, 573.0, 576.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 570.0, 576.0, 582.0, 587.0, 587.0, 582.0, 582.0, 579.0, 530.0, 627.0, 576.0, 579.0, 573.0, 582.0, 525.0, 579.0, 627.0, 576.0, 579.0, 579.0, 576.0, 627.0, 576.0, 579.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 579.0, 419.0, 579.0, 582.0, 579.0, 579.0, 587.0, 582.0, 573.0, 576.0, 573.0, 582.0, 582.0, 570.0, 582.0, 570.0, 573.0, 587.0, 570.0, 573.0, 576.0, 570.0, 579.0, 530.0, 567.0, 570.0, 465.0, 527.0, 582.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [250.0, 248.0, 275.0, 250.0, 288.0, 291.0, 285.0, 288.0, 287.0, 292.0, 287.0, 289.0, 293.0, 288.0, 290.0, 292.0, 284.0, 295.0, 286.0, 293.0, 288.0, 294.0, 290.0, 283.0, 298.0, 281.0, 287.0, 300.0, 301.0, 281.0, 293.0, 289.0, 288.0, 291.0, 290.0, 292.0, 287.0, 292.0, 283.0, 272.0, 288.0, 296.0, 293.0, 289.0, 288.0, 291.0, 288.0, 288.0, 293.0, 289.0, 290.0, 292.0, 284.0, 297.0, 275.0, 255.0, 286.0, 290.0, 275.0, 295.0, 293.0, 277.0, 287.0, 286.0, 292.0, 284.0, 288.0, 288.0, 290.0, 292.0, 293.0, 280.0, 294.0, 288.0, 288.0, 294.0, 285.0, 294.0, 295.0, 275.0, 291.0, 285.0, 290.0, 292.0, 290.0, 297.0, 
295.0, 292.0, 296.0, 286.0, 293.0, 289.0, 291.0, 288.0, 257.0, 273.0, 311.0, 316.0, 288.0, 288.0, 278.0, 301.0, 283.0, 290.0, 289.0, 293.0, 259.0, 266.0, 296.0, 283.0, 313.0, 314.0, 290.0, 286.0, 285.0, 294.0, 282.0, 297.0, 293.0, 283.0, 311.0, 316.0, 296.0, 280.0, 291.0, 288.0, 293.0, 280.0, 293.0, 280.0, 288.0, 294.0, 263.0, 262.0, 285.0, 288.0, 284.0, 286.0, 284.0, 295.0, 218.0, 201.0, 295.0, 284.0, 288.0, 294.0, 288.0, 291.0, 293.0, 286.0, 290.0, 297.0, 294.0, 288.0, 294.0, 279.0, 285.0, 291.0, 288.0, 285.0, 290.0, 292.0, 285.0, 297.0, 279.0, 291.0, 292.0, 290.0, 285.0, 285.0, 278.0, 295.0, 291.0, 296.0, 279.0, 291.0, 284.0, 289.0, 280.0, 296.0, 279.0, 291.0, 290.0, 289.0, 264.0, 266.0, 283.0, 284.0, 279.0, 291.0, 235.0, 230.0, 265.0, 262.0, 286.0, 296.0, 262.0, 260.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6968450632443728, "mean_inference_ms": 1.2440176732192514, "mean_action_processing_ms": 0.1336656129888326, "mean_env_wait_ms": 0.838737991964549, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10086400, "num_agent_steps_trained": 10086400, "num_env_steps_sampled": 5043200, "num_env_steps_trained": 5043200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5043200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10086400, "timers": {"training_iteration_time_ms": 3612.599, "learn_time_ms": 1126.168, "learn_throughput": 11365.974, "synch_weights_time_ms": 11.582}, "counters": {"num_env_steps_sampled": 5043200, "num_env_steps_trained": 5043200, "num_agent_steps_sampled": 10086400, "num_agent_steps_trained": 10086400}, "done": false, "episodes_total": 12608, "training_iteration": 394, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-02", "timestamp": 1666581962, "time_this_iter_s": 3.534458875656128, "time_total_s": 1503.512302160263, "pid": 1816099, 
"hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1503.512302160263, "timesteps_since_restore": 0, "iterations_since_restore": 394, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.339999999999996, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 175.26, "shaped_reward_min": 85, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.8, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.84, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 15.72, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.41, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.29, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.8, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.41, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.29, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.41, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.29, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.000399833545088768, "policy_loss": -0.000769574660807848, "vf_loss": 7.834109306335449, "vf_explained_var": 0.580340564250946, "kl": 0.0025107176043093204, "entropy": 0.827340841293335, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5056000, "num_env_steps_trained": 5056000, "num_agent_steps_sampled": 10112000, "num_agent_steps_trained": 10112000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 205.0, "episode_reward_mean": 568.86, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 284.43}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 175.26, "shaped_reward_min": 85, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.8, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.84, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.72, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 3, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.41, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.29, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.86, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.8, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.41, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.29, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.41, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.29, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 570.0, 576.0, 582.0, 587.0, 587.0, 582.0, 582.0, 579.0, 530.0, 627.0, 576.0, 579.0, 573.0, 582.0, 525.0, 579.0, 627.0, 576.0, 579.0, 579.0, 576.0, 627.0, 576.0, 579.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 579.0, 419.0, 579.0, 582.0, 579.0, 579.0, 587.0, 582.0, 573.0, 576.0, 573.0, 582.0, 582.0, 570.0, 582.0, 570.0, 573.0, 587.0, 570.0, 573.0, 576.0, 570.0, 579.0, 530.0, 567.0, 570.0, 465.0, 527.0, 582.0, 522.0, 579.0, 579.0, 582.0, 579.0, 498.0, 582.0, 570.0, 582.0, 579.0, 576.0, 573.0, 570.0, 579.0, 570.0, 576.0, 205.0, 587.0, 582.0, 576.0, 579.0, 582.0, 579.0, 584.0, 573.0, 579.0, 579.0, 633.0, 525.0, 573.0, 582.0, 573.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 284.0, 288.0, 288.0, 290.0, 292.0, 293.0, 280.0, 294.0, 288.0, 288.0, 294.0, 285.0, 294.0, 295.0, 275.0, 291.0, 285.0, 290.0, 292.0, 290.0, 297.0, 295.0, 292.0, 296.0, 286.0, 293.0, 289.0, 291.0, 288.0, 257.0, 273.0, 311.0, 316.0, 288.0, 288.0, 278.0, 301.0, 283.0, 290.0, 289.0, 293.0, 259.0, 266.0, 296.0, 283.0, 313.0, 314.0, 290.0, 286.0, 285.0, 294.0, 282.0, 297.0, 293.0, 283.0, 311.0, 316.0, 296.0, 280.0, 291.0, 288.0, 293.0, 280.0, 293.0, 280.0, 288.0, 294.0, 263.0, 262.0, 285.0, 288.0, 284.0, 286.0, 284.0, 295.0, 218.0, 201.0, 295.0, 284.0, 288.0, 294.0, 288.0, 291.0, 293.0, 286.0, 290.0, 297.0, 294.0, 288.0, 294.0, 279.0, 285.0, 291.0, 288.0, 285.0, 290.0, 292.0, 285.0, 297.0, 279.0, 291.0, 292.0, 290.0, 285.0, 285.0, 278.0, 295.0, 291.0, 296.0, 279.0, 291.0, 284.0, 289.0, 280.0, 296.0, 279.0, 291.0, 290.0, 289.0, 264.0, 266.0, 283.0, 284.0, 279.0, 291.0, 235.0, 230.0, 265.0, 262.0, 286.0, 296.0, 262.0, 260.0, 288.0, 291.0, 294.0, 285.0, 293.0, 289.0, 290.0, 289.0, 241.0, 257.0, 291.0, 291.0, 286.0, 284.0, 291.0, 291.0, 291.0, 288.0, 285.0, 291.0, 286.0, 287.0, 275.0, 295.0, 285.0, 294.0, 285.0, 285.0, 294.0, 282.0, 108.0, 97.0, 296.0, 291.0, 290.0, 292.0, 284.0, 292.0, 292.0, 287.0, 290.0, 292.0, 293.0, 286.0, 302.0, 282.0, 283.0, 290.0, 286.0, 293.0, 285.0, 294.0, 316.0, 317.0, 269.0, 256.0, 293.0, 280.0, 291.0, 291.0, 280.0, 293.0, 291.0, 282.0, 293.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.696793336703791, "mean_inference_ms": 1.243883291734237, "mean_action_processing_ms": 0.1336582079706314, "mean_env_wait_ms": 0.8386582320927474, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 205.0, "episode_reward_mean": 568.86, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 284.43}, "hist_stats": {"episode_reward": [576.0, 576.0, 582.0, 573.0, 582.0, 582.0, 579.0, 570.0, 576.0, 582.0, 587.0, 587.0, 582.0, 582.0, 579.0, 530.0, 627.0, 576.0, 579.0, 573.0, 582.0, 525.0, 579.0, 627.0, 576.0, 579.0, 579.0, 576.0, 627.0, 576.0, 579.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 579.0, 419.0, 579.0, 582.0, 579.0, 579.0, 587.0, 582.0, 573.0, 576.0, 573.0, 582.0, 582.0, 570.0, 582.0, 570.0, 573.0, 587.0, 570.0, 573.0, 576.0, 570.0, 579.0, 530.0, 567.0, 570.0, 465.0, 527.0, 582.0, 522.0, 579.0, 579.0, 582.0, 579.0, 498.0, 582.0, 570.0, 582.0, 579.0, 576.0, 573.0, 570.0, 579.0, 570.0, 576.0, 205.0, 587.0, 582.0, 576.0, 579.0, 582.0, 579.0, 584.0, 573.0, 579.0, 579.0, 633.0, 525.0, 573.0, 582.0, 573.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 284.0, 288.0, 288.0, 290.0, 292.0, 293.0, 280.0, 294.0, 288.0, 288.0, 294.0, 285.0, 294.0, 295.0, 275.0, 291.0, 285.0, 290.0, 292.0, 290.0, 297.0, 295.0, 292.0, 296.0, 286.0, 293.0, 289.0, 291.0, 288.0, 257.0, 273.0, 311.0, 316.0, 288.0, 288.0, 278.0, 301.0, 283.0, 290.0, 289.0, 293.0, 259.0, 266.0, 296.0, 283.0, 313.0, 314.0, 290.0, 286.0, 285.0, 294.0, 282.0, 297.0, 293.0, 283.0, 311.0, 316.0, 296.0, 280.0, 291.0, 288.0, 293.0, 280.0, 293.0, 280.0, 288.0, 294.0, 263.0, 262.0, 285.0, 288.0, 284.0, 286.0, 284.0, 295.0, 218.0, 201.0, 295.0, 284.0, 288.0, 294.0, 288.0, 291.0, 293.0, 286.0, 290.0, 
297.0, 294.0, 288.0, 294.0, 279.0, 285.0, 291.0, 288.0, 285.0, 290.0, 292.0, 285.0, 297.0, 279.0, 291.0, 292.0, 290.0, 285.0, 285.0, 278.0, 295.0, 291.0, 296.0, 279.0, 291.0, 284.0, 289.0, 280.0, 296.0, 279.0, 291.0, 290.0, 289.0, 264.0, 266.0, 283.0, 284.0, 279.0, 291.0, 235.0, 230.0, 265.0, 262.0, 286.0, 296.0, 262.0, 260.0, 288.0, 291.0, 294.0, 285.0, 293.0, 289.0, 290.0, 289.0, 241.0, 257.0, 291.0, 291.0, 286.0, 284.0, 291.0, 291.0, 291.0, 288.0, 285.0, 291.0, 286.0, 287.0, 275.0, 295.0, 285.0, 294.0, 285.0, 285.0, 294.0, 282.0, 108.0, 97.0, 296.0, 291.0, 290.0, 292.0, 284.0, 292.0, 292.0, 287.0, 290.0, 292.0, 293.0, 286.0, 302.0, 282.0, 283.0, 290.0, 286.0, 293.0, 285.0, 294.0, 316.0, 317.0, 269.0, 256.0, 293.0, 280.0, 291.0, 291.0, 280.0, 293.0, 291.0, 282.0, 293.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.696793336703791, "mean_inference_ms": 1.243883291734237, "mean_action_processing_ms": 0.1336582079706314, "mean_env_wait_ms": 0.8386582320927474, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10112000, "num_agent_steps_trained": 10112000, "num_env_steps_sampled": 5056000, "num_env_steps_trained": 5056000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5056000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10112000, "timers": {"training_iteration_time_ms": 3593.292, "learn_time_ms": 1112.946, "learn_throughput": 11501.003, "synch_weights_time_ms": 11.562}, "counters": {"num_env_steps_sampled": 5056000, "num_env_steps_trained": 5056000, "num_agent_steps_sampled": 10112000, "num_agent_steps_trained": 10112000}, "done": false, "episodes_total": 12640, "training_iteration": 395, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-06", "timestamp": 1666581966, "time_this_iter_s": 3.558100461959839, "time_total_s": 1507.070402622223, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1507.070402622223, "timesteps_since_restore": 0, "iterations_since_restore": 395, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.6, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.93, "shaped_reward_min": 85, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.79, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.75, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 
15.67, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.59, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.31, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, 
"soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.31, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.31, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0011330365668982267, "policy_loss": 0.0007607118459418416, "vf_loss": 7.895255088806152, "vf_explained_var": 0.5666273236274719, "kl": 0.0026027029380202293, "entropy": 0.8344019055366516, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5068800, "num_env_steps_trained": 5068800, "num_agent_steps_sampled": 10137600, "num_agent_steps_trained": 10137600}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 205.0, "episode_reward_mean": 565.13, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 282.565}, "custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.93, "shaped_reward_min": 85, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.79, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.75, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.67, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.59, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.31, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.09, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.9, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.31, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.31, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 525.0, 573.0, 570.0, 579.0, 419.0, 579.0, 582.0, 579.0, 579.0, 587.0, 582.0, 573.0, 576.0, 573.0, 582.0, 582.0, 570.0, 582.0, 570.0, 573.0, 587.0, 570.0, 573.0, 576.0, 570.0, 579.0, 530.0, 567.0, 570.0, 465.0, 527.0, 582.0, 522.0, 579.0, 579.0, 582.0, 579.0, 498.0, 582.0, 570.0, 582.0, 579.0, 576.0, 573.0, 570.0, 579.0, 570.0, 576.0, 205.0, 587.0, 582.0, 576.0, 579.0, 582.0, 579.0, 584.0, 573.0, 579.0, 579.0, 633.0, 525.0, 573.0, 582.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 576.0, 516.0, 627.0, 579.0, 579.0, 573.0, 579.0, 582.0, 579.0, 576.0, 579.0, 576.0, 525.0, 627.0, 576.0, 530.0, 522.0, 582.0, 579.0, 573.0, 579.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 525.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 280.0, 288.0, 294.0, 263.0, 262.0, 285.0, 288.0, 284.0, 286.0, 284.0, 295.0, 218.0, 201.0, 295.0, 284.0, 288.0, 294.0, 288.0, 291.0, 293.0, 286.0, 290.0, 297.0, 294.0, 288.0, 294.0, 279.0, 285.0, 291.0, 288.0, 285.0, 290.0, 292.0, 285.0, 297.0, 279.0, 291.0, 292.0, 290.0, 285.0, 285.0, 278.0, 295.0, 291.0, 296.0, 279.0, 291.0, 284.0, 289.0, 280.0, 296.0, 279.0, 291.0, 290.0, 289.0, 264.0, 266.0, 283.0, 284.0, 279.0, 291.0, 235.0, 230.0, 265.0, 262.0, 286.0, 296.0, 262.0, 260.0, 288.0, 291.0, 294.0, 285.0, 293.0, 289.0, 290.0, 289.0, 241.0, 257.0, 291.0, 291.0, 286.0, 284.0, 291.0, 291.0, 291.0, 288.0, 285.0, 291.0, 286.0, 287.0, 275.0, 295.0, 285.0, 294.0, 285.0, 285.0, 294.0, 282.0, 108.0, 97.0, 296.0, 291.0, 290.0, 292.0, 284.0, 292.0, 292.0, 287.0, 290.0, 292.0, 293.0, 286.0, 302.0, 282.0, 283.0, 290.0, 286.0, 293.0, 285.0, 294.0, 316.0, 317.0, 269.0, 256.0, 293.0, 280.0, 291.0, 291.0, 280.0, 293.0, 291.0, 282.0, 293.0, 289.0, 259.0, 266.0, 289.0, 284.0, 282.0, 288.0, 293.0, 283.0, 251.0, 265.0, 310.0, 317.0, 290.0, 289.0, 282.0, 297.0, 287.0, 286.0, 288.0, 291.0, 288.0, 294.0, 294.0, 285.0, 287.0, 289.0, 289.0, 290.0, 291.0, 285.0, 260.0, 265.0, 313.0, 314.0, 285.0, 291.0, 262.0, 268.0, 261.0, 261.0, 294.0, 288.0, 285.0, 294.0, 285.0, 288.0, 293.0, 286.0, 285.0, 288.0, 260.0, 265.0, 289.0, 287.0, 287.0, 295.0, 285.0, 291.0, 285.0, 285.0, 263.0, 262.0, 275.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6967359713771896, "mean_inference_ms": 1.2437290472164755, "mean_action_processing_ms": 0.13364784707998414, "mean_env_wait_ms": 0.838560435275775, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 205.0, "episode_reward_mean": 565.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 282.565}, "hist_stats": {"episode_reward": [573.0, 582.0, 525.0, 573.0, 570.0, 579.0, 419.0, 579.0, 582.0, 579.0, 579.0, 587.0, 582.0, 573.0, 576.0, 573.0, 582.0, 582.0, 570.0, 582.0, 570.0, 573.0, 587.0, 570.0, 573.0, 576.0, 570.0, 579.0, 530.0, 567.0, 570.0, 465.0, 527.0, 582.0, 522.0, 579.0, 579.0, 582.0, 579.0, 498.0, 582.0, 570.0, 582.0, 579.0, 576.0, 573.0, 570.0, 579.0, 570.0, 576.0, 205.0, 587.0, 582.0, 576.0, 579.0, 582.0, 579.0, 584.0, 573.0, 579.0, 579.0, 633.0, 525.0, 573.0, 582.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 576.0, 516.0, 627.0, 579.0, 579.0, 573.0, 579.0, 582.0, 579.0, 576.0, 579.0, 576.0, 525.0, 627.0, 576.0, 530.0, 522.0, 582.0, 579.0, 573.0, 579.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 525.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 280.0, 288.0, 294.0, 263.0, 262.0, 285.0, 288.0, 284.0, 286.0, 284.0, 295.0, 218.0, 201.0, 295.0, 284.0, 288.0, 294.0, 288.0, 291.0, 293.0, 286.0, 290.0, 297.0, 294.0, 288.0, 294.0, 279.0, 285.0, 291.0, 288.0, 285.0, 290.0, 292.0, 285.0, 297.0, 279.0, 291.0, 292.0, 290.0, 285.0, 285.0, 278.0, 295.0, 291.0, 296.0, 279.0, 291.0, 284.0, 289.0, 280.0, 296.0, 279.0, 291.0, 290.0, 289.0, 264.0, 266.0, 283.0, 284.0, 279.0, 291.0, 235.0, 230.0, 265.0, 262.0, 286.0, 296.0, 262.0, 260.0, 288.0, 291.0, 294.0, 285.0, 293.0, 289.0, 290.0, 289.0, 241.0, 257.0, 291.0, 291.0, 286.0, 284.0, 291.0, 291.0, 291.0, 
288.0, 285.0, 291.0, 286.0, 287.0, 275.0, 295.0, 285.0, 294.0, 285.0, 285.0, 294.0, 282.0, 108.0, 97.0, 296.0, 291.0, 290.0, 292.0, 284.0, 292.0, 292.0, 287.0, 290.0, 292.0, 293.0, 286.0, 302.0, 282.0, 283.0, 290.0, 286.0, 293.0, 285.0, 294.0, 316.0, 317.0, 269.0, 256.0, 293.0, 280.0, 291.0, 291.0, 280.0, 293.0, 291.0, 282.0, 293.0, 289.0, 259.0, 266.0, 289.0, 284.0, 282.0, 288.0, 293.0, 283.0, 251.0, 265.0, 310.0, 317.0, 290.0, 289.0, 282.0, 297.0, 287.0, 286.0, 288.0, 291.0, 288.0, 294.0, 294.0, 285.0, 287.0, 289.0, 289.0, 290.0, 291.0, 285.0, 260.0, 265.0, 313.0, 314.0, 285.0, 291.0, 262.0, 268.0, 261.0, 261.0, 294.0, 288.0, 285.0, 294.0, 285.0, 288.0, 293.0, 286.0, 285.0, 288.0, 260.0, 265.0, 289.0, 287.0, 287.0, 295.0, 285.0, 291.0, 285.0, 285.0, 263.0, 262.0, 275.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6967359713771896, "mean_inference_ms": 1.2437290472164755, "mean_action_processing_ms": 0.13364784707998414, "mean_env_wait_ms": 0.838560435275775, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10137600, "num_agent_steps_trained": 10137600, "num_env_steps_sampled": 5068800, "num_env_steps_trained": 5068800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5068800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10137600, "timers": {"training_iteration_time_ms": 3587.318, "learn_time_ms": 1109.296, "learn_throughput": 11538.847, "synch_weights_time_ms": 10.745}, "counters": {"num_env_steps_sampled": 5068800, "num_env_steps_trained": 5068800, "num_agent_steps_sampled": 10137600, "num_agent_steps_trained": 10137600}, "done": false, "episodes_total": 12672, "training_iteration": 396, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-10", "timestamp": 1666581970, "time_this_iter_s": 3.577162981033325, "time_total_s": 1510.6475656032562, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1510.6475656032562, "timesteps_since_restore": 0, "iterations_since_restore": 396, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.68, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 174.43, "shaped_reward_min": 85, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.24, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.45, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 16.06, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.27, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.88, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.28, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.91, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, 
"soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.74, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.88, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.88, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003785345470532775, "policy_loss": 0.0034148150589317083, "vf_loss": 7.838505744934082, "vf_explained_var": 0.5580576062202454, "kl": 0.00246319267898798, "entropy": 0.8266406059265137, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5081600, "num_env_steps_trained": 5081600, "num_agent_steps_sampled": 10163200, "num_agent_steps_trained": 10163200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 205.0, "episode_reward_mean": 566.03, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 283.015}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 174.43, "shaped_reward_min": 85, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.24, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.45, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.06, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.27, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.88, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.28, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.14, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.01, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.91, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.74, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.88, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.88, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 582.0, 522.0, 579.0, 579.0, 582.0, 579.0, 498.0, 582.0, 570.0, 582.0, 579.0, 576.0, 573.0, 570.0, 579.0, 570.0, 576.0, 205.0, 587.0, 582.0, 576.0, 579.0, 582.0, 579.0, 584.0, 573.0, 579.0, 579.0, 633.0, 525.0, 573.0, 582.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 576.0, 516.0, 627.0, 579.0, 579.0, 573.0, 579.0, 582.0, 579.0, 576.0, 579.0, 576.0, 525.0, 627.0, 576.0, 530.0, 522.0, 582.0, 579.0, 573.0, 579.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 525.0, 576.0, 582.0, 576.0, 582.0, 468.0, 570.0, 570.0, 576.0, 582.0, 519.0, 579.0, 579.0, 525.0, 587.0, 579.0, 582.0, 582.0, 570.0, 522.0, 579.0, 576.0, 525.0, 579.0, 579.0, 570.0, 582.0, 576.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 262.0, 286.0, 296.0, 262.0, 260.0, 288.0, 291.0, 294.0, 285.0, 293.0, 289.0, 290.0, 289.0, 241.0, 257.0, 291.0, 291.0, 286.0, 284.0, 291.0, 291.0, 291.0, 288.0, 285.0, 291.0, 286.0, 287.0, 275.0, 295.0, 285.0, 294.0, 285.0, 285.0, 294.0, 282.0, 108.0, 97.0, 296.0, 291.0, 290.0, 292.0, 284.0, 292.0, 292.0, 287.0, 290.0, 292.0, 293.0, 286.0, 302.0, 282.0, 283.0, 290.0, 286.0, 293.0, 285.0, 294.0, 316.0, 317.0, 269.0, 256.0, 293.0, 280.0, 291.0, 291.0, 280.0, 293.0, 291.0, 282.0, 293.0, 289.0, 259.0, 266.0, 289.0, 284.0, 282.0, 288.0, 293.0, 283.0, 251.0, 265.0, 310.0, 317.0, 290.0, 289.0, 282.0, 297.0, 287.0, 286.0, 288.0, 291.0, 288.0, 294.0, 294.0, 285.0, 287.0, 289.0, 289.0, 290.0, 291.0, 285.0, 260.0, 265.0, 313.0, 314.0, 285.0, 291.0, 262.0, 268.0, 261.0, 261.0, 294.0, 288.0, 285.0, 294.0, 285.0, 288.0, 293.0, 286.0, 285.0, 288.0, 260.0, 265.0, 289.0, 287.0, 287.0, 295.0, 285.0, 291.0, 285.0, 285.0, 263.0, 262.0, 275.0, 301.0, 296.0, 286.0, 292.0, 284.0, 288.0, 294.0, 236.0, 232.0, 285.0, 285.0, 282.0, 288.0, 285.0, 291.0, 288.0, 294.0, 267.0, 252.0, 290.0, 289.0, 294.0, 285.0, 258.0, 267.0, 291.0, 296.0, 286.0, 293.0, 290.0, 292.0, 293.0, 289.0, 283.0, 287.0, 268.0, 254.0, 288.0, 291.0, 288.0, 288.0, 268.0, 257.0, 289.0, 290.0, 288.0, 291.0, 287.0, 283.0, 296.0, 286.0, 291.0, 285.0, 295.0, 287.0, 294.0, 285.0, 286.0, 287.0, 286.0, 290.0, 287.0, 295.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6966830217276799, "mean_inference_ms": 1.2435711010311095, "mean_action_processing_ms": 0.1336373918179333, "mean_env_wait_ms": 0.8384640728388132, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 205.0, "episode_reward_mean": 566.03, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 97.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 283.015}, "hist_stats": {"episode_reward": [527.0, 582.0, 522.0, 579.0, 579.0, 582.0, 579.0, 498.0, 582.0, 570.0, 582.0, 579.0, 576.0, 573.0, 570.0, 579.0, 570.0, 576.0, 205.0, 587.0, 582.0, 576.0, 579.0, 582.0, 579.0, 584.0, 573.0, 579.0, 579.0, 633.0, 525.0, 573.0, 582.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 576.0, 516.0, 627.0, 579.0, 579.0, 573.0, 579.0, 582.0, 579.0, 576.0, 579.0, 576.0, 525.0, 627.0, 576.0, 530.0, 522.0, 582.0, 579.0, 573.0, 579.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 525.0, 576.0, 582.0, 576.0, 582.0, 468.0, 570.0, 570.0, 576.0, 582.0, 519.0, 579.0, 579.0, 525.0, 587.0, 579.0, 582.0, 582.0, 570.0, 522.0, 579.0, 576.0, 525.0, 579.0, 579.0, 570.0, 582.0, 576.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 262.0, 286.0, 296.0, 262.0, 260.0, 288.0, 291.0, 294.0, 285.0, 293.0, 289.0, 290.0, 289.0, 241.0, 257.0, 291.0, 291.0, 286.0, 284.0, 291.0, 291.0, 291.0, 288.0, 285.0, 291.0, 286.0, 287.0, 275.0, 295.0, 285.0, 294.0, 285.0, 285.0, 294.0, 282.0, 108.0, 97.0, 296.0, 291.0, 290.0, 292.0, 284.0, 292.0, 292.0, 287.0, 290.0, 292.0, 293.0, 286.0, 302.0, 282.0, 283.0, 290.0, 286.0, 293.0, 285.0, 294.0, 316.0, 317.0, 269.0, 256.0, 293.0, 280.0, 291.0, 291.0, 280.0, 293.0, 291.0, 282.0, 293.0, 289.0, 259.0, 266.0, 289.0, 284.0, 282.0, 288.0, 293.0, 283.0, 251.0, 265.0, 310.0, 317.0, 290.0, 289.0, 282.0, 
297.0, 287.0, 286.0, 288.0, 291.0, 288.0, 294.0, 294.0, 285.0, 287.0, 289.0, 289.0, 290.0, 291.0, 285.0, 260.0, 265.0, 313.0, 314.0, 285.0, 291.0, 262.0, 268.0, 261.0, 261.0, 294.0, 288.0, 285.0, 294.0, 285.0, 288.0, 293.0, 286.0, 285.0, 288.0, 260.0, 265.0, 289.0, 287.0, 287.0, 295.0, 285.0, 291.0, 285.0, 285.0, 263.0, 262.0, 275.0, 301.0, 296.0, 286.0, 292.0, 284.0, 288.0, 294.0, 236.0, 232.0, 285.0, 285.0, 282.0, 288.0, 285.0, 291.0, 288.0, 294.0, 267.0, 252.0, 290.0, 289.0, 294.0, 285.0, 258.0, 267.0, 291.0, 296.0, 286.0, 293.0, 290.0, 292.0, 293.0, 289.0, 283.0, 287.0, 268.0, 254.0, 288.0, 291.0, 288.0, 288.0, 268.0, 257.0, 289.0, 290.0, 288.0, 291.0, 287.0, 283.0, 296.0, 286.0, 291.0, 285.0, 295.0, 287.0, 294.0, 285.0, 286.0, 287.0, 286.0, 290.0, 287.0, 295.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6966830217276799, "mean_inference_ms": 1.2435711010311095, "mean_action_processing_ms": 0.1336373918179333, "mean_env_wait_ms": 0.8384640728388132, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10163200, "num_agent_steps_trained": 10163200, "num_env_steps_sampled": 5081600, "num_env_steps_trained": 5081600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5081600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10163200, "timers": {"training_iteration_time_ms": 3585.77, "learn_time_ms": 1112.588, "learn_throughput": 11504.708, "synch_weights_time_ms": 11.765}, "counters": {"num_env_steps_sampled": 5081600, "num_env_steps_trained": 5081600, "num_agent_steps_sampled": 10163200, "num_agent_steps_trained": 10163200}, "done": false, "episodes_total": 12704, "training_iteration": 397, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-14", "timestamp": 1666581974, "time_this_iter_s": 3.645744562149048, "time_total_s": 1514.2933101654053, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1514.2933101654053, "timesteps_since_restore": 0, "iterations_since_restore": 397, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.55, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 173.9, "shaped_reward_min": 138, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.27, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.21, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, 
"useful_onion_pickup_agent_0_mean": 16.09, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.05, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.71, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.99, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.89, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.78, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.71, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.71, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002451391890645027, "policy_loss": -0.0028326634783297777, "vf_loss": 7.9434614181518555, "vf_explained_var": 0.5600583553314209, "kl": 0.002439986914396286, "entropy": 0.8261496424674988, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5094400, "num_env_steps_trained": 5094400, "num_agent_steps_sampled": 10188800, "num_agent_steps_trained": 10188800}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 468.0, "episode_reward_mean": 565.1, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 232.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 282.55}, "custom_metrics": {"sparse_reward_mean": 195.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 173.9, "shaped_reward_min": 138, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.27, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.21, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.09, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.05, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.71, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.99, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.1, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.89, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.88, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.78, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.71, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.71, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 576.0, 516.0, 627.0, 579.0, 579.0, 573.0, 579.0, 582.0, 579.0, 576.0, 579.0, 576.0, 525.0, 627.0, 576.0, 530.0, 522.0, 582.0, 579.0, 573.0, 579.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 525.0, 576.0, 582.0, 576.0, 582.0, 468.0, 570.0, 570.0, 576.0, 582.0, 519.0, 579.0, 579.0, 525.0, 587.0, 579.0, 582.0, 582.0, 570.0, 522.0, 579.0, 576.0, 525.0, 579.0, 579.0, 570.0, 582.0, 576.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 525.0, 573.0, 582.0, 530.0, 573.0, 590.0, 525.0, 530.0, 498.0, 576.0, 576.0, 582.0, 573.0, 522.0, 573.0, 522.0, 573.0, 579.0, 519.0, 527.0, 516.0, 576.0, 581.0, 573.0, 573.0, 579.0, 525.0, 582.0, 564.0, 579.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 280.0, 293.0, 291.0, 282.0, 293.0, 289.0, 259.0, 266.0, 289.0, 284.0, 282.0, 288.0, 293.0, 283.0, 251.0, 265.0, 310.0, 317.0, 290.0, 289.0, 282.0, 297.0, 287.0, 286.0, 288.0, 291.0, 288.0, 294.0, 294.0, 285.0, 287.0, 289.0, 289.0, 290.0, 291.0, 285.0, 260.0, 265.0, 313.0, 314.0, 285.0, 291.0, 262.0, 268.0, 261.0, 261.0, 294.0, 288.0, 285.0, 294.0, 285.0, 288.0, 293.0, 286.0, 285.0, 288.0, 260.0, 265.0, 289.0, 287.0, 287.0, 295.0, 285.0, 291.0, 285.0, 285.0, 263.0, 262.0, 275.0, 301.0, 296.0, 286.0, 292.0, 284.0, 288.0, 294.0, 236.0, 232.0, 285.0, 285.0, 282.0, 288.0, 285.0, 291.0, 288.0, 294.0, 267.0, 252.0, 290.0, 289.0, 294.0, 285.0, 258.0, 267.0, 291.0, 296.0, 286.0, 293.0, 290.0, 292.0, 293.0, 289.0, 283.0, 287.0, 268.0, 254.0, 288.0, 291.0, 288.0, 288.0, 268.0, 257.0, 289.0, 290.0, 288.0, 291.0, 287.0, 283.0, 296.0, 286.0, 291.0, 285.0, 295.0, 287.0, 294.0, 285.0, 286.0, 287.0, 286.0, 290.0, 287.0, 295.0, 289.0, 290.0, 260.0, 265.0, 283.0, 290.0, 292.0, 290.0, 262.0, 268.0, 290.0, 283.0, 297.0, 293.0, 264.0, 261.0, 260.0, 270.0, 249.0, 249.0, 285.0, 291.0, 289.0, 287.0, 291.0, 291.0, 285.0, 288.0, 268.0, 254.0, 288.0, 285.0, 264.0, 258.0, 283.0, 290.0, 288.0, 291.0, 266.0, 253.0, 270.0, 257.0, 255.0, 261.0, 296.0, 280.0, 284.0, 297.0, 285.0, 288.0, 289.0, 284.0, 288.0, 291.0, 265.0, 260.0, 287.0, 295.0, 282.0, 282.0, 288.0, 291.0, 285.0, 291.0, 292.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6966483530142179, "mean_inference_ms": 1.2434967982497485, "mean_action_processing_ms": 0.13362693614740914, "mean_env_wait_ms": 0.838467798835966, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 468.0, "episode_reward_mean": 565.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 232.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 282.55}, "hist_stats": {"episode_reward": [582.0, 573.0, 573.0, 582.0, 525.0, 573.0, 570.0, 576.0, 516.0, 627.0, 579.0, 579.0, 573.0, 579.0, 582.0, 579.0, 576.0, 579.0, 576.0, 525.0, 627.0, 576.0, 530.0, 522.0, 582.0, 579.0, 573.0, 579.0, 573.0, 525.0, 576.0, 582.0, 576.0, 570.0, 525.0, 576.0, 582.0, 576.0, 582.0, 468.0, 570.0, 570.0, 576.0, 582.0, 519.0, 579.0, 579.0, 525.0, 587.0, 579.0, 582.0, 582.0, 570.0, 522.0, 579.0, 576.0, 525.0, 579.0, 579.0, 570.0, 582.0, 576.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 525.0, 573.0, 582.0, 530.0, 573.0, 590.0, 525.0, 530.0, 498.0, 576.0, 576.0, 582.0, 573.0, 522.0, 573.0, 522.0, 573.0, 579.0, 519.0, 527.0, 516.0, 576.0, 581.0, 573.0, 573.0, 579.0, 525.0, 582.0, 564.0, 579.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 280.0, 293.0, 291.0, 282.0, 293.0, 289.0, 259.0, 266.0, 289.0, 284.0, 282.0, 288.0, 293.0, 283.0, 251.0, 265.0, 310.0, 317.0, 290.0, 289.0, 282.0, 297.0, 287.0, 286.0, 288.0, 291.0, 288.0, 294.0, 294.0, 285.0, 287.0, 289.0, 289.0, 290.0, 291.0, 285.0, 260.0, 265.0, 313.0, 314.0, 285.0, 291.0, 262.0, 268.0, 261.0, 261.0, 294.0, 288.0, 285.0, 294.0, 285.0, 288.0, 293.0, 286.0, 285.0, 288.0, 260.0, 265.0, 289.0, 287.0, 287.0, 295.0, 285.0, 291.0, 285.0, 285.0, 263.0, 262.0, 275.0, 301.0, 296.0, 286.0, 292.0, 284.0, 288.0, 294.0, 236.0, 232.0, 285.0, 285.0, 282.0, 288.0, 285.0, 291.0, 288.0, 
294.0, 267.0, 252.0, 290.0, 289.0, 294.0, 285.0, 258.0, 267.0, 291.0, 296.0, 286.0, 293.0, 290.0, 292.0, 293.0, 289.0, 283.0, 287.0, 268.0, 254.0, 288.0, 291.0, 288.0, 288.0, 268.0, 257.0, 289.0, 290.0, 288.0, 291.0, 287.0, 283.0, 296.0, 286.0, 291.0, 285.0, 295.0, 287.0, 294.0, 285.0, 286.0, 287.0, 286.0, 290.0, 287.0, 295.0, 289.0, 290.0, 260.0, 265.0, 283.0, 290.0, 292.0, 290.0, 262.0, 268.0, 290.0, 283.0, 297.0, 293.0, 264.0, 261.0, 260.0, 270.0, 249.0, 249.0, 285.0, 291.0, 289.0, 287.0, 291.0, 291.0, 285.0, 288.0, 268.0, 254.0, 288.0, 285.0, 264.0, 258.0, 283.0, 290.0, 288.0, 291.0, 266.0, 253.0, 270.0, 257.0, 255.0, 261.0, 296.0, 280.0, 284.0, 297.0, 285.0, 288.0, 289.0, 284.0, 288.0, 291.0, 265.0, 260.0, 287.0, 295.0, 282.0, 282.0, 288.0, 291.0, 285.0, 291.0, 292.0, 284.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6966483530142179, "mean_inference_ms": 1.2434967982497485, "mean_action_processing_ms": 0.13362693614740914, "mean_env_wait_ms": 0.838467798835966, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10188800, "num_agent_steps_trained": 10188800, "num_env_steps_sampled": 5094400, "num_env_steps_trained": 5094400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5094400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10188800, "timers": {"training_iteration_time_ms": 3615.429, "learn_time_ms": 1112.523, "learn_throughput": 11505.382, "synch_weights_time_ms": 11.227}, "counters": {"num_env_steps_sampled": 5094400, "num_env_steps_trained": 5094400, "num_agent_steps_sampled": 10188800, "num_agent_steps_trained": 10188800}, "done": false, "episodes_total": 12736, "training_iteration": 398, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-18", "timestamp": 1666581978, "time_this_iter_s": 3.9683709144592285, "time_total_s": 1518.2616810798645, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1518.2616810798645, "timesteps_since_restore": 0, "iterations_since_restore": 398, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.883333333333336, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.97, "shaped_reward_min": 138, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.02, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 21, 
"useful_onion_pickup_agent_0_mean": 15.84, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.18, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.57, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.76, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.99, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.94, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.84, 
"soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.57, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.76, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.57, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.76, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00010686222231015563, "policy_loss": -0.00027928390773013234, "vf_loss": 7.995491027832031, "vf_explained_var": 0.5691348314285278, "kl": 0.003024071455001831, "entropy": 0.8268085718154907, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5107200, "num_env_steps_trained": 5107200, "num_agent_steps_sampled": 10214400, "num_agent_steps_trained": 10214400}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 468.0, "episode_reward_mean": 562.57, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 232.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 281.285}, "custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.97, "shaped_reward_min": 138, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.02, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.84, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.18, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.11, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.57, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.76, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.18, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.99, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.11, 
"dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.94, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.57, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.76, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.57, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.76, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 570.0, 525.0, 576.0, 582.0, 576.0, 582.0, 468.0, 570.0, 570.0, 576.0, 582.0, 519.0, 579.0, 579.0, 525.0, 587.0, 579.0, 582.0, 582.0, 570.0, 522.0, 579.0, 576.0, 525.0, 579.0, 579.0, 570.0, 582.0, 576.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 525.0, 573.0, 582.0, 530.0, 573.0, 590.0, 525.0, 530.0, 498.0, 576.0, 576.0, 582.0, 573.0, 522.0, 573.0, 522.0, 573.0, 579.0, 519.0, 527.0, 516.0, 576.0, 581.0, 573.0, 573.0, 579.0, 525.0, 582.0, 564.0, 579.0, 576.0, 576.0, 576.0, 573.0, 525.0, 530.0, 579.0, 576.0, 549.0, 579.0, 576.0, 576.0, 590.0, 573.0, 522.0, 516.0, 579.0, 525.0, 573.0, 468.0, 576.0, 573.0, 573.0, 579.0, 525.0, 573.0, 627.0, 582.0, 576.0, 522.0, 576.0, 582.0, 570.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 285.0, 285.0, 263.0, 262.0, 275.0, 301.0, 296.0, 286.0, 292.0, 284.0, 288.0, 294.0, 236.0, 232.0, 285.0, 285.0, 282.0, 288.0, 285.0, 291.0, 288.0, 294.0, 267.0, 252.0, 290.0, 289.0, 294.0, 285.0, 258.0, 267.0, 291.0, 296.0, 286.0, 293.0, 290.0, 292.0, 293.0, 289.0, 283.0, 287.0, 268.0, 254.0, 288.0, 291.0, 288.0, 288.0, 268.0, 257.0, 289.0, 290.0, 288.0, 291.0, 287.0, 283.0, 296.0, 286.0, 291.0, 285.0, 295.0, 287.0, 294.0, 285.0, 286.0, 287.0, 286.0, 290.0, 287.0, 295.0, 289.0, 290.0, 260.0, 265.0, 283.0, 290.0, 292.0, 290.0, 262.0, 268.0, 290.0, 283.0, 297.0, 293.0, 264.0, 261.0, 260.0, 270.0, 249.0, 249.0, 285.0, 291.0, 289.0, 287.0, 291.0, 291.0, 285.0, 288.0, 268.0, 254.0, 288.0, 285.0, 264.0, 258.0, 283.0, 290.0, 288.0, 291.0, 266.0, 253.0, 270.0, 257.0, 255.0, 261.0, 296.0, 280.0, 284.0, 297.0, 285.0, 288.0, 289.0, 284.0, 288.0, 291.0, 265.0, 260.0, 287.0, 295.0, 282.0, 282.0, 288.0, 291.0, 285.0, 291.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 267.0, 258.0, 268.0, 262.0, 288.0, 291.0, 291.0, 285.0, 281.0, 268.0, 291.0, 288.0, 291.0, 285.0, 284.0, 292.0, 294.0, 296.0, 290.0, 283.0, 270.0, 252.0, 254.0, 262.0, 293.0, 286.0, 261.0, 264.0, 292.0, 281.0, 233.0, 235.0, 290.0, 286.0, 281.0, 292.0, 284.0, 289.0, 282.0, 297.0, 262.0, 263.0, 284.0, 289.0, 311.0, 316.0, 287.0, 295.0, 286.0, 290.0, 260.0, 262.0, 294.0, 282.0, 290.0, 292.0, 277.0, 293.0, 275.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6966158957094319, "mean_inference_ms": 1.243448250922046, "mean_action_processing_ms": 0.13362095833138962, "mean_env_wait_ms": 0.8385006632752647, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 468.0, "episode_reward_mean": 562.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 232.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 281.285}, "hist_stats": {"episode_reward": [576.0, 570.0, 525.0, 576.0, 582.0, 576.0, 582.0, 468.0, 570.0, 570.0, 576.0, 582.0, 519.0, 579.0, 579.0, 525.0, 587.0, 579.0, 582.0, 582.0, 570.0, 522.0, 579.0, 576.0, 525.0, 579.0, 579.0, 570.0, 582.0, 576.0, 582.0, 579.0, 573.0, 576.0, 582.0, 579.0, 525.0, 573.0, 582.0, 530.0, 573.0, 590.0, 525.0, 530.0, 498.0, 576.0, 576.0, 582.0, 573.0, 522.0, 573.0, 522.0, 573.0, 579.0, 519.0, 527.0, 516.0, 576.0, 581.0, 573.0, 573.0, 579.0, 525.0, 582.0, 564.0, 579.0, 576.0, 576.0, 576.0, 573.0, 525.0, 530.0, 579.0, 576.0, 549.0, 579.0, 576.0, 576.0, 590.0, 573.0, 522.0, 516.0, 579.0, 525.0, 573.0, 468.0, 576.0, 573.0, 573.0, 579.0, 525.0, 573.0, 627.0, 582.0, 576.0, 522.0, 576.0, 582.0, 570.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 285.0, 285.0, 263.0, 262.0, 275.0, 301.0, 296.0, 286.0, 292.0, 284.0, 288.0, 294.0, 236.0, 232.0, 285.0, 285.0, 282.0, 288.0, 285.0, 291.0, 288.0, 294.0, 267.0, 252.0, 290.0, 289.0, 294.0, 285.0, 258.0, 267.0, 291.0, 296.0, 286.0, 293.0, 290.0, 292.0, 293.0, 289.0, 283.0, 287.0, 268.0, 254.0, 288.0, 291.0, 288.0, 288.0, 268.0, 257.0, 289.0, 290.0, 288.0, 291.0, 287.0, 283.0, 296.0, 286.0, 291.0, 285.0, 295.0, 287.0, 294.0, 285.0, 286.0, 287.0, 286.0, 290.0, 287.0, 295.0, 289.0, 290.0, 260.0, 265.0, 283.0, 290.0, 292.0, 290.0, 262.0, 268.0, 290.0, 283.0, 297.0, 293.0, 264.0, 261.0, 
260.0, 270.0, 249.0, 249.0, 285.0, 291.0, 289.0, 287.0, 291.0, 291.0, 285.0, 288.0, 268.0, 254.0, 288.0, 285.0, 264.0, 258.0, 283.0, 290.0, 288.0, 291.0, 266.0, 253.0, 270.0, 257.0, 255.0, 261.0, 296.0, 280.0, 284.0, 297.0, 285.0, 288.0, 289.0, 284.0, 288.0, 291.0, 265.0, 260.0, 287.0, 295.0, 282.0, 282.0, 288.0, 291.0, 285.0, 291.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 267.0, 258.0, 268.0, 262.0, 288.0, 291.0, 291.0, 285.0, 281.0, 268.0, 291.0, 288.0, 291.0, 285.0, 284.0, 292.0, 294.0, 296.0, 290.0, 283.0, 270.0, 252.0, 254.0, 262.0, 293.0, 286.0, 261.0, 264.0, 292.0, 281.0, 233.0, 235.0, 290.0, 286.0, 281.0, 292.0, 284.0, 289.0, 282.0, 297.0, 262.0, 263.0, 284.0, 289.0, 311.0, 316.0, 287.0, 295.0, 286.0, 290.0, 260.0, 262.0, 294.0, 282.0, 290.0, 292.0, 277.0, 293.0, 275.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6966158957094319, "mean_inference_ms": 1.243448250922046, "mean_action_processing_ms": 0.13362095833138962, "mean_env_wait_ms": 0.8385006632752647, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10214400, "num_agent_steps_trained": 10214400, "num_env_steps_sampled": 5107200, "num_env_steps_trained": 5107200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5107200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10214400, "timers": {"training_iteration_time_ms": 3630.802, "learn_time_ms": 1121.196, "learn_throughput": 11416.384, "synch_weights_time_ms": 10.982}, "counters": {"num_env_steps_sampled": 5107200, "num_env_steps_trained": 5107200, "num_agent_steps_sampled": 10214400, "num_agent_steps_trained": 10214400}, "done": false, "episodes_total": 12768, "training_iteration": 399, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-22", "timestamp": 1666581982, "time_this_iter_s": 3.7618231773376465, "time_total_s": 1522.0235042572021, "pid": 1816099, 
"hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1522.0235042572021, "timesteps_since_restore": 0, "iterations_since_restore": 399, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.82, "ram_util_percent": 10.62}} +{"evaluation": {"average_sparse_reward": 200.0, "num_healthy_workers": 0, "num_recreated_workers": 0}, "custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 172.6, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.6, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 
16.57, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.47, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.2, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.06, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.2, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.06, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.2, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.06, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002051019575446844, "policy_loss": -0.0024280953221023083, "vf_loss": 7.887781143188477, "vf_explained_var": 0.6008471250534058, "kl": 0.003506100969389081, "entropy": 0.8234077095985413, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5120000, "num_env_steps_trained": 5120000, "num_agent_steps_sampled": 10240000, "num_agent_steps_trained": 10240000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 560.6, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 59.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 280.3}, "custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 172.6, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, 
"tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.6, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.57, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.47, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.12, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.2, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.06, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.05, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, 
"dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.2, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.06, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.2, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.06, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, 
"catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 576.0, 582.0, 579.0, 525.0, 573.0, 582.0, 530.0, 573.0, 590.0, 525.0, 530.0, 498.0, 576.0, 576.0, 582.0, 573.0, 522.0, 573.0, 522.0, 573.0, 579.0, 519.0, 527.0, 516.0, 576.0, 581.0, 573.0, 573.0, 579.0, 525.0, 582.0, 564.0, 579.0, 576.0, 576.0, 576.0, 573.0, 525.0, 530.0, 579.0, 576.0, 549.0, 579.0, 576.0, 576.0, 590.0, 573.0, 522.0, 516.0, 579.0, 525.0, 573.0, 468.0, 576.0, 573.0, 573.0, 579.0, 525.0, 573.0, 627.0, 582.0, 576.0, 522.0, 576.0, 582.0, 570.0, 576.0, 579.0, 579.0, 579.0, 582.0, 582.0, 570.0, 578.0, 579.0, 579.0, 570.0, 579.0, 579.0, 579.0, 579.0, 530.0, 123.0, 579.0, 639.0, 582.0, 582.0, 525.0, 570.0, 576.0, 587.0, 576.0, 519.0, 582.0, 582.0, 582.0, 579.0, 576.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 286.0, 290.0, 287.0, 295.0, 289.0, 290.0, 260.0, 265.0, 283.0, 290.0, 292.0, 290.0, 262.0, 268.0, 290.0, 283.0, 297.0, 293.0, 264.0, 261.0, 260.0, 270.0, 249.0, 249.0, 285.0, 291.0, 289.0, 287.0, 291.0, 291.0, 285.0, 288.0, 268.0, 254.0, 288.0, 285.0, 264.0, 258.0, 283.0, 290.0, 288.0, 291.0, 266.0, 253.0, 270.0, 257.0, 255.0, 261.0, 296.0, 280.0, 284.0, 297.0, 285.0, 288.0, 289.0, 284.0, 288.0, 291.0, 265.0, 260.0, 287.0, 295.0, 282.0, 282.0, 288.0, 291.0, 285.0, 291.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 267.0, 258.0, 268.0, 262.0, 288.0, 291.0, 291.0, 285.0, 281.0, 268.0, 291.0, 288.0, 291.0, 285.0, 284.0, 292.0, 294.0, 296.0, 290.0, 283.0, 270.0, 252.0, 254.0, 262.0, 293.0, 286.0, 261.0, 264.0, 292.0, 281.0, 233.0, 235.0, 290.0, 286.0, 281.0, 292.0, 284.0, 289.0, 282.0, 297.0, 262.0, 263.0, 284.0, 289.0, 311.0, 316.0, 287.0, 295.0, 286.0, 290.0, 260.0, 262.0, 294.0, 282.0, 290.0, 292.0, 277.0, 293.0, 275.0, 301.0, 290.0, 289.0, 288.0, 291.0, 289.0, 290.0, 291.0, 291.0, 289.0, 293.0, 287.0, 283.0, 296.0, 282.0, 294.0, 285.0, 291.0, 288.0, 286.0, 284.0, 285.0, 294.0, 292.0, 287.0, 288.0, 291.0, 289.0, 290.0, 264.0, 266.0, 59.0, 64.0, 288.0, 291.0, 319.0, 320.0, 288.0, 294.0, 288.0, 294.0, 266.0, 259.0, 285.0, 285.0, 284.0, 292.0, 289.0, 298.0, 287.0, 289.0, 261.0, 258.0, 291.0, 291.0, 296.0, 286.0, 290.0, 292.0, 289.0, 290.0, 281.0, 295.0, 260.0, 265.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6965676636854887, "mean_inference_ms": 1.2434046138073682, "mean_action_processing_ms": 0.13361575469972117, "mean_env_wait_ms": 0.8385373096956714, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 560.6, 
"episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 59.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 280.3}, "hist_stats": {"episode_reward": [573.0, 576.0, 582.0, 579.0, 525.0, 573.0, 582.0, 530.0, 573.0, 590.0, 525.0, 530.0, 498.0, 576.0, 576.0, 582.0, 573.0, 522.0, 573.0, 522.0, 573.0, 579.0, 519.0, 527.0, 516.0, 576.0, 581.0, 573.0, 573.0, 579.0, 525.0, 582.0, 564.0, 579.0, 576.0, 576.0, 576.0, 573.0, 525.0, 530.0, 579.0, 576.0, 549.0, 579.0, 576.0, 576.0, 590.0, 573.0, 522.0, 516.0, 579.0, 525.0, 573.0, 468.0, 576.0, 573.0, 573.0, 579.0, 525.0, 573.0, 627.0, 582.0, 576.0, 522.0, 576.0, 582.0, 570.0, 576.0, 579.0, 579.0, 579.0, 582.0, 582.0, 570.0, 578.0, 579.0, 579.0, 570.0, 579.0, 579.0, 579.0, 579.0, 530.0, 123.0, 579.0, 639.0, 582.0, 582.0, 525.0, 570.0, 576.0, 587.0, 576.0, 519.0, 582.0, 582.0, 582.0, 579.0, 576.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 286.0, 290.0, 287.0, 295.0, 289.0, 290.0, 260.0, 265.0, 283.0, 290.0, 292.0, 290.0, 262.0, 268.0, 290.0, 283.0, 297.0, 293.0, 264.0, 261.0, 260.0, 270.0, 249.0, 249.0, 285.0, 291.0, 289.0, 287.0, 291.0, 291.0, 285.0, 288.0, 268.0, 254.0, 288.0, 285.0, 264.0, 258.0, 283.0, 290.0, 288.0, 291.0, 266.0, 253.0, 270.0, 257.0, 255.0, 261.0, 296.0, 280.0, 284.0, 297.0, 285.0, 288.0, 289.0, 284.0, 288.0, 291.0, 265.0, 260.0, 287.0, 295.0, 282.0, 282.0, 288.0, 291.0, 285.0, 291.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 267.0, 258.0, 268.0, 
262.0, 288.0, 291.0, 291.0, 285.0, 281.0, 268.0, 291.0, 288.0, 291.0, 285.0, 284.0, 292.0, 294.0, 296.0, 290.0, 283.0, 270.0, 252.0, 254.0, 262.0, 293.0, 286.0, 261.0, 264.0, 292.0, 281.0, 233.0, 235.0, 290.0, 286.0, 281.0, 292.0, 284.0, 289.0, 282.0, 297.0, 262.0, 263.0, 284.0, 289.0, 311.0, 316.0, 287.0, 295.0, 286.0, 290.0, 260.0, 262.0, 294.0, 282.0, 290.0, 292.0, 277.0, 293.0, 275.0, 301.0, 290.0, 289.0, 288.0, 291.0, 289.0, 290.0, 291.0, 291.0, 289.0, 293.0, 287.0, 283.0, 296.0, 282.0, 294.0, 285.0, 291.0, 288.0, 286.0, 284.0, 285.0, 294.0, 292.0, 287.0, 288.0, 291.0, 289.0, 290.0, 264.0, 266.0, 59.0, 64.0, 288.0, 291.0, 319.0, 320.0, 288.0, 294.0, 288.0, 294.0, 266.0, 259.0, 285.0, 285.0, 284.0, 292.0, 289.0, 298.0, 287.0, 289.0, 261.0, 258.0, 291.0, 291.0, 296.0, 286.0, 290.0, 292.0, 289.0, 290.0, 281.0, 295.0, 260.0, 265.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6965676636854887, "mean_inference_ms": 1.2434046138073682, "mean_action_processing_ms": 0.13361575469972117, "mean_env_wait_ms": 0.8385373096956714, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10240000, "num_agent_steps_trained": 10240000, "num_env_steps_sampled": 5120000, "num_env_steps_trained": 5120000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5120000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10240000, "timers": {"training_iteration_time_ms": 3619.382, "learn_time_ms": 1115.094, "learn_throughput": 11478.853, "synch_weights_time_ms": 11.084}, "counters": {"num_env_steps_sampled": 5120000, "num_env_steps_trained": 5120000, "num_agent_steps_sampled": 10240000, "num_agent_steps_trained": 10240000}, "done": false, "episodes_total": 12800, "training_iteration": 400, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-29", "timestamp": 1666581989, "time_this_iter_s": 7.1384642124176025, 
"time_total_s": 1529.1619684696198, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", 
"remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, 
"evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", 
"policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, 
"policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1529.1619684696198, "timesteps_since_restore": 0, "iterations_since_restore": 400, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 14.381818181818183, "ram_util_percent": 10.599999999999998}} +{"custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 171.35, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.57, "onion_pickup_agent_1_min": 5, 
"onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.91, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.1, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.0, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, 
"soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.91, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.1, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.91, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.1, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0029064586851745844, "policy_loss": -0.0032941235695034266, "vf_loss": 8.069677352905273, "vf_explained_var": 0.5652687549591064, "kl": 0.002700523007661104, "entropy": 0.8386068344116211, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5132800, "num_env_steps_trained": 5132800, "num_agent_steps_sampled": 10265600, "num_agent_steps_trained": 10265600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 556.55, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 278.275}, "custom_metrics": {"sparse_reward_mean": 192.6, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 171.35, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, 
"useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.34, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.57, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.23, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.11, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.91, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.1, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.0, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.83, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, 
"dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.0, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.91, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.1, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.91, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.1, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, 
"catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [564.0, 579.0, 576.0, 576.0, 576.0, 573.0, 525.0, 530.0, 579.0, 576.0, 549.0, 579.0, 576.0, 576.0, 590.0, 573.0, 522.0, 516.0, 579.0, 525.0, 573.0, 468.0, 576.0, 573.0, 573.0, 579.0, 525.0, 573.0, 627.0, 582.0, 576.0, 522.0, 576.0, 582.0, 570.0, 576.0, 579.0, 579.0, 579.0, 582.0, 582.0, 570.0, 578.0, 579.0, 579.0, 570.0, 579.0, 579.0, 579.0, 579.0, 530.0, 123.0, 579.0, 639.0, 582.0, 582.0, 525.0, 570.0, 576.0, 587.0, 576.0, 519.0, 582.0, 582.0, 582.0, 579.0, 576.0, 525.0, 582.0, 579.0, 519.0, 573.0, 573.0, 525.0, 66.0, 419.0, 582.0, 576.0, 522.0, 579.0, 570.0, 513.0, 513.0, 576.0, 582.0, 527.0, 579.0, 582.0, 570.0, 582.0, 576.0, 627.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 573.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 282.0, 288.0, 291.0, 285.0, 291.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 267.0, 258.0, 268.0, 262.0, 288.0, 291.0, 291.0, 285.0, 281.0, 268.0, 291.0, 288.0, 291.0, 285.0, 284.0, 292.0, 294.0, 296.0, 290.0, 283.0, 270.0, 252.0, 254.0, 262.0, 293.0, 286.0, 261.0, 264.0, 292.0, 281.0, 233.0, 235.0, 290.0, 286.0, 281.0, 292.0, 284.0, 289.0, 282.0, 297.0, 262.0, 263.0, 284.0, 289.0, 311.0, 316.0, 287.0, 295.0, 286.0, 290.0, 260.0, 262.0, 294.0, 282.0, 290.0, 292.0, 277.0, 293.0, 275.0, 301.0, 290.0, 289.0, 288.0, 291.0, 289.0, 290.0, 291.0, 291.0, 289.0, 293.0, 287.0, 283.0, 296.0, 282.0, 294.0, 285.0, 291.0, 288.0, 286.0, 284.0, 285.0, 294.0, 292.0, 287.0, 288.0, 291.0, 289.0, 290.0, 264.0, 266.0, 59.0, 64.0, 288.0, 291.0, 319.0, 320.0, 288.0, 294.0, 288.0, 294.0, 266.0, 259.0, 285.0, 285.0, 284.0, 292.0, 289.0, 298.0, 287.0, 289.0, 261.0, 258.0, 291.0, 291.0, 296.0, 286.0, 290.0, 292.0, 289.0, 290.0, 281.0, 295.0, 260.0, 265.0, 283.0, 299.0, 284.0, 295.0, 256.0, 263.0, 283.0, 290.0, 280.0, 293.0, 260.0, 265.0, 31.0, 35.0, 210.0, 209.0, 289.0, 293.0, 289.0, 287.0, 261.0, 261.0, 292.0, 287.0, 284.0, 286.0, 257.0, 256.0, 255.0, 258.0, 285.0, 291.0, 294.0, 288.0, 257.0, 270.0, 290.0, 289.0, 291.0, 291.0, 282.0, 288.0, 293.0, 289.0, 291.0, 285.0, 316.0, 311.0, 288.0, 285.0, 292.0, 290.0, 290.0, 289.0, 291.0, 288.0, 293.0, 289.0, 291.0, 285.0, 282.0, 291.0, 263.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6965010651464741, "mean_inference_ms": 1.2432963575523006, "mean_action_processing_ms": 0.13361169316252014, "mean_env_wait_ms": 0.8384959904466414, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 556.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, 
"policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 278.275}, "hist_stats": {"episode_reward": [564.0, 579.0, 576.0, 576.0, 576.0, 573.0, 525.0, 530.0, 579.0, 576.0, 549.0, 579.0, 576.0, 576.0, 590.0, 573.0, 522.0, 516.0, 579.0, 525.0, 573.0, 468.0, 576.0, 573.0, 573.0, 579.0, 525.0, 573.0, 627.0, 582.0, 576.0, 522.0, 576.0, 582.0, 570.0, 576.0, 579.0, 579.0, 579.0, 582.0, 582.0, 570.0, 578.0, 579.0, 579.0, 570.0, 579.0, 579.0, 579.0, 579.0, 530.0, 123.0, 579.0, 639.0, 582.0, 582.0, 525.0, 570.0, 576.0, 587.0, 576.0, 519.0, 582.0, 582.0, 582.0, 579.0, 576.0, 525.0, 582.0, 579.0, 519.0, 573.0, 573.0, 525.0, 66.0, 419.0, 582.0, 576.0, 522.0, 579.0, 570.0, 513.0, 513.0, 576.0, 582.0, 527.0, 579.0, 582.0, 570.0, 582.0, 576.0, 627.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 573.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 282.0, 288.0, 291.0, 285.0, 291.0, 292.0, 284.0, 281.0, 295.0, 284.0, 289.0, 267.0, 258.0, 268.0, 262.0, 288.0, 291.0, 291.0, 285.0, 281.0, 268.0, 291.0, 288.0, 291.0, 285.0, 284.0, 292.0, 294.0, 296.0, 290.0, 283.0, 270.0, 252.0, 254.0, 262.0, 293.0, 286.0, 261.0, 264.0, 292.0, 281.0, 233.0, 235.0, 290.0, 286.0, 281.0, 292.0, 284.0, 289.0, 282.0, 297.0, 262.0, 263.0, 284.0, 289.0, 311.0, 316.0, 287.0, 295.0, 286.0, 290.0, 260.0, 262.0, 294.0, 282.0, 290.0, 292.0, 277.0, 293.0, 275.0, 301.0, 290.0, 289.0, 288.0, 291.0, 289.0, 290.0, 291.0, 291.0, 289.0, 293.0, 287.0, 283.0, 296.0, 282.0, 294.0, 
285.0, 291.0, 288.0, 286.0, 284.0, 285.0, 294.0, 292.0, 287.0, 288.0, 291.0, 289.0, 290.0, 264.0, 266.0, 59.0, 64.0, 288.0, 291.0, 319.0, 320.0, 288.0, 294.0, 288.0, 294.0, 266.0, 259.0, 285.0, 285.0, 284.0, 292.0, 289.0, 298.0, 287.0, 289.0, 261.0, 258.0, 291.0, 291.0, 296.0, 286.0, 290.0, 292.0, 289.0, 290.0, 281.0, 295.0, 260.0, 265.0, 283.0, 299.0, 284.0, 295.0, 256.0, 263.0, 283.0, 290.0, 280.0, 293.0, 260.0, 265.0, 31.0, 35.0, 210.0, 209.0, 289.0, 293.0, 289.0, 287.0, 261.0, 261.0, 292.0, 287.0, 284.0, 286.0, 257.0, 256.0, 255.0, 258.0, 285.0, 291.0, 294.0, 288.0, 257.0, 270.0, 290.0, 289.0, 291.0, 291.0, 282.0, 288.0, 293.0, 289.0, 291.0, 285.0, 316.0, 311.0, 288.0, 285.0, 292.0, 290.0, 290.0, 289.0, 291.0, 288.0, 293.0, 289.0, 291.0, 285.0, 282.0, 291.0, 263.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6965010651464741, "mean_inference_ms": 1.2432963575523006, "mean_action_processing_ms": 0.13361169316252014, "mean_env_wait_ms": 0.8384959904466414, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10265600, "num_agent_steps_trained": 10265600, "num_env_steps_sampled": 5132800, "num_env_steps_trained": 5132800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5132800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10265600, "timers": {"training_iteration_time_ms": 3617.502, "learn_time_ms": 1119.217, "learn_throughput": 11436.564, "synch_weights_time_ms": 10.734}, "counters": {"num_env_steps_sampled": 5132800, "num_env_steps_trained": 5132800, "num_agent_steps_sampled": 10265600, "num_agent_steps_trained": 10265600}, "done": false, "episodes_total": 12832, "training_iteration": 401, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-33", "timestamp": 1666581993, "time_this_iter_s": 3.6597113609313965, "time_total_s": 1532.8216798305511, "pid": 1816099, "hostname": 
"svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, 
"validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": 
"episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, 
"remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, 
"evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, 
"policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1532.8216798305511, "timesteps_since_restore": 0, "iterations_since_restore": 401, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.959999999999997, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 172.69, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.3, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, 
"useful_onion_pickup_agent_0_mean": 15.22, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.8, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.49, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, 
"soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.8, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.49, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.8, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.49, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003041105344891548, "policy_loss": 0.002663327381014824, "vf_loss": 7.898676872253418, "vf_explained_var": 0.5853292942047119, "kl": 0.004315956961363554, "entropy": 0.8241794109344482, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5145600, "num_env_steps_trained": 5145600, "num_agent_steps_sampled": 10291200, "num_agent_steps_trained": 10291200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 559.89, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 279.945}, "custom_metrics": {"sparse_reward_mean": 193.6, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 172.69, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, 
"useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.3, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.9, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.22, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.69, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.8, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.49, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.96, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 
0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.68, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.64, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.8, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.49, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.8, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.49, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, 
"catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 582.0, 570.0, 576.0, 579.0, 579.0, 579.0, 582.0, 582.0, 570.0, 578.0, 579.0, 579.0, 570.0, 579.0, 579.0, 579.0, 579.0, 530.0, 123.0, 579.0, 639.0, 582.0, 582.0, 525.0, 570.0, 576.0, 587.0, 576.0, 519.0, 582.0, 582.0, 582.0, 579.0, 576.0, 525.0, 582.0, 579.0, 519.0, 573.0, 573.0, 525.0, 66.0, 419.0, 582.0, 576.0, 522.0, 579.0, 570.0, 513.0, 513.0, 576.0, 582.0, 527.0, 579.0, 582.0, 570.0, 582.0, 576.0, 627.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 573.0, 522.0, 582.0, 579.0, 579.0, 570.0, 579.0, 579.0, 582.0, 587.0, 576.0, 582.0, 582.0, 522.0, 582.0, 582.0, 573.0, 579.0, 579.0, 573.0, 567.0, 522.0, 573.0, 573.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 522.0, 579.0, 567.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 282.0, 290.0, 292.0, 277.0, 293.0, 275.0, 301.0, 290.0, 289.0, 288.0, 291.0, 289.0, 290.0, 291.0, 291.0, 289.0, 293.0, 287.0, 283.0, 296.0, 282.0, 294.0, 285.0, 291.0, 288.0, 286.0, 284.0, 285.0, 294.0, 292.0, 287.0, 288.0, 291.0, 289.0, 290.0, 264.0, 266.0, 59.0, 64.0, 288.0, 291.0, 319.0, 320.0, 288.0, 294.0, 288.0, 294.0, 266.0, 259.0, 285.0, 285.0, 284.0, 292.0, 289.0, 298.0, 287.0, 289.0, 261.0, 258.0, 291.0, 291.0, 296.0, 286.0, 290.0, 292.0, 289.0, 290.0, 281.0, 295.0, 260.0, 265.0, 283.0, 299.0, 284.0, 295.0, 256.0, 263.0, 283.0, 290.0, 280.0, 293.0, 260.0, 265.0, 31.0, 35.0, 210.0, 209.0, 289.0, 293.0, 289.0, 287.0, 261.0, 261.0, 292.0, 287.0, 284.0, 286.0, 257.0, 256.0, 255.0, 258.0, 285.0, 291.0, 294.0, 288.0, 257.0, 270.0, 290.0, 289.0, 291.0, 291.0, 282.0, 288.0, 293.0, 289.0, 291.0, 285.0, 316.0, 311.0, 288.0, 285.0, 292.0, 290.0, 290.0, 289.0, 291.0, 288.0, 293.0, 289.0, 291.0, 285.0, 282.0, 291.0, 263.0, 259.0, 290.0, 292.0, 288.0, 291.0, 288.0, 291.0, 288.0, 282.0, 283.0, 296.0, 285.0, 294.0, 289.0, 293.0, 296.0, 291.0, 287.0, 289.0, 294.0, 288.0, 285.0, 297.0, 259.0, 263.0, 295.0, 287.0, 290.0, 292.0, 290.0, 283.0, 290.0, 289.0, 288.0, 291.0, 283.0, 290.0, 277.0, 290.0, 254.0, 268.0, 282.0, 291.0, 292.0, 281.0, 290.0, 289.0, 285.0, 294.0, 291.0, 288.0, 291.0, 288.0, 289.0, 290.0, 289.0, 287.0, 254.0, 268.0, 289.0, 290.0, 271.0, 296.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.696437434705532, "mean_inference_ms": 1.2431677188040304, "mean_action_processing_ms": 0.1336046089583616, "mean_env_wait_ms": 0.8384268867865966, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 559.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": 
{"ppo": 320.0}, "policy_reward_mean": {"ppo": 279.945}, "hist_stats": {"episode_reward": [576.0, 582.0, 570.0, 576.0, 579.0, 579.0, 579.0, 582.0, 582.0, 570.0, 578.0, 579.0, 579.0, 570.0, 579.0, 579.0, 579.0, 579.0, 530.0, 123.0, 579.0, 639.0, 582.0, 582.0, 525.0, 570.0, 576.0, 587.0, 576.0, 519.0, 582.0, 582.0, 582.0, 579.0, 576.0, 525.0, 582.0, 579.0, 519.0, 573.0, 573.0, 525.0, 66.0, 419.0, 582.0, 576.0, 522.0, 579.0, 570.0, 513.0, 513.0, 576.0, 582.0, 527.0, 579.0, 582.0, 570.0, 582.0, 576.0, 627.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 573.0, 522.0, 582.0, 579.0, 579.0, 570.0, 579.0, 579.0, 582.0, 587.0, 576.0, 582.0, 582.0, 522.0, 582.0, 582.0, 573.0, 579.0, 579.0, 573.0, 567.0, 522.0, 573.0, 573.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 522.0, 579.0, 567.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 282.0, 290.0, 292.0, 277.0, 293.0, 275.0, 301.0, 290.0, 289.0, 288.0, 291.0, 289.0, 290.0, 291.0, 291.0, 289.0, 293.0, 287.0, 283.0, 296.0, 282.0, 294.0, 285.0, 291.0, 288.0, 286.0, 284.0, 285.0, 294.0, 292.0, 287.0, 288.0, 291.0, 289.0, 290.0, 264.0, 266.0, 59.0, 64.0, 288.0, 291.0, 319.0, 320.0, 288.0, 294.0, 288.0, 294.0, 266.0, 259.0, 285.0, 285.0, 284.0, 292.0, 289.0, 298.0, 287.0, 289.0, 261.0, 258.0, 291.0, 291.0, 296.0, 286.0, 290.0, 292.0, 289.0, 290.0, 281.0, 295.0, 260.0, 265.0, 283.0, 299.0, 284.0, 295.0, 256.0, 263.0, 283.0, 290.0, 280.0, 293.0, 260.0, 265.0, 31.0, 35.0, 210.0, 209.0, 289.0, 293.0, 289.0, 287.0, 261.0, 261.0, 292.0, 
287.0, 284.0, 286.0, 257.0, 256.0, 255.0, 258.0, 285.0, 291.0, 294.0, 288.0, 257.0, 270.0, 290.0, 289.0, 291.0, 291.0, 282.0, 288.0, 293.0, 289.0, 291.0, 285.0, 316.0, 311.0, 288.0, 285.0, 292.0, 290.0, 290.0, 289.0, 291.0, 288.0, 293.0, 289.0, 291.0, 285.0, 282.0, 291.0, 263.0, 259.0, 290.0, 292.0, 288.0, 291.0, 288.0, 291.0, 288.0, 282.0, 283.0, 296.0, 285.0, 294.0, 289.0, 293.0, 296.0, 291.0, 287.0, 289.0, 294.0, 288.0, 285.0, 297.0, 259.0, 263.0, 295.0, 287.0, 290.0, 292.0, 290.0, 283.0, 290.0, 289.0, 288.0, 291.0, 283.0, 290.0, 277.0, 290.0, 254.0, 268.0, 282.0, 291.0, 292.0, 281.0, 290.0, 289.0, 285.0, 294.0, 291.0, 288.0, 291.0, 288.0, 289.0, 290.0, 289.0, 287.0, 254.0, 268.0, 289.0, 290.0, 271.0, 296.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.696437434705532, "mean_inference_ms": 1.2431677188040304, "mean_action_processing_ms": 0.1336046089583616, "mean_env_wait_ms": 0.8384268867865966, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10291200, "num_agent_steps_trained": 10291200, "num_env_steps_sampled": 5145600, "num_env_steps_trained": 5145600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5145600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10291200, "timers": {"training_iteration_time_ms": 3608.289, "learn_time_ms": 1113.078, "learn_throughput": 11499.647, "synch_weights_time_ms": 11.262}, "counters": {"num_env_steps_sampled": 5145600, "num_env_steps_trained": 5145600, "num_agent_steps_sampled": 10291200, "num_agent_steps_trained": 10291200}, "done": false, "episodes_total": 12864, "training_iteration": 402, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-37", "timestamp": 1666581997, "time_this_iter_s": 3.6457085609436035, "time_total_s": 1536.4673883914948, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", 
"config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, 
"recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": 
false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": 
false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, 
"evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, 
"callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", 
"observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1536.4673883914948, "timesteps_since_restore": 0, "iterations_since_restore": 402, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.516666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 173.79, "shaped_reward_min": 26, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.84, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.45, "useful_onion_pickup_agent_0_min": 2, 
"useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.62, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.4, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, 
"soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.4, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.4, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, 
"useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0022524138912558556, "policy_loss": -0.0026310046669095755, "vf_loss": 7.907987117767334, "vf_explained_var": 0.5833997130393982, "kl": 0.002337719313800335, "entropy": 0.824416995048523, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5158400, "num_env_steps_trained": 5158400, "num_agent_steps_sampled": 10316800, "num_agent_steps_trained": 10316800}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 66.0, "episode_reward_mean": 563.39, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 281.695}, "custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 173.79, "shaped_reward_min": 26, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, 
"useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.56, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.84, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.45, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.62, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.03, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.4, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.06, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, 
"useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.75, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.03, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.4, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.03, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.4, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, 
"catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 576.0, 525.0, 582.0, 579.0, 519.0, 573.0, 573.0, 525.0, 66.0, 419.0, 582.0, 576.0, 522.0, 579.0, 570.0, 513.0, 513.0, 576.0, 582.0, 527.0, 579.0, 582.0, 570.0, 582.0, 576.0, 627.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 573.0, 522.0, 582.0, 579.0, 579.0, 570.0, 579.0, 579.0, 582.0, 587.0, 576.0, 582.0, 582.0, 522.0, 582.0, 582.0, 573.0, 579.0, 579.0, 573.0, 567.0, 522.0, 573.0, 573.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 522.0, 579.0, 567.0, 579.0, 579.0, 582.0, 579.0, 525.0, 570.0, 579.0, 530.0, 525.0, 573.0, 582.0, 584.0, 573.0, 573.0, 573.0, 527.0, 587.0, 527.0, 570.0, 579.0, 576.0, 579.0, 582.0, 624.0, 579.0, 576.0, 579.0, 579.0, 579.0, 576.0, 587.0, 590.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 289.0, 290.0, 281.0, 295.0, 260.0, 265.0, 283.0, 299.0, 284.0, 295.0, 256.0, 263.0, 283.0, 290.0, 280.0, 293.0, 260.0, 265.0, 31.0, 35.0, 210.0, 209.0, 289.0, 293.0, 289.0, 287.0, 261.0, 261.0, 292.0, 287.0, 284.0, 286.0, 257.0, 256.0, 255.0, 258.0, 285.0, 291.0, 294.0, 288.0, 257.0, 270.0, 290.0, 289.0, 291.0, 291.0, 282.0, 288.0, 293.0, 289.0, 291.0, 285.0, 316.0, 311.0, 288.0, 285.0, 292.0, 290.0, 290.0, 289.0, 291.0, 288.0, 293.0, 289.0, 291.0, 285.0, 282.0, 291.0, 263.0, 259.0, 290.0, 292.0, 288.0, 291.0, 288.0, 291.0, 288.0, 282.0, 283.0, 296.0, 285.0, 294.0, 289.0, 293.0, 296.0, 291.0, 287.0, 289.0, 294.0, 288.0, 285.0, 297.0, 259.0, 263.0, 295.0, 287.0, 290.0, 292.0, 290.0, 283.0, 290.0, 289.0, 288.0, 291.0, 283.0, 290.0, 277.0, 290.0, 254.0, 268.0, 282.0, 291.0, 292.0, 281.0, 290.0, 289.0, 285.0, 294.0, 291.0, 288.0, 291.0, 288.0, 289.0, 290.0, 289.0, 287.0, 254.0, 268.0, 289.0, 290.0, 271.0, 296.0, 289.0, 290.0, 293.0, 286.0, 294.0, 288.0, 285.0, 294.0, 260.0, 265.0, 277.0, 293.0, 286.0, 293.0, 262.0, 268.0, 266.0, 259.0, 286.0, 287.0, 291.0, 291.0, 290.0, 294.0, 284.0, 289.0, 279.0, 294.0, 286.0, 287.0, 260.0, 267.0, 298.0, 289.0, 261.0, 266.0, 282.0, 288.0, 281.0, 298.0, 293.0, 283.0, 288.0, 291.0, 297.0, 285.0, 316.0, 308.0, 294.0, 285.0, 283.0, 293.0, 281.0, 298.0, 293.0, 286.0, 291.0, 288.0, 285.0, 291.0, 301.0, 286.0, 297.0, 293.0, 288.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6963864365610132, "mean_inference_ms": 1.243033695858959, "mean_action_processing_ms": 0.1335977157557835, "mean_env_wait_ms": 0.8383555357581876, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 66.0, "episode_reward_mean": 563.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 
281.695}, "hist_stats": {"episode_reward": [582.0, 579.0, 576.0, 525.0, 582.0, 579.0, 519.0, 573.0, 573.0, 525.0, 66.0, 419.0, 582.0, 576.0, 522.0, 579.0, 570.0, 513.0, 513.0, 576.0, 582.0, 527.0, 579.0, 582.0, 570.0, 582.0, 576.0, 627.0, 573.0, 582.0, 579.0, 579.0, 582.0, 576.0, 573.0, 522.0, 582.0, 579.0, 579.0, 570.0, 579.0, 579.0, 582.0, 587.0, 576.0, 582.0, 582.0, 522.0, 582.0, 582.0, 573.0, 579.0, 579.0, 573.0, 567.0, 522.0, 573.0, 573.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 522.0, 579.0, 567.0, 579.0, 579.0, 582.0, 579.0, 525.0, 570.0, 579.0, 530.0, 525.0, 573.0, 582.0, 584.0, 573.0, 573.0, 573.0, 527.0, 587.0, 527.0, 570.0, 579.0, 576.0, 579.0, 582.0, 624.0, 579.0, 576.0, 579.0, 579.0, 579.0, 576.0, 587.0, 590.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 289.0, 290.0, 281.0, 295.0, 260.0, 265.0, 283.0, 299.0, 284.0, 295.0, 256.0, 263.0, 283.0, 290.0, 280.0, 293.0, 260.0, 265.0, 31.0, 35.0, 210.0, 209.0, 289.0, 293.0, 289.0, 287.0, 261.0, 261.0, 292.0, 287.0, 284.0, 286.0, 257.0, 256.0, 255.0, 258.0, 285.0, 291.0, 294.0, 288.0, 257.0, 270.0, 290.0, 289.0, 291.0, 291.0, 282.0, 288.0, 293.0, 289.0, 291.0, 285.0, 316.0, 311.0, 288.0, 285.0, 292.0, 290.0, 290.0, 289.0, 291.0, 288.0, 293.0, 289.0, 291.0, 285.0, 282.0, 291.0, 263.0, 259.0, 290.0, 292.0, 288.0, 291.0, 288.0, 291.0, 288.0, 282.0, 283.0, 296.0, 285.0, 294.0, 289.0, 293.0, 296.0, 291.0, 287.0, 289.0, 294.0, 288.0, 285.0, 297.0, 259.0, 263.0, 295.0, 287.0, 290.0, 292.0, 290.0, 283.0, 
290.0, 289.0, 288.0, 291.0, 283.0, 290.0, 277.0, 290.0, 254.0, 268.0, 282.0, 291.0, 292.0, 281.0, 290.0, 289.0, 285.0, 294.0, 291.0, 288.0, 291.0, 288.0, 289.0, 290.0, 289.0, 287.0, 254.0, 268.0, 289.0, 290.0, 271.0, 296.0, 289.0, 290.0, 293.0, 286.0, 294.0, 288.0, 285.0, 294.0, 260.0, 265.0, 277.0, 293.0, 286.0, 293.0, 262.0, 268.0, 266.0, 259.0, 286.0, 287.0, 291.0, 291.0, 290.0, 294.0, 284.0, 289.0, 279.0, 294.0, 286.0, 287.0, 260.0, 267.0, 298.0, 289.0, 261.0, 266.0, 282.0, 288.0, 281.0, 298.0, 293.0, 283.0, 288.0, 291.0, 297.0, 285.0, 316.0, 308.0, 294.0, 285.0, 283.0, 293.0, 281.0, 298.0, 293.0, 286.0, 291.0, 288.0, 285.0, 291.0, 301.0, 286.0, 297.0, 293.0, 288.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6963864365610132, "mean_inference_ms": 1.243033695858959, "mean_action_processing_ms": 0.1335977157557835, "mean_env_wait_ms": 0.8383555357581876, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10316800, "num_agent_steps_trained": 10316800, "num_env_steps_sampled": 5158400, "num_env_steps_trained": 5158400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5158400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10316800, "timers": {"training_iteration_time_ms": 3602.134, "learn_time_ms": 1104.185, "learn_throughput": 11592.264, "synch_weights_time_ms": 11.262}, "counters": {"num_env_steps_sampled": 5158400, "num_env_steps_trained": 5158400, "num_agent_steps_sampled": 10316800, "num_agent_steps_trained": 10316800}, "done": false, "episodes_total": 12896, "training_iteration": 403, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-40", "timestamp": 1666582000, "time_this_iter_s": 3.616574764251709, "time_total_s": 1540.0839631557465, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, 
"extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, 
"restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": 
{"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, 
"recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": 
false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": 
false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": 
"independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1540.0839631557465, "timesteps_since_restore": 0, "iterations_since_restore": 403, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.68, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.82, "shaped_reward_min": 142, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.15, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.4, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.98, 
"useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.69, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 
0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.69, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.69, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, 
"useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0013055593008175492, "policy_loss": -0.0016851406544446945, "vf_loss": 7.959023952484131, "vf_explained_var": 0.5701768398284912, "kl": 0.0024630685802549124, "entropy": 0.8326427340507507, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5171200, "num_env_steps_trained": 5171200, "num_agent_steps_sampled": 10342400, "num_agent_steps_trained": 10342400}, "sampler_results": {"episode_reward_max": 624.0, "episode_reward_min": 462.0, "episode_reward_mean": 567.22, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 283.61}, "custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.82, "shaped_reward_min": 142, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, 
"useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.15, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.4, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.98, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 14.93, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.69, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.88, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, 
"useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.76, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 14.93, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.69, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.93, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.69, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, 
"catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 573.0, 522.0, 582.0, 579.0, 579.0, 570.0, 579.0, 579.0, 582.0, 587.0, 576.0, 582.0, 582.0, 522.0, 582.0, 582.0, 573.0, 579.0, 579.0, 573.0, 567.0, 522.0, 573.0, 573.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 522.0, 579.0, 567.0, 579.0, 579.0, 582.0, 579.0, 525.0, 570.0, 579.0, 530.0, 525.0, 573.0, 582.0, 584.0, 573.0, 573.0, 573.0, 527.0, 587.0, 527.0, 570.0, 579.0, 576.0, 579.0, 582.0, 624.0, 579.0, 576.0, 579.0, 579.0, 579.0, 576.0, 587.0, 590.0, 576.0, 522.0, 579.0, 525.0, 473.0, 570.0, 576.0, 522.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 519.0, 576.0, 582.0, 576.0, 525.0, 570.0, 462.0, 582.0, 579.0, 573.0, 570.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 527.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], 
"policy_ppo_reward": [293.0, 289.0, 291.0, 285.0, 282.0, 291.0, 263.0, 259.0, 290.0, 292.0, 288.0, 291.0, 288.0, 291.0, 288.0, 282.0, 283.0, 296.0, 285.0, 294.0, 289.0, 293.0, 296.0, 291.0, 287.0, 289.0, 294.0, 288.0, 285.0, 297.0, 259.0, 263.0, 295.0, 287.0, 290.0, 292.0, 290.0, 283.0, 290.0, 289.0, 288.0, 291.0, 283.0, 290.0, 277.0, 290.0, 254.0, 268.0, 282.0, 291.0, 292.0, 281.0, 290.0, 289.0, 285.0, 294.0, 291.0, 288.0, 291.0, 288.0, 289.0, 290.0, 289.0, 287.0, 254.0, 268.0, 289.0, 290.0, 271.0, 296.0, 289.0, 290.0, 293.0, 286.0, 294.0, 288.0, 285.0, 294.0, 260.0, 265.0, 277.0, 293.0, 286.0, 293.0, 262.0, 268.0, 266.0, 259.0, 286.0, 287.0, 291.0, 291.0, 290.0, 294.0, 284.0, 289.0, 279.0, 294.0, 286.0, 287.0, 260.0, 267.0, 298.0, 289.0, 261.0, 266.0, 282.0, 288.0, 281.0, 298.0, 293.0, 283.0, 288.0, 291.0, 297.0, 285.0, 316.0, 308.0, 294.0, 285.0, 283.0, 293.0, 281.0, 298.0, 293.0, 286.0, 291.0, 288.0, 285.0, 291.0, 301.0, 286.0, 297.0, 293.0, 288.0, 288.0, 263.0, 259.0, 289.0, 290.0, 259.0, 266.0, 235.0, 238.0, 285.0, 285.0, 281.0, 295.0, 261.0, 261.0, 285.0, 294.0, 290.0, 283.0, 289.0, 284.0, 292.0, 287.0, 280.0, 293.0, 291.0, 288.0, 262.0, 257.0, 287.0, 289.0, 285.0, 297.0, 286.0, 290.0, 264.0, 261.0, 279.0, 291.0, 232.0, 230.0, 293.0, 289.0, 288.0, 291.0, 287.0, 286.0, 281.0, 289.0, 295.0, 287.0, 280.0, 299.0, 287.0, 295.0, 264.0, 263.0, 284.0, 286.0, 293.0, 286.0, 258.0, 269.0, 278.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6963418754833549, "mean_inference_ms": 1.2429065862961082, "mean_action_processing_ms": 0.13359205131518181, "mean_env_wait_ms": 0.8382902032666086, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 624.0, "episode_reward_min": 462.0, "episode_reward_mean": 567.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 316.0}, "policy_reward_mean": {"ppo": 283.61}, "hist_stats": {"episode_reward": [582.0, 576.0, 573.0, 522.0, 
582.0, 579.0, 579.0, 570.0, 579.0, 579.0, 582.0, 587.0, 576.0, 582.0, 582.0, 522.0, 582.0, 582.0, 573.0, 579.0, 579.0, 573.0, 567.0, 522.0, 573.0, 573.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 522.0, 579.0, 567.0, 579.0, 579.0, 582.0, 579.0, 525.0, 570.0, 579.0, 530.0, 525.0, 573.0, 582.0, 584.0, 573.0, 573.0, 573.0, 527.0, 587.0, 527.0, 570.0, 579.0, 576.0, 579.0, 582.0, 624.0, 579.0, 576.0, 579.0, 579.0, 579.0, 576.0, 587.0, 590.0, 576.0, 522.0, 579.0, 525.0, 473.0, 570.0, 576.0, 522.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 519.0, 576.0, 582.0, 576.0, 525.0, 570.0, 462.0, 582.0, 579.0, 573.0, 570.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 527.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 291.0, 285.0, 282.0, 291.0, 263.0, 259.0, 290.0, 292.0, 288.0, 291.0, 288.0, 291.0, 288.0, 282.0, 283.0, 296.0, 285.0, 294.0, 289.0, 293.0, 296.0, 291.0, 287.0, 289.0, 294.0, 288.0, 285.0, 297.0, 259.0, 263.0, 295.0, 287.0, 290.0, 292.0, 290.0, 283.0, 290.0, 289.0, 288.0, 291.0, 283.0, 290.0, 277.0, 290.0, 254.0, 268.0, 282.0, 291.0, 292.0, 281.0, 290.0, 289.0, 285.0, 294.0, 291.0, 288.0, 291.0, 288.0, 289.0, 290.0, 289.0, 287.0, 254.0, 268.0, 289.0, 290.0, 271.0, 296.0, 289.0, 290.0, 293.0, 286.0, 294.0, 288.0, 285.0, 294.0, 260.0, 265.0, 277.0, 293.0, 286.0, 293.0, 262.0, 268.0, 266.0, 259.0, 286.0, 287.0, 291.0, 291.0, 290.0, 294.0, 284.0, 289.0, 279.0, 294.0, 286.0, 287.0, 260.0, 267.0, 298.0, 289.0, 261.0, 266.0, 282.0, 288.0, 281.0, 298.0, 293.0, 283.0, 
288.0, 291.0, 297.0, 285.0, 316.0, 308.0, 294.0, 285.0, 283.0, 293.0, 281.0, 298.0, 293.0, 286.0, 291.0, 288.0, 285.0, 291.0, 301.0, 286.0, 297.0, 293.0, 288.0, 288.0, 263.0, 259.0, 289.0, 290.0, 259.0, 266.0, 235.0, 238.0, 285.0, 285.0, 281.0, 295.0, 261.0, 261.0, 285.0, 294.0, 290.0, 283.0, 289.0, 284.0, 292.0, 287.0, 280.0, 293.0, 291.0, 288.0, 262.0, 257.0, 287.0, 289.0, 285.0, 297.0, 286.0, 290.0, 264.0, 261.0, 279.0, 291.0, 232.0, 230.0, 293.0, 289.0, 288.0, 291.0, 287.0, 286.0, 281.0, 289.0, 295.0, 287.0, 280.0, 299.0, 287.0, 295.0, 264.0, 263.0, 284.0, 286.0, 293.0, 286.0, 258.0, 269.0, 278.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6963418754833549, "mean_inference_ms": 1.2429065862961082, "mean_action_processing_ms": 0.13359205131518181, "mean_env_wait_ms": 0.8382902032666086, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10342400, "num_agent_steps_trained": 10342400, "num_env_steps_sampled": 5171200, "num_env_steps_trained": 5171200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5171200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10342400, "timers": {"training_iteration_time_ms": 3612.65, "learn_time_ms": 1108.936, "learn_throughput": 11542.593, "synch_weights_time_ms": 11.868}, "counters": {"num_env_steps_sampled": 5171200, "num_env_steps_trained": 5171200, "num_agent_steps_sampled": 10342400, "num_agent_steps_trained": 10342400}, "done": false, "episodes_total": 12928, "training_iteration": 404, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-44", "timestamp": 1666582004, "time_this_iter_s": 3.6314189434051514, "time_total_s": 1543.7153820991516, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, 
"num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, 
"num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, 
"extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, 
"restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, 
"off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": 
"._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": 
"env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1543.7153820991516, "timesteps_since_restore": 0, "iterations_since_restore": 404, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.96, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.9, "shaped_reward_min": 142, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.88, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.81, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.65, 
"useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.3, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.26, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 
0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.3, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.26, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.3, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.26, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, 
"useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002099148230627179, "policy_loss": -0.0024678613990545273, "vf_loss": 7.786614894866943, "vf_explained_var": 0.5827337503433228, "kl": 0.002610996598377824, "entropy": 0.8198966979980469, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5184000, "num_env_steps_trained": 5184000, "num_agent_steps_sampled": 10368000, "num_agent_steps_trained": 10368000}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 462.0, "episode_reward_mean": 568.1, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 284.05}, "custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.9, "shaped_reward_min": 142, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, 
"useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.88, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.81, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.65, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.3, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.26, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.25, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.16, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, 
"useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.3, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.26, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.3, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.26, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, 
"catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 579.0, 567.0, 579.0, 579.0, 582.0, 579.0, 525.0, 570.0, 579.0, 530.0, 525.0, 573.0, 582.0, 584.0, 573.0, 573.0, 573.0, 527.0, 587.0, 527.0, 570.0, 579.0, 576.0, 579.0, 582.0, 624.0, 579.0, 576.0, 579.0, 579.0, 579.0, 576.0, 587.0, 590.0, 576.0, 522.0, 579.0, 525.0, 473.0, 570.0, 576.0, 522.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 519.0, 576.0, 582.0, 576.0, 525.0, 570.0, 462.0, 582.0, 579.0, 573.0, 570.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 527.0, 567.0, 579.0, 570.0, 582.0, 570.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 525.0, 579.0, 530.0, 570.0, 570.0, 576.0, 570.0, 576.0, 573.0, 579.0, 573.0, 582.0, 584.0, 582.0, 582.0, 587.0, 573.0, 573.0, 579.0, 579.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], 
"policy_ppo_reward": [254.0, 268.0, 289.0, 290.0, 271.0, 296.0, 289.0, 290.0, 293.0, 286.0, 294.0, 288.0, 285.0, 294.0, 260.0, 265.0, 277.0, 293.0, 286.0, 293.0, 262.0, 268.0, 266.0, 259.0, 286.0, 287.0, 291.0, 291.0, 290.0, 294.0, 284.0, 289.0, 279.0, 294.0, 286.0, 287.0, 260.0, 267.0, 298.0, 289.0, 261.0, 266.0, 282.0, 288.0, 281.0, 298.0, 293.0, 283.0, 288.0, 291.0, 297.0, 285.0, 316.0, 308.0, 294.0, 285.0, 283.0, 293.0, 281.0, 298.0, 293.0, 286.0, 291.0, 288.0, 285.0, 291.0, 301.0, 286.0, 297.0, 293.0, 288.0, 288.0, 263.0, 259.0, 289.0, 290.0, 259.0, 266.0, 235.0, 238.0, 285.0, 285.0, 281.0, 295.0, 261.0, 261.0, 285.0, 294.0, 290.0, 283.0, 289.0, 284.0, 292.0, 287.0, 280.0, 293.0, 291.0, 288.0, 262.0, 257.0, 287.0, 289.0, 285.0, 297.0, 286.0, 290.0, 264.0, 261.0, 279.0, 291.0, 232.0, 230.0, 293.0, 289.0, 288.0, 291.0, 287.0, 286.0, 281.0, 289.0, 295.0, 287.0, 280.0, 299.0, 287.0, 295.0, 264.0, 263.0, 284.0, 286.0, 293.0, 286.0, 258.0, 269.0, 278.0, 289.0, 285.0, 294.0, 286.0, 284.0, 292.0, 290.0, 279.0, 291.0, 292.0, 290.0, 287.0, 292.0, 285.0, 291.0, 280.0, 299.0, 317.0, 313.0, 284.0, 292.0, 263.0, 262.0, 283.0, 296.0, 269.0, 261.0, 282.0, 288.0, 280.0, 290.0, 290.0, 286.0, 290.0, 280.0, 288.0, 288.0, 286.0, 287.0, 291.0, 288.0, 290.0, 283.0, 290.0, 292.0, 292.0, 292.0, 288.0, 294.0, 284.0, 298.0, 286.0, 301.0, 291.0, 282.0, 281.0, 292.0, 293.0, 286.0, 290.0, 289.0, 281.0, 292.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6963077094515928, "mean_inference_ms": 1.2427992128696683, "mean_action_processing_ms": 0.13358854048906127, "mean_env_wait_ms": 0.8382407331104629, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 462.0, "episode_reward_mean": 568.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 284.05}, "hist_stats": {"episode_reward": [522.0, 579.0, 567.0, 579.0, 579.0, 
582.0, 579.0, 525.0, 570.0, 579.0, 530.0, 525.0, 573.0, 582.0, 584.0, 573.0, 573.0, 573.0, 527.0, 587.0, 527.0, 570.0, 579.0, 576.0, 579.0, 582.0, 624.0, 579.0, 576.0, 579.0, 579.0, 579.0, 576.0, 587.0, 590.0, 576.0, 522.0, 579.0, 525.0, 473.0, 570.0, 576.0, 522.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 519.0, 576.0, 582.0, 576.0, 525.0, 570.0, 462.0, 582.0, 579.0, 573.0, 570.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 527.0, 567.0, 579.0, 570.0, 582.0, 570.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 525.0, 579.0, 530.0, 570.0, 570.0, 576.0, 570.0, 576.0, 573.0, 579.0, 573.0, 582.0, 584.0, 582.0, 582.0, 587.0, 573.0, 573.0, 579.0, 579.0, 573.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [254.0, 268.0, 289.0, 290.0, 271.0, 296.0, 289.0, 290.0, 293.0, 286.0, 294.0, 288.0, 285.0, 294.0, 260.0, 265.0, 277.0, 293.0, 286.0, 293.0, 262.0, 268.0, 266.0, 259.0, 286.0, 287.0, 291.0, 291.0, 290.0, 294.0, 284.0, 289.0, 279.0, 294.0, 286.0, 287.0, 260.0, 267.0, 298.0, 289.0, 261.0, 266.0, 282.0, 288.0, 281.0, 298.0, 293.0, 283.0, 288.0, 291.0, 297.0, 285.0, 316.0, 308.0, 294.0, 285.0, 283.0, 293.0, 281.0, 298.0, 293.0, 286.0, 291.0, 288.0, 285.0, 291.0, 301.0, 286.0, 297.0, 293.0, 288.0, 288.0, 263.0, 259.0, 289.0, 290.0, 259.0, 266.0, 235.0, 238.0, 285.0, 285.0, 281.0, 295.0, 261.0, 261.0, 285.0, 294.0, 290.0, 283.0, 289.0, 284.0, 292.0, 287.0, 280.0, 293.0, 291.0, 288.0, 262.0, 257.0, 287.0, 289.0, 285.0, 297.0, 286.0, 290.0, 264.0, 261.0, 279.0, 291.0, 232.0, 230.0, 293.0, 
289.0, 288.0, 291.0, 287.0, 286.0, 281.0, 289.0, 295.0, 287.0, 280.0, 299.0, 287.0, 295.0, 264.0, 263.0, 284.0, 286.0, 293.0, 286.0, 258.0, 269.0, 278.0, 289.0, 285.0, 294.0, 286.0, 284.0, 292.0, 290.0, 279.0, 291.0, 292.0, 290.0, 287.0, 292.0, 285.0, 291.0, 280.0, 299.0, 317.0, 313.0, 284.0, 292.0, 263.0, 262.0, 283.0, 296.0, 269.0, 261.0, 282.0, 288.0, 280.0, 290.0, 290.0, 286.0, 290.0, 280.0, 288.0, 288.0, 286.0, 287.0, 291.0, 288.0, 290.0, 283.0, 290.0, 292.0, 292.0, 292.0, 288.0, 294.0, 284.0, 298.0, 286.0, 301.0, 291.0, 282.0, 281.0, 292.0, 293.0, 286.0, 290.0, 289.0, 281.0, 292.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6963077094515928, "mean_inference_ms": 1.2427992128696683, "mean_action_processing_ms": 0.13358854048906127, "mean_env_wait_ms": 0.8382407331104629, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10368000, "num_agent_steps_trained": 10368000, "num_env_steps_sampled": 5184000, "num_env_steps_trained": 5184000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5184000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10368000, "timers": {"training_iteration_time_ms": 3632.726, "learn_time_ms": 1118.741, "learn_throughput": 11441.431, "synch_weights_time_ms": 11.388}, "counters": {"num_env_steps_sampled": 5184000, "num_env_steps_trained": 5184000, "num_agent_steps_sampled": 10368000, "num_agent_steps_trained": 10368000}, "done": false, "episodes_total": 12960, "training_iteration": 405, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-48", "timestamp": 1666582008, "time_this_iter_s": 3.759228229522705, "time_total_s": 1547.4746103286743, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 
1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": 
null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, 
"num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, 
"num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", 
"create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1547.4746103286743, "timesteps_since_restore": 0, "iterations_since_restore": 405, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.95, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 173.99, "shaped_reward_min": 142, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.93, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.49, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.78, "useful_onion_pickup_agent_1_min": 8, 
"useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.04, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.44, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, 
"soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.04, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.44, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.04, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.44, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004778525326400995, "policy_loss": 0.00010655797086656094, "vf_loss": 7.847720146179199, "vf_explained_var": 0.5784578919410706, "kl": 0.002981501165777445, "entropy": 0.8269562721252441, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5196800, "num_env_steps_trained": 5196800, "num_agent_steps_sampled": 10393600, "num_agent_steps_trained": 10393600}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 462.0, "episode_reward_mean": 565.59, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 282.795}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 173.99, "shaped_reward_min": 142, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 
0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.59, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.93, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.49, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.78, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.04, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.44, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 25, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.84, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, 
"useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.04, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.44, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 25, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.04, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.44, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 25, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, 
"catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 587.0, 590.0, 576.0, 522.0, 579.0, 525.0, 473.0, 570.0, 576.0, 522.0, 579.0, 573.0, 573.0, 579.0, 573.0, 579.0, 519.0, 576.0, 582.0, 576.0, 525.0, 570.0, 462.0, 582.0, 579.0, 573.0, 570.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 527.0, 567.0, 579.0, 570.0, 582.0, 570.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 525.0, 579.0, 530.0, 570.0, 570.0, 576.0, 570.0, 576.0, 573.0, 579.0, 573.0, 582.0, 584.0, 582.0, 582.0, 587.0, 573.0, 573.0, 579.0, 579.0, 573.0, 576.0, 576.0, 513.0, 579.0, 579.0, 525.0, 519.0, 579.0, 576.0, 579.0, 569.0, 579.0, 519.0, 582.0, 576.0, 582.0, 579.0, 579.0, 579.0, 525.0, 576.0, 522.0, 525.0, 576.0, 573.0, 573.0, 522.0, 579.0, 525.0, 573.0, 579.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 301.0, 286.0, 297.0, 
293.0, 288.0, 288.0, 263.0, 259.0, 289.0, 290.0, 259.0, 266.0, 235.0, 238.0, 285.0, 285.0, 281.0, 295.0, 261.0, 261.0, 285.0, 294.0, 290.0, 283.0, 289.0, 284.0, 292.0, 287.0, 280.0, 293.0, 291.0, 288.0, 262.0, 257.0, 287.0, 289.0, 285.0, 297.0, 286.0, 290.0, 264.0, 261.0, 279.0, 291.0, 232.0, 230.0, 293.0, 289.0, 288.0, 291.0, 287.0, 286.0, 281.0, 289.0, 295.0, 287.0, 280.0, 299.0, 287.0, 295.0, 264.0, 263.0, 284.0, 286.0, 293.0, 286.0, 258.0, 269.0, 278.0, 289.0, 285.0, 294.0, 286.0, 284.0, 292.0, 290.0, 279.0, 291.0, 292.0, 290.0, 287.0, 292.0, 285.0, 291.0, 280.0, 299.0, 317.0, 313.0, 284.0, 292.0, 263.0, 262.0, 283.0, 296.0, 269.0, 261.0, 282.0, 288.0, 280.0, 290.0, 290.0, 286.0, 290.0, 280.0, 288.0, 288.0, 286.0, 287.0, 291.0, 288.0, 290.0, 283.0, 290.0, 292.0, 292.0, 292.0, 288.0, 294.0, 284.0, 298.0, 286.0, 301.0, 291.0, 282.0, 281.0, 292.0, 293.0, 286.0, 290.0, 289.0, 281.0, 292.0, 283.0, 293.0, 286.0, 290.0, 256.0, 257.0, 290.0, 289.0, 289.0, 290.0, 270.0, 255.0, 260.0, 259.0, 290.0, 289.0, 291.0, 285.0, 286.0, 293.0, 285.0, 284.0, 291.0, 288.0, 253.0, 266.0, 291.0, 291.0, 290.0, 286.0, 288.0, 294.0, 285.0, 294.0, 288.0, 291.0, 293.0, 286.0, 262.0, 263.0, 291.0, 285.0, 272.0, 250.0, 262.0, 263.0, 286.0, 290.0, 286.0, 287.0, 289.0, 284.0, 267.0, 255.0, 292.0, 287.0, 259.0, 266.0, 285.0, 288.0, 288.0, 291.0, 281.0, 289.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6962743444013426, "mean_inference_ms": 1.2426879235154644, "mean_action_processing_ms": 0.13358549381719295, "mean_env_wait_ms": 0.8381940028659765, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 462.0, "episode_reward_mean": 565.59, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 230.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 282.795}, "hist_stats": {"episode_reward": [576.0, 587.0, 590.0, 576.0, 522.0, 579.0, 525.0, 473.0, 570.0, 576.0, 522.0, 579.0, 
573.0, 573.0, 579.0, 573.0, 579.0, 519.0, 576.0, 582.0, 576.0, 525.0, 570.0, 462.0, 582.0, 579.0, 573.0, 570.0, 582.0, 579.0, 582.0, 527.0, 570.0, 579.0, 527.0, 567.0, 579.0, 570.0, 582.0, 570.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 525.0, 579.0, 530.0, 570.0, 570.0, 576.0, 570.0, 576.0, 573.0, 579.0, 573.0, 582.0, 584.0, 582.0, 582.0, 587.0, 573.0, 573.0, 579.0, 579.0, 573.0, 576.0, 576.0, 513.0, 579.0, 579.0, 525.0, 519.0, 579.0, 576.0, 579.0, 569.0, 579.0, 519.0, 582.0, 576.0, 582.0, 579.0, 579.0, 579.0, 525.0, 576.0, 522.0, 525.0, 576.0, 573.0, 573.0, 522.0, 579.0, 525.0, 573.0, 579.0, 570.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 291.0, 301.0, 286.0, 297.0, 293.0, 288.0, 288.0, 263.0, 259.0, 289.0, 290.0, 259.0, 266.0, 235.0, 238.0, 285.0, 285.0, 281.0, 295.0, 261.0, 261.0, 285.0, 294.0, 290.0, 283.0, 289.0, 284.0, 292.0, 287.0, 280.0, 293.0, 291.0, 288.0, 262.0, 257.0, 287.0, 289.0, 285.0, 297.0, 286.0, 290.0, 264.0, 261.0, 279.0, 291.0, 232.0, 230.0, 293.0, 289.0, 288.0, 291.0, 287.0, 286.0, 281.0, 289.0, 295.0, 287.0, 280.0, 299.0, 287.0, 295.0, 264.0, 263.0, 284.0, 286.0, 293.0, 286.0, 258.0, 269.0, 278.0, 289.0, 285.0, 294.0, 286.0, 284.0, 292.0, 290.0, 279.0, 291.0, 292.0, 290.0, 287.0, 292.0, 285.0, 291.0, 280.0, 299.0, 317.0, 313.0, 284.0, 292.0, 263.0, 262.0, 283.0, 296.0, 269.0, 261.0, 282.0, 288.0, 280.0, 290.0, 290.0, 286.0, 290.0, 280.0, 288.0, 288.0, 286.0, 287.0, 291.0, 288.0, 290.0, 283.0, 290.0, 292.0, 292.0, 292.0, 288.0, 294.0, 
284.0, 298.0, 286.0, 301.0, 291.0, 282.0, 281.0, 292.0, 293.0, 286.0, 290.0, 289.0, 281.0, 292.0, 283.0, 293.0, 286.0, 290.0, 256.0, 257.0, 290.0, 289.0, 289.0, 290.0, 270.0, 255.0, 260.0, 259.0, 290.0, 289.0, 291.0, 285.0, 286.0, 293.0, 285.0, 284.0, 291.0, 288.0, 253.0, 266.0, 291.0, 291.0, 290.0, 286.0, 288.0, 294.0, 285.0, 294.0, 288.0, 291.0, 293.0, 286.0, 262.0, 263.0, 291.0, 285.0, 272.0, 250.0, 262.0, 263.0, 286.0, 290.0, 286.0, 287.0, 289.0, 284.0, 267.0, 255.0, 292.0, 287.0, 259.0, 266.0, 285.0, 288.0, 288.0, 291.0, 281.0, 289.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6962743444013426, "mean_inference_ms": 1.2426879235154644, "mean_action_processing_ms": 0.13358549381719295, "mean_env_wait_ms": 0.8381940028659765, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10393600, "num_agent_steps_trained": 10393600, "num_env_steps_sampled": 5196800, "num_env_steps_trained": 5196800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5196800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10393600, "timers": {"training_iteration_time_ms": 3645.848, "learn_time_ms": 1124.775, "learn_throughput": 11380.057, "synch_weights_time_ms": 11.893}, "counters": {"num_env_steps_sampled": 5196800, "num_env_steps_trained": 5196800, "num_agent_steps_sampled": 10393600, "num_agent_steps_trained": 10393600}, "done": false, "episodes_total": 12992, "training_iteration": 406, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-52", "timestamp": 1666582012, "time_this_iter_s": 3.7158660888671875, "time_total_s": 1551.1904764175415, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": 
false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, 
"no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, 
"num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, 
"soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, 
"in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, 
"off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": 
"._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1551.1904764175415, "timesteps_since_restore": 0, "iterations_since_restore": 406, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.63333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.35, "shaped_reward_min": 145, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.8, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.77, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.65, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 
0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.24, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.28, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.37, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 
15.24, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.28, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.24, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.28, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009458856075070798, "policy_loss": -0.0013118607457727194, "vf_loss": 7.801250457763672, "vf_explained_var": 0.5747021436691284, "kl": 0.002498175483196974, "entropy": 0.8282989263534546, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5209600, "num_env_steps_trained": 5209600, "num_agent_steps_sampled": 10419200, "num_agent_steps_trained": 10419200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 465.0, "episode_reward_mean": 567.55, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 283.775}, "custom_metrics": {"sparse_reward_mean": 196.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.35, "shaped_reward_min": 145, "shaped_reward_max": 190, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.8, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.77, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.65, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.24, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.28, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.37, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.84, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.24, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.28, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.24, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.28, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 579.0, 527.0, 567.0, 579.0, 570.0, 582.0, 570.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 525.0, 579.0, 530.0, 570.0, 570.0, 576.0, 570.0, 576.0, 573.0, 579.0, 573.0, 582.0, 584.0, 582.0, 582.0, 587.0, 573.0, 573.0, 579.0, 579.0, 573.0, 576.0, 576.0, 513.0, 579.0, 579.0, 525.0, 519.0, 579.0, 576.0, 579.0, 569.0, 579.0, 519.0, 582.0, 576.0, 582.0, 579.0, 579.0, 579.0, 525.0, 576.0, 522.0, 525.0, 576.0, 573.0, 573.0, 522.0, 579.0, 525.0, 573.0, 579.0, 570.0, 579.0, 573.0, 579.0, 576.0, 582.0, 570.0, 630.0, 525.0, 581.0, 576.0, 579.0, 570.0, 573.0, 579.0, 581.0, 582.0, 573.0, 570.0, 579.0, 579.0, 465.0, 525.0, 573.0, 522.0, 573.0, 579.0, 582.0, 570.0, 522.0, 579.0, 527.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 286.0, 293.0, 286.0, 258.0, 269.0, 278.0, 289.0, 285.0, 294.0, 286.0, 
284.0, 292.0, 290.0, 279.0, 291.0, 292.0, 290.0, 287.0, 292.0, 285.0, 291.0, 280.0, 299.0, 317.0, 313.0, 284.0, 292.0, 263.0, 262.0, 283.0, 296.0, 269.0, 261.0, 282.0, 288.0, 280.0, 290.0, 290.0, 286.0, 290.0, 280.0, 288.0, 288.0, 286.0, 287.0, 291.0, 288.0, 290.0, 283.0, 290.0, 292.0, 292.0, 292.0, 288.0, 294.0, 284.0, 298.0, 286.0, 301.0, 291.0, 282.0, 281.0, 292.0, 293.0, 286.0, 290.0, 289.0, 281.0, 292.0, 283.0, 293.0, 286.0, 290.0, 256.0, 257.0, 290.0, 289.0, 289.0, 290.0, 270.0, 255.0, 260.0, 259.0, 290.0, 289.0, 291.0, 285.0, 286.0, 293.0, 285.0, 284.0, 291.0, 288.0, 253.0, 266.0, 291.0, 291.0, 290.0, 286.0, 288.0, 294.0, 285.0, 294.0, 288.0, 291.0, 293.0, 286.0, 262.0, 263.0, 291.0, 285.0, 272.0, 250.0, 262.0, 263.0, 286.0, 290.0, 286.0, 287.0, 289.0, 284.0, 267.0, 255.0, 292.0, 287.0, 259.0, 266.0, 285.0, 288.0, 288.0, 291.0, 281.0, 289.0, 286.0, 293.0, 285.0, 288.0, 288.0, 291.0, 292.0, 284.0, 298.0, 284.0, 282.0, 288.0, 316.0, 314.0, 253.0, 272.0, 292.0, 289.0, 288.0, 288.0, 293.0, 286.0, 290.0, 280.0, 284.0, 289.0, 291.0, 288.0, 294.0, 287.0, 293.0, 289.0, 284.0, 289.0, 281.0, 289.0, 293.0, 286.0, 292.0, 287.0, 236.0, 229.0, 264.0, 261.0, 286.0, 287.0, 256.0, 266.0, 288.0, 285.0, 284.0, 295.0, 293.0, 289.0, 290.0, 280.0, 255.0, 267.0, 292.0, 287.0, 263.0, 264.0, 290.0, 289.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6962311827286716, "mean_inference_ms": 1.242566188112285, "mean_action_processing_ms": 0.133582226305427, "mean_env_wait_ms": 0.8381410128120548, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 465.0, "episode_reward_mean": 567.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 229.0}, "policy_reward_max": {"ppo": 317.0}, "policy_reward_mean": {"ppo": 283.775}, "hist_stats": {"episode_reward": [570.0, 579.0, 527.0, 567.0, 579.0, 570.0, 582.0, 570.0, 582.0, 579.0, 576.0, 579.0, 630.0, 576.0, 525.0, 579.0, 530.0, 570.0, 570.0, 
576.0, 570.0, 576.0, 573.0, 579.0, 573.0, 582.0, 584.0, 582.0, 582.0, 587.0, 573.0, 573.0, 579.0, 579.0, 573.0, 576.0, 576.0, 513.0, 579.0, 579.0, 525.0, 519.0, 579.0, 576.0, 579.0, 569.0, 579.0, 519.0, 582.0, 576.0, 582.0, 579.0, 579.0, 579.0, 525.0, 576.0, 522.0, 525.0, 576.0, 573.0, 573.0, 522.0, 579.0, 525.0, 573.0, 579.0, 570.0, 579.0, 573.0, 579.0, 576.0, 582.0, 570.0, 630.0, 525.0, 581.0, 576.0, 579.0, 570.0, 573.0, 579.0, 581.0, 582.0, 573.0, 570.0, 579.0, 579.0, 465.0, 525.0, 573.0, 522.0, 573.0, 579.0, 582.0, 570.0, 522.0, 579.0, 527.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 286.0, 293.0, 286.0, 258.0, 269.0, 278.0, 289.0, 285.0, 294.0, 286.0, 284.0, 292.0, 290.0, 279.0, 291.0, 292.0, 290.0, 287.0, 292.0, 285.0, 291.0, 280.0, 299.0, 317.0, 313.0, 284.0, 292.0, 263.0, 262.0, 283.0, 296.0, 269.0, 261.0, 282.0, 288.0, 280.0, 290.0, 290.0, 286.0, 290.0, 280.0, 288.0, 288.0, 286.0, 287.0, 291.0, 288.0, 290.0, 283.0, 290.0, 292.0, 292.0, 292.0, 288.0, 294.0, 284.0, 298.0, 286.0, 301.0, 291.0, 282.0, 281.0, 292.0, 293.0, 286.0, 290.0, 289.0, 281.0, 292.0, 283.0, 293.0, 286.0, 290.0, 256.0, 257.0, 290.0, 289.0, 289.0, 290.0, 270.0, 255.0, 260.0, 259.0, 290.0, 289.0, 291.0, 285.0, 286.0, 293.0, 285.0, 284.0, 291.0, 288.0, 253.0, 266.0, 291.0, 291.0, 290.0, 286.0, 288.0, 294.0, 285.0, 294.0, 288.0, 291.0, 293.0, 286.0, 262.0, 263.0, 291.0, 285.0, 272.0, 250.0, 262.0, 263.0, 286.0, 290.0, 286.0, 287.0, 289.0, 284.0, 267.0, 255.0, 292.0, 287.0, 259.0, 
266.0, 285.0, 288.0, 288.0, 291.0, 281.0, 289.0, 286.0, 293.0, 285.0, 288.0, 288.0, 291.0, 292.0, 284.0, 298.0, 284.0, 282.0, 288.0, 316.0, 314.0, 253.0, 272.0, 292.0, 289.0, 288.0, 288.0, 293.0, 286.0, 290.0, 280.0, 284.0, 289.0, 291.0, 288.0, 294.0, 287.0, 293.0, 289.0, 284.0, 289.0, 281.0, 289.0, 293.0, 286.0, 292.0, 287.0, 236.0, 229.0, 264.0, 261.0, 286.0, 287.0, 256.0, 266.0, 288.0, 285.0, 284.0, 295.0, 293.0, 289.0, 290.0, 280.0, 255.0, 267.0, 292.0, 287.0, 263.0, 264.0, 290.0, 289.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6962311827286716, "mean_inference_ms": 1.242566188112285, "mean_action_processing_ms": 0.133582226305427, "mean_env_wait_ms": 0.8381410128120548, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10419200, "num_agent_steps_trained": 10419200, "num_env_steps_sampled": 5209600, "num_env_steps_trained": 5209600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5209600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10419200, "timers": {"training_iteration_time_ms": 3655.211, "learn_time_ms": 1130.41, "learn_throughput": 11323.327, "synch_weights_time_ms": 11.094}, "counters": {"num_env_steps_sampled": 5209600, "num_env_steps_trained": 5209600, "num_agent_steps_sampled": 10419200, "num_agent_steps_trained": 10419200}, "done": false, "episodes_total": 13024, "training_iteration": 407, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-26-56", "timestamp": 1666582016, "time_this_iter_s": 3.7446985244750977, "time_total_s": 1554.9351749420166, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1554.9351749420166, "timesteps_since_restore": 0, "iterations_since_restore": 407, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.44, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 172.7, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.54, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.69, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.36, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.49, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.89, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.05, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010337545536458492, "policy_loss": -0.0014150127535685897, "vf_loss": 7.953579902648926, "vf_explained_var": 0.5916837453842163, "kl": 0.00292446231469512, "entropy": 0.828201174736023, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5222400, "num_env_steps_trained": 5222400, "num_agent_steps_sampled": 10444800, "num_agent_steps_trained": 10444800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 222.0, "episode_reward_mean": 559.5, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 99.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.75}, "custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 172.7, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.54, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.69, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.36, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.49, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.05, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 4.89, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.73, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.7, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.66, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.05, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.05, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 573.0, 576.0, 576.0, 513.0, 579.0, 579.0, 525.0, 519.0, 579.0, 576.0, 579.0, 569.0, 579.0, 519.0, 582.0, 576.0, 582.0, 579.0, 579.0, 579.0, 525.0, 576.0, 522.0, 525.0, 576.0, 573.0, 573.0, 522.0, 579.0, 525.0, 573.0, 579.0, 570.0, 579.0, 573.0, 579.0, 576.0, 582.0, 570.0, 630.0, 525.0, 581.0, 576.0, 579.0, 570.0, 573.0, 579.0, 581.0, 582.0, 573.0, 570.0, 579.0, 579.0, 465.0, 525.0, 573.0, 522.0, 573.0, 579.0, 582.0, 570.0, 522.0, 579.0, 527.0, 579.0, 579.0, 579.0, 573.0, 582.0, 579.0, 530.0, 582.0, 582.0, 222.0, 570.0, 516.0, 576.0, 633.0, 582.0, 516.0, 582.0, 525.0, 582.0, 587.0, 522.0, 584.0, 305.0, 573.0, 582.0, 630.0, 576.0, 525.0, 573.0, 527.0, 576.0, 570.0, 582.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 290.0, 289.0, 281.0, 292.0, 283.0, 293.0, 286.0, 290.0, 256.0, 
257.0, 290.0, 289.0, 289.0, 290.0, 270.0, 255.0, 260.0, 259.0, 290.0, 289.0, 291.0, 285.0, 286.0, 293.0, 285.0, 284.0, 291.0, 288.0, 253.0, 266.0, 291.0, 291.0, 290.0, 286.0, 288.0, 294.0, 285.0, 294.0, 288.0, 291.0, 293.0, 286.0, 262.0, 263.0, 291.0, 285.0, 272.0, 250.0, 262.0, 263.0, 286.0, 290.0, 286.0, 287.0, 289.0, 284.0, 267.0, 255.0, 292.0, 287.0, 259.0, 266.0, 285.0, 288.0, 288.0, 291.0, 281.0, 289.0, 286.0, 293.0, 285.0, 288.0, 288.0, 291.0, 292.0, 284.0, 298.0, 284.0, 282.0, 288.0, 316.0, 314.0, 253.0, 272.0, 292.0, 289.0, 288.0, 288.0, 293.0, 286.0, 290.0, 280.0, 284.0, 289.0, 291.0, 288.0, 294.0, 287.0, 293.0, 289.0, 284.0, 289.0, 281.0, 289.0, 293.0, 286.0, 292.0, 287.0, 236.0, 229.0, 264.0, 261.0, 286.0, 287.0, 256.0, 266.0, 288.0, 285.0, 284.0, 295.0, 293.0, 289.0, 290.0, 280.0, 255.0, 267.0, 292.0, 287.0, 263.0, 264.0, 290.0, 289.0, 288.0, 291.0, 296.0, 283.0, 287.0, 286.0, 287.0, 295.0, 294.0, 285.0, 260.0, 270.0, 291.0, 291.0, 292.0, 290.0, 123.0, 99.0, 280.0, 290.0, 256.0, 260.0, 290.0, 286.0, 317.0, 316.0, 293.0, 289.0, 252.0, 264.0, 294.0, 288.0, 260.0, 265.0, 295.0, 287.0, 293.0, 294.0, 262.0, 260.0, 290.0, 294.0, 151.0, 154.0, 282.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 288.0, 258.0, 267.0, 291.0, 282.0, 260.0, 267.0, 280.0, 296.0, 287.0, 283.0, 295.0, 287.0, 261.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6961734234605836, "mean_inference_ms": 1.242427938000899, "mean_action_processing_ms": 0.1335768680504018, "mean_env_wait_ms": 0.8380733234527808, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 222.0, "episode_reward_mean": 559.5, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 99.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 279.75}, "hist_stats": {"episode_reward": [579.0, 579.0, 573.0, 576.0, 576.0, 513.0, 579.0, 579.0, 525.0, 519.0, 579.0, 576.0, 579.0, 569.0, 579.0, 519.0, 582.0, 576.0, 582.0, 
579.0, 579.0, 579.0, 525.0, 576.0, 522.0, 525.0, 576.0, 573.0, 573.0, 522.0, 579.0, 525.0, 573.0, 579.0, 570.0, 579.0, 573.0, 579.0, 576.0, 582.0, 570.0, 630.0, 525.0, 581.0, 576.0, 579.0, 570.0, 573.0, 579.0, 581.0, 582.0, 573.0, 570.0, 579.0, 579.0, 465.0, 525.0, 573.0, 522.0, 573.0, 579.0, 582.0, 570.0, 522.0, 579.0, 527.0, 579.0, 579.0, 579.0, 573.0, 582.0, 579.0, 530.0, 582.0, 582.0, 222.0, 570.0, 516.0, 576.0, 633.0, 582.0, 516.0, 582.0, 525.0, 582.0, 587.0, 522.0, 584.0, 305.0, 573.0, 582.0, 630.0, 576.0, 525.0, 573.0, 527.0, 576.0, 570.0, 582.0, 522.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 290.0, 289.0, 281.0, 292.0, 283.0, 293.0, 286.0, 290.0, 256.0, 257.0, 290.0, 289.0, 289.0, 290.0, 270.0, 255.0, 260.0, 259.0, 290.0, 289.0, 291.0, 285.0, 286.0, 293.0, 285.0, 284.0, 291.0, 288.0, 253.0, 266.0, 291.0, 291.0, 290.0, 286.0, 288.0, 294.0, 285.0, 294.0, 288.0, 291.0, 293.0, 286.0, 262.0, 263.0, 291.0, 285.0, 272.0, 250.0, 262.0, 263.0, 286.0, 290.0, 286.0, 287.0, 289.0, 284.0, 267.0, 255.0, 292.0, 287.0, 259.0, 266.0, 285.0, 288.0, 288.0, 291.0, 281.0, 289.0, 286.0, 293.0, 285.0, 288.0, 288.0, 291.0, 292.0, 284.0, 298.0, 284.0, 282.0, 288.0, 316.0, 314.0, 253.0, 272.0, 292.0, 289.0, 288.0, 288.0, 293.0, 286.0, 290.0, 280.0, 284.0, 289.0, 291.0, 288.0, 294.0, 287.0, 293.0, 289.0, 284.0, 289.0, 281.0, 289.0, 293.0, 286.0, 292.0, 287.0, 236.0, 229.0, 264.0, 261.0, 286.0, 287.0, 256.0, 266.0, 288.0, 285.0, 284.0, 295.0, 293.0, 289.0, 290.0, 280.0, 255.0, 
267.0, 292.0, 287.0, 263.0, 264.0, 290.0, 289.0, 288.0, 291.0, 296.0, 283.0, 287.0, 286.0, 287.0, 295.0, 294.0, 285.0, 260.0, 270.0, 291.0, 291.0, 292.0, 290.0, 123.0, 99.0, 280.0, 290.0, 256.0, 260.0, 290.0, 286.0, 317.0, 316.0, 293.0, 289.0, 252.0, 264.0, 294.0, 288.0, 260.0, 265.0, 295.0, 287.0, 293.0, 294.0, 262.0, 260.0, 290.0, 294.0, 151.0, 154.0, 282.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 288.0, 258.0, 267.0, 291.0, 282.0, 260.0, 267.0, 280.0, 296.0, 287.0, 283.0, 295.0, 287.0, 261.0, 261.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6961734234605836, "mean_inference_ms": 1.242427938000899, "mean_action_processing_ms": 0.1335768680504018, "mean_env_wait_ms": 0.8380733234527808, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10444800, "num_agent_steps_trained": 10444800, "num_env_steps_sampled": 5222400, "num_env_steps_trained": 5222400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5222400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10444800, "timers": {"training_iteration_time_ms": 3618.454, "learn_time_ms": 1124.575, "learn_throughput": 11382.075, "synch_weights_time_ms": 11.401}, "counters": {"num_env_steps_sampled": 5222400, "num_env_steps_trained": 5222400, "num_agent_steps_sampled": 10444800, "num_agent_steps_trained": 10444800}, "done": false, "episodes_total": 13056, "training_iteration": 408, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-00", "timestamp": 1666582020, "time_this_iter_s": 3.608797073364258, "time_total_s": 1558.5439720153809, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1558.5439720153809, "timesteps_since_restore": 0, "iterations_since_restore": 408, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.833333333333332, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.89, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.92, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.42, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.7, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.24, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.47, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.92, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.82, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.47, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.92, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.47, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.92, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0002713116118684411, "policy_loss": -0.00010633841156959534, "vf_loss": 7.856367588043213, "vf_explained_var": 0.5783417224884033, "kl": 0.003222328145056963, "entropy": 0.815973699092865, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5235200, "num_env_steps_trained": 5235200, "num_agent_steps_sampled": 10470400, "num_agent_steps_trained": 10470400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 222.0, "episode_reward_mean": 562.69, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 99.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 281.345}, "custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.89, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.92, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.42, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.7, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.24, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.47, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.92, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.96, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.88, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.82, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.47, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.92, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.47, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.92, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 579.0, 570.0, 579.0, 573.0, 579.0, 576.0, 582.0, 570.0, 630.0, 525.0, 581.0, 576.0, 579.0, 570.0, 573.0, 579.0, 581.0, 582.0, 573.0, 570.0, 579.0, 579.0, 465.0, 525.0, 573.0, 522.0, 573.0, 579.0, 582.0, 570.0, 522.0, 579.0, 527.0, 579.0, 579.0, 579.0, 573.0, 582.0, 579.0, 530.0, 582.0, 582.0, 222.0, 570.0, 516.0, 576.0, 633.0, 582.0, 516.0, 582.0, 525.0, 582.0, 587.0, 522.0, 584.0, 305.0, 573.0, 582.0, 630.0, 576.0, 525.0, 573.0, 527.0, 576.0, 570.0, 582.0, 522.0, 587.0, 576.0, 522.0, 573.0, 573.0, 579.0, 582.0, 576.0, 582.0, 582.0, 579.0, 579.0, 576.0, 573.0, 576.0, 584.0, 573.0, 579.0, 530.0, 579.0, 576.0, 576.0, 576.0, 582.0, 582.0, 582.0, 579.0, 573.0, 579.0, 582.0, 525.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 288.0, 291.0, 281.0, 289.0, 286.0, 293.0, 285.0, 288.0, 288.0, 
291.0, 292.0, 284.0, 298.0, 284.0, 282.0, 288.0, 316.0, 314.0, 253.0, 272.0, 292.0, 289.0, 288.0, 288.0, 293.0, 286.0, 290.0, 280.0, 284.0, 289.0, 291.0, 288.0, 294.0, 287.0, 293.0, 289.0, 284.0, 289.0, 281.0, 289.0, 293.0, 286.0, 292.0, 287.0, 236.0, 229.0, 264.0, 261.0, 286.0, 287.0, 256.0, 266.0, 288.0, 285.0, 284.0, 295.0, 293.0, 289.0, 290.0, 280.0, 255.0, 267.0, 292.0, 287.0, 263.0, 264.0, 290.0, 289.0, 288.0, 291.0, 296.0, 283.0, 287.0, 286.0, 287.0, 295.0, 294.0, 285.0, 260.0, 270.0, 291.0, 291.0, 292.0, 290.0, 123.0, 99.0, 280.0, 290.0, 256.0, 260.0, 290.0, 286.0, 317.0, 316.0, 293.0, 289.0, 252.0, 264.0, 294.0, 288.0, 260.0, 265.0, 295.0, 287.0, 293.0, 294.0, 262.0, 260.0, 290.0, 294.0, 151.0, 154.0, 282.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 288.0, 258.0, 267.0, 291.0, 282.0, 260.0, 267.0, 280.0, 296.0, 287.0, 283.0, 295.0, 287.0, 261.0, 261.0, 297.0, 290.0, 292.0, 284.0, 259.0, 263.0, 278.0, 295.0, 291.0, 282.0, 291.0, 288.0, 284.0, 298.0, 291.0, 285.0, 292.0, 290.0, 293.0, 289.0, 290.0, 289.0, 288.0, 291.0, 285.0, 291.0, 292.0, 281.0, 291.0, 285.0, 288.0, 296.0, 286.0, 287.0, 286.0, 293.0, 259.0, 271.0, 283.0, 296.0, 289.0, 287.0, 285.0, 291.0, 289.0, 287.0, 288.0, 294.0, 290.0, 292.0, 293.0, 289.0, 291.0, 288.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 266.0, 259.0, 268.0, 251.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.696108511703455, "mean_inference_ms": 1.242292456608421, "mean_action_processing_ms": 0.13357113804808782, "mean_env_wait_ms": 0.8380045678347366, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 222.0, "episode_reward_mean": 562.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 99.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 281.345}, "hist_stats": {"episode_reward": [573.0, 579.0, 570.0, 579.0, 573.0, 579.0, 576.0, 582.0, 570.0, 630.0, 525.0, 581.0, 576.0, 579.0, 570.0, 573.0, 579.0, 581.0, 582.0, 
573.0, 570.0, 579.0, 579.0, 465.0, 525.0, 573.0, 522.0, 573.0, 579.0, 582.0, 570.0, 522.0, 579.0, 527.0, 579.0, 579.0, 579.0, 573.0, 582.0, 579.0, 530.0, 582.0, 582.0, 222.0, 570.0, 516.0, 576.0, 633.0, 582.0, 516.0, 582.0, 525.0, 582.0, 587.0, 522.0, 584.0, 305.0, 573.0, 582.0, 630.0, 576.0, 525.0, 573.0, 527.0, 576.0, 570.0, 582.0, 522.0, 587.0, 576.0, 522.0, 573.0, 573.0, 579.0, 582.0, 576.0, 582.0, 582.0, 579.0, 579.0, 576.0, 573.0, 576.0, 584.0, 573.0, 579.0, 530.0, 579.0, 576.0, 576.0, 576.0, 582.0, 582.0, 582.0, 579.0, 573.0, 579.0, 582.0, 525.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 288.0, 288.0, 291.0, 281.0, 289.0, 286.0, 293.0, 285.0, 288.0, 288.0, 291.0, 292.0, 284.0, 298.0, 284.0, 282.0, 288.0, 316.0, 314.0, 253.0, 272.0, 292.0, 289.0, 288.0, 288.0, 293.0, 286.0, 290.0, 280.0, 284.0, 289.0, 291.0, 288.0, 294.0, 287.0, 293.0, 289.0, 284.0, 289.0, 281.0, 289.0, 293.0, 286.0, 292.0, 287.0, 236.0, 229.0, 264.0, 261.0, 286.0, 287.0, 256.0, 266.0, 288.0, 285.0, 284.0, 295.0, 293.0, 289.0, 290.0, 280.0, 255.0, 267.0, 292.0, 287.0, 263.0, 264.0, 290.0, 289.0, 288.0, 291.0, 296.0, 283.0, 287.0, 286.0, 287.0, 295.0, 294.0, 285.0, 260.0, 270.0, 291.0, 291.0, 292.0, 290.0, 123.0, 99.0, 280.0, 290.0, 256.0, 260.0, 290.0, 286.0, 317.0, 316.0, 293.0, 289.0, 252.0, 264.0, 294.0, 288.0, 260.0, 265.0, 295.0, 287.0, 293.0, 294.0, 262.0, 260.0, 290.0, 294.0, 151.0, 154.0, 282.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 288.0, 258.0, 267.0, 291.0, 282.0, 260.0, 
267.0, 280.0, 296.0, 287.0, 283.0, 295.0, 287.0, 261.0, 261.0, 297.0, 290.0, 292.0, 284.0, 259.0, 263.0, 278.0, 295.0, 291.0, 282.0, 291.0, 288.0, 284.0, 298.0, 291.0, 285.0, 292.0, 290.0, 293.0, 289.0, 290.0, 289.0, 288.0, 291.0, 285.0, 291.0, 292.0, 281.0, 291.0, 285.0, 288.0, 296.0, 286.0, 287.0, 286.0, 293.0, 259.0, 271.0, 283.0, 296.0, 289.0, 287.0, 285.0, 291.0, 289.0, 287.0, 288.0, 294.0, 290.0, 292.0, 293.0, 289.0, 291.0, 288.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 266.0, 259.0, 268.0, 251.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.696108511703455, "mean_inference_ms": 1.242292456608421, "mean_action_processing_ms": 0.13357113804808782, "mean_env_wait_ms": 0.8380045678347366, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10470400, "num_agent_steps_trained": 10470400, "num_env_steps_sampled": 5235200, "num_env_steps_trained": 5235200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5235200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10470400, "timers": {"training_iteration_time_ms": 3606.824, "learn_time_ms": 1117.374, "learn_throughput": 11455.429, "synch_weights_time_ms": 11.576}, "counters": {"num_env_steps_sampled": 5235200, "num_env_steps_trained": 5235200, "num_agent_steps_sampled": 10470400, "num_agent_steps_trained": 10470400}, "done": false, "episodes_total": 13088, "training_iteration": 409, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-03", "timestamp": 1666582023, "time_this_iter_s": 3.6459784507751465, "time_total_s": 1562.189950466156, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1562.189950466156, "timesteps_since_restore": 0, "iterations_since_restore": 409, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.54, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 174.21, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.77, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.6, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.59, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.44, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.39, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.06, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.39, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.06, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.39, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.06, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010149907320737839, "policy_loss": 0.0006468441570177674, "vf_loss": 7.79993200302124, "vf_explained_var": 0.5932613611221313, "kl": 0.002992228837683797, "entropy": 0.8236936330795288, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5248000, "num_env_steps_trained": 5248000, "num_agent_steps_sampled": 10496000, "num_agent_steps_trained": 10496000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 222.0, "episode_reward_mean": 562.21, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 99.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 281.105}, "custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 174.21, "shaped_reward_min": 102, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.77, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.6, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 15.59, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.44, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.15, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.39, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.06, "potting_onion_agent_1_min": 6, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.95, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.05, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.79, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.73, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.39, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.06, "optimal_onion_potting_agent_1_min": 6, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.39, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.06, "viable_onion_potting_agent_1_min": 6, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 527.0, 579.0, 579.0, 579.0, 573.0, 582.0, 579.0, 530.0, 582.0, 582.0, 222.0, 570.0, 516.0, 576.0, 633.0, 582.0, 516.0, 582.0, 525.0, 582.0, 587.0, 522.0, 584.0, 305.0, 573.0, 582.0, 630.0, 576.0, 525.0, 573.0, 527.0, 576.0, 570.0, 582.0, 522.0, 587.0, 576.0, 522.0, 573.0, 573.0, 579.0, 582.0, 576.0, 582.0, 582.0, 579.0, 579.0, 576.0, 573.0, 576.0, 584.0, 573.0, 579.0, 530.0, 579.0, 576.0, 576.0, 576.0, 582.0, 582.0, 582.0, 579.0, 573.0, 579.0, 582.0, 525.0, 519.0, 576.0, 573.0, 582.0, 576.0, 576.0, 582.0, 525.0, 570.0, 579.0, 525.0, 522.0, 525.0, 522.0, 582.0, 573.0, 587.0, 582.0, 576.0, 576.0, 576.0, 522.0, 576.0, 582.0, 582.0, 579.0, 573.0, 579.0, 576.0, 579.0, 587.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 287.0, 263.0, 264.0, 290.0, 289.0, 288.0, 291.0, 296.0, 283.0, 287.0, 
286.0, 287.0, 295.0, 294.0, 285.0, 260.0, 270.0, 291.0, 291.0, 292.0, 290.0, 123.0, 99.0, 280.0, 290.0, 256.0, 260.0, 290.0, 286.0, 317.0, 316.0, 293.0, 289.0, 252.0, 264.0, 294.0, 288.0, 260.0, 265.0, 295.0, 287.0, 293.0, 294.0, 262.0, 260.0, 290.0, 294.0, 151.0, 154.0, 282.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 288.0, 258.0, 267.0, 291.0, 282.0, 260.0, 267.0, 280.0, 296.0, 287.0, 283.0, 295.0, 287.0, 261.0, 261.0, 297.0, 290.0, 292.0, 284.0, 259.0, 263.0, 278.0, 295.0, 291.0, 282.0, 291.0, 288.0, 284.0, 298.0, 291.0, 285.0, 292.0, 290.0, 293.0, 289.0, 290.0, 289.0, 288.0, 291.0, 285.0, 291.0, 292.0, 281.0, 291.0, 285.0, 288.0, 296.0, 286.0, 287.0, 286.0, 293.0, 259.0, 271.0, 283.0, 296.0, 289.0, 287.0, 285.0, 291.0, 289.0, 287.0, 288.0, 294.0, 290.0, 292.0, 293.0, 289.0, 291.0, 288.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 266.0, 259.0, 268.0, 251.0, 289.0, 287.0, 280.0, 293.0, 291.0, 291.0, 292.0, 284.0, 291.0, 285.0, 291.0, 291.0, 256.0, 269.0, 281.0, 289.0, 288.0, 291.0, 260.0, 265.0, 262.0, 260.0, 259.0, 266.0, 265.0, 257.0, 294.0, 288.0, 287.0, 286.0, 298.0, 289.0, 296.0, 286.0, 288.0, 288.0, 291.0, 285.0, 290.0, 286.0, 256.0, 266.0, 290.0, 286.0, 291.0, 291.0, 294.0, 288.0, 290.0, 289.0, 293.0, 280.0, 291.0, 288.0, 286.0, 290.0, 289.0, 290.0, 296.0, 291.0, 258.0, 264.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6960422117755253, "mean_inference_ms": 1.242148343321641, "mean_action_processing_ms": 0.13356248071098079, "mean_env_wait_ms": 0.8379216638832903, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 222.0, "episode_reward_mean": 562.21, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 99.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 281.105}, "hist_stats": {"episode_reward": [579.0, 527.0, 579.0, 579.0, 579.0, 573.0, 582.0, 579.0, 530.0, 582.0, 582.0, 222.0, 570.0, 516.0, 576.0, 633.0, 582.0, 516.0, 582.0, 
525.0, 582.0, 587.0, 522.0, 584.0, 305.0, 573.0, 582.0, 630.0, 576.0, 525.0, 573.0, 527.0, 576.0, 570.0, 582.0, 522.0, 587.0, 576.0, 522.0, 573.0, 573.0, 579.0, 582.0, 576.0, 582.0, 582.0, 579.0, 579.0, 576.0, 573.0, 576.0, 584.0, 573.0, 579.0, 530.0, 579.0, 576.0, 576.0, 576.0, 582.0, 582.0, 582.0, 579.0, 573.0, 579.0, 582.0, 525.0, 519.0, 576.0, 573.0, 582.0, 576.0, 576.0, 582.0, 525.0, 570.0, 579.0, 525.0, 522.0, 525.0, 522.0, 582.0, 573.0, 587.0, 582.0, 576.0, 576.0, 576.0, 522.0, 576.0, 582.0, 582.0, 579.0, 573.0, 579.0, 576.0, 579.0, 587.0, 522.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 287.0, 263.0, 264.0, 290.0, 289.0, 288.0, 291.0, 296.0, 283.0, 287.0, 286.0, 287.0, 295.0, 294.0, 285.0, 260.0, 270.0, 291.0, 291.0, 292.0, 290.0, 123.0, 99.0, 280.0, 290.0, 256.0, 260.0, 290.0, 286.0, 317.0, 316.0, 293.0, 289.0, 252.0, 264.0, 294.0, 288.0, 260.0, 265.0, 295.0, 287.0, 293.0, 294.0, 262.0, 260.0, 290.0, 294.0, 151.0, 154.0, 282.0, 291.0, 293.0, 289.0, 311.0, 319.0, 288.0, 288.0, 258.0, 267.0, 291.0, 282.0, 260.0, 267.0, 280.0, 296.0, 287.0, 283.0, 295.0, 287.0, 261.0, 261.0, 297.0, 290.0, 292.0, 284.0, 259.0, 263.0, 278.0, 295.0, 291.0, 282.0, 291.0, 288.0, 284.0, 298.0, 291.0, 285.0, 292.0, 290.0, 293.0, 289.0, 290.0, 289.0, 288.0, 291.0, 285.0, 291.0, 292.0, 281.0, 291.0, 285.0, 288.0, 296.0, 286.0, 287.0, 286.0, 293.0, 259.0, 271.0, 283.0, 296.0, 289.0, 287.0, 285.0, 291.0, 289.0, 287.0, 288.0, 294.0, 290.0, 292.0, 293.0, 289.0, 291.0, 288.0, 285.0, 
288.0, 291.0, 288.0, 291.0, 291.0, 266.0, 259.0, 268.0, 251.0, 289.0, 287.0, 280.0, 293.0, 291.0, 291.0, 292.0, 284.0, 291.0, 285.0, 291.0, 291.0, 256.0, 269.0, 281.0, 289.0, 288.0, 291.0, 260.0, 265.0, 262.0, 260.0, 259.0, 266.0, 265.0, 257.0, 294.0, 288.0, 287.0, 286.0, 298.0, 289.0, 296.0, 286.0, 288.0, 288.0, 291.0, 285.0, 290.0, 286.0, 256.0, 266.0, 290.0, 286.0, 291.0, 291.0, 294.0, 288.0, 290.0, 289.0, 293.0, 280.0, 291.0, 288.0, 286.0, 290.0, 289.0, 290.0, 296.0, 291.0, 258.0, 264.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6960422117755253, "mean_inference_ms": 1.242148343321641, "mean_action_processing_ms": 0.13356248071098079, "mean_env_wait_ms": 0.8379216638832903, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10496000, "num_agent_steps_trained": 10496000, "num_env_steps_sampled": 5248000, "num_env_steps_trained": 5248000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5248000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10496000, "timers": {"training_iteration_time_ms": 3604.876, "learn_time_ms": 1118.078, "learn_throughput": 11448.214, "synch_weights_time_ms": 11.981}, "counters": {"num_env_steps_sampled": 5248000, "num_env_steps_trained": 5248000, "num_agent_steps_sampled": 10496000, "num_agent_steps_trained": 10496000}, "done": false, "episodes_total": 13120, "training_iteration": 410, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-07", "timestamp": 1666582027, "time_this_iter_s": 3.6446609497070312, "time_total_s": 1565.834611415863, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1565.834611415863, "timesteps_since_restore": 0, "iterations_since_restore": 410, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.4, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.19, "shaped_reward_min": 148, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.48, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.93, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.33, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.69, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.97, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.69, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.97, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.69, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.97, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.00011297990567982197, "policy_loss": -0.0004916824400424957, "vf_loss": 7.8775482177734375, "vf_explained_var": 0.5837694406509399, "kl": 0.0028352581430226564, "entropy": 0.8181036114692688, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5260800, "num_env_steps_trained": 5260800, "num_agent_steps_sampled": 10521600, "num_agent_steps_trained": 10521600}, "sampler_results": {"episode_reward_max": 627.0, "episode_reward_min": 468.0, "episode_reward_mean": 567.19, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 283.595}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.19, "shaped_reward_min": 148, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.48, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.93, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.33, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 3, "useful_onion_drop_agent_1_mean": 0.09, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.69, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.97, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.24, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 4.98, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.86, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.69, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.97, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.69, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.97, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 570.0, 582.0, 522.0, 587.0, 576.0, 522.0, 573.0, 573.0, 579.0, 582.0, 576.0, 582.0, 582.0, 579.0, 579.0, 576.0, 573.0, 576.0, 584.0, 573.0, 579.0, 530.0, 579.0, 576.0, 576.0, 576.0, 582.0, 582.0, 582.0, 579.0, 573.0, 579.0, 582.0, 525.0, 519.0, 576.0, 573.0, 582.0, 576.0, 576.0, 582.0, 525.0, 570.0, 579.0, 525.0, 522.0, 525.0, 522.0, 582.0, 573.0, 587.0, 582.0, 576.0, 576.0, 576.0, 522.0, 576.0, 582.0, 582.0, 579.0, 573.0, 579.0, 576.0, 579.0, 587.0, 522.0, 579.0, 579.0, 519.0, 573.0, 593.0, 579.0, 579.0, 579.0, 576.0, 576.0, 573.0, 570.0, 570.0, 525.0, 522.0, 582.0, 570.0, 573.0, 525.0, 516.0, 579.0, 576.0, 530.0, 627.0, 579.0, 579.0, 579.0, 539.0, 468.0, 582.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 296.0, 287.0, 283.0, 295.0, 287.0, 261.0, 261.0, 297.0, 290.0, 292.0, 
284.0, 259.0, 263.0, 278.0, 295.0, 291.0, 282.0, 291.0, 288.0, 284.0, 298.0, 291.0, 285.0, 292.0, 290.0, 293.0, 289.0, 290.0, 289.0, 288.0, 291.0, 285.0, 291.0, 292.0, 281.0, 291.0, 285.0, 288.0, 296.0, 286.0, 287.0, 286.0, 293.0, 259.0, 271.0, 283.0, 296.0, 289.0, 287.0, 285.0, 291.0, 289.0, 287.0, 288.0, 294.0, 290.0, 292.0, 293.0, 289.0, 291.0, 288.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 266.0, 259.0, 268.0, 251.0, 289.0, 287.0, 280.0, 293.0, 291.0, 291.0, 292.0, 284.0, 291.0, 285.0, 291.0, 291.0, 256.0, 269.0, 281.0, 289.0, 288.0, 291.0, 260.0, 265.0, 262.0, 260.0, 259.0, 266.0, 265.0, 257.0, 294.0, 288.0, 287.0, 286.0, 298.0, 289.0, 296.0, 286.0, 288.0, 288.0, 291.0, 285.0, 290.0, 286.0, 256.0, 266.0, 290.0, 286.0, 291.0, 291.0, 294.0, 288.0, 290.0, 289.0, 293.0, 280.0, 291.0, 288.0, 286.0, 290.0, 289.0, 290.0, 296.0, 291.0, 258.0, 264.0, 296.0, 283.0, 292.0, 287.0, 249.0, 270.0, 286.0, 287.0, 297.0, 296.0, 296.0, 283.0, 291.0, 288.0, 290.0, 289.0, 292.0, 284.0, 289.0, 287.0, 288.0, 285.0, 283.0, 287.0, 287.0, 283.0, 263.0, 262.0, 256.0, 266.0, 289.0, 293.0, 278.0, 292.0, 288.0, 285.0, 260.0, 265.0, 265.0, 251.0, 292.0, 287.0, 287.0, 289.0, 263.0, 267.0, 307.0, 320.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 270.0, 269.0, 234.0, 234.0, 290.0, 292.0, 285.0, 294.0, 289.0, 293.0, 285.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6959714884925522, "mean_inference_ms": 1.2419952146413202, "mean_action_processing_ms": 0.13355366138274052, "mean_env_wait_ms": 0.8378320110425799, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 627.0, "episode_reward_min": 468.0, "episode_reward_mean": 567.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 283.595}, "hist_stats": {"episode_reward": [576.0, 570.0, 582.0, 522.0, 587.0, 576.0, 522.0, 573.0, 573.0, 579.0, 582.0, 576.0, 582.0, 582.0, 579.0, 579.0, 576.0, 573.0, 
576.0, 584.0, 573.0, 579.0, 530.0, 579.0, 576.0, 576.0, 576.0, 582.0, 582.0, 582.0, 579.0, 573.0, 579.0, 582.0, 525.0, 519.0, 576.0, 573.0, 582.0, 576.0, 576.0, 582.0, 525.0, 570.0, 579.0, 525.0, 522.0, 525.0, 522.0, 582.0, 573.0, 587.0, 582.0, 576.0, 576.0, 576.0, 522.0, 576.0, 582.0, 582.0, 579.0, 573.0, 579.0, 576.0, 579.0, 587.0, 522.0, 579.0, 579.0, 519.0, 573.0, 593.0, 579.0, 579.0, 579.0, 576.0, 576.0, 573.0, 570.0, 570.0, 525.0, 522.0, 582.0, 570.0, 573.0, 525.0, 516.0, 579.0, 576.0, 530.0, 627.0, 579.0, 579.0, 579.0, 539.0, 468.0, 582.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [280.0, 296.0, 287.0, 283.0, 295.0, 287.0, 261.0, 261.0, 297.0, 290.0, 292.0, 284.0, 259.0, 263.0, 278.0, 295.0, 291.0, 282.0, 291.0, 288.0, 284.0, 298.0, 291.0, 285.0, 292.0, 290.0, 293.0, 289.0, 290.0, 289.0, 288.0, 291.0, 285.0, 291.0, 292.0, 281.0, 291.0, 285.0, 288.0, 296.0, 286.0, 287.0, 286.0, 293.0, 259.0, 271.0, 283.0, 296.0, 289.0, 287.0, 285.0, 291.0, 289.0, 287.0, 288.0, 294.0, 290.0, 292.0, 293.0, 289.0, 291.0, 288.0, 285.0, 288.0, 291.0, 288.0, 291.0, 291.0, 266.0, 259.0, 268.0, 251.0, 289.0, 287.0, 280.0, 293.0, 291.0, 291.0, 292.0, 284.0, 291.0, 285.0, 291.0, 291.0, 256.0, 269.0, 281.0, 289.0, 288.0, 291.0, 260.0, 265.0, 262.0, 260.0, 259.0, 266.0, 265.0, 257.0, 294.0, 288.0, 287.0, 286.0, 298.0, 289.0, 296.0, 286.0, 288.0, 288.0, 291.0, 285.0, 290.0, 286.0, 256.0, 266.0, 290.0, 286.0, 291.0, 291.0, 294.0, 288.0, 290.0, 289.0, 293.0, 280.0, 291.0, 288.0, 
286.0, 290.0, 289.0, 290.0, 296.0, 291.0, 258.0, 264.0, 296.0, 283.0, 292.0, 287.0, 249.0, 270.0, 286.0, 287.0, 297.0, 296.0, 296.0, 283.0, 291.0, 288.0, 290.0, 289.0, 292.0, 284.0, 289.0, 287.0, 288.0, 285.0, 283.0, 287.0, 287.0, 283.0, 263.0, 262.0, 256.0, 266.0, 289.0, 293.0, 278.0, 292.0, 288.0, 285.0, 260.0, 265.0, 265.0, 251.0, 292.0, 287.0, 287.0, 289.0, 263.0, 267.0, 307.0, 320.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 270.0, 269.0, 234.0, 234.0, 290.0, 292.0, 285.0, 294.0, 289.0, 293.0, 285.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6959714884925522, "mean_inference_ms": 1.2419952146413202, "mean_action_processing_ms": 0.13355366138274052, "mean_env_wait_ms": 0.8378320110425799, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10521600, "num_agent_steps_trained": 10521600, "num_env_steps_sampled": 5260800, "num_env_steps_trained": 5260800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5260800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10521600, "timers": {"training_iteration_time_ms": 3605.814, "learn_time_ms": 1120.922, "learn_throughput": 11419.173, "synch_weights_time_ms": 11.381}, "counters": {"num_env_steps_sampled": 5260800, "num_env_steps_trained": 5260800, "num_agent_steps_sampled": 10521600, "num_agent_steps_trained": 10521600}, "done": false, "episodes_total": 13152, "training_iteration": 411, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-11", "timestamp": 1666582031, "time_this_iter_s": 3.681643009185791, "time_total_s": 1569.5162544250488, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1569.5162544250488, "timesteps_since_restore": 0, "iterations_since_restore": 411, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.499999999999996, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.96, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.65, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.73, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.55, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.26, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.3, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.11, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.26, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.3, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.26, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.3, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001020381459966302, "policy_loss": 0.0006417357362806797, "vf_loss": 7.845973968505859, "vf_explained_var": 0.5801359415054321, "kl": 0.0029763891361653805, "entropy": 0.8119027614593506, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5273600, "num_env_steps_trained": 5273600, "num_agent_steps_sampled": 10547200, "num_agent_steps_trained": 10547200}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 468.0, "episode_reward_mean": 566.96, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.48}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 174.96, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.65, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.73, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.55, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.26, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.3, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.11, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.98, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.11, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.74, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.26, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.3, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.26, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.3, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 525.0, 519.0, 576.0, 573.0, 582.0, 576.0, 576.0, 582.0, 525.0, 570.0, 579.0, 525.0, 522.0, 525.0, 522.0, 582.0, 573.0, 587.0, 582.0, 576.0, 576.0, 576.0, 522.0, 576.0, 582.0, 582.0, 579.0, 573.0, 579.0, 576.0, 579.0, 587.0, 522.0, 579.0, 579.0, 519.0, 573.0, 593.0, 579.0, 579.0, 579.0, 576.0, 576.0, 573.0, 570.0, 570.0, 525.0, 522.0, 582.0, 570.0, 573.0, 525.0, 516.0, 579.0, 576.0, 530.0, 627.0, 579.0, 579.0, 579.0, 539.0, 468.0, 582.0, 579.0, 582.0, 579.0, 570.0, 584.0, 519.0, 582.0, 630.0, 582.0, 579.0, 579.0, 579.0, 582.0, 630.0, 579.0, 570.0, 576.0, 522.0, 582.0, 579.0, 570.0, 573.0, 584.0, 579.0, 507.0, 584.0, 579.0, 579.0, 573.0, 579.0, 573.0, 579.0, 573.0, 527.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 291.0, 291.0, 266.0, 259.0, 268.0, 251.0, 289.0, 287.0, 280.0, 
293.0, 291.0, 291.0, 292.0, 284.0, 291.0, 285.0, 291.0, 291.0, 256.0, 269.0, 281.0, 289.0, 288.0, 291.0, 260.0, 265.0, 262.0, 260.0, 259.0, 266.0, 265.0, 257.0, 294.0, 288.0, 287.0, 286.0, 298.0, 289.0, 296.0, 286.0, 288.0, 288.0, 291.0, 285.0, 290.0, 286.0, 256.0, 266.0, 290.0, 286.0, 291.0, 291.0, 294.0, 288.0, 290.0, 289.0, 293.0, 280.0, 291.0, 288.0, 286.0, 290.0, 289.0, 290.0, 296.0, 291.0, 258.0, 264.0, 296.0, 283.0, 292.0, 287.0, 249.0, 270.0, 286.0, 287.0, 297.0, 296.0, 296.0, 283.0, 291.0, 288.0, 290.0, 289.0, 292.0, 284.0, 289.0, 287.0, 288.0, 285.0, 283.0, 287.0, 287.0, 283.0, 263.0, 262.0, 256.0, 266.0, 289.0, 293.0, 278.0, 292.0, 288.0, 285.0, 260.0, 265.0, 265.0, 251.0, 292.0, 287.0, 287.0, 289.0, 263.0, 267.0, 307.0, 320.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 270.0, 269.0, 234.0, 234.0, 290.0, 292.0, 285.0, 294.0, 289.0, 293.0, 285.0, 294.0, 279.0, 291.0, 300.0, 284.0, 255.0, 264.0, 289.0, 293.0, 308.0, 322.0, 288.0, 294.0, 288.0, 291.0, 286.0, 293.0, 290.0, 289.0, 290.0, 292.0, 313.0, 317.0, 283.0, 296.0, 283.0, 287.0, 287.0, 289.0, 268.0, 254.0, 292.0, 290.0, 288.0, 291.0, 284.0, 286.0, 282.0, 291.0, 298.0, 286.0, 288.0, 291.0, 251.0, 256.0, 290.0, 294.0, 284.0, 295.0, 289.0, 290.0, 289.0, 284.0, 287.0, 292.0, 290.0, 283.0, 291.0, 288.0, 282.0, 291.0, 266.0, 261.0, 262.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6959482648112225, "mean_inference_ms": 1.2418496628761109, "mean_action_processing_ms": 0.13354408807769896, "mean_env_wait_ms": 0.8377504940675667, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 468.0, "episode_reward_mean": 566.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.48}, "hist_stats": {"episode_reward": [579.0, 582.0, 525.0, 519.0, 576.0, 573.0, 582.0, 576.0, 576.0, 582.0, 525.0, 570.0, 579.0, 525.0, 522.0, 525.0, 522.0, 582.0, 573.0, 
587.0, 582.0, 576.0, 576.0, 576.0, 522.0, 576.0, 582.0, 582.0, 579.0, 573.0, 579.0, 576.0, 579.0, 587.0, 522.0, 579.0, 579.0, 519.0, 573.0, 593.0, 579.0, 579.0, 579.0, 576.0, 576.0, 573.0, 570.0, 570.0, 525.0, 522.0, 582.0, 570.0, 573.0, 525.0, 516.0, 579.0, 576.0, 530.0, 627.0, 579.0, 579.0, 579.0, 539.0, 468.0, 582.0, 579.0, 582.0, 579.0, 570.0, 584.0, 519.0, 582.0, 630.0, 582.0, 579.0, 579.0, 579.0, 582.0, 630.0, 579.0, 570.0, 576.0, 522.0, 582.0, 579.0, 570.0, 573.0, 584.0, 579.0, 507.0, 584.0, 579.0, 579.0, 573.0, 579.0, 573.0, 579.0, 573.0, 527.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 291.0, 291.0, 266.0, 259.0, 268.0, 251.0, 289.0, 287.0, 280.0, 293.0, 291.0, 291.0, 292.0, 284.0, 291.0, 285.0, 291.0, 291.0, 256.0, 269.0, 281.0, 289.0, 288.0, 291.0, 260.0, 265.0, 262.0, 260.0, 259.0, 266.0, 265.0, 257.0, 294.0, 288.0, 287.0, 286.0, 298.0, 289.0, 296.0, 286.0, 288.0, 288.0, 291.0, 285.0, 290.0, 286.0, 256.0, 266.0, 290.0, 286.0, 291.0, 291.0, 294.0, 288.0, 290.0, 289.0, 293.0, 280.0, 291.0, 288.0, 286.0, 290.0, 289.0, 290.0, 296.0, 291.0, 258.0, 264.0, 296.0, 283.0, 292.0, 287.0, 249.0, 270.0, 286.0, 287.0, 297.0, 296.0, 296.0, 283.0, 291.0, 288.0, 290.0, 289.0, 292.0, 284.0, 289.0, 287.0, 288.0, 285.0, 283.0, 287.0, 287.0, 283.0, 263.0, 262.0, 256.0, 266.0, 289.0, 293.0, 278.0, 292.0, 288.0, 285.0, 260.0, 265.0, 265.0, 251.0, 292.0, 287.0, 287.0, 289.0, 263.0, 267.0, 307.0, 320.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 270.0, 269.0, 234.0, 
234.0, 290.0, 292.0, 285.0, 294.0, 289.0, 293.0, 285.0, 294.0, 279.0, 291.0, 300.0, 284.0, 255.0, 264.0, 289.0, 293.0, 308.0, 322.0, 288.0, 294.0, 288.0, 291.0, 286.0, 293.0, 290.0, 289.0, 290.0, 292.0, 313.0, 317.0, 283.0, 296.0, 283.0, 287.0, 287.0, 289.0, 268.0, 254.0, 292.0, 290.0, 288.0, 291.0, 284.0, 286.0, 282.0, 291.0, 298.0, 286.0, 288.0, 291.0, 251.0, 256.0, 290.0, 294.0, 284.0, 295.0, 289.0, 290.0, 289.0, 284.0, 287.0, 292.0, 290.0, 283.0, 291.0, 288.0, 282.0, 291.0, 266.0, 261.0, 262.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6959482648112225, "mean_inference_ms": 1.2418496628761109, "mean_action_processing_ms": 0.13354408807769896, "mean_env_wait_ms": 0.8377504940675667, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10547200, "num_agent_steps_trained": 10547200, "num_env_steps_sampled": 5273600, "num_env_steps_trained": 5273600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5273600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10547200, "timers": {"training_iteration_time_ms": 3621.116, "learn_time_ms": 1123.843, "learn_throughput": 11389.493, "synch_weights_time_ms": 11.325}, "counters": {"num_env_steps_sampled": 5273600, "num_env_steps_trained": 5273600, "num_agent_steps_sampled": 10547200, "num_agent_steps_trained": 10547200}, "done": false, "episodes_total": 13184, "training_iteration": 412, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-15", "timestamp": 1666582035, "time_this_iter_s": 3.787947416305542, "time_total_s": 1573.3042018413544, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1573.3042018413544, "timesteps_since_restore": 0, "iterations_since_restore": 412, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.419999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.27, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.59, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.93, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.47, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.74, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.13, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.5, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.17, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.13, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.5, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.13, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.5, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.003513246774673462, "policy_loss": -0.0038899758365005255, "vf_loss": 7.859567165374756, "vf_explained_var": 0.5757486820220947, "kl": 0.0025270835030823946, "entropy": 0.8184552788734436, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5286400, "num_env_steps_trained": 5286400, "num_agent_steps_sampled": 10572800, "num_agent_steps_trained": 10572800}, "sampler_results": {"episode_reward_max": 630.0, "episode_reward_min": 468.0, "episode_reward_mean": 568.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 284.435}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.27, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.59, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.93, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.47, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.74, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.13, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.5, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.1, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.92, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.17, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.78, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.16, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.72, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.13, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.5, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.13, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.5, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 587.0, 522.0, 579.0, 579.0, 519.0, 573.0, 593.0, 579.0, 579.0, 579.0, 576.0, 576.0, 573.0, 570.0, 570.0, 525.0, 522.0, 582.0, 570.0, 573.0, 525.0, 516.0, 579.0, 576.0, 530.0, 627.0, 579.0, 579.0, 579.0, 539.0, 468.0, 582.0, 579.0, 582.0, 579.0, 570.0, 584.0, 519.0, 582.0, 630.0, 582.0, 579.0, 579.0, 579.0, 582.0, 630.0, 579.0, 570.0, 576.0, 522.0, 582.0, 579.0, 570.0, 573.0, 584.0, 579.0, 507.0, 584.0, 579.0, 579.0, 573.0, 579.0, 573.0, 579.0, 573.0, 527.0, 530.0, 522.0, 576.0, 561.0, 582.0, 579.0, 522.0, 573.0, 576.0, 573.0, 584.0, 587.0, 630.0, 579.0, 579.0, 576.0, 582.0, 576.0, 576.0, 579.0, 579.0, 525.0, 579.0, 522.0, 576.0, 513.0, 576.0, 576.0, 582.0, 582.0, 570.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 296.0, 291.0, 258.0, 264.0, 296.0, 283.0, 292.0, 287.0, 249.0, 
270.0, 286.0, 287.0, 297.0, 296.0, 296.0, 283.0, 291.0, 288.0, 290.0, 289.0, 292.0, 284.0, 289.0, 287.0, 288.0, 285.0, 283.0, 287.0, 287.0, 283.0, 263.0, 262.0, 256.0, 266.0, 289.0, 293.0, 278.0, 292.0, 288.0, 285.0, 260.0, 265.0, 265.0, 251.0, 292.0, 287.0, 287.0, 289.0, 263.0, 267.0, 307.0, 320.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 270.0, 269.0, 234.0, 234.0, 290.0, 292.0, 285.0, 294.0, 289.0, 293.0, 285.0, 294.0, 279.0, 291.0, 300.0, 284.0, 255.0, 264.0, 289.0, 293.0, 308.0, 322.0, 288.0, 294.0, 288.0, 291.0, 286.0, 293.0, 290.0, 289.0, 290.0, 292.0, 313.0, 317.0, 283.0, 296.0, 283.0, 287.0, 287.0, 289.0, 268.0, 254.0, 292.0, 290.0, 288.0, 291.0, 284.0, 286.0, 282.0, 291.0, 298.0, 286.0, 288.0, 291.0, 251.0, 256.0, 290.0, 294.0, 284.0, 295.0, 289.0, 290.0, 289.0, 284.0, 287.0, 292.0, 290.0, 283.0, 291.0, 288.0, 282.0, 291.0, 266.0, 261.0, 262.0, 268.0, 267.0, 255.0, 285.0, 291.0, 282.0, 279.0, 286.0, 296.0, 292.0, 287.0, 254.0, 268.0, 286.0, 287.0, 294.0, 282.0, 283.0, 290.0, 290.0, 294.0, 291.0, 296.0, 316.0, 314.0, 285.0, 294.0, 289.0, 290.0, 288.0, 288.0, 291.0, 291.0, 288.0, 288.0, 293.0, 283.0, 296.0, 283.0, 295.0, 284.0, 261.0, 264.0, 292.0, 287.0, 256.0, 266.0, 285.0, 291.0, 259.0, 254.0, 292.0, 284.0, 284.0, 292.0, 288.0, 294.0, 290.0, 292.0, 289.0, 281.0, 292.0, 290.0, 286.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6959201132144984, "mean_inference_ms": 1.2417779063156014, "mean_action_processing_ms": 0.13353379320628925, "mean_env_wait_ms": 0.8377310076399764, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 630.0, "episode_reward_min": 468.0, "episode_reward_mean": 568.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 234.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 284.435}, "hist_stats": {"episode_reward": [579.0, 587.0, 522.0, 579.0, 579.0, 519.0, 573.0, 593.0, 579.0, 579.0, 579.0, 576.0, 576.0, 573.0, 570.0, 570.0, 525.0, 522.0, 
582.0, 570.0, 573.0, 525.0, 516.0, 579.0, 576.0, 530.0, 627.0, 579.0, 579.0, 579.0, 539.0, 468.0, 582.0, 579.0, 582.0, 579.0, 570.0, 584.0, 519.0, 582.0, 630.0, 582.0, 579.0, 579.0, 579.0, 582.0, 630.0, 579.0, 570.0, 576.0, 522.0, 582.0, 579.0, 570.0, 573.0, 584.0, 579.0, 507.0, 584.0, 579.0, 579.0, 573.0, 579.0, 573.0, 579.0, 573.0, 527.0, 530.0, 522.0, 576.0, 561.0, 582.0, 579.0, 522.0, 573.0, 576.0, 573.0, 584.0, 587.0, 630.0, 579.0, 579.0, 576.0, 582.0, 576.0, 576.0, 579.0, 579.0, 525.0, 579.0, 522.0, 576.0, 513.0, 576.0, 576.0, 582.0, 582.0, 570.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 290.0, 296.0, 291.0, 258.0, 264.0, 296.0, 283.0, 292.0, 287.0, 249.0, 270.0, 286.0, 287.0, 297.0, 296.0, 296.0, 283.0, 291.0, 288.0, 290.0, 289.0, 292.0, 284.0, 289.0, 287.0, 288.0, 285.0, 283.0, 287.0, 287.0, 283.0, 263.0, 262.0, 256.0, 266.0, 289.0, 293.0, 278.0, 292.0, 288.0, 285.0, 260.0, 265.0, 265.0, 251.0, 292.0, 287.0, 287.0, 289.0, 263.0, 267.0, 307.0, 320.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 270.0, 269.0, 234.0, 234.0, 290.0, 292.0, 285.0, 294.0, 289.0, 293.0, 285.0, 294.0, 279.0, 291.0, 300.0, 284.0, 255.0, 264.0, 289.0, 293.0, 308.0, 322.0, 288.0, 294.0, 288.0, 291.0, 286.0, 293.0, 290.0, 289.0, 290.0, 292.0, 313.0, 317.0, 283.0, 296.0, 283.0, 287.0, 287.0, 289.0, 268.0, 254.0, 292.0, 290.0, 288.0, 291.0, 284.0, 286.0, 282.0, 291.0, 298.0, 286.0, 288.0, 291.0, 251.0, 256.0, 290.0, 294.0, 284.0, 295.0, 289.0, 290.0, 289.0, 284.0, 287.0, 292.0, 
290.0, 283.0, 291.0, 288.0, 282.0, 291.0, 266.0, 261.0, 262.0, 268.0, 267.0, 255.0, 285.0, 291.0, 282.0, 279.0, 286.0, 296.0, 292.0, 287.0, 254.0, 268.0, 286.0, 287.0, 294.0, 282.0, 283.0, 290.0, 290.0, 294.0, 291.0, 296.0, 316.0, 314.0, 285.0, 294.0, 289.0, 290.0, 288.0, 288.0, 291.0, 291.0, 288.0, 288.0, 293.0, 283.0, 296.0, 283.0, 295.0, 284.0, 261.0, 264.0, 292.0, 287.0, 256.0, 266.0, 285.0, 291.0, 259.0, 254.0, 292.0, 284.0, 284.0, 292.0, 288.0, 294.0, 290.0, 292.0, 289.0, 281.0, 292.0, 290.0, 286.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6959201132144984, "mean_inference_ms": 1.2417779063156014, "mean_action_processing_ms": 0.13353379320628925, "mean_env_wait_ms": 0.8377310076399764, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10572800, "num_agent_steps_trained": 10572800, "num_env_steps_sampled": 5286400, "num_env_steps_trained": 5286400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5286400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10572800, "timers": {"training_iteration_time_ms": 3637.716, "learn_time_ms": 1122.363, "learn_throughput": 11404.509, "synch_weights_time_ms": 11.926}, "counters": {"num_env_steps_sampled": 5286400, "num_env_steps_trained": 5286400, "num_agent_steps_sampled": 10572800, "num_agent_steps_trained": 10572800}, "done": false, "episodes_total": 13216, "training_iteration": 413, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-19", "timestamp": 1666582039, "time_this_iter_s": 3.7886815071105957, "time_total_s": 1577.092883348465, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1577.092883348465, "timesteps_since_restore": 0, "iterations_since_restore": 413, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.566666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.81, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.21, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.54, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.1, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.34, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.13, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.72, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.12, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.72, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.12, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.72, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.12, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0006164342630654573, "policy_loss": 0.00024116405984386802, "vf_loss": 7.793488025665283, "vf_explained_var": 0.5921193361282349, "kl": 0.002510129939764738, "entropy": 0.8081568479537964, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5299200, "num_env_steps_trained": 5299200, "num_agent_steps_sampled": 10598400, "num_agent_steps_trained": 10598400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 507.0, "episode_reward_mean": 574.01, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 287.005}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.81, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.21, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.54, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.1, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.34, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.72, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.12, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.69, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.98, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 4.85, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.42, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.61, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.4, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.56, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.72, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.12, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.72, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.12, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 579.0, 570.0, 584.0, 519.0, 582.0, 630.0, 582.0, 579.0, 579.0, 579.0, 582.0, 630.0, 579.0, 570.0, 576.0, 522.0, 582.0, 579.0, 570.0, 573.0, 584.0, 579.0, 507.0, 584.0, 579.0, 579.0, 573.0, 579.0, 573.0, 579.0, 573.0, 527.0, 530.0, 522.0, 576.0, 561.0, 582.0, 579.0, 522.0, 573.0, 576.0, 573.0, 584.0, 587.0, 630.0, 579.0, 579.0, 576.0, 582.0, 576.0, 576.0, 579.0, 579.0, 525.0, 579.0, 522.0, 576.0, 513.0, 576.0, 576.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 522.0, 576.0, 579.0, 579.0, 633.0, 576.0, 582.0, 579.0, 579.0, 576.0, 584.0, 579.0, 579.0, 576.0, 579.0, 573.0, 630.0, 584.0, 582.0, 576.0, 513.0, 582.0, 579.0, 587.0, 582.0, 579.0, 582.0, 570.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 285.0, 294.0, 289.0, 293.0, 285.0, 294.0, 279.0, 291.0, 300.0, 
284.0, 255.0, 264.0, 289.0, 293.0, 308.0, 322.0, 288.0, 294.0, 288.0, 291.0, 286.0, 293.0, 290.0, 289.0, 290.0, 292.0, 313.0, 317.0, 283.0, 296.0, 283.0, 287.0, 287.0, 289.0, 268.0, 254.0, 292.0, 290.0, 288.0, 291.0, 284.0, 286.0, 282.0, 291.0, 298.0, 286.0, 288.0, 291.0, 251.0, 256.0, 290.0, 294.0, 284.0, 295.0, 289.0, 290.0, 289.0, 284.0, 287.0, 292.0, 290.0, 283.0, 291.0, 288.0, 282.0, 291.0, 266.0, 261.0, 262.0, 268.0, 267.0, 255.0, 285.0, 291.0, 282.0, 279.0, 286.0, 296.0, 292.0, 287.0, 254.0, 268.0, 286.0, 287.0, 294.0, 282.0, 283.0, 290.0, 290.0, 294.0, 291.0, 296.0, 316.0, 314.0, 285.0, 294.0, 289.0, 290.0, 288.0, 288.0, 291.0, 291.0, 288.0, 288.0, 293.0, 283.0, 296.0, 283.0, 295.0, 284.0, 261.0, 264.0, 292.0, 287.0, 256.0, 266.0, 285.0, 291.0, 259.0, 254.0, 292.0, 284.0, 284.0, 292.0, 288.0, 294.0, 290.0, 292.0, 289.0, 281.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 290.0, 289.0, 255.0, 267.0, 288.0, 288.0, 293.0, 286.0, 290.0, 289.0, 319.0, 314.0, 285.0, 291.0, 291.0, 291.0, 293.0, 286.0, 291.0, 288.0, 297.0, 279.0, 292.0, 292.0, 291.0, 288.0, 291.0, 288.0, 290.0, 286.0, 296.0, 283.0, 285.0, 288.0, 311.0, 319.0, 300.0, 284.0, 288.0, 294.0, 285.0, 291.0, 245.0, 268.0, 293.0, 289.0, 287.0, 292.0, 293.0, 294.0, 284.0, 298.0, 283.0, 296.0, 290.0, 292.0, 290.0, 280.0, 288.0, 294.0, 285.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6959090726770885, "mean_inference_ms": 1.241716896421168, "mean_action_processing_ms": 0.13352372525812178, "mean_env_wait_ms": 0.8377185596955341, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 507.0, "episode_reward_mean": 574.01, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 287.005}, "hist_stats": {"episode_reward": [582.0, 579.0, 582.0, 579.0, 570.0, 584.0, 519.0, 582.0, 630.0, 582.0, 579.0, 579.0, 579.0, 582.0, 630.0, 579.0, 570.0, 576.0, 522.0, 
582.0, 579.0, 570.0, 573.0, 584.0, 579.0, 507.0, 584.0, 579.0, 579.0, 573.0, 579.0, 573.0, 579.0, 573.0, 527.0, 530.0, 522.0, 576.0, 561.0, 582.0, 579.0, 522.0, 573.0, 576.0, 573.0, 584.0, 587.0, 630.0, 579.0, 579.0, 576.0, 582.0, 576.0, 576.0, 579.0, 579.0, 525.0, 579.0, 522.0, 576.0, 513.0, 576.0, 576.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 522.0, 576.0, 579.0, 579.0, 633.0, 576.0, 582.0, 579.0, 579.0, 576.0, 584.0, 579.0, 579.0, 576.0, 579.0, 573.0, 630.0, 584.0, 582.0, 576.0, 513.0, 582.0, 579.0, 587.0, 582.0, 579.0, 582.0, 570.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 285.0, 294.0, 289.0, 293.0, 285.0, 294.0, 279.0, 291.0, 300.0, 284.0, 255.0, 264.0, 289.0, 293.0, 308.0, 322.0, 288.0, 294.0, 288.0, 291.0, 286.0, 293.0, 290.0, 289.0, 290.0, 292.0, 313.0, 317.0, 283.0, 296.0, 283.0, 287.0, 287.0, 289.0, 268.0, 254.0, 292.0, 290.0, 288.0, 291.0, 284.0, 286.0, 282.0, 291.0, 298.0, 286.0, 288.0, 291.0, 251.0, 256.0, 290.0, 294.0, 284.0, 295.0, 289.0, 290.0, 289.0, 284.0, 287.0, 292.0, 290.0, 283.0, 291.0, 288.0, 282.0, 291.0, 266.0, 261.0, 262.0, 268.0, 267.0, 255.0, 285.0, 291.0, 282.0, 279.0, 286.0, 296.0, 292.0, 287.0, 254.0, 268.0, 286.0, 287.0, 294.0, 282.0, 283.0, 290.0, 290.0, 294.0, 291.0, 296.0, 316.0, 314.0, 285.0, 294.0, 289.0, 290.0, 288.0, 288.0, 291.0, 291.0, 288.0, 288.0, 293.0, 283.0, 296.0, 283.0, 295.0, 284.0, 261.0, 264.0, 292.0, 287.0, 256.0, 266.0, 285.0, 291.0, 259.0, 254.0, 292.0, 284.0, 284.0, 292.0, 288.0, 
294.0, 290.0, 292.0, 289.0, 281.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 290.0, 289.0, 255.0, 267.0, 288.0, 288.0, 293.0, 286.0, 290.0, 289.0, 319.0, 314.0, 285.0, 291.0, 291.0, 291.0, 293.0, 286.0, 291.0, 288.0, 297.0, 279.0, 292.0, 292.0, 291.0, 288.0, 291.0, 288.0, 290.0, 286.0, 296.0, 283.0, 285.0, 288.0, 311.0, 319.0, 300.0, 284.0, 288.0, 294.0, 285.0, 291.0, 245.0, 268.0, 293.0, 289.0, 287.0, 292.0, 293.0, 294.0, 284.0, 298.0, 283.0, 296.0, 290.0, 292.0, 290.0, 280.0, 288.0, 294.0, 285.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6959090726770885, "mean_inference_ms": 1.241716896421168, "mean_action_processing_ms": 0.13352372525812178, "mean_env_wait_ms": 0.8377185596955341, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10598400, "num_agent_steps_trained": 10598400, "num_env_steps_sampled": 5299200, "num_env_steps_trained": 5299200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5299200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10598400, "timers": {"training_iteration_time_ms": 3638.704, "learn_time_ms": 1125.557, "learn_throughput": 11372.149, "synch_weights_time_ms": 11.463}, "counters": {"num_env_steps_sampled": 5299200, "num_env_steps_trained": 5299200, "num_agent_steps_sampled": 10598400, "num_agent_steps_trained": 10598400}, "done": false, "episodes_total": 13248, "training_iteration": 414, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-23", "timestamp": 1666582043, "time_this_iter_s": 3.6348578929901123, "time_total_s": 1580.727741241455, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1580.727741241455, "timesteps_since_restore": 0, "iterations_since_restore": 414, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.66, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.53, "shaped_reward_min": 150, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.08, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.7, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.64, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.22, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.75, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.5, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.64, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.22, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.64, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.22, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002787010744214058, "policy_loss": 0.002409706125035882, "vf_loss": 7.769173622131348, "vf_explained_var": 0.5715185403823853, "kl": 0.002564822556450963, "entropy": 0.7992253303527832, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5312000, "num_env_steps_trained": 5312000, "num_agent_steps_sampled": 10624000, "num_agent_steps_trained": 10624000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 510.0, "episode_reward_mean": 574.13, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 287.065}, "custom_metrics": {"sparse_reward_mean": 198.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 176.53, "shaped_reward_min": 150, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.08, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.7, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 14.96, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.51, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.1, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 14.64, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 17.22, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.75, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.92, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.56, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.74, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.05, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.48, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.54, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.46, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.5, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 14.64, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 17.22, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 14.64, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 17.22, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 573.0, 527.0, 530.0, 522.0, 576.0, 561.0, 582.0, 579.0, 522.0, 573.0, 576.0, 573.0, 584.0, 587.0, 630.0, 579.0, 579.0, 576.0, 582.0, 576.0, 576.0, 579.0, 579.0, 525.0, 579.0, 522.0, 576.0, 513.0, 576.0, 576.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 522.0, 576.0, 579.0, 579.0, 633.0, 576.0, 582.0, 579.0, 579.0, 576.0, 584.0, 579.0, 579.0, 576.0, 579.0, 573.0, 630.0, 584.0, 582.0, 576.0, 513.0, 582.0, 579.0, 587.0, 582.0, 579.0, 582.0, 570.0, 582.0, 579.0, 579.0, 627.0, 570.0, 582.0, 573.0, 573.0, 579.0, 582.0, 570.0, 579.0, 525.0, 510.0, 570.0, 573.0, 579.0, 582.0, 582.0, 573.0, 579.0, 579.0, 576.0, 582.0, 576.0, 579.0, 570.0, 579.0, 630.0, 573.0, 579.0, 576.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 282.0, 291.0, 266.0, 261.0, 262.0, 268.0, 267.0, 255.0, 285.0, 
291.0, 282.0, 279.0, 286.0, 296.0, 292.0, 287.0, 254.0, 268.0, 286.0, 287.0, 294.0, 282.0, 283.0, 290.0, 290.0, 294.0, 291.0, 296.0, 316.0, 314.0, 285.0, 294.0, 289.0, 290.0, 288.0, 288.0, 291.0, 291.0, 288.0, 288.0, 293.0, 283.0, 296.0, 283.0, 295.0, 284.0, 261.0, 264.0, 292.0, 287.0, 256.0, 266.0, 285.0, 291.0, 259.0, 254.0, 292.0, 284.0, 284.0, 292.0, 288.0, 294.0, 290.0, 292.0, 289.0, 281.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 290.0, 289.0, 255.0, 267.0, 288.0, 288.0, 293.0, 286.0, 290.0, 289.0, 319.0, 314.0, 285.0, 291.0, 291.0, 291.0, 293.0, 286.0, 291.0, 288.0, 297.0, 279.0, 292.0, 292.0, 291.0, 288.0, 291.0, 288.0, 290.0, 286.0, 296.0, 283.0, 285.0, 288.0, 311.0, 319.0, 300.0, 284.0, 288.0, 294.0, 285.0, 291.0, 245.0, 268.0, 293.0, 289.0, 287.0, 292.0, 293.0, 294.0, 284.0, 298.0, 283.0, 296.0, 290.0, 292.0, 290.0, 280.0, 288.0, 294.0, 285.0, 294.0, 291.0, 288.0, 306.0, 321.0, 289.0, 281.0, 290.0, 292.0, 283.0, 290.0, 288.0, 285.0, 293.0, 286.0, 282.0, 300.0, 280.0, 290.0, 285.0, 294.0, 265.0, 260.0, 255.0, 255.0, 285.0, 285.0, 287.0, 286.0, 288.0, 291.0, 290.0, 292.0, 292.0, 290.0, 285.0, 288.0, 288.0, 291.0, 293.0, 286.0, 288.0, 288.0, 293.0, 289.0, 288.0, 288.0, 289.0, 290.0, 295.0, 275.0, 295.0, 284.0, 313.0, 317.0, 285.0, 288.0, 287.0, 292.0, 282.0, 294.0, 285.0, 291.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6958701226777856, "mean_inference_ms": 1.2416536502241968, "mean_action_processing_ms": 0.1335153926353554, "mean_env_wait_ms": 0.8377099069977573, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 510.0, "episode_reward_mean": 574.13, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 287.065}, "hist_stats": {"episode_reward": [579.0, 573.0, 527.0, 530.0, 522.0, 576.0, 561.0, 582.0, 579.0, 522.0, 573.0, 576.0, 573.0, 584.0, 587.0, 630.0, 579.0, 579.0, 576.0, 
582.0, 576.0, 576.0, 579.0, 579.0, 525.0, 579.0, 522.0, 576.0, 513.0, 576.0, 576.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 522.0, 576.0, 579.0, 579.0, 633.0, 576.0, 582.0, 579.0, 579.0, 576.0, 584.0, 579.0, 579.0, 576.0, 579.0, 573.0, 630.0, 584.0, 582.0, 576.0, 513.0, 582.0, 579.0, 587.0, 582.0, 579.0, 582.0, 570.0, 582.0, 579.0, 579.0, 627.0, 570.0, 582.0, 573.0, 573.0, 579.0, 582.0, 570.0, 579.0, 525.0, 510.0, 570.0, 573.0, 579.0, 582.0, 582.0, 573.0, 579.0, 579.0, 576.0, 582.0, 576.0, 579.0, 570.0, 579.0, 630.0, 573.0, 579.0, 576.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 282.0, 291.0, 266.0, 261.0, 262.0, 268.0, 267.0, 255.0, 285.0, 291.0, 282.0, 279.0, 286.0, 296.0, 292.0, 287.0, 254.0, 268.0, 286.0, 287.0, 294.0, 282.0, 283.0, 290.0, 290.0, 294.0, 291.0, 296.0, 316.0, 314.0, 285.0, 294.0, 289.0, 290.0, 288.0, 288.0, 291.0, 291.0, 288.0, 288.0, 293.0, 283.0, 296.0, 283.0, 295.0, 284.0, 261.0, 264.0, 292.0, 287.0, 256.0, 266.0, 285.0, 291.0, 259.0, 254.0, 292.0, 284.0, 284.0, 292.0, 288.0, 294.0, 290.0, 292.0, 289.0, 281.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 290.0, 289.0, 255.0, 267.0, 288.0, 288.0, 293.0, 286.0, 290.0, 289.0, 319.0, 314.0, 285.0, 291.0, 291.0, 291.0, 293.0, 286.0, 291.0, 288.0, 297.0, 279.0, 292.0, 292.0, 291.0, 288.0, 291.0, 288.0, 290.0, 286.0, 296.0, 283.0, 285.0, 288.0, 311.0, 319.0, 300.0, 284.0, 288.0, 294.0, 285.0, 291.0, 245.0, 268.0, 293.0, 289.0, 287.0, 292.0, 293.0, 294.0, 284.0, 298.0, 283.0, 
296.0, 290.0, 292.0, 290.0, 280.0, 288.0, 294.0, 285.0, 294.0, 291.0, 288.0, 306.0, 321.0, 289.0, 281.0, 290.0, 292.0, 283.0, 290.0, 288.0, 285.0, 293.0, 286.0, 282.0, 300.0, 280.0, 290.0, 285.0, 294.0, 265.0, 260.0, 255.0, 255.0, 285.0, 285.0, 287.0, 286.0, 288.0, 291.0, 290.0, 292.0, 292.0, 290.0, 285.0, 288.0, 288.0, 291.0, 293.0, 286.0, 288.0, 288.0, 293.0, 289.0, 288.0, 288.0, 289.0, 290.0, 295.0, 275.0, 295.0, 284.0, 313.0, 317.0, 285.0, 288.0, 287.0, 292.0, 282.0, 294.0, 285.0, 291.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6958701226777856, "mean_inference_ms": 1.2416536502241968, "mean_action_processing_ms": 0.1335153926353554, "mean_env_wait_ms": 0.8377099069977573, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10624000, "num_agent_steps_trained": 10624000, "num_env_steps_sampled": 5312000, "num_env_steps_trained": 5312000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5312000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10624000, "timers": {"training_iteration_time_ms": 3633.496, "learn_time_ms": 1126.315, "learn_throughput": 11364.496, "synch_weights_time_ms": 11.876}, "counters": {"num_env_steps_sampled": 5312000, "num_env_steps_trained": 5312000, "num_agent_steps_sampled": 10624000, "num_agent_steps_trained": 10624000}, "done": false, "episodes_total": 13280, "training_iteration": 415, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-27", "timestamp": 1666582047, "time_this_iter_s": 3.71405029296875, "time_total_s": 1584.4417915344238, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1584.4417915344238, "timesteps_since_restore": 0, "iterations_since_restore": 415, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.033333333333335, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 199.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.4, "shaped_reward_min": 150, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.38, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.34, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.21, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.1, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.09, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.92, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.39, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.09, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.92, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.09, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.92, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019702562130987644, "policy_loss": -0.0023428606800734997, "vf_loss": 7.736230850219727, "vf_explained_var": 0.5634697079658508, "kl": 0.002214438281953335, "entropy": 0.802035391330719, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5324800, "num_env_steps_trained": 5324800, "num_agent_steps_sampled": 10649600, "num_agent_steps_trained": 10649600}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 510.0, "episode_reward_mean": 576.2, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 288.1}, "custom_metrics": {"sparse_reward_mean": 199.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.4, "shaped_reward_min": 150, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.44, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 17.38, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.34, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 17.21, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.09, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.92, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 4.99, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.53, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.86, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.39, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.66, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.35, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.65, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.09, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.92, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.09, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.92, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 522.0, 576.0, 579.0, 579.0, 633.0, 576.0, 582.0, 579.0, 579.0, 576.0, 584.0, 579.0, 579.0, 576.0, 579.0, 573.0, 630.0, 584.0, 582.0, 576.0, 513.0, 582.0, 579.0, 587.0, 582.0, 579.0, 582.0, 570.0, 582.0, 579.0, 579.0, 627.0, 570.0, 582.0, 573.0, 573.0, 579.0, 582.0, 570.0, 579.0, 525.0, 510.0, 570.0, 573.0, 579.0, 582.0, 582.0, 573.0, 579.0, 579.0, 576.0, 582.0, 576.0, 579.0, 570.0, 579.0, 630.0, 573.0, 579.0, 576.0, 576.0, 576.0, 573.0, 590.0, 579.0, 582.0, 576.0, 587.0, 576.0, 570.0, 579.0, 576.0, 573.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 525.0, 584.0, 582.0, 539.0, 582.0, 579.0, 579.0, 525.0, 573.0, 570.0, 573.0, 582.0, 582.0, 570.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 289.0, 281.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 290.0, 
289.0, 255.0, 267.0, 288.0, 288.0, 293.0, 286.0, 290.0, 289.0, 319.0, 314.0, 285.0, 291.0, 291.0, 291.0, 293.0, 286.0, 291.0, 288.0, 297.0, 279.0, 292.0, 292.0, 291.0, 288.0, 291.0, 288.0, 290.0, 286.0, 296.0, 283.0, 285.0, 288.0, 311.0, 319.0, 300.0, 284.0, 288.0, 294.0, 285.0, 291.0, 245.0, 268.0, 293.0, 289.0, 287.0, 292.0, 293.0, 294.0, 284.0, 298.0, 283.0, 296.0, 290.0, 292.0, 290.0, 280.0, 288.0, 294.0, 285.0, 294.0, 291.0, 288.0, 306.0, 321.0, 289.0, 281.0, 290.0, 292.0, 283.0, 290.0, 288.0, 285.0, 293.0, 286.0, 282.0, 300.0, 280.0, 290.0, 285.0, 294.0, 265.0, 260.0, 255.0, 255.0, 285.0, 285.0, 287.0, 286.0, 288.0, 291.0, 290.0, 292.0, 292.0, 290.0, 285.0, 288.0, 288.0, 291.0, 293.0, 286.0, 288.0, 288.0, 293.0, 289.0, 288.0, 288.0, 289.0, 290.0, 295.0, 275.0, 295.0, 284.0, 313.0, 317.0, 285.0, 288.0, 287.0, 292.0, 282.0, 294.0, 285.0, 291.0, 283.0, 293.0, 289.0, 284.0, 299.0, 291.0, 288.0, 291.0, 292.0, 290.0, 292.0, 284.0, 297.0, 290.0, 285.0, 291.0, 283.0, 287.0, 290.0, 289.0, 283.0, 293.0, 285.0, 288.0, 288.0, 291.0, 294.0, 285.0, 293.0, 283.0, 290.0, 289.0, 293.0, 289.0, 289.0, 290.0, 269.0, 256.0, 296.0, 288.0, 294.0, 288.0, 272.0, 267.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 262.0, 263.0, 289.0, 284.0, 287.0, 283.0, 287.0, 286.0, 295.0, 287.0, 293.0, 289.0, 287.0, 283.0, 285.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6958264003113149, "mean_inference_ms": 1.2415504249231188, "mean_action_processing_ms": 0.1335099692280639, "mean_env_wait_ms": 0.8376494855811777, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 510.0, "episode_reward_mean": 576.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 245.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 288.1}, "hist_stats": {"episode_reward": [582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 522.0, 576.0, 579.0, 579.0, 633.0, 576.0, 582.0, 579.0, 579.0, 576.0, 584.0, 579.0, 579.0, 
576.0, 579.0, 573.0, 630.0, 584.0, 582.0, 576.0, 513.0, 582.0, 579.0, 587.0, 582.0, 579.0, 582.0, 570.0, 582.0, 579.0, 579.0, 627.0, 570.0, 582.0, 573.0, 573.0, 579.0, 582.0, 570.0, 579.0, 525.0, 510.0, 570.0, 573.0, 579.0, 582.0, 582.0, 573.0, 579.0, 579.0, 576.0, 582.0, 576.0, 579.0, 570.0, 579.0, 630.0, 573.0, 579.0, 576.0, 576.0, 576.0, 573.0, 590.0, 579.0, 582.0, 576.0, 587.0, 576.0, 570.0, 579.0, 576.0, 573.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 525.0, 584.0, 582.0, 539.0, 582.0, 579.0, 579.0, 525.0, 573.0, 570.0, 573.0, 582.0, 582.0, 570.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 289.0, 281.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 290.0, 289.0, 255.0, 267.0, 288.0, 288.0, 293.0, 286.0, 290.0, 289.0, 319.0, 314.0, 285.0, 291.0, 291.0, 291.0, 293.0, 286.0, 291.0, 288.0, 297.0, 279.0, 292.0, 292.0, 291.0, 288.0, 291.0, 288.0, 290.0, 286.0, 296.0, 283.0, 285.0, 288.0, 311.0, 319.0, 300.0, 284.0, 288.0, 294.0, 285.0, 291.0, 245.0, 268.0, 293.0, 289.0, 287.0, 292.0, 293.0, 294.0, 284.0, 298.0, 283.0, 296.0, 290.0, 292.0, 290.0, 280.0, 288.0, 294.0, 285.0, 294.0, 291.0, 288.0, 306.0, 321.0, 289.0, 281.0, 290.0, 292.0, 283.0, 290.0, 288.0, 285.0, 293.0, 286.0, 282.0, 300.0, 280.0, 290.0, 285.0, 294.0, 265.0, 260.0, 255.0, 255.0, 285.0, 285.0, 287.0, 286.0, 288.0, 291.0, 290.0, 292.0, 292.0, 290.0, 285.0, 288.0, 288.0, 291.0, 293.0, 286.0, 288.0, 288.0, 293.0, 289.0, 288.0, 288.0, 289.0, 290.0, 295.0, 275.0, 295.0, 284.0, 313.0, 317.0, 285.0, 
288.0, 287.0, 292.0, 282.0, 294.0, 285.0, 291.0, 283.0, 293.0, 289.0, 284.0, 299.0, 291.0, 288.0, 291.0, 292.0, 290.0, 292.0, 284.0, 297.0, 290.0, 285.0, 291.0, 283.0, 287.0, 290.0, 289.0, 283.0, 293.0, 285.0, 288.0, 288.0, 291.0, 294.0, 285.0, 293.0, 283.0, 290.0, 289.0, 293.0, 289.0, 289.0, 290.0, 269.0, 256.0, 296.0, 288.0, 294.0, 288.0, 272.0, 267.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 262.0, 263.0, 289.0, 284.0, 287.0, 283.0, 287.0, 286.0, 295.0, 287.0, 293.0, 289.0, 287.0, 283.0, 285.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6958264003113149, "mean_inference_ms": 1.2415504249231188, "mean_action_processing_ms": 0.1335099692280639, "mean_env_wait_ms": 0.8376494855811777, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10649600, "num_agent_steps_trained": 10649600, "num_env_steps_sampled": 5324800, "num_env_steps_trained": 5324800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5324800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10649600, "timers": {"training_iteration_time_ms": 3633.959, "learn_time_ms": 1128.403, "learn_throughput": 11343.467, "synch_weights_time_ms": 11.389}, "counters": {"num_env_steps_sampled": 5324800, "num_env_steps_trained": 5324800, "num_agent_steps_sampled": 10649600, "num_agent_steps_trained": 10649600}, "done": false, "episodes_total": 13312, "training_iteration": 416, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-31", "timestamp": 1666582051, "time_this_iter_s": 3.7223761081695557, "time_total_s": 1588.1641676425934, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1588.1641676425934, "timesteps_since_restore": 0, "iterations_since_restore": 416, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.516666666666666, "ram_util_percent": 10.616666666666665}} +{"custom_metrics": {"sparse_reward_mean": 199.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.2, "shaped_reward_min": 150, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.79, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.89, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.73, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.75, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.08, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.5, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.46, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.22, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.5, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.46, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.5, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.46, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00037188551505096257, "policy_loss": -1.1931115295737982e-06, "vf_loss": 7.761588096618652, "vf_explained_var": 0.5580202341079712, "kl": 0.002245605457574129, "entropy": 0.8061589598655701, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5337600, "num_env_steps_trained": 5337600, "num_agent_steps_sampled": 10675200, "num_agent_steps_trained": 10675200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 510.0, "episode_reward_mean": 576.0, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.0}, "custom_metrics": {"sparse_reward_mean": 199.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.2, "shaped_reward_min": 150, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.79, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.89, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 25, "useful_onion_pickup_agent_0_mean": 15.73, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.75, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 25, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.5, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.46, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 24, "dish_pickup_agent_0_mean": 5.49, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.07, "dish_pickup_agent_1_min": 1, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.4, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 4.94, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.27, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.8, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.22, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.78, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.02, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.5, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.46, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 24, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.5, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.46, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 24, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 570.0, 582.0, 579.0, 579.0, 627.0, 570.0, 582.0, 573.0, 573.0, 579.0, 582.0, 570.0, 579.0, 525.0, 510.0, 570.0, 573.0, 579.0, 582.0, 582.0, 573.0, 579.0, 579.0, 576.0, 582.0, 576.0, 579.0, 570.0, 579.0, 630.0, 573.0, 579.0, 576.0, 576.0, 576.0, 573.0, 590.0, 579.0, 582.0, 576.0, 587.0, 576.0, 570.0, 579.0, 576.0, 573.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 525.0, 584.0, 582.0, 539.0, 582.0, 579.0, 579.0, 525.0, 573.0, 570.0, 573.0, 582.0, 582.0, 570.0, 576.0, 579.0, 579.0, 573.0, 579.0, 579.0, 573.0, 579.0, 570.0, 584.0, 573.0, 573.0, 630.0, 533.0, 579.0, 576.0, 582.0, 573.0, 576.0, 587.0, 573.0, 633.0, 530.0, 579.0, 576.0, 530.0, 582.0, 579.0, 573.0, 582.0, 627.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 290.0, 280.0, 288.0, 294.0, 285.0, 294.0, 291.0, 288.0, 306.0, 
321.0, 289.0, 281.0, 290.0, 292.0, 283.0, 290.0, 288.0, 285.0, 293.0, 286.0, 282.0, 300.0, 280.0, 290.0, 285.0, 294.0, 265.0, 260.0, 255.0, 255.0, 285.0, 285.0, 287.0, 286.0, 288.0, 291.0, 290.0, 292.0, 292.0, 290.0, 285.0, 288.0, 288.0, 291.0, 293.0, 286.0, 288.0, 288.0, 293.0, 289.0, 288.0, 288.0, 289.0, 290.0, 295.0, 275.0, 295.0, 284.0, 313.0, 317.0, 285.0, 288.0, 287.0, 292.0, 282.0, 294.0, 285.0, 291.0, 283.0, 293.0, 289.0, 284.0, 299.0, 291.0, 288.0, 291.0, 292.0, 290.0, 292.0, 284.0, 297.0, 290.0, 285.0, 291.0, 283.0, 287.0, 290.0, 289.0, 283.0, 293.0, 285.0, 288.0, 288.0, 291.0, 294.0, 285.0, 293.0, 283.0, 290.0, 289.0, 293.0, 289.0, 289.0, 290.0, 269.0, 256.0, 296.0, 288.0, 294.0, 288.0, 272.0, 267.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 262.0, 263.0, 289.0, 284.0, 287.0, 283.0, 287.0, 286.0, 295.0, 287.0, 293.0, 289.0, 287.0, 283.0, 285.0, 291.0, 286.0, 293.0, 288.0, 291.0, 285.0, 288.0, 296.0, 283.0, 291.0, 288.0, 291.0, 282.0, 292.0, 287.0, 285.0, 285.0, 288.0, 296.0, 297.0, 276.0, 285.0, 288.0, 319.0, 311.0, 263.0, 270.0, 289.0, 290.0, 290.0, 286.0, 293.0, 289.0, 277.0, 296.0, 292.0, 284.0, 304.0, 283.0, 284.0, 289.0, 324.0, 309.0, 268.0, 262.0, 294.0, 285.0, 287.0, 289.0, 262.0, 268.0, 288.0, 294.0, 294.0, 285.0, 285.0, 288.0, 295.0, 287.0, 310.0, 317.0, 293.0, 286.0, 284.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6957941137045737, "mean_inference_ms": 1.241436612359549, "mean_action_processing_ms": 0.13350729003532036, "mean_env_wait_ms": 0.8376030328400872, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 510.0, "episode_reward_mean": 576.0, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.0}, "hist_stats": {"episode_reward": [582.0, 570.0, 582.0, 579.0, 579.0, 627.0, 570.0, 582.0, 573.0, 573.0, 579.0, 582.0, 570.0, 579.0, 525.0, 510.0, 570.0, 573.0, 579.0, 
582.0, 582.0, 573.0, 579.0, 579.0, 576.0, 582.0, 576.0, 579.0, 570.0, 579.0, 630.0, 573.0, 579.0, 576.0, 576.0, 576.0, 573.0, 590.0, 579.0, 582.0, 576.0, 587.0, 576.0, 570.0, 579.0, 576.0, 573.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 525.0, 584.0, 582.0, 539.0, 582.0, 579.0, 579.0, 525.0, 573.0, 570.0, 573.0, 582.0, 582.0, 570.0, 576.0, 579.0, 579.0, 573.0, 579.0, 579.0, 573.0, 579.0, 570.0, 584.0, 573.0, 573.0, 630.0, 533.0, 579.0, 576.0, 582.0, 573.0, 576.0, 587.0, 573.0, 633.0, 530.0, 579.0, 576.0, 530.0, 582.0, 579.0, 573.0, 582.0, 627.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 292.0, 290.0, 280.0, 288.0, 294.0, 285.0, 294.0, 291.0, 288.0, 306.0, 321.0, 289.0, 281.0, 290.0, 292.0, 283.0, 290.0, 288.0, 285.0, 293.0, 286.0, 282.0, 300.0, 280.0, 290.0, 285.0, 294.0, 265.0, 260.0, 255.0, 255.0, 285.0, 285.0, 287.0, 286.0, 288.0, 291.0, 290.0, 292.0, 292.0, 290.0, 285.0, 288.0, 288.0, 291.0, 293.0, 286.0, 288.0, 288.0, 293.0, 289.0, 288.0, 288.0, 289.0, 290.0, 295.0, 275.0, 295.0, 284.0, 313.0, 317.0, 285.0, 288.0, 287.0, 292.0, 282.0, 294.0, 285.0, 291.0, 283.0, 293.0, 289.0, 284.0, 299.0, 291.0, 288.0, 291.0, 292.0, 290.0, 292.0, 284.0, 297.0, 290.0, 285.0, 291.0, 283.0, 287.0, 290.0, 289.0, 283.0, 293.0, 285.0, 288.0, 288.0, 291.0, 294.0, 285.0, 293.0, 283.0, 290.0, 289.0, 293.0, 289.0, 289.0, 290.0, 269.0, 256.0, 296.0, 288.0, 294.0, 288.0, 272.0, 267.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 262.0, 263.0, 289.0, 284.0, 287.0, 283.0, 287.0, 
286.0, 295.0, 287.0, 293.0, 289.0, 287.0, 283.0, 285.0, 291.0, 286.0, 293.0, 288.0, 291.0, 285.0, 288.0, 296.0, 283.0, 291.0, 288.0, 291.0, 282.0, 292.0, 287.0, 285.0, 285.0, 288.0, 296.0, 297.0, 276.0, 285.0, 288.0, 319.0, 311.0, 263.0, 270.0, 289.0, 290.0, 290.0, 286.0, 293.0, 289.0, 277.0, 296.0, 292.0, 284.0, 304.0, 283.0, 284.0, 289.0, 324.0, 309.0, 268.0, 262.0, 294.0, 285.0, 287.0, 289.0, 262.0, 268.0, 288.0, 294.0, 294.0, 285.0, 285.0, 288.0, 295.0, 287.0, 310.0, 317.0, 293.0, 286.0, 284.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6957941137045737, "mean_inference_ms": 1.241436612359549, "mean_action_processing_ms": 0.13350729003532036, "mean_env_wait_ms": 0.8376030328400872, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10675200, "num_agent_steps_trained": 10675200, "num_env_steps_sampled": 5337600, "num_env_steps_trained": 5337600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5337600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10675200, "timers": {"training_iteration_time_ms": 3629.299, "learn_time_ms": 1123.612, "learn_throughput": 11391.837, "synch_weights_time_ms": 11.698}, "counters": {"num_env_steps_sampled": 5337600, "num_env_steps_trained": 5337600, "num_agent_steps_sampled": 10675200, "num_agent_steps_trained": 10675200}, "done": false, "episodes_total": 13344, "training_iteration": 417, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-34", "timestamp": 1666582054, "time_this_iter_s": 3.6620047092437744, "time_total_s": 1591.8261723518372, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1591.8261723518372, "timesteps_since_restore": 0, "iterations_since_restore": 417, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.139999999999997, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.22, "shaped_reward_min": 162, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.14, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.55, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.05, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.81, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.14, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.91, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.81, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.14, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.81, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.14, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008182072779163718, "policy_loss": -0.0011884444393217564, "vf_loss": 7.7140302658081055, "vf_explained_var": 0.5830328464508057, "kl": 0.002344300504773855, "entropy": 0.8023296594619751, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5350400, "num_env_steps_trained": 5350400, "num_agent_steps_sampled": 10700800, "num_agent_steps_trained": 10700800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 522.0, "episode_reward_mean": 575.22, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.61}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.22, "shaped_reward_min": 162, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.14, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.55, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.05, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.81, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.14, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.39, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.17, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.29, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.91, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.81, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.14, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.81, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.14, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 576.0, 576.0, 573.0, 590.0, 579.0, 582.0, 576.0, 587.0, 576.0, 570.0, 579.0, 576.0, 573.0, 579.0, 579.0, 576.0, 579.0, 582.0, 579.0, 525.0, 584.0, 582.0, 539.0, 582.0, 579.0, 579.0, 525.0, 573.0, 570.0, 573.0, 582.0, 582.0, 570.0, 576.0, 579.0, 579.0, 573.0, 579.0, 579.0, 573.0, 579.0, 570.0, 584.0, 573.0, 573.0, 630.0, 533.0, 579.0, 576.0, 582.0, 573.0, 576.0, 587.0, 573.0, 633.0, 530.0, 579.0, 576.0, 530.0, 582.0, 579.0, 573.0, 582.0, 627.0, 579.0, 573.0, 576.0, 579.0, 579.0, 582.0, 522.0, 582.0, 582.0, 576.0, 579.0, 576.0, 570.0, 570.0, 582.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 570.0, 627.0, 525.0, 576.0, 525.0, 564.0, 579.0, 573.0, 576.0, 576.0, 587.0, 579.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 282.0, 294.0, 285.0, 291.0, 283.0, 293.0, 289.0, 284.0, 299.0, 
291.0, 288.0, 291.0, 292.0, 290.0, 292.0, 284.0, 297.0, 290.0, 285.0, 291.0, 283.0, 287.0, 290.0, 289.0, 283.0, 293.0, 285.0, 288.0, 288.0, 291.0, 294.0, 285.0, 293.0, 283.0, 290.0, 289.0, 293.0, 289.0, 289.0, 290.0, 269.0, 256.0, 296.0, 288.0, 294.0, 288.0, 272.0, 267.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 262.0, 263.0, 289.0, 284.0, 287.0, 283.0, 287.0, 286.0, 295.0, 287.0, 293.0, 289.0, 287.0, 283.0, 285.0, 291.0, 286.0, 293.0, 288.0, 291.0, 285.0, 288.0, 296.0, 283.0, 291.0, 288.0, 291.0, 282.0, 292.0, 287.0, 285.0, 285.0, 288.0, 296.0, 297.0, 276.0, 285.0, 288.0, 319.0, 311.0, 263.0, 270.0, 289.0, 290.0, 290.0, 286.0, 293.0, 289.0, 277.0, 296.0, 292.0, 284.0, 304.0, 283.0, 284.0, 289.0, 324.0, 309.0, 268.0, 262.0, 294.0, 285.0, 287.0, 289.0, 262.0, 268.0, 288.0, 294.0, 294.0, 285.0, 285.0, 288.0, 295.0, 287.0, 310.0, 317.0, 293.0, 286.0, 284.0, 289.0, 282.0, 294.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 265.0, 257.0, 296.0, 286.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 283.0, 293.0, 290.0, 280.0, 284.0, 286.0, 294.0, 288.0, 316.0, 314.0, 280.0, 296.0, 288.0, 288.0, 283.0, 293.0, 285.0, 302.0, 288.0, 291.0, 279.0, 291.0, 315.0, 312.0, 268.0, 257.0, 287.0, 289.0, 266.0, 259.0, 278.0, 286.0, 292.0, 287.0, 286.0, 287.0, 294.0, 282.0, 288.0, 288.0, 299.0, 288.0, 291.0, 288.0, 266.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6957477904564686, "mean_inference_ms": 1.241316312344584, "mean_action_processing_ms": 0.13350205868262507, "mean_env_wait_ms": 0.837545382909305, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 522.0, "episode_reward_mean": 575.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 256.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.61}, "hist_stats": {"episode_reward": [579.0, 576.0, 576.0, 576.0, 573.0, 590.0, 579.0, 582.0, 576.0, 587.0, 576.0, 570.0, 579.0, 576.0, 573.0, 579.0, 579.0, 576.0, 579.0, 
582.0, 579.0, 525.0, 584.0, 582.0, 539.0, 582.0, 579.0, 579.0, 525.0, 573.0, 570.0, 573.0, 582.0, 582.0, 570.0, 576.0, 579.0, 579.0, 573.0, 579.0, 579.0, 573.0, 579.0, 570.0, 584.0, 573.0, 573.0, 630.0, 533.0, 579.0, 576.0, 582.0, 573.0, 576.0, 587.0, 573.0, 633.0, 530.0, 579.0, 576.0, 530.0, 582.0, 579.0, 573.0, 582.0, 627.0, 579.0, 573.0, 576.0, 579.0, 579.0, 582.0, 522.0, 582.0, 582.0, 576.0, 579.0, 576.0, 570.0, 570.0, 582.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 570.0, 627.0, 525.0, 576.0, 525.0, 564.0, 579.0, 573.0, 576.0, 576.0, 587.0, 579.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 282.0, 294.0, 285.0, 291.0, 283.0, 293.0, 289.0, 284.0, 299.0, 291.0, 288.0, 291.0, 292.0, 290.0, 292.0, 284.0, 297.0, 290.0, 285.0, 291.0, 283.0, 287.0, 290.0, 289.0, 283.0, 293.0, 285.0, 288.0, 288.0, 291.0, 294.0, 285.0, 293.0, 283.0, 290.0, 289.0, 293.0, 289.0, 289.0, 290.0, 269.0, 256.0, 296.0, 288.0, 294.0, 288.0, 272.0, 267.0, 287.0, 295.0, 286.0, 293.0, 291.0, 288.0, 262.0, 263.0, 289.0, 284.0, 287.0, 283.0, 287.0, 286.0, 295.0, 287.0, 293.0, 289.0, 287.0, 283.0, 285.0, 291.0, 286.0, 293.0, 288.0, 291.0, 285.0, 288.0, 296.0, 283.0, 291.0, 288.0, 291.0, 282.0, 292.0, 287.0, 285.0, 285.0, 288.0, 296.0, 297.0, 276.0, 285.0, 288.0, 319.0, 311.0, 263.0, 270.0, 289.0, 290.0, 290.0, 286.0, 293.0, 289.0, 277.0, 296.0, 292.0, 284.0, 304.0, 283.0, 284.0, 289.0, 324.0, 309.0, 268.0, 262.0, 294.0, 285.0, 287.0, 289.0, 262.0, 268.0, 288.0, 294.0, 294.0, 285.0, 285.0, 
288.0, 295.0, 287.0, 310.0, 317.0, 293.0, 286.0, 284.0, 289.0, 282.0, 294.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 265.0, 257.0, 296.0, 286.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 283.0, 293.0, 290.0, 280.0, 284.0, 286.0, 294.0, 288.0, 316.0, 314.0, 280.0, 296.0, 288.0, 288.0, 283.0, 293.0, 285.0, 302.0, 288.0, 291.0, 279.0, 291.0, 315.0, 312.0, 268.0, 257.0, 287.0, 289.0, 266.0, 259.0, 278.0, 286.0, 292.0, 287.0, 286.0, 287.0, 294.0, 282.0, 288.0, 288.0, 299.0, 288.0, 291.0, 288.0, 266.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6957477904564686, "mean_inference_ms": 1.241316312344584, "mean_action_processing_ms": 0.13350205868262507, "mean_env_wait_ms": 0.837545382909305, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10700800, "num_agent_steps_trained": 10700800, "num_env_steps_sampled": 5350400, "num_env_steps_trained": 5350400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5350400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10700800, "timers": {"training_iteration_time_ms": 3634.106, "learn_time_ms": 1129.378, "learn_throughput": 11333.669, "synch_weights_time_ms": 12.346}, "counters": {"num_env_steps_sampled": 5350400, "num_env_steps_trained": 5350400, "num_agent_steps_sampled": 10700800, "num_agent_steps_trained": 10700800}, "done": false, "episodes_total": 13376, "training_iteration": 418, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-38", "timestamp": 1666582058, "time_this_iter_s": 3.6582388877868652, "time_total_s": 1595.484411239624, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1595.484411239624, "timesteps_since_restore": 0, "iterations_since_restore": 418, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.9, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.12, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.4, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.41, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.25, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.3, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.93, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.0, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.15, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.93, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.0, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.93, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.0, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0011450252495706081, "policy_loss": -0.0015236774925142527, "vf_loss": 7.782792568206787, "vf_explained_var": 0.5690507888793945, "kl": 0.0021786403376609087, "entropy": 0.7992550134658813, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5363200, "num_env_steps_trained": 5363200, "num_agent_steps_sampled": 10726400, "num_agent_steps_trained": 10726400}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 507.0, "episode_reward_mean": 575.12, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.56}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 177.12, "shaped_reward_min": 147, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.4, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.41, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.25, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.3, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.13, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.93, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.0, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.23, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.15, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.12, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.93, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.0, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.93, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.0, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 570.0, 576.0, 579.0, 579.0, 573.0, 579.0, 579.0, 573.0, 579.0, 570.0, 584.0, 573.0, 573.0, 630.0, 533.0, 579.0, 576.0, 582.0, 573.0, 576.0, 587.0, 573.0, 633.0, 530.0, 579.0, 576.0, 530.0, 582.0, 579.0, 573.0, 582.0, 627.0, 579.0, 573.0, 576.0, 579.0, 579.0, 582.0, 522.0, 582.0, 582.0, 576.0, 579.0, 576.0, 570.0, 570.0, 582.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 570.0, 627.0, 525.0, 576.0, 525.0, 564.0, 579.0, 573.0, 576.0, 576.0, 587.0, 579.0, 530.0, 573.0, 573.0, 630.0, 576.0, 579.0, 522.0, 519.0, 582.0, 576.0, 590.0, 525.0, 630.0, 576.0, 579.0, 576.0, 579.0, 573.0, 576.0, 633.0, 576.0, 576.0, 582.0, 525.0, 582.0, 507.0, 579.0, 582.0, 633.0, 533.0, 530.0, 587.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 287.0, 293.0, 289.0, 287.0, 283.0, 285.0, 291.0, 286.0, 293.0, 288.0, 
291.0, 285.0, 288.0, 296.0, 283.0, 291.0, 288.0, 291.0, 282.0, 292.0, 287.0, 285.0, 285.0, 288.0, 296.0, 297.0, 276.0, 285.0, 288.0, 319.0, 311.0, 263.0, 270.0, 289.0, 290.0, 290.0, 286.0, 293.0, 289.0, 277.0, 296.0, 292.0, 284.0, 304.0, 283.0, 284.0, 289.0, 324.0, 309.0, 268.0, 262.0, 294.0, 285.0, 287.0, 289.0, 262.0, 268.0, 288.0, 294.0, 294.0, 285.0, 285.0, 288.0, 295.0, 287.0, 310.0, 317.0, 293.0, 286.0, 284.0, 289.0, 282.0, 294.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 265.0, 257.0, 296.0, 286.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 283.0, 293.0, 290.0, 280.0, 284.0, 286.0, 294.0, 288.0, 316.0, 314.0, 280.0, 296.0, 288.0, 288.0, 283.0, 293.0, 285.0, 302.0, 288.0, 291.0, 279.0, 291.0, 315.0, 312.0, 268.0, 257.0, 287.0, 289.0, 266.0, 259.0, 278.0, 286.0, 292.0, 287.0, 286.0, 287.0, 294.0, 282.0, 288.0, 288.0, 299.0, 288.0, 291.0, 288.0, 266.0, 264.0, 285.0, 288.0, 283.0, 290.0, 319.0, 311.0, 289.0, 287.0, 290.0, 289.0, 268.0, 254.0, 259.0, 260.0, 288.0, 294.0, 284.0, 292.0, 297.0, 293.0, 264.0, 261.0, 319.0, 311.0, 292.0, 284.0, 289.0, 290.0, 290.0, 286.0, 288.0, 291.0, 284.0, 289.0, 288.0, 288.0, 316.0, 317.0, 286.0, 290.0, 292.0, 284.0, 293.0, 289.0, 256.0, 269.0, 294.0, 288.0, 254.0, 253.0, 287.0, 292.0, 290.0, 292.0, 324.0, 309.0, 269.0, 264.0, 270.0, 260.0, 294.0, 293.0, 296.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6957078568111794, "mean_inference_ms": 1.2412049370913687, "mean_action_processing_ms": 0.13349627433994735, "mean_env_wait_ms": 0.8374904032152473, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 507.0, "episode_reward_mean": 575.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 253.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 287.56}, "hist_stats": {"episode_reward": [582.0, 582.0, 570.0, 576.0, 579.0, 579.0, 573.0, 579.0, 579.0, 573.0, 579.0, 570.0, 584.0, 573.0, 573.0, 630.0, 533.0, 579.0, 576.0, 
582.0, 573.0, 576.0, 587.0, 573.0, 633.0, 530.0, 579.0, 576.0, 530.0, 582.0, 579.0, 573.0, 582.0, 627.0, 579.0, 573.0, 576.0, 579.0, 579.0, 582.0, 522.0, 582.0, 582.0, 576.0, 579.0, 576.0, 570.0, 570.0, 582.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 570.0, 627.0, 525.0, 576.0, 525.0, 564.0, 579.0, 573.0, 576.0, 576.0, 587.0, 579.0, 530.0, 573.0, 573.0, 630.0, 576.0, 579.0, 522.0, 519.0, 582.0, 576.0, 590.0, 525.0, 630.0, 576.0, 579.0, 576.0, 579.0, 573.0, 576.0, 633.0, 576.0, 576.0, 582.0, 525.0, 582.0, 507.0, 579.0, 582.0, 633.0, 533.0, 530.0, 587.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 287.0, 293.0, 289.0, 287.0, 283.0, 285.0, 291.0, 286.0, 293.0, 288.0, 291.0, 285.0, 288.0, 296.0, 283.0, 291.0, 288.0, 291.0, 282.0, 292.0, 287.0, 285.0, 285.0, 288.0, 296.0, 297.0, 276.0, 285.0, 288.0, 319.0, 311.0, 263.0, 270.0, 289.0, 290.0, 290.0, 286.0, 293.0, 289.0, 277.0, 296.0, 292.0, 284.0, 304.0, 283.0, 284.0, 289.0, 324.0, 309.0, 268.0, 262.0, 294.0, 285.0, 287.0, 289.0, 262.0, 268.0, 288.0, 294.0, 294.0, 285.0, 285.0, 288.0, 295.0, 287.0, 310.0, 317.0, 293.0, 286.0, 284.0, 289.0, 282.0, 294.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 265.0, 257.0, 296.0, 286.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 283.0, 293.0, 290.0, 280.0, 284.0, 286.0, 294.0, 288.0, 316.0, 314.0, 280.0, 296.0, 288.0, 288.0, 283.0, 293.0, 285.0, 302.0, 288.0, 291.0, 279.0, 291.0, 315.0, 312.0, 268.0, 257.0, 287.0, 289.0, 266.0, 259.0, 278.0, 286.0, 292.0, 287.0, 286.0, 287.0, 294.0, 
282.0, 288.0, 288.0, 299.0, 288.0, 291.0, 288.0, 266.0, 264.0, 285.0, 288.0, 283.0, 290.0, 319.0, 311.0, 289.0, 287.0, 290.0, 289.0, 268.0, 254.0, 259.0, 260.0, 288.0, 294.0, 284.0, 292.0, 297.0, 293.0, 264.0, 261.0, 319.0, 311.0, 292.0, 284.0, 289.0, 290.0, 290.0, 286.0, 288.0, 291.0, 284.0, 289.0, 288.0, 288.0, 316.0, 317.0, 286.0, 290.0, 292.0, 284.0, 293.0, 289.0, 256.0, 269.0, 294.0, 288.0, 254.0, 253.0, 287.0, 292.0, 290.0, 292.0, 324.0, 309.0, 269.0, 264.0, 270.0, 260.0, 294.0, 293.0, 296.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6957078568111794, "mean_inference_ms": 1.2412049370913687, "mean_action_processing_ms": 0.13349627433994735, "mean_env_wait_ms": 0.8374904032152473, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10726400, "num_agent_steps_trained": 10726400, "num_env_steps_sampled": 5363200, "num_env_steps_trained": 5363200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5363200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10726400, "timers": {"training_iteration_time_ms": 3635.528, "learn_time_ms": 1127.91, "learn_throughput": 11348.427, "synch_weights_time_ms": 12.317}, "counters": {"num_env_steps_sampled": 5363200, "num_env_steps_trained": 5363200, "num_agent_steps_sampled": 10726400, "num_agent_steps_trained": 10726400}, "done": false, "episodes_total": 13408, "training_iteration": 419, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-42", "timestamp": 1666582062, "time_this_iter_s": 3.6541497707366943, "time_total_s": 1599.1385610103607, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1599.1385610103607, "timesteps_since_restore": 0, "iterations_since_restore": 419, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.733333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 176.9, "shaped_reward_min": 119, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.44, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.41, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.17, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.98, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.33, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.98, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.98, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00018203884246759117, "policy_loss": -0.00019825922208838165, "vf_loss": 7.813397407531738, "vf_explained_var": 0.5786846280097961, "kl": 0.003914575092494488, "entropy": 0.8020821213722229, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5376000, "num_env_steps_trained": 5376000, "num_agent_steps_sampled": 10752000, "num_agent_steps_trained": 10752000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 319.0, "episode_reward_mean": 573.7, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.85}, "custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 176.9, "shaped_reward_min": 119, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.44, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.41, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.98, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.33, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.06, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.98, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.98, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 627.0, 579.0, 573.0, 576.0, 579.0, 579.0, 582.0, 522.0, 582.0, 582.0, 576.0, 579.0, 576.0, 570.0, 570.0, 582.0, 630.0, 576.0, 576.0, 576.0, 587.0, 579.0, 570.0, 627.0, 525.0, 576.0, 525.0, 564.0, 579.0, 573.0, 576.0, 576.0, 587.0, 579.0, 530.0, 573.0, 573.0, 630.0, 576.0, 579.0, 522.0, 519.0, 582.0, 576.0, 590.0, 525.0, 630.0, 576.0, 579.0, 576.0, 579.0, 573.0, 576.0, 633.0, 576.0, 576.0, 582.0, 525.0, 582.0, 507.0, 579.0, 582.0, 633.0, 533.0, 530.0, 587.0, 584.0, 576.0, 627.0, 533.0, 576.0, 573.0, 576.0, 576.0, 584.0, 636.0, 530.0, 525.0, 639.0, 573.0, 573.0, 582.0, 582.0, 582.0, 573.0, 576.0, 579.0, 576.0, 581.0, 627.0, 579.0, 319.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 287.0, 310.0, 317.0, 293.0, 286.0, 284.0, 289.0, 282.0, 294.0, 290.0, 
289.0, 291.0, 288.0, 290.0, 292.0, 265.0, 257.0, 296.0, 286.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 283.0, 293.0, 290.0, 280.0, 284.0, 286.0, 294.0, 288.0, 316.0, 314.0, 280.0, 296.0, 288.0, 288.0, 283.0, 293.0, 285.0, 302.0, 288.0, 291.0, 279.0, 291.0, 315.0, 312.0, 268.0, 257.0, 287.0, 289.0, 266.0, 259.0, 278.0, 286.0, 292.0, 287.0, 286.0, 287.0, 294.0, 282.0, 288.0, 288.0, 299.0, 288.0, 291.0, 288.0, 266.0, 264.0, 285.0, 288.0, 283.0, 290.0, 319.0, 311.0, 289.0, 287.0, 290.0, 289.0, 268.0, 254.0, 259.0, 260.0, 288.0, 294.0, 284.0, 292.0, 297.0, 293.0, 264.0, 261.0, 319.0, 311.0, 292.0, 284.0, 289.0, 290.0, 290.0, 286.0, 288.0, 291.0, 284.0, 289.0, 288.0, 288.0, 316.0, 317.0, 286.0, 290.0, 292.0, 284.0, 293.0, 289.0, 256.0, 269.0, 294.0, 288.0, 254.0, 253.0, 287.0, 292.0, 290.0, 292.0, 324.0, 309.0, 269.0, 264.0, 270.0, 260.0, 294.0, 293.0, 296.0, 288.0, 287.0, 289.0, 306.0, 321.0, 262.0, 271.0, 286.0, 290.0, 288.0, 285.0, 285.0, 291.0, 290.0, 286.0, 285.0, 299.0, 319.0, 317.0, 262.0, 268.0, 270.0, 255.0, 320.0, 319.0, 286.0, 287.0, 286.0, 287.0, 296.0, 286.0, 290.0, 292.0, 295.0, 287.0, 288.0, 285.0, 291.0, 285.0, 289.0, 290.0, 293.0, 283.0, 288.0, 293.0, 301.0, 326.0, 287.0, 292.0, 162.0, 157.0, 294.0, 282.0, 287.0, 292.0, 286.0, 290.0, 295.0, 281.0, 291.0, 288.0, 296.0, 286.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6956595979438758, "mean_inference_ms": 1.2410946468727593, "mean_action_processing_ms": 0.1334893448144431, "mean_env_wait_ms": 0.8374309604509989, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 319.0, "episode_reward_mean": 573.7, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.85}, "hist_stats": {"episode_reward": [582.0, 627.0, 579.0, 573.0, 576.0, 579.0, 579.0, 582.0, 522.0, 582.0, 582.0, 576.0, 579.0, 576.0, 570.0, 570.0, 582.0, 630.0, 576.0, 
576.0, 576.0, 587.0, 579.0, 570.0, 627.0, 525.0, 576.0, 525.0, 564.0, 579.0, 573.0, 576.0, 576.0, 587.0, 579.0, 530.0, 573.0, 573.0, 630.0, 576.0, 579.0, 522.0, 519.0, 582.0, 576.0, 590.0, 525.0, 630.0, 576.0, 579.0, 576.0, 579.0, 573.0, 576.0, 633.0, 576.0, 576.0, 582.0, 525.0, 582.0, 507.0, 579.0, 582.0, 633.0, 533.0, 530.0, 587.0, 584.0, 576.0, 627.0, 533.0, 576.0, 573.0, 576.0, 576.0, 584.0, 636.0, 530.0, 525.0, 639.0, 573.0, 573.0, 582.0, 582.0, 582.0, 573.0, 576.0, 579.0, 576.0, 581.0, 627.0, 579.0, 319.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 287.0, 310.0, 317.0, 293.0, 286.0, 284.0, 289.0, 282.0, 294.0, 290.0, 289.0, 291.0, 288.0, 290.0, 292.0, 265.0, 257.0, 296.0, 286.0, 292.0, 290.0, 286.0, 290.0, 292.0, 287.0, 283.0, 293.0, 290.0, 280.0, 284.0, 286.0, 294.0, 288.0, 316.0, 314.0, 280.0, 296.0, 288.0, 288.0, 283.0, 293.0, 285.0, 302.0, 288.0, 291.0, 279.0, 291.0, 315.0, 312.0, 268.0, 257.0, 287.0, 289.0, 266.0, 259.0, 278.0, 286.0, 292.0, 287.0, 286.0, 287.0, 294.0, 282.0, 288.0, 288.0, 299.0, 288.0, 291.0, 288.0, 266.0, 264.0, 285.0, 288.0, 283.0, 290.0, 319.0, 311.0, 289.0, 287.0, 290.0, 289.0, 268.0, 254.0, 259.0, 260.0, 288.0, 294.0, 284.0, 292.0, 297.0, 293.0, 264.0, 261.0, 319.0, 311.0, 292.0, 284.0, 289.0, 290.0, 290.0, 286.0, 288.0, 291.0, 284.0, 289.0, 288.0, 288.0, 316.0, 317.0, 286.0, 290.0, 292.0, 284.0, 293.0, 289.0, 256.0, 269.0, 294.0, 288.0, 254.0, 253.0, 287.0, 292.0, 290.0, 292.0, 324.0, 
309.0, 269.0, 264.0, 270.0, 260.0, 294.0, 293.0, 296.0, 288.0, 287.0, 289.0, 306.0, 321.0, 262.0, 271.0, 286.0, 290.0, 288.0, 285.0, 285.0, 291.0, 290.0, 286.0, 285.0, 299.0, 319.0, 317.0, 262.0, 268.0, 270.0, 255.0, 320.0, 319.0, 286.0, 287.0, 286.0, 287.0, 296.0, 286.0, 290.0, 292.0, 295.0, 287.0, 288.0, 285.0, 291.0, 285.0, 289.0, 290.0, 293.0, 283.0, 288.0, 293.0, 301.0, 326.0, 287.0, 292.0, 162.0, 157.0, 294.0, 282.0, 287.0, 292.0, 286.0, 290.0, 295.0, 281.0, 291.0, 288.0, 296.0, 286.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6956595979438758, "mean_inference_ms": 1.2410946468727593, "mean_action_processing_ms": 0.1334893448144431, "mean_env_wait_ms": 0.8374309604509989, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10752000, "num_agent_steps_trained": 10752000, "num_env_steps_sampled": 5376000, "num_env_steps_trained": 5376000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5376000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10752000, "timers": {"training_iteration_time_ms": 3639.429, "learn_time_ms": 1124.377, "learn_throughput": 11384.079, "synch_weights_time_ms": 12.4}, "counters": {"num_env_steps_sampled": 5376000, "num_env_steps_trained": 5376000, "num_agent_steps_sampled": 10752000, "num_agent_steps_trained": 10752000}, "done": false, "episodes_total": 13440, "training_iteration": 420, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-46", "timestamp": 1666582066, "time_this_iter_s": 3.6863930225372314, "time_total_s": 1602.824954032898, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1602.824954032898, "timesteps_since_restore": 0, "iterations_since_restore": 420, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.72, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 176.44, "shaped_reward_min": 119, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.48, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.28, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.11, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.78, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.3, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.15, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.0, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.78, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.78, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0001731559168547392, "policy_loss": -0.00021116388961672783, "vf_loss": 7.883594512939453, "vf_explained_var": 0.5794853568077087, "kl": 0.00237162783741951, "entropy": 0.8080763816833496, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5388800, "num_env_steps_trained": 5388800, "num_agent_steps_sampled": 10777600, "num_agent_steps_trained": 10777600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 319.0, "episode_reward_mean": 570.44, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 285.22}, "custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 176.44, "shaped_reward_min": 119, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.48, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.28, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.11, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.2, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.78, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.3, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.15, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.15, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.01, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.94, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.78, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.78, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 587.0, 579.0, 530.0, 573.0, 573.0, 630.0, 576.0, 579.0, 522.0, 519.0, 582.0, 576.0, 590.0, 525.0, 630.0, 576.0, 579.0, 576.0, 579.0, 573.0, 576.0, 633.0, 576.0, 576.0, 582.0, 525.0, 582.0, 507.0, 579.0, 582.0, 633.0, 533.0, 530.0, 587.0, 584.0, 576.0, 627.0, 533.0, 576.0, 573.0, 576.0, 576.0, 584.0, 636.0, 530.0, 525.0, 639.0, 573.0, 573.0, 582.0, 582.0, 582.0, 573.0, 576.0, 579.0, 576.0, 581.0, 627.0, 579.0, 319.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 579.0, 582.0, 582.0, 579.0, 524.0, 579.0, 576.0, 507.0, 513.0, 579.0, 579.0, 579.0, 579.0, 584.0, 582.0, 576.0, 579.0, 530.0, 570.0, 570.0, 582.0, 525.0, 582.0, 579.0, 579.0, 573.0, 587.0, 582.0, 582.0, 519.0, 576.0, 530.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 299.0, 288.0, 291.0, 288.0, 266.0, 264.0, 285.0, 288.0, 283.0, 
290.0, 319.0, 311.0, 289.0, 287.0, 290.0, 289.0, 268.0, 254.0, 259.0, 260.0, 288.0, 294.0, 284.0, 292.0, 297.0, 293.0, 264.0, 261.0, 319.0, 311.0, 292.0, 284.0, 289.0, 290.0, 290.0, 286.0, 288.0, 291.0, 284.0, 289.0, 288.0, 288.0, 316.0, 317.0, 286.0, 290.0, 292.0, 284.0, 293.0, 289.0, 256.0, 269.0, 294.0, 288.0, 254.0, 253.0, 287.0, 292.0, 290.0, 292.0, 324.0, 309.0, 269.0, 264.0, 270.0, 260.0, 294.0, 293.0, 296.0, 288.0, 287.0, 289.0, 306.0, 321.0, 262.0, 271.0, 286.0, 290.0, 288.0, 285.0, 285.0, 291.0, 290.0, 286.0, 285.0, 299.0, 319.0, 317.0, 262.0, 268.0, 270.0, 255.0, 320.0, 319.0, 286.0, 287.0, 286.0, 287.0, 296.0, 286.0, 290.0, 292.0, 295.0, 287.0, 288.0, 285.0, 291.0, 285.0, 289.0, 290.0, 293.0, 283.0, 288.0, 293.0, 301.0, 326.0, 287.0, 292.0, 162.0, 157.0, 294.0, 282.0, 287.0, 292.0, 286.0, 290.0, 295.0, 281.0, 291.0, 288.0, 296.0, 286.0, 291.0, 288.0, 294.0, 288.0, 287.0, 295.0, 286.0, 293.0, 257.0, 267.0, 296.0, 283.0, 291.0, 285.0, 247.0, 260.0, 270.0, 243.0, 291.0, 288.0, 286.0, 293.0, 285.0, 294.0, 287.0, 292.0, 289.0, 295.0, 281.0, 301.0, 286.0, 290.0, 289.0, 290.0, 263.0, 267.0, 291.0, 279.0, 283.0, 287.0, 286.0, 296.0, 267.0, 258.0, 293.0, 289.0, 295.0, 284.0, 292.0, 287.0, 290.0, 283.0, 292.0, 295.0, 292.0, 290.0, 294.0, 288.0, 258.0, 261.0, 293.0, 283.0, 255.0, 275.0, 291.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6956192043467821, "mean_inference_ms": 1.240981555894833, "mean_action_processing_ms": 0.13348244018147493, "mean_env_wait_ms": 0.8373655983322869, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 319.0, "episode_reward_mean": 570.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 285.22}, "hist_stats": {"episode_reward": [576.0, 587.0, 579.0, 530.0, 573.0, 573.0, 630.0, 576.0, 579.0, 522.0, 519.0, 582.0, 576.0, 590.0, 525.0, 630.0, 576.0, 579.0, 576.0, 
579.0, 573.0, 576.0, 633.0, 576.0, 576.0, 582.0, 525.0, 582.0, 507.0, 579.0, 582.0, 633.0, 533.0, 530.0, 587.0, 584.0, 576.0, 627.0, 533.0, 576.0, 573.0, 576.0, 576.0, 584.0, 636.0, 530.0, 525.0, 639.0, 573.0, 573.0, 582.0, 582.0, 582.0, 573.0, 576.0, 579.0, 576.0, 581.0, 627.0, 579.0, 319.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 579.0, 582.0, 582.0, 579.0, 524.0, 579.0, 576.0, 507.0, 513.0, 579.0, 579.0, 579.0, 579.0, 584.0, 582.0, 576.0, 579.0, 530.0, 570.0, 570.0, 582.0, 525.0, 582.0, 579.0, 579.0, 573.0, 587.0, 582.0, 582.0, 519.0, 576.0, 530.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 288.0, 299.0, 288.0, 291.0, 288.0, 266.0, 264.0, 285.0, 288.0, 283.0, 290.0, 319.0, 311.0, 289.0, 287.0, 290.0, 289.0, 268.0, 254.0, 259.0, 260.0, 288.0, 294.0, 284.0, 292.0, 297.0, 293.0, 264.0, 261.0, 319.0, 311.0, 292.0, 284.0, 289.0, 290.0, 290.0, 286.0, 288.0, 291.0, 284.0, 289.0, 288.0, 288.0, 316.0, 317.0, 286.0, 290.0, 292.0, 284.0, 293.0, 289.0, 256.0, 269.0, 294.0, 288.0, 254.0, 253.0, 287.0, 292.0, 290.0, 292.0, 324.0, 309.0, 269.0, 264.0, 270.0, 260.0, 294.0, 293.0, 296.0, 288.0, 287.0, 289.0, 306.0, 321.0, 262.0, 271.0, 286.0, 290.0, 288.0, 285.0, 285.0, 291.0, 290.0, 286.0, 285.0, 299.0, 319.0, 317.0, 262.0, 268.0, 270.0, 255.0, 320.0, 319.0, 286.0, 287.0, 286.0, 287.0, 296.0, 286.0, 290.0, 292.0, 295.0, 287.0, 288.0, 285.0, 291.0, 285.0, 289.0, 290.0, 293.0, 283.0, 288.0, 293.0, 301.0, 326.0, 287.0, 292.0, 162.0, 157.0, 294.0, 282.0, 287.0, 292.0, 286.0, 
290.0, 295.0, 281.0, 291.0, 288.0, 296.0, 286.0, 291.0, 288.0, 294.0, 288.0, 287.0, 295.0, 286.0, 293.0, 257.0, 267.0, 296.0, 283.0, 291.0, 285.0, 247.0, 260.0, 270.0, 243.0, 291.0, 288.0, 286.0, 293.0, 285.0, 294.0, 287.0, 292.0, 289.0, 295.0, 281.0, 301.0, 286.0, 290.0, 289.0, 290.0, 263.0, 267.0, 291.0, 279.0, 283.0, 287.0, 286.0, 296.0, 267.0, 258.0, 293.0, 289.0, 295.0, 284.0, 292.0, 287.0, 290.0, 283.0, 292.0, 295.0, 292.0, 290.0, 294.0, 288.0, 258.0, 261.0, 293.0, 283.0, 255.0, 275.0, 291.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6956192043467821, "mean_inference_ms": 1.240981555894833, "mean_action_processing_ms": 0.13348244018147493, "mean_env_wait_ms": 0.8373655983322869, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10777600, "num_agent_steps_trained": 10777600, "num_env_steps_sampled": 5388800, "num_env_steps_trained": 5388800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5388800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10777600, "timers": {"training_iteration_time_ms": 3632.633, "learn_time_ms": 1117.212, "learn_throughput": 11457.093, "synch_weights_time_ms": 12.361}, "counters": {"num_env_steps_sampled": 5388800, "num_env_steps_trained": 5388800, "num_agent_steps_sampled": 10777600, "num_agent_steps_trained": 10777600}, "done": false, "episodes_total": 13472, "training_iteration": 421, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-50", "timestamp": 1666582070, "time_this_iter_s": 3.590386152267456, "time_total_s": 1606.4153401851654, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1606.4153401851654, "timesteps_since_restore": 0, "iterations_since_restore": 421, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.716666666666665, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 177.09, "shaped_reward_min": 119, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.21, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.05, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.78, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.05, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.05, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008794745663180947, "policy_loss": 0.0005092529463581741, "vf_loss": 7.746166706085205, "vf_explained_var": 0.5636370182037354, "kl": 0.0031342788133770227, "entropy": 0.8087892532348633, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5401600, "num_env_steps_trained": 5401600, "num_agent_steps_sampled": 10803200, "num_agent_steps_trained": 10803200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 319.0, "episode_reward_mean": 573.49, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.745}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 177.09, "shaped_reward_min": 119, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.21, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.16, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.2, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.05, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.45, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.07, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.04, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.78, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.05, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.05, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [533.0, 530.0, 587.0, 584.0, 576.0, 627.0, 533.0, 576.0, 573.0, 576.0, 576.0, 584.0, 636.0, 530.0, 525.0, 639.0, 573.0, 573.0, 582.0, 582.0, 582.0, 573.0, 576.0, 579.0, 576.0, 581.0, 627.0, 579.0, 319.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 579.0, 582.0, 582.0, 579.0, 524.0, 579.0, 576.0, 507.0, 513.0, 579.0, 579.0, 579.0, 579.0, 584.0, 582.0, 576.0, 579.0, 530.0, 570.0, 570.0, 582.0, 525.0, 582.0, 579.0, 579.0, 573.0, 587.0, 582.0, 582.0, 519.0, 576.0, 530.0, 584.0, 630.0, 573.0, 570.0, 533.0, 579.0, 576.0, 587.0, 579.0, 576.0, 579.0, 570.0, 516.0, 630.0, 633.0, 627.0, 570.0, 630.0, 579.0, 576.0, 579.0, 525.0, 630.0, 587.0, 573.0, 579.0, 573.0, 579.0, 579.0, 584.0, 630.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 264.0, 270.0, 260.0, 294.0, 293.0, 296.0, 288.0, 287.0, 289.0, 306.0, 
321.0, 262.0, 271.0, 286.0, 290.0, 288.0, 285.0, 285.0, 291.0, 290.0, 286.0, 285.0, 299.0, 319.0, 317.0, 262.0, 268.0, 270.0, 255.0, 320.0, 319.0, 286.0, 287.0, 286.0, 287.0, 296.0, 286.0, 290.0, 292.0, 295.0, 287.0, 288.0, 285.0, 291.0, 285.0, 289.0, 290.0, 293.0, 283.0, 288.0, 293.0, 301.0, 326.0, 287.0, 292.0, 162.0, 157.0, 294.0, 282.0, 287.0, 292.0, 286.0, 290.0, 295.0, 281.0, 291.0, 288.0, 296.0, 286.0, 291.0, 288.0, 294.0, 288.0, 287.0, 295.0, 286.0, 293.0, 257.0, 267.0, 296.0, 283.0, 291.0, 285.0, 247.0, 260.0, 270.0, 243.0, 291.0, 288.0, 286.0, 293.0, 285.0, 294.0, 287.0, 292.0, 289.0, 295.0, 281.0, 301.0, 286.0, 290.0, 289.0, 290.0, 263.0, 267.0, 291.0, 279.0, 283.0, 287.0, 286.0, 296.0, 267.0, 258.0, 293.0, 289.0, 295.0, 284.0, 292.0, 287.0, 290.0, 283.0, 292.0, 295.0, 292.0, 290.0, 294.0, 288.0, 258.0, 261.0, 293.0, 283.0, 255.0, 275.0, 291.0, 293.0, 321.0, 309.0, 285.0, 288.0, 277.0, 293.0, 265.0, 268.0, 287.0, 292.0, 289.0, 287.0, 288.0, 299.0, 286.0, 293.0, 285.0, 291.0, 291.0, 288.0, 291.0, 279.0, 259.0, 257.0, 317.0, 313.0, 319.0, 314.0, 313.0, 314.0, 294.0, 276.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 292.0, 287.0, 264.0, 261.0, 319.0, 311.0, 288.0, 299.0, 288.0, 285.0, 290.0, 289.0, 293.0, 280.0, 290.0, 289.0, 291.0, 288.0, 290.0, 294.0, 314.0, 316.0, 285.0, 288.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6955737985586028, "mean_inference_ms": 1.240856488537721, "mean_action_processing_ms": 0.13347434229680755, "mean_env_wait_ms": 0.8372907922870119, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 319.0, "episode_reward_mean": 573.49, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 157.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 286.745}, "hist_stats": {"episode_reward": [533.0, 530.0, 587.0, 584.0, 576.0, 627.0, 533.0, 576.0, 573.0, 576.0, 576.0, 584.0, 636.0, 530.0, 525.0, 639.0, 573.0, 573.0, 582.0, 
582.0, 582.0, 573.0, 576.0, 579.0, 576.0, 581.0, 627.0, 579.0, 319.0, 576.0, 579.0, 576.0, 576.0, 579.0, 582.0, 579.0, 582.0, 582.0, 579.0, 524.0, 579.0, 576.0, 507.0, 513.0, 579.0, 579.0, 579.0, 579.0, 584.0, 582.0, 576.0, 579.0, 530.0, 570.0, 570.0, 582.0, 525.0, 582.0, 579.0, 579.0, 573.0, 587.0, 582.0, 582.0, 519.0, 576.0, 530.0, 584.0, 630.0, 573.0, 570.0, 533.0, 579.0, 576.0, 587.0, 579.0, 576.0, 579.0, 570.0, 516.0, 630.0, 633.0, 627.0, 570.0, 630.0, 579.0, 576.0, 579.0, 525.0, 630.0, 587.0, 573.0, 579.0, 573.0, 579.0, 579.0, 584.0, 630.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [269.0, 264.0, 270.0, 260.0, 294.0, 293.0, 296.0, 288.0, 287.0, 289.0, 306.0, 321.0, 262.0, 271.0, 286.0, 290.0, 288.0, 285.0, 285.0, 291.0, 290.0, 286.0, 285.0, 299.0, 319.0, 317.0, 262.0, 268.0, 270.0, 255.0, 320.0, 319.0, 286.0, 287.0, 286.0, 287.0, 296.0, 286.0, 290.0, 292.0, 295.0, 287.0, 288.0, 285.0, 291.0, 285.0, 289.0, 290.0, 293.0, 283.0, 288.0, 293.0, 301.0, 326.0, 287.0, 292.0, 162.0, 157.0, 294.0, 282.0, 287.0, 292.0, 286.0, 290.0, 295.0, 281.0, 291.0, 288.0, 296.0, 286.0, 291.0, 288.0, 294.0, 288.0, 287.0, 295.0, 286.0, 293.0, 257.0, 267.0, 296.0, 283.0, 291.0, 285.0, 247.0, 260.0, 270.0, 243.0, 291.0, 288.0, 286.0, 293.0, 285.0, 294.0, 287.0, 292.0, 289.0, 295.0, 281.0, 301.0, 286.0, 290.0, 289.0, 290.0, 263.0, 267.0, 291.0, 279.0, 283.0, 287.0, 286.0, 296.0, 267.0, 258.0, 293.0, 289.0, 295.0, 284.0, 292.0, 287.0, 290.0, 283.0, 292.0, 295.0, 292.0, 290.0, 294.0, 
288.0, 258.0, 261.0, 293.0, 283.0, 255.0, 275.0, 291.0, 293.0, 321.0, 309.0, 285.0, 288.0, 277.0, 293.0, 265.0, 268.0, 287.0, 292.0, 289.0, 287.0, 288.0, 299.0, 286.0, 293.0, 285.0, 291.0, 291.0, 288.0, 291.0, 279.0, 259.0, 257.0, 317.0, 313.0, 319.0, 314.0, 313.0, 314.0, 294.0, 276.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 292.0, 287.0, 264.0, 261.0, 319.0, 311.0, 288.0, 299.0, 288.0, 285.0, 290.0, 289.0, 293.0, 280.0, 290.0, 289.0, 291.0, 288.0, 290.0, 294.0, 314.0, 316.0, 285.0, 288.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6955737985586028, "mean_inference_ms": 1.240856488537721, "mean_action_processing_ms": 0.13347434229680755, "mean_env_wait_ms": 0.8372907922870119, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10803200, "num_agent_steps_trained": 10803200, "num_env_steps_sampled": 5401600, "num_env_steps_trained": 5401600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5401600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10803200, "timers": {"training_iteration_time_ms": 3602.841, "learn_time_ms": 1102.311, "learn_throughput": 11611.973, "synch_weights_time_ms": 11.848}, "counters": {"num_env_steps_sampled": 5401600, "num_env_steps_trained": 5401600, "num_agent_steps_sampled": 10803200, "num_agent_steps_trained": 10803200}, "done": false, "episodes_total": 13504, "training_iteration": 422, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-53", "timestamp": 1666582073, "time_this_iter_s": 3.482290744781494, "time_total_s": 1609.897630929947, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1609.897630929947, "timesteps_since_restore": 0, "iterations_since_restore": 422, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.599999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.84, "shaped_reward_min": 97, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.22, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.3, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.16, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.73, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.89, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.73, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.89, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.73, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.89, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002271961886435747, "policy_loss": -0.002665527630597353, "vf_loss": 7.900609016418457, "vf_explained_var": 0.5644186735153198, "kl": 0.005383658222854137, "entropy": 0.7929897904396057, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5414400, "num_env_steps_trained": 5414400, "num_agent_steps_sampled": 10828800, "num_agent_steps_trained": 10828800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 265.0, "episode_reward_mean": 569.84, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 126.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.92}, "custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.84, "shaped_reward_min": 97, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.22, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.3, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.16, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.14, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.73, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.89, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.73, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.89, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.73, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.89, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 579.0, 582.0, 579.0, 582.0, 582.0, 579.0, 524.0, 579.0, 576.0, 507.0, 513.0, 579.0, 579.0, 579.0, 579.0, 584.0, 582.0, 576.0, 579.0, 530.0, 570.0, 570.0, 582.0, 525.0, 582.0, 579.0, 579.0, 573.0, 587.0, 582.0, 582.0, 519.0, 576.0, 530.0, 584.0, 630.0, 573.0, 570.0, 533.0, 579.0, 576.0, 587.0, 579.0, 576.0, 579.0, 570.0, 516.0, 630.0, 633.0, 627.0, 570.0, 630.0, 579.0, 576.0, 579.0, 525.0, 630.0, 587.0, 573.0, 579.0, 573.0, 579.0, 579.0, 584.0, 630.0, 573.0, 582.0, 587.0, 579.0, 570.0, 582.0, 579.0, 297.0, 576.0, 582.0, 581.0, 576.0, 525.0, 579.0, 579.0, 579.0, 576.0, 265.0, 582.0, 579.0, 584.0, 573.0, 525.0, 579.0, 573.0, 573.0, 587.0, 579.0, 579.0, 576.0, 570.0, 579.0, 630.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 281.0, 291.0, 288.0, 296.0, 286.0, 291.0, 288.0, 294.0, 288.0, 287.0, 
295.0, 286.0, 293.0, 257.0, 267.0, 296.0, 283.0, 291.0, 285.0, 247.0, 260.0, 270.0, 243.0, 291.0, 288.0, 286.0, 293.0, 285.0, 294.0, 287.0, 292.0, 289.0, 295.0, 281.0, 301.0, 286.0, 290.0, 289.0, 290.0, 263.0, 267.0, 291.0, 279.0, 283.0, 287.0, 286.0, 296.0, 267.0, 258.0, 293.0, 289.0, 295.0, 284.0, 292.0, 287.0, 290.0, 283.0, 292.0, 295.0, 292.0, 290.0, 294.0, 288.0, 258.0, 261.0, 293.0, 283.0, 255.0, 275.0, 291.0, 293.0, 321.0, 309.0, 285.0, 288.0, 277.0, 293.0, 265.0, 268.0, 287.0, 292.0, 289.0, 287.0, 288.0, 299.0, 286.0, 293.0, 285.0, 291.0, 291.0, 288.0, 291.0, 279.0, 259.0, 257.0, 317.0, 313.0, 319.0, 314.0, 313.0, 314.0, 294.0, 276.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 292.0, 287.0, 264.0, 261.0, 319.0, 311.0, 288.0, 299.0, 288.0, 285.0, 290.0, 289.0, 293.0, 280.0, 290.0, 289.0, 291.0, 288.0, 290.0, 294.0, 314.0, 316.0, 285.0, 288.0, 292.0, 290.0, 289.0, 298.0, 289.0, 290.0, 291.0, 279.0, 291.0, 291.0, 295.0, 284.0, 148.0, 149.0, 281.0, 295.0, 294.0, 288.0, 283.0, 298.0, 289.0, 287.0, 256.0, 269.0, 285.0, 294.0, 292.0, 287.0, 295.0, 284.0, 285.0, 291.0, 139.0, 126.0, 292.0, 290.0, 288.0, 291.0, 287.0, 297.0, 286.0, 287.0, 257.0, 268.0, 282.0, 297.0, 282.0, 291.0, 283.0, 290.0, 294.0, 293.0, 291.0, 288.0, 286.0, 293.0, 286.0, 290.0, 282.0, 288.0, 289.0, 290.0, 316.0, 314.0, 292.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6955416679613102, "mean_inference_ms": 1.2407360427750618, "mean_action_processing_ms": 0.13346611969775435, "mean_env_wait_ms": 0.837218924460605, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 265.0, "episode_reward_mean": 569.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 126.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.92}, "hist_stats": {"episode_reward": [576.0, 579.0, 582.0, 579.0, 582.0, 582.0, 579.0, 524.0, 579.0, 576.0, 507.0, 513.0, 579.0, 579.0, 579.0, 579.0, 584.0, 582.0, 576.0, 
579.0, 530.0, 570.0, 570.0, 582.0, 525.0, 582.0, 579.0, 579.0, 573.0, 587.0, 582.0, 582.0, 519.0, 576.0, 530.0, 584.0, 630.0, 573.0, 570.0, 533.0, 579.0, 576.0, 587.0, 579.0, 576.0, 579.0, 570.0, 516.0, 630.0, 633.0, 627.0, 570.0, 630.0, 579.0, 576.0, 579.0, 525.0, 630.0, 587.0, 573.0, 579.0, 573.0, 579.0, 579.0, 584.0, 630.0, 573.0, 582.0, 587.0, 579.0, 570.0, 582.0, 579.0, 297.0, 576.0, 582.0, 581.0, 576.0, 525.0, 579.0, 579.0, 579.0, 576.0, 265.0, 582.0, 579.0, 584.0, 573.0, 525.0, 579.0, 573.0, 573.0, 587.0, 579.0, 579.0, 576.0, 570.0, 579.0, 630.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 281.0, 291.0, 288.0, 296.0, 286.0, 291.0, 288.0, 294.0, 288.0, 287.0, 295.0, 286.0, 293.0, 257.0, 267.0, 296.0, 283.0, 291.0, 285.0, 247.0, 260.0, 270.0, 243.0, 291.0, 288.0, 286.0, 293.0, 285.0, 294.0, 287.0, 292.0, 289.0, 295.0, 281.0, 301.0, 286.0, 290.0, 289.0, 290.0, 263.0, 267.0, 291.0, 279.0, 283.0, 287.0, 286.0, 296.0, 267.0, 258.0, 293.0, 289.0, 295.0, 284.0, 292.0, 287.0, 290.0, 283.0, 292.0, 295.0, 292.0, 290.0, 294.0, 288.0, 258.0, 261.0, 293.0, 283.0, 255.0, 275.0, 291.0, 293.0, 321.0, 309.0, 285.0, 288.0, 277.0, 293.0, 265.0, 268.0, 287.0, 292.0, 289.0, 287.0, 288.0, 299.0, 286.0, 293.0, 285.0, 291.0, 291.0, 288.0, 291.0, 279.0, 259.0, 257.0, 317.0, 313.0, 319.0, 314.0, 313.0, 314.0, 294.0, 276.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 292.0, 287.0, 264.0, 261.0, 319.0, 311.0, 288.0, 299.0, 288.0, 285.0, 290.0, 289.0, 293.0, 280.0, 290.0, 289.0, 291.0, 
288.0, 290.0, 294.0, 314.0, 316.0, 285.0, 288.0, 292.0, 290.0, 289.0, 298.0, 289.0, 290.0, 291.0, 279.0, 291.0, 291.0, 295.0, 284.0, 148.0, 149.0, 281.0, 295.0, 294.0, 288.0, 283.0, 298.0, 289.0, 287.0, 256.0, 269.0, 285.0, 294.0, 292.0, 287.0, 295.0, 284.0, 285.0, 291.0, 139.0, 126.0, 292.0, 290.0, 288.0, 291.0, 287.0, 297.0, 286.0, 287.0, 257.0, 268.0, 282.0, 297.0, 282.0, 291.0, 283.0, 290.0, 294.0, 293.0, 291.0, 288.0, 286.0, 293.0, 286.0, 290.0, 282.0, 288.0, 289.0, 290.0, 316.0, 314.0, 292.0, 281.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6955416679613102, "mean_inference_ms": 1.2407360427750618, "mean_action_processing_ms": 0.13346611969775435, "mean_env_wait_ms": 0.837218924460605, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10828800, "num_agent_steps_trained": 10828800, "num_env_steps_sampled": 5414400, "num_env_steps_trained": 5414400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5414400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10828800, "timers": {"training_iteration_time_ms": 3589.332, "learn_time_ms": 1103.678, "learn_throughput": 11597.584, "synch_weights_time_ms": 11.257}, "counters": {"num_env_steps_sampled": 5414400, "num_env_steps_trained": 5414400, "num_agent_steps_sampled": 10828800, "num_agent_steps_trained": 10828800}, "done": false, "episodes_total": 13536, "training_iteration": 423, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-27-57", "timestamp": 1666582077, "time_this_iter_s": 3.654621124267578, "time_total_s": 1613.5522520542145, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1613.5522520542145, "timesteps_since_restore": 0, "iterations_since_restore": 423, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.939999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 174.58, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.89, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.4, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.76, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.25, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.38, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.0, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.33, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.38, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.0, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.38, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.0, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0032100635580718517, "policy_loss": 0.002823136979714036, "vf_loss": 7.889589309692383, "vf_explained_var": 0.5627092123031616, "kl": 0.0029599564149975777, "entropy": 0.8040643930435181, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5427200, "num_env_steps_trained": 5427200, "num_agent_steps_sampled": 10854400, "num_agent_steps_trained": 10854400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 566.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.09}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 174.58, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.89, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.4, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.76, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.25, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.38, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.0, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.33, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.84, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.38, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.0, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.38, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.0, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 576.0, 530.0, 584.0, 630.0, 573.0, 570.0, 533.0, 579.0, 576.0, 587.0, 579.0, 576.0, 579.0, 570.0, 516.0, 630.0, 633.0, 627.0, 570.0, 630.0, 579.0, 576.0, 579.0, 525.0, 630.0, 587.0, 573.0, 579.0, 573.0, 579.0, 579.0, 584.0, 630.0, 573.0, 582.0, 587.0, 579.0, 570.0, 582.0, 579.0, 297.0, 576.0, 582.0, 581.0, 576.0, 525.0, 579.0, 579.0, 579.0, 576.0, 265.0, 582.0, 579.0, 584.0, 573.0, 525.0, 579.0, 573.0, 573.0, 587.0, 579.0, 579.0, 576.0, 570.0, 579.0, 630.0, 573.0, 576.0, 579.0, 576.0, 576.0, 627.0, 519.0, 579.0, 513.0, 630.0, 570.0, 582.0, 579.0, 587.0, 579.0, 123.0, 570.0, 587.0, 573.0, 579.0, 582.0, 579.0, 576.0, 522.0, 582.0, 579.0, 584.0, 573.0, 582.0, 522.0, 519.0, 636.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 261.0, 293.0, 283.0, 255.0, 275.0, 291.0, 293.0, 321.0, 309.0, 285.0, 
288.0, 277.0, 293.0, 265.0, 268.0, 287.0, 292.0, 289.0, 287.0, 288.0, 299.0, 286.0, 293.0, 285.0, 291.0, 291.0, 288.0, 291.0, 279.0, 259.0, 257.0, 317.0, 313.0, 319.0, 314.0, 313.0, 314.0, 294.0, 276.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 292.0, 287.0, 264.0, 261.0, 319.0, 311.0, 288.0, 299.0, 288.0, 285.0, 290.0, 289.0, 293.0, 280.0, 290.0, 289.0, 291.0, 288.0, 290.0, 294.0, 314.0, 316.0, 285.0, 288.0, 292.0, 290.0, 289.0, 298.0, 289.0, 290.0, 291.0, 279.0, 291.0, 291.0, 295.0, 284.0, 148.0, 149.0, 281.0, 295.0, 294.0, 288.0, 283.0, 298.0, 289.0, 287.0, 256.0, 269.0, 285.0, 294.0, 292.0, 287.0, 295.0, 284.0, 285.0, 291.0, 139.0, 126.0, 292.0, 290.0, 288.0, 291.0, 287.0, 297.0, 286.0, 287.0, 257.0, 268.0, 282.0, 297.0, 282.0, 291.0, 283.0, 290.0, 294.0, 293.0, 291.0, 288.0, 286.0, 293.0, 286.0, 290.0, 282.0, 288.0, 289.0, 290.0, 316.0, 314.0, 292.0, 281.0, 296.0, 280.0, 293.0, 286.0, 277.0, 299.0, 291.0, 285.0, 312.0, 315.0, 250.0, 269.0, 291.0, 288.0, 253.0, 260.0, 308.0, 322.0, 285.0, 285.0, 290.0, 292.0, 290.0, 289.0, 296.0, 291.0, 286.0, 293.0, 63.0, 60.0, 288.0, 282.0, 295.0, 292.0, 282.0, 291.0, 288.0, 291.0, 290.0, 292.0, 283.0, 296.0, 290.0, 286.0, 254.0, 268.0, 291.0, 291.0, 290.0, 289.0, 288.0, 296.0, 289.0, 284.0, 294.0, 288.0, 265.0, 257.0, 254.0, 265.0, 316.0, 320.0, 266.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6955096508230176, "mean_inference_ms": 1.2406080064538771, "mean_action_processing_ms": 0.1334579355462166, "mean_env_wait_ms": 0.837142387495199, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 566.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.09}, "hist_stats": {"episode_reward": [519.0, 576.0, 530.0, 584.0, 630.0, 573.0, 570.0, 533.0, 579.0, 576.0, 587.0, 579.0, 576.0, 579.0, 570.0, 516.0, 630.0, 633.0, 627.0, 
570.0, 630.0, 579.0, 576.0, 579.0, 525.0, 630.0, 587.0, 573.0, 579.0, 573.0, 579.0, 579.0, 584.0, 630.0, 573.0, 582.0, 587.0, 579.0, 570.0, 582.0, 579.0, 297.0, 576.0, 582.0, 581.0, 576.0, 525.0, 579.0, 579.0, 579.0, 576.0, 265.0, 582.0, 579.0, 584.0, 573.0, 525.0, 579.0, 573.0, 573.0, 587.0, 579.0, 579.0, 576.0, 570.0, 579.0, 630.0, 573.0, 576.0, 579.0, 576.0, 576.0, 627.0, 519.0, 579.0, 513.0, 630.0, 570.0, 582.0, 579.0, 587.0, 579.0, 123.0, 570.0, 587.0, 573.0, 579.0, 582.0, 579.0, 576.0, 522.0, 582.0, 579.0, 584.0, 573.0, 582.0, 522.0, 519.0, 636.0, 530.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [258.0, 261.0, 293.0, 283.0, 255.0, 275.0, 291.0, 293.0, 321.0, 309.0, 285.0, 288.0, 277.0, 293.0, 265.0, 268.0, 287.0, 292.0, 289.0, 287.0, 288.0, 299.0, 286.0, 293.0, 285.0, 291.0, 291.0, 288.0, 291.0, 279.0, 259.0, 257.0, 317.0, 313.0, 319.0, 314.0, 313.0, 314.0, 294.0, 276.0, 314.0, 316.0, 290.0, 289.0, 288.0, 288.0, 292.0, 287.0, 264.0, 261.0, 319.0, 311.0, 288.0, 299.0, 288.0, 285.0, 290.0, 289.0, 293.0, 280.0, 290.0, 289.0, 291.0, 288.0, 290.0, 294.0, 314.0, 316.0, 285.0, 288.0, 292.0, 290.0, 289.0, 298.0, 289.0, 290.0, 291.0, 279.0, 291.0, 291.0, 295.0, 284.0, 148.0, 149.0, 281.0, 295.0, 294.0, 288.0, 283.0, 298.0, 289.0, 287.0, 256.0, 269.0, 285.0, 294.0, 292.0, 287.0, 295.0, 284.0, 285.0, 291.0, 139.0, 126.0, 292.0, 290.0, 288.0, 291.0, 287.0, 297.0, 286.0, 287.0, 257.0, 268.0, 282.0, 297.0, 282.0, 291.0, 283.0, 290.0, 294.0, 293.0, 291.0, 288.0, 286.0, 293.0, 286.0, 
290.0, 282.0, 288.0, 289.0, 290.0, 316.0, 314.0, 292.0, 281.0, 296.0, 280.0, 293.0, 286.0, 277.0, 299.0, 291.0, 285.0, 312.0, 315.0, 250.0, 269.0, 291.0, 288.0, 253.0, 260.0, 308.0, 322.0, 285.0, 285.0, 290.0, 292.0, 290.0, 289.0, 296.0, 291.0, 286.0, 293.0, 63.0, 60.0, 288.0, 282.0, 295.0, 292.0, 282.0, 291.0, 288.0, 291.0, 290.0, 292.0, 283.0, 296.0, 290.0, 286.0, 254.0, 268.0, 291.0, 291.0, 290.0, 289.0, 288.0, 296.0, 289.0, 284.0, 294.0, 288.0, 265.0, 257.0, 254.0, 265.0, 316.0, 320.0, 266.0, 264.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6955096508230176, "mean_inference_ms": 1.2406080064538771, "mean_action_processing_ms": 0.1334579355462166, "mean_env_wait_ms": 0.837142387495199, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10854400, "num_agent_steps_trained": 10854400, "num_env_steps_sampled": 5427200, "num_env_steps_trained": 5427200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5427200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10854400, "timers": {"training_iteration_time_ms": 3587.87, "learn_time_ms": 1099.972, "learn_throughput": 11636.659, "synch_weights_time_ms": 11.749}, "counters": {"num_env_steps_sampled": 5427200, "num_env_steps_trained": 5427200, "num_agent_steps_sampled": 10854400, "num_agent_steps_trained": 10854400}, "done": false, "episodes_total": 13568, "training_iteration": 424, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-01", "timestamp": 1666582081, "time_this_iter_s": 3.6314749717712402, "time_total_s": 1617.1837270259857, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1617.1837270259857, "timesteps_since_restore": 0, "iterations_since_restore": 424, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.38333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 173.89, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.05, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.11, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.84, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.94, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.55, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.74, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.19, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.01, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.89, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.55, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.74, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.55, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.74, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008383437525480986, "policy_loss": 0.00044354514102451503, "vf_loss": 7.910788536071777, "vf_explained_var": 0.5708798170089722, "kl": 0.0022159921936690807, "entropy": 0.7925610542297363, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5440000, "num_env_steps_trained": 5440000, "num_agent_steps_sampled": 10880000, "num_agent_steps_trained": 10880000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 562.69, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 281.345}, "custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 173.89, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.05, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.11, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.84, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.94, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.55, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.74, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.19, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.31, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.01, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.89, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 15.55, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.74, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.55, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.74, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 630.0, 573.0, 582.0, 587.0, 579.0, 570.0, 582.0, 579.0, 297.0, 576.0, 582.0, 581.0, 576.0, 525.0, 579.0, 579.0, 579.0, 576.0, 265.0, 582.0, 579.0, 584.0, 573.0, 525.0, 579.0, 573.0, 573.0, 587.0, 579.0, 579.0, 576.0, 570.0, 579.0, 630.0, 573.0, 576.0, 579.0, 576.0, 576.0, 627.0, 519.0, 579.0, 513.0, 630.0, 570.0, 582.0, 579.0, 587.0, 579.0, 123.0, 570.0, 587.0, 573.0, 579.0, 582.0, 579.0, 576.0, 522.0, 582.0, 579.0, 584.0, 573.0, 582.0, 522.0, 519.0, 636.0, 530.0, 582.0, 576.0, 633.0, 584.0, 582.0, 576.0, 519.0, 579.0, 587.0, 587.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 579.0, 582.0, 579.0, 579.0, 581.0, 465.0, 524.0, 579.0, 519.0, 627.0, 459.0, 462.0, 587.0, 636.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 294.0, 314.0, 316.0, 285.0, 288.0, 292.0, 290.0, 289.0, 298.0, 289.0, 
290.0, 291.0, 279.0, 291.0, 291.0, 295.0, 284.0, 148.0, 149.0, 281.0, 295.0, 294.0, 288.0, 283.0, 298.0, 289.0, 287.0, 256.0, 269.0, 285.0, 294.0, 292.0, 287.0, 295.0, 284.0, 285.0, 291.0, 139.0, 126.0, 292.0, 290.0, 288.0, 291.0, 287.0, 297.0, 286.0, 287.0, 257.0, 268.0, 282.0, 297.0, 282.0, 291.0, 283.0, 290.0, 294.0, 293.0, 291.0, 288.0, 286.0, 293.0, 286.0, 290.0, 282.0, 288.0, 289.0, 290.0, 316.0, 314.0, 292.0, 281.0, 296.0, 280.0, 293.0, 286.0, 277.0, 299.0, 291.0, 285.0, 312.0, 315.0, 250.0, 269.0, 291.0, 288.0, 253.0, 260.0, 308.0, 322.0, 285.0, 285.0, 290.0, 292.0, 290.0, 289.0, 296.0, 291.0, 286.0, 293.0, 63.0, 60.0, 288.0, 282.0, 295.0, 292.0, 282.0, 291.0, 288.0, 291.0, 290.0, 292.0, 283.0, 296.0, 290.0, 286.0, 254.0, 268.0, 291.0, 291.0, 290.0, 289.0, 288.0, 296.0, 289.0, 284.0, 294.0, 288.0, 265.0, 257.0, 254.0, 265.0, 316.0, 320.0, 266.0, 264.0, 295.0, 287.0, 289.0, 287.0, 324.0, 309.0, 286.0, 298.0, 292.0, 290.0, 294.0, 282.0, 271.0, 248.0, 291.0, 288.0, 288.0, 299.0, 293.0, 294.0, 286.0, 290.0, 283.0, 290.0, 289.0, 290.0, 290.0, 289.0, 289.0, 281.0, 315.0, 318.0, 291.0, 288.0, 291.0, 291.0, 288.0, 291.0, 286.0, 293.0, 278.0, 303.0, 231.0, 234.0, 258.0, 266.0, 285.0, 294.0, 268.0, 251.0, 308.0, 319.0, 220.0, 239.0, 229.0, 233.0, 293.0, 294.0, 319.0, 317.0, 261.0, 261.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6954787574741181, "mean_inference_ms": 1.2404898821971377, "mean_action_processing_ms": 0.13345151424050777, "mean_env_wait_ms": 0.8370745362550585, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 562.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 281.345}, "hist_stats": {"episode_reward": [584.0, 630.0, 573.0, 582.0, 587.0, 579.0, 570.0, 582.0, 579.0, 297.0, 576.0, 582.0, 581.0, 576.0, 525.0, 579.0, 579.0, 579.0, 576.0, 
265.0, 582.0, 579.0, 584.0, 573.0, 525.0, 579.0, 573.0, 573.0, 587.0, 579.0, 579.0, 576.0, 570.0, 579.0, 630.0, 573.0, 576.0, 579.0, 576.0, 576.0, 627.0, 519.0, 579.0, 513.0, 630.0, 570.0, 582.0, 579.0, 587.0, 579.0, 123.0, 570.0, 587.0, 573.0, 579.0, 582.0, 579.0, 576.0, 522.0, 582.0, 579.0, 584.0, 573.0, 582.0, 522.0, 519.0, 636.0, 530.0, 582.0, 576.0, 633.0, 584.0, 582.0, 576.0, 519.0, 579.0, 587.0, 587.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 579.0, 582.0, 579.0, 579.0, 581.0, 465.0, 524.0, 579.0, 519.0, 627.0, 459.0, 462.0, 587.0, 636.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 294.0, 314.0, 316.0, 285.0, 288.0, 292.0, 290.0, 289.0, 298.0, 289.0, 290.0, 291.0, 279.0, 291.0, 291.0, 295.0, 284.0, 148.0, 149.0, 281.0, 295.0, 294.0, 288.0, 283.0, 298.0, 289.0, 287.0, 256.0, 269.0, 285.0, 294.0, 292.0, 287.0, 295.0, 284.0, 285.0, 291.0, 139.0, 126.0, 292.0, 290.0, 288.0, 291.0, 287.0, 297.0, 286.0, 287.0, 257.0, 268.0, 282.0, 297.0, 282.0, 291.0, 283.0, 290.0, 294.0, 293.0, 291.0, 288.0, 286.0, 293.0, 286.0, 290.0, 282.0, 288.0, 289.0, 290.0, 316.0, 314.0, 292.0, 281.0, 296.0, 280.0, 293.0, 286.0, 277.0, 299.0, 291.0, 285.0, 312.0, 315.0, 250.0, 269.0, 291.0, 288.0, 253.0, 260.0, 308.0, 322.0, 285.0, 285.0, 290.0, 292.0, 290.0, 289.0, 296.0, 291.0, 286.0, 293.0, 63.0, 60.0, 288.0, 282.0, 295.0, 292.0, 282.0, 291.0, 288.0, 291.0, 290.0, 292.0, 283.0, 296.0, 290.0, 286.0, 254.0, 268.0, 291.0, 291.0, 290.0, 289.0, 288.0, 296.0, 289.0, 284.0, 294.0, 
288.0, 265.0, 257.0, 254.0, 265.0, 316.0, 320.0, 266.0, 264.0, 295.0, 287.0, 289.0, 287.0, 324.0, 309.0, 286.0, 298.0, 292.0, 290.0, 294.0, 282.0, 271.0, 248.0, 291.0, 288.0, 288.0, 299.0, 293.0, 294.0, 286.0, 290.0, 283.0, 290.0, 289.0, 290.0, 290.0, 289.0, 289.0, 281.0, 315.0, 318.0, 291.0, 288.0, 291.0, 291.0, 288.0, 291.0, 286.0, 293.0, 278.0, 303.0, 231.0, 234.0, 258.0, 266.0, 285.0, 294.0, 268.0, 251.0, 308.0, 319.0, 220.0, 239.0, 229.0, 233.0, 293.0, 294.0, 319.0, 317.0, 261.0, 261.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6954787574741181, "mean_inference_ms": 1.2404898821971377, "mean_action_processing_ms": 0.13345151424050777, "mean_env_wait_ms": 0.8370745362550585, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10880000, "num_agent_steps_trained": 10880000, "num_env_steps_sampled": 5440000, "num_env_steps_trained": 5440000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5440000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10880000, "timers": {"training_iteration_time_ms": 3577.926, "learn_time_ms": 1094.808, "learn_throughput": 11691.547, "synch_weights_time_ms": 12.038}, "counters": {"num_env_steps_sampled": 5440000, "num_env_steps_trained": 5440000, "num_agent_steps_sampled": 10880000, "num_agent_steps_trained": 10880000}, "done": false, "episodes_total": 13600, "training_iteration": 425, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-05", "timestamp": 1666582085, "time_this_iter_s": 3.607473134994507, "time_total_s": 1620.7912001609802, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1620.7912001609802, "timesteps_since_restore": 0, "iterations_since_restore": 425, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.98, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 175.25, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.93, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.42, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.26, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.03, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.48, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.03, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.03, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0019460651092231274, "policy_loss": -0.0023312822449952364, "vf_loss": 7.81488037109375, "vf_explained_var": 0.5743376016616821, "kl": 0.002288772724568844, "entropy": 0.7925402522087097, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5452800, "num_env_steps_trained": 5452800, "num_agent_steps_sampled": 10905600, "num_agent_steps_trained": 10905600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 568.05, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.025}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 175.25, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.93, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.42, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 15.77, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.26, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.08, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.48, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.03, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 4.9, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.04, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.83, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.48, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.03, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.48, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.03, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 579.0, 630.0, 573.0, 576.0, 579.0, 576.0, 576.0, 627.0, 519.0, 579.0, 513.0, 630.0, 570.0, 582.0, 579.0, 587.0, 579.0, 123.0, 570.0, 587.0, 573.0, 579.0, 582.0, 579.0, 576.0, 522.0, 582.0, 579.0, 584.0, 573.0, 582.0, 522.0, 519.0, 636.0, 530.0, 582.0, 576.0, 633.0, 584.0, 582.0, 576.0, 519.0, 579.0, 587.0, 587.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 579.0, 582.0, 579.0, 579.0, 581.0, 465.0, 524.0, 579.0, 519.0, 627.0, 459.0, 462.0, 587.0, 636.0, 522.0, 582.0, 576.0, 530.0, 533.0, 582.0, 576.0, 579.0, 570.0, 584.0, 525.0, 522.0, 576.0, 582.0, 576.0, 590.0, 573.0, 582.0, 582.0, 581.0, 584.0, 584.0, 624.0, 579.0, 582.0, 573.0, 522.0, 627.0, 576.0, 627.0, 576.0, 576.0, 573.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 288.0, 289.0, 290.0, 316.0, 314.0, 292.0, 281.0, 296.0, 280.0, 293.0, 
286.0, 277.0, 299.0, 291.0, 285.0, 312.0, 315.0, 250.0, 269.0, 291.0, 288.0, 253.0, 260.0, 308.0, 322.0, 285.0, 285.0, 290.0, 292.0, 290.0, 289.0, 296.0, 291.0, 286.0, 293.0, 63.0, 60.0, 288.0, 282.0, 295.0, 292.0, 282.0, 291.0, 288.0, 291.0, 290.0, 292.0, 283.0, 296.0, 290.0, 286.0, 254.0, 268.0, 291.0, 291.0, 290.0, 289.0, 288.0, 296.0, 289.0, 284.0, 294.0, 288.0, 265.0, 257.0, 254.0, 265.0, 316.0, 320.0, 266.0, 264.0, 295.0, 287.0, 289.0, 287.0, 324.0, 309.0, 286.0, 298.0, 292.0, 290.0, 294.0, 282.0, 271.0, 248.0, 291.0, 288.0, 288.0, 299.0, 293.0, 294.0, 286.0, 290.0, 283.0, 290.0, 289.0, 290.0, 290.0, 289.0, 289.0, 281.0, 315.0, 318.0, 291.0, 288.0, 291.0, 291.0, 288.0, 291.0, 286.0, 293.0, 278.0, 303.0, 231.0, 234.0, 258.0, 266.0, 285.0, 294.0, 268.0, 251.0, 308.0, 319.0, 220.0, 239.0, 229.0, 233.0, 293.0, 294.0, 319.0, 317.0, 261.0, 261.0, 292.0, 290.0, 291.0, 285.0, 268.0, 262.0, 267.0, 266.0, 294.0, 288.0, 291.0, 285.0, 283.0, 296.0, 278.0, 292.0, 284.0, 300.0, 266.0, 259.0, 261.0, 261.0, 291.0, 285.0, 296.0, 286.0, 287.0, 289.0, 298.0, 292.0, 296.0, 277.0, 293.0, 289.0, 294.0, 288.0, 293.0, 288.0, 294.0, 290.0, 300.0, 284.0, 311.0, 313.0, 290.0, 289.0, 288.0, 294.0, 285.0, 288.0, 262.0, 260.0, 313.0, 314.0, 288.0, 288.0, 319.0, 308.0, 290.0, 286.0, 290.0, 286.0, 284.0, 289.0, 287.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6954404501584768, "mean_inference_ms": 1.2403714512167434, "mean_action_processing_ms": 0.13344596157879127, "mean_env_wait_ms": 0.8370100823602792, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 568.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 60.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.025}, "hist_stats": {"episode_reward": [570.0, 579.0, 630.0, 573.0, 576.0, 579.0, 576.0, 576.0, 627.0, 519.0, 579.0, 513.0, 630.0, 570.0, 582.0, 579.0, 587.0, 579.0, 123.0, 
570.0, 587.0, 573.0, 579.0, 582.0, 579.0, 576.0, 522.0, 582.0, 579.0, 584.0, 573.0, 582.0, 522.0, 519.0, 636.0, 530.0, 582.0, 576.0, 633.0, 584.0, 582.0, 576.0, 519.0, 579.0, 587.0, 587.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 579.0, 582.0, 579.0, 579.0, 581.0, 465.0, 524.0, 579.0, 519.0, 627.0, 459.0, 462.0, 587.0, 636.0, 522.0, 582.0, 576.0, 530.0, 533.0, 582.0, 576.0, 579.0, 570.0, 584.0, 525.0, 522.0, 576.0, 582.0, 576.0, 590.0, 573.0, 582.0, 582.0, 581.0, 584.0, 584.0, 624.0, 579.0, 582.0, 573.0, 522.0, 627.0, 576.0, 627.0, 576.0, 576.0, 573.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [282.0, 288.0, 289.0, 290.0, 316.0, 314.0, 292.0, 281.0, 296.0, 280.0, 293.0, 286.0, 277.0, 299.0, 291.0, 285.0, 312.0, 315.0, 250.0, 269.0, 291.0, 288.0, 253.0, 260.0, 308.0, 322.0, 285.0, 285.0, 290.0, 292.0, 290.0, 289.0, 296.0, 291.0, 286.0, 293.0, 63.0, 60.0, 288.0, 282.0, 295.0, 292.0, 282.0, 291.0, 288.0, 291.0, 290.0, 292.0, 283.0, 296.0, 290.0, 286.0, 254.0, 268.0, 291.0, 291.0, 290.0, 289.0, 288.0, 296.0, 289.0, 284.0, 294.0, 288.0, 265.0, 257.0, 254.0, 265.0, 316.0, 320.0, 266.0, 264.0, 295.0, 287.0, 289.0, 287.0, 324.0, 309.0, 286.0, 298.0, 292.0, 290.0, 294.0, 282.0, 271.0, 248.0, 291.0, 288.0, 288.0, 299.0, 293.0, 294.0, 286.0, 290.0, 283.0, 290.0, 289.0, 290.0, 290.0, 289.0, 289.0, 281.0, 315.0, 318.0, 291.0, 288.0, 291.0, 291.0, 288.0, 291.0, 286.0, 293.0, 278.0, 303.0, 231.0, 234.0, 258.0, 266.0, 285.0, 294.0, 268.0, 251.0, 308.0, 319.0, 220.0, 239.0, 229.0, 
233.0, 293.0, 294.0, 319.0, 317.0, 261.0, 261.0, 292.0, 290.0, 291.0, 285.0, 268.0, 262.0, 267.0, 266.0, 294.0, 288.0, 291.0, 285.0, 283.0, 296.0, 278.0, 292.0, 284.0, 300.0, 266.0, 259.0, 261.0, 261.0, 291.0, 285.0, 296.0, 286.0, 287.0, 289.0, 298.0, 292.0, 296.0, 277.0, 293.0, 289.0, 294.0, 288.0, 293.0, 288.0, 294.0, 290.0, 300.0, 284.0, 311.0, 313.0, 290.0, 289.0, 288.0, 294.0, 285.0, 288.0, 262.0, 260.0, 313.0, 314.0, 288.0, 288.0, 319.0, 308.0, 290.0, 286.0, 290.0, 286.0, 284.0, 289.0, 287.0, 297.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6954404501584768, "mean_inference_ms": 1.2403714512167434, "mean_action_processing_ms": 0.13344596157879127, "mean_env_wait_ms": 0.8370100823602792, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10905600, "num_agent_steps_trained": 10905600, "num_env_steps_sampled": 5452800, "num_env_steps_trained": 5452800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5452800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10905600, "timers": {"training_iteration_time_ms": 3580.853, "learn_time_ms": 1095.196, "learn_throughput": 11687.404, "synch_weights_time_ms": 12.223}, "counters": {"num_env_steps_sampled": 5452800, "num_env_steps_trained": 5452800, "num_agent_steps_sampled": 10905600, "num_agent_steps_trained": 10905600}, "done": false, "episodes_total": 13632, "training_iteration": 426, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-09", "timestamp": 1666582089, "time_this_iter_s": 3.7375569343566895, "time_total_s": 1624.528757095337, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1624.528757095337, "timesteps_since_restore": 0, "iterations_since_restore": 426, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.73333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.62, "shaped_reward_min": 119, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.15, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.31, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.03, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.15, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.91, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.01, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.74, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.91, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.91, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014132090145722032, "policy_loss": -0.0017901980318129063, "vf_loss": 7.7431488037109375, "vf_explained_var": 0.5979735851287842, "kl": 0.0031745489686727524, "entropy": 0.794650673866272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5465600, "num_env_steps_trained": 5465600, "num_agent_steps_sampled": 10931200, "num_agent_steps_trained": 10931200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 399.0, "episode_reward_mean": 569.22, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 188.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.61}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.62, "shaped_reward_min": 119, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.15, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.31, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.03, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.15, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.1, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.74, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.91, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.37, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.25, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.06, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.98, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.01, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.74, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.91, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.74, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.91, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 519.0, 636.0, 530.0, 582.0, 576.0, 633.0, 584.0, 582.0, 576.0, 519.0, 579.0, 587.0, 587.0, 576.0, 573.0, 579.0, 579.0, 570.0, 633.0, 579.0, 582.0, 579.0, 579.0, 581.0, 465.0, 524.0, 579.0, 519.0, 627.0, 459.0, 462.0, 587.0, 636.0, 522.0, 582.0, 576.0, 530.0, 533.0, 582.0, 576.0, 579.0, 570.0, 584.0, 525.0, 522.0, 576.0, 582.0, 576.0, 590.0, 573.0, 582.0, 582.0, 581.0, 584.0, 584.0, 624.0, 579.0, 582.0, 573.0, 522.0, 627.0, 576.0, 627.0, 576.0, 576.0, 573.0, 584.0, 627.0, 465.0, 590.0, 582.0, 587.0, 507.0, 576.0, 579.0, 399.0, 573.0, 473.0, 630.0, 590.0, 546.0, 579.0, 579.0, 573.0, 522.0, 579.0, 579.0, 576.0, 590.0, 579.0, 627.0, 582.0, 579.0, 582.0, 570.0, 573.0, 573.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 257.0, 254.0, 265.0, 316.0, 320.0, 266.0, 264.0, 295.0, 287.0, 289.0, 
287.0, 324.0, 309.0, 286.0, 298.0, 292.0, 290.0, 294.0, 282.0, 271.0, 248.0, 291.0, 288.0, 288.0, 299.0, 293.0, 294.0, 286.0, 290.0, 283.0, 290.0, 289.0, 290.0, 290.0, 289.0, 289.0, 281.0, 315.0, 318.0, 291.0, 288.0, 291.0, 291.0, 288.0, 291.0, 286.0, 293.0, 278.0, 303.0, 231.0, 234.0, 258.0, 266.0, 285.0, 294.0, 268.0, 251.0, 308.0, 319.0, 220.0, 239.0, 229.0, 233.0, 293.0, 294.0, 319.0, 317.0, 261.0, 261.0, 292.0, 290.0, 291.0, 285.0, 268.0, 262.0, 267.0, 266.0, 294.0, 288.0, 291.0, 285.0, 283.0, 296.0, 278.0, 292.0, 284.0, 300.0, 266.0, 259.0, 261.0, 261.0, 291.0, 285.0, 296.0, 286.0, 287.0, 289.0, 298.0, 292.0, 296.0, 277.0, 293.0, 289.0, 294.0, 288.0, 293.0, 288.0, 294.0, 290.0, 300.0, 284.0, 311.0, 313.0, 290.0, 289.0, 288.0, 294.0, 285.0, 288.0, 262.0, 260.0, 313.0, 314.0, 288.0, 288.0, 319.0, 308.0, 290.0, 286.0, 290.0, 286.0, 284.0, 289.0, 287.0, 297.0, 309.0, 318.0, 233.0, 232.0, 299.0, 291.0, 287.0, 295.0, 295.0, 292.0, 253.0, 254.0, 285.0, 291.0, 288.0, 291.0, 188.0, 211.0, 288.0, 285.0, 242.0, 231.0, 321.0, 309.0, 297.0, 293.0, 274.0, 272.0, 283.0, 296.0, 283.0, 296.0, 288.0, 285.0, 260.0, 262.0, 291.0, 288.0, 289.0, 290.0, 291.0, 285.0, 308.0, 282.0, 289.0, 290.0, 313.0, 314.0, 292.0, 290.0, 291.0, 288.0, 288.0, 294.0, 287.0, 283.0, 286.0, 287.0, 276.0, 297.0, 291.0, 291.0, 299.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6953774070106227, "mean_inference_ms": 1.2402776848961765, "mean_action_processing_ms": 0.13343806812747527, "mean_env_wait_ms": 0.8370408488588459, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 399.0, "episode_reward_mean": 569.22, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 188.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.61}, "hist_stats": {"episode_reward": [522.0, 519.0, 636.0, 530.0, 582.0, 576.0, 633.0, 584.0, 582.0, 576.0, 519.0, 579.0, 587.0, 587.0, 576.0, 573.0, 579.0, 579.0, 570.0, 
633.0, 579.0, 582.0, 579.0, 579.0, 581.0, 465.0, 524.0, 579.0, 519.0, 627.0, 459.0, 462.0, 587.0, 636.0, 522.0, 582.0, 576.0, 530.0, 533.0, 582.0, 576.0, 579.0, 570.0, 584.0, 525.0, 522.0, 576.0, 582.0, 576.0, 590.0, 573.0, 582.0, 582.0, 581.0, 584.0, 584.0, 624.0, 579.0, 582.0, 573.0, 522.0, 627.0, 576.0, 627.0, 576.0, 576.0, 573.0, 584.0, 627.0, 465.0, 590.0, 582.0, 587.0, 507.0, 576.0, 579.0, 399.0, 573.0, 473.0, 630.0, 590.0, 546.0, 579.0, 579.0, 573.0, 522.0, 579.0, 579.0, 576.0, 590.0, 579.0, 627.0, 582.0, 579.0, 582.0, 570.0, 573.0, 573.0, 582.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 257.0, 254.0, 265.0, 316.0, 320.0, 266.0, 264.0, 295.0, 287.0, 289.0, 287.0, 324.0, 309.0, 286.0, 298.0, 292.0, 290.0, 294.0, 282.0, 271.0, 248.0, 291.0, 288.0, 288.0, 299.0, 293.0, 294.0, 286.0, 290.0, 283.0, 290.0, 289.0, 290.0, 290.0, 289.0, 289.0, 281.0, 315.0, 318.0, 291.0, 288.0, 291.0, 291.0, 288.0, 291.0, 286.0, 293.0, 278.0, 303.0, 231.0, 234.0, 258.0, 266.0, 285.0, 294.0, 268.0, 251.0, 308.0, 319.0, 220.0, 239.0, 229.0, 233.0, 293.0, 294.0, 319.0, 317.0, 261.0, 261.0, 292.0, 290.0, 291.0, 285.0, 268.0, 262.0, 267.0, 266.0, 294.0, 288.0, 291.0, 285.0, 283.0, 296.0, 278.0, 292.0, 284.0, 300.0, 266.0, 259.0, 261.0, 261.0, 291.0, 285.0, 296.0, 286.0, 287.0, 289.0, 298.0, 292.0, 296.0, 277.0, 293.0, 289.0, 294.0, 288.0, 293.0, 288.0, 294.0, 290.0, 300.0, 284.0, 311.0, 313.0, 290.0, 289.0, 288.0, 294.0, 285.0, 288.0, 262.0, 260.0, 313.0, 314.0, 288.0, 288.0, 319.0, 
308.0, 290.0, 286.0, 290.0, 286.0, 284.0, 289.0, 287.0, 297.0, 309.0, 318.0, 233.0, 232.0, 299.0, 291.0, 287.0, 295.0, 295.0, 292.0, 253.0, 254.0, 285.0, 291.0, 288.0, 291.0, 188.0, 211.0, 288.0, 285.0, 242.0, 231.0, 321.0, 309.0, 297.0, 293.0, 274.0, 272.0, 283.0, 296.0, 283.0, 296.0, 288.0, 285.0, 260.0, 262.0, 291.0, 288.0, 289.0, 290.0, 291.0, 285.0, 308.0, 282.0, 289.0, 290.0, 313.0, 314.0, 292.0, 290.0, 291.0, 288.0, 288.0, 294.0, 287.0, 283.0, 286.0, 287.0, 276.0, 297.0, 291.0, 291.0, 299.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6953774070106227, "mean_inference_ms": 1.2402776848961765, "mean_action_processing_ms": 0.13343806812747527, "mean_env_wait_ms": 0.8370408488588459, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10931200, "num_agent_steps_trained": 10931200, "num_env_steps_sampled": 5465600, "num_env_steps_trained": 5465600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5465600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10931200, "timers": {"training_iteration_time_ms": 3600.212, "learn_time_ms": 1090.835, "learn_throughput": 11734.128, "synch_weights_time_ms": 11.717}, "counters": {"num_env_steps_sampled": 5465600, "num_env_steps_trained": 5465600, "num_agent_steps_sampled": 10931200, "num_agent_steps_trained": 10931200}, "done": false, "episodes_total": 13664, "training_iteration": 427, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-13", "timestamp": 1666582093, "time_this_iter_s": 3.8690433502197266, "time_total_s": 1628.3978004455566, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1628.3978004455566, "timesteps_since_restore": 0, "iterations_since_restore": 427, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.360000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.65, "shaped_reward_min": 119, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.95, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.68, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.86, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.57, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.09, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.6, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.35, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.55, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.6, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.35, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.6, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.35, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008390977163799107, "policy_loss": -0.0012106244685128331, "vf_loss": 7.669140815734863, "vf_explained_var": 0.5955076217651367, "kl": 0.002353356685489416, "entropy": 0.7907739281654358, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5478400, "num_env_steps_trained": 5478400, "num_agent_steps_sampled": 10956800, "num_agent_steps_trained": 10956800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 399.0, "episode_reward_mean": 577.25, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 188.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.625}, "custom_metrics": {"sparse_reward_mean": 199.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.65, "shaped_reward_min": 119, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 15.95, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.68, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.86, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.57, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 15.6, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.35, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.55, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.21, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.27, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.03, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.12, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.23, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.19, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.6, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.35, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.6, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.35, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 636.0, 522.0, 582.0, 576.0, 530.0, 533.0, 582.0, 576.0, 579.0, 570.0, 584.0, 525.0, 522.0, 576.0, 582.0, 576.0, 590.0, 573.0, 582.0, 582.0, 581.0, 584.0, 584.0, 624.0, 579.0, 582.0, 573.0, 522.0, 627.0, 576.0, 627.0, 576.0, 576.0, 573.0, 584.0, 627.0, 465.0, 590.0, 582.0, 587.0, 507.0, 576.0, 579.0, 399.0, 573.0, 473.0, 630.0, 590.0, 546.0, 579.0, 579.0, 573.0, 522.0, 579.0, 579.0, 576.0, 590.0, 579.0, 627.0, 582.0, 579.0, 582.0, 570.0, 573.0, 573.0, 582.0, 584.0, 624.0, 584.0, 579.0, 570.0, 630.0, 584.0, 576.0, 576.0, 576.0, 579.0, 582.0, 582.0, 573.0, 630.0, 579.0, 590.0, 576.0, 579.0, 582.0, 582.0, 627.0, 627.0, 627.0, 579.0, 579.0, 579.0, 579.0, 630.0, 570.0, 573.0, 573.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 319.0, 317.0, 261.0, 261.0, 292.0, 290.0, 291.0, 285.0, 268.0, 
262.0, 267.0, 266.0, 294.0, 288.0, 291.0, 285.0, 283.0, 296.0, 278.0, 292.0, 284.0, 300.0, 266.0, 259.0, 261.0, 261.0, 291.0, 285.0, 296.0, 286.0, 287.0, 289.0, 298.0, 292.0, 296.0, 277.0, 293.0, 289.0, 294.0, 288.0, 293.0, 288.0, 294.0, 290.0, 300.0, 284.0, 311.0, 313.0, 290.0, 289.0, 288.0, 294.0, 285.0, 288.0, 262.0, 260.0, 313.0, 314.0, 288.0, 288.0, 319.0, 308.0, 290.0, 286.0, 290.0, 286.0, 284.0, 289.0, 287.0, 297.0, 309.0, 318.0, 233.0, 232.0, 299.0, 291.0, 287.0, 295.0, 295.0, 292.0, 253.0, 254.0, 285.0, 291.0, 288.0, 291.0, 188.0, 211.0, 288.0, 285.0, 242.0, 231.0, 321.0, 309.0, 297.0, 293.0, 274.0, 272.0, 283.0, 296.0, 283.0, 296.0, 288.0, 285.0, 260.0, 262.0, 291.0, 288.0, 289.0, 290.0, 291.0, 285.0, 308.0, 282.0, 289.0, 290.0, 313.0, 314.0, 292.0, 290.0, 291.0, 288.0, 288.0, 294.0, 287.0, 283.0, 286.0, 287.0, 276.0, 297.0, 291.0, 291.0, 299.0, 285.0, 301.0, 323.0, 299.0, 285.0, 294.0, 285.0, 282.0, 288.0, 306.0, 324.0, 288.0, 296.0, 290.0, 286.0, 288.0, 288.0, 288.0, 288.0, 288.0, 291.0, 291.0, 291.0, 290.0, 292.0, 286.0, 287.0, 309.0, 321.0, 287.0, 292.0, 296.0, 294.0, 283.0, 293.0, 294.0, 285.0, 295.0, 287.0, 295.0, 287.0, 311.0, 316.0, 306.0, 321.0, 319.0, 308.0, 291.0, 288.0, 293.0, 286.0, 288.0, 291.0, 288.0, 291.0, 308.0, 322.0, 277.0, 293.0, 279.0, 294.0, 290.0, 283.0, 295.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6953154186385797, "mean_inference_ms": 1.2402239966956277, "mean_action_processing_ms": 0.13342831773414113, "mean_env_wait_ms": 0.837058849138891, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 399.0, "episode_reward_mean": 577.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 188.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.625}, "hist_stats": {"episode_reward": [587.0, 636.0, 522.0, 582.0, 576.0, 530.0, 533.0, 582.0, 576.0, 579.0, 570.0, 584.0, 525.0, 522.0, 576.0, 582.0, 576.0, 590.0, 573.0, 
582.0, 582.0, 581.0, 584.0, 584.0, 624.0, 579.0, 582.0, 573.0, 522.0, 627.0, 576.0, 627.0, 576.0, 576.0, 573.0, 584.0, 627.0, 465.0, 590.0, 582.0, 587.0, 507.0, 576.0, 579.0, 399.0, 573.0, 473.0, 630.0, 590.0, 546.0, 579.0, 579.0, 573.0, 522.0, 579.0, 579.0, 576.0, 590.0, 579.0, 627.0, 582.0, 579.0, 582.0, 570.0, 573.0, 573.0, 582.0, 584.0, 624.0, 584.0, 579.0, 570.0, 630.0, 584.0, 576.0, 576.0, 576.0, 579.0, 582.0, 582.0, 573.0, 630.0, 579.0, 590.0, 576.0, 579.0, 582.0, 582.0, 627.0, 627.0, 627.0, 579.0, 579.0, 579.0, 579.0, 630.0, 570.0, 573.0, 573.0, 584.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 294.0, 319.0, 317.0, 261.0, 261.0, 292.0, 290.0, 291.0, 285.0, 268.0, 262.0, 267.0, 266.0, 294.0, 288.0, 291.0, 285.0, 283.0, 296.0, 278.0, 292.0, 284.0, 300.0, 266.0, 259.0, 261.0, 261.0, 291.0, 285.0, 296.0, 286.0, 287.0, 289.0, 298.0, 292.0, 296.0, 277.0, 293.0, 289.0, 294.0, 288.0, 293.0, 288.0, 294.0, 290.0, 300.0, 284.0, 311.0, 313.0, 290.0, 289.0, 288.0, 294.0, 285.0, 288.0, 262.0, 260.0, 313.0, 314.0, 288.0, 288.0, 319.0, 308.0, 290.0, 286.0, 290.0, 286.0, 284.0, 289.0, 287.0, 297.0, 309.0, 318.0, 233.0, 232.0, 299.0, 291.0, 287.0, 295.0, 295.0, 292.0, 253.0, 254.0, 285.0, 291.0, 288.0, 291.0, 188.0, 211.0, 288.0, 285.0, 242.0, 231.0, 321.0, 309.0, 297.0, 293.0, 274.0, 272.0, 283.0, 296.0, 283.0, 296.0, 288.0, 285.0, 260.0, 262.0, 291.0, 288.0, 289.0, 290.0, 291.0, 285.0, 308.0, 282.0, 289.0, 290.0, 313.0, 314.0, 292.0, 290.0, 291.0, 288.0, 288.0, 294.0, 287.0, 
283.0, 286.0, 287.0, 276.0, 297.0, 291.0, 291.0, 299.0, 285.0, 301.0, 323.0, 299.0, 285.0, 294.0, 285.0, 282.0, 288.0, 306.0, 324.0, 288.0, 296.0, 290.0, 286.0, 288.0, 288.0, 288.0, 288.0, 288.0, 291.0, 291.0, 291.0, 290.0, 292.0, 286.0, 287.0, 309.0, 321.0, 287.0, 292.0, 296.0, 294.0, 283.0, 293.0, 294.0, 285.0, 295.0, 287.0, 295.0, 287.0, 311.0, 316.0, 306.0, 321.0, 319.0, 308.0, 291.0, 288.0, 293.0, 286.0, 288.0, 291.0, 288.0, 291.0, 308.0, 322.0, 277.0, 293.0, 279.0, 294.0, 290.0, 283.0, 295.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6953154186385797, "mean_inference_ms": 1.2402239966956277, "mean_action_processing_ms": 0.13342831773414113, "mean_env_wait_ms": 0.837058849138891, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10956800, "num_agent_steps_trained": 10956800, "num_env_steps_sampled": 5478400, "num_env_steps_trained": 5478400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5478400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10956800, "timers": {"training_iteration_time_ms": 3611.663, "learn_time_ms": 1089.293, "learn_throughput": 11750.742, "synch_weights_time_ms": 11.348}, "counters": {"num_env_steps_sampled": 5478400, "num_env_steps_trained": 5478400, "num_agent_steps_sampled": 10956800, "num_agent_steps_trained": 10956800}, "done": false, "episodes_total": 13696, "training_iteration": 428, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-17", "timestamp": 1666582097, "time_this_iter_s": 3.7833468914031982, "time_total_s": 1632.1811473369598, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1632.1811473369598, "timesteps_since_restore": 0, "iterations_since_restore": 428, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.900000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 200.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.44, "shaped_reward_min": 119, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.12, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.67, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.48, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.23, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.78, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.23, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.23, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008235832792706788, "policy_loss": -0.001210855320096016, "vf_loss": 7.875899791717529, "vf_explained_var": 0.5872540473937988, "kl": 0.0029848506674170494, "entropy": 0.8006356954574585, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5491200, "num_env_steps_trained": 5491200, "num_agent_steps_sampled": 10982400, "num_agent_steps_trained": 10982400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 399.0, "episode_reward_mean": 577.44, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 188.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.72}, "custom_metrics": {"sparse_reward_mean": 200.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 177.44, "shaped_reward_min": 119, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.12, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.67, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.48, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.15, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.23, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.44, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.09, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 9, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.78, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.23, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.23, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 576.0, 573.0, 584.0, 627.0, 465.0, 590.0, 582.0, 587.0, 507.0, 576.0, 579.0, 399.0, 573.0, 473.0, 630.0, 590.0, 546.0, 579.0, 579.0, 573.0, 522.0, 579.0, 579.0, 576.0, 590.0, 579.0, 627.0, 582.0, 579.0, 582.0, 570.0, 573.0, 573.0, 582.0, 584.0, 624.0, 584.0, 579.0, 570.0, 630.0, 584.0, 576.0, 576.0, 576.0, 579.0, 582.0, 582.0, 573.0, 630.0, 579.0, 590.0, 576.0, 579.0, 582.0, 582.0, 627.0, 627.0, 627.0, 579.0, 579.0, 579.0, 579.0, 630.0, 570.0, 573.0, 573.0, 584.0, 576.0, 590.0, 576.0, 538.0, 579.0, 576.0, 582.0, 579.0, 576.0, 576.0, 576.0, 579.0, 576.0, 582.0, 579.0, 579.0, 570.0, 516.0, 579.0, 576.0, 576.0, 519.0, 627.0, 522.0, 579.0, 630.0, 636.0, 584.0, 579.0, 636.0, 579.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 286.0, 290.0, 286.0, 284.0, 289.0, 287.0, 297.0, 309.0, 318.0, 233.0, 
232.0, 299.0, 291.0, 287.0, 295.0, 295.0, 292.0, 253.0, 254.0, 285.0, 291.0, 288.0, 291.0, 188.0, 211.0, 288.0, 285.0, 242.0, 231.0, 321.0, 309.0, 297.0, 293.0, 274.0, 272.0, 283.0, 296.0, 283.0, 296.0, 288.0, 285.0, 260.0, 262.0, 291.0, 288.0, 289.0, 290.0, 291.0, 285.0, 308.0, 282.0, 289.0, 290.0, 313.0, 314.0, 292.0, 290.0, 291.0, 288.0, 288.0, 294.0, 287.0, 283.0, 286.0, 287.0, 276.0, 297.0, 291.0, 291.0, 299.0, 285.0, 301.0, 323.0, 299.0, 285.0, 294.0, 285.0, 282.0, 288.0, 306.0, 324.0, 288.0, 296.0, 290.0, 286.0, 288.0, 288.0, 288.0, 288.0, 288.0, 291.0, 291.0, 291.0, 290.0, 292.0, 286.0, 287.0, 309.0, 321.0, 287.0, 292.0, 296.0, 294.0, 283.0, 293.0, 294.0, 285.0, 295.0, 287.0, 295.0, 287.0, 311.0, 316.0, 306.0, 321.0, 319.0, 308.0, 291.0, 288.0, 293.0, 286.0, 288.0, 291.0, 288.0, 291.0, 308.0, 322.0, 277.0, 293.0, 279.0, 294.0, 290.0, 283.0, 295.0, 289.0, 277.0, 299.0, 301.0, 289.0, 288.0, 288.0, 275.0, 263.0, 296.0, 283.0, 288.0, 288.0, 291.0, 291.0, 288.0, 291.0, 284.0, 292.0, 285.0, 291.0, 286.0, 290.0, 288.0, 291.0, 289.0, 287.0, 284.0, 298.0, 285.0, 294.0, 294.0, 285.0, 296.0, 274.0, 244.0, 272.0, 293.0, 286.0, 291.0, 285.0, 288.0, 288.0, 266.0, 253.0, 317.0, 310.0, 255.0, 267.0, 293.0, 286.0, 309.0, 321.0, 319.0, 317.0, 293.0, 291.0, 296.0, 283.0, 317.0, 319.0, 284.0, 295.0, 266.0, 250.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6952512538145208, "mean_inference_ms": 1.2401818534278526, "mean_action_processing_ms": 0.13341855729231697, "mean_env_wait_ms": 0.837076962461623, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 399.0, "episode_reward_mean": 577.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 188.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.72}, "hist_stats": {"episode_reward": [576.0, 576.0, 573.0, 584.0, 627.0, 465.0, 590.0, 582.0, 587.0, 507.0, 576.0, 579.0, 399.0, 573.0, 473.0, 630.0, 590.0, 546.0, 579.0, 
579.0, 573.0, 522.0, 579.0, 579.0, 576.0, 590.0, 579.0, 627.0, 582.0, 579.0, 582.0, 570.0, 573.0, 573.0, 582.0, 584.0, 624.0, 584.0, 579.0, 570.0, 630.0, 584.0, 576.0, 576.0, 576.0, 579.0, 582.0, 582.0, 573.0, 630.0, 579.0, 590.0, 576.0, 579.0, 582.0, 582.0, 627.0, 627.0, 627.0, 579.0, 579.0, 579.0, 579.0, 630.0, 570.0, 573.0, 573.0, 584.0, 576.0, 590.0, 576.0, 538.0, 579.0, 576.0, 582.0, 579.0, 576.0, 576.0, 576.0, 579.0, 576.0, 582.0, 579.0, 579.0, 570.0, 516.0, 579.0, 576.0, 576.0, 519.0, 627.0, 522.0, 579.0, 630.0, 636.0, 584.0, 579.0, 636.0, 579.0, 516.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [290.0, 286.0, 290.0, 286.0, 284.0, 289.0, 287.0, 297.0, 309.0, 318.0, 233.0, 232.0, 299.0, 291.0, 287.0, 295.0, 295.0, 292.0, 253.0, 254.0, 285.0, 291.0, 288.0, 291.0, 188.0, 211.0, 288.0, 285.0, 242.0, 231.0, 321.0, 309.0, 297.0, 293.0, 274.0, 272.0, 283.0, 296.0, 283.0, 296.0, 288.0, 285.0, 260.0, 262.0, 291.0, 288.0, 289.0, 290.0, 291.0, 285.0, 308.0, 282.0, 289.0, 290.0, 313.0, 314.0, 292.0, 290.0, 291.0, 288.0, 288.0, 294.0, 287.0, 283.0, 286.0, 287.0, 276.0, 297.0, 291.0, 291.0, 299.0, 285.0, 301.0, 323.0, 299.0, 285.0, 294.0, 285.0, 282.0, 288.0, 306.0, 324.0, 288.0, 296.0, 290.0, 286.0, 288.0, 288.0, 288.0, 288.0, 288.0, 291.0, 291.0, 291.0, 290.0, 292.0, 286.0, 287.0, 309.0, 321.0, 287.0, 292.0, 296.0, 294.0, 283.0, 293.0, 294.0, 285.0, 295.0, 287.0, 295.0, 287.0, 311.0, 316.0, 306.0, 321.0, 319.0, 308.0, 291.0, 288.0, 293.0, 286.0, 288.0, 291.0, 288.0, 291.0, 308.0, 
322.0, 277.0, 293.0, 279.0, 294.0, 290.0, 283.0, 295.0, 289.0, 277.0, 299.0, 301.0, 289.0, 288.0, 288.0, 275.0, 263.0, 296.0, 283.0, 288.0, 288.0, 291.0, 291.0, 288.0, 291.0, 284.0, 292.0, 285.0, 291.0, 286.0, 290.0, 288.0, 291.0, 289.0, 287.0, 284.0, 298.0, 285.0, 294.0, 294.0, 285.0, 296.0, 274.0, 244.0, 272.0, 293.0, 286.0, 291.0, 285.0, 288.0, 288.0, 266.0, 253.0, 317.0, 310.0, 255.0, 267.0, 293.0, 286.0, 309.0, 321.0, 319.0, 317.0, 293.0, 291.0, 296.0, 283.0, 317.0, 319.0, 284.0, 295.0, 266.0, 250.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6952512538145208, "mean_inference_ms": 1.2401818534278526, "mean_action_processing_ms": 0.13341855729231697, "mean_env_wait_ms": 0.837076962461623, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 10982400, "num_agent_steps_trained": 10982400, "num_env_steps_sampled": 5491200, "num_env_steps_trained": 5491200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5491200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 10982400, "timers": {"training_iteration_time_ms": 3610.481, "learn_time_ms": 1089.999, "learn_throughput": 11743.134, "synch_weights_time_ms": 11.371}, "counters": {"num_env_steps_sampled": 5491200, "num_env_steps_trained": 5491200, "num_agent_steps_sampled": 10982400, "num_agent_steps_trained": 10982400}, "done": false, "episodes_total": 13728, "training_iteration": 429, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-20", "timestamp": 1666582100, "time_this_iter_s": 3.6428945064544678, "time_total_s": 1635.8240418434143, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1635.8240418434143, "timesteps_since_restore": 0, "iterations_since_restore": 429, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.94, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.27, "shaped_reward_min": 156, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.52, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.25, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.28, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.02, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.13, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.03, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.02, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.02, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003178417682647705, "policy_loss": 0.0027919402346014977, "vf_loss": 7.772497653961182, "vf_explained_var": 0.5636935234069824, "kl": 0.002719259588047862, "entropy": 0.7815442085266113, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5504000, "num_env_steps_trained": 5504000, "num_agent_steps_sampled": 11008000, "num_agent_steps_trained": 11008000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 516.0, "episode_reward_mean": 579.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 244.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.935}, "custom_metrics": {"sparse_reward_mean": 200.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.27, "shaped_reward_min": 156, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.52, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.25, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.28, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.17, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.03, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.02, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.13, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.99, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.03, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.02, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.03, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.02, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 573.0, 582.0, 584.0, 624.0, 584.0, 579.0, 570.0, 630.0, 584.0, 576.0, 576.0, 576.0, 579.0, 582.0, 582.0, 573.0, 630.0, 579.0, 590.0, 576.0, 579.0, 582.0, 582.0, 627.0, 627.0, 627.0, 579.0, 579.0, 579.0, 579.0, 630.0, 570.0, 573.0, 573.0, 584.0, 576.0, 590.0, 576.0, 538.0, 579.0, 576.0, 582.0, 579.0, 576.0, 576.0, 576.0, 579.0, 576.0, 582.0, 579.0, 579.0, 570.0, 516.0, 579.0, 576.0, 576.0, 519.0, 627.0, 522.0, 579.0, 630.0, 636.0, 584.0, 579.0, 636.0, 579.0, 516.0, 582.0, 522.0, 573.0, 582.0, 576.0, 584.0, 627.0, 582.0, 576.0, 579.0, 576.0, 630.0, 573.0, 579.0, 573.0, 573.0, 576.0, 576.0, 573.0, 519.0, 627.0, 581.0, 576.0, 579.0, 576.0, 573.0, 527.0, 584.0, 530.0, 533.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 276.0, 297.0, 291.0, 291.0, 299.0, 285.0, 301.0, 323.0, 299.0, 
285.0, 294.0, 285.0, 282.0, 288.0, 306.0, 324.0, 288.0, 296.0, 290.0, 286.0, 288.0, 288.0, 288.0, 288.0, 288.0, 291.0, 291.0, 291.0, 290.0, 292.0, 286.0, 287.0, 309.0, 321.0, 287.0, 292.0, 296.0, 294.0, 283.0, 293.0, 294.0, 285.0, 295.0, 287.0, 295.0, 287.0, 311.0, 316.0, 306.0, 321.0, 319.0, 308.0, 291.0, 288.0, 293.0, 286.0, 288.0, 291.0, 288.0, 291.0, 308.0, 322.0, 277.0, 293.0, 279.0, 294.0, 290.0, 283.0, 295.0, 289.0, 277.0, 299.0, 301.0, 289.0, 288.0, 288.0, 275.0, 263.0, 296.0, 283.0, 288.0, 288.0, 291.0, 291.0, 288.0, 291.0, 284.0, 292.0, 285.0, 291.0, 286.0, 290.0, 288.0, 291.0, 289.0, 287.0, 284.0, 298.0, 285.0, 294.0, 294.0, 285.0, 296.0, 274.0, 244.0, 272.0, 293.0, 286.0, 291.0, 285.0, 288.0, 288.0, 266.0, 253.0, 317.0, 310.0, 255.0, 267.0, 293.0, 286.0, 309.0, 321.0, 319.0, 317.0, 293.0, 291.0, 296.0, 283.0, 317.0, 319.0, 284.0, 295.0, 266.0, 250.0, 291.0, 291.0, 266.0, 256.0, 283.0, 290.0, 292.0, 290.0, 286.0, 290.0, 300.0, 284.0, 311.0, 316.0, 291.0, 291.0, 294.0, 282.0, 291.0, 288.0, 285.0, 291.0, 309.0, 321.0, 288.0, 285.0, 289.0, 290.0, 286.0, 287.0, 287.0, 286.0, 286.0, 290.0, 289.0, 287.0, 283.0, 290.0, 252.0, 267.0, 314.0, 313.0, 303.0, 278.0, 278.0, 298.0, 285.0, 294.0, 290.0, 286.0, 285.0, 288.0, 267.0, 260.0, 284.0, 300.0, 261.0, 269.0, 261.0, 272.0, 292.0, 284.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6952016011638078, "mean_inference_ms": 1.2401249587874137, "mean_action_processing_ms": 0.13341175036794614, "mean_env_wait_ms": 0.8370252127859996, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 516.0, "episode_reward_mean": 579.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 244.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 289.935}, "hist_stats": {"episode_reward": [573.0, 573.0, 582.0, 584.0, 624.0, 584.0, 579.0, 570.0, 630.0, 584.0, 576.0, 576.0, 576.0, 579.0, 582.0, 582.0, 573.0, 630.0, 
579.0, 590.0, 576.0, 579.0, 582.0, 582.0, 627.0, 627.0, 627.0, 579.0, 579.0, 579.0, 579.0, 630.0, 570.0, 573.0, 573.0, 584.0, 576.0, 590.0, 576.0, 538.0, 579.0, 576.0, 582.0, 579.0, 576.0, 576.0, 576.0, 579.0, 576.0, 582.0, 579.0, 579.0, 570.0, 516.0, 579.0, 576.0, 576.0, 519.0, 627.0, 522.0, 579.0, 630.0, 636.0, 584.0, 579.0, 636.0, 579.0, 516.0, 582.0, 522.0, 573.0, 582.0, 576.0, 584.0, 627.0, 582.0, 576.0, 579.0, 576.0, 630.0, 573.0, 579.0, 573.0, 573.0, 576.0, 576.0, 573.0, 519.0, 627.0, 581.0, 576.0, 579.0, 576.0, 573.0, 527.0, 584.0, 530.0, 533.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 287.0, 276.0, 297.0, 291.0, 291.0, 299.0, 285.0, 301.0, 323.0, 299.0, 285.0, 294.0, 285.0, 282.0, 288.0, 306.0, 324.0, 288.0, 296.0, 290.0, 286.0, 288.0, 288.0, 288.0, 288.0, 288.0, 291.0, 291.0, 291.0, 290.0, 292.0, 286.0, 287.0, 309.0, 321.0, 287.0, 292.0, 296.0, 294.0, 283.0, 293.0, 294.0, 285.0, 295.0, 287.0, 295.0, 287.0, 311.0, 316.0, 306.0, 321.0, 319.0, 308.0, 291.0, 288.0, 293.0, 286.0, 288.0, 291.0, 288.0, 291.0, 308.0, 322.0, 277.0, 293.0, 279.0, 294.0, 290.0, 283.0, 295.0, 289.0, 277.0, 299.0, 301.0, 289.0, 288.0, 288.0, 275.0, 263.0, 296.0, 283.0, 288.0, 288.0, 291.0, 291.0, 288.0, 291.0, 284.0, 292.0, 285.0, 291.0, 286.0, 290.0, 288.0, 291.0, 289.0, 287.0, 284.0, 298.0, 285.0, 294.0, 294.0, 285.0, 296.0, 274.0, 244.0, 272.0, 293.0, 286.0, 291.0, 285.0, 288.0, 288.0, 266.0, 253.0, 317.0, 310.0, 255.0, 267.0, 293.0, 286.0, 309.0, 321.0, 319.0, 317.0, 
293.0, 291.0, 296.0, 283.0, 317.0, 319.0, 284.0, 295.0, 266.0, 250.0, 291.0, 291.0, 266.0, 256.0, 283.0, 290.0, 292.0, 290.0, 286.0, 290.0, 300.0, 284.0, 311.0, 316.0, 291.0, 291.0, 294.0, 282.0, 291.0, 288.0, 285.0, 291.0, 309.0, 321.0, 288.0, 285.0, 289.0, 290.0, 286.0, 287.0, 287.0, 286.0, 286.0, 290.0, 289.0, 287.0, 283.0, 290.0, 252.0, 267.0, 314.0, 313.0, 303.0, 278.0, 278.0, 298.0, 285.0, 294.0, 290.0, 286.0, 285.0, 288.0, 267.0, 260.0, 284.0, 300.0, 261.0, 269.0, 261.0, 272.0, 292.0, 284.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6952016011638078, "mean_inference_ms": 1.2401249587874137, "mean_action_processing_ms": 0.13341175036794614, "mean_env_wait_ms": 0.8370252127859996, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11008000, "num_agent_steps_trained": 11008000, "num_env_steps_sampled": 5504000, "num_env_steps_trained": 5504000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5504000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11008000, "timers": {"training_iteration_time_ms": 3615.238, "learn_time_ms": 1099.735, "learn_throughput": 11639.171, "synch_weights_time_ms": 11.164}, "counters": {"num_env_steps_sampled": 5504000, "num_env_steps_trained": 5504000, "num_agent_steps_sampled": 11008000, "num_agent_steps_trained": 11008000}, "done": false, "episodes_total": 13760, "training_iteration": 430, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-24", "timestamp": 1666582104, "time_this_iter_s": 3.7221291065216064, "time_total_s": 1639.546170949936, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1639.546170949936, "timesteps_since_restore": 0, "iterations_since_restore": 430, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.683333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.56, "shaped_reward_min": 133, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.47, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.28, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.05, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.79, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.02, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.05, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.79, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.79, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002081596525385976, "policy_loss": 0.0016915076412260532, "vf_loss": 7.813340663909912, "vf_explained_var": 0.5801672339439392, "kl": 0.002716578310355544, "entropy": 0.7824891805648804, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5516800, "num_env_steps_trained": 5516800, "num_agent_steps_sampled": 11033600, "num_agent_steps_trained": 11033600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 453.0, "episode_reward_mean": 572.56, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.28}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.56, "shaped_reward_min": 133, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.47, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.28, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.39, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.05, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.11, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.14, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.06, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.79, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.02, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.86, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.79, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.79, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 573.0, 573.0, 584.0, 576.0, 590.0, 576.0, 538.0, 579.0, 576.0, 582.0, 579.0, 576.0, 576.0, 576.0, 579.0, 576.0, 582.0, 579.0, 579.0, 570.0, 516.0, 579.0, 576.0, 576.0, 519.0, 627.0, 522.0, 579.0, 630.0, 636.0, 584.0, 579.0, 636.0, 579.0, 516.0, 582.0, 522.0, 573.0, 582.0, 576.0, 584.0, 627.0, 582.0, 576.0, 579.0, 576.0, 630.0, 573.0, 579.0, 573.0, 573.0, 576.0, 576.0, 573.0, 519.0, 627.0, 581.0, 576.0, 579.0, 576.0, 573.0, 527.0, 584.0, 530.0, 533.0, 576.0, 579.0, 519.0, 573.0, 576.0, 582.0, 582.0, 579.0, 579.0, 582.0, 519.0, 573.0, 584.0, 587.0, 453.0, 579.0, 576.0, 525.0, 582.0, 576.0, 533.0, 570.0, 533.0, 576.0, 579.0, 576.0, 587.0, 587.0, 582.0, 579.0, 519.0, 582.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [277.0, 293.0, 279.0, 294.0, 290.0, 283.0, 295.0, 289.0, 277.0, 299.0, 301.0, 
289.0, 288.0, 288.0, 275.0, 263.0, 296.0, 283.0, 288.0, 288.0, 291.0, 291.0, 288.0, 291.0, 284.0, 292.0, 285.0, 291.0, 286.0, 290.0, 288.0, 291.0, 289.0, 287.0, 284.0, 298.0, 285.0, 294.0, 294.0, 285.0, 296.0, 274.0, 244.0, 272.0, 293.0, 286.0, 291.0, 285.0, 288.0, 288.0, 266.0, 253.0, 317.0, 310.0, 255.0, 267.0, 293.0, 286.0, 309.0, 321.0, 319.0, 317.0, 293.0, 291.0, 296.0, 283.0, 317.0, 319.0, 284.0, 295.0, 266.0, 250.0, 291.0, 291.0, 266.0, 256.0, 283.0, 290.0, 292.0, 290.0, 286.0, 290.0, 300.0, 284.0, 311.0, 316.0, 291.0, 291.0, 294.0, 282.0, 291.0, 288.0, 285.0, 291.0, 309.0, 321.0, 288.0, 285.0, 289.0, 290.0, 286.0, 287.0, 287.0, 286.0, 286.0, 290.0, 289.0, 287.0, 283.0, 290.0, 252.0, 267.0, 314.0, 313.0, 303.0, 278.0, 278.0, 298.0, 285.0, 294.0, 290.0, 286.0, 285.0, 288.0, 267.0, 260.0, 284.0, 300.0, 261.0, 269.0, 261.0, 272.0, 292.0, 284.0, 291.0, 288.0, 256.0, 263.0, 285.0, 288.0, 298.0, 278.0, 293.0, 289.0, 292.0, 290.0, 283.0, 296.0, 291.0, 288.0, 291.0, 291.0, 260.0, 259.0, 284.0, 289.0, 294.0, 290.0, 298.0, 289.0, 225.0, 228.0, 282.0, 297.0, 277.0, 299.0, 263.0, 262.0, 293.0, 289.0, 286.0, 290.0, 263.0, 270.0, 290.0, 280.0, 274.0, 259.0, 283.0, 293.0, 285.0, 294.0, 290.0, 286.0, 289.0, 298.0, 299.0, 288.0, 292.0, 290.0, 290.0, 289.0, 261.0, 258.0, 283.0, 299.0, 283.0, 299.0, 316.0, 314.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.69515274208325, "mean_inference_ms": 1.2400323722875128, "mean_action_processing_ms": 0.13340651629842287, "mean_env_wait_ms": 0.8369591097856848, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 453.0, "episode_reward_mean": 572.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.28}, "hist_stats": {"episode_reward": [570.0, 573.0, 573.0, 584.0, 576.0, 590.0, 576.0, 538.0, 579.0, 576.0, 582.0, 579.0, 576.0, 576.0, 576.0, 579.0, 576.0, 582.0, 579.0, 
579.0, 570.0, 516.0, 579.0, 576.0, 576.0, 519.0, 627.0, 522.0, 579.0, 630.0, 636.0, 584.0, 579.0, 636.0, 579.0, 516.0, 582.0, 522.0, 573.0, 582.0, 576.0, 584.0, 627.0, 582.0, 576.0, 579.0, 576.0, 630.0, 573.0, 579.0, 573.0, 573.0, 576.0, 576.0, 573.0, 519.0, 627.0, 581.0, 576.0, 579.0, 576.0, 573.0, 527.0, 584.0, 530.0, 533.0, 576.0, 579.0, 519.0, 573.0, 576.0, 582.0, 582.0, 579.0, 579.0, 582.0, 519.0, 573.0, 584.0, 587.0, 453.0, 579.0, 576.0, 525.0, 582.0, 576.0, 533.0, 570.0, 533.0, 576.0, 579.0, 576.0, 587.0, 587.0, 582.0, 579.0, 519.0, 582.0, 582.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [277.0, 293.0, 279.0, 294.0, 290.0, 283.0, 295.0, 289.0, 277.0, 299.0, 301.0, 289.0, 288.0, 288.0, 275.0, 263.0, 296.0, 283.0, 288.0, 288.0, 291.0, 291.0, 288.0, 291.0, 284.0, 292.0, 285.0, 291.0, 286.0, 290.0, 288.0, 291.0, 289.0, 287.0, 284.0, 298.0, 285.0, 294.0, 294.0, 285.0, 296.0, 274.0, 244.0, 272.0, 293.0, 286.0, 291.0, 285.0, 288.0, 288.0, 266.0, 253.0, 317.0, 310.0, 255.0, 267.0, 293.0, 286.0, 309.0, 321.0, 319.0, 317.0, 293.0, 291.0, 296.0, 283.0, 317.0, 319.0, 284.0, 295.0, 266.0, 250.0, 291.0, 291.0, 266.0, 256.0, 283.0, 290.0, 292.0, 290.0, 286.0, 290.0, 300.0, 284.0, 311.0, 316.0, 291.0, 291.0, 294.0, 282.0, 291.0, 288.0, 285.0, 291.0, 309.0, 321.0, 288.0, 285.0, 289.0, 290.0, 286.0, 287.0, 287.0, 286.0, 286.0, 290.0, 289.0, 287.0, 283.0, 290.0, 252.0, 267.0, 314.0, 313.0, 303.0, 278.0, 278.0, 298.0, 285.0, 294.0, 290.0, 286.0, 285.0, 288.0, 267.0, 260.0, 284.0, 
300.0, 261.0, 269.0, 261.0, 272.0, 292.0, 284.0, 291.0, 288.0, 256.0, 263.0, 285.0, 288.0, 298.0, 278.0, 293.0, 289.0, 292.0, 290.0, 283.0, 296.0, 291.0, 288.0, 291.0, 291.0, 260.0, 259.0, 284.0, 289.0, 294.0, 290.0, 298.0, 289.0, 225.0, 228.0, 282.0, 297.0, 277.0, 299.0, 263.0, 262.0, 293.0, 289.0, 286.0, 290.0, 263.0, 270.0, 290.0, 280.0, 274.0, 259.0, 283.0, 293.0, 285.0, 294.0, 290.0, 286.0, 289.0, 298.0, 299.0, 288.0, 292.0, 290.0, 290.0, 289.0, 261.0, 258.0, 283.0, 299.0, 283.0, 299.0, 316.0, 314.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.69515274208325, "mean_inference_ms": 1.2400323722875128, "mean_action_processing_ms": 0.13340651629842287, "mean_env_wait_ms": 0.8369591097856848, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11033600, "num_agent_steps_trained": 11033600, "num_env_steps_sampled": 5516800, "num_env_steps_trained": 5516800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5516800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11033600, "timers": {"training_iteration_time_ms": 3627.097, "learn_time_ms": 1104.409, "learn_throughput": 11589.905, "synch_weights_time_ms": 10.991}, "counters": {"num_env_steps_sampled": 5516800, "num_env_steps_trained": 5516800, "num_agent_steps_sampled": 11033600, "num_agent_steps_trained": 11033600}, "done": false, "episodes_total": 13792, "training_iteration": 431, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-28", "timestamp": 1666582108, "time_this_iter_s": 3.7307004928588867, "time_total_s": 1643.2768714427948, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1643.2768714427948, "timesteps_since_restore": 0, "iterations_since_restore": 431, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.9, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.03, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.89, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.59, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.73, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.18, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.4, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.15, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.91, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.18, 
"optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.4, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.18, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.4, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007839496247470379, "policy_loss": 0.0003952703846152872, "vf_loss": 7.842353343963623, "vf_explained_var": 0.5919654369354248, "kl": 0.0032928823493421078, "entropy": 0.7911117076873779, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5529600, "num_env_steps_trained": 5529600, "num_agent_steps_sampled": 11059200, "num_agent_steps_trained": 11059200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 566.63, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.315}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.03, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.68, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.89, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 24, "useful_onion_pickup_agent_0_mean": 16.59, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.73, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 24, "onion_drop_agent_0_mean": 0.17, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.16, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.18, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.4, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.15, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.91, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.75, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.18, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.4, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.18, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.4, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 636.0, 579.0, 516.0, 582.0, 522.0, 573.0, 582.0, 576.0, 584.0, 627.0, 582.0, 576.0, 579.0, 576.0, 630.0, 573.0, 579.0, 573.0, 573.0, 576.0, 576.0, 573.0, 519.0, 627.0, 581.0, 576.0, 579.0, 576.0, 573.0, 527.0, 584.0, 530.0, 533.0, 576.0, 579.0, 519.0, 573.0, 576.0, 582.0, 582.0, 579.0, 579.0, 582.0, 519.0, 573.0, 584.0, 587.0, 453.0, 579.0, 576.0, 525.0, 582.0, 576.0, 533.0, 570.0, 533.0, 576.0, 579.0, 576.0, 587.0, 587.0, 582.0, 579.0, 519.0, 582.0, 582.0, 630.0, 342.0, 627.0, 579.0, 582.0, 584.0, 527.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 633.0, 516.0, 576.0, 627.0, 476.0, 579.0, 587.0, 636.0, 579.0, 576.0, 633.0, 576.0, 530.0, 237.0, 581.0, 510.0, 584.0, 472.0, 576.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 283.0, 317.0, 319.0, 284.0, 295.0, 266.0, 250.0, 291.0, 291.0, 266.0, 
256.0, 283.0, 290.0, 292.0, 290.0, 286.0, 290.0, 300.0, 284.0, 311.0, 316.0, 291.0, 291.0, 294.0, 282.0, 291.0, 288.0, 285.0, 291.0, 309.0, 321.0, 288.0, 285.0, 289.0, 290.0, 286.0, 287.0, 287.0, 286.0, 286.0, 290.0, 289.0, 287.0, 283.0, 290.0, 252.0, 267.0, 314.0, 313.0, 303.0, 278.0, 278.0, 298.0, 285.0, 294.0, 290.0, 286.0, 285.0, 288.0, 267.0, 260.0, 284.0, 300.0, 261.0, 269.0, 261.0, 272.0, 292.0, 284.0, 291.0, 288.0, 256.0, 263.0, 285.0, 288.0, 298.0, 278.0, 293.0, 289.0, 292.0, 290.0, 283.0, 296.0, 291.0, 288.0, 291.0, 291.0, 260.0, 259.0, 284.0, 289.0, 294.0, 290.0, 298.0, 289.0, 225.0, 228.0, 282.0, 297.0, 277.0, 299.0, 263.0, 262.0, 293.0, 289.0, 286.0, 290.0, 263.0, 270.0, 290.0, 280.0, 274.0, 259.0, 283.0, 293.0, 285.0, 294.0, 290.0, 286.0, 289.0, 298.0, 299.0, 288.0, 292.0, 290.0, 290.0, 289.0, 261.0, 258.0, 283.0, 299.0, 283.0, 299.0, 316.0, 314.0, 180.0, 162.0, 316.0, 311.0, 293.0, 286.0, 291.0, 291.0, 287.0, 297.0, 253.0, 274.0, 291.0, 291.0, 299.0, 288.0, 289.0, 290.0, 283.0, 296.0, 292.0, 290.0, 291.0, 288.0, 317.0, 316.0, 266.0, 250.0, 294.0, 282.0, 312.0, 315.0, 234.0, 242.0, 291.0, 288.0, 296.0, 291.0, 317.0, 319.0, 288.0, 291.0, 286.0, 290.0, 314.0, 319.0, 294.0, 282.0, 265.0, 265.0, 118.0, 119.0, 293.0, 288.0, 240.0, 270.0, 297.0, 287.0, 239.0, 233.0, 295.0, 281.0, 319.0, 308.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6951108003077172, "mean_inference_ms": 1.2399412656561892, "mean_action_processing_ms": 0.1334012911910848, "mean_env_wait_ms": 0.8369005759153684, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 566.63, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.315}, "hist_stats": {"episode_reward": [579.0, 636.0, 579.0, 516.0, 582.0, 522.0, 573.0, 582.0, 576.0, 584.0, 627.0, 582.0, 576.0, 579.0, 576.0, 630.0, 573.0, 579.0, 573.0, 
573.0, 576.0, 576.0, 573.0, 519.0, 627.0, 581.0, 576.0, 579.0, 576.0, 573.0, 527.0, 584.0, 530.0, 533.0, 576.0, 579.0, 519.0, 573.0, 576.0, 582.0, 582.0, 579.0, 579.0, 582.0, 519.0, 573.0, 584.0, 587.0, 453.0, 579.0, 576.0, 525.0, 582.0, 576.0, 533.0, 570.0, 533.0, 576.0, 579.0, 576.0, 587.0, 587.0, 582.0, 579.0, 519.0, 582.0, 582.0, 630.0, 342.0, 627.0, 579.0, 582.0, 584.0, 527.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 633.0, 516.0, 576.0, 627.0, 476.0, 579.0, 587.0, 636.0, 579.0, 576.0, 633.0, 576.0, 530.0, 237.0, 581.0, 510.0, 584.0, 472.0, 576.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 283.0, 317.0, 319.0, 284.0, 295.0, 266.0, 250.0, 291.0, 291.0, 266.0, 256.0, 283.0, 290.0, 292.0, 290.0, 286.0, 290.0, 300.0, 284.0, 311.0, 316.0, 291.0, 291.0, 294.0, 282.0, 291.0, 288.0, 285.0, 291.0, 309.0, 321.0, 288.0, 285.0, 289.0, 290.0, 286.0, 287.0, 287.0, 286.0, 286.0, 290.0, 289.0, 287.0, 283.0, 290.0, 252.0, 267.0, 314.0, 313.0, 303.0, 278.0, 278.0, 298.0, 285.0, 294.0, 290.0, 286.0, 285.0, 288.0, 267.0, 260.0, 284.0, 300.0, 261.0, 269.0, 261.0, 272.0, 292.0, 284.0, 291.0, 288.0, 256.0, 263.0, 285.0, 288.0, 298.0, 278.0, 293.0, 289.0, 292.0, 290.0, 283.0, 296.0, 291.0, 288.0, 291.0, 291.0, 260.0, 259.0, 284.0, 289.0, 294.0, 290.0, 298.0, 289.0, 225.0, 228.0, 282.0, 297.0, 277.0, 299.0, 263.0, 262.0, 293.0, 289.0, 286.0, 290.0, 263.0, 270.0, 290.0, 280.0, 274.0, 259.0, 283.0, 293.0, 285.0, 294.0, 290.0, 286.0, 289.0, 298.0, 299.0, 288.0, 292.0, 290.0, 290.0, 
289.0, 261.0, 258.0, 283.0, 299.0, 283.0, 299.0, 316.0, 314.0, 180.0, 162.0, 316.0, 311.0, 293.0, 286.0, 291.0, 291.0, 287.0, 297.0, 253.0, 274.0, 291.0, 291.0, 299.0, 288.0, 289.0, 290.0, 283.0, 296.0, 292.0, 290.0, 291.0, 288.0, 317.0, 316.0, 266.0, 250.0, 294.0, 282.0, 312.0, 315.0, 234.0, 242.0, 291.0, 288.0, 296.0, 291.0, 317.0, 319.0, 288.0, 291.0, 286.0, 290.0, 314.0, 319.0, 294.0, 282.0, 265.0, 265.0, 118.0, 119.0, 293.0, 288.0, 240.0, 270.0, 297.0, 287.0, 239.0, 233.0, 295.0, 281.0, 319.0, 308.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6951108003077172, "mean_inference_ms": 1.2399412656561892, "mean_action_processing_ms": 0.1334012911910848, "mean_env_wait_ms": 0.8369005759153684, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11059200, "num_agent_steps_trained": 11059200, "num_env_steps_sampled": 5529600, "num_env_steps_trained": 5529600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5529600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11059200, "timers": {"training_iteration_time_ms": 3646.379, "learn_time_ms": 1118.615, "learn_throughput": 11442.726, "synch_weights_time_ms": 11.386}, "counters": {"num_env_steps_sampled": 5529600, "num_env_steps_trained": 5529600, "num_agent_steps_sampled": 11059200, "num_agent_steps_trained": 11059200}, "done": false, "episodes_total": 13824, "training_iteration": 432, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-32", "timestamp": 1666582112, "time_this_iter_s": 3.6900887489318848, "time_total_s": 1646.9669601917267, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1646.9669601917267, "timesteps_since_restore": 0, "iterations_since_restore": 432, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.833333333333332, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.82, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.69, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.93, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.83, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.54, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.22, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.85, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.19, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.54, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.54, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.004141518846154213, "policy_loss": -0.004519705194979906, "vf_loss": 7.712698459625244, "vf_explained_var": 0.5621180534362793, "kl": 0.002915637567639351, "entropy": 0.7861687541007996, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5542400, "num_env_steps_trained": 5542400, "num_agent_steps_sampled": 11084800, "num_agent_steps_trained": 11084800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 569.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.71}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 175.82, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.69, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.93, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.83, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.18, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.08, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.19, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.54, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.22, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 4.85, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.13, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.19, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.54, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.19, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.54, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 533.0, 576.0, 579.0, 519.0, 573.0, 576.0, 582.0, 582.0, 579.0, 579.0, 582.0, 519.0, 573.0, 584.0, 587.0, 453.0, 579.0, 576.0, 525.0, 582.0, 576.0, 533.0, 570.0, 533.0, 576.0, 579.0, 576.0, 587.0, 587.0, 582.0, 579.0, 519.0, 582.0, 582.0, 630.0, 342.0, 627.0, 579.0, 582.0, 584.0, 527.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 633.0, 516.0, 576.0, 627.0, 476.0, 579.0, 587.0, 636.0, 579.0, 576.0, 633.0, 576.0, 530.0, 237.0, 581.0, 510.0, 584.0, 472.0, 576.0, 627.0, 570.0, 579.0, 582.0, 579.0, 630.0, 630.0, 587.0, 579.0, 576.0, 627.0, 582.0, 576.0, 630.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 573.0, 579.0, 573.0, 630.0, 510.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 269.0, 261.0, 272.0, 292.0, 284.0, 291.0, 288.0, 256.0, 263.0, 285.0, 
288.0, 298.0, 278.0, 293.0, 289.0, 292.0, 290.0, 283.0, 296.0, 291.0, 288.0, 291.0, 291.0, 260.0, 259.0, 284.0, 289.0, 294.0, 290.0, 298.0, 289.0, 225.0, 228.0, 282.0, 297.0, 277.0, 299.0, 263.0, 262.0, 293.0, 289.0, 286.0, 290.0, 263.0, 270.0, 290.0, 280.0, 274.0, 259.0, 283.0, 293.0, 285.0, 294.0, 290.0, 286.0, 289.0, 298.0, 299.0, 288.0, 292.0, 290.0, 290.0, 289.0, 261.0, 258.0, 283.0, 299.0, 283.0, 299.0, 316.0, 314.0, 180.0, 162.0, 316.0, 311.0, 293.0, 286.0, 291.0, 291.0, 287.0, 297.0, 253.0, 274.0, 291.0, 291.0, 299.0, 288.0, 289.0, 290.0, 283.0, 296.0, 292.0, 290.0, 291.0, 288.0, 317.0, 316.0, 266.0, 250.0, 294.0, 282.0, 312.0, 315.0, 234.0, 242.0, 291.0, 288.0, 296.0, 291.0, 317.0, 319.0, 288.0, 291.0, 286.0, 290.0, 314.0, 319.0, 294.0, 282.0, 265.0, 265.0, 118.0, 119.0, 293.0, 288.0, 240.0, 270.0, 297.0, 287.0, 239.0, 233.0, 295.0, 281.0, 319.0, 308.0, 294.0, 276.0, 293.0, 286.0, 290.0, 292.0, 283.0, 296.0, 316.0, 314.0, 311.0, 319.0, 296.0, 291.0, 294.0, 285.0, 291.0, 285.0, 306.0, 321.0, 290.0, 292.0, 285.0, 291.0, 314.0, 316.0, 283.0, 293.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 315.0, 315.0, 290.0, 289.0, 294.0, 288.0, 289.0, 293.0, 294.0, 288.0, 255.0, 270.0, 288.0, 291.0, 275.0, 295.0, 289.0, 284.0, 288.0, 291.0, 289.0, 284.0, 312.0, 318.0, 245.0, 265.0, 290.0, 292.0, 294.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6950606518602862, "mean_inference_ms": 1.2398391732462262, "mean_action_processing_ms": 0.1333949690210147, "mean_env_wait_ms": 0.8368347695385678, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 569.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 284.71}, "hist_stats": {"episode_reward": [530.0, 533.0, 576.0, 579.0, 519.0, 573.0, 576.0, 582.0, 582.0, 579.0, 579.0, 582.0, 519.0, 573.0, 584.0, 587.0, 453.0, 579.0, 576.0, 
525.0, 582.0, 576.0, 533.0, 570.0, 533.0, 576.0, 579.0, 576.0, 587.0, 587.0, 582.0, 579.0, 519.0, 582.0, 582.0, 630.0, 342.0, 627.0, 579.0, 582.0, 584.0, 527.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 633.0, 516.0, 576.0, 627.0, 476.0, 579.0, 587.0, 636.0, 579.0, 576.0, 633.0, 576.0, 530.0, 237.0, 581.0, 510.0, 584.0, 472.0, 576.0, 627.0, 570.0, 579.0, 582.0, 579.0, 630.0, 630.0, 587.0, 579.0, 576.0, 627.0, 582.0, 576.0, 630.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 573.0, 579.0, 573.0, 630.0, 510.0, 582.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 269.0, 261.0, 272.0, 292.0, 284.0, 291.0, 288.0, 256.0, 263.0, 285.0, 288.0, 298.0, 278.0, 293.0, 289.0, 292.0, 290.0, 283.0, 296.0, 291.0, 288.0, 291.0, 291.0, 260.0, 259.0, 284.0, 289.0, 294.0, 290.0, 298.0, 289.0, 225.0, 228.0, 282.0, 297.0, 277.0, 299.0, 263.0, 262.0, 293.0, 289.0, 286.0, 290.0, 263.0, 270.0, 290.0, 280.0, 274.0, 259.0, 283.0, 293.0, 285.0, 294.0, 290.0, 286.0, 289.0, 298.0, 299.0, 288.0, 292.0, 290.0, 290.0, 289.0, 261.0, 258.0, 283.0, 299.0, 283.0, 299.0, 316.0, 314.0, 180.0, 162.0, 316.0, 311.0, 293.0, 286.0, 291.0, 291.0, 287.0, 297.0, 253.0, 274.0, 291.0, 291.0, 299.0, 288.0, 289.0, 290.0, 283.0, 296.0, 292.0, 290.0, 291.0, 288.0, 317.0, 316.0, 266.0, 250.0, 294.0, 282.0, 312.0, 315.0, 234.0, 242.0, 291.0, 288.0, 296.0, 291.0, 317.0, 319.0, 288.0, 291.0, 286.0, 290.0, 314.0, 319.0, 294.0, 282.0, 265.0, 265.0, 118.0, 119.0, 293.0, 288.0, 240.0, 
270.0, 297.0, 287.0, 239.0, 233.0, 295.0, 281.0, 319.0, 308.0, 294.0, 276.0, 293.0, 286.0, 290.0, 292.0, 283.0, 296.0, 316.0, 314.0, 311.0, 319.0, 296.0, 291.0, 294.0, 285.0, 291.0, 285.0, 306.0, 321.0, 290.0, 292.0, 285.0, 291.0, 314.0, 316.0, 283.0, 293.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 315.0, 315.0, 290.0, 289.0, 294.0, 288.0, 289.0, 293.0, 294.0, 288.0, 255.0, 270.0, 288.0, 291.0, 275.0, 295.0, 289.0, 284.0, 288.0, 291.0, 289.0, 284.0, 312.0, 318.0, 245.0, 265.0, 290.0, 292.0, 294.0, 279.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6950606518602862, "mean_inference_ms": 1.2398391732462262, "mean_action_processing_ms": 0.1333949690210147, "mean_env_wait_ms": 0.8368347695385678, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11084800, "num_agent_steps_trained": 11084800, "num_env_steps_sampled": 5542400, "num_env_steps_trained": 5542400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5542400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11084800, "timers": {"training_iteration_time_ms": 3637.444, "learn_time_ms": 1114.878, "learn_throughput": 11481.075, "synch_weights_time_ms": 12.533}, "counters": {"num_env_steps_sampled": 5542400, "num_env_steps_trained": 5542400, "num_agent_steps_sampled": 11084800, "num_agent_steps_trained": 11084800}, "done": false, "episodes_total": 13856, "training_iteration": 433, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-36", "timestamp": 1666582116, "time_this_iter_s": 3.568324565887451, "time_total_s": 1650.5352847576141, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1650.5352847576141, "timesteps_since_restore": 0, "iterations_since_restore": 433, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.419999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 176.65, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.05, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.92, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.16, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.67, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.96, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.16, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.67, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.16, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.67, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0013387391809374094, "policy_loss": 0.0009609795524738729, "vf_loss": 7.725425720214844, "vf_explained_var": 0.5909132957458496, "kl": 0.002303453627973795, "entropy": 0.7895658016204834, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5555200, "num_env_steps_trained": 5555200, "num_agent_steps_sampled": 11110400, "num_agent_steps_trained": 11110400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 573.05, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 286.525}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 80, "sparse_reward_max": 220, "shaped_reward_mean": 176.65, "shaped_reward_min": 77, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.58, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.05, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.49, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.92, "useful_onion_pickup_agent_1_min": 6, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.13, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.07, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.16, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.67, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.23, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.5, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.96, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.26, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.88, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.16, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.67, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.16, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.67, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [519.0, 582.0, 582.0, 630.0, 342.0, 627.0, 579.0, 582.0, 584.0, 527.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 633.0, 516.0, 576.0, 627.0, 476.0, 579.0, 587.0, 636.0, 579.0, 576.0, 633.0, 576.0, 530.0, 237.0, 581.0, 510.0, 584.0, 472.0, 576.0, 627.0, 570.0, 579.0, 582.0, 579.0, 630.0, 630.0, 587.0, 579.0, 576.0, 627.0, 582.0, 576.0, 630.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 573.0, 579.0, 573.0, 630.0, 510.0, 582.0, 573.0, 582.0, 524.0, 522.0, 627.0, 630.0, 524.0, 576.0, 582.0, 576.0, 633.0, 630.0, 582.0, 582.0, 581.0, 576.0, 624.0, 584.0, 633.0, 470.0, 582.0, 579.0, 530.0, 627.0, 579.0, 525.0, 579.0, 536.0, 576.0, 578.0, 522.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 258.0, 283.0, 299.0, 283.0, 299.0, 316.0, 314.0, 180.0, 162.0, 316.0, 
311.0, 293.0, 286.0, 291.0, 291.0, 287.0, 297.0, 253.0, 274.0, 291.0, 291.0, 299.0, 288.0, 289.0, 290.0, 283.0, 296.0, 292.0, 290.0, 291.0, 288.0, 317.0, 316.0, 266.0, 250.0, 294.0, 282.0, 312.0, 315.0, 234.0, 242.0, 291.0, 288.0, 296.0, 291.0, 317.0, 319.0, 288.0, 291.0, 286.0, 290.0, 314.0, 319.0, 294.0, 282.0, 265.0, 265.0, 118.0, 119.0, 293.0, 288.0, 240.0, 270.0, 297.0, 287.0, 239.0, 233.0, 295.0, 281.0, 319.0, 308.0, 294.0, 276.0, 293.0, 286.0, 290.0, 292.0, 283.0, 296.0, 316.0, 314.0, 311.0, 319.0, 296.0, 291.0, 294.0, 285.0, 291.0, 285.0, 306.0, 321.0, 290.0, 292.0, 285.0, 291.0, 314.0, 316.0, 283.0, 293.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 315.0, 315.0, 290.0, 289.0, 294.0, 288.0, 289.0, 293.0, 294.0, 288.0, 255.0, 270.0, 288.0, 291.0, 275.0, 295.0, 289.0, 284.0, 288.0, 291.0, 289.0, 284.0, 312.0, 318.0, 245.0, 265.0, 290.0, 292.0, 294.0, 279.0, 292.0, 290.0, 256.0, 268.0, 261.0, 261.0, 306.0, 321.0, 307.0, 323.0, 256.0, 268.0, 286.0, 290.0, 291.0, 291.0, 283.0, 293.0, 318.0, 315.0, 313.0, 317.0, 286.0, 296.0, 286.0, 296.0, 287.0, 294.0, 284.0, 292.0, 305.0, 319.0, 294.0, 290.0, 314.0, 319.0, 244.0, 226.0, 288.0, 294.0, 288.0, 291.0, 267.0, 263.0, 318.0, 309.0, 298.0, 281.0, 254.0, 271.0, 294.0, 285.0, 260.0, 276.0, 288.0, 288.0, 296.0, 282.0, 264.0, 258.0, 286.0, 296.0, 293.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6950081891474147, "mean_inference_ms": 1.2397288239976665, "mean_action_processing_ms": 0.1333872792140637, "mean_env_wait_ms": 0.8367610499142053, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 237.0, "episode_reward_mean": 573.05, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 118.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 286.525}, "hist_stats": {"episode_reward": [519.0, 582.0, 582.0, 630.0, 342.0, 627.0, 579.0, 582.0, 584.0, 527.0, 582.0, 587.0, 579.0, 579.0, 582.0, 579.0, 633.0, 516.0, 576.0, 
627.0, 476.0, 579.0, 587.0, 636.0, 579.0, 576.0, 633.0, 576.0, 530.0, 237.0, 581.0, 510.0, 584.0, 472.0, 576.0, 627.0, 570.0, 579.0, 582.0, 579.0, 630.0, 630.0, 587.0, 579.0, 576.0, 627.0, 582.0, 576.0, 630.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 573.0, 579.0, 573.0, 630.0, 510.0, 582.0, 573.0, 582.0, 524.0, 522.0, 627.0, 630.0, 524.0, 576.0, 582.0, 576.0, 633.0, 630.0, 582.0, 582.0, 581.0, 576.0, 624.0, 584.0, 633.0, 470.0, 582.0, 579.0, 530.0, 627.0, 579.0, 525.0, 579.0, 536.0, 576.0, 578.0, 522.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 258.0, 283.0, 299.0, 283.0, 299.0, 316.0, 314.0, 180.0, 162.0, 316.0, 311.0, 293.0, 286.0, 291.0, 291.0, 287.0, 297.0, 253.0, 274.0, 291.0, 291.0, 299.0, 288.0, 289.0, 290.0, 283.0, 296.0, 292.0, 290.0, 291.0, 288.0, 317.0, 316.0, 266.0, 250.0, 294.0, 282.0, 312.0, 315.0, 234.0, 242.0, 291.0, 288.0, 296.0, 291.0, 317.0, 319.0, 288.0, 291.0, 286.0, 290.0, 314.0, 319.0, 294.0, 282.0, 265.0, 265.0, 118.0, 119.0, 293.0, 288.0, 240.0, 270.0, 297.0, 287.0, 239.0, 233.0, 295.0, 281.0, 319.0, 308.0, 294.0, 276.0, 293.0, 286.0, 290.0, 292.0, 283.0, 296.0, 316.0, 314.0, 311.0, 319.0, 296.0, 291.0, 294.0, 285.0, 291.0, 285.0, 306.0, 321.0, 290.0, 292.0, 285.0, 291.0, 314.0, 316.0, 283.0, 293.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 315.0, 315.0, 290.0, 289.0, 294.0, 288.0, 289.0, 293.0, 294.0, 288.0, 255.0, 270.0, 288.0, 291.0, 275.0, 295.0, 289.0, 284.0, 288.0, 291.0, 289.0, 
284.0, 312.0, 318.0, 245.0, 265.0, 290.0, 292.0, 294.0, 279.0, 292.0, 290.0, 256.0, 268.0, 261.0, 261.0, 306.0, 321.0, 307.0, 323.0, 256.0, 268.0, 286.0, 290.0, 291.0, 291.0, 283.0, 293.0, 318.0, 315.0, 313.0, 317.0, 286.0, 296.0, 286.0, 296.0, 287.0, 294.0, 284.0, 292.0, 305.0, 319.0, 294.0, 290.0, 314.0, 319.0, 244.0, 226.0, 288.0, 294.0, 288.0, 291.0, 267.0, 263.0, 318.0, 309.0, 298.0, 281.0, 254.0, 271.0, 294.0, 285.0, 260.0, 276.0, 288.0, 288.0, 296.0, 282.0, 264.0, 258.0, 286.0, 296.0, 293.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6950081891474147, "mean_inference_ms": 1.2397288239976665, "mean_action_processing_ms": 0.1333872792140637, "mean_env_wait_ms": 0.8367610499142053, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11110400, "num_agent_steps_trained": 11110400, "num_env_steps_sampled": 5555200, "num_env_steps_trained": 5555200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5555200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11110400, "timers": {"training_iteration_time_ms": 3640.154, "learn_time_ms": 1118.553, "learn_throughput": 11443.353, "synch_weights_time_ms": 13.047}, "counters": {"num_env_steps_sampled": 5555200, "num_env_steps_trained": 5555200, "num_agent_steps_sampled": 11110400, "num_agent_steps_trained": 11110400}, "done": false, "episodes_total": 13888, "training_iteration": 434, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-39", "timestamp": 1666582119, "time_this_iter_s": 3.6646738052368164, "time_total_s": 1654.199958562851, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1654.199958562851, "timesteps_since_restore": 0, "iterations_since_restore": 434, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.5, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.29, "shaped_reward_min": 133, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.08, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.56, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.03, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.77, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.2, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.77, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.2, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.77, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.2, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0017595021054148674, "policy_loss": -0.0021555940620601177, "vf_loss": 7.937746047973633, "vf_explained_var": 0.5771285891532898, "kl": 0.0027183406054973602, "entropy": 0.7953658103942871, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5568000, "num_env_steps_trained": 5568000, "num_agent_steps_sampled": 11136000, "num_agent_steps_trained": 11136000}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 453.0, "episode_reward_mean": 575.29, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.645}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.29, "shaped_reward_min": 133, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.08, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.56, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.03, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.77, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.2, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.16, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.07, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.77, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.2, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.77, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.2, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 472.0, 576.0, 627.0, 570.0, 579.0, 582.0, 579.0, 630.0, 630.0, 587.0, 579.0, 576.0, 627.0, 582.0, 576.0, 630.0, 576.0, 582.0, 627.0, 582.0, 630.0, 579.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 573.0, 579.0, 573.0, 630.0, 510.0, 582.0, 573.0, 582.0, 524.0, 522.0, 627.0, 630.0, 524.0, 576.0, 582.0, 576.0, 633.0, 630.0, 582.0, 582.0, 581.0, 576.0, 624.0, 584.0, 633.0, 470.0, 582.0, 579.0, 530.0, 627.0, 579.0, 525.0, 579.0, 536.0, 576.0, 578.0, 522.0, 582.0, 576.0, 573.0, 533.0, 579.0, 579.0, 453.0, 630.0, 579.0, 522.0, 579.0, 582.0, 576.0, 576.0, 527.0, 579.0, 582.0, 576.0, 581.0, 633.0, 579.0, 576.0, 522.0, 579.0, 510.0, 582.0, 587.0, 587.0, 582.0, 570.0, 525.0, 579.0, 519.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 287.0, 239.0, 233.0, 295.0, 281.0, 319.0, 308.0, 294.0, 276.0, 293.0, 
286.0, 290.0, 292.0, 283.0, 296.0, 316.0, 314.0, 311.0, 319.0, 296.0, 291.0, 294.0, 285.0, 291.0, 285.0, 306.0, 321.0, 290.0, 292.0, 285.0, 291.0, 314.0, 316.0, 283.0, 293.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 315.0, 315.0, 290.0, 289.0, 294.0, 288.0, 289.0, 293.0, 294.0, 288.0, 255.0, 270.0, 288.0, 291.0, 275.0, 295.0, 289.0, 284.0, 288.0, 291.0, 289.0, 284.0, 312.0, 318.0, 245.0, 265.0, 290.0, 292.0, 294.0, 279.0, 292.0, 290.0, 256.0, 268.0, 261.0, 261.0, 306.0, 321.0, 307.0, 323.0, 256.0, 268.0, 286.0, 290.0, 291.0, 291.0, 283.0, 293.0, 318.0, 315.0, 313.0, 317.0, 286.0, 296.0, 286.0, 296.0, 287.0, 294.0, 284.0, 292.0, 305.0, 319.0, 294.0, 290.0, 314.0, 319.0, 244.0, 226.0, 288.0, 294.0, 288.0, 291.0, 267.0, 263.0, 318.0, 309.0, 298.0, 281.0, 254.0, 271.0, 294.0, 285.0, 260.0, 276.0, 288.0, 288.0, 296.0, 282.0, 264.0, 258.0, 286.0, 296.0, 293.0, 283.0, 288.0, 285.0, 270.0, 263.0, 289.0, 290.0, 289.0, 290.0, 225.0, 228.0, 311.0, 319.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 289.0, 293.0, 287.0, 289.0, 288.0, 288.0, 255.0, 272.0, 291.0, 288.0, 293.0, 289.0, 283.0, 293.0, 280.0, 301.0, 314.0, 319.0, 289.0, 290.0, 290.0, 286.0, 261.0, 261.0, 293.0, 286.0, 258.0, 252.0, 288.0, 294.0, 290.0, 297.0, 296.0, 291.0, 296.0, 286.0, 288.0, 282.0, 261.0, 264.0, 293.0, 286.0, 268.0, 251.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6949450051728931, "mean_inference_ms": 1.2396064286152753, "mean_action_processing_ms": 0.1333788711332576, "mean_env_wait_ms": 0.8366754177335741, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 453.0, "episode_reward_mean": 575.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 287.645}, "hist_stats": {"episode_reward": [584.0, 472.0, 576.0, 627.0, 570.0, 579.0, 582.0, 579.0, 630.0, 630.0, 587.0, 579.0, 576.0, 627.0, 582.0, 576.0, 630.0, 576.0, 582.0, 
627.0, 582.0, 630.0, 579.0, 582.0, 582.0, 582.0, 525.0, 579.0, 570.0, 573.0, 579.0, 573.0, 630.0, 510.0, 582.0, 573.0, 582.0, 524.0, 522.0, 627.0, 630.0, 524.0, 576.0, 582.0, 576.0, 633.0, 630.0, 582.0, 582.0, 581.0, 576.0, 624.0, 584.0, 633.0, 470.0, 582.0, 579.0, 530.0, 627.0, 579.0, 525.0, 579.0, 536.0, 576.0, 578.0, 522.0, 582.0, 576.0, 573.0, 533.0, 579.0, 579.0, 453.0, 630.0, 579.0, 522.0, 579.0, 582.0, 576.0, 576.0, 527.0, 579.0, 582.0, 576.0, 581.0, 633.0, 579.0, 576.0, 522.0, 579.0, 510.0, 582.0, 587.0, 587.0, 582.0, 570.0, 525.0, 579.0, 519.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [297.0, 287.0, 239.0, 233.0, 295.0, 281.0, 319.0, 308.0, 294.0, 276.0, 293.0, 286.0, 290.0, 292.0, 283.0, 296.0, 316.0, 314.0, 311.0, 319.0, 296.0, 291.0, 294.0, 285.0, 291.0, 285.0, 306.0, 321.0, 290.0, 292.0, 285.0, 291.0, 314.0, 316.0, 283.0, 293.0, 291.0, 291.0, 315.0, 312.0, 291.0, 291.0, 315.0, 315.0, 290.0, 289.0, 294.0, 288.0, 289.0, 293.0, 294.0, 288.0, 255.0, 270.0, 288.0, 291.0, 275.0, 295.0, 289.0, 284.0, 288.0, 291.0, 289.0, 284.0, 312.0, 318.0, 245.0, 265.0, 290.0, 292.0, 294.0, 279.0, 292.0, 290.0, 256.0, 268.0, 261.0, 261.0, 306.0, 321.0, 307.0, 323.0, 256.0, 268.0, 286.0, 290.0, 291.0, 291.0, 283.0, 293.0, 318.0, 315.0, 313.0, 317.0, 286.0, 296.0, 286.0, 296.0, 287.0, 294.0, 284.0, 292.0, 305.0, 319.0, 294.0, 290.0, 314.0, 319.0, 244.0, 226.0, 288.0, 294.0, 288.0, 291.0, 267.0, 263.0, 318.0, 309.0, 298.0, 281.0, 254.0, 271.0, 294.0, 285.0, 260.0, 276.0, 288.0, 
288.0, 296.0, 282.0, 264.0, 258.0, 286.0, 296.0, 293.0, 283.0, 288.0, 285.0, 270.0, 263.0, 289.0, 290.0, 289.0, 290.0, 225.0, 228.0, 311.0, 319.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 289.0, 293.0, 287.0, 289.0, 288.0, 288.0, 255.0, 272.0, 291.0, 288.0, 293.0, 289.0, 283.0, 293.0, 280.0, 301.0, 314.0, 319.0, 289.0, 290.0, 290.0, 286.0, 261.0, 261.0, 293.0, 286.0, 258.0, 252.0, 288.0, 294.0, 290.0, 297.0, 296.0, 291.0, 296.0, 286.0, 288.0, 282.0, 261.0, 264.0, 293.0, 286.0, 268.0, 251.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6949450051728931, "mean_inference_ms": 1.2396064286152753, "mean_action_processing_ms": 0.1333788711332576, "mean_env_wait_ms": 0.8366754177335741, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11136000, "num_agent_steps_trained": 11136000, "num_env_steps_sampled": 5568000, "num_env_steps_trained": 5568000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5568000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11136000, "timers": {"training_iteration_time_ms": 3653.946, "learn_time_ms": 1129.094, "learn_throughput": 11336.518, "synch_weights_time_ms": 12.99}, "counters": {"num_env_steps_sampled": 5568000, "num_env_steps_trained": 5568000, "num_agent_steps_sampled": 11136000, "num_agent_steps_trained": 11136000}, "done": false, "episodes_total": 13920, "training_iteration": 435, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-43", "timestamp": 1666582123, "time_this_iter_s": 3.749124050140381, "time_total_s": 1657.9490826129913, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1657.9490826129913, "timesteps_since_restore": 0, "iterations_since_restore": 435, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.62, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.0, "shaped_reward_min": 133, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.08, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.97, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.41, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.2, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.78, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.2, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.2, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0002803339157253504, "policy_loss": -0.00010873284190893173, "vf_loss": 7.808413982391357, "vf_explained_var": 0.583696722984314, "kl": 0.0023585986346006393, "entropy": 0.7835491299629211, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5580800, "num_env_steps_trained": 5580800, "num_agent_steps_sampled": 11161600, "num_agent_steps_trained": 11161600}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 453.0, "episode_reward_mean": 573.8, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 286.9}, "custom_metrics": {"sparse_reward_mean": 198.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.0, "shaped_reward_min": 133, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.08, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.59, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.97, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.41, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.78, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.2, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.13, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.09, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.78, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.2, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.78, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.2, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 510.0, 582.0, 573.0, 582.0, 524.0, 522.0, 627.0, 630.0, 524.0, 576.0, 582.0, 576.0, 633.0, 630.0, 582.0, 582.0, 581.0, 576.0, 624.0, 584.0, 633.0, 470.0, 582.0, 579.0, 530.0, 627.0, 579.0, 525.0, 579.0, 536.0, 576.0, 578.0, 522.0, 582.0, 576.0, 573.0, 533.0, 579.0, 579.0, 453.0, 630.0, 579.0, 522.0, 579.0, 582.0, 576.0, 576.0, 527.0, 579.0, 582.0, 576.0, 581.0, 633.0, 579.0, 576.0, 522.0, 579.0, 510.0, 582.0, 587.0, 587.0, 582.0, 570.0, 525.0, 579.0, 519.0, 582.0, 516.0, 573.0, 630.0, 576.0, 582.0, 530.0, 579.0, 516.0, 573.0, 576.0, 579.0, 579.0, 581.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 582.0, 633.0, 590.0, 630.0, 582.0, 573.0, 582.0, 627.0, 579.0, 582.0, 576.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 318.0, 245.0, 265.0, 290.0, 292.0, 294.0, 279.0, 292.0, 290.0, 256.0, 
268.0, 261.0, 261.0, 306.0, 321.0, 307.0, 323.0, 256.0, 268.0, 286.0, 290.0, 291.0, 291.0, 283.0, 293.0, 318.0, 315.0, 313.0, 317.0, 286.0, 296.0, 286.0, 296.0, 287.0, 294.0, 284.0, 292.0, 305.0, 319.0, 294.0, 290.0, 314.0, 319.0, 244.0, 226.0, 288.0, 294.0, 288.0, 291.0, 267.0, 263.0, 318.0, 309.0, 298.0, 281.0, 254.0, 271.0, 294.0, 285.0, 260.0, 276.0, 288.0, 288.0, 296.0, 282.0, 264.0, 258.0, 286.0, 296.0, 293.0, 283.0, 288.0, 285.0, 270.0, 263.0, 289.0, 290.0, 289.0, 290.0, 225.0, 228.0, 311.0, 319.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 289.0, 293.0, 287.0, 289.0, 288.0, 288.0, 255.0, 272.0, 291.0, 288.0, 293.0, 289.0, 283.0, 293.0, 280.0, 301.0, 314.0, 319.0, 289.0, 290.0, 290.0, 286.0, 261.0, 261.0, 293.0, 286.0, 258.0, 252.0, 288.0, 294.0, 290.0, 297.0, 296.0, 291.0, 296.0, 286.0, 288.0, 282.0, 261.0, 264.0, 293.0, 286.0, 268.0, 251.0, 291.0, 291.0, 248.0, 268.0, 280.0, 293.0, 312.0, 318.0, 286.0, 290.0, 281.0, 301.0, 261.0, 269.0, 286.0, 293.0, 257.0, 259.0, 284.0, 289.0, 286.0, 290.0, 283.0, 296.0, 290.0, 289.0, 288.0, 293.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 286.0, 293.0, 292.0, 287.0, 285.0, 291.0, 293.0, 289.0, 314.0, 319.0, 299.0, 291.0, 316.0, 314.0, 289.0, 293.0, 287.0, 286.0, 294.0, 288.0, 308.0, 319.0, 283.0, 296.0, 294.0, 288.0, 286.0, 290.0, 293.0, 286.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6948806595730435, "mean_inference_ms": 1.2394844780647098, "mean_action_processing_ms": 0.1333695851340957, "mean_env_wait_ms": 0.8365902839854666, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 453.0, "episode_reward_mean": 573.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 225.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 286.9}, "hist_stats": {"episode_reward": [630.0, 510.0, 582.0, 573.0, 582.0, 524.0, 522.0, 627.0, 630.0, 524.0, 576.0, 582.0, 576.0, 633.0, 630.0, 582.0, 582.0, 581.0, 576.0, 
624.0, 584.0, 633.0, 470.0, 582.0, 579.0, 530.0, 627.0, 579.0, 525.0, 579.0, 536.0, 576.0, 578.0, 522.0, 582.0, 576.0, 573.0, 533.0, 579.0, 579.0, 453.0, 630.0, 579.0, 522.0, 579.0, 582.0, 576.0, 576.0, 527.0, 579.0, 582.0, 576.0, 581.0, 633.0, 579.0, 576.0, 522.0, 579.0, 510.0, 582.0, 587.0, 587.0, 582.0, 570.0, 525.0, 579.0, 519.0, 582.0, 516.0, 573.0, 630.0, 576.0, 582.0, 530.0, 579.0, 516.0, 573.0, 576.0, 579.0, 579.0, 581.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 582.0, 633.0, 590.0, 630.0, 582.0, 573.0, 582.0, 627.0, 579.0, 582.0, 576.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 318.0, 245.0, 265.0, 290.0, 292.0, 294.0, 279.0, 292.0, 290.0, 256.0, 268.0, 261.0, 261.0, 306.0, 321.0, 307.0, 323.0, 256.0, 268.0, 286.0, 290.0, 291.0, 291.0, 283.0, 293.0, 318.0, 315.0, 313.0, 317.0, 286.0, 296.0, 286.0, 296.0, 287.0, 294.0, 284.0, 292.0, 305.0, 319.0, 294.0, 290.0, 314.0, 319.0, 244.0, 226.0, 288.0, 294.0, 288.0, 291.0, 267.0, 263.0, 318.0, 309.0, 298.0, 281.0, 254.0, 271.0, 294.0, 285.0, 260.0, 276.0, 288.0, 288.0, 296.0, 282.0, 264.0, 258.0, 286.0, 296.0, 293.0, 283.0, 288.0, 285.0, 270.0, 263.0, 289.0, 290.0, 289.0, 290.0, 225.0, 228.0, 311.0, 319.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 289.0, 293.0, 287.0, 289.0, 288.0, 288.0, 255.0, 272.0, 291.0, 288.0, 293.0, 289.0, 283.0, 293.0, 280.0, 301.0, 314.0, 319.0, 289.0, 290.0, 290.0, 286.0, 261.0, 261.0, 293.0, 286.0, 258.0, 252.0, 288.0, 294.0, 290.0, 297.0, 296.0, 291.0, 296.0, 286.0, 288.0, 
282.0, 261.0, 264.0, 293.0, 286.0, 268.0, 251.0, 291.0, 291.0, 248.0, 268.0, 280.0, 293.0, 312.0, 318.0, 286.0, 290.0, 281.0, 301.0, 261.0, 269.0, 286.0, 293.0, 257.0, 259.0, 284.0, 289.0, 286.0, 290.0, 283.0, 296.0, 290.0, 289.0, 288.0, 293.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 286.0, 293.0, 292.0, 287.0, 285.0, 291.0, 293.0, 289.0, 314.0, 319.0, 299.0, 291.0, 316.0, 314.0, 289.0, 293.0, 287.0, 286.0, 294.0, 288.0, 308.0, 319.0, 283.0, 296.0, 294.0, 288.0, 286.0, 290.0, 293.0, 286.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6948806595730435, "mean_inference_ms": 1.2394844780647098, "mean_action_processing_ms": 0.1333695851340957, "mean_env_wait_ms": 0.8365902839854666, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11161600, "num_agent_steps_trained": 11161600, "num_env_steps_sampled": 5580800, "num_env_steps_trained": 5580800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5580800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11161600, "timers": {"training_iteration_time_ms": 3651.61, "learn_time_ms": 1132.739, "learn_throughput": 11300.041, "synch_weights_time_ms": 12.854}, "counters": {"num_env_steps_sampled": 5580800, "num_env_steps_trained": 5580800, "num_agent_steps_sampled": 11161600, "num_agent_steps_trained": 11161600}, "done": false, "episodes_total": 13952, "training_iteration": 436, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-47", "timestamp": 1666582127, "time_this_iter_s": 3.7395496368408203, "time_total_s": 1661.6886322498322, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1661.6886322498322, "timesteps_since_restore": 0, "iterations_since_restore": 436, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.783333333333335, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 175.77, "shaped_reward_min": 29, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.11, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.38, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 15.99, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.77, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.0, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.97, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.77, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.0, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.77, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.0, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.001821667654439807, "policy_loss": 0.0014255057321861386, "vf_loss": 7.874016761779785, "vf_explained_var": 0.6142792105674744, "kl": 0.0027908834163099527, "entropy": 0.7824777364730835, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5593600, "num_env_steps_trained": 5593600, "num_agent_steps_sampled": 11187200, "num_agent_steps_trained": 11187200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 69.0, "episode_reward_mean": 569.37, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 284.685}, "custom_metrics": {"sparse_reward_mean": 196.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 175.77, "shaped_reward_min": 29, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.11, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.38, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 15.99, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.23, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.77, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.0, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.12, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.05, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.97, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.02, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.77, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.0, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.77, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.0, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [578.0, 522.0, 582.0, 576.0, 573.0, 533.0, 579.0, 579.0, 453.0, 630.0, 579.0, 522.0, 579.0, 582.0, 576.0, 576.0, 527.0, 579.0, 582.0, 576.0, 581.0, 633.0, 579.0, 576.0, 522.0, 579.0, 510.0, 582.0, 587.0, 587.0, 582.0, 570.0, 525.0, 579.0, 519.0, 582.0, 516.0, 573.0, 630.0, 576.0, 582.0, 530.0, 579.0, 516.0, 573.0, 576.0, 579.0, 579.0, 581.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 582.0, 633.0, 590.0, 630.0, 582.0, 573.0, 582.0, 627.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 630.0, 582.0, 636.0, 627.0, 582.0, 584.0, 573.0, 630.0, 579.0, 573.0, 579.0, 584.0, 581.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 579.0, 576.0, 576.0, 573.0, 636.0, 294.0, 587.0, 627.0, 576.0, 636.0, 582.0, 69.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 282.0, 264.0, 258.0, 286.0, 296.0, 293.0, 283.0, 288.0, 285.0, 270.0, 
263.0, 289.0, 290.0, 289.0, 290.0, 225.0, 228.0, 311.0, 319.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 289.0, 293.0, 287.0, 289.0, 288.0, 288.0, 255.0, 272.0, 291.0, 288.0, 293.0, 289.0, 283.0, 293.0, 280.0, 301.0, 314.0, 319.0, 289.0, 290.0, 290.0, 286.0, 261.0, 261.0, 293.0, 286.0, 258.0, 252.0, 288.0, 294.0, 290.0, 297.0, 296.0, 291.0, 296.0, 286.0, 288.0, 282.0, 261.0, 264.0, 293.0, 286.0, 268.0, 251.0, 291.0, 291.0, 248.0, 268.0, 280.0, 293.0, 312.0, 318.0, 286.0, 290.0, 281.0, 301.0, 261.0, 269.0, 286.0, 293.0, 257.0, 259.0, 284.0, 289.0, 286.0, 290.0, 283.0, 296.0, 290.0, 289.0, 288.0, 293.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 286.0, 293.0, 292.0, 287.0, 285.0, 291.0, 293.0, 289.0, 314.0, 319.0, 299.0, 291.0, 316.0, 314.0, 289.0, 293.0, 287.0, 286.0, 294.0, 288.0, 308.0, 319.0, 283.0, 296.0, 294.0, 288.0, 286.0, 290.0, 293.0, 286.0, 291.0, 291.0, 281.0, 301.0, 311.0, 319.0, 293.0, 289.0, 325.0, 311.0, 314.0, 313.0, 291.0, 291.0, 294.0, 290.0, 285.0, 288.0, 314.0, 316.0, 290.0, 289.0, 293.0, 280.0, 293.0, 286.0, 285.0, 299.0, 290.0, 291.0, 300.0, 287.0, 291.0, 285.0, 267.0, 263.0, 263.0, 259.0, 294.0, 282.0, 288.0, 291.0, 289.0, 290.0, 283.0, 293.0, 286.0, 290.0, 298.0, 275.0, 319.0, 317.0, 146.0, 148.0, 295.0, 292.0, 316.0, 311.0, 293.0, 283.0, 314.0, 322.0, 288.0, 294.0, 35.0, 34.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6948230068460599, "mean_inference_ms": 1.2393774164488374, "mean_action_processing_ms": 0.13336225311356412, "mean_env_wait_ms": 0.8365219769699798, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 69.0, "episode_reward_mean": 569.37, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 284.685}, "hist_stats": {"episode_reward": [578.0, 522.0, 582.0, 576.0, 573.0, 533.0, 579.0, 579.0, 453.0, 630.0, 579.0, 522.0, 579.0, 582.0, 576.0, 576.0, 527.0, 579.0, 582.0, 
576.0, 581.0, 633.0, 579.0, 576.0, 522.0, 579.0, 510.0, 582.0, 587.0, 587.0, 582.0, 570.0, 525.0, 579.0, 519.0, 582.0, 516.0, 573.0, 630.0, 576.0, 582.0, 530.0, 579.0, 516.0, 573.0, 576.0, 579.0, 579.0, 581.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 582.0, 633.0, 590.0, 630.0, 582.0, 573.0, 582.0, 627.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 630.0, 582.0, 636.0, 627.0, 582.0, 584.0, 573.0, 630.0, 579.0, 573.0, 579.0, 584.0, 581.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 579.0, 576.0, 576.0, 573.0, 636.0, 294.0, 587.0, 627.0, 576.0, 636.0, 582.0, 69.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [296.0, 282.0, 264.0, 258.0, 286.0, 296.0, 293.0, 283.0, 288.0, 285.0, 270.0, 263.0, 289.0, 290.0, 289.0, 290.0, 225.0, 228.0, 311.0, 319.0, 290.0, 289.0, 257.0, 265.0, 290.0, 289.0, 289.0, 293.0, 287.0, 289.0, 288.0, 288.0, 255.0, 272.0, 291.0, 288.0, 293.0, 289.0, 283.0, 293.0, 280.0, 301.0, 314.0, 319.0, 289.0, 290.0, 290.0, 286.0, 261.0, 261.0, 293.0, 286.0, 258.0, 252.0, 288.0, 294.0, 290.0, 297.0, 296.0, 291.0, 296.0, 286.0, 288.0, 282.0, 261.0, 264.0, 293.0, 286.0, 268.0, 251.0, 291.0, 291.0, 248.0, 268.0, 280.0, 293.0, 312.0, 318.0, 286.0, 290.0, 281.0, 301.0, 261.0, 269.0, 286.0, 293.0, 257.0, 259.0, 284.0, 289.0, 286.0, 290.0, 283.0, 296.0, 290.0, 289.0, 288.0, 293.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 286.0, 293.0, 292.0, 287.0, 285.0, 291.0, 293.0, 289.0, 314.0, 319.0, 299.0, 291.0, 316.0, 314.0, 289.0, 293.0, 287.0, 286.0, 294.0, 288.0, 308.0, 319.0, 283.0, 
296.0, 294.0, 288.0, 286.0, 290.0, 293.0, 286.0, 291.0, 291.0, 281.0, 301.0, 311.0, 319.0, 293.0, 289.0, 325.0, 311.0, 314.0, 313.0, 291.0, 291.0, 294.0, 290.0, 285.0, 288.0, 314.0, 316.0, 290.0, 289.0, 293.0, 280.0, 293.0, 286.0, 285.0, 299.0, 290.0, 291.0, 300.0, 287.0, 291.0, 285.0, 267.0, 263.0, 263.0, 259.0, 294.0, 282.0, 288.0, 291.0, 289.0, 290.0, 283.0, 293.0, 286.0, 290.0, 298.0, 275.0, 319.0, 317.0, 146.0, 148.0, 295.0, 292.0, 316.0, 311.0, 293.0, 283.0, 314.0, 322.0, 288.0, 294.0, 35.0, 34.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6948230068460599, "mean_inference_ms": 1.2393774164488374, "mean_action_processing_ms": 0.13336225311356412, "mean_env_wait_ms": 0.8365219769699798, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11187200, "num_agent_steps_trained": 11187200, "num_env_steps_sampled": 5593600, "num_env_steps_trained": 5593600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5593600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11187200, "timers": {"training_iteration_time_ms": 3630.519, "learn_time_ms": 1135.538, "learn_throughput": 11272.186, "synch_weights_time_ms": 12.895}, "counters": {"num_env_steps_sampled": 5593600, "num_env_steps_trained": 5593600, "num_agent_steps_sampled": 11187200, "num_agent_steps_trained": 11187200}, "done": false, "episodes_total": 13984, "training_iteration": 437, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-51", "timestamp": 1666582131, "time_this_iter_s": 3.654266595840454, "time_total_s": 1665.3428988456726, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1665.3428988456726, "timesteps_since_restore": 0, "iterations_since_restore": 437, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.720000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 176.67, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.32, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.3, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.15, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.33, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.96, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0028519879560917616, "policy_loss": -0.003249123226851225, "vf_loss": 7.91046142578125, "vf_explained_var": 0.5582560300827026, "kl": 0.00226527638733387, "entropy": 0.7878231406211853, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5606400, "num_env_steps_trained": 5606400, "num_agent_steps_sampled": 11212800, "num_agent_steps_trained": 11212800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 573.07, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.535}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 176.67, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.32, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.3, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.17, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.15, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.96, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.33, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.09, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.09, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.92, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.96, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.96, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 579.0, 519.0, 582.0, 516.0, 573.0, 630.0, 576.0, 582.0, 530.0, 579.0, 516.0, 573.0, 576.0, 579.0, 579.0, 581.0, 579.0, 579.0, 579.0, 579.0, 579.0, 576.0, 582.0, 633.0, 590.0, 630.0, 582.0, 573.0, 582.0, 627.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 630.0, 582.0, 636.0, 627.0, 582.0, 584.0, 573.0, 630.0, 579.0, 573.0, 579.0, 584.0, 581.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 579.0, 576.0, 576.0, 573.0, 636.0, 294.0, 587.0, 627.0, 576.0, 636.0, 582.0, 69.0, 630.0, 522.0, 627.0, 522.0, 525.0, 627.0, 576.0, 587.0, 582.0, 576.0, 510.0, 576.0, 579.0, 627.0, 579.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 465.0, 582.0, 582.0, 639.0, 579.0, 587.0, 582.0, 579.0, 633.0, 624.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 264.0, 293.0, 286.0, 268.0, 251.0, 291.0, 291.0, 248.0, 268.0, 280.0, 
293.0, 312.0, 318.0, 286.0, 290.0, 281.0, 301.0, 261.0, 269.0, 286.0, 293.0, 257.0, 259.0, 284.0, 289.0, 286.0, 290.0, 283.0, 296.0, 290.0, 289.0, 288.0, 293.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 286.0, 293.0, 292.0, 287.0, 285.0, 291.0, 293.0, 289.0, 314.0, 319.0, 299.0, 291.0, 316.0, 314.0, 289.0, 293.0, 287.0, 286.0, 294.0, 288.0, 308.0, 319.0, 283.0, 296.0, 294.0, 288.0, 286.0, 290.0, 293.0, 286.0, 291.0, 291.0, 281.0, 301.0, 311.0, 319.0, 293.0, 289.0, 325.0, 311.0, 314.0, 313.0, 291.0, 291.0, 294.0, 290.0, 285.0, 288.0, 314.0, 316.0, 290.0, 289.0, 293.0, 280.0, 293.0, 286.0, 285.0, 299.0, 290.0, 291.0, 300.0, 287.0, 291.0, 285.0, 267.0, 263.0, 263.0, 259.0, 294.0, 282.0, 288.0, 291.0, 289.0, 290.0, 283.0, 293.0, 286.0, 290.0, 298.0, 275.0, 319.0, 317.0, 146.0, 148.0, 295.0, 292.0, 316.0, 311.0, 293.0, 283.0, 314.0, 322.0, 288.0, 294.0, 35.0, 34.0, 309.0, 321.0, 257.0, 265.0, 313.0, 314.0, 249.0, 273.0, 263.0, 262.0, 311.0, 316.0, 294.0, 282.0, 287.0, 300.0, 295.0, 287.0, 295.0, 281.0, 251.0, 259.0, 285.0, 291.0, 291.0, 288.0, 313.0, 314.0, 288.0, 291.0, 288.0, 291.0, 283.0, 296.0, 297.0, 285.0, 296.0, 277.0, 286.0, 293.0, 290.0, 283.0, 237.0, 228.0, 288.0, 294.0, 288.0, 294.0, 321.0, 318.0, 289.0, 290.0, 291.0, 296.0, 291.0, 291.0, 283.0, 296.0, 314.0, 319.0, 311.0, 313.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6947747072864041, "mean_inference_ms": 1.2392755170077125, "mean_action_processing_ms": 0.1333566636626615, "mean_env_wait_ms": 0.8364654056699539, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 573.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.535}, "hist_stats": {"episode_reward": [525.0, 579.0, 519.0, 582.0, 516.0, 573.0, 630.0, 576.0, 582.0, 530.0, 579.0, 516.0, 573.0, 576.0, 579.0, 579.0, 581.0, 579.0, 579.0, 
579.0, 579.0, 579.0, 576.0, 582.0, 633.0, 590.0, 630.0, 582.0, 573.0, 582.0, 627.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 630.0, 582.0, 636.0, 627.0, 582.0, 584.0, 573.0, 630.0, 579.0, 573.0, 579.0, 584.0, 581.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 579.0, 576.0, 576.0, 573.0, 636.0, 294.0, 587.0, 627.0, 576.0, 636.0, 582.0, 69.0, 630.0, 522.0, 627.0, 522.0, 525.0, 627.0, 576.0, 587.0, 582.0, 576.0, 510.0, 576.0, 579.0, 627.0, 579.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 465.0, 582.0, 582.0, 639.0, 579.0, 587.0, 582.0, 579.0, 633.0, 624.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 264.0, 293.0, 286.0, 268.0, 251.0, 291.0, 291.0, 248.0, 268.0, 280.0, 293.0, 312.0, 318.0, 286.0, 290.0, 281.0, 301.0, 261.0, 269.0, 286.0, 293.0, 257.0, 259.0, 284.0, 289.0, 286.0, 290.0, 283.0, 296.0, 290.0, 289.0, 288.0, 293.0, 289.0, 290.0, 291.0, 288.0, 286.0, 293.0, 286.0, 293.0, 292.0, 287.0, 285.0, 291.0, 293.0, 289.0, 314.0, 319.0, 299.0, 291.0, 316.0, 314.0, 289.0, 293.0, 287.0, 286.0, 294.0, 288.0, 308.0, 319.0, 283.0, 296.0, 294.0, 288.0, 286.0, 290.0, 293.0, 286.0, 291.0, 291.0, 281.0, 301.0, 311.0, 319.0, 293.0, 289.0, 325.0, 311.0, 314.0, 313.0, 291.0, 291.0, 294.0, 290.0, 285.0, 288.0, 314.0, 316.0, 290.0, 289.0, 293.0, 280.0, 293.0, 286.0, 285.0, 299.0, 290.0, 291.0, 300.0, 287.0, 291.0, 285.0, 267.0, 263.0, 263.0, 259.0, 294.0, 282.0, 288.0, 291.0, 289.0, 290.0, 283.0, 293.0, 286.0, 290.0, 298.0, 275.0, 319.0, 317.0, 146.0, 148.0, 295.0, 292.0, 316.0, 
311.0, 293.0, 283.0, 314.0, 322.0, 288.0, 294.0, 35.0, 34.0, 309.0, 321.0, 257.0, 265.0, 313.0, 314.0, 249.0, 273.0, 263.0, 262.0, 311.0, 316.0, 294.0, 282.0, 287.0, 300.0, 295.0, 287.0, 295.0, 281.0, 251.0, 259.0, 285.0, 291.0, 291.0, 288.0, 313.0, 314.0, 288.0, 291.0, 288.0, 291.0, 283.0, 296.0, 297.0, 285.0, 296.0, 277.0, 286.0, 293.0, 290.0, 283.0, 237.0, 228.0, 288.0, 294.0, 288.0, 294.0, 321.0, 318.0, 289.0, 290.0, 291.0, 296.0, 291.0, 291.0, 283.0, 296.0, 314.0, 319.0, 311.0, 313.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6947747072864041, "mean_inference_ms": 1.2392755170077125, "mean_action_processing_ms": 0.1333566636626615, "mean_env_wait_ms": 0.8364654056699539, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11212800, "num_agent_steps_trained": 11212800, "num_env_steps_sampled": 5606400, "num_env_steps_trained": 5606400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5606400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11212800, "timers": {"training_iteration_time_ms": 3624.323, "learn_time_ms": 1138.837, "learn_throughput": 11239.539, "synch_weights_time_ms": 12.217}, "counters": {"num_env_steps_sampled": 5606400, "num_env_steps_trained": 5606400, "num_agent_steps_sampled": 11212800, "num_agent_steps_trained": 11212800}, "done": false, "episodes_total": 14016, "training_iteration": 438, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-55", "timestamp": 1666582135, "time_this_iter_s": 3.7086472511291504, "time_total_s": 1669.0515460968018, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1669.0515460968018, "timesteps_since_restore": 0, "iterations_since_restore": 438, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.7, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 176.9, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.05, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.94, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.79, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.1, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, 
"optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.79, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.79, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0007691812934353948, "policy_loss": 0.00037388643249869347, "vf_loss": 7.888741493225098, "vf_explained_var": 0.5663248896598816, "kl": 0.002474588342010975, "entropy": 0.7871589660644531, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5619200, "num_env_steps_trained": 5619200, "num_agent_steps_sampled": 11238400, "num_agent_steps_trained": 11238400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 572.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.45}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 176.9, "shaped_reward_min": 29, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.5, "onion_pickup_agent_0_min": 6, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.05, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.38, "useful_onion_pickup_agent_0_min": 6, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.94, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.09, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.09, "potting_onion_agent_0_min": 5, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.79, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.31, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.1, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.08, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.93, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.09, "optimal_onion_potting_agent_0_min": 5, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.79, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.09, "viable_onion_potting_agent_0_min": 5, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.79, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 576.0, 579.0, 582.0, 582.0, 630.0, 582.0, 636.0, 627.0, 582.0, 584.0, 573.0, 630.0, 579.0, 573.0, 579.0, 584.0, 581.0, 587.0, 576.0, 530.0, 522.0, 576.0, 579.0, 579.0, 576.0, 576.0, 573.0, 636.0, 294.0, 587.0, 627.0, 576.0, 636.0, 582.0, 69.0, 630.0, 522.0, 627.0, 522.0, 525.0, 627.0, 576.0, 587.0, 582.0, 576.0, 510.0, 576.0, 579.0, 627.0, 579.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 465.0, 582.0, 582.0, 639.0, 579.0, 587.0, 582.0, 579.0, 633.0, 624.0, 579.0, 582.0, 636.0, 579.0, 570.0, 584.0, 522.0, 576.0, 573.0, 579.0, 633.0, 525.0, 579.0, 582.0, 579.0, 525.0, 582.0, 573.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 536.0, 582.0, 630.0, 587.0, 527.0, 576.0, 510.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 286.0, 290.0, 293.0, 286.0, 291.0, 291.0, 281.0, 301.0, 311.0, 
319.0, 293.0, 289.0, 325.0, 311.0, 314.0, 313.0, 291.0, 291.0, 294.0, 290.0, 285.0, 288.0, 314.0, 316.0, 290.0, 289.0, 293.0, 280.0, 293.0, 286.0, 285.0, 299.0, 290.0, 291.0, 300.0, 287.0, 291.0, 285.0, 267.0, 263.0, 263.0, 259.0, 294.0, 282.0, 288.0, 291.0, 289.0, 290.0, 283.0, 293.0, 286.0, 290.0, 298.0, 275.0, 319.0, 317.0, 146.0, 148.0, 295.0, 292.0, 316.0, 311.0, 293.0, 283.0, 314.0, 322.0, 288.0, 294.0, 35.0, 34.0, 309.0, 321.0, 257.0, 265.0, 313.0, 314.0, 249.0, 273.0, 263.0, 262.0, 311.0, 316.0, 294.0, 282.0, 287.0, 300.0, 295.0, 287.0, 295.0, 281.0, 251.0, 259.0, 285.0, 291.0, 291.0, 288.0, 313.0, 314.0, 288.0, 291.0, 288.0, 291.0, 283.0, 296.0, 297.0, 285.0, 296.0, 277.0, 286.0, 293.0, 290.0, 283.0, 237.0, 228.0, 288.0, 294.0, 288.0, 294.0, 321.0, 318.0, 289.0, 290.0, 291.0, 296.0, 291.0, 291.0, 283.0, 296.0, 314.0, 319.0, 311.0, 313.0, 291.0, 288.0, 288.0, 294.0, 320.0, 316.0, 288.0, 291.0, 283.0, 287.0, 290.0, 294.0, 256.0, 266.0, 288.0, 288.0, 283.0, 290.0, 292.0, 287.0, 317.0, 316.0, 265.0, 260.0, 294.0, 285.0, 292.0, 290.0, 286.0, 293.0, 261.0, 264.0, 294.0, 288.0, 282.0, 291.0, 289.0, 287.0, 296.0, 291.0, 287.0, 295.0, 290.0, 292.0, 289.0, 293.0, 316.0, 311.0, 286.0, 296.0, 290.0, 292.0, 264.0, 272.0, 297.0, 285.0, 314.0, 316.0, 294.0, 293.0, 262.0, 265.0, 293.0, 283.0, 253.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694728533373717, "mean_inference_ms": 1.2391726393322413, "mean_action_processing_ms": 0.13335137141154382, "mean_env_wait_ms": 0.8364092327819402, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 69.0, "episode_reward_mean": 572.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 34.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 286.45}, "hist_stats": {"episode_reward": [582.0, 576.0, 579.0, 582.0, 582.0, 630.0, 582.0, 636.0, 627.0, 582.0, 584.0, 573.0, 630.0, 579.0, 573.0, 579.0, 584.0, 581.0, 587.0, 
576.0, 530.0, 522.0, 576.0, 579.0, 579.0, 576.0, 576.0, 573.0, 636.0, 294.0, 587.0, 627.0, 576.0, 636.0, 582.0, 69.0, 630.0, 522.0, 627.0, 522.0, 525.0, 627.0, 576.0, 587.0, 582.0, 576.0, 510.0, 576.0, 579.0, 627.0, 579.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 465.0, 582.0, 582.0, 639.0, 579.0, 587.0, 582.0, 579.0, 633.0, 624.0, 579.0, 582.0, 636.0, 579.0, 570.0, 584.0, 522.0, 576.0, 573.0, 579.0, 633.0, 525.0, 579.0, 582.0, 579.0, 525.0, 582.0, 573.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 536.0, 582.0, 630.0, 587.0, 527.0, 576.0, 510.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 286.0, 290.0, 293.0, 286.0, 291.0, 291.0, 281.0, 301.0, 311.0, 319.0, 293.0, 289.0, 325.0, 311.0, 314.0, 313.0, 291.0, 291.0, 294.0, 290.0, 285.0, 288.0, 314.0, 316.0, 290.0, 289.0, 293.0, 280.0, 293.0, 286.0, 285.0, 299.0, 290.0, 291.0, 300.0, 287.0, 291.0, 285.0, 267.0, 263.0, 263.0, 259.0, 294.0, 282.0, 288.0, 291.0, 289.0, 290.0, 283.0, 293.0, 286.0, 290.0, 298.0, 275.0, 319.0, 317.0, 146.0, 148.0, 295.0, 292.0, 316.0, 311.0, 293.0, 283.0, 314.0, 322.0, 288.0, 294.0, 35.0, 34.0, 309.0, 321.0, 257.0, 265.0, 313.0, 314.0, 249.0, 273.0, 263.0, 262.0, 311.0, 316.0, 294.0, 282.0, 287.0, 300.0, 295.0, 287.0, 295.0, 281.0, 251.0, 259.0, 285.0, 291.0, 291.0, 288.0, 313.0, 314.0, 288.0, 291.0, 288.0, 291.0, 283.0, 296.0, 297.0, 285.0, 296.0, 277.0, 286.0, 293.0, 290.0, 283.0, 237.0, 228.0, 288.0, 294.0, 288.0, 294.0, 321.0, 318.0, 289.0, 290.0, 291.0, 296.0, 291.0, 
291.0, 283.0, 296.0, 314.0, 319.0, 311.0, 313.0, 291.0, 288.0, 288.0, 294.0, 320.0, 316.0, 288.0, 291.0, 283.0, 287.0, 290.0, 294.0, 256.0, 266.0, 288.0, 288.0, 283.0, 290.0, 292.0, 287.0, 317.0, 316.0, 265.0, 260.0, 294.0, 285.0, 292.0, 290.0, 286.0, 293.0, 261.0, 264.0, 294.0, 288.0, 282.0, 291.0, 289.0, 287.0, 296.0, 291.0, 287.0, 295.0, 290.0, 292.0, 289.0, 293.0, 316.0, 311.0, 286.0, 296.0, 290.0, 292.0, 264.0, 272.0, 297.0, 285.0, 314.0, 316.0, 294.0, 293.0, 262.0, 265.0, 293.0, 283.0, 253.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694728533373717, "mean_inference_ms": 1.2391726393322413, "mean_action_processing_ms": 0.13335137141154382, "mean_env_wait_ms": 0.8364092327819402, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11238400, "num_agent_steps_trained": 11238400, "num_env_steps_sampled": 5619200, "num_env_steps_trained": 5619200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5619200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11238400, "timers": {"training_iteration_time_ms": 3623.556, "learn_time_ms": 1141.687, "learn_throughput": 11211.476, "synch_weights_time_ms": 12.285}, "counters": {"num_env_steps_sampled": 5619200, "num_env_steps_trained": 5619200, "num_agent_steps_sampled": 11238400, "num_agent_steps_trained": 11238400}, "done": false, "episodes_total": 14048, "training_iteration": 439, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-28-59", "timestamp": 1666582139, "time_this_iter_s": 3.6420202255249023, "time_total_s": 1672.6935663223267, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1672.6935663223267, "timesteps_since_restore": 0, "iterations_since_restore": 439, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.82, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 172.48, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.12, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.78, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.71, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.47, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.18, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.36, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.71, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.47, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.71, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.47, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0049171969294548035, "policy_loss": -0.005316406488418579, "vf_loss": 7.9975738525390625, "vf_explained_var": 0.5843226909637451, "kl": 0.0027200470212846994, "entropy": 0.8010973334312439, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5632000, "num_env_steps_trained": 5632000, "num_agent_steps_sampled": 11264000, "num_agent_steps_trained": 11264000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 559.28, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 279.64}, "custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 172.48, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.12, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.78, "onion_pickup_agent_1_min": 1, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.67, "useful_onion_pickup_agent_1_min": 1, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.71, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.47, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.18, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.36, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.96, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.71, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.47, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.71, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.47, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 636.0, 582.0, 69.0, 630.0, 522.0, 627.0, 522.0, 525.0, 627.0, 576.0, 587.0, 582.0, 576.0, 510.0, 576.0, 579.0, 627.0, 579.0, 579.0, 579.0, 582.0, 573.0, 579.0, 573.0, 465.0, 582.0, 582.0, 639.0, 579.0, 587.0, 582.0, 579.0, 633.0, 624.0, 579.0, 582.0, 636.0, 579.0, 570.0, 584.0, 522.0, 576.0, 573.0, 579.0, 633.0, 525.0, 579.0, 582.0, 579.0, 525.0, 582.0, 573.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 536.0, 582.0, 630.0, 587.0, 527.0, 576.0, 510.0, 582.0, 627.0, 465.0, 633.0, 519.0, 582.0, 533.0, 405.0, 522.0, 581.0, 522.0, 66.0, 579.0, 530.0, 576.0, 570.0, 579.0, 510.0, 582.0, 570.0, 627.0, 570.0, 579.0, 279.0, 579.0, 579.0, 573.0, 453.0, 579.0, 582.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 283.0, 314.0, 322.0, 288.0, 294.0, 35.0, 34.0, 309.0, 321.0, 257.0, 265.0, 
313.0, 314.0, 249.0, 273.0, 263.0, 262.0, 311.0, 316.0, 294.0, 282.0, 287.0, 300.0, 295.0, 287.0, 295.0, 281.0, 251.0, 259.0, 285.0, 291.0, 291.0, 288.0, 313.0, 314.0, 288.0, 291.0, 288.0, 291.0, 283.0, 296.0, 297.0, 285.0, 296.0, 277.0, 286.0, 293.0, 290.0, 283.0, 237.0, 228.0, 288.0, 294.0, 288.0, 294.0, 321.0, 318.0, 289.0, 290.0, 291.0, 296.0, 291.0, 291.0, 283.0, 296.0, 314.0, 319.0, 311.0, 313.0, 291.0, 288.0, 288.0, 294.0, 320.0, 316.0, 288.0, 291.0, 283.0, 287.0, 290.0, 294.0, 256.0, 266.0, 288.0, 288.0, 283.0, 290.0, 292.0, 287.0, 317.0, 316.0, 265.0, 260.0, 294.0, 285.0, 292.0, 290.0, 286.0, 293.0, 261.0, 264.0, 294.0, 288.0, 282.0, 291.0, 289.0, 287.0, 296.0, 291.0, 287.0, 295.0, 290.0, 292.0, 289.0, 293.0, 316.0, 311.0, 286.0, 296.0, 290.0, 292.0, 264.0, 272.0, 297.0, 285.0, 314.0, 316.0, 294.0, 293.0, 262.0, 265.0, 293.0, 283.0, 253.0, 257.0, 299.0, 283.0, 311.0, 316.0, 239.0, 226.0, 314.0, 319.0, 262.0, 257.0, 289.0, 293.0, 268.0, 265.0, 207.0, 198.0, 264.0, 258.0, 291.0, 290.0, 254.0, 268.0, 32.0, 34.0, 286.0, 293.0, 267.0, 263.0, 286.0, 290.0, 284.0, 286.0, 286.0, 293.0, 257.0, 253.0, 291.0, 291.0, 294.0, 276.0, 316.0, 311.0, 282.0, 288.0, 289.0, 290.0, 139.0, 140.0, 291.0, 288.0, 291.0, 288.0, 290.0, 283.0, 219.0, 234.0, 293.0, 286.0, 296.0, 286.0, 294.0, 288.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6946899078139533, "mean_inference_ms": 1.2390712973970437, "mean_action_processing_ms": 0.13334667802007993, "mean_env_wait_ms": 0.8363554136317487, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 559.28, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 279.64}, "hist_stats": {"episode_reward": [576.0, 636.0, 582.0, 69.0, 630.0, 522.0, 627.0, 522.0, 525.0, 627.0, 576.0, 587.0, 582.0, 576.0, 510.0, 576.0, 579.0, 627.0, 579.0, 579.0, 
579.0, 582.0, 573.0, 579.0, 573.0, 465.0, 582.0, 582.0, 639.0, 579.0, 587.0, 582.0, 579.0, 633.0, 624.0, 579.0, 582.0, 636.0, 579.0, 570.0, 584.0, 522.0, 576.0, 573.0, 579.0, 633.0, 525.0, 579.0, 582.0, 579.0, 525.0, 582.0, 573.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 536.0, 582.0, 630.0, 587.0, 527.0, 576.0, 510.0, 582.0, 627.0, 465.0, 633.0, 519.0, 582.0, 533.0, 405.0, 522.0, 581.0, 522.0, 66.0, 579.0, 530.0, 576.0, 570.0, 579.0, 510.0, 582.0, 570.0, 627.0, 570.0, 579.0, 279.0, 579.0, 579.0, 573.0, 453.0, 579.0, 582.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 283.0, 314.0, 322.0, 288.0, 294.0, 35.0, 34.0, 309.0, 321.0, 257.0, 265.0, 313.0, 314.0, 249.0, 273.0, 263.0, 262.0, 311.0, 316.0, 294.0, 282.0, 287.0, 300.0, 295.0, 287.0, 295.0, 281.0, 251.0, 259.0, 285.0, 291.0, 291.0, 288.0, 313.0, 314.0, 288.0, 291.0, 288.0, 291.0, 283.0, 296.0, 297.0, 285.0, 296.0, 277.0, 286.0, 293.0, 290.0, 283.0, 237.0, 228.0, 288.0, 294.0, 288.0, 294.0, 321.0, 318.0, 289.0, 290.0, 291.0, 296.0, 291.0, 291.0, 283.0, 296.0, 314.0, 319.0, 311.0, 313.0, 291.0, 288.0, 288.0, 294.0, 320.0, 316.0, 288.0, 291.0, 283.0, 287.0, 290.0, 294.0, 256.0, 266.0, 288.0, 288.0, 283.0, 290.0, 292.0, 287.0, 317.0, 316.0, 265.0, 260.0, 294.0, 285.0, 292.0, 290.0, 286.0, 293.0, 261.0, 264.0, 294.0, 288.0, 282.0, 291.0, 289.0, 287.0, 296.0, 291.0, 287.0, 295.0, 290.0, 292.0, 289.0, 293.0, 316.0, 311.0, 286.0, 296.0, 290.0, 292.0, 264.0, 272.0, 297.0, 285.0, 314.0, 316.0, 
294.0, 293.0, 262.0, 265.0, 293.0, 283.0, 253.0, 257.0, 299.0, 283.0, 311.0, 316.0, 239.0, 226.0, 314.0, 319.0, 262.0, 257.0, 289.0, 293.0, 268.0, 265.0, 207.0, 198.0, 264.0, 258.0, 291.0, 290.0, 254.0, 268.0, 32.0, 34.0, 286.0, 293.0, 267.0, 263.0, 286.0, 290.0, 284.0, 286.0, 286.0, 293.0, 257.0, 253.0, 291.0, 291.0, 294.0, 276.0, 316.0, 311.0, 282.0, 288.0, 289.0, 290.0, 139.0, 140.0, 291.0, 288.0, 291.0, 288.0, 290.0, 283.0, 219.0, 234.0, 293.0, 286.0, 296.0, 286.0, 294.0, 288.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6946899078139533, "mean_inference_ms": 1.2390712973970437, "mean_action_processing_ms": 0.13334667802007993, "mean_env_wait_ms": 0.8363554136317487, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11264000, "num_agent_steps_trained": 11264000, "num_env_steps_sampled": 5632000, "num_env_steps_trained": 5632000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5632000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11264000, "timers": {"training_iteration_time_ms": 3615.506, "learn_time_ms": 1133.411, "learn_throughput": 11293.34, "synch_weights_time_ms": 12.353}, "counters": {"num_env_steps_sampled": 5632000, "num_env_steps_trained": 5632000, "num_agent_steps_sampled": 11264000, "num_agent_steps_trained": 11264000}, "done": false, "episodes_total": 14080, "training_iteration": 440, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-03", "timestamp": 1666582143, "time_this_iter_s": 3.6508986949920654, "time_total_s": 1676.3444650173187, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1676.3444650173187, "timesteps_since_restore": 0, "iterations_since_restore": 440, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.950000000000003, "ram_util_percent": 10.616666666666665}} +{"custom_metrics": {"sparse_reward_mean": 195.0, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 173.6, "shaped_reward_min": 26, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.44, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.62, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.51, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.1, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.32, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.16, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.9, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.81, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.06, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.32, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.32, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015632144641131163, "policy_loss": 0.0011703792260959744, "vf_loss": 7.836904525756836, "vf_explained_var": 0.583203911781311, "kl": 0.0023779491893947124, "entropy": 0.781711220741272, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5644800, "num_env_steps_trained": 5644800, "num_agent_steps_sampled": 11289600, "num_agent_steps_trained": 11289600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 66.0, "episode_reward_mean": 563.6, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 281.8}, "custom_metrics": {"sparse_reward_mean": 195.0, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 173.6, "shaped_reward_min": 26, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.44, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.62, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 16.36, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.51, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.32, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.16, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.9, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.09, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.11, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.81, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 16.06, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.32, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.32, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 633.0, 624.0, 579.0, 582.0, 636.0, 579.0, 570.0, 584.0, 522.0, 576.0, 573.0, 579.0, 633.0, 525.0, 579.0, 582.0, 579.0, 525.0, 582.0, 573.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 536.0, 582.0, 630.0, 587.0, 527.0, 576.0, 510.0, 582.0, 627.0, 465.0, 633.0, 519.0, 582.0, 533.0, 405.0, 522.0, 581.0, 522.0, 66.0, 579.0, 530.0, 576.0, 570.0, 579.0, 510.0, 582.0, 570.0, 627.0, 570.0, 579.0, 279.0, 579.0, 579.0, 573.0, 453.0, 579.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 522.0, 510.0, 573.0, 570.0, 573.0, 525.0, 582.0, 579.0, 627.0, 579.0, 525.0, 579.0, 576.0, 576.0, 584.0, 564.0, 582.0, 587.0, 570.0, 627.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 314.0, 319.0, 311.0, 313.0, 291.0, 288.0, 288.0, 294.0, 320.0, 
316.0, 288.0, 291.0, 283.0, 287.0, 290.0, 294.0, 256.0, 266.0, 288.0, 288.0, 283.0, 290.0, 292.0, 287.0, 317.0, 316.0, 265.0, 260.0, 294.0, 285.0, 292.0, 290.0, 286.0, 293.0, 261.0, 264.0, 294.0, 288.0, 282.0, 291.0, 289.0, 287.0, 296.0, 291.0, 287.0, 295.0, 290.0, 292.0, 289.0, 293.0, 316.0, 311.0, 286.0, 296.0, 290.0, 292.0, 264.0, 272.0, 297.0, 285.0, 314.0, 316.0, 294.0, 293.0, 262.0, 265.0, 293.0, 283.0, 253.0, 257.0, 299.0, 283.0, 311.0, 316.0, 239.0, 226.0, 314.0, 319.0, 262.0, 257.0, 289.0, 293.0, 268.0, 265.0, 207.0, 198.0, 264.0, 258.0, 291.0, 290.0, 254.0, 268.0, 32.0, 34.0, 286.0, 293.0, 267.0, 263.0, 286.0, 290.0, 284.0, 286.0, 286.0, 293.0, 257.0, 253.0, 291.0, 291.0, 294.0, 276.0, 316.0, 311.0, 282.0, 288.0, 289.0, 290.0, 139.0, 140.0, 291.0, 288.0, 291.0, 288.0, 290.0, 283.0, 219.0, 234.0, 293.0, 286.0, 296.0, 286.0, 294.0, 288.0, 292.0, 290.0, 286.0, 293.0, 283.0, 293.0, 288.0, 294.0, 257.0, 265.0, 267.0, 243.0, 291.0, 282.0, 283.0, 287.0, 288.0, 285.0, 267.0, 258.0, 286.0, 296.0, 286.0, 293.0, 308.0, 319.0, 296.0, 283.0, 267.0, 258.0, 296.0, 283.0, 291.0, 285.0, 280.0, 296.0, 293.0, 291.0, 292.0, 272.0, 289.0, 293.0, 296.0, 291.0, 284.0, 286.0, 318.0, 309.0, 293.0, 289.0, 294.0, 285.0, 288.0, 291.0, 292.0, 290.0, 294.0, 288.0, 284.0, 298.0, 288.0, 291.0, 311.0, 319.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6946474203690406, "mean_inference_ms": 1.2389727343902235, "mean_action_processing_ms": 0.1333403613331771, "mean_env_wait_ms": 0.8363260041776678, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 66.0, "episode_reward_mean": 563.6, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 320.0}, "policy_reward_mean": {"ppo": 281.8}, "hist_stats": {"episode_reward": [579.0, 633.0, 624.0, 579.0, 582.0, 636.0, 579.0, 570.0, 584.0, 522.0, 576.0, 573.0, 579.0, 633.0, 525.0, 579.0, 582.0, 579.0, 525.0, 582.0, 
573.0, 576.0, 587.0, 582.0, 582.0, 582.0, 627.0, 582.0, 582.0, 536.0, 582.0, 630.0, 587.0, 527.0, 576.0, 510.0, 582.0, 627.0, 465.0, 633.0, 519.0, 582.0, 533.0, 405.0, 522.0, 581.0, 522.0, 66.0, 579.0, 530.0, 576.0, 570.0, 579.0, 510.0, 582.0, 570.0, 627.0, 570.0, 579.0, 279.0, 579.0, 579.0, 573.0, 453.0, 579.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 522.0, 510.0, 573.0, 570.0, 573.0, 525.0, 582.0, 579.0, 627.0, 579.0, 525.0, 579.0, 576.0, 576.0, 584.0, 564.0, 582.0, 587.0, 570.0, 627.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 630.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 314.0, 319.0, 311.0, 313.0, 291.0, 288.0, 288.0, 294.0, 320.0, 316.0, 288.0, 291.0, 283.0, 287.0, 290.0, 294.0, 256.0, 266.0, 288.0, 288.0, 283.0, 290.0, 292.0, 287.0, 317.0, 316.0, 265.0, 260.0, 294.0, 285.0, 292.0, 290.0, 286.0, 293.0, 261.0, 264.0, 294.0, 288.0, 282.0, 291.0, 289.0, 287.0, 296.0, 291.0, 287.0, 295.0, 290.0, 292.0, 289.0, 293.0, 316.0, 311.0, 286.0, 296.0, 290.0, 292.0, 264.0, 272.0, 297.0, 285.0, 314.0, 316.0, 294.0, 293.0, 262.0, 265.0, 293.0, 283.0, 253.0, 257.0, 299.0, 283.0, 311.0, 316.0, 239.0, 226.0, 314.0, 319.0, 262.0, 257.0, 289.0, 293.0, 268.0, 265.0, 207.0, 198.0, 264.0, 258.0, 291.0, 290.0, 254.0, 268.0, 32.0, 34.0, 286.0, 293.0, 267.0, 263.0, 286.0, 290.0, 284.0, 286.0, 286.0, 293.0, 257.0, 253.0, 291.0, 291.0, 294.0, 276.0, 316.0, 311.0, 282.0, 288.0, 289.0, 290.0, 139.0, 140.0, 291.0, 288.0, 291.0, 288.0, 290.0, 283.0, 219.0, 234.0, 
293.0, 286.0, 296.0, 286.0, 294.0, 288.0, 292.0, 290.0, 286.0, 293.0, 283.0, 293.0, 288.0, 294.0, 257.0, 265.0, 267.0, 243.0, 291.0, 282.0, 283.0, 287.0, 288.0, 285.0, 267.0, 258.0, 286.0, 296.0, 286.0, 293.0, 308.0, 319.0, 296.0, 283.0, 267.0, 258.0, 296.0, 283.0, 291.0, 285.0, 280.0, 296.0, 293.0, 291.0, 292.0, 272.0, 289.0, 293.0, 296.0, 291.0, 284.0, 286.0, 318.0, 309.0, 293.0, 289.0, 294.0, 285.0, 288.0, 291.0, 292.0, 290.0, 294.0, 288.0, 284.0, 298.0, 288.0, 291.0, 311.0, 319.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6946474203690406, "mean_inference_ms": 1.2389727343902235, "mean_action_processing_ms": 0.1333403613331771, "mean_env_wait_ms": 0.8363260041776678, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11289600, "num_agent_steps_trained": 11289600, "num_env_steps_sampled": 5644800, "num_env_steps_trained": 5644800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5644800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11289600, "timers": {"training_iteration_time_ms": 3624.105, "learn_time_ms": 1131.518, "learn_throughput": 11312.237, "synch_weights_time_ms": 12.495}, "counters": {"num_env_steps_sampled": 5644800, "num_env_steps_trained": 5644800, "num_agent_steps_sampled": 11289600, "num_agent_steps_trained": 11289600}, "done": false, "episodes_total": 14112, "training_iteration": 441, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-07", "timestamp": 1666582147, "time_this_iter_s": 3.8103065490722656, "time_total_s": 1680.154771566391, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1680.154771566391, "timesteps_since_restore": 0, "iterations_since_restore": 441, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.699999999999996, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 172.99, "shaped_reward_min": 26, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.17, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.95, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.09, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.81, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.79, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.54, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.28, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.92, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.79, 
"optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.54, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.79, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.54, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0020413980819284916, "policy_loss": -0.0024421534035354853, "vf_loss": 7.864043235778809, "vf_explained_var": 0.5776045322418213, "kl": 0.0032261847518384457, "entropy": 0.7712999582290649, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5657600, "num_env_steps_trained": 5657600, "num_agent_steps_sampled": 11315200, "num_agent_steps_trained": 11315200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 66.0, "episode_reward_mean": 562.59, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 281.295}, "custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 172.99, "shaped_reward_min": 26, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.17, "onion_pickup_agent_0_min": 5, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.95, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.09, "useful_onion_pickup_agent_0_min": 5, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.81, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.09, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.79, "potting_onion_agent_0_min": 4, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.54, "potting_onion_agent_1_min": 2, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.28, "dish_pickup_agent_0_min": 0, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 4.97, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.92, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.93, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.87, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 2, "optimal_onion_potting_agent_0_mean": 15.79, "optimal_onion_potting_agent_0_min": 4, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.54, "optimal_onion_potting_agent_1_min": 2, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.79, "viable_onion_potting_agent_0_min": 4, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.54, "viable_onion_potting_agent_1_min": 2, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 527.0, 576.0, 510.0, 582.0, 627.0, 465.0, 633.0, 519.0, 582.0, 533.0, 405.0, 522.0, 581.0, 522.0, 66.0, 579.0, 530.0, 576.0, 570.0, 579.0, 510.0, 582.0, 570.0, 627.0, 570.0, 579.0, 279.0, 579.0, 579.0, 573.0, 453.0, 579.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 522.0, 510.0, 573.0, 570.0, 573.0, 525.0, 582.0, 579.0, 627.0, 579.0, 525.0, 579.0, 576.0, 576.0, 584.0, 564.0, 582.0, 587.0, 570.0, 627.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 630.0, 579.0, 579.0, 582.0, 627.0, 582.0, 576.0, 570.0, 576.0, 582.0, 573.0, 525.0, 630.0, 579.0, 522.0, 576.0, 630.0, 579.0, 521.0, 570.0, 582.0, 582.0, 576.0, 630.0, 633.0, 576.0, 576.0, 579.0, 582.0, 522.0, 579.0, 579.0, 584.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 293.0, 262.0, 265.0, 293.0, 283.0, 253.0, 257.0, 299.0, 283.0, 311.0, 
316.0, 239.0, 226.0, 314.0, 319.0, 262.0, 257.0, 289.0, 293.0, 268.0, 265.0, 207.0, 198.0, 264.0, 258.0, 291.0, 290.0, 254.0, 268.0, 32.0, 34.0, 286.0, 293.0, 267.0, 263.0, 286.0, 290.0, 284.0, 286.0, 286.0, 293.0, 257.0, 253.0, 291.0, 291.0, 294.0, 276.0, 316.0, 311.0, 282.0, 288.0, 289.0, 290.0, 139.0, 140.0, 291.0, 288.0, 291.0, 288.0, 290.0, 283.0, 219.0, 234.0, 293.0, 286.0, 296.0, 286.0, 294.0, 288.0, 292.0, 290.0, 286.0, 293.0, 283.0, 293.0, 288.0, 294.0, 257.0, 265.0, 267.0, 243.0, 291.0, 282.0, 283.0, 287.0, 288.0, 285.0, 267.0, 258.0, 286.0, 296.0, 286.0, 293.0, 308.0, 319.0, 296.0, 283.0, 267.0, 258.0, 296.0, 283.0, 291.0, 285.0, 280.0, 296.0, 293.0, 291.0, 292.0, 272.0, 289.0, 293.0, 296.0, 291.0, 284.0, 286.0, 318.0, 309.0, 293.0, 289.0, 294.0, 285.0, 288.0, 291.0, 292.0, 290.0, 294.0, 288.0, 284.0, 298.0, 288.0, 291.0, 311.0, 319.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 316.0, 311.0, 284.0, 298.0, 288.0, 288.0, 293.0, 277.0, 294.0, 282.0, 296.0, 286.0, 283.0, 290.0, 265.0, 260.0, 311.0, 319.0, 296.0, 283.0, 257.0, 265.0, 291.0, 285.0, 319.0, 311.0, 292.0, 287.0, 266.0, 255.0, 288.0, 282.0, 296.0, 286.0, 291.0, 291.0, 290.0, 286.0, 311.0, 319.0, 314.0, 319.0, 288.0, 288.0, 278.0, 298.0, 296.0, 283.0, 286.0, 296.0, 258.0, 264.0, 283.0, 296.0, 284.0, 295.0, 298.0, 286.0, 286.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6945962982346888, "mean_inference_ms": 1.2389306404779257, "mean_action_processing_ms": 0.13333350576114789, "mean_env_wait_ms": 0.8363681916751531, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 66.0, "episode_reward_mean": 562.59, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 32.0}, "policy_reward_max": {"ppo": 319.0}, "policy_reward_mean": {"ppo": 281.295}, "hist_stats": {"episode_reward": [587.0, 527.0, 576.0, 510.0, 582.0, 627.0, 465.0, 633.0, 519.0, 582.0, 533.0, 405.0, 522.0, 581.0, 522.0, 66.0, 579.0, 530.0, 576.0, 
570.0, 579.0, 510.0, 582.0, 570.0, 627.0, 570.0, 579.0, 279.0, 579.0, 579.0, 573.0, 453.0, 579.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 522.0, 510.0, 573.0, 570.0, 573.0, 525.0, 582.0, 579.0, 627.0, 579.0, 525.0, 579.0, 576.0, 576.0, 584.0, 564.0, 582.0, 587.0, 570.0, 627.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 630.0, 579.0, 579.0, 582.0, 627.0, 582.0, 576.0, 570.0, 576.0, 582.0, 573.0, 525.0, 630.0, 579.0, 522.0, 576.0, 630.0, 579.0, 521.0, 570.0, 582.0, 582.0, 576.0, 630.0, 633.0, 576.0, 576.0, 579.0, 582.0, 522.0, 579.0, 579.0, 584.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 293.0, 262.0, 265.0, 293.0, 283.0, 253.0, 257.0, 299.0, 283.0, 311.0, 316.0, 239.0, 226.0, 314.0, 319.0, 262.0, 257.0, 289.0, 293.0, 268.0, 265.0, 207.0, 198.0, 264.0, 258.0, 291.0, 290.0, 254.0, 268.0, 32.0, 34.0, 286.0, 293.0, 267.0, 263.0, 286.0, 290.0, 284.0, 286.0, 286.0, 293.0, 257.0, 253.0, 291.0, 291.0, 294.0, 276.0, 316.0, 311.0, 282.0, 288.0, 289.0, 290.0, 139.0, 140.0, 291.0, 288.0, 291.0, 288.0, 290.0, 283.0, 219.0, 234.0, 293.0, 286.0, 296.0, 286.0, 294.0, 288.0, 292.0, 290.0, 286.0, 293.0, 283.0, 293.0, 288.0, 294.0, 257.0, 265.0, 267.0, 243.0, 291.0, 282.0, 283.0, 287.0, 288.0, 285.0, 267.0, 258.0, 286.0, 296.0, 286.0, 293.0, 308.0, 319.0, 296.0, 283.0, 267.0, 258.0, 296.0, 283.0, 291.0, 285.0, 280.0, 296.0, 293.0, 291.0, 292.0, 272.0, 289.0, 293.0, 296.0, 291.0, 284.0, 286.0, 318.0, 309.0, 293.0, 289.0, 294.0, 285.0, 288.0, 291.0, 292.0, 290.0, 294.0, 
288.0, 284.0, 298.0, 288.0, 291.0, 311.0, 319.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 316.0, 311.0, 284.0, 298.0, 288.0, 288.0, 293.0, 277.0, 294.0, 282.0, 296.0, 286.0, 283.0, 290.0, 265.0, 260.0, 311.0, 319.0, 296.0, 283.0, 257.0, 265.0, 291.0, 285.0, 319.0, 311.0, 292.0, 287.0, 266.0, 255.0, 288.0, 282.0, 296.0, 286.0, 291.0, 291.0, 290.0, 286.0, 311.0, 319.0, 314.0, 319.0, 288.0, 288.0, 278.0, 298.0, 296.0, 283.0, 286.0, 296.0, 258.0, 264.0, 283.0, 296.0, 284.0, 295.0, 298.0, 286.0, 286.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6945962982346888, "mean_inference_ms": 1.2389306404779257, "mean_action_processing_ms": 0.13333350576114789, "mean_env_wait_ms": 0.8363681916751531, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11315200, "num_agent_steps_trained": 11315200, "num_env_steps_sampled": 5657600, "num_env_steps_trained": 5657600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5657600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11315200, "timers": {"training_iteration_time_ms": 3657.426, "learn_time_ms": 1142.083, "learn_throughput": 11207.588, "synch_weights_time_ms": 11.94}, "counters": {"num_env_steps_sampled": 5657600, "num_env_steps_trained": 5657600, "num_agent_steps_sampled": 11315200, "num_agent_steps_trained": 11315200}, "done": false, "episodes_total": 14144, "training_iteration": 442, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-11", "timestamp": 1666582151, "time_this_iter_s": 4.022900342941284, "time_total_s": 1684.1776719093323, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1684.1776719093323, "timesteps_since_restore": 0, "iterations_since_restore": 442, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.483333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.63, "shaped_reward_min": 85, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.18, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.99, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.44, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.8, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.28, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.44, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.8, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.44, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.8, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0034412413369864225, "policy_loss": 0.0030490802600979805, "vf_loss": 7.769399166107178, "vf_explained_var": 0.5706442594528198, "kl": 0.007476408034563065, "entropy": 0.7695587873458862, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5670400, "num_env_steps_trained": 5670400, "num_agent_steps_sampled": 11340800, "num_agent_steps_trained": 11340800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 285.0, "episode_reward_mean": 579.83, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 134.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 289.915}, "custom_metrics": {"sparse_reward_mean": 200.6, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 178.63, "shaped_reward_min": 85, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.73, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.18, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.67, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.99, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.44, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.8, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.46, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.28, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.04, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.44, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.8, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.44, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.8, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 522.0, 510.0, 573.0, 570.0, 573.0, 525.0, 582.0, 579.0, 627.0, 579.0, 525.0, 579.0, 576.0, 576.0, 584.0, 564.0, 582.0, 587.0, 570.0, 627.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 630.0, 579.0, 579.0, 582.0, 627.0, 582.0, 576.0, 570.0, 576.0, 582.0, 573.0, 525.0, 630.0, 579.0, 522.0, 576.0, 630.0, 579.0, 521.0, 570.0, 582.0, 582.0, 576.0, 630.0, 633.0, 576.0, 576.0, 579.0, 582.0, 522.0, 579.0, 579.0, 584.0, 582.0, 627.0, 579.0, 582.0, 636.0, 576.0, 582.0, 587.0, 630.0, 633.0, 570.0, 479.0, 582.0, 587.0, 579.0, 627.0, 285.0, 587.0, 624.0, 584.0, 582.0, 582.0, 579.0, 636.0, 573.0, 636.0, 579.0, 636.0, 579.0, 627.0, 582.0, 587.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 296.0, 286.0, 294.0, 288.0, 292.0, 290.0, 286.0, 293.0, 283.0, 
293.0, 288.0, 294.0, 257.0, 265.0, 267.0, 243.0, 291.0, 282.0, 283.0, 287.0, 288.0, 285.0, 267.0, 258.0, 286.0, 296.0, 286.0, 293.0, 308.0, 319.0, 296.0, 283.0, 267.0, 258.0, 296.0, 283.0, 291.0, 285.0, 280.0, 296.0, 293.0, 291.0, 292.0, 272.0, 289.0, 293.0, 296.0, 291.0, 284.0, 286.0, 318.0, 309.0, 293.0, 289.0, 294.0, 285.0, 288.0, 291.0, 292.0, 290.0, 294.0, 288.0, 284.0, 298.0, 288.0, 291.0, 311.0, 319.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 316.0, 311.0, 284.0, 298.0, 288.0, 288.0, 293.0, 277.0, 294.0, 282.0, 296.0, 286.0, 283.0, 290.0, 265.0, 260.0, 311.0, 319.0, 296.0, 283.0, 257.0, 265.0, 291.0, 285.0, 319.0, 311.0, 292.0, 287.0, 266.0, 255.0, 288.0, 282.0, 296.0, 286.0, 291.0, 291.0, 290.0, 286.0, 311.0, 319.0, 314.0, 319.0, 288.0, 288.0, 278.0, 298.0, 296.0, 283.0, 286.0, 296.0, 258.0, 264.0, 283.0, 296.0, 284.0, 295.0, 298.0, 286.0, 286.0, 296.0, 321.0, 306.0, 296.0, 283.0, 285.0, 297.0, 322.0, 314.0, 286.0, 290.0, 291.0, 291.0, 296.0, 291.0, 317.0, 313.0, 321.0, 312.0, 283.0, 287.0, 248.0, 231.0, 289.0, 293.0, 289.0, 298.0, 293.0, 286.0, 316.0, 311.0, 134.0, 151.0, 292.0, 295.0, 306.0, 318.0, 296.0, 288.0, 292.0, 290.0, 293.0, 289.0, 286.0, 293.0, 327.0, 309.0, 288.0, 285.0, 314.0, 322.0, 291.0, 288.0, 314.0, 322.0, 289.0, 290.0, 311.0, 316.0, 286.0, 296.0, 294.0, 293.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6945383548540814, "mean_inference_ms": 1.2388862595050705, "mean_action_processing_ms": 0.1333267827355028, "mean_env_wait_ms": 0.8364096381922611, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 285.0, "episode_reward_mean": 579.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 134.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 289.915}, "hist_stats": {"episode_reward": [579.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 522.0, 510.0, 573.0, 570.0, 573.0, 525.0, 582.0, 579.0, 627.0, 579.0, 525.0, 579.0, 
576.0, 576.0, 584.0, 564.0, 582.0, 587.0, 570.0, 627.0, 582.0, 579.0, 579.0, 582.0, 582.0, 582.0, 579.0, 630.0, 579.0, 579.0, 582.0, 627.0, 582.0, 576.0, 570.0, 576.0, 582.0, 573.0, 525.0, 630.0, 579.0, 522.0, 576.0, 630.0, 579.0, 521.0, 570.0, 582.0, 582.0, 576.0, 630.0, 633.0, 576.0, 576.0, 579.0, 582.0, 522.0, 579.0, 579.0, 584.0, 582.0, 627.0, 579.0, 582.0, 636.0, 576.0, 582.0, 587.0, 630.0, 633.0, 570.0, 479.0, 582.0, 587.0, 579.0, 627.0, 285.0, 587.0, 624.0, 584.0, 582.0, 582.0, 579.0, 636.0, 573.0, 636.0, 579.0, 636.0, 579.0, 627.0, 582.0, 587.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 296.0, 286.0, 294.0, 288.0, 292.0, 290.0, 286.0, 293.0, 283.0, 293.0, 288.0, 294.0, 257.0, 265.0, 267.0, 243.0, 291.0, 282.0, 283.0, 287.0, 288.0, 285.0, 267.0, 258.0, 286.0, 296.0, 286.0, 293.0, 308.0, 319.0, 296.0, 283.0, 267.0, 258.0, 296.0, 283.0, 291.0, 285.0, 280.0, 296.0, 293.0, 291.0, 292.0, 272.0, 289.0, 293.0, 296.0, 291.0, 284.0, 286.0, 318.0, 309.0, 293.0, 289.0, 294.0, 285.0, 288.0, 291.0, 292.0, 290.0, 294.0, 288.0, 284.0, 298.0, 288.0, 291.0, 311.0, 319.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 316.0, 311.0, 284.0, 298.0, 288.0, 288.0, 293.0, 277.0, 294.0, 282.0, 296.0, 286.0, 283.0, 290.0, 265.0, 260.0, 311.0, 319.0, 296.0, 283.0, 257.0, 265.0, 291.0, 285.0, 319.0, 311.0, 292.0, 287.0, 266.0, 255.0, 288.0, 282.0, 296.0, 286.0, 291.0, 291.0, 290.0, 286.0, 311.0, 319.0, 314.0, 319.0, 288.0, 288.0, 278.0, 298.0, 296.0, 283.0, 286.0, 296.0, 258.0, 
264.0, 283.0, 296.0, 284.0, 295.0, 298.0, 286.0, 286.0, 296.0, 321.0, 306.0, 296.0, 283.0, 285.0, 297.0, 322.0, 314.0, 286.0, 290.0, 291.0, 291.0, 296.0, 291.0, 317.0, 313.0, 321.0, 312.0, 283.0, 287.0, 248.0, 231.0, 289.0, 293.0, 289.0, 298.0, 293.0, 286.0, 316.0, 311.0, 134.0, 151.0, 292.0, 295.0, 306.0, 318.0, 296.0, 288.0, 292.0, 290.0, 293.0, 289.0, 286.0, 293.0, 327.0, 309.0, 288.0, 285.0, 314.0, 322.0, 291.0, 288.0, 314.0, 322.0, 289.0, 290.0, 311.0, 316.0, 286.0, 296.0, 294.0, 293.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6945383548540814, "mean_inference_ms": 1.2388862595050705, "mean_action_processing_ms": 0.1333267827355028, "mean_env_wait_ms": 0.8364096381922611, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11340800, "num_agent_steps_trained": 11340800, "num_env_steps_sampled": 5670400, "num_env_steps_trained": 5670400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5670400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11340800, "timers": {"training_iteration_time_ms": 3679.648, "learn_time_ms": 1158.316, "learn_throughput": 11050.529, "synch_weights_time_ms": 11.806}, "counters": {"num_env_steps_sampled": 5670400, "num_env_steps_trained": 5670400, "num_agent_steps_sampled": 11340800, "num_agent_steps_trained": 11340800}, "done": false, "episodes_total": 14176, "training_iteration": 443, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-15", "timestamp": 1666582155, "time_this_iter_s": 3.784215211868286, "time_total_s": 1687.9618871212006, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1687.9618871212006, "timesteps_since_restore": 0, "iterations_since_restore": 443, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.98333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 202.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 180.24, "shaped_reward_min": 85, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.91, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.84, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.14, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.37, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.24, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002385055646300316, "policy_loss": 0.0020049570593982935, "vf_loss": 7.654664993286133, "vf_explained_var": 0.5863066911697388, "kl": 0.0024675102904438972, "entropy": 0.7707381248474121, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5683200, "num_env_steps_trained": 5683200, "num_agent_steps_sampled": 11366400, "num_agent_steps_trained": 11366400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 285.0, "episode_reward_mean": 585.04, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 134.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 292.52}, "custom_metrics": {"sparse_reward_mean": 202.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 180.24, "shaped_reward_min": 85, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.91, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.32, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.84, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.14, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.93, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.35, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.54, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.11, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.37, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.24, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.93, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.93, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 630.0, 579.0, 579.0, 582.0, 627.0, 582.0, 576.0, 570.0, 576.0, 582.0, 573.0, 525.0, 630.0, 579.0, 522.0, 576.0, 630.0, 579.0, 521.0, 570.0, 582.0, 582.0, 576.0, 630.0, 633.0, 576.0, 576.0, 579.0, 582.0, 522.0, 579.0, 579.0, 584.0, 582.0, 627.0, 579.0, 582.0, 636.0, 576.0, 582.0, 587.0, 630.0, 633.0, 570.0, 479.0, 582.0, 587.0, 579.0, 627.0, 285.0, 587.0, 624.0, 584.0, 582.0, 582.0, 579.0, 636.0, 573.0, 636.0, 579.0, 636.0, 579.0, 627.0, 582.0, 587.0, 582.0, 579.0, 630.0, 579.0, 519.0, 579.0, 573.0, 576.0, 630.0, 630.0, 573.0, 587.0, 582.0, 633.0, 582.0, 627.0, 584.0, 582.0, 567.0, 579.0, 584.0, 587.0, 579.0, 587.0, 630.0, 582.0, 582.0, 579.0, 627.0, 630.0, 582.0, 581.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 298.0, 288.0, 291.0, 311.0, 319.0, 286.0, 293.0, 288.0, 291.0, 288.0, 
294.0, 316.0, 311.0, 284.0, 298.0, 288.0, 288.0, 293.0, 277.0, 294.0, 282.0, 296.0, 286.0, 283.0, 290.0, 265.0, 260.0, 311.0, 319.0, 296.0, 283.0, 257.0, 265.0, 291.0, 285.0, 319.0, 311.0, 292.0, 287.0, 266.0, 255.0, 288.0, 282.0, 296.0, 286.0, 291.0, 291.0, 290.0, 286.0, 311.0, 319.0, 314.0, 319.0, 288.0, 288.0, 278.0, 298.0, 296.0, 283.0, 286.0, 296.0, 258.0, 264.0, 283.0, 296.0, 284.0, 295.0, 298.0, 286.0, 286.0, 296.0, 321.0, 306.0, 296.0, 283.0, 285.0, 297.0, 322.0, 314.0, 286.0, 290.0, 291.0, 291.0, 296.0, 291.0, 317.0, 313.0, 321.0, 312.0, 283.0, 287.0, 248.0, 231.0, 289.0, 293.0, 289.0, 298.0, 293.0, 286.0, 316.0, 311.0, 134.0, 151.0, 292.0, 295.0, 306.0, 318.0, 296.0, 288.0, 292.0, 290.0, 293.0, 289.0, 286.0, 293.0, 327.0, 309.0, 288.0, 285.0, 314.0, 322.0, 291.0, 288.0, 314.0, 322.0, 289.0, 290.0, 311.0, 316.0, 286.0, 296.0, 294.0, 293.0, 292.0, 290.0, 291.0, 288.0, 314.0, 316.0, 288.0, 291.0, 256.0, 263.0, 285.0, 294.0, 293.0, 280.0, 291.0, 285.0, 316.0, 314.0, 316.0, 314.0, 291.0, 282.0, 286.0, 301.0, 288.0, 294.0, 316.0, 317.0, 289.0, 293.0, 321.0, 306.0, 296.0, 288.0, 285.0, 297.0, 294.0, 273.0, 287.0, 292.0, 296.0, 288.0, 293.0, 294.0, 296.0, 283.0, 299.0, 288.0, 319.0, 311.0, 293.0, 289.0, 293.0, 289.0, 282.0, 297.0, 308.0, 319.0, 314.0, 316.0, 291.0, 291.0, 283.0, 298.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6944838399502834, "mean_inference_ms": 1.2388383439445032, "mean_action_processing_ms": 0.13331991171472862, "mean_env_wait_ms": 0.836413731185545, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 285.0, "episode_reward_mean": 585.04, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 134.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 292.52}, "hist_stats": {"episode_reward": [582.0, 579.0, 630.0, 579.0, 579.0, 582.0, 627.0, 582.0, 576.0, 570.0, 576.0, 582.0, 573.0, 525.0, 630.0, 579.0, 522.0, 576.0, 630.0, 
579.0, 521.0, 570.0, 582.0, 582.0, 576.0, 630.0, 633.0, 576.0, 576.0, 579.0, 582.0, 522.0, 579.0, 579.0, 584.0, 582.0, 627.0, 579.0, 582.0, 636.0, 576.0, 582.0, 587.0, 630.0, 633.0, 570.0, 479.0, 582.0, 587.0, 579.0, 627.0, 285.0, 587.0, 624.0, 584.0, 582.0, 582.0, 579.0, 636.0, 573.0, 636.0, 579.0, 636.0, 579.0, 627.0, 582.0, 587.0, 582.0, 579.0, 630.0, 579.0, 519.0, 579.0, 573.0, 576.0, 630.0, 630.0, 573.0, 587.0, 582.0, 633.0, 582.0, 627.0, 584.0, 582.0, 567.0, 579.0, 584.0, 587.0, 579.0, 587.0, 630.0, 582.0, 582.0, 579.0, 627.0, 630.0, 582.0, 581.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 298.0, 288.0, 291.0, 311.0, 319.0, 286.0, 293.0, 288.0, 291.0, 288.0, 294.0, 316.0, 311.0, 284.0, 298.0, 288.0, 288.0, 293.0, 277.0, 294.0, 282.0, 296.0, 286.0, 283.0, 290.0, 265.0, 260.0, 311.0, 319.0, 296.0, 283.0, 257.0, 265.0, 291.0, 285.0, 319.0, 311.0, 292.0, 287.0, 266.0, 255.0, 288.0, 282.0, 296.0, 286.0, 291.0, 291.0, 290.0, 286.0, 311.0, 319.0, 314.0, 319.0, 288.0, 288.0, 278.0, 298.0, 296.0, 283.0, 286.0, 296.0, 258.0, 264.0, 283.0, 296.0, 284.0, 295.0, 298.0, 286.0, 286.0, 296.0, 321.0, 306.0, 296.0, 283.0, 285.0, 297.0, 322.0, 314.0, 286.0, 290.0, 291.0, 291.0, 296.0, 291.0, 317.0, 313.0, 321.0, 312.0, 283.0, 287.0, 248.0, 231.0, 289.0, 293.0, 289.0, 298.0, 293.0, 286.0, 316.0, 311.0, 134.0, 151.0, 292.0, 295.0, 306.0, 318.0, 296.0, 288.0, 292.0, 290.0, 293.0, 289.0, 286.0, 293.0, 327.0, 309.0, 288.0, 285.0, 314.0, 322.0, 291.0, 288.0, 314.0, 322.0, 289.0, 
290.0, 311.0, 316.0, 286.0, 296.0, 294.0, 293.0, 292.0, 290.0, 291.0, 288.0, 314.0, 316.0, 288.0, 291.0, 256.0, 263.0, 285.0, 294.0, 293.0, 280.0, 291.0, 285.0, 316.0, 314.0, 316.0, 314.0, 291.0, 282.0, 286.0, 301.0, 288.0, 294.0, 316.0, 317.0, 289.0, 293.0, 321.0, 306.0, 296.0, 288.0, 285.0, 297.0, 294.0, 273.0, 287.0, 292.0, 296.0, 288.0, 293.0, 294.0, 296.0, 283.0, 299.0, 288.0, 319.0, 311.0, 293.0, 289.0, 293.0, 289.0, 282.0, 297.0, 308.0, 319.0, 314.0, 316.0, 291.0, 291.0, 283.0, 298.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6944838399502834, "mean_inference_ms": 1.2388383439445032, "mean_action_processing_ms": 0.13331991171472862, "mean_env_wait_ms": 0.836413731185545, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11366400, "num_agent_steps_trained": 11366400, "num_env_steps_sampled": 5683200, "num_env_steps_trained": 5683200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5683200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11366400, "timers": {"training_iteration_time_ms": 3681.302, "learn_time_ms": 1161.467, "learn_throughput": 11020.542, "synch_weights_time_ms": 11.211}, "counters": {"num_env_steps_sampled": 5683200, "num_env_steps_trained": 5683200, "num_agent_steps_sampled": 11366400, "num_agent_steps_trained": 11366400}, "done": false, "episodes_total": 14208, "training_iteration": 444, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-19", "timestamp": 1666582159, "time_this_iter_s": 3.6792352199554443, "time_total_s": 1691.641122341156, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1691.641122341156, "timesteps_since_restore": 0, "iterations_since_restore": 444, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.34, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.56, "shaped_reward_min": 85, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.55, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.77, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.46, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.2, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.47, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.35, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.46, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.2, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.46, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.2, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0007226134184747934, "policy_loss": -0.0011081930715590715, "vf_loss": 7.645465850830078, "vf_explained_var": 0.5968180894851685, "kl": 0.002879057079553604, "entropy": 0.7579330801963806, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5696000, "num_env_steps_trained": 5696000, "num_agent_steps_sampled": 11392000, "num_agent_steps_trained": 11392000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 285.0, "episode_reward_mean": 587.96, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 134.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 293.98}, "custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 181.56, "shaped_reward_min": 85, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.55, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.77, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.07, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.46, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.2, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.47, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.51, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.26, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.35, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.13, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.22, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.07, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.14, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.46, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.2, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.46, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.2, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 584.0, 582.0, 627.0, 579.0, 582.0, 636.0, 576.0, 582.0, 587.0, 630.0, 633.0, 570.0, 479.0, 582.0, 587.0, 579.0, 627.0, 285.0, 587.0, 624.0, 584.0, 582.0, 582.0, 579.0, 636.0, 573.0, 636.0, 579.0, 636.0, 579.0, 627.0, 582.0, 587.0, 582.0, 579.0, 630.0, 579.0, 519.0, 579.0, 573.0, 576.0, 630.0, 630.0, 573.0, 587.0, 582.0, 633.0, 582.0, 627.0, 584.0, 582.0, 567.0, 579.0, 584.0, 587.0, 579.0, 587.0, 630.0, 582.0, 582.0, 579.0, 627.0, 630.0, 582.0, 581.0, 576.0, 587.0, 582.0, 576.0, 579.0, 627.0, 630.0, 519.0, 633.0, 627.0, 579.0, 579.0, 636.0, 582.0, 576.0, 633.0, 627.0, 573.0, 582.0, 630.0, 582.0, 582.0, 587.0, 530.0, 627.0, 582.0, 579.0, 582.0, 587.0, 627.0, 582.0, 436.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 284.0, 295.0, 298.0, 286.0, 286.0, 296.0, 321.0, 306.0, 296.0, 
283.0, 285.0, 297.0, 322.0, 314.0, 286.0, 290.0, 291.0, 291.0, 296.0, 291.0, 317.0, 313.0, 321.0, 312.0, 283.0, 287.0, 248.0, 231.0, 289.0, 293.0, 289.0, 298.0, 293.0, 286.0, 316.0, 311.0, 134.0, 151.0, 292.0, 295.0, 306.0, 318.0, 296.0, 288.0, 292.0, 290.0, 293.0, 289.0, 286.0, 293.0, 327.0, 309.0, 288.0, 285.0, 314.0, 322.0, 291.0, 288.0, 314.0, 322.0, 289.0, 290.0, 311.0, 316.0, 286.0, 296.0, 294.0, 293.0, 292.0, 290.0, 291.0, 288.0, 314.0, 316.0, 288.0, 291.0, 256.0, 263.0, 285.0, 294.0, 293.0, 280.0, 291.0, 285.0, 316.0, 314.0, 316.0, 314.0, 291.0, 282.0, 286.0, 301.0, 288.0, 294.0, 316.0, 317.0, 289.0, 293.0, 321.0, 306.0, 296.0, 288.0, 285.0, 297.0, 294.0, 273.0, 287.0, 292.0, 296.0, 288.0, 293.0, 294.0, 296.0, 283.0, 299.0, 288.0, 319.0, 311.0, 293.0, 289.0, 293.0, 289.0, 282.0, 297.0, 308.0, 319.0, 314.0, 316.0, 291.0, 291.0, 283.0, 298.0, 283.0, 293.0, 296.0, 291.0, 293.0, 289.0, 285.0, 291.0, 291.0, 288.0, 314.0, 313.0, 319.0, 311.0, 266.0, 253.0, 322.0, 311.0, 318.0, 309.0, 284.0, 295.0, 287.0, 292.0, 314.0, 322.0, 291.0, 291.0, 283.0, 293.0, 308.0, 325.0, 306.0, 321.0, 285.0, 288.0, 289.0, 293.0, 319.0, 311.0, 295.0, 287.0, 288.0, 294.0, 283.0, 304.0, 270.0, 260.0, 318.0, 309.0, 289.0, 293.0, 293.0, 286.0, 291.0, 291.0, 299.0, 288.0, 313.0, 314.0, 291.0, 291.0, 223.0, 213.0, 316.0, 323.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694432246453092, "mean_inference_ms": 1.2387215474986069, "mean_action_processing_ms": 0.13331381116020222, "mean_env_wait_ms": 0.8363609822221093, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 285.0, "episode_reward_mean": 587.96, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 134.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 293.98}, "hist_stats": {"episode_reward": [579.0, 579.0, 584.0, 582.0, 627.0, 579.0, 582.0, 636.0, 576.0, 582.0, 587.0, 630.0, 633.0, 570.0, 479.0, 582.0, 587.0, 579.0, 627.0, 
285.0, 587.0, 624.0, 584.0, 582.0, 582.0, 579.0, 636.0, 573.0, 636.0, 579.0, 636.0, 579.0, 627.0, 582.0, 587.0, 582.0, 579.0, 630.0, 579.0, 519.0, 579.0, 573.0, 576.0, 630.0, 630.0, 573.0, 587.0, 582.0, 633.0, 582.0, 627.0, 584.0, 582.0, 567.0, 579.0, 584.0, 587.0, 579.0, 587.0, 630.0, 582.0, 582.0, 579.0, 627.0, 630.0, 582.0, 581.0, 576.0, 587.0, 582.0, 576.0, 579.0, 627.0, 630.0, 519.0, 633.0, 627.0, 579.0, 579.0, 636.0, 582.0, 576.0, 633.0, 627.0, 573.0, 582.0, 630.0, 582.0, 582.0, 587.0, 530.0, 627.0, 582.0, 579.0, 582.0, 587.0, 627.0, 582.0, 436.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 284.0, 295.0, 298.0, 286.0, 286.0, 296.0, 321.0, 306.0, 296.0, 283.0, 285.0, 297.0, 322.0, 314.0, 286.0, 290.0, 291.0, 291.0, 296.0, 291.0, 317.0, 313.0, 321.0, 312.0, 283.0, 287.0, 248.0, 231.0, 289.0, 293.0, 289.0, 298.0, 293.0, 286.0, 316.0, 311.0, 134.0, 151.0, 292.0, 295.0, 306.0, 318.0, 296.0, 288.0, 292.0, 290.0, 293.0, 289.0, 286.0, 293.0, 327.0, 309.0, 288.0, 285.0, 314.0, 322.0, 291.0, 288.0, 314.0, 322.0, 289.0, 290.0, 311.0, 316.0, 286.0, 296.0, 294.0, 293.0, 292.0, 290.0, 291.0, 288.0, 314.0, 316.0, 288.0, 291.0, 256.0, 263.0, 285.0, 294.0, 293.0, 280.0, 291.0, 285.0, 316.0, 314.0, 316.0, 314.0, 291.0, 282.0, 286.0, 301.0, 288.0, 294.0, 316.0, 317.0, 289.0, 293.0, 321.0, 306.0, 296.0, 288.0, 285.0, 297.0, 294.0, 273.0, 287.0, 292.0, 296.0, 288.0, 293.0, 294.0, 296.0, 283.0, 299.0, 288.0, 319.0, 311.0, 293.0, 289.0, 293.0, 289.0, 282.0, 297.0, 308.0, 
319.0, 314.0, 316.0, 291.0, 291.0, 283.0, 298.0, 283.0, 293.0, 296.0, 291.0, 293.0, 289.0, 285.0, 291.0, 291.0, 288.0, 314.0, 313.0, 319.0, 311.0, 266.0, 253.0, 322.0, 311.0, 318.0, 309.0, 284.0, 295.0, 287.0, 292.0, 314.0, 322.0, 291.0, 291.0, 283.0, 293.0, 308.0, 325.0, 306.0, 321.0, 285.0, 288.0, 289.0, 293.0, 319.0, 311.0, 295.0, 287.0, 288.0, 294.0, 283.0, 304.0, 270.0, 260.0, 318.0, 309.0, 289.0, 293.0, 293.0, 286.0, 291.0, 291.0, 299.0, 288.0, 313.0, 314.0, 291.0, 291.0, 223.0, 213.0, 316.0, 323.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694432246453092, "mean_inference_ms": 1.2387215474986069, "mean_action_processing_ms": 0.13331381116020222, "mean_env_wait_ms": 0.8363609822221093, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11392000, "num_agent_steps_trained": 11392000, "num_env_steps_sampled": 5696000, "num_env_steps_trained": 5696000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5696000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11392000, "timers": {"training_iteration_time_ms": 3686.651, "learn_time_ms": 1166.893, "learn_throughput": 10969.296, "synch_weights_time_ms": 10.581}, "counters": {"num_env_steps_sampled": 5696000, "num_env_steps_trained": 5696000, "num_agent_steps_sampled": 11392000, "num_agent_steps_trained": 11392000}, "done": false, "episodes_total": 14240, "training_iteration": 445, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-22", "timestamp": 1666582162, "time_this_iter_s": 3.789203643798828, "time_total_s": 1695.4303259849548, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1695.4303259849548, "timesteps_since_restore": 0, "iterations_since_restore": 445, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.633333333333333, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 205.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 183.52, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.93, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.76, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.83, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 
0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.47, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.58, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.32, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.47, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.58, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.47, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.58, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002931396011263132, "policy_loss": -0.0033237093593925238, "vf_loss": 7.657073497772217, "vf_explained_var": 0.5952635407447815, "kl": 0.002693354617804289, "entropy": 0.7467869520187378, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5708800, "num_env_steps_trained": 5708800, "num_agent_steps_sampled": 11417600, "num_agent_steps_trained": 11417600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 436.0, "episode_reward_mean": 595.12, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 213.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 297.56}, "custom_metrics": {"sparse_reward_mean": 205.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 183.52, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.93, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.76, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.83, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.1, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.06, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.05, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.47, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 16.58, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.63, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.42, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.32, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 9, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.24, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.2, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.47, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 16.58, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.47, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 16.58, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 587.0, 582.0, 579.0, 630.0, 579.0, 519.0, 579.0, 573.0, 576.0, 630.0, 630.0, 573.0, 587.0, 582.0, 633.0, 582.0, 627.0, 584.0, 582.0, 567.0, 579.0, 584.0, 587.0, 579.0, 587.0, 630.0, 582.0, 582.0, 579.0, 627.0, 630.0, 582.0, 581.0, 576.0, 587.0, 582.0, 576.0, 579.0, 627.0, 630.0, 519.0, 633.0, 627.0, 579.0, 579.0, 636.0, 582.0, 576.0, 633.0, 627.0, 573.0, 582.0, 630.0, 582.0, 582.0, 587.0, 530.0, 627.0, 582.0, 579.0, 582.0, 587.0, 627.0, 582.0, 436.0, 639.0, 576.0, 582.0, 576.0, 516.0, 636.0, 627.0, 579.0, 582.0, 633.0, 582.0, 639.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 633.0, 573.0, 579.0, 579.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 582.0, 582.0, 630.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 316.0, 286.0, 296.0, 294.0, 293.0, 292.0, 290.0, 291.0, 288.0, 314.0, 
316.0, 288.0, 291.0, 256.0, 263.0, 285.0, 294.0, 293.0, 280.0, 291.0, 285.0, 316.0, 314.0, 316.0, 314.0, 291.0, 282.0, 286.0, 301.0, 288.0, 294.0, 316.0, 317.0, 289.0, 293.0, 321.0, 306.0, 296.0, 288.0, 285.0, 297.0, 294.0, 273.0, 287.0, 292.0, 296.0, 288.0, 293.0, 294.0, 296.0, 283.0, 299.0, 288.0, 319.0, 311.0, 293.0, 289.0, 293.0, 289.0, 282.0, 297.0, 308.0, 319.0, 314.0, 316.0, 291.0, 291.0, 283.0, 298.0, 283.0, 293.0, 296.0, 291.0, 293.0, 289.0, 285.0, 291.0, 291.0, 288.0, 314.0, 313.0, 319.0, 311.0, 266.0, 253.0, 322.0, 311.0, 318.0, 309.0, 284.0, 295.0, 287.0, 292.0, 314.0, 322.0, 291.0, 291.0, 283.0, 293.0, 308.0, 325.0, 306.0, 321.0, 285.0, 288.0, 289.0, 293.0, 319.0, 311.0, 295.0, 287.0, 288.0, 294.0, 283.0, 304.0, 270.0, 260.0, 318.0, 309.0, 289.0, 293.0, 293.0, 286.0, 291.0, 291.0, 299.0, 288.0, 313.0, 314.0, 291.0, 291.0, 223.0, 213.0, 316.0, 323.0, 284.0, 292.0, 292.0, 290.0, 289.0, 287.0, 250.0, 266.0, 317.0, 319.0, 319.0, 308.0, 291.0, 288.0, 289.0, 293.0, 317.0, 316.0, 291.0, 291.0, 320.0, 319.0, 296.0, 286.0, 319.0, 320.0, 321.0, 315.0, 292.0, 290.0, 291.0, 296.0, 321.0, 306.0, 316.0, 317.0, 293.0, 280.0, 288.0, 291.0, 293.0, 286.0, 291.0, 291.0, 316.0, 320.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 316.0, 314.0, 293.0, 289.0, 288.0, 294.0, 318.0, 312.0, 317.0, 319.0, 316.0, 314.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6943827201604756, "mean_inference_ms": 1.2386117672157435, "mean_action_processing_ms": 0.13330709458738682, "mean_env_wait_ms": 0.8362894782649013, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 436.0, "episode_reward_mean": 595.12, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 213.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 297.56}, "hist_stats": {"episode_reward": [627.0, 582.0, 587.0, 582.0, 579.0, 630.0, 579.0, 519.0, 579.0, 573.0, 576.0, 630.0, 630.0, 573.0, 587.0, 582.0, 633.0, 582.0, 627.0, 
584.0, 582.0, 567.0, 579.0, 584.0, 587.0, 579.0, 587.0, 630.0, 582.0, 582.0, 579.0, 627.0, 630.0, 582.0, 581.0, 576.0, 587.0, 582.0, 576.0, 579.0, 627.0, 630.0, 519.0, 633.0, 627.0, 579.0, 579.0, 636.0, 582.0, 576.0, 633.0, 627.0, 573.0, 582.0, 630.0, 582.0, 582.0, 587.0, 530.0, 627.0, 582.0, 579.0, 582.0, 587.0, 627.0, 582.0, 436.0, 639.0, 576.0, 582.0, 576.0, 516.0, 636.0, 627.0, 579.0, 582.0, 633.0, 582.0, 639.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 633.0, 573.0, 579.0, 579.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 582.0, 582.0, 630.0, 636.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [311.0, 316.0, 286.0, 296.0, 294.0, 293.0, 292.0, 290.0, 291.0, 288.0, 314.0, 316.0, 288.0, 291.0, 256.0, 263.0, 285.0, 294.0, 293.0, 280.0, 291.0, 285.0, 316.0, 314.0, 316.0, 314.0, 291.0, 282.0, 286.0, 301.0, 288.0, 294.0, 316.0, 317.0, 289.0, 293.0, 321.0, 306.0, 296.0, 288.0, 285.0, 297.0, 294.0, 273.0, 287.0, 292.0, 296.0, 288.0, 293.0, 294.0, 296.0, 283.0, 299.0, 288.0, 319.0, 311.0, 293.0, 289.0, 293.0, 289.0, 282.0, 297.0, 308.0, 319.0, 314.0, 316.0, 291.0, 291.0, 283.0, 298.0, 283.0, 293.0, 296.0, 291.0, 293.0, 289.0, 285.0, 291.0, 291.0, 288.0, 314.0, 313.0, 319.0, 311.0, 266.0, 253.0, 322.0, 311.0, 318.0, 309.0, 284.0, 295.0, 287.0, 292.0, 314.0, 322.0, 291.0, 291.0, 283.0, 293.0, 308.0, 325.0, 306.0, 321.0, 285.0, 288.0, 289.0, 293.0, 319.0, 311.0, 295.0, 287.0, 288.0, 294.0, 283.0, 304.0, 270.0, 260.0, 318.0, 309.0, 289.0, 293.0, 293.0, 286.0, 291.0, 291.0, 299.0, 
288.0, 313.0, 314.0, 291.0, 291.0, 223.0, 213.0, 316.0, 323.0, 284.0, 292.0, 292.0, 290.0, 289.0, 287.0, 250.0, 266.0, 317.0, 319.0, 319.0, 308.0, 291.0, 288.0, 289.0, 293.0, 317.0, 316.0, 291.0, 291.0, 320.0, 319.0, 296.0, 286.0, 319.0, 320.0, 321.0, 315.0, 292.0, 290.0, 291.0, 296.0, 321.0, 306.0, 316.0, 317.0, 293.0, 280.0, 288.0, 291.0, 293.0, 286.0, 291.0, 291.0, 316.0, 320.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 316.0, 314.0, 293.0, 289.0, 288.0, 294.0, 318.0, 312.0, 317.0, 319.0, 316.0, 314.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6943827201604756, "mean_inference_ms": 1.2386117672157435, "mean_action_processing_ms": 0.13330709458738682, "mean_env_wait_ms": 0.8362894782649013, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11417600, "num_agent_steps_trained": 11417600, "num_env_steps_sampled": 5708800, "num_env_steps_trained": 5708800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5708800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11417600, "timers": {"training_iteration_time_ms": 3674.432, "learn_time_ms": 1151.071, "learn_throughput": 11120.082, "synch_weights_time_ms": 10.502}, "counters": {"num_env_steps_sampled": 5708800, "num_env_steps_trained": 5708800, "num_agent_steps_sampled": 11417600, "num_agent_steps_trained": 11417600}, "done": false, "episodes_total": 14272, "training_iteration": 446, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-26", "timestamp": 1666582166, "time_this_iter_s": 3.600297212600708, "time_total_s": 1699.0306231975555, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1699.0306231975555, "timesteps_since_restore": 0, "iterations_since_restore": 446, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.720000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 204.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.27, "shaped_reward_min": 54, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.82, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.68, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.72, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.35, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.5, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.55, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.17, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.35, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.5, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.35, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.5, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0001785162603482604, "policy_loss": -0.0005822917446494102, "vf_loss": 7.868484020233154, "vf_explained_var": 0.5918752551078796, "kl": 0.003730412572622299, "entropy": 0.7661446928977966, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5721600, "num_env_steps_trained": 5721600, "num_agent_steps_sampled": 11443200, "num_agent_steps_trained": 11443200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 174.0, "episode_reward_mean": 591.87, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 295.935}, "custom_metrics": {"sparse_reward_mean": 204.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.27, "shaped_reward_min": 54, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.78, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.82, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.68, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.72, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.12, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 3, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.09, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.35, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.5, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.55, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.37, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.17, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.15, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.35, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.5, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.35, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.5, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 582.0, 581.0, 576.0, 587.0, 582.0, 576.0, 579.0, 627.0, 630.0, 519.0, 633.0, 627.0, 579.0, 579.0, 636.0, 582.0, 576.0, 633.0, 627.0, 573.0, 582.0, 630.0, 582.0, 582.0, 587.0, 530.0, 627.0, 582.0, 579.0, 582.0, 587.0, 627.0, 582.0, 436.0, 639.0, 576.0, 582.0, 576.0, 516.0, 636.0, 627.0, 579.0, 582.0, 633.0, 582.0, 639.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 633.0, 573.0, 579.0, 579.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 582.0, 582.0, 630.0, 636.0, 630.0, 174.0, 576.0, 576.0, 636.0, 624.0, 576.0, 527.0, 630.0, 576.0, 579.0, 587.0, 573.0, 582.0, 573.0, 633.0, 579.0, 630.0, 579.0, 582.0, 636.0, 636.0, 587.0, 630.0, 630.0, 633.0, 582.0, 593.0, 633.0, 570.0, 582.0, 510.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 316.0, 291.0, 291.0, 283.0, 298.0, 283.0, 293.0, 296.0, 291.0, 293.0, 
289.0, 285.0, 291.0, 291.0, 288.0, 314.0, 313.0, 319.0, 311.0, 266.0, 253.0, 322.0, 311.0, 318.0, 309.0, 284.0, 295.0, 287.0, 292.0, 314.0, 322.0, 291.0, 291.0, 283.0, 293.0, 308.0, 325.0, 306.0, 321.0, 285.0, 288.0, 289.0, 293.0, 319.0, 311.0, 295.0, 287.0, 288.0, 294.0, 283.0, 304.0, 270.0, 260.0, 318.0, 309.0, 289.0, 293.0, 293.0, 286.0, 291.0, 291.0, 299.0, 288.0, 313.0, 314.0, 291.0, 291.0, 223.0, 213.0, 316.0, 323.0, 284.0, 292.0, 292.0, 290.0, 289.0, 287.0, 250.0, 266.0, 317.0, 319.0, 319.0, 308.0, 291.0, 288.0, 289.0, 293.0, 317.0, 316.0, 291.0, 291.0, 320.0, 319.0, 296.0, 286.0, 319.0, 320.0, 321.0, 315.0, 292.0, 290.0, 291.0, 296.0, 321.0, 306.0, 316.0, 317.0, 293.0, 280.0, 288.0, 291.0, 293.0, 286.0, 291.0, 291.0, 316.0, 320.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 316.0, 314.0, 293.0, 289.0, 288.0, 294.0, 318.0, 312.0, 317.0, 319.0, 316.0, 314.0, 91.0, 83.0, 289.0, 287.0, 283.0, 293.0, 316.0, 320.0, 302.0, 322.0, 293.0, 283.0, 267.0, 260.0, 313.0, 317.0, 287.0, 289.0, 301.0, 278.0, 291.0, 296.0, 282.0, 291.0, 294.0, 288.0, 290.0, 283.0, 312.0, 321.0, 291.0, 288.0, 311.0, 319.0, 286.0, 293.0, 291.0, 291.0, 317.0, 319.0, 317.0, 319.0, 297.0, 290.0, 319.0, 311.0, 313.0, 317.0, 319.0, 314.0, 289.0, 293.0, 296.0, 297.0, 320.0, 313.0, 283.0, 287.0, 294.0, 288.0, 252.0, 258.0, 273.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6943396641366125, "mean_inference_ms": 1.238497366545473, "mean_action_processing_ms": 0.13330002318819154, "mean_env_wait_ms": 0.8362190938658364, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 174.0, "episode_reward_mean": 591.87, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 295.935}, "hist_stats": {"episode_reward": [630.0, 582.0, 581.0, 576.0, 587.0, 582.0, 576.0, 579.0, 627.0, 630.0, 519.0, 633.0, 627.0, 579.0, 579.0, 636.0, 582.0, 576.0, 633.0, 
627.0, 573.0, 582.0, 630.0, 582.0, 582.0, 587.0, 530.0, 627.0, 582.0, 579.0, 582.0, 587.0, 627.0, 582.0, 436.0, 639.0, 576.0, 582.0, 576.0, 516.0, 636.0, 627.0, 579.0, 582.0, 633.0, 582.0, 639.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 633.0, 573.0, 579.0, 579.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 582.0, 582.0, 630.0, 636.0, 630.0, 174.0, 576.0, 576.0, 636.0, 624.0, 576.0, 527.0, 630.0, 576.0, 579.0, 587.0, 573.0, 582.0, 573.0, 633.0, 579.0, 630.0, 579.0, 582.0, 636.0, 636.0, 587.0, 630.0, 630.0, 633.0, 582.0, 593.0, 633.0, 570.0, 582.0, 510.0, 567.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [314.0, 316.0, 291.0, 291.0, 283.0, 298.0, 283.0, 293.0, 296.0, 291.0, 293.0, 289.0, 285.0, 291.0, 291.0, 288.0, 314.0, 313.0, 319.0, 311.0, 266.0, 253.0, 322.0, 311.0, 318.0, 309.0, 284.0, 295.0, 287.0, 292.0, 314.0, 322.0, 291.0, 291.0, 283.0, 293.0, 308.0, 325.0, 306.0, 321.0, 285.0, 288.0, 289.0, 293.0, 319.0, 311.0, 295.0, 287.0, 288.0, 294.0, 283.0, 304.0, 270.0, 260.0, 318.0, 309.0, 289.0, 293.0, 293.0, 286.0, 291.0, 291.0, 299.0, 288.0, 313.0, 314.0, 291.0, 291.0, 223.0, 213.0, 316.0, 323.0, 284.0, 292.0, 292.0, 290.0, 289.0, 287.0, 250.0, 266.0, 317.0, 319.0, 319.0, 308.0, 291.0, 288.0, 289.0, 293.0, 317.0, 316.0, 291.0, 291.0, 320.0, 319.0, 296.0, 286.0, 319.0, 320.0, 321.0, 315.0, 292.0, 290.0, 291.0, 296.0, 321.0, 306.0, 316.0, 317.0, 293.0, 280.0, 288.0, 291.0, 293.0, 286.0, 291.0, 291.0, 316.0, 320.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 316.0, 314.0, 293.0, 
289.0, 288.0, 294.0, 318.0, 312.0, 317.0, 319.0, 316.0, 314.0, 91.0, 83.0, 289.0, 287.0, 283.0, 293.0, 316.0, 320.0, 302.0, 322.0, 293.0, 283.0, 267.0, 260.0, 313.0, 317.0, 287.0, 289.0, 301.0, 278.0, 291.0, 296.0, 282.0, 291.0, 294.0, 288.0, 290.0, 283.0, 312.0, 321.0, 291.0, 288.0, 311.0, 319.0, 286.0, 293.0, 291.0, 291.0, 317.0, 319.0, 317.0, 319.0, 297.0, 290.0, 319.0, 311.0, 313.0, 317.0, 319.0, 314.0, 289.0, 293.0, 296.0, 297.0, 320.0, 313.0, 283.0, 287.0, 294.0, 288.0, 252.0, 258.0, 273.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6943396641366125, "mean_inference_ms": 1.238497366545473, "mean_action_processing_ms": 0.13330002318819154, "mean_env_wait_ms": 0.8362190938658364, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11443200, "num_agent_steps_trained": 11443200, "num_env_steps_sampled": 5721600, "num_env_steps_trained": 5721600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5721600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11443200, "timers": {"training_iteration_time_ms": 3671.393, "learn_time_ms": 1148.433, "learn_throughput": 11145.621, "synch_weights_time_ms": 10.642}, "counters": {"num_env_steps_sampled": 5721600, "num_env_steps_trained": 5721600, "num_agent_steps_sampled": 11443200, "num_agent_steps_trained": 11443200}, "done": false, "episodes_total": 14304, "training_iteration": 447, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-30", "timestamp": 1666582170, "time_this_iter_s": 3.6180174350738525, "time_total_s": 1702.6486406326294, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1702.6486406326294, "timesteps_since_restore": 0, "iterations_since_restore": 447, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.45, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 205.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.55, "shaped_reward_min": 54, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 16.55, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.88, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.64, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.36, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.27, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.22, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.64, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.64, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002196711488068104, "policy_loss": 0.001786647131666541, "vf_loss": 7.865475177764893, "vf_explained_var": 0.5819615721702576, "kl": 0.002893120050430298, "entropy": 0.7529665231704712, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5734400, "num_env_steps_trained": 5734400, "num_agent_steps_sampled": 11468800, "num_agent_steps_trained": 11468800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 174.0, "episode_reward_mean": 592.55, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 296.275}, "custom_metrics": {"sparse_reward_mean": 205.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 182.55, "shaped_reward_min": 54, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.95, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 20, "onion_pickup_agent_1_mean": 16.55, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.88, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 20, "useful_onion_pickup_agent_1_mean": 16.4, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.64, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.21, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.5, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.34, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.36, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.03, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 5.11, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.27, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.06, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.22, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.64, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.21, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.64, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.21, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 436.0, 639.0, 576.0, 582.0, 576.0, 516.0, 636.0, 627.0, 579.0, 582.0, 633.0, 582.0, 639.0, 582.0, 639.0, 636.0, 582.0, 587.0, 627.0, 633.0, 573.0, 579.0, 579.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 582.0, 582.0, 630.0, 636.0, 630.0, 174.0, 576.0, 576.0, 636.0, 624.0, 576.0, 527.0, 630.0, 576.0, 579.0, 587.0, 573.0, 582.0, 573.0, 633.0, 579.0, 630.0, 579.0, 582.0, 636.0, 636.0, 587.0, 630.0, 630.0, 633.0, 582.0, 593.0, 633.0, 570.0, 582.0, 510.0, 567.0, 627.0, 639.0, 587.0, 582.0, 587.0, 587.0, 579.0, 624.0, 627.0, 582.0, 582.0, 587.0, 633.0, 576.0, 573.0, 519.0, 582.0, 582.0, 579.0, 582.0, 630.0, 582.0, 633.0, 627.0, 582.0, 633.0, 582.0, 627.0, 570.0, 587.0, 527.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 291.0, 291.0, 223.0, 213.0, 316.0, 323.0, 284.0, 292.0, 292.0, 
290.0, 289.0, 287.0, 250.0, 266.0, 317.0, 319.0, 319.0, 308.0, 291.0, 288.0, 289.0, 293.0, 317.0, 316.0, 291.0, 291.0, 320.0, 319.0, 296.0, 286.0, 319.0, 320.0, 321.0, 315.0, 292.0, 290.0, 291.0, 296.0, 321.0, 306.0, 316.0, 317.0, 293.0, 280.0, 288.0, 291.0, 293.0, 286.0, 291.0, 291.0, 316.0, 320.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 316.0, 314.0, 293.0, 289.0, 288.0, 294.0, 318.0, 312.0, 317.0, 319.0, 316.0, 314.0, 91.0, 83.0, 289.0, 287.0, 283.0, 293.0, 316.0, 320.0, 302.0, 322.0, 293.0, 283.0, 267.0, 260.0, 313.0, 317.0, 287.0, 289.0, 301.0, 278.0, 291.0, 296.0, 282.0, 291.0, 294.0, 288.0, 290.0, 283.0, 312.0, 321.0, 291.0, 288.0, 311.0, 319.0, 286.0, 293.0, 291.0, 291.0, 317.0, 319.0, 317.0, 319.0, 297.0, 290.0, 319.0, 311.0, 313.0, 317.0, 319.0, 314.0, 289.0, 293.0, 296.0, 297.0, 320.0, 313.0, 283.0, 287.0, 294.0, 288.0, 252.0, 258.0, 273.0, 294.0, 308.0, 319.0, 323.0, 316.0, 294.0, 293.0, 291.0, 291.0, 293.0, 294.0, 283.0, 304.0, 294.0, 285.0, 325.0, 299.0, 311.0, 316.0, 295.0, 287.0, 294.0, 288.0, 294.0, 293.0, 322.0, 311.0, 283.0, 293.0, 280.0, 293.0, 263.0, 256.0, 286.0, 296.0, 293.0, 289.0, 283.0, 296.0, 298.0, 284.0, 319.0, 311.0, 300.0, 282.0, 320.0, 313.0, 321.0, 306.0, 288.0, 294.0, 322.0, 311.0, 289.0, 293.0, 311.0, 316.0, 276.0, 294.0, 292.0, 295.0, 265.0, 262.0, 319.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6942923385290828, "mean_inference_ms": 1.2383771063815237, "mean_action_processing_ms": 0.13329224665823974, "mean_env_wait_ms": 0.8361472055742027, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 174.0, "episode_reward_mean": 592.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 296.275}, "hist_stats": {"episode_reward": [627.0, 582.0, 436.0, 639.0, 576.0, 582.0, 576.0, 516.0, 636.0, 627.0, 579.0, 582.0, 633.0, 582.0, 639.0, 582.0, 639.0, 636.0, 582.0, 
587.0, 627.0, 633.0, 573.0, 579.0, 579.0, 582.0, 636.0, 636.0, 636.0, 633.0, 630.0, 582.0, 582.0, 630.0, 636.0, 630.0, 174.0, 576.0, 576.0, 636.0, 624.0, 576.0, 527.0, 630.0, 576.0, 579.0, 587.0, 573.0, 582.0, 573.0, 633.0, 579.0, 630.0, 579.0, 582.0, 636.0, 636.0, 587.0, 630.0, 630.0, 633.0, 582.0, 593.0, 633.0, 570.0, 582.0, 510.0, 567.0, 627.0, 639.0, 587.0, 582.0, 587.0, 587.0, 579.0, 624.0, 627.0, 582.0, 582.0, 587.0, 633.0, 576.0, 573.0, 519.0, 582.0, 582.0, 579.0, 582.0, 630.0, 582.0, 633.0, 627.0, 582.0, 633.0, 582.0, 627.0, 570.0, 587.0, 527.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 291.0, 291.0, 223.0, 213.0, 316.0, 323.0, 284.0, 292.0, 292.0, 290.0, 289.0, 287.0, 250.0, 266.0, 317.0, 319.0, 319.0, 308.0, 291.0, 288.0, 289.0, 293.0, 317.0, 316.0, 291.0, 291.0, 320.0, 319.0, 296.0, 286.0, 319.0, 320.0, 321.0, 315.0, 292.0, 290.0, 291.0, 296.0, 321.0, 306.0, 316.0, 317.0, 293.0, 280.0, 288.0, 291.0, 293.0, 286.0, 291.0, 291.0, 316.0, 320.0, 317.0, 319.0, 317.0, 319.0, 314.0, 319.0, 316.0, 314.0, 293.0, 289.0, 288.0, 294.0, 318.0, 312.0, 317.0, 319.0, 316.0, 314.0, 91.0, 83.0, 289.0, 287.0, 283.0, 293.0, 316.0, 320.0, 302.0, 322.0, 293.0, 283.0, 267.0, 260.0, 313.0, 317.0, 287.0, 289.0, 301.0, 278.0, 291.0, 296.0, 282.0, 291.0, 294.0, 288.0, 290.0, 283.0, 312.0, 321.0, 291.0, 288.0, 311.0, 319.0, 286.0, 293.0, 291.0, 291.0, 317.0, 319.0, 317.0, 319.0, 297.0, 290.0, 319.0, 311.0, 313.0, 317.0, 319.0, 314.0, 289.0, 293.0, 296.0, 297.0, 320.0, 
313.0, 283.0, 287.0, 294.0, 288.0, 252.0, 258.0, 273.0, 294.0, 308.0, 319.0, 323.0, 316.0, 294.0, 293.0, 291.0, 291.0, 293.0, 294.0, 283.0, 304.0, 294.0, 285.0, 325.0, 299.0, 311.0, 316.0, 295.0, 287.0, 294.0, 288.0, 294.0, 293.0, 322.0, 311.0, 283.0, 293.0, 280.0, 293.0, 263.0, 256.0, 286.0, 296.0, 293.0, 289.0, 283.0, 296.0, 298.0, 284.0, 319.0, 311.0, 300.0, 282.0, 320.0, 313.0, 321.0, 306.0, 288.0, 294.0, 322.0, 311.0, 289.0, 293.0, 311.0, 316.0, 276.0, 294.0, 292.0, 295.0, 265.0, 262.0, 319.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6942923385290828, "mean_inference_ms": 1.2383771063815237, "mean_action_processing_ms": 0.13329224665823974, "mean_env_wait_ms": 0.8361472055742027, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11468800, "num_agent_steps_trained": 11468800, "num_env_steps_sampled": 5734400, "num_env_steps_trained": 5734400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5734400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11468800, "timers": {"training_iteration_time_ms": 3669.461, "learn_time_ms": 1149.523, "learn_throughput": 11135.05, "synch_weights_time_ms": 10.803}, "counters": {"num_env_steps_sampled": 5734400, "num_env_steps_trained": 5734400, "num_agent_steps_sampled": 11468800, "num_agent_steps_trained": 11468800}, "done": false, "episodes_total": 14336, "training_iteration": 448, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-34", "timestamp": 1666582174, "time_this_iter_s": 3.6794474124908447, "time_total_s": 1706.3280880451202, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1706.3280880451202, "timesteps_since_restore": 0, "iterations_since_restore": 448, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.659999999999997, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 203.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 180.34, "shaped_reward_min": 54, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.43, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.7, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.24, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.06, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.61, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.33, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.25, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.18, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.4, 
"optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.06, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.06, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0027708648703992367, "policy_loss": 0.0023652866948395967, "vf_loss": 7.862312316894531, "vf_explained_var": 0.6047019958496094, "kl": 0.003352868603542447, "entropy": 0.7613069415092468, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5747200, "num_env_steps_trained": 5747200, "num_agent_steps_sampled": 11494400, "num_agent_steps_trained": 11494400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 174.0, "episode_reward_mean": 586.34, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.17}, "custom_metrics": {"sparse_reward_mean": 203.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 180.34, "shaped_reward_min": 54, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.77, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.43, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.7, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.24, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.1, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.04, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 6, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.06, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.41, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.61, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.33, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.15, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 5.03, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.25, "soup_pickup_agent_1_min": 1, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.18, "soup_delivery_agent_1_min": 1, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 6, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.06, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 6, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.06, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 636.0, 630.0, 174.0, 576.0, 576.0, 636.0, 624.0, 576.0, 527.0, 630.0, 576.0, 579.0, 587.0, 573.0, 582.0, 573.0, 633.0, 579.0, 630.0, 579.0, 582.0, 636.0, 636.0, 587.0, 630.0, 630.0, 633.0, 582.0, 593.0, 633.0, 570.0, 582.0, 510.0, 567.0, 627.0, 639.0, 587.0, 582.0, 587.0, 587.0, 579.0, 624.0, 627.0, 582.0, 582.0, 587.0, 633.0, 576.0, 573.0, 519.0, 582.0, 582.0, 579.0, 582.0, 630.0, 582.0, 633.0, 627.0, 582.0, 633.0, 582.0, 627.0, 570.0, 587.0, 527.0, 636.0, 633.0, 633.0, 573.0, 630.0, 627.0, 579.0, 582.0, 587.0, 579.0, 180.0, 570.0, 630.0, 627.0, 570.0, 579.0, 413.0, 579.0, 576.0, 630.0, 582.0, 630.0, 576.0, 636.0, 525.0, 639.0, 587.0, 579.0, 522.0, 582.0, 639.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 318.0, 312.0, 317.0, 319.0, 316.0, 314.0, 91.0, 83.0, 289.0, 287.0, 
283.0, 293.0, 316.0, 320.0, 302.0, 322.0, 293.0, 283.0, 267.0, 260.0, 313.0, 317.0, 287.0, 289.0, 301.0, 278.0, 291.0, 296.0, 282.0, 291.0, 294.0, 288.0, 290.0, 283.0, 312.0, 321.0, 291.0, 288.0, 311.0, 319.0, 286.0, 293.0, 291.0, 291.0, 317.0, 319.0, 317.0, 319.0, 297.0, 290.0, 319.0, 311.0, 313.0, 317.0, 319.0, 314.0, 289.0, 293.0, 296.0, 297.0, 320.0, 313.0, 283.0, 287.0, 294.0, 288.0, 252.0, 258.0, 273.0, 294.0, 308.0, 319.0, 323.0, 316.0, 294.0, 293.0, 291.0, 291.0, 293.0, 294.0, 283.0, 304.0, 294.0, 285.0, 325.0, 299.0, 311.0, 316.0, 295.0, 287.0, 294.0, 288.0, 294.0, 293.0, 322.0, 311.0, 283.0, 293.0, 280.0, 293.0, 263.0, 256.0, 286.0, 296.0, 293.0, 289.0, 283.0, 296.0, 298.0, 284.0, 319.0, 311.0, 300.0, 282.0, 320.0, 313.0, 321.0, 306.0, 288.0, 294.0, 322.0, 311.0, 289.0, 293.0, 311.0, 316.0, 276.0, 294.0, 292.0, 295.0, 265.0, 262.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 285.0, 288.0, 314.0, 316.0, 310.0, 317.0, 283.0, 296.0, 293.0, 289.0, 289.0, 298.0, 292.0, 287.0, 89.0, 91.0, 271.0, 299.0, 317.0, 313.0, 308.0, 319.0, 290.0, 280.0, 291.0, 288.0, 205.0, 208.0, 294.0, 285.0, 285.0, 291.0, 316.0, 314.0, 291.0, 291.0, 314.0, 316.0, 283.0, 293.0, 316.0, 320.0, 266.0, 259.0, 318.0, 321.0, 291.0, 296.0, 293.0, 286.0, 256.0, 266.0, 291.0, 291.0, 324.0, 315.0, 314.0, 322.0, 317.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694239495603513, "mean_inference_ms": 1.2382438214862441, "mean_action_processing_ms": 0.13328110236079596, "mean_env_wait_ms": 0.8360554712674403, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 174.0, "episode_reward_mean": 586.34, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 83.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.17}, "hist_stats": {"episode_reward": [582.0, 630.0, 636.0, 630.0, 174.0, 576.0, 576.0, 636.0, 624.0, 576.0, 527.0, 630.0, 576.0, 579.0, 587.0, 573.0, 582.0, 573.0, 633.0, 579.0, 
630.0, 579.0, 582.0, 636.0, 636.0, 587.0, 630.0, 630.0, 633.0, 582.0, 593.0, 633.0, 570.0, 582.0, 510.0, 567.0, 627.0, 639.0, 587.0, 582.0, 587.0, 587.0, 579.0, 624.0, 627.0, 582.0, 582.0, 587.0, 633.0, 576.0, 573.0, 519.0, 582.0, 582.0, 579.0, 582.0, 630.0, 582.0, 633.0, 627.0, 582.0, 633.0, 582.0, 627.0, 570.0, 587.0, 527.0, 636.0, 633.0, 633.0, 573.0, 630.0, 627.0, 579.0, 582.0, 587.0, 579.0, 180.0, 570.0, 630.0, 627.0, 570.0, 579.0, 413.0, 579.0, 576.0, 630.0, 582.0, 630.0, 576.0, 636.0, 525.0, 639.0, 587.0, 579.0, 522.0, 582.0, 639.0, 636.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 318.0, 312.0, 317.0, 319.0, 316.0, 314.0, 91.0, 83.0, 289.0, 287.0, 283.0, 293.0, 316.0, 320.0, 302.0, 322.0, 293.0, 283.0, 267.0, 260.0, 313.0, 317.0, 287.0, 289.0, 301.0, 278.0, 291.0, 296.0, 282.0, 291.0, 294.0, 288.0, 290.0, 283.0, 312.0, 321.0, 291.0, 288.0, 311.0, 319.0, 286.0, 293.0, 291.0, 291.0, 317.0, 319.0, 317.0, 319.0, 297.0, 290.0, 319.0, 311.0, 313.0, 317.0, 319.0, 314.0, 289.0, 293.0, 296.0, 297.0, 320.0, 313.0, 283.0, 287.0, 294.0, 288.0, 252.0, 258.0, 273.0, 294.0, 308.0, 319.0, 323.0, 316.0, 294.0, 293.0, 291.0, 291.0, 293.0, 294.0, 283.0, 304.0, 294.0, 285.0, 325.0, 299.0, 311.0, 316.0, 295.0, 287.0, 294.0, 288.0, 294.0, 293.0, 322.0, 311.0, 283.0, 293.0, 280.0, 293.0, 263.0, 256.0, 286.0, 296.0, 293.0, 289.0, 283.0, 296.0, 298.0, 284.0, 319.0, 311.0, 300.0, 282.0, 320.0, 313.0, 321.0, 306.0, 288.0, 294.0, 322.0, 311.0, 289.0, 293.0, 311.0, 316.0, 
276.0, 294.0, 292.0, 295.0, 265.0, 262.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 285.0, 288.0, 314.0, 316.0, 310.0, 317.0, 283.0, 296.0, 293.0, 289.0, 289.0, 298.0, 292.0, 287.0, 89.0, 91.0, 271.0, 299.0, 317.0, 313.0, 308.0, 319.0, 290.0, 280.0, 291.0, 288.0, 205.0, 208.0, 294.0, 285.0, 285.0, 291.0, 316.0, 314.0, 291.0, 291.0, 314.0, 316.0, 283.0, 293.0, 316.0, 320.0, 266.0, 259.0, 318.0, 321.0, 291.0, 296.0, 293.0, 286.0, 256.0, 266.0, 291.0, 291.0, 324.0, 315.0, 314.0, 322.0, 317.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694239495603513, "mean_inference_ms": 1.2382438214862441, "mean_action_processing_ms": 0.13328110236079596, "mean_env_wait_ms": 0.8360554712674403, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11494400, "num_agent_steps_trained": 11494400, "num_env_steps_sampled": 5747200, "num_env_steps_trained": 5747200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5747200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11494400, "timers": {"training_iteration_time_ms": 3668.549, "learn_time_ms": 1147.299, "learn_throughput": 11156.636, "synch_weights_time_ms": 10.947}, "counters": {"num_env_steps_sampled": 5747200, "num_env_steps_trained": 5747200, "num_agent_steps_sampled": 11494400, "num_agent_steps_trained": 11494400}, "done": false, "episodes_total": 14368, "training_iteration": 449, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-38", "timestamp": 1666582178, "time_this_iter_s": 3.6126880645751953, "time_total_s": 1709.9407761096954, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1709.9407761096954, "timesteps_since_restore": 0, "iterations_since_restore": 449, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.32, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 204.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 181.4, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.46, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.28, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.58, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.07, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.41, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.34, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.27, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.58, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.07, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.58, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.07, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002730258274823427, "policy_loss": -0.0031343363225460052, "vf_loss": 7.75507926940918, "vf_explained_var": 0.6010830402374268, "kl": 0.002409199485555291, "entropy": 0.7428597211837769, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5760000, "num_env_steps_trained": 5760000, "num_agent_steps_sampled": 11520000, "num_agent_steps_trained": 11520000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 590.2, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 295.1}, "custom_metrics": {"sparse_reward_mean": 204.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 181.4, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.87, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.46, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.83, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.28, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.58, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.07, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.43, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.19, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.41, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.14, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.12, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.34, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.95, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.27, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.58, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.07, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.58, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.07, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 582.0, 510.0, 567.0, 627.0, 639.0, 587.0, 582.0, 587.0, 587.0, 579.0, 624.0, 627.0, 582.0, 582.0, 587.0, 633.0, 576.0, 573.0, 519.0, 582.0, 582.0, 579.0, 582.0, 630.0, 582.0, 633.0, 627.0, 582.0, 633.0, 582.0, 627.0, 570.0, 587.0, 527.0, 636.0, 633.0, 633.0, 573.0, 630.0, 627.0, 579.0, 582.0, 587.0, 579.0, 180.0, 570.0, 630.0, 627.0, 570.0, 579.0, 413.0, 579.0, 576.0, 630.0, 582.0, 630.0, 576.0, 636.0, 525.0, 639.0, 587.0, 579.0, 522.0, 582.0, 639.0, 636.0, 633.0, 579.0, 579.0, 633.0, 630.0, 582.0, 582.0, 579.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 630.0, 576.0, 630.0, 582.0, 582.0, 630.0, 627.0, 582.0, 639.0, 570.0, 627.0, 576.0, 573.0, 582.0, 584.0, 582.0, 627.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 287.0, 294.0, 288.0, 252.0, 258.0, 273.0, 294.0, 308.0, 319.0, 323.0, 
316.0, 294.0, 293.0, 291.0, 291.0, 293.0, 294.0, 283.0, 304.0, 294.0, 285.0, 325.0, 299.0, 311.0, 316.0, 295.0, 287.0, 294.0, 288.0, 294.0, 293.0, 322.0, 311.0, 283.0, 293.0, 280.0, 293.0, 263.0, 256.0, 286.0, 296.0, 293.0, 289.0, 283.0, 296.0, 298.0, 284.0, 319.0, 311.0, 300.0, 282.0, 320.0, 313.0, 321.0, 306.0, 288.0, 294.0, 322.0, 311.0, 289.0, 293.0, 311.0, 316.0, 276.0, 294.0, 292.0, 295.0, 265.0, 262.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 285.0, 288.0, 314.0, 316.0, 310.0, 317.0, 283.0, 296.0, 293.0, 289.0, 289.0, 298.0, 292.0, 287.0, 89.0, 91.0, 271.0, 299.0, 317.0, 313.0, 308.0, 319.0, 290.0, 280.0, 291.0, 288.0, 205.0, 208.0, 294.0, 285.0, 285.0, 291.0, 316.0, 314.0, 291.0, 291.0, 314.0, 316.0, 283.0, 293.0, 316.0, 320.0, 266.0, 259.0, 318.0, 321.0, 291.0, 296.0, 293.0, 286.0, 256.0, 266.0, 291.0, 291.0, 324.0, 315.0, 314.0, 322.0, 317.0, 316.0, 290.0, 289.0, 296.0, 283.0, 312.0, 321.0, 312.0, 318.0, 291.0, 291.0, 288.0, 294.0, 289.0, 290.0, 308.0, 322.0, 286.0, 301.0, 319.0, 308.0, 287.0, 292.0, 319.0, 314.0, 297.0, 285.0, 319.0, 311.0, 288.0, 288.0, 311.0, 319.0, 289.0, 293.0, 292.0, 290.0, 311.0, 319.0, 311.0, 316.0, 294.0, 288.0, 322.0, 317.0, 282.0, 288.0, 319.0, 308.0, 283.0, 293.0, 287.0, 286.0, 288.0, 294.0, 290.0, 294.0, 292.0, 290.0, 311.0, 316.0, 290.0, 292.0, 317.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694306098577838, "mean_inference_ms": 1.2382181975816857, "mean_action_processing_ms": 0.1332722053111739, "mean_env_wait_ms": 0.835985405310874, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 590.2, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 295.1}, "hist_stats": {"episode_reward": [570.0, 582.0, 510.0, 567.0, 627.0, 639.0, 587.0, 582.0, 587.0, 587.0, 579.0, 624.0, 627.0, 582.0, 582.0, 587.0, 633.0, 576.0, 573.0, 519.0, 
582.0, 582.0, 579.0, 582.0, 630.0, 582.0, 633.0, 627.0, 582.0, 633.0, 582.0, 627.0, 570.0, 587.0, 527.0, 636.0, 633.0, 633.0, 573.0, 630.0, 627.0, 579.0, 582.0, 587.0, 579.0, 180.0, 570.0, 630.0, 627.0, 570.0, 579.0, 413.0, 579.0, 576.0, 630.0, 582.0, 630.0, 576.0, 636.0, 525.0, 639.0, 587.0, 579.0, 522.0, 582.0, 639.0, 636.0, 633.0, 579.0, 579.0, 633.0, 630.0, 582.0, 582.0, 579.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 630.0, 576.0, 630.0, 582.0, 582.0, 630.0, 627.0, 582.0, 639.0, 570.0, 627.0, 576.0, 573.0, 582.0, 584.0, 582.0, 627.0, 582.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 287.0, 294.0, 288.0, 252.0, 258.0, 273.0, 294.0, 308.0, 319.0, 323.0, 316.0, 294.0, 293.0, 291.0, 291.0, 293.0, 294.0, 283.0, 304.0, 294.0, 285.0, 325.0, 299.0, 311.0, 316.0, 295.0, 287.0, 294.0, 288.0, 294.0, 293.0, 322.0, 311.0, 283.0, 293.0, 280.0, 293.0, 263.0, 256.0, 286.0, 296.0, 293.0, 289.0, 283.0, 296.0, 298.0, 284.0, 319.0, 311.0, 300.0, 282.0, 320.0, 313.0, 321.0, 306.0, 288.0, 294.0, 322.0, 311.0, 289.0, 293.0, 311.0, 316.0, 276.0, 294.0, 292.0, 295.0, 265.0, 262.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 285.0, 288.0, 314.0, 316.0, 310.0, 317.0, 283.0, 296.0, 293.0, 289.0, 289.0, 298.0, 292.0, 287.0, 89.0, 91.0, 271.0, 299.0, 317.0, 313.0, 308.0, 319.0, 290.0, 280.0, 291.0, 288.0, 205.0, 208.0, 294.0, 285.0, 285.0, 291.0, 316.0, 314.0, 291.0, 291.0, 314.0, 316.0, 283.0, 293.0, 316.0, 320.0, 266.0, 259.0, 318.0, 321.0, 291.0, 296.0, 293.0, 286.0, 256.0, 266.0, 
291.0, 291.0, 324.0, 315.0, 314.0, 322.0, 317.0, 316.0, 290.0, 289.0, 296.0, 283.0, 312.0, 321.0, 312.0, 318.0, 291.0, 291.0, 288.0, 294.0, 289.0, 290.0, 308.0, 322.0, 286.0, 301.0, 319.0, 308.0, 287.0, 292.0, 319.0, 314.0, 297.0, 285.0, 319.0, 311.0, 288.0, 288.0, 311.0, 319.0, 289.0, 293.0, 292.0, 290.0, 311.0, 319.0, 311.0, 316.0, 294.0, 288.0, 322.0, 317.0, 282.0, 288.0, 319.0, 308.0, 283.0, 293.0, 287.0, 286.0, 288.0, 294.0, 290.0, 294.0, 292.0, 290.0, 311.0, 316.0, 290.0, 292.0, 317.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694306098577838, "mean_inference_ms": 1.2382181975816857, "mean_action_processing_ms": 0.1332722053111739, "mean_env_wait_ms": 0.835985405310874, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11520000, "num_agent_steps_trained": 11520000, "num_env_steps_sampled": 5760000, "num_env_steps_trained": 5760000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5760000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11520000, "timers": {"training_iteration_time_ms": 3691.668, "learn_time_ms": 1147.415, "learn_throughput": 11155.507, "synch_weights_time_ms": 10.937}, "counters": {"num_env_steps_sampled": 5760000, "num_env_steps_trained": 5760000, "num_agent_steps_sampled": 11520000, "num_agent_steps_trained": 11520000}, "done": false, "episodes_total": 14400, "training_iteration": 450, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-42", "timestamp": 1666582182, "time_this_iter_s": 3.8742873668670654, "time_total_s": 1713.8150634765625, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", 
"observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1713.8150634765625, "timesteps_since_restore": 0, "iterations_since_restore": 450, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.483333333333334, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 203.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 181.5, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.8, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.54, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.36, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.52, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.18, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.43, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.33, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.52, 
"optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.18, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.52, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.18, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001394312595948577, "policy_loss": -0.0018049200298264623, "vf_loss": 7.862459182739258, "vf_explained_var": 0.5844732522964478, "kl": 0.0030340240336954594, "entropy": 0.75127774477005, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5772800, "num_env_steps_trained": 5772800, "num_agent_steps_sampled": 11545600, "num_agent_steps_trained": 11545600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 589.1, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 294.55}, "custom_metrics": {"sparse_reward_mean": 203.8, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 181.5, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.8, "onion_pickup_agent_0_min": 7, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.54, "onion_pickup_agent_1_min": 4, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.72, "useful_onion_pickup_agent_0_min": 7, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.36, "useful_onion_pickup_agent_1_min": 4, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.52, "potting_onion_agent_0_min": 7, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 16.18, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 23, "dish_pickup_agent_0_mean": 5.42, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.17, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.43, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.13, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.13, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.05, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 3, 
"soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.33, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.96, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.52, "optimal_onion_potting_agent_0_min": 7, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 16.18, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 23, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.52, "viable_onion_potting_agent_0_min": 7, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 16.18, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 23, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [570.0, 587.0, 527.0, 636.0, 633.0, 633.0, 573.0, 630.0, 627.0, 579.0, 582.0, 587.0, 579.0, 180.0, 570.0, 630.0, 627.0, 570.0, 579.0, 413.0, 579.0, 576.0, 630.0, 582.0, 630.0, 576.0, 636.0, 525.0, 639.0, 587.0, 579.0, 522.0, 582.0, 639.0, 636.0, 633.0, 579.0, 579.0, 633.0, 630.0, 582.0, 582.0, 579.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 630.0, 576.0, 630.0, 582.0, 582.0, 630.0, 627.0, 582.0, 639.0, 570.0, 627.0, 576.0, 573.0, 582.0, 584.0, 582.0, 627.0, 582.0, 633.0, 582.0, 636.0, 582.0, 579.0, 579.0, 630.0, 587.0, 579.0, 584.0, 576.0, 582.0, 579.0, 576.0, 627.0, 579.0, 636.0, 587.0, 633.0, 633.0, 582.0, 630.0, 590.0, 576.0, 582.0, 522.0, 579.0, 573.0, 570.0, 527.0, 582.0, 539.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 294.0, 292.0, 295.0, 265.0, 262.0, 319.0, 317.0, 316.0, 317.0, 317.0, 
316.0, 285.0, 288.0, 314.0, 316.0, 310.0, 317.0, 283.0, 296.0, 293.0, 289.0, 289.0, 298.0, 292.0, 287.0, 89.0, 91.0, 271.0, 299.0, 317.0, 313.0, 308.0, 319.0, 290.0, 280.0, 291.0, 288.0, 205.0, 208.0, 294.0, 285.0, 285.0, 291.0, 316.0, 314.0, 291.0, 291.0, 314.0, 316.0, 283.0, 293.0, 316.0, 320.0, 266.0, 259.0, 318.0, 321.0, 291.0, 296.0, 293.0, 286.0, 256.0, 266.0, 291.0, 291.0, 324.0, 315.0, 314.0, 322.0, 317.0, 316.0, 290.0, 289.0, 296.0, 283.0, 312.0, 321.0, 312.0, 318.0, 291.0, 291.0, 288.0, 294.0, 289.0, 290.0, 308.0, 322.0, 286.0, 301.0, 319.0, 308.0, 287.0, 292.0, 319.0, 314.0, 297.0, 285.0, 319.0, 311.0, 288.0, 288.0, 311.0, 319.0, 289.0, 293.0, 292.0, 290.0, 311.0, 319.0, 311.0, 316.0, 294.0, 288.0, 322.0, 317.0, 282.0, 288.0, 319.0, 308.0, 283.0, 293.0, 287.0, 286.0, 288.0, 294.0, 290.0, 294.0, 292.0, 290.0, 311.0, 316.0, 290.0, 292.0, 317.0, 316.0, 289.0, 293.0, 317.0, 319.0, 288.0, 294.0, 283.0, 296.0, 288.0, 291.0, 313.0, 317.0, 293.0, 294.0, 292.0, 287.0, 294.0, 290.0, 284.0, 292.0, 286.0, 296.0, 299.0, 280.0, 291.0, 285.0, 311.0, 316.0, 288.0, 291.0, 327.0, 309.0, 293.0, 294.0, 316.0, 317.0, 317.0, 316.0, 291.0, 291.0, 312.0, 318.0, 293.0, 297.0, 287.0, 289.0, 289.0, 293.0, 268.0, 254.0, 291.0, 288.0, 280.0, 293.0, 283.0, 287.0, 274.0, 253.0, 288.0, 294.0, 265.0, 274.0, 311.0, 322.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6943649712257816, "mean_inference_ms": 1.2381989697127307, "mean_action_processing_ms": 0.13326478263615582, "mean_env_wait_ms": 0.8359216257077268, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 589.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 89.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 294.55}, "hist_stats": {"episode_reward": [570.0, 587.0, 527.0, 636.0, 633.0, 633.0, 573.0, 630.0, 627.0, 579.0, 582.0, 587.0, 579.0, 180.0, 570.0, 630.0, 627.0, 570.0, 579.0, 
413.0, 579.0, 576.0, 630.0, 582.0, 630.0, 576.0, 636.0, 525.0, 639.0, 587.0, 579.0, 522.0, 582.0, 639.0, 636.0, 633.0, 579.0, 579.0, 633.0, 630.0, 582.0, 582.0, 579.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 630.0, 576.0, 630.0, 582.0, 582.0, 630.0, 627.0, 582.0, 639.0, 570.0, 627.0, 576.0, 573.0, 582.0, 584.0, 582.0, 627.0, 582.0, 633.0, 582.0, 636.0, 582.0, 579.0, 579.0, 630.0, 587.0, 579.0, 584.0, 576.0, 582.0, 579.0, 576.0, 627.0, 579.0, 636.0, 587.0, 633.0, 633.0, 582.0, 630.0, 590.0, 576.0, 582.0, 522.0, 579.0, 573.0, 570.0, 527.0, 582.0, 539.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [276.0, 294.0, 292.0, 295.0, 265.0, 262.0, 319.0, 317.0, 316.0, 317.0, 317.0, 316.0, 285.0, 288.0, 314.0, 316.0, 310.0, 317.0, 283.0, 296.0, 293.0, 289.0, 289.0, 298.0, 292.0, 287.0, 89.0, 91.0, 271.0, 299.0, 317.0, 313.0, 308.0, 319.0, 290.0, 280.0, 291.0, 288.0, 205.0, 208.0, 294.0, 285.0, 285.0, 291.0, 316.0, 314.0, 291.0, 291.0, 314.0, 316.0, 283.0, 293.0, 316.0, 320.0, 266.0, 259.0, 318.0, 321.0, 291.0, 296.0, 293.0, 286.0, 256.0, 266.0, 291.0, 291.0, 324.0, 315.0, 314.0, 322.0, 317.0, 316.0, 290.0, 289.0, 296.0, 283.0, 312.0, 321.0, 312.0, 318.0, 291.0, 291.0, 288.0, 294.0, 289.0, 290.0, 308.0, 322.0, 286.0, 301.0, 319.0, 308.0, 287.0, 292.0, 319.0, 314.0, 297.0, 285.0, 319.0, 311.0, 288.0, 288.0, 311.0, 319.0, 289.0, 293.0, 292.0, 290.0, 311.0, 319.0, 311.0, 316.0, 294.0, 288.0, 322.0, 317.0, 282.0, 288.0, 319.0, 308.0, 283.0, 293.0, 287.0, 286.0, 288.0, 294.0, 290.0, 
294.0, 292.0, 290.0, 311.0, 316.0, 290.0, 292.0, 317.0, 316.0, 289.0, 293.0, 317.0, 319.0, 288.0, 294.0, 283.0, 296.0, 288.0, 291.0, 313.0, 317.0, 293.0, 294.0, 292.0, 287.0, 294.0, 290.0, 284.0, 292.0, 286.0, 296.0, 299.0, 280.0, 291.0, 285.0, 311.0, 316.0, 288.0, 291.0, 327.0, 309.0, 293.0, 294.0, 316.0, 317.0, 317.0, 316.0, 291.0, 291.0, 312.0, 318.0, 293.0, 297.0, 287.0, 289.0, 289.0, 293.0, 268.0, 254.0, 291.0, 288.0, 280.0, 293.0, 283.0, 287.0, 274.0, 253.0, 288.0, 294.0, 265.0, 274.0, 311.0, 322.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6943649712257816, "mean_inference_ms": 1.2381989697127307, "mean_action_processing_ms": 0.13326478263615582, "mean_env_wait_ms": 0.8359216257077268, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11545600, "num_agent_steps_trained": 11545600, "num_env_steps_sampled": 5772800, "num_env_steps_trained": 5772800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5772800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11545600, "timers": {"training_iteration_time_ms": 3668.128, "learn_time_ms": 1140.587, "learn_throughput": 11222.293, "synch_weights_time_ms": 12.155}, "counters": {"num_env_steps_sampled": 5772800, "num_env_steps_trained": 5772800, "num_agent_steps_sampled": 11545600, "num_agent_steps_trained": 11545600}, "done": false, "episodes_total": 14432, "training_iteration": 451, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-45", "timestamp": 1666582185, "time_this_iter_s": 3.5603537559509277, "time_total_s": 1717.3754172325134, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1717.3754172325134, "timesteps_since_restore": 0, "iterations_since_restore": 451, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.62, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 206.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.48, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.07, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.66, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.99, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.52, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.79, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.31, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.69, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.5, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.4, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.35, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.79, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.31, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.79, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.31, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.003468575421720743, "policy_loss": 0.0030773193575441837, "vf_loss": 7.6872453689575195, "vf_explained_var": 0.5932549834251404, "kl": 0.002268628915771842, "entropy": 0.7549370527267456, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5785600, "num_env_steps_trained": 5785600, "num_agent_steps_sampled": 11571200, "num_agent_steps_trained": 11571200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 595.88, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 297.94}, "custom_metrics": {"sparse_reward_mean": 206.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.48, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.07, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.66, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.99, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.52, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.79, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.31, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.4, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.69, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.21, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.5, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.1, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.02, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.4, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.98, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.35, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.79, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.31, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.79, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.31, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 639.0, 636.0, 633.0, 579.0, 579.0, 633.0, 630.0, 582.0, 582.0, 579.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 630.0, 576.0, 630.0, 582.0, 582.0, 630.0, 627.0, 582.0, 639.0, 570.0, 627.0, 576.0, 573.0, 582.0, 584.0, 582.0, 627.0, 582.0, 633.0, 582.0, 636.0, 582.0, 579.0, 579.0, 630.0, 587.0, 579.0, 584.0, 576.0, 582.0, 579.0, 576.0, 627.0, 579.0, 636.0, 587.0, 633.0, 633.0, 582.0, 630.0, 590.0, 576.0, 582.0, 522.0, 579.0, 573.0, 570.0, 527.0, 582.0, 539.0, 633.0, 579.0, 582.0, 627.0, 582.0, 630.0, 633.0, 579.0, 630.0, 584.0, 582.0, 582.0, 573.0, 573.0, 627.0, 582.0, 627.0, 639.0, 533.0, 636.0, 576.0, 582.0, 576.0, 630.0, 582.0, 630.0, 630.0, 633.0, 522.0, 582.0, 579.0, 519.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 324.0, 315.0, 314.0, 322.0, 317.0, 316.0, 290.0, 289.0, 296.0, 
283.0, 312.0, 321.0, 312.0, 318.0, 291.0, 291.0, 288.0, 294.0, 289.0, 290.0, 308.0, 322.0, 286.0, 301.0, 319.0, 308.0, 287.0, 292.0, 319.0, 314.0, 297.0, 285.0, 319.0, 311.0, 288.0, 288.0, 311.0, 319.0, 289.0, 293.0, 292.0, 290.0, 311.0, 319.0, 311.0, 316.0, 294.0, 288.0, 322.0, 317.0, 282.0, 288.0, 319.0, 308.0, 283.0, 293.0, 287.0, 286.0, 288.0, 294.0, 290.0, 294.0, 292.0, 290.0, 311.0, 316.0, 290.0, 292.0, 317.0, 316.0, 289.0, 293.0, 317.0, 319.0, 288.0, 294.0, 283.0, 296.0, 288.0, 291.0, 313.0, 317.0, 293.0, 294.0, 292.0, 287.0, 294.0, 290.0, 284.0, 292.0, 286.0, 296.0, 299.0, 280.0, 291.0, 285.0, 311.0, 316.0, 288.0, 291.0, 327.0, 309.0, 293.0, 294.0, 316.0, 317.0, 317.0, 316.0, 291.0, 291.0, 312.0, 318.0, 293.0, 297.0, 287.0, 289.0, 289.0, 293.0, 268.0, 254.0, 291.0, 288.0, 280.0, 293.0, 283.0, 287.0, 274.0, 253.0, 288.0, 294.0, 265.0, 274.0, 311.0, 322.0, 292.0, 287.0, 286.0, 296.0, 316.0, 311.0, 291.0, 291.0, 314.0, 316.0, 312.0, 321.0, 290.0, 289.0, 314.0, 316.0, 296.0, 288.0, 295.0, 287.0, 283.0, 299.0, 288.0, 285.0, 283.0, 290.0, 316.0, 311.0, 293.0, 289.0, 313.0, 314.0, 324.0, 315.0, 265.0, 268.0, 320.0, 316.0, 282.0, 294.0, 286.0, 296.0, 283.0, 293.0, 311.0, 319.0, 289.0, 293.0, 317.0, 313.0, 308.0, 322.0, 314.0, 319.0, 271.0, 251.0, 294.0, 288.0, 285.0, 294.0, 253.0, 266.0, 309.0, 321.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6944271013396697, "mean_inference_ms": 1.2381871099870232, "mean_action_processing_ms": 0.13325826778233354, "mean_env_wait_ms": 0.835862806925344, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 595.88, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 297.94}, "hist_stats": {"episode_reward": [582.0, 639.0, 636.0, 633.0, 579.0, 579.0, 633.0, 630.0, 582.0, 582.0, 579.0, 630.0, 587.0, 627.0, 579.0, 633.0, 582.0, 630.0, 576.0, 
630.0, 582.0, 582.0, 630.0, 627.0, 582.0, 639.0, 570.0, 627.0, 576.0, 573.0, 582.0, 584.0, 582.0, 627.0, 582.0, 633.0, 582.0, 636.0, 582.0, 579.0, 579.0, 630.0, 587.0, 579.0, 584.0, 576.0, 582.0, 579.0, 576.0, 627.0, 579.0, 636.0, 587.0, 633.0, 633.0, 582.0, 630.0, 590.0, 576.0, 582.0, 522.0, 579.0, 573.0, 570.0, 527.0, 582.0, 539.0, 633.0, 579.0, 582.0, 627.0, 582.0, 630.0, 633.0, 579.0, 630.0, 584.0, 582.0, 582.0, 573.0, 573.0, 627.0, 582.0, 627.0, 639.0, 533.0, 636.0, 576.0, 582.0, 576.0, 630.0, 582.0, 630.0, 630.0, 633.0, 522.0, 582.0, 579.0, 519.0, 630.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 291.0, 324.0, 315.0, 314.0, 322.0, 317.0, 316.0, 290.0, 289.0, 296.0, 283.0, 312.0, 321.0, 312.0, 318.0, 291.0, 291.0, 288.0, 294.0, 289.0, 290.0, 308.0, 322.0, 286.0, 301.0, 319.0, 308.0, 287.0, 292.0, 319.0, 314.0, 297.0, 285.0, 319.0, 311.0, 288.0, 288.0, 311.0, 319.0, 289.0, 293.0, 292.0, 290.0, 311.0, 319.0, 311.0, 316.0, 294.0, 288.0, 322.0, 317.0, 282.0, 288.0, 319.0, 308.0, 283.0, 293.0, 287.0, 286.0, 288.0, 294.0, 290.0, 294.0, 292.0, 290.0, 311.0, 316.0, 290.0, 292.0, 317.0, 316.0, 289.0, 293.0, 317.0, 319.0, 288.0, 294.0, 283.0, 296.0, 288.0, 291.0, 313.0, 317.0, 293.0, 294.0, 292.0, 287.0, 294.0, 290.0, 284.0, 292.0, 286.0, 296.0, 299.0, 280.0, 291.0, 285.0, 311.0, 316.0, 288.0, 291.0, 327.0, 309.0, 293.0, 294.0, 316.0, 317.0, 317.0, 316.0, 291.0, 291.0, 312.0, 318.0, 293.0, 297.0, 287.0, 289.0, 289.0, 293.0, 268.0, 254.0, 291.0, 288.0, 280.0, 293.0, 283.0, 
287.0, 274.0, 253.0, 288.0, 294.0, 265.0, 274.0, 311.0, 322.0, 292.0, 287.0, 286.0, 296.0, 316.0, 311.0, 291.0, 291.0, 314.0, 316.0, 312.0, 321.0, 290.0, 289.0, 314.0, 316.0, 296.0, 288.0, 295.0, 287.0, 283.0, 299.0, 288.0, 285.0, 283.0, 290.0, 316.0, 311.0, 293.0, 289.0, 313.0, 314.0, 324.0, 315.0, 265.0, 268.0, 320.0, 316.0, 282.0, 294.0, 286.0, 296.0, 283.0, 293.0, 311.0, 319.0, 289.0, 293.0, 317.0, 313.0, 308.0, 322.0, 314.0, 319.0, 271.0, 251.0, 294.0, 288.0, 285.0, 294.0, 253.0, 266.0, 309.0, 321.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6944271013396697, "mean_inference_ms": 1.2381871099870232, "mean_action_processing_ms": 0.13325826778233354, "mean_env_wait_ms": 0.835862806925344, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11571200, "num_agent_steps_trained": 11571200, "num_env_steps_sampled": 5785600, "num_env_steps_trained": 5785600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5785600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11571200, "timers": {"training_iteration_time_ms": 3624.048, "learn_time_ms": 1120.872, "learn_throughput": 11419.678, "synch_weights_time_ms": 12.16}, "counters": {"num_env_steps_sampled": 5785600, "num_env_steps_trained": 5785600, "num_agent_steps_sampled": 11571200, "num_agent_steps_trained": 11571200}, "done": false, "episodes_total": 14464, "training_iteration": 452, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-49", "timestamp": 1666582189, "time_this_iter_s": 3.579380512237549, "time_total_s": 1720.954797744751, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1720.954797744751, "timesteps_since_restore": 0, "iterations_since_restore": 452, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.766666666666666, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 204.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 182.55, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.12, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.44, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.03, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.24, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.01, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.82, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.12, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.26, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.58, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.43, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.36, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.82, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.12, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.82, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.12, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008576636901125312, "policy_loss": 0.0004561354289762676, "vf_loss": 7.776826858520508, "vf_explained_var": 0.6098490953445435, "kl": 0.003105924464762211, "entropy": 0.7523094415664673, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5798400, "num_env_steps_trained": 5798400, "num_agent_steps_sampled": 11596800, "num_agent_steps_trained": 11596800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 591.35, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 295.675}, "custom_metrics": {"sparse_reward_mean": 204.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 182.55, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.12, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.44, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.03, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.24, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.82, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.12, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.26, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.76, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.58, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.43, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.36, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.82, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.12, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.82, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.12, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 627.0, 582.0, 633.0, 582.0, 636.0, 582.0, 579.0, 579.0, 630.0, 587.0, 579.0, 584.0, 576.0, 582.0, 579.0, 576.0, 627.0, 579.0, 636.0, 587.0, 633.0, 633.0, 582.0, 630.0, 590.0, 576.0, 582.0, 522.0, 579.0, 573.0, 570.0, 527.0, 582.0, 539.0, 633.0, 579.0, 582.0, 627.0, 582.0, 630.0, 633.0, 579.0, 630.0, 584.0, 582.0, 582.0, 573.0, 573.0, 627.0, 582.0, 627.0, 639.0, 533.0, 636.0, 576.0, 582.0, 576.0, 630.0, 582.0, 630.0, 630.0, 633.0, 522.0, 582.0, 579.0, 519.0, 630.0, 639.0, 579.0, 573.0, 522.0, 639.0, 584.0, 587.0, 582.0, 519.0, 639.0, 576.0, 582.0, 579.0, 533.0, 579.0, 633.0, 579.0, 630.0, 582.0, 633.0, 582.0, 576.0, 476.0, 525.0, 633.0, 636.0, 579.0, 581.0, 636.0, 627.0, 627.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 311.0, 316.0, 290.0, 292.0, 317.0, 316.0, 289.0, 293.0, 317.0, 
319.0, 288.0, 294.0, 283.0, 296.0, 288.0, 291.0, 313.0, 317.0, 293.0, 294.0, 292.0, 287.0, 294.0, 290.0, 284.0, 292.0, 286.0, 296.0, 299.0, 280.0, 291.0, 285.0, 311.0, 316.0, 288.0, 291.0, 327.0, 309.0, 293.0, 294.0, 316.0, 317.0, 317.0, 316.0, 291.0, 291.0, 312.0, 318.0, 293.0, 297.0, 287.0, 289.0, 289.0, 293.0, 268.0, 254.0, 291.0, 288.0, 280.0, 293.0, 283.0, 287.0, 274.0, 253.0, 288.0, 294.0, 265.0, 274.0, 311.0, 322.0, 292.0, 287.0, 286.0, 296.0, 316.0, 311.0, 291.0, 291.0, 314.0, 316.0, 312.0, 321.0, 290.0, 289.0, 314.0, 316.0, 296.0, 288.0, 295.0, 287.0, 283.0, 299.0, 288.0, 285.0, 283.0, 290.0, 316.0, 311.0, 293.0, 289.0, 313.0, 314.0, 324.0, 315.0, 265.0, 268.0, 320.0, 316.0, 282.0, 294.0, 286.0, 296.0, 283.0, 293.0, 311.0, 319.0, 289.0, 293.0, 317.0, 313.0, 308.0, 322.0, 314.0, 319.0, 271.0, 251.0, 294.0, 288.0, 285.0, 294.0, 253.0, 266.0, 309.0, 321.0, 320.0, 319.0, 291.0, 288.0, 285.0, 288.0, 253.0, 269.0, 317.0, 322.0, 299.0, 285.0, 291.0, 296.0, 291.0, 291.0, 261.0, 258.0, 317.0, 322.0, 274.0, 302.0, 296.0, 286.0, 289.0, 290.0, 259.0, 274.0, 293.0, 286.0, 314.0, 319.0, 284.0, 295.0, 314.0, 316.0, 297.0, 285.0, 314.0, 319.0, 294.0, 288.0, 289.0, 287.0, 237.0, 239.0, 263.0, 262.0, 319.0, 314.0, 314.0, 322.0, 278.0, 301.0, 283.0, 298.0, 319.0, 317.0, 313.0, 314.0, 311.0, 316.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6944050950948926, "mean_inference_ms": 1.23808158227914, "mean_action_processing_ms": 0.13325236698769452, "mean_env_wait_ms": 0.8357987332989592, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 591.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 295.675}, "hist_stats": {"episode_reward": [582.0, 627.0, 582.0, 633.0, 582.0, 636.0, 582.0, 579.0, 579.0, 630.0, 587.0, 579.0, 584.0, 576.0, 582.0, 579.0, 576.0, 627.0, 579.0, 
636.0, 587.0, 633.0, 633.0, 582.0, 630.0, 590.0, 576.0, 582.0, 522.0, 579.0, 573.0, 570.0, 527.0, 582.0, 539.0, 633.0, 579.0, 582.0, 627.0, 582.0, 630.0, 633.0, 579.0, 630.0, 584.0, 582.0, 582.0, 573.0, 573.0, 627.0, 582.0, 627.0, 639.0, 533.0, 636.0, 576.0, 582.0, 576.0, 630.0, 582.0, 630.0, 630.0, 633.0, 522.0, 582.0, 579.0, 519.0, 630.0, 639.0, 579.0, 573.0, 522.0, 639.0, 584.0, 587.0, 582.0, 519.0, 639.0, 576.0, 582.0, 579.0, 533.0, 579.0, 633.0, 579.0, 630.0, 582.0, 633.0, 582.0, 576.0, 476.0, 525.0, 633.0, 636.0, 579.0, 581.0, 636.0, 627.0, 627.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 311.0, 316.0, 290.0, 292.0, 317.0, 316.0, 289.0, 293.0, 317.0, 319.0, 288.0, 294.0, 283.0, 296.0, 288.0, 291.0, 313.0, 317.0, 293.0, 294.0, 292.0, 287.0, 294.0, 290.0, 284.0, 292.0, 286.0, 296.0, 299.0, 280.0, 291.0, 285.0, 311.0, 316.0, 288.0, 291.0, 327.0, 309.0, 293.0, 294.0, 316.0, 317.0, 317.0, 316.0, 291.0, 291.0, 312.0, 318.0, 293.0, 297.0, 287.0, 289.0, 289.0, 293.0, 268.0, 254.0, 291.0, 288.0, 280.0, 293.0, 283.0, 287.0, 274.0, 253.0, 288.0, 294.0, 265.0, 274.0, 311.0, 322.0, 292.0, 287.0, 286.0, 296.0, 316.0, 311.0, 291.0, 291.0, 314.0, 316.0, 312.0, 321.0, 290.0, 289.0, 314.0, 316.0, 296.0, 288.0, 295.0, 287.0, 283.0, 299.0, 288.0, 285.0, 283.0, 290.0, 316.0, 311.0, 293.0, 289.0, 313.0, 314.0, 324.0, 315.0, 265.0, 268.0, 320.0, 316.0, 282.0, 294.0, 286.0, 296.0, 283.0, 293.0, 311.0, 319.0, 289.0, 293.0, 317.0, 313.0, 308.0, 322.0, 314.0, 319.0, 271.0, 
251.0, 294.0, 288.0, 285.0, 294.0, 253.0, 266.0, 309.0, 321.0, 320.0, 319.0, 291.0, 288.0, 285.0, 288.0, 253.0, 269.0, 317.0, 322.0, 299.0, 285.0, 291.0, 296.0, 291.0, 291.0, 261.0, 258.0, 317.0, 322.0, 274.0, 302.0, 296.0, 286.0, 289.0, 290.0, 259.0, 274.0, 293.0, 286.0, 314.0, 319.0, 284.0, 295.0, 314.0, 316.0, 297.0, 285.0, 314.0, 319.0, 294.0, 288.0, 289.0, 287.0, 237.0, 239.0, 263.0, 262.0, 319.0, 314.0, 314.0, 322.0, 278.0, 301.0, 283.0, 298.0, 319.0, 317.0, 313.0, 314.0, 311.0, 316.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6944050950948926, "mean_inference_ms": 1.23808158227914, "mean_action_processing_ms": 0.13325236698769452, "mean_env_wait_ms": 0.8357987332989592, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11596800, "num_agent_steps_trained": 11596800, "num_env_steps_sampled": 5798400, "num_env_steps_trained": 5798400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5798400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11596800, "timers": {"training_iteration_time_ms": 3603.123, "learn_time_ms": 1103.911, "learn_throughput": 11595.142, "synch_weights_time_ms": 11.954}, "counters": {"num_env_steps_sampled": 5798400, "num_env_steps_trained": 5798400, "num_agent_steps_sampled": 11596800, "num_agent_steps_trained": 11596800}, "done": false, "episodes_total": 14496, "training_iteration": 453, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-53", "timestamp": 1666582193, "time_this_iter_s": 3.5724422931671143, "time_total_s": 1724.527240037918, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1724.527240037918, "timesteps_since_restore": 0, "iterations_since_restore": 453, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.0, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 204.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 182.14, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.41, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.16, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.2, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.78, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.06, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.21, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.0, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.59, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.45, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.37, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.78, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.06, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.78, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.06, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0006028414354659617, "policy_loss": -0.0009957019938156009, "vf_loss": 7.673003196716309, "vf_explained_var": 0.6046704053878784, "kl": 0.0030364375561475754, "entropy": 0.7488775253295898, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5811200, "num_env_steps_trained": 5811200, "num_agent_steps_sampled": 11622400, "num_agent_steps_trained": 11622400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 591.74, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 295.87}, "custom_metrics": {"sparse_reward_mean": 204.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 182.14, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.2, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.41, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.16, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.2, "useful_onion_pickup_agent_1_min": 12, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.78, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.06, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.21, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.0, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.59, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 4.93, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.45, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.37, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.78, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.06, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.78, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.06, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [527.0, 582.0, 539.0, 633.0, 579.0, 582.0, 627.0, 582.0, 630.0, 633.0, 579.0, 630.0, 584.0, 582.0, 582.0, 573.0, 573.0, 627.0, 582.0, 627.0, 639.0, 533.0, 636.0, 576.0, 582.0, 576.0, 630.0, 582.0, 630.0, 630.0, 633.0, 522.0, 582.0, 579.0, 519.0, 630.0, 639.0, 579.0, 573.0, 522.0, 639.0, 584.0, 587.0, 582.0, 519.0, 639.0, 576.0, 582.0, 579.0, 533.0, 579.0, 633.0, 579.0, 630.0, 582.0, 633.0, 582.0, 576.0, 476.0, 525.0, 633.0, 636.0, 579.0, 581.0, 636.0, 627.0, 627.0, 582.0, 630.0, 573.0, 570.0, 582.0, 627.0, 587.0, 587.0, 584.0, 573.0, 570.0, 627.0, 573.0, 584.0, 579.0, 573.0, 516.0, 630.0, 579.0, 627.0, 630.0, 576.0, 630.0, 636.0, 630.0, 579.0, 579.0, 630.0, 636.0, 579.0, 582.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [274.0, 253.0, 288.0, 294.0, 265.0, 274.0, 311.0, 322.0, 292.0, 287.0, 286.0, 
296.0, 316.0, 311.0, 291.0, 291.0, 314.0, 316.0, 312.0, 321.0, 290.0, 289.0, 314.0, 316.0, 296.0, 288.0, 295.0, 287.0, 283.0, 299.0, 288.0, 285.0, 283.0, 290.0, 316.0, 311.0, 293.0, 289.0, 313.0, 314.0, 324.0, 315.0, 265.0, 268.0, 320.0, 316.0, 282.0, 294.0, 286.0, 296.0, 283.0, 293.0, 311.0, 319.0, 289.0, 293.0, 317.0, 313.0, 308.0, 322.0, 314.0, 319.0, 271.0, 251.0, 294.0, 288.0, 285.0, 294.0, 253.0, 266.0, 309.0, 321.0, 320.0, 319.0, 291.0, 288.0, 285.0, 288.0, 253.0, 269.0, 317.0, 322.0, 299.0, 285.0, 291.0, 296.0, 291.0, 291.0, 261.0, 258.0, 317.0, 322.0, 274.0, 302.0, 296.0, 286.0, 289.0, 290.0, 259.0, 274.0, 293.0, 286.0, 314.0, 319.0, 284.0, 295.0, 314.0, 316.0, 297.0, 285.0, 314.0, 319.0, 294.0, 288.0, 289.0, 287.0, 237.0, 239.0, 263.0, 262.0, 319.0, 314.0, 314.0, 322.0, 278.0, 301.0, 283.0, 298.0, 319.0, 317.0, 313.0, 314.0, 311.0, 316.0, 292.0, 290.0, 315.0, 315.0, 282.0, 291.0, 280.0, 290.0, 292.0, 290.0, 311.0, 316.0, 294.0, 293.0, 293.0, 294.0, 299.0, 285.0, 281.0, 292.0, 286.0, 284.0, 313.0, 314.0, 286.0, 287.0, 288.0, 296.0, 292.0, 287.0, 294.0, 279.0, 255.0, 261.0, 311.0, 319.0, 288.0, 291.0, 308.0, 319.0, 316.0, 314.0, 280.0, 296.0, 311.0, 319.0, 322.0, 314.0, 324.0, 306.0, 284.0, 295.0, 290.0, 289.0, 313.0, 317.0, 319.0, 317.0, 291.0, 288.0, 291.0, 291.0, 277.0, 296.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694358388660809, "mean_inference_ms": 1.2379814229738246, "mean_action_processing_ms": 0.13324649046471473, "mean_env_wait_ms": 0.8357372653835803, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 591.74, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 295.87}, "hist_stats": {"episode_reward": [527.0, 582.0, 539.0, 633.0, 579.0, 582.0, 627.0, 582.0, 630.0, 633.0, 579.0, 630.0, 584.0, 582.0, 582.0, 573.0, 573.0, 627.0, 582.0, 
627.0, 639.0, 533.0, 636.0, 576.0, 582.0, 576.0, 630.0, 582.0, 630.0, 630.0, 633.0, 522.0, 582.0, 579.0, 519.0, 630.0, 639.0, 579.0, 573.0, 522.0, 639.0, 584.0, 587.0, 582.0, 519.0, 639.0, 576.0, 582.0, 579.0, 533.0, 579.0, 633.0, 579.0, 630.0, 582.0, 633.0, 582.0, 576.0, 476.0, 525.0, 633.0, 636.0, 579.0, 581.0, 636.0, 627.0, 627.0, 582.0, 630.0, 573.0, 570.0, 582.0, 627.0, 587.0, 587.0, 584.0, 573.0, 570.0, 627.0, 573.0, 584.0, 579.0, 573.0, 516.0, 630.0, 579.0, 627.0, 630.0, 576.0, 630.0, 636.0, 630.0, 579.0, 579.0, 630.0, 636.0, 579.0, 582.0, 573.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [274.0, 253.0, 288.0, 294.0, 265.0, 274.0, 311.0, 322.0, 292.0, 287.0, 286.0, 296.0, 316.0, 311.0, 291.0, 291.0, 314.0, 316.0, 312.0, 321.0, 290.0, 289.0, 314.0, 316.0, 296.0, 288.0, 295.0, 287.0, 283.0, 299.0, 288.0, 285.0, 283.0, 290.0, 316.0, 311.0, 293.0, 289.0, 313.0, 314.0, 324.0, 315.0, 265.0, 268.0, 320.0, 316.0, 282.0, 294.0, 286.0, 296.0, 283.0, 293.0, 311.0, 319.0, 289.0, 293.0, 317.0, 313.0, 308.0, 322.0, 314.0, 319.0, 271.0, 251.0, 294.0, 288.0, 285.0, 294.0, 253.0, 266.0, 309.0, 321.0, 320.0, 319.0, 291.0, 288.0, 285.0, 288.0, 253.0, 269.0, 317.0, 322.0, 299.0, 285.0, 291.0, 296.0, 291.0, 291.0, 261.0, 258.0, 317.0, 322.0, 274.0, 302.0, 296.0, 286.0, 289.0, 290.0, 259.0, 274.0, 293.0, 286.0, 314.0, 319.0, 284.0, 295.0, 314.0, 316.0, 297.0, 285.0, 314.0, 319.0, 294.0, 288.0, 289.0, 287.0, 237.0, 239.0, 263.0, 262.0, 319.0, 314.0, 314.0, 322.0, 278.0, 301.0, 283.0, 
298.0, 319.0, 317.0, 313.0, 314.0, 311.0, 316.0, 292.0, 290.0, 315.0, 315.0, 282.0, 291.0, 280.0, 290.0, 292.0, 290.0, 311.0, 316.0, 294.0, 293.0, 293.0, 294.0, 299.0, 285.0, 281.0, 292.0, 286.0, 284.0, 313.0, 314.0, 286.0, 287.0, 288.0, 296.0, 292.0, 287.0, 294.0, 279.0, 255.0, 261.0, 311.0, 319.0, 288.0, 291.0, 308.0, 319.0, 316.0, 314.0, 280.0, 296.0, 311.0, 319.0, 322.0, 314.0, 324.0, 306.0, 284.0, 295.0, 290.0, 289.0, 313.0, 317.0, 319.0, 317.0, 291.0, 288.0, 291.0, 291.0, 277.0, 296.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694358388660809, "mean_inference_ms": 1.2379814229738246, "mean_action_processing_ms": 0.13324649046471473, "mean_env_wait_ms": 0.8357372653835803, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11622400, "num_agent_steps_trained": 11622400, "num_env_steps_sampled": 5811200, "num_env_steps_trained": 5811200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5811200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11622400, "timers": {"training_iteration_time_ms": 3599.392, "learn_time_ms": 1095.152, "learn_throughput": 11687.876, "synch_weights_time_ms": 11.457}, "counters": {"num_env_steps_sampled": 5811200, "num_env_steps_trained": 5811200, "num_agent_steps_sampled": 11622400, "num_agent_steps_trained": 11622400}, "done": false, "episodes_total": 14528, "training_iteration": 454, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-29-57", "timestamp": 1666582197, "time_this_iter_s": 3.6274921894073486, "time_total_s": 1728.1547322273254, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1728.1547322273254, "timesteps_since_restore": 0, "iterations_since_restore": 454, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.666666666666668, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 205.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 182.08, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.55, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.92, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.27, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.56, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.19, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.47, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.37, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.28, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.56, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.19, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.56, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.19, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014731429982930422, "policy_loss": 0.0010732044465839863, "vf_loss": 7.764483451843262, "vf_explained_var": 0.5941508412361145, "kl": 0.003150323638692498, "entropy": 0.7530198097229004, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5824000, "num_env_steps_trained": 5824000, "num_agent_steps_sampled": 11648000, "num_agent_steps_trained": 11648000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 592.48, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 296.24}, "custom_metrics": {"sparse_reward_mean": 205.2, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 182.08, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.55, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.92, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.27, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 2, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 2, "potting_onion_agent_0_mean": 16.56, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.19, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.63, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 5.14, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.47, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 
2, "soup_pickup_agent_0_mean": 5.04, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.37, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.02, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.28, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.03, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.56, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.19, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.56, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.19, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 519.0, 630.0, 639.0, 579.0, 573.0, 522.0, 639.0, 584.0, 587.0, 582.0, 519.0, 639.0, 576.0, 582.0, 579.0, 533.0, 579.0, 633.0, 579.0, 630.0, 582.0, 633.0, 582.0, 576.0, 476.0, 525.0, 633.0, 636.0, 579.0, 581.0, 636.0, 627.0, 627.0, 582.0, 630.0, 573.0, 570.0, 582.0, 627.0, 587.0, 587.0, 584.0, 573.0, 570.0, 627.0, 573.0, 584.0, 579.0, 573.0, 516.0, 630.0, 579.0, 627.0, 630.0, 576.0, 630.0, 636.0, 630.0, 579.0, 579.0, 630.0, 636.0, 579.0, 582.0, 573.0, 582.0, 639.0, 627.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 584.0, 576.0, 576.0, 627.0, 584.0, 581.0, 630.0, 587.0, 573.0, 525.0, 624.0, 579.0, 633.0, 579.0, 587.0, 579.0, 630.0, 573.0, 630.0, 579.0, 579.0, 576.0, 630.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 285.0, 294.0, 253.0, 266.0, 309.0, 321.0, 320.0, 319.0, 291.0, 
288.0, 285.0, 288.0, 253.0, 269.0, 317.0, 322.0, 299.0, 285.0, 291.0, 296.0, 291.0, 291.0, 261.0, 258.0, 317.0, 322.0, 274.0, 302.0, 296.0, 286.0, 289.0, 290.0, 259.0, 274.0, 293.0, 286.0, 314.0, 319.0, 284.0, 295.0, 314.0, 316.0, 297.0, 285.0, 314.0, 319.0, 294.0, 288.0, 289.0, 287.0, 237.0, 239.0, 263.0, 262.0, 319.0, 314.0, 314.0, 322.0, 278.0, 301.0, 283.0, 298.0, 319.0, 317.0, 313.0, 314.0, 311.0, 316.0, 292.0, 290.0, 315.0, 315.0, 282.0, 291.0, 280.0, 290.0, 292.0, 290.0, 311.0, 316.0, 294.0, 293.0, 293.0, 294.0, 299.0, 285.0, 281.0, 292.0, 286.0, 284.0, 313.0, 314.0, 286.0, 287.0, 288.0, 296.0, 292.0, 287.0, 294.0, 279.0, 255.0, 261.0, 311.0, 319.0, 288.0, 291.0, 308.0, 319.0, 316.0, 314.0, 280.0, 296.0, 311.0, 319.0, 322.0, 314.0, 324.0, 306.0, 284.0, 295.0, 290.0, 289.0, 313.0, 317.0, 319.0, 317.0, 291.0, 288.0, 291.0, 291.0, 277.0, 296.0, 291.0, 291.0, 319.0, 320.0, 314.0, 313.0, 314.0, 316.0, 312.0, 318.0, 291.0, 291.0, 289.0, 293.0, 291.0, 285.0, 286.0, 296.0, 301.0, 283.0, 293.0, 283.0, 289.0, 287.0, 313.0, 314.0, 290.0, 294.0, 298.0, 283.0, 311.0, 319.0, 294.0, 293.0, 287.0, 286.0, 260.0, 265.0, 313.0, 311.0, 283.0, 296.0, 317.0, 316.0, 291.0, 288.0, 305.0, 282.0, 285.0, 294.0, 320.0, 310.0, 283.0, 290.0, 316.0, 314.0, 293.0, 286.0, 293.0, 286.0, 288.0, 288.0, 311.0, 319.0, 311.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6943095786537452, "mean_inference_ms": 1.2378769399875804, "mean_action_processing_ms": 0.13324093695251057, "mean_env_wait_ms": 0.8356776468016364, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 476.0, "episode_reward_mean": 592.48, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 237.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 296.24}, "hist_stats": {"episode_reward": [582.0, 579.0, 519.0, 630.0, 639.0, 579.0, 573.0, 522.0, 639.0, 584.0, 587.0, 582.0, 519.0, 639.0, 576.0, 582.0, 579.0, 533.0, 579.0, 
633.0, 579.0, 630.0, 582.0, 633.0, 582.0, 576.0, 476.0, 525.0, 633.0, 636.0, 579.0, 581.0, 636.0, 627.0, 627.0, 582.0, 630.0, 573.0, 570.0, 582.0, 627.0, 587.0, 587.0, 584.0, 573.0, 570.0, 627.0, 573.0, 584.0, 579.0, 573.0, 516.0, 630.0, 579.0, 627.0, 630.0, 576.0, 630.0, 636.0, 630.0, 579.0, 579.0, 630.0, 636.0, 579.0, 582.0, 573.0, 582.0, 639.0, 627.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 584.0, 576.0, 576.0, 627.0, 584.0, 581.0, 630.0, 587.0, 573.0, 525.0, 624.0, 579.0, 633.0, 579.0, 587.0, 579.0, 630.0, 573.0, 630.0, 579.0, 579.0, 576.0, 630.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 288.0, 285.0, 294.0, 253.0, 266.0, 309.0, 321.0, 320.0, 319.0, 291.0, 288.0, 285.0, 288.0, 253.0, 269.0, 317.0, 322.0, 299.0, 285.0, 291.0, 296.0, 291.0, 291.0, 261.0, 258.0, 317.0, 322.0, 274.0, 302.0, 296.0, 286.0, 289.0, 290.0, 259.0, 274.0, 293.0, 286.0, 314.0, 319.0, 284.0, 295.0, 314.0, 316.0, 297.0, 285.0, 314.0, 319.0, 294.0, 288.0, 289.0, 287.0, 237.0, 239.0, 263.0, 262.0, 319.0, 314.0, 314.0, 322.0, 278.0, 301.0, 283.0, 298.0, 319.0, 317.0, 313.0, 314.0, 311.0, 316.0, 292.0, 290.0, 315.0, 315.0, 282.0, 291.0, 280.0, 290.0, 292.0, 290.0, 311.0, 316.0, 294.0, 293.0, 293.0, 294.0, 299.0, 285.0, 281.0, 292.0, 286.0, 284.0, 313.0, 314.0, 286.0, 287.0, 288.0, 296.0, 292.0, 287.0, 294.0, 279.0, 255.0, 261.0, 311.0, 319.0, 288.0, 291.0, 308.0, 319.0, 316.0, 314.0, 280.0, 296.0, 311.0, 319.0, 322.0, 314.0, 324.0, 306.0, 284.0, 295.0, 290.0, 289.0, 313.0, 317.0, 319.0, 
317.0, 291.0, 288.0, 291.0, 291.0, 277.0, 296.0, 291.0, 291.0, 319.0, 320.0, 314.0, 313.0, 314.0, 316.0, 312.0, 318.0, 291.0, 291.0, 289.0, 293.0, 291.0, 285.0, 286.0, 296.0, 301.0, 283.0, 293.0, 283.0, 289.0, 287.0, 313.0, 314.0, 290.0, 294.0, 298.0, 283.0, 311.0, 319.0, 294.0, 293.0, 287.0, 286.0, 260.0, 265.0, 313.0, 311.0, 283.0, 296.0, 317.0, 316.0, 291.0, 288.0, 305.0, 282.0, 285.0, 294.0, 320.0, 310.0, 283.0, 290.0, 316.0, 314.0, 293.0, 286.0, 293.0, 286.0, 288.0, 288.0, 311.0, 319.0, 311.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6943095786537452, "mean_inference_ms": 1.2378769399875804, "mean_action_processing_ms": 0.13324093695251057, "mean_env_wait_ms": 0.8356776468016364, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11648000, "num_agent_steps_trained": 11648000, "num_env_steps_sampled": 5824000, "num_env_steps_trained": 5824000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5824000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11648000, "timers": {"training_iteration_time_ms": 3584.012, "learn_time_ms": 1084.705, "learn_throughput": 11800.44, "synch_weights_time_ms": 11.283}, "counters": {"num_env_steps_sampled": 5824000, "num_env_steps_trained": 5824000, "num_agent_steps_sampled": 11648000, "num_agent_steps_trained": 11648000}, "done": false, "episodes_total": 14560, "training_iteration": 455, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-01", "timestamp": 1666582201, "time_this_iter_s": 3.6407976150512695, "time_total_s": 1731.7955298423767, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1731.7955298423767, "timesteps_since_restore": 0, "iterations_since_restore": 455, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.120000000000005, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 206.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.18, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.0, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.62, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.95, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.59, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.29, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.4, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.34, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.1, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.59, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.29, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.59, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.29, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.00027783436235040426, "policy_loss": -0.00011880556121468544, "vf_loss": 7.770425319671631, "vf_explained_var": 0.602925181388855, "kl": 0.0028033058624714613, "entropy": 0.7608031034469604, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5836800, "num_env_steps_trained": 5836800, "num_agent_steps_sampled": 11673600, "num_agent_steps_trained": 11673600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 596.38, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 298.19}, "custom_metrics": {"sparse_reward_mean": 206.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.18, "shaped_reward_min": 156, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.0, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.62, "onion_pickup_agent_1_min": 12, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.95, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 16.39, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.59, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.29, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.46, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.28, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.4, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.16, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.34, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.1, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.26, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.59, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.29, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.59, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.29, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 627.0, 627.0, 582.0, 630.0, 573.0, 570.0, 582.0, 627.0, 587.0, 587.0, 584.0, 573.0, 570.0, 627.0, 573.0, 584.0, 579.0, 573.0, 516.0, 630.0, 579.0, 627.0, 630.0, 576.0, 630.0, 636.0, 630.0, 579.0, 579.0, 630.0, 636.0, 579.0, 582.0, 573.0, 582.0, 639.0, 627.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 584.0, 576.0, 576.0, 627.0, 584.0, 581.0, 630.0, 587.0, 573.0, 525.0, 624.0, 579.0, 633.0, 579.0, 587.0, 579.0, 630.0, 573.0, 630.0, 579.0, 579.0, 576.0, 630.0, 627.0, 630.0, 630.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 582.0, 582.0, 627.0, 636.0, 587.0, 573.0, 624.0, 582.0, 530.0, 573.0, 536.0, 633.0, 630.0, 587.0, 587.0, 587.0, 582.0, 573.0, 582.0, 582.0, 636.0, 587.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 313.0, 314.0, 311.0, 316.0, 292.0, 290.0, 315.0, 315.0, 282.0, 
291.0, 280.0, 290.0, 292.0, 290.0, 311.0, 316.0, 294.0, 293.0, 293.0, 294.0, 299.0, 285.0, 281.0, 292.0, 286.0, 284.0, 313.0, 314.0, 286.0, 287.0, 288.0, 296.0, 292.0, 287.0, 294.0, 279.0, 255.0, 261.0, 311.0, 319.0, 288.0, 291.0, 308.0, 319.0, 316.0, 314.0, 280.0, 296.0, 311.0, 319.0, 322.0, 314.0, 324.0, 306.0, 284.0, 295.0, 290.0, 289.0, 313.0, 317.0, 319.0, 317.0, 291.0, 288.0, 291.0, 291.0, 277.0, 296.0, 291.0, 291.0, 319.0, 320.0, 314.0, 313.0, 314.0, 316.0, 312.0, 318.0, 291.0, 291.0, 289.0, 293.0, 291.0, 285.0, 286.0, 296.0, 301.0, 283.0, 293.0, 283.0, 289.0, 287.0, 313.0, 314.0, 290.0, 294.0, 298.0, 283.0, 311.0, 319.0, 294.0, 293.0, 287.0, 286.0, 260.0, 265.0, 313.0, 311.0, 283.0, 296.0, 317.0, 316.0, 291.0, 288.0, 305.0, 282.0, 285.0, 294.0, 320.0, 310.0, 283.0, 290.0, 316.0, 314.0, 293.0, 286.0, 293.0, 286.0, 288.0, 288.0, 311.0, 319.0, 311.0, 316.0, 304.0, 326.0, 314.0, 316.0, 283.0, 290.0, 286.0, 296.0, 293.0, 283.0, 296.0, 291.0, 291.0, 291.0, 314.0, 316.0, 291.0, 291.0, 290.0, 292.0, 313.0, 314.0, 317.0, 319.0, 296.0, 291.0, 285.0, 288.0, 318.0, 306.0, 292.0, 290.0, 268.0, 262.0, 288.0, 285.0, 265.0, 271.0, 315.0, 318.0, 316.0, 314.0, 290.0, 297.0, 295.0, 292.0, 294.0, 293.0, 297.0, 285.0, 290.0, 283.0, 291.0, 291.0, 291.0, 291.0, 317.0, 319.0, 294.0, 293.0, 317.0, 316.0, 317.0, 319.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6942631112972455, "mean_inference_ms": 1.2377770538170634, "mean_action_processing_ms": 0.13323589639973, "mean_env_wait_ms": 0.8356181316431414, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 516.0, "episode_reward_mean": 596.38, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 255.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 298.19}, "hist_stats": {"episode_reward": [636.0, 627.0, 627.0, 582.0, 630.0, 573.0, 570.0, 582.0, 627.0, 587.0, 587.0, 584.0, 573.0, 570.0, 627.0, 573.0, 584.0, 579.0, 573.0, 
516.0, 630.0, 579.0, 627.0, 630.0, 576.0, 630.0, 636.0, 630.0, 579.0, 579.0, 630.0, 636.0, 579.0, 582.0, 573.0, 582.0, 639.0, 627.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 584.0, 576.0, 576.0, 627.0, 584.0, 581.0, 630.0, 587.0, 573.0, 525.0, 624.0, 579.0, 633.0, 579.0, 587.0, 579.0, 630.0, 573.0, 630.0, 579.0, 579.0, 576.0, 630.0, 627.0, 630.0, 630.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 582.0, 582.0, 627.0, 636.0, 587.0, 573.0, 624.0, 582.0, 530.0, 573.0, 536.0, 633.0, 630.0, 587.0, 587.0, 587.0, 582.0, 573.0, 582.0, 582.0, 636.0, 587.0, 633.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 317.0, 313.0, 314.0, 311.0, 316.0, 292.0, 290.0, 315.0, 315.0, 282.0, 291.0, 280.0, 290.0, 292.0, 290.0, 311.0, 316.0, 294.0, 293.0, 293.0, 294.0, 299.0, 285.0, 281.0, 292.0, 286.0, 284.0, 313.0, 314.0, 286.0, 287.0, 288.0, 296.0, 292.0, 287.0, 294.0, 279.0, 255.0, 261.0, 311.0, 319.0, 288.0, 291.0, 308.0, 319.0, 316.0, 314.0, 280.0, 296.0, 311.0, 319.0, 322.0, 314.0, 324.0, 306.0, 284.0, 295.0, 290.0, 289.0, 313.0, 317.0, 319.0, 317.0, 291.0, 288.0, 291.0, 291.0, 277.0, 296.0, 291.0, 291.0, 319.0, 320.0, 314.0, 313.0, 314.0, 316.0, 312.0, 318.0, 291.0, 291.0, 289.0, 293.0, 291.0, 285.0, 286.0, 296.0, 301.0, 283.0, 293.0, 283.0, 289.0, 287.0, 313.0, 314.0, 290.0, 294.0, 298.0, 283.0, 311.0, 319.0, 294.0, 293.0, 287.0, 286.0, 260.0, 265.0, 313.0, 311.0, 283.0, 296.0, 317.0, 316.0, 291.0, 288.0, 305.0, 282.0, 285.0, 294.0, 320.0, 310.0, 283.0, 290.0, 316.0, 314.0, 293.0, 
286.0, 293.0, 286.0, 288.0, 288.0, 311.0, 319.0, 311.0, 316.0, 304.0, 326.0, 314.0, 316.0, 283.0, 290.0, 286.0, 296.0, 293.0, 283.0, 296.0, 291.0, 291.0, 291.0, 314.0, 316.0, 291.0, 291.0, 290.0, 292.0, 313.0, 314.0, 317.0, 319.0, 296.0, 291.0, 285.0, 288.0, 318.0, 306.0, 292.0, 290.0, 268.0, 262.0, 288.0, 285.0, 265.0, 271.0, 315.0, 318.0, 316.0, 314.0, 290.0, 297.0, 295.0, 292.0, 294.0, 293.0, 297.0, 285.0, 290.0, 283.0, 291.0, 291.0, 291.0, 291.0, 317.0, 319.0, 294.0, 293.0, 317.0, 316.0, 317.0, 319.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6942631112972455, "mean_inference_ms": 1.2377770538170634, "mean_action_processing_ms": 0.13323589639973, "mean_env_wait_ms": 0.8356181316431414, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11673600, "num_agent_steps_trained": 11673600, "num_env_steps_sampled": 5836800, "num_env_steps_trained": 5836800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5836800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11673600, "timers": {"training_iteration_time_ms": 3584.814, "learn_time_ms": 1086.882, "learn_throughput": 11776.804, "synch_weights_time_ms": 11.306}, "counters": {"num_env_steps_sampled": 5836800, "num_env_steps_trained": 5836800, "num_agent_steps_sampled": 11673600, "num_agent_steps_trained": 11673600}, "done": false, "episodes_total": 14592, "training_iteration": 456, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-04", "timestamp": 1666582204, "time_this_iter_s": 3.611459255218506, "time_total_s": 1735.4069890975952, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1735.4069890975952, "timesteps_since_restore": 0, "iterations_since_restore": 456, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.34, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 205.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.45, "shaped_reward_min": 165, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.54, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.0, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.33, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.75, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.25, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.48, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.37, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.31, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.75, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.25, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.75, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.25, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005887583829462528, "policy_loss": -0.0009819632396101952, "vf_loss": 7.737387657165527, "vf_explained_var": 0.621029794216156, "kl": 0.0028006762731820345, "entropy": 0.7610688209533691, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5849600, "num_env_steps_trained": 5849600, "num_agent_steps_sampled": 11699200, "num_agent_steps_trained": 11699200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 594.65, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 297.325}, "custom_metrics": {"sparse_reward_mean": 205.6, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 183.45, "shaped_reward_min": 165, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.54, "onion_pickup_agent_1_min": 13, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.0, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.33, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.75, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.25, "potting_onion_agent_1_min": 12, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 9, "dish_pickup_agent_1_mean": 5.67, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.22, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 9, "useful_dish_pickup_agent_1_mean": 5.48, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 9, "soup_pickup_agent_1_mean": 5.37, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.31, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.75, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.25, "optimal_onion_potting_agent_1_min": 12, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.75, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.25, "viable_onion_potting_agent_1_min": 12, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 573.0, 582.0, 639.0, 627.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 584.0, 576.0, 576.0, 627.0, 584.0, 581.0, 630.0, 587.0, 573.0, 525.0, 624.0, 579.0, 633.0, 579.0, 587.0, 579.0, 630.0, 573.0, 630.0, 579.0, 579.0, 576.0, 630.0, 627.0, 630.0, 630.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 582.0, 582.0, 627.0, 636.0, 587.0, 573.0, 624.0, 582.0, 530.0, 573.0, 536.0, 633.0, 630.0, 587.0, 587.0, 587.0, 582.0, 573.0, 582.0, 582.0, 636.0, 587.0, 633.0, 636.0, 582.0, 627.0, 579.0, 576.0, 639.0, 579.0, 630.0, 582.0, 579.0, 579.0, 582.0, 582.0, 573.0, 627.0, 582.0, 630.0, 587.0, 584.0, 587.0, 636.0, 576.0, 636.0, 573.0, 570.0, 639.0, 527.0, 590.0, 587.0, 639.0, 579.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 291.0, 291.0, 277.0, 296.0, 291.0, 291.0, 319.0, 320.0, 314.0, 
313.0, 314.0, 316.0, 312.0, 318.0, 291.0, 291.0, 289.0, 293.0, 291.0, 285.0, 286.0, 296.0, 301.0, 283.0, 293.0, 283.0, 289.0, 287.0, 313.0, 314.0, 290.0, 294.0, 298.0, 283.0, 311.0, 319.0, 294.0, 293.0, 287.0, 286.0, 260.0, 265.0, 313.0, 311.0, 283.0, 296.0, 317.0, 316.0, 291.0, 288.0, 305.0, 282.0, 285.0, 294.0, 320.0, 310.0, 283.0, 290.0, 316.0, 314.0, 293.0, 286.0, 293.0, 286.0, 288.0, 288.0, 311.0, 319.0, 311.0, 316.0, 304.0, 326.0, 314.0, 316.0, 283.0, 290.0, 286.0, 296.0, 293.0, 283.0, 296.0, 291.0, 291.0, 291.0, 314.0, 316.0, 291.0, 291.0, 290.0, 292.0, 313.0, 314.0, 317.0, 319.0, 296.0, 291.0, 285.0, 288.0, 318.0, 306.0, 292.0, 290.0, 268.0, 262.0, 288.0, 285.0, 265.0, 271.0, 315.0, 318.0, 316.0, 314.0, 290.0, 297.0, 295.0, 292.0, 294.0, 293.0, 297.0, 285.0, 290.0, 283.0, 291.0, 291.0, 291.0, 291.0, 317.0, 319.0, 294.0, 293.0, 317.0, 316.0, 317.0, 319.0, 292.0, 290.0, 309.0, 318.0, 286.0, 293.0, 291.0, 285.0, 317.0, 322.0, 293.0, 286.0, 313.0, 317.0, 292.0, 290.0, 283.0, 296.0, 283.0, 296.0, 293.0, 289.0, 286.0, 296.0, 286.0, 287.0, 311.0, 316.0, 288.0, 294.0, 315.0, 315.0, 284.0, 303.0, 293.0, 291.0, 298.0, 289.0, 320.0, 316.0, 283.0, 293.0, 322.0, 314.0, 290.0, 283.0, 277.0, 293.0, 320.0, 319.0, 268.0, 259.0, 296.0, 294.0, 288.0, 299.0, 320.0, 319.0, 286.0, 293.0, 291.0, 285.0, 288.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6942176481103501, "mean_inference_ms": 1.237682546407229, "mean_action_processing_ms": 0.13323101112203006, "mean_env_wait_ms": 0.8355603487641339, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 525.0, "episode_reward_mean": 594.65, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 259.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 297.325}, "hist_stats": {"episode_reward": [579.0, 582.0, 573.0, 582.0, 639.0, 627.0, 630.0, 630.0, 582.0, 582.0, 576.0, 582.0, 584.0, 576.0, 576.0, 627.0, 584.0, 581.0, 630.0, 
587.0, 573.0, 525.0, 624.0, 579.0, 633.0, 579.0, 587.0, 579.0, 630.0, 573.0, 630.0, 579.0, 579.0, 576.0, 630.0, 627.0, 630.0, 630.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 582.0, 582.0, 627.0, 636.0, 587.0, 573.0, 624.0, 582.0, 530.0, 573.0, 536.0, 633.0, 630.0, 587.0, 587.0, 587.0, 582.0, 573.0, 582.0, 582.0, 636.0, 587.0, 633.0, 636.0, 582.0, 627.0, 579.0, 576.0, 639.0, 579.0, 630.0, 582.0, 579.0, 579.0, 582.0, 582.0, 573.0, 627.0, 582.0, 630.0, 587.0, 584.0, 587.0, 636.0, 576.0, 636.0, 573.0, 570.0, 639.0, 527.0, 590.0, 587.0, 639.0, 579.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 291.0, 291.0, 277.0, 296.0, 291.0, 291.0, 319.0, 320.0, 314.0, 313.0, 314.0, 316.0, 312.0, 318.0, 291.0, 291.0, 289.0, 293.0, 291.0, 285.0, 286.0, 296.0, 301.0, 283.0, 293.0, 283.0, 289.0, 287.0, 313.0, 314.0, 290.0, 294.0, 298.0, 283.0, 311.0, 319.0, 294.0, 293.0, 287.0, 286.0, 260.0, 265.0, 313.0, 311.0, 283.0, 296.0, 317.0, 316.0, 291.0, 288.0, 305.0, 282.0, 285.0, 294.0, 320.0, 310.0, 283.0, 290.0, 316.0, 314.0, 293.0, 286.0, 293.0, 286.0, 288.0, 288.0, 311.0, 319.0, 311.0, 316.0, 304.0, 326.0, 314.0, 316.0, 283.0, 290.0, 286.0, 296.0, 293.0, 283.0, 296.0, 291.0, 291.0, 291.0, 314.0, 316.0, 291.0, 291.0, 290.0, 292.0, 313.0, 314.0, 317.0, 319.0, 296.0, 291.0, 285.0, 288.0, 318.0, 306.0, 292.0, 290.0, 268.0, 262.0, 288.0, 285.0, 265.0, 271.0, 315.0, 318.0, 316.0, 314.0, 290.0, 297.0, 295.0, 292.0, 294.0, 293.0, 297.0, 285.0, 290.0, 283.0, 291.0, 291.0, 291.0, 
291.0, 317.0, 319.0, 294.0, 293.0, 317.0, 316.0, 317.0, 319.0, 292.0, 290.0, 309.0, 318.0, 286.0, 293.0, 291.0, 285.0, 317.0, 322.0, 293.0, 286.0, 313.0, 317.0, 292.0, 290.0, 283.0, 296.0, 283.0, 296.0, 293.0, 289.0, 286.0, 296.0, 286.0, 287.0, 311.0, 316.0, 288.0, 294.0, 315.0, 315.0, 284.0, 303.0, 293.0, 291.0, 298.0, 289.0, 320.0, 316.0, 283.0, 293.0, 322.0, 314.0, 290.0, 283.0, 277.0, 293.0, 320.0, 319.0, 268.0, 259.0, 296.0, 294.0, 288.0, 299.0, 320.0, 319.0, 286.0, 293.0, 291.0, 285.0, 288.0, 294.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6942176481103501, "mean_inference_ms": 1.237682546407229, "mean_action_processing_ms": 0.13323101112203006, "mean_env_wait_ms": 0.8355603487641339, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11699200, "num_agent_steps_trained": 11699200, "num_env_steps_sampled": 5849600, "num_env_steps_trained": 5849600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5849600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11699200, "timers": {"training_iteration_time_ms": 3577.701, "learn_time_ms": 1083.327, "learn_throughput": 11815.452, "synch_weights_time_ms": 11.746}, "counters": {"num_env_steps_sampled": 5849600, "num_env_steps_trained": 5849600, "num_agent_steps_sampled": 11699200, "num_agent_steps_trained": 11699200}, "done": false, "episodes_total": 14624, "training_iteration": 457, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-08", "timestamp": 1666582208, "time_this_iter_s": 3.549762010574341, "time_total_s": 1738.9567511081696, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1738.9567511081696, "timesteps_since_restore": 0, "iterations_since_restore": 457, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.416666666666668, "ram_util_percent": 10.633333333333333}} +{"custom_metrics": {"sparse_reward_mean": 203.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 182.24, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.66, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.33, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.5, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.33, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.33, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0004856521845795214, "policy_loss": -0.0008793252054601908, "vf_loss": 7.77410888671875, "vf_explained_var": 0.6171582937240601, "kl": 0.0027221820782870054, "entropy": 0.7674758434295654, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5862400, "num_env_steps_trained": 5862400, "num_agent_steps_sampled": 11724800, "num_agent_steps_trained": 11724800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 589.84, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 61.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 294.92}, "custom_metrics": {"sparse_reward_mean": 203.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 182.24, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.66, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.42, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.5, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.33, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.38, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.56, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.42, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.0, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.2, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.02, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.5, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.33, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.5, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.33, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 576.0, 630.0, 627.0, 630.0, 630.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 582.0, 582.0, 627.0, 636.0, 587.0, 573.0, 624.0, 582.0, 530.0, 573.0, 536.0, 633.0, 630.0, 587.0, 587.0, 587.0, 582.0, 573.0, 582.0, 582.0, 636.0, 587.0, 633.0, 636.0, 582.0, 627.0, 579.0, 576.0, 639.0, 579.0, 630.0, 582.0, 579.0, 579.0, 582.0, 582.0, 573.0, 627.0, 582.0, 630.0, 587.0, 584.0, 587.0, 636.0, 576.0, 636.0, 573.0, 570.0, 639.0, 527.0, 590.0, 587.0, 639.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 630.0, 627.0, 633.0, 587.0, 630.0, 582.0, 587.0, 579.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 636.0, 630.0, 627.0, 582.0, 582.0, 530.0, 123.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 288.0, 288.0, 311.0, 319.0, 311.0, 316.0, 304.0, 326.0, 314.0, 
316.0, 283.0, 290.0, 286.0, 296.0, 293.0, 283.0, 296.0, 291.0, 291.0, 291.0, 314.0, 316.0, 291.0, 291.0, 290.0, 292.0, 313.0, 314.0, 317.0, 319.0, 296.0, 291.0, 285.0, 288.0, 318.0, 306.0, 292.0, 290.0, 268.0, 262.0, 288.0, 285.0, 265.0, 271.0, 315.0, 318.0, 316.0, 314.0, 290.0, 297.0, 295.0, 292.0, 294.0, 293.0, 297.0, 285.0, 290.0, 283.0, 291.0, 291.0, 291.0, 291.0, 317.0, 319.0, 294.0, 293.0, 317.0, 316.0, 317.0, 319.0, 292.0, 290.0, 309.0, 318.0, 286.0, 293.0, 291.0, 285.0, 317.0, 322.0, 293.0, 286.0, 313.0, 317.0, 292.0, 290.0, 283.0, 296.0, 283.0, 296.0, 293.0, 289.0, 286.0, 296.0, 286.0, 287.0, 311.0, 316.0, 288.0, 294.0, 315.0, 315.0, 284.0, 303.0, 293.0, 291.0, 298.0, 289.0, 320.0, 316.0, 283.0, 293.0, 322.0, 314.0, 290.0, 283.0, 277.0, 293.0, 320.0, 319.0, 268.0, 259.0, 296.0, 294.0, 288.0, 299.0, 320.0, 319.0, 286.0, 293.0, 291.0, 285.0, 288.0, 294.0, 295.0, 287.0, 288.0, 294.0, 289.0, 293.0, 283.0, 296.0, 280.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 293.0, 296.0, 283.0, 285.0, 294.0, 294.0, 282.0, 316.0, 314.0, 319.0, 308.0, 314.0, 319.0, 298.0, 289.0, 316.0, 314.0, 292.0, 290.0, 290.0, 297.0, 290.0, 289.0, 316.0, 314.0, 295.0, 287.0, 293.0, 286.0, 310.0, 317.0, 293.0, 289.0, 290.0, 289.0, 314.0, 322.0, 313.0, 317.0, 316.0, 311.0, 292.0, 290.0, 291.0, 291.0, 267.0, 263.0, 62.0, 61.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694177385931617, "mean_inference_ms": 1.2375913086644352, "mean_action_processing_ms": 0.1332256740537696, "mean_env_wait_ms": 0.8355032005845738, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 589.84, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 61.0}, "policy_reward_max": {"ppo": 326.0}, "policy_reward_mean": {"ppo": 294.92}, "hist_stats": {"episode_reward": [579.0, 576.0, 630.0, 627.0, 630.0, 630.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 582.0, 582.0, 627.0, 636.0, 587.0, 573.0, 624.0, 
582.0, 530.0, 573.0, 536.0, 633.0, 630.0, 587.0, 587.0, 587.0, 582.0, 573.0, 582.0, 582.0, 636.0, 587.0, 633.0, 636.0, 582.0, 627.0, 579.0, 576.0, 639.0, 579.0, 630.0, 582.0, 579.0, 579.0, 582.0, 582.0, 573.0, 627.0, 582.0, 630.0, 587.0, 584.0, 587.0, 636.0, 576.0, 636.0, 573.0, 570.0, 639.0, 527.0, 590.0, 587.0, 639.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 630.0, 627.0, 633.0, 587.0, 630.0, 582.0, 587.0, 579.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 636.0, 630.0, 627.0, 582.0, 582.0, 530.0, 123.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 288.0, 288.0, 311.0, 319.0, 311.0, 316.0, 304.0, 326.0, 314.0, 316.0, 283.0, 290.0, 286.0, 296.0, 293.0, 283.0, 296.0, 291.0, 291.0, 291.0, 314.0, 316.0, 291.0, 291.0, 290.0, 292.0, 313.0, 314.0, 317.0, 319.0, 296.0, 291.0, 285.0, 288.0, 318.0, 306.0, 292.0, 290.0, 268.0, 262.0, 288.0, 285.0, 265.0, 271.0, 315.0, 318.0, 316.0, 314.0, 290.0, 297.0, 295.0, 292.0, 294.0, 293.0, 297.0, 285.0, 290.0, 283.0, 291.0, 291.0, 291.0, 291.0, 317.0, 319.0, 294.0, 293.0, 317.0, 316.0, 317.0, 319.0, 292.0, 290.0, 309.0, 318.0, 286.0, 293.0, 291.0, 285.0, 317.0, 322.0, 293.0, 286.0, 313.0, 317.0, 292.0, 290.0, 283.0, 296.0, 283.0, 296.0, 293.0, 289.0, 286.0, 296.0, 286.0, 287.0, 311.0, 316.0, 288.0, 294.0, 315.0, 315.0, 284.0, 303.0, 293.0, 291.0, 298.0, 289.0, 320.0, 316.0, 283.0, 293.0, 322.0, 314.0, 290.0, 283.0, 277.0, 293.0, 320.0, 319.0, 268.0, 259.0, 296.0, 294.0, 288.0, 
299.0, 320.0, 319.0, 286.0, 293.0, 291.0, 285.0, 288.0, 294.0, 295.0, 287.0, 288.0, 294.0, 289.0, 293.0, 283.0, 296.0, 280.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 293.0, 296.0, 283.0, 285.0, 294.0, 294.0, 282.0, 316.0, 314.0, 319.0, 308.0, 314.0, 319.0, 298.0, 289.0, 316.0, 314.0, 292.0, 290.0, 290.0, 297.0, 290.0, 289.0, 316.0, 314.0, 295.0, 287.0, 293.0, 286.0, 310.0, 317.0, 293.0, 289.0, 290.0, 289.0, 314.0, 322.0, 313.0, 317.0, 316.0, 311.0, 292.0, 290.0, 291.0, 291.0, 267.0, 263.0, 62.0, 61.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.694177385931617, "mean_inference_ms": 1.2375913086644352, "mean_action_processing_ms": 0.1332256740537696, "mean_env_wait_ms": 0.8355032005845738, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11724800, "num_agent_steps_trained": 11724800, "num_env_steps_sampled": 5862400, "num_env_steps_trained": 5862400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5862400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11724800, "timers": {"training_iteration_time_ms": 3563.501, "learn_time_ms": 1067.366, "learn_throughput": 11992.141, "synch_weights_time_ms": 11.841}, "counters": {"num_env_steps_sampled": 5862400, "num_env_steps_trained": 5862400, "num_agent_steps_sampled": 11724800, "num_agent_steps_trained": 11724800}, "done": false, "episodes_total": 14656, "training_iteration": 458, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-12", "timestamp": 1666582212, "time_this_iter_s": 3.5484938621520996, "time_total_s": 1742.5052449703217, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1742.5052449703217, "timesteps_since_restore": 0, "iterations_since_restore": 458, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.54, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 181.62, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.84, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.64, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.66, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.37, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.48, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.33, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.49, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.48, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.33, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.48, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.33, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008513854118064046, "policy_loss": 0.0004615961806848645, "vf_loss": 7.743380069732666, "vf_explained_var": 0.6081850528717041, "kl": 0.0025588269345462322, "entropy": 0.7690985202789307, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5875200, "num_env_steps_trained": 5875200, "num_agent_steps_sampled": 11750400, "num_agent_steps_trained": 11750400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 588.02, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 61.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 294.01}, "custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 181.62, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.84, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.64, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.66, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.37, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.48, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.33, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.34, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.49, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.2, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.38, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.1, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 5.03, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.01, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 1, "optimal_onion_potting_agent_0_mean": 16.48, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.33, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.48, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.33, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [636.0, 587.0, 633.0, 636.0, 582.0, 627.0, 579.0, 576.0, 639.0, 579.0, 630.0, 582.0, 579.0, 579.0, 582.0, 582.0, 573.0, 627.0, 582.0, 630.0, 587.0, 584.0, 587.0, 636.0, 576.0, 636.0, 573.0, 570.0, 639.0, 527.0, 590.0, 587.0, 639.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 630.0, 627.0, 633.0, 587.0, 630.0, 582.0, 587.0, 579.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 636.0, 630.0, 627.0, 582.0, 582.0, 530.0, 123.0, 633.0, 576.0, 636.0, 479.0, 579.0, 582.0, 633.0, 630.0, 579.0, 579.0, 636.0, 576.0, 582.0, 576.0, 527.0, 582.0, 576.0, 582.0, 579.0, 576.0, 627.0, 513.0, 630.0, 582.0, 582.0, 573.0, 587.0, 582.0, 579.0, 630.0, 576.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 319.0, 294.0, 293.0, 317.0, 316.0, 317.0, 319.0, 292.0, 290.0, 309.0, 
318.0, 286.0, 293.0, 291.0, 285.0, 317.0, 322.0, 293.0, 286.0, 313.0, 317.0, 292.0, 290.0, 283.0, 296.0, 283.0, 296.0, 293.0, 289.0, 286.0, 296.0, 286.0, 287.0, 311.0, 316.0, 288.0, 294.0, 315.0, 315.0, 284.0, 303.0, 293.0, 291.0, 298.0, 289.0, 320.0, 316.0, 283.0, 293.0, 322.0, 314.0, 290.0, 283.0, 277.0, 293.0, 320.0, 319.0, 268.0, 259.0, 296.0, 294.0, 288.0, 299.0, 320.0, 319.0, 286.0, 293.0, 291.0, 285.0, 288.0, 294.0, 295.0, 287.0, 288.0, 294.0, 289.0, 293.0, 283.0, 296.0, 280.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 293.0, 296.0, 283.0, 285.0, 294.0, 294.0, 282.0, 316.0, 314.0, 319.0, 308.0, 314.0, 319.0, 298.0, 289.0, 316.0, 314.0, 292.0, 290.0, 290.0, 297.0, 290.0, 289.0, 316.0, 314.0, 295.0, 287.0, 293.0, 286.0, 310.0, 317.0, 293.0, 289.0, 290.0, 289.0, 314.0, 322.0, 313.0, 317.0, 316.0, 311.0, 292.0, 290.0, 291.0, 291.0, 267.0, 263.0, 62.0, 61.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 248.0, 231.0, 291.0, 288.0, 290.0, 292.0, 317.0, 316.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 317.0, 319.0, 286.0, 290.0, 292.0, 290.0, 285.0, 291.0, 254.0, 273.0, 294.0, 288.0, 285.0, 291.0, 288.0, 294.0, 291.0, 288.0, 293.0, 283.0, 314.0, 313.0, 265.0, 248.0, 316.0, 314.0, 287.0, 295.0, 290.0, 292.0, 294.0, 279.0, 299.0, 288.0, 293.0, 289.0, 293.0, 286.0, 314.0, 316.0, 289.0, 287.0, 309.0, 327.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6941295586287947, "mean_inference_ms": 1.2374918339988819, "mean_action_processing_ms": 0.13322028825706636, "mean_env_wait_ms": 0.8354410346542884, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 588.02, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 61.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 294.01}, "hist_stats": {"episode_reward": [636.0, 587.0, 633.0, 636.0, 582.0, 627.0, 579.0, 576.0, 639.0, 579.0, 630.0, 582.0, 579.0, 579.0, 582.0, 582.0, 573.0, 627.0, 582.0, 
630.0, 587.0, 584.0, 587.0, 636.0, 576.0, 636.0, 573.0, 570.0, 639.0, 527.0, 590.0, 587.0, 639.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 630.0, 627.0, 633.0, 587.0, 630.0, 582.0, 587.0, 579.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 636.0, 630.0, 627.0, 582.0, 582.0, 530.0, 123.0, 633.0, 576.0, 636.0, 479.0, 579.0, 582.0, 633.0, 630.0, 579.0, 579.0, 636.0, 576.0, 582.0, 576.0, 527.0, 582.0, 576.0, 582.0, 579.0, 576.0, 627.0, 513.0, 630.0, 582.0, 582.0, 573.0, 587.0, 582.0, 579.0, 630.0, 576.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [317.0, 319.0, 294.0, 293.0, 317.0, 316.0, 317.0, 319.0, 292.0, 290.0, 309.0, 318.0, 286.0, 293.0, 291.0, 285.0, 317.0, 322.0, 293.0, 286.0, 313.0, 317.0, 292.0, 290.0, 283.0, 296.0, 283.0, 296.0, 293.0, 289.0, 286.0, 296.0, 286.0, 287.0, 311.0, 316.0, 288.0, 294.0, 315.0, 315.0, 284.0, 303.0, 293.0, 291.0, 298.0, 289.0, 320.0, 316.0, 283.0, 293.0, 322.0, 314.0, 290.0, 283.0, 277.0, 293.0, 320.0, 319.0, 268.0, 259.0, 296.0, 294.0, 288.0, 299.0, 320.0, 319.0, 286.0, 293.0, 291.0, 285.0, 288.0, 294.0, 295.0, 287.0, 288.0, 294.0, 289.0, 293.0, 283.0, 296.0, 280.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 293.0, 296.0, 283.0, 285.0, 294.0, 294.0, 282.0, 316.0, 314.0, 319.0, 308.0, 314.0, 319.0, 298.0, 289.0, 316.0, 314.0, 292.0, 290.0, 290.0, 297.0, 290.0, 289.0, 316.0, 314.0, 295.0, 287.0, 293.0, 286.0, 310.0, 317.0, 293.0, 289.0, 290.0, 289.0, 314.0, 322.0, 313.0, 317.0, 316.0, 
311.0, 292.0, 290.0, 291.0, 291.0, 267.0, 263.0, 62.0, 61.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 248.0, 231.0, 291.0, 288.0, 290.0, 292.0, 317.0, 316.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 317.0, 319.0, 286.0, 290.0, 292.0, 290.0, 285.0, 291.0, 254.0, 273.0, 294.0, 288.0, 285.0, 291.0, 288.0, 294.0, 291.0, 288.0, 293.0, 283.0, 314.0, 313.0, 265.0, 248.0, 316.0, 314.0, 287.0, 295.0, 290.0, 292.0, 294.0, 279.0, 299.0, 288.0, 293.0, 289.0, 293.0, 286.0, 314.0, 316.0, 289.0, 287.0, 309.0, 327.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6941295586287947, "mean_inference_ms": 1.2374918339988819, "mean_action_processing_ms": 0.13322028825706636, "mean_env_wait_ms": 0.8354410346542884, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11750400, "num_agent_steps_trained": 11750400, "num_env_steps_sampled": 5875200, "num_env_steps_trained": 5875200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5875200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11750400, "timers": {"training_iteration_time_ms": 3554.168, "learn_time_ms": 1062.428, "learn_throughput": 12047.879, "synch_weights_time_ms": 12.479}, "counters": {"num_env_steps_sampled": 5875200, "num_env_steps_trained": 5875200, "num_agent_steps_sampled": 11750400, "num_agent_steps_trained": 11750400}, "done": false, "episodes_total": 14688, "training_iteration": 459, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-15", "timestamp": 1666582215, "time_this_iter_s": 3.5224757194519043, "time_total_s": 1746.0277206897736, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1746.0277206897736, "timesteps_since_restore": 0, "iterations_since_restore": 459, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.860000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 179.96, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.8, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.49, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.12, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.12, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.12, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0015454906970262527, "policy_loss": 0.001154657220467925, "vf_loss": 7.783836364746094, "vf_explained_var": 0.6133667826652527, "kl": 0.002887023612856865, "entropy": 0.7751001119613647, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5888000, "num_env_steps_trained": 5888000, "num_agent_steps_sampled": 11776000, "num_agent_steps_trained": 11776000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 583.56, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 61.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 291.78}, "custom_metrics": {"sparse_reward_mean": 201.8, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 179.96, "shaped_reward_min": 43, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 16.8, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.49, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.12, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.36, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.26, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.24, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.12, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.08, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.02, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.12, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.12, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [639.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 630.0, 627.0, 633.0, 587.0, 630.0, 582.0, 587.0, 579.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 636.0, 630.0, 627.0, 582.0, 582.0, 530.0, 123.0, 633.0, 576.0, 636.0, 479.0, 579.0, 582.0, 633.0, 630.0, 579.0, 579.0, 636.0, 576.0, 582.0, 576.0, 527.0, 582.0, 576.0, 582.0, 579.0, 576.0, 627.0, 513.0, 630.0, 582.0, 582.0, 573.0, 587.0, 582.0, 579.0, 630.0, 576.0, 636.0, 630.0, 582.0, 579.0, 513.0, 579.0, 587.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 576.0, 576.0, 627.0, 573.0, 576.0, 582.0, 576.0, 636.0, 573.0, 633.0, 633.0, 582.0, 516.0, 587.0, 579.0, 587.0, 525.0, 573.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [320.0, 319.0, 286.0, 293.0, 291.0, 285.0, 288.0, 294.0, 295.0, 287.0, 288.0, 
294.0, 289.0, 293.0, 283.0, 296.0, 280.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 293.0, 296.0, 283.0, 285.0, 294.0, 294.0, 282.0, 316.0, 314.0, 319.0, 308.0, 314.0, 319.0, 298.0, 289.0, 316.0, 314.0, 292.0, 290.0, 290.0, 297.0, 290.0, 289.0, 316.0, 314.0, 295.0, 287.0, 293.0, 286.0, 310.0, 317.0, 293.0, 289.0, 290.0, 289.0, 314.0, 322.0, 313.0, 317.0, 316.0, 311.0, 292.0, 290.0, 291.0, 291.0, 267.0, 263.0, 62.0, 61.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 248.0, 231.0, 291.0, 288.0, 290.0, 292.0, 317.0, 316.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 317.0, 319.0, 286.0, 290.0, 292.0, 290.0, 285.0, 291.0, 254.0, 273.0, 294.0, 288.0, 285.0, 291.0, 288.0, 294.0, 291.0, 288.0, 293.0, 283.0, 314.0, 313.0, 265.0, 248.0, 316.0, 314.0, 287.0, 295.0, 290.0, 292.0, 294.0, 279.0, 299.0, 288.0, 293.0, 289.0, 293.0, 286.0, 314.0, 316.0, 289.0, 287.0, 309.0, 327.0, 312.0, 318.0, 291.0, 291.0, 285.0, 294.0, 247.0, 266.0, 288.0, 291.0, 293.0, 294.0, 291.0, 288.0, 292.0, 290.0, 291.0, 285.0, 290.0, 289.0, 296.0, 286.0, 289.0, 293.0, 280.0, 296.0, 281.0, 295.0, 306.0, 321.0, 291.0, 282.0, 288.0, 288.0, 292.0, 290.0, 285.0, 291.0, 317.0, 319.0, 279.0, 294.0, 314.0, 319.0, 314.0, 319.0, 290.0, 292.0, 262.0, 254.0, 295.0, 292.0, 286.0, 293.0, 291.0, 296.0, 267.0, 258.0, 290.0, 283.0, 314.0, 316.0, 289.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6940799934848328, "mean_inference_ms": 1.2373845747527157, "mean_action_processing_ms": 0.1332148833976582, "mean_env_wait_ms": 0.8353725766416104, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 123.0, "episode_reward_mean": 583.56, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 61.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 291.78}, "hist_stats": {"episode_reward": [639.0, 579.0, 576.0, 582.0, 582.0, 582.0, 582.0, 579.0, 576.0, 582.0, 576.0, 582.0, 579.0, 579.0, 576.0, 630.0, 627.0, 633.0, 587.0, 
630.0, 582.0, 587.0, 579.0, 630.0, 582.0, 579.0, 627.0, 582.0, 579.0, 636.0, 630.0, 627.0, 582.0, 582.0, 530.0, 123.0, 633.0, 576.0, 636.0, 479.0, 579.0, 582.0, 633.0, 630.0, 579.0, 579.0, 636.0, 576.0, 582.0, 576.0, 527.0, 582.0, 576.0, 582.0, 579.0, 576.0, 627.0, 513.0, 630.0, 582.0, 582.0, 573.0, 587.0, 582.0, 579.0, 630.0, 576.0, 636.0, 630.0, 582.0, 579.0, 513.0, 579.0, 587.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 576.0, 576.0, 627.0, 573.0, 576.0, 582.0, 576.0, 636.0, 573.0, 633.0, 633.0, 582.0, 516.0, 587.0, 579.0, 587.0, 525.0, 573.0, 630.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [320.0, 319.0, 286.0, 293.0, 291.0, 285.0, 288.0, 294.0, 295.0, 287.0, 288.0, 294.0, 289.0, 293.0, 283.0, 296.0, 280.0, 296.0, 285.0, 297.0, 285.0, 291.0, 289.0, 293.0, 296.0, 283.0, 285.0, 294.0, 294.0, 282.0, 316.0, 314.0, 319.0, 308.0, 314.0, 319.0, 298.0, 289.0, 316.0, 314.0, 292.0, 290.0, 290.0, 297.0, 290.0, 289.0, 316.0, 314.0, 295.0, 287.0, 293.0, 286.0, 310.0, 317.0, 293.0, 289.0, 290.0, 289.0, 314.0, 322.0, 313.0, 317.0, 316.0, 311.0, 292.0, 290.0, 291.0, 291.0, 267.0, 263.0, 62.0, 61.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 248.0, 231.0, 291.0, 288.0, 290.0, 292.0, 317.0, 316.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 317.0, 319.0, 286.0, 290.0, 292.0, 290.0, 285.0, 291.0, 254.0, 273.0, 294.0, 288.0, 285.0, 291.0, 288.0, 294.0, 291.0, 288.0, 293.0, 283.0, 314.0, 313.0, 265.0, 248.0, 316.0, 314.0, 287.0, 295.0, 290.0, 292.0, 294.0, 279.0, 299.0, 288.0, 293.0, 
289.0, 293.0, 286.0, 314.0, 316.0, 289.0, 287.0, 309.0, 327.0, 312.0, 318.0, 291.0, 291.0, 285.0, 294.0, 247.0, 266.0, 288.0, 291.0, 293.0, 294.0, 291.0, 288.0, 292.0, 290.0, 291.0, 285.0, 290.0, 289.0, 296.0, 286.0, 289.0, 293.0, 280.0, 296.0, 281.0, 295.0, 306.0, 321.0, 291.0, 282.0, 288.0, 288.0, 292.0, 290.0, 285.0, 291.0, 317.0, 319.0, 279.0, 294.0, 314.0, 319.0, 314.0, 319.0, 290.0, 292.0, 262.0, 254.0, 295.0, 292.0, 286.0, 293.0, 291.0, 296.0, 267.0, 258.0, 290.0, 283.0, 314.0, 316.0, 289.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6940799934848328, "mean_inference_ms": 1.2373845747527157, "mean_action_processing_ms": 0.1332148833976582, "mean_env_wait_ms": 0.8353725766416104, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11776000, "num_agent_steps_trained": 11776000, "num_env_steps_sampled": 5888000, "num_env_steps_trained": 5888000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5888000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11776000, "timers": {"training_iteration_time_ms": 3526.638, "learn_time_ms": 1056.455, "learn_throughput": 12115.986, "synch_weights_time_ms": 12.692}, "counters": {"num_env_steps_sampled": 5888000, "num_env_steps_trained": 5888000, "num_agent_steps_sampled": 11776000, "num_agent_steps_trained": 11776000}, "done": false, "episodes_total": 14720, "training_iteration": 460, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-19", "timestamp": 1666582219, "time_this_iter_s": 3.6077170372009277, "time_total_s": 1749.6354377269745, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1749.6354377269745, "timesteps_since_restore": 0, "iterations_since_restore": 460, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.166666666666668, "ram_util_percent": 10.616666666666665}} +{"custom_metrics": {"sparse_reward_mean": 201.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 179.67, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.8, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.53, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.05, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.11, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.1, 
"optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002214742125943303, "policy_loss": -0.0026062014512717724, "vf_loss": 7.726222991943359, "vf_explained_var": 0.6182592511177063, "kl": 0.002622842788696289, "entropy": 0.7623258829116821, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5900800, "num_env_steps_trained": 5900800, "num_agent_steps_sampled": 11801600, "num_agent_steps_trained": 11801600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 582.07, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 61.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 291.035}, "custom_metrics": {"sparse_reward_mean": 201.2, "sparse_reward_min": 40, "sparse_reward_max": 220, "shaped_reward_mean": 179.67, "shaped_reward_min": 43, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.55, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.8, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.35, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.53, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 16.45, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.32, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.22, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.23, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.16, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.14, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.11, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.98, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.1, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 16.45, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 16.45, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 530.0, 123.0, 633.0, 576.0, 636.0, 479.0, 579.0, 582.0, 633.0, 630.0, 579.0, 579.0, 636.0, 576.0, 582.0, 576.0, 527.0, 582.0, 576.0, 582.0, 579.0, 576.0, 627.0, 513.0, 630.0, 582.0, 582.0, 573.0, 587.0, 582.0, 579.0, 630.0, 576.0, 636.0, 630.0, 582.0, 579.0, 513.0, 579.0, 587.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 576.0, 576.0, 627.0, 573.0, 576.0, 582.0, 576.0, 636.0, 573.0, 633.0, 633.0, 582.0, 516.0, 587.0, 579.0, 587.0, 525.0, 573.0, 630.0, 576.0, 579.0, 581.0, 630.0, 576.0, 627.0, 636.0, 633.0, 636.0, 630.0, 582.0, 582.0, 630.0, 579.0, 576.0, 579.0, 582.0, 579.0, 579.0, 630.0, 576.0, 579.0, 539.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 576.0, 636.0, 536.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 291.0, 291.0, 267.0, 263.0, 62.0, 61.0, 317.0, 316.0, 291.0, 285.0, 
319.0, 317.0, 248.0, 231.0, 291.0, 288.0, 290.0, 292.0, 317.0, 316.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 317.0, 319.0, 286.0, 290.0, 292.0, 290.0, 285.0, 291.0, 254.0, 273.0, 294.0, 288.0, 285.0, 291.0, 288.0, 294.0, 291.0, 288.0, 293.0, 283.0, 314.0, 313.0, 265.0, 248.0, 316.0, 314.0, 287.0, 295.0, 290.0, 292.0, 294.0, 279.0, 299.0, 288.0, 293.0, 289.0, 293.0, 286.0, 314.0, 316.0, 289.0, 287.0, 309.0, 327.0, 312.0, 318.0, 291.0, 291.0, 285.0, 294.0, 247.0, 266.0, 288.0, 291.0, 293.0, 294.0, 291.0, 288.0, 292.0, 290.0, 291.0, 285.0, 290.0, 289.0, 296.0, 286.0, 289.0, 293.0, 280.0, 296.0, 281.0, 295.0, 306.0, 321.0, 291.0, 282.0, 288.0, 288.0, 292.0, 290.0, 285.0, 291.0, 317.0, 319.0, 279.0, 294.0, 314.0, 319.0, 314.0, 319.0, 290.0, 292.0, 262.0, 254.0, 295.0, 292.0, 286.0, 293.0, 291.0, 296.0, 267.0, 258.0, 290.0, 283.0, 314.0, 316.0, 289.0, 287.0, 294.0, 285.0, 288.0, 293.0, 314.0, 316.0, 284.0, 292.0, 327.0, 300.0, 311.0, 325.0, 319.0, 314.0, 321.0, 315.0, 321.0, 309.0, 290.0, 292.0, 293.0, 289.0, 313.0, 317.0, 284.0, 295.0, 288.0, 288.0, 294.0, 285.0, 284.0, 298.0, 291.0, 288.0, 291.0, 288.0, 319.0, 311.0, 285.0, 291.0, 290.0, 289.0, 271.0, 268.0, 293.0, 294.0, 284.0, 295.0, 296.0, 286.0, 298.0, 284.0, 290.0, 289.0, 306.0, 321.0, 281.0, 295.0, 322.0, 314.0, 268.0, 268.0, 260.0, 265.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6940392492048548, "mean_inference_ms": 1.2372834252469516, "mean_action_processing_ms": 0.13321063444359982, "mean_env_wait_ms": 0.8353075411248264, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 123.0, "episode_reward_mean": 582.07, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 61.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 291.035}, "hist_stats": {"episode_reward": [582.0, 582.0, 530.0, 123.0, 633.0, 576.0, 636.0, 479.0, 579.0, 582.0, 633.0, 630.0, 579.0, 579.0, 636.0, 576.0, 582.0, 576.0, 527.0, 582.0, 
576.0, 582.0, 579.0, 576.0, 627.0, 513.0, 630.0, 582.0, 582.0, 573.0, 587.0, 582.0, 579.0, 630.0, 576.0, 636.0, 630.0, 582.0, 579.0, 513.0, 579.0, 587.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 576.0, 576.0, 627.0, 573.0, 576.0, 582.0, 576.0, 636.0, 573.0, 633.0, 633.0, 582.0, 516.0, 587.0, 579.0, 587.0, 525.0, 573.0, 630.0, 576.0, 579.0, 581.0, 630.0, 576.0, 627.0, 636.0, 633.0, 636.0, 630.0, 582.0, 582.0, 630.0, 579.0, 576.0, 579.0, 582.0, 579.0, 579.0, 630.0, 576.0, 579.0, 539.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 576.0, 636.0, 536.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 291.0, 291.0, 267.0, 263.0, 62.0, 61.0, 317.0, 316.0, 291.0, 285.0, 319.0, 317.0, 248.0, 231.0, 291.0, 288.0, 290.0, 292.0, 317.0, 316.0, 316.0, 314.0, 286.0, 293.0, 288.0, 291.0, 317.0, 319.0, 286.0, 290.0, 292.0, 290.0, 285.0, 291.0, 254.0, 273.0, 294.0, 288.0, 285.0, 291.0, 288.0, 294.0, 291.0, 288.0, 293.0, 283.0, 314.0, 313.0, 265.0, 248.0, 316.0, 314.0, 287.0, 295.0, 290.0, 292.0, 294.0, 279.0, 299.0, 288.0, 293.0, 289.0, 293.0, 286.0, 314.0, 316.0, 289.0, 287.0, 309.0, 327.0, 312.0, 318.0, 291.0, 291.0, 285.0, 294.0, 247.0, 266.0, 288.0, 291.0, 293.0, 294.0, 291.0, 288.0, 292.0, 290.0, 291.0, 285.0, 290.0, 289.0, 296.0, 286.0, 289.0, 293.0, 280.0, 296.0, 281.0, 295.0, 306.0, 321.0, 291.0, 282.0, 288.0, 288.0, 292.0, 290.0, 285.0, 291.0, 317.0, 319.0, 279.0, 294.0, 314.0, 319.0, 314.0, 319.0, 290.0, 292.0, 262.0, 254.0, 295.0, 292.0, 286.0, 293.0, 291.0, 296.0, 
267.0, 258.0, 290.0, 283.0, 314.0, 316.0, 289.0, 287.0, 294.0, 285.0, 288.0, 293.0, 314.0, 316.0, 284.0, 292.0, 327.0, 300.0, 311.0, 325.0, 319.0, 314.0, 321.0, 315.0, 321.0, 309.0, 290.0, 292.0, 293.0, 289.0, 313.0, 317.0, 284.0, 295.0, 288.0, 288.0, 294.0, 285.0, 284.0, 298.0, 291.0, 288.0, 291.0, 288.0, 319.0, 311.0, 285.0, 291.0, 290.0, 289.0, 271.0, 268.0, 293.0, 294.0, 284.0, 295.0, 296.0, 286.0, 298.0, 284.0, 290.0, 289.0, 306.0, 321.0, 281.0, 295.0, 322.0, 314.0, 268.0, 268.0, 260.0, 265.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6940392492048548, "mean_inference_ms": 1.2372834252469516, "mean_action_processing_ms": 0.13321063444359982, "mean_env_wait_ms": 0.8353075411248264, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11801600, "num_agent_steps_trained": 11801600, "num_env_steps_sampled": 5900800, "num_env_steps_trained": 5900800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5900800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11801600, "timers": {"training_iteration_time_ms": 3536.669, "learn_time_ms": 1064.03, "learn_throughput": 12029.737, "synch_weights_time_ms": 11.465}, "counters": {"num_env_steps_sampled": 5900800, "num_env_steps_trained": 5900800, "num_agent_steps_sampled": 11801600, "num_agent_steps_trained": 11801600}, "done": false, "episodes_total": 14752, "training_iteration": 461, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-23", "timestamp": 1666582223, "time_this_iter_s": 3.673919439315796, "time_total_s": 1753.3093571662903, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1753.3093571662903, "timesteps_since_restore": 0, "iterations_since_restore": 461, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.659999999999997, "ram_util_percent": 10.64}} +{"custom_metrics": {"sparse_reward_mean": 203.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.44, "shaped_reward_min": 153, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.83, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.83, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.62, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.08, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.39, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.43, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.26, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.33, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.39, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.43, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.39, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.43, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0009108797530643642, "policy_loss": -0.0012938372092321515, "vf_loss": 7.6942853927612305, "vf_explained_var": 0.6132245659828186, "kl": 0.002680128440260887, "entropy": 0.7729424238204956, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5913600, "num_env_steps_trained": 5913600, "num_agent_steps_sampled": 11827200, "num_agent_steps_trained": 11827200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 513.0, "episode_reward_mean": 588.24, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 247.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 294.12}, "custom_metrics": {"sparse_reward_mean": 203.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.44, "shaped_reward_min": 153, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.83, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.83, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.62, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.62, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.05, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.39, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.43, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.26, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.42, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.18, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.33, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.08, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 5.05, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.39, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.43, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.39, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.43, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 630.0, 576.0, 636.0, 630.0, 582.0, 579.0, 513.0, 579.0, 587.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 576.0, 576.0, 627.0, 573.0, 576.0, 582.0, 576.0, 636.0, 573.0, 633.0, 633.0, 582.0, 516.0, 587.0, 579.0, 587.0, 525.0, 573.0, 630.0, 576.0, 579.0, 581.0, 630.0, 576.0, 627.0, 636.0, 633.0, 636.0, 630.0, 582.0, 582.0, 630.0, 579.0, 576.0, 579.0, 582.0, 579.0, 579.0, 630.0, 576.0, 579.0, 539.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 576.0, 636.0, 536.0, 525.0, 633.0, 587.0, 582.0, 576.0, 587.0, 582.0, 579.0, 579.0, 636.0, 630.0, 576.0, 573.0, 582.0, 576.0, 527.0, 590.0, 582.0, 519.0, 633.0, 582.0, 582.0, 633.0, 579.0, 522.0, 573.0, 627.0, 630.0, 576.0, 590.0, 573.0, 576.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 314.0, 316.0, 289.0, 287.0, 309.0, 327.0, 312.0, 318.0, 291.0, 
291.0, 285.0, 294.0, 247.0, 266.0, 288.0, 291.0, 293.0, 294.0, 291.0, 288.0, 292.0, 290.0, 291.0, 285.0, 290.0, 289.0, 296.0, 286.0, 289.0, 293.0, 280.0, 296.0, 281.0, 295.0, 306.0, 321.0, 291.0, 282.0, 288.0, 288.0, 292.0, 290.0, 285.0, 291.0, 317.0, 319.0, 279.0, 294.0, 314.0, 319.0, 314.0, 319.0, 290.0, 292.0, 262.0, 254.0, 295.0, 292.0, 286.0, 293.0, 291.0, 296.0, 267.0, 258.0, 290.0, 283.0, 314.0, 316.0, 289.0, 287.0, 294.0, 285.0, 288.0, 293.0, 314.0, 316.0, 284.0, 292.0, 327.0, 300.0, 311.0, 325.0, 319.0, 314.0, 321.0, 315.0, 321.0, 309.0, 290.0, 292.0, 293.0, 289.0, 313.0, 317.0, 284.0, 295.0, 288.0, 288.0, 294.0, 285.0, 284.0, 298.0, 291.0, 288.0, 291.0, 288.0, 319.0, 311.0, 285.0, 291.0, 290.0, 289.0, 271.0, 268.0, 293.0, 294.0, 284.0, 295.0, 296.0, 286.0, 298.0, 284.0, 290.0, 289.0, 306.0, 321.0, 281.0, 295.0, 322.0, 314.0, 268.0, 268.0, 260.0, 265.0, 318.0, 315.0, 294.0, 293.0, 293.0, 289.0, 288.0, 288.0, 294.0, 293.0, 281.0, 301.0, 281.0, 298.0, 291.0, 288.0, 317.0, 319.0, 319.0, 311.0, 287.0, 289.0, 287.0, 286.0, 287.0, 295.0, 283.0, 293.0, 256.0, 271.0, 296.0, 294.0, 290.0, 292.0, 268.0, 251.0, 322.0, 311.0, 295.0, 287.0, 290.0, 292.0, 312.0, 321.0, 291.0, 288.0, 261.0, 261.0, 284.0, 289.0, 319.0, 308.0, 317.0, 313.0, 286.0, 290.0, 289.0, 301.0, 283.0, 290.0, 286.0, 290.0, 315.0, 321.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6939940852335686, "mean_inference_ms": 1.2371854185406679, "mean_action_processing_ms": 0.13320635803541322, "mean_env_wait_ms": 0.8352454574282127, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 513.0, "episode_reward_mean": 588.24, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 247.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 294.12}, "hist_stats": {"episode_reward": [579.0, 630.0, 576.0, 636.0, 630.0, 582.0, 579.0, 513.0, 579.0, 587.0, 579.0, 582.0, 576.0, 579.0, 582.0, 582.0, 576.0, 576.0, 627.0, 
573.0, 576.0, 582.0, 576.0, 636.0, 573.0, 633.0, 633.0, 582.0, 516.0, 587.0, 579.0, 587.0, 525.0, 573.0, 630.0, 576.0, 579.0, 581.0, 630.0, 576.0, 627.0, 636.0, 633.0, 636.0, 630.0, 582.0, 582.0, 630.0, 579.0, 576.0, 579.0, 582.0, 579.0, 579.0, 630.0, 576.0, 579.0, 539.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 576.0, 636.0, 536.0, 525.0, 633.0, 587.0, 582.0, 576.0, 587.0, 582.0, 579.0, 579.0, 636.0, 630.0, 576.0, 573.0, 582.0, 576.0, 527.0, 590.0, 582.0, 519.0, 633.0, 582.0, 582.0, 633.0, 579.0, 522.0, 573.0, 627.0, 630.0, 576.0, 590.0, 573.0, 576.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 286.0, 314.0, 316.0, 289.0, 287.0, 309.0, 327.0, 312.0, 318.0, 291.0, 291.0, 285.0, 294.0, 247.0, 266.0, 288.0, 291.0, 293.0, 294.0, 291.0, 288.0, 292.0, 290.0, 291.0, 285.0, 290.0, 289.0, 296.0, 286.0, 289.0, 293.0, 280.0, 296.0, 281.0, 295.0, 306.0, 321.0, 291.0, 282.0, 288.0, 288.0, 292.0, 290.0, 285.0, 291.0, 317.0, 319.0, 279.0, 294.0, 314.0, 319.0, 314.0, 319.0, 290.0, 292.0, 262.0, 254.0, 295.0, 292.0, 286.0, 293.0, 291.0, 296.0, 267.0, 258.0, 290.0, 283.0, 314.0, 316.0, 289.0, 287.0, 294.0, 285.0, 288.0, 293.0, 314.0, 316.0, 284.0, 292.0, 327.0, 300.0, 311.0, 325.0, 319.0, 314.0, 321.0, 315.0, 321.0, 309.0, 290.0, 292.0, 293.0, 289.0, 313.0, 317.0, 284.0, 295.0, 288.0, 288.0, 294.0, 285.0, 284.0, 298.0, 291.0, 288.0, 291.0, 288.0, 319.0, 311.0, 285.0, 291.0, 290.0, 289.0, 271.0, 268.0, 293.0, 294.0, 284.0, 295.0, 296.0, 286.0, 298.0, 284.0, 290.0, 289.0, 306.0, 
321.0, 281.0, 295.0, 322.0, 314.0, 268.0, 268.0, 260.0, 265.0, 318.0, 315.0, 294.0, 293.0, 293.0, 289.0, 288.0, 288.0, 294.0, 293.0, 281.0, 301.0, 281.0, 298.0, 291.0, 288.0, 317.0, 319.0, 319.0, 311.0, 287.0, 289.0, 287.0, 286.0, 287.0, 295.0, 283.0, 293.0, 256.0, 271.0, 296.0, 294.0, 290.0, 292.0, 268.0, 251.0, 322.0, 311.0, 295.0, 287.0, 290.0, 292.0, 312.0, 321.0, 291.0, 288.0, 261.0, 261.0, 284.0, 289.0, 319.0, 308.0, 317.0, 313.0, 286.0, 290.0, 289.0, 301.0, 283.0, 290.0, 286.0, 290.0, 315.0, 321.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6939940852335686, "mean_inference_ms": 1.2371854185406679, "mean_action_processing_ms": 0.13320635803541322, "mean_env_wait_ms": 0.8352454574282127, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11827200, "num_agent_steps_trained": 11827200, "num_env_steps_sampled": 5913600, "num_env_steps_trained": 5913600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5913600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11827200, "timers": {"training_iteration_time_ms": 3533.966, "learn_time_ms": 1067.253, "learn_throughput": 11993.409, "synch_weights_time_ms": 12.171}, "counters": {"num_env_steps_sampled": 5913600, "num_env_steps_trained": 5913600, "num_agent_steps_sampled": 11827200, "num_agent_steps_trained": 11827200}, "done": false, "episodes_total": 14784, "training_iteration": 462, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-27", "timestamp": 1666582227, "time_this_iter_s": 3.553008794784546, "time_total_s": 1756.8623659610748, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1756.8623659610748, "timesteps_since_restore": 0, "iterations_since_restore": 462, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.720000000000002, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.53, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.64, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.7, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.46, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.07, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.51, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.3, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.21, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.12, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.43, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.27, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.22, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.51, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.3, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.51, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.3, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0042098090052604675, "policy_loss": 0.003829691093415022, "vf_loss": 7.629401206970215, "vf_explained_var": 0.6158540844917297, "kl": 0.0028561637736856937, "entropy": 0.765643835067749, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5926400, "num_env_steps_trained": 5926400, "num_agent_steps_sampled": 11852800, "num_agent_steps_trained": 11852800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 587.93, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 293.965}, "custom_metrics": {"sparse_reward_mean": 203.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.53, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.9, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.64, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.7, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.46, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 2, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.51, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 16.3, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.21, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.12, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.43, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.1, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 5.02, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.27, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.22, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.51, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 16.3, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.51, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 16.3, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 573.0, 630.0, 576.0, 579.0, 581.0, 630.0, 576.0, 627.0, 636.0, 633.0, 636.0, 630.0, 582.0, 582.0, 630.0, 579.0, 576.0, 579.0, 582.0, 579.0, 579.0, 630.0, 576.0, 579.0, 539.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 576.0, 636.0, 536.0, 525.0, 633.0, 587.0, 582.0, 576.0, 587.0, 582.0, 579.0, 579.0, 636.0, 630.0, 576.0, 573.0, 582.0, 576.0, 527.0, 590.0, 582.0, 519.0, 633.0, 582.0, 582.0, 633.0, 579.0, 522.0, 573.0, 627.0, 630.0, 576.0, 590.0, 573.0, 576.0, 636.0, 579.0, 627.0, 582.0, 630.0, 576.0, 633.0, 525.0, 627.0, 627.0, 582.0, 522.0, 576.0, 633.0, 584.0, 579.0, 582.0, 630.0, 582.0, 533.0, 582.0, 636.0, 630.0, 579.0, 582.0, 587.0, 579.0, 530.0, 582.0, 576.0, 582.0, 579.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 258.0, 290.0, 283.0, 314.0, 316.0, 289.0, 287.0, 294.0, 285.0, 288.0, 
293.0, 314.0, 316.0, 284.0, 292.0, 327.0, 300.0, 311.0, 325.0, 319.0, 314.0, 321.0, 315.0, 321.0, 309.0, 290.0, 292.0, 293.0, 289.0, 313.0, 317.0, 284.0, 295.0, 288.0, 288.0, 294.0, 285.0, 284.0, 298.0, 291.0, 288.0, 291.0, 288.0, 319.0, 311.0, 285.0, 291.0, 290.0, 289.0, 271.0, 268.0, 293.0, 294.0, 284.0, 295.0, 296.0, 286.0, 298.0, 284.0, 290.0, 289.0, 306.0, 321.0, 281.0, 295.0, 322.0, 314.0, 268.0, 268.0, 260.0, 265.0, 318.0, 315.0, 294.0, 293.0, 293.0, 289.0, 288.0, 288.0, 294.0, 293.0, 281.0, 301.0, 281.0, 298.0, 291.0, 288.0, 317.0, 319.0, 319.0, 311.0, 287.0, 289.0, 287.0, 286.0, 287.0, 295.0, 283.0, 293.0, 256.0, 271.0, 296.0, 294.0, 290.0, 292.0, 268.0, 251.0, 322.0, 311.0, 295.0, 287.0, 290.0, 292.0, 312.0, 321.0, 291.0, 288.0, 261.0, 261.0, 284.0, 289.0, 319.0, 308.0, 317.0, 313.0, 286.0, 290.0, 289.0, 301.0, 283.0, 290.0, 286.0, 290.0, 315.0, 321.0, 288.0, 291.0, 311.0, 316.0, 293.0, 289.0, 312.0, 318.0, 291.0, 285.0, 317.0, 316.0, 260.0, 265.0, 309.0, 318.0, 314.0, 313.0, 292.0, 290.0, 251.0, 271.0, 287.0, 289.0, 315.0, 318.0, 287.0, 297.0, 289.0, 290.0, 286.0, 296.0, 309.0, 321.0, 281.0, 301.0, 274.0, 259.0, 299.0, 283.0, 314.0, 322.0, 313.0, 317.0, 283.0, 296.0, 286.0, 296.0, 291.0, 296.0, 293.0, 286.0, 260.0, 270.0, 289.0, 293.0, 283.0, 293.0, 288.0, 294.0, 285.0, 294.0, 259.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.693949131407544, "mean_inference_ms": 1.2370817107814052, "mean_action_processing_ms": 0.13320149620964622, "mean_env_wait_ms": 0.8351810086093567, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 587.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 327.0}, "policy_reward_mean": {"ppo": 293.965}, "hist_stats": {"episode_reward": [525.0, 573.0, 630.0, 576.0, 579.0, 581.0, 630.0, 576.0, 627.0, 636.0, 633.0, 636.0, 630.0, 582.0, 582.0, 630.0, 579.0, 576.0, 579.0, 
582.0, 579.0, 579.0, 630.0, 576.0, 579.0, 539.0, 587.0, 579.0, 582.0, 582.0, 579.0, 627.0, 576.0, 636.0, 536.0, 525.0, 633.0, 587.0, 582.0, 576.0, 587.0, 582.0, 579.0, 579.0, 636.0, 630.0, 576.0, 573.0, 582.0, 576.0, 527.0, 590.0, 582.0, 519.0, 633.0, 582.0, 582.0, 633.0, 579.0, 522.0, 573.0, 627.0, 630.0, 576.0, 590.0, 573.0, 576.0, 636.0, 579.0, 627.0, 582.0, 630.0, 576.0, 633.0, 525.0, 627.0, 627.0, 582.0, 522.0, 576.0, 633.0, 584.0, 579.0, 582.0, 630.0, 582.0, 533.0, 582.0, 636.0, 630.0, 579.0, 582.0, 587.0, 579.0, 530.0, 582.0, 576.0, 582.0, 579.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [267.0, 258.0, 290.0, 283.0, 314.0, 316.0, 289.0, 287.0, 294.0, 285.0, 288.0, 293.0, 314.0, 316.0, 284.0, 292.0, 327.0, 300.0, 311.0, 325.0, 319.0, 314.0, 321.0, 315.0, 321.0, 309.0, 290.0, 292.0, 293.0, 289.0, 313.0, 317.0, 284.0, 295.0, 288.0, 288.0, 294.0, 285.0, 284.0, 298.0, 291.0, 288.0, 291.0, 288.0, 319.0, 311.0, 285.0, 291.0, 290.0, 289.0, 271.0, 268.0, 293.0, 294.0, 284.0, 295.0, 296.0, 286.0, 298.0, 284.0, 290.0, 289.0, 306.0, 321.0, 281.0, 295.0, 322.0, 314.0, 268.0, 268.0, 260.0, 265.0, 318.0, 315.0, 294.0, 293.0, 293.0, 289.0, 288.0, 288.0, 294.0, 293.0, 281.0, 301.0, 281.0, 298.0, 291.0, 288.0, 317.0, 319.0, 319.0, 311.0, 287.0, 289.0, 287.0, 286.0, 287.0, 295.0, 283.0, 293.0, 256.0, 271.0, 296.0, 294.0, 290.0, 292.0, 268.0, 251.0, 322.0, 311.0, 295.0, 287.0, 290.0, 292.0, 312.0, 321.0, 291.0, 288.0, 261.0, 261.0, 284.0, 289.0, 319.0, 308.0, 317.0, 313.0, 286.0, 
290.0, 289.0, 301.0, 283.0, 290.0, 286.0, 290.0, 315.0, 321.0, 288.0, 291.0, 311.0, 316.0, 293.0, 289.0, 312.0, 318.0, 291.0, 285.0, 317.0, 316.0, 260.0, 265.0, 309.0, 318.0, 314.0, 313.0, 292.0, 290.0, 251.0, 271.0, 287.0, 289.0, 315.0, 318.0, 287.0, 297.0, 289.0, 290.0, 286.0, 296.0, 309.0, 321.0, 281.0, 301.0, 274.0, 259.0, 299.0, 283.0, 314.0, 322.0, 313.0, 317.0, 283.0, 296.0, 286.0, 296.0, 291.0, 296.0, 293.0, 286.0, 260.0, 270.0, 289.0, 293.0, 283.0, 293.0, 288.0, 294.0, 285.0, 294.0, 259.0, 260.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.693949131407544, "mean_inference_ms": 1.2370817107814052, "mean_action_processing_ms": 0.13320149620964622, "mean_env_wait_ms": 0.8351810086093567, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11852800, "num_agent_steps_trained": 11852800, "num_env_steps_sampled": 5926400, "num_env_steps_trained": 5926400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5926400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11852800, "timers": {"training_iteration_time_ms": 3539.299, "learn_time_ms": 1072.003, "learn_throughput": 11940.269, "synch_weights_time_ms": 11.492}, "counters": {"num_env_steps_sampled": 5926400, "num_env_steps_trained": 5926400, "num_agent_steps_sampled": 11852800, "num_agent_steps_trained": 11852800}, "done": false, "episodes_total": 14816, "training_iteration": 463, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-31", "timestamp": 1666582231, "time_this_iter_s": 3.610844373703003, "time_total_s": 1760.4732103347778, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1760.4732103347778, "timesteps_since_restore": 0, "iterations_since_restore": 463, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.466666666666665, "ram_util_percent": 10.616666666666665}} +{"custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.34, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.15, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.27, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.01, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.13, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.87, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.91, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.03, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.61, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.82, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.45, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.76, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.4, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.87, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.91, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.87, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.91, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0031582540832459927, "policy_loss": -0.0035429480485618114, "vf_loss": 7.66179084777832, "vf_explained_var": 0.6139554381370544, "kl": 0.002852272940799594, "entropy": 0.7629702091217041, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5939200, "num_env_steps_trained": 5939200, "num_agent_steps_sampled": 11878400, "num_agent_steps_trained": 11878400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 586.94, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.47}, "custom_metrics": {"sparse_reward_mean": 202.8, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 181.34, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.15, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.27, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.01, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 16.13, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.87, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.91, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.03, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.75, "dish_pickup_agent_1_min": 4, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.61, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.82, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.45, "soup_pickup_agent_1_min": 4, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.76, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.4, "soup_delivery_agent_1_min": 4, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.87, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.91, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.87, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.91, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 636.0, 536.0, 525.0, 633.0, 587.0, 582.0, 576.0, 587.0, 582.0, 579.0, 579.0, 636.0, 630.0, 576.0, 573.0, 582.0, 576.0, 527.0, 590.0, 582.0, 519.0, 633.0, 582.0, 582.0, 633.0, 579.0, 522.0, 573.0, 627.0, 630.0, 576.0, 590.0, 573.0, 576.0, 636.0, 579.0, 627.0, 582.0, 630.0, 576.0, 633.0, 525.0, 627.0, 627.0, 582.0, 522.0, 576.0, 633.0, 584.0, 579.0, 582.0, 630.0, 582.0, 533.0, 582.0, 636.0, 630.0, 579.0, 582.0, 587.0, 579.0, 530.0, 582.0, 576.0, 582.0, 579.0, 519.0, 582.0, 576.0, 525.0, 633.0, 579.0, 576.0, 633.0, 630.0, 582.0, 630.0, 579.0, 579.0, 573.0, 582.0, 582.0, 587.0, 579.0, 627.0, 630.0, 633.0, 579.0, 582.0, 582.0, 525.0, 579.0, 573.0, 576.0, 582.0, 587.0, 630.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 295.0, 322.0, 314.0, 268.0, 268.0, 260.0, 265.0, 318.0, 315.0, 294.0, 
293.0, 293.0, 289.0, 288.0, 288.0, 294.0, 293.0, 281.0, 301.0, 281.0, 298.0, 291.0, 288.0, 317.0, 319.0, 319.0, 311.0, 287.0, 289.0, 287.0, 286.0, 287.0, 295.0, 283.0, 293.0, 256.0, 271.0, 296.0, 294.0, 290.0, 292.0, 268.0, 251.0, 322.0, 311.0, 295.0, 287.0, 290.0, 292.0, 312.0, 321.0, 291.0, 288.0, 261.0, 261.0, 284.0, 289.0, 319.0, 308.0, 317.0, 313.0, 286.0, 290.0, 289.0, 301.0, 283.0, 290.0, 286.0, 290.0, 315.0, 321.0, 288.0, 291.0, 311.0, 316.0, 293.0, 289.0, 312.0, 318.0, 291.0, 285.0, 317.0, 316.0, 260.0, 265.0, 309.0, 318.0, 314.0, 313.0, 292.0, 290.0, 251.0, 271.0, 287.0, 289.0, 315.0, 318.0, 287.0, 297.0, 289.0, 290.0, 286.0, 296.0, 309.0, 321.0, 281.0, 301.0, 274.0, 259.0, 299.0, 283.0, 314.0, 322.0, 313.0, 317.0, 283.0, 296.0, 286.0, 296.0, 291.0, 296.0, 293.0, 286.0, 260.0, 270.0, 289.0, 293.0, 283.0, 293.0, 288.0, 294.0, 285.0, 294.0, 259.0, 260.0, 289.0, 293.0, 289.0, 287.0, 260.0, 265.0, 308.0, 325.0, 291.0, 288.0, 282.0, 294.0, 317.0, 316.0, 317.0, 313.0, 289.0, 293.0, 311.0, 319.0, 288.0, 291.0, 288.0, 291.0, 287.0, 286.0, 290.0, 292.0, 289.0, 293.0, 297.0, 290.0, 297.0, 282.0, 322.0, 305.0, 314.0, 316.0, 314.0, 319.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 262.0, 263.0, 289.0, 290.0, 288.0, 285.0, 293.0, 283.0, 290.0, 292.0, 287.0, 300.0, 319.0, 311.0, 288.0, 294.0, 288.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6938957952732318, "mean_inference_ms": 1.237007163913863, "mean_action_processing_ms": 0.13319639842425457, "mean_env_wait_ms": 0.8351925904711837, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 586.94, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 293.47}, "hist_stats": {"episode_reward": [576.0, 636.0, 536.0, 525.0, 633.0, 587.0, 582.0, 576.0, 587.0, 582.0, 579.0, 579.0, 636.0, 630.0, 576.0, 573.0, 582.0, 576.0, 527.0, 
590.0, 582.0, 519.0, 633.0, 582.0, 582.0, 633.0, 579.0, 522.0, 573.0, 627.0, 630.0, 576.0, 590.0, 573.0, 576.0, 636.0, 579.0, 627.0, 582.0, 630.0, 576.0, 633.0, 525.0, 627.0, 627.0, 582.0, 522.0, 576.0, 633.0, 584.0, 579.0, 582.0, 630.0, 582.0, 533.0, 582.0, 636.0, 630.0, 579.0, 582.0, 587.0, 579.0, 530.0, 582.0, 576.0, 582.0, 579.0, 519.0, 582.0, 576.0, 525.0, 633.0, 579.0, 576.0, 633.0, 630.0, 582.0, 630.0, 579.0, 579.0, 573.0, 582.0, 582.0, 587.0, 579.0, 627.0, 630.0, 633.0, 579.0, 582.0, 582.0, 525.0, 579.0, 573.0, 576.0, 582.0, 587.0, 630.0, 582.0, 587.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [281.0, 295.0, 322.0, 314.0, 268.0, 268.0, 260.0, 265.0, 318.0, 315.0, 294.0, 293.0, 293.0, 289.0, 288.0, 288.0, 294.0, 293.0, 281.0, 301.0, 281.0, 298.0, 291.0, 288.0, 317.0, 319.0, 319.0, 311.0, 287.0, 289.0, 287.0, 286.0, 287.0, 295.0, 283.0, 293.0, 256.0, 271.0, 296.0, 294.0, 290.0, 292.0, 268.0, 251.0, 322.0, 311.0, 295.0, 287.0, 290.0, 292.0, 312.0, 321.0, 291.0, 288.0, 261.0, 261.0, 284.0, 289.0, 319.0, 308.0, 317.0, 313.0, 286.0, 290.0, 289.0, 301.0, 283.0, 290.0, 286.0, 290.0, 315.0, 321.0, 288.0, 291.0, 311.0, 316.0, 293.0, 289.0, 312.0, 318.0, 291.0, 285.0, 317.0, 316.0, 260.0, 265.0, 309.0, 318.0, 314.0, 313.0, 292.0, 290.0, 251.0, 271.0, 287.0, 289.0, 315.0, 318.0, 287.0, 297.0, 289.0, 290.0, 286.0, 296.0, 309.0, 321.0, 281.0, 301.0, 274.0, 259.0, 299.0, 283.0, 314.0, 322.0, 313.0, 317.0, 283.0, 296.0, 286.0, 296.0, 291.0, 296.0, 293.0, 286.0, 260.0, 270.0, 289.0, 
293.0, 283.0, 293.0, 288.0, 294.0, 285.0, 294.0, 259.0, 260.0, 289.0, 293.0, 289.0, 287.0, 260.0, 265.0, 308.0, 325.0, 291.0, 288.0, 282.0, 294.0, 317.0, 316.0, 317.0, 313.0, 289.0, 293.0, 311.0, 319.0, 288.0, 291.0, 288.0, 291.0, 287.0, 286.0, 290.0, 292.0, 289.0, 293.0, 297.0, 290.0, 297.0, 282.0, 322.0, 305.0, 314.0, 316.0, 314.0, 319.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 262.0, 263.0, 289.0, 290.0, 288.0, 285.0, 293.0, 283.0, 290.0, 292.0, 287.0, 300.0, 319.0, 311.0, 288.0, 294.0, 288.0, 299.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6938957952732318, "mean_inference_ms": 1.237007163913863, "mean_action_processing_ms": 0.13319639842425457, "mean_env_wait_ms": 0.8351925904711837, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11878400, "num_agent_steps_trained": 11878400, "num_env_steps_sampled": 5939200, "num_env_steps_trained": 5939200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5939200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11878400, "timers": {"training_iteration_time_ms": 3545.968, "learn_time_ms": 1064.613, "learn_throughput": 12023.149, "synch_weights_time_ms": 12.154}, "counters": {"num_env_steps_sampled": 5939200, "num_env_steps_trained": 5939200, "num_agent_steps_sampled": 11878400, "num_agent_steps_trained": 11878400}, "done": false, "episodes_total": 14848, "training_iteration": 464, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-34", "timestamp": 1666582234, "time_this_iter_s": 3.704625129699707, "time_total_s": 1764.1778354644775, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1764.1778354644775, "timesteps_since_restore": 0, "iterations_since_restore": 464, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.779999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.39, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.54, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.7, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.39, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.54, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 17.24, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.38, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.88, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.81, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.78, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.68, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.56, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.5, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 17.24, 
"optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.38, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 17.24, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.38, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0005369747523218393, "policy_loss": 0.00015844125300645828, "vf_loss": 7.604679107666016, "vf_explained_var": 0.6035102605819702, "kl": 0.003216771874576807, "entropy": 0.7638680338859558, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5952000, "num_env_steps_trained": 5952000, "num_agent_steps_sampled": 11904000, "num_agent_steps_trained": 11904000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 584.79, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 292.395}, "custom_metrics": {"sparse_reward_mean": 202.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 180.39, "shaped_reward_min": 159, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.54, "onion_pickup_agent_0_min": 13, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.7, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 17.39, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.54, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 17.24, "potting_onion_agent_0_min": 12, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.38, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.88, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.81, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.78, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.68, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.08, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.56, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.61, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.5, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 17.24, "optimal_onion_potting_agent_0_min": 12, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.38, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 17.24, "viable_onion_potting_agent_0_min": 12, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.38, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [590.0, 573.0, 576.0, 636.0, 579.0, 627.0, 582.0, 630.0, 576.0, 633.0, 525.0, 627.0, 627.0, 582.0, 522.0, 576.0, 633.0, 584.0, 579.0, 582.0, 630.0, 582.0, 533.0, 582.0, 636.0, 630.0, 579.0, 582.0, 587.0, 579.0, 530.0, 582.0, 576.0, 582.0, 579.0, 519.0, 582.0, 576.0, 525.0, 633.0, 579.0, 576.0, 633.0, 630.0, 582.0, 630.0, 579.0, 579.0, 573.0, 582.0, 582.0, 587.0, 579.0, 627.0, 630.0, 633.0, 579.0, 582.0, 582.0, 525.0, 579.0, 573.0, 576.0, 582.0, 587.0, 630.0, 582.0, 587.0, 636.0, 530.0, 536.0, 573.0, 576.0, 582.0, 570.0, 570.0, 630.0, 587.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 579.0, 530.0, 579.0, 522.0, 630.0, 630.0, 576.0, 579.0, 576.0, 573.0, 522.0, 579.0, 633.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 301.0, 283.0, 290.0, 286.0, 290.0, 315.0, 321.0, 288.0, 291.0, 311.0, 
316.0, 293.0, 289.0, 312.0, 318.0, 291.0, 285.0, 317.0, 316.0, 260.0, 265.0, 309.0, 318.0, 314.0, 313.0, 292.0, 290.0, 251.0, 271.0, 287.0, 289.0, 315.0, 318.0, 287.0, 297.0, 289.0, 290.0, 286.0, 296.0, 309.0, 321.0, 281.0, 301.0, 274.0, 259.0, 299.0, 283.0, 314.0, 322.0, 313.0, 317.0, 283.0, 296.0, 286.0, 296.0, 291.0, 296.0, 293.0, 286.0, 260.0, 270.0, 289.0, 293.0, 283.0, 293.0, 288.0, 294.0, 285.0, 294.0, 259.0, 260.0, 289.0, 293.0, 289.0, 287.0, 260.0, 265.0, 308.0, 325.0, 291.0, 288.0, 282.0, 294.0, 317.0, 316.0, 317.0, 313.0, 289.0, 293.0, 311.0, 319.0, 288.0, 291.0, 288.0, 291.0, 287.0, 286.0, 290.0, 292.0, 289.0, 293.0, 297.0, 290.0, 297.0, 282.0, 322.0, 305.0, 314.0, 316.0, 314.0, 319.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 262.0, 263.0, 289.0, 290.0, 288.0, 285.0, 293.0, 283.0, 290.0, 292.0, 287.0, 300.0, 319.0, 311.0, 288.0, 294.0, 288.0, 299.0, 320.0, 316.0, 263.0, 267.0, 269.0, 267.0, 283.0, 290.0, 295.0, 281.0, 292.0, 290.0, 280.0, 290.0, 286.0, 284.0, 317.0, 313.0, 299.0, 288.0, 284.0, 289.0, 286.0, 290.0, 296.0, 277.0, 283.0, 290.0, 296.0, 283.0, 289.0, 290.0, 296.0, 283.0, 258.0, 272.0, 287.0, 292.0, 266.0, 256.0, 313.0, 317.0, 310.0, 320.0, 287.0, 289.0, 291.0, 288.0, 289.0, 287.0, 285.0, 288.0, 258.0, 264.0, 296.0, 283.0, 313.0, 320.0, 294.0, 288.0, 293.0, 283.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6938517527377859, "mean_inference_ms": 1.236971528906978, "mean_action_processing_ms": 0.13319191119677623, "mean_env_wait_ms": 0.8352425477668859, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 519.0, "episode_reward_mean": 584.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 251.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 292.395}, "hist_stats": {"episode_reward": [590.0, 573.0, 576.0, 636.0, 579.0, 627.0, 582.0, 630.0, 576.0, 633.0, 525.0, 627.0, 627.0, 582.0, 522.0, 576.0, 633.0, 584.0, 579.0, 
582.0, 630.0, 582.0, 533.0, 582.0, 636.0, 630.0, 579.0, 582.0, 587.0, 579.0, 530.0, 582.0, 576.0, 582.0, 579.0, 519.0, 582.0, 576.0, 525.0, 633.0, 579.0, 576.0, 633.0, 630.0, 582.0, 630.0, 579.0, 579.0, 573.0, 582.0, 582.0, 587.0, 579.0, 627.0, 630.0, 633.0, 579.0, 582.0, 582.0, 525.0, 579.0, 573.0, 576.0, 582.0, 587.0, 630.0, 582.0, 587.0, 636.0, 530.0, 536.0, 573.0, 576.0, 582.0, 570.0, 570.0, 630.0, 587.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 579.0, 530.0, 579.0, 522.0, 630.0, 630.0, 576.0, 579.0, 576.0, 573.0, 522.0, 579.0, 633.0, 582.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 301.0, 283.0, 290.0, 286.0, 290.0, 315.0, 321.0, 288.0, 291.0, 311.0, 316.0, 293.0, 289.0, 312.0, 318.0, 291.0, 285.0, 317.0, 316.0, 260.0, 265.0, 309.0, 318.0, 314.0, 313.0, 292.0, 290.0, 251.0, 271.0, 287.0, 289.0, 315.0, 318.0, 287.0, 297.0, 289.0, 290.0, 286.0, 296.0, 309.0, 321.0, 281.0, 301.0, 274.0, 259.0, 299.0, 283.0, 314.0, 322.0, 313.0, 317.0, 283.0, 296.0, 286.0, 296.0, 291.0, 296.0, 293.0, 286.0, 260.0, 270.0, 289.0, 293.0, 283.0, 293.0, 288.0, 294.0, 285.0, 294.0, 259.0, 260.0, 289.0, 293.0, 289.0, 287.0, 260.0, 265.0, 308.0, 325.0, 291.0, 288.0, 282.0, 294.0, 317.0, 316.0, 317.0, 313.0, 289.0, 293.0, 311.0, 319.0, 288.0, 291.0, 288.0, 291.0, 287.0, 286.0, 290.0, 292.0, 289.0, 293.0, 297.0, 290.0, 297.0, 282.0, 322.0, 305.0, 314.0, 316.0, 314.0, 319.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 262.0, 263.0, 289.0, 290.0, 288.0, 285.0, 293.0, 283.0, 290.0, 
292.0, 287.0, 300.0, 319.0, 311.0, 288.0, 294.0, 288.0, 299.0, 320.0, 316.0, 263.0, 267.0, 269.0, 267.0, 283.0, 290.0, 295.0, 281.0, 292.0, 290.0, 280.0, 290.0, 286.0, 284.0, 317.0, 313.0, 299.0, 288.0, 284.0, 289.0, 286.0, 290.0, 296.0, 277.0, 283.0, 290.0, 296.0, 283.0, 289.0, 290.0, 296.0, 283.0, 258.0, 272.0, 287.0, 292.0, 266.0, 256.0, 313.0, 317.0, 310.0, 320.0, 287.0, 289.0, 291.0, 288.0, 289.0, 287.0, 285.0, 288.0, 258.0, 264.0, 296.0, 283.0, 313.0, 320.0, 294.0, 288.0, 293.0, 283.0, 292.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6938517527377859, "mean_inference_ms": 1.236971528906978, "mean_action_processing_ms": 0.13319191119677623, "mean_env_wait_ms": 0.8352425477668859, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11904000, "num_agent_steps_trained": 11904000, "num_env_steps_sampled": 5952000, "num_env_steps_trained": 5952000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5952000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11904000, "timers": {"training_iteration_time_ms": 3551.553, "learn_time_ms": 1050.795, "learn_throughput": 12181.259, "synch_weights_time_ms": 12.202}, "counters": {"num_env_steps_sampled": 5952000, "num_env_steps_trained": 5952000, "num_agent_steps_sampled": 11904000, "num_agent_steps_trained": 11904000}, "done": false, "episodes_total": 14880, "training_iteration": 465, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-38", "timestamp": 1666582238, "time_this_iter_s": 3.6914851665496826, "time_total_s": 1767.8693206310272, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1767.8693206310272, "timesteps_since_restore": 0, "iterations_since_restore": 465, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.166666666666668, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 202.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.98, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.56, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.63, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 17.41, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.48, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.07, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 17.26, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.29, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.91, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.69, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.83, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.58, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.69, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.53, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.48, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 17.26, 
"optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.29, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 17.26, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.29, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0025697732344269753, "policy_loss": -0.0029487167485058308, "vf_loss": 7.580999851226807, "vf_explained_var": 0.590236485004425, "kl": 0.0032103369012475014, "entropy": 0.7583142518997192, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5964800, "num_env_steps_trained": 5964800, "num_agent_steps_sampled": 11929600, "num_agent_steps_trained": 11929600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 584.78, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 292.39}, "custom_metrics": {"sparse_reward_mean": 202.4, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 179.98, "shaped_reward_min": 159, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.56, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.63, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 17.41, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.48, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 17.26, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.29, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.91, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.69, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.83, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.58, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, 
"soup_pickup_agent_0_mean": 4.69, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.53, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.64, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.48, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 17.26, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.29, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 17.26, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.29, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [576.0, 582.0, 579.0, 519.0, 582.0, 576.0, 525.0, 633.0, 579.0, 576.0, 633.0, 630.0, 582.0, 630.0, 579.0, 579.0, 573.0, 582.0, 582.0, 587.0, 579.0, 627.0, 630.0, 633.0, 579.0, 582.0, 582.0, 525.0, 579.0, 573.0, 576.0, 582.0, 587.0, 630.0, 582.0, 587.0, 636.0, 530.0, 536.0, 573.0, 576.0, 582.0, 570.0, 570.0, 630.0, 587.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 579.0, 530.0, 579.0, 522.0, 630.0, 630.0, 576.0, 579.0, 576.0, 573.0, 522.0, 579.0, 633.0, 582.0, 576.0, 582.0, 630.0, 627.0, 579.0, 630.0, 527.0, 582.0, 630.0, 579.0, 587.0, 573.0, 570.0, 582.0, 584.0, 627.0, 576.0, 633.0, 579.0, 630.0, 567.0, 579.0, 582.0, 579.0, 582.0, 633.0, 579.0, 639.0, 570.0, 576.0, 582.0, 579.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 288.0, 294.0, 285.0, 294.0, 259.0, 260.0, 289.0, 293.0, 289.0, 
287.0, 260.0, 265.0, 308.0, 325.0, 291.0, 288.0, 282.0, 294.0, 317.0, 316.0, 317.0, 313.0, 289.0, 293.0, 311.0, 319.0, 288.0, 291.0, 288.0, 291.0, 287.0, 286.0, 290.0, 292.0, 289.0, 293.0, 297.0, 290.0, 297.0, 282.0, 322.0, 305.0, 314.0, 316.0, 314.0, 319.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 262.0, 263.0, 289.0, 290.0, 288.0, 285.0, 293.0, 283.0, 290.0, 292.0, 287.0, 300.0, 319.0, 311.0, 288.0, 294.0, 288.0, 299.0, 320.0, 316.0, 263.0, 267.0, 269.0, 267.0, 283.0, 290.0, 295.0, 281.0, 292.0, 290.0, 280.0, 290.0, 286.0, 284.0, 317.0, 313.0, 299.0, 288.0, 284.0, 289.0, 286.0, 290.0, 296.0, 277.0, 283.0, 290.0, 296.0, 283.0, 289.0, 290.0, 296.0, 283.0, 258.0, 272.0, 287.0, 292.0, 266.0, 256.0, 313.0, 317.0, 310.0, 320.0, 287.0, 289.0, 291.0, 288.0, 289.0, 287.0, 285.0, 288.0, 258.0, 264.0, 296.0, 283.0, 313.0, 320.0, 294.0, 288.0, 293.0, 283.0, 292.0, 290.0, 314.0, 316.0, 308.0, 319.0, 290.0, 289.0, 316.0, 314.0, 252.0, 275.0, 288.0, 294.0, 313.0, 317.0, 283.0, 296.0, 288.0, 299.0, 289.0, 284.0, 285.0, 285.0, 292.0, 290.0, 296.0, 288.0, 314.0, 313.0, 286.0, 290.0, 320.0, 313.0, 288.0, 291.0, 319.0, 311.0, 282.0, 285.0, 294.0, 285.0, 291.0, 291.0, 289.0, 290.0, 296.0, 286.0, 318.0, 315.0, 292.0, 287.0, 319.0, 320.0, 278.0, 292.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 269.0, 256.0, 286.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.693806990818135, "mean_inference_ms": 1.2369434972732614, "mean_action_processing_ms": 0.13318722167200414, "mean_env_wait_ms": 0.8352965205333814, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 519.0, "episode_reward_mean": 584.78, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 252.0}, "policy_reward_max": {"ppo": 325.0}, "policy_reward_mean": {"ppo": 292.39}, "hist_stats": {"episode_reward": [576.0, 582.0, 579.0, 519.0, 582.0, 576.0, 525.0, 633.0, 579.0, 576.0, 633.0, 630.0, 582.0, 630.0, 579.0, 579.0, 573.0, 582.0, 582.0, 
587.0, 579.0, 627.0, 630.0, 633.0, 579.0, 582.0, 582.0, 525.0, 579.0, 573.0, 576.0, 582.0, 587.0, 630.0, 582.0, 587.0, 636.0, 530.0, 536.0, 573.0, 576.0, 582.0, 570.0, 570.0, 630.0, 587.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 579.0, 530.0, 579.0, 522.0, 630.0, 630.0, 576.0, 579.0, 576.0, 573.0, 522.0, 579.0, 633.0, 582.0, 576.0, 582.0, 630.0, 627.0, 579.0, 630.0, 527.0, 582.0, 630.0, 579.0, 587.0, 573.0, 570.0, 582.0, 584.0, 627.0, 576.0, 633.0, 579.0, 630.0, 567.0, 579.0, 582.0, 579.0, 582.0, 633.0, 579.0, 639.0, 570.0, 576.0, 582.0, 579.0, 525.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 293.0, 288.0, 294.0, 285.0, 294.0, 259.0, 260.0, 289.0, 293.0, 289.0, 287.0, 260.0, 265.0, 308.0, 325.0, 291.0, 288.0, 282.0, 294.0, 317.0, 316.0, 317.0, 313.0, 289.0, 293.0, 311.0, 319.0, 288.0, 291.0, 288.0, 291.0, 287.0, 286.0, 290.0, 292.0, 289.0, 293.0, 297.0, 290.0, 297.0, 282.0, 322.0, 305.0, 314.0, 316.0, 314.0, 319.0, 290.0, 289.0, 289.0, 293.0, 291.0, 291.0, 262.0, 263.0, 289.0, 290.0, 288.0, 285.0, 293.0, 283.0, 290.0, 292.0, 287.0, 300.0, 319.0, 311.0, 288.0, 294.0, 288.0, 299.0, 320.0, 316.0, 263.0, 267.0, 269.0, 267.0, 283.0, 290.0, 295.0, 281.0, 292.0, 290.0, 280.0, 290.0, 286.0, 284.0, 317.0, 313.0, 299.0, 288.0, 284.0, 289.0, 286.0, 290.0, 296.0, 277.0, 283.0, 290.0, 296.0, 283.0, 289.0, 290.0, 296.0, 283.0, 258.0, 272.0, 287.0, 292.0, 266.0, 256.0, 313.0, 317.0, 310.0, 320.0, 287.0, 289.0, 291.0, 288.0, 289.0, 287.0, 285.0, 288.0, 258.0, 264.0, 296.0, 
283.0, 313.0, 320.0, 294.0, 288.0, 293.0, 283.0, 292.0, 290.0, 314.0, 316.0, 308.0, 319.0, 290.0, 289.0, 316.0, 314.0, 252.0, 275.0, 288.0, 294.0, 313.0, 317.0, 283.0, 296.0, 288.0, 299.0, 289.0, 284.0, 285.0, 285.0, 292.0, 290.0, 296.0, 288.0, 314.0, 313.0, 286.0, 290.0, 320.0, 313.0, 288.0, 291.0, 319.0, 311.0, 282.0, 285.0, 294.0, 285.0, 291.0, 291.0, 289.0, 290.0, 296.0, 286.0, 318.0, 315.0, 292.0, 287.0, 319.0, 320.0, 278.0, 292.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 269.0, 256.0, 286.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.693806990818135, "mean_inference_ms": 1.2369434972732614, "mean_action_processing_ms": 0.13318722167200414, "mean_env_wait_ms": 0.8352965205333814, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11929600, "num_agent_steps_trained": 11929600, "num_env_steps_sampled": 5964800, "num_env_steps_trained": 5964800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5964800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11929600, "timers": {"training_iteration_time_ms": 3551.928, "learn_time_ms": 1053.252, "learn_throughput": 12152.835, "synch_weights_time_ms": 12.209}, "counters": {"num_env_steps_sampled": 5964800, "num_env_steps_trained": 5964800, "num_agent_steps_sampled": 11929600, "num_agent_steps_trained": 11929600}, "done": false, "episodes_total": 14912, "training_iteration": 466, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-42", "timestamp": 1666582242, "time_this_iter_s": 3.598388195037842, "time_total_s": 1771.467708826065, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1771.467708826065, "timesteps_since_restore": 0, "iterations_since_restore": 466, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.7, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 176.98, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.26, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.33, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 17.08, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.15, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.93, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.0, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.77, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.66, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.41, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.61, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.32, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.93, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.0, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.93, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.0, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003803360741585493, "policy_loss": -0.0007807364454492927, "vf_loss": 7.784968852996826, "vf_explained_var": 0.5778146386146545, "kl": 0.004238889552652836, "entropy": 0.7561917304992676, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5977600, "num_env_steps_trained": 5977600, "num_agent_steps_sampled": 11955200, "num_agent_steps_trained": 11955200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 573.38, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.69}, "custom_metrics": {"sparse_reward_mean": 198.2, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 176.98, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.26, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.33, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 17.08, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.15, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.08, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.93, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.0, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.64, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.77, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.51, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, 
"soup_pickup_agent_0_mean": 4.66, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.41, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.61, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.32, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.93, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.0, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.93, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.0, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 630.0, 582.0, 587.0, 636.0, 530.0, 536.0, 573.0, 576.0, 582.0, 570.0, 570.0, 630.0, 587.0, 573.0, 576.0, 573.0, 573.0, 579.0, 579.0, 579.0, 530.0, 579.0, 522.0, 630.0, 630.0, 576.0, 579.0, 576.0, 573.0, 522.0, 579.0, 633.0, 582.0, 576.0, 582.0, 630.0, 627.0, 579.0, 630.0, 527.0, 582.0, 630.0, 579.0, 587.0, 573.0, 570.0, 582.0, 584.0, 627.0, 576.0, 633.0, 579.0, 630.0, 567.0, 579.0, 582.0, 579.0, 582.0, 633.0, 579.0, 639.0, 570.0, 576.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 522.0, 570.0, 576.0, 525.0, 576.0, 627.0, 576.0, 576.0, 627.0, 587.0, 582.0, 584.0, 525.0, 570.0, 582.0, 576.0, 582.0, 473.0, 582.0, 579.0, 579.0, 525.0, 587.0, 576.0, 66.0, 582.0, 587.0, 582.0, 354.0, 590.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 300.0, 319.0, 311.0, 288.0, 294.0, 288.0, 299.0, 320.0, 316.0, 263.0, 
267.0, 269.0, 267.0, 283.0, 290.0, 295.0, 281.0, 292.0, 290.0, 280.0, 290.0, 286.0, 284.0, 317.0, 313.0, 299.0, 288.0, 284.0, 289.0, 286.0, 290.0, 296.0, 277.0, 283.0, 290.0, 296.0, 283.0, 289.0, 290.0, 296.0, 283.0, 258.0, 272.0, 287.0, 292.0, 266.0, 256.0, 313.0, 317.0, 310.0, 320.0, 287.0, 289.0, 291.0, 288.0, 289.0, 287.0, 285.0, 288.0, 258.0, 264.0, 296.0, 283.0, 313.0, 320.0, 294.0, 288.0, 293.0, 283.0, 292.0, 290.0, 314.0, 316.0, 308.0, 319.0, 290.0, 289.0, 316.0, 314.0, 252.0, 275.0, 288.0, 294.0, 313.0, 317.0, 283.0, 296.0, 288.0, 299.0, 289.0, 284.0, 285.0, 285.0, 292.0, 290.0, 296.0, 288.0, 314.0, 313.0, 286.0, 290.0, 320.0, 313.0, 288.0, 291.0, 319.0, 311.0, 282.0, 285.0, 294.0, 285.0, 291.0, 291.0, 289.0, 290.0, 296.0, 286.0, 318.0, 315.0, 292.0, 287.0, 319.0, 320.0, 278.0, 292.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 269.0, 256.0, 286.0, 287.0, 291.0, 288.0, 288.0, 299.0, 263.0, 259.0, 284.0, 286.0, 288.0, 288.0, 263.0, 262.0, 288.0, 288.0, 321.0, 306.0, 288.0, 288.0, 294.0, 282.0, 311.0, 316.0, 288.0, 299.0, 288.0, 294.0, 299.0, 285.0, 263.0, 262.0, 290.0, 280.0, 292.0, 290.0, 285.0, 291.0, 293.0, 289.0, 231.0, 242.0, 290.0, 292.0, 294.0, 285.0, 282.0, 297.0, 263.0, 262.0, 301.0, 286.0, 290.0, 286.0, 31.0, 35.0, 287.0, 295.0, 295.0, 292.0, 289.0, 293.0, 175.0, 179.0, 289.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6937653903818739, "mean_inference_ms": 1.2368836731151416, "mean_action_processing_ms": 0.13318189319245324, "mean_env_wait_ms": 0.8352902993544786, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 573.38, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.69}, "hist_stats": {"episode_reward": [587.0, 630.0, 582.0, 587.0, 636.0, 530.0, 536.0, 573.0, 576.0, 582.0, 570.0, 570.0, 630.0, 587.0, 573.0, 576.0, 573.0, 573.0, 579.0, 
579.0, 579.0, 530.0, 579.0, 522.0, 630.0, 630.0, 576.0, 579.0, 576.0, 573.0, 522.0, 579.0, 633.0, 582.0, 576.0, 582.0, 630.0, 627.0, 579.0, 630.0, 527.0, 582.0, 630.0, 579.0, 587.0, 573.0, 570.0, 582.0, 584.0, 627.0, 576.0, 633.0, 579.0, 630.0, 567.0, 579.0, 582.0, 579.0, 582.0, 633.0, 579.0, 639.0, 570.0, 576.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 522.0, 570.0, 576.0, 525.0, 576.0, 627.0, 576.0, 576.0, 627.0, 587.0, 582.0, 584.0, 525.0, 570.0, 582.0, 576.0, 582.0, 473.0, 582.0, 579.0, 579.0, 525.0, 587.0, 576.0, 66.0, 582.0, 587.0, 582.0, 354.0, 590.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 300.0, 319.0, 311.0, 288.0, 294.0, 288.0, 299.0, 320.0, 316.0, 263.0, 267.0, 269.0, 267.0, 283.0, 290.0, 295.0, 281.0, 292.0, 290.0, 280.0, 290.0, 286.0, 284.0, 317.0, 313.0, 299.0, 288.0, 284.0, 289.0, 286.0, 290.0, 296.0, 277.0, 283.0, 290.0, 296.0, 283.0, 289.0, 290.0, 296.0, 283.0, 258.0, 272.0, 287.0, 292.0, 266.0, 256.0, 313.0, 317.0, 310.0, 320.0, 287.0, 289.0, 291.0, 288.0, 289.0, 287.0, 285.0, 288.0, 258.0, 264.0, 296.0, 283.0, 313.0, 320.0, 294.0, 288.0, 293.0, 283.0, 292.0, 290.0, 314.0, 316.0, 308.0, 319.0, 290.0, 289.0, 316.0, 314.0, 252.0, 275.0, 288.0, 294.0, 313.0, 317.0, 283.0, 296.0, 288.0, 299.0, 289.0, 284.0, 285.0, 285.0, 292.0, 290.0, 296.0, 288.0, 314.0, 313.0, 286.0, 290.0, 320.0, 313.0, 288.0, 291.0, 319.0, 311.0, 282.0, 285.0, 294.0, 285.0, 291.0, 291.0, 289.0, 290.0, 296.0, 286.0, 318.0, 315.0, 292.0, 287.0, 319.0, 320.0, 278.0, 292.0, 284.0, 
292.0, 293.0, 289.0, 286.0, 293.0, 269.0, 256.0, 286.0, 287.0, 291.0, 288.0, 288.0, 299.0, 263.0, 259.0, 284.0, 286.0, 288.0, 288.0, 263.0, 262.0, 288.0, 288.0, 321.0, 306.0, 288.0, 288.0, 294.0, 282.0, 311.0, 316.0, 288.0, 299.0, 288.0, 294.0, 299.0, 285.0, 263.0, 262.0, 290.0, 280.0, 292.0, 290.0, 285.0, 291.0, 293.0, 289.0, 231.0, 242.0, 290.0, 292.0, 294.0, 285.0, 282.0, 297.0, 263.0, 262.0, 301.0, 286.0, 290.0, 286.0, 31.0, 35.0, 287.0, 295.0, 295.0, 292.0, 289.0, 293.0, 175.0, 179.0, 289.0, 301.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6937653903818739, "mean_inference_ms": 1.2368836731151416, "mean_action_processing_ms": 0.13318189319245324, "mean_env_wait_ms": 0.8352902993544786, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11955200, "num_agent_steps_trained": 11955200, "num_env_steps_sampled": 5977600, "num_env_steps_trained": 5977600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5977600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11955200, "timers": {"training_iteration_time_ms": 3555.436, "learn_time_ms": 1057.925, "learn_throughput": 12099.152, "synch_weights_time_ms": 11.784}, "counters": {"num_env_steps_sampled": 5977600, "num_env_steps_trained": 5977600, "num_agent_steps_sampled": 11955200, "num_agent_steps_trained": 11955200}, "done": false, "episodes_total": 14944, "training_iteration": 467, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-46", "timestamp": 1666582246, "time_this_iter_s": 3.579774856567383, "time_total_s": 1775.0474836826324, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1775.0474836826324, "timesteps_since_restore": 0, "iterations_since_restore": 467, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.880000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 177.08, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.71, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.56, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.41, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.98, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.76, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.27, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.73, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.41, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.41, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0024767438881099224, "policy_loss": -0.002866453258320689, "vf_loss": 7.718918323516846, "vf_explained_var": 0.5764654874801636, "kl": 0.0026280293241143227, "entropy": 0.7643646597862244, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 5990400, "num_env_steps_trained": 5990400, "num_agent_steps_sampled": 11980800, "num_agent_steps_trained": 11980800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 572.68, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.34}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 177.08, "shaped_reward_min": 26, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.86, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 24, "onion_pickup_agent_1_mean": 15.71, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.71, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 24, "useful_onion_pickup_agent_1_mean": 15.56, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.07, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.57, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.41, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.98, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.59, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 4.76, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.27, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.73, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.19, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.57, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.41, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.57, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.41, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [633.0, 582.0, 576.0, 582.0, 630.0, 627.0, 579.0, 630.0, 527.0, 582.0, 630.0, 579.0, 587.0, 573.0, 570.0, 582.0, 584.0, 627.0, 576.0, 633.0, 579.0, 630.0, 567.0, 579.0, 582.0, 579.0, 582.0, 633.0, 579.0, 639.0, 570.0, 576.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 522.0, 570.0, 576.0, 525.0, 576.0, 627.0, 576.0, 576.0, 627.0, 587.0, 582.0, 584.0, 525.0, 570.0, 582.0, 576.0, 582.0, 473.0, 582.0, 579.0, 579.0, 525.0, 587.0, 576.0, 66.0, 582.0, 587.0, 582.0, 354.0, 590.0, 576.0, 587.0, 525.0, 633.0, 582.0, 530.0, 582.0, 630.0, 582.0, 633.0, 582.0, 633.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 582.0, 405.0, 573.0, 582.0, 582.0, 582.0, 544.0, 573.0, 576.0, 573.0, 579.0, 579.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 320.0, 294.0, 288.0, 293.0, 283.0, 292.0, 290.0, 314.0, 316.0, 308.0, 
319.0, 290.0, 289.0, 316.0, 314.0, 252.0, 275.0, 288.0, 294.0, 313.0, 317.0, 283.0, 296.0, 288.0, 299.0, 289.0, 284.0, 285.0, 285.0, 292.0, 290.0, 296.0, 288.0, 314.0, 313.0, 286.0, 290.0, 320.0, 313.0, 288.0, 291.0, 319.0, 311.0, 282.0, 285.0, 294.0, 285.0, 291.0, 291.0, 289.0, 290.0, 296.0, 286.0, 318.0, 315.0, 292.0, 287.0, 319.0, 320.0, 278.0, 292.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 269.0, 256.0, 286.0, 287.0, 291.0, 288.0, 288.0, 299.0, 263.0, 259.0, 284.0, 286.0, 288.0, 288.0, 263.0, 262.0, 288.0, 288.0, 321.0, 306.0, 288.0, 288.0, 294.0, 282.0, 311.0, 316.0, 288.0, 299.0, 288.0, 294.0, 299.0, 285.0, 263.0, 262.0, 290.0, 280.0, 292.0, 290.0, 285.0, 291.0, 293.0, 289.0, 231.0, 242.0, 290.0, 292.0, 294.0, 285.0, 282.0, 297.0, 263.0, 262.0, 301.0, 286.0, 290.0, 286.0, 31.0, 35.0, 287.0, 295.0, 295.0, 292.0, 289.0, 293.0, 175.0, 179.0, 289.0, 301.0, 291.0, 285.0, 296.0, 291.0, 260.0, 265.0, 314.0, 319.0, 291.0, 291.0, 262.0, 268.0, 294.0, 288.0, 314.0, 316.0, 291.0, 291.0, 314.0, 319.0, 294.0, 288.0, 314.0, 319.0, 299.0, 283.0, 285.0, 288.0, 289.0, 293.0, 289.0, 290.0, 286.0, 293.0, 288.0, 291.0, 284.0, 298.0, 191.0, 214.0, 291.0, 282.0, 288.0, 294.0, 294.0, 288.0, 292.0, 290.0, 266.0, 278.0, 286.0, 287.0, 290.0, 286.0, 286.0, 287.0, 286.0, 293.0, 294.0, 285.0, 287.0, 292.0, 288.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6937253357167055, "mean_inference_ms": 1.2367844547201465, "mean_action_processing_ms": 0.1331752417353079, "mean_env_wait_ms": 0.8352193484259972, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 66.0, "episode_reward_mean": 572.68, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 286.34}, "hist_stats": {"episode_reward": [633.0, 582.0, 576.0, 582.0, 630.0, 627.0, 579.0, 630.0, 527.0, 582.0, 630.0, 579.0, 587.0, 573.0, 570.0, 582.0, 584.0, 627.0, 576.0, 
633.0, 579.0, 630.0, 567.0, 579.0, 582.0, 579.0, 582.0, 633.0, 579.0, 639.0, 570.0, 576.0, 582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 522.0, 570.0, 576.0, 525.0, 576.0, 627.0, 576.0, 576.0, 627.0, 587.0, 582.0, 584.0, 525.0, 570.0, 582.0, 576.0, 582.0, 473.0, 582.0, 579.0, 579.0, 525.0, 587.0, 576.0, 66.0, 582.0, 587.0, 582.0, 354.0, 590.0, 576.0, 587.0, 525.0, 633.0, 582.0, 530.0, 582.0, 630.0, 582.0, 633.0, 582.0, 633.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 582.0, 405.0, 573.0, 582.0, 582.0, 582.0, 544.0, 573.0, 576.0, 573.0, 579.0, 579.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 320.0, 294.0, 288.0, 293.0, 283.0, 292.0, 290.0, 314.0, 316.0, 308.0, 319.0, 290.0, 289.0, 316.0, 314.0, 252.0, 275.0, 288.0, 294.0, 313.0, 317.0, 283.0, 296.0, 288.0, 299.0, 289.0, 284.0, 285.0, 285.0, 292.0, 290.0, 296.0, 288.0, 314.0, 313.0, 286.0, 290.0, 320.0, 313.0, 288.0, 291.0, 319.0, 311.0, 282.0, 285.0, 294.0, 285.0, 291.0, 291.0, 289.0, 290.0, 296.0, 286.0, 318.0, 315.0, 292.0, 287.0, 319.0, 320.0, 278.0, 292.0, 284.0, 292.0, 293.0, 289.0, 286.0, 293.0, 269.0, 256.0, 286.0, 287.0, 291.0, 288.0, 288.0, 299.0, 263.0, 259.0, 284.0, 286.0, 288.0, 288.0, 263.0, 262.0, 288.0, 288.0, 321.0, 306.0, 288.0, 288.0, 294.0, 282.0, 311.0, 316.0, 288.0, 299.0, 288.0, 294.0, 299.0, 285.0, 263.0, 262.0, 290.0, 280.0, 292.0, 290.0, 285.0, 291.0, 293.0, 289.0, 231.0, 242.0, 290.0, 292.0, 294.0, 285.0, 282.0, 297.0, 263.0, 262.0, 301.0, 286.0, 290.0, 286.0, 31.0, 35.0, 287.0, 
295.0, 295.0, 292.0, 289.0, 293.0, 175.0, 179.0, 289.0, 301.0, 291.0, 285.0, 296.0, 291.0, 260.0, 265.0, 314.0, 319.0, 291.0, 291.0, 262.0, 268.0, 294.0, 288.0, 314.0, 316.0, 291.0, 291.0, 314.0, 319.0, 294.0, 288.0, 314.0, 319.0, 299.0, 283.0, 285.0, 288.0, 289.0, 293.0, 289.0, 290.0, 286.0, 293.0, 288.0, 291.0, 284.0, 298.0, 191.0, 214.0, 291.0, 282.0, 288.0, 294.0, 294.0, 288.0, 292.0, 290.0, 266.0, 278.0, 286.0, 287.0, 290.0, 286.0, 286.0, 287.0, 286.0, 293.0, 294.0, 285.0, 287.0, 292.0, 288.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6937253357167055, "mean_inference_ms": 1.2367844547201465, "mean_action_processing_ms": 0.1331752417353079, "mean_env_wait_ms": 0.8352193484259972, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 11980800, "num_agent_steps_trained": 11980800, "num_env_steps_sampled": 5990400, "num_env_steps_trained": 5990400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 5990400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 11980800, "timers": {"training_iteration_time_ms": 3566.61, "learn_time_ms": 1066.856, "learn_throughput": 11997.872, "synch_weights_time_ms": 12.749}, "counters": {"num_env_steps_sampled": 5990400, "num_env_steps_trained": 5990400, "num_agent_steps_sampled": 11980800, "num_agent_steps_trained": 11980800}, "done": false, "episodes_total": 14976, "training_iteration": 468, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-50", "timestamp": 1666582250, "time_this_iter_s": 3.6539394855499268, "time_total_s": 1778.7014231681824, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1778.7014231681824, "timesteps_since_restore": 0, "iterations_since_restore": 468, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.416666666666668, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 177.19, "shaped_reward_min": 26, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.69, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 16.86, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.51, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.68, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.39, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.58, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.87, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.75, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.17, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.68, 
"optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.39, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.68, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.39, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016068818513303995, "policy_loss": 0.0012329340679571033, "vf_loss": 7.566259384155273, "vf_explained_var": 0.6088271737098694, "kl": 0.002691782545298338, "entropy": 0.7653552293777466, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6003200, "num_env_steps_trained": 6003200, "num_agent_steps_sampled": 12006400, "num_agent_steps_trained": 12006400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 66.0, "episode_reward_mean": 572.39, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 286.195}, "custom_metrics": {"sparse_reward_mean": 197.6, "sparse_reward_min": 20, "sparse_reward_max": 220, "shaped_reward_mean": 177.19, "shaped_reward_min": 26, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.97, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.69, "onion_pickup_agent_1_min": 6, "onion_pickup_agent_1_max": 20, "useful_onion_pickup_agent_0_mean": 16.86, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.51, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.68, "potting_onion_agent_0_min": 1, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.39, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.58, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.87, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.44, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.09, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.04, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 4.75, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.26, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.17, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.68, "optimal_onion_potting_agent_0_min": 1, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.39, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.68, "viable_onion_potting_agent_0_min": 1, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.39, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 522.0, 570.0, 576.0, 525.0, 576.0, 627.0, 576.0, 576.0, 627.0, 587.0, 582.0, 584.0, 525.0, 570.0, 582.0, 576.0, 582.0, 473.0, 582.0, 579.0, 579.0, 525.0, 587.0, 576.0, 66.0, 582.0, 587.0, 582.0, 354.0, 590.0, 576.0, 587.0, 525.0, 633.0, 582.0, 530.0, 582.0, 630.0, 582.0, 633.0, 582.0, 633.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 582.0, 405.0, 573.0, 582.0, 582.0, 582.0, 544.0, 573.0, 576.0, 573.0, 579.0, 579.0, 579.0, 576.0, 627.0, 579.0, 579.0, 636.0, 579.0, 573.0, 579.0, 636.0, 630.0, 582.0, 579.0, 579.0, 576.0, 636.0, 627.0, 636.0, 519.0, 627.0, 582.0, 587.0, 579.0, 587.0, 582.0, 636.0, 579.0, 633.0, 579.0, 579.0, 579.0, 570.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 286.0, 293.0, 269.0, 256.0, 286.0, 287.0, 291.0, 288.0, 288.0, 
299.0, 263.0, 259.0, 284.0, 286.0, 288.0, 288.0, 263.0, 262.0, 288.0, 288.0, 321.0, 306.0, 288.0, 288.0, 294.0, 282.0, 311.0, 316.0, 288.0, 299.0, 288.0, 294.0, 299.0, 285.0, 263.0, 262.0, 290.0, 280.0, 292.0, 290.0, 285.0, 291.0, 293.0, 289.0, 231.0, 242.0, 290.0, 292.0, 294.0, 285.0, 282.0, 297.0, 263.0, 262.0, 301.0, 286.0, 290.0, 286.0, 31.0, 35.0, 287.0, 295.0, 295.0, 292.0, 289.0, 293.0, 175.0, 179.0, 289.0, 301.0, 291.0, 285.0, 296.0, 291.0, 260.0, 265.0, 314.0, 319.0, 291.0, 291.0, 262.0, 268.0, 294.0, 288.0, 314.0, 316.0, 291.0, 291.0, 314.0, 319.0, 294.0, 288.0, 314.0, 319.0, 299.0, 283.0, 285.0, 288.0, 289.0, 293.0, 289.0, 290.0, 286.0, 293.0, 288.0, 291.0, 284.0, 298.0, 191.0, 214.0, 291.0, 282.0, 288.0, 294.0, 294.0, 288.0, 292.0, 290.0, 266.0, 278.0, 286.0, 287.0, 290.0, 286.0, 286.0, 287.0, 286.0, 293.0, 294.0, 285.0, 287.0, 292.0, 288.0, 288.0, 316.0, 311.0, 291.0, 288.0, 285.0, 294.0, 317.0, 319.0, 288.0, 291.0, 288.0, 285.0, 293.0, 286.0, 319.0, 317.0, 315.0, 315.0, 299.0, 283.0, 288.0, 291.0, 291.0, 288.0, 284.0, 292.0, 320.0, 316.0, 316.0, 311.0, 312.0, 324.0, 266.0, 253.0, 311.0, 316.0, 293.0, 289.0, 288.0, 299.0, 286.0, 293.0, 288.0, 299.0, 297.0, 285.0, 317.0, 319.0, 286.0, 293.0, 320.0, 313.0, 287.0, 292.0, 286.0, 293.0, 288.0, 291.0, 290.0, 280.0, 258.0, 264.0, 295.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6936868282656675, "mean_inference_ms": 1.2366718724154204, "mean_action_processing_ms": 0.133169323115993, "mean_env_wait_ms": 0.8351444322158054, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 66.0, "episode_reward_mean": 572.39, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 31.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 286.195}, "hist_stats": {"episode_reward": [582.0, 579.0, 525.0, 573.0, 579.0, 587.0, 522.0, 570.0, 576.0, 525.0, 576.0, 627.0, 576.0, 576.0, 627.0, 587.0, 582.0, 584.0, 525.0, 
570.0, 582.0, 576.0, 582.0, 473.0, 582.0, 579.0, 579.0, 525.0, 587.0, 576.0, 66.0, 582.0, 587.0, 582.0, 354.0, 590.0, 576.0, 587.0, 525.0, 633.0, 582.0, 530.0, 582.0, 630.0, 582.0, 633.0, 582.0, 633.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 582.0, 405.0, 573.0, 582.0, 582.0, 582.0, 544.0, 573.0, 576.0, 573.0, 579.0, 579.0, 579.0, 576.0, 627.0, 579.0, 579.0, 636.0, 579.0, 573.0, 579.0, 636.0, 630.0, 582.0, 579.0, 579.0, 576.0, 636.0, 627.0, 636.0, 519.0, 627.0, 582.0, 587.0, 579.0, 587.0, 582.0, 636.0, 579.0, 633.0, 579.0, 579.0, 579.0, 570.0, 522.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 286.0, 293.0, 269.0, 256.0, 286.0, 287.0, 291.0, 288.0, 288.0, 299.0, 263.0, 259.0, 284.0, 286.0, 288.0, 288.0, 263.0, 262.0, 288.0, 288.0, 321.0, 306.0, 288.0, 288.0, 294.0, 282.0, 311.0, 316.0, 288.0, 299.0, 288.0, 294.0, 299.0, 285.0, 263.0, 262.0, 290.0, 280.0, 292.0, 290.0, 285.0, 291.0, 293.0, 289.0, 231.0, 242.0, 290.0, 292.0, 294.0, 285.0, 282.0, 297.0, 263.0, 262.0, 301.0, 286.0, 290.0, 286.0, 31.0, 35.0, 287.0, 295.0, 295.0, 292.0, 289.0, 293.0, 175.0, 179.0, 289.0, 301.0, 291.0, 285.0, 296.0, 291.0, 260.0, 265.0, 314.0, 319.0, 291.0, 291.0, 262.0, 268.0, 294.0, 288.0, 314.0, 316.0, 291.0, 291.0, 314.0, 319.0, 294.0, 288.0, 314.0, 319.0, 299.0, 283.0, 285.0, 288.0, 289.0, 293.0, 289.0, 290.0, 286.0, 293.0, 288.0, 291.0, 284.0, 298.0, 191.0, 214.0, 291.0, 282.0, 288.0, 294.0, 294.0, 288.0, 292.0, 290.0, 266.0, 278.0, 286.0, 287.0, 290.0, 286.0, 286.0, 
287.0, 286.0, 293.0, 294.0, 285.0, 287.0, 292.0, 288.0, 288.0, 316.0, 311.0, 291.0, 288.0, 285.0, 294.0, 317.0, 319.0, 288.0, 291.0, 288.0, 285.0, 293.0, 286.0, 319.0, 317.0, 315.0, 315.0, 299.0, 283.0, 288.0, 291.0, 291.0, 288.0, 284.0, 292.0, 320.0, 316.0, 316.0, 311.0, 312.0, 324.0, 266.0, 253.0, 311.0, 316.0, 293.0, 289.0, 288.0, 299.0, 286.0, 293.0, 288.0, 299.0, 297.0, 285.0, 317.0, 319.0, 286.0, 293.0, 320.0, 313.0, 287.0, 292.0, 286.0, 293.0, 288.0, 291.0, 290.0, 280.0, 258.0, 264.0, 295.0, 287.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6936868282656675, "mean_inference_ms": 1.2366718724154204, "mean_action_processing_ms": 0.133169323115993, "mean_env_wait_ms": 0.8351444322158054, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12006400, "num_agent_steps_trained": 12006400, "num_env_steps_sampled": 6003200, "num_env_steps_trained": 6003200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6003200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12006400, "timers": {"training_iteration_time_ms": 3573.074, "learn_time_ms": 1067.591, "learn_throughput": 11989.609, "synch_weights_time_ms": 11.929}, "counters": {"num_env_steps_sampled": 6003200, "num_env_steps_trained": 6003200, "num_agent_steps_sampled": 12006400, "num_agent_steps_trained": 12006400}, "done": false, "episodes_total": 15008, "training_iteration": 469, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-53", "timestamp": 1666582253, "time_this_iter_s": 3.6023151874542236, "time_total_s": 1782.3037383556366, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1782.3037383556366, "timesteps_since_restore": 0, "iterations_since_restore": 469, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.7, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.41, "shaped_reward_min": 114, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.3, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.98, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 17.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.9, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.77, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.02, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.52, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, "soup_pickup_agent_0_mean": 4.84, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.37, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.9, 
"optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.77, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.9, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.77, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004652185889426619, "policy_loss": 9.670722647570074e-05, "vf_loss": 7.476959228515625, "vf_explained_var": 0.5936903953552246, "kl": 0.002335474593564868, "entropy": 0.7583696246147156, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6016000, "num_env_steps_trained": 6016000, "num_agent_steps_sampled": 12032000, "num_agent_steps_trained": 12032000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 354.0, "episode_reward_mean": 583.61, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 291.805}, "custom_metrics": {"sparse_reward_mean": 201.6, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 180.41, "shaped_reward_min": 114, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, 
"potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.3, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.98, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 17.21, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.9, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.77, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.02, "dish_pickup_agent_0_min": 2, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.65, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 9, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.52, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 3, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.03, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 2, 
"soup_pickup_agent_0_mean": 4.84, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.37, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.25, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.9, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.77, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.9, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.77, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, 
"catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 582.0, 354.0, 590.0, 576.0, 587.0, 525.0, 633.0, 582.0, 530.0, 582.0, 630.0, 582.0, 633.0, 582.0, 633.0, 582.0, 573.0, 582.0, 579.0, 579.0, 579.0, 582.0, 405.0, 573.0, 582.0, 582.0, 582.0, 544.0, 573.0, 576.0, 573.0, 579.0, 579.0, 579.0, 576.0, 627.0, 579.0, 579.0, 636.0, 579.0, 573.0, 579.0, 636.0, 630.0, 582.0, 579.0, 579.0, 576.0, 636.0, 627.0, 636.0, 519.0, 627.0, 582.0, 587.0, 579.0, 587.0, 582.0, 636.0, 579.0, 633.0, 579.0, 579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 579.0, 633.0, 582.0, 582.0, 630.0, 576.0, 582.0, 570.0, 582.0, 570.0, 633.0, 587.0, 582.0, 579.0, 633.0, 584.0, 456.0, 630.0, 582.0, 587.0, 582.0, 582.0, 573.0, 587.0, 587.0, 519.0, 627.0, 582.0, 630.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 292.0, 289.0, 293.0, 175.0, 179.0, 289.0, 301.0, 291.0, 285.0, 296.0, 
291.0, 260.0, 265.0, 314.0, 319.0, 291.0, 291.0, 262.0, 268.0, 294.0, 288.0, 314.0, 316.0, 291.0, 291.0, 314.0, 319.0, 294.0, 288.0, 314.0, 319.0, 299.0, 283.0, 285.0, 288.0, 289.0, 293.0, 289.0, 290.0, 286.0, 293.0, 288.0, 291.0, 284.0, 298.0, 191.0, 214.0, 291.0, 282.0, 288.0, 294.0, 294.0, 288.0, 292.0, 290.0, 266.0, 278.0, 286.0, 287.0, 290.0, 286.0, 286.0, 287.0, 286.0, 293.0, 294.0, 285.0, 287.0, 292.0, 288.0, 288.0, 316.0, 311.0, 291.0, 288.0, 285.0, 294.0, 317.0, 319.0, 288.0, 291.0, 288.0, 285.0, 293.0, 286.0, 319.0, 317.0, 315.0, 315.0, 299.0, 283.0, 288.0, 291.0, 291.0, 288.0, 284.0, 292.0, 320.0, 316.0, 316.0, 311.0, 312.0, 324.0, 266.0, 253.0, 311.0, 316.0, 293.0, 289.0, 288.0, 299.0, 286.0, 293.0, 288.0, 299.0, 297.0, 285.0, 317.0, 319.0, 286.0, 293.0, 320.0, 313.0, 287.0, 292.0, 286.0, 293.0, 288.0, 291.0, 290.0, 280.0, 258.0, 264.0, 295.0, 287.0, 292.0, 290.0, 287.0, 292.0, 314.0, 319.0, 288.0, 294.0, 296.0, 286.0, 314.0, 316.0, 292.0, 284.0, 296.0, 286.0, 294.0, 276.0, 293.0, 289.0, 287.0, 283.0, 315.0, 318.0, 297.0, 290.0, 290.0, 292.0, 286.0, 293.0, 317.0, 316.0, 291.0, 293.0, 223.0, 233.0, 309.0, 321.0, 291.0, 291.0, 289.0, 298.0, 289.0, 293.0, 294.0, 288.0, 279.0, 294.0, 296.0, 291.0, 286.0, 301.0, 256.0, 263.0, 314.0, 313.0, 285.0, 297.0, 316.0, 314.0, 314.0, 316.0, 319.0, 320.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6936436685004764, "mean_inference_ms": 1.23656257555819, "mean_action_processing_ms": 0.13316370238818215, "mean_env_wait_ms": 0.8350735735228274, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 354.0, "episode_reward_mean": 583.61, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 175.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 291.805}, "hist_stats": {"episode_reward": [587.0, 582.0, 354.0, 590.0, 576.0, 587.0, 525.0, 633.0, 582.0, 530.0, 582.0, 630.0, 582.0, 633.0, 582.0, 633.0, 582.0, 573.0, 582.0, 
579.0, 579.0, 579.0, 582.0, 405.0, 573.0, 582.0, 582.0, 582.0, 544.0, 573.0, 576.0, 573.0, 579.0, 579.0, 579.0, 576.0, 627.0, 579.0, 579.0, 636.0, 579.0, 573.0, 579.0, 636.0, 630.0, 582.0, 579.0, 579.0, 576.0, 636.0, 627.0, 636.0, 519.0, 627.0, 582.0, 587.0, 579.0, 587.0, 582.0, 636.0, 579.0, 633.0, 579.0, 579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 579.0, 633.0, 582.0, 582.0, 630.0, 576.0, 582.0, 570.0, 582.0, 570.0, 633.0, 587.0, 582.0, 579.0, 633.0, 584.0, 456.0, 630.0, 582.0, 587.0, 582.0, 582.0, 573.0, 587.0, 587.0, 519.0, 627.0, 582.0, 630.0, 630.0, 639.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [295.0, 292.0, 289.0, 293.0, 175.0, 179.0, 289.0, 301.0, 291.0, 285.0, 296.0, 291.0, 260.0, 265.0, 314.0, 319.0, 291.0, 291.0, 262.0, 268.0, 294.0, 288.0, 314.0, 316.0, 291.0, 291.0, 314.0, 319.0, 294.0, 288.0, 314.0, 319.0, 299.0, 283.0, 285.0, 288.0, 289.0, 293.0, 289.0, 290.0, 286.0, 293.0, 288.0, 291.0, 284.0, 298.0, 191.0, 214.0, 291.0, 282.0, 288.0, 294.0, 294.0, 288.0, 292.0, 290.0, 266.0, 278.0, 286.0, 287.0, 290.0, 286.0, 286.0, 287.0, 286.0, 293.0, 294.0, 285.0, 287.0, 292.0, 288.0, 288.0, 316.0, 311.0, 291.0, 288.0, 285.0, 294.0, 317.0, 319.0, 288.0, 291.0, 288.0, 285.0, 293.0, 286.0, 319.0, 317.0, 315.0, 315.0, 299.0, 283.0, 288.0, 291.0, 291.0, 288.0, 284.0, 292.0, 320.0, 316.0, 316.0, 311.0, 312.0, 324.0, 266.0, 253.0, 311.0, 316.0, 293.0, 289.0, 288.0, 299.0, 286.0, 293.0, 288.0, 299.0, 297.0, 285.0, 317.0, 319.0, 286.0, 293.0, 320.0, 313.0, 287.0, 292.0, 286.0, 
293.0, 288.0, 291.0, 290.0, 280.0, 258.0, 264.0, 295.0, 287.0, 292.0, 290.0, 287.0, 292.0, 314.0, 319.0, 288.0, 294.0, 296.0, 286.0, 314.0, 316.0, 292.0, 284.0, 296.0, 286.0, 294.0, 276.0, 293.0, 289.0, 287.0, 283.0, 315.0, 318.0, 297.0, 290.0, 290.0, 292.0, 286.0, 293.0, 317.0, 316.0, 291.0, 293.0, 223.0, 233.0, 309.0, 321.0, 291.0, 291.0, 289.0, 298.0, 289.0, 293.0, 294.0, 288.0, 279.0, 294.0, 296.0, 291.0, 286.0, 301.0, 256.0, 263.0, 314.0, 313.0, 285.0, 297.0, 316.0, 314.0, 314.0, 316.0, 319.0, 320.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6936436685004764, "mean_inference_ms": 1.23656257555819, "mean_action_processing_ms": 0.13316370238818215, "mean_env_wait_ms": 0.8350735735228274, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12032000, "num_agent_steps_trained": 12032000, "num_env_steps_sampled": 6016000, "num_env_steps_trained": 6016000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6016000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12032000, "timers": {"training_iteration_time_ms": 3573.205, "learn_time_ms": 1072.241, "learn_throughput": 11937.62, "synch_weights_time_ms": 11.283}, "counters": {"num_env_steps_sampled": 6016000, "num_env_steps_trained": 6016000, "num_agent_steps_sampled": 12032000, "num_agent_steps_trained": 12032000}, "done": false, "episodes_total": 15040, "training_iteration": 470, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-30-57", "timestamp": 1666582257, "time_this_iter_s": 3.593625545501709, "time_total_s": 1785.8973639011383, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, 
"placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": 
"deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, 
"custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": 
false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, 
"evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", 
"framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1785.8973639011383, "timesteps_since_restore": 0, "iterations_since_restore": 470, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.7, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 202.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 180.25, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.33, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.0, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 17.22, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.88, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.77, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.96, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.44, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.32, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.23, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.88, "optimal_onion_potting_agent_0_min": 
10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.77, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.88, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.77, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, 
"num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0006142787169665098, "policy_loss": -0.000997619703412056, "vf_loss": 7.701852321624756, "vf_explained_var": 0.5957615375518799, "kl": 0.002363224048167467, "entropy": 0.7736892700195312, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6028800, "num_env_steps_trained": 6028800, "num_agent_steps_sampled": 12057600, "num_agent_steps_trained": 12057600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 585.45, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 292.725}, "custom_metrics": {"sparse_reward_mean": 202.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 180.25, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 
0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.33, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.0, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 17.22, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.88, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.77, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.55, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.96, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.44, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.9, "soup_pickup_agent_0_min": 3, 
"soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.32, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.9, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.23, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.88, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.77, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.88, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.77, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, 
"catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 576.0, 627.0, 579.0, 579.0, 636.0, 579.0, 573.0, 579.0, 636.0, 630.0, 582.0, 579.0, 579.0, 576.0, 636.0, 627.0, 636.0, 519.0, 627.0, 582.0, 587.0, 579.0, 587.0, 582.0, 636.0, 579.0, 633.0, 579.0, 579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 579.0, 633.0, 582.0, 582.0, 630.0, 576.0, 582.0, 570.0, 582.0, 570.0, 633.0, 587.0, 582.0, 579.0, 633.0, 584.0, 456.0, 630.0, 582.0, 587.0, 582.0, 582.0, 573.0, 587.0, 587.0, 519.0, 627.0, 582.0, 630.0, 630.0, 639.0, 630.0, 530.0, 582.0, 579.0, 630.0, 582.0, 570.0, 579.0, 627.0, 582.0, 519.0, 587.0, 573.0, 627.0, 402.0, 579.0, 525.0, 630.0, 579.0, 582.0, 633.0, 564.0, 519.0, 579.0, 627.0, 579.0, 525.0, 579.0, 582.0, 522.0, 636.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 294.0, 285.0, 287.0, 292.0, 288.0, 288.0, 316.0, 311.0, 291.0, 288.0, 285.0, 294.0, 317.0, 319.0, 288.0, 291.0, 
288.0, 285.0, 293.0, 286.0, 319.0, 317.0, 315.0, 315.0, 299.0, 283.0, 288.0, 291.0, 291.0, 288.0, 284.0, 292.0, 320.0, 316.0, 316.0, 311.0, 312.0, 324.0, 266.0, 253.0, 311.0, 316.0, 293.0, 289.0, 288.0, 299.0, 286.0, 293.0, 288.0, 299.0, 297.0, 285.0, 317.0, 319.0, 286.0, 293.0, 320.0, 313.0, 287.0, 292.0, 286.0, 293.0, 288.0, 291.0, 290.0, 280.0, 258.0, 264.0, 295.0, 287.0, 292.0, 290.0, 287.0, 292.0, 314.0, 319.0, 288.0, 294.0, 296.0, 286.0, 314.0, 316.0, 292.0, 284.0, 296.0, 286.0, 294.0, 276.0, 293.0, 289.0, 287.0, 283.0, 315.0, 318.0, 297.0, 290.0, 290.0, 292.0, 286.0, 293.0, 317.0, 316.0, 291.0, 293.0, 223.0, 233.0, 309.0, 321.0, 291.0, 291.0, 289.0, 298.0, 289.0, 293.0, 294.0, 288.0, 279.0, 294.0, 296.0, 291.0, 286.0, 301.0, 256.0, 263.0, 314.0, 313.0, 285.0, 297.0, 316.0, 314.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 260.0, 270.0, 291.0, 291.0, 289.0, 290.0, 310.0, 320.0, 294.0, 288.0, 290.0, 280.0, 295.0, 284.0, 314.0, 313.0, 292.0, 290.0, 252.0, 267.0, 288.0, 299.0, 285.0, 288.0, 321.0, 306.0, 202.0, 200.0, 288.0, 291.0, 260.0, 265.0, 319.0, 311.0, 291.0, 288.0, 294.0, 288.0, 319.0, 314.0, 279.0, 285.0, 264.0, 255.0, 292.0, 287.0, 313.0, 314.0, 286.0, 293.0, 261.0, 264.0, 293.0, 286.0, 288.0, 294.0, 261.0, 261.0, 320.0, 316.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6935970828707496, "mean_inference_ms": 1.2364517360045373, "mean_action_processing_ms": 0.13315791629657373, "mean_env_wait_ms": 0.8350010650008328, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 585.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 292.725}, "hist_stats": {"episode_reward": [579.0, 579.0, 579.0, 576.0, 627.0, 579.0, 579.0, 636.0, 579.0, 573.0, 579.0, 636.0, 630.0, 582.0, 579.0, 579.0, 576.0, 636.0, 627.0, 636.0, 519.0, 627.0, 582.0, 587.0, 579.0, 
587.0, 582.0, 636.0, 579.0, 633.0, 579.0, 579.0, 579.0, 570.0, 522.0, 582.0, 582.0, 579.0, 633.0, 582.0, 582.0, 630.0, 576.0, 582.0, 570.0, 582.0, 570.0, 633.0, 587.0, 582.0, 579.0, 633.0, 584.0, 456.0, 630.0, 582.0, 587.0, 582.0, 582.0, 573.0, 587.0, 587.0, 519.0, 627.0, 582.0, 630.0, 630.0, 639.0, 630.0, 530.0, 582.0, 579.0, 630.0, 582.0, 570.0, 579.0, 627.0, 582.0, 519.0, 587.0, 573.0, 627.0, 402.0, 579.0, 525.0, 630.0, 579.0, 582.0, 633.0, 564.0, 519.0, 579.0, 627.0, 579.0, 525.0, 579.0, 582.0, 522.0, 636.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [286.0, 293.0, 294.0, 285.0, 287.0, 292.0, 288.0, 288.0, 316.0, 311.0, 291.0, 288.0, 285.0, 294.0, 317.0, 319.0, 288.0, 291.0, 288.0, 285.0, 293.0, 286.0, 319.0, 317.0, 315.0, 315.0, 299.0, 283.0, 288.0, 291.0, 291.0, 288.0, 284.0, 292.0, 320.0, 316.0, 316.0, 311.0, 312.0, 324.0, 266.0, 253.0, 311.0, 316.0, 293.0, 289.0, 288.0, 299.0, 286.0, 293.0, 288.0, 299.0, 297.0, 285.0, 317.0, 319.0, 286.0, 293.0, 320.0, 313.0, 287.0, 292.0, 286.0, 293.0, 288.0, 291.0, 290.0, 280.0, 258.0, 264.0, 295.0, 287.0, 292.0, 290.0, 287.0, 292.0, 314.0, 319.0, 288.0, 294.0, 296.0, 286.0, 314.0, 316.0, 292.0, 284.0, 296.0, 286.0, 294.0, 276.0, 293.0, 289.0, 287.0, 283.0, 315.0, 318.0, 297.0, 290.0, 290.0, 292.0, 286.0, 293.0, 317.0, 316.0, 291.0, 293.0, 223.0, 233.0, 309.0, 321.0, 291.0, 291.0, 289.0, 298.0, 289.0, 293.0, 294.0, 288.0, 279.0, 294.0, 296.0, 291.0, 286.0, 301.0, 256.0, 263.0, 314.0, 313.0, 285.0, 297.0, 316.0, 314.0, 314.0, 
316.0, 319.0, 320.0, 319.0, 311.0, 260.0, 270.0, 291.0, 291.0, 289.0, 290.0, 310.0, 320.0, 294.0, 288.0, 290.0, 280.0, 295.0, 284.0, 314.0, 313.0, 292.0, 290.0, 252.0, 267.0, 288.0, 299.0, 285.0, 288.0, 321.0, 306.0, 202.0, 200.0, 288.0, 291.0, 260.0, 265.0, 319.0, 311.0, 291.0, 288.0, 294.0, 288.0, 319.0, 314.0, 279.0, 285.0, 264.0, 255.0, 292.0, 287.0, 313.0, 314.0, 286.0, 293.0, 261.0, 264.0, 293.0, 286.0, 288.0, 294.0, 261.0, 261.0, 320.0, 316.0, 290.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6935970828707496, "mean_inference_ms": 1.2364517360045373, "mean_action_processing_ms": 0.13315791629657373, "mean_env_wait_ms": 0.8350010650008328, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12057600, "num_agent_steps_trained": 12057600, "num_env_steps_sampled": 6028800, "num_env_steps_trained": 6028800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6028800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12057600, "timers": {"training_iteration_time_ms": 3569.429, "learn_time_ms": 1068.864, "learn_throughput": 11975.332, "synch_weights_time_ms": 11.406}, "counters": {"num_env_steps_sampled": 6028800, "num_env_steps_trained": 6028800, "num_agent_steps_sampled": 12057600, "num_agent_steps_trained": 12057600}, "done": false, "episodes_total": 15072, "training_iteration": 471, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-01", "timestamp": 1666582261, "time_this_iter_s": 3.6229758262634277, "time_total_s": 1789.5203397274017, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, 
"eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", 
"synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": 
"PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": 
"NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": 
false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 
1789.5203397274017, "timesteps_since_restore": 0, "iterations_since_restore": 471, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.439999999999998, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.95, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.8, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.79, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.64, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, 
"onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.49, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.56, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.02, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.31, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.84, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.49, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, 
"optimal_onion_potting_agent_1_mean": 15.56, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.49, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.56, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": 
{"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0034096711315214634, "policy_loss": -0.0038029851857572794, "vf_loss": 7.788510799407959, "vf_explained_var": 0.601466715335846, "kl": 0.0035887700505554676, "entropy": 0.7710731625556946, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6041600, "num_env_steps_trained": 6041600, "num_agent_steps_sampled": 12083200, "num_agent_steps_trained": 12083200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 574.95, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 287.475}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.95, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, 
"potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.92, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.8, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.79, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.64, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.49, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.56, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.02, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.31, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.84, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, 
"soup_pickup_agent_1_mean": 5.2, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.49, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.56, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.49, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.56, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, 
"catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 570.0, 522.0, 582.0, 582.0, 579.0, 633.0, 582.0, 582.0, 630.0, 576.0, 582.0, 570.0, 582.0, 570.0, 633.0, 587.0, 582.0, 579.0, 633.0, 584.0, 456.0, 630.0, 582.0, 587.0, 582.0, 582.0, 573.0, 587.0, 587.0, 519.0, 627.0, 582.0, 630.0, 630.0, 639.0, 630.0, 530.0, 582.0, 579.0, 630.0, 582.0, 570.0, 579.0, 627.0, 582.0, 519.0, 587.0, 573.0, 627.0, 402.0, 579.0, 525.0, 630.0, 579.0, 582.0, 633.0, 564.0, 519.0, 579.0, 627.0, 579.0, 525.0, 579.0, 582.0, 522.0, 636.0, 579.0, 582.0, 579.0, 582.0, 516.0, 473.0, 582.0, 576.0, 576.0, 587.0, 582.0, 465.0, 573.0, 576.0, 579.0, 630.0, 579.0, 582.0, 576.0, 573.0, 576.0, 519.0, 570.0, 516.0, 576.0, 570.0, 525.0, 582.0, 579.0, 627.0, 570.0, 408.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 290.0, 280.0, 258.0, 264.0, 295.0, 287.0, 292.0, 290.0, 287.0, 292.0, 314.0, 319.0, 288.0, 294.0, 296.0, 286.0, 314.0, 316.0, 292.0, 284.0, 296.0, 286.0, 294.0, 
276.0, 293.0, 289.0, 287.0, 283.0, 315.0, 318.0, 297.0, 290.0, 290.0, 292.0, 286.0, 293.0, 317.0, 316.0, 291.0, 293.0, 223.0, 233.0, 309.0, 321.0, 291.0, 291.0, 289.0, 298.0, 289.0, 293.0, 294.0, 288.0, 279.0, 294.0, 296.0, 291.0, 286.0, 301.0, 256.0, 263.0, 314.0, 313.0, 285.0, 297.0, 316.0, 314.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 260.0, 270.0, 291.0, 291.0, 289.0, 290.0, 310.0, 320.0, 294.0, 288.0, 290.0, 280.0, 295.0, 284.0, 314.0, 313.0, 292.0, 290.0, 252.0, 267.0, 288.0, 299.0, 285.0, 288.0, 321.0, 306.0, 202.0, 200.0, 288.0, 291.0, 260.0, 265.0, 319.0, 311.0, 291.0, 288.0, 294.0, 288.0, 319.0, 314.0, 279.0, 285.0, 264.0, 255.0, 292.0, 287.0, 313.0, 314.0, 286.0, 293.0, 261.0, 264.0, 293.0, 286.0, 288.0, 294.0, 261.0, 261.0, 320.0, 316.0, 290.0, 289.0, 288.0, 294.0, 286.0, 293.0, 291.0, 291.0, 267.0, 249.0, 238.0, 235.0, 291.0, 291.0, 284.0, 292.0, 289.0, 287.0, 294.0, 293.0, 291.0, 291.0, 223.0, 242.0, 292.0, 281.0, 286.0, 290.0, 293.0, 286.0, 317.0, 313.0, 286.0, 293.0, 289.0, 293.0, 292.0, 284.0, 283.0, 290.0, 286.0, 290.0, 271.0, 248.0, 289.0, 281.0, 254.0, 262.0, 286.0, 290.0, 280.0, 290.0, 258.0, 267.0, 290.0, 292.0, 293.0, 286.0, 312.0, 315.0, 284.0, 286.0, 200.0, 208.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6935537348079653, "mean_inference_ms": 1.2363514110154021, "mean_action_processing_ms": 0.13315288003710485, "mean_env_wait_ms": 0.8349377343006816, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 574.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 287.475}, "hist_stats": {"episode_reward": [579.0, 570.0, 522.0, 582.0, 582.0, 579.0, 633.0, 582.0, 582.0, 630.0, 576.0, 582.0, 570.0, 582.0, 570.0, 633.0, 587.0, 582.0, 579.0, 633.0, 584.0, 456.0, 630.0, 582.0, 587.0, 582.0, 582.0, 573.0, 587.0, 587.0, 519.0, 627.0, 
582.0, 630.0, 630.0, 639.0, 630.0, 530.0, 582.0, 579.0, 630.0, 582.0, 570.0, 579.0, 627.0, 582.0, 519.0, 587.0, 573.0, 627.0, 402.0, 579.0, 525.0, 630.0, 579.0, 582.0, 633.0, 564.0, 519.0, 579.0, 627.0, 579.0, 525.0, 579.0, 582.0, 522.0, 636.0, 579.0, 582.0, 579.0, 582.0, 516.0, 473.0, 582.0, 576.0, 576.0, 587.0, 582.0, 465.0, 573.0, 576.0, 579.0, 630.0, 579.0, 582.0, 576.0, 573.0, 576.0, 519.0, 570.0, 516.0, 576.0, 570.0, 525.0, 582.0, 579.0, 627.0, 570.0, 408.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 290.0, 280.0, 258.0, 264.0, 295.0, 287.0, 292.0, 290.0, 287.0, 292.0, 314.0, 319.0, 288.0, 294.0, 296.0, 286.0, 314.0, 316.0, 292.0, 284.0, 296.0, 286.0, 294.0, 276.0, 293.0, 289.0, 287.0, 283.0, 315.0, 318.0, 297.0, 290.0, 290.0, 292.0, 286.0, 293.0, 317.0, 316.0, 291.0, 293.0, 223.0, 233.0, 309.0, 321.0, 291.0, 291.0, 289.0, 298.0, 289.0, 293.0, 294.0, 288.0, 279.0, 294.0, 296.0, 291.0, 286.0, 301.0, 256.0, 263.0, 314.0, 313.0, 285.0, 297.0, 316.0, 314.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 260.0, 270.0, 291.0, 291.0, 289.0, 290.0, 310.0, 320.0, 294.0, 288.0, 290.0, 280.0, 295.0, 284.0, 314.0, 313.0, 292.0, 290.0, 252.0, 267.0, 288.0, 299.0, 285.0, 288.0, 321.0, 306.0, 202.0, 200.0, 288.0, 291.0, 260.0, 265.0, 319.0, 311.0, 291.0, 288.0, 294.0, 288.0, 319.0, 314.0, 279.0, 285.0, 264.0, 255.0, 292.0, 287.0, 313.0, 314.0, 286.0, 293.0, 261.0, 264.0, 293.0, 286.0, 288.0, 294.0, 261.0, 261.0, 320.0, 316.0, 290.0, 289.0, 288.0, 294.0, 286.0, 293.0, 
291.0, 291.0, 267.0, 249.0, 238.0, 235.0, 291.0, 291.0, 284.0, 292.0, 289.0, 287.0, 294.0, 293.0, 291.0, 291.0, 223.0, 242.0, 292.0, 281.0, 286.0, 290.0, 293.0, 286.0, 317.0, 313.0, 286.0, 293.0, 289.0, 293.0, 292.0, 284.0, 283.0, 290.0, 286.0, 290.0, 271.0, 248.0, 289.0, 281.0, 254.0, 262.0, 286.0, 290.0, 280.0, 290.0, 258.0, 267.0, 290.0, 292.0, 293.0, 286.0, 312.0, 315.0, 284.0, 286.0, 200.0, 208.0, 291.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6935537348079653, "mean_inference_ms": 1.2363514110154021, "mean_action_processing_ms": 0.13315288003710485, "mean_env_wait_ms": 0.8349377343006816, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12083200, "num_agent_steps_trained": 12083200, "num_env_steps_sampled": 6041600, "num_env_steps_trained": 6041600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6041600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12083200, "timers": {"training_iteration_time_ms": 3574.198, "learn_time_ms": 1069.172, "learn_throughput": 11971.882, "synch_weights_time_ms": 10.839}, "counters": {"num_env_steps_sampled": 6041600, "num_env_steps_trained": 6041600, "num_agent_steps_sampled": 12083200, "num_agent_steps_trained": 12083200}, "done": false, "episodes_total": 15104, "training_iteration": 472, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-05", "timestamp": 1666582265, "time_this_iter_s": 3.600015878677368, "time_total_s": 1793.120355606079, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, 
"tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, 
"compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, 
"metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, 
"sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1793.120355606079, 
"timesteps_since_restore": 0, "iterations_since_restore": 472, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.98, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.52, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.01, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.47, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.87, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.29, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, 
"onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.68, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.12, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.9, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.78, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.36, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.68, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.12, 
"optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.68, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.12, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, 
"cur_lr": 0.0010000000474974513, "total_loss": -0.0003118330496363342, "policy_loss": -0.0007060921634547412, "vf_loss": 7.771193504333496, "vf_explained_var": 0.6044775247573853, "kl": 0.0033143041655421257, "entropy": 0.7657217979431152, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6054400, "num_env_steps_trained": 6054400, "num_agent_steps_sampled": 12108800, "num_agent_steps_trained": 12108800}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 569.92, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.96}, "custom_metrics": {"sparse_reward_mean": 197.2, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.52, "shaped_reward_min": 122, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, 
"onion_pickup_agent_0_mean": 17.01, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.47, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.87, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.29, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.07, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.68, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.12, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.9, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.78, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.36, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.05, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.73, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.21, 
"soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.15, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.68, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.12, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.68, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.12, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, 
"useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 630.0, 639.0, 630.0, 530.0, 582.0, 579.0, 630.0, 582.0, 570.0, 579.0, 627.0, 582.0, 519.0, 587.0, 573.0, 627.0, 402.0, 579.0, 525.0, 630.0, 579.0, 582.0, 633.0, 564.0, 519.0, 579.0, 627.0, 579.0, 525.0, 579.0, 582.0, 522.0, 636.0, 579.0, 582.0, 579.0, 582.0, 516.0, 473.0, 582.0, 576.0, 576.0, 587.0, 582.0, 465.0, 573.0, 576.0, 579.0, 630.0, 579.0, 582.0, 576.0, 573.0, 576.0, 519.0, 570.0, 516.0, 576.0, 570.0, 525.0, 582.0, 579.0, 627.0, 570.0, 408.0, 579.0, 411.0, 582.0, 530.0, 639.0, 587.0, 582.0, 573.0, 576.0, 579.0, 579.0, 525.0, 582.0, 525.0, 570.0, 573.0, 576.0, 576.0, 582.0, 576.0, 627.0, 525.0, 587.0, 579.0, 513.0, 576.0, 636.0, 579.0, 519.0, 579.0, 527.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 297.0, 316.0, 314.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 260.0, 270.0, 291.0, 291.0, 289.0, 290.0, 310.0, 320.0, 294.0, 288.0, 290.0, 280.0, 295.0, 284.0, 314.0, 313.0, 292.0, 290.0, 252.0, 267.0, 288.0, 
299.0, 285.0, 288.0, 321.0, 306.0, 202.0, 200.0, 288.0, 291.0, 260.0, 265.0, 319.0, 311.0, 291.0, 288.0, 294.0, 288.0, 319.0, 314.0, 279.0, 285.0, 264.0, 255.0, 292.0, 287.0, 313.0, 314.0, 286.0, 293.0, 261.0, 264.0, 293.0, 286.0, 288.0, 294.0, 261.0, 261.0, 320.0, 316.0, 290.0, 289.0, 288.0, 294.0, 286.0, 293.0, 291.0, 291.0, 267.0, 249.0, 238.0, 235.0, 291.0, 291.0, 284.0, 292.0, 289.0, 287.0, 294.0, 293.0, 291.0, 291.0, 223.0, 242.0, 292.0, 281.0, 286.0, 290.0, 293.0, 286.0, 317.0, 313.0, 286.0, 293.0, 289.0, 293.0, 292.0, 284.0, 283.0, 290.0, 286.0, 290.0, 271.0, 248.0, 289.0, 281.0, 254.0, 262.0, 286.0, 290.0, 280.0, 290.0, 258.0, 267.0, 290.0, 292.0, 293.0, 286.0, 312.0, 315.0, 284.0, 286.0, 200.0, 208.0, 291.0, 288.0, 206.0, 205.0, 297.0, 285.0, 268.0, 262.0, 316.0, 323.0, 289.0, 298.0, 291.0, 291.0, 283.0, 290.0, 292.0, 284.0, 294.0, 285.0, 293.0, 286.0, 268.0, 257.0, 295.0, 287.0, 263.0, 262.0, 287.0, 283.0, 288.0, 285.0, 291.0, 285.0, 289.0, 287.0, 289.0, 293.0, 286.0, 290.0, 319.0, 308.0, 258.0, 267.0, 286.0, 301.0, 292.0, 287.0, 253.0, 260.0, 286.0, 290.0, 312.0, 324.0, 292.0, 287.0, 269.0, 250.0, 291.0, 288.0, 267.0, 260.0, 291.0, 285.0, 294.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6935046216697461, "mean_inference_ms": 1.2362480338015869, "mean_action_processing_ms": 0.13314683432029636, "mean_env_wait_ms": 0.8348684647105502, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 402.0, "episode_reward_mean": 569.92, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.96}, "hist_stats": {"episode_reward": [582.0, 630.0, 630.0, 639.0, 630.0, 530.0, 582.0, 579.0, 630.0, 582.0, 570.0, 579.0, 627.0, 582.0, 519.0, 587.0, 573.0, 627.0, 402.0, 579.0, 525.0, 630.0, 579.0, 582.0, 633.0, 564.0, 519.0, 579.0, 627.0, 579.0, 525.0, 579.0, 582.0, 522.0, 636.0, 579.0, 582.0, 579.0, 582.0, 
516.0, 473.0, 582.0, 576.0, 576.0, 587.0, 582.0, 465.0, 573.0, 576.0, 579.0, 630.0, 579.0, 582.0, 576.0, 573.0, 576.0, 519.0, 570.0, 516.0, 576.0, 570.0, 525.0, 582.0, 579.0, 627.0, 570.0, 408.0, 579.0, 411.0, 582.0, 530.0, 639.0, 587.0, 582.0, 573.0, 576.0, 579.0, 579.0, 525.0, 582.0, 525.0, 570.0, 573.0, 576.0, 576.0, 582.0, 576.0, 627.0, 525.0, 587.0, 579.0, 513.0, 576.0, 636.0, 579.0, 519.0, 579.0, 527.0, 576.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [285.0, 297.0, 316.0, 314.0, 314.0, 316.0, 319.0, 320.0, 319.0, 311.0, 260.0, 270.0, 291.0, 291.0, 289.0, 290.0, 310.0, 320.0, 294.0, 288.0, 290.0, 280.0, 295.0, 284.0, 314.0, 313.0, 292.0, 290.0, 252.0, 267.0, 288.0, 299.0, 285.0, 288.0, 321.0, 306.0, 202.0, 200.0, 288.0, 291.0, 260.0, 265.0, 319.0, 311.0, 291.0, 288.0, 294.0, 288.0, 319.0, 314.0, 279.0, 285.0, 264.0, 255.0, 292.0, 287.0, 313.0, 314.0, 286.0, 293.0, 261.0, 264.0, 293.0, 286.0, 288.0, 294.0, 261.0, 261.0, 320.0, 316.0, 290.0, 289.0, 288.0, 294.0, 286.0, 293.0, 291.0, 291.0, 267.0, 249.0, 238.0, 235.0, 291.0, 291.0, 284.0, 292.0, 289.0, 287.0, 294.0, 293.0, 291.0, 291.0, 223.0, 242.0, 292.0, 281.0, 286.0, 290.0, 293.0, 286.0, 317.0, 313.0, 286.0, 293.0, 289.0, 293.0, 292.0, 284.0, 283.0, 290.0, 286.0, 290.0, 271.0, 248.0, 289.0, 281.0, 254.0, 262.0, 286.0, 290.0, 280.0, 290.0, 258.0, 267.0, 290.0, 292.0, 293.0, 286.0, 312.0, 315.0, 284.0, 286.0, 200.0, 208.0, 291.0, 288.0, 206.0, 205.0, 297.0, 285.0, 268.0, 262.0, 316.0, 323.0, 289.0, 298.0, 291.0, 
291.0, 283.0, 290.0, 292.0, 284.0, 294.0, 285.0, 293.0, 286.0, 268.0, 257.0, 295.0, 287.0, 263.0, 262.0, 287.0, 283.0, 288.0, 285.0, 291.0, 285.0, 289.0, 287.0, 289.0, 293.0, 286.0, 290.0, 319.0, 308.0, 258.0, 267.0, 286.0, 301.0, 292.0, 287.0, 253.0, 260.0, 286.0, 290.0, 312.0, 324.0, 292.0, 287.0, 269.0, 250.0, 291.0, 288.0, 267.0, 260.0, 291.0, 285.0, 294.0, 288.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6935046216697461, "mean_inference_ms": 1.2362480338015869, "mean_action_processing_ms": 0.13314683432029636, "mean_env_wait_ms": 0.8348684647105502, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12108800, "num_agent_steps_trained": 12108800, "num_env_steps_sampled": 6054400, "num_env_steps_trained": 6054400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6054400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12108800, "timers": {"training_iteration_time_ms": 3566.626, "learn_time_ms": 1064.88, "learn_throughput": 12020.135, "synch_weights_time_ms": 11.342}, "counters": {"num_env_steps_sampled": 6054400, "num_env_steps_trained": 6054400, "num_agent_steps_sampled": 12108800, "num_agent_steps_trained": 12108800}, "done": false, "episodes_total": 15136, "training_iteration": 473, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-08", "timestamp": 1666582268, "time_this_iter_s": 3.544827699661255, "time_total_s": 1796.6651833057404, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, 
"inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, 
"sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": 
{"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, 
"enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, 
"metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, 
"keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1796.6651833057404, "timesteps_since_restore": 0, "iterations_since_restore": 473, 
"warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.433333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.36, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 17.08, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.37, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.89, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.19, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, 
"useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.72, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.07, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.86, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.73, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.4, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.69, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.67, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.16, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.72, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.07, 
"optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.72, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.07, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, 
"cur_lr": 0.0010000000474974513, "total_loss": 0.0026534530334174633, "policy_loss": 0.0022541265934705734, "vf_loss": 7.776371479034424, "vf_explained_var": 0.6059039235115051, "kl": 0.003932251129299402, "entropy": 0.7566198110580444, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6067200, "num_env_steps_trained": 6067200, "num_agent_steps_sampled": 12134400, "num_agent_steps_trained": 12134400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 568.16, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.08}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 175.36, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 
17.08, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.37, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.89, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.19, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.72, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.07, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.86, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.47, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.73, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.4, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.69, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.23, "soup_pickup_agent_1_min": 3, 
"soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.67, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.16, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.72, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.07, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.72, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.07, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 
0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 522.0, 636.0, 579.0, 582.0, 579.0, 582.0, 516.0, 473.0, 582.0, 576.0, 576.0, 587.0, 582.0, 465.0, 573.0, 576.0, 579.0, 630.0, 579.0, 582.0, 576.0, 573.0, 576.0, 519.0, 570.0, 516.0, 576.0, 570.0, 525.0, 582.0, 579.0, 627.0, 570.0, 408.0, 579.0, 411.0, 582.0, 530.0, 639.0, 587.0, 582.0, 573.0, 576.0, 579.0, 579.0, 525.0, 582.0, 525.0, 570.0, 573.0, 576.0, 576.0, 582.0, 576.0, 627.0, 525.0, 587.0, 579.0, 513.0, 576.0, 636.0, 579.0, 519.0, 579.0, 527.0, 576.0, 582.0, 525.0, 633.0, 530.0, 582.0, 579.0, 519.0, 630.0, 630.0, 522.0, 582.0, 579.0, 587.0, 627.0, 573.0, 579.0, 576.0, 582.0, 576.0, 582.0, 587.0, 579.0, 573.0, 579.0, 636.0, 582.0, 530.0, 519.0, 576.0, 525.0, 582.0, 570.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 261.0, 261.0, 320.0, 316.0, 290.0, 289.0, 288.0, 294.0, 286.0, 293.0, 291.0, 291.0, 267.0, 249.0, 238.0, 235.0, 291.0, 291.0, 284.0, 292.0, 289.0, 287.0, 294.0, 293.0, 291.0, 291.0, 223.0, 242.0, 292.0, 281.0, 286.0, 290.0, 293.0, 286.0, 317.0, 
313.0, 286.0, 293.0, 289.0, 293.0, 292.0, 284.0, 283.0, 290.0, 286.0, 290.0, 271.0, 248.0, 289.0, 281.0, 254.0, 262.0, 286.0, 290.0, 280.0, 290.0, 258.0, 267.0, 290.0, 292.0, 293.0, 286.0, 312.0, 315.0, 284.0, 286.0, 200.0, 208.0, 291.0, 288.0, 206.0, 205.0, 297.0, 285.0, 268.0, 262.0, 316.0, 323.0, 289.0, 298.0, 291.0, 291.0, 283.0, 290.0, 292.0, 284.0, 294.0, 285.0, 293.0, 286.0, 268.0, 257.0, 295.0, 287.0, 263.0, 262.0, 287.0, 283.0, 288.0, 285.0, 291.0, 285.0, 289.0, 287.0, 289.0, 293.0, 286.0, 290.0, 319.0, 308.0, 258.0, 267.0, 286.0, 301.0, 292.0, 287.0, 253.0, 260.0, 286.0, 290.0, 312.0, 324.0, 292.0, 287.0, 269.0, 250.0, 291.0, 288.0, 267.0, 260.0, 291.0, 285.0, 294.0, 288.0, 261.0, 264.0, 318.0, 315.0, 265.0, 265.0, 294.0, 288.0, 296.0, 283.0, 251.0, 268.0, 316.0, 314.0, 317.0, 313.0, 259.0, 263.0, 296.0, 286.0, 289.0, 290.0, 296.0, 291.0, 306.0, 321.0, 280.0, 293.0, 291.0, 288.0, 286.0, 290.0, 286.0, 296.0, 294.0, 282.0, 297.0, 285.0, 294.0, 293.0, 283.0, 296.0, 288.0, 285.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 262.0, 268.0, 257.0, 262.0, 286.0, 290.0, 265.0, 260.0, 290.0, 292.0, 283.0, 287.0, 283.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6934521429818895, "mean_inference_ms": 1.2361475067867969, "mean_action_processing_ms": 0.13314094767674708, "mean_env_wait_ms": 0.8348020091355736, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 568.16, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.08}, "hist_stats": {"episode_reward": [582.0, 522.0, 636.0, 579.0, 582.0, 579.0, 582.0, 516.0, 473.0, 582.0, 576.0, 576.0, 587.0, 582.0, 465.0, 573.0, 576.0, 579.0, 630.0, 579.0, 582.0, 576.0, 573.0, 576.0, 519.0, 570.0, 516.0, 576.0, 570.0, 525.0, 582.0, 579.0, 627.0, 570.0, 408.0, 579.0, 411.0, 582.0, 530.0, 639.0, 587.0, 582.0, 573.0, 576.0, 579.0, 
579.0, 525.0, 582.0, 525.0, 570.0, 573.0, 576.0, 576.0, 582.0, 576.0, 627.0, 525.0, 587.0, 579.0, 513.0, 576.0, 636.0, 579.0, 519.0, 579.0, 527.0, 576.0, 582.0, 525.0, 633.0, 530.0, 582.0, 579.0, 519.0, 630.0, 630.0, 522.0, 582.0, 579.0, 587.0, 627.0, 573.0, 579.0, 576.0, 582.0, 576.0, 582.0, 587.0, 579.0, 573.0, 579.0, 636.0, 582.0, 530.0, 519.0, 576.0, 525.0, 582.0, 570.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 294.0, 261.0, 261.0, 320.0, 316.0, 290.0, 289.0, 288.0, 294.0, 286.0, 293.0, 291.0, 291.0, 267.0, 249.0, 238.0, 235.0, 291.0, 291.0, 284.0, 292.0, 289.0, 287.0, 294.0, 293.0, 291.0, 291.0, 223.0, 242.0, 292.0, 281.0, 286.0, 290.0, 293.0, 286.0, 317.0, 313.0, 286.0, 293.0, 289.0, 293.0, 292.0, 284.0, 283.0, 290.0, 286.0, 290.0, 271.0, 248.0, 289.0, 281.0, 254.0, 262.0, 286.0, 290.0, 280.0, 290.0, 258.0, 267.0, 290.0, 292.0, 293.0, 286.0, 312.0, 315.0, 284.0, 286.0, 200.0, 208.0, 291.0, 288.0, 206.0, 205.0, 297.0, 285.0, 268.0, 262.0, 316.0, 323.0, 289.0, 298.0, 291.0, 291.0, 283.0, 290.0, 292.0, 284.0, 294.0, 285.0, 293.0, 286.0, 268.0, 257.0, 295.0, 287.0, 263.0, 262.0, 287.0, 283.0, 288.0, 285.0, 291.0, 285.0, 289.0, 287.0, 289.0, 293.0, 286.0, 290.0, 319.0, 308.0, 258.0, 267.0, 286.0, 301.0, 292.0, 287.0, 253.0, 260.0, 286.0, 290.0, 312.0, 324.0, 292.0, 287.0, 269.0, 250.0, 291.0, 288.0, 267.0, 260.0, 291.0, 285.0, 294.0, 288.0, 261.0, 264.0, 318.0, 315.0, 265.0, 265.0, 294.0, 288.0, 296.0, 283.0, 251.0, 268.0, 316.0, 314.0, 317.0, 313.0, 259.0, 
263.0, 296.0, 286.0, 289.0, 290.0, 296.0, 291.0, 306.0, 321.0, 280.0, 293.0, 291.0, 288.0, 286.0, 290.0, 286.0, 296.0, 294.0, 282.0, 297.0, 285.0, 294.0, 293.0, 283.0, 296.0, 288.0, 285.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 262.0, 268.0, 257.0, 262.0, 286.0, 290.0, 265.0, 260.0, 290.0, 292.0, 283.0, 287.0, 283.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6934521429818895, "mean_inference_ms": 1.2361475067867969, "mean_action_processing_ms": 0.13314094767674708, "mean_env_wait_ms": 0.8348020091355736, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12134400, "num_agent_steps_trained": 12134400, "num_env_steps_sampled": 6067200, "num_env_steps_trained": 6067200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6067200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12134400, "timers": {"training_iteration_time_ms": 3557.83, "learn_time_ms": 1069.891, "learn_throughput": 11963.839, "synch_weights_time_ms": 11.043}, "counters": {"num_env_steps_sampled": 6067200, "num_env_steps_trained": 6067200, "num_agent_steps_sampled": 12134400, "num_agent_steps_trained": 12134400}, "done": false, "episodes_total": 15168, "training_iteration": 474, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-12", "timestamp": 1666582272, "time_this_iter_s": 3.609189748764038, "time_total_s": 1800.2743730545044, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": 
{"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, 
"lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, 
"gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, 
"gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, 
"min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 
60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1800.2743730545044, "timesteps_since_restore": 0, "iterations_since_restore": 474, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 
23.380000000000003, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.5, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.94, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.7, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.51, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, 
"useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.58, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.35, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.37, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.82, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.58, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.35, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, 
"optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.58, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.35, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0003242699895054102, 
"policy_loss": -0.0007143347756937146, "vf_loss": 7.679488182067871, "vf_explained_var": 0.58504319190979, "kl": 0.002902800217270851, "entropy": 0.7557680010795593, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6080000, "num_env_steps_trained": 6080000, "num_agent_steps_sampled": 12160000, "num_agent_steps_trained": 12160000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 572.1, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 286.05}, "custom_metrics": {"sparse_reward_mean": 197.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.5, "shaped_reward_min": 128, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.94, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, 
"onion_pickup_agent_1_mean": 15.7, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.51, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.58, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.35, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.99, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.37, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.07, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.82, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.19, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.78, 
"soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.58, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.35, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.58, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.35, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, 
"useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 570.0, 408.0, 579.0, 411.0, 582.0, 530.0, 639.0, 587.0, 582.0, 573.0, 576.0, 579.0, 579.0, 525.0, 582.0, 525.0, 570.0, 573.0, 576.0, 576.0, 582.0, 576.0, 627.0, 525.0, 587.0, 579.0, 513.0, 576.0, 636.0, 579.0, 519.0, 579.0, 527.0, 576.0, 582.0, 525.0, 633.0, 530.0, 582.0, 579.0, 519.0, 630.0, 630.0, 522.0, 582.0, 579.0, 587.0, 627.0, 573.0, 579.0, 576.0, 582.0, 576.0, 582.0, 587.0, 579.0, 573.0, 579.0, 636.0, 582.0, 530.0, 519.0, 576.0, 525.0, 582.0, 570.0, 573.0, 579.0, 627.0, 522.0, 579.0, 530.0, 510.0, 576.0, 587.0, 579.0, 573.0, 627.0, 582.0, 576.0, 579.0, 582.0, 587.0, 639.0, 633.0, 473.0, 627.0, 630.0, 582.0, 582.0, 582.0, 582.0, 579.0, 525.0, 573.0, 522.0, 516.0, 627.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 315.0, 284.0, 286.0, 200.0, 208.0, 291.0, 288.0, 206.0, 205.0, 297.0, 285.0, 268.0, 262.0, 316.0, 323.0, 289.0, 298.0, 291.0, 291.0, 283.0, 290.0, 292.0, 284.0, 294.0, 285.0, 293.0, 286.0, 268.0, 257.0, 295.0, 287.0, 263.0, 262.0, 287.0, 283.0, 288.0, 285.0, 291.0, 285.0, 289.0, 287.0, 289.0, 
293.0, 286.0, 290.0, 319.0, 308.0, 258.0, 267.0, 286.0, 301.0, 292.0, 287.0, 253.0, 260.0, 286.0, 290.0, 312.0, 324.0, 292.0, 287.0, 269.0, 250.0, 291.0, 288.0, 267.0, 260.0, 291.0, 285.0, 294.0, 288.0, 261.0, 264.0, 318.0, 315.0, 265.0, 265.0, 294.0, 288.0, 296.0, 283.0, 251.0, 268.0, 316.0, 314.0, 317.0, 313.0, 259.0, 263.0, 296.0, 286.0, 289.0, 290.0, 296.0, 291.0, 306.0, 321.0, 280.0, 293.0, 291.0, 288.0, 286.0, 290.0, 286.0, 296.0, 294.0, 282.0, 297.0, 285.0, 294.0, 293.0, 283.0, 296.0, 288.0, 285.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 262.0, 268.0, 257.0, 262.0, 286.0, 290.0, 265.0, 260.0, 290.0, 292.0, 283.0, 287.0, 283.0, 290.0, 293.0, 286.0, 311.0, 316.0, 255.0, 267.0, 286.0, 293.0, 267.0, 263.0, 258.0, 252.0, 285.0, 291.0, 285.0, 302.0, 286.0, 293.0, 278.0, 295.0, 313.0, 314.0, 293.0, 289.0, 287.0, 289.0, 292.0, 287.0, 293.0, 289.0, 301.0, 286.0, 320.0, 319.0, 319.0, 314.0, 242.0, 231.0, 313.0, 314.0, 319.0, 311.0, 291.0, 291.0, 291.0, 291.0, 290.0, 292.0, 293.0, 289.0, 292.0, 287.0, 260.0, 265.0, 288.0, 285.0, 247.0, 275.0, 265.0, 251.0, 322.0, 305.0, 310.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6933924476202722, "mean_inference_ms": 1.2360384651209435, "mean_action_processing_ms": 0.13313298981871072, "mean_env_wait_ms": 0.8347216666702647, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 408.0, "episode_reward_mean": 572.1, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 200.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 286.05}, "hist_stats": {"episode_reward": [627.0, 570.0, 408.0, 579.0, 411.0, 582.0, 530.0, 639.0, 587.0, 582.0, 573.0, 576.0, 579.0, 579.0, 525.0, 582.0, 525.0, 570.0, 573.0, 576.0, 576.0, 582.0, 576.0, 627.0, 525.0, 587.0, 579.0, 513.0, 576.0, 636.0, 579.0, 519.0, 579.0, 527.0, 576.0, 582.0, 525.0, 633.0, 530.0, 582.0, 579.0, 519.0, 630.0, 630.0, 522.0, 582.0, 579.0, 587.0, 627.0, 573.0, 579.0, 
576.0, 582.0, 576.0, 582.0, 587.0, 579.0, 573.0, 579.0, 636.0, 582.0, 530.0, 519.0, 576.0, 525.0, 582.0, 570.0, 573.0, 579.0, 627.0, 522.0, 579.0, 530.0, 510.0, 576.0, 587.0, 579.0, 573.0, 627.0, 582.0, 576.0, 579.0, 582.0, 587.0, 639.0, 633.0, 473.0, 627.0, 630.0, 582.0, 582.0, 582.0, 582.0, 579.0, 525.0, 573.0, 522.0, 516.0, 627.0, 627.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [312.0, 315.0, 284.0, 286.0, 200.0, 208.0, 291.0, 288.0, 206.0, 205.0, 297.0, 285.0, 268.0, 262.0, 316.0, 323.0, 289.0, 298.0, 291.0, 291.0, 283.0, 290.0, 292.0, 284.0, 294.0, 285.0, 293.0, 286.0, 268.0, 257.0, 295.0, 287.0, 263.0, 262.0, 287.0, 283.0, 288.0, 285.0, 291.0, 285.0, 289.0, 287.0, 289.0, 293.0, 286.0, 290.0, 319.0, 308.0, 258.0, 267.0, 286.0, 301.0, 292.0, 287.0, 253.0, 260.0, 286.0, 290.0, 312.0, 324.0, 292.0, 287.0, 269.0, 250.0, 291.0, 288.0, 267.0, 260.0, 291.0, 285.0, 294.0, 288.0, 261.0, 264.0, 318.0, 315.0, 265.0, 265.0, 294.0, 288.0, 296.0, 283.0, 251.0, 268.0, 316.0, 314.0, 317.0, 313.0, 259.0, 263.0, 296.0, 286.0, 289.0, 290.0, 296.0, 291.0, 306.0, 321.0, 280.0, 293.0, 291.0, 288.0, 286.0, 290.0, 286.0, 296.0, 294.0, 282.0, 297.0, 285.0, 294.0, 293.0, 283.0, 296.0, 288.0, 285.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 262.0, 268.0, 257.0, 262.0, 286.0, 290.0, 265.0, 260.0, 290.0, 292.0, 283.0, 287.0, 283.0, 290.0, 293.0, 286.0, 311.0, 316.0, 255.0, 267.0, 286.0, 293.0, 267.0, 263.0, 258.0, 252.0, 285.0, 291.0, 285.0, 302.0, 286.0, 293.0, 278.0, 295.0, 313.0, 314.0, 293.0, 
289.0, 287.0, 289.0, 292.0, 287.0, 293.0, 289.0, 301.0, 286.0, 320.0, 319.0, 319.0, 314.0, 242.0, 231.0, 313.0, 314.0, 319.0, 311.0, 291.0, 291.0, 291.0, 291.0, 290.0, 292.0, 293.0, 289.0, 292.0, 287.0, 260.0, 265.0, 288.0, 285.0, 247.0, 275.0, 265.0, 251.0, 322.0, 305.0, 310.0, 317.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6933924476202722, "mean_inference_ms": 1.2360384651209435, "mean_action_processing_ms": 0.13313298981871072, "mean_env_wait_ms": 0.8347216666702647, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12160000, "num_agent_steps_trained": 12160000, "num_env_steps_sampled": 6080000, "num_env_steps_trained": 6080000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6080000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12160000, "timers": {"training_iteration_time_ms": 3547.816, "learn_time_ms": 1072.66, "learn_throughput": 11932.954, "synch_weights_time_ms": 11.065}, "counters": {"num_env_steps_sampled": 6080000, "num_env_steps_trained": 6080000, "num_agent_steps_sampled": 12160000, "num_agent_steps_trained": 12160000}, "done": false, "episodes_total": 15200, "training_iteration": 475, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-16", "timestamp": 1666582276, "time_this_iter_s": 3.5919501781463623, "time_total_s": 1803.8663232326508, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": 
false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": 
{"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, 
"log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, 
"train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 
100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1803.8663232326508, "timesteps_since_restore": 0, "iterations_since_restore": 475, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.660000000000004, "ram_util_percent": 10.62}} 
+{"custom_metrics": {"sparse_reward_mean": 198.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.15, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.91, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.82, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.68, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.53, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.54, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.1, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.29, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.53, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.54, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.53, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.54, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.005083472467958927, "policy_loss": 0.004696611315011978, "vf_loss": 
7.64980411529541, "vf_explained_var": 0.5964254140853882, "kl": 0.0038652834482491016, "entropy": 0.7562379837036133, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6092800, "num_env_steps_trained": 6092800, "num_agent_steps_sampled": 12185600, "num_agent_steps_trained": 12185600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 574.75, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 287.375}, "custom_metrics": {"sparse_reward_mean": 198.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.15, "shaped_reward_min": 142, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.91, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.82, 
"onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.68, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.53, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.54, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.1, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.43, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.29, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.18, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.11, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.53, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.54, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.53, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.54, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 527.0, 576.0, 582.0, 525.0, 633.0, 530.0, 582.0, 579.0, 519.0, 630.0, 630.0, 522.0, 582.0, 579.0, 587.0, 627.0, 573.0, 579.0, 576.0, 582.0, 576.0, 582.0, 587.0, 579.0, 573.0, 579.0, 636.0, 582.0, 530.0, 519.0, 576.0, 525.0, 582.0, 570.0, 573.0, 579.0, 627.0, 522.0, 579.0, 530.0, 510.0, 576.0, 587.0, 579.0, 573.0, 627.0, 582.0, 576.0, 579.0, 582.0, 587.0, 639.0, 633.0, 473.0, 627.0, 630.0, 582.0, 582.0, 582.0, 582.0, 579.0, 525.0, 573.0, 522.0, 516.0, 627.0, 627.0, 525.0, 567.0, 579.0, 627.0, 576.0, 522.0, 630.0, 587.0, 633.0, 462.0, 582.0, 570.0, 576.0, 522.0, 582.0, 570.0, 522.0, 582.0, 582.0, 633.0, 630.0, 579.0, 527.0, 579.0, 582.0, 579.0, 579.0, 567.0, 579.0, 582.0, 519.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 267.0, 260.0, 291.0, 285.0, 294.0, 288.0, 261.0, 264.0, 318.0, 315.0, 265.0, 265.0, 294.0, 288.0, 296.0, 283.0, 251.0, 268.0, 316.0, 314.0, 317.0, 313.0, 259.0, 263.0, 296.0, 286.0, 289.0, 290.0, 296.0, 291.0, 306.0, 321.0, 280.0, 293.0, 291.0, 288.0, 286.0, 290.0, 286.0, 296.0, 294.0, 282.0, 297.0, 285.0, 294.0, 293.0, 283.0, 
296.0, 288.0, 285.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 262.0, 268.0, 257.0, 262.0, 286.0, 290.0, 265.0, 260.0, 290.0, 292.0, 283.0, 287.0, 283.0, 290.0, 293.0, 286.0, 311.0, 316.0, 255.0, 267.0, 286.0, 293.0, 267.0, 263.0, 258.0, 252.0, 285.0, 291.0, 285.0, 302.0, 286.0, 293.0, 278.0, 295.0, 313.0, 314.0, 293.0, 289.0, 287.0, 289.0, 292.0, 287.0, 293.0, 289.0, 301.0, 286.0, 320.0, 319.0, 319.0, 314.0, 242.0, 231.0, 313.0, 314.0, 319.0, 311.0, 291.0, 291.0, 291.0, 291.0, 290.0, 292.0, 293.0, 289.0, 292.0, 287.0, 260.0, 265.0, 288.0, 285.0, 247.0, 275.0, 265.0, 251.0, 322.0, 305.0, 310.0, 317.0, 265.0, 260.0, 287.0, 280.0, 291.0, 288.0, 314.0, 313.0, 288.0, 288.0, 273.0, 249.0, 315.0, 315.0, 286.0, 301.0, 321.0, 312.0, 236.0, 226.0, 291.0, 291.0, 276.0, 294.0, 289.0, 287.0, 263.0, 259.0, 292.0, 290.0, 284.0, 286.0, 267.0, 255.0, 291.0, 291.0, 289.0, 293.0, 317.0, 316.0, 317.0, 313.0, 288.0, 291.0, 265.0, 262.0, 292.0, 287.0, 296.0, 286.0, 294.0, 285.0, 288.0, 291.0, 281.0, 286.0, 287.0, 292.0, 292.0, 290.0, 252.0, 267.0, 289.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6933303687204011, "mean_inference_ms": 1.2359272963791816, "mean_action_processing_ms": 0.13312594714549067, "mean_env_wait_ms": 0.8346400835968194, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 574.75, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 287.375}, "hist_stats": {"episode_reward": [579.0, 527.0, 576.0, 582.0, 525.0, 633.0, 530.0, 582.0, 579.0, 519.0, 630.0, 630.0, 522.0, 582.0, 579.0, 587.0, 627.0, 573.0, 579.0, 576.0, 582.0, 576.0, 582.0, 587.0, 579.0, 573.0, 579.0, 636.0, 582.0, 530.0, 519.0, 576.0, 525.0, 582.0, 570.0, 573.0, 579.0, 627.0, 522.0, 579.0, 530.0, 510.0, 576.0, 587.0, 579.0, 573.0, 627.0, 582.0, 576.0, 579.0, 582.0, 587.0, 639.0, 633.0, 473.0, 627.0, 
630.0, 582.0, 582.0, 582.0, 582.0, 579.0, 525.0, 573.0, 522.0, 516.0, 627.0, 627.0, 525.0, 567.0, 579.0, 627.0, 576.0, 522.0, 630.0, 587.0, 633.0, 462.0, 582.0, 570.0, 576.0, 522.0, 582.0, 570.0, 522.0, 582.0, 582.0, 633.0, 630.0, 579.0, 527.0, 579.0, 582.0, 579.0, 579.0, 567.0, 579.0, 582.0, 519.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 267.0, 260.0, 291.0, 285.0, 294.0, 288.0, 261.0, 264.0, 318.0, 315.0, 265.0, 265.0, 294.0, 288.0, 296.0, 283.0, 251.0, 268.0, 316.0, 314.0, 317.0, 313.0, 259.0, 263.0, 296.0, 286.0, 289.0, 290.0, 296.0, 291.0, 306.0, 321.0, 280.0, 293.0, 291.0, 288.0, 286.0, 290.0, 286.0, 296.0, 294.0, 282.0, 297.0, 285.0, 294.0, 293.0, 283.0, 296.0, 288.0, 285.0, 294.0, 285.0, 317.0, 319.0, 292.0, 290.0, 262.0, 268.0, 257.0, 262.0, 286.0, 290.0, 265.0, 260.0, 290.0, 292.0, 283.0, 287.0, 283.0, 290.0, 293.0, 286.0, 311.0, 316.0, 255.0, 267.0, 286.0, 293.0, 267.0, 263.0, 258.0, 252.0, 285.0, 291.0, 285.0, 302.0, 286.0, 293.0, 278.0, 295.0, 313.0, 314.0, 293.0, 289.0, 287.0, 289.0, 292.0, 287.0, 293.0, 289.0, 301.0, 286.0, 320.0, 319.0, 319.0, 314.0, 242.0, 231.0, 313.0, 314.0, 319.0, 311.0, 291.0, 291.0, 291.0, 291.0, 290.0, 292.0, 293.0, 289.0, 292.0, 287.0, 260.0, 265.0, 288.0, 285.0, 247.0, 275.0, 265.0, 251.0, 322.0, 305.0, 310.0, 317.0, 265.0, 260.0, 287.0, 280.0, 291.0, 288.0, 314.0, 313.0, 288.0, 288.0, 273.0, 249.0, 315.0, 315.0, 286.0, 301.0, 321.0, 312.0, 236.0, 226.0, 291.0, 291.0, 276.0, 294.0, 289.0, 287.0, 263.0, 259.0, 
292.0, 290.0, 284.0, 286.0, 267.0, 255.0, 291.0, 291.0, 289.0, 293.0, 317.0, 316.0, 317.0, 313.0, 288.0, 291.0, 265.0, 262.0, 292.0, 287.0, 296.0, 286.0, 294.0, 285.0, 288.0, 291.0, 281.0, 286.0, 287.0, 292.0, 292.0, 290.0, 252.0, 267.0, 289.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6933303687204011, "mean_inference_ms": 1.2359272963791816, "mean_action_processing_ms": 0.13312594714549067, "mean_env_wait_ms": 0.8346400835968194, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12185600, "num_agent_steps_trained": 12185600, "num_env_steps_sampled": 6092800, "num_env_steps_trained": 6092800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6092800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12185600, "timers": {"training_iteration_time_ms": 3539.364, "learn_time_ms": 1070.222, "learn_throughput": 11960.137, "synch_weights_time_ms": 11.015}, "counters": {"num_env_steps_sampled": 6092800, "num_env_steps_trained": 6092800, "num_agent_steps_sampled": 12185600, "num_agent_steps_trained": 12185600}, "done": false, "episodes_total": 15232, "training_iteration": 476, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-20", "timestamp": 1666582280, "time_this_iter_s": 3.5145702362060547, "time_total_s": 1807.3808934688568, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": 
false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": 
{"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": 
{"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 
100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1807.3808934688568, "timesteps_since_restore": 0, "iterations_since_restore": 476, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.566666666666666, "ram_util_percent": 10.6}} 
+{"custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.59, "shaped_reward_min": 141, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.01, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.58, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.69, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.19, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.88, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.69, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.69, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0004522355739027262, "policy_loss": 6.512660183943808e-05, "vf_loss": 
7.6786627769470215, "vf_explained_var": 0.596559464931488, "kl": 0.002718728268519044, "entropy": 0.7615171670913696, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6105600, "num_env_steps_trained": 6105600, "num_agent_steps_sampled": 12211200, "num_agent_steps_trained": 12211200}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 573.79, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.895}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 176.59, "shaped_reward_min": 141, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.7, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.01, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.58, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.86, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 20, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.02, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.36, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.69, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.19, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.88, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.11, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.84, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.1, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.36, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.69, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.36, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.69, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 582.0, 570.0, 573.0, 579.0, 627.0, 522.0, 579.0, 530.0, 510.0, 576.0, 587.0, 579.0, 573.0, 627.0, 582.0, 576.0, 579.0, 582.0, 587.0, 639.0, 633.0, 473.0, 627.0, 630.0, 582.0, 582.0, 582.0, 582.0, 579.0, 525.0, 573.0, 522.0, 516.0, 627.0, 627.0, 525.0, 567.0, 579.0, 627.0, 576.0, 522.0, 630.0, 587.0, 633.0, 462.0, 582.0, 570.0, 576.0, 522.0, 582.0, 570.0, 522.0, 582.0, 582.0, 633.0, 630.0, 579.0, 527.0, 579.0, 582.0, 579.0, 579.0, 567.0, 579.0, 582.0, 519.0, 582.0, 533.0, 522.0, 573.0, 582.0, 579.0, 576.0, 630.0, 501.0, 587.0, 633.0, 630.0, 579.0, 579.0, 579.0, 530.0, 582.0, 573.0, 570.0, 530.0, 582.0, 519.0, 587.0, 525.0, 633.0, 630.0, 630.0, 525.0, 519.0, 582.0, 582.0, 582.0, 558.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 260.0, 290.0, 292.0, 283.0, 287.0, 283.0, 290.0, 293.0, 286.0, 311.0, 316.0, 255.0, 267.0, 286.0, 293.0, 267.0, 263.0, 258.0, 252.0, 285.0, 291.0, 285.0, 302.0, 286.0, 293.0, 278.0, 295.0, 313.0, 314.0, 293.0, 289.0, 287.0, 289.0, 292.0, 287.0, 293.0, 289.0, 301.0, 286.0, 320.0, 319.0, 319.0, 314.0, 242.0, 231.0, 313.0, 314.0, 319.0, 
311.0, 291.0, 291.0, 291.0, 291.0, 290.0, 292.0, 293.0, 289.0, 292.0, 287.0, 260.0, 265.0, 288.0, 285.0, 247.0, 275.0, 265.0, 251.0, 322.0, 305.0, 310.0, 317.0, 265.0, 260.0, 287.0, 280.0, 291.0, 288.0, 314.0, 313.0, 288.0, 288.0, 273.0, 249.0, 315.0, 315.0, 286.0, 301.0, 321.0, 312.0, 236.0, 226.0, 291.0, 291.0, 276.0, 294.0, 289.0, 287.0, 263.0, 259.0, 292.0, 290.0, 284.0, 286.0, 267.0, 255.0, 291.0, 291.0, 289.0, 293.0, 317.0, 316.0, 317.0, 313.0, 288.0, 291.0, 265.0, 262.0, 292.0, 287.0, 296.0, 286.0, 294.0, 285.0, 288.0, 291.0, 281.0, 286.0, 287.0, 292.0, 292.0, 290.0, 252.0, 267.0, 289.0, 293.0, 263.0, 270.0, 258.0, 264.0, 287.0, 286.0, 292.0, 290.0, 285.0, 294.0, 286.0, 290.0, 314.0, 316.0, 253.0, 248.0, 290.0, 297.0, 316.0, 317.0, 316.0, 314.0, 289.0, 290.0, 297.0, 282.0, 287.0, 292.0, 263.0, 267.0, 289.0, 293.0, 286.0, 287.0, 284.0, 286.0, 266.0, 264.0, 299.0, 283.0, 249.0, 270.0, 288.0, 299.0, 263.0, 262.0, 320.0, 313.0, 316.0, 314.0, 311.0, 319.0, 263.0, 262.0, 248.0, 271.0, 289.0, 293.0, 288.0, 294.0, 289.0, 293.0, 281.0, 277.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6932739430598015, "mean_inference_ms": 1.2358048086334636, "mean_action_processing_ms": 0.13311876846202841, "mean_env_wait_ms": 0.8345568859029008, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 462.0, "episode_reward_mean": 573.79, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 286.895}, "hist_stats": {"episode_reward": [525.0, 582.0, 570.0, 573.0, 579.0, 627.0, 522.0, 579.0, 530.0, 510.0, 576.0, 587.0, 579.0, 573.0, 627.0, 582.0, 576.0, 579.0, 582.0, 587.0, 639.0, 633.0, 473.0, 627.0, 630.0, 582.0, 582.0, 582.0, 582.0, 579.0, 525.0, 573.0, 522.0, 516.0, 627.0, 627.0, 525.0, 567.0, 579.0, 627.0, 576.0, 522.0, 630.0, 587.0, 633.0, 462.0, 582.0, 570.0, 576.0, 522.0, 582.0, 570.0, 522.0, 582.0, 582.0, 633.0, 
630.0, 579.0, 527.0, 579.0, 582.0, 579.0, 579.0, 567.0, 579.0, 582.0, 519.0, 582.0, 533.0, 522.0, 573.0, 582.0, 579.0, 576.0, 630.0, 501.0, 587.0, 633.0, 630.0, 579.0, 579.0, 579.0, 530.0, 582.0, 573.0, 570.0, 530.0, 582.0, 519.0, 587.0, 525.0, 633.0, 630.0, 630.0, 525.0, 519.0, 582.0, 582.0, 582.0, 558.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [265.0, 260.0, 290.0, 292.0, 283.0, 287.0, 283.0, 290.0, 293.0, 286.0, 311.0, 316.0, 255.0, 267.0, 286.0, 293.0, 267.0, 263.0, 258.0, 252.0, 285.0, 291.0, 285.0, 302.0, 286.0, 293.0, 278.0, 295.0, 313.0, 314.0, 293.0, 289.0, 287.0, 289.0, 292.0, 287.0, 293.0, 289.0, 301.0, 286.0, 320.0, 319.0, 319.0, 314.0, 242.0, 231.0, 313.0, 314.0, 319.0, 311.0, 291.0, 291.0, 291.0, 291.0, 290.0, 292.0, 293.0, 289.0, 292.0, 287.0, 260.0, 265.0, 288.0, 285.0, 247.0, 275.0, 265.0, 251.0, 322.0, 305.0, 310.0, 317.0, 265.0, 260.0, 287.0, 280.0, 291.0, 288.0, 314.0, 313.0, 288.0, 288.0, 273.0, 249.0, 315.0, 315.0, 286.0, 301.0, 321.0, 312.0, 236.0, 226.0, 291.0, 291.0, 276.0, 294.0, 289.0, 287.0, 263.0, 259.0, 292.0, 290.0, 284.0, 286.0, 267.0, 255.0, 291.0, 291.0, 289.0, 293.0, 317.0, 316.0, 317.0, 313.0, 288.0, 291.0, 265.0, 262.0, 292.0, 287.0, 296.0, 286.0, 294.0, 285.0, 288.0, 291.0, 281.0, 286.0, 287.0, 292.0, 292.0, 290.0, 252.0, 267.0, 289.0, 293.0, 263.0, 270.0, 258.0, 264.0, 287.0, 286.0, 292.0, 290.0, 285.0, 294.0, 286.0, 290.0, 314.0, 316.0, 253.0, 248.0, 290.0, 297.0, 316.0, 317.0, 316.0, 314.0, 289.0, 290.0, 297.0, 282.0, 287.0, 292.0, 
263.0, 267.0, 289.0, 293.0, 286.0, 287.0, 284.0, 286.0, 266.0, 264.0, 299.0, 283.0, 249.0, 270.0, 288.0, 299.0, 263.0, 262.0, 320.0, 313.0, 316.0, 314.0, 311.0, 319.0, 263.0, 262.0, 248.0, 271.0, 289.0, 293.0, 288.0, 294.0, 289.0, 293.0, 281.0, 277.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6932739430598015, "mean_inference_ms": 1.2358048086334636, "mean_action_processing_ms": 0.13311876846202841, "mean_env_wait_ms": 0.8345568859029008, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12211200, "num_agent_steps_trained": 12211200, "num_env_steps_sampled": 6105600, "num_env_steps_trained": 6105600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6105600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12211200, "timers": {"training_iteration_time_ms": 3534.379, "learn_time_ms": 1066.085, "learn_throughput": 12006.542, "synch_weights_time_ms": 11.596}, "counters": {"num_env_steps_sampled": 6105600, "num_env_steps_trained": 6105600, "num_agent_steps_sampled": 12211200, "num_agent_steps_trained": 12211200}, "done": false, "episodes_total": 15264, "training_iteration": 477, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-23", "timestamp": 1666582283, "time_this_iter_s": 3.535604476928711, "time_total_s": 1810.9164979457855, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": 
false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": 
{"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": 
{"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 
100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1810.9164979457855, "timesteps_since_restore": 0, "iterations_since_restore": 477, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.84, "ram_util_percent": 10.620000000000001}} 
+{"custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.9, "shaped_reward_min": 141, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.9, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.78, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, 
"useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.3, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.64, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.95, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.96, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.3, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.64, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, 
"optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.3, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.64, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002086335327476263, "policy_loss": 0.0016811969690024853, "vf_loss": 
7.846938133239746, "vf_explained_var": 0.5670309066772461, "kl": 0.003477412974461913, "entropy": 0.7591124176979065, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6118400, "num_env_steps_trained": 6118400, "num_agent_steps_sampled": 12236800, "num_agent_steps_trained": 12236800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 462.0, "episode_reward_mean": 571.9, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.95}, "custom_metrics": {"sparse_reward_mean": 198.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 175.9, "shaped_reward_min": 141, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.65, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.9, 
"onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.55, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.78, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.3, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.64, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 20, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 10, "dish_pickup_agent_1_mean": 5.28, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.95, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.08, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 3, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.96, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 2, 
"soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.3, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.64, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 20, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.3, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.64, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 20, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, 
"useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [522.0, 516.0, 627.0, 627.0, 525.0, 567.0, 579.0, 627.0, 576.0, 522.0, 630.0, 587.0, 633.0, 462.0, 582.0, 570.0, 576.0, 522.0, 582.0, 570.0, 522.0, 582.0, 582.0, 633.0, 630.0, 579.0, 527.0, 579.0, 582.0, 579.0, 579.0, 567.0, 579.0, 582.0, 519.0, 582.0, 533.0, 522.0, 573.0, 582.0, 579.0, 576.0, 630.0, 501.0, 587.0, 633.0, 630.0, 579.0, 579.0, 579.0, 530.0, 582.0, 573.0, 570.0, 530.0, 582.0, 519.0, 587.0, 525.0, 633.0, 630.0, 630.0, 525.0, 519.0, 582.0, 582.0, 582.0, 558.0, 587.0, 576.0, 519.0, 527.0, 579.0, 573.0, 576.0, 627.0, 582.0, 573.0, 576.0, 579.0, 579.0, 530.0, 579.0, 576.0, 582.0, 573.0, 576.0, 525.0, 519.0, 582.0, 627.0, 516.0, 579.0, 579.0, 573.0, 530.0, 627.0, 573.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [247.0, 275.0, 265.0, 251.0, 322.0, 305.0, 310.0, 317.0, 265.0, 260.0, 287.0, 280.0, 291.0, 288.0, 314.0, 313.0, 288.0, 288.0, 273.0, 249.0, 315.0, 315.0, 286.0, 301.0, 321.0, 312.0, 236.0, 226.0, 291.0, 291.0, 276.0, 294.0, 289.0, 287.0, 263.0, 259.0, 292.0, 290.0, 284.0, 286.0, 267.0, 255.0, 291.0, 291.0, 289.0, 293.0, 317.0, 316.0, 317.0, 
313.0, 288.0, 291.0, 265.0, 262.0, 292.0, 287.0, 296.0, 286.0, 294.0, 285.0, 288.0, 291.0, 281.0, 286.0, 287.0, 292.0, 292.0, 290.0, 252.0, 267.0, 289.0, 293.0, 263.0, 270.0, 258.0, 264.0, 287.0, 286.0, 292.0, 290.0, 285.0, 294.0, 286.0, 290.0, 314.0, 316.0, 253.0, 248.0, 290.0, 297.0, 316.0, 317.0, 316.0, 314.0, 289.0, 290.0, 297.0, 282.0, 287.0, 292.0, 263.0, 267.0, 289.0, 293.0, 286.0, 287.0, 284.0, 286.0, 266.0, 264.0, 299.0, 283.0, 249.0, 270.0, 288.0, 299.0, 263.0, 262.0, 320.0, 313.0, 316.0, 314.0, 311.0, 319.0, 263.0, 262.0, 248.0, 271.0, 289.0, 293.0, 288.0, 294.0, 289.0, 293.0, 281.0, 277.0, 299.0, 288.0, 299.0, 277.0, 257.0, 262.0, 260.0, 267.0, 291.0, 288.0, 296.0, 277.0, 292.0, 284.0, 316.0, 311.0, 293.0, 289.0, 280.0, 293.0, 296.0, 280.0, 296.0, 283.0, 291.0, 288.0, 269.0, 261.0, 299.0, 280.0, 292.0, 284.0, 297.0, 285.0, 288.0, 285.0, 293.0, 283.0, 270.0, 255.0, 265.0, 254.0, 299.0, 283.0, 303.0, 324.0, 260.0, 256.0, 288.0, 291.0, 285.0, 294.0, 293.0, 280.0, 260.0, 270.0, 313.0, 314.0, 288.0, 285.0, 289.0, 293.0, 293.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6932216431064484, "mean_inference_ms": 1.235692735455491, "mean_action_processing_ms": 0.13311392444286807, "mean_env_wait_ms": 0.8344878818156225, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 462.0, "episode_reward_mean": 571.9, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 226.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.95}, "hist_stats": {"episode_reward": [522.0, 516.0, 627.0, 627.0, 525.0, 567.0, 579.0, 627.0, 576.0, 522.0, 630.0, 587.0, 633.0, 462.0, 582.0, 570.0, 576.0, 522.0, 582.0, 570.0, 522.0, 582.0, 582.0, 633.0, 630.0, 579.0, 527.0, 579.0, 582.0, 579.0, 579.0, 567.0, 579.0, 582.0, 519.0, 582.0, 533.0, 522.0, 573.0, 582.0, 579.0, 576.0, 630.0, 501.0, 587.0, 633.0, 630.0, 579.0, 579.0, 579.0, 530.0, 582.0, 573.0, 570.0, 530.0, 582.0, 519.0, 
587.0, 525.0, 633.0, 630.0, 630.0, 525.0, 519.0, 582.0, 582.0, 582.0, 558.0, 587.0, 576.0, 519.0, 527.0, 579.0, 573.0, 576.0, 627.0, 582.0, 573.0, 576.0, 579.0, 579.0, 530.0, 579.0, 576.0, 582.0, 573.0, 576.0, 525.0, 519.0, 582.0, 627.0, 516.0, 579.0, 579.0, 573.0, 530.0, 627.0, 573.0, 582.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [247.0, 275.0, 265.0, 251.0, 322.0, 305.0, 310.0, 317.0, 265.0, 260.0, 287.0, 280.0, 291.0, 288.0, 314.0, 313.0, 288.0, 288.0, 273.0, 249.0, 315.0, 315.0, 286.0, 301.0, 321.0, 312.0, 236.0, 226.0, 291.0, 291.0, 276.0, 294.0, 289.0, 287.0, 263.0, 259.0, 292.0, 290.0, 284.0, 286.0, 267.0, 255.0, 291.0, 291.0, 289.0, 293.0, 317.0, 316.0, 317.0, 313.0, 288.0, 291.0, 265.0, 262.0, 292.0, 287.0, 296.0, 286.0, 294.0, 285.0, 288.0, 291.0, 281.0, 286.0, 287.0, 292.0, 292.0, 290.0, 252.0, 267.0, 289.0, 293.0, 263.0, 270.0, 258.0, 264.0, 287.0, 286.0, 292.0, 290.0, 285.0, 294.0, 286.0, 290.0, 314.0, 316.0, 253.0, 248.0, 290.0, 297.0, 316.0, 317.0, 316.0, 314.0, 289.0, 290.0, 297.0, 282.0, 287.0, 292.0, 263.0, 267.0, 289.0, 293.0, 286.0, 287.0, 284.0, 286.0, 266.0, 264.0, 299.0, 283.0, 249.0, 270.0, 288.0, 299.0, 263.0, 262.0, 320.0, 313.0, 316.0, 314.0, 311.0, 319.0, 263.0, 262.0, 248.0, 271.0, 289.0, 293.0, 288.0, 294.0, 289.0, 293.0, 281.0, 277.0, 299.0, 288.0, 299.0, 277.0, 257.0, 262.0, 260.0, 267.0, 291.0, 288.0, 296.0, 277.0, 292.0, 284.0, 316.0, 311.0, 293.0, 289.0, 280.0, 293.0, 296.0, 280.0, 296.0, 283.0, 291.0, 288.0, 269.0, 261.0, 299.0, 
280.0, 292.0, 284.0, 297.0, 285.0, 288.0, 285.0, 293.0, 283.0, 270.0, 255.0, 265.0, 254.0, 299.0, 283.0, 303.0, 324.0, 260.0, 256.0, 288.0, 291.0, 285.0, 294.0, 293.0, 280.0, 260.0, 270.0, 313.0, 314.0, 288.0, 285.0, 289.0, 293.0, 293.0, 289.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6932216431064484, "mean_inference_ms": 1.235692735455491, "mean_action_processing_ms": 0.13311392444286807, "mean_env_wait_ms": 0.8344878818156225, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12236800, "num_agent_steps_trained": 12236800, "num_env_steps_sampled": 6118400, "num_env_steps_trained": 6118400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6118400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12236800, "timers": {"training_iteration_time_ms": 3535.795, "learn_time_ms": 1064.167, "learn_throughput": 12028.182, "synch_weights_time_ms": 10.668}, "counters": {"num_env_steps_sampled": 6118400, "num_env_steps_trained": 6118400, "num_agent_steps_sampled": 12236800, "num_agent_steps_trained": 12236800}, "done": false, "episodes_total": 15296, "training_iteration": 478, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-27", "timestamp": 1666582287, "time_this_iter_s": 3.668811082839966, "time_total_s": 1814.5853090286255, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, 
"allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": 
false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": 
{"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": 
{"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 
100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1814.5853090286255, "timesteps_since_restore": 0, "iterations_since_restore": 478, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.86, "ram_util_percent": 10.6}} +{"custom_metrics": 
{"sparse_reward_mean": 199.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.02, "shaped_reward_min": 141, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.03, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.73, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.88, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, 
"useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.55, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.75, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.21, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.01, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.23, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.92, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.55, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.75, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, 
"optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.55, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.75, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0014865855919197202, "policy_loss": 0.00110257463529706, "vf_loss": 7.5981879234313965, "vf_explained_var": 
0.6002695560455322, "kl": 0.003477748716250062, "entropy": 0.751615047454834, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6131200, "num_env_steps_trained": 6131200, "num_agent_steps_sampled": 12262400, "num_agent_steps_trained": 12262400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 501.0, "episode_reward_mean": 576.42, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.21}, "custom_metrics": {"sparse_reward_mean": 199.2, "sparse_reward_min": 180, "sparse_reward_max": 220, "shaped_reward_mean": 178.02, "shaped_reward_min": 141, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.85, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 16.03, "onion_pickup_agent_1_min": 11, 
"onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.73, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.88, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.55, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.75, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.21, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.01, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.23, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.06, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 2, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.92, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.16, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.89, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, 
"soup_delivery_agent_1_mean": 5.08, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.55, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.75, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.55, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.75, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, 
"useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 582.0, 519.0, 582.0, 533.0, 522.0, 573.0, 582.0, 579.0, 576.0, 630.0, 501.0, 587.0, 633.0, 630.0, 579.0, 579.0, 579.0, 530.0, 582.0, 573.0, 570.0, 530.0, 582.0, 519.0, 587.0, 525.0, 633.0, 630.0, 630.0, 525.0, 519.0, 582.0, 582.0, 582.0, 558.0, 587.0, 576.0, 519.0, 527.0, 579.0, 573.0, 576.0, 627.0, 582.0, 573.0, 576.0, 579.0, 579.0, 530.0, 579.0, 576.0, 582.0, 573.0, 576.0, 525.0, 519.0, 582.0, 627.0, 516.0, 579.0, 579.0, 573.0, 530.0, 627.0, 573.0, 582.0, 582.0, 582.0, 579.0, 630.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 636.0, 533.0, 576.0, 582.0, 573.0, 627.0, 627.0, 582.0, 579.0, 639.0, 525.0, 582.0, 587.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 292.0, 290.0, 252.0, 267.0, 289.0, 293.0, 263.0, 270.0, 258.0, 264.0, 287.0, 286.0, 292.0, 290.0, 285.0, 294.0, 286.0, 290.0, 314.0, 316.0, 253.0, 248.0, 290.0, 297.0, 316.0, 317.0, 316.0, 314.0, 289.0, 290.0, 297.0, 282.0, 287.0, 292.0, 263.0, 267.0, 289.0, 293.0, 286.0, 287.0, 284.0, 286.0, 266.0, 264.0, 299.0, 283.0, 249.0, 270.0, 288.0, 299.0, 263.0, 262.0, 320.0, 
313.0, 316.0, 314.0, 311.0, 319.0, 263.0, 262.0, 248.0, 271.0, 289.0, 293.0, 288.0, 294.0, 289.0, 293.0, 281.0, 277.0, 299.0, 288.0, 299.0, 277.0, 257.0, 262.0, 260.0, 267.0, 291.0, 288.0, 296.0, 277.0, 292.0, 284.0, 316.0, 311.0, 293.0, 289.0, 280.0, 293.0, 296.0, 280.0, 296.0, 283.0, 291.0, 288.0, 269.0, 261.0, 299.0, 280.0, 292.0, 284.0, 297.0, 285.0, 288.0, 285.0, 293.0, 283.0, 270.0, 255.0, 265.0, 254.0, 299.0, 283.0, 303.0, 324.0, 260.0, 256.0, 288.0, 291.0, 285.0, 294.0, 293.0, 280.0, 260.0, 270.0, 313.0, 314.0, 288.0, 285.0, 289.0, 293.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 316.0, 314.0, 319.0, 317.0, 287.0, 295.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 293.0, 294.0, 289.0, 293.0, 288.0, 291.0, 291.0, 291.0, 281.0, 292.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 317.0, 319.0, 271.0, 262.0, 292.0, 284.0, 291.0, 291.0, 285.0, 288.0, 311.0, 316.0, 308.0, 319.0, 292.0, 290.0, 294.0, 285.0, 324.0, 315.0, 264.0, 261.0, 286.0, 296.0, 288.0, 299.0, 291.0, 291.0, 293.0, 289.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6931697245030041, "mean_inference_ms": 1.235632701432748, "mean_action_processing_ms": 0.13310679759987598, "mean_env_wait_ms": 0.8344476003245105, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 501.0, "episode_reward_mean": 576.42, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 248.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 288.21}, "hist_stats": {"episode_reward": [579.0, 582.0, 519.0, 582.0, 533.0, 522.0, 573.0, 582.0, 579.0, 576.0, 630.0, 501.0, 587.0, 633.0, 630.0, 579.0, 579.0, 579.0, 530.0, 582.0, 573.0, 570.0, 530.0, 582.0, 519.0, 587.0, 525.0, 633.0, 630.0, 630.0, 525.0, 519.0, 582.0, 582.0, 582.0, 558.0, 587.0, 576.0, 519.0, 527.0, 579.0, 573.0, 576.0, 627.0, 582.0, 573.0, 576.0, 579.0, 579.0, 530.0, 579.0, 576.0, 582.0, 573.0, 576.0, 525.0, 519.0, 582.0, 627.0, 516.0, 579.0, 579.0, 573.0, 
530.0, 627.0, 573.0, 582.0, 582.0, 582.0, 579.0, 630.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 636.0, 533.0, 576.0, 582.0, 573.0, 627.0, 627.0, 582.0, 579.0, 639.0, 525.0, 582.0, 587.0, 582.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 292.0, 292.0, 290.0, 252.0, 267.0, 289.0, 293.0, 263.0, 270.0, 258.0, 264.0, 287.0, 286.0, 292.0, 290.0, 285.0, 294.0, 286.0, 290.0, 314.0, 316.0, 253.0, 248.0, 290.0, 297.0, 316.0, 317.0, 316.0, 314.0, 289.0, 290.0, 297.0, 282.0, 287.0, 292.0, 263.0, 267.0, 289.0, 293.0, 286.0, 287.0, 284.0, 286.0, 266.0, 264.0, 299.0, 283.0, 249.0, 270.0, 288.0, 299.0, 263.0, 262.0, 320.0, 313.0, 316.0, 314.0, 311.0, 319.0, 263.0, 262.0, 248.0, 271.0, 289.0, 293.0, 288.0, 294.0, 289.0, 293.0, 281.0, 277.0, 299.0, 288.0, 299.0, 277.0, 257.0, 262.0, 260.0, 267.0, 291.0, 288.0, 296.0, 277.0, 292.0, 284.0, 316.0, 311.0, 293.0, 289.0, 280.0, 293.0, 296.0, 280.0, 296.0, 283.0, 291.0, 288.0, 269.0, 261.0, 299.0, 280.0, 292.0, 284.0, 297.0, 285.0, 288.0, 285.0, 293.0, 283.0, 270.0, 255.0, 265.0, 254.0, 299.0, 283.0, 303.0, 324.0, 260.0, 256.0, 288.0, 291.0, 285.0, 294.0, 293.0, 280.0, 260.0, 270.0, 313.0, 314.0, 288.0, 285.0, 289.0, 293.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 316.0, 314.0, 319.0, 317.0, 287.0, 295.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 293.0, 294.0, 289.0, 293.0, 288.0, 291.0, 291.0, 291.0, 281.0, 292.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 317.0, 319.0, 271.0, 
262.0, 292.0, 284.0, 291.0, 291.0, 285.0, 288.0, 311.0, 316.0, 308.0, 319.0, 292.0, 290.0, 294.0, 285.0, 324.0, 315.0, 264.0, 261.0, 286.0, 296.0, 288.0, 299.0, 291.0, 291.0, 293.0, 289.0, 289.0, 290.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6931697245030041, "mean_inference_ms": 1.235632701432748, "mean_action_processing_ms": 0.13310679759987598, "mean_env_wait_ms": 0.8344476003245105, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12262400, "num_agent_steps_trained": 12262400, "num_env_steps_sampled": 6131200, "num_env_steps_trained": 6131200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6131200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12262400, "timers": {"training_iteration_time_ms": 3557.232, "learn_time_ms": 1062.71, "learn_throughput": 12044.681, "synch_weights_time_ms": 10.597}, "counters": {"num_env_steps_sampled": 6131200, "num_env_steps_trained": 6131200, "num_agent_steps_sampled": 12262400, "num_agent_steps_trained": 12262400}, "done": false, "episodes_total": 15328, "training_iteration": 479, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-31", "timestamp": 1666582291, "time_this_iter_s": 3.794663190841675, "time_total_s": 1818.3799722194672, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": 
false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": 
true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, 
"_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, 
"logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, 
"min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1818.3799722194672, "timesteps_since_restore": 0, "iterations_since_restore": 479, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.8, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.4, 
"sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.01, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.87, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.37, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.68, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.61, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.13, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.61, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.61, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0032907375134527683, "policy_loss": 0.002880503423511982, "vf_loss": 7.935708045959473, "vf_explained_var": 0.5589988231658936, "kl": 0.0031868487130850554, 
"entropy": 0.766674816608429, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6144000, "num_env_steps_trained": 6144000, "num_agent_steps_sampled": 12288000, "num_agent_steps_trained": 12288000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 411.0, "episode_reward_mean": 568.81, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.405}, "custom_metrics": {"sparse_reward_mean": 196.4, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.01, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.87, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 
16.37, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.68, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.61, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.24, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.2, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.13, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.09, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.94, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.9, "soup_delivery_agent_1_min": 3, 
"soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.61, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.61, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 558.0, 587.0, 576.0, 519.0, 527.0, 579.0, 573.0, 576.0, 627.0, 582.0, 573.0, 576.0, 579.0, 579.0, 530.0, 579.0, 576.0, 582.0, 573.0, 576.0, 525.0, 519.0, 582.0, 627.0, 516.0, 579.0, 579.0, 573.0, 530.0, 627.0, 573.0, 582.0, 582.0, 582.0, 579.0, 630.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 636.0, 533.0, 576.0, 582.0, 573.0, 627.0, 627.0, 582.0, 579.0, 639.0, 525.0, 582.0, 587.0, 582.0, 582.0, 579.0, 573.0, 573.0, 579.0, 411.0, 573.0, 579.0, 582.0, 530.0, 627.0, 530.0, 525.0, 533.0, 630.0, 570.0, 576.0, 530.0, 570.0, 530.0, 411.0, 522.0, 413.0, 579.0, 587.0, 579.0, 525.0, 525.0, 519.0, 579.0, 579.0, 525.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 288.0, 294.0, 289.0, 293.0, 281.0, 277.0, 299.0, 288.0, 299.0, 277.0, 257.0, 262.0, 260.0, 267.0, 291.0, 288.0, 296.0, 277.0, 292.0, 284.0, 316.0, 311.0, 293.0, 289.0, 280.0, 293.0, 296.0, 280.0, 296.0, 283.0, 291.0, 288.0, 269.0, 261.0, 299.0, 280.0, 292.0, 284.0, 297.0, 285.0, 288.0, 285.0, 293.0, 283.0, 270.0, 255.0, 265.0, 254.0, 299.0, 283.0, 303.0, 324.0, 260.0, 256.0, 288.0, 291.0, 285.0, 294.0, 293.0, 
280.0, 260.0, 270.0, 313.0, 314.0, 288.0, 285.0, 289.0, 293.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 316.0, 314.0, 319.0, 317.0, 287.0, 295.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 293.0, 294.0, 289.0, 293.0, 288.0, 291.0, 291.0, 291.0, 281.0, 292.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 317.0, 319.0, 271.0, 262.0, 292.0, 284.0, 291.0, 291.0, 285.0, 288.0, 311.0, 316.0, 308.0, 319.0, 292.0, 290.0, 294.0, 285.0, 324.0, 315.0, 264.0, 261.0, 286.0, 296.0, 288.0, 299.0, 291.0, 291.0, 293.0, 289.0, 289.0, 290.0, 285.0, 288.0, 283.0, 290.0, 295.0, 284.0, 208.0, 203.0, 288.0, 285.0, 286.0, 293.0, 297.0, 285.0, 265.0, 265.0, 316.0, 311.0, 264.0, 266.0, 258.0, 267.0, 270.0, 263.0, 316.0, 314.0, 285.0, 285.0, 291.0, 285.0, 261.0, 269.0, 285.0, 285.0, 265.0, 265.0, 208.0, 203.0, 262.0, 260.0, 212.0, 201.0, 296.0, 283.0, 296.0, 291.0, 286.0, 293.0, 263.0, 262.0, 263.0, 262.0, 265.0, 254.0, 294.0, 285.0, 288.0, 291.0, 260.0, 265.0, 289.0, 287.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6931151782683085, "mean_inference_ms": 1.2356500040177392, "mean_action_processing_ms": 0.13309948751345646, "mean_env_wait_ms": 0.8344131259950188, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 411.0, "episode_reward_mean": 568.81, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 284.405}, "hist_stats": {"episode_reward": [582.0, 582.0, 582.0, 558.0, 587.0, 576.0, 519.0, 527.0, 579.0, 573.0, 576.0, 627.0, 582.0, 573.0, 576.0, 579.0, 579.0, 530.0, 579.0, 576.0, 582.0, 573.0, 576.0, 525.0, 519.0, 582.0, 627.0, 516.0, 579.0, 579.0, 573.0, 530.0, 627.0, 573.0, 582.0, 582.0, 582.0, 579.0, 630.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 636.0, 533.0, 576.0, 582.0, 573.0, 627.0, 627.0, 582.0, 579.0, 639.0, 525.0, 582.0, 587.0, 582.0, 582.0, 579.0, 
573.0, 573.0, 579.0, 411.0, 573.0, 579.0, 582.0, 530.0, 627.0, 530.0, 525.0, 533.0, 630.0, 570.0, 576.0, 530.0, 570.0, 530.0, 411.0, 522.0, 413.0, 579.0, 587.0, 579.0, 525.0, 525.0, 519.0, 579.0, 579.0, 525.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [289.0, 293.0, 288.0, 294.0, 289.0, 293.0, 281.0, 277.0, 299.0, 288.0, 299.0, 277.0, 257.0, 262.0, 260.0, 267.0, 291.0, 288.0, 296.0, 277.0, 292.0, 284.0, 316.0, 311.0, 293.0, 289.0, 280.0, 293.0, 296.0, 280.0, 296.0, 283.0, 291.0, 288.0, 269.0, 261.0, 299.0, 280.0, 292.0, 284.0, 297.0, 285.0, 288.0, 285.0, 293.0, 283.0, 270.0, 255.0, 265.0, 254.0, 299.0, 283.0, 303.0, 324.0, 260.0, 256.0, 288.0, 291.0, 285.0, 294.0, 293.0, 280.0, 260.0, 270.0, 313.0, 314.0, 288.0, 285.0, 289.0, 293.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 316.0, 314.0, 319.0, 317.0, 287.0, 295.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 293.0, 294.0, 289.0, 293.0, 288.0, 291.0, 291.0, 291.0, 281.0, 292.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 317.0, 319.0, 271.0, 262.0, 292.0, 284.0, 291.0, 291.0, 285.0, 288.0, 311.0, 316.0, 308.0, 319.0, 292.0, 290.0, 294.0, 285.0, 324.0, 315.0, 264.0, 261.0, 286.0, 296.0, 288.0, 299.0, 291.0, 291.0, 293.0, 289.0, 289.0, 290.0, 285.0, 288.0, 283.0, 290.0, 295.0, 284.0, 208.0, 203.0, 288.0, 285.0, 286.0, 293.0, 297.0, 285.0, 265.0, 265.0, 316.0, 311.0, 264.0, 266.0, 258.0, 267.0, 270.0, 263.0, 316.0, 314.0, 285.0, 285.0, 291.0, 285.0, 261.0, 269.0, 285.0, 285.0, 265.0, 265.0, 208.0, 203.0, 262.0, 260.0, 
212.0, 201.0, 296.0, 283.0, 296.0, 291.0, 286.0, 293.0, 263.0, 262.0, 263.0, 262.0, 265.0, 254.0, 294.0, 285.0, 288.0, 291.0, 260.0, 265.0, 289.0, 287.0, 296.0, 283.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6931151782683085, "mean_inference_ms": 1.2356500040177392, "mean_action_processing_ms": 0.13309948751345646, "mean_env_wait_ms": 0.8344131259950188, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12288000, "num_agent_steps_trained": 12288000, "num_env_steps_sampled": 6144000, "num_env_steps_trained": 6144000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6144000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12288000, "timers": {"training_iteration_time_ms": 3565.377, "learn_time_ms": 1052.259, "learn_throughput": 12164.3, "synch_weights_time_ms": 11.253}, "counters": {"num_env_steps_sampled": 6144000, "num_env_steps_trained": 6144000, "num_agent_steps_sampled": 12288000, "num_agent_steps_trained": 12288000}, "done": false, "episodes_total": 15360, "training_iteration": 480, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-35", "timestamp": 1666582295, "time_this_iter_s": 3.6855309009552, "time_total_s": 1822.0655031204224, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": 
[256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", 
"log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1822.0655031204224, "timesteps_since_restore": 0, "iterations_since_restore": 480, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.32, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 140, "sparse_reward_max": 220, 
"shaped_reward_mean": 176.8, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.71, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.74, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.59, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.5, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, 
"potting_onion_agent_0_mean": 16.45, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.5, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.17, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.45, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.5, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.45, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.5, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0016352785751223564, "policy_loss": 0.0012512167450040579, "vf_loss": 7.683942794799805, "vf_explained_var": 0.5772947072982788, "kl": 0.003058413974940777, "entropy": 0.7686662077903748, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6156800, "num_env_steps_trained": 6156800, "num_agent_steps_sampled": 12313600, "num_agent_steps_trained": 12313600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 411.0, "episode_reward_mean": 570.8, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.4}, "custom_metrics": {"sparse_reward_mean": 197.0, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.8, "shaped_reward_min": 131, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.71, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.74, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.59, 
"useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.5, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.45, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.5, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.17, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.27, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.87, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.81, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.05, "soup_delivery_agent_1_min": 3, 
"soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.45, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.5, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.45, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.5, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 573.0, 582.0, 582.0, 582.0, 579.0, 630.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 636.0, 533.0, 576.0, 582.0, 573.0, 627.0, 627.0, 582.0, 579.0, 639.0, 525.0, 582.0, 587.0, 582.0, 582.0, 579.0, 573.0, 573.0, 579.0, 411.0, 573.0, 579.0, 582.0, 530.0, 627.0, 530.0, 525.0, 533.0, 630.0, 570.0, 576.0, 530.0, 570.0, 530.0, 411.0, 522.0, 413.0, 579.0, 587.0, 579.0, 525.0, 525.0, 519.0, 579.0, 579.0, 525.0, 576.0, 579.0, 582.0, 576.0, 579.0, 570.0, 525.0, 579.0, 579.0, 573.0, 573.0, 587.0, 636.0, 582.0, 579.0, 576.0, 525.0, 579.0, 570.0, 582.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 587.0, 522.0, 582.0, 579.0, 510.0, 582.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 288.0, 285.0, 289.0, 293.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 316.0, 314.0, 319.0, 317.0, 287.0, 295.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 293.0, 294.0, 289.0, 293.0, 288.0, 291.0, 291.0, 291.0, 281.0, 292.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 317.0, 319.0, 271.0, 262.0, 292.0, 284.0, 291.0, 291.0, 285.0, 288.0, 311.0, 316.0, 308.0, 319.0, 292.0, 290.0, 294.0, 285.0, 324.0, 315.0, 264.0, 
261.0, 286.0, 296.0, 288.0, 299.0, 291.0, 291.0, 293.0, 289.0, 289.0, 290.0, 285.0, 288.0, 283.0, 290.0, 295.0, 284.0, 208.0, 203.0, 288.0, 285.0, 286.0, 293.0, 297.0, 285.0, 265.0, 265.0, 316.0, 311.0, 264.0, 266.0, 258.0, 267.0, 270.0, 263.0, 316.0, 314.0, 285.0, 285.0, 291.0, 285.0, 261.0, 269.0, 285.0, 285.0, 265.0, 265.0, 208.0, 203.0, 262.0, 260.0, 212.0, 201.0, 296.0, 283.0, 296.0, 291.0, 286.0, 293.0, 263.0, 262.0, 263.0, 262.0, 265.0, 254.0, 294.0, 285.0, 288.0, 291.0, 260.0, 265.0, 289.0, 287.0, 296.0, 283.0, 289.0, 293.0, 288.0, 288.0, 294.0, 285.0, 283.0, 287.0, 271.0, 254.0, 293.0, 286.0, 291.0, 288.0, 286.0, 287.0, 285.0, 288.0, 296.0, 291.0, 314.0, 322.0, 294.0, 288.0, 293.0, 286.0, 293.0, 283.0, 263.0, 262.0, 288.0, 291.0, 284.0, 286.0, 295.0, 287.0, 280.0, 293.0, 297.0, 285.0, 291.0, 285.0, 296.0, 291.0, 288.0, 294.0, 315.0, 315.0, 296.0, 291.0, 260.0, 262.0, 293.0, 289.0, 291.0, 288.0, 253.0, 257.0, 291.0, 291.0, 283.0, 296.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6930632322567098, "mean_inference_ms": 1.2356661180576958, "mean_action_processing_ms": 0.13309089723525241, "mean_env_wait_ms": 0.8343735986878177, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 411.0, "episode_reward_mean": 570.8, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 324.0}, "policy_reward_mean": {"ppo": 285.4}, "hist_stats": {"episode_reward": [627.0, 573.0, 582.0, 582.0, 582.0, 579.0, 630.0, 636.0, 582.0, 582.0, 582.0, 582.0, 587.0, 582.0, 579.0, 582.0, 573.0, 582.0, 582.0, 582.0, 636.0, 533.0, 576.0, 582.0, 573.0, 627.0, 627.0, 582.0, 579.0, 639.0, 525.0, 582.0, 587.0, 582.0, 582.0, 579.0, 573.0, 573.0, 579.0, 411.0, 573.0, 579.0, 582.0, 530.0, 627.0, 530.0, 525.0, 533.0, 630.0, 570.0, 576.0, 530.0, 570.0, 530.0, 411.0, 522.0, 413.0, 579.0, 587.0, 579.0, 525.0, 525.0, 519.0, 579.0, 579.0, 525.0, 576.0, 579.0, 582.0, 
576.0, 579.0, 570.0, 525.0, 579.0, 579.0, 573.0, 573.0, 587.0, 636.0, 582.0, 579.0, 576.0, 525.0, 579.0, 570.0, 582.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 587.0, 522.0, 582.0, 579.0, 510.0, 582.0, 579.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [313.0, 314.0, 288.0, 285.0, 289.0, 293.0, 293.0, 289.0, 292.0, 290.0, 291.0, 288.0, 316.0, 314.0, 319.0, 317.0, 287.0, 295.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 293.0, 294.0, 289.0, 293.0, 288.0, 291.0, 291.0, 291.0, 281.0, 292.0, 289.0, 293.0, 289.0, 293.0, 297.0, 285.0, 317.0, 319.0, 271.0, 262.0, 292.0, 284.0, 291.0, 291.0, 285.0, 288.0, 311.0, 316.0, 308.0, 319.0, 292.0, 290.0, 294.0, 285.0, 324.0, 315.0, 264.0, 261.0, 286.0, 296.0, 288.0, 299.0, 291.0, 291.0, 293.0, 289.0, 289.0, 290.0, 285.0, 288.0, 283.0, 290.0, 295.0, 284.0, 208.0, 203.0, 288.0, 285.0, 286.0, 293.0, 297.0, 285.0, 265.0, 265.0, 316.0, 311.0, 264.0, 266.0, 258.0, 267.0, 270.0, 263.0, 316.0, 314.0, 285.0, 285.0, 291.0, 285.0, 261.0, 269.0, 285.0, 285.0, 265.0, 265.0, 208.0, 203.0, 262.0, 260.0, 212.0, 201.0, 296.0, 283.0, 296.0, 291.0, 286.0, 293.0, 263.0, 262.0, 263.0, 262.0, 265.0, 254.0, 294.0, 285.0, 288.0, 291.0, 260.0, 265.0, 289.0, 287.0, 296.0, 283.0, 289.0, 293.0, 288.0, 288.0, 294.0, 285.0, 283.0, 287.0, 271.0, 254.0, 293.0, 286.0, 291.0, 288.0, 286.0, 287.0, 285.0, 288.0, 296.0, 291.0, 314.0, 322.0, 294.0, 288.0, 293.0, 286.0, 293.0, 283.0, 263.0, 262.0, 288.0, 291.0, 284.0, 286.0, 295.0, 287.0, 280.0, 293.0, 297.0, 285.0, 291.0, 
285.0, 296.0, 291.0, 288.0, 294.0, 315.0, 315.0, 296.0, 291.0, 260.0, 262.0, 293.0, 289.0, 291.0, 288.0, 253.0, 257.0, 291.0, 291.0, 283.0, 296.0, 288.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6930632322567098, "mean_inference_ms": 1.2356661180576958, "mean_action_processing_ms": 0.13309089723525241, "mean_env_wait_ms": 0.8343735986878177, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12313600, "num_agent_steps_trained": 12313600, "num_env_steps_sampled": 6156800, "num_env_steps_trained": 6156800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6156800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12313600, "timers": {"training_iteration_time_ms": 3557.096, "learn_time_ms": 1047.153, "learn_throughput": 12223.622, "synch_weights_time_ms": 11.191}, "counters": {"num_env_steps_sampled": 6156800, "num_env_steps_trained": 6156800, "num_agent_steps_sampled": 12313600, "num_agent_steps_trained": 12313600}, "done": false, "episodes_total": 15392, "training_iteration": 481, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-39", "timestamp": 1666582299, "time_this_iter_s": 3.5412771701812744, "time_total_s": 1825.6067802906036, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": 
[256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", 
"log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1825.6067802906036, "timesteps_since_restore": 0, "iterations_since_restore": 481, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.53333333333333, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 140, 
"sparse_reward_max": 220, "shaped_reward_mean": 174.69, "shaped_reward_min": 131, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.72, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.46, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 16.59, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.22, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, 
"useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.17, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.08, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.95, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.79, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.17, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, 
"optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.17, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001813309732824564, "policy_loss": -0.0022062822245061398, "vf_loss": 7.713008880615234, "vf_explained_var": 0.5789626240730286, "kl": 0.0033301603980362415, 
"entropy": 0.7566564679145813, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6169600, "num_env_steps_trained": 6169600, "num_agent_steps_sampled": 12339200, "num_agent_steps_trained": 12339200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 411.0, "episode_reward_mean": 564.29, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 282.145}, "custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 174.69, "shaped_reward_min": 131, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.72, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.46, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 19, "useful_onion_pickup_agent_0_mean": 
16.59, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.22, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 19, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.4, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.17, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 19, "dish_pickup_agent_0_mean": 5.08, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.35, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.95, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.21, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.07, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.79, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 6, "soup_pickup_agent_1_mean": 5.11, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.72, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 6, "soup_delivery_agent_1_mean": 5.03, "soup_delivery_agent_1_min": 3, 
"soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.4, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.17, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 19, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.4, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.17, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 19, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 582.0, 582.0, 579.0, 573.0, 573.0, 579.0, 411.0, 573.0, 579.0, 582.0, 530.0, 627.0, 530.0, 525.0, 533.0, 630.0, 570.0, 576.0, 530.0, 570.0, 530.0, 411.0, 522.0, 413.0, 579.0, 587.0, 579.0, 525.0, 525.0, 519.0, 579.0, 579.0, 525.0, 576.0, 579.0, 582.0, 576.0, 579.0, 570.0, 525.0, 579.0, 579.0, 573.0, 573.0, 587.0, 636.0, 582.0, 579.0, 576.0, 525.0, 579.0, 570.0, 582.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 587.0, 522.0, 582.0, 579.0, 510.0, 582.0, 579.0, 579.0, 582.0, 519.0, 576.0, 630.0, 582.0, 582.0, 533.0, 573.0, 582.0, 579.0, 576.0, 530.0, 627.0, 579.0, 582.0, 584.0, 570.0, 582.0, 582.0, 579.0, 525.0, 570.0, 522.0, 579.0, 587.0, 573.0, 582.0, 570.0, 459.0, 530.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 299.0, 291.0, 291.0, 293.0, 289.0, 289.0, 290.0, 285.0, 288.0, 283.0, 290.0, 295.0, 284.0, 208.0, 203.0, 288.0, 285.0, 286.0, 293.0, 297.0, 285.0, 265.0, 265.0, 316.0, 311.0, 264.0, 266.0, 258.0, 267.0, 270.0, 263.0, 316.0, 314.0, 285.0, 285.0, 291.0, 285.0, 261.0, 269.0, 285.0, 285.0, 265.0, 265.0, 208.0, 203.0, 262.0, 260.0, 212.0, 201.0, 296.0, 283.0, 296.0, 291.0, 286.0, 293.0, 263.0, 262.0, 263.0, 262.0, 265.0, 
254.0, 294.0, 285.0, 288.0, 291.0, 260.0, 265.0, 289.0, 287.0, 296.0, 283.0, 289.0, 293.0, 288.0, 288.0, 294.0, 285.0, 283.0, 287.0, 271.0, 254.0, 293.0, 286.0, 291.0, 288.0, 286.0, 287.0, 285.0, 288.0, 296.0, 291.0, 314.0, 322.0, 294.0, 288.0, 293.0, 286.0, 293.0, 283.0, 263.0, 262.0, 288.0, 291.0, 284.0, 286.0, 295.0, 287.0, 280.0, 293.0, 297.0, 285.0, 291.0, 285.0, 296.0, 291.0, 288.0, 294.0, 315.0, 315.0, 296.0, 291.0, 260.0, 262.0, 293.0, 289.0, 291.0, 288.0, 253.0, 257.0, 291.0, 291.0, 283.0, 296.0, 288.0, 291.0, 292.0, 290.0, 267.0, 252.0, 296.0, 280.0, 319.0, 311.0, 299.0, 283.0, 289.0, 293.0, 268.0, 265.0, 285.0, 288.0, 294.0, 288.0, 292.0, 287.0, 297.0, 279.0, 273.0, 257.0, 316.0, 311.0, 287.0, 292.0, 292.0, 290.0, 288.0, 296.0, 280.0, 290.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 260.0, 265.0, 274.0, 296.0, 257.0, 265.0, 286.0, 293.0, 293.0, 294.0, 281.0, 292.0, 288.0, 294.0, 286.0, 284.0, 240.0, 219.0, 261.0, 269.0, 288.0, 288.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6930222020217434, "mean_inference_ms": 1.2356416687774716, "mean_action_processing_ms": 0.13308365283071852, "mean_env_wait_ms": 0.8343179738544374, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 411.0, "episode_reward_mean": 564.29, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 201.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 282.145}, "hist_stats": {"episode_reward": [587.0, 582.0, 582.0, 579.0, 573.0, 573.0, 579.0, 411.0, 573.0, 579.0, 582.0, 530.0, 627.0, 530.0, 525.0, 533.0, 630.0, 570.0, 576.0, 530.0, 570.0, 530.0, 411.0, 522.0, 413.0, 579.0, 587.0, 579.0, 525.0, 525.0, 519.0, 579.0, 579.0, 525.0, 576.0, 579.0, 582.0, 576.0, 579.0, 570.0, 525.0, 579.0, 579.0, 573.0, 573.0, 587.0, 636.0, 582.0, 579.0, 576.0, 525.0, 579.0, 570.0, 582.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 587.0, 522.0, 582.0, 579.0, 510.0, 582.0, 579.0, 579.0, 
582.0, 519.0, 576.0, 630.0, 582.0, 582.0, 533.0, 573.0, 582.0, 579.0, 576.0, 530.0, 627.0, 579.0, 582.0, 584.0, 570.0, 582.0, 582.0, 579.0, 525.0, 570.0, 522.0, 579.0, 587.0, 573.0, 582.0, 570.0, 459.0, 530.0, 576.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 299.0, 291.0, 291.0, 293.0, 289.0, 289.0, 290.0, 285.0, 288.0, 283.0, 290.0, 295.0, 284.0, 208.0, 203.0, 288.0, 285.0, 286.0, 293.0, 297.0, 285.0, 265.0, 265.0, 316.0, 311.0, 264.0, 266.0, 258.0, 267.0, 270.0, 263.0, 316.0, 314.0, 285.0, 285.0, 291.0, 285.0, 261.0, 269.0, 285.0, 285.0, 265.0, 265.0, 208.0, 203.0, 262.0, 260.0, 212.0, 201.0, 296.0, 283.0, 296.0, 291.0, 286.0, 293.0, 263.0, 262.0, 263.0, 262.0, 265.0, 254.0, 294.0, 285.0, 288.0, 291.0, 260.0, 265.0, 289.0, 287.0, 296.0, 283.0, 289.0, 293.0, 288.0, 288.0, 294.0, 285.0, 283.0, 287.0, 271.0, 254.0, 293.0, 286.0, 291.0, 288.0, 286.0, 287.0, 285.0, 288.0, 296.0, 291.0, 314.0, 322.0, 294.0, 288.0, 293.0, 286.0, 293.0, 283.0, 263.0, 262.0, 288.0, 291.0, 284.0, 286.0, 295.0, 287.0, 280.0, 293.0, 297.0, 285.0, 291.0, 285.0, 296.0, 291.0, 288.0, 294.0, 315.0, 315.0, 296.0, 291.0, 260.0, 262.0, 293.0, 289.0, 291.0, 288.0, 253.0, 257.0, 291.0, 291.0, 283.0, 296.0, 288.0, 291.0, 292.0, 290.0, 267.0, 252.0, 296.0, 280.0, 319.0, 311.0, 299.0, 283.0, 289.0, 293.0, 268.0, 265.0, 285.0, 288.0, 294.0, 288.0, 292.0, 287.0, 297.0, 279.0, 273.0, 257.0, 316.0, 311.0, 287.0, 292.0, 292.0, 290.0, 288.0, 296.0, 280.0, 290.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 
260.0, 265.0, 274.0, 296.0, 257.0, 265.0, 286.0, 293.0, 293.0, 294.0, 281.0, 292.0, 288.0, 294.0, 286.0, 284.0, 240.0, 219.0, 261.0, 269.0, 288.0, 288.0, 283.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6930222020217434, "mean_inference_ms": 1.2356416687774716, "mean_action_processing_ms": 0.13308365283071852, "mean_env_wait_ms": 0.8343179738544374, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12339200, "num_agent_steps_trained": 12339200, "num_env_steps_sampled": 6169600, "num_env_steps_trained": 6169600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6169600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12339200, "timers": {"training_iteration_time_ms": 3554.381, "learn_time_ms": 1045.846, "learn_throughput": 12238.898, "synch_weights_time_ms": 11.105}, "counters": {"num_env_steps_sampled": 6169600, "num_env_steps_trained": 6169600, "num_agent_steps_sampled": 12339200, "num_agent_steps_trained": 12339200}, "done": false, "episodes_total": 15424, "training_iteration": 482, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-42", "timestamp": 1666582302, "time_this_iter_s": 3.5798044204711914, "time_total_s": 1829.1865847110748, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, 
"local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": 
false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": 
"WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1829.1865847110748, "timesteps_since_restore": 0, "iterations_since_restore": 482, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.7, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 160, "sparse_reward_max": 220, 
"shaped_reward_mean": 177.69, "shaped_reward_min": 139, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.88, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.99, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.74, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.78, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, 
"potting_onion_agent_0_mean": 16.53, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.65, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.36, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.22, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.91, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.53, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.65, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.53, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.65, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0008528552716597915, "policy_loss": 0.0004703389131464064, "vf_loss": 7.599314212799072, "vf_explained_var": 0.6006312370300293, "kl": 0.0031734949443489313, "entropy": 0.7548311352729797, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6182400, "num_env_steps_trained": 6182400, "num_agent_steps_sampled": 12364800, "num_agent_steps_trained": 12364800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 459.0, "episode_reward_mean": 575.69, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 287.845}, "custom_metrics": {"sparse_reward_mean": 199.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 177.69, "shaped_reward_min": 139, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.88, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.99, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.74, 
"useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.78, "useful_onion_pickup_agent_1_min": 8, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.05, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.53, "potting_onion_agent_0_min": 10, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.65, "potting_onion_agent_1_min": 8, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.2, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.36, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.08, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.22, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.95, "soup_pickup_agent_0_min": 2, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.91, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, 
"soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.53, "optimal_onion_potting_agent_0_min": 10, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.65, "optimal_onion_potting_agent_1_min": 8, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.53, "viable_onion_potting_agent_0_min": 10, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.65, "viable_onion_potting_agent_1_min": 8, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, 
"useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 525.0, 576.0, 579.0, 582.0, 576.0, 579.0, 570.0, 525.0, 579.0, 579.0, 573.0, 573.0, 587.0, 636.0, 582.0, 579.0, 576.0, 525.0, 579.0, 570.0, 582.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 587.0, 522.0, 582.0, 579.0, 510.0, 582.0, 579.0, 579.0, 582.0, 519.0, 576.0, 630.0, 582.0, 582.0, 533.0, 573.0, 582.0, 579.0, 576.0, 530.0, 627.0, 579.0, 582.0, 584.0, 570.0, 582.0, 582.0, 579.0, 525.0, 570.0, 522.0, 579.0, 587.0, 573.0, 582.0, 570.0, 459.0, 530.0, 576.0, 576.0, 576.0, 576.0, 573.0, 587.0, 630.0, 579.0, 582.0, 576.0, 579.0, 582.0, 576.0, 587.0, 582.0, 633.0, 582.0, 582.0, 579.0, 636.0, 570.0, 630.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 519.0, 570.0, 573.0, 633.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 260.0, 265.0, 289.0, 287.0, 296.0, 283.0, 289.0, 293.0, 288.0, 288.0, 294.0, 285.0, 283.0, 287.0, 271.0, 254.0, 293.0, 286.0, 291.0, 288.0, 286.0, 287.0, 285.0, 288.0, 296.0, 291.0, 314.0, 322.0, 294.0, 288.0, 293.0, 286.0, 293.0, 283.0, 263.0, 262.0, 288.0, 291.0, 284.0, 286.0, 295.0, 287.0, 280.0, 293.0, 297.0, 285.0, 291.0, 285.0, 296.0, 291.0, 288.0, 294.0, 315.0, 315.0, 296.0, 291.0, 260.0, 262.0, 293.0, 
289.0, 291.0, 288.0, 253.0, 257.0, 291.0, 291.0, 283.0, 296.0, 288.0, 291.0, 292.0, 290.0, 267.0, 252.0, 296.0, 280.0, 319.0, 311.0, 299.0, 283.0, 289.0, 293.0, 268.0, 265.0, 285.0, 288.0, 294.0, 288.0, 292.0, 287.0, 297.0, 279.0, 273.0, 257.0, 316.0, 311.0, 287.0, 292.0, 292.0, 290.0, 288.0, 296.0, 280.0, 290.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 260.0, 265.0, 274.0, 296.0, 257.0, 265.0, 286.0, 293.0, 293.0, 294.0, 281.0, 292.0, 288.0, 294.0, 286.0, 284.0, 240.0, 219.0, 261.0, 269.0, 288.0, 288.0, 283.0, 293.0, 286.0, 290.0, 293.0, 283.0, 291.0, 282.0, 290.0, 297.0, 308.0, 322.0, 297.0, 282.0, 296.0, 286.0, 288.0, 288.0, 295.0, 284.0, 293.0, 289.0, 294.0, 282.0, 290.0, 297.0, 291.0, 291.0, 316.0, 317.0, 290.0, 292.0, 294.0, 288.0, 293.0, 286.0, 317.0, 319.0, 282.0, 288.0, 317.0, 313.0, 265.0, 257.0, 291.0, 288.0, 294.0, 288.0, 294.0, 285.0, 296.0, 286.0, 290.0, 286.0, 258.0, 261.0, 279.0, 291.0, 287.0, 286.0, 314.0, 319.0, 291.0, 291.0, 322.0, 314.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.692977369639183, "mean_inference_ms": 1.2355441667271112, "mean_action_processing_ms": 0.13307732437620612, "mean_env_wait_ms": 0.8342554470754044, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 459.0, "episode_reward_mean": 575.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 287.845}, "hist_stats": {"episode_reward": [579.0, 525.0, 576.0, 579.0, 582.0, 576.0, 579.0, 570.0, 525.0, 579.0, 579.0, 573.0, 573.0, 587.0, 636.0, 582.0, 579.0, 576.0, 525.0, 579.0, 570.0, 582.0, 573.0, 582.0, 576.0, 587.0, 582.0, 630.0, 587.0, 522.0, 582.0, 579.0, 510.0, 582.0, 579.0, 579.0, 582.0, 519.0, 576.0, 630.0, 582.0, 582.0, 533.0, 573.0, 582.0, 579.0, 576.0, 530.0, 627.0, 579.0, 582.0, 584.0, 570.0, 582.0, 582.0, 579.0, 525.0, 570.0, 522.0, 579.0, 587.0, 573.0, 582.0, 570.0, 459.0, 530.0, 576.0, 576.0, 576.0, 
576.0, 573.0, 587.0, 630.0, 579.0, 582.0, 576.0, 579.0, 582.0, 576.0, 587.0, 582.0, 633.0, 582.0, 582.0, 579.0, 636.0, 570.0, 630.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 519.0, 570.0, 573.0, 633.0, 582.0, 636.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 260.0, 265.0, 289.0, 287.0, 296.0, 283.0, 289.0, 293.0, 288.0, 288.0, 294.0, 285.0, 283.0, 287.0, 271.0, 254.0, 293.0, 286.0, 291.0, 288.0, 286.0, 287.0, 285.0, 288.0, 296.0, 291.0, 314.0, 322.0, 294.0, 288.0, 293.0, 286.0, 293.0, 283.0, 263.0, 262.0, 288.0, 291.0, 284.0, 286.0, 295.0, 287.0, 280.0, 293.0, 297.0, 285.0, 291.0, 285.0, 296.0, 291.0, 288.0, 294.0, 315.0, 315.0, 296.0, 291.0, 260.0, 262.0, 293.0, 289.0, 291.0, 288.0, 253.0, 257.0, 291.0, 291.0, 283.0, 296.0, 288.0, 291.0, 292.0, 290.0, 267.0, 252.0, 296.0, 280.0, 319.0, 311.0, 299.0, 283.0, 289.0, 293.0, 268.0, 265.0, 285.0, 288.0, 294.0, 288.0, 292.0, 287.0, 297.0, 279.0, 273.0, 257.0, 316.0, 311.0, 287.0, 292.0, 292.0, 290.0, 288.0, 296.0, 280.0, 290.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 260.0, 265.0, 274.0, 296.0, 257.0, 265.0, 286.0, 293.0, 293.0, 294.0, 281.0, 292.0, 288.0, 294.0, 286.0, 284.0, 240.0, 219.0, 261.0, 269.0, 288.0, 288.0, 283.0, 293.0, 286.0, 290.0, 293.0, 283.0, 291.0, 282.0, 290.0, 297.0, 308.0, 322.0, 297.0, 282.0, 296.0, 286.0, 288.0, 288.0, 295.0, 284.0, 293.0, 289.0, 294.0, 282.0, 290.0, 297.0, 291.0, 291.0, 316.0, 317.0, 290.0, 292.0, 294.0, 288.0, 293.0, 286.0, 317.0, 319.0, 282.0, 288.0, 317.0, 313.0, 265.0, 
257.0, 291.0, 288.0, 294.0, 288.0, 294.0, 285.0, 296.0, 286.0, 290.0, 286.0, 258.0, 261.0, 279.0, 291.0, 287.0, 286.0, 314.0, 319.0, 291.0, 291.0, 322.0, 314.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.692977369639183, "mean_inference_ms": 1.2355441667271112, "mean_action_processing_ms": 0.13307732437620612, "mean_env_wait_ms": 0.8342554470754044, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12364800, "num_agent_steps_trained": 12364800, "num_env_steps_sampled": 6182400, "num_env_steps_trained": 6182400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6182400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12364800, "timers": {"training_iteration_time_ms": 3558.941, "learn_time_ms": 1043.607, "learn_throughput": 12265.148, "synch_weights_time_ms": 11.435}, "counters": {"num_env_steps_sampled": 6182400, "num_env_steps_trained": 6182400, "num_agent_steps_sampled": 12364800, "num_agent_steps_trained": 12364800}, "done": false, "episodes_total": 15456, "training_iteration": 483, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-46", "timestamp": 1666582306, "time_this_iter_s": 3.5927608013153076, "time_total_s": 1832.7793455123901, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, 
"inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": 
[256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": 
{"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", 
"log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, 
"min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1832.7793455123901, "timesteps_since_restore": 0, "iterations_since_restore": 483, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.919999999999998, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 0, "sparse_reward_max": 
220, "shaped_reward_mean": 174.85, "shaped_reward_min": 9, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.69, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.65, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.48, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.41, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, 
"potting_onion_agent_0_mean": 16.32, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.3, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.79, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.32, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.3, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, 
"optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.32, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.3, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0031761766877025366, "policy_loss": 0.0027809746097773314, "vf_loss": 7.762106895446777, "vf_explained_var": 0.5784608721733093, "kl": 0.0038599662948399782, "entropy": 0.7620162963867188, 
"entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6195200, "num_env_steps_trained": 6195200, "num_agent_steps_sampled": 12390400, "num_agent_steps_trained": 12390400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 9.0, "episode_reward_mean": 566.45, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.225}, "custom_metrics": {"sparse_reward_mean": 195.8, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 174.85, "shaped_reward_min": 9, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.69, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.65, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.48, 
"useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.41, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.12, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.32, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.3, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.4, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.25, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 2, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.06, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.79, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 
7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.32, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.3, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.32, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.3, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, 
"useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [510.0, 582.0, 579.0, 579.0, 582.0, 519.0, 576.0, 630.0, 582.0, 582.0, 533.0, 573.0, 582.0, 579.0, 576.0, 530.0, 627.0, 579.0, 582.0, 584.0, 570.0, 582.0, 582.0, 579.0, 525.0, 570.0, 522.0, 579.0, 587.0, 573.0, 582.0, 570.0, 459.0, 530.0, 576.0, 576.0, 576.0, 576.0, 573.0, 587.0, 630.0, 579.0, 582.0, 576.0, 579.0, 582.0, 576.0, 587.0, 582.0, 633.0, 582.0, 582.0, 579.0, 636.0, 570.0, 630.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 519.0, 570.0, 573.0, 633.0, 582.0, 636.0, 525.0, 587.0, 9.0, 627.0, 533.0, 573.0, 579.0, 573.0, 582.0, 582.0, 582.0, 576.0, 630.0, 530.0, 522.0, 582.0, 573.0, 570.0, 582.0, 630.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 573.0, 411.0, 459.0, 465.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 257.0, 291.0, 291.0, 283.0, 296.0, 288.0, 291.0, 292.0, 290.0, 267.0, 252.0, 296.0, 280.0, 319.0, 311.0, 299.0, 283.0, 289.0, 293.0, 268.0, 265.0, 285.0, 288.0, 294.0, 288.0, 292.0, 287.0, 297.0, 279.0, 273.0, 257.0, 316.0, 311.0, 287.0, 292.0, 292.0, 290.0, 288.0, 296.0, 280.0, 290.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 260.0, 265.0, 274.0, 296.0, 257.0, 265.0, 286.0, 293.0, 293.0, 294.0, 281.0, 292.0, 288.0, 294.0, 286.0, 284.0, 240.0, 219.0, 261.0, 
269.0, 288.0, 288.0, 283.0, 293.0, 286.0, 290.0, 293.0, 283.0, 291.0, 282.0, 290.0, 297.0, 308.0, 322.0, 297.0, 282.0, 296.0, 286.0, 288.0, 288.0, 295.0, 284.0, 293.0, 289.0, 294.0, 282.0, 290.0, 297.0, 291.0, 291.0, 316.0, 317.0, 290.0, 292.0, 294.0, 288.0, 293.0, 286.0, 317.0, 319.0, 282.0, 288.0, 317.0, 313.0, 265.0, 257.0, 291.0, 288.0, 294.0, 288.0, 294.0, 285.0, 296.0, 286.0, 290.0, 286.0, 258.0, 261.0, 279.0, 291.0, 287.0, 286.0, 314.0, 319.0, 291.0, 291.0, 322.0, 314.0, 265.0, 260.0, 288.0, 299.0, 6.0, 3.0, 319.0, 308.0, 268.0, 265.0, 280.0, 293.0, 283.0, 296.0, 283.0, 290.0, 292.0, 290.0, 294.0, 288.0, 292.0, 290.0, 283.0, 293.0, 317.0, 313.0, 258.0, 272.0, 274.0, 248.0, 291.0, 291.0, 286.0, 287.0, 295.0, 275.0, 289.0, 293.0, 314.0, 316.0, 294.0, 288.0, 284.0, 298.0, 288.0, 282.0, 291.0, 291.0, 294.0, 282.0, 289.0, 290.0, 292.0, 287.0, 287.0, 286.0, 208.0, 203.0, 226.0, 233.0, 231.0, 234.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.692940406076851, "mean_inference_ms": 1.2354490487938525, "mean_action_processing_ms": 0.1330713413614881, "mean_env_wait_ms": 0.8341982079274745, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 9.0, "episode_reward_mean": 566.45, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.225}, "hist_stats": {"episode_reward": [510.0, 582.0, 579.0, 579.0, 582.0, 519.0, 576.0, 630.0, 582.0, 582.0, 533.0, 573.0, 582.0, 579.0, 576.0, 530.0, 627.0, 579.0, 582.0, 584.0, 570.0, 582.0, 582.0, 579.0, 525.0, 570.0, 522.0, 579.0, 587.0, 573.0, 582.0, 570.0, 459.0, 530.0, 576.0, 576.0, 576.0, 576.0, 573.0, 587.0, 630.0, 579.0, 582.0, 576.0, 579.0, 582.0, 576.0, 587.0, 582.0, 633.0, 582.0, 582.0, 579.0, 636.0, 570.0, 630.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 519.0, 570.0, 573.0, 633.0, 582.0, 636.0, 525.0, 587.0, 9.0, 627.0, 533.0, 573.0, 579.0, 573.0, 
582.0, 582.0, 582.0, 576.0, 630.0, 530.0, 522.0, 582.0, 573.0, 570.0, 582.0, 630.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 573.0, 411.0, 459.0, 465.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [253.0, 257.0, 291.0, 291.0, 283.0, 296.0, 288.0, 291.0, 292.0, 290.0, 267.0, 252.0, 296.0, 280.0, 319.0, 311.0, 299.0, 283.0, 289.0, 293.0, 268.0, 265.0, 285.0, 288.0, 294.0, 288.0, 292.0, 287.0, 297.0, 279.0, 273.0, 257.0, 316.0, 311.0, 287.0, 292.0, 292.0, 290.0, 288.0, 296.0, 280.0, 290.0, 291.0, 291.0, 291.0, 291.0, 288.0, 291.0, 260.0, 265.0, 274.0, 296.0, 257.0, 265.0, 286.0, 293.0, 293.0, 294.0, 281.0, 292.0, 288.0, 294.0, 286.0, 284.0, 240.0, 219.0, 261.0, 269.0, 288.0, 288.0, 283.0, 293.0, 286.0, 290.0, 293.0, 283.0, 291.0, 282.0, 290.0, 297.0, 308.0, 322.0, 297.0, 282.0, 296.0, 286.0, 288.0, 288.0, 295.0, 284.0, 293.0, 289.0, 294.0, 282.0, 290.0, 297.0, 291.0, 291.0, 316.0, 317.0, 290.0, 292.0, 294.0, 288.0, 293.0, 286.0, 317.0, 319.0, 282.0, 288.0, 317.0, 313.0, 265.0, 257.0, 291.0, 288.0, 294.0, 288.0, 294.0, 285.0, 296.0, 286.0, 290.0, 286.0, 258.0, 261.0, 279.0, 291.0, 287.0, 286.0, 314.0, 319.0, 291.0, 291.0, 322.0, 314.0, 265.0, 260.0, 288.0, 299.0, 6.0, 3.0, 319.0, 308.0, 268.0, 265.0, 280.0, 293.0, 283.0, 296.0, 283.0, 290.0, 292.0, 290.0, 294.0, 288.0, 292.0, 290.0, 283.0, 293.0, 317.0, 313.0, 258.0, 272.0, 274.0, 248.0, 291.0, 291.0, 286.0, 287.0, 295.0, 275.0, 289.0, 293.0, 314.0, 316.0, 294.0, 288.0, 284.0, 298.0, 288.0, 282.0, 291.0, 291.0, 
294.0, 282.0, 289.0, 290.0, 292.0, 287.0, 287.0, 286.0, 208.0, 203.0, 226.0, 233.0, 231.0, 234.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.692940406076851, "mean_inference_ms": 1.2354490487938525, "mean_action_processing_ms": 0.1330713413614881, "mean_env_wait_ms": 0.8341982079274745, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12390400, "num_agent_steps_trained": 12390400, "num_env_steps_sampled": 6195200, "num_env_steps_trained": 6195200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6195200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12390400, "timers": {"training_iteration_time_ms": 3552.513, "learn_time_ms": 1043.736, "learn_throughput": 12263.643, "synch_weights_time_ms": 11.18}, "counters": {"num_env_steps_sampled": 6195200, "num_env_steps_trained": 6195200, "num_agent_steps_sampled": 12390400, "num_agent_steps_trained": 12390400}, "done": false, "episodes_total": 15488, "training_iteration": 484, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-50", "timestamp": 1666582310, "time_this_iter_s": 3.5351099967956543, "time_total_s": 1836.3144555091858, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": 
"overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", 
"conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 
8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": 
"tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, 
"_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, 
"log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1836.3144555091858, "timesteps_since_restore": 0, "iterations_since_restore": 484, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.7, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 174.73, "shaped_reward_min": 9, "shaped_reward_max": 199, 
"tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.48, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.81, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.57, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 
22, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.24, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, 
"viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001401905552484095, "policy_loss": -0.0017940457910299301, "vf_loss": 7.739469051361084, "vf_explained_var": 0.5467299818992615, "kl": 0.0025778058916330338, "entropy": 0.7636134624481201, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6208000, 
"num_env_steps_trained": 6208000, "num_agent_steps_sampled": 12416000, "num_agent_steps_trained": 12416000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 9.0, "episode_reward_mean": 565.53, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 282.765}, "custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 174.73, "shaped_reward_min": 9, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.48, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.81, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.24, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 22, "useful_onion_pickup_agent_1_mean": 15.57, 
"useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.11, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.04, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.17, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.98, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.24, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.82, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.04, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.78, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 
0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.17, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.17, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, 
"useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [459.0, 530.0, 576.0, 576.0, 576.0, 576.0, 573.0, 587.0, 630.0, 579.0, 582.0, 576.0, 579.0, 582.0, 576.0, 587.0, 582.0, 633.0, 582.0, 582.0, 579.0, 636.0, 570.0, 630.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 519.0, 570.0, 573.0, 633.0, 582.0, 636.0, 525.0, 587.0, 9.0, 627.0, 533.0, 573.0, 579.0, 573.0, 582.0, 582.0, 582.0, 576.0, 630.0, 530.0, 522.0, 582.0, 573.0, 570.0, 582.0, 630.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 573.0, 411.0, 459.0, 465.0, 582.0, 522.0, 525.0, 582.0, 627.0, 576.0, 579.0, 525.0, 522.0, 530.0, 522.0, 639.0, 633.0, 582.0, 579.0, 587.0, 522.0, 630.0, 522.0, 579.0, 579.0, 519.0, 576.0, 579.0, 582.0, 522.0, 582.0, 582.0, 579.0, 627.0, 582.0, 584.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [240.0, 219.0, 261.0, 269.0, 288.0, 288.0, 283.0, 293.0, 286.0, 290.0, 293.0, 283.0, 291.0, 282.0, 290.0, 297.0, 308.0, 322.0, 297.0, 282.0, 296.0, 286.0, 288.0, 288.0, 295.0, 284.0, 293.0, 289.0, 294.0, 282.0, 290.0, 297.0, 291.0, 291.0, 316.0, 317.0, 290.0, 292.0, 294.0, 288.0, 293.0, 286.0, 317.0, 319.0, 282.0, 288.0, 317.0, 313.0, 265.0, 257.0, 291.0, 288.0, 294.0, 288.0, 294.0, 285.0, 296.0, 286.0, 290.0, 286.0, 258.0, 261.0, 279.0, 291.0, 287.0, 286.0, 314.0, 319.0, 291.0, 291.0, 322.0, 314.0, 265.0, 260.0, 288.0, 299.0, 6.0, 3.0, 319.0, 308.0, 268.0, 265.0, 280.0, 293.0, 283.0, 
296.0, 283.0, 290.0, 292.0, 290.0, 294.0, 288.0, 292.0, 290.0, 283.0, 293.0, 317.0, 313.0, 258.0, 272.0, 274.0, 248.0, 291.0, 291.0, 286.0, 287.0, 295.0, 275.0, 289.0, 293.0, 314.0, 316.0, 294.0, 288.0, 284.0, 298.0, 288.0, 282.0, 291.0, 291.0, 294.0, 282.0, 289.0, 290.0, 292.0, 287.0, 287.0, 286.0, 208.0, 203.0, 226.0, 233.0, 231.0, 234.0, 291.0, 291.0, 251.0, 271.0, 258.0, 267.0, 292.0, 290.0, 306.0, 321.0, 288.0, 288.0, 286.0, 293.0, 261.0, 264.0, 265.0, 257.0, 267.0, 263.0, 267.0, 255.0, 317.0, 322.0, 319.0, 314.0, 293.0, 289.0, 285.0, 294.0, 291.0, 296.0, 263.0, 259.0, 323.0, 307.0, 257.0, 265.0, 289.0, 290.0, 289.0, 290.0, 263.0, 256.0, 286.0, 290.0, 286.0, 293.0, 291.0, 291.0, 269.0, 253.0, 286.0, 296.0, 288.0, 294.0, 288.0, 291.0, 306.0, 321.0, 288.0, 294.0, 289.0, 295.0, 262.0, 257.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6929093261383754, "mean_inference_ms": 1.235359682696947, "mean_action_processing_ms": 0.13306645221527427, "mean_env_wait_ms": 0.8341424568541566, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 9.0, "episode_reward_mean": 565.53, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 282.765}, "hist_stats": {"episode_reward": [459.0, 530.0, 576.0, 576.0, 576.0, 576.0, 573.0, 587.0, 630.0, 579.0, 582.0, 576.0, 579.0, 582.0, 576.0, 587.0, 582.0, 633.0, 582.0, 582.0, 579.0, 636.0, 570.0, 630.0, 522.0, 579.0, 582.0, 579.0, 582.0, 576.0, 519.0, 570.0, 573.0, 633.0, 582.0, 636.0, 525.0, 587.0, 9.0, 627.0, 533.0, 573.0, 579.0, 573.0, 582.0, 582.0, 582.0, 576.0, 630.0, 530.0, 522.0, 582.0, 573.0, 570.0, 582.0, 630.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 573.0, 411.0, 459.0, 465.0, 582.0, 522.0, 525.0, 582.0, 627.0, 576.0, 579.0, 525.0, 522.0, 530.0, 522.0, 639.0, 633.0, 582.0, 579.0, 587.0, 522.0, 630.0, 522.0, 579.0, 579.0, 519.0, 576.0, 579.0, 582.0, 522.0, 
582.0, 582.0, 579.0, 627.0, 582.0, 584.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [240.0, 219.0, 261.0, 269.0, 288.0, 288.0, 283.0, 293.0, 286.0, 290.0, 293.0, 283.0, 291.0, 282.0, 290.0, 297.0, 308.0, 322.0, 297.0, 282.0, 296.0, 286.0, 288.0, 288.0, 295.0, 284.0, 293.0, 289.0, 294.0, 282.0, 290.0, 297.0, 291.0, 291.0, 316.0, 317.0, 290.0, 292.0, 294.0, 288.0, 293.0, 286.0, 317.0, 319.0, 282.0, 288.0, 317.0, 313.0, 265.0, 257.0, 291.0, 288.0, 294.0, 288.0, 294.0, 285.0, 296.0, 286.0, 290.0, 286.0, 258.0, 261.0, 279.0, 291.0, 287.0, 286.0, 314.0, 319.0, 291.0, 291.0, 322.0, 314.0, 265.0, 260.0, 288.0, 299.0, 6.0, 3.0, 319.0, 308.0, 268.0, 265.0, 280.0, 293.0, 283.0, 296.0, 283.0, 290.0, 292.0, 290.0, 294.0, 288.0, 292.0, 290.0, 283.0, 293.0, 317.0, 313.0, 258.0, 272.0, 274.0, 248.0, 291.0, 291.0, 286.0, 287.0, 295.0, 275.0, 289.0, 293.0, 314.0, 316.0, 294.0, 288.0, 284.0, 298.0, 288.0, 282.0, 291.0, 291.0, 294.0, 282.0, 289.0, 290.0, 292.0, 287.0, 287.0, 286.0, 208.0, 203.0, 226.0, 233.0, 231.0, 234.0, 291.0, 291.0, 251.0, 271.0, 258.0, 267.0, 292.0, 290.0, 306.0, 321.0, 288.0, 288.0, 286.0, 293.0, 261.0, 264.0, 265.0, 257.0, 267.0, 263.0, 267.0, 255.0, 317.0, 322.0, 319.0, 314.0, 293.0, 289.0, 285.0, 294.0, 291.0, 296.0, 263.0, 259.0, 323.0, 307.0, 257.0, 265.0, 289.0, 290.0, 289.0, 290.0, 263.0, 256.0, 286.0, 290.0, 286.0, 293.0, 291.0, 291.0, 269.0, 253.0, 286.0, 296.0, 288.0, 294.0, 288.0, 291.0, 306.0, 321.0, 288.0, 294.0, 289.0, 295.0, 262.0, 257.0]}, 
"sampler_perf": {"mean_raw_obs_processing_ms": 0.6929093261383754, "mean_inference_ms": 1.235359682696947, "mean_action_processing_ms": 0.13306645221527427, "mean_env_wait_ms": 0.8341424568541566, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12416000, "num_agent_steps_trained": 12416000, "num_env_steps_sampled": 6208000, "num_env_steps_trained": 6208000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6208000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12416000, "timers": {"training_iteration_time_ms": 3554.834, "learn_time_ms": 1048.901, "learn_throughput": 12203.248, "synch_weights_time_ms": 11.264}, "counters": {"num_env_steps_sampled": 6208000, "num_env_steps_trained": 6208000, "num_agent_steps_sampled": 12416000, "num_agent_steps_trained": 12416000}, "done": false, "episodes_total": 15520, "training_iteration": 485, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-54", "timestamp": 1666582314, "time_this_iter_s": 3.635488986968994, "time_total_s": 1839.9499444961548, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 
{"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", 
"free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", 
"rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], 
"post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, 
"_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, 
"_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1839.9499444961548, "timesteps_since_restore": 0, "iterations_since_restore": 485, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.9, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 172.63, "shaped_reward_min": 9, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 
0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.44, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.19, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 23, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.1, "potting_onion_agent_1_min": 1, 
"potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.81, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.64, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.61, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.1, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 2, 
"viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.1, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0027828095480799675, "policy_loss": -0.0031895372085273266, "vf_loss": 7.829740524291992, "vf_explained_var": 0.5534740686416626, "kl": 0.003179178573191166, "entropy": 0.7524949312210083, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6220800, "num_env_steps_trained": 6220800, "num_agent_steps_sampled": 12441600, 
"num_agent_steps_trained": 12441600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 9.0, "episode_reward_mean": 559.43, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 279.715}, "custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 0, "sparse_reward_max": 220, "shaped_reward_mean": 172.63, "shaped_reward_min": 9, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.36, "onion_pickup_agent_0_min": 2, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.44, "onion_pickup_agent_1_min": 2, "onion_pickup_agent_1_max": 23, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 2, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.19, "useful_onion_pickup_agent_1_min": 2, "useful_onion_pickup_agent_1_max": 
23, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.08, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.03, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.05, "potting_onion_agent_0_min": 2, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.1, "potting_onion_agent_1_min": 1, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.87, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.48, "dish_pickup_agent_1_min": 0, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.81, "useful_dish_pickup_agent_0_min": 0, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.35, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.04, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.64, "soup_pickup_agent_0_min": 0, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.1, "soup_pickup_agent_1_min": 0, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.61, "soup_delivery_agent_0_min": 0, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 0, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, 
"optimal_onion_potting_agent_0_mean": 16.05, "optimal_onion_potting_agent_0_min": 2, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.1, "optimal_onion_potting_agent_1_min": 1, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.05, "viable_onion_potting_agent_0_min": 2, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.1, "viable_onion_potting_agent_1_min": 1, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, 
"useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 633.0, 582.0, 636.0, 525.0, 587.0, 9.0, 627.0, 533.0, 573.0, 579.0, 573.0, 582.0, 582.0, 582.0, 576.0, 630.0, 530.0, 522.0, 582.0, 573.0, 570.0, 582.0, 630.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 573.0, 411.0, 459.0, 465.0, 582.0, 522.0, 525.0, 582.0, 627.0, 576.0, 579.0, 525.0, 522.0, 530.0, 522.0, 639.0, 633.0, 582.0, 579.0, 587.0, 522.0, 630.0, 522.0, 579.0, 579.0, 519.0, 576.0, 579.0, 582.0, 522.0, 582.0, 582.0, 579.0, 627.0, 582.0, 584.0, 519.0, 573.0, 453.0, 576.0, 525.0, 525.0, 639.0, 465.0, 582.0, 405.0, 522.0, 530.0, 573.0, 582.0, 519.0, 576.0, 630.0, 576.0, 630.0, 627.0, 525.0, 573.0, 579.0, 570.0, 525.0, 573.0, 582.0, 570.0, 579.0, 525.0, 570.0, 582.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 314.0, 319.0, 291.0, 291.0, 322.0, 314.0, 265.0, 260.0, 288.0, 299.0, 6.0, 3.0, 319.0, 308.0, 268.0, 265.0, 280.0, 293.0, 283.0, 296.0, 283.0, 290.0, 292.0, 290.0, 294.0, 288.0, 292.0, 290.0, 283.0, 293.0, 317.0, 313.0, 258.0, 272.0, 274.0, 248.0, 291.0, 291.0, 286.0, 287.0, 295.0, 275.0, 289.0, 293.0, 314.0, 316.0, 294.0, 288.0, 284.0, 298.0, 288.0, 282.0, 291.0, 291.0, 294.0, 282.0, 289.0, 290.0, 292.0, 287.0, 287.0, 286.0, 208.0, 203.0, 226.0, 233.0, 231.0, 234.0, 291.0, 291.0, 251.0, 271.0, 258.0, 267.0, 292.0, 290.0, 306.0, 321.0, 288.0, 288.0, 286.0, 293.0, 261.0, 264.0, 265.0, 257.0, 267.0, 263.0, 267.0, 255.0, 
317.0, 322.0, 319.0, 314.0, 293.0, 289.0, 285.0, 294.0, 291.0, 296.0, 263.0, 259.0, 323.0, 307.0, 257.0, 265.0, 289.0, 290.0, 289.0, 290.0, 263.0, 256.0, 286.0, 290.0, 286.0, 293.0, 291.0, 291.0, 269.0, 253.0, 286.0, 296.0, 288.0, 294.0, 288.0, 291.0, 306.0, 321.0, 288.0, 294.0, 289.0, 295.0, 262.0, 257.0, 290.0, 283.0, 224.0, 229.0, 285.0, 291.0, 257.0, 268.0, 260.0, 265.0, 317.0, 322.0, 231.0, 234.0, 291.0, 291.0, 208.0, 197.0, 260.0, 262.0, 265.0, 265.0, 287.0, 286.0, 289.0, 293.0, 252.0, 267.0, 291.0, 285.0, 315.0, 315.0, 292.0, 284.0, 314.0, 316.0, 314.0, 313.0, 258.0, 267.0, 282.0, 291.0, 285.0, 294.0, 288.0, 282.0, 260.0, 265.0, 294.0, 279.0, 288.0, 294.0, 288.0, 282.0, 280.0, 299.0, 261.0, 264.0, 275.0, 295.0, 291.0, 291.0, 291.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6928998996816387, "mean_inference_ms": 1.2352711461877957, "mean_action_processing_ms": 0.13306123880522405, "mean_env_wait_ms": 0.8340857422690848, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 9.0, "episode_reward_mean": 559.43, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 3.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 279.715}, "hist_stats": {"episode_reward": [573.0, 633.0, 582.0, 636.0, 525.0, 587.0, 9.0, 627.0, 533.0, 573.0, 579.0, 573.0, 582.0, 582.0, 582.0, 576.0, 630.0, 530.0, 522.0, 582.0, 573.0, 570.0, 582.0, 630.0, 582.0, 582.0, 570.0, 582.0, 576.0, 579.0, 579.0, 573.0, 411.0, 459.0, 465.0, 582.0, 522.0, 525.0, 582.0, 627.0, 576.0, 579.0, 525.0, 522.0, 530.0, 522.0, 639.0, 633.0, 582.0, 579.0, 587.0, 522.0, 630.0, 522.0, 579.0, 579.0, 519.0, 576.0, 579.0, 582.0, 522.0, 582.0, 582.0, 579.0, 627.0, 582.0, 584.0, 519.0, 573.0, 453.0, 576.0, 525.0, 525.0, 639.0, 465.0, 582.0, 405.0, 522.0, 530.0, 573.0, 582.0, 519.0, 576.0, 630.0, 576.0, 630.0, 627.0, 525.0, 573.0, 579.0, 570.0, 525.0, 573.0, 582.0, 570.0, 579.0, 525.0, 570.0, 582.0, 576.0], 
"episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [287.0, 286.0, 314.0, 319.0, 291.0, 291.0, 322.0, 314.0, 265.0, 260.0, 288.0, 299.0, 6.0, 3.0, 319.0, 308.0, 268.0, 265.0, 280.0, 293.0, 283.0, 296.0, 283.0, 290.0, 292.0, 290.0, 294.0, 288.0, 292.0, 290.0, 283.0, 293.0, 317.0, 313.0, 258.0, 272.0, 274.0, 248.0, 291.0, 291.0, 286.0, 287.0, 295.0, 275.0, 289.0, 293.0, 314.0, 316.0, 294.0, 288.0, 284.0, 298.0, 288.0, 282.0, 291.0, 291.0, 294.0, 282.0, 289.0, 290.0, 292.0, 287.0, 287.0, 286.0, 208.0, 203.0, 226.0, 233.0, 231.0, 234.0, 291.0, 291.0, 251.0, 271.0, 258.0, 267.0, 292.0, 290.0, 306.0, 321.0, 288.0, 288.0, 286.0, 293.0, 261.0, 264.0, 265.0, 257.0, 267.0, 263.0, 267.0, 255.0, 317.0, 322.0, 319.0, 314.0, 293.0, 289.0, 285.0, 294.0, 291.0, 296.0, 263.0, 259.0, 323.0, 307.0, 257.0, 265.0, 289.0, 290.0, 289.0, 290.0, 263.0, 256.0, 286.0, 290.0, 286.0, 293.0, 291.0, 291.0, 269.0, 253.0, 286.0, 296.0, 288.0, 294.0, 288.0, 291.0, 306.0, 321.0, 288.0, 294.0, 289.0, 295.0, 262.0, 257.0, 290.0, 283.0, 224.0, 229.0, 285.0, 291.0, 257.0, 268.0, 260.0, 265.0, 317.0, 322.0, 231.0, 234.0, 291.0, 291.0, 208.0, 197.0, 260.0, 262.0, 265.0, 265.0, 287.0, 286.0, 289.0, 293.0, 252.0, 267.0, 291.0, 285.0, 315.0, 315.0, 292.0, 284.0, 314.0, 316.0, 314.0, 313.0, 258.0, 267.0, 282.0, 291.0, 285.0, 294.0, 288.0, 282.0, 260.0, 265.0, 294.0, 279.0, 288.0, 294.0, 288.0, 282.0, 280.0, 299.0, 261.0, 264.0, 275.0, 295.0, 291.0, 291.0, 291.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 
0.6928998996816387, "mean_inference_ms": 1.2352711461877957, "mean_action_processing_ms": 0.13306123880522405, "mean_env_wait_ms": 0.8340857422690848, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12441600, "num_agent_steps_trained": 12441600, "num_env_steps_sampled": 6220800, "num_env_steps_trained": 6220800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6220800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12441600, "timers": {"training_iteration_time_ms": 3569.324, "learn_time_ms": 1047.351, "learn_throughput": 12221.315, "synch_weights_time_ms": 11.468}, "counters": {"num_env_steps_sampled": 6220800, "num_env_steps_trained": 6220800, "num_agent_steps_sampled": 12441600, "num_agent_steps_trained": 12441600}, "done": false, "episodes_total": 15552, "training_iteration": 486, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-31-57", "timestamp": 1666582317, "time_this_iter_s": 3.665339469909668, "time_total_s": 1843.6152839660645, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, 
"DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, 
"no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, 
"simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": 
false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1843.6152839660645, "timesteps_since_restore": 0, "iterations_since_restore": 486, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.933333333333334, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 174.09, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, 
"tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.59, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.38, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.2, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 
4.9, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 4.84, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.35, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.63, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.2, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, 
"viable_onion_potting_agent_1_mean": 15.2, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0029020761139690876, "policy_loss": 0.002499224850907922, "vf_loss": 7.774298667907715, "vf_explained_var": 0.5695394277572632, "kl": 0.002982937265187502, "entropy": 0.7491560578346252, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6233600, "num_env_steps_trained": 6233600, "num_agent_steps_sampled": 12467200, "num_agent_steps_trained": 12467200}, "sampler_results": 
{"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 563.69, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 197.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 281.845}, "custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 174.09, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.57, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.59, "onion_pickup_agent_1_min": 11, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 9, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.38, "useful_onion_pickup_agent_1_min": 11, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.04, 
"onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.06, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.24, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.2, "potting_onion_agent_1_min": 11, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.9, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.45, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 4.84, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.35, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.67, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.17, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.63, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.12, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.24, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.2, "optimal_onion_potting_agent_1_min": 11, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.24, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.2, "viable_onion_potting_agent_1_min": 11, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [411.0, 459.0, 465.0, 582.0, 522.0, 525.0, 582.0, 627.0, 576.0, 579.0, 525.0, 522.0, 530.0, 522.0, 639.0, 633.0, 582.0, 579.0, 587.0, 522.0, 630.0, 522.0, 579.0, 579.0, 519.0, 576.0, 579.0, 582.0, 522.0, 582.0, 582.0, 579.0, 627.0, 582.0, 584.0, 519.0, 573.0, 453.0, 576.0, 525.0, 525.0, 639.0, 465.0, 582.0, 405.0, 522.0, 530.0, 573.0, 582.0, 519.0, 576.0, 630.0, 576.0, 630.0, 627.0, 525.0, 573.0, 579.0, 570.0, 525.0, 573.0, 582.0, 570.0, 579.0, 525.0, 570.0, 582.0, 576.0, 587.0, 573.0, 582.0, 627.0, 630.0, 579.0, 522.0, 519.0, 582.0, 576.0, 630.0, 630.0, 579.0, 573.0, 527.0, 579.0, 576.0, 582.0, 579.0, 582.0, 573.0, 582.0, 584.0, 587.0, 579.0, 582.0, 587.0, 525.0, 530.0, 525.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [208.0, 203.0, 226.0, 233.0, 231.0, 234.0, 291.0, 291.0, 251.0, 271.0, 258.0, 267.0, 292.0, 290.0, 306.0, 321.0, 288.0, 288.0, 286.0, 293.0, 261.0, 264.0, 265.0, 257.0, 267.0, 263.0, 267.0, 255.0, 317.0, 322.0, 319.0, 314.0, 293.0, 289.0, 285.0, 294.0, 291.0, 296.0, 263.0, 259.0, 323.0, 307.0, 257.0, 265.0, 289.0, 290.0, 289.0, 290.0, 263.0, 256.0, 286.0, 290.0, 286.0, 293.0, 291.0, 291.0, 269.0, 253.0, 286.0, 296.0, 288.0, 294.0, 288.0, 291.0, 306.0, 321.0, 288.0, 294.0, 289.0, 295.0, 262.0, 257.0, 290.0, 283.0, 224.0, 229.0, 285.0, 291.0, 257.0, 268.0, 260.0, 265.0, 317.0, 322.0, 231.0, 234.0, 291.0, 291.0, 208.0, 197.0, 260.0, 262.0, 265.0, 265.0, 287.0, 286.0, 289.0, 
293.0, 252.0, 267.0, 291.0, 285.0, 315.0, 315.0, 292.0, 284.0, 314.0, 316.0, 314.0, 313.0, 258.0, 267.0, 282.0, 291.0, 285.0, 294.0, 288.0, 282.0, 260.0, 265.0, 294.0, 279.0, 288.0, 294.0, 288.0, 282.0, 280.0, 299.0, 261.0, 264.0, 275.0, 295.0, 291.0, 291.0, 291.0, 285.0, 298.0, 289.0, 291.0, 282.0, 296.0, 286.0, 317.0, 310.0, 314.0, 316.0, 284.0, 295.0, 262.0, 260.0, 262.0, 257.0, 286.0, 296.0, 286.0, 290.0, 314.0, 316.0, 314.0, 316.0, 286.0, 293.0, 288.0, 285.0, 269.0, 258.0, 281.0, 298.0, 294.0, 282.0, 289.0, 293.0, 292.0, 287.0, 294.0, 288.0, 285.0, 288.0, 291.0, 291.0, 288.0, 296.0, 297.0, 290.0, 289.0, 290.0, 296.0, 286.0, 299.0, 288.0, 260.0, 265.0, 262.0, 268.0, 265.0, 260.0, 288.0, 285.0, 294.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.692892027218891, "mean_inference_ms": 1.235187879018693, "mean_action_processing_ms": 0.13305748815812188, "mean_env_wait_ms": 0.8340378901116045, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 563.69, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 197.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 281.845}, "hist_stats": {"episode_reward": [411.0, 459.0, 465.0, 582.0, 522.0, 525.0, 582.0, 627.0, 576.0, 579.0, 525.0, 522.0, 530.0, 522.0, 639.0, 633.0, 582.0, 579.0, 587.0, 522.0, 630.0, 522.0, 579.0, 579.0, 519.0, 576.0, 579.0, 582.0, 522.0, 582.0, 582.0, 579.0, 627.0, 582.0, 584.0, 519.0, 573.0, 453.0, 576.0, 525.0, 525.0, 639.0, 465.0, 582.0, 405.0, 522.0, 530.0, 573.0, 582.0, 519.0, 576.0, 630.0, 576.0, 630.0, 627.0, 525.0, 573.0, 579.0, 570.0, 525.0, 573.0, 582.0, 570.0, 579.0, 525.0, 570.0, 582.0, 576.0, 587.0, 573.0, 582.0, 627.0, 630.0, 579.0, 522.0, 519.0, 582.0, 576.0, 630.0, 630.0, 579.0, 573.0, 527.0, 579.0, 576.0, 582.0, 579.0, 582.0, 573.0, 582.0, 584.0, 587.0, 579.0, 582.0, 587.0, 525.0, 530.0, 525.0, 573.0, 579.0], "episode_lengths": [400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [208.0, 203.0, 226.0, 233.0, 231.0, 234.0, 291.0, 291.0, 251.0, 271.0, 258.0, 267.0, 292.0, 290.0, 306.0, 321.0, 288.0, 288.0, 286.0, 293.0, 261.0, 264.0, 265.0, 257.0, 267.0, 263.0, 267.0, 255.0, 317.0, 322.0, 319.0, 314.0, 293.0, 289.0, 285.0, 294.0, 291.0, 296.0, 263.0, 259.0, 323.0, 307.0, 257.0, 265.0, 289.0, 290.0, 289.0, 290.0, 263.0, 256.0, 286.0, 290.0, 286.0, 293.0, 291.0, 291.0, 269.0, 253.0, 286.0, 296.0, 288.0, 294.0, 288.0, 291.0, 306.0, 321.0, 288.0, 294.0, 289.0, 295.0, 262.0, 257.0, 290.0, 283.0, 224.0, 229.0, 285.0, 291.0, 257.0, 268.0, 260.0, 265.0, 317.0, 322.0, 231.0, 234.0, 291.0, 291.0, 208.0, 197.0, 260.0, 262.0, 265.0, 265.0, 287.0, 286.0, 289.0, 293.0, 252.0, 267.0, 291.0, 285.0, 315.0, 315.0, 292.0, 284.0, 314.0, 316.0, 314.0, 313.0, 258.0, 267.0, 282.0, 291.0, 285.0, 294.0, 288.0, 282.0, 260.0, 265.0, 294.0, 279.0, 288.0, 294.0, 288.0, 282.0, 280.0, 299.0, 261.0, 264.0, 275.0, 295.0, 291.0, 291.0, 291.0, 285.0, 298.0, 289.0, 291.0, 282.0, 296.0, 286.0, 317.0, 310.0, 314.0, 316.0, 284.0, 295.0, 262.0, 260.0, 262.0, 257.0, 286.0, 296.0, 286.0, 290.0, 314.0, 316.0, 314.0, 316.0, 286.0, 293.0, 288.0, 285.0, 269.0, 258.0, 281.0, 298.0, 294.0, 282.0, 289.0, 293.0, 292.0, 287.0, 294.0, 288.0, 285.0, 288.0, 291.0, 291.0, 288.0, 296.0, 297.0, 290.0, 289.0, 290.0, 296.0, 286.0, 299.0, 288.0, 260.0, 265.0, 262.0, 268.0, 265.0, 260.0, 288.0, 285.0, 294.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.692892027218891, "mean_inference_ms": 
1.235187879018693, "mean_action_processing_ms": 0.13305748815812188, "mean_env_wait_ms": 0.8340378901116045, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12467200, "num_agent_steps_trained": 12467200, "num_env_steps_sampled": 6233600, "num_env_steps_trained": 6233600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6233600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12467200, "timers": {"training_iteration_time_ms": 3579.273, "learn_time_ms": 1045.789, "learn_throughput": 12239.561, "synch_weights_time_ms": 11.168}, "counters": {"num_env_steps_sampled": 6233600, "num_env_steps_trained": 6233600, "num_agent_steps_sampled": 12467200, "num_agent_steps_trained": 12467200}, "done": false, "episodes_total": 15584, "training_iteration": 487, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-01", "timestamp": 1666582321, "time_this_iter_s": 3.6495282649993896, "time_total_s": 1847.2648122310638, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, 
"use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": 
-1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": 
false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1847.2648122310638, "timesteps_since_restore": 0, "iterations_since_restore": 487, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.580000000000002, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.64, "shaped_reward_min": 113, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, 
"tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.39, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.72, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.39, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.02, "dish_pickup_agent_0_min": 
3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 4.92, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.29, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.08, "optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.39, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.39, 
"viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.002688134554773569, "policy_loss": -0.0030803410336375237, "vf_loss": 7.730833053588867, "vf_explained_var": 0.5763280391693115, "kl": 0.0021167888771742582, "entropy": 0.7617533206939697, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6246400, "num_env_steps_trained": 6246400, "num_agent_steps_sampled": 12492800, "num_agent_steps_trained": 12492800}, "sampler_results": {"episode_reward_max": 639.0, 
"episode_reward_min": 353.0, "episode_reward_mean": 565.44, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 176.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 282.72}, "custom_metrics": {"sparse_reward_mean": 195.4, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 174.64, "shaped_reward_min": 113, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.39, "onion_pickup_agent_0_min": 10, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.72, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.3, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.59, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, 
"onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.08, "potting_onion_agent_0_min": 9, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.39, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.02, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.41, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 4.92, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.29, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.12, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.04, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.08, 
"optimal_onion_potting_agent_0_min": 9, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.39, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.08, "viable_onion_potting_agent_0_min": 9, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.39, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, 
"useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [627.0, 582.0, 584.0, 519.0, 573.0, 453.0, 576.0, 525.0, 525.0, 639.0, 465.0, 582.0, 405.0, 522.0, 530.0, 573.0, 582.0, 519.0, 576.0, 630.0, 576.0, 630.0, 627.0, 525.0, 573.0, 579.0, 570.0, 525.0, 573.0, 582.0, 570.0, 579.0, 525.0, 570.0, 582.0, 576.0, 587.0, 573.0, 582.0, 627.0, 630.0, 579.0, 522.0, 519.0, 582.0, 576.0, 630.0, 630.0, 579.0, 573.0, 527.0, 579.0, 576.0, 582.0, 579.0, 582.0, 573.0, 582.0, 584.0, 587.0, 579.0, 582.0, 587.0, 525.0, 530.0, 525.0, 573.0, 579.0, 636.0, 579.0, 525.0, 627.0, 530.0, 353.0, 582.0, 522.0, 582.0, 579.0, 573.0, 587.0, 584.0, 627.0, 627.0, 522.0, 579.0, 576.0, 525.0, 582.0, 516.0, 582.0, 525.0, 582.0, 579.0, 530.0, 530.0, 576.0, 587.0, 516.0, 576.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 321.0, 288.0, 294.0, 289.0, 295.0, 262.0, 257.0, 290.0, 283.0, 224.0, 229.0, 285.0, 291.0, 257.0, 268.0, 260.0, 265.0, 317.0, 322.0, 231.0, 234.0, 291.0, 291.0, 208.0, 197.0, 260.0, 262.0, 265.0, 265.0, 287.0, 286.0, 289.0, 293.0, 252.0, 267.0, 291.0, 285.0, 315.0, 315.0, 292.0, 284.0, 314.0, 316.0, 314.0, 313.0, 258.0, 267.0, 282.0, 291.0, 285.0, 294.0, 288.0, 282.0, 260.0, 265.0, 294.0, 279.0, 288.0, 294.0, 288.0, 282.0, 280.0, 299.0, 261.0, 264.0, 275.0, 295.0, 291.0, 291.0, 291.0, 285.0, 298.0, 289.0, 291.0, 282.0, 296.0, 286.0, 317.0, 310.0, 314.0, 316.0, 284.0, 295.0, 262.0, 260.0, 262.0, 257.0, 286.0, 296.0, 286.0, 290.0, 314.0, 316.0, 314.0, 316.0, 286.0, 
293.0, 288.0, 285.0, 269.0, 258.0, 281.0, 298.0, 294.0, 282.0, 289.0, 293.0, 292.0, 287.0, 294.0, 288.0, 285.0, 288.0, 291.0, 291.0, 288.0, 296.0, 297.0, 290.0, 289.0, 290.0, 296.0, 286.0, 299.0, 288.0, 260.0, 265.0, 262.0, 268.0, 265.0, 260.0, 288.0, 285.0, 294.0, 285.0, 319.0, 317.0, 290.0, 289.0, 262.0, 263.0, 309.0, 318.0, 264.0, 266.0, 177.0, 176.0, 293.0, 289.0, 268.0, 254.0, 293.0, 289.0, 290.0, 289.0, 287.0, 286.0, 296.0, 291.0, 285.0, 299.0, 311.0, 316.0, 306.0, 321.0, 257.0, 265.0, 291.0, 288.0, 289.0, 287.0, 259.0, 266.0, 294.0, 288.0, 261.0, 255.0, 292.0, 290.0, 252.0, 273.0, 292.0, 290.0, 288.0, 291.0, 273.0, 257.0, 270.0, 260.0, 290.0, 286.0, 291.0, 296.0, 259.0, 257.0, 290.0, 286.0, 283.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6928745322580707, "mean_inference_ms": 1.2351036526199097, "mean_action_processing_ms": 0.13305333665262153, "mean_env_wait_ms": 0.8339880958234599, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 353.0, "episode_reward_mean": 565.44, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 176.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 282.72}, "hist_stats": {"episode_reward": [627.0, 582.0, 584.0, 519.0, 573.0, 453.0, 576.0, 525.0, 525.0, 639.0, 465.0, 582.0, 405.0, 522.0, 530.0, 573.0, 582.0, 519.0, 576.0, 630.0, 576.0, 630.0, 627.0, 525.0, 573.0, 579.0, 570.0, 525.0, 573.0, 582.0, 570.0, 579.0, 525.0, 570.0, 582.0, 576.0, 587.0, 573.0, 582.0, 627.0, 630.0, 579.0, 522.0, 519.0, 582.0, 576.0, 630.0, 630.0, 579.0, 573.0, 527.0, 579.0, 576.0, 582.0, 579.0, 582.0, 573.0, 582.0, 584.0, 587.0, 579.0, 582.0, 587.0, 525.0, 530.0, 525.0, 573.0, 579.0, 636.0, 579.0, 525.0, 627.0, 530.0, 353.0, 582.0, 522.0, 582.0, 579.0, 573.0, 587.0, 584.0, 627.0, 627.0, 522.0, 579.0, 576.0, 525.0, 582.0, 516.0, 582.0, 525.0, 582.0, 579.0, 530.0, 530.0, 576.0, 587.0, 516.0, 576.0, 579.0], "episode_lengths": [400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [306.0, 321.0, 288.0, 294.0, 289.0, 295.0, 262.0, 257.0, 290.0, 283.0, 224.0, 229.0, 285.0, 291.0, 257.0, 268.0, 260.0, 265.0, 317.0, 322.0, 231.0, 234.0, 291.0, 291.0, 208.0, 197.0, 260.0, 262.0, 265.0, 265.0, 287.0, 286.0, 289.0, 293.0, 252.0, 267.0, 291.0, 285.0, 315.0, 315.0, 292.0, 284.0, 314.0, 316.0, 314.0, 313.0, 258.0, 267.0, 282.0, 291.0, 285.0, 294.0, 288.0, 282.0, 260.0, 265.0, 294.0, 279.0, 288.0, 294.0, 288.0, 282.0, 280.0, 299.0, 261.0, 264.0, 275.0, 295.0, 291.0, 291.0, 291.0, 285.0, 298.0, 289.0, 291.0, 282.0, 296.0, 286.0, 317.0, 310.0, 314.0, 316.0, 284.0, 295.0, 262.0, 260.0, 262.0, 257.0, 286.0, 296.0, 286.0, 290.0, 314.0, 316.0, 314.0, 316.0, 286.0, 293.0, 288.0, 285.0, 269.0, 258.0, 281.0, 298.0, 294.0, 282.0, 289.0, 293.0, 292.0, 287.0, 294.0, 288.0, 285.0, 288.0, 291.0, 291.0, 288.0, 296.0, 297.0, 290.0, 289.0, 290.0, 296.0, 286.0, 299.0, 288.0, 260.0, 265.0, 262.0, 268.0, 265.0, 260.0, 288.0, 285.0, 294.0, 285.0, 319.0, 317.0, 290.0, 289.0, 262.0, 263.0, 309.0, 318.0, 264.0, 266.0, 177.0, 176.0, 293.0, 289.0, 268.0, 254.0, 293.0, 289.0, 290.0, 289.0, 287.0, 286.0, 296.0, 291.0, 285.0, 299.0, 311.0, 316.0, 306.0, 321.0, 257.0, 265.0, 291.0, 288.0, 289.0, 287.0, 259.0, 266.0, 294.0, 288.0, 261.0, 255.0, 292.0, 290.0, 252.0, 273.0, 292.0, 290.0, 288.0, 291.0, 273.0, 257.0, 270.0, 260.0, 290.0, 286.0, 291.0, 296.0, 259.0, 257.0, 290.0, 286.0, 283.0, 296.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6928745322580707, 
"mean_inference_ms": 1.2351036526199097, "mean_action_processing_ms": 0.13305333665262153, "mean_env_wait_ms": 0.8339880958234599, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12492800, "num_agent_steps_trained": 12492800, "num_env_steps_sampled": 6246400, "num_env_steps_trained": 6246400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6246400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12492800, "timers": {"training_iteration_time_ms": 3568.033, "learn_time_ms": 1043.184, "learn_throughput": 12270.124, "synch_weights_time_ms": 12.104}, "counters": {"num_env_steps_sampled": 6246400, "num_env_steps_trained": 6246400, "num_agent_steps_sampled": 12492800, "num_agent_steps_trained": 12492800}, "done": false, "episodes_total": 15616, "training_iteration": 488, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-05", "timestamp": 1666582325, "time_this_iter_s": 3.5665717124938965, "time_total_s": 1850.8313839435577, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, 
"DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, 
"use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, 
"SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, 
"vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": 
-1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": 
false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1850.8313839435577, "timesteps_since_restore": 0, "iterations_since_restore": 488, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.72, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.19, "shaped_reward_min": 113, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, 
"useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.71, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.6, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.4, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.01, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, 
"dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.29, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.73, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.4, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.4, "viable_onion_potting_agent_1_min": 9, 
"viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0014873126056045294, "policy_loss": -0.0018815764924511313, "vf_loss": 7.711610317230225, "vf_explained_var": 0.5623108148574829, "kl": 0.0029111807234585285, "entropy": 0.7537927031517029, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6259200, "num_env_steps_trained": 6259200, "num_agent_steps_sampled": 12518400, "num_agent_steps_trained": 12518400}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 353.0, "episode_reward_mean": 
567.19, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 176.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.595}, "custom_metrics": {"sparse_reward_mean": 196.0, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 175.19, "shaped_reward_min": 113, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.52, "onion_pickup_agent_0_min": 12, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.71, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.44, "useful_onion_pickup_agent_0_min": 12, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.6, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.04, 
"onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 16.25, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.4, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.01, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.39, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 7, "useful_dish_pickup_agent_0_mean": 4.89, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.29, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 7, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.8, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.14, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.73, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.07, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.25, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, 
"optimal_onion_potting_agent_1_mean": 15.4, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.25, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.4, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 570.0, 582.0, 576.0, 587.0, 
573.0, 582.0, 627.0, 630.0, 579.0, 522.0, 519.0, 582.0, 576.0, 630.0, 630.0, 579.0, 573.0, 527.0, 579.0, 576.0, 582.0, 579.0, 582.0, 573.0, 582.0, 584.0, 587.0, 579.0, 582.0, 587.0, 525.0, 530.0, 525.0, 573.0, 579.0, 636.0, 579.0, 525.0, 627.0, 530.0, 353.0, 582.0, 522.0, 582.0, 579.0, 573.0, 587.0, 584.0, 627.0, 627.0, 522.0, 579.0, 576.0, 525.0, 582.0, 516.0, 582.0, 525.0, 582.0, 579.0, 530.0, 530.0, 576.0, 587.0, 516.0, 576.0, 579.0, 573.0, 513.0, 522.0, 522.0, 582.0, 576.0, 579.0, 519.0, 513.0, 530.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 570.0, 582.0, 513.0, 576.0, 582.0, 570.0, 579.0, 573.0, 519.0, 582.0, 579.0, 579.0, 582.0, 630.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 264.0, 275.0, 295.0, 291.0, 291.0, 291.0, 285.0, 298.0, 289.0, 291.0, 282.0, 296.0, 286.0, 317.0, 310.0, 314.0, 316.0, 284.0, 295.0, 262.0, 260.0, 262.0, 257.0, 286.0, 296.0, 286.0, 290.0, 314.0, 316.0, 314.0, 316.0, 286.0, 293.0, 288.0, 285.0, 269.0, 258.0, 281.0, 298.0, 294.0, 282.0, 289.0, 293.0, 292.0, 287.0, 294.0, 288.0, 285.0, 288.0, 291.0, 291.0, 288.0, 296.0, 297.0, 290.0, 289.0, 290.0, 296.0, 286.0, 299.0, 288.0, 260.0, 265.0, 262.0, 268.0, 265.0, 260.0, 288.0, 285.0, 294.0, 285.0, 319.0, 317.0, 290.0, 289.0, 262.0, 263.0, 309.0, 318.0, 264.0, 266.0, 177.0, 176.0, 293.0, 289.0, 268.0, 254.0, 293.0, 289.0, 290.0, 289.0, 287.0, 286.0, 296.0, 291.0, 285.0, 299.0, 311.0, 316.0, 306.0, 321.0, 257.0, 265.0, 291.0, 288.0, 289.0, 287.0, 259.0, 266.0, 294.0, 288.0, 261.0, 
255.0, 292.0, 290.0, 252.0, 273.0, 292.0, 290.0, 288.0, 291.0, 273.0, 257.0, 270.0, 260.0, 290.0, 286.0, 291.0, 296.0, 259.0, 257.0, 290.0, 286.0, 283.0, 296.0, 282.0, 291.0, 257.0, 256.0, 261.0, 261.0, 260.0, 262.0, 287.0, 295.0, 290.0, 286.0, 291.0, 288.0, 251.0, 268.0, 253.0, 260.0, 267.0, 263.0, 289.0, 290.0, 286.0, 293.0, 286.0, 287.0, 285.0, 285.0, 289.0, 293.0, 291.0, 291.0, 288.0, 282.0, 288.0, 294.0, 262.0, 251.0, 285.0, 291.0, 288.0, 294.0, 285.0, 285.0, 296.0, 283.0, 288.0, 285.0, 255.0, 264.0, 294.0, 288.0, 289.0, 290.0, 284.0, 295.0, 292.0, 290.0, 321.0, 309.0, 286.0, 296.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6928325929990626, "mean_inference_ms": 1.23501652551429, "mean_action_processing_ms": 0.1330490622589401, "mean_env_wait_ms": 0.8339361870383712, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 353.0, "episode_reward_mean": 567.19, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 176.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 283.595}, "hist_stats": {"episode_reward": [525.0, 570.0, 582.0, 576.0, 587.0, 573.0, 582.0, 627.0, 630.0, 579.0, 522.0, 519.0, 582.0, 576.0, 630.0, 630.0, 579.0, 573.0, 527.0, 579.0, 576.0, 582.0, 579.0, 582.0, 573.0, 582.0, 584.0, 587.0, 579.0, 582.0, 587.0, 525.0, 530.0, 525.0, 573.0, 579.0, 636.0, 579.0, 525.0, 627.0, 530.0, 353.0, 582.0, 522.0, 582.0, 579.0, 573.0, 587.0, 584.0, 627.0, 627.0, 522.0, 579.0, 576.0, 525.0, 582.0, 516.0, 582.0, 525.0, 582.0, 579.0, 530.0, 530.0, 576.0, 587.0, 516.0, 576.0, 579.0, 573.0, 513.0, 522.0, 522.0, 582.0, 576.0, 579.0, 519.0, 513.0, 530.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 570.0, 582.0, 513.0, 576.0, 582.0, 570.0, 579.0, 573.0, 519.0, 582.0, 579.0, 579.0, 582.0, 630.0, 582.0, 579.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [261.0, 264.0, 275.0, 295.0, 291.0, 291.0, 291.0, 285.0, 298.0, 289.0, 291.0, 282.0, 296.0, 286.0, 317.0, 310.0, 314.0, 316.0, 284.0, 295.0, 262.0, 260.0, 262.0, 257.0, 286.0, 296.0, 286.0, 290.0, 314.0, 316.0, 314.0, 316.0, 286.0, 293.0, 288.0, 285.0, 269.0, 258.0, 281.0, 298.0, 294.0, 282.0, 289.0, 293.0, 292.0, 287.0, 294.0, 288.0, 285.0, 288.0, 291.0, 291.0, 288.0, 296.0, 297.0, 290.0, 289.0, 290.0, 296.0, 286.0, 299.0, 288.0, 260.0, 265.0, 262.0, 268.0, 265.0, 260.0, 288.0, 285.0, 294.0, 285.0, 319.0, 317.0, 290.0, 289.0, 262.0, 263.0, 309.0, 318.0, 264.0, 266.0, 177.0, 176.0, 293.0, 289.0, 268.0, 254.0, 293.0, 289.0, 290.0, 289.0, 287.0, 286.0, 296.0, 291.0, 285.0, 299.0, 311.0, 316.0, 306.0, 321.0, 257.0, 265.0, 291.0, 288.0, 289.0, 287.0, 259.0, 266.0, 294.0, 288.0, 261.0, 255.0, 292.0, 290.0, 252.0, 273.0, 292.0, 290.0, 288.0, 291.0, 273.0, 257.0, 270.0, 260.0, 290.0, 286.0, 291.0, 296.0, 259.0, 257.0, 290.0, 286.0, 283.0, 296.0, 282.0, 291.0, 257.0, 256.0, 261.0, 261.0, 260.0, 262.0, 287.0, 295.0, 290.0, 286.0, 291.0, 288.0, 251.0, 268.0, 253.0, 260.0, 267.0, 263.0, 289.0, 290.0, 286.0, 293.0, 286.0, 287.0, 285.0, 285.0, 289.0, 293.0, 291.0, 291.0, 288.0, 282.0, 288.0, 294.0, 262.0, 251.0, 285.0, 291.0, 288.0, 294.0, 285.0, 285.0, 296.0, 283.0, 288.0, 285.0, 255.0, 264.0, 294.0, 288.0, 289.0, 290.0, 284.0, 295.0, 292.0, 290.0, 321.0, 309.0, 286.0, 296.0, 286.0, 293.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6928325929990626, "mean_inference_ms": 1.23501652551429, "mean_action_processing_ms": 0.1330490622589401, "mean_env_wait_ms": 0.8339361870383712, 
"mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12518400, "num_agent_steps_trained": 12518400, "num_env_steps_sampled": 6259200, "num_env_steps_trained": 6259200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6259200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12518400, "timers": {"training_iteration_time_ms": 3537.262, "learn_time_ms": 1041.837, "learn_throughput": 12285.989, "synch_weights_time_ms": 12.137}, "counters": {"num_env_steps_sampled": 6259200, "num_env_steps_trained": 6259200, "num_agent_steps_sampled": 12518400, "num_agent_steps_trained": 12518400}, "done": false, "episodes_total": 15648, "training_iteration": 489, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-09", "timestamp": 1666582329, "time_this_iter_s": 3.4958958625793457, "time_total_s": 1854.327279806137, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": 
{"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, 
"_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, 
"mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": 
false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, 
"collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, 
"metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1854.327279806137, "timesteps_since_restore": 0, "iterations_since_restore": 489, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.479999999999997, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 172.43, "shaped_reward_min": 113, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, 
"useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.53, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.99, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.46, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.86, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.32, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 3, 
"dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.75, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.69, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.86, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.32, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.86, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.32, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0012956246500834823, "policy_loss": -0.0016962646041065454, "vf_loss": 7.817093372344971, "vf_explained_var": 0.5554914474487305, "kl": 0.0026428524870425463, "entropy": 0.7621381878852844, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6272000, "num_env_steps_trained": 6272000, "num_agent_steps_sampled": 12544000, "num_agent_steps_trained": 12544000}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 353.0, "episode_reward_mean": 556.83, "episode_len_mean": 400.0, 
"episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 164.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 278.415}, "custom_metrics": {"sparse_reward_mean": 192.2, "sparse_reward_min": 120, "sparse_reward_max": 220, "shaped_reward_mean": 172.43, "shaped_reward_min": 113, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.09, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.53, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.99, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.46, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, 
"onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.86, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.32, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.04, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 2, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.18, "useful_dish_pickup_agent_1_min": 3, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.02, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.75, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.69, "soup_delivery_agent_0_min": 2, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.92, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.86, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 
15.32, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.86, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.32, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [530.0, 525.0, 573.0, 579.0, 636.0, 579.0, 525.0, 627.0, 530.0, 353.0, 
582.0, 522.0, 582.0, 579.0, 573.0, 587.0, 584.0, 627.0, 627.0, 522.0, 579.0, 576.0, 525.0, 582.0, 516.0, 582.0, 525.0, 582.0, 579.0, 530.0, 530.0, 576.0, 587.0, 516.0, 576.0, 579.0, 573.0, 513.0, 522.0, 522.0, 582.0, 576.0, 579.0, 519.0, 513.0, 530.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 570.0, 582.0, 513.0, 576.0, 582.0, 570.0, 579.0, 573.0, 519.0, 582.0, 579.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 587.0, 579.0, 582.0, 465.0, 633.0, 525.0, 462.0, 525.0, 530.0, 525.0, 468.0, 522.0, 582.0, 530.0, 530.0, 525.0, 630.0, 627.0, 519.0, 468.0, 522.0, 582.0, 579.0, 522.0, 353.0, 530.0, 525.0, 630.0, 636.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 268.0, 265.0, 260.0, 288.0, 285.0, 294.0, 285.0, 319.0, 317.0, 290.0, 289.0, 262.0, 263.0, 309.0, 318.0, 264.0, 266.0, 177.0, 176.0, 293.0, 289.0, 268.0, 254.0, 293.0, 289.0, 290.0, 289.0, 287.0, 286.0, 296.0, 291.0, 285.0, 299.0, 311.0, 316.0, 306.0, 321.0, 257.0, 265.0, 291.0, 288.0, 289.0, 287.0, 259.0, 266.0, 294.0, 288.0, 261.0, 255.0, 292.0, 290.0, 252.0, 273.0, 292.0, 290.0, 288.0, 291.0, 273.0, 257.0, 270.0, 260.0, 290.0, 286.0, 291.0, 296.0, 259.0, 257.0, 290.0, 286.0, 283.0, 296.0, 282.0, 291.0, 257.0, 256.0, 261.0, 261.0, 260.0, 262.0, 287.0, 295.0, 290.0, 286.0, 291.0, 288.0, 251.0, 268.0, 253.0, 260.0, 267.0, 263.0, 289.0, 290.0, 286.0, 293.0, 286.0, 287.0, 285.0, 285.0, 289.0, 293.0, 291.0, 291.0, 288.0, 282.0, 288.0, 294.0, 262.0, 251.0, 285.0, 291.0, 288.0, 294.0, 285.0, 285.0, 296.0, 283.0, 
288.0, 285.0, 255.0, 264.0, 294.0, 288.0, 289.0, 290.0, 284.0, 295.0, 292.0, 290.0, 321.0, 309.0, 286.0, 296.0, 286.0, 293.0, 282.0, 294.0, 291.0, 296.0, 284.0, 295.0, 294.0, 288.0, 228.0, 237.0, 319.0, 314.0, 268.0, 257.0, 225.0, 237.0, 264.0, 261.0, 266.0, 264.0, 264.0, 261.0, 231.0, 237.0, 265.0, 257.0, 292.0, 290.0, 257.0, 273.0, 261.0, 269.0, 257.0, 268.0, 312.0, 318.0, 309.0, 318.0, 259.0, 260.0, 239.0, 229.0, 260.0, 262.0, 294.0, 288.0, 289.0, 290.0, 268.0, 254.0, 189.0, 164.0, 259.0, 271.0, 265.0, 260.0, 319.0, 311.0, 316.0, 320.0, 289.0, 290.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6927798074894155, "mean_inference_ms": 1.2349156682504754, "mean_action_processing_ms": 0.13304283052223212, "mean_env_wait_ms": 0.8338676264458542, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 353.0, "episode_reward_mean": 556.83, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 164.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 278.415}, "hist_stats": {"episode_reward": [530.0, 525.0, 573.0, 579.0, 636.0, 579.0, 525.0, 627.0, 530.0, 353.0, 582.0, 522.0, 582.0, 579.0, 573.0, 587.0, 584.0, 627.0, 627.0, 522.0, 579.0, 576.0, 525.0, 582.0, 516.0, 582.0, 525.0, 582.0, 579.0, 530.0, 530.0, 576.0, 587.0, 516.0, 576.0, 579.0, 573.0, 513.0, 522.0, 522.0, 582.0, 576.0, 579.0, 519.0, 513.0, 530.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 570.0, 582.0, 513.0, 576.0, 582.0, 570.0, 579.0, 573.0, 519.0, 582.0, 579.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 587.0, 579.0, 582.0, 465.0, 633.0, 525.0, 462.0, 525.0, 530.0, 525.0, 468.0, 522.0, 582.0, 530.0, 530.0, 525.0, 630.0, 627.0, 519.0, 468.0, 522.0, 582.0, 579.0, 522.0, 353.0, 530.0, 525.0, 630.0, 636.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [262.0, 268.0, 265.0, 260.0, 288.0, 285.0, 294.0, 285.0, 319.0, 317.0, 290.0, 289.0, 262.0, 263.0, 309.0, 318.0, 264.0, 266.0, 177.0, 176.0, 293.0, 289.0, 268.0, 254.0, 293.0, 289.0, 290.0, 289.0, 287.0, 286.0, 296.0, 291.0, 285.0, 299.0, 311.0, 316.0, 306.0, 321.0, 257.0, 265.0, 291.0, 288.0, 289.0, 287.0, 259.0, 266.0, 294.0, 288.0, 261.0, 255.0, 292.0, 290.0, 252.0, 273.0, 292.0, 290.0, 288.0, 291.0, 273.0, 257.0, 270.0, 260.0, 290.0, 286.0, 291.0, 296.0, 259.0, 257.0, 290.0, 286.0, 283.0, 296.0, 282.0, 291.0, 257.0, 256.0, 261.0, 261.0, 260.0, 262.0, 287.0, 295.0, 290.0, 286.0, 291.0, 288.0, 251.0, 268.0, 253.0, 260.0, 267.0, 263.0, 289.0, 290.0, 286.0, 293.0, 286.0, 287.0, 285.0, 285.0, 289.0, 293.0, 291.0, 291.0, 288.0, 282.0, 288.0, 294.0, 262.0, 251.0, 285.0, 291.0, 288.0, 294.0, 285.0, 285.0, 296.0, 283.0, 288.0, 285.0, 255.0, 264.0, 294.0, 288.0, 289.0, 290.0, 284.0, 295.0, 292.0, 290.0, 321.0, 309.0, 286.0, 296.0, 286.0, 293.0, 282.0, 294.0, 291.0, 296.0, 284.0, 295.0, 294.0, 288.0, 228.0, 237.0, 319.0, 314.0, 268.0, 257.0, 225.0, 237.0, 264.0, 261.0, 266.0, 264.0, 264.0, 261.0, 231.0, 237.0, 265.0, 257.0, 292.0, 290.0, 257.0, 273.0, 261.0, 269.0, 257.0, 268.0, 312.0, 318.0, 309.0, 318.0, 259.0, 260.0, 239.0, 229.0, 260.0, 262.0, 294.0, 288.0, 289.0, 290.0, 268.0, 254.0, 189.0, 164.0, 259.0, 271.0, 265.0, 260.0, 319.0, 311.0, 316.0, 320.0, 289.0, 290.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6927798074894155, "mean_inference_ms": 1.2349156682504754, "mean_action_processing_ms": 0.13304283052223212, "mean_env_wait_ms": 0.8338676264458542, "mean_env_render_ms": 0.0}, 
"num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12544000, "num_agent_steps_trained": 12544000, "num_env_steps_sampled": 6272000, "num_env_steps_trained": 6272000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6272000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12544000, "timers": {"training_iteration_time_ms": 3527.969, "learn_time_ms": 1049.929, "learn_throughput": 12191.303, "synch_weights_time_ms": 11.408}, "counters": {"num_env_steps_sampled": 6272000, "num_env_steps_trained": 6272000, "num_agent_steps_sampled": 12544000, "num_agent_steps_trained": 12544000}, "done": false, "episodes_total": 15680, "training_iteration": 490, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-12", "timestamp": 1666582332, "time_this_iter_s": 3.6058645248413086, "time_total_s": 1857.9331443309784, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, 
"wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": 
false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": 
false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, 
"use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": 
-1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1857.9331443309784, "timesteps_since_restore": 0, "iterations_since_restore": 490, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.7, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 189.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 169.77, "shaped_reward_min": 94, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 
0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.0, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.23, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.87, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.13, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.75, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.0, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.9, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.71, 
"useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.63, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.57, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.75, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.0, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.75, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.0, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0024327579885721207, "policy_loss": -0.002835115185007453, "vf_loss": 7.80023193359375, "vf_explained_var": 0.5509117841720581, "kl": 0.003577027004212141, "entropy": 0.7553344964981079, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6284800, "num_env_steps_trained": 6284800, "num_agent_steps_sampled": 12569600, "num_agent_steps_trained": 12569600}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 294.0, "episode_reward_mean": 548.57, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, 
"policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 274.285}, "custom_metrics": {"sparse_reward_mean": 189.4, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 169.77, "shaped_reward_min": 94, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.0, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.23, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.87, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.13, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 2, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.75, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.0, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.9, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.71, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.13, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.02, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.63, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.57, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.75, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.0, "optimal_onion_potting_agent_1_min": 5, 
"optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.75, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.0, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [587.0, 516.0, 576.0, 579.0, 573.0, 513.0, 522.0, 522.0, 582.0, 576.0, 579.0, 519.0, 513.0, 530.0, 579.0, 579.0, 573.0, 
570.0, 582.0, 582.0, 570.0, 582.0, 513.0, 576.0, 582.0, 570.0, 579.0, 573.0, 519.0, 582.0, 579.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 587.0, 579.0, 582.0, 465.0, 633.0, 525.0, 462.0, 525.0, 530.0, 525.0, 468.0, 522.0, 582.0, 530.0, 530.0, 525.0, 630.0, 627.0, 519.0, 468.0, 522.0, 582.0, 579.0, 522.0, 353.0, 530.0, 525.0, 630.0, 636.0, 579.0, 582.0, 573.0, 294.0, 510.0, 570.0, 573.0, 522.0, 530.0, 525.0, 573.0, 582.0, 582.0, 522.0, 468.0, 294.0, 582.0, 576.0, 473.0, 576.0, 539.0, 510.0, 579.0, 573.0, 587.0, 573.0, 579.0, 519.0, 522.0, 582.0, 582.0, 530.0, 579.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 296.0, 259.0, 257.0, 290.0, 286.0, 283.0, 296.0, 282.0, 291.0, 257.0, 256.0, 261.0, 261.0, 260.0, 262.0, 287.0, 295.0, 290.0, 286.0, 291.0, 288.0, 251.0, 268.0, 253.0, 260.0, 267.0, 263.0, 289.0, 290.0, 286.0, 293.0, 286.0, 287.0, 285.0, 285.0, 289.0, 293.0, 291.0, 291.0, 288.0, 282.0, 288.0, 294.0, 262.0, 251.0, 285.0, 291.0, 288.0, 294.0, 285.0, 285.0, 296.0, 283.0, 288.0, 285.0, 255.0, 264.0, 294.0, 288.0, 289.0, 290.0, 284.0, 295.0, 292.0, 290.0, 321.0, 309.0, 286.0, 296.0, 286.0, 293.0, 282.0, 294.0, 291.0, 296.0, 284.0, 295.0, 294.0, 288.0, 228.0, 237.0, 319.0, 314.0, 268.0, 257.0, 225.0, 237.0, 264.0, 261.0, 266.0, 264.0, 264.0, 261.0, 231.0, 237.0, 265.0, 257.0, 292.0, 290.0, 257.0, 273.0, 261.0, 269.0, 257.0, 268.0, 312.0, 318.0, 309.0, 318.0, 259.0, 260.0, 239.0, 229.0, 260.0, 262.0, 294.0, 288.0, 289.0, 290.0, 268.0, 254.0, 189.0, 164.0, 259.0, 
271.0, 265.0, 260.0, 319.0, 311.0, 316.0, 320.0, 289.0, 290.0, 291.0, 291.0, 285.0, 288.0, 149.0, 145.0, 263.0, 247.0, 288.0, 282.0, 285.0, 288.0, 259.0, 263.0, 264.0, 266.0, 260.0, 265.0, 282.0, 291.0, 291.0, 291.0, 291.0, 291.0, 263.0, 259.0, 230.0, 238.0, 141.0, 153.0, 286.0, 296.0, 288.0, 288.0, 240.0, 233.0, 291.0, 285.0, 274.0, 265.0, 251.0, 259.0, 293.0, 286.0, 290.0, 283.0, 299.0, 288.0, 286.0, 287.0, 291.0, 288.0, 267.0, 252.0, 260.0, 262.0, 283.0, 299.0, 293.0, 289.0, 259.0, 271.0, 286.0, 293.0, 264.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6927431395642533, "mean_inference_ms": 1.234820555408916, "mean_action_processing_ms": 0.13303677357463245, "mean_env_wait_ms": 0.8338055430125193, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 294.0, "episode_reward_mean": 548.57, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 274.285}, "hist_stats": {"episode_reward": [587.0, 516.0, 576.0, 579.0, 573.0, 513.0, 522.0, 522.0, 582.0, 576.0, 579.0, 519.0, 513.0, 530.0, 579.0, 579.0, 573.0, 570.0, 582.0, 582.0, 570.0, 582.0, 513.0, 576.0, 582.0, 570.0, 579.0, 573.0, 519.0, 582.0, 579.0, 579.0, 582.0, 630.0, 582.0, 579.0, 576.0, 587.0, 579.0, 582.0, 465.0, 633.0, 525.0, 462.0, 525.0, 530.0, 525.0, 468.0, 522.0, 582.0, 530.0, 530.0, 525.0, 630.0, 627.0, 519.0, 468.0, 522.0, 582.0, 579.0, 522.0, 353.0, 530.0, 525.0, 630.0, 636.0, 579.0, 582.0, 573.0, 294.0, 510.0, 570.0, 573.0, 522.0, 530.0, 525.0, 573.0, 582.0, 582.0, 522.0, 468.0, 294.0, 582.0, 576.0, 473.0, 576.0, 539.0, 510.0, 579.0, 573.0, 587.0, 573.0, 579.0, 519.0, 522.0, 582.0, 582.0, 530.0, 579.0, 519.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 296.0, 259.0, 257.0, 290.0, 286.0, 283.0, 296.0, 282.0, 291.0, 257.0, 256.0, 261.0, 261.0, 260.0, 262.0, 287.0, 295.0, 290.0, 286.0, 291.0, 288.0, 251.0, 268.0, 253.0, 260.0, 267.0, 263.0, 289.0, 290.0, 286.0, 293.0, 286.0, 287.0, 285.0, 285.0, 289.0, 293.0, 291.0, 291.0, 288.0, 282.0, 288.0, 294.0, 262.0, 251.0, 285.0, 291.0, 288.0, 294.0, 285.0, 285.0, 296.0, 283.0, 288.0, 285.0, 255.0, 264.0, 294.0, 288.0, 289.0, 290.0, 284.0, 295.0, 292.0, 290.0, 321.0, 309.0, 286.0, 296.0, 286.0, 293.0, 282.0, 294.0, 291.0, 296.0, 284.0, 295.0, 294.0, 288.0, 228.0, 237.0, 319.0, 314.0, 268.0, 257.0, 225.0, 237.0, 264.0, 261.0, 266.0, 264.0, 264.0, 261.0, 231.0, 237.0, 265.0, 257.0, 292.0, 290.0, 257.0, 273.0, 261.0, 269.0, 257.0, 268.0, 312.0, 318.0, 309.0, 318.0, 259.0, 260.0, 239.0, 229.0, 260.0, 262.0, 294.0, 288.0, 289.0, 290.0, 268.0, 254.0, 189.0, 164.0, 259.0, 271.0, 265.0, 260.0, 319.0, 311.0, 316.0, 320.0, 289.0, 290.0, 291.0, 291.0, 285.0, 288.0, 149.0, 145.0, 263.0, 247.0, 288.0, 282.0, 285.0, 288.0, 259.0, 263.0, 264.0, 266.0, 260.0, 265.0, 282.0, 291.0, 291.0, 291.0, 291.0, 291.0, 263.0, 259.0, 230.0, 238.0, 141.0, 153.0, 286.0, 296.0, 288.0, 288.0, 240.0, 233.0, 291.0, 285.0, 274.0, 265.0, 251.0, 259.0, 293.0, 286.0, 290.0, 283.0, 299.0, 288.0, 286.0, 287.0, 291.0, 288.0, 267.0, 252.0, 260.0, 262.0, 283.0, 299.0, 293.0, 289.0, 259.0, 271.0, 286.0, 293.0, 264.0, 255.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6927431395642533, "mean_inference_ms": 1.234820555408916, "mean_action_processing_ms": 0.13303677357463245, "mean_env_wait_ms": 0.8338055430125193, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, 
"num_agent_steps_sampled": 12569600, "num_agent_steps_trained": 12569600, "num_env_steps_sampled": 6284800, "num_env_steps_trained": 6284800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6284800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12569600, "timers": {"training_iteration_time_ms": 3545.175, "learn_time_ms": 1055.527, "learn_throughput": 12126.644, "synch_weights_time_ms": 12.153}, "counters": {"num_env_steps_sampled": 6284800, "num_env_steps_trained": 6284800, "num_agent_steps_sampled": 12569600, "num_agent_steps_trained": 12569600}, "done": false, "episodes_total": 15712, "training_iteration": 491, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-16", "timestamp": 1666582336, "time_this_iter_s": 3.721700668334961, "time_total_s": 1861.6548449993134, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": 
[], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": 
-1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, 
"min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1861.6548449993134, "timesteps_since_restore": 0, "iterations_since_restore": 491, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.94, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 189.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 169.89, "shaped_reward_min": 94, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.06, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.18, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.91, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.09, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 0, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.8, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.95, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.96, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.78, 
"useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.65, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.58, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.8, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.95, "optimal_onion_potting_agent_1_min": 5, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.8, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.95, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0008974068332463503, "policy_loss": -0.0012979262974113226, "vf_loss": 7.716615676879883, "vf_explained_var": 0.5693703889846802, "kl": 0.002612018259242177, "entropy": 0.7422833442687988, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6297600, "num_env_steps_trained": 6297600, "num_agent_steps_sampled": 12595200, "num_agent_steps_trained": 12595200}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 294.0, "episode_reward_mean": 547.89, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, 
"policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 273.945}, "custom_metrics": {"sparse_reward_mean": 189.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 169.89, "shaped_reward_min": 94, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.06, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 22, "onion_pickup_agent_1_mean": 15.18, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 15.91, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.09, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.01, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.0, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 0, "useful_onion_drop_agent_0_mean": 0.0, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.8, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 14.95, "potting_onion_agent_1_min": 5, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.96, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.78, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.1, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.65, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.95, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.58, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.88, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.8, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 14.95, "optimal_onion_potting_agent_1_min": 5, 
"optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.8, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 14.95, "viable_onion_potting_agent_1_min": 5, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 630.0, 582.0, 579.0, 576.0, 587.0, 579.0, 582.0, 465.0, 633.0, 525.0, 462.0, 525.0, 530.0, 525.0, 468.0, 522.0, 
582.0, 530.0, 530.0, 525.0, 630.0, 627.0, 519.0, 468.0, 522.0, 582.0, 579.0, 522.0, 353.0, 530.0, 525.0, 630.0, 636.0, 579.0, 582.0, 573.0, 294.0, 510.0, 570.0, 573.0, 522.0, 530.0, 525.0, 573.0, 582.0, 582.0, 522.0, 468.0, 294.0, 582.0, 576.0, 473.0, 576.0, 539.0, 510.0, 579.0, 573.0, 587.0, 573.0, 579.0, 519.0, 522.0, 582.0, 582.0, 530.0, 579.0, 519.0, 579.0, 582.0, 570.0, 579.0, 579.0, 570.0, 525.0, 576.0, 479.0, 627.0, 510.0, 525.0, 525.0, 579.0, 522.0, 525.0, 576.0, 636.0, 576.0, 525.0, 525.0, 573.0, 579.0, 590.0, 579.0, 573.0, 576.0, 573.0, 584.0, 473.0, 516.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 321.0, 309.0, 286.0, 296.0, 286.0, 293.0, 282.0, 294.0, 291.0, 296.0, 284.0, 295.0, 294.0, 288.0, 228.0, 237.0, 319.0, 314.0, 268.0, 257.0, 225.0, 237.0, 264.0, 261.0, 266.0, 264.0, 264.0, 261.0, 231.0, 237.0, 265.0, 257.0, 292.0, 290.0, 257.0, 273.0, 261.0, 269.0, 257.0, 268.0, 312.0, 318.0, 309.0, 318.0, 259.0, 260.0, 239.0, 229.0, 260.0, 262.0, 294.0, 288.0, 289.0, 290.0, 268.0, 254.0, 189.0, 164.0, 259.0, 271.0, 265.0, 260.0, 319.0, 311.0, 316.0, 320.0, 289.0, 290.0, 291.0, 291.0, 285.0, 288.0, 149.0, 145.0, 263.0, 247.0, 288.0, 282.0, 285.0, 288.0, 259.0, 263.0, 264.0, 266.0, 260.0, 265.0, 282.0, 291.0, 291.0, 291.0, 291.0, 291.0, 263.0, 259.0, 230.0, 238.0, 141.0, 153.0, 286.0, 296.0, 288.0, 288.0, 240.0, 233.0, 291.0, 285.0, 274.0, 265.0, 251.0, 259.0, 293.0, 286.0, 290.0, 283.0, 299.0, 288.0, 286.0, 287.0, 291.0, 288.0, 267.0, 252.0, 260.0, 
262.0, 283.0, 299.0, 293.0, 289.0, 259.0, 271.0, 286.0, 293.0, 264.0, 255.0, 290.0, 289.0, 290.0, 292.0, 290.0, 280.0, 294.0, 285.0, 296.0, 283.0, 287.0, 283.0, 260.0, 265.0, 289.0, 287.0, 236.0, 243.0, 311.0, 316.0, 268.0, 242.0, 262.0, 263.0, 260.0, 265.0, 284.0, 295.0, 261.0, 261.0, 258.0, 267.0, 284.0, 292.0, 320.0, 316.0, 294.0, 282.0, 266.0, 259.0, 263.0, 262.0, 288.0, 285.0, 292.0, 287.0, 291.0, 299.0, 285.0, 294.0, 282.0, 291.0, 288.0, 288.0, 288.0, 285.0, 294.0, 290.0, 232.0, 241.0, 262.0, 254.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6927135667510714, "mean_inference_ms": 1.2347289499299863, "mean_action_processing_ms": 0.13303129612242792, "mean_env_wait_ms": 0.8337459211393875, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 294.0, "episode_reward_mean": 547.89, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 273.945}, "hist_stats": {"episode_reward": [582.0, 630.0, 582.0, 579.0, 576.0, 587.0, 579.0, 582.0, 465.0, 633.0, 525.0, 462.0, 525.0, 530.0, 525.0, 468.0, 522.0, 582.0, 530.0, 530.0, 525.0, 630.0, 627.0, 519.0, 468.0, 522.0, 582.0, 579.0, 522.0, 353.0, 530.0, 525.0, 630.0, 636.0, 579.0, 582.0, 573.0, 294.0, 510.0, 570.0, 573.0, 522.0, 530.0, 525.0, 573.0, 582.0, 582.0, 522.0, 468.0, 294.0, 582.0, 576.0, 473.0, 576.0, 539.0, 510.0, 579.0, 573.0, 587.0, 573.0, 579.0, 519.0, 522.0, 582.0, 582.0, 530.0, 579.0, 519.0, 579.0, 582.0, 570.0, 579.0, 579.0, 570.0, 525.0, 576.0, 479.0, 627.0, 510.0, 525.0, 525.0, 579.0, 522.0, 525.0, 576.0, 636.0, 576.0, 525.0, 525.0, 573.0, 579.0, 590.0, 579.0, 573.0, 576.0, 573.0, 584.0, 473.0, 516.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [292.0, 290.0, 321.0, 309.0, 286.0, 296.0, 286.0, 293.0, 282.0, 294.0, 291.0, 296.0, 284.0, 295.0, 294.0, 288.0, 228.0, 237.0, 319.0, 314.0, 268.0, 257.0, 225.0, 237.0, 264.0, 261.0, 266.0, 264.0, 264.0, 261.0, 231.0, 237.0, 265.0, 257.0, 292.0, 290.0, 257.0, 273.0, 261.0, 269.0, 257.0, 268.0, 312.0, 318.0, 309.0, 318.0, 259.0, 260.0, 239.0, 229.0, 260.0, 262.0, 294.0, 288.0, 289.0, 290.0, 268.0, 254.0, 189.0, 164.0, 259.0, 271.0, 265.0, 260.0, 319.0, 311.0, 316.0, 320.0, 289.0, 290.0, 291.0, 291.0, 285.0, 288.0, 149.0, 145.0, 263.0, 247.0, 288.0, 282.0, 285.0, 288.0, 259.0, 263.0, 264.0, 266.0, 260.0, 265.0, 282.0, 291.0, 291.0, 291.0, 291.0, 291.0, 263.0, 259.0, 230.0, 238.0, 141.0, 153.0, 286.0, 296.0, 288.0, 288.0, 240.0, 233.0, 291.0, 285.0, 274.0, 265.0, 251.0, 259.0, 293.0, 286.0, 290.0, 283.0, 299.0, 288.0, 286.0, 287.0, 291.0, 288.0, 267.0, 252.0, 260.0, 262.0, 283.0, 299.0, 293.0, 289.0, 259.0, 271.0, 286.0, 293.0, 264.0, 255.0, 290.0, 289.0, 290.0, 292.0, 290.0, 280.0, 294.0, 285.0, 296.0, 283.0, 287.0, 283.0, 260.0, 265.0, 289.0, 287.0, 236.0, 243.0, 311.0, 316.0, 268.0, 242.0, 262.0, 263.0, 260.0, 265.0, 284.0, 295.0, 261.0, 261.0, 258.0, 267.0, 284.0, 292.0, 320.0, 316.0, 294.0, 282.0, 266.0, 259.0, 263.0, 262.0, 288.0, 285.0, 292.0, 287.0, 291.0, 299.0, 285.0, 294.0, 282.0, 291.0, 288.0, 288.0, 288.0, 285.0, 294.0, 290.0, 232.0, 241.0, 262.0, 254.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6927135667510714, "mean_inference_ms": 1.2347289499299863, "mean_action_processing_ms": 0.13303129612242792, "mean_env_wait_ms": 0.8337459211393875, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, 
"num_agent_steps_sampled": 12595200, "num_agent_steps_trained": 12595200, "num_env_steps_sampled": 6297600, "num_env_steps_trained": 6297600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6297600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12595200, "timers": {"training_iteration_time_ms": 3549.89, "learn_time_ms": 1056.008, "learn_throughput": 12121.115, "synch_weights_time_ms": 13.313}, "counters": {"num_env_steps_sampled": 6297600, "num_env_steps_trained": 6297600, "num_agent_steps_sampled": 12595200, "num_agent_steps_trained": 12595200}, "done": false, "episodes_total": 15744, "training_iteration": 492, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-20", "timestamp": 1666582340, "time_this_iter_s": 3.6109235286712646, "time_total_s": 1865.2657685279846, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": 
[], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": 
-1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, 
"min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1865.2657685279846, "timesteps_since_restore": 0, "iterations_since_restore": 492, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.51666666666667, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 172.55, "shaped_reward_min": 94, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.28, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.5, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.38, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.22, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.83, 
"useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.73, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.68, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.22, "optimal_onion_potting_agent_1_min": 7, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.22, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002336945617571473, "policy_loss": 0.001948866993188858, "vf_loss": 7.629458427429199, "vf_explained_var": 0.576850175857544, "kl": 0.002895065350458026, "entropy": 0.7497336268424988, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6310400, "num_env_steps_trained": 6310400, "num_agent_steps_sampled": 12620800, "num_agent_steps_trained": 12620800}, "sampler_results": {"episode_reward_max": 636.0, "episode_reward_min": 294.0, "episode_reward_mean": 558.55, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, 
"policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 279.275}, "custom_metrics": {"sparse_reward_mean": 193.0, "sparse_reward_min": 100, "sparse_reward_max": 220, "shaped_reward_mean": 172.55, "shaped_reward_min": 94, "shaped_reward_max": 196, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.28, "onion_pickup_agent_0_min": 8, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.5, "onion_pickup_agent_1_min": 8, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.14, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.38, "useful_onion_pickup_agent_1_min": 7, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.01, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.0, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.22, "potting_onion_agent_1_min": 7, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.34, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.83, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.2, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.01, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.06, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.73, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.03, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.68, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.99, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.0, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.22, "optimal_onion_potting_agent_1_min": 7, 
"optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.0, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.22, "viable_onion_potting_agent_1_min": 7, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [630.0, 636.0, 579.0, 582.0, 573.0, 294.0, 510.0, 570.0, 573.0, 522.0, 530.0, 525.0, 573.0, 582.0, 582.0, 522.0, 468.0, 
294.0, 582.0, 576.0, 473.0, 576.0, 539.0, 510.0, 579.0, 573.0, 587.0, 573.0, 579.0, 519.0, 522.0, 582.0, 582.0, 530.0, 579.0, 519.0, 579.0, 582.0, 570.0, 579.0, 579.0, 570.0, 525.0, 576.0, 479.0, 627.0, 510.0, 525.0, 525.0, 579.0, 522.0, 525.0, 576.0, 636.0, 576.0, 525.0, 525.0, 573.0, 579.0, 590.0, 579.0, 573.0, 576.0, 573.0, 584.0, 473.0, 516.0, 582.0, 582.0, 582.0, 587.0, 630.0, 576.0, 579.0, 573.0, 525.0, 525.0, 582.0, 573.0, 567.0, 522.0, 582.0, 582.0, 630.0, 582.0, 582.0, 573.0, 576.0, 522.0, 587.0, 582.0, 576.0, 627.0, 576.0, 576.0, 579.0, 573.0, 582.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 311.0, 316.0, 320.0, 289.0, 290.0, 291.0, 291.0, 285.0, 288.0, 149.0, 145.0, 263.0, 247.0, 288.0, 282.0, 285.0, 288.0, 259.0, 263.0, 264.0, 266.0, 260.0, 265.0, 282.0, 291.0, 291.0, 291.0, 291.0, 291.0, 263.0, 259.0, 230.0, 238.0, 141.0, 153.0, 286.0, 296.0, 288.0, 288.0, 240.0, 233.0, 291.0, 285.0, 274.0, 265.0, 251.0, 259.0, 293.0, 286.0, 290.0, 283.0, 299.0, 288.0, 286.0, 287.0, 291.0, 288.0, 267.0, 252.0, 260.0, 262.0, 283.0, 299.0, 293.0, 289.0, 259.0, 271.0, 286.0, 293.0, 264.0, 255.0, 290.0, 289.0, 290.0, 292.0, 290.0, 280.0, 294.0, 285.0, 296.0, 283.0, 287.0, 283.0, 260.0, 265.0, 289.0, 287.0, 236.0, 243.0, 311.0, 316.0, 268.0, 242.0, 262.0, 263.0, 260.0, 265.0, 284.0, 295.0, 261.0, 261.0, 258.0, 267.0, 284.0, 292.0, 320.0, 316.0, 294.0, 282.0, 266.0, 259.0, 263.0, 262.0, 288.0, 285.0, 292.0, 287.0, 291.0, 299.0, 285.0, 294.0, 282.0, 291.0, 288.0, 
288.0, 288.0, 285.0, 294.0, 290.0, 232.0, 241.0, 262.0, 254.0, 291.0, 291.0, 290.0, 292.0, 291.0, 291.0, 292.0, 295.0, 313.0, 317.0, 288.0, 288.0, 292.0, 287.0, 288.0, 285.0, 266.0, 259.0, 262.0, 263.0, 286.0, 296.0, 286.0, 287.0, 277.0, 290.0, 260.0, 262.0, 290.0, 292.0, 298.0, 284.0, 308.0, 322.0, 288.0, 294.0, 295.0, 287.0, 279.0, 294.0, 289.0, 287.0, 252.0, 270.0, 291.0, 296.0, 292.0, 290.0, 292.0, 284.0, 314.0, 313.0, 280.0, 296.0, 288.0, 288.0, 294.0, 285.0, 284.0, 289.0, 289.0, 293.0, 289.0, 290.0, 288.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6927007948953394, "mean_inference_ms": 1.2346649904093174, "mean_action_processing_ms": 0.1330296152925825, "mean_env_wait_ms": 0.8337135496717951, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 636.0, "episode_reward_min": 294.0, "episode_reward_mean": 558.55, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 141.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 279.275}, "hist_stats": {"episode_reward": [630.0, 636.0, 579.0, 582.0, 573.0, 294.0, 510.0, 570.0, 573.0, 522.0, 530.0, 525.0, 573.0, 582.0, 582.0, 522.0, 468.0, 294.0, 582.0, 576.0, 473.0, 576.0, 539.0, 510.0, 579.0, 573.0, 587.0, 573.0, 579.0, 519.0, 522.0, 582.0, 582.0, 530.0, 579.0, 519.0, 579.0, 582.0, 570.0, 579.0, 579.0, 570.0, 525.0, 576.0, 479.0, 627.0, 510.0, 525.0, 525.0, 579.0, 522.0, 525.0, 576.0, 636.0, 576.0, 525.0, 525.0, 573.0, 579.0, 590.0, 579.0, 573.0, 576.0, 573.0, 584.0, 473.0, 516.0, 582.0, 582.0, 582.0, 587.0, 630.0, 576.0, 579.0, 573.0, 525.0, 525.0, 582.0, 573.0, 567.0, 522.0, 582.0, 582.0, 630.0, 582.0, 582.0, 573.0, 576.0, 522.0, 587.0, 582.0, 576.0, 627.0, 576.0, 576.0, 579.0, 573.0, 582.0, 579.0, 573.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [319.0, 311.0, 316.0, 320.0, 289.0, 290.0, 291.0, 291.0, 285.0, 288.0, 149.0, 145.0, 263.0, 247.0, 288.0, 282.0, 285.0, 288.0, 259.0, 263.0, 264.0, 266.0, 260.0, 265.0, 282.0, 291.0, 291.0, 291.0, 291.0, 291.0, 263.0, 259.0, 230.0, 238.0, 141.0, 153.0, 286.0, 296.0, 288.0, 288.0, 240.0, 233.0, 291.0, 285.0, 274.0, 265.0, 251.0, 259.0, 293.0, 286.0, 290.0, 283.0, 299.0, 288.0, 286.0, 287.0, 291.0, 288.0, 267.0, 252.0, 260.0, 262.0, 283.0, 299.0, 293.0, 289.0, 259.0, 271.0, 286.0, 293.0, 264.0, 255.0, 290.0, 289.0, 290.0, 292.0, 290.0, 280.0, 294.0, 285.0, 296.0, 283.0, 287.0, 283.0, 260.0, 265.0, 289.0, 287.0, 236.0, 243.0, 311.0, 316.0, 268.0, 242.0, 262.0, 263.0, 260.0, 265.0, 284.0, 295.0, 261.0, 261.0, 258.0, 267.0, 284.0, 292.0, 320.0, 316.0, 294.0, 282.0, 266.0, 259.0, 263.0, 262.0, 288.0, 285.0, 292.0, 287.0, 291.0, 299.0, 285.0, 294.0, 282.0, 291.0, 288.0, 288.0, 288.0, 285.0, 294.0, 290.0, 232.0, 241.0, 262.0, 254.0, 291.0, 291.0, 290.0, 292.0, 291.0, 291.0, 292.0, 295.0, 313.0, 317.0, 288.0, 288.0, 292.0, 287.0, 288.0, 285.0, 266.0, 259.0, 262.0, 263.0, 286.0, 296.0, 286.0, 287.0, 277.0, 290.0, 260.0, 262.0, 290.0, 292.0, 298.0, 284.0, 308.0, 322.0, 288.0, 294.0, 295.0, 287.0, 279.0, 294.0, 289.0, 287.0, 252.0, 270.0, 291.0, 296.0, 292.0, 290.0, 292.0, 284.0, 314.0, 313.0, 280.0, 296.0, 288.0, 288.0, 294.0, 285.0, 284.0, 289.0, 289.0, 293.0, 289.0, 290.0, 288.0, 285.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6927007948953394, "mean_inference_ms": 1.2346649904093174, "mean_action_processing_ms": 0.1330296152925825, "mean_env_wait_ms": 0.8337135496717951, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, 
"num_agent_steps_sampled": 12620800, "num_agent_steps_trained": 12620800, "num_env_steps_sampled": 6310400, "num_env_steps_trained": 6310400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6310400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12620800, "timers": {"training_iteration_time_ms": 3555.657, "learn_time_ms": 1059.616, "learn_throughput": 12079.845, "synch_weights_time_ms": 12.544}, "counters": {"num_env_steps_sampled": 6310400, "num_env_steps_trained": 6310400, "num_agent_steps_sampled": 12620800, "num_agent_steps_trained": 12620800}, "done": false, "episodes_total": 15776, "training_iteration": 493, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-24", "timestamp": 1666582344, "time_this_iter_s": 3.6424057483673096, "time_total_s": 1868.908174276352, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": 
[], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": 
-1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, 
"min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1868.908174276352, "timesteps_since_restore": 0, "iterations_since_restore": 493, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 24.2, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 173.65, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.14, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.85, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.03, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.73, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.85, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.6, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.08, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.94, 
"useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.86, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.82, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.85, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.6, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.85, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.6, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.002367196138948202, "policy_loss": 0.001961060333997011, "vf_loss": 7.817628860473633, "vf_explained_var": 0.5302909016609192, "kl": 0.0031909747049212456, "entropy": 0.7512529492378235, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6323200, "num_env_steps_trained": 6323200, "num_agent_steps_sampled": 12646400, "num_agent_steps_trained": 12646400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 563.25, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, 
"policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 281.625}, "custom_metrics": {"sparse_reward_mean": 194.8, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 173.65, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.14, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.85, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.03, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.73, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.02, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.85, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.6, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.08, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.94, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.11, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.02, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.86, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.97, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.82, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.93, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.85, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.6, "optimal_onion_potting_agent_1_min": 10, 
"optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.85, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.6, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [582.0, 530.0, 579.0, 519.0, 579.0, 582.0, 570.0, 579.0, 579.0, 570.0, 525.0, 576.0, 479.0, 627.0, 510.0, 525.0, 525.0, 
579.0, 522.0, 525.0, 576.0, 636.0, 576.0, 525.0, 525.0, 573.0, 579.0, 590.0, 579.0, 573.0, 576.0, 573.0, 584.0, 473.0, 516.0, 582.0, 582.0, 582.0, 587.0, 630.0, 576.0, 579.0, 573.0, 525.0, 525.0, 582.0, 573.0, 567.0, 522.0, 582.0, 582.0, 630.0, 582.0, 582.0, 573.0, 576.0, 522.0, 587.0, 582.0, 576.0, 627.0, 576.0, 576.0, 579.0, 573.0, 582.0, 579.0, 573.0, 522.0, 522.0, 576.0, 582.0, 530.0, 639.0, 516.0, 570.0, 576.0, 582.0, 579.0, 582.0, 516.0, 582.0, 587.0, 522.0, 516.0, 576.0, 570.0, 522.0, 582.0, 579.0, 576.0, 522.0, 522.0, 579.0, 570.0, 570.0, 579.0, 444.0, 519.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 259.0, 271.0, 286.0, 293.0, 264.0, 255.0, 290.0, 289.0, 290.0, 292.0, 290.0, 280.0, 294.0, 285.0, 296.0, 283.0, 287.0, 283.0, 260.0, 265.0, 289.0, 287.0, 236.0, 243.0, 311.0, 316.0, 268.0, 242.0, 262.0, 263.0, 260.0, 265.0, 284.0, 295.0, 261.0, 261.0, 258.0, 267.0, 284.0, 292.0, 320.0, 316.0, 294.0, 282.0, 266.0, 259.0, 263.0, 262.0, 288.0, 285.0, 292.0, 287.0, 291.0, 299.0, 285.0, 294.0, 282.0, 291.0, 288.0, 288.0, 288.0, 285.0, 294.0, 290.0, 232.0, 241.0, 262.0, 254.0, 291.0, 291.0, 290.0, 292.0, 291.0, 291.0, 292.0, 295.0, 313.0, 317.0, 288.0, 288.0, 292.0, 287.0, 288.0, 285.0, 266.0, 259.0, 262.0, 263.0, 286.0, 296.0, 286.0, 287.0, 277.0, 290.0, 260.0, 262.0, 290.0, 292.0, 298.0, 284.0, 308.0, 322.0, 288.0, 294.0, 295.0, 287.0, 279.0, 294.0, 289.0, 287.0, 252.0, 270.0, 291.0, 296.0, 292.0, 290.0, 292.0, 284.0, 314.0, 313.0, 280.0, 296.0, 288.0, 
288.0, 294.0, 285.0, 284.0, 289.0, 289.0, 293.0, 289.0, 290.0, 288.0, 285.0, 260.0, 262.0, 264.0, 258.0, 288.0, 288.0, 285.0, 297.0, 264.0, 266.0, 322.0, 317.0, 260.0, 256.0, 279.0, 291.0, 294.0, 282.0, 288.0, 294.0, 288.0, 291.0, 294.0, 288.0, 258.0, 258.0, 291.0, 291.0, 296.0, 291.0, 256.0, 266.0, 261.0, 255.0, 288.0, 288.0, 278.0, 292.0, 261.0, 261.0, 294.0, 288.0, 289.0, 290.0, 287.0, 289.0, 261.0, 261.0, 265.0, 257.0, 293.0, 286.0, 288.0, 282.0, 291.0, 279.0, 291.0, 288.0, 219.0, 225.0, 261.0, 258.0, 284.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.692662292395533, "mean_inference_ms": 1.234709989130729, "mean_action_processing_ms": 0.1330246376150592, "mean_env_wait_ms": 0.8336586327420582, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 563.25, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 281.625}, "hist_stats": {"episode_reward": [582.0, 530.0, 579.0, 519.0, 579.0, 582.0, 570.0, 579.0, 579.0, 570.0, 525.0, 576.0, 479.0, 627.0, 510.0, 525.0, 525.0, 579.0, 522.0, 525.0, 576.0, 636.0, 576.0, 525.0, 525.0, 573.0, 579.0, 590.0, 579.0, 573.0, 576.0, 573.0, 584.0, 473.0, 516.0, 582.0, 582.0, 582.0, 587.0, 630.0, 576.0, 579.0, 573.0, 525.0, 525.0, 582.0, 573.0, 567.0, 522.0, 582.0, 582.0, 630.0, 582.0, 582.0, 573.0, 576.0, 522.0, 587.0, 582.0, 576.0, 627.0, 576.0, 576.0, 579.0, 573.0, 582.0, 579.0, 573.0, 522.0, 522.0, 576.0, 582.0, 530.0, 639.0, 516.0, 570.0, 576.0, 582.0, 579.0, 582.0, 516.0, 582.0, 587.0, 522.0, 516.0, 576.0, 570.0, 522.0, 582.0, 579.0, 576.0, 522.0, 522.0, 579.0, 570.0, 570.0, 579.0, 444.0, 519.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [293.0, 289.0, 259.0, 271.0, 286.0, 293.0, 264.0, 255.0, 290.0, 289.0, 290.0, 292.0, 290.0, 280.0, 294.0, 285.0, 296.0, 283.0, 287.0, 283.0, 260.0, 265.0, 289.0, 287.0, 236.0, 243.0, 311.0, 316.0, 268.0, 242.0, 262.0, 263.0, 260.0, 265.0, 284.0, 295.0, 261.0, 261.0, 258.0, 267.0, 284.0, 292.0, 320.0, 316.0, 294.0, 282.0, 266.0, 259.0, 263.0, 262.0, 288.0, 285.0, 292.0, 287.0, 291.0, 299.0, 285.0, 294.0, 282.0, 291.0, 288.0, 288.0, 288.0, 285.0, 294.0, 290.0, 232.0, 241.0, 262.0, 254.0, 291.0, 291.0, 290.0, 292.0, 291.0, 291.0, 292.0, 295.0, 313.0, 317.0, 288.0, 288.0, 292.0, 287.0, 288.0, 285.0, 266.0, 259.0, 262.0, 263.0, 286.0, 296.0, 286.0, 287.0, 277.0, 290.0, 260.0, 262.0, 290.0, 292.0, 298.0, 284.0, 308.0, 322.0, 288.0, 294.0, 295.0, 287.0, 279.0, 294.0, 289.0, 287.0, 252.0, 270.0, 291.0, 296.0, 292.0, 290.0, 292.0, 284.0, 314.0, 313.0, 280.0, 296.0, 288.0, 288.0, 294.0, 285.0, 284.0, 289.0, 289.0, 293.0, 289.0, 290.0, 288.0, 285.0, 260.0, 262.0, 264.0, 258.0, 288.0, 288.0, 285.0, 297.0, 264.0, 266.0, 322.0, 317.0, 260.0, 256.0, 279.0, 291.0, 294.0, 282.0, 288.0, 294.0, 288.0, 291.0, 294.0, 288.0, 258.0, 258.0, 291.0, 291.0, 296.0, 291.0, 256.0, 266.0, 261.0, 255.0, 288.0, 288.0, 278.0, 292.0, 261.0, 261.0, 294.0, 288.0, 289.0, 290.0, 287.0, 289.0, 261.0, 261.0, 265.0, 257.0, 293.0, 286.0, 288.0, 282.0, 291.0, 279.0, 291.0, 288.0, 219.0, 225.0, 261.0, 258.0, 284.0, 292.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.692662292395533, "mean_inference_ms": 1.234709989130729, "mean_action_processing_ms": 0.1330246376150592, "mean_env_wait_ms": 0.8336586327420582, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, 
"num_agent_steps_sampled": 12646400, "num_agent_steps_trained": 12646400, "num_env_steps_sampled": 6323200, "num_env_steps_trained": 6323200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6323200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12646400, "timers": {"training_iteration_time_ms": 3584.233, "learn_time_ms": 1056.792, "learn_throughput": 12112.126, "synch_weights_time_ms": 12.517}, "counters": {"num_env_steps_sampled": 6323200, "num_env_steps_trained": 6323200, "num_agent_steps_sampled": 12646400, "num_agent_steps_trained": 12646400}, "done": false, "episodes_total": 15808, "training_iteration": 494, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-28", "timestamp": 1666582348, "time_this_iter_s": 3.8276045322418213, "time_total_s": 1872.7357788085938, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": 
[], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": 
-1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, 
"min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1872.7357788085938, "timesteps_since_restore": 0, "iterations_since_restore": 494, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 21.2, "ram_util_percent": 10.616666666666667}} +{"custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.95, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.7, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.55, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.86, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.03, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.93, 
"useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.79, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.86, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.86, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0009088999358937144, "policy_loss": 0.0005068274331279099, "vf_loss": 7.789777755737305, "vf_explained_var": 0.5383257865905762, "kl": 0.00321396766230464, "entropy": 0.7538089752197266, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6336000, "num_env_steps_trained": 6336000, "num_agent_steps_sampled": 12672000, "num_agent_steps_trained": 12672000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 560.95, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, 
"policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 280.475}, "custom_metrics": {"sparse_reward_mean": 194.0, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.95, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.13, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.7, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.0, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.55, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 22, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.02, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.86, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 23, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 22, "dish_pickup_agent_0_mean": 5.03, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.93, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.79, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.99, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 8, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 8, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.86, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 23, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 9, 
"optimal_onion_potting_agent_1_max": 22, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.86, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 23, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 22, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [584.0, 473.0, 516.0, 582.0, 582.0, 582.0, 587.0, 630.0, 576.0, 579.0, 573.0, 525.0, 525.0, 582.0, 573.0, 567.0, 522.0, 
582.0, 582.0, 630.0, 582.0, 582.0, 573.0, 576.0, 522.0, 587.0, 582.0, 576.0, 627.0, 576.0, 576.0, 579.0, 573.0, 582.0, 579.0, 573.0, 522.0, 522.0, 576.0, 582.0, 530.0, 639.0, 516.0, 570.0, 576.0, 582.0, 579.0, 582.0, 516.0, 582.0, 587.0, 522.0, 516.0, 576.0, 570.0, 522.0, 582.0, 579.0, 576.0, 522.0, 522.0, 579.0, 570.0, 570.0, 579.0, 444.0, 519.0, 576.0, 573.0, 582.0, 576.0, 573.0, 579.0, 522.0, 579.0, 525.0, 573.0, 530.0, 570.0, 465.0, 525.0, 516.0, 525.0, 525.0, 582.0, 579.0, 582.0, 522.0, 573.0, 513.0, 525.0, 570.0, 630.0, 627.0, 582.0, 525.0, 525.0, 582.0, 582.0, 476.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 290.0, 232.0, 241.0, 262.0, 254.0, 291.0, 291.0, 290.0, 292.0, 291.0, 291.0, 292.0, 295.0, 313.0, 317.0, 288.0, 288.0, 292.0, 287.0, 288.0, 285.0, 266.0, 259.0, 262.0, 263.0, 286.0, 296.0, 286.0, 287.0, 277.0, 290.0, 260.0, 262.0, 290.0, 292.0, 298.0, 284.0, 308.0, 322.0, 288.0, 294.0, 295.0, 287.0, 279.0, 294.0, 289.0, 287.0, 252.0, 270.0, 291.0, 296.0, 292.0, 290.0, 292.0, 284.0, 314.0, 313.0, 280.0, 296.0, 288.0, 288.0, 294.0, 285.0, 284.0, 289.0, 289.0, 293.0, 289.0, 290.0, 288.0, 285.0, 260.0, 262.0, 264.0, 258.0, 288.0, 288.0, 285.0, 297.0, 264.0, 266.0, 322.0, 317.0, 260.0, 256.0, 279.0, 291.0, 294.0, 282.0, 288.0, 294.0, 288.0, 291.0, 294.0, 288.0, 258.0, 258.0, 291.0, 291.0, 296.0, 291.0, 256.0, 266.0, 261.0, 255.0, 288.0, 288.0, 278.0, 292.0, 261.0, 261.0, 294.0, 288.0, 289.0, 290.0, 287.0, 289.0, 261.0, 261.0, 265.0, 257.0, 293.0, 286.0, 288.0, 
282.0, 291.0, 279.0, 291.0, 288.0, 219.0, 225.0, 261.0, 258.0, 284.0, 292.0, 285.0, 288.0, 294.0, 288.0, 283.0, 293.0, 278.0, 295.0, 281.0, 298.0, 264.0, 258.0, 293.0, 286.0, 266.0, 259.0, 282.0, 291.0, 263.0, 267.0, 287.0, 283.0, 228.0, 237.0, 265.0, 260.0, 261.0, 255.0, 263.0, 262.0, 263.0, 262.0, 291.0, 291.0, 289.0, 290.0, 289.0, 293.0, 265.0, 257.0, 285.0, 288.0, 256.0, 257.0, 263.0, 262.0, 287.0, 283.0, 321.0, 309.0, 313.0, 314.0, 289.0, 293.0, 270.0, 255.0, 268.0, 257.0, 291.0, 291.0, 291.0, 291.0, 240.0, 236.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6926126229211826, "mean_inference_ms": 1.2347401808599863, "mean_action_processing_ms": 0.133017718901637, "mean_env_wait_ms": 0.8335926233535009, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 560.95, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 280.475}, "hist_stats": {"episode_reward": [584.0, 473.0, 516.0, 582.0, 582.0, 582.0, 587.0, 630.0, 576.0, 579.0, 573.0, 525.0, 525.0, 582.0, 573.0, 567.0, 522.0, 582.0, 582.0, 630.0, 582.0, 582.0, 573.0, 576.0, 522.0, 587.0, 582.0, 576.0, 627.0, 576.0, 576.0, 579.0, 573.0, 582.0, 579.0, 573.0, 522.0, 522.0, 576.0, 582.0, 530.0, 639.0, 516.0, 570.0, 576.0, 582.0, 579.0, 582.0, 516.0, 582.0, 587.0, 522.0, 516.0, 576.0, 570.0, 522.0, 582.0, 579.0, 576.0, 522.0, 522.0, 579.0, 570.0, 570.0, 579.0, 444.0, 519.0, 576.0, 573.0, 582.0, 576.0, 573.0, 579.0, 522.0, 579.0, 525.0, 573.0, 530.0, 570.0, 465.0, 525.0, 516.0, 525.0, 525.0, 582.0, 579.0, 582.0, 522.0, 573.0, 513.0, 525.0, 570.0, 630.0, 627.0, 582.0, 525.0, 525.0, 582.0, 582.0, 476.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [294.0, 290.0, 232.0, 241.0, 262.0, 254.0, 291.0, 291.0, 290.0, 292.0, 291.0, 291.0, 292.0, 295.0, 313.0, 317.0, 288.0, 288.0, 292.0, 287.0, 288.0, 285.0, 266.0, 259.0, 262.0, 263.0, 286.0, 296.0, 286.0, 287.0, 277.0, 290.0, 260.0, 262.0, 290.0, 292.0, 298.0, 284.0, 308.0, 322.0, 288.0, 294.0, 295.0, 287.0, 279.0, 294.0, 289.0, 287.0, 252.0, 270.0, 291.0, 296.0, 292.0, 290.0, 292.0, 284.0, 314.0, 313.0, 280.0, 296.0, 288.0, 288.0, 294.0, 285.0, 284.0, 289.0, 289.0, 293.0, 289.0, 290.0, 288.0, 285.0, 260.0, 262.0, 264.0, 258.0, 288.0, 288.0, 285.0, 297.0, 264.0, 266.0, 322.0, 317.0, 260.0, 256.0, 279.0, 291.0, 294.0, 282.0, 288.0, 294.0, 288.0, 291.0, 294.0, 288.0, 258.0, 258.0, 291.0, 291.0, 296.0, 291.0, 256.0, 266.0, 261.0, 255.0, 288.0, 288.0, 278.0, 292.0, 261.0, 261.0, 294.0, 288.0, 289.0, 290.0, 287.0, 289.0, 261.0, 261.0, 265.0, 257.0, 293.0, 286.0, 288.0, 282.0, 291.0, 279.0, 291.0, 288.0, 219.0, 225.0, 261.0, 258.0, 284.0, 292.0, 285.0, 288.0, 294.0, 288.0, 283.0, 293.0, 278.0, 295.0, 281.0, 298.0, 264.0, 258.0, 293.0, 286.0, 266.0, 259.0, 282.0, 291.0, 263.0, 267.0, 287.0, 283.0, 228.0, 237.0, 265.0, 260.0, 261.0, 255.0, 263.0, 262.0, 263.0, 262.0, 291.0, 291.0, 289.0, 290.0, 289.0, 293.0, 265.0, 257.0, 285.0, 288.0, 256.0, 257.0, 263.0, 262.0, 287.0, 283.0, 321.0, 309.0, 313.0, 314.0, 289.0, 293.0, 270.0, 255.0, 268.0, 257.0, 291.0, 291.0, 291.0, 291.0, 240.0, 236.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6926126229211826, "mean_inference_ms": 1.2347401808599863, "mean_action_processing_ms": 0.133017718901637, "mean_env_wait_ms": 0.8335926233535009, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, 
"num_agent_steps_sampled": 12672000, "num_agent_steps_trained": 12672000, "num_env_steps_sampled": 6336000, "num_env_steps_trained": 6336000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6336000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12672000, "timers": {"training_iteration_time_ms": 3570.269, "learn_time_ms": 1049.903, "learn_throughput": 12191.597, "synch_weights_time_ms": 12.94}, "counters": {"num_env_steps_sampled": 6336000, "num_env_steps_trained": 6336000, "num_agent_steps_sampled": 12672000, "num_agent_steps_trained": 12672000}, "done": false, "episodes_total": 15840, "training_iteration": 495, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-32", "timestamp": 1666582352, "time_this_iter_s": 3.4956037998199463, "time_total_s": 1876.2313826084137, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": 
[], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": 
-1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, 
"min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1876.2313826084137, "timesteps_since_restore": 0, "iterations_since_restore": 495, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.080000000000002, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.61, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.01, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.8, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.89, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.65, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.75, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.5, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.13, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.01, 
"useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.81, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.77, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.75, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.5, "optimal_onion_potting_agent_1_min": 9, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.75, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.5, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": 0.0010021739872172475, "policy_loss": 0.0005998615524731576, "vf_loss": 7.735168933868408, "vf_explained_var": 0.5922386646270752, "kl": 0.0024840538389980793, "entropy": 0.7424072027206421, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6348800, "num_env_steps_trained": 6348800, "num_agent_steps_sampled": 12697600, "num_agent_steps_trained": 12697600}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 559.41, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, 
"policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 279.705}, "custom_metrics": {"sparse_reward_mean": 193.4, "sparse_reward_min": 160, "sparse_reward_max": 220, "shaped_reward_mean": 172.61, "shaped_reward_min": 124, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.01, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.8, "onion_pickup_agent_1_min": 9, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 15.89, "useful_onion_pickup_agent_0_min": 11, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.65, "useful_onion_pickup_agent_1_min": 9, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.03, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.02, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 15.75, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.5, "potting_onion_agent_1_min": 9, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.13, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.19, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.01, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.06, "useful_dish_pickup_agent_1_min": 1, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.01, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.81, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 4.92, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.77, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.91, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.75, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.5, "optimal_onion_potting_agent_1_min": 9, 
"optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.75, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.5, "viable_onion_potting_agent_1_min": 9, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [573.0, 582.0, 579.0, 573.0, 522.0, 522.0, 576.0, 582.0, 530.0, 639.0, 516.0, 570.0, 576.0, 582.0, 579.0, 582.0, 516.0, 
582.0, 587.0, 522.0, 516.0, 576.0, 570.0, 522.0, 582.0, 579.0, 576.0, 522.0, 522.0, 579.0, 570.0, 570.0, 579.0, 444.0, 519.0, 576.0, 573.0, 582.0, 576.0, 573.0, 579.0, 522.0, 579.0, 525.0, 573.0, 530.0, 570.0, 465.0, 525.0, 516.0, 525.0, 525.0, 582.0, 579.0, 582.0, 522.0, 573.0, 513.0, 525.0, 570.0, 630.0, 627.0, 582.0, 525.0, 525.0, 582.0, 582.0, 476.0, 633.0, 579.0, 570.0, 579.0, 525.0, 530.0, 579.0, 582.0, 522.0, 579.0, 582.0, 525.0, 576.0, 579.0, 582.0, 579.0, 522.0, 525.0, 582.0, 582.0, 522.0, 573.0, 573.0, 582.0, 573.0, 630.0, 519.0, 579.0, 579.0, 587.0, 582.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 289.0, 289.0, 293.0, 289.0, 290.0, 288.0, 285.0, 260.0, 262.0, 264.0, 258.0, 288.0, 288.0, 285.0, 297.0, 264.0, 266.0, 322.0, 317.0, 260.0, 256.0, 279.0, 291.0, 294.0, 282.0, 288.0, 294.0, 288.0, 291.0, 294.0, 288.0, 258.0, 258.0, 291.0, 291.0, 296.0, 291.0, 256.0, 266.0, 261.0, 255.0, 288.0, 288.0, 278.0, 292.0, 261.0, 261.0, 294.0, 288.0, 289.0, 290.0, 287.0, 289.0, 261.0, 261.0, 265.0, 257.0, 293.0, 286.0, 288.0, 282.0, 291.0, 279.0, 291.0, 288.0, 219.0, 225.0, 261.0, 258.0, 284.0, 292.0, 285.0, 288.0, 294.0, 288.0, 283.0, 293.0, 278.0, 295.0, 281.0, 298.0, 264.0, 258.0, 293.0, 286.0, 266.0, 259.0, 282.0, 291.0, 263.0, 267.0, 287.0, 283.0, 228.0, 237.0, 265.0, 260.0, 261.0, 255.0, 263.0, 262.0, 263.0, 262.0, 291.0, 291.0, 289.0, 290.0, 289.0, 293.0, 265.0, 257.0, 285.0, 288.0, 256.0, 257.0, 263.0, 262.0, 287.0, 283.0, 321.0, 309.0, 313.0, 314.0, 289.0, 
293.0, 270.0, 255.0, 268.0, 257.0, 291.0, 291.0, 291.0, 291.0, 240.0, 236.0, 316.0, 317.0, 294.0, 285.0, 281.0, 289.0, 286.0, 293.0, 266.0, 259.0, 260.0, 270.0, 288.0, 291.0, 289.0, 293.0, 260.0, 262.0, 281.0, 298.0, 291.0, 291.0, 264.0, 261.0, 289.0, 287.0, 289.0, 290.0, 299.0, 283.0, 288.0, 291.0, 256.0, 266.0, 265.0, 260.0, 288.0, 294.0, 291.0, 291.0, 260.0, 262.0, 280.0, 293.0, 279.0, 294.0, 294.0, 288.0, 293.0, 280.0, 318.0, 312.0, 262.0, 257.0, 296.0, 283.0, 288.0, 291.0, 297.0, 290.0, 287.0, 295.0, 266.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6925542642918654, "mean_inference_ms": 1.2347568136712699, "mean_action_processing_ms": 0.13300886957405056, "mean_env_wait_ms": 0.8335146608846887, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 444.0, "episode_reward_mean": 559.41, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 219.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 279.705}, "hist_stats": {"episode_reward": [573.0, 582.0, 579.0, 573.0, 522.0, 522.0, 576.0, 582.0, 530.0, 639.0, 516.0, 570.0, 576.0, 582.0, 579.0, 582.0, 516.0, 582.0, 587.0, 522.0, 516.0, 576.0, 570.0, 522.0, 582.0, 579.0, 576.0, 522.0, 522.0, 579.0, 570.0, 570.0, 579.0, 444.0, 519.0, 576.0, 573.0, 582.0, 576.0, 573.0, 579.0, 522.0, 579.0, 525.0, 573.0, 530.0, 570.0, 465.0, 525.0, 516.0, 525.0, 525.0, 582.0, 579.0, 582.0, 522.0, 573.0, 513.0, 525.0, 570.0, 630.0, 627.0, 582.0, 525.0, 525.0, 582.0, 582.0, 476.0, 633.0, 579.0, 570.0, 579.0, 525.0, 530.0, 579.0, 582.0, 522.0, 579.0, 582.0, 525.0, 576.0, 579.0, 582.0, 579.0, 522.0, 525.0, 582.0, 582.0, 522.0, 573.0, 573.0, 582.0, 573.0, 630.0, 519.0, 579.0, 579.0, 587.0, 582.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [284.0, 289.0, 289.0, 293.0, 289.0, 290.0, 288.0, 285.0, 260.0, 262.0, 264.0, 258.0, 288.0, 288.0, 285.0, 297.0, 264.0, 266.0, 322.0, 317.0, 260.0, 256.0, 279.0, 291.0, 294.0, 282.0, 288.0, 294.0, 288.0, 291.0, 294.0, 288.0, 258.0, 258.0, 291.0, 291.0, 296.0, 291.0, 256.0, 266.0, 261.0, 255.0, 288.0, 288.0, 278.0, 292.0, 261.0, 261.0, 294.0, 288.0, 289.0, 290.0, 287.0, 289.0, 261.0, 261.0, 265.0, 257.0, 293.0, 286.0, 288.0, 282.0, 291.0, 279.0, 291.0, 288.0, 219.0, 225.0, 261.0, 258.0, 284.0, 292.0, 285.0, 288.0, 294.0, 288.0, 283.0, 293.0, 278.0, 295.0, 281.0, 298.0, 264.0, 258.0, 293.0, 286.0, 266.0, 259.0, 282.0, 291.0, 263.0, 267.0, 287.0, 283.0, 228.0, 237.0, 265.0, 260.0, 261.0, 255.0, 263.0, 262.0, 263.0, 262.0, 291.0, 291.0, 289.0, 290.0, 289.0, 293.0, 265.0, 257.0, 285.0, 288.0, 256.0, 257.0, 263.0, 262.0, 287.0, 283.0, 321.0, 309.0, 313.0, 314.0, 289.0, 293.0, 270.0, 255.0, 268.0, 257.0, 291.0, 291.0, 291.0, 291.0, 240.0, 236.0, 316.0, 317.0, 294.0, 285.0, 281.0, 289.0, 286.0, 293.0, 266.0, 259.0, 260.0, 270.0, 288.0, 291.0, 289.0, 293.0, 260.0, 262.0, 281.0, 298.0, 291.0, 291.0, 264.0, 261.0, 289.0, 287.0, 289.0, 290.0, 299.0, 283.0, 288.0, 291.0, 256.0, 266.0, 265.0, 260.0, 288.0, 294.0, 291.0, 291.0, 260.0, 262.0, 280.0, 293.0, 279.0, 294.0, 294.0, 288.0, 293.0, 280.0, 318.0, 312.0, 262.0, 257.0, 296.0, 283.0, 288.0, 291.0, 297.0, 290.0, 287.0, 295.0, 266.0, 259.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6925542642918654, "mean_inference_ms": 1.2347568136712699, "mean_action_processing_ms": 0.13300886957405056, "mean_env_wait_ms": 0.8335146608846887, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, 
"num_agent_steps_sampled": 12697600, "num_agent_steps_trained": 12697600, "num_env_steps_sampled": 6348800, "num_env_steps_trained": 6348800, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6348800, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12697600, "timers": {"training_iteration_time_ms": 3559.471, "learn_time_ms": 1047.13, "learn_throughput": 12223.894, "synch_weights_time_ms": 13.498}, "counters": {"num_env_steps_sampled": 6348800, "num_env_steps_trained": 6348800, "num_agent_steps_sampled": 12697600, "num_agent_steps_trained": 12697600}, "done": false, "episodes_total": 15872, "training_iteration": 496, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-35", "timestamp": 1666582355, "time_this_iter_s": 3.5756309032440186, "time_total_s": 1879.8070135116577, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": 
[], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": 
-1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, 
"min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1879.8070135116577, "timesteps_since_restore": 0, "iterations_since_restore": 496, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.72, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 171.18, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.24, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.44, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.1, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.26, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.91, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.13, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.79, 
"useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.65, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.91, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.13, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.91, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.13, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -4.584819544106722e-05, "policy_loss": -0.00045904231956228614, "vf_loss": 7.8609466552734375, "vf_explained_var": 0.5243180990219116, "kl": 0.004045985639095306, "entropy": 0.7458009719848633, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6361600, "num_env_steps_trained": 6361600, "num_agent_steps_sampled": 12723200, "num_agent_steps_trained": 12723200}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 180.0, "episode_reward_mean": 555.18, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, 
"policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 277.59}, "custom_metrics": {"sparse_reward_mean": 192.0, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 171.18, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.24, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.44, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.1, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.26, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.03, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.03, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.0, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 0, "potting_onion_agent_0_mean": 15.91, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 20, "potting_onion_agent_1_mean": 15.13, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 4.97, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.27, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.79, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.08, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.01, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 1, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.68, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.01, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.65, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 4.97, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 15.91, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 20, "optimal_onion_potting_agent_1_mean": 15.13, "optimal_onion_potting_agent_1_min": 4, 
"optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 15.91, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 20, "viable_onion_potting_agent_1_mean": 15.13, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 444.0, 519.0, 576.0, 573.0, 582.0, 576.0, 573.0, 579.0, 522.0, 579.0, 525.0, 573.0, 530.0, 570.0, 465.0, 525.0, 
516.0, 525.0, 525.0, 582.0, 579.0, 582.0, 522.0, 573.0, 513.0, 525.0, 570.0, 630.0, 627.0, 582.0, 525.0, 525.0, 582.0, 582.0, 476.0, 633.0, 579.0, 570.0, 579.0, 525.0, 530.0, 579.0, 582.0, 522.0, 579.0, 582.0, 525.0, 576.0, 579.0, 582.0, 579.0, 522.0, 525.0, 582.0, 582.0, 522.0, 573.0, 573.0, 582.0, 573.0, 630.0, 519.0, 579.0, 579.0, 587.0, 582.0, 525.0, 579.0, 576.0, 519.0, 570.0, 587.0, 582.0, 516.0, 570.0, 579.0, 579.0, 579.0, 570.0, 516.0, 573.0, 582.0, 492.0, 180.0, 579.0, 627.0, 479.0, 504.0, 579.0, 579.0, 519.0, 576.0, 530.0, 533.0, 590.0, 579.0, 573.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 219.0, 225.0, 261.0, 258.0, 284.0, 292.0, 285.0, 288.0, 294.0, 288.0, 283.0, 293.0, 278.0, 295.0, 281.0, 298.0, 264.0, 258.0, 293.0, 286.0, 266.0, 259.0, 282.0, 291.0, 263.0, 267.0, 287.0, 283.0, 228.0, 237.0, 265.0, 260.0, 261.0, 255.0, 263.0, 262.0, 263.0, 262.0, 291.0, 291.0, 289.0, 290.0, 289.0, 293.0, 265.0, 257.0, 285.0, 288.0, 256.0, 257.0, 263.0, 262.0, 287.0, 283.0, 321.0, 309.0, 313.0, 314.0, 289.0, 293.0, 270.0, 255.0, 268.0, 257.0, 291.0, 291.0, 291.0, 291.0, 240.0, 236.0, 316.0, 317.0, 294.0, 285.0, 281.0, 289.0, 286.0, 293.0, 266.0, 259.0, 260.0, 270.0, 288.0, 291.0, 289.0, 293.0, 260.0, 262.0, 281.0, 298.0, 291.0, 291.0, 264.0, 261.0, 289.0, 287.0, 289.0, 290.0, 299.0, 283.0, 288.0, 291.0, 256.0, 266.0, 265.0, 260.0, 288.0, 294.0, 291.0, 291.0, 260.0, 262.0, 280.0, 293.0, 279.0, 294.0, 294.0, 288.0, 293.0, 280.0, 318.0, 312.0, 262.0, 
257.0, 296.0, 283.0, 288.0, 291.0, 297.0, 290.0, 287.0, 295.0, 266.0, 259.0, 294.0, 285.0, 288.0, 288.0, 262.0, 257.0, 284.0, 286.0, 291.0, 296.0, 293.0, 289.0, 253.0, 263.0, 288.0, 282.0, 290.0, 289.0, 292.0, 287.0, 288.0, 291.0, 283.0, 287.0, 266.0, 250.0, 288.0, 285.0, 289.0, 293.0, 242.0, 250.0, 92.0, 88.0, 293.0, 286.0, 316.0, 311.0, 245.0, 234.0, 245.0, 259.0, 288.0, 291.0, 290.0, 289.0, 263.0, 256.0, 282.0, 294.0, 259.0, 271.0, 268.0, 265.0, 291.0, 299.0, 283.0, 296.0, 284.0, 289.0, 290.0, 289.0, 290.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6925057574101982, "mean_inference_ms": 1.2346697990609385, "mean_action_processing_ms": 0.13300347069514712, "mean_env_wait_ms": 0.8334533851780352, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 180.0, "episode_reward_mean": 555.18, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 277.59}, "hist_stats": {"episode_reward": [579.0, 444.0, 519.0, 576.0, 573.0, 582.0, 576.0, 573.0, 579.0, 522.0, 579.0, 525.0, 573.0, 530.0, 570.0, 465.0, 525.0, 516.0, 525.0, 525.0, 582.0, 579.0, 582.0, 522.0, 573.0, 513.0, 525.0, 570.0, 630.0, 627.0, 582.0, 525.0, 525.0, 582.0, 582.0, 476.0, 633.0, 579.0, 570.0, 579.0, 525.0, 530.0, 579.0, 582.0, 522.0, 579.0, 582.0, 525.0, 576.0, 579.0, 582.0, 579.0, 522.0, 525.0, 582.0, 582.0, 522.0, 573.0, 573.0, 582.0, 573.0, 630.0, 519.0, 579.0, 579.0, 587.0, 582.0, 525.0, 579.0, 576.0, 519.0, 570.0, 587.0, 582.0, 516.0, 570.0, 579.0, 579.0, 579.0, 570.0, 516.0, 573.0, 582.0, 492.0, 180.0, 579.0, 627.0, 479.0, 504.0, 579.0, 579.0, 519.0, 576.0, 530.0, 533.0, 590.0, 579.0, 573.0, 579.0, 576.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [291.0, 288.0, 219.0, 225.0, 261.0, 258.0, 284.0, 292.0, 285.0, 288.0, 294.0, 288.0, 283.0, 293.0, 278.0, 295.0, 281.0, 298.0, 264.0, 258.0, 293.0, 286.0, 266.0, 259.0, 282.0, 291.0, 263.0, 267.0, 287.0, 283.0, 228.0, 237.0, 265.0, 260.0, 261.0, 255.0, 263.0, 262.0, 263.0, 262.0, 291.0, 291.0, 289.0, 290.0, 289.0, 293.0, 265.0, 257.0, 285.0, 288.0, 256.0, 257.0, 263.0, 262.0, 287.0, 283.0, 321.0, 309.0, 313.0, 314.0, 289.0, 293.0, 270.0, 255.0, 268.0, 257.0, 291.0, 291.0, 291.0, 291.0, 240.0, 236.0, 316.0, 317.0, 294.0, 285.0, 281.0, 289.0, 286.0, 293.0, 266.0, 259.0, 260.0, 270.0, 288.0, 291.0, 289.0, 293.0, 260.0, 262.0, 281.0, 298.0, 291.0, 291.0, 264.0, 261.0, 289.0, 287.0, 289.0, 290.0, 299.0, 283.0, 288.0, 291.0, 256.0, 266.0, 265.0, 260.0, 288.0, 294.0, 291.0, 291.0, 260.0, 262.0, 280.0, 293.0, 279.0, 294.0, 294.0, 288.0, 293.0, 280.0, 318.0, 312.0, 262.0, 257.0, 296.0, 283.0, 288.0, 291.0, 297.0, 290.0, 287.0, 295.0, 266.0, 259.0, 294.0, 285.0, 288.0, 288.0, 262.0, 257.0, 284.0, 286.0, 291.0, 296.0, 293.0, 289.0, 253.0, 263.0, 288.0, 282.0, 290.0, 289.0, 292.0, 287.0, 288.0, 291.0, 283.0, 287.0, 266.0, 250.0, 288.0, 285.0, 289.0, 293.0, 242.0, 250.0, 92.0, 88.0, 293.0, 286.0, 316.0, 311.0, 245.0, 234.0, 245.0, 259.0, 288.0, 291.0, 290.0, 289.0, 263.0, 256.0, 282.0, 294.0, 259.0, 271.0, 268.0, 265.0, 291.0, 299.0, 283.0, 296.0, 284.0, 289.0, 290.0, 289.0, 290.0, 286.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6925057574101982, "mean_inference_ms": 1.2346697990609385, "mean_action_processing_ms": 0.13300347069514712, "mean_env_wait_ms": 0.8334533851780352, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, 
"num_agent_steps_sampled": 12723200, "num_agent_steps_trained": 12723200, "num_env_steps_sampled": 6361600, "num_env_steps_trained": 6361600, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6361600, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12723200, "timers": {"training_iteration_time_ms": 3556.961, "learn_time_ms": 1045.48, "learn_throughput": 12243.182, "synch_weights_time_ms": 13.117}, "counters": {"num_env_steps_sampled": 6361600, "num_env_steps_trained": 6361600, "num_agent_steps_sampled": 12723200, "num_agent_steps_trained": 12723200}, "done": false, "episodes_total": 15904, "training_iteration": 497, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-39", "timestamp": 1666582359, "time_this_iter_s": 3.6132354736328125, "time_total_s": 1883.4202489852905, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": 
[], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": 
-1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, 
"min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1883.4202489852905, "timesteps_since_restore": 0, "iterations_since_restore": 497, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.533333333333335, "ram_util_percent": 10.6}} +{"custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.55, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.46, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.71, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.53, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.0, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.85, 
"useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.76, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.06, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0005341880023479462, "policy_loss": -0.0009257107158191502, "vf_loss": 7.641595840454102, "vf_explained_var": 0.5615800619125366, "kl": 0.0024605700746178627, "entropy": 0.7452712655067444, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6374400, "num_env_steps_trained": 6374400, "num_agent_steps_sampled": 12748800, "num_agent_steps_trained": 12748800}, "sampler_results": {"episode_reward_max": 633.0, "episode_reward_min": 180.0, "episode_reward_mean": 562.35, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, 
"policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.175}, "custom_metrics": {"sparse_reward_mean": 194.4, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 173.55, "shaped_reward_min": 60, "shaped_reward_max": 193, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.46, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 21, "onion_pickup_agent_1_mean": 15.71, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 21, "useful_onion_pickup_agent_0_mean": 16.32, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 21, "useful_onion_pickup_agent_1_mean": 15.53, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.04, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.06, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 21, "potting_onion_agent_1_mean": 15.42, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.0, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.32, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.85, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.17, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.76, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.05, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.74, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.0, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.06, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 21, "optimal_onion_potting_agent_1_mean": 15.42, "optimal_onion_potting_agent_1_min": 4, 
"optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.06, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 21, "viable_onion_potting_agent_1_mean": 15.42, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [525.0, 582.0, 582.0, 476.0, 633.0, 579.0, 570.0, 579.0, 525.0, 530.0, 579.0, 582.0, 522.0, 579.0, 582.0, 525.0, 576.0, 
579.0, 582.0, 579.0, 522.0, 525.0, 582.0, 582.0, 522.0, 573.0, 573.0, 582.0, 573.0, 630.0, 519.0, 579.0, 579.0, 587.0, 582.0, 525.0, 579.0, 576.0, 519.0, 570.0, 587.0, 582.0, 516.0, 570.0, 579.0, 579.0, 579.0, 570.0, 516.0, 573.0, 582.0, 492.0, 180.0, 579.0, 627.0, 479.0, 504.0, 579.0, 579.0, 519.0, 576.0, 530.0, 533.0, 590.0, 579.0, 573.0, 579.0, 576.0, 582.0, 582.0, 522.0, 587.0, 582.0, 579.0, 627.0, 573.0, 525.0, 579.0, 579.0, 582.0, 579.0, 570.0, 630.0, 522.0, 579.0, 570.0, 627.0, 513.0, 582.0, 573.0, 582.0, 579.0, 525.0, 579.0, 582.0, 630.0, 579.0, 579.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [268.0, 257.0, 291.0, 291.0, 291.0, 291.0, 240.0, 236.0, 316.0, 317.0, 294.0, 285.0, 281.0, 289.0, 286.0, 293.0, 266.0, 259.0, 260.0, 270.0, 288.0, 291.0, 289.0, 293.0, 260.0, 262.0, 281.0, 298.0, 291.0, 291.0, 264.0, 261.0, 289.0, 287.0, 289.0, 290.0, 299.0, 283.0, 288.0, 291.0, 256.0, 266.0, 265.0, 260.0, 288.0, 294.0, 291.0, 291.0, 260.0, 262.0, 280.0, 293.0, 279.0, 294.0, 294.0, 288.0, 293.0, 280.0, 318.0, 312.0, 262.0, 257.0, 296.0, 283.0, 288.0, 291.0, 297.0, 290.0, 287.0, 295.0, 266.0, 259.0, 294.0, 285.0, 288.0, 288.0, 262.0, 257.0, 284.0, 286.0, 291.0, 296.0, 293.0, 289.0, 253.0, 263.0, 288.0, 282.0, 290.0, 289.0, 292.0, 287.0, 288.0, 291.0, 283.0, 287.0, 266.0, 250.0, 288.0, 285.0, 289.0, 293.0, 242.0, 250.0, 92.0, 88.0, 293.0, 286.0, 316.0, 311.0, 245.0, 234.0, 245.0, 259.0, 288.0, 291.0, 290.0, 289.0, 263.0, 256.0, 282.0, 294.0, 259.0, 271.0, 268.0, 
265.0, 291.0, 299.0, 283.0, 296.0, 284.0, 289.0, 290.0, 289.0, 290.0, 286.0, 286.0, 296.0, 294.0, 288.0, 259.0, 263.0, 287.0, 300.0, 294.0, 288.0, 288.0, 291.0, 321.0, 306.0, 288.0, 285.0, 258.0, 267.0, 287.0, 292.0, 293.0, 286.0, 291.0, 291.0, 294.0, 285.0, 278.0, 292.0, 319.0, 311.0, 253.0, 269.0, 291.0, 288.0, 281.0, 289.0, 308.0, 319.0, 262.0, 251.0, 291.0, 291.0, 291.0, 282.0, 285.0, 297.0, 286.0, 293.0, 265.0, 260.0, 290.0, 289.0, 292.0, 290.0, 316.0, 314.0, 291.0, 288.0, 288.0, 291.0, 288.0, 291.0, 257.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6924666674580615, "mean_inference_ms": 1.234577253561023, "mean_action_processing_ms": 0.13299922325755478, "mean_env_wait_ms": 0.8334022090532933, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 633.0, "episode_reward_min": 180.0, "episode_reward_mean": 562.35, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 321.0}, "policy_reward_mean": {"ppo": 281.175}, "hist_stats": {"episode_reward": [525.0, 582.0, 582.0, 476.0, 633.0, 579.0, 570.0, 579.0, 525.0, 530.0, 579.0, 582.0, 522.0, 579.0, 582.0, 525.0, 576.0, 579.0, 582.0, 579.0, 522.0, 525.0, 582.0, 582.0, 522.0, 573.0, 573.0, 582.0, 573.0, 630.0, 519.0, 579.0, 579.0, 587.0, 582.0, 525.0, 579.0, 576.0, 519.0, 570.0, 587.0, 582.0, 516.0, 570.0, 579.0, 579.0, 579.0, 570.0, 516.0, 573.0, 582.0, 492.0, 180.0, 579.0, 627.0, 479.0, 504.0, 579.0, 579.0, 519.0, 576.0, 530.0, 533.0, 590.0, 579.0, 573.0, 579.0, 576.0, 582.0, 582.0, 522.0, 587.0, 582.0, 579.0, 627.0, 573.0, 525.0, 579.0, 579.0, 582.0, 579.0, 570.0, 630.0, 522.0, 579.0, 570.0, 627.0, 513.0, 582.0, 573.0, 582.0, 579.0, 525.0, 579.0, 582.0, 630.0, 579.0, 579.0, 579.0, 525.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [268.0, 257.0, 291.0, 291.0, 291.0, 291.0, 240.0, 236.0, 316.0, 317.0, 294.0, 285.0, 281.0, 289.0, 286.0, 293.0, 266.0, 259.0, 260.0, 270.0, 288.0, 291.0, 289.0, 293.0, 260.0, 262.0, 281.0, 298.0, 291.0, 291.0, 264.0, 261.0, 289.0, 287.0, 289.0, 290.0, 299.0, 283.0, 288.0, 291.0, 256.0, 266.0, 265.0, 260.0, 288.0, 294.0, 291.0, 291.0, 260.0, 262.0, 280.0, 293.0, 279.0, 294.0, 294.0, 288.0, 293.0, 280.0, 318.0, 312.0, 262.0, 257.0, 296.0, 283.0, 288.0, 291.0, 297.0, 290.0, 287.0, 295.0, 266.0, 259.0, 294.0, 285.0, 288.0, 288.0, 262.0, 257.0, 284.0, 286.0, 291.0, 296.0, 293.0, 289.0, 253.0, 263.0, 288.0, 282.0, 290.0, 289.0, 292.0, 287.0, 288.0, 291.0, 283.0, 287.0, 266.0, 250.0, 288.0, 285.0, 289.0, 293.0, 242.0, 250.0, 92.0, 88.0, 293.0, 286.0, 316.0, 311.0, 245.0, 234.0, 245.0, 259.0, 288.0, 291.0, 290.0, 289.0, 263.0, 256.0, 282.0, 294.0, 259.0, 271.0, 268.0, 265.0, 291.0, 299.0, 283.0, 296.0, 284.0, 289.0, 290.0, 289.0, 290.0, 286.0, 286.0, 296.0, 294.0, 288.0, 259.0, 263.0, 287.0, 300.0, 294.0, 288.0, 288.0, 291.0, 321.0, 306.0, 288.0, 285.0, 258.0, 267.0, 287.0, 292.0, 293.0, 286.0, 291.0, 291.0, 294.0, 285.0, 278.0, 292.0, 319.0, 311.0, 253.0, 269.0, 291.0, 288.0, 281.0, 289.0, 308.0, 319.0, 262.0, 251.0, 291.0, 291.0, 291.0, 282.0, 285.0, 297.0, 286.0, 293.0, 265.0, 260.0, 290.0, 289.0, 292.0, 290.0, 316.0, 314.0, 291.0, 288.0, 288.0, 291.0, 288.0, 291.0, 257.0, 268.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6924666674580615, "mean_inference_ms": 1.234577253561023, "mean_action_processing_ms": 0.13299922325755478, "mean_env_wait_ms": 0.8334022090532933, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, 
"num_agent_steps_sampled": 12748800, "num_agent_steps_trained": 12748800, "num_env_steps_sampled": 6374400, "num_env_steps_trained": 6374400, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6374400, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12748800, "timers": {"training_iteration_time_ms": 3555.338, "learn_time_ms": 1042.446, "learn_throughput": 12278.813, "synch_weights_time_ms": 12.005}, "counters": {"num_env_steps_sampled": 6374400, "num_env_steps_trained": 6374400, "num_agent_steps_sampled": 12748800, "num_agent_steps_trained": 12748800}, "done": false, "episodes_total": 15936, "training_iteration": 498, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-43", "timestamp": 1666582363, "time_this_iter_s": 3.5497889518737793, "time_total_s": 1886.9700379371643, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": 
[], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": 
-1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, 
"min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1886.9700379371643, "timesteps_since_restore": 0, "iterations_since_restore": 498, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 23.68, "ram_util_percent": 10.62}} +{"custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 174.36, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, 
"useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.53, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.79, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.61, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.5, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.86, 
"useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.82, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.14, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.5, "optimal_onion_potting_agent_1_min": 4, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.5, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, 
"viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.001022728392854333, "policy_loss": -0.0014227591454982758, "vf_loss": 7.671509265899658, "vf_explained_var": 0.5705825090408325, "kl": 0.0024727080017328262, "entropy": 0.7342387437820435, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6387200, "num_env_steps_trained": 6387200, "num_agent_steps_sampled": 12774400, "num_agent_steps_trained": 12774400}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 566.76, "episode_len_mean": 400.0, "episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, 
"policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.38}, "custom_metrics": {"sparse_reward_mean": 196.2, "sparse_reward_min": 60, "sparse_reward_max": 220, "shaped_reward_mean": 174.36, "shaped_reward_min": 60, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.53, "onion_pickup_agent_0_min": 9, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 15.79, "onion_pickup_agent_1_min": 5, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.41, "useful_onion_pickup_agent_0_min": 8, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 15.61, "useful_onion_pickup_agent_1_min": 5, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.06, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.01, 
"useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 1, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.14, "potting_onion_agent_0_min": 8, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.5, "potting_onion_agent_1_min": 4, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.05, "dish_pickup_agent_0_min": 1, "dish_pickup_agent_0_max": 7, "dish_pickup_agent_1_mean": 5.3, "dish_pickup_agent_1_min": 2, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 4.86, "useful_dish_pickup_agent_0_min": 1, "useful_dish_pickup_agent_0_max": 7, "useful_dish_pickup_agent_1_mean": 5.12, "useful_dish_pickup_agent_1_min": 0, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.04, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.05, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.01, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 1, "soup_pickup_agent_0_mean": 4.83, "soup_pickup_agent_0_min": 1, "soup_pickup_agent_0_max": 7, "soup_pickup_agent_1_mean": 5.07, "soup_pickup_agent_1_min": 2, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.82, "soup_delivery_agent_0_min": 1, "soup_delivery_agent_0_max": 7, "soup_delivery_agent_1_mean": 5.01, "soup_delivery_agent_1_min": 2, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.01, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 1, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.14, "optimal_onion_potting_agent_0_min": 8, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.5, "optimal_onion_potting_agent_1_min": 4, 
"optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.14, "viable_onion_potting_agent_0_min": 8, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.5, "viable_onion_potting_agent_1_min": 4, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 587.0, 582.0, 525.0, 579.0, 576.0, 519.0, 570.0, 587.0, 582.0, 516.0, 570.0, 579.0, 579.0, 579.0, 570.0, 516.0, 
573.0, 582.0, 492.0, 180.0, 579.0, 627.0, 479.0, 504.0, 579.0, 579.0, 519.0, 576.0, 530.0, 533.0, 590.0, 579.0, 573.0, 579.0, 576.0, 582.0, 582.0, 522.0, 587.0, 582.0, 579.0, 627.0, 573.0, 525.0, 579.0, 579.0, 582.0, 579.0, 570.0, 630.0, 522.0, 579.0, 570.0, 627.0, 513.0, 582.0, 573.0, 582.0, 579.0, 525.0, 579.0, 582.0, 630.0, 579.0, 579.0, 579.0, 525.0, 516.0, 582.0, 639.0, 576.0, 456.0, 633.0, 630.0, 633.0, 582.0, 576.0, 579.0, 579.0, 582.0, 525.0, 405.0, 636.0, 576.0, 516.0, 587.0, 573.0, 576.0, 636.0, 579.0, 527.0, 582.0, 462.0, 633.0, 627.0, 630.0, 633.0, 570.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 297.0, 290.0, 287.0, 295.0, 266.0, 259.0, 294.0, 285.0, 288.0, 288.0, 262.0, 257.0, 284.0, 286.0, 291.0, 296.0, 293.0, 289.0, 253.0, 263.0, 288.0, 282.0, 290.0, 289.0, 292.0, 287.0, 288.0, 291.0, 283.0, 287.0, 266.0, 250.0, 288.0, 285.0, 289.0, 293.0, 242.0, 250.0, 92.0, 88.0, 293.0, 286.0, 316.0, 311.0, 245.0, 234.0, 245.0, 259.0, 288.0, 291.0, 290.0, 289.0, 263.0, 256.0, 282.0, 294.0, 259.0, 271.0, 268.0, 265.0, 291.0, 299.0, 283.0, 296.0, 284.0, 289.0, 290.0, 289.0, 290.0, 286.0, 286.0, 296.0, 294.0, 288.0, 259.0, 263.0, 287.0, 300.0, 294.0, 288.0, 288.0, 291.0, 321.0, 306.0, 288.0, 285.0, 258.0, 267.0, 287.0, 292.0, 293.0, 286.0, 291.0, 291.0, 294.0, 285.0, 278.0, 292.0, 319.0, 311.0, 253.0, 269.0, 291.0, 288.0, 281.0, 289.0, 308.0, 319.0, 262.0, 251.0, 291.0, 291.0, 291.0, 282.0, 285.0, 297.0, 286.0, 293.0, 265.0, 260.0, 290.0, 289.0, 292.0, 
290.0, 316.0, 314.0, 291.0, 288.0, 288.0, 291.0, 288.0, 291.0, 257.0, 268.0, 265.0, 251.0, 291.0, 291.0, 322.0, 317.0, 280.0, 296.0, 228.0, 228.0, 311.0, 322.0, 319.0, 311.0, 317.0, 316.0, 296.0, 286.0, 284.0, 292.0, 292.0, 287.0, 283.0, 296.0, 290.0, 292.0, 262.0, 263.0, 208.0, 197.0, 317.0, 319.0, 289.0, 287.0, 265.0, 251.0, 288.0, 299.0, 290.0, 283.0, 294.0, 282.0, 320.0, 316.0, 290.0, 289.0, 265.0, 262.0, 291.0, 291.0, 231.0, 231.0, 317.0, 316.0, 316.0, 311.0, 309.0, 321.0, 321.0, 312.0, 287.0, 283.0, 317.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6924363322164473, "mean_inference_ms": 1.2344869938567593, "mean_action_processing_ms": 0.13299604129951562, "mean_env_wait_ms": 0.8333524514034752, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 180.0, "episode_reward_mean": 566.76, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 88.0}, "policy_reward_max": {"ppo": 322.0}, "policy_reward_mean": {"ppo": 283.38}, "hist_stats": {"episode_reward": [579.0, 587.0, 582.0, 525.0, 579.0, 576.0, 519.0, 570.0, 587.0, 582.0, 516.0, 570.0, 579.0, 579.0, 579.0, 570.0, 516.0, 573.0, 582.0, 492.0, 180.0, 579.0, 627.0, 479.0, 504.0, 579.0, 579.0, 519.0, 576.0, 530.0, 533.0, 590.0, 579.0, 573.0, 579.0, 576.0, 582.0, 582.0, 522.0, 587.0, 582.0, 579.0, 627.0, 573.0, 525.0, 579.0, 579.0, 582.0, 579.0, 570.0, 630.0, 522.0, 579.0, 570.0, 627.0, 513.0, 582.0, 573.0, 582.0, 579.0, 525.0, 579.0, 582.0, 630.0, 579.0, 579.0, 579.0, 525.0, 516.0, 582.0, 639.0, 576.0, 456.0, 633.0, 630.0, 633.0, 582.0, 576.0, 579.0, 579.0, 582.0, 525.0, 405.0, 636.0, 576.0, 516.0, 587.0, 573.0, 576.0, 636.0, 579.0, 527.0, 582.0, 462.0, 633.0, 627.0, 630.0, 633.0, 570.0, 633.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [288.0, 291.0, 297.0, 290.0, 287.0, 295.0, 266.0, 259.0, 294.0, 285.0, 288.0, 288.0, 262.0, 257.0, 284.0, 286.0, 291.0, 296.0, 293.0, 289.0, 253.0, 263.0, 288.0, 282.0, 290.0, 289.0, 292.0, 287.0, 288.0, 291.0, 283.0, 287.0, 266.0, 250.0, 288.0, 285.0, 289.0, 293.0, 242.0, 250.0, 92.0, 88.0, 293.0, 286.0, 316.0, 311.0, 245.0, 234.0, 245.0, 259.0, 288.0, 291.0, 290.0, 289.0, 263.0, 256.0, 282.0, 294.0, 259.0, 271.0, 268.0, 265.0, 291.0, 299.0, 283.0, 296.0, 284.0, 289.0, 290.0, 289.0, 290.0, 286.0, 286.0, 296.0, 294.0, 288.0, 259.0, 263.0, 287.0, 300.0, 294.0, 288.0, 288.0, 291.0, 321.0, 306.0, 288.0, 285.0, 258.0, 267.0, 287.0, 292.0, 293.0, 286.0, 291.0, 291.0, 294.0, 285.0, 278.0, 292.0, 319.0, 311.0, 253.0, 269.0, 291.0, 288.0, 281.0, 289.0, 308.0, 319.0, 262.0, 251.0, 291.0, 291.0, 291.0, 282.0, 285.0, 297.0, 286.0, 293.0, 265.0, 260.0, 290.0, 289.0, 292.0, 290.0, 316.0, 314.0, 291.0, 288.0, 288.0, 291.0, 288.0, 291.0, 257.0, 268.0, 265.0, 251.0, 291.0, 291.0, 322.0, 317.0, 280.0, 296.0, 228.0, 228.0, 311.0, 322.0, 319.0, 311.0, 317.0, 316.0, 296.0, 286.0, 284.0, 292.0, 292.0, 287.0, 283.0, 296.0, 290.0, 292.0, 262.0, 263.0, 208.0, 197.0, 317.0, 319.0, 289.0, 287.0, 265.0, 251.0, 288.0, 299.0, 290.0, 283.0, 294.0, 282.0, 320.0, 316.0, 290.0, 289.0, 265.0, 262.0, 291.0, 291.0, 231.0, 231.0, 317.0, 316.0, 316.0, 311.0, 309.0, 321.0, 321.0, 312.0, 287.0, 283.0, 317.0, 316.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6924363322164473, "mean_inference_ms": 1.2344869938567593, "mean_action_processing_ms": 0.13299604129951562, "mean_env_wait_ms": 0.8333524514034752, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0, "num_healthy_workers": 16, 
"num_agent_steps_sampled": 12774400, "num_agent_steps_trained": 12774400, "num_env_steps_sampled": 6387200, "num_env_steps_trained": 6387200, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6387200, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12774400, "timers": {"training_iteration_time_ms": 3572.611, "learn_time_ms": 1048.765, "learn_throughput": 12204.837, "synch_weights_time_ms": 12.104}, "counters": {"num_env_steps_sampled": 6387200, "num_env_steps_trained": 6387200, "num_agent_steps_sampled": 12774400, "num_agent_steps_trained": 12774400}, "done": false, "episodes_total": 15968, "training_iteration": 499, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-47", "timestamp": 1666582367, "time_this_iter_s": 3.6723570823669434, "time_total_s": 1890.6423950195312, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, "counter_goals": [], "counter_drop": 
[], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, "attention_num_transformer_units": 1, 
"attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, "wait_allowed": false, 
"counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": false, 
"attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": 
-1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, 
"min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1890.6423950195312, "timesteps_since_restore": 0, "iterations_since_restore": 499, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 20.866666666666667, "ram_util_percent": 10.616666666666667}} +{"evaluation": {"average_sparse_reward": 200.0, "num_healthy_workers": 0, "num_recreated_workers": 0}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.73, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, 
"useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.24, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.02, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, "onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.18, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.29, 
"dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.06, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.1, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.96, "optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, 
"viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "episode_media": {}, "num_recreated_workers": 0, "info": {"learner": {"ppo": {"learner_stats": {"cur_kl_coeff": 0.0, "cur_lr": 0.0010000000474974513, "total_loss": -0.0010659887921065092, "policy_loss": -0.0014806217513978481, "vf_loss": 7.818281173706055, "vf_explained_var": 0.5564167499542236, "kl": 0.0027102380990982056, "entropy": 0.7343902587890625, "entropy_coeff": 0.0005000000237487257, "model": {}}}}, "num_env_steps_sampled": 6400000, "num_env_steps_trained": 6400000, "num_agent_steps_sampled": 12800000, "num_agent_steps_trained": 12800000}, "sampler_results": {"episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 573.93, "episode_len_mean": 400.0, 
"episode_media": {}, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 197.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 286.965}, "custom_metrics": {"sparse_reward_mean": 198.6, "sparse_reward_min": 140, "sparse_reward_max": 220, "shaped_reward_mean": 176.73, "shaped_reward_min": 125, "shaped_reward_max": 199, "tomato_pickup_agent_0_mean": 0.0, "tomato_pickup_agent_0_min": 0, "tomato_pickup_agent_0_max": 0, "tomato_pickup_agent_1_mean": 0.0, "tomato_pickup_agent_1_min": 0, "tomato_pickup_agent_1_max": 0, "useful_tomato_pickup_agent_0_mean": 0.0, "useful_tomato_pickup_agent_0_min": 0, "useful_tomato_pickup_agent_0_max": 0, "useful_tomato_pickup_agent_1_mean": 0.0, "useful_tomato_pickup_agent_1_min": 0, "useful_tomato_pickup_agent_1_max": 0, "tomato_drop_agent_0_mean": 0.0, "tomato_drop_agent_0_min": 0, "tomato_drop_agent_0_max": 0, "tomato_drop_agent_1_mean": 0.0, "tomato_drop_agent_1_min": 0, "tomato_drop_agent_1_max": 0, "useful_tomato_drop_agent_0_mean": 0.0, "useful_tomato_drop_agent_0_min": 0, "useful_tomato_drop_agent_0_max": 0, "useful_tomato_drop_agent_1_mean": 0.0, "useful_tomato_drop_agent_1_min": 0, "useful_tomato_drop_agent_1_max": 0, "potting_tomato_agent_0_mean": 0.0, "potting_tomato_agent_0_min": 0, "potting_tomato_agent_0_max": 0, "potting_tomato_agent_1_mean": 0.0, "potting_tomato_agent_1_min": 0, "potting_tomato_agent_1_max": 0, "onion_pickup_agent_0_mean": 16.51, "onion_pickup_agent_0_min": 11, "onion_pickup_agent_0_max": 23, "onion_pickup_agent_1_mean": 16.24, "onion_pickup_agent_1_min": 10, "onion_pickup_agent_1_max": 22, "useful_onion_pickup_agent_0_mean": 16.42, "useful_onion_pickup_agent_0_min": 10, "useful_onion_pickup_agent_0_max": 23, "useful_onion_pickup_agent_1_mean": 16.02, "useful_onion_pickup_agent_1_min": 10, "useful_onion_pickup_agent_1_max": 21, "onion_drop_agent_0_mean": 0.04, "onion_drop_agent_0_min": 0, "onion_drop_agent_0_max": 1, "onion_drop_agent_1_mean": 0.05, "onion_drop_agent_1_min": 0, 
"onion_drop_agent_1_max": 1, "useful_onion_drop_agent_0_mean": 0.0, "useful_onion_drop_agent_0_min": 0, "useful_onion_drop_agent_0_max": 0, "useful_onion_drop_agent_1_mean": 0.01, "useful_onion_drop_agent_1_min": 0, "useful_onion_drop_agent_1_max": 1, "potting_onion_agent_0_mean": 16.1, "potting_onion_agent_0_min": 11, "potting_onion_agent_0_max": 22, "potting_onion_agent_1_mean": 15.96, "potting_onion_agent_1_min": 10, "potting_onion_agent_1_max": 21, "dish_pickup_agent_0_mean": 5.18, "dish_pickup_agent_0_min": 3, "dish_pickup_agent_0_max": 8, "dish_pickup_agent_1_mean": 5.29, "dish_pickup_agent_1_min": 3, "dish_pickup_agent_1_max": 8, "useful_dish_pickup_agent_0_mean": 5.06, "useful_dish_pickup_agent_0_min": 3, "useful_dish_pickup_agent_0_max": 8, "useful_dish_pickup_agent_1_mean": 5.14, "useful_dish_pickup_agent_1_min": 2, "useful_dish_pickup_agent_1_max": 8, "dish_drop_agent_0_mean": 0.03, "dish_drop_agent_0_min": 0, "dish_drop_agent_0_max": 1, "dish_drop_agent_1_mean": 0.03, "dish_drop_agent_1_min": 0, "dish_drop_agent_1_max": 1, "useful_dish_drop_agent_0_mean": 0.0, "useful_dish_drop_agent_0_min": 0, "useful_dish_drop_agent_0_max": 0, "useful_dish_drop_agent_1_mean": 0.0, "useful_dish_drop_agent_1_min": 0, "useful_dish_drop_agent_1_max": 0, "soup_pickup_agent_0_mean": 4.99, "soup_pickup_agent_0_min": 3, "soup_pickup_agent_0_max": 8, "soup_pickup_agent_1_mean": 5.0, "soup_pickup_agent_1_min": 3, "soup_pickup_agent_1_max": 7, "soup_delivery_agent_0_mean": 4.97, "soup_delivery_agent_0_min": 3, "soup_delivery_agent_0_max": 8, "soup_delivery_agent_1_mean": 4.96, "soup_delivery_agent_1_min": 3, "soup_delivery_agent_1_max": 7, "soup_drop_agent_0_mean": 0.0, "soup_drop_agent_0_min": 0, "soup_drop_agent_0_max": 0, "soup_drop_agent_1_mean": 0.0, "soup_drop_agent_1_min": 0, "soup_drop_agent_1_max": 0, "optimal_onion_potting_agent_0_mean": 16.1, "optimal_onion_potting_agent_0_min": 11, "optimal_onion_potting_agent_0_max": 22, "optimal_onion_potting_agent_1_mean": 15.96, 
"optimal_onion_potting_agent_1_min": 10, "optimal_onion_potting_agent_1_max": 21, "optimal_tomato_potting_agent_0_mean": 0.0, "optimal_tomato_potting_agent_0_min": 0, "optimal_tomato_potting_agent_0_max": 0, "optimal_tomato_potting_agent_1_mean": 0.0, "optimal_tomato_potting_agent_1_min": 0, "optimal_tomato_potting_agent_1_max": 0, "viable_onion_potting_agent_0_mean": 16.1, "viable_onion_potting_agent_0_min": 11, "viable_onion_potting_agent_0_max": 22, "viable_onion_potting_agent_1_mean": 15.96, "viable_onion_potting_agent_1_min": 10, "viable_onion_potting_agent_1_max": 21, "viable_tomato_potting_agent_0_mean": 0.0, "viable_tomato_potting_agent_0_min": 0, "viable_tomato_potting_agent_0_max": 0, "viable_tomato_potting_agent_1_mean": 0.0, "viable_tomato_potting_agent_1_min": 0, "viable_tomato_potting_agent_1_max": 0, "catastrophic_onion_potting_agent_0_mean": 0.0, "catastrophic_onion_potting_agent_0_min": 0, "catastrophic_onion_potting_agent_0_max": 0, "catastrophic_onion_potting_agent_1_mean": 0.0, "catastrophic_onion_potting_agent_1_min": 0, "catastrophic_onion_potting_agent_1_max": 0, "catastrophic_tomato_potting_agent_0_mean": 0.0, "catastrophic_tomato_potting_agent_0_min": 0, "catastrophic_tomato_potting_agent_0_max": 0, "catastrophic_tomato_potting_agent_1_mean": 0.0, "catastrophic_tomato_potting_agent_1_min": 0, "catastrophic_tomato_potting_agent_1_max": 0, "useless_onion_potting_agent_0_mean": 0.0, "useless_onion_potting_agent_0_min": 0, "useless_onion_potting_agent_0_max": 0, "useless_onion_potting_agent_1_mean": 0.0, "useless_onion_potting_agent_1_min": 0, "useless_onion_potting_agent_1_max": 0, "useless_tomato_potting_agent_0_mean": 0.0, "useless_tomato_potting_agent_0_min": 0, "useless_tomato_potting_agent_0_max": 0, "useless_tomato_potting_agent_1_mean": 0.0, "useless_tomato_potting_agent_1_min": 0, "useless_tomato_potting_agent_1_max": 0}, "hist_stats": {"episode_reward": [579.0, 573.0, 579.0, 576.0, 582.0, 582.0, 522.0, 587.0, 582.0, 579.0, 627.0, 
573.0, 525.0, 579.0, 579.0, 582.0, 579.0, 570.0, 630.0, 522.0, 579.0, 570.0, 627.0, 513.0, 582.0, 573.0, 582.0, 579.0, 525.0, 579.0, 582.0, 630.0, 579.0, 579.0, 579.0, 525.0, 516.0, 582.0, 639.0, 576.0, 456.0, 633.0, 630.0, 633.0, 582.0, 576.0, 579.0, 579.0, 582.0, 525.0, 405.0, 636.0, 576.0, 516.0, 587.0, 573.0, 576.0, 636.0, 579.0, 527.0, 582.0, 462.0, 633.0, 627.0, 630.0, 633.0, 570.0, 633.0, 519.0, 627.0, 462.0, 630.0, 590.0, 582.0, 522.0, 579.0, 530.0, 465.0, 465.0, 579.0, 576.0, 576.0, 582.0, 579.0, 633.0, 630.0, 582.0, 516.0, 590.0, 522.0, 582.0, 573.0, 582.0, 582.0, 633.0, 633.0, 579.0, 573.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 284.0, 289.0, 290.0, 289.0, 290.0, 286.0, 286.0, 296.0, 294.0, 288.0, 259.0, 263.0, 287.0, 300.0, 294.0, 288.0, 288.0, 291.0, 321.0, 306.0, 288.0, 285.0, 258.0, 267.0, 287.0, 292.0, 293.0, 286.0, 291.0, 291.0, 294.0, 285.0, 278.0, 292.0, 319.0, 311.0, 253.0, 269.0, 291.0, 288.0, 281.0, 289.0, 308.0, 319.0, 262.0, 251.0, 291.0, 291.0, 291.0, 282.0, 285.0, 297.0, 286.0, 293.0, 265.0, 260.0, 290.0, 289.0, 292.0, 290.0, 316.0, 314.0, 291.0, 288.0, 288.0, 291.0, 288.0, 291.0, 257.0, 268.0, 265.0, 251.0, 291.0, 291.0, 322.0, 317.0, 280.0, 296.0, 228.0, 228.0, 311.0, 322.0, 319.0, 311.0, 317.0, 316.0, 296.0, 286.0, 284.0, 292.0, 292.0, 287.0, 283.0, 296.0, 290.0, 292.0, 262.0, 263.0, 208.0, 197.0, 317.0, 319.0, 289.0, 287.0, 265.0, 251.0, 288.0, 299.0, 290.0, 283.0, 294.0, 282.0, 320.0, 316.0, 290.0, 289.0, 265.0, 
262.0, 291.0, 291.0, 231.0, 231.0, 317.0, 316.0, 316.0, 311.0, 309.0, 321.0, 321.0, 312.0, 287.0, 283.0, 317.0, 316.0, 269.0, 250.0, 313.0, 314.0, 228.0, 234.0, 323.0, 307.0, 296.0, 294.0, 289.0, 293.0, 259.0, 263.0, 290.0, 289.0, 268.0, 262.0, 231.0, 234.0, 235.0, 230.0, 291.0, 288.0, 287.0, 289.0, 293.0, 283.0, 291.0, 291.0, 294.0, 285.0, 319.0, 314.0, 316.0, 314.0, 289.0, 293.0, 253.0, 263.0, 296.0, 294.0, 251.0, 271.0, 291.0, 291.0, 289.0, 284.0, 285.0, 297.0, 294.0, 288.0, 316.0, 317.0, 319.0, 314.0, 289.0, 290.0, 285.0, 288.0, 289.0, 290.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6923988056837467, "mean_inference_ms": 1.2343933696692113, "mean_action_processing_ms": 0.13299150779059749, "mean_env_wait_ms": 0.8332940369038218, "mean_env_render_ms": 0.0}, "num_faulty_episodes": 0}, "episode_reward_max": 639.0, "episode_reward_min": 405.0, "episode_reward_mean": 573.93, "episode_len_mean": 400.0, "episodes_this_iter": 32, "policy_reward_min": {"ppo": 197.0}, "policy_reward_max": {"ppo": 323.0}, "policy_reward_mean": {"ppo": 286.965}, "hist_stats": {"episode_reward": [579.0, 573.0, 579.0, 576.0, 582.0, 582.0, 522.0, 587.0, 582.0, 579.0, 627.0, 573.0, 525.0, 579.0, 579.0, 582.0, 579.0, 570.0, 630.0, 522.0, 579.0, 570.0, 627.0, 513.0, 582.0, 573.0, 582.0, 579.0, 525.0, 579.0, 582.0, 630.0, 579.0, 579.0, 579.0, 525.0, 516.0, 582.0, 639.0, 576.0, 456.0, 633.0, 630.0, 633.0, 582.0, 576.0, 579.0, 579.0, 582.0, 525.0, 405.0, 636.0, 576.0, 516.0, 587.0, 573.0, 576.0, 636.0, 579.0, 527.0, 582.0, 462.0, 633.0, 627.0, 630.0, 633.0, 570.0, 633.0, 519.0, 627.0, 462.0, 630.0, 590.0, 582.0, 522.0, 579.0, 530.0, 465.0, 465.0, 579.0, 576.0, 576.0, 582.0, 579.0, 633.0, 630.0, 582.0, 516.0, 590.0, 522.0, 582.0, 573.0, 582.0, 582.0, 633.0, 633.0, 579.0, 573.0, 579.0, 582.0], "episode_lengths": [400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 
400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400], "policy_ppo_reward": [283.0, 296.0, 284.0, 289.0, 290.0, 289.0, 290.0, 286.0, 286.0, 296.0, 294.0, 288.0, 259.0, 263.0, 287.0, 300.0, 294.0, 288.0, 288.0, 291.0, 321.0, 306.0, 288.0, 285.0, 258.0, 267.0, 287.0, 292.0, 293.0, 286.0, 291.0, 291.0, 294.0, 285.0, 278.0, 292.0, 319.0, 311.0, 253.0, 269.0, 291.0, 288.0, 281.0, 289.0, 308.0, 319.0, 262.0, 251.0, 291.0, 291.0, 291.0, 282.0, 285.0, 297.0, 286.0, 293.0, 265.0, 260.0, 290.0, 289.0, 292.0, 290.0, 316.0, 314.0, 291.0, 288.0, 288.0, 291.0, 288.0, 291.0, 257.0, 268.0, 265.0, 251.0, 291.0, 291.0, 322.0, 317.0, 280.0, 296.0, 228.0, 228.0, 311.0, 322.0, 319.0, 311.0, 317.0, 316.0, 296.0, 286.0, 284.0, 292.0, 292.0, 287.0, 283.0, 296.0, 290.0, 292.0, 262.0, 263.0, 208.0, 197.0, 317.0, 319.0, 289.0, 287.0, 265.0, 251.0, 288.0, 299.0, 290.0, 283.0, 294.0, 282.0, 320.0, 316.0, 290.0, 289.0, 265.0, 262.0, 291.0, 291.0, 231.0, 231.0, 317.0, 316.0, 316.0, 311.0, 309.0, 321.0, 321.0, 312.0, 287.0, 283.0, 317.0, 316.0, 269.0, 250.0, 313.0, 314.0, 228.0, 234.0, 323.0, 307.0, 296.0, 294.0, 289.0, 293.0, 259.0, 263.0, 290.0, 289.0, 268.0, 262.0, 231.0, 234.0, 235.0, 230.0, 291.0, 288.0, 287.0, 289.0, 293.0, 283.0, 291.0, 291.0, 294.0, 285.0, 319.0, 314.0, 316.0, 314.0, 289.0, 293.0, 253.0, 263.0, 296.0, 294.0, 251.0, 271.0, 291.0, 291.0, 289.0, 284.0, 285.0, 297.0, 294.0, 288.0, 316.0, 317.0, 319.0, 314.0, 289.0, 290.0, 285.0, 288.0, 289.0, 290.0, 291.0, 291.0]}, "sampler_perf": {"mean_raw_obs_processing_ms": 0.6923988056837467, "mean_inference_ms": 1.2343933696692113, "mean_action_processing_ms": 0.13299150779059749, "mean_env_wait_ms": 0.8332940369038218, "mean_env_render_ms": 0.0}, 
"num_faulty_episodes": 0, "num_healthy_workers": 16, "num_agent_steps_sampled": 12800000, "num_agent_steps_trained": 12800000, "num_env_steps_sampled": 6400000, "num_env_steps_trained": 6400000, "num_env_steps_sampled_this_iter": 12800, "num_env_steps_trained_this_iter": 12800, "timesteps_total": 6400000, "num_steps_trained_this_iter": 12800, "agent_timesteps_total": 12800000, "timers": {"training_iteration_time_ms": 3575.465, "learn_time_ms": 1047.578, "learn_throughput": 12218.661, "synch_weights_time_ms": 13.203}, "counters": {"num_env_steps_sampled": 6400000, "num_env_steps_trained": 6400000, "num_agent_steps_sampled": 12800000, "num_agent_steps_trained": 12800000}, "done": false, "episodes_total": 16000, "training_iteration": 500, "trial_id": "default", "experiment_id": "cf8595fc459044c399eb52a9c8595a85", "date": "2022-10-23_20-32-54", "timestamp": 1666582374, "time_this_iter_s": 7.072265625, "time_total_s": 1897.7146606445312, "pid": 1816099, "hostname": "svm.bair.berkeley.edu", "node_ip": "128.32.175.10", "config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": false, 
"wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 400, "batch_mode": "truncate_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, "use_attention": 
false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {"extra_python_environs_for_driver": {}, "extra_python_environs_for_worker": {}, "num_gpus": 1, "num_cpus_per_worker": 1, "num_gpus_per_worker": 0, "_fake_gpus": false, "custom_resources_per_worker": {}, "placement_strategy": "PACK", "eager_tracing": false, "eager_max_retraces": 20, "tf_session_args": {"intra_op_parallelism_threads": 2, "inter_op_parallelism_threads": 2, "gpu_options": {"allow_growth": true}, "log_device_placement": false, "device_count": {"CPU": 1}, "allow_soft_placement": true}, "local_tf_session_args": {"intra_op_parallelism_threads": 8, "inter_op_parallelism_threads": 8}, "env": "overcooked_multi_agent", "env_config": {"mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}, "env_params": {"horizon": 400, "mlam_params": {"start_orientations": 
false, "wait_allowed": false, "counter_goals": [], "counter_drop": [], "counter_pickup": [], "same_motion_goals": true}}, "multi_agent_params": {"reward_shaping_factor": 1.0, "reward_shaping_horizon": Infinity, "use_phi": false, "bc_schedule": [[0, 0], [Infinity, 0]]}, "outer_shape": null, "eval_mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": {"PLACEMENT_IN_POT_REW": 3, "DISH_PICKUP_REWARD": 3, "SOUP_PICKUP_REWARD": 5, "DISH_DISP_DISTANCE_REW": 0, "POT_DISTANCE_REW": 0, "SOUP_DISTANCE_REW": 0}}}, "observation_space": null, "action_space": null, "env_task_fn": null, "render_env": false, "clip_rewards": null, "normalize_actions": true, "clip_actions": false, "disable_env_checking": false, "num_workers": 16, "num_envs_per_worker": 1, "sample_collector": "", "sample_async": false, "enable_connectors": false, "rollout_fragment_length": 1, "batch_mode": "complete_episodes", "remote_worker_envs": false, "remote_env_batch_wait_ms": 0, "validate_workers_after_construction": true, "ignore_worker_failures": false, "recreate_failed_workers": false, "restart_failed_sub_environments": false, "num_consecutive_worker_failures_tolerance": 100, "horizon": null, "soft_horizon": false, "no_done_at_end": false, "preprocessor_pref": "deepmind", "observation_filter": "NoFilter", "synchronize_filters": true, "compress_observations": false, "enable_tf1_exec_eagerly": false, "sampler_perf_stats_ema_coef": null, "gamma": 0.99, "lr": 0.001, "train_batch_size": 12800, "model": {"_use_default_native_models": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "fcnet_hiddens": [256, 256], "fcnet_activation": "tanh", "conv_filters": null, "conv_activation": "relu", "post_fcnet_hiddens": [], "post_fcnet_activation": "relu", "free_log_std": false, "no_final_linear": false, "vf_share_layers": false, "use_lstm": false, "max_seq_len": 20, "lstm_cell_size": 256, "lstm_use_prev_action": false, "lstm_use_prev_reward": false, "_time_major": false, 
"use_attention": false, "attention_num_transformer_units": 1, "attention_dim": 64, "attention_num_heads": 1, "attention_head_dim": 32, "attention_memory_inference": 50, "attention_memory_training": 50, "attention_position_wise_mlp_dim": 32, "attention_init_gru_gate_bias": 2.0, "attention_use_n_prev_actions": 0, "attention_use_n_prev_rewards": 0, "framestack": true, "dim": 84, "grayscale": false, "zero_mean": true, "custom_model": null, "custom_model_config": {}, "custom_action_dist": null, "custom_preprocessor": null, "lstm_use_prev_action_reward": -1}, "optimizer": {}, "explore": true, "exploration_config": {"type": "StochasticSampling"}, "input_config": {}, "actions_in_input_normalized": false, "postprocess_inputs": false, "shuffle_buffer_size": 0, "output": null, "output_config": {}, "output_compress_columns": ["obs", "new_obs"], "output_max_file_size": 67108864, "evaluation_interval": 100, "evaluation_duration": 10, "evaluation_duration_unit": "episodes", "evaluation_sample_timeout_s": 180.0, "evaluation_parallel_to_training": false, "evaluation_config": {}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": true, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, "timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": 
-1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "off_policy_estimation_methods": {}, "evaluation_num_workers": 0, "always_attach_evaluation_results": false, "in_evaluation": false, "sync_filters_on_rollout_workers_timeout_s": 60.0, "keep_per_episode_custom_metrics": false, "metrics_episode_collection_timeout_s": 60.0, "metrics_num_episodes_for_smoothing": 100, "min_time_s_per_iteration": null, "min_train_timesteps_per_iteration": 0, "min_sample_timesteps_per_iteration": 0, "logger_creator": null, "logger_config": null, "log_level": "WARN", "log_sys_usage": true, "fake_sampler": false, "seed": 11, "_tf_policy_handles_more_than_one_loss": false, "_disable_preprocessor_api": false, "_disable_action_flattening": false, "_disable_execution_plan_api": true, "simple_optimizer": true, "monitor": -1, "evaluation_num_episodes": -1, "metrics_smoothing_episodes": -1, 
"timesteps_per_iteration": -1, "min_iter_time_s": -1, "collect_metrics_timeout": -1, "buffer_size": -1, "prioritized_replay": -1, "learning_starts": -1, "replay_batch_size": -1, "replay_sequence_length": null, "prioritized_replay_alpha": -1, "prioritized_replay_beta": -1, "prioritized_replay_eps": -1, "min_time_s_per_reporting": -1, "min_train_timesteps_per_reporting": -1, "min_sample_timesteps_per_reporting": -1, "input_evaluation": -1, "lr_schedule": null, "use_critic": true, "use_gae": true, "kl_coeff": 0.2, "sgd_minibatch_size": 8000, "num_sgd_iter": 8, "shuffle_sequences": true, "vf_loss_coeff": 0.0001, "entropy_coeff": 0.0, "entropy_coeff_schedule": [[0, 0.2], [300000.0, 0.0005]], "clip_param": 0.05, "vf_clip_param": 10.0, "grad_clip": 0.1, "kl_target": 0.01, "vf_share_layers": true, "lambda": 0.98, "input": "sampler", "multiagent": {"policies": {"ppo": ""}, "policy_map_capacity": 100, "policy_map_cache": null, "policy_mapping_fn": ".select_policy at 0x7f5196324b00>", "policies_to_train": "{'ppo'}", "observation_fn": null, "replay_mode": "independent", "count_steps_by": "env_steps"}, "callbacks": "", "create_env_on_driver": false, "custom_eval_function": "._evaluate at 0x7f5196324d40>", "framework": "tf", "num_cpus_for_driver": 1}, "time_since_restore": 1897.7146606445312, "timesteps_since_restore": 0, "iterations_since_restore": 500, "warmup_time": 13.401320934295654, "perf": {"cpu_util_percent": 15.12, "ram_util_percent": 10.599999999999998}} diff --git a/human_aware_rl/rllib/rllib.py b/human_aware_rl/rllib/rllib.py index 228fceff..1d65607b 100644 --- a/human_aware_rl/rllib/rllib.py +++ b/human_aware_rl/rllib/rllib.py @@ -7,8 +7,8 @@ from ray.tune.logger import UnifiedLogger from ray.tune.result import DEFAULT_RESULTS_DIR from ray.rllib.env.multi_agent_env import MultiAgentEnv -from ray.rllib.agents.callbacks import DefaultCallbacks -from ray.rllib.agents.ppo.ppo import PPOTrainer +from ray.rllib.algorithms.callbacks import DefaultCallbacks +from 
ray.rllib.agents.ppo import PPOTrainer from ray.rllib.models import ModelCatalog from human_aware_rl.rllib.utils import softmax, get_base_ae, get_required_arguments, iterable_equal from datetime import datetime @@ -140,10 +140,13 @@ def __init__(self, base_env, reward_shaping_factor=0.0, reward_shaping_horizon=0 self.reward_shaping_factor = reward_shaping_factor self.reward_shaping_horizon = reward_shaping_horizon self.use_phi = use_phi - self._setup_observation_space() - self.action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS)) self.anneal_bc_factor(0) - self.reset() + self._agent_ids = set(self.reset().keys()) + #fixes deprecation warnings + self._spaces_in_preferred_format = True + + + def _validate_featurize_fns(self, mapping): assert 'ppo' in mapping, "At least one ppo agent must be specified" @@ -166,12 +169,19 @@ def _validate_schedule(self, schedule): if (schedule[-1][0] < float('inf')): schedule.append((float('inf'), schedule[-1][1])) - def _setup_observation_space(self): - dummy_state = self.base_env.mdp.get_standard_start_state() + def _setup_action_space(self,agents): + action_sp = {} + for agent in agents: + action_sp[agent] = gym.spaces.Discrete(len(Action.ALL_ACTIONS)) + self.action_space = gym.spaces.Dict(action_sp) + self.shared_action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS)) + def _setup_observation_space(self,agents): + dummy_state = self.base_env.mdp.get_standard_start_state() #ppo observation featurize_fn_ppo = lambda state: self.base_env.lossless_state_encoding_mdp(state) obs_shape = featurize_fn_ppo(dummy_state)[0].shape + high = np.ones(obs_shape) * float("inf") low = np.ones(obs_shape) * 0 self.ppo_observation_space = gym.spaces.Box(np.float32(low), np.float32(high), dtype=np.float32) @@ -182,6 +192,16 @@ def _setup_observation_space(self): high = np.ones(obs_shape) * 100 low = np.ones(obs_shape) * -100 self.bc_observation_space = gym.spaces.Box(np.float32(low), np.float32(high), dtype=np.float32) + #hardcode mapping 
between action space and agent + ob_space = {} + for agent in agents: + if agent.startswith("ppo"): + ob_space[agent] = self.ppo_observation_space + else: + ob_space[agent] = self.bc_observation_space + self.observation_space = gym.spaces.Dict(ob_space) + + def _get_featurize_fn(self, agent_id): if agent_id.startswith('ppo'): @@ -209,7 +229,11 @@ def _populate_agents(self): # Ensure agent names are unique agents[0] = agents[0] + '_0' agents[1] = agents[1] + '_1' - + + #logically the action_space and the observation_space should be set along with the generated agents + #the agents are also randomized in each iteration if bc agents are allowed, which requires reestablishing the action & observation space + self._setup_action_space(agents) + self._setup_observation_space(agents) return agents def _anneal(self, start_v, curr_t, end_t, end_v=0, start_t=0): @@ -233,7 +257,8 @@ def step(self, action_dict): observation: formatted to be standard input for self.agent_idx's policy """ action = [action_dict[self.curr_agents[0]], action_dict[self.curr_agents[1]]] - assert all(self.action_space.contains(a) for a in action), "%r (%s) invalid"%(action, type(action)) + + assert all(self.action_space[agent].contains(action_dict[agent]) for agent in action_dict), "%r (%s) invalid"%(action, type(action)) joint_action = [Action.INDEX_TO_ACTION[a] for a in action] # take a step in the current base environment @@ -362,7 +387,7 @@ def on_episode_end(self, worker, base_env, policies, episode, **kwargs): shaped_reward (int) - total reward shaping reward the agent earned this episode """ # Get rllib.OvercookedMultiAgentEnv refernce from rllib wraper - env = base_env.get_unwrapped()[0] + env = base_env.get_sub_environments()[0] # Both agents share the same info so it doesn't matter whose we use, just use 0th agent's info_dict = episode.last_info_for(env.curr_agents[0]) @@ -507,8 +532,8 @@ def gen_trainer_from_params(params): if not ray.is_initialized(): init_params = { "ignore_reinit_error" : 
True, - "include_webui" : False, - "temp_dir" : params['ray_params']['temp_dir'], + "include_dashboard" : False, + "_temp_dir" : params['ray_params']['temp_dir'], "log_to_driver" : params['verbose'], "logging_level" : logging.INFO if params['verbose'] else logging.CRITICAL } @@ -533,16 +558,16 @@ def gen_policy(policy_type="ppo"): if policy_type == "ppo": config = { "model" : { - "custom_options" : model_params, + 'custom_model_config' : model_params, "custom_model" : "MyPPOModel" } } - return (None, env.ppo_observation_space, env.action_space, config) + return (None, env.ppo_observation_space, env.shared_action_space, config) elif policy_type == "bc": bc_cls = bc_params['bc_policy_cls'] bc_config = bc_params['bc_config'] - return (bc_cls, env.bc_observation_space, env.action_space, bc_config) + return (bc_cls, env.bc_observation_space, env.shared_action_space, bc_config) # Rllib compatible way of setting the directory we store agent checkpoints in logdir_prefix = "{0}_{1}_{2}".format(params["experiment_name"], params['training_params']['seed'], timestr) @@ -572,13 +597,13 @@ def custom_logger_creator(config): multi_agent_config['policies'] = { policy : gen_policy(policy) for policy in all_policies } - def select_policy(agent_id): + def select_policy(agent_id, episode, worker, **kwargs): if agent_id.startswith('ppo'): return 'ppo' if agent_id.startswith('bc'): return 'bc' multi_agent_config['policy_mapping_fn'] = select_policy - multi_agent_config['policies_to_train'] = 'ppo' + multi_agent_config['policies_to_train'] = {'ppo'} if "outer_shape" not in environment_params: environment_params["outer_shape"] = None @@ -592,7 +617,7 @@ def select_policy(agent_id): environment_params["outer_shape"], 'ppo', 'ppo' if self_play else 'bc', verbose=params['verbose']), "env_config" : environment_params, - "eager" : False, + "eager_tracing" : False, **training_params }, logger_creator=custom_logger_creator) return trainer @@ -605,8 +630,7 @@ def select_policy(agent_id): def 
save_trainer(trainer, params, path=None): """ Saves a serialized trainer checkpoint at `path`. If none provided, the default path is - ~/ray_results//checkpoint_/checkpoint- - + ~/ray_results//checkpoint_ Note that `params` should follow the same schema as the dict passed into `gen_trainer_from_params` """ # Save trainer @@ -624,7 +648,7 @@ def save_trainer(trainer, params, path=None): def load_trainer(save_path, true_num_workers=False): """ Returns a ray compatible trainer object that was previously saved at `save_path` by a call to `save_trainer` - Note that `save_path` is the full path to the checkpoint FILE, not the checkpoint directory + Note that `save_path` is the full path to the checkpoint directory Additionally we decide if we want to use the same number of remote workers (see ray library Training APIs) as we store in the previous configuration, by default = False, we use only the local worker (see ray library API) @@ -634,35 +658,13 @@ def load_trainer(save_path, true_num_workers=False): with open(config_path, "rb") as f: # We use dill (instead of pickle) here because we must deserialize functions config = dill.load(f) - if not true_num_workers: # Override this param to lower overhead in trainer creation config['training_params']['num_workers'] = 0 - # Get un-trained trainer object with proper config - trainer = gen_trainer_from_params(config) - - # Load weights into dummy object - trainer.restore(save_path) - return trainer - -def load_trainer(save_path, true_num_workers=False): - """ - Returns a ray compatible trainer object that was previously saved at `save_path` by a call to `save_trainer` - Note that `save_path` is the full path to the checkpoint FILE, not the checkpoint directory - Additionally we decide if we want to use the same number of remote workers (see ray library Training APIs) - as we store in the previous configuration, by default = False, we use only the local worker - (see ray library API) - """ - # Read in params used to create 
trainer - config_path = os.path.join(os.path.dirname(save_path), "config.pkl") - with open(config_path, "rb") as f: - # We use dill (instead of pickle) here because we must deserialize functions - config = dill.load(f) - - if not true_num_workers: - # Override this param to lower overhead in trainer creation - config['training_params']['num_workers'] = 0 + if config["training_params"]["num_gpus"] == 1: + #all other configs for the server can be kept for local testing + config["training_params"]["num_gpus"] = 0 if "trained_example" in save_path: # For the unit testing we update the result directory in order to avoid an error @@ -670,7 +672,6 @@ def load_trainer(save_path, true_num_workers=False): # Get un-trained trainer object with proper config trainer = gen_trainer_from_params(config) - # Load weights into dummy object trainer.restore(save_path) return trainer diff --git a/requirements.txt b/requirements.txt index c8d63a0a..88810759 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,11 +7,11 @@ pymongo dill matplotlib requests -numpy==1.19.5 +numpy seaborn==0.9.0 pygame==1.9.5 -ray==0.8.5 +ray==2.0.0 protobuf -tensorflow==2.0.2 +tensorflow==2.10 -e ./overcooked_ai -e . 
\ No newline at end of file diff --git a/setup.py b/setup.py index 7b59f4f4..d2beb4d4 100644 --- a/setup.py +++ b/setup.py @@ -17,11 +17,11 @@ "dill", "matplotlib", "requests", - "numpy==1.19.5", + "numpy", "seaborn==0.9.0", "pygame==1.9.5", - "ray[rllib]==0.8.5", + "ray[rllib]==2.0.0", "protobuf", - "tensorflow==2.0.2", + "tensorflow==2.10", ], ) From 8e01e315d5a5f7730137e1153bf775d2e791e274 Mon Sep 17 00:00:00 2001 From: jyan1999 Date: Tue, 25 Oct 2022 12:57:14 -0700 Subject: [PATCH 37/38] Update Ray Updated ray[rllib] >= 2.0.0, Tensorflow == 2.10 Updated model configurations to comply with the new API --- human_aware_rl/ppo/ppo_rllib.py | 2 -- requirements.txt | 2 +- setup.py | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/human_aware_rl/ppo/ppo_rllib.py b/human_aware_rl/ppo/ppo_rllib.py index c2ec7160..d9ece769 100644 --- a/human_aware_rl/ppo/ppo_rllib.py +++ b/human_aware_rl/ppo/ppo_rllib.py @@ -69,7 +69,6 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name, **k value_out = tf.keras.layers.Dense(1)(out) self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out]) - #self.register_variables(self.base_model.variables) def forward(self, input_dict, state=None, seq_lens=None): @@ -175,7 +174,6 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, name, **k inputs=[flattened_obs_inputs, seq_in, lstm_h_in, lstm_c_in], outputs=[layer_out, value_out, h_out, c_out] ) - #self.register_variables(self.base_model.variables) def forward_rnn(self, inputs, state, seq_lens): diff --git a/requirements.txt b/requirements.txt index 88810759..85b8092d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ requests numpy seaborn==0.9.0 pygame==1.9.5 -ray==2.0.0 +ray[rllib]>=2.0.0 protobuf tensorflow==2.10 -e ./overcooked_ai diff --git a/setup.py b/setup.py index d2beb4d4..4a12d57c 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ "numpy", "seaborn==0.9.0", "pygame==1.9.5", - 
"ray[rllib]==2.0.0", + "ray[rllib]>=2.0.0", "protobuf", "tensorflow==2.10", ], From 55c37ae04e77f867cf452a0df4c98642d2034027 Mon Sep 17 00:00:00 2001 From: jyan1999 <49133332+jyan1999@users.noreply.github.com> Date: Sun, 27 Nov 2022 15:55:00 -0800 Subject: [PATCH 38/38] Update README.md Overwrote master with neurips2019 commit. Updated readme. --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d58b30ab..855199f8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,12 @@ # Human-Aware Reinforcement Learning -This code can be used to reproduce the results in the paper [On the Utility of Learning about Humans for Human-AI Coordination](https://arxiv.org/abs/1910.05789). *Note that this repository uses a specific older commit of the [overcooked_ai repository](https://github.com/HumanCompatibleAI/overcooked_ai)*, and should not be expected to work with the current version of that repository. +## :warning: DEPRECATION WARNING + +This repo is being deprecated and should no longer be used indepdently. This repo is now a module under the [overcooked_ai](https://github.com/HumanCompatibleAI/overcooked_ai/tree/master) project as we are in the process of consolidating several repos into one for convenience and better maintainability. + +This repo should now **only** be used to reproduce the results in the 2019 paper [On the Utility of Learning about Humans for Human-AI Coordination](https://arxiv.org/abs/1910.05789). + +*Note that this repository uses a specific older commit of the [overcooked_ai repository](https://github.com/HumanCompatibleAI/overcooked_ai)*, and should not be expected to work with the current version of that repository. To play the game with trained agents, you can use [Overcooked-Demo](https://github.com/HumanCompatibleAI/overcooked-demo).

rOFU3n!T-fg)3V_hei+z;zlZhbFLOAT z43?`(TKa+)?CIbED-OUc2R}aL>2`kXd3{xB_7y5spvxWN-{4_^1ATKOfL2w0WI<5bDx++~$#Chix-@IkP5(MZTX zBLn9HT}5Y(Nb<#}XApT8cQSC%TzoJ~+*rBx80O5ju-o~@4UVa5z&Oj7m32e!lQmQJ zkQGA8V`A1nRppro;pYD@=EO@rp(4irO!ye20fY&I#k8-K4^Id zgIC~SZWel5Bu@9R{(F&V)?FRAk+c|h`CS5Q|DB>MztiEb;~SzNdd+qYeUAg#T)et$ zBJUZs5pVR*!2`07$cr1gbgXa(Z;%?m)rSU%L>rQ*=95MAUWz6@w>+C?y1s2uNH#!MtM54T9yCp+fO!saU`0088|xp1k(|3 zRNmfE3f`CZ(fr3#sMWmvkh&&=))krY@_S22rJ*Tb^I5ja=Fmr!of1Ty)NaFkPy7wiYXcbNAO}ARvT?N06mGD8EO|eDCa1|GFx%G(umB+W zyc_oQePN-N7C7b0dOl|*!OTz@+OPK~E1JCpghAd|>=8lFG)XwcN?}^&v^^8bJZJIzeWuLJYgGOA{(qs3zb}z=XC9x)@X583s zHaF7QM>33`v)h**fL>TTDDQHF)V)(7Ha?gJ7%hWGy5mItAv!Qa*g}q!^ukyihkM<= ziyp5SglG4ru;i1&vETM%kmB`7^l_64@TN4F(icN)RxKpFcPCnPJRv{o*qF)?HAtx5 zBe=0C4srE0GN^@P!O3y>)P4?H#7VR8lsdtmb`8v2pvw~Dv_%T)LD*Vt0qrgeK&oU8 z^i7yUUCj2<=Zf_}X4}x`ULm;s+IToyBY>2o4=CysA=RmY?deC!?96;}e5)(0Zoi3} zEnQIM<6`D+GY&uI58+mW#XF4;(xfXxe%gr|t=S$~6MCu8f$LwWCu+UMtkpum!ww}1 zToz4Yn@^~~wB+YZwqGcpK2ZocPgb)GqhnyQpcrHCPXvq6`F79OR%79(o#^Qw#JoR$ z#+fh2x;yVdQ3y9-T763X8LS zu4cov7%f=iG6RQs=m48i41Y%lqo0EY zk?=0F>u7Nh9FVt1$EAnhW~~|~ng@~fC1FssTZd^t9vfMFovioqCyg57O`GeLpy6;l zYAeq|z{_g`OgfLo?uRcQ~}9X!4T>W`Ux& zB>rC53)OQYu-IY}zdPBG2KZOwivSBzdodx?cWBUxhC^um)|PBoU5q>WA7+_{ zdg%L^z*ek0h%3Itkng!77#Ze{iZT>4qV?#7Fh|%hpH%s}8PPG7a!l2sgnM-w3IFce z!j+yW&=605>YccSTCd+n=T54oo_|xRiS1DyP!z!JR;JS%&`8ylkJH5;L~xmZrtaOn zEHp^OpSwv^85+l6%@RFceOHwq&pyQEbk(5uX*y)ltML18CJeK5!%D+<@Jd#Sw~5)~ z@iY23EbKHczYx!2N<1)0cMDF*Dg=R{K)4@Mu+N}`hjcBbpUArx73GLO+1LlHhMw%cq`npV+W?6dWoaw1Anpa zJw_gvq#+&yY4-68+;W2@>TD^&G7E1K{=pscix*+;B+8?2*+TfhJM3TNk{dRiU?$&2 zn$-%?TQwK9>t}=FkOXMF4(3{t z8w5rZOYyws9H{R2ENcCoPj~laV(-GS|K$Jp;Hn^8@Jf+xdpDy>^^kg%#KPD76ibD_ z5vMWk>_eCpqE6*sjHIsyN6>kf-q9x^Quh1BEGK!?(5iw-XJAphtuW{ODk@iPz{-A| z;d`D8q?yZ}QjKzL;S}rB+&rNN-jAvv$A`USwSyW-%izzj(`o`4vU4%$Ke%Mqof*d( zUtc19E_v9lu@L8tbrx*!Pl4&PQbnN;HlbUMwqR7w7h++HSbeX8H8!uZ8yjj&mi%yH z-?udj=5IO7j&J_N2Gs4x(``XuxlxWQM0SW~X}Z&`$Ni~AmJyGRok`a;S7XBCMQn@E z54$E8{OXKz8q48)XSHCQq!0H58NPd5y5 zg1NsEfR5Is*wPXV 
z&tW1Gq;Y`sT}!5ZOE&XcuQDOJ?GR5GlnfoZ7F>7PbfRqHjN)=XVT(F1^=-WjGMuyOD6%stlEdtt4o1n*vie0D2q!gR8L%Ta?b6G4C4Ay`ovjz3=QgV4Jzf*jlVIAX6c=9;+i8#-Hf zDi7gz+QazE>@Qfias<6yV@y+|bTLFcn^v^14{xt&LuqMwxKd-yTRP<6Dk;FbEmL@! z(GHxpsgOJy94)e~?h;%`Um_Z%pHAateZ}%iZLG-lhP5?wneX{@%>J_g0^Cg@F=!0P z?wATmyHA1oE))JZ>=bIR7>zen&lBDFRCc_4HyGbhCo_f&g0|v`+~@fnqHdgs6Fd&BFao2^&thie^QY zLWyKGTU=8{R*hK6epf082G6oVB5W5~2CKo&>d&I(e!GZ*$u%aPL&W-*7VI25Sm|It z6rbs>Af3L`&?ZYjlrm~rtidvPr6ft7U3wg z!N<_6&{XaTITZ!0wrLa_;qJ~Sd&k4UwXa1LNfvDJ#$Z^ZHXP;%m2vK@EVg9-SQt-+ z(&*F0SkTkR_@&Q+XCBE1bih^>VyAt`Jv@cNetz zy=SuFk>D1O_-z6@Ts-hl`8X_$-!**2lH_#g<+HnR)sN|zvL+HM znzPBwC1d#W9kXaacB)`{v?KSWVRS>^bV2|3f!M^(@$WM2Fz@^k_Uv9GnKfgJjHfr~$_;m?hG8av?dkDinC6=R0Iy(Q%71i zME?_h^)K$LfAJZvr>Ekm%u1H#JcM^|Z(+|8RC&@QGxnHGW|zNjg~oY5$dI`wNO^IK zZK;6-ta_dSm@1E8Wi}TDSG5wEqIloTOX7Z)(m+deJ+e$NJ7Y+t#;I~aSnDNn zF5Q=|?Xscc)f!-@Duw&G>zK{ARlNSkFnTa27al4(gO$`Z!HJ)WRoXL}$)nmX*r6E# z7F?bly&44{|3uMA#n)l=Q&*5IJj^6h3&`ucN4V8}GaUb70OkcXu%a6&Fk(R*&e@p; zA-WR0q1PLIwkz{V_yzhZ8`#Bbj<_M}6K?YlVH+f*VI`RHg{PX?9BW@bH`#$3{`ynd z^^q{MaS^coPZxA~T_^W66j0So2YS4Zpq5R9=wDoqC;B-=bie?$r?-e&rv37T0 zhS6ShbdyAV$%jP#*;JbRO%uL5%7R2fpJ>viRy1?(6s5kZg$X`?(f6_&iL0?d)pNZd zjbotUZr3al~;IZZ!@$Yggelpnufzc9F@I{JVb(gmD z8f3{9JTAwk2SSQzYPd!ezzXKCBrg|Kh^8oSVvApSkTcGJo2L$-2PfH71`XCF?HA?{ zLEQ;a-1pN=a*>#W-0c<28<~Z@+bnQ{l90`5I>oH)qfueBB^KT*VcOC5%)20g{JCLB z2S{tv)}NO^P&yR7Z6eu}R)F_gmcU%exkP?lD%v*6LznMWHlXz|c6v@@-37^PN5)m~ zNs{IBmG|S1bPr-YYZDOj1ZbGok96KV&0OE@gMN$j+3HKPSp0}^;;=uSd`n+TY?qag zFE;utLqQw!YOa#Ok6$zM9Z`1j&tln)h(~0|h%M~$@>-Jrv>zKe?*hAKGeVFU6f3gQ z*&>p3vLrm|UF8XhJEXDX1&fOCAWO1z@V1u=ntP|Ih>~V-TE%G6`!d9Bes=MQQ$%Y^o}XBfCEnH^R>3J-e^!R}*|$<*%0qNVv1+ddGs<6H{+bl-^TpDe^% z9>u`7If7+DhrmqEncD95MxD8zVXH$fzAG&k+=zK1xFT)H@-?(^Yl;V0Eo>G9D$fS% zF$dvdU=Ap76WsZ5I-9yI*zVD}OKk6RODMg4kJx{mN$$_=MmM`_>^&3;Y>Lf^^Y9eB z)<2bvj8wJFdo#S2oYPLdX<29C)(>|4<(JH;X{P;rnT zD(mIRqm%Y*{vtn7lGAa!=?SYa#i@wcTAD%D@{@uazNd-S@G}Cn;;F1>vI^8*QY415 z)5sXZU})5wh|#hEblg-AIJ~)>=v~dnHOC&2K--!82yTWn_3_*~QHRz~S|qX@D9s~% 
zHlyFUqlgW!$yF(3*l=YzPJYs#W9>D#@yr@kHqD30h2l+5Y>dGt%aPnDyd+q2MhVWe zAiPVs4fA4Z$$p7LFe&l?nF%`3n9)PFr!2)Gw?fJ7kV3)6j5~t(G#7MDxCk=0_6TbG zN0N=@#^B)d0k!T9gMm4!aQRIP912Vng*?@ShU$OQ!=kWkEM&_b#V<{3Q0Gc2-t))+ zLE(C$`!Et*PYmK^W$7^7dJ`Y!p949&=Tol@4XkF-FxXBFcxHVCm>TquEzwA&$6QCB z>=YRED^p~VS4HhFi2L|+t!Yx@OsK4!OVim}jPv(l2irr)Y|XhaQ__jgqyY0J>Ot*# zJ{;fX1h-X_Ny}Ibqzm6-yksshzT*qIi3g!uF9G@+ghO^ustAU*2)-8$qElBa!okb7 z(G~s;`24OvnBq_#B_-~olDA?;PV-=*-4=MLKY;IL`p^&rxcI>$>h5uy?)%}&g9Gnl z%dQ5tLh2es=4~e~BM-r@NI5?2yf-`*f8N&XkIB*FfuinxcKogPE+{>}h&RsbhqEP# zX#AfB?5Z&0J7mA3aA5&teB~%3^_ffvIL3^17Qo7_BGIosmOO85CF;ld5~oW>%))LD zd+Y8^S6e<69QnB#%$!u1+RsYCuJ00bg}e&tR6hZs))}_#YXfW9WCQ2V+<>2FoS=Dk zI*iU!WG_1ma9wXgrSVl0aoxC=NEcobDPGt^mT0t)>cCIT;LLuKw8D@~@USF*GFIZ_ z9e+jUxshb;jj_zkc!{9RR7o5ax5NHf*7){ie+ZNch3dcA=u+!~)S?27WLA)uMsJvJ z*E4c{Xa(7GzS?dZJuXo1IsrYuGNC*(1~VQ;ko=jGKr&`F21gykX;)&AjI_jGYTpFv zhoXt4`C1%s>Ne3ayg>|=It2~>4Xa(^aCA{TGw>8nqbxeFnAiDTHs{edOh-LP6-kB$0`1FF9{L z5mG|8W7+kQ;D2Bwk2_*VYm@^;3U|g~zUmR;-@J)z)kuMlt>wgJaT63dyoaI-+WeBq zGf}|(A~ry4Al+=93WIk}g_qGIxng#iU4%*!Z@m}8a`Kx=VEI+NP_!L3I#-I0>3^=#=Qqqy8B0q=~M=QEbp+Z)i4iB>o>tDu0hQ6qHOq zC@KuvjN_hF5j}@tOzy%*!CbdRFg{0tJwHE)y&t9z-NW~hGUq>HwlGpW&m~O`{@ji~ zt_L%>FUiCqD^C>qXETnL^=7K;;zT8Sau8kgitN;QO*Shh{+%twElF4|O*&WFoOvUwK|LRi>if!=F zfZ24`B|yYjPc~Zjq|Hc?;0ySKUA8Qt6ZWpKd<9!B_|3; zub(PBF{FUrl6B^rpS%O_s0^mql1`hQ>i$Vie7`@eYPHW4de(kH)$TOMs?Hr-?B#k+ z2+wzCp>(OSaM|eGs>gA5RlXa0goQRAp!c=Q*^f%ogdd;v(YIxNeA{V5?l0pfRN8n6 zKKk_1q@T%EQVXtg>AJ(Xe@+)I{j9+s4X|NLUdqv*-zs3pMkjL4r-CI7Jd8dPoj7Gz z3GDEA0AX;L$q1Eb@a+Jce&Z>s$J(JqU4YdcskCB|4UjRaqJ2x} zz{j8>a$~|$xMihBGP{<5{jWq;eSA0W@Z3jc&jWEU^Gl{RVgUHbGg5NnJGnBq1*V+4 zfuUjZS#ReSsClG{z0u)p{^3r+swd-N)W-p1+YxJe*v?e|7^wI(i zqXXfmiUpstYzlv+vWjfYYC{bf6<%;K7bLa~2c4@0#8lOZ==B^B-Fub(Pp-MS+dw#~ z>Pt@ayC&$W?j-R~tVHUoUSVup0<_GY%1<4ww6k9%2a_1#`*ho2)%FMweeZ!0T|e3U z$iB*1L9bv<$~3wn*b}}?-bXhcYZ0w%eS+PF3jFif+srI)IlE#OBG{y$&aK{0!dHhP 
ziOn5DIPqo|sGVMjYH|HRqt1!GT)UKNdwqnCH~`A|Zp6@KfM^!`lg;#4}(VcvD{o;Pq?~TIl#ieYX!Ey}tk;2UE>!?^P&zHKNCO&&`JkbKQY53$?zn5Aw8tBALi!E5N~|Smil(^uLk8Gfkbqv* zArLOEiTh34zy|(`fQefRnZ=c1=>2dyeXgTStfl)wN!M05-Qas@L25F^9^(k z7vYw@fX9DUV|>LT7~mBJ?JxJU0iLn=;P+_q+Aa|be?@WIo$VmMu|JjUngPYj^ypKi zCeegR`&sbvHq!G$jyEOOu!|wDA^&(kS}W&5r}g|MIa{`~C#H{_@F=)M8zENxF3?tb)0w5F<>sI=;0^|^yYZLSo0xh`U}tScbj z?=BkNl4mWCud>_YtS}(rlxT=zHn!bG(y3@6hzg%0e%7C6`EjQ~j)${KR~v4BG!Uhu zUD$cKYpk)_5=PxXGJDA~4K02hM3;A*8XbZjf4utc6GxHwSQ{E6I>F@&Q-j*#8Dsg+3&cf#8F-_Rm9mdIEg zgZP46*rTEcuUs6#`=BO94w=UGx-F$k%TL1TQO;E9Km1Bl%&(Hg{OYWjUo92$s}3>0 zauf3_-T&~b{bGJqDCSpZ#QaJspa+JE`PFr;Q{4DWE7&fG0J{Q( zswqcr@&iW)(>q^FA+GK?E-5`nk}D6=1zQt^f5*S3S|K-i`W{F6G*=TM)GOe{rzmcE zLRsjc7E9+;P2~OVm(eYDzy~eQhSWDXw693G~d2sJ)Nf%O#|vx(YWG1Jx~(O2NpyVDHChD&3ppx*gTx44UcA(pQ>P4 zzYlm}h%z_dq`|umyuqvTT6T7EVc0!Qf*dnRB0-}*;dH+O9C=7mSYsmiC%IkS)n1tE zxm$Sfku0}(yqU%ehww8VPq~M8fv{QITv%|dm`FUGj&H15tG?9gaxZH=+?Jk6XI*$-BNotb}h}H`}XK zFXK7t62kp8d#g^G6Jf*Q!}fx2#Qyl(({!T$K%BbL=ImTk^|Q3DgzKF$vY&P15;YzY z59Xzd?E^BZh0WZHUXS({7T+0DmA65sYRS6jsxvJTLiO$@F4NQqKfg~Rn!y?(`GRpU zWx{Ca#EGa-;tT%McaSeno{OM$5!s~)IA?e&ydG&m_P-4zelL2UF5)V@^~)#ylReQp zV6ts9;KN)E+$xf!gQk7Myp!_$!OI#<>Mei>?M{>tkJM{z+6JqInoQ?r z8yfkEP`1bsrq?W{7t6n)M&L=@Xr0TgT=VfteVlmidInT37taP)9O26APgCd5%eZmU z>M?2o&*@z8CQD0i9Rnrp$1qpk3vaG5;+Cs@=rFT!basUapKw#1#?Nd-52Lemn4uv* z8};*_T=RP4>w=<2UA9YY3>`i{4IWR^2cyROSUAv(W{+Aao>MvoZBep#Bx63mDD@j= zCjY8@)b|*o-pS(D33)VDiqbU4OZ1iUSNyH-Lig*RV4s^_ix%0+^WPgh!E9<29Fd(2 zC(|tGy8SPh=lBAMR}s>fs5)5FHiDm8mII*;jr8wa8!GdB3AJ1G6RbB~=9SBm!D{0+ zFt=~PzTud^7!CqMA#f(uIAa()qN&$_T)_xCg1xm7=H_lvP!_!ae_M*kw4JKCbPLnem@-%E<&+b2U(;B0U+yv!(fh@d*%_ z;{aO%$MKyG;w(_R8=n?EM%ndEM3Ol37W4Z|<@*VeylOvWE!fP)zqvwEe=kGjIp#D# zm`^@9tcF{oM66pq9N+#<0NJn_NIcn2pn4y)PFeD*rx;Ds#eS0mAzU?6lIsRu zXRf-rq|r=;U}Y0bn<>rZB3y99Qf-i%RE$H@x}on`FwKawh8WQn@%p=X9i&Z`4}Ek1 z4SM6qpP^b98sp2hhS|dfeJ+@4`b{+X@;0*jnkkMnY=aNy%E1wH@xW#UntQ4gnioER zfF-7M!LE-mKlKxPFvk=Z%qk)tAKt^wl<%xILOlO*Zk%X%T0b6WF%CZec_vsVw8hQA 
zkI9(Xd*J@jZW2~{3?rNfS#{wX?D)P41I05v8|F@=58@1{{#Y%#AWoI{UnmW~e*H$X zIxXtBS(6TP@)VR$uZ9_#I&gON9{#x}6@I@7V^_V*dB40Qq~ckun53VqXX{5W4iwO~ zAxA|mH`bw9z&tp*ssu-#xX((yD`DSA16)4u6U)u&4~8XGxe+7LqTMCU&x0mj)u3Y1F`XUCCh%j2XCsZgkJ-k*t#N57SnP7yqfQer(g~ftwYXG zHl`BA77A|Sxr~?fx!~vuPf%V*_*m_DGRAKS_FQnpWM+&?ZpAS7{T?o{(-~E^P2mH+ z@4{ef z#mgOkSf#=s2-_1wb$Knjbt)ax!o}I|mzUkk#8cqmJcYV?j^dX4L*Yb365p4T1`6eR z@L><*+LH5NQ>n1E4cL}>+ea9Eq6d-7H4tzV4%S*PdrHv(TU=yjJGBaZ6zPX{;bay|o z?~a3$dwt>A%5&)b)0u8wvYff(iTfNUE~2M3^90s|s%ft7e%fjpizYv`cqdKf8(wR2 zyIuXc=jHzNzlcfy$A@N#{XPL=zt1(X-{-#A@3TVe_X!aDePYFapK7t+M|IEt=l9tw z_WNuW`+Z8qexK7~zfZH+?=wp5_n9E}`#2mr00Ob!r}s+|j{A?_M>Kn>P(*Ecn05j5VklX3sv0%sUU10)JDbdJq~rKvE~Er>|F#M)yg!n*bN&M zl<@7$XTT*a>64#DsFqd+NiEIrC@%q*sl;O0gJ_&)c7}U>dx+zwhX`DHq_HQV11oyh zVRnNRrz)W|J!TJ8UKmaro{oX-$HF2Nt^XOI>j8(Od^8@Fu?hli8WzI7?qNId} zDj4wJ7oDs0lgHAl1(Dp?S-gc%-Ai_4)G+??a2M~)t>u$cJJ`^zSy*q8%dg9=uCgBg z3nK0U-FKk_Cl}1%Df-_@=dzlAl6M=ASkaB+lKD4Nd*SelQGEMXC$3j;wkmqdEl7-2 zJe&DtxUeF01@{|hCREDkz#m^HRb2>ZsT%4srD|$ZOVxtxd!Th%zv@uV*0bks+@j`N z_KEgqAQ#yfi;{lk(HAdAS1I?}6V)~9m^N{m-4FA89I^Z__!!&}%(L!?@+;?J=h7X!z#r0L?p_B6D; z7?hXRvCVGhX~ErctgS8KW6uhy^2mPF-EBSn+#uj}i)X{N2SdPYz(pSIeGF{wjG!(e zYwWsx8k*XFkOe!$>x6ob#r<|0!Lso=hNc(LaZg0lOi7ndqviO4wa_DC=TOax0=#h6 z9GcH}!9lf$$k(O9iCuMg^7achW4adEQY}e`cf1ElbxZWhZ22emuG+khZ1)z1ZeG^p zz0rDf9;8A)U-iaC&U5I-J@I@ec@MWEt$Bc8B$ipP7SHip;ecy}@KNs%?w(>u8=DJA zpSPOuO;tGNC$6IluUep*ObjTe=iyJyZqOTV%{IC1s5G^|g`F?5A!A=urKx5tKAND- zcg(&{z0aJ6I0ZB4K0BSe`os`H!(nv)9>4{TL6EE-0%r$j;*-nnw5-QWSi4EUL(}sy zet0HbAkIW>!reS9|2G;p?C0lK=)t(xJLt&SSDEHwbGUVQ9W%I94f9@d^3nMzPWW+{ zmK|~AcH;#YAE^Q(D=tD?z$dWkodvz@8V~wAfV-7>(z^3fe1k_U|5A974)GjK?dm3j zHBUiPqy98t?Mh58NW>M6M@XQ~WK_KRi9OVK3S}KT=;vo+E5kHC!p=EcdG!h{Y^^wo zlLE%^Bh!5FYuI6SGIBR*pR^xiPrRZIEAFDo&p4)|*8zq*?}=S2vr(ejfCrVB^74*> zkkkBxJQ$n56VG2bP_F%T4hV&ghi>q#ZAa@(vQQdJWnz#z^{XiEYf2LP( zE>M*Uf*33{TaV{63Yh-sND`Hw4lXVOq0e1|S|;BUol8{XqJ_%z@?QxIb)SVNE)EpC z5K_n<=QprM(+O7hwg|RXX|Qh=`wfD5=%Z}(Oz|`*EJYd zoT4N#+Y)je49UWQNER2TSY6 
zL8RPuFrMqrOlQ7lax)z9t$hjzyZZC}!fG(gmP4VE8@{=7QQS|nonS){<1+SKU2Z)r zzb%J7>#WdSeJx#dYd^fuz5+#+?d;yX%Yy&oK=WVx;rkV_)YwQoBl__ycbeFbr|py! zCV5R2K3{1|1)4HcR}W z1HJyinJQcy%bnIq^P%??vFURfH}+nH_3A2g%Ro)KP(GQoS9QXt%>wK>=*(uH)1|_S zIn;LUF|NAf5v=%SOe>9+vBh>p=vb@{Nnr`J-It@*{6yY=b120N;fBcJ+96jMLvE6AzY*lZPz%VM9*kHz5 z4R^9>uavQo1+(ilnegW0DLg*Lg@t@o#9t$H*-_6eg1H~cMQvx#h<1c}G09i90>2BE z+|vY$66RNQd0P};UNm&Msi7o2>PHW z21dMz;SC?f?u-f3sY7-;e4Me6d22YZaWCZHR={X(vCfZnev71g6JNr?AR{pSG7J63 z%FtEy4`Gfdn7GN-z=Ov3u%vMVmuZPa?!Sh9tBb+|#m``#ta!%b`z^Fx_nJsVC5vX8 z&-y1kF)LabwT2Bs=VnFP7kGuFeV7e%dYs`|xhK6ld@b)=*9hXRYj{j(I~1h3Q}_9~ zI2Z1TG!JdS@tdRQ^yw#9^g%75l%W)#G<`Akow^%me_Bo#Od7(jEpeqAE!Ck>?>4hl z@PYyI`8@s18thmUOsg&f&ztW+#`#AH9(*ceZY%!c)QK|W*6`0vYk@h2>z%__Lk|ef z7mnj$o@@DwASW<26|vCX5;Q1^r&CA20t3aPn0Zu%dpzqR?f1iY-WU-!bUk7-0 zRzU5O=kQlFn|_?~0d|E6V8E$2*lVoHul5>nGDaOePYmJT4h9nGDGx*vOH{FJSQMLk z@+h-8IE4EDC1-rStzhecWq3|-xzgZ4A!JC(vgxlMqrn|_9dH(qQs_sro#(?bPiy>LZ^*+hY{k>( zM#ANqkE~Rwi!_(Y@KJgRxN@J0An%ViW;U%OGLQUVesmvubRi4NzaGOp$9Cdb*}X%mHi&TCYk+$oRLdGLCXk>qsF7Lb`Nyb&SN<> zz9fR2CS7m3;NFWl%p0HE?KwDz+~6@7mG+cCSOO_teht!FmH1*m9rQ{3!D6U%Yq3;aO^UpezesRiq21wb_!A-Gzd~h7_a3Tj#9Wf1U+hP((@V0dT?n z5wY$JWNh5+$~2W@#5GJ7h27nv8s`U?S}hM!hpn(wx>XQ4WDz^H#DsWkdjK0VoWSZw z7pe4{jNkenV|E?#tiS0e{Nq+F8u7olyVHNFyRdEGWK4uYB}GILMX}epwn#~aCWT6K zX`bhikSR%~gfb+hD5YYrb5VwrW|am+Q7EOkG!M_ZKhO2z{tJ3>{{xrLWoz%>K904% zhl?4H?B5_+%ogL-3&KQMJOC#;TtNK|a$N6aD=s!EkWCZkCsy6O1o7dcDZi-4H72!y zg15j?r!e?>M~;1U`)arMpf-#uzDBeXmQde_{?z7Xe~7EIu-z7ygcIjDku9~Mg#|ln z$7#MO`jh^Qz zJ$1B_cJ$nZ#XGw&F=)gKwG?@Buh$TFy*nNA^(*b18%=w*d7`SpQ`-9R0kx0ur5o)U z`EJD*{M7L4{Asu!AGl%4KY8E%7oWAI!uL5Ne4pdO_t6!;&)ffepKrqV=_-7m)57<8 zA$*@&;rm<{zK@CUecFWYvtIZ<=Y;R$EqoskzE7I)eSQevCqno>A4<-HoA7<~h3~Uj z_&$EZ%xpiPBkO+n0elo@<|AQdt`uhGY++{J6lP|mFf-o@GxMG>Gbai&bG%*^M)%p{$XWt)YWsrjFoxm}o|3w(fiIw~FUK`8W74eplf`=fbd@RI!`;(r@grp)m^#V%fF$yj|07<HdA0>Te|DyUou|M-Q$60x_6;b%;cUajzhuwlxqRKhA8<)Y0;ZJ> zIO172eImM>tihLLE5~1hD;2h^d*Eq2IVM_WxlNmEPhN)IJj`fh#uldjL!TDkUWvM+ 
zf1qh|0?T+;EZOl{ffoClQ|I}^*a3(8r1e(-oO_uqSzOs6+wPMNLsH}^n;M7nPQ4@A zo194Xw}E_rXy@2KYy_aiT>V?x5S#yhPU(wF-y<}ToD?YsWH@dzyV3j6OxG-oT@Bk@U zR{VgJ^;?8m!^aY@4_(3Z7tjOji20l1L zT1pl9U*~oB;KvL+Ty`4gI3YM|V?@O32T*Ua;&-GCnrE0E-%?Gc}Wga4p6Z=lG*!==sgqFL4Lc_&bnG zpT*&^$|kn%xf9K6kHM_k5vaQB1dJ`ShUR(OaA$VCL~Uq;EGPSoY~&kN(rC1V1Xvxk zySut48at~&t!yHm*x4EXcAN)|y?XE@JY2SI;qt;!M;tJ?OA2!=2x8{(CYU{514?4| zV8BxkNlsydq^ivy)dtPMiF;dYUrg{McaKd5)08^syY`o?y~SO&MCl&8F!dxWB?GL7#z$YO!VgcN#Jc411Mha?*&he^IPcV|7_dr7`_b%oVeJjoz9jKjXh$PxmR(0JZ^-Qh}dSYC8m=e6zP zeB+_#C^dS1b}@KX67ks;%AO7lfnSIGS!888F8-W>8*(;cMM{vQ^C8Nf27QwF46cyb z;~-2^DVLqym`nD}{>4HomSOJD-IBLYr-I4)#j*ycl`MFEicH>DQDicMQT<3Zd;CTX zP1}rUzPKaw0^$g{)P@?yqfgUGZ12}x1>Oh!HZfJ2+B$o($i@a@@VR+6%u zxG4I=w@r2M=dJK~Tw39x%Tc17d5JWu$zkuEg|f$kl9~DOad^i`2Qrr_@(XciB-?#e z=pl!j@c2y+?s#G>Y%;tm`#bp=J7CaCs%B26fAji4;Op~XBc8wecU6>iO4Z~)J2GL` zhgA5eCkG88zna2z*m99y*^B(@uE?)uiu`K8fB99d@TrwWezjZVSMx=F^;hIqPXFas z@gl$a`(J)_N#s|3N ze%g@1+pi|jkK+FO1-cWrK5(aDaZ}Kx-*2pMD}~i-7tl6yBYY=Z+*%hsdIl8uyS<(G zgzo)FOP?Ej{^oyUl znNM0eCblX6lkWyQhZalb7Iee6vHH>nkNm0Y(Y>?w`erojEJlKF>kC+O)X4n>dp z>q(tPr2dm<>Vxi`iD&9CipHMoCjC{_AQcpf zefqSM%<$$lZoMtYJ|S&)k=weWqI1R3QXT(%>5-;*X>iJG8a}q52%-J{ofk;|&7Yw0=1EpG<{bF= zD1q|6iD090huKOL*c4?A(AaWIa^Pbjqz{kA+?fxU-SsT&EAu7s@#3?nXdThb_(-nv zb&ykd3e?OGNUpkHWnWjT;l$NueB6qA?8+E=C;)A7j%7 z1`3ZS9A@2~LmxGNW)~hVB6}URxJjlt-#4QQ`)ZUE=b={A)Xp6*Y(I_O&C}?@zVAqW zaCfSIB^jE$oY3jO6C!A*s~@O#!E=$*Bc9bem3>};Kd6r$)=dXFMkdhUU`Naa!DA+a6dsN{MD(Z4`EiDW-QF2s*3hKx$h* z-sg7=t=$-kKUI5TQ^5hr^e|Pp>JY})9X|o@z4yrivS;GT&>;BJGln*;J_FM}_2mOp zN$-AqG1YSlPCIByQW={8@IuqB;NuDHVj z{#xRJ#4z46+nm?WtU%52r|E5fQ&RnB4eUB&B>J=w7!O(a+~1QfvA%%G9*3ZH=3uDW zJ(RtDJPP0CS@3!85#+`Q@!BtX1lPA00o`K>(gCoT` zQce$DfIop+FknL^R%U*~z&I6}BJTQEC|+Zpeiu>YZZ)nldkXsIUFfl2Sy=VanzkQw zknI>!XsguZ#BYY(!?x%+Jb5+N_FP*i3@TK`n3BGp$T8T z3Ykj8S{Uv8g;e@ICG$2&->jg}UO589$iu6?5u;C>3tI z{k2=PT8-aVjwE9bg`i1I7q;hPU+O#80zK?>NOs@rkkRsv1^Qitk^ApKi=sAVO#^sa zaA$h9)I??%2rPAs6b8thB=Jsq&`Iqs%s%swrK?|prBluDyP=ubWUYYLio@|*h8tZm 
z)eC0CG()#3+o=6{Ro-dLGh9DnBCXX1NIZ22<_duY`-^ zt@+$AXLueE3<0aU(kcBL$VU6u1eyzA<(n<|@>Us|{f>gbdAD##S_w0H;smA>s+pmm zJT#TbVTODz(NtCd(|Nu5oSrdI)7_a*Z@3}Z`^yxDZED6Px|#U!Mi^?Eq`|Obn|O}G zTZy813iMnNjxmpy!;^bvAibx~FH2^VbIPF@{&OO{{=FTQoW`-g(Y9pyePREvRYNW)l1}8!6Lu1xspWR+xXK!Tg{^KMqPTu zutm~n97-pzkVsoR%IJ@qhG?ZRmd5xkDC$d3@{W5-Qk%D$e7J!dBy@R2%^yFc4@#Y- z?-g5V@0s(!iyq;|v*+;5QKke?qs@3OxPkF8cN80J>z( z37jQ82hHm$Bn{=Kz=77V6FysT%Fv!P$D|J!iu;B5!;5v>-&%rcdK^4|V0p5gw*&)9g_4cqrDtSCQlE+_W1)bJoO|FdE`fD2RFf*PQ~P! z^LT1Jx`3!HKF!H=pMUavm1bo~7dwQ|0_ki%YCsqt<#dc#>}%qco5E>Z4{^XAgyMdcx?bYO`-|Ff%z zM=Cb(=traf$^S=PhOM->Nq0K%kTGu_whe`FD{Ul$sh+*HG-aMv(eTb1{A-O1P0(Ff ze01=lq7}wNi>#u4^HZchl{lM8FYi>5`XBGG*G{`p6qjODT;bmdhVE75Y4?wFRfXaF z(W0x^cd;&Hb($@k?==rfBMxJw{S|!t*`42>xQO`8sbS|g?fZgkQ3 zHF)ZAH~Q}BQE<<5rESZUdAE&m&~48Qe!6NBe6m&Mx~ppWJkM&* zzq%jrR64H{-9b*#S3G`Y2kV$IlTPrd1H~&DcGgZs#QsG%^}X&%LpN$kr=(2c5jU)< zf6aZiJj@OnD(|A5))+{d%s}bA2cJ7cRr)f2Km06-0r!IyRQr533oKX#Ea4QaaHJzt7}jrVY5<*ufQCck=J^wm?|ySzPeV zk-nQbjw((!A;nX^;K-ma5Up?$b{*_Woo~rl(YqcjeDH4Bb?XzTRFH*j$-^YqA|kL~ zMj&gyV@Njy<+7w^PkL+bLmVgemlP)?;-PCm)+Of?_mly+Ed4MrouRxeWGjuVv;wt; z1*FVbgHGAr!m`H=;nqwWC$G6AD~Z^FDrFVmJ5LY2Ogd2|v3ok=@!x9%$;Uf@#&Nw7~HfWbhyoGN*;5PrQjb<;Tdw zakflzDGelSS@_&-omGTEPUz;;ZsKlpZdD+sl$a&y-)bmm;dvr8--6@ zo}3^V8#aQ^5k7V0&v6oE;ZxrcK6SnDsb>hExvz82DJ7l%{(w8MPth1*;`brRL7@h(zzzRin=odVk=RsQl_CcL=g z&7U}GqiUyDIQ_8|Z>g-8ZS-&)u118jhbX{q;3i0`FFQ&|KzCYzVLm1ble16;rp}- z-^WR8zFg`1|2JPGV)JE**nF8SHeZH{&6jIp^QAy+zSN1$mw@g6fAdB9-{#A1vH7w} zY`&z6%@=dA`C`54G**kvmmsnEa#C!*WQfg|!dt^2Tx`C$wD{6N>_&qjskw%a0xp+#`B1uivQ&GXnnP)=+Hr9y7W*Frknf&+w>5Ywp#GJ zgBBMpRXf1X4DhF6K>>Wqx5?7YbI+kk`kbOY29fkc^Aoc6uRqqE4dUar=}M0bG~#zU zo#*jt%cQm8j9UA==SNV94w^HM|BL7Lurmoo4|`e_?dZ0ZncLjvU3)}Jo21=}o2no3 zHu{8@tcsT=P0}tJ_I;~V@j?wBENv|M)cK^eKs|(Rss2M7r^XbW*H|Fs{hPT}LQ#?C z0*I$$-C%OUJrPJ!D^1%&}VD1zdBDA#m3xCUKsNk!3p2d&XaK#Ofic zzq$$aS>b4Ab(M(b5aRdx({JKD`<^?Mcy`z*>f7TUMCNP5lpVrFxfnuz%Jsvr097d7 zQwVygJHX@CQ?Ne1niNd^09m=~P(7xO9cw#-UY_ega6A~LqeUkTa>PxQgXqoFp_nf2 
zdaTGlKv(|IWjl|rr|Ld?U|P?;bm5S6xTnyI4xBxhCu}mJ&+{kp^ULF4B)x~LCP%Qz zJF9Rx@5Scq??+eZ&!C?gx7&60I*5zNHC*r`f~lOa#kU*I|C7I6GW9Uq;2eaG`%7Vz zr5!(K{+F(`u*J)Bi=k)PQLgXNnU=Lg^TsP9=(yD>^oG)QF#o2Fa#2xOIC2y{ajpt3 z90BR;wVM1<=?2=hKo6S-5Zdp36gKwx1jk2)voqZdA+o7AY$#3OEwA=XNHc<v^(u(=0V}=aIWJSN1uE$0qx0-9&GwJsN_G_KoFw@=4^X?=xr;&yDRfV<7QMSH9#|XX+Q~K!3(5@r~Gz z-{}#=JCP#nxoJCUaA%Oa>H@hV`@zQvQrHqOfR>a+(`UvJ&?1dxp~sJ7eX*Hjei4U~ zpv&aMbD*m?=0bvFEL<2ekiRAfA65sUX{i!-kto?29f^XZQ7@Uiof5wtT8^jJA1v(i zU<~R0VzoKWBkrduGWmRWJ-l zVOz2n(KQy6LEEbVQjWUexec0JBJL6AeU+#E?(Btz97~v&lnvf%B5}P+F&Um2j}8?; zjNTZsxl?5ZbZqBjU>>P|7X|NDSfNFr96XD2C3Y)6;bEuYbl{97@OR)i zmYF^h>Y77IG4H{b_V1gl9f7D>J&kv{Hw>se19tuy*NmykGc< z_0fM!YUDR!K-h2Wm2iO6t~dm1tIpuj2u*BgyCSI{s6}-y_N7H%*@4M>*O1h5ttZ65zm@bjjz6uA0h6?ugvMSwg zW=1DY&BT$@s>q&026)J?c)<=b0ytgEZqb{C*aI(-d={n|TR~-eL;9HYw2Wb8gF`hpuIx(v@gb@>J;2 zWhlIKYk;*N98ZJ6Jgj>JnX6Pq8ZYc3MhBl`zWFSE%&r(5?glf(>0i-5crrS=j)h5n z4Kht(Fl~AJ5|(;QLDkfqeB!CFe{xP{syOSflU>TkEr{iLFO+$*=p1*A`3ciqJ>b!t zB;KX&HvJMci5e`{#8=}dVbS4iI$E4NE`N7{+q~71<)`ikyI>;S-mMF*-nD^SEA0lK zRX6Fz$IiU-rm+w|@deou5e|05n?AIC4U$ha;9r!E$6B1`|ePvQFv6~0f6@O_>M-zQM`J{7|EnJLUnbzx?@=-lBO zgqc|`%*=s73p4YWFf&z!nfX7PFYG@vbEYseF(s0A z2s874Heag5=1ZI~Gpqj(ZN8k&RA8L~%HYFx0!`6zWM}w%++{Jz1* z_B${uc03+YNH2WQ5DaUM*wD|%W)lO2oA}9JkM|3mM>5nt<+0q@ zCIL8TiFRS`ehM6k{NE%Bnv8_k) zWqn)qY27?ezQ6hryHa_d7}R&BIpiFjxyyo9T#BX6Zz|B#xD8u3&jPtM6Znqw7Gn4G zIdsh^B)S!I_}r{_#OvE?x(Pz*`q81dTly4B?Lwf|ayhGc!*E@eGko4KkQTg>@GsFO z&?lja9cxyh&Sf1qs#K274bsiMVOcM%uA+65jl?7!Mn3 z^DmuZsBWVU?~${PW`7Xf=9j*BK0I7FalQDTpndQ!z8g%vCzIJ4$fIFeAYIiX8KK;t zb{%~VKE#}XtJyobq@)=ZZa&E)J=RcF?YVT=)AeZZOPr^gcZxkAyQx#$PAKt~NK@a> z5DM>#vtzD zx{GW!Od!WSSHPsX^0eS;KXTe2Q1<$-A(V^~z;@|tnBC(z|8l-r z(rIK3-10BLhVpF~=PS-6mR*7cV?!nF$HWfH>u0cPS{i#-6$<9w2idBu9I;s@BLjUB zA)xdFEK2A_kB_FPH+M7!wZ}u-#XPjS8vxDvg!R35PEylr0b6q|o^8!%%&X4DHnlNT zbkk#Tt!X0*+c|>dXbmUEUCY_46Y`MuttXxGI#G5y-H8RPX_B>$TFQEAYT~h#lXzO= zXk5PV6Fxlog)HlB!wrw8fmy(AaP9wvWD!cP*OtPn5LLk8xc*;XvxFu 
zA?{eZFcZCe%gOdVuOw^7ES9`Td5886dYBp=O){>=OZHR@E$p8;4{K_=!n6Z5_~F@S zeE&2a-_|#ivBgVSzYmr)-|`C^-tZ9<7rtOCRr1K&jY@FEPJ!F#dqct5{ZKR19BU$P z$_ASMA|ryiA`e~Nc$p_;k z+rc6E30~T0Bq@-e&ujwgiMDSjq3fQq%xNJKTj_N2>|ri$bj~FwVva*t-T-z;s!7fn zl&~LnZb;-kdvd?sSx{!vgA59agUaVKVEa=!>gRt9@?UnR3%icx3ipzk-ppJYCF_b^ z-dD5GKab(haS6CxRmTzWqxj%K>g2$>m2A$wN${wBG5YlC1W$8LFu(ddyQgQo;YPH9ZuE=iN77<4Rou1r0CO?`6DF(j zSGBWY+?*QnXSg<;J^P5%Jh+92UBcM0A_K&2?+Pchj%Vu!?1A7_{orbt3f$>rfNSj& z;3AKKrws6Xojxq-$Ry9T_mGRjs!-nN+dtW3S=@c3taZg+Y#aF=;ZOv|elW(Ra&k0Z zIg~$)j^k-3oN2_x0X*;V4<43cuBzOWJ%2rx7H(WDy`J({HaY1M zf8?)5A82)x8XJzI&Oe70wVZkXPu_R`#e4gkD_vlo@O|D2-{*|*eQpTfXX}5y&u-!S zTo=BNlkk0x3g732@O>5v-zRbILgprXA9vyVJQu!?^?$xkhwy#+3*V}CD$GnzVP^LGUz;zJg_&6<%*?LB%rrW%8x{*QGgp|I zp2E!hEzHb3VP<|4X6DKNHecZR9Z8`uGylc!>R$oGOv#{AGg;PPlZ^E zO{Z(*QIR@d>9rRY)QrL<21nr4#eef%?O*(^4i}8Y^RHt`QRXhmCZmRPkKAJ=7WRfR zjq?3OS!P3~SiB|+W?VYgnDB?_nC>S72P~HK-MJRtD)eQyBU8!H>SY+!IgEKsU(Q}^ z6!&AZ?D5d03*>c6Fbli$h)8_TvSlMi!;)?)ihE|+tX z&tV5p*=;GCS$P7Mzg+>-erzBoyqYCa^+sYf-;$5$wgY$W*eUtEX8`C{CsG=##>s%~cJuiKnJHZJTCJhlg*aM*7_Gso9egk`*y+FQRn1m`T3ehRs zPk6-1WI^>P@*rDVW-MJ%xMgS-ygE98xL8EtAEhL`Fs>TqO*cTXMlbezv6M+=t#HWp z@juz^GpY#3VLNN_Lj5DS;IF{1X{pkN=qmW)Pz~L$q@$0bJT0G-#xJbWquZXi(3e5M zsC^~d9km;;+{j0R zv7@nOej}bAexJ?N*@r~9{K;L+X}MJa43aN{rtV5Sys?0PA9s+QGL8c0r6XA9j0>{E zO_O1TTu*v`WG`wt@h7oMP=LpmufqBYUr?Qt%)IUZHZ|o#e8E=cKL5Pr6f^91Ll1oa zgvriVuV%%+N1$`h;(l^#Fu@)%RE13k>BQ}GV7-gB&1*iQyF6l?^{Ah+Spc< z8Qvu~`F<=n6X%4D{b83{FtjcBOpg4HWqP|N5&Yr`-)+~Ds?bMvTg2agtkX$o=bObQ zVHThykEvMAhg`eIOvkMsu09=vduP^@3WIpboo(aLa@$@UeZm4}{XwR3?I}F34aI#E zb@<3*M27E&a*Zo{aG^mhTkzAA_NfJMar*)jjWkHW+f?!(|1~~-oQ;{g9Wg1g2IhA< z1S;zCpreO<0kbM8!(Rn7Fpn%mkqwZsP!HcQa=i#^M?OdzeMx@_%<;TYzTC8=E+ z1sBWe*@SL`;I8{F@P77ArWZC7o0cE8i_KQYTlr3`x$YY|wRAO_7k`&c`+G-nq%cgD z{xh9@^4wuJFPsxG>H&2fD+=pgI`F8+QS6|pDXZ%e4W0XsgC7;%ko@88RTYOErijWoAOz zXNHcpIq0k!42o};K++drIX`!zYotHv=W7qYegE1Ww^ro#xp{KhEk#n<}R8)jdAb#nJIxJ>G{Zj74yt)mvJcJD)1}^rFAA67lMfZL}mI zhbb@=>Bo-7C6OOOIgm6kY0K(WTB2U8;@fQcFaadRla;g`!K{D7w`2yjC14 
zy43pU*BC6i)a9Z}O%h#dAJL_ni!SxE=u&5kE;U1RscE81-Sc0U`Y%3f|F8UvXR~7| zajxgDJ9g0?rNg-Wi?ejl`!#g+p&M+7@mVNqUr8h6r?b3=f63j2!su8(iN5wX=f1x+ zxJ|t&d3moC#a0)!J@gD$bsf!8EF_)_W2r%V1)R6~fMdUHqWKry=(zG!vghDUN9V@sekIeTkSTX4uf=^zy+t59!Av~+8VqWy;H8GO-$ z#-a$Nm?Dc05BRe)Ez+kGzthC_-CV17m-NhcCF%Sv8N4bpyl928iOniP5gU!Wv^m*&hJs}+3YDW<} z*Y99=;5d_;I~$*_8zp)A+ZaxqH^is@Ymp8;E4x}a9Cuz&z??1FOtq@kj&7T+3!~bCB+hv`L`U->^gnD1_a9%Dm4t#oF8%HF}>egEdn z#7h323~(uj&!tA>*<)8!9Dk6mi*qLDs!y<0&n$VnIOiX>gF@I`Q@X%Ijuu^d0lV{8 zKxWlZoOh@@9*GGeXWEa^JyY$(hMhkx_ngQ=vqOo>Wlc1>TnwcL#YSJ}MfBFXbfz4l z&Oc;tM*WH>5MZ;P+Fj@d=VXDB5ixV=`^m<e%(GE%cMEPejAXsm z9i?V*V_>}JHP$)blV2SEv+&tsMLK)NGn{-Z5!4T5Bh?R~A6#AdMtT@Evj4)3uaHX4MX|QC^MS zUtfw|PQvhvv48@_g}A<0iAn}OCSP+C*&xSsI5$k3pmNWY*ew|ci-dph=ZFO^f7lD! zw#4A8+dKa!rN|cIukDloT-pTN4}FBofA#sxioeXL>ugeeHygF9 z)ws+h56nMQ;NZBuP`vX6NxQua77QsOJ?sN2JnOzG^Lmsoi= zoU!s-X!_g{!^#g8*f!`hi!>uRpS1@%%|FUilfvn7*Av*j`5CjYjR&WLnegELNqn|* zCMlm4kK_9=$=?A=aJ^E2@?TSV@_7p|EQrMG?cy%+mr?jBdL*-$^&1KX&4MS_?xXx( zbuw&}Hw3&4huaC1zdmXPt^=~+S))9E_}~tt?^wv@hHRwQCM7_^AzQB9%~Ra_FrYbe zMzGCU4*cEiy0Ua~Z`swrLo(>Mk2`V=g&z+=H6`e2B^M1^9t>V2pcorS&l1tvJ$xD}3D)P_A?xRigI`g7>j6cdv2T&b9}}tJ;~n%+qmlIHF=uJ} zHwCKuzMV%j8q;^tskn1-9ULpmp(ZY?d8;@ZVj^*(3qRFi#;hUIt^I~bbIXs=Sl@{} z)wnBnJt)U(mXGAK7j>5Yi`Qd8N@7v(iH1dAa8B{?->$sK)RC$i^|JS}ESFwusHFjZ zulcKW<cgHeGU-bNfn1>FzOqC_8Uhv_Ed8z4?M`(o0Foy#I_{MLrvS z_?=hIbkLTw(yFmFeC@46u-xhs7VKF@X8c-3(-fjf&1r4i+Zn*!QkPtbtPyARjb$DQ zsiZo&k*x3e4ScNq`0G6*WaP|SvQ%RZqq9RXGWjF1i#8+1U+u8|(R>zP9gG2~r|{1< zZEz7=J$(}QVCr@cUirs~-Em$8UF)pKTMd9!=QKh7Q4bQh@fHkk9Lv%wI>A^?b!s#C zFIiXBPTZ!*$lgVvpowAl((fwKPvmGfZLQ?RyQk^j+1+l5}&m;f2( zfADPZRh%}dCw;lD4)Mtje#pfEigllpt7B$iRE-6<4LgVr_r=f`qu#-zh!PTVaxh&} z>Pfu1yaVG1KkUEFjJ>G!W(iZAdD8ojn7Q~aZnT~APxAJkYaLkA-3ah2kAxm?r=!f} z3|08%1h0;Zvk^Yic#T6gM7_Dniu!+r)e}N!V5e-DImgg$+BbJRlKlZLJF3w&Q*@;Z zEz@vW;7(c}kc^*g65x)9j;ztKqL9?q%fg*6!a*Bh;pDWUU2YI>+8)G1ixMPMSD&A7 znuKRgb&`fQ1fZe$ZDO?P7jd3m0kO%+)Z?Qv_uG7!85bt7+vk+T9ief&;I=y5w@sf0 
zyB#In!#n7i6&6L-pH*q+rYL5vn@QdL)cBBe1$cNlot{mNruha1(Dlpo!nzxWao`Ir z7$3Kgel7_Vo1Sav(a#YiuyZwh_%sYg)%8Xv@w1~v}Xv{F2xcqP8*0H>}}}L=bfmr^rB?+Ee$$IoPE>o z;Y(+9c*3O(GW4&2mdmPfxR} z-uC?XCnFqvN{MgnwT#c`at%_Ni}3i=Ka$IRwCSl5L*A#qCf&2U7txOjNQYO392< z-SLTfEAhXlM9$fs-ws#1chhHYExFOY(dqb8DEg`D0p`=ON zJ8iza8y*h&0!fbsK;Dcw;PNdF(=r1|zz=)g-tW_VcJFmoW15JQ_-^n%Zz>S*5WHf?~a3YQ-6~W-ZL;h%Mt>^7qA^$ zFT>utSnzY5Qn;Z0BPiF5WCL6yA+~P|^klclA+Muk^P4A-kl6seO`>qlLyn(9KjGyC z?If#316=1F#m`B%KqcoS^Ba2_M~*2(h0ry)<)J%j)I1^weNy1&`%}b6_6_gPkcW4_ zI`O)Xd8E1d2+Vrf72h8##wm6lIQV@qd~VW#`!0WQh{_c_3Y}?GN-&Hxw8HoI9O=eu zTGS;f8dN@4ka0_DWuXdDsN2{Jy7`=gG1{N7Uw$e?&z(d@2X&)|-Spw`iBp(nTuCmDsD=bWs!vHfYl&;KVmxNEfRru6)!l!;EeCqqcr`Ag!$)*UOdYJI3 z2MeECU-;C1!lyngd}=@8Q;!!ub+quQpNKp4y@gM0Eqv;-qINhXeCo%-r|v0y>LtRb zelC1!E8$Zcc<$!g(iEhxZ<-cm6b|P%r|L-m0sA%f3Fr9xNFoLIVIL93HVd9xSgQ`?eF>n3sWko<_LLUFtm1rFIcrYJbtC z&Jtbf@&EP*<>Fr0m5MI)yXaDniY`@O>QcXdEW_B~fq3`mQtr3oB*dt< z5KPX-H|cI{yMZcg4H|)xo|QC7hfB_q4_G&Q1vE$fD%^HZhSeEiOt-=pCSSIJS2I3I z)~>PvY36FY+;bZ$|4MV9KC$agWzF!^MclM{gEy`SF)M}c)Bp9|hd?7gcAPw;BCXMjzg$F$Dv!pxy;mpXf zXz->Oy{DAom*;Nur^l>jYCs`p6hcBT{>95eZ?<$K@~b0{)8eThgKEvul?WxjKGSwbMuiYy~d>ujj$kJx{bC$7`{%H!+* zNN;aXqMk#7s8`KPoW*jP+|)w4!}h$?rAK|y?R+iikuD%Lt19QO`rGp^DH5sa?aS2G z;V&zD(0P`$B*-VH0N0;@jkMOEoocAc*0O|r?L(l2kX+@ zXaC08zVWI%#x3ka2Gt*i?Mn=~%eQhGbl(61H1ELmCPhvHTAg}lb z)q|Uz91Z>MC&Snq_aOPI1dgPb@shy@*}DUW$t#nG&>k@b7Sz&Ou6}@MU2Z7N=O_$FPSO6C6a+vm04*@?Ub%x{dUBP%ksJ%)?W|{ouzq zV|@2*HujI`PPg9dMf)#N!}1k5bEX}o zsK^$PvV| z)6{3U<32s5mG@yL8k4DxJ>LfdpP`Ss{0m)n(0Hu+d% zkB&I9=U^zVn-7t6ffnCsgHQ_P!k4lK&_4ZWazXJsCvNV{ni&5h)|(AThkFZ7xEet2 zF8D|8N1h~K^-b9>g?*$+r=2X3Sj1Pv_XER$S^V2{DY)sqFKcu06g>DR3*FpWOG4&q zprs#_`6Y&1puuVtw$C7z)AVnI0RhThC?`kEX3W>mm=DojVVNFXa> z&my}wn%pxbeNuTz63(7AlH7QojU?2H(YDt^@QvWgu7CWOnV4z?*U&!1C`Xc?Wz&V6 z=Ov?=1r4~^U_V-GU5$EXjf5xbveA4jiQ~%bQ9{!}^#0i)w7XNB&9C`~8ozu;cXa+@ zYEloHU>Si91Vur5w4BHlS6-EhGjl6KyQ?#TWJ@&eTE{Nq`!l= zX%XBai?l7X4`r~DCNJ3LIosKM&sY3G8j>riQqc5d2cLY>kNr9^j5RpX#(LO$voDn2 
zut(7ov_CDGy(%fkhiqrb*Ig_4_@_Do!{#S{S?~|`%ALcjNeim-G$2^;51tqNgCT-{ zu$kP#ZU3KtaKHn67W{+G(}iA8@DEP^&p&uv@DDyI)v3hC7jb$N zj*1yqqU~YInpW}*K5>w*i8z7cMp+Z)ngYAKdj`2aBrrJU%A*GLDdeTn1a|H;BRZkA zjl8vRVDAdfmz@GL(?(!s-W8aci~fJi%qIdfbCkf$JSs3V*9gqaFaJ4TLImeaB-KX# z2+Yh6t%q4oU}nw~n3*A!O5~rw%={}bGhYeJ%#{K&b2u`x;kd?z<9o;hBU!$3^CnU* zoJWkfEy1K1Jj1poA|Cjl@(~FvmzySCkB^MZPzSGYIMips-w{=H;IPd5>hJw?IV3FxXPKGcNt-K4->xd8?=Tgm@Q2B>OIGN3TqJUsy_~EzGG=EjS%Kar7^BmB z55ac}KB5ZqMZ!B}C0t}SmzQ=@;6vJ%kz}hvr25rgn8&Loh05#MJ#)8^`}y%mS;i2b zbN-E$^ae=KqEOg&ZzR5*2-rKqEL_9lQRuR!F>2p47L^{MS?h#B5?7Fkwc9h<_8&BA z_kV<<8pd!u#QmAZj@@5}BuhGx=9+WFggb={Zm;Gy2dA?S z5@XnY>JL8kRm49!yAW4PPGTqb7L{zhhAd>>?MedQ zE+aEm_M?w8o1ptpI8leY2n#ike84BPQ1q9m->76N@d#)?rV=rGHlu3niZ%cVsNC}% z>J;5ZT2(Ci>C}PUPiXS@{)M6CcpM2Ye~p48r;w_#m(b5A`S4+$GW)KQ>uOzHlKOa z=?|#udYEZ6hSGf=oTWKNT=!nR5MEGS!E?i{GE5?wcdCr zW?F=7-tQq9QUB1r_TSLOs1YwbJG*TBpJe2DYa9}@okINg&LHZ|U0mXgcihI2JxsY* z3VIuTiqX3{iKGg=%emVoa0*h((etFIaMIzCDBzMLyW&$3mMDn8aRoi(Vdg0^I{zdJ zT`3QZUlUM;W_vo6oP9;hBD?yElFpe5nOu78(tc71^h}%WULP;q592`xXch{ z9Nicg*fs|{|M-Wq-9EqxR#)Nm9T47Zp|Ih|1H?kYk;G^2C(eatg8N&KOmAF+rdZx7 zTRWd4R#JC4?={sh%*Bx?^g5x`Iv(eAgrj^HITZZpCk%FRL5D8gfcr)@mc|@a;)9m2 zM#=?=5O2SX-%3qD=gvtGg<0~%a&|QNlX(kaGfgtENQJzqo{644T#l-Wbdb)f$$XRF zW~iN^$E7>jm)WgdK^9z!DE+;44nAmefk?jG&ODqthHQ|Sjaz*PmizgOTVI_i?hZI@oAB$>!<}h6|DskiGV%X+58g;$JaICgJnk(gj zq)a(DRr(Zsn4iPlE!1b()t30}LM=2pdj^T|Rbmalr9h)c@8D459TV3 zaiPW0!?K?5a(Txl)~B!w9%y4mCyUiky3CaTWy=S@He0R|I61%S?bzL%H}4WS#Gjyl zV_&+CwQUO`?S4ntc|(PQqcx4yNf~5Ep|!+Unuqm&E%;-_kNKimb^O~DJARY>@?m-I z4#!&Cm+pe!I7sud^$qxSwk&VrTZ?}N9wr0lGRYe5IJvk?7EQ6q#uI`w;3y+SDA~E3 zt?-fM?~Y3#D+Okhi~MYo@iLQb-R^^qsrCqS^N7*=9n2n?&7mJx_wzF_%hwCcOe`=n z3j}7S0lCA|5l?g~nUVV#mIXh3}|i{OIr~ z_T{~7q!&|-T5ldhht4lT{-z6ImH)T@HTy)xYv$N?)l~+(xQ)FZbTSV6&SG8NePp3S zGFDW*1oQX0a5;NKYT#*biNtI57ozu<0tio{NB}r*kGjz zYLop;?myf{TyL#_a}ve)kkP8+`Y&NYj^a+ zWGSO|PZ>Wu+KxNkoxs0&AIQ(Yjr3RTL=P2CqK+w_$;PKPsQKm_+#_%W9^LULie{4h z?h*-f%;^eATRojzyn7ItRAiu?ld{ORb#<6FQbeV*&T$tN8ko*lW8!xu5vE3eB`>F* 
zf(M$u!WSFD;F^=ctX8iKFSGGG*H}fvk1kzh_HW(D8Q-61{0IZsHIxmL652U`kk57H z{{f9<@g!L;7nY#xX*huvHEccXIBvH`F$a|xhx#dp0Ek|^5HmJ3M@V0 z34R>rMLzvq&sfA9#U^bd(TCC5*y`XhIOU}qO8&PDskqO;Nh7x4&;H4Nuu0m!nt1amBRa^D%`fZm~-Cla!FPdMj+LQQ#Hzf8yoBIhi*b4Qb=TDE#M46L&5C1e8>8 z2FeRqWOfHIs$6_|X#DpxSRmB)z`g;lt7FCt)rz*?p{>~TNC6}>*8w-89 zDJH9SE1r>aiV19*SElsd3-7XDfR{`UMQ={?5I*RJ2PMxlj+uflx=EENopC3ZF3jbw zAGIy<4mg7hHaMfd^OTT`^D=Tb=L~w8EXUcN{Q<>}+mVw(rI}F>f$n|OBBtn2Y39T0 zur+rAdKIgJ-s(Pr{^J%ASDmrgx_tsU7aD_o)?a`-tGi(Ib&e}5n1EVesdD1kZ@ELR zZ-JZ3L+BAo@C(7ym^l`4{mBOS?6e%F$L%{D*!l;IzEX>s_lRgI8eMGEej&TaHWygd~#{Ji&UKYC48$y@bB6O+RLYL|Q)7Y6pm&yuVYTvg6;_+XXdilRD zRY~YlhvV7)zvUWJMmM4TITz6bl|s;=DZ^(kC}s`(Q^>~gH;Bf!5oGnEUwGlarLaJ9 zBfI+YK4MO%p&I32qPbp;SSMXX6(WxL`i}N+MWAwj8iK!b=v}zGU*r&IG6+%Y^lJN z=uO$NpJ(7aiEU);cXzVlr~>S3If`UYzc1UjFNJCUTUusyFdE%SIu1WcU4%zW#*yFh z)9_fdpR<+y77hwL!9;VbKjvT7AU>~pcB@y+B zcyekNar-bvz!Lr8#%#+%Ll#=>9Nqh$)yBTH+UR%+1$n-Q@T-Ky%ujQ8&4|# zIH46a!R+U^$B4$C8|3CI1O9TP3ktCKf>VP|AbFoTel71TYw4Hp?T;59O`Y;b5QoOommnKp2qC??tQ zPnRIJzFvi0aHIhKJNXK;XJ2u<-HxGv1N!{ILviGSy)K`*(2oDS;tVphn#|V66%spR z4>E0@Keib^$Jfch(OCSf0 zoyvBey8vsxu0%_ObNaT|W6**zQ<41dG3bYwEU`M1K=xQpB2OG=l6h-0P}4qlxT0(; zGxrq-Rn*VmtAgkGMDlUmn4FEdFLH3%$0sEX2F1v`{5TPn&LLNB8j^6myQRNnSCM=Z zFMO46ooYBulQWg*4oq1toVWKe^Q5^}jkA5IVR}Z5v5_<50gewfc zpNjWv>_bmNjY(JUQPkEj01vGfq4A9haOhtxPU*78H3Qn@)(w^%zc7vMHC~T`C-`$w zMo#$Pqeqw%_GSMBR+ambi-_FSMQq$*T$E*vH@%L*2bqPav&9~rzcd3nzFdpHPTGr$ z8`g4S*>~XG3)|6(ySw1atp!;1aRRPR<528MfwhZnlF$hY$=Bm;I3mg(>z@y26gQp) zCfs*ik?V`j*&jm7OgF$$6OW_aBe$X_AJt&KX9GGewE!(yy%jgrC!>!?9f{=AE2!}0 zW^{Se5~84R9d+h^LR}gPNHTRl;s)E`p52=e-~19U{cc9qY`cra#8;7oGX`+P^DwAu zKL-67=R%%GmE-$;U$C6XLNZNGghqTm53PF|7~AS^sI%_?lKY!Vs^fKez4bw)Nz4pO z$KF7zI!p1_wqn8mUW96JAjsPmK#os8im!Oy#`KvI*wHrv7WZhNV_jMJh0}Ciufzxb zF#g13U1}s2{kw>+O%K^|R)ts!XGP4^9MO#5&QcPDR>lAt1=jQ@2I zoRw(?NB`=>SGJFZ;(R53c(xHbp505dPPyQ>LVoq8t^vOh@~f>ve)UVpuYL&m)kYz| z;)MKal8|2=6Y{HxLVhLjUw&0BI1HA;0Pt z@+&(bzuF?FNPaZmB!lu(%0plk+rL_utrU}E=lxm9hMJflU-P-FdEPr#$>OMYJF}wVaaV`5l 
z{=q(>OWpFHfAGH0rEU_sR3pJZxcGV%-}+yd8vCDrP)ghh%@F*9VM3Q0Ep(|vLYMkp z=u&HiE_I5~r3MLI>PTIEc2MY2JB2RQ>wi&~Ivo3w{QdwI72hPm3DWGJUM0fmk7MT+ zJ|*0=>t%|TVr=J~6cjLREYH@;^J=9|Nb>c0CNo70oqW{CHBD7S&0^lsJEpo z{B=Ijyk$V>hsxm%zo6E4QwNg))tAS+|%`MDwW(`!ClR zIfP7RqqodQ&O-_O^FPamB@6!?@{#|0CYqOgtH#QjRUSkuMqVjt8bnqq;*y@y_-6wy=M zT9`p1b}k?bQdS|s9f5$dkm(GCpm;AuB)+Zs~U8!;1lq3P0M&U?xvJomg4s-LhPH80X*9@ss>aIFHycQ=8iCRcLu zmoIwwxeXpOj3LsNx_DgH1Gr^w2`&_!C%e7Yqk{=AaL4>TF#3)Hsxh97eXg9~mqp!1 z-k^b`q#h>KDHGYaq-bQ_1c=pVo^Tqm%$nd@BBx$SR(rh09#@7?Fj+@(`=+70e*$sc z#L2MhnhATPML4(fZ35Hod=*WZevSkT{K6yWAQpaf7?$7eGv^Ob_Ff6(Hy=m)JS@l} zA7NhmYX`YrwHPgdHvE{gquCeFj$-SEaWJephF#R+hrABv;4gU@WVyyXv_3_S%^G;j zei6DjiIQBSyGHEZw2)H?-YELRMYJyX z6`7WA%177kBX3kq_)yck=wodrvtiC;Bv$ztmu0$h6JK0LRo6PmhUZ(@97#hIIPo?a z*B%6Ik4V87U4%9qnMDqcvEVD@Bgzb^FC;{96xK>u!tbkBM-JYrkj}w+a;@kt+FUi6 z1r`V35gWm6^2U+zT|Ne>x!fZ9vUz0I_H<;g;>1+Sx|2f|xv1^&cs5Z-@GC}2lQ_xo zY{a5fe2Ziq%(-Ze?8ZBzb7^YG#!Hxua5sk)=7DHJ=RZ=r_&RBeo{rMu=M%7LBsw^0 z6k_woBPD%ta{rycjsJI_vv<-U?-$ge>gX(}xo|fev-2t%>(vA=TQ#FM+xwvFsS&(^ zyzso+_lGpD)ZoWheU1Z(VeX@*`hVbJ3C zV{~a^2}yj`13L{{VQsO{u}}U0jSsqzGnZVUh3gHIX{!=aI*c$oT^rR#AdFW(fBu>gR&6cX{&DKcPY4!@VVyra@ zFFFf%jjShowB$(Gh)Q90y9fRjO+bg&{zJ~K`(gF@69iqI%x2n*W{>rlvX*HB79 zHi>B>&)s3dr;i=IC=PylSkFJ^V_6CP zRj6@Y18O?F$#RXWlw}JqjDzGG!pYk5mPsB{`2Nd={D$j*KMQ`cXen#C$zvr8{a3OV z!d4Tp*WSEm{T@E$O)9Dty40IOmwHj?Qk8@*wMXy|CJ0@sj94mC61vnQZUKN$4Cs7p-{oG(Cd zzT6U=FS7;bOZucl5^zJ2-}e3gIbUQ2=gYFH4Adz&U!OHH!un9^K!x$!1%_qBz6*m#CG zJ)h5O%Y_lU6RhxTIgSF??qxeGP7$qsYazG7$o*mh=eE0+o!4l}FU_bV$9G8bKFbY| zDA0`z+DY(x+(QJ$m)o%H6MsM5!794_v)m-BVD+PW3M>jMVsl4-XD5h%Bw6>$m_iRp zJoRh=T4At-zghgzGIv%I5pxYif$ME8J!+Pt>*IPXojgvJU$PA3cO)HVr&u#=?#NnF zc;^ptm~oAq%X2MXzET<$F1G@i+XWTXIgaw5td^>X*(v%#Dlpnc}z1!Y^O_!R>((|Sv63>#TJaVX z4=5r#RvD76?j`Wlmwps`+ZiY;=fd9-&&Y$tJNc$QI4pA+b5_}s7d>)hkw}rfQaFY$ zsC8gB1yACa1aC#jC-nKsjQMPQ{6B)nYOrQ(K6~%+-?Ffit4JF6fmnVmMd3m>`Qtue z@$q=%cvzm*nG*(|9Y{xS4Njn*xH_~^=>+GzRfGR((@e6`LlNUDh2>oC5V^8QP;Qut zVs~H0;-<6t4_q2LT`=7T+k85E+1@^By48#8JrXnKP{Yn2e>?@8y@@4J8TT 
zQp8yQ18Ez2OHys`qSh^6@R3y?&|k`ttqHK=KbTjcy1vcGW9e2B&HOG?>&?Pr%de4C zDhzI(KAqpbb~|2_c#McYKaLKYOd{{TYm!TPr%{yuN4(b39@@9=CG%DsM=wW4!p^w2 z*oW1C>JBq-VqhxF_fm#s4&vz5$pZSl_6bIAgEcm-6OpC8mE1;y2CO2?BHsKFNPZ<} z;dk545cX9j+VfEp={7Cm*BsD7z0XgfDH@&Ci2~ji;~3dStXHs^7@MkNjgc`VcH0^_YsGb_H8p~m zFZRWw$5F^?uMY1No(JQ84Z`M_Jfhgv%lz<7#}D^-B8R+P$ac_@n5z%qd*3Wc%RPv` z?mC9Vs|U#BCB2+($O*FdL?x~p9|Wr=dXuws3!ZHw&Ie%(UkT@_H;)|+MUKr#ZkYiZ zav;o{HCN%ldogmH-G#kWGMTHNJ7L?WL^Lz{H;%h?jU>Ejgu2IeGR{jI@KYx_SU42P zIZ2e`)e22$ajOEDy=V?G9%#o^xiskwyp6B?9tm%oX`xH>X!uOOn3+p5FnafcY>;^i z$0yC=svJh~jUO6d@R$*3!4YQ|`NINEJ{g5Jd-f!LqrMCa%=6okeN!o{)x#=OWMuRV2RKP8%8;g&A`wfk@ z2=mkjedoostiK?Z1gEw4RdZ zJ{eHTNtsnVvWReh1%Xe01PSj<7tSkRhmscGaA=PzfqqNKNWCR!ld!h!>{(Ja`rc~t zZPH}C@acon(Zr{Zlw^VQ!R<1(^MG@q>{S%Vnok}GB=n!`NH~i;i zOIhDkW72p@4QQ;|iGw_X$kT<(kcr(~VpS!HjOw-#$!9C^Y4N#bw=`mLU+*Q9v%`p4 z5^$cGr*suAS#}LBJtB_=8plC3t-ahy7)`d@Hb76k&tN#RC9#>K&|D=IUhdJ<^8UIh z`1UGI*6GeVw(>+U>-tS7v9JALJ$p(>P{M6gIqod-XdTJ!?vG=e8XEZCiVE`Xt`>Xo z?@`NZ71OXt=`-Kawt{_!Jy?D7dHm15RXp92fm#Ls;N8rtWlshF-~+)wcwF!gW(oem z9eu7OR`3s=6#Rp8SJC`C!9Q3a_y^|;{=p@Je{kQRDH+yrLk;{TYLTmEBaCJD^U27#FwEif|=3e3!% zwY?-wU}lC2%*z=U@McKOUHlJpbsR&WAQ!bL$9r*QWyOoxWW9=&1u1o&U%U_+@iO z@#!!sWeP$Q%Wz?lCR{%DOxf|`sm#|YW_YyeRZhn9B0N1K0d4BI2FnU?3?DiL#AJp+ASoUTMB1Q^uLH>J3@sPje~yr-qMfy`p#l`TNilG z^8m9fZUHRYYy}ryN@F4(g>s!&cH)D|q1ekM46DlKaLY2fxW-&p#%OXa_c$&Hw^<#< z6K4zpWAz?x(;A*Ld{&N=tz__<3U$WL=L-I?{&<=G>WN&5@(XTL<#d$y<2DMMV~WVb z7W4#Yq8Gw!g#BhW^j9qlR}_Tv594M+yPC!1vGXoY?9~Huiob~3<~yUft)=Ln)E>@i zQYK#5kPc_Qh#)Ht9}}*lh%Jkq=u1bg5cWzAw=XY@OPBS;Wexdg*4b9P@ylOq-~0zV z$r~cMx+Czt$_P|Dr3~HGxQzy1_Av8|PQvS&-NbC%1l;M+h>cRt;!Ro4OJ{i1-j*y``GygR#K=%o6n0f|D8j5B zc7-h`y7hz5U(OD>XX$Zgv~F<=w@yTcCYO-T1`IP^ts-3`HsQjXXQ0cyBHW_32=1@^ z#mKlOm90LnPULc;%N`c2=T>QFL-Dljc>l#GWe=`=V!{tS=gQmLm?Vyc9}2sjCmIDkO)_>6SLt?L{B7yWxDGNFGQ8z_8Gd@PcbB7E^tM zRm9|A31g0xeXo(~wR17solmA{EhLkhe=}RRB?^1EbhrRLWPHa+qXuR-%=c}8zGthM z*vM#XUNxV{ho*6H%VUX}%~B}4^esH3SH~S(bE-^B!x{$^=AjQkY4B_FhqC8C50i)8 
zVd!w-EbjR@W!S$0qmn>VrY>MFG}+N#HXLj12-zzN)LCXEkCQ^mMvz#&{ebo^+=uSG z3r3z^=dr5qCmeXXgt+gxOWq8GgO)GeTy;+i_j^t;>KAx^eJ>`FE79w*jOz_LSfzr&gAtnKY)1&Pkk&(I#4bTM9}CXdS{uJ!U%~nA zHWp@-T;XozYq%`%5*C+RfS-NqE;E!cgCkupFvpHLVzVx9@Jcxn8TXxl(nGUh8Tp5v zddnci@z=5FSt9q$I-8_Je-iV~7Iiv5LRCQ@v6{9MXLM1UiHzziZ7h7kP2PAN+PC=P z364d0L6-u3an=!}qLKsM zUl14qHPOL*!9%!P6+7J>BuUA!OT_>0jkrO}&_zo4r{ywsSJDRET}ca)V)eIe+iLbd z|JQ8w-{a%w@3+fu)n>CzzB~MVH=9jgyK0M%MbZXU!<5dy|L6jd9I+QCWaoNmkU1c6=#zFB+#KP1;9p~r>A>~fdw7Y z==jS<0B&I!^({7FP{kYEkdLA(4)xIO^L0Sgt08Ka@dSEGN+jrdpTj)r`$$`b&jiu^ zlW3L8k|0=q9DTX>I(=c&O%V1ei@q+s0HpkGr(cxM0cndTPy=pR^rP(@b#9#@7;(OX zQob?@lv`W}t2DFdALa4X4u1$@bW-S>whL+BxnA^_M+?E@*s);JYZVYY|2O4()*sB# zOrhmp@1W}xZ0Lp@TVS)LLDXuvRrEHyN;IzFt%cFc&7zjJWfuEG&R95`xrq9suZi|^ zAE;)N3eiuc7%H<+jgB(%pwAclwV2D^vatFYLe0|6w@7Mjq@}Y4L_f9MsLmhJ7FW)- zP;c(>7V_txQ0IJXML!pKQAL*jEWY}QELQA^r~K??L|bm{q{jS8qdv{?pbE%7sc%dUT-HVx-O_N@jbC=y6*jHLqQdYJC?jx*niQ zDJ@$;?bFJjOMjZuDy2E}{+|;?f4@JZrd_NR6@T`jzdPpAdrMU5hoFCyC2Cz~D*9?|FA5p7q+U8IQMMKWo3d~wZ4Ks7>n^;ba~D3K%I>Js zBV!6hBNo|Elb=|FnAT|exAt`EUere_(>RpQ>^ep@NGntEAEr^S3h&W}c3uW@&B@d` zlUgcOdr-8|O`N_Ge3H7XzLT2&^9M~2jiXV)A9@CNm6@|Yh~CgJKnFJ^gR3W%ne<>y z@NJYV)9(5isP@P)29Yx0VQmUSmL$;0lKSA&@-h&!Ya{3oEdnP;MlzeL(!tnk;k4Xm z;eM9#XCCYHGJ{POAa-#oC~$eqd<}ZSEHkTsGaf`RF0Wmf%KlHx@%jDCFY%Rd;r+{u z!Gi?GEFch`qoSa5sTzFkq6;6L*~O54O*n&6V-}!K%a$-SllpM+~e z(`oZy*6dVfXUA1AbaWb1p|ypXtyTca?X$soTUX|X+#;Z_TL+AdRx;MHxs20+3P!R2 zJD9!Fg_+AKf~jGe%mc}EM%6JBbZyOK*3Ir?#5cSIpBkEg*v{WTVP-mWJ}ZK$JCQv+ zN$zlLrH0`e*ZN)QB__R~%rlE>HrhkanW97oNIEjMBk#~xq9SOkJ>!_*Gf|AvlfTr1 ziXnjS%g{ASiBy365oVRcB!+!-0z9s`Os{M#r4Fc!rqgPR=(L*|VBRV>=IV4WI^kK} zuzVkt8lxGF_SB74nW0=3cR2dEZ0NB1`B$MQs2p(|9nCJ=rgqANE zELvR3%v!0+z+;J^rjfZO1|!)&-9*FN4#jy2HiMUm3PX*eQN%M>6j5O!P}7w96#| zChakT%PJe0p+XT`UeJxgzRAFmY(8UAr^-&a@5zQv&9|n)R>Jznaph^v~fUhbxJ+!VOojSV@<xV?E6?ecjMrCpjp54fjO=aV5lP$dt(CA)x-paOc*=WP0x zu?;meSWWlMNv4jaY^30c^K|K`b~^3jZ6KQBL+855fQ{L&s4jC!;8!s~g=qT%ncX_f z_6~8db`Q;oM*fqq=Vyq>P*bPP~d%N 
zJTpSA4K#h4&Kx-O04Q7u0nfs+nA>(!z`RG=OsbhGIQ;q$Si3722ulsna6ZZ`_-Fz? zJDw5tXvQG?Wd-vvtA-8;hz7k2Jizkk8zAz!KEroOFq}#cXqU8Np0}hjk!M4hm39LVz%2Rd^l z!HBF{`seLeRAc7~aD0gZxLje%^l{O&S^gFf(1>X7ly6{Mln&S@kxH*WD+W%xnSoI* z*MN@de0tZJGH@VXhmH$Xr)?JRr$Y{u(aIBxz|uJ>wCB;o^j@VxdUV5hAS_1ohh|Ju zo62cL-O=Faj)kC%4W-Y|*9Gy58|m_Yg;em3ew3x;H*BD~mZ{S28OKC^+rNnPW+#e7 zi({#I6=hUSEf!X05gnB2Byw-}pmVp60|s8&;6$Ptm`;79{{B)1o8RA~j4o8udm|)4 z_VPM(sANkYZ`cf8FB(lZF3tq^_8+Cvuj_!{LQ(a}Y#TWL{R$=ZI7_ti zwusiV8AsXG1yJt_WayT-Wa`7+qx1gWDA}MxRDO5AsM=qV zda#J2T&NYIJN`?klWHB5QtoNd&x=b%citCJdwS1MU&eb-BSjJP3A+Nib=(;+CD(_V zxMm$K)80$nFHi;%mBQ!bGz+K?^i#XC66xb<;xRMu?GfXz(Zp={ zGiO-V^Wj)OhT|HFozlS0&H>yQ@f$2DG6GvNhCuGccrd=9nXb8}#q2zo4yvYW&?oy} z(iz{(nKSa$V9~PzIz}{s4t5+3CJN7(5ejbf=c}%axTX^L{<<1e&3_0+QnTobJ6wV1 zf$Cw|C%XRH2o3jWFl|8DGE zYY6Ps&jtIgB!R__q41gCInej{8siab&)C^6Wdg(ofem|>(bO>mf9we`+c{xalB)T7 zsqo7ueYkv=JG%9B15>T|0rr|sW{>VNLPJ-MGZ(Y0;g6l>a2m_7BdP$K?EQ~fcb#G6 z$NXhZj2Bv;cFd%Gm~*inkoD3_*ewM?j#B|3Ecz&SeC+k&oNHkJ;5+eV!>KS0HvDG+7#RM6>$ zZj|=@czWu7K!*g^QkMf}Quoe17mbMt75&M)Akta>jjoB8VqVX>PUj{%GGUB5DD-Wm zJyMjwU6Z|FVB;>vGTQ)5CqrOtq%U}uQviI1RG1wW@{HCrO`w|X2V#b^v# znWnrFI#OAZv35O7rymIbYIj*C#v>bS>HkH~&A3WOJTqi&hCQbL80!M-dryIN-~y1F zUJIli)&e~)5=>bB8a&tonX~I3g5Q}KJh7Jmn}!e*TvRNhkn?&Ddop&}9#eP2vVJ#PYmefQ{7 z!ZW{WO%|AOIs?e>*hT-^@|aeC5)5vyk_AFXOXEvRLCTe5^iGZQ;7N@oy*zX$FdD3( znUW~rd$oYRaZ4X4yi;Kwe2QfbAD#}X4Hhu-W=sK>7HBYcC>KWIwXm-~BL&poZenCd z=s|Tyb!K0Q2=weOqencpVH{gH+V_eSQ=NVbyl?Ubc5jc;(hBc^&wMeaUGoTYW4tso zzGNDBJ=T?3SH6xpwBtT#-|oOnRzJ#Ua}$|!6MdN68dq>7#|)eiyT!aQYXcE;^%&Dr z%FN)yGN5)xiD|NG2YF3)%unH&@i#FMER9tGX>L;(_upwCX08ik0*?cClt)ikUrx8I5)3@3RfHge)p#^7bZr6zW>ACn@8pNz5U-& z8Yq3}sG5grX2BQ{;Ev z&-Zh$^;!3N{`vjWde*(3|F3nO>pZW0?eo~L{XSlMU@7#z_)66F^YDCg7rEcF5v_hp zVer`7@J*qR?2?p6pI4gLyCYCHSqOd$J%@}zMb#w?`^9s3 zUug)P+ti?5Y!Z@^dDM5YD6ZGzLA>ufvQ=*jZs>_4UXP+7e@6|-C)j}TCXP9-O@R3t z(J;w$E=q)?!P(dzW~Q(^Jeqo$C>^S2{9lVe+_4j6!`IJDvi}6&OMgSElp0B(a1(e# zCx=gC3Sjy;7qAab#!G7-gNSMg`lc5{W-JAXwGr4*cN|l?Ly?9?;<6n{IJbETT9${R 
zZ=ncG%C7{=N*&tkD36D?UB^?Zi?C{@4_fSF&_^!@zGPS6kDg}u^}Z4KpN)qtUj^~D zNEJx&N#NL#C|GxC50f@`KK#ymPqa*9nZKv+5tZ&p*vDoPwb?}^@<2TkSuRMlG>gf^ zU3J8FNh?`m+C#$CFEYwyI?%i=pY+EbA*KSZM0RZ}^Ek(snH|?j(ie>*OG?B+f9W2Q zvU4q5lKBWCB2%%iz!^?n34kNtcBA?wMU+h+fXX39*s5DVK1^(fm{MiPp3pBbc{6fQ2h9Q0S=&dVSpkMh7zS$AL}s#;zYw(}HyW&HG^Zb2p|O z_QS^Uh*`ULK!bNJcu(!X_VO6KmR65DwLO4uUNw79*Nk138N$}Zwb3l=PP(Aw2b&$b zfUUgj#qROSV&BTxvHK=Ru(57?*a)p5n$xp^U8wPp-rjD(Hf5@@3I~Bb(;39-jB#fB z&M#wyv=!KvTV?DX@3n05Y;$&%QwHrbH=_FLF*G_Qkp}!FRQ-ztJIuJ^(Zw_AzB5MD zbJ`=k_UsrHD}F`Qju&Br2_M_p{|0Bf$&4!Z znaZwyxQhz*dD4~MnY;ka@2t{>oxGK~7PMko36pgtigb2YKv+Z$14VO-1^WZh(_ zs(KPm{PqRjmNw!?!3!uD^#&HOqSSDp5*JC{#U(9r^tG1;Mtq>S?GYapKY9cmn?i9d zJ^+RMw{UfgJY9G?1TS*ypOb=#Xc{yK!V+5{i?18S*XZKB12d>((Xmk(yY)xiP$=aN z*&~~Z%dbqvk4~Ye*4&THKhMLU@HE=}g4=US{tgRhCT5opqAVu~Su9ryK_|j-Mx!pC z(9Fl-?n@Z&I1WoU-9YR1C}{MJ!j#IdAZFW)c6%D(jL}^BOQ#v93yIQp4-M3*7othR zvefXt9Q_e)MY|)UaWLEmFW9`MMt^po=E-jS_SqFBg7;#0ofr09=F#$3^%(fR3$GPj zMCZl%xbE>{?v#<;_#-|ER~?l^rE^8-nmUbYq-LWFn}?Rh_pw-ZFRrZ;pn8HI(HVck zY>On+_kN4%m6Wo9w)n$$0Y1Ddi2T`AsA8Iq$ss2Z8t38ygxB;}Cqnu&rxzaWI2dI#H@)38%15OWF{m{n>FtK|C7b6g-SJgX1Z!Mz|TbqXRr=R#p> z4)*9jgORN{u=Is8s;UlxsGB7|Sat`LzC4HcGxBisS{q4;Fd?yiMsWX6JkiT^gT5`( zxI92Rv-srzao>Q5(9_* zie$-9AM^1eKbTb#MqyDt^TN4-G<|&mGe5^c)%-^=Fku=f=w2l|eJA4O?_FehVHk`Q z{UDEi>O#OrS2W|l3PHRc_+4)e7Mn!D=vxtdf0GL5Bb;G~9fm6}vtR;K0}|_x!0ZH5 zvdY#G-tjug=0hLh?Atf+)lCSmxCmqAydL05P6C73()h{c3g8?*X#Y47H+j!Sv$q6) zn~w(#(`67xHo=va{h&J530gK)!T8EJNKrUKE>)f+`e&A*S#c0EKGvnkAJ zZ$IY7?gmD}R)a_dyD-1kjw7{OH;}Tc@nqh-PT~RW%$5`lq9jmDd?z0xLl=vg`Lm`F z`7ip!BV{3Tq-Z>-X^Rtu+&5%aYYCYb(@P?{C&2Yd+d(jTF7&0ggYZ@dcsmdYhDB-Q z?BdhJWoaInX#1WFW*vq4{&Em&`$257t;t@v0m=c-$gWR!!T2uwOe9%zn` z#Z4h_mX8lLU$l(Md=)%bkX^ZYJiD8-tDiggoQm&1OpRCgve)+CrgnkO>?gHJ?DA{p z=>BPUX!@*Z_IGt8EjnSvj=9~fn$2dd z-^$ud`#_IBdqxu{mE&m}F{*Pt7x`Ql(c?szEf|QUKRxcENbM_nUr`vp8oZ$vr;gH$ zup9JEemSM~ap0*vooan6qmvg2P`k;()Fr)!KD=$k%4$xbOC{uSl}Zi`J*GV>_0{ro z0yIN*KaG&pW3xVKP}wW`^v9E6x~D#t{vCKl-EB;$|KBjId#TT&!3uW6l`-sEfeSRe 
z255OkC4Ia>3(Mk{()HV#>5_0sI(Ci(R(Q{&d+#2^^nkH!Zmhtl+{b7z%h9-ou#7N? z^;RWvjU%w7`6}~JsRqcaaiqA}58U4t5GfmdIO_QW=7feajkE5;o6mDd`sTkx6m>xE zn*i{CF(RF1LgdzozvS<0ag@5o^n<{aD_olusf4GSMH9NT&iQx*%+fsPpT)^x;fAxHGgy9(b!Ct~hKg3sRT zgVDuIjKB0A$%Xq+@+1#CR&0dV2EpJW%EJmjFYdHC7qowT5FJgp{DkKSw7=(S)b~;7 z{z4elmphNj+926-8S3|@k?1R%aBJLU_<7$Df0(wSiEbIx@1H<(MNWcU%@=rO?SV$E zfAGG4C49b>4SMd;cwABew4Ce(N?pUI%1N=)Eb zWfIdq%>vsWPsUoMyNpPbCb@1aOvd!MF(bLbjGb8%GhX>N@d>j9?WZA((t%;-ui7rg zX~#TNPG3xTm4dK!)>?Qup%H3NO5x(G;@DPj9X2(|;iqLHBpHQa!`3_!>>df{lwZNv zLo3K+kBwx>_@@x3-b&Kq4-!0f2&}K@V%~3lR4Qs=*4LRrhvOj<>H=i1OgnMd>J9yg zvW%&L7}M!8k(8QjC6))SGhGWb$qv~w26x6)8-!N#7|l9WvajTcfyRYI=G)3~WaOY9 zbE|MFlipuzQ2ucVG21qcv|4*IAMK#eNqKy?+fZCNi<{<%lrz*vW9Hryr7 zcYDZ5^QEvMvYR~ir9{2Mh57NSmJ~`>F%C`_Nm%775sPpW|lo`X!dCOSlvwS@h z;4(z!#l?|_J#)#0=3Puidq1;CE`j*mt07wJzK}<2CNX=G$AFP^C^Uvdk=LKyn4O_Y z5NWZPq$LNDU(t$SyGoag(KRGY#Mf8?!`9~sdX2a5~l`IysH)(FG=Il7t(knu?k)eHUSg+iQL{| zi}FIJ$o3PB;5=p`N}05RfSMW(S1F>(w?Q~*stKjlHgIT@Do%EF#-lq_K~dx}#H0rR zBj`XD^Va}>rW{mD9$=z$RSXL4ikTB()5u`N1Lpk5b+YY_JCna)9V4<_h%w9B$^=<2 zV^&5mq|@pV@rt?0D2z=oxO(K7!Q&D$l0D%Yv$?E_85{4#Nd72d=D+PU$SEmcLN|Gn z?yNh^nNO)qYo`*Cdbyt%52usHmO`SK>_L>Csu8kzEE)H3Dr4F;gS;a@BA96>S$e+LaGmat^Jgc!W>VuBNo-rd1y*fCBRzEPF}*M6z-qs2`Cn7}OlXRrHQXUM zLka8Y-6w){DLF(73!ABj*G;-(RTrfb)2Y)mb8cs7+`G-ko=r zP76+;H~FX1E9T>9j6G(*LVS~M8o#f6>0kGe< z7Rs|$6TkQZviesA`NcPxWV~HRp4?nYCe@n|WPO>@xR24eMvO;`!9eE`eXWVhm>(N< zF|&@$XG~-}41D(&FlY0o>Z^>z>d&3kJL`EcHE?iW&*Zl3G}se3Uf(NY5i{WD%?Lkg zV)jQ~GZ-13#4Hz9V3x9x2GtjD8Js+I!JxI}n1TMrR{aG-`S{1Q3ttR9zzGNHv0&F$ z{G2C%5vStd`o{zu+Asko?HxdmFai3CbizK#gXr?gA8(4B0xNeGoS$BShkBQ=B{&BY z${#`g=PD2vPlUgl-SGKiA#~K7f+ORrqGZ`4OCkJqQIQqHo1+BZW;J-Qr zc1;Y!zv60e_?$Zw7OX+Xh2_93t46Nl77uM0gKFYA81ZX5?43CkCzjbjLF_Sfo;?HB znU#X+p?laQaTC99pNSbknwWS*6+^RkL40r$=rzBBUAj+DC+h@`%$|bx6zX95UMEoG z*54`26#6)77hX<#ht|I+NJc6_;Fo3iDM}86FEpU@d>7<#`+G}1tj8NG_9CBZ1kQSp z0WL!ip{jl%j@e~|!%NO#@v|eC)}%py1~%Y>?V?!BcL0uo6-N72;_AUTd~NTDE$cIo zQO>3+amtutaTgZ|jX{~9(Hq}wl`^hVvdQg~I3}rl6`6ec8B>ryhUq!^lMz{Vj4>(J 
zBJT0Y#9H7HW5FIa7&kIZc6tWEl81bxBtjFu#Thc}J5gqEeKMmlvcf>a;RX3&XG`q+ zE;2t)c#}8g+F<0xl1v?KNC@4;2#geysH#G;wmF;(9x7!*`nH2slMXO42goizQ!;FS znJEa#CB+`yq&&0`-s{=GUMF2*(c;Nu9~=WKemXLrW%R(dO_qdo7BHE~mBjIvBpmTu z2u}vq0Dplw$$aKXl+XE*VB-uDw_qK)B_)NP?zQmt<5~#0{EECldE)hDJIONs1L}*S zL44#6oR+a9D_uoN?WI7FbSq=#N8N{}xS6nd?;7CEZ-ztRBVf`z0MFH=L0#Gr`ZXe< z!j}&#CDvh(J`aBdRp6TBevt3t`f2{&B`v|jP&=rMtV%B|;qrw$rGLSDyNUF?f)gb0 zd0>`?H|Uv&pvdh>=(kG=))p2qO{pUAq+6A_b5f9*8)rd;W_=`Qp87E(ol79vJ%y2a ze~ZjMIF6Wa`^c=`!eiVL0-33FBizy&N51T}VlIvKWt3veVeRhAq++rF6P7=NIM;TO z+VMbmftrk+ObO{|LAWY?nXJyuXWSlbAP>D)1HXAM*}nT55v=Yfrr!umci%{)rzjHd zy!k}+oeT&pe@T>HM3B@v88Eb+1T^RziCz7ekp^E#O*}^=dnSQbi5e6SSdhyrz2L#{ zTe2#+9(n?gk#T1YNuci;BFN>zCvFQR_r9ip!#Zme9i&)i+DJ<8SdcEw6J+1#U}*0y zhpa?9!p=Sm-?%JESBx!WD`dmEp>R^Pq!cp12R0Ua5$?okn#A#|1027~{)bown5ru@dZhcEb83Hmm z*=4x9;3$oZE~7#+h%r6wFy73XzCRsFt5mG%{TF)l!&WsqYe6U#DvhP4^`D{9aVlG~ zwgRU-E63<)GkSYX5S?5sNhbt89u)>K8t?PbxQ6u66*%2WmPVMrLiwi~P-p6K@L83P zZyGG<9!p#H`j}y~y0{G%=5YOlHOuMqKy5m><0TFXUB&AMA7Vr1E;PJx6@vHYVoszc z?##>s)uz4l&EQ0A{VT}InkbA)y*u0|$F5ot&BmOG=V`x6VteIJu~83CvomvVu!HJj zd0oTWtVx9|JL7UCFOIp$D_i2gTR#08Ti=<<7Ig~pGIx)mb>p9~xqjS9wF}(Y?(;S@ zbN5H86Q4tO1dio>zLPO3_aPkC#Ov2c<>AaV8woQTF*f?xm^^BOxoFD}OVIcX+l5C!0fa~hV zb8CMuXjRcifyz{DuA2+T_c(+8voiSg{4Ggs%!TBGU69ulgI=07E|lV@Yg8bLfDB?1%N?!g>kfOV4}z|o%z!LV@^QHeeexBCupp2MxYr||dkbI{@HrCO_Lc&QgKr+F<<Lh|{^h^nqQut*FmEOy~V zOBZXaS!_9rvwRHBp3k|HTsKz=ysv@aEj}AP;{Wcd6u^DH{*wT`3_tE;) zLi%#J2h~@GQ2!shaf-VDU7V3gB^e`Zxg3punx0|U#MM-BX*B)q_Z5rRX94w^h?DBa zxh75 z0yh9%1|jovIJo9+=CXdO==CyYgTs+Sa$u>0&5dX#ptA^5{q`CkeJifrp*7-s73_GAoV?WuqP8x5?Rl&~i zF(}n%1DAJYkyo+)AjIa9EgnfktSApw_78#48ei}dnhHDDar>;zCXg~GhrH&AgF#yX z74EDARU9hJXOXL|$EB_EUgs~Y(C zhAt=;*AuJZRj@SO1=p0Of<<%z9C>zwED$t>cVF5-cRzzU$IoG6djGp<<~jDs37Dg-3^BQJp-OM_UR7T5aUu`vUZL z>4F)v7eY^_ByQz$BjKsnfM*v9*OFy$eViP=8<4^$>!0F1l?mXWbq$<?gwZH6-}=URbBR2#l*`FmLU6NGajR z&u2D)(UH^eVA>Xl{iA!><}Ses%R9e)W*!SI;?q<^B)9ddKlAp5Hb0 zbiO_NVy-LeIrj&RXrIX57X8FFZJf*=tX5}Fe2k#1MFcGpDPoVeN3*%#_1M<7i|p>Q 
zDjK*ynmrtNi<*iZWw&c0YrKv-Ic(1$eHze4ts~^w4vF2Y`T~3Q>8(gQetrP8(i31? z*TmCE&2XB-uBPjfThLD~pO(cCHgBUQbu%--biqqhaQp{+dOMq@{=I?mv(1|JRBC7X{{#``_@=*-(0O#Zjs_&X!&* zpGl?cl&FX;ADuj7AmEenxmg2#pP@)?euwgAhTURghHPl@<+ZHCYXLTM#GTzyK7r?7md_S`k)}c3t<-#z zE30m!$eWTX$y;p^E!+ z!wpNGveqAFvN6je4J{_`Fr4_kjwQzg40U`zv+p&h^DYm+HazXH#!&w-kByE!%m&R} zXsEh%x*<+4He5V#&QMyg-Ee*d#kBT?$RAfmzK9Bd^eQuW_(lgKPY#e3DtpLcu|aau z{xT6xSc&CIiD3NBjtrR$k!_J9p@!w$Kg9NaC z-vkMp-w-3;O;~s-7i&~MTW+mg%3V+Pta*7oTWALwQ71#^M zREw+qd>RBt|DFZNlnrwXoei8$&}+ z!RCXy^w6t^pm-nxPQQ!BCsm2qHqnX3E~XeUouB3^9>G52Jp6dw4~JKs!mf?0aJ9iP zwAgwdd(y^WfEyoG-`WcS1Ay6o_F(mnu{2RIjP}}ejLO)({^&Jqatww~6YgLw*@5Th zt-{~0GqIL^3JuE!@%5McV7;IKR2OF9qNDHeT8jiq9+`=oEfcX&NfvL;{s0gDR-j60 z9jLtjgMu?U2s`l$Zs2+WuDD-B&uRUDD_Q!$a5EK_$)M+EiPD4ik@!P5oH|HJ(#Drr z*!M466gTJHP{1*~FdibXeW{WY0RUS&hq zU1_4Xc^P;}`2dwM&qpYlLi27O#FDj%G+S1iLd7`R5?Fv!AG^|<#q;RKrBzff#)yK` z6m(l3jGr{raKK>>PBYE|$0Kz})*VE6LSgw?9cZfx3q#N^F@y7FDB~;RUf3R`jAB#TvFph!dapkhZ+$z6-d2t%*ZTww{>FjU zuOH-T#W~zCX#mu=y@OvHB`~T`8I#Qc=T7+nqFd{sPqzlJXf|w)>w%g!OV}!~88xam zSbHK4 zD#SUSYm-av+i61T=|99YzZbNGw}8*I&199CFy_B`%G6A}Lj0>0;Iq|q5|%QPSh>Z( zYrl4~tmha=Os#;qT8gMAVv2fe1F%G84({xH30*D3X7(Dw$$ocJ9eZ8B!xSN98 zca8@=Z)cSM-p2I^FC&+qy@Ni7A7o6FExI(Oz=I17F#8l&i%!Xf!*wF4c+ea#Yi&Zi z+l_d2<5L*vJb*u!m!p!2A{ILgp(=k#&Lo`<;Zp z5bN(u1I<6H@Z!obSc^hbHH6!55b(py@5XSA^T7Pxm%N=jk>G{IJAt#iCtYJAwdjZIZ6`Cgak#V^QH#A~=WKhedOxAVK{zoO4+P zE+%{N*Vt0nzGxHfGYc3*%&u<|WRJVMv5E2* z*f#Qp-cml#3fl^@8;hT^PUZ}o;a|lL4h^z}CY-@?13zm%DUpT*Wz&HoYuX*~3nL(e z6*P^YJ6AGPq`DBh3u^IAoZn|U=l3z<{63bP z-$#t|`&e>*pCz2%X9nl@xySi^r2pmj5#anjA2`3ySkCVw`Y*o^^IQxwwN&w?v>?s) zbEb6%Epg71c(kp4ftRG*@Mp9DJ-B%R9c=K%FK2q;>eCst%!**$lSiOZqy}HhU8qys z2E4Ah7a9%A!ERkNCO#HG4~yIQ`lbi{eQzu+6Z#8>Bfmnm%S%`=gOA#Vi_+_D?`b^j zq2r4Z@#R-7jMwGjp%{R_9K~q3Z9JOqzX)k+dvUsp08RJGqSxBz;>=V{n)$#4?ao`G zhE5S4*gqa&@iXN6pf)P++D4sdR{zu$w&s36>;I~nJrf*Phi#<@-$#Q@$L>JS}pIf zqINZ!?&PO~=L7H_rh%G{2=!RZ`G@Q@sDt(@jI7;&y7&k;Xza(o5j>RJg7gfxzmiE6 zDF5JFu-%u08%OTpk%tU6L@8qU(S;P+rqbcM0&0o#=~ca%G{;7X#%D>>;$0RrRB9IT 
zbD3#Ld3m&RZ^6_&1^PuXA8XfaLeD`Xs(W)SzTTllmllasLB0^}_hMG6Ub=KZSlPwCHEK4Jf$#8~x^IOn-$u$9*%eV%_shylgK? z9RmlctK|%8=uk+b4~L>`o;Rl76r?jIRnR}KLfBo#?JG<_N~c`rGM&Z^TtCYM`lO$q z&cAkvUb=f1v#0A)OBXc^ysJ*@>=#j|aj7`oUX0FcN~RGPmY|@q4sG0|NPCwhV_;w? zP0y3T9@kY=ZlfwyoE}ScHyO~JC`#|HR;0!8BRGV)xMM4iPTIqLev$-u+zNmhCnI6v z-LIHGxCqvS&7sjzSFq=n8g2@hf;&AvgOaTaR;_ZR{E6v!qe+XlKD-5?-&R5MCN=y> zf1~rkKg88}Aud>yiMd|Yu%_=a`MKgFY!=uLNk^{}pH@ehSSEBfmI;1;tSi&7?I@P-sfs=Cg2c>#1x{XlN~eMVxJiGiZ$0r=Xe3`&dVaP^HRtm)Pw zi*9!l-Pc!vx6=Y{#2$q*EkUyMtR!g8YA2s?9oNrsk;!-s@UuPyg6H&-aRrX#zcP6% zb7n!-x3RF#{~nmgG{XWe->sA{`R_0$Kpu{a<-?#+Z_2ux#?G>po ze-2LRVq61I6U-WZ4HYLkiIvb1ki6dmU&jA}tdc{p*ia6Jr_IKf^=+e)lM(MRV#g1S zXG>`ryVaPFb(C$SM=!LomD=mrRaFVBu|_m2nSFuohz@1dw}h}y9=NiC(^}Yfu9bA# z&?eSDzL5T$%CLJJ99hvTNvwEU6|J15&1Oj`uxs*jSnJ|ODzS4B)!HXQlhnGfJ|dGI z)|ICl#apodloQR745L52AJ8@94C$BW#@sxlj(ML-;AjxX4+C3qI9rX{c-PXYwP(=c zrz|ZN{y=q&uiMS>gW?aDs@TyKYpJe&hN92^ZQsm`j6k|#y@@^rT^jg zxy1Q>QU?Cx_p#^vK7*X!XD8?PiQxP`i#Wdz!})y{{^R%A@{ix=znq!#&t9a~|8i!Y z=bV{2oHO$?=gfT0IWwjIhch$f|HYZkIf_X<5&i+w4DtnNl@sK~)-(}k4 z`N(I_8q`(_EK?bd)!jyY&^{SjUY zH^D;pLOSgS50z&oARp&en>ieSK7Ryh&6YIk-*f^)-v!gi+dn~H>^j0iW$eBz!jffY z&{J$K{aCyOPxi=R)y=J_ek2Lk)!AXZXb?Si=^&inu?_3`?4U(44Sj@DFZ(56i)e3fVFcMRvDlxBN zq;?NIQ=12gw`I}r(jL@VUxyzCWKdCMCRBLs#+@STsZT8*`bei>LiJ4?j;_U5<9^}$ z&+U-bUW$P;KH#2`I7kjTje=r+Xw7g5$5TozKlY& zED17M16@6WG;jA*a`@9E;?&}T>n(pV-PRgdCTUI16hDDA=Pb~+ZVn?|$@PriFScp`m0n88Z zCYE7uh|QFVq|5sbBbJlH{0eL(Z$6zhpe<3Py>UDFpeIjMjYgR83rXah-7MJpnxCB6 zAW0hbErsRDd2rHKpDY&DCfSxd;O3Om@O@wp(bIcDZnkl5mEN6Dw9gzggc?YyurBnO zQ^>FtB|d^7uzn7g{}avyo1|383{oR8>l|QZvOl2#VI<;E8B^ox#FVe&zE@HP`uYYT zxSYV|h6v)?w}iBmNs+3|BKTGE2>wVNgVcmOkaglYquBb8^u2!!JJnUd>{>2a%hjcS zHvfhXt~ErnSB7!!JWWh;Ov#Rh0jNFllGt`x!SOc&(78k$XZST@sa_#|>{W(oI<2s6 z?IFV7`v|3PbU}W0B0h1x4w2u(N%O_S5W~CyOU*$b{sk~~%~pK2;VceK5Wz_O2(rGS z5OXUd;8*7xrmb|?Kxr_ES=RWD(YjVfD$*x{w$ExZ zGv!G#+0gliY?`r@><@iFEAFabz9}UN#nP${=Qah?6CuMt&$4coOOq$G%KN&s9tAG4Hshrk z=l9vb`F%_{zt1Gj?{k;)`;`3S% 
zL>Z)2&mj%+TF}~-0X>7UuwHWm^xh5u$D=R**VKthM1#0z=P;+)ZiA#A0ix@g&Ir8J zWQz3ynaH13Oh9TbBVWFgX_Koo5S8{MlA@Chj$oL9T-s!V((;MS4b8KRrqvYkR-)1% zd1nf9Cs3HEwGA^RdG{Ej?)OZJHK7r0-HNk?sAkuE_aJVG_H(&jYOI~y5g3RW} z*9)wvPf#KJVytPOhZ?Q3nnC$&U*hH?tHJ2%CTu^UO-}?!<6ouqD7$eS9b=kFSE}y9 z@U?YB=6O1fIldeW7)v;@a2#pskAk<{I%rkt01OC7VR>T~gxjo!73U-{*I+d?JXt{| zWCTNyc@5mZFb=$L#*tmW(;;!N9PT#{k;cRhB7Hp^?4>S1ahNmb{S=0%;fK(>%V$)6 za|3xBY&L#C{+Lwa8DV>9JtK}z)E%$b>*3YYx!Bqj4;x(Y^Vry z;=E8i_%=LpEydL3=eZ2#Zgig3h|B9FL1Cc|CJI)9dHfKLU9lMKZ+!>l9Z~2RJ&yBH zMne6mAawsOgS>!?=okMR6VEL`(Kl8om&SRozPsUz+43kIoCq_ec4J1#LljS%h@55w zUx-wpZ0b1tEIb7p!%oAa#dB!Dz)9?$oekUbqiNMc9@~*x$e>P>Hug4q*LwMUG9yz}4g8NM*uFmu&?cIQa&)KUT#QgUuMePzo1x7((^$ zJQ&k77N6dWfw$HjWW}j$h^&7NHvQ3%8ypIoe@;f*Gm(((!Lhfj5SVp<^Zs2kLYgQJ zE6gMqeF@GPI!=P98V8akR)Y7@ zE+!;^^CaH;3&%GcCql{}$vvsnkZ^rIsBBuYi$r za@b25^z~Sdrd{`-p=vI+FJ1vxzq&xgqaP5ZZNp_K#^ZO_Sd{-d6Y{+m!Ii0#FKZWPTLdJcmcw9KAU{4%_1+aWir`0icC*z zDa=hPhWi&A;NdnG!hh!gbG%Z4T=5YG@=_g)X71qDLLp$$xRPv5e8J>q43e`e-x1ji zaT2|730#SegVakVAW`Q8D#Vi{>WQLs#3j05A)PYNR4m@od1+W zy!U)0i#QLruJd!)>Y#&oPZg$XBoa+CK^Pxc4|mFnq5P)-yl8X;ldNgzuviWk%bp@1 zq8Fk~?|ovi@dA(Ec|IM$;B@WY5p4x24fogWU$z^I}F+fi)7H20E&0wDzafbl7 za^A$VmuO;IENdE`&rW}~jJ=SR%*HPlU{^Qkva3>Zs6dP(d$5YI_ge4K&%Z>e!Gk_p zYG=iMa=*uh)?cKO%WvbykU!k{DU0bEP^W%Z6R34*BDI(^iHdGalDddT6# zsO%MuM(r~i*XY-lq|;qvXwNKH+A2B+yUR;)spd86cD#q`Chf(Aak*$#nM?IVYM|OO zmYR5|(3+J-_;*+dn%up)?&b=-jUh2~;H5e%)MLimYTdvVMq2P>hRR1}jWb`c%5d%c zbF84W1n)#Eu(uSCu%$mN3}*!k@N{yH8(QFR-n#7L?6{EYyh%THu=<;0*@62<46`2e zv5~6&hS$;sct*Py82YRlYxr^BeAd}s*f6E|y`gt+J^i+BGfbitwLsiVb~+s)FTAjovxB{Nqx}O z9D(i5FTvwlHGI0Zlh{x2fM2#Xkg|Iw+DGfb!;B*Ge47>oKU)t6#p_{fWNKcsbHVy8Ps+{rz<)z4bh@7hIu&PZxv5Iv{^t2 z$`28gvbqmO3Nauneh73fY{B_+>oNIH0?LL8VCDnPcCxY=G`C#<4<9SslJp4s6Ha4E zlEA3&M8&3U#I@EPvRB&R-$|Z$gyY_~jFNEW{#)=+_6$yfVe)_2d-JfG->Bce87Wa| zBqX6k?R%~Jvv;OQnowj)A|;h%h?KcWrFley=2=J?6BQ*Hd`hq+VCk0XERd~__Hgv*ML;P4q*7~z-( zzb~!D?%q96_%IOF7j+PGeSg#tb_1y*72ITa1Ki(L(ev_o82uoa8or&3BYecs_wpfn 
zBT0+C?cG4%OgKp`Wp3fr4>I&cz<%m|D+7N;38?t$VYK*!3w@rUNtg8MP_dh$+_FCt z=}pf!m?ZB+Z>%(+`c;-xIXexft{p(>iCVNy@gSYBP@c*x8cmO;JjTp~QY>?vM!iDQ zF<6SR6lWBo{Ptlqbz=s;(Y}G*+xKJdXpTPLH=Nq+Dni5ej3IIM6xx5?jt)P2F|lwR zO`2;$N9B5A`!hZrl|2Nd-#24a`*TwJYXfxqwlnWoH0s$`J_^WRpI5ki)0(y2O2FThCzHad?A*KKKca1F9x7!=TppiJORQVOX7^D2QXf5 z2QKv6gDXnjLUy+T)%#QlM?}rgBfbu%4d{T@7)umnPKD!*&M=VDKvt#5;Iswbh@{PC za!^YZ9bcV;U5CHHP~ICjt+bR398<-#n2C5U{2M&^VTjx$W7xV^6?&$|lCt12FmHP) zI<(25$B-h3JvSQFw?BndNsA$(RtVpljX}3of4JwVgEw6_L7?h8vP5+o+rN1MJrzaJ zzMJ`JoCvH6(SXuxlKAxFOQM|d5L7xZz?xCEc;2K2YG==c>J7RW{nnE81Qvs=nkinf zmBr!v_G6==GQ59(9Dg^(W9;D{xF%PC!eyQ~>{c8eh*H1~>l0{K`~x%#2}Ult2$^wK z*!raeM&)#Y3gZm;_@&~FGcvf?P!&@vN8t-UWgKvDhbIdew>)GP+f53=v`3$zthOAi zJ-cCa{2^TAC6DKu+_3Va0=x{B!K;$Gn6Tw3>8`#8ohp~1ul*_H49ka@AThknYk~sR zcwjgmB!Bc^0-K%98!{YzxEH~vm=ZW?dK<^cGFMEL704|*OqAurK;0k(o)p)Ub6Z${ zJNOd0u`8cEJv|i?FT_Bo&jxV65eV~=WpV5JVCXp`4sXO-2{|wWAC(;epV8AHds{km znp`6_)5o#znNrwfpbca8-GQX}lVMFl45-i5Lgl>^aQZw=^t4k0FTWMgvZRxkx}OE@ zr(WPT<0*9Ldqb;+1?Ueu3cufE!NDIX#Oz`*jCq>@ifU=lxatpBWL3g2yJw_L@e1U) zzJfzLcsT3cVzQd$SFZ>2t6;{b_F#PKfAXt;_|*U8S3WGia$@;aIm@qx|0h26EXJpf zX8F}pmS3r{{AwM`ufDMS%K2Y>>f0>8vUw27>3ELdoPBn3B7IZ2qMY>Rj$CEScTQHwl3N_)h?;Ki>1atCD&bIqowNMt8p&vSWn3bit#3xP#_G_S z$Kz-(e+-QjYN9h$-Kg!D1iGtd7adk9Pb>CZMitq5YCJrS2A(#j)6CD}jdcbzS7Q># zzn(*H-#h&O*NOjy0yENDgQ{!Zz+pKzP@|sEKW$&|!k%mRJnj=LCu|1f?jO{z5W_D9 zhPZH70KHw53ZE;c(Ao1j<2l}?(*1)sjdt(up`#U-An4c`y*H^|?ol7a4A&O0m4lrH8%+DG%Jni%m% zZy&=Y9h|`*GBAwW5$?@(>QXMd>LuUR(T=n8Qs6JEaOD<6+0n4Q+qqub6mG||Uw@@e z6tVRYgs9))Tn{}HB#s%!&-s3wQ*Nvmv{zSgu?nmBuOAV9QK>oqZ1#J>+{$9XsG*ho zJgGDM`o%%~?O*l_Bo-PA=D3vUKhn6xr_WRb=3b-t?mGfGGw0{rm{|w-XKon??s+TX zm&u)Q!J~*=DqadIan;22@p*iOYeBw18(rp9LZH`IHnVjBPFlB<_d}M$fKn>%*3cyh z6a0vH&QOrj%ZK^?g)nZuI}T)S#o4!qL$jC^<}6Uak2YE;Ma<|fqcK>p$(-68X0tb0 zo6#-r3hLYzLyy9fY@*_+S(gtQNn;=FB(Z zvhnNb-um;n|F;M32q?j{t^z#G`1iy|6YJ{^8p4;MlCdLAxRlcJqUQ>cw<9*WcnsIb02o%_s>t};=epY~YNC({#frSlHj z)~rTfj1Z=hnvHbaMiIKh;R+5|i*j+pd{C{@lpgDzPM1}Tq^8fsx%%I7xb9sjJzw08 
z^FxK`@$W-v@gf&$TiTB8_Rmo5#7O$lEt}#I6&iEr5vp!+M^SSL{P{i>Uv3CT343YW z-&qekh1S4VCvDg^Z9W9u;PBTbHjl~X5k&fh=#UBDVdwp~n9XKV+~4-Wqo|?Sogsq? z7xZvc-3WYTbpo@d-o=2|K`0|Bj~nN@qJHue9JQ_q&bHozs%^K3oZ2-={ip>KzHWqD zo?nUIQ&r$LYGKL}HI%+E9ep;aVA#@^FvDC5T%NnZfd>I7+dLDr%uS))>NT`3iNgoo zDR?*i5bSOF4*S20V#T8bIHaBeUp_QIm(>LjSvwEL=C1|m2PtsDGlYz^e+2n{5}25r z37yy7aPs<}@OIZ7kbS66L>B6Tzp6YO$Pxp$%ZEtUv=`7Ke;g*qeIv%3I$*zN9a`KU zjhi}l!>^rLASyo{)FcgX_C5Je8UvVth zxf5sT2cTc>RuH}(j6x)ju}t^kSLWhgbuR~of6YVQr`~QnT+S2l!1uQP;7st zPgT;Hi__s0q^P=KALCm7UiS{$UVR~-ECo2y#T)mxNTKxNVaQ8Zjh+McpvbVo<4V}~;36nU zu&le)4gFpzVesfm_%kgAWope}zCjM$J^c<|z4$@sOF3*%iG%g&*I}cH114vrki{o< zvG>w&kh$Ok_ID-m6>mE(xm8NkN>0Pnb3&-+ZH3Pl+=erZPaVnl)E5|^T8r_iMF;uR z8H`W;gYl_{GCp+!<5Pe97oWN#?d1Q)ryl(;K6S$&pL!|dW36U5hwh19Zer|6%R8n)OR-5toHadE`NTI z=H@S^-y;2R)So(ZUE@vn&lp05#}w1jHDmuuo%j#mXEo#d_zd!W$ROWm|G)S?M+W&m zJ%fCoCdT*a-Jwo(|Hb#I8RYvYKKkGJJ~E8&bM`+uU*--vU;LT#C5Sm+`q-KGf;nI0 zne*iwbG}$G=gXZz=Zi0MzBDlBOA>Rw*fHnJ^Fim!tU>2X26MiYGv~`&hhp@qkigly z4XCs`V15AG6^Q+1vb0H% zcR@~Ip&7zuIII?A`3i9sr{wj!r`q#X9@Po<<>#*f$k-akQLzU=`QesvB1?$a<%YMB-uyk-eT5n@o!{1HwrmZ0Aij<>`_9@7`~oXPF^ds&RC=c%DKB^($DXZ z?=%DKdR0*Is4wjE2*WGg3-B^?Z{@@d;QKc(@na$Ljm(&ZQ%221hiwt`NBd|DY+j0E zs=F|@Mh)-Xbf6Xa%kfEhCH}DvM*loTtk3xbXFQMKlh%u9bxa?vUdz)l4_DEhMZvi3 zl{@7Mdht@#F>ExsfpdZ@u=BY;wVLFDn=ORV{?{AK5|+iDwo2S|;ttl<{YDYZCv@q> zG6-2OLhtwZ!gPTcnrOEpzdHd_XP<>-r%LhUHZdByiA^jna{ViH;`P3*c)Y0t%=GLj zNFHbXE?0Qqq@)w5ed{uAaB@X;)ofgC zK8lXgh{gwcD%AU5MeY3uT$EB5Y$1FFNcg>?yax?JB*bJ;~GvH{$ zH+*%q4i}yKiLjv^54A_qjR7O6^6!59X7+|&{%D0p-+F10o zD&Ax6Y~43(htI{28s5^S8S6snT{#n4WbqrD3MbOvZ7Dc@>lzx-J%z6FlcY(WOqcw! 
z4>bmMB3mn_{PbWtccMR4`)Z0Kr=p#48!Xt5 z^sR(7NG1Qk;0dR(==WtPPMv}d6W3$=UO#Y1$cI(DP`Im9h}yUHQDPOrv#Y1W%CxuG zb}t+fenbKH^E6eDdk-v4R44}q8-N7 z?}Cny3OGD<2idkP2|m5J1kz(NU`m1{+!m<;yXg@Od!rLEnw`E zaBvVxfW#0{$Qm^Yds?F5sKE+odz=PqvIgmmo>{F${0i#Ax?&@QiH1 z>+3w=*o<|cQ5=iDKOcbFt<@NK%m9U!cd;Jzc_6=K&|vR#xR|Gp0=5VG?LZ5LPo9K2 zOJtFMx)OBQd4I~x3iW55hyHCh;HvskjNmdL(jy;l37gX=Lp^Z$!nNRPUJ3ziry)Z= z3Og!f>6x$Z(cw`L4tywrmc`+4q5UJQo;4PiN?rpUkpS4SaXqB!zlFvzvq4_14LtT< z0mndHu;1_yL_^C!`hqa*d2<$8qO)L2X*PW9sweHboy1!<3ADy~fgby}LmoDuRoe_| zcGrmLp?bJ`A{!n)bp(UWis;y44-TV~p{Pa(OP5)|(fUM^P;Us5af{GDY9^k!>H}rL zHRPe;Wcb$P3E@AQAt+uPVm;f)#>`?6(shQ%nG>N>JrB0ljlzaXKd>CC0v8{N;QiwU ze`QW4q2v>t;xvkDzwgaG+fzfAcbuV?XBTs3Zb6(x$$IYI+9ukyTaF9gtihfAq|D#0 zCd(cFtjrz%c$Zepl;j$w-KJ=*$#pNw=3K-Z>7PZH=;}p$u6)N@?pJ6vci(ITr`+Oz z@=+l;vvWP|Iw(Zxl*>4A7ScL)EpdF_Xlnk6ad5=KFr@V~UKW`|+jhHCo5lzX3HPS6 z0`qZPHqbf^0UgyROpAM)U^;W5if8PlCUb=;4qZcILgoI-dH!&a?_)m5_X%KppA^RT zu^;676fnL|8sqyo5AuD)8QRe} z`G=XAz?hkegU*+OgU%O+|Kxo6hncyaF*E&`^X1f_^W`6A=0DDtCCvFE%$zT$nDb@N ze?#Ysqq{Jz_j(48ubx8j)rL@0`y1!24Zwz?v5d903zw2y;^5Sg7a+;&ZgQqvEeI%qtdOC7!~axe+)kHxZvnh||R~-n2~m7rnpzCGbOb(t70u zSUKkkjvj77!-|*DvYOF&@u3jo=&-ZAHv(7xd5n>_Lh$~ocnkp#9PyzYM`jx0lJx^v zKF$^+EOl88Vy1!BfhsQ#Lbb<#@CM6EWQd+60pyHE)J|0rKMf-&}4-w?cXyLt!HV&?a8C@&*@|w{<0U& zI_<<$i*JDagrWFUWi&P`zea9C3y{jY5FhX!^>+rqn$GP|eYgV1%w{}%+zb~_y@{#E z{c$PtyeL^@VEsgUtk`XXUd!chhW6g)&z(huZA8q^I+KSWSqN06NcZ}gC7lz zprm67@%=U$5>>_UdAlL(70QHQ(Iw#c^*p$KNd!xo9@zU^7Gm{gf_JDXj(sN!3#+tY z^2|dJ)T@XkIak1N(Ofu@A4;56N5IT$eZ1A~BSBh`2YL0=;etRwP7G}4wG7oJ=W-{J zhwuD|`FuNgf8aJRK**1%+g>L#Uu-7IGKoaH_6$*snG6Q2&+&RxX7i4N&lh@I9X*gK66zKjo3Yit9&A}c(5+Y}zAk0Ra0)#Q@oO{li)C)cyJ;O(>P zgjDfKlV~8(J~bZX*bHFg)EUI>>jPNSt3$>~ZQ*6j&?5X*)lfQm8b~-t!z+^>-j+4O zpk|g26Az9j@*NtWU+GF5JfnGQ=}%C8vH~v3)PS{rJa5{TBHsC|*zUUIG&0F#DG#eD&lm?e~%VXH{$>@`I z84mgeLznS9K(|c%X|fiqH(WyrWvG5kF1CN)&%$dHBQgQ>Y zTQ>rC;}{X^3n6x1mJoPmF0q+vNgm8pAwCn1kifhuBJ#k4r!z?s&Igv0dV?08TloTL 
zupde;EY}3vw^vEgfDM`SY9u-OJC)}#Z6z_fD+Wq?YRSQGV~E)eHCT7>14&NOn&NN9LIX^Xi>a;6_6oqI~jWZ`i{F8T%UOZ!XGAS_NQR?0!x zjY{6RF$>7&g|A6Vl|QfXQWbCi>Ui?5y_syDY)p=R+exfjx=BaXbTVC4lc!j9k!%d; z`73)Y2Bw-^^i@B)@qi>}=PAj3j332y*jsV^*F!0H2+;TjZT@#?aQ9YMJzeWIdzHumEPU| z^#S@{_{D$tK1Pi1`62W_%wh=6tz4=zO^|=zPg!&Xd{0B2L zX3+WajWIJVnDeEcF*7xp^W`REW|lE#X3e1UrESppBF&sH|1dNE3lI7K=^w4~%>>`@ zYrJR6QovP05k|5Zn@E2?Dhb=cw5nMsF*+8kudO0SV_!jp!4)E6`xM@+`U%#Dq(Qe< zp2%O4z%A)f@MFOu;L6!-vf3wTklFyDW;H}i&;tF+&;Q%_Sgt;lBbA7HBLZcJ+3o<| zb2|+pBAZ1<-l*pF7vz%6;bO!%xkAq|U@>{$G=-;@Xhg(0JFQ^EM5$AnH*0pK~$1w1&P{t8Nl zy7xt7)3R-_b%Pa#Wto6+-7pw0V9vO-)8JpY4jvhu1gGpda8QTf3!5q6zGFYUJ1h*z zm;GTB)w8m%mW z`2|84oR=&FL)0ZPF(DgX zOXT8nr#-k>jLo}U3q^aGAH=b=1CANZ!Q-=~(c+mdazf208DN7WM)~4l#RSanQNfay zI&AyX&*qt@+xt$@wxC*s!?74Tr`Gd!Z?3Fo3d zK+r?~GzQ3yF-&my7A{jBU`+${Z+BF*`oOS zjU$+LNRaACC*BR&1idb8N~(ib=p{TNyu#6vBz8fF-o81Hc}*H~Vb`)MIOZb5TQ1f_ z9{W}D;u9kEEHd+m(Rf*2>smhZJRQ>0kXxsxq@2V1+P79OepZ6s#7}Cx`KIDLf!ZwS z4a_3F;xb_0!GoJI3rR|^7O8$94M~}+;ozww5U*_rN?*gE^_LH6GzbEo@nq5`r$VaI z4}(UH1mq9t0c-7Wh~g=NUU?ZjupLUo4t*gbQzhZU`4_xbDjZDS6-q9tSQGX1T2j!v zk_>;AK>VMy@Cd7!ZuLbGTX%ph4T+#P$)9IeI{}n0PKVE8ZoGQeWYT{BDv3k`a9Zq6 zp4^!Xx@V0ES9gS0w(KX#5_TXDjCO$Q?{3(a<%^*58-i#529@>F=+N&5EneHPReA@i zZBGLC=4&v_WCOl(JAi85vZ(Rr0p#!g0UwlCp?k?HwAj;2x-#=nK(oofD^E~i3_<4= z`LLyM4Xe3F;`LqbWXre-L^^9C`C=AM>ZS(smYpBq{Ye{5cDxdU4z7q4MQtQ?R)OT> z3@wPxX(C(Bcag7haYWZ~EbqkQz2xquM|xt#Q+N)sAw0`*HhQ&n@e4wC06O2EP19__hV07yXk-uU| zlv^S|JyRRjWejoiFw~6B3WZq;^gp$_tu(@Y34A@4(gdMX<_0u?_Qqf7| z9J9%uox4HwnLHTzCWB`|CN!RB*Ir^_5OSpe>hEboy6i7Fs$5AN#Nt4HULcgjZYFc` z_$1xDhwZ$V@RZ4Xkj-3AYBwE*ueDjctFt8G0n4w38U9Z`^&`fo4r2LLGRv>l59U`V z2l><+S$=hVkWc-PVUp*M)Q@b)gwGqp&^jUtT!}!$JEWetwx`C=1exPLU zZ7${11)8efPJ>m)aWB%sxLm*8+`P;Wv~RH%ry-rn)vv4JK5jAOoDT+YH8-Bn_Lv!* z=xzrp)hNPUCnvd|2TxJ8z-2UUr#iQD!)NNR63^vbPo@9#Qbk5e(-HWGTF*Ac*{ql9 zU)c!%^il&^FZCnqrFIVXQqQwqs^x#`r8W-sQrELyY8&gNie%rR{J~yoI_srgW4+Y> zrwf|@$NspQ&#doXl23P6y`@u%PtxJft?1{6&#-&+PpD0Ep>wR(;T;D#TKn`glicgZ#-nN 
zWOq*PT^?Qc=qvYQ+9j^K>NHnAVIOBw@t!X7e8NS3jpo17y+yP1_334ek^DF8x<_$W zH}^o)lJ@qm;Xb&Y;7Z0jbJrGKr(r8&Ia^U#PB9QU-PB*))ni|{ZF@@p%KIwGQ5_PTU39=4jsYU%tC(N7A3*&J|DjMQz1d<<_L%{l@nXQdFY zX-#lday^LIdy{*w=E8;KbMT_$2N5x-A@Mi@qb+8^%yZRX+;oqGn~#H(cX{y0$Dceh zSAmgP6h0X&C66U1fNZrm=x)`*1hH*+^pFVFDz3mfJt3U=eke-nFGJT(f0VrMj7CA; z7$mMhUpTE~`@F80Z)lC%YTv>29$739HOJj!dSR)=1gxkQ#j;b6;L-R9*b^v?!ycZ7 zBiH{xB>DSw#hYFF8+YuBzj>MD_^87VT3JT4>#aA8^7F*F5e;Ofk1aZ>FNKN0S6D5)1>WwML7R=5 z@aSZHDm0=Omp^!llZ>;m(*FS#NDrgNMa-KdKZbJmtub}xc*u6niVi91!F>Ka?1=0FD~C2*zA+sAj1pkHKV4tVTtCz;YS3f>qE z!*R{Sn7d{y&PeKk9S@yh_n9UL4%dYY1zkw;8iE~p7s%mxT_DJ9hPs=V(6Znq*$$IPO3*r$Dt3^b2ob@nsZR1-(8i$s#b_d+Pt(+^Lv9-i%<24#t} z(4}4tZk|`dX0`ROuWmOe8;D`vXafjt$OC01Q=I2{2hMLWgXe8IiYmf1Cro(Xdo%}*hsMB z!s@J>SW*s{`$8Jdsuqx~lQQAM>qMCK^*HFxXoJ|Yov`U`D`Yu@lfEOnk-E2_P)Ht} zu9}Ufx69)BEz!`DV*!`1ixCJo0p)w{fC^*4$~#!Xj8n0&c*OxYHRLjY$W$EGiBKh$ z3m@xm!rL77dD0pR@j5M_(pLt14ddZvpcRDr=|G9XeaIg22`-eC!=f>HaP;6r&|<8| znZ8p=)jA*0a#cjdNhisWM{#trzrECS~3&xE|`Yl!C9Kjd3YJhTKbPj$8!$aGu7 z+C@QZch47J*oKnjeFxxz`grt^dp=1O;(^Q6;odsgFiWd2o>{ zM{#|-CUYi2L%3ZIl3cEYFSlc(KG*j;kO~i>+`aAwDiYw!8UGo@`7gQ7tv+qYO?}r( z`%da`!NDq=U&cgkS>!|Rw(Ndx*`R;$Fzclz4*CbbGymZJ!CvZyLI2>@fB6U34faw` zvtH`|$v=3S^-}i^_EJ-rfADYACI4OSV?wJw^*M5q`7F}ud(IPG8~V^S(wlb1pF{ay zKdIrQx#-B|Hq>Q3uwj`iNIMzgY__w%LD_{~nQ22y4-`PhezvEzqM7~(e?(U-P)AjR zvs7Y{9-X)F7tYh?{>tkilIOvnzM-9)vnicRz8l49eF>!T39Wp)7tP$#Xn%T`?M@{Z z&E_iVnz(sORQb{6-h9342mJS8*XZZD>-i^|(&*Ul&0LWCQ|`nraejC6H7*>Q>Gs0? 
z-0h~boCE_>|Bct&@mq?Z_*owRo#3#*KSG2r>XXbhRJ-U;$;uV@p1#GkR77$KCsqm) zQ?dk!n|uUcDvb0qF8mZsbFt=IdEFIc#whZ)$n*(*y{Zya+ms8OGExK*o=*IVy$bx( zhClf8eYOa?vixv#`7sn9bCeuAY=s6Ltz=rf7Ctb)3!Ar`B&t_m!wLIYWWor>1-iQv zrQKHH3dWxf^jJa0X?Mb(DXU?pxGZ=Chm+SP^Pp{KDukS9AdR#AVKdt!ELzqAPuj)N z;P?^@DzZlTC)HR#BniaU9mR%WYr)w499|XKfP0){P)}qCT~*Kr=@C-Yxa=H`cfAc{ zO)N-WJr2ed4`7a!F22sQ#&G6JYthrjsT*b@5mv!@zhm*9Wf8dD(Za6^saXFo4Xl22 zu>HqUFcoGyw#@`jm5cZ+i;>+5$^4fqW$xW(d=RpR*Uc<+e8Q(t54zq zHtX|b?E%fbAwi_c={#UMXMEKDVyL2a9aVi1m9}SaZ$o^tZA*r?Jf(^?T9_*&M3yt5tA`d|1&IUc!iVt+WhX9R)h*r6<{$a=?gejJ^JfnASFgj(BR*ky zSsKloV?ZZuh@n>mg}Agy8e4t`A-9~}ztjl2BuoQ;Joy9HN=MM@>ZNpB#0@HDK7iSa zQZPz59KThbLQj!(IJ;&eMjlv6AFexu@t-SkWkv+P6TJ-{Kd+!5Oq~9j6oQR^qVRj& z2&#Ui5Os%}xaTXG4MWl z8a5lBExid>V#09xVjHqeGN1f#cu!m{|B$e+>!Ds=nJd~3}5>KUE#^cSXif72D{CTNW%e3LUc-D>*Raj#O6R>Ph!1Qa~XX8^AbF_+=27b z>R^R)EqvT?0nO^~!G%Ms@l;&`7)hKWHCrVx?RYva7!v^tBLiV$*-qTnYl)#<1+bC1 z%ae|8#_b#RSJ6 z9S{&2*h=2_{2|)Db3ifJ9)4sfz*~)1BqA@GJmJPc&H^4@_3I-m&KW>ojUgO!&j9yP z4?+8$6Vw*1Tz7Vm*mJ?)_A`Wt#}9>s0&(~~ZYtRp9R*fj&7gPp3!>cU zM&8A$;J)-?a#N>^TvucJMj2T!IcptsE^p_ZS~dyhU)@7G!r1lnu;uU%pL#OmQ%4Q* zsgDoxsoyd_bpqp4|6qLT4TF4YMaHL2W_;?LfAOg;7@u01@u`g%pE`%}sdX5ix|8v# zEB?i&cHO(4Gr6b2P04TIM8Eb?x$lR#SF#(phL2~sl%aRIJ*TX>Ae{itwN;isKk+wr zsY;Chcv&&GYmosL=yI5wvagUED0StG23oj{`(99^k@vVz|Iys&_pS6>+!t>Bu+7|m z_77SO`Ujl`d#T8JscV>jP>=Zs<5(}%yZ?Xs2N$zmY8~sPYOr4FKmNggda1YmQ!jM{ z>!o)7rC#dzAxlvAxespHHW}rgs!$)@v2;h1GLZX%)%G?$jQaKW( zRhG~O{DEd{zjDK}DC8Y(hp2HS_{dTUca#`oes>INA3u(f?>RiDs?Yq3vUKOaoG&K- za=!FSE=To2=gS=Cd@*It7b!ODYs{Q47A(Kh9(2Bp8g#xmnSP<7%=yyIoG+rx`6B%< z=S!-427aW)_+{65*fsJ94Twrb-lQcsZsu94yEB?bE{~^E`Jr^%1z%`eoZ7WOmIpn%>?x{I8s~kdVg(2bpmZ z^H*^0aD_|o4CKPh)woqJXLF~DG&o;Y6x^#d=A3SpazdpJwCqSK8r$Ym1M@MoFz^`n znNH;E_o>qn;>+leZ)W_Tg3C1QxgRw@UO+?Nh11{rpZ=A4_wBrJ0SYz=_PI?I^eGka zz1yV(5-zP=>o;Tm6V>DVl*h?}tKqWz#_w(d(`}QuK*>9_{K65Azl(4Ifd+z}tyTj5 z)>>*|8PAOi)700_DWfjkLfkc;0{@kRGdIgGja$C|KH0bS3<>S9g|*Ci;8f-Xz2j@Z 
zIdBuyxDSK#=NsW`Wj~l^j)(B0#$eiJK*S5rLtxrT;;nDZV>J$gm0xoKyz$1UU$0;+p|-!%4jZDxuk;AqW4gEj*r)M zb)mQ25@)9;p}(;&9?h1gerNZh_Wa?fs#yg6(vp}gFHALrLeV021N6_yg4IorF-v0! zE&1{bmD&G0ukXO0XV>8P&|>WJu7y7z58{{=kvJlBJavfPhniY3ko5TkPQnMcRmu;u zlI`(BpgoRwrh>cYK7*^FTXAD|9bPHq<7gWK!{ohDinGLnwk{Z19Qs$@yVsU2!j}7$ zu;ceedhUoVN_u}oW2v907{3Z+m$H(t4fowEtNt+FaMgM!8sYImGI?qyWa?-bX#2-k_J} zIn#FeL#Q-uGL>`G#NYhYRPs|lE*+7CYuyQ^b~NEBkHdKTsvqWePNU1UU1+hr6t&1# zLNoay#`Kn;2a;nk+~pd|HtEuP$Bv)PSA=1#^KO!N#plaBQDAy5z3L0^4k;jdq3Y z6(@jvC?s#>%3$d{CDhv(fDu7&K<~XWjBp$vl?py6^Q#S31`frbPuke@!2)i~s)mhr zS+L=A1&Bo7B$iw*?45fBWW-NHbgl@B)~vvfLzOYBWB|^~J|q>D>M*Av7=BI)0Oz@W z=&sQN^9z%}<6bPeG|djW$FsWpMA1W#B#x3EGNB@_9@FepOeqsK>C(J*n!u*3T2K|GT%s(jDr9zXLfAFShF-kH2;4HKM z3nM8z!mNU-m1%0&OVlk&;4{dZ+rn=gZcxl{0 zIQ?6LZg&ZyW-AH(4`!w}bH40j&KJ``=SvlHzO*i?qhkl1FA0p9`E8Jy8ONM2Pu*7I zKg`Te%=xl+(D`CgTY@mi%>2>UM7It)U;f5(^*8S0!s|M+BYY+*+*CuQkO2HrcNOz` zB5(uC9E9}inG@a=a9KaN?~1}4^%z*47)R9>AA>!5f8bWPxXB^@#qR8Ml(K;Q^8Z==2jt??-)$~8nfM;Q9@M2VD^IG!{dLd01X;4pOoNM;Qu-Qnuc=eCzD`}zxJ56mQG z$_vTkE*)^pegmn#Uw9e+oC}+_K)%Q_a7|ah<}~OgOyN*r zGoD%X4Exh{u&k*O-c4K!GMQ3nQ*4167ShyZpb3Yv?{m!XhF?q0Lc{KQ5IZhMBa%dE zP4)=y5k?=z5}&bA z9fx*W;A?gr`l-Vej9RTR=cN#u7+xg`haN(;_5t|VG8t#wIfxBi51{{!08YJmkD89D zIE;CrY&IVPlQj#_dQ}j-9kvKYwwA&0^J@5M-D|KtvJ9iwyMnEK=|0GYh9U#{2io7ds zvgBuVI4?wnBcZwZylL`hcnKB?yuA1l(qfrP6qjg$kVlE$5rc8OnQC*1&w>zWUipS7 zO?VHnzADh{>I)G+7Xe1@1&_@v$IBm&_f$iP2b2JNfFLxso44%rY?!n-oczhS3?&bh z&|+UGj8s2GdR)AeHeE6oSKz>NGJ!!SAr1nQ6?+J5pC|h`t6#?5wdvq7E9{Y%wzh?&NbMqlV z^0^RkJeYh^@ZrrDuOa(B{^m6VJ|^P=@4!*5%cRdPi-2N2Iq$fZ{8(Q{TzXc3T30y; z8r;ZU!!)8T>_u!pxHHE~A5k;eM>OAdlc?y45Opq?be#?4&6;P-K0B%`H``CviMH{^ zN1KB2b01#byM4U0(>*-uIh|+n@hEv_>&fdKGlJK>XdF^7cxx-Y1z~ysPs! 
zKqPG@3$qWyD*FydKII5up2MLg_YAA!9+1{^>TvO_Ht6Sz5jiDU&}M7^Ve`|BBNIp7 zUrQjW4*_oX#gTCt1T8Lzz-EzBHj{CcNEY^xbDkSW(yB3_R3d~gA1jjNvk!T->qUrP zLMRE5c*EOfd6;Y&GXu7J)bZjU$B?I*i%G%6%e>gxh{^NYf8)Rl~9(2A)4LV;cne(NMIbQ^f znHj^FnHGc27yCiy%O=LmoXMOoqnPuhhA}h$#&h*I?ql(=7-C`B%ye%pWZ_R$RLK>F zD_%M%-28|roysQp%E$2Blw6`7wVa5xJO;z@b4i*~3F)rk;9cfrI25x0dS450`k%Sz zqI4Bj2gzf5=Oy@k?X^Md4Nle~8ZJd;TwM8(XB*L5a=I889* zJwB_<+jc_&Y_2QtmM^K$yMEn}v`C%Qn_)SH_fk8PcO*iSY(8Ab^Rv$;*={F^vz`_i zz15$HKGh(WWp+HLaX0lA@Y;Agl0@KJ>_fW-ctS4u<F2k&JpA*tm$^c&fM(4}bb%x@(+KT62H87<_@ zjwJH%mk+$@Y9*JJO#_*J6EK=614lY1K=XWM*rn48i(3v8N5-X4wmku_ivs`3Yd+g_ z7Rpu0g6z3xSmd<~EEehF#D)XdDKiegt9s+A_YvT|sRuMSM#I(>`*6>g7zk4P1-q^F zFuUtMG`K|JSM3J;UAG-)_HxspM#tw{bcIwZxFL@0J>HxL2`ow zypK2x7%&&tF8W2r)vx^@+`V};R^R{sos1PBQ^rQAB(AgfIcIMqQ>9UwRMIFZB{V0Q z=b;Q4LZPCRCd0M2q(l@&Qb~g}3yms7xzGFl^;!4#d#`)lf4=|tEX%*H<67&w&OXoQ z>-BiR8>61X(z##YM^yy8bkyPGmIiQS^Jm!pm|>&eEP-_$Uhtj7V0gQG4D|R^0W%yl zpg}i=z8bdhzJ>@KWecHjLxv9~I=~<6D!^d*gYcKh3xM`Wvj=KNu_KjZ7{zlH(6=KH zHdzbYf-%paSlHtYl=TN%HxGmKt)G}7$`uT-QeXpOr+^bWVIX2pG@L6RV)D)6IP>aZ zi?QseZ=#bwH;cZ?Z(;_*Bt==KJD8^3^(Hf7Z!j0^r-MbaPMEy9Su476OG%VH{jrJr z+Bjy&l?S4pn|qnkvvDSso+nIb$pYru-D8Z&s$ZfoA9-em^l?$m3oC#Pq+q>q9K3vM zDC`G|;hE0kAYxZ6Y~FMhPS+m{jeIHNm)i$ZzD$QL&u7C^R~LfT6)T0E7I`q#QU%`F z7zefsGr<9;B|xvRtLrTr2XBuxhQ^L#;DtLZ+;k*>u{-~sDGH8cSl2|RYHmCjwDJ$r z)T;^-_pJu|rmX`+X%L97uLOFr4WftNFkl=b7!%ox%!H8{z-(E!s3lkz^pwm6x56%q z#x=@<@5!41aE}JN^UgD6=8}Mh?S{F|iOi3Q^3c#B1f1A>9`+ong3_PPGhbd`2I=2( z!1r{42ebA#SU!b-^zHkhQO8lJ7nH+ZNj%$k=67-5c7G*8``IURUL z^l(`SGwg{E6DzOH`2NZ^*;uwrG?x7$8krnx;-~$NG4Yxs>mV4VT z^4KWFBrM$xr%YA_mts}mz|3aGC_Nce-aiA@Cqeq|u=r6PRNu@*!bo@H8Qi-9AQ zfU+9*fZM3`OubJYm~#yStNZ7{ib=1aq__+m(3F7R@}>w@@NHnh_D--|J(Vds;|7|i z7>j(q8-mqAtC{@5dPcTz29x{{GVj-CGv57_aaoYeRGI5CGIy&)JuM5F#ZB`Wrywm+ z{N`p+`e`Xf!t*#|7}zMfv{MS~y6}N%x;j-PlT$1Dm=wU+k|NPR`PGNN`IVuNU%eCZ zt0*DAiV^aw1wwwc;ctHBBji_qg#2o{kY6Dozd9l0SBHfBYKV|uIsDD98io97s*qn@ z5%R12zxkD&kY8DU9?Wl2Jj~6m*XE7OE^>WkC%Mm3vi#Le>0H(po>wv3!%KE~adT_* 
zxb#6I`8?aj99h%B>CT?Wb#>Kq9X|uP9d90SCmi&7xiM$Cr?P?E*9CHXkBc48yZi9Y zlae_vp-cT8JB1@!+ql`nGwc=16YMFAd0fx`=u%CEF4a!xQWgGosq_DKsSQ$T+=#zj zs_Wk_wO!~^2fY7xm-=sBoB9(tU+&^~^*3 z1+Q9uw#=!2gD6GLF!giNm$d zqq&Q8Vb8T~@Nu*yoRF*lZ(1b4bBS8u)Ru=(`3VFz{u)4T{2XAP(g-95AlRi<0@9|M z!)pPlOop*MXuquvXGNR=kLn?`1YrU`6bVat@94F@Z)-v-@3UErd;m!QjkJiNYp804Do zz^+3Da9ze((6v1c-mpFlPv&2O{Sp;$N=5?cwB7_Ar(Oc3=4-(+H(B`9P8wtjeDC)z zm*5tKbs*GH18PTZfgYo4!Rr(~D6TF0SDw#re_si%YR`tU>C4$0S=#J^6;^C@z6I-3 zcMT3VT*m&C4+V$sj%5ucXR<@S0=9nLWcGTD6x%V~oPBgD7IyaafTa;j*x$#Fz;`cv z*=Un&5IRJb4X6h2`#Li?>iKQ(*`x+y=eO|ukojzFm>#=1yjEZy-(qi#{lO+*Enx@U zGGHg@9brpU_p&J;9oUOHV_0|XP&Oen4mOhn);;DLt8is89D3;_i-!DywqFEx@(n}i zW+%(u>MCUu@}{$eYy8*&Q+Kv7^fBwdq!aFXG?#sT-kCKKd9$0wXR+#&Z^BgzFR-hQ z>$BI^?`7jpJ!R88WLf(^58&cP8@4axJzRBQKD+662AfPZ*%5i;+0@ivD4(Lu)_+eB z_$I=9-Th3U?GO#NXbAq^8;ZdE)eZQ&R2lA^E6flEEP<*U#c-E$9*ikl11kb|fTN>S zV1=C;d*DtsXo~QHB~B{v&k_ybBDV>SupS2YM~lG7P6+ncZ-kS-O0g%(20*djCa7t% z9^70YX8g@-f!^>$_^jz8$Z{IOuCh#pcYowCRZ6R2xYRAEViEy0x*viNQ9i8m%>Wh` z-og_vKR`+MSIq3k>)1xKvEZZ2C*V^wA3U=k0$=Xg0iS$-0Ut#O=ON}W-0XG^rms2& zgI-IroxwV+c77%d_H$rWw;qHFlHpK)d^`x!xegaC4+K4PD#3VR=eqREIr#C`IJUz1 zKIoNQ2QIFd3BMU}u;}m(ApQ0ju-9ya@f%ZwU6x$Nko>@;(tvis%4`l%7L9 z>!I-avEjgdb^&wSCl+p^IRz>S|ccCYs=D3f&=R*k*{)wcIRUf4DFNnQ;U z+O1jVL6hK}P0wL`-UN1x{tQ@MR}Tta4Pyly3|z8lD7>Pr!A6EpgPTbe+&9Yz*f&@~ zJ&6Xm+HW-+)w&icTKaVRA5X5c0>5?;Nr2;AwEff{o5(5X!Y zcKIelZ>u5jlhk*xaBnL3mT3*1s?`C-lLcV4%}*e0EDQTqRRT4GAn@_iJ7HH^1_X|L z2Z!|ThYhRmLm$a%VAjCFkKxwv{Lnbq@lKcRI64`wi`Irx--g4OQ|a)SZVA*%`vT-v zKLmP)RiJsI5!~+C2(LZLfD<0Agg56D0N0LJAT}WkUZgGX+2^ewaApoL@XZ5aMPaYo zaRG2Wkqs&~D8dOIwahuCh2T|7JXjPl8je_X65I%R%5-m-40GxV!129{LCqg|@ajp9 z=vdqqaDVm=(68`<+3{u~sGs#-#N2(wq?S$r+I?Z*pZsd=-~4KpkYD`}@~g7H`BjjR zU)>k-E2qEtm6MQP%@Xpf;J^9ROd-EA6!NQfA-}2<@~bm{^Q%rFziRp)`PJOLqxg*W zC*0Ayro3CI2A>=Lj(a)#EC1})Rqo;J^W65*%Uqj(GqyjlGb z?mI2y{oHqOW3F}Z0~ts7%jqZhB~CB7>&Fgprd`(j7n@W3+e*OS5d4Gsf`9OzE_J`q zrP}@NQmch7^{LRM>iz9f)Bbj;*WPuqNkW&}EOe<$g)a5m|Eo*=$3HmhN)z{=E_KVV 
zIsY{o*}riXt$cNtJvv;KebDuY9XOi9N_oq3_txsOSrda;#o1%n#RBj6%k$50q$9)m z@4O71ERVoEw+?npODq@eD92tEI2twsscf9dE_O_uDKxN4Vc*_<#JNe1;U+{*=ALao z`mc0%8T(grQd2H*zY91n?@bD;KK3#9)_VrmJ@^T4W17zYOjYC6L5$!#yTad39E3VK zXTB^VpYt*u$0@Z{aIx|uc^9)Ae3?Wl|3UE#yKA6^P4U{r87%JN+=e#uH)5mzm2={% z>300wo_JKJHJv|j@)uWaw}da+)5S5TES^*VyO@mtfzHD-DD7#RY2mkoX2NaHc1`RJJvNPp1*tzYQaOSt)AZx56 zTpC&rCKklQ(g`ErNBbIP-m$~Xw7y7i=1KtEGGh!oSyPLB{rn_&GqD7?iUPr669$}G zeHLCAz8o&n6vH9;mB3s_3Z%=Q0kh**z~1GaU|y9fY=Fz*#$h8tUcezRd0Y_q{VN1a z&YTOAu4RF(UzMQW`vK(X2 zx(WNzlR*FOr7*bKkkyNj`B$ECK)nofb6N)11<7&BYYo_)`)yfV5yOW3cn+sV`?BNb zN}`?76i>{YMj0;4dKy?T2)`)GA5`)=fQ zc1lMV3{@BQm}_RS1~VqHOC?&NZEGTA?_XpWyftD!xtFsol_9LsI(3#zsAVVH^W4Jr z^Q@^!HMAT$h0R|zjUDB7{~@ds5JflVB%I7zOj+v_g#=#&BwrDy*E8 z1>CNyLyw=)u&+8EjB%R*yH}t2zqx>7x#w_Y)_it_?E@(1F%6nnR>AV&a%^Y%Q79d7 z96VBA3#XCapj%EC8kVKN1955ac%`DiYLSBv>b`@YvH2k0buz5#ISE{=MuV!4gV_bI zC&2PA<#0^d8t^Dr73|CE7ThTZ*{ZwCp>>zQst;NM4@||-BBm1B-xvw~_lQ`}5d_w$ z=(BtGoPleNYoNDv7YtLY1>BQfkapb%KAC6^t!+PpxHWO0eQG7F5FB;V&L(WaCNaFn zl)yI)AK*j7=TKsu2zq_p54PM%0DkOOV7yfeYJXh>o!i1;-(hDUwWtI5+UY{QFu_Ta zaS&Qw4g~$J$Kil&7_73s0J^O{gOaOv82c}iVT|KR;1Mf=YZhU+#^654&NqT1Zf%9J z6)n(BatL%OPJpvp4#3XzP4K3THyow<4#-;zuDT3;xc!npv^nJsw378$qdZ4Asp>vB zrSTYg_UXVYwj1G|=?ZYN%}7{k91Umtr-F&@A;PX)C~JN1Hk9p6hCZ_*z_HpE&=jf+ z-7^Gc4OtEy!;65ha0zaoGlS6J5^%Xa1#W5>4Q|}319QC0;dPtWU}2XpELsu{>LjFK zc>5suRLzHJO_&Ga*g??I?+bH6ED!yMhrz^&GB8m8Dr2M350YAR;R)fhHor+=tNqCZ zWepVii2A?^eMxAd77u1f5$3m7CAbQ=!kj@U@}PrA%pqu?h19*K|DSigbkY zTe6@_h7lMZRSVCoNQL44NID68I(+g5~S-K!b7`nC~iPHX%(oWy)Ys zA%7G!&n#wEH=G4?oFrgx%zWm#uM$M2(csVbL@=TzfI;$8fPzyD*k-q$`CZ*F+Pvl* zX#Rc#%v<#gRCv#0B2+uznUEdqG^N2@ZjZ2U)maXY8k@4J8cyu69naV`OLww)0`DSt z*mm~bS;FpJQv~sXNo0SY?5n75GA z;R~f!@h=lfxYvib^G5=Q@;^0p@)G&!yv#Zc-f7}|{)4F@?;<~gUtyZVTNX8Qj{l=e z73PSzokEv7=C6P7tk9+I6a0h!bg5Sb|KL!eOC5jyBzyC3mumXAOI;{*siOt|;0B>f z?fUB<{7;u!_g~bd{^NXc6r3;qg7d}2Ud_8;fV5UaV| zKhBp6qvvw>1?S60ud$r5;CzwEjpn9Te1ZQsUvk%;W7P%cOIU;zcYBR5r+!MAlNX#X z66!HIW~k1X0E_i(OJ-V@hICf=s8<$ahcr|XUP_PXk;DNJq5boEa3*3 
z&v4%0wOsVCc(yxnBRj9}3#+$fBb%Ez8z!8#fEoM8bCIGv*3V=rXKkbYubdMtzh7c6 z9hB#zM)Le`SAFimrvPpcBg5aEXT>gCc!&F4^OTJ(dL+ySo#f&VS@NrtWv-@P%s#v>;+tcWxO-XtT<7TYf90G=>-6}*&}yVN z$N)Q}z2xF?D0jx{Ie*vb5ohBe$=}v9;r2W`&$&z5^8HUVaZ|u|?pM`Dt~Fyk%KK)E zGCOXd!1eFZ7R~v5rlH`nj=0QSeYb+wN>Am)ZI!&<3BIoM<% zWNvp~!qrdIfSq9>T(_wM$V*x=)0eD-i|TB_x`E%|cI6}H@>OS$UGs-&vuj}<4;={? zAKL|+-zmdW7XtxTJ(}%EDTk`k&w+eeB0Tx}2=jH?WazOx5_Idu!F8j?0q*@dFe9!W zn(v8%qsFa)IW4QfbxslLZ1e$~K@hC*S`5jSN@l8c3J~q;VpOkPgeU7{;K$fuP#!gd zo{Pmm&C3xMYvsVtXa;DwChS%oOM)TQ6mG3BXKl}4g2C4%Slt=I4$QR6Fv#{D%>0nf z_P@|$p9$`XVMZ&VPGS!{aN!43TKx+KH7P?6Z+W)wK?)q4a|-_aXb0!M5V74Fvh3ky zKC1JioZVm`I*&pQ-)!~qgZjsA9ypMmo=R; z74Ewn#opzD*ssb%S+V~SwnjmcHOp~lb7RVd*$l*Hs~ED61pda6pEj&P<3x5ZyB5w9 z_DXDo`^)iTZ{SpAT{dFJa_9hpSi>SSHp5q*oo;jo{@Sm^x?Oc<-A1{yt1=2;j9RuZ zQ*X{LxgpKkP+7L#?;gCpW;7ex5WtQmC+>wY zwP7I96~P%ti$U@37eHEyfo^?fu=AHGJdi63SE{)&?P)I1xHcbL`DqCbOf-gGzs`df zp4O0T+z8Cv3z*Jpa-i?SOgMID25eC6h6QPL&?Qsw5MNvcnMn(T^XO_}Pq+kBpACn{ z9*%{ER*hi$;!x)7%>*bvSqGYgE(YYD9V`j8hsF_>V8rW8u%6Qa<;Nu04|B(`u2a@Q zP}2o=yQ;#WJ(+DEFrPwc@G+4`-vCu{E>;=Uw zhXH@ifNhs$VE4sFAZvdK7P}_G$EI`PNex3VEbJlEv?vCud>#)5JrKc|i@%xEzhvOg zS2N&JePh^TJP4j?9tAUcgPA#@rXXAI0#J6?08%!-fztak!Q@RgK+bJG*kg-eMnfIQ zNxj3gHyJ|jY~lPYCkfAu-47$TJAhWwz>KQB%g~TaFuYn4h8=y+1bZmJ{+@jB#v=}# zGH(TPGL{f}-v;Fx-eBQO42+z@;Y9r}@P@YF9effED))?pODF|(8p^ONR}4(X`ayQN zB%InE2A3!Qg3~o~!ECLKK)diVv%u{YEb{Sz+4wHhF@n%ah2@3B3NXPvA7X0&R8Mfn%H>)4bRM{QBbu9$lFS9cuExybE$bZ~bC;A?zpk@Y@!i zjhqUvAD#zG-hW}{+oeH`Dk=Eq#4>0rnGfC{O9Kh>x`1d24@Yg5ful_JLQ7*Mcz>u5 zxHi`p1_g|QTW;yXRo9<@ho^o4wy_hWXB)!jZWlpwufXW~c@XZrDX`N$hr;gdzkyfX z9Z(-_0Bd^MK$75(xjQBoNRK-JbTfy8IkU1rWy~Sa`STGoN;QUwNjL#s8w`P6@fqN{ z!3$=6pFMEXEoIi~ons<*PXU28*}$w4r}F!7TqNNJqHTvmJuQetd@ z{LxYH`RTBKh4HJkN0s+EQ^*A2+6Q1V1%E znpbg4=l5M`=Dhdq=0_)-=Z?6(;F_azxp0qE{>=k(K0zUvFIaV&>wiCl*OaAx9vO`g~!Ff)e<%*_8dUqS?CW`@AbTq`g$ zw`*o`|1dK%1?S89zs?sq!TItJGc!(LX2uK7m-S=qxc@I^CK8yLQ~okD|Bd_LzwsKj zY%RMuyAD)d-p0(@cgo`tNks|4#J+=p$w)8J#TA>2$8|9|Do@Nc|V*<}zK 
zL&V%({}9xya)xPJW`UK%Z-VM2NznP~9470mKeN34C$o3_Y^JnuHxqh19B4XyV5n*> zvt>*ZSf5q|5<3S=9&{pAQ8y_}wtz{TpUSh74RWJ0B`te+*t< zzXW{IW1#7x2)#6)f|%D)aD<5q)czR2=%s!JmlBRb^<~50gra6}_iPe)ClL)w^E$xF z34`EhHxqEVsh;uQr4JMAdq6>?H~hT)Ie0s_7+#LP0r$xjLWdO&P~S@oGul?dZQM}HZKgefX}31RGZs$VmxNo8&embKK0VJax;=zl-FuO( znid1U$V;;;nq=6pzy@~lB{lZoD@S(0fHmuW^CKK^(_(u9|FDY`SF-KX%30&6v2dGi zJiBGa1^6Lfid%Tg7=Cm<1YdSL!(%UovyM8paQaYRm=`$=YMMqur6UUL?qnraYg#&V zK5PI-EdK*q?>q%vi3ect{XS3;DS`(guYjC`nykUKO+dNG3if*Mf+KAXzzy+=aLcJ2 zC_7>a{3@#y6!!&pH>ZYuBbt!YJV89awcq8`IQ+r=nA;H`WiU4 z(}y`XxEW|&$cIHTlVD!&cW_h}!>EOVo2+00{4y^JtRA%;nmH~9eAo;~whI1=9tXyv zMUqKaf0-!@9L0R!d&1P8HELgOi722$yDi8CK?41j7Z)|w14bI=GS&fX4%(gV3N}@aCrG4 zXp$ohPp{PjhlZ(uePw;jr>U_p#jh3y51tEagT?~yZVhmB^&AkVZ46&dV8PK#%YkM2 zYo`2#3j8+Z29vBIWJP9cpxF^+xH?1`)VByMM!}~t@_Y$!kF*0S2hM`$KMR;E`Imq} z^BC~6x)hi*C%~zL;b8C4L|~*93l#r^i(3E0Gf_Fyz|i`4jIQby5WMXjQ}!?$yq42r z*!5!OMwbH9qB|DYtkwbP?Hrg=TO-P=n+(+X9YA8%2cR71%5;O{pxki?$WO@u2Lnq$ zs812|*lji=W%P>qE_oW1mS16}DPbmYQX4aDh&&LHO^n^ZN;vks1hdEWo=EA*57D(D z^TCY_7PO3307oY81P+fnP2#hR8CTT?(XN6O%+e+-+N7h)_{21dD#;)w#KKEtynCvN zo`M;p+cbme9d*}4_L{9|`#?EEm@^`|bw zC?oC1Fb9UOWd$Nh| zLgX2H?^IBxZwt-h%7N9_t`$k}9aso4Rr$ayIAuuz~@A*H>%zvCOMt_|z4S$_4 z|6yiY{bgqU8~?|@@fvqeCo!k*89?p3Ljj}O3#JE2vMaI$&q7ck2&taVPMdogngx$y z&#rNX?XmLkZf-7Pvx&kXGsmzIoj+mSv`5TmxyOt}lqPI`vk>g708Oby+DG|lROa(*kIe`bO8b#iA>Wts# ziQql_CMrGZ49-mV1Tk@XKysQXP(6JB%-31QY@HbjNG$_Qo?_;ysUBFpH;oy;_ZYJl znlKL!6fj4=EMQy}JsIxYHSpqtHhBI19D0goP_HZp>7?brwNhOdBcKP7=}8Ev5ZkSb`2h+!nR^tzAkLX?qC?}{SlU0*1@ozgK+&bIad2X z0Grrj$=XYVvJU3u;8v6p%anM)MXlG_S$7R!qMO#lxzfUq)?Cev?)^ zH~!~$!*-9|Yc_jq-nn_*R^#=X*X=O2a#^=6(M@GylFQ#uuj*12(>Iw{@VdyyC8t&x z;_n>`O>6dH(>48RrrVcYB{ipl@r(2{eCVJv&3qe8Wu)qGO<^?7T(5xKE)C#RJ_DQO zDwu_O?KREY9D*FJyzuy^{>bEvY)RGVoD%v+u0*)VxtbyWxw_H+|5vw0^uMkzt5^j& zpB#c(ALgMeFO*Sp!D#eOGZ{^_V^NXnO0-El0i`HD%LETm}E!C>T7o>Smh~Pa=no=GPs`Iq zTQ#Ya6_PAjZ`dtZP?k(bM$PeC(-b*k67wzBJ?%nSCrB z|ERo0hJAIU-ik?BStb$totutM9hasXb>p$Y;oU^aD2>JixzIx+4N$64523dT2rnRJ 
zbEn#4+bA>fS|QA~wGQJu;u&0jwg*+KwIGw9n~)tBL$!|iQGfeHoD=MX50_fvaC{oK zk_i6e-f#Sm2OL=u|DHa0YlOzVJc4VqlX0_QIa%Y@N=ij8{QcHkx+?D%S<0G|vqtxk zU$X=L*ceAfY~4-sEk!tBNf()^<3URbyGZTQdV2oOL-aEBD&AM1#5Zr7L(@~saH)3? z9;Tp9Ycp%bV%;6&jP6V_sHzg(bsNdKJ#InlvY*I3E&;P+hh z3hf8;aP*{mE$aHlI%iTaOvU&&CtEUKmEL zBn>d#ep|fPD-1n27eJP!4kbq}T_NYb*kWn7bEM(LL6Y)h9^TVwW5{qE?%#)Jzl=f#7cFVp zz-OM(h^HH^2ICsXP(1FVBEDXH0Ue5RqV+v1$YgI78kJAUsdXH-TdzSA%mPVxkP@=Y z9f}`DeJjq)UO>*ie?wyBo{?c%E6C{J092u_BA$|eulVEZS>kKm>(CLCTB<(dEgA7$ zgzqT%;T412(G*`Rgo9#;?9Tu!JN_MF1Io~syXrV^moA>oYt#0`2K3pWP8?^JiK^ul z=?l7)F3miOe2FVcez%%SD>X;CizQIF^;R*sm7&!{A(UI(`|w(~*V-*H)4OsZU7ZP8EuAoPfP_TgXUrDLOYZ5#@&r z7f)WblHB?_pT_#T(LD89KNk<-n^=eJEmFBbRn@m-0e%YOuoj?t(5k5XbRafY5ayN-q^ z-zAy}W3eJxi)-I{(v#OxsNfmH_ZxKZ*EA=jSay@fsV+s$aZhlX&3At5x{EZ>L!HE! ziN$W!+r-*O58>w$_pq9UCS5x)1ZzC0Ls8RykZ-b4RCQY%j)(c=VC6+pxjzn}Of0_a zI~NbXKbd~D9E7656+Tn9oJ$@K>G~F)A4g$^VkIuMG=D(29^ z2=ILG2zHJrx0MXN9AJW zr>$a}G*9%vZwEa%A&(T@`GUGGKEl_|`cU8O^Rz2=5B^!LPRq{>rE^LKlN$+%;-Z)H zai{pJIBKtg>4^;+$o6{&P~F}E@sq$Q6gH(2=$i)1?1-;L0*J-fwsAB-pIu7UW}%dcq1 z>+Irb9!wL1&+3R7*)s{#5Eo8B|nIU@it%xkSd4sxZUE-7c?jrG=QlzF9Mcx+N#h)Za zbkf>(GS4%MM7xh-$4ZQ&dK0YhRT#@(FJ47W)>z?GI~{ub&JD6L-;&z@TtbH}_rbrr zGqLT$&Be~)7kHi%-lM6bQ6Xx(Rg7N9lrCQ#dgXsXcvR=g*8II{GYNAtJ0p*Q&xaMW2DBaYB3$He$8}fDV_xLd= zt7$Pl-ns(2_YWr;Aw{U)q6>vfJ;B>|#1g$LOL3`(IsJLbF#kWbt-w#RSh9x_2&L=T?c5xvNSDb{sb!O2mw+7)iibut>gY#*Z z=XDw#gJ<3e2=VJs>z?rkS@qxN;Q+zc@m7!w$*p& z`jOwo(Kl1^_1uLh`$G#^u`dXlOf$rf&IIG$A#3?AJ2yILsvPd?*TN_Cq_I=51%9gY z2x}V|;fset$YpgaevMxlO6&J5o}nCyr<~G8Ay5~mAJL~ar`8eafiI*~R?Hi}T#3f4 z+)1`|r;(TUN{FvtGA>xZpBjp8^Hq~W`10F(NSVcZ61cYzUHS0=UA2wEryLGZ}5-1&RS3y`bzHkvMNNN3upVX?w6{O#C6 ztkkPcmz*C(UszV)2>+R4F4G>LxgAN{q?V!mjS*z$vlL`>Nssn^)x#rS-{7Ts&y(Em zlabBk265-s}zLuI_=NJhbE}wHB}1qS?Yjrp6x=BRzc*f z{Q#2Gf>dnMK-P68pp=gBxFc{2a&@&rzEy!V|I!6)_`1V54AlZiMtq6p2KqDC9B8$#K zT87L+D`ey7#Mp5-AoL8~?zRHw!Qr&HZUr@a7>KtXIz$dWIDzQSi)3}Uw5jHW^CV$t z078Am;l#7n2F##p5j?TIg;{oOX=j!{GL{`+afwb~CY*{O-`7EYo1 
z*B0QgO`mwE{{sCfK2GMeKc-TPu8K>J972N~eHW**icA=qXR`O2K&&Az*3Lh%90r?dD zWJi3;B5n#X=%(u+dS=xz5^}->?{U3UeCFdLqJH!h|MoxtwJCi~%1mL$HTW4XZaND@;o!t#!qxc>MNGTBCgzpnCwoRANo z8JE*g_ys52k_vIzyLMh;$XgVC!-wt(9*n*h7m-UoqhR7oeA-1Nc!< z1A6#&6A5$Ljg)29;B2R0Tvk68zl*X(juF$*<-?M6Le(TZ60>C2DK+$cSp?otVT*%J zW03X(1$6sQATJ}o9!+uUL&?{&(3Q{2sND%EK3BP)Xl+@7ts3%Kv7RlSns}O=-zZVs z*l|`|6!(P4cx<6V?l|L?gM*N3*=>B}vM@Y*ZXD7&=)_IFRgZjybK_vAO7d;|OL6-7 zB+8|Sp~8Yv@ywVZctLJ4c^_CnY(@>D``7DWG${lb6*eQ-_)=WjpoBBbyUAM@o<3>_ zpdrQ)=%vm=lo?{g{mQN2hIABBwcKV*PsRXkogjtx2c$xK5`{aL4XGAol2j+KZNH5Ov3KxcT#$4 zquBA)b@D-VES~yVnp#-!baf*n6V|Mxg&O0qYj+xL+R{KYT_W)>QId%?_RhM5 zcTHHvPg@trO4eQn%t3_OgbmzA-Sk56n9Zz0U2bYlezP8i;EFW9Pdxnw+Yr4ju$S{i?BAC8&GLp)cz05vH1kX^eYh%Y&YlMb2U zsF$8tKOvQ0E()h_3(nI<%Mg6&*+g<<=K@suErMFk^hd+*=ka`*0e(_%hZhgF=5g%_ z{KIK1uJKYqZ3Z{_>LV?reA6PdOCu4Fl&(Psfxmc4#|S#2+lij9IYnpm+n}A*0d!Zq zEce1r6~CQ+8vT0kmdyCvMzWYZyyE3qvg@`y3e>YFK2!;9PP@#f2e%`|iNi=+h$pfO zA50RZ9*TpHdeY3bOYkf6J>pxnEoj%x5hT`J5@+hF)9v4PV9B?pbjSR0WZn2`l$<4j zzN$|or={c2n;E}I#@4%JW&KOCwtX+coE|Rx=!Y*jhG5UR=}21O7f(_*Btf$zXinvP z5*a;%~oBE2bZJpvsbW=iUKLGiz9_m?c&P(Eb1uvvAE~FAsSreNV8sa5{v3#c#Vr3 zz8~yD4GK5o!|gA4)89vE_st_Xhx4S{;Cw8-+5{CuDBzk`h}LdhiMf);sCjBMno~Fh z8!?#Jew4-eX2s;%(pcL2_z?e4#~nMgXp=KC2e3*&35u?*p|q!#lU);Qh0*S#x?WHB&$fpq#U#nEh5rZTf~jgjwpIWEKYot zQE>jeK5=nlky(WSRxvw>Nr@(X*xG=4)2Z0|?J0iX-EnHuwi(-c@;GefH&SJhNOZ}LI5lS+KF~Hm;-1%`npJ1e^DSv8V)k3onPh+!BzK4xp5H_J z-aI2y1TWi};U<*Cig4-rU3kVxO`NPONj=sK;}6~Op%#k!#YSRz+$3|ONa=1naM8rjKo%JA^K#dKs?|7L6R+R#2K^B zh#x+7z~`6gVwXRjsBxh@PP?i~)a~<#T6CQFzNtI5d1Xbp1M1}b`d~bJ^fk`hIT^kA zkV@)mGSQ+>tI4U^&B)5qoo+rAPSlh(;;Umep{4B}WbTT~G)xpj+7J}l-tdat29qHKKN{$L`rVKZH`b1+|HKv2W6>14ZXq`0%{rg;6s1T?9P zArtPd#?!Bqk`Z$*aZ(Cv@YTZ8)OGDAQkr2ZZYa4!`hP}~15X*`5iuc4@jC~`|fhku_MNiw^_h>pTFR3SVkJe`EF zu~9ENxw%KI*4;_^iUQE0bC39s)pM}s25EdR|2+zN8%QsHn8N??3__c|4T+(}B&ufM zOO}q_N(Qk*u}Az@exT6<^A#`9O_=~Z>PJ4(yYqvb?F}c?d@cQ&Iho6n4W_rgY%3lx zFeaVxwfrqTA%~RoTg6znCos5yh|CBt#n 
zfEQjmF_Er+g2)b02<_bIfO5zRH0PqAp*0M|1LrVyd)I~h;a_J+5UU_#yzf3lPwreI2Cq*b?Wm2UD&2%`dk}&mbp6pmtJwlN zJB}=WqfNGzc#wrnr3ln^h{x^LKpL^;xadeMHm`U^+O@9nRceRHg{|#q@TAeScFqT~ zN=c8_??^z8);~pG`$u8ZWy-XaPND0ScA!2D-T+kouQT!(J~t(3W^%4OaIHzt|j6sq|HN&G%C5w{SRZY>q^u zJ1>&RZg=8*I-97MmLtoGTJmah7U$L`$#qJt5=%5>k**eh^0r5fx(v_2NXRWxCM#gf z8Bh@7g~FKt>K)&V%y*Q~iuKP(RZ|_YdFzf7_C@ftZw9}!Jduolph@=n&A}=W<7mH; z32{8N91U7j$Za_>mP#0f(2FI)XWY_&T4~4NC#r|}olugV)U8Gb4i=G-y<=!c#uh3! z{tD{1vLllgrjY!iJrqk=V@u_ExSnIk`mIY)<>6lR%}Jhgd8E)M4Gldie(BA5l>eSYPHY;Bt2|e& z7px>_=T#L|xo@WJju_v|k-_XtU2JghD7k9iCJvO?Nv=#CM+cO~i$^?Ofd?{fh%XKf z#m#auID{)lTDA4!i1SWjwKWIOP-$nPlVgMvy$<0A%i7Rpw?p`GqBsBAU^q=3*2SmQ zh=svCOYEFxjN`&uNY7DjAd|N-ElOp_9N1FdBb-mno|w8x%8IB2J9XqkG}=9q1B$5c)Yh4`Vv)5 zI@fwr-z^bnP=z-ACHYi5)=ieShL_Ma1spE~+>Z?;&oG-n zZU_IXZzpp9W{WG&+2SMLKce-)n^1CtA+CNZgEC@|(qqgYzC6JjhY#)(|NJOJ_*N~v z=T0Wk*KZ~jezSS=f@IFwVL5M}=}Nc#aHfGGIl4_~Cnsz?gid~!L4A2@IP`HNu^W35 zAO2~K_0PUWBipUWyO~eX%P1ARgnxRG$iyviRdS}v5O7&H6UY9HWvZP+DJthU4ExJdSx{gMc zE9%HxkM;OtfDvA}tqD!LphBKTr_;Uv7hUK5m*XG)ebLmCw$ijxB%J4Y9F>vG5+yP! 
z8qzQ`G7_a|i%L{fLc0iE=kZQ5()!pV*-}JS$e#E0y&w1ec-+6-kLy3U#(5pb>-BoR zj$P{TzMO{>E(4imeJq}&T>7LX)t`pAQ0^6VGFM$s(HotZeEbz!UhPU68IPftu{OI| zAk_+FH1Ye%bg(Rx;cryQQ-{YV+LWun43GYy%W>VX*uxIrYJCFrZ<=)cz+`xOsF`t+ES1oM>%*pd*RnjD&lD592>g!>V*$=hFx*WW zzbq|=sqq`2;*=e&+t?s_7|vl;ztX8=aw{wjcm-$7j9D%8c!% zFH>|#Q+&Z^p1ubC+K$jFKXrEAE|t~Wrof7J7kqes6&o{45q>;=%EF(`5`O8-Cax)p z4bupPKi_~A=%$G#x*JgcUJrfQrj1)Zex%|djhOKln3(@mJfnGql?GOW;W~(B z7Pa8iof?S#Q$aB^o_t*<;MPnV_^n@0!=u$Ptz8Fg(yN%dfhu(me#nPkR%HKeJx=Q{ z#Zz?H65Pkfg0<67X8b%!aC6#$+thS1q%{v;_I1ViYtt|%*O;`Fdb4V${wQtt2CH zErEyqWO4eWU8tWk6yvWOg1o9I+*h;_Ju(Kd`rKgJHRCW#O*;#3dd9u)Y=#U{)U`)V^no+dMcN(Uw{DL9SI&_Ma9L z_B^B+MeDHiXMb`S*$WF_T%@;e`a!djE{minb1dgxt(yl5F818)pg;NhZr5X@E!VHv&I z{7GhT!fyc9-Pd3PTaV$kvMM~7?~5nAbD;BHF5P$O1qBLW689CSX??&M!bClsu0NFd z);@=?Pqi4gou`g{H^kdtfH7r3Cdxea^)bYBs}jzX*34Sgg<5bDden!aFQZsA1Rcwv(Mt9Bf=R1b6iWQr;(Z z{2pr}cAd0l3;cSqzvErlF@Fh7J2s0F$C)I_mutvs) zxy&AnMiXvg@yu(Wy7r=&^S45L{B{&Od`k^O`t@Mo5@X4{)Xhw7=LgYl<5AL?rh!MJ z%$ZEwXjUhaC4TcgBK%>EtXS3>V|FbD=_Poaml(r#=Scm@V>7`2_DON`fN}~~YY{Fi zoC)p&%f;CJnb=rAg>=^rqdlJ*h52`n;UKX9t?d4?Qst)-ovXJnUF`@e4(97V8SXy)*ybO1-%eRd&a+beTyR^Vn_oW^`cUe&1 zScL;i?C@vRIri<73Lfm71jGM<F5IVp>*jXx`#poi(0hq^ zzxO;m)O!&`rzBzJMOESNs?`|hG#?Oc{{WiJ_g9fXTjCxJ}Dez;x{1KnCjS=c@= zX00+5XD^Mz1)1KMy+4G=Z>%J!{x-zzl8{c&FJW`(dw3;hFD6aMrzg*skm;aONcqO)1fhJkCh@mEVO35&02)(H5rsRioyG3-fY3IZ0NlFK-?PP&YJ)Fz#siOXp)bs z`kd?tJ%fM3vRD6!cYH@;TSqw5iQY8Hc{2O#-G(o(0)6UT1zzS?#LO#35{X6$8@BQs z3%xoMZ~n`t`b`JHwQm)E{dpFYUDF`mB>|gE&WiKqG|_>g7`)WCoL7NDWO}~ zDq-n+3wBkZQ1JTJL50JYlj4M>LeZTiFyrYU>KAK6FT#4Ukj;IB^2T^Ef6_v3XHhn@ zQh!OObiRo0THomRB+86C%6Lm>x?EWW=f26a zg&UP%%(JyrqBEUF?b^u>+}40RX_jMlRsVqIw4-Kal)5p7S;X?oAsQ=xA7ANO2 zE2)26pks)00z2t|$`N{?JrIlU$|ETC1&7@k^s=Y`eRsSkxh?V3WLPIT)8@y_19qZ@ zODJ7xM(DL3cTwAr7 z9u0|O>&E%wVgJXBclL+(Mb}}I$zvGg`<`y8MhWwp)}V8U86^!G%JvR9NAr6R0^b+X zId94y7%Znn;e4CWCP5;xe!U1z)H2fnBvf%;w95yyi~|T^d)x)>Jtg1 zdqeDRwS;9n3c;>@c~I;kLI21hY;!nADQyP0JwF_8doE%cCT}6IHW|y~r^3+2V`w@` zo5}7lW}7Un*hKYqda!bn^uJk`tqPiwR6XJEt9EXl$E4bE*Y|K*@Ei} 
z6n56}Xleahu&7(lUVPAE&4rigTCFv%ANra!%qFnLTSlzPX$RBXYRyWMqL`O{B`rDE z32UxPA3t}_g2lQ#e$Jdza4KgHbo$8P*94>-2EG%oN_U9cVSC}= zn^)wS`&l$xdqdn1z61^|41t$6x3Ok)9<=FsFpKy^(8)T*4!zn*ndklR!PnEwUqu-s zqF+F%*JAvtdXr3k?t__uy|DLqX+>F`7BhRjfmLr>P5)Jt(0}hN(0GM0vv^b^gkSRq zt(i^Y=o$%Gh3$fQTknZRPKU^^%aqYQAFRr)0FTFupvfZO@7XWc*Xi`fFa_VOgj zd?;iZ_WM|pTejqOTR5)oS3+M(wJ_$~Z_GI2&#nfpWM?jFfo;kf7I0uHOi3+8rNAtv z*BOH&7sXKR^7Cx@r8sPAy8+>8(RA8=EQ?>3j6eGChdpa6A#>MsC|Y}*eF~Qa%M1G; zFt`L+kqS%H5RXV7xiZ`5QfL!nXO#bB-z!R3>Gryhr zywSl3twU_V37&Qup4?%A>=@q7TZfWaSOcUFQhnw2JWV?RA)YE|Zo_ct$sUr(?&j&G1^T7~*^O z;q=#&$voSD`OWV`32mzdAD3<5W2%c2x<}!i4Sli8XcxYYdWxgpl!-xFN7&269{i^L z1D=LD!=$&7WYKE{23>2WLWi#`LhcPkDj#J_?m4l}sxy#LxllNAtQ2M`TqTDd2PVHT z63g|kVBXGcRNQ3EE=sf4!yGc8*P2S46`O&U?}yXuw?RUV?9bn1PWEdea6m+R+lrN}X`@ zI6drqnk@dl*&yiO8o~ZvN@lGGYeAu14x=@1OLR`ELFxz@*jsfF{kAIMYwc!HWk3Sd zIPQR@ID*}(`A(bX4WsVVDJ*o^B@jKvfMM)5@t)!bdbO~X#`xS5r+x9G;!7>!oTY7W zMr$~{946Js%q`%}w0Dx2&bcgR(oTFAJDJW5C=m=zu8B{7UWOUc-~U;Ikt{o*AJ`ur zfH!|ngv?d9PRC91YyNw zOz3pOVR60CM8N}xH+GA5)0BnWzl$iTpC=4|Fqp~d&KGtl9YSR@f7YR43SZm}DMMiw zzPhv@AI@3`dxwo>nUR^|qffQs>UCk{HToxHzP4q>1~+NTjTm9&7CW38auUVJIDD~Z zF%Fy<&E7Q`<6hOv@ZXA!uwzI%Tep80GxkbA_0lYH^wlM>h5Dn^N8o&K4fnEb&(5fvC(o9^5&~Iuh{pfp9kWZc?Y`Uk5J#`H@{^oOt z`jf;)*F6*}x8=i?@?|)|@0(zwVF>E5p9;hl7P(fReVo6Jo{U?Hb=7X-%0VjhV%Tdq z6cj5S%zI69tlzLfN1fmfS)i7i4x45b&W33t{y8{|`F;1IUXAzY@!&}2bH^Ggt=w3p z`42dM=9zFTYA;J#SdShBR&ZO!Px#}g#Nt~s=$T&AJV;#y3(ez{o;5$pmqShy`3Tk z>K}&qn^)mz^HJD;PoDyVtth+e89X?4P5f;4R9H1V0ep?6d6z18_PezRnhr%#K`&)Y ztcw+r{u&nhsdk;^51pH!L4&s;G>Uls~B^oT}p zUjP?2gTAfMrJVy0!38drwQn%P!R~Xg!L<^6f}T*qJS9}{RmM@di|Li5n_|B#qA^pV zLH@Td^?TkeHlJC?{7UO7=S(B1W-Y^M(s?5FQVgnX$Oq--Raj+bg~e5USk%<>r2j3B zEN_fto#QS+)|rPOWcaYbE5FgdTXAqhs=XbLs)JsKmDqmED6u+z3%PDf7W53pW0cxO zN^`Vg3!d1p+PC>^WYIF}-}|_f4JZ;iymzt;4|A%S+)U2RyO_4#MN!?##CB8CD>B~E z%39{2P+u;vQ%(Wo>g_^d69ZvWwF4gQ$%CPMy*PG#wNQJjn%sBx!^jP?Y{0^2@ZX7bCoLbi=*I7UJ(C9%w%@0zQQtW^3;2!l>0ppxjw{S@U(B 
zh>EMAW8Db0qGkh;b|cNr)}-G-Z<*8hXn6A`9XnlK)1MM$@yXtu1Wkj$g1O^|ELgsK`z%P9aTQiLg~QjXurK5Dv?Zf|>Vs z3;!KhEXcQ%9L7!l? zd4K%#P9px=D919*95F8OEF`b8WzRh{A+V%B)9OQP$n3|=xa!*J;I$6o*{Q?W`shDo zplHGlg~ow#_BQAja*i2EpBD$_Wbi#pufnKF+M4Zz&URd=vhFzWeP)s$7g|$7C zs3KK|H5cq)DV2TEQY8b9yi~#}lVqs6HW6LaoS462G}*p-2G!QJ>`d}{*7ad8{!jKL zW`+Wr8M+TPA5zB93)zTiCTO(z2~+I1TTJlXdY|Ej{8)w+Cztg zO$kkKJIMi`JnbTJtTKzA`2v14gMb!X;t|Z_nvWD>aXBhwd!)|eT-@bHBWghE_RucO>EQJFP zUE%kR-!NqQEc&(kIMzOt?o2MJWUE`lj>bs$$OGfqY-?aOjemLXfE#q-hy^R3rG#G( zq*3%Td6cPmE%wVW5cskW5bRqA$NXoLlH(S!XX;)UqcEM)Ut~$E>}-XRSK?{1$iSta z0?I7t6e>Nk#CShxrtnJ$c0ctMwEAn|*@_$V+H9u~F~7A+ePch-Ji-9&Qm5jk@Ab^u z^d4Nl+6j-RA0&@6K$ib1#QUpaX?KSWZB4o-;b;5;L8*!sTX`{y$N?yqJ6k;Z$(F7x zRUxIeHDKE3D9xFOl`Jgd-wOjX#N(RKF|$)#LJ}h z^RAF2ey5sM!^uXf4|o-RSTvX~EZKIXZf81o{NENbW{NK;DRJknsJgm|v^UK)5E((5w|r zS_YEtpCVRX-Ua+(TjCe4WYdHh?CQItWI@v)rRX>qUNyl_Ck^oKtPAku`Y)kZT^7jR zk@8=v`mAr#0lHjzR4`a~idGFMrT7Wz;w2d`x^`%)u;y4Q*hR==ufdj>xWE>B&5EFp zw;A+nErH%h4HG}xr3_U26HB`Wtv4e2S{W;<~sRr}-b`Bj1|0wzf zU!^}!XJE8{5PsHe5X*H-Bva1i2{vCHadmW$yRP+}#Dh2->T4~>f$Lw6b%!6BD0QW#PORw-7PZ!tvdS|#O2Om{)ywsuMDXKBXn zmJMVz+cM4gLl_lh4iz!iSfZOV>&Tjp2ODC=l-+Ky@r=FTWgLUc&lQkW+b~R!-NyPI zbY;oeiNcn`(debG9hdH7JuRo{+{aPaIA}k++N{SuUHSlafm)dNI!1hO zJym#KkuN@et%}?Gtz(zYd=eIH)u#!bMR2ZKq-i%Vz^AV6(>tdbk>?(J`Wn&z2W*QW zYEw2#$_pTyv5PTxeI1?2cng)gq->T$Fr4AF(KyBrDr2*;PIDH9Hfb=JTFQ={crUtC z-iJu?N&4Cthl<=?1HcLxg#+s5w99mw{b4@IZ7HteG^vQGIh`ny)7?N|eo z#oaK(qA&c@R)kDxe$Qd187d_HgAWgSvww#|_+yKVao3SMAk*;yW~VM<_;v_V@K9E? zeluCOhrmWvO9-;O50B?f#lgq((9TwoT~5v<^9p3WzZjwCp<3a->2d}|Hn6mc!T$4! 
z*tBz5}IMaS48C%|=q$hdIe)%ve=-flreo{KR za3kDzSEYUbc4E59K_N`x0?V3}0F52ScuA4Mlm{(6bPZ(kS zuTj`*Lk=_R$$(Dh)6A+{Q&|2?Mfw>Ov#x!5LUoZXTmR=6GdH~kb9(j)uMb~lhl6vd zW7BTZ+hfVDeP=Yme-rxsd?9QaJ%x2u4FcDic{rh@9sWE~gSWZOaBhzoJhJ@*iYWoV}KKub>aS{Wgd$n0@3ugYSXG_yMeB=?MOKfI9nO zqlU|0DYDETEyDB_d&%h40Bp+n1hFrAXzchq@cxDoxd#Taf!fs+cFUfHt{x!{a`U04 zqVa6aHd(s5Sn9!l&_?r)dB8pMhd5Owyk;WJ7OWb9Q@?wleD45B$LiB!b>Skc2~x_U+9#*mrsuE6gbn z)T0J4#iJLYYD+70EA65~XCF#t+oJZ%|@WPCbUG$```G8zQ>YKkQ); z_qPT=scE80N?+El&<9^^noox3%J5VAW*pV;CR@~HPdeVe#pio>u#Tsjr8?evwy}B| zHPsae=f^71wK5WC8Q!G#A2(q^;Aq5*!$OG19{l(4G#!l1q77B+_^=D3*?q-*;^!6C ztR?Fvj@-k;(S_5=urVCHr(J<-Ss&?J^kS zg{^}I(1WB#2r~9zt`B=)m18)>UY5f~=SWHNqCI$DS|Rvo&n%YxLyPSH3udpPoWX4M zUMibEjSWcqDp<1r;NY3-V9S&tcgaNlTgx`~@!iJ zGJ_O9D#Nu~&*)-s2DypKn5XtyFv718AkD>SzUvl4?0HD4m&2!h62qLKD{&^Z-(&rCrCB6j_7q9#|#g z1L4wGzMF8Lu1#HsuXD46iwS>Wqo*3E9^s*M?=6tIv7Ollr?TAkCZ-kdjOD!}*sRjW z!k*VBSYz&Bu*g2iRI7$!L(NrjNt6%fkKRqPyPd=Xm2ZS;vDQ>;J&CF=n=rljbr3cC z5b3QM!c^wi;{5O3{F4MjoF$!;Z8n(FI~_~#t$jnorahMCKlO0!$iWnsoi151vw}~& zUj^TpBkb5D{XQ$)Cgsf0K$n!+V?!@kD`n)B+9Pmus5=|$sou<4;gh)j zWdJVXtx@OtEy|Dg!@XT=P>XjU+0POR8#5SR6d2N-)&cB5&~+i@zeGvNAYBZ*unC`S z(5g0&TZ#qWS72fIO57fl0{^)eiUn~6Z0Hgf(%NTERbJ;Qe&c_1w!Q<(O%{p`MxpHU z_&2b5>|(f+pn+?YwzHY>PhefBFMFvFh?7H#!Q@ChstxpHyQDcz1Giqd{Fp0dZ+R)6 z>sd+>$8W-xW^?fGqe-HBFeFdeC;4Wx3-V1@qhyqnclSF6MrBeCcGY5x$UBZ@V{~ci zrQyggJc`ZR7KxWv=RxLnIab%zMvvC-hDCFe(PZ9iwncsfb2O9gp)?dH7&btRY9^aD zNXk>zR0?;l=uqZcW9nAbCFf7k^vu?T4IUUnxuZ&<)i8nbH@&KIQ#b?C?H#ns;~}y; z8Epn=!;&;d`1}1d{Io4b$E0KI(b8`abqT;}$89dM!;{HB2^O@vu2Nv`G$V5n5# zz;lNW#+`G95STk6WRO$qTm&y!M@z) zfc0J}N%cWd*X|4)@-G5~%tkou{XuxGRf(f`Px34KL<3(HVEoAO?Ai9G*rg?pZ};cp zP5pWnoL?t&Wy-Se6Y_AFf*u`f_(DJJhm)cGA6m9`jj&zHjv8i9X9q?sV{ulgRgUr3 z@m{VjJAE@w>^)^E9xFUdvbRqQN!MCLRhJcv8@{)b-$3c zxgE}z+@(iS?IwNm5oWM>A#3I~FyE7ZA76XHD6XKLM=Pp%0qH zZNR4Yfq1e1I&f;8fgS#raEQ%0s^uOE3e)s(=a(#YZ(S!o`*)g^PMXGgpDGe8l=UIy z^?7makj40>y+;zD8O1u5>}7gurF|9K?%;ol2VuAKFuZ1h%2iRcXJzY!v`z;kRiuZR~3QMa7J$7 
zA-GzYjXlp6(7>EpvRl4~8J{={3;#QSl`I;MPjMF`Qifwc`7U8w%QJA8e+9?=0Mzu< zWQ*);;iRW6$tTt_>k|_&Q{g(jo0Ly=uGXToN1Ieru5$NoPi9%BYpLIDLl}PPEY(!6 zXYEtgg8tSWl1rjB>@QRSyBSYm;Pxyw{(CMo*4?4hCyD6$V7bK7e?RLrd~H(SUElt&b@R;o$>DzR+b)@+GmQeTuEA!W70B3W`+Kc=!ei7}b$q-rsOZoYX4 z4cQyWtkRe{yc|%y;$0z&=)6I9?mQ63oT?H(gD0-*Ud`GL-xlt*MTzMy%dp!jT5#x` zN{eRP6=%*k1xGI}16iY6&^}2Q-HfDJQpTT8* z2Ibl33CC4~gr#$n`4kg#wx>##GJ7=G6;LL%&j&$_wq+zcnx3B>2Mf^xua9bGE`NHl zlnwckiwP|xm>hydfGi55A$jNaQMw9QGO(T%!n^JNpOsKbDch278FO>viN8NFFKTrmYx@O|> zUiabK>Pakh&=_`AGYfx(>o9pMBi!I4k9PUFz-wKmkxN9u2|m+uojsuZxEeJY{OM?7 zqtK?ki-kQ=WV3X3uuC=1$!TU8BwFl8=f80*+jTnrSUiQMr`bvz6czCNvt;O9wOC%GEKOZx4)&@(7~(XURjPDB_VfaZxYEKdIJi*H6a`3f?aw9hbpEBvyTs?+|Jc3 zJn5pux!x;9g4^7&!i#^xf4Y8q$xc!tGlM(Op z--X$t{i;3ed#WXA8D>Mo&t)_+?I#Tgjb|$D()W1vI+~`wovpn08T?bC>FkNkEa3AV zh})S!6AxY&1J-m>?bQXeX#F+%)|o@8Hu3Cq*)K8Y(@-`pAQ1)(+sj<%Y)vx(fu43CrPm;#cyJWFK`W!qz)Y=x)?R7w08X$Cfa0!x>MyHM)w{ z{;Cz0#_z$(Y<+09d&4r<^~Xjt0rf8(6t`^j!wcIc3Y``EA?;@fE%J9m`RTWW?OICA zHgp=3Yg1;ao5#UM$`ay~eew6^T+U9v0-m4_^6MF_bZVq+6;B}Rn<7>2oJY4Er!bM4 zsc7hEc)w8r8qRKnbLNUP`^p12m*R}`m5I=I4*s|)LCSC&x(L3ixsqj{ukkWlJs9gKpzPRc@uJ@~dNgh?$ZFh%#~b(3z2j1T z;8!KJ*OM^!d<7Jw7Q?7hEpV>U2RN@V@NTjL)7Xo`=)i#(ci4-Cw(h`J$G_0?w!1Lv zT`FAKRxB*D8VJ8ue}}`9>uB`$!;-eViL^GR7Sj7ylkQRzY&G-3ceDnVjqA-$zbv5- z?tPf=M;Gii8c(|mPKqbn1ZKGYKS;MSqIuURkZYhm>a3d&>1tMN>rMhNc_XOYeJ!{= zo+9jeX-aw-O6>C7Wth5a8b+RITC!GeY9aO~euw0JlOj0RqUeII>s+h{XV_&7$=Da|OFkMxC;4jY)o0xgI-w+V(A z6w)LqFRQ3M47(N=L&L7#SfHH>>RVL>oZL+g7lwn8{zHuJi0-%FW2FgZbw9;N}aekOXIPleO3uJ9xFra*+J z9&LS@E$!8KK#BXB$kzukKZbMdE^z9?i!2LtNTz}#1)*^9V>=?uZFg( zl@RUy3YxTYS#ZyJ;nn1$u;G(*jv8;sx--90;ricV7(W+$ALhW%hr7k9WGNfG_<`8} z;Yj#>b~ej(Xco6eO7mRCDKNIvkUV6ML8@7*pfueLFY14wm;4C!@99tI^W_UvO{fO{ zI4#`M*e2#jFBT8~S_5_CZ@{1YX_V77NVNC0r?K~<;BHZx=#wo=KMR|o|Kv_^4jPHw z^2@Pw{Y{cz5rbE&9l)d>@T#;At@>0U)pleFy75cc?uMH*wlfmHloinDr&13?HypLy zr5XR4Rj^S;SJGiW2?uQKf}5vx@o3o))EPe%*UgC}rAdFmaM2MeTr-|sE_h5`-{Z)j zy$ENnj$37M|BVtSE)OLw@6+fB#8%s!83-?bRFdZh$pPo~g4 
zVJSNNr-%Lb?1lm03m24uJ8}|wWPS=ojo;p3tQpd+@O_}kP_^@*lgttW$7Y)`SaccSC|ke|Fh; zjHK^pC1!6{1EFnF{(8(8(CRj$y>jE&i}&lnuHzH9ud}86hky1CS3x04!vry}&q zz+HFxk$-TLaDI+6TUhIZ>D8XF`~7L^&wKcH7Vkaf{>J&EZNW>BU z>{we$2;6hhWN!~nrq$9;OOF<5hWyHLcw*QL4SGk(dg^s5{4toF{J0cCf+w+Er%piD zut-cxYoG)Bnt>b9S5Q5@4_mvM$X%+tCr@f4AD533_u_lh8r%=-BV5G%Ik#w>{8Z>% zmq3p`r?4?Wi}0Fqf7JOn9v`JofO+Sh(v#R1RN=G|vZZ=U>B_k@;{7bD=%>IeK6k;0 z5A!H7Ngh0R@237$*0{+~l?h$xG-{p}`*(Cd<5!rH&AyQWh6f?5PbY>8XNq>i*%8A~qDW+TcXnTE1N3?920 zTw10>RpMw?@njFWXD{HKo`>L|j@LA*HXT)}U2vzPL|hc$1HV4G2<7cg^jP^E?fA|y zIky&A`rQl)F)Bd1lt`4he-oxeU=k(86j(X+E;rVt| zRw`v}=M6WcALHjynQk&PJWH!Or0a>cj|bwc%gyBBWyI3=Wh%-1@4Dar=~?b0ed+%{ zJr5;D;FWAH=gJFCFHw;{?ec~ziK^xF=X~cbEoPi^ z^GeP&{VX^3c{F#EZ{;NIwVbI-7I&(02RB~v4Ck?XE_cE>gj30k=9X;_=l*@IAG@&c z4lYomh>HpCb2=%W+_gdbxSexix$Q^%xSUAh?lhj|jyv4pGRKB;%UtGgV@q?n zXTB!EKGSm$$COucsYhmW$I|w5X$H%<5%O_d=K63hRSe`j)JF1di;i`w0Z@K#Rgi9BEU;sY0@{ex>YdCBFj{KrN1 zs^!MXPUZ3k9_EZs4B$JiR&&*Q2f23VWUgUR5jW=cD(=iuUH$h*@nl5H@ z=4H3I?+tSN(CgQsxcyQn>Ix7dY1rjz9fglh<>4&lOpZ1%a)7Wfm0v2+^l5IH6)fx`tKn3LmN4ZpaY!C_%YmoRcYK9 z>j-W;J>zQm9b8RE26y#U4QG~@$<_C6<|fy7abB8jTu^cWH!``F>wLJEGaG-LduMJg zJ%5(KPuuOxyF}064;1R~7xSt)$IaTjX1OZ=;-?nBXTS91*|!94-J>U5`I1uU$t!`M zoBx|T-(t?UZfN3WTWRrbcJuh-2CJnU+;47cvkpJ>b_*B1Wh#Hm@gwK@uZ~+~-kY~= z<@o-+&G}hAcKqP#5YGQ`6Q^RN&M(%Mex74vIg>w@T=0EWzT>wy*B+zJi?-`HS9Ljl z$;#KzVOgPFf%(_Gp2HZI_PEBEG;5;yNsE7$t3h8uVL z5obP>xaYl-IRnQlT%GhZrum~}&S;GEw53uQ=RM7V*Q*%GtN0Z2D`tD}qot>DHP8Pi zCG=eby{8NK$*xu0v7kC`s#`kmhGY0~X&SuF`w_fn&_LdQ@*l23<1N>@$D6Ng zSLO9z9pMvxSMV}_3%J4mnm9v0b^cnQ$i+Cy@v`-y{7k-ya1gp>8@?ex4poLolxv1`L6+IH*#)OZsh z-E|+~mBCmnaUa9$vtbn!g5}+U5+_ELs+R!pvLOXm0!= ze0eHJN0(j2t}A)$3gue(j%T1l`X{=$>#+Q}DZUpfg73~KB)BZ;VBrI@-6}>m%9>C# zCPogcf^c)2H;k0nXM1CYgmc+Y{O=;p-dCgl&S#)p$AEGpI&h|U1HBfPr@&z;(iM{? 
zm0%vy6;-CE%l4C70S`&8Er7P&9Fj1$#?^v2e6%e?z_N#!Vd{hNmRQ7U6 z#?NwX%ncAi{N+&8nk1v+Wd!io3_p(fAlBFoeOtHSJ^3N*R0(1u|3js!8y>c-gZSxn zNH~}d!{>e2HuE-xPtl{B-Wll2;-&RUcTpqj1C`n^MBTd!|H4eP^10#!c9BVYIre&g zKt}jgoNwWvt!H|$%<4TpY8Ap@Z5mEYs#1(}8uBH*5pI--{C%EyYf%C_B`Lae&Vo!* zv!L7(j`%`X)P|mc+mTYd)bYh7nces|GzI_F|3H6G00y^2;oMd$cs5+e!o|UurI&!! z9tz|yl8ZUJvd~|Vi;A}?5E8zFvNv@|*qeYz#ZZzFw4{^0^Qk(8pSC1&QuKaa5^IWp zP_7~wD#bvhEgDh&^J!ahB`Tf{L3p7Q-B#eHK+_UvUJ<5|clvbSdJ1(2O{3O-wq!Fj zgQ6E_pe^Gdo!31C)!aciE!877S4SORiZNWh8=o~fXi)SH4x5>yk8dreE|~(38O5lm zaE84-H|f`9A^BDz_RNh$1a}%NjanhVxe+yc&Ld%(HYM!LfQ^JX{zQDnj4(dR;G9J! zUdm+RkcfeQ7Ni*BjZ57JAtzanQi}i_Ru-aewHKFXuTO5po z1mj1tV8HtfH1Zsw_Ia?eoJ$LpjHtnBDcQ^RVtB?Q2&}({$0sr|%vXXb@7K|<4Hv-K zQ-DX`KOr*dFr_Tupi90h33F~B(QyW8OS7xJ-z_C^d0FDD8pTU?kJk4QIVw_;Cz1Ot zaLn(=ZucW}gvG^cFAvd^**BmNK7(fOm`+K5Yp^5g7w)gmMPOPLUVQ$6t#4*iYf2Ro zxHahLqG2pN)`i!p(s3!?#HZPDHIf^dp za7=wCORwJQ(zB`pY-Mq-QJ?^)EauUyqsz#0crLBqC{EeQKD5dAB940Y;pSd4Qi1Z%v`>z2=mX`ULO(%SUXl z88six$6A$A&>lW|J<3DQ?0sw>?nE-$@%)`XWX^HXOEn*iE$D`}-dc26S7817*AP2z zNw352VGTR;%BeU5jdBC13R+DsIliOd=?Hj+W3lP;O<04XFQw`0 zr_r?_A*wVrqyRgXOPqDd<)au?KmLftQR3u35r>C;Zn(QA7g6rq zVy5W)!B59`q(dbm37)rapb$Y2Stvl3{B`J04Tj;u?`+i8!@~%{z+}H;*+gtcsSq)o<0R(7v!i43gI~Bv=j9f^jGX+UGvJK~U zKEeUzTFf3E#H_=C&@R4@lAH$YaQ%$IfN^NJ^HPDEJl+1?i1qhXX=|-EeNE~^hukRA zPjJx^l`r5KTt(;msvt7RPo5{HlF9Hg;!EHqZx4CO^1q0$>ZjoUW;euI4uNmYb3Bc` zfTVyLjGIj-4*pK;sEvXAOLa;Qj09KjLoAaohvcJ2SZ>DF=qhn44m$wX->mLEG>3NV zP4xaHhJ`tf$0cMnHP-4k@%E90#Bb+iQ;iqwBLGU7yWIJ@+;cclmL6TG?RTlU(SzC*#R4K3W=?jYjn(WKC!L zZTJbAuf4&NvY!Z>+KIpU9cVl@gSHu;MoVoox>#@0u*4XWrpxHVv0E_MSA?$?g=keO zMe@sO)O>Cx$@_()bn+u+y!!#$E8pT{5T0ye?;RqJxCGEp6kvM)C{&KI`S>WynRAY6P?gVse#i?(f4E>qZq{@K`gzam? 
z-$n&8jdh@@KEX=yOfWAe@67K{*wI}n@Va~glmcU8#jTh3%rX3XWH5Xmm zhS2a^j23o^lh4LR?9OsRqR=%M+)c-9#ay($x{3Q5D%d?W4_Tb|@#%X7yi|hmE_*t4 zIUGS`?gQkHaZ;$u0K84za6wL*ZYS$cHg3A1CJUgOP+{}68#4wt+VY>P;NSM*H?ChO41 zl>LaZ5ui7f2^i(-z!&al*qthcLE=k14B;hlmlT(|w-YBB?wm@u>Ox@p#E{J6g5>gcSoqwD0&q7@g|S$E#apq>)D3b<}f%dzKas~y<{gngH)IHp!>QLD&KQc<2qv{{bIaZ)m z%@^A)lp&YZ=UMT8k(~Ji=TDfU#y1D`yVOwB-lKCmJpkIJ8xY>S4pB9t@Ock#s;h^8~ZvTo*)?e_CEy6Toc6Pq6A8Z<(amdR7+54`ctJV%(LYv6db2|=52-1{see8Y7 zN!D<|mg%2ZPDqDx-zR8W#Nk)bQ4AeQLK=TMmWF{lvRqHXxTY9Q!@k(it4Uv-`A9CI0$ZXs!uU@-Obv?>H8~2W{{-k}2`_z> zd5CJ?P<%^LA>L$Pgcwh!4g2q6TdFZE$KPS~_t}sZv_fRaOggv{vGv64E4 z&9mIFJ+Tz!r4eWqFoz}AX7=+KPTAi_W@jqgccr0TTm!mn|7FYh;?-?;#85I`ulB*e zM`1Xhc?dnHMQGo_5}ira1WfI`jMvtxi2US_A+7)j{PDr``Z&Cde-7lmrYM0DU8h{<_k-mV8I%D9IEy1x+WVuaaC0+1Z8ib-3h8$A4k z$Hv^0u!ffm7d%Fn^f)$4=dt*njjH@!@IO$&(T)fBVx9rbt&xz*_QAW{3S_Q!K~uqB zXgU6X(g&su$o&Uc7GcZm9vnB|C;5gjOt9R%bTS>=1Ri3vZ5UdU4cOUy3jFzj(0aZO zS{?y>N49^|UvPh<5SM#oF)A^G4!B6*uht1f2u~n5ARIg;e6)I6Dx?DMLVU|=IC{#___}ml zo19LA8*S0AxgLjlV-YFj3DMPxZ0|h6-NG zsb=hTf)l{8&Kr*sVsUb=J_?F9qbalsx7Yi?L0^bYeFJ`~zQbn2UKnf}z-sewtnduQ zqUHziTPjNT3+iCW849(3i4e7|g;CHC7<^wvS2`ylB@{uYUOHjc{Qzp+@CO@@bCU6r zQS8}Wh_kC=5$g04m(!goM>!1gnL^}KT7Vw&!z5cco!0nCQEm4y)-zp+=ThJtS z>?3WK1H(|Bxd`8`KEzAuAQaz7VtM=yavvn2_p>3~3|ArP`B_#|2jgtJ{{rcG(Op9eaz%H{`$pWr~SggSWQ&81_%XEG{3g>(I~@76EnN zt#Ie_$9lyKs7$$s?fYEuJIn_*i>vW3h?{!#XW&VY8RmVxfu@f&*NI=(UOd(>$w>$yGPTA+)muz;-dZdl8=U~R-wOgn5rk(rl~ zV3?1Y*X+se=@dFC84FLI92B}ZB31n$X73-yYQ5c<_w6nm2O|-_90MD?()cvOaPXUD<8|mFurDQ?wQ*=1Nd6XA%y`TtoH58a!@} zh2uIKgd9%9K^=cswqJz2(QT|~N``^jcl^0shR(^utPej4zef*Ic48xPt{p?e#bXc} zJ%iF|*WeKl0p_Vd^y?jbdTNIKU2S+0`U-aS_t4Z;iu4U3IIkCpOM9g)7Fu>7sMpDVc`4`2 zD6)ogajPjEk67Okz3MLt|2s?J>klGnydK^hTofrgmDHH_^0sag5A$EbZ{!JNCOYxq z(G86MZb#zN^;CN)40m_`2iwC=_}M8%jl$P3&SqxXpXH#$TnKi1G~pGah;lV<@_u?1 z8k|1h3=6=gszU7YEr!T5D;zmLj5W9YG3E9hl-h5>-?kVyhJV7B+nluE-#XgdJd^sz zB&ba`5K}T^QFc*@wwy~sAx|?pSlzriFabS$UvSk}mOSO`Ft4BoP7a+Ax#A7YZf?4l 
zSBkx#YH@I&1KB!W2wNWwp;itWa7jS+&weByUQXAW7LnjTf83a~#lS3MLkq^pTKVSKU@~_faZN` z%uw&ds#h)O=O!Y%DE2=T=<8DfvXK&_lt=f_FR+y?)N8Pg^{rtnXFKn+Bu77Y+WK=Z zc~0Y@Ytp(@{YaGZKgy7euqYKg+eGX0wCGl~3>}V1!ydg0bm+gi6w9=Yqm~BrAnPFA zUMk6k(b}QAAO;KF9-?#20Ak3zg_u6wZ9x2OpNwe%DQ8zr&uKUH_p)UmeD2 z0(8Y!2N8uaSo}K`t2$+9zJ>E=qD@b>JT@#z(#;q%}&>%Wqd{hpq~B zKm3K1i*D$>E=gJ~&6IzriPGlzQ}SnPHhX13B4Xp{@*2Uzl-2ZnOE*;hL?G5<2aLUA zNcOS}X)(=tO*7NYEfeTYxi?*Ddq(lHCup&vJ+*Kpu--A098D^qP?U>%mW${y({mK2 zHKKE#1ns`XPghy3054&Bs}_jwhk9{5vKo6N{a|(}4xYIhRM%9G{7NzMSl9sd@rN*~ z>qd3JGpsdPPCcgoVOa+D2;2i?M7t1rs@8Ffp2ks$Irt*cpfTvCZgTERAyR92mP5A>WXL`tBHD z>6|8T_gCZk+!XNTbJ8DHe;qKWhw#5!SYsLs>qxHuS7NNTOdjS3fCx*-<4)H z^N>SJ0X{2#N8`|0JSty9+w4EXihZ`J9CPU?(?wTbGNXSd`N`#T0eF^jP_X|moM`gG zl_~=@LV&_)fFK1bS!?(PexW}Ao^}~Lu+3ky{RvuhxC<{ zxA_v!Q!h&0FqJW>3UqlOfVSnu<5wW-HFloCoL_;IH5K&A{VGlHC1P0dCfRPeLv3F3 zX=b7dNqMTX-qMs@H7)3YwF$jZV7_hL66#T^fWK!SQck(S>iG~{-2}-0QxX=rI6!Oa zN$_5o1Adt*e9v@+*Ir`?R)peR$P}9Cwg6A3yu|75-?+_sAa)ZJo>}Oi-b-q(MXOFm9()FxYW||UC_wg4}B);~P!I%OXj3wJ$=NLf!7Q;qp*N-9$!pYjN(=DkL%(+T38%le6>8F)W68#kxAVaoAt7z&tyXn zPkTI>6%4zzaVU=Cp>Kxaa8;?q+?qNxe9DJ_;}>MB`-4mKK0-NC@G&L`4Nj)`G%FPw z)B@;=of9?EHF{{anw%%B2|fd;v2Me%&ocBaHy%e?7E*GJAf1qtqH*`5drc$RkU>~SF)jZ< zIF$9w(51SY#s4A*{$~wt8+k04WsAlKkyzsO4BTxUNcXM;&xI1C_=G_H%LQ;02~!iR zCvVudgL{4^>PnB`o6>fS1;)d=p$w<>2k~X^LCUC+rn;;#IEfv=P6J zJoyN@W=>K)B}flgY?cc?Taa!4jcG4T%dUvR;kdu>;gqLPAt@T=F{8Xoiqs#m znZi|N=~4R@k`154z+yR4ndONmYlUf(#&>uOs*;2BJlelVh+K*+snJMeA*3z-?8VoM^g0^SdC`=;|XAf^7XSo$*)1XOr?Vh1?`3rPCF@gMG zHLjKl;rn+ptZs0Ezj_}c7bYQa7bhM2vK79y^XP*27#!3baA2H|?vFpi4DJUw`}rkG z0%fVm)CfwfmT)>2i%)6lw7f-{Dw+TMqF@#MC}w(hm>hY!Fu}6Rbbwk5J2JdhL4Vk24M2!}bfLH8@H5^cI>K zE<^r4U8o#5M*G|U1J@}7GEsEFm|XzUkM-cd#ZvH93D8GzLl~^6z^l3QNdLGt&2{p{ zT#J+R{tffqZp@)o54A{~s}E;>O{Em~x41EK4Gc! z$(XMQ%U4TC_%SENtr4bEX`L8XG9cj%^+;1+N}QTXV(@eJQYyOuxn6rLjBgEFoB z*?`ZXJhVL07Si49`{DSBHY_49lXsYUOOtAuC*SzB6-Ks8NxMRl%6s3#>RA@fM(~k$ z=_^R27GrCl4eY0un?o^yf6sEZ;oOHWo zJ{3A&r+K#Z_`%(SU3=S6@mYj&FJ{8%RXD1PuD~p20?+l;$Sp&THmWvaa)ku_nf3-! 
z4_4DmF%!DQcos7!VKP_YB%?+yI%E44Kk|-K%d?wE=&nNYJViPxnv1Jt!OYjXhm#jA z(W07z4;Oc1hocXq`;OxJe~GYVe9wR4=}e1u!rdMLYO`ngv5;{I7FGDVJR0X*@8aL_ ze%xnT%LL>0-1b*vT7*0$KI(uKo`Xr~*N#WMcN}(D z#o$D+3faY-pvL?ZIQNZXx8N@vSDr!r3R`KnV=lJuYQ*m;@9{Dw8r!(_DRrF) z`7V4w0%2E3E-ML!d+jKpnrTJ91z11CL0ekHX+`iLK1iu!muw%LPZVII-ah(P-UGkq zdq}3xA4?S&8+cWO9&FPgUhf(7Ziwx{rup>9rxD(d`6+)xHl(v(VzO%<4R9LMbcMfg zZ4bu#`N8;gg5|BWAk^%8v|ptg zda1*Bv6+|3cp1BK;2{!*MM&hiD7n~0Q2e3kOQi0dCD{*F)Fh}zu7`AKf%`hz`8W&N zm(8fXPm#j+5V`XkQ?&JVijXv+Q!^gZPM&ErB&b0GS=XuWyb#@wG9|MGn`pPQD0Nv( zp-bXZNi_R1mgqf#rn5V}v{58a%g?A_wP{b+5zL<+goKwbVA6LPobSAGT9o1YiyFfbw3LmYLtTtSN4C=T zzElkAZKm1!QP^lMMqCoHc+H!Sx7P--RQEh2^cd@~cNj+$rATa@5UnlHqQgx4bk=_X z2Xh&^-17;kZ-?NxTap4~ILK60g5KFm63^C9e4WdDjKogxZ;ipGEz@YZH5ZxxkfgX# zF`D4#p~{odcpGv9LDM+tjc_pnZ-2pdZ63-uzeoYq?HKs{3web{s6j-UPRx0M(w7TZ zol}LR9)J38)i|0}R2loGP0zbFP`krf65{fpstLAFO;d6F$S59d&qB@O3SJLsXd~_hX1^LU8p>p9nS{&K>wr{~Y&PJU3mW=$+O7NAgp&*4^WR`H! z$~(2-rwW{uHlTaDigfZ@612-XDCzSZ3~W`TcVZ%R@JSN%|JEYytpb&v7=gp<%_MU1 zE9@BSQ18Ele!Z6<-pn{u#!shWxn@+eI(5yc1a4x9uR}6aGhB#1S9Q`qxrs!6_R2m$ff# zcqL7L?)`%9$2_clx{OXZzDKG6ADygr0}q!Ly-Me&_iLt*(r$Umm-qmmJ5O=avJ)S{ zn9(n+H-Ec=#9F7&8J0s%Ce~s0xH_G7Wcl&vHrlTdN5|d$X|<~;*&3cFw;v1W)^%q} zlD|ea+on=-R}R|eET>+sxs>>y4%t?zQD)jYvIrI@mjE$(y7eXLg&(E=R)&+a9XAzD zHzI>ojBS3uogPQsf%7OgU0dlz=bT;W>y{Zb^sE~n74uORB1rpxpTyBiH8^Kvia)Ux z*d^2nYsQ=@_MU@P0SEos_Z>~FUJy3^i4x09RQ(^^NHn}w zBLP8aYPb4=6^r^IVrYjMLH_t*SBB$ZeL86tXYpf^CITfAp_Rp`Ccn2(Y}^dnX})OM ze;wOGGvR-pmuC8uVn`h`}(8U z<#7*(MjfD6>H@wKk(fDWE6hZF@Y*&GCj!4=ftVi_WqiVg>@yH4@PazqE0(Qa(G~F+ zBLBrgUQ(2fL?ptKGYb`d9dl@* zV;pqSvQ8qmVhs+C5>DB=!4mJniVG5%bc2kYcE8Jw>&=##} zWVU7x<{0auYQ;lTIx4|+`6tPT9dV;~)y zf&9KYba3h;LBj>Al6k0qbsu})Mq%!paX4icLQ`u9hvtjZ^)WN>DtTeNkgfNeJxBU$x0XCO^W#wGyY6-$T{KB*ZQ60{1#s9CN!5k-JT}|D}%Q z=SSGQCkuP(c46wy)i}(z3|2pO;ryb_aN^zrH%}*=T@(zSmHUwQI~?`%o#1rzGxSbH zBB5p(=Pt6L%~FkGMKDf!={sJYN@s z?}m|(J}QGawpNc6F(&r%HPlXN!{`nXvae*%S>hsodYQtQ9aDGgE*}Yuyuduh_dNE0 zhwEFWQ15RMx_;CTZ*{IgPCb(9$2f5LWD2Zj9DqkCFJ0CNf^g|uoQb-OP{zsBw`8OE 
zPb1_F^I-WR07S%0o$ zo&5*2&e#vhqE}d(wg7*h-a}evGFBIMV4^z-i(mDF6uhB#@g07*b5MS1G~Q>h+<%nK z==}9U^pqD69Iu7ig?v009f3wLV*|RA!DZTjZ8L%~B6$vRgOwQYT8r2~Z~WV92(GPX zq4kCF5e_4e2x2jT?-`!AIpf{CN6_I|N1`ddC>@lfyiGhNKpJ zko4lxX^(>xy?WzG?yDnlq+kRJDk4;5kP0>DM9hA@2_7wth`#HC3CmS9)v^ow7B8WJ zO=&n6T7_4u8!^<$NBY{uOv@CadG{6R)}6~RlAc19`c@cvs!E~3Qz#++7~WiUL7S2< zCN{2y(8fAMCCoto@v|6E=RmksIL2k2A^MipjypC$?QaM;dJe(;?`yty8>iJ2IgtAbb+2aK-BfcKMaIJ?}D^*S}swl{^m z#31$`a6rK414x^;f8t>9ImQR2%F>ch)P7mcXO17>f-v?bo$1awYTMAa2=4rPse@e_;U?6S9U`r z$C>2?DRRoYj{7k~@S7NeT89GNz8Q-#%|sMT+kq83eQPSiw;LIa&`GLrKCJKaiE!E7-ivE0j5UV4^r0n@6US zmjKg5pRYmlybRnjtH4n7X&By|N#A_ANkCefI3orz`&|NzKTM~Eh6hnd4p{qv%@6q> zz^`oP<<3lkN5xGze*cA~e8c#6{uUI^PNRy1ZqOJXC7ZFjdW7+zf!{JIu91)=d(o$43o#_w)y41t^3`N&o z?46=S;ma4&>O)hB+j<6l<7mcqp+5L>tfs~zp|BnMgo0n4X!s#aM;R=}QJ#(c`fH#p zRSuKA%fNHY9V$;HP`%y(MDjnGF#1xC{Lq1z3`E7`===%RhY{wr^OB z_jZArQZ*J-GaoZin~L@_ecbUO0=$zj!|XBIlZGMZ$m)CX(j^t5{_BpOYmzb-~9~k87JU#Pak2r*Pzj7OUs?PD9b~1$pY<7RQA}M#u*FH zr124AmxX9T&j%idc_YgHClfx5T7-oaMbMt^Ge+@{BsZ9Jd8xn z#rL>SEYP3K@o>mklGr}_ZQ+Pw)chO!`fZV}lo=cacv4%5Kd zWW@I@B)fuXB$&LCN-dRX%vp<6*vt@Pa~!2iK|j1PSV5KEBhX&UOOI~_ zk(AC)6n|evUw)sZ>#NwjL@LYmONwx2v;ikB>QdBBrrG}HBcp-&^sf92PBU&nT{aVQ zzA@%(Z!KP{&W6H`Xt*6`Gci)RIK1Wp&O9}viFFB>zOn&EFURnyz88&=yC|+sgpB-{ zH##pIfz64yU8O)9O*YcKP$Sy7tP5d9FY!Ch7nxGQ*uv)7R+#c(8PoM^D%4TNYKA=9 z{iIUlL`s7G7#H}9?J6cDBfgTtoxiYIg;tcW=BK8UmvM3GI+i2XLp`_?yPaM@Y~FRo zLrK#CB_6uEF^k0*Y4Y^+g2p0cDqFAyQa;A`yh|Uy1AoGAtuNAps$kV+h5e`Gs5&wM zZ=!o3^KTHTbHiZd(1>}DKj6>5AUNtI!|pWm8P{aMUgIa0KJCYs%tT!Mnv7$MrLf?I z1snvoAoWa(j%e<6cn2?qgPAXM4%gs;6Vq|ds6c#iIQlvs;849Z!Xk?ia6Ag;^J7tS z>Nfr*`=D)wE;Q035cM;Nat)(M%gkcm#{S6(D@z1eSQRnb7dP zxH6NE9u>@>n~!YBCOL-n24xh=IG6@2QEJZahnR#iEm~C%7xfW5PxgY-g$r2vPn(>) zYOw34Cp$fF8=Dd8fxwb^6uEaM88*9M;VoX8ns^^Cmdn${n`=@3whtftr0L>zUy}Uy z43q!WLN_7{pRRMz;wyr*ta23f3#7>8t0eh3A42EQ9QyR30$cOE!O!ZIkr`}FRp!9& zNg?hx-axI7H_`7_+4Q18@ii_gJcILIeIPj>Uo2*Ui#3hmuwzxpBj1T6=C9Q2J_Af zp)59?^d3o*&-eK_#Af^}as)}?1`o}tOU37cRLXsK59hZl(3{-3RJ48z{fYw}kaD4| 
zR?m^w{~1?0T+vha77L!&Q|^E#{l2A4FE?(bpDX6jFYZa4YA_<6Ic$#g{VZA&SBgV9 z8%Z}}KE<8g#Q2#ju+{XYS#9&_ZQ^Ds$PK62Z00#jW+ADqo<=1*USNWw6LtND@vi?K1f#y|96D=?weREb{`?d4Z&-(*btyR3vJ-Kv zU++j{&yph)H%vPr_2LP%tOJ-{>V;KTR$$FXNxTg>kD(QpFg~0Br7zYrH8dZgEPh0J z3Xs#6P(^S@#*xb~}A-rtQgZc9?oK394`qC7Zhp(VZVGZ(h{qZgP29yhw zFzm~V;|}3?DRKgQ31OHre+GWAey!-6BSeHR;=X<=V?e@CS9XUnxCxNeS403~6oo^A zF;TjaoW3g)zuFIUY?dJNV>j`PvFYU=lGIyhM9TKQ`13#m@s6{=@3WL9TzE*oIv!QV z#qikv6Z7AwQ$*Ykd_Ss4B~CT)C?K+W$^K6HJkAyHQtPSLs4L*2^$PzX&tH?o*-T}n zz#s4)E5rFi{@4|qgCWOgsEsP)mG&;Iv-*yCES|2a4}twW#-t8L!K~{YX0Uwve*P!8 zJDtaxY&*>~ZIv`Im>JlWYS^&JTpnaFq z*$kBe=`tSm&rIgAUy-A~ryVFn=qlrtCLmFr$v9^PT*GPGnn3IHT>Kf{Hwf$4`)~6Qd}PPFONd)(VM7i5~D>; zEtr>50#4a(9C=@d6%RWwa7&(Xf7S@Bd4QK}JsVkP<9d}pZhy$eJW)Sr9Uny9NG2r0 zA7Bleu}jDv!N+~xP)(Xbr|$65%`0gzS1yEG%3Lz;VLVOA68e_MM^8_1(o&|0X|y|I zd&5C0+P)mRJI*20lI5(PW)yZN!TG2R9cfCzqLDV}9T6qR(gtMAb0fLy|H04Y8+5B` z8RN~KmET${NzK8Q@hbmlKiZ+;KS^HXUec?+$6VTbA8 zSJQXvXV8sQBz2o1n6LYdxjxKS?VH4&oel8xDuZ8&2yM{&g5;D#gI~kwG?Zn zDJR3Ca18TTtJ5qt>#=xyA9B~6foEej7DsbZr9~yB=w{$>sxE2WbRu1|z2rMNhcufi zV0g?ECyfOuOsNM8vxTYsnl>$9dVq^9JMW>^otBLWkm?)<%ChOk(eH1Ox_kx&q`$7HCpnpe`vG-J*^Q4;YiZn8d!;f=eeCmX8ZjvTqx%PkKS>v;o8zkF@gN6{xf; z($=d12stu`o-~GGcAGnTA1t8bZ%Q$K_!-3P>%gJjj%Ig%95UjkHk(5Hp7s+Q9B+`b zpp5A+%!{2GkJ{%pIAHYwznI2(ShNwbY^Fnqc{%>gyU;tTNiE4dR3Ioxwyrt2-f4%2 zUqdlwJj$5u$Jp!O4TB|KC_A2w{M(Ep)fS;wRqKeO(~ZJg9z$r!0Xr_gc%`jE^^e6!Yhjp}o!nWL|r-nqf__M!&t*qlb>j|;v&9Q zNphdcO)X72l)LB|%N_qg=EoQw)jB~!tqb2*pGE7A1~ymE$ruJLifI~0!Js|Onb(b2 zmcx06sxfgl6m?J4=%l(ft(J*IH}9;}`!At2EHiR%mEw4|T5VoXTj>5YkdILXrrTjD+^yLn-a0LHm83lTsQA zDTyc*$;!%3AM3up_v8LO?#JVP+`r#n@ILQry~cA1>$C#T+c4H<; z&CQ5WORUwYp!p>GHyy+HWU16z6z9i{J>P}F5sQ;7sPx# z0!okPg29V!z&Se<)ZReg$ zeaRm`mN&_>oYKxI1GKv#)ztO|JZ}C8e!lq!9+kfU87E!>6@E!d%&!XkEGPxX*x4ke z_$1Z;EQ4BjB#Bb_Dowe%U!hc4Exqi=8DRPL7wEY19+aMAH4I+mU`U3KTFl}K9Y=Dg zCAHx|;ZPwJb*&W$e;1;*%|=se_Zv}`$tS>q?Snw*{A)1zh1I~k+d{3aG@>3H3#YbN zB~fxF{8X6&>np-uvjuY>0kfwQ*hp 
zxE}I><#uO*u{crc%hF09W0C^?iK&90R58n?i~@<_Z-FGwE$}{;rk-Xdf}l=WR|YE-Y_0=6sV^W>>JXA)R?O@dFSTfw}X7eKx$5%9I{1T*WJz=b=j!RLz2toOrW za3(n!_-zo7n}n6rJ__zOcf|zI|%l8wFB9n1HhH%0+`-A zpYj6NfY?(bYGQpTHNa7&OqSYF0Wcrz>--2DV;%#uJFH%Y{(YYO!LJmhKHOeQye!6MG3HSihNVe#H&s#Se}WPICb82Gu0l> z)<02o>YkDWHB@*NXqxL$JerTdAXfw^tl@w%WC0pk4A?~G7f`$Z2fUmoN^N~T4B((1 zg}b^y>rg0_%X(-*t%X#~M@gz!wi#sEb^|3=SL$^90QExGlI8k-qg>5=DUTJ&RR7Nq zs$_=_Ras|4d3F6^IZ$07?94@A)L}&(DHEcebsePk7F`6hi#4e1z6eTHBb|D+Qjl_$ zHKC;cvYfX))|Bw^XW;!bdymw8snf?<4!~&xsz&4oc(mCU^tpEc!^f7uQ8ylZt^WZ8 zZvqhZWF0u!%FAk#=26!mE&Q;Gq% z?N|1oo{c0ZW_n!A`T&;Aq8sa4Fal*gg&i zwYJef+_4Ne9PR@9%v6B-bUxS-eGEt%27qT3ec%P370BMt`T*T!YqRGqu=|%SkgC26 z3IjU;>gy6pVk^3oKpA*k?B%45eNGX1t-4 z=C*bqRLW{2i z;BLun@IRQiVs{v8BX$pTRAd6jWOm<}QODwQy`cZHDfM^35o$zCojSQM13YPml*PIb zuvNx`#W#L}yes_Rc33c2GAjbg+%E!yy$dKgR+Ici!xbc*k)*!=c@HA5vp8UVHrT*l z3%X?Mz`tH2@c6b25G~9B19y@@?4}8J4_Xft-w+VPQw!uiJ_dmuyMWlmH7qZ?5XeLy z0H6QuXR+;PhTD!9fg0=GfUq6}AhDL+rDg#%Amjn9-J8(LA z2`JmY1z6Qb0;|9KK=7Y5u+LhET0w<@RV<$9yi8G-8yBv7O;%L{kZNSm{ z7r}_kWgukK3OY{sf{gAdFw4zyJ?=2z;`?#XR>^uLWfTM9zd=BEeIq!aaTRda$x|k4 z*d1^KtBs)kfD0$%fP{cEkVdAU$yR~Y8&9)Xi~^v}I06k3W8i3>0(|M+;022f%s;mt zlyp`DK4CqO+vp7L-0uQs9yEbkB_Htb%~@da$(`N#S4xDsS&N>is6wgIl~ zFM#(I4^Y^22kbW32iB1lR9kTc_&Z$%?qqIZXRt%8UhNn-Atw)PrPhE{w`kzHX~Z!1 zo*bC(D*-wVMuMJ}^`OPm8T{!>2J-)@fb|28Ads^OoO1C2;VH(TMKlU%mdXR`Z91Sv z)(~vYT@TWC7lGNLeDK?i)rEG2vorg5a94K$tEtokz_y>&U<&|!od9q`Y%Pd%Kgx0+ zI4qaoE7+sB3#6>?1ZmOhz_SxOLBiBO@J<8+PDu&)de|1kFlRv7R28^pl>$b6E&v?| zZpzxljxyWV3))Y~Q}@P44I^5EflPKL&_A~gTwJyRJPfb|AjcF$yqf`r@5Vs$GBYrq z{|uawiUU1mTfu$p_u$JpKk!te0-QW`3j8CQibCx1Aq1;jY>QKy6!Q8NqHf*-4{gO88DvNeUR z2S1y^qC>IZd-f1WyA%V;zL|iNDeHjL@*I%;_9?rs_yw$39%gxA9!RkH0N4Inb94@?_B0@rV{ zHJ~;C9DGm<^h8-4gXar)flq_Mrt@IO>k{zsrX8@loej1u88N((@!rs3<7FUy=DXob zD;MzW>U^;7-7-+weHN@^_an}N@?clmC7{V_j;<@`gX*OfV1?2-fIhAVH*z$9=dDn{ z6>k9)tQ&yk1bgqj0L2;=!SiLeR9$ zmEsy-3r^%lgT}}=;6W?=0^<=4aNhL zqIuw#)hUpBHyjLmv$NaU4~9Nnq2RaOcJTFH0uY&vVl}4MLA0Ya=>2mH_ZAL<`i=}RY?%c* zUYk=T-B&X 
zsbnMg?Ucr1T1DV-N)z~x>nx}`<^`;dhJfNE3Yhkk0KOd<%!I^JZ^tVs zVJdin^&@;4t6?qfNN7%!1ft0kW?`OM$}e)H3U)R)Qk59UnxY`Ae|Jz!4-aN}mK%VsQ7?)q1X>MN8f!57M z8XerZ#K=b46#3nEX)aM5V%~o*K>;OVMC&!x{Ak`0mcQsmuePeee05ySRUuPz4mY;^P?&;9bc?-Odat3;L^1=B_G-1$&9(uU;JN^1Z6kT~u5nfv94pS0r zVbA`{bcyyL{P5>7op^T_G<~lF<@n-ZX>~F!^27}0Brb%F{VH(N>VEitI+?!iq5XbG z_8$Tw8GtCCvb(VC`tKGi`3}ZLHzFk(Qyzbel@F*%fDr)v3?e}2EN1N*;>OkYzF?F3-`KPLB-wP%3vH!>@C_MJden3a-Dw&D zr-Fqkov0EQxi?N5)$w;3nj8{z#i^h+)Oj6{}Vgl!W4{iu!?yg zd|4TaZ=Jb_k|wR5AG(HcD9hKM%nmLWf?&}`CZR>954Q#t{zi zD_}mK`LOJ-3JJHCLBFH^!usj+Xh-`9-L)-&R)wEf32`X4jM|64N9&+4yDiAwXcu|? zE*GxX@Ip-;VMs5)9#$y0;}0Gfj%&w~fEiob@l68CYm3I~W{T(stG6MG^SLOyvYSe+uV=|Bfe>3!#pMAS=s`3IX}f)w!Ug8yYq}A_;$cAjh6)qi)_3{Q8(K_USM~Q#qsXEQCmZNCUq% zP{e7{#-zJllBh}#!qb5vB*OS6d>pQfy6+5;Kh3fzY}}Hmy*0#4h=<}-M?F$~EEaDH zET%(+x?w_S7Bkl;jI7vLNVV_^+U)3Z=o^tq|LXn$6MdFr#4JSoRyx=~vKa1bmW2tc z@6hW;FdTeXN6YTWhK{1qu&maU91NO(R9zyvmUIdqUhshCv5`VvJn49Yqblte-vxt1 z7LZp_Mc8Wp3HmD=k0{`$$|TNHLZx@wko~4KM$#|@8VBi-XaDrjJ&!GDk5dTA7!)Ax zmAm1EJtgGY6+tM(I|+OAHOLX|RG6~%Gu@*xfRiNq;1OLrjQZUunK?t`u~3}E{As11 z=>dobJMnOEFsv?ZN36U^I&Xx4; zQG*KSf8f9kJ5Z&?ZFF?-Dn6N`4w3A8Tvl+KSstrEmd;?}T$PO9H7{bi{}vSbHV{b7Na=AnSCT=TZBj!WNOyqh(nLfdFr_wq4(z7_} zYdLh@cY-)X=Aj02e)23dh7S6%9IXy9MI#~WiJbR-bUc1P8VVGLU8B3O%i?dixKE3i zJ$;2W&74v3peCapca#y|nU4N*Q$qd2TJQkZ1*rOo552e+hKrxBgg!Y8$+C_|V|h>U zs*q>oV~QEIYB~Z7?ls58ZwztVtt>oWwHZ=O4ODD=LL2vPCGM{&EIg0^R~kQtRBScA z_9%dn=T{=jM-M>Pkn^;VZ#4E~&nxK>!Q|dwLH|whgSJPa$@idC^m|tfEIu3nzyC^t zcj`~kQ}Y!`ve{lDvRs0gXFq`amLqH=pklL0R}UNwsKZ|OEQpX{7#r>Oo}4PIfrh8` z5TBea^4KMfk~jI2)>1iiN-F`meMmxjap$lW?>n?zRt_&G(&V$P0pi~$44s_qaKPgX zeCSUz-jK=0kIQ5sqg9gVQ7cVud~|~fRlCV(OfD2IgyfoU1znVG{& z;Q@mJT5?8!ybYuYdt>qCg=cV-MHT*QEr~973ZeM%N;Ewu038pm!mfVOD0250IV*XG zUf8-0%^yjj@$@z9=2y^Q++T-L{ZY=?}L^pTjyY{$Uxn!zd_Ll5DivMfjT6 z(}TD9jZD9*!A;x~MBwdbI945vd`Fid?=!({6iFC7n=XQTH!LTCnVxjcQfZ`Wyn%Xp zx($AODNQte^l?SCJN^020jR@9@5pD{LUp|m_#i$Wn)t^N!5n+MKIj45G|Vp$p!tv8ISVz3<*aB*HNz%BjA{ z@|-wH?V5v~w(Y1a$^^Z7R0>;tv*DX&9{eDr4vE~&#cvumpdPIiXy5j5{Nm6k6Qgw- 
zyV{hJjcy`n{a;PEul7CgGibu&zP+U7f)^EQV1j!xGLfrPAsQ7a#K*TBg!cy>N$F7; z;(1^ecee?Vw;LC84EH#pQjRi?Q+)u|_v}QCHw&5b&WI8JEstEvCa}A3G`;xt5qxK+ z0InyiP}uAU5p8fH8yGf%beASxq05KNUk##yyIRy4WgW78%|YaCmkSGOB*=n?=_DYM z7o{~6V1906itXu9_`xo8QA8SV7jnU28@`Pk7A|ZZ@8C3nh`p z1N&RK;k{Qcpv=Dp8sy5OhL!J`Lw!crYF#b8Bz%DRyI7Ric}i10>pxSg2TVv#a|YIw zu|&OPd`OFp2~~M0h%BOw@lO{|+`9f9Jj%%Prmcu? zauJlB2_PDV1sEJ#hSu*G!f7#|aY}(M*;=ZM9*J?2w`U(SUn1F9xw>2|XrPWSv-kaT zNFP2};R(-5xG~ep&(WCeJ6K%M0R5-TphoOcxYQ9*||@REdCtJIDTTI zt_>8?9fKvr^F}mj5M;9*8pT+kL@?#5ldHvX8ZYz$%K>)PCI{_(8X1FD5?w{jg&xdLS68A--meF-!R6p&6CW$ z?1cYKyutS^t;p-SV5krq3WJu;Lh{6fgj=0uf>vFCevx`&Y*|7u)B zRyNyf4bo}nijmpSAG{(lj%>j@m~r)YxT$#p-rAgn1}|vfo`Yv#uV@T3iFbz2_o|cW zfAYlqu@>@IltK3P-(cVeN2cLryT7LiAlrSz*2DcIzq#Duo}M;8YSqn;2Q+}`vH zS7fV^XXid+b&XxpeOWI`M&(|@bjf@0y&w-!%#lhYE za3a$RtFgKHV7HS%4sVh{1=3FdWjPNeLCJN=GPO9TqZ;+8`E zTylzBS8*Y$if3?VZ3D5pU&4gcQ)Jt}x7f+H34+Q=d_Uj@bM!_6RC)6soTc8im(<8S&vqqp-~CWC*E{%mXDm7()r0A6Gx(s`E`oNbA{z~7y1P?@EFaQC zL0rY0?gr=JiY); zE`Ed7@2(_^?)a0+zEecPW(am{+>1t>xnSH>AgR(BMf@8ipxQDs%=n$84Vxa(UH&() zLXI+4G*csiyXBGlU2#M^ZO5uYJvb_?f!1i^LUT7}=^9%Fa(MhKQ&n=Ak?xTos_cx& zO)J13*}s_gRrRo6Ef4gCjbhZN57Tl_kf?zv9K6Sby!g9~=pNZjK5u6u{;c&7Zu57_`opxwCISYo2xgqug1m||&V6OS*(o2`lL$h|W%@_O@BTdWc#-{F* z$Va(`>39)D47g*Ebeav>!LdQlm&lL@@ruZG@i=Xf=8dh%dC(*1IvbCEiykKP?JV&7B4LBbgXPfW5`%3v zhoH>VJ-A@*1X9121)b-Af#S#hK(6i{v|(0?V@J<&T+$uL;r0exQ0z|L7DSS-{lTO) zr-U@yJcs?A0%)Jn9s2ktS>!ZVhq+GWk}du(>1;N#wG&>URb8XtcziH=R`nX*x{#0e zER2NfS|ic7d~;Ggcn>{w@FLyZUdU#~0RFNNK+EzO>Zec<*^!||sJtxEjM?GwH3wio^I;0-KLC&kn{+-82?=SDs=eRRr<9%r$)D}EAgM3xJx zll#tyT#ND{t6ijt+sb`-p;jdR8DK%USIChRJ`*JR<1X%v|ARl@T2GEk7{P$;wSZ>yU7|-a!dwM zG*Jqeg=4)B=$J}xV!L|_{aIEV#m8YNePbVSQ9K7jwSU4D+BIb1b4k*b(+$VJu#sIQ zU3jhADx5f$i-)Rv;2vfLx%W{7|9tzCkzSMvQB4h5-|RzMM_{)?# z(+|~OAB1{s!SLj`JW{+=kA#xtaPK!BBFskIZhMu_WESs{JhX;qOJ%!%cl;RY%WsL7C32geC zk2d$UrIU6&fq|Y0u=}wpdTY~0MD#gi%yb&JJn81FTz`gmrdBX3M|$YmcUdUWb_`Zn zHsgss>L}{TXZ-W-7{lXN10RXk(9Rwg;3uA9sQa-POJ7N*h0y_I%(;j^vc>?uPK(i} 
z#`Eyib`w-X1(Iu?cX7_=)gE1ZfCXLp~8+OI%_iSE7@%A=jk*ChYrqCAWIN9R092KOn zE*2Js6*3DVG?{{;iyUs}FLa+;8FtwG6(&i=;v6GB(&ZY0Lv007K=KBpB)txvnA3v8 z?+x&hne+I%J%LgVT&VZ%5hUy=*1T28pV2i`B)iwI!p^?t__O6{GRfgaTc`NZ5B0BD zIME;4w`S8aZ8SMlZci>u2*N$fztWX1$Ivdvi@8(0xMnSoB6U5zq2#TcnHJYSu*qNCZkDT zfQs#!)H@R+yz31=SypfWTFjR~ADkG@YLO$*d@>WNZeGv0eo#X{8W!OOlk>1wx(jl0 z*CJt;Dtv681hnfHL9zWwP|~0Xi%J`!3#0$x2fy2~=zsOVvN#jY4~inMdU^0U(fOEc z;m5vh~e=}8M7!M1dfpMnxr1Do4{;iY(S4MKdd6(R2>!~2LoqHB$2Hk}F;x@vJ^O2-I z_!~WYdKtYdn1|36x9BgTo1jKUA$?HF7q2Y!gNoTKWl$g=O3Fk-pS)2@yc&^Mr%J5q z;*AH`sNzF(AiY1r1;1I*$$U8W8@o?tAfD$@uT z=$yn-vX&me5uY81*GL%QKQ2mE?;FRpBJXgp8;1!va2^``J%sN&YoW& z`%qVOl)h7!$q9eG3BEotO2>^YB{ABF6z(m?0Y7umkK3>4yB@q`O>G?$lRttx{eNM2 zu!)wS*qz;$94O?{NplKi(L|yg{d{&2tQw7nU)qg0J}kYXo$odLaNit8+~12%y}8Fp zb!~52l+;Iu9Ml4rrNUut_CD&J`~;_y_LzG#K* z5Ek}x;>3K|g=@Dlw2t&E9PH(QQ#vikCLRW_3tmhbZN&a%B<5tNJkAIf?2lIXHeYkErM95UXvmc&q4byk+t#eKhhR&i$N%xi{OOn{mbXNnQgiYUhIm z-`PGdxdh+N`H36!grSV9BXV6)i2KzSl07YH^s8ktC`g6C^0gVH{6Pof@iLekaAhNZ z&&9%)9AQ$Pe~Gj0;$3oWr4F7?@~{NEyWi+80X?be&k1bZcNW>FZ9?|P)^lz=X@YXE^@&Mt8kz}MhD&rs&{eB> zXkO?xbolie@^nipSuUK2eoZch_ZkFYxPCaSc3*%5_$APdXg}QhCm3lK4%6bcyOH5> z4I*~2n0T$72N$bt=?dJN;A4lua~AVP}=@)>WubA{lFaOTqQZ5jbk87~1F`Ni+kWF$#CssM^a_@Y|37 zaFOSI_~BG3lkO{s-xp5f7hpHuaq$UMsXd5y^Yh_t^#bHyhY7Rr!vXU2a{?V7BS{`l z1QDluqNLjHD&D$64YoRYAUh=&r14-Kv^gV<-1Y>)1J`D-b8sE*`1FbKtvZ8Gc{IXr zwU6*}*&$9%i#`fF*9uX(9EaJTjh@?=BKyE*GUX=(NoqJ=`o{n(*l&jl&%LlgTq<6W z4w0$QeYn#1K1NXVN65fPHqacu~&V zuR)3Ds!U1qF#aIp2e~f4M?*N5F#sCm&3=D)^Vt_%+p(9)J#&GR6vKlQ(u|msw|0?x ze;|2KkcueHokV3xG5iCbH}NPxhTlyN;ly!u^!(a<;;p)!G;0iFiCgMK<8&+(Y}Q5j zm0D<9n-E(4T^7!6uY!j+iNR*8JJ@x;H+EIc!m37Lky7B1SyJn&mimWPN! 
z*@qAD;ix>yA+d>Zdsv5q?WuGGugJP3*6Tz%4OsR)4dE&pHVeF=`k69fC_$$+u zeVj}>+(OmM!%@w(SFk|E9z*eK^gX>vxHVfEZBZJcvfX0Q?TV}Dm#YxboGf9uZh4WH zVG`)zd=XU9)`ON0kK#A!QE=;qtw>98FOzpK5|5gFVfz1JIAb0F3*#@5PJeaM!TFA5 zw8ByTo+QMVnv3n9gwcKri{PovThQz7O2i|(5K0;uGe5SbLiP4bWbWZpJg{4b9IP{i zf7w`OYE>)XYgvU3>$wn>X=7%em;)nX9LxL_yw3E$aKM*8siDqGKXCMcUih%Olze@2 zh@9l(L(1(d$iLxG6gayT>S&xIdrid|`B&M*_UlUgum2~W+EGq2#V^3<(-+~Z2q$#t zTMv}9@x+jh$0Z?G;pvC>p%IiP>v|1|qpk@uSP@3@ytku@kgLq}9y7GNofjr}KB5)8 zF`jqN55*Vd!dqL{F;dLJMwj)y%(r4*B0A?nl#LETqh;>+_V6Q|>>`6+Ycu#x$Qs6W zXgv)l)X;CkHn?e!!AduJP;NSf6tWi(iM218rQkZu{n`riBCbM#KMP3Xlo~M)AAk~X za`2Q-7T((z$J8pR;_`DssL~{Z5oR%jjVmYV2Qtgympj|AWq2VhO@9Z!{}_fv0>YH~ zx7S#1Yzc~bHA_FtY-MbGo2Y12PiU!>jrn9-@s_q48k}gM8`3Wz-*O{Zoh*tT;#%m; zMqxXMr4kjxX)65c9+)2_4xb0IxIr5t)^0_(led%EbI*_7w|o~FijpR3)Ku%U~%rK}o%l-@`WWauLg&+AlEa5JZ={0ICNrVir+)3NMt zVWi-_16>g3M^mZEO`|WPnY1)-xLeT^&)3ouEc94th6PUEc`}6jzZY!otXzcLBO=oI)!2R-k|VchSc*Y0}bNh5zG@#%s!!Kv@Gp zrr|tA984b~@^dNaTGb8@8}`5siD9gEW0L-~tsT3+ZN&PIFQHVBg(j}25$)7q7$N6R zp67pu#uB!udPx_iURPq{9lJ^Uu0G^5?;|{F=1yQmBFRvF0h#p!)Gg6@&C?2cq^%T@ z-%;jd@b?CY_NNf%%|GeC5*2t*_$3sI)gwFyoYBMue{?6E##ND#kf(Dya$swsjU~h-`Eu0fq>F0(eetL+FTNH&M=P6|L#zHscx=&gYC|f*pNbyC$vuwf;K*(K zW5^Jf8YIE!rFzJ`<|@9wES@+{B$DH(f)>5Ye)HfNWF2k>;}yHHuY(|Jw|2wA{pm3C zfG9i}aG312RBdiPJII)ZI}@kFGo)&h8oD;Fl0=BRSkTAIUm~z<56wG`LD0ZNF-Za;3RxA{hl8m2Z